예제 #1
0
def create_training_dataset(
    config,
    num_shuffles=1,
    Shuffles=None,
    windows2linux=False,
    userfeedback=False,
    trainIndices=None,
    testIndices=None,
    net_type=None,
    augmenter_type=None,
):
    """
    Creates a training dataset. Labels from all the extracted frames are merged into a single .h5 file.
    Only the videos included in the config file are used to create this dataset.

    If the config sets ``multianimalproject``, the work is delegated to
    ``create_multianimaltraining_dataset`` and nothing is returned.

    [OPTIONAL] Use the function 'add_new_video' at any stage of the project to add more videos to the project.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    num_shuffles : int, optional
        Number of shuffles of training dataset to create, i.e. [1,2,3] for num_shuffles=3. Default is set to 1.

    Shuffles: list of shuffles.
        Alternatively the user can also give a list of shuffles (integers!).
        Non-integer entries are dropped.

    windows2linux: bool.
        The annotation files contain paths formatted according to your operating system. If you label on windows
        but train & evaluate on a unix system (e.g. ubuntu, colab, Mac) set this variable to True to convert the paths.

    userfeedback: bool, optional
        If this is set to false, then all requested train/test splits are created (no matter if they already exist). If you
        want to assure that previous splits etc. are not overwritten, then set this to True and you will be asked for each split.

    trainIndices: list of lists, optional (default=None)
        List of one or multiple lists containing train indexes.
        A list containing two lists of training indexes will produce two splits.
        Must be given together with ``testIndices`` and have as many entries as there are shuffles.

    testIndices: list of lists, optional (default=None)
        List of one or multiple lists containing test indexes.

    net_type: string
        Type of networks. Any name containing "resnet" or "mobilenet" is accepted; when omitted,
        ``default_net_type`` from the config is used (falling back to "resnet_50").

    augmenter_type: string
        Type of augmenter. Currently default, scalecrop, imgaug, tensorpack, and deterministic are accepted.
        When omitted, ``default_augmenter`` from the config is used (falling back to "imgaug").

    Returns
    -------
    list of tuples or None
        One ``(trainFraction, shuffle, (train_indices, test_indices))`` tuple per requested split.
        ``None`` is returned for multi-animal projects and when ``merge_annotateddatasets`` yields no data.

    Raises
    ------
    ValueError
        For an unsupported ``net_type`` / ``augmenter_type``, when only one of ``trainIndices`` /
        ``testIndices`` is given, or when their lengths and the number of shuffles do not all match.
    Exception
        When ``userfeedback`` is True and the user declines to overwrite an existing split.

    Example
    --------
    >>> deeplabcut.create_training_dataset('/analysis/project/reaching-task/config.yaml',num_shuffles=1)
    Windows:
    >>> deeplabcut.create_training_dataset('C:\\Users\\Ulf\\looming-task\\config.yaml',Shuffles=[3,17,5])
    --------
    """
    import scipy.io as sio

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    if cfg.get("multianimalproject", False):
        from deeplabcut.generate_training_dataset.multiple_individuals_trainingsetmanipulation import (
            create_multianimaltraining_dataset,
        )

        create_multianimaltraining_dataset(
            config, num_shuffles, Shuffles, windows2linux, net_type
        )
        return None

    scorer = cfg["scorer"]
    project_path = cfg["project_path"]

    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    trainingset_path = Path(os.path.join(project_path, str(trainingsetfolder)))
    auxiliaryfunctions.attempttomakefolder(trainingset_path, recursive=True)

    Data = merge_annotateddatasets(cfg, trainingset_path, windows2linux)
    if Data is None:
        return None
    Data = Data[scorer]  # extract labeled data

    # Resolve and validate the network type.
    if net_type is None:
        net_type = cfg.get("default_net_type", "resnet_50")
    elif "resnet" not in net_type and "mobilenet" not in net_type:
        raise ValueError(f"Invalid network type: {net_type}")

    # Resolve and validate the augmenter type.
    if augmenter_type is None:
        augmenter_type = cfg.get("default_augmenter", "imgaug")
        if augmenter_type is None:
            # The key can be present but null in the config.yaml of old
            # projects; persist the default for backwards compatibility.
            auxiliaryfunctions.edit_config(config, {"default_augmenter": "imgaug"})
            augmenter_type = "imgaug"
    elif augmenter_type not in (
        "default",
        "scalecrop",
        "imgaug",
        "tensorpack",
        "deterministic",
    ):
        raise ValueError(f"Invalid augmenter type: {augmenter_type}")

    # Locate the pose_cfg.yaml template and the pretrained weights.
    dlcparent_path = auxiliaryfunctions.get_deeplabcut_path()
    defaultconfigfile = os.path.join(dlcparent_path, "pose_cfg.yaml")
    model_path, num_shuffles = auxfun_models.Check4weights(
        net_type, Path(dlcparent_path), num_shuffles
    )

    if Shuffles is None:
        Shuffles = range(1, num_shuffles + 1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]

    if trainIndices is None and testIndices is None:
        splits = [
            (
                trainFraction,
                shuffle,
                SplitTrials(range(len(Data.index)), trainFraction),
            )
            for trainFraction in cfg["TrainingFraction"]
            for shuffle in Shuffles
        ]
    else:
        if trainIndices is None or testIndices is None:
            raise ValueError(
                "trainIndices and testIndices must be provided together."
            )
        # A chained `a != b != c` only tests adjacent pairs and is False as
        # soon as a == b, so all three lengths are compared explicitly.
        if not (len(trainIndices) == len(testIndices) == len(Shuffles)):
            raise ValueError(
                "Number of Shuffles and train and test indexes should be equal."
            )
        splits = []
        for shuffle, train_inds, test_inds in zip(Shuffles, trainIndices, testIndices):
            trainFraction = round(
                len(train_inds) * 1.0 / (len(train_inds) + len(test_inds)), 2
            )
            print(
                f"You passed a split with the following fraction: {int(100 * trainFraction)}%"
            )
            splits.append((trainFraction, shuffle, (train_inds, test_inds)))

    bodyparts = cfg["bodyparts"]
    nbodyparts = len(bodyparts)
    # Distinct loop names keep the trainIndices/testIndices arguments intact.
    for trainFraction, shuffle, (train_inds, test_inds) in splits:
        if len(train_inds) == 0:
            continue

        if userfeedback:
            # NOTE(review): .index() raises ValueError when trainFraction is
            # not listed in cfg["TrainingFraction"], which can happen for
            # user-supplied splits -- confirm the intended behavior.
            trainposeconfigfile, _, _ = training.return_train_network_path(
                config,
                shuffle=shuffle,
                trainingsetindex=cfg["TrainingFraction"].index(trainFraction),
            )
            if trainposeconfigfile.is_file():
                askuser = input(
                    "The model folder is already present. If you continue, it will overwrite the existing model (split). Do you want to continue?(yes/no): "
                )
                if askuser.strip().lower() in ("no", "n"):
                    raise Exception(
                        "Use the Shuffles argument as a list to specify a different shuffle index. Check out the help for more details."
                    )

        ####################################################
        # Generating data structure with labeled information & frame metadata (for deep cut)
        ####################################################
        datafilename, metadatafilename = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder, trainFraction, shuffle, cfg
        )

        ################################################################################
        # Saving data file (convert to training file for deeper cut (*.mat))
        ################################################################################
        data, MatlabData = format_training_data(
            Data, train_inds, nbodyparts, project_path
        )
        sio.savemat(os.path.join(project_path, datafilename), {"dataset": MatlabData})

        ################################################################################
        # Saving metadata
        ################################################################################
        auxiliaryfunctions.SaveMetadata(
            os.path.join(project_path, metadatafilename),
            data,
            train_inds,
            test_inds,
            trainFraction,
        )

        ################################################################################
        # Creating file structure for training &
        # Test files as well as pose_yaml files (containing training and testing information)
        #################################################################################
        modelfoldername = auxiliaryfunctions.GetModelFolder(trainFraction, shuffle, cfg)
        model_dir = Path(config).parents[0] / modelfoldername
        auxiliaryfunctions.attempttomakefolder(model_dir, recursive=True)
        auxiliaryfunctions.attempttomakefolder(str(model_dir / "train"))
        auxiliaryfunctions.attempttomakefolder(str(model_dir / "test"))

        path_train_config = str(
            os.path.join(
                cfg["project_path"], Path(modelfoldername), "train", "pose_cfg.yaml"
            )
        )
        path_test_config = str(
            os.path.join(
                cfg["project_path"], Path(modelfoldername), "test", "pose_cfg.yaml"
            )
        )

        items2change = {
            "dataset": datafilename,
            "metadataset": metadatafilename,
            "num_joints": len(bodyparts),
            "all_joints": [[i] for i in range(len(bodyparts))],
            "all_joints_names": [str(bpt) for bpt in bodyparts],
            "init_weights": model_path,
            "project_path": str(cfg["project_path"]),
            "net_type": net_type,
            "dataset_type": augmenter_type,
        }

        items2drop = {}
        if augmenter_type == "scalecrop":
            # these values are dropped as scalecrop
            # doesn't have rotation implemented
            items2drop = {"rotation": 0, "rotratio": 0.0}

        trainingdata = MakeTrain_pose_yaml(
            items2change, path_train_config, defaultconfigfile, items2drop
        )

        # Only this subset of the training config is written to the test config.
        keys2save = [
            "dataset",
            "num_joints",
            "all_joints",
            "all_joints_names",
            "net_type",
            "init_weights",
            "global_scale",
            "location_refinement",
            "locref_stdev",
        ]
        MakeTest_pose_yaml(trainingdata, keys2save, path_test_config)
        print(
            "The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!"
        )
    return splits
예제 #2
0
def create_training_dataset(config, num_shuffles=1, Shuffles=None, windows2linux=False, userfeedback=False,
        trainIndexes=None, testIndexes=None,
        net_type=None, augmenter_type=None):
    """
    Creates a training dataset. Labels from all the extracted frames are merged into a single .h5 file.
    Only the videos included in the config file are used to create this dataset.

    [OPTIONAL] Use the function 'add_new_video' at any stage of the project to add more videos to the project.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    num_shuffles : int, optional
        Number of shuffles of training dataset to create, i.e. [1,2,3] for num_shuffles=3. Default is set to 1.

    Shuffles: list of shuffles.
        Alternatively the user can also give a list of shuffles (integers!).
        Non-integer entries are dropped. When custom train/test indexes are passed together with
        ``Shuffles``, the i-th split is stored under the i-th shuffle index, so both must have the
        same length. Without ``Shuffles``, custom splits are numbered 0, 1, 2, ...

    windows2linux: bool.
        The annotation files contain paths formatted according to your operating system. If you label on windows
        but train & evaluate on a unix system (e.g. ubuntu, colab, Mac) set this variable to True to convert the paths.

    userfeedback: bool, optional
        If this is set to false, then all requested train/test splits are created (no matter if they already exist). If you
        want to assure that previous splits etc. are not overwritten, then set this to True and you will be asked for each split.

    trainIndexes: list of lists, optional (default=None)
        List of one or multiple lists containing train indexes.
        A list containing two lists of training indexes will produce two splits.
        Must be given together with ``testIndexes``.

    testIndexes: list of lists, optional (default=None)
        List of one or multiple lists containing test indexes.

    net_type: string
        Type of networks. Any name containing 'resnet' or 'mobilenet' is accepted; when omitted,
        ``default_net_type`` from the config is used (falling back to 'resnet_50').

    augmenter_type: string
        Type of augmenter. Currently default, imgaug, tensorpack, and deterministic are supported.
        When omitted, ``default_augmenter`` from the config is used (falling back to 'default').

    Raises
    ------
    ValueError
        For an unsupported ``net_type`` / ``augmenter_type``, when only one of ``trainIndexes`` /
        ``testIndexes`` is given, or when the numbers of train lists, test lists and (explicitly
        passed) shuffles do not match.
    Exception
        When ``userfeedback`` is True and the user declines to overwrite an existing split.

    Example
    --------
    >>> deeplabcut.create_training_dataset('/analysis/project/reaching-task/config.yaml',num_shuffles=1)
    Windows:
    >>> deeplabcut.create_training_dataset('C:\\Users\\Ulf\\looming-task\\config.yaml',Shuffles=[3,17,5])
    --------
    """
    import scipy.io as sio

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg['scorer']
    project_path = cfg['project_path']

    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    trainingset_path = Path(os.path.join(project_path, str(trainingsetfolder)))
    auxiliaryfunctions.attempttomakefolder(trainingset_path, recursive=True)

    Data = merge_annotateddatasets(cfg, project_path, trainingset_path, windows2linux)
    Data = Data[scorer]  # extract labeled data

    # Resolve and validate the network type.
    if net_type is None:
        net_type = cfg.get('default_net_type', 'resnet_50')
    elif 'resnet' not in net_type and 'mobilenet' not in net_type:
        raise ValueError(f'Invalid network type: {net_type}')

    # Resolve and validate the augmenter type.
    if augmenter_type is None:
        augmenter_type = cfg.get('default_augmenter', 'default')
    elif augmenter_type not in ('default', 'imgaug', 'tensorpack', 'deterministic'):
        raise ValueError(f'Invalid augmenter type: {augmenter_type}')

    # Locate the pose_cfg.yaml template shipped with the package and the pretrained weights.
    import deeplabcut
    parent_path = Path(os.path.dirname(deeplabcut.__file__))
    defaultconfigfile = str(parent_path / 'pose_cfg.yaml')
    model_path, num_shuffles = auxfun_models.Check4weights(net_type, parent_path, num_shuffles)

    shuffles_given = Shuffles is not None
    if shuffles_given:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]
    else:
        Shuffles = range(1, num_shuffles + 1)

    if trainIndexes is None and testIndexes is None:
        splits = [(trainFraction, shuffle, SplitTrials(range(len(Data.index)), trainFraction))
                  for trainFraction in cfg['TrainingFraction'] for shuffle in Shuffles]
    else:
        if trainIndexes is None or testIndexes is None:
            raise ValueError('trainIndexes and testIndexes must be provided together.')
        if len(trainIndexes) != len(testIndexes):
            raise ValueError('Number of train and test indexes should be equal.')
        if shuffles_given:
            # Honor the shuffle indices the caller asked for instead of
            # silently replacing them with the enumeration counter.
            if len(Shuffles) != len(trainIndexes):
                raise ValueError('Number of Shuffles and train and test indexes should be equal.')
            shuffle_ids = Shuffles
        else:
            # No explicit shuffles: keep the historical 0-based numbering.
            shuffle_ids = range(len(trainIndexes))
        splits = []
        for shuffle, train_inds, test_inds in zip(shuffle_ids, trainIndexes, testIndexes):
            trainFraction = len(train_inds) / (len(train_inds) + len(test_inds))
            print(f"You passed a split with the following fraction: {int(100 * trainFraction)}%")
            splits.append((trainFraction, shuffle, (train_inds, test_inds)))

    bodyparts = cfg['bodyparts']
    nbodyparts = len(bodyparts)
    # Distinct loop names keep the trainIndexes/testIndexes arguments intact.
    for trainFraction, shuffle, (train_inds, test_inds) in splits:
        if len(train_inds) == 0:
            continue

        if userfeedback:
            trainposeconfigfile, _, _ = training.return_train_network_path(config, shuffle=shuffle, trainFraction=trainFraction)
            if trainposeconfigfile.is_file():
                askuser = input("The model folder is already present. If you continue, it will overwrite the existing model (split). Do you want to continue?(yes/no): ")
                if askuser.strip().lower() in ('no', 'n'):
                    raise Exception("Use the Shuffles argument as a list to specify a different shuffle index. Check out the help for more details.")

        ####################################################
        # Generating data structure with labeled information & frame metadata (for deep cut)
        ####################################################
        datafilename, metadatafilename = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder, trainFraction, shuffle, cfg)

        ################################################################################
        # Saving data file (convert to training file for deeper cut (*.mat))
        ################################################################################
        data, MatlabData = format_training_data(Data, train_inds, nbodyparts, project_path)
        sio.savemat(os.path.join(project_path, datafilename), {'dataset': MatlabData})

        ################################################################################
        # Saving metadata
        ################################################################################
        auxiliaryfunctions.SaveMetadata(os.path.join(project_path, metadatafilename), data, train_inds, test_inds, trainFraction)

        ################################################################################
        # Creating file structure for training &
        # Test files as well as pose_yaml files (containing training and testing information)
        #################################################################################
        modelfoldername = auxiliaryfunctions.GetModelFolder(trainFraction, shuffle, cfg)
        model_dir = Path(config).parents[0] / modelfoldername
        auxiliaryfunctions.attempttomakefolder(model_dir, recursive=True)
        auxiliaryfunctions.attempttomakefolder(str(model_dir / 'train'))
        auxiliaryfunctions.attempttomakefolder(str(model_dir / 'test'))

        path_train_config = str(os.path.join(cfg['project_path'], Path(modelfoldername), 'train', 'pose_cfg.yaml'))
        path_test_config = str(os.path.join(cfg['project_path'], Path(modelfoldername), 'test', 'pose_cfg.yaml'))

        items2change = {
            "dataset": datafilename,
            "metadataset": metadatafilename,
            "num_joints": len(bodyparts),
            "all_joints": [[i] for i in range(len(bodyparts))],
            "all_joints_names": [str(bpt) for bpt in bodyparts],
            "init_weights": model_path,
            "project_path": str(cfg['project_path']),
            "net_type": net_type,
            "dataset_type": augmenter_type,
        }
        trainingdata = MakeTrain_pose_yaml(items2change, path_train_config, defaultconfigfile)

        # Only this subset of the training config is written to the test config.
        keys2save = [
            "dataset", "num_joints", "all_joints", "all_joints_names",
            "net_type", 'init_weights', 'global_scale', 'location_refinement',
            'locref_stdev'
        ]
        MakeTest_pose_yaml(trainingdata, keys2save, path_test_config)
        print("The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!")
예제 #3
0
def create_training_dataset(config, num_shuffles=1, Shuffles=None, windows2linux=False, userfeedback=False,
        trainIndexes=None, testIndexes=None,
        net_type=None, augmenter_type=None, defaultconfigfile=None, items2change_pose=None):
    """
    Creates a training dataset. Labels from all the extracted frames are merged into a single .h5 file.
    Only the videos included in the config file are used to create this dataset.

    [OPTIONAL] Use the function 'add_new_video' at any stage of the project to add more videos to the project.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    num_shuffles : int, optional
        Number of shuffles of training dataset to create, i.e. [1,2,3] for num_shuffles=3. Default is set to 1.

    Shuffles: list of shuffles.
        Alternatively the user can also give a list of shuffles (integers!).
        Non-integer entries are dropped.

    windows2linux: bool.
        The annotation files contain paths formatted according to your operating system. If you label on windows
        but train & evaluate on a unix system (e.g. ubuntu, colab, Mac) set this variable to True to convert the paths.

    userfeedback: bool, optional
        If this is set to false, then all requested train/test splits are created (no matter if they already exist). If you
        want to assure that previous splits etc. are not overwritten, then set this to True and you will be asked for each split.

    trainIndexes: list of int, optional (default=None)
        Row positions of the labeled data to use for training. When given (together with
        ``testIndexes``), this one split is used for every shuffle and training fraction;
        otherwise a split is drawn with ``SplitTrials`` for each shuffle/fraction pair.

    testIndexes: list of int, optional (default=None)
        Row positions of the labeled data to use for testing.

    net_type: string
        Type of networks. Any name containing 'resnet' or 'mobilenet' is accepted; when omitted,
        ``default_net_type`` from the config is used (falling back to 'resnet_50').
        A 'net_type' entry in ``items2change_pose`` takes precedence over this argument.

    augmenter_type: string
        Type of augmenter. Currently default, imgaug, tensorpack, and deterministic are supported.
        When omitted, ``default_augmenter`` from the config is used (falling back to 'default').

    defaultconfigfile: string, optional (default=None)
        Path of the pose_cfg.yaml template handed to ``MakeTrain_pose_yaml``. Defaults to the
        ``pose_cfg.yaml`` located next to the installed ``deeplabcut`` package.

    items2change_pose: dict, optional (default=None)
        Extra key/value pairs merged over the generated training pose configuration entries.

    Raises
    ------
    ValueError
        For an unsupported ``net_type`` / ``augmenter_type``, or when only one of
        ``trainIndexes`` / ``testIndexes`` is given.
    Exception
        When ``userfeedback`` is True and the user declines to overwrite an existing split.

    Example
    --------
    >>> deeplabcut.create_training_dataset('/analysis/project/reaching-task/config.yaml',num_shuffles=1)
    Windows:
    >>> deeplabcut.create_training_dataset('C:\\Users\\Ulf\\looming-task\\config.yaml',Shuffles=[3,17,5])
    --------
    """

    from skimage import io
    import scipy.io as sio

    if items2change_pose is None:
        items2change_pose = {}

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg['scorer']
    project_path = cfg['project_path']

    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    trainingset_path = Path(os.path.join(project_path, str(trainingsetfolder)))
    auxiliaryfunctions.attempttomakefolder(trainingset_path, recursive=True)

    Data = merge_annotateddatasets(cfg, project_path, trainingset_path, windows2linux)
    Data = Data[scorer]  # extract labeled data

    # An override dict without a 'net_type' key must not discard the
    # net_type argument, so the argument is used as the fallback.
    net_type = items2change_pose.get('net_type', net_type)

    # Resolve and validate the network type.
    if net_type is None:
        net_type = cfg.get('default_net_type', 'resnet_50')
    elif 'resnet' not in net_type and 'mobilenet' not in net_type:
        raise ValueError(f'Invalid network type: {net_type}')

    # Resolve and validate the augmenter type.
    if augmenter_type is None:
        augmenter_type = cfg.get('default_augmenter', 'default')
    elif augmenter_type not in ('default', 'imgaug', 'tensorpack', 'deterministic'):
        raise ValueError(f'Invalid augmenter type: {augmenter_type}')

    # parent_path is needed for the weight lookup even when the caller
    # supplies their own template, so it is always computed.
    import deeplabcut
    parent_path = Path(os.path.dirname(deeplabcut.__file__))
    if defaultconfigfile is None:
        defaultconfigfile = str(parent_path / 'pose_cfg.yaml')
    model_path, num_shuffles = auxfun_models.Check4weights(net_type, parent_path, num_shuffles)

    if Shuffles is None:
        Shuffles = range(1, num_shuffles + 1, 1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]

    custom_split = trainIndexes is not None or testIndexes is not None
    if custom_split and (trainIndexes is None or testIndexes is None):
        raise ValueError('trainIndexes and testIndexes must be provided together.')

    bodyparts = cfg['bodyparts']
    TrainingFraction = cfg['TrainingFraction']
    for shuffle in Shuffles:
        for trainingsetindex, trainFraction in enumerate(TrainingFraction):
            if userfeedback:
                trainposeconfigfile, _, _ = training.return_train_network_path(config, shuffle=shuffle, trainingsetindex=trainingsetindex)
                if os.path.isfile(trainposeconfigfile):
                    askuser = input("The model folder is already present. If you continue, it will overwrite the existing model (split). Do you want to continue?(yes/no): ")
                    if askuser.strip().lower() in ('no', 'n'):
                        raise Exception("Use the Shuffles argument as a list to specify a different shuffle index. Check out the help for more details.")

            # Per-iteration locals: assigning to the trainIndexes/testIndexes
            # arguments would make every later shuffle and training fraction
            # reuse the split drawn in the first iteration.
            if custom_split:
                train_inds, test_inds = trainIndexes, testIndexes
                print("You passed a split with the following fraction:", len(train_inds)*1./(len(test_inds)+len(train_inds))*100)
            else:
                train_inds, test_inds = SplitTrials(range(len(Data.index)), trainFraction)

            if len(train_inds) == 0:
                continue

            ####################################################
            # Generating data structure with labeled information & frame metadata (for deep cut)
            ####################################################
            data = []
            for jj in train_inds:
                # load image to get dimensions:
                filename = Data.index[jj]
                im = io.imread(os.path.join(cfg['project_path'], filename))
                height, width = np.shape(im)[0], np.shape(im)[1]
                # Images without a third axis are treated as single channel.
                channels = np.shape(im)[2] if np.ndim(im) == 3 else 1
                H = {'image': filename, 'size': np.array([channels, height, width])}

                # Rows are (bodypart index, x, y); unused rows stay NaN.
                joints = np.full((len(bodyparts), 3), np.nan)
                indexjoints = 0
                for bpindex, bodypart in enumerate(bodyparts):
                    x = Data[bodypart]['x'][jj]
                    y = Data[bodypart]['y'][jj]
                    # keep only labels that are non-negative and inside the image
                    # (comparisons with NaN are False, so missing labels are skipped)
                    if 0 <= x < width and 0 <= y < height:
                        joints[indexjoints] = (bpindex, x, y)
                        indexjoints += 1

                # drop NaN rows, i.e. lines for missing body parts
                joints = joints[np.isfinite(joints).all(axis=1)]

                # Sanity checks on the invariant established by the filter above.
                assert np.all(joints[:, 2] < height)  # y coordinate within image?
                assert np.all(joints[:, 1] < width)  # x coordinate within image?

                H['joints'] = np.array(joints, dtype=int)
                if np.size(joints) > 0:  # exclude images without labels
                    data.append(H)

            datafilename, metadatafilename = auxiliaryfunctions.GetDataandMetaDataFilenames(trainingsetfolder, trainFraction, shuffle, cfg)

            ################################################################################
            # Saving metadata
            ################################################################################
            auxiliaryfunctions.SaveMetadata(os.path.join(project_path, metadatafilename), data, train_inds, test_inds, trainFraction)

            ################################################################################
            # Saving data file (convert to training file for deeper cut (*.mat))
            ################################################################################
            DTYPE = [('image', 'O'), ('size', 'O'), ('joints', 'O')]
            MatlabData = np.array(
                [(np.array([item['image']], dtype='U'),
                  np.array([item['size']]),
                  boxitintoacell(item['joints']))
                 for item in data],
                dtype=DTYPE)

            sio.savemat(os.path.join(project_path, datafilename), {'dataset': MatlabData})

            ################################################################################
            # Creating file structure for training &
            # Test files as well as pose_yaml files (containing training and testing information)
            #################################################################################
            modelfoldername = auxiliaryfunctions.GetModelFolder(trainFraction, shuffle, cfg)
            model_dir = Path(config).parents[0] / modelfoldername
            auxiliaryfunctions.attempttomakefolder(model_dir, recursive=True)
            auxiliaryfunctions.attempttomakefolder(str(model_dir / 'train'))
            auxiliaryfunctions.attempttomakefolder(str(model_dir / 'test'))

            path_train_config = str(os.path.join(cfg['project_path'], Path(modelfoldername), 'train', 'pose_cfg.yaml'))
            path_test_config = str(os.path.join(cfg['project_path'], Path(modelfoldername), 'test', 'pose_cfg.yaml'))

            items2change = {
                "dataset": datafilename,
                "metadataset": metadatafilename,
                "num_joints": len(bodyparts),
                "all_joints": [[i] for i in range(len(bodyparts))],
                "all_joints_names": [str(bpt) for bpt in bodyparts],
                "init_weights": model_path,
                "project_path": str(cfg['project_path']),
                "net_type": net_type,
                "dataset_type": augmenter_type
            }
            # Caller-supplied overrides win over the generated entries.
            items2change.update(items2change_pose)
            trainingdata = MakeTrain_pose_yaml(items2change, path_train_config, defaultconfigfile)

            # Only this subset of the training config is written to the test config.
            keys2save = [
                "dataset", "num_joints", "all_joints", "all_joints_names",
                "net_type", 'init_weights', 'global_scale', 'location_refinement',
                'locref_stdev'
            ]
            MakeTest_pose_yaml(trainingdata, keys2save, path_test_config)
            print("The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!")

    return
def create_training_dataset(
    config,
    num_shuffles=1,
    Shuffles=None,
    windows2linux=False,
    userfeedback=False,
    trainIndices=None,
    testIndices=None,
    net_type=None,
    augmenter_type=None,
    posecfg_template=None,
):
    """Creates a training dataset.

    Labels from all the extracted frames are merged into a single .h5 file.
    Only the videos included in the config file are used to create this dataset.

    Parameters
    ----------
    config : string
        Full path of the ``config.yaml`` file as a string.

    num_shuffles : int, optional, default=1
        Number of shuffles of training dataset to create, i.e. ``[1,2,3]`` for
        ``num_shuffles=3``.

    Shuffles: list[int], optional
        Alternatively the user can also give a list of shuffles. Entries that are
        not integers are ignored.

    windows2linux: bool, optional, default=False
        Deprecated and without effect; passing a truthy value only emits a
        ``FutureWarning``.

    userfeedback: bool, optional, default=False
        If ``False``, all requested train/test splits are created (no matter if they
        already exist). If you want to assure that previous splits etc. are not
        overwritten, set this to ``True`` and you will be asked for each split.

    trainIndices: list of lists, optional, default=None
        List of one or multiple lists containing train indexes.
        A list containing two lists of training indexes will produce two splits.
        Must be passed together with ``testIndices``. Entries equal to ``-1`` are
        treated as padding: they count towards the training fraction but are
        removed from the stored indices.

    testIndices: list of lists, optional, default=None
        List of one or multiple lists containing test indexes.
        Must be passed together with ``trainIndices``.

    net_type: string, optional, default=None
        Type of networks. If None, ``default_net_type`` from the config file is
        used (falling back to ``resnet_50``). Currently supported options are

        * ``resnet_50``
        * ``resnet_101``
        * ``resnet_152``
        * ``mobilenet_v2_1.0``
        * ``mobilenet_v2_0.75``
        * ``mobilenet_v2_0.5``
        * ``mobilenet_v2_0.35``
        * ``efficientnet-b0``
        * ``efficientnet-b1``
        * ``efficientnet-b2``
        * ``efficientnet-b3``
        * ``efficientnet-b4``
        * ``efficientnet-b5``
        * ``efficientnet-b6``

    augmenter_type: string, optional, default=None
        Type of augmenter. If None, ``default_augmenter`` from the config file is
        used (falling back to ``imgaug``). Currently supported augmenters are

        * ``default``
        * ``scalecrop``
        * ``imgaug``
        * ``tensorpack``
        * ``deterministic``

    posecfg_template: string, optional, default=None
        Path to a ``pose_cfg.yaml`` file to use as a template for generating the new
        one for the current iteration. Useful if you would like to start with the same
        parameters a previous training iteration. None uses the default
        ``pose_cfg.yaml``.

    Returns
    -------
    list(tuple) or None
        If training dataset was successfully created, a list of tuples is returned.
        The first two elements in each tuple represent the training fraction and the
        shuffle value. The last two elements in each tuple are arrays of integers
        representing the training and test indices.

        Returns None if training dataset could not be created. None is also
        returned for multi-animal projects, where the work is delegated to
        ``create_multianimaltraining_dataset``.

    Raises
    ------
    ValueError
        If ``posecfg_template`` does not point to a ``pose_cfg.yaml`` file, if
        ``net_type`` or ``augmenter_type`` is not supported, if only one of
        ``trainIndices``/``testIndices`` is given, or if the numbers of shuffles,
        train index lists and test index lists differ.

    Exception
        If ``userfeedback`` is True and the user declines to overwrite an
        existing split.

    Notes
    -----
    Use the function ``add_new_videos`` at any stage of the project to add more videos
    to the project.

    Examples
    --------

    Linux/MacOS

    >>> deeplabcut.create_training_dataset(
            '/analysis/project/reaching-task/config.yaml', num_shuffles=1,
        )

    Windows

    >>> deeplabcut.create_training_dataset(
            'C:\\Users\\Ulf\\looming-task\\config.yaml', Shuffles=[3,17,5],
        )
    """
    import scipy.io as sio

    if windows2linux:
        # DeprecationWarnings are silenced since Python 3.2 unless triggered in __main__
        warnings.warn(
            "`windows2linux` has no effect since 2.2.0.4 and will be removed in 2.2.1.",
            FutureWarning,
        )

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)

    prior_cfg = None
    if posecfg_template:
        if not posecfg_template.endswith("pose_cfg.yaml"):
            raise ValueError(
                "posecfg_template argument must contain path to a pose_cfg.yaml file"
            )
        print("Reloading pose_cfg parameters from " + posecfg_template + "\n")
        from deeplabcut.utils.auxiliaryfunctions import read_plainconfig

        prior_cfg = read_plainconfig(posecfg_template)

    if cfg.get("multianimalproject", False):
        # Multi-animal projects have their own pipeline; only the shuffle
        # specification and the network type are forwarded to it.
        from deeplabcut.generate_training_dataset.multiple_individuals_trainingsetmanipulation import (
            create_multianimaltraining_dataset,
        )

        create_multianimaltraining_dataset(
            config, num_shuffles, Shuffles, net_type=net_type
        )
        return None

    scorer = cfg["scorer"]
    project_path = cfg["project_path"]

    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(
        cfg
    )  # Path concatenation OS platform independent
    auxiliaryfunctions.attempttomakefolder(
        Path(os.path.join(project_path, str(trainingsetfolder))),
        recursive=True,
    )

    Data = merge_annotateddatasets(
        cfg,
        Path(os.path.join(project_path, trainingsetfolder)),
    )
    if Data is None:
        return None
    Data = Data[scorer]  # extract labeled data

    # Resolve and validate the network type.
    if net_type is None:
        net_type = cfg.get("default_net_type", "resnet_50")
    elif not any(
        family in net_type for family in ("resnet", "mobilenet", "efficientnet")
    ):
        raise ValueError(f"Invalid network type: {net_type}")

    # Resolve and validate the augmenter type.
    if augmenter_type is None:
        augmenter_type = cfg.get("default_augmenter", "imgaug")
        if augmenter_type is None:  # this could be in config.yaml for old projects!
            # updating variable if null/None! #backwardscompatability
            auxiliaryfunctions.edit_config(config, {"default_augmenter": "imgaug"})
            augmenter_type = "imgaug"
    elif augmenter_type not in (
        "default",
        "scalecrop",
        "imgaug",
        "tensorpack",
        "deterministic",
    ):
        raise ValueError(f"Invalid augmenter type: {augmenter_type}")

    if prior_cfg is not None:
        if net_type != prior_cfg["net_type"]:
            print(
                "WARNING: Specified net_type does not match net_type from posecfg_template path entered. Proceed with caution."
            )
        if augmenter_type != prior_cfg["dataset_type"]:
            print(
                "WARNING: Specified augmenter_type does not match dataset_type from posecfg_template path entered. Proceed with caution."
            )

    # Loading the encoder (if necessary downloading from TF)
    dlcparent_path = auxiliaryfunctions.get_deeplabcut_path()
    if posecfg_template:
        defaultconfigfile = posecfg_template
    else:
        defaultconfigfile = os.path.join(dlcparent_path, "pose_cfg.yaml")
    model_path, num_shuffles = auxfun_models.check_for_weights(
        net_type, Path(dlcparent_path), num_shuffles
    )

    if Shuffles is None:
        Shuffles = range(1, num_shuffles + 1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]

    if trainIndices is None and testIndices is None:
        # One random split per (training fraction, shuffle) combination.
        splits = [
            (
                trainFraction,
                shuffle,
                SplitTrials(range(len(Data.index)), trainFraction),
            )
            for trainFraction in cfg["TrainingFraction"]
            for shuffle in Shuffles
        ]
    else:
        if trainIndices is None or testIndices is None:
            raise ValueError(
                "trainIndices and testIndices must either both be passed or both be None."
            )
        # A chained `a != b != c` only raises when BOTH adjacent pairs differ,
        # so all three lengths are compared for equality explicitly.
        if not (len(trainIndices) == len(testIndices) == len(Shuffles)):
            raise ValueError(
                "Number of Shuffles and train and test indexes should be equal."
            )
        splits = []
        for shuffle, train_inds, test_inds in zip(
            Shuffles, trainIndices, testIndices
        ):
            # The fraction is computed on the padded lists on purpose.
            trainFraction = round(
                len(train_inds) * 1.0 / (len(train_inds) + len(test_inds)), 2
            )
            print(
                f"You passed a split with the following fraction: {int(100 * trainFraction)}%"
            )
            # Now that the training fraction is guaranteed to be correct,
            # the values added to pad the indices are removed.
            train_inds = np.asarray(train_inds)
            train_inds = train_inds[train_inds != -1]
            test_inds = np.asarray(test_inds)
            test_inds = test_inds[test_inds != -1]
            splits.append((trainFraction, shuffle, (train_inds, test_inds)))

    bodyparts = cfg["bodyparts"]
    nbodyparts = len(bodyparts)
    project_dir = Path(config).parents[0]

    for trainFraction, shuffle, (train_inds, test_inds) in splits:
        if len(train_inds) == 0:
            # Nothing to train on for this split; skip it.
            continue

        if userfeedback:
            # NOTE(review): `.index` raises ValueError when the fraction of a
            # user-supplied split is not listed in cfg["TrainingFraction"] --
            # confirm whether that combination needs to be supported.
            trainposeconfigfile, _, _ = training.return_train_network_path(
                config,
                shuffle=shuffle,
                trainingsetindex=cfg["TrainingFraction"].index(trainFraction),
            )
            if trainposeconfigfile.is_file():
                askuser = input(
                    "The model folder is already present. If you continue, it will overwrite the existing model (split). Do you want to continue?(yes/no): "
                )
                # Accept any capitalisation of "no"/"n" so that a short
                # negative answer never results in an overwrite.
                if askuser.strip().lower() in ("no", "n"):
                    raise Exception(
                        "Use the Shuffles argument as a list to specify a different shuffle index. Check out the help for more details."
                    )

        ####################################################
        # Generating data structure with labeled information & frame metadata (for deep cut)
        ####################################################
        # Make training file!
        (
            datafilename,
            metadatafilename,
        ) = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder, trainFraction, shuffle, cfg
        )

        ################################################################################
        # Saving data file (convert to training file for deeper cut (*.mat))
        ################################################################################
        data, MatlabData = format_training_data(
            Data, train_inds, nbodyparts, project_path
        )
        sio.savemat(
            os.path.join(project_path, datafilename), {"dataset": MatlabData}
        )

        ################################################################################
        # Saving metadata (Pickle file)
        ################################################################################
        auxiliaryfunctions.SaveMetadata(
            os.path.join(project_path, metadatafilename),
            data,
            train_inds,
            test_inds,
            trainFraction,
        )

        ################################################################################
        # Creating file structure for training &
        # Test files as well as pose_yaml files (containing training and testing information)
        #################################################################################
        modelfoldername = auxiliaryfunctions.get_model_folder(
            trainFraction, shuffle, cfg
        )
        model_dir = project_dir / modelfoldername
        auxiliaryfunctions.attempttomakefolder(model_dir, recursive=True)
        auxiliaryfunctions.attempttomakefolder(str(model_dir / "train"))
        auxiliaryfunctions.attempttomakefolder(str(model_dir / "test"))

        path_train_config = str(
            os.path.join(
                project_path,
                Path(modelfoldername),
                "train",
                "pose_cfg.yaml",
            )
        )
        path_test_config = str(
            os.path.join(
                project_path,
                Path(modelfoldername),
                "test",
                "pose_cfg.yaml",
            )
        )

        items2change = {
            "dataset": datafilename,
            "metadataset": metadatafilename,
            "num_joints": nbodyparts,
            "all_joints": [[i] for i in range(nbodyparts)],
            "all_joints_names": [str(bpt) for bpt in bodyparts],
            "init_weights": model_path,
            "project_path": str(project_path),
            "net_type": net_type,
            "dataset_type": augmenter_type,
        }

        items2drop = {}
        if augmenter_type == "scalecrop":
            # these values are dropped as scalecrop
            # doesn't have rotation implemented
            items2drop = {"rotation": 0, "rotratio": 0.0}
        # Also drop maDLC smart cropping augmentation parameters
        for key in ("pre_resize", "crop_size", "max_shift", "crop_sampling"):
            items2drop[key] = None

        trainingdata = MakeTrain_pose_yaml(
            items2change, path_train_config, defaultconfigfile, items2drop
        )

        keys2save = [
            "dataset",
            "num_joints",
            "all_joints",
            "all_joints_names",
            "net_type",
            "init_weights",
            "global_scale",
            "location_refinement",
            "locref_stdev",
        ]
        MakeTest_pose_yaml(trainingdata, keys2save, path_test_config)
        print(
            "The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!"
        )

    return splits