Exemple #1
0
def return_evaluate_network_data(config,
                                 shuffle=0,
                                 trainingsetindex=0,
                                 comparisonbodyparts="all",
                                 Snapindex=None,
                                 rescale=False,
                                 fulldata=False,
                                 show_errors=True):
    """
    Returns the results for (previously evaluated) network. deeplabcutcore.evaluate_network(..)
    Returns list of (per model): [trainingsiterations,trainfraction,shuffle,trainerror,testerror,pcutoff,trainerrorpcutoff,testerrorpcutoff,Snapshots[snapindex],scale,net_type]

    If fulldata=True, also returns (the complete annotation and prediction array)
    Returns list of: (DataMachine, Data, data, trainIndices, testIndices, trainFraction, DLCscorer,comparisonbodyparts, cfg, Snapshots[snapindex])
    ----------
    config : string
        Full path of the config.yaml file as a string.

    shuffle: integer
        integers specifying shuffle index of the training dataset. The default is 0.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a list in config.yaml). This
        variable can also be set to "all".

    comparisonbodyparts: list of bodyparts, Default is "all".
        The average error will be computed for those body parts only (Has to be a subset of the body parts).

    rescale: bool, default False
        Evaluate the model at the 'global_scale' variable (as set in the test/pose_config.yaml file for a particular project). I.e. every
        image will be resized according to that scale and prediction will be compared to the resized ground truth. The error will be reported
        in pixels at rescaled to the *original* size. I.e. For a [200,200] pixel image evaluated at global_scale=.5, the predictions are calculated
        on [100,100] pixel images, compared to 1/2*ground truth and this error is then multiplied by 2!. The evaluation images are also shown for the
        original size!

    Examples
    --------
    If you do not want to plot
    >>> deeplabcutcore._evaluate_network_data('/analysis/project/reaching-task/config.yaml', shuffle=[1])
    --------
    If you want to plot
    >>> deeplabcutcore.evaluate_network('/analysis/project/reaching-task/config.yaml',shuffle=[1],True)
    """

    import os
    from skimage import io
    import skimage.color

    from deeplabcutcore.pose_estimation_tensorflow.config import load_config
    from deeplabcutcore.pose_estimation_tensorflow.dataset.pose_dataset import data_to_input
    from deeplabcutcore.utils import auxiliaryfunctions, visualization

    start_path = os.getcwd()
    # Read file path for pose_config file. >> pass it on
    cfg = auxiliaryfunctions.read_config(config)

    # Loading human annotatated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    #Data=pd.read_hdf(os.path.join(cfg["project_path"],str(trainingsetfolder),'CollectedData_' + cfg["scorer"] + '.h5'),'df_with_missing')

    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts)
    ##################################################
    # Load data...
    ##################################################
    trainFraction = cfg["TrainingFraction"][trainingsetindex]
    datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
        trainingsetfolder, trainFraction, shuffle, cfg)
    modelfolder = os.path.join(
        cfg["project_path"],
        str(auxiliaryfunctions.GetModelFolder(trainFraction, shuffle, cfg)))
    path_test_config = Path(modelfolder) / 'test' / 'pose_cfg.yaml'
    # Load meta data
    data, trainIndices, testIndices, trainFraction = auxiliaryfunctions.LoadMetadata(
        os.path.join(cfg["project_path"], metadatafn))

    try:
        dlc_cfg = load_config(str(path_test_config))
    except FileNotFoundError:
        raise FileNotFoundError(
            "It seems the model for shuffle %s and trainFraction %s does not exist."
            % (shuffle, trainFraction))

    ########################### RESCALING (to global scale)
    if rescale == True:
        scale = dlc_cfg['global_scale']
        print("Rescaling Data to ", scale)
        Data = pd.read_hdf(
            os.path.join(cfg["project_path"], str(trainingsetfolder),
                         'CollectedData_' + cfg["scorer"] + '.h5'),
            'df_with_missing') * scale
    else:
        scale = 1
        Data = pd.read_hdf(
            os.path.join(cfg["project_path"], str(trainingsetfolder),
                         'CollectedData_' + cfg["scorer"] + '.h5'),
            'df_with_missing')

    evaluationfolder = os.path.join(
        cfg["project_path"],
        str(auxiliaryfunctions.GetEvaluationFolder(trainFraction, shuffle,
                                                   cfg)))
    # Check which snapshots are available and sort them by # iterations
    Snapshots = np.array([
        fn.split('.')[0]
        for fn in os.listdir(os.path.join(str(modelfolder), 'train'))
        if "index" in fn
    ])

    if len(Snapshots) == 0:
        print(
            "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
            % (shuffle, trainFraction))
        snapindices = []
    else:
        increasing_indices = np.argsort(
            [int(m.split('-')[1]) for m in Snapshots])
        Snapshots = Snapshots[increasing_indices]
        if Snapindex == None:
            Snapindex = cfg["snapshotindex"]

        if Snapindex == -1:
            snapindices = [-1]
        elif Snapindex == "all":
            snapindices = range(len(Snapshots))
        elif Snapindex < len(Snapshots):
            snapindices = [Snapindex]
        else:
            print(
                "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
            )

    DATA = []
    results = []
    for snapindex in snapindices:
        dlc_cfg['init_weights'] = os.path.join(
            str(modelfolder), 'train',
            Snapshots[snapindex])  #setting weights to corresponding snapshot.
        trainingsiterations = (dlc_cfg['init_weights'].split(
            os.sep)[-1]).split('-')[
                -1]  #read how many training siterations that corresponds to.

        #name for deeplabcut net (based on its parameters)
        #DLCscorer = auxiliaryfunctions.GetScorerName(cfg,shuffle,trainFraction,trainingsiterations)
        DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
            cfg, shuffle, trainFraction, trainingsiterations)
        print("Retrieving ", DLCscorer, " with # of trainingiterations:",
              trainingsiterations)
        notanalyzed, resultsfilename, DLCscorer = auxiliaryfunctions.CheckifNotEvaluated(
            str(evaluationfolder), DLCscorer, DLCscorerlegacy,
            Snapshots[snapindex])
        #resultsfilename=os.path.join(str(evaluationfolder),DLCscorer + '-' + str(Snapshots[snapindex])+  '.h5') # + '-' + str(snapshot)+  ' #'-' + Snapshots[snapindex]+  '.h5')
        print(resultsfilename)
        if not notanalyzed and os.path.isfile(resultsfilename):  #data exists..
            DataMachine = pd.read_hdf(resultsfilename, 'df_with_missing')
            DataCombined = pd.concat([Data.T, DataMachine.T], axis=0).T
            RMSE, RMSEpcutoff = pairwisedistances(DataCombined, cfg["scorer"],
                                                  DLCscorer, cfg["pcutoff"],
                                                  comparisonbodyparts)

            testerror = np.nanmean(RMSE.iloc[testIndices].values.flatten())
            trainerror = np.nanmean(RMSE.iloc[trainIndices].values.flatten())
            testerrorpcutoff = np.nanmean(
                RMSEpcutoff.iloc[testIndices].values.flatten())
            trainerrorpcutoff = np.nanmean(
                RMSEpcutoff.iloc[trainIndices].values.flatten())
            if show_errors == True:
                print("Results for",
                      trainingsiterations, " training iterations:",
                      int(100 * trainFraction), shuffle, "train error:",
                      np.round(trainerror, 2), "pixels. Test error:",
                      np.round(testerror, 2), " pixels.")
                print("With pcutoff of", cfg["pcutoff"], " train error:",
                      np.round(trainerrorpcutoff, 2), "pixels. Test error:",
                      np.round(testerrorpcutoff, 2), "pixels")
                print("Snapshot", Snapshots[snapindex])

            r = [
                trainingsiterations,
                int(100 * trainFraction), shuffle,
                np.round(trainerror, 2),
                np.round(testerror, 2), cfg["pcutoff"],
                np.round(trainerrorpcutoff, 2),
                np.round(testerrorpcutoff, 2), Snapshots[snapindex], scale,
                dlc_cfg['net_type']
            ]
            results.append(r)
        else:
            print("Model not trained/evaluated!")
        if fulldata == True:
            DATA.append([
                DataMachine, Data, data, trainIndices, testIndices,
                trainFraction, DLCscorer, comparisonbodyparts, cfg,
                evaluationfolder, Snapshots[snapindex]
            ])

    os.chdir(start_path)
    if fulldata == True:
        return DATA, results
    else:
        return results
Exemple #2
0
def evaluate_network(config,
                     Shuffles=[1],
                     trainingsetindex=0,
                     plotting=None,
                     show_errors=True,
                     comparisonbodyparts="all",
                     gputouse=None,
                     rescale=False):
    """

    Evaluates the network based on the saved models at different stages of the training network.\n
    The evaluation results are stored in the .h5 and .csv file under the subdirectory 'evaluation_results'.
    Change the snapshotindex parameter in the config file to 'all' in order to evaluate all the saved models.
    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    Shuffles: list, optional
        List of integers specifying the shuffle indices of the training dataset. The default is [1]

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a list in config.yaml). This
        variable can also be set to "all".

    plotting: bool, optional
        Plots the predictions on the train and test images. The default is ``False``; if provided it must be either ``True`` or ``False``

    show_errors: bool, optional
        Display train and test errors. The default is `True``

    comparisonbodyparts: list of bodyparts, Default is "all".
        The average error will be computed for those body parts only (Has to be a subset of the body parts).

    gputouse: int, optional. Natural number indicating the number of your GPU (see number in nvidia-smi). If you do not have a GPU put None.
        See: https://nvidia.custhelp.com/app/answers/detail/a_id/3751/~/useful-nvidia-smi-queries

    rescale: bool, default False
        Evaluate the model at the 'global_scale' variable (as set in the test/pose_config.yaml file for a particular project). I.e. every
        image will be resized according to that scale and prediction will be compared to the resized ground truth. The error will be reported
        in pixels at rescaled to the *original* size. I.e. For a [200,200] pixel image evaluated at global_scale=.5, the predictions are calculated
        on [100,100] pixel images, compared to 1/2*ground truth and this error is then multiplied by 2!. The evaluation images are also shown for the
        original size!

    Examples
    --------
    If you do not want to plot
    >>> deeplabcutcore.evaluate_network('/analysis/project/reaching-task/config.yaml', Shuffles=[1])
    --------
    If you want to plot
    >>> deeplabcutcore.evaluate_network('/analysis/project/reaching-task/config.yaml',Shuffles=[1],True)

    """
    import os
    #import skimage.color
    #from skimage.io import imread

    from deeplabcutcore.utils.auxfun_videos import imread, imresize
    from deeplabcutcore.pose_estimation_tensorflow.nnet import predict
    from deeplabcutcore.pose_estimation_tensorflow.config import load_config
    from deeplabcutcore.pose_estimation_tensorflow.dataset.pose_dataset import data_to_input
    from deeplabcutcore.utils import auxiliaryfunctions
    import tensorflow as tf

    if 'TF_CUDNN_USE_AUTOTUNE' in os.environ:
        del os.environ[
            'TF_CUDNN_USE_AUTOTUNE']  #was potentially set during training

    tf.compat.v1.reset_default_graph()
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  #
    #    tf.logging.set_verbosity(tf.logging.WARN)

    start_path = os.getcwd()
    # Read file path for pose_config file. >> pass it on
    cfg = auxiliaryfunctions.read_config(config)
    if gputouse is not None:  #gpu selectinon
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gputouse)

    if trainingsetindex == 'all':
        TrainingFractions = cfg["TrainingFraction"]
    else:
        if trainingsetindex < len(
                cfg["TrainingFraction"]) and trainingsetindex >= 0:
            TrainingFractions = [
                cfg["TrainingFraction"][int(trainingsetindex)]
            ]
        else:
            raise Exception('Please check the trainingsetindex! ',
                            trainingsetindex,
                            ' should be an integer from 0 .. ',
                            int(len(cfg["TrainingFraction"]) - 1))

    # Loading human annotatated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(cfg["project_path"], str(trainingsetfolder),
                     'CollectedData_' + cfg["scorer"] + '.h5'),
        'df_with_missing')

    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts)
    # Make folder for evaluation
    auxiliaryfunctions.attempttomakefolder(
        str(cfg["project_path"] + "/evaluation-results/"))
    for shuffle in Shuffles:
        for trainFraction in TrainingFractions:
            ##################################################
            # Load and setup CNN part detector
            ##################################################
            datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg)

            modelfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetModelFolder(trainFraction, shuffle,
                                                      cfg)))
            path_test_config = Path(modelfolder) / 'test' / 'pose_cfg.yaml'
            # Load meta data
            data, trainIndices, testIndices, trainFraction = auxiliaryfunctions.LoadMetadata(
                os.path.join(cfg["project_path"], metadatafn))

            try:
                dlc_cfg = load_config(str(path_test_config))
            except FileNotFoundError:
                raise FileNotFoundError(
                    "It seems the model for shuffle %s and trainFraction %s does not exist."
                    % (shuffle, trainFraction))

            #change batch size, if it was edited during analysis!
            dlc_cfg['batch_size'] = 1  #in case this was edited for analysis.

            #Create folder structure to store results.
            evaluationfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetEvaluationFolder(
                        trainFraction, shuffle, cfg)))
            auxiliaryfunctions.attempttomakefolder(evaluationfolder,
                                                   recursive=True)
            #path_train_config = modelfolder / 'train' / 'pose_cfg.yaml'

            # Check which snapshots are available and sort them by # iterations
            Snapshots = np.array([
                fn.split('.')[0]
                for fn in os.listdir(os.path.join(str(modelfolder), 'train'))
                if "index" in fn
            ])
            try:  #check if any where found?
                Snapshots[0]
            except IndexError:
                raise FileNotFoundError(
                    "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                    % (shuffle, trainFraction))

            increasing_indices = np.argsort(
                [int(m.split('-')[1]) for m in Snapshots])
            Snapshots = Snapshots[increasing_indices]

            if cfg["snapshotindex"] == -1:
                snapindices = [-1]
            elif cfg["snapshotindex"] == "all":
                snapindices = range(len(Snapshots))
            elif cfg["snapshotindex"] < len(Snapshots):
                snapindices = [cfg["snapshotindex"]]
            else:
                raise ValueError(
                    "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
                )

            final_result = []

            ########################### RESCALING (to global scale)
            if rescale == True:
                scale = dlc_cfg['global_scale']
                Data = pd.read_hdf(
                    os.path.join(cfg["project_path"], str(trainingsetfolder),
                                 'CollectedData_' + cfg["scorer"] + '.h5'),
                    'df_with_missing') * scale
            else:
                scale = 1

            ##################################################
            # Compute predictions over images
            ##################################################
            for snapindex in snapindices:
                dlc_cfg['init_weights'] = os.path.join(
                    str(modelfolder), 'train', Snapshots[snapindex]
                )  #setting weights to corresponding snapshot.
                trainingsiterations = (
                    dlc_cfg['init_weights'].split(os.sep)[-1]
                ).split(
                    '-'
                )[-1]  #read how many training siterations that corresponds to.

                # Name for deeplabcut net (based on its parameters)
                DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                    cfg, shuffle, trainFraction, trainingsiterations)
                print("Running ", DLCscorer, " with # of trainingiterations:",
                      trainingsiterations)
                notanalyzed, resultsfilename, DLCscorer = auxiliaryfunctions.CheckifNotEvaluated(
                    str(evaluationfolder), DLCscorer, DLCscorerlegacy,
                    Snapshots[snapindex])
                if notanalyzed:
                    # Specifying state of model (snapshot / training state)
                    sess, inputs, outputs = predict.setup_pose_prediction(
                        dlc_cfg)
                    Numimages = len(Data.index)
                    PredicteData = np.zeros(
                        (Numimages, 3 * len(dlc_cfg['all_joints_names'])))
                    print("Analyzing data...")
                    for imageindex, imagename in tqdm(enumerate(Data.index)):
                        image = imread(os.path.join(cfg['project_path'],
                                                    imagename),
                                       mode='RGB')
                        if scale != 1:
                            image = imresize(image, scale)

                        #image = skimage.color.gray2rgb(image)
                        image_batch = data_to_input(image)

                        # Compute prediction with the CNN
                        outputs_np = sess.run(outputs,
                                              feed_dict={inputs: image_batch})
                        scmap, locref = predict.extract_cnn_output(
                            outputs_np, dlc_cfg)

                        # Extract maximum scoring location from the heatmap, assume 1 person
                        pose = predict.argmax_pose_predict(
                            scmap, locref, dlc_cfg.stride)
                        PredicteData[imageindex, :] = pose.flatten(
                        )  # NOTE: thereby     cfg_test['all_joints_names'] should be same order as bodyparts!

                    sess.close()  #closes the current tf session

                    index = pd.MultiIndex.from_product(
                        [[DLCscorer], dlc_cfg['all_joints_names'],
                         ['x', 'y', 'likelihood']],
                        names=['scorer', 'bodyparts', 'coords'])

                    # Saving results
                    DataMachine = pd.DataFrame(PredicteData,
                                               columns=index,
                                               index=Data.index.values)
                    DataMachine.to_hdf(resultsfilename,
                                       'df_with_missing',
                                       format='table',
                                       mode='w')

                    print("Done and results stored for snapshot: ",
                          Snapshots[snapindex])
                    DataCombined = pd.concat([Data.T, DataMachine.T],
                                             axis=0,
                                             sort=False).T

                    RMSE, RMSEpcutoff = pairwisedistances(
                        DataCombined, cfg["scorer"], DLCscorer, cfg["pcutoff"],
                        comparisonbodyparts)
                    testerror = np.nanmean(
                        RMSE.iloc[testIndices].values.flatten())
                    trainerror = np.nanmean(
                        RMSE.iloc[trainIndices].values.flatten())
                    testerrorpcutoff = np.nanmean(
                        RMSEpcutoff.iloc[testIndices].values.flatten())
                    trainerrorpcutoff = np.nanmean(
                        RMSEpcutoff.iloc[trainIndices].values.flatten())
                    results = [
                        trainingsiterations,
                        int(100 * trainFraction), shuffle,
                        np.round(trainerror, 2),
                        np.round(testerror, 2), cfg["pcutoff"],
                        np.round(trainerrorpcutoff, 2),
                        np.round(testerrorpcutoff, 2)
                    ]
                    final_result.append(results)

                    if show_errors == True:
                        print("Results for",
                              trainingsiterations, " training iterations:",
                              int(100 * trainFraction), shuffle,
                              "train error:",
                              np.round(trainerror, 2), "pixels. Test error:",
                              np.round(testerror, 2), " pixels.")
                        print("With pcutoff of",
                              cfg["pcutoff"], " train error:",
                              np.round(trainerrorpcutoff,
                                       2), "pixels. Test error:",
                              np.round(testerrorpcutoff, 2), "pixels")
                        if scale != 1:
                            print(
                                "The predictions have been calculated for rescaled images (and rescaled ground truth). Scale:",
                                scale)
                        print(
                            "Thereby, the errors are given by the average distances between the labels by DLC and the scorer."
                        )

                    if plotting == True:
                        print("Plotting...")
                        foldername = os.path.join(
                            str(evaluationfolder), 'LabeledImages_' +
                            DLCscorer + '_' + Snapshots[snapindex])
                        auxiliaryfunctions.attempttomakefolder(foldername)
                        Plotting(
                            cfg, comparisonbodyparts, DLCscorer, trainIndices,
                            DataCombined * 1. / scale, foldername
                        )  #Rescaling coordinates to have figure in original size!

                    tf.compat.v1.reset_default_graph()
                    #print(final_result)
                else:
                    DataMachine = pd.read_hdf(resultsfilename,
                                              'df_with_missing')
                    if plotting == True:
                        DataCombined = pd.concat([Data.T, DataMachine.T],
                                                 axis=0,
                                                 sort=False).T
                        print(
                            "Plotting...(attention scale might be inconsistent in comparison to when data was analyzed; i.e. if you used rescale)"
                        )
                        foldername = os.path.join(
                            str(evaluationfolder), 'LabeledImages_' +
                            DLCscorer + '_' + Snapshots[snapindex])
                        auxiliaryfunctions.attempttomakefolder(foldername)
                        Plotting(cfg, comparisonbodyparts, DLCscorer,
                                 trainIndices, DataCombined * 1. / scale,
                                 foldername)

            if len(final_result) > 0:  #Only append if results were calculated
                make_results_file(final_result, evaluationfolder, DLCscorer)
                print(
                    "The network is evaluated and the results are stored in the subdirectory 'evaluation_results'."
                )
                print(
                    "If it generalizes well, choose the best model for prediction and update the config file with the appropriate index for the 'snapshotindex'.\nUse the function 'analyze_video' to make predictions on new videos."
                )
                print(
                    "Otherwise consider retraining the network (see DeepLabCut workflow Fig 2)"
                )

    #returning to intial folder
    os.chdir(str(start_path))
def mergeandsplit(config,trainindex=0,uniform=True,windows2linux=False):
    """
    This function allows additional control over "create_training_dataset".

    Merge annotated data sets (from different folders) and split data in a specific way, returns the split variables (train/test indices).
    Importantly, this allows one to freeze a split.

    One can also either create a uniform split (uniform = True; thereby indexing TrainingFraction in config file) or leave-one-folder out split
    by passing the index of the corrensponding video from the config.yaml file as variable trainindex.

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    trainindex: int, optional
        Either (in case uniform = True) indexes which element of TrainingFraction in the config file should be used (note it is a list!).
        Alternatively (uniform = False) indexes which folder is dropped, i.e. the first if trainindex=0, the second if trainindex =1, etc.

    uniform: bool, optional
        Perform uniform split (disregarding folder structure in labeled data), or (if False) leave one folder out.

    windows2linux: bool.
        The annotation files contain path formated according to your operating system. If you label on windows
        but train & evaluate on a unix system (e.g. ubunt, colab, Mac) set this variable to True to convert the paths.

    Examples
    --------
    To create a leave-one-folder-out model:
    >>> trainIndexes, testIndexes=deeplabcutcore.mergeandsplit(config,trainindex=0,uniform=False)
    returns the indices for the first video folder (as defined in config file) as testIndexes and all others as trainIndexes.
    You can then create the training set by calling (e.g. defining it as Shuffle 3):
    >>> deeplabcutcore.create_training_dataset(config,Shuffles=[3],trainIndexes=trainIndexes,testIndexes=testIndexes)

    To freeze a (uniform) split:
    >>> trainIndexes, testIndexes=deeplabcutcore.mergeandsplit(config,trainindex=0,uniform=True)
    You can then create two model instances that have the identical trainingset. Thereby you can assess the role of various parameters on the performance of DLC.

    >>> deeplabcutcore.create_training_dataset(config,Shuffles=[0],trainIndexes=trainIndexes,testIndexes=testIndexes)
    >>> deeplabcutcore.create_training_dataset(config,Shuffles=[1],trainIndexes=trainIndexes,testIndexes=testIndexes)
    --------

    """
    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg['scorer']
    project_path = cfg['project_path']
    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg) #Path concatenation OS platform independent
    auxiliaryfunctions.attempttomakefolder(Path(os.path.join(project_path,str(trainingsetfolder))),recursive=True)
    fn=os.path.join(project_path,trainingsetfolder,'CollectedData_'+cfg['scorer'])

    try:
        Data= pd.read_hdf(fn+'.h5', 'df_with_missing')
    except FileNotFoundError:
        Data = merge_annotateddatasets(cfg,project_path,Path(os.path.join(project_path,trainingsetfolder)),windows2linux=windows2linux)

    Data = Data[scorer] #extract labeled data

    if uniform==True:
        TrainingFraction = cfg['TrainingFraction']
        trainFraction=TrainingFraction[trainindex]
        trainIndexes, testIndexes = SplitTrials(range(len(Data.index)), trainFraction)
    else: #leave one folder out split
        videos = cfg['video_sets'].keys()
        test_video_name = [Path(i).stem for i in videos][trainindex]
        print("Excluding the following folder (from training):", test_video_name)
        trainIndexes, testIndexes=[],[]
        for index,name in enumerate(Data.index):
            #print(index,name.split(os.sep)[1])
            if test_video_name==name.split(os.sep)[1]: #this is the video name
                #print(name,test_video_name)
                testIndexes.append(index)
            else:
                trainIndexes.append(index)

    return trainIndexes, testIndexes
def create_training_dataset(config,num_shuffles=1,Shuffles=None,windows2linux=False,userfeedback=False,
        trainIndexes=None,testIndexes=None,
        net_type=None,augmenter_type=None):
    """
    Creates a training dataset. Labels from all the extracted frames are merged into a single .h5 file.\n
    Only the videos included in the config file are used to create this dataset.\n

    [OPTIONAL] Use the function 'add_new_video' at any stage of the project to add more videos to the project.

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    num_shuffles : int, optional
        Number of shuffles of training dataset to create, i.e. [1,2,3] for num_shuffles=3. Default is set to 1.

    Shuffles: list of shuffles.
        Alternatively the user can also give a list of shuffles (integers!).

    windows2linux: bool.
        The annotation files contain path formated according to your operating system. If you label on windows
        but train & evaluate on a unix system (e.g. ubunt, colab, Mac) set this variable to True to convert the paths.

    userfeedback: bool, optional
        If this is set to false, then all requested train/test splits are created (no matter if they already exist). If you
        want to assure that previous splits etc. are not overwritten, then set this to True and you will be asked for each split.

    trainIndexes: list of lists, optional (default=None)
        List of one or multiple lists containing train indexes.
        A list containing two lists of training indexes will produce two splits.

    testIndexes: list of lists, optional (default=None)
        List of test indexes.

    net_type: string
        Type of networks. Currently resnet_50, resnet_101, resnet_152, mobilenet_v2_1.0,mobilenet_v2_0.75, mobilenet_v2_0.5, and mobilenet_v2_0.35 are supported.

    augmenter_type: string
        Type of augmenter. Currently default, imgaug, tensorpack, and deterministic are supported.

    Example
    --------
    >>> deeplabcutcore.create_training_dataset('/analysis/project/reaching-task/config.yaml',num_shuffles=1)
    Windows:
    >>> deeplabcutcore.create_training_dataset('C:\\Users\\Ulf\\looming-task\\config.yaml',Shuffles=[3,17,5])
    --------
    """
    import scipy.io as sio

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg['scorer']
    project_path = cfg['project_path']
    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg) #Path concatenation OS platform independent
    auxiliaryfunctions.attempttomakefolder(Path(os.path.join(project_path,str(trainingsetfolder))),recursive=True)

    Data = merge_annotateddatasets(cfg,project_path,Path(os.path.join(project_path,trainingsetfolder)),windows2linux)
    Data = Data[scorer] #extract labeled data

    #loading & linking pretrained models
    if net_type is None: #loading & linking pretrained models
        net_type =cfg.get('default_net_type', 'resnet_50')
    else:
        if 'resnet' in net_type or 'mobilenet' in net_type:
            pass
        else:
            raise ValueError('Invalid network type:', net_type)

    if augmenter_type is None:
        augmenter_type=cfg.get('default_augmenter', 'default')
    else:
        if augmenter_type in ['default','imgaug','tensorpack','deterministic']:
            pass
        else:
            raise ValueError('Invalid augmenter type:', augmenter_type)

    import deeplabcutcore
    parent_path = Path(os.path.dirname(deeplabcutcore.__file__))
    defaultconfigfile = str(parent_path / 'pose_cfg.yaml')
    model_path,num_shuffles=auxfun_models.Check4weights(net_type,parent_path,num_shuffles) #if the model does not exist >> throws error!

    if Shuffles is None:
        Shuffles = range(1, num_shuffles + 1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]

    #print(trainIndexes,testIndexes, Shuffles, augmenter_type,net_type)
    if trainIndexes is None and testIndexes is None:
        splits = [(trainFraction, shuffle, SplitTrials(range(len(Data.index)), trainFraction))
                  for trainFraction in cfg['TrainingFraction'] for shuffle in Shuffles]
    else:
        if len(trainIndexes) != len(testIndexes) != len(Shuffles):
            raise ValueError('Number of Shuffles and train and test indexes should be equal.')
        splits = []
        for shuffle, (train_inds, test_inds) in enumerate(zip(trainIndexes, testIndexes)):
            trainFraction = round(len(train_inds) * 1./ (len(train_inds) + len(test_inds)), 2)
            print(f"You passed a split with the following fraction: {int(100 * trainFraction)}%")
            splits.append((trainFraction, Shuffles[shuffle], (train_inds, test_inds)))

    bodyparts = cfg['bodyparts']
    nbodyparts = len(bodyparts)
    for trainFraction, shuffle, (trainIndexes, testIndexes) in splits:
        if len(trainIndexes)>0:
            if userfeedback:
                trainposeconfigfile, _, _ = training.return_train_network_path(config, shuffle=shuffle, trainFraction=trainFraction)
                if trainposeconfigfile.is_file():
                    askuser=input ("The model folder is already present. If you continue, it will overwrite the existing model (split). Do you want to continue?(yes/no): ")
                    if askuser=='no'or askuser=='No' or askuser=='N' or askuser=='No':
                        raise Exception("Use the Shuffles argument as a list to specify a different shuffle index. Check out the help for more details.")

            ####################################################
            # Generating data structure with labeled information & frame metadata (for deep cut)
            ####################################################
            # Make training file!
            datafilename, metadatafilename = auxiliaryfunctions.GetDataandMetaDataFilenames(trainingsetfolder,
                                                                                            trainFraction, shuffle, cfg)

            ################################################################################
            # Saving data file (convert to training file for deeper cut (*.mat))
            ################################################################################
            data, MatlabData = format_training_data(Data, trainIndexes, nbodyparts, project_path)
            sio.savemat(os.path.join(project_path,datafilename), {'dataset': MatlabData})

            ################################################################################
            # Saving metadata (Pickle file)
            ################################################################################
            auxiliaryfunctions.SaveMetadata(os.path.join(project_path,metadatafilename),data, trainIndexes, testIndexes, trainFraction)

            ################################################################################
            # Creating file structure for training &
            # Test files as well as pose_yaml files (containing training and testing information)
            #################################################################################
            modelfoldername=auxiliaryfunctions.GetModelFolder(trainFraction,shuffle,cfg)
            auxiliaryfunctions.attempttomakefolder(Path(config).parents[0] / modelfoldername,recursive=True)
            auxiliaryfunctions.attempttomakefolder(str(Path(config).parents[0] / modelfoldername)+ '/train')
            auxiliaryfunctions.attempttomakefolder(str(Path(config).parents[0] / modelfoldername)+ '/test')

            path_train_config = str(os.path.join(cfg['project_path'],Path(modelfoldername),'train','pose_cfg.yaml'))
            path_test_config = str(os.path.join(cfg['project_path'],Path(modelfoldername),'test','pose_cfg.yaml'))
            #str(cfg['proj_path']+'/'+Path(modelfoldername) / 'test'  /  'pose_cfg.yaml')

            items2change = {
                "dataset": datafilename,
                "metadataset": metadatafilename,
                "num_joints": len(bodyparts),
                "all_joints": [[i] for i in range(len(bodyparts))],
                "all_joints_names": [str(bpt) for bpt in bodyparts],
                "init_weights": model_path,
                "project_path": str(cfg['project_path']),
                "net_type": net_type,
                "dataset_type": augmenter_type,
            }
            trainingdata = MakeTrain_pose_yaml(items2change,path_train_config,defaultconfigfile)
            keys2save = [
                "dataset", "num_joints", "all_joints", "all_joints_names",
                "net_type", 'init_weights', 'global_scale', 'location_refinement',
                'locref_stdev'
            ]
            MakeTest_pose_yaml(trainingdata, keys2save,path_test_config)
            print("The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!")
    return splits
Exemple #5
0
def create_pretrained_project(
    project,
    experimenter,
    videos,
    model="full_human",
    working_directory=None,
    copy_videos=False,
    videotype=None,
    analyzevideo=True,
    filtered=True,
    createlabeledvideo=True,
    trainFraction=None,
):
    """
    Creates a new project directory, sub-directories and a basic configuration file.
    Change its parameters to your projects need.

    The project will also be initialized with a pre-trained model from the DeepLabCut model zoo!

    http://www.mousemotorlab.org/dlc-modelzoo

    Parameters
    ----------
    project : string
        String containing the name of the project.

    experimenter : string
        String containing the name of the experimenter.

    model: string, options see  http://www.mousemotorlab.org/dlc-modelzoo
        Current option and default: 'full_human'  Creates a demo human project and analyzes a video with ResNet 101 weights pretrained on MPII Human Pose. This is from the DeeperCut paper
        by Insafutdinov et al. https://arxiv.org/abs/1605.03170 Please make sure to cite it too if you use this code!

    videos : list
        A list of string containing the full paths of the videos to include in the project.

    working_directory : string, optional
        The directory where the project will be created. The default is the ``current working directory``; if provided, it must be a string.

    copy_videos : bool, optional  ON WINDOWS: TRUE is often necessary!
        If this is set to True, the videos are copied to the ``videos`` directory. If it is False,symlink of the videos are copied to the project/videos directory. The default is ``False``; if provided it must be either
        ``True`` or ``False``.

    analyzevideo " bool, optional
        If true, then the video is analzyed and a labeled video is created. If false, then only the project will be created and the weights downloaded. You can then access them

    filtered: bool, default false
        Boolean variable indicating if filtered pose data output should be plotted rather than frame-by-frame predictions.
        Filtered version can be calculated with deeplabcutcore.filterpredictions

    trainFraction: By default value from *new* projects. (0.95)
            Fraction that will be used in dlc-model/trainingset folder name.

    Example
    --------
    Linux/MacOs loading full_human model and analzying video /homosapiens1.avi
    >>> deeplabcutcore.create_pretrained_project('humanstrokestudy','Linus',['/data/videos/homosapiens1.avi'], copy_videos=False)

    Loading full_cat model and analzying video "felixfeliscatus3.avi"
    >>> deeplabcutcore.create_pretrained_project('humanstrokestudy','Linus',['/data/videos/felixfeliscatus3.avi'], model='full_cat')

    Windows:
    >>> deeplabcutcore.create_pretrained_project('humanstrokestudy','Bill',[r'C:\yourusername\rig-95\Videos\reachingvideo1.avi'],r'C:\yourusername\analysis\project' copy_videos=True)
    Users must format paths with either:  r'C:\ OR 'C:\\ <- i.e. a double backslash \ \ )

    """
    if model in globals()["Modeloptions"]:
        cwd = os.getcwd()

        cfg = deeplabcutcore.create_new_project(project, experimenter, videos,
                                                working_directory, copy_videos,
                                                videotype)
        if trainFraction is not None:
            auxiliaryfunctions.edit_config(
                cfg, {"TrainingFraction": [trainFraction]})

        config = auxiliaryfunctions.read_config(cfg)
        if model == "full_human":
            config["bodyparts"] = [
                "ankle1",
                "knee1",
                "hip1",
                "hip2",
                "knee2",
                "ankle2",
                "wrist1",
                "elbow1",
                "shoulder1",
                "shoulder2",
                "elbow2",
                "wrist2",
                "chin",
                "forehead",
            ]
            config["skeleton"] = [
                ["ankle1", "knee1"],
                ["ankle2", "knee2"],
                ["knee1", "hip1"],
                ["knee2", "hip2"],
                ["hip1", "hip2"],
                ["shoulder1", "shoulder2"],
                ["shoulder1", "hip1"],
                ["shoulder2", "hip2"],
                ["shoulder1", "elbow1"],
                ["shoulder2", "elbow2"],
                ["chin", "forehead"],
                ["elbow1", "wrist1"],
                ["elbow2", "wrist2"],
            ]
            config["default_net_type"] = "resnet_101"
        else:  # just make a case and put the stuff you want.
            # TBD: 'partaffinityfield_graph' >> use to set skeleton!
            pass

        auxiliaryfunctions.write_config(cfg, config)
        config = auxiliaryfunctions.read_config(cfg)

        train_dir = Path(
            os.path.join(
                config["project_path"],
                str(
                    auxiliaryfunctions.GetModelFolder(
                        trainFraction=config["TrainingFraction"][0],
                        shuffle=1,
                        cfg=config,
                    )),
                "train",
            ))
        test_dir = Path(
            os.path.join(
                config["project_path"],
                str(
                    auxiliaryfunctions.GetModelFolder(
                        trainFraction=config["TrainingFraction"][0],
                        shuffle=1,
                        cfg=config,
                    )),
                "test",
            ))

        # Create the model directory
        train_dir.mkdir(parents=True, exist_ok=True)
        test_dir.mkdir(parents=True, exist_ok=True)

        modelfoldername = auxiliaryfunctions.GetModelFolder(
            trainFraction=config["TrainingFraction"][0], shuffle=1, cfg=config)
        path_train_config = str(
            os.path.join(config["project_path"], Path(modelfoldername),
                         "train", "pose_cfg.yaml"))
        path_test_config = str(
            os.path.join(config["project_path"], Path(modelfoldername), "test",
                         "pose_cfg.yaml"))

        # Download the weights and put then in appropriate directory
        print("Dowloading weights...")
        auxfun_models.DownloadModel(model, train_dir)

        pose_cfg = deeplabcutcore.auxiliaryfunctions.read_plainconfig(
            path_train_config)
        print(path_train_config)
        # Updating config file:
        dict = {
            "default_net_type": pose_cfg["net_type"],
            "default_augmenter": pose_cfg["dataset_type"],
            "bodyparts": pose_cfg["all_joints_names"],
            "skeleton": [],  # TODO: update with paf_graph
            "dotsize": 6,
        }
        auxiliaryfunctions.edit_config(cfg, dict)

        # Create the pose_config.yaml files
        parent_path = Path(os.path.dirname(deeplabcutcore.__file__))
        defaultconfigfile = str(parent_path / "pose_cfg.yaml")
        trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(config)
        datafilename, metadatafilename = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder,
            trainFraction=config["TrainingFraction"][0],
            shuffle=1,
            cfg=config,
        )

        # downloading base encoder / not required unless on re-trains (but when a training set is created this happens anyway)
        # model_path, num_shuffles=auxfun_models.Check4weights(pose_cfg['net_type'], parent_path, num_shuffles= 1)

        # Updating training and test pose_cfg:
        snapshotname = [fn for fn in os.listdir(train_dir)
                        if ".meta" in fn][0].split(".meta")[0]
        dict2change = {
            "init_weights": str(os.path.join(train_dir, snapshotname)),
            "project_path": str(config["project_path"]),
        }

        UpdateTrain_pose_yaml(pose_cfg, dict2change, path_train_config)
        keys2save = [
            "dataset",
            "dataset_type",
            "num_joints",
            "all_joints",
            "all_joints_names",
            "net_type",
            "init_weights",
            "global_scale",
            "location_refinement",
            "locref_stdev",
        ]

        MakeTest_pose_yaml(pose_cfg, keys2save, path_test_config)

        video_dir = os.path.join(config["project_path"], "videos")
        if analyzevideo == True:
            print("Analyzing video...")
            deeplabcutcore.analyze_videos(cfg, [video_dir],
                                          videotype,
                                          save_as_csv=True)

        if createlabeledvideo == True:
            if filtered:
                deeplabcutcore.filterpredictions(cfg, [video_dir], videotype)

            print("Plotting results...")
            deeplabcutcore.create_labeled_video(cfg, [video_dir],
                                                videotype,
                                                draw_skeleton=True,
                                                filtered=filtered)
            deeplabcutcore.plot_trajectories(cfg, [video_dir],
                                             videotype,
                                             filtered=filtered)

        os.chdir(cwd)
        return cfg, path_train_config

    else:
        return "N/A", "N/A"