Ejemplos de GetTrainingSetFolder en Python

Lenguaje de programación: Python

Namespace/Package Name: deeplabcut.utils.auxiliaryfunctions

Método / Función: GetTrainingSetFolder

Ejemplos en hotexamples.com: 27

Python GetTrainingSetFolder - 27 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de deeplabcut.utils.auxiliaryfunctions.GetTrainingSetFolder extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Ejemplo n.º 1

Mostrar archivo

Archivo: crossvalutils.py Proyecto: truc102/DeepLabCut

def cross_validate_paf_graphs(
    config,
    inference_config,
    full_data_file,
    metadata_file,
    output_name="",
    pcutoff=0.1,
    greedy=False,
    add_discarded=True,
    calibrate=False,
    overwrite_config=True,
):
    """Benchmark candidate PAF graphs and record the best one in the pose config.

    Parameters
    ----------
    config : str
        Path to the project configuration file (read with ``read_config``).
    inference_config : str
        Path to the inference configuration file. The pose configuration path
        is derived from it by substituting ``"inference_cfg"`` with
        ``"pose_cfg"``.
    full_data_file : str
        Path to a pickle file holding the evaluation data.
    metadata_file : str
        Path to a pickle file holding the matching metadata.
    output_name : str, optional
        If non-empty, the benchmark results are pickled to this path, wrapped
        in a one-element list.
    pcutoff : float, optional
        Value stored under ``"pcutoff"`` in the copy of the inference
        configuration that is handed to the benchmark.
    greedy, add_discarded : bool, optional
        Forwarded unchanged to ``_benchmark_paf_graphs``.
    calibrate : bool, optional
        If True, the project's ``CollectedData_<scorer>.h5`` file in the
        training-set folder is passed as calibration file; otherwise an empty
        string is passed.
    overwrite_config : bool, optional
        If False, the pose configuration is first copied to ``*_old.yaml``.
        The pose configuration is edited in either case.
    """
    cfg = auxiliaryfunctions.read_config(config)

    # Override pcutoff on a (shallow) copy rather than on the loaded mapping.
    inf_cfg_temp = auxiliaryfunctions.read_plainconfig(inference_config).copy()
    inf_cfg_temp["pcutoff"] = pcutoff

    # NOTE(review): pickle.load runs arbitrary code from the file; both inputs
    # are presumably produced by the evaluation step -- confirm they are trusted.
    with open(full_data_file, "rb") as data_handle:
        data = pickle.load(data_handle)
    with open(metadata_file, "rb") as meta_handle:
        metadata = pickle.load(meta_handle)

    paf_graph = _set_up_evaluation(data)["paf_graph"]
    ignored_edges = _filter_unwanted_paf_connections(config, paf_graph)
    paf_inds, _ = _get_n_best_paf_graphs(
        data, metadata, paf_graph, ignore_inds=ignored_edges
    )

    calibration_file = ""
    if calibrate:
        labels_folder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
        calibration_file = os.path.join(
            cfg["project_path"],
            str(labels_folder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        )

    results = _benchmark_paf_graphs(
        cfg,
        inf_cfg_temp,
        data,
        paf_inds,
        greedy,
        add_discarded,
        calibration_file=calibration_file,
    )

    # Pick the graph maximising (1 - miss) * purity, both taken from the
    # "mean" column of the second element of the benchmark results.
    summary = results[1]
    hit_rate = 1 - summary.loc["miss", "mean"]
    best_graph = np.argmax(hit_rate * summary.loc["purity", "mean"])

    pose_config = inference_config.replace("inference_cfg", "pose_cfg")
    if not overwrite_config:
        backup_path = pose_config.replace(".yaml", "_old.yaml")
        shutil.copy(pose_config, backup_path)

    # Cast to built-in ints before handing the indices to the config writer.
    best_edges = [int(edge) for edge in paf_inds[best_graph]]
    auxiliaryfunctions.edit_config(pose_config, {"paf_best": best_edges})

    if output_name:
        with open(output_name, "wb") as out_handle:
            pickle.dump([results], out_handle)

Ejemplo n.º 2

Mostrar archivo

def evaluate_multianimal_full(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    plotting=None,
    show_errors=True,
    comparisonbodyparts="all",
    gputouse=None,
    modelprefix="",
    c_engine=False,
):
    """Evaluate a multi-animal network on the project's labeled frames.

    For every requested shuffle / training fraction and every selected
    snapshot, the detector is run on each image listed in the ground-truth
    table. Per snapshot, the function:

    * stores detections and ground truth through
      ``auxfun_multianimal.SaveFullMultiAnimalData``;
    * writes the per-keypoint distances and confidences to
      ``dist_<iterations>.csv`` in the evaluation folder;
    * writes the per-bodypart spread plus a ``distnorm`` entry to
      ``sd_<iterations>.csv`` in the same folder;
    * optionally saves labeled images and prints error summaries.

    A snapshot whose ``*_full.pickle`` result file already exists is skipped.

    Parameters
    ----------
    config : str
        Path to the project configuration file.
    Shuffles : list, optional
        Shuffle indices to evaluate. The list is only iterated, never mutated.
    trainingsetindex : int or "all", optional
        Index into ``cfg["TrainingFraction"]``, or ``"all"`` to evaluate every
        training fraction.
    plotting : bool, optional
        If truthy, labeled images are written to a
        ``LabeledImages_<scorer>_<snapshot>`` folder.
    show_errors : bool, optional
        If truthy, train/test errors are printed for each snapshot.
    comparisonbodyparts : list or "all", optional
        Passed to ``IntersectionofBodyPartsandOnesGivenbyUser``; within this
        function the result only determines the number of plot colors.
    gputouse : int or None, optional
        If not None, written to ``CUDA_VISIBLE_DEVICES``.
    modelprefix : str, optional
        Forwarded to the model-folder, evaluation-folder and scorer-name
        helpers.
    c_engine : bool, optional
        Forwarded to ``predictma.get_detectionswithcostsandGT``.

    Raises
    ------
    FileNotFoundError
        If the test ``pose_cfg.yaml`` of a requested model cannot be loaded.
    ValueError
        If ``cfg["snapshotindex"]`` is an integer that does not select an
        available snapshot.
    """
    from deeplabcut.pose_estimation_tensorflow.nnet import predict
    from deeplabcut.pose_estimation_tensorflow.nnet import (
        predict_multianimal as predictma, )
    from deeplabcut.utils import auxiliaryfunctions, auxfun_multianimal

    import tensorflow as tf

    if "TF_CUDNN_USE_AUTOTUNE" in os.environ:
        del os.environ[
            "TF_CUDNN_USE_AUTOTUNE"]  # was potentially set during training

    tf.reset_default_graph()
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  #
    if gputouse is not None:  # gpu selection
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

    start_path = os.getcwd()

    ##################################################
    # Load data...
    ##################################################
    cfg = auxiliaryfunctions.read_config(config)
    if trainingsetindex == "all":
        TrainingFractions = cfg["TrainingFraction"]
    else:
        TrainingFractions = [cfg["TrainingFraction"][trainingsetindex]]

    # Loading human annotated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        ),
        "df_with_missing",
    )
    # Handle data previously annotated on a different platform
    sep = "/" if "/" in Data.index[0] else "\\"
    if sep != os.path.sep:
        # Literal replacement: a backslash must never be read as a regex,
        # whatever the installed pandas uses as its default.
        Data.index = Data.index.str.replace(sep, os.path.sep, regex=False)
    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts)
    # One label per column of `dist`/`conf`: the multi-animal bodyparts
    # repeated once per individual, followed by the unique bodyparts.
    all_bpts = np.asarray(
        len(cfg["individuals"]) * cfg["multianimalbodyparts"] +
        cfg["uniquebodyparts"])
    colors = visualization.get_cmap(len(comparisonbodyparts),
                                    name=cfg["colormap"])
    # Make folder for evaluation
    auxiliaryfunctions.attempttomakefolder(
        str(cfg["project_path"] + "/evaluation-results/"))
    for shuffle in Shuffles:
        for trainFraction in TrainingFractions:
            ##################################################
            # Load and setup CNN part detector
            ##################################################
            datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg)
            modelfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetModelFolder(
                        trainFraction, shuffle, cfg, modelprefix=modelprefix)),
            )
            path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"

            # Load meta data
            (
                data,
                trainIndices,
                testIndices,
                trainFraction,
            ) = auxiliaryfunctions.LoadMetadata(
                os.path.join(cfg["project_path"], metadatafn))

            try:
                dlc_cfg = load_config(str(path_test_config))
            except FileNotFoundError:
                raise FileNotFoundError(
                    "It seems the model for shuffle %s and trainFraction %s does not exist."
                    % (shuffle, trainFraction))

            # TODO: IMPLEMENT for different batch sizes?
            dlc_cfg["batch_size"] = 1  # due to differently sized images!!!

            joints = dlc_cfg["all_joints_names"]

            # Create folder structure to store results.
            evaluationfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetEvaluationFolder(
                        trainFraction, shuffle, cfg, modelprefix=modelprefix)),
            )
            auxiliaryfunctions.attempttomakefolder(evaluationfolder,
                                                   recursive=True)
            # path_train_config = modelfolder / 'train' / 'pose_cfg.yaml'

            # Check which snapshots are available and sort them by # iterations
            Snapshots = np.array([
                fn.split(".")[0]
                for fn in os.listdir(os.path.join(str(modelfolder), "train"))
                if "index" in fn
            ])
            if len(Snapshots) == 0:
                print(
                    "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                    % (shuffle, trainFraction))
            else:
                increasing_indices = np.argsort(
                    [int(m.split("-")[1]) for m in Snapshots])
                Snapshots = Snapshots[increasing_indices]

                if cfg["snapshotindex"] == -1:
                    snapindices = [-1]
                elif cfg["snapshotindex"] == "all":
                    snapindices = range(len(Snapshots))
                elif cfg["snapshotindex"] < len(Snapshots):
                    snapindices = [cfg["snapshotindex"]]
                else:
                    # Fail explicitly: merely printing left `snapindices`
                    # unbound (NameError below) or, on later iterations,
                    # silently reused the previous model's selection.
                    raise ValueError(
                        "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
                    )

                final_result = []
                ##################################################
                # Compute predictions over images
                ##################################################
                for snapindex in snapindices:
                    dlc_cfg["init_weights"] = os.path.join(
                        str(modelfolder), "train", Snapshots[snapindex]
                    )  # setting weights to corresponding snapshot.
                    trainingsiterations = (
                        dlc_cfg["init_weights"].split(os.sep)[-1]
                    ).split(
                        "-"
                    )[-1]  # read how many training iterations that corresponds to.

                    # name for deeplabcut net (based on its parameters)
                    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                        cfg,
                        shuffle,
                        trainFraction,
                        trainingsiterations,
                        modelprefix=modelprefix,
                    )
                    print(
                        "Running ",
                        DLCscorer,
                        " with # of trainingiterations:",
                        trainingsiterations,
                    )
                    (
                        notanalyzed,
                        resultsfilename,
                        DLCscorer,
                    ) = auxiliaryfunctions.CheckifNotEvaluated(
                        str(evaluationfolder),
                        DLCscorer,
                        DLCscorerlegacy,
                        Snapshots[snapindex],
                    )

                    if os.path.isfile(
                            resultsfilename.split(".h5")[0] + "_full.pickle"):
                        print("Model already evaluated.", resultsfilename)
                    else:
                        if plotting:
                            foldername = os.path.join(
                                str(evaluationfolder),
                                "LabeledImages_" + DLCscorer + "_" +
                                Snapshots[snapindex],
                            )
                            auxiliaryfunctions.attempttomakefolder(foldername)

                        # print(dlc_cfg)
                        # Specifying state of model (snapshot / training state)
                        sess, inputs, outputs = predict.setup_pose_prediction(
                            dlc_cfg)

                        PredicteData = {}
                        # One row per image, one column per entry of all_bpts.
                        dist = np.full((len(Data), len(all_bpts)), np.nan)
                        conf = np.full_like(dist, np.nan)
                        distnorm = np.full(len(Data), np.nan)
                        print("Analyzing data...")
                        for imageindex, imagename in tqdm(enumerate(
                                Data.index)):
                            image_path = os.path.join(cfg["project_path"],
                                                      imagename)
                            image = io.imread(image_path)
                            frame = img_as_ubyte(skimage.color.gray2rgb(image))

                            GT = Data.iloc[imageindex]
                            df = GT.unstack("coords").reindex(
                                joints, level='bodyparts')

                            # Evaluate PAF edge lengths to calibrate `distnorm`
                            temp = GT.unstack("bodyparts")[joints]
                            xy = temp.values.reshape(
                                (-1, 2, temp.shape[1])).swapaxes(1, 2)
                            edges = xy[:, dlc_cfg["partaffinityfield_graph"]]
                            # Squared edge lengths; the square root is taken
                            # once, when `sd["distnorm"]` is filled in below.
                            lengths = np.sum(
                                (edges[:, :, 0] - edges[:, :, 1])**2, axis=2)
                            distnorm[imageindex] = np.nanmax(lengths)

                            # FIXME Is having an empty array vs nan really that necessary?!
                            groundtruthidentity = list(
                                df.index.get_level_values(
                                    "individuals").to_numpy().reshape((-1, 1)))
                            groundtruthcoordinates = list(
                                df.values[:, np.newaxis])
                            for i, coords in enumerate(groundtruthcoordinates):
                                if np.isnan(coords).any():
                                    groundtruthcoordinates[i] = np.empty(
                                        (0, 2), dtype=float)
                                    groundtruthidentity[i] = np.array(
                                        [], dtype=str)

                            PredicteData[imagename] = {}
                            PredicteData[imagename]["index"] = imageindex

                            pred = predictma.get_detectionswithcostsandGT(
                                frame,
                                groundtruthcoordinates,
                                dlc_cfg,
                                sess,
                                inputs,
                                outputs,
                                outall=False,
                                nms_radius=dlc_cfg.nmsradius,
                                det_min_score=dlc_cfg.minconfidence,
                                c_engine=c_engine,
                            )
                            PredicteData[imagename]["prediction"] = pred
                            PredicteData[imagename]["groundtruth"] = [
                                groundtruthidentity,
                                groundtruthcoordinates,
                                GT,
                            ]

                            coords_pred = pred["coordinates"][0]
                            probs_pred = pred["confidence"]
                            for bpt, xy_gt in df.groupby(level="bodyparts"):
                                inds_gt = np.flatnonzero(
                                    np.all(~np.isnan(xy_gt), axis=1))
                                n_joint = joints.index(bpt)
                                xy = coords_pred[n_joint]
                                if inds_gt.size and xy.size:
                                    # Pick the predictions closest to ground truth,
                                    # rather than the ones the model is most confident in
                                    d = cdist(xy_gt.iloc[inds_gt], xy)
                                    rows, cols = linear_sum_assignment(d)
                                    min_dists = d[rows, cols]
                                    inds = np.flatnonzero(all_bpts == bpt)
                                    sl = imageindex, inds[inds_gt[rows]]
                                    dist[sl] = min_dists
                                    conf[sl] = probs_pred[n_joint][
                                        cols].squeeze()

                            if plotting:
                                fig = visualization.make_multianimal_labeled_image(
                                    frame,
                                    groundtruthcoordinates,
                                    coords_pred,
                                    probs_pred,
                                    colors,
                                    cfg["dotsize"],
                                    cfg["alphavalue"],
                                    cfg["pcutoff"],
                                )

                                visualization.save_labeled_frame(
                                    fig,
                                    image_path,
                                    foldername,
                                    imageindex in trainIndices,
                                )

                        sess.close()  # closes the current tf session

                        # Compute all distance statistics
                        # NOTE: `df` is the table built for the last image in
                        # the loop above; its index supplies the column labels.
                        df_dist = pd.DataFrame(dist, columns=df.index)
                        df_conf = pd.DataFrame(conf, columns=df.index)
                        df_joint = pd.concat([df_dist, df_conf],
                                             keys=["rmse", "conf"],
                                             names=["metrics"],
                                             axis=1)
                        df_joint = df_joint.reorder_levels(list(
                            np.roll(df_joint.columns.names, -1)),
                                                           axis=1)
                        df_joint.sort_index(axis=1,
                                            level=["individuals", "bodyparts"],
                                            ascending=[True, True],
                                            inplace=True)
                        write_path = os.path.join(
                            evaluationfolder,
                            f"dist_{trainingsiterations}.csv")
                        df_joint.to_csv(write_path)

                        # Calculate overall prediction error
                        error = df_joint.xs("rmse", level="metrics", axis=1)
                        mask = df_joint.xs("conf", level="metrics",
                                           axis=1) >= cfg["pcutoff"]
                        error_masked = error[mask]
                        error_train = np.nanmean(error.iloc[trainIndices])
                        error_train_cut = np.nanmean(
                            error_masked.iloc[trainIndices])
                        error_test = np.nanmean(error.iloc[testIndices])
                        error_test_cut = np.nanmean(
                            error_masked.iloc[testIndices])
                        results = [
                            trainingsiterations,
                            int(100 * trainFraction),
                            shuffle,
                            np.round(error_train, 2),
                            np.round(error_test, 2),
                            cfg["pcutoff"],
                            np.round(error_train_cut, 2),
                            np.round(error_test_cut, 2),
                        ]
                        final_result.append(results)

                        # For OKS/PCK, compute the standard deviation error across all frames
                        sd = df_dist.groupby("bodyparts",
                                             axis=1).mean().std(axis=0)
                        sd["distnorm"] = np.sqrt(np.nanmax(distnorm))
                        # Write to a dedicated file. Substituting "dist.csv"
                        # in `write_path` never matched the iteration-suffixed
                        # name, so this table used to overwrite the distance
                        # CSV saved above.
                        sd_path = os.path.join(
                            evaluationfolder,
                            f"sd_{trainingsiterations}.csv")
                        sd.to_csv(sd_path)

                        if show_errors:
                            string = "Results for {} training iterations: {}, shuffle {}:\n" \
                                     "Train error: {} pixels. Test error: {} pixels.\n" \
                                     "With pcutoff of {}:\n" \
                                     "Train error: {} pixels. Test error: {} pixels."
                            print(string.format(*results))

                            print("##########################################")
                            print(
                                "Average Euclidean distance to GT per individual (in pixels)"
                            )
                            print(
                                error_masked.groupby(
                                    'individuals',
                                    axis=1).mean().mean().to_string())
                            print(
                                "Average Euclidean distance to GT per bodypart (in pixels)"
                            )
                            print(
                                error_masked.groupby(
                                    'bodyparts',
                                    axis=1).mean().mean().to_string())

                        PredicteData["metadata"] = {
                            "nms radius":
                            dlc_cfg.nmsradius,
                            "minimal confidence":
                            dlc_cfg.minconfidence,
                            "PAFgraph":
                            dlc_cfg.partaffinityfield_graph,
                            "all_joints":
                            [[i] for i in range(len(dlc_cfg.all_joints))],
                            "all_joints_names": [
                                dlc_cfg.all_joints_names[i]
                                for i in range(len(dlc_cfg.all_joints))
                            ],
                            "stride":
                            dlc_cfg.get("stride", 8),
                        }
                        print(
                            "Done and results stored for snapshot: ",
                            Snapshots[snapindex],
                        )

                        dictionary = {
                            "Scorer": DLCscorer,
                            "DLC-model-config file": dlc_cfg,
                            "trainIndices": trainIndices,
                            "testIndices": testIndices,
                            "trainFraction": trainFraction,
                        }
                        metadata = {"data": dictionary}
                        auxfun_multianimal.SaveFullMultiAnimalData(
                            PredicteData, metadata, resultsfilename)

                        tf.reset_default_graph()

                if len(final_result
                       ) > 0:  # Only append if results were calculated
                    make_results_file(final_result, evaluationfolder,
                                      DLCscorer)

    # returning to initial folder
    os.chdir(str(start_path))

Ejemplo n.º 3

Mostrar archivo

Archivo: evaluate.py Proyecto: dmurphy15/multiview-dlc

def evaluate_multiview_network(config,videos,projection_matrices,multiview_step,snapshot_index=None,Shuffles=[1],plotting = None,show_errors = True,comparisonbodyparts="all",gputouse=None):
    """
    Evaluates the network based on the saved models at different stages of the training network.\n
    The evaluation results are stored in the .h5 and .csv file under the subdirectory 'evaluation_results'.
    Change the snapshotindex parameter in the config file to 'all' in order to evaluate all the saved models.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    videos: list of strings
        Name of each video, one per viewpoint. Must be in the same order that it was in for training

    projection_matrices: list of arrays
        Projection matrix for each viewpoint. Each is a 3x4 array

    multiview_step:
        1 or 2. Indicates whether network was trained with train_multiview_network_step_1 or train_multiview_network_step_2

    Shuffles: list, optional
        List of integers specifying the shuffle indices of the training dataset. The default is [1]

    plotting: bool, optional
        Plots the predictions on the train and test images. The default is ``False``; if provided it must be either ``True`` or ``False``

    show_errors: bool, optional
        Display train and test errors. The default is `True``

    comparisonbodyparts: list of bodyparts, Default is "all".
        The average error will be computed for those body parts only (Has to be a subset of the body parts).

    gputouse: int, optional. Natural number indicating the number of your GPU (see number in nvidia-smi). If you do not have a GPU put None.
    See: https://nvidia.custhelp.com/app/answers/detail/a_id/3751/~/useful-nvidia-smi-queries
    
    Examples
    --------
    If you do not want to plot
    >>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml', shuffle=[1])
    --------

    If you want to plot
    >>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml',shuffle=[1],True)
    """
    import os
    from skimage import io
    import skimage.color

    from deeplabcut.pose_estimation_tensorflow.nnet import predict as ptf_predict
    from deeplabcut.pose_estimation_tensorflow.config import load_config
    from deeplabcut.pose_estimation_tensorflow.dataset.pose_dataset import data_to_input
    from deeplabcut.utils import auxiliaryfunctions, visualization
    import tensorflow as tf
    
    if 'TF_CUDNN_USE_AUTOTUNE' in os.environ:
        del os.environ['TF_CUDNN_USE_AUTOTUNE'] #was potentially set during training
    

    tf.reset_default_graph()
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # 
#    tf.logging.set_verbosity(tf.logging.WARN)

    start_path=os.getcwd()
    # Read file path for pose_config file. >> pass it on
    cfg = auxiliaryfunctions.read_config(config)
    if gputouse is not None: #gpu selectinon
            os.environ['CUDA_VISIBLE_DEVICES'] = str(gputouse)
            
    # Loading human annotatated data
    trainingsetfolder=auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Datas = [pd.read_hdf(os.path.join(cfg['project_path'], 'labeled-data', video, 'CollectedData_'+cfg['scorer']+'.h5'), 'df_with_missing') for video in videos]
    # Get list of body parts to evaluate network for
    comparisonbodyparts=auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(cfg,comparisonbodyparts)
    # Make folder for evaluation
    auxiliaryfunctions.attempttomakefolder(str(cfg["project_path"]+"/evaluation-results/"))
    for shuffle in Shuffles:
        for trainFraction in cfg["TrainingFraction"]:
            ##################################################
            # Load and setup CNN part detector
            ##################################################
            datafn,metadatafn=auxiliaryfunctions.GetDataandMetaDataFilenames(trainingsetfolder,trainFraction,shuffle,cfg)
            modelfolder=os.path.join(cfg["project_path"],str(auxiliaryfunctions.GetModelFolder(trainFraction,shuffle,cfg)))
            path_test_config = Path(modelfolder) / 'test' / 'pose_cfg.yaml'
            # Load meta data
            metadatas = []
            for video in videos:
                m = ('-'+video).join(os.path.splitext(metadatafn))
                data, trainIndices, testIndices, trainFraction=auxiliaryfunctions.LoadMetadata(os.path.join(cfg["project_path"],m))
                metadatas.append(data)

            try:
                dlc_cfg = load_config(str(path_test_config))
            except FileNotFoundError:
                raise FileNotFoundError("It seems the model for shuffle %s and trainFraction %s does not exist."%(shuffle,trainFraction))
            
            #change batch size, if it was edited during analysis!
            dlc_cfg['batch_size']=1 #in case this was edited for analysis.
            #Create folder structure to store results.
            evaluationfolder=os.path.join(cfg["project_path"],str(auxiliaryfunctions.GetEvaluationFolder(trainFraction,shuffle,cfg)))
            auxiliaryfunctions.attempttomakefolder(evaluationfolder,recursive=True)
            #path_train_config = modelfolder / 'train' / 'pose_cfg.yaml'

            dlc_cfg.multiview_step = multiview_step
            dlc_cfg.projection_matrices = projection_matrices
            
            # Check which snapshots are available and sort them by # iterations
            Snapshots = np.array([fn.split('.')[0]for fn in os.listdir(os.path.join(str(modelfolder), 'train'))if "index" in fn])
            try: #check if any where found?
              Snapshots[0]
            except IndexError:
              raise FileNotFoundError("Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."%(shuffle,trainFraction))

            increasing_indices = np.argsort([int(m.split('-')[1]) for m in Snapshots])
            Snapshots = Snapshots[increasing_indices]

            if snapshot_index is not None:
                snapindices = [i for i in range(len(Snapshots)) if int(Snapshots[i].split('-')[1].split('.')[0])==snapshot_index]
            elif cfg["snapshotindex"] == -1:
                snapindices = [-1]
            elif cfg["snapshotindex"] == "all":
                snapindices = range(len(Snapshots))
            elif cfg["snapshotindex"]<len(Snapshots):
                snapindices=[cfg["snapshotindex"]]
            else:
                print("Invalid choice, only -1 (last), any integer up to last, or all (as string)!")

            final_result=[]
            ##################################################
            # Compute predictions over images
            ##################################################
            for snapindex in snapindices:
                dlc_cfg['init_weights'] = os.path.join(str(modelfolder),'train',Snapshots[snapindex]) #setting weights to corresponding snapshot.
                trainingsiterations = (dlc_cfg['init_weights'].split(os.sep)[-1]).split('-')[-1] #read how many training siterations that corresponds to.
                
                #name for deeplabcut net (based on its parameters)
                DLCscorer = auxiliaryfunctions.GetScorerName(cfg,shuffle,trainFraction,trainingsiterations)
                print("Running ", DLCscorer, " with # of trainingiterations:", trainingsiterations)
                resultsfilename=os.path.join(str(evaluationfolder),DLCscorer + '-' + Snapshots[snapindex]+  '.h5')
                try:
                    DataMachine = pd.read_hdf(resultsfilename,'df_with_missing')
                    print("This net has already been evaluated!")
                except FileNotFoundError:
                    # Specifying state of model (snapshot / training state)
                    sess, inputs, outputs = ptf_predict.setup_pose_prediction(dlc_cfg)

                    Numimages = len(Datas[0].index)
                    PredicteDatas = np.zeros((Numimages,len(Datas), 3 * len(dlc_cfg['all_joints_names'])))
                    imagesizes = []
                    print("Analyzing data...")
                    if multiview_step == 1:
                        for imageindex in tqdm(range(len(Datas[0].index))):
                            imagenames = [Data.index[imageindex] for Data in Datas]
                            images = [io.imread(os.path.join(cfg['project_path'],imagename),mode='RGB') for imagename in imagenames]
                            images = [skimage.color.gray2rgb(image) for image in images]
                            image_batch = images
                            imagesizes.append([image.shape for image in images])
                            
                            # Compute prediction with the CNN
                            outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
                            scmap, locref = ptf_predict.extract_cnn_output(outputs_np, dlc_cfg)

                            # Extract maximum scoring location from the heatmap, assume 1 person
                            pose = ptf_predict.argmax_pose_predict(scmap, locref, dlc_cfg.stride)
                            PredicteDatas[imageindex] = pose.reshape([pose.shape[0], -1])  # NOTE: thereby     cfg_test['all_joints_names'] should be same order as bodyparts!

                        sess.close() #closes the current tf session

                        index = pd.MultiIndex.from_product(
                            [[DLCscorer], dlc_cfg['all_joints_names'], ['x', 'y', 'likelihood']],
                            names=['scorer', 'bodyparts', 'coords'])

                        # Saving results
                        for i, video in enumerate(videos):
                            print('Evaluating 2D predictions on video %s'%video)
                            Data = Datas[i]
                            DataMachine = pd.DataFrame(PredicteDatas[:,i], columns=index, index=Data.index.values)
                            r = ('-'+video).join(os.path.splitext(resultsfilename))
                            DataMachine.to_hdf(r,'df_with_missing',format='table',mode='w')

                            print("Done and results stored for snapshot: ", Snapshots[snapindex])
                            DataCombined = pd.concat([Data.T, DataMachine.T], axis=0).T
                            RMSE,RMSEpcutoff = pairwisedistances(DataCombined, cfg["scorer"], DLCscorer,cfg["pcutoff"],comparisonbodyparts)
                            testerror = np.nanmean(RMSE.iloc[testIndices].values.flatten())
                            trainerror = np.nanmean(RMSE.iloc[trainIndices].values.flatten())
                            testerrorpcutoff = np.nanmean(RMSEpcutoff.iloc[testIndices].values.flatten())
                            trainerrorpcutoff = np.nanmean(RMSEpcutoff.iloc[trainIndices].values.flatten())
                            results = [trainingsiterations,int(100 * trainFraction),shuffle,np.round(trainerror,2),np.round(testerror,2),cfg["pcutoff"],np.round(trainerrorpcutoff,2), np.round(testerrorpcutoff,2)]
                            final_result.append(results)

                            if show_errors == True:
                                    print("Results for",trainingsiterations," training iterations:", int(100 * trainFraction), shuffle, "train error:",np.round(trainerror,2), "pixels. Test error:", np.round(testerror,2)," pixels.")
                                    print("With pcutoff of", cfg["pcutoff"]," train error:",np.round(trainerrorpcutoff,2), "pixels. Test error:", np.round(testerrorpcutoff,2), "pixels")
                                    print("Thereby, the errors are given by the average distances between the labels by DLC and the scorer.")

                            if plotting == True:
                                print("Plotting...")
                                colors = visualization.get_cmap(len(comparisonbodyparts),name=cfg['colormap'])

                                foldername=os.path.join(str(evaluationfolder),'LabeledImages_' + DLCscorer + '_' + Snapshots[snapindex]+'_'+video)
                                auxiliaryfunctions.attempttomakefolder(foldername)
                                NumFrames=np.size(DataCombined.index)
                                for ind in np.arange(NumFrames):
                                    visualization.PlottingandSaveLabeledFrame(DataCombined,ind,trainIndices,cfg,colors,comparisonbodyparts,DLCscorer,foldername)
                        
                        # get predictions in homogeneous pixel coordinates
                        # pixel coordinates have (0,0) in the top-left, and the bottom-right coordinate is (h,w)
                        predictions = PredicteDatas.reshape(Numimages, len(Datas), len(dlc_cfg['all_joints_names']), 3)
                        scores = np.copy(predictions[:,:,:,2])
                        predictions[:,:,:,2] = 1.0 # homogeneous coordinates; (x,y,1). Top-left corner is (-width/2, -height/2, 1); Bottom-right corner is opposite. Shape is num_images x num_views x num_joints x 3
                        num_ims, num_views, num_joints, _ = predictions.shape

                        # get labels in homogeneous pixel coordinates
                        labels = np.array([Data.values.reshape(num_ims, num_joints, 2) for Data in Datas]) # num_views x num_ims x num_joints x (x,y)
                        labels = np.transpose(labels, [1, 2, 0, 3]) # num_ims x num_joints x num_views x (x,y)
                        labels = np.concatenate([labels, np.ones([num_ims, num_joints, num_views, 1])], axis=3)

                        # solve linear system to get labels in 3D
                        # helpful explanation of equation found on pg 5 here: https://hal.inria.fr/inria-00524401/PDF/Sturm-cvpr05.pdf
                        labs = labels.reshape([num_ims * num_joints, num_views, 3]).astype(np.float)
                        confidences = ~np.isnan(np.sum(labs, axis=2))
                        valid = np.sum(~np.isnan(np.sum(labs, axis=2)), axis=1) >= 2
                        labs[~confidences] = 0
                        labels3d = project_3d(projection_matrices, labs, confidences=confidences)
                        labels3d[~valid] = np.nan
                        labels3d = labels3d.reshape([num_ims, num_joints, 3]) 

                        # solve linear system to get 3D predictions
                        preds = np.transpose(predictions, [0, 2, 1, 3]) # num_ims x num_joints x num_views x 3
                        preds = preds.reshape([num_ims*num_joints, num_views, 3])
                        preds3d = project_3d(projection_matrices, preds)
                        preds3d = preds3d.reshape([num_ims, num_joints, 3])
                        
                        # try it with confidence weighting
                        scores = np.transpose(scores, [0, 2, 1]) # num_images x num_joints x num_views
                        scores = np.reshape(scores, [num_ims*num_joints, num_views])
                        preds3d_weighted = project_3d(projection_matrices, preds, confidences=scores)
                        preds3d_weighted = preds3d_weighted.reshape([num_ims, num_joints, 3])

                        # try it with the pcutoff
                        scores2 = np.copy(scores)
                        scores2[scores2 < cfg["pcutoff"]] = 0
                        preds3d_weighted_cutoff = project_3d(projection_matrices, preds, confidences=scores2)
                        preds3d_weighted_cutoff = preds3d_weighted_cutoff.reshape([num_ims, num_joints, 3])

                        print("\n\n3D errors:")
                        RMSE_train = np.nanmean(np.nansum((preds3d[trainIndices] - labels3d[trainIndices])**2, axis=2)**0.5)
                        RMSE_test = np.nanmean(np.nansum((preds3d[testIndices] - labels3d[testIndices])**2, axis=2)**0.5)
                        RMSE_train_weighted = np.nanmean(np.nansum((preds3d_weighted[trainIndices] - labels3d[trainIndices])**2, axis=2)**0.5)
                        RMSE_test_weighted = np.nanmean(np.nansum((preds3d_weighted[testIndices] - labels3d[testIndices])**2, axis=2)**0.5)
                        RMSE_train_weighted_cutoff = np.nanmean(np.nansum((preds3d_weighted_cutoff[trainIndices] - labels3d[trainIndices])**2, axis=2)**0.5)
                        RMSE_test_weighted_cutoff = np.nanmean(np.nansum((preds3d_weighted_cutoff[testIndices] - labels3d[testIndices])**2, axis=2)**0.5)

                        print("RMSE train: ", RMSE_train)
                        print("RMSE test: ", RMSE_test)
                        print("RMSE train weighted: ", RMSE_train_weighted)
                        print("RMSE test weighted: ", RMSE_test_weighted)
                        print("RMSE train weighted cutoff: ", RMSE_train_weighted_cutoff)
                        print("RMSE test weighted cutoff: ", RMSE_test_weighted_cutoff) 

                        tail = np.nansum((preds3d_weighted - labels3d)**2, axis=2)**0.5
                        tail = np.sort(tail[~np.isnan(tail)])
                        tail = tail[-10:]
                        print('10 worst predictions: ', tail)

                        tf.reset_default_graph()
                    elif multiview_step==2:
                        preds3d = []
                        for imageindex in tqdm(range(len(Datas[0].index))):
                            imagenames = [Data.index[imageindex] for Data in Datas]
                            images = [io.imread(os.path.join(cfg['project_path'],imagename),mode='RGB') for imagename in imagenames]
                            images = [skimage.color.gray2rgb(image) for image in images]
                            image_batch = images
                            
                            # Compute prediction with the CNN
                            outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
                            pred_3d = outputs_np[2]
                            preds3d.append(pred_3d)

                        sess.close() #closes the current tf session
                        preds3d = np.array(preds3d) # num_ims x num_joints x (x,y,z)
                        num_ims, num_joints = preds3d.shape[:2]
                        num_views = dlc_cfg.num_views

                        # get labels in homogeneous pixel coordinates
                        labels = np.array([Data.values.reshape(num_ims, num_joints, 2) for Data in Datas]) # num_views x num_ims x num_joints x (x,y)
                        labels = np.transpose(labels, [1, 2, 0, 3]) # num_ims x num_joints x num_views x (x,y)
                        labels = np.concatenate([labels, np.ones([num_ims, num_joints, num_views, 1])], axis=3)

                        # solve linear system to get labels in 3D
                        # helpful explanation of equation found on pg 5 here: https://hal.inria.fr/inria-00524401/PDF/Sturm-cvpr05.pdf
                        labs = labels.reshape([num_ims * num_joints, num_views, 3]).astype(np.float)
                        confidences = ~np.isnan(np.sum(labs, axis=2))
                        valid = np.sum(~np.isnan(np.sum(labs, axis=2)), axis=1) >= 2
                        labs[~confidences] = 0
                        labels3d = project_3d(projection_matrices, labs, confidences=confidences)
                        labels3d[~valid] = np.nan
                        labels3d = labels3d.reshape([num_ims, num_joints, 3]) 

                        print("\n\n3D errors (units are determined by projection matrices):")
                        RMSE_train = np.nanmean(np.nansum((preds3d[trainIndices] - labels3d[trainIndices])**2, axis=2)**0.5)
                        RMSE_test = np.nanmean(np.nansum((preds3d[testIndices] - labels3d[testIndices])**2, axis=2)**0.5)

                        print("RMSE train: ", RMSE_train)
                        print("RMSE test: ", RMSE_test)

                        tail = np.nansum((preds3d- labels3d)**2, axis=2)**0.5
                        tail = np.sort(tail[~np.isnan(tail)])
                        tail = tail[-10:]
                        print('10 worst predictions: ', tail)

                        tf.reset_default_graph()
                    else:
                        print('invalid multiview_step given')
                        return
            make_results_file(final_result,evaluationfolder,DLCscorer)
            print("The network is evaluated and the results are stored in the subdirectory 'evaluation_results'.")
            print("If it generalizes well, choose the best model for prediction and update the config file with the appropriate index for the 'snapshotindex'.\nUse the function 'analyze_video' to make predictions on new videos.")
            print("Otherwise consider retraining the network (see DeepLabCut workflow Fig 2)")
    
    #returning to intial folder
    os.chdir(str(start_path))

Ejemplo n.º 4

Mostrar archivo

def evaluate_multianimal_full(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    plotting=None,
    show_errors=True,
    comparisonbodyparts="all",
    gputouse=None,
    modelprefix="",
    c_engine=False,
):
    """
    Run a trained multi-animal network over all labeled images and store the
    raw detections next to the human annotations (WIP multi animal project).

    For every requested shuffle / training fraction and every selected
    snapshot, the per-image detections, the ground truth and some model
    metadata are handed to ``auxfun_multianimal.SaveFullMultiAnimalData``.
    A snapshot is skipped when its ``*_full.pickle`` results file already
    exists. Nothing is returned.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    Shuffles : list, optional
        Shuffle indices to evaluate. The list is only iterated, never modified.

    trainingsetindex : int or "all", optional
        Index into ``TrainingFraction`` of the config file; "all" evaluates
        every training fraction.

    plotting : optional
        If truthy, a labeled image is saved for every analyzed frame in a
        ``LabeledImages_*`` folder inside the evaluation folder.

    show_errors : bool, optional
        Accepted for interface compatibility; not used by this function.

    comparisonbodyparts : optional
        Forwarded to ``IntersectionofBodyPartsandOnesGivenbyUser``; here the
        result only determines the number of colors of the colormap.

    gputouse : int, optional
        If given, written to the ``CUDA_VISIBLE_DEVICES`` environment variable.

    modelprefix : string, optional
        Forwarded to the helpers resolving model folder, evaluation folder and
        scorer name.

    c_engine : bool, optional
        Forwarded to ``predictma.get_detectionswithcostsandGT``.

    Raises
    ------
    FileNotFoundError
        If the test ``pose_cfg.yaml`` of a shuffle / training fraction is missing.
    ValueError
        If ``snapshotindex`` in the config file is neither -1, "all", nor an
        integer smaller than the number of available snapshots.
    """

    import os

    from deeplabcut.pose_estimation_tensorflow.nnet import predict
    from deeplabcut.pose_estimation_tensorflow.nnet import (
        predict_multianimal as predictma, )
    from deeplabcut.utils import auxiliaryfunctions, auxfun_multianimal

    import tensorflow as tf

    # was potentially set during training
    os.environ.pop("TF_CUDNN_USE_AUTOTUNE", None)

    tf.reset_default_graph()
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
    if gputouse is not None:  # gpu selection
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

    start_path = os.getcwd()

    ##################################################
    # Load data...
    ##################################################
    cfg = auxiliaryfunctions.read_config(config)
    if trainingsetindex == "all":
        TrainingFractions = cfg["TrainingFraction"]
    else:
        TrainingFractions = [cfg["TrainingFraction"][trainingsetindex]]

    # Loading human annotated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        ),
        "df_with_missing",
    )
    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts)
    colors = visualization.get_cmap(len(comparisonbodyparts),
                                    name=cfg["colormap"])
    # Make folder for evaluation
    auxiliaryfunctions.attempttomakefolder(
        str(cfg["project_path"] + "/evaluation-results/"))

    for shuffle in Shuffles:
        for trainFraction in TrainingFractions:
            ##################################################
            # Load and setup CNN part detector
            ##################################################
            _, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg)
            modelfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetModelFolder(
                        trainFraction, shuffle, cfg, modelprefix=modelprefix)),
            )
            path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"

            # Load meta data; the stored training fraction replaces the
            # configured one for everything that follows.
            (
                _,
                trainIndices,
                testIndices,
                trainFraction,
            ) = auxiliaryfunctions.LoadMetadata(
                os.path.join(cfg["project_path"], metadatafn))

            try:
                dlc_cfg = load_config(str(path_test_config))
            except FileNotFoundError as err:
                raise FileNotFoundError(
                    "It seems the model for shuffle %s and trainFraction %s does not exist."
                    % (shuffle, trainFraction)) from err

            # TODO: IMPLEMENT for different batch sizes?
            dlc_cfg["batch_size"] = 1  # due to differently sized images!!!

            # Create folder structure to store results.
            evaluationfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetEvaluationFolder(
                        trainFraction, shuffle, cfg, modelprefix=modelprefix)),
            )
            auxiliaryfunctions.attempttomakefolder(evaluationfolder,
                                                   recursive=True)

            # Check which snapshots are available and sort them by # iterations
            Snapshots = np.array([
                fn.split(".")[0]
                for fn in os.listdir(os.path.join(str(modelfolder), "train"))
                if "index" in fn
            ])
            if len(Snapshots) == 0:
                print(
                    "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                    % (shuffle, trainFraction))
                continue

            increasing_indices = np.argsort(
                [int(m.split("-")[1]) for m in Snapshots])
            Snapshots = Snapshots[increasing_indices]

            snapshotindex = cfg["snapshotindex"]
            if snapshotindex == -1:
                snapindices = [-1]
            elif snapshotindex == "all":
                snapindices = range(len(Snapshots))
            elif (isinstance(snapshotindex, int)
                  and snapshotindex < len(Snapshots)):
                snapindices = [snapshotindex]
            else:
                # Previously this case only printed the message and then
                # failed on the unbound ``snapindices`` in the loop below.
                raise ValueError(
                    "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
                )

            (
                individuals,
                uniquebodyparts,
                multianimalbodyparts,
            ) = auxfun_multianimal.extractindividualsandbodyparts(cfg)

            ##################################################
            # Compute predictions over images
            ##################################################
            for snapindex in snapindices:
                # setting weights to corresponding snapshot.
                dlc_cfg["init_weights"] = os.path.join(
                    str(modelfolder), "train", Snapshots[snapindex])
                # read how many training iterations that corresponds to.
                trainingsiterations = (
                    dlc_cfg["init_weights"].split(os.sep)[-1]).split("-")[-1]

                # name for deeplabcut net (based on its parameters)
                DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                    cfg,
                    shuffle,
                    trainFraction,
                    trainingsiterations,
                    modelprefix=modelprefix,
                )
                print(
                    "Running ",
                    DLCscorer,
                    " with # of trainingiterations:",
                    trainingsiterations,
                )
                (
                    _,
                    resultsfilename,
                    DLCscorer,
                ) = auxiliaryfunctions.CheckifNotEvaluated(
                    str(evaluationfolder),
                    DLCscorer,
                    DLCscorerlegacy,
                    Snapshots[snapindex],
                )

                if os.path.isfile(
                        resultsfilename.split(".h5")[0] + "_full.pickle"):
                    print("Model already evaluated.", resultsfilename)
                    continue

                if plotting:
                    foldername = os.path.join(
                        str(evaluationfolder),
                        "LabeledImages_" + DLCscorer + "_" +
                        Snapshots[snapindex],
                    )
                    auxiliaryfunctions.attempttomakefolder(foldername)

                # Specifying state of model (snapshot / training state)
                sess, inputs, outputs = predict.setup_pose_prediction(dlc_cfg)

                PredicteData = {}
                print("Analyzing data...")
                try:
                    # Wrapping the index (not the enumerate object) lets tqdm
                    # know the total and draw a real progress bar.
                    for imageindex, imagename in enumerate(tqdm(Data.index)):
                        image_path = os.path.join(cfg["project_path"],
                                                  imagename)
                        image = io.imread(image_path)
                        frame = img_as_ubyte(skimage.color.gray2rgb(image))

                        GT = Data.iloc[imageindex]

                        # Storing GT data as dictionary, so it can be used for
                        # calculating connection costs
                        groundtruthcoordinates = []
                        groundtruthidentity = []
                        for bpt in dlc_cfg["all_joints_names"]:
                            coords = np.full((len(individuals), 2), np.nan)
                            identity = []
                            for prfxindex, prefix in enumerate(individuals):
                                # Unique body parts live under "single"; the
                                # multi-animal ones under every other individual.
                                is_unique = (bpt in uniquebodyparts
                                             and prefix == "single")
                                is_multi = (bpt in multianimalbodyparts
                                            and prefix != "single")
                                if is_unique or is_multi:
                                    annotation = GT[cfg["scorer"]][prefix][bpt]
                                    coords[prfxindex, :] = np.array(
                                        [annotation["x"], annotation["y"]])
                                    identity.append(prefix)
                                else:
                                    identity.append("nix")

                            # Keep only the individuals with a finite x label.
                            labeled = np.isfinite(coords[:, 0])
                            groundtruthcoordinates.append(coords[labeled, :])
                            groundtruthidentity.append(
                                np.array(identity)[labeled])

                        PredicteData[imagename] = {}
                        PredicteData[imagename]["index"] = imageindex

                        pred = predictma.get_detectionswithcostsandGT(
                            frame,
                            groundtruthcoordinates,
                            dlc_cfg,
                            sess,
                            inputs,
                            outputs,
                            outall=False,
                            nms_radius=dlc_cfg.nmsradius,
                            det_min_score=dlc_cfg.minconfidence,
                            c_engine=c_engine,
                        )
                        PredicteData[imagename]["prediction"] = pred
                        PredicteData[imagename]["groundtruth"] = [
                            groundtruthidentity,
                            groundtruthcoordinates,
                            GT,
                        ]

                        if plotting:
                            coords_pred = pred["coordinates"][0]
                            probs_pred = pred["confidence"]
                            fig = visualization.make_multianimal_labeled_image(
                                frame,
                                groundtruthcoordinates,
                                coords_pred,
                                probs_pred,
                                colors,
                                cfg["dotsize"],
                                cfg["alphavalue"],
                                cfg["pcutoff"],
                            )

                            visualization.save_labeled_frame(
                                fig,
                                image_path,
                                foldername,
                                imageindex in trainIndices,
                            )
                finally:
                    # closes the current tf session, also when prediction fails
                    sess.close()

                PredicteData["metadata"] = {
                    "nms radius":
                    dlc_cfg.nmsradius,
                    "minimal confidence":
                    dlc_cfg.minconfidence,
                    "PAFgraph":
                    dlc_cfg.partaffinityfield_graph,
                    "all_joints":
                    [[i] for i in range(len(dlc_cfg.all_joints))],
                    "all_joints_names": [
                        dlc_cfg.all_joints_names[i]
                        for i in range(len(dlc_cfg.all_joints))
                    ],
                    "stride":
                    dlc_cfg.get("stride", 8),
                }
                print(
                    "Done and results stored for snapshot: ",
                    Snapshots[snapindex],
                )

                dictionary = {
                    "Scorer": DLCscorer,
                    "DLC-model-config file": dlc_cfg,
                    "trainIndices": trainIndices,
                    "testIndices": testIndices,
                    "trainFraction": trainFraction,
                }
                metadata = {"data": dictionary}
                auxfun_multianimal.SaveFullMultiAnimalData(
                    PredicteData, metadata, resultsfilename)

                tf.reset_default_graph()

    # returning to initial folder
    os.chdir(str(start_path))

Ejemplo n.º 5

Mostrar archivo

def mergeandsplit(config, trainindex=0, uniform=True, windows2linux=False):
    """
    Merge the annotated data sets and compute a train/test split without
    creating the training dataset itself.

    This gives finer control than "create_training_dataset": the returned
    indices can be passed on to it, which makes it possible to freeze a split
    or to hold out one complete video folder.

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    trainindex: int, optional
        With uniform = True: position in the TrainingFraction list of the config
        file that selects the fraction to use.
        With uniform = False: position of the video (in the order of the config
        file) whose folder is held out for testing (0 = first, 1 = second, ...).

    uniform: bool, optional
        True for a uniform split over all labeled images (folder structure is
        ignored); False for a leave-one-folder-out split.

    windows2linux: bool.
        Forwarded to the merging of the annotation files. Annotation paths follow
        the conventions of the operating system used for labeling; set this to
        True when labeling happened on windows but training/evaluation runs on a
        unix system (e.g. ubuntu, colab, Mac).

    Returns
    -------
    trainIndices, testIndices
        The two index collections of the split; two empty lists when no
        annotated data could be merged.

    Examples
    --------
    Leave-one-folder-out model (first video folder becomes the test set):
    >>> trainIndices, testIndices=deeplabcut.mergeandsplit(config,trainindex=0,uniform=False)
    >>> deeplabcut.create_training_dataset(config,Shuffles=[3],trainIndices=trainIndices,testIndices=testIndices)

    Frozen uniform split shared by two model instances:
    >>> trainIndices, testIndices=deeplabcut.mergeandsplit(config,trainindex=0,uniform=True)
    >>> deeplabcut.create_training_dataset(config,Shuffles=[0,1],trainIndices=[trainIndices, trainIndices],testIndices=[testIndices, testIndices])
    --------

    """
    # Project metadata from the config file
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg["scorer"]
    project_path = cfg["project_path"]

    # Make sure the training-set folder exists before reading from / writing to it
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    training_dir = Path(os.path.join(project_path, str(trainingsetfolder)))
    auxiliaryfunctions.attempttomakefolder(training_dir, recursive=True)

    collected_stem = os.path.join(
        project_path, trainingsetfolder, "CollectedData_" + cfg["scorer"]
    )
    try:
        Data = pd.read_hdf(collected_stem + ".h5", "df_with_missing")
    except FileNotFoundError:
        # No merged file yet: build it from the individual annotation folders.
        Data = merge_annotateddatasets(
            cfg,
            Path(os.path.join(project_path, trainingsetfolder)),
            windows2linux=windows2linux,
        )
        if Data is None:
            return [], []

    Data = Data[scorer]  # keep only this scorer's labels

    if uniform == True:
        fractions = cfg["TrainingFraction"]
        chosen_fraction = fractions[trainindex]
        trainIndices, testIndices = SplitTrials(
            range(len(Data.index)), chosen_fraction
        )
        return trainIndices, testIndices

    # Leave-one-folder-out: every image of the selected video goes to the test set
    videos = cfg["video_sets"].keys()
    video_stems = [Path(video).stem for video in videos]
    held_out = video_stems[trainindex]
    print("Excluding the following folder (from training):",
          held_out)

    trainIndices, testIndices = [], []
    for position, image_name in enumerate(Data.index):
        # second path component of the image name is the video folder
        folder = image_name.split(os.sep)[1]
        target = testIndices if held_out == folder else trainIndices
        target.append(position)

    return trainIndices, testIndices

Ejemplo n.º 6

Mostrar archivo

def convert_cropped_to_standard_dataset(
    config_path,
    recreate_datasets=True,
    delete_crops=True,
    back_up=True,
):
    """
    Revert a project trained on cropped images to its original videos.

    The config file is rewritten so that ``video_sets`` holds the content of
    ``video_sets_original`` (both ``video_sets_original`` and
    ``croppedtraining`` are removed from it). Optionally the project is backed
    up first, the ``*_cropped`` labeled-data folders are deleted, and the
    training datasets are recreated with the train/test splits of the existing
    shuffles mapped onto the uncropped images.

    Parameters
    ----------
    config_path : string
        Full path of the config.yaml file as a string.

    recreate_datasets : bool, optional
        If True, the pickle files in the training-set folder are deleted and
        the datasets are rebuilt via ``create_multianimaltraining_dataset``.

    delete_crops : bool, optional
        If True, labeled-data folders of videos whose path contains
        "_cropped" are removed.

    back_up : bool, optional
        If True, the whole project folder is copied to ``<project_path>_bak``
        before anything is changed.
    """
    import pandas as pd
    import pickle
    import shutil
    from deeplabcut.generate_training_dataset import trainingsetmanipulation
    from deeplabcut.utils import read_plainconfig, write_config

    cfg = auxiliaryfunctions.read_config(config_path)
    # Default to None so that a config lacking these keys ends in the
    # "not cropped" message below instead of a KeyError.
    videos_orig = cfg.pop("video_sets_original", None)
    is_cropped = cfg.pop("croppedtraining", None)
    if videos_orig is None or not is_cropped:
        print("Labeled data do not appear to be cropped. "
              "Project will remain unchanged...")
        return

    project_path = cfg["project_path"]

    if back_up:
        print("Backing up project...")
        shutil.copytree(project_path, project_path + "_bak", symlinks=True)

    if delete_crops:
        print("Deleting crops...")
        data_path = os.path.join(project_path, "labeled-data")
        for video in cfg["video_sets"]:
            _, filename, _ = trainingsetmanipulation._robust_path_split(video)
            if "_cropped" in video:  # One can never be too safe...
                shutil.rmtree(os.path.join(data_path, filename),
                              ignore_errors=True)

    cfg["video_sets"] = videos_orig
    write_config(config_path, cfg)

    if not recreate_datasets:
        return

    datasets_folder = os.path.join(
        project_path,
        auxiliaryfunctions.GetTrainingSetFolder(cfg),
    )
    df_old = pd.read_hdf(
        os.path.join(datasets_folder,
                     "CollectedData_" + cfg["scorer"] + ".h5"), )

    def strip_cropped_image_name(path):
        # Map the path of a crop back to the path of its source image:
        # drop "_cropped" from the folder and a trailing "c<digits>" crop
        # counter from the file stem. Anchoring on the end of the stem keeps
        # names that merely contain a "c" (e.g. "pic001c0") intact, and
        # splitext copes with additional dots in the file name.
        head, filename = os.path.split(path)
        head = head.replace("_cropped", "")
        stem, ext = os.path.splitext(filename)
        stem = re.sub(r"c\d+$", "", stem)
        return os.path.join(head, stem + ext)

    img_names_old = np.asarray(
        [strip_cropped_image_name(img) for img in df_old.index.to_list()])
    df = merge_annotateddatasets(cfg, datasets_folder)
    img_names = df.index.to_numpy()

    splits = []  # (shuffle number, train indices, test indices)
    pickle_files = []
    shuffle_inds = set()
    for filename in os.listdir(datasets_folder):
        if not filename.endswith("pickle"):
            continue
        pickle_file = os.path.join(datasets_folder, filename)
        pickle_files.append(pickle_file)
        # Parse the shuffle number from the file name only (a folder named
        # like "shuffle3" must not interfere) and before anything is deleted.
        shuffle_ind = int(re.findall(r"shuffle(\d+)", filename)[0])
        shuffle_inds.add(shuffle_ind)
        if not filename.startswith("Docu"):
            continue
        # NOTE: pickle.load executes arbitrary code from the file; these files
        # are expected to be written by this project, never untrusted input.
        with open(pickle_file, "rb") as f:
            _, train_inds, test_inds, train_frac = pickle.load(f)
        train_inds_temp = np.flatnonzero(
            np.isin(img_names, img_names_old[train_inds]))
        test_inds_temp = np.flatnonzero(
            np.isin(img_names, img_names_old[test_inds]))
        train_inds, test_inds = pad_train_test_indices(
            train_inds_temp, test_inds_temp, train_frac)
        splits.append((shuffle_ind, train_inds, test_inds))

    # os.listdir yields files in arbitrary order, whereas the shuffles are
    # passed on sorted; order the splits the same way so they stay paired.
    splits.sort(key=lambda split: split[0])
    train_idx = [split[1] for split in splits]
    test_idx = [split[2] for split in splits]

    # Search a pose_config.yaml file to parse missing information.
    # The break only leaves the inner loop, so the match of the last visited
    # directory is the one that is used.
    pose_config_path = ""
    for dirpath, _, filenames in os.walk(
            os.path.join(project_path, "dlc-models")):
        for file in filenames:
            if file.endswith("pose_cfg.yaml"):
                pose_config_path = os.path.join(dirpath, file)
                break
    pose_cfg = read_plainconfig(pose_config_path)
    net_type = pose_cfg["net_type"]
    if net_type == "resnet_50" and pose_cfg.get("multi_stage", False):
        net_type = "dlcrnet_ms5"

    # Clean the training-datasets folder prior to recreating the data pickles
    for pickle_file in pickle_files:
        os.remove(pickle_file)
    create_multianimaltraining_dataset(
        config_path,
        trainIndices=train_idx,
        testIndices=test_idx,
        Shuffles=sorted(shuffle_inds),
        net_type=net_type,
        paf_graph=pose_cfg["partaffinityfield_graph"],
        crop_size=pose_cfg.get("crop_size", [400, 400]),
        crop_sampling=pose_cfg.get("crop_sampling", "hybrid"),
    )

Ejemplo n.º 7

Mostrar archivo

Archivo: evaluate_multianimal.py Proyecto: eaogorman/DeepLabCut

def evaluate_multianimal_full(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    plotting=False,
    show_errors=True,
    comparisonbodyparts="all",
    gputouse=None,
    modelprefix="",
):
    """Evaluate trained multi-animal networks against the human annotations.

    For every requested shuffle / training fraction, and for every selected
    snapshot, the function runs the network on each annotated image, matches
    predictions to ground truth per body part, writes the per-keypoint
    distances and confidences to ``dist_<iterations>.csv`` in the evaluation
    folder, stores the raw predictions via
    ``auxfun_multianimal.SaveFullMultiAnimalData`` and finally cross-validates
    the PAF graphs, pickling the resulting table next to the predictions
    (``*_map.pickle``).

    Parameters
    ----------
    config : str
        Path of the project config file, read with
        ``auxiliaryfunctions.read_config``.
    Shuffles : iterable of int
        Shuffle indices to evaluate. Only iterated over, never modified.
    trainingsetindex : int or "all"
        Index into ``cfg["TrainingFraction"]``; ``"all"`` evaluates every
        training fraction.
    plotting : bool or str
        ``True`` is treated as ``"bodypart"``. ``"bodypart"`` saves labeled
        frames during prediction, ``"individual"`` saves labeled frames built
        from the best assemblies. Any falsy value disables plotting.
    show_errors : bool
        If true, print the train/test error summary for each snapshot.
    comparisonbodyparts : str or list
        Forwarded to
        ``auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser``.
    gputouse : int or None
        When not None, written to the ``CUDA_VISIBLE_DEVICES`` environment
        variable.
    modelprefix : str
        Forwarded to the helpers that resolve the model folder, evaluation
        folder and scorer name.

    Raises
    ------
    FileNotFoundError
        If the test ``pose_cfg.yaml`` of a shuffle / training fraction
        cannot be loaded.
    ValueError
        If ``cfg["snapshotindex"]`` is neither -1, ``"all"`` nor an integer
        smaller than the number of available snapshots.
    """
    from deeplabcut.pose_estimation_tensorflow.core import (
        predict,
        predict_multianimal as predictma,
    )
    from deeplabcut.utils import (
        auxiliaryfunctions,
        auxfun_multianimal,
        auxfun_videos,
        conversioncode,
    )

    import tensorflow as tf

    if "TF_CUDNN_USE_AUTOTUNE" in os.environ:
        del os.environ["TF_CUDNN_USE_AUTOTUNE"]  # was potentially set during training

    tf.compat.v1.reset_default_graph()
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  #
    if gputouse is not None:  # GPU selection
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

    start_path = os.getcwd()

    if plotting is True:
        plotting = "bodypart"

    ##################################################
    # Load data...
    ##################################################
    cfg = auxiliaryfunctions.read_config(config)
    if trainingsetindex == "all":
        TrainingFractions = cfg["TrainingFraction"]
    else:
        TrainingFractions = [cfg["TrainingFraction"][trainingsetindex]]

    # Loading human annotated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        )
    )
    conversioncode.guarantee_multiindex_rows(Data)

    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts
    )
    # One entry per column of the distance/confidence arrays: the multi-animal
    # body parts repeated once per individual, followed by the unique ones.
    all_bpts = np.asarray(
        len(cfg["individuals"]) * cfg["multianimalbodyparts"] + cfg["uniquebodyparts"]
    )
    colors = visualization.get_cmap(len(comparisonbodyparts), name=cfg["colormap"])
    # Make folder for evaluation
    auxiliaryfunctions.attempttomakefolder(
        str(cfg["project_path"] + "/evaluation-results/")
    )
    for shuffle in Shuffles:
        for trainFraction in TrainingFractions:
            ##################################################
            # Load and setup CNN part detector
            ##################################################
            datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg
            )
            modelfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetModelFolder(
                        trainFraction, shuffle, cfg, modelprefix=modelprefix
                    )
                ),
            )
            path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"

            # Load meta data
            (
                data,
                trainIndices,
                testIndices,
                trainFraction,
            ) = auxiliaryfunctions.LoadMetadata(
                os.path.join(cfg["project_path"], metadatafn)
            )

            try:
                dlc_cfg = load_config(str(path_test_config))
            except FileNotFoundError:
                raise FileNotFoundError(
                    "It seems the model for shuffle %s and trainFraction %s does not exist."
                    % (shuffle, trainFraction)
                )

            # Optional resizing applied to both the image and its keypoints.
            pipeline = iaa.Sequential(random_order=False)
            pre_resize = dlc_cfg.get("pre_resize")
            if pre_resize:
                width, height = pre_resize
                pipeline.add(iaa.Resize({"height": height, "width": width}))

            # TODO: IMPLEMENT for different batch sizes?
            dlc_cfg["batch_size"] = 1  # due to differently sized images!!!

            stride = dlc_cfg["stride"]
            # Ignore best edges possibly defined during a prior evaluation
            _ = dlc_cfg.pop("paf_best", None)
            joints = dlc_cfg["all_joints_names"]

            # Create folder structure to store results.
            evaluationfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetEvaluationFolder(
                        trainFraction, shuffle, cfg, modelprefix=modelprefix
                    )
                ),
            )
            auxiliaryfunctions.attempttomakefolder(evaluationfolder, recursive=True)
            # path_train_config = modelfolder / 'train' / 'pose_cfg.yaml'

            # Check which snapshots are available and sort them by # iterations
            Snapshots = np.array(
                [
                    fn.split(".")[0]
                    for fn in os.listdir(os.path.join(str(modelfolder), "train"))
                    if "index" in fn
                ]
            )
            if len(Snapshots) == 0:
                print(
                    "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                    % (shuffle, trainFraction)
                )
            else:
                increasing_indices = np.argsort(
                    [int(m.split("-")[1]) for m in Snapshots]
                )
                Snapshots = Snapshots[increasing_indices]

                if cfg["snapshotindex"] == -1:
                    snapindices = [-1]
                elif cfg["snapshotindex"] == "all":
                    snapindices = range(len(Snapshots))
                elif cfg["snapshotindex"] < len(Snapshots):
                    snapindices = [cfg["snapshotindex"]]
                else:
                    # Fail loudly: merely printing here would leave
                    # `snapindices` unbound (NameError below) or, worse, reuse
                    # the selection of a previous shuffle/fraction iteration.
                    raise ValueError(
                        "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
                    )

                # Collected per (shuffle, trainFraction); one row per snapshot
                # that is actually evaluated in this call.
                final_result = []
                ##################################################
                # Compute predictions over images
                ##################################################
                for snapindex in snapindices:
                    dlc_cfg["init_weights"] = os.path.join(
                        str(modelfolder), "train", Snapshots[snapindex]
                    )  # setting weights to corresponding snapshot.
                    trainingsiterations = (
                        dlc_cfg["init_weights"].split(os.sep)[-1]
                    ).split("-")[
                        -1
                    ]  # read how many training iterations that corresponds to.

                    # name for deeplabcut net (based on its parameters)
                    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                        cfg,
                        shuffle,
                        trainFraction,
                        trainingsiterations,
                        modelprefix=modelprefix,
                    )
                    print(
                        "Running ",
                        DLCscorer,
                        " with # of trainingiterations:",
                        trainingsiterations,
                    )
                    (
                        notanalyzed,
                        resultsfilename,
                        DLCscorer,
                    ) = auxiliaryfunctions.CheckifNotEvaluated(
                        str(evaluationfolder),
                        DLCscorer,
                        DLCscorerlegacy,
                        Snapshots[snapindex],
                    )

                    # Predictions are cached in this pickle; its presence is
                    # what marks a snapshot as already evaluated.
                    data_path = resultsfilename.split(".h5")[0] + "_full.pickle"

                    if plotting:
                        foldername = os.path.join(
                            str(evaluationfolder),
                            "LabeledImages_" + DLCscorer + "_" + Snapshots[snapindex],
                        )
                        auxiliaryfunctions.attempttomakefolder(foldername)
                        if plotting == "bodypart":
                            fig, ax = visualization.create_minimal_figure()

                    if os.path.isfile(data_path):
                        print("Model already evaluated.", resultsfilename)
                    else:

                        sess, inputs, outputs = predict.setup_pose_prediction(
                            dlc_cfg
                        )

                        PredicteData = {}
                        # Rows follow Data.index, columns follow all_bpts;
                        # entries stay NaN where no match was found.
                        dist = np.full((len(Data), len(all_bpts)), np.nan)
                        conf = np.full_like(dist, np.nan)
                        print("Network Evaluation underway...")
                        for imageindex, imagename in tqdm(enumerate(Data.index)):
                            image_path = os.path.join(cfg["project_path"], *imagename)
                            frame = auxfun_videos.imread(image_path, mode="skimage")

                            GT = Data.iloc[imageindex]
                            # Nothing to compare against when the annotation
                            # row has no truthy entry.
                            if not GT.any():
                                continue

                            # Pass the image and the keypoints through the resizer;
                            # this has no effect if no augmenters were added to it.
                            keypoints = [GT.to_numpy().reshape((-1, 2)).astype(float)]
                            frame_, keypoints = pipeline(
                                images=[frame], keypoints=keypoints
                            )
                            frame = frame_[0]
                            GT[:] = keypoints[0].flatten()

                            df = GT.unstack("coords").reindex(joints, level="bodyparts")

                            # FIXME Is having an empty array vs nan really that necessary?!
                            groundtruthidentity = list(
                                df.index.get_level_values("individuals")
                                .to_numpy()
                                .reshape((-1, 1))
                            )
                            groundtruthcoordinates = list(df.values[:, np.newaxis])
                            for i, coords in enumerate(groundtruthcoordinates):
                                if np.isnan(coords).any():
                                    groundtruthcoordinates[i] = np.empty(
                                        (0, 2), dtype=float
                                    )
                                    groundtruthidentity[i] = np.array([], dtype=str)

                            # Form 2D array of shape (n_rows, 4) where the last dimension
                            # is (sample_index, peak_y, peak_x, bpt_index) to slice the PAFs.
                            temp = df.reset_index(level="bodyparts").dropna()
                            # Assign the result back instead of replacing in place
                            # on the column selection: the chained in-place form
                            # does not update `temp` under pandas Copy-on-Write.
                            temp["bodyparts"] = temp["bodyparts"].replace(
                                dict(zip(joints, range(len(joints))))
                            )
                            temp["sample"] = 0
                            peaks_gt = temp.loc[
                                :, ["sample", "y", "x", "bodyparts"]
                            ].to_numpy()
                            peaks_gt[:, 1:3] = (peaks_gt[:, 1:3] - stride // 2) / stride

                            pred = predictma.predict_batched_peaks_and_costs(
                                dlc_cfg,
                                np.expand_dims(frame, axis=0),
                                sess,
                                inputs,
                                outputs,
                                peaks_gt.astype(int),
                            )

                            if not pred:
                                continue
                            else:
                                pred = pred[0]

                            PredicteData[imagename] = {}
                            PredicteData[imagename]["index"] = imageindex
                            PredicteData[imagename]["prediction"] = pred
                            PredicteData[imagename]["groundtruth"] = [
                                groundtruthidentity,
                                groundtruthcoordinates,
                                GT,
                            ]

                            coords_pred = pred["coordinates"][0]
                            probs_pred = pred["confidence"]
                            for bpt, xy_gt in df.groupby(level="bodyparts"):
                                inds_gt = np.flatnonzero(
                                    np.all(~np.isnan(xy_gt), axis=1)
                                )
                                n_joint = joints.index(bpt)
                                xy = coords_pred[n_joint]
                                if inds_gt.size and xy.size:
                                    # Pick the predictions closest to ground truth,
                                    # rather than the ones the model has most confident in
                                    xy_gt_values = xy_gt.iloc[inds_gt].values
                                    neighbors = _find_closest_neighbors(
                                        xy_gt_values, xy, k=3
                                    )
                                    # Entries equal to -1 are treated as unmatched.
                                    found = neighbors != -1
                                    min_dists = np.linalg.norm(
                                        xy_gt_values[found] - xy[neighbors[found]],
                                        axis=1,
                                    )
                                    inds = np.flatnonzero(all_bpts == bpt)
                                    sl = imageindex, inds[inds_gt[found]]
                                    dist[sl] = min_dists
                                    conf[sl] = probs_pred[n_joint][
                                        neighbors[found]
                                    ].squeeze()

                            if plotting == "bodypart":
                                temp_xy = GT.unstack("bodyparts")[joints].values
                                gt = temp_xy.reshape(
                                    (-1, 2, temp_xy.shape[1])
                                ).T.swapaxes(1, 2)
                                h, w, _ = np.shape(frame)
                                fig.set_size_inches(w / 100, h / 100)
                                ax.set_xlim(0, w)
                                ax.set_ylim(0, h)
                                ax.invert_yaxis()
                                ax = visualization.make_multianimal_labeled_image(
                                    frame,
                                    gt,
                                    coords_pred,
                                    probs_pred,
                                    colors,
                                    cfg["dotsize"],
                                    cfg["alphavalue"],
                                    cfg["pcutoff"],
                                    ax=ax,
                                )
                                visualization.save_labeled_frame(
                                    fig,
                                    image_path,
                                    foldername,
                                    imageindex in trainIndices,
                                )
                                visualization.erase_artists(ax)

                        sess.close()  # closes the current tf session

                        # Compute all distance statistics
                        df_dist = pd.DataFrame(dist, columns=df.index)
                        df_conf = pd.DataFrame(conf, columns=df.index)
                        df_joint = pd.concat(
                            [df_dist, df_conf],
                            keys=["rmse", "conf"],
                            names=["metrics"],
                            axis=1,
                        )
                        df_joint = df_joint.reorder_levels(
                            list(np.roll(df_joint.columns.names, -1)), axis=1
                        )
                        df_joint.sort_index(
                            axis=1,
                            level=["individuals", "bodyparts"],
                            ascending=[True, True],
                            inplace=True,
                        )
                        write_path = os.path.join(
                            evaluationfolder, f"dist_{trainingsiterations}.csv"
                        )
                        df_joint.to_csv(write_path)

                        # Calculate overall prediction error
                        error = df_joint.xs("rmse", level="metrics", axis=1)
                        mask = (
                            df_joint.xs("conf", level="metrics", axis=1)
                            >= cfg["pcutoff"]
                        )
                        error_masked = error[mask]
                        error_train = np.nanmean(error.iloc[trainIndices])
                        error_train_cut = np.nanmean(error_masked.iloc[trainIndices])
                        error_test = np.nanmean(error.iloc[testIndices])
                        error_test_cut = np.nanmean(error_masked.iloc[testIndices])
                        results = [
                            trainingsiterations,
                            int(100 * trainFraction),
                            shuffle,
                            np.round(error_train, 2),
                            np.round(error_test, 2),
                            cfg["pcutoff"],
                            np.round(error_train_cut, 2),
                            np.round(error_test_cut, 2),
                        ]
                        final_result.append(results)

                        if show_errors:
                            string = (
                                "Results for {} training iterations, training fraction of {}, and shuffle {}:\n"
                                "Train error: {} pixels. Test error: {} pixels.\n"
                                "With pcutoff of {}:\n"
                                "Train error: {} pixels. Test error: {} pixels."
                            )
                            print(string.format(*results))

                            print("##########################################")
                            print(
                                "Average Euclidean distance to GT per individual (in pixels; test-only)"
                            )
                            print(
                                error_masked.iloc[testIndices]
                                .groupby("individuals", axis=1)
                                .mean()
                                .mean()
                                .to_string()
                            )
                            print(
                                "Average Euclidean distance to GT per bodypart (in pixels; test-only)"
                            )
                            print(
                                error_masked.iloc[testIndices]
                                .groupby("bodyparts", axis=1)
                                .mean()
                                .mean()
                                .to_string()
                            )

                        PredicteData["metadata"] = {
                            "nms radius": dlc_cfg["nmsradius"],
                            "minimal confidence": dlc_cfg["minconfidence"],
                            "sigma": dlc_cfg.get("sigma", 1),
                            "PAFgraph": dlc_cfg["partaffinityfield_graph"],
                            "PAFinds": np.arange(
                                len(dlc_cfg["partaffinityfield_graph"])
                            ),
                            "all_joints": [
                                [i] for i in range(len(dlc_cfg["all_joints"]))
                            ],
                            "all_joints_names": [
                                dlc_cfg["all_joints_names"][i]
                                for i in range(len(dlc_cfg["all_joints"]))
                            ],
                            "stride": dlc_cfg.get("stride", 8),
                        }
                        print(
                            "Done and results stored for snapshot: ",
                            Snapshots[snapindex],
                        )

                        dictionary = {
                            "Scorer": DLCscorer,
                            "DLC-model-config file": dlc_cfg,
                            "trainIndices": trainIndices,
                            "testIndices": testIndices,
                            "trainFraction": trainFraction,
                        }
                        metadata = {"data": dictionary}
                        _ = auxfun_multianimal.SaveFullMultiAnimalData(
                            PredicteData, metadata, resultsfilename
                        )

                        tf.compat.v1.reset_default_graph()

                    # With a single multi-animal body part the PAF
                    # cross-validation below is skipped entirely.
                    n_multibpts = len(cfg["multianimalbodyparts"])
                    if n_multibpts == 1:
                        continue

                    # Skip data-driven skeleton selection unless
                    # the model was trained on the full graph.
                    max_n_edges = n_multibpts * (n_multibpts - 1) // 2
                    n_edges = len(dlc_cfg["partaffinityfield_graph"])
                    if n_edges == max_n_edges:
                        print("Selecting best skeleton...")
                        n_graphs = 10
                        paf_inds = None
                    else:
                        n_graphs = 1
                        paf_inds = [list(range(n_edges))]
                    (
                        results,
                        paf_scores,
                        best_assemblies,
                    ) = crossvalutils.cross_validate_paf_graphs(
                        config,
                        str(path_test_config).replace("pose_", "inference_"),
                        data_path,
                        data_path.replace("_full.", "_meta."),
                        n_graphs=n_graphs,
                        paf_inds=paf_inds,
                        oks_sigma=dlc_cfg.get("oks_sigma", 0.1),
                        margin=dlc_cfg.get("bbox_margin", 0),
                        symmetric_kpts=dlc_cfg.get("symmetric_kpts"),
                    )
                    if plotting == "individual":
                        assemblies, assemblies_unique, image_paths = best_assemblies
                        fig, ax = visualization.create_minimal_figure()
                        n_animals = len(cfg["individuals"])
                        if cfg["uniquebodyparts"]:
                            n_animals += 1
                        colors = visualization.get_cmap(n_animals, name=cfg["colormap"])
                        for k, v in tqdm(assemblies.items()):
                            imname = image_paths[k]
                            image_path = os.path.join(cfg["project_path"], *imname)
                            frame = auxfun_videos.imread(image_path, mode="skimage")

                            h, w, _ = np.shape(frame)
                            fig.set_size_inches(w / 100, h / 100)
                            ax.set_xlim(0, w)
                            ax.set_ylim(0, h)
                            ax.invert_yaxis()

                            gt = [
                                s.to_numpy().reshape((-1, 2))
                                for _, s in Data.loc[imname].groupby("individuals")
                            ]
                            coords_pred = []
                            coords_pred += [ass.xy for ass in v]
                            probs_pred = []
                            probs_pred += [ass.data[:, 2:3] for ass in v]
                            if assemblies_unique is not None:
                                unique = assemblies_unique.get(k, None)
                                if unique is not None:
                                    coords_pred.append(unique[:, :2])
                                    probs_pred.append(unique[:, 2:3])
                            # Pad with NaN entries so there is one prediction
                            # slot per ground-truth group.
                            while len(coords_pred) < len(gt):
                                coords_pred.append(np.full((1, 2), np.nan))
                                probs_pred.append(np.full((1, 2), np.nan))
                            ax = visualization.make_multianimal_labeled_image(
                                frame,
                                gt,
                                coords_pred,
                                probs_pred,
                                colors,
                                cfg["dotsize"],
                                cfg["alphavalue"],
                                cfg["pcutoff"],
                                ax=ax,
                            )
                            visualization.save_labeled_frame(
                                fig, image_path, foldername, k in trainIndices,
                            )
                            visualization.erase_artists(ax)

                    # Append train/test mAP and mAR rows to the cross-validation
                    # table before pickling it alongside the PAF scores.
                    df = results[1].copy()
                    df.loc(axis=0)[("mAP_train", "mean")] = [
                        d[0]["mAP"] for d in results[2]
                    ]
                    df.loc(axis=0)[("mAR_train", "mean")] = [
                        d[0]["mAR"] for d in results[2]
                    ]
                    df.loc(axis=0)[("mAP_test", "mean")] = [
                        d[1]["mAP"] for d in results[2]
                    ]
                    df.loc(axis=0)[("mAR_test", "mean")] = [
                        d[1]["mAR"] for d in results[2]
                    ]
                    with open(data_path.replace("_full.", "_map."), "wb") as file:
                        pickle.dump((df, paf_scores), file)

                if len(final_result) > 0:  # Only append if results were calculated
                    make_results_file(final_result, evaluationfolder, DLCscorer)

    os.chdir(str(start_path))

Ejemplo n.º 8

Mostrar archivo

def calculatepafdistancebounds(config,
                               shuffle=0,
                               trainingsetindex=0,
                               modelprefix="",
                               numdigits=0,
                               onlytrain=False):
    """
    Compute min/max keypoint distances along every PAF edge of a model.

    For each edge of the model's part affinity field graph, the distance
    between the two joints (divided by the network stride) is gathered over
    the annotated frames, separately for joints of the same individual
    ("intra") and of two different individuals ("inter"). The extrema are
    written to ``inferencebounds.yaml`` in the model's ``test`` folder and
    returned.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    shuffle : integer
        Shuffle index of the training dataset. The default is 0.

    trainingsetindex : int, optional
        Position of the training fraction to use inside the TrainingFraction
        list of config.yaml. By default the first.

    modelprefix : string, optional
        Forwarded to the helper that resolves the model folder.

    numdigits : int, optional
        Number of digits the distances are rounded to.

    onlytrain : bool, optional
        If True, only the frames listed in the training indices are used.

    Returns
    -------
    dict
        Maps "<joint index>_<joint index>" to a dict with the string-valued
        entries "intra_max", "intra_min", "inter_max" and "inter_min".
        Empty for projects that are not multi-animal.
    """
    import os
    from deeplabcut.utils import auxiliaryfunctions, auxfun_multianimal
    from deeplabcut.pose_estimation_tensorflow.config import load_config

    cfg = auxiliaryfunctions.read_config(config)

    # Nothing to compute for single-animal projects.
    if not cfg["multianimalproject"]:
        print("You might as well bring owls to Athens.")
        return {}

    def _extrema_as_strings(values):
        # Without samples fall back to an interval wider than any image.
        if not values:
            return str(1e5), str(0)
        upper = str(round(np.nanmax(values), numdigits))
        lower = str(round(np.nanmin(values), numdigits))
        return upper, lower

    individuals, _, _ = auxfun_multianimal.extractindividualsandbodyparts(cfg)

    # Resolve where the annotations, the split metadata and the model live.
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    trainFraction = cfg["TrainingFraction"][trainingsetindex]
    _, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
        trainingsetfolder, trainFraction, shuffle, cfg)
    modelfolder = os.path.join(
        cfg["project_path"],
        str(auxiliaryfunctions.GetModelFolder(
            trainFraction, shuffle, cfg, modelprefix=modelprefix)),
    )

    # Only the training indices of the stored split are needed below.
    _, trainIndices, _, _ = auxiliaryfunctions.LoadMetadata(
        os.path.join(cfg["project_path"], metadatafn))
    annotation_file = os.path.join(
        cfg["project_path"],
        str(trainingsetfolder),
        "CollectedData_" + cfg["scorer"] + ".h5",
    )
    Data = pd.read_hdf(annotation_file)[cfg["scorer"]]

    test_dir = Path(modelfolder) / "test"
    dlc_cfg = load_config(str(test_dir / "pose_cfg.yaml"))

    partaffinityfield_graph = dlc_cfg["partaffinityfield_graph"]
    n_joints = len(dlc_cfg["all_joints"])
    jointnames = [dlc_cfg["all_joints_names"][k] for k in range(n_joints)]

    inferenceboundscfg = {}
    for edge in partaffinityfield_graph:
        j1 = jointnames[edge[0]]
        j2 = jointnames[edge[1]]
        ds_within, ds_across = [], []
        for first in individuals:
            for second in individuals:
                if first == "single" or second == "single":
                    continue
                if ((first, j1, "x") not in Data.keys()
                        or (second, j2, "y") not in Data.keys()):
                    continue

                dx = Data[first, j1, "x"] - Data[second, j2, "x"]
                dy = Data[first, j1, "y"] - Data[second, j2, "y"]
                distances = np.sqrt(dx**2 + dy**2) / dlc_cfg["stride"]
                if onlytrain:
                    distances = distances.iloc[trainIndices]

                bucket = ds_within if first == second else ds_across
                bucket.extend(distances.values.flatten())

        intra_max, intra_min = _extrema_as_strings(ds_within)
        # NOTE: the inter-animal distances are currently not used, but are interesting to compare to intra_*
        inter_max, inter_min = _extrema_as_strings(ds_across)
        inferenceboundscfg[f"{edge[0]}_{edge[1]}"] = {
            "intra_max": intra_max,
            "intra_min": intra_min,
            "inter_max": inter_max,
            "inter_min": inter_min,
        }

    auxiliaryfunctions.write_plainconfig(
        str(test_dir / "inferencebounds.yaml"), dict(inferenceboundscfg))
    return inferenceboundscfg

Ejemplo n.º 9

Mostrar archivo

def create_training_dataset(config,num_shuffles=1,Shuffles=None,windows2linux=False,userfeedback=False,
        trainIndexes=None,testIndexes=None,
        net_type=None,augmenter_type=None):
    """
    Creates a training dataset. Labels from all the extracted frames are merged into a single .h5 file.
    Only the videos included in the config file are used to create this dataset.

    [OPTIONAL] Use the function 'add_new_video' at any stage of the project to add more videos to the project.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    num_shuffles : int, optional
        Number of shuffles of training dataset to create, i.e. [1,2,3] for num_shuffles=3. Default is set to 1.

    Shuffles: list of shuffles.
        Alternatively the user can also give a list of shuffles (integers!). Non-integer entries are dropped.

    windows2linux: bool.
        The annotation files contain path formated according to your operating system. If you label on windows
        but train & evaluate on a unix system (e.g. ubuntu, colab, Mac) set this variable to True to convert the paths.

    userfeedback: bool, optional
        If this is set to false, then all requested train/test splits are created (no matter if they already exist). If you
        want to assure that previous splits etc. are not overwritten, then set this to True and you will be asked for each split.

    trainIndexes: list of lists, optional (default=None)
        List of one or multiple lists containing train indexes.
        A list containing two lists of training indexes will produce two splits.
        Must be passed together with testIndexes. Each split is stored under a shuffle index equal to
        its (0-based) position in this list; the Shuffles argument is not consulted in that case.

    testIndexes: list of lists, optional (default=None)
        List of test indexes.

    net_type: string
        Type of networks. Currently resnet_50, resnet_101, resnet_152, mobilenet_v2_1.0,mobilenet_v2_0.75, mobilenet_v2_0.5, and mobilenet_v2_0.35 are supported.

    augmenter_type: string
        Type of augmenter. Currently default, imgaug, tensorpack, and deterministic are supported.

    Raises
    ------
    ValueError
        If net_type or augmenter_type is not supported, if only one of trainIndexes/testIndexes is
        given, or if they do not contain the same number of splits.
    Exception
        If userfeedback is True, a split already exists and the user declines to overwrite it.

    Example
    --------
    >>> deeplabcut.create_training_dataset('/analysis/project/reaching-task/config.yaml',num_shuffles=1)
    Windows:
    >>> deeplabcut.create_training_dataset('C:\\Users\\Ulf\\looming-task\\config.yaml',Shuffles=[3,17,5])
    --------
    """
    import scipy.io as sio

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg['scorer']
    project_path = cfg['project_path']
    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    trainingset_path = Path(os.path.join(project_path, str(trainingsetfolder)))
    auxiliaryfunctions.attempttomakefolder(trainingset_path, recursive=True)

    Data = merge_annotateddatasets(cfg, project_path, trainingset_path, windows2linux)
    Data = Data[scorer]  # extract labeled data

    # Resolve the network type (falls back to the project default).
    if net_type is None:
        net_type = cfg.get('default_net_type', 'resnet_50')
    elif 'resnet' not in net_type and 'mobilenet' not in net_type:
        raise ValueError(f'Invalid network type: {net_type}')

    # Resolve the augmenter type (falls back to the project default).
    if augmenter_type is None:
        augmenter_type = cfg.get('default_augmenter', 'default')
    elif augmenter_type not in ('default', 'imgaug', 'tensorpack', 'deterministic'):
        raise ValueError(f'Invalid augmenter type: {augmenter_type}')

    import deeplabcut
    parent_path = Path(os.path.dirname(deeplabcut.__file__))
    defaultconfigfile = str(parent_path / 'pose_cfg.yaml')
    # Locate the pretrained weights; per the original note this errors out if the model does not exist.
    model_path, num_shuffles = auxfun_models.Check4weights(net_type, parent_path, num_shuffles)

    if Shuffles is None:
        Shuffles = range(1, num_shuffles + 1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]

    if trainIndexes is None and testIndexes is None:
        # One random split per (training fraction, shuffle) pair.
        splits = [(trainFraction, shuffle, SplitTrials(range(len(Data.index)), trainFraction))
                  for trainFraction in cfg['TrainingFraction'] for shuffle in Shuffles]
    else:
        if trainIndexes is None or testIndexes is None:
            # Fail with a clear message instead of a TypeError from len(None).
            raise ValueError('trainIndexes and testIndexes must be passed together.')
        if len(trainIndexes) != len(testIndexes):
            raise ValueError('Number of train and test indexes should be equal.')
        splits = []
        # NOTE(review): the shuffle index of a user-supplied split is its 0-based position
        # in the list, not an entry of Shuffles — confirm this is intended.
        for shuffle, (train_inds, test_inds) in enumerate(zip(trainIndexes, testIndexes)):
            trainFraction = len(train_inds) / (len(train_inds) + len(test_inds))
            print(f"You passed a split with the following fraction: {int(100 * trainFraction)}%")
            splits.append((trainFraction, shuffle, (train_inds, test_inds)))

    bodyparts = cfg['bodyparts']
    nbodyparts = len(bodyparts)
    for trainFraction, shuffle, (train_inds, test_inds) in splits:
        if len(train_inds) == 0:
            # Nothing to train on for this split; skip it silently (as before).
            continue

        if userfeedback:
            trainposeconfigfile, _, _ = training.return_train_network_path(config, shuffle=shuffle, trainFraction=trainFraction)
            if trainposeconfigfile.is_file():
                askuser=input ("The model folder is already present. If you continue, it will overwrite the existing model (split). Do you want to continue?(yes/no): ")
                # Accept any capitalisation of "no"/"n" as a refusal.
                if askuser.strip().lower() in ('no', 'n'):
                    raise Exception("Use the Shuffles argument as a list to specify a different shuffle index. Check out the help for more details.")

        ####################################################
        # Generating data structure with labeled information & frame metadata (for deep cut)
        ####################################################
        datafilename, metadatafilename = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder, trainFraction, shuffle, cfg)

        ################################################################################
        # Saving data file (convert to training file for deeper cut (*.mat))
        ################################################################################
        data, MatlabData = format_training_data(Data, train_inds, nbodyparts, project_path)
        sio.savemat(os.path.join(project_path, datafilename), {'dataset': MatlabData})

        ################################################################################
        # Saving metadata (Pickle file)
        ################################################################################
        auxiliaryfunctions.SaveMetadata(os.path.join(project_path, metadatafilename), data, train_inds, test_inds, trainFraction)

        ################################################################################
        # Creating file structure for training &
        # Test files as well as pose_yaml files (containing training and testing information)
        #################################################################################
        modelfoldername = auxiliaryfunctions.GetModelFolder(trainFraction, shuffle, cfg)
        model_dir = Path(config).parents[0] / modelfoldername
        auxiliaryfunctions.attempttomakefolder(model_dir, recursive=True)
        auxiliaryfunctions.attempttomakefolder(str(model_dir / 'train'))
        auxiliaryfunctions.attempttomakefolder(str(model_dir / 'test'))

        path_train_config = str(os.path.join(cfg['project_path'], Path(modelfoldername), 'train', 'pose_cfg.yaml'))
        path_test_config = str(os.path.join(cfg['project_path'], Path(modelfoldername), 'test', 'pose_cfg.yaml'))

        items2change = {
            "dataset": datafilename,
            "metadataset": metadatafilename,
            "num_joints": len(bodyparts),
            "all_joints": [[i] for i in range(len(bodyparts))],
            "all_joints_names": [str(bpt) for bpt in bodyparts],
            "init_weights": model_path,
            "project_path": str(cfg['project_path']),
            "net_type": net_type,
            "dataset_type": augmenter_type,
        }
        trainingdata = MakeTrain_pose_yaml(items2change, path_train_config, defaultconfigfile)
        # Subset of the training configuration that is copied into the test configuration.
        keys2save = [
            "dataset", "num_joints", "all_joints", "all_joints_names",
            "net_type", 'init_weights', 'global_scale', 'location_refinement',
            'locref_stdev'
        ]
        MakeTest_pose_yaml(trainingdata, keys2save, path_test_config)
        print("The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!")

Ejemplo n.º 10

Mostrar archivo

def generate_prediction(MAX_PREDICTION_STEPS=1000, config=None, imagename=None):
    """
    Generator that predicts poses for images sent in by the caller.

    The first pose is computed for the image read from ``imagename``; every
    following image is received through ``generator.send(image)`` and its pose
    is yielded in return. (Advancing with plain ``next()`` sends ``None`` as
    the next image.)

    Parameters
    ----------
    MAX_PREDICTION_STEPS : int
        Number of predictions that should be done (per snapshot) before the
        session is closed and the prediction system has to be re-initialized.

    config : string, optional
        Full path of the project's config.yaml. Defaults to the path that was
        previously hard-coded in this function.

    imagename : string, optional
        Path of the image used for the very first prediction. Defaults to
        "img034.png".

    Yields
    ------
    The value returned by ``predict_single_image`` for the current image.

    Raises
    ------
    FileNotFoundError
        If the test pose_cfg.yaml of the model or any trained snapshot is missing.
    ValueError
        If ``snapshotindex`` in the config is not -1, "all" or a valid index.
    """

    ##################################################
    # Clone arguments from deeplabcut.evaluate_network
    ##################################################

    if config is None:
        config = "/root/DLCROS_ws/Surgical_Tool_Tracking/ForwardPassDeepLabCut/DaVinci-Ambar-2019-10-31/config.yaml"
    if imagename is None:
        imagename = "img034.png"
    Shuffles = [1]
    comparisonbodyparts = "all"
    gputouse = None

    # Suppress scientific notation while printing
    np.set_printoptions(suppress=True)

    ##################################################
    # SETUP everything until image prediction
    ##################################################

    # was potentially set during training
    os.environ.pop('TF_CUDNN_USE_AUTOTUNE', None)

    # tf.compat.v1 is used for every release after 1.12 (including 2.x, where
    # the top-level reset_default_graph no longer exists).
    vers = tf.__version__.split('.')
    if (int(vers[0]), int(vers[1])) > (1, 12):
        TF = tf.compat.v1
    else:
        TF = tf

    TF.reset_default_graph()

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Read file path for pose_config file. >> pass it on
    cfg = auxiliaryfunctions.read_config(config)
    if gputouse is not None:  # gpu selection
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gputouse)

    ##############
    # Cloning for-loop variables
    shuffle = Shuffles[0]
    trainFraction = cfg["TrainingFraction"][0]
    ##############

    # NOTE(review): the two results below are not used further in this function;
    # the calls are kept in case the helpers validate the config — confirm.
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts)

    ##################################################
    # Load and setup CNN part detector
    ##################################################

    modelfolder = os.path.join(
        cfg["project_path"],
        str(auxiliaryfunctions.GetModelFolder(trainFraction, shuffle, cfg)))
    path_test_config = Path(modelfolder) / 'test' / 'pose_cfg.yaml'

    try:
        dlc_cfg = load_config(str(path_test_config))
    except FileNotFoundError:
        raise FileNotFoundError(
            "It seems the model for shuffle %s and trainFraction %s does not exist."
            % (shuffle, trainFraction))

    dlc_cfg['batch_size'] = 1  # in case this was edited for analysis.

    # Check which snapshots are available and sort them by # iterations
    Snapshots = np.array([
        fn.split('.')[0]
        for fn in os.listdir(os.path.join(str(modelfolder), 'train'))
        if "index" in fn
    ])
    if Snapshots.size == 0:
        raise FileNotFoundError(
            "Snapshots not found! It seems the dataset for shuffle %s and "
            "trainFraction %s is not trained.\nPlease train it before evaluating."
            "\nUse the function 'train_network' to do so."
            % (shuffle, trainFraction))

    increasing_indices = np.argsort([int(m.split('-')[1]) for m in Snapshots])
    Snapshots = Snapshots[increasing_indices]

    snapshotindex = cfg["snapshotindex"]
    if snapshotindex == -1:
        snapindices = [-1]
    elif snapshotindex == "all":
        snapindices = range(len(Snapshots))
    elif snapshotindex < len(Snapshots):
        snapindices = [snapshotindex]
    else:
        # Previously this only printed and then crashed on the undefined snapindices.
        raise ValueError(
            "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
        )

    ##################################################
    # Compute predictions over image
    ##################################################

    for snapindex in snapindices:
        # setting weights to corresponding snapshot.
        dlc_cfg['init_weights'] = os.path.join(
            str(modelfolder), 'train', Snapshots[snapindex])
        # read how many training iterations that corresponds to.
        trainingsiterations = (dlc_cfg['init_weights'].split(
            os.sep)[-1]).split('-')[-1]

        # name for deeplabcut net (based on its parameters)
        DLCscorer = auxiliaryfunctions.GetScorerName(cfg, shuffle,
                                                     trainFraction,
                                                     trainingsiterations)
        print("Running ", DLCscorer, " with # of trainingiterations:",
              trainingsiterations)

        # Specifying state of model (snapshot / training state)
        sess, inputs, outputs = ptf_predict.setup_pose_prediction(dlc_cfg)

        # The finally-clause also runs when the consumer closes the generator
        # early, so the session is always released.
        try:
            print("Analyzing test image ...")
            image = io.imread(imagename, plugin='matplotlib')

            count = 0
            start_time = time.time()
            while count < MAX_PREDICTION_STEPS:

                ##################################################
                # Predict for test image once, and wait for future images to arrive
                ##################################################

                print("Calling predict_single_image")
                pose = predict_single_image(image, sess, inputs, outputs, dlc_cfg)

                ##################################################
                # Yield prediction to caller
                ##################################################

                # The next image is received here via generator.send(image).
                image = (yield pose)

                step_time = time.time()
                print(f"time: {step_time-start_time}")
                start_time = step_time
                count += 1

                if count == MAX_PREDICTION_STEPS:
                    print(
                        f"Restart prediction system, Steps have exceeded {MAX_PREDICTION_STEPS}"
                    )
        finally:
            sess.close()  # closes the current tf session
            TF.reset_default_graph()

Ejemplo n.º 11

Mostrar archivo

Archivo: CLARA_DLC_utils_v2.py Proyecto: wryanw/CLARA_DLC

def create_training_dataset_CLARA(config,
                                  num_shuffles=1,
                                  Shuffles=None,
                                  windows2linux=False,
                                  trainIndexes=None,
                                  testIndexes=None):
    """
    Creates a training dataset and then starts training.

    Labels from all the extracted frames are merged into a single .h5 file
    (plus a human readable .csv). Only the videos included in the config file
    are used to create this dataset. After all shuffles were written,
    ``deeplabcut.train_network(config)`` is called.

    [OPTIONAL] Use the function 'add_new_video' at any stage of the project to add more videos to the project.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    num_shuffles : int, optional
        Number of shuffles of training dataset to create, i.e. [1,2,3] for num_shuffles=3. Default is set to 1.

    Shuffles: list of shuffles.
        Alternatively the user can also give a list of shuffles (integers!). Non-integer entries are dropped.

    windows2linux: bool.
        Accepted for signature compatibility with 'create_training_dataset'; it is not used by this function.

    trainIndexes: list, optional (default=None)
        Positional indexes (rows of the merged annotation table) used for training.
        Must be passed together with testIndexes. If both are None, a new split is
        drawn with SplitTrials_CLARA for every shuffle / training fraction.

    testIndexes: list, optional (default=None)
        Positional indexes used for testing.

    Raises
    ------
    ValueError
        If only one of trainIndexes/testIndexes is given, or if no annotation file could be found.

    Example
    --------
    >>> deeplabcut.create_training_dataset('/analysis/project/reaching-task/config.yaml',num_shuffles=1)
    Windows:
    >>> deeplabcut.create_training_dataset('C:\\Users\\Ulf\\looming-task\\config.yaml',Shuffles=[3,17,5])
    --------
    """
    from skimage import io
    import scipy.io as sio
    from deeplabcut.utils import auxiliaryfunctions, auxfun_models

    if (trainIndexes is None) != (testIndexes is None):
        # Fail early with a clear message instead of a TypeError from len(None).
        raise ValueError("trainIndexes and testIndexes must be passed together.")

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg['scorer']
    project_path = cfg['project_path']
    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    trainingsetfolder_full = Path(os.path.join(project_path, str(trainingsetfolder)))
    auxiliaryfunctions.attempttomakefolder(trainingsetfolder_full, recursive=True)

    # Merge all the h5 files of the labeled datasets (one per video).
    AnnotationData = None
    data_path = Path(os.path.join(project_path, 'labeled-data'))
    video_names = [Path(video).stem for video in cfg['video_sets'].keys()]
    for i in video_names:
        h5_file = str(data_path / Path(i)) + '/CollectedData_' + scorer + '.h5'
        try:
            data = pd.read_hdf(h5_file, 'df_with_missing')
        except FileNotFoundError:
            print(h5_file, " not found (perhaps not annotated)")
            continue
        # Drop frames without any label and key the rest by project-relative image path.
        smlData = data.dropna(how='all')
        smlData.index = ['labeled-data/' + str(Path(i)) + '/' + sk
                         for sk in smlData.index.values]
        if AnnotationData is None:
            AnnotationData = smlData
        else:
            AnnotationData = pd.concat([AnnotationData, smlData])

    if AnnotationData is None:
        raise ValueError(
            "No annotated data found for any video listed in the config file.")

    collected_base = os.path.join(str(trainingsetfolder_full), 'CollectedData_' + scorer)
    AnnotationData.to_hdf(collected_base + '.h5', key='df_with_missing', mode='w')
    AnnotationData.to_csv(collected_base + '.csv')  # human readable.

    Data = AnnotationData[scorer]  # extract labeled data

    # loading & linking pretrained models
    net_type = 'resnet_' + str(cfg['resnet'])
    import deeplabcut
    parent_path = Path(os.path.dirname(deeplabcut.__file__))
    defaultconfigfile = str(parent_path / 'pose_cfg.yaml')

    model_path, num_shuffles = auxfun_models.Check4weights(
        net_type, parent_path, num_shuffles)

    if Shuffles is None:
        Shuffles = range(1, num_shuffles + 1, 1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]

    # bodyparts is either a flat list or a mapping category -> list of part names.
    bodyparts = cfg['bodyparts']
    if not isinstance(bodyparts, list):
        bodyparts = [ptname for key in bodyparts.keys() for ptname in bodyparts[key]]

    TrainingFraction = cfg['TrainingFraction']
    for shuffle in Shuffles:  # Creating shuffles starting from 1
        for trainFraction in TrainingFraction:
            # Use per-iteration variables: overwriting the arguments would make
            # every later shuffle/fraction reuse the very first random split.
            if trainIndexes is None and testIndexes is None:
                train_inds, test_inds = SplitTrials_CLARA(
                    range(len(Data.index)), trainFraction)
            else:
                train_inds, test_inds = trainIndexes, testIndexes
                print(
                    "You passed a split with the following fraction:",
                    len(train_inds) * 1. /
                    (len(test_inds) + len(train_inds)) * 100)

            ####################################################
            # Generating data structure with labeled information & frame metadata (for deep cut)
            ####################################################

            # Make training file!
            data = []
            for jj in train_inds:
                H = {}
                # load image to get dimensions:
                filename = Data.index[jj]
                im = io.imread(os.path.join(cfg['project_path'], filename))
                H['image'] = filename
                height, width = np.shape(im)[0], np.shape(im)[1]

                if np.ndim(im) == 3:
                    H['size'] = np.array([np.shape(im)[2], height, width])
                else:
                    # grayscale image
                    H['size'] = np.array([1, height, width])

                indexjoints = 0
                joints = np.zeros((len(bodyparts), 3)) * np.nan
                for bpindex, bodypart in enumerate(bodyparts):
                    # jj is a row position (see Data.index[jj] above), hence .iloc.
                    x = Data[bodypart]['x'].iloc[jj]
                    y = Data[bodypart]['y'].iloc[jj]
                    if x < width and y < height:  # are labels in image?
                        joints[indexjoints, 0] = int(bpindex)
                        joints[indexjoints, 1] = x
                        joints[indexjoints, 2] = y
                        indexjoints += 1

                # drop NaN, i.e. lines for missing body parts
                joints = joints[np.where(np.prod(np.isfinite(joints), 1))[0], :]

                assert (np.prod(np.array(joints[:, 2]) < height)
                        )  # y coordinate within image?
                assert (np.prod(np.array(joints[:, 1]) < width)
                        )  # x coordinate within image?

                H['joints'] = np.array(joints, dtype=int)
                if np.size(joints) > 0:  # exclude images without labels
                    data.append(H)

            if len(train_inds) > 0:
                datafilename, metadatafilename = auxiliaryfunctions.GetDataandMetaDataFilenames(
                    trainingsetfolder, trainFraction, shuffle, cfg)
                ################################################################################
                # Saving metadata (Pickle file)
                ################################################################################
                auxiliaryfunctions.SaveMetadata(
                    os.path.join(project_path, metadatafilename), data,
                    train_inds, test_inds, trainFraction)
                ################################################################################
                # Saving data file (convert to training file for deeper cut (*.mat))
                ################################################################################

                DTYPE = [('image', 'O'), ('size', 'O'), ('joints', 'O')]
                MatlabData = np.array(
                    [(np.array([item['image']], dtype='U'),
                      np.array([item['size']]),
                      boxitintoacell_CLARA(item['joints']))
                     for item in data],
                    dtype=DTYPE)

                sio.savemat(os.path.join(project_path, datafilename),
                            {'dataset': MatlabData})

                ################################################################################
                # Creating file structure for training &
                # Test files as well as pose_yaml files (containing training and testing information)
                #################################################################################

                modelfoldername = auxiliaryfunctions.GetModelFolder(
                    trainFraction, shuffle, cfg)
                model_dir = Path(config).parents[0] / modelfoldername
                auxiliaryfunctions.attempttomakefolder(model_dir, recursive=True)
                auxiliaryfunctions.attempttomakefolder(str(model_dir / 'train'))
                auxiliaryfunctions.attempttomakefolder(str(model_dir / 'test'))

                path_train_config = str(
                    os.path.join(cfg['project_path'], Path(modelfoldername),
                                 'train', 'pose_cfg.yaml'))
                path_test_config = str(
                    os.path.join(cfg['project_path'], Path(modelfoldername),
                                 'test', 'pose_cfg.yaml'))

                items2change = {
                    "dataset": datafilename,
                    "metadataset": metadatafilename,
                    "num_joints": len(bodyparts),
                    "all_joints": [[i] for i in range(len(bodyparts))],
                    "all_joints_names": [str(bpt) for bpt in bodyparts],
                    "init_weights": model_path,
                    "project_path": str(cfg['project_path']),
                    "net_type": net_type,
                    "crop": 'False'
                }
                trainingdata = MakeTrain_pose_yaml_CLARA(
                    items2change, path_train_config, defaultconfigfile)
                # Subset of the training configuration that is copied into the test configuration.
                keys2save = [
                    "dataset", "num_joints", "all_joints", "all_joints_names",
                    "net_type", 'init_weights', 'global_scale',
                    'location_refinement', 'locref_stdev'
                ]
                MakeTest_pose_yaml_CLARA(trainingdata, keys2save,
                                         path_test_config)
    # Training is started right away once all shuffles have been written.
    deeplabcut.train_network(config)

Ejemplo n.º 12

Mostrar archivo

def create_pretrained_project(
    project,
    experimenter,
    videos,
    model="full_human",
    working_directory=None,
    copy_videos=False,
    videotype=None,
    analyzevideo=True,
    filtered=True,
    createlabeledvideo=True,
    trainFraction=None,
):
    r"""
    Creates a new project directory, sub-directories and a basic configuration file.
    Change its parameters to your projects need.

    The project will also be initialized with a pre-trained model from the DeepLabCut model zoo!

    http://www.mousemotorlab.org/dlc-modelzoo

    Parameters
    ----------
    project : string
        String containing the name of the project.

    experimenter : string
        String containing the name of the experimenter.

    model: string, options see  http://www.mousemotorlab.org/dlc-modelzoo
        Current option and default: 'full_human'  Creates a demo human project and analyzes a video with ResNet 101 weights pretrained on MPII Human Pose. This is from the DeeperCut paper
        by Insafutdinov et al. https://arxiv.org/abs/1605.03170 Please make sure to cite it too if you use this code!

    videos : list
        A list of string containing the full paths of the videos to include in the project.

    working_directory : string, optional
        The directory where the project will be created. The default is the ``current working directory``; if provided, it must be a string.

    copy_videos : bool, optional  ON WINDOWS: TRUE is often necessary!
        If this is set to True, the videos are copied to the ``videos`` directory. If it is False,symlink of the videos are copied to the project/videos directory. The default is ``False``; if provided it must be either
        ``True`` or ``False``.

    videotype : optional
        Passed through unchanged to create_new_project and to the analysis / plotting functions.

    analyzevideo : bool, optional
        If true, then the videos in the project's ``videos`` directory are analyzed. If false, then only the project will be created and the weights downloaded.

    filtered: bool, default True
        Boolean variable indicating if filtered pose data output should be plotted rather than frame-by-frame predictions.
        Filtered version can be calculated with deeplabcut.filterpredictions

    createlabeledvideo : bool, default True
        If true, a labeled video and trajectory plots are created (after filtering the predictions when ``filtered`` is set).

    trainFraction: By default value from *new* projects. (0.95)
            Fraction that will be used in dlc-model/trainingset folder name.

    Returns
    -------
    tuple
        ``(path of the project config file, path of the train pose_cfg.yaml)``, or ``("N/A", "N/A")``
        if ``model`` is not one of the known model options.

    Example
    --------
    Linux/MacOs loading full_human model and analzying video /homosapiens1.avi
    >>> deeplabcut.create_pretrained_project('humanstrokestudy','Linus',['/data/videos/homosapiens1.avi'], copy_videos=False)

    Loading full_cat model and analzying video "felixfeliscatus3.avi"
    >>> deeplabcut.create_pretrained_project('humanstrokestudy','Linus',['/data/videos/felixfeliscatus3.avi'], model='full_cat')

    Windows:
    >>> deeplabcut.create_pretrained_project('humanstrokestudy','Bill',[r'C:\yourusername\rig-95\Videos\reachingvideo1.avi'],r'C:\yourusername\analysis\project', copy_videos=True)
    Users must format paths with either a raw string (r'C:\...') or double backslashes ('C:\\...').

    """
    if model not in globals()["Modeloptions"]:
        return "N/A", "N/A"

    cwd = os.getcwd()
    try:
        cfg = deeplabcut.create_new_project(project, experimenter, videos,
                                            working_directory, copy_videos,
                                            videotype)
        if trainFraction is not None:
            auxiliaryfunctions.edit_config(
                cfg, {"TrainingFraction": [trainFraction]})

        config = auxiliaryfunctions.read_config(cfg)
        if model == "full_human":
            config["bodyparts"] = [
                "ankle1",
                "knee1",
                "hip1",
                "hip2",
                "knee2",
                "ankle2",
                "wrist1",
                "elbow1",
                "shoulder1",
                "shoulder2",
                "elbow2",
                "wrist2",
                "chin",
                "forehead",
            ]
            config["skeleton"] = [
                ["ankle1", "knee1"],
                ["ankle2", "knee2"],
                ["knee1", "hip1"],
                ["knee2", "hip2"],
                ["hip1", "hip2"],
                ["shoulder1", "shoulder2"],
                ["shoulder1", "hip1"],
                ["shoulder2", "hip2"],
                ["shoulder1", "elbow1"],
                ["shoulder2", "elbow2"],
                ["chin", "forehead"],
                ["elbow1", "wrist1"],
                ["elbow2", "wrist2"],
            ]
            config["default_net_type"] = "resnet_101"
        # Other models: nothing to preset yet.
        # TBD: 'partaffinityfield_graph' >> use to set skeleton!

        auxiliaryfunctions.write_config(cfg, config)
        config = auxiliaryfunctions.read_config(cfg)

        # The model folder is the same for the train and test sub-directories.
        modelfoldername = auxiliaryfunctions.GetModelFolder(
            trainFraction=config["TrainingFraction"][0], shuffle=1, cfg=config)
        train_dir = Path(
            os.path.join(config["project_path"], str(modelfoldername),
                         "train"))
        test_dir = Path(
            os.path.join(config["project_path"], str(modelfoldername),
                         "test"))

        # Create the model directory
        train_dir.mkdir(parents=True, exist_ok=True)
        test_dir.mkdir(parents=True, exist_ok=True)

        path_train_config = str(
            os.path.join(config["project_path"], Path(modelfoldername),
                         "train", "pose_cfg.yaml"))
        path_test_config = str(
            os.path.join(config["project_path"], Path(modelfoldername), "test",
                         "pose_cfg.yaml"))

        # Download the weights and put then in appropriate directory
        print("Dowloading weights...")
        auxfun_models.DownloadModel(model, train_dir)

        pose_cfg = deeplabcut.auxiliaryfunctions.read_plainconfig(
            path_train_config)
        print(path_train_config)
        # Updating config file:
        config_updates = {
            "default_net_type": pose_cfg["net_type"],
            "default_augmenter": pose_cfg["dataset_type"],
            "bodyparts": pose_cfg["all_joints_names"],
            "skeleton": [],  # TODO: update with paf_graph
            "dotsize": 6,
        }
        auxiliaryfunctions.edit_config(cfg, config_updates)

        # NOTE(review): the file names returned below are not used any further
        # in this function; the calls are kept as they were — confirm whether
        # they can be dropped.
        trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(config)
        datafilename, metadatafilename = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder,
            trainFraction=config["TrainingFraction"][0],
            shuffle=1,
            cfg=config,
        )

        # Updating training and test pose_cfg:
        # the snapshot name is the name of the first ".meta" file without that suffix.
        snapshotname = [fn for fn in os.listdir(train_dir)
                        if ".meta" in fn][0].split(".meta")[0]
        dict2change = {
            "init_weights": str(os.path.join(train_dir, snapshotname)),
            "project_path": str(config["project_path"]),
        }

        UpdateTrain_pose_yaml(pose_cfg, dict2change, path_train_config)
        # Subset of the training configuration that is copied into the test configuration.
        keys2save = [
            "dataset",
            "dataset_type",
            "num_joints",
            "all_joints",
            "all_joints_names",
            "net_type",
            "init_weights",
            "global_scale",
            "location_refinement",
            "locref_stdev",
        ]

        MakeTest_pose_yaml(pose_cfg, keys2save, path_test_config)

        video_dir = os.path.join(config["project_path"], "videos")
        if analyzevideo:
            print("Analyzing video...")
            deeplabcut.analyze_videos(cfg, [video_dir],
                                      videotype,
                                      save_as_csv=True)

        if createlabeledvideo:
            if filtered:
                deeplabcut.filterpredictions(cfg, [video_dir], videotype)

            print("Plotting results...")
            deeplabcut.create_labeled_video(cfg, [video_dir],
                                            videotype,
                                            draw_skeleton=True,
                                            filtered=filtered)
            deeplabcut.plot_trajectories(cfg, [video_dir],
                                         videotype,
                                         filtered=filtered)

        return cfg, path_train_config
    finally:
        # Restore the caller's working directory, also when a step above fails.
        os.chdir(cwd)

Ejemplo n.º 13

Mostrar archivo

Archivo: trainingsetmanipulation.py Proyecto: sebo361/DeepLabCut

def mergeandsplit(config,
                  test_video_name=None,
                  trainindex=0,
                  uniform=True,
                  windows2linux=False):
    """
    Merge the annotated data sets and return a train/test split of the frame indices.

    This function allows additional control over "create_training_dataset".
    It merges annotated data sets (from different folders), splits the data in
    a specific way and returns the split variables (train/test indices).
    Importantly, this allows one to freeze a split.

    One can either create a uniform split (uniform = True; thereby indexing
    TrainingFraction in the config file) or a leave-folder-out split by listing
    the corresponding video names in test_video_name.

    Parameter
    ----------
    config: string
        Full path of the config.yaml file as a string.
    test_video_name: list, optional
        In case uniform = False, the listed folders are returned as testIndexes and all others as trainIndexes.
        Defaults to None, which is treated as an empty list.
    trainindex: int, optional
        In case uniform = True, indexes which element of TrainingFraction in the config file should be used (note it is a list!).
    uniform: bool, optional
        Perform uniform split (disregarding folder structure in labeled data), or (if False) leave-folder-out split.
    windows2linux: bool.
        The annotation files contain paths formatted according to your operating system. If you label on windows
        but train & evaluate on a unix system (e.g. ubuntu, colab, Mac) set this variable to True to convert the paths.

    Returns
    -------
    trainIndexes, testIndexes
        Positions (into the merged annotation table) of the training and test frames.
        For a leave-folder-out split with no valid folder name both are empty lists.

    Examples
    --------
    To create a leave-folder-out model:
    >>> trainIndexes, testIndexes=deeplabcut.mergeandsplit(config,['mouse1','mouse3'],uniform=False)
    returns the indices for the video 'mouse1' and 'mouse3' as testIndexes and all others as trainIndexes.
    You can then create the training set by calling (e.g. defining it as Shuffle 3):
    >>> deeplabcut.create_training_dataset(config,Shuffles=[3],trainIndexes=trainIndexes,testIndexes=testIndexes)

    To freeze a (uniform) split:
    >>> trainIndexes, testIndexes=deeplabcut.mergeandsplit(config,trainindex=0,uniform=True)
    You can then create two model instances that have the identical trainingset. Thereby you can assess the role of various parameters on the performance of DLC.
    >>> deeplabcut.create_training_dataset(config,Shuffles=[0],trainIndexes=trainIndexes,testIndexes=testIndexes)
    >>> deeplabcut.create_training_dataset(config,Shuffles=[1],trainIndexes=trainIndexes,testIndexes=testIndexes)
    --------

    """
    # A None default avoids sharing one mutable list object between calls.
    if test_video_name is None:
        test_video_name = []

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg['scorer']
    project_path = cfg['project_path']

    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    trainingset_path = Path(os.path.join(project_path, str(trainingsetfolder)))
    auxiliaryfunctions.attempttomakefolder(trainingset_path, recursive=True)
    fn = os.path.join(project_path, str(trainingsetfolder),
                      'CollectedData_' + scorer)

    # Reuse an already merged annotation file; merge the folders otherwise.
    try:
        Data = pd.read_hdf(fn + '.h5', 'df_with_missing')
    except FileNotFoundError:
        Data = merge_annotateddatasets(cfg,
                                       project_path,
                                       trainingset_path,
                                       windows2linux=windows2linux)

    Data = Data[scorer]  # extract labeled data

    if uniform:
        trainFraction = cfg['TrainingFraction'][trainindex]
        trainIndexes, testIndexes = SplitTrials(range(len(Data.index)),
                                                trainFraction)
        return trainIndexes, testIndexes

    # Leave-folder-out split.
    trainIndexes, testIndexes = [], []
    known_folders = {Path(video).stem for video in cfg['video_sets']}
    requested = set(test_video_name)
    wrongName = sorted(requested - known_folders)
    test_video_name = sorted(requested & known_folders)

    if not test_video_name:
        print(
            "Folder name is missing or incorrect in parameter 'test_video_name'!"
        )
        return trainIndexes, testIndexes

    if wrongName:
        print("Folder", wrongName, "does not exist in",
              os.path.join(project_path, 'labeled-data'))
    print("Excluding the following folder (from training):", test_video_name)

    test_folders = set(test_video_name)
    for index, name in enumerate(Data.index):
        # The second path component of an index entry is the video (folder) name.
        if name.split(os.sep)[1] in test_folders:
            testIndexes.append(index)
        else:
            trainIndexes.append(index)

    return trainIndexes, testIndexes

Ejemplo n.º 14

Mostrar archivo

Archivo: multiple_individuals_trainingsetmanipulation.py Proyecto: timrosenow/DeepLabCut

def create_multianimaltraining_dataset(
    config,
    num_shuffles=1,
    Shuffles=None,
    windows2linux=False,
    net_type=None,
    numdigits=2,
):
    """
    Creates a training dataset for multi-animal datasets. Labels from all the extracted frames are merged into a single .h5 file.

    Only the videos included in the config file are used to create this dataset.

    [OPTIONAL] Use the function 'add_new_video' at any stage of the project to add more videos to the project.

    Important differences to the standard (single-animal) dataset creation:
     - stores coordinates rounded to numdigits decimal places
     - stores the joints of every individual separately and writes the data as a pickle file

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    num_shuffles : int, optional
        Number of shuffles of training dataset to create, i.e. [1,2,3] for num_shuffles=3. Default is set to 1.

    Shuffles: list of shuffles.
        Alternatively the user can also give a list of shuffles (integers!).

    windows2linux: bool.
        The annotation files contain paths formatted according to your operating system. If you label on windows
        but train & evaluate on a unix system (e.g. ubuntu, colab, Mac) set this variable to True to convert the paths.

    net_type: string
        Type of networks. Currently resnet_50, resnet_101, and resnet_152 are supported (not the MobileNets!)
        If None, the 'default_net_type' entry of the config file is used (falling back to resnet_50).

    numdigits: int, optional
        Number of decimal places the x/y coordinates are rounded to. Default is 2.

    Raises
    ------
    ValueError
        If net_type is given and does not contain "resnet".

    Example
    --------
    >>> deeplabcut.create_multianimaltraining_dataset('/analysis/project/reaching-task/config.yaml',num_shuffles=1)

    Windows:
    >>> deeplabcut.create_multianimaltraining_dataset(r'C:\\Users\\Ulf\\looming-task\\config.yaml',Shuffles=[3,17,5])
    --------
    """
    import pickle
    import re

    from skimage import io

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg["scorer"]
    project_path = cfg["project_path"]

    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    trainingset_path = Path(os.path.join(project_path, str(trainingsetfolder)))
    auxiliaryfunctions.attempttomakefolder(trainingset_path, recursive=True)

    Data = trainingsetmanipulation.merge_annotateddatasets(
        cfg, trainingset_path, windows2linux)
    if Data is None:
        return
    Data = Data[scorer]  # extract labeled data

    def strip_cropped_image_name(path):
        # utility function to split different crops from same image into either train or test!
        filename = os.path.split(path)[1]
        return filename.split("c")[0]

    img_names = Data.index.map(strip_cropped_image_name).unique()

    # loading & linking pretrained models
    # CURRENTLY ONLY ResNet supported!
    if net_type is None:
        net_type = cfg.get("default_net_type", "resnet_50")
    elif "resnet" not in net_type:
        raise ValueError("Currently only resnet is supported.")

    # multianimal case:
    dataset_type = "multi-animal-imgaug"
    partaffinityfield_graph = auxfun_multianimal.getpafgraph(cfg,
                                                             printnames=False)
    # ATTENTION: order has to be multibodyparts, then uniquebodyparts (for indexing)
    print("Utilizing the following graph:", partaffinityfield_graph)
    num_limbs = len(partaffinityfield_graph)
    partaffinityfield_predict = True

    # Loading the encoder (if necessary downloading from TF)
    dlcparent_path = auxiliaryfunctions.get_deeplabcut_path()
    defaultconfigfile = os.path.join(dlcparent_path, "pose_cfg.yaml")
    defaultinference_configfile = os.path.join(dlcparent_path,
                                               "inference_cfg.yaml")
    model_path, num_shuffles = auxfun_models.Check4weights(
        net_type, Path(dlcparent_path), num_shuffles)

    # Identity check: "== None" breaks (elementwise comparison) for array-like input.
    if Shuffles is None:
        Shuffles = range(1, num_shuffles + 1, 1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]

    (
        individuals,
        uniquebodyparts,
        multianimalbodyparts,
    ) = auxfun_multianimal.extractindividualsandbodyparts(cfg)
    num_multi = len(multianimalbodyparts)
    num_joints = num_multi + len(uniquebodyparts)

    TrainingFraction = cfg["TrainingFraction"]
    for shuffle in Shuffles:  # Creating shuffles starting from 1
        for trainFraction in TrainingFraction:
            # Split on the de-duplicated image names so that all crops of one
            # image end up on the same side of the split.
            _, test_inds_temp = trainingsetmanipulation.SplitTrials(
                range(len(img_names)), trainFraction)
            test_positions = set(test_inds_temp)
            # Map back to the original indices. Names are escaped because
            # str.contains interprets the pattern as a regular expression.
            temp = [
                re.escape(name) for i, name in enumerate(img_names)
                if i in test_positions
            ]
            mask = Data.index.str.contains("|".join(temp))
            testIndices = np.flatnonzero(mask)
            trainIndices = np.flatnonzero(~mask)

            ####################################################
            # Generating data structure with labeled information & frame metadata (for deep cut)
            ####################################################

            # Make training file!
            data = []
            print("Creating training data for ", shuffle, trainFraction)
            print("This can take some time...")
            for jj in tqdm(trainIndices):
                jointsannotated = False
                H = {}
                # load image to get dimensions:
                filename = Data.index[jj]
                im = io.imread(os.path.join(cfg["project_path"], filename))
                H["image"] = filename

                # size is stored as (channels, height, width)
                if np.ndim(im) >= 3:
                    H["size"] = np.array(
                        [np.shape(im)[2],
                         np.shape(im)[0],
                         np.shape(im)[1]])
                else:
                    # Grayscale image: a single channel.
                    H["size"] = np.array([1, np.shape(im)[0], np.shape(im)[1]])

                Joints = {}
                for prfxindex, prefix in enumerate(individuals):
                    joints = np.full((num_joints, 3), np.nan)
                    # Joint ids: multi-animal body parts come first, unique
                    # body parts (individual "single") are offset after them.
                    if prefix != "single":
                        bodyparts, offset = multianimalbodyparts, 0
                    else:
                        bodyparts, offset = uniquebodyparts, num_multi

                    indexjoints = offset
                    for bpindex, bodypart in enumerate(bodyparts):
                        try:
                            x = Data[prefix][bodypart]["x"][jj]
                            y = Data[prefix][bodypart]["y"][jj]
                        except KeyError:
                            # presumably this individual/body part has no
                            # column in the annotation table; skip it
                            continue
                        joints[indexjoints, 0] = offset + int(bpindex)
                        joints[indexjoints, 1] = round(x, numdigits)
                        joints[indexjoints, 2] = round(y, numdigits)
                        indexjoints += 1

                    # Drop missing body parts
                    joints = joints[~np.isnan(joints).any(axis=1)]
                    # Drop points lying outside the image
                    inside = np.logical_and.reduce((
                        joints[:, 1] < im.shape[1],
                        joints[:, 1] > 0,
                        joints[:, 2] < im.shape[0],
                        joints[:, 2] > 0,
                    ))
                    joints = joints[inside]

                    if np.size(joints) > 0:  # exclude images without labels
                        jointsannotated = True

                    Joints[prfxindex] = joints

                H["joints"] = Joints
                if jointsannotated:  # exclude images without labels
                    data.append(H)

            if len(trainIndices) == 0:
                # Nothing to train on for this shuffle/fraction.
                continue

            (
                datafilename,
                metadatafilename,
            ) = auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg)
            ################################################################################
            # Saving metadata and data file (Pickle file)
            ################################################################################
            auxiliaryfunctions.SaveMetadata(
                os.path.join(project_path, metadatafilename),
                data,
                trainIndices,
                testIndices,
                trainFraction,
            )

            datafilename = datafilename.split(".mat")[0] + ".pickle"
            with open(os.path.join(project_path, datafilename), "wb") as f:
                # Pickle the 'labeled-data' dictionary using the highest protocol available.
                pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

            ################################################################################
            # Creating file structure for training &
            # Test files as well as pose_yaml files (containing training and testing information)
            #################################################################################

            modelfoldername = auxiliaryfunctions.GetModelFolder(
                trainFraction, shuffle, cfg)
            model_dir = Path(config).parents[0] / modelfoldername
            auxiliaryfunctions.attempttomakefolder(model_dir, recursive=True)
            auxiliaryfunctions.attempttomakefolder(str(model_dir / "train"))
            auxiliaryfunctions.attempttomakefolder(str(model_dir / "test"))

            path_train_config = str(
                os.path.join(
                    cfg["project_path"],
                    Path(modelfoldername),
                    "train",
                    "pose_cfg.yaml",
                ))
            path_test_config = str(
                os.path.join(
                    cfg["project_path"],
                    Path(modelfoldername),
                    "test",
                    "pose_cfg.yaml",
                ))
            path_inference_config = str(
                os.path.join(
                    cfg["project_path"],
                    Path(modelfoldername),
                    "test",
                    "inference_cfg.yaml",
                ))

            jointnames = [str(bpt) for bpt in multianimalbodyparts]
            jointnames.extend([str(bpt) for bpt in uniquebodyparts])
            items2change = {
                "dataset": datafilename,
                "metadataset": metadatafilename,
                "num_joints": num_joints,
                "all_joints": [[i] for i in range(num_joints)],
                "all_joints_names": jointnames,
                "init_weights": model_path,
                "project_path": str(cfg["project_path"]),
                "net_type": net_type,
                "pairwise_loss_weight": 0.1,
                "pafwidth": 20,
                "partaffinityfield_graph": partaffinityfield_graph,
                "partaffinityfield_predict": partaffinityfield_predict,
                "weigh_only_present_joints": False,
                "num_limbs": num_limbs,
                "dataset_type": dataset_type,
                "optimizer": "adam",
                "batch_size": 8,
                "multi_step": [[1e-4, 7500], [5 * 1e-5, 12000],
                               [1e-5, 200000]],
                "save_iters": 10000,
                "display_iters": 500,
            }

            trainingdata = trainingsetmanipulation.MakeTrain_pose_yaml(
                items2change, path_train_config, defaultconfigfile)
            keys2save = [
                "dataset",
                "num_joints",
                "all_joints",
                "all_joints_names",
                "net_type",
                "init_weights",
                "global_scale",
                "location_refinement",
                "locref_stdev",
                "dataset_type",
                "partaffinityfield_predict",
                "pairwise_predict",
                "partaffinityfield_graph",
                "num_limbs",
            ]

            trainingsetmanipulation.MakeTest_pose_yaml(
                trainingdata,
                keys2save,
                path_test_config,
                nmsradius=5.0,
                minconfidence=0.01,
            )  # setting important def. values for inference

            # Setting inference cfg file:
            items2change = {
                "minimalnumberofconnections":
                int(len(cfg["multianimalbodyparts"]) / 2),
                "topktoretain":
                len(cfg["individuals"]) + 1 *
                (len(cfg["uniquebodyparts"]) > 0),
            }
            # TODO:   "distnormalization":  could be calculated here based on data and set
            # >> now we calculate this during evaluation (which is a good spot...)
            trainingsetmanipulation.MakeInference_yaml(
                items2change, path_inference_config,
                defaultinference_configfile)

            print(
                "The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!"
            )

Ejemplo n.º 15

Mostrar archivo

Archivo: trainingsetmanipulation.py Proyecto: stephfulton/DeepLabCut

def create_training_dataset(config,
                            num_shuffles=1,
                            Shuffles=None,
                            windows2linux=False):
    """
    Creates a training dataset. Labels from all the extracted frames are merged into a single .h5 file.

    Only the videos included in the config file are used to create this dataset.

    [OPTIONAL] Use the function 'add_new_video' at any stage of the project to add more videos to the project.

    If the 'resnet' entry of the config file is neither 50 nor 101, a message
    is printed and the function returns without creating any shuffle.

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    num_shuffles : int, optional
        Number of shuffles of training dataset to create, i.e. [1,2,3] for num_shuffles=3. Default is set to 1.

    Shuffles: list of shuffles.
        Alternatively the user can also give a list of shuffles (integers!).

    windows2linux: bool.
        The annotation files contain paths formatted according to your operating system. If you label on windows
        but train & evaluate on a unix system (e.g. ubuntu, colab, Mac) set this variable to True to convert the paths.

    Example
    --------
    >>> deeplabcut.create_training_dataset('/analysis/project/reaching-task/config.yaml',num_shuffles=1)
    Windows:
    >>> deeplabcut.create_training_dataset('C:\\Users\\Ulf\\looming-task\\config.yaml',Shuffles=[3,17,5])
    --------
    """
    from skimage import io
    import scipy.io as sio
    import deeplabcut
    import subprocess

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg['scorer']
    project_path = cfg['project_path']

    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    trainingset_path = Path(os.path.join(project_path, str(trainingsetfolder)))
    auxiliaryfunctions.attempttomakefolder(trainingset_path, recursive=True)

    Data = merge_annotateddatasets(cfg, project_path, trainingset_path,
                                   windows2linux)
    Data = Data[scorer]  # extract labeled data

    # set model type. we will allow more in the future.
    resnet_depth = cfg['resnet']
    if resnet_depth == 50:
        checkpoint_name = 'resnet_v1_50.ckpt'
    elif resnet_depth == 101:
        checkpoint_name = 'resnet_v1_101.ckpt'
    else:
        print(
            "Currently only ResNet 50 or 101 supported, please change 'resnet' entry in config.yaml!"
        )
        # Return here: without a supported network there is no checkpoint
        # path, and continuing would fail on the unbound resnet_path below.
        return

    net_type = 'resnet_' + str(resnet_depth)
    resnet_path = str(
        Path(deeplabcut.__file__).parents[0] /
        'pose_estimation_tensorflow/models/pretrained' / checkpoint_name)

    if not Path(resnet_path).is_file():
        # Downloads the ImageNet pretrained weights for ResNet.
        start = os.getcwd()
        os.chdir(str(Path(resnet_path).parents[0]))
        try:
            print("Downloading the pretrained model (ResNets)....")
            subprocess.call("download.sh", shell=True)
        finally:
            # Always restore the caller's working directory.
            os.chdir(start)

    # Identity check: "== None" breaks (elementwise comparison) for array-like input.
    if Shuffles is None:
        Shuffles = range(1, num_shuffles + 1, 1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]

    bodyparts = cfg['bodyparts']
    TrainingFraction = cfg['TrainingFraction']
    for shuffle in Shuffles:  # Creating shuffles starting from 1
        for trainFraction in TrainingFraction:
            trainIndexes, testIndexes = SplitTrials(range(len(Data.index)),
                                                    trainFraction)

            ####################################################
            # Generating data structure with labeled information & frame metadata (for deep cut)
            ####################################################

            # Make training file!
            data = []
            for jj in trainIndexes:
                H = {}
                # load image to get dimensions:
                filename = Data.index[jj]
                im = io.imread(os.path.join(cfg['project_path'], filename))
                H['image'] = filename

                height, width = np.shape(im)[0], np.shape(im)[1]
                # size is stored as (channels, height, width)
                if np.ndim(im) == 3:
                    H['size'] = np.array([np.shape(im)[2], height, width])
                else:
                    # Grayscale image: a single channel.
                    H['size'] = np.array([1, height, width])

                indexjoints = 0
                joints = np.zeros((len(bodyparts), 3)) * np.nan
                for bpindex, bodypart in enumerate(bodyparts):
                    # are labels in image? (comparisons with NaN are False,
                    # so missing labels are skipped here as well)
                    if Data[bodypart]['x'][jj] < width and Data[bodypart][
                            'y'][jj] < height:
                        joints[indexjoints, 0] = int(bpindex)
                        joints[indexjoints, 1] = Data[bodypart]['x'][jj]
                        joints[indexjoints, 2] = Data[bodypart]['y'][jj]
                        indexjoints += 1

                # drop NaN, i.e. lines for missing body parts
                joints = joints[np.isfinite(joints).all(axis=1)]

                # Sanity checks: remaining coordinates lie within the image.
                assert np.all(joints[:, 2] < height)  # y coordinate
                assert np.all(joints[:, 1] < width)  # x coordinate

                H['joints'] = np.array(joints, dtype=int)
                if np.size(joints) > 0:  # exclude images without labels
                    data.append(H)

            if len(trainIndexes) == 0:
                # Nothing to train on for this shuffle/fraction.
                continue

            datafilename, metadatafilename = auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg)
            ################################################################################
            # Saving metadata (Pickle file)
            ################################################################################
            auxiliaryfunctions.SaveMetadata(
                os.path.join(project_path, metadatafilename), data,
                trainIndexes, testIndexes, trainFraction)
            ################################################################################
            # Saving data file (convert to training file for deeper cut (*.mat))
            ################################################################################

            DTYPE = [('image', 'O'), ('size', 'O'), ('joints', 'O')]
            MatlabData = np.array(
                [(np.array([entry['image']], dtype='U'),
                  np.array([entry['size']]),
                  boxitintoacell(entry['joints'])) for entry in data],
                dtype=DTYPE)

            sio.savemat(os.path.join(project_path, datafilename),
                        {'dataset': MatlabData})

            ################################################################################
            # Creating file structure for training &
            # Test files as well as pose_yaml files (containing training and testing information)
            #################################################################################

            modelfoldername = auxiliaryfunctions.GetModelFolder(
                trainFraction, shuffle, cfg)
            model_dir = Path(config).parents[0] / modelfoldername
            auxiliaryfunctions.attempttomakefolder(model_dir, recursive=True)
            auxiliaryfunctions.attempttomakefolder(str(model_dir / 'train'))
            auxiliaryfunctions.attempttomakefolder(str(model_dir / 'test'))

            path_train_config = str(
                os.path.join(cfg['project_path'], Path(modelfoldername),
                             'train', 'pose_cfg.yaml'))
            path_test_config = str(
                os.path.join(cfg['project_path'], Path(modelfoldername),
                             'test', 'pose_cfg.yaml'))

            items2change = {
                "dataset": datafilename,
                "metadataset": metadatafilename,
                "num_joints": len(bodyparts),
                "all_joints": [[i] for i in range(len(bodyparts))],
                "all_joints_names": [str(bpt) for bpt in bodyparts],
                "init_weights": resnet_path,
                "project_path": str(cfg['project_path']),
                "net_type": net_type
            }

            defaultconfigfile = str(
                Path(deeplabcut.__file__).parents[0] / 'pose_cfg.yaml')

            trainingdata = MakeTrain_pose_yaml(items2change,
                                               path_train_config,
                                               defaultconfigfile)
            keys2save = [
                "dataset", "num_joints", "all_joints", "all_joints_names",
                "net_type", 'init_weights', 'global_scale',
                'location_refinement', 'locref_stdev'
            ]
            MakeTest_pose_yaml(trainingdata, keys2save, path_test_config)
            print(
                "The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!"
            )

Ejemplo n.º 16

Mostrar archivo

def return_evaluate_network_data(
    config,
    shuffle=0,
    trainingsetindex=0,
    comparisonbodyparts="all",
    Snapindex=None,
    rescale=False,
    fulldata=False,
    show_errors=True,
    modelprefix="",
    returnjustfns=True,
):
    """
    Return the stored evaluation results of a (previously evaluated) network.

    Nothing is predicted here: the function looks up the result files that an
    earlier evaluation run wrote to the evaluation folder and, if requested,
    recomputes the train/test errors from them.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    shuffle : integer
        Shuffle index of the training dataset. The default is 0.

    trainingsetindex : int, optional
        Index into the ``TrainingFraction`` list of config.yaml. By default
        the first entry. It is used directly as a list index, so it must be
        an integer.

    comparisonbodyparts : list of bodyparts, Default is "all".
        The average error will be computed for those body parts only (has to
        be a subset of the body parts).

    Snapindex : int or "all", optional
        Which snapshot to look at: -1 (last), an integer smaller than the
        number of snapshots, or "all". If None, ``snapshotindex`` from
        config.yaml is used.

    rescale : bool, default False
        If set, the human annotations are multiplied by the ``global_scale``
        value of the test pose_cfg.yaml before the errors are computed. The
        scale that was applied (1 otherwise) is reported in every result row.

    fulldata : bool, default False
        If set (and ``returnjustfns`` is False), the annotation and
        prediction tables are returned as well.

    show_errors : bool, default True
        Print train and test errors for every snapshot that has results.

    modelprefix : string, optional
        Passed on to the helpers that resolve model folder, evaluation folder
        and scorer name.

    returnjustfns : bool, default True
        If set, only the result file names are returned and no errors are
        computed.

    Returns
    -------
    If ``returnjustfns`` is True: list with one result file name per selected
    snapshot.

    Otherwise ``results``: one entry per snapshot for which a result file
    exists, each being
    [trainingsiterations, trainfraction (in %), shuffle, trainerror,
    testerror, pcutoff, trainerrorpcutoff, testerrorpcutoff,
    Snapshots[snapindex], scale, net_type].

    If additionally ``fulldata`` is True: ``(DATA, results)`` where every
    entry of ``DATA`` is
    [DataMachine, Data, data, trainIndices, testIndices, trainFraction,
    DLCscorer, comparisonbodyparts, cfg, evaluationfolder,
    Snapshots[snapindex]] and lines up with the entry of ``results`` at the
    same position.

    Raises
    ------
    FileNotFoundError
        If the test pose_cfg.yaml of the requested model cannot be found.
    ValueError
        If the snapshot index is neither -1, "all", nor smaller than the
        number of available snapshots.

    Examples
    --------
    Only list the result files of shuffle 1
    >>> return_evaluate_network_data('/analysis/project/reaching-task/config.yaml', shuffle=1)
    --------
    Return errors together with the full data
    >>> return_evaluate_network_data('/analysis/project/reaching-task/config.yaml', shuffle=1, fulldata=True, returnjustfns=False)
    """

    import os

    from deeplabcut.pose_estimation_tensorflow.config import load_config
    from deeplabcut.utils import auxiliaryfunctions

    start_path = os.getcwd()
    # Read file path for pose_config file. >> pass it on
    cfg = auxiliaryfunctions.read_config(config)

    # Folder holding the human annotated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)

    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts)

    ##################################################
    # Load data...
    ##################################################
    trainFraction = cfg["TrainingFraction"][trainingsetindex]
    datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
        trainingsetfolder, trainFraction, shuffle, cfg)
    modelfolder = os.path.join(
        cfg["project_path"],
        str(
            auxiliaryfunctions.GetModelFolder(trainFraction,
                                              shuffle,
                                              cfg,
                                              modelprefix=modelprefix)),
    )
    path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"
    # Load meta data
    data, trainIndices, testIndices, trainFraction = auxiliaryfunctions.LoadMetadata(
        os.path.join(cfg["project_path"], metadatafn))

    try:
        dlc_cfg = load_config(str(path_test_config))
    except FileNotFoundError as err:
        raise FileNotFoundError(
            "It seems the model for shuffle %s and trainFraction %s does not exist."
            % (shuffle, trainFraction)) from err

    ########################### RESCALING (to global scale)
    collected_data_path = os.path.join(
        cfg["project_path"],
        str(trainingsetfolder),
        "CollectedData_" + cfg["scorer"] + ".h5",
    )
    if rescale:
        scale = dlc_cfg["global_scale"]
        print("Rescaling Data to ", scale)
        Data = pd.read_hdf(collected_data_path) * scale
    else:
        scale = 1
        Data = pd.read_hdf(collected_data_path)

    evaluationfolder = os.path.join(
        cfg["project_path"],
        str(
            auxiliaryfunctions.GetEvaluationFolder(trainFraction,
                                                   shuffle,
                                                   cfg,
                                                   modelprefix=modelprefix)),
    )
    # Check which snapshots are available and sort them by # iterations
    Snapshots = np.array([
        fn.split(".")[0]
        for fn in os.listdir(os.path.join(str(modelfolder), "train"))
        if "index" in fn
    ])

    if len(Snapshots) == 0:
        print(
            "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
            % (shuffle, trainFraction))
        snapindices = []
    else:
        increasing_indices = np.argsort(
            [int(m.split("-")[1]) for m in Snapshots])
        Snapshots = Snapshots[increasing_indices]
        if Snapindex is None:
            Snapindex = cfg["snapshotindex"]

        if Snapindex == -1:
            snapindices = [-1]
        elif Snapindex == "all":
            snapindices = range(len(Snapshots))
        elif Snapindex < len(Snapshots):
            snapindices = [Snapindex]
        else:
            # Fail with a clear message instead of running into an unbound
            # `snapindices` in the loop below.
            raise ValueError(
                "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
            )

    DATA = []
    results = []
    resultsfns = []
    for snapindex in snapindices:
        # setting weights to corresponding snapshot.
        dlc_cfg["init_weights"] = os.path.join(str(modelfolder), "train",
                                               Snapshots[snapindex])
        # read how many training iterations that corresponds to.
        trainingsiterations = (
            dlc_cfg["init_weights"].split(os.sep)[-1]).split("-")[-1]

        # name for deeplabcut net (based on its parameters)
        DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
            cfg,
            shuffle,
            trainFraction,
            trainingsiterations,
            modelprefix=modelprefix)
        if not returnjustfns:
            print(
                "Retrieving ",
                DLCscorer,
                " with # of trainingiterations:",
                trainingsiterations,
            )

        (
            notanalyzed,
            resultsfilename,
            DLCscorer,
        ) = auxiliaryfunctions.CheckifNotEvaluated(str(evaluationfolder),
                                                   DLCscorer, DLCscorerlegacy,
                                                   Snapshots[snapindex])
        print(resultsfilename)
        resultsfns.append(resultsfilename)
        if returnjustfns:
            continue

        if notanalyzed or not os.path.isfile(resultsfilename):
            print("Model not trained/evaluated!")
            continue

        # Result file exists: combine annotations and predictions.
        DataMachine = pd.read_hdf(resultsfilename)
        DataCombined = pd.concat([Data.T, DataMachine.T], axis=0).T
        RMSE, RMSEpcutoff = pairwisedistances(
            DataCombined,
            cfg["scorer"],
            DLCscorer,
            cfg["pcutoff"],
            comparisonbodyparts,
        )

        testerror = np.nanmean(RMSE.iloc[testIndices].values.flatten())
        trainerror = np.nanmean(RMSE.iloc[trainIndices].values.flatten())
        testerrorpcutoff = np.nanmean(
            RMSEpcutoff.iloc[testIndices].values.flatten())
        trainerrorpcutoff = np.nanmean(
            RMSEpcutoff.iloc[trainIndices].values.flatten())
        if show_errors:
            print(
                "Results for",
                trainingsiterations,
                " training iterations:",
                int(100 * trainFraction),
                shuffle,
                "train error:",
                np.round(trainerror, 2),
                "pixels. Test error:",
                np.round(testerror, 2),
                " pixels.",
            )
            print(
                "With pcutoff of",
                cfg["pcutoff"],
                " train error:",
                np.round(trainerrorpcutoff, 2),
                "pixels. Test error:",
                np.round(testerrorpcutoff, 2),
                "pixels",
            )
            print("Snapshot", Snapshots[snapindex])

        results.append([
            trainingsiterations,
            int(100 * trainFraction),
            shuffle,
            np.round(trainerror, 2),
            np.round(testerror, 2),
            cfg["pcutoff"],
            np.round(trainerrorpcutoff, 2),
            np.round(testerrorpcutoff, 2),
            Snapshots[snapindex],
            scale,
            dlc_cfg["net_type"],
        ])
        # Only collect the full data when predictions were actually loaded;
        # otherwise DataMachine would be unbound or left over from the
        # previous snapshot.
        if fulldata:
            DATA.append([
                DataMachine,
                Data,
                data,
                trainIndices,
                testIndices,
                trainFraction,
                DLCscorer,
                comparisonbodyparts,
                cfg,
                evaluationfolder,
                Snapshots[snapindex],
            ])

    os.chdir(start_path)
    if returnjustfns:
        return resultsfns
    if fulldata:
        return DATA, results
    return results

Ejemplo n.º 17

Mostrar archivo

def create_training_dataset(config,num_shuffles=1,Shuffles=None,windows2linux=False,userfeedback=False,
        trainIndexes=None,testIndexes=None,
        net_type=None,augmenter_type=None,defaultconfigfile=None,items2change_pose=None):
    """
    Creates a training dataset.

    The annotations are merged with ``merge_annotateddatasets``; for every
    requested shuffle and every entry of ``TrainingFraction`` in config.yaml a
    train/test split, the metadata (pickle) file, the .mat data file and the
    train/test pose_cfg.yaml files are written.

    [OPTIONAL] Use the function 'add_new_video' at any stage of the project to add more videos to the project.

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    num_shuffles : int, optional
        Number of shuffles of training dataset to create, i.e. [1,2,3] for num_shuffles=3. Default is set to 1.

    Shuffles: list of shuffles.
        Alternatively the user can also give a list of shuffles (integers!). Non-integer entries are dropped.

    windows2linux: bool.
        The annotation files contain paths formatted according to your operating system. If you label on windows
        but train & evaluate on a unix system (e.g. ubuntu, colab, Mac) set this variable to True to convert the paths.

    userfeedback: bool, optional
        If this is set to false, then all requested train/test splits are created (no matter if they already exist). If you
        want to assure that previous splits etc. are not overwritten, then set this to True and you will be asked for each split.

    trainIndexes, testIndexes: lists of int, optional
        Row indices (into the merged annotation table) of a user-defined split. Pass both together. The same split is then used
        for every shuffle and training fraction. If both are None, a new split is drawn with ``SplitTrials`` for every shuffle
        and training fraction.

    net_type: string
        Type of networks. The name has to contain 'resnet' or 'mobilenet' (e.g. resnet_50, resnet_101, resnet_152,
        mobilenet_v2_1.0, mobilenet_v2_0.75, mobilenet_v2_0.5, mobilenet_v2_0.35). If None, 'default_net_type' of the config
        file is used (falling back to resnet_50).

    augmenter_type: string
        Type of augmenter. Currently default, imgaug, tensorpack, and deterministic are supported. If None,
        'default_augmenter' of the config file is used (falling back to default).

    defaultconfigfile: string, optional
        Path of the pose_cfg.yaml template. If None, the pose_cfg.yaml next to the deeplabcut package is used.

    items2change_pose: dict, optional
        Extra key/value pairs written into the training pose_cfg.yaml; they take precedence over the values set by this
        function. A 'net_type' entry overrides the ``net_type`` argument.

    Example
    --------
    >>> deeplabcut.create_training_dataset('/analysis/project/reaching-task/config.yaml',num_shuffles=1)
    Windows:
    >>> deeplabcut.create_training_dataset('C:\\Users\\Ulf\\looming-task\\config.yaml',Shuffles=[3,17,5])
    --------
    """

    from skimage import io
    import scipy.io as sio
    import deeplabcut

    if items2change_pose is None:
        items2change_pose = {}

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg['scorer']
    project_path = cfg['project_path']
    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg) #Path concatenation OS platform independent
    auxiliaryfunctions.attempttomakefolder(Path(os.path.join(project_path,str(trainingsetfolder))),recursive=True)

    Data = merge_annotateddatasets(cfg,project_path,Path(os.path.join(project_path,trainingsetfolder)),windows2linux)
    Data = Data[scorer] #extract labeled data

    # A net_type given via items2change_pose wins; without such an entry the
    # explicit net_type argument must be kept (not reset to None).
    if items2change_pose:
        net_type = items2change_pose.get('net_type', net_type)

    #loading & linking pretrained models
    if net_type is None:
        net_type = cfg.get('default_net_type', 'resnet_50')
    elif 'resnet' not in net_type and 'mobilenet' not in net_type:
        raise ValueError('Invalid network type:', net_type)

    if augmenter_type is None:
        augmenter_type = cfg.get('default_augmenter', 'default')
    elif augmenter_type not in ['default','imgaug','tensorpack','deterministic']:
        raise ValueError('Invalid augmenter type:', augmenter_type)

    # parent_path is needed for the weight check below even when the caller
    # supplies its own defaultconfigfile.
    parent_path = Path(os.path.dirname(deeplabcut.__file__))
    if defaultconfigfile is None:
        defaultconfigfile = str(parent_path / 'pose_cfg.yaml')
    model_path,num_shuffles=auxfun_models.Check4weights(net_type,parent_path,num_shuffles) #if the model does not exist >> throws error!

    if Shuffles is None:
        Shuffles = range(1,num_shuffles+1,1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i,int)]

    bodyparts = cfg['bodyparts']
    TrainingFraction = cfg['TrainingFraction']
    for shuffle in Shuffles: # Creating shuffles starting from 1
        for trainingsetindex,trainFraction in enumerate(TrainingFraction):
            if userfeedback:
                trainposeconfigfile,_,_ = training.return_train_network_path(config,shuffle=shuffle,trainingsetindex=trainingsetindex)
                if os.path.isfile(trainposeconfigfile):
                    askuser=input ("The model folder is already present. If you continue, it will overwrite the existing model (split). Do you want to continue?(yes/no): ")
                    if askuser.strip().lower() in ('no','n'):
                        raise Exception("Use the Shuffles argument as a list to specify a different shuffle index. Check out the help for more details.")

            # Keep the split in loop-local names: overwriting the arguments
            # would make every later shuffle/fraction reuse the first split.
            if trainIndexes is None and testIndexes is None:
                train_inds, test_inds = SplitTrials(range(len(Data.index)), trainFraction)
            else:
                train_inds, test_inds = trainIndexes, testIndexes
                print("You passed a split with the following fraction:", len(train_inds)*1./(len(test_inds)+len(train_inds))*100)
            ####################################################
            # Generating data structure with labeled information & frame metadata (for deep cut)
            ####################################################
            # Make training file!
            data = []
            for jj in train_inds:
                H = {}
                # load image to get dimensions:
                filename = Data.index[jj]
                im = io.imread(os.path.join(cfg['project_path'],filename))
                H['image'] = filename

                if np.ndim(im)==3:
                    H['size'] = np.array(
                        [np.shape(im)[2],
                         np.shape(im)[0],
                         np.shape(im)[1]])
                else:
                    # Grayscale image: single channel
                    H['size'] = np.array([1, np.shape(im)[0], np.shape(im)[1]])

                indexjoints=0
                joints=np.zeros((len(bodyparts),3))*np.nan
                for bpindex,bodypart in enumerate(bodyparts):
                    # check whether the labels are positive and inside the img
                    x_pos_n_inside = 0 <= Data[bodypart]['x'][jj] < np.shape(im)[1]
                    y_pos_n_inside = 0 <= Data[bodypart]['y'][jj] < np.shape(im)[0]
                    if x_pos_n_inside and y_pos_n_inside:
                        joints[indexjoints,0]=int(bpindex)
                        joints[indexjoints,1]=Data[bodypart]['x'][jj]
                        joints[indexjoints,2]=Data[bodypart]['y'][jj]
                        indexjoints+=1

                joints = joints[np.where(
                    np.prod(np.isfinite(joints),
                            1))[0], :]  # drop NaN, i.e. lines for missing body parts

                assert (np.prod(np.array(joints[:, 2]) < np.shape(im)[0])
                        )  # y coordinate within image?
                assert (np.prod(np.array(joints[:, 1]) < np.shape(im)[1])
                        )  # x coordinate within image?

                H['joints'] = np.array(joints, dtype=int)
                if np.size(joints)>0: #exclude images without labels
                    data.append(H)

            if len(train_inds)>0:

                datafilename,metadatafilename=auxiliaryfunctions.GetDataandMetaDataFilenames(trainingsetfolder,trainFraction,shuffle,cfg)
                ################################################################################
                # Saving metadata (Pickle file)
                ################################################################################
                auxiliaryfunctions.SaveMetadata(os.path.join(project_path,metadatafilename),data, train_inds, test_inds, trainFraction)
                ################################################################################
                # Saving data file (convert to training file for deeper cut (*.mat))
                ################################################################################

                DTYPE = [('image', 'O'), ('size', 'O'), ('joints', 'O')]
                MatlabData = np.array(
                    [(np.array([entry['image']], dtype='U'),
                      np.array([entry['size']]),
                      boxitintoacell(entry['joints']))
                     for entry in data],
                    dtype=DTYPE)

                sio.savemat(os.path.join(project_path,datafilename), {'dataset': MatlabData})

                ################################################################################
                # Creating file structure for training &
                # Test files as well as pose_yaml files (containing training and testing information)
                #################################################################################
                modelfoldername=auxiliaryfunctions.GetModelFolder(trainFraction,shuffle,cfg)
                auxiliaryfunctions.attempttomakefolder(Path(config).parents[0] / modelfoldername,recursive=True)
                auxiliaryfunctions.attempttomakefolder(str(Path(config).parents[0] / modelfoldername)+ '/'+ '/train')
                auxiliaryfunctions.attempttomakefolder(str(Path(config).parents[0] / modelfoldername)+ '/'+ '/test')

                path_train_config = str(os.path.join(cfg['project_path'],Path(modelfoldername),'train','pose_cfg.yaml'))
                path_test_config = str(os.path.join(cfg['project_path'],Path(modelfoldername),'test','pose_cfg.yaml'))

                items2change = {
                    "dataset": datafilename,
                    "metadataset": metadatafilename,
                    "num_joints": len(bodyparts),
                    "all_joints": [[i] for i in range(len(bodyparts))],
                    "all_joints_names": [str(bpt) for bpt in bodyparts],
                    "init_weights": model_path,
                    "project_path": str(cfg['project_path']),
                    "net_type": net_type,
                    "dataset_type": augmenter_type
                }
                # User-supplied entries take precedence over the defaults above.
                items2change.update(items2change_pose)
                trainingdata = MakeTrain_pose_yaml(items2change,path_train_config,defaultconfigfile)
                keys2save = [
                    "dataset", "num_joints", "all_joints", "all_joints_names",
                    "net_type", 'init_weights', 'global_scale', 'location_refinement',
                    'locref_stdev'
                ]
                MakeTest_pose_yaml(trainingdata, keys2save,path_test_config)
                print("The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!")

    return

Ejemplo n.º 18

Mostrar archivo

def evaluate_network(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    plotting=False,
    show_errors=True,
    comparisonbodyparts="all",
    gputouse=None,
    rescale=False,
    modelprefix="",
):
    """

    Evaluates the network based on the saved models at different stages of the training network.\n
    The evaluation results are stored in the .h5 and .csv file under the subdirectory 'evaluation_results'.
    Change the snapshotindex parameter in the config file to 'all' in order to evaluate all the saved models.
    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    Shuffles: list, optional
        List of integers specifying the shuffle indices of the training dataset. The default is [1]

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a list in config.yaml). This
        variable can also be set to "all".

    plotting: bool or str, optional
        Plots the predictions on the train and test images.
        The default is ``False``; if provided it must be either ``True``, ``False``, "bodypart", or "individual".
        Setting to ``True`` defaults as "bodypart" for multi-animal projects.

    show_errors: bool, optional
        Display train and test errors. The default is `True``

    comparisonbodyparts: list of bodyparts, Default is "all".
        The average error will be computed for those body parts only (Has to be a subset of the body parts).

    gputouse: int, optional. Natural number indicating the number of your GPU (see number in nvidia-smi). If you do not have a GPU put None.
        See: https://nvidia.custhelp.com/app/answers/detail/a_id/3751/~/useful-nvidia-smi-queries

    rescale: bool, default False
        Evaluate the model at the 'global_scale' variable (as set in the test/pose_config.yaml file for a particular project). I.e. every
        image will be resized according to that scale and prediction will be compared to the resized ground truth. The error will be reported
        in pixels at rescaled to the *original* size. I.e. For a [200,200] pixel image evaluated at global_scale=.5, the predictions are calculated
        on [100,100] pixel images, compared to 1/2*ground truth and this error is then multiplied by 2!. The evaluation images are also shown for the
        original size!

    Examples
    --------
    If you do not want to plot, just evaluate shuffle 1.
    >>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml', Shuffles=[1])
    --------
    If you want to plot and evaluate shuffle 0 and 1.
    >>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml',Shuffles=[0, 1],plotting = True)

    --------
    If you want to plot assemblies for a maDLC project:
    >>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml',Shuffles=[1],plotting = "individual")

    Note: this defaults to standard plotting for single-animal projects.

    """
    if plotting not in (True, False, "bodypart", "individual"):
        raise ValueError(f"Unknown value for `plotting`={plotting}")

    import os

    start_path = os.getcwd()
    from deeplabcut.utils import auxiliaryfunctions

    cfg = auxiliaryfunctions.read_config(config)

    if cfg.get("multianimalproject", False):
        from .evaluate_multianimal import evaluate_multianimal_full

        # TODO: Make this code not so redundant!
        evaluate_multianimal_full(
            config=config,
            Shuffles=Shuffles,
            trainingsetindex=trainingsetindex,
            plotting=plotting,
            comparisonbodyparts=comparisonbodyparts,
            gputouse=gputouse,
            modelprefix=modelprefix,
        )
    else:
        from deeplabcut.utils.auxfun_videos import imread, imresize
        from deeplabcut.pose_estimation_tensorflow.core import predict
        from deeplabcut.pose_estimation_tensorflow.config import load_config
        from deeplabcut.pose_estimation_tensorflow.datasets.utils import data_to_input
        from deeplabcut.utils import auxiliaryfunctions, conversioncode
        import tensorflow as tf

        # If a string was passed in, auto-convert to True for backward compatibility
        plotting = bool(plotting)

        if "TF_CUDNN_USE_AUTOTUNE" in os.environ:
            del os.environ[
                "TF_CUDNN_USE_AUTOTUNE"]  # was potentially set during training

        tf.compat.v1.reset_default_graph()
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  #
        #    tf.logging.set_verbosity(tf.logging.WARN)

        start_path = os.getcwd()
        # Read file path for pose_config file. >> pass it on
        cfg = auxiliaryfunctions.read_config(config)
        if gputouse is not None:  # gpu selectinon
            os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

        if trainingsetindex == "all":
            TrainingFractions = cfg["TrainingFraction"]
        else:
            if (trainingsetindex < len(cfg["TrainingFraction"])
                    and trainingsetindex >= 0):
                TrainingFractions = [
                    cfg["TrainingFraction"][int(trainingsetindex)]
                ]
            else:
                raise Exception(
                    "Please check the trainingsetindex! ",
                    trainingsetindex,
                    " should be an integer from 0 .. ",
                    int(len(cfg["TrainingFraction"]) - 1),
                )

        # Loading human annotatated data
        trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
        Data = pd.read_hdf(
            os.path.join(
                cfg["project_path"],
                str(trainingsetfolder),
                "CollectedData_" + cfg["scorer"] + ".h5",
            ))

        # Get list of body parts to evaluate network for
        comparisonbodyparts = (
            auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
                cfg, comparisonbodyparts))
        # Make folder for evaluation
        auxiliaryfunctions.attempttomakefolder(
            str(cfg["project_path"] + "/evaluation-results/"))
        for shuffle in Shuffles:
            for trainFraction in TrainingFractions:
                ##################################################
                # Load and setup CNN part detector
                ##################################################
                datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
                    trainingsetfolder, trainFraction, shuffle, cfg)
                modelfolder = os.path.join(
                    cfg["project_path"],
                    str(
                        auxiliaryfunctions.GetModelFolder(
                            trainFraction,
                            shuffle,
                            cfg,
                            modelprefix=modelprefix)),
                )

                path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"
                # Load meta data
                (
                    data,
                    trainIndices,
                    testIndices,
                    trainFraction,
                ) = auxiliaryfunctions.LoadMetadata(
                    os.path.join(cfg["project_path"], metadatafn))

                try:
                    dlc_cfg = load_config(str(path_test_config))
                except FileNotFoundError:
                    raise FileNotFoundError(
                        "It seems the model for shuffle %s and trainFraction %s does not exist."
                        % (shuffle, trainFraction))

                # change batch size, if it was edited during analysis!
                dlc_cfg[
                    "batch_size"] = 1  # in case this was edited for analysis.

                # Create folder structure to store results.
                evaluationfolder = os.path.join(
                    cfg["project_path"],
                    str(
                        auxiliaryfunctions.GetEvaluationFolder(
                            trainFraction,
                            shuffle,
                            cfg,
                            modelprefix=modelprefix)),
                )
                auxiliaryfunctions.attempttomakefolder(evaluationfolder,
                                                       recursive=True)
                # path_train_config = modelfolder / 'train' / 'pose_cfg.yaml'

                # Check which snapshots are available and sort them by # iterations
                Snapshots = np.array([
                    fn.split(".")[0] for fn in os.listdir(
                        os.path.join(str(modelfolder), "train"))
                    if "index" in fn
                ])
                try:  # check if any where found?
                    Snapshots[0]
                except IndexError:
                    raise FileNotFoundError(
                        "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                        % (shuffle, trainFraction))

                increasing_indices = np.argsort(
                    [int(m.split("-")[1]) for m in Snapshots])
                Snapshots = Snapshots[increasing_indices]

                if cfg["snapshotindex"] == -1:
                    snapindices = [-1]
                elif cfg["snapshotindex"] == "all":
                    snapindices = range(len(Snapshots))
                elif cfg["snapshotindex"] < len(Snapshots):
                    snapindices = [cfg["snapshotindex"]]
                else:
                    raise ValueError(
                        "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
                    )

                final_result = []

                ########################### RESCALING (to global scale)
                if rescale:
                    scale = dlc_cfg["global_scale"]
                    Data = (pd.read_hdf(
                        os.path.join(
                            cfg["project_path"],
                            str(trainingsetfolder),
                            "CollectedData_" + cfg["scorer"] + ".h5",
                        )) * scale)
                else:
                    scale = 1

                conversioncode.guarantee_multiindex_rows(Data)
                ##################################################
                # Compute predictions over images
                ##################################################
                for snapindex in snapindices:
                    dlc_cfg["init_weights"] = os.path.join(
                        str(modelfolder), "train", Snapshots[snapindex]
                    )  # setting weights to corresponding snapshot.
                    trainingsiterations = (
                        dlc_cfg["init_weights"].split(os.sep)[-1]
                    ).split(
                        "-"
                    )[-1]  # read how many training siterations that corresponds to.

                    # Name for deeplabcut net (based on its parameters)
                    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                        cfg,
                        shuffle,
                        trainFraction,
                        trainingsiterations,
                        modelprefix=modelprefix,
                    )
                    print(
                        "Running ",
                        DLCscorer,
                        " with # of training iterations:",
                        trainingsiterations,
                    )
                    (
                        notanalyzed,
                        resultsfilename,
                        DLCscorer,
                    ) = auxiliaryfunctions.CheckifNotEvaluated(
                        str(evaluationfolder),
                        DLCscorer,
                        DLCscorerlegacy,
                        Snapshots[snapindex],
                    )
                    if notanalyzed:
                        # Specifying state of model (snapshot / training state)
                        sess, inputs, outputs = predict.setup_pose_prediction(
                            dlc_cfg)
                        Numimages = len(Data.index)
                        PredicteData = np.zeros(
                            (Numimages, 3 * len(dlc_cfg["all_joints_names"])))
                        print("Running evaluation ...")
                        for imageindex, imagename in tqdm(enumerate(
                                Data.index)):
                            image = imread(
                                os.path.join(cfg["project_path"], *imagename),
                                mode="skimage",
                            )
                            if scale != 1:
                                image = imresize(image, scale)

                            image_batch = data_to_input(image)
                            # Compute prediction with the CNN
                            outputs_np = sess.run(
                                outputs, feed_dict={inputs: image_batch})
                            scmap, locref = predict.extract_cnn_output(
                                outputs_np, dlc_cfg)

                            # Extract maximum scoring location from the heatmap, assume 1 person
                            pose = predict.argmax_pose_predict(
                                scmap, locref, dlc_cfg["stride"])
                            PredicteData[imageindex, :] = (
                                pose.flatten()
                            )  # NOTE: thereby     cfg_test['all_joints_names'] should be same order as bodyparts!

                        sess.close()  # closes the current tf session

                        index = pd.MultiIndex.from_product(
                            [
                                [DLCscorer],
                                dlc_cfg["all_joints_names"],
                                ["x", "y", "likelihood"],
                            ],
                            names=["scorer", "bodyparts", "coords"],
                        )

                        # Saving results
                        DataMachine = pd.DataFrame(PredicteData,
                                                   columns=index,
                                                   index=Data.index)
                        DataMachine.to_hdf(resultsfilename, "df_with_missing")

                        print(
                            "Analysis is done and the results are stored (see evaluation-results) for snapshot: ",
                            Snapshots[snapindex],
                        )
                        DataCombined = pd.concat([Data.T, DataMachine.T],
                                                 axis=0,
                                                 sort=False).T

                        RMSE, RMSEpcutoff = pairwisedistances(
                            DataCombined,
                            cfg["scorer"],
                            DLCscorer,
                            cfg["pcutoff"],
                            comparisonbodyparts,
                        )
                        testerror = np.nanmean(
                            RMSE.iloc[testIndices].values.flatten())
                        trainerror = np.nanmean(
                            RMSE.iloc[trainIndices].values.flatten())
                        testerrorpcutoff = np.nanmean(
                            RMSEpcutoff.iloc[testIndices].values.flatten())
                        trainerrorpcutoff = np.nanmean(
                            RMSEpcutoff.iloc[trainIndices].values.flatten())
                        results = [
                            trainingsiterations,
                            int(100 * trainFraction),
                            shuffle,
                            np.round(trainerror, 2),
                            np.round(testerror, 2),
                            cfg["pcutoff"],
                            np.round(trainerrorpcutoff, 2),
                            np.round(testerrorpcutoff, 2),
                        ]
                        final_result.append(results)

                        if show_errors:
                            print(
                                "Results for",
                                trainingsiterations,
                                " training iterations:",
                                int(100 * trainFraction),
                                shuffle,
                                "train error:",
                                np.round(trainerror, 2),
                                "pixels. Test error:",
                                np.round(testerror, 2),
                                " pixels.",
                            )
                            print(
                                "With pcutoff of",
                                cfg["pcutoff"],
                                " train error:",
                                np.round(trainerrorpcutoff, 2),
                                "pixels. Test error:",
                                np.round(testerrorpcutoff, 2),
                                "pixels",
                            )
                            if scale != 1:
                                print(
                                    "The predictions have been calculated for rescaled images (and rescaled ground truth). Scale:",
                                    scale,
                                )
                            print(
                                "Thereby, the errors are given by the average distances between the labels by DLC and the scorer."
                            )

                        if plotting:
                            print("Plotting...")
                            foldername = os.path.join(
                                str(evaluationfolder),
                                "LabeledImages_" + DLCscorer + "_" +
                                Snapshots[snapindex],
                            )
                            auxiliaryfunctions.attempttomakefolder(foldername)
                            Plotting(
                                cfg,
                                comparisonbodyparts,
                                DLCscorer,
                                trainIndices,
                                DataCombined * 1.0 / scale,
                                foldername,
                            )  # Rescaling coordinates to have figure in original size!

                        tf.compat.v1.reset_default_graph()
                        # print(final_result)
                    else:
                        DataMachine = pd.read_hdf(resultsfilename)
                        conversioncode.guarantee_multiindex_rows(DataMachine)
                        if plotting:
                            DataCombined = pd.concat([Data.T, DataMachine.T],
                                                     axis=0,
                                                     sort=False).T
                            print(
                                "Plotting...(attention scale might be inconsistent in comparison to when data was analyzed; i.e. if you used rescale)"
                            )
                            foldername = os.path.join(
                                str(evaluationfolder),
                                "LabeledImages_" + DLCscorer + "_" +
                                Snapshots[snapindex],
                            )
                            auxiliaryfunctions.attempttomakefolder(foldername)
                            Plotting(
                                cfg,
                                comparisonbodyparts,
                                DLCscorer,
                                trainIndices,
                                DataCombined * 1.0 / scale,
                                foldername,
                            )

                if len(final_result
                       ) > 0:  # Only append if results were calculated
                    make_results_file(final_result, evaluationfolder,
                                      DLCscorer)
                    print(
                        "The network is evaluated and the results are stored in the subdirectory 'evaluation_results'."
                    )
                    print(
                        "Please check the results, then choose the best model (snapshot) for prediction. You can update the config.yaml file with the appropriate index for the 'snapshotindex'.\nUse the function 'analyze_video' to make predictions on new videos."
                    )
                    print(
                        "Otherwise, consider adding more labeled-data and retraining the network (see DeepLabCut workflow Fig 2, Nath 2019)"
                    )

    # returning to initial folder
    os.chdir(str(start_path))

Ejemplo n.º 19

Mostrar archivo

Archivo: evaluate.py Proyecto: shaanchandra/Pose_Estimation

def evaluate_network(config,
                     Shuffles=[1],
                     plotting=None,
                     show_errors=True,
                     comparisonbodyparts="all",
                     gputouse=None):
    """
    Evaluate the saved snapshots of a trained network against the human labels.

    For every shuffle in ``Shuffles`` and every training fraction listed in the
    project configuration, the selected snapshot(s) are run on all labeled
    images. The predictions are written to an .h5 file in the evaluation
    folder of the project, and the train/test errors are handed to
    ``make_results_file``. Set the ``snapshotindex`` parameter in the config
    file to 'all' in order to evaluate all the saved models.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    Shuffles: list, optional
        List of integers specifying the shuffle indices of the training dataset. The default is [1].
        (The list default is only iterated, never mutated, so sharing it across calls is safe.)

    plotting: bool, optional
        Plots the predictions on the train and test images. The default is ``None``, which is
        treated like ``False``.

    show_errors: bool, optional
        Display train and test errors. The default is ``True``.

    comparisonbodyparts: list of bodyparts, Default is "all".
        The average error will be computed for those body parts only (Has to be a subset of the body parts).

    gputouse: int, optional. Natural number indicating the number of your GPU (see number in nvidia-smi). If you do not have a GPU put None.
    See: https://nvidia.custhelp.com/app/answers/detail/a_id/3751/~/useful-nvidia-smi-queries

    Raises
    ------
    FileNotFoundError
        If the test ``pose_cfg.yaml`` of a shuffle/training fraction cannot be loaded, or if no
        snapshot is found in the corresponding ``train`` folder.
    ValueError
        If ``snapshotindex`` in the config file is neither -1, 'all', nor an index below the
        number of available snapshots.

    Examples
    --------
    If you do not want to plot
    >>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml', Shuffles=[1])
    --------

    If you want to plot
    >>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml', Shuffles=[1], plotting=True)
    """
    import os
    from skimage import io
    import skimage.color

    from deeplabcut.pose_estimation_tensorflow.nnet import predict as ptf_predict
    from deeplabcut.pose_estimation_tensorflow.config import load_config
    from deeplabcut.pose_estimation_tensorflow.dataset.pose_dataset import data_to_input
    from deeplabcut.utils import auxiliaryfunctions, visualization
    import tensorflow as tf

    # Was potentially set during training.
    os.environ.pop('TF_CUDNN_USE_AUTOTUNE', None)

    # Everything newer than TF 1.12 (including 2.x) exposes the graph-mode
    # API under tf.compat.v1; older releases have it at top level.
    vers = (tf.__version__).split('.')
    if (int(vers[0]), int(vers[1])) > (1, 12):
        TF = tf.compat.v1
    else:
        TF = tf

    TF.reset_default_graph()

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  #
    #    tf.logging.set_verbosity(tf.logging.WARN)

    start_path = os.getcwd()
    # Read file path for pose_config file. >> pass it on
    cfg = auxiliaryfunctions.read_config(config)
    if gputouse is not None:  # gpu selection
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gputouse)

    # Loading human annotated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(cfg["project_path"], str(trainingsetfolder),
                     'CollectedData_' + cfg["scorer"] + '.h5'),
        'df_with_missing')
    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts)
    # Make folder for evaluation
    auxiliaryfunctions.attempttomakefolder(
        str(cfg["project_path"] + "/evaluation-results/"))
    for shuffle in Shuffles:
        for trainFraction in cfg["TrainingFraction"]:
            ##################################################
            # Load and setup CNN part detector
            ##################################################
            datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg)
            modelfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetModelFolder(trainFraction, shuffle,
                                                      cfg)))
            path_test_config = Path(modelfolder) / 'test' / 'pose_cfg.yaml'
            # Load meta data
            data, trainIndices, testIndices, trainFraction = auxiliaryfunctions.LoadMetadata(
                os.path.join(cfg["project_path"], metadatafn))

            try:
                dlc_cfg = load_config(str(path_test_config))
            except FileNotFoundError as err:
                raise FileNotFoundError(
                    "It seems the model for shuffle %s and trainFraction %s does not exist."
                    % (shuffle, trainFraction)) from err

            # Evaluation feeds one image at a time, so reset the batch size
            # in case it was edited for analysis.
            dlc_cfg['batch_size'] = 1
            # Create folder structure to store results.
            evaluationfolder = os.path.join(
                cfg["project_path"],
                str(
                    auxiliaryfunctions.GetEvaluationFolder(
                        trainFraction, shuffle, cfg)))
            auxiliaryfunctions.attempttomakefolder(evaluationfolder,
                                                   recursive=True)

            # Check which snapshots are available and sort them by # iterations
            Snapshots = np.array([
                fn.split('.')[0]
                for fn in os.listdir(os.path.join(str(modelfolder), 'train'))
                if "index" in fn
            ])
            if len(Snapshots) == 0:
                raise FileNotFoundError(
                    "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                    % (shuffle, trainFraction))

            increasing_indices = np.argsort(
                [int(m.split('-')[1]) for m in Snapshots])
            Snapshots = Snapshots[increasing_indices]

            snapshotindex = cfg["snapshotindex"]
            if snapshotindex == -1:
                snapindices = [-1]
            elif snapshotindex == "all":
                snapindices = range(len(Snapshots))
            elif snapshotindex < len(Snapshots):
                snapindices = [snapshotindex]
            else:
                # Fail loudly: merely printing would leave `snapindices`
                # undefined (or stale from the previous iteration).
                raise ValueError(
                    "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
                )

            final_result = []
            ##################################################
            # Compute predictions over images
            ##################################################
            for snapindex in snapindices:
                # Setting weights to corresponding snapshot.
                dlc_cfg['init_weights'] = os.path.join(
                    str(modelfolder), 'train', Snapshots[snapindex])
                # The text after the last '-' of the snapshot name is the
                # number of training iterations it corresponds to.
                trainingsiterations = (
                    dlc_cfg['init_weights'].split(os.sep)[-1]).split('-')[-1]

                # Name for deeplabcut net (based on its parameters)
                DLCscorer = auxiliaryfunctions.GetScorerName(
                    cfg, shuffle, trainFraction, trainingsiterations)
                print("Running ", DLCscorer, " with # of trainingiterations:",
                      trainingsiterations)
                resultsfilename = os.path.join(
                    str(evaluationfolder),
                    DLCscorer + '-' + Snapshots[snapindex] + '.h5')
                try:
                    # An existing results file means this snapshot was
                    # evaluated before; nothing is recomputed for it.
                    DataMachine = pd.read_hdf(resultsfilename,
                                              'df_with_missing')
                    print("This net has already been evaluated!")
                except FileNotFoundError:
                    # Specifying state of model (snapshot / training state)
                    sess, inputs, outputs = ptf_predict.setup_pose_prediction(
                        dlc_cfg)

                    Numimages = len(Data.index)
                    # Three columns (x, y, likelihood) per joint.
                    PredicteData = np.zeros(
                        (Numimages, 3 * len(dlc_cfg['all_joints_names'])))
                    print("Analyzing data...")
                    for imageindex, imagename in tqdm(enumerate(Data.index)):
                        image = io.imread(os.path.join(cfg['project_path'],
                                                       imagename),
                                          mode='RGB')
                        image = skimage.color.gray2rgb(image)
                        image_batch = data_to_input(image)

                        # Compute prediction with the CNN
                        outputs_np = sess.run(outputs,
                                              feed_dict={inputs: image_batch})
                        scmap, locref = ptf_predict.extract_cnn_output(
                            outputs_np, dlc_cfg)

                        # Extract maximum scoring location from the heatmap, assume 1 person
                        pose = ptf_predict.argmax_pose_predict(
                            scmap, locref, dlc_cfg.stride)
                        # NOTE: thereby cfg_test['all_joints_names'] should be same order as bodyparts!
                        PredicteData[imageindex, :] = pose.flatten()

                    sess.close()  # closes the current tf session

                    index = pd.MultiIndex.from_product(
                        [[DLCscorer], dlc_cfg['all_joints_names'],
                         ['x', 'y', 'likelihood']],
                        names=['scorer', 'bodyparts', 'coords'])

                    # Saving results
                    DataMachine = pd.DataFrame(PredicteData,
                                               columns=index,
                                               index=Data.index.values)
                    DataMachine.to_hdf(resultsfilename,
                                       'df_with_missing',
                                       format='table',
                                       mode='w')

                    print("Done and results stored for snapshot: ",
                          Snapshots[snapindex])
                    DataCombined = pd.concat([Data.T, DataMachine.T], axis=0).T
                    RMSE, RMSEpcutoff = pairwisedistances(
                        DataCombined, cfg["scorer"], DLCscorer, cfg["pcutoff"],
                        comparisonbodyparts)
                    testerror = np.nanmean(
                        RMSE.iloc[testIndices].values.flatten())
                    trainerror = np.nanmean(
                        RMSE.iloc[trainIndices].values.flatten())
                    testerrorpcutoff = np.nanmean(
                        RMSEpcutoff.iloc[testIndices].values.flatten())
                    trainerrorpcutoff = np.nanmean(
                        RMSEpcutoff.iloc[trainIndices].values.flatten())
                    results = [
                        trainingsiterations,
                        int(100 * trainFraction), shuffle,
                        np.round(trainerror, 2),
                        np.round(testerror, 2), cfg["pcutoff"],
                        np.round(trainerrorpcutoff, 2),
                        np.round(testerrorpcutoff, 2)
                    ]
                    final_result.append(results)

                    if show_errors:
                        print("Results for",
                              trainingsiterations, " training iterations:",
                              int(100 * trainFraction), shuffle,
                              "train error:",
                              np.round(trainerror, 2), "pixels. Test error:",
                              np.round(testerror, 2), " pixels.")
                        print("With pcutoff of",
                              cfg["pcutoff"], " train error:",
                              np.round(trainerrorpcutoff,
                                       2), "pixels. Test error:",
                              np.round(testerrorpcutoff, 2), "pixels")
                        print(
                            "Thereby, the errors are given by the average distances between the labels by DLC and the scorer."
                        )

                    if plotting:
                        print("Plotting...")
                        colors = visualization.get_cmap(
                            len(comparisonbodyparts), name=cfg['colormap'])

                        foldername = os.path.join(
                            str(evaluationfolder), 'LabeledImages_' +
                            DLCscorer + '_' + Snapshots[snapindex])
                        auxiliaryfunctions.attempttomakefolder(foldername)
                        NumFrames = np.size(DataCombined.index)
                        for ind in np.arange(NumFrames):
                            visualization.PlottingandSaveLabeledFrame(
                                DataCombined, ind, trainIndices, cfg, colors,
                                comparisonbodyparts, DLCscorer, foldername)

                    TF.reset_default_graph()

            # Only write a results file when something was newly evaluated;
            # otherwise an empty result list would be handed to
            # make_results_file for snapshots that were evaluated earlier.
            if final_result:
                make_results_file(final_result, evaluationfolder, DLCscorer)
                print(
                    "The network is evaluated and the results are stored in the subdirectory 'evaluation_results'."
                )
                print(
                    "If it generalizes well, choose the best model for prediction and update the config file with the appropriate index for the 'snapshotindex'.\nUse the function 'analyze_video' to make predictions on new videos."
                )
                print(
                    "Otherwise consider retraining the network (see DeepLabCut workflow Fig 2)"
                )

    # returning to initial folder
    os.chdir(str(start_path))

Ejemplo n.º 20

Mostrar archivo

def create_pretrained_human_project(project,
                                    experimenter,
                                    videos,
                                    working_directory=None,
                                    copy_videos=False,
                                    videotype='.avi',
                                    createlabeledvideo=True,
                                    analyzevideo=True):
    r"""
    Creates a demo human project and analyzes a video with ResNet 101 weights pretrained on MPII Human Pose. This is from the DeeperCut paper by Insafutdinov et al. https://arxiv.org/abs/1605.03170 Please make sure to cite it too if you use this code!

    Parameters
    ----------
    project : string
        String containing the name of the project.

    experimenter : string
        String containing the name of the experimenter.

    videos : list
        A list of string containing the full paths of the videos to include in the project.

    working_directory : string, optional
        The directory where the project will be created. The default is the ``current working directory``; if provided, it must be a string.

    copy_videos : bool, optional
        If this is set to True, the videos are copied to the ``videos`` directory. If it is False, symlinks of the videos are created in the project/videos directory. The default is ``False``; if provided it must be either
        ``True`` or ``False``.

    videotype : string, optional
        Video extension passed on to project creation, video analysis and labeled-video creation. The default is ``'.avi'``.

    createlabeledvideo : bool, optional
        If true, a labeled video (with skeleton) is created and the trajectories are plotted for the videos in the project's ``videos`` directory. The default is ``True``.

    analyzevideo : bool, optional
        If true, then the videos in the project's ``videos`` directory are analyzed. If false, then only the project will be created and the weights downloaded. The default is ``True``.

    Returns
    -------
    tuple
        ``(cfg, path_train_config)``: the value returned by ``deeplabcut.create_new_project`` and the path of the
        train ``pose_cfg.yaml`` as a string.

    Example
    --------
    Linux/MacOs
    >>> deeplabcut.create_pretrained_human_project('human','Linus',['/data/videos/mouse1.avi'],'/analysis/project/',copy_videos=False)

    Windows:
    >>> deeplabcut.create_pretrained_human_project('human','Bill',[r'C:\yourusername\rig-95\Videos\reachingvideo1.avi'],r'C:\yourusername\analysis\project', copy_videos=False)
    Users must format paths with either:  r'C:\ OR 'C:\\ <- i.e. a double backslash \ \ )
    --------
    """

    cfg = deeplabcut.create_new_project(project, experimenter, videos,
                                        working_directory, copy_videos,
                                        videotype)

    # Overwrite the body parts / skeleton of the fresh project with the
    # 14-keypoint human layout, then re-read the config from disk.
    config = auxiliaryfunctions.read_config(cfg)
    config['bodyparts'] = [
        'ankle1', 'knee1', 'hip1', 'hip2', 'knee2', 'ankle2', 'wrist1',
        'elbow1', 'shoulder1', 'shoulder2', 'elbow2', 'wrist2', 'chin',
        'forehead'
    ]
    config['skeleton'] = [['ankle1', 'knee1'], ['ankle2', 'knee2'],
                          ['knee1', 'hip1'], ['knee2', 'hip2'],
                          ['hip1', 'hip2'], ['shoulder1', 'shoulder2'],
                          ['shoulder1', 'hip1'], ['shoulder2', 'hip2'],
                          ['shoulder1', 'elbow1'], ['shoulder2', 'elbow2'],
                          ['chin', 'forehead'], ['elbow1', 'wrist1'],
                          ['elbow2', 'wrist2']]
    config['default_net_type'] = 'resnet_101'
    auxiliaryfunctions.write_config(cfg, config)
    config = auxiliaryfunctions.read_config(cfg)

    # The model folder of shuffle 1 / first training fraction is needed for
    # both the directories and the pose config paths; look it up once.
    modelfoldername = auxiliaryfunctions.GetModelFolder(
        trainFraction=config['TrainingFraction'][0], shuffle=1, cfg=config)
    train_dir = Path(
        os.path.join(config['project_path'], str(modelfoldername), 'train'))
    test_dir = Path(
        os.path.join(config['project_path'], str(modelfoldername), 'test'))

    # Create the model directory
    train_dir.mkdir(parents=True, exist_ok=True)
    test_dir.mkdir(parents=True, exist_ok=True)

    path_train_config = str(
        os.path.join(config['project_path'], Path(modelfoldername), 'train',
                     'pose_cfg.yaml'))
    path_test_config = str(
        os.path.join(config['project_path'], Path(modelfoldername), 'test',
                     'pose_cfg.yaml'))

    # Download the weights and put them in the appropriate directory.
    # The working directory is restored even if the download fails.
    cwd = os.getcwd()
    os.chdir(train_dir)
    try:
        print(
            "Checking if the weights are already available, otherwise I will download them!"
        )
        weightfilename = auxfun_models.download_mpii_weigths(train_dir)
    finally:
        os.chdir(cwd)

    # Create the pose_config.yaml files
    parent_path = Path(os.path.dirname(deeplabcut.__file__))
    defaultconfigfile = str(parent_path / 'pose_cfg.yaml')
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(config)
    _, metadatafilename = auxiliaryfunctions.GetDataandMetaDataFilenames(
        trainingsetfolder,
        trainFraction=config['TrainingFraction'][0],
        shuffle=1,
        cfg=config)
    bodyparts = config['bodyparts']
    net_type = 'resnet_101'
    num_shuffles = 1
    # The return value is not used here; the call is kept because it was part
    # of the original flow (presumably it makes sure the base network weights
    # are available -- TODO confirm against auxfun_models).
    auxfun_models.Check4weights(net_type, parent_path, num_shuffles)
    items2change = {
        "dataset": 'dataset-test.mat',  # placeholder instead of the real data file name
        "metadataset": metadatafilename,
        "num_joints": len(bodyparts),
        "all_joints": [[i] for i in range(len(bodyparts))],
        "all_joints_names": [str(bpt) for bpt in bodyparts],
        # Strip the '.index' suffix to get the snapshot prefix,
        # e.g. 'models/mpii/snapshot-1030000'.
        "init_weights": weightfilename.split('.index')[0],
        "project_path": str(config['project_path']),
        "net_type": net_type,
        "dataset_type": "default"
    }
    trainingdata = MakeTrain_pose_yaml(items2change, path_train_config,
                                       defaultconfigfile)

    keys2save = [
        "dataset", "dataset_type", "num_joints", "all_joints",
        "all_joints_names", "net_type", 'init_weights', 'global_scale',
        'location_refinement', 'locref_stdev'
    ]
    MakeTest_pose_yaml(trainingdata, keys2save, path_test_config)

    video_dir = os.path.join(config['project_path'], 'videos')

    if analyzevideo:
        # Analyze the videos
        deeplabcut.analyze_videos(cfg, [video_dir],
                                  videotype,
                                  save_as_csv=True)
    if createlabeledvideo:
        deeplabcut.create_labeled_video(cfg, [video_dir],
                                        videotype,
                                        draw_skeleton=True)
        deeplabcut.plot_trajectories(cfg, [video_dir], videotype)
    return cfg, path_train_config

Ejemplo n.º 21

Mostrar archivo

def extract_maps(
    config,
    shuffle=0,
    trainingsetindex=0,
    gputouse=None,
    rescale=False,
    Indices=None,
    modelprefix="",
):
    """
    Extracts the scoremap, locref, partaffinityfields (if available).

    Returns a nested dictionary indexed by: training fraction, snapshot name, and image counter.
    Each item is the list
    [image, scmap, locref, paf, bpt names, partaffinity graph, imagename, True/False if this image was in trainingset].
    The image counter runs 0, 1, 2, ... over the processed images; when ``Indices`` is
    given it is the position within ``Indices``, not the row number itself.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    shuffle: integer
        integers specifying shuffle index of the training dataset. The default is 0.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a list in config.yaml). This
        variable can also be set to "all".

    gputouse: int, optional
        If not None, the environment variable CUDA_VISIBLE_DEVICES is set to this value.

    rescale: bool, default False
        If True, every image is resized by the 'global_scale' variable (as set in the test/pose_config.yaml file for a particular project)
        before it is passed to the network. Otherwise images are used at their original size.

    Indices: list of int, optional
        Row positions (in the CollectedData table) of the images to process. By default all labeled images are processed.

    modelprefix: string, optional
        Forwarded to GetModelFolder and GetEvaluationFolder when locating the model and evaluation directories.

    Raises
    ------
    Exception
        If trainingsetindex is neither "all" nor a valid position in TrainingFraction.
    FileNotFoundError
        If the test pose_cfg.yaml cannot be loaded or no snapshots are found in the train folder.
    ValueError
        If 'snapshotindex' in config.yaml is not -1, "all", or an integer smaller than the number of snapshots.

    Examples
    --------
    If you want to extract the data for image 0 and 103 (of the training set) for model trained with shuffle 0.
    >>> deeplabcut.extract_maps(configfile,0,Indices=[0,103])

    """
    from deeplabcut.utils.auxfun_videos import imread, imresize
    from deeplabcut.pose_estimation_tensorflow.nnet import predict
    from deeplabcut.pose_estimation_tensorflow.nnet import (
        predict_multianimal as predictma, )
    from deeplabcut.pose_estimation_tensorflow.config import load_config
    from deeplabcut.pose_estimation_tensorflow.dataset.pose_dataset import data_to_input
    from deeplabcut.utils import auxiliaryfunctions
    from tqdm import tqdm
    import tensorflow as tf

    # tf.compat.v1 exists from TF 1.13 onwards and is the only home of
    # reset_default_graph in TF 2.x, so use it for every version above 1.12.
    major, minor = (int(part) for part in tf.__version__.split(".")[:2])
    TF = tf.compat.v1 if (major, minor) > (1, 12) else tf

    import pandas as pd
    from pathlib import Path
    import numpy as np

    TF.reset_default_graph()
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  #
    #    tf.logging.set_verbosity(tf.logging.WARN)

    start_path = os.getcwd()
    # Read file path for pose_config file. >> pass it on
    cfg = auxiliaryfunctions.read_config(config)

    if gputouse is not None:  # gpu selection
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

    if trainingsetindex == "all":
        TrainingFractions = cfg["TrainingFraction"]
    elif 0 <= trainingsetindex < len(cfg["TrainingFraction"]):
        TrainingFractions = [cfg["TrainingFraction"][int(trainingsetindex)]]
    else:
        raise Exception(
            "Please check the trainingsetindex! ",
            trainingsetindex,
            " should be an integer from 0 .. ",
            int(len(cfg["TrainingFraction"]) - 1),
        )

    # Loading human annotated data (only its index, i.e. the image names, is used below)
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        ),
        "df_with_missing",
    )

    # Resolve the requested images ONCE into a reusable list of (row, imagename)
    # pairs. Rebinding `Indices` to an `enumerate` object inside the loops would
    # leave every snapshot / training fraction after the first with an exhausted
    # iterator and therefore an empty result.
    if Indices is None:
        images = list(enumerate(Data.index))
    else:
        images = [(row, Data.index[row]) for row in Indices]

    # Make folder for evaluation
    auxiliaryfunctions.attempttomakefolder(
        str(cfg["project_path"] + "/evaluation-results/"))

    Maps = {}
    for trainFraction in TrainingFractions:
        Maps[trainFraction] = {}
        ##################################################
        # Load and setup CNN part detector
        ##################################################
        _, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder, trainFraction, shuffle, cfg)

        modelfolder = os.path.join(
            cfg["project_path"],
            str(
                auxiliaryfunctions.GetModelFolder(trainFraction,
                                                  shuffle,
                                                  cfg,
                                                  modelprefix=modelprefix)),
        )
        path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"
        # Load meta data.
        # NOTE: trainFraction is rebound here to the value stored in the metadata
        # file; the results below are filed under that value.
        (
            data,
            trainIndices,
            testIndices,
            trainFraction,
        ) = auxiliaryfunctions.LoadMetadata(
            os.path.join(cfg["project_path"], metadatafn))
        try:
            dlc_cfg = load_config(str(path_test_config))
        except FileNotFoundError:
            raise FileNotFoundError(
                "It seems the model for shuffle %s and trainFraction %s does not exist."
                % (shuffle, trainFraction))

        # change batch size, if it was edited during analysis!
        dlc_cfg["batch_size"] = 1  # in case this was edited for analysis.

        # Create folder structure to store results.
        evaluationfolder = os.path.join(
            cfg["project_path"],
            str(
                auxiliaryfunctions.GetEvaluationFolder(
                    trainFraction, shuffle, cfg, modelprefix=modelprefix)),
        )
        auxiliaryfunctions.attempttomakefolder(evaluationfolder,
                                               recursive=True)

        # Check which snapshots are available and sort them by # iterations
        Snapshots = np.array([
            fn.split(".")[0]
            for fn in os.listdir(os.path.join(str(modelfolder), "train"))
            if "index" in fn
        ])
        if len(Snapshots) == 0:
            raise FileNotFoundError(
                "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                % (shuffle, trainFraction))

        increasing_indices = np.argsort(
            [int(m.split("-")[1]) for m in Snapshots])
        Snapshots = Snapshots[increasing_indices]

        snapshotindex = cfg["snapshotindex"]
        if snapshotindex == -1:
            snapindices = [-1]
        elif snapshotindex == "all":
            snapindices = range(len(Snapshots))
        elif isinstance(snapshotindex, int) and snapshotindex < len(Snapshots):
            snapindices = [snapshotindex]
        else:
            # Fail loudly: merely printing would leave `snapindices` undefined
            # (or stale from a previous training fraction).
            raise ValueError(
                "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
            )

        ########################### RESCALING (to global scale)
        # Only the images are rescaled; the label values in `Data` are never read.
        scale = dlc_cfg["global_scale"] if rescale else 1

        bptnames = [
            dlc_cfg["all_joints_names"][i]
            for i in range(len(dlc_cfg["all_joints"]))
        ]

        for snapindex in snapindices:
            dlc_cfg["init_weights"] = os.path.join(
                str(modelfolder), "train", Snapshots[snapindex]
            )  # setting weights to corresponding snapshot.

            # Specifying state of model (snapshot / training state)
            sess, inputs, outputs = predict.setup_pose_prediction(dlc_cfg)
            print("Analyzing data...")

            DATA = {}
            try:
                for imageindex, (row, imagename) in enumerate(tqdm(images)):
                    image = imread(os.path.join(cfg["project_path"],
                                                imagename),
                                   mode="RGB")
                    if scale != 1:
                        image = imresize(image, scale)

                    image_batch = data_to_input(image)
                    # Compute prediction with the CNN
                    outputs_np = sess.run(outputs,
                                          feed_dict={inputs: image_batch})

                    if cfg.get("multianimalproject", False):
                        scmap, locref, paf = predictma.extract_cnn_output(
                            outputs_np, dlc_cfg)
                        pagraph = dlc_cfg["partaffinityfield_graph"]
                    else:
                        scmap, locref = predict.extract_cnn_output(
                            outputs_np, dlc_cfg)
                        paf = None
                        pagraph = []

                    # Train/test membership is decided by the image's row in
                    # the labeled data, not by its position in `Indices`.
                    trainingfram = row not in testIndices

                    DATA[imageindex] = [
                        image,
                        scmap,
                        locref,
                        paf,
                        bptnames,
                        pagraph,
                        imagename,
                        trainingfram,
                    ]
            finally:
                # One session is created per snapshot; release it before the next.
                sess.close()
            Maps[trainFraction][Snapshots[snapindex]] = DATA
    os.chdir(str(start_path))
    return Maps

Ejemplo n.º 22

Mostrar archivo

def create_multianimaltraining_dataset(
    config,
    num_shuffles=1,
    Shuffles=None,
    windows2linux=False,
    net_type=None,
    numdigits=2,
    crop_size=(400, 400),
    crop_sampling="hybrid",
    paf_graph=None,
    trainIndices=None,
    testIndices=None,
):
    """
    Creates a training dataset for multi-animal datasets. Labels from all the extracted frames are merged into a single .h5 file.
    Only the videos included in the config file are used to create this dataset.
    [OPTIONAL] Use the function 'add_new_video' at any stage of the project to add more videos to the project.

    Important differences to standard:
     - stores coordinates with numdigits as many digits
     - stores the training data as a .pickle file (instead of .mat)
     - additionally writes an inference_cfg.yaml next to the test pose_cfg.yaml

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    num_shuffles : int, optional
        Number of shuffles of training dataset to create, i.e. [1,2,3] for num_shuffles=3. Default is set to 1.

    Shuffles: list of shuffles.
        Alternatively the user can also give a list of shuffles (integers!). Entries that are not integers are dropped.

    windows2linux: bool.
        Deprecated; has no effect. A FutureWarning is emitted when it is set.

    net_type: string
        Type of networks. Currently resnet_50, resnet_101, and resnet_152, efficientnet-b0, efficientnet-b1, efficientnet-b2, efficientnet-b3,
        efficientnet-b4, efficientnet-b5, and efficientnet-b6 as well as dlcrnet_ms5 are supported (not the MobileNets!).
        See Lauer et al. 2021 https://www.biorxiv.org/content/10.1101/2021.04.30.442096v1
        By default the 'default_net_type' entry of config.yaml is used, falling back to dlcrnet_ms5.

    numdigits: int, optional
        Forwarded to format_multianimal_training_data (number of digits the coordinates are stored with). Default is 2.

    crop_size: tuple of int, optional
        Dimensions (width, height) of the crops for data augmentation.
        Default is 400x400.

    crop_sampling: str, optional
        Crop centers sampling method. Must be either:
        "uniform" (randomly over the image),
        "keypoints" (randomly over the annotated keypoints),
        "density" (weighing preferentially dense regions of keypoints),
        or "hybrid" (alternating randomly between "uniform" and "density").
        Default is "hybrid".

    paf_graph: list of lists, optional (default=None)
        If not None, overwrite the default complete graph. This is useful for advanced users who
        already know a good graph, or simply want to use a specific one. Note that, in that case,
        the data-driven selection procedure upon model evaluation will be skipped.

    trainIndices: list of lists, optional (default=None)
        List of one or multiple lists containing train indexes.
        A list containing two lists of training indexes will produce two splits.
        Must be passed together with testIndices, one list per shuffle. Entries equal to -1 are
        treated as padding: they count towards the train fraction but are removed from the split.

    testIndices: list of lists, optional (default=None)
        List of one or multiple lists containing test indexes.

    Raises
    ------
    ValueError
        If crop_size, crop_sampling or net_type is invalid, if only one of trainIndices/testIndices
        is given, or if the numbers of shuffles, train splits and test splits differ.

    Example
    --------
    >>> deeplabcut.create_multianimaltraining_dataset('/analysis/project/reaching-task/config.yaml',num_shuffles=1)

    >>> deeplabcut.create_multianimaltraining_dataset('/analysis/project/reaching-task/config.yaml', Shuffles=[0,1,2], trainIndices=[trainInd1, trainInd2, trainInd3], testIndices=[testInd1, testInd2, testInd3])

    Windows:
    >>> deeplabcut.create_multianimaltraining_dataset(r'C:\\Users\\Ulf\\looming-task\\config.yaml',Shuffles=[3,17,5])
    --------
    """
    import pickle

    if windows2linux:
        warnings.warn(
            "`windows2linux` has no effect since 2.2.0.4 and will be removed in 2.2.1.",
            FutureWarning,
        )

    if len(crop_size) != 2 or not all(isinstance(v, int) for v in crop_size):
        raise ValueError(
            "Crop size must be a tuple of two integers (width, height).")

    if crop_sampling not in ("uniform", "keypoints", "density", "hybrid"):
        raise ValueError(
            f"Invalid sampling {crop_sampling}. Must be "
            f"either 'uniform', 'keypoints', 'density', or 'hybrid'.")

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg["scorer"]
    project_path = cfg["project_path"]
    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    full_training_path = Path(project_path, trainingsetfolder)
    auxiliaryfunctions.attempttomakefolder(full_training_path, recursive=True)

    Data = merge_annotateddatasets(cfg, full_training_path)
    if Data is None:
        return
    Data = Data[scorer]

    if net_type is None:  # loading & linking pretrained models
        net_type = cfg.get("default_net_type", "dlcrnet_ms5")
    elif not any(net in net_type for net in ("resnet", "eff", "dlc", "mob")):
        raise ValueError(f"Unsupported network {net_type}.")

    multi_stage = False
    ### dlcnet_ms5: backbone resnet50 + multi-fusion & multi-stage module
    ### dlcr101_ms5/dlcr152_ms5: backbone resnet101/152 + multi-fusion & multi-stage module
    if all(net in net_type for net in ("dlcr", "_ms5")):
        num_layers = re.findall(r"dlcr([0-9]*)", net_type)[0]
        if num_layers == "":
            num_layers = 50
        net_type = "resnet_{}".format(num_layers)
        multi_stage = True

    dataset_type = "multi-animal-imgaug"
    (
        individuals,
        uniquebodyparts,
        multianimalbodyparts,
    ) = auxfun_multianimal.extractindividualsandbodyparts(cfg)

    if paf_graph is None:  # Automatically form a complete PAF graph
        partaffinityfield_graph = [
            list(edge)
            for edge in combinations(range(len(multianimalbodyparts)), 2)
        ]
    else:
        # Ignore possible connections between 'multi' and 'unique' body parts;
        # one can never be too careful...
        to_ignore = auxfun_multianimal.filter_unwanted_paf_connections(
            cfg, paf_graph)
        partaffinityfield_graph = [
            edge for i, edge in enumerate(paf_graph) if i not in to_ignore
        ]
        auxfun_multianimal.validate_paf_graph(cfg, partaffinityfield_graph)

    print("Utilizing the following graph:", partaffinityfield_graph)
    # Disable the prediction of PAFs if the graph is empty
    partaffinityfield_predict = bool(partaffinityfield_graph)

    # Loading the encoder (if necessary downloading from TF)
    dlcparent_path = auxiliaryfunctions.get_deeplabcut_path()
    defaultconfigfile = os.path.join(dlcparent_path, "pose_cfg.yaml")
    model_path, num_shuffles = auxfun_models.Check4weights(
        net_type, Path(dlcparent_path), num_shuffles)

    if Shuffles is None:
        Shuffles = range(1, num_shuffles + 1, 1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]

    splits = []
    if trainIndices is None and testIndices is None:
        for shuffle in Shuffles:  # Creating shuffles starting from 1
            for train_frac in cfg["TrainingFraction"]:
                train_inds, test_inds = SplitTrials(range(len(Data)),
                                                    train_frac)
                splits.append((train_frac, shuffle, (train_inds, test_inds)))
    else:
        if trainIndices is None or testIndices is None:
            raise ValueError(
                "trainIndices and testIndices must be passed together.")
        # A chained `a != b != c` only fires when BOTH adjacent pairs differ;
        # all three lengths have to agree for the zip below to be meaningful.
        if not (len(trainIndices) == len(testIndices) == len(Shuffles)):
            raise ValueError(
                "Number of Shuffles and train and test indexes should be equal."
            )
        for shuffle, train_inds, test_inds in zip(Shuffles, trainIndices,
                                                  testIndices):
            trainFraction = round(
                len(train_inds) * 1.0 / (len(train_inds) + len(test_inds)), 2)
            print(
                f"You passed a split with the following fraction: {int(100 * trainFraction)}%"
            )
            # Now that the training fraction is guaranteed to be correct,
            # the values added to pad the indices are removed.
            train_inds = np.asarray(train_inds)
            train_inds = train_inds[train_inds != -1]
            test_inds = np.asarray(test_inds)
            test_inds = test_inds[test_inds != -1]
            splits.append((trainFraction, shuffle, (train_inds, test_inds)))

    # Loop-local names are used so the `trainIndices`/`testIndices` arguments
    # are not rebound while iterating.
    for trainFraction, shuffle, (train_inds, test_inds) in splits:
        ####################################################
        # Generating data structure with labeled information & frame metadata (for deep cut)
        ####################################################
        print(
            "Creating training data for: Shuffle:",
            shuffle,
            "TrainFraction: ",
            trainFraction,
        )

        # Make training file!
        data = format_multianimal_training_data(
            Data,
            train_inds,
            cfg["project_path"],
            numdigits,
        )

        if len(train_inds) == 0:
            # Nothing to train on for this split: no files are written.
            continue

        (
            datafilename,
            metadatafilename,
        ) = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder, trainFraction, shuffle, cfg)
        ################################################################################
        # Saving metadata and data file (Pickle file)
        ################################################################################
        auxiliaryfunctions.SaveMetadata(
            os.path.join(project_path, metadatafilename),
            data,
            train_inds,
            test_inds,
            trainFraction,
        )

        datafilename = datafilename.split(".mat")[0] + ".pickle"
        with open(os.path.join(project_path, datafilename), "wb") as f:
            # Pickle the 'labeled-data' dictionary using the highest protocol available.
            pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

        ################################################################################
        # Creating file structure for training &
        # Test files as well as pose_yaml files (containing training and testing information)
        #################################################################################

        modelfoldername = auxiliaryfunctions.GetModelFolder(
            trainFraction, shuffle, cfg)
        # NOTE(review): folders are created relative to the config file's
        # directory while the yaml paths below use cfg["project_path"];
        # presumably both point at the same place - confirm.
        auxiliaryfunctions.attempttomakefolder(Path(config).parents[0] /
                                               modelfoldername,
                                               recursive=True)
        auxiliaryfunctions.attempttomakefolder(
            str(Path(config).parents[0] / modelfoldername / "train"))
        auxiliaryfunctions.attempttomakefolder(
            str(Path(config).parents[0] / modelfoldername / "test"))

        path_train_config = str(
            os.path.join(
                cfg["project_path"],
                Path(modelfoldername),
                "train",
                "pose_cfg.yaml",
            ))
        path_test_config = str(
            os.path.join(
                cfg["project_path"],
                Path(modelfoldername),
                "test",
                "pose_cfg.yaml",
            ))
        path_inference_config = str(
            os.path.join(
                cfg["project_path"],
                Path(modelfoldername),
                "test",
                "inference_cfg.yaml",
            ))

        jointnames = [str(bpt) for bpt in multianimalbodyparts]
        jointnames.extend([str(bpt) for bpt in uniquebodyparts])
        num_joints = len(multianimalbodyparts) + len(uniquebodyparts)
        items2change = {
            "dataset": datafilename,
            "metadataset": metadatafilename,
            "num_joints": num_joints,
            "all_joints": [[i] for i in range(num_joints)],
            "all_joints_names": jointnames,
            "init_weights": model_path,
            "project_path": str(cfg["project_path"]),
            "net_type": net_type,
            "multi_stage": multi_stage,
            "pairwise_loss_weight": 0.1,
            "pafwidth": 20,
            "partaffinityfield_graph": partaffinityfield_graph,
            "partaffinityfield_predict": partaffinityfield_predict,
            "weigh_only_present_joints": False,
            "num_limbs": len(partaffinityfield_graph),
            "dataset_type": dataset_type,
            "optimizer": "adam",
            "batch_size": 8,
            "multi_step": [[1e-4, 7500], [5 * 1e-5, 12000], [1e-5, 200000]],
            "save_iters": 10000,
            "display_iters": 500,
            "num_idchannel":
            len(cfg["individuals"]) if cfg.get("identity", False) else 0,
            "crop_size": list(crop_size),
            "crop_sampling": crop_sampling,
        }

        trainingdata = MakeTrain_pose_yaml(items2change, path_train_config,
                                           defaultconfigfile)
        keys2save = [
            "dataset",
            "num_joints",
            "all_joints",
            "all_joints_names",
            "net_type",
            "multi_stage",
            "init_weights",
            "global_scale",
            "location_refinement",
            "locref_stdev",
            "dataset_type",
            "partaffinityfield_predict",
            "pairwise_predict",
            "partaffinityfield_graph",
            "num_limbs",
            "num_idchannel",
        ]

        MakeTest_pose_yaml(
            trainingdata,
            keys2save,
            path_test_config,
            nmsradius=5.0,
            minconfidence=0.01,
            sigma=1,
            locref_smooth=False,
        )  # setting important def. values for inference

        # Setting inference cfg file:
        defaultinference_configfile = os.path.join(dlcparent_path,
                                                   "inference_cfg.yaml")
        items2change = {
            "minimalnumberofconnections":
            int(len(cfg["multianimalbodyparts"]) / 2),
            # one extra slot when unique body parts are present
            "topktoretain":
            len(cfg["individuals"]) + 1 * (len(cfg["uniquebodyparts"]) > 0),
            "withid":
            cfg.get("identity", False),
        }
        MakeInference_yaml(items2change, path_inference_config,
                           defaultinference_configfile)

        print(
            "The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!"
        )

Ejemplo n.º 23

Mostrar archivo

Archivo: testscript_multianimal.py Proyecto: juliencarponcy/DeepLabCut

        output_path.replace("csv", "h5"), "df_with_missing", format="table", mode="w"
    )
    print("Artificial data created.")

    print("Checking labels...")
    deeplabcut.check_labels(config_path, draw_skeleton=False)
    print("Labels checked.")

    print("Creating train dataset...")
    deeplabcut.create_multianimaltraining_dataset(
        config_path, net_type=NET, crop_size=(200, 200)
    )
    print("Train dataset created.")

    # Check the training image paths are correctly stored as arrays of strings
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    datafile, _ = auxiliaryfunctions.GetDataandMetaDataFilenames(
        trainingsetfolder,
        0.8,
        1,
        cfg,
    )
    datafile = datafile.split(".mat")[0] + ".pickle"
    with open(os.path.join(cfg["project_path"], datafile), "rb") as f:
        pickledata = pickle.load(f)
    num_images = len(pickledata)
    assert all(len(pickledata[i]["image"]) == 3 for i in range(num_images))

    print("Editing pose config...")
    model_folder = auxiliaryfunctions.GetModelFolder(
        TRAIN_SIZE, 1, cfg, cfg["project_path"]

Ejemplo n.º 24

Mostrar archivo

def create_training_dataset(
    config,
    num_shuffles=1,
    Shuffles=None,
    windows2linux=False,
    userfeedback=False,
    trainIndices=None,
    testIndices=None,
    net_type=None,
    augmenter_type=None,
):
    """
    Creates a training dataset. Labels from all the extracted frames are merged into a single .h5 file.
    Only the videos included in the config file are used to create this dataset.

    For multi-animal projects (``multianimalproject`` set in the config) the work is delegated to
    ``create_multianimaltraining_dataset`` and nothing is returned.

    [OPTIONAL] Use the function 'add_new_video' at any stage of the project to add more videos to the project.

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    num_shuffles : int, optional
        Number of shuffles of training dataset to create, i.e. [1,2,3] for num_shuffles=3. Default is set to 1.

    Shuffles: list of shuffles.
        Alternatively the user can also give a list of shuffles (integers!). Non-integer entries are dropped.

    windows2linux: bool.
        The annotation files contain paths formatted according to your operating system. If you label on windows
        but train & evaluate on a unix system (e.g. ubuntu, colab, Mac) set this variable to True to convert the paths.

    userfeedback: bool, optional
        If this is set to false, then all requested train/test splits are created (no matter if they already exist). If you
        want to assure that previous splits etc. are not overwritten, then set this to True and you will be asked for each split.

    trainIndices: list of lists, optional (default=None)
        List of one or multiple lists containing train indexes.
        A list containing two lists of training indexes will produce two splits.
        Must be passed together with testIndices.

    testIndices: list of lists, optional (default=None)
        List of one or multiple lists containing test indexes.
        Must be passed together with trainIndices.

    net_type: string
        Type of networks. Currently resnet_50, resnet_101, resnet_152, mobilenet_v2_1.0,mobilenet_v2_0.75, mobilenet_v2_0.5, and mobilenet_v2_0.35 are supported.

    augmenter_type: string
        Type of augmenter. Currently default, scalecrop, imgaug, tensorpack, and deterministic are supported.

    Returns
    -------
    list of tuples or None
        One ``(trainFraction, shuffle, (train_indices, test_indices))`` tuple per requested split.
        None is returned for multi-animal projects and when no annotated data could be merged.

    Raises
    ------
    ValueError
        If net_type or augmenter_type is not supported, if only one of trainIndices/testIndices is given,
        or if the numbers of train index lists, test index lists and shuffles are not all equal.
    Exception
        If userfeedback is True and the user declines to overwrite an existing split.

    Example
    --------
    >>> deeplabcut.create_training_dataset('/analysis/project/reaching-task/config.yaml',num_shuffles=1)
    Windows:
    >>> deeplabcut.create_training_dataset('C:\\Users\\Ulf\\looming-task\\config.yaml',Shuffles=[3,17,5])
    --------
    """
    import scipy.io as sio

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    if cfg.get("multianimalproject", False):
        from deeplabcut.generate_training_dataset.multiple_individuals_trainingsetmanipulation import (
            create_multianimaltraining_dataset, )

        create_multianimaltraining_dataset(config, num_shuffles, Shuffles,
                                           windows2linux, net_type)
        return

    scorer = cfg["scorer"]
    project_path = cfg["project_path"]

    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    full_training_path = Path(
        os.path.join(project_path, str(trainingsetfolder)))
    auxiliaryfunctions.attempttomakefolder(full_training_path, recursive=True)

    Data = merge_annotateddatasets(cfg, full_training_path, windows2linux)
    if Data is None:
        return
    Data = Data[scorer]  # extract labeled data

    # Resolve and validate the network type
    if net_type is None:
        net_type = cfg.get("default_net_type", "resnet_50")
    elif "resnet" not in net_type and "mobilenet" not in net_type:
        raise ValueError(f"Invalid network type: {net_type}")

    # Resolve and validate the augmenter
    if augmenter_type is None:
        augmenter_type = cfg.get("default_augmenter", "imgaug")
        if augmenter_type is None:  # this could be in config.yaml for old projects!
            # updating variable if null/None (backwards compatibility)
            auxiliaryfunctions.edit_config(config,
                                           {"default_augmenter": "imgaug"})
            augmenter_type = "imgaug"
    elif augmenter_type not in (
            "default",
            "scalecrop",
            "imgaug",
            "tensorpack",
            "deterministic",
    ):
        raise ValueError(f"Invalid augmenter type: {augmenter_type}")

    # Loading the encoder (if necessary downloading from TF)
    dlcparent_path = auxiliaryfunctions.get_deeplabcut_path()
    defaultconfigfile = os.path.join(dlcparent_path, "pose_cfg.yaml")
    model_path, num_shuffles = auxfun_models.Check4weights(
        net_type, Path(dlcparent_path), num_shuffles)

    if Shuffles is None:
        Shuffles = range(1, num_shuffles + 1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]

    if trainIndices is None and testIndices is None:
        # Random splits: one per (training fraction, shuffle) combination.
        splits = [(
            trainFraction,
            shuffle,
            SplitTrials(range(len(Data.index)), trainFraction),
        ) for trainFraction in cfg["TrainingFraction"]
                  for shuffle in Shuffles]
    else:
        if trainIndices is None or testIndices is None:
            raise ValueError(
                "trainIndices and testIndices must be passed together.")
        # A chained `a != b != c` only means `a != b and b != c`, which lets
        # partially mismatched lengths through; require all three to agree.
        if not (len(trainIndices) == len(testIndices) == len(Shuffles)):
            raise ValueError(
                "Number of Shuffles and train and test indexes should be equal."
            )
        splits = []
        for shuffle, train_inds, test_inds in zip(Shuffles, trainIndices,
                                                  testIndices):
            trainFraction = round(
                len(train_inds) * 1.0 / (len(train_inds) + len(test_inds)), 2)
            print(
                f"You passed a split with the following fraction: {int(100 * trainFraction)}%"
            )
            splits.append((trainFraction, shuffle, (train_inds, test_inds)))

    bodyparts = cfg["bodyparts"]
    nbodyparts = len(bodyparts)
    project_root = Path(config).parents[0]

    # Local loop names are used so the trainIndices/testIndices arguments are
    # not rebound while iterating.
    for trainFraction, shuffle, (train_inds, test_inds) in splits:
        if len(train_inds) == 0:
            continue

        if userfeedback:
            # NOTE: list.index raises ValueError when trainFraction (e.g. one
            # computed from user-supplied indices) is not listed in the
            # config's TrainingFraction.
            trainposeconfigfile, _, _ = training.return_train_network_path(
                config,
                shuffle=shuffle,
                trainingsetindex=cfg["TrainingFraction"].index(trainFraction),
            )
            if trainposeconfigfile.is_file():
                askuser = input(
                    "The model folder is already present. If you continue, it will overwrite the existing model (split). Do you want to continue?(yes/no): "
                )
                # Accept any capitalisation of "no"/"n" as a refusal.
                if askuser.strip().lower() in ("no", "n"):
                    raise Exception(
                        "Use the Shuffles argument as a list to specify a different shuffle index. Check out the help for more details."
                    )

        ####################################################
        # Generating data structure with labeled information & frame metadata (for deep cut)
        ####################################################
        datafilename, metadatafilename = (
            auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg))

        ################################################################################
        # Saving data file (convert to training file for deeper cut (*.mat))
        ################################################################################
        data, MatlabData = format_training_data(Data, train_inds, nbodyparts,
                                                project_path)
        sio.savemat(os.path.join(project_path, datafilename),
                    {"dataset": MatlabData})

        ################################################################################
        # Saving metadata (Pickle file)
        ################################################################################
        auxiliaryfunctions.SaveMetadata(
            os.path.join(project_path, metadatafilename),
            data,
            train_inds,
            test_inds,
            trainFraction,
        )

        ################################################################################
        # Creating file structure for training &
        # Test files as well as pose_yaml files (containing training and testing information)
        #################################################################################
        modelfoldername = auxiliaryfunctions.GetModelFolder(
            trainFraction, shuffle, cfg)
        auxiliaryfunctions.attempttomakefolder(project_root / modelfoldername,
                                               recursive=True)
        auxiliaryfunctions.attempttomakefolder(
            str(project_root / modelfoldername / "train"))
        auxiliaryfunctions.attempttomakefolder(
            str(project_root / modelfoldername / "test"))

        path_train_config = str(
            os.path.join(
                cfg["project_path"],
                Path(modelfoldername),
                "train",
                "pose_cfg.yaml",
            ))
        path_test_config = str(
            os.path.join(
                cfg["project_path"],
                Path(modelfoldername),
                "test",
                "pose_cfg.yaml",
            ))

        items2change = {
            "dataset": datafilename,
            "metadataset": metadatafilename,
            "num_joints": nbodyparts,
            "all_joints": [[i] for i in range(nbodyparts)],
            "all_joints_names": [str(bpt) for bpt in bodyparts],
            "init_weights": model_path,
            "project_path": str(cfg["project_path"]),
            "net_type": net_type,
            "dataset_type": augmenter_type,
        }

        items2drop = {}
        if augmenter_type == "scalecrop":
            # these values are dropped as scalecrop
            # doesn't have rotation implemented
            items2drop = {"rotation": 0, "rotratio": 0.0}

        trainingdata = MakeTrain_pose_yaml(items2change, path_train_config,
                                           defaultconfigfile, items2drop)

        keys2save = [
            "dataset",
            "num_joints",
            "all_joints",
            "all_joints_names",
            "net_type",
            "init_weights",
            "global_scale",
            "location_refinement",
            "locref_stdev",
        ]
        MakeTest_pose_yaml(trainingdata, keys2save, path_test_config)
        print(
            "The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!"
        )
    return splits

Ejemplo n.º 25

Mostrar archivo

def create_multianimaltraining_dataset(
    config,
    num_shuffles=1,
    Shuffles=None,
    windows2linux=False,
    net_type=None,
    numdigits=2,
    paf_graph=None,
):
    """
    Creates a training dataset for multi-animal datasets. Labels from all the extracted frames are merged into a single .h5 file.
    Only the videos included in the config file are used to create this dataset.

    [OPTIONAL] Use the function 'add_new_video' at any stage of the project to add more videos to the project.

    Important differences to standard:
     - stores coordinates with numdigits as many digits
     - writes the training data as a pickle file (instead of a .mat file) and
       additionally creates an inference_cfg.yaml next to the test pose_cfg.yaml

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    num_shuffles : int, optional
        Number of shuffles of training dataset to create, i.e. [1,2,3] for num_shuffles=3. Default is set to 1.

    Shuffles: list of shuffles.
        Alternatively the user can also give a list of shuffles (integers!). Non-integer entries are dropped.

    windows2linux: bool.
        The annotation files contain paths formatted according to your operating system. If you label on windows
        but train & evaluate on a unix system (e.g. ubuntu, colab, Mac) set this variable to True to convert the paths.

    net_type: string
        Type of networks. Currently resnet_50, resnet_101, and resnet_152, efficientnet-b0, efficientnet-b1, efficientnet-b2, efficientnet-b3,
        efficientnet-b4, efficientnet-b5, and efficientnet-b6 as well as dlcrnet_ms5 are supported (not the MobileNets!).
        See Lauer et al. 2021 https://www.biorxiv.org/content/10.1101/2021.04.30.442096v1

    numdigits: int, optional (default=2)
        Forwarded to format_multianimal_training_data; number of digits with which coordinates are stored.

    paf_graph: list of lists, optional (default=None)
        If not None, overwrite the default complete graph. This is useful for advanced users who
        already know a good graph, or simply want to use a specific one. Note that, in that case,
        the data-driven selection procedure upon model evaluation will be skipped.

    Raises
    ------
    ValueError
        If net_type is not one of the supported network families.

    Example
    --------
    >>> deeplabcut.create_multianimaltraining_dataset('/analysis/project/reaching-task/config.yaml',num_shuffles=1)

    Windows:
    >>> deeplabcut.create_multianimaltraining_dataset(r'C:\\Users\\Ulf\\looming-task\\config.yaml',Shuffles=[3,17,5])
    --------
    """
    import pickle

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg["scorer"]
    project_path = cfg["project_path"]
    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    full_training_path = Path(project_path, trainingsetfolder)
    auxiliaryfunctions.attempttomakefolder(full_training_path, recursive=True)

    Data = merge_annotateddatasets(cfg, full_training_path, windows2linux)
    if Data is None:
        return
    Data = Data[scorer]

    def strip_cropped_image_name(path):
        # utility function to split different crops from same image into either train or test!
        head, filename = os.path.split(path)
        if cfg["croppedtraining"]:
            filename = filename.split("c")[0]
        return os.path.join(head, filename)

    img_names = Data.index.map(strip_cropped_image_name).unique()

    if net_type is None:  # loading & linking pretrained models
        net_type = cfg.get("default_net_type", "dlcrnet_ms5")
    elif not any(net in net_type for net in ("resnet", "eff", "dlc")):
        raise ValueError(f"Unsupported network {net_type}.")

    # dlcrnet_ms5 is stored as a multi-stage resnet_50
    multi_stage = False
    if net_type == "dlcrnet_ms5":
        net_type = "resnet_50"
        multi_stage = True

    dataset_type = "multi-animal-imgaug"
    (
        individuals,
        uniquebodyparts,
        multianimalbodyparts,
    ) = auxfun_multianimal.extractindividualsandbodyparts(cfg)

    if paf_graph is None:  # Automatically form a complete PAF graph
        partaffinityfield_graph = [
            list(edge)
            for edge in combinations(range(len(multianimalbodyparts)), 2)
        ]
    else:
        # Ignore possible connections between 'multi' and 'unique' body parts;
        # one can never be too careful...
        to_ignore = auxfun_multianimal.filter_unwanted_paf_connections(
            cfg, paf_graph)
        partaffinityfield_graph = [
            edge for i, edge in enumerate(paf_graph) if i not in to_ignore
        ]
        auxfun_multianimal.validate_paf_graph(cfg, partaffinityfield_graph)

    print("Utilizing the following graph:", partaffinityfield_graph)
    partaffinityfield_predict = True

    # Loading the encoder (if necessary downloading from TF)
    dlcparent_path = auxiliaryfunctions.get_deeplabcut_path()
    defaultconfigfile = os.path.join(dlcparent_path, "pose_cfg.yaml")
    defaultinference_configfile = os.path.join(dlcparent_path,
                                               "inference_cfg.yaml")
    model_path, num_shuffles = auxfun_models.Check4weights(
        net_type, Path(dlcparent_path), num_shuffles)

    if Shuffles is None:
        Shuffles = range(1, num_shuffles + 1, 1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]

    # Values that do not depend on the shuffle / training fraction.
    project_root = Path(config).parents[0]
    jointnames = [str(bpt) for bpt in multianimalbodyparts]
    jointnames.extend([str(bpt) for bpt in uniquebodyparts])
    num_joints = len(multianimalbodyparts) + len(uniquebodyparts)

    TrainingFraction = cfg["TrainingFraction"]
    for shuffle in Shuffles:  # Creating shuffles starting from 1
        for trainFraction in TrainingFraction:
            train_inds_temp, test_inds_temp = SplitTrials(
                range(len(img_names)), trainFraction)
            # Map back to the original indices.
            test_positions = set(test_inds_temp)
            test_patterns = [
                re.escape(name) for i, name in enumerate(img_names)
                if i in test_positions
            ]
            if test_patterns:
                mask = Data.index.str.contains("|".join(test_patterns))
            else:
                # An empty pattern would match every row and turn the whole
                # dataset into test data; no test images means no test rows.
                mask = np.zeros(len(Data.index), dtype=bool)
            testIndices = np.flatnonzero(mask)
            trainIndices = np.flatnonzero(~mask)

            ####################################################
            # Generating data structure with labeled information & frame metadata (for deep cut)
            ####################################################
            print(
                "Creating training data for: Shuffle:",
                shuffle,
                "TrainFraction: ",
                trainFraction,
            )

            # Make training file!
            data = format_multianimal_training_data(
                Data,
                trainIndices,
                cfg["project_path"],
                numdigits,
            )

            if len(trainIndices) == 0:
                continue

            (
                datafilename,
                metadatafilename,
            ) = auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg)
            ################################################################################
            # Saving metadata and data file (Pickle file)
            ################################################################################
            auxiliaryfunctions.SaveMetadata(
                os.path.join(project_path, metadatafilename),
                data,
                trainIndices,
                testIndices,
                trainFraction,
            )

            datafilename = datafilename.split(".mat")[0] + ".pickle"
            with open(os.path.join(project_path, datafilename), "wb") as f:
                # Pickle the 'labeled-data' dictionary using the highest protocol available.
                pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

            ################################################################################
            # Creating file structure for training &
            # Test files as well as pose_yaml files (containing training and testing information)
            #################################################################################
            modelfoldername = auxiliaryfunctions.GetModelFolder(
                trainFraction, shuffle, cfg)
            auxiliaryfunctions.attempttomakefolder(
                project_root / modelfoldername, recursive=True)
            auxiliaryfunctions.attempttomakefolder(
                str(project_root / modelfoldername / "train"))
            auxiliaryfunctions.attempttomakefolder(
                str(project_root / modelfoldername / "test"))

            path_train_config = str(
                os.path.join(
                    cfg["project_path"],
                    Path(modelfoldername),
                    "train",
                    "pose_cfg.yaml",
                ))
            path_test_config = str(
                os.path.join(
                    cfg["project_path"],
                    Path(modelfoldername),
                    "test",
                    "pose_cfg.yaml",
                ))
            path_inference_config = str(
                os.path.join(
                    cfg["project_path"],
                    Path(modelfoldername),
                    "test",
                    "inference_cfg.yaml",
                ))

            items2change = {
                "dataset": datafilename,
                "metadataset": metadatafilename,
                "num_joints": num_joints,
                "all_joints": [[i] for i in range(num_joints)],
                "all_joints_names": jointnames,
                "init_weights": model_path,
                "project_path": str(cfg["project_path"]),
                "net_type": net_type,
                "multi_stage": multi_stage,
                "pairwise_loss_weight": 0.1,
                "pafwidth": 20,
                "partaffinityfield_graph": partaffinityfield_graph,
                "partaffinityfield_predict": partaffinityfield_predict,
                "weigh_only_present_joints": False,
                "num_limbs": len(partaffinityfield_graph),
                "dataset_type": dataset_type,
                "optimizer": "adam",
                "batch_size": 8,
                "multi_step": [[1e-4, 7500], [5 * 1e-5, 12000],
                               [1e-5, 200000]],
                "save_iters": 10000,
                "display_iters": 500,
                "num_idchannel":
                len(cfg["individuals"]) if cfg.get("identity", False) else 0,
            }

            trainingdata = MakeTrain_pose_yaml(items2change,
                                               path_train_config,
                                               defaultconfigfile)
            keys2save = [
                "dataset",
                "num_joints",
                "all_joints",
                "all_joints_names",
                "net_type",
                "multi_stage",
                "init_weights",
                "global_scale",
                "location_refinement",
                "locref_stdev",
                "dataset_type",
                "partaffinityfield_predict",
                "pairwise_predict",
                "partaffinityfield_graph",
                "num_limbs",
                "num_idchannel",
            ]

            MakeTest_pose_yaml(
                trainingdata,
                keys2save,
                path_test_config,
                nmsradius=5.0,
                minconfidence=0.01,
            )  # setting important def. values for inference

            # Setting inference cfg file:
            inference_items2change = {
                "minimalnumberofconnections":
                int(len(cfg["multianimalbodyparts"]) / 2),
                # one slot per individual, plus one if unique body parts exist
                "topktoretain":
                len(cfg["individuals"]) +
                int(len(cfg["uniquebodyparts"]) > 0),
                "withid":
                cfg.get("identity", False),
            }
            MakeInference_yaml(inference_items2change, path_inference_config,
                               defaultinference_configfile)

            print(
                "The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!"
            )

Ejemplo n.º 26

Mostrar archivo

def evaluate_multianimal_crossvalidate(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    pbounds=None,
    edgewisecondition=True,
    target="rpck_train",
    inferencecfg=None,
    init_points=20,
    n_iter=50,
    dcorr=10.0,
    leastbpts=1,
    printingintermediatevalues=True,
    modelprefix="",
    plotting=False,
):
    """
    Crossvalidate inference parameters on evaluation data; optimal parametrs will be stored in " inference_cfg.yaml".

    They will then be then used for inference (for analysis of videos). Performs Bayesian Optimization with https://github.com/fmfn/BayesianOptimization

    This is a crucial step. The most important variable (in inferencecfg) to cross-validate is minimalnumberofconnections. Pass
    a reasonable range to optimze (e.g. if you have 5 edges from 1 to 5. If you have 4 bpts and 11 connections from 3 to 9).

    config: string
        Full path of the config.yaml file as a string.

    shuffle: int, optional
        An integer specifying the shuffle index of the training dataset used for training the network. The default is 1.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a list in config.yaml).

    pbounds: dictionary of variables with ranges to crossvalidate.
        By default: pbounds = {
                        'pafthreshold': (0.05, 0.7),
                        'detectionthresholdsquare': (0, 0.9),
                        'minimalnumberofconnections': (1, # connections in your skeleton),
                    }

    inferencecfg: dict, OPTIONAL
        For the variables that are *not* crossvalidated the parameters from inference_cfg.yaml are used, or
        you can overwrite them by passing a dictinary with your preferred parameters.

    edgewisecondition: bool, default True
        Estimates Euclidean distances for each skeleton edge and uses those distance for excluding possible connections.
        If false, uses only one distance for all bodyparts (which is obviously suboptimal).

    target: string, default='rpck_train'
        What metric to optimize. Options are pck/rpck/rmse on train/test set.

    init_points: int, optional (default=10)
        Number of random initial explorations. Probing random regions helps diversify the exploration space.
        Parameter from BayesianOptimization.

    n_iter: int, optional (default=20)
        Number of iterations of Bayesian optimization to perform.
        The larger it is, the higher the likelihood of finding a good extremum.
        Parameter from BayesianOptimization.

    dcorr: float,
        Distance thereshold for percent correct keypoints / relative percent correct keypoints (see paper).

    leastbpts: integer (should be a small number)
        If an animals has less or equal as many body parts in an image it will not be used
        for cross validation. Imagine e.g. if only a single bodypart is present, then
        if animals need a certain minimal number of bodyparts for assembly (minimalnumberofconnections),
        this might not be predictable.

    printingintermediatevalues: bool, default True
        If intermediate metrics RMSE/hits/.. per sample should be printed.


    Examples
    --------

    first run evalute:

    deeplabcut.evaluate_network(path_config_file,Shuffles=[shuffle],plotting=True)

    Then e.g. for finding inference parameters to minimize rmse on test set:

    deeplabcut.evaluate_multianimal_crossvalidate(path_config_file,Shuffles=[shuffle],target='rmse_test')
    """
    from deeplabcut.pose_estimation_tensorflow.lib import crossvalutils
    from deeplabcut.utils import auxfun_multianimal, auxiliaryfunctions
    from easydict import EasyDict as edict

    cfg = auxiliaryfunctions.read_config(config)
    trainFraction = cfg["TrainingFraction"][trainingsetindex]
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        ),
        "df_with_missing",
    )
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, "all")
    colors = visualization.get_cmap(len(comparisonbodyparts),
                                    name=cfg["colormap"])

    # wild guesses for a wide range:
    maxconnections = len(cfg["skeleton"])
    minconnections = 1  # len(cfg['multianimalbodyparts'])-1

    _pbounds = {
        "pafthreshold": (0.05, 0.7),
        "detectionthresholdsquare": (
            0,
            0.9,
        ),  # TODO: set to minimum (from pose_cfg.yaml)
        "minimalnumberofconnections": (minconnections, maxconnections),
    }
    if pbounds is not None:
        _pbounds.update(pbounds)

    if "rpck" in target or "pck" in target:
        maximize = True

    if "rmse" in target:
        maximize = False  # i.e. minimize

    for shuffle in Shuffles:
        evaluationfolder = os.path.join(
            cfg["project_path"],
            str(
                auxiliaryfunctions.GetEvaluationFolder(
                    trainFraction, shuffle, cfg, modelprefix=modelprefix)),
        )
        auxiliaryfunctions.attempttomakefolder(evaluationfolder,
                                               recursive=True)

        datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder, trainFraction, shuffle, cfg)
        _, trainIndices, testIndices, _ = auxiliaryfunctions.LoadMetadata(
            os.path.join(cfg["project_path"], metadatafn))
        modelfolder = os.path.join(
            cfg["project_path"],
            str(
                auxiliaryfunctions.GetModelFolder(trainFraction,
                                                  shuffle,
                                                  cfg,
                                                  modelprefix=modelprefix)),
        )
        path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"
        try:
            dlc_cfg = load_config(str(path_test_config))
        except FileNotFoundError:
            raise FileNotFoundError(
                "It seems the model for shuffle %s and trainFraction %s does not exist."
                % (shuffle, trainFraction))

        # Check which snapshots are available and sort them by # iterations
        Snapshots = np.array([
            fn.split(".")[0]
            for fn in os.listdir(os.path.join(str(modelfolder), "train"))
            if "index" in fn
        ])
        snapindex = -1
        dlc_cfg["init_weights"] = os.path.join(
            str(modelfolder), "train",
            Snapshots[snapindex])  # setting weights to corresponding snapshot.
        trainingsiterations = (dlc_cfg["init_weights"].split(
            os.sep)[-1]).split("-")[
                -1]  # read how many training siterations that corresponds to.

        DLCscorer, _ = auxiliaryfunctions.GetScorerName(
            cfg,
            shuffle,
            trainFraction,
            trainingsiterations,
            modelprefix=modelprefix)

        path_inference_config = Path(
            modelfolder) / "test" / "inference_cfg.yaml"
        if inferencecfg is None:  # then load or initialize
            inferencecfg = auxfun_multianimal.read_inferencecfg(
                path_inference_config, cfg)
        else:
            inferencecfg = edict(inferencecfg)
            auxfun_multianimal.check_inferencecfg_sanity(cfg, inferencecfg)

        inferencecfg.topktoretain = np.inf
        inferencecfg, opt = crossvalutils.bayesian_search(
            config,
            inferencecfg,
            _pbounds,
            edgewisecondition=edgewisecondition,
            shuffle=shuffle,
            trainingsetindex=trainingsetindex,
            target=target,
            maximize=maximize,
            init_points=init_points,
            n_iter=n_iter,
            acq="ei",
            dcorr=dcorr,
            leastbpts=leastbpts,
            modelprefix=modelprefix,
        )

        # update number of individuals to retain.
        inferencecfg.topktoretain = len(
            cfg["individuals"]) + 1 * (len(cfg["uniquebodyparts"]) > 0)

        # calculating result at best best solution
        DataOptParams, poses_gt, poses = crossvalutils.compute_crossval_metrics(
            config, inferencecfg, shuffle, trainingsetindex, modelprefix)

        path_inference_config = str(path_inference_config)
        # print("Quantification:", DataOptParams.head())
        DataOptParams.to_hdf(
            path_inference_config.split(".yaml")[0] + ".h5",
            "df_with_missing",
            format="table",
            mode="w",
        )
        DataOptParams.to_csv(path_inference_config.split(".yaml")[0] + ".csv")
        print("Saving optimal inference parameters...")
        print(DataOptParams.to_string())
        auxiliaryfunctions.write_plainconfig(path_inference_config,
                                             dict(inferencecfg))

        # Store best predictions
        max_indivs = max(pose.shape[0] for pose in poses)
        bpts = dlc_cfg["all_joints_names"]
        container = np.full((len(poses), max_indivs * len(bpts) * 3), np.nan)
        for n, pose in enumerate(poses):
            temp = pose.flatten()
            container[n, :len(temp)] = temp

        header = pd.MultiIndex.from_product(
            [
                [DLCscorer],
                [f"individual{i}" for i in range(1, max_indivs + 1)],
                bpts,
                ["x", "y", "likelihood"],
            ],
            names=["scorer", "individuals", "bodyparts", "coords"],
        )

        df = pd.DataFrame(container, columns=header)
        df.to_hdf(os.path.join(evaluationfolder, f"{DLCscorer}.h5"),
                  key="df_with_missing")

        if plotting:
            foldername = os.path.join(
                str(evaluationfolder),
                "LabeledImages_" + DLCscorer + "_" + Snapshots[snapindex],
            )
            auxiliaryfunctions.attempttomakefolder(foldername)
            for imageindex, imagename in tqdm(enumerate(Data.index)):
                image_path = os.path.join(cfg["project_path"], imagename)
                image = io.imread(image_path)
                frame = img_as_ubyte(skimage.color.gray2rgb(image))
                groundtruthcoordinates = poses_gt[imageindex]
                coords_pred = poses[imageindex][:, :, :2]
                probs_pred = poses[imageindex][:, :, -1:]
                fig = visualization.make_multianimal_labeled_image(
                    frame,
                    groundtruthcoordinates,
                    coords_pred,
                    probs_pred,
                    colors,
                    cfg["dotsize"],
                    cfg["alphavalue"],
                    cfg["pcutoff"],
                )
                visualization.save_labeled_frame(fig, image_path, foldername,
                                                 imageindex in trainIndices)

Ejemplo n.º 27

Mostrar archivo

Archivo: trainingsetmanipulation.py Proyecto: DeepLabCut/DeepLabCut

def create_training_dataset(
    config,
    num_shuffles=1,
    Shuffles=None,
    windows2linux=False,
    userfeedback=False,
    trainIndices=None,
    testIndices=None,
    net_type=None,
    augmenter_type=None,
    posecfg_template=None,
):
    """Creates a training dataset.

    Labels from all the extracted frames are merged into a single .h5 file.
    Only the videos included in the config file are used to create this dataset.

    For multi-animal projects (``multianimalproject`` set in the config file) the
    work is delegated to ``create_multianimaltraining_dataset`` and ``None`` is
    returned.

    Parameters
    ----------
    config : string
        Full path of the ``config.yaml`` file as a string.

    num_shuffles : int, optional, default=1
        Number of shuffles of training dataset to create, i.e. ``[1,2,3]`` for
        ``num_shuffles=3``.

    Shuffles: list[int], optional
        Alternatively the user can also give a list of shuffles. Entries that are
        not integers are ignored.

    windows2linux: bool, optional, default=False
        Deprecated and without effect; passing a truthy value only emits a
        ``FutureWarning``.

    userfeedback: bool, optional, default=False
        If ``False``, all requested train/test splits are created (no matter if they
        already exist). If you want to assure that previous splits etc. are not
        overwritten, set this to ``True`` and you will be asked for each split.

    trainIndices: list of lists, optional, default=None
        List of one or multiple lists containing train indexes.
        A list containing two lists of training indexes will produce two splits.
        Must be given together with ``testIndices``. Values of ``-1`` are treated
        as padding and removed.

    testIndices: list of lists, optional, default=None
        List of one or multiple lists containing test indexes.
        Must be given together with ``trainIndices``. Values of ``-1`` are treated
        as padding and removed.

    net_type: string, optional, default=None
        Type of network. ``None`` uses ``default_net_type`` from the config file
        (falling back to ``resnet_50``). Currently supported options are

        * ``resnet_50``
        * ``resnet_101``
        * ``resnet_152``
        * ``mobilenet_v2_1.0``
        * ``mobilenet_v2_0.75``
        * ``mobilenet_v2_0.5``
        * ``mobilenet_v2_0.35``
        * ``efficientnet-b0``
        * ``efficientnet-b1``
        * ``efficientnet-b2``
        * ``efficientnet-b3``
        * ``efficientnet-b4``
        * ``efficientnet-b5``
        * ``efficientnet-b6``

    augmenter_type: string, optional, default=None
        Type of augmenter. ``None`` uses ``default_augmenter`` from the config file
        (falling back to ``imgaug``). Currently supported augmenters are

        * ``default``
        * ``scalecrop``
        * ``imgaug``
        * ``tensorpack``
        * ``deterministic``

    posecfg_template: string, optional, default=None
        Path to a ``pose_cfg.yaml`` file to use as a template for generating the new
        one for the current iteration. Useful if you would like to start with the same
        parameters a previous training iteration. None uses the default
        ``pose_cfg.yaml``.

    Returns
    -------
    list(tuple) or None
        If training dataset was successfully created, a list of tuples is returned.
        Each tuple has the form
        ``(training_fraction, shuffle, (train_indices, test_indices))``.

        Returns None if training dataset could not be created (no annotated data
        could be merged) or if the project is a multi-animal project.

    Raises
    ------
    ValueError
        If ``posecfg_template`` does not point to a ``pose_cfg.yaml`` file, if
        ``net_type`` or ``augmenter_type`` is not supported, if only one of
        ``trainIndices``/``testIndices`` is given, if they differ in length, or if
        fewer shuffles than splits are available.
    Exception
        If ``userfeedback`` is set and the user declines to overwrite an existing
        model folder.

    Notes
    -----
    Use the function ``add_new_videos`` at any stage of the project to add more videos
    to the project.

    Examples
    --------

    Linux/MacOS

    >>> deeplabcut.create_training_dataset(
            '/analysis/project/reaching-task/config.yaml', num_shuffles=1,
        )

    Windows

    >>> deeplabcut.create_training_dataset(
            'C:\\Users\\Ulf\\looming-task\\config.yaml', Shuffles=[3,17,5],
        )
    """
    import scipy.io as sio

    if windows2linux:
        # DeprecationWarnings are silenced since Python 3.2 unless triggered in __main__
        warnings.warn(
            "`windows2linux` has no effect since 2.2.0.4 and will be removed in 2.2.1.",
            FutureWarning,
        )

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    if posecfg_template:
        if not posecfg_template.endswith("pose_cfg.yaml"):
            raise ValueError(
                "posecfg_template argument must contain path to a pose_cfg.yaml file"
            )
        else:
            print("Reloading pose_cfg parameters from " + posecfg_template +
                  '\n')
            from deeplabcut.utils.auxiliaryfunctions import read_plainconfig

            prior_cfg = read_plainconfig(posecfg_template)
    if cfg.get("multianimalproject", False):
        from deeplabcut.generate_training_dataset.multiple_individuals_trainingsetmanipulation import (
            create_multianimaltraining_dataset, )

        create_multianimaltraining_dataset(config,
                                           num_shuffles,
                                           Shuffles,
                                           net_type=net_type)
    else:
        scorer = cfg["scorer"]
        project_path = cfg["project_path"]
        # Create path for training sets & store data there
        trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(
            cfg)  # Path concatenation OS platform independent
        auxiliaryfunctions.attempttomakefolder(Path(
            os.path.join(project_path, str(trainingsetfolder))),
                                               recursive=True)

        Data = merge_annotateddatasets(
            cfg,
            Path(os.path.join(project_path, trainingsetfolder)),
        )
        if Data is None:
            return
        Data = Data[scorer]  # extract labeled data

        # loading & linking pretrained models
        if net_type is None:
            net_type = cfg.get("default_net_type", "resnet_50")
        elif not any(family in net_type
                     for family in ("resnet", "mobilenet", "efficientnet")):
            raise ValueError(f"Invalid network type: {net_type}")

        if augmenter_type is None:
            augmenter_type = cfg.get("default_augmenter", "imgaug")
            if augmenter_type is None:  # this could be in config.yaml for old projects!
                # updating variable if null/None! #backwardscompatability
                auxiliaryfunctions.edit_config(config,
                                               {"default_augmenter": "imgaug"})
                augmenter_type = "imgaug"
        elif augmenter_type not in [
                "default",
                "scalecrop",
                "imgaug",
                "tensorpack",
                "deterministic",
        ]:
            raise ValueError(f"Invalid augmenter type: {augmenter_type}")

        if posecfg_template:
            if net_type != prior_cfg["net_type"]:
                print(
                    "WARNING: Specified net_type does not match net_type from posecfg_template path entered. Proceed with caution."
                )
            if augmenter_type != prior_cfg["dataset_type"]:
                print(
                    "WARNING: Specified augmenter_type does not match dataset_type from posecfg_template path entered. Proceed with caution."
                )

        # Loading the encoder (if necessary downloading from TF)
        dlcparent_path = auxiliaryfunctions.get_deeplabcut_path()
        if posecfg_template:
            defaultconfigfile = posecfg_template
        else:
            defaultconfigfile = os.path.join(dlcparent_path, "pose_cfg.yaml")
        model_path, num_shuffles = auxfun_models.check_for_weights(
            net_type, Path(dlcparent_path), num_shuffles)

        if Shuffles is None:
            Shuffles = range(1, num_shuffles + 1)
        else:
            Shuffles = [i for i in Shuffles if isinstance(i, int)]

        if trainIndices is None and testIndices is None:
            # No explicit split given: draw one random split per
            # (training fraction, shuffle) combination.
            splits = [(
                trainFraction,
                shuffle,
                SplitTrials(range(len(Data.index)), trainFraction),
            ) for trainFraction in cfg["TrainingFraction"]
                      for shuffle in Shuffles]
        else:
            if trainIndices is None or testIndices is None:
                raise ValueError(
                    "trainIndices and testIndices must either both be given or both be None."
                )
            # Every train list needs a matching test list and a shuffle index.
            # (A chained `a != b != c` would skip the shuffle check whenever
            # a == b.) Surplus shuffles are tolerated and simply left unused.
            if (len(trainIndices) != len(testIndices)
                    or len(Shuffles) < len(trainIndices)):
                raise ValueError(
                    "Number of Shuffles and train and test indexes should be equal."
                )
            splits = []
            for shuffle, (train_inds, test_inds) in enumerate(
                    zip(trainIndices, testIndices)):
                trainFraction = round(
                    len(train_inds) * 1.0 / (len(train_inds) + len(test_inds)),
                    2)
                print(
                    f"You passed a split with the following fraction: {int(100 * trainFraction)}%"
                )
                # Now that the training fraction is guaranteed to be correct,
                # the values added to pad the indices are removed.
                train_inds = np.asarray(train_inds)
                train_inds = train_inds[train_inds != -1]
                test_inds = np.asarray(test_inds)
                test_inds = test_inds[test_inds != -1]
                splits.append((trainFraction, Shuffles[shuffle], (train_inds,
                                                                  test_inds)))

        bodyparts = cfg["bodyparts"]
        nbodyparts = len(bodyparts)
        # Local names are used for the per-split indices so that the
        # `trainIndices`/`testIndices` arguments are not rebound inside the loop.
        for trainFraction, shuffle, (train_inds, test_inds) in splits:
            if len(train_inds) == 0:
                # Nothing to train on for this split; skip it.
                continue

            if userfeedback:
                trainposeconfigfile, _, _ = training.return_train_network_path(
                    config,
                    shuffle=shuffle,
                    trainingsetindex=cfg["TrainingFraction"].index(
                        trainFraction),
                )
                if trainposeconfigfile.is_file():
                    askuser = input(
                        "The model folder is already present. If you continue, it will overwrite the existing model (split). Do you want to continue?(yes/no): "
                    )
                    # Accept any capitalisation of "no"/"n" as a refusal.
                    if askuser.strip().lower() in ("no", "n"):
                        raise Exception(
                            "Use the Shuffles argument as a list to specify a different shuffle index. Check out the help for more details."
                        )

            ####################################################
            # Generating data structure with labeled information & frame metadata (for deep cut)
            ####################################################
            # Make training file!
            (
                datafilename,
                metadatafilename,
            ) = auxiliaryfunctions.GetDataandMetaDataFilenames(
                trainingsetfolder, trainFraction, shuffle, cfg)

            ################################################################################
            # Saving data file (convert to training file for deeper cut (*.mat))
            ################################################################################
            data, MatlabData = format_training_data(
                Data, train_inds, nbodyparts, project_path)
            sio.savemat(os.path.join(project_path, datafilename),
                        {"dataset": MatlabData})

            ################################################################################
            # Saving metadata (Pickle file)
            ################################################################################
            auxiliaryfunctions.SaveMetadata(
                os.path.join(project_path, metadatafilename),
                data,
                train_inds,
                test_inds,
                trainFraction,
            )

            ################################################################################
            # Creating file structure for training &
            # Test files as well as pose_yaml files (containing training and testing information)
            #################################################################################
            modelfoldername = auxiliaryfunctions.get_model_folder(
                trainFraction, shuffle, cfg)
            auxiliaryfunctions.attempttomakefolder(
                Path(config).parents[0] / modelfoldername, recursive=True)
            auxiliaryfunctions.attempttomakefolder(
                str(Path(config).parents[0] / modelfoldername) + "/train")
            auxiliaryfunctions.attempttomakefolder(
                str(Path(config).parents[0] / modelfoldername) + "/test")

            path_train_config = str(
                os.path.join(
                    cfg["project_path"],
                    Path(modelfoldername),
                    "train",
                    "pose_cfg.yaml",
                ))
            path_test_config = str(
                os.path.join(
                    cfg["project_path"],
                    Path(modelfoldername),
                    "test",
                    "pose_cfg.yaml",
                ))
            items2change = {
                "dataset": datafilename,
                "metadataset": metadatafilename,
                "num_joints": len(bodyparts),
                "all_joints": [[i] for i in range(len(bodyparts))],
                "all_joints_names": [str(bpt) for bpt in bodyparts],
                "init_weights": model_path,
                "project_path": str(cfg["project_path"]),
                "net_type": net_type,
                "dataset_type": augmenter_type,
            }

            items2drop = {}
            if augmenter_type == "scalecrop":
                # these values are dropped as scalecrop
                # doesn't have rotation implemented
                items2drop = {"rotation": 0, "rotratio": 0.0}
            # Also drop maDLC smart cropping augmentation parameters
            for key in [
                    "pre_resize", "crop_size", "max_shift", "crop_sampling"
            ]:
                items2drop[key] = None

            trainingdata = MakeTrain_pose_yaml(items2change,
                                               path_train_config,
                                               defaultconfigfile,
                                               items2drop)

            # Subset of the training configuration copied into the test config.
            keys2save = [
                "dataset",
                "num_joints",
                "all_joints",
                "all_joints_names",
                "net_type",
                "init_weights",
                "global_scale",
                "location_refinement",
                "locref_stdev",
            ]
            MakeTest_pose_yaml(trainingdata, keys2save, path_test_config)
            print(
                "The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!"
            )

        return splits