def return_train_network_path(config, shuffle=1, trainingsetindex=0, modelprefix=""):
    """Locate the pose config files and the snapshot folder of a model.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    shuffle: int
        Integer value specifying the shuffle index to select for training.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first
        (note that TrainingFraction is a list in config.yaml).

    modelprefix: str, optional
        Forwarded unchanged to ``auxiliaryfunctions.get_model_folder``.

    Returns
    -------
    tuple of pathlib.Path
        The triple ``(trainposeconfigfile, testposeconfigfile, snapshotfolder)``;
        ``snapshotfolder`` is the ``train`` directory of the model folder.
    """
    from deeplabcut.utils import auxiliaryfunctions

    cfg = auxiliaryfunctions.read_config(config)
    train_fraction = cfg["TrainingFraction"][trainingsetindex]
    model_dir = str(
        auxiliaryfunctions.get_model_folder(
            train_fraction, shuffle, cfg, modelprefix=modelprefix
        )
    )
    project_root = cfg["project_path"]

    def _in_model_dir(*parts):
        # Every returned path lives below <project_path>/<model folder>.
        return Path(os.path.join(project_root, model_dir, *parts))

    trainposeconfigfile = _in_model_dir("train", "pose_cfg.yaml")
    testposeconfigfile = _in_model_dir("test", "pose_cfg.yaml")
    snapshotfolder = _in_model_dir("train")
    return trainposeconfigfile, testposeconfigfile, snapshotfolder
def edit_pose_config(self, event):
    """Open the training ``pose_cfg.yaml`` in the system's default application.

    Enables the shuffle / iteration / snapshot controls, resolves the path of
    the training pose config for the last entry of ``TrainingFraction`` and
    the currently selected shuffle, asks the operating system to open it and
    finally loads it into ``self.pose_cfg``.

    Parameters
    ----------
    event :
        The GUI event that triggered the handler (unused).

    Raises
    ------
    FileNotFoundError
        If the operating system reports that the file could not be opened.
    """
    self.shuffles.Enable(True)
    self.display_iters.Enable(True)
    self.save_iters.Enable(True)
    self.max_iters.Enable(True)
    self.snapshots.Enable(True)

    # Read the project config to locate the pose config file
    cfg = auxiliaryfunctions.read_config(self.config)
    trainFraction = cfg["TrainingFraction"]
    self.pose_cfg_path = os.path.join(
        cfg["project_path"],
        auxiliaryfunctions.get_model_folder(
            trainFraction[-1], self.shuffles.GetValue(), cfg
        ),
        "train",
        "pose_cfg.yaml",
    )

    # Let the user open the file with the default text editor.
    if sys.platform == "darwin":
        # `subprocess.call` returns the exit status of `open`; 0 means success.
        # Keeping the status (instead of overwriting it with True) lets the
        # error branch below fire on macOS as well.
        self.file_open_bool = subprocess.call(["open", self.pose_cfg_path]) == 0
    else:
        self.file_open_bool = webbrowser.open(self.pose_cfg_path)

    if self.file_open_bool:
        self.pose_cfg = auxiliaryfunctions.read_plainconfig(self.pose_cfg_path)
    else:
        raise FileNotFoundError("File not found!")
def edit_inf_config(self, event):
    """Open the model's ``inference_cfg.yaml`` in the system's default application.

    Resolves the path of the inference config inside the ``test`` folder of
    the model selected by ``self.shuffle``, asks the operating system to open
    it and loads it into ``self.inf_cfg``.

    Parameters
    ----------
    event :
        The GUI event that triggered the handler (unused).

    Raises
    ------
    FileNotFoundError
        If the operating system reports that the file could not be opened.
    """
    # Read the project config to locate the inference config file
    cfg = auxiliaryfunctions.read_config(self.config)
    # NOTE(review): no training-set selector is visible from this method, so
    # the last configured fraction is used; confirm this is the intended one.
    trainFraction = cfg["TrainingFraction"][-1]
    self.inf_cfg_path = os.path.join(
        cfg["project_path"],
        auxiliaryfunctions.get_model_folder(
            trainFraction, self.shuffle.GetValue(), cfg
        ),
        "test",
        "inference_cfg.yaml",
    )

    # Let the user open the file with the default text editor.
    if sys.platform == "darwin":
        # `subprocess.call` returns the exit status of `open`; 0 means success.
        self.file_open_bool = subprocess.call(["open", self.inf_cfg_path]) == 0
    else:
        self.file_open_bool = webbrowser.open(self.inf_cfg_path)

    if self.file_open_bool:
        self.inf_cfg = auxiliaryfunctions.read_config(self.inf_cfg_path)
    else:
        raise FileNotFoundError("File not found!")
def create_multianimaltraining_dataset(
    config,
    num_shuffles=1,
    Shuffles=None,
    windows2linux=False,
    net_type=None,
    numdigits=2,
    crop_size=(400, 400),
    crop_sampling="hybrid",
    paf_graph=None,
    trainIndices=None,
    testIndices=None,
    n_edges_threshold=105,
    paf_graph_degree=6,
):
    """
    Creates a training dataset for multi-animal datasets. Labels from all the
    extracted frames are merged into a single .h5 file.
    Only the videos included in the config file are used to create this dataset.
    [OPTIONAL] Use the function 'add_new_videos' at any stage of the project to
    add more videos to the project.

    Important difference to the standard (single-animal) dataset creation:
     - stores coordinates with numdigits as many digits

    Parameter
    ----------
    config : string
        Full path of the config.yaml file as a string.

    num_shuffles : int, optional
        Number of shuffles of training dataset to create, i.e. [1,2,3] for
        num_shuffles=3. Default is set to 1.

    Shuffles: list of shuffles.
        Alternatively the user can also give a list of shuffles (integers!).
        Entries that are not integers are silently dropped.

    windows2linux: bool, optional
        Deprecated; only triggers a FutureWarning when truthy.

    net_type: string
        Type of networks. Currently resnet_50, resnet_101, and resnet_152,
        efficientnet-b0, efficientnet-b1, efficientnet-b2, efficientnet-b3,
        efficientnet-b4, efficientnet-b5, and efficientnet-b6 as well as
        dlcrnet_ms5 are supported (not the MobileNets!).
        See Lauer et al. 2021
        https://www.biorxiv.org/content/10.1101/2021.04.30.442096v1

    numdigits: int, optional
        Forwarded to the training data formatter.

    crop_size: tuple of int, optional
        Dimensions (width, height) of the crops for data augmentation.
        Default is 400x400.

    crop_sampling: str, optional
        Crop centers sampling method. Must be either:
        "uniform" (randomly over the image),
        "keypoints" (randomly over the annotated keypoints),
        "density" (weighing preferentially dense regions of keypoints),
        or "hybrid" (alternating randomly between "uniform" and "density").
        Default is "hybrid".

    paf_graph: list of lists, or "config" optional (default=None)
        If not None, overwrite the default complete graph. This is useful for
        advanced users who already know a good graph, or simply want to use a
        specific one. Note that, in that case, the data-driven selection
        procedure upon model evaluation will be skipped.

        "config" will use the skeleton defined in the config file.

    trainIndices: list of lists, optional (default=None)
        List of one or multiple lists containing train indexes.
        A list containing two lists of training indexes will produce two splits.

    testIndices: list of lists, optional (default=None)
        List of one or multiple lists containing test indexes.

    n_edges_threshold: int, optional (default=105)
        Number of edges above which the graph is automatically pruned.

    paf_graph_degree: int, optional (default=6)
        Degree of paf_graph when automatically pruning it (before training).

    Raises
    ------
    ValueError
        If ``crop_size`` or ``crop_sampling`` is invalid, if ``net_type`` is
        unsupported, if only one of ``trainIndices``/``testIndices`` is given,
        or if the numbers of shuffles, train splits and test splits differ.

    Example
    --------
    >>> deeplabcut.create_multianimaltraining_dataset('/analysis/project/reaching-task/config.yaml',num_shuffles=1)

    >>> deeplabcut.create_multianimaltraining_dataset('/analysis/project/reaching-task/config.yaml', Shuffles=[0,1,2], trainIndices=[trainInd1, trainInd2, trainInd3], testIndices=[testInd1, testInd2, testInd3])

    Windows:
    >>> deeplabcut.create_multianimaltraining_dataset(r'C:\\Users\\Ulf\\looming-task\\config.yaml',Shuffles=[3,17,5])
    --------
    """
    import pickle

    if windows2linux:
        warnings.warn(
            "`windows2linux` has no effect since 2.2.0.4 and will be removed in 2.2.1.",
            FutureWarning,
        )

    if len(crop_size) != 2 or not all(isinstance(v, int) for v in crop_size):
        raise ValueError("Crop size must be a tuple of two integers (width, height).")

    if crop_sampling not in ("uniform", "keypoints", "density", "hybrid"):
        raise ValueError(
            f"Invalid sampling {crop_sampling}. Must be "
            f"either 'uniform', 'keypoints', 'density', or 'hybrid."
        )

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    scorer = cfg["scorer"]
    project_path = cfg["project_path"]
    # Create path for training sets & store data there
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    full_training_path = Path(project_path, trainingsetfolder)
    auxiliaryfunctions.attempttomakefolder(full_training_path, recursive=True)

    Data = merge_annotateddatasets(cfg, full_training_path)
    if Data is None:
        return
    Data = Data[scorer]

    if net_type is None:  # loading & linking pretrained models
        net_type = cfg.get("default_net_type", "dlcrnet_ms5")
    elif not any(net in net_type for net in ("resnet", "eff", "dlc", "mob")):
        raise ValueError(f"Unsupported network {net_type}.")

    multi_stage = False
    ### dlcnet_ms5: backbone resnet50 + multi-fusion & multi-stage module
    ### dlcr101_ms5/dlcr152_ms5: backbone resnet101/152 + multi-fusion & multi-stage module
    if all(net in net_type for net in ("dlcr", "_ms5")):
        num_layers = re.findall("dlcr([0-9]*)", net_type)[0]
        if num_layers == "":
            num_layers = 50
        net_type = "resnet_{}".format(num_layers)
        multi_stage = True

    dataset_type = "multi-animal-imgaug"
    (
        individuals,
        uniquebodyparts,
        multianimalbodyparts,
    ) = auxfun_multianimal.extractindividualsandbodyparts(cfg)

    if paf_graph is None:  # Automatically form a complete PAF graph
        n_bpts = len(multianimalbodyparts)
        partaffinityfield_graph = [
            list(edge) for edge in combinations(range(n_bpts), 2)
        ]
        n_edges_orig = len(partaffinityfield_graph)
        # If the graph is unnecessarily large (with 15+ keypoints by default),
        # we randomly prune it to a size guaranteeing an average node degree of 6;
        # see Suppl. Fig S9c in Lauer et al., 2022.
        if n_edges_orig >= n_edges_threshold:
            partaffinityfield_graph = auxfun_multianimal.prune_paf_graph(
                partaffinityfield_graph,
                average_degree=paf_graph_degree,
            )
    else:
        if paf_graph == "config":
            # Use the skeleton defined in the config file
            skeleton = cfg["skeleton"]
            paf_graph = [
                sorted(
                    (multianimalbodyparts.index(bpt1), multianimalbodyparts.index(bpt2))
                )
                for bpt1, bpt2 in skeleton
            ]
            print(
                "Using `skeleton` from the config file as a paf_graph. Data-driven skeleton will not be computed."
            )

        # Ignore possible connections between 'multi' and 'unique' body parts;
        # one can never be too careful...
        to_ignore = auxfun_multianimal.filter_unwanted_paf_connections(cfg, paf_graph)
        partaffinityfield_graph = [
            edge for i, edge in enumerate(paf_graph) if i not in to_ignore
        ]
        auxfun_multianimal.validate_paf_graph(cfg, partaffinityfield_graph)

    print("Utilizing the following graph:", partaffinityfield_graph)
    # Disable the prediction of PAFs if the graph is empty
    partaffinityfield_predict = bool(partaffinityfield_graph)

    # Loading the encoder (if necessary downloading from TF)
    dlcparent_path = auxiliaryfunctions.get_deeplabcut_path()
    defaultconfigfile = os.path.join(dlcparent_path, "pose_cfg.yaml")
    model_path, num_shuffles = auxfun_models.check_for_weights(
        net_type, Path(dlcparent_path), num_shuffles
    )

    if Shuffles is None:
        Shuffles = range(1, num_shuffles + 1, 1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]

    if trainIndices is None and testIndices is None:
        splits = []
        for shuffle in Shuffles:  # Creating shuffles starting from 1
            for train_frac in cfg["TrainingFraction"]:
                train_inds, test_inds = SplitTrials(range(len(Data)), train_frac)
                splits.append((train_frac, shuffle, (train_inds, test_inds)))
    else:
        if trainIndices is None or testIndices is None:
            raise ValueError(
                "`trainIndices` and `testIndices` must be passed together."
            )
        # `a != b != c` only compares neighbours and is False as soon as
        # a == b, so the three lengths are compared for equality explicitly.
        if not (len(trainIndices) == len(testIndices) == len(Shuffles)):
            raise ValueError(
                "Number of Shuffles and train and test indexes should be equal."
            )
        splits = []
        for shuffle, (train_inds, test_inds) in enumerate(
            zip(trainIndices, testIndices)
        ):
            trainFraction = round(
                len(train_inds) * 1.0 / (len(train_inds) + len(test_inds)), 2
            )
            print(
                f"You passed a split with the following fraction: {int(100 * trainFraction)}%"
            )
            # Now that the training fraction is guaranteed to be correct,
            # the values added to pad the indices are removed.
            train_inds = np.asarray(train_inds)
            train_inds = train_inds[train_inds != -1]
            test_inds = np.asarray(test_inds)
            test_inds = test_inds[test_inds != -1]
            splits.append((trainFraction, Shuffles[shuffle], (train_inds, test_inds)))

    for trainFraction, shuffle, (train_inds, test_inds) in splits:
        ####################################################
        # Generating data structure with labeled information & frame metadata (for deep cut)
        ####################################################
        print(
            "Creating training data for: Shuffle:",
            shuffle,
            "TrainFraction: ",
            trainFraction,
        )

        # Make training file!
        data = format_multianimal_training_data(
            Data,
            train_inds,
            cfg["project_path"],
            numdigits,
        )

        # Nothing is written for a split without training samples.
        if len(train_inds) == 0:
            continue

        (
            datafilename,
            metadatafilename,
        ) = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder, trainFraction, shuffle, cfg
        )
        ################################################################################
        # Saving metadata and data file (Pickle file)
        ################################################################################
        auxiliaryfunctions.SaveMetadata(
            os.path.join(project_path, metadatafilename),
            data,
            train_inds,
            test_inds,
            trainFraction,
        )

        datafilename = datafilename.split(".mat")[0] + ".pickle"
        with open(os.path.join(project_path, datafilename), "wb") as f:
            # Pickle the 'labeled-data' dictionary using the highest protocol available.
            pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

        ################################################################################
        # Creating file structure for training &
        # Test files as well as pose_yaml files (containing training and testing information)
        #################################################################################
        modelfoldername = auxiliaryfunctions.get_model_folder(
            trainFraction, shuffle, cfg
        )
        auxiliaryfunctions.attempttomakefolder(
            Path(config).parents[0] / modelfoldername, recursive=True
        )
        auxiliaryfunctions.attempttomakefolder(
            str(Path(config).parents[0] / modelfoldername / "train")
        )
        auxiliaryfunctions.attempttomakefolder(
            str(Path(config).parents[0] / modelfoldername / "test")
        )

        path_train_config = str(
            os.path.join(
                cfg["project_path"],
                Path(modelfoldername),
                "train",
                "pose_cfg.yaml",
            )
        )
        path_test_config = str(
            os.path.join(
                cfg["project_path"],
                Path(modelfoldername),
                "test",
                "pose_cfg.yaml",
            )
        )
        path_inference_config = str(
            os.path.join(
                cfg["project_path"],
                Path(modelfoldername),
                "test",
                "inference_cfg.yaml",
            )
        )

        # Multi-animal body parts come first, unique body parts last.
        jointnames = [str(bpt) for bpt in multianimalbodyparts]
        jointnames.extend([str(bpt) for bpt in uniquebodyparts])
        num_joints = len(multianimalbodyparts) + len(uniquebodyparts)
        items2change = {
            "dataset": datafilename,
            "metadataset": metadatafilename,
            "num_joints": num_joints,
            "all_joints": [[i] for i in range(num_joints)],
            "all_joints_names": jointnames,
            "init_weights": model_path,
            "project_path": str(cfg["project_path"]),
            "net_type": net_type,
            "multi_stage": multi_stage,
            "pairwise_loss_weight": 0.1,
            "pafwidth": 20,
            "partaffinityfield_graph": partaffinityfield_graph,
            "partaffinityfield_predict": partaffinityfield_predict,
            "weigh_only_present_joints": False,
            "num_limbs": len(partaffinityfield_graph),
            "dataset_type": dataset_type,
            "optimizer": "adam",
            "batch_size": 8,
            "multi_step": [[1e-4, 7500], [5 * 1e-5, 12000], [1e-5, 200000]],
            "save_iters": 10000,
            "display_iters": 500,
            "num_idchannel": len(cfg["individuals"])
            if cfg.get("identity", False)
            else 0,
            "crop_size": list(crop_size),
            "crop_sampling": crop_sampling,
        }

        trainingdata = MakeTrain_pose_yaml(
            items2change, path_train_config, defaultconfigfile
        )
        keys2save = [
            "dataset",
            "num_joints",
            "all_joints",
            "all_joints_names",
            "net_type",
            "multi_stage",
            "init_weights",
            "global_scale",
            "location_refinement",
            "locref_stdev",
            "dataset_type",
            "partaffinityfield_predict",
            "pairwise_predict",
            "partaffinityfield_graph",
            "num_limbs",
            "num_idchannel",
        ]

        MakeTest_pose_yaml(
            trainingdata,
            keys2save,
            path_test_config,
            nmsradius=5.0,
            minconfidence=0.01,
            sigma=1,
            locref_smooth=False,
        )  # setting important def. values for inference

        # Setting inference cfg file:
        defaultinference_configfile = os.path.join(
            dlcparent_path, "inference_cfg.yaml"
        )
        items2change = {
            "minimalnumberofconnections": int(len(cfg["multianimalbodyparts"]) / 2),
            "topktoretain": len(cfg["individuals"])
            + 1 * (len(cfg["uniquebodyparts"]) > 0),
            "withid": cfg.get("identity", False),
        }
        MakeInference_yaml(
            items2change, path_inference_config, defaultinference_configfile
        )

        print(
            "The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!"
        )
def load_model(cfg, shuffle=1, trainingsetindex=0, TFGPUinference=True, modelprefix=""):
    """
    Loads a tensorflow session with a DLC model from the associated configuration

    Parameters:
    -----------
    cfg : dict
        Configuration read from the project's main config.yaml file

    shuffle : int, optional
        which shuffle to use

    trainingsetindex : int. optional
        which training fraction to use, identified by its index

    TFGPUinference : bool, optional
        use tensorflow inference model? default = True

    modelprefix : str, optional
        forwarded to ``auxiliaryfunctions.get_model_folder``

    Returns:
    --------
    sess : tensorflow session
        tensorflow session with DLC model from the provided configuration,
        shuffle, and trainingsetindex

    input : string
        name of the first operation of the default tensorflow graph

    output : list of string
        names of the output nodes; depends on ``TFGPUinference`` and on
        ``location_refinement`` in the pose config

    dlc_cfg : dict
        the training pose config, with ``init_weights`` pointing at the
        selected snapshot, ``num_outputs`` resolved and ``batch_size`` set
        to None

    Raises:
    -------
    FileNotFoundError
        if the training pose config, the train folder, or any snapshot is
        missing
    """

    ########################
    ### find snapshot to use
    ########################
    train_fraction = cfg["TrainingFraction"][trainingsetindex]
    model_folder = os.path.join(
        cfg["project_path"],
        str(
            auxiliaryfunctions.get_model_folder(
                train_fraction, shuffle, cfg, modelprefix=modelprefix
            )
        ),
    )
    path_train_config = os.path.normpath(model_folder + "/train/pose_cfg.yaml")

    try:
        dlc_cfg = load_config(str(path_train_config))
    except FileNotFoundError as err:
        raise FileNotFoundError(
            "It seems the model for shuffle %s and trainFraction %s does not exist."
            % (shuffle, train_fraction)
        ) from err

    # Check which snapshots are available and sort them by # iterations
    try:
        Snapshots = np.array(
            [
                fn.split(".")[0]
                for fn in os.listdir(os.path.join(model_folder, "train"))
                if "index" in fn
            ]
        )
    except FileNotFoundError as err:
        raise FileNotFoundError(
            "Snapshots not found! It seems the dataset for shuffle %s has not been trained/does not exist.\n Please train it before trying to export.\n Use the function 'train_network' to train the network for shuffle %s."
            % (shuffle, shuffle)
        ) from err

    if len(Snapshots) == 0:
        raise FileNotFoundError(
            "The train folder for iteration %s and shuffle %s exists, but no snapshots were found.\n Please train this model before trying to export.\n Use the function 'train_network' to train the network for iteration %s shuffle %s."
            % (cfg["iteration"], shuffle, cfg["iteration"], shuffle)
        )

    if cfg["snapshotindex"] == "all":
        print(
            "Snapshotindex is set to 'all' in the config.yaml file. Changing snapshot index to -1!"
        )
        snapshotindex = -1
    else:
        snapshotindex = cfg["snapshotindex"]

    # Snapshot names look like "<prefix>-<iterations>"; order them by iteration.
    increasing_indices = np.argsort([int(m.split("-")[1]) for m in Snapshots])
    Snapshots = Snapshots[increasing_indices]

    ####################################
    ### Load and setup CNN part detector
    ####################################
    dlc_cfg["init_weights"] = os.path.join(
        model_folder, "train", Snapshots[snapshotindex]
    )
    dlc_cfg["num_outputs"] = cfg.get("num_outputs", dlc_cfg.get("num_outputs", 1))
    dlc_cfg["batch_size"] = None

    # load network
    if TFGPUinference:
        sess, _, _ = predict.setup_GPUpose_prediction(dlc_cfg)
        output = ["concat_1"]
    else:
        sess, _, _ = predict.setup_pose_prediction(dlc_cfg)
        if dlc_cfg["location_refinement"]:
            output = ["Sigmoid", "pose/locref_pred/block4/BiasAdd"]
        else:
            output = ["Sigmoid", "pose/part_pred/block4/BiasAdd"]

    input_name = tf.compat.v1.get_default_graph().get_operations()[0].name

    return sess, input_name, output, dlc_cfg
print("Train dataset created.") # Check the training image paths are correctly stored as arrays of strings trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg) datafile, _ = auxiliaryfunctions.GetDataandMetaDataFilenames( trainingsetfolder, 0.8, 1, cfg, ) datafile = datafile.split(".mat")[0] + ".pickle" with open(os.path.join(cfg["project_path"], datafile), "rb") as f: pickledata = pickle.load(f) num_images = len(pickledata) assert all(len(pickledata[i]["joints"]) == 3 for i in range(num_images)) print("Editing pose config...") model_folder = auxiliaryfunctions.get_model_folder( TRAIN_SIZE, 1, cfg, cfg["project_path"] ) pose_config_path = os.path.join(model_folder, "train", "pose_cfg.yaml") edits = { "global_scale": 0.5, "batch_size": 1, "save_iters": N_ITER, "display_iters": N_ITER // 2, "crop_size": [200, 200], # "multi_step": [[0.001, N_ITER]], } deeplabcut.auxiliaryfunctions.edit_config(pose_config_path, edits) print("Pose config edited.") print("Training network...") deeplabcut.train_network(config_path, maxiters=N_ITER)
def evaluate_network(
    config,
    Shuffles=[1],
    trainingsetindex=0,
    plotting=False,
    show_errors=True,
    comparisonbodyparts="all",
    gputouse=None,
    rescale=False,
    modelprefix="",
):
    """Evaluates the network.

    Evaluates the network based on the saved models at different stages of the
    training network. The evaluation results are stored in the .h5 and .csv file
    under the subdirectory 'evaluation_results'. Change the snapshotindex
    parameter in the config file to 'all' in order to evaluate all the saved
    models.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file.

    Shuffles: list, optional, default=[1]
        List of integers specifying the shuffle indices of the training dataset.
        The list is only iterated over, never modified.

    trainingsetindex: int or str, optional, default=0
        Integer specifying which "TrainingsetFraction" to use.
        Note that "TrainingFraction" is a list in config.yaml. This variable can
        also be set to "all".

    plotting: bool or str, optional, default=False
        Plots the predictions on the train and test images.
        If provided it must be either ``True``, ``False``, ``"bodypart"``, or
        ``"individual"``. Setting to ``True`` defaults as ``"bodypart"`` for
        multi-animal projects.

    show_errors: bool, optional, default=True
        Display train and test errors.

    comparisonbodyparts: str or list, optional, default="all"
        The average error will be computed for those body parts only.
        The provided list has to be a subset of the defined body parts.

    gputouse: int or None, optional, default=None
        Indicates the GPU to use (see number in ``nvidia-smi``). If you do not
        have a GPU put ``None``.
        See: https://nvidia.custhelp.com/app/answers/detail/a_id/3751/~/useful-nvidia-smi-queries

    rescale: bool, optional, default=False
        Evaluate the model at the ``'global_scale'`` variable (as set in the
        ``pose_config.yaml`` file for a particular project). I.e. every image
        will be resized according to that scale and prediction will be compared
        to the resized ground truth. The error will be reported in pixels at
        rescaled to the *original* size. I.e. For a [200,200] pixel image
        evaluated at ``global_scale=.5``, the predictions are calculated on
        [100,100] pixel images, compared to 1/2*ground truth and this error is
        then multiplied by 2!. The evaluation images are also shown for the
        original size!

    modelprefix: str, optional, default=""
        Directory containing the deeplabcut models to use when evaluating the
        network. By default, the models are assumed to exist in the project
        folder.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``plotting`` has an unknown value, if ``trainingsetindex`` is out of
        range, or if ``snapshotindex`` in the config is invalid.
    FileNotFoundError
        If the test pose config or the snapshots of a model are missing.

    Examples
    --------
    If you do not want to plot and evaluate with shuffle set to 1.

    >>> deeplabcut.evaluate_network(
            '/analysis/project/reaching-task/config.yaml', Shuffles=[1],
        )

    If you want to plot and evaluate with shuffle set to 0 and 1.

    >>> deeplabcut.evaluate_network(
            '/analysis/project/reaching-task/config.yaml',
            Shuffles=[0, 1],
            plotting=True,
        )

    If you want to plot assemblies for a maDLC project

    >>> deeplabcut.evaluate_network(
            '/analysis/project/reaching-task/config.yaml',
            Shuffles=[1],
            plotting="individual",
        )

    Note: This defaults to standard plotting for single-animal projects.
    """
    if plotting not in (True, False, "bodypart", "individual"):
        raise ValueError(f"Unknown value for `plotting`={plotting}")

    import os

    start_path = os.getcwd()
    from deeplabcut.utils import auxiliaryfunctions

    cfg = auxiliaryfunctions.read_config(config)

    if cfg.get("multianimalproject", False):
        from .evaluate_multianimal import evaluate_multianimal_full

        # TODO: Make this code not so redundant!
        evaluate_multianimal_full(
            config=config,
            Shuffles=Shuffles,
            trainingsetindex=trainingsetindex,
            plotting=plotting,
            comparisonbodyparts=comparisonbodyparts,
            gputouse=gputouse,
            modelprefix=modelprefix,
        )
    else:
        from deeplabcut.utils.auxfun_videos import imread, imresize
        from deeplabcut.pose_estimation_tensorflow.core import predict
        from deeplabcut.pose_estimation_tensorflow.config import load_config
        from deeplabcut.pose_estimation_tensorflow.datasets.utils import data_to_input
        from deeplabcut.utils import conversioncode
        import tensorflow as tf

        # If a string was passed in, auto-convert to True for backward compatibility
        plotting = bool(plotting)

        if "TF_CUDNN_USE_AUTOTUNE" in os.environ:
            del os.environ[
                "TF_CUDNN_USE_AUTOTUNE"
            ]  # was potentially set during training

        tf.compat.v1.reset_default_graph()
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

        if gputouse is not None:  # gpu selection
            os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

        if trainingsetindex == "all":
            TrainingFractions = cfg["TrainingFraction"]
        elif 0 <= trainingsetindex < len(cfg["TrainingFraction"]):
            TrainingFractions = [cfg["TrainingFraction"][int(trainingsetindex)]]
        else:
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError(
                f"Please check the trainingsetindex! {trainingsetindex} should be "
                f"an integer from 0 .. {len(cfg['TrainingFraction']) - 1}"
            )

        # Loading human annotated data
        trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
        labeled_data_path = os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        )
        Data = pd.read_hdf(labeled_data_path)

        # Get list of body parts to evaluate network for
        comparisonbodyparts = (
            auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
                cfg, comparisonbodyparts
            )
        )
        # Make folder for evaluation
        auxiliaryfunctions.attempttomakefolder(
            str(cfg["project_path"] + "/evaluation-results/")
        )
        for shuffle in Shuffles:
            for trainFraction in TrainingFractions:
                ##################################################
                # Load and setup CNN part detector
                ##################################################
                datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
                    trainingsetfolder, trainFraction, shuffle, cfg
                )
                modelfolder = os.path.join(
                    cfg["project_path"],
                    str(
                        auxiliaryfunctions.get_model_folder(
                            trainFraction, shuffle, cfg, modelprefix=modelprefix
                        )
                    ),
                )

                path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"
                # Load meta data
                (
                    data,
                    trainIndices,
                    testIndices,
                    trainFraction,
                ) = auxiliaryfunctions.LoadMetadata(
                    os.path.join(cfg["project_path"], metadatafn)
                )

                try:
                    dlc_cfg = load_config(str(path_test_config))
                except FileNotFoundError:
                    raise FileNotFoundError(
                        "It seems the model for shuffle %s and trainFraction %s does not exist."
                        % (shuffle, trainFraction)
                    )

                # change batch size, if it was edited during analysis!
                dlc_cfg["batch_size"] = 1  # in case this was edited for analysis.

                # Create folder structure to store results.
                evaluationfolder = os.path.join(
                    cfg["project_path"],
                    str(
                        auxiliaryfunctions.get_evaluation_folder(
                            trainFraction, shuffle, cfg, modelprefix=modelprefix
                        )
                    ),
                )
                auxiliaryfunctions.attempttomakefolder(evaluationfolder, recursive=True)

                # Check which snapshots are available and sort them by # iterations
                Snapshots = np.array(
                    [
                        fn.split(".")[0]
                        for fn in os.listdir(os.path.join(str(modelfolder), "train"))
                        if "index" in fn
                    ]
                )
                if len(Snapshots) == 0:
                    raise FileNotFoundError(
                        "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                        % (shuffle, trainFraction)
                    )

                increasing_indices = np.argsort(
                    [int(m.split("-")[1]) for m in Snapshots]
                )
                Snapshots = Snapshots[increasing_indices]

                if cfg["snapshotindex"] == -1:
                    snapindices = [-1]
                elif cfg["snapshotindex"] == "all":
                    snapindices = range(len(Snapshots))
                elif cfg["snapshotindex"] < len(Snapshots):
                    snapindices = [cfg["snapshotindex"]]
                else:
                    raise ValueError(
                        "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
                    )

                final_result = []

                ########################### RESCALING (to global scale)
                if rescale:
                    scale = dlc_cfg["global_scale"]
                    # Re-read the labels so the scaling is never applied twice.
                    Data = pd.read_hdf(labeled_data_path) * scale
                else:
                    scale = 1

                conversioncode.guarantee_multiindex_rows(Data)
                ##################################################
                # Compute predictions over images
                ##################################################
                for snapindex in snapindices:
                    dlc_cfg["init_weights"] = os.path.join(
                        str(modelfolder), "train", Snapshots[snapindex]
                    )  # setting weights to corresponding snapshot.
                    trainingsiterations = (
                        dlc_cfg["init_weights"].split(os.sep)[-1]
                    ).split("-")[
                        -1
                    ]  # read how many training iterations that corresponds to.

                    # Name for deeplabcut net (based on its parameters)
                    DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
                        cfg,
                        shuffle,
                        trainFraction,
                        trainingsiterations,
                        modelprefix=modelprefix,
                    )
                    print(
                        "Running ",
                        DLCscorer,
                        " with # of training iterations:",
                        trainingsiterations,
                    )
                    (
                        notanalyzed,
                        resultsfilename,
                        DLCscorer,
                    ) = auxiliaryfunctions.CheckifNotEvaluated(
                        str(evaluationfolder),
                        DLCscorer,
                        DLCscorerlegacy,
                        Snapshots[snapindex],
                    )
                    if notanalyzed:
                        # Specifying state of model (snapshot / training state)
                        sess, inputs, outputs = predict.setup_pose_prediction(dlc_cfg)
                        Numimages = len(Data.index)
                        PredicteData = np.zeros(
                            (Numimages, 3 * len(dlc_cfg["all_joints_names"]))
                        )
                        print("Running evaluation ...")
                        # `enumerate` hides the length from tqdm, so pass it explicitly.
                        for imageindex, imagename in tqdm(
                            enumerate(Data.index), total=Numimages
                        ):
                            image = imread(
                                os.path.join(cfg["project_path"], *imagename),
                                mode="skimage",
                            )
                            if scale != 1:
                                image = imresize(image, scale)

                            image_batch = data_to_input(image)
                            # Compute prediction with the CNN
                            outputs_np = sess.run(
                                outputs, feed_dict={inputs: image_batch}
                            )
                            scmap, locref = predict.extract_cnn_output(
                                outputs_np, dlc_cfg
                            )

                            # Extract maximum scoring location from the heatmap, assume 1 person
                            pose = predict.argmax_pose_predict(
                                scmap, locref, dlc_cfg["stride"]
                            )
                            # NOTE: thereby cfg_test['all_joints_names'] should be same order as bodyparts!
                            PredicteData[imageindex, :] = pose.flatten()

                        sess.close()  # closes the current tf session

                        index = pd.MultiIndex.from_product(
                            [
                                [DLCscorer],
                                dlc_cfg["all_joints_names"],
                                ["x", "y", "likelihood"],
                            ],
                            names=["scorer", "bodyparts", "coords"],
                        )

                        # Saving results
                        DataMachine = pd.DataFrame(
                            PredicteData, columns=index, index=Data.index
                        )
                        # `key` is passed by keyword: positional use is deprecated in pandas.
                        DataMachine.to_hdf(resultsfilename, key="df_with_missing")

                        print(
                            "Analysis is done and the results are stored (see evaluation-results) for snapshot: ",
                            Snapshots[snapindex],
                        )
                        DataCombined = pd.concat(
                            [Data.T, DataMachine.T], axis=0, sort=False
                        ).T

                        RMSE, RMSEpcutoff = pairwisedistances(
                            DataCombined,
                            cfg["scorer"],
                            DLCscorer,
                            cfg["pcutoff"],
                            comparisonbodyparts,
                        )
                        testerror = np.nanmean(RMSE.iloc[testIndices].values.flatten())
                        trainerror = np.nanmean(
                            RMSE.iloc[trainIndices].values.flatten()
                        )
                        testerrorpcutoff = np.nanmean(
                            RMSEpcutoff.iloc[testIndices].values.flatten()
                        )
                        trainerrorpcutoff = np.nanmean(
                            RMSEpcutoff.iloc[trainIndices].values.flatten()
                        )
                        results = [
                            trainingsiterations,
                            int(100 * trainFraction),
                            shuffle,
                            np.round(trainerror, 2),
                            np.round(testerror, 2),
                            cfg["pcutoff"],
                            np.round(trainerrorpcutoff, 2),
                            np.round(testerrorpcutoff, 2),
                        ]
                        final_result.append(results)

                        if show_errors:
                            print(
                                "Results for",
                                trainingsiterations,
                                " training iterations:",
                                int(100 * trainFraction),
                                shuffle,
                                "train error:",
                                np.round(trainerror, 2),
                                "pixels. Test error:",
                                np.round(testerror, 2),
                                " pixels.",
                            )
                            print(
                                "With pcutoff of",
                                cfg["pcutoff"],
                                " train error:",
                                np.round(trainerrorpcutoff, 2),
                                "pixels. Test error:",
                                np.round(testerrorpcutoff, 2),
                                "pixels",
                            )
                            if scale != 1:
                                print(
                                    "The predictions have been calculated for rescaled images (and rescaled ground truth). Scale:",
                                    scale,
                                )
                            print(
                                "Thereby, the errors are given by the average distances between the labels by DLC and the scorer."
                            )

                        if plotting:
                            print("Plotting...")
                            foldername = os.path.join(
                                str(evaluationfolder),
                                "LabeledImages_" + DLCscorer + "_" + Snapshots[snapindex],
                            )
                            auxiliaryfunctions.attempttomakefolder(foldername)
                            Plotting(
                                cfg,
                                comparisonbodyparts,
                                DLCscorer,
                                trainIndices,
                                DataCombined * 1.0 / scale,
                                foldername,
                            )  # Rescaling coordinates to have figure in original size!

                        tf.compat.v1.reset_default_graph()
                    else:
                        # Results for this snapshot already exist; only (re)plot them.
                        DataMachine = pd.read_hdf(resultsfilename)
                        conversioncode.guarantee_multiindex_rows(DataMachine)
                        if plotting:
                            DataCombined = pd.concat(
                                [Data.T, DataMachine.T], axis=0, sort=False
                            ).T
                            print(
                                "Plotting...(attention scale might be inconsistent in comparison to when data was analyzed; i.e. if you used rescale)"
                            )
                            foldername = os.path.join(
                                str(evaluationfolder),
                                "LabeledImages_" + DLCscorer + "_" + Snapshots[snapindex],
                            )
                            auxiliaryfunctions.attempttomakefolder(foldername)
                            Plotting(
                                cfg,
                                comparisonbodyparts,
                                DLCscorer,
                                trainIndices,
                                DataCombined * 1.0 / scale,
                                foldername,
                            )

                if len(final_result) > 0:  # Only append if results were calculated
                    make_results_file(final_result, evaluationfolder, DLCscorer)
                    print(
                        "The network is evaluated and the results are stored in the subdirectory 'evaluation_results'."
                    )
                    print(
                        "Please check the results, then choose the best model (snapshot) for prediction. You can update the config.yaml file with the appropriate index for the 'snapshotindex'.\nUse the function 'analyze_video' to make predictions on new videos."
                    )
                    print(
                        "Otherwise, consider adding more labeled-data and retraining the network (see DeepLabCut workflow Fig 2, Nath 2019)"
                    )

    # returning to initial folder
    os.chdir(str(start_path))
def _format_distance_bounds(distances, numdigits):
    """Return ``(max, min)`` of ``distances`` as strings for the bounds file.

    NaN entries are ignored. If there is nothing to measure (empty input or
    only NaNs), permissive defaults are returned instead of "nan".
    """
    values = np.asarray(distances, dtype=float)
    if values.size == 0 or np.isnan(values).all():
        # large number (larger than image diameters in typical experiments)
        return str(1e5), str(0)
    return (
        str(round(np.nanmax(values), numdigits)),
        str(round(np.nanmin(values), numdigits)),
    )


def calculatepafdistancebounds(
    config, shuffle=0, trainingsetindex=0, modelprefix="", numdigits=0, onlytrain=False
):
    """
    Returns distances along paf edges in train/test data

    For every edge of the part affinity field graph, the distances between the
    two annotated joints (divided by the network stride) are collected within
    and across individuals. Their min/max are written to
    ``test/inferencebounds.yaml`` in the model folder and returned.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    shuffle: integer
        integers specifying shuffle index of the training dataset. The default is 0.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first
        (note that TrainingFraction is a list in config.yaml).

    modelprefix: str, optional
        Passed on to ``auxiliaryfunctions.get_model_folder`` to locate the model folder.

    numdigits: int, optional
        Number of digits to round for distances.

    onlytrain: bool, optional
        If True, only the rows listed in the training indices are used.

    Returns
    -------
    dict
        Maps ``"<joint index>_<joint index>"`` to a dict with the string-valued keys
        ``intra_max``, ``intra_min``, ``inter_max`` and ``inter_min``. An empty
        dict is returned for projects that are not multi-animal projects.
    """
    import os
    from deeplabcut.utils import auxiliaryfunctions, auxfun_multianimal
    from deeplabcut.pose_estimation_tensorflow.config import load_config

    # Read file path for pose_config file. >> pass it on
    cfg = auxiliaryfunctions.read_config(config)

    # .get: configs without the key are treated as single-animal projects
    # instead of raising a KeyError.
    if not cfg.get("multianimalproject", False):
        print("You might as well bring owls to Athens.")
        return {}

    individuals, _, _ = auxfun_multianimal.extractindividualsandbodyparts(cfg)

    # Loading human annotated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    trainFraction = cfg["TrainingFraction"][trainingsetindex]
    _, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
        trainingsetfolder, trainFraction, shuffle, cfg
    )
    modelfolder = os.path.join(
        cfg["project_path"],
        str(
            auxiliaryfunctions.get_model_folder(
                trainFraction, shuffle, cfg, modelprefix=modelprefix
            )
        ),
    )

    # Load meta data & annotations
    _, trainIndices, _, trainFraction = auxiliaryfunctions.LoadMetadata(
        os.path.join(cfg["project_path"], metadatafn)
    )
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        )
    )[cfg["scorer"]]
    columns = Data.columns  # looked up once instead of in the innermost loop

    path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"
    dlc_cfg = load_config(str(path_test_config))

    # get the graph!
    partaffinityfield_graph = dlc_cfg["partaffinityfield_graph"]
    jointnames = [
        dlc_cfg["all_joints_names"][i] for i in range(len(dlc_cfg["all_joints"]))
    ]
    path_inferencebounds_config = Path(modelfolder) / "test" / "inferencebounds.yaml"

    # The placeholder individual "single" never takes part in a pair.
    animals = [ind for ind in individuals if ind != "single"]

    inferenceboundscfg = {}
    for edge in partaffinityfield_graph:
        j1, j2 = jointnames[edge[0]], jointnames[edge[1]]
        ds_within = []
        ds_across = []
        for ind in animals:
            for ind2 in animals:
                if (ind, j1, "x") not in columns or (ind2, j2, "y") not in columns:
                    continue
                distances = (
                    np.sqrt(
                        (Data[ind, j1, "x"] - Data[ind2, j2, "x"]) ** 2
                        + (Data[ind, j1, "y"] - Data[ind2, j2, "y"]) ** 2
                    )
                    / dlc_cfg["stride"]
                )
                if onlytrain:
                    distances = distances.iloc[trainIndices]
                target = ds_within if ind == ind2 else ds_across
                target.extend(distances.values.flatten())

        bounds = {}
        bounds["intra_max"], bounds["intra_min"] = _format_distance_bounds(
            ds_within, numdigits
        )
        # NOTE: the inter-animal distances are currently not used, but are
        # interesting to compare to intra_*
        bounds["inter_max"], bounds["inter_min"] = _format_distance_bounds(
            ds_across, numdigits
        )
        inferenceboundscfg[f"{edge[0]}_{edge[1]}"] = bounds

    auxiliaryfunctions.write_plainconfig(
        str(path_inferencebounds_config), dict(inferenceboundscfg)
    )
    return inferenceboundscfg
def return_evaluate_network_data(
    config,
    shuffle=0,
    trainingsetindex=0,
    comparisonbodyparts="all",
    Snapindex=None,
    rescale=False,
    fulldata=False,
    show_errors=True,
    modelprefix="",
    returnjustfns=True,
):
    """
    Returns the results for (previously evaluated) network. deeplabcut.evaluate_network(..)

    Returns list of (per model): [trainingsiterations,trainfraction,shuffle,trainerror,testerror,pcutoff,trainerrorpcutoff,testerrorpcutoff,Snapshots[snapindex],scale,net_type]

    If fulldata=True, also returns (the complete annotation and prediction array)
    Returns list of: (DataMachine, Data, data, trainIndices, testIndices, trainFraction, DLCscorer,comparisonbodyparts, cfg, evaluationfolder, Snapshots[snapindex])

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    shuffle: integer
        integers specifying shuffle index of the training dataset. The default is 0.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first
        (note that TrainingFraction is a list in config.yaml).

    comparisonbodyparts: list of bodyparts, Default is "all".
        The average error will be computed for those body parts only (Has to be a subset of the body parts).

    Snapindex: int or "all", optional
        Snapshot(s) to report: -1 for the last one, an index into the snapshots
        sorted by iteration, or "all". If None, ``snapshotindex`` from config.yaml is used.

    rescale: bool, default False
        If True, the annotations are multiplied by the 'global_scale' variable
        (as set in the test/pose_config.yaml file for a particular project)
        before the errors are computed.

    fulldata: bool, default False
        If True (and ``returnjustfns`` is False), ``(DATA, results)`` is returned,
        where DATA holds one full-data record per snapshot whose results were found.

    show_errors: bool, default True
        Print the train/test errors for each snapshot.

    modelprefix: str, optional
        Passed on when locating the model and evaluation folders.

    returnjustfns: bool, default True
        If True, only the list of result file names (one per selected snapshot)
        is returned and no errors are computed.

    Examples
    --------
    To get the names of the result files
    >>> return_evaluate_network_data('/analysis/project/reaching-task/config.yaml', shuffle=1)
    --------
    To get the errors of all snapshots
    >>> return_evaluate_network_data('/analysis/project/reaching-task/config.yaml', shuffle=1, Snapindex='all', returnjustfns=False)
    """

    import os

    from deeplabcut.pose_estimation_tensorflow.config import load_config
    from deeplabcut.utils import auxiliaryfunctions

    start_path = os.getcwd()
    # Read file path for pose_config file. >> pass it on
    cfg = auxiliaryfunctions.read_config(config)

    # Loading human annotated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)

    # Get list of body parts to evaluate network for
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, comparisonbodyparts
    )
    ##################################################
    # Load data...
    ##################################################
    trainFraction = cfg["TrainingFraction"][trainingsetindex]
    datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
        trainingsetfolder, trainFraction, shuffle, cfg
    )
    modelfolder = os.path.join(
        cfg["project_path"],
        str(
            auxiliaryfunctions.get_model_folder(
                trainFraction, shuffle, cfg, modelprefix=modelprefix
            )
        ),
    )
    path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"
    # Load meta data
    data, trainIndices, testIndices, trainFraction = auxiliaryfunctions.LoadMetadata(
        os.path.join(cfg["project_path"], metadatafn)
    )

    try:
        dlc_cfg = load_config(str(path_test_config))
    except FileNotFoundError as err:
        raise FileNotFoundError(
            "It seems the model for shuffle %s and trainFraction %s does not exist."
            % (shuffle, trainFraction)
        ) from err

    ########################### RESCALING (to global scale)
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        )
    )
    if rescale:
        scale = dlc_cfg["global_scale"]
        print("Rescaling Data to ", scale)
        Data = Data * scale
    else:
        scale = 1

    evaluationfolder = os.path.join(
        cfg["project_path"],
        str(
            auxiliaryfunctions.get_evaluation_folder(
                trainFraction, shuffle, cfg, modelprefix=modelprefix
            )
        ),
    )
    # Check which snapshots are available and sort them by # iterations
    Snapshots = np.array(
        [
            fn.split(".")[0]
            for fn in os.listdir(os.path.join(str(modelfolder), "train"))
            if "index" in fn
        ]
    )

    if len(Snapshots) == 0:
        print(
            "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
            % (shuffle, trainFraction)
        )
        snapindices = []
    else:
        increasing_indices = np.argsort([int(m.split("-")[1]) for m in Snapshots])
        Snapshots = Snapshots[increasing_indices]
        if Snapindex is None:
            Snapindex = cfg["snapshotindex"]

        if Snapindex == -1:
            snapindices = [-1]
        elif Snapindex == "all":
            snapindices = range(len(Snapshots))
        elif Snapindex < len(Snapshots):
            snapindices = [Snapindex]
        else:
            print(
                "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
            )
            # Nothing to report; without this the loop below raised a NameError.
            snapindices = []

    DATA = []
    results = []
    resultsfns = []
    for snapindex in snapindices:
        dlc_cfg["init_weights"] = os.path.join(
            str(modelfolder), "train", Snapshots[snapindex]
        )  # setting weights to corresponding snapshot.
        trainingsiterations = (dlc_cfg["init_weights"].split(os.sep)[-1]).split("-")[
            -1
        ]  # read how many training iterations that corresponds to.

        # name for deeplabcut net (based on its parameters)
        DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName(
            cfg, shuffle, trainFraction, trainingsiterations, modelprefix=modelprefix
        )
        if not returnjustfns:
            print(
                "Retrieving ",
                DLCscorer,
                " with # of trainingiterations:",
                trainingsiterations,
            )

        (
            notanalyzed,
            resultsfilename,
            DLCscorer,
        ) = auxiliaryfunctions.CheckifNotEvaluated(
            str(evaluationfolder), DLCscorer, DLCscorerlegacy, Snapshots[snapindex]
        )
        print(resultsfilename)
        resultsfns.append(resultsfilename)
        if returnjustfns:
            continue

        if notanalyzed or not os.path.isfile(resultsfilename):
            print("Model not trained/evaluated!")
            continue

        # data exists..
        DataMachine = pd.read_hdf(resultsfilename)
        DataCombined = pd.concat([Data.T, DataMachine.T], axis=0).T
        RMSE, RMSEpcutoff = pairwisedistances(
            DataCombined,
            cfg["scorer"],
            DLCscorer,
            cfg["pcutoff"],
            comparisonbodyparts,
        )

        testerror = np.nanmean(RMSE.iloc[testIndices].values.flatten())
        trainerror = np.nanmean(RMSE.iloc[trainIndices].values.flatten())
        testerrorpcutoff = np.nanmean(RMSEpcutoff.iloc[testIndices].values.flatten())
        trainerrorpcutoff = np.nanmean(
            RMSEpcutoff.iloc[trainIndices].values.flatten()
        )
        if show_errors:
            print(
                "Results for",
                trainingsiterations,
                " training iterations:",
                int(100 * trainFraction),
                shuffle,
                "train error:",
                np.round(trainerror, 2),
                "pixels. Test error:",
                np.round(testerror, 2),
                " pixels.",
            )
            print(
                "With pcutoff of",
                cfg["pcutoff"],
                " train error:",
                np.round(trainerrorpcutoff, 2),
                "pixels. Test error:",
                np.round(testerrorpcutoff, 2),
                "pixels",
            )
            print("Snapshot", Snapshots[snapindex])

        results.append(
            [
                trainingsiterations,
                int(100 * trainFraction),
                shuffle,
                np.round(trainerror, 2),
                np.round(testerror, 2),
                cfg["pcutoff"],
                np.round(trainerrorpcutoff, 2),
                np.round(testerrorpcutoff, 2),
                Snapshots[snapindex],
                scale,
                dlc_cfg["net_type"],
            ]
        )

        # Only record full data for snapshots whose predictions were actually
        # loaded; otherwise DataMachine would be undefined or left over from a
        # previous snapshot.
        if fulldata:
            DATA.append(
                [
                    DataMachine,
                    Data,
                    data,
                    trainIndices,
                    testIndices,
                    trainFraction,
                    DLCscorer,
                    comparisonbodyparts,
                    cfg,
                    evaluationfolder,
                    Snapshots[snapindex],
                ]
            )

    os.chdir(start_path)
    if returnjustfns:
        return resultsfns
    if fulldata:
        return DATA, results
    return results
def extract_maps(
    config,
    shuffle=0,
    trainingsetindex=0,
    gputouse=None,
    rescale=False,
    Indices=None,
    modelprefix="",
):
    """
    Extracts the scoremap, locref, partaffinityfields (if available).

    Returns a dictionary indexed by: trainingsetfraction, snapshotindex, and imageindex
    for those keys, each item contains: (image,scmap,locref,paf,peaks,bpt names,partaffinity graph, imagename, True/False if this image was in trainingset)

    When ``Indices`` is given, the imageindex keys are the positions within
    ``Indices`` (0, 1, ...); the train/test flag always refers to the
    corresponding row of the annotation data.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    shuffle: integer
        integers specifying shuffle index of the training dataset. The default is 0.

    trainingsetindex: int, optional
        Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a list in config.yaml). This
        variable can also be set to "all".

    gputouse: int, optional
        If not None, ``CUDA_VISIBLE_DEVICES`` is set to this value.

    rescale: bool, default False
        If True, every image is resized by the 'global_scale' variable (as set in the
        test/pose_config.yaml file for a particular project) before it is passed to the network.

    Indices: list of int, optional
        Rows of the annotation data to extract maps for. By default all images are used.

    modelprefix: str, optional
        Passed on when locating the model and evaluation folders.

    Raises
    ------
    FileNotFoundError
        If the test pose_cfg.yaml or the snapshots of the model are missing.
    ValueError
        If ``snapshotindex`` in config.yaml does not select an existing snapshot.

    Examples
    --------
    If you want to extract the data for image 0 and 103 (of the training set) for model trained with shuffle 0.
    >>> deeplabcut.extract_maps(configfile,0,Indices=[0,103])

    """
    from deeplabcut.utils.auxfun_videos import imread, imresize
    from deeplabcut.pose_estimation_tensorflow.core import (
        predict,
        predict_multianimal as predictma,
    )
    from deeplabcut.pose_estimation_tensorflow.config import load_config
    from deeplabcut.pose_estimation_tensorflow.datasets.utils import data_to_input
    from deeplabcut.utils import auxiliaryfunctions
    from tqdm import tqdm
    import tensorflow as tf
    import pandas as pd
    from pathlib import Path
    import numpy as np

    tf.compat.v1.reset_default_graph()
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
    start_path = os.getcwd()

    # Read file path for pose_config file. >> pass it on
    cfg = auxiliaryfunctions.read_config(config)

    if gputouse is not None:  # gpu selection
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

    if trainingsetindex == "all":
        TrainingFractions = cfg["TrainingFraction"]
    elif 0 <= trainingsetindex < len(cfg["TrainingFraction"]):
        TrainingFractions = [cfg["TrainingFraction"][int(trainingsetindex)]]
    else:
        raise Exception(
            "Please check the trainingsetindex! ",
            trainingsetindex,
            " should be an integer from 0 .. ",
            int(len(cfg["TrainingFraction"]) - 1),
        )

    # Loading human annotated data
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    Data = pd.read_hdf(
        os.path.join(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
        )
    )

    # Rows to analyze. Materialized once so that every snapshot and training
    # fraction iterates over the same images (a shared enumerate object would
    # be exhausted after the first snapshot).
    if Indices is None:
        selected_rows = list(range(len(Data.index)))
    else:
        selected_rows = list(Indices)

    # Make folder for evaluation
    auxiliaryfunctions.attempttomakefolder(
        str(cfg["project_path"] + "/evaluation-results/")
    )

    Maps = {}
    for trainFraction in TrainingFractions:
        Maps[trainFraction] = {}
        ##################################################
        # Load and setup CNN part detector
        ##################################################
        datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder, trainFraction, shuffle, cfg
        )

        modelfolder = os.path.join(
            cfg["project_path"],
            str(
                auxiliaryfunctions.get_model_folder(
                    trainFraction, shuffle, cfg, modelprefix=modelprefix
                )
            ),
        )
        path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml"
        # Load meta data
        (
            data,
            trainIndices,
            testIndices,
            trainFraction,
        ) = auxiliaryfunctions.LoadMetadata(
            os.path.join(cfg["project_path"], metadatafn)
        )
        try:
            dlc_cfg = load_config(str(path_test_config))
        except FileNotFoundError as err:
            raise FileNotFoundError(
                "It seems the model for shuffle %s and trainFraction %s does not exist."
                % (shuffle, trainFraction)
            ) from err

        # change batch size, if it was edited during analysis!
        dlc_cfg["batch_size"] = 1  # in case this was edited for analysis.

        # Create folder structure to store results.
        evaluationfolder = os.path.join(
            cfg["project_path"],
            str(
                auxiliaryfunctions.get_evaluation_folder(
                    trainFraction, shuffle, cfg, modelprefix=modelprefix
                )
            ),
        )
        auxiliaryfunctions.attempttomakefolder(evaluationfolder, recursive=True)

        # Check which snapshots are available and sort them by # iterations
        Snapshots = np.array(
            [
                fn.split(".")[0]
                for fn in os.listdir(os.path.join(str(modelfolder), "train"))
                if "index" in fn
            ]
        )
        if len(Snapshots) == 0:
            raise FileNotFoundError(
                "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so."
                % (shuffle, trainFraction)
            )

        increasing_indices = np.argsort([int(m.split("-")[1]) for m in Snapshots])
        Snapshots = Snapshots[increasing_indices]

        if cfg["snapshotindex"] == -1:
            snapindices = [-1]
        elif cfg["snapshotindex"] == "all":
            snapindices = range(len(Snapshots))
        elif cfg["snapshotindex"] < len(Snapshots):
            snapindices = [cfg["snapshotindex"]]
        else:
            # Previously only printed, after which the undefined (or stale)
            # ``snapindices`` was used.
            raise ValueError(
                "Invalid choice, only -1 (last), any integer up to last, or all (as string)!"
            )

        ########################### RESCALING (to global scale)
        # Only the images are rescaled; the annotation values are not used here.
        scale = dlc_cfg["global_scale"] if rescale else 1

        bptnames = [
            dlc_cfg["all_joints_names"][i] for i in range(len(dlc_cfg["all_joints"]))
        ]

        for snapindex in snapindices:
            dlc_cfg["init_weights"] = os.path.join(
                str(modelfolder), "train", Snapshots[snapindex]
            )  # setting weights to corresponding snapshot.

            # Specifying state of model (snapshot / training state)
            sess, inputs, outputs = predict.setup_pose_prediction(dlc_cfg)
            print("Analyzing data...")

            DATA = {}
            try:
                for imageindex, row in enumerate(tqdm(selected_rows)):
                    imagename = Data.index[row]
                    image = imread(
                        os.path.join(cfg["project_path"], *imagename), mode="skimage"
                    )
                    if scale != 1:
                        image = imresize(image, scale)

                    image_batch = data_to_input(image)
                    # Compute prediction with the CNN
                    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})

                    if cfg.get("multianimalproject", False):
                        scmap, locref, paf = predictma.extract_cnn_output(
                            outputs_np, dlc_cfg
                        )
                        pagraph = dlc_cfg["partaffinityfield_graph"]
                    else:
                        scmap, locref = predict.extract_cnn_output(outputs_np, dlc_cfg)
                        paf = None
                        pagraph = []
                    peaks = outputs_np[-1]

                    # Test membership is decided by the dataset row, not by the
                    # position within a user-supplied ``Indices`` list.
                    is_training_frame = row not in testIndices

                    DATA[imageindex] = [
                        image,
                        scmap,
                        locref,
                        paf,
                        peaks,
                        bptnames,
                        pagraph,
                        imagename,
                        is_training_frame,
                    ]
            finally:
                sess.close()  # closes the current tf session
            Maps[trainFraction][Snapshots[snapindex]] = DATA
    os.chdir(str(start_path))
    return Maps
def train_network(
    config,
    shuffle=1,
    trainingsetindex=0,
    max_snapshots_to_keep=5,
    displayiters=None,
    saveiters=None,
    maxiters=None,
    allow_growth=True,
    gputouse=None,
    autotune=False,
    keepdeconvweights=True,
    modelprefix="",
):
    """Trains the network with the labels in the training dataset.

    Parameters
    ----------
    config : string
        Full path of the config.yaml file as a string.

    shuffle: int, optional, default=1
        Integer value specifying the shuffle index to select for training.

    trainingsetindex: int, optional, default=0
        Integer specifying which TrainingsetFraction to use.
        Note that TrainingFraction is a list in config.yaml.

    max_snapshots_to_keep: int or None
        Sets how many snapshots are kept, i.e. states of the trained network. Every
        saving iteration a snapshot is stored, however only the last
        ``max_snapshots_to_keep`` many are kept! If you change this to None, then all
        are kept.
        See: https://github.com/DeepLabCut/DeepLabCut/issues/8#issuecomment-387404835

    displayiters: optional, default=None
        This variable is actually set in ``pose_config.yaml``. However, you can
        overwrite it with this hack. Don't use this regularly, just if you are too
        lazy to dig out the ``pose_config.yaml`` file for the corresponding project.
        If ``None``, the value from there is used, otherwise it is overwritten!

    saveiters: optional, default=None
        This variable is actually set in ``pose_config.yaml``. However, you can
        overwrite it with this hack. Don't use this regularly, just if you are too
        lazy to dig out the ``pose_config.yaml`` file for the corresponding project.
        If ``None``, the value from there is used, otherwise it is overwritten!

    maxiters: optional, default=None
        This variable is actually set in ``pose_config.yaml``. However, you can
        overwrite it with this hack. Don't use this regularly, just if you are too
        lazy to dig out the ``pose_config.yaml`` file for the corresponding project.
        If ``None``, the value from there is used, otherwise it is overwritten!

    allow_growth: bool, optional, default=True.
        For some smaller GPUs the memory issues happen. If ``True``, the memory
        allocator does not pre-allocate the entire specified GPU memory region,
        instead starting small and growing as needed.
        See issue: https://forum.image.sc/t/how-to-stop-running-out-of-vram/30551/2

    gputouse: optional, default=None
        Natural number indicating the number of your GPU (see number in nvidia-smi).
        If you do not have a GPU put None.
        See: https://nvidia.custhelp.com/app/answers/detail/a_id/3751/~/useful-nvidia-smi-queries

    autotune: bool, optional, default=False
        Property of TensorFlow, somehow faster if ``False``
        (as Eldar found out, see https://github.com/tensorflow/tensorflow/issues/13317).

    keepdeconvweights: bool, optional, default=True
        Also restores the weights of the deconvolution layers (and the backbone) when
        training from a snapshot. Note that if you change the number of bodyparts, you
        need to set this to false for re-training.

    modelprefix: str, optional, default=""
        Directory containing the deeplabcut models to use when evaluating the network.
        By default, the models are assumed to exist in the project folder.

    Returns
    -------
    None

    Examples
    --------
    To train the network for first shuffle of the training dataset

    >>> deeplabcut.train_network('/analysis/project/reaching-task/config.yaml')

    To train the network for second shuffle of the training dataset

    >>> deeplabcut.train_network(
            '/analysis/project/reaching-task/config.yaml',
            shuffle=2,
            keepdeconvweights=True,
        )
    """
    import tensorflow as tf

    # reload logger.
    import importlib
    import logging

    importlib.reload(logging)
    logging.shutdown()

    from deeplabcut.utils import auxiliaryfunctions

    tf.compat.v1.reset_default_graph()
    start_path = os.getcwd()

    # Read file path for pose_config file. >> pass it on
    cfg = auxiliaryfunctions.read_config(config)
    modelfoldername = auxiliaryfunctions.get_model_folder(
        cfg["TrainingFraction"][trainingsetindex], shuffle, cfg, modelprefix=modelprefix
    )
    poseconfigfile = Path(
        os.path.join(
            cfg["project_path"], str(modelfoldername), "train", "pose_cfg.yaml"
        )
    )
    if not poseconfigfile.is_file():
        print("The training datafile ", poseconfigfile, " is not present.")
        print(
            "Probably, the training dataset for this specific shuffle index was not created."
        )
        print(
            "Try with a different shuffle/trainingsetfraction or use function 'create_training_dataset' to create a new trainingdataset with this shuffle index."
        )
        return

    # Set environment variables
    # NOTE(review): this sets TF_CUDNN_USE_AUTOTUNE="0" when ``autotune`` is
    # anything but False, which looks inverted relative to the docstring
    # ("faster if False"). Left unchanged because flipping it would alter the
    # default behaviour -- confirm the intent before changing.
    if autotune is not False:
        # see: https://github.com/tensorflow/tensorflow/issues/13317
        os.environ["TF_CUDNN_USE_AUTOTUNE"] = "0"
    if gputouse is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse)

    try:
        cfg_dlc = auxiliaryfunctions.read_plainconfig(poseconfigfile)
        # .get: a pose_cfg.yaml without ``dataset_type`` selects the
        # single-animal trainer instead of raising a KeyError.
        if "multi-animal" in cfg_dlc.get("dataset_type", ""):
            from deeplabcut.pose_estimation_tensorflow.core.train_multianimal import (
                train,
            )

            print("Selecting multi-animal trainer")
        else:
            from deeplabcut.pose_estimation_tensorflow.core.train import train

            print("Selecting single-animal trainer")

        train(
            str(poseconfigfile),
            displayiters,
            saveiters,
            maxiters,
            max_to_keep=max_snapshots_to_keep,
            keepdeconvweights=keepdeconvweights,
            allow_growth=allow_growth,
        )  # pass on path and file name for pose_cfg.yaml!
    finally:
        # Return to the initial folder whether or not training succeeded;
        # exceptions propagate unchanged.
        os.chdir(str(start_path))
    print(
        "The network is now trained and ready to evaluate. Use the function 'evaluate_network' to evaluate the network."
    )
def evaluate_multianimal_full( config, Shuffles=[1], trainingsetindex=0, plotting=False, show_errors=True, comparisonbodyparts="all", gputouse=None, modelprefix="", ): from deeplabcut.pose_estimation_tensorflow.core import ( predict, predict_multianimal as predictma, ) from deeplabcut.utils import ( auxiliaryfunctions, auxfun_multianimal, auxfun_videos, conversioncode, ) import tensorflow as tf if "TF_CUDNN_USE_AUTOTUNE" in os.environ: del os.environ[ "TF_CUDNN_USE_AUTOTUNE"] # was potentially set during training tf.compat.v1.reset_default_graph() os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" # if gputouse is not None: # gpu selectinon os.environ["CUDA_VISIBLE_DEVICES"] = str(gputouse) start_path = os.getcwd() if plotting is True: plotting = "bodypart" ################################################## # Load data... ################################################## cfg = auxiliaryfunctions.read_config(config) if trainingsetindex == "all": TrainingFractions = cfg["TrainingFraction"] else: TrainingFractions = [cfg["TrainingFraction"][trainingsetindex]] # Loading human annotatated data trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg) Data = pd.read_hdf( os.path.join( cfg["project_path"], str(trainingsetfolder), "CollectedData_" + cfg["scorer"] + ".h5", )) conversioncode.guarantee_multiindex_rows(Data) # Get list of body parts to evaluate network for comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser( cfg, comparisonbodyparts) all_bpts = np.asarray( len(cfg["individuals"]) * cfg["multianimalbodyparts"] + cfg["uniquebodyparts"]) colors = visualization.get_cmap(len(comparisonbodyparts), name=cfg["colormap"]) # Make folder for evaluation auxiliaryfunctions.attempttomakefolder( str(cfg["project_path"] + "/evaluation-results/")) for shuffle in Shuffles: for trainFraction in TrainingFractions: ################################################## # Load and setup CNN part detector ################################################## 
datafn, metadatafn = auxiliaryfunctions.GetDataandMetaDataFilenames( trainingsetfolder, trainFraction, shuffle, cfg) modelfolder = os.path.join( cfg["project_path"], str( auxiliaryfunctions.get_model_folder( trainFraction, shuffle, cfg, modelprefix=modelprefix)), ) path_test_config = Path(modelfolder) / "test" / "pose_cfg.yaml" # Load meta data ( data, trainIndices, testIndices, trainFraction, ) = auxiliaryfunctions.LoadMetadata( os.path.join(cfg["project_path"], metadatafn)) try: dlc_cfg = load_config(str(path_test_config)) except FileNotFoundError: raise FileNotFoundError( "It seems the model for shuffle %s and trainFraction %s does not exist." % (shuffle, trainFraction)) pipeline = iaa.Sequential(random_order=False) pre_resize = dlc_cfg.get("pre_resize") if pre_resize: width, height = pre_resize pipeline.add(iaa.Resize({"height": height, "width": width})) # TODO: IMPLEMENT for different batch sizes? dlc_cfg["batch_size"] = 1 # due to differently sized images!!! stride = dlc_cfg["stride"] # Ignore best edges possibly defined during a prior evaluation _ = dlc_cfg.pop("paf_best", None) joints = dlc_cfg["all_joints_names"] # Create folder structure to store results. evaluationfolder = os.path.join( cfg["project_path"], str( auxiliaryfunctions.get_evaluation_folder( trainFraction, shuffle, cfg, modelprefix=modelprefix)), ) auxiliaryfunctions.attempttomakefolder(evaluationfolder, recursive=True) # path_train_config = modelfolder / 'train' / 'pose_cfg.yaml' # Check which snapshots are available and sort them by # iterations Snapshots = np.array([ fn.split(".")[0] for fn in os.listdir(os.path.join(str(modelfolder), "train")) if "index" in fn ]) if len(Snapshots) == 0: print( "Snapshots not found! It seems the dataset for shuffle %s and trainFraction %s is not trained.\nPlease train it before evaluating.\nUse the function 'train_network' to do so." 
% (shuffle, trainFraction)) else: increasing_indices = np.argsort( [int(m.split("-")[1]) for m in Snapshots]) Snapshots = Snapshots[increasing_indices] if cfg["snapshotindex"] == -1: snapindices = [-1] elif cfg["snapshotindex"] == "all": snapindices = range(len(Snapshots)) elif cfg["snapshotindex"] < len(Snapshots): snapindices = [cfg["snapshotindex"]] else: print( "Invalid choice, only -1 (last), any integer up to last, or all (as string)!" ) final_result = [] ################################################## # Compute predictions over images ################################################## for snapindex in snapindices: dlc_cfg["init_weights"] = os.path.join( str(modelfolder), "train", Snapshots[snapindex] ) # setting weights to corresponding snapshot. trainingsiterations = ( dlc_cfg["init_weights"].split(os.sep)[-1] ).split( "-" )[-1] # read how many training siterations that corresponds to. # name for deeplabcut net (based on its parameters) DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName( cfg, shuffle, trainFraction, trainingsiterations, modelprefix=modelprefix, ) print( "Running ", DLCscorer, " with # of trainingiterations:", trainingsiterations, ) ( notanalyzed, resultsfilename, DLCscorer, ) = auxiliaryfunctions.CheckifNotEvaluated( str(evaluationfolder), DLCscorer, DLCscorerlegacy, Snapshots[snapindex], ) data_path = resultsfilename.split( ".h5")[0] + "_full.pickle" if plotting: foldername = os.path.join( str(evaluationfolder), "LabeledImages_" + DLCscorer + "_" + Snapshots[snapindex], ) auxiliaryfunctions.attempttomakefolder(foldername) if plotting == "bodypart": fig, ax = visualization.create_minimal_figure() if os.path.isfile(data_path): print("Model already evaluated.", resultsfilename) else: ( sess, inputs, outputs, ) = predict.setup_pose_prediction(dlc_cfg) PredicteData = {} dist = np.full((len(Data), len(all_bpts)), np.nan) conf = np.full_like(dist, np.nan) print("Network Evaluation underway...") for imageindex, imagename in 
tqdm(enumerate( Data.index)): image_path = os.path.join(cfg["project_path"], *imagename) frame = auxfun_videos.imread(image_path, mode="skimage") GT = Data.iloc[imageindex] if not GT.any(): continue # Pass the image and the keypoints through the resizer; # this has no effect if no augmenters were added to it. keypoints = [ GT.to_numpy().reshape((-1, 2)).astype(float) ] frame_, keypoints = pipeline(images=[frame], keypoints=keypoints) frame = frame_[0] GT[:] = keypoints[0].flatten() df = GT.unstack("coords").reindex( joints, level="bodyparts") # FIXME Is having an empty array vs nan really that necessary?! groundtruthidentity = list( df.index.get_level_values( "individuals").to_numpy().reshape((-1, 1))) groundtruthcoordinates = list( df.values[:, np.newaxis]) for i, coords in enumerate(groundtruthcoordinates): if np.isnan(coords).any(): groundtruthcoordinates[i] = np.empty( (0, 2), dtype=float) groundtruthidentity[i] = np.array( [], dtype=str) # Form 2D array of shape (n_rows, 4) where the last dimension # is (sample_index, peak_y, peak_x, bpt_index) to slice the PAFs. 
temp = df.reset_index(level="bodyparts").dropna() temp["bodyparts"].replace( dict(zip(joints, range(len(joints)))), inplace=True, ) temp["sample"] = 0 peaks_gt = temp.loc[:, [ "sample", "y", "x", "bodyparts" ]].to_numpy() peaks_gt[:, 1:3] = (peaks_gt[:, 1:3] - stride // 2) / stride pred = predictma.predict_batched_peaks_and_costs( dlc_cfg, np.expand_dims(frame, axis=0), sess, inputs, outputs, peaks_gt.astype(int), ) if not pred: continue else: pred = pred[0] PredicteData[imagename] = {} PredicteData[imagename]["index"] = imageindex PredicteData[imagename]["prediction"] = pred PredicteData[imagename]["groundtruth"] = [ groundtruthidentity, groundtruthcoordinates, GT, ] coords_pred = pred["coordinates"][0] probs_pred = pred["confidence"] for bpt, xy_gt in df.groupby(level="bodyparts"): inds_gt = np.flatnonzero( np.all(~np.isnan(xy_gt), axis=1)) n_joint = joints.index(bpt) xy = coords_pred[n_joint] if inds_gt.size and xy.size: # Pick the predictions closest to ground truth, # rather than the ones the model has most confident in xy_gt_values = xy_gt.iloc[inds_gt].values neighbors = _find_closest_neighbors( xy_gt_values, xy, k=3) found = neighbors != -1 min_dists = np.linalg.norm( xy_gt_values[found] - xy[neighbors[found]], axis=1, ) inds = np.flatnonzero(all_bpts == bpt) sl = imageindex, inds[inds_gt[found]] dist[sl] = min_dists conf[sl] = probs_pred[n_joint][ neighbors[found]].squeeze() if plotting == "bodypart": temp_xy = GT.unstack( "bodyparts")[joints].values gt = temp_xy.reshape( (-1, 2, temp_xy.shape[1])).T.swapaxes(1, 2) h, w, _ = np.shape(frame) fig.set_size_inches(w / 100, h / 100) ax.set_xlim(0, w) ax.set_ylim(0, h) ax.invert_yaxis() ax = visualization.make_multianimal_labeled_image( frame, gt, coords_pred, probs_pred, colors, cfg["dotsize"], cfg["alphavalue"], cfg["pcutoff"], ax=ax, ) visualization.save_labeled_frame( fig, image_path, foldername, imageindex in trainIndices, ) visualization.erase_artists(ax) sess.close() # closes the current tf session # 
Compute all distance statistics df_dist = pd.DataFrame(dist, columns=df.index) df_conf = pd.DataFrame(conf, columns=df.index) df_joint = pd.concat( [df_dist, df_conf], keys=["rmse", "conf"], names=["metrics"], axis=1, ) df_joint = df_joint.reorder_levels(list( np.roll(df_joint.columns.names, -1)), axis=1) df_joint.sort_index( axis=1, level=["individuals", "bodyparts"], ascending=[True, True], inplace=True, ) write_path = os.path.join( evaluationfolder, f"dist_{trainingsiterations}.csv") df_joint.to_csv(write_path) # Calculate overall prediction error error = df_joint.xs("rmse", level="metrics", axis=1) mask = (df_joint.xs("conf", level="metrics", axis=1) >= cfg["pcutoff"]) error_masked = error[mask] error_train = np.nanmean(error.iloc[trainIndices]) error_train_cut = np.nanmean( error_masked.iloc[trainIndices]) error_test = np.nanmean(error.iloc[testIndices]) error_test_cut = np.nanmean( error_masked.iloc[testIndices]) results = [ trainingsiterations, int(100 * trainFraction), shuffle, np.round(error_train, 2), np.round(error_test, 2), cfg["pcutoff"], np.round(error_train_cut, 2), np.round(error_test_cut, 2), ] final_result.append(results) if show_errors: string = ( "Results for {} training iterations, training fraction of {}, and shuffle {}:\n" "Train error: {} pixels. Test error: {} pixels.\n" "With pcutoff of {}:\n" "Train error: {} pixels. Test error: {} pixels." 
) print(string.format(*results)) print("##########################################") print( "Average Euclidean distance to GT per individual (in pixels; test-only)" ) print(error_masked.iloc[testIndices].groupby( "individuals", axis=1).mean().mean().to_string()) print( "Average Euclidean distance to GT per bodypart (in pixels; test-only)" ) print(error_masked.iloc[testIndices].groupby( "bodyparts", axis=1).mean().mean().to_string()) PredicteData["metadata"] = { "nms radius": dlc_cfg["nmsradius"], "minimal confidence": dlc_cfg["minconfidence"], "sigma": dlc_cfg.get("sigma", 1), "PAFgraph": dlc_cfg["partaffinityfield_graph"], "PAFinds": np.arange(len(dlc_cfg["partaffinityfield_graph"])), "all_joints": [[i] for i in range(len(dlc_cfg["all_joints"]))], "all_joints_names": [ dlc_cfg["all_joints_names"][i] for i in range(len(dlc_cfg["all_joints"])) ], "stride": dlc_cfg.get("stride", 8), } print( "Done and results stored for snapshot: ", Snapshots[snapindex], ) dictionary = { "Scorer": DLCscorer, "DLC-model-config file": dlc_cfg, "trainIndices": trainIndices, "testIndices": testIndices, "trainFraction": trainFraction, } metadata = {"data": dictionary} _ = auxfun_multianimal.SaveFullMultiAnimalData( PredicteData, metadata, resultsfilename) tf.compat.v1.reset_default_graph() n_multibpts = len(cfg["multianimalbodyparts"]) if n_multibpts == 1: continue # Skip data-driven skeleton selection unless # the model was trained on the full graph. 
max_n_edges = n_multibpts * (n_multibpts - 1) // 2 n_edges = len(dlc_cfg["partaffinityfield_graph"]) if n_edges == max_n_edges: print("Selecting best skeleton...") n_graphs = 10 paf_inds = None else: n_graphs = 1 paf_inds = [list(range(n_edges))] ( results, paf_scores, best_assemblies, ) = crossvalutils.cross_validate_paf_graphs( config, str(path_test_config).replace("pose_", "inference_"), data_path, data_path.replace("_full.", "_meta."), n_graphs=n_graphs, paf_inds=paf_inds, oks_sigma=dlc_cfg.get("oks_sigma", 0.1), margin=dlc_cfg.get("bbox_margin", 0), symmetric_kpts=dlc_cfg.get("symmetric_kpts"), ) if plotting == "individual": assemblies, assemblies_unique, image_paths = best_assemblies fig, ax = visualization.create_minimal_figure() n_animals = len(cfg["individuals"]) if cfg["uniquebodyparts"]: n_animals += 1 colors = visualization.get_cmap(n_animals, name=cfg["colormap"]) for k, v in tqdm(assemblies.items()): imname = image_paths[k] image_path = os.path.join(cfg["project_path"], *imname) frame = auxfun_videos.imread(image_path, mode="skimage") h, w, _ = np.shape(frame) fig.set_size_inches(w / 100, h / 100) ax.set_xlim(0, w) ax.set_ylim(0, h) ax.invert_yaxis() gt = [ s.to_numpy().reshape((-1, 2)) for _, s in Data.loc[imname].groupby("individuals") ] coords_pred = [] coords_pred += [ass.xy for ass in v] probs_pred = [] probs_pred += [ass.data[:, 2:3] for ass in v] if assemblies_unique is not None: unique = assemblies_unique.get(k, None) if unique is not None: coords_pred.append(unique[:, :2]) probs_pred.append(unique[:, 2:3]) while len(coords_pred) < len(gt): coords_pred.append(np.full((1, 2), np.nan)) probs_pred.append(np.full((1, 2), np.nan)) ax = visualization.make_multianimal_labeled_image( frame, gt, coords_pred, probs_pred, colors, cfg["dotsize"], cfg["alphavalue"], cfg["pcutoff"], ax=ax, ) visualization.save_labeled_frame( fig, image_path, foldername, k in trainIndices, ) visualization.erase_artists(ax) df = results[1].copy() df.loc(axis=0)[("mAP_train", 
"mean")] = [ d[0]["mAP"] for d in results[2] ] df.loc(axis=0)[("mAR_train", "mean")] = [ d[0]["mAR"] for d in results[2] ] df.loc(axis=0)[("mAP_test", "mean")] = [ d[1]["mAP"] for d in results[2] ] df.loc(axis=0)[("mAR_test", "mean")] = [ d[1]["mAR"] for d in results[2] ] with open(data_path.replace("_full.", "_map."), "wb") as file: pickle.dump((df, paf_scores), file) if len(final_result ) > 0: # Only append if results were calculated make_results_file(final_result, evaluationfolder, DLCscorer) os.chdir(str(start_path))
def create_pretrained_project(
    project,
    experimenter,
    videos,
    model="full_human",
    working_directory=None,
    copy_videos=False,
    videotype="",
    analyzevideo=True,
    filtered=True,
    createlabeledvideo=True,
    trainFraction=None,
):
    r"""
    Creates a new project directory, sub-directories and a basic configuration file.
    Change its parameters to your projects need.

    The project will also be initialized with a pre-trained model from the
    DeepLabCut model zoo! http://modelzoo.deeplabcut.org

    Parameters
    ----------
    project : string
        String containing the name of the project.

    experimenter : string
        String containing the name of the experimenter.

    videos : list
        A list of string containing the full paths of the videos to include in the project.

    model : string, options see http://www.mousemotorlab.org/dlc-modelzoo
        Current option and default: 'full_human'. Creates a demo human project and
        analyzes a video with ResNet 101 weights pretrained on MPII Human Pose.
        This is from the DeeperCut paper by Insafutdinov et al.
        https://arxiv.org/abs/1605.03170
        Please make sure to cite it too if you use this code!

    working_directory : string, optional
        The directory where the project will be created. The default is the
        ``current working directory``; if provided, it must be a string.

    copy_videos : bool, optional
        ON WINDOWS: TRUE is often necessary!
        If this is set to True, the videos are copied to the ``videos`` directory.
        If it is False, symlinks of the videos are copied to the project/videos
        directory. The default is ``False``; if provided it must be either
        ``True`` or ``False``.

    videotype : string, optional
        Passed through to project creation and to the video analysis, filtering
        and plotting calls.

    analyzevideo : bool, optional, default=True
        If true, then the videos in the project's ``videos`` folder are analyzed.
        If false, then only the project will be created and the weights downloaded.

    filtered : bool, optional, default=True
        Boolean variable indicating if filtered pose data output should be plotted
        rather than frame-by-frame predictions. Filtered version can be calculated
        with deeplabcut.filterpredictions.

    createlabeledvideo : bool, optional, default=True
        If true, a labeled video and trajectory plots are created (predictions
        are filtered first when ``filtered`` is true).

    trainFraction : float, optional
        By default value from *new* projects. (0.95)
        Fraction that will be used in dlc-model/trainingset folder name.

    Returns
    -------
    tuple
        ``(cfg, path_train_config)``: the value returned by
        ``deeplabcut.create_new_project`` (used here as the path of the project
        config file) and the path of the training ``pose_cfg.yaml``.
        ``("N/A", "N/A")`` is returned when ``model`` is not in ``Modeloptions``.

    Raises
    ------
    FileNotFoundError
        If no ``.meta`` snapshot file is found in the train folder after the
        weights were downloaded.

    Example
    --------
    Linux/MacOs loading full_human model and analyzing video /homosapiens1.avi
    >>> deeplabcut.create_pretrained_project('humanstrokestudy','Linus',['/data/videos/homosapiens1.avi'], copy_videos=False)

    Loading full_cat model and analyzing video "felixfeliscatus3.avi"
    >>> deeplabcut.create_pretrained_project('humanstrokestudy','Linus',['/data/videos/felixfeliscatus3.avi'], model='full_cat')

    Windows:
    >>> deeplabcut.create_pretrained_project('humanstrokestudy','Bill',[r'C:\yourusername\rig-95\Videos\reachingvideo1.avi'],r'C:\yourusername\analysis\project', copy_videos=True)
    Users must format paths with either:  r'C:\ OR 'C:\\ <- i.e. a double backslash \ \ )
    """
    if model not in Modeloptions:
        return "N/A", "N/A"

    cwd = os.getcwd()
    try:
        cfg = deeplabcut.create_new_project(
            project, experimenter, videos, working_directory, copy_videos, videotype
        )
        if trainFraction is not None:
            auxiliaryfunctions.edit_config(cfg, {"TrainingFraction": [trainFraction]})

        config = auxiliaryfunctions.read_config(cfg)
        if model == "full_human":
            config["bodyparts"] = [
                "ankle1",
                "knee1",
                "hip1",
                "hip2",
                "knee2",
                "ankle2",
                "wrist1",
                "elbow1",
                "shoulder1",
                "shoulder2",
                "elbow2",
                "wrist2",
                "chin",
                "forehead",
            ]
            config["skeleton"] = [
                ["ankle1", "knee1"],
                ["ankle2", "knee2"],
                ["knee1", "hip1"],
                ["knee2", "hip2"],
                ["hip1", "hip2"],
                ["shoulder1", "shoulder2"],
                ["shoulder1", "hip1"],
                ["shoulder2", "hip2"],
                ["shoulder1", "elbow1"],
                ["shoulder2", "elbow2"],
                ["chin", "forehead"],
                ["elbow1", "wrist1"],
                ["elbow2", "wrist2"],
            ]
            config["default_net_type"] = "resnet_101"
        # Other models: just make a case and put the stuff you want.
        # TBD: 'partaffinityfield_graph' >> use to set skeleton!

        auxiliaryfunctions.write_config(cfg, config)
        config = auxiliaryfunctions.read_config(cfg)

        # The model folder is the same for train and test; compute it once.
        modelfoldername = auxiliaryfunctions.get_model_folder(
            trainFraction=config["TrainingFraction"][0], shuffle=1, cfg=config
        )
        model_root = os.path.join(config["project_path"], str(modelfoldername))
        train_dir = Path(os.path.join(model_root, "train"))
        test_dir = Path(os.path.join(model_root, "test"))

        # Create the model directory
        train_dir.mkdir(parents=True, exist_ok=True)
        test_dir.mkdir(parents=True, exist_ok=True)

        path_train_config = os.path.join(
            config["project_path"], Path(modelfoldername), "train", "pose_cfg.yaml"
        )
        path_test_config = os.path.join(
            config["project_path"], Path(modelfoldername), "test", "pose_cfg.yaml"
        )

        # Download the weights and put them in the appropriate directory
        print("Downloading weights...")
        auxfun_models.download_model(model, train_dir)

        pose_cfg = deeplabcut.auxiliaryfunctions.read_plainconfig(path_train_config)
        print(path_train_config)

        # Updating config file:
        dict_ = {
            "default_net_type": pose_cfg["net_type"],
            "default_augmenter": pose_cfg["dataset_type"],
            "bodyparts": pose_cfg["all_joints_names"],
            "dotsize": 6,
        }
        auxiliaryfunctions.edit_config(cfg, dict_)

        # Updating training and test pose_cfg: the snapshot name is the name of
        # the first ".meta" file in the train folder, without that suffix.
        meta_files = [fn for fn in os.listdir(train_dir) if ".meta" in fn]
        if not meta_files:
            raise FileNotFoundError(
                f"No '.meta' snapshot file found in {train_dir}; "
                "the pretrained weights may not have been downloaded correctly."
            )
        snapshotname = meta_files[0].split(".meta")[0]
        dict2change = {
            "init_weights": str(os.path.join(train_dir, snapshotname)),
            "project_path": str(config["project_path"]),
        }
        UpdateTrain_pose_yaml(pose_cfg, dict2change, path_train_config)

        keys2save = [
            "dataset",
            "dataset_type",
            "num_joints",
            "all_joints",
            "all_joints_names",
            "net_type",
            "init_weights",
            "global_scale",
            "location_refinement",
            "locref_stdev",
        ]
        MakeTest_pose_yaml(pose_cfg, keys2save, path_test_config)

        video_dir = os.path.join(config["project_path"], "videos")
        if analyzevideo:
            print("Analyzing video...")
            deeplabcut.analyze_videos(cfg, [video_dir], videotype, save_as_csv=True)

        if createlabeledvideo:
            if filtered:
                deeplabcut.filterpredictions(cfg, [video_dir], videotype)

            print("Plotting results...")
            deeplabcut.create_labeled_video(
                cfg, [video_dir], videotype, draw_skeleton=True, filtered=filtered
            )
            deeplabcut.plot_trajectories(cfg, [video_dir], videotype, filtered=filtered)

        return cfg, path_train_config
    finally:
        # Restore the caller's working directory even if a step above failed.
        os.chdir(cwd)
def create_training_dataset(
    config,
    num_shuffles=1,
    Shuffles=None,
    windows2linux=False,
    userfeedback=False,
    trainIndices=None,
    testIndices=None,
    net_type=None,
    augmenter_type=None,
    posecfg_template=None,
):
    """Creates a training dataset.

    Labels from all the extracted frames are merged into a single .h5 file.
    Only the videos included in the config file are used to create this dataset.

    Parameters
    ----------
    config : string
        Full path of the ``config.yaml`` file as a string.

    num_shuffles : int, optional, default=1
        Number of shuffles of training dataset to create, i.e. ``[1,2,3]`` for
        ``num_shuffles=3``.

    Shuffles: list[int], optional
        Alternatively the user can also give a list of shuffles. Entries that
        are not integers are ignored.

    windows2linux: bool, optional, default=False
        Deprecated; has no effect. A ``FutureWarning`` is emitted if it is set.

    userfeedback: bool, optional, default=False
        If ``False``, all requested train/test splits are created (no matter if
        they already exist). If you want to assure that previous splits etc. are
        not overwritten, set this to ``True`` and you will be asked for each split.

    trainIndices: list of lists, optional, default=None
        List of one or multiple lists containing train indexes.
        A list containing two lists of training indexes will produce two splits.
        Must be passed together with ``testIndices``, with one list per shuffle.
        Entries equal to ``-1`` are treated as padding: they count towards the
        training fraction but are removed from the stored indices.

    testIndices: list of lists, optional, default=None
        List of one or multiple lists containing test indexes.

    net_type: string, optional, default=None
        Type of networks. Currently supported options are

        * ``resnet_50``
        * ``resnet_101``
        * ``resnet_152``
        * ``mobilenet_v2_1.0``
        * ``mobilenet_v2_0.75``
        * ``mobilenet_v2_0.5``
        * ``mobilenet_v2_0.35``
        * ``efficientnet-b0``
        * ``efficientnet-b1``
        * ``efficientnet-b2``
        * ``efficientnet-b3``
        * ``efficientnet-b4``
        * ``efficientnet-b5``
        * ``efficientnet-b6``

    augmenter_type: string, optional, default=None
        Type of augmenter. Currently supported augmenters are

        * ``default``
        * ``scalecrop``
        * ``imgaug``
        * ``tensorpack``
        * ``deterministic``

    posecfg_template: string, optional, default=None
        Path to a ``pose_cfg.yaml`` file to use as a template for generating the
        new one for the current iteration. Useful if you would like to start with
        the same parameters a previous training iteration. None uses the default
        ``pose_cfg.yaml``.

    Returns
    -------
    list(tuple) or None
        If training dataset was successfully created, a list of tuples is returned.
        The first two elements in each tuple represent the training fraction and
        the shuffle value. The last two elements in each tuple are arrays of
        integers representing the training and test indices.

        Returns None if training dataset could not be created. None is also
        returned for multi-animal projects, where the work is delegated to
        ``create_multianimaltraining_dataset``.

    Raises
    ------
    ValueError
        If ``posecfg_template`` does not point to a ``pose_cfg.yaml`` file, if
        ``net_type`` or ``augmenter_type`` is not supported, if only one of
        ``trainIndices``/``testIndices`` is given, or if the numbers of shuffles,
        train index lists and test index lists differ.
    Exception
        If ``userfeedback`` is set and the user declines to overwrite an
        existing split.

    Notes
    -----
    Use the function ``add_new_videos`` at any stage of the project to add more
    videos to the project.

    Examples
    --------

    Linux/MacOS

    >>> deeplabcut.create_training_dataset(
            '/analysis/project/reaching-task/config.yaml', num_shuffles=1,
        )

    Windows

    >>> deeplabcut.create_training_dataset(
            'C:\\\\Users\\\\Ulf\\\\looming-task\\\\config.yaml', Shuffles=[3,17,5],
        )
    """
    import scipy.io as sio

    if windows2linux:
        # DeprecationWarnings are silenced since Python 3.2 unless triggered in __main__
        warnings.warn(
            "`windows2linux` has no effect since 2.2.0.4 and will be removed in 2.2.1.",
            FutureWarning,
        )

    # Loading metadata from config file:
    cfg = auxiliaryfunctions.read_config(config)
    if posecfg_template:
        if not posecfg_template.endswith("pose_cfg.yaml"):
            raise ValueError(
                "posecfg_template argument must contain path to a pose_cfg.yaml file"
            )
        print("Reloading pose_cfg parameters from " + posecfg_template + "\n")
        from deeplabcut.utils.auxiliaryfunctions import read_plainconfig

        prior_cfg = read_plainconfig(posecfg_template)

    if cfg.get("multianimalproject", False):
        from deeplabcut.generate_training_dataset.multiple_individuals_trainingsetmanipulation import (
            create_multianimaltraining_dataset,
        )

        create_multianimaltraining_dataset(
            config, num_shuffles, Shuffles, net_type=net_type
        )
        return None

    scorer = cfg["scorer"]
    project_path = cfg["project_path"]
    # Create path for training sets & store data there
    # (path concatenation is OS platform independent).
    trainingsetfolder = auxiliaryfunctions.GetTrainingSetFolder(cfg)
    auxiliaryfunctions.attempttomakefolder(
        Path(os.path.join(project_path, str(trainingsetfolder))), recursive=True
    )

    Data = merge_annotateddatasets(
        cfg,
        Path(os.path.join(project_path, trainingsetfolder)),
    )
    if Data is None:
        return None
    Data = Data[scorer]  # extract labeled data

    # loading & linking pretrained models
    if net_type is None:
        net_type = cfg.get("default_net_type", "resnet_50")
    elif not any(
        family in net_type for family in ("resnet", "mobilenet", "efficientnet")
    ):
        raise ValueError(f"Invalid network type: {net_type}")

    if augmenter_type is None:
        augmenter_type = cfg.get("default_augmenter", "imgaug")
        if augmenter_type is None:
            # The key can be present but null in config.yaml for old projects:
            # update the file for backwards compatibility.
            auxiliaryfunctions.edit_config(config, {"default_augmenter": "imgaug"})
            augmenter_type = "imgaug"
    elif augmenter_type not in (
        "default",
        "scalecrop",
        "imgaug",
        "tensorpack",
        "deterministic",
    ):
        raise ValueError(f"Invalid augmenter type: {augmenter_type}")

    if posecfg_template:
        if net_type != prior_cfg["net_type"]:
            print(
                "WARNING: Specified net_type does not match net_type from posecfg_template path entered. Proceed with caution."
            )
        if augmenter_type != prior_cfg["dataset_type"]:
            print(
                "WARNING: Specified augmenter_type does not match dataset_type from posecfg_template path entered. Proceed with caution."
            )

    # Loading the encoder (if necessary downloading from TF)
    dlcparent_path = auxiliaryfunctions.get_deeplabcut_path()
    if posecfg_template:
        defaultconfigfile = posecfg_template
    else:
        defaultconfigfile = os.path.join(dlcparent_path, "pose_cfg.yaml")
    model_path, num_shuffles = auxfun_models.check_for_weights(
        net_type, Path(dlcparent_path), num_shuffles
    )

    if Shuffles is None:
        Shuffles = range(1, num_shuffles + 1)
    else:
        Shuffles = [i for i in Shuffles if isinstance(i, int)]

    if trainIndices is None and testIndices is None:
        splits = [
            (
                trainFraction,
                shuffle,
                SplitTrials(range(len(Data.index)), trainFraction),
            )
            for trainFraction in cfg["TrainingFraction"]
            for shuffle in Shuffles
        ]
    else:
        if trainIndices is None or testIndices is None:
            raise ValueError(
                "trainIndices and testIndices must either both be given or both be None."
            )
        # NOTE: `a != b != c` is a chained comparison, i.e. `a != b and b != c`,
        # which misses most mismatches; compare all three lengths explicitly.
        if not (len(trainIndices) == len(testIndices) == len(Shuffles)):
            raise ValueError(
                "Number of Shuffles and train and test indexes should be equal."
            )
        splits = []
        for shuffle, train_inds, test_inds in zip(Shuffles, trainIndices, testIndices):
            trainFraction = round(
                len(train_inds) * 1.0 / (len(train_inds) + len(test_inds)), 2
            )
            print(
                f"You passed a split with the following fraction: {int(100 * trainFraction)}%"
            )
            # Now that the training fraction is guaranteed to be correct,
            # the values added to pad the indices are removed.
            train_inds = np.asarray(train_inds)
            train_inds = train_inds[train_inds != -1]
            test_inds = np.asarray(test_inds)
            test_inds = test_inds[test_inds != -1]
            splits.append((trainFraction, shuffle, (train_inds, test_inds)))

    bodyparts = cfg["bodyparts"]
    nbodyparts = len(bodyparts)
    # Local names differ from the `trainIndices`/`testIndices` parameters on
    # purpose, so the arguments are not clobbered while iterating.
    for trainFraction, shuffle, (split_train_inds, split_test_inds) in splits:
        if len(split_train_inds) == 0:
            continue

        if userfeedback:
            trainposeconfigfile, _, _ = training.return_train_network_path(
                config,
                shuffle=shuffle,
                trainingsetindex=cfg["TrainingFraction"].index(trainFraction),
            )
            if trainposeconfigfile.is_file():
                askuser = input(
                    "The model folder is already present. If you continue, it will overwrite the existing model (split). Do you want to continue?(yes/no): "
                )
                if askuser.strip().lower() in ("no", "n"):
                    raise Exception(
                        "Use the Shuffles argument as a list to specify a different shuffle index. Check out the help for more details."
                    )

        ####################################################
        # Generating data structure with labeled information & frame metadata (for deep cut)
        ####################################################
        # Make training file!
        (
            datafilename,
            metadatafilename,
        ) = auxiliaryfunctions.GetDataandMetaDataFilenames(
            trainingsetfolder, trainFraction, shuffle, cfg
        )

        ################################################################################
        # Saving data file (convert to training file for deeper cut (*.mat))
        ################################################################################
        data, MatlabData = format_training_data(
            Data, split_train_inds, nbodyparts, project_path
        )
        sio.savemat(
            os.path.join(project_path, datafilename), {"dataset": MatlabData}
        )

        ################################################################################
        # Saving metadata (Pickle file)
        ################################################################################
        auxiliaryfunctions.SaveMetadata(
            os.path.join(project_path, metadatafilename),
            data,
            split_train_inds,
            split_test_inds,
            trainFraction,
        )

        ################################################################################
        # Creating file structure for training &
        # Test files as well as pose_yaml files (containing training and testing information)
        ################################################################################
        modelfoldername = auxiliaryfunctions.get_model_folder(
            trainFraction, shuffle, cfg
        )
        model_dir = Path(config).parents[0] / modelfoldername
        auxiliaryfunctions.attempttomakefolder(model_dir, recursive=True)
        auxiliaryfunctions.attempttomakefolder(str(model_dir / "train"))
        auxiliaryfunctions.attempttomakefolder(str(model_dir / "test"))

        path_train_config = os.path.join(
            cfg["project_path"],
            Path(modelfoldername),
            "train",
            "pose_cfg.yaml",
        )
        path_test_config = os.path.join(
            cfg["project_path"],
            Path(modelfoldername),
            "test",
            "pose_cfg.yaml",
        )

        items2change = {
            "dataset": datafilename,
            "metadataset": metadatafilename,
            "num_joints": len(bodyparts),
            "all_joints": [[i] for i in range(len(bodyparts))],
            "all_joints_names": [str(bpt) for bpt in bodyparts],
            "init_weights": model_path,
            "project_path": str(cfg["project_path"]),
            "net_type": net_type,
            "dataset_type": augmenter_type,
        }

        items2drop = {}
        if augmenter_type == "scalecrop":
            # these values are dropped as scalecrop
            # doesn't have rotation implemented
            items2drop = {"rotation": 0, "rotratio": 0.0}
        # Also drop maDLC smart cropping augmentation parameters
        for key in ("pre_resize", "crop_size", "max_shift", "crop_sampling"):
            items2drop[key] = None

        trainingdata = MakeTrain_pose_yaml(
            items2change, path_train_config, defaultconfigfile, items2drop
        )

        keys2save = [
            "dataset",
            "num_joints",
            "all_joints",
            "all_joints_names",
            "net_type",
            "init_weights",
            "global_scale",
            "location_refinement",
            "locref_stdev",
        ]
        MakeTest_pose_yaml(trainingdata, keys2save, path_test_config)
        print(
            "The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!"
        )

    return splits