def compute_crossval_metrics( config_path, inference_cfg, shuffle=1, trainingsetindex=0, modelprefix="", snapshotindex=-1, dcorr=5, leastbpts=3, ): fns = return_evaluate_network_data( config_path, shuffle=shuffle, trainingsetindex=trainingsetindex, modelprefix=modelprefix, ) predictionsfn = fns[snapshotindex] data, metadata = auxfun_multianimal.LoadFullMultiAnimalData(predictionsfn) params = set_up_evaluation(data) n_images = len(params["imnames"]) poses = [] poses_gt = [] stats = np.full( (n_images, 7), np.nan) # RMSE, hits, misses, false_pos, num_detections, pck columns = ["train_iter", "train_frac", "shuffle"] columns += [ "_".join((b, a)) for a in ("train", "test") for b in ("rmse", "hits", "misses", "falsepos", "ndetects", "pck", "rpck") ] for n, imname in enumerate(params["imnames"]): animals = inferenceutils.assemble_individuals( inference_cfg, data[imname], params["num_joints"], params["bpts"], params["ibpts"], params["paf"], params["paf_graph"], params["paf_links"], evaluation=True, ) n_animals = len(animals) if n_animals: _, _, GT = data[imname]["groundtruth"] GT = GT.droplevel("scorer").unstack(level=["bodyparts", "coords"]) gt = GT.values.reshape((GT.shape[0], -1, 2)) poses_gt.append(gt) if ( leastbpts > 0 ): # ONLY KEEP animals with at least as many bpts (to get rid of crops that cannot be assembled) gt = gt[np.nansum(gt, axis=(1, 2)) > leastbpts] temp = np.stack(animals).reshape((n_animals, -1, 3)) poses.append(temp) ani = temp[:, :gt.shape[1], :2] mat = np.full((gt.shape[0], n_animals), np.nan) with warnings.catch_warnings(): warnings.simplefilter("ignore", category=RuntimeWarning) for i in range(len(gt)): for j in range(len(animals)): mat[i, j] = np.sqrt( np.nanmean( np.sum((gt[i] - ani[j, :, :2])**2, axis=1))) if np.nansum( mat ) > 0: # also assures at least one not nan np.size(mat)>0: mat[np.isnan(mat)] = np.nanmax(mat) + 1 row_indices, col_indices = linear_sum_assignment(mat) stats[n, 0] = mat[row_indices, col_indices].mean() # rmse gt_annot = np.any(~np.isnan(gt), axis=2) gt_matched = gt_annot[row_indices].flatten() dlc_annot = np.any(~np.isnan(ani), axis=2) # DLC assemblies dlc_matched = dlc_annot[col_indices].flatten() stats[n, 1] = np.logical_and(gt_matched, dlc_matched).sum() # hits stats[n, 2] = gt_annot.sum() - stats[n, 1] # misses stats[n, 3] = np.logical_and( ~gt_matched, dlc_matched).sum() # additional detections stats[n, 4] = n_animals numgtpts = gt_annot.sum() # animal & bpt-wise distance! if numgtpts > 0: # corrkps=np.sum((gt[row_indices]-ani[col_indices])**2,axis=2)<dcorr**2 dists = np.sum((gt[row_indices] - ani[col_indices])**2, axis=2) corrkps = dists[np.isfinite(dists)] < dcorr**2 pck = (corrkps.sum() * 1.0 / numgtpts ) # weigh by actually annotated ones! rpck = (np.sum( np.exp(-dists[np.isfinite(dists)] * 1.0 / (2 * dcorr**2))) * 1.0 / numgtpts) else: pck = 1.0 # does that make sense? As a convention fully correct... rpck = 1.0 stats[n, 5] = pck stats[n, 6] = rpck train_iter = int(predictionsfn.split("-")[-1].split(".")[0]) train_frac = int(predictionsfn.split("trainset")[1].split("shuffle")[0]) with warnings.catch_warnings(): warnings.simplefilter("ignore", category=RuntimeWarning) res = np.r_[ train_iter, train_frac, shuffle, np.nanmean(stats[metadata["data"]["trainIndices"]], axis=0), np.nanmean(stats[metadata["data"]["testIndices"]], axis=0), ] return pd.DataFrame(res.reshape((1, -1)), columns=columns), poses_gt, poses
def create_video_with_all_detections( config, videos, videotype="avi", shuffle=1, trainingsetindex=0, displayedbodyparts="all", destfolder=None, modelprefix="", ): """ Create a video labeled with all the detections stored in a '*_full.pickle' file. Parameters ---------- config : str Absolute path to the config.yaml file videos : list of str A list of strings containing the full paths to videos for analysis or a path to the directory, where all the videos with same extension are stored. videotype: string, optional Checks for the extension of the video in case the input to the video is a directory.\n Only videos with this extension are analyzed. The default is ``.avi`` shuffle : int, optional Number of shuffles of training dataset. Default is set to 1. trainingsetindex: int, optional Integer specifying which TrainingsetFraction to use. By default the first (note that TrainingFraction is a list in config.yaml). displayedbodyparts: list of strings, optional This selects the body parts that are plotted in the video. Either ``all``, then all body parts from config.yaml are used orr a list of strings that are a subset of the full list. E.g. ['hand','Joystick'] for the demo Reaching-Mackenzie-2018-08-30/config.yaml to select only these two body parts. destfolder: string, optional Specifies the destination folder that was used for storing analysis data (default is the path of the video). """ from deeplabcut.pose_estimation_tensorflow.lib.inferenceutils import Assembler import pickle, re cfg = auxiliaryfunctions.read_config(config) trainFraction = cfg["TrainingFraction"][trainingsetindex] DLCscorername, _ = auxiliaryfunctions.GetScorerName( cfg, shuffle, trainFraction, modelprefix=modelprefix ) videos = auxiliaryfunctions.Getlistofvideos(videos, videotype) if not videos: print("No video(s) were found. Please check your paths and/or 'video_type'.") return for video in videos: videofolder = os.path.splitext(video)[0] if destfolder is None: outputname = "{}_full.mp4".format(videofolder + DLCscorername) full_pickle = os.path.join(videofolder + DLCscorername + "_full.pickle") else: auxiliaryfunctions.attempttomakefolder(destfolder) outputname = os.path.join( destfolder, str(Path(video).stem) + DLCscorername + "_full.mp4" ) full_pickle = os.path.join( destfolder, str(Path(video).stem) + DLCscorername + "_full.pickle" ) if not (os.path.isfile(outputname)): print("Creating labeled video for ", str(Path(video).stem)) h5file = full_pickle.replace("_full.pickle", ".h5") data, _ = auxfun_multianimal.LoadFullMultiAnimalData(h5file) data = dict(data) # Cast to dict (making a copy) so items can safely be popped header = data.pop("metadata") all_jointnames = header["all_joints_names"] if displayedbodyparts == "all": numjoints = len(all_jointnames) bpts = range(numjoints) else: # select only "displayedbodyparts" bpts = [] for bptindex, bp in enumerate(all_jointnames): if bp in displayedbodyparts: bpts.append(bptindex) numjoints = len(bpts) frame_names = list(data) frames = [int(re.findall(r"\d+", name)[0]) for name in frame_names] colorclass = plt.cm.ScalarMappable(cmap=cfg["colormap"]) C = colorclass.to_rgba(np.linspace(0, 1, numjoints)) colors = (C[:, :3] * 255).astype(np.uint8) pcutoff = cfg["pcutoff"] dotsize = cfg["dotsize"] clip = vp(fname=video, sname=outputname, codec="mp4v") ny, nx = clip.height(), clip.width() for n in trange(clip.nframes): frame = clip.load_frame() if frame is None: continue try: ind = frames.index(n) dets = Assembler._flatten_detections(data[frame_names[ind]]) for det in dets: if det.label not in bpts or det.confidence < pcutoff: continue x, y = det.pos rr, cc = disk((y, x), dotsize, shape=(ny, nx)) frame[rr, cc] = colors[bpts.index(det.label)] except ValueError: # No data stored for that particular frame print(n, "no data") pass try: clip.save_frame(frame) except: print(n, "frame writing error.") pass clip.close() else: print("Detections already plotted, ", outputname)
def bayesian_search( config_path, inferencecfg, pbounds, edgewisecondition=True, shuffle=1, trainingsetindex=0, modelprefix="", snapshotindex=-1, target="rpck_test", maximize=True, init_points=20, n_iter=50, acq="ei", log_file=None, dcorr=5, leastbpts=3, printingintermediatevalues=True, ): # if "rpck" in target: assert maximize == True if "rmse" in target: assert maximize == False cfg = auxiliaryfunctions.read_config(config_path) evaluationfolder = os.path.join( cfg["project_path"], str( auxiliaryfunctions.GetEvaluationFolder( cfg["TrainingFraction"][int(trainingsetindex)], shuffle, cfg, modelprefix=modelprefix, )), ) DLCscorer, DLCscorerlegacy = auxiliaryfunctions.GetScorerName( cfg, shuffle, cfg["TrainingFraction"][int(trainingsetindex)], cfg["iteration"], modelprefix=modelprefix, ) # load params fns = return_evaluate_network_data( config_path, shuffle=shuffle, trainingsetindex=trainingsetindex, modelprefix=modelprefix, ) predictionsfn = fns[snapshotindex] data, metadata = auxfun_multianimal.LoadFullMultiAnimalData(predictionsfn) params = set_up_evaluation(data) columns = ["train_iter", "train_frac", "shuffle"] columns += [ "_".join((b, a)) for a in ("train", "test") for b in ("rmse", "hits", "misses", "falsepos", "ndetects", "pck", "rpck") ] train_iter = trainingsetindex # int(predictionsfn.split('-')[-1].split('.')[0]) train_frac = cfg["TrainingFraction"][ train_iter] # int(predictionsfn.split('trainset')[1].split('shuffle')[0]) trainIndices = metadata["data"]["trainIndices"] testIndices = metadata["data"]["testIndices"] if edgewisecondition: mf = str( auxiliaryfunctions.GetModelFolder( cfg["TrainingFraction"][int(trainingsetindex)], shuffle, cfg, modelprefix=modelprefix, )) modelfolder = os.path.join(cfg["project_path"], mf) path_inferencebounds_config = (Path(modelfolder) / "test" / "inferencebounds.yaml") try: inferenceboundscfg = auxiliaryfunctions.read_plainconfig( path_inferencebounds_config) except FileNotFoundError: print("Computing distances...") from deeplabcut.pose_estimation_tensorflow import calculatepafdistancebounds inferenceboundscfg = calculatepafdistancebounds( config_path, shuffle, trainingsetindex) auxiliaryfunctions.write_plainconfig(path_inferencebounds_config, inferenceboundscfg) partaffinityfield_graph = params["paf_graph"] upperbound = np.array([ float(inferenceboundscfg[str(edge[0]) + "_" + str(edge[1])]["intra_max"]) for edge in partaffinityfield_graph ]) lowerbound = np.array([ float(inferenceboundscfg[str(edge[0]) + "_" + str(edge[1])]["intra_min"]) for edge in partaffinityfield_graph ]) upperbound *= inferencecfg["upperbound_factor"] lowerbound *= inferencecfg["lowerbound_factor"] else: lowerbound = None upperbound = None def dlc_hyperparams(**kwargs): inferencecfg.update(kwargs) # Ensure type consistency for k, (bound, _) in pbounds.items(): inferencecfg[k] = type(bound)(inferencecfg[k]) stats = compute_crossval_metrics_preloadeddata( params, columns, inferencecfg, data, trainIndices, testIndices, train_iter, train_frac, shuffle, lowerbound, upperbound, dcorr=dcorr, leastbpts=leastbpts, ) # stats = compute_crossval_metrics(config_path, inferencecfg, shuffle,trainingsetindex, # dcorr=dcorr,leastbpts=leastbpts,modelprefix=modelprefix) if printingintermediatevalues: print( "rpck", stats["rpck_test"].values[0], "rpck train:", stats["rpck_train"].values[0], ) print( "rmse", stats["rmse_test"].values[0], "miss", stats["misses_test"].values[0], "hit", stats["hits_test"].values[0], ) # val = stats['rmse_test'].values[0]*(1+stats['misses_test'].values[0]*1./stats['hits_test'].values[0]) val = stats[target].values[0] if np.isnan(val): if maximize: # pck case val = -1e9 # random small number else: # RMSE, return a large RMSE val = 1e9 if not maximize: val = -val return val opt = BayesianOptimization(f=dlc_hyperparams, pbounds=pbounds, random_state=42) if log_file: load_logs(opt, log_file) logger = JSONLogger(path=os.path.join(evaluationfolder, "opti_log" + DLCscorer + ".json")) opt.subscribe(Events.OPTIMIZATION_STEP, logger) opt.maximize(init_points=init_points, n_iter=n_iter, acq=acq) inferencecfg.update(opt.max["params"]) for k, (bound, _) in pbounds.items(): tmp = type(bound)(inferencecfg[k]) if isinstance(tmp, np.floating): tmp = np.round(tmp, 2).item() inferencecfg[k] = tmp return inferencecfg, opt