# Imports assumed for a recent DeepLabCut release; exact module paths may vary.
import os

from deeplabcut.pose_estimation_tensorflow.datasets import PoseDatasetFactory
from deeplabcut.utils import read_plainconfig


def ma_dataset():
    # Point the pose config at the bundled tri-mouse (multi-animal) test data.
    TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data")
    cfg = read_plainconfig(os.path.join(TEST_DATA_DIR, "pose_cfg.yaml"))
    cfg["project_path"] = TEST_DATA_DIR
    cfg["dataset"] = "trimouse_train_data.pickle"
    return PoseDatasetFactory.create(cfg)
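
# Hedged sketch: a minimal smoke test for the helper above. Calling
# ma_dataset() directly (instead of registering it as a pytest fixture) and
# the test name are illustrative assumptions, not part of the original code.
def test_ma_dataset_creation():
    dataset = ma_dataset()
    # The factory should return a dataset object built from pose_cfg.yaml.
    assert dataset is not None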

# Note: os, re, np, auxiliaryfunctions, merge_annotateddatasets,
# pad_train_test_indices and create_multianimaltraining_dataset are expected
# to be available at module level in the surrounding source file (not shown here).
def convert_cropped_to_standard_dataset(
    config_path,
    recreate_datasets=True,
    delete_crops=True,
    back_up=True,
):
    """Restore a project whose labeled data were cropped to its original
    (full-frame) video sets and, optionally, recreate the training datasets
    while preserving the existing train/test splits.
    """
    import pandas as pd
    import pickle
    import shutil
    from deeplabcut.generate_training_dataset import trainingsetmanipulation
    from deeplabcut.utils import read_plainconfig, write_config

    cfg = auxiliaryfunctions.read_config(config_path)
    videos_orig = cfg.pop("video_sets_original")
    is_cropped = cfg.pop("croppedtraining")
    if videos_orig is None or not is_cropped:
        print(
            "Labeled data do not appear to be cropped. "
            "Project will remain unchanged..."
        )
        return

    project_path = cfg["project_path"]
    if back_up:
        print("Backing up project...")
        shutil.copytree(project_path, project_path + "_bak", symlinks=True)

    if delete_crops:
        print("Deleting crops...")
        data_path = os.path.join(project_path, "labeled-data")
        for video in cfg["video_sets"]:
            _, filename, _ = trainingsetmanipulation._robust_path_split(video)
            if "_cropped" in video:  # One can never be too safe...
                shutil.rmtree(os.path.join(data_path, filename), ignore_errors=True)

    cfg["video_sets"] = videos_orig
    write_config(config_path, cfg)
    if not recreate_datasets:
        return

    datasets_folder = os.path.join(
        project_path,
        auxiliaryfunctions.GetTrainingSetFolder(cfg),
    )
    df_old = pd.read_hdf(
        os.path.join(datasets_folder, "CollectedData_" + cfg["scorer"] + ".h5"),
    )

    def strip_cropped_image_name(path):
        # Map a cropped image path back to the name of the original frame.
        head, filename = os.path.split(path)
        head = head.replace("_cropped", "")
        file, ext = filename.split(".")
        file = file.split("c")[0]
        return os.path.join(head, file + "." + ext)

    img_names_old = np.asarray(
        [strip_cropped_image_name(img) for img in df_old.index.to_list()]
    )
    df = merge_annotateddatasets(cfg, datasets_folder)
    img_names = df.index.to_numpy()
    train_idx = []
    test_idx = []
    pickle_files = []
    for filename in os.listdir(datasets_folder):
        if filename.endswith("pickle"):
            pickle_file = os.path.join(datasets_folder, filename)
            pickle_files.append(pickle_file)
            if filename.startswith("Docu"):
                with open(pickle_file, "rb") as f:
                    _, train_inds, test_inds, train_frac = pickle.load(f)
                    train_inds_temp = np.flatnonzero(
                        np.isin(img_names, img_names_old[train_inds])
                    )
                    test_inds_temp = np.flatnonzero(
                        np.isin(img_names, img_names_old[test_inds])
                    )
                    train_inds, test_inds = pad_train_test_indices(
                        train_inds_temp, test_inds_temp, train_frac
                    )
                    train_idx.append(train_inds)
                    test_idx.append(test_inds)

    # Search a pose_cfg.yaml file to parse missing information
    pose_config_path = ""
    for dirpath, _, filenames in os.walk(os.path.join(project_path, "dlc-models")):
        for file in filenames:
            if file.endswith("pose_cfg.yaml"):
                pose_config_path = os.path.join(dirpath, file)
                break
    pose_cfg = read_plainconfig(pose_config_path)
    net_type = pose_cfg["net_type"]
    if net_type == "resnet_50" and pose_cfg.get("multi_stage", False):
        net_type = "dlcrnet_ms5"

    # Clean the training-datasets folder prior to recreating the data pickles
    shuffle_inds = set()
    for file in pickle_files:
        os.remove(file)
        shuffle_inds.add(int(re.findall(r"shuffle(\d+)", file)[0]))
    create_multianimaltraining_dataset(
        config_path,
        trainIndices=train_idx,
        testIndices=test_idx,
        Shuffles=sorted(shuffle_inds),
        net_type=net_type,
        paf_graph=pose_cfg["partaffinityfield_graph"],
        crop_size=pose_cfg.get("crop_size", [400, 400]),
        crop_sampling=pose_cfg.get("crop_sampling", "hybrid"),
    )
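
# Hedged usage sketch for convert_cropped_to_standard_dataset: only the config
# path and the three keyword flags from its signature are used; the project
# path below is a placeholder, not a real project.
if __name__ == "__main__":
    convert_cropped_to_standard_dataset(
        "/home/user/demo-project/config.yaml",  # placeholder project config
        recreate_datasets=True,  # rebuild training pickles for each shuffle found
        delete_crops=True,  # remove the *_cropped folders under labeled-data
        back_up=True,  # copy the whole project to <project>_bak first
    )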