@contextlib.contextmanager
def _register_toy_dataset(
    dataset_name, image_generator, num_images, num_classes=-1, num_keypoints=0
):
    json_dataset, meta_data = create_toy_dataset(
        image_generator,
        num_images=num_images,
        num_classes=num_classes,
        num_keypoints=num_keypoints,
    )

    with make_temp_directory("detectron2go_tmp_dataset") as tmp_dir:
        json_file = os.path.join(tmp_dir, "{}.json".format(dataset_name))
        with open(json_file, "w") as f:
            json.dump(json_dataset, f)

        split_dict = {
            IM_DIR: image_generator.get_image_dir(),
            ANN_FN: json_file,
            "meta_data": meta_data,
        }
        register_dataset_split(dataset_name, split_dict)

        try:
            yield
        finally:
            # un-register so the name can be reused later
            DatasetCatalog.remove(dataset_name)
            MetadataCatalog.remove(dataset_name)
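# A minimal usage sketch for the context manager above. `FakeImageGenerator`
# is a hypothetical stand-in for whatever image-generator class the
# surrounding test suite provides; it only needs a get_image_dir() method.
def example_toy_dataset_usage():
    image_generator = FakeImageGenerator()  # hypothetical helper
    with _register_toy_dataset("toy_ds", image_generator, num_images=10, num_classes=3):
        dicts = DatasetCatalog.get("toy_ds")  # only visible inside the block
    assert "toy_ds" not in DatasetCatalog.list()  # cleaned up on exit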
def register_our_dataset():
    """
    Carefully registers our dataset for use in the model
    """
    # People, boxes, and chickens.
    # These names will be used later to reference the datasets.
    dataset_name_train = "person_box_chicken_train"
    dataset_name_val = "person_box_chicken_val"

    # I'm doing terrible things with hardcoded paths, oh well
    person_box_chicken_train_json = "/home/sean/Documents/school/4th_year/cv/final_project/datasets/custom_dataset/person_box_chicken_train.json"
    person_box_chicken_val_json = "/home/sean/Documents/school/4th_year/cv/final_project/datasets/custom_dataset/person_box_chicken_val.json"
    person_box_chicken_train_image_dir = "/home/sean/Documents/school/4th_year/cv/final_project/datasets/custom_dataset/person_box_chicken_train/"
    person_box_chicken_val_image_dir = "/home/sean/Documents/school/4th_year/cv/final_project/datasets/custom_dataset/person_box_chicken_val/"

    # Be careful not to double-register; that raises an exception.
    for name in DatasetCatalog.list():
        if name not in [dataset_name_train, dataset_name_val]:
            DatasetCatalog.remove(name)

    if dataset_name_train in DatasetCatalog.list():
        print("Already registered %s dataset" % dataset_name_train)
    else:
        register_coco_instances(dataset_name_train, {}, person_box_chicken_train_json,
                                person_box_chicken_train_image_dir)

    if dataset_name_val in DatasetCatalog.list():
        print("Already registered %s dataset" % dataset_name_val)
    else:
        register_coco_instances(dataset_name_val, {}, person_box_chicken_val_json,
                                person_box_chicken_val_image_dir)

    # Because I'm lazy, return the dataset names for convenience
    return dataset_name_train, dataset_name_val
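# A quick sketch of how the returned names would typically be wired into a
# detectron2 config; the model zoo file chosen here is illustrative, not
# taken from the source.
from detectron2 import model_zoo
from detectron2.config import get_cfg

train_name, val_name = register_our_dataset()
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = (train_name,)
cfg.DATASETS.TEST = (val_name,)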
def register(self):
    name = "train" if self.is_training else "test"
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
    DatasetCatalog.register(name, lambda: self)
    metadata = MetadataCatalog.get(name)
    metadata.set(thing_classes=MAP_NAMES)
    return metadata
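# Sketch: because `register` passes `lambda: self`, DatasetCatalog.get(name)
# returns the instance itself, so the class presumably behaves like a list of
# detectron2 dataset dicts. `PersonDataset` is a hypothetical name for that
# owning class.
ds = PersonDataset(is_training=True)
metadata = ds.register()
same_ds = DatasetCatalog.get("train")  # the registered lambda returns `ds`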
def remove_dataset(ds_name: str):
    r"""Remove a previously registered dataset

    Parameters
    ----------
    ds_name : str
        the dataset to be removed
    """
    for channel in ("training", "validation"):
        DatasetCatalog.remove(f"{ds_name}_{channel}")
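# DatasetCatalog.remove raises a KeyError if a split was never registered;
# a tolerant variant (a sketch, not part of the source) can guard on
# DatasetCatalog.list() and also clean up the matching metadata:
def remove_dataset_if_present(ds_name: str):
    for channel in ("training", "validation"):
        full_name = f"{ds_name}_{channel}"
        if full_name in DatasetCatalog.list():
            DatasetCatalog.remove(full_name)
            MetadataCatalog.remove(full_name)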
def experiment():
    result_df = pd.DataFrame(columns=['w20', 'size', 'AP', 'iter'])
    # astype returns a new frame, so the result must be assigned back
    result_df = result_df.astype({'w20': 'bool', 'size': 'int32', 'AP': 'float32'})
    date = datetime.date.today()
    time = datetime.datetime.now()
    agg = None
    try:
        for trial_id in range(10**6):
            sample_batch = generate_train_sets(True)
            sample_batch.extend(generate_train_sets(False))
            for sample in sample_batch:
                print("---------------------------------------\n",
                      'RUNNING EXPERIMENT WITH', sample[0], sample[1],
                      'TRIAL_ID IS', trial_id,
                      "\n---------------------------------------------")
                # cfg = TOY_cfg_and_register(trial_id, sample)
                cfg = initialize_cfg_and_register(trial_id, sample)
                trainer = Trainer(augmentations, cfg)
                trainer.resume_or_load(resume=False)
                try:
                    trainer.train()
                except hooks.StopFakeExc:
                    ap, iteration = trainer.info_at_stop
                else:
                    # EventStorage.latest() maps each key to a (value, iteration) pair
                    ap, iteration = trainer.storage.latest()['segm/AP']
                result = {'w20': sample[1], 'size': str(sample[0]), 'AP': ap, 'iter': iteration}
                result_df = result_df.append(result, ignore_index=True)
                with open(f'{base_output_dir}/results', 'a+') as f:
                    json.dump(result, f)
                    f.write(os.linesep)
                agg = result_df.groupby(['w20', 'size']).agg({'AP': ['mean', 'std']})
                total = torch.cuda.get_device_properties(0).total_memory // (10**6)
                reserved = torch.cuda.memory_reserved(0) // (10**6)
                allocated = torch.cuda.memory_allocated(0) // (10**6)
                free = reserved - allocated  # free inside reserved
                # un-register so the next trial can register the name again
                DatasetCatalog.remove(cfg.DATASETS.TRAIN[0])
                MetadataCatalog.remove(cfg.DATASETS.TRAIN[0])
                print("---------------------------------------\n", agg,
                      "\n---------------------------------------------")
                titles = ['TOTAL', 'RESERVED', 'ALLOCATED', 'FREE INSIDE RESERVED']
                vals = [total, reserved, allocated, free]
                print("\n".join(f'{title}:\t{val}' for title, val in zip(titles, vals)))
    except Exception as e:
        print(e)
    finally:
        time_info_str = "-".join(str(x) for x in
                                 [date.year, date.month, date.day, time.hour, time.minute])
        result_df.to_csv(f'{base_output_dir}/results_pd-{time_info_str}.csv')
        if agg is not None:
            agg.to_csv(f'{base_output_dir}/agg_pd-{time_info_str}.csv')
def convert_dataset_to_coco_json(output_dir, registered_dataset_name):
    output_coco_json = os.path.join(
        output_dir, f'{registered_dataset_name}_coco_format.json')
    convert_to_coco_json(registered_dataset_name,
                         output_file=output_coco_json, allow_cached=False)
    # Save the old metadata (MetadataCatalog, not DatasetCatalog, holds it;
    # DatasetCatalog.get would return the dataset dicts instead)
    metadata = MetadataCatalog.get(registered_dataset_name)
    # Remove the current registration
    DatasetCatalog.remove(registered_dataset_name)
    # Register again. If your dataset is already in COCO format, this whole
    # function can be replaced by the following call:
    register_coco_instances(registered_dataset_name, {}, output_coco_json,
                            os.path.abspath('./'))
    return output_coco_json
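# Usage sketch: round-trip an already-registered dataset through a COCO json
# so downstream tools that expect a json_file (e.g. COCOEvaluator) can use
# it. The dataset name is illustrative.
json_path = convert_dataset_to_coco_json("./output", "my_dataset_train")
# -> ./output/my_dataset_train_coco_format.json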
def __init__(self, params):
    self.base_path = params['base_path']
    self.min_points_threshold = params['min_points_threshold']
    self.n_jobs = params['n_jobs']
    self.overwrite = params['overwrite']
    self.n_sample_hard = params['n_sample_hard']
    self.n_sample_per_label = params['n_sample_per_label']
    self.input_format = params['input_format']

    assert os.path.exists(
        self.base_path + '/data/train.csv'), "No train CSV file in data folder."
    # read from the same path that was just checked
    self.data = pd.read_csv(self.base_path + '/data/train.csv')
    self.preprocess()

    # load labels and their assigned indices
    self.unique_labels = json.load(
        open(self.base_path + '/data/labels.json', 'r'))
    self.unique_labels['other'] = 0
    self.labels = list(self.unique_labels.keys())
    self.labels.insert(0, self.labels.pop())  # "other" must come first in the list

    # idx to labels for inference
    self.bond_labels = [self.unique_labels[b] for b in ['-', '=', '#']]
    self.idx_to_labels = {v: k for k, v in self.unique_labels.items()}
    for l, b in zip(self.bond_labels, ['SINGLE', 'DOUBLE', 'TRIPLE']):
        self.idx_to_labels[l] = b

    # prepare datasets for training; the lambda default binds `mode` per iteration
    for mode in ["train", "val"]:
        dataset_name = f"smilesdetect_{mode}"
        if dataset_name in DatasetCatalog.list():
            DatasetCatalog.remove(dataset_name)
        DatasetCatalog.register(dataset_name,
                                lambda mode=mode: self.get_metadata(mode))
        MetadataCatalog.get(dataset_name).set(thing_classes=self.labels)
    self.smiles_metadata = MetadataCatalog.get("smilesdetect_val")

    self.cfg = self.create_cfg()
    self.predictor = None
    self.inference_metadata = MetadataCatalog.get("smilesdetect_val")
def register_dataset(config, annot_df, images_df, annot):
    """
    Register dataset (detectron2 register_coco_instances), folds included
    """
    fold = config.general["fold"]
    train_dataset_name = f"my_dataset_train_{fold}"
    test_dataset_name = f"my_dataset_test_{fold}"
    train_dataset_file = os.path.join(DATASET_PATH, f"my_dataset_train_{fold}.json")
    test_dataset_file = os.path.join(DATASET_PATH, f"my_dataset_test_{fold}.json")

    train_annot_df = annot_df[annot_df["folds"] != fold]
    test_annot_df = annot_df[annot_df["folds"] == fold]
    train_annot_df = train_annot_df.drop(["normal_category", "normal_category_id"], axis=1)
    test_annot_df = test_annot_df.drop(["normal_category", "normal_category_id"], axis=1)

    # keep only the images that still have annotations in each split
    train_images_df = images_df[images_df["id"].isin(train_annot_df["image_id"].unique())]
    test_images_df = images_df[images_df["id"].isin(test_annot_df["image_id"].unique())]

    train_annot = annot.copy()
    test_annot = annot.copy()
    train_annot["annotations"] = train_annot_df.reset_index(drop=True).to_dict("records")
    train_annot["images"] = train_images_df.reset_index(drop=True).to_dict("records")
    test_annot["annotations"] = test_annot_df.reset_index(drop=True).to_dict("records")
    test_annot["images"] = test_images_df.reset_index(drop=True).to_dict("records")

    json.dump(train_annot, open(train_dataset_file, "w"))
    json.dump(test_annot, open(test_dataset_file, "w"))

    if train_dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(train_dataset_name)
        MetadataCatalog.remove(train_dataset_name)
    if test_dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(test_dataset_name)
        MetadataCatalog.remove(test_dataset_name)

    register_coco_instances(train_dataset_name, {}, train_dataset_file,
                            os.path.join(DATASET_PATH, "data"))
    register_coco_instances(test_dataset_name, {}, test_dataset_file,
                            os.path.join(DATASET_PATH, "data"))
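# Per-fold usage sketch: names here are assumptions (e.g. an `n_folds` entry
# in config.general is hypothetical). Each fold re-registers a fresh
# train/test pair that a trainer can then reference by name.
for f in range(config.general["n_folds"]):  # hypothetical fold count
    config.general["fold"] = f
    register_dataset(config, annot_df, images_df, annot)
    # e.g. cfg.DATASETS.TRAIN = (f"my_dataset_train_{f}",)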
def register_all_oid():
    # register each dataset split
    for tv in ["train", "validation"]:
        is_train = tv == 'train'
        paths = get_paths(OID_DIR, tv)

        ds_name = "oid_" + tv
        if ds_name in DatasetCatalog.list():
            DatasetCatalog.remove(ds_name)

        # make sure the validation dataset complies with the Detectron2 format
        if is_train:
            pipeline = DescPipeline([
                partial(get_oid_descs, is_train=is_train),
            ], cache_paths=[
                paths.descs_pkl,
            ])
        else:
            pipeline = DescPipeline(
                [
                    partial(get_oid_descs, is_train=is_train),
                    partial(oid_descs_to_detectron2_dicts,
                            masks_dir=paths.masks_dir,
                            cache_path=paths.dicts_pkl)
                ],
                cache_paths=[paths.descs_pkl, paths.dicts_pkl])

        # register the oid dataset dicts
        DatasetCatalog.register(ds_name, pipeline)

        # set oid metadata
        MetadataCatalog.get(ds_name).set(
            images_dir=paths.images_dir,
            masks_dir=paths.masks_dir,
            # json_file=json_file,
            image_root=paths.images_dir,
            evaluator_type="tfod",
            thing_classes=KLASS_NAMES,
            no_to_mid=NO_TO_MID)
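# After registration the pipeline callable runs lazily, only when the
# dataset dicts are first requested:
register_all_oid()
train_dicts = DatasetCatalog.get("oid_train")  # executes the DescPipeline
meta = MetadataCatalog.get("oid_train")
print(meta.evaluator_type)  # "tfod"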
def preview_augmentation(self, src, outdir: str = "./preview_augmentation", n_output: int = 100):
    """
    Rebuild the COCO file and preview from it (simpler than filtering in place)
    Params::
        src: str, List[str], index, List[index]
    """
    outdir = correct_dirpath(outdir)
    coco = CocoManager()
    coco.add_json(self.coco_json_path)
    # filter by src
    if type(src) == str:
        coco.df_json = coco.df_json.loc[coco.df_json["images_file_name"] == src]
    elif type(src) == int:
        coco.df_json = coco.df_json.iloc[src:src + 1]
    elif type(src) == list or type(src) == tuple:
        if type(src[0]) == str:
            coco.df_json = coco.df_json.loc[coco.df_json["images_file_name"].isin(src)]
        elif type(src[0]) == int:
            coco.df_json = coco.df_json.iloc[src, :]
    else:
        raise Exception("unsupported src type")
    coco.save(self.coco_json_path + ".cocomanager.json")

    # reload from the rebuilt coco file
    self.coco_json_path = self.coco_json_path + ".cocomanager.json"
    DatasetCatalog.remove(self.dataset_name)   # the key must be removed before re-registering
    MetadataCatalog.remove(self.dataset_name)  # the key must be removed before re-registering
    self.__register_coco_instances(self.dataset_name, self.coco_json_path, self.image_root)
    super().__init__(self.cfg)
    makedirs(outdir, exist_ok=True, remake=True)
    count = 0
    for i, x in enumerate(self.data_loader):
        # x holds one batch worth of samples (e.g. 2), so loop over them
        for j, data in enumerate(x):
            if j > 0:
                continue
            ## copy gt_*** -> pred_*** so the Visualizer output matches the predictor's
            img = self.img_conv_dataloader(data)
            ins = data["instances"].to("cpu")
            if ins.has("gt_boxes"):
                ins.set("pred_boxes", ins.gt_boxes)
            if ins.has("gt_classes"):
                ins.set("pred_classes", ins.gt_classes)
            if ins.has("gt_keypoints"):
                ins.set("pred_keypoints", ins.gt_keypoints)
            if ins.has("gt_masks"):
                ## gt_masks are polygons [x1, y1, x2, y2, ...]; convert them to
                ## the boolean pred format [False, True, True, ...]
                segs = ins.get("gt_masks").polygons
                list_ndf = []
                for seg_a_class in segs:
                    ndf = convert_seg_point_to_bool(img.shape[0], img.shape[1], seg_a_class)
                    list_ndf.append(ndf)
                ndf = np.concatenate([[ndfwk] for ndfwk in list_ndf], axis=0)
                ins.set("pred_masks", torch.from_numpy(ndf))  # convert to a Tensor
            data["instances"] = ins
            img = self.draw_annoetation(img, data)
            cv2.imwrite(outdir + "preview_augmentation." + str(i) + "." + str(j) + ".png", img)
            count += 1
        # stop once enough previews have been written
        if count > n_output:
            break
    DatasetCatalog.remove(self.dataset_name)   # the key must be removed before re-registering
    MetadataCatalog.remove(self.dataset_name)  # the key must be removed before re-registering
    self.coco_json_path = self.coco_json_path_org
    self.__register_coco_instances(self.dataset_name, self.coco_json_path, self.image_root)
    super().__init__(self.cfg)
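# Usage sketch: preview augmentations for the first image only, writing at
# most 20 PNGs. `trainer` is assumed to be an instance of the owning class,
# constructed elsewhere with a coco json path and image root.
trainer.preview_augmentation(src=0, outdir="./preview_augmentation", n_output=20)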
def train(model_name: str, results_path: str, train_idx: List[int],
          test_idx: List[int], train_annotations: OrderedDict,
          test_annotations: OrderedDict, lr: float = 0.0025,
          max_it: int = 500, img_per_batch: int = 16,
          batch_size: int = 512, num_freeze: int = 1) -> None:
    if Path(results_path).exists():
        shutil.rmtree(results_path)
    os.makedirs(results_path, exist_ok=True)

    for catalog_type in ['train', 'test']:
        catalog = f'aic19_{catalog_type}'
        if catalog in DatasetCatalog.list():
            DatasetCatalog.remove(catalog)
        if catalog_type == 'train':
            DatasetCatalog.register(
                catalog, lambda d=catalog_type: get_dicts(train_idx, train_annotations))
        else:
            DatasetCatalog.register(
                catalog, lambda d=catalog_type: get_dicts(test_idx, test_annotations))
        MetadataCatalog.get(catalog).set(thing_classes=['Car'])

    cfg = get_cfg()
    cfg.OUTPUT_DIR = results_path
    cfg.merge_from_file(model_zoo.get_config_file(model_name))
    cfg.DATASETS.TRAIN = ('aic19_train', )
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 16
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_name)
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    # cfg.MODEL.BACKBONE.FREEZE_AT = 1
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = batch_size
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    cfg.INPUT.MAX_SIZE_TEST = 1200
    cfg.INPUT.MAX_SIZE_TRAIN = 1200
    cfg.SOLVER.IMS_PER_BATCH = img_per_batch
    cfg.SOLVER.BASE_LR = lr
    cfg.SOLVER.MAX_ITER = max_it
    cfg.SOLVER.STEPS = []

    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, 'model_final.pth')
    evaluator = COCOEvaluator('aic19_test', cfg, False, output_dir=results_path)
    val_loader = build_detection_test_loader(cfg, "aic19_test")
    print(inference_on_dataset(trainer.model, val_loader, evaluator))
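# Invocation sketch: the model name is a detectron2 model zoo config path;
# the index and annotation arguments are assumed to come from the project's
# own data-loading code (the same structures consumed by get_dicts above).
train(model_name='COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml',
      results_path='./results/faster_rcnn',
      train_idx=train_idx, test_idx=test_idx,
      train_annotations=train_annotations, test_annotations=test_annotations,
      lr=0.0025, max_it=500)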
def tearDown(self):
    # Remove any datasets injected during the test
    injected_datasets = set(DatasetCatalog) - self._builtin_datasets
    for ds in injected_datasets:
        DatasetCatalog.remove(ds)
        MetadataCatalog.remove(ds)
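# The matching setUp snapshots the built-in datasets before each test so
# tearDown knows which ones were injected; a minimal sketch under that
# assumption:
def setUp(self):
    self._builtin_datasets = set(DatasetCatalog)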