Code Example #1
def _register_toy_dataset(
    dataset_name, image_generator, num_images, num_classes=-1, num_keypoints=0
):
    json_dataset, meta_data = create_toy_dataset(
        image_generator,
        num_images=num_images,
        num_classes=num_classes,
        num_keypoints=num_keypoints,
    )

    with make_temp_directory("detectron2go_tmp_dataset") as tmp_dir:
        json_file = os.path.join(tmp_dir, "{}.json".format(dataset_name))
        with open(json_file, "w") as f:
            json.dump(json_dataset, f)

        split_dict = {
            IM_DIR: image_generator.get_image_dir(),
            ANN_FN: json_file,
            "meta_data": meta_data,
        }
        register_dataset_split(dataset_name, split_dict)

        try:
            yield
        finally:
            DatasetCatalog.remove(dataset_name)
            MetadataCatalog.remove(dataset_name)
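The snippet above is a generator (it yields inside try/finally), so in its source project it is presumably wrapped with contextlib.contextmanager and used as a with-block that registers the dataset on entry and removes it again on exit. A minimal usage sketch under that assumption; the wrapper, the toy_image_generator name, and the dataset name below are illustrative, not taken from the source:

import contextlib

# Hypothetical wrapper: assumes the generator above is meant to act as a context manager.
register_toy_dataset = contextlib.contextmanager(_register_toy_dataset)

with register_toy_dataset("toy_ds", toy_image_generator, num_images=4, num_classes=2):
    # The dataset exists only inside this block; the finally clause removes it
    # from DatasetCatalog and MetadataCatalog afterwards, even on error.
    dicts = DatasetCatalog.get("toy_ds")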
Code Example #2
File: our_demo.py  Project: aja9675/detectron2
def register_our_dataset():
    """
    Carefully registers our dataset for use in the model
    """
    # People, boxes, and chickens
    # These vars will be used later to reference the datasets
    dataset_name_train = "person_box_chicken_train"
    dataset_name_val = "person_box_chicken_val"

    # I'm doing terrible things with hardcoded paths, oh well
    person_box_chicken_train_json = "/home/sean/Documents/school/4th_year/cv/final_project/datasets/custom_dataset/person_box_chicken_train.json"
    person_box_chicken_val_json = "/home/sean/Documents/school/4th_year/cv/final_project/datasets/custom_dataset/person_box_chicken_val.json"
    person_box_chicken_train_image_dir = "/home/sean/Documents/school/4th_year/cv/final_project/datasets/custom_dataset/person_box_chicken_train/"
    person_box_chicken_val_image_dir = "/home/sean/Documents/school/4th_year/cv/final_project/datasets/custom_dataset/person_box_chicken_val/"

    # Careful not to double register, it raises an exception.
    for name in DatasetCatalog.list():
        if name not in [dataset_name_train, dataset_name_val]:
            DatasetCatalog.remove(name)
    if dataset_name_train in DatasetCatalog.list():
        print("Already registered %s dataset" % dataset_name_train)
    else:
        register_coco_instances(dataset_name_train, {}, person_box_chicken_train_json, person_box_chicken_train_image_dir)

    if dataset_name_val in DatasetCatalog.list():
        print("Already registered %s dataset" % dataset_name_val)
    else:
        register_coco_instances(dataset_name_val, {}, person_box_chicken_val_json, person_box_chicken_val_image_dir)

    # b/c I'm lazy, return the dataset names for convenience
    return dataset_name_train, dataset_name_val
Code Example #3
    def register(self):
        name = "train" if self.is_training else "test"
        if name in DatasetCatalog.list():
            DatasetCatalog.remove(name)
        DatasetCatalog.register(name, lambda: self)
        metadata = MetadataCatalog.get(name)
        metadata.set(thing_classes=MAP_NAMES)
        return metadata
Code Example #4
def remove_dataset(ds_name: str):
    r"""Remove a previously registered dataset

    Parameters
    ----------
    ds_name : str
        the dataset to be removed
    """
    for channel in ("training", "validation"):
        DatasetCatalog.remove(f"{ds_name}_{channel}")
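DatasetCatalog.remove (and MetadataCatalog.remove) simply pop the name and raise a KeyError when the given name was never registered, which is why most of the other snippets here guard the call with DatasetCatalog.list(). A defensive variant of the helper above, as a sketch only (the function name is illustrative, not from the source):

def remove_dataset_if_registered(ds_name: str):
    # Only remove splits that are actually present, so this can be called
    # unconditionally from setup or teardown code.
    for channel in ("training", "validation"):
        name = f"{ds_name}_{channel}"
        if name in DatasetCatalog.list():
            DatasetCatalog.remove(name)
            MetadataCatalog.remove(name)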
Code Example #5
def experiment():
    result_df = pd.DataFrame(columns=['w20', 'size', 'AP', 'iter'])
    result_df = result_df.astype({'w20': 'bool', 'size': 'int32', 'AP': 'float32'})
    date = datetime.date.today()
    time = datetime.datetime.now()
    try:
        for trial_id in range(10**6):
            sample_batch = generate_train_sets(True)
            sample_batch.extend(generate_train_sets(False))
            for sample in sample_batch:
                print("---------------------------------------\n",
                      'RUNNING EXPERIMENT WITH',sample[0],sample[1],'TRIAL_ID IS ',trial_id,
                      "\n---------------------------------------------")
                #cfg = TOY_cfg_and_register(trial_id,sample)
                cfg = initialize_cfg_and_register(trial_id,sample)
                trainer = Trainer(augmentations,cfg)
                trainer.resume_or_load(resume=False)
                try:
                    trainer.train()
                except hooks.StopFakeExc:
                    ap,iter = trainer.info_at_stop
                else:
                    ap,iter = trainer.storage.latest()['segm/AP']
                result = {'w20': sample[1],'size':str(sample[0]),'AP': ap, 'iter' : iter }

                result_df = result_df.append(result,ignore_index=True)
                with open(f'{base_output_dir}/results', 'a+') as f:
                    json.dump(result, f)
                    f.write(os.linesep)
                agg = result_df.groupby(['w20', 'size']).agg({'AP': ['mean', 'std']})
                t = torch.cuda.get_device_properties(0).total_memory //(10**6)
                r = torch.cuda.memory_reserved(0) //(10**6)
                a = torch.cuda.memory_allocated(0) //(10**6)
                f = (r - a)  # free inside reserved
                DatasetCatalog.remove(cfg.DATASETS.TRAIN[0])
                MetadataCatalog.remove(cfg.DATASETS.TRAIN[0])
                print("---------------------------------------\n",
                      agg,
                      "\n---------------------------------------------")
                titles = ['TOTAL', 'RESERVED','ALLOCATED','FREE INSIDE RESERVED']
                vals = [t,r,a,f]
                strs = []

                for title,val in zip(titles,vals):
                    strs.append(f'{title}:\t,{val}')
                print( "\n".join(strs) )
    except Exception as e:
        print(e)
    finally:
        time_info_str = "-".join([str(x) for x in [date.year,date.month,date.day,time.hour,time.minute]])
        result_df.to_csv(f'{base_output_dir}/results_pd-{time_info_str}.csv')
        agg.to_csv(f'{base_output_dir}/agg_pd-{time_info_str}.csv')
Code Example #6
def convert_dataset_to_coco_json(output_dir, registered_dataset_name):
    output_coco_json = os.path.join(
        output_dir, f'{registered_dataset_name}_coco_format.json')
    convert_to_coco_json(registered_dataset_name,
                         output_file=output_coco_json,
                         allow_cached=False)
    # Fetch the currently registered dataset dicts
    # (note: DatasetCatalog.get returns the dataset dicts, not metadata)
    dataset_dicts = DatasetCatalog.get(registered_dataset_name)
    # Remove the current registration
    DatasetCatalog.remove(registered_dataset_name)
    # Register again, now pointing at the freshly written COCO json
    register_coco_instances(registered_dataset_name, {}, output_coco_json,
                            os.path.abspath('./'))
    return output_coco_json
Code Example #7
    def __init__(self, params):
        self.base_path = params['base_path']
        self.min_points_threshold = params['min_points_threshold']
        self.n_jobs = params['n_jobs']
        self.overwrite = params['overwrite']
        self.n_sample_hard = params['n_sample_hard']
        self.n_sample_per_label = params['n_sample_per_label']
        self.input_format = params['input_format']

        assert os.path.exists(
            self.base_path + '/data/train.csv'), "No train CSV file in data folder."
        self.data = pd.read_csv(self.base_path + '/data/train.csv')

        self.preprocess()

        # load label and assigned idx
        with open(self.base_path + '/data/labels.json', 'r') as f:
            self.unique_labels = json.load(f)
        self.unique_labels['other'] = 0
        self.labels = list(self.unique_labels.keys())
        self.labels.insert(0,
                           self.labels.pop())  # need "other" first in the list

        # idx to labels for inference
        self.bond_labels = [self.unique_labels[b] for b in ['-', '=', '#']]
        self.idx_to_labels = {v: k for k, v in self.unique_labels.items()}
        for l, b in zip(self.bond_labels, ['SINGLE', 'DOUBLE', 'TRIPLE']):
            self.idx_to_labels[l] = b

        # preparing datasets for training
        for mode in ["train", "val"]:
            dataset_name = f"smilesdetect_{mode}"
            if dataset_name in DatasetCatalog.list():
                DatasetCatalog.remove(dataset_name)
            DatasetCatalog.register(dataset_name,
                                    lambda mode=mode: self.get_metadata(mode))
            MetadataCatalog.get(dataset_name).set(thing_classes=self.labels)
        self.smiles_metadata = MetadataCatalog.get("smilesdetect_val")

        self.cfg = self.create_cfg()
        self.predictor = None

        self.inference_metadata = MetadataCatalog.get("smilesdetect_val")
Code Example #8
File: utils.py  Project: Mo5mami/trash-detection
def register_dataset(config, annot_df, images_df, annot):
    """
    Register the dataset with detectron2 (register_coco_instances);
    train/test splits are built from the configured fold.
    """
    fold = config.general["fold"]
    train_dataset_name = f"my_dataset_train_{fold}"
    test_dataset_name = f"my_dataset_test_{fold}"
    train_dataset_file = os.path.join(DATASET_PATH, f"my_dataset_train_{fold}.json")
    test_dataset_file = os.path.join(DATASET_PATH, f"my_dataset_test_{fold}.json")

    train_annot_df = annot_df[annot_df["folds"] != fold]
    test_annot_df = annot_df[annot_df["folds"] == fold]
    train_annot_df = train_annot_df.drop(["normal_category", "normal_category_id"], axis=1)
    test_annot_df = test_annot_df.drop(["normal_category", "normal_category_id"], axis=1)

    train_images_df = images_df[images_df["id"].isin(train_annot_df["image_id"].unique())]
    test_images_df = images_df[images_df["id"].isin(test_annot_df["image_id"].unique())]

    train_annot = annot.copy()
    test_annot = annot.copy()

    train_annot["annotations"] = train_annot_df.reset_index(drop=True).to_dict("records")
    train_annot["images"] = train_images_df.reset_index(drop=True).to_dict("records")
    test_annot["annotations"] = test_annot_df.reset_index(drop=True).to_dict("records")
    test_annot["images"] = test_images_df.reset_index(drop=True).to_dict("records")

    # Write the split annotation files, closing the handles properly
    with open(train_dataset_file, "w") as f:
        json.dump(train_annot, f)
    with open(test_dataset_file, "w") as f:
        json.dump(test_annot, f)
    
    if train_dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(train_dataset_name)
        MetadataCatalog.remove(train_dataset_name)
    if test_dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(test_dataset_name)
        MetadataCatalog.remove(test_dataset_name)
        
    register_coco_instances(train_dataset_name, {}, train_dataset_file, os.path.join(DATASET_PATH,"data"))
    register_coco_instances(test_dataset_name, {}, test_dataset_file, os.path.join(DATASET_PATH,"data"))
Code Example #9
def register_all_oid():
    # register dataset
    for tv in ["train", "validation"]:
        is_train = tv == 'train'
        paths = get_paths(OID_DIR, tv)
        ds_name = "oid_" + tv
        if ds_name in DatasetCatalog.list():
            DatasetCatalog.remove(ds_name)
        # make sure the validation dataset is compliant with the Detectron2 format.
        if is_train:
            pipeline = DescPipeline([
                partial(get_oid_descs, is_train=is_train),
            ],
                                    cache_paths=[
                                        paths.descs_pkl,
                                    ])
        else:
            pipeline = DescPipeline(
                [
                    partial(get_oid_descs, is_train=is_train),
                    partial(oid_descs_to_detectron2_dicts,
                            masks_dir=paths.masks_dir,
                            cache_path=paths.dicts_pkl)
                ],
                cache_paths=[paths.descs_pkl, paths.dicts_pkl])
        # register oid dataset dicts.
        DatasetCatalog.register("oid_" + tv, pipeline)
        # set oid metadata.
        MetadataCatalog.get(ds_name).set(
            images_dir=paths.images_dir,
            masks_dir=paths.masks_dir,
            # json_file=json_file,
            image_root=paths.images_dir,
            evaluator_type="tfod",
            thing_classes=KLASS_NAMES,
            no_to_mid=NO_TO_MID)
Code Example #10
File: det2.py  Project: kazukingh01/kkpackages
    def preview_augmentation(self,
                             src,
                             outdir: str = "./preview_augmentation",
                             n_output: int = 100):
        """
        面倒なのでcocoを作り直してからpreviewさせる
        Params::
            src: str, List[str], index, List[index]
        """
        outdir = correct_dirpath(outdir)
        coco = CocoManager()
        coco.add_json(self.coco_json_path)
        # filter by src
        if type(src) == str:
            coco.df_json = coco.df_json.loc[coco.df_json["images_file_name"] ==
                                            src]
        elif type(src) == int:
            coco.df_json = coco.df_json.iloc[src:src + 1]
        elif type(src) == list or type(src) == tuple:
            if type(src[0]) == str:
                coco.df_json = coco.df_json.loc[
                    coco.df_json["images_file_name"].isin(src)]
            elif type(src[0]) == int:
                coco.df_json = coco.df_json.iloc[src, :]
        else:
            raise Exception("")
        coco.save(self.coco_json_path + ".cocomanager.json")

        # reload from the rebuilt coco json
        self.coco_json_path = self.coco_json_path + ".cocomanager.json"
        DatasetCatalog.remove(self.dataset_name)  # the key must be removed before re-registering
        MetadataCatalog.remove(self.dataset_name)  # the key must be removed before re-registering
        self.__register_coco_instances(self.dataset_name, self.coco_json_path,
                                       self.image_root)
        super().__init__(self.cfg)
        makedirs(outdir, exist_ok=True, remake=True)
        count = 0
        for i, x in enumerate(self.data_loader):
            # x holds one batch worth of samples (e.g. 2), so loop over all of them
            for j, data in enumerate(x):
                if j > 0: continue
                ## copy gt_*** -> pred_*** so the Visualizer can be used the same way as with predictor output
                img = self.img_conv_dataloader(data)
                ins = data["instances"].to("cpu")
                if ins.has("gt_boxes"): ins.set("pred_boxes", ins.gt_boxes)
                if ins.has("gt_classes"):
                    ins.set("pred_classes", ins.gt_classes)
                if ins.has("gt_keypoints"):
                    ins.set("pred_keypoints", ins.gt_keypoints)
                if ins.has("gt_masks"):
                    ## gt_masks are polygons [x1, y1, x2, y2, ...], so convert them to the pred-style boolean mask format [False, True, True, ...]
                    segs = ins.get("gt_masks").polygons
                    list_ndf = []
                    for seg_a_class in segs:
                        ndf = convert_seg_point_to_bool(
                            img.shape[0], img.shape[1], seg_a_class)
                        list_ndf.append(ndf)
                    ndf = np.concatenate([[ndfwk] for ndfwk in list_ndf],
                                         axis=0)
                    ins.set("pred_masks",
                            torch.from_numpy(ndf))  # convert to a Tensor
                data["instances"] = ins
                img = self.draw_annoetation(img, data)
                cv2.imwrite(
                    outdir + "preview_augmentation." + str(i) + "." + str(j) +
                    ".png", img)
            count += 1
            if count > n_output: break

        DatasetCatalog.remove(self.dataset_name)  # the key must be removed before re-registering
        MetadataCatalog.remove(self.dataset_name)  # the key must be removed before re-registering
        self.coco_json_path = self.coco_json_path_org
        self.__register_coco_instances(self.dataset_name, self.coco_json_path,
                                       self.image_root)
        super().__init__(self.cfg)
Code Example #11
def train(model_name: str,
          results_path: str,
          train_idx: List[int],
          test_idx: List[int],
          train_annotations: OrderedDict,
          test_annotations: OrderedDict,
          lr: float = 0.0025,
          max_it: int = 500,
          img_per_batch: int = 16,
          batch_size: int = 512,
          num_freeze: int = 1) -> None:

    if Path(results_path).exists():
        shutil.rmtree(results_path)

    os.makedirs(results_path, exist_ok=True)

    for catalog_type in ['train', 'test']:
        catalog = f'aic19_{catalog_type}'
        if catalog in DatasetCatalog.list():
            DatasetCatalog.remove(catalog)

        if catalog_type == 'train':
            DatasetCatalog.register(
                catalog,
                lambda d=catalog_type: get_dicts(train_idx, train_annotations))
        else:
            DatasetCatalog.register(
                catalog,
                lambda d=catalog_type: get_dicts(test_idx, test_annotations))

        MetadataCatalog.get(catalog).set(thing_classes=['Car'])

    cfg = get_cfg()
    cfg.OUTPUT_DIR = results_path
    cfg.merge_from_file(model_zoo.get_config_file(model_name))

    cfg.DATASETS.TRAIN = ('aic19_train', )
    cfg.DATASETS.TEST = ()

    cfg.DATALOADER.NUM_WORKERS = 16

    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_name)
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    #cfg.MODEL.BACKBONE.FREEZE_AT = 1
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = batch_size
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

    cfg.INPUT.MAX_SIZE_TEST = 1200
    cfg.INPUT.MAX_SIZE_TRAIN = 1200

    cfg.SOLVER.IMS_PER_BATCH = img_per_batch
    cfg.SOLVER.BASE_LR = lr
    cfg.SOLVER.MAX_ITER = max_it
    cfg.SOLVER.STEPS = []

    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, 'model_final.pth')

    evaluator = COCOEvaluator('aic19_test',
                              cfg,
                              False,
                              output_dir=results_path)
    val_loader = build_detection_test_loader(cfg, "aic19_test")
    print(inference_on_dataset(trainer.model, val_loader, evaluator))
Code Example #12
    def tearDown(self):
        # Need to remove injected dataset
        injected_dataset = set(DatasetCatalog) - self._builtin_datasets
        for ds in injected_dataset:
            DatasetCatalog.remove(ds)
            MetadataCatalog.remove(ds)
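Taken together, the examples converge on one idiom: before (re)registering a name, check DatasetCatalog.list() (or remove unconditionally in teardown code), drop the entry from both DatasetCatalog and MetadataCatalog, and only then register again, since registering an existing name raises an error. A distilled sketch of that pattern, with illustrative names:

def reregister_coco_dataset(name, json_file, image_root):
    # Clear both catalogs first if the name is already known,
    # otherwise the registration call below would fail on a duplicate name.
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
        MetadataCatalog.remove(name)
    register_coco_instances(name, {}, json_file, image_root)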