Example #1
# this helper is a context manager: it registers the dataset, yields, then removes it
@contextlib.contextmanager
def _register_toy_dataset(
    dataset_name, image_generator, num_images, num_classes=-1, num_keypoints=0
):
    json_dataset, meta_data = create_toy_dataset(
        image_generator,
        num_images=num_images,
        num_classes=num_classes,
        num_keypoints=num_keypoints,
    )

    with make_temp_directory("detectron2go_tmp_dataset") as tmp_dir:
        json_file = os.path.join(tmp_dir, "{}.json".format(dataset_name))
        with open(json_file, "w") as f:
            json.dump(json_dataset, f)

        split_dict = {
            IM_DIR: image_generator.get_image_dir(),
            ANN_FN: json_file,
            "meta_data": meta_data,
        }
        register_dataset_split(dataset_name, split_dict)

        try:
            yield
        finally:
            DatasetCatalog.remove(dataset_name)
            MetadataCatalog.remove(dataset_name)
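A minimal usage sketch, assuming the context-manager form above; toy_image_generator is a placeholder for whatever object provides get_image_dir():

# Hypothetical usage; "toy_ds" and toy_image_generator are placeholders.
with _register_toy_dataset("toy_ds", toy_image_generator, num_images=10, num_classes=2):
    dataset_dicts = DatasetCatalog.get("toy_ds")  # available only inside the block
    metadata = MetadataCatalog.get("toy_ds")
# on exit the dataset is removed from DatasetCatalog and MetadataCatalog again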
Example #2
    def register_catalog(self):
        """
        Adhoc COCO (json) dataset assumes the derived dataset can be created by only
        changing the json file, currently it supports two sources: 1) the dataset is
        registered using standard COCO registering functions in D2 or
        register_dataset_split from D2Go, this way it uses `json_file` from the metadata
        to access the json file. 2) the load func in DatasetCatalog is an instance of
        CallFuncWithJsonFile, which gives access to the json_file. In both cases,
        metadata will be the same except for the `name` and potentially `json_file`.
        """
        logger.info("Register {} from {}".format(self.new_ds_name,
                                                 self.src_ds_name))
        metadata = MetadataCatalog.get(self.src_ds_name)

        load_func = DatasetCatalog[self.src_ds_name]
        src_json_file = (
            load_func.json_file
            if isinstance(load_func, CallFuncWithJsonFile)
            else metadata.json_file
        )

        # TODO cache ?
        with PathManager.open(src_json_file) as f:
            json_dict = json.load(f)
        assert "images" in json_dict, "Only support COCO-style json!"
        json_dict = self.new_json_dict(json_dict)
        self.tmp_dir = tempfile.mkdtemp(prefix="detectron2go_tmp_datasets")
        tmp_file = os.path.join(self.tmp_dir,
                                "{}.json".format(self.new_ds_name))
        with open(tmp_file, "w") as f:
            json.dump(json_dict, f)

        # re-register DatasetCatalog
        if isinstance(load_func, CallFuncWithJsonFile):
            new_func = CallFuncWithJsonFile(func=load_func.func,
                                            json_file=tmp_file)
            DatasetCatalog.register(self.new_ds_name, new_func)
        else:
            # NOTE: only supports COCODataset as DS_TYPE since we cannot reconstruct
            # the split_dict
            register_dataset_split(
                self.new_ds_name,
                split_dict={
                    ANN_FN: tmp_file,
                    IM_DIR: metadata.image_root
                },
            )

        # re-register MetadataCatalog
        metadata_dict = metadata.as_dict()
        metadata_dict["name"] = self.new_ds_name
        if "json_file" in metadata_dict:
            metadata_dict["json_file"] = tmp_file
        MetadataCatalog.remove(self.new_ds_name)
        MetadataCatalog.get(self.new_ds_name).set(**metadata_dict)
Example #3
def experiment():
    result_df = pd.DataFrame(columns=['w20', 'size', 'AP', 'iter'])
    # astype returns a new DataFrame, so the result has to be assigned back
    result_df = result_df.astype({'w20': 'bool', 'size': 'int32', 'AP': 'float32'})
    date = datetime.date.today()
    time = datetime.datetime.now()
    try:
        for trial_id in range(10**6):
            sample_batch = generate_train_sets(True)
            sample_batch.extend(generate_train_sets(False))
            for sample in sample_batch:
                print("---------------------------------------\n",
                      'RUNNING EXPERIMENT WITH',sample[0],sample[1],'TRIAL_ID IS ',trial_id,
                      "\n---------------------------------------------")
                #cfg = TOY_cfg_and_register(trial_id,sample)
                cfg = initialize_cfg_and_register(trial_id, sample)
                trainer = Trainer(augmentations, cfg)
                trainer.resume_or_load(resume=False)
                try:
                    trainer.train()
                except hooks.StopFakeExc:
                    ap, iter = trainer.info_at_stop
                else:
                    ap, iter = trainer.storage.latest()['segm/AP']
                result = {'w20': sample[1], 'size': str(sample[0]), 'AP': ap, 'iter': iter}

                result_df = result_df.append(result, ignore_index=True)
                with open(f'{base_output_dir}/results', 'a+') as f:
                    json.dump(result, f)
                    f.write(os.linesep)
                agg = result_df.groupby(['w20', 'size']).agg({'AP': ['mean', 'std']})
                t = torch.cuda.get_device_properties(0).total_memory // (10**6)
                r = torch.cuda.memory_reserved(0) // (10**6)
                a = torch.cuda.memory_allocated(0) // (10**6)
                free = r - a  # free memory inside the reserved pool
                DatasetCatalog.remove(cfg.DATASETS.TRAIN[0])
                MetadataCatalog.remove(cfg.DATASETS.TRAIN[0])
                print("---------------------------------------\n",
                      agg,
                      "\n---------------------------------------------")
                titles = ['TOTAL', 'RESERVED', 'ALLOCATED', 'FREE INSIDE RESERVED']
                vals = [t, r, a, free]
                strs = []

                for title, val in zip(titles, vals):
                    strs.append(f'{title}:\t{val}')
                print("\n".join(strs))
    except Exception as e:
        print(e)
    finally:
        time_info_str = "-".join([str(x) for x in [date.year,date.month,date.day,time.hour,time.minute]])
        result_df.to_csv(f'{base_output_dir}/results_pd-{time_info_str}.csv')
        agg.to_csv(f'{base_output_dir}/agg_pd-{time_info_str}.csv')
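Note: DataFrame.append used above was removed in pandas 2.0; on newer pandas the per-trial result would have to be accumulated with pd.concat instead, roughly:

# pandas >= 2.0 replacement for result_df.append(result, ignore_index=True)
result_df = pd.concat([result_df, pd.DataFrame([result])], ignore_index=True)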
Example #4
def register_dataset(config, annot_df, images_df, annot):
    """
    Register train/test datasets for the configured fold
    (via detectron2's register_coco_instances).
    """
    fold = config.general["fold"]
    train_dataset_name = f"my_dataset_train_{fold}"
    test_dataset_name = f"my_dataset_test_{fold}"
    train_dataset_file = os.path.join(DATASET_PATH, f"my_dataset_train_{fold}.json")
    test_dataset_file = os.path.join(DATASET_PATH, f"my_dataset_test_{fold}.json")

    train_annot_df = annot_df[annot_df["folds"] != fold]
    test_annot_df = annot_df[annot_df["folds"] == fold]
    train_annot_df = train_annot_df.drop(["normal_category", "normal_category_id"], axis=1)
    test_annot_df = test_annot_df.drop(["normal_category", "normal_category_id"], axis=1)

    train_images_df = images_df[images_df["id"].isin(train_annot_df["image_id"].unique())]
    test_images_df = images_df[images_df["id"].isin(test_annot_df["image_id"].unique())]

    train_annot = annot.copy()
    test_annot = annot.copy()

    train_annot["annotations"] = train_annot_df.reset_index(drop=True).to_dict("records")
    train_annot["images"] = train_images_df.reset_index(drop=True).to_dict("records")
    test_annot["annotations"] = test_annot_df.reset_index(drop=True).to_dict("records")
    test_annot["images"] = test_images_df.reset_index(drop=True).to_dict("records")

    # write the fold-specific json files, closing the file handles properly
    with open(train_dataset_file, "w") as f:
        json.dump(train_annot, f)
    with open(test_dataset_file, "w") as f:
        json.dump(test_annot, f)
    
    if train_dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(train_dataset_name)
        MetadataCatalog.remove(train_dataset_name)
    if test_dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(test_dataset_name)
        MetadataCatalog.remove(test_dataset_name)
        
    register_coco_instances(train_dataset_name, {}, train_dataset_file, os.path.join(DATASET_PATH, "data"))
    register_coco_instances(test_dataset_name, {}, test_dataset_file, os.path.join(DATASET_PATH, "data"))
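The registered fold splits would then be referenced by name in a detectron2 config; a minimal sketch (DATASETS.TRAIN/TEST are standard detectron2 config keys, the fold value below is a placeholder):

# Hypothetical follow-up: point a detectron2 config at the fold-specific splits.
from detectron2.config import get_cfg

cfg = get_cfg()
fold = 0  # placeholder fold id
cfg.DATASETS.TRAIN = (f"my_dataset_train_{fold}",)
cfg.DATASETS.TEST = (f"my_dataset_test_{fold}",)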
Example #5
    def preview_augmentation(self,
                             src,
                             outdir: str = "./preview_augmentation",
                             n_output: int = 100):
        """
        面倒なのでcocoを作り直してからpreviewさせる
        Params::
            src: str, List[str], index, List[index]
        """
        outdir = correct_dirpath(outdir)
        coco = CocoManager()
        coco.add_json(self.coco_json_path)
        # filter by src
        if type(src) == str:
            coco.df_json = coco.df_json.loc[coco.df_json["images_file_name"] ==
                                            src]
        elif type(src) == int:
            coco.df_json = coco.df_json.iloc[src:src + 1]
        elif type(src) == list or type(src) == tuple:
            if type(src[0]) == str:
                coco.df_json = coco.df_json.loc[
                    coco.df_json["images_file_name"].isin(src)]
            elif type(src[0]) == int:
                coco.df_json = coco.df_json.iloc[src, :]
        else:
            raise Exception("")
        coco.save(self.coco_json_path + ".cocomanager.json")

        # reload using the rebuilt coco json
        self.coco_json_path = self.coco_json_path + ".cocomanager.json"
        DatasetCatalog.remove(self.dataset_name)  # the key must be removed before it can be re-registered
        MetadataCatalog.remove(self.dataset_name)  # the key must be removed before it can be re-registered
        self.__register_coco_instances(self.dataset_name, self.coco_json_path,
                                       self.image_root)
        super().__init__(self.cfg)
        makedirs(outdir, exist_ok=True, remake=True)
        count = 0
        for i, x in enumerate(self.data_loader):
            # x contains one batch worth of samples (e.g. 2), so iterate over all of them
            for j, data in enumerate(x):
                if j > 0: continue
                ## copy gt_*** -> pred_*** so the Visualizer behaves the same as with a predictor
                img = self.img_conv_dataloader(data)
                ins = data["instances"].to("cpu")
                if ins.has("gt_boxes"): ins.set("pred_boxes", ins.gt_boxes)
                if ins.has("gt_classes"):
                    ins.set("pred_classes", ins.gt_classes)
                if ins.has("gt_keypoints"):
                    ins.set("pred_keypoints", ins.gt_keypoints)
                if ins.has("gt_masks"):
                    ## gt_masks stores polygons as [x1, y1, x2, y2, ...]; convert to pred-style boolean masks [False, True, True, ...]
                    segs = ins.get("gt_masks").polygons
                    list_ndf = []
                    for seg_a_class in segs:
                        ndf = convert_seg_point_to_bool(
                            img.shape[0], img.shape[1], seg_a_class)
                        list_ndf.append(ndf)
                    ndf = np.concatenate([[ndfwk] for ndfwk in list_ndf],
                                         axis=0)
                    ins.set("pred_masks",
                            torch.from_numpy(ndf))  # Tensor 形式に変換
                data["instances"] = ins
                img = self.draw_annoetation(img, data)
                cv2.imwrite(
                    outdir + "preview_augmentation." + str(i) + "." + str(j) +
                    ".png", img)
            count += 1
            if count > n_output: break

        DatasetCatalog.remove(self.dataset_name)  # the key must be removed before it can be re-registered
        MetadataCatalog.remove(self.dataset_name)  # the key must be removed before it can be re-registered
        self.coco_json_path = self.coco_json_path_org
        self.__register_coco_instances(self.dataset_name, self.coco_json_path,
                                       self.image_root)
        super().__init__(self.cfg)
Example #6
def extended_coco_load(json_file,
                       image_root,
                       dataset_name=None,
                       loaded_json=None):
    """
    Load a json file with COCO's annotation format.
    Currently only supports instance segmentation annotations.

    Args:
        json_file (str): full path to the json file in COCO annotation format.
        image_root (str): the directory where the images in this json file exist.
        dataset_name (str): the name of the dataset (e.g., "coco", "cityscapes").
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset.
        loaded_json (str): optional loaded json content, used in InMemoryCOCO to
            avoid loading from json_file again.
    Returns:
        list[dict]: a list of dicts in "Detectron2 Dataset" format. (See DATASETS.md)

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
        2. When `dataset_name=='coco'`,
           this function will translate COCO's
           incontiguous category ids to contiguous ids in [0, 80).
    """

    json_file = _cache_json_file(json_file)

    if loaded_json is None:
        coco_api = COCO(json_file)
    else:
        coco_api = InMemoryCOCO(loaded_json)

    id_map = None
    # Get filtered classes
    all_cat_ids = coco_api.getCatIds()
    all_cats = coco_api.loadCats(all_cat_ids)

    # Setup classes to use for creating id map
    classes_to_use = [
        c["name"] for c in sorted(all_cats, key=lambda x: x["id"])
    ]

    # Setup id map
    id_map = {}
    for cat_id, cat in zip(all_cat_ids, all_cats):
        if cat["name"] in classes_to_use:
            id_map[cat_id] = classes_to_use.index(cat["name"])

    # Register dataset in metadata catalog
    if dataset_name is not None:
        # overwrite attrs
        meta_dict = MetadataCatalog.get(dataset_name).as_dict()
        meta_dict['thing_classes'] = classes_to_use
        meta_dict['thing_dataset_id_to_contiguous_id'] = id_map
        # update MetadataCatalog (cannot change inplace, has to remove)
        MetadataCatalog.remove(dataset_name)
        MetadataCatalog.get(dataset_name).set(**meta_dict)
        # assert the change
        assert MetadataCatalog.get(dataset_name).thing_classes == classes_to_use

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    imgs = coco_api.loadImgs(img_ids)
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
    logger.info("Loaded {} images from {}".format(len(imgs), json_file))

    # Return the coco converted to record list
    return convert_to_dict_list(image_root, id_map, imgs, anns, dataset_name)
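A hedged registration sketch for the loader above; the dataset name and both paths are placeholders:

# Hypothetical registration of a dataset that uses extended_coco_load as its load func.
json_file = "/path/to/annotations.json"  # placeholder path
image_root = "/path/to/images"  # placeholder path
name = "my_extended_coco"  # placeholder dataset name

DatasetCatalog.register(
    name, lambda: extended_coco_load(json_file, image_root, dataset_name=name)
)
MetadataCatalog.get(name).set(json_file=json_file, image_root=image_root)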
Example #7
    def tearDown(self):
        # Need to remove injected datasets
        injected_datasets = set(DatasetCatalog) - self._builtin_datasets
        for ds in injected_datasets:
            DatasetCatalog.remove(ds)
            MetadataCatalog.remove(ds)
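The matching setUp would snapshot the dataset names that exist before the test injects anything; a minimal sketch, assuming a unittest.TestCase with the _builtin_datasets attribute used above:

import unittest

from detectron2.data import DatasetCatalog


class DatasetInjectionTest(unittest.TestCase):  # hypothetical test-case name
    def setUp(self):
        # remember which datasets were already registered before the test runs
        self._builtin_datasets = set(DatasetCatalog)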