Code Example #1
File: coco_format_dc.py  Project: ruodingt/tensorpack
def register_coco_format(data_config: DataConfig):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014, and val2017==minival2014.
    """

    # split_names = ['train', 'eval']

    class_names_ls = {}
    class_names = []

    for _split in data_config.train_splits + data_config.eval_splits:  # type: DataSubsetSplit
        _name = _split.nickname
        print("register coco:", _split)
        class_names = DatasetRegistry.register(
            dataset_name=_name,
            # bind via the default argument `sp`; using the loop variable
            # `_split` in the lambda body would late-bind to the last split
            func=lambda sp=_split: COCOFormatDetectionSubset(
                sp.ann_path,
                image_data_basedir=data_config.image_data_basedir),
            logx=_split)
        class_names_ls[_name] = class_names

    # consistency check
    for nm, cls_n in class_names_ls.items():
        assert class_names == cls_n, "Train and Val category sets are not consistent"

    class_names_include_bg = ["BG"] + list(class_names)
    for subset_name, _ in class_names_ls.items():
        DatasetRegistry.register_metadata(subset_name, 'class_names',
                                          class_names_include_bg)

    # TODO: check dataset here
    return
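
A minimal usage sketch for `register_coco_format` above. The `DataConfig` /
`DataSubsetSplit` shapes are inferred from the attributes the function reads
(`train_splits`, `eval_splits`, `nickname`, `ann_path`, `image_data_basedir`),
so treat these dataclasses and paths as hypothetical stand-ins, not the
project's real classes:

    from dataclasses import dataclass
    from typing import List

    @dataclass
    class DataSubsetSplit:  # hypothetical stand-in
        nickname: str
        ann_path: str

    @dataclass
    class DataConfig:  # hypothetical stand-in
        train_splits: List[DataSubsetSplit]
        eval_splits: List[DataSubsetSplit]
        image_data_basedir: str

    data_config = DataConfig(
        train_splits=[DataSubsetSplit("my_train", "/data/ann/train.json")],
        eval_splits=[DataSubsetSplit("my_val", "/data/ann/val.json")],
        image_data_basedir="/data/images")
    register_coco_format(data_config)
    # "my_train" / "my_val" can now be used in cfg.DATA.TRAIN / cfg.DATA.VAL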
Code Example #2
def register_ic(basedir):
    for split in ["train", "val"]:
        print('split: ', split)
        name = "ic_" + split
        DatasetRegistry.register(name, lambda x=split: ICDemo(basedir, x))
        DatasetRegistry.register_metadata(name, "class_names", ["BG", "IC"])
        print(DatasetRegistry._metadata_registry)
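
Every registration in these examples binds the loop variable through a default
argument (`lambda x=split: ...`). A self-contained sketch of why: Python
closures late-bind, so without the default argument every registered loader
would see the loop variable's final value.

    # late-binding pitfall vs. the default-argument fix
    late = [lambda: s for s in ["train", "val"]]
    bound = [lambda s=s: s for s in ["train", "val"]]

    print([f() for f in late])   # ['val', 'val']   -- closures see the final value
    print([f() for f in bound])  # ['train', 'val'] -- defaults capture s per iteration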
Code Example #3
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014, and val2017==minival2014.
    """

    # 80 names for COCO
    # For your own coco-format dataset, change this.
    """
    class_names = [
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]  # noqa
    class_names = ["BG"] + class_names
    """
    class_names = ["BG", "failure"]

    for split in [
            "train2017", "val2017", "train2014", "val2014",
            "valminusminival2014", "minival2014", "val2017_100"
    ]:
        name = "coco_" + split
        DatasetRegistry.register(name,
                                 lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
Code Example #4
def register_display(basedir):
    print("REGISTER")
    for split in ["train", "val"]:
        name = split
        DatasetRegistry.register(name, lambda x=split: DisplayDemo(basedir, x))
        DatasetRegistry.register_metadata(name, "class_names",
                                          ["BG", "LabelID0", "LabelID1"])
Code Example #5
def register_deep_fashion_2(basedir):
    """
    Add the DeepFashion2 detection datasets to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.
    """
    for split in ['train', 'val']:
        DatasetRegistry.register(
            split, lambda x=split: DeepFashion2Detection(basedir, x))
Code Example #6
def register_waymo(basedir):
    for split in ["train", "val"]:
        name = "waymo_" + split
        DatasetRegistry.register(name, lambda x=split: WaymoDemo(basedir, x))
        DatasetRegistry.register_metadata(name, "class_names", [
            "TYPE_BACKGROUND", "TYPE_UNKNOWN", "TYPE_VEHICLE",
            "TYPE_PEDESTRIAN", "TYPE_SIGN", "TYPE_CYCLIST"
        ])
Code Example #7
File: coco.py  Project: wangzhanwei666/tensorpack
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.
    """
    for split in ["train2017", "val2017", "train2014", "val2014",
                  "valminusminival2014", "minival2014"]:
        DatasetRegistry.register("coco_" + split, lambda x=split: COCODetection(basedir, x))
Code Example #8
File: coco.py  Project: sirpuria/tensorpack
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014, and val2017==minival2014.
    """

    # 80 names for COCO
    # For your own coco-format dataset, change this.
    class_names = [
        'OttavaBracket', 'OttavaText_15ma', 'OttavaText_15mb',
        'OttavaText_8va', 'OttavaText_8vb', 'accidentalDoubleFlat',
        'accidentalDoubleSharp', 'accidentalFlat', 'accidentalNatural',
        'accidentalSharp', 'arpeggiato', 'articAccentAbove',
        'articAccentBelow', 'articMarcatoAbove', 'articMarcatoBelow',
        'articStaccatissimoAbove', 'articStaccatissimoBelow',
        'articStaccatoAbove', 'articStaccatoBelow', 'articTenutoAbove',
        'articTenutoBelow', 'augmentationDot', 'barlineHeavy', 'barlineSingle',
        'beam', 'brace', 'cClefAlto', 'cClefAltoChange', 'cClefTenor',
        'cClefTenorChange', 'caesura', 'clef15', 'clef8', 'coda', 'combStaff',
        'combTimeSignature', 'dynamicFF', 'dynamicFFF', 'dynamicFFFF',
        'dynamicFFFFF', 'dynamicForte', 'dynamicFortePiano', 'dynamicMF',
        'dynamicMP', 'dynamicMezzo', 'dynamicPP', 'dynamicPPP', 'dynamicPPPP',
        'dynamicPPPPP', 'dynamicPiano', 'dynamicRinforzando2',
        'dynamicSforzando1', 'dynamicSforzato', 'fClef', 'fClefChange',
        'fermataAbove', 'fermataBelow', 'fingering0', 'fingering1',
        'fingering2', 'fingering3', 'fingering4', 'fingering5',
        'flag128thDown', 'flag128thUp', 'flag16thDown', 'flag16thUp',
        'flag32ndDown', 'flag32ndUp', 'flag64thDown', 'flag64thUp',
        'flag8thDown', 'flag8thUp', 'gClef', 'gClefChange', 'hairpin',
        'keyFlat', 'keyNatural', 'keySharp', 'keyboardPedalPed',
        'keyboardPedalUp', 'legerLine', 'noteheadBlack', 'noteheadDoubleWhole',
        'noteheadHalf', 'noteheadWhole', 'ornamentMordent', 'ornamentTrill',
        'ornamentTurn', 'ornamentTurnInverted', 'repeatDot', 'rest128th',
        'rest16th', 'rest32nd', 'rest64th', 'rest8th', 'restDoubleWhole',
        'restHBar', 'restHNr', 'restHalf', 'restLonga', 'restQuarter',
        'restWhole', 'segno', 'slur', 'staffLine', 'stem', 'stringsDownBow',
        'stringsUpBow', 'text_field', 'text_script', 'tie', 'timeSig0',
        'timeSig1', 'timeSig2', 'timeSig3', 'timeSig4', 'timeSig5', 'timeSig6',
        'timeSig7', 'timeSig8', 'timeSig9', 'timeSigCommon',
        'timeSigCutCommon', 'tremolo1', 'tremolo2', 'tremolo3', 'tremolo4',
        'tuplet1', 'tuplet3', 'tuplet4', 'tuplet5', 'tuplet6', 'tuplet7',
        'tuplet8', 'tuplet9', 'tupletBracket'
    ]

    class_names = ["BG"] + class_names

    for split in [
            "train2017", "val2017", "train2014", "val2014",
            "valminusminival2014", "minival2014", "val2017_100"
    ]:
        name = "coco_" + split
        DatasetRegistry.register(name,
                                 lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
Code Example #9
def do_evaluate(pred_config, output_file):
    num_tower = max(cfg.TRAIN.NUM_GPUS, 1)
    graph_funcs = MultiTowerOfflinePredictor(
        pred_config, list(range(num_tower))).get_predictors()

    for dataset in cfg.DATA.VAL:
        logger.info("Evaluating {} ...".format(dataset))
        dataflows = [
            get_eval_dataflow(dataset, shard=k, num_shards=num_tower)
            for k in range(num_tower)]
        all_results = multithread_predict_dataflow(dataflows, graph_funcs)
        output = output_file + '-' + dataset
        DatasetRegistry.get(dataset).eval_inference_results(all_results, output)
Code Example #10
File: eval.py  Project: SmartDataLab/MSNET
    def _eval(self):
        logdir = self._output_dir
        if cfg.TRAINER == 'replicated':
            all_results = multithread_predict_dataflow(self.dataflows,
                                                       self.predictors)
        else:
            filenames = [
                os.path.join(
                    logdir,
                    'outputs{}-part{}.json'.format(self.global_step, rank))
                for rank in range(hvd.local_size())
            ]

            if self._horovod_run_eval:
                local_results = predict_dataflow(self.dataflow, self.predictor)
                fname = filenames[hvd.local_rank()]
                with open(fname, 'w') as f:
                    json.dump(local_results, f)
            self.barrier.eval()
            if hvd.rank() > 0:
                return
            all_results = []
            for fname in filenames:
                with open(fname, 'r') as f:
                    obj = json.load(f)
                all_results.extend(obj)
                os.unlink(fname)

        scores = DatasetRegistry.get(
            self._eval_dataset).eval_inference_results(all_results)
        for k, v in scores.items():
            self.trainer.monitors.put_scalar(self._eval_dataset + '-' + k, v)
Code Example #11
File: data.py  Project: leeshien/mytensorpack
def get_eval_dataflow(name, shard=0, num_shards=1):
    """
    Args:
        name (str): name of the dataset to evaluate
        shard, num_shards: to get subset of evaluation data
    """
    roidbs = DatasetRegistry.get(name).inference_roidbs()
    logger.info("Found {} images for inference.".format(len(roidbs)))

    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    img_range = (shard * img_per_shard, (shard + 1) *
                 img_per_shard if shard + 1 < num_shards else num_imgs)

    # unlike training, do not filter out images without ground-truth boxes
    ds = DataFromListOfDict(roidbs[img_range[0]:img_range[1]],
                            ["file_name", "image_id"])

    def f(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im

    ds = MapDataComponent(ds, f, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return ds
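
A plain-Python sketch of the shard arithmetic above: each shard takes
`num_imgs // num_shards` images and the last shard absorbs the remainder, so
the shards together cover every image exactly once.

    def shard_range(num_imgs, shard, num_shards):
        img_per_shard = num_imgs // num_shards
        start = shard * img_per_shard
        # the last shard runs to the end to pick up the remainder
        end = (shard + 1) * img_per_shard if shard + 1 < num_shards else num_imgs
        return start, end

    print([shard_range(10, k, 3) for k in range(3)])  # [(0, 3), (3, 6), (6, 10)]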
Code Example #12
def get_pascal_voc_train_dataflow(batch_size=1):
    from dataset import register_pascal_voc

    # register_coco(os.path.expanduser("/media/ubuntu/Working/common_data/coco"))
    register_pascal_voc(os.path.expanduser("/media/ubuntu/Working/voc2012/VOC2012/"))

    print("In train dataflow")
    roidbs = list(itertools.chain.from_iterable(DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)
    print("Done loading roidbs")

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
            num - len(roidbs), len(roidbs)
        )
    )

    aspect_grouping = [1]
    aspect_ratios = [float(x["height"]) / float(x["width"]) for x in roidbs]
    group_ids = _quantize(aspect_ratios, aspect_grouping)

    ds = DataFromList(np.arange(len(roidbs)), shuffle=True)
    ds.reset_state()
    ds = AspectGroupingDataFlow(roidbs, ds, group_ids, batch_size=batch_size, drop_uneven=True).__iter__()
    preprocess = TrainingDataPreprocessor()

    while True:
        batch_roidbs = next(ds)
        yield preprocess(batch_roidbs)
Code Example #13
def get_plain_train_dataflow(batch_size=2):
    # no aspect ratio grouping

    print("In train dataflow")
    roidbs = list(itertools.chain.from_iterable(DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)
    print("Done loading roidbs")

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
            num - len(roidbs), len(roidbs)
        )
    )

    ds = DataFromList(roidbs, shuffle=True)
    preprocess = TrainingDataPreprocessor()
    buffer_size = cfg.DATA.NUM_WORKERS * 20
    ds = MultiProcessMapData(ds, cfg.DATA.NUM_WORKERS, preprocess, buffer_size=buffer_size)
    ds.reset_state()
    dataiter = ds.__iter__()
    return dataiter
Code Example #14
def get_train_dataflow(batch_size=2):
    print("In train dataflow")
    roidbs = list(itertools.chain.from_iterable(DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)
    print("Done loading roidbs")

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
            num - len(roidbs), len(roidbs)
        )
    )

    aspect_grouping = [1]
    aspect_ratios = [float(x["height"]) / float(x["width"]) for x in roidbs]
    group_ids = _quantize(aspect_ratios, aspect_grouping)

    ds = AspectGroupingDataFlow(roidbs, group_ids, batch_size=batch_size, drop_uneven=True)
    preprocess = TrainingDataPreprocessor()
    buffer_size = cfg.DATA.NUM_WORKERS * 10
    # ds = MultiProcessMapData(ds, cfg.DATA.NUM_WORKERS, preprocess, buffer_size=buffer_size)
    ds = MultiThreadMapData(ds, cfg.DATA.NUM_WORKERS, preprocess, buffer_size=buffer_size)
    ds.reset_state()

    # to get an infinite data flow
    ds = RepeatedData(ds, num=-1)
    dataiter = ds.__iter__()

    return dataiter
Code Example #15
File: data.py  Project: leeshien/mytensorpack
def print_class_histogram(roidbs):
    """
    Args:
        roidbs (list[dict]): the same format as the output of `training_roidbs`.
    """
    class_names = DatasetRegistry.get_metadata(cfg.DATA.TRAIN[0],
                                               'class_names')
    # labels are in [1, NUM_CATEGORY], hence +2 for bins
    hist_bins = np.arange(cfg.DATA.NUM_CATEGORY + 2)

    # Histogram of ground-truth objects
    gt_hist = np.zeros((cfg.DATA.NUM_CATEGORY + 1, ), dtype=np.int64)  # np.int is removed in NumPy >= 1.24
    for entry in roidbs:
        # filter crowd?
        gt_inds = np.where((entry["class"] > 0) & (entry["is_crowd"] == 0))[0]
        gt_classes = entry["class"][gt_inds]
        if len(gt_classes):
            assert gt_classes.max() <= len(class_names) - 1
        gt_hist += np.histogram(gt_classes, bins=hist_bins)[0]
    data = list(
        itertools.chain(*[[class_names[i + 1], v]
                          for i, v in enumerate(gt_hist[1:])]))
    COL = min(6, len(data))
    total_instances = sum(data[1::2])
    data.extend([None] * ((COL - len(data) % COL) % COL))
    data.extend(["total", total_instances])
    data = itertools.zip_longest(*[data[i::COL] for i in range(COL)])
    # the first line is BG
    table = tabulate(data,
                     headers=["class", "#box"] * (COL // 2),
                     tablefmt="pipe",
                     stralign="center",
                     numalign="left")
    logger.info("Ground-Truth category distribution:\n" +
                colored(table, "cyan"))
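
The formatting above folds a flat `[name, count, name, count, ...]` list into
a multi-column table: pad the list to a multiple of COL, slice out the COL
columns with `data[i::COL]`, then transpose with `zip_longest`. A standalone
sketch with made-up data:

    import itertools

    data = ["cat", 3, "dog", 5, "bird", 2, "total", 10]
    COL = 6
    data.extend([None] * ((COL - len(data) % COL) % COL))  # pad to a multiple of COL
    for row in itertools.zip_longest(*[data[i::COL] for i in range(COL)]):
        print(row)
    # ('cat', 3, 'dog', 5, 'bird', 2)
    # ('total', 10, None, None, None, None)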
Code Example #16
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014, and val2017==minival2014.
    """

    # 80 names for COCO
    # For your own coco-format dataset, change this.
    #class_names = [
    #   "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]  # noqa
    # !!! xiaoying
    class_names = [
        'aeroplane',  # 0
        'bicycle',  # 1
        'bird',  # 2
        'boat',  # 3
        'bottle',  # 4
        'bus',  # 5
        'car',  # 6
        'cat',  # 7
        'chair',  # 8
        'cow',  # 9
        'diningtable',  # 10
        'dog',  # 11
        'horse',  # 12 
        'motorbike',  # 13
        'person',  # 14
        'pottedplant',  # 15
        'sheep',  # 16
        'sofa',  # 17
        'train',  # 18
        'tvmonitor'  # 19
    ]
    class_names = ["BG"] + class_names
    print("!!! xiaoying class_names", class_names)

    for split in [
            "train2017", "val2017", "train2014", "val2014",
            "valminusminival2014", "minival2014", "val2017_100",
            "voctrain2012", "vocval2012"
    ]:  #!!xiaoying
        name = "coco_" + split
        DatasetRegistry.register(name,
                                 lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
Code Example #17
File: coco.py  Project: hbc123ht/fasfsa
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014, and val2017==minival2014.
    """

    # 80 names for COCO
    # For your own coco-format dataset, change this.
    class_names = ["page"]#, "passport_code"]
    class_names = ["BG"] + class_names

    for split in ["train_set_vito"]:
        name = split
        DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
Code Example #18
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014, and val2017==minival2014.
    """

    # 80 names for COCO
    # For your own coco-format dataset, change this.

    class_names = ["BG", "failure"]

    for split in ["train2019", "val2019"]:
        #name = "coco_" + split
        DatasetRegistry.register(split,
                                 lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(split, 'class_names', class_names)
Code Example #19
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    preprocess = TrainingDataPreprocessor(cfg)

    if cfg.DATA.NUM_WORKERS > 0:
        if cfg.TRAINER == "horovod":
            # one dataflow for each process, therefore don't need large buffer
            buffer_size = cfg.DATA.NUM_WORKERS * 10
            ds = MultiThreadMapData(ds,
                                    cfg.DATA.NUM_WORKERS,
                                    preprocess,
                                    buffer_size=buffer_size)
            # MPI does not like fork()
        else:
            buffer_size = cfg.DATA.NUM_WORKERS * 20
            ds = MultiProcessMapData(ds,
                                     cfg.DATA.NUM_WORKERS,
                                     preprocess,
                                     buffer_size=buffer_size)
    else:
        ds = MapData(ds, preprocess)
    return ds
Code Example #20
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014, and val2017==minival2014.
    """

    # 80 names for COCO
    # For your own coco-format dataset, change this.
    class_names = ['table']
    class_names = ["BG"] + class_names

    for split in [
            'train2021', "train2017"
    ]:  #, "train2014", "val2014", "valminusminival2014", "minival2014", "val2017_100"]:
        name = "coco_" + split
        DatasetRegistry.register(name,
                                 lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
Code Example #21
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014, and val2017==minival2014.
    """

    # 80 names for COCO
    # For your own coco-format dataset, change this.
    class_names = ['Bacterial_Spot', 
              'Late_Blight', 
              'Septorial_Leaf_spot', 
              'Mosaic_Virus', 
              'Yellow_Curved']
    
    '''['Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', 'Barrier', 
                   'Wall', 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Parking', 
                   'Pedestrian Area', 'Rail Track', 'Road', 'Service Lane', 'Sidewalk', 
                   'Bridge', 'Building', 'Tunnel', 'Person', 'Bicyclist', 'Motorcyclist', 
                   'Other Rider', 'Lane Marking - Crosswalk', 'Lane Marking - General', 
                   'Mountain', 'Sand', 'Sky', 'Snow', 'Terrain', 'Vegetation', 'Water', 
                   'Banner', 'Bench', 'Bike Rack', 'Billboard', 'Catch Basin', 
                   'CCTV Camera', 'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole', 
                   'Phone Booth', 'Pothole', 'Street Light', 'Pole', 'Traffic Sign Frame',
                   'Utility Pole', 'Traffic Light', 'Traffic Sign (Back)', 
                   'Traffic Sign (Front)', 'Trash Can', 'Bicycle', 'Boat', 'Bus', 'Car', 
                   'Caravan', 'Motorcycle', 'On Rails', 'Other Vehicle', 'Trailer', 'Truck', 
                   'Wheeled Slow', 'Car Mount', 'Ego Vehicle', 'Unlabeled']'''

    '''['short_sleeved_shirt', 'long_sleeved_shirt', 'short_sleeved_outwear', 
                  'long_sleeved_outwear', 'vest', 'sling', 'shorts', 'trousers', 'skirt', 
                  'short_sleeved_dress', 'long_sleeved_dress', 'vest_dress', 'sling_dress']'''
    class_names = ["BG"] + class_names

    for split in ["train2017", "val2017", "train2014", "val2014",
                  "valminusminival2014", "minival2014", "val2017_100"]:
        name = "coco_" + split
        DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
Code Example #22
def register_pascal_voc(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014, and val2017==minival2014.
    """

    # 80 names for COCO
    # For your own coco-format dataset, change this.
    class_names = [
        "aeroplane",
        "bicycle",
        "bird",
        "boat",
        "bottle",
        "bus",
        "car",
        "cat",
        "chair",
        "cow",
        "diningtable",
        "dog",
        "horse",
        "motorbike",
        "person",
        "pottedplant",
        "sheep",
        "sofa",
        "train",
        "tvmonitor",
    ]  # noqa

    class_names = ["BG"] + class_names

    for split in ["minitrain2014", "minival2014", "train2014", "val2014"]:
        name = "voc_" + split
        DatasetRegistry.register(name,
                                 lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
Code Example #23
def get_eval_dataflow(name, is_aws, is_gcs, shard=0, num_shards=1):
    """
    Args:
        name (str): name of the dataset to evaluate
        shard, num_shards: to get subset of evaluation data
    """
    roidbs = DatasetRegistry.get(name).inference_roidbs()
    logger.info("Found {} images for inference.".format(len(roidbs)))

    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    img_range = (
        shard * img_per_shard,
        (shard + 1) * img_per_shard if shard + 1 < num_shards else num_imgs,
    )

    # unlike training, do not filter out images without ground-truth boxes
    ds = DataFromListOfDict(roidbs[img_range[0] : img_range[1]], ["file_name", "image_id"])

    if is_aws:
        s3 = boto3.resource("s3")
    elif is_gcs:
        c = storage.Client.create_anonymous_client()
        bucket = c.get_bucket("determined-ai-coco-dataset")

    def f(fname):
        if is_aws:
            s3_object = s3.meta.client.get_object(Bucket="determined-ai-coco-dataset", Key=fname)
            im = cv2.imdecode(
                np.asarray(bytearray(s3_object["Body"].read()), dtype=np.uint8), cv2.IMREAD_COLOR,
            )
        elif is_gcs:
            blob = bucket.blob(fname)
            s = download_gcs_blob_with_backoff(blob)
            im = cv2.imdecode(np.asarray(bytearray(s), dtype=np.uint8), cv2.IMREAD_COLOR)
        else:
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im

    ds = MapDataComponent(ds, f, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return ds
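
`download_gcs_blob_with_backoff` is called but not defined in this snippet. A
plausible implementation is an exponential-backoff retry around the
google-cloud-storage `Blob.download_as_bytes()` call; this is an assumed
sketch, not the project's actual helper:

    import time

    def download_gcs_blob_with_backoff(blob, max_retries=5, base_delay=1.0):
        # retry transient GCS failures with delays of 1s, 2s, 4s, ...
        for attempt in range(max_retries):
            try:
                return blob.download_as_bytes()
            except Exception:
                if attempt == max_retries - 1:
                    raise
                time.sleep(base_delay * (2 ** attempt))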
Code Example #24
File: predict.py  Project: ruodingt/tensorpack
def do_sanity_check(pred_func,
                    output_dir='/root/dentalpoc/logs/xxxxx',
                    font_rs=10,
                    thickness_rs=10):
    # num_tower = max(cfg.TRAIN.NUM_GPUS, 1)
    # graph_funcs = MultiTowerOfflinePredictor(
    #     pred_config, list(range(num_tower))).get_predictors()
    os.makedirs(output_dir, exist_ok=True)

    for dataset in cfg.DATA.VAL:
        logger.info("sanity checking {} ...".format(dataset))
        # dataflows = [
        #     get_eval_dataflow(dataset, shard=k, num_shards=num_tower, add_gt=True)
        #     for k in range(num_tower)]
        # all_results = multithread_predict_dataflow(dataflows, graph_funcs)
        coco_format_detection = DatasetRegistry.get(dataset)
        coco_object = coco_format_detection.coco
        for _im_id, _img_dic in list(coco_object.imgs.items())[1:]:
            _img_path = _img_dic['path']
            _img_seg_polygons = coco_object.imgToAnns[_im_id]
            detection_ground_truths = list(
                map(
                    lambda x: DetectionResult(
                        box=convert_box_mode_xywh_2_xyxy(x['bbox']),
                        score=1.0,
                        class_id=x['category_id'],
                        mask=coco_object.annToMask(x)), _img_seg_polygons))

            print("S======check")
            _predict_with_gt(pred_func=pred_func,
                             input_file=_img_path,
                             ground_truths=detection_ground_truths,
                             output_dir=output_dir,
                             font_rs=font_rs,
                             thickness_rs=thickness_rs)

Code Example #25
def finalize_configs(is_training):
    """
    Run some sanity checks, and populate some configs from others
    """
    _C.freeze(False)  # populate new keys now
    if isinstance(_C.DATA.VAL, six.string_types
                  ):  # support single string (the typical case) as well
        _C.DATA.VAL = (_C.DATA.VAL, )
    if isinstance(_C.DATA.TRAIN, six.string_types):  # support single string
        _C.DATA.TRAIN = (_C.DATA.TRAIN, )

    # finalize dataset definitions ...
    from dataset import DatasetRegistry
    datasets = list(_C.DATA.TRAIN) + list(_C.DATA.VAL)
    _C.DATA.CLASS_NAMES = DatasetRegistry.get_metadata(datasets[0],
                                                       "class_names")
    _C.DATA.NUM_CATEGORY = len(_C.DATA.CLASS_NAMES) - 1

    assert _C.BACKBONE.NORM in ['FreezeBN', 'SyncBN', 'GN',
                                'None'], _C.BACKBONE.NORM
    if _C.BACKBONE.NORM != 'FreezeBN':
        assert not _C.BACKBONE.FREEZE_AFFINE
    assert _C.BACKBONE.FREEZE_AT in [0, 1, 2]

    _C.RPN.NUM_ANCHOR = len(_C.RPN.ANCHOR_SIZES) * len(_C.RPN.ANCHOR_RATIOS)
    assert len(_C.FPN.ANCHOR_STRIDES) == len(_C.RPN.ANCHOR_SIZES)
    # image size into the backbone has to be multiple of this number
    _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[
        3]  # [3] because we build FPN with features r2,r3,r4,r5

    if _C.MODE_FPN:
        size_mult = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        _C.PREPROC.MAX_SIZE = np.ceil(
            _C.PREPROC.MAX_SIZE / size_mult) * size_mult
        assert _C.FPN.PROPOSAL_MODE in ['Level', 'Joint']
        assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.MRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.NORM in ['None', 'GN']

        if _C.FPN.CASCADE:
            # the first threshold is the proposal sampling threshold
            assert _C.CASCADE.IOUS[0] == _C.FRCNN.FG_THRESH
            assert len(_C.CASCADE.BBOX_REG_WEIGHTS) == len(_C.CASCADE.IOUS)

    if is_training:
        train_scales = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE
        if isinstance(
                train_scales,
            (list, tuple)) and train_scales[1] - train_scales[0] > 100:
            # don't autotune if augmentation is on
            os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
        assert _C.TRAINER in ['horovod', 'replicated'], _C.TRAINER

        lr = _C.TRAIN.LR_SCHEDULE
        if isinstance(lr, six.string_types):
            if lr.endswith("x"):
                LR_SCHEDULE_KITER = {
                    "{}x".format(k): [180 * k - 120, 180 * k - 40, 180 * k]
                    for k in range(2, 10)
                }
                LR_SCHEDULE_KITER["1x"] = [120, 160, 180]
                _C.TRAIN.LR_SCHEDULE = [
                    x * 1000 for x in LR_SCHEDULE_KITER[lr]
                ]
            else:
                _C.TRAIN.LR_SCHEDULE = eval(lr)

        # setup NUM_GPUS
        if _C.TRAINER == 'horovod':
            import horovod.tensorflow as hvd
            ngpu = hvd.size()
            logger.info("Horovod Rank={}, Size={}, LocalRank={}".format(
                hvd.rank(), hvd.size(), hvd.local_rank()))
        else:
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            ngpu = get_num_gpu()
        assert ngpu > 0, "Has to train with GPU!"
        assert ngpu % 8 == 0 or 8 % ngpu == 0, "Can only train with 1,2,4 or >=8 GPUs, but found {} GPUs".format(
            ngpu)
    else:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        ngpu = get_num_gpu()

    if _C.TRAIN.NUM_GPUS is None:
        _C.TRAIN.NUM_GPUS = ngpu
    else:
        if _C.TRAINER == 'horovod':
            assert _C.TRAIN.NUM_GPUS == ngpu
        else:
            assert _C.TRAIN.NUM_GPUS <= ngpu

    _C.freeze()
    logger.info("Config: ------------------------------------------\n" +
                str(_C))
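
For reference, what the "Nx" schedule strings above expand to, in iterations
(after the `* 1000`):

    LR_SCHEDULE_KITER = {
        "{}x".format(k): [180 * k - 120, 180 * k - 40, 180 * k]
        for k in range(2, 10)
    }
    LR_SCHEDULE_KITER["1x"] = [120, 160, 180]
    for sched in ["1x", "2x", "3x"]:
        print(sched, [x * 1000 for x in LR_SCHEDULE_KITER[sched]])
    # 1x [120000, 160000, 180000]
    # 2x [240000, 320000, 360000]
    # 3x [420000, 500000, 540000]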
Code Example #26
def eval_one_dataset(dataset_name, output_filename):
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    import cv2
    from collections import namedtuple
    from dataset import DatasetRegistry
    from myaug_lib import short_side_resize_image
    DetectionResult = namedtuple('DetectionResult',
                                 ['box', 'score', 'class_id', 'mask'])
    register_coco(os.path.expanduser(cfg.DATA.BASEDIR))

    roidbs = DatasetRegistry.get(dataset_name).inference_roidbs()

    images = tf.placeholder(tf.float32,
                            shape=[None, None, None, 3],
                            name='images')
    with tf.variable_scope('resnet50'):
        final_boxes, final_scores, final_labels, final_inds = \
            model.model_fpn(images, is_training=False, data_format='channels_last', mode='test')

    init_op = tf.group(
        [tf.global_variables_initializer(),
         tf.local_variables_initializer()])

    sess_config = tf.ConfigProto()
    sess_config.allow_soft_placement = True
    sess_config.log_device_placement = False
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)
    sess.run(init_op)

    checkpoint_path = cfg.TRAIN.LOG_DIR + COMMON_POSTFIX
    # restorer = tf.train.Saver()
    # restorer.restore(sess, tf.train.latest_checkpoint(checkpoint_path))
    variable_averages = tf.train.ExponentialMovingAverage(
        decay=cfg.TRAIN.MOVING_AVERAGE_DECAY)
    variable_to_restore = variable_averages.variables_to_restore()
    restorer = tf.train.Saver(variable_to_restore)
    restorer.restore(sess, tf.train.latest_checkpoint(checkpoint_path))

    all_results = []
    start = time.time()
    for idx, roidb in enumerate(roidbs):
        fname, img_id = roidb["file_name"], roidb["image_id"]
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        im = im.astype("float32")
        h, w = im.shape[:2]

        # resize so the short side matches the target length
        resized_im = short_side_resize_image(im)
        # normalize: convert BGR to RGB, scale to [0, 1], then standardize
        resized_im = resized_im[:, :, [2, 1, 0]]  # BGR --> RGB
        resized_im /= 255.0
        resized_im -= np.asarray(cfg.PREPROC.PIXEL_MEAN)
        resized_im /= np.asarray(cfg.PREPROC.PIXEL_STD)

        resized_h, resized_w = resized_im.shape[:2]

        scale = np.sqrt(resized_h * 1.0 / h * resized_w / w)

        mult = float(cfg.FPN.RESOLUTION_REQUIREMENT)  # pad so the size is divisible by this
        max_height = int(np.ceil(float(resized_h) / mult) * mult)
        max_width = int(np.ceil(float(resized_w) / mult) * mult)
        resized_im1 = np.zeros((max_height, max_width, 3), dtype=np.float32)
        resized_im1[:resized_h, :resized_w, :] = resized_im

        # profile the graph execution
        if 1510 <= idx <= 1520:
            from tensorflow.python.client import timeline
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            boxes, scores, labels = sess.run(
                [final_boxes, final_scores, final_labels],
                feed_dict={images: resized_im1[np.newaxis]},
                options=options,
                run_metadata=run_metadata)
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(
                    '{}/timeline_Inference_step{}.json'.format(
                        checkpoint_path, idx), 'w') as fp:
                fp.write(chrome_trace)
        else:
            boxes, scores, labels = sess.run(
                [final_boxes, final_scores, final_labels],
                feed_dict={images: resized_im1[np.newaxis]})

        # Some slow numpy postprocessing:
        boxes = boxes / scale
        # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
        boxes = boxes.reshape([-1, 4])
        boxes[:, [0, 1]] = np.maximum(boxes[:, [0, 1]], 0)
        boxes[:, 2] = np.minimum(boxes[:, 2], w - 1)
        boxes[:, 3] = np.minimum(boxes[:, 3], h - 1)

        if idx < 5:
            print(boxes, scores, labels)

        # if masks:
        #     full_masks = [_paste_mask(box, mask, orig_shape)
        #                   for box, mask in zip(boxes, masks[0])]
        #     masks = full_masks
        # else:
        #     # fill with none
        # masks = [None] * len(boxes)

        # postprocessing for FCOS
        # ################# per-class NMS ##################
        # boxes_after_nms = []
        # for c in range(1, 81):
        #     inds = np.where(labels == c)
        #     if len(inds) > 0:
        #         boxes_keep = np.concatenate([boxes[inds], scores[inds].reshape(-1, 1),
        #                                      labels[inds].reshape(-1, 1)], axis=1)
        #         # within-class NMS
        #         keep = nms(boxes_keep[:, 0:5], thresh=cfg.FCOS.NMS_THRESH)
        #         boxes_keep = boxes_keep[keep]
        #         # filter out low-scoring boxes
        #         # keep = np.where(boxes_keep[:, 4] > 0.1)  # this threshold should be set per class
        #         # boxes_keep = boxes_keep[keep]
        #         boxes_after_nms.append(boxes_keep)
        # boxes_after_nms = np.concatenate(boxes_after_nms, axis=0)  # [x1,y1,x2,y2,score,label]
        boxes_after_nms = np.concatenate(
            [boxes, scores.reshape(-1, 1),
             labels.reshape(-1, 1)], axis=1)

        # ################# limit the max number of detections per image ##################
        number_of_detections = len(boxes_after_nms)
        if number_of_detections > cfg.FRCNN.TEST.RESULTS_PER_IM > 0:
            scores_sorted = np.sort(boxes_after_nms[:, 4])
            # keep the top RESULTS_PER_IM scores; np.sort is ascending and
            # 0-indexed, so no +1 here (unlike torch.kthvalue, which is 1-indexed)
            image_thresh = scores_sorted[number_of_detections -
                                         cfg.FRCNN.TEST.RESULTS_PER_IM]
            keep = np.where(boxes_after_nms[:, 4] >= image_thresh)[0]
            boxes_after_nms = boxes_after_nms[keep]

        # ################# cross-class NMS ##################
        # keep = nms_across_class(boxes_after_nms, thresh=0.5)
        # boxes_after_nms = boxes_after_nms[keep]

        boxes = boxes_after_nms[:, 0:4]
        scores = boxes_after_nms[:, 4]
        labels = boxes_after_nms[:, 5].astype(np.int32)
        masks = [None] * len(boxes)

        for r in [
                DetectionResult(*args)
                for args in zip(boxes, scores, labels.tolist(), masks)
        ]:
            res = {
                'image_id': img_id,
                'category_id': int(r.class_id),
                'bbox': [round(float(x), 4) for x in r.box],
                'score': round(float(r.score), 4),
            }
            all_results.append(res)

        if idx % 1000 == 0:
            print(idx, (time.time() - start) / 1000)
            start = time.time()

    DatasetRegistry.get(dataset_name).eval_inference_results(
        all_results, output_filename)
Code Example #27
File: balloon.py  Project: nikhilarunw/tensorpack
def register_balloon(basedir):
    for split in ["train", "val"]:
        name = "balloon_" + split
        DatasetRegistry.register(name, lambda x=split: BalloonDemo(basedir, x))
        DatasetRegistry.register_metadata(name, "class_names",
                                          ["BG", "balloon"])
Code Example #28
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    roidbs = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
            'boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # Add rpn data to dataflow:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                ret['anchor_labels'], ret[
                    'anchor_boxes'] = get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.DATA.NUM_WORKERS > 0:
        if cfg.TRAINER == 'horovod':
            buffer_size = cfg.DATA.NUM_WORKERS * 10  # one dataflow for each process, therefore don't need large buffer
            ds = MultiThreadMapData(ds,
                                    cfg.DATA.NUM_WORKERS,
                                    preprocess,
                                    buffer_size=buffer_size)
            # MPI does not like fork()
        else:
            buffer_size = cfg.DATA.NUM_WORKERS * 20
            ds = MultiProcessMapDataZMQ(ds,
                                        cfg.DATA.NUM_WORKERS,
                                        preprocess,
                                        buffer_size=buffer_size)
    else:
        ds = MapData(ds, preprocess)
    return ds
Code Example #29
def finalize_configs(is_training):
    """
    Run some sanity checks, and populate some configs from others
    """
    _C.freeze(False)  # populate new keys now
    if isinstance(_C.DATA.VAL, six.string_types
                  ):  # support single string (the typical case) as well
        _C.DATA.VAL = (_C.DATA.VAL, )
    if isinstance(_C.DATA.TRAIN, six.string_types):  # support single string
        _C.DATA.TRAIN = (_C.DATA.TRAIN, )

    # finalize dataset definitions ...
    from dataset import DatasetRegistry
    datasets = list(_C.DATA.TRAIN) + list(_C.DATA.VAL)

    # _C.DATA.CLASS_NAMES = ["BG", "class1", "class2", "class3", "class4", "class5", "class6"]
    # _C.DATA.CLASS_NAMES = [
    #     "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]  # noqa
    # _C.DATA.CLASS_NAMES = ["BG"] + _C.DATA.CLASS_NAMES
    # print(datasets[0])
    _C.DATA.CLASS_NAMES = DatasetRegistry.get_metadata(datasets[0],
                                                       "class_names")
    # print(_C.DATA.CLASS_NAMES)
    _C.DATA.NUM_CATEGORY = len(_C.DATA.CLASS_NAMES) - 1

    assert _C.BACKBONE.NORM in ['FreezeBN', 'SyncBN', 'GN',
                                'None'], _C.BACKBONE.NORM
    if _C.BACKBONE.NORM != 'FreezeBN':
        assert not _C.BACKBONE.FREEZE_AFFINE
    assert _C.BACKBONE.FREEZE_AT in [0, 1, 2]

    _C.RPN.NUM_ANCHOR = len(_C.RPN.ANCHOR_SIZES) * len(_C.RPN.ANCHOR_RATIOS)
    assert len(_C.FPN.ANCHOR_STRIDES) == len(_C.RPN.ANCHOR_SIZES)
    # image size into the backbone has to be multiple of this number
    _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[
        3]  # [3] because we build FPN with features r2,r3,r4,r5

    if _C.MODE_FPN:
        size_mult = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        _C.PREPROC.MAX_SIZE = np.ceil(
            _C.PREPROC.MAX_SIZE / size_mult) * size_mult
        assert _C.FPN.PROPOSAL_MODE in ['Level', 'Joint']
        assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.MRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.NORM in ['None', 'GN']

        if _C.FPN.CASCADE:
            # the first threshold is the proposal sampling threshold
            assert _C.CASCADE.IOUS[0] == _C.FRCNN.FG_THRESH
            assert len(_C.CASCADE.BBOX_REG_WEIGHTS) == len(_C.CASCADE.IOUS)

    if is_training:
        train_scales = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE
        if isinstance(
                train_scales,
            (list, tuple)) and train_scales[1] - train_scales[0] > 100:
            # don't autotune if augmentation is on
            os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
        assert _C.TRAINER in ['horovod', 'replicated'], _C.TRAINER

        lr = _C.TRAIN.LR_SCHEDULE
        if isinstance(lr, six.string_types):
            if lr.endswith("x"):
                LR_SCHEDULE_KITER = {
                    "{}x".format(k): [180 * k - 120, 180 * k - 40, 180 * k]
                    for k in range(2, 10)
                }
                LR_SCHEDULE_KITER["1x"] = [120, 160, 180]
                _C.TRAIN.LR_SCHEDULE = [
                    x * 1000 for x in LR_SCHEDULE_KITER[lr]
                ]
            else:
                _C.TRAIN.LR_SCHEDULE = eval(lr)

        # setup NUM_GPUS
        if _C.TRAINER == 'horovod':
            import horovod.tensorflow as hvd
            ngpu = hvd.size()
            logger.info("Horovod Rank={}, Size={}, LocalRank={}".format(
                hvd.rank(), hvd.size(), hvd.local_rank()))
        else:
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            ngpu = get_num_gpu()
        assert ngpu > 0, "Has to train with GPU!"
        assert ngpu % 8 == 0 or 8 % ngpu == 0, "Can only train with 1,2,4 or >=8 GPUs, but found {} GPUs".format(
            ngpu)
    else:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        ngpu = get_num_gpu()

    if _C.TRAIN.NUM_GPUS is None:
        _C.TRAIN.NUM_GPUS = ngpu
    else:
        if _C.TRAINER == 'horovod':
            assert _C.TRAIN.NUM_GPUS == ngpu
        else:
            assert _C.TRAIN.NUM_GPUS <= ngpu

    _C.freeze()
    logger.info("Config: ------------------------------------------\n" +
                str(_C))
Code Example #30
def register_idcard(basedir):
    for split in ["train", "val"]:
        name = "idcard_" + split
        DatasetRegistry.register(name,
                                 lambda x=split: IDCardDataset(basedir, x))
        DatasetRegistry.register_metadata(name, "class_names", ["BG", "page"])
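
Distilled from the examples above, a minimal registration template. `MyDataset`
and the names are placeholders for your own loader (which should implement
`training_roidbs()` / `inference_roidbs()` as in the other examples):

    from dataset import DatasetRegistry

    class MyDataset:  # placeholder loader class
        def __init__(self, basedir, split):
            self.basedir, self.split = basedir, split

    def register_my_dataset(basedir):
        class_names = ["BG", "my_class"]  # label 0 must be the background class
        for split in ["train", "val"]:
            name = "my_dataset_" + split
            # bind `split` via a default argument to avoid late binding
            DatasetRegistry.register(name, lambda x=split: MyDataset(basedir, x))
            DatasetRegistry.register_metadata(name, "class_names", class_names)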