def register_coco_format(data_config: DataConfig):
    """
    Add the COCO-format datasets described by `data_config` to the registry,
    so you can refer to them by nickname in `cfg.DATA.TRAIN/VAL`.
    """
    class_names_ls = {}
    class_names = []
    for _split in data_config.train_splits + data_config.eval_splits:  # type: DataSubsetSplit
        _name = _split.nickname
        print("register coco:", _split)
        # Bind the loop variable as a default argument; referring to `_split`
        # inside the lambda would make every closure see the last split (late binding).
        class_names = DatasetRegistry.register(
            dataset_name=_name,
            func=lambda sp=_split: COCOFormatDetectionSubset(
                sp.ann_path, image_data_basedir=data_config.image_data_basedir),
            logx=_split)
        class_names_ls[_name] = class_names

    # Consistency check: every registered subset must expose the same categories.
    for nm, cls_n in class_names_ls.items():
        assert class_names == cls_n, \
            "Train and val category sets are not consistent ({})".format(nm)

    class_names_include_bg = ["BG"] + list(class_names)
    for subset_name in class_names_ls:
        DatasetRegistry.register_metadata(subset_name, 'class_names',
                                          class_names_include_bg)
    # TODO: check dataset here
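
# All the register_* functions in this file bind the loop variable through a
# lambda default (`lambda x=split: ...`). A minimal standalone sketch (not repo
# code) of why that matters: without the default, every closure sees the final
# value of the loop variable.
loaders_buggy = [lambda: s for s in ["train", "val"]]      # all see "val"
loaders_fixed = [lambda s=s: s for s in ["train", "val"]]  # bound per iteration

assert [f() for f in loaders_buggy] == ["val", "val"]
assert [f() for f in loaders_fixed] == ["train", "val"]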
def register_ic(basedir):
    for split in ["train", "val"]:
        print('split: ', split)
        name = "ic_" + split
        DatasetRegistry.register(name, lambda x=split: ICDemo(basedir, x))
        DatasetRegistry.register_metadata(name, "class_names", ["BG", "IC"])
    print(DatasetRegistry._metadata_registry)
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014,
    and val2017==minival2014.
    """
    # 80 names for COCO, kept below for reference.
    # For your own coco-format dataset, change this.
    """
    class_names = [
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
        "truck", "boat", "traffic light", "fire hydrant", "stop sign",
        "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep",
        "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
        "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
        "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
        "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork",
        "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
        "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
        "couch", "potted plant", "bed", "dining table", "toilet", "tv",
        "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
        "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
        "scissors", "teddy bear", "hair drier", "toothbrush"]  # noqa
    class_names = ["BG"] + class_names
    """
    class_names = ["BG", "failure"]

    for split in [
            "train2017", "val2017", "train2014", "val2014",
            "valminusminival2014", "minival2014", "val2017_100"
    ]:
        name = "coco_" + split
        DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
def register_display(basedir):
    print("REGISTER")
    for split in ["train", "val"]:
        name = split
        DatasetRegistry.register(name, lambda x=split: DisplayDemo(basedir, x))
        DatasetRegistry.register_metadata(name, "class_names",
                                          ["BG", "LabelID0", "LabelID1"])
def register_deep_fashion_2(basedir):
    """
    Add the DeepFashion2 datasets to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.
    """
    for split in ['train', 'val']:
        DatasetRegistry.register(
            split, lambda x=split: DeepFashion2Detection(basedir, x))
def register_waymo(basedir):
    for split in ["train", "val"]:
        name = "waymo_" + split
        DatasetRegistry.register(name, lambda x=split: WaymoDemo(basedir, x))
        DatasetRegistry.register_metadata(name, "class_names", [
            "TYPE_BACKGROUND", "TYPE_UNKNOWN", "TYPE_VEHICLE",
            "TYPE_PEDESTRIAN", "TYPE_SIGN", "TYPE_CYCLIST"
        ])
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.
    """
    for split in ["train2017", "val2017", "train2014", "val2014",
                  "valminusminival2014", "minival2014"]:
        DatasetRegistry.register("coco_" + split,
                                 lambda x=split: COCODetection(basedir, x))
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014,
    and val2017==minival2014.
    """
    # Class names for this coco-format music-notation dataset.
    # For your own coco-format dataset, change this.
    class_names = [
        'OttavaBracket', 'OttavaText_15ma', 'OttavaText_15mb', 'OttavaText_8va',
        'OttavaText_8vb', 'accidentalDoubleFlat', 'accidentalDoubleSharp',
        'accidentalFlat', 'accidentalNatural', 'accidentalSharp', 'arpeggiato',
        'articAccentAbove', 'articAccentBelow', 'articMarcatoAbove',
        'articMarcatoBelow', 'articStaccatissimoAbove', 'articStaccatissimoBelow',
        'articStaccatoAbove', 'articStaccatoBelow', 'articTenutoAbove',
        'articTenutoBelow', 'augmentationDot', 'barlineHeavy', 'barlineSingle',
        'beam', 'brace', 'cClefAlto', 'cClefAltoChange', 'cClefTenor',
        'cClefTenorChange', 'caesura', 'clef15', 'clef8', 'coda', 'combStaff',
        'combTimeSignature', 'dynamicFF', 'dynamicFFF', 'dynamicFFFF',
        'dynamicFFFFF', 'dynamicForte', 'dynamicFortePiano', 'dynamicMF',
        'dynamicMP', 'dynamicMezzo', 'dynamicPP', 'dynamicPPP', 'dynamicPPPP',
        'dynamicPPPPP', 'dynamicPiano', 'dynamicRinforzando2',
        'dynamicSforzando1', 'dynamicSforzato', 'fClef', 'fClefChange',
        'fermataAbove', 'fermataBelow', 'fingering0', 'fingering1', 'fingering2',
        'fingering3', 'fingering4', 'fingering5', 'flag128thDown', 'flag128thUp',
        'flag16thDown', 'flag16thUp', 'flag32ndDown', 'flag32ndUp',
        'flag64thDown', 'flag64thUp', 'flag8thDown', 'flag8thUp', 'gClef',
        'gClefChange', 'hairpin', 'keyFlat', 'keyNatural', 'keySharp',
        'keyboardPedalPed', 'keyboardPedalUp', 'legerLine', 'noteheadBlack',
        'noteheadDoubleWhole', 'noteheadHalf', 'noteheadWhole',
        'ornamentMordent', 'ornamentTrill', 'ornamentTurn',
        'ornamentTurnInverted', 'repeatDot', 'rest128th', 'rest16th',
        'rest32nd', 'rest64th', 'rest8th', 'restDoubleWhole', 'restHBar',
        'restHNr', 'restHalf', 'restLonga', 'restQuarter', 'restWhole', 'segno',
        'slur', 'staffLine', 'stem', 'stringsDownBow', 'stringsUpBow',
        'text_field', 'text_script', 'tie', 'timeSig0', 'timeSig1', 'timeSig2',
        'timeSig3', 'timeSig4', 'timeSig5', 'timeSig6', 'timeSig7', 'timeSig8',
        'timeSig9', 'timeSigCommon', 'timeSigCutCommon', 'tremolo1', 'tremolo2',
        'tremolo3', 'tremolo4', 'tuplet1', 'tuplet3', 'tuplet4', 'tuplet5',
        'tuplet6', 'tuplet7', 'tuplet8', 'tuplet9', 'tupletBracket'
    ]
    class_names = ["BG"] + class_names

    for split in [
            "train2017", "val2017", "train2014", "val2014",
            "valminusminival2014", "minival2014", "val2017_100"
    ]:
        name = "coco_" + split
        DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
def do_evaluate(pred_config, output_file):
    num_tower = max(cfg.TRAIN.NUM_GPUS, 1)
    graph_funcs = MultiTowerOfflinePredictor(
        pred_config, list(range(num_tower))).get_predictors()

    for dataset in cfg.DATA.VAL:
        logger.info("Evaluating {} ...".format(dataset))
        dataflows = [
            get_eval_dataflow(dataset, shard=k, num_shards=num_tower)
            for k in range(num_tower)]
        all_results = multithread_predict_dataflow(dataflows, graph_funcs)
        output = output_file + '-' + dataset
        DatasetRegistry.get(dataset).eval_inference_results(all_results, output)
def _eval(self):
    logdir = self._output_dir
    if cfg.TRAINER == 'replicated':
        all_results = multithread_predict_dataflow(self.dataflows, self.predictors)
    else:
        filenames = [
            os.path.join(
                logdir, 'outputs{}-part{}.json'.format(self.global_step, rank))
            for rank in range(hvd.local_size())
        ]

        if self._horovod_run_eval:
            local_results = predict_dataflow(self.dataflow, self.predictor)
            fname = filenames[hvd.local_rank()]
            with open(fname, 'w') as f:
                json.dump(local_results, f)
        self.barrier.eval()
        if hvd.rank() > 0:
            return
        all_results = []
        for fname in filenames:
            with open(fname, 'r') as f:
                obj = json.load(f)
            all_results.extend(obj)
            os.unlink(fname)

    scores = DatasetRegistry.get(
        self._eval_dataset).eval_inference_results(all_results)
    for k, v in scores.items():
        self.trainer.monitors.put_scalar(self._eval_dataset + '-' + k, v)
def get_eval_dataflow(name, shard=0, num_shards=1):
    """
    Args:
        name (str): name of the dataset to evaluate
        shard, num_shards: to get subset of evaluation data
    """
    roidbs = DatasetRegistry.get(name).inference_roidbs()
    logger.info("Found {} images for inference.".format(len(roidbs)))

    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    img_range = (shard * img_per_shard,
                 (shard + 1) * img_per_shard if shard + 1 < num_shards else num_imgs)

    # no filter for training
    ds = DataFromListOfDict(roidbs[img_range[0]:img_range[1]],
                            ["file_name", "image_id"])

    def f(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im

    ds = MapDataComponent(ds, f, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return ds
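
# A minimal worked example (assumed values, helper name is hypothetical) of how
# the sharding above partitions the images: the last shard absorbs the
# remainder when num_imgs is not divisible by num_shards.
def _shard_range(shard, num_shards, num_imgs):
    img_per_shard = num_imgs // num_shards
    return (shard * img_per_shard,
            (shard + 1) * img_per_shard if shard + 1 < num_shards else num_imgs)

assert [_shard_range(k, 3, 10) for k in range(3)] == [(0, 3), (3, 6), (6, 10)]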
def get_pascal_voc_train_dataflow(batch_size=1):
    from dataset import register_pascal_voc

    # register_coco(os.path.expanduser("/media/ubuntu/Working/common_data/coco"))
    register_pascal_voc(os.path.expanduser("/media/ubuntu/Working/voc2012/VOC2012/"))

    print("In train dataflow")
    roidbs = list(itertools.chain.from_iterable(
        DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)
    print("Done loading roidbs")

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd ground-truth boxes. "
        "Total #images for training: {}".format(num - len(roidbs), len(roidbs))
    )

    aspect_grouping = [1]
    aspect_ratios = [float(x["height"]) / float(x["width"]) for x in roidbs]
    group_ids = _quantize(aspect_ratios, aspect_grouping)

    ds = DataFromList(np.arange(len(roidbs)), shuffle=True)
    ds.reset_state()
    ds = AspectGroupingDataFlow(roidbs, ds, group_ids,
                                batch_size=batch_size, drop_uneven=True).__iter__()
    preprocess = TrainingDataPreprocessor()

    while True:
        batch_roidbs = next(ds)
        yield preprocess(batch_roidbs)
def get_plain_train_dataflow(batch_size=2):
    # no aspect ratio grouping
    print("In train dataflow")
    roidbs = list(itertools.chain.from_iterable(
        DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)
    print("Done loading roidbs")

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd ground-truth boxes. "
        "Total #images for training: {}".format(num - len(roidbs), len(roidbs))
    )

    ds = DataFromList(roidbs, shuffle=True)
    preprocess = TrainingDataPreprocessor()
    buffer_size = cfg.DATA.NUM_WORKERS * 20
    ds = MultiProcessMapData(ds, cfg.DATA.NUM_WORKERS, preprocess,
                             buffer_size=buffer_size)
    ds.reset_state()
    dataiter = ds.__iter__()
    return dataiter
def get_train_dataflow(batch_size=2):
    print("In train dataflow")
    roidbs = list(itertools.chain.from_iterable(
        DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)
    print("Done loading roidbs")

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd ground-truth boxes. "
        "Total #images for training: {}".format(num - len(roidbs), len(roidbs))
    )

    aspect_grouping = [1]
    aspect_ratios = [float(x["height"]) / float(x["width"]) for x in roidbs]
    group_ids = _quantize(aspect_ratios, aspect_grouping)
    ds = AspectGroupingDataFlow(roidbs, group_ids,
                                batch_size=batch_size, drop_uneven=True)

    preprocess = TrainingDataPreprocessor()
    buffer_size = cfg.DATA.NUM_WORKERS * 10
    # ds = MultiProcessMapData(ds, cfg.DATA.NUM_WORKERS, preprocess, buffer_size=buffer_size)
    ds = MultiThreadMapData(ds, cfg.DATA.NUM_WORKERS, preprocess,
                            buffer_size=buffer_size)
    ds.reset_state()
    # to get an infinite data flow
    ds = RepeatedData(ds, num=-1)
    dataiter = ds.__iter__()
    return dataiter
def print_class_histogram(roidbs):
    """
    Args:
        roidbs (list[dict]): the same format as the output of `training_roidbs`.
    """
    class_names = DatasetRegistry.get_metadata(cfg.DATA.TRAIN[0], 'class_names')
    # labels are in [1, NUM_CATEGORY], hence +2 for bins
    hist_bins = np.arange(cfg.DATA.NUM_CATEGORY + 2)

    # Histogram of ground-truth objects
    # (np.int was removed from recent NumPy; use np.int64)
    gt_hist = np.zeros((cfg.DATA.NUM_CATEGORY + 1, ), dtype=np.int64)
    for entry in roidbs:
        # filter crowd?
        gt_inds = np.where((entry["class"] > 0) & (entry["is_crowd"] == 0))[0]
        gt_classes = entry["class"][gt_inds]
        if len(gt_classes):
            assert gt_classes.max() <= len(class_names) - 1
        gt_hist += np.histogram(gt_classes, bins=hist_bins)[0]

    # the first bin is BG; skip it when building the table
    data = list(
        itertools.chain(*[[class_names[i + 1], v]
                          for i, v in enumerate(gt_hist[1:])]))
    COL = min(6, len(data))
    total_instances = sum(data[1::2])
    data.extend([None] * ((COL - len(data) % COL) % COL))
    data.extend(["total", total_instances])
    data = itertools.zip_longest(*[data[i::COL] for i in range(COL)])
    table = tabulate(data, headers=["class", "#box"] * (COL // 2),
                     tablefmt="pipe", stralign="center", numalign="left")
    logger.info("Ground-Truth category distribution:\n" + colored(table, "cyan"))
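
# A tiny worked example of the binning above (assumed NUM_CATEGORY=3, not from
# the repo): labels live in [1, 3], so the bin edges are [0, 1, 2, 3, 4] and
# bin i counts label i; bin 0 (BG) stays empty.
import numpy as np
labels = np.array([1, 3, 3, 2])
hist = np.histogram(labels, bins=np.arange(3 + 2))[0]
assert hist.tolist() == [0, 1, 1, 2]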
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014,
    and val2017==minival2014.
    """
    # 80 names for COCO
    # For your own coco-format dataset, change this.
    # class_names = [
    #     "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
    #     "truck", "boat", "traffic light", "fire hydrant", "stop sign",
    #     "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep",
    #     "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
    #     "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    #     "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
    #     "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork",
    #     "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    #     "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
    #     "couch", "potted plant", "bed", "dining table", "toilet", "tv",
    #     "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
    #     "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
    #     "scissors", "teddy bear", "hair drier", "toothbrush"]  # noqa

    # !!! xiaoying: use the 20 PASCAL VOC class names instead
    class_names = [
        'aeroplane',    # 0
        'bicycle',      # 1
        'bird',         # 2
        'boat',         # 3
        'bottle',       # 4
        'bus',          # 5
        'car',          # 6
        'cat',          # 7
        'chair',        # 8
        'cow',          # 9
        'diningtable',  # 10
        'dog',          # 11
        'horse',        # 12
        'motorbike',    # 13
        'person',       # 14
        'pottedplant',  # 15
        'sheep',        # 16
        'sofa',         # 17
        'train',        # 18
        'tvmonitor'     # 19
    ]
    class_names = ["BG"] + class_names
    print("!!! xiaoying class_names", class_names)

    for split in [
            "train2017", "val2017", "train2014", "val2014",
            "valminusminival2014", "minival2014", "val2017_100",
            "voctrain2012", "vocval2012"  # !!! xiaoying
    ]:
        name = "coco_" + split
        DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014,
    and val2017==minival2014.
    """
    # For your own coco-format dataset, change this.
    class_names = ["page"]  # , "passport_code"
    class_names = ["BG"] + class_names

    for split in ["train_set_vito"]:
        name = split
        DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014,
    and val2017==minival2014.
    """
    # For your own coco-format dataset, change this.
    class_names = ["BG", "failure"]

    for split in ["train2019", "val2019"]:
        # name = "coco_" + split
        DatasetRegistry.register(split, lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(split, 'class_names', class_names)
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print("---------------------------------------------------------------- data.py:343")
    print_class_histogram(roidbs)

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd ground-truth boxes. "
        "Total #images for training: {}".format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)
    preprocess = TrainingDataPreprocessor(cfg)

    if cfg.DATA.NUM_WORKERS > 0:
        if cfg.TRAINER == "horovod":
            # one dataflow for each process, therefore don't need large buffer
            buffer_size = cfg.DATA.NUM_WORKERS * 10
            ds = MultiThreadMapData(ds, cfg.DATA.NUM_WORKERS, preprocess,
                                    buffer_size=buffer_size)
            # MPI does not like fork()
        else:
            buffer_size = cfg.DATA.NUM_WORKERS * 20
            ds = MultiProcessMapData(ds, cfg.DATA.NUM_WORKERS, preprocess,
                                     buffer_size=buffer_size)
    else:
        ds = MapData(ds, preprocess)
    return ds
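
# A minimal sketch (shapes only, values assumed) of one datapoint produced by
# get_train_dataflow() above when MODE_FPN is on; the per-level keys follow the
# 'anchor_labels_lvl{2..6}' naming used by the FPN preprocessor in this repo:
#   {
#       'image':               (h, w, 3) float32,
#       'anchor_labels_lvl2':  (h', w', NA),      # one pair per FPN level
#       'anchor_boxes_lvl2':   (h', w', NA, 4),
#       'gt_boxes':            (N, 4) float32,
#       'gt_labels':           (N,),
#       'gt_masks':            (N, h, w),         # only if MODE_MASK
#   }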
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014,
    and val2017==minival2014.
    """
    # For your own coco-format dataset, change this.
    class_names = ['table']
    class_names = ["BG"] + class_names

    for split in ['train2021', "train2017"]:
        # , "train2014", "val2014", "valminusminival2014", "minival2014", "val2017_100"
        name = "coco_" + split
        DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
def register_coco(basedir):
    """
    Add COCO datasets like "coco_train201x" to the registry,
    so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.

    Note that train2017==trainval35k==train2014+val2014-minival2014,
    and val2017==minival2014.
    """
    # For your own coco-format dataset, change this.
    class_names = ['Bacterial_Spot', 'Late_Blight', 'Septorial_Leaf_spot',
                   'Mosaic_Virus', 'Yellow_Curved']
    # Other class sets kept for reference:
    '''['Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', 'Barrier',
    'Wall', 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Parking',
    'Pedestrian Area', 'Rail Track', 'Road', 'Service Lane', 'Sidewalk',
    'Bridge', 'Building', 'Tunnel', 'Person', 'Bicyclist', 'Motorcyclist',
    'Other Rider', 'Lane Marking - Crosswalk', 'Lane Marking - General',
    'Mountain', 'Sand', 'Sky', 'Snow', 'Terrain', 'Vegetation', 'Water',
    'Banner', 'Bench', 'Bike Rack', 'Billboard', 'Catch Basin', 'CCTV Camera',
    'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole', 'Phone Booth',
    'Pothole', 'Street Light', 'Pole', 'Traffic Sign Frame', 'Utility Pole',
    'Traffic Light', 'Traffic Sign (Back)', 'Traffic Sign (Front)',
    'Trash Can', 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan', 'Motorcycle',
    'On Rails', 'Other Vehicle', 'Trailer', 'Truck', 'Wheeled Slow',
    'Car Mount', 'Ego Vehicle', 'Unlabeled']'''
    '''['short_sleeved_shirt', 'long_sleeved_shirt', 'short_sleeved_outwear',
    'long_sleeved_outwear', 'vest', 'sling', 'shorts', 'trousers', 'skirt',
    'short_sleeved_dress', 'long_sleeved_dress', 'vest_dress', 'sling_dress']'''
    class_names = ["BG"] + class_names

    for split in ["train2017", "val2017", "train2014", "val2014",
                  "valminusminival2014", "minival2014", "val2017_100"]:
        name = "coco_" + split
        DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
def register_pascal_voc(basedir):
    """
    Add PASCAL VOC datasets (stored in COCO format) like "voc_train2014" to the
    registry, so you can refer to them with names in `cfg.DATA.TRAIN/VAL`.
    """
    # The 20 PASCAL VOC class names.
    class_names = [
        "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
        "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
        "pottedplant", "sheep", "sofa", "train", "tvmonitor",
    ]  # noqa
    class_names = ["BG"] + class_names

    for split in ["minitrain2014", "minival2014", "train2014", "val2014"]:
        name = "voc_" + split
        DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)
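
# A minimal sketch of registering your own coco-format dataset, following the
# pattern above; "mydata", the split names, and "my_class" are placeholders.
def register_my_dataset(basedir):
    class_names = ["BG", "my_class"]  # index 0 must be background
    for split in ["train", "val"]:
        name = "mydata_" + split
        # bind `split` via a default argument so each lambda keeps its own value
        DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, "class_names", class_names)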
def get_eval_dataflow(name, is_aws, is_gcs, shard=0, num_shards=1):
    """
    Args:
        name (str): name of the dataset to evaluate
        shard, num_shards: to get subset of evaluation data
    """
    roidbs = DatasetRegistry.get(name).inference_roidbs()
    logger.info("Found {} images for inference.".format(len(roidbs)))

    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    img_range = (
        shard * img_per_shard,
        (shard + 1) * img_per_shard if shard + 1 < num_shards else num_imgs,
    )

    # no filter for training
    ds = DataFromListOfDict(roidbs[img_range[0]:img_range[1]],
                            ["file_name", "image_id"])

    if is_aws:
        s3 = boto3.resource("s3")
    elif is_gcs:
        c = storage.Client.create_anonymous_client()
        bucket = c.get_bucket("determined-ai-coco-dataset")

    def f(fname):
        if is_aws:
            s3_object = s3.meta.client.get_object(
                Bucket="determined-ai-coco-dataset", Key=fname)
            im = cv2.imdecode(
                np.asarray(bytearray(s3_object["Body"].read()), dtype=np.uint8),
                cv2.IMREAD_COLOR,
            )
        elif is_gcs:
            blob = bucket.blob(fname)
            s = download_gcs_blob_with_backoff(blob)
            im = cv2.imdecode(np.asarray(bytearray(s), dtype=np.uint8),
                              cv2.IMREAD_COLOR)
        else:
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im

    ds = MapDataComponent(ds, f, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return ds
def do_sanity_check(pred_func, output_dir='/root/dentalpoc/logs/xxxxx',
                    font_rs=10, thickness_rs=10):
    # num_tower = max(cfg.TRAIN.NUM_GPUS, 1)
    # graph_funcs = MultiTowerOfflinePredictor(
    #     pred_config, list(range(num_tower))).get_predictors()
    os.makedirs(output_dir, exist_ok=True)
    for dataset in cfg.DATA.VAL:
        logger.info("sanity checking {} ...".format(dataset))
        # dataflows = [
        #     get_eval_dataflow(dataset, shard=k, num_shards=num_tower, add_gt=True)
        #     for k in range(num_tower)]
        # all_results = multithread_predict_dataflow(dataflows, graph_funcs)
        coco_format_detection = DatasetRegistry.get(dataset)
        coco_object = coco_format_detection.coco
        for _im_id, _img_dic in list(coco_object.imgs.items())[1:]:
            _img_path = _img_dic['path']
            _img_seg_polygons = coco_object.imgToAnns[_im_id]
            detection_ground_truths = [
                DetectionResult(
                    box=convert_box_mode_xywh_2_xyxy(x['bbox']),
                    score=1.0,
                    class_id=x['category_id'],
                    mask=coco_object.annToMask(x))
                for x in _img_seg_polygons
            ]
            print("S======check")
            _predict_with_gt(pred_func=pred_func,
                             input_file=_img_path,
                             ground_truths=detection_ground_truths,
                             output_dir=output_dir,
                             font_rs=font_rs,
                             thickness_rs=thickness_rs)
def finalize_configs(is_training):
    """
    Run some sanity checks, and populate some configs from others
    """
    _C.freeze(False)  # populate new keys now
    if isinstance(_C.DATA.VAL, six.string_types):  # support single string (the typical case) as well
        _C.DATA.VAL = (_C.DATA.VAL, )
    if isinstance(_C.DATA.TRAIN, six.string_types):  # support single string
        _C.DATA.TRAIN = (_C.DATA.TRAIN, )

    # finalize dataset definitions ...
    from dataset import DatasetRegistry
    datasets = list(_C.DATA.TRAIN) + list(_C.DATA.VAL)
    _C.DATA.CLASS_NAMES = DatasetRegistry.get_metadata(datasets[0], "class_names")
    _C.DATA.NUM_CATEGORY = len(_C.DATA.CLASS_NAMES) - 1

    assert _C.BACKBONE.NORM in ['FreezeBN', 'SyncBN', 'GN', 'None'], _C.BACKBONE.NORM
    if _C.BACKBONE.NORM != 'FreezeBN':
        assert not _C.BACKBONE.FREEZE_AFFINE
    assert _C.BACKBONE.FREEZE_AT in [0, 1, 2]

    _C.RPN.NUM_ANCHOR = len(_C.RPN.ANCHOR_SIZES) * len(_C.RPN.ANCHOR_RATIOS)
    assert len(_C.FPN.ANCHOR_STRIDES) == len(_C.RPN.ANCHOR_SIZES)
    # image size into the backbone has to be multiple of this number
    _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[3]  # [3] because we build FPN with features r2,r3,r4,r5

    if _C.MODE_FPN:
        size_mult = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        _C.PREPROC.MAX_SIZE = np.ceil(_C.PREPROC.MAX_SIZE / size_mult) * size_mult
        assert _C.FPN.PROPOSAL_MODE in ['Level', 'Joint']
        assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.MRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.NORM in ['None', 'GN']

        if _C.FPN.CASCADE:
            # the first threshold is the proposal sampling threshold
            assert _C.CASCADE.IOUS[0] == _C.FRCNN.FG_THRESH
            assert len(_C.CASCADE.BBOX_REG_WEIGHTS) == len(_C.CASCADE.IOUS)

    if is_training:
        train_scales = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE
        if isinstance(train_scales, (list, tuple)) and train_scales[1] - train_scales[0] > 100:
            # don't autotune if augmentation is on
            os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
        assert _C.TRAINER in ['horovod', 'replicated'], _C.TRAINER

        lr = _C.TRAIN.LR_SCHEDULE
        if isinstance(lr, six.string_types):
            if lr.endswith("x"):
                LR_SCHEDULE_KITER = {
                    "{}x".format(k): [180 * k - 120, 180 * k - 40, 180 * k]
                    for k in range(2, 10)
                }
                LR_SCHEDULE_KITER["1x"] = [120, 160, 180]
                _C.TRAIN.LR_SCHEDULE = [x * 1000 for x in LR_SCHEDULE_KITER[lr]]
            else:
                _C.TRAIN.LR_SCHEDULE = eval(lr)

        # setup NUM_GPUS
        if _C.TRAINER == 'horovod':
            import horovod.tensorflow as hvd
            ngpu = hvd.size()
            logger.info("Horovod Rank={}, Size={}, LocalRank={}".format(
                hvd.rank(), hvd.size(), hvd.local_rank()))
        else:
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            ngpu = get_num_gpu()
        assert ngpu > 0, "Has to train with GPU!"
        assert ngpu % 8 == 0 or 8 % ngpu == 0, \
            "Can only train with 1,2,4 or >=8 GPUs, but found {} GPUs".format(ngpu)
    else:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        ngpu = get_num_gpu()

    if _C.TRAIN.NUM_GPUS is None:
        _C.TRAIN.NUM_GPUS = ngpu
    else:
        if _C.TRAINER == 'horovod':
            assert _C.TRAIN.NUM_GPUS == ngpu
        else:
            assert _C.TRAIN.NUM_GPUS <= ngpu

    _C.freeze()
    logger.info("Config: ------------------------------------------\n" + str(_C))
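
# A small worked example of the "{k}x" schedule expansion above: for lr = "2x",
# LR_SCHEDULE_KITER["2x"] = [180*2-120, 180*2-40, 180*2] = [240, 320, 360] kilo-
# iterations, so TRAIN.LR_SCHEDULE becomes the step counts below after * 1000.
schedule = [x * 1000 for x in [180 * 2 - 120, 180 * 2 - 40, 180 * 2]]
assert schedule == [240000, 320000, 360000]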
def eval_one_dataset(dataset_name, output_filename):
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    import cv2
    from collections import namedtuple
    from dataset import DatasetRegistry
    from myaug_lib import short_side_resize_image

    DetectionResult = namedtuple('DetectionResult',
                                 ['box', 'score', 'class_id', 'mask'])

    register_coco(os.path.expanduser(cfg.DATA.BASEDIR))
    roidbs = DatasetRegistry.get(dataset_name).inference_roidbs()

    images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='images')
    with tf.variable_scope('resnet50'):
        final_boxes, final_scores, final_labels, final_inds = \
            model.model_fpn(images, is_training=False,
                            data_format='channels_last', mode='test')

    init_op = tf.group(
        [tf.global_variables_initializer(), tf.local_variables_initializer()])
    sess_config = tf.ConfigProto()
    sess_config.allow_soft_placement = True
    sess_config.log_device_placement = False
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)
    sess.run(init_op)

    checkpoint_path = cfg.TRAIN.LOG_DIR + COMMON_POSTFIX
    # restorer = tf.train.Saver()
    # restorer.restore(sess, tf.train.latest_checkpoint(checkpoint_path))
    variable_averages = tf.train.ExponentialMovingAverage(
        decay=cfg.TRAIN.MOVING_AVERAGE_DECAY)
    variable_to_restore = variable_averages.variables_to_restore()
    restorer = tf.train.Saver(variable_to_restore)
    restorer.restore(sess, tf.train.latest_checkpoint(checkpoint_path))

    all_results = []
    start = time.time()
    for idx, roidb in enumerate(roidbs):
        fname, img_id = roidb["file_name"], roidb["image_id"]
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        im = im.astype("float32")
        h, w = im.shape[:2]
        # resize the short side
        resized_im = short_side_resize_image(im)
        # normalize: BGR -> RGB, scale to [0, 1], subtract mean, divide by std
        resized_im = resized_im[:, :, [2, 1, 0]]
        resized_im /= 255.0
        resized_im -= np.asarray(cfg.PREPROC.PIXEL_MEAN)
        resized_im /= np.asarray(cfg.PREPROC.PIXEL_STD)
        resized_h, resized_w = resized_im.shape[:2]
        scale = np.sqrt(resized_h * 1.0 / h * resized_w / w)

        # pad so the size is divisible by the FPN resolution requirement
        mult = float(cfg.FPN.RESOLUTION_REQUIREMENT)
        max_height = int(np.ceil(float(resized_h) / mult) * mult)
        max_width = int(np.ceil(float(resized_w) / mult) * mult)
        resized_im1 = np.zeros((max_height, max_width, 3), dtype=np.float32)
        resized_im1[:resized_h, :resized_w, :] = resized_im

        # profile the graph execution
        if 1510 <= idx <= 1520:
            from tensorflow.python.client import timeline
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            boxes, scores, labels = sess.run(
                [final_boxes, final_scores, final_labels],
                feed_dict={images: resized_im1[np.newaxis]},
                options=options,
                run_metadata=run_metadata)
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open('{}/timeline_Inference_step{}.json'.format(
                    checkpoint_path, idx), 'w') as fp:
                fp.write(chrome_trace)
        else:
            boxes, scores, labels = sess.run(
                [final_boxes, final_scores, final_labels],
                feed_dict={images: resized_im1[np.newaxis]})

        # Some slow numpy postprocessing:
        boxes = boxes / scale
        # boxes are already clipped inside the graph, but after the floating
        # point scaling, this may not be true any more.
        boxes = boxes.reshape([-1, 4])
        boxes[:, [0, 1]] = np.maximum(boxes[:, [0, 1]], 0)
        boxes[:, 2] = np.minimum(boxes[:, 2], w - 1)
        boxes[:, 3] = np.minimum(boxes[:, 3], h - 1)
        if idx < 5:
            print(boxes, scores, labels)

        # if masks:
        #     full_masks = [_paste_mask(box, mask, orig_shape)
        #                   for box, mask in zip(boxes, masks[0])]
        #     masks = full_masks
        # else:
        #     # fill with none
        #     masks = [None] * len(boxes)

        # postprocessing for FCOS
        # ################# per-class NMS ##################
        # boxes_after_nms = []
        # for c in range(1, 81):
        #     inds = np.where(labels == c)
        #     if len(inds) > 0:
        #         boxes_keep = np.concatenate([boxes[inds], scores[inds].reshape(-1, 1),
        #                                      labels[inds].reshape(-1, 1)], axis=1)
        #         # NMS within the class
        #         keep = nms(boxes_keep[:, 0:5], thresh=cfg.FCOS.NMS_THRESH)
        #         boxes_keep = boxes_keep[keep]
        #         # filter low-scoring boxes
        #         # keep = np.where(boxes_keep[:, 4] > 0.1)  # the threshold should be chosen per class
        #         # boxes_keep = boxes_keep[keep]
        #         boxes_after_nms.append(boxes_keep)
        # boxes_after_nms = np.concatenate(boxes_after_nms, axis=0)  # [x1,y1,x2,y2,score,label]
        boxes_after_nms = np.concatenate(
            [boxes, scores.reshape(-1, 1), labels.reshape(-1, 1)], axis=1)

        # ########## cap the number of detections per image ##########
        number_of_detections = len(boxes_after_nms)
        if number_of_detections > cfg.FRCNN.TEST.RESULTS_PER_IM > 0:
            scores_sorted = np.sort(boxes_after_nms[:, 4])
            image_thresh = scores_sorted[
                number_of_detections - cfg.FRCNN.TEST.RESULTS_PER_IM + 1]
            keep = np.where(boxes_after_nms[:, 4] >= image_thresh)[0]
            boxes_after_nms = boxes_after_nms[keep]

        # ################# NMS across classes ##################
        # keep = nms_across_class(boxes_after_nms, thresh=0.5)
        # boxes_after_nms = boxes_after_nms[keep]

        boxes = boxes_after_nms[:, 0:4]
        scores = boxes_after_nms[:, 4]
        labels = boxes_after_nms[:, 5].astype(np.int32)
        masks = [None] * len(boxes)

        for r in [DetectionResult(*args)
                  for args in zip(boxes, scores, labels.tolist(), masks)]:
            res = {
                'image_id': img_id,
                'category_id': int(r.class_id),
                'bbox': [round(float(x), 4) for x in r.box],
                'score': round(float(r.score), 4),
            }
            all_results.append(res)

        if idx % 1000 == 0:
            print(idx, (time.time() - start) / 1000)
            start = time.time()

    DatasetRegistry.get(dataset_name).eval_inference_results(
        all_results, output_filename)
def register_balloon(basedir):
    for split in ["train", "val"]:
        name = "balloon_" + split
        DatasetRegistry.register(name, lambda x=split: BalloonDemo(basedir, x))
        DatasetRegistry.register_metadata(name, "class_names", ["BG", "balloon"])
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
        anchor_labels: (h', w', NA)
        anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd ground-truth boxes. "
        "Total #images for training: {}".format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], \
            roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # Add rpn data to dataflow:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(
                    im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.DATA.NUM_WORKERS > 0:
        if cfg.TRAINER == 'horovod':
            # one dataflow for each process, therefore don't need large buffer
            buffer_size = cfg.DATA.NUM_WORKERS * 10
            ds = MultiThreadMapData(ds, cfg.DATA.NUM_WORKERS, preprocess,
                                    buffer_size=buffer_size)
            # MPI does not like fork()
        else:
            buffer_size = cfg.DATA.NUM_WORKERS * 20
            ds = MultiProcessMapDataZMQ(ds, cfg.DATA.NUM_WORKERS, preprocess,
                                        buffer_size=buffer_size)
    else:
        ds = MapData(ds, preprocess)
    return ds
def finalize_configs(is_training):
    """
    Run some sanity checks, and populate some configs from others
    """
    _C.freeze(False)  # populate new keys now
    if isinstance(_C.DATA.VAL, six.string_types):  # support single string (the typical case) as well
        _C.DATA.VAL = (_C.DATA.VAL, )
    if isinstance(_C.DATA.TRAIN, six.string_types):  # support single string
        _C.DATA.TRAIN = (_C.DATA.TRAIN, )

    # finalize dataset definitions ...
    from dataset import DatasetRegistry
    datasets = list(_C.DATA.TRAIN) + list(_C.DATA.VAL)
    # Hardcoded class lists kept for reference; the metadata registry is used instead.
    # _C.DATA.CLASS_NAMES = ["BG", "class1", "class2", "class3", "class4", "class5", "class6"]
    # _C.DATA.CLASS_NAMES = [
    #     "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
    #     "truck", "boat", "traffic light", "fire hydrant", "stop sign",
    #     "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep",
    #     "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
    #     "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    #     "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
    #     "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork",
    #     "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    #     "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
    #     "couch", "potted plant", "bed", "dining table", "toilet", "tv",
    #     "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
    #     "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
    #     "scissors", "teddy bear", "hair drier", "toothbrush"]  # noqa
    # _C.DATA.CLASS_NAMES = ["BG"] + _C.DATA.CLASS_NAMES
    # print(datasets[0])
    _C.DATA.CLASS_NAMES = DatasetRegistry.get_metadata(datasets[0], "class_names")
    # print(_C.DATA.CLASS_NAMES)
    _C.DATA.NUM_CATEGORY = len(_C.DATA.CLASS_NAMES) - 1

    assert _C.BACKBONE.NORM in ['FreezeBN', 'SyncBN', 'GN', 'None'], _C.BACKBONE.NORM
    if _C.BACKBONE.NORM != 'FreezeBN':
        assert not _C.BACKBONE.FREEZE_AFFINE
    assert _C.BACKBONE.FREEZE_AT in [0, 1, 2]

    _C.RPN.NUM_ANCHOR = len(_C.RPN.ANCHOR_SIZES) * len(_C.RPN.ANCHOR_RATIOS)
    assert len(_C.FPN.ANCHOR_STRIDES) == len(_C.RPN.ANCHOR_SIZES)
    # image size into the backbone has to be multiple of this number
    _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[3]  # [3] because we build FPN with features r2,r3,r4,r5

    if _C.MODE_FPN:
        size_mult = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        _C.PREPROC.MAX_SIZE = np.ceil(_C.PREPROC.MAX_SIZE / size_mult) * size_mult
        assert _C.FPN.PROPOSAL_MODE in ['Level', 'Joint']
        assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.MRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.NORM in ['None', 'GN']

        if _C.FPN.CASCADE:
            # the first threshold is the proposal sampling threshold
            assert _C.CASCADE.IOUS[0] == _C.FRCNN.FG_THRESH
            assert len(_C.CASCADE.BBOX_REG_WEIGHTS) == len(_C.CASCADE.IOUS)

    if is_training:
        train_scales = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE
        if isinstance(train_scales, (list, tuple)) and train_scales[1] - train_scales[0] > 100:
            # don't autotune if augmentation is on
            os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
        assert _C.TRAINER in ['horovod', 'replicated'], _C.TRAINER

        lr = _C.TRAIN.LR_SCHEDULE
        if isinstance(lr, six.string_types):
            if lr.endswith("x"):
                LR_SCHEDULE_KITER = {
                    "{}x".format(k): [180 * k - 120, 180 * k - 40, 180 * k]
                    for k in range(2, 10)
                }
                LR_SCHEDULE_KITER["1x"] = [120, 160, 180]
                _C.TRAIN.LR_SCHEDULE = [x * 1000 for x in LR_SCHEDULE_KITER[lr]]
            else:
                _C.TRAIN.LR_SCHEDULE = eval(lr)

        # setup NUM_GPUS
        if _C.TRAINER == 'horovod':
            import horovod.tensorflow as hvd
            ngpu = hvd.size()
            logger.info("Horovod Rank={}, Size={}, LocalRank={}".format(
                hvd.rank(), hvd.size(), hvd.local_rank()))
        else:
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            ngpu = get_num_gpu()
        assert ngpu > 0, "Has to train with GPU!"
        assert ngpu % 8 == 0 or 8 % ngpu == 0, \
            "Can only train with 1,2,4 or >=8 GPUs, but found {} GPUs".format(ngpu)
    else:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        ngpu = get_num_gpu()

    if _C.TRAIN.NUM_GPUS is None:
        _C.TRAIN.NUM_GPUS = ngpu
    else:
        if _C.TRAINER == 'horovod':
            assert _C.TRAIN.NUM_GPUS == ngpu
        else:
            assert _C.TRAIN.NUM_GPUS <= ngpu

    _C.freeze()
    logger.info("Config: ------------------------------------------\n" + str(_C))
def register_idcard(basedir):
    for split in ["train", "val"]:
        name = "idcard_" + split
        DatasetRegistry.register(name, lambda x=split: IDCardDataset(basedir, x))
        DatasetRegistry.register_metadata(name, "class_names", ["BG", "page"])
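
# A minimal sketch of the dataset interface the register_* functions above
# assume, inferred from the calls in this file (training_roidbs,
# inference_roidbs, eval_inference_results); the class name and field values
# are illustrative placeholders, not the actual base class.
class MyDataset:
    def __init__(self, basedir, split):
        self.basedir, self.split = basedir, split

    def training_roidbs(self):
        # one dict per image; "boxes" are float32 (x1, y1, x2, y2)
        return [{"file_name": "/path/to/img.jpg",
                 "boxes": np.zeros((0, 4), dtype=np.float32),
                 "class": np.zeros((0,), dtype=np.int32),
                 "is_crowd": np.zeros((0,), dtype=np.int8)}]

    def inference_roidbs(self):
        return [{"file_name": "/path/to/img.jpg", "image_id": 1}]

    def eval_inference_results(self, results, output=None):
        # `results` is a list of {'image_id', 'category_id', 'bbox', 'score'}
        return {}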