def _eval(self):
    # Run evaluation and push every resulting scalar metric to the
    # trainer's monitors.  Two execution modes:
    #   - 'replicated' trainer: one predictor per GPU, merged by the
    #     multithread helper;
    #   - otherwise (horovod): each worker predicts its own shard, results
    #     are gathered, and only rank 0 aggregates and evaluates.
    logdir = self._output_dir
    if cfg.TRAINER == 'replicated':
        all_results = multithread_predict_dataflow(self.dataflows, self.predictors)
    else:
        # each worker runs inference on its local shard of the data
        if self.batched:
            local_results = predict_dataflow_batch(self.dataflow, self.predictor)
        else:
            local_results = predict_dataflow(self.dataflow, self.predictor)
        results = gather_result_from_all_processes(local_results)
        if hvd.rank() > 0:
            # non-root ranks contribute their shard and stop here
            return
        all_results = []
        for item in results:
            # some ranks may contribute nothing (None) -- skip those
            if item is not None:
                all_results.extend(item)
    output_file = os.path.join(
        logdir, '{}-outputs{}'.format(self._eval_dataset, self.global_step))
    scores = DetectionDataset().eval_or_save_inference_results(
        all_results, self._eval_dataset, output_file)
    for k, v in scores.items():
        self.trainer.monitors.put_scalar(k, v)
def _eval(self):
    # Horovod-aware evaluation that exchanges partial results through JSON
    # files in the log directory: every eval worker writes one part file,
    # all ranks synchronize on a barrier op, then rank 0 merges the parts,
    # evaluates, and publishes the scalar metrics.
    logdir = self._output_dir
    if cfg.TRAINER == 'replicated':
        all_results = multithread_predict_dataflow(self.dataflows, self.predictors)
    else:
        # one part file per local worker, keyed by global step and rank
        filenames = [os.path.join(
            logdir, 'outputs{}-part{}.json'.format(self.global_step, rank)
        ) for rank in range(hvd.local_size())]
        if self._horovod_run_eval:
            local_results = predict_dataflow(self.dataflow, self.predictor)
            fname = filenames[hvd.local_rank()]
            with open(fname, 'w') as f:
                json.dump(local_results, f)
        # wait until every worker has written its part file
        # NOTE(review): indentation reconstructed -- barrier presumably runs
        # on all ranks, not only eval workers; confirm against original repo.
        self.barrier.eval()
        if hvd.rank() > 0:
            # only rank 0 performs the merge + evaluation below
            return
        all_results = []
        for fname in filenames:
            with open(fname, 'r') as f:
                obj = json.load(f)
            all_results.extend(obj)
            os.unlink(fname)  # remove the part file once merged
    output_file = os.path.join(
        logdir, '{}-outputs{}.json'.format(self._eval_dataset, self.global_step))
    scores = DetectionDataset().eval_or_save_inference_results(
        all_results, self._eval_dataset, output_file)
    for k, v in scores.items():
        self.trainer.monitors.put_scalar(k, v)
def do_evaluate(pred_config, output_file):
    """Evaluate every configured VAL and TEST dataset with a multi-GPU predictor.

    Args:
        pred_config: a tensorpack PredictConfig for the trained model.
        output_file (str): path prefix; per-dataset results are written to
            ``output_file + '-' + dataset``.
    """
    num_gpu = cfg.TRAIN.NUM_GPUS
    graph_funcs = MultiTowerOfflinePredictor(
        pred_config, list(range(num_gpu))).get_predictors()

    def _eval_one(dataset):
        # Run sharded inference (one dataflow shard per GPU) and save results.
        logger.info("Evaluating {} ...".format(dataset))
        dataflows = [
            get_eval_dataflow(dataset, shard=k, num_shards=num_gpu)
            for k in range(num_gpu)
        ]
        all_results = multithread_predict_dataflow(dataflows, graph_funcs)
        output = output_file + '-' + dataset
        DetectionDataset().eval_or_save_inference_results(
            all_results, dataset, output)

    # The original duplicated the loop body verbatim for VAL and TEST;
    # a single helper keeps both paths identical by construction.
    for dataset in list(cfg.DATA.VAL) + list(cfg.DATA.TEST):
        _eval_one(dataset)
def background_coco(all_results):
    # Evaluate COCO-style results and decide whether training should stop.
    # NOTE(review): this is a closure -- `logdir` and `self` come from the
    # enclosing scope; it cannot be called standalone.
    output_file = os.path.join(
        logdir, '{}-outputs{}'.format(self._eval_dataset, self.global_step))
    scores = DetectionDataset().eval_or_save_inference_results(
        all_results, self._eval_dataset, output_file)
    # stop training once BOTH the box AP and mask AP targets are reached
    cfg.TRAIN.SHOULD_STOP = scores[
        'mAP(bbox)/IoU=0.5:0.95'] >= cfg.TEST.BOX_TARGET and scores[
        'mAP(segm)/IoU=0.5:0.95'] >= cfg.TEST.MASK_TARGET
    for k, v in scores.items():
        self.trainer.monitors.put_scalar(k, v)
    return
def get_eval_dataflow(name, shard=0, num_shards=1):
    """
    Build the evaluation dataflow for one shard of a dataset.

    Args:
        name (str): name of the dataset to evaluate
        shard, num_shards: select which 1/num_shards slice of the data to serve
    """
    roidbs = DetectionDataset().load_inference_roidbs(name)

    total = len(roidbs)
    per_shard = total // num_shards
    start = shard * per_shard
    # the last shard absorbs any remainder images
    end = total if shard + 1 >= num_shards else start + per_shard

    # no filter for training
    ds = DataFromListOfDict(roidbs[start:end], ['file_name', 'image_id'])

    def load_image(fname):
        img = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert img is not None, fname
        return img

    ds = MapDataComponent(ds, load_image, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return ds
def setUp(self):
    # sample data generator
    batch_size = 64
    self.dataset = DetectionDataset(data_type='train')
    self.imgs, self.labs_info = self.dataset[:batch_size]
    print('Number of Classes : {}'.format(self.dataset.num_classes))
    print('Image shape : {} || Label shape : {}'.format(self.imgs.shape,
                                                        self.labs_info.shape))

    # fixed anchor configuration used throughout the tests
    self.strides = [4, 8, 16]
    self.scales = [10, 25, 40]
    self.ratios = [(1, 1), (1.5, 0.5), (1.2, 0.8), (0.8, 1.2), (1.4, 1.4)]
    self.prior = PriorBoxes(self.strides, self.scales, self.ratios)
    # prior boxes shape : (6720, 4)
    self.prior_boxes = self.prior.generate((128, 128))

    # Use the 0-th image as the sample image.
    self.group_labs = self.labs_info.groupby('image_index')
    for ind, labs in self.group_labs:
        self.labs = labs
        break

    self.gt_boxes = self.labs[['cx', 'cy', 'w', 'h']].values
    self.gt_labels = self.labs['label'].values
    self.iou = calculate_iou(self.prior_boxes, self.gt_boxes)
    print('Ground Truths Shape : {}'.format(self.gt_boxes.shape))
    print('IOU Shape : {}'.format(self.iou.shape))
    print(list(self.labs.groupby('image_index')))
def get_predictor(cls):
    '''Load the trained Mask R-CNN model once and cache the offline predictor.'''
    with cls.lock:
        # check if model is already loaded
        if cls.predictor:
            return cls.predictor

        os.environ['TENSORPACK_FP16'] = 'true'

        # create a mask r-cnn model
        mask_rcnn_model = ResNetFPNModel(True)

        try:
            model_dir = os.environ['SM_MODEL_DIR']
        except KeyError:
            model_dir = '/opt/ml/model'
        try:
            cls.pretrained_model = os.environ['PRETRAINED_MODEL']
        except KeyError:
            # NOTE(review): if PRETRAINED_MODEL is unset, cls.pretrained_model
            # must already exist on the class -- confirm against the class def.
            pass

        # Pick the checkpoint with the highest training step.  The original
        # used plain string comparison, which orders 'model-9000.index'
        # AFTER 'model-12000.index'; compare the numeric step instead.
        import re

        def _step(path):
            m = re.search(r'model-(\d+)\.index$', path)
            return int(m.group(1)) if m else -1

        model_search_path = os.path.join(model_dir, "model-*.index")
        trained_model = max(glob.glob(model_search_path), key=_step, default="")
        print(f'Using model: {trained_model}')

        # fixed resnet50 backbone weights
        cfg.BACKBONE.WEIGHTS = os.path.join(cls.pretrained_model)
        cfg.MODE_FPN = True
        cfg.MODE_MASK = True

        # calling detection dataset gets the number of coco categories
        # and saves in the configuration
        DetectionDataset()
        finalize_configs(is_training=False)

        # Create an inference model
        # PredictConfig takes a model, input tensors and output tensors
        cls.predictor = OfflinePredictor(PredictConfig(
            model=mask_rcnn_model,
            session_init=get_model_loader(trained_model),
            input_names=['images', 'orig_image_dims'],
            output_names=[
                'generate_{}_proposals_topk_per_image/boxes'.format('fpn' if cfg.MODE_FPN else 'rpn'),
                'generate_{}_proposals_topk_per_image/scores'.format('fpn' if cfg.MODE_FPN else 'rpn'),
                'fastrcnn_all_scores',
                'output/boxes',
                'output/scores',
                'output/labels',
                'output/masks'
            ]))
        return cls.predictor
class DetectionGenerator(Sequence):
    'Generates Localization dataset for Keras'

    def __init__(self, dataset:DetectionDataset, prior:PriorBoxes,
                 batch_size=32, best_match_policy=False, shuffle=True):
        'Initialization'
        # Multiprocessing only works when the dataset/prior are passed as
        # dicts, so Keras fit_generator can rebuild them in worker processes.
        if isinstance(dataset, dict):
            self.dataset = DetectionDataset(**dataset)
        elif isinstance(dataset, DetectionDataset):
            self.dataset = dataset
        else:
            raise ValueError('dataset은 dict혹은 DetectionDataset Class로 이루어져 있어야 합니다.')

        if isinstance(prior, dict):
            self.prior = PriorBoxes(**prior)
        elif isinstance(prior, PriorBoxes):
            self.prior = prior
        else:
            raise ValueError('PriorBoxes은 dict 혹은 PriorBoxes Class로 이루어져 있어야 합니다.')

        self.batch_size = batch_size
        self.best_match_policy = best_match_policy
        self.shuffle = shuffle
        self.num_classes = self.dataset.num_classes
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        # integer division: any trailing partial batch is dropped
        return len(self.dataset) // self.batch_size

    def __getitem__(self, index):
        'Generate one batch of data'
        images, ground_truths = self.dataset[self.batch_size * index:
                                             self.batch_size * (index + 1)]
        pr_boxes = self.prior.generate(images.shape[1:])
        # num_classes + 1: one extra slot for the background class
        y_trues = label_generator(ground_truths.groupby('image_index'),
                                  pr_boxes, self.num_classes + 1)
        return images, y_trues

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        if self.shuffle:
            self.dataset.shuffle()
def _init_model(self):
    """Build the tensorpack track model with inference configs finalized."""
    logger.set_logger_dir("/tmp/test_log/", 'd')

    from dataset import DetectionDataset
    from train import ResNetFPNTrackModel

    # init tensorpack model
    cfg.freeze(False)
    model = ResNetFPNTrackModel()
    # initialize the config with information from our dataset
    DetectionDataset()
    finalize_configs(is_training=False)
    return model
def get_batched_eval_dataflow(name, shard=0, num_shards=1, batch_size=1):
    """
    Args:
        name (str): name of the dataset to evaluate
        shard, num_shards: to get subset of evaluation data
        batch_size (int): images per emitted batch
    """
    roidbs = DetectionDataset().load_inference_roidbs(name)

    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    # the last shard absorbs the remainder images
    img_range = (shard * img_per_shard, (shard + 1) * img_per_shard
                 if shard + 1 < num_shards else num_imgs)

    # no filter for training
    ds = DataFromListOfDictBatched(roidbs[img_range[0]:img_range[1]],
                                   ['file_name', 'image_id'], batch_size)

    def decode_images(inputs):
        # [[file_name, image_id], ...] -> [[image, image_id], ...]
        return [[cv2.imread(inp[0], cv2.IMREAD_COLOR), inp[1]]
                for inp in inputs]

    def resize_images(inputs):
        # resize each image; also record the isotropic scale factor and the
        # original (h, w) so predictions can be mapped back
        resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                               cfg.PREPROC.MAX_SIZE)
        resized_imgs = [resizer.augment(inp[0]) for inp in inputs]
        org_shapes = [inp[0].shape for inp in inputs]
        scales = [
            np.sqrt(rimg.shape[0] * 1.0 / org_shape[0] * rimg.shape[1] /
                    org_shape[1])
            for rimg, org_shape in zip(resized_imgs, org_shapes)
        ]
        return [[resized_imgs[i], inp[1], scales[i], org_shapes[i][:2]]
                for i, inp in enumerate(inputs)]

    def pad_and_batch(inputs):
        # zero-pad every image to the batch's max height/width, then stack
        heights, widths, _ = zip(*[inp[0].shape for inp in inputs])
        max_h, max_w = max(heights), max(widths)
        padded_images = np.stack([
            np.pad(inp[0], [[0, max_h - inp[0].shape[0]],
                            [0, max_w - inp[0].shape[1]], [0, 0]], 'constant')
            for inp in inputs
        ])
        # [images, image_ids, unpadded (h, w), scales, original (h, w)]
        return [
            padded_images, [inp[1] for inp in inputs],
            list(zip(heights, widths)), [inp[2] for inp in inputs],
            [inp[3] for inp in inputs]
        ]

    ds = MapData(ds, decode_images)
    ds = MapData(ds, resize_images)
    ds = MapData(ds, pad_and_batch)
    return ds
def __init__(self, name, need_network=True, need_img=True, model="best"):
    # Tracker wrapper: resolves a model alias to a checkpoint path, builds a
    # one-off predictor that extracts ground-truth reference features, then
    # builds the main tracking predictor.
    # NOTE(review): this mutates the global cfg in a specific order
    # (EXTRACT_GT_FEATURES / MODE_TRACK toggles) -- do not reorder.
    super().__init__(name=name, is_deterministic=True)
    self._resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                                 cfg.PREPROC.MAX_SIZE)
    self._prev_box = None
    self._ff_gt_feats = None
    self._need_network = need_network
    self._need_img = need_img
    self._rotated_bbox = None

    if need_network:
        logger.set_logger_dir(
            "/tmp/test_log_/" + str(random.randint(0, 10000)), 'd')
        # model alias -> checkpoint path; resnet50 variants also shrink the
        # backbone to [3, 4, 6, 3] blocks
        if model == "best":
            load = "train_log/hard_mining3/model-1360500"
        elif model == "nohardexamples":
            load = "train_log/condrcnn_all_2gpu_lrreduce2/model-1200500"
        elif model == "newrpn":
            load = "train_log/newrpn1/model"
        elif model == "resnet50_nohardexamples":
            load = "train_log/condrcnn_all_resnet50/model-1200500"
            cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 6, 3]
        elif model == "resnet50":
            load = "train_log/hard_mining3_resnet50/model-1360500"
            cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 6, 3]
        elif model == "gotonly":
            load = "train_log/hard_mining3_onlygot/model-1361000"
        elif model.startswith("checkpoint:"):
            load = model.replace("checkpoint:", "")
        else:
            assert False, ("unknown model", model)
        from dataset import DetectionDataset
        # init tensorpack model
        # cfg.freeze(False)
        DetectionDataset()  # initialize the config with information from our dataset
        cfg.EXTRACT_GT_FEATURES = True
        cfg.MODE_TRACK = False
        extract_model = ResNetFPNModel()
        extract_ff_feats_cfg = PredictConfig(
            model=extract_model,
            session_init=get_model_loader(load),
            input_names=['image', 'roi_boxes'],
            output_names=['rpn/feature'])
        finalize_configs(is_training=False)
        self._extract_func = OfflinePredictor(extract_ff_feats_cfg)
        # switch the config over to tracking mode, reusing the features
        # extracted by the predictor above
        cfg.EXTRACT_GT_FEATURES = False
        cfg.MODE_TRACK = True
        cfg.USE_PRECOMPUTED_REF_FEATURES = True
        self._pred_func = self._make_pred_func(load)
def __init__(self, dataset:DetectionDataset, prior:PriorBoxes, batch_size=32, best_match_policy=False, shuffle=True): 'Initialization' # Dictionary로 받았을 때에만 Multiprocessing이 동작가능함. # Keras fit_generator에서 Multiprocessing으로 동작시키기 위함 if isinstance(dataset, dict): self.dataset = DetectionDataset(**dataset) elif isinstance(dataset, DetectionDataset): self.dataset = dataset else: raise ValueError('dataset은 dict혹은 DetectionDataset Class로 이루어져 있어야 합니다.') if isinstance(prior, dict): self.prior = PriorBoxes(**prior) elif isinstance(prior, PriorBoxes): self.prior = prior else: raise ValueError('PriorBoxes은 dict 혹은 PriorBoxes Class로 이루어져 있어야 합니다.') self.batch_size = batch_size self.best_match_policy = best_match_policy self.shuffle = shuffle self.num_classes = self.dataset.num_classes self.on_epoch_end()
def print_class_histogram(roidbs):
    """
    Log a per-class histogram of ground-truth boxes.

    Args:
        roidbs (list[dict]): the same format as the output of
            `load_training_roidbs`.
    """
    dataset = DetectionDataset()
    hist_bins = np.arange(dataset.num_classes + 1)

    # Histogram of ground-truth objects.
    # np.int was deprecated in NumPy 1.20 and removed in 1.24 -- use a
    # concrete integer dtype instead.
    gt_hist = np.zeros((dataset.num_classes, ), dtype=np.int64)
    for entry in roidbs:
        # filter crowd?
        gt_inds = np.where((entry['class'] > 0) & (entry['is_crowd'] == 0))[0]
        gt_classes = entry['class'][gt_inds]
        gt_hist += np.histogram(gt_classes, bins=hist_bins)[0]

    data = [[dataset.class_names[i], v] for i, v in enumerate(gt_hist)]
    data.append(['total', sum(x[1] for x in data)])
    table = tabulate(data, headers=['class', '#box'], tablefmt='pipe')
    logger.info("Ground-Truth Boxes:\n" + colored(table, 'cyan'))
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        # Build one training datapoint from a roidb dict.
        # Returns None to drop malformed samples (dataflow skips them).
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
            'boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            # convert normalized boxes to absolute pixel coordinates
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = aug.augment_return_params(im)
        # boxes are transformed via their 8 corner/edge points so that flips
        # and resizes are applied consistently
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                # FPN levels are conventionally numbered from 2 upwards
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret[
                    'anchor_boxes'] = get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        # MPI does not like fork()
        ds = MultiThreadMapData(ds, 5, preprocess)
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
def setUp(self):
    # Load the first 2000 training images; the labels are not needed here.
    ds = DetectionDataset(data_type='train')
    self.train_imgs, _ = ds[:2000]
def get_predictor(cls):
    """Load the trained Mask R-CNN model once and cache the offline predictor.

    Reads SM_MODEL_DIR / RESNET_ARCH and any CONFIG__A__B=value overrides
    from the environment, then builds and caches an OfflinePredictor.
    """
    with cls.lock:
        # check if model is already loaded
        if cls.predictor:
            return cls.predictor

        os.environ["TENSORPACK_FP16"] = "true"

        # create a mask r-cnn model
        mask_rcnn_model = ResNetFPNModel(True)

        try:
            model_dir = os.environ["SM_MODEL_DIR"]
        except KeyError:
            model_dir = "/opt/ml/model"

        try:
            resnet_arch = os.environ["RESNET_ARCH"]
        except KeyError:
            resnet_arch = "resnet50"

        # Pick the checkpoint with the highest training step.  The original
        # used plain string comparison, which orders 'model-9000.index'
        # AFTER 'model-12000.index'; compare the numeric step instead.
        import re

        def _step(path):
            m = re.search(r"model-(\d+)\.index$", path)
            return int(m.group(1)) if m else -1

        model_search_path = os.path.join(model_dir, "model-*.index")
        trained_model = max(glob.glob(model_search_path), key=_step, default="")
        print(f"Using model: {trained_model}")

        cfg.MODE_FPN = True
        cfg.MODE_MASK = True
        if resnet_arch == "resnet101":
            cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 23, 3]
        else:
            cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 6, 3]

        # Apply CONFIG__A__B=value environment overrides to cfg.A.B.
        cfg_prefix = "CONFIG__"
        for key, value in dict(os.environ).items():
            if key.startswith(cfg_prefix):
                attr_name = key[len(cfg_prefix):].replace("__", ".")
                # NOTE(security): eval() of an env var -- acceptable only
                # because the environment is operator-controlled; never
                # expose this to user input.
                value = eval(value)
                print(f"update config: {attr_name}={value}")
                nested_var = cfg
                attr_list = attr_name.split(".")
                for attr in attr_list[:-1]:
                    nested_var = getattr(nested_var, attr)
                setattr(nested_var, attr_list[-1], value)

        # calling detection dataset gets the number of coco categories
        # and saves in the configuration
        DetectionDataset()
        finalize_configs(is_training=False)

        # Create an inference model
        # PredictConfig takes a model, input tensors and output tensors
        cls.predictor = OfflinePredictor(
            PredictConfig(
                model=mask_rcnn_model,
                session_init=get_model_loader(trained_model),
                input_names=["images", "orig_image_dims"],
                output_names=[
                    "generate_{}_proposals_topk_per_image/boxes".format(
                        "fpn" if cfg.MODE_FPN else "rpn"),
                    "generate_{}_proposals_topk_per_image/scores".format(
                        "fpn" if cfg.MODE_FPN else "rpn"),
                    "fastrcnn_all_scores",
                    "output/boxes",
                    "output/scores",
                    "output/labels",
                    "output/masks",
                ],
            ))
        return cls.predictor
def main():
    """Train the plate detector: parse CLI args, build loaders, run training."""
    parser = ArgumentParser()
    parser.add_argument('-d', '--data_path', dest='data_path', type=str,
                        default='../../data/', help='path to the data')
    parser.add_argument('-e', '--epochs', dest='epochs', default=1, type=int,
                        help='number of epochs')
    parser.add_argument('-b', '--batch_size', dest='batch_size', default=1,
                        type=int, help='batch size')
    parser.add_argument('-v', '--val_split', dest='val_split', default=0.8,
                        type=float, help='train/val split')
    args = parser.parse_args()

    DETECTOR_MODEL_PATH = '../pretrained/detector.pt'

    # Split annotations into train/val by the requested fraction.
    all_marks = load_json(os.path.join(args.data_path, 'train.json'))
    test_start = int(args.val_split * len(all_marks))
    train_marks, val_marks = all_marks[:test_start], all_marks[test_start:]

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    my_transforms = transforms.Compose([transforms.ToTensor()])
    train_dataset = DetectionDataset(marks=train_marks,
                                     img_folder=args.data_path,
                                     transforms=my_transforms)
    val_dataset = DetectionDataset(marks=val_marks,
                                   img_folder=args.data_path,
                                   transforms=my_transforms)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size,
                                  drop_last=True, num_workers=4,
                                  collate_fn=collate_fn)
    val_dataloader = DataLoader(val_dataset, batch_size=args.batch_size,
                                drop_last=False, num_workers=4,
                                collate_fn=collate_fn)

    torch.cuda.empty_cache()
    gc.collect()

    model = get_detector_model()
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=20, factor=0.5, verbose=True)

    try:
        train(model, optimizer, scheduler, train_dataloader, val_dataloader,
              DETECTOR_MODEL_PATH, args.epochs, device=device)
    except KeyboardInterrupt:
        # keep a snapshot of the weights when interrupted with Ctrl-C
        torch.save(model.state_dict(), DETECTOR_MODEL_PATH + '_INTERRUPTED')
        #logger.info('Saved interrupt')
        sys.exit(0)
def main():
    # Train an FPN (se_resnet50) segmentation model; all hyper-parameters
    # come from the CLI.
    parser = ArgumentParser()
    parser.add_argument('-d', '--data_path', dest='data_path', type=str,
                        default=None, help='path to the data')
    parser.add_argument('-e', '--epochs', dest='epochs', default=20, type=int,
                        help='number of epochs')
    parser.add_argument('-b', '--batch_size', dest='batch_size', default=40,
                        type=int, help='batch size')
    parser.add_argument('-s', '--image_size', dest='image_size', default=256,
                        type=int, help='input image size')
    parser.add_argument('-lr', '--learning_rate', dest='lr', default=0.0001,
                        type=float, help='learning rate')
    parser.add_argument('-wd', '--weight_decay', dest='weight_decay',
                        default=5e-4, type=float, help='weight decay')
    parser.add_argument('-lrs', '--learning_rate_step', dest='lr_step',
                        default=10, type=int, help='learning rate step')
    parser.add_argument('-lrg', '--learning_rate_gamma', dest='lr_gamma',
                        default=0.5, type=float, help='learning rate gamma')
    parser.add_argument('-m', '--model', dest='model', default='fpn',)
    parser.add_argument('-w', '--weight_bce', default=0.5, type=float,
                        help='weight BCE loss')
    parser.add_argument('-l', '--load', dest='load', default=False,
                        help='load file model')
    parser.add_argument('-v', '--val_split', dest='val_split', default=0.7,
                        help='train/val split')
    parser.add_argument('-o', '--output_dir', dest='output_dir',
                        default='./output', help='dir to save log and models')
    args = parser.parse_args()

    os.makedirs(args.output_dir, exist_ok=True)
    logger = get_logger(os.path.join(args.output_dir, 'train.log'))
    logger.info('Start training with params:')
    for arg, value in sorted(vars(args).items()):
        logger.info("Argument %s: %r", arg, value)

    # net = UNet()  # TODO: to use move novel arch or/and more lightweight blocks (mobilenet) to enlarge the batch_size
    # net = smp.FPN('mobilenet_v2', encoder_weights='imagenet', classes=2)
    net = smp.FPN('se_resnet50', encoder_weights='imagenet', classes=2)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if args.load:
        net.load_state_dict(torch.load(args.load))
    logger.info('Model type: {}'.format(net.__class__.__name__))
    net.to(device)

    optimizer = optim.Adam(net.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    # combined loss: weighted BCE plus weighted dice, returned as a pair
    criterion = lambda x, y: (args.weight_bce * nn.BCELoss()(x, y),
                              (1. - args.weight_bce) * dice_loss(x, y))
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step, gamma=args.lr_gamma) \
        if args.lr_step > 0 else None

    train_transforms = Compose([
        Crop(min_size=1 - 1 / 3., min_ratio=1.0, max_ratio=1.0, p=0.5),
        Flip(p=0.05),
        RandomRotate(),
        Pad(max_size=0.6, p=0.25),
        Resize(size=(args.image_size, args.image_size), keep_aspect=True),
        ScaleToZeroOne(),
    ])
    val_transforms = Compose([
        Resize(size=(args.image_size, args.image_size)),
        ScaleToZeroOne(),
    ])

    train_dataset = DetectionDataset(args.data_path,
                                     os.path.join(args.data_path,
                                                  'train_mask.json'),
                                     transforms=train_transforms)
    val_dataset = DetectionDataset(args.data_path, None,
                                   transforms=val_transforms)

    # split train/val by slicing the already-loaded file-name lists
    train_size = int(len(train_dataset) * args.val_split)
    val_dataset.image_names = train_dataset.image_names[train_size:]
    val_dataset.mask_names = train_dataset.mask_names[train_size:]
    train_dataset.image_names = train_dataset.image_names[:train_size]
    train_dataset.mask_names = train_dataset.mask_names[:train_size]

    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size,
                                  num_workers=8, shuffle=True, drop_last=True)
    val_dataloader = DataLoader(val_dataset, batch_size=args.batch_size,
                                num_workers=4, shuffle=False, drop_last=False)
    logger.info('Number of batches of train/val=%d/%d',
                len(train_dataloader), len(val_dataloader))

    try:
        train(net, optimizer, criterion, scheduler, train_dataloader,
              val_dataloader, logger=logger, args=args, device=device)
    except KeyboardInterrupt:
        # keep a snapshot of the weights when interrupted with Ctrl-C
        torch.save(
            net.state_dict(),
            os.path.join(args.output_dir, f'{args.model}_INTERRUPTED.pth'))
        logger.info('Saved interrupt')
        sys.exit(0)
def main():
    """Train a UNet segmentation model; all hyper-parameters come from the CLI."""
    parser = ArgumentParser()
    parser.add_argument('-d', '--data_path', dest='data_path', type=str,
                        default=None, help='path to the data')
    parser.add_argument('-e', '--epochs', dest='epochs', default=20, type=int,
                        help='number of epochs')
    parser.add_argument('-b', '--batch_size', dest='batch_size', default=40,
                        type=int, help='batch size')
    parser.add_argument('-s', '--image_size', dest='image_size', default=256,
                        type=int, help='input image size')
    parser.add_argument('-lr', '--learning_rate', dest='lr', default=0.0001,
                        type=float, help='learning rate')
    parser.add_argument('-wd', '--weight_decay', dest='weight_decay',
                        default=5e-4, type=float, help='weight decay')
    parser.add_argument('-lrs', '--learning_rate_step', dest='lr_step',
                        default=10, type=int, help='learning rate step')
    parser.add_argument('-lrg', '--learning_rate_gamma', dest='lr_gamma',
                        default=0.5, type=float, help='learning rate gamma')
    parser.add_argument('-m', '--model', dest='model', default='unet',
                        choices=('unet', ))
    parser.add_argument('-w', '--weight_bce', default=0.5, type=float,
                        help='weight BCE loss')
    parser.add_argument('-l', '--load', dest='load', default=False,
                        help='load file model')
    parser.add_argument('-v', '--val_split', dest='val_split', default=0.8,
                        help='train/val split')
    parser.add_argument('-o', '--output_dir', dest='output_dir',
                        default='/tmp/logs/', help='dir to save log and models')
    args = parser.parse_args()

    # os.makedirs(args.output_dir, exist_ok=True)
    logger = get_logger(os.path.join(args.output_dir, 'train.log'))
    logger.info('Start training with params:')
    for arg, value in sorted(vars(args).items()):
        logger.info("Argument %s: %r", arg, value)

    # BUG FIX: the original left this instantiation commented out, so `net`
    # was undefined and the function crashed with NameError on the next line.
    net = UNet()
    # TODO: to use move novel arch or/and more lightweight blocks (mobilenet) to enlarge the batch_size
    # TODO: img_size=256 is rather mediocre, try to optimize network for at least 512
    logger.info('Model type: {}'.format(net.__class__.__name__))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if args.load:
        net.load_state_dict(torch.load(args.load))
    net.to(device)
    # net = nn.DataParallel(net)

    optimizer = optim.Adam(net.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    # TODO: loss experimentation, fight class imbalance, there're many ways you can tackle this challenge
    criterion = lambda x, y: (args.weight_bce * nn.BCELoss()(x, y),
                              (1. - args.weight_bce) * dice_loss(x, y))
    # TODO: you can always try on plateau scheduler as a default option
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step, gamma=args.lr_gamma) \
        if args.lr_step > 0 else None

    # dataset
    # TODO: to work on transformations a lot, look at albumentations package for inspiration
    train_transforms = Compose([
        Crop(min_size=1 - 1 / 3., min_ratio=1.0, max_ratio=1.0, p=0.5),
        Flip(p=0.05),
        Pad(max_size=0.6, p=0.25),
        Resize(size=(args.image_size, args.image_size), keep_aspect=True)
    ])
    # TODO: don't forget to work class imbalance and data cleansing
    val_transforms = Resize(size=(args.image_size, args.image_size))

    train_dataset = DetectionDataset(args.data_path,
                                     os.path.join(args.data_path,
                                                  'train_mask.json'),
                                     transforms=train_transforms)
    val_dataset = DetectionDataset(args.data_path, None,
                                   transforms=val_transforms)

    # split dataset into train/val, don't try to do this at home ;)
    train_size = int(len(train_dataset) * args.val_split)
    val_dataset.image_names = train_dataset.image_names[train_size:]
    val_dataset.mask_names = train_dataset.mask_names[train_size:]
    train_dataset.image_names = train_dataset.image_names[:train_size]
    train_dataset.mask_names = train_dataset.mask_names[:train_size]

    # TODO: always work with the data: cleaning, sampling
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size,
                                  num_workers=8, shuffle=True, drop_last=True)
    val_dataloader = DataLoader(val_dataset, batch_size=args.batch_size,
                                num_workers=4, shuffle=False, drop_last=False)
    logger.info('Length of train/val=%d/%d',
                len(train_dataset), len(val_dataset))
    logger.info('Number of batches of train/val=%d/%d',
                len(train_dataloader), len(val_dataloader))

    try:
        train(net, optimizer, criterion, scheduler, train_dataloader,
              val_dataloader, logger=logger, args=args, device=device)
    except KeyboardInterrupt:
        # keep a snapshot of the weights when interrupted with Ctrl-C
        torch.save(net.state_dict(),
                   os.path.join(args.output_dir, 'INTERRUPTED.pth'))
        logger.info('Saved interrupt')
        sys.exit(0)
def get_train_dataflow():
    # Training dataflow for tracking: every roidb entry is a string prefixed
    # with its source dataset ("VID/", "DAVIS/", ...); preprocess dispatches
    # on that prefix and strips it before handing off to the per-dataset
    # preprocessing helper.
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    ds = DataFromList(roidbs, shuffle=True)
    # for now let's not do flipping to keep things simple
    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)
    ])
    #, #imgaug.Flip(horiz=True)])

    if cfg.MODE_HARD_MINING:
        # Load the approximate-nearest-neighbour index of hard examples plus
        # the per-dataset name lists it was built from.
        from annoy import AnnoyIndex
        hard_mining_index = AnnoyIndex(128, 'euclidean')
        hard_mining_index.load(cfg.HARD_MINING_DATA_PATH +
                               "/index_all/index.ann")
        names_path = cfg.HARD_MINING_DATA_PATH + "index_all/names.txt"
        hard_mining_names_all = []
        with open(names_path) as f:
            for l in f:
                hard_mining_names_all.append(l.strip())
        # strip the "<dataset>/" prefix from each name, per source dataset
        hard_example_names_got = [
            x[7:] for x in hard_mining_names_all if x.startswith("GOT10k/")
        ]
        hard_example_names_vid = [
            x[12:] for x in hard_mining_names_all
            if x.startswith("ImageNetVID/")
        ]
        hard_example_names_ytbvos = [
            x[11:] for x in hard_mining_names_all
            if x.startswith("YouTubeVOS/")
        ]
        hard_example_names_lasot = [
            x[6:] for x in hard_mining_names_all if x.startswith("LaSOT/")
        ]
        assert len(hard_example_names_got) > 0
        assert len(hard_example_names_vid) > 0
        assert len(hard_example_names_ytbvos) > 0
        assert len(hard_example_names_lasot) > 0
        hard_example_names_got.sort()
        hard_example_names_vid.sort()
        hard_example_names_ytbvos.sort()
        hard_example_names_lasot.sort()
        hard_mining_names = {
            "all": hard_mining_names_all,
            "GOT10k": hard_example_names_got,
            "ImageNetVID": hard_example_names_vid,
            "YouTubeVOS": hard_example_names_ytbvos,
            "LaSOT": hard_example_names_lasot
        }
    else:
        hard_mining_index = None
        hard_mining_names = None

    def preprocess(roidb):
        # dispatch on the dataset prefix; slices remove "<prefix>/"
        if roidb.startswith("VID/"):
            return _preprocess_imagenet_vid(roidb[4:], aug, hard_mining_index,
                                            hard_mining_names)
        elif roidb.startswith("DAVIS/"):
            return _preprocess_davis_like(
                roidb[6:], aug,
                os.path.join(cfg.DATA.DAVIS2017_ROOT, "Annotations", "480p"))
        elif roidb.startswith("YouTubeVOS/"):
            return _preprocess_davis_like(
                roidb[11:], aug,
                os.path.join(cfg.DATA.YOUTUBE_VOS_ROOT, "train",
                             "Annotations"), "YouTubeVOS", hard_mining_index,
                hard_mining_names)
        elif roidb.startswith("GOT10K/"):
            return _preprocess_got10k(roidb[7:], aug, hard_mining_index,
                                      hard_mining_names)
        elif roidb.startswith("LaSOT/"):
            return _preprocess_lasot(roidb[6:], aug, hard_mining_index,
                                     hard_mining_names)
        elif roidb.startswith("YouTube-BB/"):
            return _preprocess_youtube_bb(roidb[11:], aug)
        elif roidb.startswith("TrackingNet/"):
            return _preprocess_trackingnet(roidb[12:], aug)
        else:
            assert False

    #ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    #ds = MapData(ds, preprocess)
    if cfg.DATA.DEBUG_VIS or not cfg.DATA.MULTITHREAD:
        # single-threaded map: required for debug visualisation
        ds = MapData(ds, preprocess)
    else:
        #ds = MultiThreadMapData(ds, 6, preprocess)
        ds = MultiThreadMapData(ds, 8, preprocess, buffer_size=80)
    return ds
n_anchors = 5 image_shape = (128, 128) # Generate Detection Network inputs, pred = simple_detection_netowrk((128, 128, 3), n_anchors, n_classes) # Generate prior boxes strides = [4, 8, 16] scales = [10, 25, 40] ratios = [(1, 1), (1.5, 0.5), (1.2, 0.8), (0.8, 1.2), (1.4, 1.4)] prior = PriorBoxes(strides, scales, ratios) prior_boxes = prior.generate(image_shape) # Generate Dataset trainset = DetectionDataset(data_type='train') validset = DetectionDataset(data_type='validation') traingen = DetectionGenerator(trainset.config, prior.config, batch_size=64) validgen = DetectionGenerator(validset.config, prior.config, batch_size=64) # Define Loss ssd_loss = SSDLoss(1.0, 3.) # Training model = Model(inputs, pred) model.compile(Adam(1e-3), loss=SSDLoss(1.0, 3.)) rlrop = ReduceLROnPlateau(factor=0.1, min_lr=1e-6, patience=5, cooldown=3) callbacks = [] callbacks.append(rlrop) model.fit_generator(traingen, epochs=50,
def get_batch_train_dataflow(batch_size):
    """
    Return a training dataflow. Each datapoint consists of the following:

    A batch of images: (BS, h, w, 3),

    For each image

    1 or more pairs of (anchor_labels, anchor_boxes) :
    anchor_labels: (BS, h', w', maxNumAnchors)
    anchor_boxes: (BS, h', w', maxNumAnchors, 4)

    gt_boxes: (BS, maxNumAnchors, 4)
    gt_labels: (BS, maxNumAnchors)

    If MODE_MASK, gt_masks: (BS, maxNumAnchors, h, w)
    """
    print("In train dataflow")
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print("Done loading roidbs")

    # print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info("Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
        num - len(roidbs), len(roidbs)))

    # Sort by aspect ratio so images batched together need minimal padding;
    # batches (not their contents) are shuffled later at every rank.
    roidbs = sorted(roidbs, key=lambda x: float(x['width']) / float(x['height']), reverse=True)

    print("Batching roidbs")
    batched_roidbs = []

    if cfg.PREPROC.PREDEFINED_PADDING:
        # Greedily group images that share the same predefined padding shape.
        taken = [False for _ in roidbs]
        done = False
        for i, d in enumerate(roidbs):
            batch = []
            if not taken[i]:
                batch.append(d)
                padding_shape = get_padding_shape(d['height'], d['width'])
                while len(batch) < batch_size:
                    k = get_next_roidb(roidbs, i, padding_shape, taken)
                    if k is None:  # fixed: identity comparison with None (was `== None`)
                        done = True
                        break
                    batch.append(roidbs[k])
                    taken[i], taken[k] = True, True
                if not done:
                    batched_roidbs.append(batch)
    else:
        # Chunk into consecutive full batches; leftover images that do not
        # fill a complete batch are discarded.
        # BUGFIX: the previous loop only emitted a finished batch when the
        # *next* boundary was reached, so the final batch was dropped even
        # when it was completely full.
        num_full = len(roidbs) // batch_size * batch_size
        batched_roidbs = [roidbs[i:i + batch_size] for i in range(0, num_full, batch_size)]

    # batched_roidbs = sort_by_aspect_ratio(roidbs, batch_size)
    # batched_roidbs = group_by_aspect_ratio(roidbs, batch_size)
    print("Done batching roidbs")

    # Notes:
    # - discard any leftover images
    # - The batches will be shuffled, but the contents of each batch will always be the same
    # - TODO: Fix lack of batch contents shuffling

    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
         imgaug.Flip(horiz=True)])
    # aug = imgaug.AugmentorList([CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(roidb_batch):
        """Load, augment and batchify one list of roidbs into a single dict."""
        datapoint_list = []
        for roidb in roidb_batch:
            fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
            boxes = np.copy(boxes)
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            assert im is not None, fname
            im = im.astype('float32')
            # assume floatbox as input
            assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

            # augmentation:
            im, params = aug.augment_return_params(im)
            points = box_to_point8(boxes)
            points = aug.augment_coords(points, params)
            boxes = point8_to_box(points)
            assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

            ret = {'images': im}
            # rpn anchor:
            try:
                if cfg.MODE_FPN:
                    multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                    for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                        ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                        ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
                else:
                    raise NotImplementedError("[armand] Batch mode only available for FPN")

                boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
                klass = klass[is_crowd == 0]
                ret['gt_boxes'] = boxes
                ret['gt_labels'] = klass
                ret['filename'] = fname
                if not len(boxes):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
                return None

            if cfg.MODE_MASK:
                # augmentation will modify the polys in-place
                segmentation = copy.deepcopy(roidb['segmentation'])
                segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
                assert len(segmentation) == len(boxes)

                # Apply augmentation on polygon coordinates.
                # And produce one image-sized binary mask per box.
                masks = []
                for polys in segmentation:
                    polys = [aug.augment_coords(p, params) for p in polys]
                    masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
                masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
                ret['gt_masks'] = masks

            datapoint_list.append(ret)

        #################################################################################################################
        # Batchify the output
        #################################################################################################################
        # Per-level anchor labels/boxes share a shape within the batch, so
        # they can be stacked directly.
        batched_datapoint = {}
        for lvl in range(2, 7):
            for field in ('anchor_labels_lvl{}'.format(lvl), 'anchor_boxes_lvl{}'.format(lvl)):
                batched_datapoint[field] = np.stack([d[field] for d in datapoint_list])

        # The remaining fields require padding to a common size, plus storage
        # of the original (unpadded) dimensions:
        # - image (HxWx3)
        # - gt_boxes (?x4)
        # - gt_labels (?)
        # - gt_masks (?xHxW)
        """
        Find the minimum container size for images (maxW x maxH)
        Find the maximum number of ground truth boxes
        For each image, save original dimension and pad
        """
        if cfg.PREPROC.PREDEFINED_PADDING:
            padding_shapes = [get_padding_shape(*(d["images"].shape[:2])) for d in datapoint_list]
            max_height = max([shp[0] for shp in padding_shapes])
            max_width = max([shp[1] for shp in padding_shapes])
        else:
            image_dims = [d["images"].shape for d in datapoint_list]
            heights = [dim[0] for dim in image_dims]
            widths = [dim[1] for dim in image_dims]
            max_height = max(heights)
            max_width = max(widths)

        # image: zero-pad every image up to (max_height, max_width)
        padded_images = []
        original_image_dims = []
        for datapoint in datapoint_list:
            image = datapoint["images"]
            original_image_dims.append(image.shape)

            h_padding = max_height - image.shape[0]
            w_padding = max_width - image.shape[1]
            padded_image = np.pad(image,
                                  [[0, h_padding], [0, w_padding], [0, 0]],
                                  'constant')
            padded_images.append(padded_image)

        batched_datapoint["images"] = np.stack(padded_images)
        # print(batched_datapoint["images"].shape)
        batched_datapoint["orig_image_dims"] = np.stack(original_image_dims)

        # gt_boxes and gt_labels: pad to the max gt count in the batch;
        # labels are padded with -1 so padding is distinguishable from class 0.
        max_num_gts = max([d["gt_labels"].size for d in datapoint_list])

        gt_counts = []
        padded_gt_labels = []
        padded_gt_boxes = []
        padded_gt_masks = []
        for datapoint in datapoint_list:
            gt_count_for_image = datapoint["gt_labels"].size
            gt_counts.append(gt_count_for_image)

            gt_padding = max_num_gts - gt_count_for_image

            padded_gt_labels_for_img = np.pad(datapoint["gt_labels"], [0, gt_padding], 'constant', constant_values=-1)
            padded_gt_labels.append(padded_gt_labels_for_img)

            padded_gt_boxes_for_img = np.pad(datapoint["gt_boxes"],
                                             [[0, gt_padding], [0, 0]],
                                             'constant')
            padded_gt_boxes.append(padded_gt_boxes_for_img)

            h_padding = max_height - datapoint["images"].shape[0]
            w_padding = max_width - datapoint["images"].shape[1]

            if cfg.MODE_MASK:
                padded_gt_masks_for_img = np.pad(datapoint["gt_masks"],
                                                 [[0, gt_padding], [0, h_padding], [0, w_padding]],
                                                 'constant')
                padded_gt_masks.append(padded_gt_masks_for_img)

        batched_datapoint["orig_gt_counts"] = np.stack(gt_counts)
        batched_datapoint["gt_labels"] = np.stack(padded_gt_labels)
        batched_datapoint["gt_boxes"] = np.stack(padded_gt_boxes)
        batched_datapoint["filenames"] = [d["filename"] for d in datapoint_list]

        if cfg.MODE_MASK:
            batched_datapoint["gt_masks"] = np.stack(padded_gt_masks)

        return batched_datapoint

    ds = DataFromList(batched_roidbs, shuffle=True)

    if cfg.TRAINER == 'horovod':
        # ds = MapData(ds, preprocess)
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)

    return ds
"This argument is the path to the output json evaluation file") parser.add_argument('--predict', help="Run prediction on a given image. " "This argument is the path to the input image file") parser.add_argument('--config', help="A list of KEY=VALUE to overwrite those defined in config.py", nargs='+') if get_tf_version_tuple() < (1, 6): # https://github.com/tensorflow/tensorflow/issues/14657 logger.warn("TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky.") args = parser.parse_args() if args.config: cfg.update_args(args.config) MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model() DetectionDataset() # initialize the config with information from our dataset if args.visualize or args.evaluate or args.predict: assert tf.test.is_gpu_available() assert args.load finalize_configs(is_training=False) if args.predict or args.visualize: cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS if args.visualize: do_visualize(MODEL, args.load) else: predcfg = PredictConfig( model=MODEL, session_init=get_model_loader(args.load),
def get_predictor(cls):
    '''Load the trained Mask R-CNN model and cache it on the class.

    Reads tuning knobs from environment variables, locates the latest
    checkpoint under the model dir, builds an OfflinePredictor once, and
    returns the cached predictor on subsequent calls.
    '''
    with cls.lock:
        # check if model is already loaded
        if cls.predictor:
            return cls.predictor

        os.environ['TENSORPACK_FP16'] = 'true'

        # create a mask r-cnn model
        mask_rcnn_model = ResNetFPNModel(True)

        # SageMaker model dir, with a sensible local fallback
        model_dir = os.environ.get('SM_MODEL_DIR', '/opt/ml/model')
        if 'PRETRAINED_MODEL' in os.environ:
            cls.pretrained_model = os.environ['PRETRAINED_MODEL']

        # SECURITY NOTE(review): eval() on environment variables allows
        # arbitrary code execution if the environment is attacker-controlled.
        # Kept for backward compatibility (values such as "6000*2" are in
        # use); consider ast.literal_eval or plain int()/float() instead.
        def _env_eval(name, default):
            # Evaluate a numeric expression from the environment, falling
            # back to `default` when the variable is unset.
            try:
                return eval(os.environ[name])
            except KeyError:
                return default

        div = int(_env_eval('divisor', 1))
        rpn_anchor_stride = int(16 / div)
        rpn_anchor_sizes = (int(32 / div), int(64 / div), int(128 / div),
                            int(256 / div), int(512 / div))
        # explicit stride override takes precedence over the divisor-derived one
        rpn_anchor_stride = int(_env_eval('rpnanchor_stride', rpn_anchor_stride))
        nms_topk = int(_env_eval('NMS_TOPK', 2))
        nms_thresh = _env_eval('NMS_THRESH', 0.7)
        results_per_img = _env_eval('res_perimg', 400)

        # file path to previously trained mask r-cnn model
        # BUGFIX: select the checkpoint with the numerically highest step.
        # Plain string comparison ranked "model-999.index" above
        # "model-1000.index" and could load a stale checkpoint.
        model_search_path = os.path.join(model_dir, "model-*.index")
        candidates = glob.glob(model_search_path)

        def _step(path):
            # "model-<step>.index" -> <step>; non-conforming names sort first
            name = os.path.basename(path)
            try:
                return int(name[len("model-"):-len(".index")])
            except ValueError:
                return -1

        trained_model = max(candidates, key=_step) if candidates else ""
        print(f'Using model: {trained_model}')

        # fixed resnet50 backbone weights
        cfg.BACKBONE.WEIGHTS = os.path.join(cls.pretrained_model)
        cfg.MODE_FPN = True
        cfg.MODE_MASK = True
        cfg.RPN.ANCHOR_STRIDE = rpn_anchor_stride
        cfg.RPN.ANCHOR_SIZES = rpn_anchor_sizes
        cfg.RPN.TEST_PRE_NMS_TOPK = int(6000 * nms_topk)
        cfg.RPN.TEST_POST_NMS_TOPK = int(1000 * nms_topk)
        cfg.RPN.TEST_PER_LEVEL_NMS_TOPK = int(1000 * nms_topk)

        # testing -----------------------
        cfg.TEST.FRCNN_NMS_THRESH = nms_thresh
        cfg.TEST.RESULT_SCORE_THRESH = 0.05
        cfg.TEST.RESULT_SCORE_THRESH_VIS = 0.2  # only visualize confident results
        cfg.TEST.RESULTS_PER_IM = results_per_img

        # calling detection dataset gets the number of coco categories
        # and saves in the configuration
        DetectionDataset()
        finalize_configs(is_training=False)

        # Create an inference model
        # PredictConfig takes a model, input tensors and output tensors
        cls.predictor = OfflinePredictor(
            PredictConfig(
                model=mask_rcnn_model,
                session_init=get_model_loader(trained_model),
                input_names=['images', 'orig_image_dims'],
                output_names=[
                    'generate_{}_proposals_topk_per_image/boxes'.format(
                        'fpn' if cfg.MODE_FPN else 'rpn'),
                    'generate_{}_proposals_topk_per_image/scores'.format(
                        'fpn' if cfg.MODE_FPN else 'rpn'),
                    'fastrcnn_all_scores',
                    'output/boxes',
                    'output/scores',
                    'output/labels',
                    'output/masks'
                ]))
        return cls.predictor