Example #1
    def _eval(self):
        logdir = self._output_dir
        if cfg.TRAINER == 'replicated':
            all_results = multithread_predict_dataflow(self.dataflows,
                                                       self.predictors)
        else:
            if self.batched:
                local_results = predict_dataflow_batch(self.dataflow,
                                                       self.predictor)
            else:
                local_results = predict_dataflow(self.dataflow, self.predictor)

            results = gather_result_from_all_processes(local_results)
            if hvd.rank() > 0:
                return
            all_results = []
            for item in results:
                if item is not None:
                    all_results.extend(item)

        output_file = os.path.join(
            logdir, '{}-outputs{}'.format(self._eval_dataset,
                                          self.global_step))

        scores = DetectionDataset().eval_or_save_inference_results(
            all_results, self._eval_dataset, output_file)
        for k, v in scores.items():
            self.trainer.monitors.put_scalar(k, v)
Example #2
    def _eval(self):
        logdir = self._output_dir
        if cfg.TRAINER == 'replicated':
            all_results = multithread_predict_dataflow(self.dataflows, self.predictors)
        else:
            filenames = [os.path.join(
                logdir, 'outputs{}-part{}.json'.format(self.global_step, rank)
            ) for rank in range(hvd.local_size())]

            if self._horovod_run_eval:
                local_results = predict_dataflow(self.dataflow, self.predictor)
                fname = filenames[hvd.local_rank()]
                with open(fname, 'w') as f:
                    json.dump(local_results, f)
            self.barrier.eval()
            if hvd.rank() > 0:
                return
            all_results = []
            for fname in filenames:
                with open(fname, 'r') as f:
                    obj = json.load(f)
                all_results.extend(obj)
                os.unlink(fname)

        output_file = os.path.join(
            logdir, '{}-outputs{}.json'.format(self._eval_dataset, self.global_step))

        scores = DetectionDataset().eval_or_save_inference_results(
            all_results, self._eval_dataset, output_file)
        for k, v in scores.items():
            self.trainer.monitors.put_scalar(k, v)
Example #3
def do_evaluate(pred_config, output_file):
    num_gpu = cfg.TRAIN.NUM_GPUS
    graph_funcs = MultiTowerOfflinePredictor(pred_config, list(
        range(num_gpu))).get_predictors()

    for dataset in cfg.DATA.VAL:
        logger.info("Evaluating {} ...".format(dataset))
        dataflows = [
            get_eval_dataflow(dataset, shard=k, num_shards=num_gpu)
            for k in range(num_gpu)
        ]
        all_results = multithread_predict_dataflow(dataflows, graph_funcs)
        output = output_file + '-' + dataset
        DetectionDataset().eval_or_save_inference_results(
            all_results, dataset, output)
    for dataset in cfg.DATA.TEST:
        logger.info("Evaluating {} ...".format(dataset))
        dataflows = [
            get_eval_dataflow(dataset, shard=k, num_shards=num_gpu)
            for k in range(num_gpu)
        ]
        all_results = multithread_predict_dataflow(dataflows, graph_funcs)
        output = output_file + '-' + dataset
        DetectionDataset().eval_or_save_inference_results(
            all_results, dataset, output)
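The two loops above differ only in the dataset list they iterate over; if one wanted to deduplicate them, a minimal sketch (assuming the same cfg and helpers as in the example):

import itertools

def do_evaluate_compact(pred_config, output_file):
    num_gpu = cfg.TRAIN.NUM_GPUS
    graph_funcs = MultiTowerOfflinePredictor(
        pred_config, list(range(num_gpu))).get_predictors()
    # evaluate validation and test datasets with a single loop
    for dataset in itertools.chain(cfg.DATA.VAL, cfg.DATA.TEST):
        logger.info("Evaluating {} ...".format(dataset))
        dataflows = [get_eval_dataflow(dataset, shard=k, num_shards=num_gpu)
                     for k in range(num_gpu)]
        all_results = multithread_predict_dataflow(dataflows, graph_funcs)
        DetectionDataset().eval_or_save_inference_results(
            all_results, dataset, output_file + '-' + dataset)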
Example #4
    def background_coco(all_results):
        output_file = os.path.join(
            logdir, '{}-outputs{}'.format(self._eval_dataset,
                                          self.global_step))
        scores = DetectionDataset().eval_or_save_inference_results(
            all_results, self._eval_dataset, output_file)
        cfg.TRAIN.SHOULD_STOP = scores[
            'mAP(bbox)/IoU=0.5:0.95'] >= cfg.TEST.BOX_TARGET and scores[
                'mAP(segm)/IoU=0.5:0.95'] >= cfg.TEST.MASK_TARGET
        for k, v in scores.items():
            self.trainer.monitors.put_scalar(k, v)
        return
Example #5
def get_eval_dataflow(name, shard=0, num_shards=1):
    """
    Args:
        name (str): name of the dataset to evaluate
        shard, num_shards: to get subset of evaluation data
    """
    roidbs = DetectionDataset().load_inference_roidbs(name)

    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    img_range = (shard * img_per_shard, (shard + 1) *
                 img_per_shard if shard + 1 < num_shards else num_imgs)

    # the training-time filtering is not applied for evaluation
    ds = DataFromListOfDict(roidbs[img_range[0]:img_range[1]],
                            ['file_name', 'image_id'])

    def f(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im

    ds = MapDataComponent(ds, f, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return ds
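As a side note, the shard arithmetic above gives every shard num_imgs // num_shards images and lets the last shard absorb the remainder. A minimal standalone sketch of that logic (not part of the original code):

def shard_range(num_imgs, shard, num_shards):
    # mirror of the img_range computation in get_eval_dataflow above
    img_per_shard = num_imgs // num_shards
    start = shard * img_per_shard
    # the last shard takes any leftover images
    end = (shard + 1) * img_per_shard if shard + 1 < num_shards else num_imgs
    return start, end

# e.g. 10 images over 3 shards -> [(0, 3), (3, 6), (6, 10)]
print([shard_range(10, k, 3) for k in range(3)])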
Example #6
    def setUp(self):
        # sample data generator
        batch_size = 64
        self.dataset = DetectionDataset(data_type='train')
        self.imgs, self.labs_info = self.dataset[:batch_size]
        print('Number of Classes : {}'.format(self.dataset.num_classes))
        print('Image shape : {} || Label shape : {}'.format(self.imgs.shape, self.labs_info.shape))
        self.strides = [4, 8, 16]
        self.scales = [10, 25, 40]
        self.ratios = [(1, 1),
                       (1.5, 0.5),
                       (1.2, 0.8),
                       (0.8, 1.2),
                       (1.4, 1.4)]

        self.prior = PriorBoxes(self.strides, self.scales, self.ratios)
        self.prior_boxes = self.prior.generate((128, 128))  # prior boxes shape : (6720, 4)

        # Use the 0th image as the sample image.
        self.group_labs = self.labs_info.groupby('image_index')
        for ind, labs in self.group_labs:
            self.labs = labs
            break
        self.gt_boxes = self.labs[['cx', 'cy', 'w', 'h']].values
        self.gt_labels = self.labs['label'].values
        self.iou = calculate_iou(self.prior_boxes, self.gt_boxes)
        print('Ground Truths Shape : {}'.format(self.gt_boxes.shape))
        print('IOU Shape : {}'.format(self.iou.shape))
        print(list(self.labs.groupby('image_index')))
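For context, calculate_iou pairs every prior box against every ground-truth box, which is why self.iou comes out with shape (num_priors, num_gts). A reference sketch of such a pairwise IoU for (cx, cy, w, h) boxes (an illustration, not the project's own implementation):

import numpy as np

def pairwise_iou(boxes_a, boxes_b):
    # convert (cx, cy, w, h) to corner coordinates (x1, y1, x2, y2)
    def to_corners(b):
        return np.concatenate([b[:, :2] - b[:, 2:] / 2,
                               b[:, :2] + b[:, 2:] / 2], axis=1)
    a, b = to_corners(boxes_a), to_corners(boxes_b)
    tl = np.maximum(a[:, None, :2], b[None, :, :2])  # top-left of intersection
    br = np.minimum(a[:, None, 2:], b[None, :, 2:])  # bottom-right of intersection
    wh = np.clip(br - tl, 0, None)
    inter = wh[..., 0] * wh[..., 1]
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    return inter / (area_a[:, None] + area_b[None, :] - inter)  # shape (A, B)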
Example #7
    def get_predictor(cls):
        ''' load trained model'''

        with cls.lock:
            # check if model is already loaded
            if cls.predictor:
                return cls.predictor

            os.environ['TENSORPACK_FP16'] = 'true'
        
            # create a mask r-cnn model
            mask_rcnn_model = ResNetFPNModel(True)

            try:
                model_dir = os.environ['SM_MODEL_DIR']
            except KeyError:
                model_dir = '/opt/ml/model'

            try:
                cls.pretrained_model = os.environ['PRETRAINED_MODEL']
            except KeyError:
                pass

            # file path to previously trained mask r-cnn model
            latest_trained_model = ""
            model_search_path = os.path.join(model_dir, "model-*.index" )
            for model_file in glob.glob(model_search_path):
                if model_file > latest_trained_model:
                    latest_trained_model = model_file

            trained_model = latest_trained_model
            print(f'Using model: {trained_model}')

            # fixed resnet50 backbone weights
            cfg.BACKBONE.WEIGHTS = os.path.join(cls.pretrained_model)
            cfg.MODE_FPN = True
            cfg.MODE_MASK = True

            # instantiating DetectionDataset reads the number of COCO categories
            # and saves it in the configuration
            DetectionDataset()
            finalize_configs(is_training=False)

            # Create an inference model
            # PredictConfig takes a model, input tensors and output tensors
            cls.predictor = OfflinePredictor(PredictConfig(
                model=mask_rcnn_model,
                session_init=get_model_loader(trained_model),
                input_names=['images', 'orig_image_dims'],
                output_names=[
                    'generate_{}_proposals_topk_per_image/boxes'.format('fpn' if cfg.MODE_FPN else 'rpn'),
                    'generate_{}_proposals_topk_per_image/scores'.format('fpn' if cfg.MODE_FPN else 'rpn'),
                    'fastrcnn_all_scores',
                    'output/boxes',
                    'output/scores',
                    'output/labels',
                    'output/masks'
                ]))
            return cls.predictor
Example #8
class DetectionGenerator(Sequence):
    'Generates a localization dataset for Keras'
    def __init__(self, dataset:DetectionDataset, prior:PriorBoxes,
                 batch_size=32, best_match_policy=False, shuffle=True):
        'Initialization'
        # Multiprocessing only works when the arguments are passed as dicts;
        # this lets Keras fit_generator run the generator with multiprocessing.
        if isinstance(dataset, dict):
            self.dataset = DetectionDataset(**dataset)
        elif isinstance(dataset, DetectionDataset):
            self.dataset = dataset
        else:
            raise ValueError('dataset must be a dict or a DetectionDataset instance.')

        if isinstance(prior, dict):
            self.prior = PriorBoxes(**prior)
        elif isinstance(prior, PriorBoxes):
            self.prior = prior
        else:
            raise ValueError('prior must be a dict or a PriorBoxes instance.')

        self.batch_size = batch_size
        self.best_match_policy = best_match_policy
        self.shuffle = shuffle
        self.num_classes = self.dataset.num_classes
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return len(self.dataset) // self.batch_size

    def __getitem__(self, index):
        'Generate one batch of data'
        images, ground_truths = self.dataset[self.batch_size * index:
                                             self.batch_size * (index + 1)]
        pr_boxes = self.prior.generate(images.shape[1:])
        y_trues = label_generator(ground_truths.groupby('image_index'), pr_boxes, self.num_classes + 1)
        return images, y_trues

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        if self.shuffle:
            self.dataset.shuffle()
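As the constructor comments note, passing plain dicts keeps the generator picklable so Keras can run it with multiprocessing. A usage sketch along the lines of Example #21, assuming DetectionDataset and PriorBoxes expose their constructor arguments as .config dicts as shown there, and that model is an already-compiled Keras model:

trainset = DetectionDataset(data_type='train')
prior = PriorBoxes([4, 8, 16], [10, 25, 40], [(1, 1), (1.5, 0.5)])

# pass config dicts (not live objects) so worker processes can rebuild them
traingen = DetectionGenerator(trainset.config, prior.config, batch_size=32)
model.fit_generator(traingen, epochs=50,
                    workers=4, use_multiprocessing=True)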
Example #9
    def _init_model(self):
        logger.set_logger_dir("/tmp/test_log/", 'd')
        from dataset import DetectionDataset
        from train import ResNetFPNTrackModel
        # init tensorpack model
        cfg.freeze(False)
        model = ResNetFPNTrackModel()
        DetectionDataset()  # initialize the config with information from our dataset
        finalize_configs(is_training=False)
        return model
Example #10
def get_batched_eval_dataflow(name, shard=0, num_shards=1, batch_size=1):
    """
    Args:
        name (str): name of the dataset to evaluate
        shard, num_shards: to get subset of evaluation data
    """
    roidbs = DetectionDataset().load_inference_roidbs(name)

    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    img_range = (shard * img_per_shard, (shard + 1) *
                 img_per_shard if shard + 1 < num_shards else num_imgs)

    # the training-time filtering is not applied for evaluation
    ds = DataFromListOfDictBatched(roidbs[img_range[0]:img_range[1]],
                                   ['file_name', 'image_id'], batch_size)

    def decode_images(inputs):
        return [[cv2.imread(inp[0], cv2.IMREAD_COLOR), inp[1]]
                for inp in inputs]

    def resize_images(inputs):
        resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                               cfg.PREPROC.MAX_SIZE)
        resized_imgs = [resizer.augment(inp[0]) for inp in inputs]
        org_shapes = [inp[0].shape for inp in inputs]
        scales = [
            np.sqrt(rimg.shape[0] * 1.0 / org_shape[0] * rimg.shape[1] /
                    org_shape[1])
            for rimg, org_shape in zip(resized_imgs, org_shapes)
        ]

        return [[resized_imgs[i], inp[1], scales[i], org_shapes[i][:2]]
                for i, inp in enumerate(inputs)]

    def pad_and_batch(inputs):
        heights, widths, _ = zip(*[inp[0].shape for inp in inputs])
        max_h, max_w = max(heights), max(widths)
        padded_images = np.stack([
            np.pad(inp[0], [[0, max_h - inp[0].shape[0]],
                            [0, max_w - inp[0].shape[1]], [0, 0]], 'constant')
            for inp in inputs
        ])
        return [
            padded_images, [inp[1] for inp in inputs],
            list(zip(heights, widths)), [inp[2] for inp in inputs],
            [inp[3] for inp in inputs]
        ]

    ds = MapData(ds, decode_images)
    ds = MapData(ds, resize_images)
    ds = MapData(ds, pad_and_batch)
    return ds
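The scale computed in resize_images above is the geometric mean of the height and width resize ratios; when CustomResize scales both sides uniformly the two ratios coincide. A standalone check of that arithmetic:

import numpy as np

org_shape = (480, 640)      # original (h, w)
resized_shape = (600, 800)  # hypothetical output of CustomResize
scale = np.sqrt(resized_shape[0] / org_shape[0]
                * resized_shape[1] / org_shape[1])
print(scale)  # 1.25 -- both sides were scaled by 1.25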
Example #11
    def __init__(self, name, need_network=True, need_img=True, model="best"):
        super().__init__(name=name, is_deterministic=True)
        self._resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                                     cfg.PREPROC.MAX_SIZE)
        self._prev_box = None
        self._ff_gt_feats = None
        self._need_network = need_network
        self._need_img = need_img
        self._rotated_bbox = None

        if need_network:
            logger.set_logger_dir(
                "/tmp/test_log_/" + str(random.randint(0, 10000)), 'd')
            if model == "best":
                load = "train_log/hard_mining3/model-1360500"
            elif model == "nohardexamples":
                load = "train_log/condrcnn_all_2gpu_lrreduce2/model-1200500"
            elif model == "newrpn":
                load = "train_log/newrpn1/model"
            elif model == "resnet50_nohardexamples":
                load = "train_log/condrcnn_all_resnet50/model-1200500"
                cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 6, 3]
            elif model == "resnet50":
                load = "train_log/hard_mining3_resnet50/model-1360500"
                cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 6, 3]
            elif model == "gotonly":
                load = "train_log/hard_mining3_onlygot/model-1361000"
            elif model.startswith("checkpoint:"):
                load = model.replace("checkpoint:", "")
            else:
                assert False, ("unknown model", model)
            from dataset import DetectionDataset
            # init tensorpack model
            # cfg.freeze(False)
            DetectionDataset()  # initialize the config with information from our dataset

            cfg.EXTRACT_GT_FEATURES = True
            cfg.MODE_TRACK = False
            extract_model = ResNetFPNModel()
            extract_ff_feats_cfg = PredictConfig(
                model=extract_model,
                session_init=get_model_loader(load),
                input_names=['image', 'roi_boxes'],
                output_names=['rpn/feature'])
            finalize_configs(is_training=False)
            self._extract_func = OfflinePredictor(extract_ff_feats_cfg)

            cfg.EXTRACT_GT_FEATURES = False
            cfg.MODE_TRACK = True
            cfg.USE_PRECOMPUTED_REF_FEATURES = True
            self._pred_func = self._make_pred_func(load)
Example #12
    def __init__(self, dataset:DetectionDataset, prior:PriorBoxes,
                 batch_size=32, best_match_policy=False, shuffle=True):
        'Initialization'
        # Multiprocessing only works when the arguments are passed as dicts;
        # this lets Keras fit_generator run the generator with multiprocessing.
        if isinstance(dataset, dict):
            self.dataset = DetectionDataset(**dataset)
        elif isinstance(dataset, DetectionDataset):
            self.dataset = dataset
        else:
            raise ValueError('dataset must be a dict or a DetectionDataset instance.')

        if isinstance(prior, dict):
            self.prior = PriorBoxes(**prior)
        elif isinstance(prior, PriorBoxes):
            self.prior = prior
        else:
            raise ValueError('prior must be a dict or a PriorBoxes instance.')

        self.batch_size = batch_size
        self.best_match_policy = best_match_policy
        self.shuffle = shuffle
        self.num_classes = self.dataset.num_classes
        self.on_epoch_end()
Example #13
def print_class_histogram(roidbs):
    """
    Args:
        roidbs (list[dict]): the same format as the output of `load_training_roidbs`.
    """
    dataset = DetectionDataset()
    hist_bins = np.arange(dataset.num_classes + 1)

    # Histogram of ground-truth objects
    gt_hist = np.zeros((dataset.num_classes,), dtype=np.int64)  # np.int is deprecated
    for entry in roidbs:
        # filter crowd?
        gt_inds = np.where((entry['class'] > 0) & (entry['is_crowd'] == 0))[0]
        gt_classes = entry['class'][gt_inds]
        gt_hist += np.histogram(gt_classes, bins=hist_bins)[0]
    data = [[dataset.class_names[i], v] for i, v in enumerate(gt_hist)]
    data.append(['total', sum([x[1] for x in data])])
    table = tabulate(data, headers=['class', '#box'], tablefmt='pipe')
    logger.info("Ground-Truth Boxes:\n" + colored(table, 'cyan'))
Example #14
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
            'boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret[
                    'anchor_boxes'] = get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
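The filter near the top keeps only images with at least one non-crowd ground-truth box; the boolean-indexing idiom it uses can be checked in isolation:

import numpy as np

img = {'boxes': np.zeros((3, 4), dtype=np.float32),
       'is_crowd': np.array([1, 0, 1])}
# one of the three boxes is non-crowd, so the image passes the filter
print(len(img['boxes'][img['is_crowd'] == 0]) > 0)  # True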
Example #15
    def setUp(self):
        dataset = DetectionDataset(data_type='train')
        self.train_imgs, _ = dataset[:2000]
Example #16
    def get_predictor(cls):
        """load trained model"""

        with cls.lock:
            # check if model is already loaded
            if cls.predictor:
                return cls.predictor

            os.environ["TENSORPACK_FP16"] = "true"

            # create a mask r-cnn model
            mask_rcnn_model = ResNetFPNModel(True)

            try:
                model_dir = os.environ["SM_MODEL_DIR"]
            except KeyError:
                model_dir = "/opt/ml/model"
            try:
                resnet_arch = os.environ["RESNET_ARCH"]
            except KeyError:
                resnet_arch = "resnet50"

            # file path to previously trained mask r-cnn model
            latest_trained_model = ""
            model_search_path = os.path.join(model_dir, "model-*.index")
            for model_file in glob.glob(model_search_path):
                if model_file > latest_trained_model:
                    latest_trained_model = model_file

            trained_model = latest_trained_model
            print(f"Using model: {trained_model}")

            # configure FPN/mask modes and the resnet backbone depth
            cfg.MODE_FPN = True
            cfg.MODE_MASK = True
            if resnet_arch == "resnet101":
                cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 23, 3]
            else:
                cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 6, 3]

            cfg_prefix = "CONFIG__"
            for key, value in dict(os.environ).items():
                if key.startswith(cfg_prefix):
                    attr_name = key[len(cfg_prefix):]
                    attr_name = attr_name.replace("__", ".")
                    value = eval(value)
                    print(f"update config: {attr_name}={value}")
                    nested_var = cfg
                    attr_list = attr_name.split(".")
                    for attr in attr_list[0:-1]:
                        nested_var = getattr(nested_var, attr)
                    setattr(nested_var, attr_list[-1], value)

            # instantiating DetectionDataset reads the number of COCO categories
            # and saves it in the configuration
            DetectionDataset()
            finalize_configs(is_training=False)

            # Create an inference model
            # PredictConfig takes a model, input tensors and output tensors
            cls.predictor = OfflinePredictor(
                PredictConfig(
                    model=mask_rcnn_model,
                    session_init=get_model_loader(trained_model),
                    input_names=["images", "orig_image_dims"],
                    output_names=[
                        "generate_{}_proposals_topk_per_image/boxes".format(
                            "fpn" if cfg.MODE_FPN else "rpn"),
                        "generate_{}_proposals_topk_per_image/scores".format(
                            "fpn" if cfg.MODE_FPN else "rpn"),
                        "fastrcnn_all_scores",
                        "output/boxes",
                        "output/scores",
                        "output/labels",
                        "output/masks",
                    ],
                ))
            return cls.predictor
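The CONFIG__ loop above maps environment variables onto nested cfg attributes, with double underscores standing in for dots. A usage sketch with a hypothetical override:

import os

# hypothetical override, set before get_predictor() runs:
os.environ["CONFIG__TEST__RESULT_SCORE_THRESH"] = "0.5"
# inside the loop this becomes attr_name = "TEST.RESULT_SCORE_THRESH",
# i.e. setattr(cfg.TEST, "RESULT_SCORE_THRESH", eval("0.5"))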
Example #17
def main():

    parser = ArgumentParser()
    parser.add_argument('-d',
                        '--data_path',
                        dest='data_path',
                        type=str,
                        default='../../data/',
                        help='path to the data')
    parser.add_argument('-e',
                        '--epochs',
                        dest='epochs',
                        default=1,
                        type=int,
                        help='number of epochs')
    parser.add_argument('-b',
                        '--batch_size',
                        dest='batch_size',
                        default=1,
                        type=int,
                        help='batch size')
    parser.add_argument('-v',
                        '--val_split',
                        dest='val_split',
                        default=0.8,
                        type=float,
                        help='train/val split')

    args = parser.parse_args()

    DETECTOR_MODEL_PATH = '../pretrained/detector.pt'

    all_marks = load_json(os.path.join(args.data_path, 'train.json'))
    test_start = int(args.val_split * len(all_marks))
    train_marks = all_marks[:test_start]
    val_marks = all_marks[test_start:]

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    my_transforms = transforms.Compose([transforms.ToTensor()])

    train_dataset = DetectionDataset(marks=train_marks,
                                     img_folder=args.data_path,
                                     transforms=my_transforms)
    val_dataset = DetectionDataset(marks=val_marks,
                                   img_folder=args.data_path,
                                   transforms=my_transforms)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  drop_last=True,
                                  num_workers=4,
                                  collate_fn=collate_fn)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                drop_last=False,
                                num_workers=4,
                                collate_fn=collate_fn)

    torch.cuda.empty_cache()
    gc.collect()
    model = get_detector_model()

    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=20,
                                                           factor=0.5,
                                                           verbose=True)

    try:
        train(model,
              optimizer,
              scheduler,
              train_dataloader,
              val_dataloader,
              DETECTOR_MODEL_PATH,
              args.epochs,
              device=device)
    except KeyboardInterrupt:
        torch.save(model.state_dict(), DETECTOR_MODEL_PATH + '_INTERRUPTED')
        #logger.info('Saved interrupt')
        sys.exit(0)
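The collate_fn passed to both DataLoaders is not shown in this example. Detection batches carry a variable number of boxes per image, so a common choice (an assumption here, not the original code) is to collate samples into tuples instead of stacking ragged tensors:

def collate_fn(batch):
    # turn a list of (image, target) pairs into (images, targets) tuples
    return tuple(zip(*batch))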
Example #18
def main():
    parser = ArgumentParser()
    parser.add_argument('-d',
                        '--data_path',
                        dest='data_path',
                        type=str,
                        default=None,
                        help='path to the data')
    parser.add_argument('-e',
                        '--epochs',
                        dest='epochs',
                        default=20,
                        type=int,
                        help='number of epochs')
    parser.add_argument('-b',
                        '--batch_size',
                        dest='batch_size',
                        default=40,
                        type=int,
                        help='batch size')
    parser.add_argument('-s',
                        '--image_size',
                        dest='image_size',
                        default=256,
                        type=int,
                        help='input image size')
    parser.add_argument('-lr',
                        '--learning_rate',
                        dest='lr',
                        default=0.0001,
                        type=float,
                        help='learning rate')
    parser.add_argument('-wd',
                        '--weight_decay',
                        dest='weight_decay',
                        default=5e-4,
                        type=float,
                        help='weight decay')
    parser.add_argument('-lrs',
                        '--learning_rate_step',
                        dest='lr_step',
                        default=10,
                        type=int,
                        help='learning rate step')
    parser.add_argument('-lrg',
                        '--learning_rate_gamma',
                        dest='lr_gamma',
                        default=0.5,
                        type=float,
                        help='learning rate gamma')
    parser.add_argument(
        '-m',
        '--model',
        dest='model',
        default='fpn',
    )
    parser.add_argument('-w',
                        '--weight_bce',
                        default=0.5,
                        type=float,
                        help='weight BCE loss')
    parser.add_argument('-l',
                        '--load',
                        dest='load',
                        default=False,
                        help='load file model')
    parser.add_argument('-v',
                        '--val_split',
                        dest='val_split',
                        default=0.7,
                        type=float,
                        help='train/val split')
    parser.add_argument('-o',
                        '--output_dir',
                        dest='output_dir',
                        default='./output',
                        help='dir to save log and models')
    args = parser.parse_args()

    os.makedirs(args.output_dir, exist_ok=True)
    logger = get_logger(os.path.join(args.output_dir, 'train.log'))
    logger.info('Start training with params:')
    for arg, value in sorted(vars(args).items()):
        logger.info("Argument %s: %r", arg, value)


    # net = UNet()  # TODO: try a more novel arch or/and more lightweight blocks (mobilenet) to enlarge the batch_size
    # net = smp.FPN('mobilenet_v2', encoder_weights='imagenet', classes=2)
    net = smp.FPN('se_resnet50', encoder_weights='imagenet', classes=2)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if args.load:
        net.load_state_dict(torch.load(args.load))
    logger.info('Model type: {}'.format(net.__class__.__name__))

    net.to(device)

    optimizer = optim.Adam(net.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    criterion = lambda x, y: (args.weight_bce * nn.BCELoss()(x, y),
                              (1. - args.weight_bce) * dice_loss(x, y))
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step, gamma=args.lr_gamma) \
        if args.lr_step > 0 else None

    train_transforms = Compose([
        Crop(min_size=1 - 1 / 3., min_ratio=1.0, max_ratio=1.0, p=0.5),
        Flip(p=0.05),
        RandomRotate(),
        Pad(max_size=0.6, p=0.25),
        Resize(size=(args.image_size, args.image_size), keep_aspect=True),
        ScaleToZeroOne(),
    ])
    val_transforms = Compose([
        Resize(size=(args.image_size, args.image_size)),
        ScaleToZeroOne(),
    ])

    train_dataset = DetectionDataset(args.data_path,
                                     os.path.join(args.data_path,
                                                  'train_mask.json'),
                                     transforms=train_transforms)
    val_dataset = DetectionDataset(args.data_path,
                                   None,
                                   transforms=val_transforms)

    train_size = int(len(train_dataset) * args.val_split)
    val_dataset.image_names = train_dataset.image_names[train_size:]
    val_dataset.mask_names = train_dataset.mask_names[train_size:]
    train_dataset.image_names = train_dataset.image_names[:train_size]
    train_dataset.mask_names = train_dataset.mask_names[:train_size]
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  num_workers=8,
                                  shuffle=True,
                                  drop_last=True)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                num_workers=4,
                                shuffle=False,
                                drop_last=False)
    logger.info('Number of batches of train/val=%d/%d', len(train_dataloader),
                len(val_dataloader))

    try:
        train(net,
              optimizer,
              criterion,
              scheduler,
              train_dataloader,
              val_dataloader,
              logger=logger,
              args=args,
              device=device)
    except KeyboardInterrupt:
        torch.save(
            net.state_dict(),
            os.path.join(args.output_dir, f'{args.model}_INTERRUPTED.pth'))
        logger.info('Saved interrupt')
        sys.exit(0)
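Note that criterion above returns the weighted BCE and Dice terms as a pair rather than a single scalar, so the train loop presumably combines them. A sketch of that convention (assuming pred and target tensors from a training step):

bce, dice = criterion(pred, target)
loss = bce + dice  # the weights were already applied inside criterion
loss.backward()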
Example #19
def main():
    parser = ArgumentParser()
    parser.add_argument('-d',
                        '--data_path',
                        dest='data_path',
                        type=str,
                        default=None,
                        help='path to the data')
    parser.add_argument('-e',
                        '--epochs',
                        dest='epochs',
                        default=20,
                        type=int,
                        help='number of epochs')
    parser.add_argument('-b',
                        '--batch_size',
                        dest='batch_size',
                        default=40,
                        type=int,
                        help='batch size')
    parser.add_argument('-s',
                        '--image_size',
                        dest='image_size',
                        default=256,
                        type=int,
                        help='input image size')
    parser.add_argument('-lr',
                        '--learning_rate',
                        dest='lr',
                        default=0.0001,
                        type=float,
                        help='learning rate')
    parser.add_argument('-wd',
                        '--weight_decay',
                        dest='weight_decay',
                        default=5e-4,
                        type=float,
                        help='weight decay')
    parser.add_argument('-lrs',
                        '--learning_rate_step',
                        dest='lr_step',
                        default=10,
                        type=int,
                        help='learning rate step')
    parser.add_argument('-lrg',
                        '--learning_rate_gamma',
                        dest='lr_gamma',
                        default=0.5,
                        type=float,
                        help='learning rate gamma')
    parser.add_argument('-m',
                        '--model',
                        dest='model',
                        default='unet',
                        choices=('unet', ))
    parser.add_argument('-w',
                        '--weight_bce',
                        default=0.5,
                        type=float,
                        help='weight BCE loss')
    parser.add_argument('-l',
                        '--load',
                        dest='load',
                        default=False,
                        help='load file model')
    parser.add_argument('-v',
                        '--val_split',
                        dest='val_split',
                        default=0.8,
                        type=float,
                        help='train/val split')
    parser.add_argument('-o',
                        '--output_dir',
                        dest='output_dir',
                        default='/tmp/logs/',
                        help='dir to save log and models')
    args = parser.parse_args()
    #
    os.makedirs(args.output_dir, exist_ok=True)
    logger = get_logger(os.path.join(args.output_dir, 'train.log'))
    logger.info('Start training with params:')
    for arg, value in sorted(vars(args).items()):
        logger.info("Argument %s: %r", arg, value)
    #
    net = UNet()  # TODO: try a more novel arch or/and more lightweight blocks (mobilenet) to enlarge the batch_size
    # TODO: img_size=256 is rather mediocre, try to optimize network for at least 512
    logger.info('Model type: {}'.format(net.__class__.__name__))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if args.load:
        net.load_state_dict(torch.load(args.load))
    net.to(device)
    # net = nn.DataParallel(net)

    optimizer = optim.Adam(net.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    # TODO: loss experimentation, fight class imbalance, there're many ways you can tackle this challenge
    criterion = lambda x, y: (args.weight_bce * nn.BCELoss()(x, y),
                              (1. - args.weight_bce) * dice_loss(x, y))
    # TODO: you can always try on plateau scheduler as a default option
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step, gamma=args.lr_gamma) \
        if args.lr_step > 0 else None

    # dataset
    # TODO: to work on transformations a lot, look at albumentations package for inspiration
    train_transforms = Compose([
        Crop(min_size=1 - 1 / 3., min_ratio=1.0, max_ratio=1.0, p=0.5),
        Flip(p=0.05),
        Pad(max_size=0.6, p=0.25),
        Resize(size=(args.image_size, args.image_size), keep_aspect=True)
    ])
    # TODO: don't forget to work class imbalance and data cleansing
    val_transforms = Resize(size=(args.image_size, args.image_size))

    train_dataset = DetectionDataset(args.data_path,
                                     os.path.join(args.data_path,
                                                  'train_mask.json'),
                                     transforms=train_transforms)
    val_dataset = DetectionDataset(args.data_path,
                                   None,
                                   transforms=val_transforms)

    # split dataset into train/val, don't try to do this at home ;)
    train_size = int(len(train_dataset) * args.val_split)
    val_dataset.image_names = train_dataset.image_names[train_size:]
    val_dataset.mask_names = train_dataset.mask_names[train_size:]
    train_dataset.image_names = train_dataset.image_names[:train_size]
    train_dataset.mask_names = train_dataset.mask_names[:train_size]

    # TODO: always work with the data: cleaning, sampling
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  num_workers=8,
                                  shuffle=True,
                                  drop_last=True)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                num_workers=4,
                                shuffle=False,
                                drop_last=False)
    logger.info('Length of train/val=%d/%d', len(train_dataset),
                len(val_dataset))
    logger.info('Number of batches of train/val=%d/%d', len(train_dataloader),
                len(val_dataloader))

    try:
        train(net,
              optimizer,
              criterion,
              scheduler,
              train_dataloader,
              val_dataloader,
              logger=logger,
              args=args,
              device=device)
    except KeyboardInterrupt:
        torch.save(net.state_dict(),
                   os.path.join(args.output_dir, 'INTERRUPTED.pth'))
        logger.info('Saved interrupt')
        sys.exit(0)
Example #20
def get_train_dataflow():
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    ds = DataFromList(roidbs, shuffle=True)
    # for now let's not do flipping to keep things simple
    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)
        # imgaug.Flip(horiz=True)
    ])

    if cfg.MODE_HARD_MINING:
        from annoy import AnnoyIndex
        hard_mining_index = AnnoyIndex(128, 'euclidean')
        hard_mining_index.load(cfg.HARD_MINING_DATA_PATH +
                               "/index_all/index.ann")
        names_path = cfg.HARD_MINING_DATA_PATH + "/index_all/names.txt"
        hard_mining_names_all = []
        with open(names_path) as f:
            for line in f:
                hard_mining_names_all.append(line.strip())
            hard_example_names_got = [
                x[7:] for x in hard_mining_names_all if x.startswith("GOT10k/")
            ]
            hard_example_names_vid = [
                x[12:] for x in hard_mining_names_all
                if x.startswith("ImageNetVID/")
            ]
            hard_example_names_ytbvos = [
                x[11:] for x in hard_mining_names_all
                if x.startswith("YouTubeVOS/")
            ]
            hard_example_names_lasot = [
                x[6:] for x in hard_mining_names_all if x.startswith("LaSOT/")
            ]
            assert len(hard_example_names_got) > 0
            assert len(hard_example_names_vid) > 0
            assert len(hard_example_names_ytbvos) > 0
            assert len(hard_example_names_lasot) > 0
            hard_example_names_got.sort()
            hard_example_names_vid.sort()
            hard_example_names_ytbvos.sort()
            hard_example_names_lasot.sort()
            hard_mining_names = {
                "all": hard_mining_names_all,
                "GOT10k": hard_example_names_got,
                "ImageNetVID": hard_example_names_vid,
                "YouTubeVOS": hard_example_names_ytbvos,
                "LaSOT": hard_example_names_lasot
            }
    else:
        hard_mining_index = None
        hard_mining_names = None

    def preprocess(roidb):
        if roidb.startswith("VID/"):
            return _preprocess_imagenet_vid(roidb[4:], aug, hard_mining_index,
                                            hard_mining_names)
        elif roidb.startswith("DAVIS/"):
            return _preprocess_davis_like(
                roidb[6:], aug,
                os.path.join(cfg.DATA.DAVIS2017_ROOT, "Annotations", "480p"))
        elif roidb.startswith("YouTubeVOS/"):
            return _preprocess_davis_like(
                roidb[11:], aug,
                os.path.join(cfg.DATA.YOUTUBE_VOS_ROOT, "train",
                             "Annotations"), "YouTubeVOS", hard_mining_index,
                hard_mining_names)
        elif roidb.startswith("GOT10K/"):
            return _preprocess_got10k(roidb[7:], aug, hard_mining_index,
                                      hard_mining_names)
        elif roidb.startswith("LaSOT/"):
            return _preprocess_lasot(roidb[6:], aug, hard_mining_index,
                                     hard_mining_names)
        elif roidb.startswith("YouTube-BB/"):
            return _preprocess_youtube_bb(roidb[11:], aug)
        elif roidb.startswith("TrackingNet/"):
            return _preprocess_trackingnet(roidb[12:], aug)
        else:
            assert False

    #ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    #ds = MapData(ds, preprocess)
    if cfg.DATA.DEBUG_VIS or not cfg.DATA.MULTITHREAD:
        ds = MapData(ds, preprocess)
    else:
        #ds = MultiThreadMapData(ds, 6, preprocess)
        ds = MultiThreadMapData(ds, 8, preprocess, buffer_size=80)
    return ds
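Here each roidb is a string key whose prefix picks the dataset-specific preprocessor, and the slice offset strips the prefix before dispatch. A quick check with a hypothetical key (len("GOT10K/") == 7, matching roidb[7:]):

roidb = "GOT10K/train/GOT-10k_Train_000001"  # hypothetical key
assert roidb.startswith("GOT10K/")
print(roidb[7:])  # "train/GOT-10k_Train_000001"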
Example #21
    n_anchors = 5
    image_shape = (128, 128)

    # Generate Detection Network
    inputs, pred = simple_detection_netowrk((128, 128, 3), n_anchors,
                                            n_classes)

    # Generate prior boxes
    strides = [4, 8, 16]
    scales = [10, 25, 40]
    ratios = [(1, 1), (1.5, 0.5), (1.2, 0.8), (0.8, 1.2), (1.4, 1.4)]
    prior = PriorBoxes(strides, scales, ratios)
    prior_boxes = prior.generate(image_shape)

    # Generate Dataset
    trainset = DetectionDataset(data_type='train')
    validset = DetectionDataset(data_type='validation')
    traingen = DetectionGenerator(trainset.config, prior.config, batch_size=64)
    validgen = DetectionGenerator(validset.config, prior.config, batch_size=64)
    # Define Loss
    ssd_loss = SSDLoss(1.0, 3.)

    # Training
    model = Model(inputs, pred)
    model.compile(Adam(1e-3), loss=ssd_loss)

    rlrop = ReduceLROnPlateau(factor=0.1, min_lr=1e-6, patience=5, cooldown=3)
    callbacks = []
    callbacks.append(rlrop)
    model.fit_generator(traingen,
                        epochs=50,
                        # the snippet was truncated here; these trailing arguments
                        # are an assumed completion, not the original code
                        validation_data=validgen,
                        callbacks=callbacks)
Example #22
def get_batch_train_dataflow(batch_size):
    """
    Return a training dataflow. Each datapoint consists of the following:

    A batch of images: (BS, h, w, 3),

    For each image

    1 or more pairs of (anchor_labels, anchor_boxes) :
    anchor_labels: (BS, h', w', maxNumAnchors)
    anchor_boxes: (BS, h', w', maxNumAnchors, 4)

    gt_boxes: (BS, maxNumAnchors, 4)
    gt_labels: (BS, maxNumAnchors)

    If MODE_MASK, gt_masks: (BS, maxNumAnchors, h, w)
    """
    print("In train dataflow")
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print("Done loading roidbs")

    # print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info("Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
        num - len(roidbs), len(roidbs)))

    roidbs = sorted(roidbs, key=lambda x: float(x['width']) / float(x['height']), reverse=True)     # will shuffle it later at every rank

    print("Batching roidbs")
    batched_roidbs = []

    if cfg.PREPROC.PREDEFINED_PADDING:
        taken = [False for _ in roidbs]
        done = False

        for i, d in enumerate(roidbs):
            batch = []
            if not taken[i]:
                batch.append(d)
                padding_shape = get_padding_shape(d['height'], d['width'])
                while len(batch) < batch_size:
                    k = get_next_roidb(roidbs, i, padding_shape, taken)
                    if k is None:
                        done = True
                        break
                    batch.append(roidbs[k])
                    taken[i], taken[k] = True, True
                if not done:
                    batched_roidbs.append(batch)
    else:
        batch = []
        for i, d in enumerate(roidbs):
            if i % batch_size == 0:
                if len(batch) == batch_size:
                    batched_roidbs.append(batch)
                batch = []
            batch.append(d)

    #batched_roidbs = sort_by_aspect_ratio(roidbs, batch_size)
    #batched_roidbs = group_by_aspect_ratio(roidbs, batch_size)
    print("Done batching roidbs")


    # Notes:
    #   - discard any leftover images
    #   - The batches will be shuffled, but the contents of each batch will always be the same
    #   - TODO: Fix lack of batch contents shuffling


    aug = imgaug.AugmentorList(
         [CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
          imgaug.Flip(horiz=True)])

    # aug = imgaug.AugmentorList([CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])


    def preprocess(roidb_batch):
        datapoint_list = []
        for roidb in roidb_batch:
            fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
            boxes = np.copy(boxes)
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            assert im is not None, fname
            im = im.astype('float32')
            # assume floatbox as input
            assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

            # augmentation:
            im, params = aug.augment_return_params(im)
            points = box_to_point8(boxes)
            points = aug.augment_coords(points, params)
            boxes = point8_to_box(points)
            assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

            ret = {'images': im}
            # rpn anchor:
            try:
                if cfg.MODE_FPN:
                    multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                    for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                        ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                        ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
                else:
                    raise NotImplementedError("[armand] Batch mode only available for FPN")

                boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
                klass = klass[is_crowd == 0]
                ret['gt_boxes'] = boxes
                ret['gt_labels'] = klass
                ret['filename'] = fname
                if not len(boxes):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
                return None

            if cfg.MODE_MASK:
                # augmentation will modify the polys in-place
                segmentation = copy.deepcopy(roidb['segmentation'])
                segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
                assert len(segmentation) == len(boxes)

                # Apply augmentation on polygon coordinates.
                # And produce one image-sized binary mask per box.
                masks = []
                for polys in segmentation:
                    polys = [aug.augment_coords(p, params) for p in polys]
                    masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
                masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
                ret['gt_masks'] = masks

            datapoint_list.append(ret)

        #################################################################################################################
        # Batchify the output
        #################################################################################################################

        # Now we need to batch the various fields

        # Easily stackable:
        # - anchor_labels_lvl2
        # - anchor_boxes_lvl2
        # - anchor_labels_lvl3
        # - anchor_boxes_lvl3
        # - anchor_labels_lvl4
        # - anchor_boxes_lvl4
        # - anchor_labels_lvl5
        # - anchor_boxes_lvl5
        # - anchor_labels_lvl6
        # - anchor_boxes_lvl6

        batched_datapoint = {}
        for stackable_field in ["anchor_labels_lvl2",
                                "anchor_boxes_lvl2",
                                "anchor_labels_lvl3",
                                "anchor_boxes_lvl3",
                                "anchor_labels_lvl4",
                                "anchor_boxes_lvl4",
                                "anchor_labels_lvl5",
                                "anchor_boxes_lvl5",
                                "anchor_labels_lvl6",
                                "anchor_boxes_lvl6"]:
            batched_datapoint[stackable_field] = np.stack([d[stackable_field] for d in datapoint_list])

        # Require padding and original dimension storage
        # - image (HxWx3)
        # - gt_boxes (?x4)
        # - gt_labels (?)
        # - gt_masks (?xHxW)

        """
        Find the minimum container size for images (maxW x maxH)
        Find the maximum number of ground truth boxes
        For each image, save original dimension and pad
        """

        if cfg.PREPROC.PREDEFINED_PADDING:
            padding_shapes = [get_padding_shape(*(d["images"].shape[:2])) for d in datapoint_list]
            max_height = max([shp[0] for shp in padding_shapes])
            max_width = max([shp[1] for shp in padding_shapes])
        else:
            image_dims = [d["images"].shape for d in datapoint_list]
            heights = [dim[0] for dim in image_dims]
            widths = [dim[1] for dim in image_dims]

            max_height = max(heights)
            max_width = max(widths)


        # image
        padded_images = []
        original_image_dims = []
        for datapoint in datapoint_list:
            image = datapoint["images"]
            original_image_dims.append(image.shape)

            h_padding = max_height - image.shape[0]
            w_padding = max_width - image.shape[1]

            padded_image = np.pad(image,
                                  [[0, h_padding],
                                   [0, w_padding],
                                   [0, 0]],
                                  'constant')

            padded_images.append(padded_image)

        batched_datapoint["images"] = np.stack(padded_images)
        #print(batched_datapoint["images"].shape)
        batched_datapoint["orig_image_dims"] = np.stack(original_image_dims)


        # gt_boxes and gt_labels
        max_num_gts = max([d["gt_labels"].size for d in datapoint_list])

        gt_counts = []
        padded_gt_labels = []
        padded_gt_boxes = []
        padded_gt_masks = []
        for datapoint in datapoint_list:
            gt_count_for_image = datapoint["gt_labels"].size
            gt_counts.append(gt_count_for_image)

            gt_padding = max_num_gts - gt_count_for_image

            padded_gt_labels_for_img = np.pad(datapoint["gt_labels"], [0, gt_padding], 'constant', constant_values=-1)
            padded_gt_labels.append(padded_gt_labels_for_img)

            padded_gt_boxes_for_img = np.pad(datapoint["gt_boxes"],
                                             [[0, gt_padding],
                                              [0,0]],
                                             'constant')
            padded_gt_boxes.append(padded_gt_boxes_for_img)

            h_padding = max_height - datapoint["images"].shape[0]
            w_padding = max_width - datapoint["images"].shape[1]

            if cfg.MODE_MASK:
                padded_gt_masks_for_img = np.pad(datapoint["gt_masks"],
                                         [[0, gt_padding],
                                          [0, h_padding],
                                          [0, w_padding]],
                                         'constant')
                padded_gt_masks.append(padded_gt_masks_for_img)

        batched_datapoint["orig_gt_counts"] = np.stack(gt_counts)
        batched_datapoint["gt_labels"] = np.stack(padded_gt_labels)
        batched_datapoint["gt_boxes"] = np.stack(padded_gt_boxes)
        batched_datapoint["filenames"] = [d["filename"] for d in datapoint_list]

        if cfg.MODE_MASK:
            batched_datapoint["gt_masks"] = np.stack(padded_gt_masks)



        return batched_datapoint

    ds = DataFromList(batched_roidbs, shuffle=True)



    if cfg.TRAINER == 'horovod':
        # ds = MapData(ds, preprocess)
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
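The batching above pads every image to the largest height and width in the batch and pads gt_labels with -1, with orig_gt_counts and orig_image_dims recording the true sizes. A standalone sketch of the image-padding step:

import numpy as np

imgs = [np.ones((3, 4, 3)), np.ones((2, 5, 3))]  # two dummy (h, w, 3) images
max_h = max(im.shape[0] for im in imgs)
max_w = max(im.shape[1] for im in imgs)
batch = np.stack([
    np.pad(im, [[0, max_h - im.shape[0]], [0, max_w - im.shape[1]], [0, 0]],
           'constant')
    for im in imgs
])
print(batch.shape)  # (2, 3, 5, 3)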
Example #23
                                           "This argument is the path to the output json evaluation file")
    parser.add_argument('--predict', help="Run prediction on a given image. "
                                          "This argument is the path to the input image file")
    parser.add_argument('--config', help="A list of KEY=VALUE to overwrite those defined in config.py",
                        nargs='+')

    if get_tf_version_tuple() < (1, 6):
        # https://github.com/tensorflow/tensorflow/issues/14657
        logger.warn("TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky.")

    args = parser.parse_args()
    if args.config:
        cfg.update_args(args.config)

    MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()
    DetectionDataset()  # initialize the config with information from our dataset

    if args.visualize or args.evaluate or args.predict:
        assert tf.test.is_gpu_available()
        assert args.load
        finalize_configs(is_training=False)

        if args.predict or args.visualize:
            cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS

        if args.visualize:
            do_visualize(MODEL, args.load)
        else:
            predcfg = PredictConfig(
                model=MODEL,
                session_init=get_model_loader(args.load),
Example #24
    def get_predictor(cls):
        ''' load trained model'''

        with cls.lock:
            # check if model is already loaded
            if cls.predictor:
                return cls.predictor

            os.environ['TENSORPACK_FP16'] = 'true'

            # create a mask r-cnn model
            mask_rcnn_model = ResNetFPNModel(True)

            try:
                model_dir = os.environ['SM_MODEL_DIR']
            except KeyError:
                model_dir = '/opt/ml/model'

            try:
                cls.pretrained_model = os.environ['PRETRAINED_MODEL']
            except KeyError:
                pass

            try:
                div = int(eval(os.environ['divisor']))
            except KeyError:
                div = 1

            rpn_anchor_stride = int(16 / div)
            rpn_anchor_sizes = (int(32 / div), int(64 / div), int(128 / div),
                                int(256 / div), int(512 / div))

            try:
                rpn_anchor_stride = int(eval(os.environ['rpnanchor_stride']))
            except KeyError:
                pass

            try:
                nms_topk = int(eval(os.environ['NMS_TOPK']))
            except KeyError:
                nms_topk = 2

            try:
                nms_thresh = eval(os.environ['NMS_THRESH'])
            except KeyError:
                nms_thresh = 0.7

            try:
                results_per_img = eval(os.environ['res_perimg'])
            except KeyError:
                results_per_img = 400

            # file path to previously trained mask r-cnn model
            latest_trained_model = ""
            model_search_path = os.path.join(model_dir, "model-*.index")
            for model_file in glob.glob(model_search_path):
                if model_file > latest_trained_model:
                    latest_trained_model = model_file

            trained_model = latest_trained_model
            print(f'Using model: {trained_model}')

            # fixed resnet50 backbone weights
            cfg.BACKBONE.WEIGHTS = os.path.join(cls.pretrained_model)
            cfg.MODE_FPN = True
            cfg.MODE_MASK = True
            cfg.RPN.ANCHOR_STRIDE = rpn_anchor_stride
            cfg.RPN.ANCHOR_SIZES = rpn_anchor_sizes
            cfg.RPN.TEST_PRE_NMS_TOPK = int(6000 * nms_topk)
            cfg.RPN.TEST_POST_NMS_TOPK = int(1000 * nms_topk)
            cfg.RPN.TEST_PER_LEVEL_NMS_TOPK = int(1000 * nms_topk)
            # testing -----------------------
            cfg.TEST.FRCNN_NMS_THRESH = nms_thresh
            cfg.TEST.RESULT_SCORE_THRESH = 0.05
            cfg.TEST.RESULT_SCORE_THRESH_VIS = 0.2  # only visualize confident results
            cfg.TEST.RESULTS_PER_IM = results_per_img

            # instantiating DetectionDataset reads the number of COCO categories
            # and saves it in the configuration
            DetectionDataset()
            finalize_configs(is_training=False)

            # Create an inference model
            # PredictConfig takes a model, input tensors and output tensors
            cls.predictor = OfflinePredictor(
                PredictConfig(
                    model=mask_rcnn_model,
                    session_init=get_model_loader(trained_model),
                    input_names=['images', 'orig_image_dims'],
                    output_names=[
                        'generate_{}_proposals_topk_per_image/boxes'.format(
                            'fpn' if cfg.MODE_FPN else 'rpn'),
                        'generate_{}_proposals_topk_per_image/scores'.format(
                            'fpn' if cfg.MODE_FPN else 'rpn'),
                        'fastrcnn_all_scores', 'output/boxes', 'output/scores',
                        'output/labels', 'output/masks'
                    ]))
            return cls.predictor