예제 #1
0
def compute_map(labels_and_predictions, val_json_file):
  """Compute COCO bounding-box metrics from raw model predictions.

  Every example is decoded with `decode_single`; each resulting box is scaled
  by the example's raw image height/width and appended as a row of
  `[source_id, x, y, width, height, score, category]`. The rows are then
  loaded into a `COCO` results object and scored with `COCOeval`
  (`iouType='bbox'`).

  Args:
    labels_and_predictions: iterable of dict-like examples. Each must provide
      the keys "pred_box", "pred_scores", "indices",
      `ssd_constants.RAW_SHAPE` (unpacked as height, width, <ignored>) and
      `ssd_constants.SOURCE_ID`.
    val_json_file: path of the annotation JSON handed to `COCO`. A path that
      starts with "gs://" is first copied to a local temporary file, which is
      removed at interpreter exit.

  Returns:
    A dict mapping "COCO/<metric name>" to the matching entry of
    `COCOeval.stats` (AP, AP50, AP75, APs, APm, APl, ARmax1, ARmax10,
    ARmax100, ARs, ARm, ARl).
  """
  import os  # Local import: only needed to close the mkstemp descriptor.

  predictions = []
  mlperf_log.ssd_print(
      key=mlperf_log.NMS_THRESHOLD, value=ssd_constants.OVERLAP_CRITERIA)
  mlperf_log.ssd_print(
      key=mlperf_log.NMS_MAX_DETECTIONS, value=ssd_constants.MAX_NUM_EVAL_BOXES)
  for example in labels_and_predictions:
    pred_box = example["pred_box"]
    pred_scores = example["pred_scores"]
    indices = example['indices']

    loc, label, prob = decode_single(
        pred_box, pred_scores, indices, ssd_constants.OVERLAP_CRITERIA,
        ssd_constants.MAX_NUM_EVAL_BOXES, ssd_constants.MAX_NUM_EVAL_BOXES)

    htot, wtot, _ = example[ssd_constants.RAW_SHAPE]
    for loc_, label_, prob_ in zip(loc, label, prob):
      # Ordering convention differs, hence [1], [0] rather than [0], [1]
      predictions.append([
          int(example[ssd_constants.SOURCE_ID]), loc_[1] * wtot, loc_[0] * htot,
          (loc_[3] - loc_[1]) * wtot, (loc_[2] - loc_[0]) * htot, prob_,
          ssd_constants.CLASS_INV_MAP[label_]
          ])

  if val_json_file.startswith("gs://"):
    fd, local_val_json = tempfile.mkstemp(suffix=".json")
    # mkstemp hands back an already-open OS-level descriptor; close it right
    # away so it is not leaked for the lifetime of the process.
    os.close(fd)
    # The placeholder file is removed before copying because Copy is called
    # without an overwrite flag.
    tf.gfile.Remove(local_val_json)

    tf.gfile.Copy(val_json_file, local_val_json)
    atexit.register(tf.gfile.Remove, local_val_json)
  else:
    local_val_json = val_json_file

  cocoGt = COCO(local_val_json)
  cocoDt = cocoGt.loadRes(np.array(predictions))

  E = COCOeval(cocoGt, cocoDt, iouType='bbox')
  E.evaluate()
  E.accumulate()
  E.summarize()
  print("Current AP: {:.5f}".format(E.stats[0]))
  metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1',
                  'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl']

  # Prefix with "COCO" to group in TensorBoard.
  return {"COCO/" + key: value for key, value in zip(metric_names, E.stats)}
예제 #2
0
def concat_outputs(cls_outputs, box_outputs):
    """Flatten per-level predictions and join them along the box axis.

    Args:
      cls_outputs: an OrderedDict with keys representing levels and values
        representing logits in [batch_size, height, width,
        num_anchors * num_classes].
      box_outputs: an OrderedDict with keys representing levels and values
        representing box regression targets in
        [batch_size, height, width, num_anchors * 4].

    Returns:
      A `(classes, boxes)` pair: the per-level tensors reshaped to
      [batch_size, boxes_in_level, last_dim] and concatenated on axis 1.
    """
    assert set(cls_outputs.keys()) == set(box_outputs.keys())

    # This sort matters. The labels assume a certain order based on
    # ssd_constants.FEATURE_SIZES, and this sort matches that convention.
    levels = sorted(cls_outputs.keys())
    batch_size = int(cls_outputs[levels[0]].shape[0])

    mlperf_log.ssd_print(key=mlperf_log.FEATURE_SIZES,
                         value=ssd_constants.FEATURE_SIZES)

    def _flatten(tensor, index, per_anchor):
        """Reshape one level to [batch, side * side * anchors, per_anchor]."""
        # TODO(taylorrobie): confirm that this reshape, transpose,
        # reshape is correct.
        side = ssd_constants.FEATURE_SIZES[index]
        num_anchors = ssd_constants.NUM_DEFAULTS[index]
        assert tensor.shape[3] == num_anchors * per_anchor
        unpacked = tf.reshape(
            tensor, (batch_size, side, side, num_anchors, per_anchor))
        # Move the anchor axis in front of the two spatial axes.
        anchor_major = tf.transpose(unpacked, (0, 3, 1, 2, 4))
        return tf.reshape(
            anchor_major, (batch_size, side**2 * num_anchors, per_anchor))

    flat_cls = []
    flat_box = []
    for index, level in enumerate(levels):
        flat_cls.append(
            _flatten(cls_outputs[level], index, ssd_constants.NUM_CLASSES))
        flat_box.append(_flatten(box_outputs[level], index, 4))

    return tf.concat(flat_cls, axis=1), tf.concat(flat_box, axis=1)
예제 #3
0
def resnet_v1(resnet_depth, params, data_format='channels_last'):
    """Returns the ResNet model for a given size and number of output classes.

    Args:
      resnet_depth: depth of the requested backbone; only the depths listed in
        `model_params` below (currently 34) are accepted.
      params: forwarded unchanged to `resnet_v1_generator`.
      data_format: forwarded unchanged to `resnet_v1_generator`.

    Returns:
      Whatever `resnet_v1_generator` returns for the selected block type and
      per-stage layer counts.

    Raises:
      ValueError: if `resnet_depth` is not a supported depth.
    """
    model_params = {34: {'block': residual_block, 'layers': [3, 4, 6, 3]}}

    if resnet_depth not in model_params:
        # Build one message string; passing the value as a second positional
        # argument makes the exception render as a tuple.
        raise ValueError(
            'Not a valid resnet_depth: {}'.format(resnet_depth))

    resnet_params = model_params[resnet_depth]
    mlperf_log.ssd_print(key=mlperf_log.BACKBONE,
                         value='resnet{}'.format(resnet_depth))
    return resnet_v1_generator(resnet_params['block'], resnet_params['layers'],
                               params, data_format)
예제 #4
0
def normalize_image(image):
    """Subtract the configured mean from the image and divide by the std.

    Both constants come from `ssd_constants` and are recorded in the MLPerf
    log before being applied. Each constant gets two leading axes so that it
    broadcasts against the image's last axis.
    """
    mean_values = ssd_constants.NORMALIZATION_MEAN
    mlperf_log.ssd_print(key=mlperf_log.DATA_NORMALIZATION_MEAN,
                         value=mean_values)
    std_values = ssd_constants.NORMALIZATION_STD
    mlperf_log.ssd_print(key=mlperf_log.DATA_NORMALIZATION_STD,
                         value=std_values)

    broadcast_mean = tf.constant(mean_values)[tf.newaxis, tf.newaxis, :]
    image -= broadcast_mean

    broadcast_std = tf.constant(std_values)[tf.newaxis, tf.newaxis, :]
    image /= broadcast_std

    return image
예제 #5
0
def dboxes300_coco():
    """Build the `DefaultBoxes` for a 300-pixel input and log its settings.

    The feature sizes, steps, scales and aspect ratios are written to the
    MLPerf log, followed by the number of default boxes that were generated.

    Returns:
      The constructed `DefaultBoxes` instance.
    """
    image_size = 300
    feature_sizes = [38, 19, 10, 5, 3, 1]
    strides = [8, 16, 32, 64, 100, 300]
    # use the scales here: https://github.com/amdegroot/ssd.pytorch/blob/master/data/config.py
    box_scales = [21, 45, 99, 153, 207, 261, 315]
    ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]

    mlperf_log.ssd_print(key=mlperf_log.FEATURE_SIZES, value=feature_sizes)
    mlperf_log.ssd_print(key=mlperf_log.STEPS, value=strides)
    mlperf_log.ssd_print(key=mlperf_log.SCALES, value=box_scales)
    mlperf_log.ssd_print(key=mlperf_log.ASPECT_RATIOS, value=ratios)

    default_boxes = DefaultBoxes(image_size, feature_sizes, strides,
                                 box_scales, ratios)
    mlperf_log.ssd_print(key=mlperf_log.NUM_DEFAULTS,
                         value=len(default_boxes.default_boxes))
    return default_boxes
예제 #6
0
    def __init__(self):
        """Define the crop sampling options and log the iteration count."""
        # An option is either None (do nothing) or a (min IoU, max IoU) pair;
        # the final (None, None) pair carries no IoU requirements.
        min_iou_choices = (0.1, 0.3, 0.5, 0.7, 0.9)
        iou_bounded = tuple((min_iou, None) for min_iou in min_iou_choices)
        self.sample_options = (None,) + iou_bounded + ((None, None),)

        # Implementation uses 1 iteration to find a possible candidate, this
        # was shown to produce the same mAP as using more iterations.
        self.num_cropping_iterations = 1
        mlperf_log.ssd_print(
            key=mlperf_log.NUM_CROPPING_ITERATIONS,
            value=self.num_cropping_iterations,
        )
예제 #7
0
    def __init__(self, dboxes, size=(300, 300), val=False):
        """Assemble the training and validation image pipelines.

        Args:
          dboxes: default boxes object, stored and handed to `Encoder`.
          size: target size passed to `transforms.Resize`.
          val: stored as `self.val`; not otherwise used in this constructor.
        """
        self.size = size
        self.val = val

        self.dboxes_ = dboxes
        self.encoder = Encoder(self.dboxes_)

        self.crop = SSDCropping()

        # Training pipeline: resize, colour jitter, then tensor conversion.
        train_steps = [
            transforms.Resize(self.size),
            transforms.ColorJitter(
                brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05),
            transforms.ToTensor(),
        ]
        self.img_trans = transforms.Compose(train_steps)
        self.hflip = RandomHorizontalFlip()

        # All Pytorch Tensor will be normalized
        # https://discuss.pytorch.org/t/how-to-preprocess-input-for-pre-trained-networks/683
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        mlperf_log.ssd_print(key=mlperf_log.DATA_NORMALIZATION_MEAN,
                             value=mean)
        mlperf_log.ssd_print(key=mlperf_log.DATA_NORMALIZATION_STD,
                             value=std)
        self.normalize = transforms.Normalize(mean=mean, std=std)

        # Validation pipeline: resize, tensor conversion, normalization.
        val_steps = [
            transforms.Resize(self.size),
            transforms.ToTensor(),
            self.normalize,
        ]
        self.trans_val = transforms.Compose(val_steps)
예제 #8
0
    def __init__(self,
                 label_num,
                 backbone='resnet34',
                 model_path="./resnet34-333f7ec4.pth"):
        """Build the backbone, the extra feature layers and the heads.

        Args:
          label_num: number of labels; each confidence head emits
            `num_defaults * label_num` channels.
          backbone: backbone name; only 'resnet34' is accepted.
          model_path: accepted but not read anywhere in this constructor.

        Raises:
          ValueError: if `backbone` is not 'resnet34'.
        """
        super(SSD300, self).__init__()

        self.label_num = label_num

        if backbone != 'resnet34':
            raise ValueError('Invalid backbone chosen')

        self.model = ResNet34()
        mlperf_log.ssd_print(key=mlperf_log.BACKBONE, value='resnet34')
        first_map_channels = 256
        first_map_size = 38
        self.out_chan = [first_map_channels, 512, 512, 256, 256, 256]
        mlperf_log.ssd_print(key=mlperf_log.LOC_CONF_OUT_CHANNELS,
                             value=self.out_chan)

        self._build_additional_features(first_map_size, self.out_chan)

        # after l2norm, conv7, conv8_2, conv9_2, conv10_2, conv11_2
        # classifier 1, 2, 3, 4, 5, 6
        self.num_defaults = [4, 6, 6, 6, 4, 4]
        mlperf_log.ssd_print(key=mlperf_log.NUM_DEFAULTS_PER_CELL,
                             value=self.num_defaults)

        # One localization head and one confidence head per feature map,
        # created pairwise so module construction order stays loc, conf, ...
        loc_heads = []
        conf_heads = []
        for boxes_per_cell, channels in zip(self.num_defaults, self.out_chan):
            loc_heads.append(
                nn.Conv2d(channels, boxes_per_cell * 4,
                          kernel_size=3, padding=1))
            conf_heads.append(
                nn.Conv2d(channels, boxes_per_cell * label_num,
                          kernel_size=3, padding=1))

        self.loc = nn.ModuleList(loc_heads)
        self.conf = nn.ModuleList(conf_heads)
        # initialize all weights
        self._init_weights()
예제 #9
0
def main():
    """Parse arguments, seed the RNGs, run training and log the outcome.

    Ensures the './models' output directory exists, seeds torch and numpy when
    a seed is given, enables cuDNN benchmark mode, and wraps
    `train300_mlperf_coco` in the MLPerf RUN_START / RUN_STOP / RUN_FINAL log
    entries. The value returned by training is logged as the run's success
    flag.
    """
    args = parse_args()

    # exist_ok makes this a single call with no window between checking for
    # the directory and creating it.
    os.makedirs('./models', exist_ok=True)

    if args.seed is not None:
        print("Using seed = {}".format(args.seed))
        torch.manual_seed(args.seed)
        np.random.seed(seed=args.seed)

    torch.backends.cudnn.benchmark = True

    # start timing here
    mlperf_log.ssd_print(key=mlperf_log.RUN_START)

    success = train300_mlperf_coco(args)

    # end timing here
    mlperf_log.ssd_print(key=mlperf_log.RUN_STOP, value={"success": success})
    mlperf_log.ssd_print(key=mlperf_log.RUN_FINAL)
예제 #10
0
    def __init__(self):
        """Generate every default box from the `ssd_constants` settings.

        Boxes are stored in `self.default_boxes` as `(cy, cx, h, w)` tuples
        with each component clipped to [0, 1]. `self.default_boxes_ltrb` holds
        the same boxes as `(cy - h/2, cx - w/2, cy + h/2, cx + w/2)`.
        """
        mlperf_log.ssd_print(key=mlperf_log.STEPS, value=ssd_constants.STEPS)
        fk = ssd_constants.IMAGE_SIZE / np.array(ssd_constants.STEPS)

        self.default_boxes = []
        mlperf_log.ssd_print(key=mlperf_log.ASPECT_RATIOS,
                             value=ssd_constants.ASPECT_RATIOS)
        # size of feature and number of feature
        for idx, feature_size in enumerate(ssd_constants.FEATURE_SIZES):
            scale = ssd_constants.SCALES[idx] / ssd_constants.IMAGE_SIZE
            next_scale = (
                ssd_constants.SCALES[idx + 1] / ssd_constants.IMAGE_SIZE)
            # Geometric mean of this level's scale and the next one.
            mid_scale = math.sqrt(scale * next_scale)
            box_sizes = [(scale, scale), (mid_scale, mid_scale)]

            # Each aspect ratio contributes a box and its transpose.
            for ratio in ssd_constants.ASPECT_RATIOS[idx]:
                root = math.sqrt(ratio)
                long_side, short_side = scale * root, scale / root
                box_sizes.extend(
                    [(long_side, short_side), (short_side, long_side)])

            assert len(box_sizes) == ssd_constants.NUM_DEFAULTS[idx]

            cells = fk[idx]
            for width, height in box_sizes:
                for row, col in it.product(range(feature_size), repeat=2):
                    center_x = (col + 0.5) / cells
                    center_y = (row + 0.5) / cells
                    clipped = tuple(
                        np.clip(component, 0, 1)
                        for component in (center_y, center_x, height, width))
                    self.default_boxes.append(clipped)

        mlperf_log.ssd_print(key=mlperf_log.NUM_DEFAULTS,
                             value=len(self.default_boxes))
        assert len(self.default_boxes) == ssd_constants.NUM_SSD_BOXES

        # For IoU calculation
        self.default_boxes_ltrb = tuple(
            (cy - h / 2, cx - w / 2, cy + h / 2, cx + w / 2)
            for cy, cx, h, w in self.default_boxes)
예제 #11
0
def train300_mlperf_coco(args):
    """Train SSD300 on COCO 2017 until an evaluation reports success.

    Builds the data pipelines, model, loss and SGD optimizer, then iterates
    over the DALI training loader for `args.epochs` epochs. At every
    iteration listed in `args.evaluation` the model is optionally saved to
    './models/iter_<n>.pt' and `coco_eval` is run; a truthy result from
    `coco_eval` ends training.

    Args:
      args: parsed command-line namespace. The attributes read here are
        `data`, `batch_size`, `seed`, `checkpoint`, `iteration`, `epochs`,
        `evaluation`, `no_save` and `threshold`.

    Returns:
      True as soon as `coco_eval` returns a truthy value; False if no GPU is
      available or all epochs complete without that happening.
    """
    from coco import COCO

    # Check that GPUs are actually available
    if not torch.cuda.is_available():
        print("Error. No GPU available.")
        return False

    dboxes = dboxes300_coco()
    encoder = Encoder(dboxes)

    input_size = 300
    train_trans = SSDTransformer(dboxes, (input_size, input_size), val=False)
    val_trans = SSDTransformer(dboxes, (input_size, input_size), val=True)
    mlperf_log.ssd_print(key=mlperf_log.INPUT_SIZE, value=input_size)

    val_annotate = os.path.join(args.data,
                                "annotations/instances_val2017.json")
    val_coco_root = os.path.join(args.data, "val2017")
    train_annotate = os.path.join(args.data,
                                  "annotations/instances_train2017.json")
    train_coco_root = os.path.join(args.data, "train2017")

    cocoGt = COCO(annotation_file=val_annotate)
    val_coco = COCODetection(val_coco_root, val_annotate, val_trans)
    train_coco = COCODetection(train_coco_root, train_annotate, train_trans)

    # Training batches come from the DALI pipeline; `train_coco` is only used
    # below for its label count and label info.
    train_pipe = COCOPipeline(args.batch_size, train_coco_root, train_annotate,
                              dboxes, args.seed)
    train_pipe.build()
    train_loader = DALIGenericIterator(train_pipe,
                                       ["images", "boxes", "labels"],
                                       train_pipe.epoch_size("Reader"))

    mlperf_log.ssd_print(key=mlperf_log.INPUT_SHARD, value=None)
    mlperf_log.ssd_print(key=mlperf_log.INPUT_ORDER)
    mlperf_log.ssd_print(key=mlperf_log.INPUT_BATCH_SIZE,
                         value=args.batch_size)

    ssd300 = SSD300(train_coco.labelnum)
    if args.checkpoint is not None:
        print("loading model checkpoint", args.checkpoint)
        od = torch.load(args.checkpoint)
        ssd300.load_state_dict(od["model"])
    ssd300.train()
    ssd300.cuda()
    loss_func = Loss(dboxes)
    loss_func.cuda()

    current_lr = 1e-3
    current_momentum = 0.9
    current_weight_decay = 5e-4
    optim = torch.optim.SGD(ssd300.parameters(),
                            lr=current_lr,
                            momentum=current_momentum,
                            weight_decay=current_weight_decay)
    mlperf_log.ssd_print(key=mlperf_log.OPT_NAME, value="SGD")
    mlperf_log.ssd_print(key=mlperf_log.OPT_LR, value=current_lr)
    mlperf_log.ssd_print(key=mlperf_log.OPT_MOMENTUM, value=current_momentum)
    mlperf_log.ssd_print(key=mlperf_log.OPT_WEIGHT_DECAY,
                         value=current_weight_decay)

    print("epoch", "nbatch", "loss")

    iter_num = args.iteration
    # Exponential moving average of the loss, reported in the progress line.
    avg_loss = 0.0
    inv_map = {v: k for k, v in val_coco.label_map.items()}

    # NOTE(review): `mean` and `std` are not used below - confirm whether this
    # call is still needed.
    mean, std = generate_mean_std()

    data_perf = AverageMeter()
    batch_perf = AverageMeter()
    end = time.time()
    train_start = end

    mlperf_log.ssd_print(key=mlperf_log.TRAIN_LOOP)
    for epoch in range(args.epochs):
        mlperf_log.ssd_print(key=mlperf_log.TRAIN_EPOCH, value=epoch)
        for nbatch, data in enumerate(train_loader):
            img = data[0]["images"]
            bbox = data[0]["boxes"]
            label = data[0]["labels"]

            boxes_in_batch = len(label.nonzero())

            # Skip batches whose labels are all zero.
            if boxes_in_batch == 0:
                print("No labels in batch")
                continue

            label = label.type(torch.cuda.LongTensor)

            img = Variable(img, requires_grad=True)
            trans_bbox = bbox.transpose(1, 2).contiguous()

            gloc, glabel = Variable(trans_bbox, requires_grad=False), \
                           Variable(label, requires_grad=False)

            data_perf.update(time.time() - end)

            # Step-wise learning-rate decay. The checks are exact matches on
            # the iteration counter.
            # NOTE(review): a run resumed with args.iteration beyond a
            # threshold never applies that decay - confirm this is intended.
            if iter_num == 160000:
                current_lr = 1e-4
                print("")
                print("lr decay step #1")
                for param_group in optim.param_groups:
                    param_group['lr'] = current_lr
                mlperf_log.ssd_print(key=mlperf_log.OPT_LR, value=current_lr)

            if iter_num == 200000:
                current_lr = 1e-5
                print("")
                print("lr decay step #2")
                for param_group in optim.param_groups:
                    param_group['lr'] = current_lr
                mlperf_log.ssd_print(key=mlperf_log.OPT_LR, value=current_lr)

            ploc, plabel = ssd300(img)
            loss = loss_func(ploc, plabel, gloc, glabel)

            # Infinite losses are left out of the running average.
            if not np.isinf(loss.item()):
                avg_loss = 0.999 * avg_loss + 0.001 * loss.item()

            optim.zero_grad()
            loss.backward()
            optim.step()

            batch_perf.update(time.time() - end)

            if iter_num in args.evaluation:
                if not args.no_save:
                    print("")
                    print("saving model...")
                    torch.save(
                        {
                            "model": ssd300.state_dict(),
                            "label_map": train_coco.label_info
                        }, "./models/iter_{}.pt".format(iter_num))

                try:
                    if coco_eval(ssd300, val_coco, cocoGt, encoder, inv_map,
                                 args.threshold, epoch, iter_num):
                        return True
                except Exception as err:
                    # Evaluation stays best-effort: a failure is reported and
                    # training continues. KeyboardInterrupt and SystemExit are
                    # not caught, so the run can still be stopped here.
                    print("Eval error on iteration {0}: {1!r}".format(
                        iter_num, err))

            print(("Iteration: {:6d}, Loss function: {:5.3f}, "
                   "Average Loss: {:.3f}, "
                   "Data perf: {:3f} img/sec, Batch perf: {:3f} img/sec, "
                   "Avg Data perf: {:3f} img/sec, "
                   "Avg Batch perf: {:3f} img/sec").format(
                       iter_num, loss.item(), avg_loss,
                       args.batch_size / data_perf.val,
                       args.batch_size / batch_perf.val,
                       args.batch_size / data_perf.avg,
                       args.batch_size / batch_perf.avg),
                  end="\r")

            end = time.time()
            iter_num += 1
            # Reset both meters once the counter reaches 10 in the first
            # epoch, discarding the timings collected up to that point.
            if iter_num == 10 and epoch == 0:
                data_perf.reset()
                batch_perf.reset()

        train_loader.reset()

    print("\n\n")
    print(("Training end: Data perf: {:3f} img/sec, "
           "Batch perf: {:3f} img/sec, Total time: {:3f} sec").format(
               args.batch_size / data_perf.avg,
               args.batch_size / batch_perf.avg,
               time.time() - train_start))
    return False
예제 #12
0
 def __init__(self, p=0.5):
     """Store `p` and record it in the MLPerf log.

     Args:
       p: value kept as `self.p` and logged under the
         RANDOM_FLIP_PROBABILITY key.
     """
     self.p = p
     mlperf_log.ssd_print(
         key=mlperf_log.RANDOM_FLIP_PROBABILITY,
         value=p,
     )
예제 #13
0
def ssd(features, params, is_training_bn=False):
    """SSD classification and regression model.

    Args:
      features: input passed to the ResNet backbone.
      params: forwarded to `resnet_v1`.
      is_training_bn: forwarded to the backbone function.

    Returns:
      A `(class_outputs, box_outputs)` pair of dicts keyed by feature level,
      covering `ssd_constants.MIN_LEVEL` through `ssd_constants.MAX_LEVEL`.
    """
    # upward layers
    with tf.variable_scope('resnet%s' % ssd_constants.RESNET_DEPTH):
        backbone_fn = resnet_v1(ssd_constants.RESNET_DEPTH, params)
        _, _, u4, _ = backbone_fn(features, is_training_bn)

    # Arguments that differ between the 3x3 convolutions of the extra blocks.
    strided_same = {'strides': (2, 2), 'padding': 'same'}
    unstrided_valid = {'padding': 'valid'}
    # (level, layer-name prefix, 1x1 filters, 3x3 filters, 3x3 conv arguments)
    extra_blocks = (
        (4, 'block7', 256, 512, strided_same),
        (5, 'block8', 256, 512, strided_same),
        (6, 'block9', 128, 256, strided_same),
        (7, 'block10', 128, 256, unstrided_valid),
        (8, 'block11', 128, 256, unstrided_valid),
    )

    with tf.variable_scope('ssd'):
        feats = {3: u4}
        # output channels for mlperf logging.
        out_channels = [256]
        for level, prefix, narrow, wide, conv3x3_args in extra_blocks:
            # Every extra block is a 1x1 convolution followed by a 3x3 one,
            # fed from the previous level's feature map.
            squeezed = tf.layers.conv2d(feats[level - 1],
                                        filters=narrow,
                                        kernel_size=(1, 1),
                                        padding='same',
                                        activation=tf.nn.relu,
                                        name=prefix + '-conv1x1')
            feats[level] = tf.layers.conv2d(squeezed,
                                            filters=wide,
                                            kernel_size=(3, 3),
                                            activation=tf.nn.relu,
                                            name=prefix + '-conv3x3',
                                            **conv3x3_args)
            out_channels.append(wide)
        mlperf_log.ssd_print(key=mlperf_log.LOC_CONF_OUT_CHANNELS,
                             value=out_channels)

        mlperf_log.ssd_print(key=mlperf_log.NUM_DEFAULTS_PER_CELL,
                             value=ssd_constants.NUM_DEFAULTS)
        levels = range(ssd_constants.MIN_LEVEL, ssd_constants.MAX_LEVEL + 1)
        num_classes = ssd_constants.NUM_CLASSES

        class_outputs = {}
        with tf.variable_scope('class_net', reuse=tf.AUTO_REUSE):
            for level in levels:
                class_outputs[level] = class_net(feats[level], level,
                                                 num_classes)

        box_outputs = {}
        with tf.variable_scope('box_net', reuse=tf.AUTO_REUSE):
            for level in levels:
                box_outputs[level] = box_net(feats[level], level)

    return class_outputs, box_outputs
예제 #14
0
def main(argv):
    """Run SSD training and/or evaluation as selected by `FLAGS.mode`.

    After validating the required flags, the function builds the run config
    and, where `params` asks for them, the low-level train/eval runners. It
    then handles two modes:

    * 'train': trains for the full number of steps, optionally followed by a
      single evaluation when `FLAGS.eval_after_training` is set.
    * 'train_and_eval': alternates training and prediction for every entry of
      `ssd_constants.EVAL_STEPS`, scoring each prediction set on a background
      thread, and stops early once the module-level `SUCCESS` flag is set.

    Args:
      argv: unused command-line remainder.

    Raises:
      Exception: if `FLAGS.model_dir` is empty.
      RuntimeError: if a file pattern or the validation JSON required by the
        selected mode is missing.
    """
    del argv  # Unused.
    global SUCCESS
    print(FLAGS.model_dir)
    if not FLAGS.model_dir:
        print(FLAGS.training_file_pattern)
        raise Exception('No model dir')
    print(FLAGS.model_dir)
    # Check data path
    if FLAGS.mode in (
            'train', 'train_and_eval') and FLAGS.training_file_pattern is None:
        raise RuntimeError(
            'You must specify --training_file_pattern for training.')
    if FLAGS.mode in ('eval', 'train_and_eval', 'eval_once'):
        if FLAGS.validation_file_pattern is None:
            raise RuntimeError('You must specify --validation_file_pattern '
                               'for evaluation.')
        if FLAGS.val_json_file is None:
            raise RuntimeError(
                'You must specify --val_json_file for evaluation.')

    run_config, params = construct_run_config(FLAGS.iterations_per_loop)

    # Stays None unless the low-level training runner is created below, so
    # later code can tell whether there is a runner to shut down.
    trunner = None

    if FLAGS.mode != 'eval' and FLAGS.mode != 'eval_once':
        if params['train_with_low_level_api']:
            params['batch_size'] = FLAGS.train_batch_size // FLAGS.num_shards
            trunner = train_low_level_runner.TrainLowLevelRunner(
                iterations=FLAGS.iterations_per_loop)
            input_fn = dataloader.SSDInputReader(
                FLAGS.training_file_pattern,
                params['transpose_input'],
                is_training=True,
                use_fake_data=FLAGS.use_fake_data)
            mlperf_log.ssd_print(key=mlperf_log.RUN_START)
            trunner.initialize(input_fn, ssd_model.ssd_model_fn, params)
        else:
            mlperf_log.ssd_print(key=mlperf_log.RUN_START)

    if FLAGS.mode in ('eval', 'train_and_eval', 'eval_once'):
        if params['eval_with_low_level_api']:
            params['batch_size'] = FLAGS.eval_batch_size // FLAGS.num_shards
            erunner = eval_low_level_runner.EvalLowLevelRunner(
                eval_steps=int(FLAGS.eval_samples / FLAGS.eval_batch_size))
            input_fn = dataloader.SSDInputReader(
                FLAGS.validation_file_pattern,
                is_training=False,
                use_fake_data=FLAGS.use_fake_data)
            erunner.initialize(input_fn, params)
            erunner.build_model(ssd_model.ssd_model_fn, params)

    # TPU Estimator
    if FLAGS.mode == 'train':
        if params['train_with_low_level_api']:
            train_steps = int(
                (FLAGS.num_epochs * FLAGS.num_examples_per_epoch) /
                FLAGS.train_batch_size)
            mlperf_log.ssd_print(key=mlperf_log.TRAIN_LOOP)
            mlperf_log.ssd_print(key=mlperf_log.TRAIN_EPOCH, value=0)
            trunner.train(train_steps)
            trunner.shutdown()
        else:
            if FLAGS.device == 'gpu':
                params['dataset_num_shards'] = 1
                params['dataset_index'] = 0
                train_params = dict(params)
                train_params['batch_size'] = FLAGS.train_batch_size
                train_estimator = tf.estimator.Estimator(
                    model_fn=ssd_model.ssd_model_fn,
                    model_dir=FLAGS.model_dir,
                    config=run_config,
                    params=train_params)
            else:
                train_estimator = tpu_estimator.TPUEstimator(
                    model_fn=ssd_model.ssd_model_fn,
                    use_tpu=FLAGS.use_tpu,
                    train_batch_size=FLAGS.train_batch_size,
                    config=run_config,
                    params=params)

            tf.logging.info(params)
            mlperf_log.ssd_print(key=mlperf_log.TRAIN_LOOP)
            mlperf_log.ssd_print(key=mlperf_log.TRAIN_EPOCH, value=0)
            hooks = []

            if FLAGS.use_async_checkpoint:
                hooks.append(
                    async_checkpoint.AsyncCheckpointSaverHook(
                        checkpoint_dir=FLAGS.model_dir,
                        save_steps=max(100, FLAGS.iterations_per_loop)))
            train_estimator.train(
                input_fn=dataloader.SSDInputReader(
                    FLAGS.training_file_pattern,
                    params['transpose_input'],
                    is_training=True,
                    use_fake_data=FLAGS.use_fake_data),
                steps=int((FLAGS.num_epochs * FLAGS.num_examples_per_epoch) /
                          FLAGS.train_batch_size),
                hooks=hooks)

        if FLAGS.eval_after_training:
            eval_estimator = tpu_estimator.TPUEstimator(
                model_fn=ssd_model.ssd_model_fn,
                use_tpu=FLAGS.use_tpu,
                train_batch_size=FLAGS.train_batch_size,
                predict_batch_size=FLAGS.eval_batch_size,
                config=run_config,
                params=params)

            predictions = list(
                eval_estimator.predict(input_fn=dataloader.SSDInputReader(
                    FLAGS.validation_file_pattern,
                    is_training=False,
                    use_fake_data=FLAGS.use_fake_data)))

            eval_results = coco_metric.compute_map(predictions,
                                                   FLAGS.val_json_file)

            tf.logging.info('Eval results: %s' % eval_results)

    elif FLAGS.mode == 'train_and_eval':
        output_dir = os.path.join(FLAGS.model_dir, 'eval')
        tf.gfile.MakeDirs(output_dir)
        # Summary writer writes out eval metrics.
        summary_writer = tf.summary.FileWriter(output_dir)

        current_step = 0
        mlperf_log.ssd_print(key=mlperf_log.TRAIN_LOOP)
        # One scoring thread per completed evaluation cycle; joined below.
        threads = []
        for eval_step in ssd_constants.EVAL_STEPS:
            # Compute the actual eval steps based on the actual
            # train_batch_size
            steps = int(eval_step * ssd_constants.DEFAULT_BATCH_SIZE /
                        FLAGS.train_batch_size)
            current_epoch = current_step // params['steps_per_epoch']
            # TODO(wangtao): figure out how to log for each epoch.
            mlperf_log.ssd_print(key=mlperf_log.TRAIN_EPOCH,
                                 value=current_epoch)

            tf.logging.info('Starting training cycle for %d steps.' % steps)
            if params['train_with_low_level_api']:
                trunner.train(steps)
            else:
                # The run config and params are rebuilt for this cycle's
                # step count.
                run_config, params = construct_run_config(steps)
                if FLAGS.device == 'gpu':
                    train_params = dict(params)
                    train_params['batch_size'] = FLAGS.train_batch_size
                    train_estimator = tf.estimator.Estimator(
                        model_fn=ssd_model.ssd_model_fn,
                        model_dir=FLAGS.model_dir,
                        config=run_config,
                        params=train_params)
                else:
                    train_estimator = tpu_estimator.TPUEstimator(
                        model_fn=ssd_model.ssd_model_fn,
                        use_tpu=FLAGS.use_tpu,
                        train_batch_size=FLAGS.train_batch_size,
                        config=run_config,
                        params=params)

                tf.logging.info(params)
                train_estimator.train(input_fn=dataloader.SSDInputReader(
                    FLAGS.training_file_pattern,
                    params['transpose_input'],
                    is_training=True,
                    use_fake_data=FLAGS.use_fake_data),
                                      steps=steps)

            # SUCCESS is a module-level flag; stop scheduling further cycles
            # once it is set.
            if SUCCESS:
                break

            current_step = current_step + steps
            current_epoch = current_step // params['steps_per_epoch']
            tf.logging.info('Starting evaluation cycle at step %d.' %
                            current_step)
            mlperf_log.ssd_print(key=mlperf_log.EVAL_START,
                                 value=current_epoch)
            # Run evaluation at the given step.
            if params['eval_with_low_level_api']:
                predictions = list(erunner.predict())
            else:
                if FLAGS.device == 'gpu':
                    eval_params = dict(params)
                    eval_params['batch_size'] = FLAGS.eval_batch_size
                    eval_estimator = tf.estimator.Estimator(
                        model_fn=ssd_model.ssd_model_fn,
                        model_dir=FLAGS.model_dir,
                        config=run_config,
                        params=eval_params)
                else:
                    eval_estimator = tpu_estimator.TPUEstimator(
                        model_fn=ssd_model.ssd_model_fn,
                        use_tpu=FLAGS.use_tpu,
                        train_batch_size=FLAGS.train_batch_size,
                        predict_batch_size=FLAGS.eval_batch_size,
                        config=run_config,
                        params=params)

                predictions = list(
                    eval_estimator.predict(input_fn=dataloader.SSDInputReader(
                        FLAGS.validation_file_pattern,
                        is_training=False,
                        use_fake_data=FLAGS.use_fake_data)))

            # Score the predictions on a separate thread so the next training
            # cycle can start without waiting for it.
            t = threading.Thread(target=coco_eval,
                                 args=(predictions, current_epoch,
                                       current_step, summary_writer))
            threads.append(t)
            t.start()

        # The low-level runner only exists when train_with_low_level_api is
        # enabled; the Estimator path has nothing to shut down.
        if trunner is not None:
            trunner.shutdown()

        for t in threads:
            t.join()

        # success is a string right now as boolean is not JSON serializable.
        if not SUCCESS:
            mlperf_log.ssd_print(key=mlperf_log.RUN_STOP,
                                 value={'success': 'false'})
            mlperf_log.ssd_print(key=mlperf_log.RUN_FINAL)
        summary_writer.close()

    elif FLAGS.mode == 'eval':
        if not params['eval_with_low_level_api']:
            if FLAGS.device == 'gpu':
                eval_params = dict(params)
                eval_params['batch_size'] = FLAGS.eval_batch_size
                eval_estimator = tf.estimator.Estimator(
                    model_fn=ssd_model.ssd_model_fn,
                    model_dir=FLAGS.model_dir,
                    config=run_config,
                    params=eval_params)
            else:
                eval_estimator = tpu_estimator.TPUEstimator(
                    model_fn=ssd_model.ssd_model_fn,
                    use_tpu=FLAGS.use_tpu,
                    train_batch_size=FLAGS.train_batch_size,
                    predict_batch_size=FLAGS.eval_batch_size,
                    config=run_config,
                    params=params)

        output_dir = os.path.join(FLAGS.model_dir, 'eval')
        tf.gfile.MakeDirs(output_dir)
        # Summary writer writes out eval metrics.
        summary_writer = tf.summary.FileWriter(output_dir)

        eval_steps = np.cumsum(ssd_constants.EVAL_STEPS).tolist()
        eval_epochs = [
            steps * ssd_constants.DEFAULT_BATCH_SIZE /
            FLAGS.train_batch_size // params['steps_per_epoch']
            for steps in eval_steps
        ]

        # For 8x8 slices and above.
        if FLAGS.train_batch_size >= 4096:
            eval_epochs = [i * 2 for i in eval_epochs]

        tf.logging.info('Eval epochs: %s' % eval_epochs)
        # Run evaluation when there's a new checkpoint
        threads = []
        count = 1
        for ckpt in next_checkpoint(FLAGS.model_dir):
            print("current count is {}\n".format(count))
            count += 1
            if SUCCESS:
                break
            current_step = int(os.path.basename(ckpt).split('-')[1])
            current_epoch = current_step // params['steps_per_epoch']
            tf.logging.info('current step: %s' % current_step)
            tf.logging.info('current epoch: %s' % current_epoch)
            if not params[
                    'eval_every_checkpoint'] and current_epoch not in eval_epochs:
                continue

            tf.logging.info('Starting to evaluate.')
            try:
                mlperf_log.ssd_print(key=mlperf_log.EVAL_START,
                                     value=current_epoch)

                if params['eval_with_low_level_api']:
                    predictions = list(erunner.predict(checkpoint_path=ckpt))
                else:
                    predictions = list(
                        eval_estimator.predict(
                            checkpoint_path=ckpt,
                            input_fn=dataloader.SSDInputReader(
                                FLAGS.validation_file_pattern,
                                is_training=False,
                                use_fake_data=FLAGS.use_fake_data)))

                t = threading.Thread(target=coco_eval,
                                     args=(predictions, current_epoch,
                                           current_step, summary_writer))
                threads.append(t)
                t.start()

                # Terminate eval job when final checkpoint is reached
                total_step = int(
                    (FLAGS.num_epochs * FLAGS.num_examples_per_epoch) /
                    FLAGS.train_batch_size)
                if current_step >= total_step:
                    tf.logging.info(
                        'Evaluation finished after training step %d' %
                        current_step)
                    break

            except tf.errors.NotFoundError:
                # Since the coordinator is on a different job than the TPU worker,
                # sometimes the TPU worker does not finish initializing until long
                # after the CPU job tells it to start evaluating. In this case,
                # the checkpoint file could have been deleted already.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint' %
                    ckpt)

        for t in threads:
            t.join()

        if not SUCCESS:
            mlperf_log.ssd_print(key=mlperf_log.RUN_STOP,
                                 value={'success': 'false'})
            mlperf_log.ssd_print(key=mlperf_log.RUN_FINAL)
        summary_writer.close()
    elif FLAGS.mode == 'eval_once':
        if not params['eval_with_low_level_api']:
            eval_estimator = tpu_estimator.TPUEstimator(
                model_fn=ssd_model.ssd_model_fn,
                use_tpu=FLAGS.use_tpu,
                train_batch_size=FLAGS.train_batch_size,
                predict_batch_size=FLAGS.eval_batch_size,
                config=run_config,
                params=params)

        output_dir = os.path.join(FLAGS.model_dir, 'eval')
        tf.gfile.MakeDirs(output_dir)
        # Summary writer writes out eval metrics.
        summary_writer = tf.summary.FileWriter(output_dir)

        # Run evaluation when there's a new checkpoint
        for ckpt in next_checkpoint(FLAGS.model_dir):
            current_step = int(os.path.basename(ckpt).split('-')[1])
            current_epoch = current_step // params['steps_per_epoch']
            print('current epoch: %s' % current_epoch)
            if FLAGS.eval_epoch < current_epoch:
                break
            if FLAGS.eval_epoch > current_epoch:
                continue

            tf.logging.info('Starting to evaluate.')
            try:
                mlperf_log.ssd_print(key=mlperf_log.EVAL_START,
                                     value=current_epoch)

                if params['eval_with_low_level_api']:
                    predictions = list(erunner.predict(checkpoint_path=ckpt))
                else:
                    predictions = list(
                        eval_estimator.predict(
                            checkpoint_path=ckpt,
                            input_fn=dataloader.SSDInputReader(
                                FLAGS.validation_file_pattern,
                                is_training=False,
                                use_fake_data=FLAGS.use_fake_data)))

                coco_eval(predictions, current_epoch, current_step,
                          summary_writer)

                # Terminate eval job when final checkpoint is reached
                total_step = int(
                    (FLAGS.num_epochs * FLAGS.num_examples_per_epoch) /
                    FLAGS.train_batch_size)
                if current_step >= total_step:
                    if not SUCCESS:
                        mlperf_log.ssd_print(key=mlperf_log.RUN_STOP,
                                             value={'success': 'false'})
                        mlperf_log.ssd_print(key=mlperf_log.RUN_FINAL)
                    print('Evaluation finished after training step %d' %
                          current_step)
                    break

            except tf.errors.NotFoundError:
                # Since the coordinator is on a different job than the TPU worker,
                # sometimes the TPU worker does not finish initializing until long
                # after the CPU job tells it to start evaluating. In this case,
                # the checkpoint file could have been deleted already.
                print('Checkpoint %s no longer exists, skipping checkpoint' %
                      ckpt)

        print('%d ending' % FLAGS.eval_epoch)
        summary_writer.close()
예제 #15
0
def coco_eval(predictions, current_epoch, current_step, summary_writer):
    """Compute COCO metrics for `predictions`, log them and write summaries.

    Args:
      predictions: model predictions handed to `coco_metric.compute_map`.
      current_epoch: epoch number recorded in the MLPerf log entries.
      current_step: training step recorded in the MLPerf log entries and used
        as the step of the written summary.
      summary_writer: writer whose `add_summary` receives the eval metrics.

    Side effects:
      Sets the module-level `SUCCESS` flag to True (and logs RUN_STOP /
      RUN_FINAL) the first time 'COCO/AP' reaches `ssd_constants.EVAL_TARGET`.
    """
    global SUCCESS
    eval_results = coco_metric.compute_map(predictions, FLAGS.val_json_file)
    coco_ap = eval_results['COCO/AP']

    mlperf_log.ssd_print(key=mlperf_log.EVAL_STOP, value=current_epoch)
    mlperf_log.ssd_print(key=mlperf_log.EVAL_SIZE, value=FLAGS.eval_samples)
    epoch_accuracy = {'epoch': current_epoch, 'value': coco_ap}
    mlperf_log.ssd_print(key=mlperf_log.EVAL_ACCURACY, value=epoch_accuracy)
    mlperf_log.ssd_print(key=mlperf_log.EVAL_TARGET,
                         value=ssd_constants.EVAL_TARGET)
    step_accuracy = {'iteration': current_step, 'value': coco_ap}
    mlperf_log.ssd_print(key=mlperf_log.EVAL_ITERATION_ACCURACY,
                         value=step_accuracy)
    print("The coco AP is: {}\n".format(coco_ap))

    # Only the first evaluation that reaches the target reports success.
    target_reached = coco_ap >= ssd_constants.EVAL_TARGET
    if target_reached and not SUCCESS:
        mlperf_log.ssd_print(key=mlperf_log.RUN_STOP,
                             value={'success': 'true'})
        mlperf_log.ssd_print(key=mlperf_log.RUN_FINAL)
        SUCCESS = True

    tf.logging.info('Eval results: %s' % eval_results)

    # Write out eval results for the checkpoint, one scalar per metric.
    with tf.Graph().as_default():
        metric_values = [
            tf.Summary.Value(tag=name, simple_value=value)
            for name, value in eval_results.items()
        ]
        summary_writer.add_summary(tf.Summary(value=metric_values),
                                   current_step)
예제 #16
0
def coco_eval(model,
              coco,
              cocoGt,
              encoder,
              inv_map,
              threshold,
              epoch,
              iteration,
              use_cuda=True):
    """Run `model` over every image in `coco` and report COCO bbox AP.

    The model is switched to eval mode for the duration of the call and put
    back into training mode before returning.

    Args:
      model: detection network called as `model(inp)` and returning
        `(ploc, plabel)`.
      coco: validation dataset exposing `img_keys`, `len()` and indexing that
        yields `(img, (htot, wtot), _, _)`.
      cocoGt: ground-truth COCO object; its `loadRes` receives the detections.
      encoder: object whose `decode_batch` turns raw outputs into
        `(loc, label, prob)` per image.
      inv_map: mapping from predicted label to the category id written into
        the results.
      threshold: AP target the result is compared against.
      epoch: epoch number recorded in the MLPerf log entries.
      iteration: iteration number recorded in the MLPerf log entries.
      use_cuda: when True, move the model and each input to the GPU.

    Returns:
      True when the first COCOeval statistic (`E.stats[0]`) is at least
      `threshold`, otherwise False.
    """
    from pycocotools.cocoeval import COCOeval
    print("")
    model.eval()
    if use_cuda:
        model.cuda()
    ret = []

    overlap_threshold = 0.50
    nms_max_detections = 200
    mlperf_log.ssd_print(key=mlperf_log.NMS_THRESHOLD, value=overlap_threshold)
    mlperf_log.ssd_print(key=mlperf_log.NMS_MAX_DETECTIONS,
                         value=nms_max_detections)

    mlperf_log.ssd_print(key=mlperf_log.EVAL_START, value=epoch)

    start = time.time()
    for idx, image_id in enumerate(coco.img_keys):
        img, (htot, wtot), _, _ = coco[idx]

        with torch.no_grad():
            print("Parsing image: {}/{}".format(idx + 1, len(coco)), end="\r")
            inp = img.unsqueeze(0)
            if use_cuda:
                inp = inp.cuda()
            ploc, plabel = model(inp)

            try:
                result = encoder.decode_batch(ploc, plabel, overlap_threshold,
                                              nms_max_detections)[0]
            except Exception:
                # Best-effort: a decode failure is treated as "no detections"
                # for this image. Catching Exception (not a bare except) lets
                # KeyboardInterrupt / SystemExit still stop the run.
                print("")
                print("No object detected in idx: {}".format(idx))
                continue

            loc, label, prob = [r.cpu().numpy() for r in result]
            for loc_, label_, prob_ in zip(loc, label, prob):
                # One result row per detection: image id, box origin and
                # extent scaled by the image size, score, category id.
                ret.append([image_id,
                            loc_[0] * wtot,
                            loc_[1] * htot,
                            (loc_[2] - loc_[0]) * wtot,
                            (loc_[3] - loc_[1]) * htot,
                            prob_,
                            inv_map[label_]])
    print("")
    print("Predicting Ended, total time: {:.2f} s".format(time.time() - start))

    cocoDt = cocoGt.loadRes(np.array(ret))

    E = COCOeval(cocoGt, cocoDt, iouType='bbox')
    E.evaluate()
    E.accumulate()
    E.summarize()
    print("Current AP: {:.5f} AP goal: {:.5f}".format(E.stats[0], threshold))

    # put your model back into training mode
    model.train()

    current_accuracy = E.stats[0]
    mlperf_log.ssd_print(key=mlperf_log.EVAL_SIZE, value=idx + 1)
    mlperf_log.ssd_print(key=mlperf_log.EVAL_ACCURACY,
                         value={
                             "epoch": epoch,
                             "value": current_accuracy
                         })
    mlperf_log.ssd_print(key=mlperf_log.EVAL_ITERATION_ACCURACY,
                         value={
                             "iteration": iteration,
                             "value": current_accuracy
                         })
    mlperf_log.ssd_print(key=mlperf_log.EVAL_TARGET, value=threshold)
    mlperf_log.ssd_print(key=mlperf_log.EVAL_STOP, value=epoch)
    # Average Precision (AP) @[ IoU=0.50:0.95 | area=all | maxDets=100 ]
    return current_accuracy >= threshold
예제 #17
0
def _model_fn(features, labels, mode, params, model):
    """Model definition for the SSD model based on ResNet-50.

    Args:
      features: the input image tensor with shape [batch_size, height, width, 3].
        The height and width are fixed and equal. In PREDICT mode this is
        instead a dictionary holding the image under the 'image' key; the
        remaining entries are passed through into the predictions.
      labels: the input labels in a dictionary. The labels include class targets
        and box targets which are dense label maps. The labels are generated from
        get_input_fn function in data/dataloader.py
      mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
      params: the dictionary defines hyperparameters of model. The default
        settings are in default_hparams function in this file.
      model: the SSD model outputs class logits and box regression outputs. It
        is called as `model(features, params, is_training_bn=...)`.

    Returns:
      spec: the EstimatorSpec or TPUEstimatorSpec to run training, evaluation,
        or prediction.

    Raises:
      NotImplementedError: if `mode` is EVAL.
    """
    if mode == tf.estimator.ModeKeys.PREDICT:
        # In PREDICT mode everything arrives in `features`; split the image
        # out and keep the remaining entries as `labels`.
        labels = features
        features = labels.pop('image')

    # Manually apply the double transpose trick for training data.
    if params['transpose_input'] and mode != tf.estimator.ModeKeys.PREDICT:
        features = tf.transpose(features, [3, 0, 1, 2])
        labels[ssd_constants.BOXES] = tf.transpose(labels[ssd_constants.BOXES],
                                                   [2, 0, 1])
        labels[ssd_constants.CLASSES] = tf.transpose(
            labels[ssd_constants.CLASSES], [2, 0, 1])

    # Normalize the image to zero mean and unit variance.
    mlperf_log.ssd_print(key=mlperf_log.DATA_NORMALIZATION_MEAN,
                         value=ssd_constants.NORMALIZATION_MEAN)
    mlperf_log.ssd_print(key=mlperf_log.DATA_NORMALIZATION_STD,
                         value=ssd_constants.NORMALIZATION_STD)

    features -= tf.constant(ssd_constants.NORMALIZATION_MEAN,
                            shape=[1, 1, 3],
                            dtype=features.dtype)

    features /= tf.constant(ssd_constants.NORMALIZATION_STD,
                            shape=[1, 1, 3],
                            dtype=features.dtype)

    def _model_outputs():
        return model(features,
                     params,
                     is_training_bn=(mode == tf.estimator.ModeKeys.TRAIN))

    if params['use_bfloat16']:
        with bfloat16.bfloat16_scope():
            cls_outputs, box_outputs = _model_outputs()
            # Cast every level back to float32 for the loss / decoding below.
            for level in cls_outputs.keys():
                cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
                box_outputs[level] = tf.cast(box_outputs[level], tf.float32)
    else:
        cls_outputs, box_outputs = _model_outputs()

    # First check if it is in PREDICT mode.
    if mode == tf.estimator.ModeKeys.PREDICT:
        flattened_cls, flattened_box = concat_outputs(cls_outputs, box_outputs)
        mlperf_log.ssd_print(key=mlperf_log.SCALES,
                             value=ssd_constants.BOX_CODER_SCALES)
        ssd_box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
            scale_factors=ssd_constants.BOX_CODER_SCALES)

        anchors = box_list.BoxList(
            tf.convert_to_tensor(dataloader.DefaultBoxes()('ltrb')))

        decoded_boxes = box_coder.batch_decode(encoded_boxes=flattened_box,
                                               box_coder=ssd_box_coder,
                                               anchors=anchors)

        pred_scores = tf.nn.softmax(flattened_cls, axis=2)

        pred_scores, indices = select_top_k_scores(
            pred_scores, ssd_constants.MAX_NUM_EVAL_BOXES)

        predictions = dict(
            labels,
            indices=indices,
            pred_scores=pred_scores,
            pred_box=decoded_boxes,
        )

        if params['visualize_dataloader']:
            # this is for inference visualization.
            predictions['image'] = features

        if params['use_tpu']:
            return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                                  predictions=predictions)

        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Load pretrained model from checkpoint.
    if params['resnet_checkpoint'] and mode == tf.estimator.ModeKeys.TRAIN:

        def scaffold_fn():
            """Loads pretrained model through scaffold function."""
            tf.train.init_from_checkpoint(
                params['resnet_checkpoint'], {
                    '/': 'resnet%s/' % ssd_constants.RESNET_DEPTH,
                })
            return tf.train.Scaffold()
    else:
        scaffold_fn = None

    # Set up training loss and learning rate.
    update_learning_rate_schedule_parameters(params)
    global_step = tf.train.get_or_create_global_step()
    learning_rate = learning_rate_schedule(params, global_step)
    mlperf_log.ssd_print(key=mlperf_log.OPT_LR, deferred=True)
    # cls_loss and box_loss are for logging. only total_loss is optimized.
    total_loss, cls_loss, box_loss = detection_loss(cls_outputs, box_outputs,
                                                    labels)

    total_loss += params['weight_decay'] * tf.add_n(
        [tf.nn.l2_loss(v) for v in tf.trainable_variables()])

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.MomentumOptimizer(learning_rate,
                                               momentum=ssd_constants.MOMENTUM)
        if params['use_tpu']:
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        mlperf_log.ssd_print(key=mlperf_log.OPT_NAME,
                             value='tf.train.MomentumOptimizer')
        # TODO(wangtao): figure out how to log learning rate.
        # mlperf_log.ssd_print(key=mlperf_log.OPT_LR, value=learning_rate)
        mlperf_log.ssd_print(key=mlperf_log.OPT_MOMENTUM,
                             value=ssd_constants.MOMENTUM)
        mlperf_log.ssd_print(key=mlperf_log.OPT_WEIGHT_DECAY,
                             value=params['weight_decay'])

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if params['device'] == 'gpu':
            # GPU uses tf.group to avoid dependency overhead on update_ops; also,
            # multi-GPU requires a different EstimatorSpec class object
            train_op = tf.group(optimizer.minimize(total_loss, global_step),
                                update_ops)
            # scaffold_fn is None when no pretrained checkpoint is configured;
            # calling it unconditionally would raise a TypeError.
            scaffold = scaffold_fn() if scaffold_fn is not None else None
            return model_fn_lib.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              train_op=train_op,
                                              scaffold=scaffold)
        else:
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(total_loss, global_step)

        if params['use_host_call']:

            def host_call_fn(global_step, total_loss, cls_loss, box_loss,
                             learning_rate):
                """Training host call. Creates scalar summaries for training metrics.

                This function is executed on the CPU and should not directly
                reference any Tensors in the rest of the `model_fn`. To pass
                Tensors from the model to the `metric_fn`, provide as part of
                the `host_call`. See
                https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
                for more information.

                Arguments should match the list of `Tensor` objects passed as
                the second element in the tuple passed to `host_call`.

                Args:
                  global_step: `Tensor` with shape `[batch, ]` for the global_step.
                  total_loss: `Tensor` with shape `[batch, ]` for the training loss.
                  cls_loss: `Tensor` with shape `[batch, ]` for the training cls loss.
                  box_loss: `Tensor` with shape `[batch, ]` for the training box loss.
                  learning_rate: `Tensor` with shape `[batch, ]` for the learning_rate.

                Returns:
                  List of summary ops to run on the CPU host.
                """
                # Outfeed supports int32 but global_step is expected to be int64.
                global_step = tf.reduce_mean(global_step)
                # Host call fns are executed FLAGS.iterations_per_loop times after one
                # TPU loop is finished, setting max_queue value to the same as number of
                # iterations will make the summary writer only flush the data to storage
                # once per loop.
                with (tf.contrib.summary.create_file_writer(
                        params['model_dir'],
                        max_queue=params['iterations_per_loop']).as_default()):
                    with tf.contrib.summary.always_record_summaries():
                        tf.contrib.summary.scalar('total_loss',
                                                  tf.reduce_mean(total_loss),
                                                  step=global_step)
                        tf.contrib.summary.scalar('cls_loss',
                                                  tf.reduce_mean(cls_loss),
                                                  step=global_step)
                        tf.contrib.summary.scalar('box_loss',
                                                  tf.reduce_mean(box_loss),
                                                  step=global_step)
                        tf.contrib.summary.scalar(
                            'learning_rate',
                            tf.reduce_mean(learning_rate),
                            step=global_step)

                        return tf.contrib.summary.all_summary_ops()

            # To log the loss, current learning rate, and epoch for Tensorboard, the
            # summary op needs to be run on the host CPU via host_call. host_call
            # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
            # dimension. These Tensors are implicitly concatenated to
            # [params['batch_size']].
            global_step_t = tf.reshape(global_step, [1])
            total_loss_t = tf.reshape(total_loss, [1])
            cls_loss_t = tf.reshape(cls_loss, [1])
            box_loss_t = tf.reshape(box_loss, [1])
            learning_rate_t = tf.reshape(learning_rate, [1])
            host_call = (host_call_fn, [
                global_step_t, total_loss_t, cls_loss_t, box_loss_t,
                learning_rate_t
            ])
    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:
        # Evaluation goes through PREDICT mode; EVAL is not supported here.
        raise NotImplementedError

    return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                          loss=total_loss,
                                          train_op=train_op,
                                          host_call=host_call,
                                          eval_metrics=eval_metrics,
                                          scaffold_fn=scaffold_fn)
예제 #18
0
def train300_mlperf_coco(args):
    """Train SSD300 on COCO 2017 until the AP target is hit or epochs run out.

    Args:
      args: parsed command-line options. The attributes read here are
        `no_cuda`, `data`, `batch_size`, `checkpoint`, `iteration`, `epochs`,
        `evaluation`, `no_save` and `threshold`.

    Returns:
      True as soon as an evaluation (run at the iterations listed in
      `args.evaluation`) reports that `args.threshold` was reached; False if
      all `args.epochs` epochs finish without reaching it.
    """
    from coco import COCO
    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    dboxes = dboxes300_coco()
    encoder = Encoder(dboxes)

    input_size = 300
    train_trans = SSDTransformer(dboxes, (input_size, input_size), val=False)
    val_trans = SSDTransformer(dboxes, (input_size, input_size), val=True)
    mlperf_log.ssd_print(key=mlperf_log.INPUT_SIZE, value=input_size)

    val_annotate = os.path.join(args.data,
                                "annotations/instances_val2017.json")
    val_coco_root = os.path.join(args.data, "val2017")
    train_annotate = os.path.join(args.data,
                                  "annotations/instances_train2017.json")
    train_coco_root = os.path.join(args.data, "train2017")

    cocoGt = COCO(annotation_file=val_annotate)
    val_coco = COCODetection(val_coco_root, val_annotate, val_trans)
    train_coco = COCODetection(train_coco_root, train_annotate, train_trans)

    train_dataloader = DataLoader(train_coco,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=4)
    # set shuffle=True in DataLoader
    mlperf_log.ssd_print(key=mlperf_log.INPUT_SHARD, value=None)
    mlperf_log.ssd_print(key=mlperf_log.INPUT_ORDER)
    mlperf_log.ssd_print(key=mlperf_log.INPUT_BATCH_SIZE,
                         value=args.batch_size)

    ssd300 = SSD300(train_coco.labelnum)
    if args.checkpoint is not None:
        print("loading model checkpoint", args.checkpoint)
        od = torch.load(args.checkpoint)
        ssd300.load_state_dict(od["model"])
    ssd300.train()
    if use_cuda:
        ssd300.cuda()
    loss_func = Loss(dboxes)
    if use_cuda:
        loss_func.cuda()

    current_lr = 1e-3
    current_momentum = 0.9
    current_weight_decay = 5e-4
    optim = torch.optim.SGD(ssd300.parameters(),
                            lr=current_lr,
                            momentum=current_momentum,
                            weight_decay=current_weight_decay)
    mlperf_log.ssd_print(key=mlperf_log.OPT_NAME, value="SGD")
    mlperf_log.ssd_print(key=mlperf_log.OPT_LR, value=current_lr)
    mlperf_log.ssd_print(key=mlperf_log.OPT_MOMENTUM, value=current_momentum)
    mlperf_log.ssd_print(key=mlperf_log.OPT_WEIGHT_DECAY,
                         value=current_weight_decay)

    print("epoch", "nbatch", "loss")

    iter_num = args.iteration
    avg_loss = 0.0
    inv_map = {v: k for k, v in val_coco.label_map.items()}

    mlperf_log.ssd_print(key=mlperf_log.TRAIN_LOOP)
    for epoch in range(args.epochs):
        mlperf_log.ssd_print(key=mlperf_log.TRAIN_EPOCH, value=epoch)
        for nbatch, (img, img_size, bbox,
                     label) in enumerate(train_dataloader):

            # Step-wise learning-rate decay at fixed iteration counts.
            if iter_num == 160000:
                current_lr = 1e-4
                print("")
                print("lr decay step #1")
                for param_group in optim.param_groups:
                    param_group['lr'] = current_lr
                mlperf_log.ssd_print(key=mlperf_log.OPT_LR, value=current_lr)

            if iter_num == 200000:
                current_lr = 1e-5
                print("")
                print("lr decay step #2")
                for param_group in optim.param_groups:
                    param_group['lr'] = current_lr
                mlperf_log.ssd_print(key=mlperf_log.OPT_LR, value=current_lr)

            if use_cuda:
                img = img.cuda()
            img = Variable(img, requires_grad=True)
            ploc, plabel = ssd300(img)
            trans_bbox = bbox.transpose(1, 2).contiguous()
            if use_cuda:
                trans_bbox = trans_bbox.cuda()
                label = label.cuda()
            gloc = Variable(trans_bbox, requires_grad=False)
            glabel = Variable(label, requires_grad=False)
            loss = loss_func(ploc, plabel, gloc, glabel)

            # Keep an exponential moving average, skipping infinite losses.
            if not np.isinf(loss.item()):
                avg_loss = 0.999 * avg_loss + 0.001 * loss.item()

            print("Iteration: {:6d}, Loss function: {:5.3f}, Average Loss: {:.3f}"
                  .format(iter_num, loss.item(), avg_loss), end="\r")
            optim.zero_grad()
            loss.backward()
            optim.step()

            if iter_num in args.evaluation:
                if not args.no_save:
                    print("")
                    print("saving model...")
                    torch.save(
                        {
                            "model": ssd300.state_dict(),
                            "label_map": train_coco.label_info
                        }, "./models/iter_{}.pt".format(iter_num))

                # Forward use_cuda so evaluation does not try to move the
                # model to the GPU on a CPU-only run (coco_eval defaults to
                # use_cuda=True).
                if coco_eval(ssd300, val_coco, cocoGt, encoder, inv_map,
                             args.threshold, epoch, iter_num,
                             use_cuda=use_cuda):
                    return True

            iter_num += 1
    return False
예제 #19
0
def ssd_print(*args, **kwargs):
    """Forward an MLPerf SSD log call, emitting it from rank 0 only.

    Every caller first passes through ``barrier()``. Afterwards only the
    process for which ``get_rank()`` returns 0 forwards the arguments to
    ``mlperf_log.ssd_print``; all other ranks return without logging.

    ``stack_offset`` is always overwritten with 2 before forwarding
    (presumably so the reported call site is this wrapper's caller rather
    than the wrapper itself -- confirm against ``mlperf_log``).
    """
    barrier()
    if get_rank() != 0:
        return
    kwargs['stack_offset'] = 2
    mlperf_log.ssd_print(*args, **kwargs)
예제 #20
0
        def _parse_example(data):
            """Convert one decoded example into model inputs.

            Reads ``self._is_training`` and ``params`` from the enclosing
            scope.

            Training: returns ``(image, labels)`` after crop, horizontal flip,
            color jitter, normalization and label encoding.

            Evaluation: returns a single dict with the resized and normalized
            image, the ground truth trimmed/padded to
            ``ssd_constants.MAX_NUM_EVAL_BOXES`` rows, the numeric source id
            and the image shape taken before resizing.
            """
            with tf.name_scope('augmentation'):
                source_id = data['source_id']
                image = tf.image.convert_image_dtype(
                    data['image'], dtype=tf.float32)
                raw_shape = tf.shape(image)
                boxes = data['groundtruth_boxes']
                raw_classes = tf.reshape(data['groundtruth_classes'], [-1, 1])

                # Only 80 of the 90 COCO classes are used.
                classes = tf.cast(
                    tf.gather(
                        tf.convert_to_tensor(ssd_constants.CLASS_MAP),
                        raw_classes),
                    dtype=tf.float32)

                if not self._is_training:
                    # Evaluation path: plain resize, no augmentation.
                    mlperf_log.ssd_print(
                        key=mlperf_log.INPUT_SIZE,
                        value=ssd_constants.IMAGE_SIZE)
                    target_size = (ssd_constants.IMAGE_SIZE,
                                   ssd_constants.IMAGE_SIZE)
                    resized = tf.image.resize_images(
                        image[tf.newaxis, :, :, :], size=target_size)
                    image = normalize_image(resized[0, :, :, :])

                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    max_boxes = ssd_constants.MAX_NUM_EVAL_BOXES

                    def trim_and_pad(inp_tensor, dim_1):
                        """Cap the row count at max_boxes, zero-padding below."""
                        trimmed = inp_tensor[:max_boxes]
                        num_pad = max_boxes - tf.shape(trimmed)[0]
                        padded = tf.pad(trimmed, [[0, num_pad], [0, 0]])
                        return tf.reshape(padded, [max_boxes, dim_1])

                    boxes = trim_and_pad(boxes, 4)
                    classes = trim_and_pad(classes, 1)

                    return {
                        ssd_constants.IMAGE: image,
                        ssd_constants.BOXES: boxes,
                        ssd_constants.CLASSES: classes,
                        ssd_constants.SOURCE_ID:
                            tf.string_to_number(source_id, tf.int32),
                        ssd_constants.RAW_SHAPE: raw_shape,
                    }

                # Training path: augment, then encode the labels.
                image, boxes, classes = ssd_crop(image, boxes, classes)

                # random_horizontal_flip() is hard coded to flip with 50% chance.
                mlperf_log.ssd_print(
                    key=mlperf_log.RANDOM_FLIP_PROBABILITY, value=0.5)
                image, boxes = preprocessor.random_horizontal_flip(
                    image=image, boxes=boxes)

                # TODO(shibow): Investigate the parameters for color jitter.
                image = color_jitter(
                    image,
                    brightness=0.125,
                    contrast=0.5,
                    saturation=0.5,
                    hue=0.05)
                image = normalize_image(image)

                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)

                encoded_classes, encoded_boxes, num_matched_boxes = (
                    encode_labels(boxes, classes))

                # TODO(taylorrobie): Check that this cast is valid.
                encoded_classes = tf.cast(encoded_classes, tf.int32)

                labels = {
                    ssd_constants.NUM_MATCHED_BOXES: num_matched_boxes,
                    ssd_constants.BOXES: encoded_boxes,
                    ssd_constants.CLASSES: encoded_classes,
                }

                # This is for dataloader visualization; actual model doesn't
                # use this.
                if params['visualize_dataloader']:
                    box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
                        scale_factors=ssd_constants.BOX_CODER_SCALES)
                    rel_codes = tf.squeeze(encoded_boxes)
                    anchors = box_list.BoxList(
                        tf.convert_to_tensor(DefaultBoxes()('ltrb')))
                    decoded = box_coder.decode(
                        rel_codes=rel_codes, anchors=anchors).get()
                    decoded_boxes = tf.expand_dims(decoded, axis=0)
                    labels['decoded_boxes'] = tf.squeeze(decoded_boxes)

                return image, labels
예제 #21
0
    def __call__(self, params):
        """Build the tf.data input pipeline for the SSD model.

        Args:
          params: mapping read by this method. 'batch_size' and
            'use_bfloat16' are always read; 'context' (an object exposing
            `num_hosts` and `current_input_fn_deployment()`) and
            'visualize_dataloader' are only read when `self._is_training`
            is truthy.

        Returns:
          A batched, prefetched tf.data dataset. In training each element is
          an `(image, labels)` pair; in evaluation each element is a dict
          keyed by `ssd_constants` names (image, boxes, classes, source id,
          raw shape).
        """
        example_decoder = tf_example_decoder.TfExampleDecoder()

        def _parse_example(data):
            """Convert one decoded example into model inputs.

            Returns `(image, labels)` when training, otherwise a single dict
            with the resized image and padded ground truth.
            """
            with tf.name_scope('augmentation'):
                source_id = data['source_id']
                image = tf.image.convert_image_dtype(data['image'],
                                                     dtype=tf.float32)
                # Shape is captured before any crop/resize so evaluation can
                # report it alongside the resized image.
                raw_shape = tf.shape(image)
                boxes = data['groundtruth_boxes']
                classes = tf.reshape(data['groundtruth_classes'], [-1, 1])

                # Only 80 of the 90 COCO classes are used.
                class_map = tf.convert_to_tensor(ssd_constants.CLASS_MAP)
                classes = tf.gather(class_map, classes)
                classes = tf.cast(classes, dtype=tf.float32)

                if self._is_training:
                    # Augmentation order: crop -> flip -> color jitter ->
                    # normalize.
                    image, boxes, classes = ssd_crop(image, boxes, classes)

                    # random_horizontal_flip() is hard coded to flip with 50% chance.
                    mlperf_log.ssd_print(
                        key=mlperf_log.RANDOM_FLIP_PROBABILITY, value=0.5)
                    image, boxes = preprocessor.random_horizontal_flip(
                        image=image, boxes=boxes)

                    # TODO(shibow): Investigate the parameters for color jitter.
                    image = color_jitter(image,
                                         brightness=0.125,
                                         contrast=0.5,
                                         saturation=0.5,
                                         hue=0.05)
                    image = normalize_image(image)

                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    encoded_classes, encoded_boxes, num_matched_boxes = encode_labels(
                        boxes, classes)

                    # TODO(taylorrobie): Check that this cast is valid.
                    encoded_classes = tf.cast(encoded_classes, tf.int32)

                    labels = {
                        ssd_constants.NUM_MATCHED_BOXES: num_matched_boxes,
                        ssd_constants.BOXES: encoded_boxes,
                        ssd_constants.CLASSES: encoded_classes,
                    }
                    # This is for dataloader visualization; actual model doesn't use this.
                    if params['visualize_dataloader']:
                        # Decode the encoded boxes back against the default
                        # 'ltrb' anchors and attach them as an extra label.
                        box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
                            scale_factors=ssd_constants.BOX_CODER_SCALES)
                        decoded_boxes = tf.expand_dims(box_coder.decode(
                            rel_codes=tf.squeeze(encoded_boxes),
                            anchors=box_list.BoxList(
                                tf.convert_to_tensor(
                                    DefaultBoxes()('ltrb')))).get(),
                                                       axis=0)
                        labels['decoded_boxes'] = tf.squeeze(decoded_boxes)

                    return image, labels

                else:
                    mlperf_log.ssd_print(key=mlperf_log.INPUT_SIZE,
                                         value=ssd_constants.IMAGE_SIZE)
                    # resize_images is fed a batch of one here; the leading
                    # axis is added for the call and dropped again afterwards.
                    image = tf.image.resize_images(
                        image[tf.newaxis, :, :, :],
                        size=(ssd_constants.IMAGE_SIZE,
                              ssd_constants.IMAGE_SIZE))[0, :, :, :]

                    image = normalize_image(image)

                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    def trim_and_pad(inp_tensor, dim_1):
                        """Limit the number of boxes, and pad if necessary."""
                        inp_tensor = inp_tensor[:ssd_constants.
                                                MAX_NUM_EVAL_BOXES]
                        num_pad = ssd_constants.MAX_NUM_EVAL_BOXES - tf.shape(
                            inp_tensor)[0]
                        # Zero-pad extra rows at the bottom only.
                        inp_tensor = tf.pad(inp_tensor, [[0, num_pad], [0, 0]])
                        # Reshape pins the static shape to
                        # [MAX_NUM_EVAL_BOXES, dim_1].
                        return tf.reshape(
                            inp_tensor,
                            [ssd_constants.MAX_NUM_EVAL_BOXES, dim_1])

                    boxes, classes = trim_and_pad(boxes,
                                                  4), trim_and_pad(classes, 1)

                    return {
                        ssd_constants.IMAGE:
                        image,
                        ssd_constants.BOXES:
                        boxes,
                        ssd_constants.CLASSES:
                        classes,
                        ssd_constants.SOURCE_ID:
                        tf.string_to_number(source_id, tf.int32),
                        ssd_constants.RAW_SHAPE:
                        raw_shape,
                    }

        batch_size = params['batch_size']
        # File names are listed in a fixed order; shuffling, if any, is
        # applied explicitly below.
        dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)
        mlperf_log.ssd_print(key=mlperf_log.INPUT_ORDER)
        mlperf_log.ssd_print(key=mlperf_log.INPUT_BATCH_SIZE, value=batch_size)

        if self._is_training:
            # Split the file list into `num_hosts` shards and keep the one
            # indexed by current_input_fn_deployment()[1] (presumably this
            # host's index -- confirm against the TPU context API), then
            # shuffle the remaining file names.
            dataset = dataset.shard(
                params['context'].num_hosts,
                params['context'].current_input_fn_deployment()[1])
            dataset = dataset.shuffle(
                tf.to_int64(256 / params['context'].num_hosts))

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            """Open one TFRecord file as a dataset with a 1-element prefetch."""
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        # Read up to 32 files concurrently; sloppy (non-deterministic order)
        # interleaving is enabled only for training.
        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(_prefetch_dataset,
                                                cycle_length=32,
                                                sloppy=self._is_training))

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.map(example_decoder.decode, num_parallel_calls=64)

        if self._is_training:
            # Pair every example with a flag telling whether it has at least
            # one ground-truth box, then drop the examples that have none.
            dataset = dataset.map(
                # pylint: disable=g-long-lambda
                lambda data:
                (data, tf.greater(tf.shape(data['groundtruth_boxes'])[0], 0)),
                num_parallel_calls=64)
            dataset = dataset.filter(lambda data, pred: pred)
            dataset = dataset.prefetch(batch_size * 64)
            # Cache the decoded, filtered examples (before augmentation), then
            # shuffle with a 64-element buffer and repeat.
            dataset = dataset.cache().apply(
                tf.contrib.data.shuffle_and_repeat(64))
            dataset = dataset.prefetch(batch_size * 64)
            # The filter flag is discarded here; only `data` is parsed.
            dataset = dataset.apply(
                tf.contrib.data.map_and_batch(
                    lambda data, _: _parse_example(data),
                    batch_size=batch_size,
                    drop_remainder=True,
                    num_parallel_calls=128))
        else:
            dataset = dataset.prefetch(batch_size * 64)
            dataset = dataset.apply(
                tf.contrib.data.map_and_batch(_parse_example,
                                              batch_size=batch_size,
                                              drop_remainder=True,
                                              num_parallel_calls=128))

        # Manually apply the double transpose trick for training data.
        def _transpose_dataset(image, labels):
            """Move the leading (batch) axis of image/boxes/classes to the end.

            Mutates and returns the `labels` dict that was passed in.
            """
            image = tf.transpose(image, [1, 2, 3, 0])
            labels[ssd_constants.BOXES] = tf.transpose(
                labels[ssd_constants.BOXES], [1, 2, 0])
            labels[ssd_constants.CLASSES] = tf.transpose(
                labels[ssd_constants.CLASSES], [1, 2, 0])
            return image, labels

        if self._transpose_input and self._is_training:
            dataset = dataset.map(_transpose_dataset, num_parallel_calls=128)

        dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)

        return dataset
예제 #22
0
    def crop_proposal():
        """Sample candidate crops and pick one that passes every validity check.

        Draws ``ssd_constants.NUM_CROP_PASSES`` random crop rectangles at once
        and, among the valid ones, selects the candidate with the highest pass
        index. Reads ``boxes`` and ``num_boxes`` from the enclosing scope
        (assumes ``boxes`` is ``[num_boxes, 4]`` where columns ``i`` and
        ``i + 2`` form an axis pair, expressed in the same normalized frame as
        the crops -- TODO confirm against the caller).

        A candidate is valid when all of the following hold:
          * its aspect ratio lies strictly between 1:2 and 2:1;
          * the overlap returned by ``calc_iou_tensor`` exceeds, for every
            box, a threshold drawn at random from
            ``ssd_constants.CROP_MIN_IOU_CHOICES``;
          * at least one box center lies strictly inside the candidate.

        Returns:
          use_crop: scalar bool tensor, True if any candidate was valid.
          output_ltrb: ``[4]`` float tensor (left, top, right, bottom) of the
            selected crop; all zeros when no candidate is valid.
          output_masks: ``[num_boxes]`` bool tensor marking the boxes whose
            center lies inside the selected crop; all False when no candidate
            is valid.
        """
        mlperf_log.ssd_print(key=mlperf_log.NUM_CROPPING_ITERATIONS,
                             value=ssd_constants.NUM_CROP_PASSES)

        def rand_vec(minval, maxval):
            """Uniform float32 samples, one row per crop pass."""
            return tf.random_uniform(
                shape=(ssd_constants.NUM_CROP_PASSES, 1),
                minval=minval,
                maxval=maxval,
                dtype=tf.float32)

        width, height = rand_vec(0.3, 1), rand_vec(0.3, 1)
        # Upper bounds depend on the sampled size so that every crop stays
        # inside the unit square.
        left, top = rand_vec(0, 1 - width), rand_vec(0, 1 - height)

        right = left + width
        bottom = top + height

        ltrb = tf.concat([left, top, right, bottom], axis=1)

        min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0]
        ious = calc_iou_tensor(ltrb, boxes)

        # discard any bboxes whose center not in the cropped image
        xc, yc = [
            tf.tile(0.5 * (boxes[:, i + 0] + boxes[:, i + 2])[tf.newaxis, :],
                    (ssd_constants.NUM_CROP_PASSES, 1)) for i in range(2)
        ]

        masks = tf.reduce_all(tf.stack([
            tf.greater(xc, tf.tile(left, (1, num_boxes))),
            tf.less(xc, tf.tile(right, (1, num_boxes))),
            tf.greater(yc, tf.tile(top, (1, num_boxes))),
            tf.less(yc, tf.tile(bottom, (1, num_boxes))),
        ], axis=2), axis=2)

        # Checks of whether a crop is valid.
        # Both ratios must be bounded: testing height / width twice (as the
        # code previously did) let arbitrarily wide crops through.
        valid_aspect = tf.logical_and(tf.less(height / width, 2),
                                      tf.less(width / height, 2))
        valid_ious = tf.reduce_all(tf.greater(ious, min_iou),
                                   axis=1,
                                   keepdims=True)
        valid_masks = tf.reduce_any(masks, axis=1, keepdims=True)

        valid_all = tf.cast(
            tf.reduce_all(
                tf.concat([valid_aspect, valid_ious, valid_masks], axis=1),
                axis=1), tf.int32)

        # One indexed, as zero is needed for the case of no matches.
        index = tf.range(1, 1 + ssd_constants.NUM_CROP_PASSES, dtype=tf.int32)

        # Either one-hot, or zeros if there is no valid crop. The max picks
        # the highest-indexed valid candidate; with none valid the max is 0,
        # which matches no entry of the one-indexed range.
        selection = tf.equal(tf.reduce_max(index * valid_all), index)

        use_crop = tf.reduce_any(selection)
        # Masking with the one-hot selection and reducing over the candidate
        # axis extracts the chosen row without a dynamic gather.
        output_ltrb = tf.reduce_sum(
            tf.multiply(
                ltrb,
                tf.tile(tf.cast(selection, tf.float32)[:, tf.newaxis],
                        (1, 4))),
            axis=0)
        output_masks = tf.reduce_any(
            tf.logical_and(
                masks, tf.tile(selection[:, tf.newaxis], (1, num_boxes))),
            axis=0)

        return use_crop, output_ltrb, output_masks