Beispiel #1
0
 def find(self, center_w, center_h):
     """Pick the anchor that overlaps most with a box of the given size.

     A ``BoundBox(0, 0, center_w, center_h)`` is compared against every
     entry of ``self.anchors`` with ``bbox_iou``.

     Returns:
         ``(best_anchor, max_iou)``: the index of the first anchor with the
         highest IoU and that IoU. Both are ``-1`` when ``self.anchors`` is
         empty or no IoU exceeds ``-1``.
     """
     candidate = BoundBox(0, 0, center_w, center_h)
     best_anchor, max_iou = -1, -1
     for index, anchor in enumerate(self.anchors):
         overlap = bbox_iou(candidate, anchor)
         # Strict comparison: on ties the earliest anchor is kept.
         if overlap > max_iou:
             best_anchor, max_iou = index, overlap
     return best_anchor, max_iou
    def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
        """Build the per-cell training targets for one detection grid.

        Args:
            target: tensor indexed as ``target[b, t, k]``. ``k == 0`` is read
                as the class index and ``k == 1..4`` as x, y, w, h, which are
                multiplied by the grid size (presumably normalised to
                [0, 1] -- confirm against the data loader). Rows whose sum is
                zero are treated as padding and skipped.
            anchors: sequence of ``self.num_anchors`` ``(w, h)`` pairs,
                compared directly against the grid-scaled box size.
            in_w: grid width.
            in_h: grid height.
            ignore_threshold: anchors whose shape IoU with a ground-truth box
                exceeds this value are removed from ``noobj_mask`` at that
                box's cell.

        Returns:
            Tuple ``(mask, noobj_mask, tx, ty, tw, th, tconf, tcls)``. All
            but ``tcls`` have shape ``(bs, num_anchors, in_h, in_w)``;
            ``tcls`` has an extra trailing ``num_classes`` dimension holding
            a one-hot label.
        """
        bs = target.size(0)
        grid_shape = (bs, self.num_anchors, in_h, in_w)

        mask = torch.zeros(*grid_shape)
        noobj_mask = torch.ones(*grid_shape)
        tx = torch.zeros(*grid_shape)
        ty = torch.zeros(*grid_shape)
        tw = torch.zeros(*grid_shape)
        th = torch.zeros(*grid_shape)
        tconf = torch.zeros(*grid_shape)
        tcls = torch.zeros(bs, self.num_anchors, in_h, in_w, self.num_classes)

        # Anchor boxes placed at the origin as (0, 0, w, h). They do not
        # depend on the ground truth, so build them once instead of once per
        # box.
        anchor_shapes = torch.FloatTensor(
            np.concatenate(
                (np.zeros((self.num_anchors, 2)), np.array(anchors)), 1))

        for b in range(bs):
            for t in range(target.shape[1]):
                if target[b, t].sum() == 0:
                    # Padding row, no object.
                    continue
                # Scale the box to grid units.
                gx = target[b, t, 1] * in_w
                gy = target[b, t, 2] * in_h
                gw = target[b, t, 3] * in_w
                gh = target[b, t, 4] * in_h
                # Cell containing the box centre. Clamp so that a centre
                # lying exactly on the right/bottom edge (or slightly outside
                # the image) maps to the nearest valid cell instead of
                # raising IndexError or wrapping around.
                gi = min(max(int(gx), 0), in_w - 1)
                gj = min(max(int(gy), 0), in_h - 1)
                # Ground-truth box at the origin, so only shapes are compared.
                gt_box = torch.FloatTensor(
                    np.array([0, 0, float(gw), float(gh)])).unsqueeze(0)
                anch_ious = bbox_iou(gt_box, anchor_shapes)
                # Anchors that overlap well are not penalised as background.
                noobj_mask[b, anch_ious > ignore_threshold, gj, gi] = 0
                # The best matching anchor becomes responsible for the box.
                best_n = np.argmax(anch_ious)

                mask[b, best_n, gj, gi] = 1
                # Centre offset inside the cell.
                tx[b, best_n, gj, gi] = gx - gi
                ty[b, best_n, gj, gi] = gy - gj
                # Log-ratio of box size to anchor size; the epsilon guards
                # against log(0) for degenerate boxes.
                tw[b, best_n, gj,
                   gi] = math.log(gw / anchors[best_n][0] + 1e-16)
                th[b, best_n, gj,
                   gi] = math.log(gh / anchors[best_n][1] + 1e-16)
                # Objectness target.
                tconf[b, best_n, gj, gi] = 1
                # One-hot encoding of the label.
                tcls[b, best_n, gj, gi, int(target[b, t, 0])] = 1

        return mask, noobj_mask, tx, ty, tw, th, tconf, tcls
Beispiel #3
0
def main(argv):
  """Train the recurrent bounding-box tracker and save logs, plots and a model.

  Creates a fresh experiment directory, writes the configuration to
  ``exp_config.txt``, builds the training graph (conv net -> linear layer ->
  LSTM cell -> bbox regressor, unrolled by hand over ``seq_len`` steps),
  trains on batches from ``dataset.FlyingShapesDataHandler`` and finally
  saves the model and the loss / IoU / failure / robustness curves.

  Args:
    argv: Ignored.

  Raises:
    NotImplementedError: if ``join_layer`` is 'crossconv' or 'film'.
    ValueError: if ``join_layer`` is any other unsupported value.
  """
  del argv  # Unused.
  #################
  # CONFIG
  #################

  experiment_name = None
  num_epochs = 100
  num_train_iter = 50
  test_every_n_epochs = 1
  batch_size = 5
  learning_rate = 1e-4
  seq_len = 10
  num_hidden = 100
  outputlayer_size = 4
  ssprob = 1  # scheduled sampling
  x_size = 32
  lstm_type = 'vanilla'  # 'conv'  #
  convnet_type = 'small'
  conv_out_size = 72
  join_layer = 'none'  # 'concat'  #
  ds_loss_type =  'cosine'  # None  # 'xent' #
  iou_loss = False  # True
  # bbox_loss_parameterization = 'coords'
  detection = None  # 'shape'
  device = 'gpu'
  dtype = tf.float32
  separate_lstms = False  # True

  # Snapshot of every local defined above; it is dumped to exp_config.txt, so
  # this line must stay directly after the config section. The copy keeps the
  # snapshot from aliasing the live frame dictionary.
  config = dict(locals())

  # make experimental directory
  if experiment_name is None:
    experiment_path = '_'.join([lstm_type, convnet_type, ds_loss_type,
                                join_layer, str(ssprob),
                                str(batch_size), str(seq_len),
                                str(num_epochs)])
  else:
    experiment_path = experiment_name

  # Append the first free numeric suffix so earlier runs are never overwritten.
  i = 0
  while os.path.exists(experiment_path + "_" + str(i)):
    i += 1
  experiment_path = experiment_path + "_" + str(i)
  os.mkdir(experiment_path)
  config['experiment_path'] = experiment_path
  print('Saving to ' + str(experiment_path))

  # write config file
  with open(os.path.join(experiment_path, 'exp_config.txt'), 'w') as f:
    for key in sorted(config):
      f.write(key + '\t' + str(config[key]) + '\n')

  #################
  # SET UP GRAPH
  #################

  tf.reset_default_graph()

  with tf.device('/' + device + ':0'):
    #################
    # MODEL PARAMS
    #################

    if convnet_type == 'small':
      convnet = models.SmallConvNet()

    if separate_lstms:
      # NOTE(review): this branch is incomplete. The unrolled loop below only
      # uses prelstm_w / prelstm_b / cell / state_t, which are not defined
      # here, so separate_lstms=True fails with a NameError. The variable
      # names passed to get_variable also look swapped ('prereplstm_bias' for
      # the frame bias, 'prelstm_*' for the rep weights).
      preframelstm_w = tf.get_variable('preframelstm_weight', [conv_out_size, num_hidden], dtype=dtype)
      preframelstm_b = tf.get_variable('prereplstm_bias', [num_hidden], dtype=dtype)

      prereplstm_w = tf.get_variable('prelstm_weight', [conv_out_size, num_hidden], dtype=dtype)
      prereplstm_b = tf.get_variable('prelstm_bias', [num_hidden], dtype=dtype)

      frame_lstm_initial_state = tf.nn.rnn_cell.LSTMStateTuple(
          tf.get_variable('initial_frame_c', [batch_size, num_hidden], dtype=dtype),
          tf.get_variable('initial_frame_h', [batch_size, num_hidden], dtype=dtype)
      )
      rep_lstm_initial_state = tf.nn.rnn_cell.LSTMStateTuple(
          tf.get_variable('initial_rep_c', [batch_size, num_hidden], dtype=dtype),
          tf.get_variable('initial_rep_h', [batch_size, num_hidden], dtype=dtype)
      )
      if lstm_type == 'conv':
        frame_cell = tf.contrib.rnn.ConvLSTMCell(num_hidden)
        rep_cell = tf.contrib.rnn.ConvLSTMCell(num_hidden)
      else:
        frame_cell = tf.contrib.rnn.BasicLSTMCell(num_hidden)
        rep_cell = tf.contrib.rnn.BasicLSTMCell(num_hidden)

    else:
      prelstm_w = tf.get_variable('prelstm_weight', [conv_out_size, num_hidden], dtype=dtype)
      prelstm_b = tf.get_variable('prelstm_bias', [num_hidden], dtype=dtype)

      # The initial LSTM state is a trainable variable, not zeros.
      lstm_initial_state = tf.nn.rnn_cell.LSTMStateTuple(
          tf.get_variable('initial_c', [batch_size, num_hidden], dtype=dtype),
          tf.get_variable('initial_h', [batch_size, num_hidden], dtype=dtype)
      )
      if lstm_type == 'conv':
        cell = tf.contrib.rnn.ConvLSTMCell(num_hidden)
      else:
        cell = tf.contrib.rnn.BasicLSTMCell(num_hidden)

    # params of output bbox linear layer
    bbox_w = tf.get_variable('bbox_weight', [num_hidden, outputlayer_size])
    bbox_b = tf.get_variable('bbox_bias', [outputlayer_size])

    if detection is not None:
      # NOTE(review): labels_size (and `labels` in the loss below) is never
      # defined, so enabling detection currently fails with a NameError.
      detection_w =  tf.get_variable('detection_weight', [num_hidden, labels_size], dtype=dtype)
      detection_b = tf.get_variable('detection_bias', [labels_size], dtype=dtype)

    #################
    # TRAIN GRAPH
    #################

    with tf.name_scope('train'):
      x_train = tf.placeholder(tf.float32, shape=[batch_size, seq_len, x_size, x_size, 3], name='x_train')
      bboxes = tf.placeholder(tf.float32, shape=[batch_size, seq_len, 4], name='bboxes')
      if separate_lstms:
        frame_state_t = frame_lstm_initial_state
        rep_state_t = rep_lstm_initial_state
      else:
        state_t = lstm_initial_state
      # manual unroll
      reps = []
      initialreps = []
      pred_bboxes = []
      target_bboxes = []
      with tf.variable_scope('observer'):
        for t in range(seq_len):
          if t > 0:
            tf.get_variable_scope().reuse_variables()

            # Scheduled sampling: feed back the previous prediction when the
            # random draw exceeds ssprob, otherwise use the ground truth.
            # With ssprob = 1 the ground truth is always used.
            r = tf.random_uniform([1])
            target_bbox = tf.cond(
                tf.reduce_all(r > ssprob),
                lambda: pred_bboxes[t - 1],
                lambda: bboxes[:, t, :])
          else:
            target_bbox = bboxes[:, t, :]
          input_t = x_train[:, t, :, :, :]
          # Crop each frame to its target box and resize back to frame size.
          bboxed_t = tf.image.crop_and_resize(
              input_t,
              target_bbox,
              tf.constant(range(batch_size), tf.int32),
              tf.constant([x_size, x_size], tf.int32)
          )
          conv_bboxed = tf.contrib.layers.flatten(convnet(bboxed_t))
          conv_frame = tf.contrib.layers.flatten(convnet(input_t))
          if join_layer == 'concat':
            # NOTE(review): despite the name this is an element-wise sum; a
            # real concatenation would change the width expected by prelstm_w.
            lstm_in = conv_frame + conv_bboxed
          elif join_layer == 'none':
            # NOTE(review): this doubles the frame features.
            lstm_in = conv_frame + conv_frame
          elif join_layer in ('crossconv', 'film'):
            # Previously fell through and failed later with a NameError on
            # lstm_in; fail with an explicit message instead.
            raise NotImplementedError(
                "join_layer '" + join_layer + "' is not implemented")
          else:
            raise ValueError(
                "that's not a concat layer: " + repr(join_layer))

          # TODO: concat before LSTM layer, or separate LSTMs for frame and rep?
          pre_lstm = tf.nn.relu(tf.matmul(lstm_in, prelstm_w) + prelstm_b)
          output_t, state_t = cell(pre_lstm, state_t)
          if t == 0:
            initial_rep = state_t
          # The first-step state is repeated so it can be compared against the
          # state at every later step in the representation loss.
          initialreps.append(initial_rep)
          reps.append(state_t)
          if detection is not None:
            logits = tf.matmul(output_t, detection_w) + detection_b
          pred_bbox_t = tf.nn.relu(tf.matmul(output_t, bbox_w) + bbox_b)
          pred_bboxes.append(pred_bbox_t)
          target_bboxes.append(target_bbox)

        pred_bboxes_tf = tf.stack(axis=1, values=pred_bboxes, name="preds_tf")
        target_bboxes_tf = tf.stack(axis=1, values=target_bboxes, name='targets_tf')
        reps_tf = tf.stack(axis=1, values=reps, name="reps_tf")
        initialreps_tf = tf.stack(axis=1, values=initialreps, name="initialreps_tf")

    # loss, metrics, optimizer
    bbox_loss = tf.losses.absolute_difference(target_bboxes_tf, pred_bboxes_tf)
    if ds_loss_type == 'cosine':
      rep_loss = tf.abs(tf.losses.cosine_distance(initialreps_tf, reps_tf, axis=0))
      loss = bbox_loss + rep_loss
    elif ds_loss_type == 'xent':
      rep_loss = tf.losses.sigmoid_cross_entropy(initialreps_tf, reps_tf)
      loss = bbox_loss + rep_loss
    else:
      # Still computed so it can be logged, but not part of the objective.
      rep_loss = tf.abs(tf.losses.cosine_distance(initialreps_tf, reps_tf, axis=0))
      loss = bbox_loss

    if detection is not None:
      detection_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels, logits)
      loss += detection_loss

    iou = util.bbox_iou(target_bboxes_tf, pred_bboxes_tf)
    mean_iou = tf.reduce_mean(iou)

    # iou_loss is a boolean flag; the previous `is not None` test was always
    # true, so the IoU term was added even when the flag was off.
    if iou_loss:
      # NOTE(review): adding mean_iou to a minimised loss rewards *lower*
      # overlap; this probably wants `1 - mean_iou` -- confirm intent.
      loss += mean_iou

    nonfail_count, fail_count, robustness = tf.py_func(
        util.get_failures_and_robustness, [iou, 0, 0, 0],
        [tf.int64, tf.int64, tf.float32],
        name='failure_and_robustness')

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    grad_and_vars = optimizer.compute_gradients(loss)
    optim = optimizer.apply_gradients(grad_and_vars)

    #################
    # TESTING GRAPH
    #################

    # TODO: build a test graph under tf.name_scope('test') that mirrors the
    # train unroll above (reusing the 'observer' variables) with its own
    # x_test / bboxes placeholders, and evaluate it every test_every_n_epochs
    # epochs. An earlier attempt was never finished and has been removed.

    #################
    # RUN MAIN LOOP
    #################

    cf = tf.ConfigProto(allow_soft_placement=True)
    saver = tf.train.Saver()
    # The log file is opened together with the session so it is closed even
    # when training raises.
    with tf.Session(config=cf) as sess, \
        open(os.path.join(experiment_path, 'log.txt'), 'w') as log_file:
      sess.run(tf.global_variables_initializer())
      sess.run(tf.local_variables_initializer())

      data_source = dataset.FlyingShapesDataHandler(batch_size=batch_size,
                                                    seq_len=seq_len)

      train_loss = []
      train_rep_loss = []
      train_iou = []
      train_fail = []
      train_rob = []

      for n in range(num_epochs):
        # One batch is drawn per epoch and reused for all num_train_iter
        # optimisation steps of that epoch.
        np_batch = data_source.GetUnlabelledBatch()
        batch = {
          'image': np_batch['image'],
          'bbox': np_batch['bbox']
        }
        print ('Epoch ' + str(n) )
        for i in range(num_train_iter):
          res = sess.run({
              'loss': loss,
              'rep_loss': rep_loss,
              'optim': optim,
              'iou': mean_iou,
              'fail_rate': fail_count,
              'robustness': robustness,
              'pred_bboxes': pred_bboxes_tf,
              'target_bboxes': target_bboxes_tf,
              'initial_rep': initial_rep
              },
              feed_dict={
                  x_train: batch['image'],
                  bboxes: batch['bbox']
              })

          train_loss.append(res['loss'])
          train_rep_loss.append(res['rep_loss'])
          train_iou.append(res['iou'])
          train_fail.append(res['fail_rate'])
          train_rob.append(res['robustness'])

          # PRINT TO LOG FILE AND STDOUT

          log_str = ('Train ' + str(i) + ': ' +
                     str(res['rep_loss']) + '\t' +
                     str(res['loss']) + '\t' +
                     str(res['iou']) +  '\t' +
                     str(res['fail_rate'][0]) + '\t' +
                     str(res['robustness'][0]))

          print(log_str)
          log_file.write(log_str + '\n')

        # PLOT IMGS AND BBOXES
        # First sequence of the batch, using the results of the last training
        # step: ground truth in blue, box fed to the crop in yellow,
        # prediction in green. Boxes are indexed as (y1, x1, y2, x2).
        num_rows = 1
        plt.figure(2, figsize=(20, 1))
        plt.clf()
        for i in range(seq_len):
          figgy = plt.subplot(num_rows, seq_len, i+1)
          gt = batch['bbox'][0][i]
          targetbox = res['target_bboxes'][0][i]
          predbox = res['pred_bboxes'][0][i]
          figgy.add_patch(patches.Rectangle((gt[1],
                                             gt[0]),
                                            (gt[3] - gt[1]),
                                            (gt[2] - gt[0]),
                                            linewidth=2,
                                            edgecolor='b',
                                            facecolor='none',
                                            alpha=0.6))
          figgy.add_patch(patches.Rectangle((targetbox[1],
                                             targetbox[0]),
                                            (targetbox[3] - targetbox[1]),
                                            (targetbox[2] - targetbox[0]),
                                            linewidth=2,
                                            edgecolor='y',
                                            facecolor='none',
                                            alpha=0.6))
          figgy.add_patch(patches.Rectangle((predbox[1],
                                             predbox[0]),
                                            (predbox[3] - predbox[1]),
                                            (predbox[2] - predbox[0]),
                                            linewidth=2,
                                            edgecolor='g',
                                            facecolor='none',
                                            alpha=0.6))
          plt.imshow(batch['image'][0][i])
          plt.axis('off')
        plt.savefig(os.path.join(experiment_path,'bboxes'+str(n)), bbox_inches='tight')
      saver.save(sess, os.path.join(experiment_path, 'saved_model'))

    plt.close()

    # One figure per training curve.
    plt.plot(train_loss, label='Train Loss')
    plt.legend()
    plt.savefig(os.path.join(experiment_path,'loss.png'))
    plt.gcf().clear()

    plt.plot(train_rep_loss, label='Rep Loss')
    plt.legend()
    plt.savefig(os.path.join(experiment_path,'rep_loss.png'))
    plt.gcf().clear()

    plt.plot(train_iou, label='Train IOU')
    plt.legend()
    plt.savefig(os.path.join(experiment_path,'iou.png'))
    plt.gcf().clear()

    plt.plot(train_fail, label='Train Failure Rate')
    plt.legend()
    plt.savefig(os.path.join(experiment_path,'fails.png'))
    plt.gcf().clear()

    plt.plot(train_rob, label='Train Robustness')
    plt.legend()
    plt.savefig(os.path.join(experiment_path,'robustness.png'))
Beispiel #4
0
    def filter_results(self, prediction, nms=True):
        """Threshold raw detections by objectness and run per-class NMS.

        Args:
            prediction: tensor indexed as ``[batch, box, attr]``. Attributes
                0-3 are read as (centre x, centre y, width, height),
                attribute 4 as objectness and attributes
                ``5 .. 5 + self.num_classes`` as class scores.
            nms: when true, boxes of the same class whose IoU with a
                higher-objectness box is at least ``self.nms_thresh`` are
                suppressed.

        Returns:
            A tensor with one row per kept detection,
            ``(batch index, x1, y1, x2, y2, objectness, class score,
            class index)``, or the integer ``0`` when no box exceeds
            ``self.confidence``.
        """
        conf_mask = (prediction[:, :, 4] >
                     self.confidence).float().unsqueeze(2)
        # Out-of-place multiply: the caller's tensor is left untouched.
        prediction = prediction * conf_mask

        # Nothing in the whole batch passed the objectness threshold.
        if torch.nonzero(prediction[:, :, 4]).numel() == 0:
            return 0

        # Convert (centre, size) boxes to corner form.
        box_a = prediction.new(prediction.shape)
        box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
        box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
        box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
        box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
        prediction[:, :, :4] = box_a[:, :, :4]

        batch_size = prediction.size(0)
        detections = []

        for ind in range(batch_size):
            image_pred = prediction[ind]

            # Replace the per-class scores by the best score and its index.
            max_conf, max_conf_score = torch.max(
                image_pred[:, 5:5 + self.num_classes], 1)
            max_conf = max_conf.float().unsqueeze(1)
            max_conf_score = max_conf_score.float().unsqueeze(1)
            seq = (image_pred[:, :5], max_conf, max_conf_score)
            image_pred = torch.cat(seq, 1)

            # Drop the rows that were zeroed by the objectness threshold.
            non_zero_ind = torch.nonzero(image_pred[:, 4])
            if non_zero_ind.numel() == 0:
                # This image has no detections.
                continue
            image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

            img_classes = util.unique(image_pred_[:, -1])

            for cls in img_classes:
                # Get the detections with one particular class.
                cls_mask = image_pred_ * (image_pred_[:, -1]
                                          == cls).float().unsqueeze(1)
                class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()

                image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

                # Highest objectness first.
                conf_sort_index = torch.sort(image_pred_class[:, 4],
                                             descending=True)[1]
                image_pred_class = image_pred_class[conf_sort_index]
                idx = image_pred_class.size(0)

                if nms:
                    for i in range(idx):
                        try:
                            ious = util.bbox_iou(
                                image_pred_class[i].unsqueeze(0),
                                image_pred_class[i + 1:])
                        except (ValueError, IndexError):
                            # The list shrank below index i: nothing left.
                            break

                        # Zero out later boxes that overlap box i too much,
                        # then compact the list.
                        iou_mask = (ious <
                                    self.nms_thresh).float().unsqueeze(1)
                        image_pred_class[i + 1:] *= iou_mask

                        non_zero_ind = torch.nonzero(
                            image_pred_class[:, 4]).squeeze()
                        image_pred_class = image_pred_class[non_zero_ind].view(
                            -1, 7)

                # Prefix every detection with the index of its image.
                batch_ind = image_pred_class.new(image_pred_class.size(0),
                                                 1).fill_(ind)
                detections.append(torch.cat((batch_ind, image_pred_class), 1))

        if not detections:
            # Previously an uninitialised placeholder tensor was returned
            # here, which looked like a real detection.
            return 0
        return torch.cat(detections)
Beispiel #5
0
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Threshold raw detections by objectness and run per-class NMS.

    Args:
        prediction: tensor indexed as ``[batch, box, attr]``. Attributes 0-3
            are read as (centre x, centre y, width, height), attribute 4 as
            objectness and attributes ``5 .. 5 + num_classes`` as class
            scores.
        confidence: objectness threshold; boxes at or below it are dropped.
        num_classes: number of class-score columns.
        nms: when true, boxes of the same class whose IoU with a
            higher-objectness box is at least ``nms_conf`` are suppressed.
        nms_conf: IoU threshold used by NMS.

    Returns:
        A tensor with one row per kept detection,
        ``(batch index, x1, y1, x2, y2, objectness, class score,
        class index)``, or the integer ``0`` when no box exceeds
        ``confidence``.
    """
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    # Out-of-place multiply: the caller's tensor is left untouched.
    prediction = prediction * conf_mask

    # Nothing in the whole batch passed the objectness threshold.
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return 0

    # Convert (centre, size) boxes to corner form.
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
    box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
    box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
    box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)
    # All detections of the batch are kept in one flat list; the image each
    # one belongs to is recorded in an extra leading column.
    detections = []

    for ind in range(batch_size):
        # Select the image from the batch.
        image_pred = prediction[ind]

        # Replace the per-class scores by the best score and its index.
        max_conf, max_conf_score = torch.max(image_pred[:, 5:5 + num_classes],
                                             1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_score = max_conf_score.float().unsqueeze(1)
        seq = (image_pred[:, :5], max_conf, max_conf_score)
        image_pred = torch.cat(seq, 1)

        # Drop the rows that were zeroed by the objectness threshold.
        non_zero_ind = torch.nonzero(image_pred[:, 4])
        if non_zero_ind.numel() == 0:
            # This image has no detections.
            continue
        image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

        # Get the various classes detected in the image.
        img_classes = unique(image_pred_[:, -1])

        # NMS is done class by class.
        for cls in img_classes:
            # Get the detections with one particular class.
            cls_mask = image_pred_ * (image_pred_[:, -1]
                                      == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()

            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Sort so that the entry with the highest objectness comes first.
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # IoUs of all boxes that come after the current one.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        # The list shrank below index i: nothing left.
                        break

                    # Zero out later boxes whose IoU reaches the threshold,
                    # then compact the list.
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    non_zero_ind = torch.nonzero(
                        image_pred_class[:, 4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind].view(
                        -1, 7)

            # Prefix every detection with the index of its image.
            batch_ind = image_pred_class.new(image_pred_class.size(0),
                                             1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        # Previously an uninitialised placeholder tensor was returned here,
        # which looked like a real detection.
        return 0
    return torch.cat(detections)
Beispiel #6
0
    def forward(self, x, y_true=None):
        """
        Decode a raw detection feature map; return boxes (eval) or losses
        (training). Transformation includes
        1. Re-organize tensor to make each row correspond to a bbox
        2. Transform center coordinates
        bx = sigmoid(tx) + cx
        by = sigmoid(ty) + cy
        3. Transform width and height
        bw = pw * exp(tw)
        bh = ph * exp(th)
        4. Activation
        All tensors are created on the device of `x`.
        @Args
        x: (Tensor) feature map with size [bs, (5+nC)*nA, gs, gs]
            5 => [4 offsets (xc, yc, w, h), objectness]
        y_true: per-image iterable of ground-truth tensors read as
            [xc, yc, w, h, class]; the first four are multiplied by gs, so
            they are presumably normalized to [0, 1] (confirm against the
            data loader). Only used in training mode.
        @Returns
        eval mode: (Tensor) detections with size [bs, nA*gs*gs, 5+nC], box
            coordinates scaled by the stride
        training mode: (dict) summed losses under the keys
            'x', 'y', 'w', 'h', 'cls', 'conf'
        """
        device = x.device
        bs, _, gs, _ = x.size()
        stride = self.reso // gs  # no pooling used, stride is the only downsample
        num_attrs = 5 + self.num_classes  # tx, ty, tw, th, p0
        nA = len(self.anchors)
        # Anchors expressed in grid cells instead of input pixels.
        scaled_anchors = torch.Tensor(
            [(a_w / stride, a_h / stride) for a_w, a_h in self.anchors]
        ).to(device)

        # Re-organize [bs, (5+nC)*nA, gs, gs] => [bs, nA, gs, gs, 5+nC]
        x = x.view(bs, nA, num_attrs, gs, gs).permute(
            0, 1, 3, 4, 2).contiguous()

        # Every slot is overwritten below, so no initialisation is needed.
        pred = torch.empty(bs, nA, gs, gs, num_attrs, device=device)

        pred_tx = torch.sigmoid(x[..., 0])
        pred_ty = torch.sigmoid(x[..., 1])
        pred_tw = x[..., 2]
        pred_th = x[..., 3]
        pred_conf = torch.sigmoid(x[..., 4])
        if self.training:
            pred_cls = x[..., 5:]  # softmax in cross entropy
        else:
            pred_cls = F.softmax(x[..., 5:], dim=-1)  # class

        # grid_x[..., j, i] == i and grid_y[..., j, i] == j
        grid_x = torch.arange(gs).repeat(gs, 1).view(
            [1, 1, gs, gs]).float().to(device)
        grid_y = torch.arange(gs).repeat(gs, 1).t().view(
            [1, 1, gs, gs]).float().to(device)
        anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))
        pred[..., 0] = pred_tx + grid_x
        pred[..., 1] = pred_ty + grid_y
        pred[..., 2] = torch.exp(pred_tw) * anchor_w
        pred[..., 3] = torch.exp(pred_th) * anchor_h
        pred[..., 4] = pred_conf
        pred[..., 5:] = pred_cls

        if not self.training:
            # Back to input-image scale, one row per box.
            pred[..., :4] *= stride
            return pred.view(bs, -1, num_attrs)

        gt_tx = torch.zeros(bs, nA, gs, gs, device=device)
        gt_ty = torch.zeros(bs, nA, gs, gs, device=device)
        gt_tw = torch.zeros(bs, nA, gs, gs, device=device)
        gt_th = torch.zeros(bs, nA, gs, gs, device=device)
        gt_conf = torch.zeros(bs, nA, gs, gs, device=device)
        gt_cls = torch.zeros(bs, nA, gs, gs, device=device)

        # 1 where an anchor is responsible for a ground-truth box.
        obj_mask = torch.zeros(bs, nA, gs, gs, device=device)
        for idx in range(bs):
            for y_true_one in y_true[idx]:
                y_true_one = y_true_one.to(device)
                gt_bbox = y_true_one[:4] * gs
                gt_cls_label = int(y_true_one[4])

                gt_xc, gt_yc, gt_w, gt_h = gt_bbox[0:4]
                # Cell that contains the box centre.
                gt_i = gt_xc.long()
                gt_j = gt_yc.long()

                # The anchor whose current prediction overlaps the box most
                # becomes responsible for it.
                pred_bbox = pred[idx, :, gt_j, gt_i, :4]
                ious = bbox_iou(xywh2xyxy(pred_bbox), xywh2xyxy(gt_bbox))
                best_iou, best_a = torch.max(ious, 0)

                w, h = scaled_anchors[best_a]
                gt_tw[idx, best_a, gt_j, gt_i] = torch.log(gt_w / w)
                gt_th[idx, best_a, gt_j, gt_i] = torch.log(gt_h / h)
                gt_tx[idx, best_a, gt_j, gt_i] = gt_xc - gt_i.float()
                gt_ty[idx, best_a, gt_j, gt_i] = gt_yc - gt_j.float()
                # The objectness target is the achieved IoU, not 1.
                gt_conf[idx, best_a, gt_j, gt_i] = best_iou
                gt_cls[idx, best_a, gt_j, gt_i] = gt_cls_label

                obj_mask[idx, best_a, gt_j, gt_i] = 1

        MSELoss = nn.MSELoss(reduction='sum')
        CELoss = nn.CrossEntropyLoss(reduction='sum')

        loss = dict()
        loss['x'] = MSELoss(pred_tx * obj_mask, gt_tx * obj_mask)
        loss['y'] = MSELoss(pred_ty * obj_mask, gt_ty * obj_mask)
        loss['w'] = MSELoss(pred_tw * obj_mask, gt_tw * obj_mask)
        loss['h'] = MSELoss(pred_th * obj_mask, gt_th * obj_mask)

        loss['cls'] = CELoss((pred_cls * obj_mask.unsqueeze(-1)).view(-1, self.num_classes),
                             (gt_cls * obj_mask).view(-1).long())
        # Object cells are weighted 5x relative to background cells.
        loss['conf'] = MSELoss(pred_conf * obj_mask * 5, gt_conf * obj_mask * 5) + \
            MSELoss(pred_conf * (1 - obj_mask), gt_conf * (1 - obj_mask))

        pprint(loss)

        return loss
                anchors = pw_ph
                offset = cx_cy
                strd = stride
                write = 1
            else:
                anchors = torch.cat((anchors, pw_ph), 0).to(device='cuda')
                offset = torch.cat((offset, cx_cy), 0).to(device='cuda')
                strd = torch.cat((strd, stride), 0).to(device='cuda')

        true_pred = util.transform(raw_pred.clone(), anchors, offset, strd)
        iou_mask, noobj_mask = util.get_responsible_masks(
            true_pred, target, offset, stride)

        iou = torch.diag(
            util.bbox_iou(
                util.get_abs_coord(true_pred[iou_mask.T, :].unsqueeze(-3)),
                target)).mean().item()

        noobj_box = raw_pred[:, :, 4:5].clone()
        conf = noobj_box[iou_mask.T, :].mean().item()

        noobj_box = noobj_box[noobj_mask.T, :]
        no_obj_conf = noobj_box.mean().item()

        raw_pred = raw_pred[iou_mask.T, :]
        anchors = anchors[iou_mask.T, :]
        offset = offset[iou_mask.T, :]
        strd = strd[iou_mask.T, :]

        if (
                strd.shape[0] == sample_batched['image'].shape[0]