def find(self, center_w, center_h):
    """Pick the anchor whose IOU with a ground-truth box size is highest.

    A ``BoundBox(0, 0, center_w, center_h)`` is built from the given size and
    compared with every entry of ``self.anchors`` through ``bbox_iou``.

    Args:
        center_w: width passed as the third ``BoundBox`` argument.
        center_h: height passed as the fourth ``BoundBox`` argument.

    Returns:
        Tuple ``(best_anchor, max_iou)``: the index of the winning anchor and
        its IOU. Both stay at ``-1`` when no anchor scores above ``-1``
        (for instance when ``self.anchors`` is empty). On ties the earliest
        anchor wins because the comparison is strict.
    """
    shifted_box = BoundBox(0, 0, center_w, center_h)
    best_anchor, max_iou = -1, -1
    for index, anchor in enumerate(self.anchors):
        overlap = bbox_iou(shifted_box, anchor)
        if max_iou < overlap:
            best_anchor, max_iou = index, overlap
    return best_anchor, max_iou
def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
    """Build the dense training targets for one detection grid.

    Args:
        target: tensor indexed as ``target[b, t, k]``; for each box ``t`` of
            sample ``b`` the columns read here are ``[class, x, y, w, h]``.
            ``x``/``w`` are multiplied by ``in_w`` and ``y``/``h`` by ``in_h``
            (presumably they are normalised to [0, 1] -- confirm with the
            data loader). Rows that sum to zero are treated as padding.
        anchors: sequence of ``self.num_anchors`` ``(w, h)`` pairs, in the
            same units as the scaled box sizes.
        in_w: grid width.
        in_h: grid height.
        ignore_threshold: anchors whose shape IOU with a ground-truth box
            exceeds this value are removed from the no-object mask at that
            box's cell.

    Returns:
        ``(mask, noobj_mask, tx, ty, tw, th, tconf, tcls)``. All are float
        tensors of shape ``(bs, num_anchors, in_h, in_w)`` except ``tcls``,
        which has a trailing ``num_classes`` one-hot axis.
    """
    bs = target.size(0)
    grid_shape = (bs, self.num_anchors, in_h, in_w)
    mask = torch.zeros(grid_shape)
    noobj_mask = torch.ones(grid_shape)
    tx = torch.zeros(grid_shape)
    ty = torch.zeros(grid_shape)
    tw = torch.zeros(grid_shape)
    th = torch.zeros(grid_shape)
    tconf = torch.zeros(grid_shape)
    tcls = torch.zeros(grid_shape + (self.num_classes,))

    # Anchor boxes placed at the origin, rows of (0, 0, w, h). They depend
    # only on the arguments, so build them once instead of once per box.
    # NOTE(review): assumes bbox_iou does not modify its inputs -- confirm.
    anchor_shapes = torch.FloatTensor(
        np.concatenate((np.zeros((self.num_anchors, 2)),
                        np.array(anchors)), 1))

    for b in range(bs):
        for t in range(target.shape[1]):
            if target[b, t].sum() == 0:
                continue  # all-zero row: no box in this slot
            # Scale the box to grid units
            gx = target[b, t, 1] * in_w
            gy = target[b, t, 2] * in_h
            gw = target[b, t, 3] * in_w
            gh = target[b, t, 4] * in_h
            # Grid cell that contains the box centre
            gi = int(gx)
            gj = int(gy)
            # Ground-truth box placed at the origin so only shape is compared
            gt_box = torch.FloatTensor(
                np.array([0, 0, float(gw), float(gh)])).unsqueeze(0)
            # IOU between the ground-truth shape and every anchor shape
            anch_ious = bbox_iou(gt_box, anchor_shapes)
            # Anchors that overlap well enough are not penalised as background
            noobj_mask[b, anch_ious > ignore_threshold, gj, gi] = 0
            # The best matching anchor becomes responsible for this box
            best_n = np.argmax(anch_ious)
            mask[b, best_n, gj, gi] = 1
            # Centre offsets inside the cell
            tx[b, best_n, gj, gi] = gx - gi
            ty[b, best_n, gj, gi] = gy - gj
            # Log-ratio of box size to anchor size (epsilon avoids log(0))
            tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
            th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
            # Objectness target
            tconf[b, best_n, gj, gi] = 1
            # One-hot encoding of the class label
            tcls[b, best_n, gj, gi, int(target[b, t, 0])] = 1

    return mask, noobj_mask, tx, ty, tw, th, tconf, tcls
def main(argv):
    """Build the recurrent bounding-box tracker, train it and save artifacts.

    The function is driven entirely by the constants in the CONFIG section.
    It creates a fresh experiment directory, writes ``exp_config.txt`` and
    ``log.txt`` into it, builds a TensorFlow graph-mode training graph
    (conv net -> linear -> LSTM -> bbox regression, unrolled for ``seq_len``
    steps), runs ``num_epochs * num_train_iter`` optimiser steps on batches
    from ``dataset.FlyingShapesDataHandler`` and finally saves a checkpoint,
    per-epoch bounding-box images and summary plots.

    Args:
        argv: command-line arguments; unused.

    Raises:
        ValueError: for an unknown ``convnet_type`` or ``join_layer``.
        NotImplementedError: for join layers that are declared but not built.
    """
    del argv  # Unused.

    #################
    # CONFIG
    #################
    experiment_name = None
    num_epochs = 100
    num_train_iter = 50
    test_every_n_epochs = 1
    batch_size = 5
    learning_rate = 1e-4
    seq_len = 10
    num_hidden = 100
    outputlayer_size = 4
    ssprob = 1  # scheduled sampling
    x_size = 32
    lstm_type = 'vanilla'  # or 'conv'
    convnet_type = 'small'
    conv_out_size = 72
    join_layer = 'none'  # or 'concat'
    ds_loss_type = 'cosine'  # or None / 'xent'
    iou_loss = False  # or True
    bbox_loss_parameterization = 'coords'
    detection = None  # or 'shape'
    device = 'gpu'
    dtype = tf.float32
    separate_lstms = False  # or True
    # Copy the settings defined so far; a copy keeps the record independent
    # of the live frame dictionary that locals() may hand back.
    config = dict(locals())

    # make experimental directory
    if experiment_name is None:
        # str() every part so that options such as ds_loss_type=None work.
        experiment_path = '_'.join(str(part) for part in (
            lstm_type, convnet_type, ds_loss_type, join_layer,
            ssprob, batch_size, seq_len, num_epochs))
    else:
        experiment_path = experiment_name
    run_index = 0
    while os.path.exists(experiment_path + "_" + str(run_index)):
        run_index += 1
    experiment_path = experiment_path + "_" + str(run_index)
    os.mkdir(experiment_path)
    config['experiment_path'] = experiment_path
    print('Saving to ' + str(experiment_path))

    # write config file
    with open(os.path.join(experiment_path, 'exp_config.txt'), 'w') as f:
        for key in sorted(config):
            f.write(key + '\t' + str(config[key]) + '\n')

    #################
    # SET UP GRAPH
    #################
    tf.reset_default_graph()

    #################
    # MODEL PARAMS
    #################
    if convnet_type == 'small':
        convnet = models.SmallConvNet()
    else:
        raise ValueError('unsupported convnet_type: ' + str(convnet_type))

    if separate_lstms:
        # NOTE(review): this branch is unfinished. The training graph below
        # only uses prelstm_w / prelstm_b / cell, and the variable names
        # passed to get_variable do not match the Python names -- confirm
        # the intended names before enabling separate_lstms.
        preframelstm_w = tf.get_variable('preframelstm_weight', [conv_out_size, num_hidden], dtype=dtype)
        preframelstm_b = tf.get_variable('prereplstm_bias', [num_hidden], dtype=dtype)
        prereplstm_w = tf.get_variable('prelstm_weight', [conv_out_size, num_hidden], dtype=dtype)
        prereplstm_b = tf.get_variable('prelstm_bias', [num_hidden], dtype=dtype)
        frame_lstm_initial_state = tf.nn.rnn_cell.LSTMStateTuple(
            tf.get_variable('initial_frame_c', [batch_size, num_hidden], dtype=dtype),
            tf.get_variable('initial_frame_h', [batch_size, num_hidden], dtype=dtype)
        )
        rep_lstm_initial_state = tf.nn.rnn_cell.LSTMStateTuple(
            tf.get_variable('initial_rep_c', [batch_size, num_hidden], dtype=dtype),
            tf.get_variable('initial_rep_h', [batch_size, num_hidden], dtype=dtype)
        )
        if lstm_type == 'conv':
            frame_cell = tf.contrib.rnn.ConvLSTMCell(num_hidden)
            rep_cell = tf.contrib.rnn.ConvLSTMCell(num_hidden)
        else:
            frame_cell = tf.contrib.rnn.BasicLSTMCell(num_hidden)
            rep_cell = tf.contrib.rnn.BasicLSTMCell(num_hidden)
    else:
        prelstm_w = tf.get_variable('prelstm_weight', [conv_out_size, num_hidden], dtype=dtype)
        prelstm_b = tf.get_variable('prelstm_bias', [num_hidden], dtype=dtype)
        lstm_initial_state = tf.nn.rnn_cell.LSTMStateTuple(
            tf.get_variable('initial_c', [batch_size, num_hidden], dtype=dtype),
            tf.get_variable('initial_h', [batch_size, num_hidden], dtype=dtype)
        )
        if lstm_type == 'conv':
            cell = tf.contrib.rnn.ConvLSTMCell(num_hidden)
        else:
            cell = tf.contrib.rnn.BasicLSTMCell(num_hidden)

    # params of output bbox linear layer
    bbox_w = tf.get_variable('bbox_weight', [num_hidden, outputlayer_size])
    bbox_b = tf.get_variable('bbox_bias', [outputlayer_size])
    if detection is not None:
        # NOTE(review): labels_size (and labels, used for the loss below) are
        # not defined in this function; the detection head cannot run as is.
        detection_w = tf.get_variable('detection_weight', [num_hidden, labels_size], dtype=dtype)
        detection_b = tf.get_variable('detection_bias', [labels_size], dtype=dtype)

    #################
    # TRAIN GRAPH
    #################
    with tf.name_scope('train'):
        x_train = tf.placeholder(tf.float32, shape=[batch_size, seq_len, x_size, x_size, 3], name='x_train')
        bboxes = tf.placeholder(tf.float32, shape=[batch_size, seq_len, 4], name='bboxes')
        if separate_lstms:
            frame_state_t = frame_lstm_initial_state
            rep_state_t = rep_lstm_initial_state
        else:
            state_t = lstm_initial_state

        # manual unroll
        reps = []
        initialreps = []
        pred_bboxes = []
        target_bboxes = []
        with tf.variable_scope('observer'):
            for t in range(seq_len):
                if t > 0:
                    tf.get_variable_scope().reuse_variables()
                    # scheduled sampling: feed back the previous prediction
                    # whenever the random draw exceeds ssprob.
                    r = tf.random_uniform([1])
                    target_bbox = tf.cond(
                        tf.reduce_all(r > ssprob),
                        lambda: pred_bboxes[t - 1],
                        lambda: bboxes[:, t, :])
                else:
                    target_bbox = bboxes[:, t, :]
                input_t = x_train[:, t, :, :, :]
                bboxed_t = tf.image.crop_and_resize(
                    input_t,
                    target_bbox,
                    tf.constant(list(range(batch_size)), tf.int32),
                    tf.constant([x_size, x_size], tf.int32)
                )
                conv_bboxed = tf.contrib.layers.flatten(convnet(bboxed_t))
                conv_frame = tf.contrib.layers.flatten(convnet(input_t))
                if join_layer == 'concat':
                    # NOTE(review): this is an element-wise sum, not a
                    # concatenation; prelstm_w is sized for conv_out_size.
                    lstm_in = conv_frame + conv_bboxed
                elif join_layer == 'none':
                    lstm_in = conv_frame + conv_frame
                elif join_layer in ('crossconv', 'film'):
                    raise NotImplementedError(
                        'join_layer ' + join_layer + ' is not implemented')
                else:
                    # Fail here rather than with a NameError on lstm_in.
                    raise ValueError('unknown join_layer: ' + str(join_layer))

                # TODO: concat before LSTM layer, or separate LSTMs for frame and rep?
                pre_lstm = tf.nn.relu(tf.matmul(lstm_in, prelstm_w) + prelstm_b)
                output_t, state_t = cell(pre_lstm, state_t)
                if t == 0:
                    initial_rep = state_t
                # The first-step state is repeated so it can be compared
                # against the state of every later step.
                initialreps.append(initial_rep)
                reps.append(state_t)
                if detection is not None:
                    logits = tf.matmul(output_t, detection_w) + detection_b
                pred_bbox_t = tf.nn.relu(tf.matmul(output_t, bbox_w) + bbox_b)
                pred_bboxes.append(pred_bbox_t)
                target_bboxes.append(target_bbox)

        pred_bboxes_tf = tf.stack(axis=1, values=pred_bboxes, name="preds_tf")
        target_bboxes_tf = tf.stack(axis=1, values=target_bboxes, name='targets_tf')
        reps_tf = tf.stack(axis=1, values=reps, name="reps_tf")
        initialreps_tf = tf.stack(axis=1, values=initialreps, name="initialreps_tf")

        # loss, metrics, optimizer
        bbox_loss = tf.losses.absolute_difference(target_bboxes_tf, pred_bboxes_tf)
        if ds_loss_type == 'cosine':
            rep_loss = tf.abs(tf.losses.cosine_distance(initialreps_tf, reps_tf, axis=0))
            loss = bbox_loss + rep_loss
        elif ds_loss_type == 'xent':
            rep_loss = tf.losses.sigmoid_cross_entropy(initialreps_tf, reps_tf)
            loss = bbox_loss + rep_loss
        else:
            # Still computed so that it can be logged, but not optimised.
            rep_loss = tf.abs(tf.losses.cosine_distance(initialreps_tf, reps_tf, axis=0))
            loss = bbox_loss
        if detection is not None:
            detection_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels, logits)
            loss += detection_loss
        iou = util.bbox_iou(target_bboxes_tf, pred_bboxes_tf)
        mean_iou = tf.reduce_mean(iou)
        if iou_loss:
            # iou_loss is a boolean switch (the old "is not None" test was
            # always true). Higher IoU is better, so minimise 1 - IoU.
            loss += 1.0 - mean_iou
        nonfail_count, fail_count, robustness = tf.py_func(
            util.get_failures_and_robustness, [iou, 0, 0, 0],
            [tf.int64, tf.int64, tf.float32],
            name='failure_and_robustness')
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        grad_and_vars = optimizer.compute_gradients(loss)
        optim = optimizer.apply_gradients(grad_and_vars)

    # TODO: a separate test graph and a periodic evaluation pass (every
    # test_every_n_epochs epochs) are not implemented yet.

    #################
    # RUN MAIN LOOP
    #################
    cf = tf.ConfigProto(allow_soft_placement=True)
    saver = tf.train.Saver()
    log_path = os.path.join(experiment_path, 'log.txt')
    train_loss = []
    train_rep_loss = []
    train_iou = []
    train_fail = []
    train_rob = []
    # The with-statement closes the log even if training raises.
    with open(log_path, 'w') as log_file, tf.Session(config=cf) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        data_source = dataset.FlyingShapesDataHandler(batch_size=batch_size, seq_len=seq_len)

        for n in range(num_epochs):
            # One batch is drawn per epoch and reused for every iteration.
            np_batch = data_source.GetUnlabelledBatch()
            batch = {
                'image': np_batch['image'],
                'bbox': np_batch['bbox']
            }
            print('Epoch ' + str(n))
            for i in range(num_train_iter):
                res = sess.run({
                    'loss': loss,
                    'rep_loss': rep_loss,
                    'optim': optim,
                    'iou': mean_iou,
                    'fail_rate': fail_count,
                    'robustness': robustness,
                    'pred_bboxes': pred_bboxes_tf,
                    'target_bboxes': target_bboxes_tf,
                    'initial_rep': initial_rep
                },
                    feed_dict={
                        x_train: batch['image'],
                        bboxes: batch['bbox']
                    })
                train_loss.append(res['loss'])
                train_rep_loss.append(res['rep_loss'])
                train_iou.append(res['iou'])
                train_fail.append(res['fail_rate'])
                train_rob.append(res['robustness'])

                # PRINT TO LOG FILE AND STDERR
                log_str = ('Train ' + str(i) + ': ' +
                           str(res['rep_loss']) + '\t' +
                           str(res['loss']) + '\t' +
                           str(res['iou']) + '\t' +
                           str(res['fail_rate'][0]) + '\t' +
                           str(res['robustness'][0]))
                print(log_str)
                log_file.write(log_str + '\n')

            # PLOT IMGS AND BBOXES for the first sequence of the batch, using
            # the results of the last iteration of this epoch.
            num_rows = 1
            plt.figure(2, figsize=(20, 1))
            plt.clf()
            for frame in range(seq_len):
                figgy = plt.subplot(num_rows, seq_len, frame + 1)
                gt = batch['bbox'][0][frame]
                targetbox = res['target_bboxes'][0][frame]
                predbox = res['pred_bboxes'][0][frame]
                # blue: ground truth, yellow: box fed to the crop, green:
                # prediction. Boxes are indexed as (y1, x1, y2, x2) here.
                for box, colour in ((gt, 'b'), (targetbox, 'y'), (predbox, 'g')):
                    figgy.add_patch(patches.Rectangle(
                        (box[1], box[0]),
                        (box[3] - box[1]),
                        (box[2] - box[0]),
                        linewidth=2, edgecolor=colour, facecolor='none', alpha=0.6))
                plt.imshow(batch['image'][0][frame])
                plt.axis('off')
            plt.savefig(os.path.join(experiment_path, 'bboxes' + str(n)), bbox_inches='tight')
            saver.save(sess, os.path.join(experiment_path, 'saved_model'))

    # Summary curves, one image per metric.
    plt.close()
    summary_plots = (
        (train_loss, 'Train Loss', 'loss.png'),
        (train_rep_loss, 'Rep Loss', 'rep_loss.png'),
        (train_iou, 'Train IOU', 'iou.png'),
        (train_fail, 'Train Failure Rate', 'fails.png'),
        (train_rob, 'Train Robustness', 'robustness.png'),
    )
    for series, label, filename in summary_plots:
        plt.plot(series, label=label)
        plt.legend()
        plt.savefig(os.path.join(experiment_path, filename))
        plt.gcf().clear()
def filter_results(self, prediction, nms=True):
    """Threshold detections and run class-wise non-maximum suppression.

    Args:
        prediction: tensor indexed as ``(batch, box, attr)`` where the
            attributes are ``[cx, cy, w, h, objectness, class scores...]``;
            ``self.num_classes`` class scores are read starting at column 5.
            The input tensor itself is not modified.
        nms: when true, boxes of the same class whose IOU with a higher
            scoring box is not below ``self.nms_thresh`` are dropped.

    Returns:
        ``0`` when no box has objectness above ``self.confidence``.
        Otherwise a tensor with one row per kept detection and 8 columns:
        batch index, x1, y1, x2, y2, objectness, best class score and best
        class index.
    """
    conf_mask = (prediction[:, :, 4] > self.confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask
    # Nothing passed the confidence threshold: return the sentinel instead
    # of an uninitialised placeholder tensor.
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return 0

    # Convert (centre x, centre y, width, height) to corner coordinates.
    center_x, center_y = prediction[:, :, 0], prediction[:, :, 1]
    half_w, half_h = prediction[:, :, 2] / 2, prediction[:, :, 3] / 2
    corners = torch.stack((center_x - half_w, center_y - half_h,
                           center_x + half_w, center_y + half_h), 2)
    prediction[:, :, :4] = corners

    detections = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]
        # Keep only the best class: its score and its index.
        max_conf, max_conf_score = torch.max(
            image_pred[:, 5:5 + self.num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_score = max_conf_score.float().unsqueeze(1)
        image_pred = torch.cat((image_pred[:, :5], max_conf, max_conf_score), 1)

        # Drop the rows that were zeroed by the confidence mask.
        non_zero_ind = torch.nonzero(image_pred[:, 4]).squeeze(1)
        if non_zero_ind.numel() == 0:
            continue
        image_pred_ = image_pred[non_zero_ind].view(-1, 7)

        for cls in util.unique(image_pred_[:, -1]):
            # Detections of this class with a non-zero class score.
            cls_mask = image_pred_ * (image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze(1)
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Highest objectness first.
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]

            if nms:
                i = 0
                # The tensor shrinks as boxes are suppressed, so re-check
                # its length on every step.
                while i + 1 < image_pred_class.size(0):
                    ious = util.bbox_iou(
                        image_pred_class[i].unsqueeze(0),
                        image_pred_class[i + 1:])
                    # Zero out later boxes that overlap box i too much...
                    iou_mask = (ious < self.nms_thresh).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask
                    # ...and remove the zeroed rows.
                    non_zero_ind = torch.nonzero(image_pred_class[:, 4]).squeeze(1)
                    image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)
                    i += 1

            # Prefix every row with the index of its image in the batch.
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Threshold detections and run class-wise non-maximum suppression.

    Args:
        prediction: tensor indexed as ``(batch, box, attr)`` where the
            attributes are ``[cx, cy, w, h, objectness, class scores...]``;
            ``num_classes`` class scores are read starting at column 5.
            The input tensor itself is not modified.
        confidence: boxes whose objectness is not above this are discarded.
        num_classes: number of class-score columns.
        nms: when true, boxes of the same class whose IOU with a higher
            scoring box is not below ``nms_conf`` are dropped.
        nms_conf: IOU threshold used by the suppression step.

    Returns:
        ``0`` when no box has objectness above ``confidence``. Otherwise a
        tensor with one row per kept detection and 8 columns: batch index,
        x1, y1, x2, y2, objectness, best class score and best class index.
        All detections of the batch are stacked in one 2-D tensor; the first
        column tells which image a row belongs to.
    """
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask
    # Nothing passed the confidence threshold: return the sentinel instead
    # of an uninitialised placeholder tensor.
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return 0

    # Convert (centre x, centre y, width, height) to corner coordinates.
    center_x, center_y = prediction[:, :, 0], prediction[:, :, 1]
    half_w, half_h = prediction[:, :, 2] / 2, prediction[:, :, 3] / 2
    corners = torch.stack((center_x - half_w, center_y - half_h,
                           center_x + half_w, center_y + half_h), 2)
    prediction[:, :, :4] = corners

    detections = []
    for ind in range(prediction.size(0)):
        # Select the image from the batch.
        image_pred = prediction[ind]
        # Replace the per-class scores by the best class score and index.
        max_conf, max_conf_score = torch.max(image_pred[:, 5:5 + num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_score = max_conf_score.float().unsqueeze(1)
        image_pred = torch.cat((image_pred[:, :5], max_conf, max_conf_score), 1)

        # Get rid of the rows zeroed by the confidence mask.
        non_zero_ind = torch.nonzero(image_pred[:, 4]).squeeze(1)
        if non_zero_ind.numel() == 0:
            continue
        image_pred_ = image_pred[non_zero_ind].view(-1, 7)

        # NMS is done class by class.
        for cls in unique(image_pred_[:, -1]):
            # Detections of this class with a non-zero class score.
            cls_mask = image_pred_ * (image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze(1)
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Sort so that the entry with the highest objectness comes first.
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]

            if nms:
                i = 0
                # The tensor shrinks as boxes are suppressed, so re-check
                # its length on every step.
                while i + 1 < image_pred_class.size(0):
                    # IOUs of all boxes that come after the current one.
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                    image_pred_class[i + 1:])
                    # Zero out detections whose IOU reaches the threshold...
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask
                    # ...and remove the zeroed rows.
                    non_zero_ind = torch.nonzero(image_pred_class[:, 4]).squeeze(1)
                    image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)
                    i += 1

            # Prefix every row with the index of its image in the batch.
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
def forward(self, x, y_true=None):
    """
    Transform a feature map into box predictions or, in training, into losses.

    Transformation includes
    1. Re-organize tensor to make each row correspond to a bbox
    2. Transform center coordinates
      bx = sigmoid(tx) + cx
      by = sigmoid(ty) + cy
    3. Transform width and height
      bw = pw * exp(tw)
      bh = ph * exp(th)
    4. Activation

    @Args
      x: (Tensor) feature map with size [bs, (5+nC)*nA, gs, gs]
        5 => [4 offsets (xc, yc, w, h), objectness]
      y_true: per-sample iterable of ground-truth tensors laid out as
        [xc, yc, w, h, class]; the first four values are multiplied by gs.
        Only read (and required) when the module is in training mode.

    @Returns
      eval mode: (Tensor) detections with size [bs, nA*gs*gs, 5+nC]; the four
        box values are multiplied by the stride.
      training mode: dict with the summed losses 'x', 'y', 'w', 'h', 'cls'
        and 'conf'. The dict is also pretty-printed on every call.

    NOTE(review): every tensor is moved with .cuda(), so this layer only runs
    on a CUDA device.
    """
    bs, _, gs, _ = x.size()
    stride = self.reso // gs  # no pooling used, stride is the only downsample
    num_attrs = 5 + self.num_classes  # tx, ty, tw, th, p0
    nA = len(self.anchors)
    # Anchor sizes expressed in grid cells.
    scaled_anchors = torch.Tensor(
        [(a_w / stride, a_h / stride) for a_w, a_h in self.anchors]).cuda()

    # Re-organize [bs, (5+nC)*nA, gs, gs] => [bs, nA, gs, gs, 5+nC]
    x = x.view(bs, nA, num_attrs, gs, gs).permute(
        0, 1, 3, 4, 2).contiguous()

    pred = torch.Tensor(bs, nA, gs, gs, num_attrs).cuda()

    pred_tx = torch.sigmoid(x[..., 0]).cuda()
    pred_ty = torch.sigmoid(x[..., 1]).cuda()
    pred_tw = x[..., 2].cuda()
    pred_th = x[..., 3].cuda()
    pred_conf = torch.sigmoid(x[..., 4]).cuda()
    if self.training:
        pred_cls = x[..., 5:].cuda()  # softmax in cross entropy
    else:
        pred_cls = F.softmax(x[..., 5:], dim=-1).cuda()  # class

    # Column / row index of every grid cell, broadcast over batch and anchors.
    grid_x = torch.arange(gs).repeat(gs, 1).view(
        [1, 1, gs, gs]).float().cuda()
    grid_y = torch.arange(gs).repeat(gs, 1).t().view(
        [1, 1, gs, gs]).float().cuda()
    anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
    anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))

    pred[..., 0] = pred_tx + grid_x
    pred[..., 1] = pred_ty + grid_y
    pred[..., 2] = torch.exp(pred_tw) * anchor_w
    pred[..., 3] = torch.exp(pred_th) * anchor_h
    pred[..., 4] = pred_conf
    pred[..., 5:] = pred_cls

    if not self.training:
        # Back to the units of the input (grid cells * stride).
        pred[..., :4] *= stride
        return pred.view(bs, -1, num_attrs)

    # Training: build the targets cell by cell.
    gt_tx = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()
    gt_ty = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()
    gt_tw = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()
    gt_th = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()
    gt_conf = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()
    gt_cls = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()
    obj_mask = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()

    for idx in range(bs):
        for y_true_one in y_true[idx]:
            y_true_one = y_true_one.cuda()
            gt_bbox = y_true_one[:4] * gs
            gt_cls_label = int(y_true_one[4])
            gt_xc, gt_yc, gt_w, gt_h = gt_bbox[0:4]
            # Grid cell that contains the box centre.
            gt_i = gt_xc.long().cuda()
            gt_j = gt_yc.long().cuda()

            # The anchor whose current prediction overlaps the ground truth
            # best becomes responsible for it.
            pred_bbox = pred[idx, :, gt_j, gt_i, :4]
            ious = bbox_iou(xywh2xyxy(pred_bbox), xywh2xyxy(gt_bbox))
            best_iou, best_a = torch.max(ious, 0)

            w, h = scaled_anchors[best_a]
            gt_tw[idx, best_a, gt_j, gt_i] = torch.log(gt_w / w)
            gt_th[idx, best_a, gt_j, gt_i] = torch.log(gt_h / h)
            gt_tx[idx, best_a, gt_j, gt_i] = gt_xc - gt_i.float()
            gt_ty[idx, best_a, gt_j, gt_i] = gt_yc - gt_j.float()
            # The IoU is a regression target: detach it so that no gradient
            # flows into the network through the target side of the loss.
            gt_conf[idx, best_a, gt_j, gt_i] = best_iou.detach()
            gt_cls[idx, best_a, gt_j, gt_i] = gt_cls_label

            obj_mask[idx, best_a, gt_j, gt_i] = 1

    MSELoss = nn.MSELoss(reduction='sum')
    BCELoss = nn.BCELoss(reduction='sum')
    CELoss = nn.CrossEntropyLoss(reduction='sum')

    loss = dict()
    # Box terms only count in cells that own a ground-truth box.
    loss['x'] = MSELoss(pred_tx * obj_mask, gt_tx * obj_mask)
    loss['y'] = MSELoss(pred_ty * obj_mask, gt_ty * obj_mask)
    loss['w'] = MSELoss(pred_tw * obj_mask, gt_tw * obj_mask)
    loss['h'] = MSELoss(pred_th * obj_mask, gt_th * obj_mask)
    loss['cls'] = CELoss((pred_cls * obj_mask.unsqueeze(-1)).view(-1, self.num_classes),
                         (gt_cls * obj_mask).view(-1).long())
    # Objectness: object cells are scaled by 5 inside the squared error.
    loss['conf'] = MSELoss(pred_conf * obj_mask * 5, gt_conf * obj_mask * 5) + \
        MSELoss(pred_conf * (1 - obj_mask), gt_conf * (1 - obj_mask))

    pprint(loss)

    return loss
anchors = pw_ph offset = cx_cy strd = stride write = 1 else: anchors = torch.cat((anchors, pw_ph), 0).to(device='cuda') offset = torch.cat((offset, cx_cy), 0).to(device='cuda') strd = torch.cat((strd, stride), 0).to(device='cuda') true_pred = util.transform(raw_pred.clone(), anchors, offset, strd) iou_mask, noobj_mask = util.get_responsible_masks( true_pred, target, offset, stride) iou = torch.diag( util.bbox_iou( util.get_abs_coord(true_pred[iou_mask.T, :].unsqueeze(-3)), target)).mean().item() noobj_box = raw_pred[:, :, 4:5].clone() conf = noobj_box[iou_mask.T, :].mean().item() noobj_box = noobj_box[noobj_mask.T, :] no_obj_conf = noobj_box.mean().item() raw_pred = raw_pred[iou_mask.T, :] anchors = anchors[iou_mask.T, :] offset = offset[iou_mask.T, :] strd = strd[iou_mask.T, :] if ( strd.shape[0] == sample_batched['image'].shape[0]