import cv2
import numpy as np
from os import makedirs
from os.path import join, isdir


def track_video(model, video):
    # args, v_id, the SiamRPN_* functions, and the box-conversion helpers are
    # assumed to come from the surrounding script.
    toc, regions = 0, []
    image_files, gt = video['image_files'], video['gt']
    for f, image_file in enumerate(image_files):
        im = cv2.imread(image_file)  # TODO: batch load
        tic = cv2.getTickCount()
        if f == 0:  # init
            target_pos, target_sz = rect_2_cxy_wh(gt[f])
            state = SiamRPN_init(im, target_pos, target_sz, model)  # init tracker
            location = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            regions.append(gt[f])
        else:  # tracking
            state = SiamRPN_track(state, im)  # track
            location = cxy_wh_2_rect(state['target_pos'] + 1, state['target_sz'])  # +1: OTB coordinates are 1-indexed
            regions.append(location)
        toc += cv2.getTickCount() - tic

        if args.visualization:  # visualization
            if f == 0:
                cv2.destroyAllWindows()
            if len(gt[f]) == 8:  # polygon ground truth
                cv2.polylines(im, [np.array(gt[f], int).reshape((-1, 1, 2))],
                              True, (0, 255, 0), 3)
            else:
                cv2.rectangle(im, (gt[f, 0], gt[f, 1]),
                              (gt[f, 0] + gt[f, 2], gt[f, 1] + gt[f, 3]),
                              (0, 255, 0), 3)
            if len(location) == 8:
                cv2.polylines(im, [location.reshape((-1, 1, 2))], True,
                              (0, 255, 255), 3)
            else:
                location = [int(l) for l in location]
                cv2.rectangle(im, (location[0], location[1]),
                              (location[0] + location[2], location[1] + location[3]),
                              (0, 255, 255), 3)
            cv2.putText(im, str(f), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 255), 2)
            cv2.imshow(video['name'], im)
            cv2.waitKey(1)
    toc /= cv2.getTickFrequency()

    # save result
    video_path = join('../test', args.dataset, 'SiamRPN_AlexNet_OTB2015')
    if not isdir(video_path):
        makedirs(video_path)
    result_path = join(video_path, '{:s}.txt'.format(video['name']))
    with open(result_path, 'w') as fout:
        for x in regions:
            fout.write(','.join([str(i) for i in x]) + '\n')

    print('({:d}) Video: {:12s} Time: {:02.1f}s Speed: {:3.1f}fps'.format(
        v_id, video['name'], toc, f / toc))
    return f / toc
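# rect_2_cxy_wh / cxy_wh_2_rect are the repo's box-conversion utilities; a
# minimal sketch of what they are assumed to do (0-indexed top-left
# [x, y, w, h] <-> center position plus size):
def rect_2_cxy_wh(rect):
    # top-left [x, y, w, h] --> center [cx, cy], size [w, h]
    return np.array([rect[0] + rect[2] / 2, rect[1] + rect[3] / 2]), \
           np.array([rect[2], rect[3]])


def cxy_wh_2_rect(pos, sz):
    # center [cx, cy], size [w, h] --> top-left [x, y, w, h]
    return np.array([pos[0] - sz[0] / 2, pos[1] - sz[1] / 2, sz[0], sz[1]])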
import glob
import json
import os
from os.path import dirname, join, realpath

import cv2
import torch


def main(imagedir, gtdir):
    # load net
    net_file = join(realpath(dirname(__file__)), 'SiamRPNBIG.model')
    net = SiamRPNBIG()
    net.load_state_dict(torch.load(net_file))
    net.eval().cuda()

    # warm up
    for i in range(10):
        net.temple(torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda())
        net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

    # start to track: read the ground-truth boxes; the first one initializes the tracker
    gt_file = os.path.join(gtdir, 'gt.txt')
    with open(gt_file, 'r') as f:
        lines = f.readlines()
    gt = []
    for line in lines:
        gt.append([int(float(x)) for x in line.split(' ')])
    init_bbox = gt[0]  # top-left x, y, w, h
    target_pos, target_sz = rect_2_cxy_wh(init_bbox)  # top-left x, y, w, h --> center x, y, w, h

    image_list = glob.glob(os.path.join(imagedir, '*.jpg'))
    image_list.sort()
    im = cv2.imread(image_list[0])  # HxWxC
    state = SiamRPN_init(im, target_pos, target_sz, net)  # init tracker

    bboxes = []
    for i in range(1, len(gt)):
        im = cv2.imread(image_list[i])  # HxWxC
        state = SiamRPN_track(state, im)  # track
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])  # center x, y, w, h --> top-left x, y, w, h
        bboxes.append(res.tolist())

    _, precision, precision_auc, iou = _compile_results(gt[1:], bboxes)
    print(' -- Precision (20 px): {:.2f} -- Precision AUC: {:.2f} -- IOU: {:.2f} --'.format(
        precision, precision_auc, iou))

    isSavebbox = True
    if isSavebbox:
        print('saving bbox...')
        res_bbox_file = os.path.join('results_bbox.json')
        with open(res_bbox_file, 'w') as f:
            json.dump(bboxes, f, indent=2)

    isSavevideo = True
    if isSavevideo:
        print('saving video...')
        save_video(image_list, bboxes)
    print('done')
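# _compile_results is assumed to mirror the SiamFC-style evaluation helper:
# center-distance precision at a 20 px threshold, precision AUC over a sweep of
# thresholds, and mean IoU, all over top-left [x, y, w, h] boxes. A sketch under
# those assumptions (the threshold sweep and the percent scaling are guesses):
def _compile_results(gt, bboxes, dist_threshold=20):
    gt = np.array(gt, dtype=float)
    bboxes = np.array(bboxes, dtype=float)
    gt_centers = gt[:, :2] + gt[:, 2:] / 2
    pred_centers = bboxes[:, :2] + bboxes[:, 2:] / 2
    dist = np.linalg.norm(gt_centers - pred_centers, axis=1)
    precision = 100.0 * np.mean(dist <= dist_threshold)
    thresholds = np.arange(1, 51)
    precision_auc = 100.0 * np.mean([(dist <= t).mean() for t in thresholds])
    # IoU of axis-aligned boxes
    x1 = np.maximum(gt[:, 0], bboxes[:, 0])
    y1 = np.maximum(gt[:, 1], bboxes[:, 1])
    x2 = np.minimum(gt[:, 0] + gt[:, 2], bboxes[:, 0] + bboxes[:, 2])
    y2 = np.minimum(gt[:, 1] + gt[:, 3], bboxes[:, 1] + bboxes[:, 3])
    inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    union = gt[:, 2] * gt[:, 3] + bboxes[:, 2] * bboxes[:, 3] - inter
    iou = 100.0 * np.mean(inter / union)
    return len(bboxes), precision, precision_auc, iou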
for jj in range(int(_data_provider.cur_img_num / 100)):
    exemplar_list = [None for _ in range(batch_size)]
    source_list = [None for _ in range(batch_size)]
    instance_list = [None for _ in range(batch_size)]
    exemplar_cxy_list = [[None, None] for _ in range(batch_size)]
    source_cxy_list = [[None, None] for _ in range(batch_size)]
    instance_cxy_list = [[None, None] for _ in range(batch_size)]
    for batch in range(batch_size):
        # pick a random (exemplar, source, instance) triplet with its ground-truth boxes
        pairs, gts = _data_provider.rand_pick_pair()
        exemplar = cv2.imread(pairs[0])
        source = cv2.imread(pairs[1])
        instance = cv2.imread(pairs[2])
        exemplar_pos, exemplar_sz = rect_2_cxy_wh(gts[0])
        source_pos, source_sz = rect_2_cxy_wh(gts[1])
        instance_pos, instance_sz = rect_2_cxy_wh(gts[2])
        exemplar_list[batch] = exemplar
        source_list[batch] = source
        instance_list[batch] = instance
        exemplar_cxy_list[batch] = [exemplar_pos, exemplar_sz]
        source_cxy_list[batch] = [source_pos, source_sz]
        instance_cxy_list[batch] = [instance_pos, instance_sz]
    train_config = SiamRPN_init_batch(exemplar_list, exemplar_cxy_list, net)
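# rand_pick_pair is assumed to return three image paths (exemplar, source,
# instance) and their top-left [x, y, w, h] ground-truth boxes; a hypothetical
# stub for dry-running the batching loop above without the real data provider
# (the file names and boxes below are made up):
class StubDataProvider:
    cur_img_num = 100

    def rand_pick_pair(self):
        paths = ('exemplar.jpg', 'source.jpg', 'instance.jpg')
        gts = ([10, 10, 50, 50], [12, 11, 50, 50], [15, 13, 52, 48])
        return paths, gts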
total_failure = 0
warped_images = []
video_length = vot.get_frame_length(video_name)
# ground-truth bounding boxes
gts = vot.get_gts(video_name)
frame_tags = vot.get_frame_tags(video_name)
video_frames = vot.get_frames(video_name)
flow_dir = os.path.join(flow_dirs, video_name + '.txt')
img_dir = os.path.join(vot_dir, video_name, 'color')
confidence_dir = os.path.join('/home/jianingq/backward_flow_confidence_vot/', video_name)

# initialize network: image and init box
init_rbox = gts[0]
if len(init_rbox) == 4:  # axis-aligned rectangle
    [cx, cy], [w, h] = rect_2_cxy_wh(init_rbox)
else:  # 8-value rotated polygon --> axis-aligned box
    [cx, cy, w, h] = get_axis_aligned_bbox(init_rbox)

# tracker init
target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
im = video_frames[0]  # HxWxC
state = SiamRPN_init(im, target_pos, target_sz, net)
detection_box = [int(cx - w / 2), int(cy - h / 2), int(cx + w / 2), int(cy + h / 2)]

for i in range(0, video_length - 1):
    # track frame pair (i, i + 1) and load the backward-flow entropy map for frame i + 1
    im1 = np.copy(video_frames[i])
    im2 = np.copy(video_frames[i + 1])
    entropy_data = np.load(os.path.join(confidence_dir, format(i + 1, '08') + '_entropy.npy'))
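# get_axis_aligned_bbox is assumed to follow the usual VOT convention: convert
# an 8-value rotated polygon into a [cx, cy, w, h] box whose width and height
# are rescaled so the box area matches the polygon's. A simplified sketch:
def get_axis_aligned_bbox(region):
    region = np.asarray(region, dtype=float)
    cx, cy = np.mean(region[0::2]), np.mean(region[1::2])
    x1, x2 = np.min(region[0::2]), np.max(region[0::2])
    y1, y2 = np.min(region[1::2]), np.max(region[1::2])
    # rescale the bounding rectangle so its area matches the polygon's area
    area_poly = (np.linalg.norm(region[0:2] - region[2:4]) *
                 np.linalg.norm(region[2:4] - region[4:6]))
    area_rect = (x2 - x1) * (y2 - y1)
    s = np.sqrt(area_poly / area_rect)
    return [cx, cy, s * (x2 - x1), s * (y2 - y1)]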
import numpy as np
import torch
import torch.nn.functional as F
from torch.autograd import Variable


def rtaa_attack(net, x_init, x, gt, target_pos, target_sz, scale_z, p,
                eps=10, alpha=1, iteration=10, x_val_min=0, x_val_max=255):
    x = Variable(x.data)
    x_adv = Variable(x_init.data, requires_grad=True)
    alpha = eps * 1.0 / iteration  # step size eps / iteration (overrides the default)
    for i in range(iteration):
        delta, score = net(x_adv)
        score_temp = score.permute(1, 2, 3, 0).contiguous().view(2, -1)
        score = torch.transpose(score_temp, 0, 1)
        delta1 = delta.permute(1, 2, 3, 0).contiguous().view(4, -1)
        delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1).data.cpu().numpy()

        # encode the ground-truth box as anchor-relative regression targets
        gt_cen = rect_2_cxy_wh(gt)
        gt_cen = np.tile(gt_cen, (p.anchor.shape[0], 1))
        gt_cen[:, 0] = ((gt_cen[:, 0] - target_pos[0]) * scale_z - p.anchor[:, 0]) / p.anchor[:, 2]
        gt_cen[:, 1] = ((gt_cen[:, 1] - target_pos[1]) * scale_z - p.anchor[:, 1]) / p.anchor[:, 3]
        gt_cen[:, 2] = np.log(gt_cen[:, 2] * scale_z) / p.anchor[:, 2]
        gt_cen[:, 3] = np.log(gt_cen[:, 3] * scale_z) / p.anchor[:, 3]

        # create pseudo proposals by randomly shifting and shrinking the ground truth
        gt_cen_pseudo = rect_2_cxy_wh(gt)
        gt_cen_pseudo = np.tile(gt_cen_pseudo, (p.anchor.shape[0], 1))
        rate_xy1 = np.random.uniform(0.3, 0.5)
        rate_xy2 = np.random.uniform(0.3, 0.5)
        rate_wd = np.random.uniform(0.7, 0.9)
        gt_cen_pseudo[:, 0] = ((gt_cen_pseudo[:, 0] - target_pos[0] - rate_xy1 * gt_cen_pseudo[:, 2])
                               * scale_z - p.anchor[:, 0]) / p.anchor[:, 2]
        gt_cen_pseudo[:, 1] = ((gt_cen_pseudo[:, 1] - target_pos[1] - rate_xy2 * gt_cen_pseudo[:, 3])
                               * scale_z - p.anchor[:, 1]) / p.anchor[:, 3]
        gt_cen_pseudo[:, 2] = np.log(gt_cen_pseudo[:, 2] * rate_wd * scale_z) / p.anchor[:, 2]
        gt_cen_pseudo[:, 3] = np.log(gt_cen_pseudo[:, 3] * rate_wd * scale_z) / p.anchor[:, 3]

        # decode the regression output into image-space proposals
        delta[0, :] = (delta[0, :] * p.anchor[:, 2] + p.anchor[:, 0]) / scale_z + target_pos[0]
        delta[1, :] = (delta[1, :] * p.anchor[:, 3] + p.anchor[:, 1]) / scale_z + target_pos[1]
        delta[2, :] = (np.exp(delta[2, :]) * p.anchor[:, 2]) / scale_z
        delta[3, :] = (np.exp(delta[3, :]) * p.anchor[:, 3]) / scale_z
        location = np.array([delta[0] - delta[2] / 2,
                             delta[1] - delta[3] / 2,
                             delta[2],
                             delta[3]])
        label = overlap_ratio(location, gt)

        # IoU thresholds defining positive and negative samples, following the training step
        iou_hi = 0.6
        iou_low = 0.3

        # make labels
        y_pos = np.where(label > iou_hi, 1, 0)
        y_pos = torch.from_numpy(y_pos).cuda().long()
        y_neg = np.where(label < iou_low, 0, 1)
        y_neg = torch.from_numpy(y_neg).cuda().long()
        pos_index = np.where(y_pos.cpu() == 1)
        neg_index = np.where(y_neg.cpu() == 0)
        index = np.concatenate((pos_index, neg_index), axis=1)

        # make pseudo labels (flipped with respect to the true ones)
        y_pos_pseudo = np.where(label > iou_hi, 0, 1)
        y_pos_pseudo = torch.from_numpy(y_pos_pseudo).cuda().long()
        y_neg_pseudo = np.where(label < iou_low, 1, 0)
        y_neg_pseudo = torch.from_numpy(y_neg_pseudo).cuda().long()

        y_truth = y_pos
        y_pseudo = y_pos_pseudo

        # classification loss: push predictions away from the true labels, toward the pseudo labels
        loss_truth_cls = -F.cross_entropy(score[index], y_truth[index])
        loss_pseudo_cls = -F.cross_entropy(score[index], y_pseudo[index])
        loss_cls = (loss_truth_cls - loss_pseudo_cls) * 1

        # regression loss, weighted 5x
        loss_truth_reg = -rpn_smoothL1(delta1, gt_cen, y_pos)
        loss_pseudo_reg = -rpn_smoothL1(delta1, gt_cen_pseudo, y_pos)
        loss_reg = (loss_truth_reg - loss_pseudo_reg) * 5

        # final adversarial loss
        loss = loss_cls + loss_reg

        # gradient of the loss with respect to the input
        net.zero_grad()
        if x_adv.grad is not None:
            x_adv.grad.data.fill_(0)
        loss.backward(retain_graph=True)

        # signed-gradient step, then project back into the L-inf eps-ball and the valid pixel range
        adv_grad = where((x_adv.grad > 0) | (x_adv.grad < 0), x_adv.grad, 0)
        adv_grad = torch.sign(adv_grad)
        x_adv = x_adv - alpha * adv_grad
        x_adv = where(x_adv > x + eps, x + eps, x_adv)
        x_adv = where(x_adv < x - eps, x - eps, x_adv)
        x_adv = torch.clamp(x_adv, x_val_min, x_val_max)
        x_adv = Variable(x_adv.data, requires_grad=True)
    return x_adv
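# `where` above is not the torch built-in; it is assumed to be the element-wise
# select helper common in adversarial-attack code. A minimal sketch, assuming
# `cond` is a boolean tensor broadcastable against `x` and `y`:
def where(cond, x, y):
    # take x where cond holds, y elsewhere (equivalent to torch.where)
    cond = cond.float()
    return cond * x + (1.0 - cond) * y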
net.load_state_dict(model_dict)
net.cuda().eval()

# warm up
for i in range(10):
    net.temple(torch.autograd.Variable(torch.ones(1, 3, 127, 127)).cuda(),
               torch.autograd.Variable(torch.ones(1, 3, 271, 271)).cuda())
    net(torch.autograd.Variable(torch.ones(1, 3, 271, 271)).cuda())

for seq in OTB_seqs:
    _data_provider.pick_seq(seq)
    exemplar_path, exemplar_gt, cur_img_num = _data_provider.eval_pick_exemplar()
    exemplar = cv2.imread(exemplar_path)
    exemplar_pos, exemplar_sz = rect_2_cxy_wh(exemplar_gt)
    state = SiamRPN_init(exemplar, exemplar_pos, exemplar_sz, net)
    save_file = save_res_path + seq + '_ours.txt'
    with open(save_file, 'w') as tracking_res:  # close the handle after each sequence
        for idx in range(cur_img_num):
            instance_path = _data_provider.eval_pick_instance()
            instance = cv2.imread(instance_path)
            state = SiamRPN_track(state, instance)
            print('seq:{}:{} , score:{}'.format(seq, idx, state['score']))
            res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            tracking_res.write('{},{},{},{}\n'.format(res[0], res[1], res[2], res[3]))
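# Each result file written above holds one comma-separated [x, y, w, h] box per
# frame; a small helper (the name is ours) for loading such a file back:
def load_tracking_results(path):
    with open(path) as f:
        return [[float(v) for v in line.strip().split(',')]
                for line in f if line.strip()]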