def _construct_mask(self, pth, prev=False):
    """Build the previous frame's foreground mask, warp it with flow, save it.

    ``pth`` is parsed by ``self._info_from_pth`` into ``(video_name,
    frame_num)``.  A mask is built for frame ``frame_num - 1``, passed
    through ``flow_warp`` together with the flow array loaded from
    ``<self.flow_dir>/<video_name>/<frame_num:08>_flow.npy``, and written
    with ``np.save``.

    The source of the un-warped mask depends on the frame and on ``prev``:

    * previous frame is frame 0: a binary mask that is 1 inside the box
      ``self.gts[video_name][0]`` (converted with
      ``bbox_format(..., 'tlxy_wh_2_rect')``), regardless of ``prev``;
    * otherwise, ``prev`` false: the per-pixel maximum of the patches
      returned by ``self._gaussian_box(h, w)`` for every box returned by
      ``self._load_bbox(video_name, frame_num - 1)``;
    * otherwise, ``prev`` true: the array stored at
      ``<self.data_dir>/<video_name>/<frame_num - 1:08>_fgmask_nd.npy``.

    Args:
        pth: Path prefix identifying the frame; also the prefix of the
            output file.
        prev: Selects the mask source (see above) and the output suffix:
            ``'_fgmask_nd.npy'`` when false, ``'_prev_fgmask_nd.npy'`` when
            true.

    Returns:
        None.  The warped mask is saved to disk and its path is printed.
    """
    video_name, frame_num = self._info_from_pth(pth)
    prev_frame_num = frame_num - 1
    flow = np.load(
        os.path.join(self.flow_dir, video_name,
                     format(frame_num, '08') + '_flow.npy'))
    # The mask takes its height/width from the first two axes of the flow.
    frame_h, frame_w = flow.shape[0], flow.shape[1]

    if prev_frame_num == 0:
        bbox = self.gts[video_name][prev_frame_num]
        x0, y0, x1, y1 = (int(v)
                          for v in bbox_format(bbox, 'tlxy_wh_2_rect'))
        mask = np.zeros([frame_h, frame_w])
        # Clamp at 0: a negative slice bound would wrap around to the far
        # side of the frame.  Bounds past the end are truncated by NumPy.
        mask[max(y0, 0):max(y1, 0), max(x0, 0):max(x1, 0)] = 1
    elif not prev:
        bboxes, _scores, det_scores = self._load_bbox(video_name,
                                                      prev_frame_num)
        mask = np.zeros([frame_h, frame_w])
        # zip() is kept so that iteration still stops at the shorter of the
        # two sequences; the detection score itself is not used.
        for bbox, _det_score in zip(bboxes, det_scores):
            left, top, box_w, box_h = (int(v) for v in bbox)

            # Portion of the box that actually lies inside the frame.
            x_lo, y_lo = max(left, 0), max(top, 0)
            x_hi = min(left + box_w, frame_w)
            y_hi = min(top + box_h, frame_h)
            if x_lo >= x_hi or y_lo >= y_hi:
                # Entirely outside the frame (or degenerate): nothing to do.
                continue

            gaussian_mask = self._gaussian_box(box_h, box_w)
            # Crop the patch to the visible part of the box so both operands
            # of np.maximum have the same shape.
            # NOTE(review): assumes _gaussian_box(h, w) returns an (h, w)
            # array -- confirm against its definition.
            patch = gaussian_mask[y_lo - top:y_hi - top,
                                  x_lo - left:x_hi - left]
            mask[y_lo:y_hi, x_lo:x_hi] = np.maximum(
                mask[y_lo:y_hi, x_lo:x_hi], patch)
    else:
        mask = np.load(
            os.path.join(self.data_dir, video_name,
                         format(prev_frame_num, '08') + '_fgmask_nd.npy'))

    mask = flow_warp(mask, flow)

    out_path = pth + ('_prev_fgmask_nd.npy' if prev else '_fgmask_nd.npy')
    print(out_path)
    np.save(out_path, mask)
int(cx + w / 2), int(cy + h / 2) ] foreground_candidate_mask = np.zeros(im2.shape) foreground_candidate_mask[ detection_box[1]:detection_box[3], detection_box[0]:detection_box[2], :] = 1 state = SiamRPN_init(im2, target_pos, target_sz, net) font = cv2.FONT_HERSHEY_SIMPLEX data_dir = os.path.join('/home/jianingq/vot_info', video_name) if not os.path.exists(data_dir): os.makedirs(data_dir) for i in range(0, video_length - 1): im1 = np.copy(video_frames[i]) im2 = np.copy(video_frames[i + 1]) current_bbox = bbox_format(gts[i], 'tlxy_wh_2_rect') current_bbox = [int(j) for j in current_bbox] next_bbox = bbox_format(gts[i + 1], 'tlxy_wh_2_rect') next_bbox = [int(j) for j in next_bbox] flow = np.load( os.path.join(confidence_dir, format(i + 1, '08') + '_flow.npy')) entropy_data = np.load( os.path.join(confidence_dir, format(i + 1, '08') + '_entropy.npy')) entropy = (entropy_data - np.min(entropy_data)) / ( np.max(entropy_data) - np.min(entropy_data)) confidence = 1 - entropy warped_im1 = flow_warp(im1, flow) diff, rgb_diff = color_confidence(warped_im1, im2)
foreground = foreground_rank[1] foreground_index = np.where(((labeled == foreground))) binary_labeled[foreground_index] = 1 else: #foreground = np.argmax(np.bincount(patch.flatten())) foreground_index = np.where(((labeled == foreground))) binary_labeled[foreground_index] = 1 ##################combined##################### bi_mask = np.zeros((np.shape(binary_labeled)[0],np.shape(binary_labeled)[1],3),dtype='bool') bi_mask[:,:,0] = binary_labeled bi_mask[:,:,1] = binary_labeled bi_mask[:,:,2] = binary_labeled ############################################ current_bbox = bbox_format(gts[i],'tlxy_wh_2_rect') next_bbox = bbox_format(gts[i + 1],'tlxy_wh_2_rect') next_bbox = [int(l) for l in next_bbox] next_mask = background_flow(im, detection_box, flow) state = SiamRPN_track_bbox(score_net, state, im, (next_mask[:,:,0]>0.99) ,(bi_mask[:,:,0]), gts[i + 1]) res = cxy_wh_2_rect(state['target_pos'], state['target_sz']) #x0,y0,w,h res = [int(l) for l in res] #x0,y0,x1,y1 detection_box = [res[0], res[1], res[0] + res[2], res[1] + res[3]] foreground_index = np.bitwise_or(((next_mask[:,:,0])>0.99) ,bi_mask[:,:,0])
def tracker_eval_record_data(net, x_crop, target_pos, target_sz, window,
                             scale_z, p, im, next_mask, conf_mask, index_1,
                             index_2, frame_num, data_dir, gtbbox):
    """Score the top candidate boxes and return the one ranked highest by ``net``.

    ``net(x_crop)`` is unpacked as ``(delta, score)``.  ``delta`` is decoded
    against ``p.anchor`` and ``score`` is reduced to row 1 of a softmax over
    two rows.  The highest-scoring candidates (at most 100) are turned into
    boxes clipped to ``next_mask``'s first two dimensions, and each one is
    described by six features:

    ``[softmax score, next_mask coverage, conf_mask coverage,
    exp(ratio change), exp(size change), window value]``

    The feature matrix is moved to CUDA and passed to ``net``; the candidate
    at ``np.argmax`` of that output is returned.

    NOTE(review): ``net`` is invoked twice with different inputs -- once on
    ``x_crop`` and once on the ``(num_candidates, 6)`` feature matrix.
    Confirm the caller passes a model that supports both.

    Args:
        net: Callable model (see the note above).
        x_crop: Input forwarded to ``net`` for the first call.
        target_pos: Current target centre; indices 0 and 1 are added to the
            decoded offsets.
        target_sz: Current target size; indices 0 and 1 are used as the
            reference for the ratio/size change terms.
        window: Per-anchor window values, indexed by anchor id.
        scale_z: Divisor applied to the decoded deltas.
        p: Config object providing ``anchor`` (columns 0-3 are used) and
            ``penalty_k``.
        im, index_1, index_2, frame_num, data_dir, gtbbox: Accepted for
            interface compatibility; not used by this body.  (The original
            computed a per-candidate IoU against ``gtbbox`` but never used
            the result.)
        next_mask: Mask whose overlap with each candidate gives the
            foreground-coverage feature; its shape also bounds the boxes.
        conf_mask: Mask whose overlap with each candidate gives the
            confidence-coverage feature.

    Returns:
        Tuple ``(target_pos, target_sz, score, alternative)``: the centre
        and size (NumPy arrays) of the chosen box, its
        ``exp(-(r_c * s_c - 1) * p.penalty_k)`` value, and an empty list.
    """
    delta, score = net(x_crop)
    delta = delta.permute(1, 2, 3,
                          0).contiguous().view(4, -1).data.cpu().numpy()
    score = F.softmax(
        score.permute(1, 2, 3, 0).contiguous().view(2, -1),
        dim=0).data[1, :].cpu().numpy()

    # Decode the raw regression output against the anchors.
    delta[0, :] = delta[0, :] * p.anchor[:, 2] + p.anchor[:, 0]
    delta[1, :] = delta[1, :] * p.anchor[:, 3] + p.anchor[:, 1]
    delta[2, :] = np.exp(delta[2, :]) * p.anchor[:, 2]
    delta[3, :] = np.exp(delta[3, :]) * p.anchor[:, 3]

    def change(r):
        # Symmetric ratio: always >= 1 for positive r.
        return np.maximum(r, 1. / r)

    def sz(w, h):
        pad = (w + h) * 0.5
        return np.sqrt((w + pad) * (h + pad))

    # Size change and aspect-ratio change of each candidate vs. the target.
    s_c = change(sz(delta[2, :], delta[3, :]) / sz(target_sz[0], target_sz[1]))
    r_c = change((target_sz[0] / target_sz[1]) / (delta[2, :] / delta[3, :]))

    # Never inspect more candidates than there are anchors.
    inspect_num = min(100, score.size)
    top_score = score.argsort()[-inspect_num:][::-1]

    temp_result = np.zeros((inspect_num, 4), dtype=int)
    score_result = np.zeros(inspect_num)
    all_scores = []

    max_x = next_mask.shape[1] - 1
    max_y = next_mask.shape[0] - 1

    for i, anchor_id in enumerate(top_score):
        target = delta[:, anchor_id] / scale_z
        rect = cxy_wh_2_rect(
            np.array([target[0] + target_pos[0], target[1] + target_pos[1]]),
            np.array([target[2], target[3]]))
        rect = [int(v) for v in rect]

        # Convert (x, y, w, h) to corners, clip to the mask, convert back.
        x0 = np.clip(rect[0], 0, max_x)
        y0 = np.clip(rect[1], 0, max_y)
        x1 = np.clip(rect[0] + rect[2], 0, max_x)
        y1 = np.clip(rect[1] + rect[3], 0, max_y)
        box_w = x1 - x0
        box_h = y1 - y0

        bbox_mask = np.zeros(next_mask.shape)
        bbox_mask[y0:y1, x0:x1] = 1

        # Fraction of the box covered by each mask.  A box that clips to
        # zero area covers nothing; report 0 instead of dividing by zero.
        area = float(box_w * box_h)
        if area > 0:
            fg_score = np.count_nonzero(
                np.logical_and(next_mask, bbox_mask)) / area
            conf_score = np.count_nonzero(
                np.logical_and(conf_mask, bbox_mask)) / area
        else:
            fg_score = 0.0
            conf_score = 0.0

        temp_result[i, :] = [x0, y0, box_w, box_h]
        score_result[i] = np.exp(
            -(r_c[anchor_id] * s_c[anchor_id] - 1.) * p.penalty_k)

        all_scores.append([
            score[anchor_id],
            fg_score,
            conf_score,
            np.exp(r_c[anchor_id]),
            np.exp(s_c[anchor_id]),
            window[anchor_id],
        ])

    x = Variable(torch.from_numpy(np.array(all_scores)).float().cuda())
    y = net(x).data.cpu().numpy()

    # Pick the candidate with the largest network output.  Both centre
    # coordinates must come from that same candidate (the original read the
    # y-centre through an undefined `top_ids`).
    chosen_id = np.argmax(y)
    chosen = temp_result[chosen_id]
    target_pos = np.array(
        [chosen[0] + (chosen[2] / 2), chosen[1] + (chosen[3] / 2)])
    target_sz = np.array([chosen[2], chosen[3]])
    alternative = []
    return target_pos, target_sz, score_result[chosen_id], alternative