def track(sess, run_opts, hp, run, design, image_, pos_x, pos_y, target_w, target_h,
          x_sz, scale_factors, final_score_sz, penalty, filename, image,
          templates_z, templates_z_, scores):
    scaled_search_area = x_sz * scale_factors
    scaled_target_w = target_w * scale_factors
    scaled_target_h = target_h * scale_factors
    image_, scores_ = sess.run(
        [image, scores],
        feed_dict={
            siam.pos_x_ph: pos_x,
            siam.pos_y_ph: pos_y,
            siam.x_sz0_ph: scaled_search_area[0],
            siam.x_sz1_ph: scaled_search_area[1],
            siam.x_sz2_ph: scaled_search_area[2],
            templates_z: np.squeeze(templates_z_),
            image: image_,
        },
        **run_opts)
    scores_ = np.squeeze(scores_)
    # penalize change of scale
    scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
    scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
    # find scale with highest peak (after penalty)
    new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
    # update scaled sizes
    x_sz = (1 - hp.scale_lr) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
    target_w = (1 - hp.scale_lr) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
    target_h = (1 - hp.scale_lr) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
    # select response with new_scale_id
    score_ = scores_[new_scale_id, :, :]
    my_score = score_
    # normalize scores
    score_ = score_ - np.min(score_)
    score_ = score_ / np.sum(score_)
    # apply displacement penalty
    score_ = (1 - hp.window_influence) * score_ + hp.window_influence * penalty
    pos_x, pos_y = _update_target_position(pos_x, pos_y, score_, final_score_sz,
                                           design.tot_stride, design.search_sz,
                                           hp.response_up, x_sz)
    bbox = pos_x, pos_y, target_w, target_h
    if run.visualization:
        # convert <cx,cy,w,h> to <x,y,w,h> and show the frame
        bbox_d = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
        show_frame(image_, bbox_d, 1)
    p = np.asarray(np.unravel_index(np.argmax(score_), np.shape(score_)))
    # print("Score bbox(%i,%i) %s max=%f(%f) at scale %d at %s" % (
    #     pos_x, pos_y, str(my_score.shape), np.max(my_score),
    #     np.max(score_), new_scale_id, str(p)))
    return image_, bbox, np.max(score_), new_scale_id, x_sz
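# Every variant in this file leans on an _update_target_position() helper that
# is not shown here. A minimal sketch, assuming the standard siamfc-tf logic:
# the score-map peak is mapped back to frame coordinates through the response
# upsampling factor, the total network stride, and the current search-area size.
def _update_target_position(pos_x, pos_y, score, final_score_sz, tot_stride,
                            search_sz, response_up, x_sz):
    # locate the peak of the upsampled score map
    p = np.asarray(np.unravel_index(np.argmax(score), np.shape(score)))
    center = float(final_score_sz - 1) / 2
    # displacement from the center, in score-map pixels
    disp_in_area = p - center
    # undo the upsampling, i.e. go back to network-stride units
    disp_in_xcrop = disp_in_area * float(tot_stride) / response_up
    # rescale from the fixed-size search crop to frame coordinates
    disp_in_frame = disp_in_xcrop * x_sz / search_sz
    pos_y, pos_x = pos_y + disp_in_frame[0], pos_x + disp_in_frame[1]
    return pos_x, pos_y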
def tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h,
            final_score_sz, siam, start_frame):
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames, 4))

    scale_factors = hp.scale_step ** np.linspace(-np.ceil(hp.scale_num / 2),
                                                 np.ceil(hp.scale_num / 2),
                                                 hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt((target_w + context) * (target_h + context))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    run_opts = {}

    # This variant queries the network through siam.get_template_z() /
    # siam.get_scores() instead of sess.run() on a prebuilt graph.
    # save first frame position (from ground-truth)
    bboxes[0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
    image_, templates_z_ = siam.get_template_z(pos_x, pos_y, z_sz,
                                               frame_name_list[0], design)
    new_templates_z_ = templates_z_
    t_start = time.time()

    for i in range(1, num_frames):
        if i % 10 == 0:
            print('frame: %d' % (i + 1))
        scaled_exemplar = z_sz * scale_factors
        scaled_search_area = x_sz * scale_factors
        scaled_target_w = target_w * scale_factors
        scaled_target_h = target_h * scale_factors
        image_, scores_ = siam.get_scores(pos_x, pos_y, scaled_search_area,
                                          templates_z_, frame_name_list[i],
                                          design, final_score_sz)
        scores_ = np.squeeze(scores_)
        # penalize change of scale
        scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
        scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
        # find scale with highest peak (after penalty)
        new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
        # update scaled sizes
        x_sz = (1 - hp.scale_lr) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
        target_w = (1 - hp.scale_lr) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
        target_h = (1 - hp.scale_lr) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
        # select response with new_scale_id and normalize it
        score_ = scores_[new_scale_id, :, :]
        score_ = score_ - np.min(score_)
        score_ = score_ / np.sum(score_)
        # apply displacement penalty
        score_ = (1 - hp.window_influence) * score_ + hp.window_influence * penalty
        pos_x, pos_y = _update_target_position(pos_x, pos_y, score_, final_score_sz,
                                               design.tot_stride, design.search_sz,
                                               hp.response_up, x_sz)
        # convert <cx,cy,w,h> to <x,y,w,h> and save output
        bboxes[i, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
        # update the target representation with a rolling average
        if hp.z_lr > 0:
            _, new_templates_z_ = siam.get_template_z(pos_x, pos_y, z_sz, image_, design)
            templates_z_ = (1 - hp.z_lr) * templates_z_ + hp.z_lr * new_templates_z_
        # update template patch size
        z_sz = (1 - hp.scale_lr) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]
        if run.visualization:
            show_frame(image_, bboxes[i, :], 1)

    t_elapsed = time.time() - t_start
    speed = num_frames / t_elapsed
    plt.close('all')
    return bboxes, speed
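# For context, a hypothetical driver for the variant above, patterned on the
# siamfc-tf evaluation script. parse_arguments, region_to_bbox, the module
# layout and the data paths are all assumptions, not part of this file.
import glob
from src.parse_arguments import parse_arguments   # assumed location
from src.region_to_bbox import region_to_bbox     # assumed location

def _demo_run(siam):
    hp, evaluation, run, env, design = parse_arguments()
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    frame_name_list = sorted(glob.glob('data/some_video/*.jpg'))  # hypothetical path
    # ground-truth region of the first frame, <x,y,w,h>; illustrative numbers
    pos_x, pos_y, target_w, target_h = region_to_bbox(np.array([140., 110., 64., 48.]))
    bboxes, speed = tracker(hp, run, design, frame_name_list,
                            pos_x, pos_y, target_w, target_h,
                            final_score_sz, siam, start_frame=0)
    print('tracked %d frames at %.1f fps' % (len(bboxes), speed))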
def tracker_v2(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h,
               final_score_sz, image, templates_z, scores, start_frame,
               path_ckpt, siamNet):
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames, 4))

    scale_factors = hp.scale_step ** np.linspace(-np.ceil(hp.scale_num / 2),
                                                 np.ceil(hp.scale_num / 2),
                                                 hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt((target_w + context) * (target_h + context))  # sqrt((w+2p)*(h+2p))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    run_opts = {}
    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, path_ckpt)
        print("Model restored......")
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        bboxes[0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
        z_image = cv2.imread(frame_name_list[0])
        # (debug) draw the initial box on z_image with cv2.rectangle / cv2.imshow
        # z_image = cv2.resize(z_image, (resize_width, resize_height))
        # inputs are normalized to [-0.5, 0.5] in this variant
        image_, templates_z_ = sess.run(
            [image, templates_z],
            feed_dict={
                siamNet.batched_pos_x_ph: [pos_x],
                siamNet.batched_pos_y_ph: [pos_y],
                siamNet.batched_z_sz_ph: [z_sz],
                image: [z_image / 255. - 0.5]})
        new_templates_z_ = templates_z_
        t_start = time.time()

        for i in range(1, num_frames):
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors
            x_image = cv2.imread(frame_name_list[i])
            # x_image = cv2.resize(x_image, (resize_width, resize_height))
            image_, scores_ = sess.run(
                [image, scores],
                feed_dict={
                    siamNet.batched_pos_x_ph: [pos_x],
                    siamNet.batched_pos_y_ph: [pos_y],
                    siamNet.batched_x_sz0_ph: [scaled_search_area[0]],
                    siamNet.batched_x_sz1_ph: [scaled_search_area[1]],
                    siamNet.batched_x_sz2_ph: [scaled_search_area[2]],
                    templates_z: np.squeeze(templates_z_),
                    image: [x_image / 255. - 0.5],
                },
                **run_opts)
            # (debug) plt.imshow(np.squeeze(scores_[0]), cmap='gray'); plt.show()
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            x_sz = (1 - hp.scale_lr) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
            target_w = (1 - hp.scale_lr) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
            target_h = (1 - hp.scale_lr) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
            # select response with new_scale_id and normalize it
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_ / np.sum(score_)
            # apply displacement penalty
            score_ = (1 - hp.window_influence) * score_ + hp.window_influence * penalty
            # (debug) plt.imshow(np.squeeze(score_), cmap='gray'); plt.show()
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_, final_score_sz,
                                                   design.tot_stride, design.search_sz,
                                                   hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[i, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
            # update the target representation with a rolling average
            if hp.z_lr > 0:
                new_templates_z_ = sess.run(
                    [templates_z],
                    feed_dict={
                        siamNet.batched_pos_x_ph: [pos_x],
                        siamNet.batched_pos_y_ph: [pos_y],
                        siamNet.batched_z_sz_ph: [z_sz],
                        image: image_})
                templates_z_ = (1 - hp.z_lr) * np.asarray(templates_z_) \
                               + hp.z_lr * np.asarray(new_templates_z_)
            # update template patch size
            z_sz = (1 - hp.scale_lr) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]
            if run.visualization:
                show_frame((image_[0] + 0.5) * 255, bboxes[i, :], 1)

        t_elapsed = time.time() - t_start
        speed = num_frames / t_elapsed
        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads)

    plt.close('all')
    return bboxes, speed
def tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h,
            final_score_sz, filename, image, templates_z, scores, start_frame,
            candidate_scores):
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames, 4))

    scale_factors = hp.scale_step ** np.linspace(-np.ceil(hp.scale_num / 2),
                                                 np.ceil(hp.scale_num / 2),
                                                 hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt((target_w + context) * (target_h + context))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    run_opts = {}

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        bboxes[0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
        image_, templates_z_ = sess.run([image, templates_z], feed_dict={
            siam.pos_x_ph: pos_x,
            siam.pos_y_ph: pos_y,
            siam.z_sz_ph: z_sz,
            filename: frame_name_list[0]})
        new_templates_z_ = templates_z_
        t_start = time.time()

        for i in range(1, num_frames):
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors
            image_, scores_, candidate_scores_ = sess.run(
                [image, scores, candidate_scores],
                feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.x_sz0_ph: scaled_search_area[0],
                    siam.x_sz1_ph: scaled_search_area[1],
                    siam.x_sz2_ph: scaled_search_area[2],
                    templates_z: np.squeeze(templates_z_),
                    filename: frame_name_list[i],
                },
                **run_opts)
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            x_sz = (1 - hp.scale_lr) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
            target_w = (1 - hp.scale_lr) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
            target_h = (1 - hp.scale_lr) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
            # select response with new_scale_id and normalize it
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_ / np.sum(score_)
            # apply displacement penalty
            score_ = (1 - hp.window_influence) * score_ + hp.window_influence * penalty
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_, final_score_sz,
                                                   design.tot_stride, design.search_sz,
                                                   hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[i, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
            # update the target representation with a rolling average
            if hp.z_lr > 0:
                new_templates_z_ = sess.run([templates_z], feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.z_sz_ph: z_sz,
                    image: image_})
                templates_z_ = (1 - hp.z_lr) * np.asarray(templates_z_) \
                               + hp.z_lr * np.asarray(new_templates_z_)
            # update template patch size
            z_sz = (1 - hp.scale_lr) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]
            if run.visualization:
                show_frame(image_, bboxes[i, :], 1)

        t_elapsed = time.time() - t_start
        speed = num_frames / t_elapsed
        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads)

    plt.close('all')
    return bboxes, speed
def main_camera():
    cam = cv2.VideoCapture(0)
    if not cam.isOpened():
        exit()
    bboxes = np.zeros((10, 4))
    # avoid printing TF debugging information
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # TODO: allow parameters from command line or leave everything in json files?
    hp, evaluation, run, env, design = parse_arguments()
    # Set size for use with tf.image.resize_images with align_corners=True.
    # For example, with an upsampling factor of 3:
    #   [1 4 7] => [1 2 3 4 5 6 7]        (length 3*(3-1)+1)
    # instead of
    #   [1 4 7] => [1 1 2 3 4 5 6 7 7]    (length 3*3)
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    # build the TF graph once for all frames
    image, templates_z, scores = siam.build_tracking_graph_cam(final_score_sz, design, env)

    ret, frame = cam.read()
    print(frame.dtype)
    roi = get_roi(frame)
    pos_x, pos_y, target_w, target_h = convert_roi(roi[0][0], roi[0][1])
    # pos_x, pos_y, target_w, target_h = region_to_bbox(gt[evaluation.start_frame])

    scale_factors = hp.scale_step ** np.linspace(-np.ceil(hp.scale_num / 2),
                                                 np.ceil(hp.scale_num / 2),
                                                 hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt((target_w + context) * (target_h + context))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    run_opts = {}

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        tf.global_variables_initializer().run()
        # save first frame position (from the user-selected ROI)
        bboxes[0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
        # TODO: convert roi[0] to the silly siam format
        image_, templates_z_ = sess.run(
            [image, templates_z],
            feed_dict={
                siam.pos_x_ph: pos_x,
                siam.pos_y_ph: pos_y,
                siam.z_sz_ph: z_sz,
                image: frame})
        new_templates_z_ = templates_z_
        t_start = time.time()
        num_frames = 0

        while True:
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors
            ret, frame = cam.read()
            num_frames += 1
            image_, scores_ = sess.run(
                [image, scores],
                feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.x_sz0_ph: scaled_search_area[0],
                    siam.x_sz1_ph: scaled_search_area[1],
                    siam.x_sz2_ph: scaled_search_area[2],
                    templates_z: np.squeeze(templates_z_),
                    image: frame,
                },
                **run_opts)
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            x_sz = (1 - hp.scale_lr) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
            target_w = (1 - hp.scale_lr) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
            target_h = (1 - hp.scale_lr) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
            # select response with new_scale_id and normalize it
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_ / np.sum(score_)
            # apply displacement penalty
            score_ = (1 - hp.window_influence) * score_ + hp.window_influence * penalty
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_, final_score_sz,
                                                   design.tot_stride, design.search_sz,
                                                   hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            out = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
            # update the target representation with a rolling average
            if hp.z_lr > 0:
                new_templates_z_ = sess.run(
                    [templates_z],
                    feed_dict={
                        siam.pos_x_ph: pos_x,
                        siam.pos_y_ph: pos_y,
                        siam.z_sz_ph: z_sz,
                        image: image_})
                templates_z_ = (1 - hp.z_lr) * np.asarray(templates_z_) \
                               + hp.z_lr * np.asarray(new_templates_z_)
            # update template patch size
            z_sz = (1 - hp.scale_lr) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]
            key = 0
            if run.visualization:
                key = show_frame(image_, out)
            t_elapsed = time.time() - t_start
            speed = num_frames / t_elapsed
            if key == 120:  # ord('x') quits
                print("Speed", speed)
                sess.close()
                cv2.destroyAllWindows()
                exit()
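# A quick numeric check of the scale pyramid used by every variant in this
# file. The scale_step/scale_num values below are illustrative; the real ones
# come from the json parameter files.
def _demo_scale_factors():
    scale_step, scale_num = 1.04, 3
    scale_factors = scale_step ** np.linspace(-np.ceil(scale_num / 2),
                                              np.ceil(scale_num / 2), scale_num)
    # ceil(3/2) = 2, so the exponents are [-2, 0, 2] and
    # scale_factors ~= [0.9246, 1.0, 1.0816]: one shrunk, one unchanged and
    # one enlarged search area are scored per frame
    return scale_factors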
def tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h,
            final_score_sz, filename, image, templates_z, scores, start_frame,
            video_name, frame_sz, z_crops, x_crops, anchor_coord):
    num_frames = np.size(frame_name_list) - start_frame
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames, 4))
    reinitialize = False

    scale_factors = hp.scale_step ** np.linspace(-np.ceil(hp.scale_num / 2),
                                                 np.ceil(hp.scale_num / 2),
                                                 hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt((target_w + context) * (target_h + context))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    # detector settings (YOLO via darkflow)
    options = {"model": "/home/mdinh/siamfc-tf/cfg/yolo-mio.cfg",
               "pbLoad": "/home/mdinh/siamfc-tf/built_graph/yolo-mio.pb",
               "metaLoad": "/home/mdinh/siamfc-tf/built_graph/yolo-mio.meta",
               "gpu": 0.4,
               "threshold": 0.4}
    tfnet = TFNet(options)

    run_opts = {}

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        bboxes[0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
        image_, templates_z_ = sess.run([image, templates_z], feed_dict={
            siam.pos_x_ph: pos_x,
            siam.pos_y_ph: pos_y,
            siam.z_sz_ph: z_sz,
            filename: frame_name_list[start_frame]})
        new_templates_z_ = templates_z_
        t_start = time.time()

        for i in range(1, num_frames):
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors
            image_, scores_, x_crops_, anchor_coord__ = sess.run(
                [image, scores, x_crops, anchor_coord],
                feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.x_sz0_ph: scaled_search_area[0],
                    siam.x_sz1_ph: scaled_search_area[1],
                    siam.x_sz2_ph: scaled_search_area[2],
                    templates_z: np.squeeze(templates_z_),
                    filename: frame_name_list[i + start_frame],
                },
                **run_opts)
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            x_sz = (1 - hp.scale_lr) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
            target_w = (1 - hp.scale_lr) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
            target_h = (1 - hp.scale_lr) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
            # select response with new_scale_id
            score_ = scores_[new_scale_id, :, :]
            max_score = score_.max()
            min_score = score_.min()
            score_augmentation = np.full((257, 257), 0.)

            # flag drift when the estimated position leaves the frame
            if abs(pos_x) > frame_sz[1] or abs(pos_y) > frame_sz[0] or pos_x < 0 or pos_y < 0:
                OOB = True
                reinitialize = True
                best_detection = []
            else:
                OOB = False
                bbox_crop = []
                best_detection = []

            # run the detector on the search crop while still in bounds
            if OOB == False:
                result_crop = tfnet.return_predict(x_crops_[new_scale_id, :, :])
                if result_crop:
                    maxdetection_crop = max(result_crop, key=lambda x: x['confidence'])
                    for detection in result_crop:
                        bbox_crop.append([detection['topleft']['x'],
                                          detection['topleft']['y'],
                                          detection['bottomright']['x'] - detection['topleft']['x'],
                                          detection['bottomright']['y'] - detection['topleft']['y']])
                    best_crop = [maxdetection_crop['topleft']['x'],
                                 maxdetection_crop['topleft']['y'],
                                 maxdetection_crop['bottomright']['x'] - maxdetection_crop['topleft']['x'],
                                 maxdetection_crop['bottomright']['y'] - maxdetection_crop['topleft']['y']]
                    # detection peak in score-map coordinates; the crop is twice
                    # the score-map size, hence the division by 4 (= 2 * 2)
                    peak_x = int((maxdetection_crop['bottomright']['x']
                                  + maxdetection_crop['topleft']['x']) / 4)
                    peak_y = int((maxdetection_crop['bottomright']['y']
                                  + maxdetection_crop['topleft']['y']) / 4)
                    if 100 <= peak_x < 200 and 100 <= peak_y < 200:
                        reinitialize = True
                        # generate an augmentation map around the detection peak
                        score_augmentation = np.full((257, 257), float(min_score))
                        for x in range(peak_x - 5, peak_x + 5):
                            for y in range(peak_y - 5, peak_y + 5):
                                score_augmentation[y, x] = max_score
                        # map the crop-space detection back to frame coordinates
                        bbox_frame = [element * scaled_search_area[new_scale_id] / design.search_sz
                                      for element in best_crop]
                        bbox_frame[0] = best_crop[0] + anchor_coord__[new_scale_id, 1]
                        bbox_frame[1] = best_crop[1] + anchor_coord__[new_scale_id, 0]
                        # average the detected size into the tracked size
                        target_h = (bbox_frame[3] + target_h) / 2
                        target_w = (bbox_frame[2] + target_w) / 2

            # blend the siamese score map with the detector augmentation map
            score_updated = hp.scale_lr * score_ + (1 - hp.scale_lr) * score_augmentation
            score_updated = score_updated - np.min(score_updated)
            score_updated = score_updated / np.sum(score_updated)
            # apply displacement penalty
            score_updated = (1 - hp.window_influence) * score_updated \
                            + hp.window_influence * penalty
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_updated,
                                                   final_score_sz, design.tot_stride,
                                                   design.search_sz, hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[i, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
            bbox_detection = [0, 0, 0, 0]

            # run the detector on the whole frame if out of bounds
            if OOB == True:
                OOB = False
                result = tfnet.return_predict(image_)
                if len(result) > 0:
                    maxdetection = max(result, key=lambda x: x['confidence'])
                    for detection in result:
                        bbox_detection.append([detection['topleft']['x'],
                                               detection['topleft']['y'],
                                               detection['bottomright']['x'] - detection['topleft']['x'],
                                               detection['bottomright']['y'] - detection['topleft']['y']])
                    best_detection = [maxdetection['topleft']['x'],
                                      maxdetection['topleft']['y'],
                                      maxdetection['bottomright']['x'] - maxdetection['topleft']['x'],
                                      maxdetection['bottomright']['y'] - maxdetection['topleft']['y']]

            # (disabled) drift check: reinitialize from the detection when
            # 0 < utils.iou(best_detection, bboxes[i, :]) < 0.5
            # TODO: reinitialize when the tracker collides with the edge, using
            # the detection closest to the last position

            # update the target representation with a rolling average
            if hp.z_lr > 0:
                new_templates_z_ = sess.run([templates_z], feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.z_sz_ph: z_sz,
                    image: image_})
                if reinitialize == True:
                    # assign new target size and position from the detection
                    if best_detection and OOB == True:
                        OOB = False
                        bboxes[i, :] = best_detection
                        target_h = best_detection[3]
                        target_w = best_detection[2]
                        pos_x = best_detection[0] + best_detection[2] / 2
                        pos_y = best_detection[1] + best_detection[3] / 2
                        context = design.context * (target_w + target_h)
                        z_sz = np.sqrt((target_w + context) * (target_h + context))
                        x_sz = float(design.search_sz) / design.exemplar_sz * z_sz
                        # thresholds to saturate patches shrinking/growing
                        min_z = hp.scale_min * z_sz
                        max_z = hp.scale_max * z_sz
                        min_x = hp.scale_min * x_sz
                        max_x = hp.scale_max * x_sz
                        templates_z_ = np.asarray(new_templates_z_)
                    reinitialize = False
                else:
                    templates_z_ = (1 - hp.z_lr) * np.asarray(templates_z_) \
                                   + hp.z_lr * np.asarray(new_templates_z_)

            # update template patch size
            z_sz = (1 - hp.scale_lr) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]
            if run.visualization:
                show_frame(image_, bboxes[i, :], bbox_detection, i, video_name, 1)
                # show_crops(x_crops_, best_crop, i, video_name, 3)
                # show_score(score_updated, i, video_name, 3)
        # end of loop

        t_elapsed = time.time() - t_start
        speed = num_frames / t_elapsed
        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads)

    plt.close('all')
    return bboxes, speed
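# The disabled drift check above calls utils.iou; a minimal sketch of such a
# helper for <x,y,w,h> boxes (an assumption -- the real utils module is not
# shown in this file):
def _iou_xywh(box_a, box_b):
    # returns intersection-over-union in [0, 1]
    xa = max(box_a[0], box_b[0])
    ya = max(box_a[1], box_b[1])
    xb = min(box_a[0] + box_a[2], box_b[0] + box_b[2])
    yb = min(box_a[1] + box_a[3], box_b[1] + box_b[3])
    inter = max(0., xb - xa) * max(0., yb - ya)
    union = box_a[2] * box_a[3] + box_b[2] * box_b[3] - inter
    return inter / union if union > 0 else 0.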
def cozmo_program(robot: cozmo.robot.Robot):
    global angle
    angle = 25.
    robot.set_head_angle(degrees(angle)).wait_for_completed()
    robot.set_lift_height(0.0).wait_for_completed()
    robot.camera.image_stream_enabled = True
    robot.camera.color_image_enabled = True
    robot.camera.enable_auto_exposure = True
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    frame = os.path.join(directory, "current.jpeg")
    print("Starting Tensorflow...")
    with tf.Session() as sess:
        print("Session successfully started")
        model = load_model('modelv1.07-0.96.hdf5')
        while True:
            global X, Y, W, H
            global result
            X = 245.
            Y = 165.
            W = 150.
            H = 150.
            gt = [X, Y, W, H]
            pos_x, pos_y, target_w, target_h = region_to_bbox(gt)
            frame = os.path.join(directory, "current.jpeg")
            result = 0
            dog_counter = 0
            cat_counter = 0
            background_counter = 0
            next_state = 0
            current_state = 0
            # states: Background = 0, Cat = 1, Dog = 2
            while True:
                latest_img = robot.world.latest_image
                if latest_img is not None:
                    pilImage = latest_img.raw_image
                    pilImage.resize((640, 480), Image.ANTIALIAS).save(
                        os.path.join(directory, "current.jpeg"), "JPEG")
                show_frame(np.asarray(Image.open(frame)), [900., 900., 900., 900.], 1)
                img = load_image(frame)
                [result, out_relu, global_average_pooling2d] = sess.run(
                    [model.outputs,
                     model.get_layer('out_relu').output,
                     model.get_layer('global_average_pooling2d').output],
                    feed_dict={model.input.name: img})
                next_state = np.argmax(result)
                print('Arg max: ', next_state)
                # the initial current state is Background
                if current_state == 0:
                    print('Background')
                    if next_state == 1:
                        # detected a Cat: transition to the Cat state
                        current_state = 1
                        background_counter = 0
                        cat_counter = 1
                        dog_counter = 0
                    elif next_state == 2:
                        # detected a Dog: transition to the Dog state
                        current_state = 2
                        background_counter = 0
                        cat_counter = 0
                        dog_counter = 1
                # current state is Cat
                elif current_state == 1:
                    print('\t\t\t\t\t\tCat')
                    if next_state == 0:
                        # transition back to Background only after it has
                        # appeared more than 6 times
                        background_counter += 1
                        if background_counter >= 6:
                            background_counter = 0
                            current_state = 0
                            cat_counter = 0
                    elif next_state == 1:
                        cat_counter += 1
                        if cat_counter >= 30:
                            print('Cozmo sees a cat')
                            # class-activation-map localization: weight the last
                            # conv feature maps by the dense-layer weights
                            dense = model.get_layer('dense').get_weights()
                            weights = dense[0].T
                            testing_counter = 0
                            detected_centroid = 0
                            xmin_avg = 0
                            xmax_avg = 0
                            ymin_avg = 0
                            ymax_avg = 0
                            frame_average = 2
                            frame_count = 0
                            while True:
                                latest_img = robot.world.latest_image
                                if latest_img is not None:
                                    pilImage = latest_img.raw_image
                                    pilImage.resize((640, 480), Image.ANTIALIAS).save(
                                        os.path.join(directory, "current.jpeg"), "JPEG")
                                img = load_image(frame)
                                [result, out_relu, global_average_pooling2d] = sess.run(
                                    [model.outputs,
                                     model.get_layer('out_relu').output,
                                     model.get_layer('global_average_pooling2d').output],
                                    feed_dict={model.input.name: img})
                                kernels = out_relu.reshape(7, 7, 1280)
                                final = np.dot(kernels, weights[result[0].argmax()])
                                final1 = array_to_img(final.reshape(7, 7, 1))
                                final1 = final1.resize((224, 224), Image.ANTIALIAS)
                                box = img_to_array(final1).reshape(224, 224)
                                # threshold the activation map and keep the
                                # largest contour
                                temp = (box > box.max() * .8) * 1
                                temp_adjusted = (np.asarray(temp) * 255).astype(np.uint8)
                                contours, hierarchy = cv2.findContours(
                                    temp_adjusted, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]
                                contours = np.array(contours)
                                max_area = [0, 0]  # contour index and area
                                for index, contour in enumerate(contours):
                                    if max_area[1] < len(contour):
                                        max_area = [index, len(contour)]
                                contours_adjusted = contours[max_area[0]].squeeze(axis=1).T
                                # scale contour extremes from 224x224 back to 640x480
                                xmin = contours_adjusted[0].min() * (640. / 224.)
                                ymin = contours_adjusted[1].min() * (480. / 224.)
                                xmax = contours_adjusted[0].max() * (640. / 224.)
                                ymax = contours_adjusted[1].max() * (480. / 224.)
                                if result[0].argmax() == 1:
                                    # frame smoothing: average the box over
                                    # frame_average consecutive frames
                                    frame_count = frame_count + 1
                                    xmin_avg = xmin_avg + xmin
                                    xmax_avg = xmax_avg + xmax
                                    ymin_avg = ymin_avg + ymin
                                    ymax_avg = ymax_avg + ymax
                                    if frame_count % frame_average == 0:
                                        frame_count = 0
                                        xmin_avg = xmin_avg / frame_average
                                        xmax_avg = xmax_avg / frame_average
                                        ymin_avg = ymin_avg / frame_average
                                        ymax_avg = ymax_avg / frame_average
                                        print(xmin_avg, end=",")
                                        print(ymin_avg, end=",")
                                        print(xmax_avg, end=",")
                                        print(ymax_avg, end="\n")
                                        # center a fixed WxH box on the detection
                                        ymin_avg = ymin_avg + (ymax_avg - ymin_avg) / 2. - H / 2.
                                        xmin_avg = xmin_avg + (xmax_avg - xmin_avg) / 2. - W / 2.
                                        print("150: ", xmin_avg, end=",")
                                        print("150: ", ymin_avg, end="\n")
                                        gt = [xmin_avg, ymin_avg, W, H]
                                        xmin_avg = 0
                                        xmax_avg = 0
                                        ymin_avg = 0
                                        ymax_avg = 0
                                        pos_x, pos_y, target_w, target_h = region_to_bbox(gt)
                                        bboxes = np.zeros((1, 4))
                                        bboxes[0, :] = pos_x - W / 2, pos_y - H / 2, W, H
                                        print(len(contours))
                                        testing_counter = testing_counter + 1
                                        print("Testing_counter: ", testing_counter)
                                        show_frame(np.asarray(Image.open(frame)), gt, 1)
                                        print("Cat is detected")
                                        print("Starting the tracker ...")
                                        # head control: keep the box vertically centered
                                        if (bboxes[0, 1] + bboxes[0, 3] / 2) < (Y + H / 2 - 40):
                                            print("Command: Raise the head")
                                            angle = angle + 0.5
                                            if angle > 44.5:
                                                angle = 44.5
                                        elif (bboxes[0, 1] + bboxes[0, 3] / 2) > (Y + H / 2 + 40):
                                            print("Command: Lower the head")
                                            angle = angle - 0.5
                                            if angle < 0:
                                                angle = 0
                                        set_head_angle_action = robot.set_head_angle(
                                            degrees(angle), max_speed=20, in_parallel=True)
                                        # wheel control: combine forward and turning commands
                                        if straight(bboxes[0, :])[0] != 0 and turn(bboxes[0, :])[0] != 0:
                                            robot.drive_wheel_motors(
                                                straight(bboxes[0, :])[0] + turn(bboxes[0, :])[0],
                                                straight(bboxes[0, :])[1] + turn(bboxes[0, :])[1])
                                            detected_centroid = 0
                                        elif straight(bboxes[0, :])[0] == 0 and turn(bboxes[0, :])[0] == 0:
                                            robot.stop_all_motors()
                                            detected_centroid = detected_centroid + 1
                                        elif straight(bboxes[0, :])[0] == 0:
                                            robot.drive_wheel_motors(turn(bboxes[0, :])[0],
                                                                     turn(bboxes[0, :])[1])
                                            detected_centroid = 0
                                        elif turn(bboxes[0, :])[0] == 0:
                                            robot.drive_wheel_motors(straight(bboxes[0, :])[0],
                                                                     straight(bboxes[0, :])[1])
                                            detected_centroid = 0
                                        else:
                                            robot.stop_all_motors()
                                            detected_centroid = detected_centroid + 1
                                        if detected_centroid > 20 // frame_average:
                                            detected_centroid = 0
                                            print("Reached a stable state.........\t\t\t\t\t\t\t\t STABLE")
                                            # go near the object
                                            set_head_angle_action.wait_for_completed()
                                            robot.abort_all_actions(log_abort_messages=True)
                                            robot.wait_for_all_actions_completed()
                                            robot.set_head_angle(degrees(0.5)).wait_for_completed()
                                            print("Robot's head angle: ", robot.head_angle)
                                            target_frame_count = 1
                                            while True:
                                                latest_img = None
                                                while latest_img is None:
                                                    latest_img = robot.world.latest_image
                                                target_frame1 = latest_img.raw_image
                                                target_frame1 = target_frame1.resize((640, 480), Image.ANTIALIAS)
                                                target_frame1 = np.asarray(target_frame1)
                                                # (disabled) ORB keypoints were computed on
                                                # target_frame1 and drawn before saving
                                                plt.imsave("target_frame1_" + str(target_frame_count) + ".jpeg",
                                                           target_frame1)
                                                drive_straight_action = robot.drive_straight(
                                                    distance=cozmo.util.distance_mm(distance_mm=10),
                                                    speed=cozmo.util.speed_mmps(10),
                                                    in_parallel=True)
                                                drive_straight_action.wait_for_completed()
                                                robot.set_head_angle(degrees(0.5)).wait_for_completed()
                                                print("Robot's head angle: ", robot.head_angle)
                                                latest_img = None
                                                while latest_img is None:
                                                    latest_img = robot.world.latest_image
                                                target_frame2 = latest_img.raw_image
                                                target_frame2 = target_frame2.resize((640, 480), Image.ANTIALIAS)
                                                target_frame2 = np.asarray(target_frame2)
                                                plt.imsave("target_frame2_" + str(target_frame_count) + ".jpeg",
                                                           target_frame2)
                                                target_frame_count = target_frame_count + 1
                                                # (disabled) ORB feature matching between
                                                # target_frame1 and target_frame2 to estimate
                                                # the distance to the object from the 10 mm
                                                # baseline between the two shots
                                                sys.exit(0)
                    else:
                        # detected a Dog; transition only after 6 consecutive hits
                        dog_counter += 1
                        if dog_counter >= 6:
                            cat_counter = 0
                            current_state = 2
                # current state is Dog
                elif current_state == 2:
                    print('\t\t\t\t\t\t\t\t\t\t\t\tDog')
                    if next_state == 0:
                        # transition back to Background only after it has
                        # appeared more than 6 times
                        background_counter += 1
                        if background_counter >= 6:
                            background_counter = 0
                            current_state = 0
                            dog_counter = 0
                    elif next_state == 2:
                        dog_counter += 1
                        if dog_counter >= 30:
                            # back away and turn around
                            print('Cozmo sees a Dog')
                            robot.drive_wheels(-50, -50)
                            time.sleep(3)
                            robot.drive_wheels(70, -70)
                            time.sleep(2.8)
                            robot.drive_wheels(0, 0)
                            break
                    else:
                        # detected a Cat; transition only after 6 consecutive hits
                        cat_counter += 1
                        if cat_counter >= 6:
                            dog_counter = 0
                            current_state = 1
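# The drive logic above relies on straight() and turn() helpers that are not
# shown; each apparently maps the current bbox to a (left, right) wheel-speed
# pair. A hypothetical proportional-control sketch -- the gains, dead-bands and
# frame geometry below are guesses, not the original implementation:
FRAME_W, FRAME_H = 640, 480      # assumed camera resolution
TARGET_BOX_H = 150.              # desired bbox height at the "good" distance

def straight(bbox):
    # forward/backward speed from bbox height; zero inside a dead-band
    x, y, w, h = bbox
    err = TARGET_BOX_H - h
    if abs(err) < 15:
        return (0, 0)
    v = max(min(0.8 * err, 60), -60)
    return (v, v)                # equal wheel speeds drive straight

def turn(bbox):
    # differential speed from the horizontal offset of the bbox center
    x, y, w, h = bbox
    err = (x + w / 2.) - FRAME_W / 2.
    if abs(err) < 25:
        return (0, 0)
    v = max(min(0.3 * err, 40), -40)
    return (v, -v)               # opposite wheel speeds turn in place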
def tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h,
            final_score_sz, filename, image, templates_z, scores, start_frame):
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation (whole target, upper half, lower half)
    bboxes = np.zeros((num_frames, 4))
    bboxesupper = np.zeros((num_frames, 4))
    bboxeslower = np.zeros((num_frames, 4))

    scale_factors = hp.scale_step ** np.linspace(-np.ceil(hp.scale_num / 2),
                                                 np.ceil(hp.scale_num / 2),
                                                 hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt((target_w + context) * (target_h + context))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    run_opts = {}

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        bboxes[0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
        bboxesupper[0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h / 2
        bboxeslower[0, :] = pos_x - target_w / 2, pos_y, target_w, target_h / 2

        # extract templates for the whole target and for its upper/lower halves
        image_, templates_z_ = sess.run([image, templates_z], feed_dict={
            siam.pos_x_ph: pos_x,
            siam.pos_y_ph: pos_y,
            siam.z_sz_ph: z_sz,
            filename: frame_name_list[0]})
        image_, templates_z_upper = sess.run([image, templates_z], feed_dict={
            siam.pos_x_ph: pos_x,
            siam.pos_y_ph: pos_y - target_h / 2,
            siam.z_sz_ph: z_sz,
            filename: frame_name_list[0]})
        image_, templates_z_lower = sess.run([image, templates_z], feed_dict={
            siam.pos_x_ph: pos_x,
            siam.pos_y_ph: pos_y + target_h / 2,
            siam.z_sz_ph: z_sz,
            filename: frame_name_list[0]})
        new_templates_z_ = templates_z_
        new_templates_z_upper = templates_z_upper
        new_templates_z_lower = templates_z_lower
        t_start = time.time()

        sco_final = np.zeros((3, 257, 257))
        distance_transform = dt2d

        for i in range(1, num_frames):
            # j = 1: upper half, j = 2: lower half, j = 3: whole target
            for j in range(1, 4):
                scaled_exemplar = z_sz * scale_factors
                scaled_search_area = x_sz * scale_factors
                scaled_target_w = target_w * scale_factors
                scaled_target_h = target_h * scale_factors
                image_, scores_ = sess.run(
                    [image, scores],
                    feed_dict={
                        siam.pos_x_ph: pos_x,
                        siam.pos_y_ph: pos_y - target_h / 2 if j == 1
                                       else (pos_y + target_h / 2 if j == 2 else pos_y),
                        siam.x_sz0_ph: scaled_search_area[0],
                        siam.x_sz1_ph: scaled_search_area[1],
                        siam.x_sz2_ph: scaled_search_area[2],
                        templates_z: np.squeeze(templates_z_upper) if j == 1
                                     else (np.squeeze(templates_z_lower) if j == 2
                                           else np.squeeze(templates_z_)),
                        filename: frame_name_list[i],
                    },
                    **run_opts)
                # NOTE: converting inside the loop adds new constant nodes to
                # the graph on every frame; kept as in the original
                if j == 1:
                    templates_zupper = tf.convert_to_tensor(
                        np.squeeze(templates_z_upper), np.float32)
                elif j == 2:
                    templates_zlower = tf.convert_to_tensor(
                        np.squeeze(templates_z_lower), np.float32)
                else:
                    templates_zmain = tf.convert_to_tensor(
                        np.squeeze(templates_z_), np.float32)
                scores_ = np.squeeze(scores_)
                # penalize change of scale
                scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
                scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
                # find scale with highest peak (after penalty)
                new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
                # update scaled sizes
                x_sz = (1 - hp.scale_lr) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
                target_w = (1 - hp.scale_lr) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
                target_h = (1 - hp.scale_lr) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
                # select response with new_scale_id and normalize it
                score_ = scores_[new_scale_id, :, :]
                score_ = score_ - np.min(score_)
                score_ = score_ / np.sum(score_)
                # instead of the cosine displacement penalty, spread the part
                # response with a generalized distance transform
                min1 = score_.min()
                max1 = score_.max()
                w = [0.1, 0, 0.1, 0]
                score_ = (score_ / score_.max()) * 255
                score_max = distance_transform(score_, w, 4)
                # rescale the transformed map back toward the original range
                score_max = (((score_max - min1) * (score_max.max() - score_max.min()))
                             / (max1 - min1)) + score_max.min()
                sco_final[j - 1, :, :] = score_max

            # fuse the three part responses and update the positions
            sco_f = sco_final[0, :, :] + sco_final[1, :, :] + sco_final[2, :, :]
            pos_x, pos_y = _update_target_position(
                pos_x, pos_y, sco_f, final_score_sz,
                design.tot_stride, design.search_sz, hp.response_up, x_sz)
            pos_x_upper, pos_y_upper = _update_target_position(
                pos_x, pos_y, sco_final[0, :, :], final_score_sz,
                design.tot_stride, design.search_sz, hp.response_up, x_sz)
            pos_x_lower, pos_y_lower = _update_target_position(
                pos_x, pos_y, sco_final[1, :, :], final_score_sz,
                design.tot_stride, design.search_sz, hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[i, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
            bboxesupper[i, :] = pos_x_upper - target_w / 2, pos_y_upper - target_h / 2, \
                                target_w, target_h / 2
            bboxeslower[i, :] = pos_x_lower - target_w / 2, pos_y_lower, \
                                target_w, target_h / 2

            # update the target representations with a rolling average
            if hp.z_lr > 0:
                new_templates_z_ = sess.run([templates_zmain], feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.z_sz_ph: z_sz,
                    image: image_})
                new_templates_z_upper = sess.run([templates_zupper], feed_dict={
                    siam.pos_x_ph: pos_x_upper,
                    siam.pos_y_ph: pos_y_upper,
                    siam.z_sz_ph: z_sz,
                    image: image_})
                new_templates_z_lower = sess.run([templates_zlower], feed_dict={
                    siam.pos_x_ph: pos_x_lower,
                    siam.pos_y_ph: pos_y_lower,
                    siam.z_sz_ph: z_sz,
                    image: image_})
                templates_z_ = (1 - hp.z_lr) * np.asarray(templates_z_) \
                               + hp.z_lr * np.asarray(new_templates_z_)
                templates_z_upper = (1 - hp.z_lr) * np.asarray(templates_z_upper) \
                                    + hp.z_lr * np.asarray(new_templates_z_upper)
                templates_z_lower = (1 - hp.z_lr) * np.asarray(templates_z_lower) \
                                    + hp.z_lr * np.asarray(new_templates_z_lower)

            # update template patch size
            z_sz = (1 - hp.scale_lr) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]
            if run.visualization:
                show_frame(image_, bboxes[i, :], bboxesupper[i, :], bboxeslower[i, :], 1)

        t_elapsed = time.time() - t_start
        speed = num_frames / t_elapsed
        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads)

    plt.close('all')
    return bboxes, speed
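# dt2d above is imported from elsewhere. Below is a naive sketch of the
# generalized distance transform it presumably computes: a Felzenszwalb-style
# quadratic deformation spread, applied separably to rows then columns, with
# w = [ax, bx, ay, by]. The meaning of the third argument to dt2d is unknown,
# so it is accepted and ignored here. This O(n^2)-per-line form favors clarity
# over the usual linear-time lower-envelope algorithm.
def _dt1d_naive(f, a, b):
    # out[p] = max_q f[q] - a*(p - q)**2 - b*(p - q)
    n = f.shape[0]
    p = np.arange(n)[:, None]
    q = np.arange(n)[None, :]
    disp = p - q
    return (f[None, :] - a * disp ** 2 - b * disp).max(axis=1)

def dt2d_naive(score, w, _unused=None):
    # spread each peak of `score` under a quadratic deformation cost:
    # rows with (ax, bx), then columns with (ay, by)
    ax, bx, ay, by = w
    out = np.stack([_dt1d_naive(row, ax, bx) for row in score])
    out = np.stack([_dt1d_naive(col, ay, by) for col in out.T]).T
    return out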
def tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h,
            final_score_sz, image, templates_z, scores, path_ckpt, siamNet):
    """Run the tracking steps under a tensorflow session.

    Inputs:
        hp, run, design: system parameters.
        frame_name_list: a list of paths to all frames of the tracking video.
        pos_x, pos_y, target_w, target_h: target position and size in the first
            frame, from ground truth; updated during tracking.
        final_score_sz: size of the final score map after bilinear interpolation.
        image, templates_z, scores: tensors that will be run in the tensorflow
            session. See siamese.py for a detailed explanation.
        path_ckpt: path of the checkpoint file used to restore model variables.
        siamNet: an instance of the siamese network class.

    Returns:
        bboxes: a list of the predicted bboxes.
        speed: average tracking speed (fps).
    """
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames, 4))

    scale_factors = hp.scale_step ** np.linspace(-np.ceil(hp.scale_num / 2),
                                                 np.ceil(hp.scale_num / 2),
                                                 hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt((target_w + context) * (target_h + context))  # sqrt((w+2p)*(h+2p))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    run_opts = {}
    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, path_ckpt)
        print("Model restored from: ", path_ckpt)
        print("Start tracking......")
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        bboxes[0, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h

        # Read the first frame as z and run it through the conv net to get its
        # feature map. Inputs are normalized to [-1, 1] here (tracker_v2 above
        # uses [-0.5, 0.5]; the normalization has to match the training setup).
        z_image = cv2.imread(frame_name_list[0])
        image_, templates_z_ = sess.run(
            [image, templates_z],
            feed_dict={
                siamNet.batched_pos_x_ph: [pos_x],
                siamNet.batched_pos_y_ph: [pos_y],
                siamNet.batched_z_sz_ph: [z_sz],
                image: [z_image / 255. * 2 - 1]})
        new_templates_z_ = templates_z_
        t_start = time.time()

        for i in range(1, num_frames):
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors
            # Read the next frame as x; input x into the conv net together with
            # the feature map of z to get the final score map
            x_image = cv2.imread(frame_name_list[i])
            image_, scores_ = sess.run(
                [image, scores],
                feed_dict={
                    siamNet.batched_pos_x_ph: [pos_x],
                    siamNet.batched_pos_y_ph: [pos_y],
                    siamNet.batched_x_sz0_ph: [scaled_search_area[0]],
                    siamNet.batched_x_sz1_ph: [scaled_search_area[1]],
                    siamNet.batched_x_sz2_ph: [scaled_search_area[2]],
                    templates_z: np.squeeze(templates_z_),
                    image: [x_image / 255. * 2 - 1],
                },
                **run_opts)
            # (debug) plt.imshow(np.squeeze(scores_[0]), cmap='gray'); plt.show()
            # finalize the score map
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            x_sz = (1 - hp.scale_lr) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
            target_w = (1 - hp.scale_lr) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
            target_h = (1 - hp.scale_lr) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
            # select response with new_scale_id and normalize it
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_ / np.sum(score_)
            # apply displacement penalty
            score_ = (1 - hp.window_influence) * score_ + hp.window_influence * penalty
            # (debug) plt.imshow(np.squeeze(score_), cmap='gray'); plt.show()
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_, final_score_sz,
                                                   design.tot_stride, design.search_sz,
                                                   hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[i, :] = pos_x - target_w / 2, pos_y - target_h / 2, target_w, target_h
            # update the target representation with a rolling average
            if hp.z_lr > 0:
                new_templates_z_ = sess.run(
                    [templates_z],
                    feed_dict={
                        siamNet.batched_pos_x_ph: [pos_x],
                        siamNet.batched_pos_y_ph: [pos_y],
                        siamNet.batched_z_sz_ph: [z_sz],
                        image: image_})
                templates_z_ = (1 - hp.z_lr) * np.asarray(templates_z_) \
                               + hp.z_lr * np.asarray(new_templates_z_)
            # update template patch size
            z_sz = (1 - hp.scale_lr) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]
            if run.visualization:
                show_frame(x_image, bboxes[i, :], 1)

        t_elapsed = time.time() - t_start
        speed = num_frames / t_elapsed
        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads)

    plt.close('all')
    return bboxes, speed
def tracker(hp, run, design, video, pos_x, pos_y, target_w, target_h,
            final_score_sz, image, templates_z, scores, process1, queue):
    # frames arrive through `queue` instead of a frame_name_list, so the
    # tracker runs on a live stream and keeps no preallocated bbox array
    bboxes = []

    scale_factors = hp.scale_step ** np.linspace(-np.ceil(hp.scale_num / 2),
                                                 np.ceil(hp.scale_num / 2),
                                                 hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    context = design.context * (target_w + target_h)
    z_sz = np.sqrt((target_w + context) * (target_h + context))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    # (disabled) alternative initialization: read raw frames from
    # process1.stdout and run a detector (DetectorAPI on
    # frozen_inference_graph.pb) to find the initial target box
    run_opts = {}

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        frame_idx = 1
        print(image, type(image), '*' * 10)
        image_, templates_z_ = sess.run(
            [image, templates_z],
            feed_dict={
                siam.pos_x_ph: pos_x,
                siam.pos_y_ph: pos_y,
                siam.z_sz_ph: z_sz,
                image: video})
        new_templates_z_ = templates_z_

        while True:
            frame_idx += 1
            video = queue.get()
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors
            image_, scores_ = sess.run(
                [image, scores],
                feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.x_sz0_ph: scaled_search_area[0],
                    siam.x_sz1_ph: scaled_search_area[1],
                    siam.x_sz2_ph: scaled_search_area[2],
                    templates_z: np.squeeze(templates_z_),
                    image: video,
                },
                **run_opts)
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            x_sz = (1 - hp.scale_lr) * x_sz + hp.scale_lr * scaled_search_area[new_scale_id]
            target_w = (1 - hp.scale_lr) * target_w + hp.scale_lr * scaled_target_w[new_scale_id]
            target_h = (1 - hp.scale_lr) * target_h + hp.scale_lr * scaled_target_h[new_scale_id]
            # select response with new_scale_id and normalize it
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_ / np.sum(score_)
            # apply displacement penalty
            score_ = (1 - hp.window_influence) * score_ + hp.window_influence * penalty
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_, final_score_sz,
                                                   design.tot_stride, design.search_sz,
                                                   hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h>
            current_boxes = [pos_x - target_w / 2, pos_y - target_h / 2,
                             target_w, target_h]
            # update the target representation with a rolling average
            print(time.time())
            if hp.z_lr > 0:
                new_templates_z_ = sess.run(
                    [templates_z],
                    feed_dict={
                        siam.pos_x_ph: pos_x,
                        siam.pos_y_ph: pos_y,
                        siam.z_sz_ph: z_sz,
                        image: image_})
                templates_z_ = (1 - hp.z_lr) * np.asarray(templates_z_) \
                               + hp.z_lr * np.asarray(new_templates_z_)
            print(time.time())
            # update template patch size
            z_sz = (1 - hp.scale_lr) * z_sz + hp.scale_lr * scaled_exemplar[new_scale_id]
            if run.visualization:
                show_frame(video, current_boxes, 1)

        # unreachable while the stream loop above runs forever
        coord.request_stop()
        coord.join(threads)
    plt.close('all')
    return
def tracker(hp, run, design, frame_name_list, pos_x, pos_y, target_w, target_h,
            final_score_sz, filename, image, templates_z, scores, start_frame):
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames, 4))

    # scale_step is the scaling ratio; build a list of scale factors from the
    # configured number of scales
    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num/2),
                                               np.ceil(hp.scale_num/2),
                                               hp.scale_num)
    # cosine window to penalize large displacements: a Hanning window, large at
    # the center and small toward the edges
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    # context grows the box into a square patch containing the target plus some
    # background; x_sz and z_sz keep the configured search/exemplar ratio
    context = design.context * (target_w + target_h)
    z_sz = np.sqrt(np.prod((target_w + context) * (target_h + context)))
    x_sz = float(design.search_sz) / design.exemplar_sz * z_sz

    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz

    # run_metadata = tf.RunMetadata()
    # run_opts = {
    #     'options': tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
    #     'run_metadata': run_metadata,
    # }
    run_opts = {}

    # with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        bboxes[0, :] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h

        # run the network once to extract the exemplar (z) features
        image_, templates_z_ = sess.run([image, templates_z], feed_dict={
            siam.pos_x_ph: pos_x,
            siam.pos_y_ph: pos_y,
            siam.z_sz_ph: z_sz,
            filename: frame_name_list[0]})
        new_templates_z_ = templates_z_

        t_start = time.time()

        # Get an image from the queue
        progress = progressbar.ProgressBar(widgets=[
            progressbar.Bar('=', '[', ']'), ' ',
            progressbar.Percentage(), ' ',
            progressbar.ETA()])
        for i in progress(range(1, num_frames)):
            # z, x, and the target box width/height at each scale; w and h
            # start from the first frame's box
            scaled_exemplar = z_sz * scale_factors
            scaled_search_area = x_sz * scale_factors
            scaled_target_w = target_w * scale_factors
            scaled_target_h = target_h * scale_factors
            # run the convolutional part to get the score maps
            image_, scores_ = sess.run(
                [image, scores],
                feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    # siam.x_sz0_ph: scaled_search_area[0],
                    # siam.x_sz1_ph: scaled_search_area[1],
                    # siam.x_sz2_ph: scaled_search_area[2],
                    siam.x_sz_ph: scaled_search_area,
                    templates_z: np.squeeze(templates_z_),
                    filename: frame_name_list[i],
                }, **run_opts)
            scores_ = np.squeeze(scores_)
            # penalize change of scale
            scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
            scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
            # find scale with highest peak (after penalty)
            new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
            # update scaled sizes
            # scaled_search_area[new_scale_id] is the scale of the highest
            # peak, but it is blended with the previous frame's x_sz rather
            # than adopted outright; w and h are averaged the same way, and
            # the larger scale_lr is, the less the previous scale matters
            x_sz = (1-hp.scale_lr)*x_sz + hp.scale_lr*scaled_search_area[new_scale_id]
            target_w = (1-hp.scale_lr)*target_w + hp.scale_lr*scaled_target_w[new_scale_id]
            target_h = (1-hp.scale_lr)*target_h + hp.scale_lr*scaled_target_h[new_scale_id]
            # select response with new_scale_id
            # take the score map at the best scale, then normalize it
            score_ = scores_[new_scale_id, :, :]
            score_ = score_ - np.min(score_)
            score_ = score_ / np.sum(score_)
            # apply displacement penalty
            # refine the score by position: the closer to the center, the
            # larger the boost from the penalty window; the final box position
            # is then read off the score map
            score_ = (1-hp.window_influence)*score_ + hp.window_influence*penalty
            pos_x, pos_y = _update_target_position(pos_x, pos_y, score_,
                                                   final_score_sz,
                                                   design.tot_stride,
                                                   design.search_sz,
                                                   hp.response_up, x_sz)
            # convert <cx,cy,w,h> to <x,y,w,h> and save output
            bboxes[i, :] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
            # update the target representation with a rolling average
            # extract a new z from this frame's box; the final z is a weighted
            # sum with the previous z, with learning rate z_lr
            if hp.z_lr > 0:
                new_templates_z_ = sess.run([templates_z], feed_dict={
                    siam.pos_x_ph: pos_x,
                    siam.pos_y_ph: pos_y,
                    siam.z_sz_ph: z_sz,
                    image: image_
                })
                templates_z_ = (1-hp.z_lr)*np.asarray(templates_z_) + \
                    hp.z_lr*np.asarray(new_templates_z_)
            # update template patch size
            z_sz = (1-hp.scale_lr)*z_sz + hp.scale_lr*scaled_exemplar[new_scale_id]
            if run.visualization:
                show_frame(image_, bboxes[i, :], 1)

        t_elapsed = time.time() - t_start
        speed = num_frames / t_elapsed

        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads)

    # from tensorflow.python.client import timeline
    # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
    # trace_file = open('timeline-search.ctf.json', 'w')
    # trace_file.write(trace.generate_chrome_trace_format())

    plt.close('all')
    return bboxes, speed
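# --- Added sketch ---------------------------------------------------------
# A hedged sketch of how this frame-list variant is typically driven. It
# follows the siamfc-tf convention where siam.build_tracking_graph returns the
# (filename, image, templates_z, scores) tensors fed above; parse_arguments,
# its five return namespaces, and the exact signatures are assumptions standing
# in for the project's own setup code, not verified here.
def run_sequence_demo(frame_name_list, pos_x, pos_y, target_w, target_h):
    # hp/run/design namespaces as produced by the project's argument parser
    hp, evaluation, run, env, design = parse_arguments()
    # upsampled score map: response_up * (score_sz - 1) + 1 pixels per side
    final_score_sz = hp.response_up * (design.score_sz - 1) + 1
    filename, image, templates_z, scores = siam.build_tracking_graph(
        final_score_sz, design, env)
    return tracker(hp, run, design, frame_name_list, pos_x, pos_y,
                   target_w, target_h, final_score_sz, filename, image,
                   templates_z, scores, start_frame=0)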
def track_one_sequence(hp, design, frame_name_list, pos_x, pos_y, target_w,
                       target_h, final_score_sz, filename, image, templates_x,
                       templates_z, scores_list, vid_name, dataset_type, sess,
                       visualize_results, save_images, save_bboxes, vot_handle,
                       gt=None):
    """ Handles tracking for one whole sequence. Inputs are fed to the network
    and the results are collected and can be shown on the screen and saved to
    the disk.

    Args:
        hp: namespace: hyperparameters.
        design: namespace: design parameters.
        frame_name_list: string list: list of sorted image paths to be read.
        pos_x: int: horizontal center of the target.
        pos_y: int: vertical center of the target.
        target_w: int: target width.
        target_h: int: target height.
        final_score_sz: int: size of the score map after upsampling.
        filename: string tensor: placeholder for the image path to be read.
        image: 3D tensor: the image read from the path.
        templates_x: 4D tensor: instance features from one or more layers
            concatenated by channels. See siam_mcf_net.inference comments for
            more details.
        templates_z: 4D tensor: exemplar features from one or more layers
            concatenated by channels. See siam_mcf_net.inference comments for
            more details.
        scores_list: 5D tensor: batch of score heatmaps for each of the
            selected layers.
        vid_name: string: name of this sequence (only for saving purposes).
        dataset_type: string: name of this dataset (only for saving purposes).
        sess: an open tf.Session to execute the graph.
        visualize_results: boolean: whether to show the results on the screen.
        save_images: boolean: whether to save image results to the disk.
        save_bboxes: boolean: whether to save bounding boxes to the disk.
        vot_handle: vot handle for running the VOT toolkit.
        gt: Nx4 array: optional ground truth bounding boxes (only for
            visualization purposes).

    Returns:
        Nx4 array: the resulting bounding boxes from the tracking.
        float: the tracking speed in frames per second.
    """
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = np.zeros((num_frames, 4))

    if save_images:
        res_dir = 'results/%s/frames/%s' % (dataset_type, vid_name)
        if not os.path.exists(res_dir):
            os.makedirs(res_dir)

    if save_bboxes:
        bb_res_dir = 'results/%s/bboxes' % (dataset_type)
        if not os.path.exists(bb_res_dir):
            os.makedirs(bb_res_dir)

    # save first frame position (from ground-truth)
    bboxes[0, :] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h

    if vot_handle is not None:
        frame_path = vot_handle.frame()
    else:
        frame_path = frame_name_list[0]

    tracker = SiamMcfTracker(
        design.context, design.exemplar_sz, design.search_sz, hp.scale_step,
        hp.scale_num, hp.scale_penalty, hp.scale_lr, hp.window_influence,
        design.tot_stride, hp.response_up, final_score_sz, pos_x, pos_y,
        target_w, target_h, frame_path, sess, templates_z, filename)

    t_start = time.time()

    # Get an image from the queue
    for i in range(1, num_frames):
        if vot_handle is not None:
            frame_path = vot_handle.frame()
        else:
            frame_path = frame_name_list[i]

        if save_images or visualize_results:
            image_ = sess.run(image, feed_dict={filename: frame_path})

        bbox = tracker.track(frame_path, sess, templates_z, templates_x,
                             scores_list, filename)

        # convert <cx,cy,w,h> to <x,y,w,h> and save output
        bboxes[i, :] = bbox

        if vot_handle is not None:
            vot_rect = vot.Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
            vot_handle.report(vot_rect)

        if visualize_results:
            show_frame(image_, bboxes[i, :], 1)

        if save_images:
            out_img = Image.fromarray(image_.copy().astype(np.uint8))
            out_draw = ImageDraw.Draw(out_img)
            if gt is not None:
                gt_rect = np.array(region_to_bbox(gt[i, :], False)).astype(
                    np.int32)
                gt_rect[2:] = gt_rect[:2] + gt_rect[2:]
            rect = bboxes[i].copy()
            rect[2:] = rect[:2] + rect[2:]
            rect = rect.astype(np.int32)
            # rectangle outlines with a width argument need Pillow >= 5.3
            pillow_version = [int(x) for x in PIL.__version__.split('.')]
            if (pillow_version[0] > 5 or
                    (pillow_version[0] == 5 and pillow_version[1] >= 3)):
                if gt is not None:
                    out_draw.rectangle(
                        [tuple(gt_rect[:2]), tuple(gt_rect[2:])],
                        outline=(0, 0, 255), width=2)
                out_draw.rectangle(
                    [tuple(rect[:2]), tuple(rect[2:])],
                    outline=(255, 0, 0), width=3)
            else:
                if gt is not None:
                    out_draw.rectangle(
                        [tuple(gt_rect[:2]), tuple(gt_rect[2:])],
                        outline=(0, 0, 255))
                out_draw.rectangle(
                    [tuple(rect[:2]), tuple(rect[2:])],
                    outline=(255, 0, 0))
            out_img.save(os.path.join(res_dir, '%05d.jpg' % (i + 1)))

    t_elapsed = time.time() - t_start
    speed = num_frames / t_elapsed

    if save_bboxes:
        with open(os.path.join(bb_res_dir, vid_name+'.txt'), 'w') as f:
            for bb in bboxes:
                f.write('%.02f,%.02f,%.02f,%.02f\n' % tuple(bb))

    return bboxes, speed
def tracker(hp, run, design, frame_name_list, objects, final_score_sz,
            filename, image, templates_z, scores, start_frame):
    num_frames = np.size(frame_name_list)
    # stores tracker's output for evaluation
    bboxes = [np.zeros((len(objects), 4)) for i in range(0, num_frames)]

    # save first frame position (from ground-truth)
    for i in range(len(objects)):
        pos_x = objects[i][0]
        pos_y = objects[i][1]
        target_w = objects[i][2]
        target_h = objects[i][3]
        # bboxes[0][i] = objects[i]
        bboxes[0][i, :] = (pos_x - target_w / 2, pos_y - target_h / 2,
                           target_w, target_h)

    scale_factors = hp.scale_step**np.linspace(-np.ceil(hp.scale_num / 2),
                                               np.ceil(hp.scale_num / 2),
                                               hp.scale_num)
    # cosine window to penalize large displacements
    hann_1d = np.expand_dims(np.hanning(final_score_sz), axis=0)
    penalty = np.transpose(hann_1d) * hann_1d
    penalty = penalty / np.sum(penalty)

    """
    # These values are not used anywhere in the code, so it should be safe to
    # comment them out.
    # thresholds to saturate patches shrinking/growing
    min_z = hp.scale_min * z_sz
    max_z = hp.scale_max * z_sz
    min_x = hp.scale_min * x_sz
    max_x = hp.scale_max * x_sz
    """

    # These variables depend on the box data, so each object needs its own copy.
    # Object box information
    pos_x = [0] * len(objects)
    pos_y = [0] * len(objects)
    target_w = [0] * len(objects)
    target_h = [0] * len(objects)
    # Other variables
    context = [0] * len(objects)
    z_sz = [0] * len(objects)
    x_sz = [0] * len(objects)

    for o in range(len(objects)):
        pos_x[o] = objects[o][0]
        pos_y[o] = objects[o][1]
        target_w[o] = objects[o][2]
        target_h[o] = objects[o][3]
        context[o] = design.context * (target_w[o] + target_h[o])
        z_sz[o] = np.sqrt(
            np.prod((target_w[o] + context[o]) * (target_h[o] + context[o])))
        x_sz[o] = float(design.search_sz) / design.exemplar_sz * z_sz[o]

    # run_metadata = tf.RunMetadata()
    # run_opts = {
    #     'options': tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
    #     'run_metadata': run_metadata,
    # }
    run_opts = {}

    # with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Coordinate the loading of image files.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # save first frame position (from ground-truth)
        # bboxes[0,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
        scores_ = [0] * len(objects)
        templates_z_ = [0] * len(objects)
        for o in range(len(objects)):
            # print('Box {} template! x: {}, y: {}, z_sz: {}'.format(
            #     o, pos_x[o], pos_y[o], z_sz[o]))
            image_, templates_z_[o] = sess.run(
                [image, templates_z],
                feed_dict={
                    siam.pos_x_ph: pos_x[o],
                    siam.pos_y_ph: pos_y[o],
                    siam.z_sz_ph: z_sz[o],
                    filename: frame_name_list[0]
                })

        t_start = time.time()

        # Get an image from the queue
        for i in range(1, num_frames):
            for o in range(len(objects)):
                scaled_exemplar = z_sz[o] * scale_factors
                scaled_search_area = x_sz[o] * scale_factors
                scaled_target_w = target_w[o] * scale_factors
                scaled_target_h = target_h[o] * scale_factors
                image_, scores_ = sess.run(
                    [image, scores],
                    feed_dict={
                        siam.pos_x_ph: pos_x[o],
                        siam.pos_y_ph: pos_y[o],
                        siam.x_sz0_ph: scaled_search_area[0],
                        siam.x_sz1_ph: scaled_search_area[1],
                        siam.x_sz2_ph: scaled_search_area[2],
                        templates_z: np.squeeze(templates_z_[o]),
                        filename: frame_name_list[i],
                    }, **run_opts)
                scores_ = np.squeeze(scores_)
                # penalize change of scale
                scores_[0, :, :] = hp.scale_penalty * scores_[0, :, :]
                scores_[2, :, :] = hp.scale_penalty * scores_[2, :, :]
                # find scale with highest peak (after penalty)
                new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
                # update scaled sizes
                x_sz[o] = (1 - hp.scale_lr) * x_sz[o] + \
                    hp.scale_lr * scaled_search_area[new_scale_id]
                target_w[o] = (1 - hp.scale_lr) * target_w[o] + \
                    hp.scale_lr * scaled_target_w[new_scale_id]
                target_h[o] = (1 - hp.scale_lr) * target_h[o] + \
                    hp.scale_lr * scaled_target_h[new_scale_id]
                # select response with new_scale_id
                score_ = scores_[new_scale_id, :, :]
                score_ = score_ - np.min(score_)
                score_ = score_ / np.sum(score_)
                # apply displacement penalty
                score_ = (1 - hp.window_influence) * score_ + \
                    hp.window_influence * penalty
                pos_x[o], pos_y[o] = _update_target_position(
                    pos_x[o], pos_y[o], score_, final_score_sz,
                    design.tot_stride, design.search_sz, hp.response_up,
                    x_sz[o])
                # convert <cx,cy,w,h> to <x,y,w,h> and save output
                # bboxes[i,:] = pos_x-target_w/2, pos_y-target_h/2, target_w, target_h
                bboxes[i][o, :] = (pos_x[o] - target_w[o] / 2,
                                   pos_y[o] - target_h[o] / 2,
                                   target_w[o], target_h[o])
                # update the target representation with a rolling average
                if hp.z_lr > 0:
                    new_templates_z_ = sess.run(
                        [templates_z],
                        feed_dict={
                            siam.pos_x_ph: pos_x[o],
                            siam.pos_y_ph: pos_y[o],
                            siam.z_sz_ph: z_sz[o],
                            image: image_
                        })
                    templates_z_[o] = (1 - hp.z_lr) * np.asarray(templates_z_[o]) + \
                        hp.z_lr * np.asarray(new_templates_z_)
                # update template patch size
                z_sz[o] = (1 - hp.scale_lr) * z_sz[o] + \
                    hp.scale_lr * scaled_exemplar[new_scale_id]
            # show all objects' boxes for this frame at once
            if run.visualization:
                show_frame(image_, bboxes[i], 1)

        t_elapsed = time.time() - t_start
        speed = num_frames / t_elapsed

        # Finish off the filename queue coordinator.
        coord.request_stop()
        coord.join(threads)

    # from tensorflow.python.client import timeline
    # trace = timeline.Timeline(step_stats=run_metadata.step_stats)
    # trace_file = open('timeline-search.ctf.json', 'w')
    # trace_file.write(trace.generate_chrome_trace_format())

    plt.close('all')
    return bboxes, speed
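# --- Added sketch ---------------------------------------------------------
# Usage note for the multi-object variant above: `objects` is a list of
# first-frame boxes in <cx,cy,w,h> order, and the network runs once per object
# per frame, so runtime grows roughly linearly with len(objects). The helper
# below is an illustrative assumption (not from the original code) for
# converting top-left <x,y,w,h> annotations into the expected center-based form.
def xywh_to_center(boxes):
    # top-left <x,y,w,h> -> center-based <cx,cy,w,h> expected by tracker()
    return [(x + w / 2.0, y + h / 2.0, w, h) for x, y, w, h in boxes]

# objects = xywh_to_center([(10, 20, 50, 80), (200, 150, 40, 40)])
# bboxes, speed = tracker(hp, run, design, frame_name_list, objects,
#                         final_score_sz, filename, image, templates_z, scores,
#                         start_frame=0)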