def build_tracking_graph(final_score_sz, design, env):
    """Assemble the tracking graph on top of an already existing ``image`` tensor.

    ``image`` and the ``*_ph`` placeholders are not created here; they are
    looked up in the enclosing scope (presumably module-level tensors -- verify
    against the rest of the file).

    Args:
        final_score_sz: side length the score maps are resized to.
        design: provides ``pad_with_image_mean``, ``exemplar_sz``,
            ``search_sz`` and ``net``.
        env: provides ``root_pretrained``.

    Returns:
        Tuple ``(image, templates_z, scores_up)``.
    """
    frame_sz = tf.shape(image)

    # Mean over the first two axes; handed to pad_frame as the padding value.
    avg_chan = (tf.reduce_mean(image, axis=(0, 1), name='avg_chan')
                if design.pad_with_image_mean else None)

    # Exemplar (z) branch: pad the frame if necessary, then crop.
    padded_z, npad_z = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                 z_sz_ph, avg_chan)
    padded_z = tf.cast(padded_z, tf.float32)
    z_crops = extract_crops_z(padded_z, npad_z, pos_x_ph, pos_y_ph,
                              z_sz_ph, design.exemplar_sz)

    # Search (x) branch: pad for the x_sz2_ph size, then crop at three scales.
    padded_x, npad_x = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                 x_sz2_ph, avg_chan)
    padded_x = tf.cast(padded_x, tf.float32)
    x_crops = extract_crops_x(padded_x, npad_x, pos_x_ph, pos_y_ph,
                              x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz)

    # Feed the crops to the (MatConvNet-imported) pre-trained Siamese net.
    net_path = os.path.join(env.root_pretrained, design.net)
    template_z, templates_x, p_names_list, p_val_list = _create_siamese(
        net_path, x_crops, z_crops)

    # One copy of the exemplar template per search scale.
    template_z = tf.squeeze(template_z)
    templates_z = tf.stack([template_z] * 3)

    # Cross-correlate the templates, then upsample the score maps.
    scores = _match_templates(templates_z, templates_x,
                              p_names_list, p_val_list)
    target_size = [final_score_sz, final_score_sz]
    scores_up = tf.image.resize_images(
        scores, target_size,
        method=tf.image.ResizeMethod.BICUBIC, align_corners=True)

    return image, templates_z, scores_up
def build_tracking_graph_2(final_score_sz, design, env):
    """Build the tracking graph for frames fed directly as float tensors.

    The frame is supplied through a ``tf.float32`` placeholder of shape
    ``[None, None, 3]`` instead of being read from a file inside the graph.
    The ``*_ph`` placeholders are looked up in the enclosing scope.

    Args:
        final_score_sz: side length the score maps are resized to.
        design: provides ``pad_with_image_mean``, ``exemplar_sz``,
            ``search_sz`` and ``net``.
        env: provides ``root_pretrained``.

    Returns:
        Tuple ``(image, templates_z, scores_up)`` where ``image`` is the
        placeholder the caller must feed.
    """
    image = tf.placeholder(tf.float32, [None, None, 3])
    frame_sz = tf.shape(image)

    # Mean over the first two axes; handed to pad_frame as the padding value.
    if design.pad_with_image_mean:
        avg_chan = tf.reduce_mean(image, axis=(0, 1), name='avg_chan')
    else:
        avg_chan = None

    # Exemplar (z) branch: pad the frame if necessary, then crop.
    frame_padded_z, npad_z = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       z_sz_ph, avg_chan)
    frame_padded_z = tf.cast(frame_padded_z, tf.float32)
    z_crops = extract_crops_z(frame_padded_z, npad_z, pos_x_ph, pos_y_ph,
                              z_sz_ph, design.exemplar_sz)

    # Search (x) branch: pad for the x_sz2_ph size, then crop at three scales.
    frame_padded_x, npad_x = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       x_sz2_ph, avg_chan)
    frame_padded_x = tf.cast(frame_padded_x, tf.float32)
    x_crops = extract_crops_x(frame_padded_x, npad_x, pos_x_ph, pos_y_ph,
                              x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz)

    # Feed the crops to the (MatConvNet-imported) pre-trained Siamese net.
    template_z, templates_x, p_names_list, p_val_list = _create_siamese(
        os.path.join(env.root_pretrained, design.net), x_crops, z_crops)
    template_z = tf.squeeze(template_z)
    templates_z = tf.stack([template_z, template_z, template_z])

    # Cross-correlate the templates, then upsample the score maps.
    scores = _match_templates(templates_z, templates_x,
                              p_names_list, p_val_list)
    scores_up = tf.image.resize_images(
        scores, [final_score_sz, final_score_sz],
        method=tf.image.ResizeMethod.BICUBIC, align_corners=True)

    # Debug output. Written as a single %-formatted argument so it parses on
    # Python 3 and prints the same text as the former Python 2 statement
    # `print 'ss', scores_up.shape`.
    print('ss %s' % (scores_up.shape,))
    return image, templates_z, scores_up
def build_tracking_graph(final_score_sz, design, env):
    """Build the tracking graph that reads one JPEG frame from disk.

    The ``*_ph`` placeholders are looked up in the enclosing scope.

    Args:
        final_score_sz: side length the score maps are resized to.
        design: provides ``pad_with_image_mean``, ``exemplar_sz``,
            ``search_sz`` and ``net``.
        env: provides ``root_pretrained``.

    Returns:
        Tuple ``(filename, image, templates_z, scores_up)`` where ``filename``
        is the string placeholder for the path of the frame to read.
    """
    filename = tf.placeholder(tf.string, [], name='filename')
    image_file = tf.read_file(filename)
    # Decode the JPEG, convert to float32 and multiply by 255.0.
    image = tf.image.decode_jpeg(image_file)
    image = 255.0 * tf.image.convert_image_dtype(image, tf.float32)
    frame_sz = tf.shape(image)

    # Mean over the first two axes; handed to pad_frame as the padding value.
    if design.pad_with_image_mean:
        avg_chan = tf.reduce_mean(image, axis=(0, 1), name='avg_chan')
    else:
        avg_chan = None

    # Exemplar (z) branch: pad the frame if necessary, then crop.
    frame_padded_z, npad_z = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       z_sz_ph, avg_chan)
    frame_padded_z = tf.cast(frame_padded_z, tf.float32)
    z_crops = extract_crops_z(frame_padded_z, npad_z, pos_x_ph, pos_y_ph,
                              z_sz_ph, design.exemplar_sz)

    # Search (x) branch: pad for the x_sz2_ph size, then crop at three scales.
    frame_padded_x, npad_x = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       x_sz2_ph, avg_chan)
    frame_padded_x = tf.cast(frame_padded_x, tf.float32)
    x_crops = extract_crops_x(frame_padded_x, npad_x, pos_x_ph, pos_y_ph,
                              x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz)

    # Feed the crops to the (MatConvNet-imported) pre-trained Siamese net.
    template_z, templates_x, p_names_list, p_val_list = _create_siamese(
        os.path.join(env.root_pretrained, design.net), x_crops, z_crops)
    template_z = tf.squeeze(template_z)
    templates_z = tf.stack([template_z, template_z, template_z])

    # Cross-correlate the templates, then upsample the score maps.
    # (An unused `feature_params` dict used to be created here; it was never
    # read and has been removed.)
    scores = _match_templates(templates_z, templates_x,
                              p_names_list, p_val_list)
    scores_up = tf.image.resize_images(
        scores, [final_score_sz, final_score_sz],
        method=tf.image.ResizeMethod.BICUBIC, align_corners=True)

    return filename, image, templates_z, scores_up
def get_template_z(self, pos_x, pos_y, z_sz, image, design):
    """Compute the exemplar template for a frame.

    Args:
        pos_x, pos_y, z_sz: forwarded to ``pad_frame`` and ``extract_crops_z``.
        image: either a path string (opened with ``Image.open``) or an
            already loaded image object.
        design: provides ``exemplar_sz``.

    Returns:
        Tuple ``(image, template_z)``: the (possibly just opened) image and
        the output of ``self.branch`` on the exemplar crop.
    """
    frame = Image.open(image) if isinstance(image, six.string_types) else image

    # Per-band mean of the frame, handed to pad_frame as the padding value.
    band_means = ImageStat.Stat(frame).mean
    padded, npad = pad_frame(frame, frame.size, pos_x, pos_y, z_sz, band_means)
    exemplar = extract_crops_z(padded, npad, pos_x, pos_y, z_sz,
                               design.exemplar_sz)

    return frame, self.branch(Variable(exemplar))
def build_tracking_graph(final_score_sz, design, env, hp):
    """Build the tracking graph with a configurable number of scales.

    The ``*_ph`` placeholders are looked up in the enclosing scope. The search
    branch uses the ``pad_frame2`` / ``extract_crops_x2`` helpers with a single
    ``x_sz_ph`` placeholder.

    Args:
        final_score_sz: side length the score maps are resized to.
        design: provides ``pad_with_image_mean``, ``exemplar_sz``,
            ``search_sz`` and ``net``.
        env: provides ``root_pretrained``.
        hp: provides ``scale_num``, the number of copies of the exemplar
            template that are stacked.

    Returns:
        Tuple ``(filename, image, templates_z, scores_up)`` where ``filename``
        is the string placeholder for the path of the frame to read.
    """
    filename = tf.placeholder(tf.string, [], name='filename')
    raw_bytes = tf.read_file(filename)
    # Decode the JPEG, convert to float32 and multiply by 255.0.
    decoded = tf.image.decode_jpeg(raw_bytes)
    image = 255.0 * tf.image.convert_image_dtype(decoded, tf.float32)
    frame_sz = tf.shape(image)

    # If padding with the image mean is requested, take the mean of the pixel
    # values over the first two axes; it is used as the padding value.
    avg_chan = (tf.reduce_mean(image, axis=(0, 1), name='avg_chan')
                if design.pad_with_image_mean else None)

    # Exemplar (z): pad the original frame as z requires, then crop z.
    padded_z, npad_z = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                 z_sz_ph, avg_chan)
    padded_z = tf.cast(padded_z, tf.float32)
    z_crops = extract_crops_z(padded_z, npad_z, pos_x_ph, pos_y_ph,
                              z_sz_ph, design.exemplar_sz)

    # Search (x): pad the original frame as x requires, then crop x.
    padded_x, npad_x = pad_frame2(image, frame_sz, pos_x_ph, pos_y_ph,
                                  x_sz_ph, avg_chan)
    padded_x = tf.cast(padded_x, tf.float32)
    x_crops = extract_crops_x2(padded_x, npad_x, pos_x_ph, pos_y_ph,
                               x_sz_ph, design.search_sz)

    # Extract features from the x and z crops with the (MatConvNet-imported)
    # pre-trained fully-convolutional Siamese net.
    net_path = os.path.join(env.root_pretrained, design.net)
    template_z, templates_x, p_names_list, p_val_list = _create_siamese(
        net_path, x_crops, z_crops)

    # Stack hp.scale_num copies of the exemplar template.
    template_z = tf.squeeze(template_z)
    templates_z = tf.stack([template_z for _ in range(hp.scale_num)])

    # Cross-correlate the x and z templates to obtain the score distribution.
    scores = _match_templates(templates_z, templates_x,
                              p_names_list, p_val_list)

    # Upsample the score distribution to final_score_sz x final_score_sz.
    target_size = [final_score_sz, final_score_sz]
    scores_up = tf.image.resize_images(
        scores, target_size,
        method=tf.image.ResizeMethod.BICUBIC, align_corners=True)

    return filename, image, templates_z, scores_up
def get_scores(self, pos_x, pos_y, scaled_search_area, template_z, filename, design, final_score_sz):
    """Score the search crops of a frame against the exemplar template.

    Args:
        pos_x, pos_y: forwarded to ``pad_frame`` and ``extract_crops_x``.
        scaled_search_area: indexable with at least three entries; entry 2 is
            the size used for padding, and entries 0, 1 and 2 are the crop sizes.
        template_z: exemplar template; repeated along its first axis to match
            the first dimension of the search features.
        filename: despite the name, this holds pixel data -- it is converted
            with ``astype('uint8')`` and wrapped as an RGB PIL image.
        design: provides ``search_sz``.
        final_score_sz: side length the score maps are resized to.

    Returns:
        Tuple ``(image, scores_up)``: the PIL image and the resized score
        maps as a NumPy array.
    """
    frame = Image.fromarray(filename.astype('uint8'), 'RGB')

    # Per-band mean of the frame, handed to pad_frame as the padding value.
    band_means = ImageStat.Stat(frame).mean
    padded, npad = pad_frame(frame, frame.size, pos_x, pos_y,
                             scaled_search_area[2], band_means)
    x_crops = extract_crops_x(padded, npad, pos_x, pos_y,
                              scaled_search_area[0],
                              scaled_search_area[1],
                              scaled_search_area[2],
                              design.search_sz)
    if self.use_cuda:
        x_crops = x_crops.cuda()

    template_x = self.branch(Variable(x_crops))
    tiled_z = template_z.repeat(template_x.size(0), 1, 1, 1)
    scores = self.bn_adjust(self.xcorr(tiled_z, template_x))

    # Original author's note: min/max normalisation of the scores is left
    # disabled because a few frames appear to be entirely green, so every
    # pixel has the same value and dividing by the maximum is not possible.
    # TODO: any elegant alternator?
    # The permute moves the first axis last for cv2.resize; the final
    # transpose moves it back to the front.
    score_maps = scores.squeeze().permute(1, 2, 0).data.cpu().numpy()
    resized = cv2.resize(score_maps, (final_score_sz, final_score_sz),
                         interpolation=cv2.INTER_CUBIC)

    return frame, resized.transpose((2, 0, 1))
def get_scores(self, pos_x, pos_y, scaled_search_area, template_z, filename, design, final_score_sz):
    """Score the search crops of a frame file against the exemplar template.

    Args:
        pos_x, pos_y: forwarded to ``pad_frame`` and ``extract_crops_x``.
        scaled_search_area: indexable with at least three entries; entry 2 is
            the size used for padding, and entries 0, 1 and 2 are the crop sizes.
        template_z: exemplar template; repeated along its first axis to match
            the first dimension of the search features.
        filename: path handed to ``Image.open``.
        design: provides ``search_sz``.
        final_score_sz: side length the score maps are resized to.

    Returns:
        Tuple ``(image, scores_up)``: the opened image and the resized score
        maps as a NumPy array.
    """
    frame = Image.open(filename)

    # Per-band mean of the frame, handed to pad_frame as the padding value.
    band_means = ImageStat.Stat(frame).mean
    padded, npad = pad_frame(frame, frame.size, pos_x, pos_y,
                             scaled_search_area[2], band_means)
    x_crops = extract_crops_x(padded, npad, pos_x, pos_y,
                              scaled_search_area[0],
                              scaled_search_area[1],
                              scaled_search_area[2],
                              design.search_sz)

    # This is where the conv net is actually invoked.
    template_x = self.branch(Variable(x_crops))
    tiled_z = template_z.repeat(template_x.size(0), 1, 1, 1)
    scores = self.bn_adjust(self.xcorr(tiled_z, template_x))

    # TODO: any elegant alternator?
    # The permute moves the first axis last for cv2.resize; the final
    # transpose moves it back to the front.
    score_maps = scores.squeeze().permute(1, 2, 0).data.numpy()
    resized = cv2.resize(score_maps, (final_score_sz, final_score_sz),
                         interpolation=cv2.INTER_CUBIC)

    return frame, resized.transpose((2, 0, 1))
def build_tracking_graph(final_score_sz, design, env):
    """Build the tracking graph that reads one JPEG frame from disk.

    The ``*_ph`` placeholders are looked up in the enclosing scope.

    Args:
        final_score_sz: side length the score maps are resized to.
        design: provides ``pad_with_image_mean``, ``exemplar_sz``,
            ``search_sz`` and ``net``.
        env: provides ``root_pretrained``.

    Returns:
        Tuple ``(filename, image, templates_z, scores_up)`` where ``filename``
        is the string placeholder for the path of the frame to read.
    """
    filename = tf.placeholder(tf.string, [], name='filename')
    image_file = tf.read_file(filename)
    # Decode the JPEG, convert to float32 and multiply by 255.0.
    image = tf.image.decode_jpeg(image_file)
    image = 255.0 * tf.image.convert_image_dtype(image, tf.float32)
    frame_sz = tf.shape(image)

    # TODO: Decide how much we can remove from this block. I can't imagine that
    # we should be padding in this case. That is, design.pad_with_image_mean is
    # probably always False.
    if design.pad_with_image_mean:
        # Mean over the first two axes; handed to pad_frame as padding value.
        avg_chan = tf.reduce_mean(image, reduction_indices=(0, 1),
                                  name='avg_chan')
    else:
        avg_chan = None

    # Exemplar (z) branch: pad the frame if necessary, then crop.
    frame_padded_z, npad_z = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       z_sz_ph, avg_chan)
    frame_padded_z = tf.cast(frame_padded_z, tf.float32)
    z_crops = extract_crops_z(frame_padded_z, npad_z, pos_x_ph, pos_y_ph,
                              z_sz_ph, design.exemplar_sz)

    # Search (x) branch: pad for the x_sz2_ph size, then crop at three scales.
    frame_padded_x, npad_x = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       x_sz2_ph, avg_chan)
    frame_padded_x = tf.cast(frame_padded_x, tf.float32)
    x_crops = extract_crops_x(frame_padded_x, npad_x, pos_x_ph, pos_y_ph,
                              x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz)

    # Feed the crops to the (MatConvNet-imported) pre-trained Siamese net.
    template_z, templates_x, p_names_list, p_val_list = _create_siamese(
        os.path.join(env.root_pretrained, design.net), x_crops, z_crops)
    template_z = tf.squeeze(template_z)

    # `tf.pack` was renamed to `tf.stack`; prefer the new name and fall back
    # to the old one so the graph builds on either side of the rename.
    stack_fn = tf.stack if hasattr(tf, 'stack') else tf.pack
    templates_z = stack_fn([template_z, template_z, template_z])

    # Cross-correlate the templates, then upsample the score maps.
    scores = _match_templates(templates_z, templates_x,
                              p_names_list, p_val_list)
    scores_up = tf.image.resize_images(
        scores, [final_score_sz, final_score_sz],
        method=tf.image.ResizeMethod.BICUBIC, align_corners=True)

    return filename, image, templates_z, scores_up
def get_scores(self, pos_x, pos_y, scaled_search_area, template_z, image, design, final_score_sz):
    """Score the search crops of a frame against the exemplar template on GPU.

    Args:
        pos_x, pos_y: forwarded to ``pad_frame`` and ``extract_crops_x``.
        scaled_search_area: indexable with at least three entries; entry 2 is
            the size used for padding, and entries 0, 1 and 2 are the crop sizes.
        template_z: exemplar template; repeated along its first axis to match
            the first dimension of the search features.
        image: either a path string (opened with ``Image.open``) or an
            already loaded image object.
        design: provides ``search_sz``.
        final_score_sz: side length the score maps are resized to.

    Returns:
        Tuple ``(image, scores_up)``: the (possibly just opened) image and the
        resized score maps as a NumPy array.
    """
    frame = Image.open(image) if isinstance(image, six.string_types) else image

    # Per-band mean of the frame, handed to pad_frame as the padding value.
    band_means = ImageStat.Stat(frame).mean
    padded, npad = pad_frame(frame, frame.size, pos_x, pos_y,
                             scaled_search_area[2], band_means)
    x_crops = extract_crops_x(padded, npad, pos_x, pos_y,
                              scaled_search_area[0],
                              scaled_search_area[1],
                              scaled_search_area[2],
                              design.search_sz)

    # The crops are always moved to the GPU here; template_z is used as given.
    x_crops = x_crops.cuda()
    template_x = self.branch(Variable(x_crops))
    tiled_z = template_z.repeat(template_x.size(0), 1, 1, 1)
    scores = self.bn_adjust(self.xcorr(tiled_z, template_x))

    # TODO: any elegant alternator?
    # The permute moves the first axis last for cv2.resize; the final
    # transpose moves it back to the front.
    score_maps = scores.squeeze().permute(1, 2, 0).data.cpu().numpy()
    resized = cv2.resize(score_maps, (final_score_sz, final_score_sz),
                         interpolation=cv2.INTER_CUBIC)

    return frame, resized.transpose((2, 0, 1))
def build_tracking_graph(root_dir, final_score_sz, design, env, hp):
    """Define and build the multi-layer tracking graph.

    The ``*_ph`` placeholders are looked up in the enclosing scope.

    Args:
        root_dir: string, path to the root directory of this project
            (not referenced in the body).
        final_score_sz: int, size of the score map after upsampling.
        design: namespace of design parameters.
        env: namespace of environment parameters (not referenced in the body).
        hp: namespace of hyperparameters; ``scale_num`` is read.

    Returns:
        A 5-tuple:
          * string placeholder for the path of the image to read;
          * the image read from that path;
          * instance features returned by ``_create_siamese`` (see the
            siam_mcf_net.inference comments for details);
          * exemplar features returned by ``_create_siamese`` (same note);
          * the stacked, upsampled score heatmaps, one entry per element of
            the score list returned by ``_create_siamese``.
    """
    filename = tf.placeholder(tf.string, [], name='filename')
    raw_bytes = tf.read_file(filename)
    # Decode the JPEG as 3 channels, convert to float32, multiply by 255.0.
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    image = 255.0 * tf.image.convert_image_dtype(decoded, tf.float32)
    frame_sz = tf.shape(image)

    # Mean over the first two axes; handed to pad_frame as the padding value.
    avg_chan = (tf.reduce_mean(image, axis=(0, 1), name='avg_chan')
                if design.pad_with_image_mean else None)

    # Exemplar (z) branch: pad, crop, then repeat the crop hp.scale_num times
    # along axis 0.
    padded_z, npad_z = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                 z_sz_ph, avg_chan)
    padded_z = tf.cast(padded_z, tf.float32)
    z_crops = extract_crops_z(padded_z, npad_z, pos_x_ph, pos_y_ph,
                              z_sz_ph, design.exemplar_sz)
    z_crops = tf.concat([z_crops] * hp.scale_num, axis=0)

    # Search (x) branch: pad for the x_sz2_ph size, then crop at three scales.
    padded_x, npad_x = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                 x_sz2_ph, avg_chan)
    padded_x = tf.cast(padded_x, tf.float32)
    x_crops = extract_crops_x(padded_x, npad_x, pos_x_ph, pos_y_ph,
                              x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz)

    templates_z, templates_x, scores_list = _create_siamese(
        design, x_crops, z_crops, use_res_reduce=True)

    def _with_scale_axis(stacked):
        # Reshape so that axis 1 has size hp.scale_num; the remaining sizes
        # are taken from the static shape of the stacked tensor.
        dims = stacked.get_shape()
        return tf.reshape(
            stacked, [dims[0], hp.scale_num, dims[2], dims[3], dims[4]])

    # Upsample every score map to final_score_sz x final_score_sz.
    target_size = [final_score_sz, final_score_sz]
    upsampled = []
    for score_map in scores_list:
        upsampled.append(tf.image.resize_images(
            score_map, target_size,
            method=tf.image.ResizeMethod.BICUBIC, align_corners=True))

    scores_up_list = tf.stack(upsampled)
    scores_list = tf.stack(scores_list)
    scores_up_list = _with_scale_axis(scores_up_list)
    # NOTE: the reshaped low-resolution scores are built but not returned.
    scores_list = _with_scale_axis(scores_list)

    return filename, image, templates_x, templates_z, scores_up_list
def make_siameseFC(env, design, hp):
    """Build the training graph of the Siamese network.

    The graph reads a single image file, crops the exemplar (z) and search (x)
    patches, runs both through the network, and minimises the root-mean-square
    difference between the upsampled score map and a generated label map.
    ``pos_x``, ``pos_y``, ``z_size`` and ``x_size`` are looked up in the
    enclosing scope. Original author's note: im_z and im_x can both reuse this
    function.

    Args:
        env: provides ``image_type`` (``'jpg'`` or ``'bmp'``).
        design: provides ``pad_with_image_mean``, ``exemplarSize``,
            ``instacneSize`` (spelling as declared by the project),
            ``score_size`` and ``dPos``.
        hp: provides ``learning_rate``.

    Returns:
        Tuple ``(filename, _siam_net_z, loss, train_op)``.

    Raises:
        ValueError: if ``env.image_type`` is neither ``'jpg'`` nor ``'bmp'``.
    """
    filename = tf.placeholder(tf.string, [], name='filename')
    image_file = tf.read_file(filename)

    # Pick the decoder matching the image type and turn the file into a tensor.
    if env.image_type == 'jpg':
        image = tf.image.decode_jpeg(image_file)
    elif env.image_type == 'bmp':
        image = tf.image.decode_bmp(image_file)
    else:
        # Without this branch `image` stayed unbound and the next statement
        # failed with an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported image type: %r (expected 'jpg' or 'bmp')"
            % (env.image_type,))

    # Convert to float32 and multiply by 255.0.
    im = 255.0 * tf.image.convert_image_dtype(image, tf.float32)
    frame_size = tf.shape(im)

    if design.pad_with_image_mean:
        # Mean pixel value of each channel, used as the padding value.
        avg_chan = tf.reduce_mean(im, axis=(0, 1), name='avg_chan')
    else:
        avg_chan = None

    # Exemplar (z): pad the image before cropping, then crop the z patch.
    im_padded_z, npad_z = pad_frame(im, frame_size, pos_x, pos_y, z_size,
                                    avg_chan)
    im_padded_z = tf.cast(im_padded_z, tf.float32)
    crop_z = extract_crops(im_padded_z, npad_z, pos_x, pos_y, z_size,
                           design.exemplarSize)

    # Search (x): pad the image before cropping, then crop the x patch.
    im_padded_x, npad_x = pad_frame(im, frame_size, pos_x, pos_y, x_size,
                                    avg_chan)
    im_padded_x = tf.cast(im_padded_x, tf.float32)
    crop_x = extract_crops(im_padded_x, npad_x, pos_x, pos_y, x_size,
                           design.instacneSize)

    # Use the crops as input of the Siamese net to train.
    _siam_net_z, _siam_net_x = create_net_define_var(crop_x, crop_z)

    # Evaluate the correlation between x and z.
    scores = _match_templates(_siam_net_z, _siam_net_x)

    # Upsample the score maps and create the matching label map.
    scores_up = tf.image.resize_images(
        scores, [design.score_size, design.score_size],
        method=tf.image.ResizeMethod.BICUBIC, align_corners=True)
    scores_gt = create_label([design.score_size, design.score_size],
                             design.dPos)

    # Flatten prediction and label to the same number of elements. The four
    # names simply follow the order of the dynamic shape of scores_up.
    Hz, Wz, Bz, Cz = tf.unstack(tf.shape(scores_up))
    scores_up_re = tf.squeeze(
        tf.reshape(scores_up, (1, 1, 1, Hz * Wz * Bz * Cz)))
    scores_gt_re = tf.squeeze(
        tf.reshape(scores_gt, (1, 1, 1, Hz * Wz * Bz * Cz)))

    # Loss: root of the mean squared difference.
    # TODO: open question from the original author -- is the tf.sqrt needed?
    print('begin calculate the loss')
    loss = tf.sqrt(
        tf.reduce_mean(
            tf.square(scores_up_re - tf.cast(scores_gt_re, tf.float32))))
    tf.summary.scalar('loss', loss)

    # Back-propagation: train_op trains the variables defined with
    # "tf.Variable" or "tf.get_variable".
    train_op = tf.train.AdamOptimizer(hp.learning_rate).minimize(loss)
    print('loss end1')

    return filename, _siam_net_z, loss, train_op
def build_tracking_graph_train(self, final_score_sz, design, env, hp, frame_sz):
    """Build the batched training graph for frames of a fixed, known size.

    Each image of the batch is padded and cropped individually using the
    per-sample placeholders ``self.batched_*_ph``; the crops are then stacked
    and passed through the Siamese network.

    Args:
        final_score_sz: side length the score maps are resized to.
        design: provides ``pad_with_image_mean``, ``exemplar_sz`` and
            ``search_sz``; also forwarded to ``self._create_siamese_train``.
        env: not referenced in the body.
        hp: provides ``lr``; also forwarded to ``self.distance``.
        frame_sz: list appended to ``[self.batch_size]`` to form the shape of
            the input placeholder, and forwarded to ``pad_frame``.

    Returns:
        Tuple ``(image, z_crops, x_crops, templates_z, scores_up, loss,
        train_step, distance_to_gt, summary, templates_x, max_pos_x,
        max_pos_y)``.
    """
    image = tf.placeholder(tf.float32, [self.batch_size] + frame_sz,
                           name="input_image")

    # Mean over axes 1 and 2 of the batched input, i.e. one entry per image;
    # handed to pad_frame as the padding value.
    if design.pad_with_image_mean:
        avg_chan = tf.reduce_mean(image, axis=(1, 2), name='avg_chan')
    else:
        avg_chan = None

    single_crops_z = []
    single_crops_x = []
    # Slice the batch into single images and crop them one by one.
    for batch in range(self.batch_size):
        frame = image[batch]
        single_pos_x_ph = self.batched_pos_x_ph[batch]
        single_pos_y_ph = self.batched_pos_y_ph[batch]
        single_z_sz_ph = self.batched_z_sz_ph[batch]
        single_x_sz0_ph = self.batched_x_sz0_ph[batch]
        single_x_sz1_ph = self.batched_x_sz1_ph[batch]
        single_x_sz2_ph = self.batched_x_sz2_ph[batch]
        # Indexing None raised a TypeError when padding with the image mean is
        # disabled; pass None through instead.
        frame_avg = avg_chan[batch] if avg_chan is not None else None

        # Exemplar (z): pad if necessary, then crop.
        frame_padded_z, npad_z = pad_frame(frame, frame_sz, single_pos_x_ph,
                                           single_pos_y_ph, single_z_sz_ph,
                                           frame_avg)
        frame_padded_z = tf.cast(frame_padded_z, tf.float32)
        single_crops_z.append(
            tf.squeeze(
                extract_crops_z(frame_padded_z, npad_z, single_pos_x_ph,
                                single_pos_y_ph, single_z_sz_ph,
                                design.exemplar_sz)))

        # Search (x): pad if necessary, then crop at three scales.
        frame_padded_x, npad_x = pad_frame(frame, frame_sz, single_pos_x_ph,
                                           single_pos_y_ph, single_x_sz2_ph,
                                           frame_avg)
        frame_padded_x = tf.cast(frame_padded_x, tf.float32)
        single_crops_x.append(
            tf.squeeze(
                extract_crops_x(frame_padded_x, npad_x, single_pos_x_ph,
                                single_pos_y_ph, single_x_sz0_ph,
                                single_x_sz1_ph, single_x_sz2_ph,
                                design.search_sz)))

    z_crops = tf.stack(single_crops_z)
    x_crops = tf.stack(single_crops_x)
    # Merge the first two axes of the stacked x crops into one.
    x_crops_shape = x_crops.get_shape().as_list()
    x_crops = tf.reshape(
        x_crops,
        [x_crops_shape[0] * x_crops_shape[1]] + x_crops_shape[2:])
    print("shape of single_crops_x: ", single_crops_x[0].shape, "shape of x_crops: ", x_crops.shape)
    print("shape of single_crops_z: ", single_crops_z[0].shape, "shape of z_crops: ", z_crops.shape)

    # Use the crops as input of the fully-convolutional Siamese net.
    template_z, templates_x = self._create_siamese_train(
        x_crops, z_crops, design)
    print("shape of template_z:", template_z.shape)

    # Repeat every exemplar template three times, once per search scale.
    template_z_list = []
    for batch in range(self.batch_size):
        template_z_list.extend([template_z[batch]] * 3)
    templates_z = tf.stack(template_z_list)
    print("shape of templates_z:", templates_z.get_shape().as_list())
    print("shape of templates_x:", templates_x.get_shape().as_list())

    # Compare templates via cross-correlation.
    # (Score normalisation and clipping to [-10, 10] were tried here and left
    # disabled by the original author.)
    scores = self._match_templates_train(templates_z, templates_x)
    print("shape of small score map:", scores.get_shape().as_list())

    # Upsample the score maps.
    # (A softmax rescaling of scores_up was tried here and left disabled by
    # the original author.)
    scores_up = tf.image.resize_bilinear(
        scores, [final_score_sz, final_score_sz], align_corners=True)
    print("shape of big score map:", scores_up.get_shape().as_list())

    # Keep only the first of the three scale entries of every image.
    score = tf.squeeze(
        tf.stack([scores_up[i]
                  for i in range(0, 3 * self.batch_size, 3)]))

    loss = self.cal_loss(score)
    distance_to_gt, max_pos_x, max_pos_y = self.distance(
        score, final_score_sz, hp)
    train_step = tf.train.AdamOptimizer(hp.lr).minimize(loss)
    summary = tf.summary.scalar('distance_to_gt', distance_to_gt)

    return (image, z_crops, x_crops, templates_z, scores_up, loss,
            train_step, distance_to_gt, summary, templates_x, max_pos_x,
            max_pos_y)
def build_tracking_graph_train(self, final_score_sz, design, env, hp):
    """Build the batched training graph for frames whose size is only known at run time.

    Each image of the batch is padded and cropped individually using the
    per-sample placeholders ``self.batched_*_ph``; the crops are then stacked
    and passed through the Siamese network.

    Args:
        final_score_sz: side length the score maps are resized to.
        design: provides ``pad_with_image_mean``, ``exemplar_sz`` and
            ``search_sz``; also forwarded to ``self._create_siamese_train``.
        env: not referenced in the body.
        hp: provides ``lr``; also forwarded to ``self.distance``.

    Returns:
        Tuple ``(image, z_crops, x_crops, templates_z, scores_up, loss,
        train_step, distance_to_gt, summary)``.
    """
    image = tf.placeholder(tf.float32,
                           [self.batch_size] + [None, None, 3],
                           name="input_image")

    # Frame size: each foldl counts the entries along the leading axis of its
    # argument, so the three values are the sizes of axes 0, 1 and 2 of the
    # first image of the batch, in that order.
    size_axis0 = tf.foldl((lambda prev, cur: prev + 1), image[0],
                          initializer=0)
    size_axis1 = tf.foldl((lambda prev, cur: prev + 1), image[0][0],
                          initializer=0)
    size_axis2 = tf.foldl((lambda prev, cur: prev + 1), image[0][0][0],
                          initializer=0)
    frame_sz = [size_axis0, size_axis1, size_axis2]

    # Mean over axes 1 and 2 of the batched input, i.e. one entry per image;
    # handed to pad_frame as the padding value.
    if design.pad_with_image_mean:
        avg_chan = tf.reduce_mean(image, axis=(1, 2), name='avg_chan')
    else:
        avg_chan = None

    single_crops_z = []
    single_crops_x = []
    # Slice the batch into single images and crop them one by one.
    for batch in range(self.batch_size):
        single_pos_x_ph = self.batched_pos_x_ph[batch]
        single_pos_y_ph = self.batched_pos_y_ph[batch]
        single_z_sz_ph = self.batched_z_sz_ph[batch]
        single_x_sz0_ph = self.batched_x_sz0_ph[batch]
        single_x_sz1_ph = self.batched_x_sz1_ph[batch]
        single_x_sz2_ph = self.batched_x_sz2_ph[batch]
        frame = image[batch]
        # Indexing None raised a TypeError when padding with the image mean is
        # disabled; pass None through instead.
        frame_avg = avg_chan[batch] if avg_chan is not None else None

        # Exemplar (z): pad if necessary, then crop.
        frame_padded_z, npad_z = pad_frame(frame, frame_sz, single_pos_x_ph,
                                           single_pos_y_ph, single_z_sz_ph,
                                           frame_avg)
        frame_padded_z = tf.cast(frame_padded_z, tf.float32)
        single_crops_z.append(
            tf.squeeze(
                extract_crops_z(frame_padded_z, npad_z, single_pos_x_ph,
                                single_pos_y_ph, single_z_sz_ph,
                                design.exemplar_sz)))

        # Search (x): pad if necessary, then crop at three scales.
        frame_padded_x, npad_x = pad_frame(frame, frame_sz, single_pos_x_ph,
                                           single_pos_y_ph, single_x_sz2_ph,
                                           frame_avg)
        frame_padded_x = tf.cast(frame_padded_x, tf.float32)
        single_crops_x.append(
            tf.squeeze(
                extract_crops_x(frame_padded_x, npad_x, single_pos_x_ph,
                                single_pos_y_ph, single_x_sz0_ph,
                                single_x_sz1_ph, single_x_sz2_ph,
                                design.search_sz)))

    # Stack the cropped single images and merge the first two axes of the
    # stacked x crops into one.
    z_crops = tf.stack(single_crops_z)
    x_crops = tf.stack(single_crops_x)
    x_crops_shape = x_crops.get_shape().as_list()
    x_crops = tf.reshape(
        x_crops,
        [x_crops_shape[0] * x_crops_shape[1]] + x_crops_shape[2:])
    print("shape of single_crops_x: ", single_crops_x[0].shape, "shape of x_crops: ", x_crops.shape)
    print("shape of single_crops_z: ", single_crops_z[0].shape, "shape of z_crops: ", z_crops.shape)

    # Use the crops as input of the fully-convolutional Siamese net.
    template_z, templates_x = self._create_siamese_train(x_crops, z_crops,
                                                         design)
    print("shape of template_z:", template_z.shape)

    # Extend template_z to match the triple-scaled feature map of x: every
    # exemplar template is repeated three times.
    template_z_list = []
    for batch in range(self.batch_size):
        template_z_list.extend([template_z[batch]] * 3)
    templates_z = tf.stack(template_z_list)
    print("shape of templates_z:", templates_z.get_shape().as_list())
    print("shape of templates_x:", templates_x.get_shape().as_list())

    # Compare templates via cross-correlation, then resize to final_score_sz.
    scores = self._match_templates_train(templates_z, templates_x)
    scores_up = tf.image.resize_bilinear(
        scores, [final_score_sz, final_score_sz], align_corners=True)
    print("shape of big score map:", scores_up.get_shape().as_list())

    # Only keep one scale for each image: the first of its three entries.
    score = tf.squeeze(
        tf.stack([scores_up[i]
                  for i in range(0, 3 * self.batch_size, 3)]))

    loss = self.cal_loss(score)
    distance_to_gt, max_pos_x, max_pos_y = self.distance(score,
                                                         final_score_sz, hp)
    train_step = tf.train.AdamOptimizer(hp.lr).minimize(loss)
    summary = tf.summary.scalar('distance_to_gt', distance_to_gt)

    return (image, z_crops, x_crops, templates_z, scores_up, loss,
            train_step, distance_to_gt, summary)