Esempio n. 1
0
def build_tracking_graph(final_score_sz, design, env):
    """Assemble the ops that turn a frame into upsampled score maps.

    Args:
        final_score_sz: side length of the square score maps after resizing.
        design: object providing ``pad_with_image_mean``, ``exemplar_sz``,
            ``search_sz`` and ``net``.
        env: object providing ``root_pretrained``.

    Returns:
        Tuple ``(image, templates_z, scores_up)``.

    NOTE(review): ``image`` and every ``*_ph`` name are not defined in this
    function; presumably they are module-level tensors/placeholders -- confirm.
    """
    frame_sz = tf.shape(image)
    # Per-channel mean used as padding value, or None when disabled.
    mean_per_channel = (tf.reduce_mean(image, axis=(0, 1), name='avg_chan')
                        if design.pad_with_image_mean else None)

    def _pad_as_float(patch_sz_ph):
        # Pad the frame for the given patch size and cast the result to float32.
        padded, npad = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                 patch_sz_ph, mean_per_channel)
        return tf.cast(padded, tf.float32), npad

    # Exemplar (z) crop.
    padded_z, npad_z = _pad_as_float(z_sz_ph)
    z_crops = extract_crops_z(padded_z, npad_z, pos_x_ph, pos_y_ph,
                              z_sz_ph, design.exemplar_sz)

    # Search (x) crops at three scales; padding is driven by x_sz2_ph.
    padded_x, npad_x = _pad_as_float(x_sz2_ph)
    x_crops = extract_crops_x(padded_x, npad_x, pos_x_ph, pos_y_ph,
                              x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz)

    # Feed both crops to the pre-trained fully-convolutional Siamese net.
    net_path = os.path.join(env.root_pretrained, design.net)
    template_z, templates_x, p_names_list, p_val_list = _create_siamese(
        net_path, x_crops, z_crops)

    # Replicate the squeezed exemplar features three times.
    template_z = tf.squeeze(template_z)
    templates_z = tf.stack([template_z] * 3)

    # Cross-correlate, then resize the score maps bicubically.
    scores = _match_templates(templates_z, templates_x, p_names_list,
                              p_val_list)
    target_size = [final_score_sz, final_score_sz]
    scores_up = tf.image.resize_images(
        scores, target_size,
        method=tf.image.ResizeMethod.BICUBIC,
        align_corners=True)
    return image, templates_z, scores_up
Esempio n. 2
0
def build_tracking_graph_2(final_score_sz, design, env):
    """Build the tracking graph fed directly with an image placeholder.

    Args:
        final_score_sz: side length of the square score maps after resizing.
        design: object providing ``pad_with_image_mean``, ``exemplar_sz``,
            ``search_sz`` and ``net``.
        env: object providing ``root_pretrained``.

    Returns:
        Tuple ``(image, templates_z, scores_up)`` where ``image`` is the
        float32 placeholder of shape ``[None, None, 3]`` created here.

    NOTE(review): the ``*_ph`` names are not defined in this function;
    presumably they are module-level placeholders -- confirm.
    """
    image = tf.placeholder(tf.float32, [None, None, 3])
    frame_sz = tf.shape(image)
    # used to pad the crops
    if design.pad_with_image_mean:
        avg_chan = tf.reduce_mean(image, axis=(0, 1), name='avg_chan')
    else:
        avg_chan = None
    # pad the frame if necessary
    frame_padded_z, npad_z = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       z_sz_ph, avg_chan)
    frame_padded_z = tf.cast(frame_padded_z, tf.float32)
    # extract tensor of z_crops
    z_crops = extract_crops_z(frame_padded_z, npad_z, pos_x_ph, pos_y_ph,
                              z_sz_ph, design.exemplar_sz)
    frame_padded_x, npad_x = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       x_sz2_ph, avg_chan)
    frame_padded_x = tf.cast(frame_padded_x, tf.float32)
    # extract tensor of x_crops (3 scales)
    x_crops = extract_crops_x(frame_padded_x, npad_x, pos_x_ph, pos_y_ph,
                              x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz)
    # use crops as input of (MatConvnet imported) pre-trained
    # fully-convolutional Siamese net
    template_z, templates_x, p_names_list, p_val_list = _create_siamese(
        os.path.join(env.root_pretrained, design.net), x_crops, z_crops)
    template_z = tf.squeeze(template_z)
    templates_z = tf.stack([template_z, template_z, template_z])
    # compare templates via cross-correlation
    scores = _match_templates(templates_z, templates_x, p_names_list,
                              p_val_list)
    # upsample the score maps
    scores_up = tf.image.resize_images(scores,
                                       [final_score_sz, final_score_sz],
                                       method=tf.image.ResizeMethod.BICUBIC,
                                       align_corners=True)
    # Debug output. Formatted into a single string so the statement is valid
    # and prints the same text under both Python 2 and Python 3 (the original
    # used the Python 2-only print statement).
    print('ss %s' % (scores_up.shape,))
    return image, templates_z, scores_up
def build_tracking_graph(final_score_sz, design, env):
    """Build the tracking graph that reads a JPEG frame from a file path.

    Args:
        final_score_sz: side length of the square score maps after resizing.
        design: object providing ``pad_with_image_mean``, ``exemplar_sz``,
            ``search_sz`` and ``net``.
        env: object providing ``root_pretrained``.

    Returns:
        Tuple ``(filename, image, templates_z, scores_up)`` where ``filename``
        is the scalar string placeholder for the image path and ``image`` is
        the decoded frame as float32 multiplied by 255.

    NOTE(review): the ``*_ph`` names are not defined in this function;
    presumably they are module-level placeholders -- confirm.
    """
    filename = tf.placeholder(tf.string, [], name='filename')
    image_file = tf.read_file(filename)
    # Decode the image as a JPEG file, this will turn it into a Tensor
    image = tf.image.decode_jpeg(image_file)
    # Convert to float32, then rescale by 255
    image = 255.0 * tf.image.convert_image_dtype(image, tf.float32)
    frame_sz = tf.shape(image)
    # used to pad the crops
    if design.pad_with_image_mean:
        avg_chan = tf.reduce_mean(image, axis=(0, 1), name='avg_chan')
    else:
        avg_chan = None
    # pad the frame if necessary
    frame_padded_z, npad_z = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       z_sz_ph, avg_chan)
    frame_padded_z = tf.cast(frame_padded_z, tf.float32)
    # extract tensor of z_crops
    z_crops = extract_crops_z(frame_padded_z, npad_z, pos_x_ph, pos_y_ph,
                              z_sz_ph, design.exemplar_sz)
    frame_padded_x, npad_x = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       x_sz2_ph, avg_chan)
    frame_padded_x = tf.cast(frame_padded_x, tf.float32)
    # extract tensor of x_crops (3 scales)
    x_crops = extract_crops_x(frame_padded_x, npad_x, pos_x_ph, pos_y_ph,
                              x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz)
    # use crops as input of (MatConvnet imported) pre-trained
    # fully-convolutional Siamese net
    template_z, templates_x, p_names_list, p_val_list = _create_siamese(
        os.path.join(env.root_pretrained, design.net), x_crops, z_crops)
    template_z = tf.squeeze(template_z)
    templates_z = tf.stack([template_z, template_z, template_z])

    # compare templates via cross-correlation
    scores = _match_templates(templates_z, templates_x, p_names_list,
                              p_val_list)
    # upsample the score maps
    scores_up = tf.image.resize_images(scores,
                                       [final_score_sz, final_score_sz],
                                       method=tf.image.ResizeMethod.BICUBIC,
                                       align_corners=True)
    return filename, image, templates_z, scores_up
Esempio n. 4
0
 def get_template_z(self, pos_x, pos_y, z_sz, image, design):
     """Compute the exemplar features for the target at (pos_x, pos_y).

     ``image`` may be a path string, in which case it is opened with
     ``Image.open``, or an already-loaded image object.

     Returns:
         Tuple ``(image, template_z)``: the (possibly just opened) image and
         the output of ``self.branch`` on the exemplar crop.
     """
     frame = Image.open(image) if isinstance(image, six.string_types) else image
     # Per-band mean of the frame, handed to pad_frame as the padding value.
     channel_means = ImageStat.Stat(frame).mean
     padded, npad = pad_frame(frame, frame.size, pos_x, pos_y, z_sz,
                              channel_means)
     exemplar = extract_crops_z(padded, npad, pos_x, pos_y, z_sz,
                                design.exemplar_sz)
     return frame, self.branch(Variable(exemplar))
Esempio n. 5
0
def build_tracking_graph(final_score_sz, design, env, hp):
    """Build the tracking graph with a configurable number of scales.

    Args:
        final_score_sz: side length of the square score maps after resizing.
        design: object providing ``pad_with_image_mean``, ``exemplar_sz``,
            ``search_sz`` and ``net``.
        env: object providing ``root_pretrained``.
        hp: object providing ``scale_num``, the number of copies of the
            exemplar features stacked into ``templates_z``.

    Returns:
        Tuple ``(filename, image, templates_z, scores_up)``.

    NOTE(review): ``pos_x_ph``, ``pos_y_ph``, ``z_sz_ph`` and ``x_sz_ph`` are
    not defined here; presumably module-level placeholders -- confirm.
    """
    filename = tf.placeholder(tf.string, [], name='filename')
    raw_bytes = tf.read_file(filename)
    # Decode the JPEG bytes into a tensor, convert to float32 and rescale
    # by 255.
    decoded = tf.image.decode_jpeg(raw_bytes)
    image = 255.0 * tf.image.convert_image_dtype(decoded, tf.float32)
    frame_sz = tf.shape(image)

    # When padding with the image mean is enabled, compute the per-channel
    # mean of the frame; otherwise pass None to the padding helpers.
    avg_chan = (tf.reduce_mean(image, axis=(0, 1), name='avg_chan')
                if design.pad_with_image_mean else None)

    # Exemplar (z): pad the frame as required by the z size, then crop.
    padded_z, npad_z = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                 z_sz_ph, avg_chan)
    z_crops = extract_crops_z(tf.cast(padded_z, tf.float32), npad_z,
                              pos_x_ph, pos_y_ph, z_sz_ph,
                              design.exemplar_sz)

    # Search area (x): pad the frame as required by the x size, then crop.
    # This variant uses pad_frame2 / extract_crops_x2 with a single x_sz_ph
    # instead of the three-size pad_frame / extract_crops_x pair.
    padded_x, npad_x = pad_frame2(image, frame_sz, pos_x_ph, pos_y_ph,
                                  x_sz_ph, avg_chan)
    x_crops = extract_crops_x2(tf.cast(padded_x, tf.float32), npad_x,
                               pos_x_ph, pos_y_ph, x_sz_ph,
                               design.search_sz)

    # Extract features from both crops with the (MatConvnet imported)
    # pre-trained fully-convolutional Siamese net.
    net_path = os.path.join(env.root_pretrained, design.net)
    template_z, templates_x, p_names_list, p_val_list = _create_siamese(
        net_path, x_crops, z_crops)

    # Stack hp.scale_num copies of the squeezed exemplar features.
    template_z = tf.squeeze(template_z)
    templates_z = tf.stack([template_z] * hp.scale_num)

    # Cross-correlate exemplar and search features to get the score maps.
    scores = _match_templates(templates_z, templates_x, p_names_list,
                              p_val_list)
    # Resize the score maps to final_score_sz x final_score_sz.
    scores_up = tf.image.resize_images(
        scores, [final_score_sz, final_score_sz],
        method=tf.image.ResizeMethod.BICUBIC,
        align_corners=True)
    return filename, image, templates_z, scores_up
Esempio n. 6
0
    def get_scores(self, pos_x, pos_y, scaled_search_area, template_z,
                   filename, design, final_score_sz):
        """Score the search crops of a frame against the exemplar features.

        Despite its name, ``filename`` is an array here: it is cast to uint8
        and wrapped as an RGB image via ``Image.fromarray``.

        Returns:
            Tuple ``(image, scores_up)``: the image built from ``filename``
            and a numpy array of score maps resized to
            ``final_score_sz`` x ``final_score_sz`` with the map index as
            the first axis.
        """
        frame = Image.fromarray(filename.astype('uint8'), 'RGB')
        channel_means = ImageStat.Stat(frame).mean

        sz0 = scaled_search_area[0]
        sz1 = scaled_search_area[1]
        sz2 = scaled_search_area[2]

        # Padding is driven by the third search size.
        padded, npad = pad_frame(frame, frame.size, pos_x, pos_y, sz2,
                                 channel_means)
        crops = extract_crops_x(padded, npad, pos_x, pos_y, sz0, sz1, sz2,
                                design.search_sz)
        if self.use_cuda:
            crops = crops.cuda()

        features_x = self.branch(Variable(crops))
        # One copy of the exemplar features per search crop.
        tiled_z = template_z.repeat(features_x.size(0), 1, 1, 1)
        raw_scores = self.bn_adjust(self.xcorr(tiled_z, features_x))

        # Min/max normalisation of the scores is intentionally left out:
        # there appear to be 3 frames with an all-green screen where every
        # pixel has the same value, so dividing by the maximum is not
        # possible (all values are equal).

        # TODO: any elegant alternator?
        # Move the first axis last so cv2.resize sees it as channels.
        score_maps = raw_scores.squeeze().permute(1, 2, 0).data.cpu().numpy()
        resized = cv2.resize(score_maps, (final_score_sz, final_score_sz),
                             interpolation=cv2.INTER_CUBIC)
        # Bring the map index back to the first axis.
        return frame, resized.transpose((2, 0, 1))
Esempio n. 7
0
 def get_scores(self, pos_x, pos_y, scaled_search_area, template_z, filename,
                design, final_score_sz):
     """Score the search crops of the frame at ``filename``.

     Returns:
         Tuple ``(image, scores_up)``: the image opened from ``filename`` and
         a numpy array of score maps resized to
         ``final_score_sz`` x ``final_score_sz`` with the map index as the
         first axis.
     """
     frame = Image.open(filename)
     channel_means = ImageStat.Stat(frame).mean

     sz0 = scaled_search_area[0]
     sz1 = scaled_search_area[1]
     sz2 = scaled_search_area[2]

     # Padding is driven by the third search size.
     padded, npad = pad_frame(frame, frame.size, pos_x, pos_y, sz2,
                              channel_means)
     crops = extract_crops_x(padded, npad, pos_x, pos_y, sz0, sz1, sz2,
                             design.search_sz)

     # This is where the actual conv net is called.
     features_x = self.branch(Variable(crops))
     # One copy of the exemplar features per search crop.
     tiled_z = template_z.repeat(features_x.size(0), 1, 1, 1)
     raw_scores = self.bn_adjust(self.xcorr(tiled_z, features_x))

     # TODO: any elegant alternator?
     # Move the first axis last so cv2.resize sees it as channels.
     score_maps = raw_scores.squeeze().permute(1, 2, 0).data.numpy()
     resized = cv2.resize(score_maps, (final_score_sz, final_score_sz),
                          interpolation=cv2.INTER_CUBIC)
     # Bring the map index back to the first axis.
     return frame, resized.transpose((2, 0, 1))
Esempio n. 8
0
def build_tracking_graph(final_score_sz, design, env):
    """Build the tracking graph that reads a JPEG frame from a file path.

    Uses ``tf.stack`` and the ``axis`` keyword instead of the removed
    ``tf.pack`` and the deprecated ``reduction_indices`` keyword, so the
    graph can be built on TensorFlow 1.x.

    Args:
        final_score_sz: side length of the square score maps after resizing.
        design: object providing ``pad_with_image_mean``, ``exemplar_sz``,
            ``search_sz`` and ``net``.
        env: object providing ``root_pretrained``.

    Returns:
        Tuple ``(filename, image, templates_z, scores_up)``.

    NOTE(review): the ``*_ph`` names are not defined in this function;
    presumably they are module-level placeholders -- confirm.
    """
    filename = tf.placeholder(tf.string, [], name='filename')
    image_file = tf.read_file(filename)
    # Decode the image as a JPEG file, this will turn it into a Tensor
    image = tf.image.decode_jpeg(image_file)
    image = 255.0 * tf.image.convert_image_dtype(image, tf.float32)
    frame_sz = tf.shape(image)

    # TODO: Decide how much we can remove from this block. I can't imagine
    # that we should be padding in this case. That is,
    # design.pad_with_image_mean is probably always False.
    # used to pad the crops
    if design.pad_with_image_mean:
        avg_chan = tf.reduce_mean(image, axis=(0, 1), name='avg_chan')
    else:
        avg_chan = None
    # pad the frame if necessary
    frame_padded_z, npad_z = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       z_sz_ph, avg_chan)
    frame_padded_z = tf.cast(frame_padded_z, tf.float32)
    # extract tensor of z_crops
    z_crops = extract_crops_z(frame_padded_z, npad_z, pos_x_ph, pos_y_ph,
                              z_sz_ph, design.exemplar_sz)
    frame_padded_x, npad_x = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       x_sz2_ph, avg_chan)
    frame_padded_x = tf.cast(frame_padded_x, tf.float32)
    # extract tensor of x_crops (3 scales)
    x_crops = extract_crops_x(frame_padded_x, npad_x, pos_x_ph, pos_y_ph,
                              x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz)

    # use crops as input of (MatConvnet imported) pre-trained
    # fully-convolutional Siamese net
    template_z, templates_x, p_names_list, p_val_list = _create_siamese(
        os.path.join(env.root_pretrained, design.net), x_crops, z_crops)
    template_z = tf.squeeze(template_z)
    # tf.pack was renamed to tf.stack; the old name no longer exists in TF 1.x
    templates_z = tf.stack([template_z, template_z, template_z])
    # compare templates via cross-correlation
    scores = _match_templates(templates_z, templates_x, p_names_list,
                              p_val_list)
    # upsample the score maps
    scores_up = tf.image.resize_images(scores,
                                       [final_score_sz, final_score_sz],
                                       method=tf.image.ResizeMethod.BICUBIC,
                                       align_corners=True)
    return filename, image, templates_z, scores_up
Esempio n. 9
0
 def get_scores(self, pos_x, pos_y, scaled_search_area, template_z, image,
                design, final_score_sz):
     """Score the search crops of a frame against the exemplar features.

     ``image`` may be a path string, in which case it is opened with
     ``Image.open``, or an already-loaded image object. The search crops are
     always moved to CUDA before being passed to ``self.branch``.

     Returns:
         Tuple ``(image, scores_up)``: the (possibly just opened) image and a
         numpy array of score maps resized to
         ``final_score_sz`` x ``final_score_sz`` with the map index as the
         first axis.
     """
     frame = Image.open(image) if isinstance(image, six.string_types) else image
     channel_means = ImageStat.Stat(frame).mean

     sz0 = scaled_search_area[0]
     sz1 = scaled_search_area[1]
     sz2 = scaled_search_area[2]

     # Padding is driven by the third search size.
     padded, npad = pad_frame(frame, frame.size, pos_x, pos_y, sz2,
                              channel_means)
     crops = extract_crops_x(padded, npad, pos_x, pos_y, sz0, sz1, sz2,
                             design.search_sz)

     features_x = self.branch(Variable(crops.cuda()))
     # One copy of the exemplar features per search crop.
     tiled_z = template_z.repeat(features_x.size(0), 1, 1, 1)
     raw_scores = self.bn_adjust(self.xcorr(tiled_z, features_x))

     # TODO: any elegant alternator?
     # Move the first axis last so cv2.resize sees it as channels.
     score_maps = raw_scores.squeeze().permute(1, 2, 0).data.cpu().numpy()
     resized = cv2.resize(score_maps, (final_score_sz, final_score_sz),
                          interpolation=cv2.INTER_CUBIC)
     # Bring the map index back to the first axis.
     return frame, resized.transpose((2, 0, 1))
Esempio n. 10
0
def build_tracking_graph(root_dir, final_score_sz, design, env, hp):
    """ Defines and builds the tracking graph.

    Args:
      root_dir: string: path to the root directory of this project.
        Accepted for interface compatibility; not read by this function.
      final_score_sz: int: size of the score map after upsampling.
      design: namespace: design parameters.
      env: namespace: environment parameters. Not read by this function.
      hp: namespace: hyperparameters.

    Returns:
      string tensor: placeholder for the image path to be read.
      3D tensor: the image read from the path.
      4D tensor: instance features from one or more layers concatenated
        by channels. See siam_mcf_net.inference comments for more details.
      4D tensor: exemplar features from one or more layers concatenated
        by channels. See siam_mcf_net.inference comments for more details.
      5D tensor: batch of score heatmaps for each of the selected layers.
    """
    filename = tf.placeholder(tf.string, [], name='filename')
    image_file = tf.read_file(filename)
    # Decode the image as a JPEG file, this will turn it into a Tensor
    image = tf.image.decode_jpeg(image_file, channels=3)
    image = 255.0 * tf.image.convert_image_dtype(image, tf.float32)
    frame_sz = tf.shape(image)
    # used to pad the crops
    if design.pad_with_image_mean:
        avg_chan = tf.reduce_mean(image, axis=(0, 1), name='avg_chan')
    else:
        avg_chan = None
    # pad the frame if necessary
    frame_padded_z, npad_z = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       z_sz_ph, avg_chan)
    frame_padded_z = tf.cast(frame_padded_z, tf.float32)
    # extract tensor of z_crops
    z_crops = extract_crops_z(frame_padded_z, npad_z, pos_x_ph, pos_y_ph,
                              z_sz_ph, design.exemplar_sz)
    # repeat the exemplar crop once per scale along the first axis
    z_crops = tf.concat([z_crops for _ in range(hp.scale_num)], axis=0)

    frame_padded_x, npad_x = pad_frame(image, frame_sz, pos_x_ph, pos_y_ph,
                                       x_sz2_ph, avg_chan)
    frame_padded_x = tf.cast(frame_padded_x, tf.float32)
    # extract tensor of x_crops (3 scales)
    x_crops = extract_crops_x(frame_padded_x, npad_x, pos_x_ph, pos_y_ph,
                              x_sz0_ph, x_sz1_ph, x_sz2_ph, design.search_sz)

    templates_z, templates_x, scores_list = _create_siamese(
        design, x_crops, z_crops, use_res_reduce=True)

    # upsample the score maps
    scores_up_list = [
        tf.image.resize_images(s, [final_score_sz, final_score_sz],
                               method=tf.image.ResizeMethod.BICUBIC,
                               align_corners=True) for s in scores_list
    ]
    scores_up_list = tf.stack(scores_up_list)
    # Keep every static dimension except the second, which is set to the
    # number of scales. The raw (non-upsampled) score list used to be
    # stacked and reshaped the same way, but the result was never returned
    # or used, so those ops are no longer built.
    stacked_shape = scores_up_list.get_shape()
    scores_up_list = tf.reshape(scores_up_list, [
        stacked_shape[0], hp.scale_num,
        stacked_shape[2],
        stacked_shape[3],
        stacked_shape[4]
    ])

    return filename, image, templates_x, templates_z, scores_up_list
Esempio n. 11
0
def make_siameseFC(env, design, hp):
    """Build the training graph of the Siamese network.

    Args:
        env: object providing ``image_type`` (``'jpg'`` or ``'bmp'``).
        design: object providing ``pad_with_image_mean``, ``exemplarSize``,
            ``instacneSize`` (attribute name as spelled by the project),
            ``score_size`` and ``dPos``.
        hp: object providing ``learning_rate``.

    Returns:
        Tuple ``(filename, _siam_net_z, loss, train_op)``: the image path
        placeholder, the first output of ``create_net_define_var``, the
        scalar loss and the Adam training op.

    Raises:
        ValueError: if ``env.image_type`` is neither ``'jpg'`` nor ``'bmp'``.

    NOTE(review): ``pos_x``, ``pos_y``, ``z_size`` and ``x_size`` are not
    defined here; presumably module-level placeholders -- confirm. The
    original comment says this function can be reused for both im_z and im_x.
    """
    filename = tf.placeholder(tf.string, [], name='filename')
    image_file = tf.read_file(filename)

    # Decode the image file into a tensor, choosing the decoder by type.
    if env.image_type == 'jpg':
        image = tf.image.decode_jpeg(image_file)
    elif env.image_type == 'bmp':
        image = tf.image.decode_bmp(image_file)
    else:
        # Fail with a clear message instead of an unbound-variable error
        # on the first use of `image` below.
        raise ValueError(
            "unsupported env.image_type %r (expected 'jpg' or 'bmp')" %
            (env.image_type,))

    # Convert to float32, then multiply by 255 (so values are NOT left in
    # the [0, 1] range).
    im = 255.0 * tf.image.convert_image_dtype(image, tf.float32)

    frame_size = tf.shape(im)

    if design.pad_with_image_mean:
        # get the mean pixel value of each channel
        avg_chan = tf.reduce_mean(im, axis=(0, 1), name='avg_chan')
    else:
        avg_chan = None

    # pad the image before cropping
    # z
    im_padded_z, npad_z = pad_frame(im, frame_size, pos_x, pos_y, z_size,
                                    avg_chan)
    im_padded_z = tf.cast(im_padded_z, tf.float32)
    # crop the z patch
    crop_z = extract_crops(im_padded_z, npad_z, pos_x, pos_y, z_size,
                           design.exemplarSize)
    # x
    im_padded_x, npad_x = pad_frame(im, frame_size, pos_x, pos_y, x_size,
                                    avg_chan)
    im_padded_x = tf.cast(im_padded_x, tf.float32)
    # crop the x patch
    crop_x = extract_crops(im_padded_x, npad_x, pos_x, pos_y, x_size,
                           design.instacneSize)

    # use the crops as input of the Siamese net to train
    _siam_net_z, _siam_net_x = create_net_define_var(crop_x, crop_z)
    # evaluate the correlation between x and z
    scores = _match_templates(_siam_net_z, _siam_net_x)
    # upsample the score maps
    scores_up = tf.image.resize_images(scores,
                                       [design.score_size, design.score_size],
                                       method=tf.image.ResizeMethod.BICUBIC,
                                       align_corners=True)

    scores_gt = create_label([design.score_size, design.score_size],
                             design.dPos)

    # Flatten prediction and ground truth to the same number of elements
    # (the product of the four dynamic dimensions of scores_up).
    Hz, Wz, Bz, Cz = tf.unstack(tf.shape(scores_up))
    scores_up_re = tf.squeeze(
        tf.reshape(scores_up, (1, 1, 1, Hz * Wz * Bz * Cz)))
    scores_gt_re = tf.squeeze(
        tf.reshape(scores_gt, (1, 1, 1, Hz * Wz * Bz * Cz)))

    # Root-mean-square error between prediction and ground truth.
    # (Open question from the original author: is the tf.sqrt needed?)
    print('begin calculate the loss')
    loss = tf.sqrt(
        tf.reduce_mean(
            tf.square(scores_up_re - tf.cast(scores_gt_re, tf.float32))))
    tf.summary.scalar('loss', loss)

    # train -- back propagation
    # the train_op trains the variables defined with "tf.Variable" or
    # "tf.get_variable"
    train_op = tf.train.AdamOptimizer(hp.learning_rate).minimize(loss)
    print('loss end1')

    return filename, _siam_net_z, loss, train_op
Esempio n. 12
0
    def build_tracking_graph_train(self, final_score_sz, design, env, hp,
                                   frame_sz):
        """Build the batched training graph.

        Each image of the batch is padded and cropped individually, the
        crops are fed to the Siamese net, and a loss / distance metric is
        computed on one score map per image.

        Args:
            final_score_sz: side length of the square score maps after
                resizing.
            design: object providing ``pad_with_image_mean``,
                ``exemplar_sz`` and ``search_sz``.
            env: not read by this method.
            hp: object providing ``lr``; also forwarded to
                ``self.distance``.
            frame_sz: list appended to ``[self.batch_size]`` to form the
                shape of the input placeholder.

        Returns:
            Tuple ``(image, z_crops, x_crops, templates_z, scores_up, loss,
            train_step, distance_to_gt, summary, templates_x, max_pos_x,
            max_pos_y)``.
        """
        image = tf.placeholder(tf.float32, [self.batch_size] + frame_sz,
                               name="input_image")
        # used to pad the crops: one mean per image and channel
        if design.pad_with_image_mean:
            avg_chan = tf.reduce_mean(image, axis=(1, 2), name='avg_chan')
        else:
            avg_chan = None

        single_crops_z = []
        single_crops_x = []
        # slice the batch into single images and crop them one by one
        for batch in range(self.batch_size):
            single_z = image[batch]

            single_pos_x_ph = self.batched_pos_x_ph[batch]
            single_pos_y_ph = self.batched_pos_y_ph[batch]
            single_z_sz_ph = self.batched_z_sz_ph[batch]

            single_x_sz0_ph = self.batched_x_sz0_ph[batch]
            single_x_sz1_ph = self.batched_x_sz1_ph[batch]
            single_x_sz2_ph = self.batched_x_sz2_ph[batch]

            # avg_chan is None when mean padding is disabled; indexing it
            # unconditionally would raise a TypeError.
            frame_padded_z, npad_z = pad_frame(
                single_z, frame_sz, single_pos_x_ph, single_pos_y_ph,
                single_z_sz_ph,
                avg_chan[batch] if avg_chan is not None else None)
            frame_padded_z = tf.cast(frame_padded_z, tf.float32)
            # extract tensor of z_crops
            single_crops_z.append(
                tf.squeeze(
                    extract_crops_z(frame_padded_z, npad_z, single_pos_x_ph,
                                    single_pos_y_ph, single_z_sz_ph,
                                    design.exemplar_sz)))

            single_x = image[batch]

            frame_padded_x, npad_x = pad_frame(
                single_x, frame_sz, single_pos_x_ph, single_pos_y_ph,
                single_x_sz2_ph,
                avg_chan[batch] if avg_chan is not None else None)
            frame_padded_x = tf.cast(frame_padded_x, tf.float32)
            # extract tensor of x_crops (3 scales)
            single_crops_x.append(
                tf.squeeze(
                    extract_crops_x(frame_padded_x, npad_x, single_pos_x_ph,
                                    single_pos_y_ph, single_x_sz0_ph,
                                    single_x_sz1_ph, single_x_sz2_ph,
                                    design.search_sz)))

        # stack the per-image crops
        z_crops = tf.stack(single_crops_z)
        x_crops = tf.stack(single_crops_x)
        # merge the first two axes of the stacked x crops into one
        x_crops_shape = x_crops.get_shape().as_list()
        x_crops = tf.reshape(x_crops, [x_crops_shape[0] * x_crops_shape[1]] +
                             x_crops_shape[2:])
        print("shape of single_crops_x: ", single_crops_x[0].shape,
              "shape of x_crops: ", x_crops.shape)
        print("shape of single_crops_z: ", single_crops_z[0].shape,
              "shape of z_crops: ", z_crops.shape)

        # use crops as input of the fully-convolutional Siamese net
        template_z, templates_x = self._create_siamese_train(
            x_crops, z_crops, design)
        print("shape of template_z:", template_z.shape)

        # repeat each image's exemplar features three times, consecutively,
        # to pair with the three search crops of that image
        template_z_list = [
            template_z[batch]
            for batch in range(self.batch_size)
            for _ in range(3)
        ]
        templates_z = tf.stack(template_z_list)
        print("shape of templates_z:", templates_z.get_shape().as_list())
        print("shape of templates_x:", templates_x.get_shape().as_list())

        # compare templates via cross-correlation
        scores = self._match_templates_train(templates_z, templates_x)
        print("shape of small score map:", scores.get_shape().as_list())

        # upsample the score maps
        scores_up = tf.image.resize_bilinear(scores,
                                             [final_score_sz, final_score_sz],
                                             align_corners=True)
        print("shape of big score map:", scores_up.get_shape().as_list())

        # keep only the first of the three score maps of every image
        score = tf.squeeze(
            tf.stack([scores_up[3 * batch]
                      for batch in range(self.batch_size)]))

        loss = self.cal_loss(score)
        distance_to_gt, max_pos_x, max_pos_y = self.distance(
            score, final_score_sz, hp)
        train_step = tf.train.AdamOptimizer(hp.lr).minimize(loss)
        summary = tf.summary.scalar('distance_to_gt', distance_to_gt)

        return image, z_crops, x_crops, templates_z, scores_up, loss, train_step, distance_to_gt, summary, templates_x, max_pos_x, max_pos_y
Esempio n. 13
0
	def build_tracking_graph_train(self, final_score_sz, design, env, hp):
		"""Build the batched training graph for images of dynamic size.

		Each image of the batch is padded and cropped individually, the
		crops are fed to the Siamese net, and a loss / distance metric is
		computed on one score map per image.

		Args:
			final_score_sz: side length of the square score maps after
				resizing.
			design: object providing ``pad_with_image_mean``,
				``exemplar_sz`` and ``search_sz``.
			env: not read by this method.
			hp: object providing ``lr``; also forwarded to
				``self.distance``.

		Returns:
			Tuple ``(image, z_crops, x_crops, templates_z, scores_up, loss,
			train_step, distance_to_gt, summary)``.
		"""
		image = tf.placeholder(tf.float32, [self.batch_size] + [None, None, 3], name = "input_image")
		# Get frame_sz by counting, with a fold, the elements along each
		# successive axis of the first image of the batch.
		# NOTE(review): the original locals were named image_w, image_h,
		# image_c in this order; the first count is over the leading axis
		# of a single image -- confirm which of height/width pad_frame
		# expects first.
		dim0_len = tf.foldl((lambda prev, cur: prev + 1), image[0], initializer = 0)
		dim1_len = tf.foldl((lambda prev, cur: prev + 1), image[0][0], initializer = 0)
		dim2_len = tf.foldl((lambda prev, cur: prev + 1), image[0][0][0], initializer = 0)
		frame_sz = [dim0_len, dim1_len, dim2_len]

		# used to pad the crops: one mean per image and channel
		if design.pad_with_image_mean:
			avg_chan = tf.reduce_mean(image, axis=(1, 2), name='avg_chan')
		else:
			avg_chan = None

		single_crops_z = []
		single_crops_x = []
		# slice a batch into single images, and crop them one by one
		for batch in range(self.batch_size):
			single_pos_x_ph = self.batched_pos_x_ph[batch]
			single_pos_y_ph = self.batched_pos_y_ph[batch]
			single_z_sz_ph = self.batched_z_sz_ph[batch]
			single_x_sz0_ph = self.batched_x_sz0_ph[batch]
			single_x_sz1_ph = self.batched_x_sz1_ph[batch]
			single_x_sz2_ph = self.batched_x_sz2_ph[batch]

			# pad crop z
			single_z = image[batch]
			# avg_chan is None when mean padding is disabled; indexing it
			# unconditionally would raise a TypeError.
			frame_padded_z, npad_z = pad_frame(
				single_z, frame_sz, single_pos_x_ph, single_pos_y_ph,
				single_z_sz_ph,
				avg_chan[batch] if avg_chan is not None else None)
			frame_padded_z = tf.cast(frame_padded_z, tf.float32)
			# extract tensor of z_crops
			single_crops_z.append(tf.squeeze(extract_crops_z(frame_padded_z, npad_z, single_pos_x_ph, single_pos_y_ph, single_z_sz_ph, design.exemplar_sz)))

			# pad crop x
			single_x = image[batch]
			frame_padded_x, npad_x = pad_frame(
				single_x, frame_sz, single_pos_x_ph, single_pos_y_ph,
				single_x_sz2_ph,
				avg_chan[batch] if avg_chan is not None else None)
			frame_padded_x = tf.cast(frame_padded_x, tf.float32)

			# extract tensor of x_crops (3 scales)
			single_crops_x.append(tf.squeeze(extract_crops_x(frame_padded_x, npad_x, single_pos_x_ph, single_pos_y_ph, single_x_sz0_ph, single_x_sz1_ph, single_x_sz2_ph, design.search_sz)))

		# stack the cropped single images
		z_crops = tf.stack(single_crops_z)
		x_crops = tf.stack(single_crops_x)

		# merge the first two axes of the stacked x crops into one
		x_crops_shape = x_crops.get_shape().as_list()
		x_crops = tf.reshape(x_crops, [x_crops_shape[0] * x_crops_shape[1]] + x_crops_shape[2: ])
		print("shape of single_crops_x: ", single_crops_x[0].shape, "shape of x_crops: ", x_crops.shape)
		print("shape of single_crops_z: ", single_crops_z[0].shape, "shape of z_crops: ", z_crops.shape)

		# use crops as input of fully-convolutional Siamese net
		template_z, templates_x = self._create_siamese_train(x_crops, z_crops, design)
		print("shape of template_z:", template_z.shape)

		# extend template_z to match the triple scaled feature map of x:
		# each image's exemplar features are repeated three times in a row
		template_z_list = [
			template_z[batch]
			for batch in range(self.batch_size)
			for _ in range(3)
		]
		templates_z = tf.stack(template_z_list)
		print("shape of templates_z:", templates_z.get_shape().as_list())
		print("shape of templates_x:", templates_x.get_shape().as_list())

		# compare templates via cross-correlation
		scores = self._match_templates_train(templates_z, templates_x)
		# resize to final_score_sz
		scores_up = tf.image.resize_bilinear(scores, [final_score_sz, final_score_sz], align_corners=True)
		print("shape of big score map:", scores_up.get_shape().as_list())

		# only choose one scale for each image: the first of its three maps
		score = tf.squeeze(tf.stack([scores_up[3 * batch] for batch in range(self.batch_size)]))

		loss = self.cal_loss(score)
		distance_to_gt, max_pos_x, max_pos_y = self.distance(score, final_score_sz, hp)
		train_step = tf.train.AdamOptimizer(hp.lr).minimize(loss)
		summary = tf.summary.scalar('distance_to_gt', distance_to_gt)

		return image, z_crops, x_crops, templates_z, scores_up, loss, train_step, distance_to_gt, summary