Example #1
def generate_ranked_samples(feature, filter_size):
    """
    function: generates samples with different scales and offsets based on the input target positions
              make the size ratio from 0.5 to 2 as possible
    args:
        feature -
        filter_size - [height, width]
    results:
        samples -
        pair_labels -
    """
    feature_size = torch.tensor(
        feature.shape).numpy()  # [batch, channel, height, width]
    assert np.prod((filter_size % 2).astype(int)) == 1, \
        'filter_size must contain odd numbers.'
    # target_location in feature [y_c, x_c, height, width]
    target_location = np.append(np.round(feature_size[-2:] / 2 - 1),
                                filter_size)
    b_filter_size = np.floor(filter_size / 2).astype(int)  #[height, width]
    feature_c_height = (np.floor(feature_size[-2] / 2) + 1).astype(int)
    re_sizes = (np.arange(np.max(b_filter_size), 2 *
                          (feature_c_height - 1) + 1) * 2 + 1).astype(int)
    c_index = (np.where(re_sizes == feature_size[-2]))[0][0]

    left_pad_num = c_index  #0-index
    right_pad_num = feature_size[-2] - (c_index + 1)
    pad_num = min(left_pad_num, right_pad_num)
    re_sizes = re_sizes[c_index - pad_num:c_index + pad_num + 1]
    ratios = re_sizes / feature_size[-2]
    target_locations = np.concatenate(
        ((-b_filter_size[0] + np.floor(
            (re_sizes - 1) / 2))[:, np.newaxis].astype(int),
         (-b_filter_size[1] + np.floor(
             (re_sizes - 1) / 2))[:, np.newaxis].astype(int),
         (b_filter_size[0] + np.floor(
             (re_sizes - 1) / 2))[:, np.newaxis].astype(int),
         (b_filter_size[1] + np.floor(
             (re_sizes - 1) / 2))[:, np.newaxis].astype(int)),
        axis=1)

    re_features = [
        resize_tensor(feature, (re_size, re_size), align_corners=True)
        for re_size in re_sizes
    ]
    target_features = [
        re_feat[:, :, loc[0]:loc[2] + 1, loc[1]:loc[3] + 1]
        for (loc, re_feat) in zip(target_locations, re_features)
    ]
    samples = torch.cat(target_features, dim=0)

    labels = 1 - (ratios - 1)**2
    pair_labels = generate_pair_label(labels)
    return samples, pair_labels
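
A minimal usage sketch, assuming resize_tensor and generate_pair_label from this project are in scope and that features follow the [batch, channel, height, width] convention above (the shapes below are hypothetical):

import numpy as np
import torch

feature = torch.rand(1, 64, 25, 25)  # hypothetical backbone feature map
filter_size = np.array([5, 5])       # odd [height, width], as the assert requires
samples, pair_labels = generate_ranked_samples(feature, filter_size)
print(samples.shape)                 # one cropped sample per resize ratio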
Example #2
            optim.zero_grad()  # clear accumulated gradients
            loss.backward(retain_graph=True)  # backpropagate through the graph
            optim.step()  # update the parameters


# generate a Gaussian label
def generate_gauss_label(size, sigma, center=(0, 0), end_pad=(0, 0)):
    """
    function: generate gauss label for L2 loss
    """
    shift_x = torch.arange(-(size[1] - 1) / 2, (size[1] + 1) / 2 + end_pad[1])
    shift_y = torch.arange(-(size[0] - 1) / 2, (size[0] + 1) / 2 + end_pad[0])

    shift_y, shift_x = torch.meshgrid(shift_y, shift_x)

    alpha = 0.2
    gauss_label = torch.exp(-1 * alpha *
                            ((shift_y - center[0])**2 / (sigma[0]**2) +
                             (shift_x - center[1])**2 / (sigma[1]**2)))

    return gauss_label


if __name__ == '__main__':
    from feature_utils_v2 import resize_tensor

    x = torch.rand(1, 1, 3, 3)  # avoid shadowing the builtin `input`
    print(x)
    resized_x = resize_tensor(x, [7, 7])
    print(resized_x)
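    # added demo (assumes generate_gauss_label defined above): a 7x7
    # Gaussian label with sigma (2, 2); the peak value 1.0 sits at the centre
    gauss_label = generate_gauss_label((7, 7), (2.0, 2.0))
    print(gauss_label)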
Example #3
    def tracking(self, img_path, frame, visualize=False):
        #-------------read image and rescale the image-----------------------------
        img = default_image_loader(
            img_path)  # ndarray [height, width, channel]
        image = cv2.resize(img,
                           tuple((np.ceil(
                               np.array(img.shape[0:2][::-1]) *
                               self.rescale)).astype(int)),
                           interpolation=cv2.INTER_LINEAR)
        tic = cv2.getTickCount()
        #-------------get multi-scale feature--------------------------------------
        features = get_subwindow_feature(self.model,
                                         image,
                                         self.srch_window_location,
                                         self.input_size,
                                         visualize=visualize)
        feature_size = (torch.tensor(
            features[0].shape)).numpy().astype(int)[-2:]
        #selected_features = fuse_feature(features)
        selected_features = features_selection(features,
                                               self.feature_weights,
                                               self.balance_weights,
                                               mode='reduction')
        selected_features_1 = resize_tensor(
            selected_features, tuple(feature_size + self.feature_pad))
        selected_features_3 = resize_tensor(
            selected_features, tuple(feature_size - self.feature_pad))
        selected_features_1 = selected_features_1[
            :, :,
            self.b_feature_pad:feature_size[0] + self.b_feature_pad,
            self.b_feature_pad:feature_size[1] + self.b_feature_pad]

        selected_features_3 = torch.nn.functional.pad(
            selected_features_3, (self.b_feature_pad, self.b_feature_pad,
                                  self.b_feature_pad, self.b_feature_pad))
        scaled_features = torch.cat(
            (selected_features_1, selected_features, selected_features_3),
            dim=0)

        #-------------get response map-----------------------------------------------
        response_map = self.siamese_model(scaled_features,
                                          self.exemplar_features).to('cpu')
        scaled_response_map = torch.squeeze(
            resize_tensor(response_map,
                          tuple(self.srch_window_location[-2:].astype(int)),
                          mode='bicubic',
                          align_corners=True))
        hann_window = generate_2d_window(
            'hann', tuple(self.srch_window_location[-2:].astype(int)),
            scaled_response_map.shape[0])
        scaled_response_maps = scaled_response_map + hann_window

        #-------------find max-response----------------------------------------------
        scale_ind = calculate_scale(scaled_response_maps,
                                    self.config.MODEL.SCALE_WEIGHTS)
        response_map = scaled_response_maps[scale_ind, :, :].numpy()
        max_h, max_w = np.where(response_map == np.max(response_map))
        if len(max_h) > 1:
            max_h = max_h[:1]
        if len(max_w) > 1:
            max_w = max_w[:1]

        #-------------update tracking state and save tracking result----------------------------------------
        target_loc_center = np.append(
            self.target_location[0:2] + (self.target_location[2:4]) / 2,
            self.target_location[2:4])
        target_loc_center[0:2] = target_loc_center[0:2] + (
            np.append(max_w, max_h) - (self.srch_window_location[2:4] / 2 - 1)
        ) * self.config.MODEL.SCALES[scale_ind]
        target_loc_center[
            2:4] = target_loc_center[2:4] * self.config.MODEL.SCALES[scale_ind]
        #print('target_loc_center in current frame:',target_loc_center)
        self.target_location = np.append(
            target_loc_center[0:2] - (target_loc_center[2:4]) / 2,
            target_loc_center[2:4])
        #print('target_location in current frame:', self.target_location)

        self.srch_window_location[2:4] = (round_python2(
            self.srch_window_location[2:4] *
            self.config.MODEL.SCALES[scale_ind]))
        self.srch_window_location[0:2] = target_loc_center[0:2] - (
            self.srch_window_location[2:4]) / 2

        tracking_bbox = (self.target_location +
                         np.array([1, 1, 0, 0])) / self.rescale - np.array(
                             [1, 1, 0, 0])  #tracking_bbox: 0-index
        self.results.append(tracking_bbox)
        self.toc += cv2.getTickCount() - tic
        if self.display:
            self.visualization(img, tracking_bbox.astype(int), frame)
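
The core of the multi-scale step above is an invariant: after resizing by +/- feature_pad, the larger map is centre-cropped and the smaller map is zero-padded so that all three scales share the original spatial size and can be batched. A standalone sketch of that invariant, with torch.nn.functional.interpolate standing in for this project's resize_tensor (an assumption) and hypothetical sizes:

import torch
import torch.nn.functional as F

feat = torch.rand(1, 32, 22, 22)  # hypothetical search-window features
pad, b_pad = 4, 2                 # feature_pad and its half b_feature_pad

up = F.interpolate(feat, size=(22 + pad, 22 + pad),
                   mode='bilinear', align_corners=True)
up = up[:, :, b_pad:22 + b_pad, b_pad:22 + b_pad]   # centre crop back to 22x22
down = F.interpolate(feat, size=(22 - pad, 22 - pad),
                     mode='bilinear', align_corners=True)
down = F.pad(down, (b_pad, b_pad, b_pad, b_pad))    # zero-pad back to 22x22
stack = torch.cat((up, feat, down), dim=0)          # [3, 32, 22, 22]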
Example #4
    def tracking(self, img_path, frame, visualize=False):
        #-------------read image and rescale the image-----------------------------
        img = default_image_loader(
            img_path)  # ndarray [height, width, channel]
        image = cv2.resize(img,
                           tuple((np.ceil(
                               np.array(img.shape[0:2][::-1]) *
                               self.rescale)).astype(int)),
                           interpolation=cv2.INTER_LINEAR)
        tic = cv2.getTickCount()
        #-------------get multi-scale feature--------------------------------------
        features = get_subwindow_feature(self.model, image,
                                         self.srch_window_location,
                                         self.input_size)
        feature_size = (torch.tensor(
            features[0].shape)).numpy().astype(int)[-2:]

        #selected_features = fuse_feature(features)

        #-------------select the target-aware features of the new frame (not the exemplar)--------------
        selected_features = features_selection(features,
                                               self.feature_weights,
                                               self.balance_weights,
                                               mode='reduction')
        selected_features_1 = resize_tensor(
            selected_features, tuple(feature_size + self.feature_pad))
        selected_features_3 = resize_tensor(
            selected_features, tuple(feature_size - self.feature_pad))
        selected_features_1 = selected_features_1[
            :, :,
            self.b_feature_pad:feature_size[0] + self.b_feature_pad,
            self.b_feature_pad:feature_size[1] + self.b_feature_pad]

        selected_features_3 = torch.nn.functional.pad(
            selected_features_3, (self.b_feature_pad, self.b_feature_pad,
                                  self.b_feature_pad, self.b_feature_pad))
        scaled_features = torch.cat(
            (selected_features_1, selected_features, selected_features_3),
            dim=0)

        #-------------get response map (correlation of the scaled features with the exemplar)-------------------------
        response_map = self.siamese_model(scaled_features,
                                          self.exemplar_features).to('cpu')

        scaled_response_map = torch.squeeze(
            resize_tensor(response_map,
                          tuple(self.srch_window_location[-2:].astype(int)),
                          mode='bicubic',
                          align_corners=True))

        hann_window = generate_2d_window(
            'hann', tuple(self.srch_window_location[-2:].astype(int)),
            scaled_response_map.shape[0])
        scaled_response_maps = scaled_response_map + hann_window

        #-------------calculate ROI----------------------------------------------
        scale_ind = calculate_scale(scaled_response_maps,
                                    self.config.MODEL.SCALE_WEIGHTS)

        #response_map_reshaped = response_map[scale_ind,0,:,:].numpy()
        #center_h, center_w = np.where(response_map_reshaped == np.max(response_map_reshaped)) #find center ROI
        #center_h, center_w = center_h[0], center_w[0]

        #region_size = self.exemplar_features[0].shape[1:3]
        #width_size = int(region_size[0]/2)
        #width_remainder = region_size[0] % 2
        #height_size = int(region_size[1]/2)
        #height_remainder = region_size[1] % 2

        #plt.imshow(response_map_reshaped)
        #plt.plot(center_w, center_h, "xr", markersize=5)
        #plt.plot(center_w - width_size, center_h - height_size, "or", markersize=5)
        #plt.plot(center_w + width_size + width_remainder, center_h + height_size + height_remainder, "or", markersize=5)
        #plt.show()

        #roi_features = scaled_features[scale_ind, : , center_w - width_size : center_w + width_size + width_remainder, center_h - height_size : center_h + height_size + height_remainder]
        #roi_size = roi_features.shape[1:3]

        #-------------calculate Global Average Pooling current frame features--------------------
        #roi_features_gap = nn.AvgPool2d(roi_size)(roi_features)

        #-------------calculate Affinity Matrix--------------------
        #self.affinity_matrix = torch.sum(self.exemplar_features_gap * roi_features_gap) / len(roi_features_gap)

        #-------------find max-response----------------------------------------------
        response_map = scaled_response_maps[scale_ind, :, :].numpy()
        max_h, max_w = np.where(response_map == np.max(response_map))
        if len(max_h) > 1:
            max_h = max_h[:1]
        if len(max_w) > 1:
            max_w = max_w[:1]

        #-------------update tracking state and save tracking result----------------------------------------
        target_loc_center = np.append(
            self.target_location[0:2] + (self.target_location[2:4]) / 2,
            self.target_location[2:4])
        target_loc_center[0:2] = target_loc_center[0:2] + (
            np.append(max_w, max_h) - (self.srch_window_location[2:4] / 2 - 1)
        ) * self.config.MODEL.SCALES[scale_ind]
        target_loc_center[
            2:4] = target_loc_center[2:4] * self.config.MODEL.SCALES[scale_ind]
        #print('target_loc_center in current frame:',target_loc_center)
        self.target_location = np.append(
            target_loc_center[0:2] - (target_loc_center[2:4]) / 2,
            target_loc_center[2:4])
        #print('target_location in current frame:', self.target_location)

        self.srch_window_location[2:4] = (round_python2(
            self.srch_window_location[2:4] *
            self.config.MODEL.SCALES[scale_ind]))
        self.srch_window_location[0:2] = target_loc_center[0:2] - (
            self.srch_window_location[2:4]) / 2

        #print('srch_window_location: ', self.srch_window_location)

        tracking_bbox = (self.target_location +
                         np.array([1, 1, 0, 0])) / self.rescale - np.array(
                             [1, 1, 0, 0])  #tracking_bbox: 0-index
        self.results.append(tracking_bbox)

        #-------------calculate global average pooling of new frame features-------------------
        # use the full (height, width) as the pooling kernel; shape[1:2]
        # yields a 1-tuple, which AvgPool2d rejects (cf. roi_size above)
        kernel_size = selected_features[0].shape[1:3]
        self.selected_features_gap = nn.AvgPool2d(kernel_size)(
            selected_features[0])

        self.toc += cv2.getTickCount() - tic
        if self.display:
            self.visualization(img, tracking_bbox.astype(int), frame)
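
The state update at the end maps the response-map peak back to image coordinates: the displacement of (max_w, max_h) from the search-window centre, multiplied by the chosen scale factor, shifts the target centre, and the width/height are scaled by the same factor. A minimal numeric sketch of that update (all values hypothetical):

import numpy as np

target_location = np.array([40., 30., 20., 10.])  # [x, y, w, h], top-left based
srch_window = np.array([30., 20., 64., 64.])      # [x, y, w, h]
max_w, max_h, scale = 36, 28, 1.02                # peak position and scale factor

center = target_location[:2] + target_location[2:] / 2
center += (np.array([max_w, max_h]) - (srch_window[2:] / 2 - 1)) * scale
size = target_location[2:] * scale
target_location = np.append(center - size / 2, size)
print(target_location)  # updated [x, y, w, h]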