Example #1
    def forward(self, x, targets):
        """Applies network layers and ops on input image(s) x.

        Args:
            x: input image or batch of images. Shape: [batch,3,300,300].

        Return:
            Depending on phase:
            test:
                Variable(tensor) of output class label predictions,
                confidence score, and corresponding location predictions for
                each object detected. Shape: [batch,topk,7]

            train:
                list of concat outputs from:
                    1: confidence layers, Shape: [batch*num_priors,num_classes]
                    2: localization layers, Shape: [batch,num_priors*4]
                    3: priorbox layers, Shape: [2,num_priors*4]
        """
        sources = list()
        loc = list()
        conf = list()
        has_lp = list()
        size_lp = list()
        offset = list()

        sources_2 = list()
        loc_2 = list()
        conf_2 = list()
        four_corners_2 = list()

        # apply vgg up to conv1_1 relu
        for k in range(2):
            x = self.vgg[k](x)
            if k == 1:
                # conv1_1 feature relu
                conv1_1_feat = x

        # apply vgg up to conv4_3 relu
        for k in range(2, 23):
            x = self.vgg[k](x)

        s = self.L2Norm(x)
        sources.append(s)

        # apply vgg up to fc7
        for k in range(23, len(self.vgg)):
            x = self.vgg[k](x)

        sources.append(x)

        # apply extra layers and cache source layer outputs
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # apply multibox head to source layers
        for (x, l, c, h, s, o) in zip(sources, self.loc, self.conf,
                                      self.has_lp, self.size_lp, self.offset):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())
            has_lp.append(h(x).permute(0, 2, 3, 1).contiguous())
            size_lp.append(s(x).permute(0, 2, 3, 1).contiguous())
            offset.append(o(x).permute(0, 2, 3, 1).contiguous())

        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        has_lp = torch.cat([o.view(o.size(0), -1) for o in has_lp], 1)
        size_lp = torch.cat([o.view(o.size(0), -1) for o in size_lp], 1)
        offset = torch.cat([o.view(o.size(0), -1) for o in offset], 1)

        # [num, num_classes, top_k, 10]
        rpn_rois = self.detect(
            loc.view(loc.size(0), -1, 4),  # loc preds
            self.softmax(conf.view(conf.size(0), -1,
                                   self.num_classes)),  # conf preds
            self.priors.cuda(),  # default boxes
            self.sigmoid(has_lp.view(has_lp.size(0), -1, 1)),
            size_lp.view(size_lp.size(0), -1, 2),
            offset.view(offset.size(0), -1, 2))

        rpn_rois = rpn_rois.detach()

        # roi align or roi warping
        crop_height = self.size_2
        crop_width = self.size_2
        is_cuda = torch.cuda.is_available()

        if self.phase == 'test':
            has_lp_th = 0.5
            th = 0.6
            output = torch.zeros(1, 3, 200, 13)
            output[0, 1, :, :5] = rpn_rois[0, 1, :, :5]

            rois_idx = (rpn_rois[0, 1, :, 0] > th) & (rpn_rois[0, 1, :, 5] >
                                                      has_lp_th)
            matches = rpn_rois[0, 1, rois_idx, :]
            if matches.shape[0] == 0:
                return output

            car_center = (matches[:, [1, 2]] + matches[:, [3, 4]]) / 2
            lp_center = car_center + matches[:, [8, 9]]
            lp_bbox_top_left = lp_center - matches[:, [6, 7]] / 2 * self.expand_num
            lp_bbox_bottom_right = lp_center + matches[:, [6, 7]] / 2 * self.expand_num
            lp_bbox = torch.cat((lp_bbox_top_left, lp_bbox_bottom_right), 1)
            lp_bbox = torch.max(lp_bbox, torch.zeros(lp_bbox.shape))
            lp_bbox = torch.min(lp_bbox, torch.ones(lp_bbox.shape))
            lp_bbox = torch.max(lp_bbox, matches[:, 1:3].repeat(1, 2))
            lp_bbox = torch.min(lp_bbox, matches[:, 3:5].repeat(1, 2))

            # [num_car, 4]
            rois_squeeze = lp_bbox

            # Define the boxes ( crops )
            # box = [y1/height , x1/width , y2/height , x2/width]
            boxes_data = torch.zeros(rois_squeeze.shape)
            boxes_data[:, 0] = rois_squeeze[:, 1]
            boxes_data[:, 1] = rois_squeeze[:, 0]
            boxes_data[:, 2] = rois_squeeze[:, 3]
            boxes_data[:, 3] = rois_squeeze[:, 2]

            # Create an index to indicate which box crops which image
            box_index_data = torch.IntTensor(range(boxes_data.shape[0]))

            image_data = conv1_1_feat.repeat(rois_squeeze.shape[0], 1, 1, 1)

            # Convert from numpy to Variables
            image_torch = to_varabile(image_data,
                                      is_cuda=is_cuda,
                                      requires_grad=False)
            boxes = to_varabile(boxes_data,
                                is_cuda=is_cuda,
                                requires_grad=False)
            box_index = to_varabile(box_index_data,
                                    is_cuda=is_cuda,
                                    requires_grad=False)

            # Crops and resize bbox1 from img1 and bbox2 from img2
            # n*64*crop_height*crop_width
            crops_torch = CropAndResizeFunction.apply(image_torch, boxes,
                                                      box_index, crop_height,
                                                      crop_width, 0)

            # second network
            x_2 = crops_torch

            for k in range(4):
                x_2 = self.vgg_2[k](x_2)
            sources_2.append(x_2)

            for k in range(4, 9):
                x_2 = self.vgg_2[k](x_2)
            sources_2.append(x_2)

            for k in range(9, 14):
                x_2 = self.vgg_2[k](x_2)
            sources_2.append(x_2)

            # apply multibox head to source layers
            for (x_2, l_2, c_2, f_2) in zip(sources_2, self.loc_2, self.conf_2,
                                            self.four_corners_2):
                loc_2.append(l_2(x_2).permute(0, 2, 3, 1).contiguous())
                conf_2.append(c_2(x_2).permute(0, 2, 3, 1).contiguous())
                four_corners_2.append(
                    f_2(x_2).permute(0, 2, 3, 1).contiguous())

            loc_2 = torch.cat([o.view(o.size(0), -1) for o in loc_2], 1)
            conf_2 = torch.cat([o.view(o.size(0), -1) for o in conf_2], 1)
            four_corners_2 = torch.cat(
                [o.view(o.size(0), -1) for o in four_corners_2], 1)

            output_2 = self.detect_2(
                loc_2.view(loc_2.size(0), -1, 4),
                self.softmax_2(
                    conf_2.view(conf_2.size(0), -1, self.num_classes)),
                self.priors_2.cuda(),
                four_corners_2.view(four_corners_2.size(0), -1, 8))
            output_2_pos = output_2[:, 1, 0, :]
            rois_size = rois_squeeze[:, 2:4] - rois_squeeze[:, :2]
            rois_top_left = rois_squeeze[:, :2]
            rois_size_expand = rois_size.repeat(1, 6)
            rois_top_left_expand = rois_top_left.repeat(1, 6)
            output_2_pos[:, 1:] = (output_2_pos[:, 1:] * rois_size_expand
                                   + rois_top_left_expand)
            num_car = output_2_pos.shape[0]
            output[0, 2, :num_car, :] = output_2_pos
            output[0, 1, :num_car, 5:9] = lp_bbox
            output[0, 1, :num_car, 9] = 1

            return output
        else:
            print("ERROR: Phase: " + self.phase + " not recognized")
            return

        return output
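Note on the ROI cropping in the test branch above: CropAndResizeFunction.apply cuts a fixed-size window out of the conv1_1 feature map for every detected car, driven by normalized [y1, x1, y2, x2] boxes plus a box_index that says which image each box comes from. A minimal, self-contained sketch of the same idea using torchvision.ops.roi_align as a stand-in (not the library used in the example; note it takes pixel-coordinate [x1, y1, x2, y2] boxes rather than the normalized layout above):

import torch
from torchvision.ops import roi_align

# dummy feature map: [batch, channels, H, W]
feats = torch.randn(1, 64, 75, 75)

# two ROIs in pixel coordinates (x1, y1, x2, y2), both taken from image 0;
# torchvision expects one [K, 4] tensor per image, packed in a list
boxes = [torch.tensor([[10.0, 10.0, 40.0, 40.0],
                       [20.0,  5.0, 70.0, 30.0]])]

# resample each ROI onto a fixed 7x7 grid, like the crops fed to the second network
crops = roi_align(feats, boxes, output_size=(7, 7), spatial_scale=1.0, aligned=True)
print(crops.shape)  # torch.Size([2, 64, 7, 7])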
Example #2
def compare_with_tf(crop_height, crop_width, is_cuda=True):
    # generate data
    image_data, boxes_data, box_index_data = generate_data(
        batch_size=2,
        depth=128,
        im_height=200,
        im_width=200,
        n_boxes=10,
        xyxy=False, box_normalize=True)
    # boxes_tf_data = np.stack((boxes_data[:, 1], boxes_data[:, 0], boxes_data[:, 3], boxes_data[:, 2]), axis=1)
    # boxes_tf_data[:, 0::2] /= (image_data.shape[2] - 1.)
    # boxes_tf_data[:, 1::2] /= (image_data.shape[3] - 1.)

    # rand conv layer
    conv_torch = nn.Conv2d(image_data.shape[1], 64, 3, padding=1, bias=False)
    if is_cuda:
        conv_torch = conv_torch.cuda()

    # pytorch forward
    image_torch = to_varabile(image_data, requires_grad=True, is_cuda=is_cuda)
    boxes = to_varabile(boxes_data, requires_grad=False, is_cuda=is_cuda)
    box_index = to_varabile(box_index_data, requires_grad=False, is_cuda=is_cuda)

    print('pytorch forward and backward start')
    crops_torch = CropAndResizeFunction(crop_height, crop_width, 0)(image_torch, boxes, box_index)
    crops_torch = conv_torch(crops_torch)
    crops_torch_data = crops_torch.data.cpu().numpy()

    # pytorch backward
    loss_torch = crops_torch.sum()
    loss_torch.backward()
    grad_torch_data = image_torch.grad.data.cpu().numpy()

    print('pytorch forward and backward end')

    # tf forward & backward
    image_tf = tf.placeholder(tf.float32, (None, None, None, None), name='image')
    boxes = tf.placeholder(tf.float32, (None, 4), name='boxes')
    box_index = tf.placeholder(tf.int32, (None,), name='box_index')

    image_t = tf.transpose(image_tf, (0, 2, 3, 1))
    crops_tf = tf.image.crop_and_resize(image_t, boxes, box_index, (crop_height, crop_width))
    conv_tf = tf.nn.conv2d(crops_tf, np.transpose(conv_torch.weight.data.cpu().numpy(), (2, 3, 1, 0)),
                           [1, 1, 1, 1], padding='SAME')

    trans_tf = tf.transpose(conv_tf, (0, 3, 1, 2))
    loss_tf = tf.reduce_sum(trans_tf)
    grad_tf = tf.gradients(loss_tf, image_tf)[0]

    with tf.Session() as sess:
        crops_tf_data, grad_tf_data = sess.run(
            (trans_tf, grad_tf), feed_dict={image_tf: image_data, boxes: boxes_data, box_index: box_index_data}
        )

    crops_diff = np.abs(crops_tf_data - crops_torch_data)
    print('forward (maxval, min_err, max_err, mean_err):',
          crops_tf_data.max(), crops_diff.min(), crops_diff.max(), crops_diff.mean())

    grad_diff = np.abs(grad_tf_data - grad_torch_data)
    print('backward (maxval, min_err, max_err, mean_err):',
          grad_tf_data.max(), grad_diff.min(), grad_diff.max(), grad_diff.mean())
Example #3

# box = [y1/height , x1/width , y2/height , x2/width]
boxes_data = torch.FloatTensor([[0, 0, 1, 1], [0, 0, 0.5, 0.5]])

# Create an index to say which box crops which image
box_index_data = torch.IntTensor([0, 1])

# Import the images from file
image_data1 = transforms.ToTensor()(Image.open(img_path1)).unsqueeze(0)
image_data2 = transforms.ToTensor()(Image.open(img_path2)).unsqueeze(0)

# Create a batch of 2 images
image_data = torch.cat((image_data1, image_data2), 0)

# Convert from numpy to Variables
image_torch = to_varabile(image_data, is_cuda=is_cuda)
boxes = to_varabile(boxes_data, is_cuda=is_cuda)
box_index = to_varabile(box_index_data, is_cuda=is_cuda)

# Crops and resize bbox1 from img1 and bbox2 from img2
crops_torch = CropAndResizeFunction(crop_height, crop_width, 0)(image_torch, boxes, box_index)

# Visualize the crops
print(crops_torch.data.size())
crops_torch_data = crops_torch.data.cpu().numpy().transpose(0, 2, 3, 1)
fig = plt.figure()
plt.subplot(121)
plt.imshow(crops_torch_data[0])
plt.subplot(122)
plt.imshow(crops_torch_data[1])
plt.show()
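The comment above spells out the box layout this crop-and-resize call expects: [y1/height, x1/width, y2/height, x2/width], i.e. normalized coordinates in y-x order. A small hypothetical helper (the name and signature are illustrative, not part of the library) for converting ordinary pixel [x1, y1, x2, y2] boxes into that layout:

import torch

def xyxy_to_normalized_yxyx(boxes_xyxy, height, width):
    # hypothetical helper: pixel [x1, y1, x2, y2] -> normalized [y1, x1, y2, x2]
    x1, y1, x2, y2 = boxes_xyxy.unbind(dim=1)
    return torch.stack((y1 / height, x1 / width, y2 / height, x2 / width), dim=1)

# an 80x50-pixel box inside a 200x300 (HxW) image
print(xyxy_to_normalized_yxyx(torch.tensor([[100.0, 40.0, 180.0, 90.0]]), 200, 300))
# tensor([[0.2000, 0.3333, 0.4500, 0.6000]])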
Example #4
def train(**kwargs):
    # Retrieve training configuration
    data_loader = kwargs['data_loader']
    net = kwargs['net']
    loss = kwargs['loss']
    optimizer = kwargs['optimizer']
    feature_center = kwargs['feature_center']
    epoch = kwargs['epoch']
    save_freq = kwargs['save_freq']
    save_dir = kwargs['save_dir']
    verbose = kwargs['verbose']
    writer = kwargs['writer']
    # Attention Regularization: LA Loss
    l2_loss = nn.MSELoss()

    # Default Parameters
    beta = 0.05
    theta_c = 0.5
    theta_d = 0.5
    crop_size = (448, 448)  # size of cropped images for 'See Better'

    # metrics initialization
    batches = 0
    epoch_loss = np.array([0, 0, 0],
                          dtype='float')  # Loss on Raw/Crop/Drop Images
    epoch_acc = np.array(
        [[0, 0, 0], [0, 0, 0], [0, 0, 0]],
        dtype='float')  # Top-1/3/5 Accuracy for Raw/Crop/Drop Images

    # begin training
    start_time = time.time()
    logging.info('Epoch %03d, Learning Rate %g' %
                 (epoch + 1, optimizer.param_groups[0]['lr']))
    net.train()
    for i, (X, y) in enumerate(data_loader):
        batch_start = time.time()

        # obtain data for training
        X = X.to(torch.device("cuda"))
        y = y.to(torch.device("cuda"))

        ##################################
        # Raw Image
        ##################################
        y_pred, feature_matrix, attention_maps = net(X)
        # loss
        batch_loss_1 = loss(y_pred, y)
        epoch_loss[0] += batch_loss_1.item()
        # metrics: top-1, top-3, top-5 error
        with torch.no_grad():
            epoch_acc[0] += accuracy(y_pred, y, topk=(1, 3, 5))

        ######################################
        # Reshape center and bap
        ####################################
        feature_center = feature_center.reshape((feature_center.shape[0], -1))
        feature_matrix = feature_matrix.reshape((feature_matrix.shape[0], -1))
        # get this batch's class centers
        batch_center = feature_center[y]
        # normalize the batch centers
        batch_center = nn.functional.normalize(batch_center, 2, -1)
        # Update Feature Center
        feature_center[y] += beta * (feature_matrix.detach() - batch_center)
        # loss_center = l2_loss(feature_matrix, batch_center)
        distance = torch.pow(feature_matrix - batch_center, 2)
        distance = torch.sum(distance, -1)
        loss_center = torch.mean(distance)

        ##################################
        # Attention Cropping
        ##################################
        with torch.no_grad():
            crop_masks = F.upsample_bilinear(attention_maps,
                                             size=(X.size(2), X.size(3)))
            bboxes = attention_crop(crop_masks.cpu().detach().numpy())
            bboxes = torch.from_numpy(bboxes).cuda()
            box_index = torch.IntTensor(range(crop_masks.size(0))).cuda()
            crop_images = CropAndResizeFunction(crop_size[0], crop_size[1],
                                                0)(to_varabile(X),
                                                   to_varabile(bboxes),
                                                   to_varabile(box_index))
        #loss
        y_pred, _, _ = net(crop_images)
        batch_loss_2 = loss(y_pred, y)
        epoch_loss[1] += batch_loss_2.item()
        with torch.no_grad():
            epoch_acc[1] += accuracy(y_pred, y, topk=(1, 3, 5))

        ##################################
        # Attention Dropping
        ##################################
        with torch.no_grad():
            crop_masks = F.upsample_bilinear(attention_maps,
                                             size=(X.size(2), X.size(3)))
            mask = attention_drop(crop_masks.cpu().detach().numpy())
            mask = torch.from_numpy(mask).cuda()
            drop_images = X * mask
        # loss
        y_pred, _, _ = net(drop_images)
        batch_loss_3 = loss(y_pred, y)
        epoch_loss[2] += batch_loss_3.item()
        with torch.no_grad():
            epoch_acc[2] += accuracy(y_pred, y, topk=(1, 3, 5))

        total_loss = 1 / 3.0 * batch_loss_1 + 1 / 3.0 * batch_loss_2 + 1 / 3.0 * batch_loss_3 + loss_center
        # total_loss = 1 / 2.0 * batch_loss_1 + 1 / 2.0 * batch_loss_2 + loss_center
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # end of this batch
        batches += 1
        batch_end = time.time()
        if (i + 1) % verbose == 0:
            logging.info(
                '\tBatch %d: (Raw) Loss %.4f, Accuracy: (%.2f, %.2f, %.2f), (Crop) Loss %.4f, Accuracy: (%.2f, %.2f, %.2f), (Drop) Loss %.4f, Accuracy: (%.2f, %.2f, %.2f), Time %3.2f'
                % (i + 1, epoch_loss[0] / batches, epoch_acc[0, 0] / batches,
                   epoch_acc[0, 1] / batches, epoch_acc[0, 2] / batches,
                   epoch_loss[1] / batches, epoch_acc[1, 0] / batches,
                   epoch_acc[1, 1] / batches, epoch_acc[1, 2] / batches,
                   epoch_loss[2] / batches, epoch_acc[2, 0] / batches,
                   epoch_acc[2, 1] / batches, epoch_acc[2, 2] / batches,
                   batch_end - batch_start))
            writer.add_image('raw_img', X[0],
                             (epoch + 1) * 100 + (i + 1) / verbose)
            # writer.add_image('crop_mask', crop_mask[0], (epoch+1) * 100+(i + 1) / verbose)
            # writer.add_image('crop_img', crop_images[0], (epoch+1) * 100+(i + 1) / verbose)
            # writer.add_image('drop_mask', drop_mask[0], (epoch+1) * 100+(i + 1) / verbose)
            # writer.add_image('drop_img', drop_images[0], (epoch+1) * 100+(i + 1) / verbose)
            # crop_mask = F.upsample_bilinear(attention_maps, size=(X.size(2), X.size(3))) > theta_c
            # writer.add_image('attention_img', (X * crop_masks.float())[0], (epoch+1) * 100+(i + 1) / verbose)
            # print(type(attention_map[0]))
            # writer.add_image('attention_img',generate_attention_image(X[0],attention_map[0].cpu().numpy()) , (epoch + 1) * 100 + (i + 1) / verbose)

    # save checkpoint model
    if epoch % save_freq == 0:
        state_dict = net.module.state_dict()
        for key in state_dict.keys():
            state_dict[key] = state_dict[key].cpu()

        torch.save(
            {
                'epoch': epoch,
                'save_dir': save_dir,
                'state_dict': state_dict,
                'feature_center': feature_center.cpu()
            }, os.path.join(save_dir, '%03d.ckpt' % (epoch + 1)))

    # end of this epoch
    end_time = time.time()

    # metrics for average
    epoch_loss /= batches
    epoch_acc /= batches

    # show information for this epoch
    logging.info(
        'Train: (Raw) Loss %.4f, Accuracy: (%.2f, %.2f, %.2f), (Crop) Loss %.4f, Accuracy: (%.2f, %.2f, %.2f), (Drop) Loss %.4f, Accuracy: (%.2f, %.2f, %.2f), Time %3.2f'
        % (epoch_loss[0], epoch_acc[0, 0], epoch_acc[0, 1], epoch_acc[0, 2],
           epoch_loss[1], epoch_acc[1, 0], epoch_acc[1, 1], epoch_acc[1, 2],
           epoch_loss[2], epoch_acc[2, 0], epoch_acc[2, 1], epoch_acc[2, 2],
           end_time - start_time))
    writer.add_scalars(
        'scalar/train', {
            'acc_raw': epoch_acc[0, 0],
            'acc_crop': epoch_acc[1, 0],
            'acc_drop': epoch_acc[2, 0]
        }, epoch)
    writer.add_scalars(
        'scalar/train', {
            'loss_raw': epoch_loss[0],
            'loss_crop': epoch_loss[1],
            'loss_drop': epoch_loss[2]
        }, epoch)
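A toy, self-contained sketch of the feature-center update and center loss computed in the training loop above (numbers chosen purely for illustration):

import torch
import torch.nn as nn

beta = 0.05
feature_center = torch.zeros(1, 4)                       # running center for a single class
feature_matrix = torch.tensor([[1.0, 2.0, 0.0, -1.0]])   # batch feature for one sample of that class

# normalized copy of the current center (zeros stay zeros)
batch_center = nn.functional.normalize(feature_center, 2, -1)

# EMA-style move of the class center towards the observed feature
feature_center[0] += beta * (feature_matrix[0] - batch_center[0])
print(feature_center)   # tensor([[ 0.0500,  0.1000,  0.0000, -0.0500]])

# the attention-regularization term penalizes the distance to the (old) center
loss_center = torch.pow(feature_matrix - batch_center, 2).sum(-1).mean()
print(loss_center)      # tensor(6.)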
Example #5
def validate(**kwargs):
    # Retrieve training configuration
    data_loader = kwargs['data_loader']
    net = kwargs['net']
    loss = kwargs['loss']
    verbose = kwargs['verbose']

    # metrics initialization
    batches = 0
    epoch_loss = 0
    epoch_acc = np.array([0, 0, 0], dtype='float')  # top - 1, 3, 5

    # begin validation
    start_time = time.time()
    net.eval()
    with torch.no_grad():
        for i, (X, y) in enumerate(data_loader):
            batch_start = time.time()

            # obtain data
            X = X.to(torch.device("cuda"))
            y = y.to(torch.device("cuda"))

            ##################################
            # Raw Image
            ##################################
            y_pred_raw, feature_matrix, attention_maps = net(X)

            ##################################
            # Object Localization and Refinement
            ##################################
            attention_maps = torch.mean(attention_maps, dim=1, keepdim=True)
            attention_maps = F.upsample_bilinear(attention_maps,
                                                 size=(X.size(2), X.size(3)))
            bboxes = mask2bbox(attention_maps.cpu().detach().numpy())
            bboxes = torch.from_numpy(bboxes).cuda()
            box_index = torch.IntTensor(range(attention_maps.size(0))).cuda()
            crop_images = CropAndResizeFunction(X.size(2), X.size(3),
                                                0)(to_varabile(X),
                                                   to_varabile(bboxes),
                                                   to_varabile(box_index))
            y_pred_crop, _, _ = net(crop_images)

            # crop_mask = F.upsample_bilinear(attention_map, size=(X.size(2), X.size(3))) > theta_c
            # crop_images = []
            # for batch_index in range(crop_mask.size(0)):
            #     nonzero_indices = torch.nonzero(crop_mask[batch_index, 0, ...])
            #     height_min = nonzero_indices[:, 0].min()
            #     height_max = nonzero_indices[:, 0].max()
            #     width_min = nonzero_indices[:, 1].min()
            #     width_max = nonzero_indices[:, 1].max()
            #     crop_images.append(F.upsample_bilinear(X[batch_index:batch_index + 1, :, height_min:height_max, width_min:width_max], size=crop_size))
            # crop_images = torch.cat(crop_images, dim=0)
            #
            # y_pred_crop, _, _ = net(crop_images)

            # final prediction
            # y_pred = (y_pred_raw + y_pred_crop) / 2.0
            y_pred = torch.log(
                F.softmax(y_pred_raw, dim=-1) * 0.5 + F.softmax(y_pred_crop, dim=-1) * 0.5)
            # y_pred = y_pred_raw
            # loss
            batch_loss = loss(y_pred, y)
            epoch_loss += batch_loss.item()

            # metrics: top-1, top-3, top-5 error
            epoch_acc += accuracy(y_pred, y, topk=(1, 3, 5))

            # end of this batch
            batches += 1
            batch_end = time.time()
            if (i + 1) % verbose == 0:
                logging.info(
                    '\tBatch %d: Loss %.5f, Accuracy: Top-1 %.2f, Top-3 %.2f, Top-5 %.2f, Time %3.2f'
                    % (i + 1, epoch_loss / batches, epoch_acc[0] / batches,
                       epoch_acc[1] / batches, epoch_acc[2] / batches,
                       batch_end - batch_start))

    # end of validation
    end_time = time.time()

    # metrics for average
    epoch_loss /= batches
    epoch_acc /= batches

    # show information for this epoch
    logging.info(
        'Valid: Loss %.5f,  Accuracy: Top-1 %.2f, Top-3 %.2f, Top-5 %.2f, Time %3.2f'
        % (epoch_loss, epoch_acc[0], epoch_acc[1], epoch_acc[2],
           end_time - start_time))
    logging.info('')

    return epoch_loss
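The final prediction above averages the class probabilities of the raw image and the attention crop and then returns to log space, so the result still behaves like log-probabilities for the loss and the accuracy metric. A toy illustration:

import torch
import torch.nn.functional as F

# toy logits from the raw image and from the attention crop (1 sample, 3 classes)
y_pred_raw = torch.tensor([[2.0, 0.5, 0.1]])
y_pred_crop = torch.tensor([[1.5, 1.4, 0.2]])

# average the class probabilities, then go back to log space so the result
# still behaves like log-probabilities for an NLL-style loss
probs = 0.5 * F.softmax(y_pred_raw, dim=-1) + 0.5 * F.softmax(y_pred_crop, dim=-1)
y_pred = torch.log(probs)
print(y_pred.argmax(dim=-1))  # tensor([0])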
Example #6
    def forward(self, x, mask=None):
        assert (len(x) == 2)
        total_bins = 1
        # x[0] is image with shape (rows, cols, channels)
        img = x[0]

        # x[1] is roi with shape (num_rois,4) with ordering (x,y,w,h)
        rois = x[1]

        # because the crop_size argument of tf.crop_and_resize must be a 1-D tensor, we use a uniform bin size
        bin_crop_size = []
        for num_bins, crop_dim in zip((7, 7), (14, 14)):
            assert num_bins >= 1
            assert crop_dim % num_bins == 0
            total_bins *= num_bins
            bin_crop_size.append(crop_dim // num_bins)

        # torch.unbind splits a tensor along a dimension into a tuple of slices, e.g.
        # torch.unbind(torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))
        # -> (tensor([1, 2, 3]), tensor([4, 5, 6]), tensor([7, 8, 9]))
        xmin, ymin, xmax, ymax = torch.unbind(rois[0], dim=1)
        spatial_bins_y = spatial_bins_x = 7
        step_y = (ymax - ymin) / spatial_bins_y
        step_x = (xmax - xmin) / spatial_bins_x

        # gen bins
        position_sensitive_boxes = []
        for bin_x in range(self.pool_size): 
            for bin_y in range(self.pool_size):
                box_coordinates = [
                    ymin + bin_y * step_y,
                    xmin + bin_x * step_x,
                    ymin + (bin_y + 1) * step_y,
                    xmin + (bin_x + 1) * step_x 
                ]
                position_sensitive_boxes.append(torch.stack(box_coordinates, dim=1))
        
        img_splits = torch.split(img, total_bins, dim=3)
        box_image_indices = torch.zeros(self.num_rois, dtype=torch.int)

        feature_crops = []
        for split, box in zip(img_splits, position_sensitive_boxes):
            #assert box.shape[0] == box_image_indices.shape[0], "Psroi box number doesn't match roi box indices!"
            #crop = tf.image.crop_and_resize(
            #    split, box, box_image_indices,
            #    bin_crop_size, method='bilinear'
            #)
            crop = CropAndResizeFunction.apply(split, box, box_image_indices, bin_crop_size[0], bin_crop_size[1], 0)
            # shape [num_boxes, crop_height/spatial_bins_y, crop_width/spatial_bins_x, depth/total_bins]

            # do max pooling over the spatial positions within the bin,
            # i.e. the equivalent of tf.reduce_max(crop, axis=[1, 2])
            crop = torch.max(crop, dim=2, keepdim=False)[0]
            crop = torch.max(crop, dim=1, keepdim=False)[0]
            crop = crop.unsqueeze(1)  # tf.expand_dims(crop, 1)
            # shape [num_boxes, 1, depth/total_bins]

            feature_crops.append(crop)

        final_output = torch.cat(feature_crops, dim=1)

        # Reshape to (1, num_rois, pool_size, pool_size, nb_channels)
        # Might be (1, 4, 7, 7, 5)
        final_output = final_output.reshape(1, self.num_rois, self.pool_size, self.pool_size, self.alpha_channels)

        # permute_dimensions is similar to transpose
        final_output = final_output.permute(0, 1, 2, 3, 4)

        return final_output
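A toy version of the bin-box generation loop above, for a 2x2 position-sensitive grid (values chosen only for illustration):

import torch

# one ROI in normalized [x1, y1, x2, y2] form and a 2x2 grid of bins
xmin, ymin, xmax, ymax = 0.1, 0.2, 0.5, 0.6
pool_size = 2
step_x = (xmax - xmin) / pool_size
step_y = (ymax - ymin) / pool_size

bins = []
for bin_x in range(pool_size):
    for bin_y in range(pool_size):
        bins.append([ymin + bin_y * step_y, xmin + bin_x * step_x,
                     ymin + (bin_y + 1) * step_y, xmin + (bin_x + 1) * step_x])

# each row is one bin box in [y1, x1, y2, x2] form; bin (bin_x, bin_y) is
# cropped from its own group of channels of the position-sensitive feature map
print(torch.tensor(bins))
# tensor([[0.2000, 0.1000, 0.4000, 0.3000],
#         [0.4000, 0.1000, 0.6000, 0.3000],
#         [0.2000, 0.3000, 0.4000, 0.5000],
#         [0.4000, 0.3000, 0.6000, 0.5000]])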
Example #7
    def forward(self, X):
        N = X.size()[0]
        crop_height = 7
        crop_width = 7
        boxes_data = torch.FloatTensor([[0, 0, 1, 1]])
        box_index_data = torch.IntTensor([0])

        boxes = Variable(boxes_data, requires_grad=False)
        box_index = Variable(box_index_data, requires_grad=False)

        assert X.size() == (N, 3, 448, 448)

        bird = Image.open('cropped_bird.jpg')
        Image2PIL = transforms.ToPILImage()

        X_conv4_3 = self.features_conv4_3(X)

        X_conv4_3_down = self.resize_halve(X_conv4_3)

        X_conv5_1 = self.features_conv5_1(X)
        X_conv5_2 = self.features_conv5_2(X_conv5_1)
        X_conv5_3 = self.features_conv5_3(X_conv5_2)

        X_conv4_add_5_1 = X_conv5_1.add(X_conv4_3_down)
        X_conv4_add_5_2 = X_conv5_2.add(X_conv4_3_down)
        X_conv4_add_5_3 = X_conv5_3.add(X_conv4_3_down)

        X_conv451_torch = Variable(X_conv4_add_5_1, requires_grad=False)

        X_conv451_crop = CropAndResizeFunction(crop_height, crop_width,
                                               0)(X_conv451_torch, boxes,
                                                  box_index)

        X_conv452_torch = Variable(X_conv4_add_5_2, requires_grad=False)

        X_conv452_crop = CropAndResizeFunction(crop_height, crop_width,
                                               0)(X_conv452_torch, boxes,
                                                  box_index)

        X_conv453_torch = Variable(X_conv4_add_5_3, requires_grad=False)

        X_conv453_crop = CropAndResizeFunction(crop_height, crop_width,
                                               0)(X_conv453_torch, boxes,
                                                  box_index)

        X_branch_1 = self.hbp(X_conv451_crop, X_conv452_crop)
        X_branch_2 = self.hbp(X_conv452_crop, X_conv453_crop)
        X_branch_3 = self.hbp(X_conv451_crop, X_conv453_crop)

        # X_branch_1 = self.hbp(X_conv4_add_5_1,X_conv4_add_5_2)
        # X_branch_2 = self.hbp(X_conv4_add_5_2,X_conv4_add_5_3)
        # X_branch_3 = self.hbp(X_conv4_add_5_1,X_conv4_add_5_3)

        X_branch = torch.cat([X_branch_1, X_branch_2, X_branch_3], dim=1)

        # print("X_branch_1.size()")
        # print(X_branch_1.size())

        # print("X_branch.size()")
        # print(X_branch.size())
        assert X_branch.size() == (N, 1024 * 3)

        # crop_height = 7
        # crop_width = 7
        # boxes_data = torch.FloatTensor([[0, 0, 1, 1]])
        # box_index_data = torch.IntTensor([0])

        #X_branch_torch = Variable(X_branch, requires_grad=False)
        # boxes = Variable(boxes_data, requires_grad=False)
        # box_index = Variable(box_index_data, requires_grad=False)

        #crops_torch = CropAndResizeFunction(crop_height, crop_width, 0)(X_branch_torch, boxes, box_index)
        #roi_align = RoIAlign(7, 7)
        #crops = roi_align(X_branch, boxes, box_index)

        print(X_conv453_crop.size())

        print(X_branch_1.size())

        X = self.fc(X_branch)
        assert X.size() == (N, 200)

        return X
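Since every CropAndResizeFunction call above uses the full-image box [0, 0, 1, 1], each crop is essentially just a bilinear resize of the whole feature map to 7x7. A rough plain-PyTorch equivalent (up to minor sampling details):

import torch
import torch.nn.functional as F

# cropping the whole map with box [0, 0, 1, 1] and resampling to 7x7
# behaves like a bilinear resize of the full feature map
feat = torch.randn(2, 512, 28, 28)
resized = F.interpolate(feat, size=(7, 7), mode='bilinear', align_corners=True)
print(resized.shape)  # torch.Size([2, 512, 7, 7])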
Example #8
def pyramid_roi_align(inputs, pool_size=[14, 14], image_shape=[416, 416, 3]):
    """Implements ROI Pooling on multiple levels of the feature pyramid.

    Params:
    - pool_size: [height, width] of the output pooled regions. Usually [7, 7]
    - image_shape: [height, width, channels]. Shape of input image in pixels

    Inputs:
    - boxes: [batch, num_boxes, (y1, x1, y2, x2)] in normalized
             coordinates.
    - Feature maps: List of feature maps from different levels of the pyramid.
                    Each is [batch, channels, height, width]

    Output:
    Pooled regions in the shape: [num_boxes, channels, height, width].
    The width and height are those specified in pool_size.
    """

    # Currently only supports batchsize 1
    for i in range(len(inputs)):
        inputs[i] = inputs[i].squeeze(0)

    # Crop boxes [batch, num_boxes, (y1, x1, y2, x2)] in normalized coords
    boxes = inputs[0]

    # Feature Maps. List of feature maps from different level of the
    # feature pyramid. Each is [batch, height, width, channels]
    feature_maps = inputs[1:]

    # Assign each ROI to a level in the pyramid based on the ROI area.
    y1, x1, y2, x2 = boxes.chunk(4, dim=1)
    h = y2 - y1
    w = x2 - x1

    # Equation 1 in the Feature Pyramid Networks paper. Account for
    # the fact that our coordinates are normalized here.
    # e.g. a 224x224 ROI (in pixels) maps to P4

    image_area = torch.FloatTensor([float(image_shape[0] * image_shape[1])])

    if boxes.is_cuda:
        image_area = image_area.cuda()
    roi_level = 3 + torch.log2(
        torch.sqrt(h * w) / (224.0 / torch.sqrt(image_area)))
    roi_level = roi_level.round().int()
    roi_level = roi_level.clamp(1, 3)

    # Loop through levels and apply ROI pooling to each. P1 to P3.
    pooled = []
    box_to_level = []
    for i, level in enumerate(range(1, 4)):
        ix = roi_level == level
        if not ix.any():
            continue
        ix = torch.nonzero(ix)[:, 0]
        level_boxes = boxes[ix.data, :]

        # Keep track of which box is mapped to which level
        box_to_level.append(ix.data)

        # Stop gradient propagation to ROI proposals
        level_boxes = level_boxes.detach()

        # Crop and Resize
        # From Mask R-CNN paper: "We sample four regular locations, so
        # that we can evaluate either max or average pooling. In fact,
        # interpolating only a single value at each bin center (without
        # pooling) is nearly as effective."
        #
        # Here we use the simplified approach of a single value per bin,
        # which is how it's done in tf.crop_and_resize()
        # Result: [batch * num_boxes, pool_height, pool_width, channels]
        ind = torch.zeros(level_boxes.size()[0], requires_grad=False).int()
        # ind = torch.zeros(level_boxes.size()[0], requires_grad = False).int()
        if level_boxes.is_cuda:
            ind = ind.cuda()
        feature_maps[i] = feature_maps[i].unsqueeze(
            0)  # CropAndResizeFunction needs batch dimension
        pooled_features = CropAndResizeFunction(pool_size[0], pool_size[1],
                                                0)(feature_maps[i],
                                                   level_boxes, ind)
        pooled.append(pooled_features)

    # Pack pooled features into one tensor
    pooled = torch.cat(pooled, dim=0)

    # Pack box_to_level mapping into one array and add another
    # column representing the order of pooled boxes
    box_to_level = torch.cat(box_to_level, dim=0)

    # Rearrange pooled features to match the order of the original boxes
    _, box_to_level = torch.sort(box_to_level)
    pooled = pooled[box_to_level, :, :]

    return pooled
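A worked example of the level-assignment formula used above (Equation 1 of the FPN paper, adapted here to normalized coordinates and levels P1 to P3): a 112x112-pixel ROI in a 416x416 image lands on level 2.

import torch

image_shape = [416, 416, 3]
image_area = torch.FloatTensor([float(image_shape[0] * image_shape[1])])

# a 112x112-pixel ROI, expressed in normalized coordinates
h = torch.tensor([[112.0 / 416.0]])
w = torch.tensor([[112.0 / 416.0]])

roi_level = 3 + torch.log2(torch.sqrt(h * w) / (224.0 / torch.sqrt(image_area)))
roi_level = roi_level.round().int().clamp(1, 3)
print(roi_level)  # tensor([[2]], dtype=torch.int32)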
Example #9

    def forward(self, x, targets):
        """Applies network layers and ops on input image(s) x.

        Args:
            x: input image or batch of images. Shape: [batch,3,300,300].

        Return:
            Depending on phase:
            test:
                Variable(tensor) of output class label predictions,
                confidence score, and corresponding location predictions for
                each object detected. Shape: [batch,topk,7]

            train:
                list of concat outputs from:
                    1: confidence layers, Shape: [batch*num_priors,num_classes]
                    2: localization layers, Shape: [batch,num_priors*4]
                    3: priorbox layers, Shape: [2,num_priors*4]
        """
        sources = list()
        loc = list()
        conf = list()
        has_lp = list()
        size_lp = list()
        offset = list()

        sources_2 = list()
        loc_2 = list()
        conf_2 = list()
        four_corners_2 = list()

        # apply vgg up to conv1_1 relu
        # TODO: maybe use conv1_1 features
        for k in range(2):
            x = self.vgg[k](x)
            if k == 1:
                # conv1_1 feature relu
                conv1_1_feat = x

        # apply vgg up to conv4_3 relu
        for k in range(2, 23):
            x = self.vgg[k](x)

        s = self.L2Norm(x)
        sources.append(s)

        # apply vgg up to fc7
        for k in range(23, len(self.vgg)):
            x = self.vgg[k](x)

        sources.append(x)

        # apply extra layers and cache source layer outputs
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # apply multibox head to source layers
        for (x, l, c, h, s, o) in zip(sources, self.loc, self.conf, self.has_lp, self.size_lp, self.offset):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())
            has_lp.append(h(x).permute(0, 2, 3, 1).contiguous())
            size_lp.append(s(x).permute(0, 2, 3, 1).contiguous())
            offset.append(o(x).permute(0, 2, 3, 1).contiguous())

        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        has_lp = torch.cat([o.view(o.size(0), -1) for o in has_lp], 1)
        size_lp = torch.cat([o.view(o.size(0), -1) for o in size_lp], 1)
        offset = torch.cat([o.view(o.size(0), -1) for o in offset], 1)

        # [num, num_classes, top_k, 10]
        rpn_rois = self.detect(
            loc.view(loc.size(0), -1, 4),  # loc preds
            self.softmax(conf.view(conf.size(0), -1,
                                   self.num_classes)),  # conf preds
            self.priors.cuda(),  # default boxes; the previous approach had a serious bug here: parameters ended up spread across different GPUs
            self.sigmoid(has_lp.view(has_lp.size(0), -1, 1)),
            size_lp.view(size_lp.size(0), -1, 2),
            offset.view(offset.size(0), -1, 2)
        )

        # detach so gradients do not flow back through the RPN outputs
        rpn_rois = rpn_rois.detach()

        # roi align or roi warping
        crop_height = self.size_2
        crop_width = self.size_2
        is_cuda = torch.cuda.is_available()

        if self.phase == 'train':
            # rpn_rois: [num, num_classes, top_k, 10]
            # rois: [num, num_gt, 6], 6: IOU with GT, bbox(4), max iou with GT or not
            # target: [num, num_gt, 22], 22: bbox(4), has_lp, size(2), offset(2),
            # lp_bbox(4), lp_four_points(8), label

            # rois and targets are lists at the outer level with tensors inside,
            # so the inner tensors can have different shapes per image
            proposal_target_offset = ProposalTargetLayer_offset()
            rois = proposal_target_offset(rpn_rois, targets, self.expand_num)

            gt_new = torch.empty(0)
            boxes_data_list = []
            box_index_data_list = []
            for idx in range(len(rois)):
                num_gt = targets[idx].shape[0]

                # get the positions of all GT license plates
                targets_tensor = targets[idx]
                # car_center_x = (targets_tensor[:, 0].unsqueeze(1) + targets_tensor[:, 2].unsqueeze(1)) / 2.0
                # car_center_y = (targets_tensor[:, 1].unsqueeze(1) + targets_tensor[:, 3].unsqueeze(1)) / 2.0
                # car_center = torch.cat((car_center_x, car_center_y), 1)
                # lp_center = car_center + targets_tensor[:, 7:9]
                # lp_bbox = torch.cat((lp_center - targets_tensor[:, 5:7]/2, lp_center + targets_tensor[:, 5:7]/2), 1)
                lp_bbox = targets_tensor[:, 9:13]

                # get the four corner points of the license plates
                lp_four_points = targets_tensor[:, 13:21]

                # keep the plate GTs that fall inside the rois and re-express them relative to each roi's top-left corner
                rois_squeeze = rois[idx][:num_gt, 1:-1]

                a_include_b_list = []
                for i in range(num_gt):
                    a_include_b_list.append(a_include_b(rois_squeeze[i, :], lp_bbox[i, :]))

                has_lp_list = []
                for i in range(num_gt):
                    has_lp_list.append(targets_tensor[i, 4].cpu().numpy() > 0)

                gt_in_rois_list = np.array(a_include_b_list) + 0 & np.array(has_lp_list) + 0
                gt_in_rois_tensor = torch.tensor(gt_in_rois_list).type(torch.uint8).bool()
                rois_squeeze = rois_squeeze[gt_in_rois_tensor, :]
                lp_bbox = lp_bbox[gt_in_rois_tensor, :]
                lp_four_points = lp_four_points[gt_in_rois_tensor, :]

                if rois_squeeze.shape[0] > 0:
                    # adjust the plate GT bbox to roi-local coordinates
                    rois_top_left = rois_squeeze[:, :2].repeat(1, 2)
                    rois_width = rois_squeeze[:, 2] - rois_squeeze[:, 0]
                    rois_height = rois_squeeze[:, 3] - rois_squeeze[:, 1]
                    rois_size = torch.cat((rois_width.unsqueeze(1), rois_height.unsqueeze(1)), 1).repeat(1, 2)
                    gt_bbox = (lp_bbox - rois_top_left) / rois_size

                    # new plate four corner points
                    rois_top_left_2 = rois_squeeze[:, :2].repeat(1, 4)
                    rois_size_2 = torch.cat((rois_width.unsqueeze(1), rois_height.unsqueeze(1)), 1).repeat(1, 4)
                    gt_four_points = (lp_four_points - rois_top_left_2) / rois_size_2

                    # GT label
                    gt_label = torch.zeros((gt_bbox.shape[0], 1))

                    # is-valid flag: marks this GT as real; to satisfy multi-GPU gathering,
                    # which always needs an output, some "not valid" dummy entries are faked later
                    # TODO: this is not a very clean way to do it
                    gt_valid = torch.ones((gt_bbox.shape[0], 1))

                    # concat
                    gt_cur = torch.cat((gt_bbox, gt_four_points, gt_label, gt_valid), 1)
                    gt_new = torch.cat((gt_new, gt_cur), 0)

                    # build the GT for the second network; the gt_2 list must line up with the n of crops_torch below, hence the for loop
                    for gt_idx in range(gt_cur.shape[0]):
                        box_index_data_list.append(idx)  # index of the current image

                        boxes_data = torch.zeros(rois_squeeze.shape)
                        boxes_data[:, 0] = rois_squeeze[:, 1]
                        boxes_data[:, 1] = rois_squeeze[:, 0]
                        boxes_data[:, 2] = rois_squeeze[:, 3]
                        boxes_data[:, 3] = rois_squeeze[:, 2]
                        boxes_data_list.append(boxes_data[gt_idx, :].cpu().numpy())  # the current region

            if gt_new.shape[0] > 0:
                # here each car is used as the roi
                # Define the boxes ( crops )
                # box = [y1/height , x1/width , y2/height , x2/width]
                boxes_data = torch.FloatTensor(boxes_data_list)

                # Create an index to say which box crops which image
                box_index_data = torch.IntTensor(box_index_data_list)

                # Create batch of images
                image_data = conv1_1_feat

                # Convert from numpy to Variables
                # the image features still need gradients here (see the ROIAlign source):
                # differentiable at training time, not needed at test time
                image_torch = to_varabile(image_data, is_cuda=is_cuda, requires_grad=True)
                boxes = to_varabile(boxes_data, is_cuda=is_cuda, requires_grad=False)
                box_index = to_varabile(box_index_data, is_cuda=is_cuda, requires_grad=False)

                # Crops and resize bbox1 from img1 and bbox2 from img2
                # n*64*crop_height*crop_width
                crops_torch = CropAndResizeFunction.apply(image_torch, boxes, box_index, crop_height, crop_width, 0)

                # second network
                x_2 = crops_torch

                for k in range(4):
                    x_2 = self.vgg_2[k](x_2)
                sources_2.append(x_2)

                for k in range(4, 9):
                    x_2 = self.vgg_2[k](x_2)
                sources_2.append(x_2)

                for k in range(9, 14):
                    x_2 = self.vgg_2[k](x_2)
                sources_2.append(x_2)

                # apply multibox head to source layers
                for (x_2, l_2, c_2, f_2) in zip(sources_2, self.loc_2, self.conf_2, self.four_corners_2):
                    loc_2.append(l_2(x_2).permute(0, 2, 3, 1).contiguous())
                    conf_2.append(c_2(x_2).permute(0, 2, 3, 1).contiguous())
                    four_corners_2.append(f_2(x_2).permute(0, 2, 3, 1).contiguous())

                loc_2 = torch.cat([o.view(o.size(0), -1) for o in loc_2], 1)
                conf_2 = torch.cat([o.view(o.size(0), -1) for o in conf_2], 1)
                four_corners_2 = torch.cat([o.view(o.size(0), -1) for o in four_corners_2], 1)

            # if loc_2 is still a list, there was no gt_new, so the second network's predictions and GT are both empty
            if isinstance(loc_2, list):
                output = (
                    loc.view(loc.size(0), -1, 4),
                    conf.view(conf.size(0), -1, self.num_classes),
                    self.priors,
                    has_lp.view(has_lp.size(0), -1, 1),
                    size_lp.view(size_lp.size(0), -1, 2),
                    offset.view(offset.size(0), -1, 2),
                    # second network  TODO: this is a very ugly workaround
                    torch.zeros(1, self.priors_2.shape[0], 4),
                    torch.zeros(1, self.priors_2.shape[0], 2),
                    self.priors_2,
                    torch.zeros(1, self.priors_2.shape[0], 8),
                    torch.zeros(1, 14)  # a trailing 0 marks this GT as not valid
                )
            else:
                output = (
                    loc.view(loc.size(0), -1, 4),
                    conf.view(conf.size(0), -1, self.num_classes),
                    self.priors,
                    has_lp.view(has_lp.size(0), -1, 1),
                    size_lp.view(size_lp.size(0), -1, 2),
                    offset.view(offset.size(0), -1, 2),
                    # second network
                    loc_2.view(loc_2.size(0), -1, 4),
                    conf_2.view(conf_2.size(0), -1, self.num_classes),
                    self.priors_2,
                    four_corners_2.view(four_corners_2.size(0), -1, 8),
                    gt_new
                )

        elif self.phase == 'test':
            has_lp_th = 0.5
            th = 0.6
            # holds the detection results for both cars and plates
            output = torch.zeros(1, 3, 200, 13)
            # store the car detections
            output[0, 1, :, :5] = rpn_rois[0, 1, :, :5]

            # also use the has-plate score: only cars predicted to contain a plate go on to plate detection
            rois_idx = (rpn_rois[0, 1, :, 0] > th) & (rpn_rois[0, 1, :, 5] > has_lp_th)
            matches = rpn_rois[0, 1, rois_idx, :]
            if matches.shape[0] == 0:
                return output

            # expand the plate region inside the car using the offset, size and expansion factor from matches
            car_center = (matches[:, [1, 2]] + matches[:, [3, 4]]) / 2
            lp_center = car_center + matches[:, [8, 9]]
            lp_bbox_top_left = lp_center - matches[:, [6, 7]] / 2 * self.expand_num
            lp_bbox_bottom_right = lp_center + matches[:, [6, 7]] / 2 * self.expand_num
            lp_bbox = torch.cat((lp_bbox_top_left, lp_bbox_bottom_right), 1)
            # clip the expanded plate region to the image
            lp_bbox = torch.max(lp_bbox, torch.zeros(lp_bbox.shape))
            lp_bbox = torch.min(lp_bbox, torch.ones(lp_bbox.shape))
            # clip the expanded plate region to the detected car box
            lp_bbox = torch.max(lp_bbox, matches[:, 1:3].repeat(1, 2))
            lp_bbox = torch.min(lp_bbox, matches[:, 3:5].repeat(1, 2))

            # [num_car, 4]
            rois_squeeze = lp_bbox

            # here each car is used as the roi
            # Define the boxes ( crops )
            # box = [y1/height , x1/width , y2/height , x2/width]
            boxes_data = torch.zeros(rois_squeeze.shape)
            boxes_data[:, 0] = rois_squeeze[:, 1]
            boxes_data[:, 1] = rois_squeeze[:, 0]
            boxes_data[:, 2] = rois_squeeze[:, 3]
            boxes_data[:, 3] = rois_squeeze[:, 2]

            # Create an index to indicate which box crops which image
            box_index_data = torch.IntTensor(range(boxes_data.shape[0]))

            # build a batch with one copy of the conv1_1 feature map per ROI
            # crucial: the feature map must be repeated here, otherwise the later features are all zero
            image_data = conv1_1_feat.repeat(rois_squeeze.shape[0], 1, 1, 1)

            # Convert from numpy to Variables
            # the image feature part still needs to be differentiable in principle
            image_torch = to_varabile(image_data, is_cuda=is_cuda, requires_grad=False)
            boxes = to_varabile(boxes_data, is_cuda=is_cuda, requires_grad=False)
            box_index = to_varabile(box_index_data, is_cuda=is_cuda, requires_grad=False)

            # Crops and resize bbox1 from img1 and bbox2 from img2
            # n*64*crop_height*crop_width
            crops_torch = CropAndResizeFunction.apply(image_torch, boxes, box_index, crop_height, crop_width, 0)

            # Visualize the crops
            # print(crops_torch.data.size())
            # crops_torch_data = crops_torch.data.cpu().numpy().transpose(0, 2, 3, 1)
            # import matplotlib.pyplot as plt
            # for m in range(rois_squeeze.shape[0]):
            #     fig = plt.figure()
            #     currentAxis = plt.gca()
            #     # pt = gt_2[m][0, :4].cpu().numpy() * self.size_2
            #     # coords = (pt[0], pt[1]), pt[2] - pt[0] + 1, pt[3] - pt[1] + 1
            #     # currentAxis.add_patch(plt.Rectangle(*coords, fill=False))
            #     plt.imshow(crops_torch_data[m, :, :, 33])
            #     plt.show()

            # second network
            x_2 = crops_torch

            for k in range(4):
                x_2 = self.vgg_2[k](x_2)
            sources_2.append(x_2)

            for k in range(4, 9):
                x_2 = self.vgg_2[k](x_2)
            sources_2.append(x_2)

            for k in range(9, 14):
                x_2 = self.vgg_2[k](x_2)
            sources_2.append(x_2)

            # apply multibox head to source layers
            for (x_2, l_2, c_2, f_2) in zip(sources_2, self.loc_2, self.conf_2, self.four_corners_2):
                loc_2.append(l_2(x_2).permute(0, 2, 3, 1).contiguous())
                conf_2.append(c_2(x_2).permute(0, 2, 3, 1).contiguous())
                four_corners_2.append(f_2(x_2).permute(0, 2, 3, 1).contiguous())

            loc_2 = torch.cat([o.view(o.size(0), -1) for o in loc_2], 1)
            conf_2 = torch.cat([o.view(o.size(0), -1) for o in conf_2], 1)
            four_corners_2 = torch.cat([o.view(o.size(0), -1) for o in four_corners_2], 1)

            output_2 = self.detect_2(
                loc_2.view(loc_2.size(0), -1, 4),
                self.softmax_2(conf_2.view(conf_2.size(0), -1,
                                            self.num_classes)),
                self.priors_2.cuda(),
                four_corners_2.view(four_corners_2.size(0), -1, 8)
            )
            
            # this variant would pool the plate detections from all cars and keep only the top 200 overall
            # (num_car, 200, 13)
            # output_2_pos = output_2[:, 1, :, :]
            # # (num_car, 2)
            # rois_size = rois_squeeze[:, 2:4] - rois_squeeze[:, :2]
            # rois_top_left = rois_squeeze[:, :2]
            # # (num_car, 200, 12)
            # rois_size_expand = rois_size.repeat(1, 6).unsqueeze(1).repeat(1, 200, 1)
            # # (num_car, 200, 12)
            # rois_top_left_expand = rois_top_left.repeat(1, 6).unsqueeze(1).repeat(1, 200, 1)
            # # (num_car, 200, 12)
            # output_2_pos[:, :, 1:] = output_2_pos[:, :, 1:] * rois_size_expand + rois_top_left_expand
            # # (num_car*200, 13)
            # output_2_pos_squeeze = output_2_pos.reshape(-1, output_2_pos.shape[2])
            # _, indices = output_2_pos_squeeze[:, 0].sort(descending=True)
            # output_2_pos_squeeze_sorted = output_2_pos_squeeze[indices, :]
            # # (1, 2, 200, 13)
            # results_2 = output_2_pos_squeeze_sorted[:200, :].unsqueeze(0).unsqueeze(1).repeat(1, 2, 1, 1)

            # this variant keeps only the highest-confidence plate within each car
            # (num_car, 13)
            output_2_pos = output_2[:, 1, 0, :]
            # (num_car, 2)
            rois_size = rois_squeeze[:, 2:4] - rois_squeeze[:, :2]
            rois_top_left = rois_squeeze[:, :2]
            # (num_car, 12)
            rois_size_expand = rois_size.repeat(1, 6)
            # (num_car, 12)
            rois_top_left_expand = rois_top_left.repeat(1, 6)
            # (num_car, 12)
            output_2_pos[:, 1:] = output_2_pos[:, 1:] * rois_size_expand + rois_top_left_expand

            # store the plate detections
            num_car = output_2_pos.shape[0]
            output[0, 2, :num_car, :] = output_2_pos

            # store the expanded-region boxes after the car entries and set a flag
            output[0, 1, :num_car, 5:9] = lp_bbox
            output[0, 1, :num_car, 9] = 1

            return output
        else:
            print("ERROR: Phase: " + self.phase + " not recognized")
            return

        return output
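A toy illustration of the coordinate mapping at the end of the test branch above: the second network predicts the plate box in ROI-local normalized coordinates, which are scaled by the ROI size and shifted by its top-left corner to get image-level coordinates (here with a single 4-coordinate box instead of the 12 coordinates used above; the values are made up):

import torch

# one car ROI in normalized [x1, y1, x2, y2] image coordinates
rois_squeeze = torch.tensor([[0.20, 0.30, 0.60, 0.70]])
# conf + a single plate box predicted in ROI-local normalized coordinates
output_2_pos = torch.tensor([[0.90, 0.25, 0.50, 0.75, 0.80]])

rois_size = rois_squeeze[:, 2:4] - rois_squeeze[:, :2]   # (0.4, 0.4)
rois_top_left = rois_squeeze[:, :2]
output_2_pos[:, 1:] = (output_2_pos[:, 1:] * rois_size.repeat(1, 2)
                       + rois_top_left.repeat(1, 2))
print(output_2_pos)  # tensor([[0.9000, 0.3000, 0.5000, 0.5000, 0.6200]])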
Example #10
    def _crop_rois(self, bottom, rois):
        pre_pool_size = 7
        crops = CropAndResizeFunction(pre_pool_size, pre_pool_size)(
            bottom, Variable(rois),
            Variable(torch.zeros(rois.size(0), 1).cuda().int()))
        return crops