예제 #1
0
def init_actor(actor, image, gt):
    """Pre-train ``actor`` on boxes sampled around the ground-truth box.

    Draws 1500 candidate boxes around ``gt``, computes their regression
    targets with ``cal_distance`` and runs up to ``maxiter`` Adam steps of
    MSE regression on mini-batches of ``batch_num`` samples.

    Args:
        actor: network to train; it is moved to CUDA and put in train mode.
            It is called as ``actor(sample_patches, full_image_batch)``.
        image: first frame; must provide ``.size`` and ``.resize`` (used
            here with a PIL resampling filter).
        gt: ground-truth box of the target in ``image``.

    Returns:
        int: ``0`` if the loss dropped below ``1e-4`` during training
        (early stop), otherwise ``1`` after all iterations have run.
    """
    # Fixed seeds keep the sample draw and the batch order reproducible.
    np.random.seed(123)
    torch.manual_seed(456)
    torch.cuda.manual_seed(789)

    batch_num = 64
    maxiter = 80
    actor = actor.cuda()
    actor.train()
    init_optimizer = torch.optim.Adam(actor.parameters(), lr=0.0001)
    loss_func = torch.nn.MSELoss()
    actor_samples = np.round(
        gen_samples(SampleGenerator('uniform', image.size, 0.3, 1.5, None), gt,
                    1500, [0.6, 1], [0.9, 1.1]))
    idx = np.random.permutation(actor_samples.shape[0])
    batch_img = getbatch_actor(np.array(image), actor_samples)
    batch_distance = cal_distance(actor_samples,
                                  np.tile(gt, [actor_samples.shape[0], 1]))
    batch_distance = np.array(batch_distance).astype(np.float32)
    # Extend the index list with fresh permutations until it can feed every
    # iteration with a full batch.
    while len(idx) < batch_num * maxiter:
        idx = np.concatenate(
            [idx, np.random.permutation(actor_samples.shape[0])])

    pointer = 0
    # NOTE(review): Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is
    # the equivalent filter if the Pillow dependency is upgraded.
    torch_image = loader(image.resize((225, 225),
                                      Image.ANTIALIAS)).unsqueeze(0).cuda()

    # Default result when training never reaches the early-stop threshold.
    deta_flag = 1
    for iteration in range(maxiter):
        next_pointer = pointer + batch_num
        cur_idx = idx[pointer:next_pointer]
        pointer = next_pointer
        feat = actor(batch_img[cur_idx],
                     torch_image.repeat(batch_num, 1, 1, 1))
        loss = loss_func(
            feat,
            Variable(torch.FloatTensor(batch_distance[cur_idx])).cuda())

        actor.zero_grad()
        loss.backward()
        init_optimizer.step()
        if opts['show_train']:
            print("Iter %d, Loss %.10f" % (iteration, loss.item()))
        if loss.item() < 0.0001:
            # Converged: stop early and report it to the caller.
            deta_flag = 0
            return deta_flag

    # The final return must sit outside the loop; returning from inside the
    # loop body ended training after a single iteration.
    return deta_flag
예제 #2
0
    def initialize(self, image_file, box):
        """Initialise the tracker from the first frame and its target box.

        Steps, in order:
          1. Load the image and draw positive, negative and bbox-regression
             samples around ``box``.
          2. Crop a padded scene region (plus optional jittered variants),
             run it through ``self.model`` up to ``conv3`` and RoI-align the
             samples into feature vectors.
          3. Repeat the feature extraction on 100 randomly shifted/scaled
             crops around the target and append those features.
          4. Run the initial ``train_owm`` training and fit a
             ``BBRegressor``.
          5. Seed the per-frame feature lists used by later updates.

        Args:
            image_file: anything ``PIL.Image.open`` accepts.
            box: target box indexed as ``[x, y, w, h]``; elements ``0:2`` are
                treated as the top-left corner and ``2:4`` as the size (see
                the centre computation in the extra-crop loop).
                NOTE(review): several in-place float updates are applied to
                copies of this array, so it presumably needs a float dtype -
                confirm against callers.

        Side effects:
            Sets ``frame_idx``, ``target_bbox``, ``feat_dim``, ``P4``-``P6``,
            ``W4``-``W6``, ``flag_old``, ``bbreg``, ``trans_f`` and
            (conditionally) ``pos_feats_all`` / ``neg_feats_all`` on ``self``;
            trains ``self.model`` through ``train_owm``.
        """
        self.frame_idx = 0

        # Load first image
        cur_image = Image.open(image_file).convert("RGB")
        cur_image = np.asarray(cur_image)

        self.target_bbox = np.array(box)

        # Draw pos/neg samples.  The generator receives the image size as
        # (width, height), i.e. (ishape[1], ishape[0]).
        ishape = cur_image.shape
        pos_examples = gen_samples(
            SampleGenerator("gaussian", (ishape[1], ishape[0]), 0.1, 1.2),
            self.target_bbox,
            opts["n_pos_init"],
            opts["overlap_pos_init"],
        )
        neg_examples = gen_samples(
            SampleGenerator("uniform", (ishape[1], ishape[0]), 1, 2, 1.1),
            self.target_bbox,
            opts["n_neg_init"],
            opts["overlap_neg_init"],
        )
        neg_examples = np.random.permutation(neg_examples)

        # Samples used later to fit the bounding-box regressor.
        cur_bbreg_examples = gen_samples(
            SampleGenerator("uniform", (ishape[1], ishape[0]), 0.3, 1.5, 1.1),
            self.target_bbox,
            opts["n_bbreg"],
            opts["overlap_bbreg"],
            opts["scale_bbreg"],
        )

        # Compute the padded scene box: the smallest box that contains every
        # negative sample after each one is enlarged by the padding factor.
        padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] *
                     (opts["padding"] - 1.0) / 2.0).min()
        padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] *
                     (opts["padding"] - 1.0) / 2.0).min()
        padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] *
                     (opts["padding"] + 1.0) / 2.0).max()
        padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] *
                     (opts["padding"] + 1.0) / 2.0).max()
        padded_scene_box = np.reshape(
            np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                        padded_y2 - padded_y1)),
            (1, 4),
        )

        # scene_boxes holds one row per crop to process; jitter_scale holds
        # the matching resize factor for each row.
        scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
        if opts["jitter"]:
            # horizontal shift
            jittered_scene_box_horizon = np.copy(padded_scene_box)
            jittered_scene_box_horizon[0, 0] -= 4.0
            jitter_scale_horizon = 1.0

            # vertical shift
            jittered_scene_box_vertical = np.copy(padded_scene_box)
            jittered_scene_box_vertical[0, 1] -= 4.0
            jitter_scale_vertical = 1.0

            # scale reduction (one step of 1.1); the box itself is unchanged,
            # only the crop size is scaled
            jittered_scene_box_reduce1 = np.copy(padded_scene_box)
            jitter_scale_reduce1 = 1.1**(-1)

            # scale enlarge (one step of 1.1)
            jittered_scene_box_enlarge1 = np.copy(padded_scene_box)
            jitter_scale_enlarge1 = 1.1**(1)

            # scale reduction (two steps)
            jittered_scene_box_reduce2 = np.copy(padded_scene_box)
            jitter_scale_reduce2 = 1.1**(-2)
            # scale enlarge (two steps)
            jittered_scene_box_enlarge2 = np.copy(padded_scene_box)
            jitter_scale_enlarge2 = 1.1**(2)

            scene_boxes = np.concatenate(
                [
                    scene_boxes,
                    jittered_scene_box_horizon,
                    jittered_scene_box_vertical,
                    jittered_scene_box_reduce1,
                    jittered_scene_box_enlarge1,
                    jittered_scene_box_reduce2,
                    jittered_scene_box_enlarge2,
                ],
                axis=0,
            )
            jitter_scale = [
                1.0,
                jitter_scale_horizon,
                jitter_scale_vertical,
                jitter_scale_reduce1,
                jitter_scale_enlarge1,
                jitter_scale_reduce2,
                jitter_scale_enlarge2,
            ]
        else:
            jitter_scale = [1.0]

        # Extract features for every scene crop with the model in eval mode.
        self.model.eval()
        for bidx in range(0, scene_boxes.shape[0]):
            # Crop size such that the target maps to img_size pixels, then
            # scaled by this crop's jitter factor.
            crop_img_size = (scene_boxes[bidx, 2:4] * (
                (opts["img_size"], opts["img_size"]) / self.target_bbox[2:4])
                             ).astype("int64") * jitter_scale[bidx]
            # cur_image_var is not used afterwards in this method.
            cropped_image, cur_image_var = self.img_crop_model.crop_image(
                cur_image, np.reshape(scene_boxes[bidx], (1, 4)),
                crop_img_size)
            cropped_image = cropped_image - 128.0

            feat_map = self.model(cropped_image, out_layer="conv3")

            # Target box relative to the crop origin (not used afterwards in
            # this method).
            rel_target_bbox = np.copy(self.target_bbox)
            rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

            # Positive RoIs: shift into crop coordinates, convert with
            # samples2maskroi, and prepend a zero column (each scene crop is
            # fed to the model on its own, so the batch index is always 0).
            batch_num = np.zeros((pos_examples.shape[0], 1))
            cur_pos_rois = np.copy(pos_examples)
            cur_pos_rois[:, 0:2] -= np.repeat(
                np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                cur_pos_rois.shape[0],
                axis=0,
            )
            scaled_obj_size = float(opts["img_size"]) * jitter_scale[bidx]
            cur_pos_rois = samples2maskroi(
                cur_pos_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                self.target_bbox[2:4],
                opts["padding"],
            )
            cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
            cur_pos_rois = Variable(
                torch.from_numpy(cur_pos_rois.astype("float32"))).cuda()
            cur_pos_feats = self.model.roi_align_model(feat_map, cur_pos_rois)
            # Flatten each RoI feature to a vector and detach it from the
            # graph via .data.
            cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                               -1).data.clone()

            # Negative RoIs: same procedure as for the positives.
            batch_num = np.zeros((neg_examples.shape[0], 1))
            cur_neg_rois = np.copy(neg_examples)
            cur_neg_rois[:, 0:2] -= np.repeat(
                np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                cur_neg_rois.shape[0],
                axis=0,
            )
            cur_neg_rois = samples2maskroi(
                cur_neg_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                self.target_bbox[2:4],
                opts["padding"],
            )
            cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
            cur_neg_rois = Variable(
                torch.from_numpy(cur_neg_rois.astype("float32"))).cuda()
            cur_neg_feats = self.model.roi_align_model(feat_map, cur_neg_rois)
            cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                               -1).data.clone()

            # bbreg rois: same procedure again for the regression samples
            batch_num = np.zeros((cur_bbreg_examples.shape[0], 1))
            cur_bbreg_rois = np.copy(cur_bbreg_examples)
            cur_bbreg_rois[:, 0:2] -= np.repeat(
                np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                cur_bbreg_rois.shape[0],
                axis=0,
            )
            scaled_obj_size = float(opts["img_size"]) * jitter_scale[bidx]
            cur_bbreg_rois = samples2maskroi(
                cur_bbreg_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                self.target_bbox[2:4],
                opts["padding"],
            )
            cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois),
                                            axis=1)
            cur_bbreg_rois = Variable(
                torch.from_numpy(cur_bbreg_rois.astype("float32"))).cuda()
            cur_bbreg_feats = self.model.roi_align_model(
                feat_map, cur_bbreg_rois)
            cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0),
                                                   -1).data.clone()

            # Length of one flattened RoI feature vector.
            self.feat_dim = cur_pos_feats.size(-1)

            # Accumulate features (and the bbreg boxes they belong to) over
            # all scene crops.
            if bidx == 0:
                pos_feats = cur_pos_feats
                neg_feats = cur_neg_feats
                # bbreg feature
                bbreg_feats = cur_bbreg_feats
                bbreg_examples = cur_bbreg_examples
            else:
                pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
                neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
                # bbreg feature
                bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0)
                bbreg_examples = np.concatenate(
                    (bbreg_examples, cur_bbreg_examples), axis=0)

        # Randomly subsample each set back down to its configured size.
        if pos_feats.size(0) > opts["n_pos_init"]:
            pos_idx = np.asarray(range(pos_feats.size(0)))
            np.random.shuffle(pos_idx)
            pos_feats = pos_feats[pos_idx[0:opts["n_pos_init"]], :]
        if neg_feats.size(0) > opts["n_neg_init"]:
            neg_idx = np.asarray(range(neg_feats.size(0)))
            np.random.shuffle(neg_idx)
            neg_feats = neg_feats[neg_idx[0:opts["n_neg_init"]], :]

        # bbreg: features and boxes are subsampled with the same indices so
        # they stay aligned
        if bbreg_feats.size(0) > opts["n_bbreg"]:
            bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
            np.random.shuffle(bbreg_idx)
            bbreg_feats = bbreg_feats[bbreg_idx[0:opts["n_bbreg"]], :]
            bbreg_examples = bbreg_examples[bbreg_idx[0:opts["n_bbreg"]], :]

        # Build replicateNum extra crops centred on the target, each with a
        # random shift and scale, all resized to the same crop size so they
        # can be stacked into one batch.
        extra_obj_size = np.array((opts["img_size"], opts["img_size"]))
        extra_crop_img_size = extra_obj_size * (opts["padding"] + 0.6)
        replicateNum = 100
        for iidx in range(replicateNum):
            extra_target_bbox = np.copy(self.target_bbox)

            # Scene box: the target box enlarged by (padding + 0.6) about its
            # centre.
            extra_scene_box = np.copy(extra_target_bbox)
            extra_scene_box_center = extra_scene_box[
                0:2] + extra_scene_box[2:4] / 2.0
            extra_scene_box_size = extra_scene_box[2:4] * (opts["padding"] +
                                                           0.6)
            extra_scene_box[
                0:2] = extra_scene_box_center - extra_scene_box_size / 2.0
            extra_scene_box[2:4] = extra_scene_box_size

            # Random shift clipped to [-4, 4] and random scale 1.1**s with s
            # clipped to [-2, 2].
            extra_shift_offset = np.clip(2.0 * np.random.randn(2), -4, 4)
            cur_extra_scale = 1.1**np.clip(np.random.randn(1), -2, 2)

            extra_scene_box[0] += extra_shift_offset[0]
            extra_scene_box[1] += extra_shift_offset[1]
            extra_scene_box[2:4] *= cur_extra_scale[0]

            # The crop size is fixed, so a larger scene box makes the target
            # appear smaller in the crop by the same factor.
            scaled_obj_size = float(opts["img_size"]) / cur_extra_scale[0]

            cur_extra_cropped_image, _ = self.img_crop_model.crop_image(
                cur_image, np.reshape(extra_scene_box, (1, 4)),
                extra_crop_img_size)
            cur_extra_cropped_image = cur_extra_cropped_image.detach()

            # NOTE(review): the sample counts below use true division and are
            # floats under Python 3 - confirm gen_samples accepts a
            # non-integer count.
            cur_extra_pos_examples = gen_samples(
                SampleGenerator("gaussian", (ishape[1], ishape[0]), 0.1, 1.2),
                extra_target_bbox,
                opts["n_pos_init"] / replicateNum,
                opts["overlap_pos_init"],
            )
            cur_extra_neg_examples = gen_samples(
                SampleGenerator("uniform", (ishape[1], ishape[0]), 0.3, 2,
                                1.1),
                extra_target_bbox,
                opts["n_neg_init"] / replicateNum / 4,
                opts["overlap_neg_init"],
            )

            # bbreg sample
            cur_extra_bbreg_examples = gen_samples(
                SampleGenerator("uniform", (ishape[1], ishape[0]), 0.3, 1.5,
                                1.1),
                extra_target_bbox,
                opts["n_bbreg"] / replicateNum / 4,
                opts["overlap_bbreg"],
                opts["scale_bbreg"],
            )

            # Column 0 of each RoI row is iidx, matching this crop's position
            # in the stacked extra_cropped_image batch.
            batch_num = iidx * np.ones((cur_extra_pos_examples.shape[0], 1))
            cur_extra_pos_rois = np.copy(cur_extra_pos_examples)
            cur_extra_pos_rois[:, 0:2] -= np.repeat(
                np.reshape(extra_scene_box[0:2], (1, 2)),
                cur_extra_pos_rois.shape[0],
                axis=0,
            )
            cur_extra_pos_rois = samples2maskroi(
                cur_extra_pos_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                extra_target_bbox[2:4],
                opts["padding"],
            )
            cur_extra_pos_rois = np.concatenate(
                (batch_num, cur_extra_pos_rois), axis=1)

            batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1))
            cur_extra_neg_rois = np.copy(cur_extra_neg_examples)
            cur_extra_neg_rois[:, 0:2] -= np.repeat(
                np.reshape(extra_scene_box[0:2], (1, 2)),
                cur_extra_neg_rois.shape[0],
                axis=0,
            )
            cur_extra_neg_rois = samples2maskroi(
                cur_extra_neg_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                extra_target_bbox[2:4],
                opts["padding"],
            )
            cur_extra_neg_rois = np.concatenate(
                (batch_num, cur_extra_neg_rois), axis=1)

            # bbreg rois
            batch_num = iidx * np.ones((cur_extra_bbreg_examples.shape[0], 1))
            cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples)
            cur_extra_bbreg_rois[:, 0:2] -= np.repeat(
                np.reshape(extra_scene_box[0:2], (1, 2)),
                cur_extra_bbreg_rois.shape[0],
                axis=0,
            )
            cur_extra_bbreg_rois = samples2maskroi(
                cur_extra_bbreg_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                extra_target_bbox[2:4],
                opts["padding"],
            )
            cur_extra_bbreg_rois = np.concatenate(
                (batch_num, cur_extra_bbreg_rois), axis=1)

            # Stack crops along the batch dimension and RoIs along rows.
            if iidx == 0:
                extra_cropped_image = cur_extra_cropped_image

                extra_pos_rois = np.copy(cur_extra_pos_rois)
                extra_neg_rois = np.copy(cur_extra_neg_rois)
                # bbreg rois
                extra_bbreg_rois = np.copy(cur_extra_bbreg_rois)
                extra_bbreg_examples = np.copy(cur_extra_bbreg_examples)
            else:
                extra_cropped_image = torch.cat(
                    (extra_cropped_image, cur_extra_cropped_image), dim=0)

                extra_pos_rois = np.concatenate(
                    (extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0)
                extra_neg_rois = np.concatenate(
                    (extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0)
                # bbreg rois
                extra_bbreg_rois = np.concatenate(
                    (extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0)
                extra_bbreg_examples = np.concatenate(
                    (extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)),
                    axis=0)

        extra_pos_rois = Variable(
            torch.from_numpy(extra_pos_rois.astype("float32"))).cuda()
        extra_neg_rois = Variable(
            torch.from_numpy(extra_neg_rois.astype("float32"))).cuda()
        # bbreg rois
        extra_bbreg_rois = Variable(
            torch.from_numpy(extra_bbreg_rois.astype("float32"))).cuda()

        # Same mean subtraction as for the scene crops, done in place here.
        extra_cropped_image -= 128.0

        # One forward pass over all extra crops at once.
        extra_feat_maps = self.model(extra_cropped_image, out_layer="conv3")
        # Re-read the image shape (cur_image has not changed since the first
        # assignment above).
        ishape = cur_image.shape

        extra_pos_feats = self.model.roi_align_model(extra_feat_maps,
                                                     extra_pos_rois)
        extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0),
                                               -1).data.clone()

        extra_neg_feats = self.model.roi_align_model(extra_feat_maps,
                                                     extra_neg_rois)
        extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0),
                                               -1).data.clone()
        # bbreg feat
        extra_bbreg_feats = self.model.roi_align_model(extra_feat_maps,
                                                       extra_bbreg_rois)
        extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0),
                                                   -1).data.clone()

        # concatenate extra features to original_features
        pos_feats = torch.cat((pos_feats, extra_pos_feats), dim=0)
        neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0)
        # concatenate extra bbreg feats to original_bbreg_feats
        bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0)
        bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples),
                                        axis=0)

        torch.cuda.empty_cache()
        self.model.zero_grad()

        # P4-P6 (identity-initialised, P5 scaled by 10) and W4-W6
        # (zero-initialised) are handed to train_owm; their sizes carry one
        # extra row/column beyond 512*3*3 and 512 (presumably for a bias
        # term - TODO confirm against train_owm).
        # NOTE(review): the `volatile` flag was removed in PyTorch 0.4; on
        # newer versions it is reported to have no effect and
        # torch.no_grad() is the replacement - confirm the target version.
        self.P4 = torch.autograd.Variable(torch.eye(512 * 3 * 3 + 1).type(
            self.dtype),
                                          volatile=True)
        self.P5 = (torch.autograd.Variable(torch.eye(512 + 1).type(self.dtype),
                                           volatile=True) * 10)
        self.P6 = torch.autograd.Variable(torch.eye(512 + 1).type(self.dtype),
                                          volatile=True)

        self.W4 = torch.autograd.Variable(torch.zeros(512 * 3 * 3 + 1,
                                                      512).type(self.dtype),
                                          volatile=True)
        self.W5 = torch.autograd.Variable(torch.zeros(512 + 1,
                                                      512).type(self.dtype),
                                          volatile=True)
        self.W6 = torch.autograd.Variable(torch.zeros(512 + 1,
                                                      2).type(self.dtype),
                                          volatile=True)

        self.flag_old = 0

        # Initial training; train_owm's return value becomes the new flag.
        self.flag_old = train_owm(
            self.model,
            self.criterion,
            self.init_optimizer,
            pos_feats,
            neg_feats,
            opts["maxiter_init"],
            self.P4,
            self.P5,
            self.P6,
            self.W4,
            self.W5,
            self.W6,
            self.flag_old,
        )

        # bbreg train: subsample again, because the extra-crop features were
        # appended after the first subsampling
        if bbreg_feats.size(0) > opts["n_bbreg"]:
            bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
            np.random.shuffle(bbreg_idx)
            bbreg_feats = bbreg_feats[bbreg_idx[0:opts["n_bbreg"]], :]
            bbreg_examples = bbreg_examples[bbreg_idx[0:opts["n_bbreg"]], :]
        self.bbreg = BBRegressor((ishape[1], ishape[0]))
        self.bbreg.train(bbreg_feats, bbreg_examples, self.target_bbox)

        # Seed the per-frame feature lists with a random subset.
        # NOTE(review): pos_feats_all / neg_feats_all are assigned only when
        # more features exist than the update size; otherwise this method
        # leaves them unset - confirm they are initialised elsewhere.
        if pos_feats.size(0) > opts["n_pos_update"]:
            pos_idx = np.asarray(range(pos_feats.size(0)))
            np.random.shuffle(pos_idx)
            self.pos_feats_all = [
                pos_feats.index_select(
                    0,
                    torch.from_numpy(pos_idx[0:opts["n_pos_update"]]).cuda())
            ]
        if neg_feats.size(0) > opts["n_neg_update"]:
            neg_idx = np.asarray(range(neg_feats.size(0)))
            np.random.shuffle(neg_idx)
            self.neg_feats_all = [
                neg_feats.index_select(
                    0,
                    torch.from_numpy(neg_idx[0:opts["n_neg_update"]]).cuda())
            ]

        # Translation spread used when sampling candidates in track().
        self.trans_f = opts["trans_f"]
예제 #3
0
    def track(self, image_file):
        """Locate the target in the next frame and update the model.

        Steps, in order:
          1. Sample candidate boxes around the current ``self.target_bbox``.
          2. Crop the padded region covering all candidates, extract
             ``conv3`` features, RoI-align the candidates and score them
             through the ``fc4`` entry point of ``self.model``.
          3. Set ``self.target_bbox`` to the mean of the 5 best-scoring
             candidates; tracking counts as successful when their mean score
             exceeds ``opts["success_thr"]``.
          4. On success, refine the box with ``self.bbreg`` and collect new
             positive/negative features for later updates.
          5. On failure run a short-term update (``train``); otherwise, every
             ``opts["long_interval"]`` frames, run a long-term update
             (``train_owm``).

        Args:
            image_file: anything ``PIL.Image.open`` accepts.

        Returns:
            The regressed box (mean of the regressed top candidates) on
            success, otherwise the unrefined ``self.target_bbox``.
        """
        self.frame_idx += 1

        # Load image
        cur_image = Image.open(image_file).convert("RGB")
        cur_image = np.asarray(cur_image)

        # Estimate target bbox: draw candidates around the previous box.
        # self.trans_f is enlarged after a failed frame (see below).
        ishape = cur_image.shape
        samples = gen_samples(
            SampleGenerator(
                "gaussian",
                (ishape[1], ishape[0]),
                self.trans_f,
                opts["scale_f"],
                valid=True,
            ),
            self.target_bbox,
            opts["n_samples"],
        )

        # Smallest box containing every candidate after each one is enlarged
        # by the padding factor.
        padded_x1 = (samples[:, 0] - samples[:, 2] *
                     (opts["padding"] - 1.0) / 2.0).min()
        padded_y1 = (samples[:, 1] - samples[:, 3] *
                     (opts["padding"] - 1.0) / 2.0).min()
        padded_x2 = (samples[:, 0] + samples[:, 2] *
                     (opts["padding"] + 1.0) / 2.0).max()
        padded_y2 = (samples[:, 1] + samples[:, 3] *
                     (opts["padding"] + 1.0) / 2.0).max()
        padded_scene_box = np.asarray(
            (padded_x1, padded_y1, padded_x2 - padded_x1,
             padded_y2 - padded_y1))

        # Pull the scene box back when it lies entirely outside the image.
        # NOTE(review): only the fully-outside cases are handled; a box that
        # partially leaves the image is passed to crop_image unchanged.
        if padded_scene_box[0] > cur_image.shape[1]:
            padded_scene_box[0] = cur_image.shape[1] - 1
        if padded_scene_box[1] > cur_image.shape[0]:
            padded_scene_box[1] = cur_image.shape[0] - 1
        if padded_scene_box[0] + padded_scene_box[2] < 0:
            padded_scene_box[2] = -padded_scene_box[0] + 1
        if padded_scene_box[1] + padded_scene_box[3] < 0:
            padded_scene_box[3] = -padded_scene_box[1] + 1

        # Crop size such that the target maps to img_size pixels.
        crop_img_size = (padded_scene_box[2:4] *
                         ((opts["img_size"], opts["img_size"]) /
                          self.target_bbox[2:4])).astype("int64")
        # cur_image_var is not used afterwards in this method.
        cropped_image, cur_image_var = self.img_crop_model.crop_image(
            cur_image, np.reshape(padded_scene_box, (1, 4)), crop_img_size)
        cropped_image = cropped_image - 128.0

        self.model.eval()
        feat_map = self.model(cropped_image, out_layer="conv3")

        # relative target bbox with padded_scene_box (not used afterwards in
        # this method)
        rel_target_bbox = np.copy(self.target_bbox)
        rel_target_bbox[0:2] -= padded_scene_box[0:2]

        # Extract sample features and get target location
        batch_num = np.zeros((samples.shape[0], 1))
        sample_rois = np.copy(samples)
        sample_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2],
                                                    (1, 2)),
                                         sample_rois.shape[0],
                                         axis=0)
        sample_rois = samples2maskroi(
            sample_rois,
            self.model.receptive_field,
            (opts["img_size"], opts["img_size"]),
            self.target_bbox[2:4],
            opts["padding"],
        )
        sample_rois = np.concatenate((batch_num, sample_rois), axis=1)
        sample_rois = Variable(torch.from_numpy(
            sample_rois.astype("float32"))).cuda()
        sample_feats = self.model.roi_align_model(feat_map, sample_rois)
        sample_feats = sample_feats.view(sample_feats.size(0), -1).clone()
        sample_scores = self.model(sample_feats, in_layer="fc4")
        # Column 1 is presumably the target-class score - TODO confirm.  The
        # new box is the mean of the 5 best-scoring candidates.
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.data.cpu().numpy()
        target_score = top_scores.data.mean()
        self.target_bbox = samples[top_idx].mean(axis=0)

        success = target_score > opts["success_thr"]

        # Expand search area at failure
        if success:
            self.trans_f = opts["trans_f"]
        else:
            self.trans_f = opts["trans_f_expand"]

        # Bbox regression: refine the top candidates and average them; on
        # failure the unrefined estimate is returned instead.
        if success:
            bbreg_feats = sample_feats[top_idx, :]
            bbreg_samples = samples[top_idx]
            bbreg_samples = self.bbreg.predict(bbreg_feats.data, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = self.target_bbox

        # Data collect: new training features are gathered only from frames
        # that were tracked successfully.
        if success:

            # Draw pos/neg samples
            pos_examples = gen_samples(
                SampleGenerator("gaussian", (ishape[1], ishape[0]), 0.1, 1.2),
                self.target_bbox,
                opts["n_pos_update"],
                opts["overlap_pos_update"],
            )
            neg_examples = gen_samples(
                SampleGenerator("uniform", (ishape[1], ishape[0]), 1.5, 1.2),
                self.target_bbox,
                opts["n_neg_update"],
                opts["overlap_neg_update"],
            )

            # Padded scene box covering all negative samples.
            padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] *
                         (opts["padding"] - 1.0) / 2.0).min()
            padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] *
                         (opts["padding"] - 1.0) / 2.0).min()
            padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] *
                         (opts["padding"] + 1.0) / 2.0).max()
            padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] *
                         (opts["padding"] + 1.0) / 2.0).max()
            padded_scene_box = np.reshape(
                np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                            padded_y2 - padded_y1)),
                (1, 4),
            )

            # A single un-jittered scene box is used here, so the loop below
            # runs exactly once.
            scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
            jitter_scale = [1.0]

            for bidx in range(0, scene_boxes.shape[0]):
                crop_img_size = (scene_boxes[bidx, 2:4] *
                                 ((opts["img_size"], opts["img_size"]) /
                                  self.target_bbox[2:4])
                                 ).astype("int64") * jitter_scale[bidx]
                cropped_image, cur_image_var = self.img_crop_model.crop_image(
                    cur_image, np.reshape(scene_boxes[bidx], (1, 4)),
                    crop_img_size)
                cropped_image = cropped_image - 128.0

                feat_map = self.model(cropped_image, out_layer="conv3")

                # Not used afterwards in this method.
                rel_target_bbox = np.copy(self.target_bbox)
                rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

                # Positive RoIs: shift into crop coordinates, convert with
                # samples2maskroi and prepend a zero batch-index column.
                batch_num = np.zeros((pos_examples.shape[0], 1))
                cur_pos_rois = np.copy(pos_examples)
                cur_pos_rois[:, 0:2] -= np.repeat(
                    np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                    cur_pos_rois.shape[0],
                    axis=0,
                )
                scaled_obj_size = float(opts["img_size"]) * jitter_scale[bidx]
                cur_pos_rois = samples2maskroi(
                    cur_pos_rois,
                    self.model.receptive_field,
                    (scaled_obj_size, scaled_obj_size),
                    self.target_bbox[2:4],
                    opts["padding"],
                )
                cur_pos_rois = np.concatenate((batch_num, cur_pos_rois),
                                              axis=1)
                cur_pos_rois = Variable(
                    torch.from_numpy(cur_pos_rois.astype("float32"))).cuda()
                cur_pos_feats = self.model.roi_align_model(
                    feat_map, cur_pos_rois)
                cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                                   -1).data.clone()

                # Negative RoIs: same procedure.
                batch_num = np.zeros((neg_examples.shape[0], 1))
                cur_neg_rois = np.copy(neg_examples)
                cur_neg_rois[:, 0:2] -= np.repeat(
                    np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                    cur_neg_rois.shape[0],
                    axis=0,
                )
                cur_neg_rois = samples2maskroi(
                    cur_neg_rois,
                    self.model.receptive_field,
                    (scaled_obj_size, scaled_obj_size),
                    self.target_bbox[2:4],
                    opts["padding"],
                )
                cur_neg_rois = np.concatenate((batch_num, cur_neg_rois),
                                              axis=1)
                cur_neg_rois = Variable(
                    torch.from_numpy(cur_neg_rois.astype("float32"))).cuda()
                cur_neg_feats = self.model.roi_align_model(
                    feat_map, cur_neg_rois)
                cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                                   -1).data.clone()

                self.feat_dim = cur_pos_feats.size(-1)

                if bidx == 0:
                    pos_feats = cur_pos_feats  # index select
                    neg_feats = cur_neg_feats
                else:
                    pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
                    neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)

            # Randomly subsample down to the per-frame update sizes.
            if pos_feats.size(0) > opts["n_pos_update"]:
                pos_idx = np.asarray(range(pos_feats.size(0)))
                np.random.shuffle(pos_idx)
                pos_feats = pos_feats.index_select(
                    0,
                    torch.from_numpy(pos_idx[0:opts["n_pos_update"]]).cuda())
            if neg_feats.size(0) > opts["n_neg_update"]:
                neg_idx = np.asarray(range(neg_feats.size(0)))
                np.random.shuffle(neg_idx)
                neg_feats = neg_feats.index_select(
                    0,
                    torch.from_numpy(neg_idx[0:opts["n_neg_update"]]).cuda())

            # Append this frame's features and drop the oldest entry once a
            # history exceeds its limit (positives: n_frames_long, negatives:
            # n_frames_short).
            self.pos_feats_all.append(pos_feats)
            self.neg_feats_all.append(neg_feats)

            if len(self.pos_feats_all) > opts["n_frames_long"]:
                del self.pos_feats_all[0]
            if len(self.neg_feats_all) > opts["n_frames_short"]:
                del self.neg_feats_all[0]

        # Short term update: retrain with `train` on the most recent frames
        # whenever tracking failed.  torch.stack requires every stored
        # per-frame tensor to have the same shape.
        if not success:
            nframes = min(opts["n_frames_short"], len(self.pos_feats_all))
            pos_data = torch.stack(self.pos_feats_all[-nframes:],
                                   0).view(-1, self.feat_dim)
            neg_data = torch.stack(self.neg_feats_all,
                                   0).view(-1, self.feat_dim)
            self.flag_old = train(
                self.model,
                self.criterion,
                self.update_optimizer,
                pos_data,
                neg_data,
                opts["maxiter_update"],
                self.W4,
                self.W5,
                self.W6,
                self.flag_old,
            )

        # Long term update: periodic retraining with train_owm.
        # NOTE(review): this branch also limits the positives with
        # n_frames_short, although the history keeps up to n_frames_long
        # entries - confirm whether n_frames_long was intended here.
        elif self.frame_idx % opts["long_interval"] == 0:
            nframes = min(opts["n_frames_short"], len(self.pos_feats_all))
            pos_data = torch.stack(self.pos_feats_all[-nframes:],
                                   0).view(-1, self.feat_dim)
            neg_data = torch.stack(self.neg_feats_all,
                                   0).view(-1, self.feat_dim)
            self.flag_old = train_owm(
                self.model,
                self.criterion,
                self.update_optimizer_owm,
                pos_data,
                neg_data,
                opts["maxiter_update"],
                self.P4,
                self.P5,
                self.P6,
                self.W4,
                self.W5,
                self.W6,
                self.flag_old,
            )

        return bbreg_bbox
예제 #4
0
    def update(self, image):
        """Process the next frame and return the current target box.

        The actor network proposes a displacement of the previous box. The
        moved box is scored with ``forward_samples(..., out_layer='fc6')`` and
        accepted when the score in column 1 is positive and the crop's
        Laplacian variance exceeds 100. Accepted frames are pushed into the
        ``img_learn`` / ``pos_learn`` / ``score_pos`` / ``frame_learn``
        buffers; rejected frames are recorded in ``self.pf_frame``.

        When the retraining condition on ``frame_learn`` / ``data_frame``
        holds, the model is fine-tuned on features of the most recent buffered
        frames and, if the frame is not judged blurry, the target is
        re-detected from sampled candidate boxes.

        Args:
            image: Current frame. It must support ``.resize``, ``.size`` and
                conversion through ``np.array`` (presumably a ``PIL.Image`` --
                confirm against the caller).

        Returns:
            ``self.target_bbox`` after processing this frame.
        """
        self.frame += 1
        update_lenth = 10  # retraining window; the buffers keep twice as many
        np_image = np.array(image)

        # Sharpness of the current target crop. It is only measured when the
        # first frame itself was sharp (variance > 200); otherwise a constant
        # is used so that the blur checks below never trigger.
        if self.imageVar_first > 200:
            imageVar = cv2.Laplacian(
                crop_image_blur(np_image, self.target_bbox), cv2.CV_64F).var()
        else:
            imageVar = 200

        # Ask the actor for a displacement of the previous box.
        img_l = getbatch_actor(np_image, self.target_bbox.reshape([1, 4]))
        torch_image = loader(image.resize(
            (225, 225), Image.ANTIALIAS)).unsqueeze(0).cuda()
        deta_pos = self.actor(img_l, torch_image)
        deta_pos = deta_pos.data.clone().cpu().numpy()

        # Column 2 of the actor output is zeroed when the actor initialisation
        # flag is set, when its magnitude exceeds 0.05, or when the previous
        # frame was a rejected one.
        # NOTE(review): column 2 is presumably the scale change -- confirm.
        if self.deta_flag:
            deta_pos[:, 2] = 0
        if deta_pos[:, 2] > 0.05 or deta_pos[:, 2] < -0.05:
            deta_pos[:, 2] = 0
        if self.pf_frame and self.frame == (self.pf_frame[-1] + 1):
            deta_pos[:, 2] = 0

        pos_ = np.round(
            move_crop(self.target_bbox, deta_pos,
                      (image.size[1], image.size[0]), self.rate))
        r = forward_samples(self.model,
                            image,
                            np.array(pos_).reshape([1, 4]),
                            out_layer='fc6')
        r = r.cpu().numpy()

        # Bound up front so the rejected path can read it below; the original
        # left it unbound there and raised UnboundLocalError.
        success = 0
        if r[0][1] > 0 and imageVar > 100:
            # Actor proposal accepted.
            self.target_bbox = pos_
            bbreg_bbox = pos_
            success = 1

            self.img_learn.append(image)
            self.pos_learn.append(self.target_bbox)
            self.score_pos.append(r[0][1])
            self.frame_learn.append(self.frame)
            while len(self.img_learn) > update_lenth * 2:
                del self.img_learn[0]
                del self.pos_learn[0]
                del self.score_pos[0]
                del self.frame_learn[0]

            # NOTE(review): indexed assignment needs self.result / result_bb
            # to be pre-sized (or mappings); confirm how they are created.
            self.result[self.frame] = self.target_bbox
            self.result_bb[self.frame] = bbreg_bbox
        else:
            # Actor proposal rejected: count it and remember the frame.
            self.detetion += 1
            self.pf_frame.append(self.frame)

        # NOTE(review): self.data_frame must be initialised (non-empty)
        # elsewhere; it is not set in the code visible here.
        if (len(self.frame_learn) == update_lenth * 2 and self.data_frame[-1]
                not in self.frame_learn) or self.data_frame[-1] == 0:
            # Gather features for the newest `update_lenth` buffered frames,
            # reusing cached features for frames already in data_frame.
            first = max(0, len(self.img_learn) - update_lenth)
            pos_chunks = []
            neg_chunks = []
            for num in range(first, len(self.img_learn)):
                frame_no = self.frame_learn[num]
                if frame_no not in self.data_frame:
                    gt_ = self.pos_learn[num]
                    image_ = self.img_learn[num]
                    pos_examples = np.round(
                        gen_samples(self.pos_generator, gt_,
                                    opts['n_pos_update'],
                                    opts['overlap_pos_update']))
                    neg_examples = np.round(
                        gen_samples(self.neg_generator, gt_,
                                    opts['n_neg_update'],
                                    opts['overlap_neg_update']))
                    pos_feats_ = forward_samples(self.model, image_,
                                                 pos_examples)
                    neg_feats_ = forward_samples(self.model, image_,
                                                 neg_examples)

                    self.pos_feats_all.append(pos_feats_)
                    self.neg_feats_all.append(neg_feats_)
                    self.data_frame.append(frame_no)
                    if len(self.pos_feats_all) > 10:
                        del self.pos_feats_all[0]
                        del self.neg_feats_all[0]
                        del self.data_frame[0]
                else:
                    cached = self.data_frame.index(frame_no)
                    pos_feats_ = self.pos_feats_all[cached]
                    neg_feats_ = self.neg_feats_all[cached]

                pos_chunks.append(pos_feats_)
                neg_chunks.append(neg_feats_)

            pos_feats = torch.cat(pos_chunks, 0)
            neg_feats = torch.cat(neg_chunks, 0)
            train(self.model, self.criterion, self.update_optimizer, pos_feats,
                  neg_feats, opts['maxiter_update'])

            # NOTE(review): re-detection is nested inside the retraining
            # branch (and under the non-blurry case) exactly as in the
            # original; this nesting looks possibly unintended -- confirm.
            if success:
                self.sample_generator.set_trans_f(opts['trans_f'])
            else:
                self.sample_generator.set_trans_f(opts['trans_f_expand'])
            if imageVar < 100:
                samples = gen_samples(self.init_generator, self.target_bbox,
                                      opts['n_samples'])
            else:
                samples = gen_samples(self.sample_generator, self.target_bbox,
                                      opts['n_samples'])

                # During the first frames, or when the box area has drifted
                # far from the initial area, add candidates with the initial
                # size centred on the current box. `self.frame` replaces the
                # undefined name `i` used by the original.
                init_area = self.init_bbox[2] * self.init_bbox[3]
                big_scale_change = False
                if init_area > 1000:
                    area_ratio = (self.target_bbox[2] * self.target_bbox[3] /
                                  init_area)
                    big_scale_change = area_ratio > 2.5 or area_ratio < 0.4
                if self.frame < 20 or big_scale_change:
                    self.sample_generator.set_trans_f(opts['trans_f_expand'])
                    samples_ = np.round(
                        gen_samples(
                            self.sample_generator,
                            np.hstack([
                                self.target_bbox[0:2] +
                                self.target_bbox[2:4] / 2 -
                                self.init_bbox[2:4] / 2, self.init_bbox[2:4]
                            ]), opts['n_samples']))
                    samples = np.vstack([samples, samples_])

                sample_scores = forward_samples(self.model,
                                                image,
                                                samples,
                                                out_layer='fc6')
                # The new estimate is the mean of the five best candidates.
                top_scores, top_idx = sample_scores[:, 1].topk(5)
                top_idx = top_idx.cpu().numpy()
                target_score = top_scores.mean()
                self.target_bbox = samples[top_idx].mean(axis=0)
                success = target_score > opts['success_thr']

                if success:
                    # Bbox regression
                    bbreg_samples = samples[top_idx]
                    bbreg_feats = forward_samples(self.model, image,
                                                  bbreg_samples)
                    bbreg_samples = self.bbreg.predict(bbreg_feats,
                                                       bbreg_samples)
                    bbreg_bbox = bbreg_samples.mean(axis=0)

                    self.img_learn.append(image)
                    self.pos_learn.append(self.target_bbox)
                    # Use the score computed above; `self.target_score` is
                    # never assigned in the code visible here.
                    self.score_pos.append(target_score)
                    self.frame_learn.append(self.frame)
                    while len(self.img_learn) > 2 * update_lenth:
                        del self.img_learn[0]
                        del self.pos_learn[0]
                        del self.score_pos[0]
                        del self.frame_learn[0]
                else:
                    # Copy previous result at failure. The tracker state lives
                    # in self.target_bbox; the original wrote to a local that
                    # was unbound on the success path.
                    self.target_bbox = self.result[self.frame - 1]
                    bbreg_bbox = self.result_bb[self.frame - 1]

                # Save result
                self.result[self.frame] = self.target_bbox
                self.result_bb[self.frame] = bbreg_bbox

        return self.target_bbox
예제 #5
0
    def init(self, image, init_bbox):
        """Initialise the tracker state from the first frame and its box.

        Stores the initial box, fits a ``BBRegressor`` on features of sampled
        boxes, trains ``self.model`` with ``self.init_optimizer`` on positive
        and negative sample features, initialises the actor via
        ``init_actor``, creates the sample generators used later, and seeds
        the feature and sample buffers with first-frame data.

        Args:
            image: First frame; converted with ``np.asarray`` before use.
            init_bbox: Initial target box with four components; it is
                reshaped to ``[1, 4]`` when scored.
        """
        # Ratio of box component 2 to component 3
        # (presumably width / height -- TODO confirm the box layout).
        self.rate = init_bbox[2] / init_bbox[3]
        self.target_bbox = np.array(init_bbox)
        self.init_bbox = np.array(init_bbox)

        # The initial box is the first entry of both result lists.
        self.result.append(self.target_bbox)
        self.result_bb.append(self.target_bbox)
        image = np.asarray(image)

        # Init model
        # Train the bounding-box regressor on features of boxes sampled
        # around the target.
        bbreg_examples = gen_samples(
            SampleGenerator('uniform', image.shape, 0.3, 1.5,
                            1.1), self.target_bbox, opts['n_bbreg'],
            opts['overlap_bbreg'], opts['scale_bbreg'])
        bbreg_feats = forward_samples(self.model, image, bbreg_examples)
        # NOTE(review): `image` is an ndarray here, so `image.size` is the
        # element count, not a (width, height) pair -- confirm what
        # BBRegressor expects.
        self.bbreg = BBRegressor(image.size)
        self.bbreg.train(bbreg_feats, bbreg_examples, self.target_bbox)

        # Positive samples come from a 'gaussian' generator; negatives are
        # half 'uniform' and half 'whole' samples, shuffled together.
        pos_examples = gen_samples(
            SampleGenerator('gaussian', image.shape, 0.1, 1.2),
            self.target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
        neg_examples = np.concatenate([
            gen_samples(SampleGenerator('uniform', image.shape, 1, 2,
                                        1.1), self.target_bbox,
                        opts['n_neg_init'] // 2, opts['overlap_neg_init']),
            gen_samples(SampleGenerator('whole', image.shape, 0, 1.2,
                                        1.1), self.target_bbox,
                        opts['n_neg_init'] // 2, opts['overlap_neg_init'])
        ])
        neg_examples = np.random.permutation(neg_examples)

        # Initial training of the model on first-frame features.
        pos_feats = forward_samples(self.model, image, pos_examples)
        neg_feats = forward_samples(self.model, image, neg_examples)
        train(self.model, self.criterion, self.init_optimizer, pos_feats,
              neg_feats, opts['maxiter_init'])
        # NOTE(review): init_actor calls image.resize(..., Image.ANTIALIAS),
        # which looks like it expects a PIL image, but `image` was converted
        # to an ndarray above -- confirm.
        self.deta_flag = init_actor(self.actor, image, self.target_bbox)
        # Generators kept on the instance for later frames.
        self.init_generator = SampleGenerator('gaussian',
                                              image.shape,
                                              opts['trans_f'],
                                              1,
                                              valid=False)
        self.sample_generator = SampleGenerator('gaussian',
                                                image.shape,
                                                opts['trans_f'],
                                                opts['scale_f'],
                                                valid=False)
        self.pos_generator = SampleGenerator('gaussian', image.shape, 0.1, 1.2)
        self.neg_generator = SampleGenerator('uniform', image.shape, 1.5, 1.2)
        # Feature caches start with a slice of the first-frame features.
        self.pos_feats_all = [pos_feats[:opts['n_pos_update']]]
        self.neg_feats_all = [neg_feats[:opts['n_neg_update']]]
        # Score of the initial box itself; column 1 is stored below.
        pos_score = forward_samples(self.model,
                                    image,
                                    np.array(init_bbox).reshape([1, 4]),
                                    out_layer='fc6')
        # Sample buffers, each seeded with the first frame (frame index 0).
        self.img_learn = [image]
        self.pos_learn = [init_bbox]
        self.score_pos = [pos_score.cpu().numpy()[0][1]]
        self.frame_learn = [0]
        # No frames recorded here yet.
        self.pf_frame = []
        # Laplacian variance of the initial target crop.
        self.imageVar_first = cv2.Laplacian(
            crop_image_blur(np.array(image), self.target_bbox),
            cv2.CV_64F).var()