def init_actor(actor, image, gt):
    """Fit the actor network to the first frame by supervised regression.

    Boxes are sampled around ``gt`` and the actor is trained with an MSE loss
    to predict, for each sampled box, the value ``cal_distance`` returns for
    that box and ``gt``.

    Args:
        actor: Network to train; it is moved to the GPU and put in train mode.
        image: First frame as a PIL image (``image.size`` and
            ``image.resize`` are used).
        gt: Target box in ``image``, in the layout ``gen_samples`` and
            ``cal_distance`` expect.

    Returns:
        int: ``0`` if the loss fell below ``1e-4`` (training stops at once),
        ``1`` if the full iteration budget was used.

    Raises:
        ValueError: If no training boxes could be sampled around ``gt``.

    Note:
        Reseeds the global NumPy, torch and CUDA random generators.
    """
    # Fixed seeds: the sampled boxes and the optimisation run are repeatable.
    np.random.seed(123)
    torch.manual_seed(456)
    torch.cuda.manual_seed(789)

    batch_num = 64
    maxiter = 80

    actor = actor.cuda()
    actor.train()
    init_optimizer = torch.optim.Adam(actor.parameters(), lr=0.0001)
    loss_func = torch.nn.MSELoss()

    actor_samples = np.round(
        gen_samples(SampleGenerator('uniform', image.size, 0.3, 1.5, None),
                    gt, 1500, [0.6, 1], [0.9, 1.1]))
    n_samples = actor_samples.shape[0]
    if n_samples == 0:
        # Without this guard the index-padding loop below never terminates.
        raise ValueError("init_actor: no training samples were generated")

    idx = np.random.permutation(n_samples)
    batch_img = getbatch_actor(np.array(image), actor_samples)
    batch_distance = cal_distance(actor_samples,
                                  np.tile(gt, [n_samples, 1]))
    batch_distance = np.array(batch_distance).astype(np.float32)

    # Chain fresh permutations until every iteration can draw a full batch.
    while len(idx) < batch_num * maxiter:
        idx = np.concatenate([idx, np.random.permutation(n_samples)])

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    torch_image = loader(
        image.resize((225, 225), Image.LANCZOS)).unsqueeze(0).cuda()

    pointer = 0
    for step in range(maxiter):
        batch_end = pointer + batch_num
        cur_idx = idx[pointer:batch_end]
        pointer = batch_end

        feat = actor(batch_img[cur_idx],
                     torch_image.repeat(batch_num, 1, 1, 1))
        loss = loss_func(
            feat,
            Variable(torch.FloatTensor(batch_distance[cur_idx])).cuda())

        actor.zero_grad()
        loss.backward()
        init_optimizer.step()

        loss_value = loss.item()
        if opts['show_train']:
            print("Iter %d, Loss %.10f" % (step, loss_value))
        if loss_value < 0.0001:
            # Converged early.
            return 0
    return 1
def initialize(self, image_file, box):
    """Initialise the tracker on the first frame of a sequence.

    Loads ``image_file``, draws positive, negative and bounding-box-regression
    samples around ``box``, pools their RoI features from the ``conv3``
    feature map of the (optionally jittered) scene crop and of 100 randomly
    shifted / rescaled target crops, trains the classifier with ``train_owm``,
    fits the bounding-box regressor and seeds the online sample memory.

    Args:
        image_file: Anything ``Image.open`` accepts.
        box: Initial target box ``(x, y, w, h)``; the code treats ``box[0:2]``
            as the minimum corner and ``box[2:4]`` as the size.

    Side effects:
        Sets ``frame_idx``, ``target_bbox``, ``feat_dim``, ``P4``-``P6``,
        ``W4``-``W6``, ``flag_old``, ``bbreg``, ``pos_feats_all``,
        ``neg_feats_all`` and ``trans_f`` on ``self``; puts ``self.model`` in
        eval mode before feature extraction.
    """
    self.frame_idx = 0
    padding = opts["padding"]
    obj_px = opts["img_size"]

    # Load first image
    cur_image = np.asarray(Image.open(image_file).convert("RGB"))
    self.target_bbox = np.array(box)
    ishape = cur_image.shape
    frame_wh = (ishape[1], ishape[0])  # (width, height)

    def build_rois(examples, origin, obj_size, batch_index=0):
        """Shift boxes into crop coordinates, convert them with
        samples2maskroi and prepend the batch-index column."""
        rois = np.copy(examples)
        rois[:, 0:2] -= np.reshape(origin, (1, 2))
        rois = samples2maskroi(
            rois,
            self.model.receptive_field,
            (obj_size, obj_size),
            self.target_bbox[2:4],
            padding,
        )
        batch_col = batch_index * np.ones((examples.shape[0], 1))
        return np.concatenate((batch_col, rois), axis=1)

    def roi_features(feat_map, rois):
        """Pool one flattened feature vector per RoI, detached from autograd."""
        rois = Variable(torch.from_numpy(rois.astype("float32"))).cuda()
        feats = self.model.roi_align_model(feat_map, rois)
        return feats.view(feats.size(0), -1).data.clone()

    def random_subset(count, limit):
        """Return ``limit`` distinct indices in ``[0, count)`` in random order."""
        order = np.asarray(range(count))
        np.random.shuffle(order)
        return order[0:limit]

    # Draw pos/neg/bbreg samples on the full frame
    pos_examples = gen_samples(
        SampleGenerator("gaussian", frame_wh, 0.1, 1.2),
        self.target_bbox,
        opts["n_pos_init"],
        opts["overlap_pos_init"],
    )
    neg_examples = gen_samples(
        SampleGenerator("uniform", frame_wh, 1, 2, 1.1),
        self.target_bbox,
        opts["n_neg_init"],
        opts["overlap_neg_init"],
    )
    neg_examples = np.random.permutation(neg_examples)
    scene_bbreg_examples = gen_samples(
        SampleGenerator("uniform", frame_wh, 0.3, 1.5, 1.1),
        self.target_bbox,
        opts["n_bbreg"],
        opts["overlap_bbreg"],
        opts["scale_bbreg"],
    )

    # Scene box: the smallest box that contains every padded negative sample.
    padded_x1 = (neg_examples[:, 0] -
                 neg_examples[:, 2] * (padding - 1.0) / 2.0).min()
    padded_y1 = (neg_examples[:, 1] -
                 neg_examples[:, 3] * (padding - 1.0) / 2.0).min()
    padded_x2 = (neg_examples[:, 0] +
                 neg_examples[:, 2] * (padding + 1.0) / 2.0).max()
    padded_y2 = (neg_examples[:, 1] +
                 neg_examples[:, 3] * (padding + 1.0) / 2.0).max()
    padded_scene_box = np.reshape(
        np.asarray((padded_x1, padded_y1,
                    padded_x2 - padded_x1, padded_y2 - padded_y1)),
        (1, 4),
    )

    scene_boxes = np.copy(padded_scene_box)
    jitter_scale = [1.0]
    if opts["jitter"]:
        # horizontal shift
        shifted_x = np.copy(padded_scene_box)
        shifted_x[0, 0] -= 4.0
        # vertical shift
        shifted_y = np.copy(padded_scene_box)
        shifted_y[0, 1] -= 4.0
        # scale reduce / enlarge: same box, only the crop resolution changes
        rescaled = [np.copy(padded_scene_box) for _ in range(4)]
        scene_boxes = np.concatenate(
            [scene_boxes, shifted_x, shifted_y] + rescaled, axis=0)
        jitter_scale = [
            1.0, 1.0, 1.0,
            1.1**(-1), 1.1**(1), 1.1**(-2), 1.1**(2),
        ]

    # Features of all three sample sets, once per (jittered) scene crop.
    self.model.eval()
    pos_chunks, neg_chunks, bbreg_chunks = [], [], []
    for bidx in range(scene_boxes.shape[0]):
        scene_box = scene_boxes[bidx]
        crop_img_size = (scene_box[2:4] * (
            (obj_px, obj_px) / self.target_bbox[2:4])
        ).astype("int64") * jitter_scale[bidx]
        cropped_image, _ = self.img_crop_model.crop_image(
            cur_image, np.reshape(scene_box, (1, 4)), crop_img_size)
        cropped_image = cropped_image - 128.0

        feat_map = self.model(cropped_image, out_layer="conv3")

        origin = scene_box[0:2]
        scaled_obj_size = float(obj_px) * jitter_scale[bidx]
        pos_chunks.append(roi_features(
            feat_map, build_rois(pos_examples, origin, scaled_obj_size)))
        neg_chunks.append(roi_features(
            feat_map, build_rois(neg_examples, origin, scaled_obj_size)))
        bbreg_chunks.append(roi_features(
            feat_map,
            build_rois(scene_bbreg_examples, origin, scaled_obj_size)))

    pos_feats = torch.cat(pos_chunks, dim=0)
    neg_feats = torch.cat(neg_chunks, dim=0)
    bbreg_feats = torch.cat(bbreg_chunks, dim=0)
    # The same boxes were pooled from every scene crop, so repeat them.
    bbreg_examples = np.concatenate(
        [scene_bbreg_examples] * scene_boxes.shape[0], axis=0)
    self.feat_dim = pos_feats.size(-1)

    if pos_feats.size(0) > opts["n_pos_init"]:
        keep = random_subset(pos_feats.size(0), opts["n_pos_init"])
        pos_feats = pos_feats[keep, :]
    if neg_feats.size(0) > opts["n_neg_init"]:
        keep = random_subset(neg_feats.size(0), opts["n_neg_init"])
        neg_feats = neg_feats[keep, :]
    if bbreg_feats.size(0) > opts["n_bbreg"]:
        keep = random_subset(bbreg_feats.size(0), opts["n_bbreg"])
        bbreg_feats = bbreg_feats[keep, :]
        bbreg_examples = bbreg_examples[keep, :]

    # Extra crops centred on the target with random shift and scale.
    extra_crop_img_size = np.array((obj_px, obj_px)) * (padding + 0.6)
    replicateNum = 100
    # Floor division: with "/" Python 3 would pass float sample counts on to
    # gen_samples. The results are unchanged for integer options on Python 2.
    n_extra_pos = opts["n_pos_init"] // replicateNum
    n_extra_neg = opts["n_neg_init"] // replicateNum // 4
    n_extra_bbreg = opts["n_bbreg"] // replicateNum // 4

    extra_crops = []
    extra_pos_rois, extra_neg_rois, extra_bbreg_rois = [], [], []
    extra_bbreg_examples = []
    for iidx in range(replicateNum):
        # Target box enlarged by (padding + 0.6) around its centre ...
        extra_scene_box = np.copy(self.target_bbox)
        box_center = extra_scene_box[0:2] + extra_scene_box[2:4] / 2.0
        box_size = extra_scene_box[2:4] * (padding + 0.6)
        extra_scene_box[0:2] = box_center - box_size / 2.0
        extra_scene_box[2:4] = box_size

        # ... then shifted by at most 4 px and scaled by 1.1**[-2, 2].
        extra_shift_offset = np.clip(2.0 * np.random.randn(2), -4, 4)
        cur_extra_scale = 1.1**np.clip(np.random.randn(1), -2, 2)
        extra_scene_box[0] += extra_shift_offset[0]
        extra_scene_box[1] += extra_shift_offset[1]
        extra_scene_box[2:4] *= cur_extra_scale[0]
        # The crop is resized to a fixed size, so a larger scene box makes
        # the object proportionally smaller inside it.
        scaled_obj_size = float(obj_px) / cur_extra_scale[0]

        cur_crop, _ = self.img_crop_model.crop_image(
            cur_image, np.reshape(extra_scene_box, (1, 4)),
            extra_crop_img_size)
        extra_crops.append(cur_crop.detach())

        cur_pos_examples = gen_samples(
            SampleGenerator("gaussian", frame_wh, 0.1, 1.2),
            self.target_bbox,
            n_extra_pos,
            opts["overlap_pos_init"],
        )
        cur_neg_examples = gen_samples(
            SampleGenerator("uniform", frame_wh, 0.3, 2, 1.1),
            self.target_bbox,
            n_extra_neg,
            opts["overlap_neg_init"],
        )
        cur_bbreg_examples = gen_samples(
            SampleGenerator("uniform", frame_wh, 0.3, 1.5, 1.1),
            self.target_bbox,
            n_extra_bbreg,
            opts["overlap_bbreg"],
            opts["scale_bbreg"],
        )

        # The batch column is the crop index inside the stacked crop tensor.
        origin = extra_scene_box[0:2]
        extra_pos_rois.append(
            build_rois(cur_pos_examples, origin, scaled_obj_size, iidx))
        extra_neg_rois.append(
            build_rois(cur_neg_examples, origin, scaled_obj_size, iidx))
        extra_bbreg_rois.append(
            build_rois(cur_bbreg_examples, origin, scaled_obj_size, iidx))
        extra_bbreg_examples.append(np.copy(cur_bbreg_examples))

    # One forward pass over all extra crops, then pool the three RoI sets.
    extra_cropped_image = torch.cat(extra_crops, dim=0)
    extra_cropped_image -= 128.0
    extra_feat_maps = self.model(extra_cropped_image, out_layer="conv3")
    extra_pos_feats = roi_features(
        extra_feat_maps, np.concatenate(extra_pos_rois, axis=0))
    extra_neg_feats = roi_features(
        extra_feat_maps, np.concatenate(extra_neg_rois, axis=0))
    extra_bbreg_feats = roi_features(
        extra_feat_maps, np.concatenate(extra_bbreg_rois, axis=0))

    # concatenate extra features to original features
    pos_feats = torch.cat((pos_feats, extra_pos_feats), dim=0)
    neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0)
    bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0)
    bbreg_examples = np.concatenate(
        (bbreg_examples, np.concatenate(extra_bbreg_examples, axis=0)),
        axis=0)

    torch.cuda.empty_cache()
    self.model.zero_grad()

    # State handed to train_owm / train. The sizes match a 512*3*3 input
    # (+1) into 512 units and 512 (+1) into 512 and 2 units.
    # NOTE(review): `volatile=True` is a legacy autograd flag -- confirm the
    # installed torch version still honours it.
    self.P4 = torch.autograd.Variable(
        torch.eye(512 * 3 * 3 + 1).type(self.dtype), volatile=True)
    self.P5 = (torch.autograd.Variable(
        torch.eye(512 + 1).type(self.dtype), volatile=True) * 10)
    self.P6 = torch.autograd.Variable(
        torch.eye(512 + 1).type(self.dtype), volatile=True)
    self.W4 = torch.autograd.Variable(
        torch.zeros(512 * 3 * 3 + 1, 512).type(self.dtype), volatile=True)
    self.W5 = torch.autograd.Variable(
        torch.zeros(512 + 1, 512).type(self.dtype), volatile=True)
    self.W6 = torch.autograd.Variable(
        torch.zeros(512 + 1, 2).type(self.dtype), volatile=True)
    self.flag_old = 0

    # Initial training
    self.flag_old = train_owm(
        self.model,
        self.criterion,
        self.init_optimizer,
        pos_feats,
        neg_feats,
        opts["maxiter_init"],
        self.P4,
        self.P5,
        self.P6,
        self.W4,
        self.W5,
        self.W6,
        self.flag_old,
    )

    # bbreg train
    if bbreg_feats.size(0) > opts["n_bbreg"]:
        keep = random_subset(bbreg_feats.size(0), opts["n_bbreg"])
        bbreg_feats = bbreg_feats[keep, :]
        bbreg_examples = bbreg_examples[keep, :]
    self.bbreg = BBRegressor(frame_wh)
    self.bbreg.train(bbreg_feats, bbreg_examples, self.target_bbox)

    # Seed the online memory. It is always assigned, so track() never meets a
    # missing attribute when fewer features than the update size exist.
    if pos_feats.size(0) > opts["n_pos_update"]:
        keep = random_subset(pos_feats.size(0), opts["n_pos_update"])
        pos_feats = pos_feats.index_select(
            0, torch.from_numpy(keep).cuda())
    self.pos_feats_all = [pos_feats]
    if neg_feats.size(0) > opts["n_neg_update"]:
        keep = random_subset(neg_feats.size(0), opts["n_neg_update"])
        neg_feats = neg_feats.index_select(
            0, torch.from_numpy(keep).cuda())
    self.neg_feats_all = [neg_feats]

    self.trans_f = opts["trans_f"]
def track(self, image_file):
    """Locate the target in the next frame and update the model online.

    Candidate boxes are sampled around ``self.target_bbox``, scored by the
    classifier head on RoI features of one padded scene crop, and the mean of
    the five best candidates becomes the new ``self.target_bbox``. On success
    (mean top-5 score above ``opts["success_thr"]``) the box is refined by the
    bounding-box regressor and new positive / negative features are added to
    the sample memory. The classifier is retrained with ``train`` after a
    failure, or with ``train_owm`` every ``opts["long_interval"]`` frames.

    Args:
        image_file: Anything ``Image.open`` accepts.

    Returns:
        The mean of the regressed top-5 boxes on success, otherwise the mean
        of the raw top-5 boxes.
    """
    self.frame_idx += 1
    padding = opts["padding"]
    obj_px = opts["img_size"]

    # Load image
    cur_image = np.asarray(Image.open(image_file).convert("RGB"))
    frame_h, frame_w = cur_image.shape[0], cur_image.shape[1]
    frame_wh = (frame_w, frame_h)

    def scene_box_around(boxes):
        """Return (x, y, w, h) of the box enclosing every padded sample."""
        x1 = (boxes[:, 0] - boxes[:, 2] * (padding - 1.0) / 2.0).min()
        y1 = (boxes[:, 1] - boxes[:, 3] * (padding - 1.0) / 2.0).min()
        x2 = (boxes[:, 0] + boxes[:, 2] * (padding + 1.0) / 2.0).max()
        y2 = (boxes[:, 1] + boxes[:, 3] * (padding + 1.0) / 2.0).max()
        return np.asarray((x1, y1, x2 - x1, y2 - y1))

    def crop_rois(boxes, origin, obj_size):
        """Shift boxes into crop coordinates, convert them with
        samples2maskroi and prepend a zero batch-index column."""
        rois = np.copy(boxes)
        rois[:, 0:2] -= np.reshape(origin, (1, 2))
        # Reads self.target_bbox at call time: the data-collection pass below
        # runs after the box has been updated for this frame.
        rois = samples2maskroi(
            rois,
            self.model.receptive_field,
            (obj_size, obj_size),
            self.target_bbox[2:4],
            padding,
        )
        rois = np.concatenate((np.zeros((boxes.shape[0], 1)), rois), axis=1)
        return Variable(torch.from_numpy(rois.astype("float32"))).cuda()

    def pooled(feat_map, rois):
        """RoI-align and flatten to one feature row per RoI."""
        feats = self.model.roi_align_model(feat_map, rois)
        return feats.view(feats.size(0), -1)

    def random_subset(count, limit):
        """Return ``limit`` distinct random indices as a CUDA index tensor."""
        order = np.asarray(range(count))
        np.random.shuffle(order)
        return torch.from_numpy(order[0:limit]).cuda()

    def memory_batches():
        """Stack the stored features into (N, feat_dim) training matrices."""
        nframes = min(opts["n_frames_short"], len(self.pos_feats_all))
        pos_data = torch.stack(self.pos_feats_all[-nframes:],
                               0).view(-1, self.feat_dim)
        neg_data = torch.stack(self.neg_feats_all,
                               0).view(-1, self.feat_dim)
        return pos_data, neg_data

    # Estimate target bbox
    samples = gen_samples(
        SampleGenerator(
            "gaussian",
            frame_wh,
            self.trans_f,
            opts["scale_f"],
            valid=True,
        ),
        self.target_bbox,
        opts["n_samples"],
    )

    # Keep the scene box overlapping the frame if the samples drifted out.
    search_box = scene_box_around(samples)
    if search_box[0] > frame_w:
        search_box[0] = frame_w - 1
    if search_box[1] > frame_h:
        search_box[1] = frame_h - 1
    if search_box[0] + search_box[2] < 0:
        search_box[2] = -search_box[0] + 1
    if search_box[1] + search_box[3] < 0:
        search_box[3] = -search_box[1] + 1

    crop_img_size = (search_box[2:4] * (
        (obj_px, obj_px) / self.target_bbox[2:4])).astype("int64")
    cropped_image, _ = self.img_crop_model.crop_image(
        cur_image, np.reshape(search_box, (1, 4)), crop_img_size)
    cropped_image = cropped_image - 128.0

    self.model.eval()
    feat_map = self.model(cropped_image, out_layer="conv3")

    # Extract sample features and get target location
    sample_feats = pooled(
        feat_map, crop_rois(samples, search_box[0:2], obj_px)).clone()
    sample_scores = self.model(sample_feats, in_layer="fc4")
    top_scores, top_idx = sample_scores[:, 1].topk(5)
    top_idx = top_idx.data.cpu().numpy()
    target_score = top_scores.data.mean()
    self.target_bbox = samples[top_idx].mean(axis=0)

    success = target_score > opts["success_thr"]

    if success:
        self.trans_f = opts["trans_f"]

        # Bbox regression
        bbreg_samples = self.bbreg.predict(sample_feats[top_idx, :].data,
                                           samples[top_idx])
        bbreg_bbox = bbreg_samples.mean(axis=0)

        # Data collect: draw pos/neg samples around the new box
        pos_examples = gen_samples(
            SampleGenerator("gaussian", frame_wh, 0.1, 1.2),
            self.target_bbox,
            opts["n_pos_update"],
            opts["overlap_pos_update"],
        )
        neg_examples = gen_samples(
            SampleGenerator("uniform", frame_wh, 1.5, 1.2),
            self.target_bbox,
            opts["n_neg_update"],
            opts["overlap_neg_update"],
        )

        scene_box = scene_box_around(neg_examples)
        # Truncate to whole pixels but keep floats, as before (the original
        # multiplied the int64 size by a unit jitter scale).
        crop_img_size = (scene_box[2:4] * (
            (obj_px, obj_px) / self.target_bbox[2:4])
        ).astype("int64").astype("float64")
        cropped_image, _ = self.img_crop_model.crop_image(
            cur_image, np.reshape(scene_box, (1, 4)), crop_img_size)
        cropped_image = cropped_image - 128.0
        feat_map = self.model(cropped_image, out_layer="conv3")

        origin = scene_box[0:2]
        obj_size = float(obj_px)
        pos_feats = pooled(
            feat_map,
            crop_rois(pos_examples, origin, obj_size)).data.clone()
        neg_feats = pooled(
            feat_map,
            crop_rois(neg_examples, origin, obj_size)).data.clone()
        self.feat_dim = pos_feats.size(-1)

        if pos_feats.size(0) > opts["n_pos_update"]:
            pos_feats = pos_feats.index_select(
                0, random_subset(pos_feats.size(0), opts["n_pos_update"]))
        if neg_feats.size(0) > opts["n_neg_update"]:
            neg_feats = neg_feats.index_select(
                0, random_subset(neg_feats.size(0), opts["n_neg_update"]))

        self.pos_feats_all.append(pos_feats)
        self.neg_feats_all.append(neg_feats)
        if len(self.pos_feats_all) > opts["n_frames_long"]:
            del self.pos_feats_all[0]
        if len(self.neg_feats_all) > opts["n_frames_short"]:
            del self.neg_feats_all[0]
    else:
        # Expand search area at failure
        self.trans_f = opts["trans_f_expand"]
        bbreg_bbox = self.target_bbox

    if not success:
        # Short term update
        pos_data, neg_data = memory_batches()
        self.flag_old = train(
            self.model,
            self.criterion,
            self.update_optimizer,
            pos_data,
            neg_data,
            opts["maxiter_update"],
            self.W4,
            self.W5,
            self.W6,
            self.flag_old,
        )
    elif self.frame_idx % opts["long_interval"] == 0:
        # Long term update
        # NOTE(review): this also limits positives to the last
        # n_frames_short entries although up to n_frames_long are kept --
        # confirm this is intended.
        pos_data, neg_data = memory_batches()
        self.flag_old = train_owm(
            self.model,
            self.criterion,
            self.update_optimizer_owm,
            pos_data,
            neg_data,
            opts["maxiter_update"],
            self.P4,
            self.P5,
            self.P6,
            self.W4,
            self.W5,
            self.W6,
            self.flag_old,
        )

    return bbreg_bbox
def update(self, image):
    """Track the target in the next frame.

    The actor network proposes a box move from the current box. The proposal
    is accepted when its ``fc6`` target score is positive and the target
    patch is sharp enough (Laplacian variance above 100). Otherwise the
    tracker falls back to re-detection: it may retrain the classifier on
    recently stored frames, samples candidate boxes, averages the five best
    ones and applies bounding-box regression. If re-detection also fails,
    the previous frame's result is reused.

    Args:
        image: Current frame as a PIL image (``image.size`` and
            ``image.resize`` are used).

    Returns:
        The box stored in ``self.target_bbox`` for this frame.

    Side effects:
        Advances ``self.frame``; records the outcome in ``self.result`` and
        ``self.result_bb``; keeps ``self.success`` for the next call; may
        extend the learning buffers and the feature memory.
    """
    self.frame += 1
    update_lenth = 10
    np_image = np.array(image)

    def remember(box, score):
        """Queue this frame as a training candidate (last 2*update_lenth)."""
        self.img_learn.append(image)
        self.pos_learn.append(box)
        self.score_pos.append(score)
        self.frame_learn.append(self.frame)
        while len(self.img_learn) > update_lenth * 2:
            del self.img_learn[0]
            del self.pos_learn[0]
            del self.score_pos[0]
            del self.frame_learn[0]

    def record(history, value):
        """Store this frame's value; grow a list that init() only appended to."""
        try:
            history[self.frame] = value
        except IndexError:
            history.append(value)

    # Sharpness of the target patch, only measured when the first frame was
    # sharp itself; otherwise a fixed passing value is used.
    if self.imageVar_first > 200:
        imageVar = cv2.Laplacian(
            crop_image_blur(np_image, self.target_bbox), cv2.CV_64F).var()
    else:
        imageVar = 200

    # Actor proposal
    img_l = getbatch_actor(np_image, self.target_bbox.reshape([1, 4]))
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    torch_image = loader(image.resize(
        (225, 225), Image.LANCZOS)).unsqueeze(0).cuda()
    deta_pos = self.actor(img_l, torch_image)
    deta_pos = deta_pos.data.clone().cpu().numpy()

    # Zero the third action component (presumably the scale change -- TODO
    # confirm) when the actor did not converge at init, when the value is
    # implausibly large, or right after a frame that needed re-detection.
    third = deta_pos[:, 2]
    if (self.deta_flag
            or (third > 0.05 or third < -0.05)
            or (len(self.pf_frame)
                and self.frame == (self.pf_frame[-1] + 1))):
        deta_pos[:, 2] = 0

    pos_ = np.round(
        move_crop(self.target_bbox, deta_pos,
                  (image.size[1], image.size[0]), self.rate))
    r = forward_samples(self.model, image,
                        np.array(pos_).reshape([1, 4]), out_layer='fc6')
    r = r.cpu().numpy()
    actor_score = r[0][1]

    if actor_score > 0 and imageVar > 100:
        # Actor proposal accepted.
        self.target_bbox = pos_
        bbreg_bbox = pos_
        self.success = True
        remember(self.target_bbox, actor_score)
    else:
        # Re-detection. `success` used to be read here before any
        # assignment; the previous frame's outcome is now kept on self.
        previous_success = getattr(self, "success", True)
        self.detetion += 1
        self.pf_frame.append(self.frame)

        # Retrain on the most recent stored frames when the feature memory
        # no longer covers them (or still holds only the initial frame).
        if (len(self.frame_learn) == update_lenth * 2
                and self.data_frame[-1] not in self.frame_learn
            ) or self.data_frame[-1] == 0:
            first = max(0, len(self.img_learn) - update_lenth)
            pos_batches, neg_batches = [], []
            for num in range(first, len(self.img_learn)):
                frame_id = self.frame_learn[num]
                if frame_id not in self.data_frame:
                    gt_ = self.pos_learn[num]
                    image_ = self.img_learn[num]
                    pos_examples = np.round(
                        gen_samples(self.pos_generator, gt_,
                                    opts['n_pos_update'],
                                    opts['overlap_pos_update']))
                    neg_examples = np.round(
                        gen_samples(self.neg_generator, gt_,
                                    opts['n_neg_update'],
                                    opts['overlap_neg_update']))
                    pos_feats_ = forward_samples(self.model, image_,
                                                 pos_examples)
                    neg_feats_ = forward_samples(self.model, image_,
                                                 neg_examples)

                    self.pos_feats_all.append(pos_feats_)
                    self.neg_feats_all.append(neg_feats_)
                    self.data_frame.append(frame_id)
                    if len(self.pos_feats_all) > 10:
                        del self.pos_feats_all[0]
                        del self.neg_feats_all[0]
                        del self.data_frame[0]
                else:
                    # Features of this frame are already cached.
                    slot = self.data_frame.index(frame_id)
                    pos_feats_ = self.pos_feats_all[slot]
                    neg_feats_ = self.neg_feats_all[slot]
                pos_batches.append(pos_feats_)
                neg_batches.append(neg_feats_)

            train(self.model, self.criterion, self.update_optimizer,
                  torch.cat(pos_batches, 0), torch.cat(neg_batches, 0),
                  opts['maxiter_update'])

        if previous_success:
            self.sample_generator.set_trans_f(opts['trans_f'])
        else:
            self.sample_generator.set_trans_f(opts['trans_f_expand'])

        if imageVar < 100:
            samples = gen_samples(self.init_generator, self.target_bbox,
                                  opts['n_samples'])
        else:
            samples = gen_samples(self.sample_generator, self.target_bbox,
                                  opts['n_samples'])

        # Early frames, or a box whose area drifted far from the initial
        # one: add candidates of the initial size around the same centre.
        init_area = self.init_bbox[2] * self.init_bbox[3]
        size_drifted = False
        if init_area > 1000:
            area_ratio = (self.target_bbox[2] * self.target_bbox[3]
                          / init_area)
            size_drifted = area_ratio > 2.5 or area_ratio < 0.4
        # `i` was an undefined name here; the frame counter is self.frame.
        if self.frame < 20 or size_drifted:
            self.sample_generator.set_trans_f(opts['trans_f_expand'])
            samples_ = np.round(
                gen_samples(
                    self.sample_generator,
                    np.hstack([
                        self.target_bbox[0:2] + self.target_bbox[2:4] / 2 -
                        self.init_bbox[2:4] / 2, self.init_bbox[2:4]
                    ]), opts['n_samples']))
            samples = np.vstack([samples, samples_])

        sample_scores = forward_samples(self.model, image, samples,
                                        out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu().numpy()
        target_score = top_scores.mean()
        self.target_bbox = samples[top_idx].mean(axis=0)

        success = target_score > opts['success_thr']

        if success:
            # Bbox regression
            bbreg_samples = samples[top_idx]
            bbreg_feats = forward_samples(self.model, image, bbreg_samples)
            bbreg_samples = self.bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
            # The score just computed; self.target_score is never assigned.
            remember(self.target_bbox, target_score)
        else:
            # Copy previous result at failure. The tracker state is rolled
            # back too, so the returned box matches the stored result.
            self.target_bbox = self.result[self.frame - 1]
            bbreg_bbox = self.result_bb[self.frame - 1]
        self.success = bool(success)

    # Save result
    record(self.result, self.target_bbox)
    record(self.result_bb, bbreg_bbox)
    return self.target_bbox
def init(self, image, init_bbox):
    """Initialise the tracker on the first frame.

    Trains the bounding-box regressor and the classifier on samples drawn
    around ``init_bbox``, fits the actor with ``init_actor``, builds the
    sample generators used by ``update`` and seeds the learning buffers.

    Args:
        image: First frame as a PIL image. A NumPy array is also accepted and
            wrapped with ``Image.fromarray``.
        init_bbox: Initial target box; ``init_bbox[2] / init_bbox[3]`` is
            stored as ``self.rate``.

    Side effects:
        Appends the initial box to ``self.result`` and ``self.result_bb`` and
        sets ``rate``, ``target_bbox``, ``init_bbox``, ``bbreg``,
        ``deta_flag``, the four sample generators, ``pos_feats_all``,
        ``neg_feats_all``, ``img_learn``, ``pos_learn``, ``score_pos``,
        ``frame_learn``, ``pf_frame`` and ``imageVar_first``.
    """
    # Keep the frame as a PIL image: init_actor() and update() call
    # image.size / image.resize(...) on it. The previous code rebound
    # `image` to an ndarray, for which `.size` is the element count and
    # `.shape` is (H, W, C) rather than the (W, H) pair the generators get
    # elsewhere in this file.
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    img_size = image.size  # (width, height)

    self.rate = init_bbox[2] / init_bbox[3]
    self.target_bbox = np.array(init_bbox)
    self.init_bbox = np.array(init_bbox)
    self.result.append(self.target_bbox)
    self.result_bb.append(self.target_bbox)

    # Train bbox regressor
    bbreg_examples = gen_samples(
        SampleGenerator('uniform', img_size, 0.3, 1.5, 1.1),
        self.target_bbox, opts['n_bbreg'], opts['overlap_bbreg'],
        opts['scale_bbreg'])
    bbreg_feats = forward_samples(self.model, image, bbreg_examples)
    self.bbreg = BBRegressor(img_size)
    self.bbreg.train(bbreg_feats, bbreg_examples, self.target_bbox)

    # Draw pos/neg samples; negatives come half from the 'uniform' and half
    # from the 'whole' generator.
    pos_examples = gen_samples(
        SampleGenerator('gaussian', img_size, 0.1, 1.2),
        self.target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        gen_samples(SampleGenerator('uniform', img_size, 1, 2, 1.1),
                    self.target_bbox, opts['n_neg_init'] // 2,
                    opts['overlap_neg_init']),
        gen_samples(SampleGenerator('whole', img_size, 0, 1.2, 1.1),
                    self.target_bbox, opts['n_neg_init'] // 2,
                    opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Initial classifier training
    pos_feats = forward_samples(self.model, image, pos_examples)
    neg_feats = forward_samples(self.model, image, neg_examples)
    train(self.model, self.criterion, self.init_optimizer, pos_feats,
          neg_feats, opts['maxiter_init'])

    # Actor initialisation (note: init_actor reseeds the global RNGs).
    self.deta_flag = init_actor(self.actor, image, self.target_bbox)

    # Generators reused by update()
    self.init_generator = SampleGenerator('gaussian', img_size,
                                          opts['trans_f'], 1, valid=False)
    self.sample_generator = SampleGenerator('gaussian', img_size,
                                            opts['trans_f'],
                                            opts['scale_f'], valid=False)
    self.pos_generator = SampleGenerator('gaussian', img_size, 0.1, 1.2)
    self.neg_generator = SampleGenerator('uniform', img_size, 1.5, 1.2)

    # Feature memory and learning buffers, seeded with the first frame
    self.pos_feats_all = [pos_feats[:opts['n_pos_update']]]
    self.neg_feats_all = [neg_feats[:opts['n_neg_update']]]
    pos_score = forward_samples(self.model, image,
                                np.array(init_bbox).reshape([1, 4]),
                                out_layer='fc6')
    self.img_learn = [image]
    self.pos_learn = [init_bbox]
    self.score_pos = [pos_score.cpu().numpy()[0][1]]
    self.frame_learn = [0]
    self.pf_frame = []

    # Sharpness of the initial target patch; update() compares it to 200.
    self.imageVar_first = cv2.Laplacian(
        crop_image_blur(np.array(image), self.target_bbox),
        cv2.CV_64F).var()