def forward_samples(model, image, samples, out_layer="conv3"):
    """Forward samples through the network."""
    model.eval()
    extractor = tracking.data_prov.RegionExtractor(image, samples, opts)
    for i, regions in enumerate(extractor):
        if opts["use_gpu"]:
            regions = regions.cuda()
        with torch.no_grad():
            feat = model(regions, out_layer=out_layer)
        if i == 0:
            feats = feat.detach().clone()
        else:
            feats = torch.cat((feats, feat.detach().clone()), 0)
    return feats
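# Hypothetical usage sketch (the `image` and `samples` names are
# placeholders, not defined here): the pooled conv3 features returned by
# forward_samples can be scored by the fc layers, mirroring the
# in_layer='fc4' call used in run_mdnet below.
# feats = forward_samples(model, image, samples, out_layer='conv3')
# scores = model(feats, in_layer='fc4')
# top_scores, top_idx = scores[:, 1].topk(5)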
def G_pretrain_v3(model, model_G, g_criterion, g_optimizer, maxiter, g_lr,
                  in_layer='fc4'):
    '''
    Pretrain model_G, using the features extracted by the RoIAlign module
    instead of those from the original MDNet.
    '''
    # If a pretrained model already exists, load it; otherwise pretrain
    # model_G from scratch.
    model_path = './models/g_model' + str(g_lr) + '.pth'
    if os.path.exists(model_path):
        model_G.load_state_dict(torch.load(model_path))
        print('Loading model: {}'.format(model_path))
        return

    batch_pos = opts['g_pretrain_pos']

    # Init image crop model
    img_crop_model = imgCropper(1.)
    if opts['use_gpu']:
        img_crop_model.gpuEnable()

    if os.path.exists('./models/feats_before.npy'):
        feats_before = np.load('./models/feats_before.npy')
        feats_after = np.load('./models/feats_after.npy')
    else:
        path = "./models/G_sample_list_2.mat"
        res = loadmat(path)
        res = res['G_sample_list']
        res = res[0, 0]
        G_samplelist = {}
        G_samplegt = {}
        attr_name = [
            'illum', 'illum_after', 'motion', 'motion_after', 'size',
            'size_after', 'occlusion', 'visible', 'illum_gt',
            'illum_after_gt', 'motion_gt', 'motion_after_gt', 'size_gt',
            'size_after_gt', 'occlusion_gt', 'visible_gt'
        ]
        for i in range(len(res)):
            if i < 8:
                temp = res[i]
                # loadmat already yields unicode strings; the former
                # 'string_escape' round-trip does not exist in Python 3
                G_samplelist[attr_name[i]] = [str(item[0][0]) for item in temp]
            else:
                G_samplegt[attr_name[i]] = res[i]

        feats = {}
        for (key, val) in G_samplelist.items():
            feats[key] = []
            for j, val_item in enumerate(val):
                cur_img = Image.open(val_item).convert('RGB')
                cur_img = np.asarray(cur_img)
                # index ground truth by the sample position, not by the
                # stale counter left over from the attribute loop above
                cur_initbbox = G_samplegt[key + '_gt'][j]
                cur_bbox = np.asarray(cur_initbbox).copy().reshape(1, 4)
                target_bbox = np.array(cur_initbbox)

                # compute padded sample
                padded_x1 = (cur_bbox[:, 0] - cur_bbox[:, 2] *
                             (opts['padding'] - 1.) / 2.).min()
                padded_y1 = (cur_bbox[:, 1] - cur_bbox[:, 3] *
                             (opts['padding'] - 1.) / 2.).min()
                padded_x2 = (cur_bbox[:, 0] + cur_bbox[:, 2] *
                             (opts['padding'] + 1.) / 2.).max()
                padded_y2 = (cur_bbox[:, 1] + cur_bbox[:, 3] *
                             (opts['padding'] + 1.) / 2.).max()
                padded_scene_box = np.reshape(
                    np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                                padded_y2 - padded_y1)), (1, 4))

                scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
                jitter_scale = [1.]

                model.eval()
                crop_img_size = (scene_boxes[0, 2:4] * (
                    (opts['img_size'], opts['img_size']) / target_bbox[2:4])
                    ).astype('int64') * jitter_scale[0]
                cropped_image, cur_image_var = img_crop_model.crop_image(
                    cur_img, np.reshape(scene_boxes[0], (1, 4)),
                    crop_img_size)
                cropped_image = cropped_image - 128.
                feat_map = model(cropped_image, out_layer='conv3')

                rel_target_bbox = np.copy(target_bbox)
                rel_target_bbox[0:2] -= scene_boxes[0, 0:2]

                batch_num = np.zeros((cur_bbox.shape[0], 1))
                cur_rois = np.copy(cur_bbox)
                cur_rois[:, 0:2] -= np.repeat(
                    np.reshape(scene_boxes[0, 0:2], (1, 2)),
                    cur_rois.shape[0], axis=0)
                scaled_obj_size = float(opts['img_size']) * jitter_scale[0]
                cur_rois = samples2maskroi(cur_rois, model.receptive_field,
                                           (scaled_obj_size, scaled_obj_size),
                                           target_bbox[2:4], opts['padding'])
                cur_rois = np.concatenate((batch_num, cur_rois), axis=1)
                cur_rois = Variable(
                    torch.from_numpy(cur_rois.astype('float32'))).cuda()
                cur_feats = model.roi_align_model(feat_map, cur_rois)
                cur_feats = cur_feats.reshape(cur_feats.shape[0],
                                              -1).data.clone()
                cur_feats = cur_feats.cpu().numpy()
                feats[key].append(np.squeeze(cur_feats, axis=0))

        feats_before = np.concatenate(
            (feats['illum'], feats['size'], feats['motion'],
             feats['visible']), axis=0)
        feats_after = np.concatenate(
            (feats['illum_after'], feats['size_after'],
             feats['motion_after'], feats['occlusion']), axis=0)
        np.save('./models/feats_before.npy', feats_before)
        np.save('./models/feats_after.npy', feats_after)

    if opts['use_gpu']:
        feats_before = Variable(
            torch.from_numpy(feats_before.astype('float32'))).cuda()
        feats_after = Variable(
            torch.from_numpy(feats_after.astype('float32'))).cuda()

    # mask is 0 where a feature unit vanishes after the appearance change,
    # 1 where it was already zero beforehand
    mask = torch.ones_like(feats_after)
    mask[feats_after == 0] = 0
    mask[feats_before == 0] = 1

    # pretrain G
    pos_idx = np.random.permutation(feats_before.size(0))
    while len(pos_idx) < batch_pos * maxiter:
        pos_idx = np.concatenate(
            [pos_idx, np.random.permutation(feats_before.size(0))])
    pos_pointer = 0

    for iter in range(maxiter):
        # select pos idx
        pos_next = pos_pointer + batch_pos
        pos_cur_idx = pos_idx[pos_pointer:pos_next]
        pos_cur_idx = feats_before.new(pos_cur_idx).long()
        pos_pointer = pos_next

        # create batch
        batch_data_feats = Variable(feats_before.index_select(0, pos_cur_idx))
        labels = Variable(mask.index_select(0, pos_cur_idx))
        if opts['use_gpu']:
            labels = labels.cuda()

        model_G.train()  # switch model_G into train mode

        # forward
        score = model_G(batch_data_feats)

        # optimize
        loss = g_criterion(score, labels)
        model_G.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model_G.parameters(),
                                       opts['grad_clip'])
        g_optimizer.step()

        if opts['visual_log']:
            print("Iter %d, G_Loss %.4f" % (iter, loss.item()))

    torch.save(model_G.state_dict(), model_path)
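# A minimal sketch (toy tensors; the 4608 dimension = 512 * 3 * 3 RoIAlign
# features is taken from update_vital_v2_new below) of the label
# construction above: model_G regresses a mask that is 0 where a unit
# vanishes after the occlusion/appearance change and 1 elsewhere.
def _example_g_labels():
    feats_before = torch.rand(4, 4608)   # features before the change
    feats_after = feats_before.clone()
    feats_after[:, 100:200] = 0          # units suppressed after the change
    mask = torch.ones_like(feats_after)
    mask[feats_after == 0] = 0           # 0 = unit that G should drop
    mask[feats_before == 0] = 1          # ignore units that were already 0
    return mask                          # regression target for model_G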
def run_mdnet(img_list,
              init_bbox,
              num2drop,
              g_lr,
              g_lr_update,
              gt=None,
              seq='seq_name ex)Basketball',
              savefig_dir='',
              display=False):
    ############################################
    ############################################
    ############################################

    # Init bbox
    target_bbox = np.array(init_bbox)
    secPerFrame = np.zeros((len(img_list), 1))
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = np.copy(target_bbox)
    result_bb[0] = np.copy(target_bbox)
    iou_result = np.zeros((len(img_list), 1))

    # execution time array
    exec_time_result = np.zeros((len(img_list), 1))

    # Init model
    # model_G = net_G()
    model_G = net_G_v2()
    model = MDNet(opts['model_path'])
    if opts['adaptive_align']:
        align_h = model.roi_align_model.aligned_height
        align_w = model.roi_align_model.aligned_width
        spatial_s = model.roi_align_model.spatial_scale
        model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s)
    if opts['use_gpu']:
        model = model.cuda()
        model_G = model_G.cuda()

    model.set_learnable_params(opts['ft_layers'])
    model_G.set_learnable_params()

    # Init image crop model
    img_crop_model = imgCropper(1.)
    if opts['use_gpu']:
        img_crop_model.gpuEnable()

    # Init criterion and optimizer
    # criterion = FocalBinaryLoss()
    criterion = BinaryLoss()
    init_optimizer = set_optimizer(model, opts['lr_init'])
    update_optimizer = set_optimizer(model, opts['lr_update'])
    g_criterion = nn.MSELoss()
    g_optimizer = optim.SGD(model_G.parameters(),
                            lr=g_lr,
                            weight_decay=opts['w_decay'],
                            momentum=opts['momentum'])
    g_update_optimizer = optim.SGD(model_G.parameters(),
                                   lr=g_lr_update,
                                   weight_decay=opts['w_decay'],
                                   momentum=opts['momentum'])

    tic = time.time()

    # Load first image
    cur_image = Image.open(img_list[0]).convert('RGB')
    cur_image = np.asarray(cur_image)

    # Draw pos/neg samples
    ishape = cur_image.shape
    pos_examples = gen_samples(
        SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),
        target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
    neg_examples = gen_samples(
        SampleGenerator('uniform', (ishape[1], ishape[0]), 1, 2, 1.1),
        target_bbox, opts['n_neg_init'], opts['overlap_neg_init'])
    neg_examples = np.random.permutation(neg_examples)

    cur_bbreg_examples = gen_samples(
        SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5, 1.1),
        target_bbox, opts['n_bbreg'], opts['overlap_bbreg'],
        opts['scale_bbreg'])

    # compute padded sample
    padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] *
                 (opts['padding'] - 1.) / 2.).min()
    padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] *
                 (opts['padding'] - 1.) / 2.).min()
    padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] *
                 (opts['padding'] + 1.) / 2.).max()
    padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] *
                 (opts['padding'] + 1.) / 2.).max()
    padded_scene_box = np.reshape(
        np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                    padded_y2 - padded_y1)), (1, 4))

    scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
    if opts['jitter']:
        ## horizontal shift
        jittered_scene_box_horizon = np.copy(padded_scene_box)
        jittered_scene_box_horizon[0, 0] -= 4.
        jitter_scale_horizon = 1.

        ## vertical shift
        jittered_scene_box_vertical = np.copy(padded_scene_box)
        jittered_scene_box_vertical[0, 1] -= 4.
        jitter_scale_vertical = 1.
        ## scale reduction
        jittered_scene_box_reduce1 = np.copy(padded_scene_box)
        jitter_scale_reduce1 = 1.1**(-1)

        ## scale enlarge
        jittered_scene_box_enlarge1 = np.copy(padded_scene_box)
        jitter_scale_enlarge1 = 1.1**(1)

        ## scale reduction
        jittered_scene_box_reduce2 = np.copy(padded_scene_box)
        jitter_scale_reduce2 = 1.1**(-2)

        ## scale enlarge
        jittered_scene_box_enlarge2 = np.copy(padded_scene_box)
        jitter_scale_enlarge2 = 1.1**(2)

        scene_boxes = np.concatenate([
            scene_boxes, jittered_scene_box_horizon,
            jittered_scene_box_vertical, jittered_scene_box_reduce1,
            jittered_scene_box_enlarge1, jittered_scene_box_reduce2,
            jittered_scene_box_enlarge2
        ], axis=0)
        jitter_scale = [
            1., jitter_scale_horizon, jitter_scale_vertical,
            jitter_scale_reduce1, jitter_scale_enlarge1,
            jitter_scale_reduce2, jitter_scale_enlarge2
        ]
    else:
        jitter_scale = [1.]

    model.eval()
    for bidx in range(0, scene_boxes.shape[0]):
        crop_img_size = (scene_boxes[bidx, 2:4] * (
            (opts['img_size'], opts['img_size']) / target_bbox[2:4])
            ).astype('int64') * jitter_scale[bidx]
        cropped_image, cur_image_var = img_crop_model.crop_image(
            cur_image, np.reshape(scene_boxes[bidx], (1, 4)), crop_img_size)
        cropped_image = cropped_image - 128.

        feat_map = model(cropped_image, out_layer='conv3')

        rel_target_bbox = np.copy(target_bbox)
        rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

        batch_num = np.zeros((pos_examples.shape[0], 1))
        cur_pos_rois = np.copy(pos_examples)
        cur_pos_rois[:, 0:2] -= np.repeat(
            np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
            cur_pos_rois.shape[0], axis=0)
        scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
        cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field,
                                       (scaled_obj_size, scaled_obj_size),
                                       target_bbox[2:4], opts['padding'])
        cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
        cur_pos_rois = Variable(
            torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()
        cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois)
        cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                           -1).data.clone()

        batch_num = np.zeros((neg_examples.shape[0], 1))
        cur_neg_rois = np.copy(neg_examples)
        cur_neg_rois[:, 0:2] -= np.repeat(
            np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
            cur_neg_rois.shape[0], axis=0)
        cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field,
                                       (scaled_obj_size, scaled_obj_size),
                                       target_bbox[2:4], opts['padding'])
        cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
        cur_neg_rois = Variable(
            torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()
        cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois)
        cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                           -1).data.clone()

        ## bbreg rois
        batch_num = np.zeros((cur_bbreg_examples.shape[0], 1))
        cur_bbreg_rois = np.copy(cur_bbreg_examples)
        cur_bbreg_rois[:, 0:2] -= np.repeat(
            np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
            cur_bbreg_rois.shape[0], axis=0)
        scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
        cur_bbreg_rois = samples2maskroi(cur_bbreg_rois,
                                         model.receptive_field,
                                         (scaled_obj_size, scaled_obj_size),
                                         target_bbox[2:4], opts['padding'])
        cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois), axis=1)
        cur_bbreg_rois = Variable(
            torch.from_numpy(cur_bbreg_rois.astype('float32'))).cuda()
        cur_bbreg_feats = model.roi_align_model(feat_map, cur_bbreg_rois)
        cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0),
                                               -1).data.clone()

        feat_dim = cur_pos_feats.size(-1)

        if bidx == 0:
            pos_feats = cur_pos_feats
            neg_feats = cur_neg_feats
            ## bbreg feature
            bbreg_feats = cur_bbreg_feats
            bbreg_examples = cur_bbreg_examples
        else:
            pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
            neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
            ## bbreg feature
            bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0)
            bbreg_examples = np.concatenate(
                (bbreg_examples, cur_bbreg_examples), axis=0)

    if pos_feats.size(0) > opts['n_pos_init']:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        pos_feats = pos_feats[pos_idx[0:opts['n_pos_init']], :]
    if neg_feats.size(0) > opts['n_neg_init']:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        neg_feats = neg_feats[neg_idx[0:opts['n_neg_init']], :]

    ## bbreg
    if bbreg_feats.size(0) > opts['n_bbreg']:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]

    ## open images and crop patch from obj
    extra_obj_size = np.array((opts['img_size'], opts['img_size']))
    extra_crop_img_size = extra_obj_size * (opts['padding'] + 0.6)
    replicateNum = 100
    for iidx in range(replicateNum):
        extra_target_bbox = np.copy(target_bbox)

        extra_scene_box = np.copy(extra_target_bbox)
        extra_scene_box_center = extra_scene_box[0:2] + \
            extra_scene_box[2:4] / 2.
        extra_scene_box_size = extra_scene_box[2:4] * (opts['padding'] + 0.6)
        extra_scene_box[0:2] = extra_scene_box_center - \
            extra_scene_box_size / 2.
        extra_scene_box[2:4] = extra_scene_box_size

        extra_shift_offset = np.clip(2. * np.random.randn(2), -4, 4)
        cur_extra_scale = 1.1**np.clip(np.random.randn(1), -2, 2)

        extra_scene_box[0] += extra_shift_offset[0]
        extra_scene_box[1] += extra_shift_offset[1]
        extra_scene_box[2:4] *= cur_extra_scale[0]

        scaled_obj_size = float(opts['img_size']) / cur_extra_scale[0]

        cur_extra_cropped_image, _ = img_crop_model.crop_image(
            cur_image, np.reshape(extra_scene_box, (1, 4)),
            extra_crop_img_size)
        cur_extra_cropped_image = cur_extra_cropped_image.detach()

        # gen_samples expects an integer count, so use floor division
        cur_extra_pos_examples = gen_samples(
            SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),
            extra_target_bbox, opts['n_pos_init'] // replicateNum,
            opts['overlap_pos_init'])
        cur_extra_neg_examples = gen_samples(
            SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 2, 1.1),
            extra_target_bbox, opts['n_neg_init'] // replicateNum // 4,
            opts['overlap_neg_init'])

        ## bbreg sample
        cur_extra_bbreg_examples = gen_samples(
            SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5, 1.1),
            extra_target_bbox, opts['n_bbreg'] // replicateNum // 4,
            opts['overlap_bbreg'], opts['scale_bbreg'])

        batch_num = iidx * np.ones((cur_extra_pos_examples.shape[0], 1))
        cur_extra_pos_rois = np.copy(cur_extra_pos_examples)
        cur_extra_pos_rois[:, 0:2] -= np.repeat(
            np.reshape(extra_scene_box[0:2], (1, 2)),
            cur_extra_pos_rois.shape[0], axis=0)
        cur_extra_pos_rois = samples2maskroi(
            cur_extra_pos_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_pos_rois = np.concatenate(
            (batch_num, cur_extra_pos_rois), axis=1)

        batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1))
        cur_extra_neg_rois = np.copy(cur_extra_neg_examples)
        cur_extra_neg_rois[:, 0:2] -= np.repeat(
            np.reshape(extra_scene_box[0:2], (1, 2)),
            cur_extra_neg_rois.shape[0], axis=0)
        cur_extra_neg_rois = samples2maskroi(
            cur_extra_neg_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_neg_rois = np.concatenate(
            (batch_num, cur_extra_neg_rois), axis=1)

        ## bbreg rois
        batch_num = iidx * np.ones((cur_extra_bbreg_examples.shape[0], 1))
        cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples)
        cur_extra_bbreg_rois[:, 0:2] -= np.repeat(
            np.reshape(extra_scene_box[0:2], (1, 2)),
            cur_extra_bbreg_rois.shape[0], axis=0)
        cur_extra_bbreg_rois = samples2maskroi(
            cur_extra_bbreg_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_bbreg_rois = np.concatenate(
            (batch_num, cur_extra_bbreg_rois), axis=1)

        if iidx == 0:
            extra_cropped_image = cur_extra_cropped_image
            extra_pos_rois = np.copy(cur_extra_pos_rois)
            extra_neg_rois = np.copy(cur_extra_neg_rois)
            ## bbreg rois
            extra_bbreg_rois = np.copy(cur_extra_bbreg_rois)
            extra_bbreg_examples = np.copy(cur_extra_bbreg_examples)
        else:
            extra_cropped_image = torch.cat(
                (extra_cropped_image, cur_extra_cropped_image), dim=0)
            extra_pos_rois = np.concatenate(
                (extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0)
            extra_neg_rois = np.concatenate(
                (extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0)
            ## bbreg rois
            extra_bbreg_rois = np.concatenate(
                (extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0)
            extra_bbreg_examples = np.concatenate(
                (extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)),
                axis=0)

    extra_pos_rois = Variable(
        torch.from_numpy(extra_pos_rois.astype('float32'))).cuda()
    extra_neg_rois = Variable(
        torch.from_numpy(extra_neg_rois.astype('float32'))).cuda()
    ## bbreg rois
    extra_bbreg_rois = Variable(
        torch.from_numpy(extra_bbreg_rois.astype('float32'))).cuda()

    extra_cropped_image -= 128.

    extra_feat_maps = model(extra_cropped_image, out_layer='conv3')

    # Draw pos/neg samples
    ishape = cur_image.shape

    extra_pos_feats = model.roi_align_model(extra_feat_maps, extra_pos_rois)
    extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0),
                                           -1).data.clone()

    extra_neg_feats = model.roi_align_model(extra_feat_maps, extra_neg_rois)
    extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0),
                                           -1).data.clone()
    ## bbreg feat
    extra_bbreg_feats = model.roi_align_model(extra_feat_maps,
                                              extra_bbreg_rois)
    extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0),
                                               -1).data.clone()

    ## concatenate extra features to original_features
    pos_feats = torch.cat((pos_feats, extra_pos_feats), dim=0)
    neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0)
    ## concatenate extra bbreg feats to original_bbreg_feats
    bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0)
    bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples),
                                    axis=0)

    torch.cuda.empty_cache()
    model.zero_grad()
    model_G.zero_grad()

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    G_pretrain_v3(model, model_G, g_criterion, g_optimizer,
                  opts['maxiter_g_pretrain'], g_lr)

    ## bbreg train
    if bbreg_feats.size(0) > opts['n_bbreg']:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]
    bbreg = BBRegressor((ishape[1], ishape[0]))
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)

    if pos_feats.size(0) > opts['n_pos_update']:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        pos_feats_all = [
            pos_feats.index_select(
                0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())
        ]
    if neg_feats.size(0) > opts['n_neg_update']:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        neg_feats_all = [
            neg_feats.index_select(
                0, torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())
        ]

    spf_total = time.time() - tic
    # spf_total = 0.  # no first frame
    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (cur_image.shape[1] / dpi, cur_image.shape[0] / dpi)
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        # 'auto' replaces the removed matplotlib aspect alias 'normal'
        im = ax.imshow(cur_image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    trans_f = opts['trans_f']
    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        cur_image = Image.open(img_list[i]).convert('RGB')
        cur_image = np.asarray(cur_image)

        # Estimate target bbox
        ishape = cur_image.shape
        samples = gen_samples(
            SampleGenerator('gaussian', (ishape[1], ishape[0]), trans_f,
                            opts['scale_f'], valid=True), target_bbox,
            opts['n_samples'])

        padded_x1 = (samples[:, 0] - samples[:, 2] *
                     (opts['padding'] - 1.) / 2.).min()
        padded_y1 = (samples[:, 1] - samples[:, 3] *
                     (opts['padding'] - 1.) / 2.).min()
        padded_x2 = (samples[:, 0] + samples[:, 2] *
                     (opts['padding'] + 1.) / 2.).max()
        padded_y2 = (samples[:, 1] + samples[:, 3] *
                     (opts['padding'] + 1.) / 2.).max()
        padded_scene_box = np.asarray(
            (padded_x1, padded_y1, padded_x2 - padded_x1,
             padded_y2 - padded_y1))

        if padded_scene_box[0] > cur_image.shape[1]:
            padded_scene_box[0] = cur_image.shape[1] - 1
        if padded_scene_box[1] > cur_image.shape[0]:
            padded_scene_box[1] = cur_image.shape[0] - 1
        if padded_scene_box[0] + padded_scene_box[2] < 0:
            padded_scene_box[2] = -padded_scene_box[0] + 1
        if padded_scene_box[1] + padded_scene_box[3] < 0:
            padded_scene_box[3] = -padded_scene_box[1] + 1

        crop_img_size = (padded_scene_box[2:4] *
                         ((opts['img_size'], opts['img_size']) /
                          target_bbox[2:4])).astype('int64')
        cropped_image, cur_image_var = img_crop_model.crop_image(
            cur_image, np.reshape(padded_scene_box, (1, 4)), crop_img_size)
        cropped_image = cropped_image - 128.
        model.eval()
        feat_map = model(cropped_image, out_layer='conv3')

        # relative target bbox with padded_scene_box
        rel_target_bbox = np.copy(target_bbox)
        rel_target_bbox[0:2] -= padded_scene_box[0:2]

        # Extract sample features and get target location
        batch_num = np.zeros((samples.shape[0], 1))
        sample_rois = np.copy(samples)
        sample_rois[:, 0:2] -= np.repeat(
            np.reshape(padded_scene_box[0:2], (1, 2)),
            sample_rois.shape[0], axis=0)
        sample_rois = samples2maskroi(sample_rois, model.receptive_field,
                                      (opts['img_size'], opts['img_size']),
                                      target_bbox[2:4], opts['padding'])
        sample_rois = np.concatenate((batch_num, sample_rois), axis=1)
        sample_rois = Variable(
            torch.from_numpy(sample_rois.astype('float32'))).cuda()
        sample_feats = model.roi_align_model(feat_map, sample_rois)
        sample_feats = sample_feats.view(sample_feats.size(0), -1).clone()
        sample_scores = model(sample_feats, in_layer='fc4')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.data.cpu().numpy()
        target_score = top_scores.data.mean()
        target_bbox = samples[top_idx].mean(axis=0)

        success = target_score > opts['success_thr']

        # Expand search area at failure
        if success:
            trans_f = opts['trans_f']
        else:
            trans_f = opts['trans_f_expand']

        ## Bbox regression
        if success:
            bbreg_feats = sample_feats[top_idx, :]
            bbreg_samples = samples[top_idx]
            bbreg_samples = bbreg.predict(bbreg_feats.data, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox
        iou_result[i] = 1.

        # Data collect
        if success:
            # Draw pos/neg samples
            pos_examples = gen_samples(
                SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),
                target_bbox, opts['n_pos_update'],
                opts['overlap_pos_update'])
            neg_examples = gen_samples(
                SampleGenerator('uniform', (ishape[1], ishape[0]), 1.5, 1.2),
                target_bbox, opts['n_neg_update'],
                opts['overlap_neg_update'])

            padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] *
                         (opts['padding'] - 1.) / 2.).min()
            padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] *
                         (opts['padding'] - 1.) / 2.).min()
            padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] *
                         (opts['padding'] + 1.) / 2.).max()
            padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] *
                         (opts['padding'] + 1.) / 2.).max()
            padded_scene_box = np.reshape(
                np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                            padded_y2 - padded_y1)), (1, 4))

            scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
            jitter_scale = [1.]

            for bidx in range(0, scene_boxes.shape[0]):
                crop_img_size = (scene_boxes[bidx, 2:4] * (
                    (opts['img_size'], opts['img_size']) / target_bbox[2:4])
                    ).astype('int64') * jitter_scale[bidx]
                cropped_image, cur_image_var = img_crop_model.crop_image(
                    cur_image, np.reshape(scene_boxes[bidx], (1, 4)),
                    crop_img_size)
                cropped_image = cropped_image - 128.
                feat_map = model(cropped_image, out_layer='conv3')

                rel_target_bbox = np.copy(target_bbox)
                rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

                batch_num = np.zeros((pos_examples.shape[0], 1))
                cur_pos_rois = np.copy(pos_examples)
                cur_pos_rois[:, 0:2] -= np.repeat(
                    np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                    cur_pos_rois.shape[0], axis=0)
                scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
                cur_pos_rois = samples2maskroi(
                    cur_pos_rois, model.receptive_field,
                    (scaled_obj_size, scaled_obj_size), target_bbox[2:4],
                    opts['padding'])
                cur_pos_rois = np.concatenate((batch_num, cur_pos_rois),
                                              axis=1)
                cur_pos_rois = Variable(
                    torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()
                cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois)
                cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                                   -1).data.clone()

                batch_num = np.zeros((neg_examples.shape[0], 1))
                cur_neg_rois = np.copy(neg_examples)
                cur_neg_rois[:, 0:2] -= np.repeat(
                    np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                    cur_neg_rois.shape[0], axis=0)
                cur_neg_rois = samples2maskroi(
                    cur_neg_rois, model.receptive_field,
                    (scaled_obj_size, scaled_obj_size), target_bbox[2:4],
                    opts['padding'])
                cur_neg_rois = np.concatenate((batch_num, cur_neg_rois),
                                              axis=1)
                cur_neg_rois = Variable(
                    torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()
                cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois)
                cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                                   -1).data.clone()

                feat_dim = cur_pos_feats.size(-1)

                if bidx == 0:
                    pos_feats = cur_pos_feats  ## index select
                    neg_feats = cur_neg_feats
                else:
                    pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
                    neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)

            if pos_feats.size(0) > opts['n_pos_update']:
                pos_idx = np.asarray(range(pos_feats.size(0)))
                np.random.shuffle(pos_idx)
                pos_feats = pos_feats.index_select(
                    0,
                    torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())
            if neg_feats.size(0) > opts['n_neg_update']:
                neg_idx = np.asarray(range(neg_feats.size(0)))
                np.random.shuffle(neg_idx)
                neg_feats = neg_feats.index_select(
                    0,
                    torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())
            pos_feats_all.append(pos_feats)
            neg_feats_all.append(neg_feats)

            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.stack(pos_feats_all[-nframes:],
                                   0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.stack(pos_feats_all, 0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            # train(model, criterion, update_optimizer, pos_data, neg_data,
            #       opts['maxiter_update'])
            update_vital_v2_new(model, model_G, criterion, update_optimizer,
                                g_update_optimizer, g_criterion, True,
                                num2drop, pos_data, neg_data,
                                opts['maxiter_update'])

        spf = time.time() - tic
        spf_total += spf
        secPerFrame[i] = spf

        # Display
        if display or savefig:
            im.set_data(cur_image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '%04d.jpg' % (i)),
                            dpi=dpi)

        if opts['visual_log']:
            if gt is None:
                print("Frame %d/%d, Score %.3f, Time %.3f" %
                      (i, len(img_list), target_score, spf))
            else:
                print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" %
                      (i, len(img_list),
                       overlap_ratio(gt[i], result_bb[i])[0], target_score,
                       spf))
                # iou_result[i] = overlap_ratio(gt[i], result_bb[i])[0]

    fps = len(img_list) / spf_total
    # fps = (len(img_list) - 1) / spf_total  # no first frame

    # return iou_result, result_bb, fps, result, secPerFrame
    return result_bb, fps, result, secPerFrame
def update_vital_v2_new(model,
                        model_G,
                        criterion,
                        optimizer,
                        g_optimizer,
                        g_criterion,
                        adversarial,
                        num2drop,
                        pos_feats,
                        neg_feats,
                        maxiter,
                        in_layer='fc4'):
    model.train()

    batch_pos = opts['batch_pos']
    batch_neg = opts['batch_neg']
    batch_test = opts['batch_test']
    batch_neg_cand = max(opts['batch_neg_cand'], batch_neg)

    pos_idx = np.random.permutation(pos_feats.size(0))
    neg_idx = np.random.permutation(neg_feats.size(0))
    while len(pos_idx) < batch_pos * maxiter:
        pos_idx = np.concatenate(
            [pos_idx, np.random.permutation(pos_feats.size(0))])
    while len(neg_idx) < batch_neg_cand * maxiter:
        neg_idx = np.concatenate(
            [neg_idx, np.random.permutation(neg_feats.size(0))])
    pos_pointer = 0
    neg_pointer = 0

    for iter in range(maxiter):

        # select pos idx
        pos_next = pos_pointer + batch_pos
        pos_cur_idx = pos_idx[pos_pointer:pos_next]
        pos_cur_idx = pos_feats.new(pos_cur_idx).long()
        pos_pointer = pos_next

        # select neg idx
        neg_next = neg_pointer + batch_neg_cand
        neg_cur_idx = neg_idx[neg_pointer:neg_next]
        neg_cur_idx = neg_feats.new(neg_cur_idx).long()
        neg_pointer = neg_next

        # create batch
        batch_pos_feats = Variable(pos_feats.index_select(0, pos_cur_idx))
        batch_neg_feats = Variable(neg_feats.index_select(0, neg_cur_idx))

        # hard negative mining
        # tic = time.time()
        if batch_neg_cand > batch_neg:
            model.eval()  # switch model into evaluation mode
            for start in range(0, batch_neg_cand, batch_test):
                end = min(start + batch_test, batch_neg_cand)
                score = model(batch_neg_feats[start:end], in_layer=in_layer)
                if start == 0:
                    neg_cand_score = score.data[:, 1].clone()
                else:
                    neg_cand_score = torch.cat(
                        (neg_cand_score, score.data[:, 1].clone()), 0)

            _, top_idx = neg_cand_score.topk(batch_neg)
            batch_neg_feats = batch_neg_feats.index_select(
                0, Variable(top_idx))
            # model.train()  # switch model into train mode
            # t1 = time.time() - tic

        #######################################################################
        ###################### ADDED: generate mask ###########################
        #######################################################################
        # batch_pos_data = batch_pos_feats.clone().reshape(
        #     batch_pos_feats.size(0), -1, 3, 3)

        ###################### Update Net D first ####################
        model_G.eval()
        res = model_G(batch_pos_feats)
        model_G.train()
        mask = torch.ones_like(batch_pos_feats)
        # zero the mask wherever G predicts a dropped unit
        mask[res == 0] = 0
        batch_pos_feats_mask = batch_pos_feats * mask

        # forward
        model.train()
        pos_score = model(batch_pos_feats_mask, in_layer=in_layer)
        neg_score = model(batch_neg_feats, in_layer=in_layer)

        # optimize
        loss = criterion(pos_score, neg_score)
        model.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opts['grad_clip'])
        optimizer.step()
        model.eval()

        ###################### Update Net G now ####################
        if adversarial == True:
            # num2drop = 512
            idxlist = np.zeros([9, num2drop])
            prob_k = np.zeros(9)
            for i in range(9):
                # drop a random subset of the 4608 (512 * 3 * 3) feature units
                idxlist[i, :] = np.random.permutation(4608)[0:num2drop]
                cur_idx = pos_feats.new(idxlist[i]).long()
                cur_batch = Variable(pos_feats.index_select(0, pos_cur_idx))
                new_mask = mask.clone()
                new_mask[:, cur_idx] = 0
                new_cur_batch = cur_batch * new_mask

                res_ = model(new_cur_batch, in_layer=in_layer)
                # softmax over the two classes, computed manually
                X = res_
                X_maxchannel = torch.max(X, 1)[0].view(X.size(0), 1)
                E = torch.exp(X - X_maxchannel)
                L = torch.sum(E, 1)
                Y = E / (L.view(X.size(0), 1))
                Y = Y.cpu().detach().numpy()
                # prob_k accumulates the class-0 probability; the pattern
                # with the minimum sum is kept
                prob_k[i] = np.sum(Y[:, 0])

            idx = np.where(prob_k == np.min(prob_k))

            batch_pos_data = Variable(pos_feats.index_select(0, pos_cur_idx))
            labels = mask.clone()
            # zero the winning dropout pattern in the regression target
            # (the original indexed labels with the pattern id in 0..8
            # instead of the dropped feature indices)
            win_idx = idxlist[idx[0][0]].astype(int)
            labels[:, win_idx] = 0
            if opts['use_gpu']:
                labels = labels.cuda()

            # t5 = time.time() - tic
            model_G.train()  # switch model_G into train mode

            # forward
            score = model_G(batch_pos_data)

            # optimize
            loss_g = g_criterion(score, labels)
            model_G.zero_grad()
            loss_g.backward()
            torch.nn.utils.clip_grad_norm_(model_G.parameters(),
                                           opts['grad_clip'])
            g_optimizer.step()

        if opts['visual_log']:
            print("Iter %d, Loss %.4f, G_Loss %.4f" %
                  (iter, loss.item(), loss_g.item()))
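# A self-contained sketch (hypothetical helper, not part of the original
# code) of the adversarial selection step above: try K random dropout
# patterns and keep the one that most suppresses the classifier's softmax
# output. This sketch minimizes the positive-class column, as the second
# train() variant below does; the loop above sums column 0 instead.
def pick_hardest_dropout(classifier, feats, num2drop, k=9, feat_dim=4608):
    patterns = [np.random.permutation(feat_dim)[:num2drop] for _ in range(k)]
    totals = []
    with torch.no_grad():
        for p in patterns:
            masked = feats.clone()
            masked[:, p] = 0                     # drop one random pattern
            logits = classifier(masked)          # assumed (batch, 2) logits
            totals.append(torch.softmax(logits, 1)[:, 1].sum().item())
    return patterns[int(np.argmin(totals))]      # hardest pattern for D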
def train(model,
          criterion,
          optimizer,
          pos_feats,
          neg_feats,
          maxiter,
          in_layer='fc4'):
    model.train()

    batch_pos = opts['batch_pos']
    batch_neg = opts['batch_neg']
    batch_test = opts['batch_test']
    batch_neg_cand = max(opts['batch_neg_cand'], batch_neg)

    pos_idx = np.random.permutation(pos_feats.size(0))
    neg_idx = np.random.permutation(neg_feats.size(0))
    while len(pos_idx) < batch_pos * maxiter:
        pos_idx = np.concatenate(
            [pos_idx, np.random.permutation(pos_feats.size(0))])
    while len(neg_idx) < batch_neg_cand * maxiter:
        neg_idx = np.concatenate(
            [neg_idx, np.random.permutation(neg_feats.size(0))])
    pos_pointer = 0
    neg_pointer = 0

    for iter in range(maxiter):

        # select pos idx
        pos_next = pos_pointer + batch_pos
        pos_cur_idx = pos_idx[pos_pointer:pos_next]
        pos_cur_idx = pos_feats.new(pos_cur_idx).long()
        pos_pointer = pos_next

        # select neg idx
        neg_next = neg_pointer + batch_neg_cand
        neg_cur_idx = neg_idx[neg_pointer:neg_next]
        neg_cur_idx = neg_feats.new(neg_cur_idx).long()
        neg_pointer = neg_next

        # create batch
        batch_pos_feats = Variable(pos_feats.index_select(0, pos_cur_idx))
        batch_neg_feats = Variable(neg_feats.index_select(0, neg_cur_idx))

        # hard negative mining
        if batch_neg_cand > batch_neg:
            model.eval()  # switch model into evaluation mode
            for start in range(0, batch_neg_cand, batch_test):
                end = min(start + batch_test, batch_neg_cand)
                score = model(batch_neg_feats[start:end], in_layer=in_layer)
                if start == 0:
                    neg_cand_score = score.data[:, 1].clone()
                else:
                    neg_cand_score = torch.cat(
                        (neg_cand_score, score.data[:, 1].clone()), 0)

            _, top_idx = neg_cand_score.topk(batch_neg)
            batch_neg_feats = batch_neg_feats.index_select(
                0, Variable(top_idx))
            model.train()  # switch model into train mode

        # forward
        pos_score = model(batch_pos_feats, in_layer=in_layer)
        neg_score = model(batch_neg_feats, in_layer=in_layer)

        # optimize
        loss = criterion(pos_score, neg_score)
        model.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opts['grad_clip'])
        optimizer.step()

        if opts['visual_log']:
            print("Iter %d, Loss %.4f" % (iter, loss.item()))
def train(model,
          model_g,
          criterion,
          optimizer,
          pos_feats,
          neg_feats,
          maxiter,
          in_layer="fc4"):
    """Train the models."""
    model.train()

    batch_pos = opts["batch_pos"]
    batch_neg = opts["batch_neg"]
    batch_test = opts["batch_test"]
    batch_neg_cand = max(opts["batch_neg_cand"], batch_neg)

    pos_idx = numpy.random.permutation(pos_feats.size(0))
    neg_idx = numpy.random.permutation(neg_feats.size(0))
    while len(pos_idx) < batch_pos * maxiter:
        pos_idx = numpy.concatenate(
            [pos_idx, numpy.random.permutation(pos_feats.size(0))])
    while len(neg_idx) < batch_neg_cand * maxiter:
        neg_idx = numpy.concatenate(
            [neg_idx, numpy.random.permutation(neg_feats.size(0))])
    pos_pointer = 0
    neg_pointer = 0

    for _ in range(maxiter):
        # select pos idx
        pos_next = pos_pointer + batch_pos
        pos_cur_idx = pos_idx[pos_pointer:pos_next]
        pos_cur_idx = pos_feats.new(pos_cur_idx).long()
        pos_pointer = pos_next

        # select neg idx
        neg_next = neg_pointer + batch_neg_cand
        neg_cur_idx = neg_idx[neg_pointer:neg_next]
        neg_cur_idx = neg_feats.new(neg_cur_idx).long()
        neg_pointer = neg_next

        # create batch
        batch_pos_feats = pos_feats[pos_cur_idx]
        if model_g is not None:
            batch_asdn_feats = pos_feats.index_select(0, pos_cur_idx)
        batch_neg_feats = neg_feats[neg_cur_idx]

        # hard negative mining
        if batch_neg_cand > batch_neg:
            model.eval()
            for start in range(0, batch_neg_cand, batch_test):
                end = min(start + batch_test, batch_neg_cand)
                with torch.no_grad():
                    score = model(batch_neg_feats[start:end],
                                  in_layer=in_layer)
                if start == 0:
                    neg_cand_score = score.detach()[:, 1].clone()
                else:
                    neg_cand_score = torch.cat(
                        (neg_cand_score, score.detach()[:, 1].clone()), 0)

            _, top_idx = neg_cand_score.topk(batch_neg)
            batch_neg_feats = batch_neg_feats[top_idx]
            model.train()

        if model_g is not None:
            model_g.eval()
            res_asdn = model_g(batch_asdn_feats)
            model_g.train()
            num = res_asdn.size(0)
            mask_asdn = torch.ones(num, 512, 3, 3)
            res_asdn = res_asdn.view(num, 3, 3)
            for j in range(num):
                feat_ = res_asdn[j, :, :]
                featlist = feat_.view(1, 9).squeeze()
                feat_list = featlist.detach().cpu().numpy()
                idlist = feat_list.argsort()
                idxlist = idlist[:3]

                for idx in idxlist:
                    row = idx // 3
                    col = idx % 3
                    # mask only sample j (the original zeroed the cell for
                    # every sample in the batch here)
                    mask_asdn[j, :, col, row] = 0
            mask_asdn = mask_asdn.view(mask_asdn.size(0), -1)
            if opts["use_gpu"]:
                batch_asdn_feats = batch_asdn_feats.cuda()
                mask_asdn = mask_asdn.cuda()
            batch_asdn_feats = batch_asdn_feats * mask_asdn

        # forward
        if model_g is None:
            pos_score = model(batch_pos_feats, in_layer=in_layer)
        else:
            pos_score = model(batch_asdn_feats, in_layer=in_layer)
        neg_score = model(batch_neg_feats, in_layer=in_layer)

        # optimize
        loss = criterion(pos_score, neg_score)
        model.zero_grad()
        loss.backward()
        if "grad_clip" in opts:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           opts["grad_clip"])
        optimizer.step()

        if model_g is not None:
            start = time.time()
            prob_k = torch.zeros(9)
            for k in range(9):
                row = k // 3
                col = k % 3

                model.eval()
                # clone before zeroing: view() writes through to
                # batch_pos_feats, so the zeroed cells would otherwise
                # accumulate across iterations of k
                batch = batch_pos_feats.view(batch_pos, 512, 3, 3).clone()
                batch[:, :, col, row] = 0
                batch = batch.view(batch.size(0), -1)

                if opts["use_gpu"]:
                    batch = batch.cuda()

                prob = model(batch, in_layer="fc4",
                             out_layer="fc6_softmax")[:, 1]
                model.train()

                prob_k[k] = prob.sum()

            _, idx = torch.min(prob_k, 0)
            idx = idx.item()
            row = idx // 3
            col = idx % 3

            optimizer_g = gnet.g_init.set_optimizer_g(model_g)
            labels = torch.ones(batch_pos, 1, 3, 3)
            labels[:, :, col, row] = 0

            batch_pos_feats = batch_pos_feats.view(batch_pos_feats.size(0),
                                                   -1)
            res = model_g(batch_pos_feats)
            labels = labels.view(batch_pos, -1)
            criterion_g = torch.nn.MSELoss(reduction="mean")
            loss_g_2 = criterion_g(res.float(), labels.cuda().float())
            model_g.zero_grad()
            loss_g_2.backward()
            optimizer_g.step()

            end = time.time()
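# A minimal sketch (assuming, as in the loop above, 512-channel 3x3 RoIAlign
# features and a 9-unit generator output) of how one sample's 3x3 saliency
# scores become a flat feature mask: the three lowest-scoring cells are
# zeroed across all channels.
def expand_cell_mask(cell_scores, channels=512):
    mask = torch.ones(channels, 3, 3)
    drop = cell_scores.view(9).argsort()[:3]  # three least salient cells
    for idx in drop.tolist():
        row, col = idx // 3, idx % 3
        mask[:, col, row] = 0                 # same (col, row) order as above
    return mask.view(-1)                      # flatten to channels * 9 dims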
"recall75", "precision", "conf_obj", "conf_noobj", ] for epoch in range(opt.epochs): model.train() start_time = time.time() for batch_i, (_, imgs, targets) in enumerate(dataloader): batches_done = len(dataloader) * epoch + batch_i imgs = Variable(imgs.to(device)) targets = Variable(targets.to(device), requires_grad=False) loss, outputs = model(imgs, targets) loss.backward() if batches_done % opt.gradient_accumulations: # Accumulates gradient before each step optimizer.step() optimizer.zero_grad() # ---------------- # Log progress # ---------------- log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % ( epoch, opt.epochs, batch_i, len(dataloader)) metric_table = [[
def main(args):
    if args.seed:
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)

    # Q, X = load_data(args.dataset, args.data_path)
    ppi_embs = np.loadtxt('STRING_PPI_struc2vec_embs.txt', skiprows=1)
    ppi_id = ppi_embs[:, 0].astype(int)
    ppi_embs = ppi_embs[:, 1:]
    X = ppi_embs.T  # (d, num_nodes)
    Q = X[:, 0:3]

    if args.graph_mode == 'ransac':
        q_RANSAC_graph, x_RANSAC_graph = load_ransac_graph(
            args.dataset, args.data_path)
    elif args.graph_mode == 'approx_ransac':
        _, x_RANSAC_graph = load_ransac_graph(args.dataset, args.data_path)
        q_RANSAC_graph = None
    else:
        q_RANSAC_graph, x_RANSAC_graph = None, None

    q_adj, q_features, x_adj, x_features = gen_graph(
        Q, X, args.kq, args.k, q_RANSAC_graph, x_RANSAC_graph)

    all_features = np.concatenate([q_features, x_features])
    all_adj = combine_graph(q_adj, x_adj)
    all_adj_normed = preprocess_graph(all_adj)
    x_adj_normed = preprocess_graph(x_adj)
    x_adj_normed_sparse_tensor = convert_sparse_matrix_to_sparse_tensor(
        x_adj_normed)
    all_adj_normed_sparse_tensor = convert_sparse_matrix_to_sparse_tensor(
        all_adj_normed)

    # features_placeholder = tf.placeholder(dtype=tf.float32,
    #                                       shape=[None, args.hidden_units])
    # adj_placeholder = tf.sparse_placeholder(dtype=tf.float32,
    #                                         shape=[None, None])
    # FIXME: regularizer = tf.contrib.layers.l2_regularizer(
    #     scale=args.regularizer_scale)

    model = ResidualGraphConvolutionalNetwork(
        train_batch_size=args.batch_size
        if args.batch_size > 0 else x_adj_normed.shape[0],
        val_batch_size=all_adj_normed.shape[0],
        num_layers=args.num_layers,
        hidden_units=args.hidden_units,
        init_weights=args.init_weights,
        layer_decay=args.layer_decay)

    if args.gpu_id is not None:
        model.cuda()
        x_adj_normed_sparse_tensor = x_adj_normed_sparse_tensor.cuda()
        all_adj_normed_sparse_tensor = all_adj_normed_sparse_tensor.cuda()
        print('using gpu')

    # FIXME: add flexible iterator
    training_dataset = DiffusionDataSet(features=x_features,
                                        adj=x_adj_normed_sparse_tensor)
    training_loader = DiffusionDataLoader(
        training_dataset,
        batch_size=args.batch_size
        if args.batch_size > 0 else len(training_dataset),
        num_workers=6,
        shuffle=True)
    validation_dataset = DiffusionDataSet(features=all_features,
                                          adj=all_adj_normed_sparse_tensor)
    validation_loader = DiffusionDataLoader(
        validation_dataset,
        batch_size=len(validation_dataset),
        num_workers=6,
        shuffle=False)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=0)
    # RMSprop reaches 76.48 with batch size 2048; it is a bit better than
    # Adam and will not drop the mAP in later training.
    # optimizer = torch.optim.RMSprop(model.parameters(),
    #                                 lr=args.lr,
    #                                 centered=True)
    # DenseSparseAdam reaches 76.53 with batch size 2048; it is a bit
    # better than Adam.
    # from allennlp.training.optimizers import DenseSparseAdam
    # optimizer = DenseSparseAdam(model.parameters(), lr=args.lr)

    best_map = 0.0
    itr = 0
    model.train()
    if args.loss == 'gss':
        loss_fcn = GSS_loss(args.alpha).gss_loss
    elif args.loss == 'triplet':
        loss_fcn = GSS_loss(args.alpha).tri_loss

    while itr < args.epochs:
        # training step
        start_time = time.time()
        # forward
        for batch_id, batch_data in enumerate(training_loader):
            if args.gpu_id is not None:
                batch_data = batch_data.cuda()
            logits, hidden_emb = model(
                x=training_dataset.features.cuda()
                if args.gpu_id is not None else validation_dataset.features,
                adj=training_dataset.adj)
            if itr == 0:
                hidden_emb = hidden_emb.cpu().data
                if args.beta_percentile is not None:
                    beta_score = np.percentile(
                        np.dot(hidden_emb, hidden_emb.T).flatten(),
                        args.beta_percentile)
                elif args.beta is not None:
                    beta_score = args.beta
                else:
                    raise Exception(
                        'At least one of beta and beta_percentile should '
                        'be set!')
            # !# need to change the loss here to support tri_loss and load
            # and use the graph computed
            loss = loss_fcn(logits=logits,
                            beta=beta_score,
                            index=batch_data.detach())
            # if args.regularizer_scale:
            #     l2 = 0
            #     for p in model.parameters():
            #         l2 += (p**2).sum()
            #     loss += args.regularizer_scale * l2
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_time = time.time() - start_time
        itr += 1

        # ============
        #  eval step
        # ============
        print(f"iter {itr}")
        # print(hidden_emb[0, :10])
        np.savetxt('graph_embs.txt', hidden_emb.cpu().data)
def train(args):
    # random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # gpu
    if not args.no_cuda:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
        print('Using GPU %s' % args.gpu)

    # word vector
    corpus = Corpus()
    vocab, embed = corpus.load_embed(args.fn_embed)
    print("finished loading external word embedding, the shape is:")
    print(embed.shape)

    # model
    model_dict = {
        'lstm_comparing': BiLSTM_Encoding_Comparing,
        'char_lstm_comparing': Char_BiLSTM_Encoding_Comparing
    }
    print("current model is", args.model)
    model_name = model_dict[args.model]
    if not args.no_cuda:
        embed = embed.cuda()
    model = model_name(args, embed)
    if not args.no_cuda:
        model.cuda()
    print(model)

    train_questions_raw, train_golds_raw, train_negs_raw = corpus.load_data(
        args.fn_train, 'train')
    valid_questions_raw, valid_golds_raw, valid_negs_raw = corpus.load_data(
        args.fn_valid, 'valid')
    train_questions = corpus.numericalize(train_questions_raw,
                                          args.input_mode)
    train_golds = corpus.numericalize(train_golds_raw, args.input_mode)
    train_negs = []
    for line in train_negs_raw:
        train_negs.append(corpus.numericalize(line, args.input_mode))
    # from pdb import set_trace
    # set_trace()
    if isinstance(train_questions, tuple):
        print("train data loaded! %d questions totally" %
              len(train_questions[0]))
    else:
        print("train data loaded! %d questions totally" %
              len(train_questions))

    valid_questions = corpus.numericalize(valid_questions_raw,
                                          args.input_mode)
    valid_golds = corpus.numericalize(valid_golds_raw, args.input_mode)
    valid_negs = []
    for index, line in enumerate(valid_negs_raw):
        valid_negs.append(corpus.numericalize(line, args.input_mode))
    if isinstance(valid_questions, tuple):
        print("valid data loaded! %d questions totally" %
              len(valid_questions[0]))
    else:
        print("valid data loaded! %d questions totally" %
              len(valid_questions))
    valid_dataset = (valid_questions, valid_golds, valid_negs)
    print("character vocab size:", corpus.len_char_dict())

    # dump vocab
    corpus.dump_vocab(args.vocab_word, mode='word')
    corpus.dump_vocab(args.vocab_char, mode='char')

    # training settings
    optimizer_dict = {"adam": Adam}
    optimizer_name = optimizer_dict[args.optimizer]
    print("chosen optimizer: %s" % args.optimizer)
    optimizer = optimizer_name(model.parameters(), lr=args.learning_rate)
    criterion = MarginRankingLoss(margin=args.margin)
    patience = args.patience
    num_train_epochs = args.num_train_epochs
    iters_left = patience
    best_precision = 0
    num_not_improved = 0
    global_step = 0
    logger.info('\nstart training:%s' %
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    print("start training!")

    for epoch in range(args.num_train_epochs):
        # batchlize
        sample_train_negs = train_neg_sample(train_negs,
                                             args.neg_size,
                                             mode=args.input_mode)
        sample_train = (train_questions, train_golds, sample_train_negs)
        train_batches = train_batchlize(sample_train,
                                        args.batch_size,
                                        mode=args.input_mode)
        print("train data batchlized............")

        train_right = 0
        train_total = 0

        # log the epoch start time
        print('start time')
        start_time = datetime.now()
        logger.info('\nstart training:%s' %
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        print(start_time)

        model.train()
        optimizer.zero_grad()
        loss_epoch = 0  # total loss of this epoch
        for step, batch in enumerate(train_batches):
            # if not args.no_cuda:
            #     batch = (t.cuda() for t in batch)
            question_batch, gold_batch, negs_batch = batch
            pos_score, neg_scores = model(question_batch, gold_batch,
                                          negs_batch)
            pos_score = pos_score.expand_as(neg_scores).reshape(-1)
            neg_scores = neg_scores.reshape(-1)
            assert pos_score.shape == neg_scores.shape
            ones = torch.ones(pos_score.shape)
            if not args.no_cuda:
                ones = ones.cuda()
            loss = criterion(pos_score, neg_scores, ones)
            # evaluate train
            result = (torch.sum(
                pos_score.view(-1, args.neg_size) >
                neg_scores.view(-1, args.neg_size), -1) == args.neg_size)
            train_right += torch.sum(result).item()
            train_total += len(result)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_epoch += loss

        # log the epoch results
        end_time = datetime.now()
        logger.info('\ntrain epoch %d time span:%s' %
                    (epoch, end_time - start_time))
        print('train loss', loss_epoch.item())
        logger.info('train loss:%f' % loss_epoch.item())
        print('train result', train_right, train_total,
              1.0 * train_right / train_total)
        logger.info(('train result', train_right, train_total,
                     1.0 * train_right / train_total))

        # eval
        right, total, precision = evaluate_char(args, model, valid_dataset)

        # print
        print('valid result', right, total, precision)
        print('epoch time')
        print(datetime.now())
        print('*' * 20)
        logger.info("epoch:%d\t" % epoch +
                    "dev_Accuracy-----------------------%d/%d=%f\n" %
                    (right, total, precision))
        end_time = datetime.now()
        logger.info('dev epoch %d time span:%s' %
                    (epoch, end_time - start_time))

        if precision > best_precision:
            best_precision = precision
            iters_left = patience
            print("epoch %d saved\n" % epoch)
            logger.info("epoch %d saved\n" % epoch)
            # Save a trained model; only save the model itself
            model_to_save = model.module if hasattr(model,
                                                    'module') else model
            output_model_file = os.path.join(args.output_dir,
                                             "best_model.bin")
            torch.save(model_to_save.state_dict(), output_model_file)
        else:
            iters_left -= 1
            if iters_left == 0:
                break

    logger.info('finish training!')
    print('finish training!')
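# A small sketch (toy tensors, not from the training data) of the ranking
# setup used above: the gold score is broadcast against every negative
# score, and MarginRankingLoss with target 1 pushes the gold above each
# negative by the margin.
def _margin_ranking_example():
    criterion = MarginRankingLoss(margin=0.5)
    pos = torch.tensor([[2.0]])                # one question, one gold score
    negs = torch.tensor([[1.0, 1.8, 2.4]])     # neg_size = 3 candidates
    pos_rep = pos.expand_as(negs).reshape(-1)  # same flattening as train()
    ones = torch.ones(pos_rep.shape)
    return criterion(pos_rep, negs.reshape(-1), ones)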