def main(args):
    """Entry point: load a JSON config, overlay CLI overrides, and dispatch by mode."""
    # Read the base configuration from disk.
    with open(args.cfg_path, "r") as fp:
        configs = json.load(fp)

    # Command-line values override config entries, but only when explicitly set.
    for key, value in vars(args).items():
        if key in configs and value is not None:
            configs[key] = value

    configs = utils.ConfigMapper(configs)

    # Attack hyper-parameters are given on the 0-255 pixel scale; rescale to [0, 1].
    configs.attack_eps = float(configs.attack_eps) / 255
    configs.attack_lr = float(configs.attack_lr) / 255

    # Results are grouped by mode and algorithm under the save root.
    configs.save_path = os.path.join(configs.save_path, configs.mode, configs.alg)
    pathlib.Path(configs.save_path).mkdir(parents=True, exist_ok=True)

    # Dispatch to the requested runner.
    if configs.mode == 'train':
        Trainer(configs).train()
    elif configs.mode == 'eval':
        Evaluator(configs).eval()
    elif configs.mode == 'vis':
        Visualizer(configs).visualize()
    else:
        raise ValueError('mode should be train, eval or vis')
def test(args):
    """Evaluate a checkpointed vehicle re-ID model and print mAP / CMC scores."""
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    config = load_configs(args.config)
    loader = get_vehicle_dataloader(config, quick_check=args.check)

    # The model class is looked up by name from the config, then restored
    # from the given checkpoint and spread across all visible GPUs.
    net = globals()[config['MODEL']]()
    net.load_param(args.ckpt)
    net = nn.DataParallel(net).cuda()

    mAP, cmc = Evaluator(config, net, loader).evaluate()
    # Report CMC at ranks 1, 5 and 10.
    print("(mAP: {:.5f} cmc-1: {:.5f} cmc-5: {:.5f} cmc-10: {:.5f})".format(
        mAP, cmc[0], cmc[4], cmc[9]))
def __init__(self, cfg, model, dataset):
    """Set up logging, data, model (optionally warm-started), optimizer and LR schedule.

    Args:
        cfg: dict-like config; keys used here: 'PRETRAIN', 'MILESTONES',
            'GAMMA', 'WARMUP_FACTOR'.
        model: network to train; wrapped in nn.DataParallel and moved to GPU.
        dataset: 9-tuple of loaders/metadata, unpacked positionally below —
            order must match the dataset builder's return value.
    """
    super(BaseTrainer, self).__init__()
    self.cfg = cfg
    self.logger = Logger(cfg)
    (self.source_loader, self.target_loader, self.test_loader, self.query,
     self.gallery, self.train_transformer, self.source_train,
     self.target_train, self.target_cluster_loader) = dataset
    self.best_mAP = 0
    # Query the GPU count once; the original re-assigned it three times.
    self.num_gpus = torch.cuda.device_count()
    # Warm-start from a pretrained checkpoint when the path exists.
    if os.path.exists(self.cfg['PRETRAIN']):
        model.load_param(self.cfg['PRETRAIN'])
        print("load checkpoint from {}".format(self.cfg['PRETRAIN']))
    self.model = nn.DataParallel(model).cuda()
    self.evaluator = Evaluator(self.cfg, self.model, dataset)
    self.optimizer = make_optimizer(self.cfg, self.model, num_gpus=self.num_gpus)
    self.scheduler = WarmupMultiStepLR(self.optimizer, self.cfg['MILESTONES'],
                                       self.cfg['GAMMA'], self.cfg['WARMUP_FACTOR'])
    self.logger.write('num gpus:{} \n'.format(self.num_gpus))
def main(args):
    """Train (or evaluate) the two-branch image/difference classifier.

    Builds an image branch and a diff branch of the same architecture, then
    either runs a one-off test-set evaluation (``--evaluate``) or trains both
    branches jointly with SGD and a stepped learning-rate decay, checkpointing
    the best validation top-1 model and finally testing with it.
    """
    # Make runs reproducible.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Redirect print to both console and log file (training runs only).
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt'))

    # Create data loaders; each architecture has its own default input size.
    if args.height is None or args.width is None:
        args.height, args.width = (144, 56) if args.arch == 'inception' else \
                                  (240, 240)
    dataset, num_classes, train_loader, val_loader, test_loader = \
        get_data(args.dataset, args.split, args.data_dir, args.height,
                 args.width, args.batch_size, args.workers,
                 args.combine_trainval)

    # Create the two branches: identical architecture, separate weights.
    img_branch = models.create(args.arch, cut_layer=args.cut_layer,
                               num_classes=num_classes,
                               num_features=args.features)
    diff_branch = models.create(args.arch, cut_layer=args.cut_layer,
                                num_classes=num_classes,
                                num_features=args.features)

    # Optionally resume both branches from a joint checkpoint.
    start_epoch = best_top1 = 0
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        img_branch.load_state_dict(checkpoint['state_dict_img'])
        diff_branch.load_state_dict(checkpoint['state_dict_diff'])
        start_epoch = checkpoint['epoch']
        best_top1 = checkpoint['best_top1']
        print("=> Start epoch {} best top1 {:.1%}".format(
            start_epoch, best_top1))

    img_branch = nn.DataParallel(img_branch).cuda()
    diff_branch = nn.DataParallel(diff_branch).cuda()

    # Criterion
    criterion = nn.CrossEntropyLoss().cuda()

    # Evaluator
    evaluator = Evaluator(img_branch, diff_branch, criterion)
    if args.evaluate:
        print("Test:")
        top1, (gt, pred) = evaluator.evaluate(test_loader)
        print("Test acc: {:.1%}".format(top1))
        from confusion_matrix import plot_confusion_matrix
        plot_confusion_matrix(gt, pred, dataset.classes, args.logs_dir)
        return

    # Backbone layers train with a 10x smaller LR than the fresh classifier
    # (lr_mult is applied inside adjust_lr below).
    img_param_groups = [
        {'params': img_branch.module.low_level_modules.parameters(), 'lr_mult': 0.1},
        {'params': img_branch.module.high_level_modules.parameters(), 'lr_mult': 0.1},
        {'params': img_branch.module.classifier.parameters(), 'lr_mult': 1},
    ]
    diff_param_groups = [
        {'params': diff_branch.module.low_level_modules.parameters(), 'lr_mult': 0.1},
        {'params': diff_branch.module.high_level_modules.parameters(), 'lr_mult': 0.1},
        {'params': diff_branch.module.classifier.parameters(), 'lr_mult': 1},
    ]
    img_optimizer = torch.optim.SGD(img_param_groups, lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=True)
    diff_optimizer = torch.optim.SGD(diff_param_groups, lr=args.lr,
                                     momentum=args.momentum,
                                     weight_decay=args.weight_decay,
                                     nesterov=True)

    # Trainer
    trainer = Trainer(img_branch, diff_branch, criterion)

    # Schedule learning rate: decay 10x every step_size epochs, scaled per
    # parameter group by its lr_mult.
    def adjust_lr(epoch):
        step_size = args.step_size
        lr = args.lr * (0.1 ** (epoch // step_size))
        for g in img_optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)
        for g in diff_optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)

    # Start training
    for epoch in range(start_epoch, args.epochs):
        adjust_lr(epoch)
        trainer.train(epoch, train_loader, img_optimizer, diff_optimizer)
        # Skip validation/checkpointing during the earliest epochs.
        if epoch < args.start_save:
            continue
        top1, _ = evaluator.evaluate(val_loader)

        is_best = top1 > best_top1
        best_top1 = max(top1, best_top1)
        save_checkpoint(
            {
                'state_dict_img': img_branch.module.state_dict(),
                'state_dict_diff': diff_branch.module.state_dict(),
                'epoch': epoch + 1,
                'best_top1': best_top1,
            },
            is_best,
            fpath=osp.join(args.logs_dir, 'checkpoint.pth.tar'))
        print('\n * Finished epoch {:3d} top1: {:5.1%} best: {:5.1%}{}\n'.
              format(epoch, top1, best_top1, ' *' if is_best else ''))

    # Final test with the best checkpoint saved by save_checkpoint above.
    print('Test with best model:')
    checkpoint = load_checkpoint(osp.join(args.logs_dir, 'model_best.pth.tar'))
    img_branch.module.load_state_dict(checkpoint['state_dict_img'])
    diff_branch.module.load_state_dict(checkpoint['state_dict_diff'])
    top1, (gt, pred) = evaluator.evaluate(test_loader)
    from confusion_matrix import plot_confusion_matrix
    plot_confusion_matrix(gt, pred, dataset.classes, args.logs_dir)
    # Fixed typo in the original message: "Accuarcy" -> "Accuracy".
    print('\n * Test Accuracy: {:5.1%}\n'.format(top1))
# Build datasets and loaders for the three splits; the train loader uses the
# CLI batch size while val/test use a fixed batch size of 64.
train_dataset = TextDataset(train_data, dictionary, args.sort_dataset, args.min_length, args.max_length)
train_dataloader = TextDataLoader(dataset=train_dataset, dictionary=dictionary, batch_size=args.batch_size)
val_dataset = TextDataset(val_data, dictionary, args.sort_dataset, args.min_length, args.max_length)
val_dataloader = TextDataLoader(dataset=val_dataset, dictionary=dictionary, batch_size=64)
test_dataset = TextDataset(test_data, dictionary, args.sort_dataset, args.min_length, args.max_length)
test_dataloader = TextDataLoader(dataset=test_dataset, dictionary=dictionary, batch_size=64)
logger.info("Constructing model...")
# args.model is the model class itself, instantiated here.
model = args.model(n_classes=preprocessor.n_classes, dictionary=dictionary, args=args)
if args.use_gpu:
    model = model.cuda()
logger.info("Training...")
# Only optimize parameters that are not frozen (e.g. fixed embeddings).
trainable_params = [p for p in model.parameters() if p.requires_grad]
if args.optimizer == 'Adam':
    optimizer = Adam(params=trainable_params, lr=args.initial_lr)
if args.optimizer == 'Adadelta':
    # NOTE(review): weight_decay=0.95 is an unusually large L2 penalty; it
    # looks like it was meant to be Adadelta's `rho` parameter — confirm.
    optimizer = Adadelta(params=trainable_params, lr=args.initial_lr, weight_decay=0.95)
# NOTE(review): if args.optimizer is neither 'Adam' nor 'Adadelta', the
# next line raises NameError because `optimizer` was never bound.
lr_plateau = lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.7, patience=5, min_lr=0.0001)
# NOTE(review): nn.CrossEntropyLoss is passed as a class, not an instance
# (no parentheses) — verify that Trainer instantiates or calls it accordingly.
criterion = nn.CrossEntropyLoss
trainer = Trainer(model, train_dataloader, val_dataloader, criterion=criterion, optimizer=optimizer, lr_schedule=args.lr_schedule, lr_scheduler=lr_plateau, use_gpu=args.use_gpu, logger=logger)
trainer.run(epochs=args.epochs)
logger.info("Evaluating...")
logger.info('Best Model: {}'.format(trainer.best_checkpoint_filepath))
# Reload the best checkpoint found during training before the final eval.
model.load_state_dict(torch.load(trainer.best_checkpoint_filepath))  # load best model
evaluator = Evaluator(model, test_dataloader, use_gpu=args.use_gpu, logger=logger)
evaluator.evaluate()
# Collect checkpoint file(s): every *.ckpt under a directory, or the single
# file that was passed directly.
model_name_list = []
if isdir(checkpoint_filepath):
    model_name_list = [
        join(checkpoint_filepath, f) for f in os.listdir(checkpoint_filepath)
        if f.endswith(".ckpt")
    ]
else:
    model_name_list = [checkpoint_filepath]
logger.info("Making dataset & dataloader...")
test_dataset = TextDataset(test_data, dictionary, args.get('sort_dataset'), args.get('min_length'), args.get('max_length'))
# Shuffle only when the dataset is not already sorted by length.
test_dataloader = TextDataLoader(dataset=test_dataset, dictionary=dictionary, batch_size=args.get('batch_size'), shuffle=not args.get('sort_dataset'), num_workers=args.get('num_workers'))
# Evaluate every collected checkpoint against the same test loader.
for checkpoint in model_name_list:
    logger.info(checkpoint)
    model.load_state_dict(torch.load(checkpoint))
    if args.get('use_gpu'):
        model = model.cuda()
    # NOTE(review): nn.CrossEntropyLoss is passed as a class, not an instance
    # (no parentheses) — verify that Evaluator instantiates or calls it accordingly.
    criterion = nn.CrossEntropyLoss
    evaluator = Evaluator(model, test_dataloader, criterion=criterion, use_gpu=args.get('use_gpu'), logger=logger)
    evaluator.evaluate()
def inference(
    images_path: str,
    texts_path: str,
    test_imgs_file_path: str,
    batch_size: int,
    prefetch_size: int,
    checkpoint_path: str,
    joint_space: int,
    num_layers: int,
) -> None:
    """Performs inference on the Flickr8k/30k test set.

    Args:
        images_path: A path where all the images are located.
        texts_path: Path where the text doc with the descriptions is.
        test_imgs_file_path: Path to a file with the test image names.
        batch_size: The batch size to be used.
        prefetch_size: How many batches to prefetch.
        checkpoint_path: Path to a valid model checkpoint.
        joint_space: The size of the joint latent space.
        num_layers: The number of rnn layers.

    Returns:
        None
    """
    dataset = FlickrDataset(images_path, texts_path)
    # Getting the vocabulary size of the train dataset
    test_image_paths, test_captions = dataset.get_data(test_imgs_file_path)
    logger.info("Test dataset created...")
    # Accumulates losses and image/caption embeddings for recall@K computation.
    evaluator_test = Evaluator(len(test_image_paths), joint_space)
    logger.info("Test evaluator created...")
    # Resetting the default graph and setting the random seed
    tf.reset_default_graph()
    loader = InferenceLoader(test_image_paths, test_captions, batch_size,
                             prefetch_size)
    images, captions, captions_lengths = loader.get_next()
    logger.info("Loader created...")
    model = VsePpModel(images, captions, captions_lengths, joint_space,
                       num_layers)
    logger.info("Model created...")
    logger.info("Inference is starting...")
    with tf.Session() as sess:
        # Initializers — receives the checkpoint path, presumably restoring
        # the trained weights; confirm against VsePpModel.init.
        model.init(sess, checkpoint_path)
        try:
            with tqdm(total=len(test_image_paths)) as pbar:
                # Run until the dataset iterator is exhausted; TF signals the
                # end by raising OutOfRangeError, caught below.
                while True:
                    loss, lengths, embedded_images, embedded_captions = sess.run(
                        [
                            model.loss,
                            model.captions_len,
                            model.image_encoded,
                            model.text_encoded,
                        ])
                    evaluator_test.update_metrics(loss)
                    evaluator_test.update_embeddings(embedded_images,
                                                     embedded_captions)
                    # len(lengths) is the actual batch size of this step.
                    pbar.update(len(lengths))
        except tf.errors.OutOfRangeError:
            pass

    logger.info(f"The image2text recall at (1, 5, 10) is: "
                f"{evaluator_test.image2text_recall_at_k()}")
    logger.info(f"The text2image recall at (1, 5, 10) is: "
                f"{evaluator_test.text2image_recall_at_k()}")
def train(
    images_path: str,
    texts_path: str,
    train_imgs_file_path: str,
    val_imgs_file_path: str,
    joint_space: int,
    num_layers: int,
    learning_rate: float,
    margin: float,
    clip_val: float,
    decay_rate: int,
    weight_decay: float,
    batch_size: int,
    prefetch_size: int,
    epochs: int,
    save_model_path: str,
) -> None:
    """Starts a training session with the Flickr8k dataset.

    Args:
        images_path: A path where all the images are located.
        texts_path: Path where the text doc with the descriptions is.
        train_imgs_file_path: Path to a file with the train image names.
        val_imgs_file_path: Path to a file with the val image names.
        joint_space: The space where the encoded images and text will be projected.
        num_layers: Number of layers of the rnn.
        learning_rate: The learning rate.
        margin: The contrastive margin.
        clip_val: The max grad norm.
        decay_rate: When to decay the learning rate.
        weight_decay: The L2 loss constant.
        batch_size: The batch size to be used.
        prefetch_size: How many batches to keep on GPU ready for processing.
        epochs: The number of epochs to train the model.
        save_model_path: Where to save the model.

    Returns:
        None
    """
    dataset = FlickrDataset(images_path, texts_path)
    train_image_paths, train_captions = dataset.get_data(train_imgs_file_path)
    val_image_paths, val_captions = dataset.get_data(val_imgs_file_path)
    logger.info("Dataset created...")
    # Tracks validation loss/embeddings and the best recall@K seen so far.
    evaluator_val = Evaluator(len(val_image_paths), joint_space)
    logger.info("Evaluators created...")
    # Resetting the default graph
    tf.reset_default_graph()
    loader = TrainValLoader(
        train_image_paths,
        train_captions,
        val_image_paths,
        val_captions,
        batch_size,
        prefetch_size,
    )
    images, captions, captions_lengths = loader.get_next()
    logger.info("Loader created...")
    # LR decays once every `decay_rate` epochs, expressed in steps.
    decay_steps = decay_rate * len(train_image_paths) / batch_size
    model = VsePpModel(images, captions, captions_lengths, joint_space,
                       num_layers)
    logger.info("Model created...")
    logger.info("Training is starting...")
    with tf.Session() as sess:
        # Initializers
        model.init(sess)
        for e in range(epochs):
            # Reset evaluators
            evaluator_val.reset_all_vars()
            # Initialize iterator with train data
            sess.run(loader.train_init)
            try:
                with tqdm(total=len(train_image_paths)) as pbar:
                    # Loop until the train iterator raises OutOfRangeError.
                    while True:
                        _, loss, lengths = sess.run(
                            [model.optimize, model.loss, model.captions],
                            feed_dict={
                                model.weight_decay: weight_decay,
                                model.learning_rate: learning_rate,
                                model.margin: margin,
                                model.decay_steps: decay_steps,
                                model.clip_value: clip_val,
                            },
                        )
                        # NOTE(review): `model.captions` is fetched only as a
                        # batch-size proxy here (len(lengths)); the inference
                        # script fetches `model.captions_len` instead — confirm
                        # both have batch-size leading dimension.
                        pbar.update(len(lengths))
                        pbar.set_postfix({"Batch loss": loss})
            except tf.errors.OutOfRangeError:
                pass

            # Initialize iterator with validation data
            sess.run(loader.val_init)
            try:
                with tqdm(total=len(val_image_paths)) as pbar:
                    while True:
                        loss, lengths, embedded_images, embedded_captions = sess.run(
                            [
                                model.loss,
                                model.captions,
                                model.image_encoded,
                                model.text_encoded,
                            ]
                        )
                        evaluator_val.update_metrics(loss)
                        evaluator_val.update_embeddings(
                            embedded_images, embedded_captions
                        )
                        pbar.update(len(lengths))
            except tf.errors.OutOfRangeError:
                pass

            # Checkpoint only when validation recall@K improves.
            if evaluator_val.is_best_recall_at_k():
                evaluator_val.update_best_recall_at_k()
                logger.info("=============================")
                logger.info(
                    f"Found new best on epoch {e+1}!! Saving model!\n"
                    f"Current image-text recall at 1, 5, 10: "
                    f"{evaluator_val.best_image2text_recall_at_k} \n"
                    f"Current text-image recall at 1, 5, 10: "
                    f"{evaluator_val.best_text2image_recall_at_k}"
                )
                logger.info("=============================")
                model.save_model(sess, save_model_path)
def plot_seg_results(images, y_true, y_pred=None, num_classes=None, num_rows=3,
                     num_cols=3, colors=None, save_dir=None, start_idx=0):
    """Plot segmentation ground truths and predictions as overlay grids.

    Draws two num_rows x num_cols figures ('Ground Truths' and 'Predictions',
    the latter titled with a per-image evaluator score) and optionally saves
    one overlay JPEG per image.

    Args:
        images: float array (N, H, W, C), values assumed in [0, 1] — confirm.
        y_true: (N, H, W, 1) integer labels (negative = invalid pixel) or
            (N, H, W, K) one-hot maps.
        y_pred: same layout as y_true; defaults to y_true when None.
        num_classes: class count; inferred from y_true when None.
        num_rows, num_cols: grid shape of the overview figures.
        colors: optional color table indexed by label+1 (index 0 = invalid).
        save_dir: when given, per-image overlays are saved as '{idx:5d}.jpg'.
        start_idx: offset added to saved file indices.
    """
    if y_pred is None:
        y_pred = y_true

    # Decode ground truth: dense integer labels vs. one-hot maps.
    if y_true.shape[-1] == 1:
        y_t = y_true[..., 0].astype(int)
        valid = np.greater_equal(y_t, 0)  # negative labels mark invalid pixels
        num_classes = np.amax(y_t) + 1 if num_classes is None else num_classes
    else:
        y_t = y_true.argmax(axis=-1)
        valid = np.isclose(y_true.sum(axis=-1), 1)  # proper one-hot rows only
        num_classes = y_true.shape[-1] if num_classes is None else num_classes

    if y_pred.shape[-1] == 1:
        y_p = y_pred[..., 0].astype(int)
    else:
        y_p = y_pred.argmax(axis=-1)

    num_images = images.shape[0]
    # Fixed: np.int was removed in NumPy 1.24; the builtin int is the
    # documented replacement and behaves identically here.
    images = (images * 255).astype(int)

    if colors is None:
        mask_true = (seg_labels_to_images(y_t, num_classes,
                                          valid=valid) * 255).astype(int)
    else:
        # Shift labels by one so invalid pixels (label -1 -> 0) pick colors[0].
        mask_true = np.array(colors)[(y_t + 1) * valid]

    fig, axes = plt.subplots(num_rows, num_cols, figsize=(9, 9))
    fig.subplots_adjust(left=0.075, right=0.95, bottom=0.05, top=0.9,
                        wspace=0.3, hspace=0.3)
    fig.suptitle('Ground Truths')
    i = 0
    # Fixed: the original iterated rows over range(num_cols) and columns over
    # range(num_rows), which mis-indexes axes for non-square grids.
    for row in range(num_rows):
        for col in range(num_cols):
            if i >= num_images:
                break
            axes[row, col].imshow(np.clip(images[i] + mask_true[i], 0, 255))
            axes[row, col].tick_params(axis='both', labelsize=6)
            i += 1

    # Per-image score from the project evaluator (used for prediction titles).
    evaluator = Evaluator()
    name = evaluator.name
    scores = []
    for t, p in zip(y_true, y_pred):
        scores.append(evaluator.score(np.expand_dims(t, axis=0),
                                      np.expand_dims(p, axis=0)))

    if colors is None:
        mask_pred = (seg_labels_to_images(y_p, num_classes,
                                          valid=valid) * 255).astype(int)
    else:
        mask_pred = np.array(colors)[(y_p + 1) * valid]

    fig, axes = plt.subplots(num_rows, num_cols, figsize=(9, 9))
    fig.subplots_adjust(left=0.075, right=0.95, bottom=0.05, top=0.9,
                        wspace=0.3, hspace=0.3)
    fig.suptitle('Predictions')
    i = 0
    for row in range(num_rows):
        for col in range(num_cols):
            if i >= num_images:
                break
            axes[row, col].imshow(np.clip(images[i] + mask_pred[i], 0, 255))
            axes[row, col].set_title('{}:\n{:.4}'.format(name, scores[i]))
            axes[row, col].tick_params(axis='both', labelsize=6)
            i += 1

    if save_dir is not None:
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        print('')
        for i in range(num_images):
            fig_name = os.path.join(save_dir,
                                    '{:5d}.jpg'.format(i + start_idx))
            # Skip overlays already rendered on a previous run (the original
            # rebuilt the same path a second time for this check).
            if not os.path.isfile(fig_name):
                fig = plt.figure()
                plt.imshow(np.clip(images[i] + mask_pred[i], 0, 255))
                fig.suptitle('{:6d}. {}:\n{:.4}'.format(
                    i + start_idx, name, scores[i]))
                fig.savefig(fig_name)
                plt.close(fig)
            if i % 200 == 0:
                print('Saving result images... {:5}/{}'.format(
                    i, num_images))
        print('Done.')
class BaseTrainer(object): def __init__(self, cfg, model, dataset): super(BaseTrainer, self).__init__() self.cfg = cfg self.logger = Logger(cfg) #self.debugger = SummaryWriter(os.path.join('debug', cfg['NAME'], 'loss' )) #self.mAP_marker = SummaryWriter(os.path.join('debug', cfg['NAME'], 'mAP')) self.source_loader, self.target_loader, self.test_loader, self.query, self.gallery, self.train_transformer,self.source_train, self.target_train, self.target_cluster_loader = dataset self.best_mAP = 0 self.num_gpus = torch.cuda.device_count() if os.path.exists(self.cfg['PRETRAIN']): model.load_param(self.cfg['PRETRAIN']) print("load checkpoint from {}".format(self.cfg['PRETRAIN'])) self.model = nn.DataParallel(model).cuda() self.evaluator = Evaluator(self.cfg, self.model, dataset) self.num_gpus = torch.cuda.device_count() self.optimizer = make_optimizer(self.cfg, self.model, num_gpus=self.num_gpus) self.scheduler = WarmupMultiStepLR(self.optimizer, self.cfg['MILESTONES'], self.cfg['GAMMA'], self.cfg['WARMUP_FACTOR']) #self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.cfg['LR'], momentum=0.9, weight_decay=0, nesterov=True) #self.scheduler = MultiStepLR(self.optimizer, milestones=self.cfg['MILESTONES']) self.num_gpus = torch.cuda.device_count() self.logger.write('num gpus:{} \n'.format(self.num_gpus)) def train(self): self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.cfg['LR'], momentum=0.9, weight_decay=0, nesterov=True) self.scheduler = MultiStepLR(self.optimizer, milestones=self.cfg['MILESTONES']) CE = nn.CrossEntropyLoss().cuda() for epoch in range(self.cfg['EPOCHS']): self.model.train() stats = ('ce_loss', 'total_loss') meters_trn = {stat: AverageMeter() for stat in stats} for i,inputs in enumerate(self.source_loader): imgs = Variable(inputs[0]) labels = Variable(inputs[1]).cuda() scores = self.model(imgs, state='web stream') ce_loss = CE(scores, labels) total_loss = ce_loss self.optimizer.zero_grad() total_loss.backward() 
self.optimizer.step() for k in stats: v = locals()[k] meters_trn[k].update(v.item(), self.cfg['BATCHSIZE']) self.logger.write("epoch: %d | lr: %.5f | loss: %.5f | \n"%( epoch+1, self.scheduler.get_lr()[0], meters_trn['ce_loss'].avg, )) self.scheduler.step() self.evaluate(epoch, stats) def evaluate(self, epoch, stats=None): if self.cfg['TARGET'] == 'VehicleID': mAP, cmc1, cmc5, cmc10 = self.evaluator.evaluate_VeID() else: mAP, cmc = self.evaluator.evaluate(eval_cls=True) cmc1, cmc5, cmc10 = cmc[0], cmc[4], cmc[9] ''' if stats is not None: for stat in stats: self.mAP_marker.add_scalar(stat, mAP, epoch+1) ''' is_best = mAP > self.best_mAP self.best_mAP = max(mAP, self.best_mAP) self.logger.write("mAP: {:.1f}% | cmc-1: {:.1f}% | cmc-5: {:.1f}% | cmc-10: {:.1f}% | Best mAP: {:.1f}% |\n".format(mAP * 100, cmc1 * 100, cmc5 * 100, cmc10 * 100, self.best_mAP * 100)) self.logger.write("==========================================\n") save_checkpoint({ 'state_dict':self.model.module.state_dict(), 'epoch':epoch+1, 'best_mAP': self.best_mAP, }, is_best=is_best, fpath=os.path.join("ckpt", self.cfg['NAME'], 'checkpoint.pth')) def cls_visualization(self): for i,inputs in enumerate(self.target_loader): imgs, _, fnames = inputs[0], inputs[1], inputs[-1] self.model.eval() cls_score, _ = self.model(imgs, 'auxiliary') predict = torch.max(cls_score, dim=1)[1].data.squeeze() for p, fname in zip(predict, fnames): dir_ = os.path.join('vis', self.cfg['CLS_PATH']) mkdir_if_missing(os.path.join(dir_, '%d'%(p.item()))) dst = os.path.join(dir_, '%d'%(p.item()), fname+'.jpg') src = os.path.join('/home/share/zhihui/VeRi/image_train/', fname+'.jpg') shutil.copyfile(src, dst) def tSNE(self, img_path='tSNE.jpg'): source_feats, aux_feats = [], [] source_labels, aux_labels = [], [] for i,inputs in enumerate(self.source_loader): imgs, vids = Variable(inputs[0]).cuda(), inputs[1] outputs = self.model(imgs) for output, vid in zip(outputs, vids): source_feats.append(output.data.cpu().numpy().tolist()) 
source_labels.append('VehicleID') ''' source_feats = np.array(source_feats) source_labels = np.array(source_labels) ''' for i,inputs in enumerate(self.auxiliary_loader): imgs, tids = Variable(inputs[0]).cuda(), inputs[1] outputs = self.model(imgs) for output, tid in zip(outputs, tids): aux_feats.append(output.data.cpu().numpy().tolist()) aux_labels.append('CompCars') tsne = TSNE(n_components=2, init='pca', random_state=501) source_feats = np.array(source_feats[:1000]) aux_feats = np.array(aux_feats[:1000]) feats = np.concatenate((source_feats,aux_feats), axis=0) labels = source_labels[:1000] + aux_labels[:1000] pickle.dump(feats, open('feat.pkl', 'wb')) pickle.dump(labels, open('labels.pkl', 'wb')) '''