def main(args):
    token_to_index, index_to_token = Vocabulary.load(args.vocab_file)

    # Pretrained embeddings are loaded from an 'embedding_<vocab>.npy' file
    # saved next to the vocabulary, if one exists.
    root, _ = os.path.splitext(args.vocab_file)
    basepath, basename = os.path.split(root)
    embed_path = f'{basepath}/embedding_{basename}.npy'
    embeddings = np.load(embed_path) if os.path.exists(embed_path) else None

    model = FastQA(len(token_to_index), args.embed, args.hidden,
                   question_limit=args.q_len, context_limit=args.c_len,
                   dropout=args.dropout, pretrained_embeddings=embeddings,
                   with_feature=not args.without_feature).build()
    opt = Adam()
    # Only the start/end pointer outputs contribute to the loss.
    model.compile(optimizer=opt, loss_weights=[1, 1, 0, 0],
                  loss=['sparse_categorical_crossentropy',
                        'sparse_categorical_crossentropy', None, None])

    train_dataset = SquadReader(args.train_path)
    dev_dataset = SquadReader(args.dev_path)
    tokenizer = get_tokenizer(lower=args.lower, as_str=False)
    converter = SquadConverter(token_to_index, PAD_TOKEN, UNK_TOKEN, tokenizer,
                               question_max_len=args.q_len,
                               context_max_len=args.c_len)
    eval_converter = SquadEvalConverter(token_to_index, PAD_TOKEN, UNK_TOKEN,
                                        tokenizer, question_max_len=args.q_len,
                                        context_max_len=args.c_len)
    train_generator = Iterator(train_dataset, args.batch, converter)
    dev_generator_loss = Iterator(dev_dataset, args.batch, converter,
                                  shuffle=False)
    dev_generator_f1 = Iterator(dev_dataset, args.batch, eval_converter,
                                repeat=False, shuffle=False)

    trainer = SquadTrainer(model, train_generator, args.epoch,
                           dev_generator_loss,
                           './models/fastqa.{epoch:02d}-{val_loss:.2f}.h5')
    trainer.add_callback(FastQALRScheduler(dev_generator_f1,
                                           val_answer_file=args.answer_path,
                                           steps=args.steps))
    trainer.add_callback(FastQACheckpoint('./models/fastqa.{steps:06d}.h5',
                                          steps=args.steps))
    if args.use_tensorboard:
        trainer.add_callback(TensorBoard(log_dir='./graph',
                                         batch_size=args.batch))
    history = trainer.run()
    dump_graph(history, 'loss_graph.png')
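# The trainer above only consumes an `args` namespace. A minimal CLI sketch
# that would drive it is given below; the flag names are inferred from the
# attributes accessed in main(), and the defaults are illustrative guesses,
# not the project's actual values.
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description='Train FastQA on SQuAD')
    parser.add_argument('--vocab_file', required=True)
    parser.add_argument('--train_path', required=True)
    parser.add_argument('--dev_path', required=True)
    parser.add_argument('--answer_path', required=True)
    parser.add_argument('--embed', type=int, default=300)
    parser.add_argument('--hidden', type=int, default=300)
    parser.add_argument('--q_len', type=int, default=50)
    parser.add_argument('--c_len', type=int, default=400)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--batch', type=int, default=32)
    parser.add_argument('--epoch', type=int, default=20)
    parser.add_argument('--steps', type=int, default=1000)
    parser.add_argument('--lower', action='store_true')
    parser.add_argument('--without_feature', action='store_true')
    parser.add_argument('--use_tensorboard', action='store_true')
    return parser.parse_args()


if __name__ == '__main__':
    main(parse_args())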
def main(args):
    token_to_index, _ = Vocabulary.load(args.vocab_file)

    model = FastQA(len(token_to_index), args.embed, args.hidden,
                   question_limit=args.q_len, context_limit=args.c_len,
                   with_feature=not args.without_feature).build()
    model.load_weights(args.model_path)

    test_dataset = SquadReader(args.test_path)
    tokenizer = get_tokenizer(lower=args.lower, as_str=False)
    converter = SquadEvalConverter(token_to_index, PAD_TOKEN, UNK_TOKEN,
                                   tokenizer, question_max_len=args.q_len,
                                   context_max_len=args.c_len)
    test_generator = Iterator(test_dataset, args.batch, converter,
                              False, False)  # no repeat, no shuffle

    # Collect one answer per question id by joining the context tokens
    # between the predicted start and end indices (inclusive).
    predictions = {}
    for inputs, (contexts, ids) in test_generator:
        _, _, start_indices, end_indices = model.predict_on_batch(inputs)
        for i, (start, end) in enumerate(zip(start_indices, end_indices)):
            prediction = ' '.join(contexts[i][j] for j in range(start, end + 1))
            predictions[ids[i]] = prediction

    basename = osp.splitext(osp.basename(args.model_path))[0]
    save_path = osp.join(args.save_dir, f'predictions_{basename}.json')
    with open(save_path, 'w') as f:
        json.dump(predictions, f, indent=2)
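# For reference, the file written above maps SQuAD question ids to raw answer
# strings, the shape the official SQuAD evaluation script expects. A
# hypothetical check (the filename and ids are illustrative only):
import json

with open('predictions_fastqa.000100.json') as f:
    predictions = json.load(f)
# e.g. {"56be4db0acb8001400a502ec": "Denver Broncos", ...}
print(len(predictions), 'answers')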
def __init__(self): """Initializes a Inference object.""" # self.model = get_pretrained_model() self.tokenizer = get_tokenizer() self.model = transformers.Trainer(model=get_pretrained_model()) self.summarizer = pipeline( "summarization") # ~1.2 GB download the first time this is run.
def main(args):
    tokenizer = get_tokenizer(lower=args.lower, as_str=True)
    if args.only_question:
        indices = [1]
        desc = 'question'
    elif args.only_context:
        indices = [0]
        desc = 'context'
    else:
        indices = [0, 1]
        desc = 'question_context'
    basename, ext = os.path.splitext(args.vocab_path)
    min_freq = args.min_freq if args.min_freq else ''
    max_size = args.max_size if args.max_size else ''
    filename = f'{basename}_{desc}_min-freq{min_freq}_max_size{max_size}{ext}'
    squad_tokens = load_squad_tokens(args.train_path, tokenizer,
                                     indices=indices)
    Vocabulary.build(squad_tokens, args.min_freq, args.max_size,
                     (PAD_TOKEN, UNK_TOKEN), filename)
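# Worked example of the naming convention above (illustrative paths): with
# --vocab_path data/vocab.pkl, --min_freq 5, --max_size 30000 and neither
# --only_question nor --only_context set, the vocabulary is written to
#   data/vocab_question_context_min-freq5_max_size30000.pkl
# while omitting --min_freq/--max_size leaves those slots empty:
#   data/vocab_question_context_min-freqmax_size.pkl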
def main():
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', default='/A/VSE/data/',
                        help='path to datasets')
    parser.add_argument('--data_name', default='resnet152_precomp',
                        help='{coco,f8k,f30k,10crop,irv2,resnet152}_precomp|coco|f8k|f30k')
    parser.add_argument('--vocab_path', default='./vocab/',
                        help='Path to saved vocabulary pickle files.')
    parser.add_argument('--margin', default=0.05, type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs', default=30, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim', default=300, type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size', default=1024, type=int,
                        help='Dimensionality of the joint embedding. '
                             '[NOTE: this is used only if <embed_size> differs from <gru_units>]')
    parser.add_argument('--gru_units', default=1024, type=int,
                        help='Number of GRU neurons.')
    parser.add_argument('--grad_clip', default=1., type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--crop_size', default=224, type=int,
                        help='Size of an image crop as the CNN input.')
    parser.add_argument('--num_layers', default=1, type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate', default=.001, type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update', default=15, type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers', default=10, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step', default=10, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step', default=500, type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name', default='runs/runX',
                        help='Path to save the model and Tensorboard log.')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--max_violation', action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--img_dim', default=2048, type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--finetune', action='store_true',
                        help='Fine-tune the image encoder.')
    parser.add_argument('--cnn_type', default='vgg19',
                        help='The CNN used for image encoder (e.g. vgg19, resnet152).')
    parser.add_argument('--use_restval', action='store_true',
                        help='Use the restval data for training on MSCOCO.')
    parser.add_argument('--measure', default='cosine',
                        help='Similarity measure used (cosine|order)')
    parser.add_argument('--test_measure', default=None,
                        help='Similarity used for retrieval (None<same used for training>|cosine|order)')
    parser.add_argument('--use_abs', action='store_true',
                        help='Take the absolute value of embedding vectors.')
    parser.add_argument('--no_imgnorm', action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--text_encoder', default='seam-e',
                        choices=text_encoders.text_encoders_alias.keys())
    parser.add_argument('--att_units', default=300, type=int,
                        help='Number of tanh neurons. When using --att_dim=None '
                             'we apply a tanh directly to the att input.')
    parser.add_argument('--att_hops', default=30, type=int,
                        help='Number of attention hops (viewpoints).')
    parser.add_argument('--att_coef', default=0., type=float,
                        help='Influence of Frobenius divergence in the loss function.')
    opt = parser.parse_args()
    if opt.test_measure is None:
        opt.test_measure = opt.measure
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    tokenizer, vocab_size = data.get_tokenizer(opt.vocab_path, opt.data_name)
    opt.vocab_size = vocab_size
    collate_fn = 'collate_fn'

    # Load data loaders
    train_loader, val_loader = data.get_loaders(opt.data_name, tokenizer,
                                                opt.crop_size, opt.batch_size,
                                                opt.workers, opt, collate_fn)

    # Construct the model
    model = VSE(opt)
    print(model.txt_enc)

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    best_rsum = 0
    for epoch in range(opt.num_epochs):
        adjust_learning_rate(opt, model.optimizer, epoch)

        # train for one epoch
        train(opt, train_loader, model, epoch, val_loader)

        # evaluate on validation set
        rsum = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': model.state_dict(),
            'best_rsum': best_rsum,
            'opt': opt,
            'Eiters': model.Eiters,
        }, is_best, prefix=opt.logger_name + '/')
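# `adjust_learning_rate` is called above but not defined in this excerpt. A
# sketch consistent with the --lr_update flag (step decay by 10x every
# lr_update epochs, the schedule used by VSE-style training scripts); the
# actual implementation may differ:
def adjust_learning_rate(opt, optimizer, epoch):
    """Decay the learning rate by 10x every opt.lr_update epochs."""
    lr = opt.learning_rate * (0.1 ** (epoch // opt.lr_update))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr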
def evalrank(model_path, data_path=None, split='dev', fold5=False,
             test_measure=None, log_step=10000):
    """Evaluate a trained model on either dev or test.

    If `fold5=True`, 5-fold cross-validation is done (only for MSCOCO).
    Otherwise, the full data is used for evaluation.
    """
    # load model and options
    checkpoint = torch.load(model_path)
    opt = checkpoint['opt']
    if data_path is not None:
        opt.data_path = data_path

    tokenizer, vocab_size = get_tokenizer(opt.vocab_path, opt.data_name)
    opt.vocab_size = vocab_size

    # construct model
    model = VSE(opt)

    if test_measure is None:
        test_measure = opt.test_measure

    # load model state
    model.load_state_dict(checkpoint['model'])

    print('Loading dataset')
    data_loader = get_test_loader(split, opt.data_name, tokenizer,
                                  opt.crop_size, opt.batch_size,
                                  opt.workers, opt)

    print('Computing results...')
    img_embs, cap_embs = encode_data(model, data_loader, log_step=log_step)
    # each image has five captions, so there are 5x as many caption embeddings
    print('Images: %d, Captions: %d' %
          (img_embs.shape[0] // 5, cap_embs.shape[0]))

    results, mean_metrics = None, None  # only populated by the 5-fold branch
    if not fold5:
        # no cross-validation, full evaluation
        r, rt = i2t(img_embs, cap_embs, measure=test_measure,
                    return_ranks=True)
        ri, rti = t2i(img_embs, cap_embs, measure=test_measure,
                      return_ranks=True)
        ar = (r[0] + r[1] + r[2]) / 3
        ari = (ri[0] + ri[1] + ri[2]) / 3
        rsum = r[0] + r[1] + r[2] + ri[0] + ri[1] + ri[2]
        print("rsum: %.1f" % rsum)
        print("Average i2t Recall: %.1f" % ar)
        print("Image to text: %.1f %.1f %.1f %.1f %.1f" % r)
        print("Average t2i Recall: %.1f" % ari)
        print("Text to image: %.1f %.1f %.1f %.1f %.1f" % ri)
    else:
        # 5-fold cross-validation, only for MSCOCO
        results = []
        for i in range(5):
            r, rt0 = i2t(img_embs[i * 5000:(i + 1) * 5000],
                         cap_embs[i * 5000:(i + 1) * 5000],
                         measure=test_measure, return_ranks=True)
            print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % r)
            ri, rti0 = t2i(img_embs[i * 5000:(i + 1) * 5000],
                           cap_embs[i * 5000:(i + 1) * 5000],
                           measure=test_measure, return_ranks=True)
            if i == 0:
                rt, rti = rt0, rti0
            print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % ri)
            ar = (r[0] + r[1] + r[2]) / 3
            ari = (ri[0] + ri[1] + ri[2]) / 3
            rsum = r[0] + r[1] + r[2] + ri[0] + ri[1] + ri[2]
            print("rsum: %.1f ar: %.1f ari: %.1f" % (rsum, ar, ari))
            results += [list(r) + list(ri) + [ar, ari, rsum]]

        print("-----------------------------------")
        print("Mean metrics: ")
        mean_metrics = tuple(np.array(results).mean(axis=0).flatten())
        print("rsum: %.1f" % (mean_metrics[10] * 6))
        print("Average i2t Recall: %.1f" % mean_metrics[11])
        print("Image to text: %.1f %.1f %.1f %.1f %.1f" % mean_metrics[:5])
        print("Average t2i Recall: %.1f" % mean_metrics[12])
        print("Text to image: %.1f %.1f %.1f %.1f %.1f" % mean_metrics[5:10])

    torch.save({'rt': rt, 'rti': rti}, 'ranks.pth.tar')

    return results, mean_metrics
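# Reading the printed metrics: i2t/t2i each return a 5-tuple, presumably
# (R@1, R@5, R@10, MedR, MeanR); only the three recall values enter rsum, so
# rsum is at most 600. A hypothetical invocation on a saved checkpoint (the
# path is illustrative):
results, mean_metrics = evalrank('runs/runX/model_best.pth.tar',
                                 split='test', fold5=True)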
def main():
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', default='/A/VSE/data/',
                        help='path to datasets')
    parser.add_argument('--data_name', default='10resnet152_precomp',
                        help='data name for the training set')
    parser.add_argument('--adapt_data', default='10resnet152_precomp',
                        help='data name for loading the adapt set')
    parser.add_argument('--adapt_split', default='train',
                        help='split for performing domain adapt')
    parser.add_argument('--adapt_batch_size', default=128, type=int,
                        help='Adapt set mini-batch size.')
    parser.add_argument('--val_data', default='10resnet152_precomp',
                        help='data name for loading the val set')
    parser.add_argument('--val_split', default='val',
                        help='split for loading the val set')
    parser.add_argument('--val_batch_size', default=128, type=int,
                        help='Validation mini-batch size.')
    parser.add_argument('--vocab_path', default='char',
                        help='Path to saved vocabulary pickle files. '
                             'Use char for character-based models.')
    parser.add_argument('--margin', default=0.2, type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs', default=30, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim', default=300, type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size', default=1024, type=int,
                        help='Dimensionality of the joint embedding. '
                             '[NOTE: this is used only if <embed_size> differs from <gru_units>]')
    parser.add_argument('--gru_units', default=1024, type=int,
                        help='Number of GRU neurons.')
    parser.add_argument('--grad_clip', default=1., type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--crop_size', default=224, type=int,
                        help='Size of an image crop as the CNN input.')
    parser.add_argument('--num_layers', default=1, type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate', default=2e-4, type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update', default=15, type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--lr_decay', default=0.1, type=float,
                        help='Learning rate decay ratio (0.1 reduces lr by 10x).')
    parser.add_argument('--workers', default=8, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step', default=10, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step', default=500, type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name', default='',
                        help='Path to save the model and Tensorboard log.')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--max_violation', action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--hard_gamma', type=float, default=0.25,
                        help='Importance of hard-contrastive across training. '
                             '[NOTE: this is ignored when using --max_violation]')
    parser.add_argument('--img_dim', default=2048, type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--finetune', action='store_true',
                        help='Fine-tune the image encoder.')
    parser.add_argument('--cnn_type', default='vgg19',
                        help='The CNN used for image encoder (e.g. vgg19, resnet152).')
    parser.add_argument('--use_restval', action='store_false',
                        help='Use the restval data for training on MSCOCO.')
    parser.add_argument('--measure', default='cosine',
                        help='Similarity measure used (cosine|order)')
    parser.add_argument('--test_measure', default=None,
                        help='Similarity used for retrieval (None<same used for training>|cosine|order)')
    parser.add_argument('--use_abs', action='store_true',
                        help='Take the absolute value of embedding vectors.')
    parser.add_argument('--no_imgnorm', action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--text_encoder', default='GRU', help='[GRU|Conv].')
    parser.add_argument('--add_data', action='store_true',
                        help='Whether to use additional unlabeled data.')
    parser.add_argument('--log_images', action='store_true',
                        help='Whether to log images in tensorboard.')
    parser.add_argument('--noise', type=float, default=0.,
                        help='Amount of noise for augmenting image features.')
    parser.add_argument('--dropout_noise', type=float, default=0.,
                        help='Amount of noise for augmenting word embeddings.')
    parser.add_argument('--pool', default='max',
                        help='Type of pooling used for conv models.')
    parser.add_argument('--kwargs', type=str, nargs='+', default=None,
                        help='Additional args for the model. Usage: argument:type:value')

    ### Mean-teacher hyperparameters ###
    parser.add_argument('--ramp_lr', action='store_true',
                        help='Use the learning rate schedule from mean-teacher')
    parser.add_argument('--initial_lr', type=float, default=0.0006,
                        help='Initial learning_rate for rampup')
    parser.add_argument('--initial_lr_rampup', type=int, default=50,
                        help='Epoch for lr rampup')
    parser.add_argument('--consistency_weight', type=float, default=20.,
                        help='consistency weight (default: 20.).')
    parser.add_argument('--consistency_alpha', type=float, default=0.99,
                        help='Consistency alpha before ema_late_epoch')
    parser.add_argument('--consistency_alpha_late', type=float, default=0.999,
                        help='Consistency alpha after ema_late_epoch')
    parser.add_argument('--consistency_rampup', type=int, default=15,
                        help='Consistency rampup epoch')
    parser.add_argument('--ema_late_epoch', type=int, default=15,
                        help='When to change alpha variable for consistency weight')
    parser.add_argument('--adapt_loss', type=str, default='mse',
                        help='Loss used to perform domain adapt.')

    opt = parser.parse_args()
    if opt.test_measure is None:
        opt.test_measure = opt.measure

    print('\n\n')
    print(opt)

    if opt.logger_name == '':
        writer = SummaryWriter()
        logpath = writer.file_writer.get_logdir()
        opt.logger_name = logpath
    else:
        writer = SummaryWriter(opt.logger_name)

    print('')
    print('')
    print('Outpath: ', opt.logger_name)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.ERROR)
    # tb_logger.configure(opt.logger_name, flush_secs=5)

    tokenizer, vocab_size = data.get_tokenizer(opt.vocab_path, opt.data_name)
    opt.vocab_size = vocab_size

    train_loader = data.get_loader(
        split='train',
        data_name=opt.data_name,
        batch_size=opt.batch_size,
        tokenizer=tokenizer,
        crop_size=opt.crop_size,
        workers=opt.workers,
        opt=opt,
        adapt_set=False,
    )

    val_loader = data.get_loader(
        data_name=opt.val_data,
        split=opt.val_split,
        batch_size=opt.val_batch_size,
        tokenizer=tokenizer,
        crop_size=opt.crop_size,
        workers=opt.workers,
        opt=opt,
        adapt_set=False,
    )

    adapt_loader = None  # only built when --add_data is set
    if opt.add_data:
        adapt_loader = data.get_loader(
            split=opt.adapt_split,
            data_name=opt.adapt_data,
            batch_size=opt.adapt_batch_size,
            tokenizer=tokenizer,
            crop_size=opt.crop_size,
            workers=opt.workers,
            opt=opt,
            adapt_set=True,
        )

    print('Train loader/dataset')
    print(train_loader.dataset.data_path, train_loader.dataset.split)
    print('Valid loader/dataset')
    print(val_loader.dataset.data_path, val_loader.dataset.split)
    if adapt_loader is not None:
        print('Adapt loader/dataset')
        print(adapt_loader.dataset.data_path, adapt_loader.dataset.split)

    # adapt_loader, val_adapt_loader = data.get_loaders(
    #     opt.data_name, tokenizer, opt.crop_size, opt.batch_size,
    #     opt.workers, opt, collate_fn)  # TODO set correct dataset
    print('[OK] Loaders.')

    # Construct the model and its mean-teacher (EMA) copy
    model = create_model(opt)
    model_ema = create_model(opt, ema=True)
    print('[OK] model')
    print(model.txt_enc)

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model, writer)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    best_rsum = 0
    for epoch in range(opt.num_epochs):
        # train for one epoch
        train(opt, train_loader, adapt_loader, model, model_ema, epoch,
              val_loader, tb_writer=writer)

        # evaluate on validation set
        # print('Validate Normal')
        print('Validate EMA')
        rsum = validate(opt, val_loader, model_ema, writer)
        # rsum = validate(opt, val_loader, model, writer)
        # rsum_adapt = validate(opt, val_adapt_loader, model_ema)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': model.state_dict(),
            'model_ema': model_ema.state_dict(),
            'best_rsum': best_rsum,
            'opt': opt,
            'Eiters': model.Eiters,
        }, is_best, prefix=opt.logger_name + '/')
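# The EMA copy built with create_model(opt, ema=True) is updated from the
# student model during training. The update itself happens inside train(),
# which is not shown here; below is a sketch of the standard mean-teacher
# rule that the --consistency_alpha/--consistency_alpha_late flags suggest,
# not necessarily the project's exact code:
def update_ema_variables(model, model_ema, alpha, global_step):
    """Exponential moving average of student weights into the teacher."""
    # ramp alpha up from 0 so the teacher tracks the student early in training
    alpha = min(1 - 1 / (global_step + 1), alpha)
    for ema_param, param in zip(model_ema.parameters(), model.parameters()):
        ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)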