def main():
    foldername = '/cs/labs/dshahaf/omribloch/data/text_lord/restorant/train/note_tiny_no_noise_dim_32_ntokens_5_nconv_10_nsamples_102400_content_noise_0.001/'
    # foldername = '/cs/labs/dshahaf/omribloch/data/text_lord/restorant/train/note_EM_no_noise_dim_32_ntokens_10_nconv_4_nsamples_1024_content_noise_0.0/'

    vocab_path = os.path.join(foldername, 'vocab.pickle')
    model_ckpt_path = os.path.join(foldername, 'last_checkpoint.ckpt')

    # load the serialized vocab that was pickled at training time
    with open(vocab_path, 'rb') as file:
        vocab = pickle.load(file)
    print('vocab was loaded')

    decoder_dictionary = vocab_to_dictionary(vocab)

    # these hyper-parameters must match the ones encoded in the checkpoint folder name
    device = 'cpu'
    nsamples = 102400
    ntokens = 5
    dim = 32
    content_noise = 0.001
    dropout = 0
    nconv = 10

    model = load_checkpoint(model_ckpt_path, 'cpu', device, nsamples,
                            decoder_dictionary.pad(), ntokens, dim, content_noise,
                            dropout, decoder_dictionary, 50, nconv)
    print('model loaded')
    model.eval()

    dataset, vocab = get_dataset(10000, '/cs/labs/dshahaf/omribloch/data/text_lord/restorant/', vocab)

    for i in range(10):
        sid = dataset[i].id
        stars = dataset[i].stars
        # stars = 1
        review_sentence = ' '.join(dataset[i].review)
        print(review_sentence)

        # greedy decode with the original sentiment, then with the flipped one
        decoded_sentence = gready_decode_single(model, vocab, stars, sid)
        print(decoded_sentence)
        decoded_sentence = gready_decode_single(model, vocab, 1 - stars, sid)
        print(decoded_sentence)

        print('-------------')

        decoded_sentence = beam_decode_single(model, vocab, sid, stars, topk=10, beam_width=4)
        for d in decoded_sentence:
            print(d)

        print('==============================')
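# The helpers above (vocab_to_dictionary, load_checkpoint, gready_decode_single,
# beam_decode_single) are defined elsewhere in the repo. As a rough illustration
# only, here is a minimal sketch of what vocab_to_dictionary plausibly does,
# ASSUMING a torchtext-style vocab with an .itos list and a fairseq Dictionary
# on the decoder side; this is NOT the repo's actual implementation.
from fairseq.data import Dictionary

def vocab_to_dictionary(vocab):
    # Dictionary() already registers pad/eos/unk, which is why the training
    # code can call decoder_dictionary.pad() right after this conversion.
    dictionary = Dictionary()
    for token in vocab.itos:
        dictionary.add_symbol(token)
    return dictionary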
def main():
    # parse command line arguments
    parser = argparse.ArgumentParser(description='Train lord-seq2seq-convnet.')

    # session
    parser.add_argument('--note', type=str, help='a comment', required=True)
    parser.add_argument('--device', type=str, default='cpu', help='cuda device: cuda:0 / cuda:1')
    parser.add_argument('--overwrite', action='store_true', help='delete old ckpt with this configuration')
    parser.add_argument('--resume', action='store_true',
                        help='resume training from old ckpt with this configuration')
    parser.add_argument('--shuffle', action='store_true', help='shuffle input')
    parser.add_argument('--ckpt_every', type=int, default=25, help='how many epochs between checkpoints')
    parser.add_argument('--dir', type=str,
                        default='/cs/labs/dshahaf/omribloch/data/text_lord/restorant/train',
                        help='here the script will create a directory named by the parameters')
    parser.add_argument('--data_dir', type=str,
                        default='/cs/labs/dshahaf/omribloch/data/text_lord/restorant/')
    parser.add_argument('-f', action='store_true')

    # training
    parser.add_argument('--batch_size', type=int, help='batch size', required=True)
    parser.add_argument('--epochs', type=int, help='number of epochs to train', required=True)
    parser.add_argument('--it', type=int, help='number of train-eval iterations', required=True)
    parser.add_argument('--content_wdecay', type=float,
                        help='weight decay for the content embedding', required=True)
    parser.add_argument('--drop_connect', type=float, help='drop-connect rate', default=0)

    # model
    parser.add_argument('--dim', type=int, help='model dimension', required=True)
    parser.add_argument('--content_noise', type=float,
                        help='standard deviation for the content embedding noise', required=True)
    parser.add_argument('--dropout', type=float, default=0.1, help='embedding dropout')
    parser.add_argument('--lr', type=float, default=0.01, help='learning rate')
    parser.add_argument('--nsamples', type=int, default=300000, help='number of examples to use')
    parser.add_argument('--ntokens', type=int, default=5, help='number of latent input vectors')
    parser.add_argument('--nconv', type=int, default=20,
                        help='number of conv layers, default as in the original')

    args = parser.parse_args()

    device = torch.device(args.device)
    if 'cuda' in args.device:
        torch.cuda.set_device(device)

    # create directory for checkpoints and logs
    foldername = os.path.join(args.dir, args_to_comment(args))
    print(foldername)
    folder_setup(args.overwrite, args.resume, foldername, force=args.f)

    # configure logger
    logger = configure_logger(os.path.join(foldername, 'trainer.log'))

    vocab_path = os.path.join(foldername, 'vocab.pickle')
    vocab = None
    if args.resume:
        with open(vocab_path, 'rb') as file:
            vocab = pickle.load(file)
        print('vocab was loaded')

    # create dataset
    dataset, vocab = get_dataset(args.nsamples, args.data_dir, vocab)
    logger.info(f'dataset loaded, vocab size is {len(vocab)}')

    # serialize the vocab object
    if not args.resume:
        with open(vocab_path, "wb") as file:
            pickle.dump(vocab, file)
        logger.info(f'vocab was pickled into {vocab_path}')

    # the dictionary is used for decoder construction but is never used after that.
    decoder_dictionary = vocab_to_dictionary(vocab)

    # build model
    if not args.resume:
        model = create_model(device, args.nsamples, decoder_dictionary.pad(),
                             args.ntokens, args.dim, args.content_noise,
                             args.dropout, decoder_dictionary, 50, args.nconv)
    else:
        model = load_checkpoint(os.path.join(foldername, 'last_checkpoint.ckpt'),
                                device, device, args.nsamples, decoder_dictionary.pad(),
                                args.ntokens, args.dim, args.content_noise, 0.1,
                                decoder_dictionary, 50, args.nconv)

    writer = SummaryWriter(log_dir=foldername, comment=args_to_comment(args))

    global_step = 0
    global_epoch = 0
    for it in range(args.it):
        logger.info('-- iteration {} --'.format(it))
        global_step, global_epoch = train(model, dataset, device, args.epochs,
                                          args.batch_size, decoder_dictionary.pad(),
                                          logger, args.content_wdecay, writer, foldername,
                                          global_step=global_step, global_epoch=global_epoch,
                                          shuffle=args.shuffle)
        evaluate(model, vocab, dataset, 10, it, logger, writer, device=device, gready=False)

    print('finished')
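# args_to_comment is what turns the run configuration into the checkpoint
# directory name (compare the hardcoded 'note_..._dim_32_ntokens_5_...' path in
# the decoding script above). A hypothetical sketch of the idea, not the repo's
# exact implementation or key order:
def args_to_comment(args):
    # skip session-level flags that should not affect the folder name
    skip = {'device', 'overwrite', 'resume', 'dir', 'data_dir', 'ckpt_every', 'f', 'shuffle'}
    parts = ['{}_{}'.format(k, v) for k, v in sorted(vars(args).items()) if k not in skip]
    return '_'.join(parts)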
def main():
    # parse command line arguments
    parser = argparse.ArgumentParser(description='Train lord-seq2seq-convnet.')

    # session
    parser.add_argument('--note', type=str, help='a comment', required=True)
    parser.add_argument('--device', type=str, default='cpu', help='cuda device: cuda:0 / cuda:1')
    parser.add_argument('--overwrite', action='store_true', help='delete old ckpt with this configuration')
    parser.add_argument('--resume', action='store_true',
                        help='resume training from old ckpt with this configuration')
    parser.add_argument('--ckpt_every', type=int, default=25, help='how many epochs between checkpoints')
    parser.add_argument('--dir', type=str,
                        default='/cs/labs/dshahaf/omribloch/data/text_lord/restorant/train/lstm',
                        help='here the script will create a directory named by the parameters')
    parser.add_argument('--data_dir', type=str,
                        default='/cs/labs/dshahaf/omribloch/data/text_lord/restorant/')

    # training
    parser.add_argument('--batch_size', type=int, help='batch size', required=True)
    parser.add_argument('--epochs', type=int, help='number of epochs to train', required=True)
    parser.add_argument('--content_wdecay', type=float,
                        help='weight decay for the content embedding', required=True)

    # model
    parser.add_argument('--dim', type=int, help='model dimension', required=True)
    parser.add_argument('--content_noise', type=float,
                        help='standard deviation for the content embedding noise', required=True)
    parser.add_argument('--nsamples', type=int, default=300000, help='number of examples to use')
    parser.add_argument('--nlayers', type=int, default=2, help='number of lstm layers')

    args = parser.parse_args()

    if args.overwrite and args.resume:
        raise Exception("can't use overwrite and resume together!")

    device = torch.device(args.device)
    if 'cuda' in args.device:
        torch.cuda.set_device(device)

    # create directory for checkpoints and logs
    foldername = os.path.join(args.dir, args_to_comment(args))
    print(foldername)

    vocab = None
    model = None

    if os.path.exists(foldername):
        if args.overwrite:
            if ask_user_confirmation('overwriting'):
                shutil.rmtree(foldername)
            else:
                print('okay, exiting. not removing anything.')
                exit(0)
        elif args.resume:
            if ask_user_confirmation('resuming'):
                print('resuming!')
            else:
                print('okay, exiting. not resuming.')
                exit(0)
        else:
            raise Exception('this configuration was already tried! aborting. try --overwrite or --resume.')

    if not os.path.exists(foldername):
        os.makedirs(foldername)

    # configure logger
    logger = configure_logger(os.path.join(foldername, 'trainer.log'))

    vocab_path = os.path.join(foldername, 'vocab.pickle')
    if args.resume:
        with open(vocab_path, 'rb') as file:
            vocab = pickle.load(file)
        print('vocab was loaded')

    # create dataset
    dataset, vocab = get_dataset(args.nsamples, args.data_dir, vocab)
    logger.info(f'dataset loaded, vocab size is {len(vocab)}')

    # serialize the vocab object
    if not args.resume:
        with open(vocab_path, "wb") as file:
            pickle.dump(vocab, file)
        logger.info(f'vocab was pickled into {vocab_path}')

    # build model
    if not args.resume:
        model = LSTM_LORD(args.dim, args.nlayers, len(vocab), args.nsamples, args.content_noise)
    else:
        model = load_checkpoint(os.path.join(foldername, 'last_checkpoint.ckpt'),
                                device, args.dim, args.nlayers, len(vocab), args.nsamples)

    # if torch.cuda.device_count() > 1:
    #     print("Let's use", torch.cuda.device_count(), "GPUs!")
    #     # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    #     model = nn.DataParallel(model)

    model.to(device)
    model.train()

    writer = SummaryWriter(log_dir=foldername, comment=args_to_comment(args))

    logger.info('before entering the training loop, '
                'we are going to have {} iterations per epoch'.format(args.nsamples // args.batch_size))

    acc_writer = AccuracyTensorboradWriter(writer, logger)

    global_step = 0
    for epoch in range(args.epochs):

        # optimizers are re-created each epoch because their lr is reduced from time to time.
        model_parameters = [p for p in model.lstm.parameters()] + \
                           [p for p in model.stars_embedding.parameters()] + \
                           [p for p in model.fc.parameters()]
        content_parameters = [p for p in model.sample_embedding.parameters()]
        # optimizer = optim.Adam(model_parameters, lr=0.001)
        # content_optimizer = optim.Adam(content_parameters, lr=0.1, weight_decay=args.content_wdecay)
        optimizer = optim.Adagrad(model.parameters())

        losses = []

        train_iter = data.BucketIterator(dataset=dataset, batch_size=args.batch_size,
                                         sort_key=lambda x: len(x.review),
                                         sort=False, sort_within_batch=True,
                                         repeat=False, device=device)

        for batch in tqdm(train_iter):
            # create input
            reviews = batch.review.transpose(1, 0)
            state = model.create_initial_hiddens(batch.stars, batch.id)

            # run!
            model.zero_grad()
            logits, state = model(reviews, state)

            logits_flat = logits.view(-1, len(vocab))
            targets_flat = shift_left(reviews, 1, device).reshape(-1)
            loss = F.cross_entropy(logits_flat, targets_flat, ignore_index=1)

            loss.backward()
            optimizer.step()
            # content_optimizer.step()

            # finished training step, now logging
            losses.append(loss.item())
            writer.add_scalar('Loss/per-step', loss.item(), global_step)

            # token accuracy (the % 1 condition means this logs every step)
            if global_step % 1 == 0:
                acc_writer.write_step(logits_flat, targets_flat, global_step, ignore_index=1)

            global_step += 1

        logger.info('epoch {} loss {}'.format(epoch, np.average(losses)))
        writer.add_scalar('Loss/per-epoch', np.average(losses), epoch)
        acc_writer.write_epoch(epoch)

        checkpoint(model, os.path.join(foldername, 'last_checkpoint.ckpt'))
        if epoch % 100 == 0:
            checkpoint(model, os.path.join(foldername, f'epoch{epoch}_checkpoint.ckpt'))
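# The loss above pairs each position's logits with the next token:
# shift_left(reviews, 1, device) drops the first token of every row and pads
# the tail, so position t is trained to predict token t+1, with ignore_index=1
# masking the pad id. A minimal sketch of such a helper, ASSUMING 1 is the pad
# index and reviews is (batch, seq_len); the repo's version may differ.
import torch

def shift_left(tensor, pad_index, device):
    # next-token targets: same shape as `tensor`, last column filled with pad
    pad_column = torch.full((tensor.size(0), 1), pad_index,
                            dtype=tensor.dtype, device=device)
    return torch.cat([tensor[:, 1:], pad_column], dim=1)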
def main():
    # parse command line arguments
    parser = argparse.ArgumentParser(description='evaluate lord-seq2seq-convnet.')

    # session
    parser.add_argument('--foldername', type=str)
    parser.add_argument('--data_dir', type=str,
                        default='/cs/labs/dshahaf/omribloch/data/text_lord/restorant/')
    parser.add_argument('--ckpt_name', type=str, default='last_checkpoint.ckpt')
    parser.add_argument('--device', type=str, default='cpu')
    parser.add_argument('--nsamples', type=int)
    parser.add_argument('--ntokens', type=int)
    parser.add_argument('--dim', type=int)
    parser.add_argument('--content_noise', type=float)
    parser.add_argument('--nconv', type=int)
    parser.add_argument('--samples_to_eval', type=int)
    parser.add_argument('--gready', action='store_true')
    parser.add_argument('--partitioned', action='store_true')

    args = parser.parse_args()

    vocab_path = os.path.join(args.foldername, 'vocab.pickle')
    model_ckpt_path = os.path.join(args.foldername, args.ckpt_name)

    with open(vocab_path, 'rb') as file:
        vocab = pickle.load(file)
    print('vocab was loaded')

    decoder_dictionary = vocab_to_dictionary(vocab)
    dropout = 0

    if not args.partitioned:
        model = load_checkpoint(model_ckpt_path, args.device, args.device, args.nsamples,
                                decoder_dictionary.pad(), args.ntokens, args.dim,
                                args.content_noise, dropout, decoder_dictionary, 50, args.nconv)
    else:
        model = load_checkpoint_partitioned(model_ckpt_path, args.device, args.device, args.nsamples,
                                            decoder_dictionary.pad(), args.ntokens, args.dim,
                                            args.content_noise, dropout, decoder_dictionary, 50, args.nconv)
    print('model loaded')
    model.eval()

    dataset, vocab = get_dataset(args.nsamples, args.data_dir, vocab)

    evaluator = Evaluator()
    fasttext_classifier = fasttext.FastText.load_model(
        '/cs/labs/dshahaf/omribloch/data/text_lord/restorant/fasttext_model.bin')

    dataset_ppl = []
    orig_ppl = []
    orig_bleu = []
    new_ppl = []
    new_bleu = []
    orig_wbleu = []
    new_wbleu = []

    correct_counter = 0
    counter = 0

    with open('/tmp/results_final.txt', 'w') as file:
        for i in tqdm(range(args.samples_to_eval), disable=False):
            sid = dataset[i].id
            stars = dataset[i].stars
            # stars = 1
            review_sentence = ' '.join(dataset[i].review)

            # reconstruct with the original sentiment
            ppl, bleu, classified, soriginal, sgenerated, original_ppl, bleu_weighted = \
                evaluator.eval(model, vocab, review_sentence, stars, sid, gready=args.gready)
            orig_ppl.append(ppl)
            orig_bleu.append(bleu)
            orig_wbleu.append(bleu_weighted)
            dataset_ppl.append(original_ppl)

            # generate with the flipped sentiment
            ppl, bleu, classified, soriginal, sgenerated_new, original_ppl, bleu_weighted_new = \
                evaluator.eval(model, vocab, review_sentence, 1 - stars, sid, gready=args.gready)
            new_ppl.append(ppl)
            new_bleu.append(bleu)
            new_wbleu.append(bleu_weighted_new)

            # check whether the flipped-sentiment generation fools the classifier
            predicted_label = fasttext_classifier.predict(sgenerated_new)[0][0]
            if labels_dictionary[predicted_label] == 1 - stars:
                correct_counter += 1
            counter += 1

            file.write('\n\n===========================\n')
            file.write('orig - {}\n'.format(soriginal))
            file.write('reco - {}\n'.format(sgenerated))
            file.write('opos - {}\n'.format(sgenerated_new))

    print('dataset ppl {}'.format(np.average(dataset_ppl)))
    print(f'orig ppl: {np.average(orig_ppl)}')
    print(f'new ppl: {np.average(new_ppl)}')
    print(f'orig bleu: {np.average(orig_bleu)}')
    print(f'new bleu: {np.average(new_bleu)}')
    print(f'orig wbleu: {np.average(orig_wbleu)}')
    print(f'new wbleu: {np.average(new_wbleu)}')
    print(f'classifier accuracy: {correct_counter / counter}')
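# labels_dictionary maps fastText's label strings back to the binary star
# value; fastText's predict() returns a (labels, probabilities) pair, hence
# the [0][0] indexing above. A hypothetical example of the mapping, since the
# actual label names depend on how fasttext_model.bin was trained:
labels_dictionary = {'__label__0': 0, '__label__1': 1}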