def main():
    parser = utils.opt_parser.get_trainer_opt_parser()
    parser.add_argument('models', nargs='*', help='pretrained models for the same setting')
    parser.add_argument('--test', action="store_true", help='use testing mode')
    parser.add_argument('--num-layer', type=int, help="number of stacked layers in the transformer model")
    args = parser.parse_args()

    reader = data_adapter.GeoQueryDatasetReader()
    training_set = reader.read(config.DATASETS[args.dataset].train_path)
    try:
        validation_set = reader.read(config.DATASETS[args.dataset].dev_path)
    except Exception:
        # fall back to training without a validation set if the dev split is unavailable
        validation_set = None

    vocab = allennlp.data.Vocabulary.from_instances(training_set)
    st_ds_conf = config.TRANSFORMER_CONF[args.dataset]
    if args.num_layer:
        st_ds_conf['num_layers'] = args.num_layer

    encoder = TransformerEncoder(
        input_dim=st_ds_conf['emb_sz'],
        num_layers=st_ds_conf['num_layers'],
        num_heads=st_ds_conf['num_heads'],
        feedforward_hidden_dim=st_ds_conf['emb_sz'],
    )
    decoder = TransformerDecoder(
        input_dim=st_ds_conf['emb_sz'],
        num_layers=st_ds_conf['num_layers'],
        num_heads=st_ds_conf['num_heads'],
        feedforward_hidden_dim=st_ds_conf['emb_sz'],
        feedforward_dropout=0.1,
    )
    source_embedding = allennlp.modules.Embedding(
        num_embeddings=vocab.get_vocab_size('nltokens'),
        embedding_dim=st_ds_conf['emb_sz'])
    target_embedding = allennlp.modules.Embedding(
        num_embeddings=vocab.get_vocab_size('lftokens'),
        embedding_dim=st_ds_conf['emb_sz'])

    model = ParallelSeq2Seq(
        vocab=vocab,
        encoder=encoder,
        decoder=decoder,
        source_embedding=source_embedding,
        target_embedding=target_embedding,
        target_namespace='lftokens',
        start_symbol=START_SYMBOL,
        eos_symbol=END_SYMBOL,
        max_decoding_step=st_ds_conf['max_decoding_len'],
    )

    if args.models:
        model.load_state_dict(torch.load(args.models[0]))

    if not args.test or not args.models:
        # training mode
        iterator = BucketIterator(sorting_keys=[("source_tokens", "num_tokens")],
                                  batch_size=st_ds_conf['batch_sz'])
        iterator.index_with(vocab)

        optim = torch.optim.Adam(model.parameters())
        savepath = os.path.join(config.SNAPSHOT_PATH, args.dataset, 'transformer',
                                datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        if not os.path.exists(savepath):
            os.makedirs(savepath, mode=0o755)

        trainer = allennlp.training.Trainer(
            model=model,
            optimizer=optim,
            iterator=iterator,
            train_dataset=training_set,
            validation_dataset=validation_set,
            serialization_dir=savepath,
            cuda_device=args.device,
            num_epochs=config.TRAINING_LIMIT,
        )
        trainer.train()

    else:
        # testing mode: decode the test split and print source, gold, and prediction
        testing_set = reader.read(config.DATASETS[args.dataset].test_path)
        model.eval()

        predictor = allennlp.predictors.SimpleSeq2SeqPredictor(model, reader)

        for instance in testing_set:
            print('SRC: ', instance.fields['source_tokens'].tokens)
            print('GOLD:', ' '.join(str(x) for x in instance.fields['target_tokens'].tokens[1:-1]))
            del instance.fields['target_tokens']
            output = predictor.predict_instance(instance)
            print('PRED:', ' '.join(output['predicted_tokens']))
def run_model(args):
    st_ds_conf = get_updated_settings(args)

    reader = data_adapter.GeoQueryDatasetReader()
    training_set = reader.read(config.DATASETS[args.dataset].train_path)
    try:
        validation_set = reader.read(config.DATASETS[args.dataset].dev_path)
    except Exception:
        # fall back to training without a validation set if the dev split is unavailable
        validation_set = None

    vocab = allennlp.data.Vocabulary.from_instances(training_set)
    model = get_model(vocab, st_ds_conf)

    device_tag = "cpu" if config.DEVICE < 0 else f"cuda:{config.DEVICE}"
    if args.models:
        model.load_state_dict(torch.load(args.models[0], map_location=device_tag))

    if not args.test or not args.models:
        # training mode
        iterator = BucketIterator(sorting_keys=[("source_tokens", "num_tokens")],
                                  batch_size=st_ds_conf['batch_sz'])
        iterator.index_with(vocab)

        optim = torch.optim.Adam(model.parameters(), lr=config.ADAM_LR,
                                 betas=config.ADAM_BETAS, eps=config.ADAM_EPS)
        if args.fine_tune:
            # fine-tuning switches to plain SGD
            optim = torch.optim.SGD(model.parameters(), lr=config.SGD_LR)

        savepath = os.path.join(config.SNAPSHOT_PATH, args.dataset, 'unc_s2s',
                                datetime.datetime.now().strftime('%Y%m%d-%H%M%S') + "--" + args.memo)
        if not os.path.exists(savepath):
            os.makedirs(savepath, mode=0o755)

        trainer = allennlp.training.Trainer(
            model=model,
            optimizer=optim,
            iterator=iterator,
            train_dataset=training_set,
            validation_dataset=validation_set,
            serialization_dir=savepath,
            cuda_device=config.DEVICE,
            num_epochs=config.TRAINING_LIMIT,
            grad_clipping=config.GRAD_CLIPPING,
            num_serialized_models_to_keep=-1,
        )
        trainer.train()

    else:
        # testing mode
        if args.test_on_val:
            testing_set = reader.read(config.DATASETS[args.dataset].dev_path)
        else:
            testing_set = reader.read(config.DATASETS[args.dataset].test_path)

        model.eval()
        model.skip_loss = True  # skip loss computation on the testing set for faster evaluation
        if config.DEVICE > -1:
            model = model.cuda(config.DEVICE)

        # batch testing
        iterator = BucketIterator(sorting_keys=[("source_tokens", "num_tokens")],
                                  batch_size=st_ds_conf['batch_sz'])
        iterator.index_with(vocab)
        eval_generator = iterator(testing_set, num_epochs=1, shuffle=False)
        for batch in tqdm.tqdm(eval_generator, total=iterator.get_num_batches(testing_set)):
            batch = move_to_device(batch, config.DEVICE)
            output = model(**batch)
        metrics = model.get_metrics()
        print(metrics)

        if args.dump_test:
            # additionally dump per-instance predictions
            predictor = allennlp.predictors.SimpleSeq2SeqPredictor(model, reader)
            for instance in tqdm.tqdm(testing_set, total=len(testing_set)):
                print('SRC: ', instance.fields['source_tokens'].tokens)
                print('GOLD:', ' '.join(str(x) for x in instance.fields['target_tokens'].tokens[1:-1]))
                del instance.fields['target_tokens']
                output = predictor.predict_instance(instance)
                print('PRED:', ' '.join(output['predicted_tokens']))
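# Unlike the other scripts in this section, run_model() takes an already-parsed `args` object.
# Below is a minimal, hypothetical entry point for it. The positional `models` argument and the
# --test flag mirror the other scripts; the --fine-tune, --test-on-val, and --dump-test flags are
# assumptions inferred from the attributes run_model() reads, not the repository's actual parser.
def main():
    parser = utils.opt_parser.get_trainer_opt_parser()
    parser.add_argument('models', nargs='*', help='pretrained models for the same setting')
    parser.add_argument('--test', action="store_true", help='use testing mode')
    parser.add_argument('--fine-tune', action="store_true", help='fine-tune with SGD instead of Adam')
    parser.add_argument('--test-on-val', action="store_true", help='evaluate on the dev split instead of the test split')
    parser.add_argument('--dump-test', action="store_true", help='also print per-instance predictions')
    args = parser.parse_args()
    run_model(args)


if __name__ == '__main__':
    main()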
def main():
    parser = utils.opt_parser.get_trainer_opt_parser()
    parser.add_argument('models', nargs='*', help='pretrained models for the same setting')
    parser.add_argument('--test', action="store_true", help='use testing mode')
    parser.add_argument('--num-layer', type=int, help='maximum number of stacked layers')
    parser.add_argument('--use-ut', action="store_true",
                        help='use the universal transformer instead of the vanilla transformer')
    args = parser.parse_args()

    reader = data_adapter.GeoQueryDatasetReader()
    training_set = reader.read(config.DATASETS[args.dataset].train_path)
    try:
        validation_set = reader.read(config.DATASETS[args.dataset].dev_path)
    except Exception:
        validation_set = None

    vocab = allennlp.data.Vocabulary.from_instances(training_set)
    st_ds_conf = config.TRANS2SEQ_CONF[args.dataset]
    if args.num_layer:
        st_ds_conf['max_num_layers'] = args.num_layer
    if args.epoch:
        config.TRAINING_LIMIT = args.epoch
    if args.batch:
        st_ds_conf['batch_sz'] = args.batch
    bsz = st_ds_conf['batch_sz']
    emb_sz = st_ds_conf['emb_sz']

    src_embedder = BasicTextFieldEmbedder(token_embedders={
        "tokens": Embedding(vocab.get_vocab_size('nltokens'), emb_sz)
    })

    if args.use_ut:
        transformer_encoder = UTEncoder(
            input_dim=emb_sz,
            max_num_layers=st_ds_conf['max_num_layers'],
            num_heads=st_ds_conf['num_heads'],
            feedforward_hidden_dim=emb_sz,
            feedforward_dropout=st_ds_conf['feedforward_dropout'],
            attention_dropout=st_ds_conf['attention_dropout'],
            residual_dropout=st_ds_conf['residual_dropout'],
            use_act=st_ds_conf['act'],
            use_vanilla_wiring=st_ds_conf['vanilla_wiring'])
    else:
        transformer_encoder = TransformerEncoder(
            input_dim=emb_sz,
            num_layers=st_ds_conf['max_num_layers'],
            num_heads=st_ds_conf['num_heads'],
            feedforward_hidden_dim=emb_sz,
            feedforward_dropout=st_ds_conf['feedforward_dropout'],
            attention_dropout=st_ds_conf['attention_dropout'],
            residual_dropout=st_ds_conf['residual_dropout'],
        )

    model = allennlp.models.SimpleSeq2Seq(
        vocab,
        source_embedder=src_embedder,
        encoder=transformer_encoder,
        max_decoding_steps=50,
        attention=allennlp.modules.attention.DotProductAttention(),
        beam_size=6,
        target_namespace="lftokens",
        use_bleu=True)

    if args.models:
        model.load_state_dict(torch.load(args.models[0]))

    if not args.test or not args.models:
        iterator = BucketIterator(sorting_keys=[("source_tokens", "num_tokens")], batch_size=bsz)
        iterator.index_with(vocab)

        optim = torch.optim.Adam(model.parameters())
        savepath = os.path.join(config.SNAPSHOT_PATH, args.dataset, 'transformer2seq',
                                datetime.datetime.now().strftime('%Y%m%d-%H%M%S') + "--" + args.memo)
        if not os.path.exists(savepath):
            os.makedirs(savepath, mode=0o755)

        trainer = allennlp.training.Trainer(
            model=model,
            optimizer=optim,
            iterator=iterator,
            train_dataset=training_set,
            validation_dataset=validation_set,
            serialization_dir=savepath,
            cuda_device=args.device,
            num_epochs=config.TRAINING_LIMIT,
        )
        trainer.train()

    else:
        testing_set = reader.read(config.DATASETS[args.dataset].test_path)
        model.eval()

        predictor = allennlp.predictors.SimpleSeq2SeqPredictor(model, reader)

        for instance in tqdm.tqdm(testing_set, total=len(testing_set)):
            print('SRC: ', instance.fields['source_tokens'].tokens)
            print('GOLD:', ' '.join(str(x) for x in instance.fields['target_tokens'].tokens[1:-1]))
            del instance.fields['target_tokens']
            output = predictor.predict_instance(instance)
            print('PRED:', ' '.join(output['predicted_tokens']))
def main():
    parser = utils.opt_parser.get_trainer_opt_parser()
    parser.add_argument('models', nargs='*', help='pretrained models for the same setting')
    parser.add_argument('--test', action="store_true", help='use testing mode')
    parser.add_argument('--no-act', action="store_true", help='do not use ACT for layer computation')
    parser.add_argument('--num-layer', type=int, help='maximum number of stacked layers')
    parser.add_argument('--warm-up', type=int, default=10, help='number of warm-up steps for the Noam scheduler')
    args = parser.parse_args()

    reader = data_adapter.GeoQueryDatasetReader()
    training_set = reader.read(config.DATASETS[args.dataset].train_path)
    try:
        validation_set = reader.read(config.DATASETS[args.dataset].dev_path)
    except Exception:
        validation_set = None

    vocab = allennlp.data.Vocabulary.from_instances(training_set)
    st_ds_conf = config.UTRANSFORMER_CONF[args.dataset]
    if args.no_act:
        st_ds_conf['act'] = False
    if args.num_layer:
        st_ds_conf['max_num_layers'] = args.num_layer
    if args.epoch:
        config.TRAINING_LIMIT = args.epoch
    if args.batch:
        st_ds_conf['batch_sz'] = args.batch

    encoder = UTEncoder(
        input_dim=st_ds_conf['emb_sz'],
        max_num_layers=st_ds_conf['max_num_layers'],
        num_heads=st_ds_conf['num_heads'],
        feedforward_hidden_dim=st_ds_conf['emb_sz'],
        use_act=st_ds_conf['act'],
        attention_dropout=st_ds_conf['attention_dropout'],
        residual_dropout=st_ds_conf['residual_dropout'],
        feedforward_dropout=st_ds_conf['feedforward_dropout'],
        use_vanilla_wiring=st_ds_conf['vanilla_wiring'],
    )
    decoder = UTDecoder(
        input_dim=st_ds_conf['emb_sz'],
        max_num_layers=st_ds_conf['max_num_layers'],
        num_heads=st_ds_conf['num_heads'],
        feedforward_hidden_dim=st_ds_conf['emb_sz'],
        use_act=st_ds_conf['act'],
        attention_dropout=st_ds_conf['attention_dropout'],
        residual_dropout=st_ds_conf['residual_dropout'],
        feedforward_dropout=st_ds_conf['feedforward_dropout'],
        use_vanilla_wiring=st_ds_conf['vanilla_wiring'],
    )
    source_embedding = allennlp.modules.Embedding(
        num_embeddings=vocab.get_vocab_size('nltokens'),
        embedding_dim=st_ds_conf['emb_sz'])
    target_embedding = allennlp.modules.Embedding(
        num_embeddings=vocab.get_vocab_size('lftokens'),
        embedding_dim=st_ds_conf['emb_sz'])

    model = ParallelSeq2Seq(
        vocab=vocab,
        encoder=encoder,
        decoder=decoder,
        source_embedding=source_embedding,
        target_embedding=target_embedding,
        target_namespace='lftokens',
        start_symbol=START_SYMBOL,
        eos_symbol=END_SYMBOL,
        max_decoding_step=st_ds_conf['max_decoding_len'],
    )

    if args.models:
        logging.getLogger().setLevel(logging.INFO)
        logging.info(f"loading pretrained model from {args.models[0]}")
        model.load_state_dict(torch.load(args.models[0]))

    if not args.test or not args.models:
        iterator = BucketIterator(sorting_keys=[("source_tokens", "num_tokens")],
                                  batch_size=st_ds_conf['batch_sz'])
        iterator.index_with(vocab)

        optim = torch.optim.Adam(model.parameters(), betas=(.9, .98), eps=1.e-9)
        savepath = os.path.join(config.SNAPSHOT_PATH, args.dataset, 'universal_transformer',
                                datetime.datetime.now().strftime('%Y%m%d-%H%M%S') + "--" + args.memo)
        if not os.path.exists(savepath):
            os.makedirs(savepath, mode=0o755)

        trainer = allennlp.training.Trainer(
            model=model,
            optimizer=optim,
            iterator=iterator,
            train_dataset=training_set,
            validation_dataset=validation_set,
            serialization_dir=savepath,
            cuda_device=args.device,
            num_epochs=config.TRAINING_LIMIT,
        )
        trainer.train()

    else:
        testing_set = reader.read(config.DATASETS[args.dataset].test_path)
        model.eval()

        predictor = allennlp.predictors.SimpleSeq2SeqPredictor(model, reader)

        for instance in tqdm.tqdm(testing_set, total=len(testing_set)):
            print('SRC: ', instance.fields['source_tokens'].tokens)
            print('GOLD:', ' '.join(str(x) for x in instance.fields['target_tokens'].tokens[1:-1]))
            del instance.fields['target_tokens']
            output = predictor.predict_instance(instance)
            print('PRED:', ' '.join(output['predicted_tokens']))
def main():
    parser = utils.opt_parser.get_trainer_opt_parser()
    parser.add_argument('models', nargs='*', help='pretrained models for the same setting')
    parser.add_argument('--enc-layers', type=int, default=1, help='number of encoder layers (default: 1)')
    parser.add_argument('--test', action="store_true", help='use testing mode')
    parser.add_argument('--use-dev', action="store_true")
    args = parser.parse_args()

    reader = data_adapter.GeoQueryDatasetReader()
    training_set = reader.read(config.DATASETS[args.dataset].train_path)
    if args.use_dev:
        validation_set = reader.read(config.DATASETS[args.dataset].dev_path)

    vocab = allennlp.data.Vocabulary.from_instances(training_set)
    st_ds_conf = config.SEQ2SEQ_CONF[args.dataset]
    if args.epoch:
        config.TRAINING_LIMIT = args.epoch
    bsz = st_ds_conf['batch_sz']
    emb_sz = st_ds_conf['emb_sz']

    src_embedder = BasicTextFieldEmbedder(token_embedders={
        "tokens": Embedding(vocab.get_vocab_size('nltokens'), emb_sz)
    })
    encoder = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(emb_sz, emb_sz, num_layers=args.enc_layers, batch_first=True))

    model = allennlp.models.SimpleSeq2Seq(
        vocab,
        source_embedder=src_embedder,
        encoder=encoder,
        max_decoding_steps=st_ds_conf['max_decoding_len'],
        attention=allennlp.modules.attention.DotProductAttention(),
        beam_size=8,
        target_namespace="lftokens",
        use_bleu=True)

    if args.models:
        model.load_state_dict(torch.load(args.models[0]))

    if not args.test or not args.models:
        iterator = BucketIterator(sorting_keys=[("source_tokens", "num_tokens")], batch_size=bsz)
        iterator.index_with(vocab)

        optim = torch.optim.Adam(model.parameters())
        savepath = os.path.join(config.SNAPSHOT_PATH, args.dataset, 'seq2seq',
                                datetime.datetime.now().strftime('%Y%m%d-%H%M%S') + "--" + args.memo)
        if not os.path.exists(savepath):
            os.makedirs(savepath, mode=0o755)

        trainer = allennlp.training.Trainer(
            model=model,
            optimizer=optim,
            iterator=iterator,
            train_dataset=training_set,
            validation_dataset=validation_set if args.use_dev else None,
            serialization_dir=savepath,
            cuda_device=args.device,
            num_epochs=config.TRAINING_LIMIT,
        )
        trainer.train()

    else:
        testing_set = reader.read(config.DATASETS[args.dataset].test_path)
        model.eval()

        predictor = allennlp.predictors.SimpleSeq2SeqPredictor(model, reader)

        for instance in testing_set:
            print('SRC: ', instance.fields['source_tokens'].tokens)
            print('GOLD:', ' '.join(str(x) for x in instance.fields['target_tokens'].tokens[1:-1]))
            print('PRED:', ' '.join(predictor.predict_instance(instance)['predicted_tokens']))
def main():
    parser = utils.opt_parser.get_trainer_opt_parser()
    parser.add_argument('models', nargs='*', help='pretrained models for the same setting')
    parser.add_argument('--test', action="store_true", help='use testing mode')
    parser.add_argument('--emb-dim', type=int, help='basic embedding dimension')
    parser.add_argument('--act-max-layer', type=int, help='maximum number of stacked layers')
    parser.add_argument('--use-act', action="store_true", help='use adaptive computation time (ACT) for the decoder')
    parser.add_argument('--act-loss-weight', type=float, help='weight of the ACT loss term')
    parser.add_argument('--enc-layers', type=int, help='number of encoder layers')
    parser.add_argument('--act-mode', choices=['basic', 'random', 'mean_field'])
    parser.add_argument('--encoder', choices=['transformer', 'lstm', 'bilstm'])
    parser.add_argument('--decoder', choices=['lstm', 'rnn', 'gru', 'ind_rnn', 'n_lstm', 'n_gru'])
    parser.add_argument('--dec-cell-height', type=int, help='stack height of the n-layer LSTM/GRU decoder cell')
    args = parser.parse_args()

    reader = data_adapter.GeoQueryDatasetReader()
    training_set = reader.read(config.DATASETS[args.dataset].train_path)
    try:
        validation_set = reader.read(config.DATASETS[args.dataset].dev_path)
    except Exception:
        validation_set = None

    vocab = allennlp.data.Vocabulary.from_instances(training_set)
    if args.epoch:
        config.TRAINING_LIMIT = args.epoch
    if args.device:
        config.DEVICE = args.device

    st_ds_conf = get_updated_settings(args)
    model = get_model(vocab, st_ds_conf)

    if args.models:
        model.load_state_dict(torch.load(args.models[0]))

    if not args.test or not args.models:
        iterator = BucketIterator(sorting_keys=[("source_tokens", "num_tokens")],
                                  batch_size=st_ds_conf['batch_sz'])
        iterator.index_with(vocab)

        optim = torch.optim.Adam(model.parameters(), lr=config.ADAM_LR,
                                 betas=config.ADAM_BETAS, eps=config.ADAM_EPS)
        savepath = os.path.join(config.SNAPSHOT_PATH, args.dataset, 'ada_trans2seq',
                                datetime.datetime.now().strftime('%Y%m%d-%H%M%S') + "--" + args.memo)
        if not os.path.exists(savepath):
            os.makedirs(savepath, mode=0o755)

        trainer = allennlp.training.Trainer(
            model=model,
            optimizer=optim,
            iterator=iterator,
            train_dataset=training_set,
            validation_dataset=validation_set,
            serialization_dir=savepath,
            cuda_device=config.DEVICE,
            num_epochs=config.TRAINING_LIMIT,
            grad_clipping=config.GRAD_CLIPPING,
        )
        trainer.train()

    else:
        testing_set = reader.read(config.DATASETS[args.dataset].test_path)
        model.eval()
        if config.DEVICE > -1:
            model = model.cuda(config.DEVICE)

        predictor = allennlp.predictors.SimpleSeq2SeqPredictor(model, reader)

        for instance in tqdm.tqdm(testing_set, total=len(testing_set)):
            print('SRC: ', instance.fields['source_tokens'].tokens)
            print('GOLD:', ' '.join(str(x) for x in instance.fields['target_tokens'].tokens[1:-1]))
            del instance.fields['target_tokens']
            output = predictor.predict_instance(instance)
            print('PRED:', ' '.join(output['predicted_tokens']))
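# The get_updated_settings() helper called by the two functions above is not shown in this
# section. The sketch below only illustrates the pattern it presumably implements (copy the
# per-dataset defaults, then apply CLI overrides); the config name ADA_TRANS2SEQ_CONF and the
# specific dictionary keys are hypothetical assumptions, not the repository's actual settings.
def get_updated_settings(args):
    # copy the per-dataset defaults so CLI overrides stay local to this run
    st_ds_conf = dict(config.ADA_TRANS2SEQ_CONF[args.dataset])  # hypothetical config table
    if args.emb_dim:
        st_ds_conf['emb_sz'] = args.emb_dim
    if args.act_max_layer:
        st_ds_conf['max_num_layers'] = args.act_max_layer
    if args.use_act:
        st_ds_conf['act'] = True
    if args.batch:
        st_ds_conf['batch_sz'] = args.batch
    return st_ds_conf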