Example #1
def main():
    parser = utils.opt_parser.get_trainer_opt_parser()
    parser.add_argument('models',
                        nargs='*',
                        help='pretrained models for the same setting')
    parser.add_argument('--test', action="store_true", help='use testing mode')
    parser.add_argument('--num-layer',
                        type=int,
                        help="stacked layer of transformer model")

    args = parser.parse_args()

    reader = data_adapter.GeoQueryDatasetReader()
    training_set = reader.read(config.DATASETS[args.dataset].train_path)
    try:
        validation_set = reader.read(config.DATASETS[args.dataset].dev_path)
    except Exception:  # the dataset may not provide a dev split
        validation_set = None

    vocab = allennlp.data.Vocabulary.from_instances(training_set)
    st_ds_conf = config.TRANSFORMER_CONF[args.dataset]
    if args.num_layer:
        st_ds_conf['num_layers'] = args.num_layer

    encoder = TransformerEncoder(
        input_dim=st_ds_conf['emb_sz'],
        num_layers=st_ds_conf['num_layers'],
        num_heads=st_ds_conf['num_heads'],
        feedforward_hidden_dim=st_ds_conf['emb_sz'],
    )
    decoder = TransformerDecoder(
        input_dim=st_ds_conf['emb_sz'],
        num_layers=st_ds_conf['num_layers'],
        num_heads=st_ds_conf['num_heads'],
        feedforward_hidden_dim=st_ds_conf['emb_sz'],
        feedforward_dropout=0.1,
    )
    source_embedding = allennlp.modules.Embedding(
        num_embeddings=vocab.get_vocab_size('nltokens'),
        embedding_dim=st_ds_conf['emb_sz'])
    target_embedding = allennlp.modules.Embedding(
        num_embeddings=vocab.get_vocab_size('lftokens'),
        embedding_dim=st_ds_conf['emb_sz'])
    model = ParallelSeq2Seq(
        vocab=vocab,
        encoder=encoder,
        decoder=decoder,
        source_embedding=source_embedding,
        target_embedding=target_embedding,
        target_namespace='lftokens',
        start_symbol=START_SYMBOL,
        eos_symbol=END_SYMBOL,
        max_decoding_step=st_ds_conf['max_decoding_len'],
    )

    if args.models:
        model.load_state_dict(torch.load(args.models[0]))

    if not args.test or not args.models:
        iterator = BucketIterator(
            sorting_keys=[("source_tokens", "num_tokens")],
            batch_size=st_ds_conf['batch_sz'])
        iterator.index_with(vocab)

        optim = torch.optim.Adam(model.parameters())

        savepath = os.path.join(
            config.SNAPSHOT_PATH, args.dataset, 'transformer',
            datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
        if not os.path.exists(savepath):
            os.makedirs(savepath, mode=0o755)

        trainer = allennlp.training.Trainer(
            model=model,
            optimizer=optim,
            iterator=iterator,
            train_dataset=training_set,
            validation_dataset=validation_set,
            serialization_dir=savepath,
            cuda_device=args.device,
            num_epochs=config.TRAINING_LIMIT,
        )

        trainer.train()

    else:
        testing_set = reader.read(config.DATASETS[args.dataset].test_path)
        model.eval()

        predictor = allennlp.predictors.SimpleSeq2SeqPredictor(model, reader)

        for instance in testing_set:
            print('SRC: ', instance.fields['source_tokens'].tokens)
            print(
                'GOLD:', ' '.join(
                    str(x)
                    for x in instance.fields['target_tokens'].tokens[1:-1]))
            del instance.fields['target_tokens']
            output = predictor.predict_instance(instance)
            print('PRED:', ' '.join(output['predicted_tokens']))
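
All of these examples call utils.opt_parser.get_trainer_opt_parser() but never show it. Below is a minimal sketch of what that helper would have to provide, inferred only from the attributes the examples read (args.dataset, args.device, args.epoch, args.batch, args.memo); the flag names, types, and defaults are assumptions.

import argparse


def get_trainer_opt_parser():
    """Hypothetical reconstruction of utils.opt_parser.get_trainer_opt_parser."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', required=True,
                        help='key into config.DATASETS')
    parser.add_argument('--device', type=int, default=-1,
                        help='CUDA device id, or -1 for CPU')
    parser.add_argument('--epoch', type=int,
                        help='override config.TRAINING_LIMIT')
    parser.add_argument('--batch', type=int,
                        help='override the per-dataset batch size')
    parser.add_argument('--memo', default='',
                        help='free-form tag appended to the snapshot directory')
    return parser
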
Example #2
def run_model(args):
    st_ds_conf = get_updated_settings(args)
    reader = data_adapter.GeoQueryDatasetReader()
    training_set = reader.read(config.DATASETS[args.dataset].train_path)
    try:
        validation_set = reader.read(config.DATASETS[args.dataset].dev_path)
    except Exception:  # the dataset may not provide a dev split
        validation_set = None

    vocab = allennlp.data.Vocabulary.from_instances(training_set)
    model = get_model(vocab, st_ds_conf)
    device_tag = "cpu" if config.DEVICE < 0 else f"cuda:{config.DEVICE}"
    if args.models:
        model.load_state_dict(
            torch.load(args.models[0], map_location=device_tag))

    if not args.test or not args.models:
        iterator = BucketIterator(
            sorting_keys=[("source_tokens", "num_tokens")],
            batch_size=st_ds_conf['batch_sz'])
        iterator.index_with(vocab)

        optim = torch.optim.Adam(model.parameters(),
                                 lr=config.ADAM_LR,
                                 betas=config.ADAM_BETAS,
                                 eps=config.ADAM_EPS)
        if args.fine_tune:
            optim = torch.optim.SGD(model.parameters(), lr=config.SGD_LR)

        savepath = os.path.join(
            config.SNAPSHOT_PATH, args.dataset, 'unc_s2s',
            datetime.datetime.now().strftime('%Y%m%d-%H%M%S') + "--" +
            args.memo)
        if not os.path.exists(savepath):
            os.makedirs(savepath, mode=0o755)

        trainer = allennlp.training.Trainer(
            model=model,
            optimizer=optim,
            iterator=iterator,
            train_dataset=training_set,
            validation_dataset=validation_set,
            serialization_dir=savepath,
            cuda_device=config.DEVICE,
            num_epochs=config.TRAINING_LIMIT,
            grad_clipping=config.GRAD_CLIPPING,
            num_serialized_models_to_keep=-1,
        )

        trainer.train()

    else:
        if args.test_on_val:
            testing_set = reader.read(config.DATASETS[args.dataset].dev_path)
        else:
            testing_set = reader.read(config.DATASETS[args.dataset].test_path)

        model.eval()
        model.skip_loss = True  # skip loss computation on testing set for faster evaluation

        if config.DEVICE > -1:
            model = model.cuda(config.DEVICE)

        # batch testing
        iterator = BucketIterator(
            sorting_keys=[("source_tokens", "num_tokens")],
            batch_size=st_ds_conf['batch_sz'])
        iterator.index_with(vocab)
        eval_generator = iterator(testing_set, num_epochs=1, shuffle=False)
        with torch.no_grad():  # gradients are not needed for metric-only evaluation
            for batch in tqdm.tqdm(
                    eval_generator,
                    total=iterator.get_num_batches(testing_set)):
                batch = move_to_device(batch, config.DEVICE)
                output = model(**batch)
        metrics = model.get_metrics()
        print(metrics)

        if args.dump_test:

            predictor = allennlp.predictors.SimpleSeq2SeqPredictor(
                model, reader)

            for instance in tqdm.tqdm(testing_set, total=len(testing_set)):
                print('SRC: ', instance.fields['source_tokens'].tokens)
                print(
                    'GOLD:', ' '.join(
                        str(x) for x in
                        instance.fields['target_tokens'].tokens[1:-1]))
                del instance.fields['target_tokens']
                output = predictor.predict_instance(instance)
                print('PRED:', ' '.join(output['predicted_tokens']))
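
Example 2 (and Example 6 below) delegates configuration overrides to a get_updated_settings(args) helper that is not reproduced here. A plausible sketch of it, following the inline override pattern spelled out in Examples 3 and 4; the config table name and the exact keys handled are assumptions.

def get_updated_settings(args):
    """Hypothetical: copy the per-dataset settings and apply CLI overrides."""
    st_ds_conf = dict(config.SEQ2SEQ_CONF[args.dataset])  # assumed config table
    if args.batch:
        st_ds_conf['batch_sz'] = args.batch
    if args.emb_dim:
        st_ds_conf['emb_sz'] = args.emb_dim
    if args.act_max_layer:
        st_ds_conf['max_num_layers'] = args.act_max_layer
    return st_ds_conf
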
Example #3
def main():
    parser = utils.opt_parser.get_trainer_opt_parser()
    parser.add_argument('models',
                        nargs='*',
                        help='pretrained models for the same setting')
    parser.add_argument('--test', action="store_true", help='use testing mode')
    parser.add_argument('--num-layer',
                        type=int,
                        help='maximum number of stacked layers')
    parser.add_argument(
        '--use-ut',
        action="store_true",
        help='Use universal transformer instead of transformer')

    args = parser.parse_args()

    reader = data_adapter.GeoQueryDatasetReader()
    training_set = reader.read(config.DATASETS[args.dataset].train_path)
    try:
        validation_set = reader.read(config.DATASETS[args.dataset].dev_path)
    except Exception:  # the dataset may not provide a dev split
        validation_set = None

    vocab = allennlp.data.Vocabulary.from_instances(training_set)
    st_ds_conf = config.TRANS2SEQ_CONF[args.dataset]
    if args.num_layer:
        st_ds_conf['max_num_layers'] = args.num_layer
    if args.epoch:
        config.TRAINING_LIMIT = args.epoch
    if args.batch:
        st_ds_conf['batch_sz'] = args.batch
    bsz = st_ds_conf['batch_sz']
    emb_sz = st_ds_conf['emb_sz']

    src_embedder = BasicTextFieldEmbedder(
        token_embedders={
            "tokens": Embedding(vocab.get_vocab_size('nltokens'), emb_sz)
        })

    if args.use_ut:
        transformer_encoder = UTEncoder(
            input_dim=emb_sz,
            max_num_layers=st_ds_conf['max_num_layers'],
            num_heads=st_ds_conf['num_heads'],
            feedforward_hidden_dim=emb_sz,
            feedforward_dropout=st_ds_conf['feedforward_dropout'],
            attention_dropout=st_ds_conf['attention_dropout'],
            residual_dropout=st_ds_conf['residual_dropout'],
            use_act=st_ds_conf['act'],
            use_vanilla_wiring=st_ds_conf['vanilla_wiring'])
    else:
        transformer_encoder = TransformerEncoder(
            input_dim=emb_sz,
            num_layers=st_ds_conf['max_num_layers'],
            num_heads=st_ds_conf['num_heads'],
            feedforward_hidden_dim=emb_sz,
            feedforward_dropout=st_ds_conf['feedforward_dropout'],
            attention_dropout=st_ds_conf['attention_dropout'],
            residual_dropout=st_ds_conf['residual_dropout'],
        )

    model = allennlp.models.SimpleSeq2Seq(
        vocab,
        source_embedder=src_embedder,
        encoder=transformer_encoder,
        max_decoding_steps=50,
        attention=allennlp.modules.attention.DotProductAttention(),
        beam_size=6,
        target_namespace="lftokens",
        use_bleu=True)

    if args.models:
        model.load_state_dict(torch.load(args.models[0]))

    if not args.test or not args.models:
        iterator = BucketIterator(
            sorting_keys=[("source_tokens", "num_tokens")],
            batch_size=bsz)
        iterator.index_with(vocab)

        optim = torch.optim.Adam(model.parameters())

        savepath = os.path.join(
            config.SNAPSHOT_PATH, args.dataset, 'transformer2seq',
            datetime.datetime.now().strftime('%Y%m%d-%H%M%S') + "--" +
            args.memo)
        if not os.path.exists(savepath):
            os.makedirs(savepath, mode=0o755)

        trainer = allennlp.training.Trainer(
            model=model,
            optimizer=optim,
            iterator=iterator,
            train_dataset=training_set,
            validation_dataset=validation_set,
            serialization_dir=savepath,
            cuda_device=args.device,
            num_epochs=config.TRAINING_LIMIT,
        )

        trainer.train()

    else:
        testing_set = reader.read(config.DATASETS[args.dataset].test_path)
        model.eval()

        predictor = allennlp.predictors.SimpleSeq2SeqPredictor(model, reader)

        for instance in tqdm.tqdm(testing_set, total=len(testing_set)):
            print('SRC: ', instance.fields['source_tokens'].tokens)
            print(
                'GOLD:', ' '.join(
                    str(x)
                    for x in instance.fields['target_tokens'].tokens[1:-1]))
            del instance.fields['target_tokens']
            output = predictor.predict_instance(instance)
            print('PRED:', ' '.join(output['predicted_tokens']))
Example #4
def main():
    parser = utils.opt_parser.get_trainer_opt_parser()
    parser.add_argument('models',
                        nargs='*',
                        help='pretrained models for the same setting')
    parser.add_argument('--test', action="store_true", help='use testing mode')
    parser.add_argument('--no-act',
                        action="store_true",
                        help='Do not use ACT for layer computation')
    parser.add_argument('--num-layer',
                        type=int,
                        help='maximum number of stacked layers')
    parser.add_argument('--warm-up',
                        type=int,
                        default=10,
                        help='number of warm-up steps for the Noam scheduler')

    args = parser.parse_args()

    reader = data_adapter.GeoQueryDatasetReader()
    training_set = reader.read(config.DATASETS[args.dataset].train_path)
    try:
        validation_set = reader.read(config.DATASETS[args.dataset].dev_path)
    except Exception:  # the dataset may not provide a dev split
        validation_set = None

    vocab = allennlp.data.Vocabulary.from_instances(training_set)
    st_ds_conf = config.UTRANSFORMER_CONF[args.dataset]
    if args.no_act:
        st_ds_conf['act'] = False
    if args.num_layer:
        st_ds_conf['max_num_layers'] = args.num_layer
    if args.epoch:
        config.TRAINING_LIMIT = args.epoch
    if args.batch:
        st_ds_conf['batch_sz'] = args.batch

    encoder = UTEncoder(
        input_dim=st_ds_conf['emb_sz'],
        max_num_layers=st_ds_conf['max_num_layers'],
        num_heads=st_ds_conf['num_heads'],
        feedforward_hidden_dim=st_ds_conf['emb_sz'],
        use_act=st_ds_conf['act'],
        attention_dropout=st_ds_conf['attention_dropout'],
        residual_dropout=st_ds_conf['residual_dropout'],
        feedforward_dropout=st_ds_conf['feedforward_dropout'],
        use_vanilla_wiring=st_ds_conf['vanilla_wiring'],
    )
    decoder = UTDecoder(
        input_dim=st_ds_conf['emb_sz'],
        max_num_layers=st_ds_conf['max_num_layers'],
        num_heads=st_ds_conf['num_heads'],
        feedforward_hidden_dim=st_ds_conf['emb_sz'],
        use_act=st_ds_conf['act'],
        attention_dropout=st_ds_conf['attention_dropout'],
        residual_dropout=st_ds_conf['residual_dropout'],
        feedforward_dropout=st_ds_conf['feedforward_dropout'],
        use_vanilla_wiring=st_ds_conf['vanilla_wiring'],
    )
    source_embedding = allennlp.modules.Embedding(
        num_embeddings=vocab.get_vocab_size('nltokens'),
        embedding_dim=st_ds_conf['emb_sz'])
    target_embedding = allennlp.modules.Embedding(
        num_embeddings=vocab.get_vocab_size('lftokens'),
        embedding_dim=st_ds_conf['emb_sz'])
    model = ParallelSeq2Seq(
        vocab=vocab,
        encoder=encoder,
        decoder=decoder,
        source_embedding=source_embedding,
        target_embedding=target_embedding,
        target_namespace='lftokens',
        start_symbol=START_SYMBOL,
        eos_symbol=END_SYMBOL,
        max_decoding_step=st_ds_conf['max_decoding_len'],
    )

    if args.models:
        logging.getLogger().setLevel(logging.INFO)
        logging.info(f"loads pretrained model from {args.models[0]}")
        model.load_state_dict(torch.load(args.models[0]))

    if not args.test or not args.models:
        iterator = BucketIterator(
            sorting_keys=[("source_tokens", "num_tokens")],
            batch_size=st_ds_conf['batch_sz'])
        iterator.index_with(vocab)

        optim = torch.optim.Adam(model.parameters(),
                                 betas=(.9, .98),
                                 eps=1.e-9)

        savepath = os.path.join(
            config.SNAPSHOT_PATH, args.dataset, 'universal_transformer',
            datetime.datetime.now().strftime('%Y%m%d-%H%M%S') + "--" +
            args.memo)
        if not os.path.exists(savepath):
            os.makedirs(savepath, mode=0o755)

        trainer = allennlp.training.Trainer(
            model=model,
            optimizer=optim,
            iterator=iterator,
            train_dataset=training_set,
            validation_dataset=validation_set,
            serialization_dir=savepath,
            cuda_device=args.device,
            num_epochs=config.TRAINING_LIMIT,
        )

        trainer.train()

    else:
        testing_set = reader.read(config.DATASETS[args.dataset].test_path)
        model.eval()

        predictor = allennlp.predictors.SimpleSeq2SeqPredictor(model, reader)

        for instance in tqdm.tqdm(testing_set, total=len(testing_set)):
            print('SRC: ', instance.fields['source_tokens'].tokens)
            print(
                'GOLD:', ' '.join(
                    str(x)
                    for x in instance.fields['target_tokens'].tokens[1:-1]))
            del instance.fields['target_tokens']
            output = predictor.predict_instance(instance)
            print('PRED:', ' '.join(output['predicted_tokens']))
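
Example 4 parses a --warm-up flag for a Noam scheduler but never attaches it to training. Below is a sketch of how it could be wired in, assuming the AllenNLP 0.x Trainer and NoamLR APIs used elsewhere in these examples; treat the argument names as assumptions if your version differs.

from allennlp.training.learning_rate_schedulers import NoamLR

lr_scheduler = NoamLR(optimizer=optim,
                      model_size=st_ds_conf['emb_sz'],
                      warmup_steps=args.warm_up)

trainer = allennlp.training.Trainer(
    model=model,
    optimizer=optim,
    iterator=iterator,
    train_dataset=training_set,
    validation_dataset=validation_set,
    serialization_dir=savepath,
    cuda_device=args.device,
    num_epochs=config.TRAINING_LIMIT,
    learning_rate_scheduler=lr_scheduler,  # step the Noam schedule during training
)
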
Example #5
def main():
    parser = utils.opt_parser.get_trainer_opt_parser()
    parser.add_argument('models',
                        nargs='*',
                        help='pretrained models for the same setting')
    parser.add_argument('--enc-layers',
                        type=int,
                        default=1,
                        help="encoder layer number defaulted to 1")
    parser.add_argument('--test', action="store_true", help='use testing mode')
    parser.add_argument('--use-dev', action="store_true")

    args = parser.parse_args()

    reader = data_adapter.GeoQueryDatasetReader()
    training_set = reader.read(config.DATASETS[args.dataset].train_path)
    # the dev split is optional; only load it when --use-dev is given
    validation_set = reader.read(
        config.DATASETS[args.dataset].dev_path) if args.use_dev else None

    vocab = allennlp.data.Vocabulary.from_instances(training_set)
    st_ds_conf = config.SEQ2SEQ_CONF[args.dataset]
    if args.epoch:
        config.TRAINING_LIMIT = args.epoch
    bsz = st_ds_conf['batch_sz']
    emb_sz = st_ds_conf['emb_sz']

    src_embedder = BasicTextFieldEmbedder(
        token_embedders={
            "tokens": Embedding(vocab.get_vocab_size('nltokens'), emb_sz)
        })

    encoder = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(emb_sz,
                      emb_sz,
                      num_layers=args.enc_layers,
                      batch_first=True))

    model = allennlp.models.SimpleSeq2Seq(
        vocab,
        source_embedder=src_embedder,
        encoder=encoder,
        max_decoding_steps=st_ds_conf['max_decoding_len'],
        attention=allennlp.modules.attention.DotProductAttention(),
        beam_size=8,
        target_namespace="lftokens",
        use_bleu=True)

    if args.models:
        model.load_state_dict(torch.load(args.models[0]))

    if not args.test or not args.models:
        iterator = BucketIterator(
            sorting_keys=[("source_tokens", "num_tokens")],
            batch_size=bsz)
        iterator.index_with(vocab)

        optim = torch.optim.Adam(model.parameters())

        savepath = os.path.join(
            config.SNAPSHOT_PATH, args.dataset, 'seq2seq',
            datetime.datetime.now().strftime('%Y%m%d-%H%M%S') + "--" +
            args.memo)
        if not os.path.exists(savepath):
            os.makedirs(savepath, mode=0o755)

        trainer = allennlp.training.Trainer(
            model=model,
            optimizer=optim,
            iterator=iterator,
            train_dataset=training_set,
            validation_dataset=validation_set,
            serialization_dir=savepath,
            cuda_device=args.device,
            num_epochs=config.TRAINING_LIMIT,
        )

        trainer.train()

    else:
        testing_set = reader.read(config.DATASETS[args.dataset].test_path)
        model.eval()

        predictor = allennlp.predictors.SimpleSeq2SeqPredictor(model, reader)

        for instance in testing_set:
            print('SRC: ', instance.fields['source_tokens'].tokens)
            print(
                'GOLD:', ' '.join(
                    str(x)
                    for x in instance.fields['target_tokens'].tokens[1:-1]))
            print(
                'PRED:', ' '.join(
                    predictor.predict_instance(instance)['predicted_tokens']))
Example #6
def main():
    parser = utils.opt_parser.get_trainer_opt_parser()
    parser.add_argument('models',
                        nargs='*',
                        help='pretrained models for the same setting')
    parser.add_argument('--test', action="store_true", help='use testing mode')
    parser.add_argument('--emb-dim',
                        type=int,
                        help='basic embedding dimension')
    parser.add_argument('--act-max-layer',
                        type=int,
                        help='maximum number of stacked layers')
    parser.add_argument('--use-act',
                        action="store_true",
                        help='Use adaptive computation time for decoder')
    parser.add_argument('--act-loss-weight',
                        type=float,
                        help="the loss of the act weights")

    parser.add_argument('--enc-layers', type=int, help="layers in encoder")
    parser.add_argument('--act-mode',
                        choices=['basic', 'random', 'mean_field'])
    parser.add_argument('--encoder', choices=['transformer', 'lstm', 'bilstm'])
    parser.add_argument(
        '--decoder',
        choices=['lstm', 'rnn', 'gru', 'ind_rnn', 'n_lstm', 'n_gru'],
    )
    parser.add_argument('--dec-cell-height',
                        type=int,
                        help="the height for n_layer lstm/gru")

    args = parser.parse_args()

    reader = data_adapter.GeoQueryDatasetReader()
    training_set = reader.read(config.DATASETS[args.dataset].train_path)
    try:
        validation_set = reader.read(config.DATASETS[args.dataset].dev_path)
    except Exception:  # the dataset may not provide a dev split
        validation_set = None

    vocab = allennlp.data.Vocabulary.from_instances(training_set)
    if args.epoch:
        config.TRAINING_LIMIT = args.epoch
    # NB: a device id of 0 is falsy here and will not override config.DEVICE
    if args.device:
        config.DEVICE = args.device
    st_ds_conf = get_updated_settings(args)

    model = get_model(vocab, st_ds_conf)

    if args.models:
        model.load_state_dict(torch.load(args.models[0]))

    if not args.test or not args.models:
        iterator = BucketIterator(
            sorting_keys=[("source_tokens", "num_tokens")],
            batch_size=st_ds_conf['batch_sz'])
        iterator.index_with(vocab)

        optim = torch.optim.Adam(model.parameters(),
                                 lr=config.ADAM_LR,
                                 betas=config.ADAM_BETAS,
                                 eps=config.ADAM_EPS)

        savepath = os.path.join(
            config.SNAPSHOT_PATH, args.dataset, 'ada_trans2seq',
            datetime.datetime.now().strftime('%Y%m%d-%H%M%S') + "--" +
            args.memo)
        if not os.path.exists(savepath):
            os.makedirs(savepath, mode=0o755)

        trainer = allennlp.training.Trainer(
            model=model,
            optimizer=optim,
            iterator=iterator,
            train_dataset=training_set,
            validation_dataset=validation_set,
            serialization_dir=savepath,
            cuda_device=config.DEVICE,
            num_epochs=config.TRAINING_LIMIT,
            grad_clipping=config.GRAD_CLIPPING,
        )

        trainer.train()

    else:
        testing_set = reader.read(config.DATASETS[args.dataset].test_path)
        model.eval()

        if config.DEVICE > -1:
            model = model.cuda(config.DEVICE)

        predictor = allennlp.predictors.SimpleSeq2SeqPredictor(model, reader)

        for instance in tqdm.tqdm(testing_set, total=len(testing_set)):
            print('SRC: ', instance.fields['source_tokens'].tokens)
            print(
                'GOLD:', ' '.join(
                    str(x)
                    for x in instance.fields['target_tokens'].tokens[1:-1]))
            del instance.fields['target_tokens']
            output = predictor.predict_instance(instance)
            print('PRED:', ' '.join(output['predicted_tokens']))
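
Examples 2 and 6 both build their network through a get_model(vocab, st_ds_conf) factory that is not shown. Below is a partial sketch of the encoder dispatch such a factory would need, reusing only building blocks that appear elsewhere in these examples; the settings keys and the bilstm branch are assumptions.

def build_encoder(st_ds_conf):
    """Hypothetical encoder factory keyed on st_ds_conf['encoder']."""
    emb_sz = st_ds_conf['emb_sz']
    if st_ds_conf['encoder'] == 'transformer':
        return TransformerEncoder(input_dim=emb_sz,
                                  num_layers=st_ds_conf['max_num_layers'],
                                  num_heads=st_ds_conf['num_heads'],
                                  feedforward_hidden_dim=emb_sz)
    # 'lstm' and 'bilstm' fall back to an AllenNLP-wrapped PyTorch LSTM
    lstm = torch.nn.LSTM(emb_sz, emb_sz,
                         num_layers=st_ds_conf.get('enc_layers', 1),
                         batch_first=True,
                         bidirectional=(st_ds_conf['encoder'] == 'bilstm'))
    return PytorchSeq2SeqWrapper(lstm)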