Example #1
0
        combine_strategy=args.combine_strategy,
        rnn_bias=True,
        device=torch.device(args.device),
        num_layers=args.num_layers,
        add_projection_layer=False,
    )
    model = RnnSeqCrfTagger(
        rnn2seqencoder=lstm2seqencoder,
        encoding_dim=2 * args.hidden_dim if args.bidirectional
        and args.combine_strategy == "concat" else args.hidden_dim,
        device=torch.device(args.device),
        tagging_type="BIOUL",
        datasets_manager=data_manager,
    )

    optimizer = optim.Adam(params=model.parameters(),
                           lr=args.lr,
                           weight_decay=args.reg)

    train_metric = TokenClassificationAccuracy(datasets_manager=data_manager)
    dev_metric = TokenClassificationAccuracy(datasets_manager=data_manager)
    test_metric = TokenClassificationAccuracy(datasets_manager=data_manager)

    engine = Engine(
        model=model,
        datasets_manager=data_manager,
        optimizer=optimizer,
        batch_size=args.bs,
        save_dir=args.model_save_dir,
        num_epochs=args.epochs,
        save_every=args.save_every,
Example #2
0
def setup_parscit_inference(seq_dataset_manager, tmpdir_factory):
    """Build, briefly train, and return a sequence-labelling inference client.

    Wires up a word+character embedder, a BiLSTM sequence encoder, and a CRF
    tagger, runs a one-epoch training via ``Engine``, and wraps the best saved
    checkpoint in a ``SequenceLabellingInference`` client.

    Parameters
    ----------
    seq_dataset_manager :
        Dataset manager supplying train/dev/test splits (project type).
    tmpdir_factory :
        pytest ``tmpdir_factory`` fixture used to create the experiment dir.

    Returns
    -------
    SequenceLabellingInference
        Inference client loaded with the trained tagger.
    """
    manager = seq_dataset_manager
    hidden_size = 100
    is_bidirectional = True
    merge_mode = "concat"

    word_emb = WordEmbedder(embedding_type="glove_6B_50")
    char_emb = CharEmbedder(
        char_embedding_dimension=10,
        hidden_dimension=20,
        datasets_manager=manager,
    )
    combined_embedder = ConcatEmbedders([word_emb, char_emb])

    seq_encoder = Lstm2SeqEncoder(
        embedder=combined_embedder,
        dropout_value=0.0,
        hidden_dim=hidden_size,
        bidirectional=is_bidirectional,
        combine_strategy=merge_mode,
        rnn_bias=False,
        add_projection_layer=False,
    )

    # Encoder output width doubles when both LSTM directions are concatenated.
    if is_bidirectional and merge_mode == "concat":
        crf_input_dim = 2 * hidden_size
    else:
        crf_input_dim = hidden_size

    tagger = RnnSeqCrfTagger(
        rnn2seqencoder=seq_encoder,
        encoding_dim=crf_input_dim,
        datasets_manager=manager,
    )

    # One independent accuracy tracker per split.
    train_metric = TokenClassificationAccuracy(datasets_manager=manager)
    dev_metric = TokenClassificationAccuracy(datasets_manager=manager)
    test_metric = TokenClassificationAccuracy(datasets_manager=manager)

    adam = torch.optim.Adam(params=tagger.parameters())
    experiment_dir = tmpdir_factory.mktemp("experiment_1")

    engine = Engine(
        model=tagger,
        datasets_manager=manager,
        optimizer=adam,
        batch_size=1,
        save_dir=experiment_dir,
        num_epochs=1,
        save_every=1,
        log_train_metrics_every=10,
        train_metric=train_metric,
        validation_metric=dev_metric,
        test_metric=test_metric,
        track_for_best="macro_fscore",
    )
    engine.run()

    # Engine persists the best checkpoint under save_dir as "best_model.pt".
    best_model_path = pathlib.Path(experiment_dir).joinpath("best_model.pt")

    return SequenceLabellingInference(
        model=tagger,
        model_filepath=best_model_path,
        datasets_manager=manager,
    )
Example #3
0
        combine_strategy=args.combine_strategy,
        rnn_bias=True,
        device=args.device,
        num_layers=args.num_layers,
        add_projection_layer=args.add_projection_layer,
    )
    model = RnnSeqCrfTagger(
        rnn2seqencoder=lstm2seqencoder,
        encoding_dim=2 * args.hidden_dim,
        device=args.device,
        tagging_type="BIOUL",
        datasets_manager=data_manager,
        include_start_end_trainsitions=False,
    )

    optimizer = optim.Adam(params=model.parameters(), lr=args.lr)

    train_metric = ConLL2003Metrics(datasets_manager=data_manager)
    dev_metric = ConLL2003Metrics(datasets_manager=data_manager)
    test_metric = ConLL2003Metrics(datasets_manager=data_manager)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer,
        factor=0.1,
        mode="max",
        patience=25,
        verbose=True,
        threshold=1e-3,
    )

    engine = Engine(
Example #4
0
        combine_strategy=args.combine_strategy,
        rnn_bias=True,
        device=torch.device(args.device),
        num_layers=args.num_layers,
    )
    model = RnnSeqCrfTagger(
        rnn2seqencoder=lstm2seqencoder,
        encoding_dim=2 * args.hidden_dim
        if args.bidirectional and args.combine_strategy == "concat"
        else args.hidden_dim,
        device=torch.device(args.device),
        tagging_type="BIOUL",
        datasets_manager=data_manager,
    )

    optimizer = optim.Adam(params=model.parameters(), lr=args.lr, weight_decay=args.reg)

    train_metric = TokenClassificationAccuracy(datasets_manager=data_manager)
    dev_metric = TokenClassificationAccuracy(datasets_manager=data_manager)
    test_metric = TokenClassificationAccuracy(datasets_manager=data_manager)

    engine = Engine(
        model=model,
        datasets_manager=data_manager,
        optimizer=optimizer,
        batch_size=args.bs,
        save_dir=args.model_save_dir,
        num_epochs=args.epochs,
        save_every=args.save_every,
        log_train_metrics_every=args.log_train_metrics_every,
        track_for_best="macro_fscore",