Beispiel #1
0
 def _get_data(self):
     """Build the dataset manager for the ParsCit train/dev/test splits.

     The three split files are looked up by fixed name under
     ``self.data_dir`` (which must provide ``joinpath`` -- presumably a
     ``pathlib.Path``; confirm against the enclosing class).

     Returns:
         A ``SeqLabellingDatasetManager`` constructed from the three
         split paths.
     """
     root = self.data_dir
     train_path = root.joinpath("parscit.train")
     dev_path = root.joinpath("parscit.dev")
     test_path = root.joinpath("parscit.test")
     return SeqLabellingDatasetManager(
         train_filename=train_path,
         dev_filename=dev_path,
         test_filename=test_path,
     )
Beispiel #2
0
 def build_dataset(self):
     """Create the ParsCit sequence-labelling dataset manager.

     The train, dev and test files are resolved by fixed name relative
     to ``DATA_DIR`` and handed to the manager as ``pathlib.Path``
     objects (no string conversion is performed here).

     Returns:
         A ``SeqLabellingDatasetManager`` built from the three split files.
     """
     root = pathlib.Path(DATA_DIR)
     return SeqLabellingDatasetManager(
         train_filename=root / "parscit.train",
         dev_filename=root / "parscit.dev",
         test_filename=root / "parscit.test",
     )
def setup_parscit_dataset_manager():
    """Create a dataset manager over the ParsCit files found in ``DATA_DIR``.

    Each split path is converted to ``str`` before being passed to the
    manager.

    Returns:
        A ``SeqLabellingDatasetManager`` built from the train, dev and
        test files.
    """
    root = pathlib.Path(DATA_DIR)
    train_path, dev_path, test_path = (
        str(root.joinpath(file_name))
        for file_name in ("parscit.train", "parscit.dev", "parscit.test")
    )
    return SeqLabellingDatasetManager(
        train_filename=train_path,
        dev_filename=dev_path,
        test_filename=test_path,
    )
 def _get_data(self):
     """Build the ParsCit dataset manager from files resolved via ``cached_path``.

     Each split is resolved by calling ``cached_path`` with the expected
     location under ``self.data_dir``, the matching URL attribute on
     ``self`` and ``unzip=False``. The splits are resolved in the order
     train, dev, test.
     NOTE(review): ``cached_path`` presumably fetches the file from the
     URL when it is missing locally -- confirm against its implementation.

     Returns:
         A ``SeqLabellingDatasetManager`` built from whatever the three
         ``cached_path`` calls return.
     """
     train_file = cached_path(
         path=self.data_dir.joinpath("parscit.train"),
         url=self.train_data_file_url,
         unzip=False,
     )
     dev_file = cached_path(
         path=self.data_dir.joinpath("parscit.dev"),
         url=self.dev_data_file_url,
         unzip=False,
     )
     test_file = cached_path(
         path=self.data_dir.joinpath("parscit.test"),
         url=self.test_data_file_url,
         unzip=False,
     )
     return SeqLabellingDatasetManager(
         train_filename=train_file,
         dev_filename=dev_file,
         test_filename=test_file,
     )
Beispiel #5
0
def seq_dataset_manager(tmpdir_factory):
    """Create a dataset manager backed by three small temporary files.

    A train, dev and test file are written into separate temporary
    directories. Each file holds two lines of space-separated
    ``word###label`` tokens: two tokens on the first line and three on
    the second.

    Args:
        tmpdir_factory: Object providing ``mktemp(name)`` whose result
            supports ``join`` and ``write`` (presumably pytest's
            ``tmpdir_factory`` fixture -- confirm at the call site).

    Returns:
        A ``SeqLabellingDatasetManager`` built from the string paths of
        the three files.
    """

    def _write_split(dir_name, file_name, contents):
        # Each split gets its own temporary directory.
        target = tmpdir_factory.mktemp(dir_name).join(file_name)
        target.write(contents)
        return target

    train_path = _write_split(
        "train_data",
        "train.txt",
        "word11_train###label1 word21_train###label2\nword12_train###label1 word22_train###label2 word32_train###label3",
    )
    dev_path = _write_split(
        "dev_data",
        "dev.txt",
        "word11_dev###label1 word21_dev###label2\nword12_dev###label1 word22_dev###label2 word32_dev###label3",
    )
    test_path = _write_split(
        "test_data",
        "test.txt",
        "word11_test###label1 word21_test###label2\nword12_test###label1 word22_test###label2 word32_test###label3",
    )

    return SeqLabellingDatasetManager(
        train_filename=str(train_path),
        dev_filename=str(dev_path),
        test_filename=str(test_path),
    )
Beispiel #6
0
    parser.add_argument(
        "--sample_proportion",
        help="Sampling proportion of dataset for debugging",
        type=float,
    )

    args = parser.parse_args()
    msg_printer = wasabi.Printer()

    data_dir = pathlib.Path(DATA_DIR)
    train_filename = data_dir.joinpath("parscit.train")
    dev_filename = data_dir.joinpath("parscit.dev")
    test_filename = data_dir.joinpath("parscit.test")
    data_manager = SeqLabellingDatasetManager(
        train_filename=train_filename,
        dev_filename=dev_filename,
        test_filename=test_filename,
    )
    word_embedder = TrainableWordEmbedder(
        embedding_type=args.emb_type, device=args.device, datasets_manager=data_manager
    )

    char_embedder = CharEmbedder(
        char_embedding_dimension=args.char_emb_dim,
        hidden_dimension=args.char_encoder_hidden_dim,
        datasets_manager=data_manager,
        device=args.device,
    )

    elmo_embedder = BowElmoEmbedder(
        datasets_manager=data_manager, layer_aggregation="sum", device=args.device