Esempio n. 1
0
    def setUp(self):
        """Create the data-source and tensorizer fixtures and store them on ``self``.

        Two ``SquadDataSource`` instances are built from the tiny SQuAD test
        files (JSON and TSV); only the train file is set, eval and test are
        ``None``. Two ``SquadTensorizer`` instances are built with the same
        maximum sequence length, one using a WordPiece tokenizer and one using
        a regex-splitting tokenizer.
        """
        # --- data sources: same settings, differing only in the input file ---
        json_source_config = SquadDataSource.Config(
            train_filename=tests_module.test_file("squad_tiny.json"),
            eval_filename=None,
            test_filename=None,
        )
        self.json_data_source = SquadDataSource.from_config(json_source_config)

        tsv_source_config = SquadDataSource.Config(
            train_filename=tests_module.test_file("squad_tiny.tsv"),
            eval_filename=None,
            test_filename=None,
        )
        self.tsv_data_source = SquadDataSource.from_config(tsv_source_config)

        # --- tensorizers: both use the same sequence-length limit ---
        seq_len_limit = 250

        wordpiece_tokenizer_config = WordPieceTokenizer.Config(
            wordpiece_vocab_path="pytext/data/test/data/wordpiece_1k.txt",
        )
        wordpiece_tensorizer_config = SquadTensorizer.Config(
            tokenizer=wordpiece_tokenizer_config,
            max_seq_len=seq_len_limit,
        )
        self.tensorizer_with_wordpiece = SquadTensorizer.from_config(
            wordpiece_tensorizer_config
        )

        # The alphanumeric variant splits the text on runs of non-word characters.
        alphanumeric_tokenizer_config = Tokenizer.Config(split_regex=r"\W+")
        alphanumeric_tensorizer_config = SquadTensorizer.Config(
            tokenizer=alphanumeric_tokenizer_config,
            max_seq_len=seq_len_limit,
        )
        self.tensorizer_with_alphanumeric = SquadTensorizer.from_config(
            alphanumeric_tensorizer_config
        )
Esempio n. 2
0
 # Input configuration extending BaseModel.Config.ModelInput: declares the
 # tensorizer configs for this model, one field per input.
 class ModelInput(BaseModel.Config.ModelInput):
     # Tensorizer config for the SQuAD input; defaults to a default-constructed
     # SquadTensorizer.Config.
     squad_input: SquadTensorizer.Config = SquadTensorizer.Config()
     # Label tensorizer config bound to the "has_answer" column.
     # NOTE(review): presumably a per-example flag for whether the question
     # is answerable — confirm against the data source schema.
     has_answer: LabelTensorizer.Config = LabelTensorizer.Config(
         column="has_answer")