def setUp(self):
    """Create the data sources and tensorizers stored on ``self``.

    Sets four attributes: ``json_data_source`` and ``tsv_data_source``
    (built from the ``squad_tiny`` JSON and TSV fixtures), plus
    ``tensorizer_with_wordpiece`` and ``tensorizer_with_alphanumeric``
    (both capped at ``max_seq_len=250``, differing only in tokenizer).
    """

    def train_only_source(fixture_name):
        # Only the train split is populated; eval and test are passed as
        # None explicitly.
        source_config = SquadDataSource.Config(
            train_filename=tests_module.test_file(fixture_name),
            eval_filename=None,
            test_filename=None,
        )
        return SquadDataSource.from_config(source_config)

    self.json_data_source = train_only_source("squad_tiny.json")
    self.tsv_data_source = train_only_source("squad_tiny.tsv")

    # NOTE(review): this vocab path is a relative literal rather than going
    # through tests_module.test_file -- presumably it depends on the working
    # directory the tests are launched from; confirm.
    wordpiece_tokenizer = WordPieceTokenizer.Config(
        wordpiece_vocab_path="pytext/data/test/data/wordpiece_1k.txt"
    )
    wordpiece_config = SquadTensorizer.Config(
        tokenizer=wordpiece_tokenizer,
        max_seq_len=250,
    )
    self.tensorizer_with_wordpiece = SquadTensorizer.from_config(wordpiece_config)

    # Tokenizer configured to split on runs of non-word characters.
    regex_tokenizer = Tokenizer.Config(split_regex=r"\W+")
    alphanumeric_config = SquadTensorizer.Config(
        tokenizer=regex_tokenizer,
        max_seq_len=250,
    )
    self.tensorizer_with_alphanumeric = SquadTensorizer.from_config(
        alphanumeric_config
    )
class ModelInput(BaseModel.Config.ModelInput):
    """Input declaration extending ``BaseModel.Config.ModelInput``.

    Declares two fields, each defaulting to a tensorizer config instance.
    """

    # Default SquadTensorizer configuration, no overrides.
    squad_input: SquadTensorizer.Config = SquadTensorizer.Config()

    # Label tensorizer config pointed at the "has_answer" column.
    has_answer: LabelTensorizer.Config = LabelTensorizer.Config(
        column="has_answer",
    )