def tokenization(self, tokenizer, max_len, batch_size, num_workers):
    # Tokenize the dev split: sentence pairs come from the "sentence1" and
    # "sentence2" columns, the gold label from "label".
    dataset_plus = DatasetPlus(self.dev, tokenizer, max_len, batch_size,
                               num_workers, column_sequence1="sentence1",
                               column_sequence2="sentence2", column_target="label")
    self.dev_tokenized = dataset_plus.viewData()
    # Tokenize the train split with the same column mapping.
    dataset_plus = DatasetPlus(self.train, tokenizer, max_len, batch_size,
                               num_workers, column_sequence1="sentence1",
                               column_sequence2="sentence2", column_target="label")
    self.train_tokenized = dataset_plus.viewData()
    # The test split carries no gold labels, so no target column is passed.
    dataset_plus = DatasetPlus(self.test, tokenizer, max_len, batch_size,
                               num_workers, column_sequence1="sentence1",
                               column_sequence2="sentence2")
    self.test_tokenized = dataset_plus.viewData()
def tokenization(self, tokenizer, max_len, batch_size, num_workers):
    # Tokenize dev/train/test exactly as the parent class does.
    super().tokenization(tokenizer, max_len, batch_size, num_workers)
    # Then tokenize the MSR Paraphrase test split and use it as the training
    # data, overwriting the train_tokenized set by the parent call above.
    dataset_plus = DatasetPlus(self.msr_paraphrase_test, tokenizer, max_len,
                               batch_size, num_workers,
                               column_sequence1="sentence1",
                               column_sequence2="sentence2", column_target="label")
    self.train_tokenized = dataset_plus.viewData()
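# A minimal usage sketch, kept as a comment so it does not alter this module.
# The owning class name and the tokenizer choice below are assumptions for
# illustration only (this file shows just the tokenization methods themselves);
# DatasetPlus and viewData() are used exactly as in the methods above.
#
#     from transformers import AutoTokenizer
#
#     tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
#     data = PairedSentenceDataModule()  # hypothetical owner of tokenization()
#     data.tokenization(tokenizer, max_len=128, batch_size=32, num_workers=4)
#     # After this call, data.dev_tokenized, data.train_tokenized and
#     # data.test_tokenized hold whatever DatasetPlus.viewData() returns.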