コード例 #1
0
ファイル: glue.py プロジェクト: matteoghera/Progetto_ML
    def tokenization(self, tokenizer, max_len, batch_size, num_workers):
        """Tokenize the dev, train and test splits of this task.

        Builds a ``DatasetPlus`` over each split (columns ``sentence1`` /
        ``sentence2``, with ``label`` as target for the labelled splits)
        and stores the tokenized view on the corresponding attribute:
        ``dev_tokenized``, ``train_tokenized`` and ``test_tokenized``.
        The test split is built without a target column — presumably its
        labels are unavailable; confirm against the dataset loader.
        """
        def _tokenize(split, labelled):
            # Shared column mapping; the target column is only present
            # for the labelled (dev/train) splits.
            columns = {"column_sequence1": "sentence1",
                       "column_sequence2": "sentence2"}
            if labelled:
                columns["column_target"] = "label"
            return DatasetPlus(split,
                               tokenizer,
                               max_len,
                               batch_size,
                               num_workers,
                               **columns).viewData()

        self.dev_tokenized = _tokenize(self.dev, labelled=True)
        self.train_tokenized = _tokenize(self.train, labelled=True)
        self.test_tokenized = _tokenize(self.test, labelled=False)
コード例 #2
0
ファイル: glue.py プロジェクト: matteoghera/Progetto_ML
 def tokenization(self, tokenizer, max_len, batch_size, num_workers):
     """Tokenize the inherited splits, then the MSR paraphrase test file.

     Delegates to the parent implementation first, then builds a labelled
     ``DatasetPlus`` over ``self.msr_paraphrase_test`` and stores its
     tokenized view in ``self.train_tokenized``.

     NOTE(review): this assignment overwrites the ``train_tokenized``
     value the ``super()`` call just produced from ``self.train`` —
     confirm that discarding the parent's train tokenization (and using
     the MSR *test* file as training data) is intentional.
     """
     super().tokenization(tokenizer, max_len, batch_size, num_workers)
     msr_dataset = DatasetPlus(self.msr_paraphrase_test,
                               tokenizer,
                               max_len,
                               batch_size,
                               num_workers,
                               column_sequence1="sentence1",
                               column_sequence2="sentence2",
                               column_target="label")
     self.train_tokenized = msr_dataset.viewData()