Beispiel #1
0
    def __init__(self, taskpath, max_seq_len, load_data, seed=1111):
        '''Load the train/valid/test splits for the ANLI entailment task.

        Train and validation data are read from the MultiNLI files under
        ``taskpath`` through ``self.loadFile``; the test split is read from
        ``adversarial_nli.tsv`` through ``self.loadAux``.

        Args:
            taskpath: directory that holds the data files.
            max_seq_len: forwarded unchanged to the loaders.
            load_data: forwarded unchanged to the loaders.
            seed: value stored on ``self.seed``.
        '''
        logging.debug('***** Transfer task : ANLI Entailment*****\n\n')
        self.seed = seed

        # Target-name -> integer mapping handed to every loader.
        targ_map = {'neutral': 0, 'entailment': 1, 'contradiction': 2}

        # sort_split is defined elsewhere; each result is indexed below.
        train_path = os.path.join(taskpath, 'multinli_1.0_train.txt')
        train = sort_split(
            self.loadFile(train_path, max_seq_len, targ_map, load_data))

        valid_path = os.path.join(taskpath, 'multinli_1.0_dev_matched.txt')
        valid = sort_split(
            self.loadFile(valid_path, max_seq_len, targ_map, load_data))

        test_path = os.path.join(taskpath, 'adversarial_nli.tsv')
        test = sort_split(
            self.loadAux(test_path, max_seq_len, targ_map, load_data))

        # Pool fields 0 and 1 of every split, in train/valid/test order.
        pooled = train[0] + train[1]
        pooled = pooled + valid[0] + valid[1]
        pooled = pooled + test[0] + test[1]
        self.samples = pooled
        self.data = {'train': train, 'valid': valid, 'test': test}
Beispiel #2
0
    def __init__(self, taskpath, max_seq_len, load_data, seed=1111):
        '''Load the train/valid/test splits for the CoLA task.

        ``train.tsv`` and ``dev.tsv`` are read with ``self.loadFile`` and
        ``test.tsv`` with ``self.loadTest``; every loaded split is passed
        through ``sort_split`` with ``pair_input=0``.

        Args:
            taskpath: directory that holds the ``.tsv`` files.
            max_seq_len: forwarded unchanged to the loaders.
            load_data: forwarded unchanged to the loaders.
            seed: value stored on ``self.seed``.
        '''
        logging.debug('***** Transfer task : CoLA *****\n\n')
        self.seed = seed

        def _sorted_split(loader, filename):
            # Load one file, then hand it to sort_split with pair_input=0.
            raw = loader(os.path.join(taskpath, filename),
                         max_seq_len, load_data)
            return sort_split(raw, pair_input=0)

        train = _sorted_split(self.loadFile, 'train.tsv')
        valid = _sorted_split(self.loadFile, 'dev.tsv')
        test = _sorted_split(self.loadTest, 'test.tsv')

        # Only field 0 of each split is pooled for this task.
        train_text, valid_text, test_text = train[0], valid[0], test[0]
        self.samples = train_text + valid_text + test_text
        self.data = {
            'train': train,
            'valid': valid,
            'test': test,
        }
Beispiel #3
0
    def __init__(self, taskpath, max_seq_len, load_data, seed=1111):
        '''Load the train/valid/test splits for the RTE task.

        ``train.tsv`` and ``dev.tsv`` are read with ``self.loadFile`` and
        ``test.tsv`` with ``self.loadTest``; every loaded split is passed
        through ``sort_split``.

        Args:
            taskpath: directory that holds the ``.tsv`` files.
            max_seq_len: forwarded unchanged to the loaders.
            load_data: forwarded unchanged to the loaders.
            seed: value stored on ``self.seed``.
        '''
        logging.debug(
            '***** Transfer task : Recognizing Textual Entailment*****\n\n')
        self.seed = seed

        train_file = os.path.join(taskpath, 'train.tsv')
        train = sort_split(self.loadFile(train_file, max_seq_len, load_data))

        valid_file = os.path.join(taskpath, 'dev.tsv')
        valid = sort_split(self.loadFile(valid_file, max_seq_len, load_data))

        test_file = os.path.join(taskpath, 'test.tsv')
        test = sort_split(self.loadTest(test_file, max_seq_len, load_data))

        # Pool fields 0 and 1 of every split, in train/valid/test order.
        collected = train[0] + train[1]
        collected = collected + valid[0] + valid[1]
        collected = collected + test[0] + test[1]
        self.samples = collected
        self.data = {'train': train, 'valid': valid, 'test': test}
Beispiel #4
0
    def __init__(self, taskpath, max_seq_len, load_data, seed=1111):
        '''Load the train/valid/test splits for the QNLI classification task.

        ``train.tsv`` and ``dev.tsv`` are read with ``self.loadFile`` and
        ``test.tsv`` with ``self.loadTest``; every loaded split is passed
        through ``sort_split``.

        Args:
            taskpath: directory that holds the ``.tsv`` files.
            max_seq_len: forwarded unchanged to the loaders.
            load_data: forwarded unchanged to the loaders.
            seed: value stored on ``self.seed``.
        '''
        logging.debug('***** Transfer task : QNLI Classification *****\n\n')
        self.seed = seed

        def _load(loader, filename):
            # NOTE(review): sort_split is defined elsewhere; the original
            # comment says data is sorted (by s2 first) to reduce padding --
            # confirm against sort_split itself.
            full_path = os.path.join(taskpath, filename)
            return sort_split(loader(full_path, max_seq_len, load_data))

        train = _load(self.loadFile, "train.tsv")
        valid = _load(self.loadFile, "dev.tsv")
        test = _load(self.loadTest, "test.tsv")

        # Pool fields 0 and 1 of every split, in train/valid/test order.
        gathered = train[0] + train[1]
        gathered = gathered + valid[0] + valid[1]
        gathered = gathered + test[0] + test[1]
        self.samples = gathered
        self.data = {
            'train': train,
            'valid': valid,
            'test': test,
        }
Beispiel #5
0
    def __init__(self, taskpath, max_seq_len, load_data, seed=1111):
        '''Load the train/valid/test splits for the QQP task.

        ``train.tsv`` and ``dev.tsv`` are read with ``self.loadFile`` and
        ``test.tsv`` with ``self.loadTest``; every loaded split is passed
        through ``sort_split``.

        Args:
            taskpath: directory that holds the ``.tsv`` files.
            max_seq_len: forwarded unchanged to the loaders.
            load_data: forwarded unchanged to the loaders.
            seed: value stored on ``self.seed``.
        '''
        logging.debug(
            '***** Transfer task : Quora Question Similarity*****\n\n')
        self.seed = seed

        def _in_task_dir(filename):
            # Resolve a data file name relative to the task directory.
            return os.path.join(taskpath, filename)

        train = sort_split(
            self.loadFile(_in_task_dir('train.tsv'), max_seq_len, load_data))
        valid = sort_split(
            self.loadFile(_in_task_dir('dev.tsv'), max_seq_len, load_data))
        test = sort_split(
            self.loadTest(_in_task_dir('test.tsv'), max_seq_len, load_data))

        # Pool fields 0 and 1 of every split, in train/valid/test order.
        merged = train[0] + train[1]
        merged = merged + valid[0] + valid[1]
        merged = merged + test[0] + test[1]
        self.samples = merged
        self.data = {'train': train, 'valid': valid, 'test': test}
Beispiel #6
0
 def __init__(self, taskpath, max_seq_len=50, load_data=1, seed=1111):
     '''Load the MNLI entailment task: train, valid and three test sets.

     Train and validation data are read with ``self.loadFile``; the
     matched, mismatched and diagnostic test files are read with
     ``self.loadTest``. Every loaded split is passed through ``sort_split``.

     Args:
         taskpath: directory that holds the data files.
         max_seq_len: forwarded unchanged to the loaders (default 50).
         load_data: forwarded unchanged to the loaders (default 1).
         seed: value stored on ``self.seed``.
     '''
     logging.debug('***** Transfer task : MNLI Entailment*****\n\n')
     self.seed = seed

     # Target-name -> integer mapping handed to every loader.
     targ_map = {'neutral': 0, 'entailment': 1, 'contradiction': 2}

     def _load(loader, filename):
         # Load one file from the task directory and run sort_split on it.
         full_path = os.path.join(taskpath, filename)
         loaded = loader(full_path, max_seq_len, targ_map, load_data)
         return sort_split(loaded)

     train = _load(self.loadFile, 'multinli_1.0_train.txt')
     valid = _load(self.loadFile, 'multinli_1.0_dev_both.txt')
     test_m = _load(self.loadTest, 'mnli_matched_test_ans.tsv')
     test_mm = _load(self.loadTest, 'mnli_mismatched_test_ans.tsv')
     test_d = _load(self.loadTest, 'diagnostic_test_ans.tsv')

     # Pool fields 0 and 1 of every split, in the order they were loaded.
     pooled = train[0] + train[1]
     pooled = pooled + valid[0] + valid[1]
     pooled = pooled + test_m[0] + test_m[1]
     pooled = pooled + test_mm[0] + test_mm[1]
     pooled = pooled + test_d[0] + test_d[1]
     self.samples = pooled

     # The matched test set is exposed under the plain 'test' key.
     self.data = {
         'train': train,
         'valid': valid,
         'test': test_m,
         'test_mismatched': test_mm,
         'diagnostic': test_d,
     }