Beispiel #1
0
 def __init__(self, paths: dict=None):
     """Initialise both parent loaders for a tab-separated dataset.

     :param paths: mapping of split name to file name. When ``None`` the
         defaults ``train.tsv``, ``dev.tsv`` and ``test.tsv`` are used.
     """
     if paths is None:
         paths = {
             'train': 'train.tsv',
             'dev': 'dev.tsv',
             'test': 'test.tsv',
         }
     MatchingLoader.__init__(self, paths=paths)
     # Column headers are passed explicitly: target, the two inputs, then 'pairID'.
     column_names = (Const.TARGET, Const.INPUTS(0), Const.INPUTS(1), 'pairID')
     CSVLoader.__init__(self, sep='\t', headers=column_names)
Beispiel #2
0
 def __init__(self, paths: dict = None):
     """Initialise both parent loaders and record the column-to-field mapping.

     :param paths: mapping of split name to file name. When ``None`` the
         defaults ``train.tsv``, ``dev.tsv`` and ``test.tsv`` are used.
     """
     if paths is None:
         paths = {
             'train': 'train.tsv',
             'dev': 'dev.tsv',
             'test': 'test.tsv',  # the test set has no labels
         }
     MatchingLoader.__init__(self, paths=paths)
     # NOTE(review): presumably read by ``_load`` to rename raw columns to
     # the standard input/target field names -- confirm against the class.
     column_to_field = {
         'question': Const.INPUTS(0),
         'sentence': Const.INPUTS(1),
         'label': Const.TARGET,
     }
     self.fields = column_to_field
     CSVLoader.__init__(self, sep='\t')
Beispiel #3
0
def load_data(path, is_train=0):
    """Load a tab-separated file and derive ``words`` / ``length`` input fields.

    The ``SentenceId``, ``PhraseId`` and ``Phrase`` columns are removed; the
    ``Phrase`` text is first passed through ``get_word`` to build ``words``.

    :param path: location handed to ``CSVLoader.load``.
    :param is_train: when truthy, the ``Sentiment`` column is converted to
        ``int`` and marked as the target.
    :return: the processed dataset.
    """
    dataset = CSVLoader(sep='\t').load(path)
    for id_column in ('SentenceId', 'PhraseId'):
        dataset.delete_field(id_column)

    def to_words(instance):
        return get_word(instance['Phrase'])

    def count_words(instance):
        return len(instance['words'])

    dataset.apply(to_words, new_field_name='words', is_input=True)
    dataset.apply(count_words, new_field_name="length", is_input=True)
    # The raw text is no longer needed once 'words' has been derived from it.
    dataset.delete_field('Phrase')

    if not is_train:
        return dataset

    dataset.apply(lambda instance: int(instance['Sentiment']),
                  new_field_name="Sentiment")
    dataset.set_target('Sentiment')
    return dataset
Beispiel #4
0
    def __init__(self, paths: dict = None):
        """Initialise both parent loaders and record the column-to-field mapping.

        :param paths: mapping of split name to file name. When ``None`` the
            default train / matched / mismatched dev and test files are used.
        """
        if paths is None:
            paths = {
                'train': 'train.tsv',
                'dev_matched': 'dev_matched.tsv',
                'dev_mismatched': 'dev_mismatched.tsv',
                'test_matched': 'test_matched.tsv',
                'test_mismatched': 'test_mismatched.tsv',
                # test_0.9_matched and test_0.9_mismatched come from MNLI
                # version 0.9 (data source: Kaggle); disabled by default:
                # 'test_0.9_matched': 'multinli_0.9_test_matched_unlabeled.txt',
                # 'test_0.9_mismatched': 'multinli_0.9_test_mismatched_unlabeled.txt',
            }
        MatchingLoader.__init__(self, paths=paths)
        CSVLoader.__init__(self, sep='\t')
        # NOTE(review): presumably read by ``_load`` to rename raw columns to
        # the standard input/target field names -- confirm against the class.
        column_to_field = {
            'sentence1_binary_parse': Const.INPUTS(0),
            'sentence2_binary_parse': Const.INPUTS(1),
            'gold_label': Const.TARGET,
        }
        self.fields = column_to_field
Beispiel #5
0
    def _load(self, path):
        """Load ``path`` with ``CSVLoader`` and normalise its fields.

        Columns named in ``self.fields`` are renamed to their mapped field
        names. Afterwards every field whose name contains ``Const.INPUT`` is
        stripped and split on whitespace.

        :param path: file to read; passed through to ``CSVLoader._load``.
        :return: the dataset returned by ``CSVLoader._load`` after renaming
            and splitting.
        """
        ds = CSVLoader._load(self, path)

        for old_name, new_name in self.fields.items():
            # Guard on the *source* column: ``rename_field(old, new)`` needs
            # ``old`` to exist. Testing ``new_name`` here would skip the
            # rename whenever the target name is not already present.
            if old_name in ds.get_field_names():
                ds.rename_field(old_name, new_name)

        # Iterate over a snapshot because ``apply`` rewrites fields while we
        # walk their names.
        # NOTE(review): assumes iterating ``get_all_fields()`` yields field
        # names, as the original loop did -- confirm.
        for field_name in list(ds.get_all_fields()):
            if Const.INPUT in field_name:
                # Bind the current name as a default so the lambda does not
                # depend on the loop variable's later values.
                ds.apply(lambda ins, name=field_name: ins[name].strip().split(),
                         new_field_name=field_name)

        return ds
Beispiel #6
0
    def _load(self, path):
        """Load ``path`` with ``CSVLoader`` and clean up the parsed sentences.

        Columns named in ``self.fields`` are renamed, a target column whose
        first value is ``'hidden'`` is removed, parentheses are stripped from
        both input fields before splitting them on whitespace, and instances
        whose target is ``'-'`` are dropped.

        :param path: file to read; passed through to ``CSVLoader._load``.
        :return: the processed dataset.
        """
        dataset = CSVLoader._load(self, path)

        for raw_name, field_name in self.fields.items():
            if raw_name in dataset.get_field_names():
                dataset.rename_field(raw_name, field_name)

        # The first instance is used to detect a 'hidden' target column.
        if Const.TARGET in dataset.get_field_names() and dataset[0][Const.TARGET] == 'hidden':
            dataset.delete_field(Const.TARGET)

        # Translation table that deletes '(' and ')'.
        drop_parentheses = str.maketrans('', '', '()')

        def tokenize(text):
            return text.translate(drop_parentheses).strip().split()

        for input_name in (Const.INPUTS(0), Const.INPUTS(1)):
            dataset.apply(lambda ins, key=input_name: tokenize(ins[key]),
                          new_field_name=input_name)

        # Re-check: the target column may have been deleted above.
        if Const.TARGET in dataset.get_field_names():
            dataset.drop(lambda ins: ins[Const.TARGET] == '-')
        return dataset
Beispiel #7
0
 def _load(self, path):
     """Return the result of ``CSVLoader._load`` for ``path`` unchanged."""
     return CSVLoader._load(self, path)