Beispiel #1
0
def make(dataset):
    """Write a claim-mapped, label-negated copy of the ``dataset`` split.

    Each claim is replaced by its entry in a claim map and each value in
    ``correctLabelW0orW1`` is logically negated. The result is saved
    tab-separated, without the index column, to
    ``data/arct/<dataset>-adv-full.txt``.

    Args:
        dataset: Name of the split, passed to ``data.load``. ``'dev'``
            selects ``dev_claim_map``; every other value selects
            ``test_claim_map``.
    """
    if dataset == 'dev':
        claim_map = dev_claim_map
    else:
        claim_map = test_claim_map
    source = data.load(dataset)
    file_name = '%s-adv-full.txt' % dataset

    mapped_claims = [claim_map[claim] for claim in list(source.claim)]
    # `not` always yields a Python bool, so the flipped labels are booleans.
    flipped_labels = [not label for label in source.correctLabelW0orW1]

    adv = source.copy()
    adv.claim = mapped_claims
    adv.correctLabelW0orW1 = flipped_labels

    out_path = 'data/arct/%s' % file_name
    adv.to_csv(out_path, sep='\t', index=False)
Beispiel #2
0
 def test(self, args):
     """Return the data loader for the 'test-adv' split.

     The split is loaded with ``data.load`` and handed, together with
     ``args``, to ``self.get_data_loader``; that call's result is returned.
     """
     adv_points = data.load('test-adv')
     return self.get_data_loader(adv_points, args)
Beispiel #3
0
 def dev(self, args):
     """Return the data loader for the 'dev-adv' split.

     The split is loaded with ``data.load`` and handed, together with
     ``args``, to ``self.get_data_loader``; that call's result is returned.
     """
     adv_points = data.load('dev-adv')
     return self.get_data_loader(adv_points, args)
Beispiel #4
0
 def train(self, args):
     """Return the data loader for the 'train-adv' split.

     Also records the length of the loaded split on
     ``self.n_training_points`` before building the loader with
     ``self.get_data_loader``.
     """
     adv_points = data.load('train-adv')
     n_points = len(adv_points)
     self.n_training_points = n_points
     return self.get_data_loader(adv_points, args)
Beispiel #5
0
 def test(self, args):
     """Return the data loader for the 'test' split.

     The split is loaded with ``data.load`` and handed, together with
     ``args``, to ``self.get_data_loader``; that call's result is returned.
     """
     build_loader = self.get_data_loader
     return build_loader(data.load('test'), args)
Beispiel #6
0
 def dev(self, args):
     """Return the data loader for the 'dev' split.

     The split is loaded with ``data.load`` and handed, together with
     ``args``, to ``self.get_data_loader``; that call's result is returned.
     """
     build_loader = self.get_data_loader
     return build_loader(data.load('dev'), args)
Beispiel #7
0
from util import text


def flatten(list_of_lists):
    """Concatenate the items of every inner iterable into one flat list.

    Only one level of nesting is removed; the order of items is preserved.
    """
    flat = []
    for sublist in list_of_lists:
        flat.extend(sublist)
    return flat


if __name__ == '__main__':
    print('Building ARCT vocab...')

    # grab all sents from all data subsets
    datasets = ['train', 'dev', 'test']
    sent_cols = ['claim', 'reason', 'warrant0', 'warrant1']
    sents = []
    for dataset in datasets:
        df = data.load(dataset)
        for _, row in df.iterrows():
            for col in sent_cols:
                sents.append(row[col])

    # tokenize
    tokens = set(flatten([text.tokenize(s) for s in sents]))

    # build the vocab dictionary
    vocab = dict(zip(tokens, range(len(tokens))))
    rev_vocab = {v: k for k, v in vocab.items()}

    # save the vocab dictionary
    vocab_path = os.path.join(glovar.ARCT_DIR, 'vocab.json')
    rev_vocab_path = os.path.join(glovar.ARCT_DIR, 'rev_vocab.json')
    with open(vocab_path, 'w') as f:
Beispiel #8
0
def merge(dataset):
    """Concatenate a split with its '-adv' counterpart and save the result.

    Loads ``dataset`` and ``'<dataset>-adv'`` through ``data.load``, stacks
    the two frames with ``pd.concat`` (original rows first) and writes them
    tab-separated, without the index column, to ``data/arct/<dataset>.csv``.

    Args:
        dataset: Name of the split to merge.
    """
    original = data.load(dataset)
    adversarial = data.load('%s-adv' % dataset)
    merged = pd.concat([original, adversarial])
    # NOTE(review): if data.load(dataset) reads this same path, the original
    # file is overwritten by the merged one -- confirm this is intended.
    out_path = 'data/arct/%s.csv' % dataset
    merged.to_csv(out_path, sep='\t', index=False)
Beispiel #9
0
 def test(self, args):
     """Return the data loader for the 'test-original' split.

     The loaded frame is converted with ``self.create_examples`` and the
     examples are passed, with ``args``, to ``self.get_data_loader``.
     """
     frame = data.load('test-original')
     # NOTE(review): the attribute is named n_train_examples but is set from
     # the test split here -- confirm this is intended.
     self.n_train_examples = len(frame)
     test_examples = self.create_examples(frame)
     return self.get_data_loader(test_examples, args)
Beispiel #10
0
 def train(self, args):
     """Return the data loader for the 'train-original' split.

     Records the length of the loaded frame on ``self.n_training_points``,
     converts the frame with ``self.create_examples`` and passes the
     examples, with ``args``, to ``self.get_data_loader``.
     """
     frame = data.load('train-original')
     self.n_training_points = len(frame)
     train_examples = self.create_examples(frame)
     return self.get_data_loader(train_examples, args)
Beispiel #11
0
 def test(self, args):
     """Return the data loader for the 'test-adv-negated' split.

     The loaded frame is converted with ``self.create_examples`` and the
     examples are passed, with ``args``, to ``self.get_data_loader``.
     """
     frame = data.load('test-adv-negated')
     # NOTE(review): n_training_points is overwritten with the size of a
     # test split here -- confirm this is intended.
     self.n_training_points = len(frame)
     test_examples = self.create_examples(frame)
     return self.get_data_loader(test_examples, args)
Beispiel #12
0
 def dev(self, args):
     """Return the data loader for the 'dev-adv-swapped' split.

     The loaded frame is converted with ``self.create_examples`` and the
     examples are passed, with ``args``, to ``self.get_data_loader``.
     """
     frame = data.load('dev-adv-swapped')
     # NOTE(review): n_training_points is overwritten with the size of a
     # dev split here -- confirm this is intended.
     self.n_training_points = len(frame)
     dev_examples = self.create_examples(frame)
     return self.get_data_loader(dev_examples, args)
Beispiel #13
0
 def dev(self, args):
     """Return the data loader for the 'dev' split.

     The loaded frame is converted with ``self.create_examples`` and the
     examples are passed, with ``args``, to ``self.get_data_loader``.
     """
     frame = data.load('dev')
     # NOTE(review): the attribute is named n_train_examples but is set from
     # the dev split here -- confirm this is intended.
     self.n_train_examples = len(frame)
     dev_examples = self.create_examples(frame)
     return self.get_data_loader(dev_examples, args)
Beispiel #14
0
 def test_adv(self, args):
     """Return the data loader for the 'test-adv' split.

     The split is loaded with ``data.load`` and handed, together with
     ``args``, to ``self.get_data_loader``; that call's result is returned.
     """
     build_loader = self.get_data_loader
     return build_loader(data.load('test-adv'), args)