Example #1
0
 def loadFile(self, fpath, max_seq_len, load_data):
     '''Load the dataset at ``fpath``, using a pickle cache stored next to it.

     If ``fpath + '.pkl'`` exists and ``load_data`` is truthy, the cached
     object is unpickled and returned. Otherwise the file is parsed with
     ``load_tsv`` and the result is written to that cache path.

     Args:
         fpath: path of the TSV file; the cache lives at ``fpath + '.pkl'``.
         max_seq_len: forwarded unchanged to ``load_tsv``.
         load_data: when falsy, any existing cache is ignored and rebuilt.

     Returns:
         The unpickled cache contents, or whatever ``load_tsv`` returns.
     '''
     pkl_path = fpath + '.pkl'
     if load_data and os.path.exists(pkl_path):
         # NOTE(review): unpickling can run arbitrary code; only point this
         # at cache files that this project wrote itself.
         with open(pkl_path, 'rb') as cache_fh:
             data = pkl.load(cache_fh)
         logging.info("Loaded data from %s", pkl_path)
     else:
         # presumably s1_idx / s2_idx / targ_idx are TSV column indices --
         # confirm against load_tsv.
         data = load_tsv(fpath, max_seq_len, s1_idx=3, s2_idx=4, targ_idx=0)
         # The context manager flushes and closes the cache file right away
         # instead of leaving that to garbage collection.
         with open(pkl_path, 'wb') as cache_fh:
             pkl.dump(data, cache_fh)
         logging.info("Saved data to %s", pkl_path)
     return data
Example #2
0
 def loadAux(self, fpath, max_seq_len, targ_map, load_data):
     '''Load the auxiliary dataset at ``fpath``, using a pickle cache.

     If ``fpath + '.pkl'`` exists and ``load_data`` is truthy, the cached
     object is unpickled and returned. Otherwise the file is parsed with
     ``load_tsv`` and the result is written to that cache path.

     Args:
         fpath: path of the TSV file; the cache lives at ``fpath + '.pkl'``.
         max_seq_len: forwarded unchanged to ``load_tsv``.
         targ_map: forwarded to ``load_tsv`` as ``targ_map``.
         load_data: when falsy, any existing cache is ignored and rebuilt.

     Returns:
         The unpickled cache contents, or whatever ``load_tsv`` returns.
     '''
     pkl_path = fpath + '.pkl'
     if load_data and os.path.exists(pkl_path):
         # NOTE(review): unpickling can run arbitrary code; only point this
         # at cache files that this project wrote itself.
         with open(pkl_path, 'rb') as cache_fh:
             data = pkl.load(cache_fh)
         logging.info("Loaded data from %s", pkl_path)
     else:
         # presumably the *_idx values are TSV column indices and skip_rows=1
         # drops a header line -- confirm against load_tsv.
         data = load_tsv(fpath, max_seq_len, s1_idx=6, s2_idx=7, targ_idx=8,
                         targ_map=targ_map, skip_rows=1)
         # The context manager flushes and closes the cache file right away
         # instead of leaving that to garbage collection.
         with open(pkl_path, 'wb') as cache_fh:
             pkl.dump(data, cache_fh)
         logging.info("Saved data to %s", pkl_path)
     return data
Example #3
0
 def loadFile(self, fpath, max_seq_len, load_data):
     '''Load the dataset at ``fpath``, using a pickle cache stored next to it.

     The file is parsed with ``load_tsv`` (not as JSON). If
     ``fpath + '.pkl'`` exists and ``load_data`` is truthy, the cached
     object is unpickled and returned instead; otherwise the freshly parsed
     result is written to that cache path.

     Args:
         fpath: path of the TSV file; the cache lives at ``fpath + '.pkl'``.
         max_seq_len: forwarded unchanged to ``load_tsv``.
         load_data: when falsy, any existing cache is ignored and rebuilt.

     Returns:
         The unpickled cache contents, or whatever ``load_tsv`` returns.
     '''
     pkl_path = fpath + '.pkl'
     if load_data and os.path.exists(pkl_path):
         # NOTE(review): unpickling can run arbitrary code; only point this
         # at cache files that this project wrote itself.
         with open(pkl_path, 'rb') as cache_fh:
             data = pkl.load(cache_fh)
         logging.info("Loaded data from %s", pkl_path)
     else:
         # presumably s1_idx / s2_idx / targ_idx are TSV column indices --
         # confirm against load_tsv.
         data = load_tsv(fpath, max_seq_len, s1_idx=3, s2_idx=4, targ_idx=5)
         # The context manager flushes and closes the cache file right away
         # instead of leaving that to garbage collection.
         with open(pkl_path, 'wb') as cache_fh:
             pkl.dump(data, cache_fh)
         logging.info("Saved data to %s", pkl_path)
     return data
Example #4
0
 def loadFile(self, fpath, max_seq_len, load_data):
     '''Load a single split from ``fpath``, using a pickle cache next to it.

     If ``fpath + '.pkl'`` exists and ``load_data`` is truthy, the cached
     object is unpickled and returned. Otherwise the file is parsed with
     ``load_tsv`` and the result is written to that cache path.

     Args:
         fpath: path of the TSV file; the cache lives at ``fpath + '.pkl'``.
         max_seq_len: forwarded unchanged to ``load_tsv``.
         load_data: when falsy, any existing cache is ignored and rebuilt.

     Returns:
         The unpickled cache contents, or whatever ``load_tsv`` returns.
     '''
     pkl_path = fpath + '.pkl'
     if load_data and os.path.exists(pkl_path):
         # NOTE(review): unpickling can run arbitrary code; only point this
         # at cache files that this project wrote itself.
         with open(pkl_path, 'rb') as cache_fh:
             data = pkl.load(cache_fh)
         logging.info("Loaded data from %s", pkl_path)
     else:
         # String labels are mapped to ints: 'entailment' -> 1 and
         # 'not_entailment' -> 0.
         label_map = {
             'entailment': 1,
             'not_entailment': 0
         }
         # presumably s1_idx / s2_idx / targ_idx are TSV column indices --
         # confirm against load_tsv.
         data = load_tsv(fpath,
                         max_seq_len,
                         s1_idx=1,
                         s2_idx=2,
                         targ_idx=3,
                         targ_map=label_map)
         # The context manager flushes and closes the cache file right away
         # instead of leaving that to garbage collection.
         with open(pkl_path, 'wb') as cache_fh:
             pkl.dump(data, cache_fh)
         logging.info("Saved data to %s", pkl_path)
     return data