import logging
import os
import pickle as pkl

# load_tsv is the TSV-parsing helper defined elsewhere in this codebase.


def loadFile(self, fpath, max_seq_len, load_data):
    '''Process the dataset located at fpath, caching the result as a pickle.'''
    pkl_path = fpath + '.pkl'
    if os.path.exists(pkl_path) and load_data:
        with open(pkl_path, 'rb') as f:
            data = pkl.load(f)
        logging.info("Loaded data from %s", pkl_path)
    else:
        data = load_tsv(fpath, max_seq_len, s1_idx=3, s2_idx=4, targ_idx=0)
        with open(pkl_path, 'wb') as f:
            pkl.dump(data, f)
        logging.info("Saved data to %s", pkl_path)
    return data
def loadAux(self, fpath, max_seq_len, targ_map, load_data):
    '''Process the dataset located at path.'''
    pkl_path = fpath + '.pkl'
    if os.path.exists(pkl_path) and load_data:
        with open(pkl_path, 'rb') as f:
            data = pkl.load(f)
        logging.info("Loaded data from %s", pkl_path)
    else:
        data = load_tsv(fpath, max_seq_len, s1_idx=6, s2_idx=7, targ_idx=8,
                        targ_map=targ_map, skip_rows=1)
        with open(pkl_path, 'wb') as f:
            pkl.dump(data, f)
        logging.info("Saved data to %s", pkl_path)
    return data
def loadFile(self, fpath, max_seq_len, load_data):
    '''Load a split from TSV and cache the processed data as a pickle.

    (The docstring previously claimed JSON input and a question_ID dict,
    which did not match the load_tsv call below.)
    '''
    pkl_path = fpath + '.pkl'
    if os.path.exists(pkl_path) and load_data:
        with open(pkl_path, 'rb') as f:
            data = pkl.load(f)
        logging.info("Loaded data from %s", pkl_path)
    else:
        data = load_tsv(fpath, max_seq_len, s1_idx=3, s2_idx=4, targ_idx=5)
        with open(pkl_path, 'wb') as f:
            pkl.dump(data, f)
        logging.info("Saved data to %s", pkl_path)
    return data
def loadFile(self, fpath, max_seq_len, load_data):
    '''Load a single split'''
    pkl_path = fpath + '.pkl'
    if os.path.exists(pkl_path) and load_data:
        with open(pkl_path, 'rb') as f:
            data = pkl.load(f)
        logging.info("Loaded data from %s", pkl_path)
    else:
        data = load_tsv(fpath, max_seq_len, s1_idx=1, s2_idx=2, targ_idx=3,
                        targ_map={'entailment': 1, 'not_entailment': 0})
        with open(pkl_path, 'wb') as f:
            pkl.dump(data, f)
        logging.info("Saved data to %s", pkl_path)
    return data
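# All four loaders repeat the same load-from-pickle-or-build-and-cache pattern,
# differing only in the load_tsv column arguments. Below is a minimal sketch of
# how that pattern could be factored into one shared helper. The helper name
# `_load_or_build` and its standalone framing are assumptions for illustration,
# not part of the original codebase; `build_fn` stands in for whatever load_tsv
# call a given task needs.


def _load_or_build(fpath, build_fn, load_data=True):
    '''Return data cached at fpath + '.pkl' if present, else build and cache it.

    Hypothetical helper (not in the original code). `build_fn` is a
    zero-argument callable, e.g. a lambda wrapping load_tsv.
    '''
    pkl_path = fpath + '.pkl'
    if os.path.exists(pkl_path) and load_data:
        with open(pkl_path, 'rb') as f:
            data = pkl.load(f)
        logging.info("Loaded data from %s", pkl_path)
    else:
        data = build_fn()
        with open(pkl_path, 'wb') as f:
            pkl.dump(data, f)
        logging.info("Saved data to %s", pkl_path)
    return data

# Example usage, matching the first loader above:
#   data = _load_or_build(
#       fpath,
#       lambda: load_tsv(fpath, max_seq_len, s1_idx=3, s2_idx=4, targ_idx=0),
#       load_data)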