Exemple #1
0
 def read_from_csv(self):
     """Load the train, test and total question tables as DataFrames.

     File names are built as ``'Train'``/``'Test'``/``'Total'`` followed by
     ``CSV_TYPE[self.opt.QUESTION_TYPE]`` and are looked up inside
     ``self.opt.TEXT_DIR``. All three files are parsed as tab-separated.

     Returns:
         A ``(text_train, text_test, total_set)`` tuple. The train and test
         frames are indexed by their ``'vid_id'`` column; the total frame
         keeps the default index produced by ``pd.read_csv``.
     """
     # Validate the configured question type before using it as a key
     # (exact behaviour of the helper is defined in ``utils``).
     utils.assert_in_type(self.opt.QUESTION_TYPE)

     text_dir = self.opt.TEXT_DIR
     suffix = CSV_TYPE[self.opt.QUESTION_TYPE]

     split_paths = [
         os.path.join(text_dir, split_name + suffix)
         for split_name in ('Train', 'Test')
     ]

     # Check both split files up front, before any of them is parsed
     # (presumably an existence check -- see ``utils.assert_exits``).
     for csv_path in split_paths:
         utils.assert_exits(csv_path)

     # Parse both splits first, then re-index them, in that order.
     raw_train, raw_test = [
         pd.read_csv(csv_path, sep='\t') for csv_path in split_paths
     ]
     text_train = raw_train.set_index('vid_id')
     text_test = raw_test.set_index('vid_id')

     # The combined table is read last and is neither pre-checked nor
     # re-indexed.
     total_set = pd.read_csv(os.path.join(text_dir, 'Total' + suffix),
                             sep='\t')
     return text_train, text_test, total_set
Exemple #2
0
 def __init__(self, question_type, dictionary, mode):
     super(FeatureDataset, self).__init__()
     self.opt = config.parse_opt()
     utils.assert_in_type(question_type)
     if question_type == 'FrameQA':
         self.ans2label = pkl.load(open('./data/ans2label.pkl', 'rb'))
         self.label2ans = pkl.load(open('./data/label2ans.pkl', 'rb'))
         self.num_ans = len(self.ans2label)
     self.dictionary = dictionary
     entry_path = './data/entries_' + str(mode) + '.pkl'
     print('Load Dataset')
     self.entries = load_dataset(mode)
     print('Dataset\'s length is %d' % (len(self.entries)))
     self.tokenize()
     self.read_from_h5py()
     self.tensorize()
     '''