def run_task(data_dir, task_id):
    """
    Train a model on one task and evaluate it on that task's test files.

    The task's files are found by globbing ``data_dir`` for
    ``qa<task_id>_*_train.txt`` and ``qa<task_id>_*_test.txt``.
    Nothing is returned.
    """
    print("Train and test for task %d ..." % task_id)

    # Locate the files belonging to this task
    train_paths = glob.glob('%s/qa%d_*_train.txt' % (data_dir, task_id))
    test_paths = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))

    # The same vocabulary dict is handed to both parsing passes,
    # training files first
    vocab = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_paths, vocab, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_paths, vocab, False)

    config = BabiConfig(train_story, train_questions, vocab)
    memory, model, loss = build_model(config)

    # Pick the training routine requested by the configuration
    fit = train_linear_start if config.linear_start else train
    fit(train_story, train_questions, train_qstory,
        memory, model, loss, config)

    test(test_story, test_questions, test_qstory,
         memory, model, loss, config)
def run_task(data_dir, task_id):
    """
    Build a model for one task and run it on that task's test files.

    Training is deliberately not performed in this variant: the model is
    built from the configuration and handed straight to ``test``.

    Args:
        data_dir: directory globbed for ``qa<task_id>_*_train.txt`` and
            ``qa<task_id>_*_test.txt``.
        task_id: integer task number, used in the file patterns and in the
            progress message.
    """
    print("Train and test for task %d ..." % task_id)
    print("We are going to use this")

    # Parse data.
    # The patterns used to hard-code "qa3" while still being formatted with
    # (data_dir, task_id); one %s with two arguments raises TypeError, so the
    # function could never get past this point. Use the task_id placeholder.
    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_dir, task_id))
    test_files = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, dictionary, False)

    general_config = BabiConfig(train_story, train_questions, dictionary)

    # Build the model so that memory/model/loss exist for test(); with every
    # build_model call commented out the test() call raised NameError.
    # NOTE(review): no train()/train_linear_start() call is made here, so the
    # model is tested exactly as build_model returns it - confirm this is the
    # intended experiment.
    memory, model, loss = build_model(general_config)

    test(test_story, test_questions, test_qstory,
         memory, model, loss, general_config)
def train(self):
    """
    Fit a MemN2N model on the task files under ``self.data_dir`` and save it.

    Sets ``self.reversed_dict``, ``self.general_config``, ``self.memory``,
    ``self.model`` and ``self.loss``, then calls ``self.save_model()``.
    Fails with an AssertionError when ``self.data_dir`` is None.
    """
    np.random.seed(42)  # fixed seed so that runs can be reproduced

    assert self.data_dir is not None, "data_dir is not specified."
    print("Reading data from %s ..." % self.data_dir)

    vocab = {"nil": 0}

    # Training data
    train_paths = glob.glob('%s/qa8_*_train.txt' % self.data_dir)
    train_story, train_questions, train_qstory = parse_babi_task(
        train_paths, vocab, False)

    # The test data is parsed only so that the vocabulary also covers the
    # words appearing there; the parsed result itself is discarded
    test_paths = glob.glob('%s/qa8_*_test.txt' % self.data_dir)
    parse_babi_task(test_paths, vocab, False)

    # Index -> word lookup, the inverse of the vocabulary
    self.reversed_dict = {index: word for word, index in vocab.items()}

    # Model construction
    self.general_config = BabiConfig(train_story, train_questions, vocab)
    self.memory, self.model, self.loss = build_model(self.general_config)

    # Training, using the routine selected by the configuration
    fit = train_linear_start if self.general_config.linear_start else train
    fit(train_story, train_questions, train_qstory,
        self.memory, self.model, self.loss, self.general_config)

    # Persist the result
    self.save_model()
def run_tableQA(data_path, model_file):
    """
    Train and test for table QA, then save the trained model.

    Args:
        data_path: glob pattern containing one ``str.format`` placeholder;
            it is formatted with 'train' and with 'test' to find the
            training and test files.
        model_file: path of the gzip-compressed pickle written at the end.
            It holds the tuple
            (reversed_dict, memory, model, loss, general_config).
    """
    # Parse data
    train_files = glob.glob(data_path.format('train'))
    test_files = glob.glob(data_path.format('test'))

    # SV: the dictionary could instead be initialised with pre-trained
    # vectors, e.g. from fastText
    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, dictionary, False)

    # Single-argument print(): emits the same "Dictionary: <n>" text as the
    # former print statement, but is also valid syntax on Python 3.
    print('Dictionary: %d' % len(dictionary))

    general_config = BabiConfig(train_story, train_questions, dictionary)
    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory,
                           memory, model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory,
              memory, model, loss, general_config)

    test(test_story, test_questions, test_qstory,
         memory, model, loss, general_config)

    # Save the model together with the index -> word mapping
    with gzip.open(model_file, "wb") as f:
        print("Saving model to file %s ..." % model_file)
        reversed_dict = dict((ix, w) for w, ix in dictionary.items())
        pickle.dump((reversed_dict, memory, model, loss, general_config), f)
def run_task(data_dir, task_id, model_file, log_path):
    """
    Train and test for each task, logging to and saving under ``log_path``.

    Args:
        data_dir: directory globbed for ``qa<task_id>_*_train.txt`` and
            ``qa<task_id>_*_test.txt``.
        task_id: integer task number used in the file patterns.
        model_file: file name of the gzip-compressed pickle written after
            training; it is joined onto ``log_path``.
        log_path: directory that receives ``train.log`` / ``val.log`` (when
            linear start is off) and the saved model; it is also passed to
            ``train_linear_start``.
    """
    print("Train and test for task %d ..." % task_id)

    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_dir, task_id))
    test_files = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))

    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = \
        parse_babi_task(train_files, dictionary, False)
    test_story, test_questions, test_qstory = \
        parse_babi_task(test_files, dictionary, False)

    # Get reversed dictionary mapping index to word
    # NOTE: this is needed for real-time testing
    reversed_dict = dict((ix, w) for w, ix in dictionary.items())

    general_config = BabiConfig(train_story, train_questions, dictionary)
    memory, model, loss_func = build_model(general_config)

    # This is a plain function, so there is no `self`; the log directory is
    # the `log_path` parameter (the former `self.log_path` raised NameError).
    if general_config.linear_start:
        print('We will use LS training')
        # NOTE(review): train_linear_start's return value (previously bound
        # to best_model, best_memory) was never used; `memory` and `model`
        # are what get saved below - confirm that is intended.
        train_linear_start(train_story, train_questions, train_qstory,
                           memory, model, loss_func, general_config,
                           log_path)
    else:
        train_logger = open(os.path.join(log_path, 'train.log'), 'w')
        val_logger = open(os.path.join(log_path, 'val.log'), 'w')
        try:
            for logger in (train_logger, val_logger):
                logger.write('epoch batch_iter lr loss err\n')
                logger.flush()

            global_batch_iter = 0
            train_logger, val_logger, _, _, _ = \
                train(train_story, train_questions, train_qstory,
                      memory, model, loss_func, general_config,
                      train_logger, val_logger, global_batch_iter)
        finally:
            # Close the log files even when training raises
            train_logger.close()
            val_logger.close()

    model_file = os.path.join(log_path, model_file)
    with gzip.open(model_file, 'wb') as f:
        print('Saving model to file %s ...' % model_file)
        pickle.dump((reversed_dict, memory, model, loss_func,
                     general_config), f)

    print('Start to testing')
    test(test_story, test_questions, test_qstory,
         memory, model, loss_func, general_config)
def run_task(data_dir, task_id):
    """
    Train and test for each task, then repeat the test with a loaded model.

    The first half parses the task files, trains a new model and tests it.
    The second half loads a saved model through ``MemN2N``, re-parses the
    same files against the loaded model's dictionary and tests again. Both
    dictionaries are appended as JSON to ``R_trained.txt`` / ``R_loaded.txt``
    and printed, so they can be compared.

    NOTE: the loaded model is located through the module-level ``args``
    object (``args.data_dir``, ``args.model_file``), not through this
    function's parameters.

    Args:
        data_dir: directory globbed for ``qa<task_id>_*_train.txt`` and
            ``qa<task_id>_*_test.txt``.
        task_id: integer task number used in the file patterns.
    """
    print("Train and test for task %d ..." % task_id)

    # Parse data
    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_dir, task_id))
    test_files = glob.glob('%s/qa%d_*_test.txt' % (data_dir, task_id))

    # Start from a dictionary holding only the "nil" entry
    dictionary = {"nil": 0}
    train_story, train_questions, train_qstory = parse_babi_task(
        train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(
        test_files, dictionary, False)

    general_config = BabiConfig(train_story, train_questions, dictionary)
    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory,
                           memory, model, loss, general_config)
    else:
        train(train_story, train_questions, train_qstory,
              memory, model, loss, general_config)

    # Mode 'a' appends: repeated runs accumulate several JSON documents
    with open('R_trained.txt', 'a') as outfile:
        json.dump(general_config.dictionary, outfile, indent=2)

    print("######## trained dictionary")
    print(general_config.dictionary)
    test(test_story, test_questions, test_qstory,
         memory, model, loss, general_config)

    # Load a previously saved model
    memn2n = MemN2N(args.data_dir, args.model_file)
    memn2n.load_model()
    loaded_config = memn2n.general_config

    # The training files are still parsed against the loaded dictionary even
    # though the parsed result is unused: parse_babi_task receives the dict
    # itself and presumably registers unseen words in it (TODO confirm), so
    # dropping this call could change what the test parse below sees.
    parse_babi_task(train_files, loaded_config.dictionary, False)
    test_story2, test_questions2, test_qstory2 = parse_babi_task(
        test_files, loaded_config.dictionary, False)

    with open('R_loaded.txt', 'a') as outfile2:
        json.dump(loaded_config.dictionary, outfile2, indent=2)

    # The banner literal was split across two physical lines (an
    # unterminated string); it is restored to a single line.
    print("???????? loaded dictionary")
    print(loaded_config.dictionary)
    test(test_story2, test_questions2, test_qstory2,
         memn2n.memory, memn2n.model, memn2n.loss, loaded_config)