def _test(config):
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])
    _config_debug(config)

    if config.use_glove_for_unk:
        word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
        new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
        config.new_emb_mat = new_emb_mat

    pprint(config.__flags, indent=2)

    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = MultiGPUF1Evaluator(config, models,
                                    tensor_dict=models[0].tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)
    num_steps = math.ceil(test_data.num_examples / (config.batch_size * config.num_gpus))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    e = None
    for multi_batch in tqdm(test_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps, cluster=config.cluster),
                            total=num_steps):
        ei = evaluator.get_evaluation(sess, multi_batch)
        e = ei if e is None else e + ei
        if config.vis:
            eval_subdir = os.path.join(config.eval_dir,
                                       "{}-{}".format(ei.data_type, str(ei.global_step).zfill(6)))
            if not os.path.exists(eval_subdir):
                os.mkdir(eval_subdir)
            path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
            graph_handler.dump_eval(ei, path=path)

    print(e)
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)
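# The use_glove_for_unk block above recurs throughout this file: words outside the
# trained vocabulary that still have a GloVe vector are packed into a second matrix
# indexed by new_word2idx. A minimal, self-contained sketch of that construction
# (toy dicts below; the real ones come from test_data.shared):
import numpy as np

def build_new_emb_mat(word2vec_dict, new_word2idx_dict):
    """Row i is the vector of the word whose new index is i."""
    idx2vec = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
    return np.array([idx2vec[idx] for idx in range(len(idx2vec))], dtype='float32')

# Example: two out-of-vocabulary words with 3-d vectors.
_toy_w2v = {'foo': [0.1, 0.2, 0.3], 'bar': [0.4, 0.5, 0.6]}
_toy_new_w2i = {'foo': 0, 'bar': 1}
assert build_new_emb_mat(_toy_w2v, _toy_new_w2i).shape == (2, 3)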
def _forward(config):
    assert config.load
    test_data = read_data(config, config.forward_name, True)
    update_config(config, [test_data])
    _config_debug(config)

    if config.use_glove_for_unk:
        word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
        new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
        config.new_emb_mat = new_emb_mat

    pprint(config.__flags, indent=2)

    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = ForwardEvaluator(config, model)
    graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading / saving

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    num_batches = math.ceil(test_data.num_examples / config.batch_size)
    if 0 < config.test_num_batches < num_batches:
        num_batches = config.test_num_batches

    e = evaluator.get_evaluation_from_batches(
        sess, tqdm(test_data.get_batches(config.batch_size, num_batches=num_batches), total=num_batches))
    print(e)
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e, path=config.answer_path)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e, path=config.eval_path)
def data_ready(self, data=None, update=False):
    config = self.config
    config.batch_size = 1
    test_data = read_data(self.config, 'demo', True, data=data, data_set=self.test_data)
    # test_data = read_data(self.config, 'demo', True)
    if update:
        update_config(self.config, [test_data])
        if config.use_glove_for_unk:
            word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
            new_word2idx_dict = test_data.shared['new_word2idx']
            idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
            new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
            config.new_emb_mat = new_emb_mat
    self.config = config
    self.test_data = test_data
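# Hypothetical usage of data_ready from the surrounding demo wrapper class (the
# class itself is not shown here, so `Demo` and `run` below are assumed names,
# for illustration only):
#
#   demo = Demo(config)
#   demo.data_ready(data=raw_example, update=True)  # refresh config / new_emb_mat for this input
#   answer = demo.run(raw_example)                  # model runs with batch_size forced to 1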
def _test(config):
    t1 = time.time()
    print("[{}] loading data..".format(t1))
    test_data = read_data(config, config.testfile, "test")
    t2 = time.time()
    print("[{}] updating config..".format(t2))
    update_config(config, [test_data])
    _config_debug(config)

    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = MultiGPUF1Evaluator(config, models,
                                    tensor_dict=models[0].tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    t3 = time.time()
    print("[{}] creating session..".format(t3))
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    t4 = time.time()
    print("[{}] initializing session..".format(t4))
    graph_handler.initialize(sess)

    num_steps = int(math.ceil(test_data.num_examples / (config.batch_size * config.num_gpus)))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    e = None
    t5 = time.time()
    print("loading model takes {}s\nbegin evaluating..".format(t5 - t3))
    count = 0
    total_time = 0
    for multi_batch in tqdm(test_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps, cluster=config.cluster),
                            total=num_steps):
        t_start = time.time()
        evaluator.set_count(count)
        ei = evaluator.get_evaluation(sess, multi_batch)
        t_end = time.time()
        count += 1
        single_time = t_end - t_start
        total_time += single_time
        answer_id = list(ei.id2answer_dict["scores"].keys())[0]
        answer = ei.id2answer_dict[answer_id]
        print("id: {}, answer: {}, correct: {}, time: {:6.4f}s".format(
            answer_id, answer.encode('ascii', 'ignore').decode('ascii'),
            int(ei.acc) == 1, single_time))
        sys.stdout.flush()
        e = ei if e is None else e + ei

    t6 = time.time()
    # print("[{}] finish evaluation".format(t6))
    # print("total time:{} for {} evaluations, avg:{}".format(total_time, count, total_time * 1.0 / count))
    print(e)
    print("dumping answer ...")
    graph_handler.dump_answer(e)
def _test(config):
    test_data = read_data(config, 'dev', True)
    update_config(config, [test_data])
    _config_debug(config)

    if config.use_glove_for_unk:
        word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
        new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
        config.new_emb_mat = new_emb_mat

    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = ScoreEvaluator(config, model, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    configgpu = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    configgpu.gpu_options.allow_growth = True
    sess = tf.Session(config=configgpu)
    graph_handler.initialize(sess)

    num_steps = math.ceil(test_data.num_examples / (config.batch_size * config.num_gpus))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    e = None
    part = 0
    eval_step = 0
    for multi_batch in tqdm(test_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps, cluster=config.cluster),
                            total=num_steps):
        eval_step += 1
        ei = evaluator.get_evaluation_from_batches(sess, multi_batch)
        e = ei if e is None else e + ei
        if eval_step % 5000 == 0:
            graph_handler.dump_score(e, part=part)
            e = None
            part += 1
    graph_handler.dump_score(e, part=part)
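# The dump-every-5000-steps pattern above bounds memory by flushing the running
# evaluation in parts. A standalone sketch of that chunked accumulation, with
# integers standing in for `+`-combinable evaluation objects and `dump` standing
# in for graph_handler.dump_score:
def accumulate_in_parts(items, dump, chunk=5000):
    """Fold items with `+`, flushing the partial result every `chunk` items."""
    acc, part = None, 0
    for step, item in enumerate(items, start=1):
        acc = item if acc is None else acc + item
        if step % chunk == 0:
            dump(acc, part)
            acc, part = None, part + 1
    dump(acc, part)  # final flush (acc may be None if len(items) divides evenly)

accumulate_in_parts(range(12), lambda acc, part: print(part, acc), chunk=5)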
def _test(config):
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])
    _config_debug(config)

    word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
    word2idx_dict = test_data.shared['word2idx']
    new_word2idx_dict = test_data.shared['new_word2idx']
    print('word2idx len : {}, new_word2idx len : {}'.format(len(word2idx_dict), len(new_word2idx_dict)))
    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    idx2word_dict = {idx: word for word, idx in word2idx_dict.items()}
    offset = len(idx2word_dict)
    idx2word_dict.update({offset + idx: word for word, idx in new_word2idx_dict.items()})
    test_data.shared['idx2word'] = idx2word_dict
    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size),
                                                           np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat

    pprint(config.__flags, indent=2)

    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = MultiGPUF1Evaluator(config, models,
                                    tensor_dict=models[0].tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)
    num_steps = math.ceil(test_data.num_examples / (config.batch_size * config.num_gpus))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    e = None
    for multi_batch in tqdm(test_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps, cluster=config.cluster),
                            total=num_steps):
        ei = evaluator.get_evaluation(sess, multi_batch)
        e = ei if e is None else e + ei
        if config.vis:
            eval_subdir = os.path.join(config.eval_dir,
                                       "{}-{}".format(ei.data_type, str(ei.global_step).zfill(6)))
            if not os.path.exists(eval_subdir):
                os.mkdir(eval_subdir)
            path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
            graph_handler.dump_eval(ei, path=path)

    print(e)
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)
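# The emb_mat construction above (and in the _train variants below) backs every
# in-vocabulary id with its GloVe vector and every other id with a random draw.
# np.random.multivariate_normal with zero mean and identity covariance is just an
# isotropic standard normal; a minimal sketch of the same pattern:
import numpy as np

def build_emb_mat(idx2vec, vocab_size, emb_size, rng=np.random):
    """Pretrained vector when available, standard-normal vector otherwise."""
    return np.array([idx2vec[idx] if idx in idx2vec
                     else rng.multivariate_normal(np.zeros(emb_size), np.eye(emb_size))
                     for idx in range(vocab_size)])

# Example: vocab of 4 ids, only ids 0 and 2 have pretrained 3-d vectors.
assert build_emb_mat({0: [1., 0., 0.], 2: [0., 1., 0.]}, 4, 3).shape == (4, 3)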
def _train(config):
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config, 'train', config.load, data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)
    update_config(config, [train_data, dev_data])
    _config_debug(config)

    word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size),
                                                           np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat
    print("finished data preprocessing")

    # construct model graph and variables (using default graph)
    print("Getting torch model")
    # model = get_torch_model()
    # print("num params: {}".format(get_num_params()))
    # trainer = TorchTrainer(config, model)
    # evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    # graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading / saving

    # Variables
    # sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    # graph_handler.initialize(sess)

    # Begin training
    num_steps = config.num_steps or int(math.ceil(train_data.num_examples /
                                                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
def _predict(config):
    predict_data = read_data(config, 'predict', True)
    update_config(config, [predict_data])
    _config_debug(config)

    # if config.use_glove_for_unk:
    word2vec_dict = predict_data.shared['word2vec']
    config.emb_mat = np.array(word2vec_dict, dtype=np.float32)

    # pprint(config.__flags, indent=2)

    models = get_multi_gpu_models(config)
    model = models[0]
    predictor = myMultiGPUF1Predictor(config, models, tensor_dict=models[0].tensor_dict)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)
    num_steps = math.ceil(predict_data.num_examples / (config.batch_size * config.num_gpus))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    labelout = {}
    for multi_batch in tqdm(predict_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                           num_steps=num_steps, cluster=config.cluster),
                            total=num_steps):
        labelout = predictor.get_labelout(sess, multi_batch, labelout)

    outstring = ""
    for key, value in labelout.items():
        outstring += str(key)
        outstring += ','
        outstring += str(value)
        outstring += '\n'
    with open('../data/predictor.txt', 'w') as f:
        f.write(outstring)
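# The outstring concatenation above is quadratic in the worst case; an equivalent,
# more idiomatic way to write the same one-"key,value"-per-line dump:
def write_labelout(labelout, path='../data/predictor.txt'):
    """Same output format as the loop above: 'key,value' per line."""
    with open(path, 'w') as f:
        f.writelines("{},{}\n".format(k, v) for k, v in labelout.items())

# write_labelout({'q1': 0, 'q2': 1}) produces:
#   q1,0
#   q2,1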
def _train(config):
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config, 'train', config.load, data_filter=data_filter)
    dev_data = read_data(config, config.dev_name, True, data_filter=None)
    update_config(config, [train_data, dev_data])
    _config_debug(config)

    word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size),
                                                           np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    if config.model_name == 'basic':
        ThisEvaluator = MultiGPUF1Evaluator
    elif config.model_name in ['basic-class', 'basic-generate', 'baseline']:
        ThisEvaluator = MultiGPUClassificationAccuracyEvaluator
    elif config.model_name == 'span-gen':
        ThisEvaluator = UnionEvaluator
    evaluator = ThisEvaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading / saving

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Begin training
    num_steps = config.num_steps or int(math.ceil(train_data.num_examples /
                                                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                     num_steps=num_steps, shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(model.global_step) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            print("Saving variables on step ", global_step)
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            """
            train_batches = tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                              num_steps=num_steps), total=num_steps)
            e_train = evaluator.get_evaluation_from_batches(sess, train_batches)
            graph_handler.add_summaries(e_train.summaries, global_step)
            """
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                      num_steps=num_steps), total=num_steps))
            print("Evaluated on dev at step ", global_step, ": ", e_dev)
            graph_handler.add_summaries(e_dev.summaries, global_step)
            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)

    if global_step % config.save_period != 0:
        print("Final save at step ", global_step)
        graph_handler.save(sess, global_step=global_step)
def _train(config):
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config, 'train', config.load, data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)
    update_config(config, [train_data, dev_data])
    _config_debug(config)

    word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size),
                                                           np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading / saving

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Begin training
    num_steps = config.num_steps or int(math.ceil(train_data.num_examples /
                                                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                     num_steps=num_steps, shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(model.global_step) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                      num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)

    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
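# Every _train variant in this file derives its step budget the same way: an
# explicit config.num_steps wins, otherwise ceil(examples / effective_batch) *
# epochs, where the effective batch is batch_size * num_gpus. A worked example:
import math

def step_budget(num_examples, batch_size, num_gpus, num_epochs, num_steps=None):
    return num_steps or int(math.ceil(num_examples / (batch_size * num_gpus))) * num_epochs

# 87599 SQuAD training examples, batch 60 on 2 GPUs, 12 epochs:
# ceil(87599 / 120) = 730 steps per epoch, 8760 steps total.
assert step_budget(87599, 60, 2, 12) == 8760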
def _train(config):
    if config.dataset == 'qangaroo':
        data_filter = get_qangaroo_data_filter(config)
    else:
        raise NotImplementedError
    train_data = read_data(config, 'train', config.load, data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)
    update_config(config, [train_data, dev_data])

    word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size),
                                                           np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    with sess.as_default():
        models = get_multi_gpu_models(config, emb_mat)
        model = models[0]
        print("num params: {}".format(get_num_params()))
        trainer = MultiGPUTrainer(config, models)
        if config.reasoning_layer is not None and config.mac_prediction == 'candidates':
            evaluator = MultiGPUF1CandidateEvaluator(config, models,
                                                     tensor_dict=model.tensor_dict if config.vis else None)
        else:
            evaluator = MultiGPUF1Evaluator(config, models,
                                            tensor_dict=model.tensor_dict if config.vis else None)
        graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading / saving

        # Variables
        # gpu_options = tf.GPUOptions(allow_growth=True)
        # sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options))
        # sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        graph_handler.initialize(sess)

        # Begin training
        num_steps = config.num_steps or int(math.ceil(train_data.num_examples /
                                                      (config.batch_size * config.num_gpus))) * config.num_epochs
        global_step = 0
        for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                         num_steps=num_steps, shuffle=True,
                                                         cluster=config.cluster),
                            total=num_steps):
            # skip multi-GPU steps whose batches are not all full-size
            INSUFFICIENT_DATA = False
            for batch in batches:
                _, ds = batch
                if len(ds.data['x']) < config.batch_size:
                    INSUFFICIENT_DATA = True
                    break
            if INSUFFICIENT_DATA:
                continue

            global_step = sess.run(model.global_step) + 1  # +1 because all calculations are done after step
            get_summary = global_step % config.log_period == 0
            loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
            if get_summary:
                graph_handler.add_summary(summary, global_step)

            # occasional saving
            if global_step % config.save_period == 0:
                graph_handler.save(sess, global_step=global_step)

            if not config.eval:
                continue
            # Occasional evaluation
            if global_step % config.eval_period == 0:
                num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
                if 0 < config.val_num_batches < num_steps:
                    num_steps = config.val_num_batches
                e_dev = evaluator.get_evaluation_from_batches(
                    sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                          num_steps=num_steps), total=num_steps))
                graph_handler.add_summaries(e_dev.summaries, global_step)
                e_train = evaluator.get_evaluation_from_batches(
                    sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                            num_steps=num_steps), total=num_steps))
                graph_handler.add_summaries(e_train.summaries, global_step)
                if config.dump_eval:
                    graph_handler.dump_eval(e_dev)
                if config.dump_answer:
                    graph_handler.dump_answer(e_dev)

        if global_step % config.save_period != 0:
            graph_handler.save(sess, global_step=global_step)
def _train(config):
    # get_squad_data_filter returns a filter function
    data_filter = get_squad_data_filter(config)
    # config.load: "load saved data? [True]"
    train_data = read_data(config, 'train', config.load, data_filter=data_filter)  # DataSet
    dev_data = read_data(config, 'dev', config.load, data_filter=data_filter)  # DataSet
    update_config(config, [train_data, dev_data])  # update config such as max sent size and so on
    _config_debug(config)

    word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size),
                                                           np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.flag_values_dict(), indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading / saving

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Begin training
    num_steps = config.num_steps or int(math.ceil(train_data.num_examples /
                                                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(batch_size=config.batch_size,
                                                     num_batches_per_step=config.num_gpus,
                                                     num_steps=num_steps, shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(model.global_step) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                      num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)

    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
def _train(config):
    # load_metadata(config, 'train')  # this updates the config file according to metadata file
    k = config.k
    sup_unsup_ratio = config.sup_unsup_ratio
    save_dir = 'error_results_newsqa_k=%s' % k
    f1_thres = 0.1
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config, 'train', config.load, data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)

    # Baseline model
    config.data_dir = config.baseline_dir
    squad_train_data = read_data(config, 'train', config.load, data_filter=data_filter)

    # test_data = read_data(config, 'test', True, data_filter=data_filter)
    update_config(config, [squad_train_data, train_data, dev_data])
    _config_draft(config)

    word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    print("{}/{} unique words have corresponding glove vectors.".format(len(idx2vec_dict), len(word2idx_dict)))
    print(len(word2vec_dict))
    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size),
                                                           np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    # model = Model(config)
    models = get_multi_gpu_models(config)
    model = models[0]
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config)  # controls all tensors and variables in the graph, including loading / saving

    # Variables
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    config_proto.gpu_options.per_process_gpu_memory_fraction = 0.65
    sess = tf.Session(config=config_proto)
    graph_handler.initialize(sess)
    batches_list = []

    # begin training
    num_steps = config.num_steps or int(math.ceil(train_data.num_examples /
                                                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    global_scores = []

    # Combine batching together
    train_data_batcher = train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                      num_steps=num_steps, shuffle=True,
                                                      cluster=config.cluster)
    squad_data_batcher = squad_train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                            num_steps=num_steps, shuffle=True,
                                                            cluster=config.cluster)
    idx = [-1]
    ratio = sup_unsup_ratio

    def combine_batchers(unsupervised_generator, supervised_generator, ratio):
        while True:
            idx[0] = idx[0] + 1
            if idx[0] % ratio == 0:
                print("Yielding unsupervised")
                unsup_batch = next(unsupervised_generator)
                for _, data_set in unsup_batch:
                    if config.use_special_token:
                        data_set.data['dataset_type'] = ['NEWSQA']
                    y = data_set.data['y']
                    x = data_set.data['x']
                    q = data_set.data['q']
                    for xi, yi, qi in zip(x, y, q):
                        start_id = yi[0][0][1]
                        end_id = yi[0][1][1]
                        ans = xi[0][start_id:end_id]
                yield unsup_batch
            else:
                print("Yielding squad")
                sup_batch = next(supervised_generator)
                for _, data_set in sup_batch:
                    y = data_set.data['y']
                    x = data_set.data['x']
                    for xi, yi in zip(x, y):
                        start_id = yi[0][0][1]
                        end_id = yi[0][1][1]
                        ans = xi[0][start_id:end_id]
                yield sup_batch

    combined_batcher = combine_batchers(train_data_batcher, squad_data_batcher, ratio=ratio)

    for batches in tqdm(combined_batcher, total=num_steps):
        global_step = sess.run(model.global_step) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        scores = trainer.get_scores(sess, batches, get_summary=get_summary, k=k)
        loss, summary, train_op = trainer.margin_step(sess, batches=batches, top_k_batches=scores,
                                                      get_summary=get_summary)
        # loss, summary, train_op = trainer.step(sess, batches=batches, get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
            if 0 < config.eval_num_batches < num_steps:
                num_steps = config.eval_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                      num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)
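# combine_batchers above interleaves one unsupervised batch for every
# sup_unsup_ratio supervised ones. The interleaving itself, stripped of the TF
# bookkeeping, is just this (a self-contained sketch):
import itertools

def interleave(unsup, sup, ratio):
    """Yield from `unsup` every `ratio`-th step, from `sup` otherwise."""
    unsup, sup = iter(unsup), iter(sup)
    for step in itertools.count():
        yield next(unsup) if step % ratio == 0 else next(sup)

# With ratio=3: step 0 is unsupervised, steps 1-2 supervised, and so on.
mixed = interleave(['u0', 'u1'], ['s0', 's1', 's2', 's3'], ratio=3)
assert [next(mixed) for _ in range(6)] == ['u0', 's0', 's1', 'u1', 's2', 's3']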
def _train(config):
    data_filter = get_squad_data_filter(config)
    # train_data = read_data(config, 'train', config.load, data_filter=data_filter)
    train_data = read_data(config, 'train', False, data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)
    update_config(config, [train_data, dev_data])
    _config_debug(config)

    word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size),
                                                           np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading / saving

    # Variables
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    config_proto.allow_soft_placement = True
    sess = tf.Session(config=config_proto)
    graph_handler.initialize(sess)

    # plot weights
    for train_var in tf.trainable_variables():
        plot_tensor(train_var.eval(session=sess), train_var.op.name,
                    plot_weights=config.plot_weights, hidden_size=config.hidden_size)

    # Begin training
    num_steps = config.num_steps or int(math.ceil(train_data.num_examples /
                                                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                     num_steps=num_steps, shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(model.global_step) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                      num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)

    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
def _train(config):
    data_filter = get_squad_data_filter(config)
    # the next three lines read the data
    train_data = read_data(config, 'train', config.load, data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)
    update_config(config, [train_data, dev_data])
    _config_debug(config)

    # what exactly is the emb_mat built here?
    word2vec_dict = train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size),
                                                           np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading / saving

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Begin training
    num_steps = config.num_steps or int(math.ceil(train_data.num_examples /
                                                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                     num_steps=num_steps, shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(model.global_step) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)
            # tf.train.Saver().save(sess, 'ckpts/') writes four files under ckpts/:
            #   checkpoint: a text file (viewable in vim) recording which checkpoints were saved
            # the following three files together make up one checkpoint:
            #   model.ckpt.data-00000-of-00001: the data file of a checkpoint
            #   model.ckpt.index: the index file of a checkpoint (binary or another format, not directly viewable)
            #   model.ckpt.meta: the meta data of a checkpoint (binary or another format, not directly viewable)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                      num_steps=num_steps, shuffle=True),
                           total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            if config.dump_eval:
                graph_handler.dump_eval(e_dev)

    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
def _train(config):
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config, 'train', config.load, data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)
    update_config(config, [train_data, dev_data])
    _config_debug(config)

    word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    new_word2idx_dict = train_data.shared['new_word2idx']
    print('word2idx len : {}, new_word2idx len : {}'.format(len(word2idx_dict), len(new_word2idx_dict)))
    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    idx2word_dict = {idx: word for word, idx in word2idx_dict.items()}
    offset = len(idx2word_dict)
    idx2word_dict.update({offset + idx: word for word, idx in new_word2idx_dict.items()})
    train_data.shared['idx2word'] = idx2word_dict
    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size),
                                                           np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading / saving

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Begin training
    num_steps = config.num_steps or int(math.ceil(train_data.num_examples /
                                                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                     num_steps=num_steps, shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        # part where QA and QG are trained with a shared encoder
        global_step = sess.run(model.global_step) + 1
        # get_summary = global_step % config.log_period == 1
        get_summary = True  # too impatient to wait for the log period; print at every step...
        loss, seq2seq_loss, summary, train_op, gen_q_sample = trainer.step(sess, batches, get_summary=get_summary)
        config.is_gen = False
        print("global step : ", global_step)
        print("Loss : ", loss, "|", seq2seq_loss)
        print("Generated Question Sample : ", ' '.join([idx2word_dict[w] for w in gen_q_sample[0]]))
        """
        config.is_gen = True
        for (_, batch) in batches:
            batch.data['q'] = [''] * len(gen_q_sample)
            batch.data['cq'] = [''] * len(gen_q_sample)
            for b_idx in range(len(gen_q_sample)):
                batch.data['q'][b_idx] = [idx2word_dict[w] if w in idx2word_dict else "-UNK-"
                                          for w in gen_q_sample[b_idx]]
                batch.data['cq'][b_idx] = [list(idx2word_dict[w] if w in idx2word_dict else "-UNK-")
                                           for w in gen_q_sample[b_idx]]
            qa_gen_loss, _, __, train_op, ___ = trainer.step(sess, batch, get_summary=get_summary,
                                                             is_gen=config.is_gen)
            print("QA Gen Loss : ", qa_gen_loss)
        """
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            """
            e_train = evaluator.get_evaluation_from_batches(
                sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            """
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                      num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            print(e_dev)
            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)

    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
def _train(config):
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config, config.trainfile, "train", data_filter=data_filter)
    dev_data = read_data(config, config.validfile, "valid", data_filter=data_filter)
    update_config(config, [train_data, dev_data])
    _config_debug(config)

    models = get_multi_gpu_models(config)
    model = models[0]
    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    # controls all tensors and variables in the graph, including loading / saving
    graph_handler = GraphHandler(config, model)

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Begin training
    num_steps = min(config.num_steps,
                    int(math.ceil(train_data.num_examples / (config.batch_size * config.num_gpus))) * config.num_epochs)
    acc = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                     num_steps=num_steps, shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(model.global_step) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # Occasional evaluation and saving
        if global_step % config.save_period == 0:
            num_steps = int(math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus)))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                      num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            if e_dev.acc > acc:
                acc = e_dev.acc
                print("begin saving model...")
                print(e_dev)
                graph_handler.save(sess)
                print("end saving model, dumping eval and answer...")
                if config.dump_eval:
                    graph_handler.dump_eval(e_dev)
                if config.dump_answer:
                    graph_handler.dump_answer(e_dev)
                print("end dumping")

    print("begin freezing model...")
    config.clear_device = False
    config.input_path = graph_handler.save_path
    config.output_path = "model"
    config.input_names = None
    config.output_names = None
    freeze_graph(config)
    print("model frozen at {}".format(config.output_path))
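# Unlike the other _train variants, the one above checkpoints on improvement
# rather than on a schedule: it saves only when dev accuracy beats the best seen
# so far. The bookkeeping reduces to this sketch:
def save_if_best(best_acc, dev_acc, save):
    """Call `save` only when dev accuracy improves; return the running best."""
    if dev_acc > best_acc:
        save()
        return dev_acc
    return best_acc

# Example: 0.58 does not trigger a save; the other three evaluations do.
best = 0.0
for dev_acc in [0.55, 0.61, 0.58, 0.64]:
    best = save_if_best(best, dev_acc, save=lambda: print("saving"))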
def _train(config):
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config, 'train', config.load)
    dev_data = read_data(config, 'dev', True)
    update_config(config, [train_data, dev_data])
    _config_debug(config)

    word2vec_dict = train_data.shared['word2vec']
    # word2idx_dict = train_data.shared['word2idx']
    # idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    # emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
    #                     else np.random.multivariate_normal(np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
    #                     for idx in range(config.word_vocab_size)])
    config.emb_mat = np.array(word2vec_dict, dtype=np.float32)
    print("embmat", config.emb_mat)
    print('begin construct')

    # construct model graph and variables (using default graph)
    # pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    trainer = MultiGPUTrainer(config, models)
    evaluator = myMultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading / saving
    print('construct graph ready')

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)
    print('initialize session ready')

    # Begin training
    print("begin train")
    num_steps = config.num_steps or int(math.ceil(train_data.num_examples /
                                                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                     num_steps=num_steps, shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(model.global_step) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                      num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)

    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
def _train(config):
    # load_metadata(config, 'train')  # this updates the config file according to metadata file
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config, 'train', config.load, data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)
    # test_data = read_data(config, 'test', True, data_filter=data_filter)
    update_config(config, [train_data, dev_data])
    _config_draft(config)

    word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    print("{}/{} unique words have corresponding glove vectors.".format(len(idx2vec_dict), len(word2idx_dict)))
    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size),
                                                           np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    # model = Model(config)
    models = get_multi_gpu_models(config)
    model = models[0]
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading / saving

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # begin training
    print(train_data.num_examples)
    num_steps = config.num_steps or int(math.ceil(train_data.num_examples /
                                                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                     num_steps=num_steps, shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(model.global_step) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
            if 0 < config.eval_num_batches < num_steps:
                num_steps = config.eval_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                      num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)

    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
def _test(config):
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])
    _config_debug(config)

    if config.use_glove_for_unk:
        word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
        new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
        config.new_emb_mat = new_emb_mat

    pprint(config.__flags, indent=2)

    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = MultiGPUF1Evaluator(config, models,
                                    tensor_dict=models[0].tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    config_proto.allow_soft_placement = True
    sess = tf.Session(config=config_proto)
    graph_handler.initialize(sess)
    num_steps = math.ceil(test_data.num_examples / (config.batch_size * config.num_gpus))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    # plot weights
    for train_var in tf.trainable_variables():
        plot_tensor(train_var.eval(session=sess), train_var.op.name,
                    plot_weights=config.plot_weights, hidden_size=config.hidden_size)
    plt.show()

    if config.group_config:
        get_structure_sparsity(sess, config.group_config)
        print('Structure coordinating...')
        sess.run([model.get_var_structure_coordinate_op()])
        get_structure_sparsity(sess, config.group_config)

    e = None
    for multi_batch in tqdm(test_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps, cluster=config.cluster),
                            total=num_steps):
        ei = evaluator.get_evaluation(sess, multi_batch)
        e = ei if e is None else e + ei
        if config.vis:
            eval_subdir = os.path.join(config.eval_dir,
                                       "{}-{}".format(ei.data_type, str(ei.global_step).zfill(6)))
            if not os.path.exists(eval_subdir):
                os.mkdir(eval_subdir)
            path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
            graph_handler.dump_eval(ei, path=path)

    print(e)
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)
def _test(config):
    assert config.load
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])
    _config_draft(config)

    if config.use_glove_for_unk:
        word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
        # print("{}/{} unique words have corresponding glove vectors.".format(len(idx2vec_dict), len(word2idx_dict)))
        new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
        config.new_emb_mat = new_emb_mat

    pprint(config.__flags, indent=2)

    models = get_multi_gpu_models(config)
    evaluator = MultiGPUF1Evaluator(config, models,
                                    tensor_dict=models[0].tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config)  # controls all tensors and variables in the graph, including loading / saving

    config_proto = tf.ConfigProto(allow_soft_placement=True)
    config_proto.gpu_options.per_process_gpu_memory_fraction = 0.7
    sess = tf.Session(config=config_proto)
    graph_handler.initialize(sess)
    num_steps = math.ceil(test_data.num_examples / (config.batch_size * config.num_gpus))
    # if 0 < config.eval_num_batches < num_steps:
    #     num_steps = config.eval_num_batches
    # print(config.eval_num_batches)
    # assert(False)

    e = None
    for multi_batch in tqdm(test_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                        num_steps=num_steps, cluster=config.cluster),
                            total=num_steps):
        idx, batch = multi_batch[0]
        if config.use_special_token:
            batch.data['dataset_type'] = ['NEWSQA']
        ei = evaluator.get_evaluation(sess, multi_batch)
        e = ei if e is None else e + ei
        if config.vis:
            eval_subdir = os.path.join(config.eval_dir,
                                       "{}-{}".format(ei.data_type, str(ei.global_step).zfill(6)))
            if not os.path.exists(eval_subdir):
                os.mkdir(eval_subdir)
            path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
            graph_handler.dump_eval(ei, path=path)

    print(e)
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)