def main():
    qanet = QANet(50)
    # Parameter initialization: Xavier for weight matrices, normal init for 1-D parameters
    init1 = filter(lambda p: p.requires_grad and p.dim() >= 2, qanet.parameters())
    init2 = filter(lambda p: p.requires_grad and p.dim() < 2, qanet.parameters())
    for param in init1:
        nn.init.xavier_uniform_(param)
    for param in init2:
        nn.init.normal_(param)

    train = SQuAD(TRAIN_JSON)
    val = SQuAD(DEV_JSON)
    # trainSet = DataLoader(dataset=train, batch_size=4, shuffle=True, collate_fn=collate)
    valSet = DataLoader(dataset=val, batch_size=4, shuffle=True, collate_fn=collate)
    trainSet = DataLoader(dataset=train, batch_size=4, shuffle=True, collate_fn=collate)
    print('length of dataloader', len(trainSet))

    optimizer = torch.optim.Adam(qanet.parameters(), lr=LEARNING_RATE)
    loss_list = []
    for epoch in range(10):
        print('epoch ', epoch)
        for i, (c, q, a) in enumerate(trainSet):
            y_pred = qanet(c, q)
            loss = utils.loss(y_pred, a)
            loss_list.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i % 200 == 0:
                print('loss ', loss.item())

    with open('your_file.txt', 'w') as f:
        for item in loss_list:
            f.write("%s\n" % item)
    print('loss file written.')
    torch.save(qanet, 'qanet')
    print('model saved.')
def demo(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    model = QANet(config, None, word_mat, char_mat, trainable=False, demo=True)
    demo = Demo(model, config)
def main(): r""" Main function. """ model = QANet() # initial parameters logging.info('Initial paramters...') if opt.load_trained_model: model.load_parameters(opt.trained_model_name, ctx=CTX) else: logging.info('Initial model parameters...') initial_model_parameters(model) print(model) if opt.is_train: loss_function = MySoftmaxCrossEntropy() ema = ExponentialMovingAverage(decay=opt.ema_decay) # initial trainer trainer = gluon.Trainer( model.collect_params(), 'adam', { 'learning_rate': opt.init_learning_rate, 'beta1': opt.beta1, 'beta2': opt.beta2, 'epsilon': opt.epsilon }) if opt.load_trained_model: trainer.load_states(opt.trained_trainer_name) # initial dataloader train_data_loader = DataLoader(batch_size=opt.train_batch_size, dev_set=False) # train logging.info('Train') train(model, train_data_loader, trainer, loss_function, ema) else: logging.info('Evaluating dev set...') f1_score, em_score = evaluate(model, dataset_type='dev', ema=None) logging.debug('The dev dataset F1 is:%.5f, and EM is: %.5f', f1_score, em_score)
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)
    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file,
                                 get_record_parser(config, is_test=True),
                                 config).make_one_shot_iterator()
        model = QANet(config, test_batch, word_mat, char_mat, trainable=False, graph=g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(metrics['exact_match'], metrics['f1']))
def main(): r""" Main function. """ model = QANet() # initial parameters print('Initial paramters...') if NEED_LOAD_TRAINED_MODEL: model.load_parameters(TARGET_MODEL_FILE_NAME, ctx=CTX) else: print('Initial model parameters...') initial_model_parameters(model) print(model) if TRAIN_FLAG is True: loss_function = MySoftmaxCrossEntropy() ema = ExponentialMovingAverage(decay=EXPONENTIAL_MOVING_AVERAGE_DECAY) # initial trainer trainer = gluon.Trainer( model.collect_params(), 'adam', { 'learning_rate': INIT_LEARNING_RATE, 'beta1': BETA1, 'beta2': BETA2, 'epsilon': EPSILON }) if NEED_LOAD_TRAINED_MODEL: trainer.load_states(TARGET_TRAINER_FILE_NAME) # initial dataloader train_data_loader = DataLoader(batch_size=TRAIN_BATCH_SIZE, dev_set=False) # train print('Train...') train(model, train_data_loader, trainer, loss_function, ema) else: print('Evaluating dev set...') f1_score, em_score = evaluate(model, dataset_type='dev', ema=None) print('The dev dataset F1 is:%s, and EM is: %s' % (f1_score, em_score))
def test(cfg):
    logging.info('Model is loading...')
    with open(cfg['dev_eval_file'], "r") as fh:
        dev_eval_file = json.load(fh)
    dev_dataset = SQuADDataset(cfg['dev_record_file'], -1, cfg['batch_size'], cfg['word2ind_file'])

    model_args = pickle.load(open(cfg['args_filename'], 'rb'))
    model = QANet(**model_args)
    model.load_state_dict(torch.load(cfg['dump_filename']))
    model.to(device)

    metrics, answer_dict = evaluation(model, dev_dataset, dev_eval_file, len(dev_dataset))
    with open('logs/answers.json', 'w') as f:
        json.dump(answer_dict, f)
    logging.info("TEST loss %f F1 %f EM %f\n", metrics["loss"], metrics["f1"], metrics["exact_match"])
def train(model_params, launch_params):
    with open(launch_params['word_emb_file'], "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(launch_params['char_emb_file'], "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(launch_params['train_eval_file'], "r") as fh:
        train_eval_file = json.load(fh)
    with open(launch_params['dev_eval_file'], "r") as fh:
        dev_eval_file = json.load(fh)

    writer = SummaryWriter(os.path.join(launch_params['log'], launch_params['prefix']))

    lr = launch_params['learning_rate']
    base_lr = 1.0
    warm_up = launch_params['lr_warm_up_num']
    model_params['word_mat'] = word_mat
    model_params['char_mat'] = char_mat

    logging.info('Load dataset and create model.')
    dev_dataset = SQuADDataset(launch_params['dev_record_file'], launch_params['test_num_batches'],
                               launch_params['batch_size'], launch_params['word2ind_file'])
    if launch_params['fine_tuning']:
        train_dataset = SQuADDataset(launch_params['train_record_file'], launch_params['fine_tuning_steps'],
                                     launch_params['batch_size'], launch_params['word2ind_file'])
        model_args = pickle.load(open(launch_params['args_filename'], 'rb'))
        model = QANet(**model_args)
        model.load_state_dict(torch.load(launch_params['dump_filename']))
        model.to(device)
    else:
        train_dataset = SQuADDataset(launch_params['train_record_file'], launch_params['num_steps'],
                                     launch_params['batch_size'], launch_params['word2ind_file'])
        model = QANet(**model_params).to(device)
        launch_params['fine_tuning_steps'] = 0

    params = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = optim.Adam(params, lr=base_lr, betas=(launch_params['beta1'], launch_params['beta2']),
                           eps=1e-7, weight_decay=3e-7)
    cr = lr / log2(warm_up)
    scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda ee: cr * log2(ee + 1) if ee < warm_up else lr)
    qt = False

    logging.info('Start training.')
    for iter in range(launch_params['num_steps']):
        try:
            passage_w, passage_c, question_w, question_c, y1, y2, ids = train_dataset[iter]
            passage_w, passage_c = passage_w.to(device), passage_c.to(device)
            question_w, question_c = question_w.to(device), question_c.to(device)
            y1, y2 = y1.to(device), y2.to(device)
            loss, p1, p2 = model.train_step([passage_w, passage_c, question_w, question_c],
                                            y1, y2, optimizer, scheduler)
            if iter % launch_params['train_interval'] == 0:
                logging.info('Iteration %d; Loss: %f', iter + launch_params['fine_tuning_steps'], loss)
                writer.add_scalar('Loss', loss, iter + launch_params['fine_tuning_steps'])
            if iter % launch_params['train_sample_interval'] == 0:
                start = torch.argmax(p1[0, :]).item()
                end = torch.argmax(p2[0, start:]).item() + start
                passage = train_dataset.decode(passage_w)
                question = train_dataset.decode(question_w)
                generated_answer = train_dataset.decode(passage_w[:, start:end + 1])
                real_answer = train_dataset.decode(passage_w[:, y1[0]:y2[0] + 1])
                logging.info('Train Sample:\n Passage: %s\nQuestion: %s\nOriginal answer: %s\nGenerated answer: %s',
                             passage, question, real_answer, generated_answer)
            if iter % launch_params['test_interval'] == 0:
                metrics, _ = evaluation(model, train_dataset, train_eval_file, launch_params['val_num_batches'])
                logging.info("VALID loss %f F1 %f EM %f", metrics['loss'], metrics['f1'], metrics['exact_match'])
                writer.add_scalar('Valid_loss', metrics['loss'], iter)
                writer.add_scalar('Valid_f1', metrics['f1'], iter)
                writer.add_scalar('Valid_em', metrics['exact_match'], iter)
            if iter % launch_params['test_interval'] == 0:
                metrics, _ = evaluation(model, dev_dataset, dev_eval_file, launch_params['test_num_batches'])
                logging.info("TEST loss %f F1 %f EM %f", metrics['loss'], metrics['f1'], metrics['exact_match'])
                writer.add_scalar('Test_loss', metrics['loss'], iter)
                writer.add_scalar('Test_f1', metrics['f1'], iter)
                writer.add_scalar('Test_em', metrics['exact_match'], iter)
        except RuntimeError as e:
            logging.error(str(e))
        except KeyboardInterrupt:
            break

    torch.save(model.cpu().state_dict(), launch_params['dump_filename'])
    pickle.dump(model_params, open(launch_params['args_filename'], 'wb'))
    logging.info('Model has been saved.')
def train(config):
    print(dict(config.__dict__['__flags']))
    print()
    sys.stdout.flush()

    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.train_eval_file, "r") as fh:
        train_eval_file = json.load(fh)
    with open(config.dev_eval_file, "r") as fh:
        dev_eval_file = json.load(fh)
    with open(config.dev_meta, "r") as fh:
        meta = json.load(fh)
    dev_total = meta["total"]

    print("Building model...")
    parser = get_record_parser(config)
    graph = tf.Graph()
    with graph.as_default() as g:
        train_dataset = get_batch_dataset(config.train_record_file, parser, config)
        dev_dataset = get_dataset(config.dev_record_file, parser, config)
        handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        train_iterator = train_dataset.make_one_shot_iterator()
        dev_iterator = dev_dataset.make_one_shot_iterator()

        model = QANet(config, iterator, word_mat, char_mat, graph=g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction

        loss_save = 100.0
        patience = 0
        best_f1 = 0.
        best_em = 0.

        with tf.Session(config=sess_config) as sess:
            writer = tf.summary.FileWriter(config.log_dir)
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            train_handle = sess.run(train_iterator.string_handle())
            dev_handle = sess.run(dev_iterator.string_handle())
            if os.path.exists(os.path.join(config.save_dir, "checkpoint")):
                saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            global_step = max(sess.run(model.global_step), 1)

            for _ in tqdm(range(global_step, config.num_steps + 1)):
                global_step = sess.run(model.global_step) + 1
                loss, train_op = sess.run([model.loss, model.train_op], feed_dict={
                    handle: train_handle, model.dropout: config.dropout})
                if global_step % config.period == 0:
                    loss_sum = tf.Summary(value=[
                        tf.Summary.Value(tag="model/loss", simple_value=loss),
                    ])
                    writer.add_summary(loss_sum, global_step)
                if global_step % config.checkpoint == 0:
                    _, summ = evaluate_batch(
                        model, config.val_num_batches, train_eval_file, sess, "train", handle, train_handle)
                    for s in summ:
                        writer.add_summary(s, global_step)

                    metrics, summ = evaluate_batch(
                        model, dev_total // config.batch_size + 1, dev_eval_file, sess, "dev", handle, dev_handle)
                    dev_f1 = metrics["f1"]
                    dev_em = metrics["exact_match"]
                    if dev_f1 < best_f1 and dev_em < best_em:
                        patience += 1
                        if patience > config.early_stop:
                            break
                    else:
                        patience = 0
                        best_em = max(best_em, dev_em)
                        best_f1 = max(best_f1, dev_f1)

                    for s in summ:
                        writer.add_summary(s, global_step)
                    writer.flush()
                    filename = os.path.join(
                        config.save_dir, "model_{}.ckpt".format(global_step))
                    saver.save(sess, filename)
eval_dataset = SQuADDataset('data/dev.npz')
train_dataset = SQuADDataset('data/train.npz')

print('Loading Embeddings...')
import numpy as np
import json
char_emb_matrix = np.array(json.load(open('data/char_emb.json')), dtype=np.float32)
word_emb_matrix = np.array(json.load(open('data/word_emb.json')), dtype=np.float32)

print('Create Model...')
from model import QANet
model = QANet(128, 400, 50, word_emb_matrix, char_emb_matrix, droprate=0.1).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.8, 0.999), eps=1e-08,
                       weight_decay=3e-07, amsgrad=False)
del char_emb_matrix, word_emb_matrix

# logarithmic learning-rate warm-up over the first 1000 steps
import math
warm_up = 1000
warm_up_f = lambda x: math.log(x + 1) / math.log(warm_up) if x < warm_up else 1
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[warm_up_f])
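# A minimal sketch of one training step for the setup above, assuming that iterating
# train_dataset yields (context_word, context_char, question_word, question_char, y1, y2)
# batches and that the model returns start/end log-probabilities; both are assumptions
# about this pipeline rather than something the snippet itself confirms.
import torch
import torch.nn.functional as F

def train_step(batch):
    cw, cc, qw, qc, y1, y2 = (t.to(device) for t in batch)
    model.train()
    optimizer.zero_grad()
    log_p1, log_p2 = model(cw, cc, qw, qc)
    loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)  # gradient clipping, common in QANet training
    optimizer.step()
    scheduler.step()  # advance the logarithmic warm-up schedule once per optimizer step
    return loss.item()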
def train(num_units, batch_size, sentence_size, embedding_size, ctx):
    net = QANet(num_units, batch_size, sentence_size, embedding_size)
    net.collect_params().initialize(ctx=ctx)
    loss = LogLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})
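# The train() above stops after building the trainer. Below is a hedged sketch of how an
# epoch loop could continue, assuming data_iter yields (context, question, begin, end)
# NDArray batches and that LogLoss takes (begin_pred, end_pred, begin, end); neither the
# dataloader nor the loss signature appears in the fragment, so both are assumptions.
from mxnet import autograd

def train_epoch(net, trainer, loss, data_iter, ctx):
    total_loss = 0.0
    for context, question, begin, end in data_iter:
        context, question = context.as_in_context(ctx), question.as_in_context(ctx)
        begin, end = begin.as_in_context(ctx), end.as_in_context(ctx)
        with autograd.record():
            begin_pred, end_pred = net(context, question)
            l = loss(begin_pred, end_pred, begin, end)
        l.backward()
        trainer.step(context.shape[0])  # normalize gradients by the batch size
        total_loss += l.mean().asscalar()
    return total_loss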
def get_model(opt, word_mat, char_mat):
    model = QANet(word_mat, char_mat, opt.dropout, opt.dropout_char, opt.max_passage_len, opt.encode_size)
    return model
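# A hedged usage sketch for get_model(): the option values and embedding paths below are
# illustrative placeholders, not taken from the fragment above.
import json
import numpy as np
import torch
from types import SimpleNamespace

def load_matrix(path):
    with open(path, "r") as fh:
        return np.array(json.load(fh), dtype=np.float32)

opt = SimpleNamespace(dropout=0.1, dropout_char=0.05, max_passage_len=400, encode_size=128)
word_mat = load_matrix('data/word_emb.json')   # hypothetical preprocessed embedding files
char_mat = load_matrix('data/char_emb.json')
model = get_model(opt, word_mat, char_mat)
if torch.cuda.is_available():
    model = model.cuda()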