Example 1
import torch
from torch import nn
from torch.utils.data import DataLoader
# SQuAD, collate, utils, TRAIN_JSON, DEV_JSON, and LEARNING_RATE are defined
# elsewhere in the surrounding project.


def main():
    qanet = QANet(50)
    # Parameter initialization: Xavier for weight matrices (dim >= 2), normal
    # for vectors and biases (dim < 2). Using < 2 for the second filter avoids
    # re-initializing dim-2 parameters twice.
    init1 = filter(lambda p: p.requires_grad and p.dim() >= 2,
                   qanet.parameters())
    init2 = filter(lambda p: p.requires_grad and p.dim() < 2,
                   qanet.parameters())
    for param in init1:
        nn.init.xavier_uniform_(param)
    for param in init2:
        nn.init.normal_(param)

    train = SQuAD(TRAIN_JSON)
    val = SQuAD(DEV_JSON)

    valSet = DataLoader(dataset=val,
                        batch_size=4,
                        shuffle=True,
                        collate_fn=collate)
    trainSet = DataLoader(dataset=train,
                          batch_size=4,
                          shuffle=True,
                          collate_fn=collate)

    print('number of training batches:', len(trainSet))

    optimizer = torch.optim.Adam(qanet.parameters(), lr=LEARNING_RATE)
    loss_list = []
    for epoch in range(10):
        print('epoch ', epoch)
        for i, (c, q, a) in enumerate(trainSet):
            y_pred = qanet(c, q)
            loss = utils.loss(y_pred, a)
            loss_list.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i % 200 == 0:
                print('loss ', loss.item())
        with open('your_file.txt', 'w') as f:
            for item in loss_list:
                f.write("%s\n" % item)
            print('loss file written.')
        torch.save(qanet, 'qanet')  # saves the whole module; saving qanet.state_dict() would be more portable
        print('model saved.')
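For context, here is a minimal sketch of the span loss that utils.loss is assumed to compute above: the standard QANet objective, i.e. the negative log-likelihood of the gold start and end positions. The shapes are assumptions, with y_pred a pair of per-token log-probability tensors and a a batch of (start, end) index pairs.

import torch.nn.functional as F

def span_loss(y_pred, a):
    p1, p2 = y_pred                      # log-probs over start / end positions
    y_start, y_end = a[:, 0], a[:, 1]    # gold start / end token indices
    return F.nll_loss(p1, y_start) + F.nll_loss(p2, y_end)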
Example 2
def demo(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    model = QANet(config, None, word_mat, char_mat, trainable=False, demo=True)
    Demo(model, config)  # the Demo object launches the interactive demo on construction
Example 3
def main():
    r"""
    Main function.
    """
    model = QANet()
    # initialize parameters
    logging.info('Initializing parameters...')
    if opt.load_trained_model:
        model.load_parameters(opt.trained_model_name, ctx=CTX)
    else:
        logging.info('Initializing model parameters...')
        initial_model_parameters(model)
    print(model)
    if opt.is_train:
        loss_function = MySoftmaxCrossEntropy()

        ema = ExponentialMovingAverage(decay=opt.ema_decay)

        # initialize trainer
        trainer = gluon.Trainer(
            model.collect_params(), 'adam', {
                'learning_rate': opt.init_learning_rate,
                'beta1': opt.beta1,
                'beta2': opt.beta2,
                'epsilon': opt.epsilon
            })

        if opt.load_trained_model:
            trainer.load_states(opt.trained_trainer_name)

        # initialize dataloader
        train_data_loader = DataLoader(batch_size=opt.train_batch_size,
                                       dev_set=False)

        # train
        logging.info('Train')
        train(model, train_data_loader, trainer, loss_function, ema)
    else:
        logging.info('Evaluating dev set...')
        f1_score, em_score = evaluate(model, dataset_type='dev', ema=None)
        logging.debug('Dev set F1: %.5f, EM: %.5f', f1_score, em_score)
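A minimal sketch of what initial_model_parameters might do for a fresh Gluon model; Xavier initialization is an assumption here, since the concrete scheme lives elsewhere in the project:

import mxnet as mx

def initial_model_parameters(model):
    # CTX is the project's global mx.cpu()/mx.gpu() context.
    model.initialize(init=mx.init.Xavier(), ctx=CTX)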
Example 4
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file,
                                 get_record_parser(config, is_test=True),
                                 config).make_one_shot_iterator()

        model = QANet(config,
                      test_batch,
                      word_mat,
                      char_mat,
                      trainable=False,
                      graph=g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)  # swap in the exponential-moving-average weights
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                                   metrics['f1']))
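For reference, a sketch of what convert_tokens typically does in QANet-style codebases: it maps predicted token indices back to raw-text answer spans. The eval_file layout (per-question context, character spans, and uuid) is an assumption:

def convert_tokens(eval_file, qa_id, pp1, pp2):
    answer_dict, remapped_dict = {}, {}
    for qid, p1, p2 in zip(qa_id, pp1, pp2):
        entry = eval_file[str(qid)]
        start, end = entry["spans"][p1][0], entry["spans"][p2][1]
        answer_dict[str(qid)] = entry["context"][start:end]
        remapped_dict[entry["uuid"]] = entry["context"][start:end]
    return answer_dict, remapped_dict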
Example 5
def main():
    r"""
    Main function.
    """
    model = QANet()
    # initialize parameters
    print('Initializing parameters...')
    if NEED_LOAD_TRAINED_MODEL:
        model.load_parameters(TARGET_MODEL_FILE_NAME, ctx=CTX)
    else:
        print('Initializing model parameters...')
        initial_model_parameters(model)
    print(model)
    if TRAIN_FLAG:
        loss_function = MySoftmaxCrossEntropy()

        ema = ExponentialMovingAverage(decay=EXPONENTIAL_MOVING_AVERAGE_DECAY)

        # initialize trainer
        trainer = gluon.Trainer(
            model.collect_params(), 'adam', {
                'learning_rate': INIT_LEARNING_RATE,
                'beta1': BETA1,
                'beta2': BETA2,
                'epsilon': EPSILON
            })

        if NEED_LOAD_TRAINED_MODEL:
            trainer.load_states(TARGET_TRAINER_FILE_NAME)

        # initialize dataloader
        train_data_loader = DataLoader(batch_size=TRAIN_BATCH_SIZE,
                                       dev_set=False)

        # train
        print('Train...')
        train(model, train_data_loader, trainer, loss_function, ema)
    else:
        print('Evaluating dev set...')
        f1_score, em_score = evaluate(model, dataset_type='dev', ema=None)
        print('Dev set F1: %s, EM: %s' % (f1_score, em_score))
Example 6
def test(cfg):
    logging.info('Model is loading...')
    with open(cfg['dev_eval_file'], "r") as fh:
        dev_eval_file = json.load(fh)
    dev_dataset = SQuADDataset(cfg['dev_record_file'], -1, cfg['batch_size'], cfg['word2ind_file'])
    model_args = pickle.load(open(cfg['args_filename'], 'rb'))
    model = QANet(**model_args)

    model.load_state_dict(torch.load(cfg['dump_filename']))
    model.to(device)
    
    metrics, answer_dict = evaluation(model, dev_dataset, dev_eval_file, len(dev_dataset))
    with open('logs/answers.json', 'w') as f:
        json.dump(answer_dict, f)
    logging.info("TEST loss %f F1 %f EM %f\n", metrics["loss"], metrics["f1"], metrics["exact_match"])
Example 7
def train(model_params, launch_params):
    with open(launch_params['word_emb_file'], "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(launch_params['char_emb_file'], "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(launch_params['train_eval_file'], "r") as fh:
        train_eval_file = json.load(fh)
    with open(launch_params['dev_eval_file'], "r") as fh:
        dev_eval_file = json.load(fh)

    writer = SummaryWriter(os.path.join(launch_params['log'], launch_params['prefix']))
    
    lr = launch_params['learning_rate']
    base_lr = 1.0
    warm_up = launch_params['lr_warm_up_num']
    model_params['word_mat'] = word_mat
    model_params['char_mat'] = char_mat
    
    logging.info('Load dataset and create model.')
    dev_dataset = SQuADDataset(launch_params['dev_record_file'], launch_params['test_num_batches'], 
                               launch_params['batch_size'], launch_params['word2ind_file'])
    if launch_params['fine_tuning']:
        train_dataset = SQuADDataset(launch_params['train_record_file'], launch_params['fine_tuning_steps'], 
                                    launch_params['batch_size'], launch_params['word2ind_file'])
        model_args = pickle.load(open(launch_params['args_filename'], 'rb'))
        model = QANet(**model_args)
        model.load_state_dict(torch.load(launch_params['dump_filename']))
        model.to(device)
    else:
        train_dataset = SQuADDataset(launch_params['train_record_file'], launch_params['num_steps'], 
                                    launch_params['batch_size'], launch_params['word2ind_file'])
        model = QANet(**model_params).to(device)
        launch_params['fine_tuning_steps'] = 0
    
    params = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = optim.Adam(params, lr=base_lr, betas=(launch_params['beta1'], launch_params['beta2']), eps=1e-7, weight_decay=3e-7)
    cr = lr / log2(warm_up)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda ee: cr * log2(ee + 1) if ee < warm_up else lr)
    logging.info('Start training.')
    for step in range(launch_params['num_steps']):
        try:
            passage_w, passage_c, question_w, question_c, y1, y2, ids = train_dataset[step]
            passage_w, passage_c = passage_w.to(device), passage_c.to(device)
            question_w, question_c = question_w.to(device), question_c.to(device)
            y1, y2 = y1.to(device), y2.to(device)
            loss, p1, p2 = model.train_step([passage_w, passage_c, question_w, question_c], y1, y2, optimizer, scheduler)
            if step % launch_params['train_interval'] == 0:
                logging.info('Iteration %d; Loss: %f', step+launch_params['fine_tuning_steps'], loss)
                writer.add_scalar('Loss', loss, step+launch_params['fine_tuning_steps'])
            if step % launch_params['train_sample_interval'] == 0:
                start = torch.argmax(p1[0, :]).item()
                end = torch.argmax(p2[0, start:]).item()+start
                passage = train_dataset.decode(passage_w)
                question = train_dataset.decode(question_w)
                generated_answer = train_dataset.decode(passage_w[:, start:end+1])
                real_answer = train_dataset.decode(passage_w[:, y1[0]:y2[0]+1])
                logging.info('Train Sample:\n Passage: %s\nQuestion: %s\nOriginal answer: %s\nGenerated answer: %s',
                        passage, question, real_answer, generated_answer)
            if step % launch_params['test_interval'] == 0:
                metrics, _ = evaluation(model, train_dataset, train_eval_file, launch_params['val_num_batches'])
                logging.info("VALID loss %f F1 %f EM %f", metrics['loss'], metrics['f1'], metrics['exact_match'])
                writer.add_scalar('Valid_loss', metrics['loss'], step)
                writer.add_scalar('Valid_f1', metrics['f1'], step)
                writer.add_scalar('Valid_em', metrics['exact_match'], step)

                metrics, _ = evaluation(model, dev_dataset, dev_eval_file, launch_params['test_num_batches'])
                logging.info("TEST loss %f F1 %f EM %f", metrics['loss'], metrics['f1'], metrics['exact_match'])
                writer.add_scalar('Test_loss', metrics['loss'], step)
                writer.add_scalar('Test_f1', metrics['f1'], step)
                writer.add_scalar('Test_em', metrics['exact_match'], step)
        except RuntimeError as e:
            logging.error(str(e))
        except KeyboardInterrupt:
            break
    torch.save(model.cpu().state_dict(), launch_params['dump_filename'])
    pickle.dump(model_params, open(launch_params['args_filename'], 'wb'))
    logging.info('Model has been saved.')
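A minimal sketch of the train_step method this loop assumes, inside the QANet module: forward pass, span loss, backward pass, then optimizer and scheduler updates. The loss form and return signature are assumptions based on how the results are used above; F is torch.nn.functional.

    def train_step(self, inputs, y1, y2, optimizer, scheduler):
        optimizer.zero_grad()
        p1, p2 = self(*inputs)          # log-probs over start / end positions
        loss = F.nll_loss(p1, y1) + F.nll_loss(p2, y2)
        loss.backward()
        optimizer.step()
        scheduler.step()
        return loss.item(), p1, p2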
Example 8
def train(config):
    print(dict(config.__dict__['__flags']))
    print()
    sys.stdout.flush()
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.train_eval_file, "r") as fh:
        train_eval_file = json.load(fh)
    with open(config.dev_eval_file, "r") as fh:
        dev_eval_file = json.load(fh)
    with open(config.dev_meta, "r") as fh:
        meta = json.load(fh)

    dev_total = meta["total"]
    print("Building model...")
    parser = get_record_parser(config)
    graph = tf.Graph()
    with graph.as_default() as g:
        train_dataset = get_batch_dataset(config.train_record_file, parser,
                                          config)
        dev_dataset = get_dataset(config.dev_record_file, parser, config)
        handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        train_iterator = train_dataset.make_one_shot_iterator()
        dev_iterator = dev_dataset.make_one_shot_iterator()

        model = QANet(config, iterator, word_mat, char_mat, graph=g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction

        patience = 0
        best_f1 = 0.
        best_em = 0.

        with tf.Session(config=sess_config) as sess:
            writer = tf.summary.FileWriter(config.log_dir)
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            train_handle = sess.run(train_iterator.string_handle())
            dev_handle = sess.run(dev_iterator.string_handle())
            if os.path.exists(os.path.join(config.save_dir, "checkpoint")):
                saver.restore(sess,
                              tf.train.latest_checkpoint(config.save_dir))
            global_step = max(sess.run(model.global_step), 1)

            for _ in tqdm(range(global_step, config.num_steps + 1)):
                global_step = sess.run(model.global_step) + 1
                loss, _ = sess.run([model.loss, model.train_op],
                                   feed_dict={
                                       handle: train_handle,
                                       model.dropout: config.dropout
                                   })
                if global_step % config.period == 0:
                    loss_sum = tf.Summary(value=[
                        tf.Summary.Value(tag="model/loss", simple_value=loss),
                    ])
                    writer.add_summary(loss_sum, global_step)
                if global_step % config.checkpoint == 0:
                    _, summ = evaluate_batch(model, config.val_num_batches,
                                             train_eval_file, sess, "train",
                                             handle, train_handle)
                    for s in summ:
                        writer.add_summary(s, global_step)

                    metrics, summ = evaluate_batch(
                        model, dev_total // config.batch_size + 1,
                        dev_eval_file, sess, "dev", handle, dev_handle)

                    dev_f1 = metrics["f1"]
                    dev_em = metrics["exact_match"]
                    if dev_f1 < best_f1 and dev_em < best_em:
                        patience += 1
                        if patience > config.early_stop:
                            break
                    else:
                        patience = 0
                        best_em = max(best_em, dev_em)
                        best_f1 = max(best_f1, dev_f1)

                    for s in summ:
                        writer.add_summary(s, global_step)
                    writer.flush()
                    filename = os.path.join(
                        config.save_dir, "model_{}.ckpt".format(global_step))
                    saver.save(sess, filename)
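For reference, a sketch of the evaluate_batch helper this loop relies on, under the assumption that it mirrors convert_tokens-style decoding and returns the (metrics, summaries) pair consumed above:

def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    answer_dict, losses = {}, []
    for _ in range(num_batches):
        qa_id, loss, yp1, yp2 = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2],
            feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(eval_file, qa_id.tolist(),
                                         yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = np.mean(losses)
    summ = [tf.Summary(value=[tf.Summary.Value(tag="{}/{}".format(data_type, name),
                                               simple_value=metrics[key])])
            for name, key in [("loss", "loss"), ("f1", "f1"), ("em", "exact_match")]]
    return metrics, summ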
Example 9
    # (snippet begins mid-function; the enclosing block is not shown)
    eval_dataset = SQuADDataset('data/dev.npz')
    train_dataset = SQuADDataset('data/train.npz')

    print('Loading Embeddings..')
    import numpy as np
    import json

    char_emb_matrix = np.array(json.load(open('data/char_emb.json')),
                               dtype=np.float32)
    word_emb_matrix = np.array(json.load(open('data/word_emb.json')),
                               dtype=np.float32)

    print('Create Model..')
    from model import QANet

    model = QANet(128, 400, 50, word_emb_matrix, char_emb_matrix,
                  droprate=0.1).to(device)

    optimizer = optim.Adam(model.parameters(),
                           lr=0.001,
                           betas=(0.8, 0.999),
                           eps=1e-08,
                           weight_decay=3e-07,
                           amsgrad=False)
    del char_emb_matrix, word_emb_matrix

    import math
    warm_up = 1000
    warm_up_f = lambda x: math.log(x + 1) / math.log(warm_up) if x < warm_up else 1

    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[warm_up_f])
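A quick sanity check of this warm-up factor (a hypothetical probe, assuming warm_up = 1000 as above): the multiplier ramps logarithmically from 0 toward 1 and stays at 1 from step 1000 on.

    for step in (0, 9, 99, 999, 1000, 5000):
        print(step, round(warm_up_f(step), 3))   # -> 0.0, 0.333, 0.667, 1.0, 1, 1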
Example 10
def train(num_units, batch_size, sentence_size, embedding_size, ctx):
    net = QANet(num_units, batch_size, sentence_size, embedding_size)
    net.collect_params().initialize(ctx)
    loss = LogLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})
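The snippet ends after building the trainer; a minimal sketch of the training step that would typically follow, assuming a hypothetical (data, label) batch iterator named train_data and that LogLoss follows the standard gluon.loss call signature:

    import mxnet as mx

    for data, label in train_data:                 # hypothetical batch iterator
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)
        with mx.autograd.record():                 # record the forward pass for autograd
            batch_loss = loss(net(data), label)
        batch_loss.backward()
        trainer.step(batch_size)                   # SGD update, normalized by batch size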
Example 11
def get_model(opt, word_mat, char_mat):
    model = QANet(word_mat, char_mat,
                  opt.dropout, opt.dropout_char, opt.max_passage_len,
                  opt.encode_size)
    return model