def _test(config):
    """Evaluate the model on the 'test' split and optionally dump the results.

    Reads the test data, builds the multi-GPU models and a
    ``MultiGPUEvaluator``, evaluates batch by batch, accumulates the
    per-batch evaluations with ``+`` and prints the accumulated accuracy
    and loss.

    Args:
        config: Run configuration. Attributes read here include
            ``use_glove_for_unk``, ``lower_word``, ``vis``, ``batch_size``,
            ``num_gpus``, ``test_num_batches``, ``cluster``, ``eval_dir``,
            ``dump_answer`` and ``dump_eval``. ``config.new_emb_mat`` is
            set when ``use_glove_for_unk`` is truthy.

    Raises:
        ValueError: If no batch was evaluated, so there is no accumulated
            evaluation to report or dump.
    """
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])

    _config_debug(config)

    if config.use_glove_for_unk:
        word2vec_key = 'lower_word2vec' if config.lower_word else 'word2vec'
        word2vec_dict = test_data.shared[word2vec_key]
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {
            idx: word2vec_dict[word]
            for word, idx in new_word2idx_dict.items()
        }
        # Rows are ordered by index, so the indices in new_word2idx must be
        # exactly 0..n-1; a gap raises KeyError here.
        new_emb_mat = np.array(
            [idx2vec_dict[idx] for idx in range(len(idx2vec_dict))],
            dtype='float32')
        config.new_emb_mat = new_emb_mat

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = MultiGPUEvaluator(
        config,
        models,
        tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Number of multi-GPU steps needed to cover the test data once,
    # optionally capped by config.test_num_batches when that is positive.
    num_steps = math.ceil(test_data.num_examples /
                          (config.batch_size * config.num_gpus))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    e = None
    multi_batches = test_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=num_steps,
                                                cluster=config.cluster)
    for multi_batch in tqdm(multi_batches, total=num_steps):
        ei = evaluator.get_evaluation(sess, multi_batch)
        e = ei if e is None else e + ei
        if config.vis:
            # One sub-directory per (data type, global step); each batch is
            # dumped to a file named after its first example index.
            eval_subdir = os.path.join(
                config.eval_dir,
                "{}-{}".format(ei.data_type, str(ei.global_step).zfill(6)))
            # exist_ok avoids the check-then-create race and also creates a
            # missing config.eval_dir.
            os.makedirs(eval_subdir, exist_ok=True)
            path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
            graph_handler.dump_eval(ei, path=path)

    if e is None:
        # Without this guard the print below fails with an opaque
        # AttributeError on None.
        raise ValueError(
            "no test batches were evaluated; nothing to report or dump")

    print("test acc: %f, loss: %f" % (e.acc, e.loss))
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)
Beispiel #2
0
def train(config):
    """Build the word-embedding matrix on ``config`` and run ``train_bidaf``.

    Loads the 'train' and 'dev' data, updates ``config`` from both, stores
    an embedding matrix in ``config.emb_mat`` and finally calls
    ``train_bidaf()``.

    Args:
        config: Run configuration; ``word_vocab_size`` and ``word_emb_size``
            are read and ``emb_mat`` is written.
    """
    train_data = read_data('train')
    dev_data = read_data('dev')

    update_config(config, [train_data, dev_data])
    _config_debug(config)

    word2vec_dict = train_data.shared['lower_word2vec']
    word2idx_dict = train_data.shared['word2idx']

    # Vocabulary index -> vector, for every word that has a vector and is
    # also present in the vocabulary.
    idx2vec_dict = {}
    for word, vec in word2vec_dict.items():
        if word in word2idx_dict:
            idx2vec_dict[word2idx_dict[word]] = vec

    # One row per vocabulary index; indices without a vector get a sample
    # from a zero-mean, identity-covariance normal distribution.
    rows = []
    for idx in range(config.word_vocab_size):
        if idx in idx2vec_dict:
            rows.append(idx2vec_dict[idx])
        else:
            rows.append(
                np.random.multivariate_normal(
                    np.zeros(config.word_emb_size),
                    np.eye(config.word_emb_size)))
    config.emb_mat = np.array(rows)

    train_bidaf()
Beispiel #3
0
def _test(config):
    """Evaluate the model on the 'test' split with an ``AccuracyEvaluator``.

    Reads the test data, builds the multi-GPU models, evaluates every batch
    with the first model, accumulates the per-batch evaluations with ``+``
    and prints the accumulated accuracy. Per-batch visualisation dumps are
    not written by this function.

    Args:
        config: Run configuration. Attributes read here include
            ``test_num_can``, ``vis``, ``batch_size``, ``num_gpus``,
            ``cluster``, ``dump_eval`` and ``dump_answer``.

    Raises:
        ValueError: If no batch was evaluated, so there is no accumulated
            evaluation to report or dump.
    """
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])

    _config_debug(config)

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = AccuracyEvaluator(
        config.test_num_can, config, model,
        tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Number of multi-GPU steps needed to cover the test data once.
    num_steps = math.ceil(test_data.num_examples /
                          (config.batch_size * config.num_gpus))

    e = None
    multi_batches = test_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=num_steps,
                                                cluster=config.cluster)
    for multi_batch in tqdm(multi_batches, total=num_steps):
        ei = evaluator.get_evaluation(sess, multi_batch)
        e = ei if e is None else e + ei

    if e is None:
        # Without this guard the print below fails with an opaque
        # AttributeError on None.
        raise ValueError(
            "no test batches were evaluated; nothing to report or dump")

    print(e.acc)

    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)
    if config.dump_answer:
        print("dumping answers ...")
        graph_handler.dump_answer(e)
Beispiel #4
0
def main():
    """Train a ``BiDAF`` model with Adagrad, evaluating every 100 steps.

    Parses the configuration, reads the filtered 'train' and 'dev' data,
    stores the embedding matrix in ``config.emb_mat``, then runs the
    training loop. On every 100th step it calls ``eval_model`` on the dev
    data and prints the current train loss and the returned eval loss.
    """
    config = parse_args()
    data_filter = get_squad_data_filter(config)
    train_data = read_data(
        config, 'train', config.load, data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)

    update_config(config, [train_data, dev_data])
    _config_debug(config)

    print("Total vocabulary for training is %s" % config.word_vocab_size)

    if config.lower_word:
        word2vec_dict = train_data.shared['lower_word2vec']
    else:
        word2vec_dict = train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']

    # Vocabulary index -> vector, for words that have a vector and are in
    # the vocabulary.
    idx2vec_dict = {}
    for word, vec in word2vec_dict.items():
        if word in word2idx_dict:
            idx2vec_dict[word2idx_dict[word]] = vec

    # Use the known vector where there is one; otherwise draw a random
    # vector from a zero-mean, identity-covariance normal distribution.
    rows = []
    for idx in range(config.word_vocab_size):
        if idx in idx2vec_dict:
            rows.append(idx2vec_dict[idx])
        else:
            rows.append(
                np.random.multivariate_normal(
                    np.zeros(config.word_emb_size),
                    np.eye(config.word_emb_size)))
    config.emb_mat = np.array(rows)

    # Initialize model
    model = BiDAF(config)
    if config.use_gpu:
        model.cuda()

    trainable_params = (p for p in model.parameters() if p.requires_grad)
    optimizer = optim.Adagrad(trainable_params, lr=0.5)

    # Begin training: an explicit config.num_steps wins; otherwise run
    # num_epochs passes over the training data.
    steps_per_epoch = int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus)))
    num_steps = config.num_steps or steps_per_epoch * config.num_epochs

    print(num_steps)

    batch_iter = train_data.get_multi_batches(config.batch_size,
                                              config.num_gpus,
                                              num_steps=num_steps,
                                              shuffle=True,
                                              cluster=config.cluster)
    for step, batches in enumerate(tqdm(batch_iter, total=num_steps),
                                   start=1):
        model.train()
        model.zero_grad()

        model(batches)
        loss = model.build_loss()

        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            eval_loss = eval_model(model, dev_data, config)
            print("train loss is: %.3f" % loss.data.cpu().numpy()[0])
            print("eval loss is: %.3f \n" % eval_loss)
            # Switch back to training mode after the evaluation call.
            model.train()
def _train(config):
    """Train the multi-GPU model with periodic logging, saving and evaluation.

    Reads the 'train' and 'dev' data, stores the embedding matrix in
    ``config.emb_mat``, builds the models, trainer, evaluator and graph
    handler, then runs the training loop. Every ``config.eval_period``
    steps (when ``config.eval`` is truthy) it evaluates on train and dev
    batches, tracks the best dev loss and stops early after 1000
    consecutive evaluations without improvement. Progress is reported
    through ``slack.notify``.

    Args:
        config: Run configuration. Attributes read here include ``load``,
            ``lower_word``, ``word_emb_size``, ``word_vocab_size``,
            ``total_word_vocab_size``, ``use_glove_for_unk``, ``vis``,
            ``num_steps``, ``num_epochs``, ``batch_size``, ``num_gpus``,
            ``cluster``, ``log_period``, ``save_period``, ``eval``,
            ``eval_period``, ``val_num_batches``, ``dump_eval`` and
            ``dump_answer``.
    """
    # NOTE(review): `header` and `slack` are used below but are not defined
    # in this function; presumably module-level names - confirm.
    np.set_printoptions(threshold=np.inf)
    train_data = read_data(config, 'train', config.load)
    dev_data = read_data(config, 'dev', True)
    update_config(config, [train_data, dev_data])

    _config_debug(config)

    word2vec_dict = train_data.shared[
        'lower_word2vec'] if config.lower_word else train_data.shared[
            'word2vec']
    word2idx_dict = train_data.shared['word2idx']
    # Vocabulary index -> vector, for words that have a vector and are in
    # the vocabulary.
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    # Indices without a vector get a sample from a zero-mean,
    # identity-covariance normal distribution.
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
        for idx in range(config.word_vocab_size)
    ])
    config.emb_mat = emb_mat

    def make_idx2word():
        """Return the index -> word mapping built from the shared vocabularies."""
        d = train_data.shared['word2idx']
        idx2word = {idx: word for word, idx in d.items()}
        if config.use_glove_for_unk:
            # Entries of the extra vocabulary are placed after the base
            # vocabulary by offsetting their indices with its size.
            d2 = train_data.shared['new_word2idx']
            offset = len(d)
            for word, idx in d2.items():
                idx2word[idx + offset] = word
        return idx2word

    idx2word = make_idx2word()
    print("size of config.id2word len:", len(idx2word))
    print("size of config.total_word_vocab_size:",
          config.total_word_vocab_size)

    # Construct model graph and variables (using default graph).
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUEvaluator(
        config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # An explicit config.num_steps wins; otherwise run num_epochs passes
    # over the training data.
    num_steps = config.num_steps or int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus))) * config.num_epochs

    # Best dev result so far; 'patience' counts consecutive evaluations
    # without a dev-loss improvement.
    min_val = {'loss': 100.0, 'acc': 0, 'step': 0, 'patience': 0}

    # Initialised so the final save check is well-defined even when the
    # loop below produces no batch at all.
    global_step = 0

    for batches in tqdm(train_data.get_multi_batches(config.batch_size,
                                                     config.num_gpus,
                                                     num_steps=num_steps,
                                                     shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        # +1 because all calculations are done after the step.
        global_step = sess.run(model.global_step) + 1
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess,
                                               batches,
                                               get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # Occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue

        # Occasional evaluation
        if global_step % config.eval_period == 0:
            # Steps needed to cover the dev data once, capped by
            # config.val_num_batches when that is positive. Kept in its own
            # name so the training step count above is not overwritten.
            eval_steps = math.ceil(dev_data.num_examples /
                                   (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < eval_steps:
                eval_steps = config.val_num_batches

            # The train loss is computed over the same number of steps as
            # the dev loss.
            e_train = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(train_data.get_multi_batches(config.batch_size,
                                                  config.num_gpus,
                                                  num_steps=eval_steps),
                     total=eval_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)

            e_dev = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=eval_steps),
                     total=eval_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            print("%s e_train: loss=%.4f" % (header, e_train.loss))
            print("%s e_dev: loss=%.4f" % (header, e_dev.loss))
            print()

            if min_val['loss'] > e_dev.loss:
                min_val['loss'] = e_dev.loss
                min_val['step'] = global_step
                min_val['patience'] = 0
            else:
                min_val['patience'] = min_val['patience'] + 1
                if min_val['patience'] >= 1000:
                    slack.notify(
                        text="%s patience reached %d. early stopping." %
                        (header, min_val['patience']))
                    break

            slack.notify(text="%s e_dev: loss=%.4f" % (header, e_dev.loss))

            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)

    slack.notify(
        text=
        "%s <@U024BE7LH|insikk> Train is finished. e_dev: loss=%.4f at step=%d\nPlease assign another task to get more research result"
        % (header, min_val['loss'], min_val['step']))

    # Save the final state unless the periodic save just covered this step.
    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
Beispiel #6
0
def main():
    """Train a ``BiDAF`` model with Adam, evaluating every 500 steps.

    Parses the configuration, reads the filtered 'train' and 'dev' data,
    stores ``emb_mat`` and ``new_emb_mat`` on ``config``, then runs the
    training loop. With ``config.test_run`` set, a single step is trained,
    ``eval_model`` is called once and the loop stops; otherwise
    ``eval_model`` is called on every 500th iteration, starting with the
    first.
    """
    config = parse_args()
    data_filter = get_squad_data_filter(config)

    train_data = read_data(
        config, 'train', config.load, data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)

    update_config(config, [train_data, dev_data])

    print("Total vocabulary for training is %s" % config.word_vocab_size)

    # Vectors for all words.
    if config.lower_word:
        word2vec_dict = train_data.shared['lower_word2vec']
    else:
        word2vec_dict = train_data.shared['word2vec']
    # Indices of the filtered vocabulary.
    word2idx_dict = train_data.shared['word2idx']

    # Vocabulary index -> vector, for words present in both mappings.
    idx2vec_dict = {}
    for word, vec in word2vec_dict.items():
        if word in word2idx_dict:
            idx2vec_dict[word2idx_dict[word]] = vec
    print("{}/{} unique words have corresponding glove vectors.".format(
        len(idx2vec_dict), len(word2idx_dict)))

    # Indices without a vector get a sample from a zero-mean,
    # identity-covariance normal distribution.
    rows = []
    for idx in range(config.word_vocab_size):
        if idx in idx2vec_dict:
            rows.append(idx2vec_dict[idx])
        else:
            rows.append(
                np.random.multivariate_normal(
                    np.zeros(config.word_emb_size),
                    np.eye(config.word_emb_size)))
    emb_mat = np.array(rows)

    config.emb_mat = emb_mat
    config.new_emb_mat = train_data.shared['new_emb_mat']

    print(emb_mat.shape, config.new_emb_mat.shape)

    # Initialize model
    model = BiDAF(config)
    if config.use_gpu:
        model.cuda()

    print("learning rate is: %.4f" % config.init_lr)
    trainable_params = (p for p in model.parameters() if p.requires_grad)
    optimizer = optim.Adam(trainable_params, lr=config.init_lr)

    # Begin training: an explicit config.num_steps wins; otherwise run
    # num_epochs passes over the training data.
    steps_per_epoch = int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus)))
    num_steps = config.num_steps or steps_per_epoch * config.num_epochs

    batch_iter = train_data.get_multi_batches(config.batch_size,
                                              config.num_gpus,
                                              num_steps=num_steps,
                                              shuffle=True,
                                              cluster=config.cluster)
    for step, batches in enumerate(tqdm(batch_iter, total=num_steps)):
        model.train()
        model.zero_grad()

        model(batches)
        model.loss = model.build_loss()

        model.loss.backward()
        optimizer.step()

        if config.test_run:
            # Smoke test: one training step, one evaluation, then stop.
            eval_model(model, train_data, dev_data, config)
            break

        if step % 500 == 0:
            eval_model(model, train_data, dev_data, config)
Beispiel #7
0
def _train(config):
    """Train on 'val_train', evaluate on 'val_val', checkpoint the best step.

    Stores the embedding matrix in ``config.emb_mat``, builds the models,
    trainer, ``AccuracyEvaluator`` and graph handler, then runs the
    training loop. Every ``config.eval_period`` steps (when ``config.eval``
    is truthy) the full train and dev data are evaluated; a checkpoint is
    saved whenever the dev accuracy beats the best one seen so far. After
    step 700 the save and eval periods on ``config`` are set to 50.

    Args:
        config: Run configuration. Attributes read here include ``load``,
            ``lower_word``, ``word_emb_size``, ``word_vocab_size``,
            ``train_num_can``, ``vis``, ``num_steps``, ``num_epochs``,
            ``batch_size``, ``num_gpus``, ``cluster``, ``log_period``,
            ``eval``, ``eval_period`` and ``save_period``.
    """
    train_data = read_data(config, 'val_train', config.load)
    dev_data = read_data(config, 'val_val', True)
    update_config(config, [train_data, dev_data])

    _config_debug(config)

    if config.lower_word:
        word2vec_dict = train_data.shared['lower_word2vec']
    else:
        word2vec_dict = train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']

    # Vocabulary index -> vector, for words present in both mappings.
    idx2vec_dict = {}
    for word, vec in word2vec_dict.items():
        if word in word2idx_dict:
            idx2vec_dict[word2idx_dict[word]] = vec

    # Indices without a vector get a sample from a zero-mean,
    # identity-covariance normal distribution.
    rows = []
    for idx in range(config.word_vocab_size):
        if idx in idx2vec_dict:
            rows.append(idx2vec_dict[idx])
        else:
            rows.append(
                np.random.multivariate_normal(
                    np.zeros(config.word_emb_size),
                    np.eye(config.word_emb_size)))
    config.emb_mat = np.array(rows)

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    evaluator = AccuracyEvaluator(
        config.train_num_can, config, model,
        tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    def steps_per_pass(data):
        # Multi-GPU steps needed to cover `data` once.
        return math.ceil(data.num_examples /
                         (config.batch_size * config.num_gpus))

    # Begin training: an explicit config.num_steps wins; otherwise run
    # num_epochs passes over the training data.
    num_steps = (config.num_steps
                 or int(steps_per_pass(train_data)) * config.num_epochs)
    global_step = 0
    # [best dev accuracy, step it was reached at]; the train accuracy is
    # appended as a third element once a best step has been recorded.
    best_dev = [0, 0]

    train_batches = train_data.get_multi_batches(config.batch_size,
                                                 config.num_gpus,
                                                 num_steps=num_steps,
                                                 shuffle=False,
                                                 cluster=config.cluster)
    for batches in tqdm(train_batches, total=num_steps):
        # +1 because all calculations are done after the step.
        global_step = sess.run(model.global_step) + 1
        want_summary = global_step % config.log_period == 0
        _, summary, _ = trainer.step(sess, batches, get_summary=want_summary)

        if want_summary:
            graph_handler.add_summary(summary, global_step)

        # Skip evaluation when it is disabled or not due at this step.
        if not config.eval or global_step % config.eval_period != 0:
            continue

        num_steps_dev = steps_per_pass(dev_data)
        num_steps_train = steps_per_pass(train_data)

        e_train = evaluator.get_evaluation_from_batches(
            sess,
            tqdm(train_data.get_multi_batches(config.batch_size,
                                              config.num_gpus,
                                              num_steps=num_steps_train),
                 total=num_steps_train))
        e_dev = evaluator.get_evaluation_from_batches(
            sess,
            tqdm(dev_data.get_multi_batches(config.batch_size,
                                            config.num_gpus,
                                            num_steps=num_steps_dev),
                 total=num_steps_dev))
        print('train step:{}  loss:{}  acc:{}'.format(
            global_step, e_train.loss, e_train.acc))
        print('val step:{}  loss:{}  acc:{}'.format(
            global_step, e_dev.loss, e_dev.acc))

        # Evaluate (and allow saving) more frequently late in training.
        if global_step > 700:
            config.save_period = 50
            config.eval_period = 50

        if best_dev[0] < e_dev.acc:
            best_dev = [e_dev.acc, global_step, e_train.acc]
            graph_handler.save(sess, global_step=global_step)

    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
    print(best_dev)
    print("you can test on test data set and set load setp is {}".format(
        best_dev[1]))
Beispiel #8
0
def main(NMT_config):

    ### Load RL (global) configurations ###
    config = parse_args()

    ### Load trained QA model ###
    QA_checkpoint = torch.load(config.data_dir + config.QA_best_model)
    QA_config = QA_checkpoint['config']

    QA_mod = BiDAF(QA_config)
    if QA_config.use_gpu:
        QA_mod.cuda()
    QA_mod.load_state_dict(QA_checkpoint['state_dict'])

    ### Load SQuAD dataset ###
    data_filter = get_squad_data_filter(QA_config)

    train_data = read_data(QA_config,
                           'train',
                           QA_config.load,
                           data_filter=data_filter)
    dev_data = read_data(QA_config, 'dev', True, data_filter=data_filter)

    update_config(QA_config, [train_data, dev_data])

    print("Total vocabulary for training is %s" % QA_config.word_vocab_size)

    # from all
    word2vec_dict = train_data.shared[
        'lower_word2vec'] if QA_config.lower_word else train_data.shared[
            'word2vec']
    # from filter-out set
    word2idx_dict = train_data.shared['word2idx']

    # filter-out set idx-vector
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    print("{}/{} unique words have corresponding glove vectors.".format(
        len(idx2vec_dict), len(word2idx_dict)))

    # <null> and <unk> do not have corresponding vector so random.
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(QA_config.word_emb_size), np.eye(QA_config.word_emb_size))
        for idx in range(QA_config.word_vocab_size)
    ])

    config.emb_mat = emb_mat
    config.new_emb_mat = train_data.shared['new_emb_mat']

    num_steps = int(
        math.ceil(train_data.num_examples /
                  (QA_config.batch_size *
                   QA_config.num_gpus))) * QA_config.num_epochs

    # offset for question mark
    NMT_config.max_length = QA_config.ques_size_th - 1
    NMT_config.batch_size = QA_config.batch_size

    ### Construct translator ###
    translator = make_translator(NMT_config, report_score=True)

    ### Construct optimizer ###
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 translator.model.parameters()),
                          lr=config.lr)

    ### Start RL training ###
    count = 0
    QA_mod.eval()
    F1_eval = F1Evaluator(QA_config, QA_mod)
    #eval_model(QA_mod, train_data, dev_data, QA_config, NMT_config, config, translator)

    for i in range(config.n_episodes):
        for batches in tqdm(train_data.get_multi_batches(
                QA_config.batch_size,
                QA_config.num_gpus,
                num_steps=num_steps,
                shuffle=True,
                cluster=QA_config.cluster),
                            total=num_steps):

            #for n, p in translator.model.named_parameters():
            #    print(n)
            #    print(p)
            #print(p.requires_grad)

            start = datetime.now()
            to_input(batches[0][1].data['q'], config.RL_path + config.RL_file)

            # obtain rewrite and log_prob
            q, scores, log_prob = translator.translate(NMT_config.src_dir,
                                                       NMT_config.src,
                                                       NMT_config.tgt,
                                                       NMT_config.batch_size,
                                                       NMT_config.attn_debug)

            q, cq = ref_query(q)
            batches[0][1].data['q'] = q
            batches[0][1].data['cq'] = cq

            log_prob = torch.stack(log_prob).squeeze(-1)
            #print(log_prob)

            translator.model.zero_grad()

            QA_mod(batches)

            e = F1_eval.get_evaluation(batches, False, NMT_config, config,
                                       translator)
            reward = Variable(torch.FloatTensor(e.f1s), requires_grad=False)
            #print(reward)

            ## Initial loss
            loss = create_loss(log_prob, reward)

            loss.backward()
            optimizer.step()