Example 1
def accuracies_on_ds(data_file, inputs, model, n_ans):
    train, dev, dev_y, train_y, embedding, opt, q_labels, ql_mask = inputs

    model.opt['interpret'] = False
    batches = utils.BatchGen(dev, batch_size=args.batch_size, evaluation=True, gpu=args.cuda)
    predictions = []
    for i, batch in enumerate(batches):
        pred = model.predict(batch)[0]
        predictions.extend(pred)

    em, f1 = utils.score(predictions, dev_y)

    print("[EM: {0:.2f} F1: {1:.2f}] on {2}".format(em, f1, data_file))

    batches = utils.BatchGen(dev, batch_size=args.batch_size, evaluation=True, gpu=args.cuda, shuffle=True)
    model.opt['interpret'] = True
    t_a, t_total_a = {0.1:0, 0.2:0, 0.3:0, 0.4:0, 0.5:0, 0.6:0, 0.7:0, 0.8:0, 0.9:0}, 0
    f1s_a = []; ovs_a = []
    # evaluate the model for all interpretations and all answers
    # if f1 score for all GT answers is > p then count answer as correct
    for i, batch in tqdm(enumerate(batches)):
        i_predictions = []
        truth = np.take(dev_y, batches.indices[i], 0)
        if args.n_actions>0:
            for a in range(args.n_actions):
                latent_a = Variable(torch.ones(batch[0].size(0))*a).long().cuda()
                pred = model.predict_inter(batch, latent_a=latent_a)
                i_predictions.append(pred[0])
        else:
            i_predictions = model.predict(batch)[0]
        for b in range(batch[0].size(0)):
            f1s = []
            for ta in truth[b]:
                f1_v = []
                for a in range(args.n_actions):
                    _, f1_a = utils.score_test_alli([i_predictions[a][b]], [[ta]])
                    f1_v += [f1_a]
                if args.n_actions>0:
                    f1s += [max(f1_v)]
                else:
                    _, f1_v = utils.score_test_alli([i_predictions[b]], [[ta]])
                    f1s += [f1_v]
            f1s = np.array(f1s)
            for p in t_a.keys():
                t_a[p] = t_a[p] + int((f1s>p).sum() == n_ans)

            f1_i = []; ov_i = []
            for a in range(args.n_actions):
                _, f1_a = utils.score_test_alli([i_predictions[a][b]], [truth[b]])
                ov_a = utils.overlap([i_predictions[a][b]], [truth[b]])
                f1_i += [f1_a]; ov_i += [ov_a]
            
            if args.n_actions == 0:
                _, f1_i = utils.score_test_alli([i_predictions[b]], [truth[b]])
                ov_i = utils.overlap([i_predictions[b]], [truth[b]])
            f1s_a += [f1_i]; ovs_a += [ov_i]
        t_total_a += batch[0].size(0)

    f1s_a = np.array(f1s_a); ovs_a = np.array(ovs_a)
    return t_total_a, f1s_a, ovs_a, t_a
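A brief usage sketch for the function above, assuming args, model, and utils from the surrounding script are in scope (n_ans=3 is an illustrative value, not taken from the original):

# Hypothetical usage: t_a maps each F1 threshold to the number of questions
# whose predicted answers beat that threshold for all n_ans ground-truth answers.
inputs = utils.load_data(vars(args), args)
t_total, f1s_a, ovs_a, t_a = accuracies_on_ds(args.data_file, inputs, model, n_ans=3)
for p in sorted(t_a):
    print("F1 > {0:.1f}: {1:.2f}% of questions".format(p, 100. * t_a[p] / t_total))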
Example 2
def main():
    print('[program starts.]')
    train, dev, dev_y, train_y, embedding, opt, q_labels, ql_mask = utils.load_data(
        vars(args), args)
    if args.resume:
        print('[loading previous model...]')
        checkpoint = torch.load(
            os.path.join(model_dir, args.restore_dir, args.resume))
        if args.resume_options:
            opt = checkpoint['config']
        state_dict = checkpoint['state_dict']
        model = DocReaderModel(opt, embedding, state_dict)
    else:
        raise RuntimeError('Include checkpoint of the trained model')

    if args.cuda:
        model.cuda()

    with open(args.data_file, 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    dev_ids = data['dev_ids']

    # evaluate restored model
    model.opt['interpret'] = False
    batches = utils.BatchGen(dev,
                             batch_size=args.batch_size,
                             evaluation=True,
                             gpu=args.cuda)
    predictions = []
    for i, batch in enumerate(batches):
        predictions.extend(model.predict(batch)[0])
    em, f1 = utils.score(predictions, dev_y)
    print("[sampled EM: {} F1: {}]".format(em, f1))

    batches = utils.BatchGen(dev,
                             batch_size=args.batch_size,
                             evaluation=True,
                             gpu=args.cuda)
    model.opt['interpret'] = True
    t_em_c, t_f1_c, t_total = [0] * 3
    f1s = []
    ems = []
    pred_answers = {}
    # evaluate the model for all interpretations and select the one with highest accuracy
    for i, batch in tqdm(enumerate(batches)):
        i_predictions = []
        truth = np.take(dev_y, batches.indices[i], 0)
        confidence = []
        ans_a = []
        for a in range(args.n_actions):
            latent_a = Variable(torch.ones(batch[0].size(0)) * a).long().cuda()
            pred = model.predict_inter(batch, latent_a=latent_a)
            i_predictions.append(pred[0])
            computed_a = pred[-1]
            confidence.append(pred[-2])
            ans_a += [pred[0]]

        confidence = np.array(confidence)
        for b in range(batch[0].size(0)):
            em_v, f1_v = [], []
            a = np.argmax(confidence[:, b])
            em_c, f1_c = utils.score_test_alli([i_predictions[a][b]],
                                               [truth[b]])

            for a in range(args.n_actions):
                em_a, f1_a = utils.score_test_alli([i_predictions[a][b]],
                                                   [truth[b]])
                em_v += [em_a]
                f1_v += [f1_a]

            pred_answers[dev_ids[i * args.batch_size +
                                 b]] = [[a_i[b] for a_i in ans_a],
                                        list(map(str, f1_v)),
                                        str(computed_a[b])]

            f1s += [f1_v]
            ems += [em_v]
            t_em_c += em_c
            t_f1_c += f1_c

        t_total += batch[0].size(0)

    with open('predictions_a.json', 'w') as f:
        json.dump(pred_answers, f)

    def toscore(score):
        return 100. * score / t_total

    f1s = np.array(f1s)
    ems = np.array(ems)

    print("[max EM: {} F1: {}]".format(toscore(np.max(ems, 1).sum()),
                                       toscore(np.max(f1s, 1).sum())))
    print("[min EM: {} F1: {}]".format(toscore(np.min(ems, 1).sum()),
                                       toscore(np.min(f1s, 1).sum())))
    print("[avg EM: {} F1: {}]".format(toscore(np.average(ems, 1).sum()),
                                       toscore(np.average(f1s, 1).sum())))

    print("[con EM: {} F1: {}]".format(toscore(t_em_c), toscore(t_f1_c)))
Example 3
def main():
    log.info('[program starts.]')
    train, dev, dev_y, train_y, embedding, opt, q_labels, ql_mask = utils.load_data(
        vars(args), args)
    log.info('[Data loaded.]')

    if args.resume:
        log.info('[loading previous model...]')
        checkpoint = torch.load(
            os.path.join(model_dir, args.restore_dir, args.resume))
        if args.resume_options:
            opt = checkpoint['config']
        state_dict = checkpoint['state_dict']
        model = DocReaderModel(opt, embedding, state_dict)
    else:
        raise RuntimeError('Include checkpoint of the trained model')

    if args.cuda:
        model.cuda()

    with open(os.path.join(squad_dir, 'meta.msgpack'), 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    vocab = meta['vocab']
    ids_word = {i: w for i, w in enumerate(vocab)}

    def to_text(inp):
        s = ""
        for ids in inp.numpy():
            s += ids_word[ids] + " "
        return s

    # evaluate restored model
    batches = utils.BatchGen(dev,
                             batch_size=100,
                             evaluation=True,
                             gpu=args.cuda)
    predictions = []
    for i, batch in enumerate(batches):
        predictions.extend(model.predict(batch)[0])
    em, f1 = utils.score(predictions, dev_y)
    log.info("[dev EM: {} F1: {}]".format(em, f1))

    batches = utils.BatchGen(dev,
                             batch_size=args.batch_size,
                             evaluation=True,
                             gpu=args.cuda)
    model.opt['interpret'] = True
    #itrs = [30, 58]
    itrs = [0, 30]
    outputs = ""
    # collect document encodings for induced interpretations (embeds) and interpretations chosen by the model (computed_a)
    X = [[] for _ in range(itrs[1] - itrs[0] + 1)]
    for i, batch in enumerate(batches):
        if i < itrs[0]: continue
        truth = np.take(dev_y, batches.indices[i], 0)
        i_predictions = []
        for a in range(args.n_actions):
            latent_a = Variable(torch.ones(args.batch_size) * a).long().cuda()
            i_predictions.append(
                model.predict_inter(batch, latent_a=latent_a)[0])

        for b in range(len(batch[0])):
            outputs += batch[-2][b] + '\n' + to_text(batch[5][b]) + '\n'
            outputs += "idx = {} truth={}".format(
                (i - itrs[0]) * args.batch_size + b, truth[b]) + '\n'
            for a in range(args.n_actions):
                em_v, f1_v = utils.score([i_predictions[a][b]], [truth[b]])
                outputs += i_predictions[a][b] + '\n' + "b={0} a={1} ".format(
                    i - itrs[0], a, em_v, f1_v) + '\n'
            outputs += '\n'

        for a in range(args.n_actions):
            latent_a = Variable(torch.ones(args.batch_size) * a).long().cuda()
            embeds, actions, questions, computed_a = model.get_embeddings(
                batch, latent_a=[1, latent_a])
            X[i - itrs[0]].append([embeds, actions, questions, computed_a])
        if i >= itrs[1]:
            break

    print(outputs)

    # rearrange encodings
    x_emb, x_l, x_q, computed_a = [], [], [], []
    for it in range(itrs[1] - itrs[0] + 1):
        for b in range(args.batch_size):
            for a in range(args.n_actions):
                x_emb.append(X[it][a][0][b])
                x_l.append(X[it][a][1][b])
            x_q.append(X[it][a][2][b])
            computed_a.append(X[it][a][3][b])
    x_emb = np.array(x_emb)
    x_l = np.array(x_l)
    x_q = np.array(x_q)
    computed_a = np.array(computed_a).astype(int)

    # 256D -> 2D
    tsne_model = TSNE(n_components=2,
                      verbose=1,
                      random_state=0,
                      angle=.99,
                      init='pca')
    tsne_d = tsne_model.fit_transform(x_emb)

    # find document encodings for selected interpretations
    a = np.reshape(computed_a, ((itrs[1] - itrs[0] + 1) * args.batch_size))
    a_oh = np.expand_dims(np.eye(args.n_actions)[a], -1)
    tsne_d_r = np.reshape(
        tsne_d,
        ((itrs[1] - itrs[0] + 1) * args.batch_size, args.n_actions, -1))
    sel_tsne_d = np.sum(tsne_d_r * a_oh, 1)

    # setup the plot
    N = args.n_actions

    c = x_l.astype(int)
    x = tsne_d[:, 0]
    y = tsne_d[:, 1]
    plt.scatter(x, y, c=c, s=40, cmap=discrete_cmap(N, 'jet'), alpha=0.5)
    names = [str(i // (args.n_actions)) for i in range(tsne_d.shape[0])]
    for i, txt in enumerate(names):
        plt.annotate(txt, (x[i], y[i]), size='x-small')

    c = computed_a.astype(int)
    x = sel_tsne_d[:, 0]
    y = sel_tsne_d[:, 1]
    plt.scatter(x, y, c=c, s=70, marker='x', cmap=discrete_cmap(N, 'jet'))
    names = [str(i) for i in range(sel_tsne_d.shape[0])]
    for i, txt in enumerate(names):
        plt.annotate(txt, (x[i], y[i]), size='x-small')

    plt.colorbar(ticks=range(N))
    plt.clim(-0.5, N - 0.5)
    plt.title("tSNE")
    plt.show()
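The scatter plots above rely on a discrete_cmap helper that is not shown in this listing; a minimal sketch of one common way to build such an N-bin colormap (an assumption, not necessarily the original helper):

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

def discrete_cmap(N, base_cmap='jet'):
    # Sample N evenly spaced colors from the base colormap and rebuild
    # them as an N-bin discrete colormap.
    base = plt.cm.get_cmap(base_cmap)
    colors = base(np.linspace(0, 1, N))
    return LinearSegmentedColormap.from_list("{}_{}".format(base.name, N), colors, N)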
Example 4
def main():
    log.info('[program starts.]')
    train, dev, dev_y, train_y, embedding, opt, q_labels, ql_mask = utils.load_data_train(
        vars(args), args)
    log.info('[Data loaded.]')
    log.info('vocab size = %d' % opt['vocab_size'])

    with open(args.data_file, 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
        dev_ae = list(data['dev_ans_exists'])
        trn_ae = list(data['trn_ans_exists'])
        #dev_ae = [1]*len(dev_y); trn_ae = [1]*len(train_y)

    if args.resume:
        log.info('[loading previous model...]')
        checkpoint = torch.load(
            os.path.join(model_dir, args.restore_dir, args.resume))
        if args.resume_options:
            opt = checkpoint['config']
        state_dict = checkpoint['state_dict']
        model = DocReaderModel(opt, embedding, state_dict)
        epoch_0 = checkpoint['epoch'] + 1
        indices = list(range(len(train)))
        for i in range(checkpoint['epoch']):
            random.shuffle(indices)  # synchronize random seed
        train = [train[i] for i in indices]
        trn_ae = [trn_ae[i] for i in indices]
        train_y = [train_y[i] for i in indices]

        q_labels = [q_labels[i] for i in indices]
        ql_mask = [ql_mask[i] for i in indices]
        if args.reduce_lr:
            utils.lr_decay(model.optimizer, args.reduce_lr, log)
    else:
        model = DocReaderModel(opt, embedding)
        epoch_0 = 1

    train_y = np.array(train_y)  # text answers for training set
    q_labels = np.array(q_labels)
    ql_mask = np.array(ql_mask)
    print("timestamp {}".format(timestamp))
    trn_eval_size = len(trn_ae)
    dev_y = np.array(dev_y)

    if args.cuda:
        model.cuda()
    # evaluate pre-trained model
    if args.resume and not args.debug:
        batches = utils.BatchGen(train[:trn_eval_size],
                                 batch_size=bs_valid,
                                 evaluation=True,
                                 gpu=args.cuda)
        predictions = []
        ae_ta = []
        for batch in batches:
            if args.squad == 2:
                ans_b, _, _, ae_i = model.predict(batch)
                ae_ta.extend(ae_i)
                predictions.extend(ans_b)
            else:
                predictions.extend(model.predict(batch)[0])
        em_t, f1_t = utils.score(predictions, train_y[:trn_eval_size])
        if 'exist' in args.ae_archt:
            em_t, f1_t = utils.score_list(predictions, train_y[:trn_eval_size],
                                          trn_ae[:trn_eval_size])
            n_ae = sum(trn_ae[:trn_eval_size])
            n_dae = trn_eval_size - n_ae
            print('tot_pos=%d, true_pos=%d, cor_p=%d, cor_n=%d'%(sum(ae_ta), sum(trn_ae[:trn_eval_size]), \
                                                        (np.array(trn_ae[:trn_eval_size]).squeeze()*np.array(ae_ta).squeeze()).sum(),\
                                                ((np.array(trn_ae[:trn_eval_size]).squeeze()==0)*(np.array(ae_ta).squeeze()==0)).sum()))
        log.info("[train EM: {0:.3f} F1: {1:3f}]".format(em_t, f1_t))

        batches = utils.BatchGen(dev,
                                 batch_size=bs_valid,
                                 evaluation=True,
                                 gpu=args.cuda)
        predictions = []
        ae_ta = []
        for batch in batches:
            if args.squad == 2:
                ans_b, _, _, ae_i = model.predict(batch)
                ae_ta.extend(ae_i)
                predictions.extend(ans_b)
            else:
                predictions.extend(model.predict(batch)[0])
        em_v, f1_v = utils.score(predictions, dev_y)
        if 'exist' in args.ae_archt:
            em_v, f1_v = utils.score_list(predictions, np.array(dev_y), dev_ae)
            n_ae = sum(dev_ae)
            n_dae = len(dev_ae) - n_ae
            print('tot_pos=%d, true_pos=%d, cor_p=%d, cor_n=%d'%(sum(ae_ta), sum(dev_ae), \
                                                        (np.array(dev_ae).squeeze()*np.array(ae_ta).squeeze()).sum(),\
                                                ((np.array(dev_ae).squeeze()==0)*(np.array(ae_ta).squeeze()==0)).sum()))
        log.info("[val EM: {} F1: {}]".format(em_v, f1_v))
        best_val_score = f1_v
        if args.summary:
            writer.add_scalars('accuracies', {
                'em_t': em_t,
                'f1_t': f1_t,
                'em_v': em_v,
                'f1_v': f1_v
            }, epoch_0 - 1)
    else:
        best_val_score = 0.0

    if 'const' in args.beta:
        beta = float(args.beta.split('_')[1]) * 0.1
    if 'const' in args.alpha:
        alpha = float(args.alpha.split('_')[1]) * 0.1

    scope = 'pi_q'
    if args.select_i:
        scope = 'select_i'

    dummy_r = np.zeros(args.batch_size)
    latent_a = None
    target_i = None
    indices = None  # induced interpretation
    rewards = dummy_r
    # training
    for epoch in range(epoch_0, epoch_0 + args.epochs):
        log.warn('Epoch {} timestamp {}'.format(epoch, timestamp))
        batches = utils.BatchGen(train,
                                 batch_size=args.batch_size,
                                 gpu=args.cuda)
        start = datetime.now()
        if args.vae and not args.select_i:
            scope = utils.select_scope_update(args, epoch - epoch_0)
        print("scope = {} beta = {} alpha = {} ".format(scope, beta, alpha))
        for i, batch in enumerate(batches):
            inds = batches.indices[i]
            # synchronize available interpretations with the current batch
            labels = np.take(q_labels, inds, 0)
            l_mask = np.take(ql_mask, inds, 0)
            if args.vae:  # VAE framework
                if scope == 'rl':
                    if args.rl_tuning == 'pgm':
                        # policy gradient with EM scores for rewards
                        truth = np.take(train_y, inds, 0)
                        pred_m, latent_a, indices = model.predict(batch)[:3]
                        _, f1_m = utils.score_em(None, pred_m, truth)
                        rewards = f1_m
                        # normalize rewards over batch
                        rewards -= rewards.mean()
                        rewards /= (rewards.std() + 1e-08)
                    elif args.rl_tuning == 'pg':
                        # policy gradient with F1 scores for rewards
                        truth = np.take(train_y, inds, 0)
                        pred_m, latent_a, indices = model.predict(batch)[:3]
                        _, f1_m = utils.score_sc(None, pred_m, truth)
                        rewards = f1_m
                        # normalize rewards over batch
                        rewards -= rewards.mean()
                        rewards /= (rewards.std() + 1e-08)
                    elif args.rl_tuning == 'sc':
                        # reward computed by self-critic
                        truth = np.take(train_y, inds, 0)
                        pred_s, pred_m, latent_a, indices = model.predict_self_critic(
                            batch)
                        rs, rm = utils.score_sc(pred_s, pred_m, truth)
                        rewards = rs - rm
                else:
                    rewards = dummy_r

                if args.select_i:
                    i_predictions = []
                    truth = np.take(train_y, batches.indices[i], 0)
                    for a in range(args.n_actions):
                        latent_a = Variable(torch.ones(batch[0].size(0)) *
                                            a).long().cuda()
                        i_predictions.append(
                            model.predict_inter(batch, latent_a=latent_a)[0])
                    f1_all = []
                    for b in range(batch[0].size(0)):
                        f1_v = []
                        for a in range(args.n_actions):
                            _, f1_a = utils.score_test_alli(
                                [i_predictions[a][b]], [truth[b]])
                            f1_v += [f1_a]
                        f1_all += [f1_v]

                    target_i = np.argmax(np.array(f1_all), 1)


                model.update(batch, q_l=[labels, l_mask], r=rewards, scope=scope, beta=beta, alpha=alpha, \
                                                                latent_a=latent_a, target_i=target_i, span=indices)

            elif args.self_critic:
                # self-critic framework where rewards are computed as difference between the F1 score produced
                # by the current model during greedy inference and by sampling
                truth = np.take(train_y, inds, 0)
                if args.critic_loss:
                    pred_m, latent_a, indices = model.predict(batch)[:3]
                    _, f1_m = utils.score_sc(None, pred_m, truth)
                    rewards = f1_m
                else:
                    pred_s, pred_m, latent_a, indices = model.predict_self_critic(
                        batch)
                    rs, rm = utils.score_sc(pred_s, pred_m, truth)
                    rewards = rs - rm
                model.update(batch,
                             r=rewards,
                             q_l=[labels, l_mask],
                             latent_a=latent_a)
            else:
                model.update(batch, q_l=[labels, l_mask])

            if i % args.log_per_updates == 0:
                # printing
                if args.vae and not args.select_i:
                    log.info('updates[{0:6}] l_p[{1:.3f}] l_q[{2:.3f}] l_rl[{3:.3f}] l_ae[{4:.3f}] l_ce[{5:.3f}] l_cr[{6:.3f}] remaining[{7}]'.format(
                    model.updates, model.train_loss['p'].avg, model.train_loss['q'].avg, model.train_loss['rl'].avg, model.train_loss['ae'].avg,\
                                 model.train_loss['ce'].avg, model.train_loss['cr'].avg, str((datetime.now() - start) / (i + 1) * (len(batches) - i - 1)).split('.')[0]))
                    if args.summary:
                        writer.add_scalars('losses', {'p':model.train_loss['p'].avg, 'q':model.train_loss['q'].avg, 'ce':model.train_loss['ce'].avg, \
                                                    'ae':model.train_loss['ae'].avg,'rl':model.train_loss['rl'].avg, 'cr':model.train_loss['cr'].avg,}, (epoch-1)*len(batches)+i)
                else:
                    log.info(
                        'updates[{0:6}] train loss[{1:.5f}] remaining[{2}]'.
                        format(
                            model.updates, model.train_loss.avg,
                            str((datetime.now() - start) / (i + 1) *
                                (len(batches) - i - 1)).split('.')[0]))
                    if args.summary:
                        writer.add_scalar('loss', model.train_loss.avg,
                                          (epoch - 1) * len(batches) + i)

                if scope == 'rl' and (i % (4 * args.log_per_updates) == 0):
                    vbatches = utils.BatchGen(dev,
                                              batch_size=bs_valid,
                                              evaluation=True,
                                              gpu=args.cuda)
                    predictions = []
                    for batch in vbatches:
                        predictions.extend(model.predict(batch)[0])
                    em_v, f1_v = utils.score(predictions, dev_y)
                    log.warn("val EM: {0:.3f} F1: {1:3f}".format(em_v, f1_v))

        # eval
        if epoch % args.eval_per_epoch == 0:

            batches = utils.BatchGen(dev,
                                     batch_size=bs_valid,
                                     evaluation=True,
                                     gpu=args.cuda)
            predictions = []
            ae_ta = []
            for i, batch in enumerate(batches):
                if args.squad == 2:
                    ans_b, _, _, ae_i = model.predict(batch)
                    ae_ta.extend(ae_i)
                    predictions.extend(ans_b)
                else:
                    predictions.extend(model.predict(batch)[0])
            em_v, f1_v = utils.score(predictions, dev_y)
            if 'exist' in args.ae_archt:
                em_v, f1_v = utils.score_list(predictions, dev_y, dev_ae)
                n_ae = sum(dev_ae)
                n_dae = len(dev_ae) - n_ae
                print('tot_pos=%d, true_pos=%d, cor_p=%d, cor_n=%d'%(sum(ae_ta), sum(dev_ae), \
                                                        (np.array(dev_ae).squeeze()*np.array(ae_ta).squeeze()).sum(),\
                                                ((np.array(dev_ae).squeeze()==0)*(np.array(ae_ta).squeeze()==0)).sum()))
            log.info("[val EM: {} F1: {}]".format(em_v, f1_v))

            batches = utils.BatchGen(train[:trn_eval_size],
                                     batch_size=bs_valid,
                                     evaluation=True,
                                     gpu=args.cuda)
            predictions = []
            ae_ta = []
            for batch in batches:
                if args.squad == 2:
                    ans_b, _, _, ae_i = model.predict(batch)
                    ae_ta.extend(ae_i)
                    predictions.extend(ans_b)
                else:
                    predictions.extend(model.predict(batch)[0])
            em_t, f1_t = utils.score(predictions, train_y[:trn_eval_size])
            if 'exist' in args.ae_archt:
                em_t, f1_t = utils.score_list(predictions,
                                              train_y[:trn_eval_size],
                                              trn_ae[:trn_eval_size])
                n_ae = sum(trn_ae[:trn_eval_size])
                n_dae = trn_eval_size - n_ae
                print('tot_pos=%d, true_pos=%d, cor_p=%d, cor_n=%d'%(sum(ae_ta), sum(trn_ae[:trn_eval_size]), \
                                                        (np.array(trn_ae[:trn_eval_size]).squeeze()*np.array(ae_ta).squeeze()).sum(),\
                                                ((np.array(trn_ae[:trn_eval_size]).squeeze()==0)*(np.array(ae_ta).squeeze()==0)).sum()))
            log.info("[train EM: {0:.3f} F1: {1:3f}]".format(em_t, f1_t))

            print("current_dir {}".format(current_dir))

            if args.summary:
                writer.add_scalars('accuracies', {
                    'em_t': em_t,
                    'f1_t': f1_t,
                    'em_v': em_v,
                    'f1_v': f1_v
                }, epoch)

        # save
        if not args.save_last_only or epoch == epoch_0 + args.epochs - 1:
            try:
                os.remove(
                    os.path.join(current_dir,
                                 'checkpoint_epoch_{}.pt'.format(epoch - 1)))
            except OSError:
                pass
            model_file = os.path.join(current_dir,
                                      'checkpoint_epoch_{}.pt'.format(epoch))
            model.save(model_file, epoch)
            if f1_v > best_val_score:
                best_val_score = f1_v
                copyfile(model_file, os.path.join(current_dir,
                                                  'best_model.pt'))
                log.info('[new best model saved.]')

    # load test data that is the development set
    train, dev, dev_y, train_y, embedding, opt, q_labels, ql_mask = utils.load_data(
        vars(args), args)

    batches = utils.BatchGen(dev,
                             batch_size=bs_valid,
                             evaluation=True,
                             gpu=args.cuda)
    predictions = []
    ae_ta = []
    for batch in batches:
        if args.squad == 2:
            ans_b, _, _, ae_i = model.predict(batch)
            ae_ta.extend(ae_i)
            predictions.extend(ans_b)
        else:
            predictions.extend(model.predict(batch)[0])
    em_v, f1_v = utils.score(predictions, dev_y)
    if 'exist' in args.ae_archt:
        em_v, f1_v = utils.score_list(predictions, np.array(dev_y), dev_ae)
        n_ae = sum(dev_ae)
        n_dae = len(dev_ae) - n_ae
        print('tot_pos=%d, true_pos=%d, cor_p=%d, cor_n=%d'%(sum(ae_ta), sum(dev_ae), \
                                                    (np.array(dev_ae).squeeze()*np.array(ae_ta).squeeze()).sum(),\
                                            ((np.array(dev_ae).squeeze()==0)*(np.array(ae_ta).squeeze()==0)).sum()))
    log.info("[test EM: {} F1: {}]".format(em_v, f1_v))

    if args.summary:
        # export scalar data to JSON for external processing
        writer.export_scalars_to_json(
            os.path.join(current_dir, "all_scalars.json"))
        writer.close()
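For reference, the 'sc' reward branch above follows the self-critical baseline idea: the reward is the sampled answer's F1 minus the greedy answer's F1, so only samples that beat the model's own greedy decoding are reinforced. A standalone sketch of that computation (f1_fn is a hypothetical per-example F1 function; this is not the repository's utils.score_sc):

import numpy as np

def self_critic_rewards(sampled_preds, greedy_preds, truths, f1_fn):
    # Reward each sampled answer by how much its F1 exceeds the F1 of the
    # greedy answer for the same question.
    rs = np.array([f1_fn(p, t) for p, t in zip(sampled_preds, truths)])
    rm = np.array([f1_fn(p, t) for p, t in zip(greedy_preds, truths)])
    return rs - rm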
Example 5
def main():
    log.info('[program starts.]')
    train, dev, dev_y, train_y, embedding, opt, q_labels, ql_mask = utils.load_data(
        vars(args), args)
    log.info('[Data loaded.]')

    if args.resume:
        log.info('[loading previous model...]')
        checkpoint = torch.load(
            os.path.join(model_dir, args.restore_dir, args.resume))
        if args.resume_options:
            opt = checkpoint['config']
        state_dict = checkpoint['state_dict']
        model = DocReaderModel(opt, embedding, state_dict)
    else:
        raise RuntimeError('Include checkpoint of the trained model')

    if args.cuda:
        model.cuda()

    outputs = ""
    # evaluate restored model
    model.opt['interpret'] = False
    batches = utils.BatchGen(dev,
                             batch_size=100,
                             evaluation=True,
                             gpu=args.cuda)
    predictions = []
    for i, batch in enumerate(batches):
        predictions.extend(model.predict(batch)[0])
    em, f1 = utils.score(predictions, dev_y)
    log.info("[dev EM: {} F1: {}]".format(em, f1))
    outputs += "[dev EM: {} F1: {}]\n".format(em, f1)

    with open(os.path.join(squad_dir, 'meta.msgpack'), 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    vocab = meta['vocab']
    ids_word = {i: w for i, w in enumerate(vocab)}

    def to_text(inp):
        s = ""
        for ids in inp.numpy():
            s += ids_word[ids] + " "
        return s

    test_int = {i: [] for i in range(args.n_actions)}
    batches = utils.BatchGen(dev,
                             batch_size=args.batch_size,
                             evaluation=True,
                             gpu=args.cuda,
                             shuffle=True)
    for i, batch in enumerate(batches):
        model.opt['interpret'] = False
        # collect answers and the interpretations the model itself selects
        predictions, acts = model.predict_inter(batch)[:2]
        truth = np.take(dev_y, batches.indices[i], 0)
        for b in range(len(predictions)):
            em_v, f1_v = utils.score([predictions[b]], [truth[b]])
            log.warn("b={0} a={1} EM: {2:.3f} F1: {3:3f}".format(
                b, acts[b], em_v, f1_v))
        model.opt['interpret'] = True
        i_predictions = []
        for a in range(args.n_actions):
            latent_a = Variable(torch.ones(batch[0].size()[0]) *
                                a).long().cuda()
            i_predictions.append(
                model.predict_inter(batch, latent_a=latent_a)[0])

        for b in range(batch[0].size()[0]):
            f1s = []
            for a in range(args.n_actions):
                em_v, f1_v = utils.score([i_predictions[a][b]], [truth[b]])
                f1s.append(f1_v)

            if len(set(f1s)) > 1:
                outputs += batch[-2][b] + '\n' + to_text(batch[5][b]) + '\n'
                outputs += "pred_a={} truth={}".format(acts[b],
                                                       truth[b]) + '\n'
                for a in range(args.n_actions):
                    test_int[a] += [i_predictions[a][b]]
                    em_v, f1_v = utils.score([i_predictions[a][b]], [truth[b]])
                    outputs += i_predictions[a][
                        b] + '\n' + "b={0} a={1} EM: {2:.3f} F1: {3:3f}".format(
                            b, a, em_v, f1_v) + '\n'
                    log.warn("b={0} a={1} EM: {2:.3f} F1: {3:3f}".format(
                        b, a, em_v, f1_v))
                outputs += '\n'

    with open(os.path.join(current_dir, 'ints.msgpack'), 'wb') as f:
        msgpack.dump(test_int, f)

    with open(os.path.join(current_dir, "interpret.txt"), "w") as txtf:
        txtf.write(outputs)