Example 1
 def on_epoch_end(self, epoch, logs={}):
     if not do_ema:
         results = model.predict([val_context_word, val_question_word, val_context_char, val_question_char], \
                                 verbose=1, batch_size=64)
         _, _, y_start_pred, y_end_pred = results
         y_start_pred = np.reshape(y_start_pred, (-1))
         y_end_pred = np.reshape(y_end_pred, (-1))
         answer_dict, remapped_dict = util.convert_tokens(eval_file, val_qid.tolist(), \
                                                          y_start_pred.tolist(), y_end_pred.tolist())
         metrics = util.evaluate(eval_file, answer_dict)
         print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                                metrics['f1']))
         ems.append(metrics['exact_match'])
         f1s.append(metrics['f1'])
         if metrics['f1'] > self.best_f1:
             self.best_f1 = metrics['f1']
             model.save_weights('model/QANet_v99.h5')
         if epoch + 1 == 25:
             model.save_weights('model/QANet_v99_60k.h5')
     else:
         # validation with ema
         # save backup weights
         print('saving temp weights...')
         model.save_weights('temp_model2.h5')
         ExponentialMovingAverage_EpochEnd(model,
                                           self.ema_trainable_weights_vals)
         results = model.predict([val_context_word, val_question_word, val_context_char, val_question_char], \
                                 verbose=1, batch_size=64)
         _, _, y_start_pred, y_end_pred = results
         y_start_pred = np.reshape(y_start_pred, (-1))
         y_end_pred = np.reshape(y_end_pred, (-1))
         answer_dict, remapped_dict = util.convert_tokens(eval_file, val_qid.tolist(), y_start_pred.tolist(), \
                                                          y_end_pred.tolist())
         metrics_ema = util.evaluate(eval_file, answer_dict)
         print("After EMA, Exact Match: {}, F1: {}".format(
             metrics_ema['exact_match'], metrics_ema['f1']))
         ems.append(metrics_ema['exact_match'])
         f1s.append(metrics_ema['f1'])
         if metrics_ema['f1'] > self.best_f1:
             self.best_f1 = metrics_ema['f1']
             model.save_weights('model/QANet_ema_v99.h5')
         if epoch + 1 == 25:
             model.save_weights('model/QANet_ema_v99_60k.h5')
         # load backup
         print('loading temp weights...')
         model.load_weights('temp_model2.h5')
     result = pd.DataFrame([ems, f1s], index=['em', 'f1']).transpose()
     result.to_csv('log/result2.csv', index=None)
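The callback above leans on an external ExponentialMovingAverage_EpochEnd helper and on shadow weight values kept in self.ema_trainable_weights_vals, neither of which is shown. Below is a minimal self-contained sketch of the same idea using only standard Keras callback hooks; the class name and the 0.999 decay are illustrative assumptions, not the repository's actual helper.

from tensorflow import keras


class EMACallback(keras.callbacks.Callback):
    """Keep an exponential moving average of the weights and swap it in for validation."""

    def __init__(self, decay=0.999):
        super().__init__()
        self.decay = decay
        self.shadow = None

    def on_train_begin(self, logs=None):
        # start the shadow copy from the initial weights
        self.shadow = [w.copy() for w in self.model.get_weights()]

    def on_batch_end(self, batch, logs=None):
        # shadow <- decay * shadow + (1 - decay) * current
        current = self.model.get_weights()
        self.shadow = [self.decay * s + (1.0 - self.decay) * w
                       for s, w in zip(self.shadow, current)]

    def on_epoch_end(self, epoch, logs=None):
        # back up the raw weights, evaluate with the EMA weights, then restore,
        # mirroring the save / validate / load sequence in the example above
        backup = self.model.get_weights()
        self.model.set_weights(self.shadow)
        # ... run validation / model.predict(...) here ...
        self.model.set_weights(backup)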
Example 2
    def _get_predictions(self, observations):
        datatype = 'public'

        word2idx_dict = self.model_dicts['word2idx_dict']
        char2idx_dict = self.model_dicts['char2idx_dict']
        bpe2idx_dict = self.model_dicts['bpe2idx_dict']
        pos2idx_dict = self.model_dicts['pos2idx_dict']

        data_examples, data_eval = process_file(
            self.model_config, observations, datatype,
            remove_unicode=self.model_config.remove_unicode, bpe_model=self.bpe_model, is_test=True)

        data_features, data_meta = build_features_notfdata(self.model_config, data_examples, datatype,
                                                           word2idx_dict, char2idx_dict, bpe2idx_dict, pos2idx_dict,
                                                           is_test=True)

        total = data_meta["total"]

        answer_dict = {}
        remapped_dict = {}

        print(len(data_features))
        # hotfix: pad data_features so its length is a multiple of config.batch_size
        while len(data_features) % self.model_config.batch_size != 0:
            data_features.append(data_features[-1])

        print(len(data_features))

        for step in tqdm(range(total // self.model_config.batch_size + 1)):

            def get_batch():
                batch_items = data_features[step * self.model_config.batch_size:(step + 1) * self.model_config.batch_size]
                batch = dict()
                for key in batch_items[0].keys():
                    batch[key] = np.stack([el[key] for el in batch_items])
                return batch

            batch = get_batch()

            qa_id, loss, yp1, yp2 = self.tf_session.run(
                [self.model.qa_id, self.model.loss, self.model.yp1, self.model.yp2], feed_dict={
                    self.model.c_ph: batch['context_idxs'],
                    self.model.q_ph: batch['ques_idxs'],
                    self.model.ch_ph: batch['context_char_idxs'],
                    self.model.qh_ph: batch['ques_char_idxs'],
                    self.model.cb_ph: batch['context_bpe_idxs'],
                    self.model.qb_ph: batch['ques_bpe_idxs'],
                    self.model.cp_ph: batch['context_pos_idxs'],
                    self.model.qp_ph: batch['ques_pos_idxs'],
                    self.model.y1_ph: batch['y1'],
                    self.model.y2_ph: batch['y2'],
                    self.model.qa_id: batch['id'],
                })

            answer_dict_, remapped_dict_ = convert_tokens(
                data_eval, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)

        return remapped_dict
Example 3
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    """
    Evaluate a batch while training.
    :param model: The model object.
    :param num_batches: Number of batches to evaluate.
    :param eval_file: The file with the correct answers.
    :param sess: The session.
    :param data_type: The type of the data (train/dev/test)
    :param handle:
    :param str_handle:
    :return: metrics dictionary and list of tensorflow summaries.
    """
    answer_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2, = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2], feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(
            eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss".format(data_type), simple_value=metrics["loss"]), ])
    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"]), ])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"]), ])
    return metrics, [loss_sum, f1_sum, em_sum]
Example 4
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle,
                   str_handle):
    answer_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2, = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2],
            feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(eval_file, qa_id.tolist(),
                                         yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/loss".format(data_type),
                         simple_value=metrics["loss"]),
    ])
    f1_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/f1".format(data_type),
                         simple_value=metrics["f1"]),
    ])
    em_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/em".format(data_type),
                         simple_value=metrics["exact_match"]),
    ])
    return metrics, [loss_sum, f1_sum, em_sum]
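Both variants return the metrics plus a list of tf.Summary protos intended for a TF1 event file. A brief usage sketch follows; config.val_num_batches, dev_eval_file, dev_handle, config.log_dir, and global_step are assumed names from the surrounding training loop, not part of the snippet above.

metrics, summaries = evaluate_batch(model, config.val_num_batches, dev_eval_file,
                                    sess, "dev", handle, dev_handle)
writer = tf.summary.FileWriter(config.log_dir)
for summ in summaries:
    writer.add_summary(summ, global_step)
writer.flush()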
Example 5
File: run.py Project: wrccrwx/glomo
def evaluate_batch(data_source, model, max_batches, eval_file):
    answer_dict = {}
    total_loss, step_cnt = 0, 0
    for step, data in enumerate(data_source):
        if step >= max_batches and max_batches > 0: break

        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        y1 = Variable(data['y1'], volatile=True)
        y2 = Variable(data['y2'], volatile=True)

        graph = data['graph']
        graph_q = data['graph_q']

        elmo = data['elmo']
        elmo_q = data['elmo_q']
        if elmo is not None:
            elmo.volatile = True
            elmo_q.volatile = True

        logit1, logit2, yp1, yp2 = model(context_idxs, ques_idxs, context_char_idxs, ques_char_idxs, context_lens, return_yp=True, pre_att=graph, pre_att_q=graph_q, elmo=elmo, elmo_q=elmo_q)
        loss = criterion(logit1, y1) + criterion(logit2, y2)
        answer_dict_, _ = convert_tokens(eval_file, data['ids'], yp1.data.cpu().numpy().tolist(), yp2.data.cpu().numpy().tolist())
        answer_dict.update(answer_dict_)

        total_loss += loss.data[0]
        step_cnt += 1
    loss = total_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss
    return metrics
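Variable(..., volatile=True) in the loop above is pre-0.4 PyTorch and has since been removed; current code wraps the loop in torch.no_grad() instead (as Examples 20 and 24 below already do). A sketch of the same evaluation loop under that API, with criterion, convert_tokens, and evaluate taken as the example's own externals:

import torch


def evaluate_batch_nograd(data_source, model, max_batches, eval_file, criterion):
    # same loop as above, with torch.no_grad() replacing volatile Variables
    answer_dict = {}
    total_loss, step_cnt = 0.0, 0
    with torch.no_grad():
        for step, data in enumerate(data_source):
            if 0 < max_batches <= step:
                break
            logit1, logit2, yp1, yp2 = model(
                data['context_idxs'], data['ques_idxs'],
                data['context_char_idxs'], data['ques_char_idxs'],
                data['context_lens'], return_yp=True,
                pre_att=data['graph'], pre_att_q=data['graph_q'],
                elmo=data['elmo'], elmo_q=data['elmo_q'])
            total_loss += (criterion(logit1, data['y1']) +
                           criterion(logit2, data['y2'])).item()
            answer_dict_, _ = convert_tokens(eval_file, data['ids'],
                                             yp1.cpu().tolist(),
                                             yp2.cpu().tolist())
            answer_dict.update(answer_dict_)
            step_cnt += 1
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = total_loss / step_cnt
    return metrics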
Example 6
def evaluate_batch(data_source, model, max_batches, eval_file, config):
    answer_dict = {}
    sp_dict = {}
    total_loss, step_cnt = 0, 0
    iter = data_source
    for step, data in enumerate(iter):
        if step >= max_batches and max_batches > 0: break

        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        y1 = Variable(data['y1'], volatile=True)
        y2 = Variable(data['y2'], volatile=True)
        q_type = Variable(data['q_type'], volatile=True)
        is_support = Variable(data['is_support'], volatile=True)
        is_support_word = Variable(data['is_support_word'], volatile=True)
        start_mapping = Variable(data['start_mapping'], volatile=True)
        end_mapping = Variable(data['end_mapping'], volatile=True)
        all_mapping = Variable(data['all_mapping'], volatile=True)

        logit1, logit2, predict_type, predict_support, yp1, yp2 = model(context_idxs, ques_idxs, context_char_idxs, ques_char_idxs, context_lens, start_mapping, end_mapping, all_mapping, is_support_word,return_yp=True)
        loss = (nll_sum(predict_type, q_type) + nll_sum(logit1, y1) + nll_sum(logit2, y2)) / context_idxs.size(0) + config.sp_lambda * nll_average(predict_support.view(-1, 2), is_support.view(-1))
        answer_dict_ = convert_tokens(eval_file, data['ids'], yp1.data.cpu().numpy().tolist(), yp2.data.cpu().numpy().tolist(), np.argmax(predict_type.data.cpu().numpy(), 1))
        answer_dict.update(answer_dict_)
#       equivalent to map[x] = y
        total_loss += loss.data.item()
        step_cnt += 1
    loss = total_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss

    return metrics
Example 7
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file,
                             get_record_parser(config, is_test=True),
                             config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)
    graph_handler = GraphHandler(config, model)
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        #saver = tf.train.Saver()
        graph_handler.initialize(sess)
        #saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        ensember_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            start_logits, stop_logits, qa_id, loss, yp1, yp2 = sess.run([
                model.start_logits, model.stop_logits, model.qa_id, model.loss,
                model.yp1, model.yp2
            ])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
            start_logits = start_logits.tolist()
            stop_logits = stop_logits.tolist()
            for id, start, stop in zip(qa_id, start_logits, stop_logits):
                ensember_dict[str(id)] = {'yp1': start, 'yp2': stop}
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        answer_path = config.answer_file + "_" + str(config.load_step)
        with open(answer_path, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                               metrics['f1']))
        ensember_dict['loss'] = loss
        ensember_dict['exact_match'] = metrics['exact_match']
        ensember_dict['f1'] = metrics['f1']
        file_name = config.model_name + '_' + config.run_id + '.pklz'
        save_path = os.path.join(config.result_path, file_name)
        with gzip.open(save_path, 'wb', compresslevel=3) as fh:
            pickle.dump(ensember_dict, fh)
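A hypothetical companion snippet for the .pklz dump written above: it reloads several such files and averages the stored per-question start/stop logits, which is the usual way the saved ensember_dict would be consumed for ensembling. The paths and function names here are assumptions, not part of the original project.

import gzip
import pickle

import numpy as np


def load_run(path):
    # read one gzipped pickle produced by test() above
    with gzip.open(path, 'rb') as fh:
        return pickle.load(fh)


def average_logits(paths):
    runs = [load_run(p) for p in paths]
    qids = [k for k in runs[0] if k not in ('loss', 'exact_match', 'f1')]
    averaged = {}
    for qid in qids:
        averaged[qid] = {
            'yp1': np.mean([run[qid]['yp1'] for run in runs], axis=0),
            'yp2': np.mean([run[qid]['yp2'] for run in runs], axis=0),
        }
    return averaged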
Example 8
def predict(data_source, model, eval_file, config, prediction_file):
    answer_dict = {}
    sp_dict = {}
    sp_th = config.sp_threshold
    for step, data in enumerate(tqdm(data_source)):
        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        start_mapping = Variable(data['start_mapping'], volatile=True)
        end_mapping = Variable(data['end_mapping'], volatile=True)
        all_mapping = Variable(data['all_mapping'], volatile=True)
        is_support = Variable(data['is_support'], volatile=True)
        is_support_word = Variable(data['is_support_word'], volatile=True)
        logit1, logit2, predict_type, predict_support, yp1, yp2 = model(context_idxs, ques_idxs, context_char_idxs, ques_char_idxs, context_lens, start_mapping, end_mapping, all_mapping,is_support_word,return_yp=True)
        answer_dict_ = convert_tokens(eval_file, data['ids'], yp1.data.cpu().numpy().tolist(), yp2.data.cpu().numpy().tolist(), np.argmax(predict_type.data.cpu().numpy(), 1))
        answer_dict.update(answer_dict_)

        predict_support_np = torch.sigmoid(predict_support[:, :, 1]).data.cpu().numpy()
        for i in range(predict_support_np.shape[0]):
            cur_sp_pred = []
            cur_id = data['ids'][i]
            for j in range(predict_support_np.shape[1]):
                if j >= len(eval_file[cur_id]['sent2title_ids']): break
                if predict_support_np[i, j] > sp_th:
                    cur_sp_pred.append(eval_file[cur_id]['sent2title_ids'][j])
            sp_dict.update({cur_id: cur_sp_pred})

    prediction = {'answer': answer_dict, 'sp': sp_dict}
    with open(prediction_file, 'w') as f:
        json.dump(prediction, f)
Example 9
    def test(self):
        # load the preprocessed (numericalized) data
        test_loader = DataLoader(dataset=MyDataset(self.config.test_data_file, self.digital_keys),
                                 batch_size=self.config.val_num_batches * self.device_count)
        # load the raw eval data
        with open(self.config.test_eval_file, "r") as fh:
            test_eval_file = json.load(fh)

        answer_dict = {}
        self.logger.info('testing model...')
        answer_save_file = open(self.config.answer_file, 'w', encoding='utf-8')

        self.model.is_train = False
        self.model.eval()
        for batch in test_loader:
            logits1, logits2 = self.model(batch[0].to(self.device), batch[1].to(self.device), batch[2].to(self.device),
                                          batch[3].to(self.device))
            loss = self.calc_loss(logits1, logits2, batch[4].to(self.device), batch[5].to(self.device))
            # start positions
            p1 = logits1.argmax(dim=1)
            # end positions
            p2 = logits2.argmax(dim=1)
            answer_dict_, remapped_dict = convert_tokens(test_eval_file, batch[6].to(self.device).tolist(), p1.tolist(),
                                                         p2.tolist())
            answer_dict.update(answer_dict_)
            uuid = test_eval_file[str(batch[6].tolist()[0])]["uuid"]
            # save answer
            answer_save_file.write(str(uuid + ":" + remapped_dict[uuid] + "\n"))

        metrics = evaluate(test_eval_file, answer_dict)
        self.logger.info('test exact_match:{},f1:{},loss:{}'.format(metrics['exact_match'], metrics['f1'], loss))
Example 10
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file,
                                 get_record_parser(config, is_test=True),
                                 config).make_one_shot_iterator()

        model = Model(config,
                      test_batch,
                      word_mat,
                      char_mat,
                      trainable=False,
                      graph=g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)

            #with open(config.answer_file, "w") as fh:
            #    json.dump(remapped_dict, fh)
            '''
            metrics = evaluate(eval_file, answer_dict)
            print("Exact Match: {}, F1: {}".format(
                metrics['exact_match'], metrics['f1']))
            '''
            with open(config.answer_csv, 'w') as f:
                print('dumping ans file to : %s' % str(config.answer_csv))
                s = csv.writer(f, delimiter=',', lineterminator='\n')
                for i in sorted(remapped_dict):
                    s.writerow([remapped_dict[i]])
Example 11
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
	answer_dict = {}
	losses = []
	outlier_count = 0
	for _ in tqdm(range(1, num_batches + 1)):
		qa_id, loss, yp1, yp2, = sess.run(
			[model.qa_id, model.loss, model.yp1, model.yp2], feed_dict={handle: str_handle})
		answer_dict_, _, outlier = convert_tokens(
			eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
		if outlier:
			outlier_count += 1
			continue
		answer_dict.update(answer_dict_)
		losses.append(loss)
	#print("outlier_count:",outlier_count)
	loss = np.mean(losses)
	metrics = evaluate(eval_file, answer_dict)
	metrics["loss"] = loss
	loss_sum = tf.Summary(value=[tf.Summary.Value(
		tag="{}/loss".format(data_type), simple_value=metrics["loss"]), ])
	f1_sum = tf.Summary(value=[tf.Summary.Value(
		tag="{}/f1".format(data_type), simple_value=metrics["f1"]), ])
	em_sum = tf.Summary(value=[tf.Summary.Value(
		tag="{}/em".format(data_type), simple_value=metrics["exact_match"]), ])
	rouge_l_f = tf.Summary(value=[tf.Summary.Value(
		tag="{}/rouge-l-f".format(data_type), simple_value=metrics["rouge-l-f"]), ])
	rouge_l_p = tf.Summary(value=[tf.Summary.Value(
		tag="{}/rouge-l-p".format(data_type), simple_value=metrics["rouge-l-p"]), ])
	rouge_l_r = tf.Summary(value=[tf.Summary.Value(
		tag="{}/rouge-l-r".format(data_type), simple_value=metrics["rouge-l-r"]), ])
	outlier_c = tf.Summary(value=[tf.Summary.Value(
		tag="{}/outlier_count".format(data_type), simple_value=outlier_count), ])
	return metrics, [loss_sum, f1_sum, em_sum, rouge_l_f, rouge_l_p, rouge_l_r, outlier_c]
Example 12
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
	answer_dict = {}
	losses_esp = []
	losses_ee = []
	outlier_count = 0
	for _ in tqdm(range(1, num_batches + 1)):
		qa_id, loss, yp1, yp2, ee_loss = sess.run(
			[model.qa_id, model.loss, model.yp1, model.yp2, model.ee_loss], feed_dict={handle: str_handle})
		answer_dict_, _, outlier = convert_tokens(
			eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
		#if outlier:
		#	outlier_count += 1
		#	continue
		answer_dict.update(answer_dict_)
		losses_esp.append(loss)
		losses_ee.append(ee_loss)
	#print("outlier_count:",outlier_count)
	loss_esp = np.mean(losses_esp)
	loss_ee = np.mean(losses_ee)
	metrics = evaluate(eval_file, answer_dict)
	metrics["ee_loss"] = loss_ee
	metrics["esp_loss"] = loss_esp

	loss_sum1 = tf.Summary(value=[tf.Summary.Value(
		tag="{}/loss_esp".format(data_type), simple_value=metrics["esp_loss"]), ])
	loss_sum2 = tf.Summary(value=[tf.Summary.Value(
		tag="{}/loss_ee".format(data_type), simple_value=metrics["ee_loss"]), ])
	f1_sum = tf.Summary(value=[tf.Summary.Value(
		tag="{}/f1".format(data_type), simple_value=metrics["f1"]), ])
	em_sum = tf.Summary(value=[tf.Summary.Value(
		tag="{}/em".format(data_type), simple_value=metrics["exact_match"]), ])
	rouge_l_f = tf.Summary(value=[tf.Summary.Value(
		tag="{}/ROUGE-L-F1".format(data_type), simple_value=metrics["rouge-l-f"]), ])

	return metrics, [loss_sum1, loss_sum2, f1_sum, em_sum, rouge_l_f]
Example 13
def evaluate(model, data_loader, device, eval_file, max_len, use_squad_v2):
    nll_meter = util.AverageMeter()

    model.eval()
    pred_dict = {}
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(data_loader.dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            cc_idxs = cc_idxs.to(device)
            qc_idxs = qc_idxs.to(device)
            batch_size = cw_idxs.size(0)

            # Forward
            log_p = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            #print("ckpt 1")
            ans_lens = y2 - y1
            loss = 0
            for i in range(max_len):
                mask = ((torch.ones_like(y1) * i) == ans_lens).type(
                    torch.cuda.LongTensor)
                y = y1 * mask
                loss += F.nll_loss(log_p[:, :, i], y)

            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            log_p, ans_len = torch.max(log_p, dim=-1)
            starts = torch.max(log_p, dim=-1)[1]
            ends = starts
            for i in range(starts.size(0)):
                ends[i] += ans_len.type(torch.cuda.LongTensor)[i, starts[i]]
            # print("starts and ends:", starts, ends, starts.size(), ends.size())
            # starts, ends = util.discretize(p, p + ans_lens, max_len, use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)

            preds, _ = util.convert_tokens(gold_dict, ids.tolist(),
                                           starts.tolist(), ends.tolist(),
                                           use_squad_v2)
            pred_dict.update(preds)

    model.train()

    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    results_list = [('NLL', nll_meter.avg), ('F1', results['F1']),
                    ('EM', results['EM'])]
    if use_squad_v2:
        results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)

    return results, pred_dict
Example 14
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle, config):
	answer_dict = {}
	losses_esp, losses_pr, losses_ee = [], [], []  # keep the three loss lists separate
	outlier_count = 0
	for _ in tqdm(range(1, num_batches + 1)):
		if config.with_passage_ranking:
			qa_id, loss_esp, loss_pr, loss_ee, yp1, yp2, = sess.run(
				[model.qa_id, model.loss, model.pr_loss, model.e_loss, model.yp1, model.yp2],
				feed_dict={handle: str_handle})
		else:
			qa_id, loss_esp, yp1, yp2, = sess.run(
				[model.qa_id, model.loss, model.yp1, model.yp2],
				feed_dict={handle: str_handle})
		answer_dict_, _, outlier = convert_tokens(
			config, eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
		if outlier:
			outlier_count += 1
			continue
		answer_dict.update(answer_dict_)
		if loss_esp<100:
			losses_esp.append(loss_esp)
		print(loss_esp)
		if config.with_passage_ranking:
			losses_pr.append(loss_pr)
			losses_ee.append(loss_ee)
	#print("outlier_count:",outlier_count)
	loss_esp = np.mean(losses_esp)
	print("dev_loss:",loss_esp)
	if config.with_passage_ranking:
		loss_pr = np.mean(losses_pr)
		loss_ee = np.mean(losses_ee)
	metrics = evaluate(eval_file, answer_dict)
	metrics["loss_esp"] = loss_esp
	metrics["loss_ee"] = loss_esp
	if config.with_passage_ranking:
		metrics["loss_pr"] = loss_pr
		metrics["loss_ee"] = loss_ee
	loss_sum1 = tf.Summary(value=[tf.Summary.Value(
		tag="{}/loss_esp".format(data_type), simple_value=metrics["loss_esp"]), ])
	if config.with_passage_ranking:
		loss_sum2 = tf.Summary(value=[tf.Summary.Value(
			tag="{}/loss_pr".format(data_type), simple_value=metrics["loss_pr"]), ])
		loss_sum3 = tf.Summary(value=[tf.Summary.Value(
			tag="{}/loss_ee".format(data_type), simple_value=metrics["loss_ee"]), ])
	f1_sum = tf.Summary(value=[tf.Summary.Value(
		tag="{}/f1".format(data_type), simple_value=metrics["f1"]), ])
	em_sum = tf.Summary(value=[tf.Summary.Value(
		tag="{}/em".format(data_type), simple_value=metrics["exact_match"]), ])
	rouge_l_f = tf.Summary(value=[tf.Summary.Value(
		tag="{}/ROUGE-L".format(data_type), simple_value=metrics["rouge-l-f"]), ])
	rouge_l_p = tf.Summary(value=[tf.Summary.Value(
		tag="{}/rouge-l-p".format(data_type), simple_value=metrics["rouge-l-p"]), ])
	rouge_l_r = tf.Summary(value=[tf.Summary.Value(
		tag="{}/rouge-l-r".format(data_type), simple_value=metrics["rouge-l-r"]), ])
	outlier_c = tf.Summary(value=[tf.Summary.Value(
		tag="{}/outlier_count".format(data_type), simple_value=outlier_count), ])
	if config.with_passage_ranking:
		return metrics, [loss_sum1, loss_sum2, loss_sum3, rouge_l_f]
	return metrics, [loss_sum1, rouge_l_f]
Example 15
def test(config, dataset="test"):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)

    if dataset == "test":
        test_eval_file = config.test_eval_file
        test_meta = config.test_meta
        test_record_file = config.test_record_file
    elif dataset == "addsent":
        print('HELLO')
        test_eval_file = config.addsent_eval_file
        test_meta = config.addsent_meta
        test_record_file = config.addsent_record_file
    elif dataset == "addonesent":
        test_eval_file = config.addonesent_eval_file
        test_meta = config.addonesent_meta
        test_record_file = config.addonesent_record_file

    with open(test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(test_record_file,
                             get_record_parser(config, is_test=True),
                             config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                               metrics['f1']))
Example 16
def evaluate_batch(data_source, model, max_batches, eval_file, config):
    answer_dict = {}
    sp_dict = {}
    total_loss, step_cnt = 0, 0
    iter = data_source
    for step, data in enumerate(iter):
        if step >= max_batches and max_batches > 0: break

        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        y1 = Variable(data['y1'], volatile=True)
        y2 = Variable(data['y2'], volatile=True)
        q_type = Variable(data['q_type'], volatile=True)
        is_support = Variable(data['is_support'], volatile=True)
        start_mapping = Variable(data['start_mapping'], volatile=True)
        end_mapping = Variable(data['end_mapping'], volatile=True)
        all_mapping = Variable(data['all_mapping'], volatile=True)
        #
        subject_y1 = Variable(data['subject_y1'])
        subject_y2 = Variable(data['subject_y2'])
        object_y1 = Variable(data['object_y1'])
        object_y2 = Variable(data['object_y2'])
        relations = Variable(data['relations'])
        #
        #
        model_results = model(context_idxs, ques_idxs, context_char_idxs, ques_char_idxs, relations, \
            context_lens, start_mapping, end_mapping, all_mapping, return_yp=True)

        (logit1, logit2, predict_type, predict_support, logit_subject_start, logit_subject_end, \
            logit_object_start, logit_object_end, k_relations, loss_relation, yp1, yp2, sy1, sy2, oy1, oy2) = model_results
        loss_1 = (nll_sum(predict_type, q_type) + nll_sum(logit1, y1) +
                  nll_sum(logit2, y2)) / context_idxs.size(0)
        loss_2 = nll_average(predict_support.view(-1, 2), is_support.view(-1))
        loss_3_r = torch.sum(loss_relation)
        loss_3_s = (nll_sum(logit_subject_start, subject_y1) + nll_sum(
            logit_subject_end, subject_y2)) / context_idxs.size(0)
        loss_3_o = (nll_sum(logit_object_start, object_y1) + nll_sum(
            logit_object_end, object_y2)) / context_idxs.size(0)

        loss = loss_1 + config.sp_lambda * loss_2 + config.evi_lambda * (
            loss_3_s + loss_3_r + loss_3_o)

        answer_dict_ = convert_tokens(
            eval_file, data['ids'],
            yp1.data.cpu().numpy().tolist(),
            yp2.data.cpu().numpy().tolist(),
            np.argmax(predict_type.data.cpu().numpy(), 1))
        answer_dict.update(answer_dict_)

        total_loss += loss.item()  # total_loss += loss.data[0]
        step_cnt += 1
    loss = total_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss

    return metrics
Example 17
def test(config):

    gpu_options = tf.GPUOptions(visible_device_list="2")
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 gpu_options=gpu_options)
    sess_config.gpu_options.allow_growth = True

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file,
                             get_record_parser(config, is_test=True),
                             config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}

        # tqdm
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_, outlier = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
            print("\n", loss)
            if (loss > 50):
                for i, j, k in zip(qa_id.tolist(), yp1.tolist(), yp2.tolist()):
                    print(answer_dict[str(i)], j, k)
                #print("IDs: {} Losses: {} Yp1: {} Yp2: {}".format(qa_id.tolist(),\
                #	loss.tolist(), yp1.tolist(), yp2.tolist()))
        loss = np.mean(losses)

        # evaluate with answer_dict, but in evaluate-v1.1.py, evaluate with remapped_dict
        # since only that is saved. Both dict are a little bit different, check evaluate-v1.1.py
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {} Rouge-l-f: {} Rouge-l-p: {} Rouge-l-r: {}".format(\
         metrics['exact_match'], metrics['f1'], metrics['rouge-l-f'], metrics['rouge-l-p'],\
         metrics['rouge-l-r']))
Example 18
def predict(config):

    prepro_predict(config)

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.bpe_emb_file, "r") as fh:
        bpe_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.pos_emb_file, "r") as fh:
        pos_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.predict_eval_file, "r") as fh:
        predict_eval_file = json.load(fh)
    with open(config.predict_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.predict_record_file,
                             get_record_parser(config, is_test=True),
                             config).make_one_shot_iterator()

    model = Model(config,
                  test_batch,
                  word_mat,
                  char_mat,
                  bpe_mat,
                  pos_mat,
                  trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        # TODO: add restoring from best model or from model name
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        print('Restoring from: {}'.format(
            tf.train.latest_checkpoint(config.save_dir)))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                predict_eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)

        path_to_save_answer = config.predict_file + '_ans'
        with open(path_to_save_answer, "w") as fh:
            json.dump(remapped_dict, fh)

        print("Answer dumped: {}".format(path_to_save_answer))
Example 19
def evaluate(args, model, data_loader, device, eval_file, max_len,
             use_squad_v2):
    nll_meter = util.AverageMeter()

    model.eval()
    pred_dict = {}
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
         tqdm(total=len(data_loader.dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, cw_pos, cw_ner, cw_freq, cqw_extra, y1, y2, ids in data_loader:

            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            cw_pos = cw_pos.to(device)
            cw_ner = cw_ner.to(device)
            cw_freq = cw_freq.to(device)
            cqw_extra = cqw_extra.to(device)

            batch_size = cw_idxs.size(0)

            # Forward
            if args.model == 'bidaf':
                log_p1, log_p2 = model(cw_idxs, qw_idxs)
            else:
                log_p1, log_p2 = model(cw_idxs, qw_idxs, cw_pos, cw_ner,
                                       cw_freq, cqw_extra)

            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, max_len, use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)

            preds, _ = util.convert_tokens(gold_dict, ids.tolist(),
                                           starts.tolist(), ends.tolist(),
                                           use_squad_v2)
            pred_dict.update(preds)

    model.train()

    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    results_list = [('NLL', nll_meter.avg), ('F1', results['F1']),
                    ('EM', results['EM'])]
    if use_squad_v2:
        results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)

    return results, pred_dict
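util.discretize above turns the start/end probability vectors into concrete span indices. The sketch below shows what it computes (maximize p1[start] * p2[end] over spans with 0 <= end - start < max_len), ignoring the SQuAD 2.0 no-answer case; this is an assumption about the utility's behaviour, not its actual code.

import torch


def discretize_sketch(p_start, p_end, max_len):
    # p_start, p_end: (batch, seq_len) probabilities for the span start/end
    joint = p_start.unsqueeze(2) * p_end.unsqueeze(1)            # (batch, seq, seq)
    legal = torch.triu(torch.ones_like(joint)) - \
            torch.triu(torch.ones_like(joint), diagonal=max_len)
    joint = joint * legal                                        # keep 0 <= end - start < max_len
    seq_len = joint.size(-1)
    flat_idx = joint.view(joint.size(0), -1).argmax(dim=-1)
    starts, ends = flat_idx // seq_len, flat_idx % seq_len
    return starts, ends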
Example 20
def evaluate_batch(data_source, model, max_batches, eval_file, config):
    answer_dict = {}
    sp_dict = {}
    total_loss, step_cnt = 0, 0
    iter = data_source
    for step, data in enumerate(iter):
        if step >= max_batches and max_batches > 0: break

        with torch.no_grad():
            if config.cuda:
                data = {
                    k: (data[k].cuda() if k != 'ids' else data[k])
                    for k in data
                }
            context_idxs = data['context_idxs']
            ques_idxs = data['ques_idxs']
            context_char_idxs = data['context_char_idxs']
            ques_char_idxs = data['ques_char_idxs']
            context_lens = data['context_lens']
            y1 = data['y1']
            y2 = data['y2']
            q_type = data['q_type']
            is_support = data['is_support']
            start_mapping = data['start_mapping']
            end_mapping = data['end_mapping']
            all_mapping = data['all_mapping']

            logit1, logit2, predict_type, predict_support, yp1, yp2 = model(
                context_idxs,
                ques_idxs,
                context_char_idxs,
                ques_char_idxs,
                context_lens,
                start_mapping,
                end_mapping,
                all_mapping,
                context_lens.sum(1).max().item(),
                return_yp=True)
            loss = (nll_sum(predict_type, q_type) + nll_sum(logit1, y1) +
                    nll_sum(logit2, y2)
                    ) / context_idxs.size(0) + config.sp_lambda * nll_average(
                        predict_support.view(-1, 2), is_support.view(-1))
            answer_dict_ = convert_tokens(
                eval_file, data['ids'],
                yp1.data.cpu().numpy().tolist(),
                yp2.data.cpu().numpy().tolist(),
                np.argmax(predict_type.data.cpu().numpy(), 1))
            answer_dict.update(answer_dict_)

            total_loss += loss.item()
        step_cnt += 1
    loss = total_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss

    return metrics
Example 21
def evaluate(model, data_loader, device, eval_file, max_len, use_squad_v2,
             args):
    nll_meter = util.AverageMeter()

    model.eval()
    pred_dict = {}
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(data_loader.dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:

            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)

            ## Additions for BERT ##
            max_context_len, max_question_len = args.para_limit, args.ques_limit
            if "bert" in args.model_type:
                bert_dev_embeddings = get_embeddings("dev", ids,
                                                     args.para_limit,
                                                     args.ques_limit)
            else:
                bert_dev_embeddings = None

            # Forward
            log_p1, log_p2 = model(cw_idxs, qw_idxs, bert_dev_embeddings, \
            max_context_len, max_question_len, device)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, max_len, use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)

            preds, _ = util.convert_tokens(gold_dict, ids.tolist(),
                                           starts.tolist(), ends.tolist(),
                                           use_squad_v2)
            pred_dict.update(preds)

    model.train()

    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    results_list = [('NLL', nll_meter.avg), ('F1', results['F1']),
                    ('EM', results['EM'])]
    if use_squad_v2:
        results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)

    return results, pred_dict
Example 22
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file,
                                 get_record_parser(config, is_test=True),
                                 config).make_one_shot_iterator()

        model = QANet(config,
                      test_batch,
                      word_mat,
                      char_mat,
                      trainable=False,
                      graph=g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                                   metrics['f1']))
Example 23
    def SelfEvaluate(self,
                     batches,
                     eval_file=None,
                     answer_file=None,
                     drop_file=None,
                     dev=None):
        print('Starting evaluation')

        with open(eval_file, 'r', encoding='utf-8') as f:
            eval_file = json.load(f)
        with open(dev, 'r', encoding='utf-8') as f:
            dev = json.load(f)

        answer_dict = {}
        mapped_dict = {}

        for batch in batches:
            data = prepare_data(batch)
            full_p_states, p_mask, full_q_states, q_mask = self.encode(data)
            logits1, logits2, ans_log = self.decode(full_p_states, p_mask,
                                                    full_q_states, q_mask)
            y1, y2, has_ans = get_predictions(logits1, logits2, ans_log)
            qa_id = data['id']
            answer_dict_, mapped_dict_ = convert_tokens(
                eval_file, qa_id, y1, y2, has_ans)
            answer_dict.update(answer_dict_)
            mapped_dict.update(mapped_dict_)

            del full_p_states, p_mask, full_q_states, q_mask, y1, y2, answer_dict_, mapped_dict_, has_ans, ans_log, logits1, logits2

        with open(drop_file, 'r', encoding='utf-8') as f:
            drop = json.load(f)
        for i in drop['drop_ids']:
            uuid = eval_file[str(i)]["uuid"]
            answer_dict[str(i)] = ''
            mapped_dict[uuid] = ''

        with open(answer_file, 'w', encoding='utf-8') as f:
            json.dump(mapped_dict, f)
        metrics = evaluate(dev, mapped_dict)

        # sub_path = join('./result/', "submit.csv")
        # #log.info('Writing submission file to {}...'.format(sub_path))
        # with open(sub_path, 'w') as csv_fh:
        #     csv_writer = csv.writer(csv_fh, delimiter=',')
        #     csv_writer.writerow(['Id', 'Predicted'])
        #     for uuid in sorted(mapped_dict):
        #         csv_writer.writerow([uuid, mapped_dict[uuid]])

        print("EM: {}, F1: {}, Has answer: {}, No answer: {}".format(
            metrics['exact'], metrics['f1'], metrics['HasAns_f1'],
            metrics['NoAns_f1']))

        return metrics['exact'], metrics['f1']
Example 24
def predict(data_source, model, eval_file, config, prediction_file):
    answer_dict = {}
    sp_dict = {}
    sp_th = config.sp_threshold
    for step, data in enumerate(tqdm(data_source)):
        with torch.no_grad():
            if config.cuda:
                data = {
                    k: (data[k].cuda() if k != 'ids' else data[k])
                    for k in data
                }
            context_idxs = data['context_idxs']
            ques_idxs = data['ques_idxs']
            context_char_idxs = data['context_char_idxs']
            ques_char_idxs = data['ques_char_idxs']
            context_lens = data['context_lens']
            start_mapping = data['start_mapping']
            end_mapping = data['end_mapping']
            all_mapping = data['all_mapping']

            logit1, logit2, predict_type, predict_support, yp1, yp2 = model(
                context_idxs,
                ques_idxs,
                context_char_idxs,
                ques_char_idxs,
                context_lens,
                start_mapping,
                end_mapping,
                all_mapping,
                context_lens.sum(1).max().item(),
                return_yp=True)
            answer_dict_ = convert_tokens(
                eval_file, data['ids'],
                yp1.data.cpu().numpy().tolist(),
                yp2.data.cpu().numpy().tolist(),
                np.argmax(predict_type.data.cpu().numpy(), 1))
        answer_dict.update(answer_dict_)

        predict_support_np = torch.sigmoid(
            predict_support[:, :, 1] -
            predict_support[:, :, 0]).data.cpu().numpy()
        for i in range(predict_support_np.shape[0]):
            cur_sp_pred = []
            cur_id = data['ids'][i]
            for j in range(predict_support_np.shape[1]):
                if j >= len(eval_file[cur_id]['sent2title_ids']): break
                if predict_support_np[i, j] > sp_th:
                    cur_sp_pred.append(eval_file[cur_id]['sent2title_ids'][j])
            sp_dict.update({cur_id: cur_sp_pred})

    prediction = {'answer': answer_dict, 'sp': sp_dict}
    with open(prediction_file, 'w') as f:
        json.dump(prediction, f)
Example 25
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["num_batches"]

    print("Loading model...")
    test_batch = get_batch_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config, is_test=True).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        select_right = []
        for step in tqdm(range(1, total + 1)):
            qa_id, loss, yp1, yp2 , y1, y2, is_select_p, is_select= sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2, model.y1, model.y2, model.is_select_p, model.is_select])
            y1 = np.argmax(y1, axis=-1)
            y2 = np.argmax(y2, axis=-1)
            sp = np.argmax(is_select_p, axis=-1)
            s = np.argmax(is_select, axis=-1)
            sp = [ n+i*config.passage_num for i,n in enumerate(sp.tolist()) ]
            s = [ m+i*config.passage_num for i,m in enumerate(s.tolist()) ]
            select_right.append(len(set(s).intersection(set(sp))))

            answer_dict_, _ = convert_tokens(
                eval_file, [qa_id[n] for n in sp], [yp1[n] for n in sp], [yp2[n] for n in sp], [y1[n] for n in sp], [y2[n] for n in sp], sp, s)
            answer_dict.update(answer_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        select_accu = sum(select_right)/ (len(select_right)*(config.batch_size/config.passage_num))
        write_prediction(eval_file, answer_dict, 'answer_for_evl.json', config)
        metrics = evaluate(eval_file, answer_dict, filter=False)
        metrics['Selection Accuracy'] = select_accu
        
        print("Exact Match: {}, F1: {}, selection accuracy: {}".format(
            metrics['exact_match'], metrics['f1'], metrics['Selection Accuracy']))
Example 26
def evaluate(model, data_loader, device, eval_file, max_len, use_squad_v2):
    nll_meter = util.AverageMeter()
    # print('Memory 3: ', torch.cuda.memory_allocated())
    model.eval()
    pred_dict = {}
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(data_loader.dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # print('Memory at start of loop section: ', torch.cuda.memory_allocated())
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            cc_idxs = cc_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            qc_idxs = qc_idxs.to(device)
            batch_size = cw_idxs.size(0)
            # print('Memory Before Forward Pass: ', torch.cuda.memory_allocated())

            # Forward (reconstructed to match the other evaluate() examples here;
            # the exact model call signature is an assumption)
            log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
            # print('Memory After Forward Pass: ', torch.cuda.memory_allocated())
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            # print('Memory After Loss Calc: ', torch.cuda.memory_allocated())
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, max_len, use_squad_v2)
            print('Max start idx score: ', torch.max(starts))
            print('Max End idx score: ', torch.max(ends))

            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)

            preds, _ = util.convert_tokens(gold_dict, ids.tolist(),
                                           starts.tolist(), ends.tolist(),
                                           use_squad_v2)
            pred_dict.update(preds)

    model.train()

    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    results_list = [('NLL', nll_meter.avg), ('F1', results['F1']),
                    ('EM', results['EM'])]
    if use_squad_v2:
        results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)

    return results, pred_dict
Example 27
def evaluate(model, data_loader, device, eval_file, max_len, use_squad_v2):
    nll_meter = util.AverageMeter()
    loss_f = torch.nn.CrossEntropyLoss()

    model.eval()
    pred_dict = {}
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    mems = (tuple(), tuple(), tuple())
    with torch.no_grad(), \
            tqdm(total=len(data_loader.dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            cc_idxs = cc_idxs.to(device)
            qc_idxs = qc_idxs.to(device)

            batch_size = cw_idxs.size(0)

            # Forward
            log_p1, log_p2, mems = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs,
                                         *mems)
            y1, y2 = y1.to(device), y2.to(device)
            loss = torch.mean(loss_f(log_p1, y1) + loss_f(log_p2, y2))
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, max_len, use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)

            preds, _ = util.convert_tokens(gold_dict, ids.tolist(),
                                           starts.tolist(), ends.tolist(),
                                           use_squad_v2)
            pred_dict.update(preds)

    model.train()

    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    results_list = [('NLL', nll_meter.avg), ('F1', results['F1']),
                    ('EM', results['EM'])]
    if use_squad_v2:
        results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)

    return results, pred_dict
Example n. 28
0
def test(config):
    with open(config.word_emb_file, "r", encoding="utf-8") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r", encoding="utf-8") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r", encoding="utf-8") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file,
                             get_record_parser(config, is_test=True),
                             config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        answer_dict = {}
        remapped_dict = {}
        for _ in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp = sess.run([model.qa_id, model.loss, model.yp])
            remapped_dict_, answer_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)

        f = open(config.answer_file, "w", encoding="utf-8")
        for key in answer_dict:
            f.write(str(key) + "\t" + answer_dict[key] + "\n")
        # Handle invalid (discarded) test samples:
        # simply pick the first alternative as the answer
        ans_list = list(answer_dict.keys())
        with open(config.test_file, "r", encoding="utf-8") as fh:
            for line in fh:
                sample = json.loads(line)
                if sample["query_id"] not in ans_list:
                    f.write(
                        str(sample["query_id"]) + "\t" +
                        sample['alternatives'].split("|")[0] + "\n")
        f.close()
Example n. 29
0
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle,
                   str_handle):
    """
    Evaluate a 
    """

    all_yp3 = []
    conter_high = 0

    answer_dict = {}
    losses = []
    for numb_b in (range(1, num_batches + 1)):

        qa_id, loss, yp1, yp2, yp3, y1, y2, y3, logging, logging2, q = sess.run(
            [
                model.qa_id, model.loss, model.yp1, model.yp2, model.yp3,
                model.y1, model.y2, model.y3, model.logging, model.logging2,
                model.q
            ],
            feed_dict={handle: str_handle})

        answer_dict_, _ = convert_tokens(eval_file, qa_id.tolist(),
                                         yp1.tolist(), yp2.tolist(),
                                         yp3.tolist())

        answer_dict.update(answer_dict_)
        losses.append(loss)

    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    print(metrics)
    metrics["loss"] = loss

    loss_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/loss".format(data_type),
                         simple_value=metrics["loss"]),
    ])
    f1_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/f1".format(data_type),
                         simple_value=metrics["f1"]),
    ])
    em_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/em".format(data_type),
                         simple_value=metrics["exact_match"]),
    ])

    return metrics, [loss_sum, f1_sum, em_sum]
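The summaries returned by `evaluate_batch` are left for the caller to write; a minimal sketch, assuming an existing `tf.summary.FileWriter` named `writer`, a `global_step` counter, and dev-set handles (all assumptions):

# Hypothetical caller; writer, global_step, dev_eval_file and dev_handle are assumptions.
metrics, summaries = evaluate_batch(model, config.val_num_batches, dev_eval_file,
                                    sess, "dev", handle, dev_handle)
for summ in summaries:
    writer.add_summary(summ, global_step)
writer.flush()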
Example n. 30
0
def evaluate(model, data_loader, device, eval_file, max_len, use_squad_v2): # use_squad_v2 = True
    nll_meter = util.AverageMeter() #  Keep track of average values over time.

    model.eval() # Sets the module in evaluation mode.
    pred_dict = {}
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(data_loader.dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:  # ids
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)

            # Forward
            log_p1, log_p2 = model(cw_idxs, qw_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)   ##

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()  # e^log_p1  e^log_p2
            starts, ends = util.discretize(p1, p2, max_len, use_squad_v2)  ## return :  start_idxs &  end_idxs

            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)

            preds, _ = util.convert_tokens(gold_dict,    ## Convert predictions to tokens from the context.
                                           ids.tolist(),
                                           starts.tolist(),
                                           ends.tolist(),
                                           use_squad_v2)
            pred_dict.update(preds)

    model.train()

    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)  ##
    results_list = [('NLL', nll_meter.avg),
                    ('F1', results['F1']),
                    ('EM', results['EM'])]
    if use_squad_v2:
        results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)

    return results, pred_dict
Example n. 31
0
File: main.py Project: txye/QANet
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file, get_record_parser(
            config, is_test=True), config).make_one_shot_iterator()

        model = Model(config, test_batch, word_mat, char_mat, trainable=False, graph = g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(
                metrics['exact_match'], metrics['f1']))
Example n. 32
0
File: main.py Project: txye/QANet
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    answer_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2 = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2], feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(
            eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss".format(data_type), simple_value=metrics["loss"]), ])
    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"]), ])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"]), ])
    return metrics, [loss_sum, f1_sum, em_sum]
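In this TF1 setup, `handle` is a string placeholder and `str_handle` the evaluated string handle of a `tf.data` iterator; a minimal sketch of wiring them up before calling the function above (the iterator and file names are assumptions):

# Hypothetical call site; dev_iterator and dev_eval_file are assumptions.
# In the full training script the placeholder is created before the model is built.
handle = tf.placeholder(tf.string, shape=[])
dev_handle = sess.run(dev_iterator.string_handle())
metrics, _ = evaluate_batch(model, config.val_num_batches, dev_eval_file,
                            sess, "dev", handle, dev_handle)
print("dev EM: {}, F1: {}".format(metrics["exact_match"], metrics["f1"]))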