Example 1
def optimal_thr_calc(model, handles, iterators, el_mode):
    val_datasets = args.el_val_datasets if el_mode else args.ed_val_datasets
    tp_fp_scores_labels = []
    fn_scores = []
    for val_dataset in val_datasets:  # the validation dataset indices (e.g. 1, 4)
        dataset_handle = handles[val_dataset]
        iterator = iterators[val_dataset]
        model.sess.run(iterator.initializer)
        while True:
            try:
                retrieve_l = [
                    model.final_scores, model.cand_entities_len,
                    model.cand_entities, model.begin_span, model.end_span,
                    model.spans_len, model.begin_gm, model.end_gm,
                    model.ground_truth, model.ground_truth_len,
                    model.words_len, model.chunk_id
                ]
                result_l = model.sess.run(
                    retrieve_l,
                    feed_dict={model.input_handle_ph: dataset_handle,
                               model.dropout: 1})  # keep probability 1: dropout disabled at evaluation
                tp_fp_batch, fn_batch = threshold_calculation(
                    *result_l, el_mode)
                tp_fp_scores_labels.extend(tp_fp_batch)
                fn_scores.extend(fn_batch)
            except tf.errors.OutOfRangeError:
                break
    return optimal_thr_calc_aux(tp_fp_scores_labels, fn_scores)
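In this first variant a single trained model is run over every validation dataset: for each batch the final candidate scores and the gold annotations are fetched, threshold_calculation splits them into scored true/false positives (tp_fp_scores_labels) and false negatives (fn_scores), and optimal_thr_calc_aux picks the global decision threshold. That helper is not shown on this page; the following is only a minimal sketch of what such a sweep could look like, assuming tp_fp_scores_labels holds (score, is_correct) pairs and that each entry of fn_scores is a gold mention missed at any threshold, under a hypothetical name to keep it distinct from the real function.

# Hypothetical sketch, not the repository's optimal_thr_calc_aux: sweep the
# candidate score thresholds and keep the one that maximizes micro-F1.
def optimal_thr_calc_aux_sketch(tp_fp_scores_labels, fn_scores):
    # Predictions sorted by descending score; lowering the threshold accepts them one by one.
    pairs = sorted(tp_fp_scores_labels, key=lambda x: x[0], reverse=True)
    total_gold = sum(1 for _, correct in pairs if correct) + len(fn_scores)
    best_f1, best_thr = 0.0, float("inf")
    tp = fp = 0
    for score, correct in pairs:
        if correct:
            tp += 1
        else:
            fp += 1
        precision = tp / (tp + fp)
        recall = tp / total_gold if total_gold else 0.0
        f1 = (2 * precision * recall / (precision + recall)
              if precision + recall else 0.0)
        if f1 > best_f1:
            best_f1, best_thr = f1, score
    return best_thr, best_f1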
Example 2
def optimal_thr_calc(el_mode):
    filenames = args.el_datasets if el_mode else args.ed_datasets
    val_datasets = args.el_val_datasets if el_mode else args.ed_val_datasets

    ensemble_fixed = []
    ensemble_acc = []  # accumulates the scores (final_scores, similarity_scores); all the rest are fixed
    for model_num, model_folder in enumerate(
            args.output_folder):  # for all ensemble models
        model, handles = create_input_pipeline(
            el_mode, model_folder, [filenames[i] for i in val_datasets])

        retrieve_l = (model.final_scores, model.cand_entities_len,
                      model.cand_entities, model.begin_span, model.end_span,
                      model.spans_len, model.begin_gm, model.end_gm,
                      model.ground_truth, model.ground_truth_len,
                      model.words_len, model.chunk_id)
        elem_idx = 0
        for dataset_handle in handles:  # one handle per validation dataset (e.g. indices 1, 4)
            while True:
                try:
                    result_l = model.sess.run(
                        retrieve_l,
                        feed_dict={model.input_handle_ph: dataset_handle,
                                   model.dropout: 1})  # dropout disabled at evaluation
                    if model_num == 0:
                        # first model: store the fixed tensors and seed the score accumulator
                        ensemble_fixed.append(result_l[1:])
                        ensemble_acc.append(result_l[0])
                    else:
                        # later models: add their final_scores to the running sum
                        ensemble_acc[elem_idx] += result_l[0]

                    elem_idx += 1
                except tf.errors.OutOfRangeError:
                    break
        model.close_session()

    number_of_models = len(args.output_folder)
    tp_fp_scores_labels = []
    fn_scores = []
    for final_scores, fixed in zip(ensemble_acc, ensemble_fixed):
        final_scores /= number_of_models  # average the summed scores over the ensemble

        tp_fp_batch, fn_batch = threshold_calculation(final_scores, *fixed,
                                                      el_mode)
        tp_fp_scores_labels.extend(tp_fp_batch)
        fn_scores.extend(fn_batch)

    return train.optimal_thr_calc_aux(tp_fp_scores_labels, fn_scores)
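Example 2 is the ensemble variant of the same procedure: every model listed in args.output_folder is run over the same validation batches in the same order, only the final_scores arrays are summed across models (the remaining tensors are identical, so they are stored once from the first model), and the sum is divided by the number of models before thresholding. Below is a minimal numpy sketch of that accumulate-then-average pattern, with made-up per-batch arrays standing in for the tensors fetched from each session.

import numpy as np

# Hypothetical per-batch scores from three ensemble members (same shapes, same batch order).
model_scores = [
    [np.array([0.2, 0.9]), np.array([0.4])],  # model 0
    [np.array([0.4, 0.7]), np.array([0.6])],  # model 1
    [np.array([0.3, 0.8]), np.array([0.5])],  # model 2
]

ensemble_acc = []
for model_num, batches in enumerate(model_scores):
    for elem_idx, scores in enumerate(batches):
        if model_num == 0:
            ensemble_acc.append(scores.copy())  # seed the accumulator with the first model
        else:
            ensemble_acc[elem_idx] += scores    # element-wise sum, batch by batch

averaged = [acc / len(model_scores) for acc in ensemble_acc]
# averaged[0] -> array([0.3, 0.8]); averaged[1] -> array([0.5])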