def optimal_thr_calc(model, handles, iterators, el_mode):
    val_datasets = args.el_val_datasets if el_mode else args.ed_val_datasets
    tp_fp_scores_labels = []
    fn_scores = []
    for val_dataset in val_datasets:  # validation dataset indices, e.g. 1, 4
        dataset_handle = handles[val_dataset]
        iterator = iterators[val_dataset]
        model.sess.run(iterator.initializer)
        while True:
            try:
                retrieve_l = [
                    model.final_scores, model.cand_entities_len, model.cand_entities,
                    model.begin_span, model.end_span, model.spans_len,
                    model.begin_gm, model.end_gm,
                    model.ground_truth, model.ground_truth_len,
                    model.words_len, model.chunk_id
                ]
                result_l = model.sess.run(
                    retrieve_l,
                    feed_dict={model.input_handle_ph: dataset_handle,
                               model.dropout: 1})
                # Collect per-batch (score, label) pairs for predicted spans (tp/fp)
                # and the scores of gold mentions that were missed (fn).
                tp_fp_batch, fn_batch = threshold_calculation(*result_l, el_mode)
                tp_fp_scores_labels.extend(tp_fp_batch)
                fn_scores.extend(fn_batch)
            except tf.errors.OutOfRangeError:
                break
    return optimal_thr_calc_aux(tp_fp_scores_labels, fn_scores)
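# Both versions of optimal_thr_calc (the single-model one above and the ensemble
# one below) delegate the actual threshold search to optimal_thr_calc_aux, which
# is defined elsewhere in the repo. The sketch below is NOT that implementation;
# it is a minimal, self-contained illustration of the usual approach (sweep the
# prediction scores and keep the threshold with the best micro-F1), assuming that
# tp_fp_scores_labels holds (score, is_correct) pairs for predicted spans and that
# fn_scores lists gold mentions that stay false negatives at every threshold.
def optimal_thr_calc_aux_sketch(tp_fp_scores_labels, fn_scores):
    # Assumption: every correct prediction accounts for exactly one gold mention.
    total_gold = sum(label for _, label in tp_fp_scores_labels) + len(fn_scores)
    best_thr, best_f1 = float("inf"), 0.0
    tp = fp = 0
    # Lowering the threshold admits predictions in order of decreasing score.
    for score, label in sorted(tp_fp_scores_labels, key=lambda x: -x[0]):
        if label:
            tp += 1
        else:
            fp += 1
        precision = tp / (tp + fp)
        recall = tp / total_gold if total_gold else 0.0
        f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
        if f1 > best_f1:
            best_thr, best_f1 = score, f1
    return best_thr, best_f1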
def optimal_thr_calc(el_mode):
    filenames = args.el_datasets if el_mode else args.ed_datasets
    val_datasets = args.el_val_datasets if el_mode else args.ed_val_datasets
    ensemble_fixed = []
    ensemble_acc = []  # final_scores and similarity_scores; all the rest are fixed
    for model_num, model_folder in enumerate(args.output_folder):  # for all ensemble models
        model, handles = create_input_pipeline(
            el_mode, model_folder, [filenames[i] for i in val_datasets])
        retrieve_l = (model.final_scores, model.cand_entities_len, model.cand_entities,
                      model.begin_span, model.end_span, model.spans_len,
                      model.begin_gm, model.end_gm,
                      model.ground_truth, model.ground_truth_len,
                      model.words_len, model.chunk_id)
        elem_idx = 0
        for dataset_handle in handles:  # one handle per validation dataset, e.g. 1, 4
            while True:
                try:
                    result_l = model.sess.run(
                        retrieve_l,
                        feed_dict={model.input_handle_ph: dataset_handle,
                                   model.dropout: 1})
                    if model_num == 0:
                        # First model: store the fixed tensors once and start
                        # accumulating the final scores.
                        ensemble_fixed.append(result_l[1:])
                        ensemble_acc.append(result_l[0])
                    else:
                        ensemble_acc[elem_idx] += result_l[0]
                    elem_idx += 1
                except tf.errors.OutOfRangeError:
                    break
        model.close_session()

    number_of_models = len(args.output_folder)
    tp_fp_scores_labels = []
    fn_scores = []
    for final_scores, fixed in zip(ensemble_acc, ensemble_fixed):
        final_scores /= number_of_models  # average the accumulated scores
        tp_fp_batch, fn_batch = threshold_calculation(final_scores, *fixed, el_mode)
        tp_fp_scores_labels.extend(tp_fp_batch)
        fn_scores.extend(fn_batch)
    return train.optimal_thr_calc_aux(tp_fp_scores_labels, fn_scores)
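# Design note on the ensemble variant above: final_scores is the only retrieved
# tensor that varies across the ensemble members, so it is the only one that is
# accumulated and averaged; the span boundaries, candidate entities and ground
# truth come from the shared input pipeline and are stored once from the first
# model.
#
# Hypothetical usage (the calling code is not shown here; the return value is
# whatever optimal_thr_calc_aux returns, e.g. the chosen threshold):
#
#     el_thr = optimal_thr_calc(el_mode=True)   # threshold for entity linking
#     ed_thr = optimal_thr_calc(el_mode=False)  # threshold for entity disambiguation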