def run_evaluation(batches, extra_text=''):
    """Run the model on every batch and score the predictions by segment.

    Relies on names from the surrounding scope: ``model``, ``sess``,
    ``batch_size``, ``viterbi``, ``mask``, ``char_feed``, ``crf``, ``dp``,
    ``evaluation``, ``type_set``, ``type_int_int_map``, ``outside_set`` and
    ``pad_width``.

    Args:
        batches: iterable of 7-tuples ``(tokens, labels, shapes, chars,
            seq_lens, tok_lens, label_mask)``.
        extra_text: text appended to the heading handed to
            ``evaluation.segment_eval``.

    Returns:
        The ``(f1_micro, precision)`` pair returned by
        ``evaluation.segment_eval``.
    """
    predictions = []
    batches_with_mask = []
    for batch in batches:
        (token_batch, label_batch, shape_batch, char_batch, seq_len_batch,
         tok_len_batch, label_mask_batch) = batch
        # mask() returns the sequence lengths that are fed to the model; they
        # replace the ones unpacked from the batch above.
        batch_seq_len, mask_batch, seq_len_batch = mask(batch)
        batches_with_mask.append(batch + (mask_batch, ))

        char_embedding_feed = char_feed(token_batch, char_batch, tok_len_batch)
        lstm_feed = {
            model.input_x1: token_batch,
            model.input_x2: shape_batch,
            model.input_y: label_batch,
            model.input_mask: mask_batch,
            model.sequence_lengths: seq_len_batch,
            model.max_seq_len: batch_seq_len,
            model.batch_size: batch_size,
        }
        lstm_feed.update(char_embedding_feed)

        if viterbi:
            preds, transition_params = sess.run(
                [model.predictions, model.transition_params],
                feed_dict=lstm_feed)
            # np.empty does not initialise memory: only the rows produced by
            # the loop below hold decoded labels.
            viterbi_repad = np.empty((batch_size, batch_seq_len))
            # The per-sequence length is not used; every row is decoded over
            # its full padded length.
            for i, (unary_scores, _) in enumerate(zip(preds, seq_len_batch)):
                viterbi_sequence, _ = crf.viterbi_decode(
                    unary_scores, transition_params)
                viterbi_repad[i] = viterbi_sequence
            predictions.append(viterbi_repad)
        else:
            preds, _ = sess.run(
                [model.predictions, model.unflat_scores],
                feed_dict=lstm_feed)
            # Keep the predictions so that the non-viterbi path is scored too
            # (previously they were dropped and `predictions` stayed empty).
            predictions.append(preds)

    inv_label_map = dp.inv_label_map()
    # Built as a list rather than map(): on Python 3 a map object is a
    # one-shot iterator and would be empty after its first traversal.
    outside_idx = [
        type_set[t] if t in type_set else type_set['O'] for t in outside_set
    ]
    f1_micro, precision = evaluation.segment_eval(
        batches_with_mask,
        predictions,
        type_set,
        type_int_int_map,
        inv_label_map,
        dp.inv_token_map(),
        outside_idx=outside_idx,
        pad_width=pad_width,
        start_end=False,
        extra_text='Segment evaluation %s:' % extra_text)
    print('')
    return f1_micro, precision
def run_evaluation(eval_batches, extra_text=""):
    """Predict labels for ``eval_batches`` and report segment-level scores.

    Relies on names from the surrounding scope: ``FLAGS``, ``model``,
    ``char_embedding_model``, ``sess``, ``tf``, ``evaluation``, ``type_set``,
    ``type_int_int_map``, ``labels_id_str_map``, ``vocab_id_str_map``,
    ``outside_set`` and ``pad_width``.

    Args:
        eval_batches: iterable of 7-tuples ``(labels, tokens, shapes, chars,
            seq_lens, tok_lens, mask)``. It is traversed here and then passed
            on to the ``evaluation`` helpers.
        extra_text: text appended to the heading handed to
            ``evaluation.segment_eval``.

    Returns:
        The ``(f1_micro, precision)`` pair returned by
        ``evaluation.segment_eval``.
    """
    predictions = []
    for (eval_label_batch, eval_token_batch, eval_shape_batch,
         eval_char_batch, eval_seq_len_batch, eval_tok_len_batch,
         eval_mask_batch) in eval_batches:
        batch_size, batch_seq_len = eval_token_batch.shape

        # Spread each row's characters over fixed-width slots of
        # `max_char_len` per token; unused slots keep the zero fill.
        char_lens = np.sum(eval_tok_len_batch, axis=1)
        max_char_len = np.max(eval_tok_len_batch)
        eval_padded_char_batch = np.zeros(
            (batch_size, max_char_len * batch_seq_len))
        for row in range(batch_size):
            char_indices = []
            for tok, tok_len in enumerate(eval_tok_len_batch[row]):
                slot_start = tok * max_char_len
                char_indices.extend(range(slot_start, slot_start + tok_len))
            eval_padded_char_batch[row, char_indices] = (
                eval_char_batch[row][:char_lens[row]])

        total_feeds = {
            model.input_x1: eval_token_batch,
            model.input_x2: eval_shape_batch,
            model.input_y: eval_label_batch,
            model.input_mask: eval_mask_batch,
            model.max_seq_len: batch_seq_len,
            model.batch_size: batch_size,
            model.sequence_lengths: eval_seq_len_batch,
        }
        if FLAGS.char_dim != 0:
            total_feeds.update({
                char_embedding_model.input_chars: eval_padded_char_batch,
                char_embedding_model.batch_size: batch_size,
                char_embedding_model.max_seq_len: batch_seq_len,
                char_embedding_model.token_lengths: eval_tok_len_batch,
                char_embedding_model.max_tok_len: max_char_len,
            })

        if FLAGS.viterbi:
            preds, transition_params = sess.run(
                [model.predictions, model.transition_params],
                feed_dict=total_feeds)
            viterbi_repad = np.empty((batch_size, batch_seq_len))
            # The per-sequence length is not used; every row is decoded over
            # its full padded length.
            for batch_idx, (unary_scores, _) in enumerate(
                    zip(preds, eval_seq_len_batch)):
                viterbi_sequence, _ = tf.contrib.crf.viterbi_decode(
                    unary_scores, transition_params)
                viterbi_repad[batch_idx] = viterbi_sequence
            predictions.append(viterbi_repad)
        else:
            preds, _ = sess.run(
                [model.predictions, model.unflat_scores],
                feed_dict=total_feeds)
            predictions.append(preds)

    if FLAGS.print_preds != '':
        evaluation.print_conlleval_format(
            FLAGS.print_preds, eval_batches, predictions, labels_id_str_map,
            vocab_id_str_map, pad_width)

    # print evaluation
    # outside_idx is built as a list rather than map(): on Python 3 a map
    # object is a one-shot iterator and would be empty after one traversal.
    outside_idx = [
        type_set[t] if t in type_set else type_set["O"] for t in outside_set
    ]
    f1_micro, precision = evaluation.segment_eval(
        eval_batches,
        predictions,
        type_set,
        type_int_int_map,
        labels_id_str_map,
        vocab_id_str_map,
        outside_idx=outside_idx,
        pad_width=pad_width,
        start_end=FLAGS.start_end,
        extra_text="Segment evaluation %s:" % extra_text)
    return f1_micro, precision
def run_evaluation(batches, batch_dataset, extra_text=''):
    """Decode with the A, B and C heads, merge their labels, and score.

    With ``viterbi`` set, every batch is decoded by all three heads. For each
    sequence the labels of head A are the default; wherever the heads
    disagree, the conflicting span is replaced by the labels of the head
    whose values from ``model.marginal_probabilities()``, summed over its own
    predicted labels in that span, are largest. Heads that predict only 'O'
    in the span are excluded. The gold labels are then rewritten into the
    'weak' label space and everything is scored with
    ``evaluation.segment_eval``.

    Relies on names from the surrounding scope: ``model``, ``sess``, ``tf``,
    ``batch_size``, ``viterbi``, ``mask``, ``char_feed``, ``dp``,
    ``evaluation``, ``type_set_A``, ``type_int_int_map_A``, ``outside_set``
    and ``pad_width``.

    Args:
        batches: iterable of 6-tuples ``(tokens, labels, shapes, chars,
            seq_lens, tok_lens)``. NOTE: the label arrays are rewritten in
            place, so the caller's batches are modified.
        batch_dataset: ``'A'``, ``'B'`` or ``'C'``; names the label space of
            the gold labels in ``batches``.
        extra_text: text appended to the heading handed to
            ``evaluation.segment_eval``.

    Returns:
        The ``(f1_micro, precision)`` pair returned by
        ``evaluation.segment_eval``.

    Raises:
        ValueError: if ``batch_dataset`` is not one of the values above and a
            dataset-specific choice has to be made.
    """

    def beginning(label):
        # Labels whose first character is 'U' or 'B' start a mention.
        return label[0] in ('U', 'B')

    def decode_batch(run_list, feed, seq_lens, max_len):
        # Run one head and viterbi-decode each row of its unary scores.
        unary_batch, transition_params = sess.run(run_list, feed_dict=feed)
        # np.empty does not initialise memory: only the rows produced by the
        # loop below hold decoded labels.
        repadded = np.empty((batch_size, max_len))
        # The per-sequence length is not used; every row is decoded over its
        # full padded length.
        for row, (unary_scores, _) in enumerate(zip(unary_batch, seq_lens)):
            sequence, _ = tf.contrib.crf.viterbi_decode(
                unary_scores, transition_params)
            repadded[row] = sequence
        return repadded

    probabilities_A = []
    probabilities_B = []
    probabilities_C = []
    predictions_A = []
    predictions_B = []
    predictions_C = []
    batches_with_mask = []
    for batch in batches:
        (token_batch, label_batch, shape_batch, char_batch, seq_len_batch,
         tok_len_batch) = batch
        # mask() returns the sequence lengths that are fed to the model; they
        # replace the ones unpacked from the batch above.
        batch_seq_len, mask_batch, seq_len_batch = mask(batch)
        batches_with_mask.append(batch + (mask_batch, ))

        char_embedding_feed = char_feed(token_batch, char_batch, tok_len_batch)
        lstm_feed = {
            model.input_x1: token_batch,
            model.input_x2: shape_batch,
            model.input_y: label_batch,
            model.input_mask: mask_batch,
            model.sequence_lengths: seq_len_batch,
            model.max_seq_len: batch_seq_len,
            model.batch_size: batch_size,
        }
        lstm_feed.update(char_embedding_feed)

        if viterbi:
            [probs_A], [probs_B], [probs_C] = sess.run(
                model.marginal_probabilities(), feed_dict=lstm_feed)
            probabilities_A.append(probs_A)
            probabilities_B.append(probs_B)
            probabilities_C.append(probs_C)

            run_list_A = [model.predictions_A, model.transition_params_A]
            run_list_B = [model.predictions_B, model.transition_params_B]
            run_list_C = [model.predictions_C, model.transition_params_C]
            predictions_A.append(
                decode_batch(run_list_A, lstm_feed, seq_len_batch,
                             batch_seq_len))
            predictions_B.append(
                decode_batch(run_list_B, lstm_feed, seq_len_batch,
                             batch_seq_len))
            predictions_C.append(
                decode_batch(run_list_C, lstm_feed, seq_len_batch,
                             batch_seq_len))
        else:
            if batch_dataset == 'A':
                run_list = [model.predictions_A, model.unflat_scores_A]
            elif batch_dataset == 'B':
                run_list = [model.predictions_B, model.unflat_scores_B]
            elif batch_dataset == 'C':
                run_list = [model.predictions_C, model.unflat_scores_C]
            else:
                raise ValueError(
                    'unknown batch_dataset: %r' % (batch_dataset, ))
            # NOTE(review): these outputs are not stored, so without viterbi
            # decoding nothing is merged or scored below.
            sess.run(run_list, feed_dict=lstm_feed)

    inv_label_maps = dp.inv_label_maps()
    inv_label_map_A = inv_label_maps['weak']
    inv_label_map_B = inv_label_maps['cdr']
    inv_label_map_C = inv_label_maps['bc']

    merged_preds_list = []
    for (preds_A, preds_B, preds_C, probs_A, probs_B, probs_C) in zip(
            predictions_A, predictions_B, predictions_C, probabilities_A,
            probabilities_B, probabilities_C):
        merged_preds_sublist = []
        for (pred_A, pred_B, pred_C, prob_A, prob_B, prob_C) in zip(
                preds_A, preds_B, preds_C, probs_A, probs_B, probs_C):
            A = [inv_label_map_A[p] for p in pred_A]
            B = [inv_label_map_B[p] for p in pred_B]
            C = [inv_label_map_C[p] for p in pred_C]
            head_preds = (pred_A, pred_B, pred_C)
            head_probs = (prob_A, prob_B, prob_C)

            ret = list(A)
            in_conflict = False
            mention_start = 0
            for i, (a, b, c) in enumerate(zip(A, B, C)):
                all_outside = a == 'O' and b == 'O' and c == 'O'
                all_begin = beginning(a) and beginning(b) and beginning(c)
                if in_conflict and (all_outside or all_begin):
                    # The heads agree again: settle the span that just ended.
                    spans = (A[mention_start:i], B[mention_start:i],
                             C[mention_start:i])
                    totals = []
                    for span, pred, prob in zip(spans, head_preds,
                                                head_probs):
                        # -inf keeps a head that saw only 'O' from winning.
                        if any(label != 'O' for label in span):
                            total = 0
                        else:
                            total = -np.inf
                        for x in range(mention_start, i):
                            total += prob[x, int(pred[x])]
                        totals.append(total)
                    ret[mention_start:i] = spans[np.argmax(totals)]
                    in_conflict = False
                if not in_conflict and (beginning(a) or beginning(b)
                                        or beginning(c)):
                    mention_start = i
                in_conflict = in_conflict or a != b or a != c or b != c
            # A conflict still open at the end of the sequence is not
            # settled; those positions keep head A's labels.
            merged_preds = [dp.label_maps['weak'][p] for p in ret]
            merged_preds_sublist.append(merged_preds)
        merged_preds_list.append(merged_preds_sublist)

    # Rewrite the gold label ids into the 'weak' id space used for scoring:
    # id -> label string in the dataset's own space -> 'weak' id. Dataset 'A'
    # previously had no map bound here and failed on the first label.
    # NOTE(review): for 'A' this assumes dp.label_maps['weak'] and
    # dp.inv_label_maps()['weak'] are inverses, making it a no-op - confirm.
    inv_label_map = {
        'A': inv_label_map_A,
        'B': inv_label_map_B,
        'C': inv_label_map_C,
    }.get(batch_dataset)
    for batch_with_mask in batches_with_mask:
        gold_labels = batch_with_mask[1]
        for j in range(len(gold_labels)):
            for k in range(len(gold_labels[j])):
                if inv_label_map is None:
                    raise ValueError(
                        'unknown batch_dataset: %r' % (batch_dataset, ))
                gold_labels[j][k] = dp.label_maps['weak'][inv_label_map[
                    gold_labels[j][k]]]

    # Built as a list rather than map(): on Python 3 a map object is a
    # one-shot iterator and would be empty after its first traversal.
    outside_idx = [
        type_set_A[t] if t in type_set_A else type_set_A['O']
        for t in outside_set
    ]
    f1_micro, precision = evaluation.segment_eval(
        batches_with_mask,
        merged_preds_list,
        type_set_A,
        type_int_int_map_A,
        inv_label_map_A,
        dp.inv_token_map(),
        outside_idx=outside_idx,
        pad_width=pad_width,
        start_end=False,
        extra_text='Segment evaluation %s:' % extra_text)
    print('')
    return f1_micro, precision
def run_evaluation(eval_batches, output=None, extra_text=""):
    """Predict labels for ``eval_batches`` and report segment-level scores.

    Relies on names from the surrounding scope: ``FLAGS``, ``model``,
    ``char_embedding_model``, ``sess``, ``tf``, ``evaluation``, ``logger``,
    ``labels_id_str_map``, ``vocab_id_str_map`` and ``pad_width``.

    Args:
        eval_batches: iterable of 7-tuples ``(labels, tokens, shapes, chars,
            seq_lens, tok_lens, mask)``. It is traversed here and then passed
            on to the ``evaluation`` helpers.
        output: when not ``None``, predictions are also written through
            ``evaluation.output_predicted_to_file`` to ``<dir>/<output>.txt``,
            where ``<dir>`` is ``FLAGS.model_dir`` or, if that is empty,
            ``FLAGS.load_dir``.
        extra_text: text appended to the heading handed to
            ``evaluation.segment_eval``.

    Returns:
        ``(f1_micro, precision)`` taken from the
        ``(precision, recall, f1_micro)`` result of
        ``evaluation.segment_eval``.
    """
    predictions = []
    for batch in eval_batches:
        (eval_label_batch, eval_token_batch, eval_shape_batch,
         eval_char_batch, eval_seq_len_batch, eval_tok_len_batch,
         eval_mask_batch) = batch
        batch_size, batch_seq_len = eval_token_batch.shape

        # Spread each row's characters over fixed-width slots of
        # `max_char_len` per token; unused slots keep the zero fill.
        char_lens = np.sum(eval_tok_len_batch, axis=1)
        max_char_len = np.max(eval_tok_len_batch)
        eval_padded_char_batch = np.zeros(
            (batch_size, max_char_len * batch_seq_len))
        for row in range(batch_size):
            char_indices = []
            for tok, tok_len in enumerate(eval_tok_len_batch[row]):
                slot_start = tok * max_char_len
                char_indices.extend(range(slot_start, slot_start + tok_len))
            eval_padded_char_batch[row, char_indices] = (
                eval_char_batch[row][:char_lens[row]])

        total_feeds = {
            model.input_x1: eval_token_batch,
            model.input_x2: eval_shape_batch,
            model.input_y: eval_label_batch,
            model.input_mask: eval_mask_batch,
            model.max_seq_len: batch_seq_len,
            model.batch_size: batch_size,
            model.sequence_lengths: eval_seq_len_batch,
        }
        if FLAGS.char_dim != 0:
            total_feeds.update({
                char_embedding_model.input_chars: eval_padded_char_batch,
                char_embedding_model.batch_size: batch_size,
                char_embedding_model.max_seq_len: batch_seq_len,
                char_embedding_model.token_lengths: eval_tok_len_batch,
                char_embedding_model.max_tok_len: max_char_len,
            })

        if not FLAGS.viterbi:
            batch_preds, _ = sess.run(
                [model.predictions, model.unflat_scores],
                feed_dict=total_feeds)
            predictions.append(batch_preds)
            continue

        unary_batch, transition_params = sess.run(
            [model.predictions, model.transition_params],
            feed_dict=total_feeds)
        viterbi_repad = np.empty((batch_size, batch_seq_len))
        # The per-sequence length is not used; every row is decoded over its
        # full padded length.
        for row, (unary_scores, _) in enumerate(
                zip(unary_batch, eval_seq_len_batch)):
            decoded, _ = tf.contrib.crf.viterbi_decode(
                unary_scores, transition_params)
            viterbi_repad[row] = decoded
        predictions.append(viterbi_repad)

    if output is not None:
        out_dir = FLAGS.load_dir if FLAGS.model_dir == '' else FLAGS.model_dir
        evaluation.output_predicted_to_file(
            out_dir + "/" + output + ".txt", eval_batches, predictions,
            labels_id_str_map, vocab_id_str_map, pad_width)

    # print evaluation
    precision, recall, f1_micro = evaluation.segment_eval(
        eval_batches,
        predictions,
        labels_id_str_map,
        vocab_id_str_map,
        pad_width=pad_width,
        start_end=FLAGS.start_end,
        logger=logger,
        extra_text="Segment evaluation %s:" % extra_text)
    return f1_micro, precision
def run_evaluation(batches, batch_dataset, extra_text=''):
    """Decode with the A and B heads, merge their labels, and score.

    With ``viterbi`` set, every batch is decoded by both heads. For each
    sequence the labels of head A are the default; wherever the heads
    disagree, the conflicting span is replaced by the labels of a randomly
    chosen head among those that predict something other than 'O' in that
    span. The gold labels are then rewritten into the 'full' label space and
    everything is scored with ``evaluation.segment_eval``.

    Relies on names from the surrounding scope: ``model``, ``sess``, ``tf``,
    ``batch_size``, ``viterbi``, ``mask``, ``char_feed``, ``dp``,
    ``evaluation``, ``type_set_C``, ``type_int_int_map_C``, ``outside_set``
    and ``pad_width``.

    Args:
        batches: iterable of 6-tuples ``(tokens, labels, shapes, chars,
            seq_lens, tok_lens)``. NOTE: the label arrays are rewritten in
            place, so the caller's batches are modified.
        batch_dataset: names the label space of the gold labels in
            ``batches``. ``'full'``, ``'A'`` and ``'B'`` are accepted when
            rewriting gold labels; ``'A'``, ``'B'`` and ``'C'`` select the
            head on the non-viterbi path.
        extra_text: text appended to the heading handed to
            ``evaluation.segment_eval``.

    Returns:
        The ``(f1_micro, precision)`` pair returned by
        ``evaluation.segment_eval``.

    Raises:
        ValueError: if ``batch_dataset`` is not accepted at the point where a
            dataset-specific choice has to be made.
    """

    def beginning(label):
        # Labels whose first character is 'U' or 'B' start a mention.
        return label[0] in ('U', 'B')

    def decode_batch(run_list, feed, seq_lens, max_len):
        # Run one head and viterbi-decode each row of its unary scores.
        unary_batch, transition_params = sess.run(run_list, feed_dict=feed)
        # np.empty does not initialise memory: only the rows produced by the
        # loop below hold decoded labels.
        repadded = np.empty((batch_size, max_len))
        # The per-sequence length is not used; every row is decoded over its
        # full padded length.
        for row, (unary_scores, _) in enumerate(zip(unary_batch, seq_lens)):
            sequence, _ = tf.contrib.crf.viterbi_decode(
                unary_scores, transition_params)
            repadded[row] = sequence
        return repadded

    predictions_A = []
    predictions_B = []
    batches_with_mask = []
    for batch in batches:
        (token_batch, label_batch, shape_batch, char_batch, seq_len_batch,
         tok_len_batch) = batch
        # mask() returns the sequence lengths that are fed to the model; they
        # replace the ones unpacked from the batch above.
        batch_seq_len, mask_batch, seq_len_batch = mask(batch)
        batches_with_mask.append(batch + (mask_batch, ))

        char_embedding_feed = char_feed(token_batch, char_batch, tok_len_batch)
        lstm_feed = {
            model.input_x1: token_batch,
            model.input_x2: shape_batch,
            model.input_y: label_batch,
            model.input_mask: mask_batch,
            model.sequence_lengths: seq_len_batch,
            model.max_seq_len: batch_seq_len,
            model.batch_size: batch_size,
        }
        lstm_feed.update(char_embedding_feed)

        if viterbi:
            run_list_A = [model.predictions_A, model.transition_params_A]
            run_list_B = [model.predictions_B, model.transition_params_B]
            predictions_A.append(
                decode_batch(run_list_A, lstm_feed, seq_len_batch,
                             batch_seq_len))
            predictions_B.append(
                decode_batch(run_list_B, lstm_feed, seq_len_batch,
                             batch_seq_len))
        else:
            # Kept as a lazy if/elif chain: only the attributes of the
            # selected head are touched.
            if batch_dataset == 'A':
                run_list = [model.predictions_A, model.unflat_scores_A]
            elif batch_dataset == 'B':
                run_list = [model.predictions_B, model.unflat_scores_B]
            elif batch_dataset == 'C':
                run_list = [model.predictions_C, model.unflat_scores_C]
            else:
                raise ValueError(
                    'unknown batch_dataset: %r' % (batch_dataset, ))
            # NOTE(review): these outputs are not stored, so without viterbi
            # decoding nothing is merged or scored below.
            sess.run(run_list, feed_dict=lstm_feed)

    inv_label_maps = dp.inv_label_maps()
    inv_label_map_A = inv_label_maps['A']
    inv_label_map_B = inv_label_maps['B']
    inv_label_map_C = inv_label_maps['full']

    merged_preds_list = []
    for (preds_A, preds_B) in zip(predictions_A, predictions_B):
        merged_preds_sublist = []
        for (pred_A, pred_B) in zip(preds_A, preds_B):
            A = [inv_label_map_A[p] for p in pred_A]
            B = [inv_label_map_B[p] for p in pred_B]

            ret = list(A)
            in_conflict = False
            mention_start = 0
            for i, (a, b) in enumerate(zip(A, B)):
                both_outside = a == 'O' and b == 'O'
                both_begin = beginning(a) and beginning(b)
                if in_conflict and (both_outside or both_begin):
                    # The heads agree again: settle the span that just ended
                    # using a head that labelled something in it.
                    possible = [
                        span
                        for span in (A[mention_start:i], B[mention_start:i])
                        if any(label != 'O' for label in span)
                    ]
                    # NOTE(review): randint(3) % 2 picks the first candidate
                    # two times out of three when both heads qualify; left
                    # unchanged - confirm whether a uniform pick was meant.
                    ret[mention_start:i] = possible[np.random.randint(3) %
                                                    len(possible)]
                    in_conflict = False
                if not in_conflict and (beginning(a) or beginning(b)):
                    mention_start = i
                in_conflict = in_conflict or a != b
            # A conflict still open at the end of the sequence is not
            # settled; those positions keep head A's labels.
            merged_preds = [dp.label_maps['full'][p] for p in ret]
            merged_preds_sublist.append(merged_preds)
        merged_preds_list.append(merged_preds_sublist)

    # Rewrite the gold label ids into the 'full' id space used for scoring:
    # id -> label string in the dataset's own space -> 'full' id. Previously
    # only 'full' had a map bound here; any other dataset failed on the first
    # label.
    # NOTE(review): mapping 'A'/'B' through their own inverse maps assumes
    # their label strings also exist in dp.label_maps['full'] - confirm.
    inv_label_map = {
        'A': inv_label_map_A,
        'B': inv_label_map_B,
        'full': inv_label_map_C,
    }.get(batch_dataset)
    for batch_with_mask in batches_with_mask:
        gold_labels = batch_with_mask[1]
        for j in range(len(gold_labels)):
            for k in range(len(gold_labels[j])):
                if inv_label_map is None:
                    raise ValueError(
                        'unknown batch_dataset: %r' % (batch_dataset, ))
                gold_labels[j][k] = dp.label_maps['full'][inv_label_map[
                    gold_labels[j][k]]]

    # Built as a list rather than map(): on Python 3 a map object is a
    # one-shot iterator and would be empty after its first traversal.
    outside_idx = [
        type_set_C[t] if t in type_set_C else type_set_C['O']
        for t in outside_set
    ]
    f1_micro, precision = evaluation.segment_eval(
        batches_with_mask,
        merged_preds_list,
        type_set_C,
        type_int_int_map_C,
        inv_label_map_C,
        dp.inv_token_map(),
        outside_idx=outside_idx,
        pad_width=pad_width,
        start_end=False,
        extra_text='Segment evaluation %s:' % extra_text)
    print('')
    return f1_micro, precision