from itertools import chain, izip

import pandas as pds
from sklearn.metrics import confusion_matrix

# split the flat prediction sequence back into per-sentence label lists
pred_y = []
current_index = 0
for sent_y in test_y:
    pred_y.append(flat_pred_y[current_index:current_index + len(sent_y)])
    current_index += len(sent_y)

assert len(pred_y) == len(test_x_features)

# recover the surface words from the 'word[0]' feature of each token
sents = [[word['word[0]'] for word in words] for words in test_x_features]

for words, features, true_labels, pred_labels in izip(
        sents, test_x_features, test_y, pred_y):
    print_label_error(words, features, true_labels, pred_labels,
                      target_true_label='IC',
                      target_pred_label='AL',
                      print_features=True,
                      model=model,
                      dict_vect=dict_vect,
                      label_encoder=label_encoder)

print "Confusion matrix:"
table = pds.DataFrame(
    confusion_matrix(list(chain.from_iterable(test_y)),
                     list(chain.from_iterable(pred_y)),
                     labels=labels),
    index=map(lambda s: '{}_true'.format(s), labels),
    columns=map(lambda s: '{}_pred'.format(s), labels))
print table
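# --- A minimal sketch (assumption, not from the original code) of how
# flat_pred_y above would typically be produced: the per-sentence feature
# dicts are flattened, vectorized with the sklearn DictVectorizer held in
# dict_vect, classified token by token by model, and the integer
# predictions mapped back to label strings by label_encoder.
flat_test_x = list(chain.from_iterable(test_x_features))
flat_pred_y = label_encoder.inverse_transform(
    model.predict(dict_vect.transform(flat_test_x)))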
import json
import traceback

import numpy as np
from pathlib import Path  # the pathlib2 backport under Python 2

# logger, separate_title_from_body, is_consistent_prediction, eval_stat,
# get_label and print_label_error are assumed to be defined at module level


def eval_rule_based(output_path, okform_dir,
                    accepted_labels=set(['AL', 'IC']),
                    print_errors=False):
    """
    Return:
        numpy.ndarray: (#label, 3)
            count of #match, #model, #ref for each label

    The first word of each sentence is ignored.
    """
    ret_stat = np.zeros((len(accepted_labels), 3), dtype=np.float64)
    n_finished = 0
    n_errorless = 0
    with Path(output_path).open('r', encoding='utf8') as prediction_file:
        while True:
            if n_finished % 1000 == 0:
                logger.info('Finished {}/{}'.format(n_errorless, n_finished))

            # predictions come in pairs of lines: the instance id,
            # then the prediction as JSON
            line1 = prediction_file.readline()
            line2 = prediction_file.readline()
            if not line2:
                break

            try:
                id_ = line1.strip()
                pred_json = json.loads(line2.strip())
                if pred_json['resultingHeadline'] is None:
                    continue

                pred_tokens = pred_json['resultingHeadline']
                auxil_path = str(Path(okform_dir) /
                                 Path(id_).with_suffix('.auxil'))
                paf_path = str(Path(okform_dir) /
                               Path(id_).with_suffix('.paf'))

                title_sents, _ = separate_title_from_body(auxil_path,
                                                          paf_path)
                true_tokens = [item['token']
                               for item in title_sents[0]['features']]

                if is_consistent_prediction(pred_tokens, true_tokens):
                    stat = eval_stat(pred_tokens, true_tokens,
                                     accepted_labels)
                    if print_errors:
                        print_label_error(
                            true_tokens,
                            features=None,  # we don't have features here
                            instance_id=id_,
                            excluded_indices=set([0]),
                            correct_labels=map(get_label, true_tokens),
                            predicted_labels=map(get_label, pred_tokens),
                            target_true_label='IC',
                            target_pred_label='AL',
                            print_features=False)
                    ret_stat += stat
                    n_errorless += 1
                else:
                    logger.debug(
                        'Predicted and true tokens inconsistent:'
                        '\n{}\n{}\n'.format(pred_tokens, true_tokens))
            except Exception:
                logger.error(traceback.format_exc())
                continue
            finally:
                n_finished += 1

    return ret_stat
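# --- A minimal sketch (not part of the original module) of how the
# (#match, #model, #ref) counts returned by eval_rule_based convert to
# per-label precision/recall/F1. The row order is assumed to follow
# sorted(accepted_labels); the actual ordering depends on eval_stat.
def report_prf(ret_stat, accepted_labels):
    for label, (n_match, n_model, n_ref) in zip(sorted(accepted_labels),
                                                ret_stat):
        precision = n_match / n_model if n_model else 0.0
        recall = n_match / n_ref if n_ref else 0.0
        f1 = (2 * precision * recall / (precision + recall)
              if (precision + recall) else 0.0)
        print '{}: P={:.3f} R={:.3f} F1={:.3f}'.format(
            label, precision, recall, f1)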
from collections import Counter
from itertools import izip

from pandas import DataFrame as df
from sklearn.metrics import confusion_matrix

# pred_y, true_y, labels, tagger, args, load_sents and load_test_data
# are assumed to be defined earlier in the script

word_counter = Counter()
feature_counter = Counter()

for words, s in izip(load_sents(content_path),
                     load_test_data(test_data_path)):
    correct_labels = [l for _, l in s]
    features = [f for f, _ in s]
    predicted_labels = tagger.tag(features)

    # accumulate all predicted/true labels for the confusion matrix
    pred_y += predicted_labels
    true_y += correct_labels

    print_label_error(words, features,
                      correct_labels, predicted_labels,
                      args.true_label, args.pred_label,
                      args.print_features)

cm = confusion_matrix(true_y, pred_y, labels=labels)
table = df(cm,
           index=map(lambda s: '{}_true'.format(s), labels),
           columns=map(lambda s: '{}_pred'.format(s), labels))
print table

# the counters are presumably updated elsewhere (e.g. inside
# print_label_error); only their top entries are reported here
print word_counter.most_common(10)
print feature_counter.most_common(10)
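# --- A hypothetical illustration (assumption, not from the source) of the
# per-sentence format the loop above expects: load_test_data is assumed to
# yield one [(feature_dict, label), ...] list per sentence, aligned with
# the token lists from load_sents. The 'pos[0]' key is invented for the
# example; 'word[0]' matches the feature name used elsewhere in this code.
example_sent = [
    ({'word[0]': 'Stocks', 'pos[0]': 'NNS'}, 'IC'),
    ({'word[0]': 'rally', 'pos[0]': 'VBP'}, 'AL'),
]
example_features = [f for f, _ in example_sent]  # input to tagger.tag()
example_labels = [l for _, l in example_sent]    # gold labels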