def update_batch(self, results):
    """Fold one batch of predictions into the running sub-token counters.

    Each item of ``results`` is an ``(original_name, top_words)`` pair.  The
    first entry returned by ``self.filter_impossible_names_fn(top_words)`` is
    used as the prediction; the original name and the prediction are both
    split with ``common.get_subtokens`` and compared as sub-token counts.

    Updates ``nr_true_positives``, ``nr_false_positives``,
    ``nr_false_negatives`` and ``nr_predictions`` on ``self``.  For every item
    the two sub-token collections and each running total are printed to
    stdout.
    """
    banner = '------------------___#######################################################################'

    for original_name, top_words in results:
        best_guess = self.filter_impossible_names_fn(top_words)[0]
        reference = Counter(common.get_subtokens(original_name))
        guessed = Counter(common.get_subtokens(best_guess))

        print(banner)
        print(guessed.items())
        print('------------------___')
        print(reference)
        print(banner)

        # Predicted sub-tokens, weighted by how often they were predicted,
        # split by whether they occur in the reference name at all.
        matched = 0
        spurious = 0
        for token, occurrences in guessed.items():
            if token in reference:
                matched += occurrences
            else:
                spurious += occurrences

        # Reference sub-tokens (weighted by their count) that the prediction
        # never produced.
        overlooked = sum(
            reference[token] for token in reference if token not in guessed)

        self.nr_true_positives += matched
        print("tp", self.nr_true_positives)

        self.nr_false_positives += spurious
        print("fp", self.nr_false_positives)

        self.nr_false_negatives += overlooked
        print("fn", self.nr_false_negatives)

        self.nr_predictions += 1
        print('all', self.nr_predictions)
def update_batch(self, results, directory):
    """Accumulate sub-token metrics for a batch and log predictions to disk.

    For every ``(original_name, top_words)`` pair in ``results``:

    * one line is appended to ``<directory>/pred_top5.txt`` holding the first
      five distinct sub-tokens found across all of ``top_words`` (in order of
      first appearance), and one line is appended to ``<directory>/ref.txt``
      holding the distinct sub-tokens of ``original_name``;
    * ``nr_true_positives``, ``nr_false_positives``, ``nr_false_negatives``
      and ``nr_predictions`` are updated by comparing the sub-tokens of the
      first name returned by ``self.filter_impossible_names_fn(top_words)``
      with those of ``original_name``;
    * ``precision_total_k`` and ``recall_total_k`` (k = 1..5) are updated
      from the first ``k`` distinct predicted sub-tokens.

    Both log files are opened once per call (append mode) instead of once per
    result, so they are created even when ``results`` is empty.

    Args:
        results: iterable of ``(original_name, top_words)`` pairs.
        directory: path of the folder that receives the two log files; it is
            concatenated with ``'/pred_top5.txt'`` and ``'/ref.txt'``.
    """
    pred_path = directory + '/pred_top5.txt'
    ref_path = directory + '/ref.txt'

    with open(pred_path, 'a') as top_pred_file, open(ref_path, 'a') as ref_file:
        for original_name, top_words in results:
            prediction = self.filter_impossible_names_fn(top_words)[0]
            reference_tokens = common.get_subtokens(original_name)
            original_subtokens = Counter(reference_tokens)
            predicted_subtokens = Counter(common.get_subtokens(prediction))

            # dict.fromkeys de-duplicates while keeping first-seen order.
            top_tokens = list(dict.fromkeys(
                token
                for candidate in top_words
                for token in common.get_subtokens(candidate)))[:5]

            top_pred_file.write(' '.join(top_tokens) + "\n")
            # Joining a Counter iterates its keys, so a sub-token that occurs
            # several times in the original name is written once.
            ref_file.write(' '.join(original_subtokens) + "\n")

            self.nr_true_positives += sum(
                count for element, count in predicted_subtokens.items()
                if element in original_subtokens)
            self.nr_false_positives += sum(
                count for element, count in predicted_subtokens.items()
                if element not in original_subtokens)
            self.nr_false_negatives += sum(
                count for element, count in original_subtokens.items()
                if element not in predicted_subtokens)
            self.nr_predictions += 1

            # hits_at[k] = how many of the first k predicted sub-tokens occur
            # in the original name; shared by precision@k and recall@k.
            hits_at = [
                sum(1 for token in top_tokens[:k] if token in reference_tokens)
                for k in range(6)
            ]

            # precision@k divides by k even when fewer than k sub-tokens were
            # predicted; precision@1 is accumulated as a plain hit count.
            self.precision_total_1 += hits_at[1]
            self.precision_total_2 += hits_at[2] / 2.0
            self.precision_total_3 += hits_at[3] / 3.0
            self.precision_total_4 += hits_at[4] / 4.0
            self.precision_total_5 += hits_at[5] / 5.0

            # recall@k is normalised by the number of sub-tokens in the
            # original name (duplicates included).
            reference_size = len(reference_tokens)
            self.recall_total_1 += hits_at[1] / reference_size
            self.recall_total_2 += hits_at[2] / reference_size
            self.recall_total_3 += hits_at[3] / reference_size
            self.recall_total_4 += hits_at[4] / reference_size
            self.recall_total_5 += hits_at[5] / reference_size
def update_per_subtoken_statistics(self, results, true_positive, false_positive, false_negative):
    """Return the three sub-token counters advanced by one batch.

    Each item of ``results`` is an ``(original_name, top_words)`` pair.  The
    first entry of ``common.filter_impossible_names(top_words)`` is used as
    the prediction, and both names are split with ``common.get_subtokens``.

    * every predicted sub-token found in the original name adds one to
      ``true_positive``; every other predicted sub-token adds one to
      ``false_positive``;
    * every original sub-token missing from the prediction adds one to
      ``false_negative``.

    Returns:
        The updated ``(true_positive, false_positive, false_negative)`` tuple.
    """
    for original_name, top_words in results:
        best_guess = common.filter_impossible_names(top_words)[0]
        reference = common.get_subtokens(original_name)
        guessed = common.get_subtokens(best_guess)

        true_positive += sum(1 for token in guessed if token in reference)
        false_positive += sum(1 for token in guessed if token not in reference)
        false_negative += sum(1 for token in reference if token not in guessed)

    return true_positive, false_positive, false_negative
def update_algorithm_dict(results, algorithm_dict):
    """Update per-label statistics keyed by the first predicted sub-token.

    Each item of ``results`` is an ``(original_name, top_words)`` pair.  The
    first entry of ``common.filter_impossible_names(top_words)`` is used as
    the prediction; the first sub-token (``common.get_subtokens(...)[0]``) of
    the original name and of the prediction are the labels being compared.

    A missing entry for the predicted label is created as
    ``Algorithm(predicted_main, 0, 0)``.  The entry's ``true_positive`` is
    incremented when the two labels match, otherwise its ``false_negative``
    is incremented.

    Args:
        results: iterable of ``(original_name, top_words)`` pairs.
        algorithm_dict: mapping from label to ``Algorithm``; mutated in place.

    Returns:
        The same ``algorithm_dict`` object.
    """
    for original_name, top_words in results:
        prediction = common.filter_impossible_names(top_words)[0]
        original_main = common.get_subtokens(original_name)[0]
        predicted_main = common.get_subtokens(prediction)[0]

        if predicted_main not in algorithm_dict:
            algorithm_dict[predicted_main] = Algorithm(predicted_main, 0, 0)

        # Count the observation unconditionally: the one that creates the
        # entry must be tallied too, otherwise every label is under-counted
        # by its first occurrence.
        entry = algorithm_dict[predicted_main]
        if predicted_main == original_main:
            entry.true_positive += 1
        else:
            # NOTE(review): a mismatch is booked under the *predicted* label's
            # false_negative, as before — confirm this is the intended
            # bookkeeping rather than the original label's.
            entry.false_negative += 1

    return algorithm_dict