def update_batch(self, results):
        """Fold one batch of predictions into the running subtoken counters.

        For every ``(original_name, top_words)`` pair, the first entry returned
        by ``self.filter_impossible_names_fn(top_words)`` is taken as the
        prediction. Both names are split with ``common.get_subtokens`` and
        compared as multisets; the running true-positive, false-positive,
        false-negative and prediction counters on ``self`` are updated.

        Intermediate values and the running totals are printed to stdout
        after each update.

        Args:
            results: Iterable of ``(original_name, top_words)`` pairs.
        """
        banner = '------------------___#######################################################################'
        for target_name, candidates in results:
            best_guess = self.filter_impossible_names_fn(candidates)[0]
            target_counts = Counter(common.get_subtokens(target_name))
            guess_counts = Counter(common.get_subtokens(best_guess))

            # Debug dump of both subtoken multisets.
            print(banner)
            print(guess_counts.items())
            print('------------------___')
            print(target_counts)
            print(banner)

            # Split the predicted occurrences into matched / spurious in one
            # pass; a predicted subtoken counts with its full multiplicity.
            matched = 0
            spurious = 0
            for token, occurrences in guess_counts.items():
                if token in target_counts:
                    matched += occurrences
                else:
                    spurious += occurrences

            self.nr_true_positives += matched
            print("tp", self.nr_true_positives)

            self.nr_false_positives += spurious
            print("fp", self.nr_false_positives)

            # Reference subtokens the prediction never produced.
            missed = sum(target_counts[token] for token in target_counts
                         if token not in guess_counts)
            self.nr_false_negatives += missed
            print("fn", self.nr_false_negatives)

            self.nr_predictions += 1

            print('all', self.nr_predictions)
Beispiel #2
0
    def update_batch(self, results, directory):
        """Update subtoken metrics and top-k precision/recall sums for a batch.

        For every ``(original_name, top_words)`` pair in ``results`` this:

        * appends the first five distinct subtokens found across all of
          ``top_words`` to ``<directory>/pred_top5.txt`` and the reference
          subtokens to ``<directory>/ref.txt`` (one line per pair);
        * adds the pair's subtoken true-positive / false-positive /
          false-negative counts, computed from the first entry returned by
          ``self.filter_impossible_names_fn(top_words)``, to the running
          ``nr_*`` totals;
        * adds precision@k and recall@k for k = 1..5 to the running
          ``precision_total_k`` / ``recall_total_k`` sums.

        Args:
            results: Iterable of ``(original_name, top_words)`` pairs.
            directory: Path of the directory the two text files live in.
        """
        # Open both output files once per batch instead of once per result;
        # append mode keeps lines written by earlier batches.
        with open(directory + '/pred_top5.txt', 'a') as top_pred_file, \
                open(directory + '/ref.txt', 'a') as ref_file:
            for original_name, top_words in results:
                prediction = self.filter_impossible_names_fn(top_words)[0]
                # Split the reference name once and reuse it for both the
                # multiset comparison and the recall denominator.
                reference_tokens = common.get_subtokens(original_name)
                original_subtokens = Counter(reference_tokens)
                predicted_subtokens = Counter(common.get_subtokens(prediction))

                # Distinct subtokens over *all* candidates, in first-seen
                # order (dict.fromkeys de-duplicates while keeping order);
                # only the first five are ever used.
                top_tokens = list(
                    dict.fromkeys(
                        token for p in top_words
                        for token in common.get_subtokens(p)))[:5]

                top_pred_file.write(' '.join(top_tokens) + "\n")
                # NOTE(review): this joins the Counter's keys, so a subtoken
                # repeated in the reference name is written only once.
                ref_file.write(' '.join(original_subtokens) + "\n")

                self.nr_true_positives += sum(
                    count for element, count in predicted_subtokens.items()
                    if element in original_subtokens)
                self.nr_false_positives += sum(
                    count for element, count in predicted_subtokens.items()
                    if element not in original_subtokens)
                self.nr_false_negatives += sum(
                    count for element, count in original_subtokens.items()
                    if element not in predicted_subtokens)
                self.nr_predictions += 1

                # hits_at[k - 1] == number of the first k predicted tokens
                # that occur in the reference name. Membership is tested on
                # the Counter (hash lookup) rather than on the token list.
                is_hit = [token in original_subtokens for token in top_tokens]
                hits_at = [sum(is_hit[:k]) for k in range(1, 6)]

                # precision@k always divides by k, even when fewer than k
                # distinct tokens were predicted.
                self.precision_total_1 += hits_at[0]
                self.precision_total_2 += hits_at[1] / 2.0
                self.precision_total_3 += hits_at[2] / 3.0
                self.precision_total_4 += hits_at[3] / 4.0
                self.precision_total_5 += hits_at[4] / 5.0

                # An empty reference would make recall undefined; it then
                # contributes nothing instead of raising ZeroDivisionError.
                reference_size = len(reference_tokens)
                if reference_size:
                    self.recall_total_1 += hits_at[0] / reference_size
                    self.recall_total_2 += hits_at[1] / reference_size
                    self.recall_total_3 += hits_at[2] / reference_size
                    self.recall_total_4 += hits_at[3] / reference_size
                    self.recall_total_5 += hits_at[4] / reference_size
Beispiel #3
0
 def update_per_subtoken_statistics(self, results, true_positive, false_positive, false_negative):
     """Add one batch's subtoken-level counts to the supplied running totals.

     For each ``(original_name, top_words)`` pair, the first entry returned
     by ``common.filter_impossible_names(top_words)`` is the prediction.
     Every predicted subtoken found in the original name adds one true
     positive, every other predicted subtoken adds one false positive, and
     every original subtoken absent from the prediction adds one false
     negative.

     Returns:
         The updated ``(true_positive, false_positive, false_negative)``.
     """
     for target_name, candidates in results:
         best_guess = common.filter_impossible_names(candidates)[0]
         target_parts = common.get_subtokens(target_name)
         guess_parts = common.get_subtokens(best_guess)

         # Classify each predicted subtoken (duplicates count individually).
         for part in guess_parts:
             if part not in target_parts:
                 false_positive += 1
                 continue
             true_positive += 1

         # Reference subtokens that the prediction failed to produce.
         for part in target_parts:
             if part in guess_parts:
                 continue
             false_negative += 1

     return true_positive, false_positive, false_negative
def update_algorithm_dict(results, algorithm_dict):
    """Update per-algorithm counters keyed by the predicted leading subtoken.

    For each ``(original_name, top_words)`` pair, the first entry returned by
    ``common.filter_impossible_names(top_words)`` is the prediction. The
    first subtoken of the prediction selects (or creates) an ``Algorithm``
    entry in ``algorithm_dict``; that entry's ``true_positive`` is incremented
    when the first subtokens of prediction and original match, otherwise its
    ``false_negative`` is incremented.

    Args:
        results: Iterable of ``(original_name, top_words)`` pairs.
        algorithm_dict: Mapping from leading subtoken to ``Algorithm``;
            mutated in place.

    Returns:
        The same ``algorithm_dict`` object that was passed in.
    """
    for original_name, top_words in results:
        prediction = common.filter_impossible_names(top_words)[0]
        original_main = common.get_subtokens(original_name)[0]
        predicted_main = common.get_subtokens(prediction)[0]

        # Create the entry on first sight, then fall through so that this
        # first prediction is counted as well (it used to be dropped).
        if predicted_main not in algorithm_dict:
            algorithm_dict[predicted_main] = Algorithm(predicted_main, 0, 0)
        entry = algorithm_dict[predicted_main]

        if predicted_main == original_main:
            entry.true_positive += 1
        else:
            # NOTE(review): the miss is booked on the *predicted* key's
            # false_negative counter -- confirm this is the intended metric.
            entry.false_negative += 1

    return algorithm_dict