Code example #1
    def get(self):
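        """Run the job's trained extractor on a single test sentence and
        return its predicted label."""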

        args = test_parser.parse_args()
        job_id = args['job_id']
        test_sentence = args['test_sentence']
        #positive_types = args['positive_types']

        print "Extracting event from sentence:"
        print test_sentence
        sys.stdout.flush()

        job = Job.objects.get(id=job_id)
        vocabulary = pickle.loads(job.vocabulary)
        predicted_labels = test_cnn([test_sentence], [0],
                                    write_model_to_file(job_id),
                                    vocabulary)

        print "predicted_labels"
        print predicted_labels
        sys.stdout.flush()
            
        return predicted_labels[0]
Code example #2
def impact_sampling_controller(task_ids, task_categories, training_examples,
                               training_labels, task_information, costSoFar,
                               budget, job_id):
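    """Bandit-style controller: measure how much the most recent task improved
    the extractor's F1, record that change as the reward for the task's
    category, and pick the next task category with an upper-confidence-bound
    (UCB) rule over the recorded rewards."""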

    print "Impact Sampling Controller activated."
    sys.stdout.flush()

    if len(task_categories) < 4:
        return round_robin_controller(task_ids, task_categories,
                                      training_examples, training_labels,
                                      task_information, costSoFar, budget,
                                      job_id)

    #First update the statistics about metric improvements from the last
    #action taken

    last_task_id = task_ids[-1]
    last_task_category = task_categories[-1]

    categories_to_examples = {}
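    #Group all previous tasks, excluding the most recent one, by category id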
    for i, task_category in enumerate(task_categories[:-1]):

        #This check is because some data in the database is inconsistent
        if isinstance(task_category, dict):
            task_category_id = task_category['id']
        else:
            task_category_id = task_category

        if task_category_id not in categories_to_examples:
            categories_to_examples[task_category_id] = []

        categories_to_examples[task_category_id].append(task_ids[i])

    #For every kind of action, check to see how well the extractor can
    #predict it

    #Take the examples from the GENERATE category and use them to compute
    #recall.
    #Take the examples from the LABEL category and use them to compute
    #precision.

    training_positive_examples = []
    training_negative_examples = []
    validation_recall_examples = []
    validation_recall_labels = []
    validation_precision_examples = []
    validation_precision_labels = []

    recall_measuring_task_cat_ids = [0]
    precision_measuring_task_cat_ids = [2]
    other_task_cat_ids = [1]

    for recall_measuring_task_cat_id in recall_measuring_task_cat_ids:
        recall_task_ids = categories_to_examples[recall_measuring_task_cat_id]

        recall_examples, placeholder = split_examples(
            recall_task_ids,
            [recall_measuring_task_cat_id for i in recall_task_ids], ['all'])

        if len(placeholder) > 0:
            raise Exception(
                'split_examples returned unexpected negative examples '
                'for a recall-measuring (GENERATE) task category')

        shuffle(recall_examples)

        size_of_validation_recall_examples = int(
            ceil(0.2 * len(recall_examples)))

        validation_recall_examples += recall_examples[
            0:size_of_validation_recall_examples]

        validation_recall_labels += [
            1 for e in range(size_of_validation_recall_examples)
        ]

        training_positive_examples += recall_examples[
            size_of_validation_recall_examples:]

        print "ADDING RECALL EXAMPLES"
        print len(training_positive_examples)
        print len(training_negative_examples)
        sys.stdout.flush()

    for precision_measuring_task_cat_id in precision_measuring_task_cat_ids:
        precision_task_ids = categories_to_examples[
            precision_measuring_task_cat_id]

        pos_examples, neg_examples = split_examples(
            precision_task_ids,
            [precision_measuring_task_cat_id
             for i in precision_task_ids], ['all'])

        shuffled_indices = np.random.permutation(
            np.arange(len(pos_examples) + len(neg_examples)))

        size_of_validation_precision_examples = int(
            ceil(0.2 * len(shuffled_indices)))

        for index in shuffled_indices[0:size_of_validation_precision_examples]:
            if index < len(pos_examples):
                validation_precision_examples.append(pos_examples[index])
                validation_precision_labels.append(1)
            else:
                real_index = index - len(pos_examples)
                validation_precision_examples.append(neg_examples[real_index])
                validation_precision_labels.append(0)

        for index in shuffled_indices[size_of_validation_precision_examples:]:
            if index < len(pos_examples):
                training_positive_examples.append(pos_examples[index])
            else:
                real_index = index - len(pos_examples)
                training_negative_examples.append(neg_examples[real_index])

        print "ADDING PRECISION EXAMPLES"
        print len(training_positive_examples)
        print len(training_negative_examples)
        sys.stdout.flush()

    for other_task_cat_id in other_task_cat_ids:
        other_task_ids = categories_to_examples[other_task_cat_id]

        pos_examples, neg_examples = split_examples(
            other_task_ids, [other_task_cat_id for i in other_task_ids],
            ['all'])

        training_positive_examples += pos_examples
        training_negative_examples += neg_examples

        print "ADDING ALL OTHER EXAMPLES"
        print len(training_positive_examples)
        print len(training_negative_examples)
        sys.stdout.flush()

    print "RETRAINING TO FIGURE OUT WHAT ACTION TO DO NEXT"
    print len(training_positive_examples)
    print len(training_negative_examples)
    sys.stdout.flush()

    f1s = []

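    #Retrain and evaluate several times and average the F1 scores to reduce
    #run-to-run variance.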
    for i in range(3):
        retrain(job_id, ['all'],
                training_positive_examples=training_positive_examples,
                training_negative_examples=training_negative_examples)

        job = Job.objects.get(id=job_id)
        vocabulary = pickle.loads(job.vocabulary)

        predicted_labels = test_cnn(
            validation_recall_examples + validation_precision_examples,
            validation_recall_labels + validation_precision_labels,
            write_model_to_file(job_id), vocabulary)

        predicted_labels_for_recall_examples = predicted_labels[
            0:len(validation_recall_examples)]
        predicted_labels_for_precision_examples = predicted_labels[
            len(validation_recall_examples):]

        #compute scores separately for precision and recall
        _, recall, _ = computeScores(predicted_labels_for_recall_examples,
                                     validation_recall_labels)

        precision, _, _ = computeScores(
            predicted_labels_for_precision_examples,
            validation_precision_labels)

        print "------------------------------------------"
        print "------------------------------------------"
        print "------------------------------------------"
        print recall
        print predicted_labels_for_recall_examples
        print validation_recall_labels
        print precision
        print predicted_labels_for_precision_examples
        print validation_precision_labels
        print "------------------------------------------"
        print "------------------------------------------"
        print "------------------------------------------"
        sys.stdout.flush()

        if (precision + recall) == 0:
            f1 = 0.0
        else:
            f1 = 2.0 * (precision * recall) / (precision + recall)

        f1s.append(f1)
    f1 = np.mean(f1s)
    ## Add in the extra data and compute the effect

    print "ADDING BACK IN EXTRA DATA"
    print last_task_id
    print last_task_category
    sys.stdout.flush()

    pos_examples, neg_examples = split_examples([last_task_id],
                                                [last_task_category], ['all'])

    training_positive_examples += pos_examples
    training_negative_examples += neg_examples

    new_f1s = []
    for i in range(3):
        retrain(job_id, ['all'],
                training_positive_examples=training_positive_examples,
                training_negative_examples=training_negative_examples)

        job = Job.objects.get(id=job_id)
        vocabulary = pickle.loads(job.vocabulary)

        predicted_labels = test_cnn(
            validation_recall_examples + validation_precision_examples,
            validation_recall_labels + validation_precision_labels,
            write_model_to_file(job_id), vocabulary)
        predicted_labels_for_recall_examples = predicted_labels[
            0:len(validation_recall_examples)]
        predicted_labels_for_precision_examples = predicted_labels[
            len(validation_recall_examples):]

        #compute scores separately for precision and recall
        _, new_recall, _ = computeScores(predicted_labels_for_recall_examples,
                                         validation_recall_labels)

        new_precision, _, _ = computeScores(
            predicted_labels_for_precision_examples,
            validation_precision_labels)

        print "------------------------------------------"
        print "------------------------------------------"
        print "------------------------------------------"
        print new_recall
        print predicted_labels_for_recall_examples
        print validation_recall_labels
        print new_precision
        print predicted_labels_for_precision_examples
        print validation_precision_labels
        print "------------------------------------------"
        print "------------------------------------------"
        print "------------------------------------------"
        sys.stdout.flush()

        if (new_precision + new_recall) == 0:
            new_f1 = 0.0
        else:
            new_f1 = (2.0 * (new_precision * new_recall) /
                      (new_precision + new_recall))
        new_f1s.append(new_f1)

    new_f1 = np.mean(new_f1s)

    change_in_f1 = new_f1 - f1

    current_control_data = pickle.loads(job.control_data)

    current_control_data[last_task_category].append(change_in_f1)

    job.control_data = pickle.dumps(current_control_data)
    job.save()

    print "------------------------------------------"
    print "------------------------------------------"
    print "------------------------------------------"
    print current_control_data
    print "------------------------------------------"
    print "------------------------------------------"
    print "------------------------------------------"
    sys.stdout.flush()

    if len(task_categories) < 6:
        return round_robin_controller(task_ids, task_categories,
                                      training_examples, training_labels,
                                      task_information, costSoFar, budget,
                                      job_id)

    #Add an exploration term

    best_task_category = []
    best_change = float('-inf')
    num_actions_taken_so_far = 0.0
    for task_category in current_control_data.keys():
        num_actions_taken_so_far += len(current_control_data[task_category])

    computed_values_of_each_action = []
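    #UCB rule: score(a) = weighted mean reward of a
    #                     + c * sqrt(2 * ln(total actions) / times a was taken)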
    for task_category in current_control_data.keys():
        average_change = np.average(
            current_control_data[task_category],
            weights=range(1,
                          len(current_control_data[task_category]) + 1))
        exploration_term = sqrt(2.0 * log(num_actions_taken_so_far) /
                                len(current_control_data[task_category]))
        c = 0.2
        ucb_value = average_change + (c * exploration_term)

        computed_values_of_each_action.append(
            (current_control_data[task_category], average_change,
             exploration_term, ucb_value))

        print "------------------------------------------"
        print "------------------------------------------"
        print "------------------------------------------"
        print "Value of action %d" % task_category
        print current_control_data[task_category]
        print average_change
        print exploration_term
        print ucb_value
        print "------------------------------------------"
        print "------------------------------------------"
        print "------------------------------------------"
        sys.stdout.flush()

        if ucb_value > best_change:
            best_task_category = [task_category]
            best_change = ucb_value
        elif ucb_value == best_change:
            best_task_category.append(task_category)

    current_logging_data = pickle.loads(job.logging_data)
    current_logging_data.append(
        (best_task_category, computed_values_of_each_action))
    job.logging_data = pickle.dumps(current_logging_data)
    job.save()

    #epsilon = 1.0 / num_actions_taken_so_far

    #if random() < epsilon:
    #    other_choices = [0,1,2]
    #    for item in best_task_category:
    #        other_choices.remove(item)
    #    best_task_category = sample(other_choices, 1)[0]
    #else:
    best_task_category = sample(best_task_category, 1)[0]

    if best_task_category == 2:
        print "choosing the LABEL category"
        sys.stdout.flush()

        next_category = app.config['EXAMPLE_CATEGORIES'][2]

        (selected_examples,
         expected_labels) = get_unlabeled_examples_from_tackbp(
             task_ids, task_categories, training_examples, training_labels,
             task_information, costSoFar, budget, job_id)

        task = make_labeling_crowdjs_task(selected_examples, expected_labels,
                                          task_information)

        return 2, task, len(selected_examples) * app.config[
            'CONTROLLER_LABELS_PER_QUESTION'], len(
                selected_examples) * app.config[
                    'CONTROLLER_LABELS_PER_QUESTION'] * next_category['price']

    elif best_task_category == 0:
        print "choosing the RECALL category"
        sys.stdout.flush()

        next_category = app.config['EXAMPLE_CATEGORIES'][0]

        task = make_recall_crowdjs_task(task_information)

        num_hits = app.config['CONTROLLER_GENERATE_BATCH_SIZE']
        return 0, task, num_hits, num_hits * next_category['price']

    elif best_task_category == 1:
        print "choosing the PRECISION category"
        sys.stdout.flush()

        next_category = app.config['EXAMPLE_CATEGORIES'][1]

        #positive_examples = []

        generate_task_ids = categories_to_examples[0]
        positive_examples, negative_examples = split_examples(
            generate_task_ids, [0 for i in generate_task_ids], ['all'])
        #for training_example_set, training_label_set in zip(
        #        training_examples, training_labels):
        #    for training_example, training_label in zip(
        #            training_example_set, training_label_set):
        #        if training_label == 1:
        #            positive_examples.append(training_example)

        num_hits = app.config['CONTROLLER_GENERATE_BATCH_SIZE'] * app.config[
            'CONTROLLER_NUM_MODIFY_TASKS_PER_SENTENCE']

        selected_positive_examples = sample(positive_examples, num_hits)

        task = make_precision_crowdjs_task(selected_positive_examples,
                                           task_information)

        return 1, task, num_hits, num_hits * next_category['price']
Code example #3
def uncertainty_sampling_controller(task_ids, task_categories,
                                    training_examples, training_labels,
                                    task_information, costSoFar, budget,
                                    job_id):
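    """For each task category, hold out part of its data, retrain on the rest,
    and measure F1 on the held-out portion; then request more work for the
    category on which the extractor scores worst."""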

    print "Uncertainty Sampling Controller activated."
    sys.stdout.flush()

    if len(task_categories) < 3:
        return round_robin_controller(task_ids, task_categories,
                                      training_examples, training_labels,
                                      task_information, costSoFar, budget,
                                      job_id)

    categories_to_examples = {}
    for i, task_category in enumerate(task_categories):

        #This check is because some data in the database is inconsistent
        if isinstance(task_category, dict):
            task_category_id = task_category['id']
        else:
            task_category_id = task_category

        if task_category_id not in categories_to_examples:
            categories_to_examples[task_category_id] = []

        categories_to_examples[task_category_id].append(task_ids[i])

    #For every kind of action, check to see how well the extractor can
    #predict it
    worst_task_category_id = []
    worst_fscore = 1.0
    for target_task_category_id in categories_to_examples.keys():
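        #Hold out 20% of the target category's examples for validation and
        #train on everything else.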

        training_positive_examples = []
        training_negative_examples = []
        validation_positive_examples = []
        validation_negative_examples = []
        validation_all_examples = []
        validation_all_labels = []

        for task_category_id in categories_to_examples.keys():
            matching_task_ids = categories_to_examples[task_category_id]
            pos_examples, neg_examples = split_examples(
                matching_task_ids,
                [task_category_id for i in matching_task_ids], ['all'])
            if not task_category_id == target_task_category_id:
                training_positive_examples += pos_examples
                training_negative_examples += neg_examples
            else:
                shuffle(pos_examples)
                shuffle(neg_examples)

                size_of_validation_positive_examples = int(
                    ceil(0.2 * len(pos_examples)))
                size_of_validation_negative_examples = int(
                    ceil(0.2 * len(neg_examples)))

                validation_positive_examples += pos_examples[
                    0:size_of_validation_positive_examples]
                validation_negative_examples += neg_examples[
                    0:size_of_validation_negative_examples]

                training_positive_examples += pos_examples[
                    size_of_validation_positive_examples:]
                training_negative_examples += neg_examples[
                    size_of_validation_negative_examples:]

        validation_all_examples = (validation_positive_examples +
                                   validation_negative_examples)
        validation_all_labels = (
            [1 for e in range(len(validation_positive_examples))] +
            [0 for e in range(len(validation_negative_examples))])

        print "RETRAINING TO FIGURE OUT WHAT ACTION TO DO NEXT"
        print len(training_positive_examples)
        print len(training_negative_examples)
        print len(validation_all_examples)

        retrain(job_id, ['all'],
                training_positive_examples=training_positive_examples,
                training_negative_examples=training_negative_examples)

        job = Job.objects.get(id=job_id)
        vocabulary = pickle.loads(job.vocabulary)
        predicted_labels = test_cnn(validation_all_examples,
                                    validation_all_labels,
                                    write_model_to_file(job_id), vocabulary)

        precision, recall, f1 = computeScores(predicted_labels,
                                              validation_all_labels)

        print "Action:"
        print target_task_category_id
        print "Scores:"
        print precision, recall, f1
        sys.stdout.flush()

        if f1 < worst_fscore:
            worst_fscore = f1
            worst_task_category_id = [target_task_category_id]
        elif f1 == worst_fscore:
            worst_task_category_id.append(target_task_category_id)

    print "Worst F Score"
    print worst_fscore
    sys.stdout.flush()

    worst_task_category_id = sample(worst_task_category_id, 1)[0]

    if worst_task_category_id == 2:
        print "choosing the LABEL category"
        sys.stdout.flush()

        next_category = app.config['EXAMPLE_CATEGORIES'][2]

        (selected_examples,
         expected_labels) = get_unlabeled_examples_from_tackbp(
             task_ids, task_categories, training_examples, training_labels,
             task_information, costSoFar, budget, job_id)

        task = make_labeling_crowdjs_task(selected_examples, expected_labels,
                                          task_information)

        return 2, task, len(selected_examples) * app.config[
            'CONTROLLER_LABELS_PER_QUESTION'], len(
                selected_examples) * app.config[
                    'CONTROLLER_LABELS_PER_QUESTION'] * next_category['price']

    elif worst_task_category_id == 0:
        print "choosing the RECALL category"
        sys.stdout.flush()

        next_category = app.config['EXAMPLE_CATEGORIES'][0]

        task = make_recall_crowdjs_task(task_information)

        num_hits = app.config['CONTROLLER_GENERATE_BATCH_SIZE']
        return 0, task, num_hits, num_hits * next_category['price']

    elif worst_task_category_id == 1:
        print "choosing the PRECISION category"
        sys.stdout.flush()

        next_category = app.config['EXAMPLE_CATEGORIES'][1]

        #positive_examples = []

        generate_task_ids = categories_to_examples[0]
        positive_examples, negative_examples = split_examples(
            generate_task_ids, [0 for i in generate_task_ids], ['all'])
        #for training_example_set, training_label_set in zip(
        #        training_examples, training_labels):
        #    for training_example, training_label in zip(
        #            training_example_set, training_label_set):
        #        if training_label == 1:
        #            positive_examples.append(training_example)

        num_hits = app.config['CONTROLLER_GENERATE_BATCH_SIZE'] * app.config[
            'CONTROLLER_NUM_MODIFY_TASKS_PER_SENTENCE']

        selected_positive_examples = sample(positive_examples, num_hits)

        task = make_precision_crowdjs_task(selected_positive_examples,
                                           task_information)

        return 1, task, num_hits, num_hits * next_category['price']
Code example #4
def get_unlabeled_examples_from_tackbp(task_ids, task_categories,
                                       training_examples, training_labels,
                                       task_information, costSoFar, budget,
                                       job_id):
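    """Run the current extractor over unused sentences from the TAC KBP
    newswire corpus and select a batch to label, preferring sentences the
    model predicts as positive."""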

    print "choosing to find examples from TACKBP and label them"
    sys.stdout.flush()
    next_category = app.config['EXAMPLE_CATEGORIES'][2]

    #num_positive_examples_to_label = int(
    #    app.config['CONTROLLER_LABELING_BATCH_SIZE'] / 2.0)
    num_positive_examples_to_label = app.config[
        'CONTROLLER_LABELING_BATCH_SIZE']

    num_negative_examples_to_label = (
        app.config['CONTROLLER_LABELING_BATCH_SIZE'] -
        num_positive_examples_to_label)

    retrain(job_id, ['all'])

    test_examples = []
    test_labels = []

    tackbp_newswire_corpus = str(
        requests.get(
            app.config['TACKBP_NW_09_CORPUS_URL']).content).split('\n')

    #Get all the previous examples that we labeled already
    used_examples = []
    for i, task_category in enumerate(task_categories):
        #This check is because some data in the database is inconsistent
        if isinstance(task_category, dict):
            task_category_id = task_category['id']
        else:
            task_category_id = task_category
        if task_category_id == 2:
            used_examples += training_examples[i]

    tackbp_newswire_corpus = set(tackbp_newswire_corpus) - set(used_examples)
    for sentence in tackbp_newswire_corpus:
        test_examples.append(sentence)
        test_labels.append(0)

    job = Job.objects.get(id=job_id)
    vocabulary = pickle.loads(job.vocabulary)

    predicted_labels = test_cnn(test_examples, test_labels,
                                write_model_to_file(job_id), vocabulary)

    positive_examples = []
    negative_examples = []
    for i in range(len(predicted_labels)):
        predicted_label = predicted_labels[i]
        example = test_examples[i]
        if predicted_label == 1:
            positive_examples.append(example)
        else:
            negative_examples.append(example)

    print "Sampling examples from the corpus"
    sys.stdout.flush()

    selected_examples = []
    expected_labels = []
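    #If the model did not predict enough positives (or negatives) to fill the
    #batch, top it up with examples from the other pool.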
    if len(positive_examples) < num_positive_examples_to_label:
        selected_examples += positive_examples
        expected_labels += [1 for i in range(len(positive_examples))]
        selected_examples += sample(
            negative_examples, app.config['CONTROLLER_LABELING_BATCH_SIZE'] -
            len(positive_examples))
        expected_labels += [
            0 for i in range(app.config['CONTROLLER_LABELING_BATCH_SIZE'] -
                             len(positive_examples))
        ]
    elif len(negative_examples) < num_negative_examples_to_label:
        selected_examples += negative_examples
        expected_labels += [0 for i in range(len(negative_examples))]
        selected_examples += sample(
            positive_examples, app.config['CONTROLLER_LABELING_BATCH_SIZE'] -
            len(negative_examples))
        expected_labels += [
            1 for i in range(app.config['CONTROLLER_LABELING_BATCH_SIZE'] -
                             len(negative_examples))
        ]
    else:
        selected_examples += sample(positive_examples,
                                    num_positive_examples_to_label)
        expected_labels += [1 for i in range(num_positive_examples_to_label)]
        selected_examples += sample(negative_examples,
                                    num_negative_examples_to_label)
        expected_labels += [0 for i in range(num_negative_examples_to_label)]

    print "Shuffling examples from the corpus"
    sys.stdout.flush()

    shuffle(selected_examples)

    return selected_examples, expected_labels
Code example #5
def test_on_held_out_set(job_id, positive_types, test_set):
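    """Evaluate the job's trained extractor on one of the held-out test sets
    and return the true/false positive and negative examples along with
    precision, recall, and F1."""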
    job = Job.objects.get(id=job_id)
                
    checkpoint = getLatestCheckpoint(job_id)
    (task_information, budget) = pickle.loads(job.task_information)

    (task_ids, task_categories, costSoFar) = pickle.loads(checkpoint)

    if test_set == -1:
        test_positive_examples, test_negative_examples = split_examples(
            task_ids[0:2],
            task_categories[0:2], positive_types)
        test_examples = test_positive_examples + test_negative_examples
        test_labels = ([1 for e in test_positive_examples] +
                       [0 for e in test_negative_examples])
    elif 0 <= test_set <= 4:
        relations = ['nationality', 'born', 'lived', 'died', 'travel']
        amount_of_data = [1898, 496, 3897, 1493, 1992]
        testfile_name = 'data/test_data/test_strict_new_feature'
        (test_labels, test_features, test_examples,
         test_positive_examples,
         test_negative_examples) = parse_angli_test_data(
             testfile_name, [], test_set)
    elif 5 <= test_set <= 9:
        relations = ['transfermoney', 'broadcast', 'attack', 'contact',
                     'transferownership']
        testfile_name = 'data/test_data/testEvents'
        relation = relations[test_set-5]
        test_positive_examples, test_negative_examples = parse_tackbp_test_data(
            testfile_name, relation)
        test_examples = test_positive_examples + test_negative_examples
        test_labels = ([1 for e in test_positive_examples] +
                       [0 for e in test_negative_examples])
    else:
        test_positive_examples = []
        test_negative_examples = []
        pos_testfile_name = 'data/test_data/self_generated/death_pos'
        neg_testfile_name = 'data/test_data/self_generated/death_neg'
        with open(pos_testfile_name, 'r') as pos_testfile:
            for line in pos_testfile:
                test_positive_examples.append(line)
        with open(neg_testfile_name, 'r') as neg_testfile:
            for line in neg_testfile:
                test_negative_examples.append(line)
        test_examples = test_positive_examples + test_negative_examples
        test_labels = ([1 for e in test_positive_examples] +
                       [0 for e in test_negative_examples])


    #job = Job.objects.get(id = job_id)
    vocabulary = pickle.loads(job.vocabulary)

    predicted_labels = test_cnn(
        test_examples,
        test_labels,
        write_model_to_file(job_id),
        vocabulary)

    print "predicted_labels"
    print predicted_labels
    sys.stdout.flush()

    precision, recall, f1 = computeScores(predicted_labels, test_labels)


    true_positives = []
    false_positives = []
    true_negatives = []
    false_negatives = []

    for example, label in zip(
            test_positive_examples,
            predicted_labels[0:len(test_positive_examples)]):
        if label == 1:
            true_positives.append(example)
        else:
            false_negatives.append(example)

    for example, label in zip(
            test_negative_examples,
            predicted_labels[len(test_positive_examples):]):
        if label == 1:
            false_positives.append(example)
        else:
            true_negatives.append(example)


    return (true_positives,
            false_positives,
            true_negatives,
            false_negatives,
            [precision, recall, f1])