Beispiel #1
0
def test1():
    """Hold out hand-picked product id(s) from the learning data and check the resulting F1 score.

    The learning CSV is loaded, the rows for the chosen ids are split off as
    the testing set, and the remaining rows are used to build the thresholded
    results that are scored against that testing set.
    """

    # Locate the learning data relative to this file
    here = os.path.dirname(__file__)
    learning_csv = os.path.join(
        here, '../autofunc/assets/consumer_systems.csv')
    full_df = make_df(learning_csv)

    # ID(s) from the learning file that are separated into the testing set
    held_out_ids = [691, 169]
    verification_df, learning_df = split_learning_verification(
        full_df, held_out_ids)
    verification_list = df_to_list(verification_df)

    # Threshold used to keep the top XX% of frequency values
    cutoff = 0.7
    top_results = get_top_results(counter_pandas(learning_df), cutoff)

    # Score the thresholded results against the held-out products
    scores = precision_recall(top_results, verification_list)
    learned, _matched, _overmatched, _unmatched, _recall, _precision, f1_score = scores

    assert len(learned) > 0
    assert f1_score > 0
Beispiel #2
0
# Data set that the mining step learns from, located relative to this script
script_dir = os.path.dirname(__file__)
file_to_learn = os.path.join(
    script_dir,
    '../autofunc/assets/consumer_systems.csv',
)

train_data = pd.read_csv(file_to_learn)
combos_sorted = counter_pandas(train_data)

# Threshold passed to get_top_results to keep the top XX% of confidence values
threshold = 0.5
thresh_results = get_top_results(combos_sorted, threshold)

# A known product serves as the verification case
test_file = os.path.join(
    script_dir,
    '../autofunc/assets/jigsawQuery_headers.csv',
)
test_data = pd.read_csv(test_file)
test_list = df_to_list(test_data)

# Compare the thresholded results with the verification product
(
    learned_dict,
    matched,
    overmatched,
    unmatched,
    recall,
    precision,
    f1,
) = precision_recall(thresh_results, test_list)

# Optional write to file - uncomment and rename to write file
# write_results_from_dict(learned_dict, 'test1.csv')

# Report the scores with five decimal places
print('Recall = {0:.5f}'.format(recall))
print('Precision = {0:.5f}'.format(precision))
print('F1 = {0:.5f}'.format(f1))