def test1():
    """
    Check the F1 score obtained when manually chosen product id(s) are
    pulled out of the original data and used as the testing set.
    """

    # Threshold used to keep the top XX% of frequency values
    threshold = 0.7

    # ID(s) from the learning file that get separated into the testing set
    test_ids = [691, 169]

    # The learning file is located relative to this file's directory
    here = os.path.dirname(__file__)
    learning_csv = os.path.join(here, '../autofunc/assets/consumer_systems.csv')
    all_data = make_df(learning_csv)

    # Split on the chosen IDs: first frame is for testing, second for training
    held_out_df, remaining_df = split_learning_verification(all_data, test_ids)
    test_list = df_to_list(held_out_df)

    # Count on the training frame, then keep what passes the threshold
    thresh_results = get_top_results(counter_pandas(remaining_df), threshold)

    # Find the F1 score; only the learned dictionary and F1 are checked below
    (learned_dict,
     _matched,
     _overmatched,
     _unmatched,
     _recall,
     _precision,
     f1) = precision_recall(thresh_results, test_list)

    assert len(learned_dict) > 0
    assert f1 > 0
# Locate the asset files relative to this file's directory
script_dir = os.path.dirname(__file__)

# Dataset used for data mining
file_to_learn = os.path.join(
    script_dir, '../autofunc/assets/consumer_systems.csv'
)
train_data = pd.read_csv(file_to_learn)
combos_sorted = counter_pandas(train_data)

# Use a threshold to get the top XX% of confidence values
threshold = 0.5
thresh_results = get_top_results(combos_sorted, threshold)

# Use a known product for verification
test_file = os.path.join(
    script_dir, '../autofunc/assets/jigsawQuery_headers.csv'
)
test_data = pd.read_csv(test_file)
test_list = df_to_list(test_data)

# Compare the thresholded results against the verification product
scores = precision_recall(thresh_results, test_list)
learned_dict, matched, overmatched, unmatched, recall, precision, f1 = scores

# Optional write to file - uncomment and rename to write file
# write_results_from_dict(learned_dict, 'test1.csv')

# Report the three scores to five decimal places
print('Recall = {0:.5f}'.format(recall))
print('Precision = {0:.5f}'.format(precision))
print('F1 = {0:.5f}'.format(f1))