def main():
    """Score all search-engine configurations on the 'cran' data files.

    Reads the ground truth and the per-configuration query results, attaches
    the MRR and R-precision summary statistics to the configuration table,
    plots P@k and NCDG@k for the five configurations with the highest MRR,
    and writes the table, ordered by descending MRR, to a CSV file.
    """
    ground_truth_dict = utils.read_ground_truth('./data/cran_Ground_Truth.tsv')
    search_engine_conf = utils.read_json('./data/total_query_results.json')

    mrr_scores = MRR(ground_truth_dict, search_engine_conf)
    r_precision = R_Precision(ground_truth_dict, search_engine_conf)

    configurations = pd.read_csv('./data/SearchEngines.csv')
    configurations['MRR'] = mrr_scores
    # Each statistic is reduced along axis 1 of the R-precision results
    # (presumably one row per configuration -- confirm against R_Precision).
    reducers = (
        ('Mean', np.mean),
        ('Min', np.min),
        ('Max', np.max),
        ('Median', np.median),
    )
    for column, reducer in reducers:
        configurations[column] = reducer(r_precision, axis=1)
    for column, fraction in (('1_quartile', .25), ('3_quartile', .75)):
        configurations[column] = np.quantile(a=r_precision, q=fraction, axis=1)

    # One ordering serves both the top-5 selection and the exported table.
    ranked = configurations.sort_values(by=['MRR'], ascending=False)
    top_ids = ranked.head(5)['SE_ID']
    print(list(top_ids))

    col_names = [f'Conf_{se_id}' for se_id in list(top_ids)]
    search_engine_conf_top_5 = {
        se_id: search_engine_conf[se_id] for se_id in top_ids
    }

    def save_line_plot(metric_by_k, title, y_label, target):
        # Transposed so that every top-5 configuration becomes one column.
        frame = pd.DataFrame(metric_by_k).transpose()
        frame.columns = col_names
        axis = frame.plot(title=title)
        axis.set_xlabel('k values')
        axis.set_ylabel(y_label)
        plt.savefig(target)

    print('P@k....')
    p_at_k_scores = P_at_k(ground_truth_dict, search_engine_conf_top_5)
    save_line_plot(p_at_k_scores, 'P@k', 'Mean P@k', './Report/Images/Pk.png')

    print('NCDG@k....')
    ncdg_scores = ncdg(ground_truth_dict, search_engine_conf_top_5)
    save_line_plot(ncdg_scores, 'NCDG@k', 'Mean NCDG@k',
                   './Report/Images/NCDGk.png')

    ranked.to_csv(r'./data/SearchEnginesResults.csv', index=False)
def rename_sudokus():
    """Rename every annotated sudoku image to ``sudoku_<n>.jpg``.

    Entries are read from ``GT_OUT_FILE`` and numbered consecutively starting
    at 31. Each image is renamed inside its own directory, and one line per
    image (new path followed by the flattened coordinates) is echoed to
    stdout and written to ``ground_truth.renamed.csv``.

    Raises:
        RuntimeError: if a target file name already exists. Images processed
            before the collision have already been renamed at that point.
    """
    first_index = 31  # numbering of the renamed files starts here
    with open('ground_truth.renamed.csv', 'w', encoding='utf8') as f:
        entries = enumerate(read_ground_truth(GT_OUT_FILE), start=first_index)
        for i, (file_path, coords) in entries:
            new_path = os.path.join(os.path.dirname(file_path), f'sudoku_{i:d}.jpg')
            # Never overwrite an existing image; say which one is in the way.
            if os.path.exists(new_path):
                raise RuntimeError(
                    f'cannot rename {file_path!r}: {new_path!r} already exists')
            os.rename(file_path, new_path)
            cells = [new_path] + [str(a) for a in coords.flatten()]
            line = ', '.join(cells) + '\n'
            print(line, end='')
            f.write(line)
def community_search_for_all_nodes(graph, ground_truth_file_address, start_with_given_node=True):
    """Run community search from every node and print the averaged accuracy.

    A community is searched for each node separately; precision, recall and
    f1-score are averaged over all nodes and printed together with the
    (population) standard deviation of the f1-score and the elapsed time.

    Args:
        graph ([nx.Graph]): [the given network]
        ground_truth_file_address ([str]): [filename of the ground-truth
            information of communities]
        start_with_given_node (bool, optional): [if True: start expansion with
            the given node, if False: start with a node of highest degree].
            Defaults to True.

    Raises:
        ZeroDivisionError: if the graph has no nodes.
    """
    start_time = time.time()
    performance_info = dict()
    ground_truth_com2nodes = utils.read_ground_truth(ground_truth_file_address)

    nodes = list(graph.nodes())
    node_count = graph.number_of_nodes()

    for node in nodes:
        performance_info[node] = {
            'degree': graph.degree[node],
            'precision': 0.0,
            'recall': 0.0,
            'f1-score': 0.0
        }
        community = community_search(graph, node, start_with_given_node)
        utils.update_performance_info(node, performance_info, community, ground_truth_com2nodes)

    def f1_of(node):
        # The defaults above use the key 'f1-score' while the aggregation
        # originally read 'f1_score', which raised KeyError for any node the
        # updater left untouched. Accept either spelling.
        # NOTE(review): confirm which key utils.update_performance_info
        # writes and settle on a single spelling.
        info = performance_info[node]
        if 'f1_score' in info:
            return info['f1_score']
        return info['f1-score']

    precision = sum(performance_info[x]['precision'] for x in nodes) / node_count
    recall = sum(performance_info[x]['recall'] for x in nodes) / node_count
    f1_score = sum(f1_of(x) for x in nodes) / node_count
    sd = sqrt(sum((f1_of(x) - f1_score)**2 for x in nodes) / node_count)

    print('precision = %.4f' % precision, end='\t')
    print('recall = %.4f' % recall, end='\t')
    print('f1-score = %.4f' % f1_score, end='\t')
    print('sd(fscore) = %.4f' % sd, end='\t')
    finish_time = time.time()
    print('time = %.4f' % (finish_time - start_time))
import time

import cv2 as cv
import numpy as np

import config
from sudoku_detector import SudokuDetector
from utils import rotation_correction, read_ground_truth, show

# Colour constants; the name/value pairs follow blue-green-red channel order.
RED = (0, 0, 255)
GREEN = (0, 255, 0)
BLUE = (255, 0, 0)
CYAN = (255, 255, 0)
MAGENTA = (255, 0, 255)

# Run the detector on every annotated sudoku image listed in the ground truth.
gt_annoatations = read_ground_truth(config.sudokus_gt_path)
detector = SudokuDetector()

for sudoku_index, (file_path, gt_coords) in enumerate(gt_annoatations):
    # Never true as written (indices start at 0); presumably a knob for
    # skipping ahead to a given image while debugging.
    if sudoku_index < 0:
        continue

    # `time` was used here without being imported, which raised NameError.
    start = time.time()
    sudoku_img_org = cv.imread(file_path)

    # Ensure that the sudoku is always rotated by at most 45 deg in either direction.
    sudoku_img_org, gt_coords = rotation_correction(sudoku_img_org, gt_coords)

    det = detector.detect(sudoku_img_org)
def _plot_metric_bars(results, column, title, y_label, out_path,
                      show_results=False, show_frame=False):
    """Save an annotated bar chart with one bar per search engine.

    Args:
        results: mapping from a k value to a list of metric values. The i-th
            value of each list is labelled ``SE<i>`` (1-based); presumably the
            lists follow the order of the search-engine dict -- confirm
            against EvaluationMetrics.
        column: name of the metric column in the intermediate data frame.
        title: chart title.
        y_label: label of the y axis.
        out_path: where the figure is saved.
        show_results: print the raw ``results`` mapping.
        show_frame: print the intermediate data frame.
    """
    rows = []
    for values in results.values():
        # Labels restart at SE1 for every k value.
        for position, value in enumerate(values, start=1):
            rows.append(['SE' + str(position), value])

    if show_results:
        print(results)
    df = pd.DataFrame(rows, columns=['SE', column])
    if show_frame:
        print(df)

    plt.figure(figsize=(15, 7))
    plot = sns.barplot(x='SE', y=column, data=df)
    # Write each bar's height just above it.
    for p in plot.patches:
        plot.annotate(format(p.get_height(), ',.2f'),
                      (p.get_x() + p.get_width() / 2., p.get_height()),
                      ha='center',
                      va='center',
                      xytext=(0, 10),
                      textcoords='offset points')
    plt.title(title)
    plt.xlabel('Search Engines')
    plt.ylabel(y_label)
    # Leave 20% headroom so the annotations fit inside the axes.
    plt.ylim(0, df[column].max() * 1.2)
    plt.savefig(out_path)


def main():
    """Compare three search engines at k = 4 with P@k, R@k and nDCG.

    Reads the ground truth and the three result files, computes each metric
    through EvaluationMetrics and saves one bar chart per metric under
    ``./Report/Images``.
    """
    ground_truth_path = './data/part_1_2__Ground_Truth.tsv'
    result_se1_path = './data/part_1_2__Results_SE_1.tsv'
    result_se2_path = './data/part_1_2__Results_SE_2.tsv'
    result_se3_path = './data/part_1_2__Results_SE_3.tsv'

    ground_truth = utils.read_ground_truth(ground_truth_path)
    se1 = utils.read_result_se(result_se1_path)
    se2 = utils.read_result_se(result_se2_path)
    se3 = utils.read_result_se(result_se3_path)
    search_engine_conf = {1: se1, 2: se2, 3: se3}

    print('P@k....')
    P_at_k_res = EvaluationMetrics.P_at_k(ground_truth, search_engine_conf, k_vals=[4])
    _plot_metric_bars(
        P_at_k_res, 'PK',
        'P@k Evaluation Metrics of Search Engines with k =4',
        'The average P@k over all provided queries',
        './Report/Images/Pk_part1_2.png',
        show_results=True)

    print('R@k....')
    R_at_k_res = EvaluationMetrics.R_at_k(ground_truth, search_engine_conf, k_vals=[4])
    _plot_metric_bars(
        R_at_k_res, 'RK',
        'R@k Evaluation Metrics of Search Engines with k =4',
        'The average R@k over all provided queries',
        './Report/Images/Rk_part1_2.png',
        show_results=True, show_frame=True)

    print('NCDG@k....')
    ncdg_at_k_res = EvaluationMetrics.ncdg(ground_truth, search_engine_conf, k_vals=[4])
    _plot_metric_bars(
        ncdg_at_k_res, 'nDCG',
        'nDCG Evaluation Metrics of Search Engines with k =4',
        'The average nDCG over all provided queries',
        './Report/Images/nDCG_part1_2.png',
        show_frame=True)
import cv2 as cv

import config
from utils import rotation_correction, read_ground_truth

if __name__ == '__main__':
    # Draw every ground-truth annotation onto its image after rotation
    # correction, then scale the result to a width of 1024 pixels.
    outline_colour = (0, 255, 0)
    first_edge_colour = (255, 0, 255)
    second_edge_colour = (255, 255, 0)
    centre_colour = (0, 0, 255)
    target_width = 1024

    for file_name, coords in read_ground_truth(config.sudokus_gt_path):
        img = cv.imread(file_name, cv.IMREAD_COLOR)
        img, coords = rotation_correction(img, coords)
        print(f'{file_name}, Shape: {img.shape}')

        # Closed outline through all annotated points.
        img = cv.polylines(img, [coords], True, outline_colour, thickness=5)

        # Highlight the first two edges, each in its own colour.
        corner_a = tuple(coords[0, :])
        corner_b = tuple(coords[1, :])
        corner_c = tuple(coords[2, :])
        cv.line(img, corner_a, corner_b, first_edge_colour, thickness=10)
        cv.line(img, corner_b, corner_c, second_edge_colour, thickness=10)

        # Mark the centre of the image.
        h, w = img.shape[:2]
        centre = (int(round(w / 2)), int(round(h / 2)))
        img = cv.drawMarker(img, centre, centre_colour, thickness=5)

        # Fixed output width; the height is scaled by the same factor.
        shrink = w / target_width
        img = cv.resize(img, (target_width, int(h / shrink)))
def _evaluate_collection(collection_name, ground_truth_path, search_engine_conf_path,
                         configuration_path, figure_path, results_path, positions):
    """Evaluate all search-engine configurations on one document collection.

    Attaches the MRR and R-precision summary statistics to the configuration
    table, plots P@k and NCDG@k side by side for the five configurations with
    the highest MRR, and writes the table ordered by descending MRR.

    Args:
        collection_name: word used in the plot titles, e.g. ``'Cranfield'``.
        ground_truth_path: ground-truth file of the collection.
        search_engine_conf_path: JSON file with the query results per
            configuration.
        configuration_path: CSV file describing the configurations; it must
            provide an ``SE_ID`` column.
        figure_path: where the two-panel figure is saved.
        results_path: where the ranked configuration table is written.
        positions: x-axis tick positions (the k values) for both panels.
    """
    ground_truth_dict = utils.read_ground_truth(ground_truth_path)
    search_engine_conf = utils.read_json(search_engine_conf_path)

    MRR_results = MRR(ground_truth_dict, search_engine_conf)
    R_precision_results = R_Precision(ground_truth_dict, search_engine_conf)

    configurations = pd.read_csv(configuration_path)
    configurations['MRR'] = MRR_results
    # Statistics are reduced along axis 1 of the R-precision results
    # (presumably one row per configuration -- confirm against R_Precision).
    configurations['Mean'] = np.mean(R_precision_results, axis=1)
    configurations['Min'] = np.min(R_precision_results, axis=1)
    configurations['Max'] = np.max(R_precision_results, axis=1)
    configurations['Median'] = np.median(R_precision_results, axis=1)
    configurations['1_quartile'] = np.quantile(a=R_precision_results, q=.25, axis=1)
    configurations['3_quartile'] = np.quantile(a=R_precision_results, q=.75, axis=1)

    # One ordering serves both the top-5 selection and the exported table.
    configurations = configurations.sort_values(by=['MRR'], ascending=False)
    configurations_top_5 = configurations.head(5).SE_ID
    print(list(configurations_top_5))

    col_names = ['Conf_' + str(i) for i in list(configurations_top_5)]
    search_engine_conf_top_5 = {
        key: search_engine_conf[key]
        for key in configurations_top_5
    }

    figure, axes = plt.subplots(1, 2, figsize=(8, 3))

    print('P@k....')
    P_at_k_res = P_at_k(ground_truth_dict, search_engine_conf_top_5)
    temp_df = pd.DataFrame(P_at_k_res).transpose()
    temp_df.columns = col_names
    ax1 = temp_df.plot(title='P@k ' + collection_name + ' Data', ax=axes[0], style='.-')
    ax1.set_xticks(positions)
    ax1.set_xlabel('k values')
    ax1.set_ylabel('Mean P@k')

    print('NCDG@k....')
    ncdg_at_k_res = ncdg(ground_truth_dict, search_engine_conf_top_5)
    temp_df = pd.DataFrame(ncdg_at_k_res).transpose()
    temp_df.columns = col_names
    ax2 = temp_df.plot(title='NCDG@k ' + collection_name + ' Data', ax=axes[1], style='.-')
    ax2.set_xticks(positions)
    ax2.set_xlabel('k values')
    ax2.set_ylabel('Mean NCDG@k')

    plt.tight_layout()
    figure.savefig(figure_path)

    configurations.to_csv(results_path, index=False)


def main():
    """Evaluate the search-engine configurations on the Cranfield and Time data."""
    positions = (1, 3, 5, 10)

    # CRANFIELD
    _evaluate_collection(
        'Cranfield',
        ground_truth_path='../../../data/cran_Ground_Truth.tsv',
        search_engine_conf_path='../../../data/total_query_results_cran.json',
        configuration_path='../../../data/SearchEnginesCran.csv',
        figure_path='../../../Report/Images/CranPlot.png',
        results_path=r'../../../data/SearchEnginesResultsCran.csv',
        positions=positions)

    # TIME
    _evaluate_collection(
        'Time',
        ground_truth_path='../../../data/time_Ground_Truth.tsv',
        search_engine_conf_path='../../../data/total_query_results_time.json',
        configuration_path='../../../data/SearchEnginesTime.csv',
        figure_path='../../../Report/Images/TimePlot.png',
        results_path=r'../../../data/SearchEnginesResultsTime.csv',
        positions=positions)