Beispiel #1
0
def main():
    """Score every search-engine configuration and plot the five best.

    Reads the ground truth, the per-configuration query results and the
    configuration table from ``./data``, attaches the MRR and row-wise
    R-precision summary statistics to the table, plots mean P@k and NCDG@k
    for the five configurations with the highest MRR, and finally writes
    the table (sorted by descending MRR) to
    ``./data/SearchEnginesResults.csv``.
    """
    ground_truth_dict = utils.read_ground_truth('./data/cran_Ground_Truth.tsv')
    search_engine_conf = utils.read_json('./data/total_query_results.json')

    mrr_scores = MRR(ground_truth_dict, search_engine_conf)
    r_precision = R_Precision(ground_truth_dict, search_engine_conf)

    configurations = pd.read_csv('./data/SearchEngines.csv')
    configurations['MRR'] = mrr_scores

    # Row-wise (axis=1) summaries of the R-precision results; the insertion
    # order below is the column order of the exported table.
    row_reducers = (
        ('Mean', np.mean),
        ('Min', np.min),
        ('Max', np.max),
        ('Median', np.median),
    )
    for column, reducer in row_reducers:
        configurations[column] = reducer(r_precision, axis=1)
    for column, fraction in (('1_quartile', .25), ('3_quartile', .75)):
        configurations[column] = np.quantile(a=r_precision,
                                             q=fraction,
                                             axis=1)

    # Rank once; the same ordering serves the top-5 pick and the export.
    ranked = configurations.sort_values(by=['MRR'], ascending=False)
    top_ids = list(ranked.head(5).SE_ID)
    print(top_ids)
    col_names = ['Conf_' + str(se_id) for se_id in top_ids]
    top_engines = {se_id: search_engine_conf[se_id] for se_id in top_ids}

    print('P@k....')
    precision_frame = pd.DataFrame(P_at_k(ground_truth_dict,
                                          top_engines)).transpose()
    precision_frame.columns = col_names
    precision_axes = precision_frame.plot(title='P@k')
    precision_axes.set_xlabel('k values')
    precision_axes.set_ylabel('Mean P@k')
    plt.savefig('./Report/Images/Pk.png')

    print('NCDG@k....')
    ncdg_frame = pd.DataFrame(ncdg(ground_truth_dict,
                                   top_engines)).transpose()
    ncdg_frame.columns = col_names
    ncdg_axes = ncdg_frame.plot(title='NCDG@k')
    ncdg_axes.set_xlabel('k values')
    ncdg_axes.set_ylabel('Mean NCDG@k')
    plt.savefig('./Report/Images/NCDGk.png')

    ranked.to_csv(r'./data/SearchEnginesResults.csv', index=False)
Beispiel #2
0
def rename_sudokus():
    """Rename the ground-truth images to ``sudoku_<n>.jpg`` and log the result.

    Every ``(file_path, coords)`` entry read from ``GT_OUT_FILE`` is renamed
    in place (same directory) to ``sudoku_<n>.jpg``, with ``n`` counting up
    from 31. For each renamed file one line -- the new path followed by the
    flattened coordinates, joined with ``', '`` -- is echoed to stdout and
    written to ``ground_truth.renamed.csv``.

    Raises:
        RuntimeError: if a target file name already exists. Files renamed
            before the collision are NOT rolled back.
    """
    with open('ground_truth.renamed.csv', 'w', encoding='utf8') as f:
        entries = read_ground_truth(GT_OUT_FILE)
        # Numbering starts at 31 (the original added 31 to a zero-based index).
        for i, (file_path, coords) in enumerate(entries, start=31):
            new_path = os.path.join(os.path.dirname(file_path),
                                    f'sudoku_{i:d}.jpg')

            # Never clobber an existing image; report which path collided.
            if os.path.exists(new_path):
                raise RuntimeError(
                    f'Cannot rename {file_path!r}: '
                    f'target {new_path!r} already exists')

            os.rename(file_path, new_path)

            cells = [new_path] + [str(a) for a in coords.flatten()]
            line = ', '.join(cells) + '\n'
            print(line, end='')
            f.write(line)
def community_search_for_all_nodes(graph,
                                   ground_truth_file_address,
                                   start_with_given_node=True):
    """Run community search from every node and print averaged accuracy.

    A community search is performed separately for each node of ``graph``;
    the per-node record is updated through ``utils.update_performance_info``
    and the mean precision, recall and f1-score, the standard deviation of
    the f1-score and the elapsed wall-clock time are printed on one line.

    Args:
        graph (nx.Graph): the given network.
        ground_truth_file_address (str): filename of the ground-truth
            information of communities.
        start_with_given_node (bool, optional): if True, start expansion with
            the given node; if False, start with a node of highest degree.
            Defaults to True.

    Raises:
        ZeroDivisionError: if ``graph.number_of_nodes()`` is 0, because the
            averages divide by the node count.
    """
    start_time = time.time()
    performance_info = dict()
    ground_truth_com2nodes = utils.read_ground_truth(ground_truth_file_address)

    for node in graph.nodes():
        performance_info[node] = {
            'degree': graph.degree[node],
            'precision': 0.0,
            'recall': 0.0,
            'f1-score': 0.0
        }
        community = community_search(graph, node, start_with_given_node)
        utils.update_performance_info(node, performance_info, community,
                                      ground_truth_com2nodes)

    # Snapshot the node list and count once instead of re-querying the graph
    # for every aggregate.
    nodes = list(graph.nodes())
    node_count = graph.number_of_nodes()

    precision = sum(performance_info[x]['precision']
                    for x in nodes) / node_count
    recall = sum(performance_info[x]['recall'] for x in nodes) / node_count
    # NOTE(review): the record above is initialised with the key 'f1-score'
    # (hyphen) while the aggregates read 'f1_score' (underscore). This only
    # works if utils.update_performance_info stores 'f1_score' -- confirm and
    # align the two spellings.
    f1_score = sum(performance_info[x]['f1_score']
                   for x in nodes) / node_count
    # Population standard deviation of the per-node f1-score.
    sd = sqrt(
        sum((performance_info[x]['f1_score'] - f1_score)**2
            for x in nodes) / node_count)

    print('precision = %.4f' % precision, end='\t')
    print('recall = %.4f' % recall, end='\t')
    print('f1-score = %.4f' % f1_score, end='\t')
    print('sd(fscore) = %.4f' % sd, end='\t')
    finish_time = time.time()
    print('time = %.4f' % (finish_time - start_time))
Beispiel #4
0
import cv2 as cv
import numpy as np

import config
from sudoku_detector import SudokuDetector
from utils import rotation_correction, read_ground_truth, show

# Colour constants as 3-tuples. The values match their names when read in
# (blue, green, red) channel order.
RED = (0, 0, 255)
GREEN = (0, 255, 0)
BLUE = (255, 0, 0)

CYAN = (255, 255, 0)
MAGENTA = (255, 0, 255)

# Ground-truth annotations; iterated below as (file_path, gt_coords) pairs.
gt_annoatations = read_ground_truth(config.sudokus_gt_path)

# A single detector instance is reused for every image.
detector = SudokuDetector()

for sudoku_index, (file_path, gt_coords) in enumerate(gt_annoatations):
    # enumerate() starts at 0, so this guard never fires as written;
    # presumably the threshold is raised by hand to skip the first images
    # while debugging -- confirm.
    if sudoku_index < 0:
        continue

    # NOTE(review): `time` is used here but is not imported in the visible
    # import block of this script.
    start = time.time()
    sudoku_img_org = cv.imread(file_path)

    # Ensure that the sudoku is always rotated by at most 45 deg in either direction.
    sudoku_img_org, gt_coords = rotation_correction(sudoku_img_org, gt_coords)

    # Run the detector on the rotation-corrected image.
    det = detector.detect(sudoku_img_org)
Beispiel #5
0
def _metric_rows(results_by_k):
    """Flatten ``{k: [score, ...]}`` into ``[['SE<i>', score], ...]`` rows.

    The label counter restarts at ``SE1`` for every ``k`` entry, so the i-th
    score of each list is attributed to ``SE<i>``.
    """
    return [['SE' + str(position), score]
            for scores in results_by_k.values()
            for position, score in enumerate(scores, start=1)]


def _save_metric_barplot(df, value_column, title, ylabel, image_path):
    """Draw an annotated bar chart of ``value_column`` per ``SE`` and save it.

    Each bar is labelled with its height (two decimals) slightly above the
    bar, and the y-axis is extended to 120 % of the largest value so that
    the labels fit inside the axes.
    """
    plt.figure(figsize=(15, 7))
    plot = sns.barplot(x='SE', y=value_column, data=df)
    for p in plot.patches:
        plot.annotate(format(p.get_height(), ',.2f'),
                      (p.get_x() + p.get_width() / 2., p.get_height()),
                      ha='center',
                      va='center',
                      xytext=(0, 10),
                      textcoords='offset points')

    plt.title(title)
    plt.xlabel('Search Engines')
    plt.ylabel(ylabel)
    plt.ylim(0, df[value_column].max() * 1.2)
    plt.savefig(image_path)


def main():
    """Compare three search engines with P@k, R@k and nDCG at k = 4.

    Loads the ground truth and the three result files from ``./data``,
    evaluates each metric through ``EvaluationMetrics`` with ``k_vals=[4]``,
    prints the intermediate results and saves one bar chart per metric
    under ``./Report/Images``.
    """
    ground_truth_path = './data/part_1_2__Ground_Truth.tsv'
    result_se1_path = './data/part_1_2__Results_SE_1.tsv'
    result_se2_path = './data/part_1_2__Results_SE_2.tsv'
    result_se3_path = './data/part_1_2__Results_SE_3.tsv'

    ground_truth = utils.read_ground_truth(ground_truth_path)

    se1 = utils.read_result_se(result_se1_path)
    se2 = utils.read_result_se(result_se2_path)
    se3 = utils.read_result_se(result_se3_path)

    search_engine_conf = {1: se1, 2: se2, 3: se3}

    print('P@k....')
    P_at_k_res = EvaluationMetrics.P_at_k(ground_truth,
                                          search_engine_conf,
                                          k_vals=[4])
    df = pd.DataFrame(_metric_rows(P_at_k_res), columns=['SE', 'PK'])
    print(P_at_k_res)
    _save_metric_barplot(
        df,
        value_column='PK',
        title='P@k Evaluation Metrics of Search Engines with k =4',
        ylabel='The average P@k over all provided queries',
        image_path='./Report/Images/Pk_part1_2.png')

    print('R@k....')
    R_at_k_res = EvaluationMetrics.R_at_k(ground_truth,
                                          search_engine_conf,
                                          k_vals=[4])
    print(R_at_k_res)
    df = pd.DataFrame(_metric_rows(R_at_k_res), columns=['SE', 'RK'])
    print(df)
    _save_metric_barplot(
        df,
        value_column='RK',
        title='R@k Evaluation Metrics of Search Engines with k =4',
        ylabel='The average R@k over all provided queries',
        image_path='./Report/Images/Rk_part1_2.png')

    print('NCDG@k....')
    ncdg_at_k_res = EvaluationMetrics.ncdg(ground_truth,
                                           search_engine_conf,
                                           k_vals=[4])
    df = pd.DataFrame(_metric_rows(ncdg_at_k_res), columns=['SE', 'nDCG'])
    print(df)
    _save_metric_barplot(
        df,
        value_column='nDCG',
        title='nDCG Evaluation Metrics of Search Engines with k =4',
        ylabel='The average nDCG over all provided queries',
        image_path='./Report/Images/nDCG_part1_2.png')
Beispiel #6
0
import cv2 as cv

import config
from utils import rotation_correction, read_ground_truth

if __name__ == '__main__':
    # Visual sanity check of the ground truth: for every annotated image,
    # apply the rotation correction and draw the annotation on top of it.
    for file_name, coords in read_ground_truth(config.sudokus_gt_path):
        img, coords = rotation_correction(
            cv.imread(file_name, cv.IMREAD_COLOR), coords)

        print(f'{file_name}, Shape: {img.shape}')

        # Closed outline through all annotated points.
        img = cv.polylines(img, [coords], True, (0, 255, 0), thickness=5)

        # Highlight the first two edges (point 0 -> 1 and point 1 -> 2), each
        # in its own colour.
        highlighted_edges = (
            (0, 1, (255, 0, 255)),
            (1, 2, (255, 255, 0)),
        )
        for first, second, colour in highlighted_edges:
            cv.line(img,
                    tuple(coords[first, :]),
                    tuple(coords[second, :]), colour,
                    thickness=10)

        # Mark the centre of the image.
        height, width = img.shape[:2]
        centre = (int(round(width / 2)), int(round(height / 2)))
        img = cv.drawMarker(img, centre, (0, 0, 255), thickness=5)

        # Scale to a width of 1024 px, keeping the aspect ratio.
        shrink_factor = img.shape[1] / 1024
        img = cv.resize(img, (1024, int(img.shape[0] / shrink_factor)))
Beispiel #7
0
def _evaluate_collection(label, ground_truth_path, search_engine_conf_path,
                         configuration_path, plot_path, results_path,
                         positions):
    """Score all configurations of one collection, plot the top 5, export.

    Attaches the MRR and row-wise R-precision summary statistics to the
    configuration table, draws mean P@k and NCDG@k side by side for the five
    configurations with the highest MRR, saves that figure to ``plot_path``
    and writes the table, sorted by descending MRR, to ``results_path``.

    Args:
        label: collection name used in the plot titles.
        ground_truth_path: ground-truth file for ``utils.read_ground_truth``.
        search_engine_conf_path: query results for ``utils.read_json``.
        configuration_path: CSV describing the configurations; its ``SE_ID``
            column is used to index the query results.
        plot_path: destination of the two-panel figure.
        results_path: destination of the scored configuration CSV.
        positions: x-axis tick positions used for both panels.
    """
    ground_truth_dict = utils.read_ground_truth(ground_truth_path)
    search_engine_conf = utils.read_json(search_engine_conf_path)
    MRR_results = MRR(ground_truth_dict, search_engine_conf)
    R_precision_results = R_Precision(ground_truth_dict, search_engine_conf)

    configurations = pd.read_csv(configuration_path)
    configurations['MRR'] = MRR_results
    configurations['Mean'] = np.mean(R_precision_results, axis=1)
    configurations['Min'] = np.min(R_precision_results, axis=1)
    configurations['Max'] = np.max(R_precision_results, axis=1)
    configurations['Median'] = np.median(R_precision_results, axis=1)
    configurations['1_quartile'] = np.quantile(a=R_precision_results,
                                               q=.25,
                                               axis=1)
    configurations['3_quartile'] = np.quantile(a=R_precision_results,
                                               q=.75,
                                               axis=1)

    configurations_top_5 = configurations.sort_values(
        by=['MRR'], ascending=False).head(5).SE_ID
    print(list(configurations_top_5))
    col_names = ['Conf_' + str(i) for i in list(configurations_top_5)]
    search_engine_conf_top_5 = {
        key: search_engine_conf[key]
        for key in configurations_top_5
    }

    print('P@k....')
    P_at_k_res = P_at_k(ground_truth_dict, search_engine_conf_top_5)
    temp_df = pd.DataFrame(P_at_k_res).transpose()
    temp_df.columns = col_names
    figure, axes = plt.subplots(1, 2, figsize=(8, 3))
    ax1 = temp_df.plot(title='P@k %s Data' % label, ax=axes[0], style='.-')
    ax1.set_xticks(positions)
    ax1.set_xlabel('k values')
    ax1.set_ylabel('Mean P@k')

    print('NCDG@k....')
    ncdg_at_k_res = ncdg(ground_truth_dict, search_engine_conf_top_5)
    temp_df = pd.DataFrame(ncdg_at_k_res).transpose()
    temp_df.columns = col_names
    ax2 = temp_df.plot(title='NCDG@k %s Data' % label,
                       ax=axes[1],
                       style='.-')
    ax2.set_xticks(positions)
    ax2.set_xlabel('k values')
    ax2.set_ylabel('Mean NCDG@k')
    plt.tight_layout()
    figure.savefig(plot_path)

    configurations = configurations.sort_values(by=['MRR'], ascending=False)
    configurations.to_csv(results_path, index=False)


def main():
    """Evaluate the Cranfield and the Time collections in turn.

    Both collections go through the same pipeline; only the input/output
    paths and the label shown in the plot titles differ.
    """
    positions = (1, 3, 5, 10)

    # CRANFIELD
    _evaluate_collection(
        label='Cranfield',
        ground_truth_path='../../../data/cran_Ground_Truth.tsv',
        search_engine_conf_path='../../../data/total_query_results_cran.json',
        configuration_path='../../../data/SearchEnginesCran.csv',
        plot_path='../../../Report/Images/CranPlot.png',
        results_path=r'../../../data/SearchEnginesResultsCran.csv',
        positions=positions)

    # TIME
    _evaluate_collection(
        label='Time',
        ground_truth_path='../../../data/time_Ground_Truth.tsv',
        search_engine_conf_path='../../../data/total_query_results_time.json',
        configuration_path='../../../data/SearchEnginesTime.csv',
        plot_path='../../../Report/Images/TimePlot.png',
        results_path=r'../../../data/SearchEnginesResultsTime.csv',
        positions=positions)