def printPriors():
    # PRINT prior VECTORIZED
    vectorized_priors = [
        utils.readFromFile("global_mse", param.global_res_path).item(),
        utils.readFromFile("global_rho", param.global_res_path).item(),
        utils.readFromFile("global_tau", param.global_res_path).item(),
        utils.readFromFile("global_p@10", param.global_res_path).item(),
        utils.readFromFile("global_p@20", param.global_res_path).item()
    ]
    print("\nWithout LSH (VECTORIZED):")
    print_evaluation(vectorized_priors)
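print_evaluation is called throughout these examples but never shown. Below is a minimal sketch of what it could look like, assuming the metric list order used above (MSE, Spearman's rho, Kendall's tau, p@10, p@20) and an optional list of pre-formatted error strings; only the call signature is taken from the snippets, the body is an assumption.

def print_evaluation(metrics, errors=None):
    # Hypothetical reconstruction: only the call sites appear in these snippets.
    # `metrics` is assumed to be [mse, rho, tau, p@10, p@20], in that order.
    labels = ["mse", "rho", "tau", "p@10", "p@20"]
    for label, value in zip(labels, metrics):
        print("{} = {}".format(label, value))
    # `errors` is assumed to be a list of already-formatted strings
    # (e.g. the SSE / average-relative-error lines built in priorVSposterior).
    if errors is not None:
        for line in errors:
            print(line)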
Example 2
def getCollections():
    for i, item in enumerate(COLLECTIONS_ARCHIVE):
        print(f"{i}: {item['name']}")
    while True:
        collectionId = int(input("Select a collection: "))
        if 0 <= collectionId < len(COLLECTIONS_ARCHIVE):
            break
    archivePath = COLLECTIONS_ARCHIVE[collectionId]['path']
    if checkCollections(archivePath) == -1:
        print('ERROR: collections failed validation')
        return -1
    return (eval(utils.readFromFile(archivePath + COLLECTIONS_FILE_NAME)),
            eval(utils.readFromFile(archivePath + TF_FILE_NAME)))
def lshUtilizationCounts():
    for i in utils.readFromFile("utilization_counts", param.lsh_res_path):
        print(i)

    norm_ged_index = utils.readFromFile("global_norm_ged_lsh_index",
                                        param.global_res_path)

    pair_geds = []
    for i in norm_ged_index:
        for j in i:
            if j["lsh_use"]:
                pair_geds.append(j["target_denorm"])

    plot.LSHGEDdistribution(pair_geds, dataset_name, path=param.lsh_res_path)
def vectorizedVSloopy():
    for i in utils.readFromFile("zeroDifsCounts", param.vector_loopy_res_path):
        print(i)

    differences = utils.readFromFile("AbsolutePiorDifs",
                                     param.vector_loopy_res_path)

    printPriors()

    # The loopy (loop-based) priors are printed in the next section

    plot.heatmap(differences,
                 dataset_name,
                 path=param.vector_loopy_res_path + plot_subfolder_name)
    plot.histogram(differences,
                   dataset_name,
                   path=param.vector_loopy_res_path + plot_subfolder_name)
Example 5
def plot_statistics(statistics_dicts, out_filename):
    tex_str = ''
    offset_strs = list(statistics_dicts.keys())
    offset_strs.sort()
    for offset_str in offset_strs:
        statistics_dict = statistics_dicts[offset_str]
        keys = statistics_dict.keys()

        # all but locally converged (2) and early stopped (5) count as possibly crossed
        lines = ['({}, {})'.format(n_hidden, 1.0 - ((statistics_dict[n_hidden][2] + statistics_dict[n_hidden][5]) / np.sum(statistics_dict[n_hidden])))
                 for n_hidden in np.sort(np.array(list(keys)))]
        tex_str += '\\addplot coordinates {\n' + '\n'.join(lines) + '};\n\\addlegendentry{$\\Delta = ' + offset_str + '$}\n'

    print('LaTeX code excerpt:')
    print(tex_str)

    tex_str = utils.readFromFile('tex_head.txt') + tex_str + utils.readFromFile('tex_tail.txt')
    utils.writeToFile(out_filename, tex_str)
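A hedged usage sketch of plot_statistics, showing the input shape it appears to expect: a dict keyed by offset strings, each mapping a hidden-unit count to a vector of outcome counts in which index 2 is "locally converged" and index 5 is "early stopped". The numbers are made up, and tex_head.txt / tex_tail.txt must exist wherever utils.readFromFile looks for them.

import numpy as np

# Hypothetical input: six outcome counts per hidden-unit size, per offset.
statistics_dicts = {
    '0.1': {8:  np.array([3, 0, 2, 1, 0, 4]),
            16: np.array([5, 1, 1, 2, 0, 1])},
    '0.5': {8:  np.array([1, 0, 6, 0, 0, 3]),
            16: np.array([2, 0, 4, 1, 0, 3])},
}
plot_statistics(statistics_dicts, 'crossing_fraction.tex')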
def main():
    tree = None
    if not u.definitions['LoadFromFile']:
        dictionaryArray = u.readFromFile(u.definitions['DictionaryPath'], readLines=True)
        tree = bktree.BKTree(stringmetrics.levenshtein)
        tree.parallelAdd(dictionaryArray)
        u.saveObjectToFile(object=tree, savePath=u.definitions['TreeSavePath'])
    else:
        tree = u.loadObjectFromFile(u.definitions['TreeSavePath'])

    book = u.getBook(glob(u.definitions['BookPathAndExt'])[0])
    p = Pool(processes=cpu_count())
    spellingMistakes = p.map(tree.findMistakes, [(word, book) for word in book])
    spellingMistakes = [x for x in spellingMistakes if x is not None]
    print(spellingMistakes)
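For context on why the snippet above builds a BK-tree over a Levenshtein metric, here is a self-contained illustration of the lookup idea. It is independent of the bktree.BKTree / stringmetrics API used above (whose internals are not shown), so treat it as a sketch rather than the project's implementation.

def levenshtein(a, b):
    # Classic dynamic-programming edit distance.
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1,                  # deletion
                           cur[j - 1] + 1,               # insertion
                           prev[j - 1] + (ca != cb)))    # substitution
        prev = cur
    return prev[-1]

class SimpleBKTree:
    # Illustrative BK-tree; each node stores a word and children keyed by distance.
    def __init__(self, metric):
        self.metric = metric
        self.root = None  # (word, {distance: child_node})

    def add(self, word):
        if self.root is None:
            self.root = (word, {})
            return
        node = self.root
        while True:
            d = self.metric(word, node[0])
            if d in node[1]:
                node = node[1][d]
            else:
                node[1][d] = (word, {})
                return

    def search(self, word, tol):
        results, stack = [], ([self.root] if self.root else [])
        while stack:
            node = stack.pop()
            d = self.metric(word, node[0])
            if d <= tol:
                results.append((d, node[0]))
            # Triangle inequality: only children at distance d-tol..d+tol can match.
            for dist, child in node[1].items():
                if d - tol <= dist <= d + tol:
                    stack.append(child)
        return results

tree = SimpleBKTree(levenshtein)
for w in ["hello", "help", "shell", "world"]:
    tree.add(w)
print(tree.search("helo", 1))  # [(1, 'hello'), (1, 'help')] (order may vary)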
def getDatasetName():
    d = ['AIDS700nef', 'LINUX', 'IMDBMulti']

    #num = input("Which dataset statistics? Press the number"
    #            "\n0. {} \n1. {} \n2. {}".format(d[0], d[1], d[2]))
    #dataset_name = d[int(num)]
    dataset_name = d[0]

    param.initGlobals(dataset_name)

    tests.testName(
        dataset_name,
        utils.readFromFile("datasetName",
                           param.temp_runfiles_path,
                           param_type='str'))

    return dataset_name
Example 8
def prepareByFile(imgObject, detector, w, h):
    '''
    Resize the image and return the object(s) prepared for comparison.

    :param imgObject: the image (loaded via utils.readFromFile)
    :param detector: feature detector used to extract keypoints and descriptors
    :param w: image width after resize
    :param h: image height after resize

    :returns kp: keypoints of the image
    :returns desc: descriptors of the image keypoints
    '''

    img = utils.readFromFile(imgObject)
    img = utils.resize(img, w, h)

    print('PREPARE')

    if not detector:
        raise Exception("Detector can't be None")
    kp, desc = detector.detectAndCompute(img, None)
    return kp, desc
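A hedged usage sketch of prepareByFile, assuming an OpenCV detector such as ORB; the image path below is illustrative, and utils.readFromFile / utils.resize are the project's own helpers.

import cv2

# Hypothetical call site: any cv2 detector exposing detectAndCompute works here.
orb = cv2.ORB_create()
kp, desc = prepareByFile('images/sample.jpg', orb, 320, 240)
print(len(kp), None if desc is None else desc.shape)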
from utils import readFromFile, transform
from Service import run

if __name__ == '__main__':
    print(
        "Enter: \n 1 - to process easy.txt \n 2 - to process medium.txt \n 3 - to process hard.txt \n 4 - to "
        "process fricker26.txt \n 5 - to process berlin \n"
    )

    ind = int(input())
    if ind == 1:
        fileName = 'data/easy.txt'
    elif ind == 2:
        fileName = 'data/medium.txt'
    elif ind == 3:
        fileName = 'data/hard.txt'
    elif ind == 4:
        fileName = 'data/fricker26.txt'

    if ind == 5:
        mat = transform('data/berlin.in')
    else:
        mat = readFromFile(fileName)

    problParam = {'matrix': mat, 'noNodes': len(mat)}
    generationParam = {'popSize': 400, 'noGen': 2000}

    run(problParam, generationParam)
Example 10
def getCollections():
    if checkCollections() == -1:
        print('ERROR: collections failed validation')
        return -1
    return eval(utils.readFromFile(COLLECTIONS_FILE_NAME)), eval(
        utils.readFromFile(TF_FILE_NAME))
Example 11
from GA import GA
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import warnings
import math
from utils import citire, modularity, afisare, generateNewValue, readFromFile, readFromFileMedium, readFromFileCoordonate

from random import seed

mat = []
nrNoduri, mat = readFromFile(mat, 'easy_01_tsp.txt')

gaParam = {"popSize": 15, "noGen": 50, "network": mat}
problParam = {'function': modularity, 'retea': mat, 'noNodes': nrNoduri}


def main():

    ga = GA(gaParam, problParam)
    # Initialization: generate popSize chromosomes and add them to the population.
    ga.initialisation()
    ga.evaluation()

    for g in range(gaParam['noGen']):

        ga.oneGenerationElitism()

        bestChromo = ga.bestChromosome()

        #print(bestChromo.repres)
Example 12
def maxPopulation(k, dataSet):
    Cp, C = [], []
    for i in dataSet.keys():
        Cp.append((i, dataSet[i][0]))

    Cp = sorted(Cp, key=lambda people: people[1], reverse=True)
    Cp = Cp[:k]
    for el in Cp:
        C.append(el[0])

    return C
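maxPopulation only assumes that index 0 of each value holds the population count and returns the keys of the k largest entries. A tiny illustrative call with made-up county codes and populations (the real CSV rows may carry additional fields):

sample = {
    '01001': (54571, 0.05),
    '01003': (182265, 0.02),
    '01005': (27457, 0.07),
}
print(maxPopulation(2, sample))  # ['01003', '01001']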


# build the data sets
minDataSet = readFromFile("data/unifiedCancerData_111.csv")
mediumDataSet = readFromFile("data/unifiedCancerData_290.csv")
maxDataSet = readFromFile("data/unifiedCancerData_896.csv")
completeDataSet = readFromFile("data/unifiedCancerData_3108.csv")

# Question 1
k = 15  # number of clusters
q = 5  # number of iterations in k-means clustering
P = completeDataSet.keys()

hierarchical_clusters_dict = hierarchicalClustering(P, k, False)

ClusterGraph(hierarchical_clusters_dict)

# Question 2
# Build the list of centers: the 15 coordinates with the largest population
def priorVSposterior():
    norm_ged_index = utils.readFromFile("global_norm_ged_lsh_index",
                                        param.global_res_path)
    ged_dif_lsh, ged_dif_nolsh, true_ged = getGlobalGEDDifs(norm_ged_index)

    # PRINT prior LOOPY
    loopy_priors = [
        utils.readFromFile("global_prior_mse", param.global_res_path).item(),
        utils.readFromFile("global_prior_rho", param.global_res_path).item(),
        utils.readFromFile("global_prior_tau", param.global_res_path).item(),
        utils.readFromFile("global_prior_p@10", param.global_res_path).item(),
        utils.readFromFile("global_prior_p@20", param.global_res_path).item()
    ]
    SSE_noLSH = utils.getSSE(ged_dif_nolsh)  # Sum of squared errors
    AVG_REL_ERROR_noLSH = utils.getAvRelEr(ged_dif_nolsh,
                                           true_ged)  # Average relative error
    prior_errors = [
        "\nSSE (no LSH) = {}".format(SSE_noLSH),
        "AVG_REL_ERROR (no LSH) = {}".format(AVG_REL_ERROR_noLSH)
    ]
    print("\nWithout LSH (Loop-based):")
    print_evaluation(metrics=loopy_priors, errors=prior_errors)

    # PRINT POSTERIOR
    loopy_posts = [
        utils.readFromFile("global_post_mse", param.global_res_path).item(),
        utils.readFromFile("global_post_rho", param.global_res_path).item(),
        utils.readFromFile("global_post_tau", param.global_res_path).item(),
        utils.readFromFile("global_post_p@10", param.global_res_path).item(),
        utils.readFromFile("global_post_p@20", param.global_res_path).item()
    ]
    SSE_LSH = utils.getSSE(ged_dif_lsh)  # Sum of squared errors
    AVG_REL_ERROR_LSH = utils.getAvRelEr(ged_dif_lsh,
                                         true_ged)  # Average relative error
    post_errors = [
        "\nSSE (LSH) = {}".format(SSE_LSH),
        "AVG_REL_ERROR (LSH) = {}".format(AVG_REL_ERROR_LSH)
    ]
    print("\nWith LSH:")
    print_evaluation(metrics=loopy_posts, errors=post_errors)

    # Now the global distribution and variance of the errors.
    plot.comparativeDistribution(np.abs(ged_dif_lsh),
                                 np.abs(ged_dif_nolsh),
                                 dataset_name,
                                 path=param.global_res_path +
                                 plot_subfolder_name)
    plot.comparativeScatterplot(np.abs(ged_dif_lsh),
                                np.abs(ged_dif_nolsh),
                                dataset_name,
                                path=param.global_res_path +
                                plot_subfolder_name)

    return loopy_posts, SSE_LSH, AVG_REL_ERROR_LSH
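utils.getSSE and utils.getAvRelEr are used above but not shown. The following are plausible stand-ins under the standard definitions their names and inline comments suggest (sum of squared errors, and mean absolute relative error against the true GED); the real helpers may differ.

import numpy as np

def getSSE_sketch(differences):
    # Sum of squared per-pair GED differences.
    differences = np.asarray(differences, dtype=float)
    return float(np.sum(differences ** 2))

def getAvRelEr_sketch(differences, true_values):
    # Mean of |error| / true GED; assumes the true GEDs are non-zero.
    differences = np.asarray(differences, dtype=float)
    true_values = np.asarray(true_values, dtype=float)
    return float(np.mean(np.abs(differences) / true_values))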
def drillDownBuckets(drillDownStats):
    path_for_dd_plots = param.global_res_path + 'drill_down_' + plot_subfolder_name
    trainable_buckets = utils.readFromFile("trainable_buckets_dict",
                                           param.lsh_res_path,
                                           param_type='dict')
    dd_index = utils.readFromFile("drill_down_index", param.lsh_res_path)

    bucketpriors = [[] for _ in range(len(trainable_buckets["bucketName"]))]
    bucketposts = [[] for _ in range(len(trainable_buckets["bucketName"]))]
    buckettargets = [[] for _ in range(len(trainable_buckets["bucketName"]))]

    for drill_dict in dd_index:
        bucketpriors[drill_dict["bucket_index"]].append(
            drill_dict["priorpred"])
        bucketposts[drill_dict["bucket_index"]].append(drill_dict["postpred"])
        buckettargets[drill_dict["bucket_index"]].append(drill_dict["target"])

    for i, b in enumerate(trainable_buckets["bucketName"]):
        prior = np.array(bucketpriors[i])
        ground = np.array(buckettargets[i])
        post = np.array(bucketposts[i])

        # if there are unutilized buckets they should be skipped
        if len(prior) == 0: continue

        # Error differences used for the statistics and plots below
        drill_ged_dif_lsh = post - ground
        drill_ged_dif_nolsh = prior - ground

        # Prior metrics in the paper-style evaluation format (MSE, rho, tau, p@10, p@20)
        prior_drills = [
            np.mean(
                F.mse_loss(torch.tensor(prior),
                           torch.tensor(ground),
                           reduction='none').detach().numpy()),
            calculate_ranking_correlation(spearmanr, prior, ground),
            calculate_ranking_correlation(kendalltau, prior, ground),
            calculate_prec_at_k(10, prior, ground),
            calculate_prec_at_k(20, prior, ground)
        ]
        SSE_noLSH_drill = utils.getSSE(drill_ged_dif_nolsh)
        AVG_REL_ERROR_noLSH_drill = utils.getAvRelEr(drill_ged_dif_nolsh,
                                                     ground)
        prior_errors_drill = [
            "\nSSE (no LSH) = {}".format(SSE_noLSH_drill),
            "AVG_REL_ERROR (no LSH) = {}".format(AVG_REL_ERROR_noLSH_drill)
        ]
        print("\nTable {}, bucket {} ({})".format(
            trainable_buckets["table"][i], b, int(b, 2)))
        print("\nWITHOUT LSH:")
        print_evaluation(prior_drills, prior_errors_drill)

        # Posterior metrics in the paper-style evaluation format
        scoresDRILLPOST = np.mean(
            F.mse_loss(torch.tensor(post),
                       torch.tensor(ground),
                       reduction='none').detach().numpy())
        rho_listDRILLPOST = calculate_ranking_correlation(
            spearmanr, post, ground)
        tau_listDRILLPOST = calculate_ranking_correlation(
            kendalltau, post, ground)
        prec_at_10_listDRILLPOST = calculate_prec_at_k(10, post, ground)
        prec_at_20_listDRILLPOST = calculate_prec_at_k(20, post, ground)

        SSE_LSH_drill = utils.getSSE(drill_ged_dif_lsh)
        AVG_REL_ERROR_LSH_drill = utils.getAvRelEr(drill_ged_dif_lsh, ground)
        post_errors_drill = [
            "\nSSE (LSH) = {}".format(SSE_LSH_drill),
            "AVG_REL_ERROR (LSH) = {}".format(AVG_REL_ERROR_LSH_drill)
        ]
        print("\nWITH LSH:")
        print_evaluation([
            scoresDRILLPOST, rho_listDRILLPOST, tau_listDRILLPOST,
            prec_at_10_listDRILLPOST, prec_at_20_listDRILLPOST
        ], post_errors_drill)

        # For bar chart
        label = "Table {}, bucket {}".format(trainable_buckets["table"][i],
                                             int(b, 2))

        drillDownStats["labels"].append(label)
        drillDownStats["mse"].append(scoresDRILLPOST)
        drillDownStats["rho"].append(rho_listDRILLPOST)
        drillDownStats["tau"].append(tau_listDRILLPOST)
        drillDownStats["p10"].append(prec_at_10_listDRILLPOST)
        drillDownStats["p20"].append(prec_at_20_listDRILLPOST)
        drillDownStats["sse"].append(SSE_LSH_drill)
        drillDownStats["ale"].append(AVG_REL_ERROR_LSH_drill)

        # Error distribution

        plot.comparativeDistribution(np.abs(drill_ged_dif_lsh),
                                     np.abs(drill_ged_dif_nolsh),
                                     dataset_name,
                                     path=path_for_dd_plots,
                                     address=label)
        plot.comparativeScatterplot(np.abs(drill_ged_dif_lsh),
                                    np.abs(drill_ged_dif_nolsh),
                                    dataset_name,
                                    path=path_for_dd_plots,
                                    address=label)

        # LSH Utilization
        # used_pairs = len(prior)
        # how will I get bucket size?
        # bucket_pairs = 0
        # pairspercent = round(used_pairs * 100 / bucket_pairs, 1)
        # print("\nLSH Usage (pairs): {} of {} ({}%)".format(used_pairs, bucket_pairs, pairspercent))

    # Now we plot the drill down bar chart WITH LSH
    # First the SSE on its own since it's way bigger than the others.
    plot.drillDownSSE(drillDownStats["labels"],
                      drillDownStats["sse"],
                      dataset_name,
                      path=path_for_dd_plots)
    plot.drillDownMSE(drillDownStats["labels"],
                      drillDownStats["mse"],
                      dataset_name,
                      path=path_for_dd_plots)

    plot.drillDownCorrelation(drillDownStats,
                              dataset_name,
                              path=path_for_dd_plots)
    plot.drillDownStats2(drillDownStats, dataset_name, path=path_for_dd_plots)
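Similarly, calculate_ranking_correlation and calculate_prec_at_k belong to the surrounding evaluation utilities and are not shown. Here are hedged sketches consistent with how they are called above: a scipy rank-correlation function applied to prediction/ground-truth arrays, and precision at k over the top-k sets (the real helpers may rank in the opposite direction or break ties differently).

import numpy as np
from scipy.stats import spearmanr, kendalltau

def calculate_ranking_correlation_sketch(rank_corr_function, prediction, target):
    # Apply a scipy correlation (spearmanr / kendalltau) and return the statistic.
    return rank_corr_function(prediction, target).correlation

def calculate_prec_at_k_sketch(k, prediction, target):
    # Fraction of the predicted top-k that also appears in the true top-k.
    # "Top" is taken here as smallest GED (most similar); this is an assumption.
    pred_top = set(np.argsort(prediction)[:k])
    true_top = set(np.argsort(target)[:k])
    return len(pred_top & true_top) / k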