Exemple #1
0
 def quality_pnn(i, jaccard_similarity_matrix, p):
     '''Zero-one quality estimate by p nearest neighbours classification.

     The p columns holding the largest values in row ``i`` of
     ``jaccard_similarity_matrix`` are looked up in ``graph_database`` and
     their targets (element ``[2]`` of each entry) are handed to
     ``statistics.predict_target_majority``. The resulting estimate is
     compared with the target stored for ``i`` itself.

     Args:
         i: Index of the element under evaluation; used both as row index
             of the matrix and as key/index into ``graph_database``.
         jaccard_similarity_matrix: Object indexed as ``[i, :]`` whose rows
             provide ``argsort()`` (presumably a NumPy array -- confirm).
         p: Number of neighbours taken into account; must be at least 1.

     Returns:
         1 if the estimate equals the true target (or is contained in it
         when the true target is a list), 0 otherwise.

     Raises:
         ValueError: If ``p`` is smaller than 1.
     '''
     if p < 1:
         # ``[-0:]`` would silently select *every* column and a negative p
         # would slice from the front, so reject both explicitly.
         raise ValueError("p must be a positive integer, got {0!r}".format(p))
     # argsort() sorts ascending, so the last p indices carry the largest
     # similarity values.
     # NOTE(review): index i itself is not excluded from the neighbours --
     # confirm that the diagonal cannot take part in the vote.
     nearest_cols = jaccard_similarity_matrix[i, :].argsort()[-p:]
     # Build a real list (not a lazy ``map`` object) so the helper receives
     # a sequence on Python 3 as well.
     neighbour_targets = [graph_database[c][2] for c in nearest_cols]
     true_target_i = graph_database[i][2]
     estimated_target_i = statistics.predict_target_majority(neighbour_targets)
     if isinstance(true_target_i, list):
         # Several acceptable targets: membership counts as a hit.
         return int(estimated_target_i in true_target_i)  # zero-one loss
     return int(true_target_i == estimated_target_i)  # zero-one loss
 def quality_pnn(i, jaccard_similarity_matrix, p):
     '''Zero-one quality estimate by p nearest neighbours classification.

     Row ``i`` of ``jaccard_similarity_matrix`` is argsorted and the p
     indices with the largest values are taken as neighbours. Their targets
     (element ``[2]`` of the matching ``graph_database`` entries) are passed
     to ``statistics.predict_target_majority`` and the estimate is compared
     with the target stored for ``i``.

     Args:
         i: Index of the element under evaluation; used both as row index
             of the matrix and as key/index into ``graph_database``.
         jaccard_similarity_matrix: Object indexed as ``[i, :]`` whose rows
             provide ``argsort()`` (presumably a NumPy array -- confirm).
         p: Number of neighbours taken into account; must be at least 1.

     Returns:
         1 if the estimate equals the true target (or is contained in it
         when the true target is a list), 0 otherwise.

     Raises:
         ValueError: If ``p`` is smaller than 1.
     '''
     if p < 1:
         # A slice of ``[-0:]`` returns the whole row and a negative p cuts
         # from the front; neither is a meaningful neighbourhood.
         raise ValueError("p must be a positive integer, got {0!r}".format(p))
     # Ascending argsort: the tail holds the most similar columns.
     # NOTE(review): index i itself is not excluded from the neighbours --
     # confirm that the diagonal cannot take part in the vote.
     nearest_cols = jaccard_similarity_matrix[i, :].argsort()[-p:]
     # List comprehension instead of ``map`` so the result is a concrete
     # list under Python 3 too.
     neighbour_targets = [graph_database[c][2] for c in nearest_cols]
     true_target_i = graph_database[i][2]
     estimated_target_i = statistics.predict_target_majority(
         neighbour_targets)
     if isinstance(true_target_i, list):
         # Several acceptable targets: membership counts as a hit.
         return int(estimated_target_i in true_target_i)  # zero-one loss
     return int(true_target_i == estimated_target_i)  # zero-one loss
Exemple #3
0
    def quality(i, jaccard_similarity_matrix, infl_point):
        '''Zero-one quality estimate from all columns above a threshold.

        Every column whose value in row ``i`` of
        ``jaccard_similarity_matrix`` is at least ``1 - infl_point`` votes:
        the targets of those columns (element ``[2]`` of the matching
        ``graph_database`` entries) are passed to
        ``statistics.predict_target_majority`` and the estimate is compared
        with the target stored for ``i``.

        Args:
            i: Index of the element under evaluation; used both as row
                index of the matrix and as key/index into ``graph_database``.
            jaccard_similarity_matrix: Object indexed as ``[i, :]`` whose
                row can be compared element-wise with a float (presumably a
                NumPy array -- confirm).
            infl_point: Value subtracted from 1 to obtain the similarity
                threshold.

        Returns:
            1 if the estimate equals the true target (or is contained in it
            when the true target is a list), 0 otherwise.
        '''
        threshold = 1. - infl_point
        # np.where on a 1-D condition returns a 1-tuple; [0] is the index array.
        # NOTE(review): index i itself is not excluded from the voters --
        # confirm this is intended.
        similar_cols = np.where(jaccard_similarity_matrix[i, :] >= threshold)[0]
        # Concrete list rather than a lazy ``map`` object (Python 3 safe).
        similar_targets = [graph_database[c][2] for c in similar_cols]
        true_target_i = graph_database[i][2]
        estimated_target_i = statistics.predict_target_majority(similar_targets)
        if isinstance(true_target_i, list):
            # Several acceptable targets: membership counts as a hit.
            return int(estimated_target_i in true_target_i)  # zero-one loss
        return int(true_target_i == estimated_target_i)  # zero-one loss
 def quality(i, jaccard_similarity_matrix, infl_point):
     '''Zero-one quality estimate from all columns above a threshold.

     Columns whose value in row ``i`` of ``jaccard_similarity_matrix``
     reaches ``1 - infl_point`` are collected; their targets (element
     ``[2]`` of the matching ``graph_database`` entries) are passed to
     ``statistics.predict_target_majority`` and the estimate is compared
     with the target stored for ``i``.

     Args:
         i: Index of the element under evaluation; used both as row index
             of the matrix and as key/index into ``graph_database``.
         jaccard_similarity_matrix: Object indexed as ``[i, :]`` whose row
             can be compared element-wise with a float (presumably a NumPy
             array -- confirm).
         infl_point: Value subtracted from 1 to obtain the similarity
             threshold.

     Returns:
         1 if the estimate equals the true target (or is contained in it
         when the true target is a list), 0 otherwise.
     '''
     threshold = 1. - infl_point
     above_threshold = jaccard_similarity_matrix[i, :] >= threshold
     # np.where on a 1-D condition returns a 1-tuple; [0] is the index array.
     # NOTE(review): index i itself is not excluded from the voters --
     # confirm this is intended.
     similar_cols = np.where(above_threshold)[0]
     # Concrete list rather than a lazy ``map`` object (Python 3 safe).
     similar_targets = [graph_database[c][2] for c in similar_cols]
     true_target_i = graph_database[i][2]
     estimated_target_i = statistics.predict_target_majority(
         similar_targets)
     if isinstance(true_target_i, list):
         # Several acceptable targets: membership counts as a hit.
         return int(estimated_target_i in true_target_i)  # zero-one loss
     return int(true_target_i == estimated_target_i)  # zero-one loss
Exemple #5
0
    def quality(i, sketch_matrix):
        '''Zero-one quality estimate from the columns a sketch deems similar.

        The column for ``i`` is fetched with ``sketch_matrix.get_column`` and
        handed to ``sketch_matrix.get_similar_columns``. After dropping ``i``
        from the result, the targets of the remaining columns (element
        ``[2]`` of the matching ``graph_database`` entries) are passed to
        ``statistics.predict_target_majority`` and the estimate is compared
        with the target stored for ``i``.

        Args:
            i: Index of the element under evaluation; used both as column
                index of the sketch and as key/index into ``graph_database``.
            sketch_matrix: Object providing ``get_column(i)`` and
                ``get_similar_columns(col)``; the latter must return an
                iterable of column indices.

        Returns:
            1 if the estimate equals the true target (or is contained in it
            when the true target is a list), 0 otherwise.
        '''
        col_i = sketch_matrix.get_column(i)
        similar_cols = list(sketch_matrix.get_similar_columns(col_i))
        # The element must not vote for itself.
        if i in similar_cols:
            similar_cols.remove(i)
        # Concrete list rather than a lazy ``map`` object (Python 3 safe).
        similar_targets = [graph_database[c][2] for c in similar_cols]
        true_target_i = graph_database[i][2]
        estimated_target_i = statistics.predict_target_majority(similar_targets)
        if isinstance(true_target_i, list):
            # Several acceptable targets: membership counts as a hit.
            return int(estimated_target_i in true_target_i)  # zero-one loss
        return int(true_target_i == estimated_target_i)  # zero-one loss
 def quality(i, sketch_matrix):
     '''Zero-one quality estimate from the columns a sketch deems similar.

     ``sketch_matrix.get_column(i)`` is passed to
     ``sketch_matrix.get_similar_columns``; ``i`` is dropped from the
     returned columns and the targets of the rest (element ``[2]`` of the
     matching ``graph_database`` entries) go to
     ``statistics.predict_target_majority``. The estimate is compared with
     the target stored for ``i``.

     Args:
         i: Index of the element under evaluation; used both as column
             index of the sketch and as key/index into ``graph_database``.
         sketch_matrix: Object providing ``get_column(i)`` and
             ``get_similar_columns(col)``; the latter must return an
             iterable of column indices.

     Returns:
         1 if the estimate equals the true target (or is contained in it
         when the true target is a list), 0 otherwise.
     '''
     col_i = sketch_matrix.get_column(i)
     similar_cols = list(sketch_matrix.get_similar_columns(col_i))
     # The element must not vote for itself.
     if i in similar_cols:
         similar_cols.remove(i)
     # Concrete list rather than a lazy ``map`` object (Python 3 safe).
     similar_targets = [graph_database[c][2] for c in similar_cols]
     true_target_i = graph_database[i][2]
     estimated_target_i = statistics.predict_target_majority(
         similar_targets)
     if isinstance(true_target_i, list):
         # Several acceptable targets: membership counts as a hit.
         return int(estimated_target_i in true_target_i)  # zero-one loss
     return int(true_target_i == estimated_target_i)  # zero-one loss