def quality_pnn(i, jaccard_similarity_matrix, p):
    '''Quality estimation by p nearest neighbors classification.

    The target of column ``i`` is predicted by a majority vote over the
    targets of the ``p`` columns with the largest entries in row ``i`` of
    ``jaccard_similarity_matrix``. Targets are read from index 2 of the
    corresponding ``graph_database`` entries.

    Args:
        i: Index of the column (and ``graph_database`` entry) to evaluate.
        jaccard_similarity_matrix: Array indexable as ``[i, :]`` whose rows
            support ``argsort()`` (presumably a NumPy array -- confirm).
        p: Number of nearest neighbors that vote; must be at least 1.

    Returns:
        1 if the estimated target equals the true target (or is contained
        in it when the true target is a list), otherwise 0.

    Raises:
        ValueError: If ``p`` is smaller than 1.
    '''
    if p < 1:
        # Without this guard, p == 0 would slice [-0:] and silently select
        # every column instead of none.
        raise ValueError("p must be a positive integer, got {0!r}".format(p))

    # argsort() orders ascending, so the last p positions are the columns
    # with the largest similarity values.
    # NOTE(review): column i itself is not excluded; if row i contains its
    # self-similarity, i votes for its own target -- confirm this is intended.
    neighbor_cols = jaccard_similarity_matrix[i, :].argsort()[-p:]

    # Built as a real list so the helper receives a sequence on both
    # Python 2 and Python 3 (map() is lazy on Python 3).
    neighbor_targets = [graph_database[c][2] for c in neighbor_cols]

    true_target_i = graph_database[i][2]
    estimated_target_i = statistics.predict_target_majority(neighbor_targets)

    # Score: 1 for a hit, 0 for a miss. A list-valued true target counts as
    # a hit when the estimate is any one of its members.
    if isinstance(true_target_i, list):
        return int(estimated_target_i in true_target_i)
    return int(true_target_i == estimated_target_i)
def quality_pnn(i, jaccard_similarity_matrix, p):
    '''Quality estimation by p nearest neighbors classification.

    The target of column ``i`` is predicted by a majority vote over the
    targets of the ``p`` columns with the largest entries in row ``i`` of
    ``jaccard_similarity_matrix``. Targets are read from index 2 of the
    corresponding ``graph_database`` entries.

    Args:
        i: Index of the column (and ``graph_database`` entry) to evaluate.
        jaccard_similarity_matrix: Array indexable as ``[i, :]`` whose rows
            support ``argsort()`` (presumably a NumPy array -- confirm).
        p: Number of nearest neighbors that vote; must be at least 1.

    Returns:
        1 if the estimated target equals the true target (or is contained
        in it when the true target is a list), otherwise 0.

    Raises:
        ValueError: If ``p`` is smaller than 1.
    '''
    if p < 1:
        # Without this guard, p == 0 would slice [-0:] and silently select
        # every column instead of none.
        raise ValueError("p must be a positive integer, got {0!r}".format(p))

    # argsort() orders ascending, so the last p positions are the columns
    # with the largest similarity values.
    # NOTE(review): column i itself is not excluded; if row i contains its
    # self-similarity, i votes for its own target -- confirm this is intended.
    neighbor_cols = jaccard_similarity_matrix[i, :].argsort()[-p:]

    # Built as a real list so the helper receives a sequence on both
    # Python 2 and Python 3 (map() is lazy on Python 3).
    neighbor_targets = [graph_database[c][2] for c in neighbor_cols]

    true_target_i = graph_database[i][2]
    estimated_target_i = statistics.predict_target_majority(neighbor_targets)

    # Score: 1 for a hit, 0 for a miss. A list-valued true target counts as
    # a hit when the estimate is any one of its members.
    if isinstance(true_target_i, list):
        return int(estimated_target_i in true_target_i)
    return int(true_target_i == estimated_target_i)
def quality(i, jaccard_similarity_matrix, infl_point):
    '''Quality estimation by similarity-threshold classification.

    Every column whose entry in row ``i`` of ``jaccard_similarity_matrix``
    is at least ``1 - infl_point`` votes with its target (index 2 of its
    ``graph_database`` entry); the majority decides the estimate for
    column ``i``.

    Args:
        i: Index of the column (and ``graph_database`` entry) to evaluate.
        jaccard_similarity_matrix: Array indexable as ``[i, :]`` and
            comparable element-wise against a scalar.
        infl_point: Value subtracted from 1 to obtain the similarity
            threshold.

    Returns:
        1 if the estimated target equals the true target (or is contained
        in it when the true target is a list), otherwise 0.
    '''
    threshold = 1. - infl_point

    # np.where returns a tuple of index arrays; element 0 holds the indices
    # along the first axis of the selected row.
    # NOTE(review): column i is not removed from the voters; if row i holds
    # its self-similarity and that value passes the threshold, i votes for
    # its own target -- confirm this is intended.
    similar_cols = np.where(jaccard_similarity_matrix[i, :] >= threshold)[0]

    # Built as a real list so the helper receives a sequence on both
    # Python 2 and Python 3 (map() is lazy on Python 3).
    similar_targets = [graph_database[c][2] for c in similar_cols]

    true_target_i = graph_database[i][2]
    estimated_target_i = statistics.predict_target_majority(similar_targets)

    # Score: 1 for a hit, 0 for a miss. A list-valued true target counts as
    # a hit when the estimate is any one of its members.
    if isinstance(true_target_i, list):
        return int(estimated_target_i in true_target_i)
    return int(true_target_i == estimated_target_i)
def quality(i, jaccard_similarity_matrix, infl_point):
    '''Quality estimation by similarity-threshold classification.

    Every column whose entry in row ``i`` of ``jaccard_similarity_matrix``
    is at least ``1 - infl_point`` votes with its target (index 2 of its
    ``graph_database`` entry); the majority decides the estimate for
    column ``i``.

    Args:
        i: Index of the column (and ``graph_database`` entry) to evaluate.
        jaccard_similarity_matrix: Array indexable as ``[i, :]`` and
            comparable element-wise against a scalar.
        infl_point: Value subtracted from 1 to obtain the similarity
            threshold.

    Returns:
        1 if the estimated target equals the true target (or is contained
        in it when the true target is a list), otherwise 0.
    '''
    threshold = 1. - infl_point

    # np.where returns a tuple of index arrays; element 0 holds the indices
    # along the first axis of the selected row.
    # NOTE(review): column i is not removed from the voters; if row i holds
    # its self-similarity and that value passes the threshold, i votes for
    # its own target -- confirm this is intended.
    similar_cols = np.where(jaccard_similarity_matrix[i, :] >= threshold)[0]

    # Built as a real list so the helper receives a sequence on both
    # Python 2 and Python 3 (map() is lazy on Python 3).
    similar_targets = [graph_database[c][2] for c in similar_cols]

    true_target_i = graph_database[i][2]
    estimated_target_i = statistics.predict_target_majority(similar_targets)

    # Score: 1 for a hit, 0 for a miss. A list-valued true target counts as
    # a hit when the estimate is any one of its members.
    if isinstance(true_target_i, list):
        return int(estimated_target_i in true_target_i)
    return int(true_target_i == estimated_target_i)
def quality(i, sketch_matrix):
    '''Quality estimation by sketch-based similar-column classification.

    The columns that ``sketch_matrix`` reports as similar to column ``i``
    (excluding ``i`` itself) vote with their targets (index 2 of their
    ``graph_database`` entries); the majority decides the estimate for
    column ``i``.

    Args:
        i: Index of the column (and ``graph_database`` entry) to evaluate.
        sketch_matrix: Object providing ``get_column(i)`` and
            ``get_similar_columns(column)``.

    Returns:
        1 if the estimated target equals the true target (or is contained
        in it when the true target is a list), otherwise 0.
    '''
    col_i = sketch_matrix.get_column(i)
    similar_cols = list(sketch_matrix.get_similar_columns(col_i))

    # The evaluated column must not vote for its own target.
    if i in similar_cols:
        similar_cols.remove(i)

    # Built as a real list so the helper receives a sequence on both
    # Python 2 and Python 3 (map() is lazy on Python 3).
    similar_targets = [graph_database[c][2] for c in similar_cols]

    true_target_i = graph_database[i][2]
    estimated_target_i = statistics.predict_target_majority(similar_targets)

    # Score: 1 for a hit, 0 for a miss. A list-valued true target counts as
    # a hit when the estimate is any one of its members.
    if isinstance(true_target_i, list):
        return int(estimated_target_i in true_target_i)
    return int(true_target_i == estimated_target_i)
def quality(i, sketch_matrix):
    '''Quality estimation by sketch-based similar-column classification.

    The columns that ``sketch_matrix`` reports as similar to column ``i``
    (excluding ``i`` itself) vote with their targets (index 2 of their
    ``graph_database`` entries); the majority decides the estimate for
    column ``i``.

    Args:
        i: Index of the column (and ``graph_database`` entry) to evaluate.
        sketch_matrix: Object providing ``get_column(i)`` and
            ``get_similar_columns(column)``.

    Returns:
        1 if the estimated target equals the true target (or is contained
        in it when the true target is a list), otherwise 0.
    '''
    col_i = sketch_matrix.get_column(i)
    similar_cols = list(sketch_matrix.get_similar_columns(col_i))

    # The evaluated column must not vote for its own target.
    if i in similar_cols:
        similar_cols.remove(i)

    # Built as a real list so the helper receives a sequence on both
    # Python 2 and Python 3 (map() is lazy on Python 3).
    similar_targets = [graph_database[c][2] for c in similar_cols]

    true_target_i = graph_database[i][2]
    estimated_target_i = statistics.predict_target_majority(similar_targets)

    # Score: 1 for a hit, 0 for a miss. A list-valued true target counts as
    # a hit when the estimate is any one of its members.
    if isinstance(true_target_i, list):
        return int(estimated_target_i in true_target_i)
    return int(true_target_i == estimated_target_i)