class RescalSimilarityEvaluation(EvaluationAlgorithm):
    """Evaluate connection prediction derived from RESCAL need similarity.

    For every fold ``execute_rescal`` is run on the test tensor and the
    resulting ``A`` factor is passed to
    ``predict_rescal_connections_by_need_similarity`` together with a fixed
    threshold.  The binary predictions are added to ``self.report``; when
    ``args.statistics`` is set, per-fold precision/recall and ROC curve files
    plus detailed statistics are written as well.

    ``args.rescalsim`` is indexed as: [0] rank (int), [1] threshold (float),
    [2] 'True' to use the need type slice, [3] 'True' to use the connection
    slice.

    NOTE(review): ``self.args``, ``self.report``, ``self.logger``,
    ``self.ground_truth``, ``self.output_folder`` and ``self.start_time`` are
    presumably populated by ``EvaluationAlgorithm.init`` -- confirm against
    the base class.
    """

    def __init__(self, args, output_folder, logger, ground_truth, start_time):
        """Store the RESCAL similarity settings and the offer/want indices."""
        self.init(args, output_folder, logger, ground_truth, start_time)
        self.rank = int(args.rescalsim[0])
        self.threshold = float(args.rescalsim[1])
        self.evalDetails = NeedEvaluationDetailDict()
        self.offers = ground_truth.getOfferIndices()
        self.wants = ground_truth.getWantIndices()
        # Index of the fold currently being evaluated; used in the names of
        # the per-fold statistics files.
        self.foldNumber = 0

    def _statistics_folder(self):
        """Return the folder that receives this run's statistics files."""
        return self.output_folder + "/statistics/rescalsim_" + self.start_time

    def log1(self):
        """Log the headline describing this algorithm and its threshold."""
        self.logger.info('For RESCAL prediction based on need similarity with threshold: %f' % self.threshold)

    def evaluate_fold(self, test_tensor, test_needs, idx_test):
        """Run RESCAL on one fold and record the prediction quality.

        :param test_tensor: tensor handed to ``execute_rescal``.
        :param test_needs: needs of this fold, forwarded to the predictor.
        :param idx_test: indices selecting the test entries of the
            connection slice.
        """
        # execute the rescal algorithm
        useNeedTypeSlice = (self.args.rescalsim[2] == 'True')
        useConnectionSlice = (self.args.rescalsim[3] == 'True')
        A, R = execute_rescal(test_tensor, self.rank, useNeedTypeSlice, useConnectionSlice)

        # use the most similar needs per need to predict connections
        self.log1()
        P_bin = predict_rescal_connections_by_need_similarity(
            A, self.threshold, self.offers, self.wants, test_needs)
        binary_pred = matrix_to_array(P_bin, idx_test)

        # The ground truth for this fold is needed by the report and by both
        # curve computations, so it is fetched only once.
        y_true = self.ground_truth.getArrayFromSliceMatrix(
            SparseTensor.CONNECTION_SLICE, idx_test)
        self.report.add_evaluation_data(y_true, binary_pred)

        if self.args.statistics:
            folder = self._statistics_folder()
            S = similarity_ranking(A)
            # Invert the values (NaN replaced by 0) so they can be fed to the
            # curve functions as scores.
            y_prop = [1.0 - i for i in np.nan_to_num(S[idx_test])]

            precision, recall, pr_threshold = m.precision_recall_curve(y_true, y_prop)
            write_precision_recall_curve_file(
                folder, "precision_recall_curve_fold%d.csv" % self.foldNumber,
                precision, recall, pr_threshold)

            # NOTE(review): if `m` is sklearn.metrics, roc_curve returns
            # (fpr, tpr, thresholds), so the TP/FP names may be swapped
            # relative to their contents -- confirm against
            # write_ROC_curve_file before relying on the column labels.
            TP, FP, roc_threshold = m.roc_curve(y_true, y_prop)
            write_ROC_curve_file(
                folder, "ROC_curve_fold%d.csv" % self.foldNumber,
                TP, FP, roc_threshold)

            self.evalDetails.add_statistic_details(
                self.ground_truth.getSliceMatrix(SparseTensor.CONNECTION_SLICE),
                P_bin, idx_test)

        # Advance the fold counter; without this every fold would write to
        # the "...fold0.csv" files and overwrite the previous fold's curves.
        self.foldNumber += 1

    def finish_evaluation(self):
        """Log the summary and, if requested, write statistics and the graph."""
        self.log1()
        self.report.summary()
        if self.args.statistics:
            folder = self._statistics_folder()
            self.evalDetails.output_statistic_details(
                folder, self.ground_truth.getHeaders(), self.args.fbeta)
            gexf = create_gexf_graph(self.ground_truth, self.evalDetails)
            # The context manager closes the file even if writing fails.
            with open(folder + "/graph.gexf", "w") as output_file:
                gexf.write(output_file)
def __init__(self, args, output_folder, logger, ground_truth, start_time):
    """Store the RESCAL similarity settings and the offer/want indices.

    ``args.rescalsim[0]`` is read as the integer rank and
    ``args.rescalsim[1]`` as the float threshold.

    NOTE(review): in this view the definition sits outside any class and
    repeats the constructor of ``RescalSimilarityEvaluation`` -- confirm
    whether it is an accidental duplicate.
    """
    self.init(args, output_folder, logger, ground_truth, start_time)

    rescalsim_options = args.rescalsim
    self.rank = int(rescalsim_options[0])
    self.threshold = float(rescalsim_options[1])

    self.evalDetails = NeedEvaluationDetailDict()
    self.offers = ground_truth.getOfferIndices()
    self.wants = ground_truth.getWantIndices()

    # Fold counter starts at zero.
    self.foldNumber = 0
class CosineEvaluation(EvaluationAlgorithm):
    """Evaluate connection prediction based on cosine similarity of needs.

    The ``weighted`` flag selects which option list supplies the two
    thresholds (``args.cosine_weigthed`` when set, ``args.cosine`` otherwise)
    and which statistics folder prefix is used (``wcosine_`` vs ``cosine_``).

    NOTE(review): ``self.args``, ``self.report``, ``self.logger``,
    ``self.ground_truth``, ``self.output_folder`` and ``self.start_time`` are
    presumably populated by ``EvaluationAlgorithm.init`` -- confirm against
    the base class.
    """

    def __init__(self, args, output_folder, logger, ground_truth, start_time, weighted):
        """Store the thresholds for the weighted or unweighted variant."""
        self.init(args, output_folder, logger, ground_truth, start_time)
        self.weighted = weighted
        # `cosine_weigthed` (sic) is the attribute name the args object uses.
        self.threshold = float(args.cosine_weigthed[0]) if weighted else float(args.cosine[0])
        self.transitive_threshold = float(args.cosine_weigthed[1]) if weighted else float(args.cosine[1])
        self.evalDetails = NeedEvaluationDetailDict()

    def _statistics_folder(self):
        """Return the folder that receives this run's statistics files."""
        prefix = "/statistics/wcosine_" if self.weighted else "/statistics/cosine_"
        return self.output_folder + prefix + self.start_time

    def logEvaluationLine(self):
        """Log the headline describing this algorithm and its thresholds."""
        # Named `qualifier` rather than `str` so the builtin is not shadowed.
        qualifier = " weighted" if self.weighted else ""
        self.logger.info('For prediction of%s cosine similarity between needs with thresholds %f, %f:' %
                         (qualifier, self.threshold, self.transitive_threshold))

    def evaluate_fold(self, test_tensor, test_needs, idx_test):
        """Predict connections for one fold and record the result.

        :param test_tensor: tensor handed to ``cosinus_link_prediciton``.
        :param test_needs: needs of this fold, forwarded to the predictor.
        :param idx_test: indices selecting the test entries of the
            connection slice.
        """
        self.logEvaluationLine()
        binary_pred = cosinus_link_prediciton(
            test_tensor, test_needs, self.threshold,
            self.transitive_threshold, self.weighted)
        self.report.add_evaluation_data(
            self.ground_truth.getArrayFromSliceMatrix(SparseTensor.CONNECTION_SLICE, idx_test),
            matrix_to_array(binary_pred, idx_test))
        if self.args.statistics:
            self.evalDetails.add_statistic_details(
                self.ground_truth.getSliceMatrix(SparseTensor.CONNECTION_SLICE),
                binary_pred, idx_test)

    def finish_evaluation(self):
        """Log the summary and, if requested, write statistics and the graph."""
        self.logEvaluationLine()
        self.report.summary()
        if self.args.statistics:
            folder = self._statistics_folder()
            self.evalDetails.output_statistic_details(
                folder, self.ground_truth.getHeaders(), self.args.fbeta)
            gexf = create_gexf_graph(self.ground_truth, self.evalDetails)
            # The context manager closes the file even if writing fails.
            with open(folder + "/graph.gexf", "w") as output_file:
                gexf.write(output_file)
class RescalEvaluation(EvaluationAlgorithm):
    """Evaluate connection prediction based on the RESCAL factorization.

    For every fold ``execute_rescal`` is run on the test tensor, the
    predicted scores are used for a precision/recall curve (its AUC is
    collected per fold), and a fixed threshold turns the scores into binary
    predictions that are added to ``self.report``.

    ``args.rescal`` is indexed as: [0] rank (int), [1] threshold (float),
    [2] 'True' to use the need type slice, [3] 'True' to extend connections
    transitively before learning, [4] init, [5] conv, [6] lambda_A,
    [7] lambda_R, [8] lambda_V (the last five are forwarded to
    ``execute_rescal``).

    NOTE(review): ``self.args``, ``self.report``, ``self.logger``,
    ``self.ground_truth``, ``self.output_folder`` and ``self.start_time`` are
    presumably populated by ``EvaluationAlgorithm.init`` -- confirm against
    the base class.
    """

    def __init__(self, args, output_folder, logger, ground_truth, start_time):
        """Store the RESCAL settings and the offer/want indices."""
        self.init(args, output_folder, logger, ground_truth, start_time)
        self.rank = int(args.rescal[0])
        self.threshold = float(args.rescal[1])
        self.evalDetails = NeedEvaluationDetailDict()
        # One precision/recall AUC value per evaluated fold.
        self.AUC_test = []
        # Index of the fold currently being evaluated; used in the names of
        # the per-fold statistics files.
        self.foldNumber = 0
        self.offers = ground_truth.getOfferIndices()
        self.wants = ground_truth.getWantIndices()

    def _statistics_folder(self):
        """Return the folder that receives this run's statistics files."""
        return self.output_folder + "/statistics/rescal_" + self.start_time

    def log1(self):
        """Log the headline describing this algorithm and its threshold."""
        self.logger.info('For RESCAL prediction with threshold %f:' % self.threshold)

    def evaluate_fold(self, test_tensor, test_needs, idx_test):
        """Run RESCAL on one fold and record the prediction quality.

        :param test_tensor: tensor handed to ``execute_rescal`` (optionally
            extended with transitive connections first).
        :param test_needs: needs of this fold, forwarded to the predictor.
        :param idx_test: indices selecting the test entries of the
            connection slice.
        """
        # set transitive connections before execution
        if self.args.rescal[3] == 'True':
            self.logger.info('extend connections transitively to the next need for RESCAL learning')
            test_tensor = extend_next_hop_transitive_connections(test_tensor)

        # execute the rescal algorithm
        useNeedTypeSlice = (self.args.rescal[2] == 'True')
        A, R = execute_rescal(
            test_tensor, self.rank, useNeedTypeSlice,
            init=self.args.rescal[4],
            conv=float(self.args.rescal[5]),
            lambda_A=float(self.args.rescal[6]),
            lambda_R=float(self.args.rescal[7]),
            lambda_V=float(self.args.rescal[8]))

        # The ground truth for this fold is needed by both curve
        # computations and the report, so it is fetched only once.
        y_true = self.ground_truth.getArrayFromSliceMatrix(
            SparseTensor.CONNECTION_SLICE, idx_test)

        # evaluate the predictions
        self.logger.info('start predict connections ...')
        # np.round replaces np.round_, which was removed in NumPy 2.0.
        prediction = np.round(predict_rescal_connections_array(A, R, idx_test), decimals=5)
        self.logger.info('stop predict connections')

        precision, recall, pr_threshold = m.precision_recall_curve(y_true, prediction)
        optimal_threshold = get_optimal_threshold(recall, precision, pr_threshold, self.args.fbeta)
        self.logger.info('optimal RESCAL threshold would be ' + str(optimal_threshold) +
                         ' (for maximum F' + str(self.args.fbeta) + '-score)')
        auc = m.auc(recall, precision)
        self.AUC_test.append(auc)
        self.logger.info('AUC test: ' + str(auc))

        # use a fixed threshold to compute several measures
        self.log1()
        P_bin = predict_rescal_connections_by_threshold(
            A, R, self.threshold, self.offers, self.wants, test_needs)
        binary_pred = matrix_to_array(P_bin, idx_test)
        self.report.add_evaluation_data(y_true, binary_pred)

        if self.args.statistics:
            folder = self._statistics_folder()
            write_precision_recall_curve_file(
                folder, "precision_recall_curve_fold%d.csv" % self.foldNumber,
                precision, recall, pr_threshold)

            # NOTE(review): if `m` is sklearn.metrics, roc_curve returns
            # (fpr, tpr, thresholds), so the TP/FP names may be swapped
            # relative to their contents -- confirm against
            # write_ROC_curve_file before relying on the column labels.
            TP, FP, roc_threshold = m.roc_curve(y_true, prediction)
            write_ROC_curve_file(
                folder, "ROC_curve_fold%d.csv" % self.foldNumber,
                TP, FP, roc_threshold)

            self.evalDetails.add_statistic_details(
                self.ground_truth.getSliceMatrix(SparseTensor.CONNECTION_SLICE),
                P_bin, idx_test, prediction)

        # Advance the fold counter so each fold gets its own curve files.
        self.foldNumber += 1

    def finish_evaluation(self):
        """Log AUC mean/std and the summary; optionally write statistics."""
        self.AUC_test = np.array(self.AUC_test)
        self.logger.info('AUC-PR Test Mean / Std: %f / %f' % (self.AUC_test.mean(), self.AUC_test.std()))
        self.logger.info('----------------------------------------------------')
        self.log1()
        self.report.summary()
        if self.args.statistics:
            folder = self._statistics_folder()
            self.evalDetails.output_statistic_details(
                folder, self.ground_truth.getHeaders(), self.args.fbeta, True)
            gexf = create_gexf_graph(self.ground_truth, self.evalDetails)
            # The context manager closes the file even if writing fails.
            with open(folder + "/graph.gexf", "w") as output_file:
                gexf.write(output_file)
def __init__(self, args, output_folder, logger, ground_truth, start_time, weighted):
    """Store the cosine thresholds for the weighted or unweighted variant.

    When ``weighted`` is truthy the thresholds come from
    ``args.cosine_weigthed``, otherwise from ``args.cosine``; element 0 is
    the threshold and element 1 the transitive threshold.

    NOTE(review): in this view the definition sits outside any class and
    repeats the constructor of ``CosineEvaluation`` -- confirm whether it is
    an accidental duplicate.
    """
    self.init(args, output_folder, logger, ground_truth, start_time)
    self.weighted = weighted

    if weighted:
        self.threshold = float(args.cosine_weigthed[0])
        self.transitive_threshold = float(args.cosine_weigthed[1])
    else:
        self.threshold = float(args.cosine[0])
        self.transitive_threshold = float(args.cosine[1])

    self.evalDetails = NeedEvaluationDetailDict()