def compute_native_vs_foreign(nativeElements, foreignElements, classify_geometry):
    """Classify foreign elements against the native class and report the rates.

    Every item of ``foreignElements`` is passed to ``_belongs_to_native``
    together with ``nativeElements`` and ``classify_geometry``.  Items for
    which that call returns a falsy value are counted as rejected.

    Returns:
        A ``(TN, FP)`` tuple of percentages: ``TN`` is the share of rejected
        foreign elements and ``FP`` is ``100 - TN``.  When ``foreignElements``
        is empty both values are ``0.0`` because the rates are undefined.

    Raises:
        ValueError: if ``classify_geometry`` is neither ``CLASSIFY_ELLIPSOID``
            nor ``CLASSIFY_CUBOID``.
    """
    # Resolve the label first so an unsupported geometry fails with a clear
    # message before the progress bar is started.
    if classify_geometry == CLASSIFY_ELLIPSOID:
        which_classifier_str = "Classification with Ellipsoids"
    elif classify_geometry == CLASSIFY_CUBOID:
        which_classifier_str = "Classification with Cuboids"
    else:
        raise ValueError(
            "Unsupported classify_geometry: " + repr(classify_geometry))

    # Progress bar
    p_bar.init(len(foreignElements), "Native vs Foreign: " + which_classifier_str)

    total_count = 0
    rejected_count = 0
    for foreign in foreignElements:
        if not _belongs_to_native(foreign, nativeElements, classify_geometry):
            rejected_count += 1
        total_count += 1
        p_bar.update()

    if total_count:
        TN = (rejected_count / total_count) * 100
        FP = 100 - TN
    else:
        # Nothing was classified: avoid dividing by zero and do not report a
        # misleading 100% false-positive rate.
        TN = 0.0
        FP = 0.0

    TNstr = ("True Negatives (TN) = " + str(round(TN, 2)) + "%" + "("
             + str(rejected_count) + "/" + str(total_count) + ") \n")
    FPstr = ("False Positives (FP) = " + str(round(FP, 2)) + "%" + "("
             + str(total_count - rejected_count) + "/" + str(total_count)
             + ") \n")

    logger.log(which_classifier_str)
    logger.log(TNstr)
    logger.log(FPstr)

    p_bar.finish()
    return TN, FP
def cluster_evaluation(max_k, symbolClasses):
    """Pick, for every symbol class, the cluster count with the best score.

    For each class in ``symbolClasses`` and each ``k`` from 2 to ``max_k``
    (inclusive), ``prediction_strength(cl.learning_set, k)`` is evaluated
    ``global_v.MAX_ITER_CLUS_EVALUATION`` times and averaged.  The ``k`` with
    the highest average wins; on ties the larger ``k`` is kept.

    Returns:
        A list with one chosen ``k`` per class, in the order of
        ``symbolClasses``.  A class keeps the default ``1`` when no ``k``
        is evaluated or no average reaches 0.
    """
    start_k = 2
    iterations = global_v.MAX_ITER_CLUS_EVALUATION
    best_ks = []

    for cl in symbolClasses:
        data = cl.learning_set
        logger.log("Symbol: " + str([cl.name]))

        max_ps = 0
        best_k = 1
        for k in range(start_k, max_k + 1):
            p_bar.init(0, "ps(" + str(k) + ")")

            # Average several runs of the score for this k.
            avg_ps = 0
            for _ in range(iterations):
                avg_ps += prediction_strength(data, k)
            avg_ps /= iterations

            logger.log("prediction_strength(" + str(k) + ") = " + str(avg_ps))
            p_bar.finish()

            # "<=" on purpose: an equal score moves the choice to the larger k.
            if max_ps <= avg_ps:
                max_ps = avg_ps
                best_k = k

        best_ks.append(best_k)

    return best_ks
def compute(training_set, start_k=2, end_k=7):
    """Return the averaged prediction strength for each cluster count.

    For every ``k`` from ``start_k`` to ``end_k`` (inclusive),
    ``prediction_strength(training_set, k)`` is evaluated
    ``global_v.MAX_ITER_CLUS_EVALUATION`` times and the mean is recorded.

    Returns:
        A list of mean scores, one per ``k``, in increasing order of ``k``.
    """
    iterations = global_v.MAX_ITER_CLUS_EVALUATION
    Results = []

    for k in range(start_k, end_k + 1):
        p_bar.init(0, "ps(" + str(k) + ")")

        # Accumulate sequentially so the mean matches a plain running sum.
        avg_ps = 0
        for _ in range(iterations):
            avg_ps += prediction_strength(training_set, k)
        avg_ps /= iterations

        Results.append(avg_ps)
        p_bar.finish()

    return Results
def compute(training_set, start_k=2, end_k=7):
    """Compute the Ratkowsky-Lance index for each cluster count.

    For every ``k`` from ``start_k`` to ``end_k`` (inclusive) the training set
    is clustered with ``kmeans.compute(k, training_set)`` and the index
    ``sqrt(R / k)`` is recorded, where ``R`` is the mean over all dimensions
    of ``BGSS_j / TSS_j``.

    A dimension whose ``TSS_j`` is zero contributes 0 to the mean instead of
    triggering a division by zero.

    Returns:
        A list of index values, one per ``k``, in increasing order of ``k``.
    """
    Results = []

    # Main loop. Computes indices for different amount of clusters
    for k in range(start_k, end_k + 1):
        p_bar.init(0, "rat_l(" + str(k) + ")")

        # 1) Compute clusters.
        clusters = kmeans.compute(k, training_set)

        # 2) Compute the barycenter of entire data
        center = __compute_barycenter(clusters)
        p = len(center)

        # 3) Compute the mean R of quotients between BGSS and TSS
        #    for each dimension of data
        R = 0
        for j in range(0, p):
            BGSS = __compute_bgss_j(j, clusters, center)
            TSS = __compute_tss_j(j, clusters, center)
            # Skip the quotient for a zero-TSS dimension (treated as 0).
            if TSS != 0:
                R += BGSS / TSS
        R /= p

        # 4) Ratkowsky-Lance index
        C = sqrt((R / k))
        Results.append(C)
        p_bar.finish()

    return Results
def __compute_clusters(nativeElements):
    """Run the clustering step for ``nativeElements`` with progress reporting.

    Logs a header containing ``global_v.K``, opens a one-step progress bar,
    delegates to ``Clusterer().computeClusters`` and closes the progress bar.
    """
    header = "Clustering K = %s" % (global_v.K,)
    logger.log_header(header)

    # Open the progress bar
    p_bar.init(1, "Clustering")

    # Legacy function, requires a list as input, so wrap the single argument.
    clusterer = Clusterer()
    clusterer.computeClusters([nativeElements])

    # Close the progress bar
    p_bar.finish()
def compute_training_vs_testing(nativeElements, classify_geometry):
    """Classify the native test set against its own class and report the rates.

    Every item of ``nativeElements.test_set`` is passed to
    ``_belongs_to_native`` together with ``nativeElements`` and
    ``classify_geometry``.  Items for which that call returns a truthy value
    are counted as accepted.

    Returns:
        A ``(TP, FN)`` tuple of percentages: ``TP`` is the share of accepted
        test items and ``FN`` is ``100 - TP``.  When the test set is empty
        both values are ``0.0`` because the rates are undefined.

    Raises:
        ValueError: if ``classify_geometry`` is neither ``CLASSIFY_ELLIPSOID``
            nor ``CLASSIFY_CUBOID``.
    """
    # Resolve the label first so an unsupported geometry fails with a clear
    # message before the progress bar is started.
    if classify_geometry == CLASSIFY_ELLIPSOID:
        which_classifier_str = "Classification with Ellipsoids"
    elif classify_geometry == CLASSIFY_CUBOID:
        which_classifier_str = "Classification with Cuboids"
    else:
        raise ValueError(
            "Unsupported classify_geometry: " + repr(classify_geometry))

    # Progress bar
    p_bar.init(len(nativeElements.test_set),
               "Training vs Testing: " + which_classifier_str)

    # Classification
    total_count = 0
    belongs_count = 0
    for test_symbol in nativeElements.test_set:
        if _belongs_to_native(test_symbol, nativeElements, classify_geometry):
            belongs_count += 1
        total_count += 1
        p_bar.update()

    # Results
    if total_count:
        TP = (belongs_count / total_count) * 100
        FN = 100 - TP
    else:
        # Nothing was classified: avoid dividing by zero and do not report a
        # misleading 100% false-negative rate.
        TP = 0.0
        FN = 0.0

    TPstr = ("True Positive (TP) = " + str(round(TP, 2)) + "%" + "("
             + str(belongs_count) + "/" + str(total_count) + ")")
    FNstr = ("False Negative (FN) = " + str(round(FN, 2)) + "%" + "("
             + str(total_count - belongs_count) + "/" + str(total_count) + ")")

    logger.log(which_classifier_str)
    logger.log(TPstr)
    logger.log(FNstr)

    p_bar.finish()
    return TP, FN
def compute(training_set, start_k=2, end_k=7):
    """Evaluate a cluster-validity index for each cluster count.

    For every ``k`` from ``start_k`` to ``end_k`` (inclusive) the training set
    is clustered with ``kmeans.compute(k, training_set)`` and the value
    ``((1 / k) * (Et / Ew) * Db) ** 2`` is recorded.
    NOTE(review): the progress-bar label suggests this is the PBM index;
    confirm against the helper implementations.

    Returns:
        A list of index values, one per ``k``, in increasing order of ``k``.
    """
    indices = []

    # One index value per candidate number of clusters.
    for k in range(start_k, end_k + 1):
        p_bar.init(0, "pbm(%s)" % k)

        # Cluster the data for this k.
        clusters = kmeans.compute(k, training_set)

        # Largest distance between two cluster barycenters.
        max_separation = __max_barycenter_distance(clusters)
        # Presumably within-cluster and overall distance sums, judging by the
        # helper names -- TODO confirm.
        within = __distance_within_barycenter(clusters)
        between = __distance_between_barycenter(clusters)

        # Same left-to-right evaluation order as the closed-form expression.
        scaled = (1 / k) * (between / within) * max_separation
        indices.append(scaled ** 2)

        p_bar.finish()

    return indices