def __normalize(nativeElements, foreignElements): logger.log_header("Normalization") MIN_INDEX = 0 MAX_INDEX = 1 # Find MIN / MAX For native elements min_max_values = __min_max(nativeElements, MIN_INDEX, MAX_INDEX) for nativeClass in nativeElements: for element in nativeClass.learning_set: for i in range(0, len(element.characteristicsValues)): charValue = element.characteristicsValues[i] element.characteristicsValues[i] = __norm( charValue, min_max_values[i][MIN_INDEX], min_max_values[i][MAX_INDEX]) for element in nativeClass.test_set: for i in range(0, len(element.characteristicsValues)): charValue = element.characteristicsValues[i] element.characteristicsValues[i] = __norm( charValue, min_max_values[i][MIN_INDEX], min_max_values[i][MAX_INDEX]) # Find MIN / MAX For foreign elements """
def __min_max(nativeElements, MIN_INDEX=0, MAX_INDEX=1):
    """Return the minimum and maximum of every characteristic.

    Both the ``learning_set`` and the ``test_set`` of each class in
    ``nativeElements`` are scanned.

    Args:
        nativeElements: Sequence of classes exposing ``learning_set`` and
            ``test_set``; each element of those sets exposes
            ``characteristicsValues``.
        MIN_INDEX: Position of the minimum inside each result pair.
        MAX_INDEX: Position of the maximum inside each result pair.

    Returns:
        A list with one two-item list per characteristic, holding the
        smallest value at ``MIN_INDEX`` and the largest at ``MAX_INDEX``.

    Raises:
        SystemExit: If an element does not have the same number of
            characteristics as the first learning element of the first
            class.
    """
    # The first learning element defines the expected dimension.
    dim = len(nativeElements[0].learning_set[0].characteristicsValues)

    # Start from infinite sentinels so that any real value replaces them,
    # regardless of its magnitude (fixed sentinels such as +/-99999 yield
    # wrong bounds for data lying outside that range).
    min_max_values = [[0] * 2 for _ in range(dim)]
    for bounds in min_max_values:
        bounds[MIN_INDEX] = float("inf")
        bounds[MAX_INDEX] = float("-inf")

    logger.log_header("Normalizing Native",
                      styles=[logger.LogHeaderStyle.SUB_HEADER])

    for nativeClass in nativeElements:
        # Learning and test elements are handled identically.
        for data_set in (nativeClass.learning_set, nativeClass.test_set):
            for element in data_set:
                values = element.characteristicsValues

                # Dimensions must fit
                if len(values) != dim:
                    logger.log("Incorrect dimensions. Exiting...")
                    # Non-zero status: this is an error exit.
                    sys.exit(1)

                for bounds, charValue in zip(min_max_values, values):
                    if bounds[MIN_INDEX] >= charValue:
                        bounds[MIN_INDEX] = charValue
                    if bounds[MAX_INDEX] <= charValue:
                        bounds[MAX_INDEX] = charValue

    return min_max_values
def __init__(self, centroid, points, name, number, give_info=True,
             do_ellipsoid=True, do_cuboid=True):
    """Create a cluster and, optionally, its bounding geometries.

    Args:
        centroid: Stored as ``self.center``.
        points: Stored as ``self.points``; passed to ``Cuboid`` and
            ``Ellipsoid``.
        name: Used in the log header and passed to ``self.__info``.
        number: Used in the log header and passed to ``self.__info``.
        give_info: When true, log a header, call ``self.__info`` and log
            the share of points that were not rejected by the ellipsoid.
        do_ellipsoid: When true, build ``self.ellipsoid`` and store the
            values returned by ``is_point_in_ellipsoid`` as
            ``self.rejected_x`` / ``self.rejected_y`` (and
            ``self.rejected_z`` when ``global_v.CHAR_NUM == 3``).
        do_cuboid: When true, build ``self.cuboid``.
    """
    # NOTE(review): block nesting was reconstructed from a flattened
    # source; confirm the placement of the final logging against the
    # original layout.
    if give_info:
        logger.log_header(
            "Created Cluster: " + str([name]) + " Number: #" + str(number),
            styles=[logger.LogHeaderStyle.SUB_HEADER])

    self.center = centroid
    self.points = points

    if do_cuboid:
        logger.log("Creating Cuboid in Cluster")
        self.cuboid = Cuboid(self.points)

    if do_ellipsoid:
        logger.log("Creating Ellipsoid in Cluster")
        self.ellipsoid = Ellipsoid(self.points, global_v.SEMI_AXIS_SCALE)
        # A copy of the points is handed over, as in the original call.
        rejected = self.ellipsoid.is_point_in_ellipsoid(self.points[:])
        if global_v.CHAR_NUM == 3:
            self.rejected_x, self.rejected_y, self.rejected_z = rejected
        else:
            self.rejected_x, self.rejected_y = rejected

    if give_info:
        self.__info(name, number)
        # The rejected_* attributes exist only when the ellipsoid was
        # built, and the ratio is undefined for an empty cluster.
        if do_ellipsoid and len(self.points) > 0:
            logger.log(
                'Points in ellipsoid: '
                + str((1 - len(self.rejected_x) / len(self.points)) * 100)
                + '%')
def run():
    """Run the cluster evaluation on loaded or generated symbols.

    When ``global_v.NATIVE_TRAINING_FILE`` is set, the learning set of the
    deserialized native symbols is evaluated; otherwise the learning set
    of the first generated symbol class is evaluated.
    """
    if not global_v.NATIVE_TRAINING_FILE:
        logger.log_header("Cluster Evaluation, k clouds: "
                          + str(global_v.K_CLOUD_DISTORTION))
        generated_classes = __generate_symbol()
        __compute_cluster_evaluation(generated_classes[0].learning_set)
        return

    logger.log_header("Cluster Evaluation: "
                      + str(global_v.NATIVE_TRAINING_FILE))
    native_class = loader.deserialize_native()
    __compute_cluster_evaluation(native_class.learning_set)
def __rat_l_evaluation(training_set, start_k, end_k):
    """Log the values returned by ``rat_l.compute`` for the given k range.

    Each result is logged as ``rat_l(k) = value``, where ``k`` starts at
    ``start_k`` and grows by one per result.
    """
    target_file = logger.LOG_CLUSTER_FILE_NAME
    logger.log_header("Ratkowsky-Lance", filename=target_file,
                      styles=[logger.LogHeaderStyle.SUB_HEADER])

    scores = rat_l.compute(training_set, start_k, end_k)
    for offset, score in enumerate(scores):
        label = "rat_l(" + str(offset + start_k) + ")"
        logger.log(label + " = " + str(score), filename=target_file,
                   styles=[logger.LogStyle.NONE])
def __pbm_evaluation(training_set, start_k, end_k):
    """Log the values returned by ``pbm.compute`` for the given k range.

    Each result is logged as ``pbm(k) = value``, where ``k`` starts at
    ``start_k`` and grows by one per result.
    """
    target_file = logger.LOG_CLUSTER_FILE_NAME
    logger.log_header("PBM", filename=target_file,
                      styles=[logger.LogHeaderStyle.SUB_HEADER])

    scores = pbm.compute(training_set, start_k, end_k)
    for offset, score in enumerate(scores):
        label = "pbm(" + str(offset + start_k) + ")"
        logger.log(label + " = " + str(score), filename=target_file,
                   styles=[logger.LogStyle.NONE])
def __compute_clusters(nativeElements):
    """Cluster ``nativeElements`` while driving the progress bar.

    The progress bar is initialised before and finished after the call to
    ``Clusterer().computeClusters``.
    """
    logger.log_header("Clustering K = " + str(global_v.K))

    p_bar.init(1, "Clustering")

    # computeClusters is a legacy function that requires a list as input,
    # so the single argument is wrapped in one.
    Clusterer().computeClusters([nativeElements])

    p_bar.finish()
def __load_symbols():
    """Load native and foreign symbols and record their dimensions.

    Sets ``global_v.CLASS_NUM`` to 1 and ``global_v.CHAR_NUM`` to the
    number of characteristics of the first native learning element.

    Returns:
        A ``(nativeElements, foreignElements)`` tuple.
    """
    logger.log_header("Loading Native symbols")
    native = loader.deserialize_native()

    logger.log_header("Loading Foreign symbols")
    foreign = loader.load_foreign_xls()

    global_v.CLASS_NUM = 1
    first_learning_element = native.learning_set[0]
    global_v.CHAR_NUM = len(first_learning_element.characteristicsValues)

    return native, foreign
def __deserialize():
    """Log the characteristics of every deserialized learning element.

    Each element's ``characteristicsValues`` is converted to text, the
    enclosing brackets are removed, and the result is passed to
    ``logger.log`` with a ``training_.txt`` filename.
    """
    logger.log_header("Deserializing")
    native = loader.deserialize_native()

    target_file = "training" + "_" + ".txt"
    for sample in native.learning_set:
        line = str(sample.characteristicsValues).strip("[").rstrip("]")
        logger.log(line,
                   filename=target_file,
                   styles=[logger.LogStyle.NONE, logger.LogStyle.FILE_ONLY],
                   text_indent="")
def _format_characteristics(values):
    """Return ``values`` as text without the enclosing square brackets."""
    return str(values).strip("[]")


def serialize_chosen_elements(nativeElements):
    """Merge the chosen native classes into one class and log its elements.

    A class is chosen when its name is listed in
    ``global_v.NATIVE_CLASSES``, or when that collection is empty (in
    which case every class is chosen). The learning and test elements of
    the merged class are logged to ``training_[names].txt`` and
    ``test_[names].txt`` respectively.

    Args:
        nativeElements: Iterable of classes exposing ``name``,
            ``learning_set`` and ``test_set``.

    Returns:
        The merged ``SymbolClass``; its name is the comma-separated list
        of the chosen class names.
    """
    logger.log_header("Choosing Native elements")
    chosenNativeElements = SymbolClass("", ColorChooser().get_color())

    # Go through all symbol classes and choose the classes we want
    chosen_names = []
    for native_class in nativeElements:
        if (native_class.name in global_v.NATIVE_CLASSES
                or len(global_v.NATIVE_CLASSES) == 0):
            chosenNativeElements.learning_set += native_class.learning_set
            chosenNativeElements.test_set += native_class.test_set
            chosen_names.append(str(native_class.name))

    # Joining avoids rstrip(", "), which strips a *set of characters* and
    # would corrupt a class name that itself ends in a comma or a space.
    joined_names = ", ".join(chosen_names)
    chosenNativeElements.name += joined_names
    m_filename = "[" + joined_names + "]"

    file_styles = [logger.LogStyle.NONE, logger.LogStyle.FILE_ONLY]
    outputs = (
        ("training" + "_" + m_filename + ".txt",
         chosenNativeElements.learning_set),
        ("test" + "_" + m_filename + ".txt",
         chosenNativeElements.test_set),
    )
    for target_file, elements in outputs:
        for element in elements:
            logger.log(_format_characteristics(element.characteristicsValues),
                       filename=target_file,
                       styles=list(file_styles),
                       text_indent="")

    # Log
    logger.log(str(chosenNativeElements))
    return chosenNativeElements
def __print_results(accuracy, sensitivity, precision, f_measure, TP, FN, TN,
                    FP, classify_geometry):
    """Log the classifier counts and measurements to the results file.

    Args:
        accuracy, sensitivity, precision, f_measure: Measurements, logged
            rounded to two decimals.
        TP, FN, TN, FP: Classifier counts, logged rounded to two decimals.
        classify_geometry: ``classifier.CLASSIFY_ELLIPSOID`` or
            ``classifier.CLASSIFY_CUBOID``; selects the results file and
            the header text.

    Raises:
        ValueError: If ``classify_geometry`` is neither of the two
            supported values.
    """
    # Choose the file for results
    if classify_geometry == classifier.CLASSIFY_ELLIPSOID:
        filename = logger.LOG_RESULTS_ELLIPSOIDS_FILE_NAME
        header = "Ellipsoids"
    elif classify_geometry == classifier.CLASSIFY_CUBOID:
        filename = logger.LOG_RESULTS_CUBOIDS_FILE_NAME
        header = "Cuboids"
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "Unknown classify_geometry: " + repr(classify_geometry))

    # The decimal to round to for logging results
    round_decimal = 2

    # Main Header
    logger.log_header("Results: " + header, filename)

    # SubHeader: Classifier Quality
    logger.log_header("Classifier Quality: " + header, filename,
                      styles=[logger.LogHeaderStyle.SUB_HEADER])
    for label, value in (("TP", TP), ("FN", FN), ("TN", TN), ("FP", FP)):
        logger.log(label + ": " + str(round(value, round_decimal)), filename,
                   styles=[logger.LogStyle.NONE])

    # SubHeader: Classifier Measurements
    logger.log_header("Classifier Measurements: " + header, filename,
                      styles=[logger.LogHeaderStyle.SUB_HEADER])
    measurements = (
        ("Accuracy", accuracy),
        ("Sensitivity", sensitivity),
        ("Precision", precision),
        ("F-Measure", f_measure),
    )
    for label, value in measurements:
        logger.log(label + ": " + str(round(value, round_decimal)), filename,
                   styles=[logger.LogStyle.NONE])
def __compute_classifier_quality_ellipsoids(nativeElements, foreignElements):
    """Classify with ellipsoids and print the resulting quality figures.

    Runs the training-vs-testing and the native-vs-foreign classification,
    derives the measurements from the four counts, and hands everything to
    ``__print_results``.
    """
    sub_header = logger.LogHeaderStyle.SUB_HEADER
    logger.log_header("Classification. Ellipsoids")

    # Training vs Test, Ellipsoids
    logger.log_header("Classification Training vs Testing. Ellipsoids",
                      styles=[sub_header])
    true_pos, false_neg = classifier.compute_training_vs_testing(
        nativeElements, classifier.CLASSIFY_ELLIPSOID)

    # Native vs Foreign, Ellipsoids
    logger.log_header("Classification Native vs Foreign. Ellipsoids",
                      styles=[sub_header])
    true_neg, false_pos = classifier.compute_native_vs_foreign(
        nativeElements, foreignElements, classifier.CLASSIFY_ELLIPSOID)

    # Classifier Measurements
    accuracy, sensitivity, precision, f_measure = \
        classifier.compute_measurements(
            true_pos, false_neg, true_neg, false_pos)

    # Print results
    __print_results(accuracy, sensitivity, precision, f_measure,
                    true_pos, false_neg, true_neg, false_pos,
                    classifier.CLASSIFY_ELLIPSOID)
def __serialize():
    """Load the native symbols and serialize the chosen ones.

    The symbols come from ``loader.load_native_xls`` and are passed
    straight to ``loader.serialize_chosen_elements``.
    """
    logger.log_header("Serializing")
    loaded_native = loader.load_native_xls()
    loader.serialize_chosen_elements(loaded_native)