def _formCluster(index, data_set):
    """Create a cluster centre seeded from one entry of *data_set*.

    The entry at position *index* supplies the coordinates and the class
    label. Its coordinates are handed to ``Cluster.ClusterCenter`` twice:
    as the first three arguments and again as the next three (presumably
    the current and the original position -- confirm against
    ``ClusterCenter``).

    Args:
        index: Position in *data_set* of the data point to seed from.
        data_set: Indexable collection of data points exposing
            ``get_x()``, ``get_y()``, ``get_z()`` and ``get_class()``.

    Returns:
        The newly constructed ``Cluster.ClusterCenter``.
    """
    seed = data_set[index]
    seed_x, seed_y, seed_z = seed.get_x(), seed.get_y(), seed.get_z()
    return Cluster.ClusterCenter(
        seed_x, seed_y, seed_z,
        seed_x, seed_y, seed_z,
        seed.get_class(),
    )
def _recenteredCluster(cluster):
    """Return a new cluster centre at the averaged position of *cluster*.

    The average is taken over the cluster's current centre plus every data
    point in ``cluster.get_list()``. Counting the centre itself means the
    divisor is always at least 1, so a cluster without points keeps its
    position instead of dividing by zero. The original coordinates and the
    class label are carried over so the starting location is not lost.
    """
    x_total = float(cluster.get_x())
    y_total = float(cluster.get_y())
    z_total = float(cluster.get_z())
    sample_count = 1  # the current centre counts as one sample
    for data_point in cluster.get_list():
        x_total += float(data_point.get_x())
        y_total += float(data_point.get_y())
        z_total += float(data_point.get_z())
        sample_count += 1
    return Cluster.ClusterCenter(
        x_total / sample_count,
        y_total / sample_count,
        z_total / sample_count,
        cluster.get_original_x(),
        cluster.get_original_y(),
        cluster.get_original_z(),
        cluster.get_class(),
    )


def _closestClusterIndex(data_point, centers):
    """Return the index of the centre in *centers* nearest to *data_point*.

    Distance is the 3-D Euclidean distance. On a tie the lowest index wins,
    because ``min`` returns the first minimal element.
    """
    # Convert the point's coordinates once rather than once per centre.
    px = float(data_point.get_x())
    py = float(data_point.get_y())
    pz = float(data_point.get_z())

    def distance_to(center_index):
        center = centers[center_index]
        dx = px - center.get_x()
        dy = py - center.get_y()
        dz = pz - center.get_z()
        return math.sqrt(dx * dx + dy * dy + dz * dz)

    return min(range(len(centers)), key=distance_to)


def _refactorClusters(clusters, max_iterations=1000):
    """Iteratively re-centre clusters and reassign their data points.

    Each pass builds a new centre for every cluster (see
    ``_recenteredCluster``), then adds every data point of the old clusters
    to the new centre closest to it. Passes repeat while at least three
    points changed cluster during the pass.

    Works for any number of clusters (previously hard-coded to exactly
    three) and runs as a loop rather than by recursion, so slow convergence
    can no longer exhaust the interpreter's recursion limit.

    Args:
        clusters: Sequence of cluster centres exposing ``get_x/y/z()``,
            ``get_original_x/y/z()``, ``get_class()`` and ``get_list()``.
        max_iterations: Upper bound on the number of passes. When it is
            reached the clusters of the last pass are returned as-is.

    Returns:
        A list of new ``Cluster.ClusterCenter`` objects, in the same order
        as *clusters*, holding the reassigned data points.

    Raises:
        ValueError: If *max_iterations* is smaller than 1.
    """
    if max_iterations < 1:
        raise ValueError("max_iterations must be at least 1")

    # A pass that moves fewer points than this is treated as converged.
    min_changes_to_continue = 3

    new_clusters = []
    for _ in range(max_iterations):
        new_clusters = [_recenteredCluster(cluster) for cluster in clusters]

        num_of_changes = 0
        for old_index, cluster in enumerate(clusters):
            for data_point in cluster.get_list():
                closest_cluster = _closestClusterIndex(data_point, new_clusters)
                new_clusters[closest_cluster].add_to_list(data_point)
                if closest_cluster != old_index:
                    num_of_changes += 1

        if num_of_changes < min_changes_to_continue:
            break
        # Not converged yet: the next pass starts from the clusters just built.
        clusters = new_clusters

    return new_clusters