def kMeans(data_set, score_funcs, k):
    '''
    Run k-means on data_set and score the clustering after every pass.

    Starting from the centroids returned by _initializeCentroids, each pass
    groups the points around the current centroids, scores that grouping with
    Analyze.analyze_clusters, and recomputes the centroids with
    _findMeanVectors. Passes continue until _hasConverged reports that the
    recomputed centroids match the previous ones.

    Parameters:
        data_set: the collection of points to cluster (must support len()).
        score_funcs: passed through unchanged to Analyze.analyze_clusters.
        k: number of centroids to create; must not exceed len(data_set).

    Returns:
        A list of Analyze.analyze_clusters results, one per recorded pass.
        The entry recorded on the converging pass is treated as a repeat of
        the one before it and is dropped. If the algorithm converges on the
        very first pass there is nothing to repeat, so that single result is
        returned instead of an empty list.

    Raises:
        AssertionError: if k is larger than the number of points. Note that
        assert statements are skipped when Python runs with -O.
    '''
    assert k <= len(data_set), "k must not exceed the number of data points"

    results_list = []
    centroids = _initializeCentroids(k, data_set)

    while True:
        clusters = _group_points(data_set, centroids)
        # keep recording the data
        results_list.append(Analyze.analyze_clusters(clusters, score_funcs))

        updated_centroids = _findMeanVectors(clusters, data_set)
        if _hasConverged(centroids, updated_centroids):
            break
        centroids = updated_centroids

    # The last item is a repeat of the previous one, but only when there IS
    # a previous one; never throw away the sole result.
    if len(results_list) > 1:
        return results_list[:-1]
    return results_list
def dbscan(data_pts, radius, minpts, score_funcs=None):
    '''
    Cluster data_pts with DBSCAN and score the resulting clustering.

    Parameters:
        data_pts: the points to cluster; forwarded to _cluster.
        radius: forwarded to _cluster (presumably the neighbourhood
            radius - confirm against _cluster).
        minpts: forwarded to _cluster (presumably the minimum neighbour
            count - confirm against _cluster).
        score_funcs: passed through unchanged to Analyze.analyze_clusters.

    Returns:
        A one-element list holding the Analyze.analyze_clusters result.
    '''
    # _cluster yields a dictionary mapping each point to its cluster label
    point_labels = _cluster(data_pts, radius, minpts)

    # Invert the mapping: label -> list of the points carrying that label
    clusters = defaultdict(list)
    for point, cluster_label in point_labels.items():
        clusters[cluster_label].append(point)

    # Score the clustering and wrap the single result in a list
    return [Analyze.analyze_clusters(clusters, score_funcs)]
def ACO(dataset, iterations, num_clusters, num_ants, beta, prob_cutoff,
        num_elite_ants, decay_rate, q, score_funcs):
    '''
    The main function for the ant colony optimization clustering algorithm.

    Builds a pheromone matrix and a colony of ants, then repeats for the
    requested number of iterations: every ant updates its beliefs once per
    data point, the ants are ranked, the top-ranked (elite) ants update the
    pheromone matrix, and the best clustering seen so far is scored.

    Parameters:
        dataset: the data to be clustered (must support len()).
        iterations: number of iterations to run.
        num_clusters: forwarded to _initialize_pheromones and
            _initialize_ants.
        num_ants: forwarded to _initialize_ants.
        beta, prob_cutoff: forwarded to _initialize_ants.
        num_elite_ants: how many of the top-ranked ants are handed to
            _update_pheromones each iteration.
        decay_rate, q: forwarded to _update_pheromones.
        score_funcs: passed through unchanged to Analyze.analyze_clusters.

    Returns:
        A list of Analyze.analyze_clusters results for the best clustering
        found so far. A lower score from _rank_ants counts as better.
    '''
    pheromone_matrix = _initialize_pheromones(dataset, num_clusters)
    ants = _initialize_ants(dataset, num_ants, num_clusters, beta,
                            prob_cutoff, pheromone_matrix)

    best_score = float("inf")
    best_clustering = None
    results = []

    for _iteration in range(iterations):
        # Have every ant cluster every data point: one belief update per
        # ant for each point in the dataset
        for _point_number in range(len(dataset)):
            for ant in ants:
                ant.update_beliefs()

        # With all points assigned, order the ants by objective function
        ranking = _rank_ants(ants)
        ants = [ant_and_score[0] for ant_and_score in ranking.ants_and_scores]

        # Only the elite (best scoring) ants lay pheromone; afterwards every
        # ant is given the refreshed matrix
        elite_ants = ants[:num_elite_ants]
        pheromone_matrix = _update_pheromones(pheromone_matrix, elite_ants,
                                              decay_rate, q)
        _update_ants_pheromones(pheromone_matrix, ants)

        # Promote this iteration's winner if it beats the global best
        candidate_score = ranking.best_score
        candidate_clustering = ranking.best_clustering
        if candidate_score < best_score:
            best_score = candidate_score
            best_clustering = candidate_clustering

        # Reset the ants' memory lists before the next iteration
        _reset_ants(ants)

        # Score the best clustering so far and record it.
        # NOTE(review): the reset and scoring are done once per iteration
        # (inside the loop) - confirm this matches the intended layout.
        results.append(Analyze.analyze_clusters(best_clustering, score_funcs))

    return results
def competitive_learning(data_set, eta, num_clusters, iterations, score_funcs):
    '''
    The main competitive learning algorithm.

    Creates a two layer network, then repeatedly trains its weights and
    scores the clustering the trained weights produce.

    Parameters:
        data_set: the training examples; the length of the first example
            is used as the number of network inputs.
        eta: forwarded to Layer.Layer (presumably the learning rate -
            confirm against Layer).
        num_clusters: number of output nodes handed to Layer.Layer, also
            forwarded to _train_network.
        iterations: how many train-and-score rounds to run.
        score_funcs: passed through unchanged to Analyze.analyze_clusters.

    Returns:
        A list with one Analyze.analyze_clusters result per iteration.
    '''
    # One network input per feature of a training example
    feature_count = len(data_set[0])
    weight_layer = Layer.Layer(feature_count, num_clusters, eta)

    results = []
    for _ in range(iterations):
        # Train, cluster with the updated weights, then record the score
        weight_layer = _train_network(data_set, weight_layer, num_clusters)
        clustering = _cluster(data_set, weight_layer)
        results.append(Analyze.analyze_clusters(clustering, score_funcs))

    return results