def __init__(self, time, categories, dimensions, values, data):
    """
    Open the database handles, reduce the inputs and build the datacube.

    Side effects: rebinds the module-level ``conn`` (psycopg2 connection)
    and ``engine`` (SQLAlchemy engine), prints the reduced dimensions and
    values, and terminates the process with exit status 1 if either
    database handle cannot be created.

    Args
    ----
    time: Appended with ``+`` to the reduced dimensions
        (presumably a list of time columns -- TODO confirm).
    categories: Stored unchanged on ``self.categories``.
    dimensions: Candidate dimensions handed to ``Clustering.Cluster``.
    values: Candidate values handed to ``Clustering.Cluster``.
    data: Stored on ``self.data`` and passed to
        ``PatternStore.create_table_object``
        (presumably a table name -- TODO confirm).
    """
    # Declared exactly once, before any use: Python 3 rejects a ``global``
    # statement that follows an assignment to (or use of) the same name
    # inside the function with a SyntaxError.
    global conn, engine

    try:
        conn = psycopg2.connect(dbname='postgres', user='******',
                                host='localhost', password='******')
    except psycopg2.DatabaseError as ex:
        print(ex)
        sys.exit(1)

    try:
        engine = create_engine(
            'postgresql://*****:*****@localhost:5432/postgres', echo=True)
    except Exception as ex:
        # Deliberately broad: any failure to build the engine is fatal here.
        print(ex)
        sys.exit(1)

    self.cursor = conn.cursor()
    self.data = data
    self.categories = categories

    # NOTE: to bypass clustering while testing, assign ``values`` and
    # ``dimensions + time`` directly instead of the reduced results below.
    reduced_dimensions, reduced_values = Clustering.Cluster(
        dimensions, values, self.data, conn)
    print(reduced_dimensions)
    print(reduced_values)
    self.dimensions = reduced_dimensions + time
    self.values = reduced_values

    PatternStore.create_table_object(self.data)
    self.formDatacube()
def kmeans(examples: list, numClusters: int, verbose: bool = False) -> list:
    """
    Generate numClusters Clusters using greedy algorithm.

    Args
    ----
    examples (list): List of sample points
    numClusters (int): Number of clusters to generate
    verbose (bool, optional): Print the clusters after each iteration.
        Defaults to False.

    Returns
    -------
    list: Current set of defined clusters.

    Raises
    ------
    ValueError: If an iteration leaves a cluster without members, or
        (from random.sample) if numClusters is larger than len(examples).

    Algorithm
    ---------
    Randomly choose numClusters examples as initial centroids
    while true
        - Assign each member of passed in examples to the cluster whose
          centroid is closest
        - Compute updated clusters centroid with the addition of new members
        - If new cluster centroids are not different then they were in the
          previous iteration of the while loop
            - return the current set of clusters
    """
    # Exponent passed to every example.distance() call. Per the original
    # note: 1 calculates Manhattan distance, 2 calculates Euclidean distance.
    # It is set once so that all centroids -- including the first -- are
    # compared with the same metric.
    power = 2

    # Seed one cluster per randomly chosen example.
    clusters = [
        Clustering.Cluster([centroid])
        for centroid in random.sample(examples, numClusters)
    ]

    # Iterate until centroids do not change (converge)
    converged = False
    numIterations = 0
    while not converged:
        numIterations += 1

        # numClusters distinct empty membership lists
        newClusters = [[] for _ in range(numClusters)]

        # Associate each example with the cluster with the closest centroid
        for example in examples:
            index = 0
            currentSmallestDistance = example.distance(
                clusters[0].getCentroid(), power)
            for i in range(1, numClusters):
                distance = example.distance(clusters[i].getCentroid(), power)
                # Strict comparison: on a tie the earlier cluster wins.
                if distance < currentSmallestDistance:
                    currentSmallestDistance = distance
                    index = i
            newClusters[index].append(example)

        # A cluster that attracted no examples cannot be updated.
        if any(not members for members in newClusters):
            raise ValueError('Empty cluster')

        # Update every cluster (no short-circuit: each update() must run)
        # and note whether any centroid moved.
        converged = True
        for cluster, members in zip(clusters, newClusters):
            if cluster.update(members) > 0.0:
                converged = False

        if verbose:
            print('Iteration num:', numIterations)
            for cluster in clusters:
                print(cluster)
            print("\n")

    return clusters