def main():
    """Run the clustering scenarios and write their evaluation pickles.

    Scenario 1 evaluates the syntactic, semantic and combined feature sets
    on one ``cluster.Cluster`` instance.  Scenario 3 builds a second
    instance and evaluates only the combined feature set.  Nothing is
    returned; the evaluation results are discarded after each call.
    """
    scenario_1_banner = 'Start Scenario 1, Kmean Clustering with semi-labeled data\nUsing jieba syntactic, jieba semantic features ...'
    print(scenario_1_banner)
    scenario_1 = cluster.Cluster()
    scenario_1.syntactic_analysis('jieba')
    scenario_1.semantic_analysis()
    scenario_1.write_syntactic_feature('syntactic_jieba.pickle')
    scenario_1_runs = (
        ('Syntactic', 'result_1_syntactic.pickle'),
        ('Semantic', 'result_1_semantic.pickle'),
        ('Syntactic_Semantic', 'result_1_both.pickle'),
    )
    for feature_set, pickle_name in scenario_1_runs:
        # evaluate() is unpacked as a pair; only its side effects are used.
        _, _unused_result = scenario_1.evaluate(feature_set, pickle_name)

    # Scenario 2 (ckip syntactic + ckip semantic features) is disabled.

    scenario_3_banner = 'Start Scenario 3, Kmean Clustering with semi-labeled data\nUsing jieba syntactic, ckip semantic features ...'
    print(scenario_3_banner)
    scenario_3 = cluster.Cluster()
    scenario_3.syntactic_analysis('jieba')
    # NOTE(review): the banner mentions ckip semantic features, but this call
    # is identical to scenario 1 -- confirm which analyser is intended.
    scenario_3.semantic_analysis()
    _, _unused_result = scenario_3.evaluate('Syntactic_Semantic', 'result_3_both.pickle')
def test_features_syn():
    """Build synthetic mixed clusters from labelled documents and test features.

    Command-line arguments read here: ``sys.argv[2]`` is passed to
    ``get_data_dir``, ``sys.argv[3]`` is the maximum chunk size,
    ``sys.argv[4]`` the number of chunks merged into one cluster and
    ``sys.argv[5]`` the minimum size handed to ``ncluster.test_features``.
    """
    docs = doc.get_docs_nested(get_data_dir(sys.argv[2]))
    max_size = int(sys.argv[3])
    num_combine = int(sys.argv[4])
    min_size = int(sys.argv[5])
    # Group the documents by their label.
    d = collections.defaultdict(list)
    for _doc in docs:
        d[_doc.label].append(_doc)
    pure_clusters = d.values()
    broken_clusters = list()
    # Split every single-label group into chunks of at most max_size
    # documents; the whole split is repeated ten times, so every chunk
    # appears ten times in broken_clusters.
    # NOTE(review): loop nesting reconstructed from a collapsed line -- confirm.
    for x in xrange(10):
        for _cluster in pure_clusters:
            broken_clusters += [_cluster[i:i + max_size] for i in range(0, len(_cluster), max_size)]
    combined_clusters = list()
    # Repeatedly draw num_combine chunks at random (or all that remain),
    # remove them from the pool and flatten them into one combined cluster.
    while broken_clusters:
        if len(broken_clusters) < num_combine:
            clusters = list(broken_clusters)
        else:
            clusters = random.sample(broken_clusters, num_combine)
        for _cluster in clusters:
            # list.remove() drops the first chunk that compares equal.
            broken_clusters.remove(_cluster)
        combined_clusters.append(utils.flatten(clusters))
    # Wrap every combined document list in a cluster.Cluster.
    clusters = map(lambda combined_cluster: cluster.Cluster(combined_cluster), combined_clusters)
    ncluster.test_features(clusters, min_size)
def kmeans_clustering(cluster_list, num_clusters, num_iterations):
    """
    Compute the k-means clustering of a set of clusters
    Note: the function may not mutate cluster_list

    Input: List of clusters, integers number of clusters and number of iterations
    Output: List of clusters whose length is num_clusters

    With num_iterations == 0 the initial centers (the num_clusters most
    populous members of cluster_list) are returned unchanged.
    Raises ValueError when a member cannot be assigned to any center.
    """
    # Position the initial centers at the clusters with the largest
    # populations; sorted() works on a copy so cluster_list keeps its order.
    centers = sorted(cluster_list,
                     key=lambda item: item.total_population(),
                     reverse=True)[:num_clusters]

    for _ in range(num_iterations):
        # Start every iteration from num_clusters empty clusters.
        groups = [cluster.Cluster(set(), 0, 0, 0, 0)
                  for _unused in range(num_clusters)]
        for member in cluster_list:
            best_idx, best_dist = None, float('inf')
            # Track the index directly: one distance() call per pair and no
            # equality-based list search afterwards.
            for idx, center in enumerate(centers):
                dist = member.distance(center)
                if dist < best_dist:
                    best_idx, best_dist = idx, dist
            if best_idx is None:
                raise ValueError('no cluster center available for assignment')
            groups[best_idx].merge_clusters(member)
        centers = groups
    return centers
def __init__(self, options):
    """Initialise the scheduling module, its schedulers and the platform.

    ``options.file`` is the platform configuration file handed to
    ``py_lat_module.lat_module_init``.  Each initialisation step reports
    either an error with the return code or "Success."; a failure is
    reported but does not abort construction.
    """
    self.options = options

    # We initialize the scheduling module
    print("Using the platform configuration file %s" % self.options.file)
    rc = py_lat_module.lat_module_init("verbose", "1",
                                       "ini_config_file", self.options.file)
    if rc != 0:
        print("ERROR: lat_module_init() failed (ret: %d)\n" % rc)
    else:
        print("Success.\n")

    # (banner, name of the py_lat_module function to call)
    scheduler_steps = (
        ("Initializing the AFE scheduler...", "lat_device_sched_init"),
        ("Initializing the host scheduler...", "lat_host_sched_init"),
        ("Initializing the meta scheduler...", "lat_meta_sched_init"),
    )
    for banner, func_name in scheduler_steps:
        print(banner)
        rc = getattr(py_lat_module, func_name)()
        if rc != 0:
            # Name the function that actually failed.
            print("ERROR: %s() failed (ret: %d)\n" % (func_name, rc))
        else:
            print("Success.\n")

    self.py_lat_module = py_lat_module

    # Setup the virtual platform
    self.cluster = cluster.Cluster(options)
def __init__(self, nClusters, nTotalUnits, Ti, Tn, NBin_nEntries, ZF):
    """Set up the scheduling state, the central memory and the clusters.

    nClusters     -- number of cluster.Cluster objects to create
    nTotalUnits   -- total unit count, divided evenly between the clusters
    Ti, Tn, NBin_nEntries -- forwarded unchanged to every cluster
    ZF            -- stored on the instance as self.ZF
    """
    #schedule
    self.wakeQ = SortedDict()
    self.now = 0
    self.ZF = ZF
    self.VERBOSE = op.verboseDirector
    self.nClusters = nClusters
    self.Tn = Tn # it is used when assigning filters to clusters
    # NOTE(review): "/" is true division on Python 3 and would give a float
    # here -- confirm whether integer division is intended.
    self.nUnitsCluster = nTotalUnits / nClusters
    #components
    # The memory and every cluster receive this object (self) as their first
    # constructor argument.
    self.centralMem = simpleMemory.SimpleMemory(self, op.CM_size, op.CM_nPorts, op.CM_bytesCyclePort)
    self.clusters = []
    self.coordsWindow = {}
    self.clustersProcWindow = { } # [windowID] -> count of clusters processing this window
    self.filtersPending = {}
    self.clustersReadingWindow = {}
    self.output = []
    # Create the clusters one by one; each gets its index and the two
    # callbacks cbClusterDoneReading / cbClusterDone.
    for i in range(nClusters):
        self.clusters.append( cluster.Cluster(self, i, self.nUnitsCluster, Ti, Tn, NBin_nEntries, op.SB_size_per_cluster, self.cbClusterDoneReading, self.cbClusterDone))
def run_example():
    """
    Load a data table, cluster it sequentially and plot the result.

    Every table row becomes a singleton cluster built from its first five
    fields; the singletons are reduced with sequential_clustering(..., 15).
    Set DESKTOP = True/False to plot with matplotlib or simplegui.
    """
    data_table = load_data_table(DATA_3108_URL)
    singleton_list = [
        cluster.Cluster({row[0]}, row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    cluster_list = sequential_clustering(singleton_list, 15)
    cluster_count = str(len(cluster_list))
    print("Displaying " + cluster_count + " sequential clusters")

    # Alternative algorithms (hierarchical / k-means from the algos module)
    # are currently disabled.

    if not DESKTOP:
        # use toggle in GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
        return
    # The final False draws the clusters without their centers.
    plot.plot_clusters(data_table, cluster_list, False)
def main():
    """Command-line entry point: validate a cluster spec and run a command.

    Accepted argument shapes (after the program name):
      spec_help                          -- print help on the spec format
      <cspec_path> <command>             -- run command with empty scope
      <cspec_path> <command> -debug      -- same, with debug enabled
      <cspec_path> <command> <scope>     -- run command on the given scope
      <cspec_path> <command> <scope> -debug
    Any other shape prints the usage text and exits.
    """
    if len(sys.argv) == 2 and sys.argv[1] == "spec_help":
        spechelp = spec_help.SpecHelp()
        spechelp.express()
        uu = user_usage.UserUsage()
        uu.log_command("spec_help", '')
        exit()
    if len(sys.argv) < 3 or len(sys.argv) > 5:
        usage()
        exit()

    cspec_path = sys.argv[1]
    permute_command = sys.argv[2]
    if len(sys.argv) == 3:
        scope = ''
        debug = False
    elif len(sys.argv) == 4:
        if sys.argv[3] == '-debug':
            scope = ''
            debug = True
        else:
            scope = sys.argv[3]
            debug = False
    else:  # len(sys.argv) == 5
        if sys.argv[4] != '-debug':
            usage()
            # Stop here: continuing would leave scope/debug undefined.
            exit()
        scope = sys.argv[3]
        debug = True

    if permute_command == "new_spec":
        cluster_spec.generate_new_spec(cspec_path)
        uu = user_usage.UserUsage()
        uu.log_command(permute_command, '')
        exit()

    validate_args(permute_command, scope)
    validate_cspec_is_cspec(cspec_path)
    # The context manager closes the spec file even if reading fails.
    with open(cspec_path, 'r') as f:
        cspec_lines = f.readlines()

    true_stdout = stdout.Stdout()
    validated, missing_optionals = cluster_spec.validate(
        cspec_lines, true_stdout)
    if not validated:
        exit()

    cspec = cluster_spec.ClusterSpec(cspec_path, cspec_lines, true_stdout,
                                     missing_optionals, True, debug)
    cluster_runs = cluster_runs_info.ClusterRunsInfo(cspec, true_stdout)
    hp_cluster = cluster.Cluster(cluster_runs, true_stdout)
    pdriver = permutation_driver.PermutationDriver(cspec_lines, cspec_path,
                                                   true_stdout, hp_cluster)
    uu = user_usage.UserUsage()
    uu.log_command(permute_command, scope)
    pdriver.run_command(permute_command, scope)
def clusterephem(clustername):
    """Return a comma-separated ephemeris line for the named cluster.

    The cluster is looked up by ``clustername`` and the line is assembled
    from its coordinates, solar magnitude and diameter.
    NOTE(review): the layout looks like an XEphem database entry for a
    fixed object at epoch 2000 -- confirm against the consumer.
    """
    # Look up the requested cluster rather than a fixed catalogue entry.
    c = cluster.Cluster(clustername)
    # Join the coordinate parts with commas and use ':' as field separator.
    coords = ','.join(c.coordinatestring).replace(' ', ':')
    mag = c.solarmagnitude
    # presumably converts arcminutes to arcseconds -- TODO confirm the unit
    diam = c['diam'] * 60
    return '%s,f|O,%s,%.1f,2000,%i' % (clustername, coords, mag, diam)
def gen_random_clusters(num_clusters):
    """
    Return a list of num_clusters clusters with random centers.

    Both center coordinates are drawn with uniform(-1, 1); the two remaining
    constructor arguments are random integers from 1 to 100.
    """
    generated = []
    for _ in range(num_clusters):
        # Draw in the same order as the constructor arguments.
        first_coord = uniform(-1, 1)
        second_coord = uniform(-1, 1)
        # presumably population and risk -- confirm against cluster.Cluster
        generated.append(
            cluster.Cluster(set(), first_coord, second_coord,
                            randrange(1, 101), randrange(1, 101)))
    return generated
def kmeans(examples, k, verbose=False):
    """
    Cluster the examples into k groups with k-means.

    :param examples: samples to cluster; each must offer distance(centroid)
    :param k: number of cluster centroids
    :param verbose: when true, print every cluster after each iteration
    :return: the list of k clusters once no centroid moves any more
    :raises ValueError: if a cluster receives no example in an iteration
    """
    # Pick k random examples as initial centroids; each starts a cluster that
    # holds only that one example.
    clusters = [cluster.Cluster([seed]) for seed in random.sample(examples, k)]

    def nearest(example):
        """Return the index of the cluster whose centroid is closest."""
        # Assume the first cluster is closest, then correct as needed.
        best_index = 0
        best_distance = example.distance(clusters[0].getCentroid())
        for candidate in range(1, k):
            candidate_distance = example.distance(
                clusters[candidate].getCentroid())
            if candidate_distance < best_distance:
                best_index, best_distance = candidate, candidate_distance
        return best_index

    iteration = 0
    while True:
        iteration += 1
        # k distinct empty lists, one per cluster.
        assignments = [[] for _ in range(k)]
        for example in examples:
            assignments[nearest(example)].append(example)

        # Avoid having empty clusters.
        if any(len(group) == 0 for group in assignments):
            raise ValueError('Empty Cluster')

        # update() replaces a cluster's examples and returns how far its
        # centroid moved; any positive shift means another pass is needed.
        shifts = [clusters[i].update(assignments[i]) for i in range(k)]
        moved = any(shift > 0.0 for shift in shifts)

        if verbose:
            print('Iteration #' + str(iteration))
            for current in clusters:
                print(current)
            print('')  # add blank line

        if not moved:
            return clusters
def main(argv=sys.argv, json_writer=simple_json_writer):  # pragma: no cover
    """Dispatch one provider command of the form ``cmd -f input.json``.

    ``argv[1:]`` must hold exactly three items: the command, an ignored
    flag and the path of the JSON input file.  Supported commands are
    templates, create_machines, status / create_status / terminate_status,
    get_return_requests and terminate_machines; any other command is
    ignored.  Every exception is logged (or printed when no logger is
    available) instead of being propagated.
    """
    global logger
    try:
        provider_config, logger, fine = util.provider_config_from_environment()

        data_dir = os.getenv('PRO_DATA_DIR', os.getcwd())
        hostnamer = util.Hostnamer(provider_config.get("cyclecloud.hostnames.use_fqdn", True))
        cluster_name = provider_config.get("cyclecloud.cluster.name")

        provider = CycleCloudProvider(config=provider_config,
                                      cluster=cluster.Cluster(cluster_name, provider_config, logger),
                                      hostnamer=hostnamer,
                                      json_writer=json_writer,
                                      terminate_requests=JsonStore("terminate_requests.json", data_dir),
                                      templates=JsonStore("templates.json", data_dir, formatted=True),
                                      clock=true_gmt_clock)
        provider.fine = fine

        # every command has the format cmd -f input.json
        cmd, ignore, input_json_path = argv[1:]

        input_json = util.load_json(input_json_path)

        if provider.fine:
            logger.debug("Arguments - %s %s %s", cmd, ignore, json.dumps(input_json))

        if cmd == "templates":
            provider.templates()
        elif cmd == "create_machines":
            provider.create_machines(input_json)
        elif cmd in ["status", "create_status", "terminate_status"]:
            if "requests" in input_json:
                # provider.status handles both create_status and deprecated terminate_status calls.
                provider.status(input_json)
            elif cmd == "terminate_status":
                # doesn't pass in a requestId but just a list of machines.
                provider.terminate_status(input_json)
            else:
                # should be impossible
                # One placeholder per value, so building the message cannot
                # itself fail with a TypeError.
                raise RuntimeError("Unexpected input json for cmd %s: %s" % (cmd, input_json))
        elif cmd == "get_return_requests":
            provider.get_return_requests(input_json)
        elif cmd == "terminate_machines":
            provider.terminate_machines(input_json)

    except Exception as e:
        # Covers ImportError as well; the logger may not exist yet when the
        # failure happened while reading the provider configuration.
        if logger:
            logger.exception(unicode(e))
        else:
            import traceback
            traceback.print_exc()
def __init__(self):
    """Read ``config.yaml`` and build the clustering and storage helpers.

    Required configuration keys: mongourl, database, collection,
    collection_new, dataset_path and temp_path.  A missing key raises
    KeyError.
    """
    # Close the configuration file as soon as it has been parsed.
    with open("config.yaml") as config_file:
        cfg = yaml.safe_load(config_file)

    mongourl = cfg['mongourl']
    database = cfg['database']
    collection = cfg['collection']
    collection_new = cfg['collection_new']
    self.dataset_path = cfg['dataset_path']
    self.temp_path = cfg['temp_path']

    self.cluster = cluster.Cluster()
    self.vec = vectorize.Vectors()
    # Two handles on the same database, one per collection.
    self.db = db_helper.Db(mongourl, database, collection)
    self.db_new = db_helper.Db(mongourl, database, collection_new)
def get_cluster(self, name=None):
    """Return the clusters found under this datacenter.

    Without ``name`` a list with one cluster object per cluster entity is
    returned.  With ``name`` the first cluster of that name is returned as a
    single object, or an empty list when no cluster matches.
    """
    found = []
    for entity in self.mor.hostFolder.childEntity:
        # Only cluster compute resources are of interest.
        if entity.__class__.__name__ != "vim.ClusterComputeResource":
            continue
        if not name:
            found.append(cluster.Cluster(self._server, self, entity))
        elif name == entity.name:
            found.append(cluster.Cluster(self._server, self, entity))
            # The first match is enough when searching by name.
            break
    if name and found:
        return found[0]
    return found
def kmeans(examples, k, verbose = False):
    """Cluster ``examples`` into ``k`` groups with k-means.

    examples -- samples to cluster; each must provide distance(centroid)
    k        -- number of clusters
    verbose  -- when true, print the clusters after every iteration

    Returns the list of k clusters once no centroid moves.
    Raises ValueError('Empty Cluster') when an iteration leaves a cluster
    without any example.
    """
    #Get k randomly chosen initial centroids, create cluster for each
    initialCentroids = random.sample(examples, k)
    clusters = [cluster.Cluster([e]) for e in initialCentroids]

    #Iterate until centroids do not change
    converged = False
    numIterations = 0
    while not converged:
        numIterations += 1
        #Create a list containing k distinct empty lists
        newClusters = [[] for _ in range(k)]

        #Associate each example with closest centroid
        for e in examples:
            #Find the centroid closest to e
            smallestDistance = e.distance(clusters[0].getCentroid())
            index = 0
            for i in range(1, k):
                distance = e.distance(clusters[i].getCentroid())
                if distance < smallestDistance:
                    smallestDistance = distance
                    index = i
            #Add e to the list of examples for appropriate cluster
            newClusters[index].append(e)

        #Avoid having empty clusters. The lists must stay aligned with
        #clusters, so an empty one is reported instead of being dropped.
        if not all(newClusters):
            raise ValueError('Empty Cluster')

        #Update each cluster; check if a centroid has changed
        converged = True
        for current, members in zip(clusters, newClusters):
            if current.update(members) > 0.0:
                converged = False
        if verbose:
            print('Iteration #' + str(numIterations))
            for c in clusters:
                print(c)
            print('') #add blank line
    return clusters
def run(self, max_clusters, distances):
    """
    Run agglomerative clustering until at most max_clusters remain.

    :param max_clusters: largest number of clusters allowed in self.clusters
    :param distances: matrix of calculated distances
    :return: the clusters as returned by self.sort_by_id()
    """
    # Seed phase: every sample starts as its own single-member cluster.
    for sample in self.samples:
        self.clusters.append(cluster.Cluster(sample.id, {sample}))

    # Merge phase: keep merging the pair picked by min_in_distances.
    while len(self.clusters) > max_clusters:
        pair = self.min_in_distances(distances)
        self.update_clusters(pair[0], pair[1])

    return self.sort_by_id()
def form_clusters(instances, assignments):
    '''
    Takes a list of instances and assignments and returns a list of
    Cluster objects with Mocked centers.

    instances   - the clustered things
    assignments - cluster index per instance; must offer max() and iteration
    Returns a list holding only the non-empty clusters, each labelled via
    set_label().
    '''
    # One MockCenter instance is shared by every cluster.
    shared_center = MockCenter()
    shared_center.label = None

    cluster_map = {}
    for cluster_id in range(assignments.max() + 1):
        cluster_map[cluster_id] = cluster.Cluster([], shared_center, cluster_id)

    for instance, assignment in zip(instances, assignments):
        cluster_map[assignment].members.append(instance)

    # Explicit list and loop: filter()/map() are lazy on Python 3, so the
    # labels would never be set and callers would receive an iterator.
    clusters = [entry for entry in cluster_map.values() if len(entry.members)]
    for entry in clusters:
        entry.set_label()
    return clusters
def read_clusters_sorted_by_time(file_name, named_entities_file_name):
    """Read tweets and their clusters from two comma-separated files.

    named_entities_file_name -- lines of ``key,entities``; only the first
        comma splits, and the rest of the line is kept as the value.
    file_name -- one tweet per line with the fields cluster id, cluster
        name, tweet id, timestamp, user id, space-separated tokens, content.

    Returns ``(tweets, clusters)``: the list of all tweets in file order and
    a dict mapping cluster id to its cluster object.
    """
    tweets = []
    clusters = {}
    clusters_timeline = []  # clusters in creation order
    named_entities = {}

    # Both handles are closed on exit, even when parsing fails.
    with open(file_name, "r") as file_to_read, \
            open(named_entities_file_name, 'r') as file_entities:
        for line in file_entities:
            data = line.split(",", 1)
            named_entities[data[0]] = data[1]

        for line in file_to_read:
            data = line.split(",")
            # NOTE(review): named_entities.get() returns None for an unknown
            # tweet id, which makes the membership test below raise
            # TypeError -- confirm every tweet id has an entry.
            tokens = [
                en.SimpleEntity(entity)
                if not (entity in named_entities.get(data[2]))
                else en.NamedEntity(entity)
                for entity in data[5].split(" ")
            ]
            tweet = tw.Tweet(clst_id=data[0], id=data[2],
                             timestamp_ms=data[3], user_id=data[4],
                             tokens=tokens, content=data[6])
            tweets.append(tweet)

            if not clusters.get(data[0]):
                new_cl = cluster.Cluster(clst_id=data[0], clst_name=data[1],
                                         created_time=data[3])
                clusters[data[0]] = new_cl
                oldest_valid_time = (new_cl.get_created_time()
                                     - ed.Constants.EPOCH * 8)
                # Walk back through earlier clusters, newest first, and stop
                # at the first one older than the validity window.
                for past_cluster in clusters_timeline[::-1]:
                    if past_cluster.get_created_time() >= oldest_valid_time:
                        new_cl.add_past_neighbour(past_cluster)
                    else:
                        break
                clusters_timeline.append(new_cl)
            clusters[data[0]].append(tweet)

    for clst_id in clusters:
        clusters[clst_id].aggregate_entities()
    return tweets, clusters
def kmeans_clustering(cluster_list, num_clusters, num_iterations):
    """
    Compute the k-means clustering of a set of clusters
    Note: the function may not mutate cluster_list

    Input: List of clusters, integers number of clusters and number of iterations
    Output: List of clusters whose length is num_clusters

    The result is only assigned inside the iteration loop, so
    num_iterations must be at least 1.
    """
    # Initial centers sit on the clusters with the largest populations; the
    # list is sorted ascending and read from its end.
    by_population = sorted(cluster_list,
                           key=lambda item: item.total_population())
    centers = []
    for rank in range(1, num_clusters + 1):
        seed = by_population[len(by_population) - rank]
        seed_horiz = seed.horiz_center()
        seed_vert = seed.vert_center()
        centers.append((seed_horiz, seed_vert))
    assert len(centers) == num_clusters  # force k centers

    last_iteration = num_iterations - 1
    for iteration in range(num_iterations):
        # k empty clusters positioned at the current centers
        clusters = [cluster.Cluster(set(), center[0], center[1], 0, 0)
                    for center in centers]

        # merge every input cluster into the one with the closest center
        for member in cluster_list:
            min_dist = float('inf')
            for center, candidate in zip(centers, clusters):
                vert = member.vert_center() - center[1]
                horiz = member.horiz_center() - center[0]
                dist = math.sqrt(vert**2 + horiz**2)
                if dist < min_dist:
                    merge_cluster, min_dist = candidate, dist
            merge_cluster.merge_clusters(member)

        # move the centers, except after the final pass
        if iteration < last_iteration:
            centers = [(merged.horiz_center(), merged.vert_center())
                       for merged in clusters]
    return clusters
def form_clusters_alt(instances, l_idx):
    '''
    Build clusters from explicit index lists.

    instances - list of clustered things
    l_idx - list of lists of indices into instances
            e.g. [ [1, 3, 5], [0, 2, 4] ]
    Returns a list holding only the non-empty clusters, each labelled via
    set_label().
    '''
    # One MockCenter instance is shared by every cluster.
    shared_center = MockCenter()
    shared_center.label = None

    candidates = []
    for indices in l_idx:
        group = cluster.Cluster([], shared_center)
        for idx in indices:
            group.members.append(instances[idx])
        candidates.append(group)

    # Explicit list and loop: filter()/map() are lazy on Python 3, so the
    # labels would never be set and callers would receive an iterator.
    clusters = [group for group in candidates if len(group.members)]
    for group in clusters:
        group.set_label()
    return clusters
def __enter__(self):
    """Start a coordinator and servers, connect a client and return self.

    If anything fails after the cluster context has been entered, the
    cluster is shut down via its __exit__ and the exception is re-raised.
    """
    self.last_unused_port = 12247
    import random
    # Random 8-letter cluster name; range() excludes ord('z'), so the letters
    # are drawn from 'a'..'y'.
    self.clusterName = ''.join(
        [chr(random.choice(range(ord('a'), ord('z')))) for c in range(8)])
    self.num_hosts = 8
    require_hosts(self.num_hosts)
    self.servers = []
    self.cluster = cluster.Cluster()
    #self.cluster.verbose = True
    self.cluster.log_level = 'DEBUG'
    self.cluster.transport = 'infrc'
    self.cluster.__enter__()

    try:
        self.cluster.start_coordinator(hosts[0])
        # Hack below allows running with an existing coordinator
        #self.cluster.coordinator_host = hosts[0]
        #self.cluster.coordinator_locator = cluster.coord_locator(self.cluster.transport,
        #                                                         self.cluster.coordinator_host)
        # Pass --sync to the servers when the current test method carries a
        # 'sync' attribute.
        syncArgs = ''
        if hasattr(getattr(self, self._testMethodName), 'sync'):
            syncArgs = '--sync'
        # One server per host, limited to the first num_hosts hosts.
        for host in hosts[:self.num_hosts]:
            self.servers.append(
                self.cluster.start_server(host, args='--clusterName=%s %s' % (self.clusterName, syncArgs)))
            # Hack below can be used to use different ports for all servers
            #self.servers.append(
            #    self.cluster.start_server(host,
            #                              port=self.last_unused_port,
            #                              args='--clusterName=%s' % self.clusterName))
            #self.last_unused_port += 1
        self.cluster.ensure_servers()

        self.rc = ramcloud.RAMCloud()
        # Print the log directory on stderr without a trailing newline.
        print('%s ... ' % self.cluster.log_subdir, end='', file=sys.stderr)
        self.rc.set_log_file(
            os.path.join(self.cluster.log_subdir, 'client.log'))
        self.rc.connect(self.cluster.coordinator_locator)
    except:
        # Tear the cluster down on any failure, then propagate the error.
        self.cluster.__exit__()
        raise
    return self
def form_clusters(true_labels, predicted_labels):
    """Group true labels into clusters according to the predicted labels.

    true_labels      -- iterable of ground-truth labels
    predicted_labels -- predicted cluster index per item; must offer max()
                        and iteration
    Returns a list of the non-empty clusters, each labelled via set_label().
    Every member is a small object carrying ``_id`` (its position in the
    input) and ``label`` (its true label).
    """
    class Mock:
        pass

    cluster_map = {}
    for cluster_id in range(predicted_labels.max() + 1):
        center = Mock()
        center.label = None
        cluster_map[cluster_id] = cluster.Cluster([], center, cluster_id)

    pairs = zip(true_labels, predicted_labels)
    for position, (true, predicted) in enumerate(pairs):
        member = Mock()
        member._id = position
        member.label = true
        cluster_map[predicted].members.append(member)

    # Explicit list and loop: filter()/map() are lazy on Python 3, so the
    # labels would never be set and callers would receive an iterator.
    clusters = [entry for entry in cluster_map.values() if entry.members]
    for entry in clusters:
        entry.set_label()
    return clusters
def em_random_restarts(num_restarts, num_clusters, parameters_df):
    """Run EM num_restarts times and keep the run with the highest value.

    Every restart creates num_clusters fresh clusters, initialises them from
    parameters_df and runs em.em on them.  Returns ``(ll, clusters)`` for the
    best restart, or ``(None, None)`` when num_restarts is zero.
    """
    best_ll = None
    best_clusters = None
    for _restart in range(num_restarts):
        # Fresh cluster statistics for this restart
        candidate = [cl.Cluster() for _slot in range(num_clusters)]
        cl.initialize_clusters(candidate, parameters_df)

        candidate_ll = em.em(parameters_df, candidate)

        # The first run always wins; later runs must be strictly better.
        if best_ll is None or candidate_ll > best_ll:
            best_ll, best_clusters = candidate_ll, candidate
    return best_ll, best_clusters
def form_clusters(instances, assignments):
    """Build one cluster per index below ``_number_of_clusters`` and fill it.

    instances   -- sequence of items; item[0] is used as id, item[1] as label
    assignments -- cluster index for each instance
    Returns the list of all clusters (empty ones included), each labelled
    via set_label().
    """
    print("Forming Clusters")

    class Mock:
        pass

    cluster_map = {}
    for cluster_id in range(_number_of_clusters):
        center = Mock()
        center.label = None
        cluster_map[cluster_id] = cluster.Cluster([], center, cluster_id)

    for instance, assignment in zip(instances, assignments):
        member = Mock()
        member._id = instance[0]
        member.label = instance[1]
        cluster_map[assignment].members.append(member)

    # Real list and explicit loop: dict.values() is a view and map() is lazy
    # on Python 3, so the labels would otherwise never be set.
    clusters = list(cluster_map.values())
    for entry in clusters:
        entry.set_label()
    print("Done\n")
    return clusters
def __init__(self, **kwargs):
    """Create an empty project, optionally restoring it from a JSON file.

    Keyword arguments:
    file   -- open file object with the JSON produced for a saved project;
              must contain the keys 'clusters', 'solutions' and 'tests'
    output -- stored unchanged in self.output
    """
    self.tests = {}
    self.all_solutions = {}
    self.name = 'NO NAME'
    self.cached_solutions = None
    self.skip_count = 0
    self.clusters = {}
    self.cluster_size = 0
    self.dimentions = None
    self.output = None
    self.is_tl_hidden = True
    self.is_rt_hidden = True
    self.is_wa_hidden = True
    self.is_changed = False
    self.sources_path = ''
    self.tests_path = ''

    if 'file' in kwargs:
        data = json.load(kwargs['file'])
        self.clusters = {
            key: [cluster.Cluster(obj=c) for c in members]
            for key, members in data['clusters'].items()
        }
        self.all_solutions = dict(
            (e['name']['file'], solution.Solution(self, obj=e))
            for e in data['solutions'])
        self.tests = dict(
            (e['name'], testf.Test(obj=e)) for e in data['tests'])
        self.is_tl_hidden = data.get('is_tl_hidden', True)
        self.is_rt_hidden = data.get('is_rt_hidden', True)
        self.is_wa_hidden = data.get('is_wa_hidden', True)
        self.cluster_size = data.get('cluster_size', 0)
        self.name = data.get('name', 'NO NAME')

        # Derive a missing path from the first solution / test only when it
        # is needed and one exists; otherwise the '' default set above stays.
        if 'sources_path' in data:
            self.sources_path = data['sources_path']
        elif self.all_solutions:
            first_solution = next(iter(self.all_solutions.values()))
            self.sources_path = os.path.split(first_solution.filepath)[0]

        if 'tests_path' in data:
            self.tests_path = data['tests_path']
        elif self.tests:
            first_test = next(iter(self.tests.values()))
            self.tests_path = os.path.split(first_test.name)[0]

    if 'output' in kwargs:
        self.output = kwargs['output']
def clusterize(self, count):
    """Make the clustering for ``count`` clusters the current one.

    If a clustering under the name for ``count`` already exists it is only
    selected.  Otherwise clustering.clusterize is run, one cluster.Cluster
    is created per returned center and every label index is appended to the
    ``elements`` of its cluster.  ``self.is_changed`` is set in both cases.
    """
    # Check the cache before switching the current cluster name.
    already_built = self.current_cluster_name(count) in self.clusters
    self.update_current_cluster_name(count)
    if already_built:
        self.is_changed = True
        return

    labels, centers = clustering.clusterize(self, kmax=count)
    labels = self.sort_labels(labels)
    self.is_changed = True

    cluster_name = self.current_cluster_name()
    built = []
    for idx, center in enumerate(centers):
        built.append(cluster.Cluster(idx=idx,
                                     name='cluster {}'.format(idx),
                                     description='',
                                     center=center.tolist()))
    self.clusters[cluster_name] = built

    # Record each element's position under the cluster its label points to.
    for position, label in enumerate(labels):
        self.clusters[cluster_name][label].elements.append(position)
def kmeans(tweets, k, maxRound, cutoff):
    """Cluster tweets around k randomly chosen seed tweets.

    tweets   -- the tweets to cluster
    k        -- number of clusters
    maxRound -- maximum number of assignment/update rounds
    cutoff   -- threshold used both for assigning a tweet and for stopping

    Returns the list of k clusters.  The score from tweet_distance is
    treated as a similarity: the highest-scoring cluster wins.
    """
    # k random tweets, each seeding its own cluster
    seeds = random.sample(tweets, k)
    clusters = [cluster.Cluster(seed) for seed in seeds]

    completed_rounds = 0
    while completed_rounds < maxRound:
        # one empty bucket per cluster
        buckets = [[] for _unused in clusters]

        for tweet in tweets:
            # score against every centroid, normalised by the shorter of the
            # two token lists
            scores = []
            for candidate in clusters:
                raw_score = float(tweet_distance(tweet, candidate.centroid))
                shorter = min(len(tokenise(tweet)),
                              len(tokenise(candidate.centroid)))
                scores.append(raw_score / shorter)

            best_score = max(scores)
            best_index = scores.index(best_score)
            if best_score < cutoff:
                # fits no cluster well enough: pick a cluster at random
                buckets[random.sample(range(k), 1)[0]].append(tweet)
            else:
                buckets[best_index].append(tweet)

        # Update the clusters and remember the largest value update() gave.
        biggest_shift = 0.0
        for current, bucket in zip(clusters, buckets):
            shift = current.update(bucket)
            biggest_shift = max(biggest_shift, shift)

        # Stop as soon as the largest value returned by update() exceeds
        # the cutoff.
        if biggest_shift > cutoff:
            break
        completed_rounds = completed_rounds + 1
    return clusters
def update(self):
    """Advance this object by one step: move it and react to its attitude.

    NOTE(review): the block nesting below was reconstructed from a collapsed
    line -- confirm that the runaway check belongs inside the "friends"
    branch.
    """
    movements.move_path(self)
    if self.attitude == Attitude["friends"]:
        print "runway ", self.runaway
    if self.attitude == Attitude["friends"]:
        self.run_away()
        #print self.runaway
        # Once the runaway counter reaches 100 the object leaves its cluster
        # and becomes "friendly" in a new cluster of its own.
        if self.runaway >= 100:
            print "change attitude"
            self.cluster.remove_member(self)
            # The new cluster starts at this object's current position.
            self.cluster = cluster.Cluster(
                attitude=Attitude["friendly"],
                starting_positions=[list(self.rect.center)],
                start_position=self.rect.bottom)
            game.main_game.add_clusters([self.cluster])
            self.change_attitude(Attitude["friendly"])
            self.set_path(movements.happy_dance, default=True)
            self.cluster.add_cluster(happiness=self.happiness,
                                     movement=self.default_movement)
    if self.fadeaway:
        self.fade_away()
def gen_random_clusters(num_clusters):
    '''
    Time slow_closest_pair against fast_closest_pair on random clusters.

    Builds num_clusters clusters whose two center coordinates each come
    from random.random(), i.e. lie in [0, 1), runs each closest-pair
    function 100 times on the same clusters and returns the tuple
    (slow_time, fast_time) with the average seconds per call.
    '''
    random_data1 = [cluster.Cluster(set(), random.random(), random.random(), 0, 0)
                    for _ in range(num_clusters)]
    # Shallow copy: both functions see the same cluster objects.
    random_data2 = random_data1[:]

    time1 = time.time()
    for _ in range(100):
        slow_closest_pair(random_data1)
    time2 = time.time()
    slow_time = (time2 - time1)/100

    time1 = time.time()
    for _ in range(100):
        fast_closest_pair(random_data2)
    time2 = time.time()
    fast_time = (time2 - time1)/100

    return (slow_time, fast_time)
def cluster(self, *args):
    """Handle the ``cluster`` command.

    args[0] is the action.  ``list`` needs no further argument; every other
    action needs a cluster name in args[1].  ``status`` prints the cluster
    status, ``create`` reports a failed creation and anything else is
    reported as unknown.  Problems are printed, nothing is raised or
    returned.
    """
    args = list(args)
    if not args or not args[0]:
        print("cluster command requires an action. Valid actions are: ")
        print(" list\n status")
        return
    action = args.pop(0)

    if action == 'list':
        cluster.list()
        return

    # actions that require a cluster name
    if not args or not args[0]:
        print("cluster name required for {}".format(action))
        return
    cluster_name = args.pop(0)
    # The cluster object is built before the action is checked.
    cl = cluster.Cluster(cluster_name)
    if action == 'status':
        print("{} status: {}".format(cluster_name, cl.status()))
    elif action == 'create':
        if not cl.create():
            print("Failed to bring up {} cluster".format(cluster_name))
    else:
        print("Unknown cluster command: {}".format(action))
def kmeans_clustering(cluster_list, num_clusters, num_iterations):
    """
    Compute the k-means clustering of a set of clusters.
    The initial points used as the centers are the clusters with largest
    population.

    Input: List of clusters, number of clusters, number of iterations
    Output: List of clusters of length num_clusters

    With num_iterations == 0 the initial centers are returned as they are.
    """
    ranked = list(cluster_list)
    ranked.sort(key=lambda item: item.total_population(), reverse=True)
    centers = ranked[:num_clusters]

    for _ in range(num_iterations):
        # One empty cluster per current center, placed at that center and
        # carrying the center's averaged risk.
        groups = []
        for center in centers:
            groups.append(
                cluster.Cluster(set(), center.horiz_center(),
                                center.vert_center(), 0,
                                center.averaged_risk()))
        # min_pair gives the index of the center each member is merged into.
        for member in cluster_list:
            groups[min_pair(member, centers)].merge_clusters(member)
        centers = groups
    # centers is the latest set of clusters and is always bound, even when
    # the loop body never ran.
    return centers