def main():
    # A value which determines whether we have to stop updating clusters
    # (the optimization has 'converged')
    epsilon = 0.001
    printFlag = False

    myBag, collection = dataPreparation.dataCollect()
    myArray = dataPreparation.vectorization(myBag, collection, printFlag)
    clusters = dataPreparation.klusterNum(myArray)

    # Number of the clusters
    numClusters = len(clusters)

    outfile = open("output.json", "w")

    # Cluster those data!
    print("Clustering started...\n")
    clusters = cluster.kmeans(myArray, numClusters, epsilon)
    print("Success\n")
    print("K-means precision = ", cluster.precision(clusters), "\n")

    # Print our clusters
    for i, c in enumerate(clusters):
        print("Cluster: ", i, file=outfile)
        for p in c.vectors:
            print("Vector :", p['topic'], p['id'], file=outfile)
        print('\n', file=outfile)
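# The `cluster` module used above isn't shown. As a rough illustration of the
# epsilon-based stopping rule described in the comment, here is a minimal,
# hypothetical k-means sketch using NumPy; the signature and the
# (labels, centroids) return value are assumptions, not the real helper
# (which evidently returns cluster objects exposing `.vectors`).
import numpy as np

def kmeans_sketch(vectors, k, epsilon, max_iter=100, seed=0):
    """Lloyd's algorithm; stops once no centroid moves more than `epsilon`."""
    rng = np.random.default_rng(seed)
    X = np.asarray(vectors, dtype=float)
    centroids = X[rng.choice(len(X), size=k, replace=False)]
    labels = np.zeros(len(X), dtype=int)
    for _ in range(max_iter):
        # Assign every vector to its nearest centroid.
        distances = np.linalg.norm(X[:, None, :] - centroids[None, :, :], axis=2)
        labels = distances.argmin(axis=1)
        # Recompute centroids, keeping the old one for any empty cluster.
        new_centroids = np.array([
            X[labels == j].mean(axis=0) if np.any(labels == j) else centroids[j]
            for j in range(k)
        ])
        converged = np.linalg.norm(new_centroids - centroids, axis=1).max() <= epsilon
        centroids = new_centroids
        if converged:
            break
    return labels, centroids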
def run_transform(name, data_x, data_y, transformer):
    print("Working on {}...".format(name))
    report_name = "reports/{}_nn_output.txt".format(name)
    sys.stdout = open(report_name, "w")

    # 2: transform the data
    transform_x = transformer(data_x, data_y, name)
    plot_corr(name, pd.DataFrame(data=transform_x), data_y)

    kmeans_name = "{} KMeans Clustered".format(name)
    gmm_name = "{} GMM Clustered".format(name)

    # 3: cluster the transformed data
    kmeans_clustered = kmeans(kmeans_name, transform_x, data_y)
    gmm_clustered = gmm(gmm_name, transform_x, data_y)

    # 4: run neural network on transformed data
    x_train, x_test, y_train, y_test = split_data(transform_x, data_y)
    run_nn(name, x_train, x_test, y_train, y_test)

    # 5: call run_nn on clusters from #3 (clustered from dimensionally reduced data)
    kmx_train, kmx_test, kmy_train, kmy_test = split_data(kmeans_clustered, data_y)
    run_nn(kmeans_name, kmx_train, kmx_test, kmy_train, kmy_test)

    gmmx_train, gmmx_test, gmmy_train, gmmy_test = split_data(gmm_clustered, data_y)
    run_nn(gmm_name, gmmx_train, gmmx_test, gmmy_train, gmmy_test)

    sys.stdout = sys.__stdout__
    print("Finished {}!".format(name))
    print()
def kmeans(args, train_features, train_labels, test_features, test_labels):
    """Perform KMeans clustering.

    Options:
        n (int): number of clusters used in KMeans.
    """
    return cluster.kmeans(args, train_features, train_labels)
def thresh_search(fname, nw=200, wl=10, w2v={}, ignore_list=[], top_n=None):
    f = open('thresh_search.csv', 'w+')
    cw = csv.writer(f)
    cw.writerow(['Thresh', 'Purity'])

    og = sys.stdout
    be_quiet = EmptyStdout()

    for t in range(0, 255):
        t = t / 5
        purities = []

        sys.stdout = be_quiet
        for _ in range(10):
            X, y = get_vectors(fname, thresh=t, nw=nw, wl=wl, w2v_params=w2v,
                               top_n=top_n, ignore_list=ignore_list)
            n = len(set(y)) + 1
            purities.append(kmeans(X, y, n)['purity']['Total'])
        sys.stdout = og

        # average purity across all runs
        purity = sum(purities) / len(purities)
        print('T: %f, Purity: %0.4f' % (t, purity))
        cw.writerow([t, purity])

    f.close()
def customClusterQuestions(docs, algorithm, parameters, removeOutliers=True):
    """Customized question clustering methods.

    Implemented as a knock-off to avoid interfering with production builds.

    Parameters:
        docs (list): List of strings (questions)
        algorithm (str): The clustering algorithm that will be used
            (DBSCAN, K Means, Gaussian K Means, Agglomerative Clustering)
        parameters (list): Parameters used to initiate the clustering
            algorithms (epsilon, clusters, strictness, threshold)
        removeOutliers (bool): Whether to remove outliers (default is True)

    Returns:
        corpus: Corpus that has a clusters list attached to it
    """
    corpus = tagAndVectorizeCorpus(docs)
    params = [{param["param"]: param["value"]} for param in parameters][0]

    if algorithm == "DBSCAN":
        return cluster.dbscan(corpus, float(params["epsilon"]))
    if algorithm == "K Means":
        return cluster.kmeans(corpus, int(params["clusters"]), removeOutliers)
    if algorithm == "Gaussian K Means":
        return cluster.g_kmeans(corpus, min(4, int(params["strictness"])), removeOutliers)
    if algorithm == "Agglomerative Clustering":
        return cluster.agglomerate(corpus, float(params["threshold"]), removeOutliers)
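# Hypothetical usage sketch for the dispatcher above; the question strings and
# the epsilon value are illustrative only, and `parameters` follows the
# list-of-{"param": ..., "value": ...} shape the code expects (only the first
# entry is read).
docs = [
    "How do I reset my password?",
    "How can I change my password?",
    "Where do I find my invoices?",
]
parameters = [{"param": "epsilon", "value": "0.5"}]
corpus = customClusterQuestions(docs, "DBSCAN", parameters, removeOutliers=True)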
def run_original(data_x, data_y, name):
    print("Running {} original charts and clustering".format(name))

    # Run Original
    report_name = "reports/{}_nn_output.txt".format(name)
    sys.stdout = open(report_name, "w")
    x_train, x_test, y_train, y_test = split_data(data_x, data_y)
    run_nn(name, x_train, x_test, y_train, y_test)
    plot_corr("{} Original".format(name), x_train, y_train)

    # Cluster Original (#1)
    kmeans("{} KMeans".format(name), data_x, data_y)
    gmm("{} GMM".format(name), data_x, data_y)

    sys.stdout = sys.__stdout__
    print("Finished {} original charts and clustering".format(name))
def saveKmeans(filename, target_dir, k, scvis=False):
    createDir(target_dir)
    _, k_mask = cluster.kmeans(filename, clusters=k, scvis=scvis)
    if not scvis:
        with open(target_dir + "color_mask_" + str(k) + ".txt", "w+") as o:
            for k_ in k_mask:
                o.write(str(k_) + "\n")
    else:
        with open(target_dir + "color_mask_" + str(k) + "_scvis.txt", "w+") as o:
            for k_ in k_mask:
                o.write(str(k_) + "\n")
def main():
    (options, args) = parser.parse_args()

    mfile = open(options.m, "w")  # map file
    print >> mfile, open(options.input1).read() % ("Twitter Users")
    print >> mfile, open(options.input2).read()

    sfile = open(options.script, "w")  # script file
    initialize = open(options.initialize, "r").read()
    print >> sfile, initialize % (38, -27)

    if options.f:
        ufile = open(options.u, "w")
        print >> ufile, options.f
        ufile.close()
    else:
        statout = commands.getstatusoutput("ls %s > %s" % (options.d, options.u))

    users = open(options.u, "r")  # user listing
    points = []
    num = 1
    for line in users:  # for each user in listing
        user = options.d + line[0:-1]  # absolute path to user data
        print "user: "******"/") + 1 : user.rfind(".")]  # isolate username
        print username
        if options.v:  # verbose option
            print username
        data = open(user, "r")  # user data file
        line1 = data.readline()
        print line1
        try:
            add, lat, lon, ts = line1.split("$xyzzy$")
            lon = lon[:-1]
            if lat and lon:
                points.append([username, lat, lon])
                # print " lat: %s; lon: %s" % (lat,lon)
                # if lat and lon and num<37:
                #     marker = open(options.marker).read()
                #     print >>sfile, marker % (num,lat,lon,num,num,username)
                #     num += 1
        except:
            print " error unpacking location"

    # print "points: %s" % points
    markers = kmeans(points, 50, 100)  # clusters of users
    print "markers: %s" % markers

    i = 0  # counter
    for m in markers:
        num = m[0]     # number of members in cluster
        lat = m[1][0]  # latitude of cluster mean
        lon = m[1][1]  # longitude of cluster mean
        if num > 0:    # cluster contains 1 or more users
            marker = open(options.marker).read()
            print >> sfile, marker % (i, lat, lon, i, i, num)
            i += 1

    print >> sfile, open(options.close, "r").read()
def generate_clusteredpoints(filepath, methodType, dataIndex, recordList):
    oripoints = processtool.read_points_fromfile(filepath)
    points = processtool.generate_plist(oripoints)

    if methodType == 'kmeans':
        kmeansfactory = kmeans(__kmeansk__[dataIndex], points)
        kmeansfactory.kmeansClusterWithRecord(recordList)
        # kmeansfactory.printResult()
        return kmeansfactory.points
    elif methodType == 'dbscan':
        # dbscanfactory = dbscan(1.1, 5, points)
        dbscanfactory = dbscan(epsSelect[dataIndex], MinPtsSelect[dataIndex], points)
        dbscanfactory.dbscanCluster()
        # dbscanfactory.printResult()
        return dbscanfactory.points
def __init__(self, keyParams, boolParams, numParams, samples,
             samplesCount, nodesCount, majorant=False):
    '''the LICS itself'''
    '''reverse keyparams pls!'''
    self.boolParams = boolParams
    self.numParams = numParams
    self.keyParams = keyParams
    self.keyStatements = [statements.get_statement(statements.op_takeValue, p)
                          for p in keyParams]
    self.boolStatements = [statements.get_statement(statements.op_takeValue, p)
                           for p in boolParams]
    self.numStatements = [statements.get_statement(statements.op_takeValue, p)
                          for p in numParams]
    self.samples = set(samples)
    self.samplesCount = samplesCount
    self.nodesCount = nodesCount
    self.tree = treeNode.treeNode(self.samples, self.keyStatements, majorant)
    self.clusteriser = cluster.kmeans(self, numParams, boolParams + keyParams, samplesCount)
    self.isMajorant = majorant
def approximate_corners(IMAGE_IN, IMAGE_OUT=None, PTS_OUT=None, HOM_OUT=None):
    """
    Approximates the locations of corners on each of two calibration boards in
    the ChESS response image with filename *IMAGE_IN*. If requested, an image
    is written to *IMAGE_OUT* for visual inspection, a matrix of (x, y) image
    coordinates in the shape of each board is np.save()'d to *PTS_OUT*0 and
    *PTS_OUT*1, respectively, and a homography from [x, y, 1]^T (with x in
    range(width) and y in range(height)) to each board's image coordinates is
    np.savetxt()'d to *HOM_OUT*[0,1].npy, again respectively.
    """
    # read in image and disregard pixels of very slight intensity
    image_in = np.asarray(cv2.imread(IMAGE_IN))
    nrows = len(image_in)
    ncols = len(image_in[0])
    image_out = np.zeros((nrows, ncols, 3), dtype=np.int)
    points = [[x, y, image_in[y][x][0]]
              for y in range(nrows)
              for x in range(ncols)
              if image_in[y][x][0] > 15]

    # 2-means cluster to find the boards
    x_min = min([point[0] for point in points])
    x_max = max([point[0] for point in points])
    boards = cl.kmeans(points, [[x_min, 0], [x_max, 0]], 5)

    # process each board separately
    for b, board in enumerate(boards):
        # do some filtering to (hopefully!) get one point per corner
        board = _reduce(board)

        # guess homography and point order
        if HOM_OUT is not None:
            THIS_HOM_OUT = HOM_OUT + str(b)
        else:
            THIS_HOM_OUT = None
        board = _reshape(board, THIS_HOM_OUT)

        # output
        if PTS_OUT is not None:
            np.save(PTS_OUT + str(b), board)
        if IMAGE_OUT is not None:
            _imwrite(image_out, board, b)

    # output
    if IMAGE_OUT is not None:
        cv2.imwrite(IMAGE_OUT, image_out)
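# The `cl` module above isn't shown. The call cl.kmeans(points, [[x_min, 0], [x_max, 0]], 5)
# suggests a k-means that takes explicit seed centroids and an iteration count,
# seeded at the left and right extremes so the two boards separate. A minimal
# sketch under that assumption (comparing only the (x, y) part of each point
# and returning one list of points per cluster are guesses, not the real API):
import numpy as np

def seeded_kmeans(points, init_centroids, n_iter):
    pts = np.asarray(points, dtype=float)
    centroids = np.asarray(init_centroids, dtype=float)
    k = len(centroids)
    labels = np.zeros(len(pts), dtype=int)
    for _ in range(n_iter):
        # Distance of every point to every centroid in (x, y) only.
        d = np.linalg.norm(pts[:, None, :2] - centroids[None, :, :], axis=2)
        labels = d.argmin(axis=1)
        for j in range(k):
            members = pts[labels == j]
            if len(members):
                centroids[j] = members[:, :2].mean(axis=0)
    return [pts[labels == j].tolist() for j in range(k)]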
def saveGeneTable(filename, file_after_reduction, target_dir, k, scvis=False):
    createDir(target_dir)
    _, k_mask = cluster.kmeans(file_after_reduction, clusters=k, scvis=scvis)
    data = loadTSV(filename)

    # group row indexes by their cluster id
    indexes = []
    for _ in range(k):
        indexes.append([])
    for index in range(len(k_mask)):
        indexes[k_mask[index]].append(index)

    # average the expression values of each cluster's rows
    result = np.zeros(shape=(k, len(data[0])))
    for _ in range(k):
        result[_] = np.mean(data[indexes[_]], axis=0)

    if not scvis:
        np.savetxt(target_dir + "geneTable_" + str(k) + ".txt",
                   result.transpose(), delimiter="\t")
    else:
        np.savetxt(target_dir + "geneTable_" + str(k) + "_scvis.txt",
                   result.transpose(), delimiter="\t")
def wl_search(fname, ignore_list=[], top_n=None, thresh=5, w2v={}):
    f = open('wl_search.csv', 'w+')
    cw = csv.writer(f)
    cw.writerow(['Num Walks', 'Walk Len', 'Purity'])

    og = sys.stdout
    be_quiet = EmptyStdout()

    for nw in range(100, 900, 100):
        if nw == 0:
            nw = 10
        for window in range(2, 10):
            w2v['window'] = window
            purities = []

            sys.stdout = be_quiet
            for _ in range(5):
                X, y = get_vectors(fname, thresh=thresh, nw=nw, wl=10,
                                   w2v_params=w2v, top_n=top_n,
                                   ignore_list=ignore_list)
                n = len(set(y)) + 1
                purities.append(kmeans(X, y, n)['purity']['Total'])
            sys.stdout = og

            purity = sum(purities) / 5
            print('NW: %d, WL: %d, Purity: %0.4f' % (nw, window, purity))
            cw.writerow([nw, window, purity])

    f.close()
def decide(self):
    start = time.time()
    print('decide' + ' ' + str(self.current_cycle))

    if self.my_side == 'Police':
        my_agents = self.world.polices
        # return 0
    else:
        my_agents = self.world.terrorists

    # ignore dead agents
    alive_agents = []
    for agent in my_agents:
        if agent.status == EAgentStatus.Dead:
            if self.my_side == 'Terrorist':
                if self.target_bombsites[agent.id]:
                    if self.target_bombsites[agent.id] in self.bombsites:
                        self.bombsites[self.target_bombsites[agent.id]]['task'] = 0
                        self.bombsites[self.target_bombsites[agent.id]]['status'] = -1
                    self.target_bombsites[agent.id] = None
        else:
            alive_agents.append(agent)

    # update bombsites if terrorists score is changed:
    exploded_bombsites = []
    if self.last_scores['Terrorist'] != self.world.scores['Terrorist']:
        for bombsite in self.bombsites:
            if self.world.board[bombsite[0]][bombsite[1]] == ECell.Empty:
                exploded_bombsites.append(bombsite)
        for bombsite in exploded_bombsites:
            del self.bombsites[bombsite]
        if exploded_bombsites:
            print(exploded_bombsites)
            updated, unreachables, bypassed_bombsites_list = False, [], []
            for i, bombsite in enumerate(self.unreachable_bombsites):
                distance, path, bypassed_bombsites = self._a_star(self.start_pos, bombsite, agent_block=False, bombsite_block=False)
                if not bypassed_bombsites:
                    self.bombsites[bombsite] = {'size': self.world.board[bombsite[0]][bombsite[1]], 'initial_distance': distance,
                                                'status': -2, 'task': 0, 'bscore': 0, 'failed': 0, 'agent': None, 'ert': -1}
                    updated = True
                    print('unreachable opened:', bombsite, self.bombsites[bombsite])
                else:
                    unreachables.append(bombsite)
                    bypassed_bombsites_list.append((bombsite, bypassed_bombsites))
            self.unreachable_bombsites = unreachables
            if updated:
                for bombsite in self.bombsites:
                    self.bombsites[bombsite]['bscore'] = 0
                for bombsite, bypassed_bombsites in bypassed_bombsites_list:
                    bypassed_bombsite = bypassed_bombsites[-1]
                    unreachable_size = self.world.board[bombsite[0]][bombsite[1]]
                    self.bombsites[bypassed_bombsite]['bscore'] += (self.BOMBSITE_COEFFICIENT[unreachable_size] * self.unreachable_bscore_coefficient)
                if self.my_side == 'Police':
                    bombsite_positions = sorted(self.bombsites, key=lambda x: self.bombsites[x]['initial_distance'])
                    self.clusters, self.cluster_centers = kmeans(bombsite_positions, len(my_agents))
                    self.cluster_index = [0] * len(my_agents)
                    print(self.clusters, self.cluster_centers)

    # prevent passing by exploding bombs
    for bomb in self.world.bombs:
        if bomb.explosion_remaining_time == 1:
            for neighbor in bomb.position.get_neighbours(self.world):
                self.world.board[neighbor.y][neighbor.x] = ECell.Wall

    if self.my_side == 'Terrorist':
        # update bombsites if bomb(s) is defused
        if self.last_scores['Police'] != self.world.scores['Police']:
            bomb_count = len(self.world.bombs)
            for last_bomb in self.last_bombs:
                for index, bomb in enumerate(self.world.bombs):
                    if bomb.position == last_bomb.position:
                        break
                    elif index == bomb_count - 1:
                        bombsite_position = self._position_to_tuple(last_bomb.position)
                        if bombsite_position in self.bombsites:
                            self.bombsites[bombsite_position]['status'] = -1
                            self.bombsites[bombsite_position]['failed'] += 2
        self.last_bombs = self.world.bombs

        # choose nearest agent for each bombsite if bombsites number is smaller than agents number
        not_planted_bombsites = 0
        for bombsite in self.bombsites:
            if self.bombsites[bombsite]['status'] != 2:
                not_planted_bombsites += 1
        if not_planted_bombsites < len(alive_agents):
            for agent_id, agent in enumerate(my_agents):
                if agent.planting_remaining_time == -1:
                    self.target_bombsites[agent_id] = None
            for bombsite in self.bombsites:
                if self.bombsites[bombsite]['status'] != 2:
                    min_distance = 1000
                    for agent in alive_agents:
                        if not self.target_bombsites[agent.id]:
                            distance, _ = self._a_star(agent.position, bombsite)
                            if distance < min_distance:
                                best_agent = agent.id
                                min_distance = distance
                    self.bombsites[bombsite]['task'] = 1
                    self.target_bombsites[best_agent] = bombsite
            print('targets:', self.target_bombsites)
    else:  # police
        # update bombsites status according to sounds and visions
        for bombsite in self.bombsites:
            if self.bombsites[bombsite]['status'] < 0:
                self.bombsites[bombsite]['status'] += 1
        in_vision_bombs = []
        for bomb in self.world.bombs:
            bombsite = self._position_to_tuple(bomb.position)
            self.bombsites[bombsite]['brt'] = bomb.explosion_remaining_time
            in_vision_bombs.append(bombsite)
        for agent in alive_agents:
            if agent.defusion_remaining_time == -1:
                for bombsite in self.bombsites:
                    if self.bombsites[bombsite]['status'] >= 0:
                        if self._heuristic(bombsite, agent.position) <= self.world.constants.police_vision_distance:
                            if not bombsite in in_vision_bombs:
                                if self._heuristic(bombsite, agent.position) == 1:
                                    self.bombsites[bombsite]['status'] = -1 * self.world.constants.bomb_planting_time - 1
                                else:
                                    self.bombsites[bombsite]['status'] = -2
                                self.bombsites[bombsite]['task'] = 0
                            else:
                                self.bombsites[bombsite]['status'] = 2
                sound_counts = {ESoundIntensity.Weak: 0, ESoundIntensity.Normal: 0, ESoundIntensity.Strong: 0}
                in_range_sites = {ESoundIntensity.Weak: [], ESoundIntensity.Normal: [], ESoundIntensity.Strong: []}
                for sound in agent.bomb_sounds:
                    sound_counts[sound] += 1
                for bombsite in self.bombsites:
                    if self._heuristic(bombsite, agent.position) <= self.world.constants.sound_ranges[ESoundIntensity.Weak]:
                        distance, _ = self._a_star(agent.position, bombsite, not_valid_ecells=[ECell.Wall], agent_block=False)
                        if distance:
                            minimum_distance = self.world.constants.police_vision_distance
                            for sound_kind in self.sound_kinds:
                                if distance <= self.world.constants.sound_ranges[sound_kind] and distance > minimum_distance:
                                    if sound_counts[sound_kind]:
                                        status = self.bombsites[bombsite]['status']
                                        in_range_sites[sound_kind].append((bombsite, status, distance))
                                    else:
                                        self.bombsites[bombsite]['status'] = -2
                                        if self.target_bombsites[agent.id] == bombsite:
                                            self.checked_bombsites[agent.id].append(bombsite)
                                    break
                                minimum_distance = self.world.constants.sound_ranges[sound_kind]
                for sound_kind in self.sound_kinds:
                    if sound_counts[sound_kind] > 0:
                        possibles, sures = [], []
                        for index, (bombsite, status, distance) in enumerate(in_range_sites[sound_kind]):
                            if status >= 2:
                                sures.append(bombsite)
                            elif status >= 0:
                                possibles.append(bombsite)
                        if len(sures) == sound_counts[sound_kind]:
                            for bombsite in possibles:
                                self.bombsites[bombsite]['status'] = -2
                        else:
                            status = int((len(sures) + len(possibles)) <= sound_counts[sound_kind]) + 1
                            for bombsite in possibles:
                                self.bombsites[bombsite]['status'] = status
                # print(agent.id,'at',agent.position,bombsite,status,in_range_sites)
        for bombsite in self.bombsites:
            if self.bombsites[bombsite]['ert'] > -1:
                self.bombsites[bombsite]['ert'] -= 1
            elif self.bombsites[bombsite]['status'] == 2:
                self.bombsites[bombsite]['ert'] = self.world.constants.bomb_explosion_time

    self.last_scores = self.world.scores
    # print(self.bombsites)

    for agent in alive_agents:
        if self.my_side == 'Police':
            bombsite_direction = self._find_bombsite_direction(agent)
            doing_bomb_operation = (agent.defusion_remaining_time != -1)
            if doing_bomb_operation:
                # defusing
                bombsite_position = self._sum_pos_tuples(self.DIR_TO_POS[bombsite_direction], (agent.position.y, agent.position.x))
                self._agent_print(agent.id, 'Continuing bomb operation')
                self.bombsites[bombsite_position]['status'] = 2
                if agent.defusion_remaining_time == 1:
                    self.bombsites[bombsite_position]['status'] = -1 * self.world.constants.bomb_planting_time - 1
                    self.bombsites[bombsite_position]['task'] = 0
                continue
            if bombsite_direction:
                try:
                    multiple = len(bombsite_direction)
                except:
                    multiple = False
                if multiple:
                    bombsite_direction = bombsite_direction[0]
                bombsite_position = self._sum_pos_tuples(self.DIR_TO_POS[bombsite_direction], (agent.position.y, agent.position.x))
                if self.target_bombsites[agent.id] == bombsite_position or multiple:
                    for bomb in self.world.bombs:
                        if self._position_to_tuple(bomb.position) == bombsite_position:
                            if bomb.explosion_remaining_time < self.world.constants.bomb_defusion_time:
                                has_time = False
                            else:
                                has_time = True
                            break
                    if has_time:
                        self._agent_print(agent.id, 'Starting bomb operation')
                        self.defuse(agent.id, bombsite_direction)
                        self.bombsites[bombsite_position]['task'] = 2
                        continue
                    else:
                        # self.scape_bombsite(agent)
                        self._agent_print(agent.id, 'direction: Ignoring bomb due to lack of time :(')
                        self.bombsites[bombsite_position]['status'] = 3
                        self.bombsites[bombsite_position]['task'] = 1
                        self.bombsites[bombsite_position]['agent'] = -1
            if self.world.bombs and not bombsite_direction:
                # bomb in vision:
                found = False
                for bomb in self.world.bombs:
                    bombsite_position = self._position_to_tuple(bomb.position)
                    if self.bombsites[bombsite_position]['task'] < 2 and self.bombsites[bombsite_position]['status'] < 3 and self.target_bombsites[agent.id] == bombsite_position and self._heuristic(agent.position, bombsite_position) <= self.world.constants.police_vision_distance:
                        distance, path = self._a_star(agent.position, bomb.position)
                        time_needed = distance + self.world.constants.bomb_defusion_time
                        if time_needed < bomb.explosion_remaining_time:
                            found = True
                            self.bombsites[bombsite_position]['task'] = 1
                            self.bombsites[bombsite_position]['agent'] = agent.id
                            self.path_move(agent, path)
                            self._agent_print(agent.id, 'Going to defuse.')
                            break
                            # print('time status:',time_needed,bomb.explosion_remaining_time)
                        else:
                            self._agent_print(agent.id, 'Ignoring bomb due to lack of time :(')
                            self.bombsites[bombsite_position]['task'] = 1
                            self.bombsites[bombsite_position]['agent'] = -1
                            self.bombsites[bombsite_position]['status'] = 3
                if found:
                    continue
            # patrol:
            cluster = self.clusters[agent.id]
            if cluster:
                best_bombsite, (best_distance, best_path) = self.best_bombsite_patrol(agent)
                if best_path:
                    if len(best_path) > 1:
                        self.path_move(agent, best_path)
            else:
                # len(bombsites) < len(agents)
                pass
        else:  # terrorist
            if self.last_position[agent.id] == agent.position:
                self.gir_count[agent.id] += 1
            else:
                self.gir_count[agent.id] = 0
            self.last_position[agent.id] = agent.position
            bombsite_direction = self._find_bombsite_direction(agent)
            doing_bomb_operation = agent.planting_remaining_time != -1
            threatened = False
            if ESoundIntensity.Strong in agent.footstep_sounds:
                self.heard_sound_count[agent.id] += 1
            else:
                self.heard_sound_count[agent.id] = 0
            if doing_bomb_operation:
                try:
                    multiple = len(bombsite_direction)
                except:
                    multiple = False
                if multiple:
                    bombsite_direction = bombsite_direction[0]
                bombsite_position = self.target_bombsites[agent.id]
                if self.heard_sound_count[agent.id] >= (self.world.constants.terrorist_vision_distance - self.world.constants.police_vision_distance):
                    threatened = True
                    # self.heard_sound_count[agent.id] = 0
                if not threatened:
                    self.gir_count[agent.id] = 0
                    if agent.planting_remaining_time <= 1:
                        self._agent_print(agent.id, 'Finishing bomb operation')
                        self.bombsites[bombsite_position]['task'] = 0
                        self.bombsites[bombsite_position]['status'] = 2
                        self.target_bombsites[agent.id] = None
                    else:
                        self._agent_print(agent.id, 'Continuing bomb operation')
                        self.bombsites[bombsite_position]['task'] = 2
                else:
                    self._agent_print(agent.id, 'I swear I heard police footsteps, time to look around or maybe scape')
                    if len(self._empty_directions(agent.position)) == 1:
                        self.scape_bombsite(agent)
                    else:
                        self.move(agent.id, agent.position.direction_to(Position(bombsite_position[1], bombsite_position[0])))
                    self.bombsites[bombsite_position]['task'] = 1
                continue
            if bombsite_direction:
                bombsite_position = self._sum_pos_tuples(self.DIR_TO_POS[bombsite_direction], (agent.position.y, agent.position.x))
                threatening_polices = []
                for police in self.world.polices:
                    distance = self._heuristic(police.position, agent.position)
                    if distance <= self.world.constants.terrorist_vision_distance and police.status == EAgentStatus.Alive:
                        threatening_polices.append(police)
                if threatening_polices:
                    self.scape_polices(agent, threatening_polices)
                    threatened = True
                    self.bombsites[bombsite_position]['failed'] += 1
                    self._agent_print(agent.id, "I see police(s). I can always plant, Now I must Scape.")
                    continue
                if not threatened and self.bombsites[bombsite]['task'] != 2:
                    if bombsite_position == self.target_bombsites[agent.id] and self.bombsites[bombsite_position]['failed'] <= 5:
                        self._agent_print(agent.id, "Starting bomb operation, I don't see any polices.")
                        self.plant(agent.id, bombsite_direction)
                        self.bombsites[bombsite_position]['task'] = 2
                        continue
            # go to best bombsite
            if self.gir_count[agent.id] >= 4:
                self._agent_print(agent.id, "Doing random move because GIR KARDAM !!!")
                self.move(agent.id, random.choice(self._empty_directions(agent.position)))
                continue
            best_bombsite, best_distance, best_path = self.best_bombsite_plant(agent)
            if len(best_path) > 1:
                # there IS a possible bombsite
                self.bombsites[best_bombsite]['task'] = 1
                self.target_bombsites[agent.id] = best_bombsite
                self.last_distance[agent.id] = best_distance
                self._agent_print(agent.id, 'Going to bombsite.')
                self.path_move(agent, best_path)
            else:
                # no bombsites left to plant
                threatening_polices = []
                for police in self.world.polices:
                    if self._heuristic(agent.position, police.position) <= self.world.constants.police_vision_distance + 1:
                        threatening_polices.append(police)
                if threatening_polices:
                    self.scape_polices(agent, threatening_polices)
                    self._agent_print(agent.id, 'Nothing to do, Scaping police(s).')
                    continue
                else:
                    bombsite_position = self._find_bombsite_direction(agent, possible_only=False)
                    if bombsite_position:
                        self.scape_bombsite(agent)
                        self._agent_print(agent.id, 'Nothing to do, Getting a safe distance with the last planted bomb.')
                        continue
                    else:
                        self._agent_print(agent.id, 'Nothing to do, waiting ZzZzZ...')

    # if self.my_side == 'Police':
    #     for agent_1 in patrol_moves:
    #         for agent_2 in patrol_moves:
    #             if agent_1 != agent_2 and self._position_to_tuple(agent_1.position) == patrol_moves[agent_2] and self._position_to_tuple(agent_2.position) == patrol_moves[agent_1]:
    #                 self.cluster_index[agent_1.id], self.cluster_index[agent_2.id] = self.cluster_index[agent_2.id], self.cluster_index[agent_1.id]
    #                 bombsite_position_1 = self.bombsite_positions[self.cluster_index[agent_1.id]]
    #                 bombsite_position_2 = self.bombsite_positions[self.cluster_index[agent_2.id]]
    #                 _, path_1 = self._a_star(agent_1.position, bombsite_position_1)
    #                 _, path_2 = self._a_star(agent_2.position, bombsite_position_1)
    #                 patrol_moves[agent_1] = path_1[1]
    #                 patrol_moves[agent_2] = path_2[1]
    #     for agent in patrol_moves:
    #         if patrol_moves[agent]:
    #             self.position_move(agent, patrol_moves[agent])
    # else:
    #     print(self.heard_sound_count)

    if self.my_side == 'Police':
        print(self.target_bombsites)
        # print(self.bombsites)
        # for bombsite in self.bombsites:
        #     print(bombsite,'status:',self.bombsites[bombsite]['status'],'task:',self.bombsites[bombsite]['task'],end=',')
        # print()

    end = time.time()
    print('time:', end - start)
def kmeansTest():
    oriplist = processtool.read_points_fromfile("./Aggregation.txt")
    kmeansfactory = kmeans(7, processtool.generate_plist(oriplist))
    kmeansfactory.kmeansCluster()
    kmeansfactory.printResult()
def trial(filepath, mingene, mincell, cnum=8):
    # make dirs
    data_dir = "./data/" + "mincell=" + str(mincell) + "_mingene=" + str(mingene) + "/"
    graph_dir = "./process_images/" + "mincell=" + str(mincell) + "_mingene=" + str(mingene) + "/"
    model_dir = './model/' + "mincell=" + str(mincell) + "_mingene=" + str(mingene) + "/"
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    if not os.path.exists(graph_dir):
        os.makedirs(graph_dir)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # filter data
    filtered = data_dir + "filtered.txt"
    sc = scanpy(filepath, mingenes=mingene, mincells=mincell)
    sc.getScanpy(filtered)

    # train autoencoder
    # autoencoder.train(filtered, model_dir, learning_rate=0.1, batch_size=100, epoch=800)

    # generate tsne
    plot.getTsne(filtered, graph_dir + "tsne.png")
    # generate kmeans
    k, _ = cluster.kmeans(filtered, graph_dir + "tsne with kmeans.png", cnum)
    # generate centroid values
    c = cluster.getCentroids(k, cluster.tsne(filtered))
    cluster.getGeneofCentroids(filtered, graph_dir + "centroid gene matrix.txt", c)
    # get cell index of centroids
    cindex = [sc.getFilteredCellList()[i] for i in c]
    with open(graph_dir + "centroidCellIndex.txt", "w") as out:
        out.write(str(cindex))

    # get latent space
    autoencoder.getLatentSpace(filtered, data_dir, model_dir)
    latent = data_dir + "latentSpace.txt"

    # generate graphs for latent space
    # generate tsne
    plot.getTsne(latent, graph_dir + "tsne_latent.png")
    # generate kmeans
    cluster.kmeans(latent, graph_dir + "tsne with kmeans_latent.png", cnum)
    # generate centroid values
    c = cluster.getCentroids(k, cluster.tsne(latent))
    cluster.getGeneofCentroids(latent, graph_dir + "centroid gene matrix_latent.txt", c)
    # get cell index of centroids
    cindex = [sc.getFilteredCellList()[i] for i in c]
    with open(graph_dir + "centroidCellIndex_latent.txt", "w") as out:
        out.write(str(cindex))
        (labels_names, super_class_labels_names),
        seed=4, perplexity=30, alpha=0.3,
        fpath=(fname, fname_super))
    # anlz.tsne_plot(emails_representation, labels, args.seed)

    print('clustering...')
    # clustering
    if (param.clustering == clust_kneams):
        if (param.affine == 'cosine'):
            # k-means cosine dist
            clusters, kmns_class = clst.kmeans_cosine_dist(emails_representation, param.k)
        elif (param.affine == 'euclidean'):
            # k-means euclidean
            clusters, kmns_class = clst.kmeans(emails_representation, param.k)
            print('kmeans sum of distanses = %f' % kmns_class.inertia_)
        else:
            raise ValueError('kmenas is not supported with affinity: ' + param.affine)
    elif (param.clustering == clust_hirarchical):
        # hirarchical
        (clusters, _) = clst.hirarchical(emails_representation, param.k,
                                         aff=param.affine, link=param.linkage)
    else:
        raise ValueError('clustering is not supported with: ' + param.clustering)

    # analyze
    print('analyzing...')
from cluster import kmeans

dataset = "datasets/iris.csv"
xs = "SepalLengthCm"
ys = "PetalLengthCm"
k = 2
kmeans(dataset, xs, ys, k)

dataset = "datasets/adult.csv"
xs = "age"
ys = "capitalgain"
k = 2
kmeans(dataset, xs, ys, k)
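# The cluster module behind this import isn't shown; a hypothetical sketch of a
# kmeans(dataset, xs, ys, k) helper with this signature, assuming pandas and
# scikit-learn, might look like the following.
import pandas as pd
from sklearn.cluster import KMeans

def kmeans_csv(dataset, xs, ys, k):
    # Read the CSV and keep only the two requested feature columns.
    df = pd.read_csv(dataset)
    X = df[[xs, ys]].dropna().to_numpy()
    # Fit k-means and return the fitted model plus per-row labels.
    model = KMeans(n_clusters=k, n_init=10, random_state=0).fit(X)
    return model, model.labels_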
def main():
    ''' prepare input files for automatic text analysis '''
    print "PARSING"
    print "extracting contents and titles from file(s)."
    contents, titles = parse.parseDocs(file_documents)
    print ""

    ''' automatic text analysis '''
    print "AUTOMATIC TEXT ANALYSIS"
    documents, terms, docvecs, nulvecs = textanalysis.main(contents, titles, path + file_stopwords, progress)
    print ""

    ''' clustering '''
    print "CLUSTER ANALYSIS"
    print "\nleader clustering"
    print "*: vector forms a new centroid, .: the vector is assigned to existing centroid"
    cleader = cluster.leader(docvecs, leader_threshold, dist, comp, progress)
    print "!"
    print "found " + str(len(cleader)) + " clusters"

    print "\nkmeans clustering"
    print "*: cluster is altered, .: cluster not changed, |: next iteration"
    k = int(round(kmeans_factor * len(cleader), 0))
    ckmeans = cluster.kmeans(docvecs, k, dist, comp, True)
    print "!"
    print "found " + str(len(ckmeans)) + " clusters"

    # print "\nagglomorative clustering",
    # t = cluster.threshold(docvecs, agglomorate_factor, dist)
    # clink = cluster.agglomorate(docvecs, linkage, t, dist, comp, True)
    # print "!"
    # print "found " + str(len(clink)) + " clusters"

    print "\naggregate the clustering"
    print "a: consider next A cluster, b: consider next B cluster, !: aggregate"
    t = cluster.threshold(docvecs, aggregate_factor, dist)
    clustering = cluster.aggregate2(cleader, ckmeans, t, dist, comp, True)
    print "!"
    print "found " + str(len(clustering)) + " clusters"

    print "\nhandle overlap between clusters",
    clustering = cluster.prune(docvecs, clustering, dist, comp)
    print "!"
    print "found " + str(len(clustering)) + " clusters"
    print ""

    ''' topic extraction '''
    print "TOPIC EXTRACTION"
    topics = topicextract.FindTopics(terms, clustering, documents, topic_length, progress)
    print ""
    print "The following topics have been found.\n"
    print "One topic per cluster. A Topic is a list of Terms with their cluster weights.\n"
    for t in range(len(topics)):
        print "Topic[" + str(t) + "]:", topics[t], "\n"
    print "\nDONE!"

    for i in range(len(clustering)):
        clustering[i].settopic(topics[i])

    # output to files
    out_file = open(outpath + 'documents.txt', 'w')
    for i in documents:
        print >> out_file, i
    out_file.close()

    out_file = open(outpath + 'terms.txt', 'w')
    for i in terms:
        print >> out_file, i
    out_file.close()

    out_file = open(outpath + 'clusters.txt', 'w')
    for i in range(len(clustering)):
        c = clustering[i]
        for j in c:
            print >> out_file, str(i) + ":" + str(j.ref)
    out_file.close()

    out_file = open(outpath + 'topics.txt', 'w')
    for i in topics:
        print >> out_file, i
    out_file.close()

    return documents, terms, docvecs, nulvecs, clustering, topics
def othermap():
    the_map = gmaps.genMap(*cluster.kmeans())
    return render_template("map.html", the_map=the_map)
import sys
import numpy as np
from cluster import kmeans

if len(sys.argv) != 5:
    print()
    print("Usage: %s [Seed Value] [Number of Centroids] [Training Data] [Test Data]" % (sys.argv[0]))
    print()
    sys.exit(1)

train, test = np.loadtxt(sys.argv[3]), np.loadtxt(sys.argv[4])

model = kmeans(train, int(sys.argv[2]), label=True, seed_value=int(sys.argv[1]))
model.createCluster()

correct = 0
if test.ndim < 2:
    classification = model.makeDecision(test, label=True)
    print(classification, test[-1])
    if test[-1] == classification:
        correct += 1
else:
    for data in test:
        classification = model.makeDecision(data, label=True)
        print(classification, data[-1])
        if data[-1] == classification:
            correct += 1
type=str, help="log directory for TensorBoard") params = parser.parse_args() if __name__ == "__main__": if params.mode == "embed": embed(params) if params.mode == "sif": sif(params) if params.mode == "cluster": if params.kmeans: kmeans(params) if params.opt_k: opt_k(params) if params.hierarch_k: hierarch_k(params) if params.mode == "project": if params.pca: pca(params) if params.tsne: tsne(params) if params.mode == "metadata": # optional write cluster labels to metadata if not params.meta_labels: params.meta_labels_file = None
if len(sys.argv) != 5: print "Usage: compressImage.py <numero_de_clusters> <threshold> <num_iteraciones> <nombre_imagen_con_extension> " exit(1) k = int(sys.argv[1]) threshold = float(sys.argv[2]) max_iter = int(sys.argv[3]) file_name = sys.argv[4] number_attributes = 3 # Load image image = misc.imread(file_name) # Transform pixel matrix into a one dimensional vector x = image.reshape((image.shape[0] * image.shape[1], 3)) clusters = createClusters(k, number_attributes, 0, 255) kmeans(clusters, x, threshold, max_iter) # Change each pixel color by that of its centroid compress(clusters, x) # Restore pixel matrix x = x.reshape(image.shape) im = Image.fromarray(x) out_file = os.path.basename(file_name)[:-4] out_file += "_" + str(k) + ".png" im.show() im.save(out_file)
    for x in k.getOutput():
        wrtr.writerow(x)
    eWrtr = csv.writer(open("./output/" + fn + "_error.csv", "wb"))
    for x in k.getErrors():
        eWrtr.writerow(x)

# make sure data is in random order
random.shuffle(res)
# 20 percent set aside for cross-validation
xval = int(0.20 * len(res))

for c in range(1, maxClusters + 1):
    minError = sys.maxint
    with timer.Timer():
        for i in range(0, iters):
            k1 = cluster.kmeans(res, c, xval)
            err, xerr = k1.run()
            if xerr < minError:
                minError = xerr
                writeFile("k-%d-%f1.4" % (c, err), k1)
            print 'k-means,', i, ',', c, ',', minError, ',Inter'
            sys.stderr.write("kmeans clusters: %d iter: %d \n" % (c, i))
    print 'k-means,', c, ',', minError, ',',

minl = math.log(0.9)
maxl = math.log(math.sqrt(2.0) * dataSpread)
dl = (maxl - minl) / float(maxClusters)
for l in [math.exp(minl + i * dl) for i in range(0, 2 * maxClusters)]:
    minError = sys.maxint
    with timer.Timer():
        for i in range(0, iters):
    ['systems', 'junior', '26...30', '46k...50k', 3],
    ['systems', 'senior', '41...45', '66k...70k', 3],
    ['marketing', 'senior', '36...40', '46k...50k', 10],
    ['marketing', 'junior', '31...35', '41k...45k', 4],
    ['secretary', 'senior', '46...50', '36k...40k', 4],
    ['secretary', 'junior', '26...30', '26k...30k', 6],
]

test_data = ['systems', '26...30', '46k...50k']
header = ['department', 'status', 'age', 'salary', 'count']

if __name__ == "__main__":
    optparser = OptionParser()
    optparser.add_option('--mins', dest='min_support',
                         help='minium support value', default=0.6, type='float')
    optparser.add_option('--k', dest='k',
                         help='kmeans k center point', default=3, type=int)
    (options, args) = optparser.parse_args()

    # find frequent itemsets in the dataset from the textbook
    min_support = options.min_support
    apriori("apriori_d.txt", min_support)

    # classify the test data with the classifier trained on the train data
    # from the textbook, which is written in the code module
    DesisionTree(train_data)
    NaiveBayes(train_data, test_data)

    # cluster the dataset from the textbook
    k = options.k
    textbook = kmeans("kmeans_t.txt", k)
def initialize(self):
    start = time.time()
    os.system("printf '\033c'")
    print('initialize')

    # AI constants:
    self.DIRECTIONS = [
        ECommandDirection.Up,
        ECommandDirection.Right,
        ECommandDirection.Down,
        ECommandDirection.Left,
    ]
    self.DIR_TO_POS = {
        ECommandDirection.Up: (-1, +0),
        ECommandDirection.Right: (+0, +1),
        ECommandDirection.Down: (+1, +0),
        ECommandDirection.Left: (+0, -1),
    }
    self.BOMBSITES_ECELL = [
        ECell.SmallBombSite,
        ECell.MediumBombSite,
        ECell.LargeBombSite,
        ECell.VastBombSite,
    ]
    self.BOMBSITE_COEFFICIENT = {
        ECell.SmallBombSite: self.world.constants.score_coefficient_small_bomb_site,
        ECell.MediumBombSite: self.world.constants.score_coefficient_medium_bomb_site,
        ECell.LargeBombSite: self.world.constants.score_coefficient_large_bomb_site,
        ECell.VastBombSite: self.world.constants.score_coefficient_vast_bomb_site,
    }
    self.sound_kinds = [ESoundIntensity.Strong, ESoundIntensity.Normal, ESoundIntensity.Weak]

    # agent constants:
    self.unreachable_bscore_coefficient = 2
    self.bombsite_size_coefficient = -1
    # self.unreachable_distance_power = 0.125
    self.failed_bombsite_coefficient = -10

    if self.my_side == 'Police':
        my_agents = self.world.polices
    else:
        my_agents = self.world.terrorists

    # bombsite dictionaries:
    self.bombsites = {}
    self.unreachable_bombsites = []
    self.start_pos = my_agents[0].position
    is_police = (self.my_side == 'Police')
    for row, line in enumerate(self.world.board):
        for col, cell in enumerate(line):
            if cell not in [ECell.Wall, ECell.Empty]:
                distance, path = self._a_star(self.start_pos, (row, col), agent_block=False)
                if path:
                    # KEYS:
                    # status key: 0<: clear for sure, 0: unknown, 1: bomb is possible,
                    #             2: bomb for sure, 3: no time to defuse
                    # task key: 0 = nothing, 1 = going to act, 2 = acting
                    # act means defuse or plant
                    self.bombsites[(row, col)] = {'size': cell, 'initial_distance': distance,
                                                  'status': 0, 'task': 0, 'bscore': 0,
                                                  'failed': 0, 'agent': None, 'ert': -1}
                else:
                    self.unreachable_bombsites.append((row, col))

    # ai variables:
    if self.my_side == 'Police':
        bombsite_positions = sorted(self.bombsites, key=lambda x: self.bombsites[x]['initial_distance'])
        self.clusters, self.cluster_centers = kmeans(bombsite_positions, len(my_agents))
        print(self.clusters, self.cluster_centers)
        self.cluster_index = [0] * len(my_agents)
        self.checked_bombsites = [[] for i in range(len(my_agents))]
        self.target_bombsites = [None] * len(my_agents)
        for i, cluster in enumerate(self.clusters):
            self.target_bombsites[i] = cluster[0]
    else:
        self.last_bombs = set([])
        self.heard_sound_count = [0] * len(my_agents)
        self.target_bombsites = [None] * len(my_agents)
        self.last_position = [[]] * len(my_agents)
        self.last_distance = [0] * len(my_agents)
        self.gir_count = [0] * len(my_agents)
        for agent in my_agents:
            self.last_position[agent.id] = agent.position

    for unreachable_bombsite in self.unreachable_bombsites:
        _, path, bypassed_bombsites = self._a_star(self.start_pos, unreachable_bombsite, agent_block=False, bombsite_block=False)
        bypassed_bombsite = bypassed_bombsites[-1]
        unreachable_size = self.world.board[unreachable_bombsite[0]][unreachable_bombsite[1]]
        self.bombsites[bypassed_bombsite]['bscore'] += (self.BOMBSITE_COEFFICIENT[unreachable_size] * self.unreachable_bscore_coefficient)

    self.last_scores = self.world.scores

    # timing:
    end = time.time()
    print('initialize time', end - start)

    # debug:
    print(self.unreachable_bombsites)
X_pca = pca.transform(X_scaled)
print("Cumulative Explained Variance:", pca.explained_variance_ratio_.sum())

# perform LDA for 2 axes, measure performance
LDA = LinearDiscriminantAnalysis(n_components=2, shrinkage='auto', solver='eigen')
LDA_reduced_df = LDA.fit(X_scaled, Y).transform(X_scaled)
print(LDA.score(X_scaled, Y))

# find all silhouette scores from 5-20
find_best_cluster("kmeans", LDA_reduced_df, 5, 20)

# perform kmeans on chosen K
# (note: this rebinds the name 'kmeans' to the returned result dict,
# shadowing the kmeans helper for the rest of the script)
kmeans = kmeans(LDA_reduced_df, 5)

# Assign labels
data['Cluster'] = kmeans['labels']

# Print silhouette score
print("silhouette score:", kmeans['silhouette_score'])

# Target labels
y = kmeans['labels']

df = pd.DataFrame({
    'X1': LDA_reduced_df[:, 0],
    'X2': LDA_reduced_df[:, 1],
    'labels': y
})