def compute_silhouette(data, l_matrix):
    """
    Computes silhouette scores of the dendrogram given by l_matrix
    :param data: numpy array. Shape=(nb_samples, nb_genes)
    :param l_matrix: Scipy linkage matrix. Shape=(nb_genes-1, 4)
    :return: list of Silhouette scores
    """
    nb_samples, nb_genes = data.shape

    # Form dendrogram and compute Silhouette score at each node
    clusters = {i: Cluster(index=i) for i in range(nb_genes)}
    scores = []
    for i, z in enumerate(l_matrix):
        c1, c2, dist, n_elems = z
        c1, c2 = int(c1), int(c2)  # linkage matrices store child cluster ids as floats
        clusters[nb_genes + i] = Cluster(c_left=clusters[c1], c_right=clusters[c2])
        c1_indices = clusters[c1].indices
        c2_indices = clusters[c2].indices
        labels = [0] * len(c1_indices) + [1] * len(c2_indices)
        if len(labels) == 2:
            scores.append(0)
        else:
            expr = data[:, clusters[nb_genes + i].indices]
            m = 1 - pearson_correlation(expr, expr)
            score = silhouette_score(m, labels, metric='precomputed')
            scores.append(score)
    return scores
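# Usage sketch for compute_silhouette (assumes the Cluster class and the
# pearson_correlation / silhouette_score helpers referenced above are
# importable from this module; the shapes and names below are illustrative).
import numpy as np
from scipy.cluster.hierarchy import linkage

nb_samples, nb_genes = 20, 30
data = np.random.rand(nb_samples, nb_genes)            # (nb_samples, nb_genes)
corr = np.corrcoef(data.T)                             # gene-gene correlations
condensed = 1 - corr[np.triu_indices(nb_genes, k=1)]   # condensed distance vector
l_matrix = linkage(condensed, method='average')        # shape (nb_genes-1, 4)
scores = compute_silhouette(data, l_matrix)
print(scores[-1])                                      # silhouette at the root split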
def dendrogram_distance(l_matrix, condensed=True):
    """
    Computes the distances between each pair of genes according to the scipy linkage matrix.
    :param l_matrix: Scipy linkage matrix. Shape=(nb_genes-1, 4)
    :param condensed: whether to return the distances as a flat array containing the
           upper-triangular of the distance matrix
    :return: distances
    """
    nb_genes = l_matrix.shape[0] + 1

    # Fill distance matrix m
    clusters = {i: Cluster(index=i) for i in range(nb_genes)}
    m = np.zeros((nb_genes, nb_genes))
    for i, z in enumerate(l_matrix):
        c1, c2, dist, n_elems = z
        c1, c2 = int(c1), int(c2)  # linkage matrices store child cluster ids as floats
        clusters[nb_genes + i] = Cluster(c_left=clusters[c1], c_right=clusters[c2])
        c1_indices = clusters[c1].indices
        c2_indices = clusters[c2].indices
        for c1_idx in c1_indices:
            for c2_idx in c2_indices:
                m[c1_idx, c2_idx] = dist
                m[c2_idx, c1_idx] = dist

    # Return flat array if condensed
    if condensed:
        return upper_diag_list(m)
    return m
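# Sketch of a consistency check: the pairwise dendrogram distances should form
# the same multiset as scipy's cophenetic distances for the same linkage matrix
# (sorting sidesteps any ordering difference in the upper_diag_list helper,
# which is assumed to be defined elsewhere in this module).
import numpy as np
from scipy.cluster.hierarchy import linkage, cophenet
from scipy.spatial.distance import pdist

points = np.random.rand(10, 3)
l_matrix = linkage(pdist(points), method='average')
d = dendrogram_distance(l_matrix, condensed=True)
np.testing.assert_allclose(np.sort(d), np.sort(cophenet(l_matrix)))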
def __init__(self, project_api, min_cluster_size=1, fix_distinct_clusters=False):
    Cluster.__init__(self, project_api, min_cluster_size)
    self.fix_distinct_clusters = fix_distinct_clusters
    self.algorithm_name = "divisivekmeans"
def inf():
    d2 = Dataset.load('../models.nosync/data_train', c)
    t = TopicModelling.create(c)
    corpus, model = t.infer(d2)  # TODO fix
    cl = Cluster(c)
    # renamed the loop variable from t to topic so it no longer shadows the
    # TopicModelling instance above
    for topic, titles in cl.find_similar(d2, corpus, model):
        print('{}: {}'.format(topic, ', '.join(titles)))
        print('---\n')
def test_cluster():
    """ Test of Cluster.clusters() """
    c = Cluster(sample_points, 2, 1, 3)
    found = c.clusters()
    cluster1 = [0, 1, 3, 4, 6, 7]
    cluster2 = [8, 9, 10, 11, 12, 13, 14, 15]
    assert (found == [cluster1, cluster2]) or (found == [cluster2, cluster1])
def test_plot_icp_cluster2cluster(laser_a, laser_b):
    # source: a.
    mst_a = EMST(laser_a)
    clusters_a = Cluster(mst_a, k=1.0, min_size=10)

    # destination: b.
    mst_b = EMST(laser_b)
    clusters_b = Cluster(mst_b, k=1.0, min_size=10)

    # icp
    dst_indices, T = icp(laser_a.cartesian, laser_b.cartesian,
                         init_pose=None, max_iterations=20, tolerance=0.001)
    matched_cluster_indices, _ = match_clusters(dst_indices, laser_a, laser_b,
                                                clusters_a, clusters_b)

    # plot
    plt.figure()

    # colors to iterate over
    colors_a = ['r', 'y'] * 20
    colors_b = ['b', 'g'] * 20

    # plot source clusters and arrows to destination
    count = 1
    for cluster, color, cluster_b_index in zip(clusters_a.clusters, colors_a,
                                               matched_cluster_indices):
        plt.scatter(laser_a.cartesian[cluster, [0]], laser_a.cartesian[cluster, [1]],
                    s=10.0, c=color, linewidths=0)
        if cluster_b_index >= 0:
            # plot arrow pointing to the corresponding cluster
            x1 = np.mean(laser_a.cartesian[cluster, [0]])
            y1 = np.mean(laser_a.cartesian[cluster, [1]])
            cluster_b = clusters_b.clusters[cluster_b_index]
            x2 = np.mean(laser_b.cartesian[cluster_b, [0]])
            y2 = np.mean(laser_b.cartesian[cluster_b, [1]])
            dx = x2 - x1
            dy = y2 - y1
            plt.arrow(x1, y1, dx, dy, width=0.001, head_width=0.05)
            plt.text((x1 + x2) / 2.0, (y1 + y2) / 2.0, str(count), fontsize=18)
            count += 1

    # plot destination clusters
    for cluster, color in zip(clusters_b.clusters, colors_b):
        plt.scatter(laser_b.cartesian[cluster, [0]], laser_b.cartesian[cluster, [1]],
                    s=10.0, c=color, linewidths=0)

    plt.show()
def jackknifing_tree(file_pattern, di_method):
    """
    Given a pattern for the list of subsampled DI files
    (each file should have per-line format <sample>,<size>,<comma-separated DI>
     and have one of the sizes be 'real')

    Run clustering and count the differences between subsampled and real trees

    Returns: tree (size-->sample-->list of trees),
             symmetric_difference (size-->list of diffs),
             robinson_foulds_distance (size-->list of diffs)
    """
    from clustering import Cluster
    import dendropy
    dTree = lambda x: dendropy.Tree.get_from_string(x, "newick")

    samples = None
    sizes = None
    trees = {}
    for file in glob.iglob(file_pattern):
        print >> sys.stderr, "reading subsampled DI file {0}....".format(file)
        d = {}
        with open(file) as f:
            for line in f:
                sample, size, di = line.strip().split(',', 2)
                if size not in d:
                    d[size] = {}
                d[size][sample] = np.array(map(float, di.split(',')))
        if len(d) == 0:
            continue
        if sizes is None:
            sizes = d.keys()
            sizes.sort()
            samples = d[sizes[0]].keys()
            samples.sort()
        for size, di_dict in d.iteritems():
            c = Cluster(None)
            c.init_from_di_list(di_dict, method=di_method, threshold=0)
            c.run_till_end()
            try:
                trees[size].append(dTree(str(c.trees[0])))
            except KeyError:
                trees[size] = [dTree(str(c.trees[0]))]

    # tally (1) symmetric differences (edge weight ignored)
    #       (2) robinson_foulds_distance (edge weight considered)
    # 'real' is the size that is the full pool that we compare all other trees to
    sym_diff = {}
    rob_diff = {}
    for size in sizes:
        if size == 'real':
            continue
        t_real = trees['real'][0]
        sym_diff[size] = [t_real.symmetric_difference(t) for t in trees[size]]
        rob_diff[size] = [t_real.robinson_foulds_distance(t) for t in trees[size]]
    return trees, sym_diff, rob_diff
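# Hypothetical driver for jackknifing_tree; the glob pattern and di_method
# value are illustrative assumptions, not taken from the original pipeline.
trees, sym_diff, rob_diff = jackknifing_tree('subsampled_DI/*.txt', di_method='ward')
for size in sorted(sym_diff):
    print size, np.mean(sym_diff[size]), np.mean(rob_diff[size])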
def jackknifing_tree_DF(file_pattern, di_method, samples_to_exclude=['1412-1', '1412-4']):
    """
    Similar to jackknifing_tree but using DF files and the (probably improved)
    clustering in clustering.py, which I need to turn on manually.

    Run clustering and count the differences between subsampled and real trees

    Returns: tree (size-->sample-->list of trees),
             symmetric_difference (size-->list of diffs),
             robinson_foulds_distance (size-->list of diffs)
    """
    from clustering import Cluster
    import dendropy
    dTree = lambda x: dendropy.Tree.get_from_string(x, "newick")

    trees = {}
    for file in glob.iglob(file_pattern):
        print >> sys.stderr, "reading subsampled DF file {0}....".format(file)
        d = {}  # size --> list of dfs
        with open(file) as f:
            for df in DF.DFReader(f):
                sample = df.name
                if sample in samples_to_exclude:
                    print >> sys.stderr, "EXCLUDING SAMPLE {0}!".format(sample)
                    continue
                size = df.annotations['size']
                if size not in d:
                    d[size] = []
                # need to change the mask for df!!!
                # not a problem when we did this with DI because it was already masked
                df.change_vec_mask(valid_DI_pos)
                d[size].append(df)
        for size, df_list in d.iteritems():
            c = Cluster(df_list, method=di_method, threshold=0)
            c.run_till_end()
            try:
                trees[size].append(dTree(str(c.trees[0])))
            except KeyError:
                trees[size] = [dTree(str(c.trees[0]))]
            print "size", size, "file", file
            print c.trees[0]

    # tally (1) symmetric differences (edge weight ignored)
    #       (2) robinson_foulds_distance (edge weight considered)
    # 'real' is the size that is the full pool that we compare all other trees to
    sym_diff = {}
    rob_diff = {}
    for size in trees:
        if size == 'real':
            continue
        t_real = trees['real'][0]
        sym_diff[size] = [t_real.symmetric_difference(t) for t in trees[size]]
        rob_diff[size] = [t_real.robinson_foulds_distance(t) for t in trees[size]]
    return trees, sym_diff, rob_diff
def track_clusters(artist):
    wrapper = LastfmWrapper()
    (tracks, matrix) = wrapper.presence_matrix(artist)
    c = Cluster()
    assignments = c.cluster(matrix)
    clusters = {}
    for index, track in enumerate(tracks):
        print assignments[index], " - ", track
        try:
            clusters[assignments[index]].append(track)
        except KeyError:
            clusters[assignments[index]] = [track]
    return ''.join(['clusterCallback(', json.dumps(clusters), ')'])
def run():
    start_time = time.clock()
    jieba.set_dictionary('jieba/dict.txt.big')
    jieba.initialize()
    print("jieba " + str(time.clock() - start_time))

    start_time = time.clock()
    news_rss_url = "http://hk.news.yahoo.com/rss/hong-kong"
    # news_rss_url = "http://hk.news.yahoo.com/rss/china"
    info = feedparser.parse(news_rss_url)

    start_time = time.clock()
    for entry in info.entries:
        # word count of each word of the summary
        word_list = getBagOfWords(preprocess(jieba.cut(stripTag(entry.summary))))
        # word count of each word of the title
        bag_of_word_of_title = getBagOfWords(preprocess(jieba.cut(stripTag(entry.title))))
        # Combine word counts of summary and title; the title is weighted more
        bag_of_word = Counter()
        for i in range(3):
            bag_of_word.update(bag_of_word_of_title)
        bag_of_word.update(word_list)
        entry["bag_of_words"] = bag_of_word
    print("preprocess " + str(time.clock() - start_time))

    # result = Counter()
    # for entry in info.entries:
    #     result.update(entry["bag_of_words"])
    # printList(result)

    # Cluster the entries
    start_time = time.clock()
    clusters = clustering.clustering([Cluster([Vector(entry)]) for entry in info.entries])
    print("clustering " + str(time.clock() - start_time))

    # Print the result
    newsList = []
    for (index, cluster) in enumerate(clusters):
        for vector in cluster.listOfVectors:
            news = News(index, (vector == cluster.centroidVector),
                        vector.data["title"], vector.data["published"],
                        vector.data["link"])
            newsList.append(news.__dict__)
    return json.dumps(newsList)
def clustering(x_train, y_train, writer, sheet_name, multiple=False, binary=True):
    cluster = Cluster()
    cluster.setupResults()
    cluster.train(x_train, y_train, multiple=multiple, binary=binary)
    cluster.results.to_excel(writer, sheet_name=sheet_name)
def test_neighbors():
    """ Unit Test of Cluster._neighbors() """
    c = Cluster(sample_points, 2, 1, 3)
    assert c._neighbors(sample_points[0]) == [sample_points[1], sample_points[3]]

    c = Cluster(sample_points, 2, 1.5, 3)
    assert c._neighbors(sample_points[0]) == [
        sample_points[1], sample_points[3], sample_points[4]
    ]
def __cluster_subject__(self, subject_id, max_users=None, jpeg_file=None):
    """
    override the parent method but still call it - also for correcting problems
    with nearest neighbours etc., things that only make sense with divisive k-means
    :param subject_id:
    :param jpeg_file:
    :return:
    """
    time_to_cluster = Cluster.__cluster_subject__(self, subject_id, max_users, jpeg_file)

    # kmeans will sometimes split clusters such that we have two nearest-neighbour
    # clusters with no users in common - both representing the same animal/thing.
    # Do we want to find such cases and fix them? In this case fixing is just a
    # matter of merging the clusters.
    # if self.fix_distinct_clusters:
    #     start = time.time()
    #     self.clusterResults[subject_id] = self.correction.__fix__(self.clusterResults[subject_id])
    #     end = time.time()
    #     return time_to_cluster + (end - start)
    # else:
    return time_to_cluster
def __init__(self, project_api, min_cluster_size=1):
    Cluster.__init__(self, project_api, min_cluster_size)
# word count of each word of the summary
word_list = getBagOfWords(preprocess(pseg.cut(stripTag(entry.summary))))
# word count of each word of the title
bag_of_word_of_title = getBagOfWords(preprocess(pseg.cut(stripTag(entry.title))))
# Combine word counts of summary and title; the title is weighted more
bag_of_word = Counter()
for i in range(3):
    bag_of_word.update(bag_of_word_of_title)
bag_of_word.update(word_list)
entry["bag_of_words"] = bag_of_word

# result = Counter()
# for entry in info.entries:
#     result.update(entry["bag_of_words"])
# printList(result)

# Cluster the entries
clusters = clustering.clustering([Cluster([Vector(entry)]) for entry in info.entries])

# Print the result
for cluster in clusters:
    print "____FINAL___CLUSTER___"
    printList("CENTROID: " + cluster.centroidVector.data["title"])
    for vector in cluster.listOfVectors:
        printList(vector.data["title"])
    print "____END_OF_CLUSTER___"
if args.backbone == "vgg":
    model = VGG16(include_top=False, weights='imagenet')
    #model_vgg = Model(input=base_vgg_model.input, output=base_vgg_model.get_layer('block4_pool').output)
else:
    model = ResNet50(weights='imagenet', include_top=False)

if args.task == "anomaly":
    features_train_array = model.predict(x_train)
    features_train_array = features_train_array.reshape(num_train_samples, -1)  # reshape to 2d from 4d array
    features_test_array = model.predict(x_test)
    features_test_array = features_test_array.reshape(num_test_samples, -1)
    estimator_str = "svc"
    if args.subtask == "outlier":
        estimator_str = "isolationforest"
    #estimator_params = GridSearch(features_train_array, y_train, estimator_str)
    Analyze(args, estimator_str, features_train_array, y_train,
            features_test_array, y_test, testoutputfile)

if args.task == "cluster":
    features_train_array = model.predict(x_train)
    features_train_array_np = np.array(features_train_array)
    # features_train_array_list is assumed to be initialized (e.g. to []) earlier
    # in the original script; the fragment uses it without defining it
    features_train_array_list.append(features_train_array_np.flatten())
    features_train_array_list_np = np.array(features_train_array_list)
    Cluster(features_train_array, num_train_samples)
def realtime_test():
    ''' Internal test. '''
    # laser sub
    laser = LaserSub()

    # init loop
    r = rospy.Rate(1.0)
    while not rospy.is_shutdown():
        # wait until first scan is received so that laser is initialized
        if laser.first_recieved_done:  # laser specs initialized by ROS sensor_msgs/LaserScan message
            break
        else:
            rospy.loginfo('[coarse_level_association] Waiting for laser to init.')
        r.sleep()

    laser_prev = None
    clusters_prev = None
    clusters_id = None

    # init plot
    fig, ax = plt.subplots()
    ax.set_aspect('equal', adjustable='box')
    ax.set_title('Clusters Tracking', fontsize=18)
    ax.axis([-5.0, 5.0, -5.0, 5.0])
    ax.plot([0], [0], marker='>', markersize=20, color="red")  # plot the origin

    # loop
    r = rospy.Rate(30)
    while not rospy.is_shutdown():
        # --- 1. laser input (copy self.laser at this moment)
        laser_now = copy.deepcopy(laser)

        # --- 2. EMST (create input graph for segmentation)
        mst = EMST(laser_now)

        # --- 3. EGBIS (apply image segmentation technique to clustering)
        clusters_now = Cluster(mst, k=1.0, min_size=10)
        if clusters_id is None:  # first set of clusters generated
            clusters_id = [i for i in range(len(clusters_now.clusters))]

        # --- 4. ICP (the visualization below also needs the match result, so it
        #        stays inside this branch; on the first pass there is nothing to draw)
        if laser_prev is not None:
            dst_indices, T = icp(laser_now.cartesian, laser_prev.cartesian,
                                 init_pose=None, max_iterations=20, tolerance=0.001)
            matched_cluster_indices, _ = match_clusters(dst_indices, laser_now, laser_prev,
                                                        clusters_now, clusters_prev)

            # --- 5. Visualize
            laser_a = laser_now
            laser_b = laser_prev
            clusters_a = clusters_now
            clusters_b = clusters_prev
            clusters_id_new = []

            # colors to iterate over
            colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k'] * 20

            # plot source clusters with the same id as the matched destination clusters
            ax_list = []
            for cluster, color, cluster_b_index in zip(clusters_a.clusters, colors,
                                                       matched_cluster_indices):
                a = ax.scatter(laser_a.cartesian[cluster, [0]],
                               laser_a.cartesian[cluster, [1]],
                               s=20.0, c=color, linewidths=0)
                ax_list.append(a)
                if cluster_b_index >= 0:
                    # show id
                    x1 = np.mean(laser_a.cartesian[cluster, [0]])
                    y1 = np.mean(laser_a.cartesian[cluster, [1]])
                    cluster_b = clusters_b.clusters[cluster_b_index]
                    x2 = np.mean(laser_b.cartesian[cluster_b, [0]])
                    y2 = np.mean(laser_b.cartesian[cluster_b, [1]])
                    _id = clusters_id[cluster_b_index]
                    clusters_id_new.append(_id)
                    a = ax.text((x1 + x2) / 2.0, (y1 + y2) / 2.0, str(_id), fontsize=22)
                    ax_list.append(a)
                else:
                    # no cluster_b matches me
                    _id = max(clusters_id) + 1
                    clusters_id_new.append(_id)
            clusters_id = clusters_id_new

            # plot
            plt.pause(1e-12)  # pause for real-time display
            for a in ax_list:  # clear data on the plot
                a.remove()

        # end of loop
        laser_prev = laser_now
        clusters_prev = clusters_now
        r.sleep()

    # plot
    plt.show()
# run streaming API alongside the REST API
# Create new threads
thread1 = myThread(0, "Streaming API")
thread2 = myThread(1, "REST API")

# Start new Threads
thread1.start()
thread2.start()

# wait on both threads to finish
thread1.join()
thread2.join()

''' PART 2 - group data '''
# cluster data into groups
clustering = Cluster()

# get one percent of all tweets
one_percent = int(summary.get_total_number_in(my_storage.collection) * 0.01)
my_storage.log_collection.insert_one({
    "_id": "Whole data",
    "Number of clusters:": one_percent
})

# cluster the data and measure how long it took
start = time.time()
clustering.cluster_data(one_percent, my_storage)
end = time.time()
print("Time taken to cluster data: ", end - start)
my_storage.log_collection.update_one({"_id": "Whole data"},
                                     {"$set": {'Cluster Time': (end - start)}})
def run(self, process_odom_rate, process_laser_rate):
    """ The working loop. """
    ########################
    # prepare for the loop
    ########################
    # scan visualize
    #plt = PlotVisual(axis=[-5.0, 5.0, -5.0, 5.0])

    # previous state
    laser_prev = None
    clusters_prev = None

    ########################
    # beginning of the loop with controlled frequency
    ########################
    count = 0
    multiple = int(process_odom_rate / process_laser_rate)
    r = rospy.Rate(process_odom_rate)
    while not rospy.is_shutdown():
        count += 1

        # process lidar
        if count == multiple:
            count = 0
            _time = time.time()

            # --- 1. laser input (copy self.laser at this moment)
            laser_now = copy.deepcopy(self.laser)

            # --- 2. EMST (create input graph for segmentation)
            mst = EMST(laser_now, neighbor_size=10)

            # --- 3. EGBIS (apply image segmentation technique to clustering)
            clusters = Cluster(mst, k=1.0, min_size=10)

            # --- 4. Coarse-level data association (ICP): update JointState.matched_track_indices
            CoarseLevel(laser_now, laser_prev, clusters, clusters_prev, self.x,
                        tentative_threshold=5, icp_max_dist=0.5)

            # --- 5. Fine-level data association

            # --- visualization
            _laser_plot = True
            #plt.visualize(laser_now, [_laser_plot, mst, clusters])
            #plt.visualize(laser_now, [_laser_plot, None, None])
            #plt.visualize(laser_now, [None, mst, None])
            #plt.visualize(laser_now, [None, None, clusters])

            # --- end of the loop
            laser_prev = laser_now
            clusters_prev = clusters

            # --- check loop rate
            duration = time.time() - _time
            rospy.loginfo(duration)
            if duration > 1.0 / process_laser_rate:
                rospy.loginfo('[track_2d_lidar] Processing laser missed its control loop. '
                              'It actually takes %f secs.' % duration)

        # process odom
        else:
            _time = time.time()

            # process odom
            self.process_odom.run(odom=self.odom, x=self.x)

            # check loop rate
            duration = time.time() - _time
            #rospy.loginfo(duration)
            if duration > 1.0 / process_odom_rate:
                rospy.loginfo('[track_2d_lidar] Processing odom missed its control loop. '
                              'It actually takes %f secs.' % duration)

        r.sleep()
def realtime_test():
    # rate
    rate = 3

    # --- joint states
    x = JointStates()

    # laser sub
    laser = LaserSub()

    # init loop
    r = rospy.Rate(1.0)
    while not rospy.is_shutdown():
        # wait until first scan is received so that laser is initialized
        if laser.first_recieved_done:  # laser specs initialized by ROS sensor_msgs/LaserScan message
            break
        else:
            rospy.loginfo('[test_coarse_level] Waiting for laser to init.')
        r.sleep()

    laser_prev = None
    clusters_prev = None

    # init plot
    fig, ax = plt.subplots()
    ax.set_aspect('equal', adjustable='box')
    ax.set_title('Clusters Tracking', fontsize=22)
    ax.axis([-5.0, 5.0, -5.0, 5.0])
    ax.plot([0], [0], marker='>', markersize=20, color="blue")  # plot the origin

    r = rospy.Rate(rate)
    while not rospy.is_shutdown():
        # ========================================= loop =========================================
        _time = time.time()

        # --- 1. laser input (copy self.laser at this moment)
        laser_now = copy.deepcopy(laser)
        #time_1 = time.time()
        #duration_1 = time_1 - _time

        # --- 2. EMST (create input graph for segmentation)
        mst = EMST(laser_now, neighbor_size=10)
        #time_2 = time.time()
        #duration_2 = time_2 - time_1

        # --- 3. EGBIS (apply image segmentation technique to clustering)
        clusters_now = Cluster(mst, k=3.0, min_size=5)
        #time_3 = time.time()
        #duration_3 = time_3 - time_2

        # --- 4. Coarse-level data association (ICP): update JointState.matched_track_indices
        CoarseLevel(laser_now, laser_prev, clusters_now, clusters_prev, x,
                    tentative_threshold=10, icp_max_dist=1.0)
        #time_4 = time.time()
        #duration_4 = time_4 - time_3

        # --- 5. fake fine-level (no measurement matching, just replace with new ones)
        assert len(x.xt) == len(x.xp)
        assert len(x.matched_track_indices) == len(clusters_now.clusters)
        for i in range(len(x.xt)):
            xp_i = np.array([[], []])
            # collect all clusters that match this track
            for cluster, track_i in zip(clusters_now.clusters, x.matched_track_indices):
                if track_i == i:
                    x_i = laser_now.cartesian[cluster, 0]
                    y_i = laser_now.cartesian[cluster, 1]
                    p_i = np.array([list(x_i), list(y_i)])
                    xp_i = np.concatenate((xp_i, p_i), axis=1)
            # xp
            x.xp[i] = xp_i
            # xt
            gamma_i = np.mean(x.xp[i][0])
            delta_i = np.mean(x.xp[i][1])
            dx = gamma_i - x.xt[i][0]
            dy = delta_i - x.xt[i][1]
            xt_i = np.array([[gamma_i],  # x
                             [delta_i],  # y
                             [0],        # theta
                             [dx],       # vx
                             [dy],       # vy
                             [0]])       # vtheta
            x.xt[i] = xt_i
        #time_5 = time.time()
        #duration_5 = time_5 - time_4

        # --- 6. visualization
        #colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k'] * 20
        ax_list = []
        for xp_i, xt_i, xt_counter, xt_id in zip(x.xp, x.xt, x.xt_counter, x.xt_id):
            # xt - arrow
            _x = xt_i[0][0]
            _y = xt_i[1][0]
            dx = (xt_i[3][0]) * 5  # enlarge the arrow length
            dy = (xt_i[4][0]) * 5
            a = ax.arrow(_x, _y, dx, dy, width=0.0001, head_width=0.05)
            ax_list.append(a)

            # xt - counter
            if xt_counter < 0:
                color = 'g'  # mature
                text = str(xt_id)
            else:
                color = 'r'  # tentative
                text = str(xt_id) + '(' + str(xt_counter) + ')'

            # xp - box
            left = min(xp_i[0])
            bottom = min(xp_i[1])
            width = abs(max(xp_i[0]) - left)
            height = abs(max(xp_i[1]) - bottom)
            p = plt.Rectangle((left, bottom), width, height,
                              fill=False, edgecolor=color, linewidth=2)
            #p.set_transform(ax.transAxes)
            #p.set_clip_on(False)
            a = ax.add_patch(p)
            ax_list.append(a)

            # xt - text
            top = bottom + height + 0.1
            a = ax.text(left, top, text, color=color, fontsize=16)
            ax_list.append(a)

            # xp - measurements
            a = ax.scatter(xp_i[0], xp_i[1], s=25.0, c='k', linewidths=0)
            ax_list.append(a)

        # plot
        plt.pause(1e-12)  # pause for real-time display
        for a in ax_list:  # clear data on the plot
            a.remove()
        #time_6 = time.time()
        #duration_6 = time_6 - time_5

        # --- end of loop
        laser_prev = laser_now
        clusters_prev = clusters_now

        duration = time.time() - _time
        # rospy.loginfo(duration_1)
        # rospy.loginfo(duration_2)
        # rospy.loginfo(duration_3)
        # rospy.loginfo(duration_4)
        # rospy.loginfo(duration_5)
        # rospy.loginfo(duration_6)
        # rospy.loginfo(duration)
        # rospy.loginfo('-------------')
        if duration > 1.0 / rate:
            rospy.loginfo('[track_2d_lidar] process_laser missed its control loop. '
                          'It actually takes %f secs.' % duration)
        r.sleep()
        # ========================================= loop =========================================

    # plot
    plt.show()
def realtime():
    # params
    k = rospy.get_param('tracking/EGBIS/k', 1.0)  # node_name/a/a
    min_size = rospy.get_param('tracking/EGBIS/min_size', 5)
    icp_max_dist = rospy.get_param('tracking/ICP/icp_max_dist', 1.0)
    inflation_size = rospy.get_param('tracking/CheckStatic/inflation_size', 8)
    speed_threshold = rospy.get_param('tracking/CheckStatic/speed_threshold', 0.2)
    static_threshold = rospy.get_param('tracking/CheckStatic/static_threshold', 0.7)
    bagfile = rospy.get_param('tracking/bagfile', 'simple')
    use_amcl = rospy.get_param('tracking/use_amcl', True)

    # rate
    rate = 5

    # joint states
    x = State(rate)

    # laser_odom sub
    laser_odom = LaserOdomSub(use_amcl)

    # process kf
    kf = ProcessKF(x, inflate_size=inflation_size, static_threshold=static_threshold,
                   speed_threshold=speed_threshold, prediction_time=3.0)

    # init loop
    r = rospy.Rate(1.0)
    while not rospy.is_shutdown():
        # wait until first scan is received so that laser_odom is initialized
        if laser_odom.first_cartesian_done:  # laser_odom specs initialized by ROS sensor_msgs/LaserScan message
            break
        else:
            rospy.loginfo('[test_coarse_level] Waiting for laser_odom_sub to init.')
        r.sleep()

    laser_prev = None
    clusters_prev = None

    # init plot
    vt_plt = VisualizeTracking(x, kf)
    # vt_plt = VisualizeFromCar(laser_odom, x)

    # bagfile
    bag_duration = get_duration(
        '/home/wuch/tracking_ws/src/Tracking/tracking_2d_lidar/bagfiles/' + bagfile + '.bag')
    start_time = time.time()

    # write history
    with open('/home/wuch/tracking_ws/src/Tracking/tracking_2d_lidar/outputs/history.csv',
              'w') as csvfile:
        writer = csv.writer(csvfile)

        r = rospy.Rate(rate)
        #while not rospy.is_shutdown():
        while (time.time() - start_time < bag_duration):
            # ========================================= loop =========================================
            _time = time.time()

            # --- 1. laser_odom input (copy self.laser_odom at this moment)
            laser_odom.is_copying = True
            while (laser_odom.updated == False):
                pass
            laser_odom_now = copy.deepcopy(laser_odom)
            laser_odom.is_copying = False
            assert len(laser_odom.cartesian) == len(laser_odom_now.cartesian)
            # scatter_laser(laser_odom_now)
            #time_1 = time.time()
            #duration_1 = time_1 - _time

            # --- 2. EMST (create input graph for segmentation)
            mst = EMST(laser_odom_now, neighbor_size=10)
            # plot_mst(mst, laser_odom_now)
            #time_2 = time.time()
            #duration_2 = time_2 - time_1

            # --- 3. EGBIS (apply image segmentation technique to clustering)
            clusters_now = Cluster(mst, k=k, min_size=min_size)
            # plot_segmentation(clusters_now, laser_odom_now)
            #time_3 = time.time()
            #duration_3 = time_3 - time_2

            # --- 4. Coarse-level data association (ICP): update JointState.track_indices
            CoarseLevel(laser_odom_now, laser_prev, clusters_now, clusters_prev, x,
                        tentative_threshold=0, icp_max_dist=icp_max_dist)
            #time_4 = time.time()
            #duration_4 = time_4 - time_3

            # --- 5. KF
            kf.predict_update(laser_odom_now, clusters_now, rate)
            #kf.pub_estimated_vel()
            kf.publish_to_costmap()
            # kf.publish_kf_state(laser_odom_now)
            kf.publish_static_scan()
            #time_5 = time.time()
            #duration_5 = time_5 - time_4

            # --- 6. visualization
            vt_plt.plot()
            # vt_plt.plot(laser_odom_now)
            #time_6 = time.time()
            #duration_6 = time_6 - time_5

            # --- end of loop
            laser_prev = laser_odom_now
            clusters_prev = clusters_now

            # --- efficiency monitor
            duration = time.time() - _time
            # rospy.loginfo(duration_1)
            # rospy.loginfo(duration_2)
            # rospy.loginfo(duration_3)
            # rospy.loginfo(duration_4)
            # rospy.loginfo(duration_5)
            # rospy.loginfo(duration_6)
            # rospy.loginfo(duration)
            # rospy.loginfo('-------------')
            if duration > 1.0 / rate:
                rospy.loginfo('[tracking] tracking missed its control loop. '
                              'It actually takes %f secs.' % duration)
            r.sleep()
            # ========================================= loop =========================================

        for x_list, y_list in zip(kf.x_history, kf.y_history):
            writer.writerow(list(x_list))
            writer.writerow(list(y_list))
def main(args):
    print(args)
    cluster = Cluster(args.population, args.objects, args.colors,
                      args.x_size, args.y_size, args.ticks)
    cluster.print_board()
def realtime_test():
    # rosparam
    k = rospy.get_param('tracking/EGBIS/k', 1.0)  # node_name/a/a
    min_size = rospy.get_param('tracking/EGBIS/min_size', 5)
    icp_max_dist = rospy.get_param('tracking/ICP/icp_max_dist', 1.0)
    inflation_size = rospy.get_param('tracking/CheckStatic/inflation_size', 8)
    speed_threshold = rospy.get_param('tracking/CheckStatic/speed_threshold', 0.2)
    static_threshold = rospy.get_param('tracking/CheckStatic/static_threshold', 0.7)

    # rate
    rate = 5

    # joint states
    x = State(rate)

    # laser_odom sub
    laser_odom = LaserSub()

    # process kf
    kf = ProcessKF(x, inflate_size=inflation_size, static_threshold=static_threshold,
                   speed_threshold=speed_threshold, prediction_time=3.0)

    # init loop
    r = rospy.Rate(1.0)
    while not rospy.is_shutdown():
        # wait until first scan is received so that laser_odom is initialized
        if laser_odom.first_recieved_done:  # laser_odom specs initialized by ROS sensor_msgs/LaserScan message
            break
        else:
            rospy.loginfo('[test_coarse_level] Waiting for laser_sub to init.')
        r.sleep()

    laser_prev = None
    clusters_prev = None

    # init plot
    vt_plt = VisualizeTracking(x, kf)

    r = rospy.Rate(rate)
    while not rospy.is_shutdown():
        # ========================================= loop =========================================
        _time = time.time()

        # --- 1. laser_odom input (copy self.laser_odom at this moment)
        laser_odom_now = copy.deepcopy(laser_odom)
        #time_1 = time.time()
        #duration_1 = time_1 - _time

        # --- 2. EMST (create input graph for segmentation)
        mst = EMST(laser_odom_now, neighbor_size=10)
        #time_2 = time.time()
        #duration_2 = time_2 - time_1

        # --- 3. EGBIS (apply image segmentation technique to clustering)
        clusters_now = Cluster(mst, k=k, min_size=min_size)
        #time_3 = time.time()
        #duration_3 = time_3 - time_2

        # --- 4. Coarse-level data association (ICP): update JointState.track_indices
        CoarseLevel(laser_odom_now, laser_prev, clusters_now, clusters_prev, x,
                    tentative_threshold=0, icp_max_dist=icp_max_dist)
        #time_4 = time.time()
        #duration_4 = time_4 - time_3

        # --- 5. KF
        kf.predict_update(laser_odom_now, clusters_now, rate)
        kf.publish_kf_state()
        #time_5 = time.time()
        #duration_5 = time_5 - time_4

        # --- 6. visualization
        vt_plt.plot()
        #time_6 = time.time()
        #duration_6 = time_6 - time_5

        # --- end of loop
        laser_prev = laser_odom_now
        clusters_prev = clusters_now

        # --- efficiency monitor
        duration = time.time() - _time
        # rospy.loginfo(duration_1)
        # rospy.loginfo(duration_2)
        # rospy.loginfo(duration_3)
        # rospy.loginfo(duration_4)
        # rospy.loginfo(duration_5)
        # rospy.loginfo(duration_6)
        # rospy.loginfo(duration)
        # rospy.loginfo('-------------')
        if duration > 1.0 / rate:
            rospy.loginfo('[tracking] tracking missed its control loop. '
                          'It actually takes %f secs.' % duration)
        r.sleep()
        # ========================================= loop =========================================

    vt_plt.show()
def test_cluster(self):
    a = Cluster("a", np.array([[2], [2], [4]]))
    b = Cluster("b", np.array([[4], [5], [6]]))
    self.assertEqual(a - b, 0)
    print(a | b | b | a | b)
from closest_search import ClosestSearch
from clustering import Cluster
from flask import Flask
import json
import closest_search

app = Flask(__name__)
cluster = Cluster()
cs = ClosestSearch()


@app.route("/event/<latitude>/<longitude>/<budget>/<category>")
def fetch_all_events(latitude, longitude, budget, category):
    cluster.store_details(latitude, longitude, budget)
    events = cluster.get_filtered_events(category)
    return json.dumps(events)


@app.route("/res_event/<cuisine>")
def fetch_res_based_on_event(cuisine):
    result = cluster.fetch_event_res_result(cuisine)
    return json.dumps(result)


@app.route("/rest/<latitude>/<longitude>/<budget>/<cuisine>")
def fetch_all_restaurants(latitude, longitude, budget, cuisine):
    cluster.store_details(latitude, longitude, budget)
    restaurants = cluster.get_filtered_restaurants(cuisine)
    result = cluster.fetch_res_result(restaurants)
    # return the computed result rather than the raw restaurant list,
    # which otherwise goes unused
    return json.dumps(result)
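# A hypothetical client call against the service above; the host, port, and
# path parameters are assumptions for illustration, not part of the original app.
import requests

resp = requests.get("http://localhost:5000/rest/40.71/-74.00/50/italian")
print(resp.json())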
plt.figure(figsize=(10, 10))
plt.subplots_adjust(bottom=0.1)
plt.scatter(mean, skewness, label='True Position')
plt.xlabel("mean")
plt.ylabel("skewness")
#plt.show()

processed_data = list(zip(mean, kurt))
training_data = np.reshape(training_data, [16508, 3])
print(training_data)
print(np.shape(training_data))

data_cluster = Cluster(processed_data, training_data, ideal_data)
feature_sets, labels = data_cluster.cluster()
feature_sets = np.asarray(feature_sets)
print(np.shape(feature_sets))

plt.figure(figsize=(10, 10))
plt.scatter(feature_sets[:, 2], feature_sets[:, 0],
            c=feature_sets[:, 3], cmap='rainbow')
plt.show()

ideal_label = max(labels) + 1
print(np.shape(feature_sets))
print(feature_sets)
def clustering(x_train, y_train, x_test, y_test):
    cluster = Cluster()
    cluster.train(x_train, y_train, x_test, y_test)