Ejemplo n.º 1
0
def compute_silhouette(data, l_matrix):
    """
    Computes silhouette scores of the dendrogram given by l_matrix
    :param data: numpy array. Shape=(nb_samples, nb_genes)
    :param l_matrix: Scipy linkage matrix. Shape=(nb_genes-1, 4)
    :return: list of Silhouette scores
    """
    nb_samples, nb_genes = data.shape

    # Form dendrogram and compute Silhouette score at each node
    clusters = {i: Cluster(index=i) for i in range(nb_genes)}
    scores = []
    for i, z in enumerate(l_matrix):
        c1, c2, dist, n_elems = z
        clusters[nb_genes + i] = Cluster(c_left=clusters[c1],
                                         c_right=clusters[c2])
        c1_indices = clusters[c1].indices
        c2_indices = clusters[c2].indices
        labels = [0] * len(c1_indices) + [1] * len(c2_indices)
        if len(labels) == 2:
            scores.append(0)
        else:
            expr = data[:, clusters[nb_genes + i].indices]
            m = 1 - pearson_correlation(expr, expr)
            score = silhouette_score(m, labels, metric='precomputed')
            scores.append(score)

    return scores
Ejemplo n.º 2
0
def dendrogram_distance(l_matrix, condensed=True):
    """
    Computes the distances between each pair of genes according to the scipy linkage
    matrix.
    :param l_matrix: Scipy linkage matrix. Shape=(nb_genes-1, 4)
    :param condensed: whether to return the distances as a flat array containing the
           upper-triangular of the distance matrix
    :return: distances
    """
    nb_genes = l_matrix.shape[0] + 1

    # Fill distance matrix m
    clusters = {i: Cluster(index=i) for i in range(nb_genes)}
    m = np.zeros((nb_genes, nb_genes))
    for i, z in enumerate(l_matrix):
        c1, c2, dist, n_elems = z
        clusters[nb_genes + i] = Cluster(c_left=clusters[c1],
                                         c_right=clusters[c2])
        c1_indices = clusters[c1].indices
        c2_indices = clusters[c2].indices

        for c1_idx in c1_indices:
            for c2_idx in c2_indices:
                m[c1_idx, c2_idx] = dist
                m[c2_idx, c1_idx] = dist

    # Return flat array if condensed
    if condensed:
        return upper_diag_list(m)

    return m
Ejemplo n.º 3
0
    def __init__(self,
                 project_api,
                 min_cluster_size=1,
                 fix_distinct_clusters=False):
        Cluster.__init__(self, project_api, min_cluster_size)

        self.fix_distinct_clusters = fix_distinct_clusters
        self.algorithm_name = "divisivekmeans"
Ejemplo n.º 4
0
def inf():
    d2 = Dataset.load('../models.nosync/data_train', c)
    t = TopicModelling.create(c)
    corpus, model = t.infer(d2)  #TODO fix
    cl = Cluster(c)
    for t, titles in cl.find_similar(d2, corpus, model):
        print('{}: {}'.format(t, ', '.join(titles)))
        print('---\n')
Ejemplo n.º 5
0
def test_cluster():
    """ Test of Cluster.clusters()
	"""
    c = Cluster(sample_points, 2, 1, 3)

    found = c.clusters()
    cluster1 = [0, 1, 3, 4, 6, 7]
    cluster2 = [8, 9, 10, 11, 12, 13, 14, 15]
    assert (found == [cluster1, cluster2]) or (found == [cluster2, cluster1])
Ejemplo n.º 6
0
def test_plot_icp_cluster2cluster(laser_a, laser_b):
    # source: a.
    mst_a = EMST(laser_a)
    clusters_a = Cluster(mst_a, k=1.0, min_size=10)

    # destination: b.
    mst_b = EMST(laser_b)
    clusters_b = Cluster(mst_b, k=1.0, min_size=10)

    # icp
    dst_indices, T = icp(laser_a.cartesian,
                         laser_b.cartesian,
                         init_pose=None,
                         max_iterations=20,
                         tolerance=0.001)
    matched_cluster_indices, _ = match_clusters(dst_indices, laser_a, laser_b,
                                                clusters_a, clusters_b)

    # plot
    plt.figure()

    # colors to iterate over
    colors_a = ['r', 'y'] * 20
    colors_b = ['b', 'g'] * 20

    # plot source clusters and arrows to destination
    count = 1
    for cluster, color, cluster_b_index in zip(clusters_a.clusters, colors_a,
                                               matched_cluster_indices):
        plt.scatter(laser_a.cartesian[cluster, [0]],
                    laser_a.cartesian[cluster, [1]],
                    s=10.0,
                    c=color,
                    linewidths=0)
        if cluster_b_index >= 0:
            # plot arrow points to the corresponding cluster
            x1 = np.mean(laser_a.cartesian[cluster, [0]])
            y1 = np.mean(laser_a.cartesian[cluster, [1]])
            cluster_b = clusters_b.clusters[cluster_b_index]
            x2 = np.mean(laser_b.cartesian[cluster_b, [0]])
            y2 = np.mean(laser_b.cartesian[cluster_b, [1]])
            dx = x2 - x1
            dy = y2 - y1
            plt.arrow(x1, y1, dx, dy, width=0.001, head_width=0.05)
            plt.text((x1 + x2) / 2.0, (y1 + y2) / 2.0, str(count), fontsize=18)
            count += 1

    # plot destination clusters
    for cluster, color in zip(clusters_b.clusters, colors_b):
        plt.scatter(laser_b.cartesian[cluster, [0]],
                    laser_b.cartesian[cluster, [1]],
                    s=10.0,
                    c=color,
                    linewidths=0)

    plt.show()
def jackknifing_tree(file_pattern, di_method):
	"""
	Given a pattern for the list of subsampled DI files
	(each file should have per-line format <sample>,<size>,<comma-separated DI>
	 and have one of the sizes be 'real')
	Run clustering and count the differences between subsampled and real trees
	Returns: tree (size-->sample-->list of trees), symmetric_difference (size-->list of diffs),
	         robinson_foulds_distance (size-->list of diffs)
	"""
	from clustering import Cluster
	import dendropy
	dTree = lambda x: dendropy.Tree.get_from_string(x, "newick")
	samples = None
	sizes = None
	trees = {}
	for file in glob.iglob(file_pattern):
		print >> sys.stderr, "reading subsampled DI file {0}....".format(file)
		d = {}
		with open(file) as f:
			for line in f:
				sample, size, di = line.strip().split(',', 2)
				if size not in d:
					d[size] = {}
				d[size][sample] = np.array(map(float, di.split(',')))
		if len(d) == 0: continue
		if sizes is None:
			sizes = d.keys()
			sizes.sort()
			samples = d[sizes[0]].keys()
			samples.sort()
		for size, di_dict in d.iteritems():
			c = Cluster(None)
			c.init_from_di_list(di_dict, method=di_method, threshold=0)
			c.run_till_end()
			try:
				trees[size].append(dTree(str(c.trees[0])))
			except KeyError:
				trees[size] = [dTree(str(c.trees[0]))]

	# tally (1) symmetric differences (edge weight ignored)
	# (2) robinson_foulds_distance (edge weight considered)
	# 'real' is the size that is the full pool that we compare all other trees to
	sym_diff = {}
	rob_diff = {}
	for size in sizes:
		if size == 'real': continue
		t_real = trees['real'][0]
		sym_diff[size] = [t_real.symmetric_difference(t) for t in trees[size]]
		rob_diff[size] = [t_real.robinson_foulds_distance(t) for t in trees[size]]

	return trees, sym_diff, rob_diff
def jackknifing_tree_DF(file_pattern, di_method, samples_to_exclude=['1412-1','1412-4']):
	"""
	Similar as jackknifing_tree but using DF files and
	(probably improved clustering in clustering.py which I need manually turn on)
	Run clustering and count the differences between subsampled and real trees
	Returns: tree (size-->sample-->list of trees), symmetric_difference (size-->list of diffs),
	         robinson_foulds_distance (size-->list of diffs)
	"""
	from clustering import Cluster
	import dendropy
	dTree = lambda x: dendropy.Tree.get_from_string(x, "newick")
	trees = {}
	for file in glob.iglob(file_pattern):
		print >> sys.stderr, "reading subsampled DF file {0}....".format(file)
		d = {} # size --> list of dfs
		with open(file) as f:
			for df in DF.DFReader(f):
				sample = df.name
				if sample in samples_to_exclude:
					print >> sys.stderr, "EXCLUDING SAMPLE {0}!".format(sample)
					continue
				size = df.annotations['size']
				if size not in d:
					d[size] = []
				# need to change the mask for df!!! 
				# not a problem when we did with DI becuz it was already masked
				df.change_vec_mask(valid_DI_pos)
				d[size].append(df)
		for size, df_list in d.iteritems():
			c = Cluster(df_list, method=di_method, threshold=0)
			c.run_till_end()
			try:
				trees[size].append(dTree(str(c.trees[0])))
			except KeyError:
				trees[size] = [dTree(str(c.trees[0]))]
			print "size", size, "file", file
			print c.trees[0]
	# tally (1) symmetric differences (edge weight ignored)
	# (2) robinson_foulds_distance (edge weight considered)
	# 'real' is the size that is the full pool that we compare all other trees to
	sym_diff = {}
	rob_diff = {}
	for size in trees:
		if size == 'real': continue
		t_real = trees['real'][0]
		sym_diff[size] = [t_real.symmetric_difference(t) for t in trees[size]]
		rob_diff[size] = [t_real.robinson_foulds_distance(t) for t in trees[size]]

	return trees, sym_diff, rob_diff
Ejemplo n.º 9
0
def track_clusters(artist):
    wrapper = LastfmWrapper()
    (tracks, matrix) = wrapper.presence_matrix(artist)
    
    c = Cluster()
    assignments = c.cluster(matrix)
    
    clusters = {}
    for index, track in enumerate(tracks):
        print assignments[index], " - ", track
        try:
            clusters[assignments[index]].append(track)
        except KeyError:
            clusters[assignments[index]] = [track]

    return ''.join(['clusterCallback(',json.dumps(clusters),')']);
Ejemplo n.º 10
0
def run():
    start_time = time.clock()
    jieba.set_dictionary('jieba/dict.txt.big')
    jieba.initialize()
    print ("jieba " + str(time.clock() - start_time))
    
    start_time = time.clock()

    news_rss_url = "http://hk.news.yahoo.com/rss/hong-kong"
    # news_rss_url = "http://hk.news.yahoo.com/rss/china"
    info = feedparser.parse(news_rss_url)

    
    start_time = time.clock()

    for entry in info.entries:
        # word count of each word of summary
        word_list = getBagOfWords(preprocess(jieba.cut(stripTag(entry.summary))))
        # word count of each word of title
        bag_of_word_of_title = getBagOfWords(preprocess(jieba.cut(stripTag(entry.title))))
        
        # Combine word count of both summary and title and title weights more
        bag_of_word = Counter()
        for i in range(3):
            bag_of_word.update(bag_of_word_of_title)
        bag_of_word.update(word_list)
        entry["bag_of_words"] = bag_of_word

    print ("preprocess " + str(time.clock() - start_time))
        

#     result = Counter()
#     for entry in info.entries:
#         result.update(entry["bag_of_words"])
#     printList(result) 
        
    # Clustering them
    start_time = time.clock()
    clusters = clustering.clustering([Cluster([Vector(entry)]) for entry in info.entries])
    print ("clustering " + str(time.clock() - start_time))

    # Print the result        
    newsList = []
    for (index, cluster) in enumerate(clusters):
        for vector in cluster.listOfVectors:
            news = News(index, (vector == cluster.centroidVector), vector.data["title"], vector.data["published"], vector.data["link"])
            newsList.append(news.__dict__)
    return json.dumps(newsList)
Ejemplo n.º 11
0
def clustering(x_train,
               y_train,
               writer,
               sheet_name,
               multiple=False,
               binary=True):
    cluster = Cluster()
    cluster.setupResults()
    cluster.train(x_train, y_train, multiple=multiple, binary=binary)
    cluster.results.to_excel(writer, sheet_name=sheet_name)
Ejemplo n.º 12
0
def test_neighbors():
    """ Unit Test of Cluster._neighbors()
	"""
    c = Cluster(sample_points, 2, 1, 3)
    assert c._neighbors(
        sample_points[0]) == [sample_points[1], sample_points[3]]

    c = Cluster(sample_points, 2, 1.5, 3)
    assert c._neighbors(sample_points[0]) == [
        sample_points[1], sample_points[3], sample_points[4]
    ]
Ejemplo n.º 13
0
    def __cluster_subject__(self,subject_id,max_users=None,jpeg_file=None):
        """
        override the parent method but still call it - also for correcting problems with nearest neighbours etc.
        things that only make sense with divisive k-means
        :param subject_id:
        :param jpeg_file:
        :return:
        """
        time_to_cluster = Cluster.__cluster_subject__(self,subject_id,max_users,jpeg_file)

        # kmeans will sometimes split clusters such that we have two nearest neighbour clusters with no users
        # in common - both representing the same animal/thing. Do we want to find such cases and fix them?
        # in this case fixing is just a case of merging the clusters
        # if self.fix_distinct_clusters:
        #     start = time.time()
        #     self.clusterResults[subject_id] = self.correction.__fix__(self.clusterResults[subject_id])
        #     end = time.time()
        #
        #     return time_to_cluster + (end-start)
        # else:
        return time_to_cluster
Ejemplo n.º 14
0
    def __cluster_subject__(self, subject_id, max_users=None, jpeg_file=None):
        """
        override the parent method but still call it - also for correcting problems with nearest neighbours etc.
        things that only make sense with divisive k-means
        :param subject_id:
        :param jpeg_file:
        :return:
        """
        time_to_cluster = Cluster.__cluster_subject__(self, subject_id,
                                                      max_users, jpeg_file)

        # kmeans will sometimes split clusters such that we have two nearest neighbour clusters with no users
        # in common - both representing the same animal/thing. Do we want to find such cases and fix them?
        # in this case fixing is just a case of merging the clusters
        # if self.fix_distinct_clusters:
        #     start = time.time()
        #     self.clusterResults[subject_id] = self.correction.__fix__(self.clusterResults[subject_id])
        #     end = time.time()
        #
        #     return time_to_cluster + (end-start)
        # else:
        return time_to_cluster
Ejemplo n.º 15
0
 def __init__(self, project_api,min_cluster_size=1):
     Cluster.__init__(self, project_api,min_cluster_size)
Ejemplo n.º 16
0
        # word count of each word of summary
        word_list = getBagOfWords(preprocess(pseg.cut(stripTag(
            entry.summary))))
        # word count of each word of title
        bag_of_word_of_title = getBagOfWords(
            preprocess(pseg.cut(stripTag(entry.title))))

        # Combine word count of both summary and title and title weights more
        bag_of_word = Counter()
        for i in range(3):
            bag_of_word.update(bag_of_word_of_title)
        bag_of_word.update(word_list)
        entry["bag_of_words"] = bag_of_word

#     result = Counter()
#     for entry in info.entries:
#         result.update(entry["bag_of_words"])
#     printList(result)

# Clustering them
    clusters = clustering.clustering(
        [Cluster([Vector(entry)]) for entry in info.entries])

    # Print the result
    for cluster in clusters:
        print "____FINAL___CLUSTER___"
        printList("CENTROID: " + cluster.centroidVector.data["title"])
        for vector in cluster.listOfVectors:
            printList(vector.data["title"])
        print "____END_OF_CLUSTER___"
Ejemplo n.º 17
0
    if args.backbone == "vgg":
        model = VGG16(include_top=False, weights='imagenet')
        #model_vgg = Model(input = base_vgg_model.input, output = base_vgg_model.get_layer('block4_pool').output)
    else:
        model = ResNet50(weights='imagenet', include_top=False)

    if args.task == "anomaly":
        features_train_array = model.predict(x_train)
        features_train_array = features_train_array.reshape(
            num_train_samples, -1)  #reshape to 2d from 4d array

        features_test_array = model.predict(x_test)
        features_test_array = features_test_array.reshape(num_test_samples, -1)
        estimator_str = "svc"
        if args.subtask == "outlier":
            estimator_str = "isolationforest"

        #estimator_params = GridSearch(features_train_array,y_train,estimator_str)

        Analyze(args, estimator_str, features_train_array, y_train,
                features_test_array, y_test, testoutputfile)

    if args.task == "cluster":
        features_train_array = model.predict(x_train)
        features_train_array_np = np.array(features_train_array)
        features_train_array_list.append(features_train_array_np.flatten())
        features_train_array_list_np = np.array(features_train_array_list)

        Cluster(features_train_array, num_train_samples)
Ejemplo n.º 18
0
def realtime_test():
    ''' Internal test.
    '''
    # laser sub
    laser = LaserSub()

    # init loop
    r = rospy.Rate(1.0)
    while not rospy.is_shutdown(
    ):  # wait until first scan is received so that laser is initialized
        if laser.first_recieved_done:  # laser specs initialized by ROS sensor_msgs/LaserScan message
            break
        else:
            rospy.loginfo(
                '[coarse_level_association] Waiting for laser to init.')
        r.sleep()

    laser_prev = None
    clusters_prev = None
    clusters_id = None

    # init plot
    fig, ax = plt.subplots()
    ax.set_aspect('equal', adjustable='box')
    ax.set_title('Clusters Tracking', fontsize=18)
    ax.axis([-5.0, 5.0, -5.0, 5.0])
    ax.plot([0], [0], marker='>', markersize=20,
            color="red")  # plot the origin

    # loop
    r = rospy.Rate(30)
    while not rospy.is_shutdown():
        # --- 1. laser input (copy self.laser at this moment)
        laser_now = copy.deepcopy(laser)

        # --- 2. EMST (create input graph for segmentation)
        mst = EMST(laser_now)

        # --- 3. EGBIS (apply image segmentation technique to clustering)
        clusters_now = Cluster(mst, k=1.0, min_size=10)
        if clusters_id is None:  # first set of clusters generated
            clusters_id = [i for i in range(len(clusters_now.clusters))]

        # --- 4. ICP
        if laser_prev is not None:
            dst_indices, T = icp(laser_now.cartesian,
                                 laser_prev.cartesian,
                                 init_pose=None,
                                 max_iterations=20,
                                 tolerance=0.001)
            matched_cluster_indices, _ = match_clusters(
                dst_indices, laser_now, laser_prev, clusters_now,
                clusters_prev)

            # --- 5. Visualize
            laser_a = laser_now
            laser_b = laser_prev
            clusters_a = clusters_now
            clusters_b = clusters_prev
            clusters_id_new = []

            # colors to iterate over
            colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k'] * 20

            # plot source clusters with the same id as the matched destination clusters
            ax_list = []
            for cluster, color, cluster_b_index in zip(
                    clusters_a.clusters, colors, matched_cluster_indices):
                a = ax.scatter(laser_a.cartesian[cluster, [0]],
                               laser_a.cartesian[cluster, [1]],
                               s=20.0,
                               c=color,
                               linewidths=0)
                ax_list.append(a)
                if cluster_b_index >= 0:
                    # show id
                    x1 = np.mean(laser_a.cartesian[cluster, [0]])
                    y1 = np.mean(laser_a.cartesian[cluster, [1]])
                    cluster_b = clusters_b.clusters[cluster_b_index]
                    x2 = np.mean(laser_b.cartesian[cluster_b, [0]])
                    y2 = np.mean(laser_b.cartesian[cluster_b, [1]])
                    _id = clusters_id[cluster_b_index]
                    clusters_id_new.append(_id)
                    a = ax.text((x1 + x2) / 2.0, (y1 + y2) / 2.0,
                                str(_id),
                                fontsize=22)
                    ax_list.append(a)
                else:  # no cluster_b match me.
                    _id = max(clusters_id) + 1
                    clusters_id_new.append(_id)

            clusters_id = clusters_id_new

            # plot
            plt.pause(1e-12)  # pause for real time display
            for a in ax_list:  # clear data on the plot
                a.remove()

        # end of loop
        laser_prev = laser_now
        clusters_prev = clusters_now
        r.sleep()

    # plot
    plt.show()
Ejemplo n.º 19
0
# run streming API alongside with REST API
# Create new threads
thread1 = myThread(0, "Streaming API")
thread2 = myThread(1, "REST API")

# Start new Threads
thread1.start()
thread2.start()
# wait on both threads to finish
thread1.join()
thread2.join()
''' PART 2 - group data'''

# cluster data into groups
clustering = Cluster()
# get the one percent of all tweets
one_percent = int(summary.get_total_number_in(my_storage.collection) * 0.01)
my_storage.log_collection.insert_one({
    "_id": "Whole data",
    "Number of clusters:": one_percent
})
# cluster the data and measure time how long it took
start = time.time()
clustering.cluster_data(one_percent, my_storage)
end = time.time()
print("Time taken to cluster data: ", end - start)
my_storage.log_collection.update_one({"_id": "Whole data"},
                                     {"$set": {
                                         'Cluster Time': (end - start)
                                     }})
Ejemplo n.º 20
0
 def __init__(self, project_api, min_cluster_size=1):
     Cluster.__init__(self, project_api, min_cluster_size)
Ejemplo n.º 21
0
    def run(self, process_odom_rate, process_laser_rate):
        """ The working loop.
        """
        ########################
        # prepare for the loop
        ########################
        # scan visualize
        #plt = PlotVisual(axis=[-5.0,5.0,-5.0,5.0])
        
        # previous state
        laser_prev = None
        clusters_prev = None

        ########################
        # beginning of the loop with controled frequency
        ########################
        count = 0
        multiple = int(process_odom_rate/process_laser_rate)
        r = rospy.Rate(process_odom_rate)
        while not rospy.is_shutdown():
            count += 1
            # process lidar
            if count == multiple:
                count = 0
                _time = time.time()
                
                # --- 1. laser input (copy self.laser at this moment)
                laser_now = copy.deepcopy(self.laser)

                # --- 2. EMST (create input graph for segmentation) 
                mst = EMST(laser_now, neighbor_size=10)

                # --- 3. EGBIS (apply image segmentation technique to clustering)
                clusters = Cluster(mst, k=1.0, min_size=10)

                # --- 4. Coarse-level data association (ICP): update JointState.matched_track_indices
                CoarseLevel(laser_now, laser_prev, clusters, clusters_prev, self.x, tentative_threshold = 5, icp_max_dist=0.5)

                # --- 5. Fine-level data association
                

                # --- visualization 
                _laser_plot = True
                #plt.visualize(laser_now, [_laser_plot, mst, clusters])
                #plt.visualize(laser_now, [_laser_plot, None, None])
                #plt.visualize(laser_now, [None, mst, None])
                #plt.visualize(laser_now, [None, None, clusters])

                # --- end of the loop
                laser_prev = laser_now
                clusters_prev = clusters

                # --- check loop rate
                duration = time.time() - _time
                rospy.loginfo(duration)
                if duration > 1.0/process_laser_rate:
                    rospy.loginfo('[track_2d_lidar] Processing laser missed its control loop. It actually takes %f secs.' % duration)
     
            # process odom
            else:
                _time = time.time()
                # process odom
                self.process_odom.run(odom=self.odom, x=self.x)

                #  check loop rate
                duration = time.time() - _time
                #rospy.loginfo(duration)
                if duration > 1.0/process_odom_rate:
                    rospy.loginfo('[track_2d_lidar] Processing odom missed its control loop. It actually takes %f secs.' % duration)
            
            r.sleep()
Ejemplo n.º 22
0
def realtime_test():
    # rate
    rate = 3

    # --- joint states
    x = JointStates()

    # laser sub
    laser = LaserSub()

    # init loop
    r = rospy.Rate(1.0)
    while not rospy.is_shutdown(
    ):  # wait until first scan is received so that laser is initialized
        if laser.first_recieved_done:  # laser specs initialized by ROS sensor_msgs/LaserScan message
            break
        else:
            rospy.loginfo('[test_coarse_level] Waiting for laser to init.')
        r.sleep()

    laser_prev = None
    clusters_prev = None

    # init plot
    fig, ax = plt.subplots()
    ax.set_aspect('equal', adjustable='box')
    ax.set_title('Clusters Tracking', fontsize=22)
    ax.axis([-5.0, 5.0, -5.0, 5.0])
    ax.plot([0], [0], marker='>', markersize=20,
            color="blue")  # plot the origin

    r = rospy.Rate(rate)
    while not rospy.is_shutdown():
        # ========================================= loop =========================================
        _time = time.time()

        # --- 1. laser input (copy self.laser at this moment)
        laser_now = copy.deepcopy(laser)
        #time_1 = time.time()
        #duration_1 = time_1 - _time

        # --- 2. EMST (create input graph for segmentation)
        mst = EMST(laser_now, neighbor_size=10)
        #time_2 = time.time()
        #duration_2 = time_2 - time_1

        # --- 3. EGBIS (apply image segmentation technique to clustering)
        clusters_now = Cluster(mst, k=3.0, min_size=5)
        #time_3 = time.time()
        #duration_3 = time_3 - time_2

        # --- 4. Coarse-level data association (ICP): update JointState.matched_track_indices
        CoarseLevel(laser_now,
                    laser_prev,
                    clusters_now,
                    clusters_prev,
                    x,
                    tentative_threshold=10,
                    icp_max_dist=1.0)
        #time_4 = time.time()
        #duration_4 = time_4 - time_3

        # --- 5. fake fine-level (no measurement matching, just replace with new ones)
        assert len(x.xt) == len(x.xp)
        assert len(x.matched_track_indices) == len(clusters_now.clusters)

        for i in range(len(x.xt)):
            xp_i = np.array([[], []])
            # collect all cluster that matches this track
            for cluster, track_i in zip(clusters_now.clusters,
                                        x.matched_track_indices):
                if track_i == i:
                    x_i = laser_now.cartesian[cluster, 0]
                    y_i = laser_now.cartesian[cluster, 1]
                    p_i = np.array([list(x_i), list(y_i)])
                    xp_i = np.concatenate((xp_i, p_i), axis=1)
            # xp
            x.xp[i] = xp_i
            # xt
            gamma_i = np.mean(x.xp[i][0])
            delta_i = np.mean(x.xp[i][1])
            dx = gamma_i - x.xt[i][0]
            dy = delta_i - x.xt[i][1]
            xt_i = np.array([
                [gamma_i],  # x
                [delta_i],  # y  
                [0],  # theta
                [dx],  # vx
                [dy],  # vy
                [0]
            ])  # vtheta
            x.xt[i] = xt_i

        #time_5 = time.time()
        #duration_5 = time_5 - time_4

        # --- 6. visualization
        #colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']*20
        ax_list = []
        for xp_i, xt_i, xt_counter, xt_id in zip(x.xp, x.xt, x.xt_counter,
                                                 x.xt_id):

            # xt - arrow
            _x = xt_i[0][0]
            _y = xt_i[1][0]
            dx = (xt_i[3][0]) * 5  # enlarge the arrow length
            dy = (xt_i[4][0]) * 5
            a = ax.arrow(_x, _y, dx, dy, width=0.0001, head_width=0.05)
            ax_list.append(a)

            # xt - counter
            if xt_counter < 0:
                color = 'g'  # mature
                text = str(xt_id)
            else:
                color = 'r'  # tentative
                text = str(xt_id) + '(' + str(xt_counter) + ')'

            # xp- box
            left = min(xp_i[0])
            bottom = min(xp_i[1])
            width = abs(max(xp_i[0]) - left)
            height = abs(max(xp_i[1]) - bottom)
            p = plt.Rectangle((left, bottom),
                              width,
                              height,
                              fill=False,
                              edgecolor=color,
                              linewidth=2)
            #p.set_transform(ax.transAxes)
            #p.set_clip_on(False)
            a = ax.add_patch(p)
            ax_list.append(a)

            # xt - text
            top = bottom + height + 0.1
            a = ax.text(left, top, text, color=color, fontsize=16)
            ax_list.append(a)

            # xp - measurements
            a = ax.scatter(xp_i[0], xp_i[1], s=25.0, c='k', linewidths=0)
            ax_list.append(a)

        # plot
        plt.pause(1e-12)  # pause for real time display
        for a in ax_list:  # clear data on the plot
            a.remove()

        #time_6 = time.time()
        #duration_6 = time_6 - time_5

        # --- end of loop
        laser_prev = laser_now
        clusters_prev = clusters_now

        duration = time.time() - _time
        # rospy.loginfo(duration_1)
        # rospy.loginfo(duration_2)
        # rospy.loginfo(duration_3)
        # rospy.loginfo(duration_4)
        # rospy.loginfo(duration_5)
        # rospy.loginfo(duration_6)
        # rospy.loginfo(duration)
        # rospy.loginfo('-------------')

        if duration > 1.0 / rate:
            rospy.loginfo(
                '[track_2d_lidar] process_laser missed its control loop. It actually takes %f secs.'
                % duration)
        r.sleep()

        # ========================================= loop =========================================

    # plot
    plt.show()
Ejemplo n.º 23
0
    def __init__(self, project_api,min_cluster_size=1,fix_distinct_clusters=False):
        Cluster.__init__(self,project_api,min_cluster_size)

        self.fix_distinct_clusters = fix_distinct_clusters
        self.algorithm_name = "divisivekmeans"
Ejemplo n.º 24
0
def realtime():
    # params
    k = rospy.get_param('tracking/EGBIS/k', 1.0)  # node_name/a/a
    min_size = rospy.get_param('tracking/EGBIS/min_size', 5)
    icp_max_dist = rospy.get_param('tracking/ICP/icp_max_dist', 1.0)
    inflation_size = rospy.get_param('tracking/CheckStatic/inflation_size', 8)
    speed_threshold = rospy.get_param('tracking/CheckStatic/speed_threshold',
                                      0.2)
    static_threshold = rospy.get_param('tracking/CheckStatic/static_threshold',
                                       0.7)

    bagfile = rospy.get_param('tracking/bagfile', 'simple')
    use_amcl = rospy.get_param('tracking/use_amcl', True)

    # rate
    rate = 5

    # joint states
    x = State(rate)

    # laser_odom sub
    laser_odom = LaserOdomSub(use_amcl)

    # process kf
    kf = ProcessKF(x,
                   inflate_size=inflation_size,
                   static_threshold=static_threshold,
                   speed_threshold=speed_threshold,
                   prediction_time=3.0)

    # init loop
    r = rospy.Rate(1.0)
    while not rospy.is_shutdown(
    ):  # wait until first scan is received so that laser_odom is initialized
        if laser_odom.first_cartesian_done:  # laser_odom specs initialized by ROS sensor_msgs/LaserScan message
            break
        else:
            rospy.loginfo(
                '[test_coarse_level] Waiting for laser_odom_sub to init.')
        r.sleep()

    laser_prev = None
    clusters_prev = None

    # init plot
    vt_plt = VisualizeTracking(x, kf)
    # vt_plt = VisualizeFromCar(laser_odom, x)

    # bagfile
    bag_duration = get_duration(
        '/home/wuch/tracking_ws/src/Tracking/tracking_2d_lidar/bagfiles/' +
        bagfile + '.bag')
    start_time = time.time()

    # write history
    with open(
            '/home/wuch/tracking_ws/src/Tracking/tracking_2d_lidar/outputs/history.csv',
            'w') as csvfile:
        writer = csv.writer(csvfile)

        r = rospy.Rate(rate)
        #while not rospy.is_shutdown():
        while (time.time() - start_time < bag_duration):
            # ========================================= loop =========================================
            _time = time.time()

            # --- 1. laser_odom input (copy self.laser_odom at this moment)
            laser_odom.is_copying = True
            while (laser_odom.updated == False):
                pass
            laser_odom_now = copy.deepcopy(laser_odom)
            laser_odom.is_copying = False

            assert len(laser_odom.cartesian) == len(laser_odom_now.cartesian)

            # scatter_laser(laser_odom_now)
            #time_1 = time.time()
            #duration_1 = time_1 - _time

            # --- 2. EMST (create input graph for segmentation)
            mst = EMST(laser_odom_now, neighbor_size=10)

            # plot_mst(mst, laser_odom_now)
            #time_2 = time.time()
            #duration_2 = time_2 - time_1

            # --- 3. EGBIS (apply image segmentation technique to clustering)
            clusters_now = Cluster(mst, k=k, min_size=min_size)

            # plot_segmentation(clusters_now, laser_odom_now)
            # time_3 = time.time()
            # duration_3 = time_3 - time_2

            # --- 4. Coarse-level data association (ICP): update JointState.track_indices
            CoarseLevel(laser_odom_now,
                        laser_prev,
                        clusters_now,
                        clusters_prev,
                        x,
                        tentative_threshold=0,
                        icp_max_dist=icp_max_dist)
            # time_4 = time.time()
            # duration_4 = time_4 - time_3

            # --- 5. KF
            kf.predict_update(laser_odom_now, clusters_now, rate)
            #kf.pub_estimated_vel()
            kf.publish_to_costmap()
            # kf.publish_kf_state(laser_odom_now)
            kf.publish_static_scan()

            #time_5 = time.time()
            #duration_5 = time_5 - time_4

            # --- 6. visualization
            vt_plt.plot()
            # vt_plt.plot(laser_odom_now)
            #time_6 = time.time()
            #duration_6 = time_6 - time_5

            # --- end of loop
            laser_prev = laser_odom_now
            clusters_prev = clusters_now

            # --- efficiency monitor
            duration = time.time() - _time
            # rospy.loginfo(duration_1)
            # rospy.loginfo(duration_2)
            # rospy.loginfo(duration_3)
            # rospy.loginfo(duration_4)
            # rospy.loginfo(duration_5)
            # rospy.loginfo(duration_6)
            # rospy.loginfo(duration)
            # rospy.loginfo('-------------')

            if duration > 1.0 / rate:
                rospy.loginfo(
                    '[tracking] tracking missed its control loop. It actually takes %f secs.'
                    % duration)
            r.sleep()

            # ========================================= loop =========================================
        for x_list, y_list in zip(kf.x_history, kf.y_history):
            writer.writerow(list(x_list))
            writer.writerow(list(y_list))
Ejemplo n.º 25
0
def main(args):
    print(args)
    cluster = Cluster(args.population, args.objects, args.colors, args.x_size,
                      args.y_size, args.ticks)
    cluster.print_board()
Ejemplo n.º 26
0
def realtime_test():
    # rosparam
    k = rospy.get_param('tracking/EGBIS/k', 1.0)  # node_name/a/a
    min_size = rospy.get_param('tracking/EGBIS/min_size', 5)
    icp_max_dist = rospy.get_param('tracking/ICP/icp_max_dist', 1.0)
    inflation_size = rospy.get_param('tracking/CheckStatic/inflation_size', 8)
    speed_threshold = rospy.get_param('tracking/CheckStatic/speed_threshold',
                                      0.2)
    static_threshold = rospy.get_param('tracking/CheckStatic/static_threshold',
                                       0.7)

    # rate
    rate = 5

    # joint states
    x = State(rate)

    # laser_odom sub
    laser_odom = LaserSub()

    # process kf
    kf = ProcessKF(x,
                   inflate_size=inflation_size,
                   static_threshold=static_threshold,
                   speed_threshold=speed_threshold,
                   prediction_time=3.0)

    # init loop
    r = rospy.Rate(1.0)
    while not rospy.is_shutdown(
    ):  # wait until first scan is received so that laser_odom is initialized
        if laser_odom.first_recieved_done:  # laser_odom specs initialized by ROS sensor_msgs/LaserScan message
            break
        else:
            rospy.loginfo('[test_coarse_level] Waiting for laser_sub to init.')
        r.sleep()

    laser_prev = None
    clusters_prev = None

    # init plot
    vt_plt = VisualizeTracking(x, kf)

    r = rospy.Rate(rate)
    while not rospy.is_shutdown():
        # ========================================= loop =========================================
        _time = time.time()

        # --- 1. laser_odom input (copy self.laser_odom at this moment)
        laser_odom_now = copy.deepcopy(laser_odom)
        #time_1 = time.time()
        #duration_1 = time_1 - _time

        # --- 2. EMST (create input graph for segmentation)
        mst = EMST(laser_odom_now, neighbor_size=10)
        #time_2 = time.time()
        #duration_2 = time_2 - time_1

        # --- 3. EGBIS (apply image segmentation technique to clustering)
        clusters_now = Cluster(mst, k=k, min_size=min_size)
        # time_3 = time.time()
        # duration_3 = time_3 - time_2

        # --- 4. Coarse-level data association (ICP): update JointState.track_indices
        CoarseLevel(laser_odom_now,
                    laser_prev,
                    clusters_now,
                    clusters_prev,
                    x,
                    tentative_threshold=0,
                    icp_max_dist=icp_max_dist)
        # time_4 = time.time()
        # duration_4 = time_4 - time_3

        # --- 5. KF
        kf.predict_update(laser_odom_now, clusters_now, rate)
        kf.publish_kf_state()

        #time_5 = time.time()
        #duration_5 = time_5 - time_4

        # --- 6. visualization
        vt_plt.plot()
        #time_6 = time.time()
        #duration_6 = time_6 - time_5

        # --- end of loop
        laser_prev = laser_odom_now
        clusters_prev = clusters_now

        # --- efficiency monitor
        duration = time.time() - _time
        # rospy.loginfo(duration_1)
        # rospy.loginfo(duration_2)
        # rospy.loginfo(duration_3)
        # rospy.loginfo(duration_4)
        # rospy.loginfo(duration_5)
        # rospy.loginfo(duration_6)
        # rospy.loginfo(duration)
        # rospy.loginfo('-------------')

        if duration > 1.0 / rate:
            rospy.loginfo(
                '[tracking] tracking missed its control loop. It actually takes %f secs.'
                % duration)
        r.sleep()

        # ========================================= loop =========================================
    vt_plt.show()
Ejemplo n.º 27
0
def test_cluster(self):
    a = Cluster("a", np.array([[2], [2], [4]]))
    b = Cluster("b", np.array([[4], [5], [6]]))
    self.assertEqual(a - b, 0)
    print(a | b | b | a | b)
Ejemplo n.º 28
0
from closest_search import ClosestSearch
from clustering import Cluster
from flask import Flask
import json
import closest_search

app = Flask(__name__)
cluster = Cluster()
cs = ClosestSearch()


@app.route("/event/<latitude>/<longitude>/<budget>/<category>")
def fetch_all_events(latitude, longitude, budget, category):
    cluster.store_details(latitude, longitude, budget)
    events = cluster.get_filtered_events(category)
    return json.dumps(events)


@app.route("/res_event/<cuisine>")
def fetch_res_based_on_event(cuisine):
    result = cluster.fetch_event_res_result(cuisine)
    return json.dumps(result)


@app.route("/rest/<latitude>/<longitude>/<budget>/<cuisine>")
def fetch_all_restaurants(latitude, longitude, budget, cuisine):
    cluster.store_details(latitude, longitude, budget)
    restaurants = cluster.get_filtered_restaurants(cuisine)
    result = cluster.fetch_res_result(restaurants)
    return json.dumps(restaurants)
Ejemplo n.º 29
0
        plt.figure(figsize=(10, 10))
        plt.subplots_adjust(bottom=0.1)

        plt.scatter(mean, skewness, label='True Position')
        plt.xlabel("mean")
        plt.ylabel("skewness")

        #plt.show()

processed_data = list(zip(mean, kurt))
training_data = np.reshape(training_data, [16508, 3])
print(training_data)
print(np.shape(training_data))

data_cluster = Cluster(processed_data, training_data, ideal_data)
feature_sets, labels = data_cluster.cluster()

feature_sets = np.asarray(feature_sets)
print(np.shape(feature_sets))
plt.figure(figsize=(10, 10))
plt.scatter(feature_sets[:, 2],
            feature_sets[:, 0],
            c=feature_sets[:, 3],
            cmap='rainbow')
plt.show()

ideal_label = max(labels) + 1

print(np.shape(feature_sets))
print(feature_sets)
Ejemplo n.º 30
0
def clustering(x_train, y_train, x_test, y_test):
    cluster = Cluster()
    cluster.train(x_train, y_train, x_test, y_test)