Example #1
0
 def setUp(self):
     """Build the fixtures used by every test in this case.

     Resets ``Cluster.clusterIdCounter``, wraps two seed documents in a
     cluster each, and prepares two further documents (backed by ``Vector``
     instances) that the tests add to clusters themselves.
     """
     # Reset the class-level counter before any cluster is created.
     Cluster.clusterIdCounter = 0
     seedX = Document(1, {1: 2, 2: 4})
     seedY = Document(2, {2: 4})
     self.docx, self.docy = seedX, seedY
     # Creation order is kept: cluster1 first, then cluster2.
     self.cluster1 = Cluster(seedX)
     self.cluster2 = Cluster(seedY)
     self.doc1 = Document(3, Vector({3: 4}))
     self.doc2 = Document(4, Vector({2: 4}))
Example #2
0
 def setUp(self):
     """Create two seeded clusters and two loose documents for the tests."""
     # The counter is reset first so both clusters are created from zero.
     Cluster.clusterIdCounter = 0
     self.docx = Document(1, {1: 2, 2: 4})
     self.docy = Document(2, {2: 4})
     self.cluster1 = Cluster(self.docx)
     self.cluster2 = Cluster(self.docy)
     # The remaining documents wrap explicit Vector instances.
     vectorForDoc1 = Vector({3: 4})
     self.doc1 = Document(3, vectorForDoc1)
     vectorForDoc2 = Vector({2: 4})
     self.doc2 = Document(4, vectorForDoc2)
 def getClusterAndUpdateExistingClusters(self, document):
     """Add ``document`` to its predicted cluster, or open a new cluster for it.

     ``self.getClusterForDocument`` is asked for a key into ``self.clusters``.
     When it returns one, the document is added to that cluster. When it
     returns ``None``, a new ``Cluster`` is created from the document, its
     signature is set from ``self.unitVector``, ``self.vectorPermutations``
     and ``self.phraseTextAndDimensionMap``, it is added to every entry of
     ``self.signaturePermutations``, and it is stored in ``self.clusters``
     under its ``clusterId``.

     :param document: the document to assign to a cluster.
     """
     predictedCluster = self.getClusterForDocument(document)
     # Compare by identity: None is a singleton, and ``!=`` would dispatch to
     # whatever ``__ne__`` the returned key happens to define.
     if predictedCluster is not None:
         self.clusters[predictedCluster].addDocument(document)
         return
     newCluster = Cluster(document)
     newCluster.setSignatureUsingVectorPermutations(
         self.unitVector, self.vectorPermutations,
         self.phraseTextAndDimensionMap)
     for permutation in self.signaturePermutations:
         permutation.addDocument(newCluster)
     self.clusters[newCluster.clusterId] = newCluster
Example #4
0
def json_process(json_out):
    """Cluster the points of one frame and briefly draw them.

    :param json_out: mapping whose ``"frame"`` entry is a collection of items
        indexable by ``'x'`` and ``'y'``.
    """
    frame_points = json_out["frame"]
    x_coords = [point['x'] for point in frame_points]
    y_coords = [point['y'] for point in frame_points]
    # One row per point: column 0 holds x, column 1 holds y.
    xy_points = np.column_stack((x_coords, y_coords))
    Cluster(xy_points, eps=0.35, min_samples=3).plot(fig=plt)
    # Fixed axis limits keep consecutive frames on the same scale.
    plt.xlim(-10, 10)
    plt.ylim(-0.9, 18)
    # Pause for a negligible interval, then clear the figure for the next call.
    plt.pause(0.00000001)
    plt.clf()
Example #5
0
def process_data(data):
    """Cluster a collection of x/y points and briefly draw the result.

    :param data: collection of items indexable by ``'x'`` and ``'y'``; it is
        traversed twice, once per coordinate.
    """
    xs_column = [entry['x'] for entry in data]
    ys_column = [entry['y'] for entry in data]
    points = np.column_stack((xs_column, ys_column))

    clustering = Cluster(points, eps=0.35, min_samples=3)
    clustering.plot(fig=plt)

    # Same fixed viewport on every call.
    plt.xlim(-10, 10)
    plt.ylim(-0.9, 18)

    # Negligible pause, then wipe the figure so the next call starts clean.
    plt.pause(0.00000001)
    plt.clf()
Example #6
0
 def getClusterAndUpdateExistingClusters(self, document):
     """Route ``document`` into an existing cluster or register a new one.

     The key returned by ``self.getClusterForDocument`` selects the cluster in
     ``self.clusters`` that receives the document. If no key is returned
     (``None``), a fresh ``Cluster`` is built from the document, given a
     signature, added to each signature permutation and stored in
     ``self.clusters`` under its ``clusterId``.

     :param document: the document to place.
     """
     predictedCluster = self.getClusterForDocument(document)
     # ``is None`` is the idiomatic singleton test and cannot be affected by
     # an overridden comparison operator on the returned key.
     if predictedCluster is None:
         newCluster = Cluster(document)
         newCluster.setSignatureUsingVectorPermutations(
             self.unitVector, self.vectorPermutations,
             self.phraseTextAndDimensionMap)
         for permutation in self.signaturePermutations:
             permutation.addDocument(newCluster)
         self.clusters[newCluster.clusterId] = newCluster
     else:
         self.clusters[predictedCluster].addDocument(document)
Example #7
0
def json_process(json_out):
    """Plot the clustered points found under ``json_out["frame"]``.

    :param json_out: mapping whose ``"frame"`` entry is a collection of items
        indexable by ``'x'`` and ``'y'``.
    """
    samples = json_out["frame"]
    xs_list = [sample['x'] for sample in samples]
    ys_list = [sample['y'] for sample in samples]
    stacked = np.column_stack((xs_list, ys_list))

    # The Cluster object does the drawing; pyplot is handed in as the figure.
    frame_cluster = Cluster(stacked, eps=0.35, min_samples=3)
    frame_cluster.plot(fig=plt)

    # Fixed axis bounds for every frame.
    plt.xlim(-10, 10)
    plt.ylim(-0.9, 18)

    # Negligible pause, then clear the figure before the next frame.
    plt.pause(0.00000001)
    plt.clf()
def inspect_cluster_size(clustered_path: str, sense_inventory_path: str,
                         fig_path: str):
    """Print the largest cluster and plot the sizes of all multi-item clusters.

    :param clustered_path: path to a pickle file holding a dict that maps each
        lemma to a list of clusters.
    :param sense_inventory_path: not read by this function; kept so existing
        callers keep working.
    :param fig_path: path the scatter plot is saved to.
    """
    from classes import Cluster
    # NOTE(review): unpickling can execute arbitrary code, so only load files
    # from a trusted source.
    # The context manager closes the file handle, which was previously leaked.
    with open(clustered_path, "rb") as clustered_file:
        cluster_dict_raw: Dict[str, List[Cluster]] = pickle.load(
            clustered_file)
    # Start from an empty cluster so that any non-empty cluster replaces it.
    largest_cluster: Cluster = Cluster([])
    for lemma in tqdm(cluster_dict_raw):
        for cluster in cluster_dict_raw[lemma]:
            if len(cluster.cluster_items) > len(largest_cluster.cluster_items):
                largest_cluster = cluster
    print(
        f"Max cluster size for {largest_cluster.get_dominant_lemma()}: {len(largest_cluster.cluster_items)}"
    )
    # Only clusters holding more than one item are plotted.
    merged_clusters: List[Cluster] = [
        y for x in cluster_dict_raw.values() for y in x
        if len(y.cluster_items) > 1
    ]
    merged_clusters.sort(key=lambda x: len(x.cluster_items), reverse=True)
    # The first (largest) cluster after sorting is left out of the plot.
    x_data: List[int] = list(range(len(merged_clusters) - 1))
    y_data: List[int] = [len(x.cluster_items) for x in merged_clusters[1:]]
    pyplot.scatter(x_data, y_data)
    pyplot.xlabel("Cluster ID")
    pyplot.ylabel("Number of contained usage contexts")
    pyplot.title("Distribution of cluster size")
    pyplot.savefig(fig_path, dpi=600)
    pyplot.show()
Example #9
0
def generate_cluster_objects(scooter_data: pd.DataFrame,
                             cluster_labels: list) -> [Cluster]:
    """
    Build one Cluster per distinct label, each holding the Scooter objects
    created from the rows carrying that label.
    :param scooter_data: frame with "lat", "lon" and "battery" columns
    :param cluster_labels: labels assigned as a new column of the frame copy
    :return: list of clusters, ordered by their ``id`` attribute
    """
    # Label a copy so the caller's frame does not gain the extra column.
    labelled = scooter_data.copy()
    labelled["cluster_labels"] = cluster_labels

    result = []
    for label in np.unique(cluster_labels):
        # Rows whose label matches the current cluster.
        members = labelled[labelled["cluster_labels"] == label]
        # One Scooter per row; the row index is passed as the last argument.
        fleet = []
        for row_index, record in members.iterrows():
            fleet.append(
                Scooter(record["lat"], record["lon"], record["battery"],
                        row_index))
        result.append(Cluster(label, fleet))

    result.sort(key=lambda cluster: cluster.id)
    return result
Example #10
0
 def test_iterateByAttribute(self):
     """iterateByAttribute yields (cluster, attribute value) pairs in order."""
     self.cluster1.addDocument(self.doc1)
     self.cluster2.addDocument(self.doc2)
     expectedPairs = [(self.cluster1, 'cluster_0'),
                      (self.cluster2, 'cluster_1')]
     bothClusters = [self.cluster1, self.cluster2]
     actualPairs = list(Cluster.iterateByAttribute(bothClusters, 'clusterId'))
     self.assertEqual(expectedPairs, actualPairs)
Example #11
0
 def test_filterClustersByAttribute(self):
     """getClustersByAttributeAndThreshold filters on 'vectorWeights'."""
     self.cluster1.addDocument(self.doc1)
     self.cluster2.addDocument(self.doc2)

     def selected(*thresholdArgs):
         # A fresh input list per call, exactly as if written inline.
         return list(
             Cluster.getClustersByAttributeAndThreshold(
                 [self.cluster1, self.cluster2], 'vectorWeights',
                 *thresholdArgs))

     # Threshold 1: both clusters are returned.
     self.assertEqual([self.cluster1, self.cluster2], selected(1))
     # Threshold 3 with the default mode: nothing is returned.
     self.assertEqual([], selected(3))
     # Threshold 3 with BELOW_THRESHOLD: both clusters are returned.
     self.assertEqual([self.cluster1, self.cluster2],
                      selected(3, Cluster.BELOW_THRESHOLD))
Example #12
0
def create_clustering_obj(hit_id, items_map, clustering_list):
    """Build a Clustering object from the item indices of one hit.

    Args:
        hit_id (int): Hit number; combined with each item index to form the
                lookup key into ``items_map``
        items_map (dict of tuple to Item): Maps ``(hit_id, item_id)`` to the
                corresponding Item object
        clustering_list (list(int)): A clustering of items; each cluster is
                an iterable of item indices within the hit

    Returns:
        Clustering: One Cluster per entry of ``clustering_list``, added in
                the same order
    """
    result = Clustering()
    for member_ids in clustering_list:
        group = Cluster()
        for member_id in member_ids:
            lookup_key = (hit_id, member_id)
            group.add_item(items_map[lookup_key])
        result.add_cluster(group)
    return result
Example #13
0
class ClusterTests(unittest.TestCase):
    """Tests for Cluster ids, document membership and attribute helpers."""

    def setUp(self):
        """Reset the id counter; create two seeded clusters and two spare documents."""
        Cluster.clusterIdCounter = 0
        seedX = Document(1, {1: 2, 2: 4})
        seedY = Document(2, {2: 4})
        self.docx, self.docy = seedX, seedY
        # Creation order is kept: cluster1 first, then cluster2.
        self.cluster1 = Cluster(seedX)
        self.cluster2 = Cluster(seedY)
        self.doc1 = Document(3, Vector({3: 4}))
        self.doc2 = Document(4, Vector({2: 4}))

    def test_initialization(self):
        """Fresh clusters have sequential ids and hold only their seed document."""
        first, second = self.cluster1, self.cluster2
        self.assertEqual('cluster_0', first.clusterId)
        self.assertEqual('cluster_1', second.clusterId)
        self.assertEqual(2, Cluster.clusterIdCounter)
        firstDocs = list(first.iterateDocumentsInCluster())
        self.assertEqual([self.docx], firstDocs)
        secondDocs = list(second.iterateDocumentsInCluster())
        self.assertEqual([self.docy], secondDocs)

    def test_addDocument(self):
        """Adding documents updates the id, mean, aggregate and membership."""
        target = self.cluster1
        target.addDocument(self.doc1)
        # The document carries the id of the cluster it was added to.
        self.assertEqual(target.clusterId, self.doc1.clusterId)
        # The cluster itself compares equal to the mean vector.
        expectedMean = {1: 2 / 2., 2: 2., 3: 2.}
        self.assertEqual(expectedMean, target)
        # The aggregate vector is the component-wise sum.
        expectedAggregate = {1: 2, 2: 4, 3: 4}
        self.assertEqual(expectedAggregate, target.aggregateVector)
        # The document is reachable through its docId.
        self.assertEqual(self.doc1,
                         target.documentsInCluster[self.doc1.docId])
        target.addDocument(self.doc2)
        self.assertEqual(3, target.vectorWeights)
        expectedMean = {1: 2 / 3., 2: 8 / 3., 3: 4 / 3.}
        self.assertEqual(expectedMean, target)
        expectedAggregate = {1: 2, 2: 8, 3: 4}
        self.assertEqual(expectedAggregate, target.aggregateVector)

    def test_iterateDocumentsInCluster(self):
        """Iteration lists members and drops documents that moved elsewhere."""
        first, second = self.cluster1, self.cluster2
        # Plain iteration over a cluster with three documents.
        first.addDocument(self.doc1)
        first.addDocument(self.doc2)
        listed = list(first.iterateDocumentsInCluster())
        self.assertEqual([self.docx, self.doc1, self.doc2], listed)
        self.assertEqual(3, first.length)
        # doc2 moves to the second cluster; the first must no longer list it.
        second.addDocument(self.doc2)
        listed = list(first.iterateDocumentsInCluster())
        self.assertEqual([self.docx, self.doc1], listed)
        self.assertEqual(2, first.length)
        self.assertEqual(2, len(first.documentsInCluster))
        listed = list(second.iterateDocumentsInCluster())
        self.assertEqual([self.docy, self.doc2], listed)
        self.assertEqual(2, second.length)

    def test_iterateByAttribute(self):
        """iterateByAttribute yields (cluster, attribute value) pairs."""
        self.cluster1.addDocument(self.doc1)
        self.cluster2.addDocument(self.doc2)
        expectedPairs = [(self.cluster1, 'cluster_0'),
                         (self.cluster2, 'cluster_1')]
        bothClusters = [self.cluster1, self.cluster2]
        actualPairs = list(
            Cluster.iterateByAttribute(bothClusters, 'clusterId'))
        self.assertEqual(expectedPairs, actualPairs)

    def test_filterClustersByAttribute(self):
        """getClustersByAttributeAndThreshold filters on 'vectorWeights'."""
        self.cluster1.addDocument(self.doc1)
        self.cluster2.addDocument(self.doc2)

        def selected(*thresholdArgs):
            # A fresh input list per call, exactly as if written inline.
            return list(
                Cluster.getClustersByAttributeAndThreshold(
                    [self.cluster1, self.cluster2], 'vectorWeights',
                    *thresholdArgs))

        self.assertEqual([self.cluster1, self.cluster2], selected(1))
        self.assertEqual([], selected(3))
        self.assertEqual([self.cluster1, self.cluster2],
                         selected(3, Cluster.BELOW_THRESHOLD))
Example #14
0
# $Id$
#
# pylint: disable-msg=E1101,W0612,W0142
#
"""superclass for all content-objects
"""

__version__ = "$Id$"

# python imports
from classes import Cluster, Node, Ressource
import MenuSystem as menusystem
import hb_mini

glbmanager = hb_mini.miniManager()
cluster1 = Cluster("172.16.10.172", "hacluster", "ddd", "172.16.10.172", glbmanager)
if(glbmanager.login(cluster1.ip, cluster1.user, cluster1.passwd) == True):
    print "Connected succesfully"
else:
    print "Connection failure"
    exit()
    

def printNodeNames(data):
    print "\n"
    for node in cluster1.getNodes():
        print "Node: %s" % node.name

def printActiveNodes(data):
    print "\n"
    for node in cluster1.getActiveNodes():
Example #15
0
class ClusterTests(unittest.TestCase):
    """Exercises Cluster construction, document handling and class helpers."""

    def setUp(self):
        """Start from a zeroed id counter with two clusters and two loose documents."""
        Cluster.clusterIdCounter = 0
        self.docx = Document(1, {1: 2, 2: 4})
        self.docy = Document(2, {2: 4})
        self.cluster1 = Cluster(self.docx)
        self.cluster2 = Cluster(self.docy)
        vectorForDoc1 = Vector({3: 4})
        self.doc1 = Document(3, vectorForDoc1)
        vectorForDoc2 = Vector({2: 4})
        self.doc2 = Document(4, vectorForDoc2)

    def test_initialization(self):
        """Ids are assigned in creation order; each cluster holds its seed."""
        self.assertEqual('cluster_0', self.cluster1.clusterId)
        self.assertEqual('cluster_1', self.cluster2.clusterId)
        self.assertEqual(2, Cluster.clusterIdCounter)
        docsOfFirst = list(self.cluster1.iterateDocumentsInCluster())
        self.assertEqual([self.docx], docsOfFirst)
        docsOfSecond = list(self.cluster2.iterateDocumentsInCluster())
        self.assertEqual([self.docy], docsOfSecond)

    def test_addDocument(self):
        """addDocument tags the document and refreshes the cluster vectors."""
        cluster, added = self.cluster1, self.doc1
        cluster.addDocument(added)
        # Cluster id is copied onto the document.
        self.assertEqual(cluster.clusterId, added.clusterId)
        # Mean vector after two documents.
        self.assertEqual({1: 2 / 2., 2: 2., 3: 2.}, cluster)
        # Aggregate vector after two documents.
        self.assertEqual({1: 2, 2: 4, 3: 4}, cluster.aggregateVector)
        # Membership lookup by document id.
        storedDocument = cluster.documentsInCluster[added.docId]
        self.assertEqual(added, storedDocument)
        cluster.addDocument(self.doc2)
        self.assertEqual(3, cluster.vectorWeights)
        self.assertEqual({1: 2 / 3., 2: 8 / 3., 3: 4 / 3.}, cluster)
        self.assertEqual({1: 2, 2: 8, 3: 4}, cluster.aggregateVector)

    def test_iterateDocumentsInCluster(self):
        """Iteration reflects membership, including documents that moved away."""
        # Normal iteration with three members.
        self.cluster1.addDocument(self.doc1)
        self.cluster1.addDocument(self.doc2)
        members = list(self.cluster1.iterateDocumentsInCluster())
        self.assertEqual([self.docx, self.doc1, self.doc2], members)
        self.assertEqual(3, self.cluster1.length)
        # A document added to another cluster disappears from the first one.
        self.cluster2.addDocument(self.doc2)
        members = list(self.cluster1.iterateDocumentsInCluster())
        self.assertEqual([self.docx, self.doc1], members)
        self.assertEqual(2, self.cluster1.length)
        self.assertEqual(2, len(self.cluster1.documentsInCluster))
        members = list(self.cluster2.iterateDocumentsInCluster())
        self.assertEqual([self.docy, self.doc2], members)
        self.assertEqual(2, self.cluster2.length)

    def test_iterateByAttribute(self):
        """Each cluster is paired with the value of the requested attribute."""
        self.cluster1.addDocument(self.doc1)
        self.cluster2.addDocument(self.doc2)
        expected = [(self.cluster1, 'cluster_0'), (self.cluster2, 'cluster_1')]
        clusters = [self.cluster1, self.cluster2]
        observed = list(Cluster.iterateByAttribute(clusters, 'clusterId'))
        self.assertEqual(expected, observed)

    def test_filterClustersByAttribute(self):
        """Threshold filtering on 'vectorWeights' in both supported modes."""
        self.cluster1.addDocument(self.doc1)
        self.cluster2.addDocument(self.doc2)

        def filtered(*extraArgs):
            # The input list is rebuilt per call, exactly as if written inline.
            candidates = [self.cluster1, self.cluster2]
            return list(
                Cluster.getClustersByAttributeAndThreshold(
                    candidates, 'vectorWeights', *extraArgs))

        self.assertEqual([self.cluster1, self.cluster2], filtered(1))
        self.assertEqual([], filtered(3))
        self.assertEqual([self.cluster1, self.cluster2],
                         filtered(3, Cluster.BELOW_THRESHOLD))
Example #16
0
 def test_iterateByAttribute(self):
     """Each cluster is paired with the value of its 'clusterId' attribute."""
     self.cluster1.addDocument(self.doc1)
     self.cluster2.addDocument(self.doc2)
     expected = [(self.cluster1, 'cluster_0'), (self.cluster2, 'cluster_1')]
     clusters = [self.cluster1, self.cluster2]
     observed = list(Cluster.iterateByAttribute(clusters, 'clusterId'))
     self.assertEqual(expected, observed)
Example #17
0
 def test_filterClustersByAttribute(self):
     """Threshold filtering on 'vectorWeights' in both supported modes."""
     self.cluster1.addDocument(self.doc1)
     self.cluster2.addDocument(self.doc2)

     def filtered(*extraArgs):
         # The input list is rebuilt per call, exactly as if written inline.
         candidates = [self.cluster1, self.cluster2]
         return list(
             Cluster.getClustersByAttributeAndThreshold(
                 candidates, 'vectorWeights', *extraArgs))

     self.assertEqual([self.cluster1, self.cluster2], filtered(1))
     self.assertEqual([], filtered(3))
     self.assertEqual([self.cluster1, self.cluster2],
                      filtered(3, Cluster.BELOW_THRESHOLD))