Exemple #1
0
 def setUp(self):
     """Build the fixture: one message, its stream, a cluster and a crowd.

     The message's fourth constructor argument (presumably its timestamp)
     is 60 seconds before ``test_time``; the crowd is created from the
     cluster together with ``test_time`` itself.
     """
     sent_at = test_time - timedelta(seconds=60)
     message = Message(
         1, 'sdf', 'A project to cluster high-dimensional streams.', sent_at)
     self.m1 = message
     # The vector is attached before the stream is built from the message.
     message.vector = Vector({1: 1., 2: 3.})

     self.stream = Stream(1, message)
     self.cluster = StreamCluster(self.stream)
     self.crowd = Crowd(self.cluster, test_time)
Exemple #2
0
 def test_crowdSize(self):
     """Check crowdSize before and after growing the crowd.

     Expected sizes: 1 initially, 3 after two more streams are added to the
     crowd's cluster, and still 3 after appending a second cluster whose
     only stream reuses id 3 (presumably already counted -- confirm against
     the Crowd implementation).
     """
     crowd = self.crowd
     self.assertEqual(1, crowd.crowdSize)

     # Add streams with ids 2 and 3, both built from the same message.
     for stream_id in (2, 3):
         self.cluster.addDocument(Stream(stream_id, self.m1))
     self.assertEqual(3, crowd.crowdSize)

     # Append another cluster two days after test_time; its stream id is 3.
     later_cluster = StreamCluster(Stream(3, self.m1))
     crowd.append(later_cluster, test_time + timedelta(days=2))
     self.assertNotEqual(4, crowd.crowdSize)
     self.assertEqual(3, crowd.crowdSize)
Exemple #3
0
 def setUp(self):
     """Build three messages, one stream per message, one cluster per stream.

     The messages share the same text and differ in id, vector and fourth
     constructor argument (presumably a timestamp): ``test_time`` minus 60
     seconds, ``test_time``, and ``test_time`` plus 60 seconds. All streams
     are created before any cluster, in id order.
     """
     text = 'A project to cluster high-dimensional streams.'
     one_minute = timedelta(seconds=60)

     self.m1 = Message(1, 'sdf', text, test_time - one_minute)
     self.m1.vector = Vector({1: 2, 2: 4})
     self.stream1 = Stream(1, self.m1)

     self.m2 = Message(2, 'sdf', text, test_time)
     self.m2.vector = Vector({2: 4})
     self.stream2 = Stream(2, self.m2)

     self.m3 = Message(3, 'sdf', text, test_time + one_minute)
     self.m3.vector = Vector({2: 4})
     self.stream3 = Stream(3, self.m3)

     # Clusters are constructed in stream order: 1, 2, 3.
     self.cluster1, self.cluster2, self.cluster3 = [
         StreamCluster(stream)
         for stream in (self.stream1, self.stream2, self.stream3)
     ]
Exemple #4
0
 def getClusterFromMapFormat(clusterMap):
     """Rebuild a StreamCluster from its dictionary representation.

     Args:
         clusterMap: mapping with the keys 'clusterId',
             'lastStreamAddedTime', 'mergedClustersList', 'streams' and
             'dimensions'. 'dimensions' must itself be a mapping; each of
             its entries is copied into the cluster by item assignment.

     Returns:
         A StreamCluster whose id, last-added time, merged-cluster list,
         documents and dimension entries are taken from ``clusterMap``.

     Raises:
         KeyError: if any of the keys listed above is missing.
     """
     # StreamCluster is constructed from a stream, so seed it with an empty
     # placeholder; the attributes read from clusterMap are assigned below.
     placeholder = Message(1, '', '', datetime.now())
     placeholder.vector = Vector({})
     cluster = StreamCluster(Stream(1, placeholder))

     cluster.clusterId = clusterMap['clusterId']
     cluster.lastStreamAddedTime = getDateTimeObjectFromTweetTimestamp(
         clusterMap['lastStreamAddedTime'])
     cluster.mergedClustersList = clusterMap['mergedClustersList']
     cluster.documentsInCluster = clusterMap['streams']

     # items() instead of the Python 2-only iteritems(): same pairs, but it
     # also exists on Python 3 dictionaries.
     for dimension, value in clusterMap['dimensions'].items():
         cluster[dimension] = value
     return cluster
Exemple #5
0
 def getClusterAndUpdateExistingClusters(self, stream):
     """Add ``stream`` to its predicted cluster, or start a new cluster.

     ``self.getClusterForDocument(stream)`` is asked for a key into
     ``self.clusters``. When it returns one, the stream is added to that
     cluster with ``self.stream_settings`` passed as keyword arguments.
     When it returns ``None``, a new StreamCluster is built from the
     stream, given a signature, added to every entry of
     ``self.signaturePermutations`` and stored in ``self.clusters`` under
     its ``clusterId``.

     Args:
         stream: the stream/document to place.

     Returns:
         None. Despite the name, no cluster is returned.
     """
     predictedCluster = self.getClusterForDocument(stream)

     # Identity test against None: a falsy key such as 0 still counts as a
     # hit, and the check cannot be skewed by a custom __eq__/__ne__.
     if predictedCluster is not None:
         self.clusters[predictedCluster].addDocument(
             stream, **self.stream_settings)
         return

     newCluster = StreamCluster(stream)
     newCluster.setSignatureUsingVectorPermutations(
         self.unitVector, self.vectorPermutations,
         self.phraseTextAndDimensionMap)
     for permutation in self.signaturePermutations:
         permutation.addDocument(newCluster)
     self.clusters[newCluster.clusterId] = newCluster
Exemple #6
0
 def getClusterAndUpdateExistingClusters(self, stream):
     """Add ``stream`` to its predicted cluster, or start a new cluster.

     ``self.getClusterForDocument(stream)`` is asked for a key into
     ``self.clusters``. When it returns one, the stream is added to that
     cluster with ``self.stream_settings`` passed as keyword arguments.
     When it returns ``None``, a new StreamCluster is built from the
     stream, given a signature, added to every entry of
     ``self.signaturePermutations`` and stored in ``self.clusters`` under
     its ``clusterId``.

     Args:
         stream: the stream/document to place.

     Returns:
         None. Despite the name, no cluster is returned.
     """
     predictedCluster = self.getClusterForDocument(stream)

     # Do not remove this comment. Might need this if StreamCluster is used
     # again in future:
     #   if predictedCluster!=None:
     #       self.clusters[predictedCluster].addStream(stream, **self.stream_settings)

     # Identity test against None: a falsy key such as 0 still counts as a
     # hit, and the check cannot be skewed by a custom __eq__/__ne__.
     if predictedCluster is not None:
         self.clusters[predictedCluster].addDocument(
             stream, **self.stream_settings)
         return

     newCluster = StreamCluster(stream)
     newCluster.setSignatureUsingVectorPermutations(
         self.unitVector, self.vectorPermutations,
         self.phraseTextAndDimensionMap)
     for permutation in self.signaturePermutations:
         permutation.addDocument(newCluster)
     self.clusters[newCluster.clusterId] = newCluster