def setUp(self):
    """Create the message, stream, cluster and crowd shared by the tests.

    The single message is timestamped 60 seconds before ``test_time`` and is
    given a two-dimension vector.  It is wrapped in a stream, the stream in a
    cluster, and the cluster in a crowd created at ``test_time``.
    """
    oneMinuteEarlier = test_time - timedelta(seconds=60)
    message = Message(
        1, 'sdf', 'A project to cluster high-dimensional streams.',
        oneMinuteEarlier)
    self.m1 = message
    message.vector = Vector({1: 1., 2: 3.})

    stream = Stream(1, message)
    self.stream = stream

    cluster = StreamCluster(stream)
    self.cluster = cluster

    self.crowd = Crowd(cluster, test_time)
def test_crowdSize(self):
    """Verify crowdSize as streams are added and a later cluster is appended.

    Values asserted below: 1 for the fixture crowd, 3 once two more streams
    have been added to its cluster, and still 3 (not 4) after a
    single-stream cluster is appended two days after ``test_time``.
    """
    crowd = self.crowd
    self.assertEqual(1, crowd.crowdSize)

    # Grow the fixture cluster with streams 2 and 3, both built on self.m1.
    for streamId in (2, 3):
        self.cluster.addDocument(Stream(streamId, self.m1))
    self.assertEqual(3, crowd.crowdSize)

    # Append a separate single-stream cluster at a later time.
    laterCluster = StreamCluster(Stream(3, self.m1))
    appendTime = test_time + timedelta(days=2)
    crowd.append(laterCluster, appendTime)
    self.assertNotEqual(4, crowd.crowdSize)
    self.assertEqual(3, crowd.crowdSize)
def setUp(self):
    """Create three messages, their streams and three single-stream clusters.

    The messages share the same text and are timestamped one minute before,
    exactly at, and one minute after ``test_time``.  The first vector has
    dimensions 1 and 2; the other two only have dimension 2.
    """
    text = 'A project to cluster high-dimensional streams.'
    oneMinute = timedelta(seconds=60)

    # Each message gets its vector before the stream is built around it.
    first = Message(1, 'sdf', text, test_time - oneMinute)
    self.m1 = first
    first.vector = Vector({1: 2, 2: 4})
    self.stream1 = Stream(1, first)

    second = Message(2, 'sdf', text, test_time)
    self.m2 = second
    second.vector = Vector({2: 4})
    self.stream2 = Stream(2, second)

    third = Message(3, 'sdf', text, test_time + oneMinute)
    self.m3 = third
    third.vector = Vector({2: 4})
    self.stream3 = Stream(3, third)

    # Clusters are created last, in stream order.
    self.cluster1 = StreamCluster(self.stream1)
    self.cluster2 = StreamCluster(self.stream2)
    self.cluster3 = StreamCluster(self.stream3)
def getClusterFromMapFormat(clusterMap):
    """Rebuild a StreamCluster from its map (dictionary) representation.

    A cluster is first created around a placeholder stream whose message has
    an empty vector; its fields are then overwritten from ``clusterMap``.

    Keys read from ``clusterMap``:
        'clusterId'           -> cluster.clusterId
        'lastStreamAddedTime' -> cluster.lastStreamAddedTime, converted with
                                 getDateTimeObjectFromTweetTimestamp
        'mergedClustersList'  -> cluster.mergedClustersList
        'streams'             -> cluster.documentsInCluster
        'dimensions'          -> mapping copied item by item into the cluster

    Returns:
        The populated StreamCluster.

    A missing key propagates the lookup error (KeyError for a plain dict).
    """
    # Placeholder message/stream: only needed to construct the cluster.
    placeholderMessage = Message(1, '', '', datetime.now())
    placeholderMessage.vector = Vector({})
    cluster = StreamCluster(Stream(1, placeholderMessage))

    cluster.clusterId = clusterMap['clusterId']
    cluster.lastStreamAddedTime = getDateTimeObjectFromTweetTimestamp(
        clusterMap['lastStreamAddedTime'])
    cluster.mergedClustersList = clusterMap['mergedClustersList']
    cluster.documentsInCluster = clusterMap['streams']

    # .items() yields the same pairs as .iteritems() but also exists on
    # Python 3, where .iteritems() was removed.
    for dimension, value in clusterMap['dimensions'].items():
        cluster[dimension] = value
    return cluster
def getClusterAndUpdateExistingClusters(self, stream):
    """Add ``stream`` to its predicted cluster, or start a new cluster for it.

    ``self.getClusterForDocument(stream)`` is asked for a key into
    ``self.clusters``.  When it returns a key, the stream is added to that
    cluster with ``self.stream_settings`` passed as keyword arguments.  When
    it returns None, a new StreamCluster is built around the stream, given a
    signature, added to every entry of ``self.signaturePermutations`` and
    registered in ``self.clusters`` under its ``clusterId``.

    Args:
        stream: the stream to place into a cluster.

    Returns:
        None; the method only updates state.
    """
    predictedCluster = self.getClusterForDocument(stream)

    # Identity test: only a real None means "no cluster predicted"; falsy
    # keys such as 0 or '' still select an existing cluster.
    if predictedCluster is not None:
        self.clusters[predictedCluster].addDocument(
            stream, **self.stream_settings)
        return

    newCluster = StreamCluster(stream)
    newCluster.setSignatureUsingVectorPermutations(
        self.unitVector, self.vectorPermutations,
        self.phraseTextAndDimensionMap)
    for permutation in self.signaturePermutations:
        permutation.addDocument(newCluster)
    self.clusters[newCluster.clusterId] = newCluster
def getClusterAndUpdateExistingClusters(self, stream):
    """Add ``stream`` to its predicted cluster, or start a new cluster for it.

    ``self.getClusterForDocument(stream)`` is asked for a key into
    ``self.clusters``.  When it returns a key, the stream is added to that
    cluster with ``self.stream_settings`` passed as keyword arguments.  When
    it returns None, a new StreamCluster is built around the stream, given a
    signature, added to every entry of ``self.signaturePermutations`` and
    registered in ``self.clusters`` under its ``clusterId``.

    Args:
        stream: the stream to place into a cluster.

    Returns:
        None; the method only updates state.
    """
    predictedCluster = self.getClusterForDocument(stream)

    # Do not remove this comment. Might need this if StreamCluster is used
    # again in future.
    #     if predictedCluster!=None:
    #         self.clusters[predictedCluster].addStream(
    #             stream, **self.stream_settings)

    # Identity test: only a real None means "no cluster predicted"; falsy
    # keys such as 0 or '' still select an existing cluster.
    if predictedCluster is not None:
        self.clusters[predictedCluster].addDocument(
            stream, **self.stream_settings)
        return

    newCluster = StreamCluster(stream)
    newCluster.setSignatureUsingVectorPermutations(
        self.unitVector, self.vectorPermutations,
        self.phraseTextAndDimensionMap)
    for permutation in self.signaturePermutations:
        permutation.addDocument(newCluster)
    self.clusters[newCluster.clusterId] = newCluster