class CrowdTests(unittest.TestCase):
    """Unit tests for Crowd, driven by a single StreamCluster fixture."""

    def setUp(self):
        """Build one message, its stream and cluster, and a crowd created with test_time."""
        created_at = test_time - timedelta(seconds=60)
        self.m1 = Message(1, 'sdf', 'A project to cluster high-dimensional streams.', created_at)
        self.m1.vector = Vector({1: 1., 2: 3.})
        self.stream = Stream(1, self.m1)
        self.cluster = StreamCluster(self.stream)
        self.crowd = Crowd(self.cluster, test_time)

    def test_intitialization(self):
        """The crowd id must equal the id of the cluster it was created from."""
        expected_id = self.cluster.clusterId
        self.assertEqual(expected_id, self.crowd.crowdId)

    def test_append(self):
        """Appending a cluster one day later adds a second entry and extends the end time."""
        next_day = test_time + timedelta(days=1)
        self.crowd.append(self.cluster, next_day)

        # The crowd's clusters mapping is expected to be keyed by epoch values.
        expected_keys = [
            GeneralMethods.getEpochFromDateTimeObject(test_time),
            GeneralMethods.getEpochFromDateTimeObject(next_day),
        ]
        actual_keys = sorted(self.crowd.clusters.keys())
        self.assertEqual(expected_keys, actual_keys)

        first_entry = self.crowd.clusters[GeneralMethods.getEpochFromDateTimeObject(test_time)]
        self.assertEqual(StreamCluster, type(first_entry))
        self.assertEqual(2, self.crowd.lifespan)

        # Start and end times are compared through their string representation.
        self.assertEqual(
            getStringRepresentationForTweetTimestamp(test_time),
            getStringRepresentationForTweetTimestamp(self.crowd.startTime),
        )
        self.assertEqual(
            getStringRepresentationForTweetTimestamp(next_day),
            getStringRepresentationForTweetTimestamp(self.crowd.endTime),
        )

    def test_maxClusterSize(self):
        """maxClusterSize goes from 1 to 2 once a second stream joins the cluster."""
        self.assertEqual(1, self.crowd.maxClusterSize)

        second_message = Message(4, 'sdf', 'A project to cluster high-dimensional streams.', test_time)
        second_message.vector = Vector({2: 4})
        second_stream = Stream(4, second_message)
        self.cluster.addDocument(second_stream)

        self.assertEqual(2, self.crowd.maxClusterSize)

    def test_crowdSize(self):
        """crowdSize is 3 after adding streams 2 and 3, and stays 3 after appending a cluster holding stream 3."""
        self.assertEqual(1, self.crowd.crowdSize)

        for stream_id in (2, 3):
            self.cluster.addDocument(Stream(stream_id, self.m1))
        self.assertEqual(3, self.crowd.crowdSize)

        # The appended cluster is built from a stream id (3) that was already added above.
        later_cluster = StreamCluster(Stream(3, self.m1))
        self.crowd.append(later_cluster, test_time + timedelta(days=2))
        self.assertNotEqual(4, self.crowd.crowdSize)
        self.assertEqual(3, self.crowd.crowdSize)
def setUp(self):
    """Prepare the shared fixture: message m1, its stream, a cluster and a crowd.

    The message is timestamped 60 seconds before test_time; the crowd is
    created from the cluster together with test_time.
    """
    message_time = test_time - timedelta(seconds=60)
    message = Message(1, 'sdf', 'A project to cluster high-dimensional streams.', message_time)
    self.m1 = message
    self.m1.vector = Vector({1: 1., 2: 3.})

    # Each object wraps the previous one: message -> stream -> cluster -> crowd.
    self.stream = Stream(1, self.m1)
    self.cluster = StreamCluster(self.stream)
    self.crowd = Crowd(self.cluster, test_time)
class CrowdTests(unittest.TestCase):
    """Checks Crowd initialization, append, maxClusterSize and crowdSize."""

    def setUp(self):
        """Create the message/stream/cluster chain and the crowd used by every test."""
        self.m1 = Message(
            1,
            'sdf',
            'A project to cluster high-dimensional streams.',
            test_time - timedelta(seconds=60),
        )
        self.m1.vector = Vector({1: 1., 2: 3.})
        self.stream = Stream(1, self.m1)
        self.cluster = StreamCluster(self.stream)
        self.crowd = Crowd(self.cluster, test_time)

    def test_intitialization(self):
        """A new crowd takes its id from the initial cluster."""
        self.assertEqual(self.cluster.clusterId, self.crowd.crowdId)

    def test_append(self):
        """After appending at test_time + 1 day the crowd has two keyed clusters and lifespan 2."""
        appended_at = test_time + timedelta(days=1)
        self.crowd.append(self.cluster, appended_at)

        epoch_at_start = GeneralMethods.getEpochFromDateTimeObject(test_time)
        epoch_at_append = GeneralMethods.getEpochFromDateTimeObject(appended_at)
        self.assertEqual(
            [epoch_at_start, epoch_at_append],
            sorted(self.crowd.clusters.keys()),
        )

        stored_type = type(
            self.crowd.clusters[GeneralMethods.getEpochFromDateTimeObject(test_time)]
        )
        self.assertEqual(StreamCluster, stored_type)
        self.assertEqual(2, self.crowd.lifespan)

        expected_start = getStringRepresentationForTweetTimestamp(test_time)
        actual_start = getStringRepresentationForTweetTimestamp(self.crowd.startTime)
        self.assertEqual(expected_start, actual_start)

        expected_end = getStringRepresentationForTweetTimestamp(appended_at)
        actual_end = getStringRepresentationForTweetTimestamp(self.crowd.endTime)
        self.assertEqual(expected_end, actual_end)

    def test_maxClusterSize(self):
        """Adding one more stream to the cluster raises maxClusterSize from 1 to 2."""
        self.assertEqual(1, self.crowd.maxClusterSize)
        extra_message = Message(
            4,
            'sdf',
            'A project to cluster high-dimensional streams.',
            test_time,
        )
        extra_message.vector = Vector({2: 4})
        self.cluster.addDocument(Stream(4, extra_message))
        self.assertEqual(2, self.crowd.maxClusterSize)

    def test_crowdSize(self):
        """crowdSize reaches 3 with streams 1-3 and is still 3 after appending a cluster of stream 3."""
        self.assertEqual(1, self.crowd.crowdSize)

        second_stream = Stream(2, self.m1)
        self.cluster.addDocument(second_stream)
        third_stream = Stream(3, self.m1)
        self.cluster.addDocument(third_stream)
        self.assertEqual(3, self.crowd.crowdSize)

        # This cluster reuses stream id 3, which the crowd's first cluster already holds.
        repeat_cluster = StreamCluster(Stream(3, self.m1))
        two_days_later = test_time + timedelta(days=2)
        self.crowd.append(repeat_cluster, two_days_later)
        self.assertNotEqual(4, self.crowd.crowdSize)
        self.assertEqual(3, self.crowd.crowdSize)
def constructCrowdDataStructures(dataIterator):
    """Populate the crowd bookkeeping maps on AnalyzeData from a stream of clusters.

    For each ``(currentTime, cluster)`` pair produced by ``dataIterator()``:

    1. ``cluster.currentTime`` is set and the cluster is stored in
       ``AnalyzeData.clusterMap`` under ``cluster.clusterId``.
    2. The cluster is assigned to a crowd: the crowd of the first id in
       ``cluster.mergedClustersList`` already present in
       ``AnalyzeData.clusterIdToCrowdIdMap``; if none is present, a new
       ``Crowd`` is created and keyed by ``cluster.mergedClustersList[0]``.
    3. ``AnalyzeData.clusterIdToCrowdIdMap`` records the crowd for this cluster.
    4. For every merged cluster id that already belongs to a crowd, that crowd
       gets ``updateOutGoingCrowd(crowdId)`` and this cluster's crowd gets
       ``updateInComingCrowd(...)``. When a new crowd was just created, the
       first merged id (the one used as the crowd key) is skipped.

    ``AnalyzeData.constructCrowdIdToClusterIdMap()`` is called once at the end.

    Args:
        dataIterator: Zero-argument callable returning an iterable of
            ``(currentTime, cluster)`` pairs.

    Raises:
        Exception: If the resolved crowd id is ``None``.
    """
    for currentTime, cluster in dataIterator():
        cluster.currentTime = currentTime
        AnalyzeData.clusterMap[cluster.clusterId] = cluster

        # Crowd of the first merged cluster that is already tracked, else None.
        crowdId = next(
            (AnalyzeData.clusterIdToCrowdIdMap[mergedId]
             for mergedId in cluster.mergedClustersList
             if mergedId in AnalyzeData.clusterIdToCrowdIdMap),
            None)

        newCrowdAdded = crowdId is None
        if newCrowdAdded:
            # No merged cluster belongs to a crowd yet: start a new crowd
            # keyed by the first merged cluster id.
            crowdId = cluster.mergedClustersList[0]
            AnalyzeData.crowdMap[crowdId] = Crowd(cluster, currentTime)
        else:
            AnalyzeData.crowdMap[crowdId].append(cluster, currentTime)

        # Still reachable when mergedClustersList[0] itself is None.
        if crowdId is None:
            raise Exception('Crowd id cannot be None.')
        AnalyzeData.clusterIdToCrowdIdMap[cluster.clusterId] = crowdId

        # Slicing copies the list, so the loop iterates over a snapshot; for a
        # freshly created crowd the id that named it is excluded.
        firstLinked = 1 if newCrowdAdded else 0
        for mergedId in cluster.mergedClustersList[firstLinked:]:
            if mergedId in AnalyzeData.clusterIdToCrowdIdMap:
                sourceCrowdId = AnalyzeData.clusterIdToCrowdIdMap[mergedId]
                AnalyzeData.crowdMap[sourceCrowdId].updateOutGoingCrowd(crowdId)
                AnalyzeData.crowdMap[crowdId].updateInComingCrowd(sourceCrowdId)

    # NOTE(review): the original layout was collapsed onto one line; this call
    # is placed after the loop (run once) -- confirm against the upstream file.
    AnalyzeData.constructCrowdIdToClusterIdMap()
def setUp(self):
    """Create the objects shared by the tests in this case.

    Sets self.m1 (a message dated one minute before test_time, with a
    two-dimensional vector), self.stream, self.cluster and self.crowd.
    """
    one_minute = timedelta(seconds=60)
    self.m1 = Message(
        1,
        'sdf',
        'A project to cluster high-dimensional streams.',
        test_time - one_minute,
    )
    self.m1.vector = Vector({1: 1., 2: 3.})
    stream = Stream(1, self.m1)
    cluster = StreamCluster(stream)
    self.stream = stream
    self.cluster = cluster
    self.crowd = Crowd(cluster, test_time)