class CrowdTests(unittest.TestCase):
    """Tests for Crowd, driven by a StreamCluster that starts with one stream."""

    def setUp(self):
        """Create the message, stream, cluster and crowd shared by every test."""
        self.m1 = Message(
            1,
            'sdf',
            'A project to cluster high-dimensional streams.',
            test_time - timedelta(seconds=60),
        )
        self.m1.vector = Vector({1: 1., 2: 3.})
        self.stream = Stream(1, self.m1)
        self.cluster = StreamCluster(self.stream)
        self.crowd = Crowd(self.cluster, test_time)

    # NOTE: test methods are documented with comments instead of docstrings
    # so that unittest's verbose report keeps printing the method names.

    def test_intitialization(self):
        # A new crowd takes the id of the cluster it was created from.
        self.assertEqual(self.cluster.clusterId, self.crowd.crowdId)

    def test_append(self):
        # Appending the cluster one day later must add a second epoch key and
        # move the lifespan and end time while the start time stays put.
        next_day = test_time + timedelta(days=1)
        self.crowd.append(self.cluster, next_day)

        to_epoch = GeneralMethods.getEpochFromDateTimeObject
        expected_keys = [to_epoch(test_time), to_epoch(next_day)]
        self.assertEqual(expected_keys, sorted(self.crowd.clusters.keys()))

        first_entry = self.crowd.clusters[to_epoch(test_time)]
        self.assertEqual(StreamCluster, type(first_entry))
        self.assertEqual(2, self.crowd.lifespan)

        as_text = getStringRepresentationForTweetTimestamp
        self.assertEqual(as_text(test_time), as_text(self.crowd.startTime))
        self.assertEqual(as_text(next_day), as_text(self.crowd.endTime))

    def test_maxClusterSize(self):
        # The maximum cluster size follows documents added to the cluster.
        self.assertEqual(1, self.crowd.maxClusterSize)

        second_message = Message(
            4, 'sdf', 'A project to cluster high-dimensional streams.', test_time
        )
        second_message.vector = Vector({2: 4})
        second_stream = Stream(4, second_message)
        self.cluster.addDocument(second_stream)

        self.assertEqual(2, self.crowd.maxClusterSize)

    def test_crowdSize(self):
        self.assertEqual(1, self.crowd.crowdSize)

        self.cluster.addDocument(Stream(2, self.m1))
        self.cluster.addDocument(Stream(3, self.m1))
        self.assertEqual(3, self.crowd.crowdSize)

        # The appended cluster only holds stream id 3, which the crowd already
        # contains, so the size is expected to stay at 3 rather than grow to 4.
        overlapping_cluster = StreamCluster(Stream(3, self.m1))
        self.crowd.append(overlapping_cluster, test_time + timedelta(days=2))
        self.assertNotEqual(4, self.crowd.crowdSize)
        self.assertEqual(3, self.crowd.crowdSize)
Example #2
0
 def setUp(self):
     """Create the message, stream, cluster and crowd fixtures for each test."""
     # The message is stamped 60 seconds before test_time, the moment the
     # crowd itself is created.
     posted_at = test_time - timedelta(seconds=60)
     message = Message(1, 'sdf', 'A project to cluster high-dimensional streams.', posted_at)
     message.vector = Vector({1: 1., 2: 3.})
     self.m1 = message
     self.stream = Stream(1, message)
     self.cluster = StreamCluster(self.stream)
     self.crowd = Crowd(self.cluster, test_time)
Example #3
0
class CrowdTests(unittest.TestCase):
    """Exercise Crowd against a StreamCluster seeded with a single stream."""

    def setUp(self):
        """Build the fixtures: one message, its stream, a cluster and a crowd."""
        created_at = test_time - timedelta(seconds=60)
        self.m1 = Message(1, 'sdf', 'A project to cluster high-dimensional streams.', created_at)
        self.m1.vector = Vector({1: 1., 2: 3.})
        self.stream = Stream(1, self.m1)
        self.cluster = StreamCluster(self.stream)
        self.crowd = Crowd(self.cluster, test_time)

    # Test methods carry comments rather than docstrings so that unittest's
    # verbose output still shows the method names.

    def test_intitialization(self):
        # The crowd id must equal the id of the cluster that seeded it.
        expected_id = self.cluster.clusterId
        self.assertEqual(expected_id, self.crowd.crowdId)

    def test_append(self):
        # After appending the same cluster one day later the crowd should hold
        # two epoch keys, report a lifespan of 2 and span test_time..+1 day.
        one_day_later = test_time + timedelta(days=1)
        self.crowd.append(self.cluster, one_day_later)

        epoch_of = GeneralMethods.getEpochFromDateTimeObject
        self.assertEqual([epoch_of(test_time), epoch_of(one_day_later)],
                         sorted(self.crowd.clusters.keys()))
        self.assertEqual(StreamCluster, type(self.crowd.clusters[epoch_of(test_time)]))
        self.assertEqual(2, self.crowd.lifespan)

        fmt = getStringRepresentationForTweetTimestamp
        self.assertEqual(fmt(test_time), fmt(self.crowd.startTime))
        self.assertEqual(fmt(one_day_later), fmt(self.crowd.endTime))

    def test_maxClusterSize(self):
        # Adding one more stream to the cluster raises the maximum from 1 to 2.
        self.assertEqual(1, self.crowd.maxClusterSize)
        extra_message = Message(4, 'sdf', 'A project to cluster high-dimensional streams.', test_time)
        extra_message.vector = Vector({2: 4})
        self.cluster.addDocument(Stream(4, extra_message))
        self.assertEqual(2, self.crowd.maxClusterSize)

    def test_crowdSize(self):
        self.assertEqual(1, self.crowd.crowdSize)

        for stream_id in (2, 3):
            self.cluster.addDocument(Stream(stream_id, self.m1))
        self.assertEqual(3, self.crowd.crowdSize)

        # Stream id 3 is already part of the crowd, so appending a cluster
        # that only contains it is expected to leave the size at 3, not 4.
        repeat_cluster = StreamCluster(Stream(3, self.m1))
        self.crowd.append(repeat_cluster, test_time + timedelta(days=2))
        self.assertNotEqual(4, self.crowd.crowdSize)
        self.assertEqual(3, self.crowd.crowdSize)
    def constructCrowdDataStructures(dataIterator):
        """Populate the AnalyzeData crowd maps from timestamped clusters.

        For every ``(currentTime, cluster)`` pair produced by
        ``dataIterator()`` the cluster is stamped with ``currentTime`` and
        stored in ``AnalyzeData.clusterMap``. It is then appended to the crowd
        of the first id in ``cluster.mergedClustersList`` that already has a
        crowd; if none has one, a new ``Crowd`` is created under the id
        ``cluster.mergedClustersList[0]``. Crowds of the remaining known merged
        ids are linked to the chosen crowd through ``updateOutGoingCrowd`` and
        ``updateInComingCrowd``. Once the iterator is exhausted,
        ``AnalyzeData.constructCrowdIdToClusterIdMap()`` is called.

        Args:
            dataIterator: zero-argument callable returning an iterable of
                ``(currentTime, cluster)`` pairs.

        Raises:
            Exception: if the crowd id resolved for a cluster is ``None``.

        NOTE(review): the function takes no ``self``/``cls``; presumably it is
        a static method of AnalyzeData whose decorator is not visible in this
        excerpt -- confirm against the full file.
        """
        for currentTime, cluster in dataIterator():
            cluster.currentTime = currentTime
            AnalyzeData.clusterMap[cluster.clusterId] = cluster

            # Reuse the crowd of the first merged cluster that already has one.
            crowdId = None
            for clusterId in cluster.mergedClustersList:
                if clusterId in AnalyzeData.clusterIdToCrowdIdMap:
                    crowdId = AnalyzeData.clusterIdToCrowdIdMap[clusterId]
                    break

            newCrowdAdded = crowdId is None
            if newCrowdAdded:
                # No known ancestor: the first merged id names the new crowd.
                crowdId = cluster.mergedClustersList[0]
                AnalyzeData.crowdMap[crowdId] = Crowd(cluster, currentTime)
            else:
                AnalyzeData.crowdMap[crowdId].append(cluster, currentTime)
            if crowdId is None:
                raise Exception('Crowd id cannot be None.')
            AnalyzeData.clusterIdToCrowdIdMap[cluster.clusterId] = crowdId

            # For a new crowd the first merged id is the crowd's own id, so it
            # is skipped when linking. Slicing iterates over a copy of the list.
            if newCrowdAdded:
                mergedClustersList = cluster.mergedClustersList[1:]
            else:
                mergedClustersList = cluster.mergedClustersList[:]
            for clusterId in mergedClustersList:
                if clusterId in AnalyzeData.clusterIdToCrowdIdMap:
                    sourceCrowdId = AnalyzeData.clusterIdToCrowdIdMap[clusterId]
                    AnalyzeData.crowdMap[sourceCrowdId].updateOutGoingCrowd(crowdId)
                    AnalyzeData.crowdMap[crowdId].updateInComingCrowd(sourceCrowdId)
        AnalyzeData.constructCrowdIdToClusterIdMap()
 def setUp(self):
     """Prepare the message, stream, cluster and crowd used by the tests."""
     # The message predates test_time (the crowd's creation time) by 60 seconds.
     self.m1 = Message(
         1,
         'sdf',
         'A project to cluster high-dimensional streams.',
         test_time - timedelta(seconds=60),
     )
     self.m1.vector = Vector({1: 1., 2: 3.})
     first_stream = Stream(1, self.m1)
     self.stream = first_stream
     self.cluster = StreamCluster(self.stream)
     self.crowd = Crowd(self.cluster, test_time)