from datetime import datetime, timedelta

# FileIO, experts_twitter_stream_settings, getDateTimeObjectFromTweetTimestamp and
# TwitterCrowdsSpecificMethods are project-specific helpers imported elsewhere in the module.
def iterateExpertClusters(startingDay=datetime(2011, 3, 19), endingDay=datetime(2011, 3, 30)):
    # Alternate range used elsewhere: endingDay=datetime(2011, 4, 7)
    # Walk one LSH-clusters file per day and yield (timestamp, cluster) pairs.
    while startingDay <= endingDay:
        for line in FileIO.iterateJsonFromFile(experts_twitter_stream_settings.lsh_clusters_folder + FileIO.getFileByDay(startingDay)):
            currentTime = getDateTimeObjectFromTweetTimestamp(line['time_stamp'])
            for clusterMap in line['clusters']:
                yield (currentTime, TwitterCrowdsSpecificMethods.getClusterFromMapFormat(clusterMap))
        startingDay += timedelta(days=1)
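A minimal sketch of consuming this generator, assuming the project helpers above are importable and the daily cluster files exist; the per-day counting is purely illustrative:

# Illustrative consumer of iterateExpertClusters(); counts clusters per day.
from collections import defaultdict

clustersPerDay = defaultdict(int)
for currentTime, cluster in iterateExpertClusters():
    clustersPerDay[currentTime.date()] += 1
for day, count in sorted(clustersPerDay.items()):
    print('%s\t%d' % (day, count))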
def test_getClusterFromMapFormat(self):
    mapRepresentation = {
        'clusterId': 1,
        'mergedClustersList': [self.cluster1.clusterId],
        'lastStreamAddedTime': getStringRepresentationForTweetTimestamp(test_time),
        'streams': [self.doc1.docId],
        'dimensions': {'#tcot': 2, 'dsf': 2},
    }
    cluster = TwitterCrowdsSpecificMethods.getClusterFromMapFormat(mapRepresentation)
    self.assertEqual(1, cluster.clusterId)
    self.assertEqual([self.cluster1.clusterId], cluster.mergedClustersList)
    self.assertEqual([self.doc1.docId], cluster.documentsInCluster)
    # Cluster is dict-like, so it compares equal to its dimensions map.
    self.assertEqual({'#tcot': 2, 'dsf': 2}, cluster)
    self.assertEqual(getStringRepresentationForTweetTimestamp(test_time), getStringRepresentationForTweetTimestamp(cluster.lastStreamAddedTime))
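Based only on the fields this test exercises, the map format carries clusterId, mergedClustersList, lastStreamAddedTime, streams and dimensions. The converter below is a hypothetical sketch of that mapping; SimpleCluster is an illustrative stand-in, not the project's Cluster class, and the timestamp parsing reuses the getDateTimeObjectFromTweetTimestamp helper seen above.

# Hypothetical sketch of the conversion implied by the test; the real method
# builds the project's dict-like Cluster, not this stand-in class.
class SimpleCluster(object):
    def __init__(self, clusterId, dimensions):
        self.clusterId = clusterId
        self.dimensions = dimensions
        self.documentsInCluster = []
        self.mergedClustersList = []
        self.lastStreamAddedTime = None

def getClusterFromMapFormat(clusterMap):
    cluster = SimpleCluster(clusterMap['clusterId'], clusterMap['dimensions'])
    cluster.documentsInCluster = clusterMap['streams']
    cluster.mergedClustersList = clusterMap['mergedClustersList']
    # Project helper (seen in iterateExpertClusters) that parses the tweet timestamp string.
    cluster.lastStreamAddedTime = getDateTimeObjectFromTweetTimestamp(clusterMap['lastStreamAddedTime'])
    return cluster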
Example #3
def dataIterator(self):
    # Replay buffered cluster maps in timestamp order, yielding
    # (currentTime, cluster) pairs much like iterateExpertClusters above.
    for currentTime, clusterMaps in sorted(self.clusterMaps.iteritems()):
        for clusterMap in clusterMaps:
            yield (currentTime, TwitterCrowdsSpecificMethods.getClusterFromMapFormat(clusterMap))
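dataIterator reads from a self.clusterMaps attribute mapping a timestamp to a list of cluster maps. The holder below is a hypothetical stand-in showing one shape that attribute could take; ClusterMapBuffer and addClusterMap are illustrative names, not part of the original project, and the method body mirrors the snippet above.

# Hypothetical buffer around the dataIterator shown above.
class ClusterMapBuffer(object):
    def __init__(self):
        self.clusterMaps = {}  # timestamp -> [clusterMap, ...]

    def addClusterMap(self, currentTime, clusterMap):
        self.clusterMaps.setdefault(currentTime, []).append(clusterMap)

    def dataIterator(self):
        # Same logic as the example above (Python 2 style, hence iteritems).
        for currentTime, clusterMaps in sorted(self.clusterMaps.iteritems()):
            for clusterMap in clusterMaps:
                yield (currentTime, TwitterCrowdsSpecificMethods.getClusterFromMapFormat(clusterMap))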