def test_append(self):
    """append() should index the new cluster by epoch and extend the crowd's lifespan/endTime."""
    one_day_later = test_time + timedelta(days=1)
    self.crowd.append(self.cluster, one_day_later)
    # Both the original and the appended cluster are keyed by their epoch.
    expected_epochs = [
        GeneralMethods.getEpochFromDateTimeObject(test_time),
        GeneralMethods.getEpochFromDateTimeObject(one_day_later),
    ]
    self.assertEqual(expected_epochs, sorted(self.crowd.clusters.keys()))
    self.assertEqual(StreamCluster, type(self.crowd.clusters[GeneralMethods.getEpochFromDateTimeObject(test_time)]))
    # Two days of activity -> lifespan of 2.
    self.assertEqual(2, self.crowd.lifespan)
    self.assertEqual(getStringRepresentationForTweetTimestamp(test_time), getStringRepresentationForTweetTimestamp(self.crowd.startTime))
    self.assertEqual(getStringRepresentationForTweetTimestamp(one_day_later), getStringRepresentationForTweetTimestamp(self.crowd.endTime))
# Example 2
 def __init__(self, cluster, clusterFormationTime):
     """Seed a crowd with its first cluster, keyed by the epoch of its formation time."""
     formation_epoch = GeneralMethods.getEpochFromDateTimeObject(clusterFormationTime)
     self.crowdId = cluster.clusterId
     self.clusters = {formation_epoch: cluster}
     # No successor yet, no feeders, and the crowd is still alive.
     self.ends = False
     self.inComingCrowds = []
     self.outGoingCrowd = None
# Example 3
    def combineLocationGraphs(graphMap, startingGraphId, startingTime, intervalInSeconds, linear=True, **kwargs):
        """Combine the per-time-unit location graphs in *graphMap* covering the
        window that starts at *startingTime* and spans *intervalInSeconds*.

        When *linear* is False, larger precomputed aggregate graphs (keyed as
        '<epochGraphId>_<size>', where size is a power of two) are preferred so
        fewer graphs need to be merged; otherwise graphs are collected one unit
        at a time (index 1).  Returns combineGraphList(...) over the selection,
        with **kwargs forwarded to it.

        NOTE(review): the `[1]+map(...)` and `max(indices)` expressions rely on
        Python 2 `map`/`filter` returning lists — confirm before porting to py3.
        """
        # Number of unit graphs needed = ceil(intervalInSeconds / TIME_UNIT_IN_SECONDS).
        if intervalInSeconds%TIME_UNIT_IN_SECONDS==0 and int(intervalInSeconds/TIME_UNIT_IN_SECONDS)!=0: numberOfGraphs = int(intervalInSeconds/TIME_UNIT_IN_SECONDS)
        else: numberOfGraphs = int(intervalInSeconds/TIME_UNIT_IN_SECONDS)+1
        # Snap the starting time to a time-unit boundary, then translate it into
        # a logarithmic id relative to startingGraphId.
        graphId = GeneralMethods.approximateEpoch(GeneralMethods.getEpochFromDateTimeObject(startingTime), TIME_UNIT_IN_SECONDS)
        currentLogarithmicId = LocationGraphs.getLogarithmicGraphId(startingGraphId, graphId)
        currentCollectedGraphs = 0
        graphIdsToCombine = []
        # Walk backwards through logarithmic ids until enough graphs are covered.
        while currentCollectedGraphs!=numberOfGraphs and currentLogarithmicId>0:
            # Largest power-of-two chunk that still fits in what remains.
            numberOfGraphsToCollect = 2**int(math.log(numberOfGraphs-currentCollectedGraphs,2))
            if not linear and currentLogarithmicId%2==0: 
                # Candidate aggregate sizes: powers of two that divide the current id.
                indices = [1]+map(lambda j: 2**j, filter(lambda j: currentLogarithmicId%(2**j)==0, range(1, int(math.log(currentLogarithmicId+1,2))+1)))
                # Prefer the exact chunk size if available; otherwise the biggest divisor.
                if max(indices)>numberOfGraphsToCollect and numberOfGraphsToCollect in indices: index = numberOfGraphsToCollect
                else: index = max(indices)
            else: index=1
            logGraphId = '%s_%s'%(LocationGraphs.getGraphId(startingGraphId, currentLogarithmicId), index)
            # Silently skip ids missing from graphMap; only advance the counters.
            if logGraphId in graphMap: graphIdsToCombine.append(logGraphId)
            currentLogarithmicId-=index
            currentCollectedGraphs+=index
        # Merge the biggest aggregates first (sort by the '_<size>' suffix, descending).
        graphIdsToCombine = sorted(graphIdsToCombine, key=lambda id:int(id.split('_')[1]), reverse=True)
#        print graphIdsToCombine
#        for i in graphIdsToCombine:
#            ep, l = i.split('_')
#            print i, datetime.datetime.fromtimestamp(float(ep)), l, graphMap[i].number_of_nodes()
        graphsToCombine = [graphMap[id] for id in graphIdsToCombine]
        return combineGraphList(graphsToCombine, **kwargs)
def getStreamStats(streamTweetsIterator):
    ''' 30-day
        Experts stats:
        # of users:  4804
        # of tweets:  1614510
        # of tweets per tu (mean, var):  186.497631974 7860.12570191
        
        Houston stats
        # of users:  107494
        # of tweets:  15946768
        # of tweets per tu (mean, var):  1730.33506944 4834419.37341
        
        10-day
        Experts stats
        # of users:  4674
        # of tweets:  608798
        # of tweets per tu (mean, var):  190.726190476 8132.75460228
        Houston stats
        # of users:  39618
        # of tweets:  2139829
        # of tweets per tu (mean, var):  619.163483796 94450.7334004

    '''
    numberOfTweets, users, distributionPerTU = 0, set(), defaultdict(int)
    for tweet in streamTweetsIterator: 
        users.add(tweet['user']['screen_name'])
        distributionPerTU[GeneralMethods.getEpochFromDateTimeObject(getDateTimeObjectFromTweetTimestamp(tweet['created_at']))//300]+=1
        numberOfTweets+=1
    print '# of users: ', len(users)
    print '# of tweets: ', numberOfTweets 
    print '# of tweets per tu (mean, var): ', np.mean(distributionPerTU.values()), np.var(distributionPerTU.values())
# Example 5
 def test_append(self):
     """append() should index the new cluster by epoch and extend the crowd's lifespan/endTime."""
     one_day_later = test_time + timedelta(days=1)
     self.crowd.append(self.cluster, one_day_later)
     # Both the seed cluster and the appended one are keyed by epoch.
     expected_epochs = [
         GeneralMethods.getEpochFromDateTimeObject(test_time),
         GeneralMethods.getEpochFromDateTimeObject(one_day_later),
     ]
     self.assertEqual(expected_epochs, sorted(self.crowd.clusters.keys()))
     seed_epoch = GeneralMethods.getEpochFromDateTimeObject(test_time)
     self.assertEqual(StreamCluster, type(self.crowd.clusters[seed_epoch]))
     # Two days of activity -> lifespan of 2.
     self.assertEqual(2, self.crowd.lifespan)
     self.assertEqual(
         getStringRepresentationForTweetTimestamp(test_time),
         getStringRepresentationForTweetTimestamp(self.crowd.startTime))
     self.assertEqual(
         getStringRepresentationForTweetTimestamp(one_day_later),
         getStringRepresentationForTweetTimestamp(self.crowd.endTime))
def iteratePhrases(tweetsFile='/mnt/chevron/kykamath/data/twitter/tweets_by_trends/2011_2_6.gz', timeUnitInSeconds=60):
    """Yield (phrase, approximated_epoch) pairs for every non-empty phrase in
    every tweet of *tweetsFile*.

    Generalized from the original: the gzip path and the 60-second epoch
    approximation unit are now parameters with the previous hard-coded values
    as defaults, so existing callers are unaffected.

    tweetsFile        -- path to a gzipped tweet dump readable by TweetFiles.
    timeUnitInSeconds -- bucket size passed to GeneralMethods.approximateEpoch.
    """
    for tweet in TweetFiles.iterateTweetsFromGzip(tweetsFile):
        # Uses the module-level `settings` for message conversion options.
        message = TwitterCrowdsSpecificMethods.convertTweetJSONToMessage(tweet, **settings)
        if message.vector:
            for phrase in message.vector:
                # Skip empty phrases; timestamp is snapped to the time-unit grid.
                if phrase != '':
                    yield (phrase, GeneralMethods.approximateEpoch(GeneralMethods.getEpochFromDateTimeObject(message.timeStamp), timeUnitInSeconds))
# Example 7
 def append(self, cluster, clusterFormationTime):
     """Register *cluster* in this crowd under the epoch of its formation time."""
     formation_epoch = GeneralMethods.getEpochFromDateTimeObject(clusterFormationTime)
     self.clusters[formation_epoch] = cluster
# Example 8
 def append(self, cluster, clusterFormationTime):
     """Store *cluster*, keyed by the epoch corresponding to *clusterFormationTime*."""
     epoch_key = GeneralMethods.getEpochFromDateTimeObject(clusterFormationTime)
     self.clusters[epoch_key] = cluster
# Example 9
 def __init__(self, cluster, clusterFormationTime):
     """Initialize a crowd from its first cluster, indexed by formation epoch."""
     self.crowdId = cluster.clusterId
     epoch_key = GeneralMethods.getEpochFromDateTimeObject(clusterFormationTime)
     self.clusters = {epoch_key: cluster}
     # Crowd starts alive, with no incoming crowds and no outgoing crowd.
     self.ends = False
     self.inComingCrowds = []
     self.outGoingCrowd = None