Example #1
0
    def test_get_memory_usage(self):
        """Memory usage is 'Unknown' on Windows, else formatted as 'N.N MB'."""
        import os

        process = StreamProcess()
        usage = process.get_memory_usage()
        if os.name == 'nt':
            # get_memory_usage has no Windows implementation — TODO confirm
            self.assertEqual(usage, "Unknown")
        else:
            # Escape the dot: r"\d+.\d+" would also match e.g. "12x34 MB".
            # NOTE(review): assertRegexpMatches is deprecated on Python 3
            # (removed in 3.12); switch to assertRegex if this runs on py3.
            self.assertRegexpMatches(usage, r"\d+\.\d+ MB")
    def test_get_memory_usage(self):
        """Memory usage is 'Unknown' on Windows, else formatted as 'N.N MB'."""
        import os

        process = StreamProcess()
        usage = process.get_memory_usage()
        if os.name == 'nt':
            # get_memory_usage has no Windows implementation — TODO confirm
            self.assertEqual(usage, "Unknown")
        else:
            # Escape the dot: r"\d+.\d+" would also match e.g. "12x34 MB".
            # NOTE(review): assertRegexpMatches is deprecated on Python 3
            # (removed in 3.12); switch to assertRegex if this runs on py3.
            self.assertRegexpMatches(usage, r"\d+\.\d+ MB")
Example #3
0
def stream_status():
    """Build a status snapshot of the tweet stream.

    Returns a dict with: whether any stream process is running, the
    enabled filter terms, the process list, the approximate tweet count,
    earliest/latest tweet times, the average tweet rate (tweets/second
    over the whole stored range), and a per-minute tweet-count timeline.
    """
    terms = FilterTerm.objects.filter(enabled=True)
    processes = StreamProcess.get_current_stream_processes()
    # The stream is "running" if any process reports RUNNING status.
    running = any(p.status == StreamProcess.STREAM_STATUS_RUNNING
                  for p in processes)

    Tweet = load_model("twitter_stream", "Tweet")
    tweet_count = Tweet.count_approx()
    earliest_time = Tweet.get_earliest_created_at()
    latest_time = Tweet.get_latest_created_at()

    # Average tweets/second across the full stored range, when known.
    avg_rate = None
    if earliest_time is not None and latest_time is not None:
        avg_rate = float(tweet_count) / (latest_time -
                                         earliest_time).total_seconds()

    # Get the tweets / minute over the past 20 minutes (the filter below
    # uses timedelta(minutes=20); the old comment claimed 10).
    tweet_counts = []
    if latest_time is not None:
        latest_time_minute = latest_time.replace(second=0, microsecond=0)

        # Hoist the engine lookup; pick a DB-specific SQL expression that
        # truncates created_at to the minute for one-minute bucketing.
        engine = settings.DATABASES['default']['ENGINE']
        if engine.endswith('mysql'):
            drop_seconds = "created_at - INTERVAL SECOND(created_at) SECOND"
        elif engine.endswith('postgresql_psycopg2'):
            drop_seconds = "date_trunc('minute', created_at)"
        else:
            # Fallback: no truncation (one bucket per distinct timestamp).
            drop_seconds = "created_at"

        tweet_counts = Tweet.objects.extra(select={
            'time': drop_seconds
        }) \
            .filter(created_at__gt=latest_time_minute - timedelta(minutes=20)) \
            .values('time') \
            .order_by('time') \
            .annotate(tweets=models.Count('id'))

        tweet_counts = list(tweet_counts)

    # Make timestamps JSON-serializable.
    for row in tweet_counts:
        row['time'] = row['time'].isoformat()

    return {
        'running': running,
        'terms': [t.term for t in terms],
        'processes': processes,
        'tweet_count': tweet_count,
        'earliest': earliest_time,
        'latest': latest_time,
        'avg_rate': avg_rate,
        'timeline': tweet_counts
    }
def stream_status():
    """Assemble a status report for the tweet stream.

    The report covers: running flag, enabled filter terms, the stream
    processes, tweet totals and time range, the overall average rate in
    tweets per second, and a minute-by-minute count timeline.
    """
    terms = FilterTerm.objects.filter(enabled=True)
    processes = StreamProcess.get_current_stream_processes()
    # Running if at least one process is in the RUNNING state.
    running = any(proc.status == StreamProcess.STREAM_STATUS_RUNNING
                  for proc in processes)

    Tweet = load_model("twitter_stream", "Tweet")
    tweet_count = Tweet.count_approx()
    earliest_time = Tweet.get_earliest_created_at()
    latest_time = Tweet.get_latest_created_at()

    # Overall average tweets/second, only when both endpoints exist.
    avg_rate = None
    if earliest_time is not None and latest_time is not None:
        span_seconds = (latest_time - earliest_time).total_seconds()
        avg_rate = float(tweet_count) / span_seconds

    # Per-minute tweet counts over the trailing 20-minute window.
    tweet_counts = []
    if latest_time is not None:
        minute_mark = latest_time.replace(second=0, microsecond=0)
        window_start = minute_mark - timedelta(minutes=20)

        engine = settings.DATABASES['default']['ENGINE']
        # SQL expression that floors created_at to the minute, per backend.
        if engine.endswith('mysql'):
            drop_seconds = "created_at - INTERVAL SECOND(created_at) SECOND"
        elif engine.endswith('postgresql_psycopg2'):
            drop_seconds = "date_trunc('minute', created_at)"
        else:
            drop_seconds = "created_at"

        qs = Tweet.objects.extra(select={'time': drop_seconds})
        qs = qs.filter(created_at__gt=window_start)
        qs = qs.values('time').order_by('time')
        qs = qs.annotate(tweets=models.Count('id'))
        tweet_counts = list(qs)

    # Convert bucket timestamps to ISO strings for serialization.
    for bucket in tweet_counts:
        bucket['time'] = bucket['time'].isoformat()

    return {
        'running': running,
        'terms': [t.term for t in terms],
        'processes': processes,
        'tweet_count': tweet_count,
        'earliest': earliest_time,
        'latest': latest_time,
        'avg_rate': avg_rate,
        'timeline': tweet_counts
    }
def stream_status():
    """Build a status snapshot of the tweet stream.

    Returns a dict with: whether any stream process is running, the
    enabled filter terms, the process list, the approximate tweet count,
    the analyzed-tweet count (or the placeholder string 'a lot' when the
    dataset is too large to count cheaply), the stream's time range, and
    the average rate in tweets per second.
    """
    terms = FilterTerm.objects.filter(enabled=True)
    processes = StreamProcess.get_current_stream_processes()
    # Running if at least one process reports RUNNING status.
    running = any(p.status == StreamProcess.STREAM_STATUS_RUNNING
                  for p in processes)

    stream_class_memory_cutoffs = get_stream_cutoff_times()

    Tweet = load_model("twitter_stream", "Tweet")
    tweet_count = Tweet.count_approx()
    analyzed_count = 'a lot'

    # Counting analyzed tweets is expensive; only attempt it below 10M rows.
    if tweet_count < 10000000:
        # items() instead of the Python-2-only iteritems() — identical
        # iteration behavior on both Python 2 and 3.
        for stream_class, cutoff_time in stream_class_memory_cutoffs.items():
            if stream_class == TweetStream:
                analyzed_count = TweetStream().count_before(cutoff_time)

    stream = TweetStream()
    earliest_time = stream.get_earliest_stream_time()
    latest_time = stream.get_latest_stream_time()

    # Overall average tweets/second, only when both endpoints exist.
    avg_rate = None
    if earliest_time is not None and latest_time is not None:
        avg_rate = float(tweet_count) / (latest_time - earliest_time).total_seconds()

    return {
        'running': running,
        'terms': [t.term for t in terms],
        'processes': processes,
        'tweet_count': tweet_count,
        'analyzed_count': analyzed_count,
        'earliest': earliest_time,
        'latest': latest_time,
        'avg_rate': avg_rate
    }