def test_get_memory_usage(self):
    """get_memory_usage() reports 'Unknown' on Windows, else a 'X.Y MB' string."""
    import os
    process = StreamProcess()
    usage = process.get_memory_usage()
    if os.name == 'nt':
        # Memory reporting is not implemented on Windows.
        self.assertEqual(usage, "Unknown")
    else:
        # Escape the dot: the original pattern r"\d+.\d+ MB" let '.' match
        # any character, so malformed values like "12X34 MB" also passed.
        self.assertRegexpMatches(usage, r"\d+\.\d+ MB")
def stream_status():
    """Assemble a status snapshot of the tweet stream for display.

    Returns:
        dict with the enabled filter terms, the known stream processes,
        whether any process is running, overall tweet count, earliest and
        latest tweet times, average tweets/second, and a per-minute
        timeline of tweet volume over the last 20 minutes.
    """
    terms = FilterTerm.objects.filter(enabled=True)
    processes = StreamProcess.get_current_stream_processes()
    # Running if at least one process reports STREAM_STATUS_RUNNING.
    running = any(p.status == StreamProcess.STREAM_STATUS_RUNNING
                  for p in processes)

    Tweet = load_model("twitter_stream", "Tweet")
    tweet_count = Tweet.count_approx()
    earliest_time = Tweet.get_earliest_created_at()
    latest_time = Tweet.get_latest_created_at()

    avg_rate = None
    if earliest_time is not None and latest_time is not None:
        # Average tweets/second over the whole stored span. Guard against
        # ZeroDivisionError when only one minute/instant of data exists.
        elapsed = (latest_time - earliest_time).total_seconds()
        if elapsed:
            avg_rate = float(tweet_count) / elapsed

    # Get the tweets/minute over the past 20 minutes (the query below
    # filters on a 20-minute window; an earlier comment said 10).
    tweet_counts = []
    if latest_time is not None:
        latest_time_minute = latest_time.replace(second=0, microsecond=0)

        # Backend-specific SQL to truncate created_at to minute resolution.
        if settings.DATABASES['default']['ENGINE'].endswith('mysql'):
            drop_seconds = "created_at - INTERVAL SECOND(created_at) SECOND"
        elif settings.DATABASES['default']['ENGINE'].endswith(
                'postgresql_psycopg2'):
            drop_seconds = "date_trunc('minute', created_at)"
        else:
            # Fallback: no truncation, so rows group at full timestamp
            # resolution on other backends.
            drop_seconds = "created_at"

        tweet_counts = Tweet.objects.extra(select={'time': drop_seconds}) \
            .filter(created_at__gt=latest_time_minute - timedelta(minutes=20)) \
            .values('time') \
            .order_by('time') \
            .annotate(tweets=models.Count('id'))
        tweet_counts = list(tweet_counts)
        # Serialize bucket timestamps for JSON-friendly output.
        for row in tweet_counts:
            row['time'] = row['time'].isoformat()

    return {
        'running': running,
        'terms': [t.term for t in terms],
        'processes': processes,
        'tweet_count': tweet_count,
        'earliest': earliest_time,
        'latest': latest_time,
        'avg_rate': avg_rate,
        'timeline': tweet_counts,
    }
def stream_status():
    """Build the stream-status payload shown by the monitoring view.

    Returns:
        dict: enabled filter terms, stream processes, a running flag,
        approximate tweet count, earliest/latest tweet timestamps,
        average tweets/second, and a minute-bucketed timeline covering
        the most recent 20 minutes.
    """
    terms = FilterTerm.objects.filter(enabled=True)
    processes = StreamProcess.get_current_stream_processes()
    # True when any known process reports itself as running.
    running = any(p.status == StreamProcess.STREAM_STATUS_RUNNING
                  for p in processes)

    Tweet = load_model("twitter_stream", "Tweet")
    tweet_count = Tweet.count_approx()
    earliest_time = Tweet.get_earliest_created_at()
    latest_time = Tweet.get_latest_created_at()

    avg_rate = None
    if earliest_time is not None and latest_time is not None:
        # Overall tweets/second; skip when the span is zero seconds to
        # avoid ZeroDivisionError on a freshly started stream.
        span = (latest_time - earliest_time).total_seconds()
        if span:
            avg_rate = float(tweet_count) / span

    # Tweets per minute over the past 20 minutes (the filter below uses a
    # 20-minute window; a previous comment incorrectly said 10).
    tweet_counts = []
    if latest_time is not None:
        latest_time_minute = latest_time.replace(second=0, microsecond=0)

        # Choose backend-specific SQL to round created_at down to the minute.
        if settings.DATABASES['default']['ENGINE'].endswith('mysql'):
            drop_seconds = "created_at - INTERVAL SECOND(created_at) SECOND"
        elif settings.DATABASES['default']['ENGINE'].endswith(
                'postgresql_psycopg2'):
            drop_seconds = "date_trunc('minute', created_at)"
        else:
            # Unknown backend: leave timestamps untruncated.
            drop_seconds = "created_at"

        tweet_counts = Tweet.objects.extra(select={'time': drop_seconds}) \
            .filter(created_at__gt=latest_time_minute - timedelta(minutes=20)) \
            .values('time') \
            .order_by('time') \
            .annotate(tweets=models.Count('id'))
        tweet_counts = list(tweet_counts)
        # ISO-format bucket times so the payload serializes cleanly.
        for row in tweet_counts:
            row['time'] = row['time'].isoformat()

    return {
        'running': running,
        'terms': [t.term for t in terms],
        'processes': processes,
        'tweet_count': tweet_count,
        'earliest': earliest_time,
        'latest': latest_time,
        'avg_rate': avg_rate,
        'timeline': tweet_counts,
    }
def stream_status():
    """Summarize the state of the tweet stream and its analysis backlog.

    Returns:
        dict: enabled filter terms, stream processes, a running flag,
        approximate tweet count, a count of analyzed tweets (or the
        placeholder 'a lot' for large databases), earliest/latest
        stream times, and average tweets/second.
    """
    terms = FilterTerm.objects.filter(enabled=True)
    processes = StreamProcess.get_current_stream_processes()
    # Running if at least one process reports STREAM_STATUS_RUNNING.
    running = any(p.status == StreamProcess.STREAM_STATUS_RUNNING
                  for p in processes)

    stream_class_memory_cutoffs = get_stream_cutoff_times()

    Tweet = load_model("twitter_stream", "Tweet")
    tweet_count = Tweet.count_approx()

    # Counting analyzed tweets is expensive, so only do it when the
    # database is reasonably small; otherwise report a placeholder.
    analyzed_count = 'a lot'
    if tweet_count < 10000000:
        # .items() instead of the Python-2-only .iteritems() so this
        # module also runs under Python 3 (.items() works on both).
        for stream_class, cutoff_time in stream_class_memory_cutoffs.items():
            if stream_class == TweetStream:
                analyzed_count = TweetStream().count_before(cutoff_time)

    stream = TweetStream()
    earliest_time = stream.get_earliest_stream_time()
    latest_time = stream.get_latest_stream_time()

    avg_rate = None
    if earliest_time is not None and latest_time is not None:
        # Overall tweets/second; guard against a zero-length span to
        # avoid ZeroDivisionError when the stream just started.
        elapsed = (latest_time - earliest_time).total_seconds()
        if elapsed:
            avg_rate = float(tweet_count) / elapsed

    return {
        'running': running,
        'terms': [t.term for t in terms],
        'processes': processes,
        'tweet_count': tweet_count,
        'analyzed_count': analyzed_count,
        'earliest': earliest_time,
        'latest': latest_time,
        'avg_rate': avg_rate,
    }