def create_graph_impl(request, event_id):
    """Build the tweet-frequency graph HTML fragment for one event.

    Buckets the event's tweets into minute/hour/day bins (picked from the
    event's duration), fills gaps with zero-count bins, detects activity
    peaks with a mean-deviation outlier detector, annotates the peaks, and
    renders the ``twitinfo/create_graph.html`` template.

    Args:
        request: Django request object (unused here, kept for the view API).
        event_id: primary key of the ``Event`` whose tweets are graphed.

    Returns:
        The rendered template as a string.
    """
    e = Event.objects.get(id=event_id)
    sdate = e.start_date
    edate = e.end_date
    tweets = Tweet.objects.filter(keyword__event=event_id)
    # Missing boundaries default to the first/last tweet timestamps.
    if sdate is None:
        sdate = tweets.order_by('created_at')[0].created_at
    if edate is None:
        edate = tweets.order_by('-created_at')[0].created_at
    tdelta = edate - sdate
    total_sec = tdelta.seconds + tdelta.days * 24 * 3600
    total_min = total_sec / 60.0
    total_hours = total_min / 60.0
    # Choose the bucket size and the matching SQL date-format expressions.
    # "d" is a JavaScript `new Date(...)` literal emitted straight into the
    # page; "date" is a comma-separated timestamp used for Python parsing.
    if total_min <= 1440:
        # Up to one day of data: minute buckets.
        td = timedelta(minutes=1)
        sec_divisor = 60
        stf = {"date": '%Y,%m,%d,%H,%M', "d": 'new Date(%Y,%m-1,%d,%H,%M)'}
        if settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2':
            # Double-quoted letters keep to_char from treating them as
            # format patterns (e.g. "W" and "D" inside "new Date").
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD, HH24,MI)')",
                "date": "to_char(created_at, 'YYYY,MM,DD,HH24,MI')"
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d,%%H,%%M)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d,%%H,%%M') , created_at)"
            }
    elif total_hours <= 2016:  # 24 hours x 28 days x 3 = about 3 months
        # Up to ~3 months: hour buckets.
        td = timedelta(hours=1)
        sec_divisor = 3600
        stf = {"date": '%Y,%m,%d,%H', "d": 'new Date(%Y,%m-1,%d,%H)'}
        if settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2':
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD,HH24)')",
                "date": "to_char(created_at, 'YYYY,MM,DD,HH24')"
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d,%%H)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d,%%H') , created_at)"
            }
    else:
        # Longer spans: day buckets.
        td = timedelta(days=1)
        sec_divisor = 86400
        stf = {"date": '%Y,%m,%d', "d": 'new Date(%Y,%m-1,%d)'}
        if settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2':
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD)')",
                "date": "to_char(created_at, 'YYYY,MM,DD')"
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d') , created_at)"
            }
    tweets = tweets.filter(created_at__gte=sdate).filter(
        created_at__lte=edate).extra(select=select_data).values(
            'd', 'date').annotate(num_tweets=Count('tweet')).order_by('date')
    tweets = list(tweets)
    i = 1
    detector = MeanOutliers.factory()
    list_peaks = []
    # Loop through the tweets and detect a peak based on the mean-deviation
    # function. Save the start date and the date of the peak in a
    # dictionary; save each peak in list_peaks.
    while i < len(tweets):
        tweets[0]['title'] = 'null'
        tweets[0]['data'] = 'null'
        sdt_p = convert_date(tweets[i - 1]['date'])
        sdt_n = datetime(*[int(part) for part in tweets[i]['date'].split(',')])
        gap = sdt_n - sdt_p
        # Floor division keeps the bucket-gap an integer on Python 3 as
        # well (matches the original Python 2 `/` semantics).
        delta_d = (gap.seconds + gap.days * 24 * 3600) // sec_divisor
        if delta_d != 1:
            # The buckets are not adjacent: pad the hole with zero-count
            # placeholder entries so the rendered series is continuous.
            j = 0
            while j < delta_d - 1:
                insert_tweet = {
                    'title': 'null',
                    'num_tweets': 0,
                    'data': 'null',
                    'children': 'null'
                }
                sdt_p = sdt_p + td
                insert_tweet['date'] = sdt_p.strftime(stf['date'])
                insert_tweet['d'] = sdt_p.strftime(stf['d'])
                tweets.insert(i + j, insert_tweet)
                j += 1
        current_val = tweets[i]['num_tweets']
        previous_val = tweets[i - 1]['num_tweets']
        mdiv = detector(None, tweets[i]['num_tweets'], 1)
        if mdiv > 2.0 and current_val > previous_val and current_val > 10:
            start_freq = previous_val
            start_date = tweets[i - 1]['date']
            # Once a peak is detected, keep climbing up the peak until the
            # maximum is reached. Store the peak date and keep running the
            # mdiv function on each value, because it requires previous
            # values to calculate the mean.
            while current_val > previous_val:
                tweets[i]['title'] = 'null'
                tweets[i]['data'] = 'null'
                if i + 1 < len(tweets):
                    i += 1
                    mdiv = detector(None, tweets[i]['num_tweets'], 1)
                    current_val = tweets[i]['num_tweets']
                    previous_val = tweets[i - 1]['num_tweets']
                    peak_date = tweets[i - 1]['date']
                else:
                    peak_date = tweets[i]['date']
                    i += 1
                    break
            d = {
                "start_date": start_date,
                "start_freq": start_freq,
                "peak_date": peak_date
            }
            list_peaks.append(d)
        else:
            tweets[i]['title'] = 'null'
            tweets[i]['data'] = 'null'
            i += 1
    keywords = Keyword.objects.filter(event__id=event_id)
    words = [kw.key_word for kw in keywords]
    peaks = find_end_dates(tweets, list_peaks)
    tweets = annotate_peaks(peaks, tweets, event_id, words)
    # Child-event annotation is best-effort: events without children (or a
    # failing relation lookup) simply keep the unannotated series.
    try:
        children = e.children.order_by('start_date')
        tweets = peak_child_detection(children, list_peaks, tweets)
    except Exception:
        pass
    t = loader.get_template('twitinfo/create_graph.html')
    resp_string = t.render(Context({'tweets': tweets}))
    return resp_string
# NOTE(review): this is a second definition of create_graph_impl in the same
# file; at import time it shadows the earlier copy. One of the two should be
# deleted once the intended version is confirmed.
def create_graph_impl(request, event_id):
    """Build the tweet-frequency graph HTML fragment for one event.

    Buckets the event's tweets into minute/hour/day bins (picked from the
    event's duration), fills gaps with zero-count bins, detects activity
    peaks with a mean-deviation outlier detector, annotates the peaks, and
    renders the ``twitinfo/create_graph.html`` template.

    Args:
        request: Django request object (unused here, kept for the view API).
        event_id: primary key of the ``Event`` whose tweets are graphed.

    Returns:
        The rendered template as a string.
    """
    e = Event.objects.get(id=event_id)
    sdate = e.start_date
    edate = e.end_date
    tweets = Tweet.objects.filter(keyword__event=event_id)
    # Missing boundaries default to the first/last tweet timestamps.
    if sdate is None:
        sdate = tweets.order_by('created_at')[0].created_at
    if edate is None:
        edate = tweets.order_by('-created_at')[0].created_at
    tdelta = edate - sdate
    total_sec = tdelta.seconds + tdelta.days * 24 * 3600
    total_min = total_sec / 60.0
    total_hours = total_min / 60.0
    # Choose the bucket size and the matching SQL date-format expressions.
    # "d" is a JavaScript `new Date(...)` literal emitted straight into the
    # page; "date" is a comma-separated timestamp used for Python parsing.
    if total_min <= 1440:
        # Up to one day of data: minute buckets.
        td = timedelta(minutes=1)
        sec_divisor = 60
        stf = {"date": '%Y,%m,%d,%H,%M', "d": 'new Date(%Y,%m-1,%d,%H,%M)'}
        if settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2':
            # Double-quoted letters keep to_char from treating them as
            # format patterns (e.g. "W" and "D" inside "new Date").
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD, HH24,MI)')",
                "date": "to_char(created_at, 'YYYY,MM,DD,HH24,MI')"
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d,%%H,%%M)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d,%%H,%%M') , created_at)"
            }
    elif total_hours <= 2016:  # 24 hours x 28 days x 3 = about 3 months
        # Up to ~3 months: hour buckets.
        td = timedelta(hours=1)
        sec_divisor = 3600
        stf = {"date": '%Y,%m,%d,%H', "d": 'new Date(%Y,%m-1,%d,%H)'}
        if settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2':
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD,HH24)')",
                "date": "to_char(created_at, 'YYYY,MM,DD,HH24')"
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d,%%H)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d,%%H') , created_at)"
            }
    else:
        # Longer spans: day buckets.
        td = timedelta(days=1)
        sec_divisor = 86400
        stf = {"date": '%Y,%m,%d', "d": 'new Date(%Y,%m-1,%d)'}
        if settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2':
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD)')",
                "date": "to_char(created_at, 'YYYY,MM,DD')"
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d') , created_at)"
            }
    tweets = tweets.filter(created_at__gte=sdate).filter(
        created_at__lte=edate).extra(select=select_data).values(
            'd', 'date').annotate(num_tweets=Count('tweet')).order_by('date')
    tweets = list(tweets)
    i = 1
    detector = MeanOutliers.factory()
    list_peaks = []
    # Loop through the tweets and detect a peak based on the mean-deviation
    # function. Save the start date and the date of the peak in a
    # dictionary; save each peak in list_peaks.
    while i < len(tweets):
        tweets[0]['title'] = 'null'
        tweets[0]['data'] = 'null'
        sdt_p = convert_date(tweets[i - 1]['date'])
        sdt_n = datetime(*[int(part) for part in tweets[i]['date'].split(',')])
        gap = sdt_n - sdt_p
        # Floor division keeps the bucket-gap an integer on Python 3 as
        # well (matches the original Python 2 `/` semantics).
        delta_d = (gap.seconds + gap.days * 24 * 3600) // sec_divisor
        if delta_d != 1:
            # The buckets are not adjacent: pad the hole with zero-count
            # placeholder entries so the rendered series is continuous.
            j = 0
            while j < delta_d - 1:
                insert_tweet = {
                    'title': 'null',
                    'num_tweets': 0,
                    'data': 'null',
                    'children': 'null'
                }
                sdt_p = sdt_p + td
                insert_tweet['date'] = sdt_p.strftime(stf['date'])
                insert_tweet['d'] = sdt_p.strftime(stf['d'])
                tweets.insert(i + j, insert_tweet)
                j += 1
        current_val = tweets[i]['num_tweets']
        previous_val = tweets[i - 1]['num_tweets']
        mdiv = detector(None, tweets[i]['num_tweets'], 1)
        if mdiv > 2.0 and current_val > previous_val and current_val > 10:
            start_freq = previous_val
            start_date = tweets[i - 1]['date']
            # Once a peak is detected, keep climbing up the peak until the
            # maximum is reached. Store the peak date and keep running the
            # mdiv function on each value, because it requires previous
            # values to calculate the mean.
            while current_val > previous_val:
                tweets[i]['title'] = 'null'
                tweets[i]['data'] = 'null'
                if i + 1 < len(tweets):
                    i += 1
                    mdiv = detector(None, tweets[i]['num_tweets'], 1)
                    current_val = tweets[i]['num_tweets']
                    previous_val = tweets[i - 1]['num_tweets']
                    peak_date = tweets[i - 1]['date']
                else:
                    peak_date = tweets[i]['date']
                    i += 1
                    break
            d = {
                "start_date": start_date,
                "start_freq": start_freq,
                "peak_date": peak_date
            }
            list_peaks.append(d)
        else:
            tweets[i]['title'] = 'null'
            tweets[i]['data'] = 'null'
            i += 1
    keywords = Keyword.objects.filter(event__id=event_id)
    words = [kw.key_word for kw in keywords]
    peaks = find_end_dates(tweets, list_peaks)
    tweets = annotate_peaks(peaks, tweets, event_id, words)
    # Child-event annotation is best-effort: events without children (or a
    # failing relation lookup) simply keep the unannotated series.
    try:
        children = e.children.order_by('start_date')
        tweets = peak_child_detection(children, list_peaks, tweets)
    except Exception:
        pass
    t = loader.get_template('twitinfo/create_graph.html')
    resp_string = t.render(Context({'tweets': tweets}))
    return resp_string