Example #1
from datetime import datetime, timedelta

from django.conf import settings
from django.db.models import Count
from django.template import Context, loader

# Project-local names used below (Event, Tweet, Keyword, MeanOutliers,
# convert_date, find_end_dates, annotate_peaks, peak_child_detection) are
# assumed to be imported from the surrounding application.


def create_graph_impl(request, event_id):
    e = Event.objects.get(id=event_id)
    sdate = e.start_date
    edate = e.end_date
    tweets = Tweet.objects.filter(keyword__event=event_id)
    
    if sdate is None:
        sdate = tweets.order_by('created_at')[0].created_at
    if edate is None:
        edate = tweets.order_by('-created_at')[0].created_at
        
    tdelta = edate - sdate
    total_sec = tdelta.seconds + tdelta.days * 24 * 3600
    total_min = total_sec / 60.0
    total_hours = total_min / 60.0
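    # Note: total_sec is equivalent to tdelta.total_seconds() on Python 2.7+.
    # The overall span picks the bucket size below; for example, a 10-day
    # event gives total_min = 14400 (> 1440, so not per-minute) and
    # total_hours = 240 (<= 2016, so hourly buckets).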
    
    if total_min <= 1440:
        td = timedelta(minutes=1)
        sec_divisor = 60
        stf = {"date": '%Y,%m,%d,%H,%M', "d": 'new Date(%Y,%m-1,%d,%H,%M)'}
        if settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2':
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD, HH24,MI)')",
                "date": "to_char(created_at, 'YYYY,MM,DD,HH24,MI')"
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d,%%H,%%M)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d,%%H,%%M') , created_at)"
            }

    elif total_hours <= 2016:  # 24 hours x 28 days x 3 = about 3 months
        td = timedelta(hours=1)
        sec_divisor = 3600
        stf = {"date": '%Y,%m,%d,%H', "d": 'new Date(%Y,%m-1,%d,%H)'}
        if settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2':
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD,HH24)')",
                "date": "to_char(created_at, 'YYYY,MM,DD,HH24')"
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d,%%H)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d,%%H') , created_at)"
            }
    else:
        td = timedelta(days=1)
        sec_divisor = 86400
        stf = {"date": '%Y,%m,%d', "d": 'new Date(%Y,%m-1,%d)'}
        if settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2':
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD)')",
                "date": "to_char(created_at, 'YYYY,MM,DD')"
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d') , created_at)"
            }

    
    tweets = tweets.filter(created_at__gte=sdate).filter(
        created_at__lte=edate).extra(select=select_data).values(
            'd', 'date').annotate(num_tweets=Count('tweet')).order_by('date')
    tweets = list(tweets)
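    # Illustrative shape of one aggregated row at hourly resolution (the
    # values are made up): {'d': 'new Date(2011,03-1,11,05)',
    # 'date': '2011,03,11,05', 'num_tweets': 42}. The literal "-1" in 'd'
    # makes the JavaScript Date constructor receive a zero-based month.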
    
    i = 1
    detector = MeanOutliers.factory()
    list_peaks = []
    # Walk the tweet buckets and detect peaks with the mean-deviation
    # detector: for each peak, record its start date and peak date in a
    # dict and collect the dicts in list_peaks.
    while i < len(tweets):
        tweets[0]['title'] = 'null'
        tweets[0]['data'] = 'null'
        # convert_date parses the comma-separated bucket date string
        # (equivalent to datetime(*map(int, s.split(',')))).
        sdt_p = convert_date(tweets[i - 1]['date'])
        sd_n = tweets[i]['date'].split(',')
        sd_n = map(int, sd_n)
        sdt_n = datetime(*sd_n)
        delta_d = sdt_n - sdt_p
        delta_d = (delta_d.seconds + delta_d.days * 24 * 3600) / sec_divisor
    
        if delta_d != 1:
            # Fill any gap between consecutive buckets with zero-count
            # entries so the time series stays continuous.
            j = 0
            while j < delta_d - 1:
                insert_tweet = {'title': 'null', 'num_tweets': 0,
                                'data': 'null', 'children': 'null'}
                sdt_p = sdt_p + td
                insert_tweet['date'] = sdt_p.strftime(stf['date'])
                insert_tweet['d'] = sdt_p.strftime(stf['d'])
                tweets.insert(i + j, insert_tweet)
                j += 1
       
        current_val = tweets[i]['num_tweets']
        previous_val = tweets[i - 1]['num_tweets']
        mdiv = detector(None, tweets[i]['num_tweets'], 1)
        if mdiv > 2.0 and current_val > previous_val and current_val > 10:
            start_freq = previous_val
            start_date = tweets[i - 1]['date']
            # Once a peak is detected, keep climbing until the maximum is
            # reached. Store the peak date and keep feeding each value to the
            # detector, because it requires the previous values to maintain
            # its running mean.
            while current_val > previous_val:
                tweets[i]['title'] = 'null'
                tweets[i]['data'] = 'null'
                if i + 1 < len(tweets):
                    i += 1
                    mdiv = detector(None, tweets[i]['num_tweets'], 1)
                    current_val = tweets[i]['num_tweets']
                    previous_val = tweets[i - 1]['num_tweets']
                    peak_date = tweets[i - 1]['date']
                else:
                    peak_date = tweets[i]['date']
                    i += 1
                    break
            d = {"start_date": start_date, "start_freq": start_freq,
                 "peak_date": peak_date}
            list_peaks.append(d)
        else:
            tweets[i]['title'] = 'null'
            tweets[i]['data'] = 'null'
            i += 1

    keywords = Keyword.objects.filter(event__id=event_id)
    words = [kw.key_word for kw in keywords]
    peaks = find_end_dates(tweets, list_peaks)

    tweets = annotate_peaks(peaks, tweets, event_id, words)
    try:
        # Child events (if any) are overlaid on the detected peaks.
        children = e.children.order_by('start_date')
        tweets = peak_child_detection(children, list_peaks, tweets)
    except Exception:
        # Events without usable children leave the tweet series unchanged.
        pass
    t = loader.get_template('twitinfo/create_graph.html')
    resp_string = t.render(Context({'tweets': tweets}))
    return resp_string
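
The loop above relies only on a small contract from MeanOutliers: factory() returns a callable detector, each call detector(None, value, 1) feeds it one new bucket count, and the returned score is roughly how many mean deviations the new count sits above the running mean (scores above 2.0, together with a rising count, mark a peak start). The class below is not the project's MeanOutliers implementation; it is a minimal stand-in with the same call shape, assuming an exponentially weighted mean and mean deviation.

class SketchMeanOutliers(object):
    """Illustrative stand-in for the assumed MeanOutliers interface."""

    @classmethod
    def factory(cls, alpha=0.125):
        return cls(alpha)

    def __init__(self, alpha):
        self.alpha = alpha    # smoothing factor for the running statistics
        self.mean = None      # exponentially weighted mean of past counts
        self.meandev = None   # exponentially weighted mean deviation

    def __call__(self, _key, value, _weight):
        if self.mean is None:
            self.mean, self.meandev = float(value), 0.0
            return 0.0
        deviation = abs(value - self.mean)
        score = deviation / self.meandev if self.meandev else 0.0
        # Update the running statistics after scoring the new value.
        self.meandev = (1 - self.alpha) * self.meandev + self.alpha * deviation
        self.mean = (1 - self.alpha) * self.mean + self.alpha * value
        return score

A drop-in such as detector = SketchMeanOutliers.factory() can be used to experiment with the peak-detection loop in isolation.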
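
For context, create_graph_impl returns a rendered HTML fragment rather than an HttpResponse, so a thin view wrapper is needed somewhere in the project. The wrapper and URL pattern below are assumptions for illustration only, not code from the original application.

from django.http import HttpResponse

def create_graph(request, event_id):
    # Hypothetical wrapper: render the graph fragment and return it as-is.
    return HttpResponse(create_graph_impl(request, event_id))

# Hypothetical old-style URLconf entry:
# urlpatterns = patterns('',
#     (r'^event/(?P<event_id>\d+)/graph/$', create_graph),
# )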