Example #1
0
def short_utilization(user_id, app_type_id, ymd):
    """Store the hourly usage pattern of an appliance for the past week.

    For each of the seven days ending at ``ymd`` the day's 'OneTag'/'Otag'
    cell (a msgpack-encoded list of ``[start, end]`` rounds) and its
    'Daily'/'cnt' cell are read.  Every hour of day (start time shifted by
    +9 hours) in which a round started is flagged, the flags are divided by
    the number of days whose count is > 0, and the 24 resulting values are
    written as strings to the 'Weekly' columns ``s00`` .. ``s23`` of the row
    for ``ymd``.

    Args:
        user_id: Passed through to ``readCell`` / ``insertCell``.
        app_type_id: Passed through to ``readCell`` / ``insertCell``.
        ymd: Epoch seconds of the day being processed (anything ``int()``
            accepts).

    Returns:
        str: ``"<YYYYMMDD> [r0,r1,...,r23]"`` where ``rN`` is the
        ``insertCell`` response for hour ``N``.

    Note:
        When no day in the window has a count > 0 the division is by zero and
        numpy yields ``nan`` (or ``inf``) values, which are stored as-is.
    """
    import msgpack
    import settings
    reload(settings)  # Python 2 builtin: pick up edited settings on each call
    import numpy as np
    from datetime import datetime
    from BigTable import insertCell
    from BigTable import readCell

    day = 86400
    week = day * 7
    hour = 3600

    ymd = int(ymd)
    table_name = settings.tablename

    def table_for(stamp):
        # Tables are named "<tablename><YYYYMM>" after the UTC month of stamp.
        return '{}{}'.format(
            table_name, "{:%Y%m}".format(datetime.utcfromtimestamp(stamp)))

    start_times = []
    num_on_days = 0
    # Walk the window once: ymd - 6 days .. ymd (inclusive).
    for offset in range(1, 8):
        idate = ymd - week + (day * offset)
        table_id = table_for(idate)

        cal_OneTag = readCell(table_id, user_id, app_type_id, idate, 'OneTag',
                              'Otag')
        if cal_OneTag is not None:
            one_tag = msgpack.unpackb(cal_OneTag)
            # An empty round list simply contributes no start times.
            start_times.extend(a_round[0] for a_round in one_tag)

        cal_count = readCell(table_id, user_id, app_type_id, idate, 'Daily',
                             'cnt')
        if cal_count is not None and cal_count > 0:
            num_on_days += 1

    # NOTE(review): an hour is flagged once (0/1) no matter on how many days
    # it was used, so each value below is either 0 or 1/num_on_days.  If a
    # per-day frequency was intended the flags must be summed per day --
    # confirm before changing, since it alters the stored values.
    h_tag = np.zeros(24)
    for start_time in start_times:
        st_time_jst = int(start_time) + (9 * hour)
        h_tag[datetime.utcfromtimestamp(st_time_jst).hour] = 1

    ut_pct = h_tag / num_on_days  # nan/inf when num_on_days == 0

    table_id = table_for(ymd)
    responses = []
    for i in range(24):
        column = "s{:02d}".format(i)
        responses.append(
            insertCell(table_id, user_id, app_type_id, ymd, 'Weekly', column,
                       str(ut_pct[i])))

    ymd_label = "{:%Y%m%d}".format(datetime.utcfromtimestamp(ymd))
    return "{} [".format(ymd_label) + ",".join(
        str(response) for response in responses) + "]"
def Weekly(user_id, app_type_id, ymd):
    """Compute and store the weekly usage statistics for the week ending at ``ymd``.

    Steps:
      1. Read 'Daily'/'cnt' and 'Daily'/'dur' for the seven days ending at
         ``ymd`` and write the weekly totals and per-active-day averages to
         'Weekly' columns ``wCnt``, ``aCnt``, ``wDur``, ``aDur``.
      2. If all four inserts returned a non-zero response, read the weekly
         totals of the four weeks ending at ``ymd`` and write the
         week-over-week deltas (``wowCnt``, ``wowDur``), the moving averages
         (``ma4Cnt``, ``ma4Dur``) and the four "unusual" flags (``uHcnt``,
         ``uLcnt``, ``uHdur``, ``uLdur``).

    Args:
        user_id: Passed through to ``readCell`` / ``insertCell``.
        app_type_id: Passed through to ``readCell`` / ``insertCell``.
        ymd: Epoch seconds of the last day of the week (anything ``int()``
            accepts).

    Returns:
        tuple: ``(yyyymmdd, wcnt, acnt, wdur, adur, wow_cnt, wow_dur,
        ma4_count, ma4_duration, unusual_h_cnt, unusual_l_cnt, unusual_h_dur,
        unusual_l_dur)`` where every element after the date string is the
        ``insertCell`` response for that column.  All twelve are ``-1`` when
        there was no daily data or any of the first four inserts returned 0.
    """
    import settings
    reload(settings)  # Python 2 builtin: pick up edited settings on each call
    import numpy as np
    from datetime import datetime
    from BigTable import insertCell
    from BigTable import readCell

    day = 86400
    week = day * 7

    ymd = int(ymd)
    table_name = settings.tablename
    ymd_label = "{:%Y%m%d}".format(datetime.utcfromtimestamp(ymd))
    not_eligible = (ymd_label,) + (-1,) * 12

    def table_for(stamp):
        # Tables are named "<tablename><YYYYMM>" after the UTC month of stamp.
        return '{}{}'.format(
            table_name, "{:%Y%m}".format(datetime.utcfromtimestamp(stamp)))

    # ---- 1. weekly totals from the daily cells (ymd - 6 days .. ymd) ----
    daily_records = []
    for offset in range(1, 8):
        idate = ymd - week + (day * offset)
        table_id = table_for(idate)
        cal_count = readCell(table_id, user_id, app_type_id, idate, 'Daily',
                             'cnt')
        cal_duration = readCell(table_id, user_id, app_type_id, idate, 'Daily',
                                'dur')
        if cal_count is not None and cal_duration is not None:
            daily_records.append((cal_count, cal_duration))

    # ``np.int`` was only an alias of the builtin and no longer exists in
    # current NumPy releases; ``int`` is the same conversion.
    week_days = np.array(daily_records).astype(int)
    if len(week_days) == 0:
        return not_eligible

    counts = week_days[:, 0]
    durations = week_days[:, 1]
    w_count = counts.sum()
    w_duration = durations.sum()
    num_days = int((counts > 0).sum())  # days with at least one usage
    # Floor division keeps the integer averages the Python 2 ``/`` produced;
    # the num_days guard avoids an integer divide-by-zero.
    ave_count = w_count // num_days if (w_count != 0 and num_days) else 0
    ave_duration = w_duration // num_days if (w_duration != 0
                                              and num_days) else 0

    table_id = table_for(ymd)
    bt_wcnt = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                         'wCnt', w_count)
    bt_acnt = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                         'aCnt', ave_count)
    bt_wdur = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                         'wDur', w_duration)
    bt_adur = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                         'aDur', ave_duration)

    # Continue only when none of the four responses is 0.
    # NOTE(review): what a 0 response means is defined by BigTable.insertCell,
    # which is not visible here -- presumably "nothing written".
    if bt_wcnt == 0 or bt_acnt == 0 or bt_wdur == 0 or bt_adur == 0:
        return not_eligible

    # ---- 2. comparison against the weekly history ----
    weekly_records = []
    previous_index = None  # position of the newest record older than ymd
    for weeks_back in (3, 2, 1, 0):
        idate = ymd - (week * weeks_back)
        table_id = table_for(idate)
        cal_count = readCell(table_id, user_id, app_type_id, idate, 'Weekly',
                             'wCnt')
        cal_duration = readCell(table_id, user_id, app_type_id, idate,
                                'Weekly', 'wDur')
        if cal_count is not None and cal_duration is not None:
            weekly_records.append((cal_count, cal_duration))
            if idate != ymd:
                previous_index = len(weekly_records) - 1

    weeks_data = np.array(weekly_records).astype(int)

    # Week-over-week delta.  The row for ``ymd`` itself was written just
    # above, so the last record is the current week; the baseline has to be
    # the most recent record *before* it, otherwise the delta is always 0.
    if previous_index is None:
        wow_cnt = 0
        wow_dur = 0
    else:
        wow_cnt = w_count - int(weeks_data[previous_index, 0])
        wow_dur = w_duration - int(weeks_data[previous_index, 1])

    # Moving average only when the full window of weekly records is present.
    if len(weeks_data) == settings.window_size:
        ma4_count = float(np.ma.average(weeks_data[:, 0]))
        ma4_duration = float(np.ma.average(weeks_data[:, 1]))
    else:
        ma4_count = float(0)
        ma4_duration = float(0)

    unusual_h_cnt = False
    unusual_l_cnt = False
    unusual_h_dur = False
    unusual_l_dur = False
    count_buffer = 1.0
    duration_buffer = 210.0  # 30min per day * 7 days = 210min
    if ma4_count != 0:
        # Unusual count?
        large_top_count = (ma4_count + count_buffer) * settings.param_th_top
        small_bottom_count = (ma4_count -
                              count_buffer) * settings.param_th_bottom
        if w_count > large_top_count:
            unusual_h_cnt = True
        elif w_count < small_bottom_count:
            unusual_l_cnt = True

        # Unusual duration?
        large_top_duration = (ma4_duration +
                              duration_buffer) * settings.param_th_top
        small_bottom_duration = (ma4_duration -
                                 duration_buffer) * settings.param_th_bottom
        if w_duration > large_top_duration:
            unusual_h_dur = True
        elif w_duration < small_bottom_duration:
            unusual_l_dur = True

    table_id = table_for(ymd)
    wow_cnt = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                         'wowCnt', wow_cnt)
    wow_dur = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                         'wowDur', wow_dur)
    ma4_count = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                           'ma4Cnt', ma4_count)
    ma4_duration = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                              'ma4Dur', ma4_duration)
    unusual_h_cnt = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                               'uHcnt', unusual_h_cnt)
    unusual_l_cnt = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                               'uLcnt', unusual_l_cnt)
    unusual_h_dur = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                               'uHdur', unusual_h_dur)
    unusual_l_dur = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                               'uLdur', unusual_l_dur)

    return (ymd_label, bt_wcnt, bt_acnt, bt_wdur, bt_adur, wow_cnt, wow_dur,
            ma4_count, ma4_duration, unusual_h_cnt, unusual_l_cnt,
            unusual_h_dur, unusual_l_dur)
def on_time(user_id, app_type_id, ts_power, ymd):
    """Derive the day's usage rounds from power samples and store the daily cells.

    Samples whose power exceeds the appliance's minimum-power threshold are
    grouped into rounds: a new round starts whenever two consecutive
    timestamps are more than the interval threshold apart.  Rounds not longer
    than the duration threshold are dropped.  The remaining rounds give the
    day's count and total duration, which are compared against the mean and
    standard deviation of the previous 56 days to set the "unusual" flags,
    and the number of days since the last active day is computed.

    Args:
        user_id: Passed through to ``readCell`` / ``insertCell``.
        app_type_id: Appliance type; ``str(app_type_id)`` selects the
            ``(min power, min duration, max gap)`` entry in
            ``settings.thresholds``.
        ts_power: Sequence of ``(timestamp, power)`` pairs.
        ymd: Epoch seconds of the day being processed (anything ``int()``
            accepts).

    Returns:
        tuple: ``(yyyymmdd, str(one_tag), count, duration, unusual_h_cnt,
        unusual_l_cnt, unusual_h_dur, unusual_l_dur, interval_days)`` where
        each element after the date string is the ``insertCell`` response for
        that cell.  When the day has no qualifying round nothing is read or
        written and all eight are ``-1`` (``'-1'`` for ``one_tag``).
    """
    import msgpack
    import settings
    reload(settings)  # Python 2 builtin: pick up edited settings on each call
    import numpy as np
    from datetime import datetime
    from BigTable import insertCell
    from BigTable import readCell

    thresholds = settings.thresholds[str(app_type_id)]
    threshold_minpower = thresholds[0]
    threshold_duration = thresholds[1]
    threshold_interval = thresholds[2]

    day = 86400
    week = day * 7
    ymd = int(ymd)
    ymd_label = "{:%Y%m%d}".format(datetime.utcfromtimestamp(ymd))
    not_eligible = (ymd_label, str(-1), -1, -1, -1, -1, -1, -1, -1)

    # Timestamps of the samples whose power is above the threshold.
    stamps = sorted(
        int(sample[0]) for sample in ts_power
        if sample[1] > threshold_minpower)
    if not stamps:
        return not_eligible

    # Split the sorted timestamps into [start, end] rounds at every gap that
    # is larger than the interval threshold.  Plain ints keep the msgpack
    # payload free of numpy scalar types.
    rounds = []
    start = stamps[0]
    for current, following in zip(stamps, stamps[1:]):
        if following - current > threshold_interval:
            rounds.append([start, current])
            start = following
    rounds.append([start, stamps[-1]])

    one_tag = [u for u in rounds if u[1] - u[0] > threshold_duration]
    count = len(one_tag)
    duration = int(sum(u[1] - u[0] for u in one_tag))
    if count == 0 or duration == 0:
        # Nothing would be stored for this day, so skip the history reads.
        return not_eligible

    # ---- history: the 56 days before ymd (ymd - 56 days .. ymd - 1 day) ----
    table_name = settings.tablename
    user_record = []
    active_days = []
    first_day = ymd - day - (week * 8)
    for offset in range(1, (8 * 7) + 1):
        idate = first_day + (day * offset)
        yyyymm = "{:%Y%m}".format(datetime.utcfromtimestamp(idate))
        table_id = '{}{}'.format(table_name, yyyymm)
        cal_count = readCell(table_id, user_id, app_type_id, idate, 'Daily',
                             'cnt')
        cal_duration = readCell(table_id, user_id, app_type_id, idate,
                                'Daily', 'dur')
        if cal_count is not None and cal_duration is not None:
            user_record.append((cal_count, cal_duration))
        if cal_count is not None and cal_count > 0:
            # Active day, used for the interval-days calculation.
            active_days.append(idate)

    # ``np.int`` was only an alias of the builtin and no longer exists in
    # current NumPy releases; ``int`` is the same conversion.
    past_days = np.array(user_record).astype(int)

    unusual_h_cnt = False
    unusual_l_cnt = False
    unusual_h_dur = False
    unusual_l_dur = False
    if len(past_days) != 0:  # skip new users / users with no history
        # Zero-usage days are left out of the mean and standard deviation.
        # With no non-zero day both are nan and no flag is raised.
        used = past_days[past_days[:, 0] != 0]
        ave_count = np.mean(used[:, 0])
        ave_duration = np.mean(used[:, 1])
        # Outlier thresholds: STD_MULTIPLE standard deviations from the mean.
        count_th = settings.STD_MULTIPLE * np.std(used[:, 0])
        duration_th = settings.STD_MULTIPLE * np.std(used[:, 1])

        # Unusual count?
        if count > ave_count + count_th:
            unusual_h_cnt = True
        elif count < ave_count - count_th:
            unusual_l_cnt = True

        # Unusual duration?
        if duration > ave_duration + duration_th:
            unusual_h_dur = True
        elif duration < ave_duration - duration_th:
            unusual_l_dur = True

    # Days since the most recent active day; -1 when there is none in the
    # window or when the gap is longer than 28 days.
    interval_days = -1
    if active_days:
        interval_days = int((ymd - active_days[-1]) // day)
        if interval_days > 28:
            interval_days = -1

    # ---- store the results for ymd ----
    yyyymm = "{:%Y%m}".format(datetime.utcfromtimestamp(ymd))
    table_id = '{}{}'.format(table_name, yyyymm)
    serialized_Otag = msgpack.packb(one_tag)
    one_tag = insertCell(table_id, user_id, app_type_id, ymd, 'OneTag',
                         'Otag', serialized_Otag)
    count = insertCell(table_id, user_id, app_type_id, ymd, 'Daily', 'cnt',
                       count)
    duration = insertCell(table_id, user_id, app_type_id, ymd, 'Daily', 'dur',
                          duration)
    unusual_h_cnt = insertCell(table_id, user_id, app_type_id, ymd, 'Daily',
                               'uHcnt', unusual_h_cnt)
    unusual_l_cnt = insertCell(table_id, user_id, app_type_id, ymd, 'Daily',
                               'uLcnt', unusual_l_cnt)
    unusual_h_dur = insertCell(table_id, user_id, app_type_id, ymd, 'Daily',
                               'uHdur', unusual_h_dur)
    unusual_l_dur = insertCell(table_id, user_id, app_type_id, ymd, 'Daily',
                               'uLdur', unusual_l_dur)
    interval_days = insertCell(table_id, user_id, app_type_id, ymd, 'Daily',
                               'iDay', interval_days)

    return (ymd_label, str(one_tag), count, duration, unusual_h_cnt,
            unusual_l_cnt, unusual_h_dur, unusual_l_dur, interval_days)
def on_time(st, xxx, xxx, ts_xxx):
    '''
    Store the latest-activity value ('Daily'/'la') for the day derived from `st`.

    If `ts_xxx` holds samples above the minimum threshold, the largest of
    their first elements is written; otherwise yesterday's 'la' value is
    carried forward, or 0 when yesterday has none.

    NOTE(review): several identifiers in this block read `xxx` (two of the
    parameters, locals, and `datetime.utcfromxxx`) and look like redacted
    placeholders. As written the duplicated parameter name is a SyntaxError
    and `datetime.utcfromxxx` is not a datetime method (presumably
    `utcfromtimestamp`) -- restore the real names before use.
    NOTE(review): another function named `on_time` is defined earlier in
    this file; this later definition rebinds the name.

    Args:
        st: Epoch seconds; passed to time.gmtime() to pick the day.
        ts_xxx: Sequence of pairs; element [1] is compared with the
            threshold and element [0] is kept as an int.

    Return:
        (ymd, one_tag_et): the derived day as epoch seconds and the
        insertCell response for the 'la' cell.
    '''
    import settings
    reload(settings)
    import sys
    import os
    import numpy as np
    import datetime
    import time
    import calendar
    from datetime import timedelta, datetime, date
    from BigTable import insertCell
    from BigTable import readCell
    threshold_minxxx = (settings.thresholds[str(xxx)][0])
    xxx_list = []
    day = 86400
    counter = None  # never read after assignment in this function
    one_tag_et = None
    # Round-trip st through a UTC string, truncate to 00:00:00, shift back
    # 9 hours and convert to epoch seconds.
    # NOTE(review): the names suggest JST (UTC+9) midnight, but the
    # truncation is applied to the UTC date of st -- confirm the intent.
    ts = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(st))
    dt_ts = datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")
    jst_0 = dt_ts.replace(minute=0, hour=0, second=0, microsecond=0)
    jst_0_utc = jst_0 + timedelta(hours=-9)
    ymd = calendar.timegm(jst_0_utc.timetuple())

    # NOTE(review): when ts_xxx is empty, `ts` is still the formatted string
    # assigned above, so `len(ts) > 0` below is true and `ts[-1]` is the last
    # character of that string -- the "yesterday" fallback is only reached
    # when samples exist but none passes the threshold.
    if len(ts_xxx) == 0:
        counter = None
        one_tag_et = None
    else:
        for i in range(len(ts_xxx)):
            xxx = ts_xxx[i][1]
            if xxx > threshold_minxxx:  # Filtering eligible xxxs.
                xxx = int(ts_xxx[i][0])
                xxx_list.append(xxx)
        # `ts` is rebound here from the string to an array of kept values.
        ts = np.array(xxx_list)
        if ts.size == 0:
            counter = None
            one_tag_et = None
        else:
            ts.sort()

    # Table id is the configured table name followed by a "%Y%m" suffix.
    table_name = settings.tablename
    yyyymm = "{:%Y%m}".format(datetime.utcfromxxx(ymd))
    table_id = '{}{}'.format(table_name, yyyymm)

    if (len(ts) > 0):
        one_tag_et = insertCell(
            table_id, xxx, xxx, ymd, 'Daily', 'la',
            ts[-1])  #adding last endtime as latest activity
    else:
        # No qualifying sample today: look at the previous day's row.
        yesterday_ymd = ymd - day
        yyyymm = "{:%Y%m}".format(datetime.utcfromxxx(yesterday_ymd))
        table_id = '{}{}'.format(table_name, yyyymm)
        ystrday_et = readCell(table_id, xxx, xxx, yesterday_ymd, 'Daily', 'la')
        if (ystrday_et != None):  #yesterday's last activity
            # Carry yesterday's value forward into today's row.
            yyyymm = "{:%Y%m}".format(datetime.utcfromxxx(ymd))
            table_id = '{}{}'.format(table_name, yyyymm)
            one_tag_et = insertCell(table_id, xxx, xxx, ymd, 'Daily', 'la',
                                    ystrday_et)
        else:
            one_tag_et = 0  #default one-tag is zero
            yyyymm = "{:%Y%m}".format(datetime.utcfromxxx(ymd))
            table_id = '{}{}'.format(table_name, yyyymm)
            one_tag_et = insertCell(table_id, xxx, xxx, ymd, 'Daily', 'la',
                                    one_tag_et)

    return (ymd, one_tag_et)
def wd_utilization(user_id, app_type_id, ymd):
    """Store the weekday and the hourly usage pattern for that weekday.

    The 'OneTag'/'Otag' cell (a msgpack-encoded list of ``[start, end]``
    rounds) is read for ``ymd`` and for the same weekday in each of the
    preceding ``settings.n_weeks - 1`` weeks.  Every hour of day (start time
    shifted by +9 hours) in which a round started is flagged, the flags are
    divided by ``settings.n_weeks``, and the 24 resulting values are written
    as strings to the 'Daily' columns ``00`` .. ``23`` of the row for
    ``ymd``.  The weekday of ``ymd`` (Monday is 0, Sunday is 6, taken from
    the UTC date) is written to 'Daily'/'wDay' first.

    Args:
        user_id: Passed through to ``readCell`` / ``insertCell``.
        app_type_id: Passed through to ``readCell`` / ``insertCell``.
        ymd: Epoch seconds of the day being processed (anything ``int()``
            accepts).

    Returns:
        str: ``"<YYYYMMDD> <wDay response> [r0,r1,...,r23]"`` where ``rN`` is
        the ``insertCell`` response for hour ``N``.
    """
    import msgpack
    import settings
    reload(settings)  # Python 2 builtin: pick up edited settings on each call
    import numpy as np
    from datetime import datetime
    from BigTable import insertCell
    from BigTable import readCell

    week = 86400 * 7
    hour = 3600

    ymd = int(ymd)
    table_name = settings.tablename
    n_weeks = settings.n_weeks

    def table_for(stamp):
        # Tables are named "<tablename><YYYYMM>" after the UTC month of stamp.
        return '{}{}'.format(
            table_name, "{:%Y%m}".format(datetime.utcfromtimestamp(stamp)))

    # Collect the start time of every round on this weekday.
    start_times = []
    for weeks_back in range(n_weeks):
        idate = ymd - (week * weeks_back)
        cal_OneTag = readCell(table_for(idate), user_id, app_type_id, idate,
                              'OneTag', 'Otag')
        if cal_OneTag is not None:
            one_tag = msgpack.unpackb(cal_OneTag)
            # An empty round list simply contributes no start times.
            start_times.extend(a_round[0] for a_round in one_tag)

    # NOTE(review): an hour is flagged once (0/1) no matter in how many weeks
    # it was used, so each value below is either 0 or 1/n_weeks.  If a
    # per-week frequency was intended the flags must be summed per day --
    # confirm before changing, since it alters the stored values.
    h_tag = np.zeros(24)
    for start_time in start_times:
        st_time_jst = int(start_time) + (9 * hour)
        h_tag[datetime.utcfromtimestamp(st_time_jst).hour] = 1

    ut_pct = h_tag / n_weeks

    table_id = table_for(ymd)
    weekday_res = datetime.utcfromtimestamp(ymd).weekday()
    weekday_bt = insertCell(table_id, user_id, app_type_id, ymd, 'Daily',
                            'wDay', weekday_res)

    responses = []
    for i in range(24):
        column = "{:02d}".format(i)
        responses.append(
            insertCell(table_id, user_id, app_type_id, ymd, 'Daily', column,
                       str(ut_pct[i])))

    ymd_label = "{:%Y%m%d}".format(datetime.utcfromtimestamp(ymd))
    return "{} {} [".format(ymd_label, weekday_bt) + ",".join(
        str(response) for response in responses) + "]"