def short_utilization(user_id, app_type_id, ymd):
    '''
    Usage pattern of an appliance over the previous week, hour by hour,
    inserted into BigTable.

    For each of the 7 days ending at ``ymd`` (inclusive) the 'OneTag'/'Otag'
    cell is unpacked and every hour of day (shifted by +9h, JST) in which a
    usage round started is flagged once for that day.  The daily flags are
    summed and divided by the number of days whose 'Daily'/'cnt' cell is
    greater than zero.  The 24 ratios are written as strings to the 'Weekly'
    columns 's00'..'s23' of the row for ``ymd``.

    Args:
        user_id: row key part, passed through to readCell/insertCell.
        app_type_id: row key part, passed through to readCell/insertCell.
        ymd: epoch seconds of the day to report on (anything int() accepts).

    Result : each stored value is str() of a numpy float; when no day has
             cnt > 0 the division is by zero and the value is 'nan'.

    Return: string "<YYYYMMDD> [r0,r1,...,r23]" where r<N> is the insertCell
            response for hour column N.
    '''
    import msgpack
    import numpy as np
    import settings
    try:
        from importlib import reload as _reload  # Python 3
    except ImportError:
        _reload = reload  # Python 2: reload is a builtin
    _reload(settings)
    from datetime import datetime
    from BigTable import insertCell
    from BigTable import readCell

    day = 86400
    hour = 3600
    jst_offset = 9 * hour
    days_in_week = 7
    ymd = int(ymd)
    table_name = settings.tablename

    h_tag = np.zeros(24)  # per-hour number of days with a start in that hour
    num_on_days = 0
    # Oldest day first: ymd - 6 days ... ymd.
    for days_back in range(days_in_week - 1, -1, -1):
        idate = ymd - (day * days_back)
        yyyymm = "{:%Y%m}".format(datetime.utcfromtimestamp(idate))
        table_id = '{}{}'.format(table_name, yyyymm)

        cal_OneTag = readCell(table_id, user_id, app_type_id, idate,
                              'OneTag', 'Otag')
        if cal_OneTag is not None:
            # Flags are rebuilt for every day so that an hour seen on an
            # earlier day is not counted again on each later day.
            a_day = np.zeros(24)
            for a_round in msgpack.unpackb(cal_OneTag):
                start_jst = int(a_round[0]) + jst_offset
                a_day[datetime.utcfromtimestamp(start_jst).hour] = 1
            h_tag = h_tag + a_day

        cal_count = readCell(table_id, user_id, app_type_id, idate,
                             'Daily', 'cnt')
        # NOTE(review): compares the raw readCell value with 0; assumes
        # readCell returns a number for 'cnt' - confirm.
        if cal_count is not None and cal_count > 0:
            num_on_days += 1

    # num_on_days == 0 is an expected case and yields 'nan' (see docstring),
    # so the numpy division warnings are silenced here.
    with np.errstate(divide='ignore', invalid='ignore'):
        ut_pct = h_tag / num_on_days

    yyyymm = "{:%Y%m}".format(datetime.utcfromtimestamp(ymd))
    table_id = '{}{}'.format(table_name, yyyymm)
    responses = []
    for i in range(24):
        column = "s{:02d}".format(i)
        count = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                           column, str(ut_pct[i]))
        responses.append(str(count))

    ymd_text = "{:%Y%m%d}".format(datetime.utcfromtimestamp(ymd))
    return "{} [{}]".format(ymd_text, ",".join(responses))
def Weekly(user_id, app_type_id, ymd):
    '''
    Calculating wcnt,acnt,wdur,adur,wow_cnt,wow_dur,ma4_count,ma4_duration,
    unusual_h_cnt,unusual_l_cnt,unusual_h_dur,unusual_l_dur for the week
    ending at ``ymd`` and inserting them into BigTable.

    Steps:
      1. Read 'Daily' cnt/dur for the 7 days ending at ``ymd`` and store the
         weekly totals (wCnt, wDur) and the per-active-day averages
         (aCnt, aDur, integer division) in the 'Weekly' family.
      2. Read 'Weekly' wCnt/wDur for the 4 weekly rows ending at ``ymd``
         (this window includes the totals written in step 1).
      3. wowCnt/wowDur = current totals minus the totals of the row exactly
         one week before ``ymd``; 0 when that row is missing.
      4. ma4Cnt/ma4Dur = mean over the window when it holds exactly
         settings.window_size rows, else 0.0.
      5. Unusual high/low flags compare the current totals with the moving
         average widened by a fixed buffer and scaled by
         settings.param_th_top / settings.param_th_bottom.

    Args:
        user_id: row key part, passed through to readCell/insertCell.
        app_type_id: row key part, passed through to readCell/insertCell.
        ymd: epoch seconds of the last day of the week (anything int()
             accepts).

    Return: tuple (YYYYMMDD string, then the BigTable insertion responses of
            wcnt, acnt, wdur, adur, wow_cnt, wow_dur, ma4_count,
            ma4_duration, unusual_h_cnt, unusual_l_cnt, unusual_h_dur,
            unusual_l_dur).  All twelve are -1 when there is no daily data
            for the week or when any of the first four inserts returned 0.
    '''
    import numpy as np
    import settings
    try:
        from importlib import reload as _reload  # Python 3
    except ImportError:
        _reload = reload  # Python 2: reload is a builtin
    _reload(settings)
    from datetime import datetime
    from BigTable import insertCell
    from BigTable import readCell

    day = 86400
    week = day * 7
    ymd = int(ymd)
    table_name = settings.tablename

    def table_for(timestamp):
        # Tables are split per month: <tablename><YYYYMM>.
        yyyymm = "{:%Y%m}".format(datetime.utcfromtimestamp(timestamp))
        return '{}{}'.format(table_name, yyyymm)

    ymd_text = "{:%Y%m%d}".format(datetime.utcfromtimestamp(ymd))
    not_eligible = (ymd_text,) + (-1,) * 12

    # ---- 1. this week's daily records (ymd - 6 days ... ymd) -------------
    user_record = []
    for days_back in range(6, -1, -1):
        idate = ymd - (day * days_back)
        table_id = table_for(idate)
        cal_count = readCell(table_id, user_id, app_type_id, idate,
                             'Daily', 'cnt')
        cal_duration = readCell(table_id, user_id, app_type_id, idate,
                                'Daily', 'dur')
        if cal_count is not None and cal_duration is not None:
            user_record.append((cal_count, cal_duration))

    if not user_record:
        return not_eligible

    # Builtin int instead of the np.int alias, which newer NumPy removed.
    week_days = np.array(user_record).astype(int)
    counts = week_days[:, 0]
    durations = week_days[:, 1]
    w_count = counts.sum()
    w_duration = durations.sum()
    num_days = int((counts > 0).sum())
    # Floor division keeps the averages integral on Python 2 and 3 alike;
    # the num_days guard avoids dividing by zero when no day has cnt > 0.
    ave_count = w_count // num_days if (w_count != 0 and num_days) else 0
    ave_duration = (w_duration // num_days
                    if (w_duration != 0 and num_days) else 0)

    table_id = table_for(ymd)
    bt_wcnt = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                         'wCnt', w_count)
    bt_acnt = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                         'aCnt', ave_count)
    bt_wdur = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                         'wDur', w_duration)
    bt_adur = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                         'aDur', ave_duration)

    # A 0 response from any of the four inserts stops the calculation.
    # NOTE(review): presumably 0 means the insert failed - confirm against
    # BigTable.insertCell.
    if bt_wcnt == 0 or bt_acnt == 0 or bt_wdur == 0 or bt_adur == 0:
        return not_eligible

    # ---- 2. weekly totals of the 4 rows ending at ymd ---------------------
    user_record = []
    previous_week_index = None
    for weeks_back in range(3, -1, -1):
        idate = ymd - (week * weeks_back)
        table_id = table_for(idate)
        cal_count = readCell(table_id, user_id, app_type_id, idate,
                             'Weekly', 'wCnt')
        cal_duration = readCell(table_id, user_id, app_type_id, idate,
                                'Weekly', 'wDur')
        if cal_count is not None and cal_duration is not None:
            if weeks_back == 1:
                previous_week_index = len(user_record)
            user_record.append((cal_count, cal_duration))
    weeks_data = np.array(user_record).astype(int)

    # ---- 3. week over week -------------------------------------------------
    # The last row of the window is the current week itself, so the previous
    # week has to be picked explicitly; otherwise the difference is always 0.
    if previous_week_index is None:
        wow_cnt = 0
        wow_dur = 0
    else:
        wow_cnt = w_count - int(weeks_data[previous_week_index, 0])
        wow_dur = w_duration - int(weeks_data[previous_week_index, 1])

    # ---- 4. 4-week moving average -----------------------------------------
    if len(weeks_data) == settings.window_size:
        ma4_count = float(np.mean(weeks_data[:, 0]))
        ma4_duration = float(np.mean(weeks_data[:, 1]))
    else:
        ma4_count = float(0)
        ma4_duration = float(0)

    # ---- 5. unusual flags ---------------------------------------------------
    unusual_h_cnt = False
    unusual_l_cnt = False
    unusual_h_dur = False
    unusual_l_dur = False
    count_buffer = 1.0
    duration_buffer = 210.0  # 30min per day * 7 days = 210min
    if ma4_count != 0:
        # Unusual count?
        large_top_count = (ma4_count + count_buffer) * settings.param_th_top
        small_bottom_count = ((ma4_count - count_buffer) *
                              settings.param_th_bottom)
        if w_count > large_top_count:
            unusual_h_cnt = True
        elif w_count < small_bottom_count:
            unusual_l_cnt = True
        # Unusual duration?
        large_top_duration = ((ma4_duration + duration_buffer) *
                              settings.param_th_top)
        small_bottom_duration = ((ma4_duration - duration_buffer) *
                                 settings.param_th_bottom)
        if w_duration > large_top_duration:
            unusual_h_dur = True
        elif w_duration < small_bottom_duration:
            unusual_l_dur = True

    table_id = table_for(ymd)
    wow_cnt = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                         'wowCnt', wow_cnt)
    wow_dur = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                         'wowDur', wow_dur)
    ma4_count = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                           'ma4Cnt', ma4_count)
    ma4_duration = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                              'ma4Dur', ma4_duration)
    unusual_h_cnt = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                               'uHcnt', unusual_h_cnt)
    unusual_l_cnt = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                               'uLcnt', unusual_l_cnt)
    unusual_h_dur = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                               'uHdur', unusual_h_dur)
    unusual_l_dur = insertCell(table_id, user_id, app_type_id, ymd, 'Weekly',
                               'uLdur', unusual_l_dur)

    return (ymd_text, bt_wcnt, bt_acnt, bt_wdur, bt_adur, wow_cnt, wow_dur,
            ma4_count, ma4_duration, unusual_h_cnt, unusual_l_cnt,
            unusual_h_dur, unusual_l_dur)
def on_time(user_id,app_type_id,ts_power,ymd):
    '''
    Calculating count, duration, one-tag, unusual things and interval days
    for one day of power samples and inserting them into BigTable.

    Samples whose power exceeds settings.thresholds[app_type_id][0] are
    grouped into rounds: a new round starts when the gap between two
    consecutive timestamps exceeds thresholds[...][2].  Rounds no longer
    than thresholds[...][1] are dropped.  ``count`` is the number of
    remaining rounds, ``duration`` the sum of their lengths and ``one_tag``
    the list of [start, end] pairs (stored msgpack-serialised).

    The 'Daily' cnt/dur cells of the 56 days before ``ymd`` give the
    reference: mean and standard deviation over the days with a non-zero
    count.  A value further than settings.STD_MULTIPLE standard deviations
    above / below the mean sets the matching unusual flag.  ``interval_days``
    is the number of days since the most recent day with cnt > 0, or -1 when
    there is none or when it is more than 28.

    Args:
        user_id: row key part, passed through to readCell/insertCell.
        app_type_id: row key part; also the key into settings.thresholds.
        ts_power: sequence of (timestamp, power) samples.
        ymd: epoch seconds of the day (anything int() accepts).

    Return: tuple (YYYYMMDD string, str(one-tag response), then the BigTable
            insertion responses of count, duration, unusual_h_cnt,
            unusual_l_cnt, unusual_h_dur, unusual_l_dur, interval_days).
            Every response is -1 (the one-tag one '-1') and nothing is
            written when count or duration is 0.
    '''
    import msgpack
    import numpy as np
    import settings
    try:
        from importlib import reload as _reload  # Python 3
    except ImportError:
        _reload = reload  # Python 2: reload is a builtin
    _reload(settings)
    from datetime import datetime
    from BigTable import insertCell
    from BigTable import readCell

    thresholds = settings.thresholds[str(app_type_id)]
    threshold_minpower = thresholds[0]
    threshold_duration = thresholds[1]
    threshold_interval = thresholds[2]
    day = 86400
    history_days = 8 * 7  # reference window: the 8 weeks before ymd
    ymd = int(ymd)
    ymd_text = "{:%Y%m%d}".format(datetime.utcfromtimestamp(ymd))
    not_eligible = (ymd_text, str(-1), -1, -1, -1, -1, -1, -1, -1)

    # ---- rounds of activity -------------------------------------------------
    # Filtering eligible powers; plain Python ints so that msgpack can
    # serialise one_tag regardless of the interpreter / numpy version.
    timestamps = sorted(int(sample[0]) for sample in ts_power
                        if sample[1] > threshold_minpower)
    if not timestamps:
        return not_eligible

    one_tag = []
    start = timestamps[0]
    for current, following in zip(timestamps, timestamps[1:]):
        if following - current > threshold_interval:
            one_tag.append([start, current])
            start = following
    one_tag.append([start, timestamps[-1]])  # close the last round
    one_tag = [u for u in one_tag if u[1] - u[0] > threshold_duration]
    count = len(one_tag)
    duration = sum(u[1] - u[0] for u in one_tag)
    if count == 0 or duration == 0:
        # Nothing to store, so the history does not need to be read.
        return not_eligible

    # ---- history of the previous 8 weeks (ymd - 56 days ... ymd - 1 day) ---
    table_name = settings.tablename
    user_record = []
    last_active_day = None
    for days_back in range(history_days, 0, -1):
        idate = ymd - (day * days_back)
        yyyymm = "{:%Y%m}".format(datetime.utcfromtimestamp(idate))
        table_id = '{}{}'.format(table_name, yyyymm)
        cal_count = readCell(table_id, user_id, app_type_id, idate,
                             'Daily', 'cnt')
        cal_duration = readCell(table_id, user_id, app_type_id, idate,
                                'Daily', 'dur')
        if cal_count is not None and cal_duration is not None:
            user_record.append((cal_count, cal_duration))
        # NOTE(review): compares the raw readCell value with 0; assumes
        # readCell returns a number for 'cnt' - confirm.
        if cal_count is not None and cal_count > 0:
            last_active_day = idate  # days are visited oldest first

    # ---- unusual flags -------------------------------------------------------
    unusual_h_cnt = False
    unusual_l_cnt = False
    unusual_h_dur = False
    unusual_l_dur = False
    if user_record:  # new users / users idle for 8 weeks have no reference
        past_days = np.array(user_record).astype(int)
        # Filter out zero usages and take average/std of the rest.
        active_days = past_days[past_days[:, 0] != 0]
        if len(active_days) != 0:
            ave_count = active_days[:, 0].mean()
            ave_duration = active_days[:, 1].mean()
            # Thresholds for outliers: STD_MULTIPLE x standard deviation.
            count_th = settings.STD_MULTIPLE * active_days[:, 0].std()
            duration_th = settings.STD_MULTIPLE * active_days[:, 1].std()
            if count > ave_count + count_th:
                unusual_h_cnt = True
            elif count < ave_count - count_th:
                unusual_l_cnt = True
            if duration > ave_duration + duration_th:
                unusual_h_dur = True
            elif duration < ave_duration - duration_th:
                unusual_l_dur = True

    # ---- days since the last active day --------------------------------------
    interval_days = -1
    if last_active_day is not None:
        interval_days = (ymd - last_active_day) // day
        if interval_days > 28:
            interval_days = -1

    # ---- BigTable insertion ---------------------------------------------------
    yyyymm = "{:%Y%m}".format(datetime.utcfromtimestamp(ymd))
    table_id = '{}{}'.format(table_name, yyyymm)
    serialized_Otag = msgpack.packb(one_tag)
    one_tag = insertCell(table_id, user_id, app_type_id, ymd,
                         'OneTag', 'Otag', serialized_Otag)
    count = insertCell(table_id, user_id, app_type_id, ymd,
                       'Daily', 'cnt', count)
    duration = insertCell(table_id, user_id, app_type_id, ymd,
                          'Daily', 'dur', duration)
    unusual_h_cnt = insertCell(table_id, user_id, app_type_id, ymd,
                               'Daily', 'uHcnt', unusual_h_cnt)
    unusual_l_cnt = insertCell(table_id, user_id, app_type_id, ymd,
                               'Daily', 'uLcnt', unusual_l_cnt)
    unusual_h_dur = insertCell(table_id, user_id, app_type_id, ymd,
                               'Daily', 'uHdur', unusual_h_dur)
    unusual_l_dur = insertCell(table_id, user_id, app_type_id, ymd,
                               'Daily', 'uLdur', unusual_l_dur)
    interval_days = insertCell(table_id, user_id, app_type_id, ymd,
                               'Daily', 'iDay', interval_days)

    return (ymd_text, str(one_tag), count, duration, unusual_h_cnt,
            unusual_l_cnt, unusual_h_dur, unusual_l_dur, interval_days)
def on_time(st, user_id, app_type_id, ts_xxx):
    '''
    Calculating last activity and inserting it into BigTable.

    The day key ``ymd`` is derived from ``st``: the UTC calendar date of
    ``st`` at 00:00, moved back 9 hours, as epoch seconds.  The largest
    timestamp among the samples whose value exceeds
    settings.thresholds[app_type_id][0] is written to 'Daily'/'la' of that
    day.  When no sample qualifies, yesterday's 'Daily'/'la' value is
    carried over, or 0 when yesterday has none.

    NOTE(review): the second and third parameters were both spelled ``xxx``
    in the source (a duplicate-argument SyntaxError); they are named after
    the readCell/insertCell arguments used by the other functions of this
    file - confirm against the callers.
    NOTE(review): another ``on_time`` with a different signature is defined
    earlier in this file; if both live in one module this definition
    replaces the earlier one.

    Args:
        st: epoch seconds, anything time.gmtime() accepts.
        user_id: row key part, passed through to readCell/insertCell.
        app_type_id: row key part; also the key into settings.thresholds.
        ts_xxx: sequence of (timestamp, value) samples.

    Return: tuple (ymd, BigTable insertion response of last activity).
    '''
    import calendar
    import time
    import settings
    try:
        from importlib import reload as _reload  # Python 3
    except ImportError:
        _reload = reload  # Python 2: reload is a builtin
    _reload(settings)
    from datetime import timedelta, datetime
    from BigTable import insertCell
    from BigTable import readCell

    threshold_min = settings.thresholds[str(app_type_id)][0]
    day = 86400

    # 00:00 of st's UTC calendar date, shifted by -9h.
    # NOTE(review): looks like this is meant to be JST midnight expressed in
    # UTC - confirm that st is already aligned to the JST day.
    midnight = datetime(*time.gmtime(st)[:3])
    ymd = calendar.timegm((midnight - timedelta(hours=9)).timetuple())

    # Filtering eligible samples.  Kept in its own variable so that an empty
    # input can never be mistaken for "has activity".
    eligible = [int(sample[0]) for sample in ts_xxx
                if sample[1] > threshold_min]

    table_name = settings.tablename
    yyyymm = "{:%Y%m}".format(datetime.utcfromtimestamp(ymd))
    table_id = '{}{}'.format(table_name, yyyymm)

    if eligible:
        # adding last endtime as latest activity
        last_activity = max(eligible)
    else:
        yesterday_ymd = ymd - day
        yesterday_yyyymm = "{:%Y%m}".format(
            datetime.utcfromtimestamp(yesterday_ymd))
        yesterday_table_id = '{}{}'.format(table_name, yesterday_yyyymm)
        ystrday_et = readCell(yesterday_table_id, user_id, app_type_id,
                              yesterday_ymd, 'Daily', 'la')
        # yesterday's last activity, default one-tag is zero
        last_activity = ystrday_et if ystrday_et is not None else 0

    one_tag_et = insertCell(table_id, user_id, app_type_id, ymd,
                            'Daily', 'la', last_activity)
    return (ymd, one_tag_et)
def wd_utilization(user_id,app_type_id, ymd):
    '''
    ---weekday [wDay] usage pattern over the last settings.n_weeks weeks,
       calculated hour by hour and inserted into BigTable.
    ---Day of the week as an integer, where Monday is 0 and Sunday is 6.

    For ``ymd`` and the same weekday of each preceding week
    (settings.n_weeks dates in total) the 'OneTag'/'Otag' cell is unpacked
    and every hour of day (shifted by +9h, JST) in which a usage round
    started is flagged once for that date.  The flags are summed and divided
    by n_weeks.  The weekday of ``ymd`` (UTC) is written to 'Daily'/'wDay'
    and the 24 ratios, as strings, to the 'Daily' columns '00'..'23' of the
    row for ``ymd``.

    Args:
        user_id: row key part, passed through to readCell/insertCell.
        app_type_id: row key part, passed through to readCell/insertCell.
        ymd: epoch seconds of the day to report on (anything int() accepts).

    Result : each stored ratio is str() of a numpy float; 'nan' when
             settings.n_weeks is 0.

    Return: string "<YYYYMMDD> <wDay response> [r0,r1,...,r23]" where r<N>
            is the insertCell response for hour column N.
    '''
    import msgpack
    import numpy as np
    import settings
    try:
        from importlib import reload as _reload  # Python 3
    except ImportError:
        _reload = reload  # Python 2: reload is a builtin
    _reload(settings)
    from datetime import datetime
    from BigTable import insertCell
    from BigTable import readCell

    day = 86400
    week = day * 7
    hour = 3600
    jst_offset = 9 * hour
    ymd = int(ymd)
    table_name = settings.tablename
    n_weeks = settings.n_weeks

    h_tag = np.zeros(24)  # per-hour number of dates with a start in that hour
    for weeks_back in range(n_weeks):
        idate = ymd - (week * weeks_back)
        yyyymm = "{:%Y%m}".format(datetime.utcfromtimestamp(idate))
        table_id = '{}{}'.format(table_name, yyyymm)
        cal_OneTag = readCell(table_id, user_id, app_type_id, idate,
                              'OneTag', 'Otag')
        if cal_OneTag is None:
            continue
        # Flags are rebuilt for every date so that an hour seen in one week
        # is not counted again for each further week that is read.
        a_day = np.zeros(24)
        for a_round in msgpack.unpackb(cal_OneTag):
            start_jst = int(a_round[0]) + jst_offset
            a_day[datetime.utcfromtimestamp(start_jst).hour] = 1
        h_tag = h_tag + a_day

    # n_weeks == 0 yields 'nan' (see docstring); silence the numpy warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        ut_pct = h_tag / n_weeks

    report_day = datetime.utcfromtimestamp(ymd)
    table_id = '{}{}'.format(table_name, "{:%Y%m}".format(report_day))
    weekday_bt = insertCell(table_id, user_id, app_type_id, ymd,
                            'Daily', 'wDay', report_day.weekday())

    responses = []
    for i in range(24):
        column = "{:02d}".format(i)
        count = insertCell(table_id, user_id, app_type_id, ymd,
                           'Daily', column, str(ut_pct[i]))
        responses.append(str(count))

    return "{} {} [{}]".format("{:%Y%m%d}".format(report_day), weekday_bt,
                               ",".join(responses))