def get_user_ids_created_by_date(target_date):
    """Return the set of user ids (as strings) created on `target_date`.

    target_date: 'YYYY-MM-DD' string.  The query covers the half-open
    interval [target_date 00:00:00, target_date+1 00:00:00).
    """
    start_time = helper_regex.date_add(target_date, 0) + ' 00:00:00'
    end_time = helper_regex.date_add(target_date, 1) + ' 00:00:00'
    # NOTE(review): the dates are interpolated directly into the SQL text;
    # safe only while target_date comes from trusted code, not user input.
    sql = r'''
    SELECT id FROM gumi_live.auth_user WHERE date_joined>='%s' and date_joined<'%s';
    ''' % (start_time, end_time)
    result_set = helper_mysql.fetch_set(sql, config.conn_stat_gumi_live)
    # Normalize ids to strings so they compare equal to members of the
    # collections fetched elsewhere (which are stored as strings).
    return set([str(i) for i in result_set])
def check_stat_plan_log_file_reading_completion(stat_plan): """ self.log_table_name='raw_data_monitor' self.log_oem_name=self.script_file_name self.log_category=helper_ip.get_current_server_ip()+'_'+self.start_time_str.replace(' ','_').replace(':','_').replace('-','_')+'_'+self.uuid helper_mysql.put_raw_data(oem_name=self.log_oem_name, \ category=self.log_category, \ key='original_file_size', \ sub_key=log_file_name, \ value=helper_file.get_file_size(log_file_name), \ table_name=self.log_table_name) """ # 1.check total log file number current_date=helper_regex.extract(stat_plan.log_category,r'_(\d{4}_\d{2}_\d{2})_').replace('_','-') previous_date=helper_regex.date_add(current_date,-1) previoud_date_category_like=helper_regex.extract(stat_plan.log_category.replace(current_date,previous_date),r'([\d\.]+_\d{4}_\d{2}_\d{2})') sql=r''' select (select count(distinct sub_key) from raw_data_monitor where oem_name='%s' and category='%s') - (select count(distinct sub_key) from raw_data_monitor where oem_name='%s' and category=( select count(distinct sub_key) from raw_data_monitor where oem_name='%s' and category like '%s%%' )) ''' % (stat_plan.log_oem_name,stat_plan.log_category,stat_plan.log_oem_name,stat_plan.log_oem_name,previoud_date_category_like) print sql distance=helper_mysql.get_one_value_string(sql) print distance return distance
date=row['date'], created_on=row['created_on'], db_conn=item[5]) counter+=1 #print counter current_idx+=step if __name__=='__main__': for i in range(0,length_backwards): target_date=helper_regex.date_add(end_date,-i) for item in data_name_spaces: duplicate_record(item,target_date)
def do_calculate(current_date): urls = [ "mobileshabik.morange.com/mophoto_popular_photos.aspx?src_evflg_1", "mobileshabik.morange.com/mophoto_popular_photos.aspx?src_evflg_1&isprefetch", "mobileshabik.morange.com/mophoto_popular_photos.aspx?isprefetch&src_evflg_0", "mobileshabik.morange.com/mophoto_popular_photos.aspx?src_evflg_0", "mobileshabik.morange.com/mophoto_popular_photos.aspx?", "mobileshabik.morange.com/mophoto_popular_photos.aspx?isprefetch", ] urls = [ "mobileshabik.morange.com/mophoto_photo.aspx?albumid&src_pe&tag&photoid&type", "mobileshabik.morange.com/mophoto_photo.aspx?photoid&albumid&src_pe&tag&type&isprefetch", ] urls = [ "mobilevoda.morange.com/mophoto_popular_photos_[digits].aspx?src_app", "mobilevoda.morange.com/mophoto_popular_photos_[digits].aspx?src_feed", "mobilevoda.morange.com/mophoto_popular_photos_[digits].aspx?src_myphoto", "mobilevoda.morange.com/mophoto_popular_photos.aspx?start", "mobilevoda.morange.com/mophoto_popular_photos.aspx?src_app", ] urls = ["mobilevoda.morange.com/mophoto_photo.aspx?albumid&src_pe&tag&photoid&type"] collection_current = set([]) for u in urls: collection_temp = helper_mysql.get_raw_collection_from_key( oem_name="Vodafone", category="moagent", key="app_page_by_url_pattern_daily_visitor_unique", sub_key=u, date=current_date, table_name="data_url_pattern_vodafone", db_conn=None, ) collection_current |= collection_temp # print len(collection_current),len(collection_temp) collection_current_1 = set([]) for u in urls: collection_temp = helper_mysql.get_raw_collection_from_key( oem_name="Vodafone", category="moagent", key="app_page_by_url_pattern_daily_visitor_unique", sub_key=u, date=helper_regex.date_add(current_date, -1), table_name="data_url_pattern_vodafone", db_conn=None, ) collection_current_1 |= collection_temp # print len(collection_current),len(collection_temp) retained = collection_current_1 & collection_current # print set([1,2,3,4,8]) & set([9,3,4,8,10]) print len(collection_current_1 | 
collection_current) print len(collection_current), len(collection_current_1) print len(retained) print 1.0 * len(retained) / len(collection_current_1)
import helper_sql_server
import helper_mysql
import helper_regex
import config

# Yesterday: the most recent day with complete data.
current_date = helper_regex.date_add(helper_regex.get_date_str_now(), -1)


def c(oem_name='', category='', key='', sub_key='', date='', table_name='raw_data', db_conn=None):
    """Fetch a raw collection for a single date or a date range.

    `date` may be a single 'YYYY-MM-DD' string (or empty/falsy), in which
    case a one-day lookup is performed; any other iterable of date strings
    is collapsed to its min/max and fetched as an inclusive range.
    """
    single_day = isinstance(date, str) or not date
    if single_day:
        return helper_mysql.get_raw_collection_from_key(
            oem_name=oem_name, category=category, key=key, sub_key=sub_key,
            date=date, table_name=table_name, db_conn=db_conn)
    return helper_mysql.get_raw_collection_from_key_date_range(
        oem_name=oem_name, category=category, key=key, sub_key=sub_key,
        begin_date=min(date), end_date=max(date),
        table_name=table_name, db_conn=db_conn)


if __name__ == '__main__':
    pass
def calculate_date_range_retain_rate(date_unit,oem_name,category,key,sub_key,date,table_name='raw_data'):
    """Compute retention stats between two consecutive `date_unit`-day windows.

    Window 1 covers `date` and the date_unit-1 days before it; window 2 the
    date_unit days immediately before window 1.  Returns a 7-tuple:
    (base_size, retain_rate, fresh_rate, lost_rate,
     retained_base_size, lost_base_size, fresh_base_size).
    The neutral tuple (0, 0, 1, 1, 0, 0, 0) is returned when `date` is empty
    or any day of window 1 has no stored collection (data not complete yet).
    """
    if date_unit < 1:
        date_unit = 1
    base_size, retain_rate, fresh_rate, lost_rate = 0, 0, 1, 1
    retained_base_size, lost_base_size, fresh_base_size = 0, 0, 0
    neutral_result = (base_size, retain_rate, fresh_rate, lost_rate,
                      retained_base_size, lost_base_size, fresh_base_size)
    if not date:
        return neutral_result

    # Collections are stored under '<key>_collection_id'; normalize the key.
    key = key.replace('_collection_id', '') + '_collection_id'
    sql = _get_sql_select_collection_id_by_date(oem_name, category, key, sub_key, table_name)
    collection_id_dict = helper_mysql.fetch_dict(sql)

    # Window 1: bail out with the neutral result on any missing day.
    col_1 = set([])
    for i in range(0, date_unit):
        date_temp = helper_regex.date_add(date, -i)
        col_id_temp = collection_id_dict.get(date_temp, 0)
        col_1 |= helper_mysql.get_raw_collection_by_id(col_id_temp)
        if col_id_temp == 0:
            # Force a null result when window-1 data is not complete.
            return neutral_result
    base_size = len(col_1)

    # Window 2: the date_unit days before window 1 (missing days tolerated).
    col_2 = set([])
    for i in range(date_unit, date_unit + date_unit):
        date_temp = helper_regex.date_add(date, -i)
        col_2 |= helper_mysql.get_raw_collection_by_id(collection_id_dict.get(date_temp, 0))

    retain = col_1 & col_2
    fresh = col_1 - col_2
    lost = col_2 - col_1
    retained_base_size, lost_base_size, fresh_base_size = len(retain), len(lost), len(fresh)

    if len(col_2) > 0:
        retain_rate = 1.0 * len(retain) / len(col_2)
        lost_rate = 1.0 * len(lost) / len(col_2)
        if len(col_1) > 0:
            # Share of window-1 members not seen in window 2.
            fresh_rate = 1.0 * len(fresh) / len(col_1)
    return base_size, retain_rate, fresh_rate, lost_rate, retained_base_size, lost_base_size, fresh_base_size
def calculate_count_distinct_named_collection(date_unit,oem_name,category,key,sub_key,date,table_name='raw_data',allow_collection_empty=False): #date_unit accepts 1,2,3,...,'weekly','monthly' #for weekly, it produces result only when date is Sunday, else 0 #for monthly, it produces result only when date is the last day of a week, else 0 #for all cases, it doesn't produce value when required collections are not all ready unique=0 total=0 average=0 if not date: return unique,total,average if date_unit=='weekly': if helper_regex.get_weekday_from_date_str(date)!=7: return unique,total,average date_unit=7 elif date_unit=='monthly': if helper_regex.extract(helper_regex.date_add(date,1),r'\d+\-\d+\-(\d+)')!='01': return unique,total,average first_date=helper_regex.extract(date,r'(\d+\-\d+\-)\d+')+'01' date_unit=helper_regex.get_day_diff_from_date_str(date,first_date)+1 if date_unit<1: date_unit=1 key=key.replace('_collection_id','') sql=_get_sql_select_collection_id_by_date(oem_name,category,key,sub_key,table_name) collection_id_dict=helper_mysql.fetch_dict(sql) key_temp=collection_id_dict.keys() key_temp.sort(reverse=True) sql=_get_sql_select_collection_id_by_date(oem_name,category,key+'_base',sub_key,table_name) #print sql collection_base_dict=helper_mysql.fetch_dict(sql) #print collection_base_dict """ print 'existing collection list:' for i in key_temp[0:65]: print i+': '+str(collection_id_dict[i]) """ col_1=set([]) base_total=0 for i in range(0,date_unit): date_temp=helper_regex.date_add(date,-i) col_id_temp=collection_id_dict[date_temp] if collection_id_dict.has_key(date_temp) else 0 #col_temp=helper_mysql.get_raw_collection_by_id(col_id_temp) col_temp=helper_collection.get_named_collection(table_name=table_name,oem_name=oem_name,category=category, \ key=key,sub_key=sub_key,date=date_temp) col_1 |= col_temp base_total+=int(collection_base_dict[date_temp]) if collection_base_dict.has_key(date_temp) else 0 if col_id_temp==0: #force return null when data not complete 
if allow_collection_empty: print date_temp,table_name,oem_name,category,key,sub_key,date_temp,'collection empty error! passed.' else: print date_temp,table_name,oem_name,category,key,sub_key,date_temp,'collection empty error! exit.' return unique,total,average unique=len(col_1) total=base_total average=base_total*1.0/unique if unique>0 else 0 return unique,total,average
def calculate_date_range_average_life_cycle(date_unit,oem_name,category,key,sub_key,date,table_name='raw_data'):
    """Average active-day count ("life cycle") for lost vs retained members.

    Window 1 = the `date_unit` days ending at `date`; window 2 = the
    `date_unit` days before that.  Members in window 2 but not window 1 are
    "lost"; members in both are "retained".  Each group member's active days
    are counted over history walking back from `date`, stopping at
    2010-01-01 or after 2000 days, whichever comes first.

    Returns (lost_avg, retained_avg, lost_counts, retained_counts) where the
    dicts map member -> active-day count.  (0, 0, {}, {}) is returned when
    `date` is empty or window 1 has a missing collection.
    """
    if date_unit < 1:
        date_unit = 1
    lost_col_average_life_cycle = 0
    retained_col_average_life_cycle = 0
    if not date:
        return lost_col_average_life_cycle, retained_col_average_life_cycle, {}, {}

    # Collections are stored under '<key>_collection_id'; normalize the key.
    key = key.replace('_collection_id', '') + '_collection_id'
    sql = _get_sql_select_collection_id_by_date(oem_name, category, key, sub_key, table_name)
    collection_id_dict = helper_mysql.fetch_dict(sql)

    # Window 1: bail out with the neutral result on any missing day.
    col_1 = set([])
    for i in range(0, date_unit):
        date_temp = helper_regex.date_add(date, -i)
        col_id_temp = collection_id_dict.get(date_temp, 0)
        col_1 |= helper_mysql.get_raw_collection_by_id(col_id_temp)
        if col_id_temp == 0:
            return lost_col_average_life_cycle, retained_col_average_life_cycle, {}, {}

    # Window 2 (missing days tolerated).
    col_2 = set([])
    for i in range(date_unit, date_unit + date_unit):
        date_temp = helper_regex.date_add(date, -i)
        col_2 |= helper_mysql.get_raw_collection_by_id(collection_id_dict.get(date_temp, 0))

    lost_col = col_2 - col_1
    retained_col = col_2 & col_1
    lost_col_len = len(lost_col)
    retained_col_len = len(retained_col)
    lost_col_dict = dict([(member, 0) for member in lost_col])
    retained_col_dict = dict([(member, 0) for member in retained_col])

    # Walk backwards through history counting active days per member.
    # (The inner variable used to shadow the outer loop index `i`.)
    for day_offset in range(0, 2000):
        date_temp = helper_regex.date_add(date, -day_offset)
        if date_temp == '2010-01-01':
            break
        col_temp = helper_mysql.get_raw_collection_by_id(collection_id_dict.get(date_temp, 0))
        for member in col_temp:
            if member in lost_col_dict:
                lost_col_dict[member] += 1
            if member in retained_col_dict:
                retained_col_dict[member] += 1

    if lost_col_len > 0:
        lost_col_average_life_cycle = sum(lost_col_dict.values()) * 1.0 / lost_col_len
    if retained_col_len > 0:
        retained_col_average_life_cycle = sum(retained_col_dict.values()) * 1.0 / retained_col_len
    return lost_col_average_life_cycle, retained_col_average_life_cycle, lost_col_dict, retained_col_dict
# Back-fill script: read Vodafone "website" rows day by day from `source_conn`.
target_tables=[
    'raw_data_url_pattern'
]

print helper_regex.get_time_str_now()

for table_name in target_tables:
    #current_max_id=helper_mysql.get_raw_data(oem_name='Stat_Portal',category='data_migrate',key='max_transfered_id',sub_key=table_name,default_value=0,table_name='raw_data_debug',date='',db_conn=None)

    # Walk backwards one day at a time starting from this date.
    begin_date='2011-07-11'
    # NOTE(review): `step` is unused in the visible code — possibly used
    # further below this chunk; confirm before removing.
    step=1000

    for i in range(0,1000):
        current_date=helper_regex.date_add(begin_date,-i)
        print table_name,current_date

        # NOTE(review): this bare string expression is a no-op duplicate of
        # the query passed to fetch_rows below; it is evaluated and discarded.
        r'''
        select * from %s where date='%s' and `oem_name`="Vodafone" and `category`="website"
        ''' % (table_name,current_date)

        # `source_conn` is defined elsewhere in this file (not visible here).
        source_rows=helper_mysql.fetch_rows(sql=r'''
        select * from %s where date='%s' and `oem_name`="Vodafone" and `category`="website"
        ''' % (table_name,current_date),db_conn=source_conn)
def _do_calculation(self):
    """Build the user-evolution matrices and export them plus a report view.

    Pipeline (all dates are 'YYYY-MM-DD' strings):
      1. collect users created in [self.begin_date, self.end_date];
      2. record each new user's active days within their first
         self.observation_day_length days;
      3. record each new user's per-day action counts;
      4. group users by how many days they were active;
      5. accumulate action / online-day matrices per
         (activity period, user group);
      6. store the matrices via helper_mysql.put_raw_data and (re)create the
         report view via helper_view.
    """
    # --- 1. new user set: user_id -> creation date -------------------------
    temp_new_user_dict={}
    temp_date=self.begin_date
    temp_end_date=self.end_date
    while True:
        if temp_date>temp_end_date:
            break
        temp_daily_new_user_set=self._get_daily_created_user_dict(temp_date)
        for user_id in temp_daily_new_user_set:
            temp_new_user_dict[user_id]=temp_date
        temp_date=helper_regex.date_add(temp_date,1)
    print len(temp_new_user_dict)
    print temp_new_user_dict
    #exit()

    # --- 2. user active history: user_id -> list of active dates -----------
    # Only dates within a user's first observation_day_length days count.
    self.temp_new_user_active_history_dict={}
    temp_date=self.begin_date
    temp_end_date=helper_regex.date_add(self.end_date,self.observation_day_length-1)
    while True:
        if temp_date>temp_end_date:
            break
        temp_active_user_set=self._get_daily_active_user_set(temp_date)
        for user_id in temp_active_user_set:
            if temp_new_user_dict.has_key(user_id):
                self.temp_new_user_active_history_dict.setdefault(user_id,[])
                if temp_date<=helper_regex.date_add(temp_new_user_dict[user_id],self.observation_day_length-1):
                    self.temp_new_user_active_history_dict[user_id].append(temp_date)
        temp_date=helper_regex.date_add(temp_date,1)
    print len(self.temp_new_user_active_history_dict)
    print self.temp_new_user_active_history_dict
    #exit()

    # --- 3. user action history: date -> {user_id: action count} -----------
    # Restricted to the observed new users.
    temp_new_user_action_history_dict={}
    temp_date=self.begin_date
    temp_end_date=helper_regex.date_add(self.end_date,self.observation_day_length-1)
    while True:
        if temp_date>temp_end_date:
            break
        temp_user_action_dict=self._get_daily_user_action(temp_date)
        temp_user_action_dict=dict((k,v) for k,v in temp_user_action_dict.iteritems() if k in temp_new_user_dict)
        temp_new_user_action_history_dict[temp_date]=temp_user_action_dict
        temp_date=helper_regex.date_add(temp_date,1)
    print len(temp_new_user_action_history_dict)
    print temp_new_user_action_history_dict
    #exit()

    # --- 4. user groups: bucket users by total number of active days -------
    # Bucket g contains users active for [g, g+observation_day_step) days.
    temp_user_group={}
    for user_group_by_max_active_time in range(1,self.observation_day_length+1,self.observation_day_step):
        temp_user_group[user_group_by_max_active_time]=set([user_id for user_id,history \
            in self.temp_new_user_active_history_dict.iteritems() \
            if len(history)>=user_group_by_max_active_time \
            and len(history)<user_group_by_max_active_time \
            +self.observation_day_step])
    print len(temp_user_group)
    print temp_user_group
    #exit()

    # --- 5. evolution matrices --------------------------------------------
    temp_matrix_of_user_action={} # Dimension-1: time period; Dimension-2: user group
    temp_matrix_of_user_online_day={}
    for active_time_period in range(1,self.observation_day_length+1,self.observation_day_step):
        for user_group_by_max_active_time in range(1,self.observation_day_length+1,self.observation_day_step):
            temp_matrix_of_user_action.setdefault(active_time_period,{})
            temp_matrix_of_user_action[active_time_period].setdefault(user_group_by_max_active_time,0)
            temp_matrix_of_user_online_day.setdefault(active_time_period,{})
            temp_matrix_of_user_online_day[active_time_period].setdefault(user_group_by_max_active_time,0)
            total_action=0
            total_online_day=0
            temp_user_set=temp_user_group[user_group_by_max_active_time]
            for user_id in temp_user_set:
                # Disabled earlier variant kept for reference:
                """
                dates=self.temp_new_user_active_history_dict[user_id][active_time_period-1:min(active_time_period+self.observation_day_step-1,len(self.temp_new_user_active_history_dict[user_id]))]
                total_online_day+=len(dates)
                for d in dates:
                    if temp_new_user_action_history_dict[d].has_key(user_id):
                        total_action+=temp_new_user_action_history_dict[d][user_id]
                """
                if active_time_period>len(self.temp_new_user_active_history_dict[user_id]):
                    continue
                dates=self.temp_new_user_active_history_dict[user_id]
                temp_begin_date=temp_new_user_dict[user_id] \
                    if active_time_period==1 \
                    else helper_regex.date_add(dates[active_time_period-1-1],1)
                # include those actions that happened while the user was offline
                temp_end_date=dates[-1] \
                    if active_time_period+self.observation_day_step-1>len(self.temp_new_user_active_history_dict[user_id]) \
                    else dates[active_time_period+self.observation_day_step-1-1]
                for temp_d in helper_regex.date_iterator(temp_begin_date,temp_end_date):
                    if temp_new_user_action_history_dict[temp_d].has_key(user_id):
                        total_action+=temp_new_user_action_history_dict[temp_d][user_id]
                # Online days contributed by this user within the period.
                total_online_day+=min(active_time_period+self.observation_day_step-1,len(self.temp_new_user_active_history_dict[user_id])) \
                    -(active_time_period-1)
            temp_matrix_of_user_action[active_time_period][user_group_by_max_active_time]=total_action
            temp_matrix_of_user_online_day[active_time_period][user_group_by_max_active_time]=total_online_day
    print temp_matrix_of_user_action
    print temp_matrix_of_user_online_day

    # --- 6. export result --------------------------------------------------
    analysis_factor_name=self.analysis_factor_name #'Mutual Friend Relation'
    table_name=self.table_name #'raw_data_test'
    oem_name=self.oem_name #'Shabik_360'
    category=self.category #'evolution_analysis'
    # Key prefix encodes observation length, number of cohort days and step.
    key_prefix=analysis_factor_name.lower().replace(' ','_')+'_ol%s,n%s,s%s_evolution_' % (self.observation_day_length,1+helper_regex.get_day_diff_from_date_str(self.end_date,self.begin_date),self.observation_day_step)
    date=self.begin_date
    view_name='Report %s Evolution Analysis %s (%s Days Step, %s Days)' % (oem_name,analysis_factor_name,self.observation_day_step, self.observation_day_length)
    view_description=r'''
    Date Range of Observed User: %s to %s
    Total Observed New User: %s
    Observing Users' First %s Days
    ''' % (self.begin_date,self.end_date,len(self.temp_new_user_active_history_dict),self.observation_day_length)

    # Store one row per (group, activity period) cell of the matrices.
    for active_time_period,v in temp_matrix_of_user_action.iteritems():
        for user_group_by_max_active_time,total_action in v.iteritems():
            helper_mysql.put_raw_data(oem_name=oem_name,category=category,key=key_prefix+'unique_base', \
                sub_key='g'+str(user_group_by_max_active_time).zfill(2)+'_a'+str(active_time_period).zfill(2), \
                value=temp_matrix_of_user_action[active_time_period][user_group_by_max_active_time], \
                date=date,table_name=table_name)
            helper_mysql.put_raw_data(oem_name=oem_name,category=category,key=key_prefix+'unique', \
                sub_key='g'+str(user_group_by_max_active_time).zfill(2)+'_a'+str(active_time_period).zfill(2), \
                value=len(temp_user_group[user_group_by_max_active_time]), \
                date=date,table_name=table_name)
            # Actions normalized to a full observation_day_step of online days.
            adjusted_base=0
            if temp_matrix_of_user_online_day[active_time_period][user_group_by_max_active_time]>0:
                adjusted_base=1.0*self.observation_day_step \
                    *temp_matrix_of_user_action[active_time_period][user_group_by_max_active_time] \
                    /temp_matrix_of_user_online_day[active_time_period][user_group_by_max_active_time]
            helper_mysql.put_raw_data(oem_name=oem_name,category=category,key=key_prefix+'unique_base_adjusted', \
                sub_key='g'+str(user_group_by_max_active_time).zfill(2)+'_a'+str(active_time_period).zfill(2), \
                value=adjusted_base,date=date,table_name=table_name)
            helper_mysql.put_raw_data(oem_name=oem_name,category=category,key=key_prefix+'total', \
                sub_key='g'+str(user_group_by_max_active_time).zfill(2)+'_a'+str(active_time_period).zfill(2), \
                value=len(self.temp_new_user_active_history_dict), \
                date=date,table_name=table_name)

    # generate view sql
    sql_template=r'''
SELECT
concat( 'Online for '
,replace(SUBSTRING_INDEX(`sub_key`,'_',1),'g',''),
'd-'
,lpad(replace(SUBSTRING_INDEX(`sub_key`,'_',1),'g','')+%(observation_day_step)s-1,2,'0')
,'d') as `Group Name`
,max(if(`oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique',`value`,0)) as `Group Size`
,concat(format(100.0
*max(if(`oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique',`value`,0))
/max(if(`oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)stotal',`value`,0))
,2),'%%%%') as `Group Proportion`
%%(column_sql)s
FROM `%(table_name)s`
WHERE (
`oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique'
or `oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique_base'
or `oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique_base_adjusted'
or `oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)stotal'
)and
date='%(date)s'
GROUP BY `Group Name`
ORDER BY `Group Name` DESC
''' % {
        'observation_day_step':self.observation_day_step,
        'oem_name':oem_name,
        'category':category,
        'key_prefix':key_prefix,
        'table_name':table_name,
        'date':date,
    }

    # One output column per activity period; '-' when the cell has no data.
    sql_column_template=r'''
,case when max(if(`oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique_base'
and SUBSTRING_INDEX(`sub_key`,'_',-1)='a%(active_time_period)s',`value`,0))>0
then max(if(`oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique_base_adjusted'
and SUBSTRING_INDEX(`sub_key`,'_',-1)='a%(active_time_period)s',`value`,0))
else '-' end
as `Avg %(analysis_factor_name)s Created in [%(active_time_period)sd,%(active_time_period_end)sd]`
'''

    sql_columns=[]
    for active_time_period in range(1,self.observation_day_length+1,self.observation_day_step):
        temp_sql_column=sql_column_template % {
            'observation_day_step':self.observation_day_step,
            'oem_name':oem_name,
            'category':category,
            'key_prefix':key_prefix,
            'table_name':table_name,
            'date':date,
            'active_time_period':str(active_time_period).zfill(2),
            'active_time_period_end':str(active_time_period+self.observation_day_step-1).zfill(2),
            'analysis_factor_name':analysis_factor_name,
        }
        sql_columns.append(temp_sql_column)

    view_sql=sql_template % {
        'column_sql':'\n'.join(sql_columns)
    }
    print view_sql

    import helper_view
    helper_view.replace_view(view_name=view_name,view_sql=view_sql,view_description=view_description,charting_javascript=r'''
    add_highcharts_basic_line_chart({
        'tab_name':'Trend Comparison',
        'column_names_pattern':/Online /ig,
        'marginRight':300,
        'reverse_key_column':false,
        'reverse_table':true,
        'exclude_rows':['Group Size','Group Proportion'],
        'reverse_column':true
    });
    ''')
    # Grant the view to the two fixed user/group ids.
    helper_view.grant_view(view_name,'5')
    helper_view.grant_view(view_name,'17')
    pass
def calculate_ndays_unique(key_space,db_name,date_units): # calculate n days' unique min_date=helper_mysql.get_one_value_string(r''' select min(`date`) from `%s` where `oem_name`='%s' and `category`='%s' and `key`='%s_collection_id' and `sub_key`='%s' and `date`>='2011-12-16' ''' % (db_name,key_space['oem_name'],key_space['category'],key_space['key'],key_space['sub_key'])) max_date=helper_mysql.get_one_value_string(r''' select max(`date`) from `%s` where `oem_name`='%s' and `category`='%s' and `key`='%s_collection_id' and `sub_key`='%s' and `date`>='2011-12-16' ''' % (db_name,key_space['oem_name'],key_space['category'],key_space['key'],key_space['sub_key'])) if not min_date or not max_date: print 'date error.' return date_temp=min_date #print date_temp #exit() while True: if date_temp>=max_date: break for date_unit in date_units: unique,total,average=helper_math.calculate_count_distinct(date_unit=date_unit,oem_name=key_space['oem_name'],category=key_space['category'],key=key_space['key'],sub_key=key_space['sub_key'],date=date_temp,table_name=db_name,allow_collection_empty=True) print 'distinct collection calc '+date_temp+': date_unit '+str(date_unit)+' unique '+str(unique)+' total '+str(total)+' average '+str(average) #exit() key_prefix=helper_regex.regex_replace('_unique$','',key_space['key']) if unique>0: suffix=str(date_unit) if isinstance(date_unit, (int, long)): suffix+='_days' helper_mysql.put_raw_data(oem_name=key_space['oem_name'],category=key_space['category'],key=key_prefix+'_'+suffix+'_unique',sub_key=key_space['sub_key'],value=unique,date=date_temp,table_name=db_name) helper_mysql.put_raw_data(oem_name=key_space['oem_name'],category=key_space['category'],key=key_prefix+'_'+suffix+'_unique_base',sub_key=key_space['sub_key'],value=total,date=date_temp,table_name=db_name) 
helper_mysql.put_raw_data(oem_name=key_space['oem_name'],category=key_space['category'],key=key_prefix+'_'+suffix+'_unique_average',sub_key=key_space['sub_key'],value=average,date=date_temp,table_name=db_name) date_temp=helper_regex.date_add(date_temp,1)
def stat_login(): global date_min,date_max,base_user_sets oem_name='All' stat_category='daily_active_user_retain' db_name='raw_data_login_trend' # you can change day range (30 days) date_max=helper_regex.date_add(helper_regex.get_date_str_now(),-1) date_min=helper_regex.date_add(date_max,-30) for i in range(1,10000): current_date=helper_regex.date_add(date_min,i) print 'current date',current_date if current_date>date_max: break # new user set from db (overall daily active user) new_user_set=gumi_helper_user.get_user_ids_created_by_date(current_date) # daily active user SG active_user_sg = helper_mysql.get_raw_collection_from_key(oem_name='Gumi_puzzle', \ category='user',key='live_log_by_country_daily_uid_unique_collection_id',sub_key='SG', \ date=current_date, \ table_name='raw_data',db_conn=None) # daily active user US active_user_us = helper_mysql.get_raw_collection_from_key(oem_name='Gumi_puzzle', \ category='user',key='live_log_by_country_daily_uid_unique_collection_id',sub_key='US', \ date=current_date, \ table_name='raw_data',db_conn=None) # daily active user PL active_user_pl = helper_mysql.get_raw_collection_from_key(oem_name='Gumi_puzzle', \ category='user',key='live_log_by_country_daily_uid_unique_collection_id',sub_key='PL', \ date=current_date, \ table_name='raw_data',db_conn=None) # daily active user Unknow IP active_user_zz = helper_mysql.get_raw_collection_from_key(oem_name='Gumi_puzzle', \ category='user',key='live_log_by_country_daily_uid_unique_collection_id',sub_key='ZZ', \ date=current_date, \ table_name='raw_data',db_conn=None) base_user_sets={ 'pt-new-user-':new_user_set, 'pt-new-user-SG':new_user_set & active_user_sg, 'pt-new-user-US':new_user_set & active_user_us, 'pt-new-user-PL':new_user_set & active_user_pl, 'pt-new-user-ZZ':new_user_set & active_user_zz } for k,user_set in base_user_sets.iteritems(): k=k.replace('*','') # calculate total print 'user base of',k,':',len(user_set) key='active_user_initial_%s_total_unique' % (k,) #sub_key = 
k[-2:] #if sub_key.find('-')>-1: # sub_key='' helper_mysql.put_raw_data(oem_name,stat_category,key,'',len(user_set),db_name,current_date) helper_mysql.put_collection(collection=user_set,oem_name=oem_name,category=stat_category, \ key=key,sub_key='',date=current_date,table_name=db_name) # calculate ranges=[(1,8,1),(1,30,7),(1,60,14)] for r in ranges: start=r[0] end=r[1] step=r[2] accumulative_logined_user={ 'pt':set([]), } for i in range(start,end,step): print start print end logined_user={ 'pt':set([]), } for day_delta in range(i,i+step): target_date=helper_regex.date_add(current_date,day_delta) collection = helper_mysql.get_raw_collection_from_key(oem_name='Gumi_puzzle', \ category='user',key='live_log_daily_uid_unique_collection_id',sub_key='', \ date=target_date, \ table_name='raw_data',db_conn=None) logined_user['pt'] = logined_user['pt'] | collection for k1,v1 in logined_user.iteritems(): accumulative_logined_user[k1] |= v1 for k,user_set in base_user_sets.iteritems(): k=k.replace('*','') logined_user_temp=set([]) if k.find('pt')>-1: logined_user_temp=logined_user['pt'] accumulative_logined_user_temp=accumulative_logined_user['pt'] base_user_logined_user= user_set & logined_user_temp key='daily_active_user_'+str(step)+'_day_logined_%s_total_unique' % (k,) helper_mysql.put_raw_data(oem_name,stat_category,key,i,len(base_user_logined_user),db_name,current_date) base_user_no_logined_user= user_set - accumulative_logined_user_temp key='daily_active_user_'+str(step)+'_day_no_logined_%s_total_unique' % (k,) helper_mysql.put_raw_data(oem_name,stat_category,key,i,len(base_user_no_logined_user),db_name,current_date) return
def export(date_length=30):
    """Print an analysis of how new Shabik users migrate between the
    Shabik 5 and Shabik 360 clients.

    Builds, per new user, a day-by-day login-history string ('5' = seen
    in Shabik 5, '6' = seen in Shabik 360, appended oldest day first),
    buckets users into named groups by matching regexes against that
    string, then prints each group's members with last-login date,
    msisdn, subscription status and the history string, followed by a
    size/unsub summary per group.

    date_length -- how many days of account creations to include.
    """
    # user_id -> concatenated '5'/'6' daily-login sequence
    user_login_history={}
    # user_id -> most recent date the user was seen in either client
    user_last_login_date={}

    # NOTE(review): "today" is actually 17 days ago -- presumably to
    # leave a stabilisation gap before analysing behaviour; confirm.
    today=helper_regex.date_add(helper_regex.get_date_str_now(),-17)

    start_time=helper_regex.date_add(today,-date_length)+' 05:00:00'
    end_time=helper_regex.date_add(today,-1)+' 05:00:00'

    # user_id -> msisdn
    sql=r'''
    SELECT [user_id],replace([user_name],'@shabik.com','') as msisdn
    FROM [mozone_user].[dbo].[Profile] with(nolock)
    where [creationDate]>='%s' and [creationDate]<'%s'
    and user_name like '%%shabik.com%%'
    ''' % (start_time,end_time)

    user_id_to_msisdn=helper_sql_server.fetch_dict(conn_config=config.conn_stc,sql=sql)

    # new user user_id
    new_user_collection=user_id_to_msisdn.keys()
    new_user_collection=set([str(user_id) for user_id in new_user_collection])

    # subscription status
    sql=r'''
    select distinct '0'+replace(msisdn,'+966','')+'@shabik.com' as [user_name]
    into #tmp
    from db86.shabik_mt.dbo.accounts with(nolock)
    where is_deleted=0

    SELECT [user_id]
    FROM [mozone_user].[dbo].[Profile] with(nolock)
    where [creationDate]>='%s' and [creationDate]<'%s'
    and user_name like '%%shabik.com%%'
    and user_name in (
        select user_name from #tmp
    )

    drop table #tmp
    ''' % (start_time,end_time)

    user_id_in_sub=helper_sql_server.fetch_set(conn_config=config.conn_stc,sql=sql)
    user_id_in_sub=set([str(user_id) for user_id in user_id_in_sub])

    # Walk the days oldest-first (i counts down), covering the creation
    # window plus the 17-day gap up to yesterday, and append one history
    # char per client per day.
    for i in range(date_length,-17,-1):
        date_temp=helper_regex.date_add(today,-i)

        shabik_5_collection=helper_mysql.get_raw_collection_from_key(oem_name='STC',category='moagent', \
                key='app_page_only_shabik_5_daily_visitor_unique',sub_key='', \
                date=date_temp,table_name='raw_data',db_conn=None)
        shabik_5_collection=shabik_5_collection & new_user_collection

        for user_id in shabik_5_collection:
            user_login_history.setdefault(user_id,'')
            user_login_history[user_id]+='5'
            user_last_login_date.setdefault(user_id,'')
            user_last_login_date[user_id]=date_temp

        shabik_360_collection=helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                key='app_page_daily_visitor_unique',sub_key='', \
                date=date_temp,table_name='raw_data_shabik_360',db_conn=None)
        shabik_360_collection=shabik_360_collection & new_user_collection

        for user_id in shabik_360_collection:
            user_login_history.setdefault(user_id,'')
            user_login_history[user_id]+='6'
            user_last_login_date.setdefault(user_id,'')
            user_last_login_date[user_id]=date_temp

    #calculate

    # Two earlier grouping schemes, kept (disabled) for reference:
    """
    target_groups_names=[
        '1.More than 2 weeks users using Shabik 360 (Totally New User to Shabik) [only using 360]',
        '2.Users who Shifted from Shabik360 to Shabik 5 [for each at least using 3 days, still in sub]',
        '3.Unsubscribed users of Shabik 360 [last using 360 for >=7 days and then unsub]',
        '4.Users who uses Shabik 5 more than 2 weeks [actually is online for >=14 days]',
        '5.Users who shifted from Shabik 5 to Shabik 360 [for each at least using 3 days, still in sub]',
        '6.User base of new user in last 50 days, which is used to generate above lists',
    ]

    target_groups=[
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6{14,})$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(6{3,}5{3,}$)')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(6{7,}$)') and user_id in user_id_in_sub],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(5{14,}$)')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(5{3,}6{3,}$)') and user_id in user_id_in_sub],
        [user_id for user_id,sequence in user_login_history.iteritems()],
    ]

    target_groups_names={
        'User only use Shabik 360': [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6+)$')],
        'User only use Shabik 360 [more than 10d]': ,
        'User only use Shabik 5',
        'User only use Shabik 5 [more than 10d]',
        'User use both Shabik 360 / Shabik 5',
        'User used both and choosed Shabik 5 [recently used only Shabik 5 for 5d]',
        'User used both and choosed Shabik 5 [recently used only Shabik 360 for 5d]',
    }

    target_groups=[
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6{10,})$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(5+)$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(5{10,})$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)') and helper_regex.extract(sequence,r'(5{5,})$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)') and helper_regex.extract(sequence,r'(6{5,})$')],
    ]
    """

    # Minimum run length (as a string, spliced into the regexes below)
    # for a user to count as "settled" on one client.
    threshold_of_settle_down='5'

    target_groups={
        '1.new_user': [user_id for user_id,sequence in user_login_history.iteritems()],
        '2.new_user_start_from_5': [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(5)')],
        '3.new_user_start_from_360': [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6)')],
        '4.new_user_only_5': [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(5+)$')],
        '5.new_user_only_360': [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6+)$')],
        '6.new_user_both': [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)')],
        '7.new_user_both_and_finally_5': [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)') and helper_regex.extract(sequence,'(5{'+threshold_of_settle_down+',})$')],
        '8.new_user_both_and_finally_360': [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)') and helper_regex.extract(sequence,'(6{'+threshold_of_settle_down+',})$')],
        '9.new_user_both_and_not_stable': [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)') and not helper_regex.extract(sequence,'(5{'+threshold_of_settle_down+',})$') and not helper_regex.extract(sequence,'(6{'+threshold_of_settle_down+',})$')],
    }

    #export
    keys=sorted(target_groups.keys())

    for key in keys:
        user_id_collection=target_groups[key]

        print key
        print 'size:',len(user_id_collection)
        print '[last login date - msisdn - sub status - login history]'

        # most recently seen users first
        user_id_collection.sort(key=lambda user_id:user_last_login_date[user_id],reverse=True)

        for user_id in user_id_collection:
            # NOTE(review): user_id is a str here, but user_id_to_msisdn
            # keys come straight from fetch_dict (no str() conversion,
            # unlike the other export variant) -- confirm they are str
            # too, otherwise this lookup raises KeyError.
            print user_last_login_date[user_id],'\t',user_id_to_msisdn[user_id],'\t','sub' if user_id in user_id_in_sub else 'unsub','\t',user_login_history[user_id]

    # summary: per-group sizes and unsubscribed counts
    for key in keys:
        user_id_collection=target_groups[key]
        print '==',key,'=='
        print 'size:',len(user_id_collection)
        print 'unsub:',len([user_id for user_id in user_id_collection if not user_id in user_id_in_sub])

    # The triple-quote below opens the block that comments out the next
    # export() variant in the original file.
    """
# NOTE(review): this definition appears to sit inside a triple-quoted
# block opened just above -- i.e. it is commented out, and it shadows
# the earlier export(date_length=30) if ever re-enabled. Confirm before
# relying on it.
def export():
    """Print MSISDNs of non-Saudi users (phone not starting '+966') who
    were active yesterday, split by client platform and by account age.

    "New" users registered in the last 30 days; "old" users 30-90 days
    ago.  For each platform's daily-active set, up to 300 old and 300
    new matching msisdns are printed.
    """
    today=helper_regex.date_add(helper_regex.get_date_str_now(),-1)

    # new user stc
    start_time=helper_regex.date_add(today,-30)+' 05:00:00'
    end_time=helper_regex.date_add(today,-1)+' 05:00:00'

    sql=r'''
    SELECT [user_id],phone
    FROM [mozone_user].[dbo].[Profile] with(nolock)
    where [creationDate]>='%s' and [creationDate]<'%s'
    and user_name like '%%shabik.com%%'
    and phone not like '+966%%' and phone<>''
    ''' % (start_time,end_time)

    new_user_msisdn_dict=helper_sql_server.fetch_dict(conn_config=config.conn_stc,sql=sql)
    # normalise keys to str so membership tests against the raw-data
    # collections work (presumably those hold strings -- confirm)
    new_user_msisdn_dict=dict((str(i),j) for i,j in new_user_msisdn_dict.iteritems())
    print len(new_user_msisdn_dict)

    # old user stc
    date_length=30  # NOTE(review): assigned but never used below
    start_time=helper_regex.date_add(today,-90)+' 05:00:00'
    end_time=helper_regex.date_add(today,-30)+' 05:00:00'

    sql=r'''
    SELECT [user_id],phone
    FROM [mozone_user].[dbo].[Profile] with(nolock)
    where [creationDate]>='%s' and [creationDate]<'%s'
    and user_name like '%%shabik.com%%'
    and phone not like '+966%%' and phone<>''
    ''' % (start_time,end_time)

    old_user_msisdn_dict=helper_sql_server.fetch_dict(conn_config=config.conn_stc,sql=sql)
    old_user_msisdn_dict=dict((str(i),j) for i,j in old_user_msisdn_dict.iteritems())
    print len(old_user_msisdn_dict)

    # daily active user set, keyed by client platform
    date_temp=helper_regex.date_add(today,-1)

    target_sets={
        'JME':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                key='app_page_by_morange_version_type_daily_user_unique',sub_key='JME', \
                date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
        'S60-3':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                key='app_page_by_morange_version_type_daily_user_unique',sub_key='S60-3', \
                date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
        'S60-5':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                key='app_page_by_morange_version_type_daily_user_unique',sub_key='S60-5', \
                date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
        'Android':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                key='app_page_by_morange_version_type_daily_user_unique',sub_key='Android', \
                date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
        'iOS':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                key='app_page_by_morange_version_type_daily_user_unique',sub_key='iOS', \
                date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
        'BlackBerry':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                key='app_page_by_morange_version_type_daily_user_unique',sub_key='BlackBerry', \
                date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
        'All Client':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                key='app_page_daily_visitor_unique',sub_key='', \
                date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
    }

    for k,total_active_collection in target_sets.iteritems():

        # msisdns of old/new users active on this platform yesterday
        old_user_msisdn_set=set([msisdn for user_id,msisdn in old_user_msisdn_dict.iteritems() if user_id in total_active_collection])
        new_user_msisdn_set=set([msisdn for user_id,msisdn in new_user_msisdn_dict.iteritems() if user_id in total_active_collection])

        print
        print '## Non-STC Old Users',k
        # len>10 presumably keeps only full international numbers -- confirm
        for msisdn in list(old_user_msisdn_set)[0:300]:
            if len(msisdn)>10:
                print msisdn

        print
        print '## Non-STC New Users',k
        for msisdn in list(new_user_msisdn_set)[0:300]:
            if len(msisdn)>10:
                print msisdn
% (db_name,oem_name,category,key,sub_key,value,date)) print 'updated value:'+str(helper_mysql.get_one_value_int(r''' select `value` from `%s` where oem_name='%s' and category='%s' and `key`='%s' and `date`='%s' ''' % (db_name,oem_name,category,key,date))) exit() for i in range(1,100,1): sql=r''' select `sub_key`,`value` from raw_data_url_pattern where `oem_name`='STC' and `category`='moagent' and `key`='app_page_by_url_pattern_daily_visitor_unique' and `date`="%s" and `sub_key` like '%%jit%%' order by date desc; ''' % (helper_regex.date_add('2011-06-01',i),) #print sql print helper_regex.date_add('2011-07-01',i) result=helper_mysql.fetch_dict(sql) for k,v in result.iteritems(): print k,v exit()
row_view['raw_data_ais.oem_name'],row_view['raw_data_ais.category'],row_view['raw_data_ais.key'],row_view['raw_data_ais.sub_key'].replace("\'","\\\'"),patch_date_start,patch_date_end)) print sql db.query(sql) #db.close() if __name__=='__main__': for offset in range(0,3): print offset patch_date_start=helper_regex.date_add('2012-05-01',offset) patch_date_end=helper_regex.date_add('2012-05-04',offset) target_date=helper_regex.date_add('2012-05-05',offset) print target_date print patch_date_start print patch_date_end sql=r''' SELECT * FROM raw_data_ais /* FORCE INDEX (`date`) force index to prevent wide range scan */ where `oem_name`='Mozat'
row_view['raw_data.oem_name'],row_view['raw_data.category'],row_view['raw_data.key'],row_view['raw_data.sub_key'].replace("\'","\\\'"),patch_date_start,patch_date_end)) print sql db.query(sql) #db.close() if __name__=='__main__': for offset in range(0,1): print offset patch_date_start=helper_regex.date_add('2011-05-06',offset) patch_date_end=helper_regex.date_add('2011-05-09',offset) target_date=helper_regex.date_add('2011-05-10',offset) print target_date print patch_date_start print patch_date_end sql=r''' SELECT * FROM raw_data FORCE INDEX (`date`) /* force index to prevent wide range scan */ where `oem_name`='Vodafone'
''' #get reference value base reference_value_base_sql=sql_reference_columns_tpl % (sql_base_date_range_start,sql_base_date_range_end) #print reference_value_base_sql reference_value_base=helper_mysql.get_one_value_string(reference_value_base_sql) print 'reference_value_base: '+reference_value_base #loop taget dates for offset in range(0,target_date_length): current_date=helper_regex.date_add(target_date_start,offset) print 'current_date: '+current_date #get reference value of target date reference_value_target=helper_mysql.get_one_value_string( sql_reference_columns_tpl % (current_date,current_date)) print 'reference_value_target: '+reference_value_target #get base columns db.query(sql_base_columns) result_view = db.store_result()