コード例 #1
0
def get_cached_sql_result_as_dict(sql,db_conn):
    if db_conn['db_type'] not in ('sql_server','mysql'):
        raise Error('db_type error')
    unique_key=helper_math.md5(','.join(sorted(db_conn.values()))+sql)

    cached_result=helper_mysql.get_dict_of_raw_collection_from_key(oem_name='',category='', key='',\
                                sub_key=unique_key,date='',table_name='raw_data_cache_sql_result',db_conn=None)

    if cached_result:
        print 'get from cache:',unique_key
        return cached_result
    
    if db_conn['db_type']=='sql_server':
        cached_result=helper_sql_server.fetch_dict(conn_config=db_conn,sql=sql)
    else:
        cached_result=helper_mysql.fetch_dict(db_conn=db_conn,sql=sql)

    helper_mysql.put_collection_with_value(collection=cached_result,oem_name='',category='',key='',sub_key=unique_key, \
                                table_name='raw_data_cache_sql_result',date='',created_on=None,db_conn=None)
    
    print 'push into cache:',unique_key
    return cached_result
コード例 #2
0
    def _do_calculation(self):
        
        #generate new user set

        temp_new_user_dict={}

        temp_date=self.begin_date
        temp_end_date=self.end_date

        while True:
            if temp_date>temp_end_date:
                break
            temp_daily_new_user_set=self._get_daily_created_user_dict(temp_date)

            for user_id in temp_daily_new_user_set:
                temp_new_user_dict[user_id]=temp_date
            
            temp_date=helper_regex.date_add(temp_date,1)
            
        print len(temp_new_user_dict)
        print temp_new_user_dict
        #exit()

        #generate user active hitory

        self.temp_new_user_active_history_dict={}

        temp_date=self.begin_date
        temp_end_date=helper_regex.date_add(self.end_date,self.observation_day_length-1)

        while True:
            if temp_date>temp_end_date:
                break
            temp_active_user_set=self._get_daily_active_user_set(temp_date)
            for user_id in temp_active_user_set:
                if temp_new_user_dict.has_key(user_id):
                    self.temp_new_user_active_history_dict.setdefault(user_id,[])
                    if temp_date<=helper_regex.date_add(temp_new_user_dict[user_id],self.observation_day_length-1):
                        self.temp_new_user_active_history_dict[user_id].append(temp_date)
            
            temp_date=helper_regex.date_add(temp_date,1)
        
        print len(self.temp_new_user_active_history_dict)
        print self.temp_new_user_active_history_dict
        #exit()

        #generate user action history

        temp_new_user_action_history_dict={}

        temp_date=self.begin_date
        temp_end_date=helper_regex.date_add(self.end_date,self.observation_day_length-1)

        while True:
            if temp_date>temp_end_date:
                break
            temp_user_action_dict=self._get_daily_user_action(temp_date)
            temp_user_action_dict=dict((k,v) for k,v in temp_user_action_dict.iteritems() if k in temp_new_user_dict)
            temp_new_user_action_history_dict[temp_date]=temp_user_action_dict
            
            temp_date=helper_regex.date_add(temp_date,1)
        
        print len(temp_new_user_action_history_dict)
        print temp_new_user_action_history_dict
        #exit()

        #generate user group

        temp_user_group={}
        for user_group_by_max_active_time in range(1,self.observation_day_length+1,self.observation_day_step):
            temp_user_group[user_group_by_max_active_time]=set([user_id for user_id,history \
                                                            in self.temp_new_user_active_history_dict.iteritems() \
                                                            if len(history)>=user_group_by_max_active_time \
                                                            and len(history)<user_group_by_max_active_time \
                                                            +self.observation_day_step])

        print len(temp_user_group)
        print temp_user_group
        #exit()
        
        #generate evolution matrix

        temp_matrix_of_user_action={}                              #   Dimension-1:time    Dimension-2:user groups
        temp_matrix_of_user_online_day={}

        for active_time_period in range(1,self.observation_day_length+1,self.observation_day_step):
            for user_group_by_max_active_time in range(1,self.observation_day_length+1,self.observation_day_step):
                
                temp_matrix_of_user_action.setdefault(active_time_period,{})
                temp_matrix_of_user_action[active_time_period].setdefault(user_group_by_max_active_time,0)
                
                temp_matrix_of_user_online_day.setdefault(active_time_period,{})
                temp_matrix_of_user_online_day[active_time_period].setdefault(user_group_by_max_active_time,0)

                total_action=0
                total_online_day=0

                temp_user_set=temp_user_group[user_group_by_max_active_time]

                for user_id in temp_user_set:
                    """
                    dates=self.temp_new_user_active_history_dict[user_id][active_time_period-1:min(active_time_period+self.observation_day_step-1,len(self.temp_new_user_active_history_dict[user_id]))]
                    total_online_day+=len(dates)
                    
                    for d in dates:
                        if temp_new_user_action_history_dict[d].has_key(user_id):
                            total_action+=temp_new_user_action_history_dict[d][user_id]
                    """

                    if active_time_period>len(self.temp_new_user_active_history_dict[user_id]):
                        continue

                    dates=self.temp_new_user_active_history_dict[user_id]
                    
                    temp_begin_date=temp_new_user_dict[user_id] \
                                    if active_time_period==1 \
                                    else helper_regex.date_add(dates[active_time_period-1-1],1) # include those actions happend when user is offline

                    temp_end_date=dates[-1] \
                                  if active_time_period+self.observation_day_step-1>len(self.temp_new_user_active_history_dict[user_id]) \
                                  else dates[active_time_period+self.observation_day_step-1-1]
                    
                    for temp_d in helper_regex.date_iterator(temp_begin_date,temp_end_date):
                        if temp_new_user_action_history_dict[temp_d].has_key(user_id):
                            total_action+=temp_new_user_action_history_dict[temp_d][user_id]                        

                    total_online_day+=min(active_time_period+self.observation_day_step-1,len(self.temp_new_user_active_history_dict[user_id])) \
                                      -(active_time_period-1)

                temp_matrix_of_user_action[active_time_period][user_group_by_max_active_time]=total_action    
                temp_matrix_of_user_online_day[active_time_period][user_group_by_max_active_time]=total_online_day    

        print temp_matrix_of_user_action
        print temp_matrix_of_user_online_day
            
        #export result

        analysis_factor_name=self.analysis_factor_name      #'Mutual Friend Relation'
        table_name=self.table_name                          #'raw_data_test'
        oem_name=self.oem_name                              #'Shabik_360'
        category=self.category                              #'evolution_analysis'

        key_prefix=analysis_factor_name.lower().replace(' ','_')+'_ol%s,n%s,s%s_evolution_' % (self.observation_day_length,1+helper_regex.get_day_diff_from_date_str(self.end_date,self.begin_date),self.observation_day_step)
        date=self.begin_date
        view_name='Report %s Evolution Analysis %s (%s Days Step, %s Days, %s)' % (oem_name,analysis_factor_name,self.observation_day_step, \
                                                                                   self.observation_day_length, self.begin_date)
        view_description=r'''
        
        Date Range of Observed User: %s to %s
        Total Observed New User: %s
        Observing Users' First %s Days

        ''' % (self.begin_date,self.end_date,len(self.temp_new_user_active_history_dict),self.observation_day_length)

        for active_time_period,v in temp_matrix_of_user_action.iteritems():
            for user_group_by_max_active_time,total_action in v.iteritems():
                
                helper_mysql.put_raw_data(oem_name=oem_name,category=category,key=key_prefix+'unique_base', \
                                          sub_key='g'+str(user_group_by_max_active_time).zfill(2)+'_a'+str(active_time_period).zfill(2), \
                                          value=temp_matrix_of_user_action[active_time_period][user_group_by_max_active_time], \
                                          date=date,table_name=table_name)

                helper_mysql.put_raw_data(oem_name=oem_name,category=category,key=key_prefix+'unique', \
                                          sub_key='g'+str(user_group_by_max_active_time).zfill(2)+'_a'+str(active_time_period).zfill(2), \
                                          value=len(temp_user_group[user_group_by_max_active_time]), \
                                          date=date,table_name=table_name)

                helper_mysql.put_collection_with_value(oem_name=oem_name,category=category,key=key_prefix+'unique', \
                                          sub_key='g'+str(user_group_by_max_active_time).zfill(2)+'_a'+str(active_time_period).zfill(2), \
                                          collection=temp_user_group[user_group_by_max_active_time], \
                                          date=date,table_name=table_name)

                adjusted_base=0
                if temp_matrix_of_user_online_day[active_time_period][user_group_by_max_active_time]>0:
                    adjusted_base=1.0*self.observation_day_step \
                                        *temp_matrix_of_user_action[active_time_period][user_group_by_max_active_time] \
                                        /temp_matrix_of_user_online_day[active_time_period][user_group_by_max_active_time]
                                        
                helper_mysql.put_raw_data(oem_name=oem_name,category=category,key=key_prefix+'unique_base_adjusted', \
                                          sub_key='g'+str(user_group_by_max_active_time).zfill(2)+'_a'+str(active_time_period).zfill(2), \
                                          value=adjusted_base,date=date,table_name=table_name)

                helper_mysql.put_raw_data(oem_name=oem_name,category=category,key=key_prefix+'total', \
                                          sub_key='g'+str(user_group_by_max_active_time).zfill(2)+'_a'+str(active_time_period).zfill(2), \
                                          value=len(self.temp_new_user_active_history_dict), \
                                          date=date,table_name=table_name)
        
        


        # generate view sql
        
        sql_template=r'''

        SELECT

        concat(
        'Online for '
        ,replace(SUBSTRING_INDEX(`sub_key`,'_',1),'g',''),
        'd-'
        ,lpad(replace(SUBSTRING_INDEX(`sub_key`,'_',1),'g','')+%(observation_day_step)s-1,2,'0')
        ,'d') as `Group Name`
        
        /*
        ,max(if(`oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique',`value`,0)) as `Group Size`
        */

        ,concat(
            '<a href="./downloadcol.py?collection_id='
            ,max( if( `oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique_collection_id' , `value` , 0 ) )
            ,'">'
            ,max( if( `oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique' , `value` , 0 ) )
            ,'</a>'
        ) AS `Group Size`



        ,concat(format(100.0
        *max(if(`oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique',`value`,0)) 
        /max(if(`oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)stotal',`value`,0)) 
        ,2),'%%%%') as `Group Proportion`

        %%(column_sql)s

        FROM `%(table_name)s` 

        WHERE (

        `oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique'
        or `oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique_base'
        or `oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique_collection_id'
        or `oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique_base_adjusted'
        or `oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)stotal'

        )and date='%(date)s'

        GROUP BY `Group Name`
        ORDER BY `Group Name` DESC

        ''' % {
            'observation_day_step':self.observation_day_step,
            'oem_name':oem_name,
            'category':category,
            'key_prefix':key_prefix,
            'table_name':table_name,
            'date':date,
        }


        sql_column_template=r'''
        
        ,case
        when max(if(`oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique_base' and SUBSTRING_INDEX(`sub_key`,'_',-1)='a%(active_time_period)s',`value`,0))>0 then
            max(if(`oem_name`='%(oem_name)s' and category='%(category)s' and `key` = '%(key_prefix)sunique_base_adjusted' and SUBSTRING_INDEX(`sub_key`,'_',-1)='a%(active_time_period)s',`value`,0)) 
        else '-'
        end as `Avg %(analysis_factor_name)s Created in [%(active_time_period)sd,%(active_time_period_end)sd]`

        '''

        sql_columns=[]

        for active_time_period in range(1,self.observation_day_length+1,self.observation_day_step):
            temp_sql_column=sql_column_template % {
                'observation_day_step':self.observation_day_step,
                'oem_name':oem_name,
                'category':category,
                'key_prefix':key_prefix,
                'table_name':table_name,
                'date':date,
                'active_time_period':str(active_time_period).zfill(2),
                'active_time_period_end':str(active_time_period+self.observation_day_step-1).zfill(2),
                'analysis_factor_name':analysis_factor_name,
            }
            sql_columns.append(temp_sql_column)
            
        
        view_sql=sql_template % {
            'column_sql':'\n'.join(sql_columns)
        }

        print view_sql

        import helper_view
        helper_view.replace_view(view_name=view_name,view_sql=view_sql,view_description=view_description,charting_javascript=r'''
        
        add_highcharts_basic_line_chart({
            'tab_name':'Trend Comparison',
            'column_names_pattern':/Online /ig,
            'marginRight':300,
            'reverse_key_column':false,
            'reverse_table':true,
            'exclude_rows':['Group Size','Group Proportion'],
            'reverse_column':true
        });

        ''')

        helper_view.grant_view(view_name,'5')
        helper_view.grant_view(view_name,'17')

        pass