def check():

    table_name='raw_data_shabik_360'
    target_date='2012-06-04'

    client_type_keys=[
        ["Shabik_360","moagent","app_page_by_morange_version_type_daily_user_unique","JME"],
        ["Shabik_360","moagent","app_page_by_morange_version_type_daily_user_unique","BlackBerry"],
        ["Shabik_360","moagent","app_page_by_morange_version_type_daily_user_unique","S60"],
        ["Shabik_360","moagent","app_page_by_morange_version_type_daily_user_unique","S60-3"],
        ["Shabik_360","moagent","app_page_by_morange_version_type_daily_user_unique","S60-5"],
        ["Shabik_360","moagent","app_page_by_morange_version_type_daily_user_unique","Android"],
        ["Shabik_360","moagent","app_page_by_morange_version_type_daily_user_unique","iOS"],
    ]

    result=[]
    
    for i in client_type_keys:
        set_i=helper_mysql.get_raw_collection_from_key(oem_name=i[0],category=i[1],key=i[2],sub_key=i[3],date=target_date,table_name=table_name)
        
        for j in client_type_keys:
            set_j=helper_mysql.get_raw_collection_from_key(oem_name=j[0],category=j[1],key=j[2],sub_key=j[3],date=target_date,table_name=table_name)
            
            if set_i==set_j:
                continue

            result.append('\t'.join([i[3],j[3],str(len(set_i & set_j)),str(len(set_i)),str(len(set_j))]))
        
    for r in result:
        print r
 def _get_daily_active_user_set(self, current_date):
     """Return the Shabik 360 daily visitor collection for ``current_date``.

     Reads key 'app_page_daily_visitor_unique' from table
     'raw_data_shabik_360'. Raises ``Exception`` when the fetched collection
     is empty (falsy).
     """
     lookup=dict(oem_name='Shabik_360',category='moagent',
                 key='app_page_daily_visitor_unique',sub_key='',
                 date=current_date,table_name='raw_data_shabik_360',db_conn=None)
     daily_visitors=helper_mysql.get_raw_collection_from_key(**lookup)
     if daily_visitors:
         return daily_visitors
     raise Exception('empty active_user_set on '+current_date)
def do_calculate(current_date):

    urls = [
        "mobileshabik.morange.com/mophoto_popular_photos.aspx?src_evflg_1",
        "mobileshabik.morange.com/mophoto_popular_photos.aspx?src_evflg_1&isprefetch",
        "mobileshabik.morange.com/mophoto_popular_photos.aspx?isprefetch&src_evflg_0",
        "mobileshabik.morange.com/mophoto_popular_photos.aspx?src_evflg_0",
        "mobileshabik.morange.com/mophoto_popular_photos.aspx?",
        "mobileshabik.morange.com/mophoto_popular_photos.aspx?isprefetch",
    ]

    urls = [
        "mobileshabik.morange.com/mophoto_photo.aspx?albumid&src_pe&tag&photoid&type",
        "mobileshabik.morange.com/mophoto_photo.aspx?photoid&albumid&src_pe&tag&type&isprefetch",
    ]

    urls = [
        "mobilevoda.morange.com/mophoto_popular_photos_[digits].aspx?src_app",
        "mobilevoda.morange.com/mophoto_popular_photos_[digits].aspx?src_feed",
        "mobilevoda.morange.com/mophoto_popular_photos_[digits].aspx?src_myphoto",
        "mobilevoda.morange.com/mophoto_popular_photos.aspx?start",
        "mobilevoda.morange.com/mophoto_popular_photos.aspx?src_app",
    ]

    urls = ["mobilevoda.morange.com/mophoto_photo.aspx?albumid&src_pe&tag&photoid&type"]

    collection_current = set([])

    for u in urls:
        collection_temp = helper_mysql.get_raw_collection_from_key(
            oem_name="Vodafone",
            category="moagent",
            key="app_page_by_url_pattern_daily_visitor_unique",
            sub_key=u,
            date=current_date,
            table_name="data_url_pattern_vodafone",
            db_conn=None,
        )
        collection_current |= collection_temp
        # print len(collection_current),len(collection_temp)

    collection_current_1 = set([])

    for u in urls:
        collection_temp = helper_mysql.get_raw_collection_from_key(
            oem_name="Vodafone",
            category="moagent",
            key="app_page_by_url_pattern_daily_visitor_unique",
            sub_key=u,
            date=helper_regex.date_add(current_date, -1),
            table_name="data_url_pattern_vodafone",
            db_conn=None,
        )
        collection_current_1 |= collection_temp
        # print len(collection_current),len(collection_temp)

    retained = collection_current_1 & collection_current
    # print set([1,2,3,4,8]) & set([9,3,4,8,10])

    print len(collection_current_1 | collection_current)
    print len(collection_current), len(collection_current_1)
    print len(retained)
    print 1.0 * len(retained) / len(collection_current_1)
def c(oem_name='',category='',key='',sub_key='',date='',table_name='raw_data',db_conn=None):
    """Fetch a raw collection for a single date or for a span of dates.

    When ``date`` is a ``str`` or is falsy, it is passed through unchanged to
    ``helper_mysql.get_raw_collection_from_key``. Any other truthy value is
    treated as an iterable of dates and ``min(date)`` / ``max(date)`` are used
    as the bounds for ``get_raw_collection_from_key_date_range``.
    """
    shared=dict(oem_name=oem_name,category=category,key=key,sub_key=sub_key,
                table_name=table_name,db_conn=db_conn)
    if not isinstance(date,str) and date:
        return helper_mysql.get_raw_collection_from_key_date_range(begin_date=min(date),end_date=max(date),**shared)
    return helper_mysql.get_raw_collection_from_key(date=date,**shared)
def stat_login():
    global date_min,date_max,base_user_sets

    oem_name='All'
    stat_category='daily_active_user_retain'
    db_name='raw_data_login_trend'

    # you can change day range (30 days)
    date_max=helper_regex.date_add(helper_regex.get_date_str_now(),-1)
    date_min=helper_regex.date_add(date_max,-30)

    for i in range(1,10000):

        current_date=helper_regex.date_add(date_min,i)
        print 'current date',current_date
        
        if current_date>date_max:
            break
        
        # new user set from db (overall daily active user)
        new_user_set=gumi_helper_user.get_user_ids_created_by_date(current_date)
        # daily active user SG
        active_user_sg = helper_mysql.get_raw_collection_from_key(oem_name='Gumi_puzzle', \
                        category='user',key='live_log_by_country_daily_uid_unique_collection_id',sub_key='SG', \
                        date=current_date, \
                        table_name='raw_data',db_conn=None)
        # daily active user US
        active_user_us = helper_mysql.get_raw_collection_from_key(oem_name='Gumi_puzzle', \
                        category='user',key='live_log_by_country_daily_uid_unique_collection_id',sub_key='US', \
                        date=current_date, \
                        table_name='raw_data',db_conn=None)
        # daily active user PL
        active_user_pl = helper_mysql.get_raw_collection_from_key(oem_name='Gumi_puzzle', \
                        category='user',key='live_log_by_country_daily_uid_unique_collection_id',sub_key='PL', \
                        date=current_date, \
                        table_name='raw_data',db_conn=None)
        # daily active user Unknow IP
        active_user_zz = helper_mysql.get_raw_collection_from_key(oem_name='Gumi_puzzle', \
                        category='user',key='live_log_by_country_daily_uid_unique_collection_id',sub_key='ZZ', \
                        date=current_date, \
                        table_name='raw_data',db_conn=None)

        base_user_sets={
            'pt-new-user-':new_user_set,
            'pt-new-user-SG':new_user_set & active_user_sg,
            'pt-new-user-US':new_user_set & active_user_us,
            'pt-new-user-PL':new_user_set & active_user_pl,
            'pt-new-user-ZZ':new_user_set & active_user_zz
        }
        for k,user_set in base_user_sets.iteritems():
            k=k.replace('*','')
            # calculate total
            print 'user base of',k,':',len(user_set)
            key='active_user_initial_%s_total_unique' % (k,)
            #sub_key = k[-2:]
            #if sub_key.find('-')>-1:
            #    sub_key=''
            helper_mysql.put_raw_data(oem_name,stat_category,key,'',len(user_set),db_name,current_date)
            helper_mysql.put_collection(collection=user_set,oem_name=oem_name,category=stat_category, \
                                    key=key,sub_key='',date=current_date,table_name=db_name)

        # calculate 
        ranges=[(1,8,1),(1,30,7),(1,60,14)]

        for r in ranges:
            start=r[0]
            end=r[1]
            step=r[2]

            accumulative_logined_user={
                'pt':set([]),
            }
                
            for i in range(start,end,step):
                print start
                print end
                logined_user={
                    'pt':set([]),
                }

                for day_delta in range(i,i+step):
                    target_date=helper_regex.date_add(current_date,day_delta)
                    collection = helper_mysql.get_raw_collection_from_key(oem_name='Gumi_puzzle', \
                        category='user',key='live_log_daily_uid_unique_collection_id',sub_key='', \
                        date=target_date, \
                        table_name='raw_data',db_conn=None) 
                    logined_user['pt'] = logined_user['pt'] | collection

                for k1,v1 in logined_user.iteritems():
                    accumulative_logined_user[k1] |= v1

                for k,user_set in base_user_sets.iteritems():
                    k=k.replace('*','')
                    
                    logined_user_temp=set([])

                    if k.find('pt')>-1:
                        logined_user_temp=logined_user['pt']
                        accumulative_logined_user_temp=accumulative_logined_user['pt']

                    base_user_logined_user= user_set & logined_user_temp
                    key='daily_active_user_'+str(step)+'_day_logined_%s_total_unique' % (k,)
                    helper_mysql.put_raw_data(oem_name,stat_category,key,i,len(base_user_logined_user),db_name,current_date)
                    
                    base_user_no_logined_user= user_set - accumulative_logined_user_temp 
                    key='daily_active_user_'+str(step)+'_day_no_logined_%s_total_unique' % (k,)
                    helper_mysql.put_raw_data(oem_name,stat_category,key,i,len(base_user_no_logined_user),db_name,current_date)

    return
def export(date_length=30):
    

    user_login_history={}
    user_last_login_date={}



    today=helper_regex.date_add(helper_regex.get_date_str_now(),-17)

    start_time=helper_regex.date_add(today,-date_length)+' 05:00:00'
    end_time=helper_regex.date_add(today,-1)+' 05:00:00'



    # user_id -> msisdn

    sql=r'''

    SELECT [user_id],replace([user_name],'@shabik.com','') as msisdn
    FROM [mozone_user].[dbo].[Profile] with(nolock)
    where [creationDate]>='%s' and [creationDate]<'%s'
    and user_name like '%%shabik.com%%'

    ''' % (start_time,end_time)

    user_id_to_msisdn=helper_sql_server.fetch_dict(conn_config=config.conn_stc,sql=sql)



    # new user user_id

    new_user_collection=user_id_to_msisdn.keys()
    new_user_collection=set([str(user_id) for user_id in new_user_collection])



    # subscription status

    sql=r'''


    select distinct '0'+replace(msisdn,'+966','')+'@shabik.com' as [user_name]
    into #tmp
    from db86.shabik_mt.dbo.accounts with(nolock)
    where 
    is_deleted=0


    SELECT [user_id]
    FROM [mozone_user].[dbo].[Profile] with(nolock)
    where [creationDate]>='%s' and [creationDate]<'%s'
    and user_name like '%%shabik.com%%'
    and user_name in (
		select user_name
		from #tmp
    )

    drop table #tmp

    ''' % (start_time,end_time)

    user_id_in_sub=helper_sql_server.fetch_set(conn_config=config.conn_stc,sql=sql)
    user_id_in_sub=set([str(user_id) for user_id in user_id_in_sub])



    for i in range(date_length,-17,-1):
        
        date_temp=helper_regex.date_add(today,-i)
        
        shabik_5_collection=helper_mysql.get_raw_collection_from_key(oem_name='STC',category='moagent', \
                                        key='app_page_only_shabik_5_daily_visitor_unique',sub_key='', \
                                        date=date_temp,table_name='raw_data',db_conn=None)

        shabik_5_collection=shabik_5_collection & new_user_collection

        for user_id in shabik_5_collection:
            user_login_history.setdefault(user_id,'')
            user_login_history[user_id]+='5'

            user_last_login_date.setdefault(user_id,'')
            user_last_login_date[user_id]=date_temp
            
        shabik_360_collection=helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                                        key='app_page_daily_visitor_unique',sub_key='', \
                                        date=date_temp,table_name='raw_data_shabik_360',db_conn=None)

        shabik_360_collection=shabik_360_collection & new_user_collection

        for user_id in shabik_360_collection:
            user_login_history.setdefault(user_id,'')
            user_login_history[user_id]+='6'

            user_last_login_date.setdefault(user_id,'')
            user_last_login_date[user_id]=date_temp


        


    #calculate

    """
    target_groups_names=[
        '1.More than 2 weeks users using Shabik 360 (Totally New User to Shabik) [only using 360]',
        '2.Users who Shifted from Shabik360 to Shabik 5 [for each at least using 3 days, still in sub]',
        '3.Unsubscribed users of Shabik 360 [last using 360 for >=7 days and then unsub]',
        '4.Users who uses Shabik 5 more than 2 weeks [actually is online for >=14 days]',
        '5.Users who shifted from Shabik 5 to Shabik 360 [for each at least using 3 days, still in sub]',
        '6.User base of new user in last 50 days, which is used to generate above lists',
    ]

    target_groups=[
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6{14,})$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(6{3,}5{3,}$)')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(6{7,}$)') and user_id in user_id_in_sub],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(5{14,}$)')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(5{3,}6{3,}$)') and user_id in user_id_in_sub],
        [user_id for user_id,sequence in user_login_history.iteritems()],
    ]

    target_groups_names={
        'User only use Shabik 360':
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6+)$')],
        'User only use Shabik 360 [more than 10d]':
        ,
        'User only use Shabik 5',
        'User only use Shabik 5 [more than 10d]',
        'User use both Shabik 360 / Shabik 5',
        'User used both and choosed Shabik 5 [recently used only Shabik 5 for 5d]',
        'User used both and choosed Shabik 5 [recently used only Shabik 360 for 5d]',
    }

    target_groups=[
        
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6{10,})$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(5+)$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(5{10,})$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)') and  helper_regex.extract(sequence,r'(5{5,})$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)') and  helper_regex.extract(sequence,r'(6{5,})$')],
    ]
    """

    threshold_of_settle_down='5'

    target_groups={
        '1.new_user':
            [user_id for user_id,sequence in user_login_history.iteritems()],
        '2.new_user_start_from_5':
            [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(5)')],
        '3.new_user_start_from_360':
            [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6)')],
        '4.new_user_only_5':
            [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(5+)$')],
        '5.new_user_only_360':
            [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6+)$')],
        '6.new_user_both':
            [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)')],
        '7.new_user_both_and_finally_5':
            [user_id for user_id,sequence in user_login_history.iteritems() 
            if helper_regex.extract(sequence,r'(56|65)') and helper_regex.extract(sequence,'(5{'+threshold_of_settle_down+',})$')],
        '8.new_user_both_and_finally_360':
            [user_id for user_id,sequence in user_login_history.iteritems() 
            if helper_regex.extract(sequence,r'(56|65)') and helper_regex.extract(sequence,'(6{'+threshold_of_settle_down+',})$')],
        '9.new_user_both_and_not_stable':
            [user_id for user_id,sequence in user_login_history.iteritems() 
            if helper_regex.extract(sequence,r'(56|65)') 
            and not helper_regex.extract(sequence,'(5{'+threshold_of_settle_down+',})$') 
            and not helper_regex.extract(sequence,'(6{'+threshold_of_settle_down+',})$')],
    }

    #export

    keys=sorted(target_groups.keys())

    for key in keys:

        user_id_collection=target_groups[key]
        print key
        print 'size:',len(user_id_collection)
        
        print '[last login date - msisdn - sub status - login history]'
        
        user_id_collection.sort(key=lambda user_id:user_last_login_date[user_id],reverse=True)
        for user_id in user_id_collection:
            print user_last_login_date[user_id],'\t',user_id_to_msisdn[user_id],'\t','sub' if user_id in user_id_in_sub else 'unsub','\t',user_login_history[user_id]


    for key in keys:

        user_id_collection=target_groups[key]
        print '==',key,'=='
        print 'size:',len(user_id_collection)
        print 'unsub:',len([user_id for user_id in user_id_collection if not user_id in user_id_in_sub])
        
        """
def export():

    today=helper_regex.date_add(helper_regex.get_date_str_now(),-1)

    # new user stc

    start_time=helper_regex.date_add(today,-30)+' 05:00:00'
    end_time=helper_regex.date_add(today,-1)+' 05:00:00'

    sql=r'''

    SELECT [user_id],phone
    FROM [mozone_user].[dbo].[Profile] with(nolock)
    where [creationDate]>='%s' and [creationDate]<'%s'
    and user_name like '%%shabik.com%%'
    and phone not like '+966%%'
    and phone<>''

    ''' % (start_time,end_time)

    new_user_msisdn_dict=helper_sql_server.fetch_dict(conn_config=config.conn_stc,sql=sql)
    new_user_msisdn_dict=dict((str(i),j) for i,j in new_user_msisdn_dict.iteritems())
    print len(new_user_msisdn_dict)



    # old user stc

    date_length=30
    start_time=helper_regex.date_add(today,-90)+' 05:00:00'
    end_time=helper_regex.date_add(today,-30)+' 05:00:00'

    sql=r'''

    SELECT [user_id],phone
    FROM [mozone_user].[dbo].[Profile] with(nolock)
    where [creationDate]>='%s' and [creationDate]<'%s'
    and user_name like '%%shabik.com%%'
    and phone not like '+966%%'
    and phone<>''

    ''' % (start_time,end_time)

    old_user_msisdn_dict=helper_sql_server.fetch_dict(conn_config=config.conn_stc,sql=sql)
    old_user_msisdn_dict=dict((str(i),j) for i,j in old_user_msisdn_dict.iteritems())
    print len(old_user_msisdn_dict)



    # daily active user set
    
    date_temp=helper_regex.date_add(today,-1)

    target_sets={

        'JME':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                                        key='app_page_by_morange_version_type_daily_user_unique',sub_key='JME', \
                                        date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
    
        'S60-3':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                                        key='app_page_by_morange_version_type_daily_user_unique',sub_key='S60-3', \
                                        date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
    
        'S60-5':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                                        key='app_page_by_morange_version_type_daily_user_unique',sub_key='S60-5', \
                                        date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
    
        'Android':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                                        key='app_page_by_morange_version_type_daily_user_unique',sub_key='Android', \
                                        date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
    
        'iOS':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                                        key='app_page_by_morange_version_type_daily_user_unique',sub_key='iOS', \
                                        date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
    
        'BlackBerry':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                                        key='app_page_by_morange_version_type_daily_user_unique',sub_key='BlackBerry', \
                                        date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
    
        'All Client':helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                                        key='app_page_daily_visitor_unique',sub_key='', \
                                        date=date_temp,table_name='raw_data_shabik_360',db_conn=None),
    
    }



    for k,total_active_collection in target_sets.iteritems():
        
        old_user_msisdn_set=set([msisdn for user_id,msisdn in old_user_msisdn_dict.iteritems() if user_id in total_active_collection])
        new_user_msisdn_set=set([msisdn for user_id,msisdn in new_user_msisdn_dict.iteritems() if user_id in total_active_collection])


        print
        print '## Non-STC Old Users',k
        for msisdn in list(old_user_msisdn_set)[0:300]:
            if len(msisdn)>10:
                print msisdn
        
        
        print    
        print '## Non-STC New Users',k
        for msisdn in list(new_user_msisdn_set)[0:300]:
            if len(msisdn)>10:
                print msisdn