Example #1
def check_stat_plan_log_file_reading_completion(stat_plan):
    
    """
    self.log_table_name='raw_data_monitor'
    self.log_oem_name=self.script_file_name
    self.log_category=helper_ip.get_current_server_ip()+'_'+self.start_time_str.replace(' ','_').replace(':','_').replace('-','_')+'_'+self.uuid

    helper_mysql.put_raw_data(oem_name=self.log_oem_name, \
                              category=self.log_category, \
                              key='original_file_size', \
                              sub_key=log_file_name, \
                              value=helper_file.get_file_size(log_file_name), \
                              table_name=self.log_table_name)
    """

    # 1.check total log file number

    current_date=helper_regex.extract(stat_plan.log_category,r'_(\d{4}_\d{2}_\d{2})_').replace('_','-')
    previous_date=helper_regex.date_add(current_date,-1)
    previous_date_category_like=helper_regex.extract(stat_plan.log_category.replace(current_date,previous_date),r'([\d\.]+_\d{4}_\d{2}_\d{2})')

    sql=r'''

    select 

    (select count(distinct sub_key)
    from raw_data_monitor
    where oem_name='%s'
    and category='%s')

    -

    (select count(distinct sub_key)
    from raw_data_monitor
    where oem_name='%s'
    and category like '%s%%')

    ''' % (stat_plan.log_oem_name,stat_plan.log_category,stat_plan.log_oem_name,previous_date_category_like)

    print sql

    distance=helper_mysql.get_one_value_string(sql)

    print distance
    return distance
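
# Hedged usage sketch: the check only needs log_oem_name and log_category, so a
# hypothetical stand-in object (not in the original code) is enough to show the
# call; a result of '0' means today's distinct file count matches yesterday's.
#class _StatPlanStub:
#    log_oem_name='daily_stat_plan.py'
#    log_category='192.168.0.1_2010_08_05_00_00_00_uuid'
#print check_stat_plan_log_file_reading_completion(_StatPlanStub())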
def get_http_response_size(full_file_path):
    # read the HTTP Content-Length header; return -1 when it is missing or the request fails
    try:
        response_headers=str(urllib.urlopen(full_file_path).info())
        length=int(helper_regex.extract(response_headers,r'Content\-Length:\s*(\d+)'))
    except:
        length=-1
    return length
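# Hedged example (URL is illustrative only; expect -1 when the server omits
# Content-Length or the request fails):
#print get_http_response_size('http://example.com/')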
def get_sub_dir_list_from_dir(base_path=os.curdir,name_pattern='(.)'):
    # return full paths of the immediate sub-directories whose path matches name_pattern
    base_path=base_path.rstrip('/').rstrip('\\')
    result_sub_dir_list=[]
    dirs = [name for name in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, name))]
    for d in dirs:
        if helper_regex.extract(os.path.join(base_path, d),name_pattern):
            result_sub_dir_list.append(os.path.join(base_path, d))
    return result_sub_dir_list
def get_filtered_file_list_from_dir_tree(base_path=os.curdir,name_pattern='(.)'):
    # walk the whole tree under base_path and keep files whose full path matches name_pattern
    base_path=base_path.rstrip('/').rstrip('\\')
    result_file_list=[]
    for path, dirs, files in os.walk(os.path.abspath(base_path)):
        for file_name in files:
            if helper_regex.extract(os.path.join(path,file_name),name_pattern):
                result_file_list.append(os.path.join(path, file_name))
    return result_file_list
def prepare_directory_level(file_full_name,root_dir=config.conn_stat_portal['collection_root_dir'],step=3):
    # split the extension-less file name into chunks of `step` characters and
    # create one directory level per chunk under root_dir
    file_name=helper_regex.extract(file_full_name,r'^\s*(.*?)(?:\.\w+)$')
    path=root_dir
    if not file_name:
        return path
    directory_levels=[file_name[i:i+step] for i in range(0, len(file_name)-step, step)]
    for level in directory_levels:
        path+='\\'+level
        if not os.path.exists(path):
            os.makedirs(path)
    return path
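
# Worked example (hypothetical file name; root_dir comes from config): with the
# default step=3, 'abcdefgh.log' yields the levels 'abc' and 'def', so the
# function creates and returns <root_dir>\abc\def.
#print prepare_directory_level('abcdefgh.log')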
def do_update():
    script_path='E:\\AppServ\\www\\xstat\\htdocs\\subpages\\view.php'
    script_content=helper_file.get_content_from_file(script_path)
    old_token=helper_regex.extract(script_content,r'token=(\w+)')
    url=helper_regex.extract(script_content,r'href="(http://192.168.0.158:81/phpmyadmin-2[^"]+)"').replace("<?=$_PAGE['view_id']?>",'150')

    page_content=helper_file.get_http_content(url)
    new_token=helper_regex.extract(page_content,r'token=(\w+)')

    new_token='da1b5116e305194ca8fd7806df008453' #manual override kept from the original run; it shadows the token fetched above

    script_content=script_content.replace(old_token,new_token)
    #print page_content
    helper_file.put_content_to_file(script_content,script_path)
    print old_token,'to',new_token

    url=url.replace(old_token,new_token)
    helper_file.get_http_content(url)

    script_path='E:\\AppServ\\www\\xstat\\htdocs\\subpages\\view.php'
    script_content=helper_file.get_content_from_file(script_path)
    current_token=helper_regex.extract(script_content,r'token=(\w+)')
    print current_token
Example #7
def filter_and_count_distinct(list_obj,pattern='(.*)',ignore_empty=True):
    # count items whose str() matches pattern; also count distinct captured
    # values and the average number of items per distinct value
    count=0
    count_distinct=0
    count_distinct_dict={}

    for i in list_obj:
        flag=helper_regex.extract(str(i),pattern)
        if ignore_empty and not flag:
            continue
        if not count_distinct_dict.has_key(flag):
            count_distinct_dict[flag]=0
        count_distinct_dict[flag]+=1
        count+=1

    count_distinct=len(count_distinct_dict)
    avg=0
    if count_distinct>0:
        avg=1.0*count/count_distinct

    return (count,count_distinct,avg,count_distinct_dict)
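
# Hedged usage sketch: counting distinct users in a small log-line list
# (the sample lines are invented for illustration).
#lines=['user=alice ok','user=bob ok','user=alice fail','no user here']
#print filter_and_count_distinct(lines,pattern=r'user=(\w+)')
# -> (3, 2, 1.5, {'alice': 2, 'bob': 1})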
Example #8
def calculate_count_distinct_named_collection(date_unit,oem_name,category,key,sub_key,date,table_name='raw_data',allow_collection_empty=False):
    
    #date_unit accepts 1,2,3,...,'weekly','monthly'
    #for 'weekly', a result is produced only when date is a Sunday; otherwise zeros are returned
    #for 'monthly', a result is produced only when date is the last day of the month; otherwise zeros are returned
    #in all cases, no value is produced unless every required collection is ready

    unique=0
    total=0
    average=0

    if not date:
        return unique,total,average

    if date_unit=='weekly':
        if helper_regex.get_weekday_from_date_str(date)!=7:
            return unique,total,average
        date_unit=7

    elif date_unit=='monthly':
        if helper_regex.extract(helper_regex.date_add(date,1),r'\d+\-\d+\-(\d+)')!='01':
            return unique,total,average

        first_date=helper_regex.extract(date,r'(\d+\-\d+\-)\d+')+'01'
        date_unit=helper_regex.get_day_diff_from_date_str(date,first_date)+1

    if date_unit<1:
        date_unit=1        

    key=key.replace('_collection_id','')
    sql=_get_sql_select_collection_id_by_date(oem_name,category,key,sub_key,table_name)

    collection_id_dict=helper_mysql.fetch_dict(sql)

    key_temp=collection_id_dict.keys()
    key_temp.sort(reverse=True)


    sql=_get_sql_select_collection_id_by_date(oem_name,category,key+'_base',sub_key,table_name)
    
    #print sql
    collection_base_dict=helper_mysql.fetch_dict(sql)
    #print collection_base_dict

    """
    print 'existing collection list:'    
    for i in key_temp[0:65]:
        print i+': '+str(collection_id_dict[i])
    """

    col_1=set([])
    base_total=0
    for i in range(0,date_unit):
        date_temp=helper_regex.date_add(date,-i)

        col_id_temp=collection_id_dict[date_temp] if collection_id_dict.has_key(date_temp) else 0
        #col_temp=helper_mysql.get_raw_collection_by_id(col_id_temp)

        col_temp=helper_collection.get_named_collection(table_name=table_name,oem_name=oem_name,category=category, \
                                                        key=key,sub_key=sub_key,date=date_temp)
        col_1 |= col_temp

        base_total+=int(collection_base_dict[date_temp]) if collection_base_dict.has_key(date_temp) else 0
        
        if col_id_temp==0: #data for this date is missing: return zeros unless empty collections are explicitly allowed
            if allow_collection_empty:
                print date_temp,table_name,oem_name,category,key,sub_key,date_temp,'collection empty error! passed.'
            else:
                print date_temp,table_name,oem_name,category,key,sub_key,date_temp,'collection empty error! exit.'
                return unique,total,average

    unique=len(col_1)
    total=base_total
    average=base_total*1.0/unique if unique>0 else 0

    return unique,total,average
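
# Hedged usage sketch (identifiers follow the raw_data naming above; the concrete
# oem_name/category/key values are invented): 2010-08-01 was a Sunday, so a
# 'weekly' unit would produce the 7-day rolling figures for that date.
#unique,total,average=calculate_count_distinct_named_collection(
#    date_unit='weekly',oem_name='STC',category='moagent',
#    key='app_page_daily_visitor_unique_collection_id',sub_key='',
#    date='2010-08-01')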
def process_line(line='',exist='',group_key=''):
    # extract a UNC shared-folder path such as \\10.0.0.1\share\sub\ from the line
    shared_folder_dir=helper_regex.extract(line,r'(\\{2,}\d+\.\d+\.\d+\.\d+\\+(?:\w+\\+)*)')
    print shared_folder_dir
Example #10
def _is_in_dict_keys(line,field_def='',the_dict={}):
    # field_def is either a callable deriving the key from the line, or a regex
    # pattern whose first capture group is used as the key
    if callable(field_def):
        return the_dict.has_key(field_def(line))
    else:
        return the_dict.has_key(helper_regex.extract(line,field_def))
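
# Hedged example of the two field_def modes (sample data invented):
#user_dict={'alice':1}
#print _is_in_dict_keys('user=alice',r'user=(\w+)',user_dict)            # regex mode -> True
#print _is_in_dict_keys('user=alice',lambda l:l.split('=')[1],user_dict) # callable mode -> True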
def zip_file_to_storage(source_file_smb_path,storage_root):

    # split an SMB path like \\server\share\dir\file.ext into its directory part
    # (mirrored under storage_root) and its bare file name
    target_path=storage_root.rstrip('\\')+'\\'+helper_regex.extract(source_file_smb_path,r'\\(\\(?:[^\\]+\\)*)')
    file_name=helper_regex.extract(source_file_smb_path,r'([^\\]+)$')

    helper_file.prepare_directory_on_windows(target_path)
found_sqls=[]
#re_key_sql=re.compile(r'SQL1:.*?\nSQL2:.*?\n')
re_key_sql=re.compile(r'SQL:.*?\n')

#filePath: glob pattern pointing at the log files to scan
files=glob.glob(filePath)

for f in files:
    file=open(f,'r',1024*1024)
    content=file.read(-1)
    print 'file: '+f+' ('+str(len(content))+')'
    m=re.findall(re_key_sql,content)
    
    if m:
        for i in m:
            #sql_delete=helper_regex.extract(i,r'SQL1:(.*?)\n').replace('raw_data_test','raw_data_debug')
            sql_insert=helper_regex.extract(i,r'SQL:(.*?)\n')#.replace('raw_data_test','raw_data_debug')
            #found_sqls.append(sql_delete)
            found_sqls.append(sql_insert)

    file.close()

for k in found_sqls:
    print k+';'

def export(date_length=30):
    

    user_login_history={}
    user_last_login_date={}



    today=helper_regex.date_add(helper_regex.get_date_str_now(),-17) #note: 'today' is deliberately anchored 17 days in the past

    start_time=helper_regex.date_add(today,-date_length)+' 05:00:00'
    end_time=helper_regex.date_add(today,-1)+' 05:00:00'



    # user_id -> msisdn

    sql=r'''

    SELECT [user_id],replace([user_name],'@shabik.com','') as msisdn
    FROM [mozone_user].[dbo].[Profile] with(nolock)
    where [creationDate]>='%s' and [creationDate]<'%s'
    and user_name like '%%shabik.com%%'

    ''' % (start_time,end_time)

    user_id_to_msisdn=helper_sql_server.fetch_dict(conn_config=config.conn_stc,sql=sql)



    # new user user_id

    new_user_collection=user_id_to_msisdn.keys()
    new_user_collection=set([str(user_id) for user_id in new_user_collection])



    # subscription status

    sql=r'''


    select distinct '0'+replace(msisdn,'+966','')+'@shabik.com' as [user_name]
    into #tmp
    from db86.shabik_mt.dbo.accounts with(nolock)
    where 
    is_deleted=0


    SELECT [user_id]
    FROM [mozone_user].[dbo].[Profile] with(nolock)
    where [creationDate]>='%s' and [creationDate]<'%s'
    and user_name like '%%shabik.com%%'
    and user_name in (
		select user_name
		from #tmp
    )

    drop table #tmp

    ''' % (start_time,end_time)

    user_id_in_sub=helper_sql_server.fetch_set(conn_config=config.conn_stc,sql=sql)
    user_id_in_sub=set([str(user_id) for user_id in user_id_in_sub])



    for i in range(date_length,-17,-1): #walk day by day from date_length days before 'today' up to the day before the real current date (given the -17 shift above)
        
        date_temp=helper_regex.date_add(today,-i)
        
        shabik_5_collection=helper_mysql.get_raw_collection_from_key(oem_name='STC',category='moagent', \
                                        key='app_page_only_shabik_5_daily_visitor_unique',sub_key='', \
                                        date=date_temp,table_name='raw_data',db_conn=None)

        shabik_5_collection=shabik_5_collection & new_user_collection

        for user_id in shabik_5_collection:
            user_login_history.setdefault(user_id,'')
            user_login_history[user_id]+='5'

            user_last_login_date.setdefault(user_id,'')
            user_last_login_date[user_id]=date_temp
            
        shabik_360_collection=helper_mysql.get_raw_collection_from_key(oem_name='Shabik_360',category='moagent', \
                                        key='app_page_daily_visitor_unique',sub_key='', \
                                        date=date_temp,table_name='raw_data_shabik_360',db_conn=None)

        shabik_360_collection=shabik_360_collection & new_user_collection

        for user_id in shabik_360_collection:
            user_login_history.setdefault(user_id,'')
            user_login_history[user_id]+='6'

            user_last_login_date.setdefault(user_id,'')
            user_last_login_date[user_id]=date_temp


        


    #calculate

    """
    target_groups_names=[
        '1.More than 2 weeks users using Shabik 360 (Totally New User to Shabik) [only using 360]',
        '2.Users who Shifted from Shabik360 to Shabik 5 [for each at least using 3 days, still in sub]',
        '3.Unsubscribed users of Shabik 360 [last using 360 for >=7 days and then unsub]',
        '4.Users who uses Shabik 5 more than 2 weeks [actually is online for >=14 days]',
        '5.Users who shifted from Shabik 5 to Shabik 360 [for each at least using 3 days, still in sub]',
        '6.User base of new user in last 50 days, which is used to generate above lists',
    ]

    target_groups=[
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6{14,})$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(6{3,}5{3,}$)')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(6{7,}$)') and user_id in user_id_in_sub],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(5{14,}$)')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(5{3,}6{3,}$)') and user_id in user_id_in_sub],
        [user_id for user_id,sequence in user_login_history.iteritems()],
    ]

    target_groups_names={
        'User only use Shabik 360':
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6+)$')],
        'User only use Shabik 360 [more than 10d]':
        ,
        'User only use Shabik 5',
        'User only use Shabik 5 [more than 10d]',
        'User use both Shabik 360 / Shabik 5',
        'User used both and choosed Shabik 5 [recently used only Shabik 5 for 5d]',
        'User used both and choosed Shabik 5 [recently used only Shabik 360 for 5d]',
    }

    target_groups=[
        
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6{10,})$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(5+)$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(5{10,})$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)') and  helper_regex.extract(sequence,r'(5{5,})$')],
        [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)') and  helper_regex.extract(sequence,r'(6{5,})$')],
    ]
    """

    threshold_of_settle_down='5'

    target_groups={
        '1.new_user':
            [user_id for user_id,sequence in user_login_history.iteritems()],
        '2.new_user_start_from_5':
            [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(5)')],
        '3.new_user_start_from_360':
            [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6)')],
        '4.new_user_only_5':
            [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(5+)$')],
        '5.new_user_only_360':
            [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'^(6+)$')],
        '6.new_user_both':
            [user_id for user_id,sequence in user_login_history.iteritems() if helper_regex.extract(sequence,r'(56|65)')],
        '7.new_user_both_and_finally_5':
            [user_id for user_id,sequence in user_login_history.iteritems() 
            if helper_regex.extract(sequence,r'(56|65)') and helper_regex.extract(sequence,'(5{'+threshold_of_settle_down+',})$')],
        '8.new_user_both_and_finally_360':
            [user_id for user_id,sequence in user_login_history.iteritems() 
            if helper_regex.extract(sequence,r'(56|65)') and helper_regex.extract(sequence,'(6{'+threshold_of_settle_down+',})$')],
        '9.new_user_both_and_not_stable':
            [user_id for user_id,sequence in user_login_history.iteritems() 
            if helper_regex.extract(sequence,r'(56|65)') 
            and not helper_regex.extract(sequence,'(5{'+threshold_of_settle_down+',})$') 
            and not helper_regex.extract(sequence,'(6{'+threshold_of_settle_down+',})$')],
    }
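
    # Worked example (invented sequence): '666555555' means three days on
    # Shabik 360 followed by six days on Shabik 5; it matches groups
    # 1 (new_user), 3 (start_from_360), 6 (both) and 7 (both_and_finally_5),
    # since it contains '65' and ends with at least five consecutive '5's.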

    #export

    keys=sorted(target_groups.keys())

    for key in keys:

        user_id_collection=target_groups[key]
        print key
        print 'size:',len(user_id_collection)
        
        print '[last login date - msisdn - sub status - login history]'
        
        user_id_collection.sort(key=lambda user_id:user_last_login_date[user_id],reverse=True)
        for user_id in user_id_collection:
            print user_last_login_date[user_id],'\t',user_id_to_msisdn[user_id],'\t','sub' if user_id in user_id_in_sub else 'unsub','\t',user_login_history[user_id]


    for key in keys:

        user_id_collection=target_groups[key]
        print '==',key,'=='
        print 'size:',len(user_id_collection)
        print 'unsub:',len([user_id for user_id in user_id_collection if not user_id in user_id_in_sub])
        
        """
filePath=r'E:\RoutineScripts\log\daily_all_login_service.py.2010-08-05.log'

found_keys={}
re_key_content=re.compile(r'where .*? limit 1')
re_key=re.compile(r'where (.*?`key`=".*?")')
files=glob.glob(filePath)

for f in files:
    file=open(f,'r',1024*1024)
    content=file.read(-1)
    print 'file: '+f+' ('+str(len(content))+')'
    m=re.findall(re_key_content,content)
    
    if m:
        for i in m:
            k=helper_regex.extract(i,re_key)
            if not found_keys.has_key(k):
                found_keys[k]=0
            found_keys[k]+=1

    file.close()

keys=found_keys.keys()
keys.sort()

for k in keys:
    print k

def get_country_name(line):
    # map the first IPv4 address in the line to a country code via helper_ip
    ip=helper_regex.extract(line,r'(\d+\.\d+\.\d+\.\d+)')
    if not ip:
        return 'ip_empty'
    return helper_ip.get_country_code_from_ip(ip)