def export_db_to_log(my_date): oem_name='Shabik_360' stat_category='friend_relation' current_date=helper_regex.translate_date(sg_timestamp=my_date,timezone_offset_to_sg=config.timezone_offset_shabik_360) start_time=helper_regex.time_add_by_hour(current_date+' 00:00:00',-3) end_time=helper_regex.time_add_by_hour(current_date+' 00:00:00',-3+24) source_db=config.conn_stc_1 source_table='friendship.friendship' source_condition_name='adding_friend' target_log_name='E:\\RoutineScripts\\log\\%s_%s_%s_record_from_%s.log.%s' \ % (oem_name,stat_category,source_condition_name,source_table,current_date) start_id=helper_mysql.guess_pk_id_by_time(table_name=source_table, \ target_time=start_time,pk_column_name='id', \ sequence_time_column_name='created_on',db_conn=config.conn_stc_1) end_id=helper_mysql.guess_pk_id_by_time(table_name=source_table, \ target_time=end_time,pk_column_name='id', \ sequence_time_column_name='created_on',db_conn=config.conn_stc_1) sql=r''' select id,created_on,user_id,friend_id from friendship.friendship where id>=%s and id<%s and following=1 order by id asc ''' % (start_id,end_id) existing_adding=set([]) target_log=codecs.open(target_log_name,'w','utf-8',None,1024*1024*4) rows=helper_mysql.fetch_rows(sql=sql,db_conn=config.conn_stc_1) for r in rows: if (r['friend_id'],r['user_id']) in existing_adding: existing_adding.remove((r['friend_id'],r['user_id'])) continue t=helper_regex.time_add_by_hour(r['created_on'],8) target_log.write('%s [[friend add]] monetid=%s to=%s\n' % \ (t,r['user_id'],r['friend_id'])) existing_adding.add((r['friend_id'],r['user_id'])) target_log.close() print "start_id,end_id:",start_id,end_id
def duplicate_record(item,target_date): step=10000 current_idx=0 helper_mysql.quick_insert=True helper_mysql.print_log=False sql=r"select * from %s where `oem_name`='%s' and `category`='%s'" % (item[0],item[1],item[2]) if item[3]: sql+=r" and `key`='%s'" % (item[3],) sql+=r" and `date`='%s'" % (target_date,) for i in range(1000): counter=1 sql_limit=r" limit %s,%s" % (current_idx,step) #print sql+sql_limit rows=helper_mysql.fetch_rows(sql+sql_limit,db_conn=item[4]) if not rows: print 'end.' break print target_date,current_idx,len(rows),item #do copy for row in rows: helper_mysql.put_raw_data( oem_name=row['oem_name'], category=row['category'], key=row['key'], sub_key=row['sub_key'], value=row['value'], table_name=item[0], date=row['date'], created_on=row['created_on'], db_conn=item[5]) counter+=1 #print counter current_idx+=step
def calculate_collection(oem_name,category,db_name='raw_data',date_units_nday_unique=[600],date_units_retain_rate=[1,2,3]): #fetch target keys config.collection_cache_enabled=True target_collection_name_sql=r''' select distinct `oem_name` ,`category` ,replace(`key`,'_average','') as `key` ,`sub_key` from `raw_data_shabik_360` where `oem_name`="Shabik_360" and `category`="moagent" and `key`="app_page_daily_visitor_unique" ''' keys=helper_mysql.fetch_rows(target_collection_name_sql) print keys #exit() #do action for key_space in keys: print key_space calculate_ndays_unique(key_space,db_name,date_units_nday_unique)
# Migration driver: for each target table, resume from the last transferred id
# and scan forward in batches of `source_step`, accumulating VALUES tuples.
# NOTE(review): this chunk appears truncated -- `sql_temp` is built but the
# insert that should consume it is not visible here; `target_tables` and
# `source_conn` are defined outside this view. Statement nesting below is
# reconstructed from mangled whitespace -- TODO confirm against the original.
for table_name in target_tables:
    #current_max_id=helper_mysql.get_raw_data(oem_name='Stat_Portal',category='data_migrate',key='max_transfered_id',sub_key=table_name,default_value=0,table_name='raw_data_debug',date='',db_conn=None)
    # Resume point: highest id already transferred for this table (0 on first run).
    current_start_id=helper_mysql.get_raw_data(oem_name='Stat_Portal',category='data_migrate',key='max_transfered_id',sub_key=table_name,default_value=0,table_name='raw_data_debug',date='',db_conn=None)
    source_step=10000
    target_step=10000
    # Effectively "loop until the table is exhausted"; the huge range is a cap.
    for i in range(0,100000000):
        # Advance past the last copied id so the >= scan does not re-fetch it.
        current_start_id+=1
        print '===now at:',table_name,current_start_id
        source_rows=helper_mysql.fetch_rows(sql=r''' select * from %s where id>=%s order by id limit %s ''' % (table_name,current_start_id,source_step),db_conn=source_conn)
        if not source_rows:
            print '===finished at:',table_name,current_start_id
            print helper_regex.get_time_str_now()
            break
        print 'length of source:',len(source_rows)
        print helper_regex.get_time_str_now()
        sql_temp=[]
        for row in source_rows:
            # Build one VALUES tuple per row; element_string is escaped, the
            # remaining columns are assumed numeric/datetime-safe -- TODO confirm.
            sql_temp.append("('%s','%s','%s','%s','%s')" % \
                (row['id'],row['created_on'],row['element_count'],row['element_string_md5'],helper_mysql.escape_string(row['element_string']),))
            # Track the highest id seen so the next batch starts after it.
            current_start_id=max(current_start_id,int(row['id']))
@author: development
'''
import helper_mysql
import urllib
import os.path

# Connection settings for the sticker database.
# NOTE(review): live credentials are hardcoded in source control -- these
# should be moved to a secured config file / secret store and rotated.
_conn_sticker={
    'host':'m1-mysql-master-03.mozat.com',
    'port':'3333',
    'account':'mozone',
    'pwd':'morangerunmozone',
    'db':'sticker',
    'db_type':'mysql',
}

# Download every sticker image listed in the DB to the current directory.
sql = 'select * from sticker.sticker'
sticker= helper_mysql.fetch_rows(sql, _conn_sticker)
print len(sticker)
for row in sticker:
    print row['id']
    print row['url']
    f_name='./%s.png'%(row['id'])
    # Skip files already on disk so the script is resumable after a crash.
    if os.path.isfile(f_name):
        continue
    # NOTE(review): no error handling around the HTTP fetch -- one bad URL
    # aborts the run, and the file handle leaks if write() raises.
    f = open(f_name,'wb')
    f.write(urllib.urlopen(row['url']).read())
    f.close()
    # break
print 'finish'

# Entry-point guard; its body continues beyond this view.
if __name__ == '__main__':
source_step=50000 #target_step=50000 #current_start_id=20000000 for i in xrange(0,100000001): time.sleep(3) current_start_id+=1 print '===now at:',table_name,current_start_id sql=r''' select id,user_id,friend_id,0+following as following,0+followed as followed,0+blocking as blocking,0+blocked as blocked,0+flags as flags,created_on,modified_on from %s where id>=%s and id<%s+%s ''' % (table_name,current_start_id,current_start_id,source_step) source_rows=helper_mysql.fetch_rows(sql,db_conn=source_conn) print sql if not source_rows: print '===finished at:',table_name,current_start_id print helper_regex.get_time_str_now() break print 'length of source:',len(source_rows) print helper_regex.get_time_str_now() sql_temp=[] for row in source_rows: sql_temp.append("('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')" % \ (row['id'],row['user_id'],row['friend_id'],row['following'],row['followed'],row['blocking'], \ row['blocked'],row['flags'],row['created_on'],row['modified_on'],))
# Per-date copy of Vodafone website stats: walk backwards one day at a time
# from begin_date and collect each day's rows into VALUES tuples.
# NOTE(review): chunk appears truncated -- `sql_temp` is never flushed in the
# visible code; `table_name` and `source_conn` come from outside this view.
# Statement nesting is reconstructed from mangled whitespace -- TODO confirm.
begin_date='2011-07-11'
step=1000
for i in range(0,1000):
    current_date=helper_regex.date_add(begin_date,-i)
    print table_name,current_date
    # NOTE(review): dead code -- the next expression builds the SQL string and
    # immediately discards it; the identical query is rebuilt in the
    # fetch_rows call just below.
    r''' select * from %s where date='%s' and `oem_name`="Vodafone" and `category`="website" ''' % (table_name,current_date)
    source_rows=helper_mysql.fetch_rows(sql=r''' select * from %s where date='%s' and `oem_name`="Vodafone" and `category`="website" ''' % (table_name,current_date),db_conn=source_conn)
    #exit()
    print 'length of source:',len(source_rows)
    print helper_regex.get_time_str_now()
    sql_temp=[]
    for row in source_rows:
        # sub_key may contain quotes/unicode so it is escaped; the other
        # columns are assumed safe for direct interpolation -- TODO confirm.
        sql_temp.append("('%s','%s','%s','%s','%s','%s','%s')" % \
            (row['oem_name'],row['category'],row['key'],helper_mysql.escape_string(row['sub_key']),row['date'],row['value'],row['created_on'],))
    # Presumably flushes sql_temp in chunks of `step`; body continues beyond
    # this view -- TODO confirm placement relative to the date loop.
    for i in range(0,100000001,step):
        print helper_regex.get_time_str_now()