def get_user_file(start, end): global ufCount ufCount = 0 formType = 'UserFile' dateStr = start.strftime('%Y-%m-%d') rc = checkWriteState(dateStr, formType) if rc: print 'already batch inserted' return print '开始存储 ' + dateStr + ' ' + formType + ' 的数据' #判断今天是不是已经搞定了 ufObj = Object.extend('UserFile') ufQuery = Query(ufObj) ufQuery.greater_than_or_equal_to('createdAt',start) ufQuery.less_than_or_equal_to('createdAt',end) def uf_call_back(lst): global ufCount writeList = [] for item in lst: ufMap = {} ufMap['openId'] = item.get('openId') ufMap['fileType'] = item.get('fileType') ufMap['srcId'] = item.id ufMap['createdAt'] = item.created_at ufMap['updatedAt'] = item.updated_at writeList.append(ufMap) ufCount = ufCount + len(writeList) if len(writeList) > 0: loadUserFile(writeList) map_query(ufQuery, 1000, uf_call_back) print '本次一共存储了' + str(ufCount) + '条纪录' #一共存储了多少 finishLoadData(dateStr, formType, ufCount)
def sync_data_table_update(startDateTime, endDateTime, formType): ''' 同步数据库表信息(每隔一天抓取)、同步昨天 00:00:00 至 今天天 23:59:59 增加以及更新的数据 ''' startStr = startDateTime.strftime('%Y-%m-%d') rc = checkWriteState(startStr, formType, operateType[0]) if rc is True: Log(content='数据库表 ' + formType + ' ' + startStr + ' 更新数据已同步!!', fromTask='system', parameter='', createTime=datetime.now()).save() print '数据库表 ' + formType + ' ' + startStr + ' 更新数据已同步!!' return # 开始同步数据库表的操作 obj = Object.extend(formType) query = Query(obj) # 包括新创建的和原来已有的,但昨天之前更新的数据 if rc is False: query.greater_than_or_equal_to('createdAt', startDateTime) query.less_than('updatedAt', endDateTime) elif rc is None: # 说明数据没有进行同步操作, 那么抓去全部的数据。 query.less_than('createdAt', datetime.now()) # query.less_than('createdAt', datetime(year=2016, month=8, day=15)) def update_local_data(lst): dataList = [] for item in lst: recordMap = covert_obj_to_dict(item) dataList.append(recordMap) recordsCount = len(dataList) if recordsCount > 0: __update_local_data(dataList, formType) # 将更新后的数据写入本地mangodb Log(content=formType + ' 表本次一共存储了 ' + str(recordsCount) + ' 条记录!', fromTask='system', parameter='', createTime=datetime.now()).save() print formType + ' 表本次一共存储了 ' + str(recordsCount) + ' 条记录!' recordsCount = map_query(query, 1000, update_local_data) finishLoadData(startStr, formType, recordsCount, operateType[0])
def get_daily_checkin(start, end): global dcCount dcCount = 0 formType = 'DailyCheckIn' dateStr = start.strftime('%Y-%m-%d') rc = checkWriteState(dateStr, formType) if rc: print 'already batch inserted' return print '开始存储 ' + dateStr + ' ' + formType + ' 的数据' ckObj = Object.extend('DailyCheckIn') ckQuery = Query(ckObj) ckQuery.greater_than_or_equal_to('createdAt',start) ckQuery.less_than_or_equal_to('createdAt',end) def call_back(lst): global dcCount writeList = [] for item in lst: ufMap = {} ufMap['openId'] = item.get('openId') ufMap['checkTime'] = item.get('checkTime') ufMap['srcId'] = item.id ufMap['createdAt'] = item.created_at ufMap['updatedAt'] = item.updated_at writeList.append(ufMap) dcCount = dcCount + len(writeList) if len(writeList) > 0: loadDailyCheckIn(writeList) map_query(ckQuery, 1000, call_back) finishLoadData(dateStr, formType, dcCount)
def associate_event_and_activity(db_name='MergedUserContext'):
    """Link each tracked event to the activity it happened during.

    For every (application, event_name, event_list) from get_all_event(),
    finds the row of `db_name` whose [startTime, endTime] interval contains
    the event's timestamp for the same tracker, attaches it as the event's
    'activity', tallies activities per event name, and saves one
    FakeEventActivity summary object per event name.

    NOTE(review): events whose lookup yields 0 or 3+ matching activities are
    destroyed online — this is irreversible; confirm that is intended.
    """
    application_event_dict = get_all_event()
    print 'already get the application_event_dict'
    print str(len(application_event_dict.keys()))
    DBTable = Object.extend(db_name)
    # print 'event_list: %s' %str(application_event_dict.values())
    for application, event_dict in application_event_dict.items():
        # print event_dict
        if event_dict:
            print 'application_event_dict values first count: %s' % str(event_dict.keys())
            EventActivity = Object.extend('FakeEventActivity')
            for event_name, event_list in event_dict.items():
                total_count = len(event_list)
                print 'event_list total_count: %s with event_name is: %s' % (str(total_count), event_name)
                print 'application id is: %s' % str(application.id)
                # One summary object per event name, holding a relation to
                # every event plus a per-activity histogram.
                event_activity = EventActivity()
                relation = event_activity.relation('event')
                activity_dict = {}
                for index, event in enumerate(event_list):
                    relation.add(event)
                    # Find the activity interval containing this event's
                    # timestamp for the same tracker.
                    query = Query(DBTable)
                    query.equal_to('tracker', event.get('tracker'))
                    query.less_than_or_equal_to('startTime', event.get('timestamp'))
                    query.greater_than_or_equal_to('endTime', event.get('timestamp'))
                    activity_list = query.find()
                    # 1 or 2 matches are accepted; the first match wins.
                    # NOTE(review): with 2 matches the second is silently
                    # ignored — presumably adjacent intervals overlap at
                    # their boundary; confirm.
                    if len(activity_list) == 1 or len(activity_list) == 2:
                        # for the convenience of adding the dimension of time to the analyzer
                        event.set('activity', activity_list[0])
                        event.save()
                        # activity = activity_list[0].get('eventType')[0]
                        activity = activity_list[0].get('eventType')[0]
                        if activity in activity_dict.keys():
                            activity_dict[activity] += 1
                        else:
                            activity_dict[activity] = 1
                    else:
                        # No (or too many) containing intervals: treat the
                        # event as bad data and delete it online.
                        event.destroy()
                        print 'length of activity_list: %s' % (str(len(activity_list)))
                        print 'Seems to be an error,index: %s,user: %s; timestamp: %s \n' % (str(index), str(event.get('tracker').id), str(event.get('timestamp')))
                # Events that matched no counted activity fall into 'others'.
                other_activity_total_count = total_count - sum(activity_dict.values())
                if other_activity_total_count:
                    activity_dict['others'] = other_activity_total_count
                # EventActivity = Object.extend('EventActivity')
                # event_activity = EventActivity()
                event_activity.set('application', application)
                event_activity.set('event_name', event_list[0].get('event_name'))
                event_activity.set('activity_dict', activity_dict)
                event_activity.save()
def sync_data_table_delete(startDatetime, formType): ''' 同步数据库表数据,昨天之前被删除的数据 ''' startStr = startDatetime.strftime('%Y-%m-%d') lastMonthDatetime = startDatetime - timedelta(days=30) rc = checkWriteState(startStr, formType, operateType[1]) if rc: Log(content='数据库表 ' + formType + ' ' + startStr + ' 删除数据已同步!!', fromTask='system', parameter='', createTime=datetime.now()).save() print '数据库表 ' + formType + ' ' + startStr + ' 删除数据已同步!!' return # 首先查询本地及线上数据库昨天之前的数据记录总数是否相等(30天内) obj = Object.extend(formType) query = Query(obj) query.less_than('createdAt', startDatetime) query.greater_than_or_equal_to('createdAt', lastMonthDatetime) onLineCount = query.count() localCount = db[formType].find({'$and': [ {'created_at': {'$lt': startDatetime}}, {'created_at': {'$gte': lastMonthDatetime}} ]}).count() if onLineCount != localCount: data_online = map_query(query, 1000) online_id_set = set([]) for item in data_online: online_id_set.add(ObjectId(item.id)) # leancloud查询时会自动将本地时间转化为UTC时间,而本地数据库(存储的也是UTC时间)不能 # 所以本地数据库在进行时间相关的操作时先要将时间转化为UTC时间 data_local = db[formType].find({'$and': [ {'created_at': {'$lt': local2utc(startDatetime)}}, {'created_at': {'$gte': local2utc(lastMonthDatetime)}} ]}) local_id_set = set([]) for item in data_local: local_id_set.add(item['_id']) delete_set = local_id_set - online_id_set Log(content=formType + ' 表本次一共删除了 ' + str(len(delete_set)) + ' 条记录!!', fromTask='system', parameter='', createTime=datetime.now()).save() print formType + ' 表本次一共删除了 ' + str(len(delete_set)) + ' 条记录!!' db[formType].remove({'_id': {'$in': list(delete_set)}}) finishLoadData(startStr, formType, -1, operateType[1], onLineCount)
def get_account(start, end): global acCount acCount = 0 formType = 'Account' dateStr = '2016-05-30'#start.strftime('%Y-%m-%d') rc = checkWriteState(dateStr, formType) if rc: print 'already batch inserted' return print '开始存储 ' + dateStr + ' ' + formType + ' 的数据' accObj = Object.extend('Account') accQuery = Query(accObj) if(start): accQuery.greater_than_or_equal_to('updatedAt',start) if(end): accQuery.less_than_or_equal_to('updatedAt',end) def call_back(lst): global acCount; writeList = [] for item in lst: ufMap = {} ufMap['card'] = item.get('card') ufMap['clazzKey'] = item.get('clazzKey') ufMap['isPublishCard'] = item.get('isPublishCard') ufMap['openId'] = item.get('openId') ufMap['sex'] = item.get('sex') ufMap['isVerify'] = item.get('isVerify') ufMap['srcId'] = item.id ufMap['createdAt'] = item.created_at ufMap['updatedAt'] = item.updated_at writeList.append(ufMap) if len(writeList) > 0: loadaAccount(writeList) acCount = acCount + len(writeList) map_query(accQuery, 1000, call_back) finishLoadData(dateStr, formType, acCount)
from leancloud import init, Object, Query from datetime import datetime, date, timedelta Log = Object.extend("Log") UserLocation = Object.extend("UserLocation") if __name__ == '__main__': init(u'9ra69chz8rbbl77mlplnl4l2pxyaclm612khhytztl8b1f9o', u'1zohz2ihxp9dhqamhfpeaer8nh1ewqd9uephe9ztvkka544b') log_query = Query(Log) location_query = Query(UserLocation) today = datetime.strptime(date.today().strftime('%Y-%m-%d'), '%Y-%m-%d') tommorow = datetime.strptime((date.today() + timedelta(days=1)).strftime('%Y-%m-%d'), '%Y-%m-%d') log_query.equal_to('type', 'location') log_query.greater_than_or_equal_to("createdAt", today) log_query.less_than_or_equal_to("createdAt", tommorow) log_count = log_query.count() location_query.greater_than_or_equal_to("createdAt", today) location_query.less_than_or_equal_to("createdAt", tommorow) location_count = location_query.count() print "Today's Log of location: ", log_count print "Today's Location count: ", location_count print "Rate: ", location_count * 1.0 / log_count