Example #1
def get_user_file(start, end):
    global ufCount
    ufCount = 0
    formType = 'UserFile'
    dateStr = start.strftime('%Y-%m-%d')
    rc = checkWriteState(dateStr, formType)
    if rc:
        print 'already batch inserted'
        return
    print 'Start storing ' + dateStr + ' ' + formType + ' data'
    # check whether today's data has already been handled
    ufObj = Object.extend('UserFile')
    ufQuery = Query(ufObj)
    ufQuery.greater_than_or_equal_to('createdAt', start)
    ufQuery.less_than_or_equal_to('createdAt', end)

    def uf_call_back(lst):
        global ufCount
        writeList = []
        for item in lst:
            ufMap = {}
            ufMap['openId'] = item.get('openId')
            ufMap['fileType'] = item.get('fileType')
            ufMap['srcId'] = item.id
            ufMap['createdAt'] = item.created_at
            ufMap['updatedAt'] = item.updated_at
            writeList.append(ufMap)
        ufCount = ufCount + len(writeList)
        if len(writeList) > 0:
            loadUserFile(writeList)
    map_query(ufQuery, 1000, uf_call_back)
    print 'Stored ' + str(ufCount) + ' records in this run'  # total number stored
    finishLoadData(dateStr, formType, ufCount)
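
# A minimal usage sketch (the wrapper name is illustrative only), assuming init(app_id, app_key)
# has already been called and that checkWriteState, loadUserFile and finishLoadData are defined
# elsewhere in this module: drive get_user_file over yesterday's 00:00:00 .. 24:00:00 window.
from datetime import date, datetime, timedelta

def sync_yesterday_user_files():
    yesterday = datetime.combine(date.today() - timedelta(days=1), datetime.min.time())
    today = datetime.combine(date.today(), datetime.min.time())
    get_user_file(yesterday, today)
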
def sync_data_table_update(startDateTime, endDateTime, formType):
    '''
        Sync a database table (fetched at one-day intervals): records added or updated
        between yesterday 00:00:00 and today 23:59:59.
    '''
    startStr = startDateTime.strftime('%Y-%m-%d')
    rc = checkWriteState(startStr, formType, operateType[0])
    if rc is True:
        Log(content='Database table ' + formType + ' update data for ' + startStr + ' has already been synced!',
            fromTask='system', parameter='', createTime=datetime.now()).save()
        print 'Database table ' + formType + ' update data for ' + startStr + ' has already been synced!'
        return
    # start syncing the database table
    obj = Object.extend(formType)
    query = Query(obj)
    # includes newly created records as well as pre-existing ones updated before yesterday
    if rc is False:
        query.greater_than_or_equal_to('createdAt', startDateTime)
        query.less_than('updatedAt', endDateTime)
    elif rc is None:  # the table has never been synced, so fetch all of its data
        query.less_than('createdAt', datetime.now())
        # query.less_than('createdAt', datetime(year=2016, month=8, day=15))
    def update_local_data(lst):
        dataList = []
        for item in lst:
            recordMap = covert_obj_to_dict(item)
            dataList.append(recordMap)
        recordsCount = len(dataList)
        if recordsCount > 0:
            __update_local_data(dataList, formType)  # write the updated data into the local mongodb
        Log(content=formType + ' table: stored ' + str(recordsCount) + ' records in this run!',
            fromTask='system', parameter='', createTime=datetime.now()).save()
        print formType + ' table: stored ' + str(recordsCount) + ' records in this run!'

    recordsCount = map_query(query, 1000, update_local_data)
    finishLoadData(startStr, formType, recordsCount, operateType[0])
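
# A minimal usage sketch (helper name is illustrative; the default table names are just the
# ones appearing on this page), assuming operateType is defined elsewhere in this module:
# build the yesterday 00:00:00 .. today 23:59:59 window described in the docstring and run
# the update sync for each table.
from datetime import date, datetime, timedelta

def run_daily_update_sync(table_names=('UserFile', 'DailyCheckIn', 'Account')):
    start = datetime.combine(date.today() - timedelta(days=1), datetime.min.time())
    end = datetime.combine(date.today(), datetime.max.time())  # 23:59:59.999999 today
    for form_type in table_names:
        sync_data_table_update(start, end, form_type)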
Example #3
def get_daily_checkin(start, end):
    global dcCount
    dcCount = 0
    formType = 'DailyCheckIn'
    dateStr = start.strftime('%Y-%m-%d')
    rc = checkWriteState(dateStr, formType)
    if rc:
        print 'already batch inserted'
        return
    print 'Start storing ' + dateStr + ' ' + formType + ' data'
    ckObj = Object.extend('DailyCheckIn')
    ckQuery = Query(ckObj)
    ckQuery.greater_than_or_equal_to('createdAt', start)
    ckQuery.less_than_or_equal_to('createdAt', end)
    def call_back(lst):
        global dcCount
        writeList = []
        for item in lst:
            ufMap = {}
            ufMap['openId'] = item.get('openId')
            ufMap['checkTime'] = item.get('checkTime')
            ufMap['srcId'] = item.id
            ufMap['createdAt'] = item.created_at
            ufMap['updatedAt'] = item.updated_at
            writeList.append(ufMap)
        dcCount = dcCount + len(writeList)
        if len(writeList) > 0:
            loadDailyCheckIn(writeList)
    map_query(ckQuery, 1000, call_back)
    finishLoadData(dateStr, formType, dcCount)
def associate_event_and_activity(db_name='MergedUserContext'):

    application_event_dict = get_all_event()
    print 'already got the application_event_dict'
    print str(len(application_event_dict.keys()))
    DBTable = Object.extend(db_name)

    # print 'event_list: %s' %str(application_event_dict.values())
    for application,event_dict in application_event_dict.items():
        # print event_dict
        if event_dict:
            print 'event names for this application: %s' % str(event_dict.keys())

        EventActivity = Object.extend('FakeEventActivity')
        for event_name,event_list in event_dict.items():
            total_count = len(event_list)
            print 'event_list total_count: %s, event_name: %s' % (str(total_count), event_name)
            print 'application id is: %s' %str(application.id)

            event_activity = EventActivity()
            relation = event_activity.relation('event')
            activity_dict = {}
            for index,event in enumerate(event_list):
                relation.add(event)
                query = Query(DBTable)
                query.equal_to('tracker', event.get('tracker'))
                query.less_than_or_equal_to('startTime', event.get('timestamp'))
                query.greater_than_or_equal_to('endTime', event.get('timestamp'))
                activity_list = query.find()
                if len(activity_list) in (1, 2):
                    # for the convenience of adding the dimension of time to the analyzer
                    event.set('activity', activity_list[0])
                    event.save()
                    activity = activity_list[0].get('eventType')[0]

                    if activity in activity_dict:
                        activity_dict[activity] += 1
                    else:
                        activity_dict[activity] = 1
                else:
                    event.destroy()
                    print 'length of activity_list: %s' % str(len(activity_list))
                    print 'Seems to be an error, index: %s, user: %s; timestamp: %s \n' % (str(index), str(event.get('tracker').id), str(event.get('timestamp')))

            other_activity_total_count = total_count - sum(activity_dict.values())
            if other_activity_total_count:
                activity_dict['others'] = other_activity_total_count

            # EventActivity = Object.extend('EventActivity')
            # event_activity = EventActivity()
            event_activity.set('application', application)
            event_activity.set('event_name', event_list[0].get('event_name'))
            event_activity.set('activity_dict', activity_dict)
            event_activity.save()
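
# A small sketch of the core lookup used above (function name is illustrative), assuming the
# same LeanCloud schema: find the MergedUserContext record whose [startTime, endTime] window
# contains an event's timestamp for the same tracker, mirroring the 1-or-2-match rule applied
# in associate_event_and_activity.
def find_activity_for_event(event, db_name='MergedUserContext'):
    query = Query(Object.extend(db_name))
    query.equal_to('tracker', event.get('tracker'))
    query.less_than_or_equal_to('startTime', event.get('timestamp'))
    query.greater_than_or_equal_to('endTime', event.get('timestamp'))
    matches = query.find()
    # anything other than one or two matches is treated as no reliable match
    return matches[0] if len(matches) in (1, 2) else None
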
def sync_data_table_delete(startDatetime, formType):
    '''
        Sync table data: remove local records that were deleted online before yesterday.
    '''
    startStr = startDatetime.strftime('%Y-%m-%d')
    lastMonthDatetime = startDatetime - timedelta(days=30)
    rc = checkWriteState(startStr, formType, operateType[1])
    if rc:
        Log(content='Database table ' + formType + ' deleted data for ' + startStr + ' has already been synced!',
            fromTask='system', parameter='', createTime=datetime.now()).save()
        print 'Database table ' + formType + ' deleted data for ' + startStr + ' has already been synced!'
        return
    # first check whether the local and online record counts for data created before yesterday (within the last 30 days) match
    obj = Object.extend(formType)
    query = Query(obj)
    query.less_than('createdAt', startDatetime)
    query.greater_than_or_equal_to('createdAt', lastMonthDatetime)
    onLineCount = query.count()
    localCount = db[formType].find({'$and': [ {'created_at': {'$lt': local2utc(startDatetime)}}, {'created_at': {'$gte': local2utc(lastMonthDatetime)}} ]}).count()
    if onLineCount != localCount:
        data_online = map_query(query, 1000)
        online_id_set = set([])
        for item in data_online:
            online_id_set.add(ObjectId(item.id))
        # LeanCloud automatically converts local times to UTC when querying, but the local
        # database (which also stores UTC) does not, so time-based queries against the local
        # database must first convert their bounds to UTC
        data_local = db[formType].find({'$and': [ {'created_at': {'$lt': local2utc(startDatetime)}}, {'created_at': {'$gte': local2utc(lastMonthDatetime)}} ]})
        local_id_set = set([])
        for item in data_local:
            local_id_set.add(item['_id'])
        delete_set = local_id_set - online_id_set
        Log(content=formType + ' table: deleted ' + str(len(delete_set)) + ' records in this run!',
            fromTask='system', parameter='', createTime=datetime.now()).save()
        print formType + ' table: deleted ' + str(len(delete_set)) + ' records in this run!'
        db[formType].remove({'_id': {'$in': list(delete_set)}})
    finishLoadData(startStr, formType, -1, operateType[1], onLineCount)
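
# local2utc is defined elsewhere in the original module. A plausible sketch of what such a
# helper might look like (the name is suffixed to avoid implying this is the actual
# implementation), assuming it just re-expresses a naive local wall-clock datetime in UTC:
import time

def local2utc_sketch(local_dt):
    # interpret the naive datetime as local time and convert it to naive UTC
    # (sub-second precision is dropped, which is enough for day-level windows here)
    return datetime.utcfromtimestamp(time.mktime(local_dt.timetuple()))
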
Example #6
def get_account(start, end):
    global acCount
    acCount = 0
    formType = 'Account'
    dateStr = '2016-05-30'  # start.strftime('%Y-%m-%d')
    rc = checkWriteState(dateStr, formType)
    if rc:
        print 'already batch inserted'
        return
    print 'Start storing ' + dateStr + ' ' + formType + ' data'
    accObj = Object.extend('Account')
    accQuery = Query(accObj)
    if start:
        accQuery.greater_than_or_equal_to('updatedAt', start)
    if end:
        accQuery.less_than_or_equal_to('updatedAt', end)
    def call_back(lst):
        global acCount
        writeList = []
        for item in lst:
            ufMap = {}
            ufMap['card'] = item.get('card')
            ufMap['clazzKey'] = item.get('clazzKey')
            ufMap['isPublishCard'] = item.get('isPublishCard')
            ufMap['openId'] = item.get('openId')
            ufMap['sex'] = item.get('sex')
            ufMap['isVerify'] = item.get('isVerify')
            ufMap['srcId'] = item.id
            ufMap['createdAt'] = item.created_at
            ufMap['updatedAt'] = item.updated_at
            writeList.append(ufMap)
        if len(writeList) > 0:
            loadaAccount(writeList)
        acCount = acCount + len(writeList)
    map_query(accQuery, 1000, call_back)
    finishLoadData(dateStr, formType, acCount)
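
# A minimal usage sketch (wrapper name is illustrative), assuming init() has already been
# called: passing None for both bounds leaves the updatedAt filter open on both sides, so
# every Account record is re-imported; pass concrete datetimes to restrict to one window.
def resync_all_accounts():
    get_account(None, None)
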
from leancloud import init, Object, Query
from datetime import datetime, date, timedelta

Log = Object.extend("Log")
UserLocation = Object.extend("UserLocation")

if __name__ == '__main__':
    init(u'9ra69chz8rbbl77mlplnl4l2pxyaclm612khhytztl8b1f9o',
         u'1zohz2ihxp9dhqamhfpeaer8nh1ewqd9uephe9ztvkka544b')
    log_query = Query(Log)
    location_query = Query(UserLocation)

    today = datetime.strptime(date.today().strftime('%Y-%m-%d'), '%Y-%m-%d')
    tomorrow = datetime.strptime((date.today() + timedelta(days=1)).strftime('%Y-%m-%d'), '%Y-%m-%d')

    log_query.equal_to('type', 'location')
    log_query.greater_than_or_equal_to("createdAt", today)
    log_query.less_than_or_equal_to("createdAt", tomorrow)
    log_count = log_query.count()

    location_query.greater_than_or_equal_to("createdAt", today)
    location_query.less_than_or_equal_to("createdAt", tomorrow)
    location_count = location_query.count()

    print "Today's Log of location: ", log_count
    print "Today's Location count: ", location_count
    print "Rate: ", location_count * 1.0 / log_count