コード例 #1
0
def clear_friendship_from_redis():
    """Delete the cached friends/followers Redis keys for users found in MongoDB.

    Scans ``master_timeline_user`` for documents matching the ``$or`` filter
    below and, for each user with a non-empty ``friends`` / ``followers``
    value, deletes the Redis key ``linhao_friends_<uid>`` /
    ``linhao_followers_<uid>``. Progress (running count and seconds elapsed
    since the previous report) is printed every 10000 users.

    Relies on ``sys`` and ``time`` being imported at module level.
    """
    import redis
    redis_host = 'localhost'
    redis_port = 6379
    redis_conn = redis.Redis(redis_host, redis_port)

    sys.path.append('../../profile')
    from getReadonlyDB import getReadonlyDB
    mongo_master_timeline = getReadonlyDB('master_timeline')

    # Built once so the count and the scan are guaranteed to use the same filter.
    query = {'$or': [{'friends': {'$ne': []}}, {'followers': {'$ne': []}}]}
    users = mongo_master_timeline.master_timeline_user

    total_count = users.find(query).count()
    # Two spaces after the colon reproduce the output of the original
    # ``print 'hit count from mongodb: ', total_count`` statement.
    print('hit count from mongodb:  %s' % total_count)
    cursor = users.find(query)

    count = 0
    ts = time.time()
    for user in cursor:
        uid = user['_id']
        # ``$ne: []`` also matches documents where the field is absent, so a
        # returned user may lack 'friends' or 'followers' entirely; .get()
        # avoids a KeyError aborting the whole scan.
        if user.get('friends'):
            redis_conn.delete('linhao_friends_' + str(uid))
        if user.get('followers'):
            redis_conn.delete('linhao_followers_' + str(uid))
        count += 1
        if count % 10000 == 0:
            te = time.time()
            print(str(count) + ' ' + '%s sec' % (te - ts))
            ts = te
コード例 #2
0
def initial_load_field_user_from_mysql(update_date='20130430'):
    """Build a ``str(uid) -> fields`` mapping from every ``UserField`` row.

    For each row the two field values are joined with a comma; if the join
    raises ``TypeError`` (presumably because a field is NULL/None -- confirm
    against the schema) only ``fieldFirst`` is used. Rows whose considered
    field values are not all members of the module-level ``fields_value`` are
    skipped. Progress is printed every 10000 rows, but only when the row that
    hits the threshold was accepted.

    NOTE(review): in the code visible here ``update_date`` and the MongoDB
    handle are unused, and the mapping is neither returned nor persisted.
    """
    sys.path.append('../../profile')
    from config import db
    from model import UserField
    from getReadonlyDB import getReadonlyDB
    mongo_master_timeline = getReadonlyDB('master_timeline')

    processed = 0
    last_report = time.time()
    daily_fields_kv = {}
    for row in db.session.query(UserField).all():
        uid = row.uid
        first = row.fieldFirst
        second = row.fieldSecond
        try:
            fields = ','.join([first, second])
        except TypeError:
            # Join failed: fall back to the first field alone.
            fields = first
            accepted = first in fields_value
        else:
            accepted = first in fields_value and second in fields_value

        processed += 1
        if not accepted:
            continue

        daily_fields_kv[str(uid)] = fields
        if processed % 10000 == 0:
            now = time.time()
            print(str(processed) + ' ' + '%s sec' % (now - last_report))
            last_report = now
コード例 #3
0
def initial_load_field_verify(update_date='20130430'):
    """Tally users per field and ``verified`` value, then write the tallies to LevelDB.

    For every field in the module-level ``fields_value`` and every uid that
    ``getUidsByFieldFromRedis(field)`` yields, the user document is fetched
    from ``master_timeline_user`` by integer ``_id``. Users that are not found
    or have no ``verified`` key are skipped. The remaining users are counted
    under the key ``<field>_<update_date>_<verified>``; the counts are written
    as strings to ``field_daily_verify_count_bucket`` in one synced batch.

    Progress is printed every 10000 uids (counted before the lookup).
    """
    sys.path.append('../../profile')
    from getReadonlyDB import getReadonlyDB
    mongo_db = getReadonlyDB('master_timeline')

    seen = 0
    last_report = time.time()
    tallies = {}
    for field in fields_value:
        for uid in getUidsByFieldFromRedis(field):
            seen += 1
            if seen % 10000 == 0:
                now = time.time()
                print(str(seen) + ' ' + '%s sec' % (now - last_report))
                last_report = now

            user = mongo_db.master_timeline_user.find_one({'_id': int(uid)})
            if not user:
                continue
            try:
                verified = user['verified']
            except KeyError:
                continue

            key = '_'.join((str(field), str(update_date), str(verified)))
            # Kept as ints here; converted to str when written below.
            tallies[key] = tallies.get(key, 0) + 1

    print(len(tallies))
    batch = leveldb.WriteBatch()
    for key, total in tallies.iteritems():
        batch.Put(key, str(total))
    field_daily_verify_count_bucket.Write(batch, sync=True)
コード例 #4
0
ファイル: protou1.py プロジェクト: huxiaoqian/project
def readFriendsByUid(uid):
    """Return the stored friends list for the user whose ``_id`` is *uid*.

    Looks the user up in ``master_timeline_user`` and returns
    ``{'_id': <id>, 'friends': <friends>}`` for the first matching document
    whose ``friends`` value is not an empty list.

    Returns ``None`` when no document matches, or when the matching document
    has an empty or missing ``friends`` field.
    """
    db_master_timeline = getReadonlyDB("master_timeline")
    cursor = db_master_timeline.master_timeline_user.find({"_id": uid})
    for user in cursor:
        # A missing 'friends' field is treated like an empty list instead of
        # raising KeyError.
        friends = user.get('friends', [])
        if friends != []:
            return {'_id': user['_id'], 'friends': friends}
    return None
コード例 #5
0
def initial_load_field_location(update_date='20130430'):
    """Tally users per field and province id, then write the tallies to LevelDB.

    For every field in the module-level ``fields_value`` and every uid that
    ``getUidsByFieldFromRedis(field)`` yields, the user document is fetched
    from ``master_timeline_user`` by integer ``_id``. From each found user the
    text before the first space of ``location`` and the ``province`` value are
    taken and stored in ``province_city_bucket`` in both directions
    (``province_<text> -> id`` and ``provinceid_<id> -> text``). Users that
    are not found, or for which a ``KeyError`` occurs while doing so, are
    skipped. The remaining users are counted under the key
    ``<field>_<update_date>_<province id>``; the counts are written as strings
    to ``field_daily_location_count_bucket`` in one synced batch.

    Progress is printed every 10000 uids (counted before the lookup).
    """
    sys.path.append('../../profile')
    from getReadonlyDB import getReadonlyDB
    mongo_db = getReadonlyDB('master_timeline')

    seen = 0
    last_report = time.time()
    tallies = {}
    for field in fields_value:
        for uid in getUidsByFieldFromRedis(field):
            seen += 1
            if seen % 10000 == 0:
                now = time.time()
                print(str(seen) + ' ' + '%s sec' % (now - last_report))
                last_report = now

            user = mongo_db.master_timeline_user.find_one({'_id': int(uid)})
            if not user:
                continue
            try:
                location = user['location']
                province_str = str(location.split(' ')[0].encode('utf-8'))
                province_id = str(user['province'])
                # Both lookup directions are stored immediately, per user.
                province_city_bucket.Put('province_' + province_str, province_id)
                province_city_bucket.Put('provinceid_' + province_id, province_str)
            except KeyError:
                continue

            key = '_'.join((str(field), str(update_date), str(province_id)))
            # Kept as ints here; converted to str when written below.
            tallies[key] = tallies.get(key, 0) + 1

    print(len(tallies))

    batch = leveldb.WriteBatch()
    for key, total in tallies.iteritems():
        batch.Put(key, str(total))
    field_daily_location_count_bucket.Write(batch, sync=True)