def clear_friendship_from_redis(): import redis redis_host = 'localhost' redis_port = 6379 redis_conn = redis.Redis(redis_host, redis_port) sys.path.append('../../profile') from getReadonlyDB import getReadonlyDB mongo_master_timeline = getReadonlyDB('master_timeline') total_count = mongo_master_timeline.master_timeline_user.find({'$or': [{'friends': {'$ne':[]}}, {'followers': {'$ne':[]}}]}).count() print 'hit count from mongodb: ', total_count cursor = mongo_master_timeline.master_timeline_user.find({'$or': [{'friends': {'$ne':[]}}, {'followers': {'$ne':[]}}]}) count = 0 ts = te = time.time() for user in cursor: friends = user['friends'] followers = user['followers'] uid = user['_id'] if friends and len(friends): key_prefix = 'linhao_friends_' + str(uid) redis_conn.delete(key_prefix) if followers and len(followers): key_prefix = 'linhao_followers_' + str(uid) redis_conn.delete(key_prefix) count += 1 if count % 10000 == 0: te = time.time() print count, '%s sec' % (te - ts) ts = te
def initial_load_field_user_from_mysql(update_date='20130430'): sys.path.append('../../profile') from config import db from model import UserField from getReadonlyDB import getReadonlyDB mongo_master_timeline = getReadonlyDB('master_timeline') count = 0 ts = te = time.time() daily_fields_kv = {} results = db.session.query(UserField).all() for r in results: uid = r.uid try: fields = ','.join([r.fieldFirst, r.fieldSecond]) if r.fieldFirst not in fields_value or r.fieldSecond not in fields_value: count += 1 continue except TypeError,e: fields = r.fieldFirst if r.fieldFirst not in fields_value: count += 1 continue daily_fields_kv[str(uid)] = fields count += 1 if count % 10000 == 0: te = time.time() print count, '%s sec' % (te - ts) ts = te
def initial_load_field_verify(update_date='20130430'): sys.path.append('../../profile') from getReadonlyDB import getReadonlyDB mongo_master_timeline = getReadonlyDB('master_timeline') count = 0 ts = te = time.time() daily_count_kv = {} for field in fields_value: uids = getUidsByFieldFromRedis(field) for uid in uids: count += 1 if count % 10000 == 0: te = time.time() print count, '%s sec' % (te - ts) ts = te user = mongo_master_timeline.master_timeline_user.find_one({'_id': int(uid)}) if user: try: verified = user['verified'] except KeyError: continue else: continue try: v_count = daily_count_kv[str(field) + '_' + str(update_date) + '_' + str(verified)] daily_count_kv[str(field) + '_' + str(update_date) + '_' + str(verified)] = str(int(v_count) + 1) except KeyError: daily_count_kv[str(field) + '_' + str(update_date) + '_' + str(verified)] = str(1) print len(daily_count_kv.keys()) batch = leveldb.WriteBatch() for k, v in daily_count_kv.iteritems(): batch.Put(k, str(v)) field_daily_verify_count_bucket.Write(batch, sync=True)
def readFriendsByUid(uid):
    """Look up a user in master_timeline by _id and return
    {'_id': uid, 'friends': [...]} when the friends list is non-empty;
    otherwise (no document, or friends == []) return None implicitly.
    """
    db_master_timeline = getReadonlyDB("master_timeline")
    for user in db_master_timeline.master_timeline_user.find({"_id": uid}):
        friends = user['friends']
        if friends != []:
            return {'_id': user['_id'], 'friends': friends}
def initial_load_field_location(update_date='20130430'): sys.path.append('../../profile') from getReadonlyDB import getReadonlyDB mongo_master_timeline = getReadonlyDB('master_timeline') count = 0 ts = te = time.time() daily_count_kv = {} for field in fields_value: uids = getUidsByFieldFromRedis(field) for uid in uids: count += 1 if count % 10000 == 0: te = time.time() print count, '%s sec' % (te - ts) ts = te user = mongo_master_timeline.master_timeline_user.find_one({'_id': int(uid)}) if user: try: location = user['location'] province_str = str(location.split(' ')[0].encode('utf-8')) province_id = str(user['province']) province_city_bucket.Put('province_' + province_str, province_id) province_city_bucket.Put('provinceid_' + province_id, province_str) except KeyError: continue else: continue try: v_count = daily_count_kv[str(field) + '_' + str(update_date) + '_' + str(province_id)] daily_count_kv[str(field) + '_' + str(update_date) + '_' + str(province_id)] = str(int(v_count) + 1) except KeyError: daily_count_kv[str(field) + '_' + str(update_date) + '_' + str(province_id)] = str(1) print len(daily_count_kv.keys()) batch = leveldb.WriteBatch() for k, v in daily_count_kv.iteritems(): batch.Put(k, str(v)) field_daily_location_count_bucket.Write(batch, sync=True)