def move_url_from_mongo_redis(): #更新到的时间 1431048036 conn = db.connect('192.168.241.25', 'zhihu') redis_conn = redis.redis_connect() try: start_time = db.find_one(conn, 'record')['time'] except: start_time = 0 end_time = time.time() authorids = db.find(conn, 'author', {'tg': { '$lt': end_time, '$gt': start_time }}) print 'get from mongo: ', authorids.count() num = 0 for authorid in authorids: try: gender = authorid['gender'] fansnum = authorid['fansnum'] except: redis.add_url(redis_conn, authorid['id']) num += 1 record = db.find_one(conn, 'record', {'id': '1'}) print record if not record: db.insert(conn, 'record', {'time': end_time, 'id': '1'}) else: db.update(conn, 'record', {'id': '1'}, {'time': end_time}) print 'add to redis: ', num db.close(conn)
def __init__(self):
    """Connect to the ``zhihu`` database and make sure its indexes exist.

    For each entry of the plan below a unique index is created first and,
    where given, a plain index afterwards. The start time of the instance
    is stored in ``self.start``.
    """
    self.conn = db.connect('192.168.241.25', 'zhihu')

    # (attribute holding the collection, unique-index spec, plain-index spec)
    # listed in creation order. Index creation for self.article, self.ask
    # and self.answer is disabled and therefore not part of the plan.
    index_plan = (
        ('author', ['id'], None),
        ('author2', ['id', 'tg'], None),
        ('topic', ['id'], None),
        ('post', ['url'], ['tg']),
        ('author_topic', [[('authorid', 1), ('topic_url', 1)]], ['authorid']),
        ('author_zl', [[('authorid', 1), ('zl_url', 1)]], ['authorid']),
        # 'owerpost' is the attribute's actual spelling (owner posts).
        ('owerpost', [[('authorid', 1), ('zl_url', 1)]], ['authorid']),
    )
    for attr_name, unique_spec, plain_spec in index_plan:
        collection = getattr(self, attr_name)
        db.create_unique_index(self.conn, collection, uniques=unique_spec)
        if plain_spec is not None:
            db.create_index(self.conn, collection, indexs=plain_spec)

    self.start = datetime.now()
def mv_data():
    """Copy ``author_find`` and ``author_tmp`` documents into ``author``.

    Ensures a unique index on ``id`` and a plain index on ``tg`` for the
    ``author`` collection. Documents from ``author_find`` are re-inserted
    with only ``url``, ``id`` and ``topic_id`` plus a ``tg`` timestamp; the
    timestamp moves one day further into the past for every batch of 10000
    documents. Documents from ``author_tmp`` are inserted unchanged.

    The connection is closed when the copy finishes or fails.
    """
    batch_size = 10000
    seconds_per_day = 24 * 3600

    conn = db.connect('192.168.241.25', 'zhihu')
    try:
        db.create_unique_index(conn, 'author', uniques=['id'])
        db.create_index(conn, 'author', indexs=['tg'])

        day_offset = 0
        # enumerate() supplies the running counter; the previous manual
        # counter was never incremented, so every document started a new
        # "batch" and was pushed one more day back.
        for position, data in enumerate(db.find(conn, 'author_find')):
            if position % batch_size == 0:
                tg = time.time() - day_offset * seconds_per_day
                day_offset += 1
            value = {
                'url': data['url'],
                'id': data['id'],
                'topic_id': data['topic_id'],
                'tg': tg,
            }
            db.insert(conn, 'author', value)

        # author_tmp documents already carry their own fields; copy as-is.
        for data in db.find(conn, 'author_tmp'):
            db.insert(conn, 'author', data)
    finally:
        db.close(conn)
#encoding:utf-8 import mongo_util2 as db import time import sys reload(sys) sys.setdefaultencoding('utf-8') conn = db.connect('192.168.241.25', 'zhihu') now = time.time() authors = db.find(conn, 'author', {'tg': {'$lt': now, '$gt': 0}}) num_500 = 0 num_1000 = 0 num_2000 = 0 num_5000 = 0 num_10000 = 0 num_20000 = 0 num_50000 = 0 num2 = 0 for author in authors: try: fansnum = author['fansnum'] likenum = author['likenum'] if int(fansnum) > 500: num_500 += 1 if int(fansnum) > 1000: num_1000 += 1 if int(fansnum) > 2000: num_2000 += 1 if int(likenum) >= 5000: num_5000 += 1 if int(likenum) >= 10000:
#encoding:utf-8 import mongo_util2 as db import time import sys reload(sys) sys.setdefaultencoding('utf-8') conn = db.connect('192.168.241.25', 'zhihu') now = time.time() authors = db.find(conn, 'author',{'tg':{'$lt':now,'$gt':0}}) num = 0 del_num = 0 for author in authors: try: authorid = author['id'] pos = authorid.find('?') if pos!=-1: print authorid authorid_tmp = authorid[:pos] auth = db.find_one(conn, 'author', {'id':authorid_tmp}) if not auth: db.update(conn, 'author', {'id':authorid}, {'id':authorid_tmp}) num += 1 else: del_num = db.delete_one(conn, 'author', {'id':authorid}) del_num += 1 except: pass print 'update num: ',num