Exemple #1
0
def move_url_from_mongo_redis():
    #更新到的时间 1431048036
    conn = db.connect('192.168.241.25', 'zhihu')
    redis_conn = redis.redis_connect()
    try:
        start_time = db.find_one(conn, 'record')['time']
    except:
        start_time = 0
    end_time = time.time()
    authorids = db.find(conn, 'author',
                        {'tg': {
                            '$lt': end_time,
                            '$gt': start_time
                        }})
    print 'get from mongo: ', authorids.count()
    num = 0
    for authorid in authorids:
        try:
            gender = authorid['gender']
            fansnum = authorid['fansnum']
        except:
            redis.add_url(redis_conn, authorid['id'])
            num += 1
    record = db.find_one(conn, 'record', {'id': '1'})
    print record
    if not record:
        db.insert(conn, 'record', {'time': end_time, 'id': '1'})
    else:
        db.update(conn, 'record', {'id': '1'}, {'time': end_time})

    print 'add to redis: ', num
    db.close(conn)
Exemple #2
0
    def __init__(self):
        """Connect to the ``zhihu`` database and create the collection indexes.

        For each collection listed below a unique index is created first and,
        where one is configured, a plain index afterwards.  The collection
        names are read from attributes of ``self`` (``self.author``,
        ``self.post``, ...), which are presumably defined on the class --
        they are not set here.  The time of construction is stored in
        ``self.start``.
        """
        self.conn = db.connect('192.168.241.25', 'zhihu')

        # NOTE: index creation for the article, ask and answer collections is
        # currently disabled, so those collections are not listed.
        # Each entry: (attribute naming the collection, unique keys, plain keys)
        index_plan = (
            ('author', ['id'], None),
            ('author2', ['id', 'tg'], None),
            ('topic', ['id'], None),
            ('post', ['url'], ['tg']),
            ('author_topic', [[('authorid', 1), ('topic_url', 1)]], ['authorid']),
            ('author_zl', [[('authorid', 1), ('zl_url', 1)]], ['authorid']),
            ('owerpost', [[('authorid', 1), ('zl_url', 1)]], ['authorid']),
        )
        for attr_name, unique_keys, plain_keys in index_plan:
            # Attributes are looked up lazily, one collection at a time.
            db.create_unique_index(self.conn, getattr(self, attr_name),
                                   uniques=unique_keys)
            if plain_keys is not None:
                db.create_index(self.conn, getattr(self, attr_name),
                                indexs=plain_keys)

        self.start = datetime.now()
Exemple #3
0
def mv_data():
    """Copy the ``author_find`` and ``author_tmp`` documents into ``author``.

    Ensures a unique index on ``id`` and a plain index on ``tg`` for the
    ``author`` collection, then:

    * inserts every ``author_find`` document as ``{url, id, topic_id, tg}``,
      where ``tg`` is assigned per batch of 10000 documents: the first batch
      gets the current time and each following batch a time one day earlier;
    * inserts every ``author_tmp`` document unchanged.

    The MongoDB connection is closed even when one of the steps raises.
    """
    batch_size = 10000
    seconds_per_day = 24 * 3600

    conn = db.connect('192.168.241.25', 'zhihu')
    try:
        db.create_unique_index(conn, 'author', uniques=['id'])
        db.create_index(conn, 'author', indexs=['tg'])

        tg = None
        for position, data in enumerate(db.find(conn, 'author_find')):
            # The counter has to advance with every document; otherwise the
            # batch boundary is hit on each row and every row is shifted by
            # another day.
            if position % batch_size == 0:
                tg = time.time() - (position // batch_size) * seconds_per_day
            value = {
                'url': data['url'],
                'id': data['id'],
                'topic_id': data['topic_id'],
                'tg': tg,
            }
            db.insert(conn, 'author', value)

        for data in db.find(conn, 'author_tmp'):
            db.insert(conn, 'author', data)
    finally:
        db.close(conn)
Exemple #4
0
#encoding:utf-8
import mongo_util2 as db
import time
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

conn = db.connect('192.168.241.25', 'zhihu')
now = time.time()
authors = db.find(conn, 'author', {'tg': {'$lt': now, '$gt': 0}})
num_500 = 0
num_1000 = 0
num_2000 = 0
num_5000 = 0
num_10000 = 0
num_20000 = 0
num_50000 = 0
num2 = 0
for author in authors:
    try:
        fansnum = author['fansnum']
        likenum = author['likenum']
        if int(fansnum) > 500:
            num_500 += 1
        if int(fansnum) > 1000:
            num_1000 += 1
        if int(fansnum) > 2000:
            num_2000 += 1
        if int(likenum) >= 5000:
            num_5000 += 1
        if int(likenum) >= 10000:
#encoding:utf-8
import mongo_util2 as db
import time
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

conn = db.connect('192.168.241.25', 'zhihu')
now = time.time()
authors = db.find(conn, 'author',{'tg':{'$lt':now,'$gt':0}})
num = 0
del_num = 0
for author in authors:
    try:
        authorid = author['id']
        pos = authorid.find('?')
        if pos!=-1:
            print authorid
            authorid_tmp = authorid[:pos]
            auth = db.find_one(conn, 'author', {'id':authorid_tmp})
            if not auth:
                db.update(conn, 'author', {'id':authorid}, {'id':authorid_tmp})
                num += 1
            else:
                del_num = db.delete_one(conn, 'author', {'id':authorid})
                del_num += 1
            
    except:
        pass

print 'update num: ',num