def repost_timeline(self, client, fid, limit):
    """Fetch reposts of a status and dump their authors to two text files.

    Every fetched repost's user is appended to ``<fid>_repost.txt`` via
    ``self._write_user``. Afterwards ``<fid>_repost2.txt`` is rewritten
    with at most one entry per user id, walking the fetched reposts in
    reverse order and skipping reposts whose text carries the "deleted"
    marker.

    Args:
        client: object exposing ``client.api.repost_timeline(**kwargs)``.
        fid: status id as accepted by ``misc.decode62``; also used as the
            prefix of both output file names.
        limit: page number past which an empty page ends the fetch
            silently instead of printing ``retry...``.
    """
    mid = misc.decode62(fid)
    kwds = {
        'id': mid,
        'count': 100,
        'page': 1,
    }
    buf = []

    # ``with`` guarantees the file is closed even if the API call or
    # ``_write_user`` raises part-way through.
    with open('%s_repost.txt' % fid, 'a') as fh:
        # NOTE(review): the paging loop is currently a single pass (the
        # original ``while True`` was commented out), so only the first
        # page is requested and ``continue`` simply ends the loop.
        for _ in range(1):
            rs = client.api.repost_timeline(**kwds) or []
            # Single parenthesised argument: prints the same text as the
            # original print statement.
            print('page: %s,%s' % (kwds['page'], len(rs)))
            if not rs and kwds['page'] > limit:
                break
            if not rs:
                print('retry...')
                continue
            for e in rs:
                user = e.user
                user._txt = e.text.encode('utf8', 'ignore')
                # NOTE(review): the original had trailing commas here, so
                # both attributes are 1-tuples. Kept as-is (now explicit)
                # because ``_write_user`` is not visible from this block;
                # confirm whether plain scalars were intended.
                user._ts = (int(time.mktime(e.created_at.timetuple())),)
                user._ct = (e.created_at.strftime("%Y-%m-%d %H:%M:%S"),)
                self._write_user(fh, user)
                buf.append(user)
            kwds['page'] += 1
            time.sleep(3)

    seen_ids = set()
    with open('%s_repost2.txt' % fid, 'w') as fh:
        for user in reversed(buf):
            if user.id in seen_ids:
                continue
            # The literal is Chinese for "this weibo has been deleted".
            # Such reposts are skipped without marking the user as seen,
            # so another repost by the same user can still be written.
            if '此微博已被删除' in user._txt:
                continue
            seen_ids.add(user.id)
            self._write_user(fh, user)
def repost_timeline(self, client, fid, limit):
    """Fetch reposts of a status and dump their authors to two text files.

    Every fetched repost's user is appended to ``<fid>_repost.txt`` via
    ``self._write_user``. Afterwards ``<fid>_repost2.txt`` is rewritten
    with at most one entry per user id, walking the fetched reposts in
    reverse order and skipping reposts whose text carries the "deleted"
    marker.

    Args:
        client: object exposing ``client.api.repost_timeline(**kwargs)``.
        fid: status id as accepted by ``misc.decode62``; also used as the
            prefix of both output file names.
        limit: page number past which an empty page ends the fetch
            silently instead of printing ``retry...``.
    """
    mid = misc.decode62(fid)
    kwds = {
        'id': mid,
        'count': 100,
        'page': 1,
    }
    buf = []

    # ``with`` guarantees the file is closed even if the API call or
    # ``_write_user`` raises part-way through.
    with open('%s_repost.txt' % fid, 'a') as fh:
        # NOTE(review): the paging loop is currently a single pass (the
        # original ``while True`` was commented out), so only the first
        # page is requested and ``continue`` simply ends the loop.
        for _ in range(1):
            rs = client.api.repost_timeline(**kwds) or []
            # Single parenthesised argument: prints the same text as the
            # original print statement.
            print('page: %s,%s' % (kwds['page'], len(rs)))
            if not rs and kwds['page'] > limit:
                break
            if not rs:
                print('retry...')
                continue
            for e in rs:
                user = e.user
                user._txt = e.text.encode('utf8', 'ignore')
                # NOTE(review): the original had trailing commas here, so
                # both attributes are 1-tuples. Kept as-is (now explicit)
                # because ``_write_user`` is not visible from this block;
                # confirm whether plain scalars were intended.
                user._ts = (int(time.mktime(e.created_at.timetuple())),)
                user._ct = (e.created_at.strftime("%Y-%m-%d %H:%M:%S"),)
                self._write_user(fh, user)
                buf.append(user)
            kwds['page'] += 1
            time.sleep(3)

    seen_ids = set()
    with open('%s_repost2.txt' % fid, 'w') as fh:
        for user in reversed(buf):
            if user.id in seen_ids:
                continue
            # The literal is Chinese for "this weibo has been deleted".
            # Such reposts are skipped without marking the user as seen,
            # so another repost by the same user can still be written.
            if '此微博已被删除' in user._txt:
                continue
            seen_ids.add(user.id)
            self._write_user(fh, user)
def repost2_timeline(self, client, fid, limit):
    """Page through the reposts of a status and write them as JSON lines.

    ``<fid>_repost2.txt`` is truncated and then receives one JSON object
    per repost with the keys ``mid``, ``txt``, ``ts``, ``ca``,
    ``user_name`` and ``user_id``.

    Paging continues for as long as pages come back non-empty. An empty
    page ends the loop immediately once the page number exceeds
    ``limit``; otherwise the same page is requested again, giving up
    after more than three consecutive empty responses.

    Args:
        client: object exposing ``client.api.repost_timeline(**kwargs)``.
        fid: status id as accepted by ``misc.decode62``; also used as the
            prefix of the output file name.
        limit: page number up to which an empty page is retried.
    """
    mid = misc.decode62(fid)
    kwds = {
        'id': mid,
        'count': 100,
        'page': 1,
    }
    retries = 0

    # ``with`` guarantees the file is closed even if the API call or the
    # JSON serialisation raises part-way through.
    with open('%s_repost2.txt' % fid, 'w') as fh:
        while True:
            # Pause before every request, including retries.
            time.sleep(5)
            rs = client.api.repost_timeline(**kwds) or []
            # Single parenthesised argument: prints the same text as the
            # original print statement.
            print('page: %s,%s' % (kwds['page'], len(rs)))
            if not rs:
                if kwds['page'] > limit:
                    break
                print('retry...')
                retries += 1
                if retries > 3:
                    break
                continue

            # A non-empty page resets the consecutive-failure counter.
            retries = 0
            print('... find %s' % len(rs))
            for e in rs:
                obj = {
                    'mid': e.mid,
                    'txt': e.text.encode('utf8', 'ignore'),
                    'ts': int(time.mktime(e.created_at.timetuple())),
                    'ca': e.created_at.strftime("%Y-%m-%d %H:%M:%S"),
                    'user_name': e.user.screen_name.encode('utf8', 'ignore'),
                    'user_id': e.user.id,
                }
                fh.write('%s\n' % json.dumps(obj))
            # Flush once per page so finished pages reach the file while
            # the loop sleeps before the next request.
            fh.flush()
            kwds['page'] += 1
def repost2_timeline(self, client, fid, limit):
    """Page through the reposts of a status and write them as JSON lines.

    ``<fid>_repost2.txt`` is truncated and then receives one JSON object
    per repost with the keys ``mid``, ``txt``, ``ts``, ``ca``,
    ``user_name`` and ``user_id``.

    Paging continues for as long as pages come back non-empty. An empty
    page ends the loop immediately once the page number exceeds
    ``limit``; otherwise the same page is requested again, giving up
    after more than three consecutive empty responses.

    Args:
        client: object exposing ``client.api.repost_timeline(**kwargs)``.
        fid: status id as accepted by ``misc.decode62``; also used as the
            prefix of the output file name.
        limit: page number up to which an empty page is retried.
    """
    mid = misc.decode62(fid)
    kwds = {
        'id': mid,
        'count': 100,
        'page': 1,
    }
    retries = 0

    # ``with`` guarantees the file is closed even if the API call or the
    # JSON serialisation raises part-way through.
    with open('%s_repost2.txt' % fid, 'w') as fh:
        while True:
            # Pause before every request, including retries.
            time.sleep(5)
            rs = client.api.repost_timeline(**kwds) or []
            # Single parenthesised argument: prints the same text as the
            # original print statement.
            print('page: %s,%s' % (kwds['page'], len(rs)))
            if not rs:
                if kwds['page'] > limit:
                    break
                print('retry...')
                retries += 1
                if retries > 3:
                    break
                continue

            # A non-empty page resets the consecutive-failure counter.
            retries = 0
            print('... find %s' % len(rs))
            for e in rs:
                obj = {
                    'mid': e.mid,
                    'txt': e.text.encode('utf8', 'ignore'),
                    'ts': int(time.mktime(e.created_at.timetuple())),
                    'ca': e.created_at.strftime("%Y-%m-%d %H:%M:%S"),
                    'user_name': e.user.screen_name.encode('utf8', 'ignore'),
                    'user_id': e.user.id,
                }
                fh.write('%s\n' % json.dumps(obj))
            # Flush once per page so finished pages reach the file while
            # the loop sleeps before the next request.
            fh.flush()
            kwds['page'] += 1