def index(mid, page=None):
    """Build the repost graph for weibo ``mid`` and return it.

    When ``page`` is None the source weibo is fetched from the API, stored,
    and ``page`` is set to the last page computed from its repost count.
    Otherwise the source weibo is read from ``all_source_weibos``; an empty
    string is returned if it is not stored there.

    Returns the graph produced by ``tree2graph``, an empty string when no
    reposts are stored for the source weibo, or a redirect to the previous
    page when the last page turns out to be empty.
    """
    user = login_user(session)
    client = get_client(user['access_token'], user['expires_in'])
    per_page = 200

    first_visit = page is None
    if first_visit:
        source_weibo = client.get('statuses/show', id=mid)
        mongo.db.all_source_weibos.update({'id': source_weibo['id']}, source_weibo, upsert=True)
        items2mongo(resp2item_v2(source_weibo))
    else:
        source_weibo = mongo.db.all_source_weibos.find_one({'id': mid})
        if source_weibo is None:
            return ''

    reposts_count = source_weibo['reposts_count']
    total_page = int(math.ceil(reposts_count * 1.0 / per_page))
    if first_visit:
        page = total_page

    try:
        fetched = client.get('statuses/repost_timeline', id=mid, count=200, page=page)['reposts']
        # If the page is empty and it is the first (i.e. last-numbered) page
        # visited, the page count was probably overestimated by one: redirect
        # to the previous page.
        if fetched == [] and total_page > 1 and page == total_page:
            return redirect(url_for('graph.index', mid=mid, page=page - 1))

        items = []
        for repost in fetched:
            items.extend(resp2item_v2(repost))
        items2mongo(items)

        for item in items:
            if not isinstance(item, WeiboItem):
                continue
            if item['id'] == source_weibo['id']:
                continue
            doc = item.to_dict()
            doc['source_weibo'] = source_weibo['id']
            mongo.db.all_repost_weibos.update({'id': doc['id']}, doc, upsert=True)
    except RuntimeError:
        # Best effort: fall through and use whatever reposts are already stored.
        pass

    stored_reposts = list(mongo.db.all_repost_weibos.find({'source_weibo': source_weibo['id']}))
    if not stored_reposts:
        return ''

    if total_page >= page:
        page_count = total_page - page + 1
    else:
        page_count = 0

    tree, tree_stats = reposts2tree(source_weibo, stored_reposts, per_page, page_count)
    graph, max_depth, max_width = tree2graph(tree)
    tree_stats['max_depth'] = max_depth
    tree_stats['max_width'] = max_width

    # Persist the repost statistics for this (weibo, page) pair.
    tree_stats['id'] = mid
    tree_stats['page'] = page
    mongo.db.tree_stats.update({'id': mid, 'page': page}, tree_stats, upsert=True, w=1)
    return graph
def more_friends(self, response):
    """Handle one page of a user's friends list.

    Appends each friend's id to ``source_user['friends']`` and collects the
    items produced for every friend. If the response carries a non-zero
    ``next_cursor`` a follow-up request for the next page is appended;
    otherwise the accumulated ``source_user`` is appended.

    Returns the list of collected items plus the request or source user.
    """
    meta = response.meta
    uid = meta['uid']
    source_user = meta['source_user']
    payload = json.loads(response.body)

    collected = []
    for friend in payload['users']:
        friend_items = resp2item_v2(friend)
        if friend_items == []:
            continue
        # The first item is the user record itself.
        source_user['friends'].append(friend_items[0]['id'])
        collected.extend(friend_items)

    next_cursor = payload['next_cursor']
    if next_cursor == 0:
        # Last page: emit the source user with its complete friends list.
        collected.append(source_user)
        return collected

    follow_up = Request(FRIENDS_URL.format(uid=uid, cursor=next_cursor), headers=None, callback=self.more_friends)
    follow_up.meta['uid'] = uid
    follow_up.meta['source_user'] = source_user
    collected.append(follow_up)
    return collected
def parse(self, response):
    """Parse one page of a user's timeline and decide whether to page on.

    Collects the items produced for every status, counts how many statuses
    are new (via the bloom filter when ``self.bloom`` is set, otherwise via a
    lookup in ``master_timeline_weibo``), adjusts the uid's priority score on
    page 1, and appends a request for the next page when the number of new
    statuses exceeds ``AT_LEAST_UPDATE_COUNT``.

    Raises:
        ShouldNotEmptyError: if the response has no statuses.
    """
    page = response.meta['page']
    uid = response.meta['uid']
    resp = json.loads(response.body)
    statuses = resp.get('statuses')
    if not statuses:
        raise ShouldNotEmptyError()

    results = []
    for status in statuses:
        results.extend(resp2item_v2(status))

    # Count the statuses not seen before, using the bloom filter or mongo.
    update_count = 0
    if self.bloom:
        for status in statuses:
            if 'mid' in status and not self.bloom.check(status['mid']):
                update_count += 1
                # Record the status in the filter.
                self.bloom.add(status['mid'], int(time.time() * 1000))
    else:
        for status in statuses:
            if 'id' in status and self.db.master_timeline_weibo.find({'_id': status['id']}).limit(1).count() == 0:
                update_count += 1

    if page == 1:
        # Score feedback. The stored value is converted to int before the
        # range checks: comparing the raw hash value with a number does not
        # compare numerically. A missing field counts as 0.
        score = int(self.r.hget(self.uids_priority_set, uid) or 0)
        if update_count > 0 and score < 10:
            self.r.hincrby(self.uids_priority_set, uid, 1)
        elif update_count == 0 and score > 0:
            self.r.hincrby(self.uids_priority_set, uid, -1)
        log.msg(format='Score [uid:%(uid)s] update to %(score)s',
                level=log.INFO,
                uid=uid,
                score=self.r.hget(self.uids_priority_set, uid))

    if update_count > AT_LEAST_UPDATE_COUNT:
        page += 1
        request = Request(BASE_URL.format(uid=uid, page=page), headers=None)
        request.meta['page'] = page
        request.meta['uid'] = uid
        results.append(request)
        log.msg(format='One more page [uid:%(uid)s] page:%(page)s update_count:%(update_count)s',
                level=log.INFO,
                uid=uid,
                page=page,
                update_count=update_count)
    return results
def parse(self, response):
    """Return the items produced for every status in the response body."""
    payload = json.loads(response.body)
    return [
        item
        for status in payload['statuses']
        for item in resp2item_v2(status)
    ]
def index():
    """Look up a user by the nickname in the ``q`` query parameter.

    On success the user is stored in ``all_visited_users`` and the browser is
    redirected to that user's weibo list. If a ``RuntimeError`` is raised
    along the way, an error message is flashed and the browser is redirected
    to ``simple.index``.
    """
    user = login_user(session)
    nickname = request.args.get('q', '').strip('@ \r\n\t')
    client = get_client(user['access_token'], user['expires_in'])

    try:
        target_user = client.get('users/show', screen_name=nickname)
        target_id = {'id': target_user['id']}
        mongo.db.all_visited_users.update(target_id, target_user, upsert=True)
        items2mongo(resp2item_v2(target_user))
        destination = url_for('search.weibos_by_uid_and_page', uid=target_user['id'])
        return redirect(destination)
    except RuntimeError:
        flash(u'您输入的昵称不存在,请重新输入')
        return redirect(url_for('simple.index'))
def parse(self, response):
    """Parse one page of a user's timeline and decide whether to page on.

    Collects the items produced for every status, counts how many statuses
    are new (via the bloom filter when ``self.bloom`` is set, otherwise via a
    lookup in ``master_timeline_weibo``), adjusts the uid's priority score on
    page 1, and appends a request for the next page when the number of new
    statuses exceeds ``AT_LEAST_UPDATE_COUNT``.

    Raises:
        ShouldNotEmptyError: if the response has no statuses.
    """
    page = response.meta['page']
    uid = response.meta['uid']
    resp = json.loads(response.body)
    statuses = resp.get('statuses')
    if not statuses:
        raise ShouldNotEmptyError()

    results = []
    for status in statuses:
        results.extend(resp2item_v2(status))

    # Count the statuses not seen before, using the bloom filter or mongo.
    update_count = 0
    if self.bloom:
        for status in statuses:
            if 'mid' in status and not self.bloom.check(status['mid']):
                update_count += 1
                # Record the status in the filter.
                self.bloom.add(status['mid'], int(time.time() * 1000))
    else:
        for status in statuses:
            if 'id' in status and self.db.master_timeline_weibo.find({'_id': status['id']}).limit(1).count() == 0:
                update_count += 1

    if page == 1:
        # Score feedback. Convert the stored value to int before the range
        # checks so that they compare numerically; a missing field counts
        # as 0.
        current_score = int(self.r.hget(self.uids_priority_set, uid) or 0)
        if update_count > 0 and current_score < 10:
            self.r.hincrby(self.uids_priority_set, uid, 1)
        elif update_count == 0 and current_score > 0:
            self.r.hincrby(self.uids_priority_set, uid, -1)
        log.msg(format='Score [uid:%(uid)s] update to %(score)s', level=log.INFO, uid=uid,
                score=self.r.hget(self.uids_priority_set, uid))

    if update_count > AT_LEAST_UPDATE_COUNT:
        page += 1
        request = Request(BASE_URL.format(uid=uid, page=page), headers=None)
        request.meta['page'] = page
        request.meta['uid'] = uid
        results.append(request)
        log.msg(format='One more page [uid:%(uid)s] page:%(page)s update_count:%(update_count)s',
                level=log.INFO, uid=uid, page=page, update_count=update_count)
    return results
def more_reposts(self, response):
    """Handle one page of reposts for the source weibo in ``response.meta``.

    Appends the id of each repost to ``source_weibo['reposts']`` and returns
    the items produced for every repost followed by the source weibo.

    Raises:
        ShouldNotEmptyError: if the response has no reposts, including when
            the ``reposts`` key is missing or null.
    """
    source_weibo = response.meta['source_weibo']
    resp = json.loads(response.body)
    # Use .get() so a response without a 'reposts' key raises the declared
    # error instead of KeyError.
    reposts = resp.get('reposts')
    if not reposts:
        raise ShouldNotEmptyError()

    results = []
    for repost in reposts:
        items = resp2item_v2(repost)
        if items == []:
            continue
        weibo = items[0]  # the repost weibo itself
        source_weibo['reposts'].append(weibo['id'])
        results.extend(items)
    results.append(source_weibo)
    return results
def more_reposts(self, response):
    """Handle one page of reposts for the source weibo in ``response.meta``.

    Records each repost id on ``source_weibo['reposts']`` and returns the
    items produced for every repost, with the source weibo appended last.

    Raises:
        ShouldNotEmptyError: if the response has no reposts.
    """
    source_weibo = response.meta['source_weibo']
    payload = json.loads(response.body)
    if not payload.get('reposts'):
        raise ShouldNotEmptyError()

    collected = []
    for repost in payload['reposts']:
        repost_items = resp2item_v2(repost)
        if repost_items != []:
            # The first item is the repost weibo itself.
            source_weibo['reposts'].append(repost_items[0]['id'])
            collected.extend(repost_items)
    collected.append(source_weibo)
    return collected
def source_user(self, response):
    """Parse the source user and schedule the first page of its friends.

    Returns the items produced for the user followed by a request for the
    friends list at cursor 0, whose callback is ``more_friends``.

    Raises:
        ShouldNotEmptyError: if fewer than two items are produced.
    """
    uid = response.meta["uid"]
    parsed = resp2item_v2(json.loads(response.body))
    if len(parsed) < 2:
        raise ShouldNotEmptyError()

    results = list(parsed)
    first_page = Request(
        FRIENDS_URL.format(uid=uid, cursor=0),
        headers=None,
        callback=self.more_friends,
    )
    first_page.meta["uid"] = uid
    first_page.meta["cursor"] = 0
    # The first item is carried along so the friends pages can extend it.
    first_page.meta["source_user"] = parsed[0]
    results.append(first_page)
    return results
def source_user(self, response):
    """Parse the source user and schedule the first page of its followers.

    Returns the items produced for the user followed by a request for the
    followers list at cursor 0, whose callback is ``more_followers``.

    Raises:
        ShouldNotEmptyError: if fewer than two items are produced.
    """
    uid = response.meta['uid']
    payload = json.loads(response.body)
    user_items = resp2item_v2(payload)
    if len(user_items) < 2:
        raise ShouldNotEmptyError()

    followers_url = FOLLOWERS_URL.format(uid=uid, cursor=0)
    followers_request = Request(followers_url, headers=None, callback=self.more_followers)
    followers_request.meta['uid'] = uid
    followers_request.meta['cursor'] = 0
    # The first item is carried along so the followers pages can extend it.
    followers_request.meta['source_user'] = user_items[0]

    results = []
    results.extend(user_items)
    results.append(followers_request)
    return results
def soucre_weibo(self, response):
    """Parse the source weibo and schedule one request per page of reposts.

    The number of pages is ``ceil(reposts_count / 200)``. Every request
    carries the page number, the weibo id and the weibo item in its meta and
    is handled by ``more_reposts``.

    Raises:
        ShouldNotEmptyError: if fewer than two items are produced.
    """
    parsed = resp2item_v2(json.loads(response.body))
    if len(parsed) < 2:
        raise ShouldNotEmptyError()

    results = list(parsed)
    weibo = parsed[0]
    reposts_count = weibo['reposts_count']
    wid = weibo['id']

    last_page = int(math.ceil(reposts_count / 200.0))
    for page_no in range(1, last_page + 1):
        page_request = Request(BASE_URL.format(id=wid, page=page_no), headers=None, callback=self.more_reposts)
        page_request.meta['page'] = page_no
        page_request.meta['wid'] = wid
        page_request.meta['source_weibo'] = weibo
        results.append(page_request)
    return results
def parse(self, response):
    """Parse one timeline page and always schedule the following page.

    Returns the items produced for every status followed by a request for
    the next page, built with the spider's ``since_id`` and ``max_id``.

    Raises:
        ShouldNotEmptyError: if the response has no statuses, including when
            the ``statuses`` key is missing or null.
    """
    page = response.meta['page']
    uid = response.meta['uid']
    resp = json.loads(response.body)
    # A truthiness check also covers a missing or null 'statuses', which
    # would otherwise fail in the loop below with KeyError/TypeError.
    statuses = resp.get('statuses')
    if not statuses:
        raise ShouldNotEmptyError()

    results = []
    for status in statuses:
        results.extend(resp2item_v2(status))

    page += 1
    request = Request(
        BASE_URL.format(uid=uid, page=page, since_id=self.since_id, max_id=self.max_id),
        headers=None,
    )
    request.meta['page'] = page
    request.meta['uid'] = uid
    results.append(request)
    return results
def index(mid, page=None):
    """Render the graph page for weibo ``mid``.

    With an explicit ``page`` the ``graph.html`` template is rendered.
    Without one, the source weibo is fetched and stored, the last page is
    computed from its repost count, and the browser is redirected to this
    view with that page.
    """
    if page is not None:
        return render_template(
            'graph.html',
            btnuserpicvisible='inline',
            btnloginvisible='none',
            screen_name=session['screen_name'],
            profile_image_url=session['profile_image_url'],
            mid=mid,
            page=page,
        )

    per_page = 200
    user = login_user(session)
    client = get_client(user['access_token'], user['expires_in'])
    source_weibo = client.get('statuses/show', id=mid)
    mongo.db.all_source_weibos.update({'id': source_weibo['id']}, source_weibo, upsert=True)
    items2mongo(resp2item_v2(source_weibo))

    # Start from the last page of reposts.
    last_page = int(math.ceil(source_weibo['reposts_count'] * 1.0 / per_page))
    return redirect(url_for('show_graph.index', mid=mid, page=last_page))
def parse(self, response):
    """Return the items produced from the JSON-decoded response body."""
    return resp2item_v2(json.loads(response.body))
def weibos_by_uid_and_page(uid, page=1):
    """Render one page of a visited user's weibos, optionally filtered by ``q``.

    Query parameters:
        q: keyword; only statuses whose text, or whose retweeted status text,
            contains it are kept.
        auto_redirect: number of automatic next-page redirects so far.

    When a keyword is given and the current page has no match, the browser is
    redirected to the next page with ``auto_redirect`` incremented. Once
    ``auto_redirect`` exceeds 3 the timeline is not fetched and an empty list
    is rendered instead.
    """
    user = login_user(session)
    q = request.args.get('q', '').strip('@ \r\n\t')
    # type=int yields None for a missing or non-numeric value instead of
    # raising ValueError on user-supplied input.
    auto_redirect = request.args.get('auto_redirect', type=int)

    target_user = mongo.db.all_visited_users.find_one({'id': uid})
    # NOTE(review): find_one returns None for a uid that was never stored;
    # the lookups below would then raise TypeError. Confirm callers only
    # link here after the user has been saved.
    tar_screen_name = target_user['screen_name']
    tar_profile_image_url = target_user['profile_image_url']
    tar_location = target_user['location']

    if q and auto_redirect and auto_redirect > 3:
        statuses = []
    else:
        try:
            client = get_client(user['access_token'], user['expires_in'])
            statuses = client.get('statuses/user_timeline', uid=uid, count=50, page=page)['statuses']
            items = []
            for status in statuses:
                items.extend(resp2item_v2(status))
            items2mongo(items)
            if q:
                statuses = [
                    status for status in statuses
                    if ('text' in status and q in status['text'])
                    or ('retweeted_status' in status and q in status['retweeted_status']['text'])
                ]
                if statuses == []:
                    page += 1
                    auto_redirect = auto_redirect + 1 if auto_redirect else 1
                    # Pass the query values as url_for keyword arguments so
                    # they are URL-encoded; the keyword comes from the user.
                    return redirect(url_for('search.weibos_by_uid_and_page',
                                            uid=uid,
                                            page=page,
                                            q=q,
                                            auto_redirect=auto_redirect))
        except RuntimeError:
            flash(u'获取微博信息失败,请刷新')
            statuses = []

    if statuses == []:
        flash(u'没有搜索到相关微博,请尝试下一页或者采用其他关键词')

    for status in statuses:
        status['weibo_url'] = base62.weiboinfo2url(status['user']['id'], status['mid'])

    screen_name = session['screen_name']
    profile_image_url = session['profile_image_url']
    has_prev = page > 1
    has_next = True  # always assume there is a next page
    page_url = lambda page: url_for('search.weibos_by_uid_and_page', uid=uid, page=page)
    return render_template('weibolist.html',
                           btnuserpicvisible='inline',
                           btnloginvisible='none',
                           screen_name=screen_name,
                           profile_image_url=profile_image_url,
                           tar_screen_name=tar_screen_name,
                           tar_profile_image_url=tar_profile_image_url,
                           tar_location=tar_location,
                           statuses=statuses,
                           page=page,
                           has_prev=has_prev,
                           has_next=has_next,
                           page_url=page_url)