Esempio n. 1
0
    def _fetch_followees(self, total):
        # 获取 该用户关注的人
        # http://www.zhihu.com/people/leng-zhe/followees
        url = "http://www.zhihu.com/node/ProfileFolloweesListV2"
        """
            HTTP POST:
                method:next
                params:{"offset":20,"order_by":"created","hash_id":"06f3b1c891d0d504eea8af883150b497"}
                _xsrf:f11a7023d52d5a0ec95914ecff30885f

            <div class="zm-profile-card zm-profile-section-item zg-clear no-hovercard"> 
                <div class="zg-right"> 
                    <button 
                        data-follow="m:button" 
                        data-id="dfadd95bc7af994cc8933c444cc9327e" 
                        class="zg-btn zg-btn-follow zm-rich-follow-btn small nth-0">
                            关注
                    </button> 
                </div>
                <a title="黄云忠" data-tip="p$t$huangdoc" class="zm-item-link-avatar" href="/people/huangdoc">
                    <img src="https://pic2.zhimg.com/b7dde5a21_m.jpg" class="zm-item-img-avatar">
                </a>
                <div class="zm-list-content-medium">
                    <h2 class="zm-list-content-title">
                        <a data-tip="p$t$huangdoc" href="http://www.zhihu.com/people/huangdoc" class="zg-link" title="黄云忠">黄云忠</a>
                    </h2>
                    <div class="zg-big-gray">风险投资人</div>
                    <div class="details zg-gray"> 
                        <a target="_blank" href="/people/huangdoc/followers" class="zg-link-gray-normal">4846 关注者</a> / 
                        <a target="_blank" href="/people/huangdoc/asks" class="zg-link-gray-normal">17 提问</a> / 
                        <a target="_blank" href="/people/huangdoc/answers" class="zg-link-gray-normal">23 回答</a> / 
                        <a target="_blank" href="/people/huangdoc" class="zg-link-gray-normal">8 赞同</a> 
                    </div> 
                </div> 
            </div>
        """
        offset = 0
        followees = []
        while offset < total:
            params = {"offset": offset, "order_by": "created", "hash_id": self.hash_id}
            data = {"method": "next", "params": json.dumps(params), "_xsrf": self.xsrf}

            Logging.info(u"获取该用户关注者: %s " % json.dumps(data))

            r = requests.post(url, data=data)
            if r.status_code != 200:
                raise IOError("network error.")
            try:
                res = json.loads(r.content)
                if res["r"] == 0 and type(res["msg"]) == type([]):
                    result = res["msg"]
                else:
                    result = []
            except Exception as e:
                Logging.error(u"数据格式解析失败")
                Logging.debug(e)
                result = []
            for p in result:
                r = re.compile(r"\/people/(\S+)\"|\'", re.DOTALL).findall(p)
                if len(r) > 0:
                    followees.append(r[0])
                else:
                    Logging.warn(u"提取用户token失败")
                    Logging.warn(p)
            offset += len(result)
        return followees