Example #1
0
    def _extractAnswer(self, block):
        '''
        Turn the parsed HTML element of a single answer into a plain dict.

        `block` is indexed for its `data-aid` and `data-created` attributes
        and searched with `find` for the author link, the answer body, the
        upvote counter and the comment toggle.

        Returns a dict with the keys `aid`, `responder`, `date`, `content`,
        `upvote` and `commentsCount`.
        '''
        # Answer id
        answerId = block['data-aid']

        # Author: without an avatar link the answer is treated as anonymous
        # and recorded as -1. Otherwise the href has the form
        # /people/<responder>, so the first 8 characters are dropped.
        avatarLink = block.find('a', class_='zm-item-link-avatar')
        author = -1 if avatarLink is None else avatarLink['href'][8:]

        # Creation date, kept exactly as found in the attribute
        created = block['data-created']

        # Answer body text
        body = block.find('div', class_='zm-editable-content').text.strip()

        # Upvote count
        votes = parseNum(block.find('span', class_='count').text)

        # Comment count: the toggle label is "<n>条评论" when a count is
        # shown; any label without a leading number counts as zero comments.
        toggleLabel = block.find('a', class_='toggle-comment').text.strip()
        suffixAt = toggleLabel.find('条评论')
        commentTotal = int(toggleLabel[:suffixAt]) if suffixAt > 0 else 0

        return {
            'aid': answerId,
            'responder': author,
            'date': created,
            'content': body,
            'upvote': votes,
            'commentsCount': commentTotal,
        }
Example #2
0
    def update(self):
        '''
        Refresh this question from its web page and collect its answers.

        Stamps `self.lastModified` with the current local time, then fetches
        the question page. On an HTTP 200 response it repopulates
        `self.title`, `self.detail`, `self.tags`, `self.followersCount` and
        `self.answers`. When more than 50 answers are reported, the remaining
        ones are requested in batches of 50 from the QuestionAnswerListV2
        endpoint.

        Returns False when the question page does not respond with status
        200 (only `self.lastModified` has been changed at that point), and
        True otherwise.
        '''
        self.lastModified = str(datetime.datetime.now())

        questionUrl = 'http://www.zhihu.com/question/%d' % (self.qid)
        response = get(questionUrl)
        if response.status_code != 200:
            return False

        page = BeautifulSoup(response.text)

        # Title
        self.title = page.find('h2', class_='zm-item-title').text.strip()
        # Question body
        self.detail = page.find('div', id='zh-question-detail').div.text.strip()
        # Topic tags the question belongs to
        self.tags = [
            tagLink.string.strip()
            for tagLink in page.find_all("a", class_='zm-item-tag')
        ]

        # Follower count
        followersBlock = page.find('div', class_='zg-gray-normal')
        if followersBlock is not None and followersBlock.strong is not None:
            self.followersCount = parseNum(followersBlock.strong.text)
        else:
            # The <strong> element is missing when nobody follows the
            # question yet.
            self.followersCount = 0

        self.answers = []

        # Number of answers: taken from the header when present; otherwise a
        # lone upvote counter on the page is taken to mean a single answer.
        countHeader = page.find('h3', id='zh-question-answer-num')
        if countHeader is not None:
            totalAnswers = int(countHeader['data-num'])
        elif page.find('span', class_='count') is not None:
            totalAnswers = 1
        else:
            totalAnswers = 0

        # Answers embedded in the page itself (served 50 at a time).
        for answerBlock in page.find_all('div', class_='zm-item-answer'):
            # Answers carrying an "answer-status" marker (suggested for
            # revision) are ignored.
            if answerBlock.find('div', class_='answer-status') is None:
                self.answers.append(self._extractAnswer(answerBlock))

        if not answersRemain(totalAnswers) if False else totalAnswers <= 50:
            return True

        # Remaining answers are requested batch by batch.
        xsrfToken = page.find('input', attrs={'name': '_xsrf'})['value']
        extraHeaders = {'Referer': questionUrl}
        batchCount = math.ceil(totalAnswers/50)
        for batchIndex in range(1, batchCount):
            payload = {
                "_xsrf": xsrfToken,
                "method": 'next',
                'params': '{"url_token": %d, "pagesize": 50, "offset": %d}' % (self.qid, batchIndex*50),
            }
            response = post('http://www.zhihu.com/node/QuestionAnswerListV2', extraHeaders, payload)
            for fragment in response.json()['msg']:
                answerDiv = BeautifulSoup(fragment).div
                # Same filter as for the first page.
                if answerDiv.find('div', class_='answer-status') is None:
                    self.answers.append(self._extractAnswer(answerDiv))

        return True