コード例 #1
0
    def parse(self, response):
        """Yield one ``Article`` per entry in the response's JSON ``data`` list.

        ``response.text`` is decoded as JSON and the value under its
        ``'data'`` key is iterated.  Each entry is converted into an
        ``Article`` whose ``offset`` is always ``0``.  Entries that cannot
        be converted (a missing key, or an error raised while formatting a
        time field or building the item) are skipped, so a single bad entry
        does not abort the rest of the page.

        Errors raised while decoding the body or looking up the top-level
        ``'data'`` key are not caught here and propagate to the caller.

        :param response: object whose ``text`` attribute holds the JSON body.
        :return: generator of ``Article`` items.
        """
        data = json.loads(response.text)['data']

        for item in data:
            try:
                # Keyword order mirrors the original field order so the
                # resulting item is populated in the same sequence.
                article = Article(group_id=item['group_id'],
                                  comment_count=item['comment_count'],
                                  title=item['title'],
                                  article_url=item['article_url'],
                                  offset=0,
                                  media_name=item['media_name'],
                                  datetime=formatTime(item['datetime']),
                                  abstract=item['abstract'],
                                  publish_time=formatTimestamp(item['publish_time']),
                                  behot_time=formatTimestamp(item['behot_time']))
            except Exception:
                # Deliberate best-effort: drop the malformed entry and move on.
                continue

            # Yield outside the try block so that only conversion errors are
            # suppressed, never exceptions raised at the yield point.
            yield article
コード例 #2
0
    def parse(self, response):
        """Yield an ``Article`` for each entry in the response's ``data`` list.

        ``response.text`` is decoded as JSON and the value under its
        ``'data'`` key is iterated.  When an entry's formatted ``datetime``
        compares less than ``self.lastTime``, both values are printed and
        ``self.runFlag`` is set to ``False``; that entry is still yielded.
        Entries whose fields cannot be extracted or compared are skipped.

        Errors raised while decoding the body, looking up the top-level
        ``'data'`` key, or constructing the ``Article`` are not caught.

        :param response: object whose ``text`` attribute holds the JSON body.
        :return: generator of ``Article`` items, each with ``offset=0``.
        """
        data = json.loads(response.text)['data']
        for item in data:
            try:
                group_id = item['group_id']
                comment_count = item['comment_count']
                title = item['title']
                article_url = item['article_url']
                media_name = item['media_name']
                # Named so the local does not shadow the ``datetime`` module.
                formatted_datetime = formatTime(item['datetime'])
                abstract = item['abstract']

                publish_time = formatTimestamp(item['publish_time'])
                behot_time = formatTimestamp(item['behot_time'])
                if formatted_datetime < self.lastTime:
                    # Single-argument print() emits the same text on
                    # Python 2 and Python 3.
                    print("datetime: %s" % (formatted_datetime,))
                    print("lastTime: %s" % (self.lastTime,))
                    # NOTE(review): presumably tells the crawl loop to stop
                    # requesting further pages - confirm against the caller.
                    self.runFlag = False
            except Exception:
                # Deliberate best-effort: drop the malformed entry and move on.
                continue

            yield Article(group_id=group_id,
                          comment_count=comment_count,
                          title=title,
                          article_url=article_url,
                          offset=0,
                          media_name=media_name,
                          datetime=formatted_datetime,
                          abstract=abstract,
                          publish_time=publish_time,
                          behot_time=behot_time)
コード例 #3
0
    def parse(self, response):
        """Yield a ``Comment`` for each entry in the response's ``data`` list.

        ``response.text`` is decoded as JSON and the value under its
        ``'data'`` key is iterated; the comment fields are read from each
        entry's ``'comment'`` mapping.  ``article_url`` and ``title`` come
        from ``response.meta`` and are copied onto every yielded item.

        When the list holds fewer than 20 entries, a record with the keys
        ``'len'``, ``'article_url'``, ``'title'`` and ``'url'`` is appended
        to ``self.urls`` before any item is yielded.

        :param response: object providing ``text`` (JSON body) and ``meta``
            (with ``'article_url'``, ``'title'`` and, for short pages,
            ``'url'``).
        :return: generator of ``Comment`` items.
        """
        entries = json.loads(response.text)['data']
        article_url = response.meta['article_url']
        title = response.meta['title']

        # NOTE(review): 20 is presumably the requested page size, making this
        # a log of short pages - confirm against the request builder.
        if len(entries) < 20:
            url = response.meta['url']
            self.urls.append({
                'len': len(entries),
                'article_url': article_url,
                'title': title,
                'url': url,
            })

        for entry in entries:
            comment = entry['comment']

            comment_id = comment['id']
            reply_count = comment['reply_count']
            digg_count = comment['digg_count']
            create_time = formatTimestamp(comment['create_time'])
            score = comment['score']
            user_id = comment['user_id']

            user_name = comment['user_name']
            text = comment['text']

            yield Comment(user_name=user_name, text=text, article_url=article_url, title=title, id=comment_id,
                          reply_count=reply_count, digg_count=digg_count, create_time=create_time, score=score,
                          user_id=user_id)
コード例 #4
0
    def parse(self, response):
        """Yield a ``CommentReply`` for each entry in the nested reply list.

        The parent comment's ``'id'``, ``'title'`` and ``'text'`` are read
        from ``response.meta`` and copied onto every yielded item.  The reply
        entries are taken from ``['data']['data']`` of the JSON decoded from
        ``response.text``.

        :param response: object providing ``meta`` and ``text`` as above.
        :return: generator of ``CommentReply`` items.
        """
        meta = response.meta
        comment_id = meta['id']
        title = meta['title']
        text = meta['text']

        payload = json.loads(response.text)
        replies = payload['data']['data']
        for reply in replies:
            # Keyword arguments are evaluated left to right, so the entry is
            # read in the order digg_count, content, create_time, user name, id.
            yield CommentReply(comment_id=comment_id,
                               text=text,
                               digg_count=reply['digg_count'],
                               content=reply['content'],
                               create_time=formatTimestamp(reply['create_time']),
                               name=reply['user']['name'],
                               id=reply['id'],
                               title=title)