def parse_item(self, response):
     """Extract the video URL and poster image from a page and save them.

     Takes the metadata carried on ``response.request.info``, adds the
     ``videoUrl`` and ``img`` entries to it, wraps the result in a
     ``MissionBean`` and passes that to ``self.client.save``. Pages for
     which ``self.get_addr`` finds no address, or which have no poster
     image, are skipped without saving anything.

     Args:
         response: Fetched page. Must expose ``body``, ``text``, ``url`` and
             ``request.info``; ``info`` is indexed like a dict and must
             already contain ``'title'``.

     Returns:
         None.
     """
     # Function-scope import: the file's import block is not visible from
     # this method, so the dependency is kept local to it.
     import logging

     info = response.request.info
     html = response.body.decode()
     match = self.get_addr(html)
     # Truthiness covers an empty result as well as None, so a helper that
     # reports "nothing found" either way no longer crashes on len().
     if not match:
         return
     info['videoUrl'] = match[0]

     bs4 = BeautifulSoup(response.text, 'html.parser')
     # select_one() returns None when nothing matches, so each step is
     # checked instead of chaining straight through to ['src'].
     poster = bs4.select_one("div[id=\"poster\"]")
     img = poster.select_one('img') if poster is not None else None
     src = img.get('src') if img is not None else None
     if src is None:
         # Same outcome as a page without a video URL: skip it, but leave a
         # trace so missing posters can be noticed.
         logging.getLogger(__name__).warning(
             'no poster image found, skipping %s', response.url)
         return
     info['img'] = src

     missionBean = MissionBean(response.url, 3, ['fishing_new'])
     missionBean.html = html
     missionBean.title = info['title']
     missionBean.info = info
     self.client.save(missionBean)
    def parse_item(self, response):
        """Build a news record from an article page and hand it to storage.

        Extracts the ``div[class="content"]`` element from the page, stores
        its prettified HTML under ``info['content']``, copies the metadata
        from ``response.request.info`` into a ``NewsBean``, and passes a
        ``MissionBean`` whose ``info`` is the bean's attribute dict to
        ``daoFilterAndSave.MongoFilterSave``. Pages without a content
        element are skipped.

        Args:
            response: Fetched page. Must expose ``text``, ``url`` and
                ``request.info``; ``info`` is indexed like a dict and must
                contain every key read below (``title``, ``id``, ``tag``,
                ``type``, ``like_num``, ``comment_count``, ``read_count``,
                ``source_name``, ``introduction``, ``cover``,
                ``share_count``, ``publish_time``).

        Returns:
            None.

        Raises:
            KeyError: If ``info`` is a dict and lacks one of the keys above.
            ValueError: If a field passed to ``int()`` is not numeric.
        """
        # Function-scope import: the file's import block is not visible from
        # this method, so the dependency is kept local to it.
        import logging

        info = response.request.info
        html = response.text
        bs4 = BeautifulSoup(html, "html.parser")
        # select_one() returns None when nothing matches; check before
        # calling prettify() on the result.
        content_node = bs4.select_one('div[class=\"content\"]')
        if content_node is None:
            logging.getLogger(__name__).warning(
                'no content element found, skipping %s', response.url)
            return
        info['content'] = content_node.prettify()

        missionBean = MissionBean(response.url, 1001, ['qutoutiao'])
        missionBean.html = html
        missionBean.title = info['title']

        # Assemble the production-format bean.
        newsBean = NewsBean()
        newsBean.titleInfo = info['title']
        newsBean.content = info['content']
        newsBean.url = response.url
        newsBean.newsId = info['id']
        newsBean.tags = info['tag']

        newsBean.etc = {'news_type': info['type']}
        # Unknown type codes fall back to the default channel label.
        newsBean.fromChannel = self.TYPE_DICT.get(int(info['type']), '其他')
        newsBean.fromSpider = '推荐流'
        newsBean.fromType = 8
        newsBean.goodNum = int(info['like_num'])
        newsBean.commentNum = int(info['comment_count'])
        newsBean.readNum = int(info['read_count'])
        newsBean.mediaName = info['source_name']
        # NOTE(review): mediaId reuses source_name — confirm that no
        # separate media id is available.
        newsBean.mediaId = info['source_name']
        newsBean.introduction = info['introduction']
        newsBean.imgUrls = info['cover']
        # NOTE(review): stored as received, unlike the other counters which
        # are cast to int — confirm whether that is intended.
        newsBean.shareNum = info['share_count']

        # publishDate and createTime can only be passed on as timestamps
        # because of a Redis format limitation.
        # publish_time is divided by 1000 before conversion (presumably it
        # arrives in milliseconds — TODO confirm).
        newsBean.publishDate = datetime.datetime.fromtimestamp(
            int(info['publish_time']) / 1000).timestamp()
        # Presumably NewsBean() initialises createTime to a datetime —
        # verify against the class.
        newsBean.createTime = newsBean.createTime.timestamp()

        # Attach the attribute dict only once the bean is complete, so the
        # stored record does not depend on later mutations showing through
        # an aliased __dict__.
        missionBean.info = newsBean.__dict__
        daoFilterAndSave.MongoFilterSave(missionBean)