예제 #1
0
    def _get_tags(self, article_info):
        '''
        获取tags
        :return:
        '''
        tmp_tags = list_duplicate_remove(
            [str(item.get('name', '')) for item in article_info.get('noteInfo', {}).get('relatedTags', [])])
        # self.my_lg.info(str(tmp_tags))
        # list先转str, 去掉敏感字眼, 再转list, 并去除''元素, 得到最后list
        tmp_tags = delete_list_null_str(self.wash_sensitive_info('|'.join(tmp_tags)).split('|'))
        tags = [{  # tags可以为空list!
            'keyword': item,
        } for item in tmp_tags]

        return tags
예제 #2
0
    async def _get_article(self, data, taobao_short_url):
        '''
        得到该文章的需求信息
        :param data:
        :return:
        '''
        try:
            nick_name = data.get('data', {}).get('models', {}).get('account', {}).get('name', '')
            assert nick_name != '', '获取到的nick_name为空值!'

            head_url = await self._get_head_url(data=data)

            # 推荐人的简介或者个性签名
            tmp_profile = data.get('data', {}).get('models', {}).get('account', {}).get('accountDesc', '')
            profile = tmp_profile if tmp_profile is not None else ''

            title = self._wash_sensitive_info(data.get('data', {}).get('models', {}).get('content', {}).get('title', ''))
            # self.my_lg.info(title)
            assert title != '', '获取到的title为空值!请检查!'

            # 达人的评论,可用于荐好首页的文字信息
            comment_content = self._wash_sensitive_info(data.get('data', {}).get('models', {}).get('content', {}).get('summary', ''))

            '''微淘抓包的接口: 图片,商品依次对应'''
            tmp_goods_list = data.get('data', {}).get('models', {}).get('content', {}).get('drawerList', [])
            assert tmp_goods_list != [], '获取到的goods_id_list为空list! 请检查! 可能该文章推荐商品为空[]!'

            share_img_url_list = [{'img_url': 'https:' + item.get('itemImages', [])[0].get('picUrl', '')} for item in tmp_goods_list]
            goods_id_list = [{'goods_id': item.get('itemId', '')} for item in tmp_goods_list]

            # 由于微淘的图片跟商品信息一一对应,so直接存一个字段, 清除重复的推荐商品(list去重,并保持原来的顺序)
            share_goods_base_info = list_duplicate_remove([{
                'img_url': 'https:' + item.get('itemImages', [])[0].get('picUrl', ''),
                'goods_id': item.get('itemId', ''),
            } for item in tmp_goods_list])

            # div_body
            div_body = self._wash_sensitive_info(await self._get_div_body(rich_text=data.get('data', {}).get('models', {}).get('content', {}).get('richText', [])))
            # print(div_body)

            # 待抓取的商品地址, 统一格式为淘宝的,如果是tmall地址, 浏览器会重定向到天猫
            goods_url_list = [{'goods_url': 'https://item.taobao.com/item.htm?id=' + item.get('goods_id', '')} for item in goods_id_list]

            _ = (await self._get_target_url_and_content_id_and_csid(taobao_short_url))
            gather_url = _[0]
            share_id = _[1]  # 即content_id

            create_time = get_shanghai_time()

            site_id = 2  # 淘宝微淘

            # tags 额外的文章地址
            tags = await self._get_tags(data=data)
            # pprint(tags)

        except Exception as e:
            self.my_lg.error('出错短链接地址:{0}'.format(taobao_short_url))
            self.my_lg.exception(e)
            return {}

        article = WellRecommendArticle()
        article['nick_name'] = nick_name
        article['head_url'] = head_url
        article['profile'] = profile
        article['share_id'] = share_id
        article['title'] = title
        article['comment_content'] = comment_content
        article['share_img_url_list'] = share_img_url_list
        article['goods_id_list'] = goods_id_list
        article['div_body'] = div_body
        article['gather_url'] = gather_url
        article['create_time'] = create_time
        article['site_id'] = site_id
        article['goods_url_list'] = goods_url_list
        article['tags'] = tags
        article['share_goods_base_info'] = share_goods_base_info

        return article