def _get_tags(self, article_info):
    """
    Collect the tag keywords attached to an article.

    :param article_info: mapping; the tags are read from
        article_info['noteInfo']['relatedTags'], each entry's 'name' being used
    :return: list of {'keyword': <name>} dicts (may be empty)
    """
    related_tags = article_info.get('noteInfo', {}).get('relatedTags', [])

    raw_names = []
    for tag in related_tags:
        raw_names.append(str(tag.get('name', '')))
    unique_names = list_duplicate_remove(raw_names)

    # Join the names into one string so the sensitive-word washing runs once,
    # then split back into a list and drop the '' entries left behind.
    joined_names = '|'.join(unique_names)
    washed_names = self.wash_sensitive_info(joined_names)
    kept_names = delete_list_null_str(washed_names.split('|'))

    # An empty tag list is a valid result.
    tags = []
    for name in kept_names:
        tags.append({'keyword': name})

    return tags
async def _get_article(self, data, taobao_short_url):
    """
    Build the article record for one Taobao Weitao post from its API payload.

    Every field is read from data['data']['models'] (its 'account' and
    'content' sub-dicts). Any failure while extracting is logged together
    with the short url and turns into an empty result.

    :param data: decoded API response dict
    :param taobao_short_url: short link of the article; used to resolve the
        target url / content id and written to the log on failure
    :return: a populated WellRecommendArticle, or {} when a required field
        (nick_name, title, goods list) is empty or any step raises
    """
    try:
        models = data.get('data', {}).get('models', {})
        account = models.get('account', {})

        nick_name = account.get('name', '')
        # Explicit raise instead of `assert`: asserts are removed under
        # `python -O`, which would silently let empty records through.
        if nick_name == '':
            raise ValueError('获取到的nick_name为空值!')

        head_url = await self._get_head_url(data=data)

        # Recommender's bio / personal signature; a None value becomes ''.
        profile = account.get('accountDesc', '')
        if profile is None:
            profile = ''

        content = models.get('content', {})

        title = self._wash_sensitive_info(content.get('title', ''))
        if title == '':
            raise ValueError('获取到的title为空值!请检查!')

        # The influencer's comment; usable as the text shown on the
        # recommendation home page.
        comment_content = self._wash_sensitive_info(content.get('summary', ''))

        # In the captured Weitao API, images and goods correspond one-to-one,
        # in order.
        tmp_goods_list = content.get('drawerList', [])
        if tmp_goods_list == []:
            raise ValueError('获取到的goods_id_list为空list! 请检查! 可能该文章推荐商品为空[]!')

        # Build each (image, goods id) pair once; the per-field lists below
        # are derived from it. An item without 'itemImages' raises IndexError
        # here, which the handler below turns into {}.
        goods_pairs = [{
            'img_url': 'https:' + item.get('itemImages', [])[0].get('picUrl', ''),
            'goods_id': item.get('itemId', ''),
        } for item in tmp_goods_list]
        share_img_url_list = [{'img_url': pair['img_url']} for pair in goods_pairs]
        goods_id_list = [{'goods_id': pair['goods_id']} for pair in goods_pairs]

        # Since images and goods map one-to-one they are also stored together
        # in a single field, with duplicate recommended goods removed
        # (list de-duplication that keeps the original order).
        share_goods_base_info = list_duplicate_remove(goods_pairs)

        # div_body
        div_body = self._wash_sensitive_info(
            await self._get_div_body(rich_text=content.get('richText', [])))

        # Goods urls to crawl, normalised to the taobao format; for a tmall
        # item the browser redirects to Tmall.
        goods_url_list = [
            {'goods_url': 'https://item.taobao.com/item.htm?id=' + entry['goods_id']}
            for entry in goods_id_list
        ]

        target_info = await self._get_target_url_and_content_id_and_csid(taobao_short_url)
        gather_url = target_info[0]
        share_id = target_info[1]  # i.e. the content_id

        create_time = get_shanghai_time()
        site_id = 2  # Taobao Weitao

        # tags: extra article addresses
        # NOTE(review): this awaits self._get_tags(data=...), so the _get_tags
        # in scope must be a coroutine accepting `data` — confirm it is not a
        # sync _get_tags(article_info) variant.
        tags = await self._get_tags(data=data)

    except Exception as e:
        self.my_lg.error('出错短链接地址:{0}'.format(taobao_short_url))
        self.my_lg.exception(e)
        return {}

    article = WellRecommendArticle()
    article['nick_name'] = nick_name
    article['head_url'] = head_url
    article['profile'] = profile
    article['share_id'] = share_id
    article['title'] = title
    article['comment_content'] = comment_content
    article['share_img_url_list'] = share_img_url_list
    article['goods_id_list'] = goods_id_list
    article['div_body'] = div_body
    article['gather_url'] = gather_url
    article['create_time'] = create_time
    article['site_id'] = site_id
    article['goods_url_list'] = goods_url_list
    article['tags'] = tags
    article['share_goods_base_info'] = share_goods_base_info

    return article