コード例 #1
0
ファイル: getPvpInfo.py プロジェクト: wannianma/py35Lib
    def getBaseJson(self):
        """Fetch the base JSON feeds and persist them as local JSON files.

        Downloads the hero, item, summoner and ming feeds through
        ``self.req.get_html``, tags every hero with an ``id`` taken from the
        ``topicid`` of the ``Hero`` row whose ``ename`` matches (0 when no
        row matches), stores the tagged heroes on ``self.heros`` and writes
        the four collections through ``self._saveJson``.

        If any download raises, the error is logged and the method returns
        without writing anything; ``self.heros`` is then an empty list.
        """
        self.heros = []
        try:
            heros = self.req.get_html(self.hero_js, is_json=True)
            items = self.req.get_html(self.item_js, is_json=True)
            summoners = self.req.get_html(self.summoner_js, is_json=True)
            mings = self.req.get_html(self.ming_js, is_json=True)
        except Exception as e:
            # Format the exception instead of concatenating it: str + Exception
            # raises TypeError and would hide the real failure.
            logger.error('王者荣耀 Base Json Fetche error:{0}'.format(e))
            # Nothing was (fully) downloaded, so there is nothing to persist.
            return

        # Attach the topic id mapping to every hero (the hero dict is
        # updated in place and also collected on self.heros).
        for hero in heros:
            ms_hero = self.session.query(Hero).filter(
                Hero.ename == hero['ename']).first()
            hero['id'] = ms_hero.topicid if ms_hero is not None else 0
            self.heros.append(hero)

        # Persist the JSON files.
        self._saveJson(json.dumps(self.heros), 'hero.json')
        self._saveJson(json.dumps(items), 'item.json')
        self._saveJson(json.dumps(summoners), 'summoner.json')
        self._saveJson(json.dumps(mings), 'ming.json')
コード例 #2
0
ファイル: BaseFetcher.py プロジェクト: wannianma/py35Lib
 def qiniu_fetch_file(self, url, key):
     """Ask the bucket object to fetch ``url`` and store it under ``key``.

     Returns True only when the fetch result is truthy and its ``'key'``
     entry, converted to ``str``, equals ``key``. Any exception raised while
     fetching or inspecting the result is logged and reported as False.
     """
     try:
         ret, _info = self.__bucket.fetch(url, self.bucket_name, key)
         # Kept inside the try so a result without 'key' is also logged.
         stored = bool(ret) and str(ret['key']) == key
     except Exception as e:
         logger.error("qiniu远程fetch文件出错,info:{0}".format(e))
         stored = False
     return stored
コード例 #3
0
ファイル: getJDGoods.py プロジェクト: wannianma/py35Lib
 def fetchSingleGoodInfo(self, good_url):
     """Download one product page and extract its details.

     Args:
         good_url: URL of the product detail page.

     Returns:
         A 5-tuple ``(good_id, good_desc, good_pics, seo_keyword, seo_desc)``:
         the numeric id parsed from the URL, the parameter-list element
         rendered as a string, a list of picture URLs, and the contents of
         the ``keywords`` and ``description`` meta tags. Every field falls
         back to an empty value (``''`` or ``[]``) when the page is empty or
         when parsing fails before that field is reached; parse errors are
         logged, not raised.
     """
     self.__sleep()
     # Every returned name is initialised up front so the final return is
     # valid even when the download is empty or parsing aborts early.
     good_id = ''
     good_desc = ''
     good_pics = []
     seo_keyword = ''
     seo_desc = ''
     html = self.req.get_html(good_url, is_json=False)
     if html.strip() != '':
         try:
             soup = BeautifulSoup(html,
                                  'html.parser',
                                  from_encoding='utf-8')
             # SEO keywords
             keyword_meta = soup.find('meta', attrs={'name': 'keywords'})
             if keyword_meta is not None:
                 seo_keyword = keyword_meta['content']
             # SEO description: read from the description tag itself
             # (previously the keywords tag was read a second time).
             desc_meta = soup.find('meta', attrs={'name': 'description'})
             if desc_meta is not None:
                 seo_desc = desc_meta['content']
             # Thumbnail list of the product
             for li in soup.find(id='spec-list').find_all('li'):
                 img_src = li.img['src']
                 # Swap the thumbnail path segments for high-resolution ones
                 img_src = img_src.replace('s54x54_jfs', 's450x450_jfs')
                 img_src = img_src.replace('s75x75_jfs', 's450x450_jfs')
                 img_src = img_src.replace('n5/jfs', 'n1/jfs')
                 good_pics.append('http:{0}'.format(img_src))
             # Product description block
             good_desc = str(
                 soup.find(class_='parameter2 p-parameter-list'))
             # Product id, taken from the URL
             match_obj = re.search(r'http:(.*)\/(\d+)\.html', str(good_url),
                                   re.M | re.I)
             if match_obj:
                 good_id = match_obj.group(2)
             # The price is not fetched here; it is handled separately.
         except Exception as e:
             logger.error('parse Html error: {0}'.format(e))
     return good_id, good_desc, good_pics, seo_keyword, seo_desc
コード例 #4
0
ファイル: getJDGoods.py プロジェクト: wannianma/py35Lib
 def fetchJdGoods(self):
     """Crawl the listing pages and store every product found.

     For each page in ``range(1, 2)`` the listing HTML is downloaded and
     every ``gl-item`` element is turned into a name/link pair. Each product
     is then resolved through ``fetchSingleGoodInfo``, saved as a
     ``Shopping_Goods`` row (committed one by one) and queued on the redis
     list ``self.redis_list`` with its thumbnail, pictures and row id.
     Crawling stops once more than five listing downloads came back empty.
     """
     first_page = 1
     stop_page = 2
     processed = 0
     failures = 0
     for page_no in range(first_page, stop_page):
         page_goods = []
         # Pause one second between listing requests.
         time.sleep(1)
         list_url = self.start_url + '&page={0}'.format(page_no)
         logger.info(
             "###########list Url: {0} ###############".format(list_url))
         html = self.req.get_html(list_url, is_json=False)
         if html.strip() == '':
             failures += 1
             logger.error(
                 'Failed Get Return Message, error_time:{0}, res:{1}'.
                 format(failures, html))
             if failures > 5:
                 break
         else:
             try:
                 soup = BeautifulSoup(html,
                                      'html.parser',
                                      from_encoding='utf-8')
                 for node in soup.find_all(class_='gl-item'):
                     img_anchor = good_anchor = node.find(class_='p-img').a
                     # The image tag is looked up but not used; the access
                     # still fails (and aborts the page) without an anchor.
                     img_anchor.img
                     entry = {}
                     entry['name'] = node.find(class_='p-name').a.em.string
                     entry['link'] = 'http:{0}'.format(good_anchor['href'])
                     page_goods.append(entry)
             except Exception as e:
                 logger.error('parse Html error: {0}'.format(e))
         # Handle the products collected from the current listing page.
         for entry in page_goods:
             processed += 1
             link = entry['link']
             title = entry['name']
             logger.info('good_count: {0}, good_link: {1}'.format(
                 processed, link))
             details = self.fetchSingleGoodInfo(link)
             good_id, good_desc, good_pics, seo_keywords, seo_desc = details
             thumb = good_pics[0] if good_pics else ''
             # Remaining information goes to mysql.
             now = int(time.time())
             shopping_good = Shopping_Goods(catid=self.goods_cat,
                                            pid=self.brand,
                                            title=title,
                                            content=good_desc,
                                            desc=link,
                                            price=0,
                                            seo_title=title,
                                            seo_keywords=seo_keywords,
                                            seo_desc=seo_desc,
                                            thumb=thumb,
                                            inputtime=now)
             self.session.add(shopping_good)
             self.session.commit()
             # Queue the pictures in redis for later download and update.
             payload = {
                 good_id: {
                     'thumb': thumb,
                     'pics': good_pics,
                     'id': shopping_good.id
                 }
             }
             self.redis.lpush(self.redis_list, json.dumps(payload))
コード例 #5
0
ファイル: getJDGoods.py プロジェクト: wannianma/py35Lib
        for pic_url in data:
            new_key = self.generate_pic_key()
            self.qiniu_fetch_file(pic_url, new_key)
            logger.info('new_pic_url: {0}{1}'.format(
                'http://static.shenyou.tv/', new_key))
            new_pics.append('{0}{1}'.format('http://static.shenyou.tv/',
                                            new_key))
        return new_pics


def fetchJD(brand, category, url, process_step='fetch'):
    """Build a ``GoodsFetcher`` and run the requested processing step.

    ``process_step == 'fetch'`` runs ``fetchJdGoods``; any other value runs
    ``updateGoodsExtraInfo``.
    """
    fetcher = GoodsFetcher(brand, category, url)
    run_step = (fetcher.fetchJdGoods
                if process_step == 'fetch'
                else fetcher.updateGoodsExtraInfo)
    run_step()


# Four command-line arguments, in the order they are read below:
# 1 processing step: 'fetch' or 'process'
# 2 brand name
# 3 goods category
# 4 URL of the goods listing page
if __name__ == '__main__':
    if len(sys.argv) != 5:
        logger.error('参数输入有误')
        # Stop here: reading sys.argv[1..4] below would raise IndexError.
        sys.exit(1)
    process_step, brand, category, url = sys.argv[1:5]
    fetchJD(brand, category, url, process_step)
コード例 #6
0
ファイル: getPvpInfo.py プロジェクト: wannianma/py35Lib
    def _processSingleHero(self, url, hero_id):
        hero_res = {}
        html = self.req.get_html(url, is_json=False)
        if html.strip() != '':
            try:
                soup = BeautifulSoup(html, 'html.parser', from_encoding='gbk')
                beijing = soup.find(class_='story-info info').find(
                    class_='nr').p
                hero_res['id'] = hero_id
                #hero_res['desc'] = beijing.text
                skills = soup.find(class_='skill-show').find_all(
                    class_='show-list')
                skill_imgs = soup.find(class_='skill-u1').find_all('li')
                # 组装英雄技能
                skill_arr = []
                for idx, skill in enumerate(skills):
                    tmp = {}
                    tmp['name'] = skill.a.text
                    tmp['p1'] = skill.find(class_='skill-p1').text
                    tmp['p2'] = skill.find(class_='skill-p2').text
                    tmp['p3'] = skill.find(class_='skill-p3').text
                    tmp['img'] = skill_imgs[idx].img['src']
                    skill_arr.append(tmp)
                hero_res['skills'] = skill_arr
                # 组装相关英雄
                rel_hero_arr = []
                rel_hreos = soup.find_all(
                    class_='hero-list hero-relate-list fl')
                for hero in rel_hreos:
                    str_ids = hero['data-relatename']
                    str_new_ids = ''
                    rel_hero_arr.append(str_ids)
                    # 处理英雄映射ID
                    for id in str_ids.split('|'):
                        ms_hero = self.session.query(Hero).filter(
                            Hero.ename == id).first()
                        if not ms_hero is None:
                            str_new_ids += str(ms_hero.topicid)
                            str_new_ids += '|'
                    str_new_ids = str_new_ids[:-1]
                    rel_hero_arr.append(str_new_ids)
                pprint(rel_hero_arr)

                # 组装出装
                hero_res['rel_heros'] = rel_hero_arr
                equipment_arr = []
                equipments = soup.find_all(class_='equip-list fl')
                for equipment in equipments:
                    equipment_arr.append(equipment['data-item'])
                hero_res['equipments'] = equipment_arr
                # 组装明文
                mings = soup.find(class_='sugg-u1')['data-ming']
                hero_res['mings'] = mings
                # 组装能力
                abilities = soup.find_all(class_='cover-list-bar')
                hero_ability = ''
                for ability in abilities:
                    tmp = str(ability.i['style'])[6:] + '|'
                    hero_ability += tmp
                hero_res['ability'] = hero_ability[:-1]
                # 组装出装
                sug_skills = soup.find_all('img', class_='jn-pic1')
                sug_skills_arr = []
                for sug_skill in sug_skills:
                    sug_skills_arr.append(sug_skill['src'])
                hero_res['sug_skills'] = sug_skills_arr
                # 组装召唤师技能
                hero_zhs_skills = soup.find(id='skill3')['data-skill']
                hero_res['zhs_skills'] = hero_zhs_skills
                # 组装皮肤
                skins = soup.find(
                    class_='pic-pf-list pic-pf-list3')['data-imgname']
                hero_res['skins'] = skins
                hero_json = json.dumps(hero_res)
                self._saveJson(hero_json, '{0}.json'.format(hero_id))
            except Exception as e:
                logger.error('parse Html error:' + e)