Code Example #1
 def crawlMonthSales(self, nid, agentip):
     try:
         month_Sales = ""
         nid_url = "https://mdskip.taobao.com/core/initItemDetail.htm?itemId={nid}"
         refer_url = "https://detail.taobao.com/item.htm?id={nid}"
         nid_Url = nid_url.format(nid=nid)
         nid_refer = refer_url.format(nid=nid)
         cookies = "ab=12; UM_distinctid=15e7a46caf311b-0188d989dac01e-5c153d17-144000-15e7a46caf4552; thw=cn; ali_apache_id=11.131.226.119.1505353641652.239211.1; miid=2033888855982094870; l=AllZcEkSLTy0io2vJcWc-ksY6U4zk02Y; _cc_=WqG3DMC9EA%3D%3D; tg=0; _uab_collina=150780747345339957932139; cna=dNU/EvGIRjsCAQ4XY4PdDkHN; _tb_token_=3d73497b6b4b1; ali_ab=14.23.99.131.1510570522194.8; hng=CN%7Czh-CN%7CCNY%7C156; mt=ci=0_0; _m_h5_tk=c690a92415e1684e37a0d852f95c4237_1511139636041; _m_h5_tk_enc=03e0735d1910593631f521e6615c4e4b; x=124660357; uc3=sg2=AVMH%2FcTVYAeWJwo98UZ6Ld9wxpMCVcQb0e1XXZrd%2BhE%3D&nk2=&id2=&lg2=; uss=VAmowkFljKPmUhfhc%2B1GBuXNJWn9cLMEX%2FtIkJ5j0tQgoNppvUlaKrn3; tracknick=; sn=%E4%BE%9D%E4%BF%8A%E6%9C%8D%E9%A5%B0%3A%E8%BF%90%E8%90%A5; skt=53a079a2a620057d; v=0; cookie2=17f5415096176ca88c03d1fed693a1d4; unb=2077259956; t=1630b104e4d32df897451d6c96642469; uc1=cookie14=UoTdev2%2BYyNASg%3D%3D&lng=zh_CN; _umdata=85957DF9A4B3B3E8F872A3094256432F0F1549AE1C92C6CCF1E68B982581686F23BFC13A60CCABD1CD43AD3E795C914C5B383FEA6B5C410F78EAF10A11987746; isg=Au_vsoMX6XTuPe7jEO7aMMjafgM5PEijMRuJ0QF8i95lUA9SCWTTBu2ApHYV"
         # cookies="_tb_token_=f3fe5d65a6591;cookie2=171e5eb92d66332b1d52d9e2730fed33;t=bf64b0d40d912c08dd434661471b2c98;v=0"
         cookie_dict = {item.split('=', 1)[0].strip(): item.split('=', 1)[1] for item in cookies.split(';')}  # split on the first '=' only; some cookie values (e.g. mt=ci=0_0) contain '='
         header = {'ip': agentip, 'Referer': nid_refer,
                   "cookies": cookie_dict,
                   'User-Agent': Html_Downloader.GetUserAgent()}
         ok, response = Html_Downloader.Download_Html(nid_Url, {}, header)
         if not ok:
             count = 0
             while count < 5:
                 sleep(2)
                 agentip = Utils.GetMyAgent()
                 header = {'ip': agentip, 'Referer': nid_refer,
                           'timeout': '5000',
                           "cookies": cookie_dict,
                           'User-Agent': Html_Downloader.GetUserAgent()}
                 ok, response = Html_Downloader.Download_Html(nid_Url, {}, header)
                 if ok:
                     break
                 count += 1
                 print "获取月销量第{count}试错".format(count=count)
         if ok and "sellCount\":" not in response.text:
             count = 0
             while count < 5:
                 sleep(2)
                 agentip = Utils.GetMyAgent()
                 header = {'ip': agentip, 'Referer': nid_refer,
                           'timeout': '5000',
                           "cookies": cookie_dict,
                           'User-Agent': Html_Downloader.GetUserAgent()}
                 if count == 4:
                     header = {}
                 ok, response = Html_Downloader.Download_Html(nid_Url, {}, header)
                 if ok and "sellCount\":" in response.text:
                     break
                 count += 1
                 print "sellCount不在反馈中,获取月销量第{count}试错".format(count=count)
         if ok and "sellCount\":" in response.text:
             month_Sales = str(re.compile("sellCount\":(.*?)(?=\"success\")").findall(response.text)[0]).replace(",", "").strip()
             print "Monthly sales fetched: {month_Sales}".format(month_Sales=month_Sales)
             return month_Sales
     except Exception as e:
         logging.info("Error crawling monthly sales: {m}".format(m=e))
Code Example #2
def get_shop_item_list(session, proxy_ip, page_num, shop_id):
    proxies = {"http": proxy_ip, "https": proxy_ip}
    parms_pager = "{{\"shopId\":\"{shop_id}\",\"currentPage\":{page_num},\"pageSize\":\"30\",\"sort\":\"hotsell\",\"q\":\"\"}}"
    parms_url = "https://api.m.taobao.com/h5/com.taobao.search.api.getshopitemlist/2.0/?appKey=12574478&t={stmp}&sign={sign}&api=com.taobao.search.api.getShopItemList&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp12&data={pager}"
    params_referer = "https://shop{shop_id}.m.taobao.com/?shop_id={shop_id}&sort=d".format(shop_id=shop_id)
    # print(params_referer)
    stmp = "%s739" % (long(time.time()))
    referer = params_referer.format(shop_id=shop_id)
    pager = parms_pager.format(shop_id=shop_id, page_num=page_num)
    if session.cookies.get_dict('.taobao.com') and '_m_h5_tk' in session.cookies.get_dict('.taobao.com'):
        h5_tk = session.cookies.get_dict('.taobao.com')['_m_h5_tk']
        token = re.compile('(.*)(?=_)').findall(h5_tk)[0]  # part of _m_h5_tk before the trailing "_<timestamp>"
        value = '%s&%s&12574478&%s' % (token, stmp, pager)
        sign = execute_javascript(value)
    else:
        sign = "a013c868718eddb116eac3da0aa7974a"
    url = parms_url.format(pager=pager, stmp=stmp, sign=sign)
    # print(url)
    requests_parms = {}
    headers = {'Referer': referer,
               'Host': 'api.m.taobao.com',
               'Cache-Control': 'no-cache',
               'Pragma': 'no-cache',
               'User-Agent': Html_Downloader.GetUserAgent()}
    if proxy_ip:
        requests_parms['proxies'] = proxies
        requests_parms['verify'] = False
    result = session.get(url, headers=headers, **requests_parms)
    if result.ok:
        return result.content
    else:
        return None
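The sign parameter comes from execute_javascript, which evaluates Taobao's h5 signing routine over the value string assembled above (token&timestamp&appKey&data). That routine is commonly reported to be a plain MD5 hex digest of that string, so a pure-Python stand-in may work; this is an assumption, not verified against the live API:

    import hashlib

    def h5_sign(token, stmp, app_key, data):
        # Assumed equivalent of the JavaScript sign: md5 over "token&t&appKey&data".
        value = '%s&%s&%s&%s' % (token, stmp, app_key, data)
        return hashlib.md5(value.encode('utf-8')).hexdigest()

If the assumption holds, sign = execute_javascript(value) above could be replaced by sign = h5_sign(token, stmp, "12574478", pager).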
Code Example #3
 def crawlMonthSales(self, nid, agentip):
     try:
         month_Sales = ""
         nid_url = "https://mdskip.taobao.com/core/initItemDetail.htm?itemId={nid}"
         refer_url = "https://detail.taobao.com/item.htm?id={nid}"
         nid_Url = nid_url.format(nid=nid)
         nid_refer = refer_url.format(nid=nid)
         cookies = "ab=56; UM_distinctid=15e7a46caf311b-0188d989dac01e-5c153d17-144000-15e7a46caf4552; thw=cn; ali_apache_id=11.131.226.119.1505353641652.239211.1; miid=2033888855982094870; l=AllZcEkSLTy0io2vJcWc-ksY6U4zk02Y; uc2=wuf=https%3A%2F%2Ftrade.tmall.com%2Fdetail%2ForderDetail.htm%3Fbiz_order_id%3D70514222507416230%26forward_action%3D; _cc_=WqG3DMC9EA%3D%3D; tg=0; _uab_collina=150780747345339957932139; hng=CN%7Czh-CN%7CCNY%7C156; mt=ci=0_0; _tb_token_=3e0501668eb3b; x=124660357; uc3=sg2=AVMH%2FcTVYAeWJwo98UZ6Ld9wxpMCVcQb0e1XXZrd%2BhE%3D&nk2=&id2=&lg2=; uss=VW9L9wvPPdgBBh%2BJHeH%2BVW8D%2FgmRg%2B6YCnShUPaOH0CFHrL4%2FVpP4v7d; tracknick=; sn=%E4%BE%9D%E4%BF%8A%E6%9C%8D%E9%A5%B0%3A%E8%BF%90%E8%90%A5; skt=efe1ec1051eec814; v=0; cookie2=1ce9fff7464537de3d45fe012006d49d; unb=2077259956; t=1630b104e4d32df897451d6c96642469; _m_h5_tk=37be146862abddcfc955f9ec15ebb25d_1508307778971; _m_h5_tk_enc=7ab9ef3ea063dd2c4cd6d33cf84ea2a4; cna=dNU/EvGIRjsCAQ4XY4PdDkHN; uc1=cookie14=UoTcBzysjIcUbw%3D%3D&lng=zh_CN; isg=Amxsuy9SGdk0Xg26l9-JufebPUpejRva_jrq6MateJe60Qzb7jXgX2Ljh68S; _umdata=85957DF9A4B3B3E8F872A3094256432F0F1549AE1C92C6CCF1E68B982581686F23BFC13A60CCABD1CD43AD3E795C914C9A2685321202E656A2C4B44241C24328"
         # cookies="_tb_token_=f3fe5d65a6591;cookie2=171e5eb92d66332b1d52d9e2730fed33;t=bf64b0d40d912c08dd434661471b2c98;v=0"
         cookie_dict = {item.split('=', 1)[0].strip(): item.split('=', 1)[1] for item in cookies.split(';')}  # split on the first '=' only; some cookie values contain '='
         header = {'ip': agentip, 'Referer': nid_refer,
                   "cookies": cookie_dict,
                   'User-Agent': Html_Downloader.GetUserAgent()}
         ok, response = Html_Downloader.Download_Html(nid_Url, {}, header)
         if not ok:
             count = 0
             while count < 11:
                 sleep(2)
                 agentip = Utils.GetMyAgent()
                 header = {'ip': agentip}
                 ok, response = Html_Downloader.Download_Html(nid_Url, {}, header)
                 if ok:
                     break
                 count += 1
                 print "获取月销量第{conut}试错".format(count=count)
         if ok:
             matches = re.compile("sellCount\":(.*?)(?=\"success\")").findall(response.text)
             if not matches:
                 # sellCount was missing from the response; retry with a fresh proxy.
                 return self.crawlMonthSales(nid, agentip)
             month_Sales = str(matches[0]).replace(",", "").strip()
             print "Monthly sales fetched: {month_Sales}".format(month_Sales=month_Sales)
             return month_Sales
     except Exception as e:
         logging.info("Error crawling monthly sales: {m}".format(m=e))
Code Example #4
File: crawlNid1.py  Project: xwjonline/taobaoSpider
 def get_total_sales(self, session, agentipjj, page_num, shop_id):
     try:
         count = 0
         while count < 20:
             print("agentipjj:" + agentipjj)
             proxies = {"http": agentipjj, "https": agentipjj}
             parms_pager = "{{\"shopId\":\"{shop_id}\",\"currentPage\":{page_num},\"pageSize\":\"30\",\"sort\":\"hotsell\",\"q\":\"\"}}"
             parms_url = "https://unzbmix25g.api.m.taobao.com/h5/com.taobao.search.api.getshopitemlist/2.0/?appKey=12574478&t={stmp}&sign={sign}&api=com.taobao.search.api.getShopItemList&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp12&data={pager}"
             params_referer = "https://shop{shop_id}.m.taobao.com/?shop_id={shop_id}&sort=d".format(
                 shop_id=shop_id)
             stmp = "%s739" % (long(time.time()))
             referer = params_referer.format(shop_id=shop_id)
             pager = parms_pager.format(shop_id=shop_id, page_num=page_num)
             if session.cookies.get_dict('.taobao.com') and '_m_h5_tk' in session.cookies.get_dict('.taobao.com'):
                 h5_tk = session.cookies.get_dict('.taobao.com')['_m_h5_tk']
                 token = re.compile('(.*)(?=_)').findall(h5_tk)[0]
                 value = '%s&%s&12574478&%s' % (token, stmp, pager)
                 sign = self.execute_javascript(value)
             else:
                 sign = "a013c868718eddb116eac3da0aa7974a"
             url = parms_url.format(pager=pager, stmp=stmp, sign=sign)
             requests_parms = {}
             headers = {
                 'Referer': referer,
                 'Host': 'api.m.taobao.com',
                 'Cache-Control': 'no-cache',
                 'Pragma': 'no-cache',
                 'timeout': '5000',
                 'User-Agent': Html_Downloader.GetUserAgent()
             }
             if agentipjj:
                 requests_parms['proxies'] = proxies
                 requests_parms['verify'] = False
             try:
                 result = session.get(url,
                                      headers=headers,
                                      **requests_parms)
             except Exception:
                 # Request failed (dead proxy, timeout, ...): rotate the proxy and retry.
                 agentipjj = Utils.GetMyAgent()
                 continue
             count = count + 1
             if result.status_code != 200:
                 logging.info("代理ip返回结果{log_code}".format(
                     log_code=result.status_code))
                 agentipjj = Utils.GetMyAgent()
                 sleep(2)
             else:
                 print(result.status_code)
             if result.ok:
                 sleep(2)
                 return result.content
     except Exception as e:
         logging.info("Error crawling totalSoldQuantity: {m}".format(m=e))
         print("Error crawling totalSoldQuantity: {e}".format(e=e))
Code Example #5
 def crawl_yxl(self, auctionId, agentIp):
     yxl = -1
     count = 0
     while count < 20:
         agentIp = Utils.GetMyAgent()
         userAgent = Html_Downloader.GetUserAgent()
         header = {'ip': agentIp, 'user-agent': userAgent}
         text_detail_url = "https://detail.m.tmall.com/item.htm?spm=a320p.7692363.0.0&id={auctionId}".format(auctionId=auctionId)
         ok, response = Html_Downloader.Download_Html(text_detail_url, {}, header)
         if ok:
             matchs = re.compile("sellCount\":(.*?)(?=showShopActivitySize)").findall(response.text)
             if len(matchs) > 0:
                 yxl = matchs[0].encode('utf-8')
                 yxl = yxl.replace(",\"", "")
                 break
         sleep(3)
         count += 1
     return yxl
Code Example #6
    def crawl_shop_all_item(self, url):
        agentIp = Utils.GetMyAgent()
        shop_id = self.shop_id
        shop_name = self.shop_name
        userAgent = Html_Downloader.GetUserAgent()
        header = {'ip': agentIp, 'user-agent': userAgent}
        text_detail_url = url
        ok, response = Html_Downloader.Download_Html(text_detail_url, {},
                                                     header)
        if ok:
            jsonArray = json.loads(response.content)  # parse the JSON response
            total_page = jsonArray.get("total_page")
            total_results = jsonArray.get("total_results")
            page_size = jsonArray.get("page_size")
            jsonResult = jsonArray.get("items")
            for item in jsonResult:
                shop_item = {}
                item_id = str(item.get("item_id")).strip()
                shop_item['item_id'] = item_id
                shop_item['title'] = item.get('title').encode('utf-8')
                shop_item['picUrl'] = "http:" + item.get('img')
                # current sale price
                shop_item['salePrice'] = item.get('price')
                shop_item['totalSoldQuantity'] = item.get('totalSoldQuantity')
                shop_item['crawl_url'] = item.get('url')
                shop_item['crawl_time'] = long(time.time())
                # API URL for item variants (color options) - no longer needed; the
                # endpoint below already returns color options and the rest.
                '''
                test_Url="http://d.da-mai.com/index.php?r=itemApi/getItemInfoByItemId&item_id="+item_id
                ok, response = Html_Downloader.Download_Html(test_Url,{}, header)
                if ok:
                   jsonItems=json.loads(response.content)  # parse the JSON response
                '''
                # API URL to fetch detailed SKU information
                shop_item['quantity'] = 0
                getSKU_Url = "http://yj.da-mai.com/index.php?r=itemskus/getSkus&fields=*&num_iids={item_id}".format(
                    item_id=item_id)
                ok, response = Html_Downloader.Download_Html(
                    getSKU_Url, {}, header)
                if ok:
                    jsonItems = json.loads(response.content)
                    total_data = jsonItems.get("data")
                    for sku in total_data:
                        shop_item['quantity'] += sku.get("quantity")
                # Fetch the item detail page (second-screen information)
                getDetail_Url = "http://d.da-mai.com/index.php?r=itemApi/getItemInfoByItemId&item_id={item_id}".format(
                    item_id=item_id)
                ok, response_detail = Html_Downloader.Download_Html(
                    getDetail_Url, {}, header)
                if ok:
                    shop_item['attribute'] = []
                    jsonDetails = json.loads(response_detail.content)
                    properties = jsonDetails['data']['data']['properties']
                    stringName = ""
                    for attri in properties:
                        #string = "{name}:{value}&&||".format(name=attri.get('name'),value=attri.get('value'))
                        name = attri.get('name')
                        value = attri.get('value')
                        if name in stringName:
                            #shop_item['attribute'].append(name)
                            string = "{value} ".format(value=value)
                            shop_item['attribute'].append(string)
                        if name not in stringName:
                            string = "{name}:{value}&&||".format(name=name,
                                                                 value=value)
                            shop_item['attribute'].append(string)
                            stringName = name + stringName

            for page in range(1, int(total_page) + 1):
                # Rebuild the JSON list URL for each page and crawl it in a child process.
                # NOTE: the shop URL is hard-coded here; it should be passed in as a parameter.
                getlist_url = "https://yiqianny.m.tmall.com/shop/shop_auction_search.do?ajson=1&_tm_source=tmallsearch&" \
                              "spm=a320p.7692171.0.0&sort=d&p={page}&page_size=24&from=h5".format(page=page)
                p = multiprocessing.Process(target=self.crawl_shop_all_item, args=(getlist_url,))
                p.start()
                logging.info("Started crawler process for JSON list page: {url}".format(url=getlist_url))
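The multiprocessing call fixed above is a classic pitfall: target=f(x) runs f immediately in the parent and hands its return value (here None) to Process, so the child does nothing. The callable and its arguments must be passed separately; a minimal illustration with a stand-in crawl function:

    import multiprocessing

    def crawl(url):
        print(url)

    if __name__ == '__main__':
        # Wrong: crawl("http://example.com") executes here, in the parent.
        # p = multiprocessing.Process(target=crawl("http://example.com"))

        # Right: Process calls crawl("http://example.com") in the child.
        p = multiprocessing.Process(target=crawl, args=("http://example.com",))
        p.start()
        p.join()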
Code Example #7
 def crawl_shop_all_item(self):
     agentIp = Utils.GetMyAgent()
     shop_id = self.shop_id
     shop_name = self.shop_name
     userAgent = Html_Downloader.GetUserAgent()
     header = {'ip': agentIp, 'user-agent': userAgent}
     test_detail_url = "{shop_url}shop/shop_auction_search.do?ajson=1&_tm_source=tmallsearch&spm=a320p.7692171.0.0&sort" \
                       "=d&p={page}&page_size={page_size}&from=h5".format(shop_url=self.shop_url, page_size=1,
                                                                          page=1)
     test_detail_url = test_detail_url.replace(".tmall.com", ".m.tmall.com")
     try:
         ok, response = Html_Downloader.Download_Html(
             test_detail_url, {}, header)
         if not ok:
             count = 0
             while count < 4:
                 sleep(2)
                 agentip = Utils.GetMyAgent()
                 header = {'ip': agentip}
                 ok, response = Html_Downloader.Download_Html(
                     test_detail_url, {}, header)
                 if ok:
                     break
                 count += 1
                 if count == 3:
                     header = {}
         if ok:
             jsonArray = json.loads(response.content)  # parse the JSON response
             total_page = jsonArray.get("total_page")
             total_results = jsonArray.get("total_results")
             page_size = jsonArray.get("page_size")
             logging.info("shopname:" + shop_name + " total_page:" +
                          total_page + " total_results:" + total_results +
                          " page_size:" + page_size)
             print "total_page:" + total_page + "total_results:" + total_results + "page_size:" + page_size
             for i in range(int(total_page)):
                 print i + 1
                 test_detail_url = "{shop_url}shop/shop_auction_search.do?ajson=1&_tm_source=tmallsearch&spm=a320p.7692171.0.0&sort=d&p={page}&page_size={page_size}&from=h5".format(
                     shop_url=self.shop_url,
                     page_size=page_size,
                     page=i + 1)
                 test_detail_url = test_detail_url.replace(
                     ".tmall.com", ".m.tmall.com")
                 '''
                 if int(total_page)==(i+1):
                     lastCount=int(total_results)-i*int(page_size)
                     ok, response = Html_Downloader.Download_Html(test_detail_url,{}, header)
                     if not ok:
                         count =0
                         while(count<11):
                             sleep(2)
                             agentip = Utils.GetMyAgent()
                             header = {'ip': agentip}
                             ok, response = Html_Downloader.Download_Html(test_detail_url,{},header)
                             if ok  and "price" in response.text and lastCount-response.text.count("price")<2:
                                 break
                             count+=1
                             if count==10:
                                 header={}
                     print  response.text.count('price')
                     if ok and  "price" not in response.text:
                        print "111"
                        count =0
                        while(count<11):
                             sleep(2)
                             agentip = Utils.GetMyAgent()
                             header = {'ip': agentip}
                             ok, response = Html_Downloader.Download_Html(test_detail_url,{},header)
                             if ok and "price" in response.text and lastCount-response.text.count("price")<2:
                                 break
                             count+=1
                             if count==10:
                                 header={}
                     if ok and  lastCount-response.text.count("price")>2:
                        while(count<11):
                             sleep(2)
                             agentip = Utils.GetMyAgent()
                             header = {'ip': agentip}
                             ok, response = Html_Downloader.Download_Html(test_detail_url,{},header)
                             if ok and "price" in response.text and lastCount-response.text.count("price")<2:
                                 break
                             count+=1
                             if count==10:
                                 header={}
                     if ok  and lastCount-response.text.count("price")<2:
                         logging.info("成功获取price字符串并开始解析")
                         self.parse_items(response.content,shop_id,agentIp,shop_name,userAgent)
                 else:
                     '''
                 ok, response = Html_Downloader.Download_Html(
                     test_detail_url, {}, header)
                 if not ok:
                     count = 0
                     while count < 11:
                         sleep(2)
                         agentip = Utils.GetMyAgent()
                         header = {'ip': agentip}
                         ok, response = Html_Downloader.Download_Html(
                             test_detail_url, {}, header)
                         if ok:
                             break
                         count += 1
                         if count == 10:
                             header = {}
                 if ok:
                     # logging.info("成功获取price字符串并开始解析")
                     self.parse_items(response.content, shop_id, agentIp,
                                      shop_name, userAgent)
     except Exception as e:
         logging.error("Failed to crawl shop {shop_name} (shop_id: {shop_id}): {m}".format(
             shop_name=shop_name,
             shop_id=shop_id,
             m=e,
         ))
         crawl_content = "error while crawling the list page"
         message = str(e)
         start_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                    time.localtime(time.time()))
         insertLog(crawl_content, message, shop_id, agentIp,
                   test_detail_url, start_time, shop_name)
Code Example #8
 def crawlMonthSales(self, nid, agentip):
     try:
         month_Sales = ""
         nid_url = "https://mdskip.taobao.com/core/initItemDetail.htm?itemId={nid}"
         refer_url = "https://detail.taobao.com/item.htm?id={nid}"
         nid_Url = nid_url.format(nid=nid)
         nid_refer = refer_url.format(nid=nid)
         cookies = "x=__ll%3D-1%26_ato%3D0; l=AhERSU92PmRba9QUgSCkQMF6oRaqOoXt; otherx=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; cna=dNU/EvGIRjsCAQ4XY4PdDkHN; _m_h5_tk=7d8d6e65e5c676a6d0a69c26f7436ea1_1510363282671; _m_h5_tk_enc=e32129060738b7ce01e9114c9bec037f; sm4=440100; hng=CN%7Czh-CN%7CCNY%7C156; uc1=cookie14=UoTde95xncLyFQ%3D%3D&lng=zh_CN; uc3=sg2=Vq0THzNyGHIH22DuvMx9ZEwXL5qc2kn7REWHdois6v0%3D&nk2=&id2=&lg2=; uss=AQDPJiEXAu47o41b5k%2BKpKRT3Ckpz9nqnJX2F%2F7kZG6ttuI82ZnQa7ZL; t=1630b104e4d32df897451d6c96642469; unb=2607292494; sn=sitiselected%E6%97%97%E8%88%B0%E5%BA%97%3A%E5%A4%A7%E9%BA%A6; _tb_token_=eef7bd7b7abd6; cookie2=23bb087c638814ce8a8e329ead5332d4; isg=ApqaMZmelJirXxuDoGSRqtW160B8YxWwfLxcMqQTRi34FzpRjFtutWDlkdVw"
         # cookies="_tb_token_=f3fe5d65a6591;cookie2=171e5eb92d66332b1d52d9e2730fed33;t=bf64b0d40d912c08dd434661471b2c98;v=0"
         cookie_dict = {
             item.split('=', 1)[0].strip(): item.split('=', 1)[1]  # split on the first '=' only; some values contain '='
             for item in cookies.split(';')
         }
         header = {
             'ip': agentip,
             'Referer': nid_refer,
             "cookies": cookie_dict,
             'User-Agent': Html_Downloader.GetUserAgent()
         }
         ok, response = Html_Downloader.Download_Html(nid_Url, {}, header)
         if not ok:
             count = 0
             while count < 5:
                 sleep(2)
                 agentip = Utils.GetMyAgent()
                 header = {
                     'ip': agentip,
                     'Referer': nid_refer,
                     'timeout': '5000',
                     "cookies": cookie_dict,
                     'User-Agent': Html_Downloader.GetUserAgent()
                 }
                 ok, response = Html_Downloader.Download_Html(
                     nid_Url, {}, header)
                 if ok:
                     break
                 count += 1
                 print "获取月销量第{count}试错".format(count=count)
         if ok and "sellCount\":" not in response.text:
             count = 0
             while count < 10:
                 sleep(2)
                 agentip = Utils.GetMyAgent()
                 header = {
                     'ip': agentip,
                     'Referer': nid_refer,
                     'timeout': '5000',
                     "cookies": cookie_dict,
                     'User-Agent': Html_Downloader.GetUserAgent()
                 }
                 if count == 9:
                     header = {}
                 ok, response = Html_Downloader.Download_Html(
                     nid_Url, {}, header)
                 if ok and "sellCount\":" in response.text:
                     break
                 count += 1
                 print "sellCount不在反馈中,获取月销量第{count}试错".format(count=count)
         if ok and "sellCount\":" in response.text:
             month_Sales = str(
                 re.compile("sellCount\":(.*?)(?=\"success\")").findall(
                     response.text)[0]).replace(",", "").strip()
             print "Monthly sales fetched: {month_Sales}".format(month_Sales=month_Sales)
             return month_Sales
     except Exception as e:
         logging.info("Error crawling monthly sales: {m}".format(m=e))