# -*- coding: utf-8 -*-
import random
import time
import urllib
import urllib2
import Queue

from bs4 import BeautifulSoup
from pyquery import PyQuery as pq

import my_proxy  # project-local proxy pool helper
import My_Csv    # project-local CSV writer

# Shared work queues consumed by the spider methods below.
queue_for_href_list = Queue.Queue()
queue_for_result = Queue.Queue()
queue_for_comment_productSn = Queue.Queue()

USER_AGENT = 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0'


def requestByProxy(url):
    """Fetch url through a random HTTP proxy, switching proxies until a request succeeds."""
    src = ''
    proxy_port = my_proxy.is_proxy_exists()
    proxy = random.sample(proxy_port, 1)[0]
    proxy = proxy[0] + ':' + proxy[1]
    # print('=' * 5 + u'starting with proxy: http://%s' % proxy)
    proxyHandler = urllib2.ProxyHandler({'http': 'http://%s' % proxy})
    cookies = urllib2.HTTPCookieProcessor()  # instantiate once so cookies survive proxy switches
    opener = urllib2.build_opener(cookies, proxyHandler)
    opener.addheaders = [('User-agent', USER_AGENT)]
    while True:
        try:
            res_temp = opener.open(url)
            src = res_temp.read()
            res_temp.close()
            break
        except Exception:
            # Request failed: draw another proxy and rebuild the opener.
            proxy = random.sample(proxy_port, 1)[0]
            # print(u'switching proxy: %s:%s' % (proxy[0], proxy[1]))
            proxy = proxy[0] + ':' + proxy[1]
            proxyHandler = urllib2.ProxyHandler({'http': 'http://%s' % proxy})
            opener = urllib2.build_opener(cookies, proxyHandler)
    return src
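

# Minimal smoke-test sketch (not in the original module). It assumes
# my_proxy.is_proxy_exists() returns a sequence of (host, port) string pairs,
# e.g. [('1.2.3.4', '8080'), ...], which matches how proxy[0] and proxy[1]
# are joined above.
def demo_requestByProxy():
    src = requestByProxy('http://www.xtep.com.cn/')
    print('fetched %d bytes through a random proxy' % len(src))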


class XtepSpider(object):
    """Spider for xtep.com.cn product pages and comments.

    The class name is assumed: the original class statement lies outside
    this excerpt, but the 4-space-indented methods below clearly belong to one.
    """

    def get_pageCount(self, product_sn):
        # url = 'http://www.xtep.com.cn/list_goods/'
        url = ('http://www.xtep.com.cn/?app_act=goods/comment&goods_sn='
               + str(product_sn) + '&app_page=null&t=0.00000000000000000&is_ajax')
        # POST parameters
        data = {'ajax_div': 'all_commentList',
                'ajax_url': '/?app_act=goods/comment&goods_sn=' + str(product_sn) + '&app_page=null',
                'page': 1}
        data = urllib.urlencode(data)

        proxy_port = my_proxy.is_proxy_exists()
        proxy = random.sample(proxy_port, 1)[0]
        proxy = proxy[0] + ':' + proxy[1]
        print(proxy)
        while True:
            proxyHandler = urllib2.ProxyHandler({'http': 'http://%s' % proxy})
            cookies = urllib2.HTTPCookieProcessor()
            opener = urllib2.build_opener(cookies, proxyHandler)
            opener.addheaders = [('User-agent', USER_AGENT)]
            try:
                req = opener.open(url, data)
                result = req.read()
            except Exception:
                proxy = random.sample(proxy_port, 1)[0]
                proxy = proxy[0] + ':' + proxy[1]
                print(u'switching proxy: %s' % proxy)
                continue
            req.close()
            soup = BeautifulSoup(result, 'html.parser')
            try:
                # The second 'pageNext' element's onclick is expected to embed
                # the total page count as its fourth comma-separated argument.
                pagecount = soup.find_all(attrs={'class': 'pageNext'})[1]['onclick']
                pagecount = int(pagecount.split(',')[3])
            except (IndexError, KeyError, ValueError):
                pagecount = 1  # no pager found: a single page of comments
            print(pagecount)
            # Break out of the retry loop.
            break
        return pagecount
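
    # Hedged helper sketch for the onclick parsing above. The exact onclick
    # format is an assumption inferred from the split on ','; something like
    # onclick="ajaxPage('all_commentList', '/?...', 1, 17, 0)" would put the
    # total page count at index 3. Not used by the original code.
    def parse_page_count(self, onclick):
        try:
            return int(onclick.split(',')[3].strip(" )'\""))
        except (IndexError, ValueError):
            return 1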

    def get_info(self):
        proxy_port = my_proxy.is_proxy_exists()
        proxy = random.sample(proxy_port, 1)[0]
        proxy = proxy[0] + ':' + proxy[1]
        proxyHandler = urllib2.ProxyHandler({'http': 'http://%s' % proxy})
        cookies = urllib2.HTTPCookieProcessor()
        opener = urllib2.build_opener(cookies, proxyHandler)
        opener.addheaders = [('User-agent', USER_AGENT)]
        # Option 1: iterate over a split URL list; a URL is lost if its proxy fails.
        # for url in self.href_list:
        # Option 2: share URLs between threads via a queue; on proxy failure the
        # URL is put back, so nothing is lost.
        while queue_for_href_list.qsize() > 0:
            url = queue_for_href_list.get()
            if 'goods' in url:
                print('*' * 60)
                print(url)
                try:
                    res_temp = opener.open(url)
                except Exception:
                    queue_for_href_list.put(url)  # re-queue the URL instead of dropping it
                    proxy = random.sample(proxy_port, 1)[0]
                    print(u'switching proxy: %s:%s' % (proxy[0], proxy[1]))
                    proxy = proxy[0] + ':' + proxy[1]
                    proxyHandler = urllib2.ProxyHandler({'http': 'http://%s' % proxy})
                    opener = urllib2.build_opener(cookies, proxyHandler)
                    continue
                src = res_temp.read()
                res_temp.close()
                d = pq(src)
                frame = d.find('.detailsRight.fr')
                d = pq(frame)
                title = d.find('.fb').text()
                try:
                    price = d.find('.goodsPrice .styleUL span').eq(0).text()[1:]
                except Exception:
                    continue
                price_original = d.find('.goodsPrice .styleUL span').eq(1).text()
                price_original = price_original.split(':')[1][1:]
                product_sn = d.find('.goodsPrice dd').eq(2).text().split(': ')[1]
                sale = d.find('.goodsPrice dd').eq(3).text().split(': ')[1]
                jifeng = d.find('.goodsPrice dd').eq(4).text().split(': ')[1]  # jifeng: loyalty points
                colors = d.find('#gxm_selcolor')
                res_col = []
                for item in colors:
                    res_col.append(pq(item).attr('value'))
                color = '+'.join(res_col)
                sizes = d.find('.goodsSize li a')[:-1]
                res_size = []
                for item in sizes:
                    res_size.append(pq(item).attr('value'))
                size = '+'.join(res_size)
                result = [title, price, price_original, product_sn, sale, jifeng, color, size]
                print(result)
                # for item in result:
                #     print(item)
                queue_for_result.put(result)
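
    # Hedged sketch of the queue-based threading model described by the comments
    # in get_info: several workers share queue_for_href_list and failed URLs are
    # re-queued. The thread count and join logic are assumptions, not part of
    # the original code.
    def run_info_workers(self, thread_count=4):
        import threading
        workers = [threading.Thread(target=self.get_info) for _ in range(thread_count)]
        for w in workers:
            w.start()
        for w in workers:
            w.join()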

    def get_commentDetail(self):
        result = []
        while queue_for_comment_productSn.qsize() > 0:
            product_sn = queue_for_comment_productSn.get()
            page_count = self.get_pageCount(product_sn)
            url = ('http://www.xtep.com.cn/?app_act=goods/comment&goods_sn='
                   + str(product_sn) + '&app_page=null&t=0.00000000000000000&is_ajax')
            data_list = []
            for i in range(page_count):
                # Build the POST parameters for every page and collect them in data_list.
                data = {'ajax_div': 'all_commentList',
                        'ajax_url': '/?app_act=goods/comment&goods_sn=' + str(product_sn) + '&app_page=null',
                        'page': (i + 1)}
                data = urllib.urlencode(data)
                data_list.append(data)

            proxy_port = my_proxy.is_proxy_exists()
            proxy = random.sample(proxy_port, 1)[0]
            proxy = proxy[0] + ':' + proxy[1]
            proxyHandler = urllib2.ProxyHandler({'http': 'http://%s' % proxy})
            cookies = urllib2.HTTPCookieProcessor()
            opener = urllib2.build_opener(cookies, proxyHandler)
            opener.addheaders = [('User-agent', USER_AGENT)]
            while len(data_list) > 0:
                # Pick a random page's POST parameters; they are re-appended on failure.
                data_temp = random.sample(data_list, 1)[0]
                data_list.remove(data_temp)
                time.sleep(1)
                try:
                    req = opener.open(url, data_temp)
                    res = req.read()
                except Exception:
                    data_list.append(data_temp)
                    proxy = random.sample(proxy_port, 1)[0]
                    print(u'switching proxy: %s:%s' % (proxy[0], proxy[1]))
                    proxy = proxy[0] + ':' + proxy[1]
                    proxyHandler = urllib2.ProxyHandler({'http': 'http://%s' % proxy})
                    opener = urllib2.build_opener(cookies, proxyHandler)
                    continue
                res = res.decode('utf-8', 'ignore')
                req.close()
                d = pq(res)
                frames = d.find('.commentList')
                page_num = data_temp.split('page=')[1].split('&')[0]
                for item in frames:
                    d = pq(item)
                    comment_person = d('p').text()
                    # my_text() is a project-local pyquery extension, not standard pyquery.
                    comment_time = d.find('.commentTitle').my_text()[0]
                    comment_detail = d.find('.mt10.colorA7').text()
                    res_temp = [product_sn, page_num, comment_person, comment_time, comment_detail]
                    result.append(res_temp)
                    for field in res_temp:
                        print(field)
        title = ['product_sn', 'page_num', 'comment_person', 'comment_time', 'comment_detail']
        # Write the collected data to a CSV file.
        writer = My_Csv.Write_Csv(path=r'd:/spider/xtep', name='xtep_commentDetail',
                                  title=title, result=result)
        writer.add_title_data()
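
    # My_Csv.Write_Csv is a project-local helper. As a hedged sketch of what
    # add_title_data() is assumed to do (write a header row, then the collected
    # rows), a stdlib fallback might look like this; it is not used by the
    # original code and takes a full file path rather than path + name.
    def _write_csv_fallback(self, path, title, rows):
        import csv

        def enc(cell):
            # The Python 2 csv module cannot write unicode directly.
            return cell.encode('utf-8') if isinstance(cell, unicode) else cell

        with open(path, 'wb') as f:
            w = csv.writer(f)
            w.writerow([enc(c) for c in title])
            for row in rows:
                w.writerow([enc(c) for c in row])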


def use_proxy():
    """Return the current proxy pool as (host, port) pairs."""
    proxy_port = my_proxy.is_proxy_exists()
    return proxy_port
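

# Hypothetical entry point (not in the original excerpt). 'XtepSpider' is the
# placeholder class name introduced above, and the seed URL is illustrative only.
if __name__ == '__main__':
    print('%d proxies available' % len(use_proxy()))
    spider = XtepSpider()
    queue_for_href_list.put('http://www.xtep.com.cn/?app_act=goods/detail&goods_sn=12345')
    spider.get_info()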