コード例 #1
0
def review_monitor(self, asin, country='us'):
    """Fetch the first page of most-recent reviews for *asin* and print a
    {index: {'star', 'customer_id', 'review_time'}} summary.

    Celery task: on any failure it retries itself (up to 3 times, ~30s apart).
    """
    try:
        proxy = pro_chi()  # rotating proxy for the request
        url_start = country_url(country)
        # Amazon rejects requests whose Host header doesn't match the domain.
        headers['Host'] = country_url(country).split("/")[2]
        page = requests.get(
            url_start + 'product-reviews/' + asin +
            '/ref=cm_cr_arp_d_viewopt_srt?sortBy=recent&pageNumber=1',
            headers=headers,
            proxies=proxy)
        # Parse the response ONCE instead of re-parsing it for every field of
        # every review (the original built 30 Selector objects).
        sel = Selector(text=page.content)
        stars = sel.xpath(
            ".//*[@class='a-section review']/div/div[1]/a[1]/@title").extract()
        customer_urls = sel.xpath(
            ".//*[@class='a-section review']/div/div[2]/span/a/@href").extract()
        review_times = sel.xpath(
            ".//*[@class='a-section review']/div/div[2]/span[4]/text()"
        ).extract()
        reviews = {}
        # zip() stops at the shortest list, so a page with fewer than 10
        # reviews no longer raises IndexError (which used to trigger a retry).
        for i, (title, href, when) in enumerate(
                zip(stars[:10], customer_urls[:10], review_times[:10])):
            reviews[i] = {
                'star': title[0:3],        # e.g. '5.0' from '5.0 out of 5 stars'
                'customer_id': href.split("/")[3],
                'review_time': when[3:],   # drop the leading 'on ' prefix
            }
        print(reviews)

    except Exception as e:
        dt = datetime.datetime.now(pytz.utc) + datetime.timedelta(seconds=30)
        self.retry(eta=dt, exc=e, max_retries=3)
コード例 #2
0
def review_monitor(asin, country='us'):
    """Standalone/debug review fetcher: print the first page of most-recent
    reviews for *asin* on the *country* marketplace.

    Bug fixes vs. the original:
    - `asin` / `country` parameters are actually used (they were ignored in
      favor of hard-coded 'B013US9FFY' / 'us').
    - The Response object is no longer rebound to its decoded text, which
      made every subsequent `page.content` access raise AttributeError.
    """
    url_start = country_url(country)
    # Host header must match the marketplace domain or Amazon rejects us.
    headers['Host'] = country_url(country).split("/")[2]
    url = (url_start + 'product-reviews/' + asin +
           '/ref=cm_cr_arp_d_viewopt_srt?sortBy=recent&pageNumber=1')
    print(url)
    page = requests.get(url, headers=headers)
    # Keep `page` as the Response; decode the body into its own variable.
    html = page.text.encode(page.encoding).decode('utf-8')
    print(html)
    tree = fromstring(page.content)
    print(tree.findtext('.//title'))  # 'Robot Check' title => we were blocked

    # Parse once and reuse the Selector for every field.
    sel = Selector(text=page.content)
    print(sel.xpath(".//*[@class='a-section review']"))
    stars = sel.xpath(
        ".//*[@class='a-section review']/div/div[1]/a[1]/@title").extract()
    customer_urls = sel.xpath(
        ".//*[@class='a-section review']/div/div[2]/span/a/@href").extract()
    review_times = sel.xpath(
        ".//*[@class='a-section review']/div/div[2]/span[4]/text()").extract()
    review_monitor = {}
    # zip() stops at the shortest list: pages with fewer than 10 reviews no
    # longer raise IndexError.
    for i, (title, href, when) in enumerate(
            zip(stars[:10], customer_urls[:10], review_times[:10])):
        review_monitor[i] = {
            'star': title[0:3],        # e.g. '5.0' from '5.0 out of 5 stars'
            'customer_id': href.split("/")[3],
            'review_time': when[3:],   # drop the leading 'on ' prefix
        }
    print(review_monitor)
コード例 #3
0
ファイル: test.py プロジェクト: newer027/amazon_crawler
def asin_title(asin, country):
    """Fetch the offer-listing page (start index 500) for *asin* in *country*
    and return the raw response."""
    base = country_url(country)
    # Host header must match the marketplace domain or Amazon rejects the request.
    headers['Host'] = base.split("/")[2]
    offer_path = 'gp/offer-listing/' + asin + '/ref=olp_page_5?ie=UTF8&startIndex=500'
    return s.get(base + offer_path, headers=headers)
コード例 #4
0
 def save(self, *args, **kwargs):
     """Populate customer_url and normalise review_time before saving.

     review_time arrives as e.g. 'January 2, 2018' and is rewritten to
     'YYYY-MM-DD'; values that don't parse are stored unchanged.
     """
     if not self.customer_url:
         self.customer_url = "%sgp/pdp/profile/%s/ref=cm_cr_arp_d_pdp?ie=UTF8" % (
             country_url(self.product.country), self.customer_id)
     try:
         if self.review_time:
             self.review_time = time.strftime(
                 '%Y-%m-%d', time.strptime(self.review_time, '%B %d, %Y'))
     # Only swallow parse failures (non-English or already-normalised dates);
     # the original bare `except:` hid every other error too.
     except (ValueError, TypeError):
         pass
     super(Review_detail, self).save(*args, **kwargs)
コード例 #5
0
ファイル: models.py プロジェクト: newer027/amazon_crawler
 def save(self, *args, **kwargs):
     """Derive customer_url from the customer id if it is not set yet, then
     delegate to the normal model save."""
     if not self.customer_url:
         profile_url = "%sgp/pdp/profile/%s/ref=cm_cr_arp_d_pdp?ie=UTF8" % (
             country_url(self.product.country), self.customer_id)
         self.customer_url = profile_url
     super(Review, self).save(*args, **kwargs)
コード例 #6
0
ファイル: qa_collect.py プロジェクト: newer027/amazon_crawler
def qa_collect(self,asin,country='us'):
    """Crawl the customer Q&A pages for *asin* and persist each question /
    answer pair as a QA_detail row.

    Celery task (bound): on any exception the task retries itself once more
    after ~40 seconds.  Returns {'to': 0} when blocked by a captcha page or
    when the product has no questions; otherwise returns None after saving.
    """
    try:
        product=Product.objects.filter(asin=asin,country=country)[0]
        # First Q&A page, newest questions first.
        page=get_url('ask/questions/asin/'+asin+'/ref=ask_ql_psf_ql_hza?sort=SUBMIT_DATE', country)
        tree = fromstring(page.content)
        print(tree.findtext('.//title'))
        # Captcha pages title themselves 'Robot Check' / 'Amazon CAPTCHA';
        # bail out instead of parsing garbage.
        if tree.findtext('.//title')=='Robot Check' or tree.findtext('.//title')=='Amazon CAPTCHA':
            info = {'to':0}
            return info

        # Product has no questions at all.
        if Selector(text=page.content).xpath('.//*[@id="noResultsTitle"]'):
            info = {'to':0}
            return info

        # question text -> parsed fields; keyed by question so duplicates collapse.
        qa_collection={}
        # Branch 1: a 'Next' pagination link exists -- walk up to 200 pages.
        if Selector(text=page.content).xpath("//ul[@class='a-pagination']/li[@class='a-last']//a/@href"):
            page_num=0
            while True:
                # One grid box per question on the page.
                boxes=Selector(text=page.content).xpath(".//*[@class='a-section askTeaserQuestions']/div[@class='a-fixed-left-grid a-spacing-base']")
                for box in boxes:
                    # NOTE(review): answer_quan is NOT reset here, so a value
                    # can leak over from the previous box -- confirm intended.
                    answer_url,answer,answer_user,qa_time=None,None,None,None
                    vote=int(box.xpath(".//ul[@class='vote voteAjax']/li[2]/span[1]/text()").extract()[0])
                    question=box.xpath(".//div[@class='a-fixed-left-grid a-spacing-small']//a[@class='a-link-normal']/text()").extract()[0]
                    try:
                        # Last span[2] text node carries the 'By NAME on DATE' byline.
                        qa_time=box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-fixed-left-grid-col a-col-right']/span[2]/text()").extract()[-1:][0]
                    except:
                        pass
                    try:
                        # Short answers live in span[1]; long answers (and jp
                        # pages where span[1] is empty) in span.askLongText.
                        if box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-fixed-left-grid-col a-col-right']/span[1]/text()").extract() and country != 'jp':
                            answer=box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-fixed-left-grid-col a-col-right']/span[1]/text()").extract()[0]
                        elif box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-fixed-left-grid-col a-col-right']/span[1]/text()").extract() and country == 'jp':
                            answer=box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-fixed-left-grid-col a-col-right']/span[1]/text()").extract()[0]
                            if answer == "":
                                try:
                                    answer=" ".join(box.xpath(".//span[@class='askLongText']/text()").extract()).strip()
                                except:
                                    pass
                        else:
                            answer=" ".join(box.xpath(".//span[@class='askLongText']/text()").extract()).strip()
                    except:
                        pass
                    try:
                        answer_user=box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-fixed-left-grid-col a-col-right']/span[2]/text()").extract()[0]
                    except:
                        pass
                    try:
                        # 'See more answers (N)' link -> number of answers.
                        answer_quan=box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-section a-spacing-none a-spacing-top-mini']/a/text()").extract()[0]
                        answer_quan = re.search(r'\d+', answer_quan).group(0)
                    except:
                        pass
                    try:
                        answer_url=box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-section a-spacing-none a-spacing-top-mini']/a/@href").extract()[0]
                        answer_url=country_url(country)[:-1]+answer_url
                        #print("answer_url:",answer_url)
                    except:
                        pass

                    #print(answer_user,qa_time)
                    # answer_user and qa_time both come from span[2]; when they
                    # are the same node the byline still contains name AND date,
                    # split here with locale-specific separators.
                    if answer_user == None:
                        pass
                    elif answer_user==qa_time:
                        if country in['us','uk','ca','de']:
                            name_date=re.split(' on |By |Von | am ', answer_user)
                        elif country=='it':
                            name_date=re.split(' in |Da ', answer_user)
                        elif country=='fr':
                            name_date=re.split(' le |Par ', answer_user)
                        elif country=='es':
                            name_date=re.split(' el |Por ', answer_user)
                        elif country=='jp':
                            name_date=re.split('投稿者: |、投稿日: ', answer_user)
                        answer_user=name_date[1]
                        qa_time=name_date[2]
                    else:
                        answer_user=re.split(' on |By |Von | am ', answer_user)[-1:][0]
                        qa_time=re.split(' on |By |Von | am ', qa_time)[-1:][0]

                    if answer_url and answer_quan:
                        qa_collection[question]={'vote':vote,'question':question,'qa_time':qa_time.strip(),'answer':answer,'answer_user':answer_user.strip(),'answer_quan':answer_quan,'answer_url':answer_url}
                    elif answer:
                        qa_collection[question]={'vote':vote,'question':question,'qa_time':qa_time.strip(),'answer':answer,'answer_user':answer_user.strip()}
                print(len(qa_collection))

                # Follow the 'Next' link, capped at 200 pages, with a polite
                # randomised delay between requests.
                if Selector(text=page.content).xpath("//ul[@class='a-pagination']/li[@class='a-last']//a/@href") and page_num<200:
                    time.sleep(2+random.random()*5)
                    page=get_url((Selector(text=page.content).xpath("//ul[@class='a-pagination']/li[@class='a-last']//a/@href")).extract()[0],country=country)
                    page_num += 1
                else:
                    break

        # Branch 2: single page of questions (no pagination link) -- same
        # parsing logic as above, run once.
        else:
            boxes=Selector(text=page.content).xpath(".//*[@class='a-section askTeaserQuestions']/div[@class='a-fixed-left-grid a-spacing-base']")
            for box in boxes:
                answer_url,answer,answer_user,qa_time=None,None,None,None
                vote=int(box.xpath(".//ul[@class='vote voteAjax']/li[2]/span[1]/text()").extract()[0])
                question=box.xpath(".//div[@class='a-fixed-left-grid a-spacing-small']//a[@class='a-link-normal']/text()").extract()[0]
                try:
                    qa_time=box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-fixed-left-grid-col a-col-right']/span[2]/text()").extract()[-1:][0]
                except:
                    pass
                try:
                    if box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-fixed-left-grid-col a-col-right']/span[1]/text()").extract():
                        answer=box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-fixed-left-grid-col a-col-right']/span[1]/text()").extract()[0]
                    else:
                        answer=" ".join(box.xpath(".//span[@class='askLongText']/text()").extract()).strip()
                except:
                    pass
                try:
                    answer_user=box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-fixed-left-grid-col a-col-right']/span[2]/text()").extract()[0]
                except:
                    pass
                try:
                    answer_quan=box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-section a-spacing-none a-spacing-top-mini']/a/text()").extract()[0]
                    answer_quan = re.search(r'\d+', answer_quan).group(0)
                except:
                    pass
                try:
                    answer_url=box.xpath(".//div[@class='a-fixed-left-grid a-spacing-base']//div[@class='a-section a-spacing-none a-spacing-top-mini']/a/@href").extract()[0]
                    answer_url=country_url(country)[:-1]+answer_url
                except:
                    pass

                if answer_user == None:
                    pass
                elif answer_user==qa_time:
                    if country in['us','uk','ca','de']:
                        name_date=re.split(' on |By |Von | am ', answer_user)
                    elif country=='it':
                        name_date=re.split(' in |Da ', answer_user)
                    elif country=='fr':
                        name_date=re.split(' le |Par ', answer_user)
                    elif country=='es':
                        name_date=re.split(' el |Por ', answer_user)
                    elif country=='jp':
                        name_date=re.split('投稿者: |、投稿日: ', answer_user)
                    answer_user=name_date[1]
                    qa_time=name_date[2]
                else:
                    answer_user=re.split(' on |By |Von | am ', answer_user)[-1:][0]
                    qa_time=re.split(' on |By |Von | am ', qa_time)[-1:][0]

                if answer_url and answer_quan:
                    qa_collection[question]={'vote':vote,'question':question,'qa_time':qa_time,'answer':answer,'answer_user':answer_user,'answer_quan':answer_quan,'answer_url':answer_url}
                elif answer:
                    qa_collection[question]={'vote':vote,'question':question,'qa_time':qa_time,'answer':answer,'answer_user':answer_user}

        # Persist everything collected; fall back to a row without answer_url
        # when that key (or the 'answer_quan' count) is absent.
        for qa in qa_collection:
            try:
                num=qa_collection[qa]['answer_quan']
            except:
                num="1"
            try:
            #if qa_collection[qa]['answer_url']:
                QA_detail.objects.get_or_create(product=product,vote=qa_collection[qa]['vote'],question=qa_collection[qa]['question'],qa_time=qa_collection[qa]['qa_time'],
                    answer=qa_collection[qa]['answer'],answer_person=qa_collection[qa]['answer_user'],num=num,answer_url=qa_collection[qa]['answer_url'])
            except:
                QA_detail.objects.get_or_create(product=product,vote=qa_collection[qa]['vote'],question=qa_collection[qa]['question'],qa_time=qa_collection[qa]['qa_time'],
                    answer=qa_collection[qa]['answer'],answer_person=qa_collection[qa]['answer_user'],num=num)
            #except:
            #    pass

        #report = GlucoseCsvReport(product)
        #report.email(product.user, 'subject', 'message')

    except Exception as e:
        # Any failure (network, parse, DB) -> retry the whole task once.
        dt = datetime.now(pytz.utc) + timedelta(seconds=40)
        self.retry(eta=dt, exc=e, max_retries=2)
コード例 #7
0
def title_sellers(page, product, country, initial):
    """Parse the offer-listing ("follow sale") pages for *product* and sync
    the sellers found into Product_seller.

    page    -- requests response for the first offer-listing page
    initial -- True on the very first crawl: sellers are just stored and no
               change detection / notification happens
    Scans at most 12 pages x 10 offer rows.  When not initial, sellers not
    seen before are recorded as Seller_change(status='new') and watching
    users are emailed; sellers that disappeared are recorded as status='old'
    and deleted.  Returns a message string when no sellers were found.
    """
    sell_items = {}
    counter = 12
    # not_first: this product already had sellers stored from a previous run,
    # so notifying about newly appearing sellers makes sense.
    not_first = False
    try:
        if Product_seller.objects.filter(product=product):
            not_first = True
    except:
        not_first = False
    # Scan at most 12 listing pages, 10 offer rows each.
    while counter > 0:
        counter -= 1
        for i in range(10):
            try:
                #//*[@id="olpOfferList"]/div/div/div[2]/div[3]/h3/span/a
                # Offer rows start at div[2].  Two page layouts exist: the
                # seller link sits in div[4] or div[3] depending on layout.
                if Selector(text=page.content).xpath(
                        ".//*[@id='olpOfferList']/div/div/div[" + str(i + 2) +
                        "]/div[4]/h3/span//a/@href"):
                    # Seller id is the last '='-separated token of the href.
                    seller = (Selector(text=page.content).xpath(
                        ".//*[@id='olpOfferList']/div/div/div[" + str(i + 2) +
                        "]/div[4]/h3/span//a/@href").extract()[0]
                              ).split("=")[-1]
                    sell_url = country_url(country)[:-1] + (Selector(
                        text=page.content).xpath(
                            ".//*[@id='olpOfferList']/div/div/div[" +
                            str(i + 2) +
                            "]/div[4]/h3/span//a/@href").extract()[0])
                    name = fromstring(page.content).findtext(
                        './/*[@id="olpOfferList"]/div/div/div[' + str(i + 2) +
                        ']/div[4]/h3/span/a')
                else:
                    seller = (Selector(text=page.content).xpath(
                        ".//*[@id='olpOfferList']/div/div/div[" + str(i + 2) +
                        "]/div[3]/h3/span//a/@href").extract()[0]
                              ).split("=")[-1]
                    sell_url = country_url(country)[:-1] + (Selector(
                        text=page.content).xpath(
                            ".//*[@id='olpOfferList']/div/div/div[" +
                            str(i + 2) +
                            "]/div[3]/h3/span//a/@href").extract()[0])
                    name = fromstring(page.content).findtext(
                        './/*[@id="olpOfferList"]/div/div/div[' + str(i + 2) +
                        ']/div[3]/h3/span/a')

                price = fromstring(page.content).findtext(
                    './/*[@id="olpOfferList"]/div/div/div[' + str(i + 2) +
                    ']/div[1]/span[1]').strip()
                sell_items[seller] = {
                    'seller': seller,
                    'name': name,
                    'price': price,
                    'sell_url': sell_url
                }
                print(sell_items)

            except:
                # Row i missing (fewer than 10 offers on the page) or the
                # markup changed; skip the row.
                print('寻找页面元素的逻辑错误')
                #product_seller.mark_time=datetime.datetime.now()

        # NOTE(review): a SelectorList never equals the string '#', so the
        # first comparison is always true -- only the existence check matters.
        if Selector(text=page.content).xpath(
                "//ul[@class='a-pagination']//a/@href") != '#' and Selector(
                    text=page.content).xpath(
                        "//ul[@class='a-pagination']//a/@href"):
            url_path = Selector(text=page.content).xpath(
                "//ul[@class='a-pagination']//a/@href").extract()[0]
            page = get_url(url_path, country=country)

    if not sell_items:
        return "没有跟卖卖家"

    if initial:
        # First crawl: just persist every seller found, no diffing.
        for seller_id in sell_items:
            product_seller = Product_seller(
                product=product,
                name=sell_items[seller_id]['name'],
                seller_id=seller_id,
                price=sell_items[seller_id]['price'],
                sell_url=sell_items[seller_id]['sell_url'])
            product_seller.save()
    else:
        changed = False
        # Store sellers we haven't seen before; record them as 'new' changes
        # unless an 'old' change was already logged within the last day.
        for seller_id in sell_items:
            if not Product_seller.objects.filter(product=product,
                                                 seller_id=seller_id):
                product_seller = Product_seller(
                    product=product,
                    name=sell_items[seller_id]['name'],
                    sell_url=sell_items[seller_id]['sell_url'],
                    seller_id=seller_id,
                    price=sell_items[seller_id]['price'])
                product_seller.save()
                if not_first and not Seller_change.objects.filter(
                        product=product,
                        status='old',
                        created__gte=timezone.now() -
                        datetime.timedelta(days=1)):
                    seller_change = Seller_change(
                        product=product,
                        status='new',
                        name=sell_items[seller_id]['name'],
                        sell_url=sell_items[seller_id]['sell_url'],
                        seller_id=seller_id,
                        price=sell_items[seller_id]['price'],
                        created=datetime.datetime.now())
                    seller_change.save()
                    changed = True

        # Mark every seller still present on the listing as seen (flag=True).
        for seller_id in sell_items:
            if Product_seller.objects.filter(product=product,
                                             seller_id=seller_id):
                Product_seller.objects.filter(
                    product=product, seller_id=seller_id).update(flag=True)

        if changed and not_first:
            # Email every user watching this product about the new sellers
            # recorded in the last few minutes.
            product_to_user = Product_to_user.objects.filter(product=product)
            users = product_to_user.values_list('user', flat=True)
            User = get_user_model()
            users = User.objects.filter(id__in=users)
            seller_change = Seller_change.objects.filter(
                product=product,
                created__gte=timezone.now() - datetime.timedelta(minutes=3))
            sellers = seller_change.values_list('name', flat=True)

            for user in users:
                message = "\n".join([
                    u'{0},您好.'.format(user.username),
                    u'{0}有跟卖出现:'.format(product.title), u'跟卖商家:',
                    ','.join([seller for seller in sellers]), u'详情请见:',
                    '/'.join(['amz668.com/follow_sale',
                              product.slug]), u'直达亚马逊:{0}'.format(page.url)
                ])
                send_email(user.email, message, '出现新的跟卖商品')

        # Sellers whose flag is still False vanished from the listing: log
        # them as 'old' changes, delete them, then reset all flags for the
        # next crawl.
        for product_seller in Product_seller.objects.filter(product=product,
                                                            flag=False):
            seller_change = Seller_change(product=product,
                                          status='old',
                                          name=product_seller.name,
                                          seller_id=product_seller.seller_id,
                                          price=product_seller.price,
                                          created=datetime.datetime.now())
            seller_change.save()
        Product_seller.objects.filter(product=product, flag=False).delete()
        Product_seller.objects.filter(product=product).update(flag=False)
コード例 #8
0
def rank_in_web(self,asin,keyword,country='us'):
    """Search *keyword* on the *country* marketplace and record the page and
    position at which *asin* appears (persisted as a Rank row).

    Celery task: retries once after ~40s on any exception.
    Returns {'rank','page','sponsored'}; rank/page are 0 when the ASIN was
    not found within the page limit or the crawl was blocked by a captcha.
    """
    try:
        info = {'rank':None,'page':None,'sponsored':False}
        keyword_rank=Keyword_rank.objects.filter(asin=asin,country=country,word=keyword)[0]
        url_start=country_url(country)
        # Host header must match the marketplace domain.
        headers['Host']=country_url(country).split("/")[2]
        s = requests.Session()
        page = s.get(url_start+'s?field-keywords='+keyword,headers=headers,proxies=pro_chi())
        keyword_rank.rank_url=page.url
        keyword_rank.save()
        if page.status_code != 200:
            raise Exception
        if Selector(text=page.content).xpath('.//*[@id="noResultsTitle"]'):
            raise Exception

        # flag_1: per-page item count was parsed from '#s-result-count'.
        # flag_2: results use the plain 's-result-item celwidget' layout
        #         (False -> the card-container layout).
        flag_1,flag_2 = True,True
        if country=='jp':
            try:
                item_amount=int(Selector(text=page.content).xpath('.//*[@id="s-result-count"]/text()').extract()[0][-4:-2])
            except:
                flag_1 = False
                item_amount=len(Selector(text=page.content).xpath('.//li[@class="s-result-item celwidget "]'))
        else:
            if len(Selector(text=page.content).xpath('.//li[@class="s-result-item celwidget "]')):
                try:
                    item_amount=int(Selector(text=page.content).xpath('.//*[@id="s-result-count"]/text()').extract()[0][2:4])
                except:
                    flag_1 = False
                    item_amount=len(Selector(text=page.content).xpath('.//li[@class="s-result-item celwidget "]'))
            else:
                flag_2 = False
                try:
                    item_amount=int(Selector(text=page.content).xpath('.//*[@id="s-result-count"]/text()').extract()[0][2:4])
                except:
                    flag_1 = False
                    item_amount=len(Selector(text=page.content).xpath('.//li[@class="s-result-item s-result-card-for-container a-declarative celwidget "]'))

        print(item_amount)
        tree = fromstring(page.content)
        print(tree.findtext('.//title'))
        # Captcha page: store a placeholder Rank (repeating the latest known
        # rank when history exists) and report 0/0 with sponsored=True as a
        # "blocked" marker.
        if tree.findtext('.//title')=='Robot Check' or tree.findtext('.//title')=='Amazon CAPTCHA':
            if len(keyword_rank.rank.all())>2:
                rank=Rank(page=keyword_rank.rank.first().page,number=keyword_rank.rank.first().number,sponsored=keyword_rank.rank.first().sponsored,keyword_rank=keyword_rank)
                rank.save()
                info = {'rank':0,'page':0,'sponsored':True}
                return info
            else:
                rank=Rank(keyword_rank=keyword_rank,page=0,number=0,sponsored=False)
                rank.save()
                info = {'rank':0,'page':0,'sponsored':True}
                return info

        if flag_1:
            # Known page size: walk result pages (max 20), numbering items
            # globally as item_amount * page_num + j.
            page_num=0
            while True:
                print(tree.findtext('.//title'),"page",page_num+1)
                if country=='jp':
                    fanwei=range(len(Selector(text=page.content).xpath('.//li[@class="s-result-item  celwidget "]')))
                elif flag_2:
                    fanwei=range(len(Selector(text=page.content).xpath('.//li[@class="s-result-item celwidget "]')))
                else:
                    fanwei=range(len(Selector(text=page.content).xpath('.//li[@class="s-result-item s-result-card-for-container a-declarative celwidget "]')))
                for j in fanwei:
                    if Selector(text=page.content).xpath(".//*[@id='result_"+str(item_amount*page_num+j)+"']/@data-asin"):
                        seller=(Selector(text=page.content).xpath(".//*[@id='result_"+str(item_amount*page_num+j)+"']/@data-asin").extract()[0])
                    else:
                        print("在页面找商品的逻辑错误")
                        # BUGFIX: skip the missing slot instead of reading an
                        # unbound `seller` (which crashed the task into a retry).
                        continue
                    print(seller)
                    if seller==asin:
                        # BUGFIX: the original called str.exist() here, which
                        # always raised, so 'sponsored' could never be True and
                        # `info` could be left holding its initial None rank.
                        try:
                            sponsored=(Selector(text=page.content).xpath(".//*[@id='result_"+str(item_amount*page_num+j)+"']/div/div/div/div[2]/h5/text()").extract()[0])=="sponsored"
                        except:
                            sponsored=False
                        info = {'rank':j+1,'page':page_num+1,'sponsored':sponsored}
                        print("搜索关键词%s:%s排名第%s页,第%s名" % (keyword,asin,page_num+1,j+1))
                        rank=Rank(keyword_rank=keyword_rank,page=info['page'],number=info['rank'],sponsored=info['sponsored'])
                        rank.save()
                        return info
                if Selector(text=page.content).xpath(".//*[@id='pagnNextLink']/@href") and page_num<20:
                    time.sleep(2+random.random()*5)

                    while True:
                        try:
                            page = s.get(url_start+(Selector(text=page.content).xpath(".//*[@id='pagnNextLink']/@href")).extract()[0],headers=headers,proxies=pro_chi())
                            # BUGFIX: leave the retry loop on success -- the
                            # original spun here forever re-fetching the page.
                            break
                        except:
                            print("Connection refused by the server..")
                            print("Let me sleep for 5 seconds")
                            print("ZZzzzz...")
                            time.sleep(2+random.random()*5)
                            print("Was a nice sleep, now let me continue...")
                            continue

                    page_num += 1
                else:
                    # Not found within the page limit: store a 0/0 placeholder
                    # (repeating the latest known rank when history exists).
                    if len(keyword_rank.rank.all())>2:
                        rank=Rank(page=keyword_rank.rank.first().page,number=keyword_rank.rank.first().number,sponsored=keyword_rank.rank.first().sponsored,keyword_rank=keyword_rank)
                        rank.save()
                        info = {'rank':0,'page':0,'sponsored':False}
                        return info
                    else:
                        rank=Rank(keyword_rank=keyword_rank,page=0,number=0,sponsored=False)
                        rank.save()
                        info = {'rank':0,'page':0,'sponsored':False}
                        return info

        else:
            # Unknown page size: only the first result page can be scanned.
            if country=='jp':
                fanwei=range(len(Selector(text=page.content).xpath('.//li[@class="s-result-item  celwidget "]')))
            elif flag_2:
                fanwei=range(len(Selector(text=page.content).xpath('.//li[@class="s-result-item  celwidget "]')))
            else:
                fanwei=range(len(Selector(text=page.content).xpath('.//li[@class="s-result-item s-result-card-for-container a-declarative celwidget "]')))
            for j in fanwei:
                try:
                    if Selector(text=page.content).xpath(".//*[@id='result_"+str(j)+"']/@data-asin"):
                        seller=(Selector(text=page.content).xpath(".//*[@id='result_"+str(j)+"']/@data-asin").extract()[0])
                    else:
                        print("在页面找商品的逻辑错误")
                        # BUGFIX: skip the missing slot instead of raising on
                        # an unbound `seller`.
                        continue
                    print(seller)
                    if seller==asin:
                        # Same sponsored-flag fix as the paginated branch: the
                        # original left `info` unset when the label was present
                        # but not equal to "sponsored".
                        try:
                            sponsored=(Selector(text=page.content).xpath(".//*[@id='result_"+str(j)+"']/div/div/div/div[2]/h5/text()").extract()[0])=="sponsored"
                        except:
                            sponsored=False
                        info = {'rank':j+1,'page':1,'sponsored':sponsored}
                        print("搜索关键词%s:%s排名第1页,第%s名" % (keyword,asin,j+1))
                        rank=Rank(keyword_rank=keyword_rank,page=info['page'],number=info['rank'],sponsored=info['sponsored'])
                        rank.save()
                        return info
                except:
                    if len(keyword_rank.rank.all())>2:
                        rank=Rank(page=keyword_rank.rank.first().page,number=keyword_rank.rank.first().number,sponsored=keyword_rank.rank.first().sponsored,keyword_rank=keyword_rank)
                        rank.save()
                        info = {'rank':0,'page':0,'sponsored':False}
                        return info
                    else:
                        rank=Rank(keyword_rank=keyword_rank,page=0,number=0,sponsored=False)
                        rank.save()
                        info = {'rank':0,'page':0,'sponsored':False}
                        return info

    except Exception as e:
        dt = datetime.datetime.now(pytz.utc) + datetime.timedelta(seconds=40)
        self.retry(eta=dt, exc=e, max_retries=2)