Esempio n. 1
0
    def prerequest(self):
        """Fetch the product page and detect the "no add-to-cart button" case.

        The response for ``self.url`` is stored on ``self.res``.  When the
        page has no ``add-to-cart-button`` element but its ``availability``
        element links to another page, that page is fetched and the first
        offer's price and seller are recorded on ``self.unnormal_price`` and
        ``self.unnormal_shop`` (best effort), ``self.second_url`` is set to
        the followed link and ``self.normal_situation`` is set to ``False``.

        Returns:
            ``False`` when the secondary (offer) page was followed,
            ``True`` otherwise.
        """
        self.queue.put("prerequest")
        print("prerequest")
        GlobalTools.setbaseurl(self.baseurl)
        self.res = requests.get(self.url, headers=self.headers)

        # presumably a BeautifulSoup tree -- only find()/get()/.text are used
        html = GlobalTools.getResponseContent(self.res)

        if html.find(id="add-to-cart-button") is not None:
            return True

        availability = html.find(id="availability")
        if availability is None:
            return True

        # Look the link up once and guard every step: the availability block
        # may carry no <a>, or an <a> without an href, which used to crash
        # with AttributeError / TypeError.
        link = availability.find("a")
        href = link.get('href') if link is not None else None
        if not href:
            # NOTE(review): treated like the "no availability block" case
            # above (return True) -- confirm this is the desired fallback.
            return True

        self.second_url = self.baseurl + href
        res = requests.get(self.second_url, headers=GlobalTools.getHeaders())
        html = GlobalTools.getResponseContent(res)

        try:
            price = html.find(class_="olpOfferPrice").text.strip()
            self.unnormal_price = price
            print(price)
            shop = html.find(class_="olpSellerName").text
            self.unnormal_shop = shop
            print(shop)
        except Exception:
            # Best effort: a missing price/seller element must not abort the
            # run, but KeyboardInterrupt/SystemExit are no longer swallowed.
            traceback.print_exc()
        self.normal_situation = False
        return False
Esempio n. 2
0
    def parse(self, sheet, currrow):
        """Run all field extractors on the stored response and return the result.

        ``self.result`` is reset to ``[currrow]``, the product URL is reported
        on ``self.queue`` and stdout, ``self.res`` is parsed into
        ``self.html`` and every ``get*`` extractor is invoked in a fixed
        order.  ``sheet`` is accepted but not used by this method.

        Returns:
            Whatever ``self.getresult()`` returns.
        """
        out = self.queue
        self.result = [currrow]
        print("")
        out.put(u"商品链接")
        out.put(self.url)
        print("商品链接:")
        print(self.url)

        self.html = GlobalTools.getResponseContent(self.res)

        # Extractors run strictly in this order; each one is looked up only
        # when its turn comes.
        extractor_names = (
            "geturl",
            "getprice",
            "getshopname",
            "getbrand",
            "getfirstavailable",
            "getranking",
            "getqa",
            "getCategory",
            "getstars",
            "getreviewcount",
            "getgoodreviewvote",
        )
        for name in extractor_names:
            getattr(self, name)()

        # The US review count is produced as part of getgoodreviewvote, so it
        # has to be recomputed afterwards when the flag is set.
        if self.us_reviews_need_adjust:
            self.getusviewcount()

        self.getfba()
        print(self.resultmap)
        return self.getresult()
Esempio n. 3
0
    def get_email(self, url):
        """Fetch *url* with AJAX-style headers and dump the response to stdout.

        Prints the URL, the HTTP status code and the parsed response content
        between two separator lines.  Nothing is returned; despite the name,
        no e-mail address is extracted here.

        Args:
            url: Address to request (a string; it is concatenated into the
                log line).
        """
        print("******************")
        print("url:" + url)

        headers = GlobalTools.getHeaders()
        headers['X-Requested-With'] = 'XMLHttpRequest'
        headers[
            'Referer'] = 'https://www.amazon.de/gp/profile/amzn1.account.AF3BW3DYKKEHMR4HSAFIQDM62QNQ/ref=cm_cr_getr_d_pdp?ie=UTF8'
        # NOTE(review): hard-coded session cookie (contains session tokens).
        # It should be loaded from configuration rather than kept in source.
        headers[
            'Cookie'] = 's_nr=1507517187408-New; s_vnum=1939517187408%26vn%3D1; s_dslv=1507517187409; x-acbde="6gxYYwpBpG20FBChzzu9sn?hypH9MpwKF0gVmk2LOxnYWw2uE@5B3Qh7Df?gkrXM"; at-acbde=Atza|IwEBIFPo-tRvBxygSgF8Ard63lJANpi78TG-8BUTC8ScSLLiUskUDIh0VMUwG_l8fsWqij5ArfksGmp6Ks52ZiYPS0bJeoDkACAtCZF6h3ePo0yqw9jdKVsq4edrTZPfLFYYYaRsbNyD2x09klSn7jKaU8Sn56Cr4VCIx_H8LObqLF2bX6Aq0EWW-O0PoBHgkdYI9iPhMo_2OHQjWuFAeinw0dU1M7X-SWBl2wB4FtzVXlQzarbwLjsHxXSaw2LwX3ENF6oCHOh73pPPnTX68JEedEkLu-sOSL2eZ5Whe7zJ2L76yyEzyjVXQpWbDdUqUP58MdLTNLfhCM5LkwWGmd7fuoLC1u7sZhBkJSA6oLQ0Q3kua5e8x0LfI3HfLZwC6qzrDJ6pheW0my98MFK4r9JaG85Z; sess-at-acbde="d7DXrZglD8+7+42k5qmlfFUxSpHJkUg8H1Dz17ZCU+U="; x-wl-uid=1WLJUGaYF93xUQuJRK3PCgsu0IJeaJoL7J/7XRaD4Men7E4FPUEro4vxW+rjyvLb9XCGGKFNM1yrtwZ9b9BK3yXkMKCav41q6XBiaxBqGmVWG1vMYfNxoP30XR5Otq5GKr5uenX7TA98=; session-token="1o+pNqOm6F7uZWrYdtDbU26LiB8ByJ40B64c+JFwPh3lkBt1MbUn+ha6qR3BaTgduMMVK1e1LjJ6pnoF+/r3c4PUBDfax7J+AGcgt2QiXkvMdVyLjyDowIQtWUbeHi6V4hfxIhgrYGcAyZ4x4keQvPaEHOW0v8t8akQV0nmi5sj1Jzu8pn162bmTw0XLP88olTMWGCWAeJlHGsXpCvyiS1VrFGHpgj2xSW3j5jdNi8DCjE4R7E+EqR+4BNFVQs+1KUR7bf9qBMWu3xT7DDe9KQ=="; session-id-time=2082754801l; session-id=261-5557160-1959728; ubid-acbde=258-5984155-0914160; csm-hit=0CP5W9ZYZNFE06XFCV0V+b-9KE07PDF4YD27JB8DFQQ|1509967781417'
        # The second positional argument of requests.get() is ``params``, so
        # the dict must be passed by keyword to be sent as request headers
        # rather than as query-string parameters.
        res = requests.get(url, headers=headers)
        print(res.status_code)
        html = GlobalTools.getResponseContent(res)
        print(html)
        print("******************")
Esempio n. 4
0
 def getusviewcount(self):
     """Read the positive/critical review counts from the US reviews page.

     Requests the amazon.com product-reviews page for ``self.asin`` and
     looks at the elements whose id matches ``viewpoint-``.  The first one
     is parsed for the positive count and the second for the critical
     count; each value found is stored in ``self.resultmap`` under
     ``'positivereviewcount'`` / ``'negtivereviewcount'``.  A count that
     cannot be located or parsed is reported on stdout and skipped, leaving
     the corresponding key untouched.
     """
     url = ("https://www.amazon.com/product-reviews/" + self.asin +
            "/ref=acr_dpx_see_all?ie=UTF8&showViewpoints=1")
     res = requests.get(url, headers=GlobalTools.getHeaders())
     html = GlobalTools.getResponseContent(res)
     viewpoints = html.find_all(id=re.compile("viewpoint-"))

     # (index into viewpoints, data-reftag of the link, word that follows
     #  the number in the link text, key written to self.resultmap)
     targets = (
         (0, "cm_cr_arp_d_viewpnt_lft", "positive", 'positivereviewcount'),
         (1, "cm_cr_arp_d_viewpnt_rgt", "critical", 'negtivereviewcount'),
     )
     for index, reftag, keyword, key in targets:
         if index >= len(viewpoints):
             continue
         try:
             text = viewpoints[index].find_all(
                 attrs={"data-reftag": reftag})[0].text
             # The number sits between "all" and the keyword; drop any
             # thousands separators so values such as "1,234" parse too.
             number = text.split(keyword)[0].split("all")[1]
             self.resultmap[key] = int(number.strip().replace(",", ""))
         except (IndexError, ValueError) as err:
             # Best effort: keep going, but make the failure visible
             # instead of swallowing it silently.
             print("getusviewcount: could not parse %s: %s" % (key, err))
     print(viewpoints)
Esempio n. 5
0
 def get_imgs_by_product_url(self, url):
     """Fetch *url* and look up the list inside the main image container.

     NOTE(review): the located ``ul`` element is discarded and the method
     returns ``None`` -- this looks unfinished; confirm what callers expect.
     """
     response = requests.get(url)
     page = GlobalTools.getResponseContent(response)
     container = page.find(id="main-image-container")
     container.find("ul")