Example #1
    def get_shop_info(self):
        headers = GlobalTools.getHeaders()
        res = requests.get(self.link, headers=headers)
        html = BeautifulSoup(res.text, 'lxml')
        print(html.find(id="sellerName").text)
        feedback = html.find(id="feedback-summary-table")
        feedbacktab = feedback.find_all("tr")
        timescoop = feedbacktab[0].find_all("th")
        for item in timescoop:
            print(item.text)
        positive = feedbacktab[1].find_all("td")
        neutral = feedbacktab[2].find_all("td")
        negtive = feedbacktab[3].find_all("td")
        count = feedbacktab[4].find_all("td")
        for feedback in feedbacktab:
            line = feedback.find_all("td")
            for item in line:
                print(item.text.strip("\n").strip() + ",", end=" ")
            print()

        products = html.find(id="product-data").find_all(
            attrs={'class': "product-details"})
        for product in products:
            titlelink = product.find('a', attrs={'class': "product-title"})
            title = titlelink.get('title')
            href = titlelink.get('href')
            price = product.find('div', attrs={'class': 'product-price'})
            rating = product.find('div', class_="product-rating")
            # .text is needed here: find() returns Tag objects, which
            # cannot be concatenated to strings directly.
            print(title + " " + href + " " + price.text.strip() + " " + rating.text.strip())
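
Every example in this listing leans on a project-local GlobalTools module that is never shown. As a rough, hypothetical sketch (the header values are assumptions; only the User-Agent is taken from the hard-coded headers in Example #14), getHeaders() presumably returns a browser-like header dict:

# Hypothetical sketch of the undocumented GlobalTools helper used throughout.
class GlobalTools:
    @staticmethod
    def getHeaders():
        # Values below are placeholders, not the project's real headers.
        return {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/55.0.2883.87 Safari/537.36",
            "Accept-Language": "en-GB,en;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
        }
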
Example #2
def get_following_by_asin(asin, baseurl):
    headers = GlobalTools.getHeaders()

    url = baseurl + "/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=" + str(asin)

    print("get url:" + url)

    res = requests.get(url, headers=headers)

    # print "res:headers:"
    # print res.headers
    # The header may be absent, so use .get() rather than indexing.
    if res.headers.get('Content-Encoding') == "br":
        html = BeautifulSoup(brotli.decompress(res.content), "lxml")
    else:
        html = BeautifulSoup(res.content, "lxml")

    tmp = open("tmp2.html","w+")
    if res.headers['Content-Encoding'] == "br":
        tmp.write(brotli.decompress(res.content))
    else:
        tmp.write(res.content)
    tmp.close()

    links = html.find(id="s-results-list-atf").find_all('a')
    for link in links:
        # Use .get(): anchors without an href would raise a KeyError.
        if 'offer-listing' in link.get('href', ''):
            # print(link.text.strip().split('('))
            # The link text is expected to carry an offer count in
            # parentheses; more than one offer means other sellers are
            # following this listing.
            if int(link.text.strip().split('(')[1].split()[0]) > 1:
                return [True, link['href']]
            else:
                return [False]
    else:
        # for/else: runs only when no offer-listing link was found.
        return [False]
Example #3
 def get_profile_info(self, url):
     "https://www.amazon.co.uk/gp/profile/amzn1.account.AHTYNWDHL6M2WCVS7LOUVFXBLLFQ/ref=cm_cr_getr_d_gw_btm?ie=UTF8"
     profileid = url.split("profile/")[1].split("/ref")[0]
     url = "https://www.amazon.co.uk/profilewidget/bio/" + profileid + "?view=visitor"
     print("in get_profile_info")
     headers = GlobalTools.getHeaders()
     headers[
         'Cookie'] = 's_nr=1507517187408-New; s_vnum=1939517187408%26vn%3D1; s_dslv=1507517187409; x-acbde="6gxYYwpBpG20FBChzzu9sn?hypH9MpwKF0gVmk2LOxnYWw2uE@5B3Qh7Df?gkrXM"; at-acbde=Atza|IwEBIFPo-tRvBxygSgF8Ard63lJANpi78TG-8BUTC8ScSLLiUskUDIh0VMUwG_l8fsWqij5ArfksGmp6Ks52ZiYPS0bJeoDkACAtCZF6h3ePo0yqw9jdKVsq4edrTZPfLFYYYaRsbNyD2x09klSn7jKaU8Sn56Cr4VCIx_H8LObqLF2bX6Aq0EWW-O0PoBHgkdYI9iPhMo_2OHQjWuFAeinw0dU1M7X-SWBl2wB4FtzVXlQzarbwLjsHxXSaw2LwX3ENF6oCHOh73pPPnTX68JEedEkLu-sOSL2eZ5Whe7zJ2L76yyEzyjVXQpWbDdUqUP58MdLTNLfhCM5LkwWGmd7fuoLC1u7sZhBkJSA6oLQ0Q3kua5e8x0LfI3HfLZwC6qzrDJ6pheW0my98MFK4r9JaG85Z; sess-at-acbde="d7DXrZglD8+7+42k5qmlfFUxSpHJkUg8H1Dz17ZCU+U="; x-wl-uid=1WLJUGaYF93xUQuJRK3PCgsu0IJeaJoL7J/7XRaD4Men7E4FPUEro4vxW+rjyvLb9XCGGKFNM1yrtwZ9b9BK3yXkMKCav41q6XBiaxBqGmVWG1vMYfNxoP30XR5Otq5GKr5uenX7TA98=; session-token="1o+pNqOm6F7uZWrYdtDbU26LiB8ByJ40B64c+JFwPh3lkBt1MbUn+ha6qR3BaTgduMMVK1e1LjJ6pnoF+/r3c4PUBDfax7J+AGcgt2QiXkvMdVyLjyDowIQtWUbeHi6V4hfxIhgrYGcAyZ4x4keQvPaEHOW0v8t8akQV0nmi5sj1Jzu8pn162bmTw0XLP88olTMWGCWAeJlHGsXpCvyiS1VrFGHpgj2xSW3j5jdNi8DCjE4R7E+EqR+4BNFVQs+1KUR7bf9qBMWu3xT7DDe9KQ=="; session-id-time=2082754801l; session-id=261-5557160-1959728; ubid-acbde=258-5984155-0914160; csm-hit=0CP5W9ZYZNFE06XFCV0V+b-9KE07PDF4YD27JB8DFQQ|1509967781417'
     res = requests.get(url, headers=headers)
     # html = GlobalTools.getResponseContent(res)
     # htmltxt = html.text
     # s = htmltxt.split("window.CustomerProfileRootProps")[1].split("window.PageContext")[0].replace("=","").replace(";","").strip()
     # print(s)
     # s = s.decode(encoding="utf-8")
     s = res.text
     try:
         jsonobj = json.loads(s)
     except ValueError:
         # Not JSON (likely a robot-check page); dump it for inspection.
         tmp = open("tmp2.html", "w+")
         tmp.write(s)
         tmp.close()
         exit(1)
     # name = jsonobj["nameHeaderData"]["name"]
     # print("name:"+name)
     reviewRank = jsonobj['topReviewerInfo']['rank']
     print(reviewRank)
Example #4
    def prerequest(self):
        queue = self.queue
        queue.put("prerequest")
        print("prerequest")
        GlobalTools.setbaseurl(self.baseurl)
        res = requests.get(self.url, headers=self.headers)
        self.res = res

        html = GlobalTools.getResponseContent(self.res)

        if html.find(id="add-to-cart-button") is None:
            if html.find(id="availability") is not None:
                # print "text" + html.find(id="availability").text
                url = self.baseurl + html.find(
                    id="availability").find("a").get('href')
                self.second_url = url
                res = requests.get(url, headers=GlobalTools.getHeaders())
                html = GlobalTools.getResponseContent(res)

                try:
                    price = html.find(class_="olpOfferPrice").text.strip()
                    self.unnormal_price = price
                    print(price)
                    shop = html.find(class_="olpSellerName").text
                    self.unnormal_shop = shop
                    print(shop)
                except:
                    traceback.print_exc()
                self.normal_situation = False
                return False
        return True
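
GlobalTools.getResponseContent() is equally undocumented. Judging from the inline Brotli handling in Examples #2 and #9, it plausibly wraps that same branch in one helper, roughly:

# Hypothetical sketch of getResponseContent(), inferred from the inline
# Brotli handling in Examples #2 and #9 -- not the project's actual code.
import brotli
from bs4 import BeautifulSoup

def getResponseContent(res):
    # Amazon sometimes responds Brotli-compressed ("br"); older requests
    # stacks do not decode it automatically, so handle it explicitly.
    if res.headers.get("Content-Encoding") == "br":
        return BeautifulSoup(brotli.decompress(res.content), "lxml")
    return BeautifulSoup(res.content, "lxml")
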
Example #5
 def get_product_images(self):
     for img in self.imgurls:
         res = requests.get(img, headers=GlobalTools.getHeaders())
         # Name the file after the image URL, not self.url; otherwise every
         # download overwrites the same file. The with block also closes the
         # file, so no explicit close() is needed.
         filename = self.asin + "_" + img.split("/")[-1]
         with open(filename, "wb") as f:
             f.write(res.content)
Example #6
    def get_email(self, url):
        print("******************")
        print("url:" + url)

        headers = GlobalTools.getHeaders()
        headers['X-Requested-With'] = 'XMLHttpRequest'
        headers[
            'Referer'] = 'https://www.amazon.de/gp/profile/amzn1.account.AF3BW3DYKKEHMR4HSAFIQDM62QNQ/ref=cm_cr_getr_d_pdp?ie=UTF8'
        headers[
            'Cookie'] = 's_nr=1507517187408-New; s_vnum=1939517187408%26vn%3D1; s_dslv=1507517187409; x-acbde="6gxYYwpBpG20FBChzzu9sn?hypH9MpwKF0gVmk2LOxnYWw2uE@5B3Qh7Df?gkrXM"; at-acbde=Atza|IwEBIFPo-tRvBxygSgF8Ard63lJANpi78TG-8BUTC8ScSLLiUskUDIh0VMUwG_l8fsWqij5ArfksGmp6Ks52ZiYPS0bJeoDkACAtCZF6h3ePo0yqw9jdKVsq4edrTZPfLFYYYaRsbNyD2x09klSn7jKaU8Sn56Cr4VCIx_H8LObqLF2bX6Aq0EWW-O0PoBHgkdYI9iPhMo_2OHQjWuFAeinw0dU1M7X-SWBl2wB4FtzVXlQzarbwLjsHxXSaw2LwX3ENF6oCHOh73pPPnTX68JEedEkLu-sOSL2eZ5Whe7zJ2L76yyEzyjVXQpWbDdUqUP58MdLTNLfhCM5LkwWGmd7fuoLC1u7sZhBkJSA6oLQ0Q3kua5e8x0LfI3HfLZwC6qzrDJ6pheW0my98MFK4r9JaG85Z; sess-at-acbde="d7DXrZglD8+7+42k5qmlfFUxSpHJkUg8H1Dz17ZCU+U="; x-wl-uid=1WLJUGaYF93xUQuJRK3PCgsu0IJeaJoL7J/7XRaD4Men7E4FPUEro4vxW+rjyvLb9XCGGKFNM1yrtwZ9b9BK3yXkMKCav41q6XBiaxBqGmVWG1vMYfNxoP30XR5Otq5GKr5uenX7TA98=; session-token="1o+pNqOm6F7uZWrYdtDbU26LiB8ByJ40B64c+JFwPh3lkBt1MbUn+ha6qR3BaTgduMMVK1e1LjJ6pnoF+/r3c4PUBDfax7J+AGcgt2QiXkvMdVyLjyDowIQtWUbeHi6V4hfxIhgrYGcAyZ4x4keQvPaEHOW0v8t8akQV0nmi5sj1Jzu8pn162bmTw0XLP88olTMWGCWAeJlHGsXpCvyiS1VrFGHpgj2xSW3j5jdNi8DCjE4R7E+EqR+4BNFVQs+1KUR7bf9qBMWu3xT7DDe9KQ=="; session-id-time=2082754801l; session-id=261-5557160-1959728; ubid-acbde=258-5984155-0914160; csm-hit=0CP5W9ZYZNFE06XFCV0V+b-9KE07PDF4YD27JB8DFQQ|1509967781417'
        # headers must be passed by keyword: the second positional argument
        # of requests.get() is params, not headers.
        res = requests.get(url, headers=headers)
        print(res.status_code)
        html = GlobalTools.getResponseContent(res)
        print(html)
        print("******************")
Example #7
    def __init__(self, asin, positive_count, negtive_count, debug=False):
        self.debug = debug
        self.baseurl = GlobalTools.getbaseurl()
        self.asin = asin
        self.headers = GlobalTools.getHeaders()
        # self.positiveurl = self.baseurl+"/ss/customer-reviews/ajax/reviews/get/ref=cm_cr_arp_d_viewpnt_lft"
        self.positiveurl = self.baseurl + "/hz/reviews-render/ajax/reviews/get/ref=cm_cr_arp_d_viewpnt_lft"

        # Floor division: plain "/" would yield a float page count in Python 3.
        self.negtive_page_count = negtive_count // 4 + 1
        self.positivecount = positive_count
        self.negtivecount = negtive_count
        self.positivevote = 0
        self.negtivevote = 0
        self.POSITIVE_VOTE_TYPE = 0
        self.NEGTIVE_VOTE_TYPE = 1
Example #8
 def __init__(self, queue, asin, countrycode):
     self.queue = queue
     self.countrycode = countrycode
     self.baseurl = GlobalTools.getBaseurlFromCountrycode(countrycode)
     self.headers = GlobalTools.getHeaders()
     self.asin = asin
     self.url = get_link_by_asin(asin, self.baseurl)
     # If a normal product page can't be fetched, this kind of URL can't be
     # used to get the price and shop name. The fallback link looks like:
     # http://www.amazon.de/gp/offer-listing/B01N52QW8A/ref=dp_olp_0?ie=UTF8&condition=all
     self.second_url = ""
     self.normal_situation = True
     self.unnormal_price = ""
     self.unnormal_shop = ""
     self.resultmap = {}
     self.result = []
     self.us_reviews_need_adjust = False
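
getBaseurlFromCountrycode() is another assumed helper; a minimal sketch, assuming a plain country-code-to-storefront map built only from the domains that appear in these examples:

# Hypothetical sketch: map a country code to the matching Amazon storefront.
def getBaseurlFromCountrycode(countrycode):
    domains = {
        "uk": "https://www.amazon.co.uk",
        "de": "https://www.amazon.de",
        "us": "https://www.amazon.com",
    }
    return domains[countrycode]
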
Example #9
def get_link_by_asin(asin, baseurl):
    # print "in get_link_by_asin"
    headers = GlobalTools.getHeaders()
    # baseurl = "http://www.amazon.co.uk"
    url = baseurl + "/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=" + str(
        asin)
    # url = baseurl
    # print url

    # url:search - alias = aps
    # field - keywords:B01KGUMWJU
    params = {
        # "url":"search-alias=aps",
        # "field-keywords":asin
    }
    # proxies = {
    #     "http":"123.148.74.107:80",
    #     "https": "218.18.10.11:9797"
    # }
    print "get url:" + url

    res = requests.get(url, headers=headers)

    print "res:headers:"
    print res.headers
    if res.headers['Content-Encoding'] == "br":
        html = BeautifulSoup(brotli.decompress(res.content), "lxml")
    else:
        html = BeautifulSoup(res.content, "lxml")
    # html = BeautifulSoup(res.content, "lxml")

    tmp = open("tmp2.html", "w+")
    if res.headers['Content-Encoding'] == "br":
        tmp.write(brotli.decompress(res.content))
    else:
        tmp.write(res.content)
    tmp.close()

    # print "url:"+url
    # print html.find(id="centerMinus")
    # link = html.find(id="s-results-list-atf")
    link = (html.find(id="s-results-list-atf")).find(
        'a', attrs={'class': 's-access-detail-page'})
    link = link.get('href')
    link = link.split("&qid")[0]
    print "link:" + link
    return link
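
A hypothetical call, using the ASIN that appears in the comments above and assuming the UK storefront:

link = get_link_by_asin("B01KGUMWJU", "https://www.amazon.co.uk")
print(link)
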
Example #10
def getFlowingList(url):
    res = requests.get(url, headers=GlobalTools.getHeaders())
    if res.headers.get('Content-Encoding') == "br":
        html = BeautifulSoup(brotli.decompress(res.content), "lxml")
    else:
        html = BeautifulSoup(res.content, "lxml")
    # id must be a keyword argument; find(id, "olpOfferList") would pass the
    # id builtin as the tag name and never match.
    followerlist = html.find(id="olpOfferList").find_all(class_="olpOffer")
    resultlist = []
    for follow in followerlist:
        followerNameElem = follow.find(class_="olpSellerName")
        if len(followerNameElem.find_all("a"))>0:
            followerName = followerNameElem.text
            url = GlobalTools.getBaseurlFromCountrycode("uk")+(followerNameElem.find("a"))['href']
        else:
            if len(followerNameElem.find_all("img"))>0:
                followerName = followerNameElem.find("img")['alt']
                url = "https://amazon.com"
            else:
                followerName = ""
                url = ""

        resultlist.append((followerName, url))
        print(followerName, url)
    return resultlist
Example #11
def get_link_by_asin(asin, baseurl):
    headers = GlobalTools.getHeaders()
    # url = baseurl+"/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords="+str(asin)
    url = baseurl + "/s?k=" + asin + "&ref=nb_sb_noss_2"

    print("get url:" + url)

    res = requests.get(url, headers=headers)
    #
    print("res:headers:")
    print(res.headers)
    if res.headers.get('Content-Encoding') == "br":
        html = BeautifulSoup(brotli.decompress(res.content), "lxml")
        with open("searchasin.html", "w") as f:
            f.write(brotli.decompress(res.content).decode("utf-8"))
    else:
        html = BeautifulSoup(res.content, "lxml")
        with open("searchasin.html", "w") as f:
            f.write(res.content.decode("utf-8"))

    # tmp = open("tmp2.html","w")
    # if res.headers['Content-Encoding'] == "br":
    #     tmp.write(brotli.decompress(res.content))
    # else:
    #     tmp.write(res.content.decode("utf-8"))
    # tmp.close()
    #
    # link = html.find_all(class_="s-search-results")[1].find_all('a',attrs={'class':'a-text-normal'})[0]
    # link = link.get('href')
    # link = link.split("&qid")[0]
    # print("link:"+baseurl+link)
    #
    # return baseurl+link
    # return baseurl + "/dp/" + asin +"/ref=redir_mobile_desktop"
    t = html.find_all(class_="s-search-results")[1]
    productslink = t.find_all("a")
    for item in productslink:
        if "/dp/" + asin in item.get('href'):
            return baseurl + (item.get('href').split("&qid")[0])
Example #12
 def getusviewcount(self):
     asin = self.asin
     url = "https://www.amazon.com/product-reviews/" + asin + "/ref=acr_dpx_see_all?ie=UTF8&showViewpoints=1"
     res = requests.get(url, headers=GlobalTools.getHeaders())
     html = GlobalTools.getResponseContent(res)
     viewpoints = html.find_all(id=re.compile("viewpoint-"))
     if len(viewpoints) > 0:
         try:
             positive = viewpoints[0].find_all(
                 attrs={"data-reftag": "cm_cr_arp_d_viewpnt_lft"})[0].text
             self.resultmap['positivereviewcount'] = int(
                 positive.split("positive")[0].split("all")[1].strip())
         except (IndexError, ValueError):
             # Best-effort parsing; a layout change just skips the count.
             pass
     if len(viewpoints) > 1:
         try:
             negtive = viewpoints[1].find_all(
                 attrs={"data-reftag": "cm_cr_arp_d_viewpnt_rgt"})[0].text
             self.resultmap['negtivereviewcount'] = int(
                 negtive.split("critical")[0].split("all")[1].strip())
         except (IndexError, ValueError):
             # Best-effort parsing; a layout change just skips the count.
             pass
     print(viewpoints)
Example #13
 def __init__(self, seller):
     self.marketplaceid = GlobalTools.getMarketplaceID()
     self.headers = GlobalTools.getHeaders()
     self.url = GlobalTools.getSearchShopProductsUrl()
     self.seller = seller
Example #14
def newfba(asin):
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        # "Content-Length": "547",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        # "Content-Type": "text/html;charset=UTF-8",
        "Host": "www.amazon.co.uk",
        "Origin": "https://www.amazon.co.uk",
        "Content-Encoding": "br",
        "Pragma": "no-cache",
        "Referer": "https://www.amazon.co.uk/gp/cart/view.html/ref=lh_cart_vc_btn",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
        "X-AUI-View": "Desktop",
        "X-Requested-With": "XMLHttpRequest",
        "Cookie": """s_nr=1507528377078-New; s_vnum=1939528377079%26vn%3D1; s_dslv=1507528377080; x-wl-uid=1Vm2WQeeHRHErocdZmw5/GK41LYoPF67ySPkncJEPAiRRhWfNF0OyPa9yuT4S7+FNdyHQwhizugO0QMrffNe4I2JzXtJIy14CzCSmvUSme8lqhoZjjh77OF8sXJ/jQGXBcjMLuoUEESU=; at-acbuk=Atza|IwEBIOoVGprM0g2Qazrt-ifX53XNbsi7XFYs1OZNmIDgeZD6a5i2s7p4JuLWL6fC30oebF1OGUvU7z7HI266F0nMzVdpN8mWBQ1uOoa0XcmqZYdODKvv57Rq3jARRIaOoqkDAS6Ke6QFIjp1s1V6ZnPftLOOaz9uKLjRlvbMvtD57XnNZq2blSLo8IqJh0BhgpIH1K7cfEd7zgHGInlid0GyjKhMTaN5oRoZEzbvHAl9aHx15bRG8rKSbqpHQMeylRnYRnOirQGFgyPs2zQUp6YtUbivSlb8LGmOXL8aQaqZSE2lwyI3Sy9cGtDbBucHLB-OK4t89Rf5NIMRMSM-uMddzWr504Cg7_bOJ6RZFABsEDvdDEIItPRgnhrDksbMefih0AQSF8jnS9xXg3UbX9tqRbjA; amznacsleftnav-328511b4-0414-31f1-91c6-bca31c30900c=1; x-acbuk="DpcKYrm9@Uw75TNjTsXwmX79eaa3NMP2dk5ZlsVntw6MXujQjHcGEerpfDRFK8hR";session-token=9SQ2EeLcEOiWNXk9Km/DNS6S1V0UZwProvVruiPJrCVgmxhyesgqA/fp58r9T9x2sKqlQqrsEEER26oL2mWsLSDfPDsZIgbKwHiWox5/i0IB0R8heds6DI1HK15chFLvoLUg/J8JaqgwtAoINSoQpvXPRngz83hB73b9x54TmuIuxH8LyuVsQlHkt5CeOaWAKHpif0qNYASaMLmf/Q0EDRW8RO0yBFk+SPYTIZwRv8wy4200Mchhe4UhrsdJOX4aubGsciZgiUtFN7fjp4F4NQ=="; lc-acbuk=en_GB; ubid-acbuk=261-6573040-2508135; session-id-time=2082758401l; session-id=259-7896668-2728509; csm-hit=DQ3DSN2G6C2P8DBSE4K4+s-4CDTDE03S82FARC6XGS1|1514455697049"""
    }

    url = "https://www.amazon.co.uk/gp/cart/ajax-update.html/ref=ox_sc_update_quantity_1%7C9%7C11"

    data = {
        "hasMoreItems": 0,
        "timeStamp": 1514454024,
        "token": "gFHNsVRD27zMiOpe+yYpwFsAOZohN8u+a5VmqKkAAAAJAAAAAFpEvAhyYXcAAAAA",
        "activeItems": "C31HAVQP205TNO|1|0|5|3.05|||0||",
        "addressId": "",
        "addressZip": "",
        "closeAddonUpsell": 1,
        "flcExpanded": 0,
        "quantity.C31HAVQP205TNO": "11",
        "pageAction": "update-quantity",
        "submit.update-quantity.C31HAVQP205TNO": "1",
        "actionItemID": "C31HAVQP205TNO",
        "requestID": "EFHWWNTW6V3PRPMTQVWY",
        "asin": "B003KN7PU2",
        "encodedOffering": "%2BMwdK243Pp3oHjtzeyP6rdX8pnsybQAfRMa%2FX803XTXSTS7T%2BThAv741wG3TqvzM2kBUhnHpgojcF03P1%2FiSGuiN%2F5D6331v80WV2YLu2HU%3D"
    }

    # headers = urllib.quote(json.dumps(headers))
    # Note: the payload is sent as URL-quoted JSON even though the
    # Content-Type claims form encoding; passing the dict directly as
    # data= would let requests form-encode it instead.
    comm_params = urllib.parse.quote(json.dumps(data))
    request = requests.session()
    request.get("https://www.amazon.co.uk", headers=GlobalTools.getHeaders())
    # res = request.post(url,headers=headers,data=comm_params)
    res = requests.post(url, headers=headers, data=comm_params)
    print(res.content)
    # json.loads() dropped its encoding argument in Python 3.9; decode first.
    jsonobj = json.loads(res.content.decode("utf-8"))
    print(jsonobj['features']['imb'])
    print(jsonobj['features']['nav-cart'])
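
Note that newfba() cannot be replayed as-is: the token, requestID, encodedOffering, and Cookie values above were captured from one specific cart session and expire, so a live run needs fresh values taken from an authenticated browser session.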