Example 1
    def prerequest(self):
        queue = self.queue
        queue.put("prerequest")
        print("prerequest")
        GlobalTools.setbaseurl(self.baseurl)
        res = requests.get(self.url, headers=self.headers)
        self.res = res

        html = GlobalTools.getResponseContent(self.res)

        # No add-to-cart button: not a normal product page, so follow the
        # availability link to the offer-listing page instead.
        if html.find(id="add-to-cart-button") is None:
            if html.find(id="availability") is not None:
                url = self.baseurl + html.find(
                    id="availability").find("a").get('href')
                self.second_url = url
                res = requests.get(url, headers=GlobalTools.getHeaders())
                html = GlobalTools.getResponseContent(res)

                try:
                    price = html.find(class_="olpOfferPrice").text.strip()
                    self.unnormal_price = price
                    print(price)
                    shop = html.find(class_="olpSellerName").text
                    self.unnormal_shop = shop
                    print(shop)
                except Exception:
                    traceback.print_exc()
                self.normal_situation = False
                return False
        return True
Example 2
    def __init__(self, asin, positive_count, negtive_count, debug=False):
        self.debug = debug
        self.baseurl = GlobalTools.getbaseurl()
        self.asin = asin
        self.headers = GlobalTools.getHeaders()
        # self.positiveurl = self.baseurl+"/ss/customer-reviews/ajax/reviews/get/ref=cm_cr_arp_d_viewpnt_lft"
        self.positiveurl = self.baseurl + "/hz/reviews-render/ajax/reviews/get/ref=cm_cr_arp_d_viewpnt_lft"

        # Use floor division: in Python 3, "/" would make the page count a float.
        self.negtive_page_count = negtive_count // 4 + 1
        self.positivecount = positive_count
        self.negtivecount = negtive_count
        self.positivevote = 0
        self.negtivevote = 0
        self.POSITIVE_VOTE_TYPE = 0
        self.NEGTIVE_VOTE_TYPE = 1
Example 3
 def getCategory(self):
     try:
         # A product sometimes has no category; leave it empty here, and when
         # fetching the category ranking below, fall back to the first category.
         menu_levels = self.html.find(
             id="wayfinding-breadcrumbs_feature_div").find_all(
                 attrs={"class": "a-list-item"})
         count = len(menu_levels)
         self.resultmap['first_level_menu'] = GlobalTools.removeBlankChars(
             menu_levels[0].text)
         if count >= 3:
             self.resultmap[
                 'second_level_menu'] = GlobalTools.removeBlankChars(
                     menu_levels[2].text)
     except Exception:
         pass
Example 4
    def get_email(self, url):
        print("******************")
        print("url:" + url)

        headers = GlobalTools.getHeaders()
        headers['X-Requested-With'] = 'XMLHttpRequest'
        headers[
            'Referer'] = 'https://www.amazon.de/gp/profile/amzn1.account.AF3BW3DYKKEHMR4HSAFIQDM62QNQ/ref=cm_cr_getr_d_pdp?ie=UTF8'
        headers[
            'Cookie'] = 's_nr=1507517187408-New; s_vnum=1939517187408%26vn%3D1; s_dslv=1507517187409; x-acbde="6gxYYwpBpG20FBChzzu9sn?hypH9MpwKF0gVmk2LOxnYWw2uE@5B3Qh7Df?gkrXM"; at-acbde=Atza|IwEBIFPo-tRvBxygSgF8Ard63lJANpi78TG-8BUTC8ScSLLiUskUDIh0VMUwG_l8fsWqij5ArfksGmp6Ks52ZiYPS0bJeoDkACAtCZF6h3ePo0yqw9jdKVsq4edrTZPfLFYYYaRsbNyD2x09klSn7jKaU8Sn56Cr4VCIx_H8LObqLF2bX6Aq0EWW-O0PoBHgkdYI9iPhMo_2OHQjWuFAeinw0dU1M7X-SWBl2wB4FtzVXlQzarbwLjsHxXSaw2LwX3ENF6oCHOh73pPPnTX68JEedEkLu-sOSL2eZ5Whe7zJ2L76yyEzyjVXQpWbDdUqUP58MdLTNLfhCM5LkwWGmd7fuoLC1u7sZhBkJSA6oLQ0Q3kua5e8x0LfI3HfLZwC6qzrDJ6pheW0my98MFK4r9JaG85Z; sess-at-acbde="d7DXrZglD8+7+42k5qmlfFUxSpHJkUg8H1Dz17ZCU+U="; x-wl-uid=1WLJUGaYF93xUQuJRK3PCgsu0IJeaJoL7J/7XRaD4Men7E4FPUEro4vxW+rjyvLb9XCGGKFNM1yrtwZ9b9BK3yXkMKCav41q6XBiaxBqGmVWG1vMYfNxoP30XR5Otq5GKr5uenX7TA98=; session-token="1o+pNqOm6F7uZWrYdtDbU26LiB8ByJ40B64c+JFwPh3lkBt1MbUn+ha6qR3BaTgduMMVK1e1LjJ6pnoF+/r3c4PUBDfax7J+AGcgt2QiXkvMdVyLjyDowIQtWUbeHi6V4hfxIhgrYGcAyZ4x4keQvPaEHOW0v8t8akQV0nmi5sj1Jzu8pn162bmTw0XLP88olTMWGCWAeJlHGsXpCvyiS1VrFGHpgj2xSW3j5jdNi8DCjE4R7E+EqR+4BNFVQs+1KUR7bf9qBMWu3xT7DDe9KQ=="; session-id-time=2082754801l; session-id=261-5557160-1959728; ubid-acbde=258-5984155-0914160; csm-hit=0CP5W9ZYZNFE06XFCV0V+b-9KE07PDF4YD27JB8DFQQ|1509967781417'
        # headers must be passed as a keyword argument; passed positionally it
        # would be treated as the params argument.
        res = requests.get(url, headers=headers)
        print(res.status_code)
        html = GlobalTools.getResponseContent(res)
        print(html)
        print("******************")
Example 5
    def parse(self, sheet, currrow):
        queue = self.queue
        self.result = [currrow]
        print("")
        queue.put(u"商品链接")
        queue.put(self.url)
        print("商品链接:")
        print(self.url)

        self.html = GlobalTools.getResponseContent(self.res)

        self.geturl()
        self.getprice()
        self.getshopname()
        self.getbrand()
        self.getfirstavailable()
        self.getranking()
        self.getqa()
        self.getCategory()
        self.getstars()
        self.getreviewcount()
        self.getgoodreviewvote()
        # On the US site the review count is tallied inside getgoodreviewvote, so recalculate it here.
        # if self.countrycode=="us":
        # self.adjustreviewcount()
        if self.us_reviews_need_adjust:
            self.getusviewcount()

        self.getfba()
        print(self.resultmap)
        return self.getresult()
Example 6
 def __init__(self, queue, asin, countrycode):
     self.queue = queue
     self.countrycode = countrycode
     self.baseurl = GlobalTools.getBaseurlFromCountrycode(countrycode)
     self.headers = GlobalTools.getHeaders()
     self.asin = asin
     self.url = get_link_by_asin(asin, self.baseurl)
     # If a normal product page can't be fetched, this kind of URL can't be
     # used to get a price and shop name.
     # The fallback link looks like this: http://www.amazon.de/gp/offer-listing/B01N52QW8A/ref=dp_olp_0?ie=UTF8&condition=all
     self.second_url = ""
     self.normal_situation = True
     self.unnormal_price = ""
     self.unnormal_shop = ""
     self.resultmap = {}
     self.result = []
     self.us_reviews_need_adjust = False
Example 7
 def get_profile_info(self, url):
     "https://www.amazon.co.uk/gp/profile/amzn1.account.AHTYNWDHL6M2WCVS7LOUVFXBLLFQ/ref=cm_cr_getr_d_gw_btm?ie=UTF8"
     profileid = url.split("profile/")[1].split("/ref")[0]
     url = "https://www.amazon.co.uk/profilewidget/bio/" + profileid + "?view=visitor"
     print("in get_profile_info")
     headers = GlobalTools.getHeaders()
     headers[
         'Cookie'] = 's_nr=1507517187408-New; s_vnum=1939517187408%26vn%3D1; s_dslv=1507517187409; x-acbde="6gxYYwpBpG20FBChzzu9sn?hypH9MpwKF0gVmk2LOxnYWw2uE@5B3Qh7Df?gkrXM"; at-acbde=Atza|IwEBIFPo-tRvBxygSgF8Ard63lJANpi78TG-8BUTC8ScSLLiUskUDIh0VMUwG_l8fsWqij5ArfksGmp6Ks52ZiYPS0bJeoDkACAtCZF6h3ePo0yqw9jdKVsq4edrTZPfLFYYYaRsbNyD2x09klSn7jKaU8Sn56Cr4VCIx_H8LObqLF2bX6Aq0EWW-O0PoBHgkdYI9iPhMo_2OHQjWuFAeinw0dU1M7X-SWBl2wB4FtzVXlQzarbwLjsHxXSaw2LwX3ENF6oCHOh73pPPnTX68JEedEkLu-sOSL2eZ5Whe7zJ2L76yyEzyjVXQpWbDdUqUP58MdLTNLfhCM5LkwWGmd7fuoLC1u7sZhBkJSA6oLQ0Q3kua5e8x0LfI3HfLZwC6qzrDJ6pheW0my98MFK4r9JaG85Z; sess-at-acbde="d7DXrZglD8+7+42k5qmlfFUxSpHJkUg8H1Dz17ZCU+U="; x-wl-uid=1WLJUGaYF93xUQuJRK3PCgsu0IJeaJoL7J/7XRaD4Men7E4FPUEro4vxW+rjyvLb9XCGGKFNM1yrtwZ9b9BK3yXkMKCav41q6XBiaxBqGmVWG1vMYfNxoP30XR5Otq5GKr5uenX7TA98=; session-token="1o+pNqOm6F7uZWrYdtDbU26LiB8ByJ40B64c+JFwPh3lkBt1MbUn+ha6qR3BaTgduMMVK1e1LjJ6pnoF+/r3c4PUBDfax7J+AGcgt2QiXkvMdVyLjyDowIQtWUbeHi6V4hfxIhgrYGcAyZ4x4keQvPaEHOW0v8t8akQV0nmi5sj1Jzu8pn162bmTw0XLP88olTMWGCWAeJlHGsXpCvyiS1VrFGHpgj2xSW3j5jdNi8DCjE4R7E+EqR+4BNFVQs+1KUR7bf9qBMWu3xT7DDe9KQ=="; session-id-time=2082754801l; session-id=261-5557160-1959728; ubid-acbde=258-5984155-0914160; csm-hit=0CP5W9ZYZNFE06XFCV0V+b-9KE07PDF4YD27JB8DFQQ|1509967781417'
     res = requests.get(url, headers=headers)
     # html = GlobalTools.getResponseContent(res)
     # htmltxt = html.text
     # s = htmltxt.split("window.CustomerProfileRootProps")[1].split("window.PageContext")[0].replace("=","").replace(";","").strip()
     # print(s)
     # s = s.decode(encoding="utf-8")
     s = res.text
     try:
         jsonobj = json.loads(s)
     except ValueError:
         # Dump the unparseable response for inspection, then bail out.
         with open("tmp2.html", "w+") as tmp:
             tmp.write(s)
         exit(1)
     # name = jsonobj["nameHeaderData"]["name"]
     # print("name:"+name)
     reviewRank = jsonobj['topReviewerInfo']['rank']
     print(reviewRank)
Example 8
 def get_product_images(self):
     for img in self.imgurls:
         res = requests.get(img, headers=GlobalTools.getHeaders())
         # Name each file after the image URL's last segment so successive
         # images don't overwrite one another.
         filename = self.asin + "_" + img.split("/")[-1]
         with open(filename, "wb") as f:
             f.write(res.content)
Example 9
def get_following_by_asin(asin,baseurl):
    headers = GlobalTools.getHeaders()

    url = baseurl+"/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords="+str(asin)

    print "get url:"+url

    res = requests.get(url,headers=headers)

    # print "res:headers:"
    # print res.headers
    if res.headers.get('Content-Encoding') == "br":
        html = BeautifulSoup(brotli.decompress(res.content), "lxml")
    else:
        html = BeautifulSoup(res.content, "lxml")

    # Dump the raw page for debugging; open in binary mode since the payload is bytes.
    with open("tmp2.html", "wb") as tmp:
        if res.headers.get('Content-Encoding') == "br":
            tmp.write(brotli.decompress(res.content))
        else:
            tmp.write(res.content)

    links = (html.find(id="s-results-list-atf")).find_all('a')
    target = ""
    for link in links:
        if 'offer-listing' in link['href']:
            # print link.text.strip().split('(')
            if int(link.text.strip().split('(')[1].split()[0]) > 1:
                return [True,link['href']]
            else:
                return [False]
    else:
        # for/else: runs only when no offer-listing link was found above.
        return [False]
Example 10
    def get_shop_info(self):
        headers = GlobalTools.getHeaders()
        res = requests.get(self.link, headers=headers)
        html = BeautifulSoup(res.text, 'lxml')
        print(html.find(id="sellerName").text)
        feedback = html.find(id="feedback-summary-table")
        feedbacktab = feedback.find_all("tr")
        timescoop = feedbacktab[0].find_all("th")
        for item in timescoop:
            print(item.text)
        positive = feedbacktab[1].find_all("td")
        neutral = feedbacktab[2].find_all("td")
        negtive = feedbacktab[3].find_all("td")
        count = feedbacktab[4].find_all("td")
        for feedback in feedbacktab:
            line = feedback.find_all("td")
            for item in line:
                print(item.text.strip("\n").strip() + ",", end=" ")
            print()

        products = html.find(id="product-data").find_all(
            attrs={'class': "product-details"})
        for product in products:
            titlelink = product.find('a', attrs={'class': "product-title"})
            title = titlelink.get('title')
            href = titlelink.get('href')
            price = product.find('div', attrs={'class': 'product-price'})
            ranting = product.find('div', class_="product-rating")
            # price and ranting are bs4 Tags; take their text before concatenating.
            print(title + " " + href + " " + price.text.strip() + " " +
                  ranting.text.strip())
Example 11
def getFlowingList(url):
    res = requests.get(url,headers=GlobalTools.getHeaders())
    if res.headers.get('Content-Encoding') == "br":
        html = BeautifulSoup(brotli.decompress(res.content),"lxml")
    else:
        html = BeautifulSoup(res.content,"lxml")
    # id must be passed as a keyword argument; find(id, "olpOfferList") was a bug.
    followerlist = html.find(id="olpOfferList").find_all(class_="olpOffer")
    resultlist = []
    for follow in followerlist:
        followerNameElem = follow.find(class_="olpSellerName")
        if len(followerNameElem.find_all("a"))>0:
            followerName = followerNameElem.text
            url = GlobalTools.getBaseurlFromCountrycode("uk")+(followerNameElem.find("a"))['href']
        else:
            if len(followerNameElem.find_all("img"))>0:
                followerName = followerNameElem.find("img")['alt']
                url = "https://amazon.com"
            else:
                followerName = ""
                url = ""

        print(followerName, url)
Example 12
 def getusviewcount(self):
     asin = self.asin
     url = "https://www.amazon.com/product-reviews/" + asin + "/ref=acr_dpx_see_all?ie=UTF8&showViewpoints=1"
     res = requests.get(url, headers=GlobalTools.getHeaders())
     html = GlobalTools.getResponseContent(res)
     viewpoints = html.find_all(id=re.compile("viewpoint-"))
     if len(viewpoints) > 0:
         try:
             positive = viewpoints[0].find_all(
                 attrs={"data-reftag": "cm_cr_arp_d_viewpnt_lft"})[0].text
             self.resultmap['positivereviewcount'] = int(
                 positive.split("positive")[0].split("all")[1].strip())
         except Exception:
             pass
     if len(viewpoints) > 1:
         try:
             negtive = viewpoints[1].find_all(
                 attrs={"data-reftag": "cm_cr_arp_d_viewpnt_rgt"})[0].text
             self.resultmap['negtivereviewcount'] = int(
                 negtive.split("critical")[0].split("all")[1].strip())
         except Exception:
             pass
     print(viewpoints)
Example 13
def get_link_by_asin(asin, baseurl):
    # print "in get_link_by_asin"
    headers = GlobalTools.getHeaders()
    # baseurl = "http://www.amazon.co.uk"
    url = baseurl + "/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=" + str(
        asin)
    # url = baseurl
    # print url

    print "get url:" + url

    res = requests.get(url, headers=headers)

    print "res:headers:"
    print res.headers
    if res.headers['Content-Encoding'] == "br":
        html = BeautifulSoup(brotli.decompress(res.content), "lxml")
    else:
        html = BeautifulSoup(res.content, "lxml")
    # html = BeautifulSoup(res.content, "lxml")

    tmp = open("tmp2.html", "w+")
    if res.headers['Content-Encoding'] == "br":
        tmp.write(brotli.decompress(res.content))
    else:
        tmp.write(res.content)
    tmp.close()

    # print "url:"+url
    # print html.find(id="centerMinus")
    # link = html.find(id="s-results-list-atf")
    link = (html.find(id="s-results-list-atf")).find(
        'a', attrs={'class': 's-access-detail-page'})
    link = link.get('href')
    # Strip the tracking query string that starts at "&qid".
    link = link.split("&qid")[0]
    print("link:" + link)
    return link
Example 14
def single_thread_main():
    if not os.path.isfile("/Users/eddie/PycharmProjects/amaproj/uk.xls"):
        messagebox.showerror("error", u"请将uk.xls放到和amazon.exe相同目录下")
        exit(0)

    products = []

    rb = xlrd.open_workbook("/Users/eddie/PycharmProjects/amaproj/uk.xls")

    try:
        sheet = rb.sheet_by_name("asin")
        count = sheet.nrows
        for i in range(0, count):
            print(sheet.cell_value(i, 0))
            products.append(sheet.cell_value(i, 0))
    except Exception:
        messagebox.showerror("error", u"uk.xls中必须包含名字为asin的sheet")
        exit(0)
    wb = copy(rb)
    sheet = wb.add_sheet(
        time.strftime(u"%m-%d_%H-%M", time.localtime(time.time())))
    # Write the header row:
    tableheaders = GlobalTools.get_table_header()
    row = 0
    col = 0
    for item in tableheaders:
        sheet.write(row, col, item)
        col += 1

    currrow = 1
    from queue import Queue
    for product in products:
        amazonobj = amazon(Queue(), product, "uk")
        amazonobj.prerequest()
        result = amazonobj.parse(sheet, currrow)
        currrow += 1
        try:
            wb.save("/Users/eddie/PycharmProjects/amaproj/uk.xls")
        except Exception:
            messagebox.showerror("error", u"保存文件失败,运行时,请不要打开uk.xls文件")
Example 15
def get_link_by_asin(asin, baseurl):
    headers = GlobalTools.getHeaders()
    # url = baseurl+"/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords="+str(asin)
    url = baseurl + "/s?k=" + asin + "&ref=nb_sb_noss_2"

    print("get url:" + url)

    res = requests.get(url, headers=headers)
    print("res:headers:")
    print(res.headers)
    if res.headers.get('Content-Encoding') == "br":
        html = BeautifulSoup(brotli.decompress(res.content), "lxml")
        with open("searchasin.html", "w") as f:
            f.write(brotli.decompress(res.content).decode("utf-8"))
    else:
        html = BeautifulSoup(res.content, "lxml")
        with open("searchasin.html", "w") as f:
            f.write(res.content.decode("utf-8"))

    t = html.find_all(class_="s-search-results")[1]
    productslink = t.find_all("a")
    for item in productslink:
        if "/dp/" + asin in item.get('href'):
            # Drop the tracking query string that starts at "&qid".
            return baseurl + (item.get('href').split("&qid")[0])
    # Implicitly returns None when the ASIN isn't among the results.
Example 16
def main(queue, countrycode):
    if not os.path.isfile(GlobalTools.getExcelFile(countrycode)):
        # messagebox.showerror("error", u"请将uk.xls放到和amazon.exe相同目录下")
        queue.put(u"ERROR:请将" + countrycode + u".xls放到和amazon.exe相同目录下")
        exit(0)

    products = []
    rb = xlrd.open_workbook(GlobalTools.getExcelFile(countrycode))
    try:
        sheet = rb.sheet_by_name("asin")
        count = sheet.nrows
        for i in range(0, count):
            print(sheet.cell_value(i, 0))
            products.append(sheet.cell_value(i, 0))
    except Exception:
        # This branch means the workbook lacks a sheet named "asin".
        queue.put(u"ERROR:" + countrycode + u".xls中必须包含名字为asin的sheet")
        exit(0)
    print("copy")
    wb = copy(rb)
    sheet = wb.add_sheet(
        time.strftime(u"%m-%d_%H-%M", time.localtime(time.time())))
    # Write the header row:
    tableheaders = GlobalTools.get_table_header()
    row = 0
    col = 0
    for item in tableheaders:
        sheet.write(row, col, item)
        col += 1

    pool = multiprocessing.Pool(processes=5)

    currrow = 1
    results = []

    for product in products:
        results.append(
            pool.apply_async(fun, (
                queue,
                product,
                countrycode,
                currrow,
                sheet,
            )))
        currrow += 1

    pool.close()
    pool.join()

    for result in results:
        try:
            row = result.get()
            currrow = row[0]
            print("currrow:" + str(currrow))
            col = 0
            for i in range(1, len(row)):
                sheet.write(currrow, col, row[i])
                col += 1
        except Exception:
            # Skip rows from workers that crashed.
            pass
    try:
        wb.save(GlobalTools.getExcelFile(countrycode))
    except Exception:
        queue.put(u"ERROR:保存文件失败,运行时,请不要打开站点对应的xls文件")
        exit(0)
    queue.put("finish.")
Example 17
def get_fba(queue,url,currrow,countrycode):
    result=[currrow]
    caps = dict(DesiredCapabilities.PHANTOMJS)
    caps["phantomjs.page.settings.userAgent"] = "Mozilla/5.0 `(Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
    caps["phantomjs.page.settings.loadImages"] = False
    queue.put("try to get phantomjs driver")
    driver = webdriver.PhantomJS(desired_capabilities=caps)
    # driver = webdriver.Chrome()
    queue.put("init driver")
    init_driver(driver)
    # queue.put("before link to other page")
    href = link_to_other_page(driver,url)
    # queue.put("after")
    print("ASIN:"+ASIN)
    result.append(ASIN)
    if href is not None and href != NORMAL_ADD_TO_CART:
        print("link to other page")
        queue.put("link to other page")
        try:
            driver.get(href)
        except:
            traceback.print_exc()
            driver.save_screenshot("d:/" + ASIN + "_timeout.png")
            driver.execute_script('window.stop()')
        init_driver(driver)
        driver.save_screenshot(GlobalTools.getimgsavepath(ASIN,"add_to_cart"))
        try:
            driver.find_element_by_name("submit.addToCart").click()
        except:
            traceback.print_exc()
            driver.quit()
            return None

    elif href is None:
        queue.put("href is None")
        driver.save_screenshot("d:/"+ ASIN +"_activity.png")
        if len(driver.find_elements_by_xpath("//div[starts-with(@id,'dealCountdownTimer')]")) > 0:
            fba = u"活动中,请手动获取库存"
        else:
            fba = u"此产品需登录才能看到库存"
        print (fba)
        driver.quit()
        result.append(fba)
        return result

    fba = ""  # Initialise so the final print can't hit an unbound name.
    try:
        queue.put("try click cart")
        driver.find_element_by_id("hlb-view-cart-announce").click()
    except:
        traceback.print_exc()
        return None
    queue.put("try click cart")
    sel = driver.find_element_by_name("quantity")
    Select(sel).select_by_value("10")
    driver.find_element_by_name("quantityBox").send_keys("999")
    # Click the update button.
    driver.find_element_by_xpath("//a[@data-action='update']").click()
    # Read the FBA stock from the quantity-update message.
    try:
        queue.put("saving screenshot")
        driver.save_screenshot("d:/" + ASIN + "_fba.png")
        text = driver.find_element_by_class_name("sc-quantity-update-message").text
        # print(text)
        # The UK and US ("com") sites share the same English message format.
        if countrycode in ("uk", "com"):
            if text.find("only") > 0:
                fba = text.split("only")[1].split("of")[0]
            elif text.find("limit") > 0:
                fba = "limit " + text.split("of")[1].split("per")[0]
        if countrycode == "de":
            if text.find("pro Kunde") > 0:
                fba = "limit " + text.split("lediglich")[1].split("Exemplare")[0]
            elif text.find("nur") > 0:
                fba = text.split("Exemplare")[0].split("nur")[1]
        if countrycode == "fr":
            if text.find("uniquement disponibles") > 0:
                fba = text.split(":")[1].split(".")[0]
            elif text.find("par client") > 0:
                fba = "limit "+ text.split(":")[1].split(".")[0]
        if countrycode == "it":
            if text.find("articoli disponibili") > 0:
                fba = text.split("solo")[1].split("articoli")[0]
            elif text.find("per cliente") > 0:
                fba = "limit "+ text.split(":")[1].split(".")[0]
        if countrycode == "jp":
            if text.find(u"お取り扱い数") > 0:
                fba = text.split(u"お取り扱い数は")[1].split(u"点")[0]
            elif text.find(u"一人様") > 0:
                fba = "limit" + text.split(u"一人様")[1].split(u"点")[0]

        print("fba:===="+fba)
        queue.put("fba:"+fba)
    except:
        traceback.print_exc()
        if driver.find_element_by_id("sc-subtotal-label-activecart") is not None and "999" in driver.find_element_by_id("sc-subtotal-label-activecart").text:
            fba = "999+"
        else:
            print("return None retry")
            return None
    result.append(fba)
    driver.get_screenshot_as_file("d:/4.png")
    driver.quit()
    return result
Example 18
 def get_imgs_by_product_url(self, url):
     res = requests.get(url)
     html = GlobalTools.getResponseContent(res)
     # Return the image list element; the original discarded this value.
     return html.find(id="main-image-container").find("ul")
Example 19
def newfba(asin):
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        # "Content-Length": "547",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        # "Content-Type": "text/html;charset=UTF-8",
        "Host": "www.amazon.co.uk",
        "Origin": "https://www.amazon.co.uk",
        "Content-Encoding": "br",
        "Pragma": "no-cache",
        "Referer": "https://www.amazon.co.uk/gp/cart/view.html/ref=lh_cart_vc_btn",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
        "X-AUI-View": "Desktop",
        "X-Requested-With": "XMLHttpRequest",
        "Cookie":
        """s_nr=1507528377078-New; s_vnum=1939528377079%26vn%3D1; s_dslv=1507528377080; x-wl-uid=1Vm2WQeeHRHErocdZmw5/GK41LYoPF67ySPkncJEPAiRRhWfNF0OyPa9yuT4S7+FNdyHQwhizugO0QMrffNe4I2JzXtJIy14CzCSmvUSme8lqhoZjjh77OF8sXJ/jQGXBcjMLuoUEESU=; at-acbuk=Atza|IwEBIOoVGprM0g2Qazrt-ifX53XNbsi7XFYs1OZNmIDgeZD6a5i2s7p4JuLWL6fC30oebF1OGUvU7z7HI266F0nMzVdpN8mWBQ1uOoa0XcmqZYdODKvv57Rq3jARRIaOoqkDAS6Ke6QFIjp1s1V6ZnPftLOOaz9uKLjRlvbMvtD57XnNZq2blSLo8IqJh0BhgpIH1K7cfEd7zgHGInlid0GyjKhMTaN5oRoZEzbvHAl9aHx15bRG8rKSbqpHQMeylRnYRnOirQGFgyPs2zQUp6YtUbivSlb8LGmOXL8aQaqZSE2lwyI3Sy9cGtDbBucHLB-OK4t89Rf5NIMRMSM-uMddzWr504Cg7_bOJ6RZFABsEDvdDEIItPRgnhrDksbMefih0AQSF8jnS9xXg3UbX9tqRbjA; amznacsleftnav-328511b4-0414-31f1-91c6-bca31c30900c=1; x-acbuk="DpcKYrm9@Uw75TNjTsXwmX79eaa3NMP2dk5ZlsVntw6MXujQjHcGEerpfDRFK8hR";session-token=9SQ2EeLcEOiWNXk9Km/DNS6S1V0UZwProvVruiPJrCVgmxhyesgqA/fp58r9T9x2sKqlQqrsEEER26oL2mWsLSDfPDsZIgbKwHiWox5/i0IB0R8heds6DI1HK15chFLvoLUg/J8JaqgwtAoINSoQpvXPRngz83hB73b9x54TmuIuxH8LyuVsQlHkt5CeOaWAKHpif0qNYASaMLmf/Q0EDRW8RO0yBFk+SPYTIZwRv8wy4200Mchhe4UhrsdJOX4aubGsciZgiUtFN7fjp4F4NQ=="; lc-acbuk=en_GB; ubid-acbuk=261-6573040-2508135; session-id-time=2082758401l; session-id=259-7896668-2728509; csm-hit=DQ3DSN2G6C2P8DBSE4K4+s-4CDTDE03S82FARC6XGS1|1514455697049"""
    }

    url = "https://www.amazon.co.uk/gp/cart/ajax-update.html/ref=ox_sc_update_quantity_1%7C9%7C11"

    data = {
        "hasMoreItems": 0,
        "timeStamp": 1514454024,
        "token": "gFHNsVRD27zMiOpe+yYpwFsAOZohN8u+a5VmqKkAAAAJAAAAAFpEvAhyYXcAAAAA",
        "activeItems": "C31HAVQP205TNO|1|0|5|3.05|||0||",
        "addressId": "",
        "addressZip": "",
        "closeAddonUpsell": 1,
        "flcExpanded": 0,
        "quantity.C31HAVQP205TNO": "11",
        "pageAction": "update-quantity",
        "submit.update-quantity.C31HAVQP205TNO": "1",
        "actionItemID": "C31HAVQP205TNO",
        "requestID": "EFHWWNTW6V3PRPMTQVWY",
        "asin": "B003KN7PU2",
        "encodedOffering": "%2BMwdK243Pp3oHjtzeyP6rdX8pnsybQAfRMa%2FX803XTXSTS7T%2BThAv741wG3TqvzM2kBUhnHpgojcF03P1%2FiSGuiN%2F5D6331v80WV2YLu2HU%3D"
    }

    # The declared Content-Type is form-urlencoded, so pass the dict itself
    # and let requests encode it; posting through the session keeps the
    # cookies picked up by the warm-up GET.
    session = requests.session()
    session.get("https://www.amazon.co.uk", headers=GlobalTools.getHeaders())
    res = session.post(url, headers=headers, data=data)
    print(res.content)
    jsonobj = json.loads(res.content.decode("utf-8"))
    print(jsonobj['features']['imb'])
    print(jsonobj['features']['nav-cart'])
Example 20
def main(queue):
    if not os.path.isfile(GlobalTools.getExcelFile("fba")):
        queue.put("ERROR:"+u"请将fba.xls放到和amazon.exe相同目录下")
        exit(0)

    productlinks = []

    rb = xlrd.open_workbook(GlobalTools.getExcelFile("fba"))

    try:
        sheet = rb.sheet_by_index(0)
        count = sheet.nrows
        for i in range(0,count):
            print(sheet.cell_value(i,0))
            productlinks.append(sheet.cell_value(i,0))
    except Exception:
        queue.put("ERROR:" + u"请保证文件包含商品链接")
        exit(0)

    wb = copy(rb)
    sheet = wb.add_sheet(time.strftime(u"%m-%d_%H-%M",time.localtime(time.time())))

    pool = multiprocessing.Pool(processes=5)

    currrow = 0
    results = []


    for link in productlinks:
        if link.strip() == "":
            currrow += 1
            continue
        # Derive the country code from the domain, e.g. amazon.co.uk -> "uk".
        countrycode = link.split("amazon.")[1].split(".")[-1].split('/')[0]
        results.append(pool.apply_async(fun, (queue, link, currrow, countrycode)))
        currrow += 1

    pool.close()
    pool.join()

    tmpresult = []

    for result in results:
        try:
            row = result.get()
            tmpresult.append(row)
            currrow = row[0]
            print("currrow:"+str(currrow))
            col = 0
            for i in range(1,len(row)):
                sheet.write(currrow, col, row[i])
                col+=1
        except Exception:
            # Skip rows from workers that crashed.
            pass

    try:
        wb.save(GlobalTools.getExcelFile("fba"))
    except Exception:
        # Saving failed; dump the rows to a text file so results aren't lost.
        with open("./tmp.txt", "w+") as tmp:
            for row in tmpresult:
                tmp.write(str(row) + "\n")
        queue.put("ERROR:" + u"保存文件失败,运行时,请不要打开fba.xls文件")

    queue.put("finish.")
Example 21
 def __init__(self, seller):
     self.marketplaceid = GlobalTools.getMarketplaceID()
     self.headers = GlobalTools.getHeaders()
     self.url = GlobalTools.getSearchShopProductsUrl()
     self.seller = seller