def prerequest(self):
    queue = self.queue
    queue.put("prerequest")
    print("prerequest")
    GlobalTools.setbaseurl(self.baseurl)
    res = requests.get(self.url, headers=self.headers)
    self.res = res
    html = GlobalTools.getResponseContent(self.res)
    if html.find(id="add-to-cart-button") is None:
        if html.find(id="availability") is not None:
            # print("text" + html.find(id="availability").text)
            url = self.baseurl + html.find(
                id="availability").find("a").get('href')
            self.second_url = url
            res = requests.get(url, headers=GlobalTools.getHeaders())
            html = GlobalTools.getResponseContent(res)
            try:
                price = html.find(class_="olpOfferPrice").text.strip()
                self.unnormal_price = price
                print(price)
                shop = html.find(class_="olpSellerName").text
                self.unnormal_shop = shop
                print(shop)
            except:
                traceback.print_exc()
        self.normal_situation = False
        return False
    return True
def __init__(self, asin, positive_count, negtive_count, debug=False):
    self.debug = debug
    self.baseurl = GlobalTools.getbaseurl()
    self.asin = asin
    self.headers = GlobalTools.getHeaders()
    # self.positiveurl = self.baseurl + "/ss/customer-reviews/ajax/reviews/get/ref=cm_cr_arp_d_viewpnt_lft"
    self.positiveurl = self.baseurl + "/hz/reviews-render/ajax/reviews/get/ref=cm_cr_arp_d_viewpnt_lft"
    # Four negative reviews per page, so use integer division for the page count.
    self.negtive_page_count = negtive_count // 4 + 1
    self.positivecount = positive_count
    self.negtivecount = negtive_count
    self.positivevote = 0
    self.negtivevote = 0
    self.POSITIVE_VOTE_TYPE = 0
    self.NEGTIVE_VOTE_TYPE = 1
def getCategory(self):
    try:
        # Some products have no category; leave it empty here and fall back to
        # the first category later when parsing the category ranking.
        menu_levels = self.html.find(
            id="wayfinding-breadcrumbs_feature_div").find_all(
                attrs={"class": "a-list-item"})
        count = len(menu_levels)
        self.resultmap['first_level_menu'] = GlobalTools.removeBlankChars(
            menu_levels[0].text)
        if count >= 3:
            self.resultmap['second_level_menu'] = GlobalTools.removeBlankChars(
                menu_levels[2].text)
    except:
        pass
def get_email(self, url):
    print("******************")
    print("url:" + url)
    headers = GlobalTools.getHeaders()
    headers['X-Requested-With'] = 'XMLHttpRequest'
    headers['Referer'] = 'https://www.amazon.de/gp/profile/amzn1.account.AF3BW3DYKKEHMR4HSAFIQDM62QNQ/ref=cm_cr_getr_d_pdp?ie=UTF8'
    headers['Cookie'] = 's_nr=1507517187408-New; s_vnum=1939517187408%26vn%3D1; s_dslv=1507517187409; x-acbde="6gxYYwpBpG20FBChzzu9sn?hypH9MpwKF0gVmk2LOxnYWw2uE@5B3Qh7Df?gkrXM"; at-acbde=Atza|IwEBIFPo-tRvBxygSgF8Ard63lJANpi78TG-8BUTC8ScSLLiUskUDIh0VMUwG_l8fsWqij5ArfksGmp6Ks52ZiYPS0bJeoDkACAtCZF6h3ePo0yqw9jdKVsq4edrTZPfLFYYYaRsbNyD2x09klSn7jKaU8Sn56Cr4VCIx_H8LObqLF2bX6Aq0EWW-O0PoBHgkdYI9iPhMo_2OHQjWuFAeinw0dU1M7X-SWBl2wB4FtzVXlQzarbwLjsHxXSaw2LwX3ENF6oCHOh73pPPnTX68JEedEkLu-sOSL2eZ5Whe7zJ2L76yyEzyjVXQpWbDdUqUP58MdLTNLfhCM5LkwWGmd7fuoLC1u7sZhBkJSA6oLQ0Q3kua5e8x0LfI3HfLZwC6qzrDJ6pheW0my98MFK4r9JaG85Z; sess-at-acbde="d7DXrZglD8+7+42k5qmlfFUxSpHJkUg8H1Dz17ZCU+U="; x-wl-uid=1WLJUGaYF93xUQuJRK3PCgsu0IJeaJoL7J/7XRaD4Men7E4FPUEro4vxW+rjyvLb9XCGGKFNM1yrtwZ9b9BK3yXkMKCav41q6XBiaxBqGmVWG1vMYfNxoP30XR5Otq5GKr5uenX7TA98=; session-token="1o+pNqOm6F7uZWrYdtDbU26LiB8ByJ40B64c+JFwPh3lkBt1MbUn+ha6qR3BaTgduMMVK1e1LjJ6pnoF+/r3c4PUBDfax7J+AGcgt2QiXkvMdVyLjyDowIQtWUbeHi6V4hfxIhgrYGcAyZ4x4keQvPaEHOW0v8t8akQV0nmi5sj1Jzu8pn162bmTw0XLP88olTMWGCWAeJlHGsXpCvyiS1VrFGHpgj2xSW3j5jdNi8DCjE4R7E+EqR+4BNFVQs+1KUR7bf9qBMWu3xT7DDe9KQ=="; session-id-time=2082754801l; session-id=261-5557160-1959728; ubid-acbde=258-5984155-0914160; csm-hit=0CP5W9ZYZNFE06XFCV0V+b-9KE07PDF4YD27JB8DFQQ|1509967781417'
    # Pass the dict via headers=; requests.get()'s second positional argument is params.
    res = requests.get(url, headers=headers)
    print(res.status_code)
    html = GlobalTools.getResponseContent(res)
    print(html)
    print("******************")
def parse(self, sheet, currrow):
    queue = self.queue
    self.result = [currrow]
    print("")
    queue.put(u"商品链接")
    queue.put(self.url)
    print("商品链接:")
    print(self.url)
    self.html = GlobalTools.getResponseContent(self.res)
    self.geturl()
    self.getprice()
    self.getshopname()
    self.getbrand()
    self.getfirstavailable()
    self.getranking()
    self.getqa()
    self.getCategory()
    self.getstars()
    self.getreviewcount()
    self.getgoodreviewvote()
    # The US review count is tallied inside getgoodreviewvote, so recompute it here.
    # if self.countrycode == "us":
    #     self.adjustreviewcount()
    if self.us_reviews_need_adjust:
        self.getusviewcount()
    self.getfba()
    print(self.resultmap)
    return self.getresult()
def __init__(self, queue, asin, countrycode):
    self.queue = queue
    self.countrycode = countrycode
    self.baseurl = GlobalTools.getBaseurlFromCountrycode(countrycode)
    self.headers = GlobalTools.getHeaders()
    self.asin = asin
    self.url = get_link_by_asin(asin, self.baseurl)
    # If we can't get a normal product page, this kind of URL can't be used to
    # read a price and shop name. The fallback (offer-listing) link looks like:
    # http://www.amazon.de/gp/offer-listing/B01N52QW8A/ref=dp_olp_0?ie=UTF8&condition=all
    self.second_url = ""
    self.normal_situation = True
    self.unnormal_price = ""
    self.unnormal_shop = ""
    self.resultmap = {}
    self.result = []
    self.us_reviews_need_adjust = False
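# A minimal single-ASIN usage sketch (not part of the original sources), following the
# same call sequence as single_thread_main below. scrape_one is a hypothetical name;
# it assumes the amazon class above is importable. Note that parse() never touches the
# `sheet` argument itself, so None is acceptable when no workbook is being written.
from queue import Queue

def scrape_one(asin, countrycode="uk"):
    q = Queue()                           # only collects the progress messages put by prerequest/parse
    obj = amazon(q, asin, countrycode)    # resolves the product URL via get_link_by_asin
    obj.prerequest()                      # fetches the page; falls back to the offer listing if needed
    return obj.parse(None, 1)             # parse() reads self.res, which prerequest has set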
def get_profile_info(self, url):
    """Example profile URL:
    https://www.amazon.co.uk/gp/profile/amzn1.account.AHTYNWDHL6M2WCVS7LOUVFXBLLFQ/ref=cm_cr_getr_d_gw_btm?ie=UTF8
    """
    profileid = url.split("profile/")[1].split("/ref")[0]
    url = "https://www.amazon.co.uk/profilewidget/bio/" + profileid + "?view=visitor"
    print("in get_profile_info")
    headers = GlobalTools.getHeaders()
    headers['Cookie'] = 's_nr=1507517187408-New; s_vnum=1939517187408%26vn%3D1; s_dslv=1507517187409; x-acbde="6gxYYwpBpG20FBChzzu9sn?hypH9MpwKF0gVmk2LOxnYWw2uE@5B3Qh7Df?gkrXM"; at-acbde=Atza|IwEBIFPo-tRvBxygSgF8Ard63lJANpi78TG-8BUTC8ScSLLiUskUDIh0VMUwG_l8fsWqij5ArfksGmp6Ks52ZiYPS0bJeoDkACAtCZF6h3ePo0yqw9jdKVsq4edrTZPfLFYYYaRsbNyD2x09klSn7jKaU8Sn56Cr4VCIx_H8LObqLF2bX6Aq0EWW-O0PoBHgkdYI9iPhMo_2OHQjWuFAeinw0dU1M7X-SWBl2wB4FtzVXlQzarbwLjsHxXSaw2LwX3ENF6oCHOh73pPPnTX68JEedEkLu-sOSL2eZ5Whe7zJ2L76yyEzyjVXQpWbDdUqUP58MdLTNLfhCM5LkwWGmd7fuoLC1u7sZhBkJSA6oLQ0Q3kua5e8x0LfI3HfLZwC6qzrDJ6pheW0my98MFK4r9JaG85Z; sess-at-acbde="d7DXrZglD8+7+42k5qmlfFUxSpHJkUg8H1Dz17ZCU+U="; x-wl-uid=1WLJUGaYF93xUQuJRK3PCgsu0IJeaJoL7J/7XRaD4Men7E4FPUEro4vxW+rjyvLb9XCGGKFNM1yrtwZ9b9BK3yXkMKCav41q6XBiaxBqGmVWG1vMYfNxoP30XR5Otq5GKr5uenX7TA98=; session-token="1o+pNqOm6F7uZWrYdtDbU26LiB8ByJ40B64c+JFwPh3lkBt1MbUn+ha6qR3BaTgduMMVK1e1LjJ6pnoF+/r3c4PUBDfax7J+AGcgt2QiXkvMdVyLjyDowIQtWUbeHi6V4hfxIhgrYGcAyZ4x4keQvPaEHOW0v8t8akQV0nmi5sj1Jzu8pn162bmTw0XLP88olTMWGCWAeJlHGsXpCvyiS1VrFGHpgj2xSW3j5jdNi8DCjE4R7E+EqR+4BNFVQs+1KUR7bf9qBMWu3xT7DDe9KQ=="; session-id-time=2082754801l; session-id=261-5557160-1959728; ubid-acbde=258-5984155-0914160; csm-hit=0CP5W9ZYZNFE06XFCV0V+b-9KE07PDF4YD27JB8DFQQ|1509967781417'
    res = requests.get(url, headers=headers)
    # html = GlobalTools.getResponseContent(res)
    # htmltxt = html.text
    # s = htmltxt.split("window.CustomerProfileRootProps")[1].split("window.PageContext")[0].replace("=", "").replace(";", "").strip()
    # print(s)
    # s = s.decode(encoding="utf-8")
    s = res.text
    try:
        jsonobj = json.loads(s)
    except:
        # Dump the raw response for debugging when the bio widget does not return JSON.
        tmp = open("tmp2.html", "w+")
        tmp.write(s)
        tmp.close()
        exit(1)
    # name = jsonobj["nameHeaderData"]["name"]
    # print("name:" + name)
    reviewRank = jsonobj['topReviewerInfo']['rank']
    print(reviewRank)
def get_product_images(self):
    for img in self.imgurls:
        res = requests.get(img, headers=GlobalTools.getHeaders())
        # Name each file after the ASIN plus the image's own filename
        # (the original used self.url here, which gave every image the same name).
        filename = self.asin + "_" + img.split("/")[-1]
        with open(filename, "wb") as f:
            f.write(res.content)
def get_following_by_asin(asin, baseurl):
    headers = GlobalTools.getHeaders()
    url = baseurl + "/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=" + str(asin)
    print("get url:" + url)
    res = requests.get(url, headers=headers)
    # print("res:headers:")
    # print(res.headers)
    # Amazon sometimes returns a brotli-compressed body.
    if res.headers['Content-Encoding'] == "br":
        content = brotli.decompress(res.content)
    else:
        content = res.content
    html = BeautifulSoup(content, "lxml")
    # Dump the raw page for debugging.
    with open("tmp2.html", "wb") as tmp:
        tmp.write(content)
    links = (html.find(id="s-results-list-atf")).find_all('a')
    for link in links:
        if 'offer-listing' in link['href']:
            # e.g. "(3 offers)" -> more than one offer means the listing is being followed.
            if int(link.text.strip().split('(')[1].split()[0]) > 1:
                return [True, link['href']]
            return [False]
    return [False]
def get_shop_info(self):
    headers = GlobalTools.getHeaders()
    res = requests.get(self.link, headers=headers)
    html = BeautifulSoup(res.text, 'lxml')
    print(html.find(id="sellerName").text)
    feedback = html.find(id="feedback-summary-table")
    feedbacktab = feedback.find_all("tr")
    timescoop = feedbacktab[0].find_all("th")
    for item in timescoop:
        print(item.text)
    positive = feedbacktab[1].find_all("td")
    neutral = feedbacktab[2].find_all("td")
    negtive = feedbacktab[3].find_all("td")
    count = feedbacktab[4].find_all("td")
    # Print the feedback table row by row.
    for feedback in feedbacktab:
        line = feedback.find_all("td")
        for item in line:
            print(item.text.strip("\n").strip() + ",", end=" ")
        print()
    products = html.find(id="product-data").find_all(
        attrs={'class': "product-details"})
    for product in products:
        titlelink = product.find('a', attrs={'class': "product-title"})
        title = titlelink.get('title')
        href = titlelink.get('href')
        price = product.find('div', attrs={'class': 'product-price'})
        ranting = product.find('div', class_="product-rating")
        # price and ranting are bs4 tags, so print their text rather than the tag objects.
        print(title + " " + href + " " + price.text.strip() + " " + ranting.text.strip())
def getFlowingList(url):
    res = requests.get(url, headers=GlobalTools.getHeaders())
    if res.headers['Content-Encoding'] == "br":
        html = BeautifulSoup(brotli.decompress(res.content), "lxml")
    else:
        html = BeautifulSoup(res.content, "lxml")
    followerlist = html.find(id="olpOfferList").find_all(class_="olpOffer")
    resultlist = []
    for follow in followerlist:
        followerNameElem = follow.find(class_="olpSellerName")
        if len(followerNameElem.find_all("a")) > 0:
            # Third-party sellers link to their storefront.
            followerName = followerNameElem.text
            url = GlobalTools.getBaseurlFromCountrycode("uk") + (followerNameElem.find("a"))['href']
        elif len(followerNameElem.find_all("img")) > 0:
            # Amazon itself appears as an image instead of a link.
            followerName = followerNameElem.find("img")['alt']
            url = "https://amazon.com"
        else:
            followerName = ""
            url = ""
        print(followerName, url)
def getusviewcount(self):
    asin = self.asin
    url = "https://www.amazon.com/product-reviews/" + asin + "/ref=acr_dpx_see_all?ie=UTF8&showViewpoints=1"
    res = requests.get(url, headers=GlobalTools.getHeaders())
    html = GlobalTools.getResponseContent(res)
    viewpoints = html.find_all(id=re.compile("viewpoint-"))
    if len(viewpoints) > 0:
        try:
            positive = viewpoints[0].find_all(
                attrs={"data-reftag": "cm_cr_arp_d_viewpnt_lft"})[0].text
            self.resultmap['positivereviewcount'] = int(
                positive.split("positive")[0].split("all")[1].strip())
        except:
            pass
    if len(viewpoints) > 1:
        try:
            negtive = viewpoints[1].find_all(
                attrs={"data-reftag": "cm_cr_arp_d_viewpnt_rgt"})[0].text
            self.resultmap['negtivereviewcount'] = int(
                negtive.split("critical")[0].split("all")[1].strip())
        except:
            pass
    print(viewpoints)
def get_link_by_asin(asin, baseurl):
    # print("in get_link_by_asin")
    headers = GlobalTools.getHeaders()
    # baseurl = "http://www.amazon.co.uk"
    url = baseurl + "/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=" + str(asin)
    # url = baseurl
    # Request params, e.g. url: search-alias=aps, field-keywords: B01KGUMWJU
    params = {
        # "url": "search-alias=aps",
        # "field-keywords": asin
    }
    # proxies = {
    #     "http": "123.148.74.107:80",
    #     "https": "218.18.10.11:9797"
    # }
    print("get url:" + url)
    res = requests.get(url, headers=headers)
    print("res:headers:")
    print(res.headers)
    # Decompress manually when Amazon answers with brotli.
    if res.headers['Content-Encoding'] == "br":
        content = brotli.decompress(res.content)
    else:
        content = res.content
    html = BeautifulSoup(content, "lxml")
    # Dump the raw page for debugging.
    with open("tmp2.html", "wb") as tmp:
        tmp.write(content)
    # print("url:" + url)
    # print(html.find(id="centerMinus"))
    # link = html.find(id="s-results-list-atf")
    link = (html.find(id="s-results-list-atf")).find(
        'a', attrs={'class': 's-access-detail-page'})
    link = link.get('href')
    link = link.split("&qid")[0]
    print("link:" + link)
    return link
def single_thread_main():
    if not os.path.isfile("/Users/eddie/PycharmProjects/amaproj/uk.xls"):
        messagebox.showerror("error", u"请将uk.xls放到和amazon.exe相同目录下")
        exit(0)
    products = []
    rb = xlrd.open_workbook("/Users/eddie/PycharmProjects/amaproj/uk.xls")
    try:
        sheet = rb.sheet_by_name("asin")
        count = sheet.nrows
        for i in range(0, count):
            print(sheet.cell_value(i, 0))
            products.append(sheet.cell_value(i, 0))
    except:
        messagebox.showerror("error", u"uk.xls中必须包含名字为asin的sheet")
        exit(0)
    wb = copy(rb)
    sheet = wb.add_sheet(
        time.strftime(u"%m-%d_%H-%M", time.localtime(time.time())))
    # Write the header row.
    tableheaders = GlobalTools.get_table_header()
    row = 0
    col = 0
    for item in tableheaders:
        sheet.write(row, col, item)
        col += 1
    currrow = 1
    from queue import Queue
    for product in products:
        amazonobj = amazon(Queue(), product, "uk")
        amazonobj.prerequest()
        result = amazonobj.parse(sheet, currrow)
        currrow += 1
    try:
        wb.save("/Users/eddie/PycharmProjects/amaproj/uk.xls")
    except:
        messagebox.showerror("error", u"保存文件失败,运行时,请不要打开uk.xls文件")
def get_link_by_asin(asin, baseurl):
    headers = GlobalTools.getHeaders()
    # url = baseurl + "/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=" + str(asin)
    url = baseurl + "/s?k=" + asin + "&ref=nb_sb_noss_2"
    print("get url:" + url)
    res = requests.get(url, headers=headers)
    # print("res:headers:")
    print(res.headers)
    if res.headers['Content-Encoding'] == "br":
        html = BeautifulSoup(brotli.decompress(res.content), "lxml")
        with open("searchasin.html", "w") as f:
            f.write(brotli.decompress(res.content).decode("utf-8"))
    else:
        html = BeautifulSoup(res.content, "lxml")
        with open("searchasin.html", "w") as f:
            f.write(res.content.decode("utf-8"))
    # tmp = open("tmp2.html", "w")
    # if res.headers['Content-Encoding'] == "br":
    #     tmp.write(brotli.decompress(res.content))
    # else:
    #     tmp.write(res.content.decode("utf-8"))
    # tmp.close()
    #
    # link = html.find_all(class_="s-search-results")[1].find_all('a', attrs={'class': 'a-text-normal'})[0]
    # link = link.get('href')
    # link = link.split("&qid")[0]
    # print("link:" + baseurl + link)
    # return baseurl + link
    # return baseurl + "/dp/" + asin + "/ref=redir_mobile_desktop"
    t = html.find_all(class_="s-search-results")[1]
    productslink = t.find_all("a")
    for item in productslink:
        if "/dp/" + asin in item.get('href'):
            return baseurl + (item.get('href').split("&qid")[0])
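# The Content-Encoding == "br" branch above repeats across several fetch helpers
# (get_following_by_asin, getFlowingList, and both get_link_by_asin variants).
# A shared helper could look like the sketch below; get_soup is a hypothetical
# name and is not part of the original sources.
import brotli
from bs4 import BeautifulSoup

def get_soup(res):
    # Decompress manually only when Amazon answers with brotli;
    # requests already transparently handles gzip/deflate.
    if res.headers.get('Content-Encoding') == "br":
        content = brotli.decompress(res.content)
    else:
        content = res.content
    return BeautifulSoup(content, "lxml")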
def main(queue, countrycode):
    if not os.path.isfile(GlobalTools.getExcelFile(countrycode)):
        # messagebox.showerror("error", u"请将uk.xls放到和amazon.exe相同目录下")
        queue.put(u"ERROR:请将" + countrycode + u".xls放到和amazon.exe相同目录下")
        exit(0)
    products = []
    rb = xlrd.open_workbook(GlobalTools.getExcelFile(countrycode))
    try:
        sheet = rb.sheet_by_name("asin")
        count = sheet.nrows
        for i in range(0, count):
            print(sheet.cell_value(i, 0))
            products.append(sheet.cell_value(i, 0))
    except:
        # messagebox.showerror("error", u"uk.xls中必须包含名字为asin的sheet")
        queue.put(u"ERROR:" + countrycode + u".xls中必须包含名字为asin的sheet")
        exit(0)
    print("copy")
    wb = copy(rb)
    sheet = wb.add_sheet(
        time.strftime(u"%m-%d_%H-%M", time.localtime(time.time())))
    # Write the header row.
    tableheaders = GlobalTools.get_table_header()
    row = 0
    col = 0
    for item in tableheaders:
        sheet.write(row, col, item)
        col += 1
    pool = multiprocessing.Pool(processes=5)
    currrow = 1
    results = []
    for product in products:
        results.append(
            pool.apply_async(fun, (
                queue,
                product,
                countrycode,
                currrow,
                sheet,
            )))
        currrow += 1
    pool.close()
    pool.join()
    # for res in results:
    #     print(res.get())
    for result in results:
        try:
            row = result.get()
            currrow = row[0]
            print("currrow:" + str(currrow))
            col = 0
            for i in range(1, len(row)):
                sheet.write(currrow, col, row[i])
                col += 1
        except:
            pass
    try:
        wb.save(GlobalTools.getExcelFile(countrycode))
    except:
        queue.put(u"ERROR:保存文件失败,运行时,请不要打开站点对应的xls文件")
        exit(0)
    queue.put("finish.")
def get_fba(queue, url, currrow, countrycode):
    result = [currrow]
    caps = dict(DesiredCapabilities.PHANTOMJS)
    caps["phantomjs.page.settings.userAgent"] = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
    caps["phantomjs.page.settings.loadImages"] = False
    queue.put("try to get phantomjs driver")
    driver = webdriver.PhantomJS(desired_capabilities=caps)
    # driver = webdriver.Chrome()
    queue.put("init driver")
    init_driver(driver)
    # queue.put("before link to other page")
    href = link_to_other_page(driver, url)
    # queue.put("after")
    # ASIN is expected to be set as a module-level global before this point.
    print("ASIN:" + ASIN)
    result.append(ASIN)
    if href is not None and href != NORMAL_ADD_TO_CART:
        print("link to other page")
        queue.put("link to other page")
        try:
            driver.get(href)
        except:
            traceback.print_exc()
            # driver.save_screenshot("d:/" + ASIN + "_timeout.png")
            driver.save_screenshot("d:/" + ASIN + "_timeout.png")
            driver.execute_script('window.stop()')
            init_driver(driver)
        driver.save_screenshot(GlobalTools.getimgsavepath(ASIN, "add_to_cart"))
        try:
            driver.find_element_by_name("submit.addToCart").click()
        except:
            traceback.print_exc()
            driver.quit()
            return None
    elif href is None:
        queue.put("href is None")
        driver.save_screenshot("d:/" + ASIN + "_activity.png")
        if len(driver.find_elements_by_xpath("//div[starts-with(@id,'dealCountdownTimer')]")) > 0:
            fba = u"活动中,请手动获取库存"
        else:
            fba = u"此产品需登录才能看到库存"
        print(fba)
        driver.quit()
        result.append(fba)
        return result
    try:
        queue.put("try click cart")
        driver.find_element_by_id("hlb-view-cart-announce").click()
    except:
        traceback.print_exc()
        return None
    queue.put("try click cart")
    sel = driver.find_element_by_name("quantity")
    Select(sel).select_by_value("10")
    driver.find_element_by_name("quantityBox").send_keys("999")
    # Click "update".
    driver.find_element_by_xpath("//a[@data-action='update']").click()
    # Get the FBA stock from the quantity-update message.
    try:
        queue.put("saving screenshot")
        driver.save_screenshot("d:/" + ASIN + "_fba.png")
        text = driver.find_element_by_class_name("sc-quantity-update-message").text
        # print(text)
        if countrycode == "uk":
            if text.find("only") > 0:
                fba = text.split("only")[1].split("of")[0]
            elif text.find("limit") > 0:
                fba = "limit " + text.split("of")[1].split("per")[0]
        # The US site ends with "com".
        if countrycode == "com":
            if text.find("only") > 0:
                fba = text.split("only")[1].split("of")[0]
            elif text.find("limit") > 0:
                fba = "limit " + text.split("of")[1].split("per")[0]
        if countrycode == "de":
            if text.find("pro Kunde") > 0:
                fba = "limit " + text.split("lediglich")[1].split("Exemplare")[0]
            elif text.find("nur") > 0:
                fba = text.split("Exemplare")[0].split("nur")[1]
        if countrycode == "fr":
            if text.find("uniquement disponibles") > 0:
                fba = text.split(":")[1].split(".")[0]
            elif text.find("par client") > 0:
                fba = "limit " + text.split(":")[1].split(".")[0]
        if countrycode == "it":
            if text.find("articoli disponibili") > 0:
                fba = text.split("solo")[1].split("articoli")[0]
            elif text.find("per cliente") > 0:
                fba = "limit " + text.split(":")[1].split(".")[0]
        if countrycode == "jp":
            if text.find(u"お取り扱い数") > 0:
                fba = text.split(u"お取り扱い数は")[1].split(u"点")[0]
            elif text.find(u"一人様") > 0:
                fba = "limit" + text.split(u"一人様")[1].split(u"点")[0]
        print("fba:====" + fba)
        queue.put("fba:" + fba)
    except:
        traceback.print_exc()
        if driver.find_element_by_id("sc-subtotal-label-activecart") is not None and "999" in driver.find_element_by_id("sc-subtotal-label-activecart").text:
            fba = "999+"
        else:
            print("return None retry")
            return None
    result.append(fba)
    driver.get_screenshot_as_file("d:/4.png")
    driver.quit()
    return result
def get_imgs_by_product_url(self, url):
    res = requests.get(url)
    html = GlobalTools.getResponseContent(res)
    # Incomplete: locates the main image list but does not yet extract the image URLs.
    html.find(id="main-image-container").find("ul")
def newfba(asin):
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        # "Content-Length": "547",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        # "Content-Type": "text/html;charset=UTF-8",
        "Host": "www.amazon.co.uk",
        "Origin": "https://www.amazon.co.uk",
        "Content-Encoding": "br",
        "Pragma": "no-cache",
        "Referer": "https://www.amazon.co.uk/gp/cart/view.html/ref=lh_cart_vc_btn",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
        "X-AUI-View": "Desktop",
        "X-Requested-With": "XMLHttpRequest",
        "Cookie": """s_nr=1507528377078-New; s_vnum=1939528377079%26vn%3D1; s_dslv=1507528377080; x-wl-uid=1Vm2WQeeHRHErocdZmw5/GK41LYoPF67ySPkncJEPAiRRhWfNF0OyPa9yuT4S7+FNdyHQwhizugO0QMrffNe4I2JzXtJIy14CzCSmvUSme8lqhoZjjh77OF8sXJ/jQGXBcjMLuoUEESU=; at-acbuk=Atza|IwEBIOoVGprM0g2Qazrt-ifX53XNbsi7XFYs1OZNmIDgeZD6a5i2s7p4JuLWL6fC30oebF1OGUvU7z7HI266F0nMzVdpN8mWBQ1uOoa0XcmqZYdODKvv57Rq3jARRIaOoqkDAS6Ke6QFIjp1s1V6ZnPftLOOaz9uKLjRlvbMvtD57XnNZq2blSLo8IqJh0BhgpIH1K7cfEd7zgHGInlid0GyjKhMTaN5oRoZEzbvHAl9aHx15bRG8rKSbqpHQMeylRnYRnOirQGFgyPs2zQUp6YtUbivSlb8LGmOXL8aQaqZSE2lwyI3Sy9cGtDbBucHLB-OK4t89Rf5NIMRMSM-uMddzWr504Cg7_bOJ6RZFABsEDvdDEIItPRgnhrDksbMefih0AQSF8jnS9xXg3UbX9tqRbjA; amznacsleftnav-328511b4-0414-31f1-91c6-bca31c30900c=1; x-acbuk="DpcKYrm9@Uw75TNjTsXwmX79eaa3NMP2dk5ZlsVntw6MXujQjHcGEerpfDRFK8hR";session-token=9SQ2EeLcEOiWNXk9Km/DNS6S1V0UZwProvVruiPJrCVgmxhyesgqA/fp58r9T9x2sKqlQqrsEEER26oL2mWsLSDfPDsZIgbKwHiWox5/i0IB0R8heds6DI1HK15chFLvoLUg/J8JaqgwtAoINSoQpvXPRngz83hB73b9x54TmuIuxH8LyuVsQlHkt5CeOaWAKHpif0qNYASaMLmf/Q0EDRW8RO0yBFk+SPYTIZwRv8wy4200Mchhe4UhrsdJOX4aubGsciZgiUtFN7fjp4F4NQ=="; lc-acbuk=en_GB; ubid-acbuk=261-6573040-2508135; session-id-time=2082758401l; session-id=259-7896668-2728509; csm-hit=DQ3DSN2G6C2P8DBSE4K4+s-4CDTDE03S82FARC6XGS1|1514455697049"""
    }
    url = "https://www.amazon.co.uk/gp/cart/ajax-update.html/ref=ox_sc_update_quantity_1%7C9%7C11"
    data = {
        "hasMoreItems": 0,
        "timeStamp": 1514454024,
        "token": "gFHNsVRD27zMiOpe+yYpwFsAOZohN8u+a5VmqKkAAAAJAAAAAFpEvAhyYXcAAAAA",
        "activeItems": "C31HAVQP205TNO|1|0|5|3.05|||0||",
        "addressId": "",
        "addressZip": "",
        "closeAddonUpsell": 1,
        "flcExpanded": 0,
        "quantity.C31HAVQP205TNO": "11",
        "pageAction": "update-quantity",
        "submit.update-quantity.C31HAVQP205TNO": "1",
        "actionItemID": "C31HAVQP205TNO",
        "requestID": "EFHWWNTW6V3PRPMTQVWY",
        "asin": "B003KN7PU2",
        "encodedOffering": "%2BMwdK243Pp3oHjtzeyP6rdX8pnsybQAfRMa%2FX803XTXSTS7T%2BThAv741wG3TqvzM2kBUhnHpgojcF03P1%2FiSGuiN%2F5D6331v80WV2YLu2HU%3D"
    }
    # headers = urllib.quote(json.dumps(headers))
    comm_params = urllib.parse.quote(json.dumps(data))
    request = requests.session()
    request.get("https://www.amazon.co.uk", headers=GlobalTools.getHeaders())
    # res = request.post(url, headers=headers, data=comm_params)
    res = requests.post(url, headers=headers, data=comm_params)
    print(res.content)
    jsonobj = json.loads(res.content, encoding="utf-8")
    print(jsonobj['features']['imb'])
    print(jsonobj['features']['nav-cart'])
def main(queue):
    if not os.path.isfile(GlobalTools.getExcelFile("fba")):
        queue.put("ERROR:" + u"请将fba.xls放到和amazon.exe相同目录下")
        exit(0)
    productlinks = []
    rb = xlrd.open_workbook(GlobalTools.getExcelFile("fba"))
    try:
        sheet = rb.sheet_by_index(0)
        count = sheet.nrows
        for i in range(0, count):
            print(sheet.cell_value(i, 0))
            productlinks.append(sheet.cell_value(i, 0))
    except:
        queue.put("ERROR:" + u"请保证文件包含商品链接")
        exit(0)
    wb = copy(rb)
    sheet = wb.add_sheet(time.strftime(u"%m-%d_%H-%M", time.localtime(time.time())))
    pool = multiprocessing.Pool(processes=5)
    currrow = 0
    results = []
    for link in productlinks:
        if link.strip() == "":
            currrow += 1
            continue
        # Work out the marketplace from the domain, e.g. amazon.co.uk -> "uk".
        countrycode = link.split("amazon.")[1].split(".")[-1].split('/')[0]
        results.append(pool.apply_async(fun, (queue, link, currrow, countrycode)))
        currrow += 1
    pool.close()
    pool.join()
    # for res in results:
    #     print(res.get())
    # print("finish.")
    tmpresult = []
    for result in results:
        try:
            row = result.get()
            tmpresult.append(row)
            currrow = row[0]
            print("currrow:" + str(currrow))
            col = 0
            for i in range(1, len(row)):
                sheet.write(currrow, col, row[i])
                col += 1
        except:
            pass
    try:
        wb.save(GlobalTools.getExcelFile("fba"))
    except:
        # Dump the rows to a text file so the results are not lost if the xls is locked.
        tmp = open("./tmp.txt", "w+")
        for row in tmpresult:
            tmp.write(str(row) + "\n")
        queue.put("ERROR:" + u"保存文件失败,运行时,请不要打开fba.xls文件")
    queue.put("finish.")
def __init__(self, seller):
    self.marketplaceid = GlobalTools.getMarketplaceID()
    self.headers = GlobalTools.getHeaders()
    self.url = GlobalTools.getSearchShopProductsUrl()
    self.seller = seller