def grabSS(proxy):
    while True:
        try:
            headers = RandomHeaders.LoadHeader()
            driver = webdriver.PhantomJS(service_args=[
                '--proxy={}'.format(proxy),
                '--proxy-type=http'
            ])
            # driver = webdriver.PhantomJS()
            driver.get(URL)
            while driver.title == SPLASHTITLE:
                # This just visualizes the PhantomJS driver - you can replace
                # this with pass if you're trying to reduce memory
                driver.save_screenshot('{}.png'.format(
                    proxy.replace(':', '').replace('.', '')))
            cookies_list = driver.get_cookies()
            driver.close()
            driver.quit()
            # Converts the PhantomJS cookies into a Firefox webdriver to check out;
            # the proxy is set through a profile because geckodriver does not
            # accept proxy service args
            ip, port = proxy.split(':')
            profile = webdriver.FirefoxProfile()
            profile.set_preference('network.proxy.type', 1)
            profile.set_preference('network.proxy.http', ip)
            profile.set_preference('network.proxy.http_port', int(port))
            profile.set_preference('network.proxy.ssl', ip)
            profile.set_preference('network.proxy.ssl_port', int(port))
            profile.update_preferences()
            driver = webdriver.Firefox(firefox_profile=profile)
            # You can only set cookies for the driver's current domain,
            # so visit the page first, then set cookies
            driver.get(URL)
            # Precautionary - delete all cookies first
            driver.delete_all_cookies()
            for cookie in cookies_list:
                # Precautionary - prevents a possible Exception - cookies can
                # only be added for the current domain
                if "adidas" in cookie['domain']:
                    driver.add_cookie(cookie)
            # Once cookies are changed the browser must be refreshed
            driver.refresh()
        except Exception as exp:
            print(exp)
def SearchResultsLocalStores(sku, storenum):
    url = 'https://www.walmart.com/search/api/preso?prg=desktop&query={}&stores={}'.format(
        sku, storenum)
    res = requests.get(url, headers=RandomHeaders.LoadHeader(), proxies=proxies)
    print(res.json())
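# The Walmart helpers in this section rely on a module-level requests-style
# `proxies` mapping plus the RandomHeaders module. A minimal sketch of that
# assumed setup - the proxy address is a placeholder, not from the source:
import requests
import RandomHeaders

proxies = {'http': 'http://127.0.0.1:8080',
           'https': 'http://127.0.0.1:8080'}  # placeholder proxy

# Hypothetical example call - query and store number are made up:
# SearchResultsLocalStores('nintendo switch', 100)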
def grabSS(proxy):
    while True:
        try:
            headers = RandomHeaders.LoadHeader()
            driver = webdriver.PhantomJS(service_args=[
                '--proxy={}'.format(proxy),
                '--proxy-type=http'
            ])
            # driver = webdriver.PhantomJS()
            driver.get(URL)
            while driver.title == SPLASHTITLE:
                # This just visualizes the PhantomJS driver - you can replace
                # this with pass if you're trying to reduce memory
                driver.save_screenshot('{}.png'.format(
                    proxy.replace(':', '').replace('.', '')))
            cookies_list = driver.get_cookies()
            driver.close()
            driver.quit()
            # Converts the PhantomJS cookies into a Firefox webdriver to check out;
            # the proxy is set through a profile because geckodriver does not
            # accept proxy service args
            ip, port = proxy.split(':')
            profile = webdriver.FirefoxProfile()
            profile.set_preference('network.proxy.type', 1)
            profile.set_preference('network.proxy.http', ip)
            profile.set_preference('network.proxy.http_port', int(port))
            profile.set_preference('network.proxy.ssl', ip)
            profile.set_preference('network.proxy.ssl_port', int(port))
            profile.update_preferences()
            driver = webdriver.Firefox(firefox_profile=profile)
            # Cookies can only be set for the driver's current domain,
            # so visit the URL first, then add the cookies
            driver.get(URL)
            for cookie in cookies_list:
                driver.add_cookie(cookie)
        except Exception as exp:
            print(exp)
def FNCN(stores, SKU):
    for store in stores:
        try:
            a = {}
            a['Store'] = str(store)
            # Header-style keys are sent as form data here, mirroring the
            # request the Walmart store-search page makes
            data = {
                'authority': 'www.walmart.com',
                'method': 'POST',
                'path': '/store/ajax/search',
                'scheme': 'https',
                'accept': 'application/json, text/javascript, */*; q=0.01',
                'accept-encoding': 'gzip, deflate, br',
                'accept-language': 'en-US,en;q=0.8',
                'content-length': '55',
                'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'origin': 'https://www.walmart.com',
                'referer': 'https://www.walmart.com/store/{}/search?query={}'.format(
                    store, SKU),
                'user-agent': str(RandomHeaders.LoadHeader()),
                'x-requested-with': 'XMLHttpRequest',
                "searchQuery": "store={}&query={}".format(store, SKU),
            }
            url = "https://www.walmart.com/store/ajax/search"
            loop = 0
            res = None
            while res is None:
                loop = loop + 1
                res = requests.post(url, data=data, timeout=1)
                if loop > 5:
                    NoStores.append(' ')
                    break
            res = res.json()
            a["Price"] = int(GrabElement(str(res), 'priceInCents')) * .01
            a["Quantity"] = abs(int(float(GrabElement(str(res), 'quantity'))))
            Lis.append(a)
            with lock:  # Serialize printing across scanner threads
                print('{} Scanned'.format(store))
        except Exception as exp:
            pass
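# GrabElement is called above but never defined in this section. A minimal
# sketch of what it plausibly does - pulling the digits that follow a key in
# the stringified JSON response. The name comes from the source; this body is
# inferred, not confirmed:
import re

def GrabElement(raw, key):
    # e.g. GrabElement("{u'priceInCents': 1997}", 'priceInCents') -> '1997'
    return re.findall(r"{}'?\D+(\d+)".format(key), raw)[0]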
def makeRequest(url, responses):
    proxies = {"http": random.choice(proxy), "https": random.choice(proxy)}
    try:
        response = requests.get(url,
                                headers=RandomHeaders.LoadHeader(),
                                proxies=proxies,
                                timeout=10)
        responses.append(response.text)
    except Exception as e:
        pass
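# makeRequest writes into a shared `responses` list, which suggests it is meant
# to be fanned out across threads. A minimal usage sketch, assuming `proxy` is
# a module-level list of proxy URLs (the target URL is a placeholder):
import threading

responses = []
threads = [threading.Thread(target=makeRequest,
                            args=('https://example.com', responses))
           for _ in range(5)]
for t in threads:
    t.start()
for t in threads:
    t.join()
# list.append is atomic in CPython, so the shared list needs no extra locking here
print('{} responses collected'.format(len(responses)))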
def getCommits():
    try:
        url = 'https://github.com/theriley106/SneakerBotTutorials'
        res = requests.get(url, headers=RandomHeaders.LoadHeader())
        page = bs4.BeautifulSoup(res.text, 'lxml')
        commitsCount = page.select('.commits a')
        return int(re.findall(r'\d+', str(commitsCount[0].getText()))[0])
    except:
        return "ERROR"
def check_stock(url):
    raw_html = requests.get(url, headers=RandomHeaders.LoadHeader(), proxies=proxies)
    page = bs4.BeautifulSoup(raw_html.text, "lxml")
    list_of_raw_sizes = page.select('.size-dropdown-block')
    sizes = str(list_of_raw_sizes[0].getText()).replace('\t', '')
    sizes = sizes.replace('\n\n', ' ')
    sizes = sizes.split()
    sizes.remove('Select')
    sizes.remove('size')
    return sizes
def getCommits():
    try:
        url = 'https://github.com/theriley106/SneakerBotTutorials'
        res = requests.get(url, headers=RandomHeaders.LoadHeader())
        page = bs4.BeautifulSoup(res.text, 'lxml')
        commitsCount = page.select('.commits a')
        lastUpdate = page.select('relative-time')[0].getText()
        updateCount = int(re.findall(r'\d+', str(commitsCount[0].getText()))[0])
        return [lastUpdate, updateCount]
    except Exception as exp:
        print(exp)
        return "ERROR"
def CheckStock(url):
    RawHTML = requests.get(url, headers=RandomHeaders.LoadHeader())
    print(RawHTML)
    Page = bs4.BeautifulSoup(RawHTML.text, "lxml")
    ListOfRawSizes = Page.select('.size-dropdown-block')
    Sizes = str(ListOfRawSizes[0].getText()).replace('\t', '')
    Sizes = Sizes.replace('\n\n', ' ')
    Sizes = Sizes.split()
    Sizes.remove('Select')
    Sizes.remove('size')
    print(Sizes)
    return Sizes
def CheckStock(url):
    print('CheckStock')
    RawHTML = requests.get(
        url, headers=RandomHeaders.LoadHeader())  # , proxies=proxies)
    Page = bs4.BeautifulSoup(RawHTML.text, "lxml")
    # Got this CSS tag by using SelectorGadget
    RawAvailableSizes = Page.select('.size-dropdown-block')
    Sizes = str(RawAvailableSizes[0].getText()).replace("\t", "")
    Sizes = Sizes.replace("\n\n", " ")
    Sizes = Sizes.split()
    Sizes.remove('Select')
    Sizes.remove('size')
    return Sizes
def DownloadPage(URLs):
    URL = NewURL(URLs)
    page = None  # Stays None if all three attempts fail
    for tries in range(3):
        try:
            res = requests.get(URL, headers=RandomHeaders.LoadHeader(), proxies=proxies)
            print(URL)
            page = bs4.BeautifulSoup(res.text, "lxml")
            break
        except BaseException as exp:
            if Debug > 1:
                print(exp)
    return page
def grabPage(url):
    for i in range(10):
        proxies = {"http": proxy, "https": proxy}
        try:
            res = requests.get(url, headers=RandomHeaders.LoadHeader(),
                               proxies=proxies, timeout=10)
        except Exception as exp:
            res = None
        if res is not None:
            break
    page = bs4.BeautifulSoup(res.text, 'lxml')
    try:
        pageNum = re.findall(r'page\S(\d+)', url)[0]
    except:
        pageNum = 1
    print("Grabbed: {} | Page: {}".format(page.title.string, pageNum))
    return page
def grabSS(proxy):
    # Converts the PhantomJS cookies into a Firefox webdriver to check out
    while True:
        try:
            headers = RandomHeaders.LoadHeader()  # Generates a random header
            # Starts a PhantomJS instance with a proxy and random header
            driver = webdriver.PhantomJS(service_args=[
                '--proxy={}'.format(proxy),
                '--proxy-type=http'
            ])
            driver.get(URL)  # Navigates to a URL
            while driver.title == SPLASHTITLE:
                # The driver title matches the title of the target page
                # ie: the yeezy splash page
                # This just visualizes the PhantomJS driver - you can replace
                # this with pass if you're trying to reduce processing
                driver.save_screenshot('{}.png'.format(
                    proxy.replace(':', '').replace('.', '')))
            # This contains the cookies in the driver
            # Ideally this would be the point where the driver passes the splash page
            cookies_list = driver.get_cookies()
            driver.close()  # Closes out this driver
            driver.quit()
            # Opens up a new Firefox non-headless browser window that the user
            # can control, with the same proxy set through a profile
            # (geckodriver does not accept proxy service args)
            ip, port = proxy.split(':')
            profile = webdriver.FirefoxProfile()
            profile.set_preference('network.proxy.type', 1)
            profile.set_preference('network.proxy.http', ip)
            profile.set_preference('network.proxy.http_port', int(port))
            profile.set_preference('network.proxy.ssl', ip)
            profile.set_preference('network.proxy.ssl_port', int(port))
            profile.update_preferences()
            driver = webdriver.Firefox(firefox_profile=profile)
            driver.get(URL)  # Navigate to the same URL
            # You can only set cookies for the driver's current domain,
            # so visit the page first, then set cookies
            driver.delete_all_cookies()  # Precautionary - delete all cookies first
            for cookie in cookies_list:  # Loops through all of the cookies
                if "adidas" in cookie['domain']:  # Only looking for adidas cookies
                    driver.add_cookie(cookie)  # Adds adidas cookies to the driver
            driver.refresh()  # Once cookies are changed the browser must be refreshed
        except Exception as exp:  # Problem with the function
            print(exp)
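# A minimal sketch of the module-level names grabSS assumes: the target URL,
# the splash page's title, and a host:port proxy string. All values below are
# placeholders, not from the source:
from selenium import webdriver
import RandomHeaders

URL = 'https://www.example.com/splash'  # placeholder target page
SPLASHTITLE = 'Splash Page'             # placeholder splash-page title

# grabSS('127.0.0.1:8080')  # placeholder proxy in host:port form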
def DownloadPage(ListOfASIN):
    URL = GenerateURL(ListOfASIN)
    Asin = URL[1]
    URL = URL[0]
    page = None  # Stays None if all three attempts fail
    for tries in range(3):
        try:
            res = requests.get(URL, headers=RandomHeaders.LoadHeader(), proxies=proxies)
            print(URL)
            page = bs4.BeautifulSoup(res.text, "lxml")
            break
        except BaseException as exp:
            if Debug > 1:
                print(exp)
    return [page, Asin]
# coding=utf-8
from selenium import webdriver
import RandomHeaders

print(RandomHeaders.LoadHeader())

ua = {
    'Connection': 'keep-alive',
    'X-Requested-With': 'XMLHttpRequest',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'Upgrade-Insecure-Requests': '1',
    'Cache-Control': 'max-age=0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2'
}

profile = webdriver.FirefoxProfile()
# The override preference takes the user-agent string itself, not the whole header dict
profile.set_preference("general.useragent.override", ua['User-Agent'])
# profile.set_preference('network.proxy.type', 0)
# profile.set_preference('network.proxy.http', '119.101.117.204')
# profile.set_preference('network.proxy.http_port', 9999)
# profile.set_preference('network.proxy.ssl', '119.101.114.47')
# profile.set_preference('network.proxy.ssl_port', 9999)
profile.update_preferences()  # Reload the preferences
driver = webdriver.Firefox(profile)
# driver.get('http://ip138.com/')
raw_input('You are about to open {} individual windows... Continue?'.format(
    len(proxies)))

######### URL #########
url = 'https://www.whatismyip.com/'  # Use whatever URL you'd like to open here

for proxy in proxies:
    profile = webdriver.FirefoxProfile()  # If this doesn't work, use a different selenium webdriver
    ######### THIS PART SETS UP THE PROXIES #########
    # Assumes LoadHeader() returns a requests-style header dict;
    # the override preference wants the user-agent string itself
    profile.set_preference("general.useragent.override",
                           RandomHeaders.LoadHeader()['User-Agent'])
    profile.set_preference("network.proxy.type", 1)
    profile.set_preference("network.proxy.http", str(proxy['ip']))
    profile.set_preference("network.proxy.http_port", int(proxy['port']))
    profile.set_preference("network.proxy.ssl", str(proxy['ip']))
    profile.set_preference("network.proxy.ssl_port", int(proxy['port']))
    profile.set_preference('network.proxy.socks', str(proxy['ip']))
    profile.set_preference('network.proxy.socks_port', int(proxy['port']))
    profile.update_preferences()
    drivers = webdriver.Firefox(firefox_profile=profile)
    ######### GO TO URL #########
    drivers.get(url)
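# The loop above assumes `proxies` is a list of dicts carrying 'ip' and 'port'
# keys, defined before the raw_input prompt. A placeholder sketch of that
# structure (addresses are not from the source):
proxies = [
    {'ip': '127.0.0.1', 'port': '8080'},
    {'ip': '127.0.0.2', 'port': '8080'},
]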
def FNCN(stores, SKU):
    for store in stores:
        try:
            a = {}
            a['Store'] = str(store)
            data = {
                'authority': 'www.walmart.com',
                'method': 'POST',
                'path': '/store/ajax/search',
                'scheme': 'https',
                'accept': 'application/json, text/javascript, */*; q=0.01',
                'accept-encoding': 'gzip, deflate, br',
                'accept-language': 'en-US,en;q=0.8',
                'content-length': '55',
                'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'cookie': 'spid=E5669BFD-B28F-486F-B4E3-54B125AB6AE2; s=undefined; prefper=PREFSTORE~1641~2PREFCITY~1Greenville~2PREFFULLSTREET~16134%20White%20Horse%20Rd~2PREFSTATE~1SC~2PREFZIP~129601~2PREFSTORE~1641~2PREFCITY~1Greenville~2PREFFULLSTREET~16134%20White%20Horse%20Rd~2PREFSTATE~1SC~2PREFZIP~129601; TBV=a0zyf; WMR=p1-1|p2-1|p3-1|p4-0; akavpau_p3=1491489649~id=9dde41a09f2cbb9ae5cdeeeac7e57280; akavpau_p5=1491489662~id=4efbe67fefa3314c47cc13b7f11e36b9; sps=i%2454310056%3B48183904%3B; athrvi=RVI~h33cb4a8-h2df3a60; akavpau_p4=1491508840~id=328602308bf193a1352503323b59cc06; x-csrf-jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ0eXBlIjoiY29va2llIiwidXVpZCI6Ijg4NjU4YTkwLTFiMDItMTFlNy04ZmI1LWVkODA2YmE2N2JiZiIsImlhdCI6MTQ5MTUwODMzNSwiZXhwIjoxNDkxNTA5NTM1fQ.1YZyxD3V_4LWPtHwG-jIyvultQ8w0am1GzCp8ToTfP4; AID=wmlspartner%3DhQHSnYPFcGw%3Areflectorid%3D09007855021638109532%3Alastupd%3D1491508335032; search.perf.metric=timerPromiseAll=143ms|timerHeaderAction=71ms|timerSearchAction=143ms|timerFooterAction=41ms|timerPreso=139ms; DL=67052%2C37.751007080078125%2C-97.8219985961914%2Cip%2C67052%2CUSA%2CKS; NSID=3103.1-5855.6-3283.7-1221.8-5991.12-5990.12-1099.13-4321.14-592.17-1507.19-3155.20-3492.20-2428.28-370.28-346.32-794.33-186.38-369.43-978.48-993.49; akavpau_p7=1491508935~id=e3e677b43522fee0b70a0b183b3c3f06; VSID=2265%2C2266; SSLB=2; akavpau_p0=1491513602~id=fe0e46157f671ff7fcddc52eadcaafd8; AID=wmlspartner%3DhQHSnYPFcGw%3Areflectorid%3D09007855021638109532%3Alastupd%3D1491513085492; com.wm.reflector="reflectorid:09007855021638109532@lastupd:1491513085492@firstcreate:1491486835583"; vtc=Sa7mas539Zo9I10IU9BNk8; bstc=SaXTFjNaJobs1VjUAj7sZc; exp=1%2B1491512606%2BSa7mas539Zo9I10IU9BNk8%2B0%2BCcvJB.uH-Lt|bCt4O.KMTvF; exp-ck=uH-LtKMTvF; akavpau_p9=1491513731~id=a7eec29316d45708d714d4629324b12e',
                'origin': 'https://www.walmart.com',
                'referer': 'https://www.walmart.com/store/{}/search?query={}'.format(
                    store, SKU),
                'user-agent': str(RandomHeaders.LoadHeader()),
                'x-requested-with': 'XMLHttpRequest',
                "searchQuery": "store={}&query={}".format(store, SKU),
            }
            url = "https://www.walmart.com/store/ajax/search"
            loop = 0
            res = None
            while res is None:
                loop = loop + 1
                res = requests.post(url, data=data,
                                    proxies=random.choice(Proxies), timeout=10)
                if loop > 5:
                    NoStores.append(' ')
                    break
            res = res.json()
            print(res)
            a["Price"] = int(GrabElement(str(res), 'priceInCents')) * .01
            a["Quantity"] = abs(int(float(GrabElement(str(res), 'quantity'))))
            Lis.append(a)
            print('-')
        except Exception as exp:
            # print(exp)
            pass
def GrabNearbyStores(zipcode):
    res = requests.get(
        'https://www.walmart.com/search/api/location?location={}'.format(zipcode),
        headers=RandomHeaders.LoadHeader(),
        proxies=proxies)
    print(res.json())
def grab_site(url):
    # Pulls the site
    headers = RandomHeaders.LoadHeader()
    # This is a non-Python user agent, which prevents Amazon from blocking the request
    return requests.get(url, headers=headers)
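# Example usage - the URL is a placeholder, not from the source:
res = grab_site('https://www.amazon.com/dp/ASIN_GOES_HERE')
print(res.status_code)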