class Browser:
    """Thin convenience wrapper around a Selenium Chrome driver.

    Starts Chrome with the DevTools performance log enabled so network
    events can later be read back from the driver's log.
    """

    driver = None  # class-level default; replaced per instance in __init__

    def __init__(self):
        capabilities = DesiredCapabilities.CHROME
        # capabilities["loggingPrefs"] = {"performance": "ALL"} # chromedriver < ~75
        capabilities["goog:loggingPrefs"] = {
            "performance": "ALL"
        }  # chromedriver 75+
        self.driver = Chrome(CHROMEDRV_PATH, desired_capabilities=capabilities)

    def goto(self, url):
        """Navigate the browser to *url*."""
        self.driver.get(url)

    def set_input_value(self, name, value):
        """Type *value* into the element whose DOM id is *name*."""
        self.driver.find_element_by_id(name).send_keys(value)

    def click_link(self, class_name):
        """Click the first element carrying CSS class *class_name*."""
        self.driver.find_element_by_class_name(class_name).click()

    def check_elem_exist(self, name):
        """Return the first element whose text is exactly *name*, or None.

        Bug fix: the original wrapped the lookup in ``try/finally`` with a
        ``return`` inside ``finally``, which silently swallowed *every*
        exception (including KeyboardInterrupt/SystemExit), not just a
        missing element.  Catch only NoSuchElementException instead.
        """
        # NOTE(review): *name* is interpolated straight into the XPath, so a
        # value containing a single quote breaks the query — callers are
        # assumed to pass plain text.
        try:
            return self.driver.find_element_by_xpath(f"//*[text()='{name}']")
        except NoSuchElementException:
            return None

    def quit(self):
        """Shut down the underlying driver and its browser process."""
        self.driver.quit()
def build_diagram(data):
    """Render a Sankey diagram of active vs. inactive profiles on
    sankeymatic.com and trigger a PNG download of it."""
    data_dict = data_to_dictionary(data)
    print(data_dict)

    # Tier 1: aggregate into Active/Inactive totals.
    # Tier 2: one flow per active account type.
    first_level = {'Active Profiles': 0}
    second_level = {}
    for label, count in data_dict.items():
        if label == "No Active Account":
            first_level['Inactive Profiles'] = count
        else:
            first_level['Active Profiles'] += count
            second_level[label] = count

    # Build the SankeyMATIC "flows" text, one line per edge.
    flow_lines = [
        f"{label}[{count}] Friends\n" for label, count in first_level.items()
    ]
    flow_lines += [
        f"Friends [{count}] {label}\n" for label, count in second_level.items()
    ]
    syntax_string = ''.join(flow_lines)

    driver = Chrome()
    driver.get('http://sankeymatic.com/build')

    # Paste the flow description into the builder's input box.
    flows_box = driver.find_element_by_xpath('//*[@id="flows_in"]')
    flows_box.clear()
    flows_box.send_keys(syntax_string)

    # Render the preview, open the export section, and download the PNG.
    driver.find_element_by_xpath('//*[@id="preview_graph"]').click()
    driver.find_element_by_xpath('//*[@id="export_options"]/h3[1]/abbr').click()
    driver.find_element_by_xpath('//*[@id="download_png_link"]').click()

    # Give the browser time to finish the download before returning.
    time.sleep(10)
def login(user_email_address, user_password, is_headless, two_factor_token):
    """Attempt to log into Facebook and return the driver.

    user_email_address: str — account email address
    user_password: str — account password
    is_headless: bool — run Chrome headless when True
    two_factor_token: str or None — 2FA code to submit automatically; when
        falsy and 2FA is detected, execution pauses so the user can enter
        the code by hand

    Returns the Selenium Chrome driver, logged in on success.

    Fixes over the original: the docstring documented a nonexistent
    ``user_profile_url`` parameter; a local variable shadowed this
    function's own name; a dead ``else: pass`` branch was removed.
    """
    # The Chrome driver is required because Gecko was having issues
    chrome_options = Options()
    prefs = {
        "profile.default_content_setting_values.notifications": 2,
        'disk-cache-size': 4096
    }
    chrome_options.add_experimental_option("prefs", prefs)
    chrome_options.add_argument("start-maximized")
    if is_headless:
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('log-level=2')
    try:
        driver = Chrome(options=chrome_options)
    except Exception as e:
        # The user does not have chromedriver installed
        # Tell them to install it
        stderr.write(str(e))
        stderr.write(no_chrome_driver)
        exit(1)

    driver.implicitly_wait(10)
    driver.get("https://facebook.com")

    # DOM locator names for the login form fields.
    # NOTE(review): the "******" values look like they were redacted when
    # this file was published — they cannot be real element names; confirm
    # against the live page before relying on this flow.
    email = "email"
    password = "******"
    # Renamed from ``login`` — the original local shadowed this function.
    login_button = "******"
    approvals_code = "approvals_code"

    emailelement = driver.find_element_by_name(email)
    passwordelement = driver.find_element_by_name(password)
    emailelement.send_keys(user_email_address)
    passwordelement.send_keys(user_password)
    loginelement = driver.find_element_by_id(login_button)
    loginelement.click()

    # Defaults to no 2fa
    has_2fa = False
    try:
        # If this element exists, we've reached a 2FA page
        driver.find_element_by_xpath("//form[@class=\"checkpoint\"]")
        driver.find_element_by_xpath("//input[@name=\"approvals_code\"]")
        has_2fa = True
    except NoSuchElementException:
        # Fall back to sniffing the page text for a 2FA mention.
        has_2fa = "two-factor authentication" in driver.page_source.lower(
        ) or has_2fa

    if has_2fa:
        print("""
              Two-Factor Auth is enabled.
              Please file an issue at https://github.com/weskerfoot/DeleteFB/issues if you run into any problems
              """)

    if two_factor_token and has_2fa:
        twofactorelement = driver.find_element_by_name(approvals_code)
        twofactorelement.send_keys(two_factor_token)
        # Submits after the code is passed into the form, does not validate 2FA code.
        contelement = driver.find_element_by_id("checkpointSubmitButton")
        contelement.click()
        # Defaults to saving this new browser, this occurs on each new automated login.
        save_browser = driver.find_element_by_id("checkpointSubmitButton")
        save_browser.click()
    elif has_2fa:
        # Allow time to enter 2FA code manually
        print("Pausing to enter 2FA code")
        time.sleep(35)
        print("Continuing execution")

    return driver
def issue_request():
    """Scrape a county records site for instruments in a date range, OCR the
    linked PDFs for property addresses, look each address up in the county
    GIS to find the current owner, and export everything to a CSV file.

    Reads module globals: login_url, username, password,
    instrument_type_search_url, base_search_url, gis_url, activity_display,
    hyperlink, and the search criteria globals declared below.

    Returns the HTTP status code of a plain GET against the search URL
    (NOTE(review): issued via ``requests`` without the browser's session, so
    it does not reflect the authenticated session's state).
    """
    # region Variable Declarations
    global start_date, end_date, \
        instrument_type, property_address, \
        authenticated
    instrument_types = []
    recording_dates = []
    dated_dates = []
    grantors = []
    grantees = []
    result_page_links = []
    property_addresses = []
    property_owner_names = []
    property_owner_addresses = []
    pdf_text = ""
    # endregion Variable Declarations

    # Browser Instantiation
    browser = Chrome(ChromeDriverManager().install())
    # The site to which we will navigate while also handling its session.
    browser.get(login_url)
    # Locates and populates the elements containing the username and password entry fields.
    username_element = browser.find_element_by_id("USERID")
    username_element.send_keys(username)
    password_element = browser.find_element_by_id("PASSWORD")
    password_element.send_keys(password)
    # Login
    password_element.send_keys(Keys.RETURN)
    # Crude success check: the word "invalid" in the page source means the
    # credentials were rejected.
    authenticated = True if "invalid" not in str(
        browser.page_source) else False
    if authenticated:
        # Navigation to and Selection of Advanced Search
        browser.get(instrument_type_search_url)
        # Ensure Page Elements Have Loaded
        time.sleep(1)
        # Reveal Additional Search Fields
        search_type = browser.find_element_by_id("lbSearchType")
        search_type.click()
        # Ensure Page Elements Have Loaded
        time.sleep(1)
        # Issue an Advanced Search Query
        start_date_field = browser.find_element_by_id("STARTDATE")
        start_date_field.send_keys(start_date)
        end_date_field = browser.find_element_by_id("ENDDATE")
        end_date_field.send_keys(end_date)
        instrument_type_field = browser.find_element_by_id("INSTRUMENT_TYPES")
        instrument_type_field.send_keys(instrument_type)
        search_button = browser.find_element_by_id("SearchButton")
        search_button.click()
        # Harvest the Query
        document_links = set()
        # Pagination controls are links whose href mentions "PageRepeater";
        # collect their ids so we can visit every results page.
        result_page_repeater_ids = set(
            link.get_attribute("id")
            for link in browser.find_elements_by_xpath("//a[@href]")
            if "PageRepeater" in link.get_attribute("href"))
        for result_page_repeater_id in result_page_repeater_ids:
            result_page_repeater = browser.find_element_by_id(
                result_page_repeater_id)
            result_page_repeater.click()
            # Document links on a results page are identified by "PK" in the
            # href (presumably a primary-key parameter — confirm).
            current_page_document_links = set(
                link.get_attribute("href")
                for link in browser.find_elements_by_xpath("//a[@href]")
                if "PK" in link.get_attribute("href"))
            document_links = document_links | current_page_document_links
        for result_page_link in document_links:
            # Procedurally Extracting Instrument Types, Recording Dates,
            # Dated Dates, Grantors, Grantees, & Property Addresses
            # NOTE(review): this counter is reused here as a plain column
            # index over the "coldata" cells; positions 0/4/5/9/10 are
            # assumed to hold type/recording date/dated date/grantors/
            # grantees — verify against the page layout.
            malformed_property_address_counter = 0
            browser.get(result_page_link)
            column_data = browser.find_elements_by_class_name("coldata")
            for column_datum in column_data:
                parsed_datum = column_datum.text.split('\n')
                if malformed_property_address_counter == 0:
                    instrument_types.append(parsed_datum[0])
                elif malformed_property_address_counter == 4:
                    recording_dates.append(parsed_datum[0])
                elif malformed_property_address_counter == 5:
                    dated_dates.append(parsed_datum[0])
                elif malformed_property_address_counter == 9:
                    grantors.append(parsed_datum)
                elif malformed_property_address_counter == 10:
                    grantees.append(parsed_datum)
                malformed_property_address_counter += 1
            # The PDF URL is embedded in the view button's onclick handler;
            # pull out the first single-quoted fragment.
            view_document_button = browser.find_element_by_id(
                "BTN_VIEW_DOCUMENT")
            on_click = view_document_button.get_attribute("onclick")
            pdf_download_link = base_search_url + str(on_click).split('\'')[1]
            # Download via the browser session (requests-html style
            # ``browser.request``), then OCR each rendered page until a
            # real-estate-related phrase is found.
            pdf = browser.request("GET", pdf_download_link).content
            pdf_image = pdf2image.convert_from_bytes(pdf)
            for page in pdf_image:
                page_text = pytesseract.image_to_string(page)
                if "Real Estate" in page_text \
                        or "PROPERTY TO BE DISTRIBUTED" in page_text \
                        or "Decedent resided at" in page_text \
                        or "Real Property" in page_text:
                    pdf_text = page_text
                    break
            # Parse a US street address out of the OCR text; on success keep
            # the first match as a string, otherwise the empty parse result
            # is appended as-is (len 0 → handled as malformed below).
            property_address = pyap.parse(pdf_text, country="US")
            if len(property_address) > 0:
                property_address = str(property_address[0])
            result_page_links.append(result_page_link)
            property_addresses.append(property_address)
        update_activity_display(
            "\n\n* Found " + str(len(property_addresses)) +
            " entries for \"" + instrument_type + "\" cases in the Clay"
            " County database. Compare"
            " this number to the"
            " number of populated fields"
            " for property addresses in"
            " the generated CSV file to"
            " reconcile any"
            " discrepancies that may"
            " occur in the PDF"
            " address-parsing process. *\n", activity_display)
        valid_property_address_counter = 0
        malformed_property_address_counter = 1
        for result_page_link, property_address in zip(result_page_links,
                                                      property_addresses):
            try:
                # An empty parse result is treated like a missing element so
                # it falls through to the malformed-address handler below.
                if len(property_address) == 0:
                    raise NoSuchElementException
                elif len(property_address) > 0:
                    # Navigation to Second Page
                    browser.get(gis_url)
                    # Ensure Page Elements Have Loaded
                    time.sleep(2)
                    # Click Agree
                    agree_field = browser.find_element_by_id(
                        "dojox_mobile_Button_0")
                    agree_field.click()
                    # Navigation to Address Search
                    address_search_field = browser.find_element_by_id(
                        "searchButton")
                    address_search_field.click()
                    # Ensure Page Elements Have Loaded
                    time.sleep(2)
                    address_search_tab = browser.find_element_by_id(
                        "addressTab")
                    address_search_tab.click()
                    # Ensure Page Elements Have Loaded
                    time.sleep(2)
                    search_input_field = browser.find_element_by_id(
                        "search_input")
                    # Ensure Page Elements Have Loaded
                    time.sleep(3)
                    # Enter Address
                    search_input_field.send_keys(property_address)
                    # Ensure Page Elements Have Loaded
                    time.sleep(2)
                    # Click Submit
                    search_input_field.send_keys(Keys.RETURN)
                    # Ensure Search Results Have Loaded
                    time.sleep(3)
                    # Harvesting Property Owner Names
                    gis_results_container = browser.find_element_by_id(
                        "resultsGeocodeContainer0")
                    for line in gis_results_container.text.split("\n"):
                        if "Current Owner" in line:
                            property_owner_name = line.split(":")[1].strip()
                            property_owner_names.append(property_owner_name)
                            break
                    # Collecting Property Owner Addresses — the parcel link
                    # opens a new tab; switch to it, read two fixed table
                    # cells (street + city/state rows), then return.
                    tabs = browser.window_handles
                    for link in gis_results_container.find_elements_by_xpath(
                            "//a[@href]"):
                        if "parcelid" in link.get_attribute("href"):
                            link.click()
                            tabs = browser.window_handles
                            browser.switch_to.window(tabs[1])
                            break
                    property_owner_addresses.append(
                        browser.find_element_by_xpath(
                            "/html/body/table[2]/tbody/tr[7]/td").text +
                        ', ' + browser.find_element_by_xpath(
                            "/html/body/table[2]/tbody/tr[9]/td").text)
                    browser.close()
                    browser.switch_to.window(tabs[0])
                    valid_property_address_counter += 1
                else:
                    property_owner_names.append([])
                    property_owner_addresses.append([])
            except NoSuchElementException:
                # Malformed/unfindable address: log it in the Tk activity
                # display with a clickable link to the source PDF, and pad
                # the owner columns so rows stay aligned.
                malformed_property_address_result_page_link = result_page_link
                update_activity_display(
                    "\nAn invalid address entered into the Clay County GIS address entry field was found.\n"
                    + "It was located in the PDF found here:",
                    activity_display)
                activity_display.config(state=NORMAL)
                activity_display.pack()
                activity_display.insert(
                    INSERT, "Link to PDF #" +
                    str(malformed_property_address_counter) +
                    " for Manual Inspection\n",
                    hyperlink.add(lambda: webbrowser.open(
                        malformed_property_address_result_page_link)))
                activity_display.config(state=DISABLED)
                property_owner_names.append([])
                property_owner_addresses.append([])
                malformed_property_address_counter += 1
                pass
    # Exporting Scraped Data to CSV File
    df_data = [
        instrument_types, recording_dates, dated_dates, grantors, grantees,
        result_page_links, property_addresses, property_owner_names,
        property_owner_addresses
    ]
    df = pd.DataFrame(data=df_data).T
    df.columns = [
        "Instrument Type", "Recording Date", "Dated Date", "Grantor(s)",
        "Grantee(s)", "Result Page Link", "Property Address",
        "Current Property Owner Name", "Owner Mailing Address"
    ]
    df.to_csv("Realtor Estate Data Export (" + str(date.today()) + ").csv")
    # Cleaning Up the Browser Instance
    browser.close()
    return requests.get(instrument_type_search_url).status_code
# --- UW login + referral-form automation (script-level statements) ---
# NOTE(review): assumes UW_EMAIL, UW_NETID and DRIVER_BIN are defined earlier
# in the file; only the password is read from the environment here.
UW_PASSWORD = os.getenv("UW_PASSWORD")
driver = Chrome(executable_path=DRIVER_BIN)
driver.set_window_position(0, 0)
driver.set_window_size(400, 1000)
# Local app endpoint that redirects into the Google / UW sign-in flow.
driver.get("http://localhost:3000/api/login")
driver.find_element_by_id("identifierId").send_keys(UW_EMAIL)
driver.find_element_by_id("identifierNext").click()
time.sleep(1)
# UW NetID single sign-on page.
driver.find_element_by_id("weblogin_netid").send_keys(UW_NETID)
driver.find_element_by_id("weblogin_password").send_keys(UW_PASSWORD)
driver.find_element_by_id("submit_button").click()
time.sleep(1)
driver.find_element_by_xpath('//button[normalize-space()="Continue"]').click()
time.sleep(1)
driver.find_element_by_xpath('//button[normalize-space()="New referral"]').click()
# start for a browser automation approach
# CREATE A CHILD
driver.find_element_by_id("childFirstName").send_keys('Cindy')
driver.find_element_by_id("childLastName").send_keys('Martin')
driver.find_element_by_id("childDateOfBirth").send_keys('02/03/2020')
# Click the page background — presumably to dismiss the date picker before
# the next interaction; confirm against the form's behavior.
driver.find_element_by_xpath("//html").click()
# Click the radio button via JavaScript — presumably it is not directly
# clickable through WebDriver (overlapped/custom widget).
sex = driver.find_element_by_id("sexOfChildIsFemale")
driver.execute_script("arguments[0].click();", sex)
driver.find_element_by_xpath("//html").click()
# wait for child search
time.sleep(2)
def get_amzn_driver(email, password, headless=False, session_path=None):
    """Log into Amazon and return a Chrome driver on the order-history page.

    email: str — account email, used to find the entry in Amazon's
        account switcher
    password: str — passed through ``get_password`` before being typed
    headless: bool — run Chrome headless (with the usual container flags)
    session_path: str or None — Chrome user-data dir, so a previously
        authenticated session can be reused

    Returns the logged-in driver; exits the process if login cannot be
    completed within 5 minutes.
    """
    chrome_options = ChromeOptions()
    if headless:
        chrome_options.add_argument('headless')
        chrome_options.add_argument('no-sandbox')
        chrome_options.add_argument('disable-dev-shm-usage')
        chrome_options.add_argument('disable-gpu')
    if session_path is not None:
        chrome_options.add_argument("user-data-dir=" + session_path)
    logger.info('Logging into Amazon.com')
    home_dir = os.path.expanduser("~")
    driver = Chrome(options=chrome_options,
                    executable_path=get_stable_chrome_driver(home_dir))
    driver.get(ORDER_HISTORY_URL_VIA_SWITCH_ACCOUNT_LOGIN)
    driver.implicitly_wait(2)

    # Local lookup helpers that return None instead of raising when an
    # element is absent.
    def get_element_by_id(driver, id):
        try:
            return driver.find_element_by_id(id)
        except NoSuchElementException:
            pass
        return None

    def get_element_by_xpath(driver, xpath):
        try:
            return driver.find_element_by_xpath(xpath)
        except NoSuchElementException:
            pass
        return None

    # Go straight to the account switcher, and look for the given email.
    # If present, click on it! Otherwise, click on "Add account".
    desired_account_element = get_element_by_xpath(
        driver, "//div[contains(text(), '{}')]".format(email))
    if desired_account_element:
        desired_account_element.click()
        driver.implicitly_wait(2)
        # It's possible this account has already authed recently. If so, the
        # next block will be skipped and the login is complete!
        # ('report-confirm' is an element on the destination page, so its
        # presence means we are already through.)
        if not get_element_by_id(driver, 'report-confirm'):
            driver.find_element_by_id('ap_password').send_keys(
                get_password(password))
            driver.find_element_by_name('rememberMe').click()
            driver.find_element_by_id('signInSubmit').submit()
    else:
        # Cannot find the desired account in the switch. Log in via Add Account
        driver.find_element_by_xpath('//div[text()="Add account"]').click()
        driver.implicitly_wait(2)
        driver.find_element_by_id('ap_email').send_keys(email)
        # Login flow sometimes asks just for the email, then a
        # continue button, then password.
        if get_element_by_id(driver, 'continue'):
            driver.find_element_by_id('continue').click()
            driver.implicitly_wait(2)
        driver.find_element_by_id('ap_password').send_keys(
            get_password(password))
        driver.find_element_by_name('rememberMe').click()
        driver.find_element_by_id('signInSubmit').submit()
    driver.implicitly_wait(2)

    # If we still have not landed on the report page, hand control to the
    # human: wait up to 5 minutes for them to finish login in the browser.
    if not get_element_by_id(driver, 'report-confirm'):
        logger.warning('Having trouble logging into Amazon. Please see the '
                       'browser and complete login within the next 5 minutes. '
                       'This script will continue automatically on success. '
                       'You may need to manually navigate to: {}'.format(
                           ORDER_HISTORY_REPORT_URL))
        if get_element_by_id(driver, 'auth-mfa-otpcode'):
            logger.warning('Hint: Looks like an auth challenge! Maybe check '
                           'your email')
        try:
            wait_cond = EC.presence_of_element_located((By.ID, 'report-confirm'))
            WebDriverWait(driver, 60 * 5).until(wait_cond)
        except TimeoutException:
            logger.critical('Cannot complete login!')
            exit(1)
    return driver
def send_outlook():
    """Log into Outlook Web Access with headless Chromium, scrape the Tasks
    folder and the calendar, and push each entry to the ``eink`` client
    (presumably an e-ink display — confirm).

    Uses a persistent Chrome profile under ~/.chrome_cache so an existing
    session skips the interactive login; otherwise prompts for the password
    on stdin and waits for 2FA approval.  Exits the process on any timeout.
    """
    home_dir = str(Path.home())
    chrome_cache_path = f"{home_dir}/.chrome_cache"
    print(f"loading chrome, caching to: {chrome_cache_path}")
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-startup-window")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--disable-sync-preferences")
    chrome_options.add_argument("--disable-extensions")
    chrome_options.add_argument("--disable-background-networking")
    chrome_options.add_argument("--no-first-run")
    chrome_options.add_argument("--aggressive-tab-discard")
    chrome_options.add_argument("--user-agent=Mozilla/4.0 (Windows; MSIE 6.0; Windows NT 5.2)")
    # Keep all browser state inside the cache dir so the session persists
    # between runs.
    chrome_options.add_argument(f"--user-data-dir={chrome_cache_path}/user-data")
    chrome_options.add_argument(f"--data-path={chrome_cache_path}/data-path")
    chrome_options.add_argument(f"--disk-cache-dir={chrome_cache_path}/disk-cache")
    chrome_options.add_argument(f"--homedir={chrome_cache_path}")
    chrome_options.add_argument(f"--disk-cache-dir={chrome_cache_path}/cache-dir")
    # Skip image loading and cap the disk cache to keep things light.
    prefs={"profile.managed_default_content_settings.images": 2,
           'disk-cache-size': 4096 }
    chrome_options.add_experimental_option("prefs",prefs)
    delay = 60  # seconds to wait for each page transition
    chrome_options.binary_location = "/usr/bin/chromium-browser"
    driver = Chrome(executable_path=os.path.abspath("/usr/lib/chromium-browser/chromedriver"),
                    chrome_options=chrome_options)
    print("logging into outlook")
    driver.get("https://outlook.office.com/owa/")
    try:
        # If the username field is absent, NoSuchElementException fires and
        # we assume the cached profile is already logged in (see below).
        driver.find_element_by_name("loginfmt").send_keys("*****@*****.**")
        driver.find_element_by_id("idSIButton9").click()
        print("entered username, waiting for password prompt")
        try:
            myElem = WebDriverWait(driver, delay).until(
                EC.presence_of_element_located((By.ID, 'passwordInput')))
            print("password prompt loaded")
        except TimeoutException:
            print("Loading password prompt took too much time!")
            print(driver.page_source)
            driver.close(); exit(1)
        # Password is never stored; prompt on the terminal each time.
        passwd = getpass.getpass()
        driver.find_element_by_id("passwordInput").send_keys(passwd)
        driver.find_element_by_id("submitButton").click()
        print("entered password, waiting for 2FA token")
        try:
            # 'idSIButton9' reappears as the "stay signed in" button after
            # 2FA is approved.
            myElem = WebDriverWait(driver, delay).until(
                EC.presence_of_element_located((By.ID, 'idSIButton9')))
            driver.find_element_by_id("idSIButton9").click()
            print("asking to remember credentials for next time")
        except TimeoutException:
            print("Loading 2FA page took too much time!")
            print(driver.page_source)
            driver.close(); exit(1)
        print("2FA accepted, loading office landing page")
    except NoSuchElementException:
        print("already logged in")
    try:
        print("waiting for landing page to load")
        myElem = WebDriverWait(driver, delay).until(
            EC.presence_of_element_located((By.ID, 'lnkBrwsAllFldrs')))
    except TimeoutException:
        print(driver.page_source)
        print("Loading landing page too much time!")
        driver.close(); exit(1)
    try:
        eink.send_update("Loading Tasks")
        print("loading tasks")
        # Navigate the folder browser's <select> to the Tasks folder.
        driver.find_element_by_id("lnkBrwsAllFldrs").click()
        driver.find_element_by_id("selbrfld").click()
        Select(driver.find_element_by_id("selbrfld")).select_by_visible_text("Tasks")
        driver.find_element_by_id("selbrfld").click()
        driver.find_element_by_xpath(
            "(.//*[normalize-space(text()) and normalize-space(.)='Sent Items'])[1]/following::img[1]").click()
        myElem = WebDriverWait(driver, delay).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'lvw')))
    except TimeoutException:
        print(driver.page_source)
        print("Loading todo list took too much time!")
        driver.close(); exit(1)
    # Each task row's title link -> e-ink todo slots.
    elements = driver.find_elements_by_css_selector("td h1 a")
    for i, element in enumerate(elements):
        eink.send_todo(i, element.text)
    try:
        eink.send_update("Loading Calendar")
        print("loading calendar")
        driver.find_element_by_id("lnkNavCal").click()
        myElem = WebDriverWait(driver, delay).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'cntnttp')))
        print("calendar loaded, dumping entries")
    except TimeoutException:
        print(driver.page_source)
        print("Loading calendar took too much time!")
        driver.close(); exit(1)
    # Each calendar entry's title attribute -> e-ink meeting slots.
    elements = driver.find_elements_by_css_selector("td.v a")
    for i, element in enumerate(elements):
        eink.send_meeting(i,element.get_attribute('title'))
def get_amzn_driver(email, password, headless=False, session_path=None):
    """Log into Amazon and return a Chrome driver on the order-history page,
    downloading a matching chromedriver into the working directory first if
    one is not already present.

    email: str — account email, used to find the entry in Amazon's
        account switcher
    password: str — passed through ``get_password`` before being typed
    headless: bool — run Chrome headless (with the usual container flags)
    session_path: str or None — Chrome user-data dir for session reuse

    Returns the logged-in driver; exits the process if login cannot be
    completed within 5 minutes.
    """
    zip_type = ""
    executable_path = os.path.join(os.getcwd(), 'chromedriver')
    if _platform in ['win32', 'win64']:
        executable_path += '.exe'
    zip_type = CHROME_ZIP_TYPES.get(_platform)
    if not os.path.exists(executable_path):
        # Fetch the platform-specific chromedriver zip and unpack it next to
        # the script.
        zip_file_url = CHROME_DRIVER_BASE_URL.format(CHROME_DRIVER_VERSION,
                                                     zip_type)
        request = requests.get(zip_file_url)
        if request.status_code != 200:
            raise RuntimeError(
                'Error finding chromedriver at {}, status = {}'.format(
                    zip_file_url, request.status_code))
        zip_file = zipfile.ZipFile(io.BytesIO(request.content))
        zip_file.extractall()
        # zipfile does not preserve the execute bit; restore it.
        os.chmod(executable_path, 0o755)
    chrome_options = ChromeOptions()
    if headless:
        chrome_options.add_argument('headless')
        chrome_options.add_argument('no-sandbox')
        chrome_options.add_argument('disable-dev-shm-usage')
        chrome_options.add_argument('disable-gpu')
        # chrome_options.add_argument("--window-size=1920x1080")
    if session_path is not None:
        chrome_options.add_argument("user-data-dir=" + session_path)
    logger.info('Logging into Amazon.com')
    driver = Chrome(chrome_options=chrome_options,
                    executable_path=executable_path)
    driver.get(ORDER_HISTORY_URL_VIA_SWITCH_ACCOUNT_LOGIN)
    driver.implicitly_wait(2)

    # Local lookup helpers that return None instead of raising when an
    # element is absent.
    def get_element_by_id(driver, id):
        try:
            return driver.find_element_by_id(id)
        except NoSuchElementException:
            pass
        return None

    def get_element_by_xpath(driver, xpath):
        try:
            return driver.find_element_by_xpath(xpath)
        except NoSuchElementException:
            pass
        return None

    # Go straight to the account switcher, and look for the given email.
    # If present, click on it! Otherwise, click on "Add account".
    desired_account_element = get_element_by_xpath(
        driver, "//div[contains(text(), '{}')]".format(email))
    if desired_account_element:
        desired_account_element.click()
        driver.implicitly_wait(2)
        # It's possible this account has already authed recently. If so, the
        # next block will be skipped and the login is complete!
        # ('report-confirm' is an element on the destination page, so its
        # presence means we are already through.)
        if not get_element_by_id(driver, 'report-confirm'):
            driver.find_element_by_id('ap_password').send_keys(
                get_password(password))
            driver.find_element_by_name('rememberMe').click()
            driver.find_element_by_id('signInSubmit').submit()
    else:
        # Cannot find the desired account in the switch. Log in via Add Account
        driver.find_element_by_xpath('//div[text()="Add account"]').click()
        driver.implicitly_wait(2)
        driver.find_element_by_id('ap_email').send_keys(email)
        driver.find_element_by_id('ap_password').send_keys(
            get_password(password))
        driver.find_element_by_name('rememberMe').click()
        driver.find_element_by_id('signInSubmit').submit()
    driver.implicitly_wait(2)

    # If we still have not landed on the report page, hand control to the
    # human: wait up to 5 minutes for them to finish login in the browser.
    if not get_element_by_id(driver, 'report-confirm'):
        logger.warning('Having trouble logging into Amazon. Please see the '
                       'browser and complete login within the next 5 minutes. '
                       'This script will continue automatically on success. '
                       'You may need to manually navigate to: {}'.format(
                           ORDER_HISTORY_REPORT_URL))
        if get_element_by_id(driver, 'auth-mfa-otpcode'):
            logger.warning('Hint: Looks like an auth challenge! Maybe check '
                           'your email')
        try:
            wait_cond = EC.presence_of_element_located((By.ID, 'report-confirm'))
            WebDriverWait(driver, 60 * 5).until(wait_cond)
        except TimeoutException:
            logger.critical('Cannot complete login!')
            exit(1)
    return driver
class Task:
    """One checkout task: a named cart of items plus the checkout details
    needed to buy them on supremenewyork.com (Python 2 code — note the
    ``print`` statements)."""

    #takes arguments and creates class attributes for easier and prettier access
    def __init__(self, name, incart, checkout, settings):
        self.cart = []  #array of item objects to cart and checkout
        self.settings = settings  #dictionairy of user settings
        for item in incart:
            self.cart.append(
                Item(item, self.settings
                     ))  #convert array of arrays to array of item objects
        self.checkout = checkout  #dictionairy of checkout values (such as name, address, ect)
        self.name = name  #name of task to be used during printing to console

    #creates a selenium webdriver
    def create_webdriver(self):
        #https://stackoverflow.com/questions/29916054/change-user-agent-for-selenium-driver
        opts = Options()
        self.useragent = random_user_agent()  #calls for a random useragent
        opts.add_argument("user-agent=" +
                          self.useragent)  #adds random user agent to options
        self.webdriver = Chrome(
            executable_path=get_local_directory() + '/resources/chromedriver',
            chrome_options=opts)  #creates new webdriver with premade options
        self.webdriver.set_page_load_timeout(int(
            self.settings["timeout"]))  #set timeout of pageload from config

    #attempts to cart every item in cart. Returns true if any of the items cart
    def cart_items(self):
        self.create_webdriver()
        return_bool = False
        for item in self.cart:
            if (item.in_cart == False):
                item.cart(self.webdriver)
            if (item.in_cart == True):
                return_bool = True
        return return_bool

    #loads checkout page, fills form, clicks checkout, waits for user to complete captcha
    def manual_checkout(self):
        self.webdriver.get(
            'https://www.supremenewyork.com/checkout')  #load the checkout page
        self.fill_form()
        self.webdriver.find_element_by_name("commit").click()
        if (self.wait_for_manual_captcha()):
            return self.confirmation()

    #fills all elements of form and check required check boxes
    def fill_form(self):
        #TODO: IF SITEKEY CHANGED, CLEAR OLD TOKENS (not high priority)
        update_sitekey(
            self.webdriver.find_element_by_xpath(
                '//*[@id="cart-cc"]/fieldset/div[3]').get_attribute(
                    "data-sitekey")
        )  #updates the captcha sitekey saved on file (in case it changed).
        self.webdriver.find_element_by_name("order[billing_name]").send_keys(
            self.checkout["name"])
        self.webdriver.find_element_by_name("order[email]").send_keys(
            self.checkout["email"])
        self.webdriver.find_element_by_name("order[tel]").send_keys(
            self.checkout["phone"])
        self.webdriver.find_element_by_name(
            "order[billing_address]").send_keys(self.checkout["address"])
        self.webdriver.find_element_by_name("order[billing_zip]").send_keys(
            self.checkout["zipcode"])
        self.webdriver.find_element_by_name("order[billing_city]").send_keys(
            self.checkout["city"])
        self.webdriver.find_element_by_name("order[billing_state]").send_keys(
            self.checkout["state"])
        self.webdriver.find_element_by_name(
            "order[billing_country]").send_keys(self.checkout["country"])
        # Card number is typed one character at a time with a short delay.
        checkout_field = self.webdriver.find_element_by_name(
            "credit_card[nlb]")
        for character in list(
                self.checkout["card_number"]
        ):  #weird error occurs when you send all at once,was shuffling characters
            time.sleep(.01)
            checkout_field.send_keys(character)
        self.webdriver.find_element_by_name("credit_card[month]").send_keys(
            self.checkout["card_month"])
        self.webdriver.find_element_by_name("credit_card[year]").send_keys(
            self.checkout["card_year"])
        self.webdriver.find_element_by_name("credit_card[rvv]").send_keys(
            self.checkout["cvv"])
        # Tick the terms-and-conditions checkbox.
        self.webdriver.find_element_by_xpath(
            "//*[@id='cart-cc']/fieldset/p[2]/label/div/ins").click()

    #loads checkout page, fills form(supreme supplies necessary cookies during form filling, so even though I never submit I need to fill the form)
    def ajax_checkout(self):
        self.webdriver.get('https://www.supremenewyork.com/checkout')
        if ("y" in self.settings["fill_form"]):
            try:
                self.fill_form()
            except NoSuchElementException:
                pass
        csrf_token = self.webdriver.find_element_by_name(
            'csrf-token').get_attribute("content")
        #headers required to make AJAX request, found using chrome devtools
        headers = {
            'Accept': '*/*',
            'X-CSRF-Token': csrf_token,
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': 'https://www.supremenewyork.com/checkout',
            'Accept-Language': 'en-US,en;q=0.8',
            'User-Agent': self.useragent,
            'Connection': 'keep-alive',
            #'Host':'wwww.supremenewyork.com',
            'Origin': 'https://www.supremenewyork.com',
            'Accept-Encoding': 'gzip, deflate, br',
            'Content-Length': '1006',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
        }
        #payload required to make AJAX request, found using chrome devtools
        # NOTE(review): 'order[terms]' appears twice below; in a Python dict
        # literal the second value ('1') silently wins — confirm whether both
        # were meant to be sent (as a browser form would).
        payload = {
            'utf8': '✓',
            'authenticity_token': csrf_token,
            'order[billing_name]': self.checkout['name'],
            'order[email]': self.checkout['email'],
            'order[tel]': self.checkout['phone'],
            'order[billing_address]': self.checkout['address'],
            'order[billing_address_2]': '',
            'order[billing_zip]': self.checkout['zipcode'],
            'order[billing_city]': self.checkout['city'],
            'order[billing_state]': self.checkout['state'],
            'order[billing_country]': self.checkout['country'],
            'same_as_billing_address': '1',
            'asec': 'Rmasn',
            'store_credit_id': '',
            'credit_card[nlb]': self.checkout["card_number"],
            'credit_card[month]': self.checkout["card_month"],
            'credit_card[year]': self.checkout["card_year"],
            'credit_card[rvv]': self.checkout["cvv"],
            'order[terms]': '0',
            'order[terms]': '1',
            'credit_card[vval]': self.checkout["cvv"],
            'g-recaptcha-response': get_captcha()
        }
        #make request, print response
        response = self.webdriver.request(
            'POST',
            'https://www.supremenewyork.com/checkout.json',
            data=payload,
            headers=headers)
        print "[" + self.name + "] RESPONSE: " + response.text

    #looks to see if captchas solved yet
    def wait_for_manual_captcha(self):
        while (True):
            #trys to click checkout, if captcha is visible it will throw WebDriverException
            try:
                self.webdriver.find_element_by_name("commit").click()
                return True
            except WebDriverException:
                print "[" + self.name + "] Fill captcha manually..."
                time.sleep(3)

    #finds result of checkout on confirmation page
    def confirmation(self):
        # Poll once a second until the confirmation tab becomes selected,
        # then print the confirmation text and report success.
        while True:
            try:
                if ('selected' in self.webdriver.find_element_by_xpath(
                        '//*[@id="tabs"]/div[3]').get_attribute('class')
                    ):  #if the confirmation tab is selected
                    print "[" + self.name + "] Response: " + self.webdriver.find_element_by_id(
                        'content'
                    ).text.split("CONFIRMATION")[
                        1]  #print the desired information from checkout page
                    return True
            except (NoSuchElementException, StaleElementReferenceException
                    ):  #thrown if tab is not selected
                print "[" + self.name + "] Waiting for confirmation..."
                time.sleep(1)
class ObjSup:
    """Supreme checkout bot.

    Scrapes the configured category page for a product whose name and
    colourway match the keywords in settings.json, adds the matching size
    to the cart, and drives the checkout form through a selenium-requests
    Chrome session (so HTTP requests and browser actions share cookies).
    """

    def __init__(self):
        print  # Python 2 bare print: emit a blank line at startup
        self.start = time.time()
        # settings.json supplies category, keywords, billing details, delay.
        with open('settings.json', 'r') as settingsFile:
            self.settings = json.loads(settingsFile.read())
        self.headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'en-US,en;q=0.8',
            'Cache-Control': 'max-age=0',
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36'
        }
        self.session = Chrome(
            '/Users/alex/PycharmProjects/FirstSelenium/chromedriver')
        self.session.headers = self.headers
        self.found_link = ''
        self.style_id = ''
        self.prod_id = ''
        self.form_key = ''

    def cook(self):
        """Run the full scrape -> add-to-cart -> checkout pipeline,
        logging progress and stopping at the first failed stage."""
        self.log('Starting up')
        self.log('Attempting to scrape category {}'.format(
            self.settings['category']))
        if not self.scrape_product():
            self.log('Failed to scrape category')
            return
        self.log('Attempting ATC')
        if not self.add_to_cart():
            self.log('Failed to ATC')
            return
        self.log('Attempting checkout')
        if self.checkout():
            self.log('Checked out')
        else:
            self.log('Failed to check out')

    def log(self, text):
        """Print *text* prefixed with a blue timestamp."""
        stamp = datetime.now()
        print('== {} == {}'.format(colored(str(stamp), 'blue'), str(text)))

    def scrape_product(self):
        """Scan the category page for a product whose name contains
        name_key and whose colourway contains style_key.

        On a match, stores the product URL in self.found_link and returns
        True; returns False otherwise (or on a bad HTTP status).
        """
        category_url = 'http://supremenewyork.com/shop/all/{}'.format(
            self.settings['category'])
        urls = []
        names = []
        styles = []
        self.log('Opening category page')
        resp = self.session.request('GET', category_url, verify=False)
        if resp.status_code != 200:
            self.log('Encountered bad status code {}'.format(resp.status_code))
            return False
        soup = BeautifulSoup(resp.text, 'html.parser')
        # Each product renders two a.name-link anchors sharing one href:
        # the first carries the product name, the second the colourway.
        for anchor in soup.select('a.name-link'):
            href = anchor.get('href')
            if href not in urls:
                names.append(anchor.get_text())
                urls.append(href)
            else:
                styles.append(anchor.get_text())
        self.log('Found {} products ( {} names / {} styles)'.format(
            str(len(urls)), str(len(names)), str(len(styles))))
        self.log('Checking against keyword "{}" & style "{}"'.format(
            self.settings['name_key'], self.settings['style_key']))
        for idx, href in enumerate(urls):
            # NOTE(review): styles can be shorter than names when a product
            # lacks a second name-link, so styles[idx] may raise IndexError
            # — preserved as-is; confirm against live page markup.
            if self.settings['name_key'] in names[idx].lower():
                if self.settings['style_key'] in styles[idx].lower():
                    self.found_link = href
                    self.log('Found matching link {}'.format(self.found_link))
                    return True
        if not self.found_link:
            self.log('Did not find a matching link')
            return False

    def add_to_cart(self):
        """Resolve style/size IDs from the product JSON, add the product to
        the cart through the browser, then verify it appears in cart.json."""
        json_url = 'http://supremenewyork.com/{}.json'.format(self.found_link)
        self.log("Opening product JSON")
        resp = self.session.request('GET', json_url, verify=False)
        if resp.status_code != 200:
            self.log('Encountered bad status {} opening product JSON'.format(
                resp.status_code))
            return False
        product = resp.json()
        self.log('Getting IDs')
        for style in product['styles']:
            if style['name'].lower() != self.settings['style_key']:
                continue
            self.style_id = style['id']
            self.log('Found style ID {}'.format(self.style_id))
            for size in style['sizes']:
                if size['name'].lower() == self.settings['size_key']:
                    if size['stock_level'] == 1:
                        self.prod_id = size['id']
                        self.log('Found product ID {}'.format(self.prod_id))
                    if size['stock_level'] == 0:
                        self.log('Out of stock')
                        return False
        # (A direct form-key POST flow existed here as commented-out code;
        # the browser-driven flow below is the one in use.)
        self.session.get('http://www.supremenewyork.com/{}'.format(
            self.found_link))
        size_select = Select(
            self.session.find_element_by_xpath("//select[@id='size']"))
        size_select.select_by_value(str(self.prod_id))
        self.session.find_element_by_name("commit").click()
        time.sleep(1)
        self.log('Checking cart contents')
        cart = self.session.request(
            'GET', 'http://www.supremenewyork.com/shop/cart.json',
            verify=False)
        if str(self.prod_id) not in cart.text:
            self.log('Product ID not in cart contents')
            return False
        return True

    def checkout(self):
        """Fill the checkout form and submit after the configured delay.

        Only a few stable elements are looked up directly; the remaining
        inputs are reached with TAB keystrokes because their selectors
        change often.
        """
        self.session.get('https://www.supremenewyork.com/checkout')
        self.log('Finding form elements')
        name_field = self.session.find_element_by_id('order_billing_name')
        tel_field = self.session.find_element_by_id('order_tel')
        card_type = Select(
            self.session.find_element_by_id('credit_card_type'))
        month_sel = Select(
            self.session.find_element_by_id('credit_card_month'))
        year_sel = Select(
            self.session.find_element_by_id('credit_card_year'))
        submit_btn = self.session.find_element_by_xpath(
            "//input[@type='submit']")
        self.log('Entering checkout details')
        name_field.send_keys(
            self.settings['f_name'] + ' ' + self.settings['l_name'],
            Keys.TAB, self.settings['email'], Keys.TAB)
        tel_field.send_keys(
            self.settings['phone'], Keys.TAB, self.settings['address'],
            Keys.TAB, self.settings['address2'], Keys.TAB,
            self.settings['zip'], Keys.TAB)
        card_type.select_by_visible_text(self.settings['type'])
        card_chain = ActionChains(self.session)
        card_chain.send_keys(Keys.TAB, self.settings['cc'])
        card_chain.perform()
        month_sel.select_by_value(self.settings['month'])
        year_sel.select_by_value(self.settings['year'])
        cvv_chain = ActionChains(self.session)
        cvv_chain.send_keys(Keys.TAB, self.settings['cvv'],
                            Keys.TAB, Keys.SPACE)
        cvv_chain.perform()
        # Delay before submitting to avoid ghost checkouts.
        time.sleep(self.settings['delay'])
        submit_btn.click()
        return True
driver.find_elements_by_xpath(".//a[contains(@href, " + "'" + color[30:] + "'" + ")]")) path = "" for item_path in item_paths: if usr_color in item_path[2].get_attribute("text"): path = item_path[2].get_attribute("href") if (path == ""): path = item_paths[0][2].get_attribute("href") print("https://www.supremenewyork.com" + path) driver.get(path) size_codes = [] try: size_codes.append("S " + driver.find_element_by_xpath( "//option[text() = 'Small']").get_attribute("value")) except: e = sys.exc_info()[0] try: size_codes.append("M " + driver.find_element_by_xpath( "//option[text() = 'Medium']").get_attribute("value")) except: e = sys.exc_info()[0] try: size_codes.append("L " + driver.find_element_by_xpath( "//option[text() = 'Large']").get_attribute("value")) except: e = sys.exc_info()[0] try: size_codes.append("X " + driver.find_element_by_xpath( "//option[text() = 'XLarge']").get_attribute("value"))
def location_page(driver): print driver # 登录 driver.get('https://www.alimama.com/member/login.htm') while driver.current_url != 'https://www.alimama.com/index.htm': time.sleep(1) # 跳转到超级搜索 driver.get('http://pub.alimama.com/promo/search/index.htm') time.sleep(1) # 搜索某一商品 txt_search = driver.find_element_by_xpath('//*[@id="q"]') bt_search = driver.find_element_by_xpath( '//*[@id="magix_vf_header"]/div/div/div[2]/div[2]/button') txt_search.send_keys('https://detail.tmall.com/item.htm?id=538096499721') bt_search.click() time.sleep(3) # 跳转到商品的店铺 shop = driver.find_element_by_xpath( '//*[@id="J_search_results"]/div/div/div[3]/div[1]') shop.click() time.sleep(5) # 获取userNumberId print driver.current_url print driver.current_window_handle
class DigitalCommonsConnection:
    """Scrapes dissertation decision metadata from a Digital Commons site
    (trace.tennessee.edu) through a Chrome WebDriver session.

    On construction it logs in, collects dissertation links from the 2015
    index page, extracts each dissertation's article id, then visits every
    editor decision page and prints its metadata.
    """

    def __init__(self, user, password):
        """Open Chrome, log in, and run the full scrape pipeline.

        user: account email; password: account password.
        """
        self.options = Options()
        self.driver = Chrome(
            executable_path=os.path.abspath("/usr/bin/chromedriver"),
            options=self.options)
        self.login(user, password)
        self.dissertations = self.get_list_of_dissertations()
        self.lookup_values = self.__review_dissertations()
        self.__lookup_decisions()

    def login(self, username, passwd):
        """Sign in via the Digital Commons 'My Account' form."""
        self.driver.get(
            'https://trace.tennessee.edu/cgi/myaccount.cgi?context=')
        self.driver.find_element_by_id('auth_email').send_keys(username)
        self.driver.find_element_by_id('auth_password').send_keys(passwd)
        self.driver.find_element_by_xpath(
            '/html/body/div[2]/div/div[3]/div[1]/div[1]/div/div[2]/div[1]/div/form/div/p/button'
        ).click()

    def get_list_of_dissertations(self):
        """Return the hrefs of all dissertations on the 2015 index page."""
        self.driver.get(
            'https://trace.tennessee.edu/utk_graddiss/index.11.html#year_2015')
        listings = self.driver.find_elements_by_css_selector(
            '.article-listing > a')
        # Iterate the elements directly instead of range(len(...)).
        return [link.get_attribute('href') for link in listings]

    def __review_dissertations(self):
        """Visit each dissertation page and extract its article id from the
        title link's href (the value between '=' and the first '&')."""
        lookups = []
        for dissertation in self.dissertations:
            self.driver.get(dissertation)
            link = self.driver.find_element_by_css_selector('#title > p > a')
            lookups.append(
                link.get_attribute('href').split('=')[1].split('&')[0])
        return lookups

    def __lookup_decisions(self):
        """Visit each article's decision pages and print their metadata.

        Missing metadata elements are skipped (best-effort scrape).
        """
        for article_id in self.lookup_values:
            self.driver.get(
                f'https://trace.tennessee.edu/cgi/editor.cgi?article={article_id}'
                f'&window=viewdecisions&context=utk_graddiss')
            decisions = self.driver.find_elements_by_css_selector(
                '.MenuMain > tbody > tr > td > table > tbody > tr > td > a')
            # BUG FIX: the original comprehension compared the loop *index*
            # (an int) against the help-page URL string, so the filter never
            # excluded anything. Compare each element's href instead.
            all_decisions = [
                href
                for href in (d.get_attribute('href') for d in decisions)
                if href !=
                'https://trace.tennessee.edu/cgi/help.cgi?context=utk_graddiss&help=help-submissions.html#'
            ]
            for decision in all_decisions:
                self.driver.get(decision)
                try:
                    final_decision_metadata = (
                        self.driver.find_element_by_css_selector(
                            '.MenuMain > tbody > tr > td > span'))
                    decision_metadata = final_decision_metadata.text
                    print(decision_metadata.split('\n'))
                    # Looked up but unused in the original; kept because the
                    # lookup's NoSuchElementException feeds the except below.
                    final_decision = self.driver.find_element_by_css_selector(
                        '.MenuMain > tbody > tr > td > pre')
                except NoSuchElementException:
                    # Best-effort: pages without metadata are skipped.
                    pass
        return