def main():
    model_name = MODEL_FILENAME
    if len(sys.argv) == 2:
        model_name = sys.argv[1]
    print('Use model: ' + model_name)
    model = load_model(model_name)

    account_info = get_account_from_ini(INI_FILENAME)
    print('ID: "' + account_info['user_account'] + '" start login test ...')

    for i in xrange(sys.maxint):  # Python 2: effectively loop forever
        driver = seleniumrequests.Chrome(DRIVER_PATH)
        driver.get(LOGIN_URL)

        img = get_vcode(driver)
        vcode = convert_vcode(img)

        print('Start predict (' + str(i) + ')!')
        vcode_str = predict(vcode, model_name, model)
        print('Get vcode: "' + vcode_str + '"')

        if_login = login(driver, account_info, vcode_str)
        print('Result: ' + str(if_login))

        if not if_login and not SHOW_IMG:
            ImageOps.invert(img) \
                .filter(ImageFilter.ModeFilter) \
                .convert('L') \
                .show()

        if not if_login or KEEP_TEST:
            driver.close()
        else:
            break

    raw_input('Click ENTER to close!!')
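# The snippet above defines main() but shows no entry point; a conventional
# guard, assuming the function lives in a standalone script (this guard is an
# addition, not part of the original source):
if __name__ == '__main__':
    main()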
def first_load():
    global data, user, table

    # headless Chrome configured for constrained (e.g. Heroku-style) environments
    options = webdriver.ChromeOptions()
    options.binary_location = os.environ["GOOGLE_CHROME_BIN"]
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--headless")
    browser = seleniumrequests.Chrome(
        executable_path=os.environ["CHROMEDRIVER_PATH"],
        chrome_options=options)
    browser.implicitly_wait(60)

    logger.info("Starting crawler ita (first load)")
    crawler1 = Crawler(browser, user)
    crawler1.run(1)
    logger.info("Starting parse ita (first load)")
    export_to_csv("report_ita", crawler1.table)
    table = crawler1.table_raw

    logger.info("Starting crawler srv (first load)")
    crawler2 = Crawler(browser, user)
    crawler2.run(2)
    logger.info("Starting parse srv (first load)")
    export_to_csv("report_srv", crawler2.table)
    table += crawler2.table_raw

    browser.quit()

    data, updates = update_from_csv("report_ita", data)
    data, updates = update_from_csv("report_srv", data)
    last_update = datetime.now() - timedelta(hours=3)
def update_billing(bot, job):
    global data, user, table

    # headless Chrome configured for constrained (e.g. Heroku-style) environments
    options = webdriver.ChromeOptions()
    options.binary_location = os.environ["GOOGLE_CHROME_BIN"]
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--headless")
    browser = seleniumrequests.Chrome(
        executable_path=os.environ["CHROMEDRIVER_PATH"],
        chrome_options=options)
    browser.implicitly_wait(60)

    logger.info("Starting crawler ita")
    crawler1 = Crawler(browser, user)
    crawler1.run(1)
    logger.info("Starting parse ita")
    export_to_csv("report_ita", crawler1.table)
    table = crawler1.table_raw

    logger.info("Starting crawler srv")
    crawler2 = Crawler(browser, user)
    crawler2.run(2)
    logger.info("Starting parse srv")
    export_to_csv("report_srv", crawler2.table)
    table += crawler2.table_raw

    browser.quit()

    # accumulate updates from both reports; the original overwrote the "ita"
    # updates with the "srv" ones before notifying
    data, updates_ita = update_from_csv("report_ita", data)
    data, updates_srv = update_from_csv("report_srv", data)
    updates = updates_ita + updates_srv
    last_update = datetime.now() - timedelta(hours=3)

    logger.info(job.context.from_user.first_name +
                " - Checking and sending update notifications")
    if len(updates):
        for update in updates:
            for chat_id in id_list:
                if update.auth(chat_id):
                    logger.info(ID_TO_NAME[chat_id] + " - Order #" + update.data["Pedido"])
                    bot.send_message(chat_id=chat_id, text=str(update))
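# update_billing(bot, job) matches the old-style (pre-v12) python-telegram-bot
# JobQueue callback signature, and it reads job.context.from_user, so the job
# is presumably scheduled with a Telegram message/user as its context. A hedged
# wiring sketch; BOT_TOKEN, the one-hour interval and the None context are
# placeholders, not taken from the source:
from telegram.ext import Updater

BOT_TOKEN = "..."  # placeholder token
updater = Updater(token=BOT_TOKEN)
updater.job_queue.run_repeating(update_billing, interval=3600, first=0,
                                context=None)  # real code would pass a message here
updater.start_polling()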
def PDBReader(file, timeout=600):
    """
    Accesses http://charmm-gui.org and uses the PDB Reader.

    Parameters
    ----------
    file : str
        Path to the input PDB file.
    timeout : int
        Timeout in seconds.

    Returns
    -------
    filename_output : str
        The absolute path to the output TGZ archive.
    """
    def autoClicker(id, timeout):
        # deals with some rare cases of an unclickable element
        for i in range(timeout):
            try:
                elem = driver.find_element_by_id(id)
                elem.click()
                return
            except _exceptions.WebDriverException:
                _time.sleep(1)
        # final attempt; lets the exception propagate if it still fails
        elem = driver.find_element_by_id(id)
        elem.click()

    file = _os.path.abspath(file)

    options = _options.Options()
    options.headless = True
    try:
        driver = _seleniumrequests.Chrome(options=options)
    except _exceptions.WebDriverException:
        try:
            driver = _seleniumrequests.Firefox(options=options)
        except _exceptions.WebDriverException:
            raise SystemError("Need either Chrome or Firefox for CHARMM-GUI "
                              "functionality.")

    _logging.info("Accessing http://www.charmm-gui.org ...")
    driver.get("http://www.charmm-gui.org/?doc=input/pdbreader")
    pdb_element = driver.find_element_by_name("file")
    pdb_element.send_keys(file)
    pdb_radio = driver.find_element_by_xpath("//input[@name='pdb_format' and "
                                             "@value='PDB']")
    pdb_radio.click()
    autoClicker("nextBtn", 60)

    # could add some support for options. For now, we just go with the
    # defaults.
    wait = _wait.WebDriverWait(driver, timeout)
    wait.until(_EC.element_to_be_clickable((_by.By.ID, "nextBtn")))
    autoClicker("nextBtn", 60)
    wait.until(_EC.element_to_be_clickable((_by.By.ID, "nextBtn")))
    autoClicker("nextBtn", 60)

    try:
        _logging.info("Retrieving files...")
        wait.until(
            _EC.visibility_of_any_elements_located(
                (_by.By.CLASS_NAME, "download")))
    # Selenium raises TimeoutException (not the builtin TimeoutError) on
    # expired waits
    except _exceptions.TimeoutException:
        raise ConnectionError("Could not retrieve any files. Please increase "
                              "the maximum timeout or try again later.")

    _logging.info("Downloading TGZ archive...")
    filebase = _os.path.splitext(file)[0]
    tgz_file = driver.find_elements_by_partial_link_text(".tgz")[0]
    response = driver.request('POST', tgz_file.get_attribute("href"),
                              verify=False, stream=True)
    with open(filebase + "_CHARMM.tgz", "wb") as file:
        file.write(response.raw.read())
    driver.quit()

    return filebase + "_CHARMM.tgz"
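# A minimal usage sketch for PDBReader(); "protein.pdb" is a placeholder
# input file, not something shipped with this code:
archive = PDBReader("protein.pdb", timeout=900)
print("CHARMM-GUI archive written to", archive)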
def ligandReader(file, timeout=60, find_similar_residues=False):
    """
    Accesses http://charmm-gui.org and uses the Ligand Reader.

    Parameters
    ----------
    file : str
        Path to the input ligand file.
    timeout : int
        Timeout in seconds.
    find_similar_residues : bool
        Whether to tick the "Find similar residues" checkbox before searching.

    Returns
    -------
    filename_output : str
        The absolute path to the output TGZ archive.
    """
    file = _os.path.abspath(file)

    options = _options.Options()
    options.headless = True
    try:
        driver = _seleniumrequests.Chrome(options=options)
    except _exceptions.WebDriverException:
        try:
            driver = _seleniumrequests.Firefox(options=options)
        except _exceptions.WebDriverException:
            raise SystemError("Need either Chrome or Firefox for CHARMM-GUI "
                              "functionality.")

    _logging.info("Accessing http://www.charmm-gui.org ...")
    driver.get("http://www.charmm-gui.org/?doc=input/ligandrm")
    pdb_element = driver.find_element_by_name("file2")
    pdb_element.send_keys(file)
    upload_button = driver.find_element_by_xpath(
        "//input[@type='button' and @value='Upload MOL/MOL2/SDF']")
    upload_button.click()
    driver.switch_to.alert.accept()
    _time.sleep(5)

    if find_similar_residues:
        checkbox = driver.find_element_by_name("simi")
        checkbox.click()

    next_button = driver.find_element_by_id("nextBtn")
    next_button.click()

    # could add some support for options. For now, we just go with the
    # defaults.
    next_button = driver.find_element_by_id("nextBtn")
    next_button.click()

    try:
        _logging.info("Retrieving files...")
        wait = _wait.WebDriverWait(driver, timeout)
        wait.until(
            _EC.visibility_of_any_elements_located(
                (_by.By.CLASS_NAME, "download")))
    # Selenium raises TimeoutException (not the builtin TimeoutError) on
    # expired waits
    except _exceptions.TimeoutException:
        raise ConnectionError("Could not retrieve any files. Please increase "
                              "the maximum timeout or try again later.")

    _logging.info("Downloading TGZ archive...")
    filebase = _os.path.splitext(file)[0]
    tgz_file = driver.find_elements_by_partial_link_text(".tgz")[0]
    response = driver.request('POST', tgz_file.get_attribute("href"),
                              verify=False, stream=True)
    with open(filebase + "_CHARMM.tgz", "wb") as file:
        file.write(response.raw.read())
    driver.quit()

    return filebase + "_CHARMM.tgz"
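# The equivalent hypothetical call for the Ligand Reader; "ligand.mol2" is a
# placeholder path:
archive = ligandReader("ligand.mol2", timeout=120, find_similar_residues=True)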
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
import seleniumrequests
from datetime import datetime
from time import strftime, time, sleep

from scripts.scraping.scrape_politican import get_filer_info
from scripts.utils import PrintException

driver = seleniumrequests.Chrome()
try:
    driver.get("https://www.ethics.state.tx.us/Jasper/AdvancedSearch.html")

    transaction_type = Select(driver.find_element_by_name("transaction"))
    transaction_type.select_by_visible_text("Contributions")

    search_type = Select(driver.find_element_by_name("searchtype"))
    search_type.select_by_visible_text("By Filer ID")

    datetype = Select(driver.find_element_by_name("datetype"))
    datetype.select_by_visible_text("By Specific Date Range")

    begin_date = driver.find_element_by_name("begin_date")
    begin_date.send_keys("2000-01-01")

    end_date = driver.find_element_by_name("end_date")
    # The source snippet is truncated here; the remaining form fields,
    # submission and result handling are not shown. The finally clause below
    # closes the otherwise unterminated try block so the snippet parses.
finally:
    driver.quit()
def main(method, url, webdriver, input_file):
    """
    The tool is intended to extract all links (complete and relative ones)
    from HTML tag attributes.
    """
    try:
        # checking for the supported methods
        if not method.upper() in ['GET', 'POST', 'OPTIONS', 'PUT', 'DELETE']:
            raise ValueError("Method %s is not supported." % method)

        # suppressing unsafe HTTPS warnings
        requests.packages.urllib3.disable_warnings()

        if webdriver:
            # if the -w switch is present - use a webdriver instead of the requests module
            print('[+] Starting up a webdriver')
            driver = seleniumrequests.Chrome('chromedriver.exe')
            print('[+] Retrieving ' + url)
            response = driver.request(method, url, verify=False)  # get the response
        else:
            print('[+] Retrieving ' + url)
            response = requests.request(method, url, verify=False)  # get the response

        # if there was a redirect - acquire the URL from the response
        url = response.url.rstrip('/')

        # neat print of the headers
        print("[+] Received the response HTTP %d" % response.status_code)
        for header in response.headers:
            print(header + ':', response.headers[header])

        # parsing the HTML contents
        raw_html = response.content
        parsed_html = BeautifulSoup(raw_html, "html.parser")

        # gathering a list of links from specific elements
        script_elements = [element['src'] for element in parsed_html.select('script[src]')]
        anchor_elements = [element['href'] for element in parsed_html.select('a[href]')]
        link_elements = [element['href'] for element in parsed_html.select('link[href]')]
        form_elements = [element['action'] for element in parsed_html.select('form[action]')]
        iframe_elements = [element['src'] for element in parsed_html.select('iframe[src]')]
        links = (script_elements + anchor_elements + link_elements +
                 form_elements + iframe_elements)

        # removing non-interesting schemes and resolving relative links
        print('\n[+] Tidying up the links')
        links = [link for link in links
                 if urlparse(link).scheme not in ['mailto', 'skype', 'tel']]
        links = [urljoin(url, link) for link in links]

        # final links count and listing
        unique_links = set(links)
        if input_file is not None:
            print('[+] Writing links into the file')
            with open(input_file, "w") as file:
                file.write('Links on ' + url + ':')
                for link in unique_links:
                    file.write('\n' + link)
        else:
            for link in unique_links:
                print(link)
        print("[+] Total %d unique links extracted (%d duplicates removed)"
              % (len(unique_links), len(links) - len(unique_links)))

    except Exception as e:
        print("[-] Something went wrong: %s" % e)
    except KeyboardInterrupt:
        print("[x] Exiting by user command")
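# The extractor above reads like a CLI tool (its comments mention a -w switch)
# but the source shows no entry point. A hypothetical argparse wrapper; the
# flag names are assumptions chosen to match those comments:
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description="Extract links from HTML tag attributes")
    parser.add_argument('url')
    parser.add_argument('-m', '--method', default='GET',
                        help="HTTP method (GET, POST, OPTIONS, PUT, DELETE)")
    parser.add_argument('-w', '--webdriver', action='store_true',
                        help="fetch via seleniumrequests.Chrome instead of requests")
    parser.add_argument('-o', '--output', dest='input_file', default=None,
                        help="write the extracted links to this file")
    args = parser.parse_args()
    main(args.method, args.url, args.webdriver, args.input_file)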
# Trailing lines from a preceding block that is truncated in the source;
# vericode and btn_logout are defined there.
img.save('captchas/{}.jpg'.format(vericode))
btn_logout[0].submit()


def predict_captcha(model, browser, url):
    # fetch the captcha image through the authenticated browser session
    response = browser.request('GET', url)
    img = PIL.Image.open(BytesIO(response.content))
    img_bin = capgen.binarization(img)
    X1 = np.zeros((1, 25, 96, 3), dtype=np.uint8)
    X1[0] = img_bin
    y1 = model.predict(X1)
    vericode = network.decode(y1)
    return img, vericode


model = network.create_model()
model.load_weights('my_model_weights_gen.h5')
user_id = input("Enter username:")
password = input("Enter password:")
# The credential handling and login steps were redacted ("******") in the
# source; only the captcha lookup below survives.
img = browser.find_element_by_xpath("//img[@id='imgVeriCode']")
#url = img.get_attribute('src')
#vericode = predict_captcha(model, browser, url)
browser.quit()