Code example #1
File: final.py, Project: happyincent/ML_final
def main():
    model_name = MODEL_FILENAME
    if len(sys.argv) == 2:
        model_name = sys.argv[1]
    print('Use model: ' + model_name)
    model = load_model(model_name)

    account_info = get_account_from_ini(INI_FILENAME)
    print('ID: "' + account_info['user_account'] + '" start login test ...')

    for i in xrange(sys.maxint):
        driver = seleniumrequests.Chrome(DRIVER_PATH)
        driver.get(LOGIN_URL)

        img = get_vcode(driver)
        vcode = convert_vcode(img)

        print('Start predict (' + str(i) + ')!')
        vcode_str = predict(vcode, model_name, model)
        print('Get vcode: "' + vcode_str + '"')
        if_login = login(driver, account_info, vcode_str)
        print('Result: ' + str(if_login))

        if not if_login and not SHOW_IMG:
            ImageOps.invert(img)\
            .filter(ImageFilter.ModeFilter)\
            .convert('L')\
            .show()

        if not if_login or KEEP_TEST:
            driver.close()
        else:
            break

    raw_input('Click ENTER to close!!')
Code example #2
def first_load():
    global data, user, table
    options = webdriver.ChromeOptions()
    options.binary_location = os.environ["GOOGLE_CHROME_BIN"]
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--headless")
    browser = seleniumrequests.Chrome(executable_path=os.environ["CHROMEDRIVER_PATH"], chrome_options=options)
    browser.implicitly_wait(60)

    logger.info("Iniciando crawler ita (first load)")
    crawler1 = Crawler(browser, user)
    crawler1.run(1)
    logger.info("Iniciando parse ita (first load)")
    export_to_csv("report_ita", crawler1.table)
    table = crawler1.table_raw

    logger.info("Iniciando crawler srv (first load)")
    crawler2 = Crawler(browser, user)
    crawler2.run(2)
    logger.info("Iniciando parse srv (first load)")
    export_to_csv("report_srv", crawler2.table)
    table += crawler2.table_raw
    
    browser.quit()
    data, updates = update_from_csv("report_ita", data)
    data, updates = update_from_csv("report_srv", data)
    last_update = datetime.now() - timedelta(hours=3)
Code example #3
def update_billing(bot, job):
    global data, user, table
    options = webdriver.ChromeOptions()
    options.binary_location = os.environ["GOOGLE_CHROME_BIN"]
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--headless")
    browser = seleniumrequests.Chrome(executable_path=os.environ["CHROMEDRIVER_PATH"], chrome_options=options)
    browser.implicitly_wait(60)

    logger.info("Iniciando crawler ita")
    crawler1 = Crawler(browser, user)
    crawler1.run(1)
    logger.info("Iniciando parse ita")
    export_to_csv("report_ita", crawler1.table)
    table = crawler1.table_raw

    logger.info("Iniciando crawler srv")
    crawler2 = Crawler(browser, user)
    crawler2.run(2)
    logger.info("Iniciando parse srv")
    export_to_csv("report_srv", crawler2.table)
    table += crawler2.table_raw
    
    browser.quit()
    data, updates = update_from_csv("report_ita", data)
    data, updates = update_from_csv("report_srv", data)
    last_update = datetime.now() - timedelta(hours=3)

    logger.info(job.context.from_user.first_name + " - Checkando e notificando atualizações")
    if len(updates):
        for update in updates:
            for chat_id in id_list:
                if update.auth(chat_id):
                    logger.info(ID_TO_NAME[chat_id] + " - Pedido Nº" + update.data["Pedido"])
                    bot.send_message(chat_id=chat_id, text=str(update))
Code example #4
def PDBReader(file, timeout=600):
    """
    Accesses http://charmm-gui.org and uses the PDB Reader.

    Parameters
    ----------
    file : str
        Path to the input PDB file.
    timeout : int
        Timeout in seconds.

    Returns
    -------
    filename_output : str
        The absolute path to the output TGZ archive.
    """
    def autoClicker(id, timeout):
        # deals with some rare cases of an unclickable element
        for i in range(timeout):
            try:
                elem = driver.find_element_by_id(id)
                elem.click()
                return
            except _exceptions.WebDriverException:
                _time.sleep(1)
        elem = driver.find_element_by_id(id)
        elem.click()

    file = _os.path.abspath(file)
    options = _options.Options()
    options.headless = True

    try:
        driver = _seleniumrequests.Chrome(options=options)
    except _exceptions.WebDriverException:
        try:
            driver = _seleniumrequests.Firefox(options=options)
        except _exceptions.WebDriverException:
            raise SystemError("Need either Chrome or Firefox for CHARMM-GUI "
                              "functionality.")

    _logging.info("Accessing http://www.charmm-gui.org ...")
    driver.get("http://www.charmm-gui.org/?doc=input/pdbreader")

    pdb_element = driver.find_element_by_name("file")
    pdb_element.send_keys(file)

    pdb_radio = driver.find_element_by_xpath("//input[@name='pdb_format' and "
                                             "@value='PDB']")
    pdb_radio.click()

    autoClicker("nextBtn", 60)

    # could add some support for options. For now, we just go with the
    # defaults.
    wait = _wait.WebDriverWait(driver, timeout)
    wait.until(_EC.element_to_be_clickable((_by.By.ID, "nextBtn")))
    autoClicker("nextBtn", 60)

    wait.until(_EC.element_to_be_clickable((_by.By.ID, "nextBtn")))
    autoClicker("nextBtn", 60)

    try:
        _logging.info("Retrieving files...")
        wait.until(
            _EC.visibility_of_any_elements_located(
                (_by.By.CLASS_NAME, "download")))
    except _exceptions.TimeoutException:
        raise ConnectionError("Could not retrieve any files. Please increase "
                              "the maximum timeout or try again later.")

    _logging.info("Downloading TGZ archive...")
    filebase = _os.path.splitext(file)[0]
    tgz_file = driver.find_elements_by_partial_link_text(".tgz")[0]
    response = driver.request('POST',
                              tgz_file.get_attribute("href"),
                              verify=False,
                              stream=True)
    with open(filebase + "_CHARMM.tgz", "wb") as file:
        file.write(response.raw.read())

    driver.quit()
    return filebase + "_CHARMM.tgz"
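
A minimal usage sketch for PDBReader (the input path and timeout are hypothetical; it assumes Chrome or Firefox with a matching driver is installed):

# Hypothetical input file; PDBReader blocks until CHARMM-GUI finishes,
# then returns the absolute path of the downloaded TGZ archive.
archive = PDBReader("protein.pdb", timeout=1200)
print("CHARMM-GUI archive written to: " + archive)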
Code example #5
def ligandReader(file, timeout=60, find_similar_residues=False):
    """
    Accesses http://charmm-gui.org and uses the Ligand Reader.

    Parameters
    ----------
    file : str
        Path to the input ligand file.
    timeout : int
        Timeout in seconds.
    find_similar_residues : bool
        Whether to tick the "Find similar residues" checkbox before searching.

    Returns
    -------
    filename_output : str
        The absolute path to the output TGZ archive.
    """
    file = _os.path.abspath(file)
    options = _options.Options()
    options.headless = True

    try:
        driver = _seleniumrequests.Chrome(options=options)
    except _exceptions.WebDriverException:
        try:
            driver = _seleniumrequests.Firefox(options=options)
        except _exceptions.WebDriverException:
            raise SystemError("Need either Chrome or Firefox for CHARMM-GUI "
                              "functionality.")

    _logging.info("Accessing http://www.charmm-gui.org ...")
    driver.get("http://www.charmm-gui.org/?doc=input/ligandrm")

    pdb_element = driver.find_element_by_name("file2")
    pdb_element.send_keys(file)

    upload_button = driver.find_element_by_xpath(
        "//input[@type='button' and @value='Upload MOL/MOL2/SDF']")
    upload_button.click()

    driver.switch_to.alert.accept()

    _time.sleep(5)

    if find_similar_residues:
        checkbox = driver.find_element_by_name("simi")
        checkbox.click()

    next_button = driver.find_element_by_id("nextBtn")
    next_button.click()

    # could add some support for options. For now, we just go with the
    # defaults.
    next_button = driver.find_element_by_id("nextBtn")
    next_button.click()

    try:
        _logging.info("Retrieving files...")
        wait = _wait.WebDriverWait(driver, timeout)
        wait.until(
            _EC.visibility_of_any_elements_located(
                (_by.By.CLASS_NAME, "download")))
    except _exceptions.TimeoutException:
        raise ConnectionError("Could not retrieve any files. Please increase "
                              "the maximum timeout or try again later.")

    _logging.info("Downloading TGZ archive...")
    filebase = _os.path.splitext(file)[0]
    tgz_file = driver.find_elements_by_partial_link_text(".tgz")[0]
    response = driver.request('POST',
                              tgz_file.get_attribute("href"),
                              verify=False,
                              stream=True)
    with open(filebase + "_CHARMM.tgz", "wb") as file:
        file.write(response.raw.read())

    driver.quit()
    return filebase + "_CHARMM.tgz"
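
A similar sketch for ligandReader, here ticking the "Find similar residues" box (the ligand file name is hypothetical):

# Hypothetical MOL/MOL2/SDF input; the return value is the path to the
# downloaded "_CHARMM.tgz" archive.
archive = ligandReader("ligand.mol2", timeout=120, find_similar_residues=True)
print("Ligand archive written to: " + archive)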
Code example #6
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import seleniumrequests
from selenium.webdriver.support.ui import Select
from datetime import datetime
from time import strftime
from scripts.scraping.scrape_politican import get_filer_info
from scripts.utils import PrintException

from time import time, sleep

driver = seleniumrequests.Chrome()

try:
    driver.get("https://www.ethics.state.tx.us/Jasper/AdvancedSearch.html")

    transaction_type = Select(driver.find_element_by_name("transaction"))
    transaction_type.select_by_visible_text("Contributions")

    search_type = Select(driver.find_element_by_name("searchtype"))
    search_type.select_by_visible_text("By Filer ID")

    datetype = Select(driver.find_element_by_name("datetype"))
    datetype.select_by_visible_text("By Specific Date Range")

    begin_date = driver.find_element_by_name("begin_date")
    begin_date.send_keys("2000-01-01")

    end_date = driver.find_element_by_name("end_date")
Code example #7
def main(method, url, webdriver, input_file):
    """
    The tool is intended to extract all links (complete and relative ones) from HTML tag attributes
    """
    try:
        # checking for the supported methods
        if not method.upper() in ['GET', 'POST', 'OPTIONS', 'PUT', 'DELETE']:
            raise ValueError("Method %s is not supported." % method)

        requests.packages.urllib3.disable_warnings(
        )  # suppressing unsafe HTTPS warnings

        if webdriver:  # if the -w switch is present - switch to webdriver instead of requests module
            print('[+] Starting up a webdriver')
            driver = seleniumrequests.Chrome('chromedriver.exe')

            print('[+] Retrieving ' + url)
            response = driver.request(method, url,
                                      verify=False)  # get the response
        else:
            print('[+] Retrieving ' + url)
            response = requests.request(method, url,
                                        verify=False)  # get the response

        url = response.url.rstrip(
            '/')  # if there was a redirect - acquire the URL from the response

        # neat print headers
        print("[+] Received the response HTTP %d" % response.status_code)
        for header in response.headers:
            print(header + ':', response.headers[header])

        # assigning HTML contents
        raw_html = response.content
        parsed_html = BeautifulSoup(raw_html, "html.parser")

        # gathering a list of links from specific elements
        script_elements = [
            element['src'] for element in parsed_html.select('script[src]')
        ]
        anchor_elements = [
            element['href'] for element in parsed_html.select('a[href]')
        ]
        link_elements = [
            element['href'] for element in parsed_html.select('link[href]')
        ]
        form_elements = [
            element['action'] for element in parsed_html.select('form[action]')
        ]
        iframe_elements = [
            element['src'] for element in parsed_html.select('iframe[src]')
        ]
        links = script_elements + anchor_elements + link_elements + form_elements + iframe_elements

        # removing bookmarks, non-interesting schemes and '/'
        print('\n[+] Tidying up the links')
        links = [
            link for link in links
            if not urlparse(link).scheme in ['mailto', 'skype', 'tel']
        ]
        links = [urljoin(url, link)
                 for link in links]  # gathering links together

        # final links count and listing
        unique_links = set(links)

        if input_file is not None:
            file = open(input_file, "w")
            file.write('Links on ' + url + ':')
            print('[+] Writing links into the file')
            for link in unique_links:
                file.write('\n' + link)
            file.close()
        else:
            for link in unique_links:
                print(link)

        print("[+] Total %d unique links extracted (%d duplicates removed)" %
              (len(unique_links), len(links) - len(unique_links)))

    except Exception as e:
        print("[-] Something went wrong: %s" % e)

    except KeyboardInterrupt:
        print("[x] Exiting by user command")
Code example #8
            img.save('captchas/{}.jpg'.format(vericode))
            btn_logout[0].submit()


def predict_captcha(model, browser, url):
    response = browser.request('GET', url)
    img = PIL.Image.open(BytesIO(response.content))
    img_bin = capgen.binarization(img)
    X1 = np.zeros((1, 25, 96, 3), dtype=np.uint8)
    X1[0] = img_bin
    y1 = model.predict(X1)
    vericode = network.decode(y1)
    return img, vericode


model = network.create_model()
model.load_weights('my_model_weights_gen.h5')

user_id = input("Enter username:")
# (lines redacted in the source: a password prompt "Enter password:" and a
#  lookup of the element at XPath "//img[@id='imgVeriCode']")
#url = img.get_attribute('src')
#vericode = predict_captcha(model, browser, url)

browser.quit()