Example #1
0
 def wrapped(*args, **kwargs):
     """Solve a reCAPTCHA on the current page, if any, then call ``method``.

     The first positional argument must expose a Selenium ``driver``
     attribute. When the page has no ``.g-recaptcha`` element the wrapped
     method is called straight away.

     Raises:
         CaptchaError: A CAPTCHA element was found but reading the
             captcha service API key from ``Configs`` raised ``KeyError``.
     """
     driver = args[0].driver
     try:
         captcha_div = driver.find_element_by_css_selector('.g-recaptcha')
         site_key = captcha_div.get_attribute('data-sitekey').strip()
         print(f're-captcha site-key: {site_key}')
         # Build Configs before the inner try so the error message below can
         # always reference ``configs`` (it would be unbound if the
         # constructor itself raised KeyError inside the try).
         configs = Configs()
         try:
             anticaptcha_api_key = configs.captcha_service_api_key
         except KeyError as err:
             msg = f'CAPTCHA sites require Anticaptcha.com API key set in {configs.config_file_path}'
             raise CaptchaError(msg) from err
     except selenium.common.exceptions.NoSuchElementException:
         # No CAPTCHA widget on this page; nothing to solve.
         site_key = None
     if site_key:
         site_url = driver.current_url
         solver = recaptchaV2Proxyless()
         solver.set_verbose(1)
         solver.set_key(anticaptcha_api_key)
         solver.set_website_url(site_url)
         solver.set_website_key(site_key)
         g_response = solver.solve_and_return_solution()
         # Pass the token as a script argument rather than splicing it into
         # the JavaScript source, so quotes or backslashes in the value
         # cannot break (or alter) the injected script.
         to_inject = 'document.querySelector(".g-recaptcha-response").innerHTML = arguments[0];'
         driver.execute_script(to_inject, g_response)
     return method(*args, **kwargs)
Example #2
0
def search(place_id, case_number, case_numbers_file, with_browser):
    """Search a court site and store the results.

    Configures file and console logging, runs the runner for ``place_id``
    against either the single ``case_number`` or every line of
    ``case_numbers_file``, caches the detail pages, and upserts the
    results into the database at ``configs.db_path``.
    """
    configs = Configs()

    # File log: DEBUG and up, appended to logfile.txt in the cache directory.
    log_dir = Path(configs.cache_dir)
    log_dir.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        filename=str(log_dir / "logfile.txt"),
        filemode="a",
        level=logging.DEBUG,
        format="%(asctime)s - %(name)-12s - %(message)s",
        datefmt="%m-%d %H:%M",
    )

    # Console log: INFO and up, message text only.
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    stream_handler.setFormatter(logging.Formatter("%(message)s"))
    logging.getLogger("").addHandler(stream_handler)
    logger = logging.getLogger(__name__)

    # Resolve the runner class for this place and execute the search.
    runner_cls = _get_runner(place_id)
    runner = runner_cls(configs.cache_dir, configs.config_file_path, place_id)
    case_numbers = (
        [case_number]
        if case_number
        else [line.strip() for line in case_numbers_file]
    )
    # TODO: Restore catch-all try/except
    results = runner.search(case_numbers=case_numbers, headless=not with_browser)
    runner.cache_detail_pages(results)

    # Persist the standardized data for every result.
    dstore = Datastore(configs.db_path)
    logger.info("Adding {} results to {}".format(len(results), configs.db_path))
    rows = []
    for case in results:
        # Place ID is required by the Case db table.
        case.place_id = place_id
        rows.append(case.standard_data)
    dstore.upsert(rows)
Example #3
0
    def search(self, case_numbers=None, headless=True, **kwargs):
        """
        For a given scraper, executes the search, acquisition
        and processing of case info.

        Args:

            case_numbers (list<str>): List of case numbers to search
                (default: None, treated as an empty list)
            headless (boolean): Whether or not to run headless (default: True)
            **kwargs: Accepted for call compatibility; not used here.

        Returns:

            Whatever ``Site.search`` returns (documented by the original
            author as a list of CaseInfo instances)

        """
        # Use a fresh list per call instead of a shared mutable default.
        if case_numbers is None:
            case_numbers = []
        # The CAPTCHA API key is read from Configs (per the original note:
        # env variable first, then the config file).
        configs = Configs()
        site = Site(self.place_id, captcha_api_key=configs.captcha_service_api_key)
        logger.info("Executing search for {}".format(self.place_id))
        return site.search(case_numbers=case_numbers, headless=headless)
Example #4
0
import logging
import os
import traceback
from pathlib import Path

import click
from click_option_group import optgroup, RequiredMutuallyExclusiveOptionGroup

from court_scraper.configs import Configs
from court_scraper.datastore import Datastore
from court_scraper.runner import Runner
from court_scraper.sites_meta import SitesMeta

# Module-level setup: make sure the cache directory exists, then send log
# records both to a file inside it and to the console.
configs = Configs()
cache_dir = Path(configs.cache_dir)
cache_dir.mkdir(parents=True, exist_ok=True)
log_file = str(cache_dir / 'logfile.txt')

# File log: INFO and up, timestamped, appended to the existing file.
logging.basicConfig(
    filename=log_file,
    filemode='a',
    level=logging.INFO,
    format='%(asctime)s - %(name)-12s - %(message)s',
    datefmt='%m-%d %H:%M',
)

# Console log: INFO and up, message text only.
formatter = logging.Formatter('%(message)s')
console = logging.StreamHandler()
console.setLevel(logging.INFO)
console.setFormatter(formatter)
logging.getLogger('').addHandler(console)

logger = logging.getLogger(__name__)


@click.group()
Example #5
0
def get_captcha_service_api_key():
    """Return ``captcha_service_api_key`` from a freshly built ``Configs``."""
    return Configs().captcha_service_api_key