Example #1
0
    def __init__(self):
        """Set up the logger and cache the ``jobs`` configuration section.

        The ``jobs`` property is read once and its ``package``, ``module``
        and ``list`` entries are stored on the instance.
        """
        # The logger is named after the concrete class of this instance.
        self._logger = logging.getLogger(type(self).__name__)

        jobs_section = Configuration().getProperty('jobs')
        lookup = jobs_section.get
        self._package = lookup("package")
        self._module = lookup("module")
        self._job_list = lookup("list")

        # Starts out unset.
        self._config_file_path = None
    def __init__(self):
        """Create one semaphore per entry of the ``traffic_limit`` section.

        Every key found under ``traffic_limit`` is mapped to a
        ``threading.Semaphore`` whose initial value is that entry's
        ``size`` property.
        """
        config = Configuration()

        # Key: cache type name; value: semaphore sized from the config.
        self._mutexes = {
            name: threading.Semaphore(
                config.getProperty(f"traffic_limit.{name}.size"))
            for name in config.getProperty("traffic_limit").keys()
        }
    def __init__(self):
        """Build one ``ThreadPoolExecutor`` per job listed in ``jobs.list``.

        A job whose ``type`` is ``'one-off'`` gets a single worker; any other
        job gets as many workers as its ``pool_size`` setting says.
        """
        self._logger = logging.getLogger(type(self).__name__)
        job_configs = Configuration().getProperty("jobs.list")

        self._pools = {}
        for name, settings in job_configs.items():
            # 'pool_size' is only read for jobs that are not one-off.
            worker_count = (
                settings["pool_size"] if 'one-off' != settings["type"] else 1)

            self._pools[name] = ThreadPoolExecutor(
                thread_name_prefix=f'task_executor_{name}',
                max_workers=worker_count)
    def __init__(self):
        """Prepare browser options, driver path and driver builder.

        Reads ``driver_path.chrome`` and ``client.tmp_dir`` from the
        configuration, builds the Chrome options used later, makes sure the
        configured driver path is on ``PATH`` and selects the builder
        callable for the configured browser type.

        ``_safe_raise_exception`` is called after each lookup; presumably it
        raises when its first argument is missing -- confirm against its
        definition.
        """
        # Headless mode is hard-coded off, so the 'headless' argument below
        # is never added at construction time.
        self._use_headless = False
        self._browser_type = CONSTANT.driver_name()

        cfg = Configuration()
        # Per-browser lookup tables; only Chrome is registered here.
        self._driver_path_mapping = {
            CONSTANT.chrome_name(): cfg.getProperty("driver_path.chrome")
        }
        self._driver_builder_mapping = {
            CONSTANT.chrome_name(): self._build_chrome_driver
        }
        self._clear_process_cmd_mapping = {
            CONSTANT.chrome_name(): self._clear_chrome_process
        }
        self._tmp_dir = cfg.getProperty("client.tmp_dir")

        options = webdriver.ChromeOptions()
        if self._use_headless:
            options.add_argument('headless')
        # Kept in its own name so the Configuration object bound to ``cfg``
        # is not shadowed by the preferences dict.
        prefs = {'download.default_directory': self._tmp_dir}
        options.add_experimental_option('prefs', prefs)
        # NOTE(review): the 'prefs' entry above already carries the download
        # directory; confirm whether this extra argument has any effect.
        options.add_argument(f"download.default_directory={self._tmp_dir}")
        options.add_argument("--start-maximized")
        options.add_argument("--disable-infobars")
        options.add_argument("--disable-extensions")
        options.add_argument('--disable-gpu')
        options.add_argument('--disable-dev-shm-usage')
        options.add_argument('--hide-scrollbars')
        self._options = options

        self._driver_path = self._driver_path_mapping.get(self._browser_type)
        self._safe_raise_exception(
            self._driver_path,
            f"Driver for browser {self._browser_type} not configured! ")

        # Add the driver location to PATH only once: the environment is
        # process-wide, so appending on every instantiation would make PATH
        # grow without bound. A missing PATH is treated as empty instead of
        # raising KeyError.
        current_path = os.environ.get('PATH', '')
        if self._driver_path not in current_path.split(os.pathsep):
            if current_path:
                os.environ['PATH'] = (
                    current_path + os.pathsep + self._driver_path)
            else:
                os.environ['PATH'] = self._driver_path

        self._builder = self._driver_builder_mapping.get(self._browser_type)
        self._safe_raise_exception(
            self._builder, f"Browser {self._browser_type} not supported")
Example #5
0
 def __init__(self):
     """Set the D-Bus session address variable and load the timeout.

     ``DBUS_SESSION_BUS_ADDRESS`` is set to ``/dev/null`` for the current
     process, then ``client.selenium.timeout`` is read from the
     configuration and stored on the instance.
     """
     os.environ["DBUS_SESSION_BUS_ADDRESS"] = "/dev/null"
     self._timeout = Configuration().getProperty("client.selenium.timeout")
Example #6
0
 def sec(self):
     """Return the ``sec`` property stored under this class in ``jobs.list``.

     The lookup key is built from the concrete class name of the instance.
     """
     property_key = f"jobs.list.{type(self).__name__}.sec"
     return Configuration().getProperty(property_key)
Example #7
0
 def cron(self):
     """Return the ``cron`` property stored under this class in ``jobs.list``.

     The lookup key is built from the concrete class name of the instance.
     """
     property_key = f"jobs.list.{type(self).__name__}.cron"
     return Configuration().getProperty(property_key)
Example #8
0
 def schedule_type(self):
     """Return the ``type`` property stored under this class in ``jobs.list``.

     The lookup key is built from the concrete class name of the instance.
     """
     property_key = f"jobs.list.{type(self).__name__}.type"
     return Configuration().getProperty(property_key)
Example #9
0
import sys

from ibranch.scraping_scheduler.configuration.Configurator import Configuration
from ibranch.scraping_scheduler.engine.Scraper import ScraperEngine

if __name__ == "__main__":
    # Pass the command-line arguments (program name excluded) to the
    # configuration before the engine is created.
    cli_args = sys.argv[1:]
    Configuration(cli_args)

    # Create the scraper engine and start it.
    engine = ScraperEngine()
    engine.start()