コード例 #1
0
def test_cli_journal_client_origin_visit_status(
    swh_scheduler_cfg, swh_scheduler_cfg_path,
):
    """Run the ``journal-client`` command on one origin visit status message.

    One message is produced on the ``origin_visit_status`` topic, the CLI is
    invoked with ``--stop-after-objects 1``, and the scheduler is then
    queried for the visit stats of the corresponding (origin, type) pair.
    """
    broker = swh_scheduler_cfg["journal"]["brokers"][0]
    swh_scheduler = get_scheduler(**swh_scheduler_cfg["scheduler"])

    producer_settings = {
        "bootstrap.servers": broker,
        "client.id": "test visit-stats producer",
        "acks": "all",
    }
    producer = Producer(producer_settings)

    visit_status = VISIT_STATUSES_1[0]
    producer.produce(
        topic="swh.journal.objects.origin_visit_status",
        key=b"bogus-origin",
        value=value_to_kafka(visit_status),
    )
    # Flush the producer before the client is started
    producer.flush()

    cli_args = ["journal-client", "--stop-after-objects", "1"]
    result = invoke(cli_args, swh_scheduler_cfg_path)

    # The command must succeed and report exactly one processed message
    assert result.exit_code == 0, result.output
    assert result.output == "Processed 1 message(s).\nDone.\n"

    origin_key = (visit_status["origin"], visit_status["type"])
    actual_visit_stats = swh_scheduler.origin_visit_stats_get([origin_key])

    assert actual_visit_stats
    assert len(actual_visit_stats) == 1
コード例 #2
0
def test_register_ttypes_all(
    cli_runner, mock_pkg_resources, local_sched_config, local_sched_configfile
):
    """Check ``task-type register`` with no, ``all`` and explicit plugin selection.

    For every variant of the command, the ``list-gnu-full`` and ``list-pypi``
    task types must be retrievable from the scheduler afterwards.
    """
    register = ["--config-file", local_sched_configfile, "task-type", "register"]
    plugin_selections = (
        [],
        ["-p", "all"],
        ["-p", "lister.gnu", "-p", "lister.pypi"],
    )
    expected_task_types = ("list-gnu-full", "list-pypi")

    for selection in plugin_selections:
        result = cli_runner.invoke(cli, register + selection)

        assert result.exit_code == 0, traceback.print_exception(*result.exc_info)

        scheduler = get_scheduler(**local_sched_config["scheduler"])
        for name in expected_task_types:
            task_type_desc = scheduler.get_task_type(name)
            assert task_type_desc
            assert task_type_desc["type"] == name
            assert task_type_desc["backoff_factor"] == 1
コード例 #3
0
def test_register_ttypes_filter(mock_pkg_resources, cli_runner,
                                local_sched_config, local_sched_configfile):
    """Register task types for the ``lister.gnu`` plugin only.

    After the command succeeds, the ``list-gnu-full`` task type must be
    retrievable from the scheduler.
    """
    command = [
        "--config-file", local_sched_configfile,
        "task-type", "register",
        "--plugins", "lister.gnu",
    ]
    result = cli_runner.invoke(cli, command)

    assert result.exit_code == 0, traceback.print_exception(*result.exc_info)

    scheduler = get_scheduler(**local_sched_config["scheduler"])
    for name in ("list-gnu-full",):
        task_type_desc = scheduler.get_task_type(name)
        assert task_type_desc
        assert task_type_desc["type"] == name
        assert task_type_desc["backoff_factor"] == 1
コード例 #4
0
def get_config(config_file="web/web"):
    """Return the web configuration, loading it on first use.

    While ``swhweb_config`` is still empty, the configuration is loaded
    with ``config.load_named_config`` (using ``DEFAULT_CONFIG``) and merged
    into ``swhweb_config``. A non-empty ``SWH_CONFIG_FILENAME`` environment
    variable takes precedence over the ``config_file`` parameter.

    The ``search``, ``storage``, ``vault``, ``indexer_storage`` and
    ``scheduler`` entries are then replaced by the objects built from their
    configuration (``search`` becomes ``None`` when it is not configured).

    Once populated, ``swhweb_config`` is returned as is.
    """
    if swhweb_config:
        return swhweb_config

    # The environment variable, when set, wins over the parameter
    config_file = os.environ.get("SWH_CONFIG_FILENAME") or config_file

    swhweb_config.update(config.load_named_config(config_file, DEFAULT_CONFIG))
    config.prepare_folders(swhweb_config, "log_dir")

    search_settings = swhweb_config.get("search")
    swhweb_config["search"] = (
        get_search(**search_settings) if search_settings else None
    )

    # Replace each service configuration by the instantiated service
    for key, factory in (
        ("storage", get_storage),
        ("vault", get_vault),
        ("indexer_storage", get_indexer_storage),
        ("scheduler", get_scheduler),
    ):
        swhweb_config[key] = factory(**swhweb_config[key])

    return swhweb_config
コード例 #5
0
def get_config(config_file='web/web'):
    """Return the web configuration, loading it on first use.

    While ``swhweb_config`` is still empty, the configuration is loaded
    with ``config.load_named_config`` (using ``DEFAULT_CONFIG``) and merged
    into ``swhweb_config``. A non-empty ``SWH_CONFIG_FILENAME`` environment
    variable takes precedence over the ``config_file`` parameter.

    The ``storage``, ``vault``, ``indexer_storage`` and ``scheduler``
    entries are then replaced by the objects built from their
    configuration.

    Once populated, ``swhweb_config`` is returned as is.
    """
    if swhweb_config:
        return swhweb_config

    env_filename = os.environ.get('SWH_CONFIG_FILENAME')
    selected_file = env_filename if env_filename else config_file

    loaded = config.load_named_config(selected_file, DEFAULT_CONFIG)
    swhweb_config.update(loaded)
    config.prepare_folders(swhweb_config, 'log_dir')

    # Replace each service configuration by the instantiated service
    storage_settings = swhweb_config['storage']
    swhweb_config['storage'] = get_storage(**storage_settings)

    vault_settings = swhweb_config['vault']
    swhweb_config['vault'] = get_vault(**vault_settings)

    indexer_settings = swhweb_config['indexer_storage']
    swhweb_config['indexer_storage'] = get_indexer_storage(**indexer_settings)

    scheduler_settings = swhweb_config['scheduler']
    swhweb_config['scheduler'] = get_scheduler(**scheduler_settings)

    return swhweb_config
コード例 #6
0
def visit_scheduler_thread(
    config: Dict,
    visit_type: str,
    command_queue: Queue[object],
    exc_queue: Queue[Tuple[str, BaseException]],
):
    """Target function for the visit sending thread, which initializes local connections
    and handles exceptions by sending them back to the main thread.

    The function loops forever: it waits until the next iteration time
    (polling ``command_queue`` once per second in the meantime), then calls
    ``send_visits_for_visit_type``, whose return value is used as the next
    iteration time.

    Args:
        config: configuration dict. The ``scheduler`` entry is required; the
            ``celery`` and ``scheduling_policy`` entries are optional.
        visit_type: visit type to schedule; the task type looked up in the
            scheduler is ``load-<visit_type>``.
        command_queue: queue polled for the ``TERMINATE`` message, upon which
            the function returns. Any other message is logged and ignored.
        exc_queue: queue on which a ``(visit_type, exception)`` tuple is put
            when anything raises inside this function.

    Nothing is raised to the caller: every exception (including the
    ``ValueError`` raised for an unknown task type or an incomplete policy
    configuration) is forwarded through ``exc_queue``.
    """

    from swh.scheduler import get_scheduler
    from swh.scheduler.celery_backend.config import build_app

    try:
        # We need to reinitialize these connections because they're not generally
        # thread-safe
        app = build_app(config.get("celery"))
        scheduler = get_scheduler(**config["scheduler"])
        task_type = scheduler.get_task_type(f"load-{visit_type}")
        if task_type is None:
            raise ValueError(f"Unknown task type: load-{visit_type}")

        # Validate the user-provided policies before merging in the defaults
        policy_cfg = config.get("scheduling_policy", DEFAULT_POLICY_CONFIG)
        for policies in policy_cfg.values():
            for policy in policies:
                if "weight" not in policy or "policy" not in policy:
                    raise ValueError(
                        "Each policy configuration needs at least a 'policy' "
                        "and a 'weight' entry")
        policy_cfg = {**DEFAULT_POLICY_CONFIG, **policy_cfg}

        next_iteration = time.monotonic()

        while True:
            # vary the next iteration time a little bit
            next_iteration = next_iteration + splay()
            while time.monotonic() < next_iteration:
                # Wait for next iteration to start. Listen for termination message.
                try:
                    msg = command_queue.get(block=True, timeout=1)
                except Empty:
                    continue

                if msg is TERMINATE:
                    return
                else:
                    # Logger.warn is a deprecated alias of Logger.warning
                    logger.warning(
                        "Received unexpected message %s in command queue", msg)

            next_iteration = send_visits_for_visit_type(
                scheduler,
                app,
                visit_type,
                task_type,
                policy_cfg.get(visit_type, policy_cfg["default"]),
            )

    except BaseException as e:
        # Deliberately broad: the main thread is the one handling failures
        exc_queue.put((visit_type, e))
コード例 #7
0
def main():
    """Import the configured task modules, then run the ready tasks.

    Every module listed in the app's ``CELERY_IMPORTS`` setting is imported
    first. If ``run_ready_tasks`` raises an ``Exception``, the scheduler
    backend is rolled back and the exception is re-raised.
    """
    from .config import app

    for module_name in app.conf.CELERY_IMPORTS:
        __import__(module_name)

    backend = get_scheduler("local")
    try:
        run_ready_tasks(backend, app)
    except Exception:
        # Undo whatever was pending on the backend before propagating
        backend.rollback()
        raise
コード例 #8
0
 def __init__(self):
     """Load the configuration and build the scheduler and storage clients.

     The configuration comes from ``config.load_from_envvar(DEFAULT_CONFIG)``.
     Sets the ``config``, ``tool``, ``scheduler``, ``storage`` and
     ``storage_metadata`` attributes on the instance.
     """
     self.config: Dict[str, Any] = config.load_from_envvar(DEFAULT_CONFIG)
     settings = self.config

     # Static description of this tool
     self.tool = {
         "name": "swh-deposit",
         "version": __version__,
         "configuration": {"sword_version": "2"},
     }

     scheduler_settings = settings["scheduler"]
     self.scheduler: SchedulerInterface = get_scheduler(**scheduler_settings)

     storage_settings = settings["storage"]
     self.storage: StorageInterface = get_storage(**storage_settings)

     metadata_settings = settings["storage_metadata"]
     self.storage_metadata: StorageInterface = get_storage(**metadata_settings)
コード例 #9
0
ファイル: backend.py プロジェクト: SoftwareHeritage/swh-vault
    def __init__(self, **config):
        """Set up the cache, scheduler, storage, SMTP client and database pool.

        Args:
            config: configuration entries. ``cache``, ``scheduler``,
                ``storage`` and ``db`` are required; ``smtp`` (defaults to
                no arguments), ``min_pool_conns`` (defaults to 1) and
                ``max_pool_conns`` (defaults to 10) are optional.
        """
        self.config = config
        self.cache = VaultCache(**config["cache"])
        self.scheduler = get_scheduler(**config["scheduler"])
        self.storage = get_storage(**config["storage"])

        smtp_kwargs = config.get("smtp", {})
        self.smtp_server = smtplib.SMTP(**smtp_kwargs)

        dsn = config["db"]
        min_conns = config.get("min_pool_conns", 1)
        max_conns = config.get("max_pool_conns", 10)
        self._pool = psycopg2.pool.ThreadedConnectionPool(
            min_conns,
            max_conns,
            dsn,
            cursor_factory=psycopg2.extras.RealDictCursor,
        )
        self._db = None
コード例 #10
0
def swh_scheduler(swh_scheduler_config):
    """Return a postgresql scheduler holding one test task type per task name.

    For every entry of ``TASK_NAMES``, a ``swh-test-<name>`` task type is
    created whose backend is ``swh.scheduler.tests.tasks.<name>``.
    """
    scheduler = get_scheduler("postgresql", **swh_scheduler_config)

    # The intervals are the same for every test task type
    intervals = {
        "default_interval": timedelta(days=1),
        "min_interval": timedelta(hours=6),
        "max_interval": timedelta(days=12),
    }

    for taskname in TASK_NAMES:
        task_type = {
            "type": "swh-test-{}".format(taskname),
            "description": "The {} testing task".format(taskname),
            "backend_name": "swh.scheduler.tests.tasks.{}".format(taskname),
            **intervals,
        }
        scheduler.create_task_type(task_type)

    return scheduler
コード例 #11
0
    def __init__(self, override_config=None):
        """Load the configuration and set up storage, scheduler and database session.

        Args:
            override_config: optional mapping merged into the parsed
                configuration (through ``dict.update``) after the
                configuration file has been read.
        """
        self.backoff = self.INITIAL_BACKOFF
        # Pass arguments to the logger instead of pre-formatting the message,
        # so the string is only built when debug logging is enabled.
        logger.debug('Loading config from %s', self.CONFIG_BASE_FILENAME)
        self.config = self.parse_config_file(
            base_filename=self.CONFIG_BASE_FILENAME,
            additional_configs=[self.ADDITIONAL_CONFIG]
        )
        self.config['cache_dir'] = os.path.expanduser(self.config['cache_dir'])
        if self.config['cache_responses']:
            config.prepare_folders(self.config, 'cache_dir')

        # NOTE(review): overrides are applied after 'cache_dir' has been
        # expanded and prepared, so an overridden 'cache_dir' is used as
        # given -- confirm this is intended.
        if override_config:
            self.config.update(override_config)

        logger.debug('%s CONFIG=%s', self, self.config)
        self.storage = get_storage(**self.config['storage'])
        self.scheduler = get_scheduler(**self.config['scheduler'])
        self.db_engine = create_engine(self.config['lister']['args']['db'])
        self.mk_session = sessionmaker(bind=self.db_engine)
        self.db_session = self.mk_session()
コード例 #12
0
def cli(ctx, config_file, database, url, no_stdout):
    """Software Heritage Scheduler tools.

    Use a local scheduler instance by default (plugged to the
    main scheduler db).
    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # shown as the command's help text -- confirm against the decorators.
    try:
        from psycopg2 import OperationalError
    except ImportError:
        # Stand-in so that the except clause below stays valid without psycopg2
        class OperationalError(Exception):
            pass

    from swh.core import config
    from swh.scheduler import DEFAULT_CONFIG, get_scheduler

    ctx.ensure_object(dict)

    logger = logging.getLogger(__name__)
    conf = config.read(config_file, DEFAULT_CONFIG)
    if "scheduler" not in conf:
        raise ValueError("missing 'scheduler' configuration")

    # Command-line options override the configured scheduler; the database
    # option wins over the url one.
    sched_conf = conf["scheduler"]
    if database:
        sched_conf.update({"cls": "postgresql", "db": database})
    elif url:
        sched_conf.update({"cls": "remote", "url": url})

    try:
        logger.debug("Instantiating scheduler with %s", sched_conf)
        scheduler = get_scheduler(**sched_conf)
    except (ValueError, OperationalError):
        # it's the subcommand to decide whether not having a proper
        # scheduler instance is a problem.
        scheduler = None

    ctx.obj["scheduler"] = scheduler
    ctx.obj["config"] = conf
コード例 #13
0
    def from_config(cls, scheduler: Dict[str, Any], **config: Any):
        """Build a lister out of a configuration dict.

        Args:
          scheduler: keyword arguments used to instantiate the scheduler
          config: remaining configuration entries. The legacy ``storage``,
            ``lister`` and ``celery`` keys are discarded; everything else is
            passed as keyword arguments to the lister constructor.

        Returns:
          the instantiated lister
        """
        # Legacy keys which aren't used for this generation of listers
        legacy_keys = ("storage", "lister", "celery")
        lister_kwargs = {
            name: value for name, value in config.items() if name not in legacy_keys
        }

        return cls(scheduler=get_scheduler(**scheduler), **lister_kwargs)
コード例 #14
0
def deposit_autoconfig(deposit_config_path):
    """Read the deposit configuration and register the deposit task types.

    When the configuration has a ``scheduler`` entry, the ``check-deposit``
    and ``load-deposit`` task types are created in that scheduler. Without
    such an entry, nothing else is done.
    """
    cfg = read(deposit_config_path)
    if "scheduler" not in cfg:
        return

    scheduler = get_scheduler(**cfg["scheduler"])

    check_deposit = {
        "type": "check-deposit",
        "backend_name": "swh.deposit.loader.tasks.ChecksDepositTsk",
        "description": "Check deposit metadata/archive before loading",
        "num_retries": 3,
    }
    load_deposit = {
        "type": "load-deposit",
        "backend_name": "swh.loader.package.deposit.tasks.LoadDeposit",
        "description": "Loading deposit archive into swh archive",
        "num_retries": 3,
    }
    for definition in (check_deposit, load_deposit):
        scheduler.create_task_type(definition)
コード例 #15
0
def get_global_scheduler():
    """Return the module-level scheduler, instantiating it if it is not set.

    The instance is built from ``app.config["scheduler"]`` and stored in the
    ``scheduler`` global whenever that global is falsy.
    """
    global scheduler
    if scheduler:
        return scheduler
    scheduler = get_scheduler(**app.config["scheduler"])
    return scheduler
コード例 #16
0
        result = event["result"]

        status = None

        if isinstance(result, dict) and "status" in result:
            status = result["status"]
            if status == "success":
                status = "eventful" if result.get("eventful") else "uneventful"

        if status is None:
            status = "eventful" if result else "uneventful"

        scheduler_backend.end_task_run(uuid,
                                       timestamp=utcnow(),
                                       status=status,
                                       result=result)
    elif event_type == "task-failed":
        scheduler_backend.end_task_run(uuid,
                                       timestamp=utcnow(),
                                       status="failed")


if __name__ == "__main__":
    # Script entry point: the first command-line argument is handed to
    # get_listener() as `url`.
    url = sys.argv[1]
    logging.basicConfig(level=logging.DEBUG)
    # NOTE(review): the "local" class name combined with an `args=` dict
    # looks like a legacy calling convention of get_scheduler -- confirm
    # against the installed swh.scheduler version.
    scheduler_backend = get_scheduler("local",
                                      args={"db": "service=swh-scheduler"})
    channel = get_listener(url, "celeryev.test", scheduler_backend)
    logger.info("Start consuming")
    channel.start_consuming()
コード例 #17
0
def test_init_get_scheduler_deprecation_warning(class_name, expected_class,
                                                kwargs, mock_psycopg2):
    """Passing the arguments through ``args=`` warns but still works.

    The call must emit a ``DeprecationWarning`` and return an instance of
    ``expected_class``.
    """
    with pytest.warns(DeprecationWarning):
        scheduler = get_scheduler(class_name, args=kwargs)

    assert isinstance(scheduler, expected_class)
コード例 #18
0
def test_init_get_scheduler(class_name, expected_class, kwargs, mock_psycopg2):
    """The scheduler built for ``class_name`` has the expected types.

    It must be an instance of both ``expected_class`` and
    ``SchedulerInterface``.
    """
    scheduler = get_scheduler(class_name, **kwargs)
    for required_type in (expected_class, SchedulerInterface):
        assert isinstance(scheduler, required_type)
コード例 #19
0
def test_init_get_scheduler_failure():
    """An unknown scheduler class name makes ``get_scheduler`` raise ValueError."""
    unknown_class_name = "unknown-scheduler-storage"
    with pytest.raises(ValueError, match="Unknown Scheduler class"):
        get_scheduler(unknown_class_name)
コード例 #20
0
ファイル: conftest.py プロジェクト: zapashcanon/swh-scheduler
def swh_sched(swh_sched_config):
    """Build a scheduler from the ``scheduler`` entry of ``swh_sched_config``."""
    scheduler_kwargs = swh_sched_config["scheduler"]
    return get_scheduler(**scheduler_kwargs)
コード例 #21
0
        result = event["result"]

        status = None

        if isinstance(result, dict) and "status" in result:
            status = result["status"]
            if status == "success":
                status = "eventful" if result.get("eventful") else "uneventful"

        if status is None:
            status = "eventful" if result else "uneventful"

        scheduler_backend.end_task_run(uuid,
                                       timestamp=utcnow(),
                                       status=status,
                                       result=result)
    elif event_type == "task-failed":
        scheduler_backend.end_task_run(uuid,
                                       timestamp=utcnow(),
                                       status="failed")


if __name__ == "__main__":
    # Script entry point: the first command-line argument is handed to
    # get_listener() as `url`.
    url = sys.argv[1]
    logging.basicConfig(level=logging.DEBUG)
    # NOTE(review): passing the scheduler arguments through an `args=` dict
    # looks like a legacy calling convention of get_scheduler -- confirm
    # against the installed swh.scheduler version.
    scheduler_backend = get_scheduler("postgresql",
                                      args={"db": "service=swh-scheduler"})
    channel = get_listener(url, "celeryev.test", scheduler_backend)
    logger.info("Start consuming")
    channel.start_consuming()