Beispiel #1
0
def profile_memory_per_line(
    get_maximum,
    per_element,
    input_file,
    output_file,
    show_logs,
    entity_ids,
    config_file,
):
    """Run the ``memory_per_line`` profile for the job.

    The job config is read from ``config_file`` (``klio-job.yaml`` when it
    is falsy), the profiling input is checked with
    ``_require_profile_input_data`` and the pipeline is profiled with
    ``get_maximum`` forwarded. Logging is disabled up to ``CRITICAL`` unless
    ``show_logs`` is truthy.

    NOTE: ``per_element`` is accepted but not used inside this function.
    """
    # Imported inside the function rather than at module level.
    from klio_exec.commands import profile

    raw_config = _get_config(config_file or "klio-job.yaml")
    klio_config = config.KlioConfig(raw_config)

    _require_profile_input_data(input_file, entity_ids)

    silence_logs = not show_logs
    if silence_logs:
        logging.disable(logging.CRITICAL)

    pipeline_kwargs = {
        "klio_config": klio_config,
        "input_file": input_file,
        "output_file": output_file,
        "entity_ids": entity_ids,
    }
    profiler = profile.KlioPipeline(**pipeline_kwargs)
    profiler.profile(what="memory_per_line", get_maximum=get_maximum)
Beispiel #2
0
    def wrapper(*args, **kwargs):
        """Build the Klio config from CLI options, then call ``func``.

        Pops the ``override``, ``template``, ``job_dir`` and ``config_file``
        entries from ``kwargs`` (a missing entry raises ``KeyError``),
        resolves the job directory and config path, loads and preprocesses
        the raw config, and injects the results into ``kwargs`` as
        ``klio_config`` (a ``KlioConfig``) and ``config_meta`` (a
        ``KlioConfigMeta``).

        Returns:
            Whatever ``func`` returns, so the wrapper is transparent to
            callers that use the wrapped function's result.
        """
        raw_overrides = kwargs.pop("override")
        raw_templates = kwargs.pop("template")
        job_dir = kwargs.pop("job_dir")
        config_file = kwargs.pop("config_file")
        job_dir, config_path = get_config_job_dir(job_dir, config_file)

        warn_if_py2_job(job_dir)

        raw_config_data = get_config_by_path(config_path)

        processed_config_data = config.KlioConfigPreprocessor.process(
            raw_config_data=raw_config_data,
            raw_template_list=raw_templates,
            raw_override_list=raw_overrides,
        )

        # ``config_file`` is the value passed on the CLI (possibly None),
        # while ``config_path`` is the resolved location actually loaded.
        kwargs["config_meta"] = KlioConfigMeta(
            job_dir=job_dir,
            config_file=config_file,
            config_path=config_path,
        )
        kwargs["klio_config"] = config.KlioConfig(processed_config_data)

        # Propagate the wrapped function's result instead of dropping it.
        return func(*args, **kwargs)
Beispiel #3
0
def klio_config():
    """Return a ``KlioConfig`` built from a fixed test job configuration."""
    pipeline_options = {
        "worker_harness_container_image": (
            "gcr.io/sigint/gke-baseline-random-music-gke"
        ),
        "region": "some-region",
        "project": "test-project",
    }
    job_inputs = [
        {
            "topic": "foo-topic",
            "subscription": "foo-sub",
            "data_location": "foo-input-location",
        }
    ]
    job_outputs = [
        {
            "topic": "foo-topic-output",
            "data_location": "foo-output-location",
        }
    ]
    return config.KlioConfig(
        {
            "job_name": "test-job",
            "version": 1,
            "pipeline_options": pipeline_options,
            "job_config": {"inputs": job_inputs, "outputs": job_outputs},
        }
    )
Beispiel #4
0
def run_pipeline(image_tag, direct_runner, update, config_file, blocking):
    """Load the job config and run the Klio pipeline.

    ``update`` and ``blocking`` fall back to the values found in the loaded
    config when they are ``None`` (i.e. not set explicitly on the CLI).
    When ``direct_runner`` is truthy the runner in the pipeline options is
    forced to ``"direct"``.
    """
    config_path = config_file or "klio-job.yaml"
    config_data = _get_config(config_path)

    # Warn when the runtime config file is not the same as the buildtime
    # config file. This happens after _get_config since that handles the
    # case where the config file doesn't even exist first.
    configs_match = _compare_runtime_to_buildtime_config(config_path)
    if configs_match is False:
        msg = (
            "The Klio config file '{}' at runtime differs from the config "
            "file used when building this Docker image. If this is unexpected "
            "behavior, please double check your runtime config, or rebuild "
            "your Docker image with the correct config file."
        )
        logging.warning(msg.format(config_path))

    if direct_runner:
        config_data["pipeline_options"]["runner"] = "direct"

    # Read the job name before the raw dict is handed to KlioConfig.
    job_name = config_data["job_name"]
    conf_obj = config.KlioConfig(config_data)

    # Values not explicitly set on the CLI are taken from the config.
    update = conf_obj.pipeline_options.update if update is None else update
    blocking = conf_obj.job_config.blocking if blocking is None else blocking

    runtime_conf = RuntimeConfig(image_tag, direct_runner, update, blocking)
    run.KlioPipeline(job_name, conf_obj, runtime_conf).run()
Beispiel #5
0
    def setup(self, config_data, config_file, config_override=None):
        """Store the inputs, install the mocks and return a real KlioConfig.

        Patches ``warn_if_py2_job`` on ``self.module.core_utils``, primes the
        ``get_config_job_dir`` and ``get_config`` mocks, records the expected
        ``KlioConfigMeta`` on ``self.meta`` and finally replaces
        ``core_utils.config.KlioConfig`` with a mock that returns the real
        config object built from ``config_data``.
        """
        self.config_override = config_override
        self.config_data = config_data
        self.config_file = config_file

        # The override, when given, takes precedence over the default file.
        effective_path = config_override or config_file
        job_dir = self.patch_os_getcwd

        py2_warning_mock = self.mocker.Mock()
        self.mock_warn_if_py2_job = py2_warning_mock
        self.monkeypatch.setattr(
            self.module.core_utils, "warn_if_py2_job", py2_warning_mock
        )

        self.mock_get_config_job_dir.return_value = (job_dir, effective_path)
        self.mock_get_config.return_value = config_data

        self.meta = core_utils.KlioConfigMeta(
            job_dir=job_dir,
            config_file=config_override,
            config_path=effective_path,
        )

        # Build the real object first; KlioConfig is patched right after.
        self.klio_config = kconfig.KlioConfig(config_data)

        self.mock_klio_config = self.mocker.patch.object(
            core_utils.config, "KlioConfig"
        )
        self.mock_klio_config.return_value = self.klio_config

        return self.klio_config
Beispiel #6
0
def test_compare_runtime_to_buildtime_config(
    mocker, monkeypatch, addl_runtime_data, buildtime_exists, exp_retval
):
    """Check the runtime/buildtime config comparison against ``exp_retval``.

    ``os.path.exists`` is forced to ``buildtime_exists`` and ``open`` in
    ``klio_exec.cli`` is mocked to serve the serialized buildtime config.
    """
    monkeypatch.setattr(os.path, "exists", lambda _path: buildtime_exists)

    buildtime_data = {"job_name": "foo", "job_config": {}}
    runtime_data = dict(buildtime_data)
    if addl_runtime_data:
        # Copy the nested dict so the buildtime data is left untouched.
        runtime_data["job_config"] = dict(
            runtime_data["job_config"], foo="bar"
        )

    # Serialize the buildtime config before the runtime config is built.
    buildtime_data_str = yaml.dump(buildtime_data).encode("utf-8")

    runtime_conf = kconfig.KlioConfig(runtime_data)

    # multiple `open` mocks: https://stackoverflow.com/a/26830397/1579977
    mock_open_buildtime = mocker.mock_open(read_data=buildtime_data_str)
    mock_open = mocker.patch("klio_exec.cli.open", mock_open_buildtime)
    mock_open.side_effect = (mock_open_buildtime.return_value,)

    assert exp_retval == cli._compare_runtime_to_buildtime_config(
        runtime_conf
    )
Beispiel #7
0
def klio_job_config():
    """Return a ``KlioConfig`` with pubsub events and GCS data I/O."""
    event_config = {
        "inputs": [
            {
                "type": "pubsub",
                "topic": "an-input-topic",
                "subscription": "a-subscription",
            }
        ],
        "outputs": [{"type": "pubsub", "topic": "foo-topic-output"}],
    }
    data_config = {
        "inputs": [{"type": "gcs", "location": "gs://a-test-input/location"}],
        "outputs": [{"type": "gcs", "location": "foo-output-location"}],
    }
    return config.KlioConfig(
        {
            "job_name": "test-job",
            "version": 1,
            "pipeline_options": {"project": "test-gcp-project"},
            "job_config": {"events": event_config, "data": data_config},
        }
    )
Beispiel #8
0
def profile_cpu(
    interval,
    input_file,
    output_file,
    plot_graph,
    show_logs,
    entity_ids,
    config_file,
):
    """Run the ``cpu`` profile for the job and report any generated plot.

    The job config is read from ``config_file`` (``klio-job.yaml`` when it
    is falsy). ``interval``, ``show_logs`` and ``plot_graph`` are forwarded
    to the profiler; when it returns a truthy value, that value is echoed
    as the location of the generated CPU plot.
    """
    # Imported inside the function rather than at module level.
    from klio_exec.commands import profile

    raw_config = _get_config(config_file or "klio-job.yaml")
    klio_config = config.KlioConfig(raw_config)

    _require_profile_input_data(input_file, entity_ids)

    profiler = profile.KlioPipeline(
        klio_config=klio_config,
        input_file=input_file,
        output_file=output_file,
        entity_ids=entity_ids,
    )
    output_png = profiler.profile(
        what="cpu",
        interval=interval,
        show_logs=show_logs,
        plot_graph=plot_graph,
    )
    if not output_png:
        return
    click.echo("CPU plot graph generated at: {}".format(output_png))
Beispiel #9
0
def stop_job(config_file):
    """Stop the job described by the config in the current directory.

    ``config_file`` (``klio-job.yaml`` when falsy) is joined onto the
    absolute current working directory before being loaded.
    """
    config_path = os.path.join(
        os.path.abspath(os.getcwd()), config_file or "klio-job.yaml"
    )
    conf_obj = config.KlioConfig(_get_config(config_path))
    # TODO: make this a click option once draining is supported @lynn
    stop.stop(conf_obj, "cancel")
Beispiel #10
0
def audit_job(config_file):
    """Audit the job in the current directory with test mode enabled.

    Sets ``KLIO_TEST_MODE`` to ``"true"`` in the process environment, loads
    ``config_file`` (``klio-job.yaml`` when falsy) and hands the resulting
    config and the absolute working directory to ``audit.audit``.
    """
    # NOTE: we assume that audit_job is the only method called in this
    # subprocess, so setting KLIO_TEST_MODE will not impact subsequent
    # method calls
    os.environ["KLIO_TEST_MODE"] = "true"

    job_dir = os.path.abspath(os.getcwd())
    config_path = config_file or "klio-job.yaml"
    conf_obj = config.KlioConfig(_get_config(config_path))
    audit.audit(job_dir, conf_obj)
Beispiel #11
0
def test_klio_config(config_dict, final_config_dict):
    """Check the attributes, dict form and repr of a built ``KlioConfig``."""
    klio_conf = config.KlioConfig(
        config_dict, config_skip_preprocessing=True
    )

    assert "test-job" == klio_conf.job_name

    # Each section is wrapped in its dedicated config class.
    assert isinstance(klio_conf.job_config, config.KlioJobConfig)
    assert isinstance(klio_conf.pipeline_options, config.KlioPipelineConfig)

    assert final_config_dict == klio_conf.as_dict()
    assert "KlioConfig(job_name='test-job')" == repr(klio_conf)
Beispiel #12
0
def test_no_gcp_klio_config(no_gcp_config_dict):
    """Check that a config without GCP options still gets defaults added.

    The final inequality compares the *input* dictionary with the dict form
    of the built config. It previously referenced ``config_dict``, which is
    not a parameter of this test, so it never compared the actual input.
    """
    import copy

    # Snapshot the input before building the config, in case KlioConfig
    # modifies the dictionary it is given (assumption; harmless otherwise).
    original_dict = copy.deepcopy(no_gcp_config_dict)

    config_obj = config.KlioConfig(no_gcp_config_dict)

    assert "test-job" == config_obj.job_name
    assert isinstance(config_obj.job_config, config.KlioJobConfig)
    assert isinstance(config_obj.pipeline_options, config.KlioPipelineConfig)
    # Default variables are added to the pipeline config
    assert original_dict != config_obj.as_dict()

    repr_actual = repr(config_obj)
    assert "KlioConfig(job_name='test-job')" == repr_actual
Beispiel #13
0
def get_config():
    """Load KlioConfig object.

    Reads ``klio-job.yaml`` from the parent of this module's directory. An
    ``IOError`` is logged and turned into ``SystemExit(1)``.
    """
    module_dir = os.path.dirname(__file__)
    config_path = os.path.join(module_dir, "..", "klio-job.yaml")
    try:
        with open(config_path) as config_fh:
            cfg_dict = yaml.safe_load(config_fh)
        klio_conf = config.KlioConfig(cfg_dict)
    except IOError as err:
        logging.error(err)
        raise SystemExit(1)
    else:
        return klio_conf
Beispiel #14
0
def test_get_environment(monkeypatch, project, config_data, klio_pipeline):
    """Check the environment built by ``klio_pipeline._get_environment``.

    ``GOOGLE_CLOUD_PROJECT`` is only expected when ``project`` is truthy.
    """
    config_data["pipeline_options"]["project"] = project
    monkeypatch.setattr(
        klio_pipeline, "klio_config", kconfig.KlioConfig(config_data)
    )

    expected_env = {
        "PYTHONPATH": "/usr/src/app",
        "GOOGLE_APPLICATION_CREDENTIALS": (
            "/usr/gcloud/application_default_credentials.json"
        ),
        "USER": "******",
    }
    if project:
        expected_env["GOOGLE_CLOUD_PROJECT"] = project

    assert expected_env == klio_pipeline._get_environment()
Beispiel #15
0
 def _load_config_from_file(cls):
     """Read the job's YAML config from disk and wrap it in a KlioConfig.

     The effective config under ``/usr/src/config`` is used when it exists;
     otherwise the first ``klio-job.yaml`` found under ``/usr`` is used. If
     the search finds nothing, the effective path is opened anyway.
     """
     # [Klio v2] this may get expensive, to always be reading config
     # from a file. Can this be replaced by something in memory
     # that's also globally accessible?
     effective_path = "/usr/src/config/.effective-klio-job.yaml"
     if os.path.exists(effective_path):
         klio_job_file = effective_path
     else:
         # for backwards compatibility, and user is using setup.py and we
         # have to find it somewhere...
         # iglob is lazy, so the search stops at the first match.
         matches = glob.iglob("/usr/**/klio-job.yaml", recursive=True)
         klio_job_file = next(matches, effective_path)

     with open(klio_job_file, "r") as config_fh:
         all_config_data = yaml.safe_load(config_fh)
     return config.KlioConfig(all_config_data)
Beispiel #16
0
def test_build(conf_file, exp_image_tag, mocker, monkeypatch):
    """Check that ``build_image.build`` drives docker and docker_utils.

    Both ``docker`` and ``docker_utils`` on ``build_image`` are replaced by
    mocks; the test verifies the connection check, the Dockerfile check and
    the arguments of the image build call.
    """
    mock_client = mocker.Mock()
    mock_docker = mocker.Mock()
    mock_docker.from_env.return_value = mock_client
    mock_docker_utils = mocker.Mock()
    monkeypatch.setattr(build_image, "docker", mock_docker)
    monkeypatch.setattr(build_image, "docker_utils", mock_docker_utils)

    job_inputs = [
        {
            "topic": "foo-topic",
            "subscription": "foo-sub",
            "data_location": "foo-input-location",
        }
    ]
    job_outputs = [
        {
            "topic": "foo-topic-output",
            "data_location": "foo-output-location",
        }
    ]
    conf_obj = config.KlioConfig(
        {
            "job_name": "test-job",
            "version": 1,
            "pipeline_options": {
                "worker_harness_container_image": (
                    "gcr.register.io/squad/feature"
                )
            },
            "job_config": {"inputs": job_inputs, "outputs": job_outputs},
        }
    )
    job_dir, image_tag = "jerbs", "v1"

    build_image.build(job_dir, conf_obj, conf_file, image_tag)

    mock_docker.from_env.assert_called_once_with()
    mock_docker_utils.check_docker_connection.assert_called_once_with(
        mock_client
    )
    mock_docker_utils.check_dockerfile_present.assert_called_once_with(
        job_dir
    )

    expected_image = conf_obj.pipeline_options.worker_harness_container_image
    mock_docker_utils.build_docker_image.assert_called_once_with(
        job_dir, expected_image, exp_image_tag, conf_file
    )
Beispiel #17
0
def _run_pipeline(input_file, show_logs, entity_ids, config_file):
    """Run the job's pipeline through the profiler with ``what="run"``.

    The job config is read from ``config_file`` (``klio-job.yaml`` when it
    is falsy). Logging is disabled up to ``CRITICAL`` unless ``show_logs``
    is truthy.
    """
    # Imported inside the function rather than at module level.
    from klio_exec.commands import profile

    raw_config = _get_config(config_file or "klio-job.yaml")
    klio_config = config.KlioConfig(raw_config)

    # safety check, even though it should be invoked by another klioexec
    # command
    _require_profile_input_data(input_file, entity_ids)

    silence_logs = not show_logs
    if silence_logs:
        logging.disable(logging.CRITICAL)

    profiler = profile.KlioPipeline(
        klio_config=klio_config,
        input_file=input_file,
        entity_ids=entity_ids,
    )
    profiler.profile(what="run")
Beispiel #18
0
def test_config_pickling(config_dict, final_config_dict):
    """Check that a KlioConfig survives a dill round trip intact.

    This test attempts to verify that class-level attributes aren't used as
    instance attributes, since they are not pickled, which can result in
    missing/wrong values when config is unpickled on dataflow workers.
    """

    def get_class_attributes(cls):
        # Copies of every non-dunder, non-function attribute on the class.
        snapshot = {}
        for name in cls.__dict__:
            member = getattr(cls, name)
            if name.startswith("__"):
                continue
            if inspect.ismethod(member) or inspect.isfunction(member):
                continue
            snapshot[name] = copy.copy(member)
        return snapshot

    cls_attribs = {
        klass: get_class_attributes(klass)
        for klass in (
            config.KlioConfig,
            config.KlioJobConfig,
            config.KlioPipelineConfig,
        )
    }

    pickled = dill.dumps(
        config.KlioConfig(config_dict, config_skip_preprocessing=True)
    )

    # reset any class-level attributes back to whatever value they had before
    # instantiating KlioConfig
    for klass, saved in cls_attribs.items():
        for name, original_value in saved.items():
            setattr(klass, name, original_value)

    restored = dill.loads(pickled)

    assert final_config_dict == restored.as_dict()
Beispiel #19
0
def profile_wall_time(input_file, output_file, iterations, show_logs,
                      entity_ids, config_file):
    """Run the ``timeit`` profile for the job.

    The job config is read from ``config_file`` (``klio-job.yaml`` when it
    is falsy) and ``iterations`` is forwarded to the profiler. Logging is
    disabled up to ``CRITICAL`` unless ``show_logs`` is truthy.
    """
    # Imported inside the function rather than at module level.
    from klio_exec.commands import profile

    raw_config = _get_config(config_file or "klio-job.yaml")
    klio_config = config.KlioConfig(raw_config)

    _require_profile_input_data(input_file, entity_ids)

    silence_logs = not show_logs
    if silence_logs:
        logging.disable(logging.CRITICAL)

    pipeline_kwargs = {
        "klio_config": klio_config,
        "input_file": input_file,
        "output_file": output_file,
        "entity_ids": entity_ids,
    }
    profiler = profile.KlioPipeline(**pipeline_kwargs)
    profiler.profile(what="timeit", iterations=iterations)
Beispiel #20
0
def klio_config():
    """Return a ``KlioConfig`` built from a fixed test job configuration."""
    job_inputs = [
        {
            "topic": "foo-topic",
            "subscription": "foo-sub",
            "data_location": "foo-input-location",
        }
    ]
    job_outputs = [
        {
            "topic": "foo-topic-output",
            "data_location": "foo-output-locaiton",
        }
    ]
    return config.KlioConfig(
        {
            "job_name": "test-job",
            "version": 1,
            "pipeline_options": {
                "worker_harness_container_image": "test-image"
            },
            "job_config": {"inputs": job_inputs, "outputs": job_outputs},
        }
    )
Beispiel #21
0
def develop_job(job_dir, config_file, **kwargs):
    """Run the development container for the job in ``job_dir``.

    The image tag is the ``image_tag`` keyword argument when truthy,
    otherwise the git SHA; when ``config_file`` is given, its basename is
    appended to the tag. ``klio_path`` and ``exclude`` are required keyword
    arguments.
    """
    job_dir, config_path = core_utils.get_config_job_dir(job_dir, config_file)
    conf = config.KlioConfig(core_utils.get_config_by_path(config_path))

    requested_tag = kwargs.get("image_tag")
    git_sha = cli_utils.get_git_sha(job_dir, requested_tag)
    image_tag = requested_tag or git_sha
    if config_file:
        # Distinguish images built with a non-default config file.
        image_tag = "{}-{}".format(image_tag, os.path.basename(config_file))

    runtime_config = main_cli.DockerRuntimeConfig(
        image_tag=image_tag,
        force_build=kwargs.get("force_build"),
        config_file_override=config_file,
    )

    develop.DevelopKlioContainer(
        job_dir,
        conf,
        runtime_config,
        kwargs["klio_path"],
        kwargs["exclude"],
    ).run()
Beispiel #22
0
def test_job(pytest_args):
    """Thin wrapper around pytest. Any arguments after -- are passed through.

    Loads ``klio-job.yaml``, registers the resulting config with
    ``RunConfig``, enables ``KLIO_TEST_MODE`` and runs pytest. A non-zero
    pytest exit code is raised as ``SystemExit``.
    """
    import os
    import pytest

    conf_obj = config.KlioConfig(_get_config("klio-job.yaml"))

    # RunConfig ensures config is pickled and sent to worker.  Note this
    # depends on save_main_session being True
    klio_transforms_core.RunConfig.set(conf_obj)

    # NOTE: we assume that test_job is the only method called in this
    # subprocess, so setting KLIO_TEST_MODE will not impact subsequent
    # method calls
    os.environ["KLIO_TEST_MODE"] = "true"

    exit_code = pytest.main(list(pytest_args))
    if exit_code == 0:
        return
    raise SystemExit("Tests failed with exit code %s" % exit_code)
Beispiel #23
0
    def _load_config_from_file(cls):
        """Locate the effective job config on disk and load it.

        Lookup order: the worker's effective config path if it exists, then
        the first effective config file found under ``/usr``, and finally
        the hard-coded path under ``/usr/src/config``.
        """
        worker_path = config_core.WORKER_RUN_EFFECTIVE_CONFIG_PATH
        if os.path.exists(worker_path):
            klio_job_file = worker_path
        else:
            search_pattern = os.path.join(
                "/usr/**", config_core.RUN_EFFECTIVE_CONFIG_FILE
            )
            # only grab the first one
            klio_job_file = next(
                glob.iglob(search_pattern, recursive=True), None
            )

        if not klio_job_file:
            klio_job_file = "/usr/src/config/.effective-klio-job.yaml"

        logging.getLogger("klio").debug(
            f"Loading config file from {klio_job_file}."
        )

        with open(klio_job_file, "r") as config_fh:
            all_config_data = yaml.safe_load(config_fh)
        return config.KlioConfig(all_config_data)
Beispiel #24
0
def test_test_job(
    runner,
    mocker,
    config_file,
    patch_os_getcwd,
    pytest_args,
    conf_override,
    image_tag,
    mock_get_git_sha,
    mock_warn_if_py2_job,
    mock_get_config_job_dir,
):
    """Check that ``job test`` wires the CLI options into ``TestPipeline``.

    The pipeline class, config loading and ``KlioConfig`` are mocked; the
    test then verifies the image tag, the Docker runtime config and the
    arguments forwarded to the pipeline's ``run``.
    """
    effective_config_file = conf_override or config_file

    mock_test_pipeline = mocker.patch.object(
        cli.job_commands.test, "TestPipeline"
    )
    mock_test_pipeline.return_value.run.return_value = 0
    mock_get_config_job_dir.return_value = (
        patch_os_getcwd,
        effective_config_file,
    )

    cli_inputs = ["job", "test"]
    if image_tag:
        cli_inputs += ["--image-tag", image_tag]
    if conf_override:
        cli_inputs += ["--config-file", conf_override]
    cli_inputs.extend(pytest_args)

    job_inputs = [
        {
            "topic": "foo-topic",
            "subscription": "foo-sub",
            "data_location": "foo-input-location",
        }
    ]
    job_outputs = [
        {
            "topic": "foo-topic-output",
            "data_location": "foo-output-location",
        }
    ]
    config_data = {
        "job_name": "test-job",
        "pipeline_options": {
            "worker_harness_container_image": "gcr.register.io/squad/feature",
            "project": "test-project",
            "region": "boonies",
            "staging_location": "gs://somewhere/over/the/rainbow",
            "temp_location": "gs://somewhere/over/the/rainbow",
        },
        "job_config": {"inputs": job_inputs, "outputs": job_outputs},
    }
    mock_get_config = mocker.patch.object(
        core_utils, "get_config_by_path", return_value=config_data
    )
    # deepcopy since KlioConfig will pop keys
    conf = kconfig.KlioConfig(copy.deepcopy(config_data))
    # The real object is built above, before KlioConfig is patched.
    mock_klio_config = mocker.patch.object(
        core_utils.config, "KlioConfig", return_value=conf
    )

    result = runner.invoke(cli.main, cli_inputs)

    core_testing.assert_execution_success(result)
    assert "" == result.output

    exp_image_tag = image_tag or mock_get_git_sha.return_value
    if conf_override:
        exp_image_tag = "{}-{}".format(exp_image_tag, conf_override)

    mock_get_config_job_dir.assert_called_once_with(None, conf_override)
    mock_warn_if_py2_job.assert_called_once_with(patch_os_getcwd)
    # The git SHA is only looked up when no image tag was supplied.
    if image_tag:
        mock_get_git_sha.assert_not_called()
    else:
        mock_get_git_sha.assert_called_once_with(patch_os_getcwd)

    mock_get_config.assert_called_once_with(effective_config_file)
    mock_klio_config.assert_called_once_with(config_data)

    exp_docker_runtime_config = cli.DockerRuntimeConfig(
        image_tag=exp_image_tag,
        force_build=False,
        config_file_override=conf_override,
    )
    mock_test_pipeline.assert_called_once_with(
        patch_os_getcwd, conf, exp_docker_runtime_config
    )
    mock_test_pipeline.return_value.run.assert_called_once_with(
        pytest_args=pytest_args
    )
Beispiel #25
0
    def _get_effective_config(self):
        """Return the config's dict form with its keys ordered.

        Builds a ``KlioConfig`` from ``self.config_data``, converts it back
        to a dictionary and passes that through ``_order_config_keys``.
        """
        effective_config = kconfig.KlioConfig(self.config_data).as_dict()
        return self._order_config_keys(effective_config)
Beispiel #26
0
def klio_config(config):
    """Return a ``KlioConfig`` built from the ``config`` dictionary."""
    klio_conf = kconfig.KlioConfig(config)
    return klio_conf
Beispiel #27
0
 def _load_klio_config(cls):
     """Load the ``klio-job.yaml`` that sits next to this module."""
     module_dir = os.path.dirname(__file__)
     config_file_path = os.path.join(module_dir, "klio-job.yaml")
     with open(config_file_path) as config_fh:
         config_dict = yaml.safe_load(config_fh)
         return config.KlioConfig(config_dict)
Beispiel #28
0
def config():
    """Return the value produced by ``_config()``."""
    config_value = _config()
    return config_value


@pytest.fixture
def klio_config(config):
    """Return a ``KlioConfig`` built from the ``config`` fixture value."""
    klio_conf = kconfig.KlioConfig(config)
    return klio_conf


# NOTE: Python decorators are evaluated on import, and so importing
# `klio_exec.commands.profile` (which imports `klio.transforms.helpers`, which
# imports `klio.transforms.decorators`) and `klio_exec.cli` triggers the code
# in those decorators to get evaluated. Therefore, we must patch this part in
# order to import it, otherwise it will try to load the non-existent
# `/usr/src/config/.effective-klio-job.yaml`.
#
# The patch is started here at module level, before the imports below, and
# is not stopped anywhere in this part of the file.
mock_config = kconfig.KlioConfig(_config())
patcher = mock.patch(
    "klio.transforms.core.RunConfig.get",
    lambda: mock_config,
)
patcher.start()

from klio_exec.commands import profile  # noqa E402
from klio_exec import cli  # noqa E402


@pytest.fixture
def patch_get_config(monkeypatch, config):
    """Make ``cli._get_config`` return ``config`` for any argument."""

    def _fake_get_config(x):
        return config

    monkeypatch.setattr(cli, "_get_config", _fake_get_config)

Beispiel #29
0
def _klio_config():
    """Return a ``KlioConfig`` built from the result of ``_config_dict()``."""
    raw_config = _config_dict()
    return config.KlioConfig(raw_config)
Beispiel #30
0
def klio_config(config_dict):
    """Return a ``KlioConfig`` built from ``config_dict``."""
    klio_conf = config.KlioConfig(config_dict)
    return klio_conf