def profile_memory_per_line(
    get_maximum,
    per_element,
    input_file,
    output_file,
    show_logs,
    entity_ids,
    config_file,
):
    """Run the line-by-line memory profiler against the job's pipeline.

    The job config is read from ``config_file`` (falling back to
    ``klio-job.yaml``) and handed to ``profile.KlioPipeline`` together with
    the input/output files and entity IDs.

    Args:
        get_maximum: forwarded to ``KlioPipeline.profile`` as ``get_maximum``.
        per_element: accepted, but not referenced inside this function.
        input_file: passed to the profiling pipeline and to the input check.
        output_file: passed to the profiling pipeline.
        show_logs: when falsy, logging is disabled up to ``CRITICAL``.
        entity_ids: passed to the profiling pipeline and to the input check.
        config_file: path to the job config; ``klio-job.yaml`` when falsy.
    """
    # Imported lazily, inside the command, as in the other profile commands.
    from klio_exec.commands import profile

    raw_config = _get_config(config_file or "klio-job.yaml")
    klio_config = config.KlioConfig(raw_config)

    _require_profile_input_data(input_file, entity_ids)

    if not show_logs:
        logging.disable(logging.CRITICAL)

    pipeline_kwargs = {
        "klio_config": klio_config,
        "input_file": input_file,
        "output_file": output_file,
        "entity_ids": entity_ids,
    }
    profile.KlioPipeline(**pipeline_kwargs).profile(
        what="memory_per_line", get_maximum=get_maximum
    )
def wrapper(*args, **kwargs):
    """Resolve the Klio config from CLI kwargs, then call the wrapped ``func``.

    Pops ``override``, ``template``, ``job_dir`` and ``config_file`` from
    ``kwargs`` (each must be present), loads and preprocesses the config
    found at the resolved path, and injects ``klio_config`` and
    ``config_meta`` into ``kwargs`` before invoking ``func``. The return
    value of ``func`` is not propagated.
    """
    # Pop order is kept as-is so a missing key fails on the same name.
    override_list = kwargs.pop("override")
    template_list = kwargs.pop("template")
    requested_job_dir = kwargs.pop("job_dir")
    config_file = kwargs.pop("config_file")

    job_dir, config_path = get_config_job_dir(requested_job_dir, config_file)
    warn_if_py2_job(job_dir)

    processed = config.KlioConfigPreprocessor.process(
        raw_config_data=get_config_by_path(config_path),
        raw_template_list=template_list,
        raw_override_list=override_list,
    )

    config_meta = KlioConfigMeta(
        job_dir=job_dir,
        config_file=config_file,
        config_path=config_path,
    )
    klio_config = config.KlioConfig(processed)

    kwargs.update(klio_config=klio_config, config_meta=config_meta)
    func(*args, **kwargs)
def klio_config():
    """Return a ``KlioConfig`` built from a fixed test job definition."""
    pipeline_options = {
        "worker_harness_container_image": (
            "gcr.io/sigint/gke-baseline-random-music-gke"
        ),
        "region": "some-region",
        "project": "test-project",
    }
    job_input = {
        "topic": "foo-topic",
        "subscription": "foo-sub",
        "data_location": "foo-input-location",
    }
    job_output = {
        "topic": "foo-topic-output",
        "data_location": "foo-output-location",
    }
    return config.KlioConfig(
        {
            "job_name": "test-job",
            "version": 1,
            "pipeline_options": pipeline_options,
            "job_config": {"inputs": [job_input], "outputs": [job_output]},
        }
    )
def run_pipeline(image_tag, direct_runner, update, config_file, blocking):
    """Load the job config and run the Klio pipeline.

    Args:
        image_tag: passed through to ``RuntimeConfig``.
        direct_runner: when truthy, forces ``pipeline_options.runner`` to
            ``"direct"`` in the loaded config data.
        update: CLI value; when ``None`` the config's
            ``pipeline_options.update`` is used instead.
        config_file: path to the job config; ``klio-job.yaml`` when falsy.
        blocking: CLI value; when ``None`` the config's
            ``job_config.blocking`` is used instead.
    """
    config_path = config_file or "klio-job.yaml"
    config_data = _get_config(config_path)

    # Warn when the runtime config differs from the one used at build time.
    # This runs after _get_config so that a missing config file is dealt
    # with first. Only an explicit ``False`` triggers the warning.
    configs_match = _compare_runtime_to_buildtime_config(config_path)
    if configs_match is False:
        warning_template = (
            "The Klio config file '{}' at runtime differs from the config "
            "file used when building this Docker image. If this is unexpected "
            "behavior, please double check your runtime config, or rebuild "
            "your Docker image with the correct config file."
        )
        logging.warning(warning_template.format(config_path))

    if direct_runner:
        config_data["pipeline_options"]["runner"] = "direct"

    # The job name is read from the raw data before KlioConfig is built.
    job_name = config_data["job_name"]
    conf_obj = config.KlioConfig(config_data)

    # Values not explicitly set on the CLI fall back to the config.
    update = conf_obj.pipeline_options.update if update is None else update
    blocking = conf_obj.job_config.blocking if blocking is None else blocking

    runtime_conf = RuntimeConfig(image_tag, direct_runner, update, blocking)
    run.KlioPipeline(job_name, conf_obj, runtime_conf).run()
def setup(self, config_data, config_file, config_override=None):
    """Wire up the mocks for a CLI test and return the real ``KlioConfig``.

    Stores the arguments on ``self``, patches ``warn_if_py2_job`` on the
    module's ``core_utils``, primes the job-dir and config-loading mocks,
    builds the expected ``KlioConfigMeta``, and patches
    ``core_utils.config.KlioConfig`` so that it returns the config built
    here from ``config_data``.
    """
    self.config_override = config_override
    self.config_data = config_data
    self.config_file = config_file

    self.mock_warn_if_py2_job = self.mocker.Mock()
    self.monkeypatch.setattr(
        self.module.core_utils,
        "warn_if_py2_job",
        self.mock_warn_if_py2_job,
    )

    # The override, when given, wins over the default config file.
    effective_path = config_override or config_file
    self.mock_get_config_job_dir.return_value = (
        self.patch_os_getcwd,
        effective_path,
    )
    self.mock_get_config.return_value = config_data

    self.meta = core_utils.KlioConfigMeta(
        job_dir=self.patch_os_getcwd,
        config_file=config_override,
        config_path=effective_path,
    )

    # Build the real object before KlioConfig is replaced by a mock.
    self.klio_config = kconfig.KlioConfig(config_data)
    self.mock_klio_config = self.mocker.patch.object(
        core_utils.config, "KlioConfig"
    )
    self.mock_klio_config.return_value = self.klio_config

    return self.klio_config
def test_compare_runtime_to_buildtime_config(
    mocker, monkeypatch, addl_runtime_data, buildtime_exists, exp_retval
):
    """Check the runtime/buildtime comparison against a mocked config file."""
    monkeypatch.setattr(os.path, "exists", lambda _path: buildtime_exists)

    buildtime_data = {"job_name": "foo", "job_config": {}}
    runtime_data = dict(buildtime_data)
    if addl_runtime_data:
        # Give the runtime config its own job_config carrying an extra key,
        # leaving the buildtime data untouched.
        runtime_data["job_config"] = {
            **runtime_data["job_config"],
            "foo": "bar",
        }

    # multiple `open` mocks: https://stackoverflow.com/a/26830397/1579977
    serialized_buildtime = yaml.dump(buildtime_data).encode("utf-8")
    runtime_conf = kconfig.KlioConfig(runtime_data)

    mock_open_buildtime = mocker.mock_open(read_data=serialized_buildtime)
    mock_open = mocker.patch("klio_exec.cli.open", mock_open_buildtime)
    mock_open.side_effect = (mock_open_buildtime.return_value,)

    act_retval = cli._compare_runtime_to_buildtime_config(runtime_conf)

    assert exp_retval == act_retval
def klio_job_config():
    """Return a ``KlioConfig`` with pubsub event I/O and GCS data I/O."""
    events = {
        "inputs": [
            {
                "type": "pubsub",
                "topic": "an-input-topic",
                "subscription": "a-subscription",
            }
        ],
        "outputs": [{"type": "pubsub", "topic": "foo-topic-output"}],
    }
    data = {
        "inputs": [{"type": "gcs", "location": "gs://a-test-input/location"}],
        "outputs": [{"type": "gcs", "location": "foo-output-location"}],
    }
    return config.KlioConfig(
        {
            "job_name": "test-job",
            "version": 1,
            "pipeline_options": {"project": "test-gcp-project"},
            "job_config": {"events": events, "data": data},
        }
    )
def profile_cpu(
    interval,
    input_file,
    output_file,
    plot_graph,
    show_logs,
    entity_ids,
    config_file,
):
    """Run the CPU profiler against the job's pipeline.

    The job config is read from ``config_file`` (falling back to
    ``klio-job.yaml``). ``interval``, ``show_logs`` and ``plot_graph`` are
    forwarded to ``KlioPipeline.profile``; when that call returns a truthy
    value, it is echoed as the location of the generated plot.
    """
    from klio_exec.commands import profile

    raw_config = _get_config(config_file or "klio-job.yaml")
    klio_config = config.KlioConfig(raw_config)

    _require_profile_input_data(input_file, entity_ids)

    pipeline = profile.KlioPipeline(
        klio_config=klio_config,
        input_file=input_file,
        output_file=output_file,
        entity_ids=entity_ids,
    )
    output_png = pipeline.profile(
        what="cpu",
        interval=interval,
        show_logs=show_logs,
        plot_graph=plot_graph,
    )
    if not output_png:
        return
    click.echo("CPU plot graph generated at: {}".format(output_png))
def stop_job(config_file):
    """Stop the job described by the config file in the current directory.

    ``config_file`` (``klio-job.yaml`` when falsy) is joined onto the
    absolute current working directory before being loaded.
    """
    config_path = os.path.join(
        os.path.abspath(os.getcwd()),
        config_file or "klio-job.yaml",
    )
    conf_obj = config.KlioConfig(_get_config(config_path))

    # TODO: make this a click option once draining is supported @lynn
    stop.stop(conf_obj, "cancel")
def audit_job(config_file):
    """Audit the job in the current working directory.

    Sets ``KLIO_TEST_MODE`` in the process environment, loads the config
    from ``config_file`` (``klio-job.yaml`` when falsy), and runs
    ``audit.audit`` with the absolute working directory and the config.
    """
    # NOTE: audit_job is assumed to be the only thing invoked in this
    # subprocess, so flipping KLIO_TEST_MODE here does not leak into any
    # later calls.
    os.environ["KLIO_TEST_MODE"] = "true"

    job_dir = os.path.abspath(os.getcwd())
    conf_obj = config.KlioConfig(_get_config(config_file or "klio-job.yaml"))
    audit.audit(job_dir, conf_obj)
def test_klio_config(config_dict, final_config_dict):
    """KlioConfig exposes name, sub-configs, dict form and repr as expected."""
    config_obj = config.KlioConfig(
        config_dict, config_skip_preprocessing=True
    )

    assert "test-job" == config_obj.job_name

    job_config = config_obj.job_config
    pipeline_options = config_obj.pipeline_options
    assert isinstance(job_config, config.KlioJobConfig)
    assert isinstance(pipeline_options, config.KlioPipelineConfig)

    assert final_config_dict == config_obj.as_dict()
    assert "KlioConfig(job_name='test-job')" == repr(config_obj)
def test_no_gcp_klio_config(no_gcp_config_dict):
    """KlioConfig built from a config without GCP settings gains defaults.

    Checks the job name, the types of the sub-configs, the repr, and that
    the dict form differs from the input that was passed in.
    """
    import copy

    # Snapshot the input before constructing KlioConfig, so the comparison
    # below is against exactly what was passed in, whatever the constructor
    # does to the dict it receives.
    original_dict = copy.deepcopy(no_gcp_config_dict)

    config_obj = config.KlioConfig(no_gcp_config_dict)

    assert "test-job" == config_obj.job_name
    assert isinstance(config_obj.job_config, config.KlioJobConfig)
    assert isinstance(config_obj.pipeline_options, config.KlioPipelineConfig)

    # Default variables are added to the pipeline config, so the effective
    # config must differ from the input. This previously compared against
    # `config_dict`, which is not an argument of this test.
    assert original_dict != config_obj.as_dict()

    repr_actual = repr(config_obj)
    assert "KlioConfig(job_name='test-job')" == repr_actual
def get_config():
    """Load the ``KlioConfig`` from ``../klio-job.yaml`` next to this module.

    An ``IOError`` while reading is logged and turned into ``SystemExit(1)``.
    """
    module_dir = os.path.dirname(__file__)
    config_path = os.path.join(module_dir, "..", "klio-job.yaml")
    try:
        with open(config_path) as stream:
            return config.KlioConfig(yaml.safe_load(stream))
    except IOError as err:
        logging.error(err)
        raise SystemExit(1)
def test_get_environment(monkeypatch, project, config_data, klio_pipeline):
    """``_get_environment`` adds GOOGLE_CLOUD_PROJECT only when one is set."""
    config_data["pipeline_options"]["project"] = project
    monkeypatch.setattr(
        klio_pipeline, "klio_config", kconfig.KlioConfig(config_data)
    )

    exp_envs = {
        "PYTHONPATH": "/usr/src/app",
        "GOOGLE_APPLICATION_CREDENTIALS": (
            "/usr/gcloud/application_default_credentials.json"
        ),
        "USER": "******",
    }
    if project:
        exp_envs.update({"GOOGLE_CLOUD_PROJECT": project})

    assert exp_envs == klio_pipeline._get_environment()
def _load_config_from_file(cls):
    """Read the job config from disk and return it as a ``KlioConfig``.

    The effective config under ``/usr/src/config`` is preferred; when it
    does not exist, the first ``klio-job.yaml`` found under ``/usr`` is
    used instead.
    """
    # [Klio v2] reading config from a file on every call may get expensive.
    # Could this be replaced by something in memory that is also globally
    # accessible?
    klio_job_file = "/usr/src/config/.effective-klio-job.yaml"

    # Backwards compatibility: a user relying on setup.py has the config
    # somewhere else, so it has to be located.
    if not os.path.exists(klio_job_file):
        # Lazy iterator: stop at the first hit instead of walking the whole
        # tree upfront. When nothing matches, the default path is kept.
        candidates = glob.iglob("/usr/**/klio-job.yaml", recursive=True)
        klio_job_file = next(candidates, klio_job_file)

    with open(klio_job_file, "r") as stream:
        all_config_data = yaml.safe_load(stream)
    return config.KlioConfig(all_config_data)
def test_build(conf_file, exp_image_tag, mocker, monkeypatch):
    """``build_image.build`` checks docker, then builds with the right args."""
    mock_client = mocker.Mock()
    mock_docker = mocker.Mock()
    mock_docker.from_env.return_value = mock_client
    monkeypatch.setattr(build_image, "docker", mock_docker)

    mock_docker_utils = mocker.Mock()
    monkeypatch.setattr(build_image, "docker_utils", mock_docker_utils)

    job_input = {
        "topic": "foo-topic",
        "subscription": "foo-sub",
        "data_location": "foo-input-location",
    }
    job_output = {
        "topic": "foo-topic-output",
        "data_location": "foo-output-location",
    }
    mock_config = {
        "job_name": "test-job",
        "version": 1,
        "pipeline_options": {
            "worker_harness_container_image": "gcr.register.io/squad/feature"
        },
        "job_config": {"inputs": [job_input], "outputs": [job_output]},
    }
    conf_obj = config.KlioConfig(mock_config)

    job_dir, image_tag = "jerbs", "v1"
    build_image.build(job_dir, conf_obj, conf_file, image_tag)

    mock_docker.from_env.assert_called_once_with()
    mock_docker_utils.check_docker_connection.assert_called_once_with(
        mock_client
    )
    mock_docker_utils.check_dockerfile_present.assert_called_once_with(
        job_dir
    )
    expected_image = conf_obj.pipeline_options.worker_harness_container_image
    mock_docker_utils.build_docker_image.assert_called_once_with(
        job_dir,
        expected_image,
        exp_image_tag,
        conf_file,
    )
def _run_pipeline(input_file, show_logs, entity_ids, config_file):
    """Run the job's pipeline through the profiler with ``what="run"``.

    The job config is read from ``config_file`` (``klio-job.yaml`` when
    falsy). When ``show_logs`` is falsy, logging is disabled up to
    ``CRITICAL``.
    """
    from klio_exec.commands import profile

    raw_config = _get_config(config_file or "klio-job.yaml")
    klio_config = config.KlioConfig(raw_config)

    # Safety check, even though another klioexec command is expected to be
    # the caller.
    _require_profile_input_data(input_file, entity_ids)

    if not show_logs:
        logging.disable(logging.CRITICAL)

    profile.KlioPipeline(
        klio_config=klio_config,
        input_file=input_file,
        entity_ids=entity_ids,
    ).profile(what="run")
def test_config_pickling(config_dict, final_config_dict):
    """A pickled ``KlioConfig`` round-trips without class-level state.

    Class-level attributes are not pickled, so relying on them as instance
    attributes can give missing or wrong values once the config is
    unpickled on dataflow workers. This test snapshots the class attributes,
    pickles a config, restores the snapshot, and then checks the unpickled
    object still produces the expected dict.
    """

    def get_class_attributes(klass):
        # Copy every non-dunder, non-callable attribute defined on the class.
        snapshot = {}
        for name in klass.__dict__:
            attr = getattr(klass, name)
            if name.startswith("__"):
                continue
            if inspect.ismethod(attr) or inspect.isfunction(attr):
                continue
            snapshot[name] = copy.copy(attr)
        return snapshot

    classes = [
        config.KlioConfig,
        config.KlioJobConfig,
        config.KlioPipelineConfig,
    ]
    cls_attribs = {klass: get_class_attributes(klass) for klass in classes}

    klio_config = config.KlioConfig(
        config_dict, config_skip_preprocessing=True
    )
    pickled = dill.dumps(klio_config)

    # Put class-level attributes back to the values they had before
    # KlioConfig was instantiated.
    for klass, saved in cls_attribs.items():
        for name, saved_value in saved.items():
            setattr(klass, name, saved_value)

    unpickled = dill.loads(pickled)

    assert final_config_dict == unpickled.as_dict()
def profile_wall_time(
    input_file, output_file, iterations, show_logs, entity_ids, config_file
):
    """Run the ``timeit`` profiler against the job's pipeline.

    The job config is read from ``config_file`` (``klio-job.yaml`` when
    falsy); ``iterations`` is forwarded to ``KlioPipeline.profile``. When
    ``show_logs`` is falsy, logging is disabled up to ``CRITICAL``.
    """
    from klio_exec.commands import profile

    raw_config = _get_config(config_file or "klio-job.yaml")
    klio_config = config.KlioConfig(raw_config)

    _require_profile_input_data(input_file, entity_ids)

    if not show_logs:
        logging.disable(logging.CRITICAL)

    pipeline = profile.KlioPipeline(
        klio_config=klio_config,
        input_file=input_file,
        output_file=output_file,
        entity_ids=entity_ids,
    )
    pipeline.profile(what="timeit", iterations=iterations)
def klio_config():
    """Return a ``KlioConfig`` built from a fixed test job definition."""
    job_input = {
        "topic": "foo-topic",
        "subscription": "foo-sub",
        "data_location": "foo-input-location",
    }
    job_output = {
        "topic": "foo-topic-output",
        "data_location": "foo-output-locaiton",
    }
    return config.KlioConfig(
        {
            "job_name": "test-job",
            "version": 1,
            "pipeline_options": {
                "worker_harness_container_image": "test-image"
            },
            "job_config": {"inputs": [job_input], "outputs": [job_output]},
        }
    )
def develop_job(job_dir, config_file, **kwargs):
    """Run the job's development container.

    Resolves the job directory and config path, loads the config, derives
    the image tag (the ``image_tag`` kwarg or the git SHA, suffixed with
    the config file's basename when ``config_file`` is given), and runs
    ``develop.DevelopKlioContainer``.

    ``kwargs`` must contain ``klio_path`` and ``exclude``; ``image_tag``
    and ``force_build`` are optional.
    """
    job_dir, config_path = core_utils.get_config_job_dir(job_dir, config_file)
    conf = config.KlioConfig(core_utils.get_config_by_path(config_path))

    git_sha = cli_utils.get_git_sha(job_dir, kwargs.get("image_tag"))
    image_tag = kwargs.get("image_tag") or git_sha
    if config_file:
        image_tag = "{}-{}".format(image_tag, os.path.basename(config_file))

    runtime_config = main_cli.DockerRuntimeConfig(
        image_tag=image_tag,
        force_build=kwargs.get("force_build"),
        config_file_override=config_file,
    )

    develop.DevelopKlioContainer(
        job_dir,
        conf,
        runtime_config,
        kwargs["klio_path"],
        kwargs["exclude"],
    ).run()
def test_job(pytest_args):
    """Run pytest for the job, forwarding ``pytest_args`` unchanged.

    Any arguments after -- are passed through. A non-zero pytest exit code
    is re-raised as ``SystemExit``.
    """
    import os

    import pytest

    conf_obj = config.KlioConfig(_get_config("klio-job.yaml"))

    # RunConfig ensures the config is pickled and sent to the worker. Note
    # that this depends on save_main_session being True.
    klio_transforms_core.RunConfig.set(conf_obj)

    # NOTE: test_job is assumed to be the only thing invoked in this
    # subprocess, so flipping KLIO_TEST_MODE here does not leak into any
    # later calls.
    os.environ["KLIO_TEST_MODE"] = "true"

    exit_code = pytest.main(list(pytest_args))
    if exit_code == 0:
        return
    raise SystemExit("Tests failed with exit code %s" % exit_code)
def _load_config_from_file(cls):
    """Locate the effective job config on disk and return a ``KlioConfig``.

    Lookup order: the worker's effective config path if it exists, else the
    first effective config file found under ``/usr``, else the hard-coded
    ``/usr/src/config/.effective-klio-job.yaml``.
    """
    worker_path = config_core.WORKER_RUN_EFFECTIVE_CONFIG_PATH
    if os.path.exists(worker_path):
        klio_job_file = worker_path
    else:
        pattern = os.path.join(
            "/usr/**", config_core.RUN_EFFECTIVE_CONFIG_FILE
        )
        # Only the first match is needed, so the iterator is not exhausted.
        klio_job_file = next(glob.iglob(pattern, recursive=True), None)

    if not klio_job_file:
        klio_job_file = "/usr/src/config/.effective-klio-job.yaml"

    logger = logging.getLogger("klio")
    logger.debug(f"Loading config file from {klio_job_file}.")

    with open(klio_job_file, "r") as stream:
        all_config_data = yaml.safe_load(stream)
    return config.KlioConfig(all_config_data)
def test_test_job(
    runner,
    mocker,
    config_file,
    patch_os_getcwd,
    pytest_args,
    conf_override,
    image_tag,
    mock_get_git_sha,
    mock_warn_if_py2_job,
    mock_get_config_job_dir,
):
    """``klio job test`` builds the right runtime config and runs the tests."""
    mock_test_pipeline = mocker.patch.object(
        cli.job_commands.test, "TestPipeline"
    )
    mock_test_pipeline.return_value.run.return_value = 0

    effective_config_file = conf_override or config_file
    mock_get_config_job_dir.return_value = (
        patch_os_getcwd,
        effective_config_file,
    )

    cli_inputs = ["job", "test"]
    if image_tag:
        cli_inputs += ["--image-tag", image_tag]
    if conf_override:
        cli_inputs += ["--config-file", conf_override]
    cli_inputs += list(pytest_args)

    job_input = {
        "topic": "foo-topic",
        "subscription": "foo-sub",
        "data_location": "foo-input-location",
    }
    job_output = {
        "topic": "foo-topic-output",
        "data_location": "foo-output-location",
    }
    config_data = {
        "job_name": "test-job",
        "pipeline_options": {
            "worker_harness_container_image": "gcr.register.io/squad/feature",
            "project": "test-project",
            "region": "boonies",
            "staging_location": "gs://somewhere/over/the/rainbow",
            "temp_location": "gs://somewhere/over/the/rainbow",
        },
        "job_config": {"inputs": [job_input], "outputs": [job_output]},
    }

    mock_get_config = mocker.patch.object(core_utils, "get_config_by_path")
    mock_get_config.return_value = config_data
    # deepcopy since KlioConfig will pop keys
    conf = kconfig.KlioConfig(copy.deepcopy(config_data))

    mock_klio_config = mocker.patch.object(core_utils.config, "KlioConfig")
    mock_klio_config.return_value = conf

    result = runner.invoke(cli.main, cli_inputs)

    core_testing.assert_execution_success(result)
    assert "" == result.output

    exp_image_tag = image_tag or mock_get_git_sha.return_value
    if conf_override:
        exp_image_tag = "{}-{}".format(exp_image_tag, conf_override)

    mock_get_config_job_dir.assert_called_once_with(None, conf_override)
    mock_warn_if_py2_job.assert_called_once_with(patch_os_getcwd)

    # The git SHA is only looked up when no image tag was given.
    if image_tag:
        mock_get_git_sha.assert_not_called()
    else:
        mock_get_git_sha.assert_called_once_with(patch_os_getcwd)

    mock_get_config.assert_called_once_with(effective_config_file)
    mock_klio_config.assert_called_once_with(config_data)

    exp_docker_runtime_config = cli.DockerRuntimeConfig(
        image_tag=exp_image_tag,
        force_build=False,
        config_file_override=conf_override,
    )
    mock_test_pipeline.assert_called_once_with(
        patch_os_getcwd, conf, exp_docker_runtime_config
    )
    mock_test_pipeline.return_value.run.assert_called_once_with(
        pytest_args=pytest_args
    )
def _get_effective_config(self):
    """Return the dict form of ``self.config_data`` with its keys ordered.

    The data is passed through ``KlioConfig`` and ``as_dict`` before being
    handed to ``self._order_config_keys``.
    """
    as_dict = kconfig.KlioConfig(self.config_data).as_dict()
    return self._order_config_keys(as_dict)
def klio_config(config):
    """Wrap the given ``config`` in a ``KlioConfig``."""
    conf_obj = kconfig.KlioConfig(config)
    return conf_obj
def _load_klio_config(cls):
    """Load ``klio-job.yaml`` from this module's directory as a KlioConfig."""
    module_dir = os.path.dirname(__file__)
    with open(os.path.join(module_dir, "klio-job.yaml")) as stream:
        raw_config = yaml.safe_load(stream)
        return config.KlioConfig(raw_config)
def config():
    """Return a fresh config produced by ``_config()``."""
    fresh = _config()
    return fresh


@pytest.fixture
def klio_config(config):
    """Wrap the ``config`` fixture value in a ``KlioConfig``."""
    conf_obj = kconfig.KlioConfig(config)
    return conf_obj


# NOTE: Python decorators are evaluated at import time. Importing
# `klio_exec.commands.profile` (which imports `klio.transforms.helpers`,
# which in turn imports `klio.transforms.decorators`) and `klio_exec.cli`
# therefore runs the code in those decorators. `RunConfig.get` has to be
# patched BEFORE those imports, otherwise it will try to load the
# non-existent `/usr/src/config/.effective-klio-job.yaml`.
mock_config = kconfig.KlioConfig(_config())
patcher = mock.patch(
    "klio.transforms.core.RunConfig.get",
    new=lambda: mock_config,
)
patcher.start()

from klio_exec.commands import profile  # noqa E402
from klio_exec import cli  # noqa E402


@pytest.fixture
def patch_get_config(monkeypatch, config):
    """Make ``cli._get_config`` return the ``config`` fixture value."""
    monkeypatch.setattr(cli, "_get_config", lambda _path: config)
def _klio_config():
    """Build a ``KlioConfig`` from the dict returned by ``_config_dict()``."""
    raw = _config_dict()
    return config.KlioConfig(raw)
def klio_config(config_dict):
    """Wrap the given ``config_dict`` in a ``KlioConfig``."""
    conf_obj = config.KlioConfig(config_dict)
    return conf_obj