def test_dict_roundtrip():
    """Check that a SolidHandle survives ``_asdict`` -> JSON -> ``from_dict``.

    Covers a three-level nested handle and a handle whose second argument
    is ``None``.
    """
    nested = SolidHandle('baz', SolidHandle('bar', SolidHandle('foo', None)))
    single = SolidHandle('foo', None)

    for handle in (nested, single):
        # Go through a real JSON string so only JSON-representable data survives.
        encoded = json.dumps(handle._asdict())
        decoded = json.loads(encoded)
        assert SolidHandle.from_dict(decoded) == handle
def test_dict_roundtrip():
    """Check that a NodeHandle is unchanged by a dict/JSON round trip.

    Each handle is converted with ``_asdict``, serialized to a JSON string,
    parsed back, and rebuilt with ``NodeHandle.from_dict``; the result must
    compare equal to the starting handle.
    """

    def through_json(handle):
        # Serialize to text and back before rebuilding the handle.
        return NodeHandle.from_dict(json.loads(json.dumps(handle._asdict())))

    # Nested case: three levels deep.
    deep = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    assert through_json(deep) == deep

    # Flat case: second argument is None.
    flat = NodeHandle("foo", None)
    assert through_json(flat) == flat
def launch_run(self, instance, run, external_pipeline):
    '''Start ``sync_cli_api_execute_run.py`` in a subprocess for ``run``.

    The child is started with the current interpreter (``sys.executable``)
    and receives one JSON argument holding the serialized instance ref, the
    serialized pipeline origin and the run id. The process handle is stored
    in ``self._living_process_by_run_id`` under ``self._processes_lock``.

    Args:
        instance: Not read in this method; the instance ref is taken from
            ``self._instance``.
        run (PipelineRun): The run to execute.
        external_pipeline (ExternalPipeline): Supplies the pipeline origin.

    Returns:
        The ``run`` argument, unchanged.
    '''
    check.inst_param(run, 'run', PipelineRun)
    check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)

    script_path = file_relative_path(__file__, 'sync_cli_api_execute_run.py')
    cli_args = {
        'instance_ref': serialize_dagster_namedtuple(self._instance.get_ref()),
        'pipeline_origin': serialize_dagster_namedtuple(external_pipeline.get_origin()),
        'pipeline_run_id': run.run_id,
    }

    # We use the IPC subprocess machinery here because we want to be able to interrupt this
    # process. We could use multiprocessing and a thread to poll a shared multiprocessing.Event,
    # interrupting when the event is set, but in this case that's six of one, half a dozen of
    # the other.
    child = open_ipc_subprocess([sys.executable, script_path, json.dumps(cli_args)])

    # Register the child under the lock so it can be looked up by run id.
    with self._processes_lock:
        self._living_process_by_run_id[run.run_id] = child

    return run
def test_graphql_get(instance, test_client: TestClient):
    """Exercise GET requests against ``/graphql``.

    Cases: a plain query, a request with no query, a query with JSON-encoded
    variables, and a query whose variables string is truncated.

    ``instance`` is not referenced in the body; presumably it is requested
    only for its fixture side effects (confirm against the fixture).
    """
    endpoint = "/graphql"
    run_not_found = {"data": {"pipelineRunOrError": {"__typename": "RunNotFoundError"}}}

    # base case
    typename_response = test_client.get(endpoint, params={"query": "{__typename}"})
    assert typename_response.status_code == 200, typename_response.text
    assert typename_response.json() == {"data": {"__typename": "DagitQuery"}}

    # missing
    no_query_response = test_client.get(endpoint)
    assert no_query_response.status_code == 400, no_query_response.text

    # variables
    var_str = json.dumps({"runId": "missing"})
    with_vars_response = test_client.get(
        endpoint,
        params={"query": RUN_QUERY, "variables": var_str},
    )
    assert with_vars_response.status_code == 200, with_vars_response.text
    assert with_vars_response.json() == run_not_found

    # malformed vars
    truncated_vars = var_str[:-2]  # malform by trimming
    bad_vars_response = test_client.get(
        endpoint,
        params={"query": RUN_QUERY, "variables": truncated_vars},
    )
    assert bad_vars_response.status_code == 400, bad_vars_response.text
def test_asset_multi_wipe(asset_instance):
    """Wipe two asset keys in one command invocation and check two remain.

    After both pipelines run the instance reports four asset keys. The wipe
    command is invoked with two JSON-encoded asset keys and ``DELETE`` on
    stdin; afterwards two asset keys are expected.
    """
    runner = CliRunner()
    execute_pipeline(pipeline_one, instance=asset_instance)
    execute_pipeline(pipeline_two, instance=asset_instance)
    assert len(asset_instance.all_asset_keys()) == 4

    # Each positional argument is one asset key, encoded as a JSON list of path parts.
    keys_to_wipe = [
        json.dumps(["path", "to", "asset_3"]),
        json.dumps(["asset_1"]),
    ]
    result = runner.invoke(asset_wipe_command, keys_to_wipe, input="DELETE\n")

    assert result.exit_code == 0
    assert "Removed asset indexes from event logs" in result.output
    assert len(asset_instance.all_asset_keys()) == 2
def send_wrapper(message: Message):
    """Forward ``message`` to ``send``, adding a call-counts header first.

    For messages whose ``"type"`` is ``"http.response.start"``, if
    ``traced_counter.get()`` yields a truthy ``Counter``, its ``counts()``
    are JSON-encoded and appended as the ``x-dagster-call-counts`` header.

    Returns:
        Whatever ``send(message)`` returns.
    """
    is_response_start = message["type"] == "http.response.start"
    if is_response_start:
        call_counter = traced_counter.get()
        if call_counter and isinstance(call_counter, Counter):
            # MutableHeaders is built from the message itself (scope=message).
            MutableHeaders(scope=message).append(
                "x-dagster-call-counts",
                json.dumps(call_counter.counts()),
            )
    return send(message)
def test_asset_wipe_errors(asset_instance):  # pylint: disable=unused-argument
    """Check the two usage errors reported by the asset wipe command.

    Invoking with no arguments, or with both ``--all`` and an asset key,
    must exit with code 2 and print the matching error message.
    """
    runner = CliRunner()

    # No asset key and no --all.
    no_args_result = runner.invoke(asset_wipe_command)
    assert no_args_result.exit_code == 2
    missing_key_message = (
        "Error, you must specify an asset key or use `--all` to wipe all asset keys."
    )
    assert missing_key_message in no_args_result.output

    # --all combined with an explicit asset key.
    conflicting_args = ["--all", json.dumps(["path", "to", "asset_key"])]
    conflict_result = runner.invoke(asset_wipe_command, conflicting_args)
    assert conflict_result.exit_code == 2
    assert "Error, cannot use more than one of: asset key, `--all`." in conflict_result.output
def test_asset_single_wipe_noprompt(asset_instance):
    """Wipe one asset key with ``--noprompt`` and check three keys remain.

    Four asset keys are expected after both pipelines run; after the wipe
    command succeeds, three are expected.
    """
    runner = CliRunner()
    execute_pipeline(pipeline_one, instance=asset_instance)
    execute_pipeline(pipeline_two, instance=asset_instance)
    assert len(asset_instance.all_asset_keys()) == 4

    # The asset key is passed as a JSON list of path parts; no stdin input is supplied.
    target_key = json.dumps(["path", "to", "asset_3"])
    result = runner.invoke(asset_wipe_command, ["--noprompt", target_key])

    assert result.exit_code == 0
    assert "Removed asset indexes from event logs" in result.output
    assert len(asset_instance.all_asset_keys()) == 3
def test_graphql_post(test_client: TestClient):
    """Exercise POST requests against ``/graphql``.

    Cases: query in URL params, no query, query with JSON-encoded variables,
    truncated variables, a JSON request body, and a raw body sent with
    ``Content-type: application/graphql``.
    """
    endpoint = "/graphql"
    typename_payload = {"data": {"__typename": "DagitQuery"}}
    run_not_found = {"data": {"pipelineRunOrError": {"__typename": "RunNotFoundError"}}}

    # base case
    typename_response = test_client.post(endpoint, params={"query": "{__typename}"})
    assert typename_response.status_code == 200, typename_response.text
    assert typename_response.json() == typename_payload

    # missing
    no_query_response = test_client.post(endpoint)
    assert no_query_response.status_code == 400, no_query_response.text

    # variables
    var_str = json.dumps({"runId": "missing"})
    with_vars_response = test_client.post(
        endpoint,
        params={"query": RUN_QUERY, "variables": var_str},
    )
    assert with_vars_response.status_code == 200, with_vars_response.text
    assert with_vars_response.json() == run_not_found

    # malformed vars
    truncated_vars = var_str[:-2]  # malform by trimming
    bad_vars_response = test_client.post(
        endpoint,
        params={"query": RUN_QUERY, "variables": truncated_vars},
    )
    assert bad_vars_response.status_code == 400, bad_vars_response.text

    # application/json
    json_body = {"query": RUN_QUERY, "variables": {"runId": "missing"}}
    json_response = test_client.post(endpoint, json=json_body)
    assert json_response.status_code == 200, json_response.text
    assert json_response.json() == run_not_found

    # application/graphql
    raw_response = test_client.post(
        endpoint,
        data="{__typename}",
        headers={"Content-type": "application/graphql"},
    )
    assert raw_response.status_code == 200, raw_response.text
    assert raw_response.json() == typename_payload
def dataproc_solid(context):
    """Submit the configured job through the ``dataproc`` resource and wait for it.

    Reads ``job_config`` and ``job_scoped_cluster`` from
    ``context.solid_config``. When ``job_scoped_cluster`` is truthy the job
    is submitted and awaited inside ``cluster_context_manager()``; otherwise
    it is submitted and awaited directly on the resource.

    Returns:
        None.
    """
    job_config = context.solid_config['job_config']
    context.log.info('submitting job with config: %s' % json.dumps(job_config))

    if not context.solid_config['job_scoped_cluster']:
        # Submit to an existing cluster
        # Submit the job specified by this solid to the cluster defined by the associated resource
        dataproc = context.resources.dataproc
        submission = dataproc.submit_job(job_config)
        job_id = submission['reference']['jobId']
        context.log.info('Submitted job ID {}'.format(job_id))
        dataproc.wait_for_job(job_id)
        return

    # Cluster context manager, creates and then deletes cluster
    with context.resources.dataproc.cluster_context_manager() as cluster:
        # Submit the job specified by this solid to the cluster defined by the associated resource
        submission = cluster.submit_job(job_config)
        job_id = submission['reference']['jobId']
        context.log.info('Submitted job ID {}'.format(job_id))
        cluster.wait_for_job(job_id)
def construct_job(self, run):
    """Build the Kubernetes ``V1Job`` that runs ``dagster-graphql`` for ``run``.

    The container is given ``-p startPipelineExecution`` plus the run's
    execution params as a JSON ``-v`` argument, a ``DAGSTER_PG_PASSWORD``
    env var sourced from a secret key ref, and a ``dagster.yaml`` mount
    backed by the ``self.instance_config_map`` config map.

    Args:
        run (PipelineRun): The run to build a job for.

    Returns:
        The constructed ``client.V1Job``; nothing is submitted here.
    """
    check.inst_param(run, 'run', PipelineRun)

    job_name = 'dagster-job-%s' % run.run_id
    dagster_labels = {
        'app.kubernetes.io/name': 'dagster',
        'app.kubernetes.io/instance': 'dagster',
        'app.kubernetes.io/version': dagster_version,
    }

    execution_params = execution_params_from_pipeline_run(run)
    graphql_variables = json.dumps({'executionParams': execution_params.to_graphql_input()})

    # Password comes from a secret key ref rather than a literal value.
    pg_password_env = client.V1EnvVar(
        name='DAGSTER_PG_PASSWORD',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(
                name='dagster-postgresql', key='postgresql-password'
            )
        ),
    )
    # sub_path mounts only the dagster.yaml entry of the volume.
    instance_mount = client.V1VolumeMount(
        name='dagster-instance',
        mount_path='{dagster_home}/dagster.yaml'.format(dagster_home=self.dagster_home),
        sub_path='dagster.yaml',
    )
    job_container = client.V1Container(
        name=job_name,
        image=self.job_image,
        command=['dagster-graphql'],
        args=["-p", "startPipelineExecution", "-v", graphql_variables],
        image_pull_policy=self.image_pull_policy,
        env=[pg_password_env],
        env_from=self.env_from_sources,
        volume_mounts=[instance_mount],
    )

    config_map_volume = client.V1Volume(
        name='dagster-instance',
        config_map=client.V1ConfigMapVolumeSource(name=self.instance_config_map),
    )

    pod_metadata = client.V1ObjectMeta(
        name='dagster-job-pod-%s' % run.run_id,
        labels=dagster_labels,
    )
    pod_spec = client.V1PodSpec(
        image_pull_secrets=self.image_pull_secrets,
        service_account_name=self.service_account_name,
        restart_policy='Never',
        containers=[job_container],
        volumes=[config_map_volume],
    )
    template = client.V1PodTemplateSpec(metadata=pod_metadata, spec=pod_spec)

    job_spec = client.V1JobSpec(
        template=template,
        backoff_limit=BACKOFF_LIMIT,
        ttl_seconds_after_finished=TTL_SECONDS_AFTER_FINISHED,
    )
    return client.V1Job(
        api_version="batch/v1",
        kind="Job",
        metadata=client.V1ObjectMeta(name=job_name, labels=dagster_labels),
        spec=job_spec,
    )
def construct_job(self, run):
    """Build the Kubernetes ``V1Job`` that runs ``dagster-graphql -p executePlan``.

    The GraphQL variables are assembled from the run's selector, environment
    dict, run id and mode, and passed as a JSON ``-v`` argument. The
    container sets ``DAGSTER_HOME`` and mounts ``dagster.yaml`` from the
    ``self.instance_config_map`` config map.

    Args:
        run (PipelineRun): The run to build a job for.

    Returns:
        The constructed ``client.V1Job``; nothing is submitted here.
    """
    check.inst_param(run, 'run', PipelineRun)

    job_name = 'dagster-job-%s' % run.run_id
    dagster_labels = {
        'app.kubernetes.io/name': 'dagster',
        'app.kubernetes.io/instance': 'dagster',
        'app.kubernetes.io/version': dagster_version,
    }

    # Key order is kept as-is because it determines the serialized JSON text.
    execution_params = {
        'executionParams': {
            'selector': run.selector.to_graphql_input(),
            "environmentConfigData": run.environment_dict,
            'executionMetadata': {"runId": run.run_id},
            "mode": run.mode,
        },
    }
    container_args = ["-p", "executePlan", "-v", json.dumps(execution_params)]

    dagster_home_env = client.V1EnvVar(name='DAGSTER_HOME', value='/opt/dagster/dagster_home')
    # sub_path mounts only the dagster.yaml entry of the volume.
    instance_mount = client.V1VolumeMount(
        name='dagster-instance',
        mount_path='/opt/dagster/dagster_home/dagster.yaml',
        sub_path='dagster.yaml',
    )
    job_container = client.V1Container(
        name=job_name,
        image=self.job_image,
        command=['dagster-graphql'],
        args=container_args,
        image_pull_policy=self.image_pull_policy,
        env=[dagster_home_env],
        volume_mounts=[instance_mount],
    )

    config_map_volume = client.V1Volume(
        name='dagster-instance',
        config_map=client.V1ConfigMapVolumeSource(name=self.instance_config_map),
    )

    pod_metadata = client.V1ObjectMeta(
        name='dagster-job-pod-%s' % run.run_id,
        labels=dagster_labels,
    )
    pod_spec = client.V1PodSpec(
        image_pull_secrets=self.image_pull_secrets,
        service_account_name=self.service_account_name,
        restart_policy='Never',
        containers=[job_container],
        volumes=[config_map_volume],
    )
    template = client.V1PodTemplateSpec(metadata=pod_metadata, spec=pod_spec)

    job_spec = client.V1JobSpec(
        template=template,
        backoff_limit=BACKOFF_LIMIT,
        ttl_seconds_after_finished=TTL_SECONDS_AFTER_FINISHED,
    )
    return client.V1Job(
        api_version="batch/v1",
        kind="Job",
        metadata=client.V1ObjectMeta(name=job_name, labels=dagster_labels),
        spec=job_spec,
    )
def pretty_dump(data):
    """Serialize ``data`` to JSON text indented by two spaces.

    Items are separated by ``","`` and keys from values by ``": "``.
    """
    item_separator = ","
    key_separator = ": "
    return json.dumps(
        data,
        separators=(item_separator, key_separator),
        indent=2,
    )
def pretty_dump(data):
    '''Return ``data`` as a JSON string with two-space indentation.

    The separators are given explicitly: ``','`` between items and ``': '``
    between a key and its value.
    '''
    dump_options = {'indent': 2, 'separators': (',', ': ')}
    return json.dumps(data, **dump_options)
def return_counts(response):
    """Attach call counts to ``response`` as a header and return it.

    If ``traced_counter.get()`` yields a truthy ``Counter``, its ``counts()``
    are JSON-encoded into the ``x-dagster-call-counts`` header. Otherwise
    the response is returned untouched.
    """
    call_counter = traced_counter.get()
    if not (call_counter and isinstance(call_counter, Counter)):
        # Nothing to report.
        return response

    response.headers["x-dagster-call-counts"] = json.dumps(call_counter.counts())
    return response
def construct_job(self, run):
    """Build the Kubernetes ``V1Job`` for ``run``, gated on a database-ready check.

    The pod has an init container that loops on ``pg_isready`` against
    ``self.postgres_host`` / ``self.postgres_port``, and a job container
    that runs ``dagster-graphql -p startPipelineExecution`` with the run's
    selector, environment dict and mode as a JSON ``-v`` argument.

    Args:
        run (PipelineRun): The run to build a job for.

    Returns:
        The constructed ``client.V1Job``; nothing is submitted here.
    """
    check.inst_param(run, 'run', PipelineRun)

    job_name = 'dagster-job-%s' % run.run_id
    dagster_labels = {
        'app.kubernetes.io/name': 'dagster',
        'app.kubernetes.io/instance': 'dagster',
        'app.kubernetes.io/version': dagster_version,
    }

    # Shell loop: poll pg_isready every 2 seconds until it succeeds.
    wait_for_db_script = (
        'until pg_isready -h {pg_host} -p {pg_port}; '
        'do echo waiting for database; sleep 2; done;'.format(
            pg_host=self.postgres_host,
            pg_port=self.postgres_port,
        )
    )
    init_container = client.V1Container(
        name='check-db-ready',
        image='postgres:9.6.16',
        command=['sh', '-c', wait_for_db_script],
    )

    # Key order is kept as-is because it determines the serialized JSON text.
    execution_params = {
        'executionParams': {
            'selector': run.selector.to_graphql_input(),
            "environmentConfigData": run.environment_dict,
            "mode": run.mode,
        }
    }
    container_args = ["-p", "startPipelineExecution", "-v", json.dumps(execution_params)]

    dagster_home_env = client.V1EnvVar(name='DAGSTER_HOME', value='/opt/dagster/dagster_home')
    # sub_path mounts only the dagster.yaml entry of the volume.
    instance_mount = client.V1VolumeMount(
        name='dagster-instance',
        mount_path='/opt/dagster/dagster_home/dagster.yaml',
        sub_path='dagster.yaml',
    )
    job_container = client.V1Container(
        name=job_name,
        image=self.job_image,
        command=['dagster-graphql'],
        args=container_args,
        image_pull_policy='IfNotPresent',
        env=[dagster_home_env],
        volume_mounts=[instance_mount],
    )

    config_map_volume = client.V1Volume(
        name='dagster-instance',
        config_map=client.V1ConfigMapVolumeSource(name='dagster-instance'),
    )

    pod_metadata = client.V1ObjectMeta(
        name='dagster-job-pod-%s' % run.run_id,
        labels=dagster_labels,
    )
    pod_spec = client.V1PodSpec(
        image_pull_secrets=self.image_pull_secrets,
        service_account_name=self.service_account_name,
        init_containers=[init_container],
        restart_policy='Never',
        containers=[job_container],
        volumes=[config_map_volume],
    )
    template = client.V1PodTemplateSpec(metadata=pod_metadata, spec=pod_spec)

    return client.V1Job(
        api_version="batch/v1",
        kind="Job",
        metadata=client.V1ObjectMeta(name=job_name, labels=dagster_labels),
        spec=client.V1JobSpec(template=template, backoff_limit=4),
    )