def _execute_step_docker(
    self,
    execute_step_args_packed,
    docker_config,
):
    """Run step execution in a Docker container."""
    execute_step_args = unpack_value(
        check.dict_param(
            execute_step_args_packed,
            "execute_step_args_packed",
        )
    )
    check.inst_param(execute_step_args, "execute_step_args", ExecuteStepArgs)

    check.dict_param(docker_config, "docker_config")

    instance = DagsterInstance.from_ref(execute_step_args.instance_ref)
    pipeline_run = instance.get_run_by_id(execute_step_args.pipeline_run_id)

    check.inst(
        pipeline_run,
        PipelineRun,
        "Could not load run {}".format(execute_step_args.pipeline_run_id),
    )

    step_keys_str = ", ".join(execute_step_args.step_keys_to_execute)

    input_json = serialize_dagster_namedtuple(execute_step_args)

    command = "dagster api execute_step {}".format(json.dumps(input_json))

    docker_image = (
        docker_config["image"]
        if docker_config.get("image")
        else execute_step_args.pipeline_origin.repository_origin.container_image
    )

    if not docker_image:
        raise Exception("No docker image specified by either the job or the repository")

    client = docker.client.from_env()

    if docker_config.get("registry"):
        client.login(
            registry=docker_config["registry"]["url"],
            username=docker_config["registry"]["username"],
            password=docker_config["registry"]["password"],
        )

    # Post event for starting execution
    engine_event = instance.report_engine_event(
        "Executing steps {} in Docker container {}".format(step_keys_str, docker_image),
        pipeline_run,
        EngineEventData(
            [
                MetadataEntry.text(step_keys_str, "Step keys"),
                MetadataEntry.text(docker_image, "Image"),
                MetadataEntry.text(self.request.hostname, "Celery worker"),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryDockerExecutor,
        step_key=execute_step_args.step_keys_to_execute[0],
    )

    serialized_events = [serialize_dagster_namedtuple(engine_event)]

    docker_env = {}
    if docker_config.get("env_vars"):
        docker_env = {env_name: os.getenv(env_name) for env_name in docker_config["env_vars"]}

    try:
        docker_response = client.containers.run(
            docker_image,
            command=command,
            detach=False,
            auto_remove=True,
            # pass through this worker's environment for things like AWS creds etc.
            environment=docker_env,
            network=docker_config.get("network", None),
        )

        res = docker_response.decode("utf-8")
    except docker.errors.ContainerError as err:
        instance.report_engine_event(
            "Failed to run steps {} in Docker container {}".format(step_keys_str, docker_image),
            pipeline_run,
            EngineEventData(
                [
                    MetadataEntry.text(docker_image, "Job image"),
                    MetadataEntry.text(err.stderr, "Docker stderr"),
                ],
            ),
            CeleryDockerExecutor,
            step_key=execute_step_args.step_keys_to_execute[0],
        )
        raise
    else:
        if res is None:
            raise Exception("No response from execute_step in CeleryDockerExecutor")

        serialized_events += [event for event in res.split("\n") if event]

    return serialized_events


def _execute_step_docker(
    _self,
    instance_ref_dict,
    step_keys,
    run_config,
    mode,
    repo_name,
    repo_location_name,
    run_id,
    docker_config,
):
    '''Run step execution in a Docker container.'''
    instance_ref = InstanceRef.from_dict(instance_ref_dict)
    instance = DagsterInstance.from_ref(instance_ref)
    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

    step_keys_str = ", ".join(step_keys)

    variables = {
        'executionParams': {
            'runConfigData': run_config,
            'mode': mode,
            'selector': {
                'repositoryLocationName': repo_location_name,
                'repositoryName': repo_name,
                'pipelineName': pipeline_run.pipeline_name,
                'solidSelection': pipeline_run.solid_selection,
            },
            'executionMetadata': {'runId': run_id},
            'stepKeys': step_keys,
        }
    }

    command = 'dagster-graphql -v \'{variables}\' -p executePlan'.format(
        variables=seven.json.dumps(variables)
    )
    docker_image = docker_config['image']
    client = docker.client.from_env()

    if docker_config.get('registry'):
        client.login(
            registry=docker_config['registry']['url'],
            username=docker_config['registry']['username'],
            password=docker_config['registry']['password'],
        )

    # Post event for starting execution
    engine_event = instance.report_engine_event(
        'Executing steps {} in Docker container {}'.format(step_keys_str, docker_image),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(step_keys_str, 'Step keys'),
                EventMetadataEntry.text(docker_image, 'Image'),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryDockerExecutor,
        step_key=step_keys[0],
    )
    events = [engine_event]

    docker_env = {}
    if docker_config.get('env_vars'):
        docker_env = {env_name: os.getenv(env_name) for env_name in docker_config['env_vars']}

    try:
        docker_response = client.containers.run(
            docker_image,
            command=command,
            detach=False,
            auto_remove=True,
            # pass through this worker's environment for things like AWS creds etc.
            environment=docker_env,
        )

        res = seven.json.loads(docker_response)

    except docker.errors.ContainerError as err:
        instance.report_engine_event(
            'Failed to run steps {} in Docker container {}'.format(step_keys_str, docker_image),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(docker_image, 'Job image'),
                    EventMetadataEntry.text(err.stderr, 'Docker stderr'),
                ],
            ),
            CeleryDockerExecutor,
            step_key=step_keys[0],
        )
        raise

    except JSONDecodeError:
        instance.report_engine_event(
            'Failed to parse response for steps {} from Docker container {}'.format(
                step_keys_str, docker_image
            ),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(docker_image, 'Job image'),
                    EventMetadataEntry.text(docker_response, 'Docker Response'),
                ],
            ),
            CeleryDockerExecutor,
            step_key=step_keys[0],
        )
        raise

    else:
        handle_execution_errors(res, 'executePlan')
        step_events = handle_execute_plan_result(res)

    events += step_events

    serialized_events = [serialize_dagster_namedtuple(event) for event in events]

    return serialized_events


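# For illustration only: a minimal, self-contained sketch of the command string the
# GraphQL-based variant above runs inside the container. The payload shape mirrors
# the `variables` dict built in the function; every concrete value below (names,
# run id, step keys) is hypothetical.
import json

example_variables = {
    "executionParams": {
        "runConfigData": {},
        "mode": "default",
        "selector": {
            "repositoryLocationName": "example_location",  # hypothetical
            "repositoryName": "example_repo",  # hypothetical
            "pipelineName": "example_pipeline",  # hypothetical
            "solidSelection": None,
        },
        "executionMetadata": {"runId": "01234567-89ab-cdef-0123-456789abcdef"},  # hypothetical
        "stepKeys": ["example_solid.compute"],  # hypothetical
    }
}
example_command = "dagster-graphql -v '{variables}' -p executePlan".format(
    variables=json.dumps(example_variables)
)
print(example_command)

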
def _execute_step_docker(
    self,
    instance_ref_dict,
    step_keys,
    run_config,
    mode,
    repo_name,
    run_id,
    docker_config,
    pipeline_origin_packed,
    retries_dict,
):
    """Run step execution in a Docker container."""
    check.dict_param(instance_ref_dict, "instance_ref_dict")
    check.list_param(step_keys, "step_keys", of_type=str)
    check.dict_param(run_config, "run_config")
    check.str_param(mode, "mode")
    check.str_param(repo_name, "repo_name")
    check.str_param(run_id, "run_id")
    check.dict_param(docker_config, "docker_config")
    pipeline_origin = unpack_value(
        check.dict_param(pipeline_origin_packed, "pipeline_origin_packed")
    )
    check.dict_param(retries_dict, "retries_dict")

    instance_ref = InstanceRef.from_dict(instance_ref_dict)
    instance = DagsterInstance.from_ref(instance_ref)
    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, "Could not load run {}".format(run_id))

    step_keys_str = ", ".join(step_keys)

    input_json = serialize_dagster_namedtuple(
        ExecuteStepArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run_id,
            instance_ref=instance_ref,
            mode=mode,
            step_keys_to_execute=step_keys,
            run_config=run_config,
            retries_dict=retries_dict,
        )
    )

    command = "dagster api execute_step_with_structured_logs {}".format(json.dumps(input_json))

    docker_image = docker_config["image"]
    client = docker.client.from_env()

    if docker_config.get("registry"):
        client.login(
            registry=docker_config["registry"]["url"],
            username=docker_config["registry"]["username"],
            password=docker_config["registry"]["password"],
        )

    # Post event for starting execution
    engine_event = instance.report_engine_event(
        "Executing steps {} in Docker container {}".format(step_keys_str, docker_image),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(step_keys_str, "Step keys"),
                EventMetadataEntry.text(docker_image, "Image"),
                EventMetadataEntry.text(self.request.hostname, "Celery worker"),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryDockerExecutor,
        step_key=step_keys[0],
    )

    serialized_events = [serialize_dagster_namedtuple(engine_event)]

    docker_env = {}
    if docker_config.get("env_vars"):
        docker_env = {env_name: os.getenv(env_name) for env_name in docker_config["env_vars"]}

    try:
        docker_response = client.containers.run(
            docker_image,
            command=command,
            detach=False,
            auto_remove=True,
            # pass through this worker's environment for things like AWS creds etc.
            environment=docker_env,
            network=docker_config.get("network", None),
        )

        res = docker_response.decode("utf-8")
    except docker.errors.ContainerError as err:
        instance.report_engine_event(
            "Failed to run steps {} in Docker container {}".format(step_keys_str, docker_image),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(docker_image, "Job image"),
                    EventMetadataEntry.text(err.stderr, "Docker stderr"),
                ],
            ),
            CeleryDockerExecutor,
            step_key=step_keys[0],
        )
        raise
    else:
        if res is None:
            raise Exception(
                "No response from execute_step_with_structured_logs in CeleryDockerExecutor"
            )

        serialized_events += [event for event in res.split("\n") if event]

    return serialized_events


def _execute_step_docker(
    _self,
    instance_ref_dict,
    step_keys,
    run_config,
    mode,
    repo_name,
    repo_location_name,
    run_id,
    docker_config,
):
    """Run step execution in a Docker container."""
    instance_ref = InstanceRef.from_dict(instance_ref_dict)
    instance = DagsterInstance.from_ref(instance_ref)
    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, "Could not load run {}".format(run_id))

    step_keys_str = ", ".join(step_keys)

    variables = {
        "executionParams": {
            "runConfigData": run_config,
            "mode": mode,
            "selector": {
                "repositoryLocationName": repo_location_name,
                "repositoryName": repo_name,
                "pipelineName": pipeline_run.pipeline_name,
                "solidSelection": list(pipeline_run.solids_to_execute)
                if pipeline_run.solids_to_execute
                else None,
            },
            "executionMetadata": {"runId": run_id},
            "stepKeys": step_keys,
        }
    }

    command = "dagster-graphql -v '{variables}' -p executePlan".format(
        variables=seven.json.dumps(variables)
    )
    docker_image = docker_config["image"]
    client = docker.client.from_env()

    if docker_config.get("registry"):
        client.login(
            registry=docker_config["registry"]["url"],
            username=docker_config["registry"]["username"],
            password=docker_config["registry"]["password"],
        )

    # Post event for starting execution
    engine_event = instance.report_engine_event(
        "Executing steps {} in Docker container {}".format(step_keys_str, docker_image),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(step_keys_str, "Step keys"),
                EventMetadataEntry.text(docker_image, "Image"),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryDockerExecutor,
        step_key=step_keys[0],
    )
    events = [engine_event]

    docker_env = {}
    if docker_config.get("env_vars"):
        docker_env = {env_name: os.getenv(env_name) for env_name in docker_config["env_vars"]}

    try:
        docker_response = client.containers.run(
            docker_image,
            command=command,
            detach=False,
            auto_remove=True,
            # pass through this worker's environment for things like AWS creds etc.
            environment=docker_env,
        )

        res = seven.json.loads(docker_response)

    except docker.errors.ContainerError as err:
        instance.report_engine_event(
            "Failed to run steps {} in Docker container {}".format(step_keys_str, docker_image),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(docker_image, "Job image"),
                    EventMetadataEntry.text(err.stderr, "Docker stderr"),
                ],
            ),
            CeleryDockerExecutor,
            step_key=step_keys[0],
        )
        raise

    except JSONDecodeError:
        instance.report_engine_event(
            "Failed to parse response for steps {} from Docker container {}".format(
                step_keys_str, docker_image
            ),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(docker_image, "Job image"),
                    EventMetadataEntry.text(docker_response, "Docker Response"),
                ],
            ),
            CeleryDockerExecutor,
            step_key=step_keys[0],
        )
        raise

    else:
        handle_execution_errors(res, "executePlan")
        step_events = handle_execute_plan_result(res)

    events += step_events

    serialized_events = [serialize_dagster_namedtuple(event) for event in events]

    return serialized_events


def _execute_step_docker(
    self,
    execute_step_args_packed,
    docker_config,
):
    """Run step execution in a Docker container."""
    execute_step_args = unpack_value(
        check.dict_param(
            execute_step_args_packed,
            "execute_step_args_packed",
        )
    )
    check.inst_param(execute_step_args, "execute_step_args", ExecuteStepArgs)

    check.dict_param(docker_config, "docker_config")

    instance = DagsterInstance.from_ref(execute_step_args.instance_ref)
    pipeline_run = instance.get_run_by_id(execute_step_args.pipeline_run_id)

    check.inst(
        pipeline_run,
        PipelineRun,
        "Could not load run {}".format(execute_step_args.pipeline_run_id),
    )

    step_keys_str = ", ".join(execute_step_args.step_keys_to_execute)

    input_json = serialize_dagster_namedtuple(execute_step_args)

    command = "dagster api execute_step {}".format(json.dumps(input_json))

    docker_image = (
        docker_config["image"]
        if docker_config.get("image")
        else execute_step_args.pipeline_origin.repository_origin.container_image
    )

    if not docker_image:
        raise Exception("No docker image specified by either the job or the repository")

    client = docker.client.from_env()

    if docker_config.get("registry"):
        client.login(
            registry=docker_config["registry"]["url"],
            username=docker_config["registry"]["username"],
            password=docker_config["registry"]["password"],
        )

    # Post event for starting execution
    engine_event = instance.report_engine_event(
        "Executing steps {} in Docker container {}".format(step_keys_str, docker_image),
        pipeline_run,
        EngineEventData(
            [
                MetadataEntry("Step keys", value=step_keys_str),
                MetadataEntry("Image", value=docker_image),
                MetadataEntry("Celery worker", value=self.request.hostname),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryDockerExecutor,
        step_key=execute_step_args.step_keys_to_execute[0],
    )

    serialized_events = [serialize_dagster_namedtuple(engine_event)]

    docker_env = {}
    if docker_config.get("env_vars"):
        docker_env = {env_name: os.getenv(env_name) for env_name in docker_config["env_vars"]}

    container_kwargs = check.opt_dict_param(
        docker_config.get("container_kwargs"), "container_kwargs", key_type=str
    )

    # set defaults for detach and auto_remove
    container_kwargs["detach"] = container_kwargs.get("detach", False)
    container_kwargs["auto_remove"] = container_kwargs.get("auto_remove", True)

    # if environment variables are provided via container_kwargs, merge with env_vars
    if container_kwargs.get("environment") is not None:
        e_vars = container_kwargs.get("environment")
        if isinstance(e_vars, dict):
            docker_env.update(e_vars)
        else:
            for v in e_vars:
                # split on the first "=" only, so values containing "=" stay intact
                key, val = v.split("=", 1)
                docker_env[key] = val
        del container_kwargs["environment"]

    try:
        docker_response = client.containers.run(
            docker_image,
            command=command,
            # pass through this worker's environment for things like AWS creds etc.
            environment=docker_env,
            network=docker_config.get("network", None),
            **container_kwargs,
        )

        res = docker_response.decode("utf-8")
    except docker.errors.ContainerError as err:
        entries = [MetadataEntry("Job image", value=docker_image)]
        if err.stderr is not None:
            entries.append(MetadataEntry("Docker stderr", value=err.stderr))
        instance.report_engine_event(
            "Failed to run steps {} in Docker container {}".format(step_keys_str, docker_image),
            pipeline_run,
            EngineEventData(entries),
            CeleryDockerExecutor,
            step_key=execute_step_args.step_keys_to_execute[0],
        )
        raise
    else:
        if res is None:
            raise Exception("No response from execute_step in CeleryDockerExecutor")

        serialized_events += [event for event in res.split("\n") if event]

    return serialized_events


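# For illustration only: a minimal sketch of the docker_config mapping the tasks
# above consume, inferred from the keys they read ("image", "registry", "env_vars",
# "network", "container_kwargs"). All concrete values here are hypothetical.
example_docker_config = {
    # Image to run steps in; the latest variant falls back to the repository's
    # container_image when this is omitted.
    "image": "example_org/dagster-step:latest",  # hypothetical
    # Optional credentials passed to docker client.login.
    "registry": {
        "url": "https://index.docker.io/v1/",
        "username": "example-user",  # hypothetical
        "password": "example-password",  # hypothetical
    },
    # Names of env vars copied from the Celery worker's environment into the container.
    "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
    # Optional Docker network passed to client.containers.run.
    "network": "host",
    # Extra kwargs forwarded to client.containers.run (detach/auto_remove get defaults).
    "container_kwargs": {
        "volumes": {"/tmp/io_manager_storage": {"bind": "/tmp/io_manager_storage", "mode": "rw"}},
    },
}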