def query(self):
    variables = construct_variables(
        self.mode,
        self.environment_dict,
        self.pipeline_name,
        self.run_id,
        self.step_keys,
    )
    variables = add_airflow_tags(variables, self.airflow_ts)

    self.log.info(
        'Executing GraphQL query: {query}\n'.format(query=RAW_EXECUTE_PLAN_MUTATION)
        + 'with variables:\n'
        + seven.json.dumps(variables, indent=2)
    )
    return 'dagster-graphql -v \'{variables}\' -t \'{query}\''.format(
        variables=seven.json.dumps(variables), query=RAW_EXECUTE_PLAN_MUTATION
    )
def invoke_steps_within_python_operator(
    invocation_args, ts, dag_run, **kwargs
):  # pylint: disable=unused-argument
    # Unpack the arguments forwarded from the Airflow operator
    mode = invocation_args.mode
    pipeline_name = invocation_args.pipeline_name
    step_keys = invocation_args.step_keys
    instance_ref = invocation_args.instance_ref
    run_config = invocation_args.run_config
    recon_repo = invocation_args.recon_repo
    pipeline_snapshot = invocation_args.pipeline_snapshot
    execution_plan_snapshot = invocation_args.execution_plan_snapshot
    parent_pipeline_snapshot = invocation_args.parent_pipeline_snapshot

    run_id = dag_run.run_id

    variables = construct_variables(recon_repo, mode, run_config, pipeline_name, run_id, step_keys)
    variables = add_airflow_tags(variables, ts)

    logging.info(
        'Executing GraphQL query: {query}\n'.format(query=EXECUTE_PLAN_MUTATION)
        + 'with variables:\n'
        + seven.json.dumps(variables, indent=2)
    )

    # Register the run on the instance (if one is configured) before executing steps
    instance = DagsterInstance.from_ref(instance_ref) if instance_ref else None
    if instance:
        instance.register_managed_run(
            pipeline_name=pipeline_name,
            run_id=run_id,
            run_config=run_config,
            mode=mode,
            solids_to_execute=None,
            step_keys_to_execute=None,
            tags=None,
            root_run_id=None,
            parent_run_id=None,
            pipeline_snapshot=pipeline_snapshot,
            execution_plan_snapshot=execution_plan_snapshot,
            parent_pipeline_snapshot=parent_pipeline_snapshot,
        )

    workspace = create_in_process_ephemeral_workspace(pointer=recon_repo.pointer)
    events = execute_execute_plan_mutation(
        workspace,
        variables,
        instance_ref=instance_ref,
    )
    check_events_for_failures(events)
    check_events_for_skips(events)
    return events
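# A minimal usage sketch, not from the source: `invoke_steps_within_python_operator`
# matches the shape Airflow 1.x expects of a `python_callable` when
# `provide_context=True`, since Airflow then passes `ts`, `dag_run`, and the rest of
# the template context as keyword arguments. `invocation_args` is assumed to be an
# already-constructed arguments object in scope; the dag_id and task_id are hypothetical.
from datetime import datetime
from functools import partial

from airflow import DAG
from airflow.operators.python_operator import PythonOperator

dag = DAG(dag_id='dagster_pipeline', start_date=datetime(2020, 1, 1), schedule_interval=None)

step_task = PythonOperator(
    task_id='dagster_execute_steps',  # hypothetical task id
    provide_context=True,  # Airflow 1.x: inject ts, dag_run, etc. into kwargs
    python_callable=partial(invoke_steps_within_python_operator, invocation_args),
    dag=dag,
)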
def invoke_steps_within_python_operator(
    invocation_args, ts, dag_run, **kwargs
):  # pylint: disable=unused-argument
    # Unpack the arguments forwarded from the Airflow operator
    mode = invocation_args.mode
    pipeline_name = invocation_args.pipeline_name
    step_keys = invocation_args.step_keys
    instance_ref = invocation_args.instance_ref
    environment_dict = invocation_args.environment_dict
    handle = invocation_args.handle
    pipeline_snapshot = invocation_args.pipeline_snapshot
    execution_plan_snapshot = invocation_args.execution_plan_snapshot
    parent_pipeline_snapshot = invocation_args.parent_pipeline_snapshot

    run_id = dag_run.run_id

    variables = construct_variables(mode, environment_dict, pipeline_name, run_id, step_keys)
    variables = add_airflow_tags(variables, ts)

    logging.info(
        'Executing GraphQL query: {query}\n'.format(query=EXECUTE_PLAN_MUTATION)
        + 'with variables:\n'
        + seven.json.dumps(variables, indent=2)
    )

    # Register the run on the instance (if one is configured) before executing steps
    instance = DagsterInstance.from_ref(instance_ref) if instance_ref else None
    if instance:
        instance.register_managed_run(
            pipeline_name=pipeline_name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode,
            solid_subset=None,
            step_keys_to_execute=None,
            tags=None,
            root_run_id=None,
            parent_run_id=None,
            pipeline_snapshot=pipeline_snapshot,
            execution_plan_snapshot=execution_plan_snapshot,
            parent_pipeline_snapshot=parent_pipeline_snapshot,
        )

    events = execute_execute_plan_mutation(
        handle,
        variables,
        instance_ref=instance_ref,
    )
    check_events_for_failures(events)
    check_events_for_skips(events)
    return events
def invoke_steps_within_python_operator(
    invocation_args, ts, dag_run, **kwargs
):  # pylint: disable=unused-argument
    # Unpack the arguments forwarded from the Airflow operator
    mode = invocation_args.mode
    pipeline_name = invocation_args.pipeline_name
    step_keys = invocation_args.step_keys
    instance_ref = invocation_args.instance_ref
    environment_dict = invocation_args.environment_dict
    handle = invocation_args.handle

    run_id = dag_run.run_id

    variables = construct_variables(mode, environment_dict, pipeline_name, run_id, step_keys)
    variables = add_airflow_tags(variables, ts)

    logging.info(
        'Executing GraphQL query: {query}\n'.format(query=EXECUTE_PLAN_MUTATION)
        + 'with variables:\n'
        + seven.json.dumps(variables, indent=2)
    )

    # Create the run in a MANAGED state on the instance (if one is configured)
    instance = DagsterInstance.from_ref(instance_ref) if instance_ref else None
    if instance:
        instance.get_or_create_run(
            PipelineRun(
                pipeline_name=pipeline_name,
                run_id=run_id,
                environment_dict=environment_dict,
                mode=mode,
                selector=ExecutionSelector(pipeline_name),
                step_keys_to_execute=None,
                tags=None,
                status=PipelineRunStatus.MANAGED,
            )
        )

    events = execute_execute_plan_mutation(
        handle,
        variables,
        instance_ref=instance_ref,
    )
    check_events_for_failures(events)
    check_events_for_skips(events)
    return events
def query(self):
    variables = construct_variables(
        self.recon_repo,
        self.mode,
        self.run_config,
        self.pipeline_name,
        self.run_id,
        self.step_keys,
    )
    variables = add_airflow_tags(variables, self.airflow_ts)

    self.log.info(
        'Executing GraphQL query: {query}\n'.format(query=RAW_EXECUTE_PLAN_MUTATION)
        + 'with variables:\n'
        + seven.json.dumps(variables, indent=2)
    )
    return [
        'dagster-graphql',
        '-v',
        '{}'.format(seven.json.dumps(variables)),
        '-t',
        '{}'.format(RAW_EXECUTE_PLAN_MUTATION),
    ]
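# A minimal sketch, not from the source: this variant of `query` returns an argv list
# rather than a single shell string (compare the string-building variant above), which
# sidesteps shell quoting of the JSON-encoded variables. Assuming `query` is invoked as
# a plain method on an operator instance `op`, the list can be passed directly to a
# process runner such as subprocess (requires dagster-graphql on PATH):
import subprocess

argv = op.query()  # e.g. ['dagster-graphql', '-v', '{...}', '-t', '...']
result = subprocess.run(argv, capture_output=True, check=True)
print(result.stdout.decode())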
def _execute_step_k8s_job(
    _self,
    instance_ref_dict,
    step_keys,
    environment_dict,
    mode,
    pipeline_name,
    run_id,
    job_config_dict,
    job_namespace,
    load_incluster_config,
    kubeconfig_file=None,
):
    '''Run step execution in a K8s job pod.
    '''
    from dagster_k8s.job import DagsterK8sJobConfig, construct_dagster_graphql_k8s_job
    from dagster_k8s.utils import get_pod_names_in_job, retrieve_pod_logs, wait_for_job_success

    import kubernetes

    check.dict_param(instance_ref_dict, 'instance_ref_dict')
    check.list_param(step_keys, 'step_keys', of_type=str)
    check.invariant(
        len(step_keys) == 1, 'Celery K8s task executor can only execute 1 step at a time'
    )
    check.dict_param(environment_dict, 'environment_dict')
    check.str_param(mode, 'mode')
    check.str_param(pipeline_name, 'pipeline_name')
    check.str_param(run_id, 'run_id')

    # Celery will serialize this as a list
    job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
    check.inst_param(job_config, 'job_config', DagsterK8sJobConfig)
    check.str_param(job_namespace, 'job_namespace')
    check.bool_param(load_incluster_config, 'load_incluster_config')
    check.opt_str_param(kubeconfig_file, 'kubeconfig_file')

    # For when launched via DinD or running in the cluster
    if load_incluster_config:
        kubernetes.config.load_incluster_config()
    else:
        kubernetes.config.load_kube_config(kubeconfig_file)

    instance_ref = InstanceRef.from_dict(instance_ref_dict)
    instance = DagsterInstance.from_ref(instance_ref)
    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

    step_keys_str = ", ".join(step_keys)

    # Ensure we stay below k8s name length limits
    k8s_name_key = _get_k8s_name_key(run_id, step_keys)
    job_name = 'dagster-stepjob-%s' % k8s_name_key
    pod_name = 'dagster-stepjob-%s' % k8s_name_key

    variables = construct_variables(mode, environment_dict, pipeline_name, run_id, step_keys)
    args = ['-p', 'executePlan', '-v', seven.json.dumps(variables)]

    job = construct_dagster_graphql_k8s_job(job_config, args, job_name, pod_name)

    # Running list of events generated from this task execution
    events = []

    # Post event for starting execution
    engine_event = instance.report_engine_event(
        'Executing steps {} in Kubernetes job {}'.format(step_keys_str, job.metadata.name),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(step_keys_str, 'Step keys'),
                EventMetadataEntry.text(job.metadata.name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(job_config.job_image, 'Job image'),
                EventMetadataEntry.text(job_config.image_pull_policy, 'Image pull policy'),
                EventMetadataEntry.text(str(job_config.image_pull_secrets), 'Image pull secrets'),
                EventMetadataEntry.text(
                    str(job_config.service_account_name), 'Service account name'
                ),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryK8sJobEngine,
        # validated above that step_keys is length 1, and it is not possible to use ETH or
        # the execution plan in this function (Celery K8s workers should not have access to
        # user code)
        step_key=step_keys[0],
    )
    events.append(engine_event)

    kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)

    wait_for_job_success(job.metadata.name, namespace=job_namespace)
    pod_names = get_pod_names_in_job(job.metadata.name, namespace=job_namespace)

    # Post engine event for log retrieval
    engine_event = instance.report_engine_event(
        'Retrieving logs from Kubernetes Job pods',
        pipeline_run,
        EngineEventData([EventMetadataEntry.text('\n'.join(pod_names), 'Pod names')]),
        CeleryK8sJobEngine,
        step_key=step_keys[0],
    )
    events.append(engine_event)

    logs = []
    for pod_name in pod_names:
        raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
        logs += raw_logs.split('\n')

    res = parse_raw_log_lines(logs)

    handle_execution_errors(res, 'executePlan')
    step_events = handle_execute_plan_result(res)

    events += step_events

    serialized_events = [serialize_dagster_namedtuple(event) for event in events]
    return serialized_events
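# A minimal consumer sketch, not from the source: `_execute_step_k8s_job` returns its
# events pre-serialized with `serialize_dagster_namedtuple`, so whoever collects the
# Celery task result has to deserialize before inspecting the events.
# `deserialize_json_to_dagster_namedtuple` is assumed to be the serdes counterpart
# available from dagster; `task_result` is a hypothetical stand-in for the returned list.
from dagster.serdes import deserialize_json_to_dagster_namedtuple

task_result = []  # hypothetical: the list of strings returned by _execute_step_k8s_job
events = [deserialize_json_to_dagster_namedtuple(e) for e in task_result]
for event in events:
    print(type(event).__name__)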