Example #1
    def query(self):
        variables = construct_variables(
            self.mode, self.environment_dict, self.pipeline_name, self.run_id, self.step_keys,
        )
        variables = add_airflow_tags(variables, self.airflow_ts)

        self.log.info(
            'Executing GraphQL query: {query}\n'.format(query=RAW_EXECUTE_PLAN_MUTATION)
            + 'with variables:\n'
            + seven.json.dumps(variables, indent=2)
        )

        return 'dagster-graphql -v \'{variables}\' -t \'{query}\''.format(
            variables=seven.json.dumps(variables), query=RAW_EXECUTE_PLAN_MUTATION
        )
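
Since this variant returns the whole invocation as one shell-style string, the caller has to split it before handing it to a process API. A minimal sketch, assuming the string comes from query() above and that dagster-graphql is on the PATH; the helper name is hypothetical:

import shlex
import subprocess

def run_query_string(cmd):
    # Hypothetical helper: 'cmd' is the single string returned by query().
    # shlex.split honors the single quotes around the JSON variables and the
    # mutation text, so each flag and its value become one argv entry.
    return subprocess.run(shlex.split(cmd), check=True)

Note that the hard-coded single quotes break if the serialized variables themselves contain a single quote; Example #5 below returns an argv list instead, which avoids shell quoting entirely.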
Example #2
def invoke_steps_within_python_operator(invocation_args, ts, dag_run,
                                        **kwargs):  # pylint: disable=unused-argument
    mode = invocation_args.mode
    pipeline_name = invocation_args.pipeline_name
    step_keys = invocation_args.step_keys
    instance_ref = invocation_args.instance_ref
    run_config = invocation_args.run_config
    recon_repo = invocation_args.recon_repo
    pipeline_snapshot = invocation_args.pipeline_snapshot
    execution_plan_snapshot = invocation_args.execution_plan_snapshot
    parent_pipeline_snapshot = invocation_args.parent_pipeline_snapshot

    run_id = dag_run.run_id

    variables = construct_variables(recon_repo, mode, run_config,
                                    pipeline_name, run_id, step_keys)
    variables = add_airflow_tags(variables, ts)

    logging.info(
        'Executing GraphQL query: {query}\n'.format(query=EXECUTE_PLAN_MUTATION)
        + 'with variables:\n'
        + seven.json.dumps(variables, indent=2)
    )

    instance = DagsterInstance.from_ref(instance_ref) if instance_ref else None
    if instance:
        instance.register_managed_run(
            pipeline_name=pipeline_name,
            run_id=run_id,
            run_config=run_config,
            mode=mode,
            solids_to_execute=None,
            step_keys_to_execute=None,
            tags=None,
            root_run_id=None,
            parent_run_id=None,
            pipeline_snapshot=pipeline_snapshot,
            execution_plan_snapshot=execution_plan_snapshot,
            parent_pipeline_snapshot=parent_pipeline_snapshot,
        )

    workspace = create_in_process_ephemeral_workspace(
        pointer=recon_repo.pointer)
    events = execute_execute_plan_mutation(
        workspace,
        variables,
        instance_ref=instance_ref,
    )
    check_events_for_failures(events)
    check_events_for_skips(events)
    return events
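
As the function name suggests, this callable is designed to be wrapped in an Airflow PythonOperator, which supplies ts and dag_run through the template context. A minimal sketch under the Airflow 1.x API; dag and invocation_args are assumed to be defined elsewhere:

from functools import partial

from airflow.operators.python_operator import PythonOperator

dagster_task = PythonOperator(
    task_id='dagster_execute_steps',
    python_callable=partial(invoke_steps_within_python_operator, invocation_args),
    provide_context=True,  # injects ts, dag_run, and the rest of the context into **kwargs
    dag=dag,  # assumed: an existing airflow.DAG
)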
Example #3
def invoke_steps_within_python_operator(invocation_args, ts, dag_run,
                                        **kwargs):  # pylint: disable=unused-argument
    mode = invocation_args.mode
    pipeline_name = invocation_args.pipeline_name
    step_keys = invocation_args.step_keys
    instance_ref = invocation_args.instance_ref
    environment_dict = invocation_args.environment_dict
    handle = invocation_args.handle
    pipeline_snapshot = invocation_args.pipeline_snapshot
    execution_plan_snapshot = invocation_args.execution_plan_snapshot
    parent_pipeline_snapshot = invocation_args.parent_pipeline_snapshot

    run_id = dag_run.run_id

    variables = construct_variables(mode, environment_dict, pipeline_name,
                                    run_id, step_keys)
    variables = add_airflow_tags(variables, ts)

    logging.info(
        'Executing GraphQL query: {query}\n'.format(query=EXECUTE_PLAN_MUTATION)
        + 'with variables:\n'
        + seven.json.dumps(variables, indent=2)
    )

    instance = DagsterInstance.from_ref(instance_ref) if instance_ref else None
    if instance:
        instance.register_managed_run(
            pipeline_name=pipeline_name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode,
            solid_subset=None,
            step_keys_to_execute=None,
            tags=None,
            root_run_id=None,
            parent_run_id=None,
            pipeline_snapshot=pipeline_snapshot,
            execution_plan_snapshot=execution_plan_snapshot,
            parent_pipeline_snapshot=parent_pipeline_snapshot,
        )

    events = execute_execute_plan_mutation(
        handle,
        variables,
        instance_ref=instance_ref,
    )
    check_events_for_failures(events)
    check_events_for_skips(events)
    return events
Example #4
def invoke_steps_within_python_operator(invocation_args, ts, dag_run,
                                        **kwargs):  # pylint: disable=unused-argument
    mode = invocation_args.mode
    pipeline_name = invocation_args.pipeline_name
    step_keys = invocation_args.step_keys
    instance_ref = invocation_args.instance_ref
    environment_dict = invocation_args.environment_dict
    handle = invocation_args.handle

    run_id = dag_run.run_id

    variables = construct_variables(mode, environment_dict, pipeline_name,
                                    run_id, step_keys)
    variables = add_airflow_tags(variables, ts)

    logging.info(
        'Executing GraphQL query: {query}\n'.format(query=EXECUTE_PLAN_MUTATION)
        + 'with variables:\n'
        + seven.json.dumps(variables, indent=2)
    )

    instance = DagsterInstance.from_ref(instance_ref) if instance_ref else None
    if instance:
        instance.get_or_create_run(
            PipelineRun(
                pipeline_name=pipeline_name,
                run_id=run_id,
                environment_dict=environment_dict,
                mode=mode,
                selector=ExecutionSelector(pipeline_name),
                step_keys_to_execute=None,
                tags=None,
                status=PipelineRunStatus.MANAGED,
            ))

    events = execute_execute_plan_mutation(
        handle,
        variables,
        instance_ref=instance_ref,
    )
    check_events_for_failures(events)
    check_events_for_skips(events)
    return events
Example #5
    def query(self):
        variables = construct_variables(
            self.recon_repo,
            self.mode,
            self.run_config,
            self.pipeline_name,
            self.run_id,
            self.step_keys,
        )
        variables = add_airflow_tags(variables, self.airflow_ts)

        self.log.info(
            'Executing GraphQL query: {query}\n'.format(query=RAW_EXECUTE_PLAN_MUTATION)
            + 'with variables:\n'
            + seven.json.dumps(variables, indent=2)
        )

        return [
            'dagster-graphql',
            '-v',
            '{}'.format(seven.json.dumps(variables)),
            '-t',
            '{}'.format(RAW_EXECUTE_PLAN_MUTATION),
        ]
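
Compared with Example #1, returning an argv list avoids shell quoting altogether: each element reaches the child process verbatim. A small sketch of the difference; the variables and mutation text below are illustrative stand-ins, not the real payloads:

import shlex

variables_json = '{"executionParams": {"mode": "default"}}'  # stand-in payload
query_text = 'mutation { __typename }'                       # stand-in document

# Argv form (this example): values pass through verbatim, no quoting needed.
argv = ['dagster-graphql', '-v', variables_json, '-t', query_text]

# String form (Example #1): only safe if quoting is delegated to shlex.quote.
cmd = 'dagster-graphql -v {} -t {}'.format(
    shlex.quote(variables_json), shlex.quote(query_text)
)
assert shlex.split(cmd) == argv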
Example #6
    def _execute_step_k8s_job(
        _self,
        instance_ref_dict,
        step_keys,
        environment_dict,
        mode,
        pipeline_name,
        run_id,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        kubeconfig_file=None,
    ):
        '''Run step execution in a K8s job pod.
        '''
        from dagster_k8s.job import DagsterK8sJobConfig, construct_dagster_graphql_k8s_job
        from dagster_k8s.utils import get_pod_names_in_job, retrieve_pod_logs, wait_for_job_success

        import kubernetes

        check.dict_param(instance_ref_dict, 'instance_ref_dict')
        check.list_param(step_keys, 'step_keys', of_type=str)
        check.invariant(
            len(step_keys) == 1, 'Celery K8s task executor can only execute 1 step at a time'
        )
        check.dict_param(environment_dict, 'environment_dict')
        check.str_param(mode, 'mode')
        check.str_param(pipeline_name, 'pipeline_name')
        check.str_param(run_id, 'run_id')

        # Celery will serialize this as a list
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, 'job_config', DagsterK8sJobConfig)
        check.str_param(job_namespace, 'job_namespace')
        check.bool_param(load_incluster_config, 'load_incluster_config')
        check.opt_str_param(kubeconfig_file, 'kubeconfig_file')

        # For when launched via DinD or running the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline_run = instance.get_run_by_id(run_id)
        check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

        step_keys_str = ", ".join(step_keys)

        # Ensure we stay below k8s name length limits
        k8s_name_key = _get_k8s_name_key(run_id, step_keys)
        job_name = 'dagster-stepjob-%s' % k8s_name_key
        pod_name = 'dagster-stepjob-%s' % k8s_name_key

        variables = construct_variables(mode, environment_dict, pipeline_name, run_id, step_keys)
        args = ['-p', 'executePlan', '-v', seven.json.dumps(variables)]

        job = construct_dagster_graphql_k8s_job(job_config, args, job_name, pod_name)

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        engine_event = instance.report_engine_event(
            'Executing steps {} in Kubernetes job {}'.format(step_keys_str, job.metadata.name),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_keys_str, 'Step keys'),
                    EventMetadataEntry.text(job.metadata.name, 'Kubernetes Job name'),
                    EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                    EventMetadataEntry.text(job_config.job_image, 'Job image'),
                    EventMetadataEntry.text(job_config.image_pull_policy, 'Image pull policy'),
                    EventMetadataEntry.text(
                        str(job_config.image_pull_secrets), 'Image pull secrets'
                    ),
                    EventMetadataEntry.text(
                        str(job_config.service_account_name), 'Service account name'
                    ),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobEngine,
            # validated above that step_keys is length 1, and it is not possible to use ETH or
            # execution plan in this function (Celery K8s workers should not access user code)
            step_key=step_keys[0],
        )
        events.append(engine_event)

        kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)

        wait_for_job_success(job.metadata.name, namespace=job_namespace)
        pod_names = get_pod_names_in_job(job.metadata.name, namespace=job_namespace)

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            'Retrieving logs from Kubernetes Job pods',
            pipeline_run,
            EngineEventData([EventMetadataEntry.text('\n'.join(pod_names), 'Pod names')]),
            CeleryK8sJobEngine,
            step_key=step_keys[0],
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
            logs += raw_logs.split('\n')

        res = parse_raw_log_lines(logs)

        handle_execution_errors(res, 'executePlan')
        step_events = handle_execute_plan_result(res)

        events += step_events

        serialized_events = [serialize_dagster_namedtuple(event) for event in events]
        return serialized_events
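
The _get_k8s_name_key helper is not shown in this excerpt; per the comment above its call site, it exists to keep 'dagster-stepjob-<key>' under Kubernetes' 63-character name limit. A hypothetical sketch of such a helper, which may differ from the real implementation:

import hashlib

def _get_k8s_name_key(run_id, step_keys):
    # Hypothetical: reduce run_id plus the ordered step keys to a fixed-length
    # digest. A SHA-1 hex digest is 40 characters, so 'dagster-stepjob-' plus
    # the key stays under the 63-character Kubernetes name limit.
    return hashlib.sha1('-'.join([run_id] + step_keys).encode('utf-8')).hexdigest()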