Example #1
0
def helm_chart_for_k8s_run_launcher(namespace,
                                    docker_image,
                                    should_cleanup=True):
    check.str_param(namespace, "namespace")
    check.str_param(docker_image, "docker_image")
    check.bool_param(should_cleanup, "should_cleanup")

    repository, tag = docker_image.split(":")
    pull_policy = image_pull_policy()
    helm_config = {
        "dagster-user-deployments": {
            "enabled": False,
            "enableSubchart": False
        },
        "dagit": {
            "image": {
                "repository": repository,
                "tag": tag,
                "pullPolicy": pull_policy
            },
            "env": {
                "TEST_SET_ENV_VAR": "test_dagit_env_var"
            },
            "envConfigMaps": [{
                "name": TEST_CONFIGMAP_NAME
            }],
            "envSecrets": [{
                "name": TEST_SECRET_NAME
            }],
            "livenessProbe": {
                "httpGet": {
                    "path": "/dagit_info",
                    "port": 80
                },
                "periodSeconds": 20,
                "failureThreshold": 3,
            },
            "startupProbe": {
                "httpGet": {
                    "path": "/dagit_info",
                    "port": 80
                },
                "failureThreshold": 6,
                "periodSeconds": 10,
            },
        },
        "runLauncher": {
            "type": "K8sRunLauncher",
            "config": {
                "k8sRunLauncher": {
                    "jobNamespace": namespace,
                    "envConfigMaps": [{
                        "name": TEST_CONFIGMAP_NAME
                    }],
                    "envSecrets": [{
                        "name": TEST_SECRET_NAME
                    }],
                }
            },
        },
        "rabbitmq": {
            "enabled": False
        },
        "scheduler": {
            "type": "K8sScheduler",
            "config": {
                "k8sScheduler": {
                    "schedulerNamespace": namespace,
                    "envSecrets": [{
                        "name": TEST_SECRET_NAME
                    }],
                }
            },
        },
        "serviceAccount": {
            "name": "dagit-admin"
        },
        "postgresqlPassword": "******",
        "postgresqlDatabase": "test",
        "postgresqlUser": "******",
        "dagsterDaemon": {
            "enabled": False
        },
    }

    with _helm_chart_helper(
            namespace,
            should_cleanup,
            helm_config,
            helm_install_name="helm_chart_for_k8s_run_launcher"):
        yield
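
A minimal consumption sketch (not part of the example above): helpers like helm_chart_for_k8s_run_launcher are generator functions, so a test suite would typically wrap them as fixtures or context managers. The fixture name, scope, and the namespace/docker_image fixtures it depends on are hypothetical.

import pytest

@pytest.fixture(scope="session")
def k8s_run_launcher_helm_chart(namespace, docker_image):
    # Delegate to the generator above: it installs the chart, yields while the
    # release is live, and tears it down on exit when should_cleanup=True.
    yield from helm_chart_for_k8s_run_launcher(namespace, docker_image)
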
Example #2
0
    def _execute_step_k8s_job(
        _self,
        instance_ref_dict,
        step_keys,
        environment_dict,
        mode,
        pipeline_name,
        run_id,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        kubeconfig_file=None,
    ):
        '''Run step execution in a K8s job pod.
        '''
        from dagster_k8s.job import DagsterK8sJobConfig, construct_dagster_graphql_k8s_job
        from dagster_k8s.utils import get_pod_names_in_job, retrieve_pod_logs, wait_for_job_success

        import kubernetes

        check.dict_param(instance_ref_dict, 'instance_ref_dict')
        check.list_param(step_keys, 'step_keys', of_type=str)
        check.invariant(
            len(step_keys) == 1, 'Celery K8s task executor can only execute 1 step at a time'
        )
        check.dict_param(environment_dict, 'environment_dict')
        check.str_param(mode, 'mode')
        check.str_param(pipeline_name, 'pipeline_name')
        check.str_param(run_id, 'run_id')

        # Celery will serialize this as a list
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, 'job_config', DagsterK8sJobConfig)
        check.str_param(job_namespace, 'job_namespace')
        check.bool_param(load_incluster_config, 'load_incluster_config')
        check.opt_str_param(kubeconfig_file, 'kubeconfig_file')

        # For when launched via DinD or running the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline_run = instance.get_run_by_id(run_id)
        check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

        step_keys_str = ", ".join(step_keys)

        # Ensure we stay below k8s name length limits
        k8s_name_key = _get_k8s_name_key(run_id, step_keys)
        job_name = 'dagster-stepjob-%s' % k8s_name_key
        pod_name = 'dagster-stepjob-%s' % k8s_name_key

        variables = construct_variables(mode, environment_dict, pipeline_name, run_id, step_keys)
        args = ['-p', 'executePlan', '-v', seven.json.dumps(variables)]

        job = construct_dagster_graphql_k8s_job(job_config, args, job_name, pod_name)

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        engine_event = instance.report_engine_event(
            'Executing steps {} in Kubernetes job {}'.format(step_keys_str, job.metadata.name),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_keys_str, 'Step keys'),
                    EventMetadataEntry.text(job.metadata.name, 'Kubernetes Job name'),
                    EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                    EventMetadataEntry.text(job_config.job_image, 'Job image'),
                    EventMetadataEntry.text(job_config.image_pull_policy, 'Image pull policy'),
                    EventMetadataEntry.text(
                        str(job_config.image_pull_secrets), 'Image pull secrets'
                    ),
                    EventMetadataEntry.text(
                        str(job_config.service_account_name), 'Service account name'
                    ),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobEngine,
            # validated above that step_keys has length 1, and it is not possible to use ETH or
            # execution plan in this function (Celery K8s workers should not have access to user code)
            step_key=step_keys[0],
        )
        events.append(engine_event)

        kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)

        wait_for_job_success(job.metadata.name, namespace=job_namespace)
        pod_names = get_pod_names_in_job(job.metadata.name, namespace=job_namespace)

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            'Retrieving logs from Kubernetes Job pods',
            pipeline_run,
            EngineEventData([EventMetadataEntry.text('\n'.join(pod_names), 'Pod names')]),
            CeleryK8sJobEngine,
            step_key=step_keys[0],
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
            logs += raw_logs.split('\n')

        res = parse_raw_log_lines(logs)

        handle_execution_errors(res, 'executePlan')
        step_events = handle_execute_plan_result(res)

        events += step_events

        serialized_events = [serialize_dagster_namedtuple(event) for event in events]
        return serialized_events
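
For context, a hedged sketch of the consumer side (assumed, not shown above): the Celery caller would deserialize the returned JSON strings back into Dagster event objects. The exact deserialization helper and import path are assumptions for the dagster version these snippets target.

from dagster.serdes import deserialize_json_to_dagster_namedtuple

def events_from_task_result(serialized_events):
    # Inverse of the serialize_dagster_namedtuple call at the end of the task above.
    return [deserialize_json_to_dagster_namedtuple(s) for s in serialized_events]
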
Example #3
0
 def __init__(self, overwrite):
     # Overwrite is currently only a signal to callers to not overwrite.
     # These classes currently do not enforce any semantics around that
     self.overwrite = check.bool_param(overwrite, "overwrite")
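
An illustrative sketch of the check.bool_param contract these examples lean on: it returns the value unchanged when it is a bool and raises otherwise (import path as used in older dagster versions).

from dagster import check

overwrite = check.bool_param(True, "overwrite")  # returns True unchanged
# check.bool_param("yes", "overwrite")           # would raise a CheckError
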
Example #4
0
def make_dagster_repo_from_airflow_dags_path(
    dag_path,
    repo_name,
    safe_mode=True,
    store_serialized_dags=False,
    use_airflow_template_context=False,
):
    """ Construct a Dagster repository corresponding to Airflow DAGs in dag_path.

    ``DagBag.get_dag()`` dependency requires Airflow DB to be initialized.

    Usage:
        Create ``make_dagster_repo.py``:

        .. code-block:: python

            from dagster_airflow.dagster_pipeline_factory import make_dagster_repo_from_airflow_dags_path

            def make_repo_from_dir():
                return make_dagster_repo_from_airflow_dags_path(
                    '/path/to/dags/', 'my_repo_name'
                )

        Use RepositoryDefinition as usual, for example:
        ``dagit -f path/to/make_dagster_repo.py -n make_repo_from_dir``

    Args:
        dag_path (str): Path to directory or file that contains Airflow Dags
        repo_name (str): Name for generated RepositoryDefinition
        safe_mode (bool): True to use Airflow's default heuristic to find files that contain DAGs
            (ie find files that contain both b'DAG' and b'airflow') (default: True)
        store_serialized_dags (bool): True to read Airflow DAGS from Airflow DB. False to read DAGS
            from Python files. (default: False)
        use_airflow_template_context (bool): If True, will call get_template_context() on the
            Airflow TaskInstance model which requires and modifies the DagRun table.
            (default: False)

    Returns:
        RepositoryDefinition
    """
    check.str_param(dag_path, "dag_path")
    check.str_param(repo_name, "repo_name")
    check.bool_param(safe_mode, "safe_mode")
    check.bool_param(store_serialized_dags, "store_serialized_dags")
    check.bool_param(use_airflow_template_context,
                     "use_airflow_template_context")

    try:
        dag_bag = DagBag(
            dag_folder=dag_path,
            include_examples=False,  # Exclude Airflow example dags
            safe_mode=safe_mode,
            store_serialized_dags=store_serialized_dags,
        )
    except Exception:  # pylint: disable=broad-except
        raise DagsterAirflowError(
            "Error initializing airflow.models.dagbag object with arguments")

    return make_dagster_repo_from_airflow_dag_bag(
        dag_bag, repo_name, use_airflow_template_context)
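
A rough usage sketch; the DAG directory path and repository name are placeholders, and inspecting the result via get_all_pipelines() is shown only for illustration.

repo = make_dagster_repo_from_airflow_dags_path("/path/to/dags/", "airflow_migration_repo")
print([pipeline.name for pipeline in repo.get_all_pipelines()])
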
Example #5
0
def make_dagster_repo_from_airflow_dag_bag(dag_bag,
                                           repo_name,
                                           refresh_from_airflow_db=False,
                                           use_airflow_template_context=False):
    """ Construct a Dagster repository corresponding to Airflow DAGs in DagBag.

    Usage:
        Create ``make_dagster_repo.py``:

        .. code-block:: python

            from dagster_airflow.dagster_pipeline_factory import make_dagster_repo_from_airflow_dag_bag
            from airflow_home import my_dag_bag

            def make_repo_from_dag_bag():
                return make_dagster_repo_from_airflow_dag_bag(my_dag_bag, 'my_repo_name')

        Use RepositoryDefinition as usual, for example:
        ``dagit -f path/to/make_dagster_repo.py -n make_repo_from_dag_bag``

    Args:
        dag_bag (DagBag): Airflow DagBag model
        repo_name (str): Name for generated RepositoryDefinition
        refresh_from_airflow_db (bool): If True, will refresh DAG if expired via DagBag.get_dag(),
            which requires access to initialized Airflow DB. If False (recommended), gets dag from
            DagBag's dags dict without depending on Airflow DB. (default: False)
        use_airflow_template_context (bool): If True, will call get_template_context() on the
            Airflow TaskInstance model which requires and modifies the DagRun table.
            (default: False)

    Returns:
        RepositoryDefinition
    """
    check.inst_param(dag_bag, "dag_bag", DagBag)
    check.str_param(repo_name, "repo_name")
    check.bool_param(refresh_from_airflow_db, "refresh_from_airflow_db")
    check.bool_param(use_airflow_template_context,
                     "use_airflow_template_context")

    use_unique_id = contains_duplicate_task_names(dag_bag,
                                                  refresh_from_airflow_db)

    pipeline_defs = []
    count = 0
    # To enforce predictable iteration order
    sorted_dag_ids = sorted(dag_bag.dag_ids)
    for dag_id in sorted_dag_ids:
        # Only call Airflow DB via dag_bag.get_dag(dag_id) if refresh_from_airflow_db is True
        dag = dag_bag.get_dag(dag_id) if refresh_from_airflow_db else dag_bag.dags.get(dag_id)
        if not use_unique_id:
            pipeline_defs.append(
                make_dagster_pipeline_from_airflow_dag(
                    dag=dag,
                    tags=None,
                    use_airflow_template_context=use_airflow_template_context))
        else:
            pipeline_defs.append(
                make_dagster_pipeline_from_airflow_dag(
                    dag=dag,
                    tags=None,
                    use_airflow_template_context=use_airflow_template_context,
                    unique_id=count,
                ))
            count += 1

    @repository(name=repo_name)
    def _repo():
        return pipeline_defs

    return _repo
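
A hedged companion sketch that builds the DagBag explicitly instead of importing one from airflow_home as the docstring shows; the directory path is a placeholder.

from airflow.models import DagBag

def make_repo_from_dag_bag():
    dag_bag = DagBag(dag_folder="/path/to/dags/", include_examples=False)
    return make_dagster_repo_from_airflow_dag_bag(dag_bag, "my_repo_name")
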
Example #6
0
def pipeline_initialization_event_generator(
    execution_plan,
    run_config,
    pipeline_run,
    instance,
    scoped_resources_builder_cm,
    system_storage_data=None,
    intermediate_storage=None,
    raise_on_error=False,
):
    execution_plan = check.inst_param(execution_plan, 'execution_plan',
                                      ExecutionPlan)
    pipeline_def = execution_plan.pipeline.get_definition()

    run_config = check.dict_param(run_config, 'run_config', key_type=str)
    pipeline_run = check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    instance = check.inst_param(instance, 'instance', DagsterInstance)

    scoped_resources_builder_cm = check.callable_param(
        scoped_resources_builder_cm, 'scoped_resources_builder_cm')
    system_storage_data = check.opt_inst_param(system_storage_data,
                                               'system_storage_data',
                                               SystemStorageData)
    intermediate_storage = check.opt_inst_param(intermediate_storage,
                                                'intermediate_storage_data',
                                                IntermediateStorage)
    raise_on_error = check.bool_param(raise_on_error, 'raise_on_error')

    pipeline_context = None
    resources_manager = None

    try:
        context_creation_data = create_context_creation_data(
            execution_plan,
            run_config,
            pipeline_run,
            instance,
        )

        executor = check.inst(create_executor(context_creation_data), Executor,
                              'Must return an Executor')

        log_manager = create_log_manager(context_creation_data)
        resources_manager = scoped_resources_builder_cm(
            execution_plan,
            context_creation_data.environment_config,
            context_creation_data.pipeline_run,
            log_manager,
            context_creation_data.resource_keys_to_init,
        )
        for event in resources_manager.generate_setup_events():
            yield event
        scoped_resources_builder = check.inst(resources_manager.get_object(),
                                              ScopedResourcesBuilder)
        system_storage_data = create_system_storage_data(
            context_creation_data, system_storage_data,
            scoped_resources_builder)
        if intermediate_storage or context_creation_data.intermediate_storage_def:
            intermediate_storage = create_intermediate_storage(
                context_creation_data,
                intermediate_storage,
                scoped_resources_builder,
            )
        else:
            # remove this as part of https://github.com/dagster-io/dagster/issues/2705
            intermediate_storage = system_storage_data.intermediates_manager
        pipeline_context = construct_pipeline_execution_context(
            context_creation_data=context_creation_data,
            scoped_resources_builder=scoped_resources_builder,
            system_storage_data=system_storage_data,
            intermediate_storage=intermediate_storage,
            log_manager=log_manager,
            executor=executor,
            raise_on_error=raise_on_error,
        )

        _validate_plan_with_context(pipeline_context, execution_plan)

        yield pipeline_context
        for event in resources_manager.generate_teardown_events():
            yield event
    except DagsterError as dagster_error:
        if pipeline_context is None:
            user_facing_exc_info = (
                # pylint does not know original_exc_info exists if is_user_code_error is True
                # pylint: disable=no-member
                dagster_error.original_exc_info
                if dagster_error.is_user_code_error else sys.exc_info())
            error_info = serializable_error_info_from_exc_info(
                user_facing_exc_info)

            yield DagsterEvent.pipeline_init_failure(
                pipeline_name=pipeline_def.name,
                failure_data=PipelineInitFailureData(error=error_info),
                log_manager=_create_context_free_log_manager(
                    instance, pipeline_run, pipeline_def),
            )
            if resources_manager:
                for event in resources_manager.generate_teardown_events():
                    yield event
        else:
            # pipeline teardown failure
            raise dagster_error

        if raise_on_error:
            raise dagster_error
Example #7
0
    def __init__(
        self,
        server_termination_event,
        loadable_target_origin=None,
        heartbeat=False,
        heartbeat_timeout=30,
        lazy_load_user_code=False,
        fixed_server_id=None,
    ):
        super(DagsterApiServer, self).__init__()

        check.bool_param(heartbeat, "heartbeat")
        check.int_param(heartbeat_timeout, "heartbeat_timeout")
        check.invariant(heartbeat_timeout > 0,
                        "heartbeat_timeout must be greater than 0")

        self._server_termination_event = check.inst_param(
            server_termination_event, "server_termination_event",
            seven.ThreadingEventType)
        self._loadable_target_origin = check.opt_inst_param(
            loadable_target_origin, "loadable_target_origin",
            LoadableTargetOrigin)

        # Each server is initialized with a unique UUID. This UUID is used by clients to track when
        # servers are replaced and is used for cache invalidation and reloading.
        self._server_id = check.opt_str_param(fixed_server_id,
                                              "fixed_server_id",
                                              str(uuid.uuid4()))

        # Clients tell the server to shut down by calling ShutdownServer (or by failing to send
        # a heartbeat), at which point this event is set. The cleanup thread will then set the
        # server termination event once all current executions have finished, which stops the
        # server.
        self._shutdown_once_executions_finish_event = threading.Event()

        # Dict[str, (multiprocessing.Process, DagsterInstance)]
        self._executions = {}
        # Dict[str, multiprocessing.Event]
        self._termination_events = {}
        self._termination_times = {}
        self._execution_lock = threading.Lock()

        self._repository_symbols_and_code_pointers = LazyRepositorySymbolsAndCodePointers(
            loadable_target_origin)
        if not lazy_load_user_code:
            self._repository_symbols_and_code_pointers.load()

        self.__last_heartbeat_time = time.time()
        if heartbeat:
            self.__heartbeat_thread = threading.Thread(
                target=self._heartbeat_thread,
                args=(heartbeat_timeout, ),
            )
            self.__heartbeat_thread.daemon = True
            self.__heartbeat_thread.start()
        else:
            self.__heartbeat_thread = None

        self.__cleanup_thread = threading.Thread(
            target=self._cleanup_thread,
            args=(),
        )
        self.__cleanup_thread.daemon = True

        self.__cleanup_thread.start()
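
The _heartbeat_thread target is not shown in this snippet; the method below is only a plausible sketch of such a watchdog (written as if it were another method on the class above, reusing the already-imported time module): once heartbeats stop arriving within the timeout, it flags the shutdown event and exits.

    def _heartbeat_thread(self, heartbeat_timeout):
        # Illustrative only -- not the dagster implementation.
        while True:
            time.sleep(heartbeat_timeout)
            if self.__last_heartbeat_time < time.time() - heartbeat_timeout:
                self._shutdown_once_executions_finish_event.set()
                return
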
Example #8
0
    def __init__(
        self,
        origin,
        host=None,
        port=None,
        socket=None,
        server_id=None,
        heartbeat=False,
        watch_server=True,
    ):
        from dagster.grpc.client import DagsterGrpcClient, client_heartbeat_thread
        from dagster.grpc.server_watcher import create_grpc_watch_thread

        self._origin = check.inst_param(origin, "origin",
                                        RepositoryLocationOrigin)

        if isinstance(self._origin, GrpcServerRepositoryLocationOrigin):
            self._port = self.origin.port
            self._socket = self.origin.socket
            self._host = self.origin.host
            self._use_ssl = bool(self.origin.use_ssl)
        else:
            self._port = check.opt_int_param(port, "port")
            self._socket = check.opt_str_param(socket, "socket")
            self._host = check.str_param(host, "host")
            self._use_ssl = False

        self._watch_thread_shutdown_event = None
        self._watch_thread = None

        self._heartbeat_shutdown_event = None
        self._heartbeat_thread = None

        self._heartbeat = check.bool_param(heartbeat, "heartbeat")
        self._watch_server = check.bool_param(watch_server, "watch_server")

        self.server_id = None
        self._external_repositories_data = None

        try:
            self.client = DagsterGrpcClient(
                port=self._port,
                socket=self._socket,
                host=self._host,
                use_ssl=self._use_ssl,
            )
            list_repositories_response = sync_list_repositories_grpc(
                self.client)

            self.server_id = server_id if server_id else sync_get_server_id(
                self.client)
            self.repository_names = set(
                symbol.repository_name
                for symbol in list_repositories_response.repository_symbols)

            if self._heartbeat:
                self._heartbeat_shutdown_event = threading.Event()

                self._heartbeat_thread = threading.Thread(
                    target=client_heartbeat_thread,
                    args=(
                        self.client,
                        self._heartbeat_shutdown_event,
                    ),
                    name="grpc-client-heartbeat",
                )
                self._heartbeat_thread.daemon = True
                self._heartbeat_thread.start()

            if self._watch_server:
                self._state_subscribers = []
                self._watch_thread_shutdown_event, self._watch_thread = create_grpc_watch_thread(
                    self.client,
                    on_updated=lambda new_server_id: self.
                    _send_state_event_to_subscribers(
                        LocationStateChangeEvent(
                            LocationStateChangeEventType.LOCATION_UPDATED,
                            location_name=self.location_name,
                            message="Server has been updated.",
                            server_id=new_server_id,
                        )),
                    on_error=lambda: self._send_state_event_to_subscribers(
                        LocationStateChangeEvent(
                            LocationStateChangeEventType.LOCATION_ERROR,
                            location_name=self.location_name,
                            message=
                            "Unable to reconnect to server. You can reload the server once it is "
                            "reachable again",
                        )),
                )

                self._watch_thread.start()

            self.executable_path = list_repositories_response.executable_path
            self.repository_code_pointer_dict = (
                list_repositories_response.repository_code_pointer_dict)

            self.container_image = self._reload_current_image()

            self._external_repositories_data = sync_get_streaming_external_repositories_data_grpc(
                self.client,
                self,
            )
        except:
            self.cleanup()
            raise
Example #9
0
 def __new__(cls, can_cancel):
     return super(CanCancelExecutionResult, cls).__new__(
         cls,
         can_cancel=check.bool_param(can_cancel, "can_cancel"),
     )
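
For context, this __new__ pattern implies a namedtuple base class; the sketch below shows the likely overall shape, with the tuple field list inferred rather than quoted from the source.

from collections import namedtuple

from dagster import check

class CanCancelExecutionResult(namedtuple("_CanCancelExecutionResult", "can_cancel")):
    def __new__(cls, can_cancel):
        # Validate the flag before storing it on the immutable tuple.
        return super(CanCancelExecutionResult, cls).__new__(
            cls, can_cancel=check.bool_param(can_cancel, "can_cancel")
        )
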
Example #10
0
def execution_context_event_generator(
    pipeline: IPipeline,
    execution_plan: ExecutionPlan,
    run_config: Dict[str, Any],
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    retry_mode: RetryMode,
    scoped_resources_builder_cm: Optional[Callable[
        ..., EventGenerationManager[ScopedResourcesBuilder]]] = None,
    raise_on_error: Optional[bool] = False,
    output_capture: Optional[Dict["StepOutputHandle", Any]] = None,
) -> Generator[Union[DagsterEvent, PlanExecutionContext], None, None]:
    scoped_resources_builder_cm = cast(
        Callable[..., EventGenerationManager[ScopedResourcesBuilder]],
        check.opt_callable_param(
            scoped_resources_builder_cm,
            "scoped_resources_builder_cm",
            default=resource_initialization_manager,
        ),
    )

    execution_plan = check.inst_param(execution_plan, "execution_plan",
                                      ExecutionPlan)
    pipeline_def = pipeline.get_definition()

    run_config = check.dict_param(run_config, "run_config", key_type=str)
    pipeline_run = check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    instance = check.inst_param(instance, "instance", DagsterInstance)

    raise_on_error = check.bool_param(raise_on_error, "raise_on_error")

    context_creation_data = create_context_creation_data(
        pipeline,
        execution_plan,
        run_config,
        pipeline_run,
        instance,
    )

    log_manager = create_log_manager(context_creation_data)
    resource_defs = pipeline_def.get_required_resource_defs_for_mode(
        context_creation_data.resolved_run_config.mode)

    resources_manager = scoped_resources_builder_cm(
        resource_defs=resource_defs,
        resource_configs=context_creation_data.resolved_run_config.resources,
        log_manager=log_manager,
        execution_plan=execution_plan,
        pipeline_run=context_creation_data.pipeline_run,
        resource_keys_to_init=context_creation_data.resource_keys_to_init,
        instance=instance,
        emit_persistent_events=True,
        pipeline_def_for_backwards_compat=pipeline_def,
    )
    yield from resources_manager.generate_setup_events()
    scoped_resources_builder = check.inst(resources_manager.get_object(),
                                          ScopedResourcesBuilder)

    execution_context = PlanExecutionContext(
        plan_data=create_plan_data(context_creation_data, raise_on_error,
                                   retry_mode),
        execution_data=create_execution_data(context_creation_data,
                                             scoped_resources_builder),
        log_manager=log_manager,
        output_capture=output_capture,
    )

    _validate_plan_with_context(execution_context, execution_plan)

    yield execution_context
    yield from resources_manager.generate_teardown_events()
Example #11
0
def host_mode_execution_context_event_generator(
    execution_plan,
    recon_pipeline,
    run_config,
    pipeline_run,
    instance,
    get_executor_def_fn,
    raise_on_error,
):
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.inst_param(recon_pipeline, "recon_pipeline", ReconstructablePipeline)

    check.dict_param(run_config, "run_config", key_type=str)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    get_executor_def_fn = check.opt_callable_param(
        get_executor_def_fn, "get_executor_def_fn",
        _default_get_executor_def_fn)
    check.bool_param(raise_on_error, "raise_on_error")

    execution_context = None

    loggers = []

    for (logger_def, logger_config) in default_system_loggers():
        loggers.append(
            logger_def.logger_fn(
                InitLoggerContext(
                    logger_config,
                    pipeline_def=None,
                    logger_def=logger_def,
                    run_id=pipeline_run.run_id,
                )))

    loggers.append(instance.get_logger())

    log_manager = DagsterLogManager(
        run_id=pipeline_run.run_id,
        logging_tags=get_logging_tags(pipeline_run),
        loggers=loggers,
    )

    try:
        executor = _get_host_mode_executor(recon_pipeline, run_config,
                                           get_executor_def_fn, instance)
        execution_context = HostModeRunWorkerExecutionContext(
            execution_context_data=HostModeExecutionContextData(
                pipeline_run=pipeline_run,
                recon_pipeline=recon_pipeline,
                execution_plan=execution_plan,
                instance=instance,
                raise_on_error=raise_on_error,
                retry_mode=executor.retries,
            ),
            log_manager=log_manager,
            executor=executor,
        )

        yield execution_context

    except DagsterError as dagster_error:
        if execution_context is None:
            user_facing_exc_info = (
                # pylint does not know original_exc_info exists if is_user_code_error is True
                # pylint: disable=no-member
                dagster_error.original_exc_info
                if dagster_error.is_user_code_error else sys.exc_info())
            error_info = serializable_error_info_from_exc_info(
                user_facing_exc_info)

            yield DagsterEvent.pipeline_init_failure(
                pipeline_name=pipeline_run.pipeline_name,
                failure_data=PipelineInitFailureData(error=error_info),
                log_manager=log_manager,
            )
        else:
            # pipeline teardown failure
            raise dagster_error

        if raise_on_error:
            raise dagster_error
Example #12
0
 def __init__(self, ignore_missing_vals):
     description = "Column must be unique."
     self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
     super(UniqueColumnConstraint, self).__init__(
         error_description=description, markdown_description=description
     )
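
A hedged sketch of a sibling constraint that follows the same constructor pattern; the ColumnConstraint base class name and its keyword arguments are inferred from the super() call above rather than quoted from dagster_pandas.

class ColumnNotNullConstraint(ColumnConstraint):
    def __init__(self):
        description = "Column must not contain null values."
        super(ColumnNotNullConstraint, self).__init__(
            error_description=description, markdown_description=description
        )
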
Example #13
0
def helm_chart_for_daemon(namespace, docker_image, should_cleanup=True):
    check.str_param(namespace, "namespace")
    check.str_param(docker_image, "docker_image")
    check.bool_param(should_cleanup, "should_cleanup")

    repository, tag = docker_image.split(":")
    pull_policy = image_pull_policy()
    helm_config = {
        "dagster-user-deployments": {
            "enabled":
            True,
            "enableSubchart":
            True,
            "deployments": [{
                "name":
                "user-code-deployment-1",
                "image": {
                    "repository": repository,
                    "tag": tag,
                    "pullPolicy": pull_policy
                },
                "dagsterApiGrpcArgs": [
                    "-m",
                    "dagster_test.test_project.test_pipelines.repo",
                    "-a",
                    "define_demo_execution_repo",
                ],
                "port":
                3030,
                "env": ({
                    "BUILDKITE": os.getenv("BUILDKITE")
                } if os.getenv("BUILDKITE") else {}),
                "annotations": {
                    "dagster-integration-tests": "ucd-1-pod-annotation"
                },
                "service": {
                    "annotations": {
                        "dagster-integration-tests": "ucd-1-svc-annotation"
                    }
                },
                "replicaCount":
                1,
            }],
        },
        "dagit": {
            "image": {
                "repository": repository,
                "tag": tag,
                "pullPolicy": pull_policy
            },
            "env": {
                "TEST_SET_ENV_VAR": "test_dagit_env_var"
            },
            "envConfigMaps": [{
                "name": TEST_CONFIGMAP_NAME
            }],
            "envSecrets": [{
                "name": TEST_SECRET_NAME
            }],
            "livenessProbe": {
                "httpGet": {
                    "path": "/dagit_info",
                    "port": 80
                },
                "periodSeconds": 20,
                "failureThreshold": 3,
            },
            "startupProbe": {
                "httpGet": {
                    "path": "/dagit_info",
                    "port": 80
                },
                "failureThreshold": 6,
                "periodSeconds": 10,
            },
            "annotations": {
                "dagster-integration-tests": "dagit-pod-annotation"
            },
            "service": {
                "annotations": {
                    "dagster-integration-tests": "dagit-svc-annotation"
                }
            },
        },
        "runLauncher": {
            "type": "CeleryK8sRunLauncher",
            "config": {
                "celeryK8sRunLauncher": {
                    "image": {
                        "repository": repository,
                        "tag": tag,
                        "pullPolicy": pull_policy
                    },
                    "workerQueues": [
                        {
                            "name": "dagster",
                            "replicaCount": 2
                        },
                        {
                            "name": "extra-queue-1",
                            "replicaCount": 1
                        },
                    ],
                    "livenessProbe": {
                        "initialDelaySeconds": 15,
                        "periodSeconds": 10,
                        "timeoutSeconds": 10,
                        "successThreshold": 1,
                        "failureThreshold": 3,
                    },
                    "configSource": {
                        "broker_transport_options": {
                            "priority_steps": [9]
                        },
                        "worker_concurrency": 1,
                    },
                    "annotations": {
                        "dagster-integration-tests": "celery-pod-annotation"
                    },
                },
            },
        },
        "rabbitmq": {
            "enabled": True
        },
        "scheduler": {
            "type": "DagsterDaemonScheduler",
            "config": {}
        },
        "serviceAccount": {
            "name": "dagit-admin"
        },
        "postgresqlPassword": "******",
        "postgresqlDatabase": "test",
        "postgresqlUser": "******",
        "dagsterDaemon": {
            "enabled":
            True,
            "image": {
                "repository": repository,
                "tag": tag,
                "pullPolicy": pull_policy
            },
            "heartbeatTolerance":
            180,
            "queuedRunCoordinator": {
                "enabled": True
            },
            "env": ({
                "BUILDKITE": os.getenv("BUILDKITE")
            } if os.getenv("BUILDKITE") else {}),
            "envConfigMaps": [{
                "name": TEST_CONFIGMAP_NAME
            }],
            "envSecrets": [{
                "name": TEST_SECRET_NAME
            }],
            "annotations": {
                "dagster-integration-tests": "daemon-pod-annotation"
            },
        },
        # Used to set the environment variables in dagster.shared_env that determine the run config
        "pipelineRun": {
            "image": {
                "repository": repository,
                "tag": tag,
                "pullPolicy": pull_policy
            }
        },
    }

    with _helm_chart_helper(namespace,
                            should_cleanup,
                            helm_config,
                            helm_install_name="helm_chart_for_daemon"):
        yield
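
_helm_chart_helper itself is not shown in these examples; the function below is only a plausible sketch of what such a helper does with helm_config (the chart path, release name, and use of PyYAML and the helm CLI are assumptions).

import subprocess

import yaml

def _helm_chart_helper_sketch(namespace, helm_config, release_name="dagster"):
    # Serialize the config dict to YAML and pass it to `helm upgrade --install`
    # on stdin via `-f -`.
    values = yaml.safe_dump(helm_config)
    subprocess.run(
        ["helm", "upgrade", "--install", release_name, "helm/dagster",
         "--namespace", namespace, "-f", "-"],
        input=values.encode("utf-8"),
        check=True,
    )
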
Example #14
0
def helm_chart_for_user_deployments_subchart_disabled(namespace,
                                                      docker_image,
                                                      should_cleanup=True):
    check.str_param(namespace, "namespace")
    check.str_param(docker_image, "docker_image")
    check.bool_param(should_cleanup, "should_cleanup")

    repository, tag = docker_image.split(":")
    pull_policy = image_pull_policy()
    helm_config = {
        "dagster-user-deployments": {
            "enabled":
            True,
            "enableSubchart":
            False,
            "deployments": [{
                "name":
                "user-code-deployment-1",
                "image": {
                    "repository": repository,
                    "tag": tag,
                    "pullPolicy": pull_policy
                },
                "dagsterApiGrpcArgs": [
                    "-m",
                    "dagster_test.test_project.test_pipelines.repo",
                    "-a",
                    "define_demo_execution_repo",
                ],
                "port":
                3030,
                "replicaCount":
                1,
            }],
        },
        "dagit": {
            "image": {
                "repository": repository,
                "tag": tag,
                "pullPolicy": pull_policy
            },
            "env": {
                "TEST_SET_ENV_VAR": "test_dagit_env_var"
            },
            "envConfigMaps": [{
                "name": TEST_CONFIGMAP_NAME
            }],
            "envSecrets": [{
                "name": TEST_SECRET_NAME
            }],
            "livenessProbe": {
                "httpGet": {
                    "path": "/dagit_info",
                    "port": 80
                },
                "periodSeconds": 20,
                "failureThreshold": 3,
            },
            "startupProbe": {
                "httpGet": {
                    "path": "/dagit_info",
                    "port": 80
                },
                "failureThreshold": 6,
                "periodSeconds": 10,
            },
        },
        "flower": {
            "livenessProbe": {
                "tcpSocket": {
                    "port": "flower"
                },
                "periodSeconds": 20,
                "failureThreshold": 3,
            },
            "startupProbe": {
                "tcpSocket": {
                    "port": "flower"
                },
                "failureThreshold": 6,
                "periodSeconds": 10,
            },
        },
        "runLauncher": {
            "type": "CeleryK8sRunLauncher",
            "config": {
                "celeryK8sRunLauncher": {
                    "image": {
                        "repository": repository,
                        "tag": tag,
                        "pullPolicy": pull_policy
                    },
                    "workerQueues": [
                        {
                            "name": "dagster",
                            "replicaCount": 2
                        },
                        {
                            "name": "extra-queue-1",
                            "replicaCount": 1
                        },
                    ],
                    "env": {
                        "TEST_SET_ENV_VAR": "test_celery_env_var"
                    },
                    "envConfigMaps": [{
                        "name": TEST_CONFIGMAP_NAME
                    }],
                    "envSecrets": [{
                        "name": TEST_SECRET_NAME
                    }],
                    "livenessProbe": {
                        "initialDelaySeconds": 15,
                        "periodSeconds": 10,
                        "timeoutSeconds": 10,
                        "successThreshold": 1,
                        "failureThreshold": 3,
                    },
                    "configSource": {
                        "broker_transport_options": {
                            "priority_steps": [9]
                        },
                        "worker_concurrency": 1,
                    },
                }
            },
        },
        "rabbitmq": {
            "enabled": True
        },
        "scheduler": {
            "type": "K8sScheduler",
            "config": {
                "k8sScheduler": {
                    "schedulerNamespace": namespace,
                    "envSecrets": [{
                        "name": TEST_SECRET_NAME
                    }],
                }
            },
        },
        "serviceAccount": {
            "name": "dagit-admin"
        },
        "postgresqlPassword": "******",
        "postgresqlDatabase": "test",
        "postgresqlUser": "******",
        "dagsterDaemon": {
            "enabled": False
        },
    }

    with _helm_chart_helper(
            namespace,
            should_cleanup,
            helm_config,
            helm_install_name="helm_chart_for_user_deployments_subchart_disabled",
    ):
        yield
Example #15
0
def helm_chart(namespace, docker_image, should_cleanup=True):
    check.str_param(namespace, "namespace")
    check.str_param(docker_image, "docker_image")
    check.bool_param(should_cleanup, "should_cleanup")

    repository, tag = docker_image.split(":")
    pull_policy = image_pull_policy()
    helm_config = {
        "dagit": {
            "image": {
                "repository": repository,
                "tag": tag,
                "pullPolicy": pull_policy
            },
            "env": {
                "TEST_SET_ENV_VAR": "test_dagit_env_var"
            },
            "env_config_maps": [TEST_CONFIGMAP_NAME],
            "env_secrets": [TEST_SECRET_NAME],
            "livenessProbe": {
                "tcpSocket": {
                    "port": "http"
                },
                "periodSeconds": 20,
                "failureThreshold": 3,
            },
            "startupProbe": {
                "tcpSocket": {
                    "port": "http"
                },
                "failureThreshold": 6,
                "periodSeconds": 10,
            },
        },
        "flower": {
            "livenessProbe": {
                "tcpSocket": {
                    "port": "flower"
                },
                "periodSeconds": 20,
                "failureThreshold": 3,
            },
            "startupProbe": {
                "tcpSocket": {
                    "port": "flower"
                },
                "failureThreshold": 6,
                "periodSeconds": 10,
            },
        },
        "celery": {
            "image": {
                "repository": repository,
                "tag": tag,
                "pullPolicy": pull_policy
            },
            # https://github.com/dagster-io/dagster/issues/2671
            # 'extraWorkerQueues': [{'name': 'extra-queue-1', 'replicaCount': 1},],
            "livenessProbe": {
                "initialDelaySeconds": 15,
                "periodSeconds": 10,
                "timeoutSeconds": 10,
                "successThreshold": 1,
                "failureThreshold": 3,
            },
        },
        "scheduler": {
            "k8sEnabled": "true",
            "schedulerNamespace": namespace
        },
        "serviceAccount": {
            "name": "dagit-admin"
        },
        "postgresqlPassword": "******",
        "postgresqlDatabase": "test",
        "postgresqlUser": "******",
    }

    with _helm_chart_helper(namespace, should_cleanup, helm_config):
        yield
Example #16
0
    def _execute_step_k8s_job(
        self,
        execute_step_args_packed,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        job_wait_timeout,
        user_defined_k8s_config_dict=None,
        kubeconfig_file=None,
    ):
        """Run step execution in a K8s job pod."""
        execute_step_args = unpack_value(
            check.dict_param(
                execute_step_args_packed,
                "execute_step_args_packed",
            )
        )
        check.inst_param(execute_step_args, "execute_step_args", ExecuteStepArgs)
        check.invariant(
            len(execute_step_args.step_keys_to_execute) == 1,
            "Celery K8s task executor can only execute 1 step at a time",
        )

        # Celery will serialize this as a list
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, "job_config", DagsterK8sJobConfig)
        check.str_param(job_namespace, "job_namespace")

        check.bool_param(load_incluster_config, "load_incluster_config")

        user_defined_k8s_config = UserDefinedDagsterK8sConfig.from_dict(
            user_defined_k8s_config_dict
        )
        check.opt_inst_param(
            user_defined_k8s_config,
            "user_defined_k8s_config",
            UserDefinedDagsterK8sConfig,
        )
        check.opt_str_param(kubeconfig_file, "kubeconfig_file")

        # For when launched via DinD or running the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance = DagsterInstance.from_ref(execute_step_args.instance_ref)
        pipeline_run = instance.get_run_by_id(execute_step_args.pipeline_run_id)

        check.inst(
            pipeline_run,
            PipelineRun,
            "Could not load run {}".format(execute_step_args.pipeline_run_id),
        )
        step_key = execute_step_args.step_keys_to_execute[0]

        celery_worker_name = self.request.hostname
        celery_pod_name = os.environ.get("HOSTNAME")
        instance.report_engine_event(
            "Task for step {step_key} picked up by Celery".format(step_key=step_key),
            pipeline_run,
            EngineEventData(
                [
                    MetadataEntry.text(celery_worker_name, "Celery worker name"),
                    MetadataEntry.text(celery_pod_name, "Celery worker Kubernetes Pod name"),
                ]
            ),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )

        if pipeline_run.status != PipelineRunStatus.STARTED:
            instance.report_engine_event(
                "Not scheduling step because dagster run status is not STARTED",
                pipeline_run,
                EngineEventData(
                    [
                        MetadataEntry.text(step_key, "Step key"),
                    ]
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        # Ensure we stay below k8s name length limits
        k8s_name_key = get_k8s_job_name(execute_step_args.pipeline_run_id, step_key)

        retry_state = execute_step_args.known_state.get_retry_state()

        if retry_state.get_attempt_count(step_key):
            attempt_number = retry_state.get_attempt_count(step_key)
            job_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
            pod_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
        else:
            job_name = "dagster-job-%s" % (k8s_name_key)
            pod_name = "dagster-job-%s" % (k8s_name_key)

        args = execute_step_args.get_command_args()

        job = construct_dagster_k8s_job(
            job_config,
            args,
            job_name,
            user_defined_k8s_config,
            pod_name,
            component="step_worker",
            labels={
                "dagster/job": execute_step_args.pipeline_origin.pipeline_name,
                "dagster/op": step_key,
            },
        )

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        job_name = job.metadata.name
        engine_event = instance.report_engine_event(
            "Executing step {} in Kubernetes job {}".format(step_key, job_name),
            pipeline_run,
            EngineEventData(
                [
                    MetadataEntry.text(step_key, "Step key"),
                    MetadataEntry.text(job_name, "Kubernetes Job name"),
                    MetadataEntry.text(job_config.job_image, "Job image"),
                    MetadataEntry.text(job_config.image_pull_policy, "Image pull policy"),
                    MetadataEntry.text(str(job_config.image_pull_secrets), "Image pull secrets"),
                    MetadataEntry.text(
                        str(job_config.service_account_name), "Service account name"
                    ),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobExecutor,
            # validated above that step_keys_to_execute has length 1, and it is not possible to
            # use ETH or the execution plan in this function (Celery K8s workers should not have
            # access to user code)
            step_key=step_key,
        )
        events.append(engine_event)
        try:
            kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            if e.reason == "Conflict":
                # There is an existing job with the same name so proceed and see if the existing job succeeded
                instance.report_engine_event(
                    "Did not create Kubernetes job {} for step {} since job name already "
                    "exists, proceeding with existing job.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            MetadataEntry.text(step_key, "Step key"),
                            MetadataEntry.text(job_name, "Kubernetes Job name"),
                        ],
                        marker_end=DELEGATE_MARKER,
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            else:
                instance.report_engine_event(
                    "Encountered unexpected error while creating Kubernetes job {} for step {}, "
                    "exiting.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            MetadataEntry.text(step_key, "Step key"),
                        ],
                        error=serializable_error_info_from_exc_info(sys.exc_info()),
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
                return []

        try:
            wait_for_job_success(
                job_name=job_name,
                namespace=job_namespace,
                instance=instance,
                run_id=execute_step_args.pipeline_run_id,
                wait_timeout=job_wait_timeout,
            )
        except (DagsterK8sError, DagsterK8sTimeoutError) as err:
            step_failure_event = construct_step_failure_event_and_handle(
                pipeline_run, step_key, err, instance=instance
            )
            events.append(step_failure_event)
        except DagsterK8sPipelineStatusException:
            instance.report_engine_event(
                "Terminating Kubernetes Job because dagster run status is not STARTED",
                pipeline_run,
                EngineEventData(
                    [
                        MetadataEntry.text(step_key, "Step key"),
                        MetadataEntry.text(job_name, "Kubernetes Job name"),
                        MetadataEntry.text(job_namespace, "Kubernetes Job namespace"),
                    ]
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            delete_job(job_name=job_name, namespace=job_namespace)
            return []
        except (
            DagsterK8sUnrecoverableAPIError,
            DagsterK8sAPIRetryLimitExceeded,
            # We shouldn't see unwrapped APIExceptions anymore, as they should all be wrapped in
            # a retry boundary. We still catch it here just in case we missed one so that we can
            # report it to the event log
            kubernetes.client.rest.ApiException,
        ) as err:
            instance.report_engine_event(
                "Encountered unexpected error while waiting on Kubernetes job {} for step {}, "
                "exiting.".format(job_name, step_key),
                pipeline_run,
                EngineEventData(
                    [
                        MetadataEntry.text(step_key, "Step key"),
                    ],
                    error=serializable_error_info_from_exc_info(sys.exc_info()),
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        try:
            pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            instance.report_engine_event(
                "Encountered unexpected error retreiving Pods for Kubernetes job {} for step {}, "
                "exiting.".format(job_name, step_key),
                pipeline_run,
                EngineEventData(
                    [
                        MetadataEntry.text(step_key, "Step key"),
                    ],
                    error=serializable_error_info_from_exc_info(sys.exc_info()),
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            "Retrieving logs from Kubernetes Job pods",
            pipeline_run,
            EngineEventData([MetadataEntry.text("\n".join(pod_names), "Pod names")]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            try:
                raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
                logs += raw_logs.split("\n")
            except kubernetes.client.rest.ApiException as e:
                instance.report_engine_event(
                    "Encountered unexpected error while fetching pod logs for Kubernetes job {}, "
                    "Pod name {} for step {}. Will attempt to continue with other pods.".format(
                        job_name, pod_name, step_key
                    ),
                    pipeline_run,
                    EngineEventData(
                        [
                            MetadataEntry.text(step_key, "Step key"),
                        ],
                        error=serializable_error_info_from_exc_info(sys.exc_info()),
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )

        events += filter_dagster_events_from_pod_logs(logs)
        serialized_events = [serialize_dagster_namedtuple(event) for event in events]
        return serialized_events
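
get_k8s_job_name is imported from dagster_k8s and not shown here; the sketch below only illustrates the idea the surrounding code relies on, namely a short deterministic key so job and pod names stay under Kubernetes' 63-character limit. It is not the library implementation.

import hashlib

def get_k8s_job_name_sketch(run_id, step_key=None):
    # Hash the identifying strings so the name has a fixed length and identical
    # inputs always map to the same job name (stable across retries).
    name_key = run_id if step_key is None else "{}-{}".format(run_id, step_key)
    return hashlib.sha1(name_key.encode("utf-8")).hexdigest()
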
Example #17
0
def helm_chart_for_user_deployments(namespace,
                                    docker_image,
                                    should_cleanup=True):
    check.str_param(namespace, "namespace")
    check.str_param(docker_image, "docker_image")
    check.bool_param(should_cleanup, "should_cleanup")

    repository, tag = docker_image.split(":")
    pull_policy = image_pull_policy()
    helm_config = {
        "userDeployments": {
            "enabled":
            True,
            "deployments": [{
                "name":
                "user-code-deployment-1",
                "image": {
                    "repository": repository,
                    "tag": tag,
                    "pullPolicy": pull_policy
                },
                "dagsterApiGrpcArgs": [
                    "-m",
                    "dagster_test.test_project.test_pipelines.repo",
                    "-a",
                    "define_demo_execution_repo",
                ],
                "port":
                3030,
            }],
        },
        "dagit": {
            "image": {
                "repository": repository,
                "tag": tag,
                "pullPolicy": pull_policy
            },
            "env": {
                "TEST_SET_ENV_VAR": "test_dagit_env_var"
            },
            "env_config_maps": [TEST_CONFIGMAP_NAME],
            "env_secrets": [TEST_SECRET_NAME],
            "livenessProbe": {
                "tcpSocket": {
                    "port": "http"
                },
                "periodSeconds": 20,
                "failureThreshold": 3,
            },
            "startupProbe": {
                "tcpSocket": {
                    "port": "http"
                },
                "failureThreshold": 6,
                "periodSeconds": 10,
            },
        },
        "flower": {
            "livenessProbe": {
                "tcpSocket": {
                    "port": "flower"
                },
                "periodSeconds": 20,
                "failureThreshold": 3,
            },
            "startupProbe": {
                "tcpSocket": {
                    "port": "flower"
                },
                "failureThreshold": 6,
                "periodSeconds": 10,
            },
        },
        "celery": {
            "image": {
                "repository": repository,
                "tag": tag,
                "pullPolicy": pull_policy
            },
            # https://github.com/dagster-io/dagster/issues/2671
            # 'extraWorkerQueues': [{'name': 'extra-queue-1', 'replicaCount': 1},],
            "livenessProbe": {
                "initialDelaySeconds": 15,
                "periodSeconds": 10,
                "timeoutSeconds": 10,
                "successThreshold": 1,
                "failureThreshold": 3,
            },
            "configSource": {
                "broker_transport_options": {
                    "priority_steps": [9]
                },
                "worker_concurrency": 1,
            },
        },
        "scheduler": {
            "k8sEnabled": "true",
            "schedulerNamespace": namespace
        },
        "serviceAccount": {
            "name": "dagit-admin"
        },
        "postgresqlPassword": "******",
        "postgresqlDatabase": "test",
        "postgresqlUser": "******",
    }

    with _helm_chart_helper(namespace, should_cleanup, helm_config):
        yield
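
A possible consumption pattern for a generator-based chart helper like the one above: wrap it as a context manager and expose it as a pytest fixture. This is only a sketch under assumptions; the helper may already be decorated as a context manager in its own module, and the fixture name, namespace, and image tag below are placeholders rather than the project's real values.

import contextlib

import pytest

# Hypothetical wiring: wrap the generator so it can be entered with `with`.
helm_chart_for_user_deployments_cm = contextlib.contextmanager(helm_chart_for_user_deployments)


@pytest.fixture
def user_deployment_chart():
    # Placeholder namespace/image; real tests would derive these from the environment.
    with helm_chart_for_user_deployments_cm("test-namespace", "dagster-test:latest"):
        yield


def test_user_code_deployment_comes_up(user_deployment_chart):  # hypothetical test
    # By the time the fixture yields, _helm_chart_helper has already waited for the
    # user code deployment pods to become ready, so assertions can run here.
    pass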
Example #18
0
    def get_event_records(
        self,
        event_records_filter: Optional[EventRecordsFilter] = None,
        limit: Optional[int] = None,
        ascending: bool = False,
    ) -> Iterable[EventLogRecord]:
        """Overridden method to enable cross-run event queries in sqlite.

        The record id in sqlite does not auto-increment across runs, so instead of fetching events
        after a record id, we only fetch events whose runs were updated after update_timestamp.
        """
        check.opt_inst_param(event_records_filter, "event_records_filter",
                             EventRecordsFilter)
        check.opt_int_param(limit, "limit")
        check.bool_param(ascending, "ascending")

        is_asset_query = (event_records_filter
                          and event_records_filter.event_type
                          == DagsterEventType.ASSET_MATERIALIZATION)
        if is_asset_query:
            # asset materializations get mirrored into the index shard, so no custom run shard-aware
            # cursor logic needed
            return super(SqliteEventLogStorage, self).get_event_records(
                event_records_filter=event_records_filter,
                limit=limit,
                ascending=ascending)

        query = db.select(
            [SqlEventLogStorageTable.c.id, SqlEventLogStorageTable.c.event])
        if event_records_filter and event_records_filter.asset_key:
            asset_details = next(
                iter(self._get_assets_details([event_records_filter.asset_key
                                               ])))
        else:
            asset_details = None

        if not event_records_filter or not (isinstance(
                event_records_filter.after_cursor, RunShardedEventsCursor)):
            warnings.warn("""
                Called `get_event_records` on a run-sharded event log storage with a query that
                is not run aware (e.g. not using a RunShardedEventsCursor).  This likely has poor
                performance characteristics.  Consider adding a RunShardedEventsCursor to your query
                or switching your instance configuration to use a non-run sharded event log storage
                (e.g. PostgresEventLogStorage, ConsolidatedSqliteEventLogStorage)
            """)

        query = self._apply_filter_to_query(
            query=query,
            event_records_filter=event_records_filter,
            asset_details=asset_details,
            apply_cursor_filters=
            False,  # run-sharded cursor filters don't really make sense
        )
        if limit:
            query = query.limit(limit)
        if ascending:
            query = query.order_by(SqlEventLogStorageTable.c.timestamp.asc())
        else:
            query = query.order_by(SqlEventLogStorageTable.c.timestamp.desc())

        # workaround for the run-shard sqlite to enable cross-run queries: get a list of run_ids
        # whose events may qualify the query, and then open run_connection per run_id at a time.
        run_updated_after = (
            event_records_filter.after_cursor.run_updated_after
            if event_records_filter and isinstance(
                event_records_filter.after_cursor, RunShardedEventsCursor) else
            None)
        run_records = self._instance.get_run_records(
            filters=PipelineRunsFilter(updated_after=run_updated_after),
            order_by="update_timestamp",
            ascending=ascending,
        )

        event_records = []
        for run_record in run_records:
            run_id = run_record.pipeline_run.run_id
            with self.run_connection(run_id) as conn:
                results = conn.execute(query).fetchall()

            for row_id, json_str in results:
                try:
                    event_record = deserialize_json_to_dagster_namedtuple(
                        json_str)
                    if not isinstance(event_record, EventLogEntry):
                        logging.warning(
                            "Could not resolve event record as EventLogEntry for id `{}`."
                            .format(row_id))
                        continue
                    else:
                        event_records.append(
                            EventLogRecord(storage_id=row_id,
                                           event_log_entry=event_record))
                    if limit and len(event_records) >= limit:
                        break
                except seven.JSONDecodeError:
                    logging.warning(
                        "Could not parse event record id `{}`.".format(row_id))

            if limit and len(event_records) >= limit:
                break

        return event_records[:limit]
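
The warning in the method above suggests passing a RunShardedEventsCursor so the run-sharded storage can restrict its scan to recently updated runs. A rough call-site sketch follows, assuming `event_log_storage` is the SqliteEventLogStorage instance in question, that EventRecordsFilter accepts `event_type` and `after_cursor` keyword arguments, and that RunShardedEventsCursor carries an `id` plus a `run_updated_after` timestamp; verify the installed dagster version's signatures before relying on these names.

import datetime

# Hypothetical call site: fetch recent step-failure events across runs without
# triggering the "not run aware" warning above. Field names are assumptions.
last_seen_storage_id = 0  # placeholder: last storage id already processed
cursor = RunShardedEventsCursor(
    id=last_seen_storage_id,
    run_updated_after=datetime.datetime.now() - datetime.timedelta(hours=1),
)
records = event_log_storage.get_event_records(
    event_records_filter=EventRecordsFilter(
        event_type=DagsterEventType.STEP_FAILURE,
        after_cursor=cursor,
    ),
    limit=100,
    ascending=False,
)
for record in records:
    print(record.storage_id, record.event_log_entry)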
Example #19
0
    def _execute_step_k8s_job(
        _self,
        instance_ref_dict,
        step_keys,
        run_config,
        mode,
        repo_name,
        repo_location_name,
        run_id,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        retries_dict,
        pipeline_origin_packed,
        user_defined_k8s_config_dict=None,
        kubeconfig_file=None,
    ):
        """Run step execution in a K8s job pod.
        """

        check.dict_param(instance_ref_dict, "instance_ref_dict")
        check.list_param(step_keys, "step_keys", of_type=str)
        check.invariant(
            len(step_keys) == 1, "Celery K8s task executor can only execute 1 step at a time"
        )
        check.dict_param(run_config, "run_config")
        check.str_param(mode, "mode")
        check.str_param(repo_name, "repo_name")
        check.str_param(repo_location_name, "repo_location_name")
        check.str_param(run_id, "run_id")

        # Celery will serialize this as a list
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, "job_config", DagsterK8sJobConfig)
        check.str_param(job_namespace, "job_namespace")

        check.bool_param(load_incluster_config, "load_incluster_config")
        check.dict_param(retries_dict, "retries_dict")

        pipeline_origin = unpack_value(
            check.dict_param(
                pipeline_origin_packed, "pipeline_origin_packed"
            )  # TODO: make part of args
        )
        check.inst(pipeline_origin, PipelineOrigin)

        user_defined_k8s_config = UserDefinedDagsterK8sConfig.from_dict(
            user_defined_k8s_config_dict
        )
        check.opt_inst_param(
            user_defined_k8s_config, "user_defined_k8s_config", UserDefinedDagsterK8sConfig,
        )
        check.opt_str_param(kubeconfig_file, "kubeconfig_file")

        # For when launched via DinD or running inside the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline_run = instance.get_run_by_id(run_id)

        check.invariant(pipeline_run, "Could not load run {}".format(run_id))

        step_key = step_keys[0]
        if pipeline_run.status != PipelineRunStatus.STARTED:
            instance.report_engine_event(
                "Not scheduling step because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData([EventMetadataEntry.text(step_key, "Step keys"),]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return

        # Ensure we stay below k8s name length limits
        k8s_name_key = get_k8s_job_name(run_id, step_key)

        retries = Retries.from_config(retries_dict)

        if retries.get_attempt_count(step_key):
            attempt_number = retries.get_attempt_count(step_key)
            job_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
            pod_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
        else:
            job_name = "dagster-job-%s" % (k8s_name_key)
            pod_name = "dagster-job-%s" % (k8s_name_key)

        input_json = serialize_dagster_namedtuple(
            ExecuteStepArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run_id,
                instance_ref=None,
                mode=mode,
                step_keys_to_execute=step_keys,
                run_config=run_config,
                retries_dict=retries_dict,
            )
        )
        command = ["dagster"]
        args = ["api", "execute_step_with_structured_logs", input_json]

        job = construct_dagster_k8s_job(
            job_config, command, args, job_name, user_defined_k8s_config, pod_name
        )

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        job_name = job.metadata.name
        engine_event = instance.report_engine_event(
            "Executing step {} in Kubernetes job {}".format(step_key, job_name),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step keys"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
                    EventMetadataEntry.text(job_config.job_image, "Job image"),
                    EventMetadataEntry.text(job_config.image_pull_policy, "Image pull policy"),
                    EventMetadataEntry.text(
                        str(job_config.image_pull_secrets), "Image pull secrets"
                    ),
                    EventMetadataEntry.text(
                        str(job_config.service_account_name), "Service account name"
                    ),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobExecutor,
            # validated above that step_keys is length 1, and it is not possible to use ETH or
            # the execution plan in this function (Celery K8s workers should not have access to user code)
            step_key=step_key,
        )
        events.append(engine_event)

        try:
            kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            if e.reason == "Conflict":
                # There is an existing job with the same name so do not proceed.
                instance.report_engine_event(
                    "Did not create Kubernetes job {} for step {} since job name already "
                    "exists, exiting.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step keys"),
                            EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                            EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
                        ],
                        marker_end=DELEGATE_MARKER,
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            else:
                instance.report_engine_event(
                    "Encountered unexpected error while creating Kubernetes job {} for step {}, "
                    "exiting.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step keys"),
                            EventMetadataEntry.text(str(e), "Error"),
                        ]
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            return

        try:
            wait_for_job_success(
                job_name=job_name, namespace=job_namespace, instance=instance, run_id=run_id,
            )
        except DagsterK8sPipelineStatusException:
            instance.report_engine_event(
                "Terminating Kubernetes Job because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step keys"),
                        EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                        EventMetadataEntry.text(job_namespace, "Kubernetes Job namespace"),
                    ]
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            delete_job(job_name=job_name, namespace=job_namespace)
            return

        pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            "Retrieving logs from Kubernetes Job pods",
            pipeline_run,
            EngineEventData([EventMetadataEntry.text("\n".join(pod_names), "Pod names")]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
            logs += raw_logs.split("\n")

        events += filter_dagster_events_from_pod_logs(logs)
        serialized_events = [serialize_dagster_namedtuple(event) for event in events]
        return serialized_events
Example #20
0
 def __init__(self, raise_on_error=True):
     self.raise_on_error = check.bool_param(raise_on_error, 'raise_on_error')
Example #21
0
    def __init__(
        self,
        host="localhost",
        port=None,
        socket=None,
        max_workers=1,
        loadable_target_origin=None,
        heartbeat=False,
        heartbeat_timeout=30,
        lazy_load_user_code=False,
        ipc_output_file=None,
        fixed_server_id=None,
    ):
        check.opt_str_param(host, "host")
        check.opt_int_param(port, "port")
        check.opt_str_param(socket, "socket")
        check.int_param(max_workers, "max_workers")
        check.opt_inst_param(loadable_target_origin, "loadable_target_origin",
                             LoadableTargetOrigin)
        check.invariant(
            port is not None if seven.IS_WINDOWS else True,
            "You must pass a valid `port` on Windows: `socket` not supported.",
        )
        check.invariant(
            (port or socket) and not (port and socket),
            "You must pass one and only one of `port` or `socket`.",
        )
        check.invariant(
            host is not None if port else True,
            "Must provide a host when serving on a port",
        )
        check.bool_param(heartbeat, "heartbeat")
        check.int_param(heartbeat_timeout, "heartbeat_timeout")
        self._ipc_output_file = check.opt_str_param(ipc_output_file,
                                                    "ipc_output_file")
        check.opt_str_param(fixed_server_id, "fixed_server_id")

        check.invariant(heartbeat_timeout > 0,
                        "heartbeat_timeout must be greater than 0")
        check.invariant(
            max_workers > 1 if heartbeat else True,
            "max_workers must be greater than 1 if heartbeat is True",
        )

        self.server = grpc.server(ThreadPoolExecutor(max_workers=max_workers))
        self._server_termination_event = threading.Event()

        self._api_servicer = DagsterApiServer(
            server_termination_event=self._server_termination_event,
            loadable_target_origin=loadable_target_origin,
            heartbeat=heartbeat,
            heartbeat_timeout=heartbeat_timeout,
            lazy_load_user_code=lazy_load_user_code,
            fixed_server_id=fixed_server_id,
        )

        # Create a health check servicer
        self._health_servicer = health.HealthServicer()
        health_pb2_grpc.add_HealthServicer_to_server(self._health_servicer,
                                                     self.server)

        add_DagsterApiServicer_to_server(self._api_servicer, self.server)

        if port:
            server_address = host + ":" + str(port)
        else:
            server_address = "unix:" + os.path.abspath(socket)

        # grpc.Server.add_insecure_port returns:
        # - 0 on failure
        # - port number when a port is successfully bound
        # - 1 when a UDS is successfully bound
        res = self.server.add_insecure_port(server_address)
        if socket and res != 1:
            if self._ipc_output_file:
                with ipc_write_stream(self._ipc_output_file) as ipc_stream:
                    ipc_stream.send(GrpcServerFailedToBindEvent())
            raise CouldNotBindGrpcServerToAddress(socket)
        if port and res != port:
            if self._ipc_output_file:
                with ipc_write_stream(self._ipc_output_file) as ipc_stream:
                    ipc_stream.send(GrpcServerFailedToBindEvent())
            raise CouldNotBindGrpcServerToAddress(port)
Example #22
0
def execute_query(
    handle,
    query,
    variables=None,
    pipeline_run_storage=None,
    raise_on_error=False,
    use_sync_executor=False,
):
    check.inst_param(handle, 'handle', ExecutionTargetHandle)
    check.str_param(query, 'query')
    check.opt_dict_param(variables, 'variables')
    # We allow external creation of the pipeline_run_storage to support testing contexts where we
    # need access to the underlying run storage
    check.opt_inst_param(pipeline_run_storage, 'pipeline_run_storage',
                         RunStorage)
    check.bool_param(raise_on_error, 'raise_on_error')
    check.bool_param(use_sync_executor, 'use_sync_executor')

    query = query.strip('\'" \n\t')

    execution_manager = SynchronousExecutionManager()

    pipeline_run_storage = pipeline_run_storage or InMemoryRunStorage()

    context = DagsterGraphQLContext(
        handle=handle,
        pipeline_runs=pipeline_run_storage,
        execution_manager=execution_manager,
        raise_on_error=raise_on_error,
        version=__version__,
    )

    executor = SyncExecutor() if use_sync_executor else GeventExecutor()

    result = graphql(
        request_string=query,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    result_dict = result.to_dict()

    # Here we detect if this is in fact an error response
    # If so, we iterate over the result_dict and the original result
    # which contains a GraphQLError. If that GraphQL error contains
    # an original_error property (which is the exception the resolver
    # has thrown, typically) we serialize the stack trace of that exception
    # in the 'stack_trace' property of each error to ease debugging

    if 'errors' in result_dict:
        check.invariant(len(result_dict['errors']) == len(result.errors))
        for python_error, error_dict in zip(result.errors,
                                            result_dict['errors']):
            if hasattr(python_error,
                       'original_error') and python_error.original_error:
                error_dict['stack_trace'] = get_stack_trace_array(
                    python_error.original_error)

    return result_dict
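
The error-handling block above mutates each serialized error dict in place by attaching a 'stack_trace' entry. A small, self-contained illustration of the same enrichment on fake data follows; the error class and helper below are stand-ins, not dagster-graphql's actual types.

import traceback


class FakeGraphQLError:  # stand-in for graphql.error.GraphQLError
    def __init__(self, original_error=None):
        self.original_error = original_error


def get_stack_trace_array_sketch(exception):
    # Rough equivalent of get_stack_trace_array: format the exception's traceback
    # as a list of strings.
    return traceback.format_exception(type(exception), exception, exception.__traceback__)


try:
    raise ValueError("resolver blew up")
except ValueError as exc:
    errors = [FakeGraphQLError(original_error=exc)]

result_dict = {"errors": [{"message": "resolver blew up"}]}

for python_error, error_dict in zip(errors, result_dict["errors"]):
    if getattr(python_error, "original_error", None):
        error_dict["stack_trace"] = get_stack_trace_array_sketch(python_error.original_error)

print(result_dict["errors"][0]["stack_trace"][-1])  # last line of the formatted traceback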
Example #23
0
def make_dagster_pipeline_from_airflow_dag(dag,
                                           tags=None,
                                           use_airflow_template_context=False,
                                           unique_id=None):
    """Construct a Dagster pipeline corresponding to a given Airflow DAG.

    Tasks in the resulting pipeline will execute the ``execute()`` method on the corresponding
    Airflow Operator. Dagster, any dependencies required by Airflow Operators, and the module
    containing your DAG definition must be available in the Python environment within which your
    Dagster solids execute.

    To set Airflow's ``execution_date`` for use with Airflow Operator's ``execute()`` methods,
    either:

    1. (Best for ad hoc runs) Run Pipeline with 'default' preset, which sets execution_date to the
        time (in UTC) of pipeline invocation:

        .. code-block:: python

            execute_pipeline(
                pipeline=make_dagster_pipeline_from_airflow_dag(dag=dag),
                preset='default')

    2. Add ``{'airflow_execution_date': utc_date_string}`` to the PipelineDefinition tags. This will
       override behavior from (1).

        .. code-block:: python

            execute_pipeline(
                make_dagster_pipeline_from_airflow_dag(
                    dag=dag,
                    tags={'airflow_execution_date': utc_execution_date_str}
                )
            )

    3. (Recommended) Add ``{'airflow_execution_date': utc_date_string}`` to the PipelineRun tags,
        such as in the Dagit UI. This will override behavior from (1) and (2).


    We apply normalized_name() to the dag id and task ids when generating pipeline name and solid
    names to ensure that names conform to Dagster's naming conventions.

    Args:
        dag (DAG): The Airflow DAG to compile into a Dagster pipeline
        tags (Dict[str, Field]): Pipeline tags. Optionally include
            `tags={'airflow_execution_date': utc_date_string}` to specify execution_date used within
            execution of Airflow Operators.
        use_airflow_template_context (bool): If True, will call get_template_context() on the
            Airflow TaskInstance model which requires and modifies the DagRun table.
            (default: False)
        unique_id (int): If not None, this id will be appended to generated solid names. Used by
            framework authors to enforce unique solid names within a repo.

    Returns:
        pipeline_def (PipelineDefinition): The generated Dagster pipeline

    """
    check.inst_param(dag, "dag", DAG)
    tags = check.opt_dict_param(tags, "tags")
    check.bool_param(use_airflow_template_context,
                     "use_airflow_template_context")
    unique_id = check.opt_int_param(unique_id, "unique_id")

    if IS_AIRFLOW_INGEST_PIPELINE_STR not in tags:
        tags[IS_AIRFLOW_INGEST_PIPELINE_STR] = "true"

    tags = validate_tags(tags)

    pipeline_dependencies, solid_defs = _get_pipeline_definition_args(
        dag, use_airflow_template_context, unique_id)
    pipeline_def = PipelineDefinition(
        name=normalized_name(dag.dag_id, None),
        solid_defs=solid_defs,
        dependencies=pipeline_dependencies,
        tags=tags,
    )
    return pipeline_def
Example #24
0
    def __init__(
        self,
        type_check_fn,
        key=None,
        name=None,
        is_builtin=False,
        description=None,
        input_hydration_config=None,
        output_materialization_config=None,
        serialization_strategy=None,
        auto_plugins=None,
    ):
        check.opt_str_param(key, 'key')
        check.opt_str_param(name, 'name')

        check.invariant(not (name is None and key is None),
                        'Must set key or name')

        if name is None:
            check.param_invariant(
                bool(key),
                'key',
                'If name is not provided, must provide key.',
            )
            self.key, self.name = key, name
        elif key is None:
            check.param_invariant(
                bool(name),
                'name',
                'If key is not provided, must provide name.',
            )
            self.key, self.name = name, name
        else:
            check.invariant(key and name)
            self.key, self.name = key, name

        self.description = check.opt_str_param(description, 'description')
        self.input_hydration_config = check.opt_inst_param(
            input_hydration_config, 'input_hydration_config',
            InputHydrationConfig)
        self.output_materialization_config = check.opt_inst_param(
            output_materialization_config,
            'output_materialization_config',
            OutputMaterializationConfig,
        )
        self.serialization_strategy = check.opt_inst_param(
            serialization_strategy,
            'serialization_strategy',
            SerializationStrategy,
            PickleSerializationStrategy(),
        )

        self._type_check_fn = check.callable_param(type_check_fn,
                                                   'type_check_fn')

        auto_plugins = check.opt_list_param(auto_plugins,
                                            'auto_plugins',
                                            of_type=type)

        check.param_invariant(
            all(
                issubclass(auto_plugin_type, TypeStoragePlugin)
                for auto_plugin_type in auto_plugins),
            'auto_plugins',
        )

        self.auto_plugins = auto_plugins

        self.is_builtin = check.bool_param(is_builtin, 'is_builtin')
        check.invariant(
            self.display_name is not None,
            'All types must have a valid display name, got None for key {}'.
            format(key),
        )
Example #25
0
    def __init__(
        self,
        origin: RepositoryLocationOrigin,
        host: Optional[str] = None,
        port: Optional[int] = None,
        socket: Optional[str] = None,
        server_id: Optional[str] = None,
        heartbeat: Optional[bool] = False,
        watch_server: Optional[bool] = True,
        grpc_server_registry: Optional[GrpcServerRegistry] = None,
    ):
        from dagster.grpc.client import DagsterGrpcClient, client_heartbeat_thread

        self._origin = check.inst_param(origin, "origin",
                                        RepositoryLocationOrigin)

        self.grpc_server_registry = check.opt_inst_param(
            grpc_server_registry, "grpc_server_registry", GrpcServerRegistry)

        if isinstance(self.origin, GrpcServerRepositoryLocationOrigin):
            self._port = self.origin.port
            self._socket = self.origin.socket
            self._host = self.origin.host
            self._use_ssl = bool(self.origin.use_ssl)
        else:
            self._port = check.opt_int_param(port, "port")
            self._socket = check.opt_str_param(socket, "socket")
            self._host = check.str_param(host, "host")
            self._use_ssl = False

        self._watch_thread_shutdown_event = None
        self._watch_thread = None

        self._heartbeat_shutdown_event = None
        self._heartbeat_thread = None

        self._heartbeat = check.bool_param(heartbeat, "heartbeat")
        self._watch_server = check.bool_param(watch_server, "watch_server")

        self.server_id = None
        self._external_repositories_data = None

        self._executable_path = None
        self._container_image = None
        self._container_context = None
        self._repository_code_pointer_dict = None
        self._entry_point = None

        try:
            self.client = DagsterGrpcClient(
                port=self._port,
                socket=self._socket,
                host=self._host,
                use_ssl=self._use_ssl,
            )
            list_repositories_response = sync_list_repositories_grpc(
                self.client)

            self.server_id = server_id if server_id else sync_get_server_id(
                self.client)
            self.repository_names = set(
                symbol.repository_name
                for symbol in list_repositories_response.repository_symbols)

            if self._heartbeat:
                self._heartbeat_shutdown_event = threading.Event()

                self._heartbeat_thread = threading.Thread(
                    target=client_heartbeat_thread,
                    args=(
                        self.client,
                        self._heartbeat_shutdown_event,
                    ),
                    name="grpc-client-heartbeat",
                )
                self._heartbeat_thread.daemon = True
                self._heartbeat_thread.start()

            self._executable_path = list_repositories_response.executable_path
            self._repository_code_pointer_dict = (
                list_repositories_response.repository_code_pointer_dict)
            self._entry_point = list_repositories_response.entry_point

            self._container_image = (
                list_repositories_response.container_image
                or self._reload_current_image(
                )  # Back-compat for older gRPC servers that did not include container_image in ListRepositoriesResponse
            )

            self._container_context = list_repositories_response.container_context

            self._external_repositories_data = sync_get_streaming_external_repositories_data_grpc(
                self.client,
                self,
            )

            self.external_repositories = {
                repo_name: ExternalRepository(
                    repo_data,
                    RepositoryHandle(
                        repository_name=repo_name,
                        repository_location=self,
                    ),
                )
                for repo_name, repo_data in
                self._external_repositories_data.items()
            }
        except:
            self.cleanup()
            raise
Example #26
0
    def execute_query(self,
                      query,
                      fetch_results=False,
                      cursor_factory=None,
                      error_callback=None):
        """Synchronously execute a single query against Redshift. Will return a list of rows, where
        each row is a tuple of values, e.g. SELECT 1 will return [(1,)].

        Args:
            query (str): The query to execute.
            fetch_results (Optional[bool]): Whether to return the results of executing the query.
                Defaults to False, in which case the query will be executed without retrieving the
                results.
            cursor_factory (Optional[:py:class:`psycopg2.extensions.cursor`]): An alternative
                cursor_factory; defaults to None. Will be used when constructing the cursor.
            error_callback (Optional[Callable[[Exception, Cursor, DagsterLogManager], None]]): A
                callback function, invoked when an exception is encountered during query execution;
                this is intended to support executing additional queries to provide diagnostic
                information, e.g. by querying ``stl_load_errors`` using ``pg_last_copy_id()``. If no
                function is provided, exceptions during query execution will be raised directly.

        Returns:
            Optional[List[Tuple[Any, ...]]]: Results of the query, as a list of tuples, when
                fetch_results is set. Otherwise return None.
        """
        check.str_param(query, "query")
        check.bool_param(fetch_results, "fetch_results")
        check.opt_class_param(cursor_factory,
                              "cursor_factory",
                              superclass=psycopg2.extensions.cursor)
        check.opt_callable_param(error_callback, "error_callback")

        with self._get_conn() as conn:
            with self._get_cursor(conn,
                                  cursor_factory=cursor_factory) as cursor:
                try:
                    self.log.info(
                        "Executing query '{query}'".format(query=query))
                    cursor.execute(query)

                    if fetch_results and cursor.rowcount > 0:
                        return cursor.fetchall()
                    else:
                        self.log.info("Empty result from query")

                except Exception as e:
                    # If autocommit is disabled or not set (it is disabled by default), Redshift
                    # will be in the middle of a transaction at exception time, and because of
                    # the failure the current transaction will not accept any further queries.
                    #
                    # This conn.commit() call closes the open transaction before handing off
                    # control to the error callback, so that the user can issue additional
                    # queries. Notably, for e.g. pg_last_copy_id() to work, it requires you to
                    # use the same conn/cursor, so you have to do this conn.commit() to ensure
                    # things are in a usable state in the error callback.
                    if not self.autocommit:
                        conn.commit()

                    if error_callback is not None:
                        error_callback(e, cursor, self.log)
                    else:
                        raise
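
The docstring above mentions diagnosing load failures by querying ``stl_load_errors`` with ``pg_last_copy_id()``. Here is a hedged sketch of what such an ``error_callback`` might look like; the diagnostic query and the commented-out call site are assumptions about a typical setup, not the library's prescribed pattern, and ``redshift`` is a hypothetical resource instance.

def diagnose_copy_failure(error, cursor, log):
    # Hypothetical callback: execute_query has already committed the open
    # transaction, so the same cursor can issue follow-up diagnostic queries.
    log.error("COPY failed: {}".format(error))
    cursor.execute(
        "SELECT starttime, filename, err_reason "
        "FROM stl_load_errors "
        "WHERE query = pg_last_copy_id() "
        "ORDER BY starttime DESC LIMIT 5"
    )
    for row in cursor.fetchall():
        log.error("stl_load_errors row: {}".format(row))


# Hypothetical call site, assuming `redshift` is an instance of the resource above:
# redshift.execute_query(
#     "COPY my_table FROM 's3://bucket/key' IAM_ROLE 'arn:aws:iam::...:role/...'",
#     error_callback=diagnose_copy_failure,
# )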
Example #27
0
    def __init__(
        self,
        type_check_fn,
        key=None,
        name=None,
        is_builtin=False,
        description=None,
        loader=None,
        materializer=None,
        serialization_strategy=None,
        auto_plugins=None,
        required_resource_keys=None,
        kind=DagsterTypeKind.REGULAR,
        # Graveyard is below
        input_hydration_config=None,
        output_materialization_config=None,
    ):
        check.opt_str_param(key, "key")
        check.opt_str_param(name, "name")

        check.invariant(not (name is None and key is None), "Must set key or name")

        if name is None:
            check.param_invariant(
                bool(key), "key", "If name is not provided, must provide key.",
            )
            self.key, self.name = key, None
        elif key is None:
            check.param_invariant(
                bool(name), "name", "If key is not provided, must provide name.",
            )
            self.key, self.name = name, name
        else:
            check.invariant(key and name)
            self.key, self.name = key, name

        self.description = check.opt_str_param(description, "description")
        self.loader = canonicalize_backcompat_args(
            check.opt_inst_param(loader, "loader", DagsterTypeLoader),
            "loader",
            check.opt_inst_param(
                input_hydration_config, "input_hydration_config", DagsterTypeLoader
            ),
            "input_hydration_config",
            "0.10.0",
        )
        self.materializer = canonicalize_backcompat_args(
            check.opt_inst_param(materializer, "materializer", DagsterTypeMaterializer),
            "materializer",
            check.opt_inst_param(
                output_materialization_config,
                "output_materialization_config",
                DagsterTypeMaterializer,
            ),
            "output_materialization_config",
            "0.10.0",
        )
        self.serialization_strategy = check.opt_inst_param(
            serialization_strategy,
            "serialization_strategy",
            SerializationStrategy,
            PickleSerializationStrategy(),
        )
        self.required_resource_keys = check.opt_set_param(
            required_resource_keys, "required_resource_keys",
        )

        self._type_check_fn = check.callable_param(type_check_fn, "type_check_fn")
        _validate_type_check_fn(self._type_check_fn, self.name)

        auto_plugins = check.opt_list_param(auto_plugins, "auto_plugins", of_type=type)

        check.param_invariant(
            all(
                issubclass(auto_plugin_type, TypeStoragePlugin) for auto_plugin_type in auto_plugins
            ),
            "auto_plugins",
        )

        self.auto_plugins = auto_plugins

        self.is_builtin = check.bool_param(is_builtin, "is_builtin")
        check.invariant(
            self.display_name is not None,
            "All types must have a valid display name, got None for key {}".format(key),
        )

        self.kind = check.inst_param(kind, "kind", DagsterTypeKind)
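
For context on the loader/materializer handling above, here is a rough sketch of the resolution a helper like canonicalize_backcompat_args presumably performs: prefer the new argument, and warn and fall back when only the deprecated one is supplied. This is an illustrative assumption, not dagster's actual implementation.

import warnings


def canonicalize_backcompat_args_sketch(new_val, new_arg, old_val, old_arg, breaking_version):
    # Illustrative only: passing both is treated as a usage error; passing only the
    # deprecated argument warns and falls back to it.
    if new_val is not None and old_val is not None:
        raise ValueError(
            "Do not use deprecated '{old}' alongside '{new}'.".format(old=old_arg, new=new_arg)
        )
    if old_val is not None:
        warnings.warn(
            "'{old}' is deprecated and will be removed in {version}; use '{new}' instead.".format(
                old=old_arg, version=breaking_version, new=new_arg
            )
        )
        return old_val
    return new_val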
Example #28
0
def _helm_chart_helper(namespace, should_cleanup, helm_config):
    """Install helm chart.
    """
    check.str_param(namespace, "namespace")
    check.bool_param(should_cleanup, "should_cleanup")

    print("--- \033[32m:helm: Installing Helm chart\033[0m")

    try:
        helm_config_yaml = yaml.dump(helm_config, default_flow_style=False)

        helm_cmd = [
            "helm",
            "install",
            "--namespace",
            namespace,
            "-f",
            "-",
            "dagster",
            os.path.join(git_repository_root(), "helm", "dagster"),
        ]

        print("Running Helm Install: \n", " ".join(helm_cmd),
              "\nWith config:\n", helm_config_yaml)

        p = subprocess.Popen(helm_cmd,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        stdout, stderr = p.communicate(six.ensure_binary(helm_config_yaml))
        print("Helm install completed with stdout: ", stdout)
        print("Helm install completed with stderr: ", stderr)
        assert p.returncode == 0

        # Wait for Dagit pod to be ready (won't actually stay up w/out js rebuild)
        kube_api = kubernetes.client.CoreV1Api()

        print("Waiting for Dagit pod to be ready...")
        dagit_pod = None
        while dagit_pod is None:
            pods = kube_api.list_namespaced_pod(namespace=namespace)
            pod_names = [
                p.metadata.name for p in pods.items
                if "dagit" in p.metadata.name
            ]
            if pod_names:
                dagit_pod = pod_names[0]
            time.sleep(1)

        # Wait for Celery worker queues to become ready
        print("Waiting for celery workers")
        pods = kubernetes.client.CoreV1Api().list_namespaced_pod(
            namespace=namespace)
        pod_names = [
            p.metadata.name for p in pods.items
            if "celery-workers" in p.metadata.name
        ]
        for pod_name in pod_names:
            print("Waiting for Celery worker pod %s" % pod_name)
            wait_for_pod(pod_name, namespace=namespace)

        if helm_config.get("userDeployments") and helm_config.get(
                "userDeployments", {}).get("enabled"):
            # Wait for user code deployments to be ready
            print("Waiting for user code deployments")
            pods = kubernetes.client.CoreV1Api().list_namespaced_pod(
                namespace=namespace)
            pod_names = [
                p.metadata.name for p in pods.items
                if "user-code-deployment" in p.metadata.name
            ]
            for pod_name in pod_names:
                print("Waiting for user code deployment pod %s" % pod_name)
                wait_for_pod(pod_name, namespace=namespace)

        yield

    finally:
        # Can skip this step as a time saver when we're going to destroy the cluster anyway, e.g.
        # w/ a kind cluster
        if should_cleanup:
            print("Uninstalling helm chart")
            check_output(
                ["helm", "uninstall", "dagster", "--namespace", namespace],
                cwd=git_repository_root(),
            )
Example #29
0
def execution_context_event_generator(
    construct_context_fn,
    pipeline,
    execution_plan,
    run_config,
    pipeline_run,
    instance,
    scoped_resources_builder_cm=None,
    intermediate_storage=None,
    raise_on_error=False,
    output_capture=None,
):
    scoped_resources_builder_cm = check.opt_callable_param(
        scoped_resources_builder_cm,
        "scoped_resources_builder_cm",
        default=resource_initialization_manager,
    )

    execution_plan = check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    pipeline_def = pipeline.get_definition()

    run_config = check.dict_param(run_config, "run_config", key_type=str)
    pipeline_run = check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    instance = check.inst_param(instance, "instance", DagsterInstance)

    intermediate_storage = check.opt_inst_param(
        intermediate_storage, "intermediate_storage_data", IntermediateStorage
    )
    raise_on_error = check.bool_param(raise_on_error, "raise_on_error")

    execution_context = None
    resources_manager = None

    try:
        context_creation_data = create_context_creation_data(
            pipeline,
            execution_plan,
            run_config,
            pipeline_run,
            instance,
        )

        log_manager = create_log_manager(context_creation_data)
        resource_defs = pipeline_def.get_mode_definition(
            context_creation_data.environment_config.mode
        ).resource_defs
        resources_manager = scoped_resources_builder_cm(
            resource_defs=resource_defs,
            resource_configs=context_creation_data.environment_config.resources,
            log_manager=log_manager,
            execution_plan=execution_plan,
            pipeline_run=context_creation_data.pipeline_run,
            resource_keys_to_init=context_creation_data.resource_keys_to_init,
            instance=instance,
            emit_persistent_events=True,
            pipeline_def_for_backwards_compat=pipeline_def,
        )
        yield from resources_manager.generate_setup_events()
        scoped_resources_builder = check.inst(
            resources_manager.get_object(), ScopedResourcesBuilder
        )

        intermediate_storage = create_intermediate_storage(
            context_creation_data,
            intermediate_storage,
            scoped_resources_builder,
        )

        execution_context = construct_context_fn(
            context_creation_data=context_creation_data,
            scoped_resources_builder=scoped_resources_builder,
            log_manager=log_manager,
            intermediate_storage=intermediate_storage,
            raise_on_error=raise_on_error,
            output_capture=output_capture,
        )

        _validate_plan_with_context(execution_context, execution_plan)

        yield execution_context
        yield from resources_manager.generate_teardown_events()
    except DagsterError as dagster_error:
        if execution_context is None:
            user_facing_exc_info = (
                # pylint does not know original_exc_info exists if is_user_code_error is true
                # pylint: disable=no-member
                dagster_error.original_exc_info
                if dagster_error.is_user_code_error
                else sys.exc_info()
            )
            error_info = serializable_error_info_from_exc_info(user_facing_exc_info)

            yield DagsterEvent.pipeline_init_failure(
                pipeline_name=pipeline_def.name,
                failure_data=PipelineInitFailureData(error=error_info),
                log_manager=_create_context_free_log_manager(instance, pipeline_run, pipeline_def),
            )
            if resources_manager:
                yield from resources_manager.generate_teardown_events()
        else:
            # pipeline teardown failure
            raise dagster_error

        if raise_on_error:
            raise dagster_error
Example #30
0
def helm_chart(namespace, docker_image, should_cleanup=True):
    check.str_param(namespace, "namespace")
    check.str_param(docker_image, "docker_image")
    check.bool_param(should_cleanup, "should_cleanup")

    repository, tag = docker_image.split(":")
    pull_policy = image_pull_policy()
    helm_config = {
        "dagster-user-deployments": {
            "enabled": False,
            "enableSubchart": False
        },
        "dagit": {
            "image": {
                "repository": repository,
                "tag": tag,
                "pullPolicy": pull_policy
            },
            "env": {
                "TEST_SET_ENV_VAR": "test_dagit_env_var"
            },
            "envConfigMaps": [{
                "name": TEST_CONFIGMAP_NAME
            }],
            "envSecrets": [{
                "name": TEST_SECRET_NAME
            }],
            "livenessProbe": {
                "httpGet": {
                    "path": "/dagit_info",
                    "port": 80
                },
                "periodSeconds": 20,
                "failureThreshold": 3,
            },
            "startupProbe": {
                "httpGet": {
                    "path": "/dagit_info",
                    "port": 80
                },
                "failureThreshold": 6,
                "periodSeconds": 10,
            },
        },
        "flower": {
            "enabled": True,
            "livenessProbe": {
                "tcpSocket": {
                    "port": "flower"
                },
                "periodSeconds": 20,
                "failureThreshold": 3,
            },
            "startupProbe": {
                "tcpSocket": {
                    "port": "flower"
                },
                "failureThreshold": 6,
                "periodSeconds": 10,
            },
        },
        "runLauncher": {
            "type": "CeleryK8sRunLauncher",
            "config": {
                "celeryK8sRunLauncher": {
                    "image": {
                        "repository": repository,
                        "tag": tag,
                        "pullPolicy": pull_policy
                    },
                    "workerQueues": [
                        {
                            "name": "dagster",
                            "replicaCount": 2
                        },
                        {
                            "name": "extra-queue-1",
                            "replicaCount": 1,
                            "labels": {
                                "celery-label-key": "celery-label-value"
                            },
                        },
                    ],
                    "livenessProbe": {
                        "initialDelaySeconds": 15,
                        "periodSeconds": 10,
                        "timeoutSeconds": 10,
                        "successThreshold": 1,
                        "failureThreshold": 3,
                    },
                },
            },
        },
        "rabbitmq": {
            "enabled": True
        },
        "ingress": {
            "enabled": True,
            "dagit": {
                "host": "dagit.example.com"
            },
            "flower": {
                "flower": "flower.example.com"
            },
        },
        "scheduler": {
            "type": "K8sScheduler",
            "config": {
                "k8sScheduler": {
                    "schedulerNamespace": namespace,
                    "envSecrets": [{
                        "name": TEST_SECRET_NAME
                    }],
                }
            },
        },
        "serviceAccount": {
            "name": "dagit-admin"
        },
        "postgresqlPassword": "******",
        "postgresqlDatabase": "test",
        "postgresqlUser": "******",
        "dagsterDaemon": {
            "enabled": False
        },
    }

    with _helm_chart_helper(namespace,
                            should_cleanup,
                            helm_config,
                            helm_install_name="helm_chart"):
        yield