Example #1
0
    def __init__(self, mysql_url, inst_data=None):
        """Create a MySQL-backed run storage, bootstrapping the schema if absent.

        Args:
            mysql_url: Connection URL string passed to ``create_engine``.
            inst_data: Optional ``ConfigurableClassData``; stored on the instance.
        """
        experimental_class_warning("MySQLRunStorage")
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)
        # Validate the URL eagerly (as MySQLEventLogStorage does) so a wrong
        # argument type is reported here rather than from inside the engine.
        self.mysql_url = check.str_param(mysql_url, "mysql_url")

        # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
        self._engine = create_engine(
            self.mysql_url,
            isolation_level="AUTOCOMMIT",
            poolclass=db.pool.NullPool,
        )

        self._index_migration_cache = {}
        table_names = retry_mysql_connection_fn(
            db.inspect(self._engine).get_table_names)

        # A missing "runs" table is treated as an uninitialized database:
        # create the tables, stamp the alembic revision, then build indexes.
        if "runs" not in table_names:
            with self.connect() as conn:
                alembic_config = mysql_alembic_config(__file__)
                retry_mysql_creation_fn(
                    lambda: RunStorageSqlMetadata.create_all(conn))
                stamp_alembic_rev(alembic_config, conn)
            self.build_missing_indexes()

        super().__init__()
Example #2
0
 def __new__(cls, partition: str, entry: EventMetadataEntry):
     """Build a PartitionMetadataEntry from validated arguments.

     ``partition`` is checked to be a string and ``entry`` to be an
     ``EventMetadataEntry`` before the instance is created.
     """
     experimental_class_warning("PartitionMetadataEntry")
     checked_partition = check.str_param(partition, "partition")
     checked_entry = check.inst_param(entry, "entry", EventMetadataEntry)
     return super(PartitionMetadataEntry, cls).__new__(
         cls, checked_partition, checked_entry)
Example #3
0
 def __new__(cls, max_retries=1, delay=None):
     """Build a RetryPolicy from validated arguments.

     ``max_retries`` must be an int; ``delay`` may be ``None`` or numeric.
     """
     experimental_class_warning("RetryPolicy")
     # Validate in declaration order, then forward everything by keyword.
     validated_fields = {
         "max_retries": check.int_param(max_retries, "max_retries"),
         "delay": check.opt_numeric_param(delay, "delay"),
     }
     return super().__new__(cls, **validated_fields)
Example #4
0
    def __init__(self, mysql_url, inst_data=None):
        """Create a MySQL-backed event log storage, bootstrapping the schema if absent.

        Args:
            mysql_url: Connection URL string passed to ``create_engine``.
            inst_data: Optional ``ConfigurableClassData``; stored on the instance.
        """
        experimental_class_warning("MySQLEventLogStorage")
        self._inst_data = check.opt_inst_param(
            inst_data, "inst_data", ConfigurableClassData
        )
        self.mysql_url = check.str_param(mysql_url, "mysql_url")
        self._disposed = False

        self._event_watcher = SqlPollingEventWatcher(self)

        # NullPool: do not keep connections open, so that connections do not
        # accumulate per DagsterInstance.
        engine_options = {
            "isolation_level": "AUTOCOMMIT",
            "poolclass": db.pool.NullPool,
        }
        self._engine = create_engine(self.mysql_url, **engine_options)
        self._secondary_index_cache = {}

        inspector = db.inspect(self._engine)
        existing_tables = retry_mysql_connection_fn(inspector.get_table_names)

        if "event_logs" not in existing_tables:
            with self._connect() as conn:
                alembic_config = mysql_alembic_config(__file__)

                def _create_tables():
                    return SqlEventLogStorageMetadata.create_all(conn)

                retry_mysql_creation_fn(_create_tables)
                # The stamped revision may be shared with other dagster
                # storage classes that use the same database.
                stamp_alembic_rev(alembic_config, conn)

            # Mark every secondary index as usable.
            self.reindex()

        super().__init__()
Example #5
0
    def __init__(self, mysql_url, inst_data=None):
        """Create a MySQL-backed event log storage, initializing the database if needed.

        Args:
            mysql_url: Connection URL string passed to ``create_engine``.
            inst_data: Optional ``ConfigurableClassData``; stored on the instance.
        """
        experimental_class_warning("MySQLEventLogStorage")
        self._inst_data = check.opt_inst_param(
            inst_data, "inst_data", ConfigurableClassData)
        self.mysql_url = check.str_param(mysql_url, "mysql_url")
        self._disposed = False

        self._event_watcher = SqlPollingEventWatcher(self)

        # NullPool: do not keep connections open, so that connections do not
        # accumulate per DagsterInstance.
        engine_options = {
            "isolation_level": "AUTOCOMMIT",
            "poolclass": db.pool.NullPool,
        }
        self._engine = create_engine(self.mysql_url, **engine_options)
        self._secondary_index_cache = {}

        inspector = db.inspect(self._engine)
        existing_tables = retry_mysql_connection_fn(inspector.get_table_names)

        # The presence of the main table is the initialization signal; the
        # alembic revision cannot be used because the alembic config may be
        # shared with other storage classes.
        main_table_missing = "event_logs" not in existing_tables
        if main_table_missing:
            retry_mysql_creation_fn(self._init_db)
            # Mark every secondary index as usable.
            self.reindex_events()
            self.reindex_assets()

        super().__init__()
Example #6
0
    def __init__(
        self,
        hostname: str,
        port_number: Optional[int] = None,
        transport: Optional[Transport] = None,
        use_https: bool = False,
    ):
        """Create a GraphQL client pointed at ``<scheme>://<host>[:<port>]/graphql``.

        Args:
            hostname: Host name used to build the endpoint URL.
            port_number: Optional port; omitted from the URL when falsy.
            transport: Optional ``Transport``; when ``None`` a
                ``RequestsHTTPTransport`` for the built URL is used.
            use_https: Use ``https://`` instead of ``http://`` when true.

        Raises:
            DagsterGraphQLClientError: If creating the ``Client`` raises a
                ``requests`` connection error.
        """
        experimental_class_warning(self.__class__.__name__)

        self._hostname = check.str_param(hostname, "hostname")
        self._port_number = check.opt_int_param(port_number, "port_number")
        self._use_https = check.bool_param(use_https, "use_https")

        scheme = "https://" if self._use_https else "http://"
        if self._port_number:
            authority = f"{self._hostname}:{self._port_number}"
        else:
            authority = self._hostname
        self._url = scheme + authority + "/graphql"

        fallback_transport = RequestsHTTPTransport(url=self._url, use_json=True)
        self._transport = check.opt_inst_param(
            transport, "transport", Transport, default=fallback_transport)

        try:
            self._client = Client(
                transport=self._transport,
                fetch_schema_from_transport=True,
            )
        except requests.exceptions.ConnectionError as exc:
            # Mention the port only when one was actually supplied.
            if self._port_number:
                port_hint = f"and port_number: {self._port_number} "
            else:
                port_hint = ""
            message = (
                f"Error when connecting to url {self._url}. "
                f"Did you specify hostname: {self._hostname} "
                + port_hint
                + "correctly?"
            )
            raise DagsterGraphQLClientError(message) from exc
Example #7
0
    def __new__(
        cls,
        max_retries: int = 1,
        delay: Optional[check.Numeric] = None,
        backoff: Optional[Backoff] = None,
        jitter: Optional[Jitter] = None,
    ):
        """Build a RetryPolicy from validated arguments.

        Args:
            max_retries: Must be an int.
            delay: Optional numeric delay; required whenever ``backoff`` or
                ``jitter`` is given.
            backoff: Optional ``Backoff`` instance.
            jitter: Optional ``Jitter`` instance.

        Raises:
            DagsterInvalidDefinitionError: If ``backoff`` or ``jitter`` is set
                while ``delay`` is ``None``.
        """
        experimental_class_warning("RetryPolicy")

        # Each message names the parameter that actually triggered the error
        # (the two messages were previously swapped).
        if backoff is not None and delay is None:
            raise DagsterInvalidDefinitionError(
                "Can not set backoff on RetryPolicy without also setting delay"
            )

        if jitter is not None and delay is None:
            raise DagsterInvalidDefinitionError(
                "Can not set jitter on RetryPolicy without also setting delay"
            )

        return super().__new__(
            cls,
            max_retries=check.int_param(max_retries, "max_retries"),
            delay=check.opt_numeric_param(delay, "delay"),
            backoff=check.opt_inst_param(backoff, "backoff", Backoff),
            jitter=check.opt_inst_param(jitter, "jitter", Jitter),
        )
Example #8
0
    def __init__(self, mysql_url, inst_data=None):
        """Create a MySQL-backed run storage, initializing the database if needed.

        Args:
            mysql_url: Connection URL string passed to ``create_engine``.
            inst_data: Optional ``ConfigurableClassData``; stored on the instance.
        """
        experimental_class_warning("MySQLRunStorage")
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)
        # Validate the URL eagerly (as MySQLEventLogStorage does) so a wrong
        # argument type is reported here rather than from inside the engine.
        self.mysql_url = check.str_param(mysql_url, "mysql_url")

        # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
        self._engine = create_engine(
            self.mysql_url,
            isolation_level="AUTOCOMMIT",
            poolclass=db.pool.NullPool,
        )

        self._index_migration_cache = {}
        table_names = retry_mysql_connection_fn(
            db.inspect(self._engine).get_table_names)

        # Stamp and create tables if the main table does not exist (we can't check alembic
        # revision because alembic config may be shared with other storage classes)
        if "runs" not in table_names:
            retry_mysql_creation_fn(self._init_db)
            self.migrate()
            self.optimize()

        # The run tables exist but the instance_info table does not:
        # create only that table.
        elif "instance_info" not in table_names:
            InstanceInfo.create(self._engine)

        super().__init__()
Example #9
0
 def __new__(cls, instance):
     """Build an ExecutableContext around a validated ``DagsterInstance``."""
     experimental_class_warning("ExecutableContext")
     checked_instance = check.inst_param(instance, "instance", DagsterInstance)
     return super(ExecutableContext, cls).__new__(cls, checked_instance)
Example #10
0
 def __init__(self, hostname: str, port_number: Optional[int] = None):
     """Create a GraphQL client pointed at ``http://<host>[:<port>]/graphql``.

     Args:
         hostname: Host name used to build the endpoint URL.
         port_number: Optional port; omitted from the URL when falsy.
     """
     experimental_class_warning(self.__class__.__name__)
     self._hostname = check.str_param(hostname, "hostname")
     self._port_number = check.opt_int_param(port_number, "port_number")

     if self._port_number:
         authority = f"{self._hostname}:{self._port_number}"
     else:
         authority = self._hostname
     self._url = "http://" + authority + "/graphql"

     self._transport = RequestsHTTPTransport(url=self._url, use_json=True)
     self._client = Client(
         transport=self._transport,
         fetch_schema_from_transport=True,
     )
Example #11
0
 def __init__(
     self,
     name,
     pipeline_name,
     evaluation_fn,
     solid_selection=None,
     mode=None,
 ):
     """Create a sensor definition.

     ``name``, ``pipeline_name``, ``mode`` and ``solid_selection`` are
     forwarded to the parent initializer with ``job_type=JobType.SENSOR``;
     ``evaluation_fn`` must be callable and is stored on the instance.
     """
     experimental_class_warning("SensorDefinition")
     parent_kwargs = {
         "job_type": JobType.SENSOR,
         "pipeline_name": pipeline_name,
         "mode": mode,
         "solid_selection": solid_selection,
     }
     super(SensorDefinition, self).__init__(name, **parent_kwargs)
     self._evaluation_fn = check.callable_param(evaluation_fn, "evaluation_fn")
Example #12
0
 def __init__(
     self,
     description,
     validation_fn,
     resulting_exception,
     raise_or_typecheck=True,
     name=None,
 ):
     """Store the pieces that make up this constraint.

     Args:
         description: Stored as ``self.description``.
         validation_fn: Stored as ``self.validation_fn``; expected to return
             a tuple of (bool, dict of extra params, possibly empty).
         resulting_exception: Stored as ``self.resulting_exception``.
         raise_or_typecheck: Stored as ``self.raise_or_typecheck``.
         name: Optional name; defaults to the concrete class name.
     """
     experimental_class_warning(self.__class__.__name__)
     # Fall back to the concrete class name when no explicit name is given.
     self.name = self.__class__.__name__ if name is None else name
     self.description = description
     self.validation_fn = validation_fn
     self.resulting_exception = resulting_exception
     self.raise_or_typecheck = raise_or_typecheck
Example #13
0
 def __init__(
     self,
     name,
     pipeline_name,
     run_config_fn=None,
     tags_fn=None,
     mode="default",
     solid_selection=None,
 ):
     """Create an executable definition from validated arguments.

     Args:
         name: Must be a string.
         pipeline_name: Must be a string.
         run_config_fn: Optional callable; defaults to one returning ``{}``.
         tags_fn: Optional callable; defaults to one returning ``{}``.
         mode: Optional string; ``DEFAULT_MODE_NAME`` is used when ``None``.
         solid_selection: ``None`` or a list of strings.
     """
     experimental_class_warning("ExecutableDefinition")

     self._name = check.str_param(name, "name")
     self._pipeline_name = check.str_param(pipeline_name, "pipeline_name")

     # Separate fallbacks so the two attributes never share a function object.
     default_run_config_fn = lambda _context: {}
     default_tags_fn = lambda _context: {}
     self._run_config_fn = check.opt_callable_param(
         run_config_fn, "run_config_fn", default_run_config_fn)
     self._tags_fn = check.opt_callable_param(
         tags_fn, "tags_fn", default_tags_fn)

     self._mode = check.opt_str_param(mode, "mode", DEFAULT_MODE_NAME)
     self._solid_selection = check.opt_nullable_list_param(
         solid_selection,
         "solid_selection",
         of_type=str,
     )
Example #14
0
 def __init__(self):
     """Emit the experimental-class warning for ``MyExperimentalClass``."""
     experimental_class_warning("MyExperimentalClass")
Example #15
0
    def __init__(
        self,
        solid_defs: Optional[List[NodeDefinition]] = None,
        name: Optional[str] = None,
        description: Optional[str] = None,
        dependencies: Optional[Dict[Union[str, NodeInvocation],
                                    Dict[str, IDependencyDefinition]]] = None,
        mode_defs: Optional[List[ModeDefinition]] = None,
        preset_defs: Optional[List[PresetDefinition]] = None,
        tags: Optional[Dict[str, Any]] = None,
        hook_defs: Optional[AbstractSet[HookDefinition]] = None,
        solid_retry_policy: Optional[RetryPolicy] = None,
        graph_def=None,
        _parent_pipeline_def=None,  # https://github.com/dagster-io/dagster/issues/2115
        version_strategy: Optional[VersionStrategy] = None,
    ):
        """Build a pipeline definition from a graph or from legacy solid arguments.

        Args:
            solid_defs: Node definitions for the legacy path; required when
                ``graph_def`` is not given.
            name: Pipeline name; required on the legacy path, otherwise
                defaults to ``graph_def.name``.
            description: Optional description string stored on the pipeline.
            dependencies: Dependency mapping forwarded to ``GraphDefinition``
                on the legacy path.
            mode_defs: Optional list of ``ModeDefinition``; a single default
                ``ModeDefinition()`` is used when empty or ``None``.
            preset_defs: Optional list of ``PresetDefinition``.
            tags: Optional tags, passed through ``validate_tags``.
            hook_defs: Optional set of ``HookDefinition``.
            solid_retry_policy: Optional ``RetryPolicy``.
            graph_def: Optional ``GraphDefinition`` to use directly.
            _parent_pipeline_def: Optional ``PipelineDefinition``.
            version_strategy: Optional ``VersionStrategy``; triggers an
                experimental warning when set.

        Raises:
            DagsterInvalidDefinitionError: On duplicate mode names, duplicate
                preset names, or a preset referencing an undefined mode.
        """
        # If a graph is specified directly, use it
        if check.opt_inst_param(graph_def, "graph_def", GraphDefinition):
            self._graph_def = graph_def
            self._name = name or graph_def.name

        # Otherwise fallback to legacy construction
        else:
            if name is None:
                check.failed("name must be set provided")
            self._name = name

            if solid_defs is None:
                check.failed("solid_defs must be provided")

            self._graph_def = GraphDefinition(
                name=name,
                dependencies=dependencies,
                node_defs=solid_defs,
                input_mappings=None,
                output_mappings=None,
                config=None,
                description=None,
            )

        # tags and description can exist on graph as well, but since
        # same graph may be in multiple pipelines/jobs, keep separate layer
        self._description = check.opt_str_param(description, "description")
        self._tags = validate_tags(tags)

        self._current_level_node_defs = self._graph_def.node_defs

        mode_definitions = check.opt_list_param(mode_defs,
                                                "mode_defs",
                                                of_type=ModeDefinition)

        # Always have at least one mode to work with.
        if not mode_definitions:
            mode_definitions = [ModeDefinition()]

        self._mode_definitions = mode_definitions

        # Reject duplicate mode names; seen_modes is reused below to validate
        # the modes referenced by presets.
        seen_modes = set()
        for mode_def in mode_definitions:
            if mode_def.name in seen_modes:
                raise DagsterInvalidDefinitionError((
                    'Two modes seen with the name "{mode_name}" in "{pipeline_name}". '
                    "Modes must have unique names.").format(
                        mode_name=mode_def.name, pipeline_name=self.name))
            seen_modes.add(mode_def.name)

        self._hook_defs = check.opt_set_param(hook_defs,
                                              "hook_defs",
                                              of_type=HookDefinition)
        self._solid_retry_policy = check.opt_inst_param(
            solid_retry_policy, "solid_retry_policy", RetryPolicy)

        self._preset_defs = check.opt_list_param(preset_defs, "preset_defs",
                                                 PresetDefinition)
        # Index presets by name, rejecting duplicates and unknown modes.
        self._preset_dict: Dict[str, PresetDefinition] = {}
        for preset in self._preset_defs:
            if preset.name in self._preset_dict:
                raise DagsterInvalidDefinitionError((
                    'Two PresetDefinitions seen with the name "{name}" in "{pipeline_name}". '
                    "PresetDefinitions must have unique names.").format(
                        name=preset.name, pipeline_name=self.name))
            if preset.mode not in seen_modes:
                raise DagsterInvalidDefinitionError(
                    ('PresetDefinition "{name}" in "{pipeline_name}" '
                     'references mode "{mode}" which is not defined.').format(
                         name=preset.name,
                         pipeline_name=self.name,
                         mode=preset.mode))
            self._preset_dict[preset.name] = preset

        # One resource-requirement entry per mode, keyed by mode name.
        self._resource_requirements = {
            mode_def.name: _checked_resource_reqs_for_mode(
                mode_def,
                self._current_level_node_defs,
                self._graph_def._dagster_type_dict,
                self._graph_def._node_dict,
                self._hook_defs,
                self._graph_def._dependency_structure,
            )
            for mode_def in self._mode_definitions
        }

        # Recursively explore all nodes in this pipeline
        self._all_node_defs = _build_all_node_defs(
            self._current_level_node_defs)
        self._parent_pipeline_def = check.opt_inst_param(
            _parent_pipeline_def, "_parent_pipeline_def", PipelineDefinition)
        self._cached_run_config_schemas: Dict[str, "RunConfigSchema"] = {}
        self._cached_external_pipeline = None

        self.version_strategy = check.opt_inst_param(version_strategy,
                                                     "version_strategy",
                                                     VersionStrategy)

        # Only warn when the experimental feature is actually used.
        if self.version_strategy is not None:
            experimental_class_warning("VersionStrategy")