def __init__(self, mysql_url, inst_data=None):
    experimental_class_warning("MySQLRunStorage")
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.mysql_url = mysql_url

    # Default to not holding any connections open to prevent accumulating connections
    # per DagsterInstance
    self._engine = create_engine(
        self.mysql_url,
        isolation_level="AUTOCOMMIT",
        poolclass=db.pool.NullPool,
    )

    self._index_migration_cache = {}
    table_names = retry_mysql_connection_fn(db.inspect(self._engine).get_table_names)
    if "runs" not in table_names:
        with self.connect() as conn:
            alembic_config = mysql_alembic_config(__file__)
            retry_mysql_creation_fn(lambda: RunStorageSqlMetadata.create_all(conn))
            stamp_alembic_rev(alembic_config, conn)

        self.build_missing_indexes()

    super().__init__()
def __new__(cls, partition: str, entry: EventMetadataEntry):
    experimental_class_warning("PartitionMetadataEntry")
    return super(PartitionMetadataEntry, cls).__new__(
        cls,
        check.str_param(partition, "partition"),
        check.inst_param(entry, "entry", EventMetadataEntry),
    )
def __new__(cls, max_retries=1, delay=None):
    experimental_class_warning("RetryPolicy")
    return super().__new__(
        cls,
        max_retries=check.int_param(max_retries, "max_retries"),
        delay=check.opt_numeric_param(delay, "delay"),
    )
def __init__(self, mysql_url, inst_data=None):
    experimental_class_warning("MySQLEventLogStorage")
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.mysql_url = check.str_param(mysql_url, "mysql_url")
    self._disposed = False

    self._event_watcher = SqlPollingEventWatcher(self)

    # Default to not holding any connections open to prevent accumulating connections
    # per DagsterInstance
    self._engine = create_engine(
        self.mysql_url, isolation_level="AUTOCOMMIT", poolclass=db.pool.NullPool
    )
    self._secondary_index_cache = {}

    table_names = retry_mysql_connection_fn(db.inspect(self._engine).get_table_names)
    if "event_logs" not in table_names:
        with self._connect() as conn:
            alembic_config = mysql_alembic_config(__file__)
            retry_mysql_creation_fn(lambda: SqlEventLogStorageMetadata.create_all(conn))
            # This revision may be shared by any other dagster storage classes using the same DB
            stamp_alembic_rev(alembic_config, conn)

        # mark all secondary indexes to be used
        self.reindex()

    super().__init__()
def __init__(self, mysql_url, inst_data=None):
    experimental_class_warning("MySQLEventLogStorage")
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.mysql_url = check.str_param(mysql_url, "mysql_url")
    self._disposed = False

    self._event_watcher = SqlPollingEventWatcher(self)

    # Default to not holding any connections open to prevent accumulating connections
    # per DagsterInstance
    self._engine = create_engine(
        self.mysql_url,
        isolation_level="AUTOCOMMIT",
        poolclass=db.pool.NullPool,
    )
    self._secondary_index_cache = {}

    table_names = retry_mysql_connection_fn(db.inspect(self._engine).get_table_names)

    # Stamp and create tables if the main table does not exist (we can't check alembic
    # revision because alembic config may be shared with other storage classes)
    if "event_logs" not in table_names:
        retry_mysql_creation_fn(self._init_db)
        # mark all secondary indexes to be used
        self.reindex_events()
        self.reindex_assets()

    super().__init__()
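# Usage sketch (not from the snippets above): constructing the event log storage from a
# connection URL. Assumes the enclosing class is MySQLEventLogStorage; the URL,
# credentials, and database name below are placeholders.
event_log_storage = MySQLEventLogStorage(
    mysql_url="mysql+mysqlconnector://user:password@localhost:3306/dagster",  # hypothetical URL
)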
def __init__(
    self,
    hostname: str,
    port_number: Optional[int] = None,
    transport: Optional[Transport] = None,
    use_https: bool = False,
):
    experimental_class_warning(self.__class__.__name__)
    self._hostname = check.str_param(hostname, "hostname")
    self._port_number = check.opt_int_param(port_number, "port_number")
    self._use_https = check.bool_param(use_https, "use_https")
    self._url = (
        ("https://" if self._use_https else "http://")
        + (f"{self._hostname}:{self._port_number}" if self._port_number else self._hostname)
        + "/graphql"
    )
    self._transport = check.opt_inst_param(
        transport,
        "transport",
        Transport,
        default=RequestsHTTPTransport(url=self._url, use_json=True),
    )
    try:
        self._client = Client(transport=self._transport, fetch_schema_from_transport=True)
    except requests.exceptions.ConnectionError as exc:
        raise DagsterGraphQLClientError(
            f"Error when connecting to url {self._url}. "
            + f"Did you specify hostname: {self._hostname} "
            + (f"and port_number: {self._port_number} " if self._port_number else "")
            + "correctly?"
        ) from exc
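# Usage sketch for the constructor above, assuming the enclosing class is
# DagsterGraphQLClient (inferred from DagsterGraphQLClientError); the hostnames and
# port below are placeholders.
client = DagsterGraphQLClient("localhost", port_number=3000)
secure_client = DagsterGraphQLClient("dagit.example.com", use_https=True)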
def __new__(
    cls,
    max_retries: int = 1,
    delay: Optional[check.Numeric] = None,
    backoff: Optional[Backoff] = None,
    jitter: Optional[Jitter] = None,
):
    experimental_class_warning("RetryPolicy")
    if backoff is not None and delay is None:
        raise DagsterInvalidDefinitionError(
            "Can not set backoff on RetryPolicy without also setting delay"
        )

    if jitter is not None and delay is None:
        raise DagsterInvalidDefinitionError(
            "Can not set jitter on RetryPolicy without also setting delay"
        )

    return super().__new__(
        cls,
        max_retries=check.int_param(max_retries, "max_retries"),
        delay=check.opt_numeric_param(delay, "delay"),
        backoff=check.opt_inst_param(backoff, "backoff", Backoff),
        jitter=check.opt_inst_param(jitter, "jitter", Jitter),
    )
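# Usage sketch for the constructor above: since backoff and jitter require delay, a
# policy combining them must set all three. Backoff.EXPONENTIAL and Jitter.PLUS_MINUS
# are assumed members of the Backoff and Jitter enums referenced above.
policy = RetryPolicy(
    max_retries=3,
    delay=0.5,  # required whenever backoff or jitter is set
    backoff=Backoff.EXPONENTIAL,
    jitter=Jitter.PLUS_MINUS,
)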
def __init__(self, mysql_url, inst_data=None):
    experimental_class_warning("MySQLRunStorage")
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.mysql_url = mysql_url

    # Default to not holding any connections open to prevent accumulating connections
    # per DagsterInstance
    self._engine = create_engine(
        self.mysql_url,
        isolation_level="AUTOCOMMIT",
        poolclass=db.pool.NullPool,
    )

    self._index_migration_cache = {}
    table_names = retry_mysql_connection_fn(db.inspect(self._engine).get_table_names)

    # Stamp and create tables if the main table does not exist (we can't check alembic
    # revision because alembic config may be shared with other storage classes)
    if "runs" not in table_names:
        retry_mysql_creation_fn(self._init_db)
        self.migrate()
        self.optimize()
    elif "instance_info" not in table_names:
        InstanceInfo.create(self._engine)

    super().__init__()
def __new__(
    cls,
    instance,
):
    experimental_class_warning("ExecutableContext")
    return super(ExecutableContext, cls).__new__(
        cls,
        check.inst_param(instance, "instance", DagsterInstance),
    )
def __init__(self, hostname: str, port_number: Optional[int] = None):
    experimental_class_warning(self.__class__.__name__)
    self._hostname = check.str_param(hostname, "hostname")
    self._port_number = check.opt_int_param(port_number, "port_number")
    self._url = (
        "http://"
        + (f"{self._hostname}:{self._port_number}" if self._port_number else self._hostname)
        + "/graphql"
    )
    self._transport = RequestsHTTPTransport(url=self._url, use_json=True)
    self._client = Client(transport=self._transport, fetch_schema_from_transport=True)
def __init__(
    self,
    name,
    pipeline_name,
    evaluation_fn,
    solid_selection=None,
    mode=None,
):
    experimental_class_warning("SensorDefinition")
    super(SensorDefinition, self).__init__(
        name,
        job_type=JobType.SENSOR,
        pipeline_name=pipeline_name,
        mode=mode,
        solid_selection=solid_selection,
    )
    self._evaluation_fn = check.callable_param(evaluation_fn, "evaluation_fn")
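# Usage sketch for the definition above; the sensor and pipeline names are placeholders,
# and the evaluation function body is an assumption about what such callables return.
def _never_fires(_context):
    return []  # yield run requests here when the sensor should fire

my_sensor = SensorDefinition(
    name="my_sensor",
    pipeline_name="my_pipeline",
    evaluation_fn=_never_fires,
)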
def __init__(
    self, description, validation_fn, resulting_exception, raise_or_typecheck=True, name=None
):
    experimental_class_warning(self.__class__.__name__)
    if name is None:
        self.name = self.__class__.__name__
    else:
        self.name = name
    self.description = description
    # validation_fn should return a tuple of (bool, dict), where the dict is either
    # empty or holds extra params
    self.validation_fn = validation_fn
    self.resulting_exception = resulting_exception
    self.raise_or_typecheck = raise_or_typecheck
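# Usage sketch for the constraint-style class above; "MyConstraint" stands in for the
# real class name (unknown here, since the snippet uses self.__class__.__name__), and
# the validation_fn follows the (bool, dict) contract noted in the comment above.
positive = MyConstraint(
    description="values must be positive",
    validation_fn=lambda value: (value > 0, {}),
    resulting_exception=ValueError,
    raise_or_typecheck=True,
)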
def __init__(
    self,
    name,
    pipeline_name,
    run_config_fn=None,
    tags_fn=None,
    mode="default",
    solid_selection=None,
):
    experimental_class_warning("ExecutableDefinition")
    self._name = check.str_param(name, "name")
    self._pipeline_name = check.str_param(pipeline_name, "pipeline_name")
    self._run_config_fn = check.opt_callable_param(
        run_config_fn, "run_config_fn", lambda _context: {}
    )
    self._tags_fn = check.opt_callable_param(tags_fn, "tags_fn", lambda _context: {})
    self._mode = check.opt_str_param(mode, "mode", DEFAULT_MODE_NAME)
    self._solid_selection = check.opt_nullable_list_param(
        solid_selection, "solid_selection", of_type=str
    )
def __init__(self):
    experimental_class_warning("MyExperimentalClass")
def __init__(
    self,
    solid_defs: Optional[List[NodeDefinition]] = None,
    name: Optional[str] = None,
    description: Optional[str] = None,
    dependencies: Optional[
        Dict[Union[str, NodeInvocation], Dict[str, IDependencyDefinition]]
    ] = None,
    mode_defs: Optional[List[ModeDefinition]] = None,
    preset_defs: Optional[List[PresetDefinition]] = None,
    tags: Optional[Dict[str, Any]] = None,
    hook_defs: Optional[AbstractSet[HookDefinition]] = None,
    solid_retry_policy: Optional[RetryPolicy] = None,
    graph_def=None,
    _parent_pipeline_def=None,  # https://github.com/dagster-io/dagster/issues/2115
    version_strategy: Optional[VersionStrategy] = None,
):
    # If a graph is specified directly, use it
    if check.opt_inst_param(graph_def, "graph_def", GraphDefinition):
        self._graph_def = graph_def
        self._name = name or graph_def.name

    # Otherwise fall back to legacy construction
    else:
        if name is None:
            check.failed("name must be provided")
        self._name = name

        if solid_defs is None:
            check.failed("solid_defs must be provided")

        self._graph_def = GraphDefinition(
            name=name,
            dependencies=dependencies,
            node_defs=solid_defs,
            input_mappings=None,
            output_mappings=None,
            config=None,
            description=None,
        )

    # tags and description can exist on the graph as well, but since the
    # same graph may be in multiple pipelines/jobs, keep a separate layer
    self._description = check.opt_str_param(description, "description")
    self._tags = validate_tags(tags)

    self._current_level_node_defs = self._graph_def.node_defs

    mode_definitions = check.opt_list_param(mode_defs, "mode_defs", of_type=ModeDefinition)

    if not mode_definitions:
        mode_definitions = [ModeDefinition()]

    self._mode_definitions = mode_definitions

    seen_modes = set()
    for mode_def in mode_definitions:
        if mode_def.name in seen_modes:
            raise DagsterInvalidDefinitionError(
                (
                    'Two modes seen with the name "{mode_name}" in "{pipeline_name}". '
                    "Modes must have unique names."
                ).format(mode_name=mode_def.name, pipeline_name=self.name)
            )
        seen_modes.add(mode_def.name)

    self._hook_defs = check.opt_set_param(hook_defs, "hook_defs", of_type=HookDefinition)
    self._solid_retry_policy = check.opt_inst_param(
        solid_retry_policy, "solid_retry_policy", RetryPolicy
    )

    self._preset_defs = check.opt_list_param(preset_defs, "preset_defs", PresetDefinition)
    self._preset_dict: Dict[str, PresetDefinition] = {}
    for preset in self._preset_defs:
        if preset.name in self._preset_dict:
            raise DagsterInvalidDefinitionError(
                (
                    'Two PresetDefinitions seen with the name "{name}" in "{pipeline_name}". '
                    "PresetDefinitions must have unique names."
                ).format(name=preset.name, pipeline_name=self.name)
            )
        if preset.mode not in seen_modes:
            raise DagsterInvalidDefinitionError(
                (
                    'PresetDefinition "{name}" in "{pipeline_name}" '
                    'references mode "{mode}" which is not defined.'
                ).format(name=preset.name, pipeline_name=self.name, mode=preset.mode)
            )
        self._preset_dict[preset.name] = preset

    self._resource_requirements = {
        mode_def.name: _checked_resource_reqs_for_mode(
            mode_def,
            self._current_level_node_defs,
            self._graph_def._dagster_type_dict,
            self._graph_def._node_dict,
            self._hook_defs,
            self._graph_def._dependency_structure,
        )
        for mode_def in self._mode_definitions
    }

    # Recursively explore all nodes in this pipeline
    self._all_node_defs = _build_all_node_defs(self._current_level_node_defs)
    self._parent_pipeline_def = check.opt_inst_param(
        _parent_pipeline_def, "_parent_pipeline_def", PipelineDefinition
    )
    self._cached_run_config_schemas: Dict[str, "RunConfigSchema"] = {}
    self._cached_external_pipeline = None

    self.version_strategy = check.opt_inst_param(
        version_strategy, "version_strategy", VersionStrategy
    )

    if self.version_strategy is not None:
        experimental_class_warning("VersionStrategy")