def __init__(self, mysql_url, inst_data=None):
    experimental_class_warning("MySQLEventLogStorage")
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.mysql_url = check.str_param(mysql_url, "mysql_url")
    self._disposed = False

    self._event_watcher = SqlPollingEventWatcher(self)

    # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
    self._engine = create_engine(
        self.mysql_url, isolation_level="AUTOCOMMIT", poolclass=db.pool.NullPool
    )
    self._secondary_index_cache = {}

    table_names = retry_mysql_connection_fn(db.inspect(self._engine).get_table_names)
    if "event_logs" not in table_names:
        with self._connect() as conn:
            alembic_config = mysql_alembic_config(__file__)
            retry_mysql_creation_fn(lambda: SqlEventLogStorageMetadata.create_all(conn))
            # This revision may be shared by any other dagster storage classes using the same DB
            stamp_alembic_rev(alembic_config, conn)

        # mark all secondary indexes to be used
        self.reindex()

    super().__init__()
def _initdb(self, engine):
    alembic_config = get_alembic_config(__file__)
    try:
        SqlEventLogStorageMetadata.create_all(engine)
        engine.execute("PRAGMA journal_mode=WAL;")
        stamp_alembic_rev(alembic_config, engine)
    except (db.exc.DatabaseError, sqlite3.DatabaseError, sqlite3.OperationalError) as exc:
        # This is SQLite-specific handling for concurrency issues that can arise when, e.g.,
        # the root nodes of a pipeline execute simultaneously on Airflow with SQLite storage
        # configured and contend with each other to init the db. When we hit the following
        # errors, we know that another process is on the case and it's safe to continue:
        err_msg = str(exc)
        if not (
            "table event_logs already exists" in err_msg
            or "database is locked" in err_msg
            or "table alembic_version already exists" in err_msg
            or "UNIQUE constraint failed: alembic_version.version_num" in err_msg
        ):
            raise
        else:
            logging.info(
                "SqliteEventLogStorage._initdb: Encountered apparent concurrent init, "
                "swallowing {str_exc}".format(str_exc=err_msg)
            )
def __init__(self, postgres_url, inst_data=None):
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.postgres_url = check.str_param(postgres_url, "postgres_url")
    self._disposed = False

    self._event_watcher = PostgresEventWatcher(self.postgres_url)

    # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
    self._engine = create_engine(
        self.postgres_url, isolation_level="AUTOCOMMIT", poolclass=db.pool.NullPool
    )
    self._secondary_index_cache = {}

    table_names = retry_pg_connection_fn(lambda: db.inspect(self._engine).get_table_names())
    if "event_logs" not in table_names:
        with self.connect() as conn:
            alembic_config = get_alembic_config(__file__)
            retry_pg_creation_fn(lambda: SqlEventLogStorageMetadata.create_all(conn))
            # This revision may be shared by any other dagster storage classes using the same DB
            stamp_alembic_rev(alembic_config, conn)
def __init__(self, postgres_url, inst_data=None):
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.postgres_url = postgres_url

    # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
    self._engine = create_engine(
        self.postgres_url,
        isolation_level="AUTOCOMMIT",
        poolclass=db.pool.NullPool,
    )

    self._index_migration_cache = {}

    table_names = retry_pg_connection_fn(lambda: db.inspect(self._engine).get_table_names())

    # Stamp and create tables if there's no previously stamped revision and the main table
    # doesn't exist (since we used to not stamp postgres storage when it was first created)
    if "runs" not in table_names:
        with self.connect() as conn:
            retry_pg_creation_fn(lambda: RunStorageSqlMetadata.create_all(conn))
            # This revision may be shared by any other dagster storage classes using the same DB
            stamp_alembic_rev(pg_alembic_config(__file__), conn)

        # mark all secondary indexes as built
        self.build_missing_indexes()

    super().__init__()
def __init__(self, mysql_url, inst_data=None):
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.mysql_url = mysql_url

    # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
    self._engine = create_engine(
        self.mysql_url,
        isolation_level="AUTOCOMMIT",
        poolclass=db.pool.NullPool,
    )

    self._index_migration_cache = {}

    table_names = retry_mysql_connection_fn(db.inspect(self._engine).get_table_names)
    if "runs" not in table_names:
        with self.connect() as conn:
            alembic_config = mysql_alembic_config(__file__)
            retry_mysql_creation_fn(lambda: RunStorageSqlMetadata.create_all(conn))
            stamp_alembic_rev(alembic_config, conn)

        self.build_missing_indexes()

    super().__init__()
@classmethod
def from_local(cls, base_dir, inst_data=None):
    check.str_param(base_dir, "base_dir")
    mkdir_p(base_dir)
    conn_string = create_db_conn_string(base_dir, "runs")
    engine = create_engine(conn_string, poolclass=NullPool)
    alembic_config = get_alembic_config(__file__)

    should_mark_indexes = False
    with engine.connect() as connection:
        db_revision, head_revision = check_alembic_revision(alembic_config, connection)
        if not (db_revision and head_revision):
            RunStorageSqlMetadata.create_all(engine)
            engine.execute("PRAGMA journal_mode=WAL;")
            stamp_alembic_rev(alembic_config, connection)
            should_mark_indexes = True

    table_names = db.inspect(engine).get_table_names()
    if "instance_info" not in table_names:
        InstanceInfo.create(engine)

    run_storage = cls(conn_string, inst_data)

    if should_mark_indexes:
        run_storage.migrate()
        run_storage.optimize()

    return run_storage
def __init__(self, postgres_url, should_autocreate_tables=True, inst_data=None):
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.postgres_url = postgres_url
    self.should_autocreate_tables = check.bool_param(
        should_autocreate_tables, "should_autocreate_tables"
    )

    # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
    self._engine = create_engine(
        self.postgres_url, isolation_level="AUTOCOMMIT", poolclass=db.pool.NullPool
    )

    table_names = retry_pg_connection_fn(lambda: db.inspect(self._engine).get_table_names())
    missing_main_table = "schedules" not in table_names and "jobs" not in table_names
    if self.should_autocreate_tables and missing_main_table:
        with self.connect() as conn:
            alembic_config = pg_alembic_config(__file__)
            retry_pg_creation_fn(lambda: ScheduleStorageSqlMetadata.create_all(conn))
            # This revision may be shared by any other dagster storage classes using the same DB
            stamp_alembic_rev(alembic_config, conn)

    super().__init__()
def _init_db(self):
    with self.connect() as conn:
        with conn.begin():
            ScheduleStorageSqlMetadata.create_all(conn)
            stamp_alembic_rev(pg_alembic_config(__file__), conn)

    # mark all the data migrations as applied
    self.migrate()
    self.optimize()
def _init_db(self):
    mkdir_p(self._base_dir)
    engine = create_engine(self._conn_string, poolclass=NullPool)
    SqlEventLogStorageMetadata.create_all(engine)
    engine.execute("PRAGMA journal_mode=WAL;")
    alembic_config = get_alembic_config(__file__)
    # Use a context manager so the connection is closed; only stamp the alembic
    # revision if the database has not been stamped before.
    with engine.connect() as connection:
        db_revision, head_revision = check_alembic_revision(alembic_config, connection)
        if not (db_revision and head_revision):
            stamp_alembic_rev(alembic_config, engine)
def from_local(base_dir, inst_data=None):
    check.str_param(base_dir, "base_dir")
    mkdir_p(base_dir)
    conn_string = create_db_conn_string(base_dir, "runs")
    engine = create_engine(conn_string, poolclass=NullPool)
    alembic_config = get_alembic_config(__file__)
    with engine.connect() as connection:
        db_revision, head_revision = check_alembic_revision(alembic_config, connection)
        if not (db_revision and head_revision):
            RunStorageSqlMetadata.create_all(engine)
            engine.execute("PRAGMA journal_mode=WAL;")
            stamp_alembic_rev(alembic_config, connection)
    return SqliteRunStorage(conn_string, inst_data)
def from_local(base_dir, inst_data=None):
    check.str_param(base_dir, "base_dir")
    mkdir_p(base_dir)
    conn_string = create_db_conn_string(base_dir, "schedules")
    engine = create_engine(conn_string, poolclass=NullPool)
    engine.execute("PRAGMA journal_mode=WAL;")
    ScheduleStorageSqlMetadata.create_all(engine)
    alembic_config = get_alembic_config(__file__)
    # Use a context manager so the connection is closed once the revision check
    # and (if needed) the stamp are done.
    with engine.connect() as connection:
        db_revision, head_revision = check_alembic_revision(alembic_config, connection)
        if not (db_revision and head_revision):
            stamp_alembic_rev(alembic_config, engine)
    return SqliteScheduleStorage(conn_string, inst_data)
def test_load_instance(conn_string, hostname):
    # Wipe the DB to ensure it is fresh
    MySQLEventLogStorage.wipe_storage(conn_string)
    MySQLRunStorage.wipe_storage(conn_string)
    MySQLScheduleStorage.wipe_storage(conn_string)

    engine = create_engine(conn_string, poolclass=NullPool)
    alembic_config = get_alembic_config(
        file_relative_path(__file__, "../dagster_mysql/__init__.py")
    )
    with engine.connect() as conn:
        stamp_alembic_rev(alembic_config, conn, rev=None, quiet=False)

    # Now load from scratch, verify it loads without errors
    with instance_for_test(overrides=yaml.safe_load(full_mysql_config(hostname))):
        pass
def _initdb(self, engine):
    alembic_config = get_alembic_config(__file__)

    retry_limit = 10

    while True:
        try:
            SqlEventLogStorageMetadata.create_all(engine)
            engine.execute("PRAGMA journal_mode=WAL;")

            with engine.connect() as connection:
                db_revision, head_revision = check_alembic_revision(alembic_config, connection)

            if not (db_revision and head_revision):
                stamp_alembic_rev(alembic_config, engine)

            break
        except (db.exc.DatabaseError, sqlite3.DatabaseError, sqlite3.OperationalError) as exc:
            # This is SQLite-specific handling for concurrency issues that can arise when
            # multiple processes (e.g. the dagit process and user code process) contend with
            # each other to init the db. When we hit the following errors, we know that another
            # process is on the case and we should retry.
            err_msg = str(exc)

            if not (
                "table asset_keys already exists" in err_msg
                or "table secondary_indexes already exists" in err_msg
                or "table event_logs already exists" in err_msg
                or "database is locked" in err_msg
                or "table alembic_version already exists" in err_msg
                or "UNIQUE constraint failed: alembic_version.version_num" in err_msg
            ):
                raise

            if retry_limit == 0:
                raise
            else:
                logging.info(
                    "SqliteEventLogStorage._initdb: Encountered apparent concurrent init, "
                    "retrying ({retry_limit} retries left). Exception: {str_exc}".format(
                        retry_limit=retry_limit, str_exc=err_msg
                    )
                )
                time.sleep(0.2)
                retry_limit -= 1
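The MySQL and Postgres snippets lean on helpers such as retry_mysql_creation_fn and retry_pg_creation_fn, whose bodies are not shown here. A minimal sketch of what such a helper could look like, assuming it simply retries the creation callable on database errors with a fixed wait; the name retry_creation_fn, the retry limit, and the wait time are illustrative assumptions, not the actual implementation:

import time

import sqlalchemy as db


def retry_creation_fn(fn, retry_limit=5, retry_wait=0.2):
    # Hypothetical helper: retry `fn` while a concurrent process may be racing
    # to create the same tables. Names and defaults are assumptions.
    while True:
        try:
            return fn()
        except db.exc.DatabaseError:
            # e.g. "table already exists" raised by a concurrent initializer
            if retry_limit <= 0:
                raise
            retry_limit -= 1
            time.sleep(retry_wait)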
def _init_db(self):
    mkdir_p(self._base_dir)
    engine = create_engine(self._conn_string, poolclass=NullPool)
    alembic_config = get_alembic_config(__file__)

    should_mark_indexes = False
    with engine.connect() as connection:
        db_revision, head_revision = check_alembic_revision(alembic_config, connection)
        if not (db_revision and head_revision):
            SqlEventLogStorageMetadata.create_all(engine)
            engine.execute("PRAGMA journal_mode=WAL;")
            stamp_alembic_rev(alembic_config, connection)
            should_mark_indexes = True

    if should_mark_indexes:
        # mark all secondary indexes
        self.reindex_events()
        self.reindex_assets()
def __init__(self, mysql_url, inst_data=None): experimental_class_warning("MySQLScheduleStorage") self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData) self.mysql_url = mysql_url # Default to not holding any connections open to prevent accumulating connections per DagsterInstance self._engine = create_engine(self.mysql_url, isolation_level="AUTOCOMMIT", poolclass=db.pool.NullPool) table_names = retry_mysql_connection_fn( db.inspect(self._engine).get_table_names) if "jobs" not in table_names: with self.connect() as conn: alembic_config = mysql_alembic_config(__file__) retry_mysql_creation_fn( lambda: ScheduleStorageSqlMetadata.create_all(conn)) stamp_alembic_rev(alembic_config, conn) super().__init__()
@classmethod
def from_local(cls, base_dir, inst_data=None):
    check.str_param(base_dir, "base_dir")
    mkdir_p(base_dir)
    conn_string = create_db_conn_string(base_dir, "schedules")
    engine = create_engine(conn_string, poolclass=NullPool)
    alembic_config = get_alembic_config(__file__)

    should_migrate_data = False
    with engine.connect() as connection:
        db_revision, head_revision = check_alembic_revision(alembic_config, connection)
        if not (db_revision and head_revision):
            ScheduleStorageSqlMetadata.create_all(engine)
            engine.execute("PRAGMA journal_mode=WAL;")
            stamp_alembic_rev(alembic_config, connection)
            should_migrate_data = True

    schedule_storage = cls(conn_string, inst_data)
    if should_migrate_data:
        schedule_storage.migrate()
        schedule_storage.optimize()

    return schedule_storage
def from_local(base_dir, inst_data=None):
    check.str_param(base_dir, "base_dir")
    mkdir_p(base_dir)
    conn_string = create_db_conn_string(base_dir, "runs")
    engine = create_engine(conn_string, poolclass=NullPool)
    alembic_config = get_alembic_config(__file__)

    should_mark_indexes = False
    with engine.connect() as connection:
        db_revision, head_revision = check_alembic_revision(alembic_config, connection)
        if not (db_revision and head_revision):
            RunStorageSqlMetadata.create_all(engine)
            engine.execute("PRAGMA journal_mode=WAL;")
            stamp_alembic_rev(alembic_config, connection)
            should_mark_indexes = True

    run_storage = SqliteRunStorage(conn_string, inst_data)

    if should_mark_indexes:
        # mark all secondary indexes
        run_storage.build_missing_indexes()

    return run_storage
def _init_db(self):
    with self.connect() as conn:
        with conn.begin():
            RunStorageSqlMetadata.create_all(conn)
            # This revision may be shared by any other dagster storage classes using the same DB
            stamp_alembic_rev(pg_alembic_config(__file__), conn)
def _init_db(self):
    with self._connect() as conn:
        with conn.begin():
            SqlEventLogStorageMetadata.create_all(conn)
            stamp_alembic_rev(pg_alembic_config(__file__), conn)
def _init_db(self):
    with self.connect() as conn:
        with conn.begin():
            ScheduleStorageSqlMetadata.create_all(conn)
            stamp_alembic_rev(pg_alembic_config(__file__), conn)
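Across every backend, these snippets reduce to the same idiom: create the storage tables only if they are missing (or the alembic version table is unstamped), then stamp the current alembic revision so later migrations start from the right point. A distilled sketch of that shared shape, using names that appear in the snippets above; the init_storage wrapper itself is hypothetical:

def init_storage(storage, metadata, alembic_config):
    # Hypothetical wrapper distilling the shared pattern above; `storage.connect`,
    # the metadata object, and the alembic config come from the concrete class.
    with storage.connect() as conn:
        with conn.begin():
            metadata.create_all(conn)  # create any missing tables
            # This revision may be shared by other storage classes using the same DB
            stamp_alembic_rev(alembic_config, conn)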