Example #1
    def __init__(self, mysql_url, inst_data=None):
        experimental_class_warning("MySQLEventLogStorage")
        self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
        self.mysql_url = check.str_param(mysql_url, "mysql_url")
        self._disposed = False

        self._event_watcher = SqlPollingEventWatcher(self)

        # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
        self._engine = create_engine(
            self.mysql_url, isolation_level="AUTOCOMMIT", poolclass=db.pool.NullPool
        )
        self._secondary_index_cache = {}

        table_names = retry_mysql_connection_fn(db.inspect(self._engine).get_table_names)

        if "event_logs" not in table_names:
            with self._connect() as conn:
                alembic_config = mysql_alembic_config(__file__)
                retry_mysql_creation_fn(lambda: SqlEventLogStorageMetadata.create_all(conn))
                # This revision may be shared by any other dagster storage classes using the same DB
                stamp_alembic_rev(alembic_config, conn)

            # mark all secondary indexes to be used
            self.reindex()

        super().__init__()
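
Example #1 and most of the examples below share one initialization pattern: create an engine that holds no pooled connections, inspect for the main table, and, if it is missing, create all tables and stamp the current alembic revision. Below is a minimal sketch of that pattern using only public SQLAlchemy and Alembic APIs; the table definition and the alembic.ini path are placeholders, and dagster's retry wrappers are omitted.

    # Minimal sketch of the create-if-missing-then-stamp pattern, using public
    # SQLAlchemy/Alembic APIs only. The table definition and the alembic.ini
    # path are placeholders, not dagster's actual helpers.
    import sqlalchemy as db
    from sqlalchemy.pool import NullPool
    from alembic import command
    from alembic.config import Config

    metadata = db.MetaData()
    db.Table("event_logs", metadata, db.Column("id", db.Integer, primary_key=True))

    def init_storage(url):
        # NullPool: no connections stay open between operations, so long-lived
        # instances do not accumulate idle connections
        engine = db.create_engine(url, isolation_level="AUTOCOMMIT", poolclass=NullPool)
        if "event_logs" not in db.inspect(engine).get_table_names():
            metadata.create_all(engine)
            # record that the fresh schema is already at the latest revision
            command.stamp(Config("alembic.ini"), "head")  # placeholder config path
        return engine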
Example #2
    def _initdb(self, engine):
        alembic_config = get_alembic_config(__file__)

        try:
            SqlEventLogStorageMetadata.create_all(engine)
            engine.execute("PRAGMA journal_mode=WAL;")
            stamp_alembic_rev(alembic_config, engine)
        except (db.exc.DatabaseError, sqlite3.DatabaseError, sqlite3.OperationalError) as exc:
            # This is SQLite-specific handling for concurrency issues that can arise when, e.g.,
            # the root nodes of a pipeline execute simultaneously on Airflow with SQLite storage
            # configured and contend with each other to init the db. When we hit the following
            # errors, we know that another process is on the case and it's safe to continue:
            err_msg = str(exc)
            if not (
                "table event_logs already exists" in err_msg
                or "database is locked" in err_msg
                or "table alembic_version already exists" in err_msg
                or "UNIQUE constraint failed: alembic_version.version_num" in err_msg
            ):
                raise
            else:
                logging.info(
                    "SqliteEventLogStorage._initdb: Encountered apparent concurrent init, "
                    "swallowing {str_exc}".format(str_exc=err_msg)
                )
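
Example #2 (and Example #13 below) whitelist the same set of benign error messages. A hypothetical helper, not part of dagster, that centralizes that check:

    # Hypothetical helper centralizing the "another process already initialized
    # the db" check from Examples #2 and #13.
    BENIGN_INIT_ERRORS = (
        "table event_logs already exists",
        "database is locked",
        "table alembic_version already exists",
        "UNIQUE constraint failed: alembic_version.version_num",
    )

    def is_concurrent_init_error(exc):
        # True when the message indicates a concurrent initializer won the race
        err_msg = str(exc)
        return any(snippet in err_msg for snippet in BENIGN_INIT_ERRORS)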
Example #3
    def __init__(self, postgres_url, inst_data=None):
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)
        self.postgres_url = check.str_param(postgres_url, "postgres_url")
        self._disposed = False

        self._event_watcher = PostgresEventWatcher(self.postgres_url)

        # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
        self._engine = create_engine(self.postgres_url,
                                     isolation_level="AUTOCOMMIT",
                                     poolclass=db.pool.NullPool)
        self._secondary_index_cache = {}

        table_names = retry_pg_connection_fn(
            lambda: db.inspect(self._engine).get_table_names())

        if "event_logs" not in table_names:
            with self.connect() as conn:
                alembic_config = get_alembic_config(__file__)
                retry_pg_creation_fn(
                    lambda: SqlEventLogStorageMetadata.create_all(conn))

                # This revision may be shared by any other dagster storage classes using the same DB
                stamp_alembic_rev(alembic_config, conn)
Example #4
    def __init__(self, postgres_url, inst_data=None):
        self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
        self.postgres_url = postgres_url

        # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
        self._engine = create_engine(
            self.postgres_url,
            isolation_level="AUTOCOMMIT",
            poolclass=db.pool.NullPool,
        )

        self._index_migration_cache = {}
        table_names = retry_pg_connection_fn(lambda: db.inspect(self._engine).get_table_names())

        # Stamp and create tables if there's no previously stamped revision and the main table
        # doesn't exist (since we used to not stamp postgres storage when it was first created)
        if "runs" not in table_names:
            with self.connect() as conn:
                retry_pg_creation_fn(lambda: RunStorageSqlMetadata.create_all(conn))

                # This revision may be shared by any other dagster storage classes using the same DB
                stamp_alembic_rev(pg_alembic_config(__file__), conn)

            # mark all secondary indexes as built
            self.build_missing_indexes()

        super().__init__()
Example #5
    def __init__(self, mysql_url, inst_data=None):
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)
        self.mysql_url = mysql_url

        # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
        self._engine = create_engine(
            self.mysql_url,
            isolation_level="AUTOCOMMIT",
            poolclass=db.pool.NullPool,
        )

        self._index_migration_cache = {}
        table_names = retry_mysql_connection_fn(
            db.inspect(self._engine).get_table_names)

        if "runs" not in table_names:
            with self.connect() as conn:
                alembic_config = mysql_alembic_config(__file__)
                retry_mysql_creation_fn(
                    lambda: RunStorageSqlMetadata.create_all(conn))
                stamp_alembic_rev(alembic_config, conn)
            self.build_missing_indexes()

        super().__init__()
Example #6
    def from_local(cls, base_dir, inst_data=None):
        check.str_param(base_dir, "base_dir")
        mkdir_p(base_dir)
        conn_string = create_db_conn_string(base_dir, "runs")
        engine = create_engine(conn_string, poolclass=NullPool)
        alembic_config = get_alembic_config(__file__)

        should_mark_indexes = False
        with engine.connect() as connection:
            db_revision, head_revision = check_alembic_revision(
                alembic_config, connection)
            if not (db_revision and head_revision):
                RunStorageSqlMetadata.create_all(engine)
                engine.execute("PRAGMA journal_mode=WAL;")
                stamp_alembic_rev(alembic_config, connection)
                should_mark_indexes = True

            table_names = db.inspect(engine).get_table_names()
            if "instance_info" not in table_names:
                InstanceInfo.create(engine)

        run_storage = cls(conn_string, inst_data)

        if should_mark_indexes:
            run_storage.migrate()
            run_storage.optimize()

        return run_storage
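
In Examples #6, #9, #14, #16, and #17, check_alembic_revision returns the revision currently stamped in the database together with the head revision of the migration scripts, so the guard if not (db_revision and head_revision) fires whenever the database has not yet been stamped. A sketch of an equivalent check using public Alembic APIs, which may differ from dagster's actual implementation:

    # Sketch of a check_alembic_revision-style helper built on public Alembic
    # APIs. Either element of the returned pair can be None (in particular,
    # db_revision is None for an unstamped database).
    from alembic.migration import MigrationContext
    from alembic.script import ScriptDirectory

    def check_revision(alembic_config, connection):
        db_revision = MigrationContext.configure(connection).get_current_revision()
        head_revision = ScriptDirectory.from_config(alembic_config).get_current_head()
        return db_revision, head_revision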
Example #7
    def __init__(self,
                 postgres_url,
                 should_autocreate_tables=True,
                 inst_data=None):
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)
        self.postgres_url = postgres_url
        self.should_autocreate_tables = check.bool_param(
            should_autocreate_tables, "should_autocreate_tables")

        # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
        self._engine = create_engine(self.postgres_url,
                                     isolation_level="AUTOCOMMIT",
                                     poolclass=db.pool.NullPool)

        table_names = retry_pg_connection_fn(
            lambda: db.inspect(self._engine).get_table_names())

        missing_main_table = "schedules" not in table_names and "jobs" not in table_names
        if self.should_autocreate_tables and missing_main_table:
            with self.connect() as conn:
                alembic_config = pg_alembic_config(__file__)
                retry_pg_creation_fn(
                    lambda: ScheduleStorageSqlMetadata.create_all(conn))

                # This revision may be shared by any other dagster storage classes using the same DB
                stamp_alembic_rev(alembic_config, conn)

        super().__init__()
Example #8
    def _init_db(self):
        with self.connect() as conn:
            with conn.begin():
                ScheduleStorageSqlMetadata.create_all(conn)
                stamp_alembic_rev(pg_alembic_config(__file__), conn)

        # mark all the data migrations as applied
        self.migrate()
        self.optimize()
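
Example #8 differs from the SQLite variants above by wrapping table creation and stamping in a single transaction via conn.begin(); since PostgreSQL DDL is transactional, a failure rolls back to a clean slate rather than leaving a half-created schema. A self-contained sketch of the same shape, where metadata and stamp_fn stand in for the storage schema and the alembic stamp step:

    # Sketch of transactional init: engine.begin() yields a connection whose
    # transaction commits on success and rolls back on error. metadata and
    # stamp_fn are placeholders for the storage schema and the stamp step.
    from sqlalchemy import create_engine

    def init_db_transactional(url, metadata, stamp_fn):
        engine = create_engine(url)
        with engine.begin() as conn:  # commit on exit, rollback on exception
            metadata.create_all(conn)
            stamp_fn(conn)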
Example #9
    def _init_db(self):
        mkdir_p(self._base_dir)
        engine = create_engine(self._conn_string, poolclass=NullPool)
        SqlEventLogStorageMetadata.create_all(engine)
        engine.execute("PRAGMA journal_mode=WAL;")
        alembic_config = get_alembic_config(__file__)
        # close the connection promptly; stamp only if no revision is stamped yet
        with engine.connect() as connection:
            db_revision, head_revision = check_alembic_revision(alembic_config, connection)
            if not (db_revision and head_revision):
                stamp_alembic_rev(alembic_config, connection)
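
Each SQLite example issues PRAGMA journal_mode=WAL; after creating tables. Write-ahead logging is a persistent property of the database file that lets readers proceed concurrently with a single writer, which matters when several processes share one on-disk storage file. The effect can be verified with the stdlib driver:

    # Verifying the WAL pragma with the stdlib sqlite3 driver; "example.db" is
    # a placeholder path. SQLite echoes back the journal mode now in effect,
    # and the setting persists in the database file across connections.
    import sqlite3

    conn = sqlite3.connect("example.db")
    mode = conn.execute("PRAGMA journal_mode=WAL;").fetchone()[0]
    assert mode == "wal"
    conn.close()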
Example #10
    def from_local(base_dir, inst_data=None):
        check.str_param(base_dir, "base_dir")
        mkdir_p(base_dir)
        conn_string = create_db_conn_string(base_dir, "runs")
        engine = create_engine(conn_string, poolclass=NullPool)
        alembic_config = get_alembic_config(__file__)
        with engine.connect() as connection:
            db_revision, head_revision = check_alembic_revision(alembic_config, connection)
            if not (db_revision and head_revision):
                RunStorageSqlMetadata.create_all(engine)
                engine.execute("PRAGMA journal_mode=WAL;")
                stamp_alembic_rev(alembic_config, connection)

        return SqliteRunStorage(conn_string, inst_data)
Example #11
    def from_local(base_dir, inst_data=None):
        check.str_param(base_dir, 'base_dir')
        mkdir_p(base_dir)
        conn_string = create_db_conn_string(base_dir, 'schedules')
        engine = create_engine(conn_string, poolclass=NullPool)
        engine.execute('PRAGMA journal_mode=WAL;')
        ScheduleStorageSqlMetadata.create_all(engine)
        alembic_config = get_alembic_config(__file__)
        with engine.connect() as connection:
            db_revision, head_revision = check_alembic_revision(alembic_config, connection)
            if not (db_revision and head_revision):
                stamp_alembic_rev(alembic_config, connection)

        return SqliteScheduleStorage(conn_string, inst_data)
Example #12
def test_load_instance(conn_string, hostname):
    # Wipe the DB to ensure it is fresh
    MySQLEventLogStorage.wipe_storage(conn_string)
    MySQLRunStorage.wipe_storage(conn_string)
    MySQLScheduleStorage.wipe_storage(conn_string)
    engine = create_engine(conn_string, poolclass=NullPool)
    alembic_config = get_alembic_config(
        file_relative_path(__file__, "../dagster_mysql/__init__.py")
    )
    with engine.connect() as conn:
        stamp_alembic_rev(alembic_config, conn, rev=None, quiet=False)

    # Now load from scratch, verify it loads without errors
    with instance_for_test(overrides=yaml.safe_load(full_mysql_config(hostname))):
        pass
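
The stamp with rev=None resets the stored revision so the test exercises loading against a database that looks like it was never stamped. Assuming stamp_alembic_rev wraps alembic.command.stamp, the public-API equivalent is stamping back to "base", which clears the alembic_version table:

    # Hypothetical equivalent using public Alembic APIs; the config path is a
    # placeholder. Stamping to "base" clears the stored revision, leaving the
    # database looking as if it had never been stamped.
    from alembic import command
    from alembic.config import Config

    def unstamp(alembic_ini_path):
        command.stamp(Config(alembic_ini_path), "base")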
Example #13
    def _initdb(self, engine):
        alembic_config = get_alembic_config(__file__)

        retry_limit = 10

        while True:
            try:
                SqlEventLogStorageMetadata.create_all(engine)
                engine.execute("PRAGMA journal_mode=WAL;")

                with engine.connect() as connection:
                    db_revision, head_revision = check_alembic_revision(alembic_config, connection)

                if not (db_revision and head_revision):
                    stamp_alembic_rev(alembic_config, engine)

                break
            except (db.exc.DatabaseError, sqlite3.DatabaseError, sqlite3.OperationalError) as exc:
                # This is SQLite-specific handling for concurrency issues that can arise when
                # multiple processes (e.g. the dagit process and user code process) contend with
                # each other to init the db. When we hit the following errors, we know that another
                # process is on the case and we should retry.
                err_msg = str(exc)

                if not (
                    "table asset_keys already exists" in err_msg
                    or "table secondary_indexes already exists" in err_msg
                    or "table event_logs already exists" in err_msg
                    or "database is locked" in err_msg
                    or "table alembic_version already exists" in err_msg
                    or "UNIQUE constraint failed: alembic_version.version_num" in err_msg
                ):
                    raise

                if retry_limit == 0:
                    raise
                else:
                    logging.info(
                        "SqliteEventLogStorage._initdb: Encountered apparent concurrent init, "
                        "retrying ({retry_limit} retries left). Exception: {str_exc}".format(
                            retry_limit=retry_limit, str_exc=err_msg
                        )
                    )
                    time.sleep(0.2)
                    retry_limit -= 1
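
Example #13 bounds its retries with a fixed sleep; the retry_mysql_* and retry_pg_* wrappers in the other examples presumably guard against similar connection and creation races, though their implementations are not shown here. A generic sketch of such a helper:

    # Generic bounded-retry helper in the spirit of the loop above; not
    # dagster's actual retry_*_fn implementation.
    import time

    def retry_fn(fn, retry_limit=10, retry_wait=0.2, retryable=(Exception,)):
        while True:
            try:
                return fn()
            except retryable:
                if retry_limit <= 0:
                    raise
                retry_limit -= 1
                time.sleep(retry_wait)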
Example #14
    def _init_db(self):
        mkdir_p(self._base_dir)
        engine = create_engine(self._conn_string, poolclass=NullPool)
        alembic_config = get_alembic_config(__file__)

        should_mark_indexes = False
        with engine.connect() as connection:
            db_revision, head_revision = check_alembic_revision(alembic_config, connection)
            if not (db_revision and head_revision):
                SqlEventLogStorageMetadata.create_all(engine)
                engine.execute("PRAGMA journal_mode=WAL;")
                stamp_alembic_rev(alembic_config, connection)
                should_mark_indexes = True

        if should_mark_indexes:
            # mark all secondary indexes
            self.reindex_events()
            self.reindex_assets()
Example #15
    def __init__(self, mysql_url, inst_data=None):
        experimental_class_warning("MySQLScheduleStorage")
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)
        self.mysql_url = mysql_url

        # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
        self._engine = create_engine(self.mysql_url,
                                     isolation_level="AUTOCOMMIT",
                                     poolclass=db.pool.NullPool)

        table_names = retry_mysql_connection_fn(
            db.inspect(self._engine).get_table_names)
        if "jobs" not in table_names:
            with self.connect() as conn:
                alembic_config = mysql_alembic_config(__file__)
                retry_mysql_creation_fn(
                    lambda: ScheduleStorageSqlMetadata.create_all(conn))
                stamp_alembic_rev(alembic_config, conn)

        super().__init__()
Example #16
    def from_local(cls, base_dir, inst_data=None):
        check.str_param(base_dir, "base_dir")
        mkdir_p(base_dir)
        conn_string = create_db_conn_string(base_dir, "schedules")
        engine = create_engine(conn_string, poolclass=NullPool)
        alembic_config = get_alembic_config(__file__)

        should_migrate_data = False
        with engine.connect() as connection:
            db_revision, head_revision = check_alembic_revision(
                alembic_config, connection)
            if not (db_revision and head_revision):
                ScheduleStorageSqlMetadata.create_all(engine)
                engine.execute("PRAGMA journal_mode=WAL;")
                stamp_alembic_rev(alembic_config, connection)
                should_migrate_data = True

        schedule_storage = cls(conn_string, inst_data)
        if should_migrate_data:
            schedule_storage.migrate()
            schedule_storage.optimize()

        return schedule_storage
Example #17
    def from_local(base_dir, inst_data=None):
        check.str_param(base_dir, "base_dir")
        mkdir_p(base_dir)
        conn_string = create_db_conn_string(base_dir, "runs")
        engine = create_engine(conn_string, poolclass=NullPool)
        alembic_config = get_alembic_config(__file__)

        should_mark_indexes = False
        with engine.connect() as connection:
            db_revision, head_revision = check_alembic_revision(alembic_config, connection)
            if not (db_revision and head_revision):
                RunStorageSqlMetadata.create_all(engine)
                engine.execute("PRAGMA journal_mode=WAL;")
                stamp_alembic_rev(alembic_config, connection)
                should_mark_indexes = True

        run_storage = SqliteRunStorage(conn_string, inst_data)

        if should_mark_indexes:
            # mark all secondary indexes
            run_storage.build_missing_indexes()

        return run_storage
Example #18
    def _init_db(self):
        with self.connect() as conn:
            with conn.begin():
                RunStorageSqlMetadata.create_all(conn)
                # This revision may be shared by any other dagster storage classes using the same DB
                stamp_alembic_rev(pg_alembic_config(__file__), conn)
Example #19
    def _init_db(self):
        with self._connect() as conn:
            with conn.begin():
                SqlEventLogStorageMetadata.create_all(conn)
                stamp_alembic_rev(pg_alembic_config(__file__), conn)
Example #20
    def _init_db(self):
        with self.connect() as conn:
            with conn.begin():
                ScheduleStorageSqlMetadata.create_all(conn)
                stamp_alembic_rev(pg_alembic_config(__file__), conn)