Example #1
def watcher_thread(conn_string, run_id_dict, handlers_dict, dict_lock, watcher_thread_exit):
    try:
        # Listen for Postgres NOTIFY payloads; yield_on_timeout lets the loop wake
        # up periodically so the exit event can be checked even when it is quiet.
        for notif in await_pg_notifications(
            conn_string,
            channels=[CHANNEL_NAME],
            timeout=POLLING_CADENCE,
            yield_on_timeout=True,
            exit_event=watcher_thread_exit,
        ):
            if notif is None:
                if watcher_thread_exit.is_set():
                    break
            else:
                run_id, index_str = notif.payload.split('_')
                if run_id not in run_id_dict:
                    continue

                index = int(index_str)
                with dict_lock:
                    handlers = handlers_dict.get(run_id, [])

                # Create a short-lived NullPool engine so the watcher does not hold
                # a pooled connection open between notifications.
                engine = create_engine(
                    conn_string, isolation_level='AUTOCOMMIT', poolclass=db.pool.NullPool
                )
                try:
                    res = engine.execute(
                        db.select([SqlEventLogStorageTable.c.event]).where(
                            SqlEventLogStorageTable.c.id == index
                        ),
                    )
                    dagster_event = deserialize_json_to_dagster_namedtuple(res.fetchone()[0])
                finally:
                    engine.dispose()

                for (cursor, callback) in handlers:
                    if index >= cursor:
                        callback(dagster_event)
    except psycopg2.OperationalError:
        pass
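How this watcher is launched is not shown above; the following is a minimal sketch of wiring it up, assuming the shared state (run_id_dict, handlers_dict, dict_lock, watcher_thread_exit) is owned by the caller. The DSN and thread name are hypothetical.

import threading

conn_string = "postgresql://user:password@localhost:5432/dagster"  # hypothetical DSN

run_id_dict = {}                      # run_id -> marker; the watcher skips unknown run ids
handlers_dict = {}                    # run_id -> list of (cursor, callback) pairs
dict_lock = threading.Lock()          # guards handlers_dict
watcher_thread_exit = threading.Event()

watcher = threading.Thread(
    target=watcher_thread,
    args=(conn_string, run_id_dict, handlers_dict, dict_lock, watcher_thread_exit),
    name="postgres-event-watcher",    # hypothetical name
    daemon=True,
)
watcher.start()

# ... register (cursor, callback) pairs under dict_lock, then shut down:
watcher_thread_exit.set()
watcher.join()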
Example #2
    def __init__(self, postgres_url, inst_data=None):
        self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
        self.postgres_url = check.str_param(postgres_url, "postgres_url")
        self._disposed = False

        self._event_watcher = PostgresEventWatcher(self.postgres_url)

        # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
        self._engine = create_engine(
            self.postgres_url, isolation_level="AUTOCOMMIT", poolclass=db.pool.NullPool
        )
        self._secondary_index_cache = {}

        table_names = retry_pg_connection_fn(lambda: db.inspect(self._engine).get_table_names())

        if "event_logs" not in table_names:
            with self.connect() as conn:
                alembic_config = get_alembic_config(__file__)
                retry_pg_creation_fn(lambda: SqlEventLogStorageMetadata.create_all(conn))

                # This revision may be shared by any other dagster storage classes using the same DB
                stamp_alembic_rev(alembic_config, self._engine)
Example #3
    def __init__(self, postgres_url, inst_data=None):
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)
        self.postgres_url = postgres_url

        # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
        self._engine = create_engine(self.postgres_url,
                                     isolation_level="AUTOCOMMIT",
                                     poolclass=db.pool.NullPool)

        table_names = retry_pg_connection_fn(
            lambda: db.inspect(self._engine).get_table_names())

        missing_main_table = "schedules" not in table_names and "jobs" not in table_names
        if missing_main_table:
            with self.connect() as conn:
                alembic_config = get_alembic_config(__file__)
                retry_pg_creation_fn(
                    lambda: ScheduleStorageSqlMetadata.create_all(conn))

                # This revision may be shared by any other dagster storage classes using the same DB
                stamp_alembic_rev(alembic_config, conn)
Example #4
    def from_local(base_dir, inst_data=None):
        check.str_param(base_dir, "base_dir")
        mkdir_p(base_dir)
        conn_string = create_db_conn_string(base_dir, "runs")
        engine = create_engine(conn_string, poolclass=NullPool)
        alembic_config = get_alembic_config(__file__)

        should_mark_indexes = False
        with engine.connect() as connection:
            db_revision, head_revision = check_alembic_revision(alembic_config, connection)
            if not (db_revision and head_revision):
                RunStorageSqlMetadata.create_all(engine)
                engine.execute("PRAGMA journal_mode=WAL;")
                stamp_alembic_rev(alembic_config, connection)
                should_mark_indexes = True

        run_storage = SqliteRunStorage(conn_string, inst_data)

        if should_mark_indexes:
            # mark all secondary indexes
            run_storage.build_missing_indexes()

        return run_storage
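For illustration, a minimal sketch of how from_local above might be called, assuming SqliteRunStorage is in scope (it is presumably exposed as a staticmethod); the scratch directory is hypothetical.

import tempfile

base_dir = tempfile.mkdtemp()  # hypothetical scratch directory
run_storage = SqliteRunStorage.from_local(base_dir)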
Example #5
    def from_local(cls, base_dir, inst_data=None):
        check.str_param(base_dir, "base_dir")
        mkdir_p(base_dir)
        conn_string = create_db_conn_string(base_dir, "schedules")
        engine = create_engine(conn_string, poolclass=NullPool)
        alembic_config = get_alembic_config(__file__)

        should_migrate_data = False
        with engine.connect() as connection:
            db_revision, head_revision = check_alembic_revision(
                alembic_config, connection)
            if not (db_revision and head_revision):
                ScheduleStorageSqlMetadata.create_all(engine)
                engine.execute("PRAGMA journal_mode=WAL;")
                stamp_alembic_rev(alembic_config, connection)
                should_migrate_data = True

        schedule_storage = cls(conn_string, inst_data)
        if should_migrate_data:
            schedule_storage.migrate()
            schedule_storage.optimize()

        return schedule_storage
Example #6
    def __init__(self, mysql_url, inst_data=None):
        experimental_class_warning("MySQLScheduleStorage")
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)
        self.mysql_url = mysql_url

        # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
        self._engine = create_engine(
            self.mysql_url,
            isolation_level="AUTOCOMMIT",
            poolclass=db.pool.NullPool,
        )

        table_names = retry_mysql_connection_fn(
            db.inspect(self._engine).get_table_names)
        if "jobs" not in table_names:
            with self.connect() as conn:
                alembic_config = mysql_alembic_config(__file__)
                retry_mysql_creation_fn(
                    lambda: ScheduleStorageSqlMetadata.create_all(conn))
                stamp_alembic_rev(alembic_config, conn)

        super().__init__()
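For illustration only, a minimal sketch of constructing the storage above, assuming MySQLScheduleStorage is importable in the current scope; the DSN is hypothetical.

mysql_url = "mysql+mysqlconnector://user:password@localhost:3306/dagster"  # hypothetical DSN
schedule_storage = MySQLScheduleStorage(mysql_url)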
Example #7
    def connect(self, run_id=None):
        with self._db_lock:
            check.str_param(run_id, "run_id")

            conn_string = self.conn_string_for_run_id(run_id)
            engine = create_engine(conn_string, poolclass=NullPool)

            if run_id not in self._initialized_dbs:
                self._initdb(engine)
                self._initialized_dbs.add(run_id)

            conn = engine.connect()

            try:
                with handle_schema_errors(
                    conn,
                    get_alembic_config(__file__),
                    msg="SqliteEventLogStorage for run {run_id}".format(run_id=run_id),
                ):
                    yield conn
            finally:
                conn.close()
            engine.dispose()
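As a usage sketch only: assuming connect above is wrapped with contextlib.contextmanager and that storage is an instance of the SQLite event log storage class defining it, a caller might read from the yielded connection like this (the helper and table names are illustrative).

def count_events_for_run(storage, run_id):
    # connect() yields a live connection to the per-run SQLite shard.
    with storage.connect(run_id=run_id) as conn:
        return conn.execute("SELECT COUNT(*) FROM event_logs").scalar()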
Example #8
    def __init__(self, mysql_url, inst_data=None):
        experimental_class_warning("MySQLRunStorage")
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)
        self.mysql_url = mysql_url

        # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
        self._engine = create_engine(
            self.mysql_url,
            isolation_level="AUTOCOMMIT",
            poolclass=db.pool.NullPool,
        )

        self._index_migration_cache = {}
        table_names = retry_mysql_connection_fn(
            db.inspect(self._engine).get_table_names)

        # Stamp and create tables if the main table does not exist (we can't check alembic
        # revision because alembic config may be shared with other storage classes)
        if "runs" not in table_names:
            retry_mysql_creation_fn(self._init_db)
            self.build_missing_indexes()

        super().__init__()
Example #9
    def __init__(self, base_dir, inst_data=None):
        """Note that idempotent initialization of the SQLite database is done on a per-run_id
        basis in the body of connect, since each run is stored in a separate database."""
        self._base_dir = os.path.abspath(check.str_param(base_dir, "base_dir"))
        mkdir_p(self._base_dir)

        self._watchers = defaultdict(dict)
        self._obs = Observer()
        self._obs.start()
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)

        # Used to ensure that each run ID attempts to initialize its DB the first time it connects,
        # ensuring that the database will be created if it doesn't exist
        self._initialized_dbs = set()

        # Ensure that multiple threads (like the event log watcher) interact safely with each other
        self._db_lock = threading.Lock()

        if not os.path.exists(self.path_for_shard(INDEX_SHARD_NAME)):
            conn_string = self.conn_string_for_shard(INDEX_SHARD_NAME)
            engine = create_engine(conn_string, poolclass=NullPool)
            self._initdb(engine)
            self.reindex()
Example #10
    def optimize_for_dagit(self, statement_timeout):
        # When running in dagit, hold 1 open connection
        # https://github.com/dagster-io/dagster/issues/3719
        self._engine = create_engine(self.mysql_url,
                                     isolation_level="AUTOCOMMIT",
                                     pool_size=1)
Example #11
    def wipe_storage(mysql_url):
        engine = create_engine(mysql_url, isolation_level="AUTOCOMMIT", poolclass=db.pool.NullPool)
        try:
            ScheduleStorageSqlMetadata.drop_all(engine)
        finally:
            engine.dispose()
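A minimal sketch of exercising wipe_storage against a throwaway database, assuming the function is callable in the current scope; the DSN is hypothetical.

TEST_MYSQL_URL = "mysql+mysqlconnector://root:password@localhost:3306/test_schedules"  # hypothetical DSN

# Drop every schedule-storage table, e.g. between integration test runs.
wipe_storage(TEST_MYSQL_URL)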