def test_connection_leak(hostname, conn_string):
    '''Check that the number of open connections does not grow with instance count.

    Builds ``num_instances`` DagsterInstance objects that share one Postgres
    config, then counts the rows in ``pg_stat_activity``.

    Args:
        hostname: Passed to ``full_pg_config`` to build the instance overrides.
        conn_string: Connection string used for the counting query.
    '''
    num_instances = 20
    copies = []
    # The context manager removes the directory even when the assertion fails.
    with tempfile.TemporaryDirectory() as tempdir:
        try:
            for _ in range(num_instances):
                copies.append(
                    DagsterInstance.from_ref(
                        InstanceRef.from_dir(
                            tempdir, overrides=yaml.safe_load(full_pg_config(hostname))
                        )
                    )
                )

            with get_conn(conn_string).cursor() as curs:
                # count open connections
                curs.execute("SELECT count(*) from pg_stat_activity")
                res = curs.fetchall()

            # This includes a number of internal connections, so just ensure it did not scale
            # with number of instances
            assert res[0][0] < num_instances
        finally:
            # Dispose every instance that was created, whether or not the check passed,
            # and do so before the temporary directory is removed.
            for instance in copies:
                instance.dispose()
def watcher_thread(conn_string, queue, handlers_dict, dict_lock, watcher_thread_exit):
    '''Poll ``queue`` for watcher events and dispatch them to registered callbacks.

    Loops until an ``EventWatcherThreadEndEvents`` item is received or
    ``watcher_thread_exit`` is set, sleeping ``WATCHER_POLL_INTERVAL`` between polls.

    Args:
        conn_string: Connection string handed to ``get_conn`` for each event lookup.
        queue: Queue of incoming events; drained without blocking on every poll.
        handlers_dict: Maps a run_id to an iterable of ``(cursor, callback)`` pairs.
        dict_lock: Lock held while reading ``handlers_dict``.
        watcher_thread_exit: Object whose ``is_set()`` stops the loop when true.
    '''
    done = False
    while not done and not watcher_thread_exit.is_set():
        event_list = []
        while not queue.empty():
            try:
                event_list.append(queue.get_nowait())
            except Empty:
                # empty() is only a hint; the queue may have drained in the meantime
                pass

        for event in event_list:
            if not isinstance(event, EventWatcherThreadEvents):
                warnings.warn(
                    'Event watcher thread got unexpected event {event}'.format(event=event)
                )
                continue
            if isinstance(event, EventWatcherThreadNoopEvents):
                continue
            if isinstance(event, EventWatcherThreadEndEvents):
                # Finish the current batch, then leave the outer loop
                done = True
                continue

            assert isinstance(event, EventWatcherEvent)
            run_id, index_str = event.payload
            index = int(index_str)

            # Snapshot under the lock so the callbacks below run without holding it
            # and the list cannot change underneath the iteration.
            with dict_lock:
                handlers = list(handlers_dict.get(run_id, []))
            if not handlers:
                # Nobody is watching this run; skip the database round trip
                continue

            # One short-lived connection per event; close it explicitly so this
            # long-running thread does not rely on garbage collection to release it.
            conn = get_conn(conn_string)
            try:
                with conn.cursor() as curs:
                    curs.execute(SELECT_EVENT_LOG_SQL, (index,))
                    row = curs.fetchone()
            finally:
                conn.close()

            if row is None:
                # The row may have been deleted since the event was queued; skipping
                # keeps the thread alive instead of failing on ``None[0]``.
                warnings.warn(
                    'Event watcher thread found no event log row {index} for run {run_id}'.format(
                        index=index, run_id=run_id
                    )
                )
                continue

            dagster_event = deserialize_json_to_dagster_namedtuple(row[0])
            for (cursor, callback) in handlers:
                # Only notify handlers whose cursor is at or before this event
                if index >= cursor:
                    callback(dagster_event)

        time.sleep(WATCHER_POLL_INTERVAL)
def create_nuked_storage(conn_string):
    '''Run the drop and create statements for the event log, then build a storage.

    Args:
        conn_string (str): Connection string passed to ``get_conn`` and to the
            returned ``PostgresEventLogStorage``.

    Returns:
        PostgresEventLogStorage: Storage constructed from ``conn_string``.
    '''
    check.str_param(conn_string, 'conn_string')
    conn = get_conn(conn_string)
    try:
        with conn.cursor() as curs:
            curs.execute(DROP_EVENT_LOG_SQL)
            curs.execute(CREATE_EVENT_LOG_SQL)
    finally:
        # The setup connection is not needed once both statements have run
        conn.close()
    return PostgresEventLogStorage(conn_string)
def __init__(self, postgres_url, inst_data=None):
    '''Set up the storage: event watcher, event log table, and instance data.

    Args:
        postgres_url (str): Connection string; stored as ``self.conn_string``.
        inst_data (Optional[ConfigurableClassData]): Stored as ``self._inst_data``.
    '''
    self.conn_string = check.str_param(postgres_url, 'postgres_url')
    self._event_watcher = create_event_watcher(self.conn_string)
    conn = get_conn(self.conn_string)
    try:
        with conn.cursor() as curs:
            curs.execute(CREATE_EVENT_LOG_SQL)
    finally:
        # This connection only exists to run the setup statement; release it now
        conn.close()
    self._inst_data = check.opt_inst_param(inst_data, 'inst_data', ConfigurableClassData)
def store_event(self, event):
    '''Insert one event record into the ``event_log`` table.

    Args:
        event (EventRecord): The event to store. Its ``run_id`` is written next to
            the serialized event body.
    '''
    check.inst_param(event, 'event', EventRecord)
    connection = get_conn(self.conn_string)
    with connection.cursor() as db_cursor:
        params = (event.run_id, serialize_dagster_namedtuple(event))
        db_cursor.execute(
            'INSERT INTO event_log (run_id, event_body) VALUES (%s, %s)', params
        )
def store_event(self, event):
    '''Insert one event record into ``event_log`` and NOTIFY listeners on the channel.

    Args:
        event (EventRecord): The event to store. Its ``run_id`` is written next to
            the serialized event body, and the same body is sent as the NOTIFY payload.
    '''
    check.inst_param(event, 'event', EventRecord)
    connection = get_conn(self.conn_string)
    with connection.cursor() as db_cursor:
        serialized = serialize_dagster_namedtuple(event)
        # NOTE(review): Postgres limits NOTIFY payloads (under 8000 bytes by default);
        # confirm serialized events always fit, since both statements are sent together.
        statement = '''INSERT INTO event_log (run_id, event_body) VALUES (%s, %s); NOTIFY {channel}, %s; '''.format(
            channel=CHANNEL_NAME
        )
        db_cursor.execute(statement, (event.run_id, serialized, serialized))
def test_connection_leak(hostname, conn_string):
    '''Check that the number of open connections does not grow with instance count.

    Args:
        hostname: Passed to ``full_pg_config`` to build the instance overrides.
        conn_string: Connection string used for the counting query.
    '''
    num_instances = 20
    # Keep the instances referenced so they are still alive while we count
    copies = [
        DagsterInstance.local_temp(overrides=yaml.safe_load(full_pg_config(hostname)))
        for _ in range(num_instances)
    ]

    connection = get_conn(conn_string)
    with connection.cursor() as db_cursor:
        # count open connections
        db_cursor.execute("SELECT count(*) from pg_stat_activity")
        rows = db_cursor.fetchall()

    open_connections = rows[0][0]
    # This includes a number of internal connections, so just ensure it did not scale
    # with number of instances
    assert open_connections < num_instances
def get_logs_for_run(self, run_id, cursor=-1):
    '''Fetch the stored events for a run, optionally skipping the first ones.

    Args:
        run_id (str): The id of the run for which to fetch logs.
        cursor (Optional[int]): Zero-indexed position; rows after ``cursor`` are
            returned, so the default of -1 returns every row. Must be >= -1.

    Returns:
        list: The deserialized ``event_body`` of each selected row.
    '''
    check.str_param(run_id, 'run_id')
    check.int_param(cursor, 'cursor')
    check.invariant(cursor >= -1, 'Cursor must be -1 or greater')

    connection = get_conn(self.conn_string)
    with connection.cursor() as db_cursor:
        # NOTE(review): the query has no ORDER BY, so which rows OFFSET skips depends
        # on the order Postgres happens to return them -- confirm this is intended.
        fetch_sql = 'SELECT event_body FROM event_log WHERE run_id = %s OFFSET %s;'
        db_cursor.execute(fetch_sql, (run_id, cursor + 1))
        rows = db_cursor.fetchall()

    return [deserialize_json_to_dagster_namedtuple(row[0]) for row in rows]
def fetch_all_events(conn_string):
    '''Return every ``event_body`` row from the ``event_log`` table.

    Args:
        conn_string: Connection string passed to ``get_conn``.

    Returns:
        The result of ``fetchall()``: one single-column row per stored event.
    '''
    conn = get_conn(conn_string)
    try:
        with conn.cursor() as curs:
            curs.execute('SELECT event_body from event_log')
            return curs.fetchall()
    finally:
        # Release the connection explicitly instead of waiting for garbage collection
        conn.close()
def conn(conn_string):  # pylint: disable=redefined-outer-name
    '''Open a connection, run the drop and create table statements, and return it.

    The connection is handed back open; closing it is left to the caller.
    '''
    connection = get_conn(conn_string)
    for statement in (DROP_TABLE_SQL, CREATE_TABLE_SQL):
        connection.cursor().execute(statement)
    return connection
def wipe(self):
    '''Clear the log storage by executing ``DELETE_EVENT_LOG_SQL``.'''
    connection = get_conn(self.conn_string)
    with connection.cursor() as db_cursor:
        db_cursor.execute(DELETE_EVENT_LOG_SQL)
def __init__(self, postgres_url, inst_data=None):
    '''Set up the storage: event watcher, event log table, and base-class state.

    Args:
        postgres_url (str): Connection string; stored as ``self.conn_string``.
        inst_data: Forwarded unchanged to the parent class ``__init__``.
    '''
    self.conn_string = check.str_param(postgres_url, 'postgres_url')
    self._event_watcher = create_event_watcher(self.conn_string)
    conn = get_conn(self.conn_string)
    try:
        with conn.cursor() as curs:
            curs.execute(CREATE_EVENT_LOG_SQL)
    finally:
        # This connection only exists to run the setup statement; release it now
        conn.close()
    super(PostgresEventLogStorage, self).__init__(inst_data=inst_data)
def delete_events(self, run_id):
    '''Execute ``DELETE_EVENT_LOG_SQL`` with ``run_id`` as its only parameter.

    Args:
        run_id: Value bound to the statement's single placeholder.
    '''
    params = (run_id,)
    connection = get_conn(self.conn_string)
    with connection.cursor() as db_cursor:
        db_cursor.execute(DELETE_EVENT_LOG_SQL, params)