def test_delete_obsolete_rows(operations_fixture):
    """delete_obsolete_rows should drop the (9, 9) row from t2 and keep the others."""
    query = "select id1, id2 from t2 order by id1"

    rows_before = execute_sql(query)
    assert rows_before == [(1, 2), (4, 5), (9, 9)]

    operations.delete_obsolete_rows(ETLTable("t1"), ETLTable("t2"))

    rows_after = execute_sql(query)
    assert rows_after == [(1, 2), (4, 5)]
def test_insert_missing_rows(operations_fixture):
    """insert_missing_rows should add the (8, 8) row to t2 and keep the existing rows."""
    query = "select id1, id2 from t2 order by id1"

    rows_before = execute_sql(query)
    assert rows_before == [(1, 2), (4, 5), (9, 9)]

    operations.insert_missing_rows(ETLTable("t1"), ETLTable("t2"))

    rows_after = execute_sql(query)
    assert rows_after == [(1, 2), (4, 5), (8, 8), (9, 9)]
def test_update_changed_rows(operations_fixture):
    """update_changed_rows should change name/description of the (4, 5) row in t2 only."""
    query = "select id1, id2, name, description from t2 order by id1"

    expected_before = [
        (1, 2, "three", "four"),
        (4, 5, "not six", "not seven"),
        (9, 9, "nine", "nine"),
    ]
    assert execute_sql(query) == expected_before

    operations.update_changed_rows(ETLTable("t1"), ETLTable("t2"))

    # Only the (4, 5) row is expected to differ after the update.
    expected_after = [
        (1, 2, "three", "four"),
        (4, 5, "six", "seven"),
        (9, 9, "nine", "nine"),
    ]
    assert execute_sql(query) == expected_after
def test_stage_table(operations_fixture):
    """stage_table should leave t3 holding the rows (1, 2), (4, 5) and (8, 8)."""
    source = ETLTable("t1")
    destination = ETLTable("t2")
    staging = ETLTemporaryTable("t3")

    # Just copies the table.
    operations.stage_table(source, destination, staging)

    staged_rows = execute_sql("select id1, id2 from t3 order by id1")
    assert staged_rows == [(1, 2), (4, 5), (8, 8)]
def validate_deletions(source_temp_table, transaction_temp_table, key_column, broker_sql, **kwargs):
    """
    Double check the keys slated for deletion before anything is removed.

    This is probably unnecessary, but a record may not have been copied over for
    whatever reason, and the check shouldn't take very long.

    The keys found in either temp table are substituted into broker_sql, which is
    run against the "data_broker" connection.  Any row that comes back is treated
    as a key that must not be deleted.

    Raises:
        RuntimeError: if the broker query returns one or more keys.
    """
    with OneLineTimer(f"Validate {key_column}s deletions") as t:
        candidate_sql = (
            f" select {key_column} from {source_temp_table}"
            f" union select {key_column} from {transaction_temp_table} "
        )
        ids = tuple(row[0] for row in execute_sql(candidate_sql))
        if not ids:
            return

        # str() of a one element tuple carries a trailing comma, "(1,)", so the
        # single key case is rendered by hand.
        if len(ids) > 1:
            id_list = str(ids)
        else:
            id_list = f"({ids[0]})"
        sql = broker_sql % id_list

        with connections["data_broker"].cursor() as cursor:
            cursor.execute(sql)
            ids = tuple(row[0] for row in cursor.fetchall())

        if ids:
            raise RuntimeError(
                f"ERROR!  Somehow we managed to identify {key_column}s that should not be "
                f"deleted!  {ids if len(ids) < 1000 else 'There are too many to list.'}"
            )

    # NOTE(review): logged once the timer context has closed -- confirm placement.
    t.log_message()
def get_last_closed_periods_per_year() -> List[ClosedPeriod]:
    """
    Build one ClosedPeriod per fiscal year found in dabs_submission_window_schedule.

    Only windows whose submission_reveal_date is not in the future are considered.
    fiscal_quarter or fiscal_month may be None if the year didn't have a
    corresponding period or the period hasn't passed its reveal date yet.
    """
    # "distinct on" combined with the descending sort keeps the highest revealed
    # quarter (q) and the highest revealed month (m) for each fiscal year.  The
    # full outer join keeps years that only appear on one side.
    sql = (
        " select"
        " coalesce(q.submission_fiscal_year, m.submission_fiscal_year) as fiscal_year,"
        " q.submission_fiscal_quarter as fiscal_quarter,"
        " m.submission_fiscal_month as fiscal_month"
        " from ("
        " select distinct on (submission_fiscal_year)"
        " submission_fiscal_year, submission_fiscal_quarter"
        " from dabs_submission_window_schedule"
        " where is_quarter is true and submission_reveal_date <= now()"
        " order by submission_fiscal_year, -submission_fiscal_quarter"
        " ) as q"
        " full outer join ("
        " select distinct on (submission_fiscal_year)"
        " submission_fiscal_year, submission_fiscal_month"
        " from dabs_submission_window_schedule"
        " where is_quarter is false and submission_reveal_date <= now()"
        " order by submission_fiscal_year, -submission_fiscal_month"
        " ) as m on m.submission_fiscal_year = q.submission_fiscal_year "
    )
    return [
        ClosedPeriod(fiscal_year, fiscal_quarter, fiscal_month)
        for fiscal_year, fiscal_quarter, fiscal_month in execute_sql(sql)
    ]
def get_query_columns(sql: str) -> List[str]:
    """
    Return the column names produced by a query without fetching any of its rows.

    The query is wrapped in an outer select filtered by "where false", and the
    names are read from the resulting cursor description.
    """
    noop_query = SQL("select * from ({}) as t where false").format(SQL(sql))

    # IMPORTANT:  Even though this is a read only operation, since this is being run in support of
    # a writable operation, we need to run it against the writable connection else we will be
    # unable to see objects living in our transaction if there is one.
    cursor = sql_helpers.execute_sql(
        noop_query,
        fetcher=sql_helpers.cursor_fetcher,
        read_only=False,
    )

    return [column[0] for column in cursor.description]
def _perform_validations(self):
    """
    Run validations.sql from the ETL DML SQL directory and fail on any result row.

    The script is formatted with the temp table name and executed with
    read_only=False.  The first column of each returned row is treated as a
    problem message.

    Raises:
        RuntimeError: if the script returns at least one row; the error lists
            every message on its own line.
    """
    script_path = Path(self.etl_dml_sql_directory) / "validations.sql"
    sql = script_path.read_text().format(temp_table=TEMP_TABLE_NAME)

    messages = [result[0] for result in execute_sql(sql, read_only=False)]
    if not messages:
        return

    m = "\n".join(messages)
    raise RuntimeError(
        f"The following {len(messages):,} problem(s) have been found with the agency file:\n{m}"
    )
def test_identify_new_or_updated(operations_fixture):
    """identify_new_or_updated should leave the (4, 5) and (8, 8) keys in t3."""
    source = ETLTable("t1")
    destination = ETLTable("t2")
    changes = ETLTemporaryTable("t3")

    operations.identify_new_or_updated(source, destination, changes)

    identified_rows = execute_sql("select id1, id2 from t3 order by id1")
    assert identified_rows == [(4, 5), (8, 8)]
def get_ids(*temp_table_names):
    """
    Return the first column of every row in the union of the given tables.

    Each table is read with "select *" and the selects are combined with
    "union", so duplicate rows are collapsed by the database.

    Args:
        *temp_table_names: names of the tables to read; interpolated into the
            SQL text as-is.

    Returns:
        A list holding the first column of each resulting row, or an empty
        list when no table names are supplied.
    """
    # With no tables the joined statement would be an empty string, which is
    # not a valid query.  There is nothing to read, so skip the database call.
    if not temp_table_names:
        return []

    sql = " union ".join(f"select * from {t}" for t in temp_table_names)
    return [row[0] for row in execute_sql(sql)]