コード例 #1
0
ファイル: test_etl.py プロジェクト: umeshh/usaspending-api
def test_delete_obsolete_rows(operations_fixture):
    """delete_obsolete_rows(t1, t2) is expected to remove the (9, 9) row from t2."""
    query = "select id1, id2 from t2 order by id1"

    rows_before = execute_sql(query)
    assert rows_before == [(1, 2), (4, 5), (9, 9)]

    operations.delete_obsolete_rows(ETLTable("t1"), ETLTable("t2"))

    rows_after = execute_sql(query)
    assert rows_after == [(1, 2), (4, 5)]
コード例 #2
0
ファイル: test_etl.py プロジェクト: umeshh/usaspending-api
def test_insert_missing_rows(operations_fixture):
    """insert_missing_rows(t1, t2) is expected to add the (8, 8) row to t2."""
    query = "select id1, id2 from t2 order by id1"

    rows_before = execute_sql(query)
    assert rows_before == [(1, 2), (4, 5), (9, 9)]

    operations.insert_missing_rows(ETLTable("t1"), ETLTable("t2"))

    rows_after = execute_sql(query)
    assert rows_after == [(1, 2), (4, 5), (8, 8), (9, 9)]
コード例 #3
0
ファイル: test_etl.py プロジェクト: umeshh/usaspending-api
def test_update_changed_rows(operations_fixture):
    """update_changed_rows(t1, t2) is expected to rewrite name/description on the (4, 5) row only."""
    query = "select id1, id2, name, description from t2 order by id1"

    expected_before = [
        (1, 2, "three", "four"),
        (4, 5, "not six", "not seven"),
        (9, 9, "nine", "nine"),
    ]
    rows_before = execute_sql(query)
    assert rows_before == expected_before

    operations.update_changed_rows(ETLTable("t1"), ETLTable("t2"))

    expected_after = [
        (1, 2, "three", "four"),
        (4, 5, "six", "seven"),
        (9, 9, "nine", "nine"),
    ]
    rows_after = execute_sql(query)
    assert rows_after == expected_after
コード例 #4
0
ファイル: test_etl.py プロジェクト: umeshh/usaspending-api
def test_stage_table(operations_fixture):
    """stage_table is expected to fill temporary table t3 with rows (1, 2), (4, 5) and (8, 8)."""
    source = ETLTable("t1")
    destination = ETLTable("t2")
    staging = ETLTemporaryTable("t3")

    # Staging is nothing more than a table copy.
    operations.stage_table(source, destination, staging)

    staged_rows = execute_sql("select id1, id2 from t3 order by id1")
    assert staged_rows == [(1, 2), (4, 5), (8, 8)]
コード例 #5
0
    def validate_deletions(source_temp_table, transaction_temp_table,
                           key_column, broker_sql, **kwargs):
        """
        Sanity check the pending deletions against the broker before trusting them.

        Gathers the key_column values found in either temp table (merged with a SQL union),
        substitutes them into broker_sql using % formatting and runs the result on the
        "data_broker" connection.  Every row the broker returns is treated as a key that should
        not be deleted and causes a RuntimeError.  Returns early, without querying the broker,
        when the temp tables yield no keys.  Extra keyword arguments are accepted and ignored.
        """
        with OneLineTimer(f"Validate {key_column}s deletions") as t:
            candidate_rows = execute_sql(f"""
                    select {key_column} from {source_temp_table}
                    union
                    select {key_column} from {transaction_temp_table}
                """)
            candidate_ids = tuple(row[0] for row in candidate_rows)
            if not candidate_ids:
                return

            # str() of a one element tuple carries a trailing comma, e.g. "(1,)", so a lone
            # id gets its parentheses added by hand instead.
            if len(candidate_ids) > 1:
                id_list = str(candidate_ids)
            else:
                id_list = f"({candidate_ids[0]})"
            sql = broker_sql % id_list

            with connections["data_broker"].cursor() as cursor:
                cursor.execute(sql)
                offending_ids = tuple(row[0] for row in cursor.fetchall())

            if offending_ids:
                # Keep the exception readable when the list of offenders is huge.
                if len(offending_ids) < 1000:
                    detail = offending_ids
                else:
                    detail = 'There are too many to list.'
                raise RuntimeError(
                    f"ERROR!  Somehow we managed to identify {key_column}s that should not be "
                    f"deleted!  {detail}"
                )

        t.log_message()
コード例 #6
0
ファイル: helpers.py プロジェクト: umeshh/usaspending-api
def get_last_closed_periods_per_year() -> List[ClosedPeriod]:
    """
    Build one ClosedPeriod per fiscal year found in dabs_submission_window_schedule.

    For each year the query picks the highest quarter and the highest month whose
    submission_reveal_date is not in the future.  Quarters and months are looked up separately
    and full outer joined on the year, so either fiscal_quarter or fiscal_month comes back as
    None when a year has no revealed period of that kind.
    """
    sql = """
        select  coalesce(q.submission_fiscal_year, m.submission_fiscal_year) as fiscal_year,
                q.submission_fiscal_quarter as fiscal_quarter,
                m.submission_fiscal_month as fiscal_month
        from    (
                    select  distinct on (submission_fiscal_year)
                            submission_fiscal_year, submission_fiscal_quarter
                    from    dabs_submission_window_schedule
                    where   is_quarter is true and
                            submission_reveal_date <= now()
                    order   by submission_fiscal_year, -submission_fiscal_quarter
                ) as q
                full outer join (
                    select  distinct on (submission_fiscal_year)
                            submission_fiscal_year, submission_fiscal_month
                    from    dabs_submission_window_schedule
                    where   is_quarter is false and
                            submission_reveal_date <= now()
                    order   by submission_fiscal_year, -submission_fiscal_month
                ) as m on m.submission_fiscal_year = q.submission_fiscal_year
    """
    # Each row is (fiscal_year, fiscal_quarter, fiscal_month), in select list order.
    return [
        ClosedPeriod(fiscal_year, fiscal_quarter, fiscal_month)
        for fiscal_year, fiscal_quarter, fiscal_month in execute_sql(sql)
    ]
コード例 #7
0
def get_query_columns(sql: str) -> List[str]:
    """ Discover a query's column names by running it wrapped in an always-false filter. """
    noop_query = SQL("select * from ({}) as t where false").format(SQL(sql))

    # IMPORTANT:  This only reads, but it is run on behalf of a writable operation.  It has to
    # go through the writable connection, otherwise objects that exist only inside our
    # transaction (if there is one) would be invisible to it.
    cursor = sql_helpers.execute_sql(
        noop_query,
        fetcher=sql_helpers.cursor_fetcher,
        read_only=False,
    )

    # The first item of each cursor.description entry is the column name.
    return [column[0] for column in cursor.description]
コード例 #8
0
    def _perform_validations(self):
        """
        Run validations.sql from the ETL DML directory and raise if it reports anything.

        The file is read as text, its {temp_table} placeholder is filled in with TEMP_TABLE_NAME
        and the resulting SQL is executed with read_only=False.  The first column of every
        returned row is taken as a problem message; if there are any, they are combined into a
        single RuntimeError.
        """
        template = (Path(self.etl_dml_sql_directory) / "validations.sql").read_text()
        rows = execute_sql(template.format(temp_table=TEMP_TABLE_NAME), read_only=False)

        problems = [row[0] for row in rows]
        if not problems:
            return

        details = "\n".join(problems)
        raise RuntimeError(
            f"The following {len(problems):,} problem(s) have been found with the agency file:\n{details}"
        )
コード例 #9
0
ファイル: test_etl.py プロジェクト: umeshh/usaspending-api
def test_identify_new_or_updated(operations_fixture):
    """identify_new_or_updated is expected to put rows (4, 5) and (8, 8) into temporary table t3."""
    source = ETLTable("t1")
    destination = ETLTable("t2")
    changes = ETLTemporaryTable("t3")

    operations.identify_new_or_updated(source, destination, changes)

    identified_rows = execute_sql("select id1, id2 from t3 order by id1")
    assert identified_rows == [(4, 5), (8, 8)]
コード例 #10
0
def get_ids(*temp_table_names):
    """
    Return the first column of every row selected from the given temp tables.

    The tables are combined with a SQL union, so identical rows are merged by the database
    before the first column is extracted.
    """
    selects = [f"select * from {table_name}" for table_name in temp_table_names]
    sql = " union ".join(selects)
    return [row[0] for row in execute_sql(sql)]