Example #1
async def async_run_creates(sql_statements, wrapper):
    """Split sql_statements into individual statements and run each over a single asyncpg connection."""
    with wrapper:
        conn = await asyncpg.connect(dsn=get_database_dsn_string())
        for sql in sqlparse.split(sql_statements):
            stmt = await conn.prepare(sql)
            await stmt.fetch()
        await conn.close()
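A minimal way to drive this coroutine from synchronous code, assuming the wrapper argument is any ordinary context manager (contextlib.nullcontext stands in below, and the DDL string is made up for illustration):

import asyncio
from contextlib import nullcontext

ddl = "CREATE TABLE IF NOT EXISTS t1 (id int); CREATE INDEX IF NOT EXISTS ix_t1 ON t1 (id);"
asyncio.run(async_run_creates(ddl, wrapper=nullcontext()))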
def download_csv(count_sql, copy_sql, filename, job_id, skip_counts, verbose):
    """Download records to a CSV via psql, then optionally verify the row count against count_sql."""

    # Execute the COPY SQL to download records to CSV.
    # Prefer shell=False, but this command works; user input is limited, so the risk is low.
    subprocess.Popen("psql {} -c {}".format(get_database_dsn_string(), copy_sql), shell=True).wait()
    download_count = count_rows_in_delimited_file(filename, has_header=True, safe=False)
    printf({
        "msg": "Wrote {} to this file: {}".format(download_count, filename),
        "job": job_id,
        "f": "Download"
    })

    # If --skip_counts is not set, execute count_sql and compare its count to download_count
    if not skip_counts:
        sql_count = execute_sql_statement(count_sql, True, verbose)[0]["count"]
        if sql_count != download_count:
            msg = "Mismatch between CSV and DB rows! Expected: {} | Actual {} in: {}"
            printf({
                "msg": msg.format(sql_count, download_count, filename),
                "job": job_id,
                "f": "Download"
            })
            raise SystemExit(1)
    else:
        printf({
            "msg": "Skipping count comparison checks (sql vs download)",
            "job": job_id,
            "f": "Download"
        })

    return download_count
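As the comment notes, shell=True is tolerated here only because the inputs are trusted. A sketch of the same call without a shell, passing psql its arguments as a list so there is no quoting to get wrong (check=True also raises on a non-zero exit instead of silently continuing):

subprocess.run(["psql", get_database_dsn_string(), "-c", copy_sql], check=True)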
Example #3
    def handle(self, *args, **options):
        """Stage each source CSV in a temporary table, then replace the destination table's contents."""
        source_location = options["source_location"]
        logger.info(f"SOURCE CSV LOCATION: {source_location}")

        with psycopg2.connect(get_database_dsn_string()) as connection:
            with connection.cursor() as cursor:
                self.connection = connection
                self.cursor = cursor
                for file_name, table_name in FILE_TO_TABLE_MAPPING.items():
                    with Timer(f"Copy {file_name}"):
                        uri = os.path.join(source_location, file_name)
                        file_path = RetrieveFileFromUri(uri).copy_to_temporary_file()
                    with Timer(f"Get CSV headers from {file_name}"):
                        headers = self._get_headers(file_path)
                    with Timer(f"Create temporary table {table_name}"):
                        self._create_temporary_table(table_name, headers)
                    with Timer(f"Import {file_name}"):
                        self._import_file(file_path, table_name)
                    os.remove(file_path)

                destination_table_name = HistoricalAppropriationAccountBalances._meta.db_table
                with Timer(f"Empty {destination_table_name}"):
                    cursor.execute(f"delete from {destination_table_name}")
                with Timer(f"Import into {destination_table_name}"):
                    self._import_data()
Example #4
    def load_fpds_incrementally(self, date: Optional[datetime], chunk_size: int = CHUNK_SIZE) -> None:
        """Process incremental loads based on a date range or full data loads"""

        if date is None:
            logger.info("Skipping deletes. Fetching all fpds transactions...")
        else:
            logger.info(f"Handling fpds transactions since {date}...")

            detached_award_procurement_ids = retrieve_deleted_fpds_transactions(start_datetime=date)
            stale_awards = delete_stale_fpds(detached_award_procurement_ids)
            self.update_award_records(awards=stale_awards, skip_cd_linkage=True)

        with psycopg2.connect(dsn=get_database_dsn_string()) as connection:
            logger.info("Fetching records to update")
            total_records = self.get_cursor_for_date_query(connection, date, True).fetchall()[0][0]
            records_processed = 0
            logger.info("{} total records to update".format(total_records))
            cursor = self.get_cursor_for_date_query(connection, date)
            while True:
                id_list = cursor.fetchmany(chunk_size)
                if len(id_list) == 0:
                    break
                logger.info("Loading batch (size: {}) from date query...".format(len(id_list)))
                self.modified_award_ids.extend(load_fpds_transactions([row[0] for row in id_list]))
                records_processed += len(id_list)
                logger.info("{} out of {} processed".format(records_processed, total_records))
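Hypothetical calls (loader is a stand-in for an instance of this class): pass None to reload everything with no delete pass, or a datetime to process only transactions changed since that point:

from datetime import datetime, timezone

loader.load_fpds_incrementally(date=None)  # full load; deletes are skipped
loader.load_fpds_incrementally(date=datetime(2024, 1, 1, tzinfo=timezone.utc))  # incremental load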
Example #5
async def async_run_create(sql, verify_text=None):
    """Run a single SQL statement with asyncpg, optionally verifying the server's status message."""
    conn = await asyncpg.connect(dsn=get_database_dsn_string())
    stmt = await conn.prepare(sql)
    await stmt.fetch()
    response_msg = stmt.get_statusmsg()
    await conn.close()

    if verify_text:
        if response_msg != verify_text:
            raise RuntimeError("SQL did not return the correct response")
def execute_sql_statement(cmd, results=False, verbose=False):
    """ Simple function to execute SQL using a psycopg2 connection"""
    rows = None
    if verbose:
        print(cmd)

    with psycopg2.connect(dsn=get_database_dsn_string()) as connection:
        connection.autocommit = True
        with connection.cursor() as cursor:
            cursor.execute(cmd)
            if results:
                rows = db_rows_to_dict(cursor)
    return rows
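Usage mirrors the calls in the download_csv examples: with results=True each row comes back as a dictionary, so a count query can be read off directly (the table name here is illustrative):

rows = execute_sql_statement("SELECT count(*) AS count FROM my_table", results=True)
print(rows[0]["count"])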
def _fetch_reference_data():
    global SUBTIER_AGENCY_LIST_CACHE
    with psycopg2.connect(dsn=get_database_dsn_string()) as connection:
        with connection.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
            sql = (
                "SELECT * FROM subtier_agency "
                "JOIN agency "
                "ON subtier_agency.subtier_agency_id = agency.subtier_agency_id"
            )

            cursor.execute(sql)
            SUBTIER_AGENCY_LIST_CACHE = {result["subtier_code"]: result for result in cursor.fetchall()}
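After one call, the module-level cache supports direct lookups by subtier code (the code value below is purely illustrative):

_fetch_reference_data()
agency = SUBTIER_AGENCY_LIST_CACHE.get("1700")  # hypothetical subtier code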
def update_heartbeat(submission_id: int, processor_id: str) -> int:
    """
    We maintain a heartbeat on in-progress submissions so processing can be restarted in the event
    of a silent failure.  Returns the count of updated heartbeats.  Should always return 1.  If it
    doesn't then your submission no longer exists in the queue or someone else has claimed it and
    that's probably a problem.  This uses psycopg2 instead of Django because we need a connection
    outside of those managed by Django to ensure the heartbeat is outside of any outstanding
    transactions.
    """
    sql = f"""
        update  {DABSLoaderQueue._meta.db_table}
        set     heartbeat = %s::timestamptz
        where   submission_id = %s and processor_id = %s and state = %s
    """
    with psycopg2.connect(dsn=get_database_dsn_string()) as connection:
        with connection.cursor() as cursor:
            cursor.execute(sql, [now(), submission_id, processor_id, DABSLoaderQueue.IN_PROGRESS])
            return cursor.rowcount
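Per the docstring, a hypothetical caller should treat any count other than 1 as a lost claim (the id values here are made up):

if update_heartbeat(submission_id=12345, processor_id="worker-1") != 1:
    raise RuntimeError("Submission left the queue or was claimed by another processor")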
Example #9
def download_csv(count_sql, copy_sql, filename, job_id, skip_counts, verbose):
    """Run count_sql (unless skipped), download records via psql to a CSV, and verify the file's row count."""
    if skip_counts:
        count = None
        printf({
            "msg": "Skipping count checks. Writing file: {}".format(filename),
            "job": job_id,
            "f": "Download"
        })
    else:
        count = execute_sql_statement(count_sql, True, verbose)[0]["count"]
        printf({
            "msg": "Writing {} to this file: {}".format(count, filename),
            "job": job_id,
            "f": "Download"
        })
    # Prefer shell=False, but this command works; user input is limited, so the risk is low.
    subprocess.Popen("psql {} -c {}".format(get_database_dsn_string(), copy_sql), shell=True).wait()

    if not skip_counts:
        download_count = count_rows_in_delimited_file(filename, has_header=True, safe=False)
        if count != download_count:
            msg = "Mismatch between CSV and DB rows! Expected: {} | Actual {} in: {}"
            printf({
                "msg": msg.format(count, download_count, filename),
                "job": job_id,
                "f": "Download"
            })
            raise SystemExit(1)
    return count
def run_sql(sql, name):
    """Execute sql on a fresh psycopg2 connection, timing it under the label name."""
    with psycopg2.connect(dsn=get_database_dsn_string()) as connection:
        with connection.cursor() as cursor:
            with Timer(name):
                cursor.execute(sql)
Example #11
async def async_run_select(sql):
    """Open an asyncpg connection, fetch all rows for sql, and return them."""
    conn = await asyncpg.connect(dsn=get_database_dsn_string())
    sql_result = await conn.fetch(sql)
    await conn.close()
    return sql_result
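asyncpg's Connection.fetch() returns a list of Record objects that allow access by index or column name; a minimal synchronous caller:

import asyncio

rows = asyncio.run(async_run_select("SELECT 1 AS one"))
print(rows[0]["one"])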