def execute_query(
    db_session: pg8000.Connection,
    type_object: typing.Type,
    sql_file: str,
    args: typing.Any = None,
    msg_id: str = None,
) -> list:
    """
    With a given sql_file and arguments sequentially ordered according to
    the sql_file, this function will execute a query to the database and
    return a list of objects of type_object

    :param db_session: open pg8000 connection; rolled back and closed on
        database errors
    :param type_object: type the result rows are mapped into
    :param sql_file: path of the file containing the SQL text to execute
    :param args: query parameters, ordered as the SQL file expects them
    :param msg_id: optional correlation id prefixed to log messages
    :return: list of ``type_object`` instances produced by
        ``map_results_into_object``
    :raises DbException: on any ``pg8000.DatabaseError`` (chained)
    :raises IOError: if the SQL file cannot be read
    """
    try:
        # Context manager guarantees the file handle is released even if
        # read() raises (the previous open/read/close leaked it on error).
        with open(sql_file, "r") as file:
            query = file.read()
        cursor = db_session.cursor()
        return map_results_into_object(
            cursor=cursor,
            query=query,
            args=args,
            type_object=type_object,
        )
    except pg8000.DatabaseError as e:
        msg = (
            f'{msg_id} '
            f'Unexpected error: {e} '
            f'for sql file {sql_file} '
            f'and args {args}'
        )
        logging.getLogger().error(msg)
        db_session.rollback()
        db_session.close()
        # Chain the cause so the driver error is kept in the traceback.
        raise DbException from e
    except IOError as e:
        msg = (
            f'{msg_id} '
            f'Unexpected error: {e} '
            f'for sql file {sql_file} '
            f'and args {args}'
        )
        logging.getLogger().error(msg)
        # Re-raise the original IOError instead of a fresh, empty one,
        # preserving the errno, message and traceback.
        raise
def dump_slt(conn: pg8000.Connection, query: str) -> None:
    """Print a sqllogictest-style record for *query* to stdout.

    Pretty-prints the SQL with ``sqlparse``, executes it once to discover
    the number of result columns, then emits a hash-based ``query`` record
    followed by a multiline ``EXPLAIN`` record.

    :param conn: open pg8000 connection used to run the query
    :param query: SQL text to format and execute
    """
    # Reformat for readable output; uppercased keywords, re-indented.
    query = sqlparse.format(query.rstrip(), reindent=True, keyword_case="upper")
    cursor = conn.cursor()
    # Clear any aborted-transaction state left by a previous failure so
    # the next execute is not rejected.
    cursor.execute("ROLLBACK")
    cursor.execute(query)
    row = cursor.fetchone()
    # NOTE(review): fetchone() returns None for an empty result set, which
    # would make len(row) raise TypeError — presumably queries always
    # return at least one row here; confirm with callers.
    cols = len(row)
    # One "I" (integer column marker) per result column in the SLT header.
    colspec = "I" * cols
    # "9999999999 values hashing to YY" is a placeholder line to be filled
    # in by the SLT tooling, not a real checksum.
    print(f"""
query {colspec}
{query}
----
9999999999 values hashing to YY

query T multiline
EXPLAIN {query}
----
EOF""")
def commit(
    db_session: pg8000.Connection,
    msg_id: str = None,
):
    """
    Commits the Database and closes the connection.

    On failure the transaction is rolled back, the connection is closed,
    and ``DbException`` is raised with the driver error chained as cause.

    :param db_session: open pg8000 connection to commit and close
    :param msg_id: optional correlation id prefixed to log messages
    :return: None
    :raises DbException: on any ``pg8000.DatabaseError``
    """
    try:
        db_session.commit()
        db_session.close()
    except pg8000.DatabaseError as e:
        msg = (
            f'{msg_id} '
            f'Unexpected error: {e} '
            f'while trying to do a '
            f'db commit'
        )
        logging.getLogger().error(msg)
        db_session.rollback()
        db_session.close()
        # Chain the cause so the underlying driver error is not lost.
        raise DbException from e
def to_sql(
    df: pd.DataFrame,
    con: pg8000.Connection,
    table: str,
    schema: str,
    mode: str = "append",
    index: bool = False,
    dtype: Optional[Dict[str, str]] = None,
    varchar_lengths: Optional[Dict[str, int]] = None,
) -> None:
    """Write records stored in a DataFrame into PostgreSQL.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
    con : pg8000.Connection
        Use pg8000.connect() to use credentials directly or
        wr.postgresql.connect() to fetch it from the Glue Catalog.
    table : str
        Table name
    schema : str
        Schema name
    mode : str
        Append or overwrite.
    index : bool
        True to store the DataFrame index as a column in the table,
        otherwise False to ignore it.
    dtype: Dict[str, str], optional
        Dictionary of columns names and PostgreSQL types to be casted.
        Useful when you have columns with undetermined or mixed data types.
        (e.g. {'col name': 'TEXT', 'col2 name': 'FLOAT'})
    varchar_lengths : Dict[str, int], optional
        Dict of VARCHAR length by columns. (e.g. {"col1": 10, "col5": 200}).

    Returns
    -------
    None
        None.

    Examples
    --------
    Writing to PostgreSQL using a Glue Catalog Connections

    >>> import awswrangler as wr
    >>> con = wr.postgresql.connect("MY_GLUE_CONNECTION")
    >>> wr.postgresql.to_sql(
    ...     df=df,
    ...     table="my_table",
    ...     schema="public",
    ...     con=con,
    ... )
    >>> con.close()

    """
    # `df.empty` is already a bool; comparing with `is True` is redundant.
    if df.empty:
        raise exceptions.EmptyDataFrame()
    _validate_connection(con=con)
    try:
        with con.cursor() as cursor:
            _create_table(
                df=df,
                cursor=cursor,
                table=table,
                schema=schema,
                mode=mode,
                index=index,
                dtype=dtype,
                varchar_lengths=varchar_lengths,
            )
            if index:
                # Promote the index to regular column(s) so it is inserted.
                df.reset_index(level=df.index.names, inplace=True)
            placeholders: str = ", ".join(["%s"] * len(df.columns))
            sql: str = f'INSERT INTO "{schema}"."{table}" VALUES ({placeholders})'
            _logger.debug("sql: %s", sql)
            parameters: List[List[Any]] = _db_utils.extract_parameters(df=df)
            # One executemany keeps the whole insert inside one transaction;
            # commit only after every row has been sent.
            cursor.executemany(sql, parameters)
            con.commit()
    except Exception as ex:
        # Roll back the partial transaction, log, and propagate unchanged.
        con.rollback()
        _logger.error(ex)
        raise
def __init__(self, conn: Connection):
    """Wrap an open database connection and eagerly open a cursor on it.

    :param conn: open database connection; a cursor is created
        immediately and kept for the lifetime of this object.
    """
    # Connection is kept alongside the cursor — presumably so commit/
    # rollback can be issued later; confirm against the rest of the class.
    self._conn = conn
    self._cur = conn.cursor()
def to_sql(
    df: pd.DataFrame,
    con: pg8000.Connection,
    table: str,
    schema: str,
    mode: str = "append",
    index: bool = False,
    dtype: Optional[Dict[str, str]] = None,
    varchar_lengths: Optional[Dict[str, int]] = None,
    use_column_names: bool = False,
    chunksize: int = 200,
) -> None:
    """Write records stored in a DataFrame into PostgreSQL.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
    con : pg8000.Connection
        Use pg8000.connect() to use credentials directly or
        wr.postgresql.connect() to fetch it from the Glue Catalog.
    table : str
        Table name
    schema : str
        Schema name
    mode : str
        Append or overwrite.
    index : bool
        True to store the DataFrame index as a column in the table,
        otherwise False to ignore it.
    dtype: Dict[str, str], optional
        Dictionary of columns names and PostgreSQL types to be casted.
        Useful when you have columns with undetermined or mixed data types.
        (e.g. {'col name': 'TEXT', 'col2 name': 'FLOAT'})
    varchar_lengths : Dict[str, int], optional
        Dict of VARCHAR length by columns. (e.g. {"col1": 10, "col5": 200}).
    use_column_names: bool
        If set to True, will use the column names of the DataFrame for
        generating the INSERT SQL Query. E.g. If the DataFrame has two
        columns `col1` and `col3` and `use_column_names` is True, data will
        only be inserted into the database columns `col1` and `col3`.
    chunksize: int
        Number of rows which are inserted with each SQL query. Defaults to
        inserting 200 rows per query.

    Returns
    -------
    None
        None.

    Examples
    --------
    Writing to PostgreSQL using a Glue Catalog Connections

    >>> import awswrangler as wr
    >>> con = wr.postgresql.connect("MY_GLUE_CONNECTION")
    >>> wr.postgresql.to_sql(
    ...     df=df,
    ...     table="my_table",
    ...     schema="public",
    ...     con=con,
    ... )
    >>> con.close()

    """
    # `df.empty` is already a bool; comparing with `is True` is redundant.
    if df.empty:
        raise exceptions.EmptyDataFrame()
    _validate_connection(con=con)
    try:
        with con.cursor() as cursor:
            _create_table(
                df=df,
                cursor=cursor,
                table=table,
                schema=schema,
                mode=mode,
                index=index,
                dtype=dtype,
                varchar_lengths=varchar_lengths,
            )
            if index:
                # Promote the index to regular column(s) so it is inserted.
                df.reset_index(level=df.index.names, inplace=True)
            column_placeholders: str = ", ".join(["%s"] * len(df.columns))
            insertion_columns = ""
            if use_column_names:
                # NOTE(review): column names are interpolated unquoted —
                # assumes they are valid, trusted SQL identifiers.
                insertion_columns = f"({', '.join(df.columns)})"
            placeholder_parameter_pair_generator = _db_utils.generate_placeholder_parameter_pairs(
                df=df, column_placeholders=column_placeholders, chunksize=chunksize)
            # Insert in chunks of `chunksize` rows; commit once at the end
            # so a failure rolls everything back.
            for placeholders, parameters in placeholder_parameter_pair_generator:
                sql: str = f'INSERT INTO "{schema}"."{table}" {insertion_columns} VALUES {placeholders}'
                _logger.debug("sql: %s", sql)
                cursor.executemany(sql, (parameters, ))
            con.commit()
    except Exception as ex:
        # Roll back the partial transaction, log, and propagate unchanged.
        con.rollback()
        _logger.error(ex)
        raise
def to_sql(
    df: pd.DataFrame,
    con: pg8000.Connection,
    table: str,
    schema: str,
    mode: str = "append",
    index: bool = False,
    dtype: Optional[Dict[str, str]] = None,
    varchar_lengths: Optional[Dict[str, int]] = None,
    use_column_names: bool = False,
    chunksize: int = 200,
    upsert_conflict_columns: Optional[List[str]] = None,
    insert_conflict_columns: Optional[List[str]] = None,
) -> None:
    """Write records stored in a DataFrame into PostgreSQL.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
    con : pg8000.Connection
        Use pg8000.connect() to use credentials directly or
        wr.postgresql.connect() to fetch it from the Glue Catalog.
    table : str
        Table name
    schema : str
        Schema name
    mode : str
        Append, overwrite or upsert.
            append: Inserts new records into table.
            overwrite: Drops table and recreates.
            upsert: Perform an upsert which checks for conflicts on columns
            given by `upsert_conflict_columns` and sets the new values on
            conflicts. Note that `upsert_conflict_columns` is required for
            this mode.
    index : bool
        True to store the DataFrame index as a column in the table,
        otherwise False to ignore it.
    dtype: Dict[str, str], optional
        Dictionary of columns names and PostgreSQL types to be casted.
        Useful when you have columns with undetermined or mixed data types.
        (e.g. {'col name': 'TEXT', 'col2 name': 'FLOAT'})
    varchar_lengths : Dict[str, int], optional
        Dict of VARCHAR length by columns. (e.g. {"col1": 10, "col5": 200}).
    use_column_names: bool
        If set to True, will use the column names of the DataFrame for
        generating the INSERT SQL Query. E.g. If the DataFrame has two
        columns `col1` and `col3` and `use_column_names` is True, data will
        only be inserted into the database columns `col1` and `col3`.
    chunksize: int
        Number of rows which are inserted with each SQL query. Defaults to
        inserting 200 rows per query.
    upsert_conflict_columns: List[str], optional
        This parameter is only supported if `mode` is set to `upsert`. In
        this case conflicts for the given columns are checked for
        evaluating the upsert.
    insert_conflict_columns: List[str], optional
        This parameter is only supported if `mode` is set to `append`. In
        this case conflicts for the given columns are checked for
        evaluating the insert 'ON CONFLICT DO NOTHING'.

    Returns
    -------
    None
        None.

    Examples
    --------
    Writing to PostgreSQL using a Glue Catalog Connections

    >>> import awswrangler as wr
    >>> con = wr.postgresql.connect("MY_GLUE_CONNECTION")
    >>> wr.postgresql.to_sql(
    ...     df=df,
    ...     table="my_table",
    ...     schema="public",
    ...     con=con,
    ... )
    >>> con.close()

    """
    # `df.empty` is already a bool; comparing with `is True` is redundant.
    if df.empty:
        raise exceptions.EmptyDataFrame("DataFrame cannot be empty.")

    # Normalize the mode before validating so " Upsert " etc. is accepted.
    mode = mode.strip().lower()
    allowed_modes = ["append", "overwrite", "upsert"]
    _db_utils.validate_mode(mode=mode, allowed_modes=allowed_modes)
    if mode == "upsert" and not upsert_conflict_columns:
        raise exceptions.InvalidArgumentValue(
            "<upsert_conflict_columns> needs to be set when using upsert mode."
        )
    _validate_connection(con=con)
    try:
        with con.cursor() as cursor:
            _create_table(
                df=df,
                cursor=cursor,
                table=table,
                schema=schema,
                mode=mode,
                index=index,
                dtype=dtype,
                varchar_lengths=varchar_lengths,
            )
            if index:
                # Promote the index to regular column(s) so it is inserted.
                df.reset_index(level=df.index.names, inplace=True)
            column_placeholders: str = ", ".join(["%s"] * len(df.columns))
            insertion_columns = ""
            upsert_str = ""
            if use_column_names:
                # NOTE(review): column names are interpolated unquoted —
                # assumes they are valid, trusted SQL identifiers.
                insertion_columns = f"({', '.join(df.columns)})"
            if mode == "upsert":
                # Every DataFrame column is overwritten from EXCLUDED on
                # conflict with the given key columns.
                upsert_columns = ", ".join(
                    df.columns.map(
                        lambda column: f"{column}=EXCLUDED.{column}"))
                conflict_columns = ", ".join(
                    upsert_conflict_columns)  # type: ignore
                upsert_str = f" ON CONFLICT ({conflict_columns}) DO UPDATE SET {upsert_columns}"
            if mode == "append" and insert_conflict_columns:
                # Plain append with duplicate keys silently skipped.
                conflict_columns = ", ".join(
                    insert_conflict_columns)  # type: ignore
                upsert_str = f" ON CONFLICT ({conflict_columns}) DO NOTHING"
            placeholder_parameter_pair_generator = _db_utils.generate_placeholder_parameter_pairs(
                df=df, column_placeholders=column_placeholders, chunksize=chunksize)
            # Insert in chunks of `chunksize` rows; commit once at the end
            # so a failure rolls everything back.
            for placeholders, parameters in placeholder_parameter_pair_generator:
                sql: str = f'INSERT INTO "{schema}"."{table}" {insertion_columns} VALUES {placeholders}{upsert_str}'
                _logger.debug("sql: %s", sql)
                cursor.executemany(sql, (parameters, ))
            con.commit()
    except Exception as ex:
        # Roll back the partial transaction, log, and propagate unchanged.
        con.rollback()
        _logger.error(ex)
        raise