Example #1
def execute_query(
        db_session: pg8000.Connection,
        type_object: typing.Type,
        sql_file: str,
        args: typing.Any = None,
        msg_id: typing.Optional[str] = None,
) -> list:
    """
    With a given sql_file and arguments sequentially ordered according to the sql_file,
    this function will execute a query to the database and return a list of objects
    of type_object
    :param db_session:
    :param type_object:
    :param sql_file:
    :param args:
    :param msg_id:
    :return:
    """
    try:
        with open(sql_file, "r") as file:
            query = file.read()
        cursor = db_session.cursor()
        return map_results_into_object(
            cursor=cursor,
            query=query,
            args=args,
            type_object=type_object,
        )
    except pg8000.DatabaseError as e:
        msg = (
            f'{msg_id} '
            f'Unexpected error: {e} '
            f'for sql file {sql_file} '
            f'and args {args}'
        )
        logging.getLogger().error(msg)
        db_session.rollback()
        db_session.close()
        raise DbException from e
    except IOError as e:
        msg = (
            f'{msg_id} '
            f'Unexpected error: {e} '
            f'for sql file {sql_file} '
            f'and args {args}'
        )
        logging.getLogger().error(msg)
        raise
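A minimal usage sketch, with hypothetical names throughout (`users.sql`, a `User` type that `map_results_into_object` is assumed to map rows into):

import pg8000

conn = pg8000.connect(user="app", password="secret", database="appdb")
# users.sql is assumed to contain a parameterized query such as
# "SELECT id, name FROM users WHERE id = %s"
users = execute_query(
    db_session=conn,
    type_object=User,  # hypothetical type mapped by map_results_into_object
    sql_file="users.sql",
    args=(42,),
    msg_id="req-42",
)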
Example #2
def dump_slt(conn: pg8000.Connection, query: str) -> None:
    """Print a sqllogictest record for the given query, followed by a
    record for its EXPLAIN output."""
    query = sqlparse.format(query.rstrip(),
                            reindent=True,
                            keyword_case="upper")
    cursor = conn.cursor()
    cursor.execute("ROLLBACK")  # clear any aborted transaction state first
    cursor.execute(query)
    row = cursor.fetchone()  # assumes the query returns at least one row
    cols = len(row)
    colspec = "I" * cols  # one "I" (integer column) per result column
    print(f"""

query {colspec}
{query}
----
9999999999 values hashing to YY

query T multiline
EXPLAIN {query}
----
EOF""")
Example #3
def commit(
        db_session: pg8000.Connection,
        msg_id: typing.Optional[str] = None,
):
    """
    Commit the current transaction and close the session.
    :param db_session: open pg8000 connection
    :param msg_id: identifier prepended to log messages
    :return:
    """
    try:
        db_session.commit()
        db_session.close()
    except pg8000.DatabaseError as e:
        msg = (
            f'{msg_id} '
            f'Unexpected error: {e} '
            f'while trying to do a '
            f'db commit'
        )
        logging.getLogger().error(msg)
        db_session.rollback()
        db_session.close()
        raise DbException from e
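A sketch of the intended call pattern (hypothetical table and id; pg8000's default paramstyle uses %s placeholders):

import pg8000

conn = pg8000.connect(user="app", password="secret", database="appdb")
cursor = conn.cursor()
cursor.execute("UPDATE users SET active = TRUE WHERE id = %s", (42,))
commit(conn, msg_id="req-42")  # commits the transaction and closes the session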
Example #4
def to_sql(
    df: pd.DataFrame,
    con: pg8000.Connection,
    table: str,
    schema: str,
    mode: str = "append",
    index: bool = False,
    dtype: Optional[Dict[str, str]] = None,
    varchar_lengths: Optional[Dict[str, int]] = None,
) -> None:
    """Write records stored in a DataFrame into PostgreSQL.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
    con : pg8000.Connection
        Use pg8000.connect() to use credentials directly or wr.postgresql.connect() to fetch it from the Glue Catalog.
    table : str
        Table name
    schema : str
        Schema name
    mode : str
        Append or overwrite.
    index : bool
        True to store the DataFrame index as a column in the table,
        otherwise False to ignore it.
    dtype: Dict[str, str], optional
        Dictionary of column names and PostgreSQL types to cast.
        Useful when you have columns with undetermined or mixed data types.
        (e.g. {'col name': 'TEXT', 'col2 name': 'FLOAT'})
    varchar_lengths : Dict[str, int], optional
        Dict of VARCHAR length by columns. (e.g. {"col1": 10, "col5": 200}).

    Returns
    -------
    None
        None.

    Examples
    --------
    Writing to PostgreSQL using a Glue Catalog Connection

    >>> import awswrangler as wr
    >>> con = wr.postgresql.connect("MY_GLUE_CONNECTION")
    >>> wr.postgresql.to_sql(
    ...     df=df,
    ...     table="my_table",
    ...     schema="public",
    ...     con=con
    ... )
    >>> con.close()

    """
    if df.empty:
        raise exceptions.EmptyDataFrame()
    _validate_connection(con=con)
    try:
        with con.cursor() as cursor:
            _create_table(
                df=df,
                cursor=cursor,
                table=table,
                schema=schema,
                mode=mode,
                index=index,
                dtype=dtype,
                varchar_lengths=varchar_lengths,
            )
            if index:
                df.reset_index(level=df.index.names, inplace=True)
            placeholders: str = ", ".join(["%s"] * len(df.columns))
            sql: str = f'INSERT INTO "{schema}"."{table}" VALUES ({placeholders})'
            _logger.debug("sql: %s", sql)
            parameters: List[List[Any]] = _db_utils.extract_parameters(df=df)
            cursor.executemany(sql, parameters)
            con.commit()
    except Exception as ex:
        con.rollback()
        _logger.error(ex)
        raise
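`_db_utils.extract_parameters` is an awswrangler internal whose implementation is not shown here; conceptually it produces one parameter list per DataFrame row, roughly like this sketch (a stand-in, not the library code):

import pandas as pd
from typing import Any, List

def extract_parameters_sketch(df: pd.DataFrame) -> List[List[Any]]:
    # One flat parameter list per row, in column order, with NaN mapped
    # to None so the driver sends SQL NULL.
    return [
        [None if pd.isna(value) else value for value in row]
        for row in df.itertuples(index=False, name=None)
    ]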
Example #5
def __init__(self, conn: Connection):
    self._conn = conn
    self._cur = conn.cursor()
def to_sql(
    df: pd.DataFrame,
    con: pg8000.Connection,
    table: str,
    schema: str,
    mode: str = "append",
    index: bool = False,
    dtype: Optional[Dict[str, str]] = None,
    varchar_lengths: Optional[Dict[str, int]] = None,
    use_column_names: bool = False,
    chunksize: int = 200,
) -> None:
    """Write records stored in a DataFrame into PostgreSQL.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
    con : pg8000.Connection
        Use pg8000.connect() to use credentials directly or wr.postgresql.connect() to fetch it from the Glue Catalog.
    table : str
        Table name
    schema : str
        Schema name
    mode : str
        Append or overwrite.
    index : bool
        True to store the DataFrame index as a column in the table,
        otherwise False to ignore it.
    dtype: Dict[str, str], optional
        Dictionary of column names and PostgreSQL types to cast.
        Useful when you have columns with undetermined or mixed data types.
        (e.g. {'col name': 'TEXT', 'col2 name': 'FLOAT'})
    varchar_lengths : Dict[str, int], optional
        Dict of VARCHAR length by columns. (e.g. {"col1": 10, "col5": 200}).
    use_column_names: bool
        If set to True, will use the column names of the DataFrame for generating the INSERT SQL Query.
        E.g. If the DataFrame has two columns `col1` and `col3` and `use_column_names` is True, data will only be
        inserted into the database columns `col1` and `col3`.
    chunksize: int
        Number of rows which are inserted with each SQL query. Defaults to inserting 200 rows per query.

    Returns
    -------
    None
        None.

    Examples
    --------
    Writing to PostgreSQL using a Glue Catalog Connection

    >>> import awswrangler as wr
    >>> con = wr.postgresql.connect("MY_GLUE_CONNECTION")
    >>> wr.postgresql.to_sql(
    ...     df=df,
    ...     table="my_table",
    ...     schema="public",
    ...     con=con
    ... )
    >>> con.close()

    """
    if df.empty:
        raise exceptions.EmptyDataFrame()
    _validate_connection(con=con)
    try:
        with con.cursor() as cursor:
            _create_table(
                df=df,
                cursor=cursor,
                table=table,
                schema=schema,
                mode=mode,
                index=index,
                dtype=dtype,
                varchar_lengths=varchar_lengths,
            )
            if index:
                df.reset_index(level=df.index.names, inplace=True)
            column_placeholders: str = ", ".join(["%s"] * len(df.columns))
            insertion_columns = ""
            if use_column_names:
                insertion_columns = f"({', '.join(df.columns)})"
            placeholder_parameter_pair_generator = _db_utils.generate_placeholder_parameter_pairs(
                df=df,
                column_placeholders=column_placeholders,
                chunksize=chunksize)
            for placeholders, parameters in placeholder_parameter_pair_generator:
                sql: str = f'INSERT INTO "{schema}"."{table}" {insertion_columns} VALUES {placeholders}'
                _logger.debug("sql: %s", sql)
                cursor.executemany(sql, (parameters, ))
            con.commit()
    except Exception as ex:
        con.rollback()
        _logger.error(ex)
        raise
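`_db_utils.generate_placeholder_parameter_pairs` is likewise internal; a rough sketch of the chunking idea it implements, under the assumption that each chunk yields a multi-row VALUES clause plus one flat parameter list (a stand-in, not the library code):

from typing import Any, Iterator, List, Tuple

import pandas as pd

def generate_placeholder_parameter_pairs_sketch(
    df: pd.DataFrame, column_placeholders: str, chunksize: int
) -> Iterator[Tuple[str, List[Any]]]:
    # Yields ("(%s, %s), (%s, %s), ...", [flat row values]) per chunk,
    # so each executemany call inserts up to `chunksize` rows at once.
    rows = [list(row) for row in df.itertuples(index=False, name=None)]
    for start in range(0, len(rows), chunksize):
        chunk = rows[start : start + chunksize]
        placeholders = ", ".join([f"({column_placeholders})"] * len(chunk))
        parameters = [value for row in chunk for value in row]
        yield placeholders, parameters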
def to_sql(
    df: pd.DataFrame,
    con: pg8000.Connection,
    table: str,
    schema: str,
    mode: str = "append",
    index: bool = False,
    dtype: Optional[Dict[str, str]] = None,
    varchar_lengths: Optional[Dict[str, int]] = None,
    use_column_names: bool = False,
    chunksize: int = 200,
    upsert_conflict_columns: Optional[List[str]] = None,
    insert_conflict_columns: Optional[List[str]] = None,
) -> None:
    """Write records stored in a DataFrame into PostgreSQL.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
    con : pg8000.Connection
        Use pg8000.connect() to use credentials directly or wr.postgresql.connect() to fetch it from the Glue Catalog.
    table : str
        Table name
    schema : str
        Schema name
    mode : str
        Append, overwrite or upsert.
            append: Inserts new records into table.
            overwrite: Drops table and recreates.
            upsert: Perform an upsert which checks for conflicts on columns given by `upsert_conflict_columns` and
            sets the new values on conflicts. Note that `upsert_conflict_columns` is required for this mode.
    index : bool
        True to store the DataFrame index as a column in the table,
        otherwise False to ignore it.
    dtype: Dict[str, str], optional
        Dictionary of column names and PostgreSQL types to cast.
        Useful when you have columns with undetermined or mixed data types.
        (e.g. {'col name': 'TEXT', 'col2 name': 'FLOAT'})
    varchar_lengths : Dict[str, int], optional
        Dict of VARCHAR length by columns. (e.g. {"col1": 10, "col5": 200}).
    use_column_names: bool
        If set to True, will use the column names of the DataFrame for generating the INSERT SQL Query.
        E.g. If the DataFrame has two columns `col1` and `col3` and `use_column_names` is True, data will only be
        inserted into the database columns `col1` and `col3`.
    chunksize: int
        Number of rows which are inserted with each SQL query. Defaults to inserting 200 rows per query.
    upsert_conflict_columns: List[str], optional
        This parameter is only supported if `mode` is set to `upsert`. In this case conflicts on the given
        columns are checked when evaluating the upsert.
    insert_conflict_columns: List[str], optional
        This parameter is only supported if `mode` is set to `append`. In this case conflicts on the given
        columns are handled with 'ON CONFLICT DO NOTHING'.

    Returns
    -------
    None
        None.

    Examples
    --------
    Writing to PostgreSQL using a Glue Catalog Connection

    >>> import awswrangler as wr
    >>> con = wr.postgresql.connect("MY_GLUE_CONNECTION")
    >>> wr.postgresql.to_sql(
    ...     df=df,
    ...     table="my_table",
    ...     schema="public",
    ...     con=con
    ... )
    >>> con.close()

    """
    if df.empty:
        raise exceptions.EmptyDataFrame("DataFrame cannot be empty.")

    mode = mode.strip().lower()
    allowed_modes = ["append", "overwrite", "upsert"]
    _db_utils.validate_mode(mode=mode, allowed_modes=allowed_modes)
    if mode == "upsert" and not upsert_conflict_columns:
        raise exceptions.InvalidArgumentValue(
            "<upsert_conflict_columns> needs to be set when using upsert mode."
        )
    _validate_connection(con=con)
    try:
        with con.cursor() as cursor:
            _create_table(
                df=df,
                cursor=cursor,
                table=table,
                schema=schema,
                mode=mode,
                index=index,
                dtype=dtype,
                varchar_lengths=varchar_lengths,
            )
            if index:
                df.reset_index(level=df.index.names, inplace=True)
            column_placeholders: str = ", ".join(["%s"] * len(df.columns))
            insertion_columns = ""
            upsert_str = ""
            if use_column_names:
                insertion_columns = f"({', '.join(df.columns)})"
            if mode == "upsert":
                upsert_columns = ", ".join(
                    df.columns.map(
                        lambda column: f"{column}=EXCLUDED.{column}"))
                conflict_columns = ", ".join(
                    upsert_conflict_columns)  # type: ignore
                upsert_str = f" ON CONFLICT ({conflict_columns}) DO UPDATE SET {upsert_columns}"
            if mode == "append" and insert_conflict_columns:
                conflict_columns = ", ".join(
                    insert_conflict_columns)  # type: ignore
                upsert_str = f" ON CONFLICT ({conflict_columns}) DO NOTHING"
            placeholder_parameter_pair_generator = _db_utils.generate_placeholder_parameter_pairs(
                df=df,
                column_placeholders=column_placeholders,
                chunksize=chunksize)
            for placeholders, parameters in placeholder_parameter_pair_generator:
                sql: str = f'INSERT INTO "{schema}"."{table}" {insertion_columns} VALUES {placeholders}{upsert_str}'
                _logger.debug("sql: %s", sql)
                cursor.executemany(sql, (parameters, ))
            con.commit()
    except Exception as ex:
        con.rollback()
        _logger.error(ex)
        raise
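For intuition, the statement built in upsert mode for a hypothetical DataFrame with columns id and name (use_column_names=True, upsert_conflict_columns=["id"], one chunk of two rows) would look roughly like:

# Illustrative only; the code above builds this as a single line:
#
#   INSERT INTO "public"."my_table" (id, name)
#   VALUES (%s, %s), (%s, %s)
#   ON CONFLICT (id) DO UPDATE SET id=EXCLUDED.id, name=EXCLUDED.name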