Example 1
def _validate_engine(con: sqlalchemy.engine.Engine) -> None:  # pragma: no cover
    if not isinstance(con, sqlalchemy.engine.Engine):
        raise exceptions.InvalidConnection(
            "Invalid 'con' argument, please pass a "
            "SQLAlchemy Engine. Use wr.db.get_engine(), "
            "wr.db.get_redshift_temp_engine() or wr.catalog.get_engine()")
Example 2
def _validate_connection(con: pg8000.Connection) -> None:
    if not isinstance(con, pg8000.Connection):
        raise exceptions.InvalidConnection(
            "Invalid 'conn' argument, please pass a "
            "pg8000.Connection object. Use pg8000.connect() to use "
            "credentials directly or wr.postgresql.connect() to fetch it from the Glue Catalog."
        )
Example 3
def _validate_connection(con: "pyodbc.Connection") -> None:
    if not isinstance(con, pyodbc.Connection):
        raise exceptions.InvalidConnection(
            "Invalid 'conn' argument, please pass a "
            "pyodbc.Connection object. Use pyodbc.connect() to use "
            "credentials directly or wr.sqlserver.connect() to fetch it from the Glue Catalog."
        )
Example 4
def _validate_connection(con: redshift_connector.Connection) -> None:
    if not isinstance(con, redshift_connector.Connection):
        raise exceptions.InvalidConnection(
            "Invalid 'conn' argument, please pass a "
            "redshift_connector.Connection object. Use redshift_connector.connect() to use "
            "credentials directly or wr.redshift.connect() to fetch it from the Glue Catalog."
        )
Example 5
def _get_connection_attributes_from_secrets_manager(
        secret_id: str, dbname: Optional[str],
        boto3_session: Optional[boto3.Session]) -> ConnectionAttributes:
    secret_value: Dict[str, Any] = secretsmanager.get_secret_json(
        name=secret_id, boto3_session=boto3_session)
    kind: str = secret_value["engine"]
    if dbname is not None:
        _dbname: str = dbname
    elif "dbname" in secret_value:
        _dbname = secret_value["dbname"]
    else:
        if kind != "redshift":
            raise exceptions.InvalidConnection(
                f"The secret {secret_id} MUST have a dbname property.")
        _dbname = _get_dbname(cluster_id=secret_value["dbClusterIdentifier"],
                              boto3_session=boto3_session)
    return ConnectionAttributes(
        kind=kind,
        user=secret_value["username"],
        password=secret_value["password"],
        host=secret_value["host"],
        port=secret_value["port"],
        database=_dbname,
        ssl_context=None,
    )
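For reference, a hedged sketch of the secret payload this function assumes; the keys mirror the lookups above and the values are placeholders:

secret_value = {
    "engine": "postgresql",    # read into `kind`
    "username": "db_user",
    "password": "****",
    "host": "my-host.example.com",
    "port": 5432,
    "dbname": "mydb",          # Redshift secrets may instead carry "dbClusterIdentifier"
}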
Example 6
def _get_connection_attributes_from_map(
        connection_details: Dict[str, str]) -> ConnectionAttributes:
    required_fields: List[str] = [
        "user", "pass", "host", "port", "dbname", "kind"
    ]
    if all(field in connection_details for field in required_fields):
        return ConnectionAttributes(
            kind=connection_details["kind"],
            user=connection_details["user"],
            password=connection_details["pass"],
            host=connection_details["host"],
            port=int(connection_details["port"]),
            database=connection_details["dbname"],
        )
    else:
        raise exceptions.InvalidConnection(
            f"All the required fields({required_fields}) must be set when using a map."
        )
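A minimal usage sketch; note that `port` may be given as a string because the function casts it with int():

attrs = _get_connection_attributes_from_map(
    connection_details={
        "kind": "postgresql",
        "user": "db_user",
        "pass": "****",
        "host": "my-host.example.com",
        "port": "5432",  # cast to int by the function
        "dbname": "mydb",
    }
)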
Example 7
def to_sql(df: pd.DataFrame, con: sqlalchemy.engine.Engine,
           **pandas_kwargs: Any) -> None:
    """Write records stored in a DataFrame to a SQL database.

    Support for **Redshift**, **PostgreSQL** and **MySQL**.

    Support for all pandas to_sql() arguments:
    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html

    Note
    ----
    Redshift: For large DataFrames (1MM+ rows) consider the function **wr.db.copy_to_redshift()**.

    Note
    ----
    Redshift: `index=False` will be forced.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
    con : sqlalchemy.engine.Engine
        SQLAlchemy Engine. Please use
        wr.db.get_engine(), wr.db.get_redshift_temp_engine() or wr.catalog.get_engine().
    pandas_kwargs
        KEYWORD arguments forwarded to pandas.DataFrame.to_sql(). You can NOT pass `pandas_kwargs`
        explicitly; just add valid pandas arguments in the function call and Wrangler will accept them.
        e.g. wr.db.to_sql(df, con=con, name="table_name", schema="schema_name", if_exists="replace", index=False)
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html

    Returns
    -------
    None
        None.

    Examples
    --------
    Writing to Redshift with temporary credentials

    >>> import awswrangler as wr
    >>> import pandas as pd
    >>> wr.db.to_sql(
    ...     df=pd.DataFrame({'col': [1, 2, 3]}),
    ...     con=wr.db.get_redshift_temp_engine(cluster_identifier="...", user="******"),
    ...     name="table_name",
    ...     schema="schema_name"
    ... )

    Writing to Redshift with temporary credentials and using pandas_kwargs

    >>> import awswrangler as wr
    >>> import pandas as pd
    >>> wr.db.to_sql(
    ...     df=pd.DataFrame({'col': [1, 2, 3]}),
    ...     con=wr.db.get_redshift_temp_engine(cluster_identifier="...", user="******"),
    ...     name="table_name",
    ...     schema="schema_name",
    ...     if_exists="replace",
    ...     index=False,
    ... )

    Writing to Redshift using a Glue Catalog Connection

    >>> import awswrangler as wr
    >>> import pandas as pd
    >>> wr.db.to_sql(
    ...     df=pd.DataFrame({'col': [1, 2, 3]}),
    ...     con=wr.catalog.get_engine(connection="..."),
    ...     name="table_name",
    ...     schema="schema_name"
    ... )

    """
    if "pandas_kwargs" in pandas_kwargs:
        raise exceptions.InvalidArgument(
            "You can NOT pass `pandas_kwargs` explicit, just add valid "
            "Pandas arguments in the function call and Wrangler will accept it."
            "e.g. wr.db.to_sql(df, con, name='...', schema='...', if_exists='replace')"
        )
    if df.empty:
        raise exceptions.EmptyDataFrame()
    if not isinstance(con, sqlalchemy.engine.Engine):
        raise exceptions.InvalidConnection(
            "Invalid 'con' argument, please pass a "
            "SQLAlchemy Engine. Use wr.db.get_engine(), "
            "wr.db.get_redshift_temp_engine() or wr.catalog.get_engine()")
    if "dtype" in pandas_kwargs:
        cast_columns: Dict[str, VisitableType] = pandas_kwargs["dtype"]
    else:
        cast_columns = {}
    dtypes: Dict[str, VisitableType] = _data_types.sqlalchemy_types_from_pandas(
        df=df, db_type=con.name, dtype=cast_columns
    )
    pandas_kwargs["dtype"] = dtypes
    pandas_kwargs["con"] = con
    if con.name.lower() == "redshift":  # Redshift does not accept index
        pandas_kwargs["index"] = False
    _utils.try_it(f=df.to_sql,
                  ex=sqlalchemy.exc.InternalError,
                  **pandas_kwargs)
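Since user-supplied dtype entries are merged into the inferred SQLAlchemy types, a hedged sketch of overriding a single column's type (df and engine set up as in the docstring examples above):

import sqlalchemy

wr.db.to_sql(
    df=df,
    con=engine,
    name="table_name",
    schema="schema_name",
    dtype={"col": sqlalchemy.types.VARCHAR(length=10)},  # "col" is forced to VARCHAR(10); other columns are inferred
)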
Example 8
def read_sql_query(
    sql: str,
    con: sqlalchemy.engine.Engine,
    index_col: Optional[Union[str, List[str]]] = None,
    params: Optional[Union[List, Tuple, Dict]] = None,
    chunksize: Optional[int] = None,
    dtype: Optional[Dict[str, pa.DataType]] = None,
) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]:
    """Return a DataFrame corresponding to the result set of the query string.

    Support for **Redshift**, **PostgreSQL** and **MySQL**.

    Note
    ----
    Redshift: For large extractions (1MM+ rows) consider the function **wr.db.unload_redshift()**.

    Parameters
    ----------
    sql : str
        SQL query to be executed.
    con : sqlalchemy.engine.Engine
        SQLAlchemy Engine. Please use
        wr.db.get_engine(), wr.db.get_redshift_temp_engine() or wr.catalog.get_engine().
    index_col : Union[str, List[str]], optional
        Column(s) to set as index (MultiIndex).
    params : Union[List, Tuple, Dict], optional
        List of parameters to pass to the execute method.
        The syntax used to pass parameters is database driver dependent.
        Check your database driver documentation for which of the five syntax styles,
        described in PEP 249's paramstyle, is supported.
        E.g. psycopg2 uses %(name)s, so use params={'name': 'value'}.
    chunksize : int, optional
        If specified, return an iterator where chunksize is the number of rows to include in each chunk.
    dtype : Dict[str, pyarrow.DataType], optional
        Specifying the datatype for columns.
        The keys should be the column names and the values should be the PyArrow types.

    Returns
    -------
    Union[pandas.DataFrame, Iterator[pandas.DataFrame]]
        Result as Pandas DataFrame(s).

    Examples
    --------
    Reading from Redshift with temporary credentials

    >>> import awswrangler as wr
    >>> df = wr.db.read_sql_query(
    ...     sql="SELECT * FROM public.my_table",
    ...     con=wr.db.get_redshift_temp_engine(cluster_identifier="...", user="******")
    ... )

    Reading from Redshift using a Glue Catalog Connection

    >>> import awswrangler as wr
    >>> df = wr.db.read_sql_query(
    ...     sql="SELECT * FROM public.my_table",
    ...     con=wr.catalog.get_engine(connection="...")
    ... )

    """
    if not isinstance(con, sqlalchemy.engine.Engine):  # pragma: no cover
        raise exceptions.InvalidConnection(
            "Invalid 'con' argument, please pass a "
            "SQLAlchemy Engine. Use wr.db.get_engine(), "
            "wr.db.get_redshift_temp_engine() or wr.catalog.get_engine()")
    with con.connect() as _con:
        args = _convert_params(sql, params)
        cursor = _con.execute(*args)
        if chunksize is None:
            return _records2df(records=cursor.fetchall(),
                               cols_names=cursor.keys(),
                               index=index_col,
                               dtype=dtype)
        return _iterate_cursor(cursor=cursor,
                               chunksize=chunksize,
                               index=index_col,
                               dtype=dtype)
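When chunksize is set, the function returns an iterator instead of a single frame; a hedged sketch, assuming an engine built as in the examples above:

for chunk in wr.db.read_sql_query(
    sql="SELECT * FROM public.my_table",
    con=engine,
    chunksize=10000,
):
    print(len(chunk))  # each chunk is a pandas.DataFrame with up to 10000 rows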
Example 9
def to_sql(df: pd.DataFrame, con: sqlalchemy.engine.Engine,
           **pandas_kwargs) -> None:
    """Write records stored in a DataFrame to a SQL database.

    Support for **Redshift**, **PostgreSQL** and **MySQL**.

    Support for all pandas to_sql() arguments:
    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html

    Note
    ----
    Redshift: For large DataFrames (1MM+ rows) consider the function **wr.db.copy_to_redshift()**.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
    con : sqlalchemy.engine.Engine
        SQLAlchemy Engine. Please use
        wr.db.get_engine(), wr.db.get_redshift_temp_engine() or wr.catalog.get_engine().
    pandas_kwargs
        keyword arguments forwarded to pandas.DataFrame.to_sql()
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html

    Returns
    -------
    None
        None.

    Examples
    --------
    Writing to Redshift with temporary credentials

    >>> import awswrangler as wr
    >>> import pandas as pd
    >>> wr.db.to_sql(
    ...     df=pd.DataFrame({'col': [1, 2, 3]}),
    ...     con=wr.db.get_redshift_temp_engine(cluster_identifier="...", user="******"),
    ...     name="table_name",
    ...     schema="schema_name"
    ... )

    Writing to Redshift using a Glue Catalog Connection

    >>> import awswrangler as wr
    >>> import pandas as pd
    >>> wr.db.to_sql(
    ...     df=pd.DataFrame({'col': [1, 2, 3]}),
    ...     con=wr.catalog.get_engine(connection="..."),
    ...     name="table_name",
    ...     schema="schema_name"
    ... )

    """
    if df.empty:  # pragma: no cover
        raise exceptions.EmptyDataFrame()
    if not isinstance(con, sqlalchemy.engine.Engine):  # pragma: no cover
        raise exceptions.InvalidConnection(
            "Invalid 'con' argument, please pass a "
            "SQLAlchemy Engine. Use wr.db.get_engine(), "
            "wr.db.get_redshift_temp_engine() or wr.catalog.get_engine()")
    if "dtype" in pandas_kwargs:
        cast_columns: Dict[str, VisitableType] = pandas_kwargs["dtype"]
    else:
        cast_columns = {}
    dtypes: Dict[str, VisitableType] = _data_types.sqlalchemy_types_from_pandas(
        df=df, db_type=con.name, dtype=cast_columns
    )
    pandas_kwargs["dtype"] = dtypes
    pandas_kwargs["con"] = con
    df.to_sql(**pandas_kwargs)
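Because this earlier variant forwards pandas_kwargs straight to DataFrame.to_sql(), plain pandas options apply unchanged; a hedged sketch using pandas' own batching arguments (df and engine as above):

wr.db.to_sql(
    df=df,
    con=engine,
    name="table_name",
    schema="schema_name",
    chunksize=1000,   # pandas batches the rows per INSERT round-trip
    method="multi",   # pandas issues multi-row INSERT statements
)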