def _validate_engine(con: sqlalchemy.engine.Engine) -> None:  # pragma: no cover
    """Raise ``InvalidConnection`` unless *con* is a SQLAlchemy Engine."""
    if isinstance(con, sqlalchemy.engine.Engine):
        return
    raise exceptions.InvalidConnection(
        "Invalid 'con' argument, please pass a "
        "SQLAlchemy Engine. Use wr.db.get_engine(), "
        "wr.db.get_redshift_temp_engine() or wr.catalog.get_engine()"
    )
def _validate_connection(con: pg8000.Connection) -> None:
    """Raise ``InvalidConnection`` unless *con* is a pg8000 Connection."""
    if isinstance(con, pg8000.Connection):
        return
    raise exceptions.InvalidConnection(
        "Invalid 'conn' argument, please pass a "
        "pg8000.Connection object. Use pg8000.connect() to use "
        "credentials directly or wr.postgresql.connect() to fetch it from the Glue Catalog."
    )
def _validate_connection(con: "pyodbc.Connection") -> None:
    """Raise ``InvalidConnection`` unless *con* is a pyodbc Connection."""
    if isinstance(con, pyodbc.Connection):
        return
    raise exceptions.InvalidConnection(
        "Invalid 'conn' argument, please pass a "
        "pyodbc.Connection object. Use pyodbc.connect() to use "
        "credentials directly or wr.sqlserver.connect() to fetch it from the Glue Catalog."
    )
def _validate_connection(con: redshift_connector.Connection) -> None:
    """Raise ``InvalidConnection`` unless *con* is a redshift_connector Connection."""
    if isinstance(con, redshift_connector.Connection):
        return
    raise exceptions.InvalidConnection(
        "Invalid 'conn' argument, please pass a "
        "redshift_connector.Connection object. Use redshift_connector.connect() to use "
        "credentials directly or wr.redshift.connect() to fetch it from the Glue Catalog."
    )
def _get_connection_attributes_from_secrets_manager(
    secret_id: str, dbname: Optional[str], boto3_session: Optional[boto3.Session]
) -> ConnectionAttributes:
    """Build ConnectionAttributes from an AWS Secrets Manager secret.

    The database name is resolved in priority order: the explicit ``dbname``
    argument, then a ``dbname`` key in the secret, then (Redshift only) a
    lookup against the cluster identified by ``dbClusterIdentifier``.

    Raises
    ------
    exceptions.InvalidConnection
        If no database name can be resolved for a non-Redshift engine.
    """
    secret: Dict[str, Any] = secretsmanager.get_secret_json(name=secret_id, boto3_session=boto3_session)
    kind: str = secret["engine"]
    if dbname is not None:
        database_name: str = dbname
    elif "dbname" in secret:
        database_name = secret["dbname"]
    elif kind == "redshift":
        # Only Redshift secrets may omit dbname: resolve it from the cluster metadata.
        database_name = _get_dbname(cluster_id=secret["dbClusterIdentifier"], boto3_session=boto3_session)
    else:
        raise exceptions.InvalidConnection(f"The secret {secret_id} MUST have a dbname property.")
    return ConnectionAttributes(
        kind=kind,
        user=secret["username"],
        password=secret["password"],
        host=secret["host"],
        port=secret["port"],
        database=database_name,
        ssl_context=None,
    )
def _get_connection_attributes_from_map(connection_details: Optional[Dict[str, str]]) -> ConnectionAttributes:
    """Build ConnectionAttributes from a plain mapping of connection details.

    Parameters
    ----------
    connection_details : Dict[str, str], optional
        Mapping that must contain the keys ``user``, ``pass``, ``host``,
        ``port``, ``dbname`` and ``kind``.

    Raises
    ------
    exceptions.InvalidConnection
        If the mapping is missing (None) or any required key is absent.
    """
    required_fields: List[str] = ["user", "pass", "host", "port", "dbname", "kind"]
    # Guard against None before dereferencing: the parameter is Optional, and the
    # previous code raised AttributeError on .keys() instead of a clear error.
    if connection_details is None or not all(field in connection_details for field in required_fields):
        raise exceptions.InvalidConnection(
            f"All the required fields({required_fields}) must be set when using a map."
        )
    return ConnectionAttributes(
        kind=connection_details["kind"],
        user=connection_details["user"],
        password=connection_details["pass"],
        host=connection_details["host"],
        port=int(connection_details["port"]),
        database=connection_details["dbname"],
    )
def to_sql(df: pd.DataFrame, con: sqlalchemy.engine.Engine, **pandas_kwargs: Any) -> None:
    """Write records stored in a DataFrame to a SQL database.

    Support for **Redshift**, **PostgreSQL** and **MySQL**.

    Support for all pandas to_sql() arguments:
    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html

    Note
    ----
    Redshift: For large DataFrames (1MM+ rows) consider the function **wr.db.copy_to_redshift()**.

    Note
    ----
    Redshift: `index=False` will be forced.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
    con : sqlalchemy.engine.Engine
        SQLAlchemy Engine. Please use,
        wr.db.get_engine(), wr.db.get_redshift_temp_engine() or wr.catalog.get_engine()
    pandas_kwargs
        KEYWORD arguments forwarded to pandas.DataFrame.to_sql().
        You can NOT pass `pandas_kwargs` explicit, just add valid Pandas arguments
        in the function call and Wrangler will accept it.
        e.g. wr.db.to_sql(df, con=con, name="table_name", schema="schema_name", if_exists="replace", index=False)
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html

    Returns
    -------
    None
        None.

    Examples
    --------
    Writing to Redshift with temporary credentials

    >>> import awswrangler as wr
    >>> import pandas as pd
    >>> wr.db.to_sql(
    ...     df=pd.DataFrame({'col': [1, 2, 3]}),
    ...     con=wr.db.get_redshift_temp_engine(cluster_identifier="...", user="******"),
    ...     name="table_name",
    ...     schema="schema_name"
    ... )

    Writing to Redshift with temporary credentials and using pandas_kwargs

    >>> import awswrangler as wr
    >>> import pandas as pd
    >>> wr.db.to_sql(
    ...     df=pd.DataFrame({'col': [1, 2, 3]}),
    ...     con=wr.db.get_redshift_temp_engine(cluster_identifier="...", user="******"),
    ...     name="table_name",
    ...     schema="schema_name",
    ...     if_exists="replace",
    ...     index=False,
    ... )

    Writing to Redshift from Glue Catalog Connections

    >>> import awswrangler as wr
    >>> import pandas as pd
    >>> wr.db.to_sql(
    ...     df=pd.DataFrame({'col': [1, 2, 3]}),
    ...     con=wr.catalog.get_engine(connection="..."),
    ...     name="table_name",
    ...     schema="schema_name"
    ... )

    """
    if "pandas_kwargs" in pandas_kwargs:
        raise exceptions.InvalidArgument(
            "You can NOT pass `pandas_kwargs` explicit, just add valid "
            "Pandas arguments in the function call and Wrangler will accept it."
            "e.g. wr.db.to_sql(df, con, name='...', schema='...', if_exists='replace')"
        )
    if df.empty:
        raise exceptions.EmptyDataFrame()
    # Reuse the shared validator instead of duplicating the isinstance check inline.
    _validate_engine(con=con)
    # Caller-supplied dtype overrides are merged with types inferred from the DataFrame.
    cast_columns: Dict[str, VisitableType] = pandas_kwargs.get("dtype", {})
    dtypes: Dict[str, VisitableType] = _data_types.sqlalchemy_types_from_pandas(
        df=df, db_type=con.name, dtype=cast_columns
    )
    pandas_kwargs["dtype"] = dtypes
    pandas_kwargs["con"] = con
    if con.name.lower() == "redshift":  # Redshift does not accept index
        pandas_kwargs["index"] = False
    # Retry on transient InternalError (e.g. Redshift serialization conflicts).
    _utils.try_it(f=df.to_sql, ex=sqlalchemy.exc.InternalError, **pandas_kwargs)
def read_sql_query(
    sql: str,
    con: sqlalchemy.engine.Engine,
    index_col: Optional[Union[str, List[str]]] = None,
    params: Optional[Union[List, Tuple, Dict]] = None,
    chunksize: Optional[int] = None,
    dtype: Optional[Dict[str, pa.DataType]] = None,
) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]:
    """Return a DataFrame corresponding to the result set of the query string.

    Support for **Redshift**, **PostgreSQL** and **MySQL**.

    Note
    ----
    Redshift: For large extractions (1MM+ rows) consider the function **wr.db.unload_redshift()**.

    Parameters
    ----------
    sql : str
        SQL query to be executed.
    con : sqlalchemy.engine.Engine
        SQLAlchemy Engine. Please use,
        wr.db.get_engine(), wr.db.get_redshift_temp_engine() or wr.catalog.get_engine()
    index_col : Union[str, List[str]], optional
        Column(s) to set as index(MultiIndex).
    params : Union[List, Tuple, Dict], optional
        List of parameters to pass to execute method.
        The syntax used to pass parameters is database driver dependent.
        Check your database driver documentation for which of the five syntax styles,
        described in PEP 249's paramstyle, is supported.
        Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}.
    chunksize : int, optional
        If specified, return an iterator where chunksize is the number of rows to include in each chunk.
    dtype : Dict[str, pyarrow.DataType], optional
        Specifying the datatype for columns.
        The keys should be the column names and the values should be the PyArrow types.

    Returns
    -------
    Union[pandas.DataFrame, Iterator[pandas.DataFrame]]
        Result as Pandas DataFrame(s).

    Examples
    --------
    Reading from Redshift with temporary credentials

    >>> import awswrangler as wr
    >>> df = wr.db.read_sql_query(
    ...     sql="SELECT * FROM public.my_table",
    ...     con=wr.db.get_redshift_temp_engine(cluster_identifier="...", user="******")
    ... )

    Reading from Redshift from Glue Catalog Connections

    >>> import awswrangler as wr
    >>> df = wr.db.read_sql_query(
    ...     sql="SELECT * FROM public.my_table",
    ...     con=wr.catalog.get_engine(connection="...")
    ... )

    """
    # Reuse the shared validator instead of duplicating the isinstance check inline.
    _validate_engine(con=con)
    with con.connect() as _con:
        args = _convert_params(sql, params)
        cursor = _con.execute(*args)
        if chunksize is None:
            # Materialize the whole result set into a single DataFrame.
            return _records2df(records=cursor.fetchall(), cols_names=cursor.keys(), index=index_col, dtype=dtype)
        # Lazy iteration: yield DataFrames of `chunksize` rows each.
        return _iterate_cursor(cursor=cursor, chunksize=chunksize, index=index_col, dtype=dtype)
def to_sql(df: pd.DataFrame, con: sqlalchemy.engine.Engine, **pandas_kwargs) -> None:
    """Write records stored in a DataFrame to a SQL database.

    Support for **Redshift**, **PostgreSQL** and **MySQL**.

    Support for all pandas to_sql() arguments:
    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html

    Note
    ----
    Redshift: For large DataFrames (1MM+ rows) consider the function **wr.db.copy_to_redshift()**.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
    con : sqlalchemy.engine.Engine
        SQLAlchemy Engine. Please use,
        wr.db.get_engine(), wr.db.get_redshift_temp_engine() or wr.catalog.get_engine()
    pandas_kwargs
        keyword arguments forwarded to pandas.DataFrame.to_sql()
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html

    Returns
    -------
    None
        None.

    Examples
    --------
    Writing to Redshift with temporary credentials

    >>> import awswrangler as wr
    >>> import pandas as pd
    >>> wr.db.to_sql(
    ...     df=pd.DataFrame({'col': [1, 2, 3]}),
    ...     con=wr.db.get_redshift_temp_engine(cluster_identifier="...", user="******"),
    ...     name="table_name",
    ...     schema="schema_name"
    ... )

    Writing to Redshift from Glue Catalog Connections

    >>> import awswrangler as wr
    >>> import pandas as pd
    >>> wr.db.to_sql(
    ...     df=pd.DataFrame({'col': [1, 2, 3]}),
    ...     con=wr.catalog.get_engine(connection="..."),
    ...     name="table_name",
    ...     schema="schema_name"
    ... )

    """
    if df.empty:  # pragma: no cover
        raise exceptions.EmptyDataFrame()
    if not isinstance(con, sqlalchemy.engine.Engine):  # pragma: no cover
        raise exceptions.InvalidConnection(
            "Invalid 'con' argument, please pass a "
            "SQLAlchemy Engine. Use wr.db.get_engine(), "
            "wr.db.get_redshift_temp_engine() or wr.catalog.get_engine()"
        )
    # Caller-supplied dtype overrides are merged with types inferred from the DataFrame.
    cast_columns: Dict[str, VisitableType] = pandas_kwargs.get("dtype", {})
    dtypes: Dict[str, VisitableType] = _data_types.sqlalchemy_types_from_pandas(
        df=df, db_type=con.name, dtype=cast_columns
    )
    pandas_kwargs["dtype"] = dtypes
    pandas_kwargs["con"] = con
    df.to_sql(**pandas_kwargs)