Exemple #1
0
def connect(
    connection: Optional[str] = None,
    secret_id: Optional[str] = None,
    catalog_id: Optional[str] = None,
    dbname: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
    ssl_context: Optional[Dict[Any, Any]] = None,
    timeout: Optional[int] = None,
    tcp_keepalive: bool = True,
) -> pg8000.Connection:
    """Return a pg8000 connection from a Glue Catalog Connection.

    The connection attributes (user, password, host, port, database and kind)
    are resolved through ``_db_utils.get_connection_attributes`` and then
    handed to ``pg8000.connect``.

    https://github.com/tlocke/pg8000

    Parameters
    ----------
    connection : Optional[str]
        Glue Catalog Connection name.
    secret_id: Optional[str]:
        Specifies the secret containing the version that you want to retrieve.
        You can specify either the Amazon Resource Name (ARN) or the friendly name of the secret.
    catalog_id : str, optional
        The ID of the Data Catalog.
        If none is provided, the AWS account ID is used by default.
    dbname: Optional[str]
        Optional database name to overwrite the stored one.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.
    ssl_context: Optional[Dict]
        This governs SSL encryption for TCP/IP sockets.
        This parameter is forwarded to pg8000.
        https://github.com/tlocke/pg8000#functions
    timeout: Optional[int]
        This is the time in seconds before the connection to the server will time out.
        The default is None which means no timeout.
        This parameter is forwarded to pg8000.
        https://github.com/tlocke/pg8000#functions
    tcp_keepalive: bool
        If True then use TCP keepalive. The default is True.
        This parameter is forwarded to pg8000.
        https://github.com/tlocke/pg8000#functions

    Returns
    -------
    pg8000.Connection
        pg8000 connection.

    Raises
    ------
    exceptions.InvalidDatabaseType
        If the resolved connection kind is not ``"postgresql"``.

    Examples
    --------
    >>> import awswrangler as wr
    >>> con = wr.postgresql.connect("MY_GLUE_CONNECTION")
    >>> with con.cursor() as cursor:
    >>>     cursor.execute("SELECT 1")
    >>>     print(cursor.fetchall())
    >>> con.close()

    """
    attrs: _db_utils.ConnectionAttributes = _db_utils.get_connection_attributes(
        connection=connection,
        secret_id=secret_id,
        catalog_id=catalog_id,
        dbname=dbname,
        boto3_session=boto3_session,
    )
    if attrs.kind != "postgresql":
        # The exception has to be raised, not just instantiated; otherwise a
        # connection of the wrong kind would fall through to pg8000.connect().
        raise exceptions.InvalidDatabaseType(
            f"Invalid connection type ({attrs.kind}. It must be a postgresql connection.)"
        )
    return pg8000.connect(
        user=attrs.user,
        database=attrs.database,
        password=attrs.password,
        port=attrs.port,
        host=attrs.host,
        ssl_context=ssl_context,
        timeout=timeout,
        tcp_keepalive=tcp_keepalive,
    )
Exemple #2
0
def connect(
    connection: Optional[str] = None,
    secret_id: Optional[str] = None,
    catalog_id: Optional[str] = None,
    dbname: Optional[str] = None,
    odbc_driver_version: int = 17,
    boto3_session: Optional[boto3.Session] = None,
    timeout: Optional[int] = 0,
) -> "pyodbc.Connection":
    """Open a pyodbc connection described by a Glue Catalog Connection.

    The connection attributes are resolved through
    ``_db_utils.get_connection_attributes``, assembled into an ODBC
    connection string and passed to ``pyodbc.connect``.

    https://github.com/mkleehammer/pyodbc

    Parameters
    ----------
    connection : Optional[str]
        Name of the Glue Catalog Connection.
    secret_id: Optional[str]:
        Secret holding the connection details to retrieve.
        Either the Amazon Resource Name (ARN) or the friendly name of the secret may be given.
    catalog_id : str, optional
        ID of the Data Catalog.
        When omitted, the AWS account ID is used by default.
    dbname: Optional[str]
        Database name that overrides the stored one, if given.
    odbc_driver_version : int
        Major version of the installed ODBC Driver that should be used.
        It is interpolated into the ``DRIVER={ODBC Driver <version> for SQL Server}`` segment.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session is used when boto3_session is None.
    timeout: Optional[int]
        Connection timeout in seconds, passed unchanged to pyodbc.
        The default value in this signature is 0.
        https://github.com/mkleehammer/pyodbc/wiki/The-pyodbc-Module#connect

    Returns
    -------
    pyodbc.Connection
        pyodbc connection.

    Raises
    ------
    exceptions.InvalidDatabaseType
        If the resolved connection kind is not ``"sqlserver"``.

    Examples
    --------
    >>> import awswrangler as wr
    >>> con = wr.sqlserver.connect(connection="MY_GLUE_CONNECTION", odbc_driver_version=17)
    >>> with con.cursor() as cursor:
    >>>     cursor.execute("SELECT 1")
    >>>     print(cursor.fetchall())
    >>> con.close()

    """
    attrs: _db_utils.ConnectionAttributes = _db_utils.get_connection_attributes(
        connection=connection,
        secret_id=secret_id,
        catalog_id=catalog_id,
        dbname=dbname,
        boto3_session=boto3_session,
    )
    if attrs.kind != "sqlserver":
        raise exceptions.InvalidDatabaseType(
            f"Invalid connection type ({attrs.kind}. It must be a sqlserver connection.)"
        )

    # Build the ODBC connection string one ';'-terminated segment at a time.
    driver_part = f"DRIVER={{ODBC Driver {odbc_driver_version} for SQL Server}};"
    server_part = f"SERVER={attrs.host},{attrs.port};"
    database_part = f"DATABASE={attrs.database};"
    user_part = f"UID={attrs.user};"
    password_part = f"PWD={attrs.password}"
    odbc_connection_string = driver_part + server_part + database_part + user_part + password_part

    return pyodbc.connect(odbc_connection_string, timeout=timeout)
Exemple #3
0
def connect(
    connection: Optional[str] = None,
    secret_id: Optional[str] = None,
    catalog_id: Optional[str] = None,
    dbname: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
    read_timeout: Optional[int] = None,
    write_timeout: Optional[int] = None,
    connect_timeout: int = 10,
    cursorclass: Type[Cursor] = Cursor,
) -> "pymysql.connections.Connection[Any]":
    """Open a pymysql connection described by a Glue Catalog Connection or a Secrets Manager secret.

    https://pymysql.readthedocs.io

    Note
    ----
    Either `connection` OR `secret_id` MUST be supplied.
    A secret stored in Secrets Manager is expected to look like this:
    {
    "host":"mysql-instance-wrangler.dr8vkeyrb9m1.us-east-1.rds.amazonaws.com",
    "username":"******",
    "password":"******",
    "engine":"mysql",
    "port":"3306",
    "dbname": "mydb" # Optional
    }

    Note
    ----
    SSL can only be configured through a Glue Catalog Connection. More at:
    https://docs.aws.amazon.com/glue/latest/dg/connection-defining.html

    Note
    ----
    For queries returning a lot of data, consider passing SSCursor as `cursorclass`. More at:
    https://pymysql.readthedocs.io/en/latest/modules/cursors.html#pymysql.cursors.SSCursor

    Parameters
    ----------
    connection : str
        Name of the Glue Catalog Connection.
    secret_id: Optional[str]:
        Secret holding the connection details to retrieve.
        Either the Amazon Resource Name (ARN) or the friendly name of the secret may be given.
    catalog_id : str, optional
        ID of the Data Catalog.
        When omitted, the AWS account ID is used by default.
    dbname: Optional[str]
        Database name that overrides the stored one, if given.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session is used when boto3_session is None.
    read_timeout: Optional[int]
        Timeout in seconds for reading from the connection (default: None - no timeout).
        Forwarded to pymysql.
        https://pymysql.readthedocs.io/en/latest/modules/connections.html
    write_timeout: Optional[int]
        Timeout in seconds for writing to the connection (default: None - no timeout).
        Forwarded to pymysql.
        https://pymysql.readthedocs.io/en/latest/modules/connections.html
    connect_timeout: int
        Timeout before an exception is thrown while connecting.
        (default: 10, min: 1, max: 31536000)
        Forwarded to pymysql.
        https://pymysql.readthedocs.io/en/latest/modules/connections.html
    cursorclass : Cursor
        Cursor class to use, e.g. SSCursor; defaults to :class:`pymysql.cursors.Cursor`
        https://pymysql.readthedocs.io/en/latest/modules/cursors.html

    Returns
    -------
    pymysql.connections.Connection
        pymysql connection.

    Raises
    ------
    exceptions.InvalidDatabaseType
        If the resolved connection kind is not ``"mysql"``.

    Examples
    --------
    >>> import awswrangler as wr
    >>> con = wr.mysql.connect("MY_GLUE_CONNECTION")
    >>> with con.cursor() as cursor:
    >>>     cursor.execute("SELECT 1")
    >>>     print(cursor.fetchall())
    >>> con.close()

    """
    attrs: _db_utils.ConnectionAttributes = _db_utils.get_connection_attributes(
        connection=connection, secret_id=secret_id, catalog_id=catalog_id, dbname=dbname, boto3_session=boto3_session
    )
    if attrs.kind != "mysql":
        raise exceptions.InvalidDatabaseType(
            f"Invalid connection type ({attrs.kind}. It must be a MySQL connection.)"
        )

    # Gather every pymysql.connect() keyword in one place: the credentials and
    # SSL settings come from the resolved attributes, the rest from the caller.
    pymysql_kwargs = {
        "user": attrs.user,
        "database": attrs.database,
        "password": attrs.password,
        "port": attrs.port,
        "host": attrs.host,
        "ssl": attrs.ssl_context,
        "read_timeout": read_timeout,
        "write_timeout": write_timeout,
        "connect_timeout": connect_timeout,
        "cursorclass": cursorclass,
    }
    return pymysql.connect(**pymysql_kwargs)  # type: ignore
Exemple #4
0
def connect(
    connection: Optional[str] = None,
    secret_id: Optional[str] = None,
    catalog_id: Optional[str] = None,
    dbname: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
    read_timeout: Optional[int] = None,
    write_timeout: Optional[int] = None,
    connect_timeout: int = 10,
) -> pymysql.connections.Connection:
    """Open a pymysql connection described by a Glue Catalog Connection.

    https://pymysql.readthedocs.io

    Parameters
    ----------
    connection : str
        Name of the Glue Catalog Connection.
    secret_id: Optional[str]:
        Specifies the secret containing the version that you want to retrieve.
        Either the Amazon Resource Name (ARN) or the friendly name of the secret may be given.
    catalog_id : str, optional
        ID of the Data Catalog.
        When omitted, the AWS account ID is used by default.
    dbname: Optional[str]
        Database name that overrides the stored one, if given.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session is used when boto3_session is None.
    read_timeout: Optional[int]
        Timeout in seconds for reading from the connection (default: None - no timeout).
        Forwarded to pymysql.
        https://pymysql.readthedocs.io/en/latest/modules/connections.html
    write_timeout: Optional[int]
        Timeout in seconds for writing to the connection (default: None - no timeout).
        Forwarded to pymysql.
        https://pymysql.readthedocs.io/en/latest/modules/connections.html
    connect_timeout: int
        Timeout before an exception is thrown while connecting.
        (default: 10, min: 1, max: 31536000)
        Forwarded to pymysql.
        https://pymysql.readthedocs.io/en/latest/modules/connections.html

    Returns
    -------
    pymysql.connections.Connection
        pymysql connection.

    Raises
    ------
    exceptions.InvalidDatabaseType
        If the resolved connection kind is not ``"mysql"``.

    Examples
    --------
    >>> import awswrangler as wr
    >>> con = wr.mysql.connect("MY_GLUE_CONNECTION")
    >>> with con.cursor() as cursor:
    >>>     cursor.execute("SELECT 1")
    >>>     print(cursor.fetchall())
    >>> con.close()

    """
    attrs: _db_utils.ConnectionAttributes = _db_utils.get_connection_attributes(
        connection=connection,
        secret_id=secret_id,
        catalog_id=catalog_id,
        dbname=dbname,
        boto3_session=boto3_session,
    )
    if attrs.kind != "mysql":
        raise exceptions.InvalidDatabaseType(f"Invalid connection type ({attrs.kind}. It must be a MySQL connection.)")

    # Caller-supplied timeouts are grouped so they stay visually separate from
    # the credentials resolved out of the stored connection.
    timeout_options = {
        "read_timeout": read_timeout,
        "write_timeout": write_timeout,
        "connect_timeout": connect_timeout,
    }
    return pymysql.connect(
        user=attrs.user,
        database=attrs.database,
        password=attrs.password,
        port=attrs.port,
        host=attrs.host,
        **timeout_options,
    )
def connect(
    connection: str,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
    ssl: bool = True,
    timeout: Optional[int] = None,
    max_prepared_statements: int = 1000,
    tcp_keepalive: bool = True,
) -> redshift_connector.Connection:
    """Return a redshift_connector connection from a Glue Catalog Connection.

    The connection attributes (user, password, host, port, database and kind)
    are resolved through ``_db_utils.get_connection_attributes`` and then
    handed to ``redshift_connector.connect``.

    https://github.com/aws/amazon-redshift-python-driver

    Parameters
    ----------
    connection : str
        Glue Catalog Connection name.
    catalog_id : str, optional
        The ID of the Data Catalog.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.
    ssl: bool
        This governs SSL encryption for TCP/IP sockets.
        This parameter is forwarded to redshift_connector.
        https://github.com/aws/amazon-redshift-python-driver
    timeout: Optional[int]
        This is the time in seconds before the connection to the server will time out.
        The default is None which means no timeout.
        This parameter is forwarded to redshift_connector.
        https://github.com/aws/amazon-redshift-python-driver
    max_prepared_statements: int
        This parameter is forwarded to redshift_connector.
        https://github.com/aws/amazon-redshift-python-driver
    tcp_keepalive: bool
        If True then use TCP keepalive. The default is True.
        This parameter is forwarded to redshift_connector.
        https://github.com/aws/amazon-redshift-python-driver

    Returns
    -------
    redshift_connector.Connection
        redshift_connector connection.

    Raises
    ------
    exceptions.InvalidDatabaseType
        If the resolved connection kind is not ``"redshift"``.

    Examples
    --------
    >>> import awswrangler as wr
    >>> con = wr.redshift.connect("MY_GLUE_CONNECTION")
    >>> with con.cursor() as cursor:
    >>>     cursor.execute("SELECT 1")
    >>>     print(cursor.fetchall())
    >>> con.close()

    """
    attrs: _db_utils.ConnectionAttributes = _db_utils.get_connection_attributes(
        connection=connection,
        catalog_id=catalog_id,
        boto3_session=boto3_session,
    )
    if attrs.kind != "redshift":
        # The exception has to be raised, not just instantiated; otherwise a
        # connection of the wrong kind would fall through to
        # redshift_connector.connect().
        raise exceptions.InvalidDatabaseType(
            f"Invalid connection type ({attrs.kind}. It must be a redshift connection.)"
        )
    return redshift_connector.connect(
        user=attrs.user,
        database=attrs.database,
        password=attrs.password,
        port=attrs.port,
        host=attrs.host,
        ssl=ssl,
        timeout=timeout,
        max_prepared_statements=max_prepared_statements,
        tcp_keepalive=tcp_keepalive,
    )