Exemple #1
0
def test_copy_unload_creds(path, redshift_table):
    """Round-trip a DataFrame through Redshift COPY/UNLOAD with explicit credentials.

    The credentials taken from the session are passed explicitly (access key,
    secret key and session token) to every ``wr.redshift.copy`` and
    ``wr.redshift.unload`` call. The test checks that:

    * an ``overwrite`` copy of a 3-row, 1-column frame unloads as ``(3, 1)``;
    * a following ``append`` copy of the same frame unloads as ``(6, 1)``;
    * a chunked unload yields frames that each have exactly one column.

    Parameters
    ----------
    path :
        Fixture value forwarded as ``path`` to both copy and unload.
    redshift_table :
        Fixture value used as the target table name in the ``public`` schema.
    """
    credentials = _utils.get_credentials_from_session()
    # Built once so every COPY/UNLOAD call is guaranteed to use the same credentials.
    creds_kwargs = {
        "aws_access_key_id": credentials.access_key,
        "aws_secret_access_key": credentials.secret_key,
        "aws_session_token": credentials.token,
    }
    df = pd.DataFrame({"col0": [1, 2, 3]})
    con = wr.redshift.connect("aws-data-wrangler-redshift")
    # try/finally: the connection must be released even when an assertion or
    # one of the Redshift calls fails, otherwise it leaks for the rest of the run.
    try:
        copy_kwargs = {
            "df": df,
            "path": path,
            "con": con,
            "schema": "public",
            "table": redshift_table,
            **creds_kwargs,
        }
        unload_kwargs = {
            "sql": f"SELECT * FROM public.{redshift_table}",
            "con": con,
            "path": path,
            "keep_files": False,
            **creds_kwargs,
        }

        # Overwrite: the table holds exactly the 3 rows of ``df``.
        wr.redshift.copy(mode="overwrite", **copy_kwargs)
        df2 = wr.redshift.unload(**unload_kwargs)
        assert df2.shape == (3, 1)

        # Append: the same 3 rows are added again, for 6 in total.
        wr.redshift.copy(mode="append", **copy_kwargs)
        df2 = wr.redshift.unload(**unload_kwargs)
        assert df2.shape == (6, 1)

        # Chunked unload: iterate the result and check each chunk's column count.
        dfs = wr.redshift.unload(chunked=True, **unload_kwargs)
        for chunk in dfs:
            assert len(chunk.columns) == 1
    finally:
        con.close()
def connect(
    host: str,
    port: Optional[int] = 443,
    boto3_session: Optional[boto3.Session] = None,
    region: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
) -> OpenSearch:
    """Create a secure connection to the specified Amazon OpenSearch domain.

    Note
    ----
    We use `opensearch-py <https://github.com/opensearch-project/opensearch-py>`_, an OpenSearch python client.

    The username and password are mandatory if the OS Cluster uses `Fine Grained Access Control \
<https://docs.aws.amazon.com/opensearch-service/latest/developerguide/fgac.html>`_.
    If fine grained access control is disabled, session access key and secret keys are used.

    Parameters
    ----------
    host : str
        Amazon OpenSearch domain, for example: my-test-domain.us-east-1.es.amazonaws.com.
    port : int, optional
        OpenSearch Service only accepts connections over port 80 (HTTP) or 443 (HTTPS).
        Defaults to 443.
    boto3_session : boto3.Session, optional
        Boto3 Session. The default boto3 Session will be used if boto3_session receives None.
        Only consulted when ``username`` and ``password`` are not both given.
    region : str, optional
        AWS region of the Amazon OS domain. If not provided will be extracted from boto3_session.
    username : str, optional
        Fine-grained access control username. Mandatory if OS Cluster uses Fine Grained Access Control.
    password : str, optional
        Fine-grained access control password. Mandatory if OS Cluster uses Fine Grained Access Control.

    Returns
    -------
    opensearchpy.OpenSearch
        OpenSearch low-level client.
        https://github.com/opensearch-project/opensearch-py/blob/main/opensearchpy/client/__init__.py

    Raises
    ------
    ValueError
        If ``port`` is not 80 or 443.
    exceptions.InvalidArgument
        If no username/password pair is given and the session credentials
        lack an access key or a secret key.
    """
    # NOTE: ``boto3_session`` defaults to None instead of ``boto3.Session()``.
    # A call in the signature is evaluated once, when the function is defined,
    # which created a session as an import side effect and shared that single
    # object across every call. None is forwarded to the ``_utils`` helpers,
    # which (per the parameter documentation above) fall back to the default session.
    valid_ports = {80, 443}

    if port not in valid_ports:
        raise ValueError(f"results: port must be one of {valid_ports}")

    if username and password:
        # Basic auth; used only when BOTH values are provided and non-empty.
        http_auth = (username, password)
    else:
        # Otherwise sign requests with the credentials from the boto3 session.
        if region is None:
            region = _utils.get_region_from_session(boto3_session=boto3_session)
        creds = _utils.get_credentials_from_session(boto3_session=boto3_session)
        if creds.access_key is None or creds.secret_key is None:
            raise exceptions.InvalidArgument(
                "One of IAM Role or AWS ACCESS_KEY_ID and SECRET_ACCESS_KEY must be "
                "given. Unable to find ACCESS_KEY_ID and SECRET_ACCESS_KEY in boto3 "
                "session."
            )
        http_auth = AWS4Auth(
            creds.access_key,
            creds.secret_key,
            region,
            "es",
            session_token=creds.token,
        )

    try:
        # NOTE(review): ``use_ssl=True`` is unconditional although port 80 is
        # accepted above and documented as HTTP — confirm this is intended.
        es = OpenSearch(
            host=_strip_endpoint(host),
            port=port,
            http_auth=http_auth,
            use_ssl=True,
            verify_certs=True,
            connection_class=RequestsHttpConnection,
            timeout=30,
            max_retries=10,
            retry_on_timeout=True,
        )
    except Exception:
        # Log with the traceback attached, then re-raise the original exception
        # unchanged (bare ``raise`` keeps the traceback intact).
        _logger.exception(
            "Error connecting to Opensearch cluster. Please verify authentication details"
        )
        raise
    return es