def test_copy_unload_creds(path, redshift_table):
    """Round-trip a DataFrame through Redshift COPY/UNLOAD with explicit session credentials."""
    creds = _utils.get_credentials_from_session()
    cred_kwargs = {
        "aws_access_key_id": creds.access_key,
        "aws_secret_access_key": creds.secret_key,
        "aws_session_token": creds.token,
    }
    frame = pd.DataFrame({"col0": [1, 2, 3]})
    con = wr.redshift.connect("aws-data-wrangler-redshift")

    def _copy(mode):
        # Load the frame into the target table, passing credentials explicitly.
        wr.redshift.copy(
            df=frame,
            path=path,
            con=con,
            schema="public",
            table=redshift_table,
            mode=mode,
            **cred_kwargs,
        )

    def _unload(**extra):
        # Export the table back through S3 and read it, removing the staged files.
        return wr.redshift.unload(
            sql=f"SELECT * FROM public.{redshift_table}",
            con=con,
            path=path,
            keep_files=False,
            **cred_kwargs,
            **extra,
        )

    _copy("overwrite")
    assert _unload().shape == (3, 1)

    _copy("append")
    assert _unload().shape == (6, 1)

    # Chunked unload yields an iterator of frames; each keeps the single column.
    for chunk in _unload(chunked=True):
        assert len(chunk.columns) == 1

    con.close()
def connect(
    host: str,
    port: Optional[int] = 443,
    boto3_session: Optional[boto3.Session] = None,
    region: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
) -> OpenSearch:
    """Create a secure connection to the specified Amazon OpenSearch domain.

    Note
    ----
    We use `opensearch-py <https://github.com/opensearch-project/opensearch-py>`_, an OpenSearch python client.

    The username and password are mandatory if the OS Cluster uses `Fine Grained Access Control \
<https://docs.aws.amazon.com/opensearch-service/latest/developerguide/fgac.html>`_.
    If fine grained access control is disabled, session access key and secret keys are used.

    Parameters
    ----------
    host : str
        Amazon OpenSearch domain, for example: my-test-domain.us-east-1.es.amazonaws.com.
    port : int
        OpenSearch Service only accepts connections over port 80 (HTTP) or 443 (HTTPS)
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 Session will be used if boto3_session receive None.
    region :
        AWS region of the Amazon OS domain. If not provided will be extracted from boto3_session.
    username :
        Fine-grained access control username. Mandatory if OS Cluster uses Fine Grained Access Control.
    password :
        Fine-grained access control password. Mandatory if OS Cluster uses Fine Grained Access Control.

    Returns
    -------
    opensearchpy.OpenSearch
        OpenSearch low-level client.
        https://github.com/opensearch-project/opensearch-py/blob/main/opensearchpy/client/__init__.py

    Raises
    ------
    ValueError
        If ``port`` is not 80 or 443.
    exceptions.InvalidArgument
        If no username/password is given and the boto3 session carries no credentials.
    """
    valid_ports = {80, 443}
    if port not in valid_ports:
        # NOTE: previous message carried a stray "results: " prefix (copy-paste artifact).
        raise ValueError(f"port must be one of {valid_ports}")

    if username and password:
        # Fine Grained Access Control: authenticate with HTTP basic auth.
        http_auth = (username, password)
    else:
        # Fall back to SigV4 request signing using the session's credentials.
        if region is None:
            region = _utils.get_region_from_session(boto3_session=boto3_session)
        creds = _utils.get_credentials_from_session(boto3_session=boto3_session)
        if creds.access_key is None or creds.secret_key is None:
            raise exceptions.InvalidArgument(
                "One of IAM Role or AWS ACCESS_KEY_ID and SECRET_ACCESS_KEY must be "
                "given. Unable to find ACCESS_KEY_ID and SECRET_ACCESS_KEY in boto3 "
                "session."
            )
        http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, "es", session_token=creds.token)
    try:
        es = OpenSearch(
            host=_strip_endpoint(host),
            port=port,
            http_auth=http_auth,
            use_ssl=True,
            verify_certs=True,
            connection_class=RequestsHttpConnection,
            timeout=30,
            max_retries=10,
            retry_on_timeout=True,
        )
    except Exception:
        # logger.exception records the traceback; bare `raise` preserves it for callers.
        _logger.exception("Error connecting to Opensearch cluster. Please verify authentication details")
        raise
    return es