def test_idp_host_invalid_should_fail(idp_arg):
    wrong_idp_host: str = "andrew.okta.com"
    idp_arg["idp_host"] = wrong_idp_host

    with pytest.raises(redshift_connector.InterfaceError, match="Unauthorized"):
        redshift_connector.connect(**idp_arg)
def test_auth_req_digest(db_kwargs, algorithm):
    test_user: str = "{}_dbuser".format(algorithm)
    test_password: str = "My_{}_PaSsWoRdŽ".format(algorithm)

    with redshift_connector.connect(**db_kwargs) as conn:
        with conn.cursor() as cursor:
            cursor.execute("drop user if exists {}".format(test_user))
            cursor.execute(
                "create user {} with password '{}'".format(
                    test_user, "{}|{}".format(algorithm, test_password)
                )
            )
            conn.commit()

    try:
        with redshift_connector.connect(
            **{**db_kwargs, **{"user": test_user, "password": test_password}}
        ) as conn:
            with conn.cursor() as cursor:
                cursor.execute("select 1")
    finally:
        # clean up the test user regardless of whether the connection succeeded
        with redshift_connector.connect(**db_kwargs) as conn:
            with conn.cursor() as cursor:
                cursor.execute("drop user if exists {}".format(test_user))
                conn.commit()
def testClusterIdentifier(idp_arg):
    wrong_identifier = "redshift-cluster-11"
    idp_arg["cluster_identifier"] = wrong_identifier

    with pytest.raises(
        botocore.exceptions.ClientError,
        match="Cluster {} not found.".format(wrong_identifier),
    ):
        redshift_connector.connect(**idp_arg)
def test_application_name_integer(db_kwargs):
    db_kwargs["application_name"] = 1

    with pytest.raises(
        redshift_connector.InterfaceError,
        match="The parameter application_name can't be of type <class 'int'>.",
    ):
        redshift_connector.connect(**db_kwargs)
def testIdpPassword(idp_arg):
    idp_arg["password"] = "******"

    with pytest.raises(
        redshift_connector.InterfaceError,
        match=r"(Unauthorized)|(400 Client Error: Bad Request)",
    ):
        redshift_connector.connect(**idp_arg)
def test_invalid_credentials_provider_should_raise(idp_arg):
    idp_arg["iam"] = False
    idp_arg["credentials_provider"] = "OktacredentialSProvider"

    with pytest.raises(
        redshift_connector.InterfaceError,
        match="Invalid credentials provider",
    ):
        redshift_connector.connect(**idp_arg)
def testSslAndIam(idp_arg):
    idp_arg["ssl"] = False
    idp_arg["iam"] = True

    with pytest.raises(
        redshift_connector.InterfaceError,
        match="Invalid connection property setting",
    ):
        redshift_connector.connect(**idp_arg)
def testSsl(db_kwargs):
    db_kwargs["ssl"] = True
    db_kwargs["sslmode"] = "verify-ca"
    with redshift_connector.connect(**db_kwargs):
        pass

    db_kwargs["sslmode"] = "verify-full"
    with redshift_connector.connect(**db_kwargs):
        pass
def testPreferredRole(idp_arg):
    idp_arg["preferred_role"] = conf.get("okta-idp", "preferred_role")
    with redshift_connector.connect(**idp_arg):
        pass

    idp_arg["preferred_role"] = "arn:aws:iam::111111111111:role/Okta-role"
    with pytest.raises(
        redshift_connector.InterfaceError, match="Preferred role not found in SamlAssertion"
    ):
        redshift_connector.connect(**idp_arg)
def testRegion(idp_arg):
    wrong_region = "us-east-22"
    idp_arg["region"] = wrong_region

    with pytest.raises(
        botocore.exceptions.EndpointConnectionError,
        match='Could not connect to the endpoint URL: "https://redshift.{}.amazonaws.com/"'.format(
            wrong_region
        ),
    ):
        redshift_connector.connect(**idp_arg)
def test_idp_password(idp_arg):
    idp_arg["password"] = "******"

    with pytest.raises(
        redshift_connector.InterfaceError,
        match=r"(Unauthorized)|(400 Client Error: Bad Request)|(Failed to find Adfs access_token)",
    ):
        redshift_connector.connect(**idp_arg)
def testSocketMissing():
    conn_params = {
        "unix_sock": "/file-does-not-exist",
        "user": "******",
        "password": "******",
        "database": "myDb",
    }

    with pytest.raises(redshift_connector.InterfaceError):
        redshift_connector.connect(**conn_params)
def test_invalid_db_group(idp_arg):
    import botocore.exceptions

    idp_arg["db_groups"] = ["girl_dont_do_it"]

    with pytest.raises(
        expected_exception=(redshift_connector.ProgrammingError, botocore.exceptions.ClientError),
        match="{}".format(idp_arg["db_groups"][0]),
    ):
        redshift_connector.connect(**idp_arg)
def test_datatype_recv_support(db_kwargs, datatype):
    table_name: str = get_table_name(datatype)
    exp_results: typing.Tuple[typing.Tuple[str, ...], ...] = test_data[datatype.name]

    with redshift_connector.connect(**db_kwargs) as con:
        with con.cursor() as cursor:
            cursor.execute("select * from {}".format(table_name))
            results = cursor.fetchall()
            assert results is not None
            assert len(results) == len(exp_results)

            for ridx, exp_row in enumerate(exp_results):
                assert results[ridx][0] == exp_row[0]
                # the expected Python value is stored in the last index of the tuple
                if datatype in FLOAT_DATATYPES:
                    assert isclose(
                        typing.cast(float, results[ridx][1]),
                        typing.cast(float, exp_row[-1]),
                        rel_tol=1e-05,
                        abs_tol=1e-08,
                    )
                elif datatype in DATATYPES_WITH_MS:
                    # compare to the second exactly, then loosely compare the
                    # microsecond component on its own
                    assert results[ridx][1].replace(microsecond=0) == exp_row[-1].replace(
                        microsecond=0
                    )
                    assert isclose(
                        results[ridx][1].microsecond, exp_row[-1].microsecond, rel_tol=1e1
                    )
                else:
                    assert results[ridx][1] == exp_row[-1]
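# A small illustration of the math.isclose tolerances used above (the inputs
# here are hypothetical): the check passes when the difference is within
# rel_tol of the larger magnitude, or within abs_tol absolutely.
from math import isclose

assert isclose(1.000001, 1.0000015, rel_tol=1e-05, abs_tol=1e-08)
assert not isclose(1.0, 1.1, rel_tol=1e-05, abs_tol=1e-08)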
def run_performance_test(
    test_datatype: PerformanceTestDatatypes,
    limit: float,
    test_protocol: ClientProtocolVersion,
    is_warmup=False,
):
    with redshift_connector.connect(
        ssl=False,
        sslmode="disable",
        host="localhost",
        database="",
        user="",
        password="",
        client_protocol_version=test_protocol.value,
    ) as conn:
        assert conn._client_protocol_version == test_protocol.value

        with conn.cursor() as cursor:
            start_ms: float = timer.time() * 1000
            cursor.execute(
                "select * from {table_name} limit {limit}".format(
                    table_name=test_datatype.value, limit=limit
                )
            )
            end_ms: float = timer.time() * 1000
            total_ms: float = end_ms - start_ms

            if not is_warmup:
                print(
                    "datatype: {}\tprotocol: {}\tfetch {:.0f}: {:.2f}ms".format(
                        test_datatype.name, test_protocol.name, limit, total_ms
                    )
                )

            results = cursor.fetchall()
            validate_results(test_datatype, results, limit)

            if not is_warmup:
                # total execution time is in msec and represents time required to
                # fetch data from server and copy to client deque
                execution_times[test_datatype][test_protocol][int(limit)].append(end_ms - start_ms)
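# A minimal driver sketch for run_performance_test, assuming the
# PerformanceTestDatatypes and ClientProtocolVersion enums and the
# execution_times mapping defined elsewhere in this module; the warmup pass
# runs first so its timing is never recorded.
def run_all_performance_tests(limit: float = 100000, passes: int = 3) -> None:
    for test_datatype in PerformanceTestDatatypes:
        for test_protocol in ClientProtocolVersion:
            run_performance_test(test_datatype, limit, test_protocol, is_warmup=True)
            for _ in range(passes):
                run_performance_test(test_datatype, limit, test_protocol)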
def initiate_connection(cluster_urls, interface, database_name):
    conn = None

    if interface == "odbc":
        # swap the requested database into the odbc connection string
        cluster_split = cluster_urls["odbc"].split(";")
        cluster_split[2] = "Database=" + database_name
        cluster = ";".join(cluster_split)

        try:
            conn = pyodbc.connect(cluster, autocommit=True)
            yield conn
        finally:
            if conn is not None:
                conn.close()
    elif interface == "psql":
        try:
            conn = redshift_connector.connect(
                user=cluster_urls["psql"]["username"],
                password=cluster_urls["psql"]["password"],
                host=cluster_urls["psql"]["host"],
                port=int(cluster_urls["psql"]["port"]),
                database=database_name,
            )
            conn.autocommit = True
            yield conn
        finally:
            if conn is not None:
                conn.close()
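# initiate_connection is a generator, so it is presumably wrapped with
# contextlib.contextmanager where it is defined; a usage sketch under that
# assumption, with placeholder credentials in the cluster_urls dict:
import contextlib

cluster_urls = {
    "psql": {
        "username": "awsuser",  # placeholder credentials
        "password": "my-password",
        "host": "example-cluster.abc123.us-east-1.redshift.amazonaws.com",
        "port": "5439",
    }
}

with contextlib.contextmanager(initiate_connection)(cluster_urls, "psql", "dev") as conn:
    with conn.cursor() as cursor:
        cursor.execute("select current_database()")
        print(cursor.fetchone())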
def test_ssl_and_iam_invalid_should_fail(idp_arg):
    idp_arg["ssl"] = False
    idp_arg["iam"] = True

    with pytest.raises(
        redshift_connector.InterfaceError,
        match="Invalid connection property setting. SSL must be enabled when using IAM",
    ):
        redshift_connector.connect(**idp_arg)

    idp_arg["iam"] = False
    idp_arg["credentials_provider"] = "OktacredentialSProvider"

    with pytest.raises(
        redshift_connector.InterfaceError,
        match="Invalid credentials provider ",
    ):
        redshift_connector.connect(**idp_arg)
def connect_function() -> redshift_connector.Connection:
    con = redshift_connector.connect(
        **self._get_connection_params(
            database=database if database else None,
        ),
    )
    con.autocommit = True  # see https://stackoverflow.com/q/22019154
    con.paramstyle = 'pyformat'
    return con
def test_get_columns(mocker, _input, db_kwargs):
    database_metadata_current_db_only_val, _args = _input
    db_kwargs["database_metadata_current_db_only"] = database_metadata_current_db_only_val

    with redshift_connector.connect(**db_kwargs) as conn:
        assert conn.is_single_database_metadata is database_metadata_current_db_only_val

        with conn.cursor() as cursor:
            spy = mocker.spy(cursor, "execute")
            result: typing.Tuple = cursor.get_columns(**_args)

            # ensure query was executed with arguments passed to get_columns
            assert spy.called

            if _args["schema_pattern"] is not None and database_metadata_current_db_only_val:
                assert spy.call_count == 2  # call in __schema_pattern_match(), get_columns()
            else:
                assert spy.call_count == 1

            for arg in (
                _args["catalog"],
                _args["schema_pattern"],
                _args["tablename_pattern"],
                _args["columnname_pattern"],
            ):
                if arg is not None:
                    assert arg in spy.call_args[0][0]

            assert len(result) > 0, print(spy.call_args, "\n", result)
            assert len(result[0]) == 24
def read_data_from_redshift(
    query,
    host,
    password,
    port=5439,
    database='dev',
    user='******',
    date_start=None,
    date_end=None,
) -> pd.DataFrame:
    """
    args:
        query: query used to obtain data from Redshift, str
        host: Redshift configuration
        password: Redshift configuration
        port: Redshift configuration
        database: Redshift configuration
        user: Redshift configuration
        date_start: date to start, strftime('%Y/%m/%d')
        date_end: date to end, strftime('%Y/%m/%d')
    return:
        df: target dataframe
    """
    cursor = connect(
        host=host, port=port, database=database, user=user, password=password
    ).cursor()
    query = query.format(date_start, date_end)
    cursor.execute(query)
    df = cursor.fetch_dataframe()
    return df
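# Hedged usage sketch for read_data_from_redshift; the host, credentials, and
# query below are placeholders, and the query's two {} slots are filled with
# date_start and date_end via str.format inside the function.
df = read_data_from_redshift(
    query="select * from sales where saletime between '{}' and '{}'",
    host="example-cluster.abc123.us-east-1.redshift.amazonaws.com",
    password="my-password",
    user="awsuser",
    date_start="2021/01/01",
    date_end="2021/01/31",
)
print(df.head())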
def get_conn(self) -> RedshiftConnection:
    """Returns a redshift_connector.Connection object"""
    conn_params = self._get_conn_params()
    conn_kwargs_dejson = self.conn.extra_dejson
    conn_kwargs: Dict = {**conn_params, **conn_kwargs_dejson}
    conn: RedshiftConnection = redshift_connector.connect(**conn_kwargs)
    return conn
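# Sketch of the kwargs merge above with hypothetical values: keys from the
# connection's extra JSON (the right-hand dict) override the base parameters.
base_params = {"host": "h", "port": 5439, "database": "dev"}
extra_json = {"port": 5440, "ssl": True}
assert {**base_params, **extra_json} == {
    "host": "h",
    "port": 5440,
    "database": "dev",
    "ssl": True,
}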
def get_cursor():
    conn = redshift_connector.connect(
        host=os.environ['RS_HOST'],
        database=os.environ['RS_DATABASE'],
        user=os.environ['RS_USER'],
        password=os.environ['RS_PASSWORD'],
    )
    return conn.cursor()
def use_cached_temporary_credentials(idp_arg):
    # ensure nothing is in the credential cache
    redshift_connector.IamHelper.credentials_cache.clear()

    with redshift_connector.connect(**idp_arg):
        pass

    assert len(redshift_connector.IamHelper.credentials_cache) == 1
    first_cred_cache_entry = redshift_connector.IamHelper.credentials_cache.popitem()

    with redshift_connector.connect(**idp_arg):
        pass

    # we should have used the temporary credentials retrieved in first AWS API call,
    # verify cache still holds these
    assert len(redshift_connector.IamHelper.credentials_cache) == 1
    assert first_cred_cache_entry == redshift_connector.IamHelper.credentials_cache.popitem()
def testBytesPassword(con, db_kwargs):
    # Create user
    username = "******"
    password = "******"

    with con.cursor() as cur:
        cur.execute("drop user if exists {};".format(username))
        cur.execute("create user {} with password '{}';".format(username, password))
        con.commit()

        db_kwargs["user"] = username
        db_kwargs["password"] = password.encode("utf8")
        db_kwargs["database"] = "redshift_connector_md5"

        with pytest.raises(redshift_connector.ProgrammingError, match="3D000"):
            redshift_connector.connect(**db_kwargs)

        cur.execute("drop user {}".format(username))
        con.commit()
def test_stl_connection_log_contains_os_version(db_kwargs):
    with redshift_connector.connect(**db_kwargs) as conn:
        with conn.cursor() as cursor:
            # verify stl_connection_log contains driver and os versions as expected
            cursor.execute(
                "select top 1 1 from stl_connection_log where driver_version = '{}' and os_version = '{}'".format(
                    DriverInfo.driver_full_name(), conn.client_os_version
                )
            )
            res = cursor.fetchone()
            assert res is not None
            assert res[0] == 1
def test_redshift_specific_recv_support(db_kwargs, _input):
    datatype, data = _input
    test_val, exp_val = data

    with redshift_connector.connect(**db_kwargs) as con:
        with con.cursor() as cursor:
            cursor.execute("select {}".format(test_val))
            results: typing.Tuple = cursor.fetchall()
            assert len(results) == 1
            assert len(results[0]) == 1
            assert results[0][0] == exp_val
def test_broken_pipe(con, db_kwargs):
    with redshift_connector.connect(**db_kwargs) as db1:
        with db1.cursor() as cur1, con.cursor() as cur2:
            cur1.execute("select pg_backend_pid()")
            pid1 = cur1.fetchone()[0]

            cur2.execute("select pg_terminate_backend(%s)", (pid1,))
            try:
                cur1.execute("select 1")
            except Exception as e:
                assert isinstance(e, (socket.error, struct.error))
def test_read_sql_query_simple(databases_parameters):
    con = redshift_connector.connect(
        host=databases_parameters["redshift"]["host"],
        port=int(databases_parameters["redshift"]["port"]),
        database=databases_parameters["redshift"]["database"],
        user=databases_parameters["user"],
        password=databases_parameters["password"],
    )
    df = wr.redshift.read_sql_query("SELECT 1", con=con)
    con.close()
    assert df.shape == (1, 1)
def perf_conn():
    return redshift_connector.connect(
        database=conf.get("ci-cluster", "database"),
        host=conf.get("ci-cluster", "host"),
        port=conf.getint("default-test", "port"),
        user=conf.get("ci-cluster", "test_user"),
        password=conf.get("ci-cluster", "test_password"),
        ssl=True,
        sslmode=conf.get("default-test", "sslmode"),
        iam=False,
    )
def unload_system_table(
    source_cluster_urls,
    odbc_driver,
    unload_system_table_queries_file,
    unload_location,
    unload_iam_role,
):
    conn = None
    if odbc_driver:
        conn = pyodbc.connect(source_cluster_urls["odbc"])
    else:
        conn = redshift_connector.connect(
            user=source_cluster_urls["psql"]["username"],
            password=source_cluster_urls["psql"]["password"],
            host=source_cluster_urls["psql"]["host"],
            port=int(source_cluster_urls["psql"]["port"]),
            database=source_cluster_urls["psql"]["database"],
        )
    conn.autocommit = True

    # parse the queries file: a line starting with "--" names the table whose
    # unload query follows on subsequent lines
    unload_queries = {}
    table_name = ""
    query_text = ""
    with open(unload_system_table_queries_file, "r") as f:
        for line in f:
            if line.startswith("--"):
                unload_queries[table_name] = query_text.strip("\n")
                table_name = line[2:].strip("\n")
                query_text = ""
            else:
                query_text += line
    unload_queries[table_name] = query_text.strip("\n")
    del unload_queries[""]

    cursor = conn.cursor()
    for table_name, unload_query in unload_queries.items():
        if table_name and unload_query:
            unload_query = re.sub(
                r"to ''",
                f"TO '{unload_location}/system_tables/{table_name}/'",
                unload_query,
                flags=re.IGNORECASE,
            )
            unload_query = re.sub(
                r"credentials ''",
                f"CREDENTIALS 'aws_iam_role={unload_iam_role}'",
                unload_query,
                flags=re.IGNORECASE,
            )
            cursor.execute(unload_query)
            logger.debug(f"Executed unload query: {unload_query}")
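# The parser above expects unload_system_table_queries_file to hold a
# "--<table_name>" marker line before each query, with empty to '' and
# credentials '' placeholders for re.sub to fill in; a hypothetical
# two-table example of that layout:
#
#   --stl_query
#   unload ($$ select * from stl_query $$) to '' credentials ''
#   --stl_wlm_query
#   unload ($$ select * from stl_wlm_query $$) to '' credentials ''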