def test_get_column_names(cls, mock_session, credentials=credentials(), dbapi=DBAPIS):
    """Unit test for ``Redshift._get_column_names``.

    Fixes in this revision:
    - the patch target was built with ``f"{dbapi}.connect"``, which
      interpolates the module's repr (not a dotted import path), so
      ``mock.patch`` could never resolve it; ``dbapi.__name__`` is used,
      matching the sibling tests in this file.
    - the first two Redshift instances were constructed with a hard-coded
      ``dbapi=psycopg2`` instead of the ``dbapi`` parameter under test.
    - ``execute.called_with(...)`` is not a Mock assertion method (it just
      creates a child mock, so the surrounding assert was always truthy);
      replaced with ``assert_called_with``.
    """
    with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
        # Default mock cursor description -> no column names are found.
        r = locopy.Redshift(dbapi=dbapi, **credentials)
        r.connect()
        assert r._get_column_names("query") is None
        sql = "SELECT * FROM (query) WHERE 1 = 0"
        mock_connect.return_value.cursor.return_value.execute.assert_called_with(sql, ())

        # Names come from cursor.description with trailing whitespace stripped.
        mock_connect.return_value.cursor.return_value.description = [["COL1 "], ["COL2 "]]
        r = locopy.Redshift(dbapi=dbapi, **credentials)
        r.connect()
        assert r._get_column_names("query") == ["COL1", "COL2"]

        # Any failure from execute must propagate to the caller.
        mock_connect.return_value.cursor.return_value.execute.side_effect = Exception()
        r = locopy.Redshift(dbapi=dbapi, **credentials)
        r.connect()
        with pytest.raises(Exception):
            r._get_column_names("query")
def test_unload_generated_files(cls, mock_session, credentials=credentials(), dbapi=DBAPIS):
    """Unit test for ``Redshift._unload_generated_files``."""
    with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
        cursor = mock_connect.return_value.cursor.return_value

        # Nothing fetched from the cursor -> None.
        redshift = locopy.Redshift(dbapi=dbapi, **credentials)
        redshift.connect()
        redshift._unload_generated_files()
        assert redshift._unload_generated_files() is None

        # Fetched rows are returned with trailing whitespace stripped.
        cursor.fetchall.return_value = [["File1 "], ["File2 "]]
        redshift = locopy.Redshift(dbapi=dbapi, **credentials)
        redshift.connect()
        redshift._unload_generated_files()
        assert redshift._unload_generated_files() == ["File1", "File2"]

        # A failure in execute must propagate to the caller.
        cursor.execute.side_effect = Exception()
        redshift = locopy.Redshift(dbapi=dbapi, **credentials)
        redshift.connect()
        with pytest.raises(Exception):
            redshift._unload_generated_files()
def testunload_no_connection(mock_session, credentials, dbapi):
    """``unload`` must raise when disconnected or when the SQL execution fails."""
    with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
        # Never connected: unload has no cursor to work with.
        redshift = locopy.Redshift(dbapi=dbapi, **credentials)
        with pytest.raises(Exception):
            redshift.unload("query", "path")

        # Connected, but the underlying execute call blows up.
        mock_connect.return_value.cursor.return_value.execute.side_effect = Exception()
        redshift = locopy.Redshift(dbapi=dbapi, **credentials)
        redshift.connect()
        with pytest.raises(Exception):
            redshift.unload("query", "path")
def test_redshiftcopy(mock_session, credentials, dbapi):
    """Verify ``Redshift.copy`` issues the expected COPY statement for the
    default and an explicit (tab) delimiter."""
    with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
        redshift = locopy.Redshift(dbapi=dbapi, **credentials)
        redshift.connect()
        execute = mock_connect.return_value.cursor.return_value.execute
        creds = redshift.session.get_credentials()

        # Default pipe delimiter.
        redshift.copy("table", "s3bucket")
        assert execute.called
        execute.assert_called_with(
            "COPY table FROM 's3bucket' CREDENTIALS "
            "'aws_access_key_id={0};aws_secret_access_key={1};token={2}' "
            "DELIMITER '|' DATEFORMAT 'auto' COMPUPDATE ON "
            "TRUNCATECOLUMNS;".format(creds.access_key, creds.secret_key, creds.token),
            (),
        )

        # Explicit tab delimiter.
        redshift.copy("table", "s3bucket", delim="\t")
        assert execute.called
        execute.assert_called_with(
            "COPY table FROM 's3bucket' CREDENTIALS "
            "'aws_access_key_id={0};aws_secret_access_key={1};token={2}' "
            "DELIMITER '\t' DATEFORMAT 'auto' COMPUPDATE ON "
            "TRUNCATECOLUMNS;".format(creds.access_key, creds.secret_key, creds.token),
            (),
        )
def test_copy(s3_bucket, dbapi):
    """Integration test: load a local file through S3 into a temp table and
    verify the rows (including non-ASCII text) round-trip intact."""
    expected_rows = [
        (1, "This iš line 1"),
        (2, "This is liné 2"),
        (3, "This is line 3"),
        (4, "This is lïne 4"),
    ]
    with locopy.Redshift(dbapi=dbapi, **CREDS_DICT) as redshift:
        redshift.execute(
            "CREATE TEMPORARY TABLE locopy_integration_testing (id INTEGER, variable VARCHAR(20)) DISTKEY(variable)"
        )
        redshift.load_and_copy(
            LOCAL_FILE,
            S3_BUCKET,
            "locopy_integration_testing",
            delim="|",
            delete_s3_after=True,
            compress=False,
        )
        redshift.execute("SELECT * FROM locopy_integration_testing ORDER BY id")
        for row, want in zip(redshift.cursor.fetchall(), expected_rows):
            assert row[0] == want[0]
            assert row[1] == want[1]
def test_unload(s3_bucket, dbapi):
    """Integration test: unload a generated date series to S3/local and verify
    the table's contents match the expected sequence."""
    expected_dates = ["2018-01-01", "2018-01-02", "2018-01-03", "2018-01-04", "2018-01-05"]
    with locopy.Redshift(dbapi=dbapi, **CREDS_DICT) as redshift:
        redshift.execute(
            "CREATE TEMPORARY TABLE locopy_integration_testing AS SELECT ('2017-12-31'::date + row_number() over (order by 1))::date from SVV_TABLES LIMIT 5"
        )
        redshift.unload_and_copy(
            "SELECT * FROM locopy_integration_testing",
            S3_BUCKET,
            delimiter="|",
            export_path=LOCAL_FILE_DL,
        )
        redshift.execute("SELECT * FROM locopy_integration_testing ORDER BY date")
        for row, want in zip(redshift.cursor.fetchall(), expected_dates):
            assert row[0].strftime("%Y-%m-%d") == want
    # Clean up the locally exported file.
    os.remove(LOCAL_FILE_DL)
def testinsert_dataframe_to_table(mock_session, credentials, dbapi):
    """Unit test for ``insert_dataframe_to_table`` covering the plain insert,
    ``create=True``, a column subset, explicit ``metadata``, and ``batch_size``.

    Fix: the final ``assert_any_call`` SQL literal had been broken across a
    line break ("INSERT INTO \\n database.schema.test ..."), so it could never
    match the SQL the library generates; the single-line literal is restored.
    """
    import pandas as pd

    test_df = pd.read_csv(os.path.join(CURR_DIR, "data", "mock_dataframe.txt"), sep=",")
    with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
        r = locopy.Redshift(dbapi=dbapi, **credentials)
        r.connect()
        execute = mock_connect.return_value.cursor.return_value.execute

        # Plain insert: one multi-row VALUES statement.
        r.insert_dataframe_to_table(test_df, "database.schema.test")
        execute.assert_called_with(
            "INSERT INTO database.schema.test (a,b,c) VALUES ('1', 'x', '2011-01-01'), ('2', 'y', '2001-04-02')",
            (),
        )

        # create=True infers column types from the dataframe.
        r.insert_dataframe_to_table(test_df, "database.schema.test", create=True)
        execute.assert_any_call("CREATE TABLE database.schema.test (a int,b varchar,c date)", ())
        execute.assert_called_with(
            "INSERT INTO database.schema.test (a,b,c) VALUES ('1', 'x', '2011-01-01'), ('2', 'y', '2001-04-02')",
            (),
        )

        # Only a subset of columns is inserted when columns= is given.
        r.insert_dataframe_to_table(test_df, "database.schema.test", columns=["a", "b"])
        execute.assert_called_with(
            "INSERT INTO database.schema.test (a,b) VALUES ('1', 'x'), ('2', 'y')", ()
        )

        # Explicit metadata overrides the inferred column names/types.
        r.insert_dataframe_to_table(
            test_df,
            "database.schema.test",
            create=True,
            metadata=OrderedDict([("col1", "int"), ("col2", "varchar"), ("col3", "date")]),
        )
        execute.assert_any_call(
            "CREATE TABLE database.schema.test (col1 int,col2 varchar,col3 date)", ()
        )
        execute.assert_called_with(
            "INSERT INTO database.schema.test (col1,col2,col3) VALUES ('1', 'x', '2011-01-01'), ('2', 'y', '2001-04-02')",
            (),
        )

        # batch_size=1 issues one INSERT statement per row.
        r.insert_dataframe_to_table(test_df, "database.schema.test", create=False, batch_size=1)
        execute.assert_any_call(
            "INSERT INTO database.schema.test (a,b,c) VALUES ('1', 'x', '2011-01-01')", ()
        )
        execute.assert_any_call(
            "INSERT INTO database.schema.test (a,b,c) VALUES ('2', 'y', '2001-04-02')", ()
        )
def test_insert_dataframe_to_table(s3_bucket, dbapi):
    """Integration test of ``insert_dataframe_to_table``: table creation,
    batched inserts, and mixed dtypes (datetime, bool, Decimal, None,
    embedded single quote)."""
    from decimal import Decimal

    with locopy.Redshift(dbapi=dbapi, **CREDS_DICT) as redshift:
        # 1) create + insert from a small dataframe.
        redshift.insert_dataframe_to_table(TEST_DF, "locopy_df_test", create=True)
        redshift.execute("SELECT a, b, c FROM locopy_df_test ORDER BY a ASC")
        results = redshift.cursor.fetchall()
        redshift.execute("drop table if exists locopy_df_test")
        expected = [
            (1, "x", pd.to_datetime("2011-01-01").date()),
            (2, "y", pd.to_datetime("2001-04-02").date()),
        ]
        assert len(expected) == len(results)
        for got, want in zip(results, expected):
            assert got[0] == want[0]
            assert got[1] == want[1]
            assert got[2] == want[2]

        # 2) batched inserts (batch_size smaller than the row count).
        redshift.insert_dataframe_to_table(TEST_DF_2, "locopy_test_2", create=True, batch_size=3)
        redshift.execute("SELECT col1, col2 FROM locopy_test_2 ORDER BY col1 ASC")
        results = redshift.cursor.fetchall()
        redshift.execute("drop table if exists locopy_test_2")
        expected = [(1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e"), (6, "f"), (7, "g")]
        assert len(expected) == len(results)
        for got, want in zip(results, expected):
            assert got[0] == want[0]
            assert got[1] == want[1]

        # 3) mixed dtypes survive the round trip.
        mixed_df = pd.DataFrame(
            {
                "a": [1, 2],
                "b": [pd.to_datetime("2013-01-01"), pd.to_datetime("2019-01-01")],
                "c": [True, False],
                "d": [Decimal(2), Decimal(3)],
                "e": [None, "x'y"],
            }
        )
        redshift.insert_dataframe_to_table(mixed_df, "locopy_test_3", create=True)
        redshift.execute("SELECT a, b, c, d, e FROM locopy_test_3 ORDER BY a ASC")
        results = redshift.cursor.fetchall()
        redshift.execute("drop table if exists locopy_test_3")
        expected = [
            (1, pd.to_datetime("2013-01-01"), True, 2, None),
            (2, pd.to_datetime("2019-01-01"), False, 3, "x'y"),
        ]
        assert len(expected) == len(results)
        for got, want in zip(results, expected):
            for col in range(5):
                assert got[col] == want[col]
def test_redshift_execute_single_rows(dbapi):
    """A single-row SELECT should round-trip into a one-row DataFrame.

    Fix: the original built an expected frame with both ``field_1`` and
    ``field_2`` but only asserted ``field_1``; ``field_2`` is now checked as
    well so a regression in either column is caught.
    """
    expected = pd.DataFrame({"field_1": [1], "field_2": [2]})
    with locopy.Redshift(dbapi=dbapi, **CREDS_DICT) as test:
        test.execute("SELECT 1 AS field_1, 2 AS field_2 ")
        df = test.to_dataframe()
        assert np.allclose(df["field_1"], expected["field_1"])
        assert np.allclose(df["field_2"], expected["field_2"])
def test_redshiftcopy_exception(mock_connected, mock_session, credentials, dbapi):
    """``copy`` must raise DBError when disconnected or when execute fails."""
    with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
        redshift = locopy.Redshift(dbapi=dbapi, **credentials)

        # Not connected -> DBError before any SQL is attempted.
        mock_connected.return_value = False
        with pytest.raises(DBError):
            redshift.copy("table", "s3bucket")

        # Connected, but the COPY statement itself errors out.
        mock_connected.return_value = True
        mock_connect.return_value.cursor.return_value.execute.side_effect = Exception(
            "COPY Exception"
        )
        with pytest.raises(DBError):
            redshift.copy("table", "s3bucket")
def testunload(mock_session, credentials=credentials(), dbapi=DBAPIS):
    """Smoke test: a connected ``Redshift.unload`` executes SQL via the cursor.

    Fix: the patch target was built with ``f"{dbapi}.connect"``, which
    interpolates the module's repr rather than a dotted import path, so
    ``mock.patch`` could never resolve it; ``dbapi.__name__`` is used,
    matching the sibling tests in this file.
    """
    with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
        r = locopy.Redshift(dbapi=dbapi, **credentials)
        r.connect()
        r.unload("query", "path")
        assert mock_connect.return_value.cursor.return_value.execute.called
def test_unload_and_copy(
    cls,
    mock_session,
    mock_generate_unload_path,
    mock_unload,
    mock_unload_generated_files,
    mock_get_col_names,
    mock_download_list_from_s3,
    mock_write,
    mock_delete_list_from_s3,
    mock_concat,
    credentials=credentials(),
    dbapi=DBAPIS,
):
    """Unit test of ``unload_and_copy`` across its option combinations.

    Fix: the final check used ``mock_delete_list_from_s3.called_with(...)``,
    which is not a Mock assertion method (it just creates a child mock, so
    the assert was always truthy); it is replaced with a real ``.called``
    check. TODO(review): pin the exact delete arguments once the expected
    (bucket, keys) contract is confirmed.
    """

    def reset_mocks():
        # Clear call history between scenarios so assertions are isolated.
        mock_session.reset_mock()
        mock_generate_unload_path.reset_mock()
        mock_unload_generated_files.reset_mock()
        mock_get_col_names.reset_mock()
        mock_write.reset_mock()
        mock_download_list_from_s3.reset_mock()
        mock_delete_list_from_s3.reset_mock()
        mock_concat.reset_mock()

    with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
        r = locopy.Redshift(dbapi=dbapi, **credentials)

        ##
        ## Test 1: check that basic export pipeline functions are called
        mock_unload_generated_files.return_value = ["dummy_file"]
        mock_download_list_from_s3.return_value = ["s3.file"]
        mock_get_col_names.return_value = ["dummy_col_name"]
        mock_generate_unload_path.return_value = "dummy_s3_path"
        ## ensure nothing is returned when read=False
        r.unload_and_copy(
            query="query",
            s3_bucket="s3_bucket",
            s3_folder=None,
            export_path=False,
            delimiter=",",
            delete_s3_after=False,
            parallel_off=False,
        )
        assert mock_unload_generated_files.called
        assert not mock_write.called, (
            "write_file should only be called " "if export_path != False"
        )
        mock_generate_unload_path.assert_called_with("s3_bucket", None)
        mock_get_col_names.assert_called_with("query")
        mock_unload.assert_called_with(
            query="query", s3path="dummy_s3_path", unload_options=["DELIMITER ','"]
        )
        assert not mock_delete_list_from_s3.called

        ##
        ## Test 2: different delimiter
        reset_mocks()
        mock_unload_generated_files.return_value = ["dummy_file"]
        mock_download_list_from_s3.return_value = ["s3.file"]
        mock_get_col_names.return_value = ["dummy_col_name"]
        mock_generate_unload_path.return_value = "dummy_s3_path"
        r.unload_and_copy(
            query="query",
            s3_bucket="s3_bucket",
            s3_folder=None,
            export_path=False,
            delimiter="|",
            delete_s3_after=False,
            parallel_off=True,
        )
        ## check that unload options are modified based on supplied args
        mock_unload.assert_called_with(
            query="query",
            s3path="dummy_s3_path",
            unload_options=["DELIMITER '|'", "PARALLEL OFF"],
        )
        assert not mock_delete_list_from_s3.called

        ##
        ## Test 3: ensure exception is raised when no column names are retrieved
        reset_mocks()
        mock_unload_generated_files.return_value = ["dummy_file"]
        mock_generate_unload_path.return_value = "dummy_s3_path"
        mock_get_col_names.return_value = None
        with pytest.raises(Exception):
            r.unload_and_copy("query", "s3_bucket", None)

        ##
        ## Test 4: ensure exception is raised when no files are returned
        reset_mocks()
        mock_generate_unload_path.return_value = "dummy_s3_path"
        mock_get_col_names.return_value = ["dummy_col_name"]
        mock_unload_generated_files.return_value = None
        with pytest.raises(Exception):
            r.unload_and_copy("query", "s3_bucket", None)

        ##
        ## Test 5: ensure file writing is initiated when export_path is supplied
        reset_mocks()
        mock_get_col_names.return_value = ["dummy_col_name"]
        mock_download_list_from_s3.return_value = ["s3.file"]
        mock_generate_unload_path.return_value = "dummy_s3_path"
        mock_unload_generated_files.return_value = ["/dummy_file"]
        r.unload_and_copy(
            query="query",
            s3_bucket="s3_bucket",
            s3_folder=None,
            export_path="my_output.csv",
            delimiter=",",
            delete_s3_after=True,
            parallel_off=False,
        )
        mock_concat.assert_called_with(
            mock_download_list_from_s3.return_value, "my_output.csv"
        )
        assert mock_write.called
        # ``called_with`` is not an assertion; verify the delete actually ran.
        assert mock_delete_list_from_s3.called
def testunload(mock_session, credentials, dbapi):
    """Smoke test: a connected ``Redshift.unload`` runs SQL through the cursor."""
    with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
        redshift = locopy.Redshift(dbapi=dbapi, **credentials)
        redshift.connect()
        redshift.unload("query", "path")
        cursor = mock_connect.return_value.cursor.return_value
        assert cursor.execute.called