Beispiel #1
0
    def test_get_column_names(cls,
                              mock_session,
                              credentials=credentials(),
                              dbapi=DBAPIS):
        """Exercise ``Redshift._get_column_names`` against a patched DBAPI ``connect``.

        Scenarios, in order:

        1. No cursor description configured on the mock: ``None`` is expected
           and the generated SQL is checked on ``cursor.execute``.
        2. Description holds two padded names: the stripped names are expected.
        3. ``cursor.execute`` raises: the exception must reach the caller.

        NOTE(review): ``dbapi`` is treated as a module object (its ``__name__``
        is read and it is passed to ``locopy.Redshift``), so the ``DBAPIS``
        default is presumably overridden per case by parametrization -- confirm.
        """
        # Patch by module name; formatting the module object itself would yield
        # its repr ("<module ...>"), which is not an importable patch target.
        with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
            cursor = mock_connect.return_value.cursor.return_value

            # Use the same ``dbapi`` whose ``connect`` is patched, otherwise the
            # instance would go through an unpatched driver.
            r = locopy.Redshift(dbapi=dbapi, **credentials)
            r.connect()
            assert r._get_column_names("query") is None
            sql = "SELECT * FROM (query) WHERE 1 = 0"
            # ``called_with`` is not a Mock assertion (it is an auto-created,
            # always-truthy attribute, or an AttributeError on newer Pythons);
            # ``assert_called_with`` actually verifies the arguments.
            cursor.execute.assert_called_with(sql, ())

            cursor.description = [["COL1 "], ["COL2 "]]
            r = locopy.Redshift(dbapi=dbapi, **credentials)
            r.connect()
            assert r._get_column_names("query") == ["COL1", "COL2"]

            cursor.execute.side_effect = Exception()
            r = locopy.Redshift(dbapi=dbapi, **credentials)
            r.connect()
            with pytest.raises(Exception):
                r._get_column_names("query")
Beispiel #2
0
    def test_unload_generated_files(cls,
                                    mock_session,
                                    credentials=credentials(),
                                    dbapi=DBAPIS):
        """Check ``Redshift._unload_generated_files`` with a patched DBAPI ``connect``.

        Covers three situations: ``fetchall`` left unconfigured on the mock
        (``None`` expected), ``fetchall`` yielding two padded file names
        (stripped names expected), and ``execute`` raising (exception expected).
        """
        connect_target = dbapi.__name__ + ".connect"
        with mock.patch(connect_target) as mock_connect:
            cursor_mock = mock_connect.return_value.cursor.return_value

            # Scenario 1: nothing configured on the cursor.
            redshift = locopy.Redshift(dbapi=dbapi, **credentials)
            redshift.connect()
            redshift._unload_generated_files()
            assert redshift._unload_generated_files() is None

            # Scenario 2: two rows come back, each with trailing whitespace.
            cursor_mock.fetchall.return_value = [["File1 "], ["File2 "]]
            redshift = locopy.Redshift(dbapi=dbapi, **credentials)
            redshift.connect()
            redshift._unload_generated_files()
            assert redshift._unload_generated_files() == ["File1", "File2"]

            # Scenario 3: the query itself fails.
            cursor_mock.execute.side_effect = Exception()
            redshift = locopy.Redshift(dbapi=dbapi, **credentials)
            redshift.connect()
            with pytest.raises(Exception):
                redshift._unload_generated_files()
Beispiel #3
0
def testunload_no_connection(mock_session, credentials, dbapi):
    """``Redshift.unload`` must raise when unconnected and when ``execute`` fails."""
    connect_target = dbapi.__name__ + ".connect"
    with mock.patch(connect_target) as mock_connect:
        # Case 1: ``connect()`` was never called on this instance.
        never_connected = locopy.Redshift(dbapi=dbapi, **credentials)
        with pytest.raises(Exception):
            never_connected.unload("query", "path")

        # Case 2: connected, but the cursor's ``execute`` raises.
        execute_mock = mock_connect.return_value.cursor.return_value.execute
        execute_mock.side_effect = Exception()
        connected = locopy.Redshift(dbapi=dbapi, **credentials)
        connected.connect()
        with pytest.raises(Exception):
            connected.unload("query", "path")
Beispiel #4
0
def test_redshiftcopy(mock_session, credentials, dbapi):
    """Verify the COPY statement ``Redshift.copy`` sends to ``cursor.execute``.

    Checked once without a ``delim`` argument (the expected SQL uses ``'|'``)
    and once with a tab delimiter.
    """
    with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
        execute_mock = mock_connect.return_value.cursor.return_value.execute

        r = locopy.Redshift(dbapi=dbapi, **credentials)
        r.connect()

        # no delim argument supplied
        r.copy("table", "s3bucket")
        assert execute_mock.called
        pipe_statement = (
            "COPY table FROM 's3bucket' CREDENTIALS "
            "'aws_access_key_id={0};aws_secret_access_key={1};token={2}' "
            "DELIMITER '|' DATEFORMAT 'auto' COMPUPDATE ON "
            "TRUNCATECOLUMNS;"
        ).format(
            r.session.get_credentials().access_key,
            r.session.get_credentials().secret_key,
            r.session.get_credentials().token,
        )
        execute_mock.assert_called_with(pipe_statement, ())

        # tab delim
        r.copy("table", "s3bucket", delim="\t")
        assert execute_mock.called
        tab_statement = (
            "COPY table FROM 's3bucket' CREDENTIALS "
            "'aws_access_key_id={0};aws_secret_access_key={1};token={2}' "
            "DELIMITER '\t' DATEFORMAT 'auto' COMPUPDATE ON "
            "TRUNCATECOLUMNS;"
        ).format(
            r.session.get_credentials().access_key,
            r.session.get_credentials().secret_key,
            r.session.get_credentials().token,
        )
        execute_mock.assert_called_with(tab_statement, ())
Beispiel #5
0
def test_copy(s3_bucket, dbapi):
    """Load ``LOCAL_FILE`` into a temporary table via ``load_and_copy`` and read it back.

    No mocks are installed in this function; the connection is opened from
    ``CREDS_DICT``. The rows selected back (ordered by ``id``) must match
    ``expected`` exactly, including the non-ASCII characters.

    ``s3_bucket`` is not referenced in the body -- presumably a fixture
    requested for its setup side effects; confirm.
    """
    with locopy.Redshift(dbapi=dbapi, **CREDS_DICT) as redshift:
        redshift.execute(
            "CREATE TEMPORARY TABLE locopy_integration_testing (id INTEGER, variable VARCHAR(20)) DISTKEY(variable)"
        )
        redshift.load_and_copy(
            LOCAL_FILE,
            S3_BUCKET,
            "locopy_integration_testing",
            delim="|",
            delete_s3_after=True,
            compress=False,
        )
        redshift.execute(
            "SELECT * FROM locopy_integration_testing ORDER BY id")
        results = redshift.cursor.fetchall()

        expected = [
            (1, "This iš line 1"),
            (2, "This is liné 2"),
            (3, "This is line 3"),
            (4, "This is lïne 4"),
        ]

        # Guard against a vacuous pass: with no (or too few) rows returned the
        # per-row loop below would otherwise succeed without checking anything.
        assert len(results) == len(expected)
        for result, (expected_id, expected_text) in zip(results, expected):
            assert result[0] == expected_id
            assert result[1] == expected_text
Beispiel #6
0
def test_unload(s3_bucket, dbapi):
    """Unload a five-row temporary table through ``unload_and_copy`` and verify it.

    No mocks are installed in this function; the connection is opened from
    ``CREDS_DICT``. The export is written to ``LOCAL_FILE_DL``, which is
    removed again even when an assertion or a database call fails.

    ``s3_bucket`` is not referenced in the body -- presumably a fixture
    requested for its setup side effects; confirm.
    """
    with locopy.Redshift(dbapi=dbapi, **CREDS_DICT) as redshift:
        try:
            redshift.execute(
                "CREATE TEMPORARY TABLE locopy_integration_testing AS SELECT ('2017-12-31'::date + row_number() over (order by 1))::date from SVV_TABLES LIMIT 5"
            )
            sql = "SELECT * FROM locopy_integration_testing"
            redshift.unload_and_copy(sql,
                                     S3_BUCKET,
                                     delimiter="|",
                                     export_path=LOCAL_FILE_DL)
            # The original relied on os.remove() raising to notice a missing
            # export; make that expectation an explicit assertion.
            assert os.path.exists(LOCAL_FILE_DL)

            redshift.execute(
                "SELECT * FROM locopy_integration_testing ORDER BY date")
            results = redshift.cursor.fetchall()

            expected = [
                ("2018-01-01", ),
                ("2018-01-02", ),
                ("2018-01-03", ),
                ("2018-01-04", ),
                ("2018-01-05", ),
            ]

            # Without the length check an empty result set would pass silently.
            assert len(results) == len(expected)
            for result, (expected_day, ) in zip(results, expected):
                assert result[0].strftime("%Y-%m-%d") == expected_day
        finally:
            # Always clean up the downloaded file so a failing run does not
            # leave stale output behind for the next one.
            if os.path.exists(LOCAL_FILE_DL):
                os.remove(LOCAL_FILE_DL)
Beispiel #7
0
def testinsert_dataframe_to_table(mock_session, credentials, dbapi):
    """Check the SQL ``Redshift.insert_dataframe_to_table`` passes to ``cursor.execute``.

    The dataframe is read from ``data/mock_dataframe.txt`` under ``CURR_DIR``.
    Five call variants are exercised: defaults, ``create=True``, a column
    subset, ``create=True`` with explicit ``metadata``, and ``batch_size=1``.
    """
    import pandas as pd

    frame = pd.read_csv(os.path.join(CURR_DIR, "data", "mock_dataframe.txt"),
                        sep=",")
    target_table = "database.schema.test"
    # Statement expected whenever both rows go out with columns a, b, c.
    insert_both_rows = "INSERT INTO database.schema.test (a,b,c) VALUES ('1', 'x', '2011-01-01'), ('2', 'y', '2001-04-02')"

    with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
        execute_mock = mock_connect.return_value.cursor.return_value.execute
        r = locopy.Redshift(dbapi=dbapi, **credentials)
        r.connect()

        # Variant 1: default arguments.
        r.insert_dataframe_to_table(frame, target_table)
        execute_mock.assert_called_with(insert_both_rows, ())

        # Variant 2: create=True adds a CREATE TABLE before the INSERT.
        r.insert_dataframe_to_table(frame, target_table, create=True)
        execute_mock.assert_any_call(
            "CREATE TABLE database.schema.test (a int,b varchar,c date)", ())
        execute_mock.assert_called_with(insert_both_rows, ())

        # Variant 3: only a subset of the columns.
        r.insert_dataframe_to_table(frame, target_table, columns=["a", "b"])
        execute_mock.assert_called_with(
            "INSERT INTO database.schema.test (a,b) VALUES ('1', 'x'), ('2', 'y')",
            (),
        )

        # Variant 4: create=True with caller-supplied column metadata.
        column_metadata = OrderedDict([
            ("col1", "int"),
            ("col2", "varchar"),
            ("col3", "date"),
        ])
        r.insert_dataframe_to_table(
            frame,
            target_table,
            create=True,
            metadata=column_metadata,
        )
        execute_mock.assert_any_call(
            "CREATE TABLE database.schema.test (col1 int,col2 varchar,col3 date)",
            (),
        )
        execute_mock.assert_called_with(
            "INSERT INTO database.schema.test (col1,col2,col3) VALUES ('1', 'x', '2011-01-01'), ('2', 'y', '2001-04-02')",
            (),
        )

        # Variant 5: batch_size=1 yields one INSERT per row.
        r.insert_dataframe_to_table(frame,
                                    target_table,
                                    create=False,
                                    batch_size=1)
        execute_mock.assert_any_call(
            "INSERT INTO database.schema.test (a,b,c) VALUES ('1', 'x', '2011-01-01')",
            (),
        )
        execute_mock.assert_any_call(
            "INSERT INTO database.schema.test (a,b,c) VALUES ('2', 'y', '2001-04-02')",
            (),
        )
Beispiel #8
0
def test_insert_dataframe_to_table(s3_bucket, dbapi):
    """Insert three dataframes with ``insert_dataframe_to_table`` and read them back.

    No mocks are installed in this function; the connection is opened from
    ``CREDS_DICT``. Each table is created by the insert (``create=True``),
    selected from, dropped, and then compared with the expected rows.
    """
    from decimal import Decimal

    def check_rows(actual, wanted):
        # Same row count, then every column of every row must be equal.
        assert len(wanted) == len(actual)
        for row, wanted_row in zip(actual, wanted):
            for position, value in enumerate(wanted_row):
                assert row[position] == value

    with locopy.Redshift(dbapi=dbapi, **CREDS_DICT) as redshift:
        # First frame: default batching.
        redshift.insert_dataframe_to_table(TEST_DF, "locopy_df_test", create=True)
        redshift.execute("SELECT a, b, c FROM locopy_df_test ORDER BY a ASC")
        fetched = redshift.cursor.fetchall()
        redshift.execute("drop table if exists locopy_df_test")
        check_rows(
            fetched,
            [
                (1, "x", pd.to_datetime("2011-01-01").date()),
                (2, "y", pd.to_datetime("2001-04-02").date()),
            ],
        )

        # Second frame: inserted in batches of three.
        redshift.insert_dataframe_to_table(TEST_DF_2, "locopy_test_2", create=True, batch_size=3)
        redshift.execute("SELECT col1, col2 FROM locopy_test_2 ORDER BY col1 ASC")
        fetched = redshift.cursor.fetchall()
        redshift.execute("drop table if exists locopy_test_2")
        check_rows(
            fetched,
            [(1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e"), (6, "f"), (7, "g")],
        )

        # Third frame: mixed types, a None and a value containing a quote.
        mixed_frame = pd.DataFrame(
            {
                "a": [1, 2],
                "b": [pd.to_datetime("2013-01-01"), pd.to_datetime("2019-01-01")],
                "c": [True, False],
                "d": [Decimal(2), Decimal(3)],
                "e": [None, "x'y"],
            }
        )
        redshift.insert_dataframe_to_table(mixed_frame, "locopy_test_3", create=True)
        redshift.execute("SELECT a, b, c, d, e FROM locopy_test_3 ORDER BY a ASC")
        fetched = redshift.cursor.fetchall()
        redshift.execute("drop table if exists locopy_test_3")
        check_rows(
            fetched,
            [
                (1, pd.to_datetime("2013-01-01"), True, 2, None),
                (2, pd.to_datetime("2019-01-01"), False, 3, "x'y"),
            ],
        )
Beispiel #9
0
def test_redshift_execute_single_rows(dbapi):
    """Run a one-row SELECT and check the dataframe from ``to_dataframe``.

    No mocks are installed in this function; the connection is opened from
    ``CREDS_DICT``. Every column of ``expected`` is compared, and the row
    count is checked too.
    """
    expected = pd.DataFrame({"field_1": [1], "field_2": [2]})
    with locopy.Redshift(dbapi=dbapi, **CREDS_DICT) as test:
        test.execute("SELECT 1 AS field_1, 2 AS field_2 ")
        df = test.to_dataframe()

    # np.allclose broadcasts, so a frame with extra identical rows would still
    # compare equal to a single expected value; pin the row count explicitly.
    assert len(df) == len(expected)
    # Check both selected fields, not only the first one.
    for column in expected.columns:
        assert np.allclose(df[column], expected[column])
Beispiel #10
0
def test_redshiftcopy_exception(mock_connected, mock_session, credentials,
                                dbapi):
    """``Redshift.copy`` must raise ``DBError`` in two failure situations.

    ``mock_connected`` is an injected mock whose return value is toggled
    here -- presumably a patch of the instance's connection check; confirm
    against the decorators on this test.
    """
    connect_target = dbapi.__name__ + ".connect"
    with mock.patch(connect_target) as mock_connect:
        redshift = locopy.Redshift(dbapi=dbapi, **credentials)

        # Situation 1: mock_connected reports False.
        mock_connected.return_value = False
        with pytest.raises(DBError):
            redshift.copy("table", "s3bucket")

        # Situation 2: mock_connected reports True but cursor.execute raises.
        mock_connected.return_value = True
        execute_mock = mock_connect.return_value.cursor.return_value.execute
        execute_mock.side_effect = Exception("COPY Exception")
        with pytest.raises(DBError):
            redshift.copy("table", "s3bucket")
Beispiel #11
0
 def testunload(mock_session, credentials=credentials(), dbapi=DBAPIS):
     """After ``connect()``, ``Redshift.unload`` must reach ``cursor.execute``.

     NOTE(review): ``dbapi`` is handed to ``locopy.Redshift`` as the driver, so
     it is treated as a module object; the ``DBAPIS`` default is presumably
     overridden per case by parametrization -- confirm.
     """
     # Build the patch target from the module's name. Formatting the module
     # object itself would produce its repr ("<module ...>"), which
     # mock.patch cannot resolve.
     with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
         r = locopy.Redshift(dbapi=dbapi, **credentials)
         r.connect()
         r.unload("query", "path")
         assert mock_connect.return_value.cursor.return_value.execute.called
Beispiel #12
0
    def test_unload_and_copy(
            cls,
            mock_session,
            mock_generate_unload_path,
            mock_unload,
            mock_unload_generated_files,
            mock_get_col_names,
            mock_download_list_from_s3,
            mock_write,
            mock_delete_list_from_s3,
            mock_concat,
            credentials=credentials(),
            dbapi=DBAPIS,
    ):
        """Drive ``Redshift.unload_and_copy`` through five scenarios with injected mocks.

        1. ``export_path=False`` and ``delimiter=","``: the pipeline helpers are
           called, nothing is written, nothing is deleted from S3.
        2. ``delimiter="|"`` with ``parallel_off=True``: the unload options change.
        3. No column names retrieved: an exception is expected.
        4. No unloaded files reported: an exception is expected.
        5. ``export_path`` supplied with ``delete_s3_after=True``: the output is
           written and concatenated, and the S3 delete helper is invoked.

        The ``mock_*`` parameters are supplied from outside this function
        (presumably by ``mock.patch`` decorators -- confirm).
        """
        def reset_mocks():
            """Clear the recorded calls on every injected mock between scenarios."""
            mock_session.reset_mock()
            mock_generate_unload_path.reset_mock()
            # mock_unload was previously left out, letting calls from earlier
            # scenarios linger in its history.
            mock_unload.reset_mock()
            mock_unload_generated_files.reset_mock()
            mock_get_col_names.reset_mock()
            mock_write.reset_mock()
            mock_download_list_from_s3.reset_mock()
            mock_delete_list_from_s3.reset_mock()
            mock_concat.reset_mock()

        with mock.patch(dbapi.__name__ + ".connect") as mock_connect:
            r = locopy.Redshift(dbapi=dbapi, **credentials)

            ##
            ## Test 1: check that basic export pipeline functions are called
            mock_unload_generated_files.return_value = ["dummy_file"]
            mock_download_list_from_s3.return_value = ["s3.file"]
            mock_get_col_names.return_value = ["dummy_col_name"]
            mock_generate_unload_path.return_value = "dummy_s3_path"

            ## export_path=False: no local file should be written
            r.unload_and_copy(
                query="query",
                s3_bucket="s3_bucket",
                s3_folder=None,
                export_path=False,
                delimiter=",",
                delete_s3_after=False,
                parallel_off=False,
            )

            assert mock_unload_generated_files.called
            assert not mock_write.called, "write_file should only be called " "if export_path != False"
            mock_generate_unload_path.assert_called_with("s3_bucket", None)
            mock_get_col_names.assert_called_with("query")
            mock_unload.assert_called_with(query="query",
                                           s3path="dummy_s3_path",
                                           unload_options=["DELIMITER ','"])
            assert not mock_delete_list_from_s3.called

            ##
            ## Test 2: different delimiter
            reset_mocks()
            mock_unload_generated_files.return_value = ["dummy_file"]
            mock_download_list_from_s3.return_value = ["s3.file"]
            mock_get_col_names.return_value = ["dummy_col_name"]
            mock_generate_unload_path.return_value = "dummy_s3_path"
            r.unload_and_copy(
                query="query",
                s3_bucket="s3_bucket",
                s3_folder=None,
                export_path=False,
                delimiter="|",
                delete_s3_after=False,
                parallel_off=True,
            )

            ## check that unload options are modified based on supplied args
            mock_unload.assert_called_with(
                query="query",
                s3path="dummy_s3_path",
                unload_options=["DELIMITER '|'", "PARALLEL OFF"])
            assert not mock_delete_list_from_s3.called

            ##
            ## Test 3: ensure exception is raised when no column names are retrieved
            reset_mocks()
            mock_unload_generated_files.return_value = ["dummy_file"]
            mock_generate_unload_path.return_value = "dummy_s3_path"
            mock_get_col_names.return_value = None
            with pytest.raises(Exception):
                r.unload_and_copy("query", "s3_bucket", None)

            ##
            ## Test 4: ensure exception is raised when no files are returned
            reset_mocks()
            mock_generate_unload_path.return_value = "dummy_s3_path"
            mock_get_col_names.return_value = ["dummy_col_name"]
            mock_unload_generated_files.return_value = None
            with pytest.raises(Exception):
                r.unload_and_copy("query", "s3_bucket", None)

            ##
            ## Test 5: ensure file writing is initiated when export_path is supplied
            reset_mocks()
            mock_get_col_names.return_value = ["dummy_col_name"]
            mock_download_list_from_s3.return_value = ["s3.file"]
            mock_generate_unload_path.return_value = "dummy_s3_path"
            mock_unload_generated_files.return_value = ["/dummy_file"]
            r.unload_and_copy(
                query="query",
                s3_bucket="s3_bucket",
                s3_folder=None,
                export_path="my_output.csv",
                delimiter=",",
                delete_s3_after=True,
                parallel_off=False,
            )
            mock_concat.assert_called_with(
                mock_download_list_from_s3.return_value, "my_output.csv")
            assert mock_write.called
            # The previous ``assert mock.called_with(...)`` checked nothing:
            # ``called_with`` is not a Mock assertion (it is an auto-created,
            # always-truthy attribute, or an AttributeError on newer Pythons).
            # Assert that the delete helper really ran for delete_s3_after=True.
            # TODO(review): tighten to assert_called_with(...) once the exact
            # arguments are confirmed against unload_and_copy's implementation.
            assert mock_delete_list_from_s3.called
Beispiel #13
0
def testunload(mock_session, credentials, dbapi):
    """After ``connect()``, ``Redshift.unload`` must reach ``cursor.execute``."""
    connect_target = dbapi.__name__ + ".connect"
    with mock.patch(connect_target) as mock_connect:
        redshift = locopy.Redshift(dbapi=dbapi, **credentials)
        redshift.connect()
        redshift.unload("query", "path")

        cursor_mock = mock_connect.return_value.cursor.return_value
        assert cursor_mock.execute.called