def test__connect_with_environment():
    """Check that ``rs.connect_with_environment`` yields psycopg2 connections.

    The helper is called twice with ``ALYESKA_REDSHIFT_SECRET``: once with no
    extra arguments and once with ``enable_autocommit=False``. Each result
    must be a ``psycopg2.extensions.connection``.
    """
    cnxn = rs.connect_with_environment(ALYESKA_REDSHIFT_SECRET)
    assert isinstance(cnxn, psycopg2.extensions.connection)
    # Close before rebinding ``cnxn``; otherwise this session is orphaned and
    # stays open until the interpreter happens to collect it.
    cnxn.close()

    cnxn = rs.connect_with_environment(
        ALYESKA_REDSHIFT_SECRET, enable_autocommit=False
    )
    assert isinstance(cnxn, psycopg2.extensions.connection)
    cnxn.close()
def test_input__assert_table_exists():
    """Check how ``rp.assert_table_exists`` reacts to bad input.

    Expectations pinned here:

    * calling it with only two string arguments raises ``TypeError``;
    * calling it with two positional strings plus ``cnxn=`` raises
      ``TypeError``;
    * calling it with a connection and the names ``"bad_schema"`` /
      ``"bad_table"`` raises ``rp.MissingTableError``.
    """
    cnxn = rs.connect_with_environment(ALYESKA_REDSHIFT_SECRET)
    try:
        with pytest.raises(TypeError):
            rp.assert_table_exists("etl", "account")
        with pytest.raises(TypeError):
            rp.assert_table_exists("etl", "account", cnxn=cnxn)
    finally:
        cnxn.close()

    # A fresh connection is used for the lookup, as in the original test, but
    # it is opened *outside* the ``pytest.raises`` block so that only the call
    # under test can satisfy the expectation.
    cnxn = rs.connect_with_environment(ALYESKA_REDSHIFT_SECRET)
    try:
        with pytest.raises(rp.MissingTableError):
            rp.assert_table_exists(cnxn, "bad_schema", "bad_table")
    finally:
        cnxn.close()
def test_output__execute_tasks(tmpdir):
    """Run ``sa.execute_tasks`` and verify the ids found in ``temp_alyeska``.

    The first three items returned by ``make_dummy_dir(tmpdir)`` are passed to
    ``sa.execute_tasks``; afterwards ``temp_alyeska`` must contain exactly the
    ids 1 through 6 when ordered by ``id``.
    """
    first_task, second_task, third_task, _ = make_dummy_dir(tmpdir)
    cnxn = connect_with_environment(ALYESKA_REDSHIFT_SECRET)

    sa.execute_tasks(cnxn, first_task, second_task, third_task)

    query = "SELECT id FROM temp_alyeska ORDER BY id"
    frame = pd.read_sql(query, cnxn)
    ids = frame["id"].tolist()
    assert ids == [1, 2, 3, 4, 5, 6]
def test_output__run_subtasks(tmpdir):
    """Run two subtasks via ``sa.run_subtasks`` and expect rows in ``temp_alyeska``.

    Only the first two items returned by ``make_dummy_dir`` are used; they are
    mapped to the labels ``"sql1"`` and ``"sql2"``.
    """
    cnxn = connect_with_environment(ALYESKA_REDSHIFT_SECRET)
    first_task, second_task, _, _ = make_dummy_dir(pathlib.Path(tmpdir))

    subtasks = OrderedDict([(first_task, "sql1"), (second_task, "sql2")])
    sa.run_subtasks(cnxn, subtasks)

    frame = pd.read_sql("SELECT id FROM temp_alyeska", cnxn)
    assert not frame.empty
def test_input__run_subtasks(tmpdir):
    """Expect ``sa.run_subtasks`` to raise ``ValueError`` for this subtask map.

    All four items returned by ``make_dummy_dir`` are included, the last one
    under the label ``"txt"`` (presumably a non-SQL file -- confirm against
    ``make_dummy_dir``).
    """
    cnxn = connect_with_environment(ALYESKA_REDSHIFT_SECRET)
    first_task, second_task, third_task, text_file = make_dummy_dir(
        pathlib.Path(tmpdir)
    )

    subtasks = OrderedDict(
        [
            (first_task, "sql1"),
            (second_task, "sql2"),
            (third_task, "sql3"),
            (text_file, "txt"),
        ]
    )

    with pytest.raises(ValueError):
        sa.run_subtasks(cnxn, subtasks)
def test_output__process_batch(tmpdir):
    """Run ``sa.process_batch`` on ``tmpdir`` and verify ``temp_alyeska`` ids.

    ``make_dummy_dir(tmpdir)`` is called first for its effect on ``tmpdir``
    (its return value is ignored). After the batch runs, ``temp_alyeska`` must
    contain exactly the ids 1 through 6 when ordered by ``id``.
    """
    make_dummy_dir(tmpdir)
    cnxn = connect_with_environment(ALYESKA_REDSHIFT_SECRET)

    sa.process_batch(cnxn, tmpdir)

    query = "SELECT id FROM temp_alyeska ORDER BY id"
    frame = pd.read_sql(query, cnxn)
    ids = frame["id"].tolist()
    assert ids == [1, 2, 3, 4, 5, 6]
def test__insert_pandas_into():
    """Check that ``rp.insert_pandas_into`` loads a DataFrame into a table.

    A temp table with integer columns ``a``, ``b`` and ``c`` is created, a
    three-row frame holding only ``a`` and ``c`` is inserted, and the table is
    read back. An inner merge of the expected and actual frames on ``a`` and
    ``c`` must keep all three rows.
    """
    expected_len = 3
    expected_df = pd.DataFrame(
        {"a": range(expected_len), "c": range(expected_len)}
    )
    table_name = "temp_test"

    cnxn = rs.connect_with_environment(ALYESKA_REDSHIFT_SECRET)
    try:
        aly.sqlagent.execute_sql(
            cnxn, f"CREATE TEMP TABLE {table_name}(a INT, b INT, c INT);"
        )
        rp.insert_pandas_into(cnxn, table_name, expected_df)

        actual_df = pd.read_sql(f"SELECT * FROM {table_name}", cnxn)
        test_result = pd.merge(expected_df, actual_df, on=["a", "c"], how="inner")
        assert len(test_result) == expected_len

        aly.sqlagent.execute_sql(cnxn, f"DROP TABLE {table_name};")
    finally:
        # Always release the session. The explicit DROP above is skipped when
        # an earlier step fails; closing the connection ends the session that
        # owns the TEMP table, so nothing should be left behind either way.
        cnxn.close()
def test_output__assert_table_exists():
    """Smoke test: ``rp.assert_table_exists`` must not raise for ``etl.account``."""
    cnxn = rs.connect_with_environment(ALYESKA_REDSHIFT_SECRET)
    schema, table = "etl", "account"
    rp.assert_table_exists(cnxn, schema, table)