def test_nrel_pv_rooftops_aspects():
    # config
    config = AWSDataLakeConfig(config_file=TEST_CONFIG_FILE)
    database_name = config.pv_rooftops_db_name
    table_name = "pv_rooftop_aspects"

    # Partitions
    glue = OEDIGlue(region_name=config.region_name)
    partition_keys = glue.get_partition_keys(database_name,
                                             table_name,
                                             with_pandas=False)
    assert len(partition_keys) == 1
    assert partition_keys[0]["Name"] == "city_year"

    partition_values = glue.get_partition_values(database_name, table_name)
    if "__HIVE_DEFAULT_PARTITION__" in partition_values:
        partition_values.remove("__HIVE_DEFAULT_PARTITION__")
    assert len(partition_values) == 168

    # Query
    athena = OEDIAthena(staging_location=config.staging_location,
                        region_name=config.region_name)
    sql = f"SELECT COUNT(*) FROM {database_name}.{table_name} WHERE city_year='augusta_me_08';"
    result = athena.run_query(query_string=sql, pandas_cursor=False)
    assert result[0][0] == 197547

    sql = f"SELECT * FROM {database_name}.{table_name} WHERE city_year='augusta_me_08' AND gid=53283;"
    df = athena.run_query(query_string=sql, pandas_cursor=True)
    assert df.shape == (1, 9)
    assert df["aspect"].values[0] == 7
def test_nrel_pv_rooftops_rasd():
    # config
    config = AWSDataLakeConfig(config_file=TEST_CONFIG_FILE)
    database_name = config.pv_rooftops_db_name
    table_name = "pv_rooftop_rasd"

    # Partitions
    glue = OEDIGlue(region_name=config.region_name)
    partition_keys = glue.get_partition_keys(database_name,
                                             table_name,
                                             with_pandas=False)
    assert len(partition_keys) == 1
    assert partition_keys[0]["Name"] == "city_year"

    partition_values = glue.get_partition_values(database_name, table_name)
    if "__HIVE_DEFAULT_PARTITION__" in partition_values:
        partition_values.remove("__HIVE_DEFAULT_PARTITION__")
    assert len(partition_values) == 166

    # Query
    athena = OEDIAthena(staging_location=config.staging_location,
                        region_name=config.region_name)
    sql = f"SELECT COUNT(*) FROM {database_name}.{table_name} WHERE city_year='topeka_ks_08';"
    result = athena.run_query(query_string=sql, pandas_cursor=False)
    assert result[0][0] == 1

    sql = f"SELECT * FROM {database_name}.{table_name} WHERE city_year='augusta_me_08'"
    df = athena.run_query(query_string=sql, pandas_cursor=True)
    assert df.shape == (1, 7)
    assert df["the_geom_96703"].values[0].startswith(
        "MULTIPOLYGON Z (((2059331.26461854 2664063.99720305 86.4199981689453,"
    )


#test_nrel_pv_rooftops_aspects()
def test_lbnl_tracking_the_sun_2019():
    # config
    config = AWSDataLakeConfig(config_file=TEST_CONFIG_FILE)
    database_name = config.tracking_the_sun_db_name
    table_name = "tracking_the_sun_2019"

    # Partitions
    glue = OEDIGlue(region_name=config.region_name)
    partition_keys = glue.get_partition_keys(database_name,
                                             table_name,
                                             with_pandas=False)
    assert len(partition_keys) == 1
    assert partition_keys[0]["Name"] == "state"

    partition_values = glue.get_partition_values(database_name, table_name)
    if "__HIVE_DEFAULT_PARTITION__" in partition_values:
        partition_values.remove("__HIVE_DEFAULT_PARTITION__")
    assert len(partition_values) == 28

    # Query
    athena = OEDIAthena(staging_location=config.staging_location,
                        region_name=config.region_name)
    sql = f"SELECT COUNT(*) FROM {database_name}.{table_name} WHERE state='CO';"
    result = athena.run_query(query_string=sql, pandas_cursor=False)
    assert result[0][0] == 93930

    sql = f"SELECT * FROM {database_name}.{table_name} WHERE state='CO' AND system_id_from_first_data_provider='SRO00164';"
    df = athena.run_query(query_string=sql, pandas_cursor=True)
    assert df.shape == (2, 60)
    assert pytest.approx(df["total_installed_price"].values[0]) == 22504.46
def test_nrel_pv_rooftops_buildings():
    # config
    config = AWSDataLakeConfig(config_file=TEST_CONFIG_FILE)
    database_name = config.pv_rooftops_db_name
    table_name = "pv_rooftop_buildings"

    # Partitions
    glue = OEDIGlue(region_name=config.region_name)
    partition_keys = glue.get_partition_keys(database_name,
                                             table_name,
                                             with_pandas=False)
    assert len(partition_keys) == 1
    assert partition_keys[0]["Name"] == "city_year"

    partition_values = glue.get_partition_values(database_name, table_name)
    if "__HIVE_DEFAULT_PARTITION__" in partition_values:
        partition_values.remove("__HIVE_DEFAULT_PARTITION__")
    assert len(partition_values) == 168

    # Query
    athena = OEDIAthena(staging_location=config.staging_location,
                        region_name=config.region_name)
    sql = f"SELECT COUNT(*) FROM {database_name}.{table_name} WHERE city_year='augusta_me_08';"
    result = athena.run_query(query_string=sql, pandas_cursor=False)
    assert result[0][0] == 17431

    sql = f"SELECT * FROM {database_name}.{table_name} WHERE city_year='augusta_me_08' AND gid=16039;"
    df = athena.run_query(query_string=sql, pandas_cursor=True)
    assert df.shape == (1, 8)
    assert df["the_geom_4326"].values[0].startswith(
        "MULTIPOLYGON(((-69.7528548573896 44.3860361743938,")
Ejemplo n.º 5
0
def test_nrel_pv_rooftops_developable_planes():
    # config
    config = AWSDataLakeConfig(config_file=TEST_CONFIG_FILE)
    database_name = config.database_name
    table_name = "pv_rooftops_developable_planes"

    # Partitions
    glue = OEDIGlue(region_name=config.region_name)
    partition_keys = glue.get_partition_keys(database_name,
                                             table_name,
                                             with_pandas=False)
    assert len(partition_keys) == 1
    assert partition_keys[0]["Name"] == "partition_0"

    partition_values = glue.get_partition_values(database_name, table_name)
    if "__HIVE_DEFAULT_PARTITION__" in partition_values:
        partition_values.remove("__HIVE_DEFAULT_PARTITION__")
    assert len(partition_values) == 166

    # Query
    athena = OEDIAthena(staging_location=config.staging_location,
                        region_name=config.region_name)
    sql = f"SELECT COUNT(*) FROM {database_name}.{table_name} WHERE partition_0='topeka_ks_08';"
    result = athena.run_query(query_string=sql, pandas_cursor=False)
    assert result[0][0] == 537304

    sql = f"SELECT * FROM {database_name}.{table_name} WHERE partition_0='augusta_me_08' AND gid=15924;"
    df = athena.run_query(query_string=sql, pandas_cursor=True)
    assert df.shape == (1, 18)
    assert df["slope"].values[0] == 26
Ejemplo n.º 6
0
def test_oedi_athena__run_query(mock_execute, mock_load_wkt, mock_connection):
    staging_location = "s3://my-testing-bucket/"
    region_name = "us-west-2"
    athena = OEDIAthena(staging_location, region_name)

    sql = "SELECT * FROM table LIMIT 10;"
    geometry = "geom_4326"
    athena.run_query(sql, geometry)

    assert mock_execute.called
    assert mock_load_wkt.called
Ejemplo n.º 7
0
def test_oedi_athena__properties(mock_connection):
    staging_location = "s3://my-testing-bucket/"
    region_name = "us-west-2"
    athena = OEDIAthena(staging_location, region_name)

    # Assertion
    assert athena.staging_location == staging_location
    assert athena.region_name == region_name
    assert athena.conn is not None

    assert mock_connection.called
Ejemplo n.º 8
0
def run_query(query_string, staging_location=None, region_name=None, output_file=None, head=False):
    """Run SQL query and show/export result."""
    config = AWSDataLakeConfig()
    region_name = region_name or config.region_name
    if not staging_location:
        staging_location = config.staging_location

    # The user may not configure Staging Location in config
    if not staging_location:
        raise ValueError("Invalid '--output-location' option value.")

    oedi_athena = OEDIAthena(staging_location, region_name=region_name)
    result = oedi_athena.run_query(query_string)
    if head:
        result = result.head()

    if output_file:
        result.to_csv(output_file, index=False)
        print(f"Exprted query result to csv file - {output_file}.")
        return

    print(result)