Example #1
def test_reference_scenario(basic_residential_project_file):
    # verify that the reference_scenario gets added to the upgrade file

    upgrade_config = {
        'upgrades': [{
            'upgrade_name': 'Triple-Pane Windows',
            'reference_scenario': 'example_reference_scenario'
        }]
    }
    project_filename, results_dir = basic_residential_project_file(
        upgrade_config)

    with patch.object(BuildStockBatchBase, 'weather_dir', None), \
            patch.object(BuildStockBatchBase, 'get_dask_client') as get_dask_client_mock, \
            patch.object(BuildStockBatchBase, 'results_dir', results_dir):
        bsb = BuildStockBatchBase(project_filename)
        bsb.process_results()
        get_dask_client_mock.assert_called_once()

    # test results.csv files
    test_path = os.path.join(results_dir, 'results_csvs')
    test_csv = pd.read_csv(os.path.join(test_path, 'results_up01.csv.gz'))\
        .set_index('building_id').sort_index()
    assert len(test_csv['apply_upgrade.reference_scenario'].unique()) == 1
    assert test_csv['apply_upgrade.reference_scenario'].iloc[0] == \
        'example_reference_scenario'
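
Every example below calls a basic_residential_project_file factory fixture that is not shown in this listing. Judging from its usage (it takes an optional dict merged into the project config and returns a (project_filename, results_dir) tuple), a minimal sketch could look like the following; the config keys and temp-dir layout are assumptions, and the real fixture also stages pre-baked simulation outputs under results_dir.

# Hypothetical sketch of the factory fixture shape implied by the tests; the
# real fixture also copies canned simulation outputs into results_dir, and the
# minimal config keys below are assumptions.
import os
import tempfile

import pytest
import yaml


@pytest.fixture
def basic_residential_project_file():
    def _factory(update_args=None):
        tmpdir = tempfile.mkdtemp()
        results_dir = os.path.join(tmpdir, 'results')
        os.makedirs(results_dir, exist_ok=True)
        cfg = {'output_directory': results_dir}  # assumed minimal project config
        cfg.update(update_args or {})            # e.g. {'upgrades': [...]} in Example #1
        project_filename = os.path.join(tmpdir, 'project.yml')
        with open(project_filename, 'w') as f:
            yaml.dump(cfg, f)
        return project_filename, results_dir
    return _factory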
Example #2
def test_provide_buildstock_csv(basic_residential_project_file, mocker):
    buildstock_csv = os.path.join(here, 'buildstock.csv')
    df = pd.read_csv(buildstock_csv)
    project_filename, results_dir = basic_residential_project_file({
        'sampler': {
            'type': 'precomputed',
            'args': {
                'sample_file': buildstock_csv
            }
        }
    })
    mocker.patch.object(BuildStockBatchBase, 'weather_dir', None)
    mocker.patch.object(BuildStockBatchBase, 'results_dir', results_dir)
    mocker.patch.object(BuildStockBatchBase, 'CONTAINER_RUNTIME',
                        ContainerRuntime.DOCKER)

    bsb = BuildStockBatchBase(project_filename)
    sampling_output_csv = bsb.sampler.run_sampling()
    df2 = pd.read_csv(sampling_output_csv)
    pd.testing.assert_frame_equal(df, df2)

    # Test file missing
    with open(project_filename, 'r') as f:
        cfg = yaml.safe_load(f)
    cfg['sampler']['args']['sample_file'] = os.path.join(
        here, 'non_existant_file.csv')
    with open(project_filename, 'w') as f:
        yaml.dump(cfg, f)

    with pytest.raises(ValidationError, match=r"sample_file doesn't exist"):
        BuildStockBatchBase(project_filename).sampler.run_sampling()
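
Example #2 and Example #16 reference a module-level here path, and Example #21 references example_yml_dir; neither is defined in this listing. A sketch of the assumed definitions follows (the exact subdirectory holding the example YAML files is a guess).

import os

# Assumed module-level helpers; Example #3 computes the same directory inline.
here = os.path.dirname(os.path.abspath(__file__))
example_yml_dir = os.path.join(here, 'test_inputs')  # hypothetical location of the schema example YAMLs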
Example #3
def test_combine_files(basic_residential_project_file):

    project_filename, results_dir = basic_residential_project_file()

    with patch.object(BuildStockBatchBase, 'weather_dir', None), \
            patch.object(BuildStockBatchBase, 'get_dask_client') as get_dask_client_mock, \
            patch.object(BuildStockBatchBase, 'results_dir', results_dir):
        bsb = BuildStockBatchBase(project_filename)
        bsb.process_results()
        get_dask_client_mock.assert_called_once()

    # test results.csv files
    reference_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                  'test_results', 'results_csvs')
    test_path = os.path.join(results_dir, 'results_csvs')

    test_csv = pd.read_csv(os.path.join(test_path, 'results_up00.csv.gz')).sort_values('building_id').reset_index()\
        .drop(columns=['index'])
    reference_csv = pd.read_csv(os.path.join(reference_path, 'results_up00.csv.gz')).sort_values('building_id')\
        .reset_index().drop(columns=['index'])
    pd.testing.assert_frame_equal(test_csv, reference_csv)

    test_csv = pd.read_csv(os.path.join(test_path, 'results_up01.csv.gz')).sort_values('building_id').reset_index()\
        .drop(columns=['index'])
    reference_csv = pd.read_csv(os.path.join(reference_path, 'results_up01.csv.gz')).sort_values('building_id')\
        .reset_index().drop(columns=['index'])
    pd.testing.assert_frame_equal(test_csv, reference_csv)

    # test parquet files
    reference_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                  'test_results', 'parquet')
    test_path = os.path.join(results_dir, 'parquet')

    # results parquet
    test_pq = pd.read_parquet(os.path.join(test_path, 'baseline', 'results_up00.parquet')).sort_values('building_id')\
        .reset_index().drop(columns=['index'])
    reference_pq = pd.read_parquet(os.path.join(reference_path, 'baseline', 'results_up00.parquet'))\
        .sort_values('building_id').reset_index().drop(columns=['index'])
    pd.testing.assert_frame_equal(test_pq, reference_pq)

    test_pq = pd.read_parquet(os.path.join(test_path, 'upgrades', 'upgrade=1', 'results_up01.parquet'))\
        .sort_values('building_id').reset_index().drop(columns=['index'])
    reference_pq = pd.read_parquet(os.path.join(reference_path,  'upgrades', 'upgrade=1', 'results_up01.parquet'))\
        .sort_values('building_id').reset_index().drop(columns=['index'])
    pd.testing.assert_frame_equal(test_pq, reference_pq)

    # timeseries parquet
    test_pq = dd.read_parquet(os.path.join(test_path, 'timeseries', 'upgrade=0'), engine='pyarrow')\
        .compute().reset_index()
    reference_pq = dd.read_parquet(os.path.join(reference_path,  'timeseries', 'upgrade=0'), engine='pyarrow')\
        .compute().reset_index()
    pd.testing.assert_frame_equal(test_pq, reference_pq)

    test_pq = dd.read_parquet(os.path.join(test_path, 'timeseries', 'upgrade=1'), engine='pyarrow')\
        .compute().reset_index()
    reference_pq = dd.read_parquet(os.path.join(reference_path,  'timeseries', 'upgrade=1'), engine='pyarrow')\
        .compute().reset_index()
    pd.testing.assert_frame_equal(test_pq, reference_pq)
Example #4
def test_xor_violations_fail(project_file, expected):
    # patch the validate_options_lookup function to always return true for this case
    with patch.object(BuildStockBatchBase, 'validate_options_lookup',
                      lambda _: True):
        if expected is not True:
            with pytest.raises(expected):
                BuildStockBatchBase.validate_xor_nor_schema_keys(project_file)
        else:
            assert (
                BuildStockBatchBase.validate_xor_nor_schema_keys(project_file))
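
Examples #4 and #5 take project_file and expected as arguments, so they assume a @pytest.mark.parametrize decorator that is not shown here. A hedged sketch of such a parametrization follows; the YAML file names and expected exception classes are illustrative only.

import pytest

# Hypothetical parametrization; the real test suite supplies its own file paths
# and exception classes (e.g. a package-specific ValidationError).
@pytest.mark.parametrize("project_file,expected", [
    ("enforce-validate-xor-good.yml", True),
    ("enforce-validate-xor-bad.yml", ValueError),
])
def test_xor_violations_fail(project_file, expected):
    ...  # body as shown in Example #4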
Example #5
def test_validation_integration(project_file, expected):
    # patch the validate_options_lookup function to always return true for this case
    with patch.object(BuildStockBatchBase, 'validate_options_lookup', lambda _: True), \
            patch.object(BuildStockBatchBase, 'validate_measure_references', lambda _: True), \
            patch.object(BuildStockBatchBase, 'validate_workflow_generator', lambda _: True):
        if expected is not True:
            with pytest.raises(expected):
                BuildStockBatchBase.validate_project(project_file)
        else:
            assert (BuildStockBatchBase.validate_project(project_file))
Example #6
def test_skipping_baseline(basic_residential_project_file):
    project_filename, results_dir = basic_residential_project_file(
        {'baseline': {
            'skip_sims': True,
            'sampling_algorithm': 'quota'
        }})

    sim_output_path = os.path.join(results_dir, 'simulation_output')
    # remove timeseries results for baseline
    shutil.rmtree(os.path.join(sim_output_path, 'timeseries', 'up00'))

    # remove results.csv data for baseline from results_jobx.json.gz
    results_json_filename = os.path.join(sim_output_path,
                                         'results_job0.json.gz')
    with gzip.open(results_json_filename, 'rt', encoding='utf-8') as f:
        dpouts = json.load(f)
    dpouts2 = list(filter(lambda x: x['upgrade'] > 0, dpouts))
    with gzip.open(results_json_filename, 'wt', encoding='utf-8') as f:
        json.dump(dpouts2, f)

    # remove jobs for baseline from jobx.json
    with open(os.path.join(results_dir, '..', 'job0.json'), 'rt') as f:
        job_json = json.load(f)
    job_json['batch'] = list(
        filter(lambda job: job[1] is not None, job_json['batch']))
    with open(os.path.join(results_dir, '..', 'job0.json'), 'wt') as f:
        json.dump(job_json, f)

    # run postprocessing
    with patch.object(BuildStockBatchBase, 'weather_dir', None), \
            patch.object(BuildStockBatchBase, 'get_dask_client') as get_dask_client_mock, \
            patch.object(BuildStockBatchBase, 'results_dir', results_dir):

        bsb = BuildStockBatchBase(project_filename)
        bsb.process_results()
        get_dask_client_mock.assert_called_once()

    up00_parquet = os.path.join(results_dir, 'parquet', 'baseline',
                                'results_up00.parquet')
    assert (not os.path.exists(up00_parquet))

    up01_parquet = os.path.join(results_dir, 'parquet', 'upgrades',
                                'upgrade=1', 'results_up01.parquet')
    assert (os.path.exists(up01_parquet))

    up00_csv_gz = os.path.join(results_dir, 'results_csvs',
                               'results_up00.csv.gz')
    assert (not os.path.exists(up00_csv_gz))

    up01_csv_gz = os.path.join(results_dir, 'results_csvs',
                               'results_up01.csv.gz')
    assert (os.path.exists(up01_csv_gz))
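
The filters in Example #6 imply two data shapes: each record in results_job0.json.gz carries an integer 'upgrade' field (0 for the baseline), and each entry in job_json['batch'] pairs a building ID with an upgrade index where None marks the baseline run. An illustrative sketch of those assumed shapes:

# Illustrative data shapes assumed from the filters in Example #6; any field
# beyond 'upgrade' and the exact layout of a batch entry are assumptions.
dpouts = [
    {'building_id': 1, 'upgrade': 0},  # baseline record, removed by the filter
    {'building_id': 1, 'upgrade': 1},  # upgrade record, kept
]
job_json = {
    'batch': [
        [1, None],  # baseline simulation, removed by the filter
        [1, 0],     # first upgrade, kept
    ]
}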
Example #7
def test_large_parquet_combine(basic_residential_project_file):
    # Test a simulated scenario where the individual timeseries parquet files are larger than the
    # maximum memory per partition allocated for combining the parquet files.

    project_filename, results_dir = basic_residential_project_file()

    with patch.object(BuildStockBatchBase, 'weather_dir', None), \
            patch.object(BuildStockBatchBase, 'get_dask_client'), \
            patch.object(BuildStockBatchBase, 'results_dir', results_dir),\
            patch.object(postprocessing, 'MAX_PARQUET_MEMORY', 1e6):  # set the max memory to just 1MB
        bsb = BuildStockBatchBase(project_filename)
        # this would raise an exception if the postprocessing could not handle the situation
        bsb.process_results()
Example #8
def test_bad_measures_validation(project_file):
    try:
        BuildStockBatchBase.validate_measure_references(project_file)
    except ValueError as er:
        er = str(er)
        assert "Measure directory" in er
        assert "not found" in er
        assert "ResidentialConstructionsUnfinishedBasement" in er
        assert "ResidentialConstructionsFinishedBasement" in er

    else:
        raise Exception(
            "validate_measure_references was supposed to raise ValueError for "
            "enforce-validate-measures-bad.yml")
Example #9
def test_docker_image_exists_on_docker_hub(basic_residential_project_file):
    project_filename, results_dir = basic_residential_project_file()
    # Use a BuildStockBatchBase instance to get the version of OpenStudio
    # because instantiating a LocalDockerBatch fails to connect
    # to the Docker registry in the testing context for some reason.
    with patch.object(BuildStockBatchBase, 'weather_dir', None):
        bsb = BuildStockBatchBase(project_filename)
        docker_image = 'nrel/openstudio'
        docker_tag = bsb.os_version
        baseurl = 'https://registry.hub.docker.com/v2/'
        r1 = requests.get(baseurl)
        assert (r1.status_code == 401)
        m = re.search(r'realm="(.+?)"', r1.headers['Www-Authenticate'])
        authurl = m.group(1)
        m = re.search(r'service="(.+?)"', r1.headers['Www-Authenticate'])
        service = m.group(1)
        r2 = requests.get(authurl,
                          params={
                              'service': service,
                              'scope': f'repository:{docker_image}:pull'
                          })
        assert (r2.ok)
        token = r2.json()['token']
        r3 = requests.head(f'{baseurl}{docker_image}/manifests/{docker_tag}',
                           headers={'Authorization': f'Bearer {token}'})
        assert (r3.ok)
Example #10
def test_good_measures(project_file):
    with LogCapture(level=logging.INFO) as logs:
        assert BuildStockBatchBase.validate_workflow_generator(project_file)
        warning_logs = filter_logs(logs, 'WARNING')
        error_logs = filter_logs(logs, 'ERROR')
        assert warning_logs == ''
        assert error_logs == ''
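
Examples #10, #11, #14, and #23 rely on a filter_logs helper that is not included in this listing. Based on how it is used (it takes a testfixtures.LogCapture and a level name and returns a string), a minimal sketch might be:

# Minimal sketch implied by usage; not necessarily the project's actual helper.
def filter_logs(logs, level):
    """Concatenate the messages of captured records matching the given level."""
    filtered_logs = ''
    for record in logs.records:
        if record.levelname == level:
            filtered_logs += record.getMessage() + '\n'
    return filtered_logs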
Example #11
def test_good_reference_scenario(project_file):
    with LogCapture(level=logging.INFO) as logs:
        assert BuildStockBatchBase.validate_reference_scenario(project_file)
        warning_logs = filter_logs(logs, 'WARNING')
        error_logs = filter_logs(logs, 'ERROR')
        assert warning_logs == ''
        assert error_logs == ''
Example #12
def test_upgrade_missing_ts(basic_residential_project_file, mocker, caplog):
    caplog.set_level(logging.WARNING, logger='buildstockbatch.postprocessing')

    project_filename, results_dir = basic_residential_project_file()
    results_path = pathlib.Path(results_dir)
    for filename in (results_path / 'simulation_output' / 'timeseries' /
                     'up01').glob('*.parquet'):
        os.remove(filename)

    mocker.patch.object(BuildStockBatchBase, 'weather_dir', None)
    mocker.patch.object(BuildStockBatchBase, 'get_dask_client')
    mocker.patch.object(BuildStockBatchBase, 'results_dir', results_dir)
    bsb = BuildStockBatchBase(project_filename)
    bsb.process_results()

    assert len(caplog.records) == 1
    record = caplog.records[0]
    assert record.levelname == 'WARNING'
    assert record.message == 'There are no timeseries files for upgrade1.'
Example #13
def test_keep_individual_timeseries(keep_individual_timeseries,
                                    basic_residential_project_file, mocker):
    project_filename, results_dir = basic_residential_project_file({
        'postprocessing': {
            'keep_individual_timeseries': keep_individual_timeseries
        }
    })

    mocker.patch.object(BuildStockBatchBase, 'weather_dir', None)
    mocker.patch.object(BuildStockBatchBase, 'get_dask_client')
    mocker.patch.object(BuildStockBatchBase, 'results_dir', results_dir)
    bsb = BuildStockBatchBase(project_filename)
    bsb.process_results()

    results_path = pathlib.Path(results_dir)
    simout_path = results_path / 'simulation_output'
    assert len(list(simout_path.glob('results_job*.json.gz'))) == 0

    ts_path = simout_path / 'timeseries'
    assert ts_path.exists() == keep_individual_timeseries
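
Example #13 receives keep_individual_timeseries as a test argument, so a parametrize decorator (not shown) is assumed, presumably covering both settings:

import pytest

# Assumed parametrization for Example #13.
@pytest.mark.parametrize('keep_individual_timeseries', [True, False])
def test_keep_individual_timeseries(keep_individual_timeseries,
                                    basic_residential_project_file, mocker):
    ...  # body as shown in Example #13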
Example #14
def test_bad_measures(project_file):

    with LogCapture(level=logging.INFO) as logs:
        try:
            BuildStockBatchBase.validate_workflow_generator(project_file)
        except ValidationError as er:
            er = str(er)
            warning_logs = filter_logs(logs, 'WARNING')
            assert "Required argument calendar_year for" in warning_logs
            assert "ReportingMeasure2 does not exist" in er
            assert "Wrong argument value type for begin_day_of_month" in er
            assert "Found unexpected argument key output_variable" in er
            assert "Found unexpected argument value Huorly" in er
            assert "Fixed(1)" in er
            assert "Required argument include_enduse_subcategories" in er
            assert "Found unexpected argument key include_enduse_subcategory" in er

        else:
            raise Exception(
                "validate_workflow_generator was supposed to raise ValidationError for"
                " enforce-validate-measures-bad.yml")
Example #15
def test_bad_options_validation(project_file):
    try:
        BuildStockBatchBase.validate_options_lookup(project_file)
    except ValueError as er:
        er = str(er)
        assert "Insulation Slab(Good) Option" in er
        assert "Insulation Unfinished&Basement" in er
        assert "Insulation Finished|Basement" in er
        assert "Extra Argument" in er
        assert "Invalid Option" in er
        assert "Insulation Wall|Good Option||" in er
        assert " 1980s" in er
        assert "1941s" in er
        assert "Option name empty" in er
        assert "Insulation Slat" in er
        assert "Vintage|1960s|Vintage|1960s" in er
        assert "Vintage|1960s||Vintage|1940s&&Vintage|1980s" in er

    else:
        raise Exception(
            "validate_options was supposed to raise ValueError for enforce-validate-options-bad.yml"
        )
Example #16
def test_downselect_integer_options(basic_residential_project_file, mocker):
    with tempfile.TemporaryDirectory() as buildstock_csv_dir:
        buildstock_csv = os.path.join(buildstock_csv_dir, 'buildstock.csv')
        valid_option_values = set()
        with open(os.path.join(here, 'buildstock.csv'), 'r', newline='') as f_in, \
                open(buildstock_csv, 'w', newline='') as f_out:
            cf_in = csv.reader(f_in)
            cf_out = csv.writer(f_out)
            for i, row in enumerate(cf_in):
                if i == 0:
                    col_idx = row.index('Days Shifted')
                else:
                    # Convert values from "Day1" to "1.10" so we hit the bug
                    row[col_idx] = '{0}.{0}0'.format(
                        re.search(r'Day(\d+)', row[col_idx]).group(1))
                    valid_option_values.add(row[col_idx])
                cf_out.writerow(row)

        project_filename, results_dir = basic_residential_project_file({
            'sampler': {
                'type': 'residential_quota_downselect',
                'args': {
                    'n_datapoints': 8,
                    'resample': False,
                    'logic': 'Geometry House Size|1500-2499'
                }
            }
        })
        mocker.patch.object(BuildStockBatchBase, 'weather_dir', None)
        mocker.patch.object(BuildStockBatchBase, 'results_dir', results_dir)
        sampler_property_mock = mocker.patch.object(BuildStockBatchBase,
                                                    'sampler',
                                                    new_callable=PropertyMock)
        sampler_mock = mocker.MagicMock()
        sampler_property_mock.return_value = sampler_mock
        sampler_mock.run_sampling = MagicMock(return_value=buildstock_csv)

        bsb = BuildStockBatchBase(project_filename)
        bsb.sampler.run_sampling()
        sampler_mock.run_sampling.assert_called_once()
        with open(buildstock_csv, 'r', newline='') as f:
            cf = csv.DictReader(f)
            for row in cf:
                assert (row['Days Shifted'] in valid_option_values)
Example #17
def test_combine_files_flexible(basic_residential_project_file, mocker):
    # Allows addition/removal/renaming of columns. For columns that remain unchanged, verifies that the data matches
    # the stored test_results. If this test passes but test_combine_files fails, then test_results/parquet and
    # test_results/results_csvs need to be updated with new data *if* columns were indeed supposed to be added/
    # removed/renamed.

    project_filename, results_dir = basic_residential_project_file()

    mocker.patch.object(BuildStockBatchBase, 'weather_dir', None)
    get_dask_client_mock = mocker.patch.object(BuildStockBatchBase,
                                               'get_dask_client')
    mocker.patch.object(BuildStockBatchBase, 'results_dir', results_dir)

    bsb = BuildStockBatchBase(project_filename)
    bsb.process_results()
    get_dask_client_mock.assert_called_once()

    def simplify_columns(colname):
        return colname.lower().replace('_', '')

    # test results.csv files
    reference_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                  'test_results', 'results_csvs')
    test_path = os.path.join(results_dir, 'results_csvs')

    test_csv = pd.read_csv(os.path.join(test_path, 'results_up00.csv.gz')).rename(columns=simplify_columns).\
        sort_values('buildingid').reset_index().drop(columns=['index'])
    reference_csv = pd.read_csv(os.path.join(reference_path, 'results_up00.csv.gz')).rename(columns=simplify_columns).\
        sort_values('buildingid').reset_index().drop(columns=['index'])
    mutual_cols = list(set(test_csv.columns).intersection(set(reference_csv.columns)))
    pd.testing.assert_frame_equal(test_csv[mutual_cols],
                                  reference_csv[mutual_cols])

    test_csv = pd.read_csv(os.path.join(test_path, 'results_up01.csv.gz')).rename(columns=simplify_columns).\
        sort_values('buildingid').reset_index().drop(columns=['index'])
    reference_csv = pd.read_csv(os.path.join(reference_path, 'results_up01.csv.gz')).rename(columns=simplify_columns).\
        sort_values('buildingid').reset_index().drop(columns=['index'])
    mutual_cols = list(set(test_csv.columns).intersection(set(reference_csv.columns)))
    pd.testing.assert_frame_equal(test_csv[mutual_cols],
                                  reference_csv[mutual_cols])

    # test parquet files
    reference_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                  'test_results', 'parquet')
    test_path = os.path.join(results_dir, 'parquet')

    # results parquet
    test_pq = pd.read_parquet(os.path.join(test_path, 'baseline', 'results_up00.parquet')).\
        rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index'])
    reference_pq = pd.read_parquet(os.path.join(reference_path, 'baseline', 'results_up00.parquet')).\
        rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index'])
    mutual_cols = list(set(test_pq.columns).intersection(set(reference_pq.columns)))
    pd.testing.assert_frame_equal(test_pq[mutual_cols],
                                  reference_pq[mutual_cols])

    test_pq = pd.read_parquet(os.path.join(test_path, 'upgrades', 'upgrade=1', 'results_up01.parquet')).\
        rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index'])
    reference_pq = pd.read_parquet(os.path.join(reference_path,  'upgrades', 'upgrade=1', 'results_up01.parquet')).\
        rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index'])
    mutual_cols = list(set(test_pq.columns).intersection(set(reference_pq.columns)))
    pd.testing.assert_frame_equal(test_pq[mutual_cols],
                                  reference_pq[mutual_cols])

    # timeseries parquet
    test_pq = dd.read_parquet(os.path.join(test_path, 'timeseries', 'upgrade=0'), engine='pyarrow')\
        .compute().reset_index()
    reference_pq = dd.read_parquet(os.path.join(reference_path,  'timeseries', 'upgrade=0'), engine='pyarrow')\
        .compute().reset_index()
    mutual_cols = list(set(test_pq.columns).intersection(set(reference_pq.columns)))
    pd.testing.assert_frame_equal(test_pq[mutual_cols],
                                  reference_pq[mutual_cols])

    test_pq = dd.read_parquet(os.path.join(test_path, 'timeseries', 'upgrade=1'), engine='pyarrow')\
        .compute().reset_index()
    reference_pq = dd.read_parquet(os.path.join(reference_path,  'timeseries', 'upgrade=1'), engine='pyarrow')\
        .compute().reset_index()
    mutual_cols = list(set(test_pq.columns).intersection(set(reference_pq.columns)))
    pd.testing.assert_frame_equal(test_pq[mutual_cols],
                                  reference_pq[mutual_cols])
Example #18
def test_bad_path_options_validation(project_file):
    with pytest.raises(FileNotFoundError):
        BuildStockBatchBase.validate_options_lookup(project_file)
Example #19
def test_good_options_validation(project_file):
    assert BuildStockBatchBase.validate_options_lookup(project_file)
Example #20
def test_missing_required_key_fails(project_file):
    # patch the validate_options_lookup function to always return true for this case
    with patch.object(BuildStockBatchBase, 'validate_options_lookup',
                      lambda _: True):
        with pytest.raises(ValueError):
            BuildStockBatchBase.validate_project_schema(project_file)
Example #21
def test_minimal_schema_passes_validation():
    assert (BuildStockBatchBase.validate_project_schema(
        os.path.join(example_yml_dir, 'minimal-schema.yml')))
Example #22
def test_upload_files(mocked_boto3, basic_residential_project_file):
    s3_bucket = 'test_bucket'
    s3_prefix = 'test_prefix'
    db_name = 'test_db_name'
    role = 'test_role'
    region = 'test_region'

    upload_config = {
        'postprocessing': {
            'aws': {
                'region_name': region,
                's3': {
                    'bucket': s3_bucket,
                    'prefix': s3_prefix,
                },
                'athena': {
                    'glue_service_role': role,
                    'database_name': db_name,
                    'max_crawling_time': 250
                }
            }
        }
    }
    mocked_glueclient = MagicMock()
    mocked_glueclient.get_crawler = MagicMock(
        return_value={'Crawler': {'State': 'READY'}})
    mocked_boto3.client = MagicMock(return_value=mocked_glueclient)
    mocked_boto3.resource().Bucket().objects.filter.side_effect = [[], ['a', 'b', 'c']]
    project_filename, results_dir = basic_residential_project_file(
        upload_config)
    with patch.object(BuildStockBatchBase, 'weather_dir', None), \
            patch.object(BuildStockBatchBase, 'output_dir', results_dir), \
            patch.object(BuildStockBatchBase, 'get_dask_client') as get_dask_client_mock, \
            patch.object(BuildStockBatchBase, 'results_dir', results_dir):
        bsb = BuildStockBatchBase(project_filename)
        bsb.process_results()
        get_dask_client_mock.assert_called_once()

    files_uploaded = []
    crawler_created = False
    crawler_started = False
    for call in mocked_boto3.mock_calls[2:] + mocked_boto3.client().mock_calls:
        call_function = call[0].split('.')[-1]  # 0 is for the function name
        if call_function == 'resource':
            assert call[1][0] in ['s3']  # call[1] is for the positional arguments
        if call_function == 'Bucket':
            assert call[1][0] == s3_bucket
        if call_function == 'upload_file':
            source_file_path = call[1][0]
            destination_path = call[1][1]
            files_uploaded.append((source_file_path, destination_path))
        if call_function == 'create_crawler':
            crawler_para = call[2]  # 2 is for the keyword arguments
            crawler_created = True
            assert crawler_para['DatabaseName'] == upload_config[
                'postprocessing']['aws']['athena']['database_name']
            assert crawler_para['Role'] == upload_config['postprocessing'][
                'aws']['athena']['glue_service_role']
            assert crawler_para['TablePrefix'] == OUTPUT_FOLDER_NAME + '_'
            assert crawler_para['Name'] == db_name + '_' + OUTPUT_FOLDER_NAME
            assert crawler_para['Targets']['S3Targets'][0]['Path'] == 's3://' + s3_bucket + '/' + s3_prefix + '/' + \
                                                                      OUTPUT_FOLDER_NAME + '/'
        if call_function == 'start_crawler':
            assert crawler_created, "crawler attempted to start before being created"
            crawler_started = True
            crawler_para = call[2]  # 2 is for the keyword arguments
            assert crawler_para['Name'] == db_name + '_' + OUTPUT_FOLDER_NAME

    assert crawler_started, "Crawler never started"

    # check if all the files are properly uploaded
    source_path = os.path.join(results_dir, 'parquet')
    s3_path = s3_prefix + '/' + OUTPUT_FOLDER_NAME + '/'

    s3_file_path = s3_path + 'baseline/results_up00.parquet'
    source_file_path = os.path.join(source_path, 'baseline',
                                    'results_up00.parquet')
    assert (source_file_path, s3_file_path) in files_uploaded
    files_uploaded.remove((source_file_path, s3_file_path))

    s3_file_path = s3_path + 'upgrades/upgrade=1/results_up01.parquet'
    source_file_path = os.path.join(source_path, 'upgrades', 'upgrade=1',
                                    'results_up01.parquet')
    assert (source_file_path, s3_file_path) in files_uploaded
    files_uploaded.remove((source_file_path, s3_file_path))

    s3_file_path = s3_path + 'timeseries/upgrade=0/group0.parquet'
    source_file_path = os.path.join(source_path, 'timeseries', 'upgrade=0',
                                    'group0.parquet')
    assert (source_file_path, s3_file_path) in files_uploaded
    files_uploaded.remove((source_file_path, s3_file_path))

    s3_file_path = s3_path + 'timeseries/upgrade=1/group0.parquet'
    source_file_path = os.path.join(source_path, 'timeseries', 'upgrade=1',
                                    'group0.parquet')
    assert (source_file_path, s3_file_path) in files_uploaded
    files_uploaded.remove((source_file_path, s3_file_path))

    assert len(files_uploaded) == 0, \
        f"These files shouldn't have been uploaded: {files_uploaded}"
Example #23
def test_bad_reference_scenario(project_file):

    with LogCapture(level=logging.INFO) as logs:
        BuildStockBatchBase.validate_reference_scenario(project_file)
        warning_logs = filter_logs(logs, 'WARNING')
        assert "non-existing upgrade' does not match " in warning_logs
Example #24
def test_good_measures_validation(project_file):
    assert BuildStockBatchBase.validate_measure_references(project_file)