def validate_reference_scenario(project_file):
    """ Checks if the reference_scenario mentioned in an upgrade points to a valid upgrade """
    cfg = get_project_configuration(project_file)

    # collect all upgrade_names
    upgrade_names = set()
    for upgrade in cfg.get('upgrades', []):
        upgrade_names.add(upgrade.get('upgrade_name', ''))

    warning_string = ""
    # check if the reference_scenario matches with any upgrade_names
    for upgrade in cfg.get('upgrades', []):
        if 'reference_scenario' in upgrade:
            if upgrade['reference_scenario'] not in upgrade_names:
                warning_string += f"* In Upgrade '{upgrade.get('upgrade_name', '')}', reference_scenario: " \
                                  f"'{upgrade['reference_scenario']}' does not match any existing upgrade names \n"
            elif upgrade['reference_scenario'] == upgrade.get('upgrade_name', ''):
                warning_string += f"* In Upgrade '{upgrade.get('upgrade_name', '')}', reference_scenario: " \
                                  f"'{upgrade['reference_scenario']}' points to the same upgrade \n"

    if warning_string:
        logger.warning(warning_string)

    return True  # Only print the warning, but always pass the validation
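# Illustrative upgrades snippet (hypothetical names) showing the two cases the
# validator above warns about, assuming the usual project YAML layout:
#   upgrades:
#     - upgrade_name: 'Walls'
#       reference_scenario: 'Walls'      # warns: points to the same upgrade
#     - upgrade_name: 'Windows'
#       reference_scenario: 'Wallz'      # warns: matches no upgrade_name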
def validate_measure_references(project_file):
    """ Validates that the measures specified in the project yaml file are referenced in the options_lookup.tsv """
    cfg = get_project_configuration(project_file)
    measure_dirs = set()
    buildstock_dir = BuildStockBatchBase.get_buildstock_dir(project_file, cfg)
    options_lookup_path = f'{buildstock_dir}/resources/options_lookup.tsv'

    # fill in measure_dirs with the valid measure directory names from the
    # 'Measure Dir' column of options_lookup.tsv
    try:
        with open(options_lookup_path, 'r') as f:
            options = csv.DictReader(f, delimiter='\t')
            for row in options:
                if row['Measure Dir']:
                    measure_dirs.add(row['Measure Dir'])
    except FileNotFoundError as err:
        logger.error(f"Options lookup file not found at: '{options_lookup_path}'")
        raise err

    def get_errors(source_str, measure_str):
        """
        Returns a descriptive error message if the measure_str is invalid, '' otherwise.

        :param source_str: the descriptive location where the measure_str occurs in the yaml configuration.
        :param measure_str: the string containing a reference to a measure directory
        :return: an empty string if the measure_str is a valid measure directory name as referenced in
                 options_lookup.tsv; otherwise an error message with close matches and the location
                 where the error occurred (source_str).
        """
        if measure_str not in measure_dirs:
            closest = difflib.get_close_matches(measure_str, list(measure_dirs))
            return f"Measure directory {measure_str} not found. Closest matches: {closest}" \
                   f" {source_str}\n"
        return ''

    source_measures_str_list = []
    if 'measures_to_ignore' in cfg['baseline']:
        source_str = "In baseline 'measures_to_ignore'"
        for measure_str in cfg['baseline']['measures_to_ignore']:
            source_measures_str_list.append((source_str, measure_str))

    error_message = ''
    for source_str, measure_str in source_measures_str_list:
        error_message += get_errors(source_str, measure_str)

    if not error_message:
        return True
    else:
        error_message = 'One or more measure names/directories are invalid.\n' + error_message
        logger.error(error_message)
        raise ValueError(error_message)
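# Illustrative options_lookup.tsv shape (row values here are hypothetical);
# this validator only consumes the 'Measure Dir' column:
#   Parameter Name     Option Name    Measure Dir
#   Insulation Wall    R-13           ResidentialConstructionsWalls
# A 'measures_to_ignore' entry in the baseline section must match one of the
# collected 'Measure Dir' values exactly, or validation fails with close matches.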
def validate_sampler(project_file):
    cfg = get_project_configuration(project_file)
    sampler_name = cfg['sampler']['type']
    try:
        Sampler = BuildStockBatchBase.get_sampler_class(sampler_name)
    except AttributeError:
        raise ValidationError(f'Sampler class `{sampler_name}` is not available.')
    args = cfg['sampler']['args']
    return Sampler.validate_args(project_file, **args)
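# Expected YAML shape for the sampler section (the 'type' and 'args' key names
# come from the code above; the values shown are hypothetical):
#   sampler:
#     type: residential_quota
#     args:
#       n_datapoints: 100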
def validate_xor_nor_schema_keys(project_file):
    cfg = get_project_configuration(project_file)
    major, minor = cfg.get('version', __schema_version__).split('.')
    if int(major) >= 0:
        if int(minor) >= 0:
            # xor: exactly one of the two keys must be present
            if ('weather_files_url' in cfg.keys()) is ('weather_files_path' in cfg.keys()):
                raise ValidationError('Both/neither weather_files_url and weather_files_path found in yaml root')
    return True
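# Note on the xor check above (illustrative snippet, not part of the module):
# `(a in cfg) is (b in cfg)` compares two booleans, so it is True exactly when
# both keys are present or both are absent -- the two cases that must fail.
for c in ({'weather_files_url': 'u', 'weather_files_path': 'p'},  # both -> True
          {},                                                     # neither -> True
          {'weather_files_url': 'u'}):                            # one -> False
    print(('weather_files_url' in c) is ('weather_files_path' in c))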
def validate_project_schema(project_file):
    cfg = get_project_configuration(project_file)
    schema_version = cfg.get('schema_version')
    version_schema = os.path.join(os.path.dirname(__file__), 'schemas', f'v{schema_version}.yaml')
    if not os.path.isfile(version_schema):
        logger.error(f'Could not find validation schema for YAML version {schema_version}')
        raise FileNotFoundError(version_schema)
    schema = yamale.make_schema(version_schema)
    data = yamale.make_data(project_file, parser='ruamel')
    return yamale.validate(schema, data, strict=True)
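# Usage sketch (hypothetical path and version): with `schema_version: '0.3'`
# in the project YAML, this resolves to schemas/v0.3.yaml next to this module
# and raises a yamale validation error on any violation:
#   validate_project_schema('/path/to/project.yml')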
def test_empty_results_assertion(basic_residential_project_file, capsys):
    project_filename, results_dir = basic_residential_project_file({})

    fs = LocalFileSystem()
    results_dir = pathlib.Path(results_dir)
    sim_out_dir = results_dir / 'simulation_output'
    shutil.rmtree(sim_out_dir)  # no results
    cfg = get_project_configuration(project_filename)

    with pytest.raises(ValueError, match=r'No simulation results found to post-process'):
        postprocessing.combine_results(fs, results_dir, cfg, do_timeseries=False)
def test_report_additional_results_csv_columns(basic_residential_project_file):

    reporting_measures = ['ReportingMeasure1', 'ReportingMeasure2']
    project_filename, results_dir = basic_residential_project_file({
        'reporting_measures': reporting_measures
    })

    fs = LocalFileSystem()
    results_dir = pathlib.Path(results_dir)
    sim_out_dir = results_dir / 'simulation_output'
    with tarfile.open(sim_out_dir / 'simulations_job0.tar.gz', 'r') as tarf:
        tarf.extractall(sim_out_dir)

    dpouts2 = []
    for filename in sim_out_dir.rglob('data_point_out.json'):
        with filename.open('rt', encoding='utf-8') as f:
            dpout = json.load(f)
        dpout['ReportingMeasure1'] = {'column_1': 1, 'column_2': 2}
        dpout['ReportingMeasure2'] = {'column_3': 3, 'column_4': 4}
        with filename.open('wt', encoding='utf-8') as f:
            json.dump(dpout, f)
        sim_dir = str(filename.parent.parent)
        upgrade_id = int(re.search(r'up(\d+)', sim_dir).group(1))
        building_id = int(re.search(r'bldg(\d+)', sim_dir).group(1))
        dpouts2.append(
            postprocessing.read_simulation_outputs(fs, reporting_measures, sim_dir, upgrade_id, building_id)
        )

    with gzip.open(sim_out_dir / 'results_job0.json.gz', 'wt', encoding='utf-8') as f:
        json.dump(dpouts2, f)

    cfg = get_project_configuration(project_filename)

    postprocessing.combine_results(fs, results_dir, cfg, do_timeseries=False)

    for upgrade_id in (0, 1):
        df = pd.read_csv(str(results_dir / 'results_csvs' / f'results_up{upgrade_id:02d}.csv.gz'))
        assert (df['reporting_measure1.column_1'] == 1).all()
        assert (df['reporting_measure1.column_2'] == 2).all()
        assert (df['reporting_measure2.column_3'] == 3).all()
        assert (df['reporting_measure2.column_4'] == 4).all()
def __init__(self, project_filename):
    self.project_filename = os.path.abspath(project_filename)

    # Load project file to self.cfg
    self.cfg = get_project_configuration(project_filename)

    self.buildstock_dir = self.cfg['buildstock_directory']
    if not os.path.isdir(self.buildstock_dir):
        raise FileNotFoundError(f'buildstock_directory = {self.buildstock_dir} is not a directory.')
    self.project_dir = os.path.join(self.buildstock_dir, self.cfg['project_directory'])
    if not os.path.isdir(self.project_dir):
        raise FileNotFoundError(f'project_directory = {self.project_dir} is not a directory.')

    # Load in OS_VERSION and OS_SHA arguments if they exist in the YAML,
    # otherwise use defaults specified here.
    self.os_version = self.cfg.get('os_version', self.DEFAULT_OS_VERSION)
    self.os_sha = self.cfg.get('os_sha', self.DEFAULT_OS_SHA)
    logger.debug(f"Using OpenStudio version: {self.os_version} with SHA: {self.os_sha}")
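# Corresponding YAML keys for the constructor above (values here are
# hypothetical; when os_version/os_sha are omitted, the class-level
# DEFAULT_OS_VERSION / DEFAULT_OS_SHA defaults are used):
#   buildstock_directory: /path/to/buildstock
#   project_directory: project_national
#   os_version: 3.4.0
#   os_sha: 4bd816f494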
def test_hpc_run_building(mock_subprocess, monkeypatch, basic_residential_project_file):

    tar_filename = pathlib.Path(__file__).resolve().parent / 'test_results' / 'simulation_output' / 'simulations_job0.tar.gz'  # noqa E501
    with tarfile.open(tar_filename, 'r') as tarf:
        osw_dict = json.loads(tarf.extractfile('up00/bldg0000001/in.osw').read().decode('utf-8'))

    project_filename, results_dir = basic_residential_project_file()
    tmp_path = pathlib.Path(results_dir).parent
    sim_path = tmp_path / 'output' / 'simulation_output' / 'up00' / 'bldg0000001'
    os.makedirs(sim_path)

    cfg = get_project_configuration(project_filename)

    with patch.object(EagleBatch, 'weather_dir', None), \
            patch.object(EagleBatch, 'singularity_image', '/path/to/singularity.simg'), \
            patch.object(EagleBatch, 'create_osw', return_value=osw_dict), \
            patch.object(EagleBatch, 'make_sim_dir', return_value=('bldg0000001up00', sim_path)):

        # Normal run
        run_bldg_args = [results_dir, cfg, 1, None]
        EagleBatch.run_building(*run_bldg_args)
        expected_singularity_args = [
            'singularity', 'exec', '--contain', '-e', '--pwd', '/var/simdata/openstudio',
            '-B', f'{sim_path}:/var/simdata/openstudio',
            '-B', '/tmp/scratch/buildstock/resources:/lib/resources',
            '-B', '/tmp/scratch/housing_characteristics:/lib/housing_characteristics',
            '-B', '/tmp/scratch/buildstock/measures:/measures:ro',
            '-B', '/tmp/scratch/weather:/weather:ro',
            '/tmp/scratch/openstudio.simg',
            'bash', '-x'
        ]
        mock_subprocess.run.assert_called_once()
        assert mock_subprocess.run.call_args[0][0] == expected_singularity_args
        called_kw = mock_subprocess.run.call_args[1]
        assert called_kw.get('check') is True
        assert 'input' in called_kw
        assert 'stdout' in called_kw
        assert 'stderr' in called_kw
        assert str(called_kw.get('cwd')) == '/tmp/scratch/output'
        assert called_kw['input'].decode('utf-8').find(' --measures_only') == -1

        # Measures only run
        mock_subprocess.reset_mock()
        shutil.rmtree(sim_path)
        os.makedirs(sim_path)
        monkeypatch.setenv('MEASURESONLY', '1')
        EagleBatch.run_building(*run_bldg_args)
        mock_subprocess.run.assert_called_once()
        assert mock_subprocess.run.call_args[0][0] == expected_singularity_args
        called_kw = mock_subprocess.run.call_args[1]
        assert called_kw.get('check') is True
        assert 'input' in called_kw
        assert 'stdout' in called_kw
        assert 'stderr' in called_kw
        assert str(called_kw.get('cwd')) == '/tmp/scratch/output'
        assert called_kw['input'].decode('utf-8').find(' --measures_only') > -1
def validate_options_lookup(project_file):
    """
    Validates that the parameter|options specified in the project yaml file are available in the options_lookup.tsv
    """
    cfg = get_project_configuration(project_file)
    param_option_dict = defaultdict(set)
    buildstock_dir = BuildStockBatchBase.get_buildstock_dir(project_file, cfg)
    options_lookup_path = f'{buildstock_dir}/resources/options_lookup.tsv'

    # fill in the param_option_dict with {'param1': {'valid_option1', 'valid_option2', ...}} from options_lookup.tsv
    try:
        with open(options_lookup_path, 'r') as f:
            options = csv.DictReader(f, delimiter='\t')
            invalid_options_lookup_str = ''  # Holds option/parameter names with invalid characters
            for row in options:
                for col in ['Parameter Name', 'Option Name']:
                    invalid_chars = set(row[col]).intersection(set('|&()'))
                    invalid_chars = ''.join(invalid_chars)
                    if invalid_chars:
                        invalid_options_lookup_str += f"{col}: '{row[col]}', Invalid chars: '{invalid_chars}' \n"
                param_option_dict[row['Parameter Name']].add(row['Option Name'])
    except FileNotFoundError as err:
        logger.error(f"Options lookup file not found at: '{options_lookup_path}'")
        raise err

    invalid_option_spec_counter = Counter()
    invalid_param_counter = Counter()
    invalid_option_counter_dict = defaultdict(Counter)

    def get_errors(source_str, option_str):
        """
        Returns a multiline descriptive error message if the option_str is invalid, '' otherwise.

        :param source_str: the descriptive location where the option_str occurs in the yaml configuration.
        :param option_str: the param|option string representing the option choice. Can be joined by either
                           || or && to form a composite string, e.g. param1|option1||param2|option2
        :return: an empty string if the param|option is valid, i.e. it is found in options_lookup.tsv;
                 otherwise an error message with close matches and the location where the error
                 occurred (source_str).
        """
        if '||' in option_str and '&&' in option_str:
            invalid_option_spec_counter[(option_str, "has both || and && (not supported)")] += 1
            return ""

        if '||' in option_str or '&&' in option_str:
            splitter = '||' if '||' in option_str else '&&'
            errors = ''
            broken_options = option_str.split(splitter)
            if broken_options[-1] == '':
                invalid_option_spec_counter[(option_str, f"has trailing '{splitter}'")] += 1
                return ""
            for broken_option_str in broken_options:
                new_source_str = source_str + f" in composite option '{option_str}'"
                errors += get_errors(new_source_str, broken_option_str)
            return errors

        if not option_str or '|' == option_str:
            return f"* Option name empty. {source_str}\n"

        try:
            parameter_name, option_name = option_str.split('|')
        except ValueError:
            invalid_option_spec_counter[(option_str, "has too many or too few '|' (exactly 1 required).")] += 1
            return ""

        if parameter_name not in param_option_dict:
            close_match = difflib.get_close_matches(parameter_name, param_option_dict.keys(), 1)
            close_match = close_match[0] if close_match else ""
            invalid_param_counter[(parameter_name, close_match)] += 1
            return ""

        if not option_name or option_name not in param_option_dict[parameter_name]:
            close_match = difflib.get_close_matches(option_name, list(param_option_dict[parameter_name]), 1)
            close_match = close_match[0] if close_match else ""
            invalid_option_counter_dict[parameter_name][(option_name, close_match)] += 1
            return ""

        return ''

    def get_all_option_str(source_str, inp):
        """
        Returns a list of (source_str, option_str) tuples by recursively traversing the logic inp structure.
        Check the get_errors function for more info about source_str and option_str.

        :param source_str: the descriptive location where the inp logic is found
        :param inp: A nested apply_logic structure
        :return: List of tuples of (source_str, option_str) where source_str is the location in inp
                 where the option_str is found.
        """
        if not inp:
            return []
        if isinstance(inp, str):
            return [(source_str, inp)]
        elif isinstance(inp, list):
            return sum([get_all_option_str(source_str + f", in entry {count}", entry)
                        for count, entry in enumerate(inp)], [])
        elif isinstance(inp, dict):
            if len(inp) > 1:
                raise ValueError(f"{source_str} the logic is malformed.")
            source_str += f", in {list(inp.keys())[0]}"
            return sum([get_all_option_str(source_str, i) for i in inp.values()], [])

    # store all of the option_str in the project file as a list of (source_str, option_str) tuples
    source_option_str_list = []

    if 'upgrades' in cfg:
        for upgrade_count, upgrade in enumerate(cfg['upgrades']):
            upgrade_name = upgrade.get('upgrade_name', '') + f' (Upgrade Number: {upgrade_count})'
            source_str_upgrade = f"In upgrade '{upgrade_name}'"
            for option_count, option in enumerate(upgrade['options']):
                option_name = option.get('option', '') + f' (Option Number: {option_count})'
                source_str_option = source_str_upgrade + f", in option '{option_name}'"
                source_option_str_list.append((source_str_option, option.get('option')))
                if 'apply_logic' in option:
                    source_str_logic = source_str_option + ", in apply_logic"
                    source_option_str_list += get_all_option_str(source_str_logic, option['apply_logic'])

            if 'package_apply_logic' in upgrade:
                source_str_package = source_str_upgrade + ", in package_apply_logic"
                source_option_str_list += get_all_option_str(source_str_package, upgrade['package_apply_logic'])

    # FIXME: Get this working in new downselect sampler validation.
    # if 'downselect' in cfg:
    #     source_str = "In downselect"
    #     source_option_str_list += get_all_option_str(source_str, cfg['downselect']['logic'])

    # Gather all the errors in the option_str, if any
    error_message = ''
    for source_str, option_str in source_option_str_list:
        error_message += get_errors(source_str, option_str)

    if error_message:
        error_message = "The following option/parameter entries have problems:\n" + error_message + "\n"

    if invalid_option_spec_counter:
        error_message += "* The following option/parameter specifications are malformed:\n"
        for (invalid_entry, error), count in invalid_option_spec_counter.items():
            error_message += f"  '{invalid_entry}' {error} - used {count} times\n"

    if invalid_param_counter:
        error_message += "* The following parameters do not exist in options_lookup.tsv:\n"
        for (param, close_match), count in invalid_param_counter.items():
            error_message += f"  '{param}' - used {count} times."
            if close_match:
                error_message += f" Maybe you meant to use '{close_match}'.\n"
            else:
                error_message += "\n"

    if invalid_option_counter_dict:
        error_message += "* The following options do not exist in options_lookup.tsv:\n"
        for param, options_counter in invalid_option_counter_dict.items():
            for (option, close_match), count in options_counter.items():
                error_message += f"  For param '{param}', invalid option '{option}' - used {count} times."
                if close_match:
                    error_message += f" Maybe you meant to use '{close_match}'.\n"
                else:
                    error_message += "\n"

    if invalid_options_lookup_str:
        error_message = "The following option/parameter name(s) have invalid characters in options_lookup.tsv:\n" + \
            invalid_options_lookup_str + "*" * 80 + "\n" + error_message

    if not error_message:
        return True
    else:
        logger.error(error_message)
        raise ValueError(error_message)
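# The close-match suggestions above come from difflib; a minimal illustration
# (hypothetical parameter names):
import difflib
difflib.get_close_matches('Insulation Wal', ['Insulation Wall', 'Windows'], 1)
# -> ['Insulation Wall']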
def validate_misc_constraints(project_file):
    # validate other miscellaneous constraints
    cfg = get_project_configuration(project_file)  # noqa F841
    return True
def validate_workflow_generator(cls, project_file):
    cfg = get_project_configuration(project_file)
    WorkflowGenerator = cls.get_workflow_generator_class(cfg['workflow_generator']['type'])
    return WorkflowGenerator.validate(cfg)
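# Hedged convenience sketch (not part of the codebase): each validator above
# raises on failure and returns True otherwise, so a hypothetical driver can
# simply chain them. validate_workflow_generator is omitted because it is a
# classmethod and needs the batch class as `cls`.
def validate_all(project_file):
    for check in (validate_project_schema, validate_misc_constraints,
                  validate_xor_nor_schema_keys, validate_options_lookup,
                  validate_measure_references, validate_reference_scenario,
                  validate_sampler):
        check(project_file)
    return True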