def test_single_ri_folder(self, standard, il):
    with patch('oasislmf.model_execution.bin.INPUT_FILES', ECHO_CONVERSION_INPUT_FILES), \
            TemporaryDirectory() as csv_dir, TemporaryDirectory() as bin_dir:
        files = standard + il

        for target in files:
            with io_open(os.path.join(csv_dir, target + '.csv'), 'w', encoding='utf-8') as f:
                f.write(target)

        os.mkdir(os.path.join(csv_dir, 'RI_1'))
        for target in files:
            with io_open(os.path.join(csv_dir, 'RI_1', target + '.csv'), 'w', encoding='utf-8') as f:
                f.write(target)

        csv_to_bin(csv_dir, bin_dir, il=True, ri=True)

        self.assertEqual(len(files), len(glob.glob(os.path.join(bin_dir, '*.bin'))))
        for filename in (f + '.bin' for f in files):
            self.assertTrue(os.path.exists(os.path.join(bin_dir, filename)))

        self.assertEqual(len(files), len(glob.glob(os.path.join(bin_dir, 'RI_1', '*.bin'))))
        for filename in (f + '.bin' for f in files):
            self.assertTrue(os.path.exists(os.path.join(bin_dir, 'RI_1', filename)))
def test_with_multiple_reinsurance_subfolders(self, targets):
    with TemporaryDirectory() as d:
        os.mkdir(os.path.join(d, 'RI_1'))
        os.mkdir(os.path.join(d, 'RI_2'))

        for target in targets:
            with io_open(os.path.join(d, target), 'w', encoding='utf-8') as f:
                f.write(target)
            with io_open(os.path.join(d, 'RI_1', target), 'w', encoding='utf-8') as f:
                f.write(target)
            with io_open(os.path.join(d, 'RI_2', target), 'w', encoding='utf-8') as f:
                f.write(target)

        create_binary_tar_file(d)

        all_targets = copy(targets)
        for t in targets:
            # tarfile converts OS-specific separators to forward slashes
            all_targets.append('RI_1/{}'.format(t))
            all_targets.append('RI_2/{}'.format(t))

        with tarfile.open(os.path.join(d, TAR_FILE), 'r:gz', encoding='utf-8') as tar:
            self.assertEqual(len(all_targets), len(tar.getnames()))
            self.assertEqual(set(all_targets), set(tar.getnames()))
def test_periods_bin_doesnt_exist_event_set_isnt_specified___bin_is_copied_from_static(self):
    with TemporaryDirectory() as d:
        self.make_fake_bins(d)

        with io_open(os.path.join(d, 'static', 'periods.bin'), 'w', encoding='utf-8') as periods_file:
            periods_file.write('periods bin')
            periods_file.flush()

        prepare_run_inputs({}, d)

        with io_open(os.path.join(d, 'input', 'periods.bin'), 'r', encoding='utf-8') as new_periods_file:
            self.assertEqual('periods bin', new_periods_file.read())
def test_occurrence_bin_doesnt_exist_event_set_is_specified___event_occurrence_id_specific_bin_is_copied_from_static(self):
    with TemporaryDirectory() as d:
        self.make_fake_bins(d)

        with io_open(os.path.join(d, 'static', 'occurrence_occurrence_id.bin'), 'w', encoding='utf-8') as occurrence_file:
            occurrence_file.write('occurrence occurrence id bin')
            occurrence_file.flush()

        prepare_run_inputs({'model_settings': {'event_occurrence_id': 'occurrence id'}}, d)

        with io_open(os.path.join(d, 'input', 'occurrence.bin'), 'r', encoding='utf-8') as new_occurrence_file:
            self.assertEqual('occurrence occurrence id bin', new_occurrence_file.read())
def test_periods_bin_already_exists___existing_bin_is_unchanged(self):
    with TemporaryDirectory() as d:
        self.make_fake_bins(d)

        with io_open(os.path.join(d, 'input', 'periods.bin'), 'w', encoding='utf-8') as periods_file:
            periods_file.write('periods bin')
            periods_file.flush()

        prepare_run_inputs({}, d)

        with io_open(os.path.join(d, 'input', 'periods.bin'), 'r', encoding='utf-8') as new_periods_file:
            self.assertEqual('periods bin', new_periods_file.read())
def unified_fm_profile_by_level(profiles=None, profile_paths=None):
    if not (profiles or profile_paths):
        raise OasisException(
            'A list of source profiles (loc. or acc.) or a list of source profile paths must be provided'
        )

    # Avoid a mutable default argument - appending to a shared default list
    # would leak loaded profiles across calls
    profiles = profiles or []
    if not profiles:
        for pp in profile_paths:
            with io_open(pp, 'r', encoding='utf-8') as f:
                profiles.append(json.load(f))

    comb_prof = {
        k: v
        for p in profiles
        for k, v in viewitems(p) if 'FMLevel' in v
    }

    # Build the OrderedDict from an (already sorted) sequence of tuples so
    # that insertion order by FM level is preserved
    return OrderedDict(
        (int(k), {v['ProfileElementName']: v for v in g})
        for k, g in groupby(
            sorted(viewvalues(comb_prof), key=lambda v: v['FMLevel']),
            key=lambda v: v['FMLevel']
        )
    )
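# A minimal, runnable sketch of unified_fm_profile_by_level usage. The profile
# entries below are hypothetical (illustrative element names, not a complete
# OED profile): each value needs an 'FMLevel' and a 'ProfileElementName', and
# the result is an OrderedDict keyed by integer FM level.
example_profiles = [{
    'BuildingTIV': {'ProfileElementName': 'BuildingTIV', 'FMLevel': 1},
    'LocDed1Building': {'ProfileElementName': 'LocDed1Building', 'FMLevel': 2}
}]
example_by_level = unified_fm_profile_by_level(profiles=example_profiles)
assert list(example_by_level) == [1, 2]
assert 'LocDed1Building' in example_by_level[2]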
def __init__(self, config=None, config_json=None, config_fp=None, config_dir=None):
    if config:
        self._config = config
        self.config_dir = config_dir or '.'
    elif config_json:
        self._config = json.loads(config_json)
        self.config_dir = config_dir or '.'
    elif config_fp:
        self.config_dir = config_dir or os.path.dirname(config_fp)
        _config_fp = as_path(config_fp, 'config_fp')
        with io_open(_config_fp, 'r', encoding='utf-8') as f:
            self._config = json.load(f)

    keys_data_path = self._config.get('keys_data_path')
    keys_data_path = os.path.join(self.config_dir, keys_data_path) if keys_data_path else ''
    self._config['keys_data_path'] = as_path(keys_data_path, 'keys_data_path', preexists=bool(keys_data_path))

    peril_config = self._config.get('peril') or {}
    self._peril_ids = tuple(peril_config.get('peril_ids') or ())
    self._peril_id_col = peril_config.get('peril_id_col') or 'peril_id'

    coverage_config = self._config.get('coverage') or {}
    self._coverage_types = tuple(coverage_config.get('coverage_types') or ())
    # Read the coverage type column from the coverage config, not the peril config
    self._coverage_type_col = coverage_config.get('coverage_type_col') or 'coverage_type'

    # Fall back to the legacy 'locations' key if 'exposure' is absent or empty
    self._config['exposure'] = self._config.get('exposure') or self._config.get('locations') or {}

    self.__tweak_config_data__()
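# A hedged sketch of the config dict consumed by the constructor above (all
# values are illustrative). Only 'keys_data_path', 'peril', 'coverage' and
# 'exposure'/'locations' are read here; 'keys_data_path' is resolved relative
# to config_dir and, if set, must exist on disk.
example_lookup_config = {
    'keys_data_path': 'keys_data',
    'peril': {'peril_ids': ['WTC'], 'peril_id_col': 'peril_id'},
    'coverage': {'coverage_types': [1, 3], 'coverage_type_col': 'coverage_type'},
    'exposure': {}
}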
def get_model_info(cls, model_version_file_path):
    """
    Get model information from the model version file.
    """
    with io_open(model_version_file_path, 'r', encoding='utf-8') as f:
        return next(csv.DictReader(
            f, fieldnames=['supplier_id', 'model_id', 'model_version']
        ))
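# Usage sketch for get_model_info. The model version file is assumed to be a
# single-row, headerless CSV of the form
# "<supplier_id>,<model_id>,<model_version>"; the values and the enclosing
# class name below are illustrative assumptions, not taken from this excerpt.
#
#     with io_open('ModelVersion.csv', 'w', encoding='utf-8') as f:
#         f.write(u'OasisLMF,PiWind,0.0.0.1')
#     SomeLookupFactory.get_model_info('ModelVersion.csv')
#     # -> {'supplier_id': 'OasisLMF', 'model_id': 'PiWind', 'model_version': '0.0.0.1'}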
def write_json_keys_file(cls, records, output_file_path):
    """
    Writes the keys records as a simple list to file.
    """
    with io_open(output_file_path, 'w', encoding='utf-8') as f:
        f.write(u'{}'.format(json.dumps(records, sort_keys=True, indent=4, ensure_ascii=False)))

    return output_file_path, len(records)
def test_directory_only_contains_excluded_files___no_bin_files_are_created(self):
    with TemporaryDirectory() as csv_dir, TemporaryDirectory() as bin_dir:
        with io_open(os.path.join(csv_dir, 'another_file'), 'w', encoding='utf-8') as f:
            f.write('file data')

        csv_to_bin(csv_dir, bin_dir)

        self.assertEqual(0, len(glob.glob(os.path.join(bin_dir, '*.bin'))))
def test_analysis_settings_file_is_supplied___file_is_copied_into_run_dir(self):
    with TemporaryDirectory() as run_dir, \
            TemporaryDirectory() as oasis_src_fp, \
            TemporaryDirectory() as model_data_fp, \
            NamedTemporaryFile('w') as analysis_settings_fp:
        analysis_settings_fp.write('{"analysis_settings": "analysis_settings"}')
        analysis_settings_fp.flush()

        prepare_run_directory(run_dir, oasis_src_fp, model_data_fp, analysis_settings_fp.name)

        with io_open(os.path.join(run_dir, 'analysis_settings.json'), encoding='utf-8') as expected_analysis_settings:
            self.assertEqual('{"analysis_settings": "analysis_settings"}', expected_analysis_settings.read())
def test_with_single_reinsurance_subfolder(self, targets):
    with TemporaryDirectory() as d:
        os.mkdir(os.path.join(d, 'RI_1'))

        for target in targets:
            with io_open(os.path.join(d, target), 'w', encoding='utf-8') as f:
                f.write(target)
            with io_open(os.path.join(d, 'RI_1', target), 'w', encoding='utf-8') as f:
                f.write(target)

        create_binary_tar_file(d)

        all_targets = copy(targets)
        for t in targets:
            # tarfile converts OS-specific separators to forward slashes
            all_targets.append('RI_1/{}'.format(t))

        with tarfile.open(os.path.join(d, TAR_FILE), 'r:gz', encoding='utf-8') as tar:
            self.assertEqual(len(all_targets), len(tar.getnames()))
            self.assertEqual(set(all_targets), set(tar.getnames()))
def test_directory_only_contains_excluded_files___tar_is_empty(self):
    with TemporaryDirectory() as d:
        with io_open(os.path.join(d, 'another_file'), 'w', encoding='utf-8') as f:
            f.write('file data')

        create_binary_tar_file(d)

        with tarfile.open(os.path.join(d, TAR_FILE), 'r:gz', encoding='utf-8') as tar:
            self.assertEqual(0, len(tar.getnames()))
def test_get_json__with_nesting_depth_of_1(self, data):
    expected = copy.deepcopy(data)

    with NamedTemporaryFile('w') as f1:
        f1.write(json.dumps(expected, indent=4, sort_keys=True))
        f1.flush()

        with io_open(f1.name, 'r', encoding='utf-8') as f2:
            result = json.load(f2)

    self.assertEqual(result, expected)
def test_directory_contains_some_target_files___target_files_are_included(self, targets):
    with TemporaryDirectory() as d:
        for target in targets:
            with io_open(os.path.join(d, target), 'w', encoding='utf-8') as f:
                f.write(target)

        create_binary_tar_file(d)

        with tarfile.open(os.path.join(d, TAR_FILE), 'r:gz', encoding='utf-8') as tar:
            self.assertEqual(len(targets), len(tar.getnames()))
            self.assertEqual(set(targets), set(tar.getnames()))
def test_contains_il_and_standard_files_but_il_is_true___all_files_are_included(self, standard, il):
    with patch('oasislmf.model_execution.bin.INPUT_FILES', ECHO_CONVERSION_INPUT_FILES), \
            TemporaryDirectory() as csv_dir, TemporaryDirectory() as bin_dir:
        for target in chain(standard, il):
            with io_open(os.path.join(csv_dir, target + '.csv'), 'w', encoding='utf-8') as f:
                f.write(target)

        csv_to_bin(csv_dir, bin_dir, il=True)

        self.assertEqual(len(standard) + len(il), len(glob.glob(os.path.join(bin_dir, '*.bin'))))
        for filename in (f + '.bin' for f in chain(standard, il)):
            self.assertTrue(os.path.exists(os.path.join(bin_dir, filename)))
def get_json(src_fp, key_transform=None):
    try:
        with io_open(src_fp, 'r', encoding='utf-8') as f:
            di = json.load(f)
    except (IOError, JSONDecodeError, OSError, TypeError):
        return None

    return di if not key_transform else {key_transform(k): v for k, v in viewitems(di)}
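# A minimal, runnable sketch of the key_transform hook (the file content below
# is illustrative): keys are rewritten as the JSON is loaded, e.g. lowercased.
import tempfile

with tempfile.NamedTemporaryFile('w', suffix='.json', delete=False) as tmp:
    tmp.write('{"FMLevel": 1}')
example_profile = get_json(tmp.name, key_transform=lambda k: k.lower())
assert example_profile == {'fmlevel': 1}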
def get_json(src_fp):
    """
    Loads JSON from file.

    :param src_fp: Source JSON file path
    :type src_fp: str

    :return: dict
    :rtype: dict
    """
    try:
        with io_open(src_fp, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (IOError, JSONDecodeError, OSError, TypeError) as e:
        raise OasisException('Error trying to load JSON from {}: {}'.format(src_fp, e))
def generate_oasis_files(
    self,
    target_dir,
    exposure_fp,
    exposure_profile=None,
    exposure_profile_fp=None,
    keys_fp=None,
    lookup_config=None,
    lookup_config_fp=None,
    keys_data_fp=None,
    model_version_fp=None,
    lookup_package_fp=None,
    supported_oed_coverage_types=None,
    accounts_fp=None,
    accounts_profile=None,
    accounts_profile_fp=None,
    fm_aggregation_profile=None,
    fm_aggregation_profile_fp=None,
    ri_info_fp=None,
    ri_scope_fp=None,
    oasis_files_prefixes=None
):
    # Check whether the invocation indicates a deterministic or model
    # analysis/run - the CLI supports deterministic analyses via the command
    # `oasislmf exposure run`, which requires a preexisting input files
    # directory, usually the same as the analysis/output directory
    deterministic = not (
        keys_fp or
        (lookup_config or lookup_config_fp) or
        (keys_data_fp and model_version_fp and lookup_package_fp)
    )

    # Prepare the target directory and copy the source files, profiles and
    # model version file into it
    target_dir = prepare_input_files_directory(
        target_dir,
        exposure_fp,
        exposure_profile_fp=exposure_profile_fp,
        keys_fp=keys_fp,
        lookup_config_fp=lookup_config_fp,
        model_version_fp=model_version_fp,
        accounts_fp=accounts_fp,
        accounts_profile_fp=accounts_profile_fp,
        fm_aggregation_profile_fp=fm_aggregation_profile_fp,
        ri_info_fp=ri_info_fp,
        ri_scope_fp=ri_scope_fp
    )

    # Get the profiles defining the exposure and accounts files, ID-related
    # terms in these files, and the FM aggregation hierarchy
    exposure_profile = exposure_profile or (get_json(src_fp=exposure_profile_fp) if exposure_profile_fp else self.exposure_profile)
    accounts_profile = accounts_profile or (get_json(src_fp=accounts_profile_fp) if accounts_profile_fp else self.accounts_profile)
    id_terms = unified_id_terms(profiles=(exposure_profile, accounts_profile,))
    loc_id = id_terms['locid']
    acc_id = id_terms['accid']
    portfolio_num = id_terms['portid']
    fm_aggregation_profile = (
        fm_aggregation_profile or
        ({int(k): v for k, v in viewitems(get_json(src_fp=fm_aggregation_profile_fp))} if fm_aggregation_profile_fp else {}) or
        self.fm_aggregation_profile
    )

    # If a pre-generated keys file path has not been provided, then it is
    # assumed some model lookup assets have been provided, so as to allow
    # the lookup to be instantiated and called to generate the keys file.
    # Otherwise, if no model keys file path or lookup assets were provided,
    # a "deterministic" keys file is generated.
    _keys_fp = _keys_errors_fp = None
    if not keys_fp:
        _keys_fp = os.path.join(target_dir, 'keys.csv')
        _keys_errors_fp = os.path.join(target_dir, 'keys-errors.csv')

        cov_types = supported_oed_coverage_types or self.supported_oed_coverage_types

        if deterministic:
            loc_numbers = (
                loc_num[loc_id] for _, loc_num in get_dataframe(
                    src_fp=exposure_fp,
                    col_dtypes={loc_id: 'str', acc_id: 'str', portfolio_num: 'str'},
                    empty_data_error_msg='No exposure found in the source exposure (loc.) file'
                )[[loc_id]].iterrows()
            )
            keys = [
                {loc_id: loc_num, 'peril_id': 1, 'coverage_type': cov_type, 'area_peril_id': i + 1, 'vulnerability_id': i + 1}
                for i, (loc_num, cov_type) in enumerate(product(loc_numbers, cov_types))
            ]
            _, _ = olf.write_oasis_keys_file(keys, _keys_fp)
        else:
            lookup_config = get_json(src_fp=lookup_config_fp) if lookup_config_fp else lookup_config
            if lookup_config:
                lookup_config['keys_data_path'] = os.path.abspath(os.path.dirname(lookup_config_fp))
            _, lookup = olf.create(
                lookup_config=lookup_config,
                model_keys_data_path=keys_data_fp,
                model_version_file_path=model_version_fp,
                lookup_package_path=lookup_package_fp
            )
            f1, n1, f2, n2 = olf.save_results(
                lookup,
                loc_id_col=loc_id,
                successes_fp=_keys_fp,
                errors_fp=_keys_errors_fp,
                source_exposure_fp=exposure_fp
            )
    else:
        _keys_fp = os.path.join(target_dir, os.path.basename(keys_fp))

    # Get the GUL input items and exposure dataframes
    gul_inputs_df, exposure_df = get_gul_input_items(
        exposure_fp, _keys_fp, exposure_profile=exposure_profile
    )

    # Write the GUL input files
    files_prefixes = oasis_files_prefixes or self.oasis_files_prefixes
    gul_input_files = write_gul_input_files(
        gul_inputs_df,
        target_dir,
        oasis_files_prefixes=files_prefixes['gul']
    )

    # If no source accounts file path has been provided, assume that IL
    # input files, and therefore also RI input files, are not needed
    if not accounts_fp:
        return gul_input_files

    # Get the IL input items
    il_inputs_df, _ = get_il_input_items(
        exposure_df,
        gul_inputs_df,
        accounts_fp=accounts_fp,
        exposure_profile=exposure_profile,
        accounts_profile=accounts_profile,
        fm_aggregation_profile=fm_aggregation_profile
    )

    # Write the IL/FM input files
    il_input_files = write_il_input_files(
        il_inputs_df,
        target_dir,
        oasis_files_prefixes=files_prefixes['il']
    )

    # Combine the GUL and IL input file paths into a single dict (for convenience)
    oasis_files = {k: v for k, v in chain(gul_input_files.items(), il_input_files.items())}

    # If no RI input file paths (info. and scope) have been provided, then
    # no RI input files are needed - just return the GUL and IL Oasis files
    if not (ri_info_fp or ri_scope_fp):
        return oasis_files

    # Write the RI input files, and write the returned RI layer info. as a
    # file, which can be reused by the model runner (in the model execution
    # stage) to set the number of RI iterations
    ri_layers = write_ri_input_files(
        exposure_fp,
        accounts_fp,
        oasis_files['items'],
        oasis_files['coverages'],
        oasis_files['gulsummaryxref'],
        oasis_files['fm_xref'],
        oasis_files['fmsummaryxref'],
        ri_info_fp,
        ri_scope_fp,
        target_dir
    )
    with io_open(os.path.join(target_dir, 'ri_layers.json'), 'w', encoding='utf-8') as f:
        f.write(_unicode(json.dumps(ri_layers, ensure_ascii=False, indent=4)))
        oasis_files['ri_layers'] = os.path.abspath(f.name)
        for layer, layer_info in viewitems(ri_layers):
            oasis_files['RI_{}'.format(layer)] = layer_info['directory']

    return oasis_files
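# A hedged call sketch for generate_oasis_files (all paths below are
# hypothetical). Passing lookup assets makes it a model run; omitting
# keys_fp and all lookup assets triggers the deterministic keys fallback.
#
#     oasis_files = manager.generate_oasis_files(
#         target_dir='runs/inputs',
#         exposure_fp='data/location.csv',
#         lookup_config_fp='keys_data/lookup.json',
#         accounts_fp='data/account.csv',
#         ri_info_fp='data/ri_info.csv',
#         ri_scope_fp='data/ri_scope.csv'
#     )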
def generate_deterministic_losses(self, input_dir, output_dir=None, loss_percentage_of_tiv=1.0, net=False):
    losses = OrderedDict({'gul': None, 'il': None, 'ri': None})

    output_dir = output_dir or input_dir

    il = all(p in os.listdir(input_dir) for p in ['fm_policytc.csv', 'fm_profile.csv', 'fm_programme.csv', 'fm_xref.csv'])

    ri = any(re.match(r'RI_\d+$', fn) for fn in os.listdir(input_dir))

    csv_to_bin(input_dir, output_dir, il=il, ri=ri)

    # Generate an items and coverages dataframe and set column types (important!)
    items_df = pd.merge(
        pd.read_csv(os.path.join(input_dir, 'items.csv')),
        pd.read_csv(os.path.join(input_dir, 'coverages.csv'))
    )
    for col in items_df:
        if col != 'tiv':
            items_df[col] = items_df[col].astype(int)
        else:
            items_df[col] = items_df[col].astype(float)

    guls_items = []
    for item_id, tiv in zip(items_df['item_id'], items_df['tiv']):
        event_loss = loss_percentage_of_tiv * tiv
        guls_items += [
            oed.GulRecord(event_id=1, item_id=item_id, sidx=-1, loss=event_loss),
            oed.GulRecord(event_id=1, item_id=item_id, sidx=-2, loss=0),
            oed.GulRecord(event_id=1, item_id=item_id, sidx=1, loss=event_loss)
        ]

    guls = pd.DataFrame(guls_items)
    guls_fp = os.path.join(output_dir, 'guls.csv')
    guls.to_csv(guls_fp, index=False)

    net_flag = '-n' if net else ''
    ils_fp = os.path.join(output_dir, 'ils.csv')
    cmd = 'gultobin -S 1 < {} | fmcalc -p {} {} -a {} | tee ils.bin | fmtocsv > {}'.format(
        guls_fp, output_dir, net_flag, oed.ALLOCATE_TO_ITEMS_BY_PREVIOUS_LEVEL_ALLOC_ID, ils_fp
    )
    print("\nGenerating deterministic ground-up and direct insured losses with command: {}\n".format(cmd))
    try:
        check_call(cmd, shell=True)
    except CalledProcessError as e:
        raise OasisException(e)

    guls.drop(guls[guls['sidx'] != 1].index, inplace=True)
    guls.reset_index(drop=True, inplace=True)
    guls.drop('sidx', axis=1, inplace=True)
    guls = guls[(guls[['loss']] != 0).any(axis=1)]
    guls['item_id'] = range(1, len(guls) + 1)
    losses['gul'] = guls

    ils = pd.read_csv(ils_fp)
    ils.drop(ils[ils['sidx'] != 1].index, inplace=True)
    ils.reset_index(drop=True, inplace=True)
    ils.drop('sidx', axis=1, inplace=True)
    ils = ils[(ils[['loss']] != 0).any(axis=1)]
    ils['output_id'] = range(1, len(ils) + 1)
    losses['il'] = ils

    if ri:
        if 'ri_layers.json' not in os.listdir(input_dir):
            raise OasisException(
                'No RI layers JSON file "ri_layers.json" found in the '
                'input directory despite presence of RI input files'
            )
        try:
            with io_open(os.path.join(input_dir, 'ri_layers.json'), 'r', encoding='utf-8') as f:
                ri_layers = len(json.load(f))
        except (IOError, JSONDecodeError, OSError, TypeError) as e:
            raise OasisException('Error trying to read the RI layers file: {}'.format(e))

        def run_ri_layer(layer):
            layer_inputs_fp = os.path.join(input_dir, 'RI_{}'.format(layer))
            _input = 'gultobin -S 1 < {} | fmcalc -p {} -a {} | tee ils.bin |'.format(
                guls_fp, input_dir, oed.ALLOCATE_TO_ITEMS_BY_PREVIOUS_LEVEL_ALLOC_ID
            ) if layer == 1 else ''
            pipe_in_previous_layer = '< ri{}.bin'.format(layer - 1) if layer > 1 else ''
            ri_layer_fp = os.path.join(output_dir, 'ri{}.csv'.format(layer))
            cmd = '{} fmcalc -p {} -n -a {} {}| tee ri{}.bin | fmtocsv > {}'.format(
                _input, layer_inputs_fp, oed.ALLOCATE_TO_ITEMS_BY_PREVIOUS_LEVEL_ALLOC_ID,
                pipe_in_previous_layer, layer, ri_layer_fp
            )
            print("\nGenerating deterministic RI layer {} losses with command: {}\n".format(layer, cmd))
            try:
                check_call(cmd, shell=True)
            except CalledProcessError as e:
                raise OasisException(e)
            rils = pd.read_csv(ri_layer_fp)
            rils.drop(rils[rils['sidx'] != 1].index, inplace=True)
            rils.drop('sidx', axis=1, inplace=True)
            rils.reset_index(drop=True, inplace=True)
            rils = rils[(rils[['loss']] != 0).any(axis=1)]
            return rils

        for i in range(1, ri_layers + 1):
            rils = run_ri_layer(i)
            if i in [1, ri_layers]:
                rils['output_id'] = range(1, len(rils) + 1)
                losses['ri'] = rils

    return losses
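# Sketch of the RI layer chaining implemented by run_ri_layer above, assuming
# three layers and ALLOCATE_TO_ITEMS_BY_PREVIOUS_LEVEL_ALLOC_ID == 2 (paths
# hypothetical). Layer 1 regenerates ground-up losses and pipes them through
# the direct FM inputs; each subsequent layer reads the previous layer's binary:
#
#     gultobin -S 1 < guls.csv | fmcalc -p input -a 2 | tee ils.bin | \
#         fmcalc -p input/RI_1 -n -a 2 | tee ri1.bin | fmtocsv > ri1.csv
#     fmcalc -p input/RI_2 -n -a 2 < ri1.bin | tee ri2.bin | fmtocsv > ri2.csv
#     fmcalc -p input/RI_3 -n -a 2 < ri2.bin | tee ri3.bin | fmtocsv > ri3.csv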
def generate_model_losses(
    self,
    model_run_fp,
    oasis_fp,
    analysis_settings_fp,
    model_data_fp,
    model_package_fp=None,
    ktools_num_processes=None,
    ktools_mem_limit=None,
    ktools_fifo_relative=None,
    ktools_alloc_rule=None
):
    il = all(p in os.listdir(oasis_fp) for p in ['fm_policytc.csv', 'fm_profile.csv', 'fm_programme.csv', 'fm_xref.csv'])

    if os.path.basename(oasis_fp) == 'csv':
        ri = any(re.match(r'RI_\d+$', fn) for fn in os.listdir(os.path.dirname(oasis_fp)))
    else:
        ri = any(re.match(r'RI_\d+$', fn) for fn in os.listdir(oasis_fp))

    if not os.path.exists(model_run_fp):
        Path(model_run_fp).mkdir(parents=True, exist_ok=True)

    prepare_run_directory(model_run_fp, oasis_fp, model_data_fp, analysis_settings_fp, ri=ri)

    if not ri:
        csv_to_bin(oasis_fp, os.path.join(model_run_fp, 'input'), il=il)
    else:
        contents = os.listdir(model_run_fp)
        for fp in [os.path.join(model_run_fp, fn) for fn in contents if re.match(r'RI_\d+$', fn) or re.match(r'input$', fn)]:
            csv_to_bin(fp, fp, il=True, ri=True)

    analysis_settings_fn = 'analysis_settings.json'
    _analysis_settings_fp = os.path.join(model_run_fp, analysis_settings_fn)
    try:
        with io_open(_analysis_settings_fp, 'r', encoding='utf-8') as f:
            analysis_settings = json.load(f)

        if analysis_settings.get('analysis_settings'):
            analysis_settings = analysis_settings['analysis_settings']

        analysis_settings['il_output'] = il
        if not il:
            analysis_settings['il_summaries'] = []

        analysis_settings['ri_output'] = ri
        if not ri:
            analysis_settings['ri_summaries'] = []
    except (IOError, TypeError, ValueError):
        raise OasisException('Invalid analysis settings file or file path: {}.'.format(_analysis_settings_fp))

    prepare_run_inputs(analysis_settings, model_run_fp, ri=ri)

    script_fp = os.path.join(model_run_fp, 'run_ktools.sh')

    if model_package_fp and os.path.exists(os.path.join(model_package_fp, 'supplier_model_runner.py')):
        path, package_name = os.path.split(model_package_fp)
        sys.path.append(path)
        model_runner_module = importlib.import_module('{}.supplier_model_runner'.format(package_name))
    else:
        model_runner_module = runner

    with setcwd(model_run_fp):
        ri_layers = 0
        if ri:
            try:
                with io_open(os.path.join(model_run_fp, 'ri_layers.json'), 'r', encoding='utf-8') as f:
                    ri_layers = len(json.load(f))
            except IOError:
                with io_open(os.path.join(model_run_fp, 'input', 'ri_layers.json'), 'r', encoding='utf-8') as f:
                    ri_layers = len(json.load(f))

        model_runner_module.run(
            analysis_settings,
            number_of_processes=(ktools_num_processes or self.ktools_num_processes),
            filename=script_fp,
            num_reinsurance_iterations=ri_layers,
            ktools_mem_limit=(ktools_mem_limit or self.ktools_mem_limit),
            set_alloc_rule=(ktools_alloc_rule or self.ktools_alloc_rule),
            fifo_tmp_dir=(not (ktools_fifo_relative or self.ktools_fifo_relative))
        )

    return model_run_fp