コード例 #1
0
ファイル: test_data.py プロジェクト: Parivesh123/OasisLMF
    def test_get_dataframe__from_csv_file_with_mixed_case_cols_and_missing_some_required_cols__set_required_cols_option_and_use_defaults_for_all_other_options__oasis_exception_is_raised(self, data, missing_cols):
        # Write the data minus some required columns, then require the full
        # original column set: get_dataframe must raise OasisException.
        with NamedTemporaryFile('w') as fp:
            frame = pd.DataFrame(data)
            trimmed = frame.drop(missing_cols, axis=1)
            trimmed.to_csv(path_or_buf=fp.name, encoding='utf-8', index=False)

            with self.assertRaises(OasisException):
                get_dataframe(src_fp=fp.name, required_cols=frame.columns.tolist())
コード例 #2
0
ファイル: test_data.py プロジェクト: Parivesh123/OasisLMF
    def test_get_dataframe__from_empty_csv_file__set_empty_data_err_msg_and_defaults_for_all_other_options__oasis_exception_is_raised_with_empty_data_err_msg(self, empty_data_err_msg):
        # An empty CSV must raise OasisException carrying the custom message.
        with NamedTemporaryFile('w') as fp:
            pd.DataFrame().to_csv(path_or_buf=fp.name)

            with self.assertRaises(OasisException):
                try:
                    get_dataframe(src_fp=fp.name, empty_data_error_msg=empty_data_err_msg)
                except OasisException as err:
                    # Check the message before re-raising for assertRaises.
                    self.assertEqual(str(err), empty_data_err_msg)
                    raise err
コード例 #3
0
ファイル: oed.py プロジェクト: cihanb/OasisLMF
def load_oed_dfs(oed_dir, show_all=False):
    """
    Load OED reinsurance data files from a directory.

    :param oed_dir: directory expected to contain ``ri_info.csv`` and
        ``ri_scope.csv``; may be ``None``, in which case nothing is loaded.
    :param show_all: unused here; kept for interface compatibility.
    :return: tuple ``(ri_info_df, ri_scope_df, do_reinsurance)``; the
        dataframes are ``None`` and ``do_reinsurance`` is ``False`` when no
        reinsurance files are present.

    BUGFIX: the original left ``ri_info_df``/``ri_scope_df`` undefined when
    ``oed_dir`` was ``None`` or when only one of the two RI files existed
    (NameError at the return/subset lines), and ran the float casts even
    when no reinsurance data was loaded (TypeError on ``None``).
    """
    ri_info_df = None
    ri_scope_df = None
    do_reinsurance = False

    if oed_dir is not None:
        if not os.path.exists(oed_dir):
            print("Path does not exist: {}".format(oed_dir))
            exit(1)

        # RI files
        oed_ri_info_file = os.path.join(oed_dir, "ri_info.csv")
        oed_ri_scope_file = os.path.join(oed_dir, "ri_scope.csv")
        oed_ri_info_file_exists = os.path.exists(oed_ri_info_file)
        oed_ri_scope_file_exists = os.path.exists(oed_ri_scope_file)

        if oed_ri_info_file_exists and oed_ri_scope_file_exists:
            do_reinsurance = True
            ri_info_df = get_dataframe(oed_ri_info_file,
                                       lowercase_cols=False,
                                       required_cols=RI_INFO_REQUIRED_COLS,
                                       default_values=RI_INFO_DEFAULTS)
            ri_scope_df = get_dataframe(oed_ri_scope_file,
                                        lowercase_cols=False,
                                        required_cols=RI_SCOPE_REQUIRED_COLS,
                                        default_values=RI_SCOPE_DEFAULTS)
        elif oed_ri_info_file_exists or oed_ri_scope_file_exists:
            # Exactly one of the pair is present: fail fast rather than
            # continuing with undefined dataframes.
            print("Both reinsurance files must exist: {} {}".format(
                oed_ri_info_file, oed_ri_scope_file))
            exit(1)

        if do_reinsurance:
            ri_info_df = ri_info_df[OED_REINS_INFO_FIELDS].copy()
            ri_scope_df = ri_scope_df[OED_REINS_SCOPE_FIELDS].copy()

            # Ensure Percent fields are float
            info_float_cols = ['CededPercent', 'PlacedPercent', 'TreatyShare']
            scope_float_cols = ['CededPercent']
            ri_info_df[info_float_cols] = ri_info_df[info_float_cols].astype(float)
            ri_scope_df[scope_float_cols] = ri_scope_df[scope_float_cols].astype(float)

    return (ri_info_df, ri_scope_df, do_reinsurance)
コード例 #4
0
ファイル: test_data.py プロジェクト: Parivesh123/OasisLMF
    def test_get_dataframe__from_csv_file__use_default_options(self, data):
        # Round-trip: writing a frame to CSV and reading it back with all
        # defaults must reproduce the original frame.
        with NamedTemporaryFile('w') as fp:
            src = pd.DataFrame(data)
            src.to_csv(path_or_buf=fp.name, columns=src.columns, encoding='utf-8', index=False)

            expected = src.copy(deep=True)
            result = get_dataframe(src_fp=fp.name)

            self.assertTrue(dataframes_are_identical(result, expected))
コード例 #5
0
 def test_missing_required_cols_in_csv_throws_exception(self):
     # The CSV only has columns a and b; requiring c must raise.
     with self.assertRaises(OasisException):
         with NamedTemporaryFile('w') as f:
             f.write('a,b\n1,2\n3,4')
             f.flush()
             get_dataframe(f.name,
                           index_col=False,
                           required_cols=['a', 'b', 'c'])
コード例 #6
0
ファイル: test_data.py プロジェクト: Parivesh123/OasisLMF
    def test_get_dataframe__from_csv_file_with_mixed_case_columns___set_lowercase_cols_option_to_false_and_use_defaults_for_all_other_options(self, data):
        # With lowercasing disabled the mixed-case column names survive the
        # CSV round-trip unchanged.
        with NamedTemporaryFile('w') as fp:
            src = pd.DataFrame(data)
            src.to_csv(path_or_buf=fp.name, columns=src.columns, encoding='utf-8', index=False)

            expected = src.copy(deep=True)
            result = get_dataframe(src_fp=fp.name, lowercase_cols=False)

            self.assertTrue(dataframes_are_identical(result, expected))
コード例 #7
0
ファイル: test_data.py プロジェクト: Parivesh123/OasisLMF
    def test_get_dataframe__from_csv_file_with_mixed_case_columns__set_lowercase_col_option_to_false_and_col_dtypes_option_and_use_defaults_for_all_other_options(self, data, dtypes):
        # Apply the requested dtypes before writing; the result must match a
        # plain pandas read with the same dtype mapping.
        with NamedTemporaryFile('w') as fp:
            src = pd.DataFrame(data)
            for col in dtypes:
                src[col] = src[col].astype(dtypes[col])
            src.to_csv(path_or_buf=fp.name, columns=src.columns, encoding='utf-8', index=False)

            expected = pd.read_csv(fp.name, dtype=dtypes)
            result = get_dataframe(src_fp=fp.name, col_dtypes=dtypes, lowercase_cols=False)

            self.assertTrue(dataframes_are_identical(result, expected))
コード例 #8
0
ファイル: test_data.py プロジェクト: Parivesh123/OasisLMF
    def test_get_dataframe__from_csv_file__set_sort_cols_option_on_single_col_and_use_defaults_for_all_other_options(self, data):
        with NamedTemporaryFile('w') as fp:
            # Randomise 'int_col' so the sort actually reorders rows.
            randomized = []
            for it in data:
                row = {}
                for k, v in it.items():
                    row[k] = np.random.choice(range(10)) if k == 'int_col' else v
                randomized.append(row)
            data = randomized
            src = pd.DataFrame(data)
            src.to_csv(path_or_buf=fp.name, columns=src.columns, encoding='utf-8', index=False)

            sort_cols = ['int_col']
            expected = src.sort_values(sort_cols, axis=0)

            result = get_dataframe(src_fp=fp.name, sort_cols=sort_cols)

            self.assertTrue(dataframes_are_identical(result, expected))
コード例 #9
0
ファイル: test_data.py プロジェクト: Parivesh123/OasisLMF
    def test_get_dataframe__from_csv_file__set_col_defaults_option_and_use_defaults_for_all_other_options(self, data, defaults):
        # get_dataframe with col_defaults should behave like a per-column
        # fillna with the given default values.
        with NamedTemporaryFile('w') as fp:
            src = pd.DataFrame(data)
            src.to_csv(path_or_buf=fp.name, columns=src.columns, encoding='utf-8', index=False)

            expected = src.copy(deep=True)
            for col in defaults:
                expected.loc[:, col].fillna(defaults[col], inplace=True)

            result = get_dataframe(src_fp=fp.name, col_defaults=defaults)

            self.assertTrue(dataframes_are_identical(result, expected))
コード例 #10
0
ファイル: test_data.py プロジェクト: Parivesh123/OasisLMF
    def test_get_dataframe__from_csv_file_with_mixed_case_cols_and_nulls_in_some_columns__set_lowercase_cols_option_to_false_and_non_na_cols_option_and_use_defaults_for_all_other_options(self, data):
        with NamedTemporaryFile('w') as fp:
            # Inject NaNs into the last two rows so non_na_cols has rows to drop.
            data[-1]['int_col'] = np.nan
            data[-2]['STR_COL'] = np.nan
            src = pd.DataFrame(data)
            src.to_csv(path_or_buf=fp.name, columns=src.columns, encoding='utf-8', index=False)

            non_na_cols = ['int_col', 'STR_COL']
            expected = src.dropna(subset=non_na_cols, axis=0)

            result = get_dataframe(src_fp=fp.name, non_na_cols=non_na_cols, lowercase_cols=False)

            self.assertTrue(dataframes_are_identical(result, expected))
コード例 #11
0
ファイル: test_data.py プロジェクト: Parivesh123/OasisLMF
    def test_get_dataframe__from_csv_file__set_subset_cols_option_and_use_defaults_for_all_other_options(self, data, subset_cols):
        # subset_cols should keep exactly those columns, i.e. behave like
        # dropping every column not in the subset.
        with NamedTemporaryFile('w') as fp:
            src = pd.DataFrame(data)
            src.to_csv(path_or_buf=fp.name, columns=src.columns, encoding='utf-8', index=False)

            dropped = [col for col in src.columns if col not in subset_cols]
            expected = src.drop(dropped, axis=1)

            result = get_dataframe(src_fp=fp.name, subset_cols=subset_cols)

            self.assertTrue(dataframes_are_identical(result, expected))
コード例 #12
0
    def test_basic_read_csv(self):
        """A plain two-column CSV is read into the expected dataframe."""
        with NamedTemporaryFile('w') as f:
            f.write('a,b\n1,2\n3,4')
            f.flush()
            df = get_dataframe(f.name, index_col=False)

            ref_df = pd.DataFrame.from_dict({'a': [1, 3], 'b': [2, 4]})

        assert_frame_equal(df, ref_df)
コード例 #13
0
ファイル: test_data.py プロジェクト: Parivesh123/OasisLMF
    def test_get_dataframe__from_csv_file_with_mixed_case_cols__set_lowercase_cols_option_to_false_and_sort_cols_option_on_two_cols_and_use_defaults_for_all_other_options(self, data):
        with NamedTemporaryFile('w') as fp:
            # Fill 'IntCol' with random ints and 'STR_COL' with random
            # lowercase letters so the two-column sort has work to do.
            randomized = []
            for it in data:
                row = {}
                for k, v in it.items():
                    if k == 'IntCol':
                        row[k] = np.random.choice(range(10))
                    elif k == 'STR_COL':
                        row[k] = np.random.choice(list(string.ascii_lowercase))
                    else:
                        row[k] = v
                randomized.append(row)
            data = randomized
            src = pd.DataFrame(data)
            src.to_csv(path_or_buf=fp.name, columns=src.columns, encoding='utf-8', index=False)

            sort_cols = ['IntCol', 'STR_COL']
            expected = src.sort_values(sort_cols, axis=0)

            result = get_dataframe(src_fp=fp.name, sort_cols=sort_cols, lowercase_cols=False)

            self.assertTrue(dataframes_are_identical(result, expected))
コード例 #14
0
    def test_all_add_default_str_in_csv(self):
        """A defaulted column absent from the CSV is appended with its default."""
        with NamedTemporaryFile('w') as f:
            f.write('a,b\n1,2\n3,4')
            f.flush()
            df = get_dataframe(f.name,
                               index_col=False,
                               defaulted_cols={'c': 'abc'})

            ref_df = pd.DataFrame.from_dict({'a': [1, 3], 'b': [2, 4], 'c': ['abc', 'abc']})

        assert_frame_equal(df, ref_df)
コード例 #15
0
    def test_all_required_cols_present_in_csv_case_insensitive(self):
        """Required-column checking matches case-insensitively when lowercasing."""
        with NamedTemporaryFile('w') as f:
            f.write('a,b\n1,2\n3,4')
            f.flush()
            # Required cols are upper-case; lowercase_cols=True makes them match.
            df = get_dataframe(f.name,
                               index_col=False,
                               lowercase_cols=True,
                               required_cols=['A', 'B'])

            ref_df = pd.DataFrame.from_dict({'a': [1, 3], 'b': [2, 4]})

        assert_frame_equal(df, ref_df)
コード例 #16
0
    def test_all_default_cols_present_in_csv(self):
        """Defaults for columns already present leave the data untouched."""
        with NamedTemporaryFile('w') as f:
            f.write('a,b\n1,2\n3,4')
            f.flush()
            df = get_dataframe(f.name,
                               index_col=False,
                               default_values={'a': 1, 'b': 2})

            ref_df = pd.DataFrame.from_dict({'a': [1, 3], 'b': [2, 4]})

        assert_frame_equal(df, ref_df)
コード例 #17
0
class TestReinsurance(unittest.TestCase):
    """
    End-to-end tests that run OED reinsurance test cases through the Oasis
    financial module (``fmcalc``) and compare the computed net losses
    against expected CSV outputs.
    """

    def _run_fm(self,
                input_name,
                output_name,
                xref_descriptions,
                allocation=oed.ALLOCATE_TO_ITEMS_BY_PREVIOUS_LEVEL_ALLOC_ID):
        """
        Invoke the external ``fmcalc`` binary on ``<input_name>.bin`` and
        return a dataframe of first-sample losses joined to their inputs
        (``_pre``/``_net`` suffixes) and to ``xref_descriptions``.

        :raises Exception: if the fmcalc pipeline exits non-zero.
        """
        # shell=True is required for the pipe/redirection in the command.
        command = "fmcalc -p {0} -n -a {2} < {1}.bin | tee {0}.bin | fmtocsv > {0}.csv".format(
            output_name, input_name, allocation)
        print(command)
        proc = subprocess.Popen(command, shell=True)
        proc.wait()
        if proc.returncode != 0:
            raise Exception("Failed to run fm")
        losses_df = pd.read_csv("{}.csv".format(output_name))
        inputs_df = pd.read_csv("{}.csv".format(input_name))

        # Keep only the first sample (sidx == 1) on both sides of the join.
        losses_df.drop(losses_df[losses_df.sidx != 1].index, inplace=True)
        inputs_df.drop(inputs_df[inputs_df.sidx != 1].index, inplace=True)
        losses_df = pd.merge(inputs_df,
                             losses_df,
                             left_on='output_id',
                             right_on='output_id',
                             suffixes=('_pre', '_net'))

        losses_df = pd.merge(xref_descriptions,
                             losses_df,
                             left_on='xref_id',
                             right_on='output_id')

        # Drop join/bookkeeping columns; only the descriptive fields and
        # the pre/net loss columns remain.
        del losses_df['event_id_pre']
        del losses_df['sidx_pre']
        del losses_df['event_id_net']
        del losses_df['sidx_net']
        del losses_df['output_id']
        del losses_df['xref_id']
        return losses_df

    def _run_test(self, account_df, location_df, ri_info_df, ri_scope_df,
                  loss_factor, do_reinsurance):
        """
        Run the direct and reinsurance layers through the Oasis FM.
        Returns an ordered dict of net loss data frames, the first for the
        direct layers and then one per inuring layer.
        """
        t_start = time.time()

        net_losses = OrderedDict()

        initial_dir = os.getcwd()
        try:

            with TemporaryDirectory() as run_dir:

                os.chdir(run_dir)

                direct_layer = DirectLayer(account_df, location_df)
                direct_layer.generate_oasis_structures()
                direct_layer.write_oasis_files()
                losses_df = direct_layer.apply_fm(
                    loss_percentage_of_tiv=loss_factor, net=False)
                net_losses['Direct'] = losses_df

                oed_validator = oed.OedValidator()
                if do_reinsurance:
                    (is_valid, error_msgs) = oed_validator.validate(
                        ri_info_df, ri_scope_df)
                    if not is_valid:
                        print(error_msgs)
                        exit(1)

                ri_layers = reinsurance_layer.generate_files_for_reinsurance(
                    items=direct_layer.items,
                    coverages=direct_layer.coverages,
                    fm_xrefs=direct_layer.fm_xrefs,
                    xref_descriptions=direct_layer.xref_descriptions,
                    gulsummaryxref=pd.DataFrame(),
                    fmsummaryxref=pd.DataFrame(),
                    ri_info_df=ri_info_df,
                    ri_scope_df=ri_scope_df,
                    direct_oasis_files_dir='',
                )

                # ri_layers maps an index to layer metadata, e.g.:
                # {'inuring_priority': 1, 'risk_level': 'LOC', 'directory': 'run/RI_1'}
                # {'inuring_priority': 1, 'risk_level': 'ACC', 'directory': 'run/RI_2'}
                # {'inuring_priority': 2, 'risk_level': 'LOC', 'directory': 'run/RI_3'}
                # {'inuring_priority': 3, 'risk_level': 'LOC', 'directory': 'run/RI_4'}
                for idx in ri_layers:
                    # The first layer nets down from the direct ILS output;
                    # each subsequent layer nets down from the previous one.
                    if idx < 2:
                        input_name = "ils"
                    else:
                        input_name = ri_layers[idx - 1]['directory']
                    bin.csv_to_bin(ri_layers[idx]['directory'],
                                   ri_layers[idx]['directory'],
                                   il=True)

                    reinsurance_layer_losses_df = self._run_fm(
                        input_name, ri_layers[idx]['directory'],
                        direct_layer.xref_descriptions)
                    output_name = "Inuring_priority:{} - Risk_level:{}".format(
                        ri_layers[idx]['inuring_priority'],
                        ri_layers[idx]['risk_level'])
                    net_losses[output_name] = reinsurance_layer_losses_df

                # BUGFIX: the original returned inside the loop, so only the
                # first inuring layer was ever processed, contradicting the
                # documented contract; return after all layers are netted.
                return net_losses

        finally:
            os.chdir(initial_dir)
            t_end = time.time()
            print("Exec time: {}".format(t_end - t_start))

    def _load_acc_and_loc_dfs(self, oed_dir):
        """
        Load the OED account and location CSVs from ``oed_dir``.

        Exits the process with status 1 if either file is missing.
        """
        # Account file
        oed_account_file = os.path.join(oed_dir, "account.csv")
        if not os.path.exists(oed_account_file):
            print("Path does not exist: {}".format(oed_account_file))
            exit(1)
        account_df = pd.read_csv(oed_account_file)

        # Location file
        oed_location_file = os.path.join(oed_dir, "location.csv")
        if not os.path.exists(oed_location_file):
            print("Path does not exist: {}".format(oed_location_file))
            exit(1)
        location_df = pd.read_csv(oed_location_file)

        return account_df, location_df

    @parameterized.expand(test_cases)
    def test_fmcalc(self, case, case_dir, expected_dir):
        """
        Compute net losses for every layer of a parameterized OED test case
        and compare them with the expected CSVs in ``expected_dir``.
        """
        print("Test case: {}".format(case))

        loss_factor = 1.0

        (account_df, location_df) = self._load_acc_and_loc_dfs(case_dir)

        (ri_info_df, ri_scope_df, do_reinsurance) = oed.load_oed_dfs(case_dir)

        net_losses = self._run_test(
            account_df,
            location_df,
            ri_info_df,
            ri_scope_df,
            loss_factor,
            do_reinsurance,
        )

        # Force a common set of dtypes on both frames so the comparison is
        # not sensitive to pandas' CSV type inference. Loop-invariant, so
        # defined once instead of per layer as in the original.
        dtypes = {
            "portfolio_number": "str",
            "policy_number": "str",
            "account_number": "str",
            "location_number": "str",
            "location_group": "str",
            "cedant_name": "str",
            "producer_name": "str",
            "lob": "str",
            "country_code": "str",
            "reins_tag": "str",
            "coverage_type_id": "str",
            "peril_id": "str",
            "tiv": "float",
            "loss_gul": "float",
            "loss_il": "float",
            "loss_net": "float"
        }

        for key in net_losses:
            expected_file = os.path.join(
                expected_dir, "{}.csv".format(key.replace(' ', '_')))

            expected_df = get_dataframe(expected_file, index_col=False)

            found_df = net_losses[key]
            found_df.to_csv("{}.csv".format(key.replace(' ', '_')))

            # Normalise NaNs to empty strings before the string-typed compare.
            expected_df = expected_df.replace(np.nan, '', regex=True)
            found_df = found_df.replace(np.nan, '', regex=True)

            set_col_dtypes(expected_df, dtypes)
            set_col_dtypes(found_df, dtypes)

            # Debug artefact kept from the original for post-mortem diffing.
            expected_df.to_csv("/tmp/expected.csv", index=False)

            print(found_df.dtypes)
            print(expected_df.dtypes)

            assert_frame_equal(found_df, expected_df)
コード例 #18
0
ファイル: demo.py プロジェクト: OasisLMF/ZurichWorkshop2018
# Show the centre of all peril areas held by the peril lookup.
print('\ncentre of all the areas: {}'.format(plookup.peril_areas_centre))

# Get the vulnerability dict - should be consistent with the source file
# key columns are represented as tuple keys in the dict, and the vuln IDs
# are the values of keys in the dict
print('\nvuln dict: {}'.format(vlookup.vulnerabilities))
print('\nvuln file key columns: {}'.format(vlookup.key_cols))

# Create a locations dataframe from a test locations file
# NOTE(review): other call sites in this codebase pass the plural keyword
# ``sort_cols`` to get_dataframe; confirm this version of the API accepts
# the singular ``sort_col`` used here.
loc_df = get_dataframe(
    src_fp='../Catrisks/keys_data/MEEQ/catrisks_meeq_model_loc_test.csv',
    non_na_cols=(
        'Item_ID',
        'LONGITUDE',
        'LATITUDE',
    ),
    col_dtypes={
        'Item_ID': int,
        'LONGITUDE': float,
        'LATITUDE': float
    },
    sort_col='Item_ID')

# Do a combined, peril and vuln lookup for an individual location
# the combined lookup does the peril and vuln lookup - the individual
# peril and vuln lookups should be consistent with the combined lookup
loc = loc_df.iloc[0]
print('\nloc: {}'.format(loc))

print('\nperil lookup: {}'.format(plookup.lookup(loc)))
print('\nvuln lookup: {}'.format(vlookup.lookup(loc)))
コード例 #19
0
# Show the centre of all peril areas held by the peril lookup.
print('\ncentre of all the areas: {}'.format(plookup.peril_areas_centre))

# Get the vulnerability dict - should be consistent with the source file
# key columns are represented as tuple keys in the dict, and the vuln IDs
# are the values of keys in the dict
print('\nvuln dict: {}'.format(vlookup.vulnerabilities))
print('\nvuln file key columns: {}'.format(vlookup.key_cols))

# Create a locations dataframe from a test locations file
# NOTE(review): other call sites in this codebase pass the plural keyword
# ``sort_cols`` to get_dataframe; confirm this version of the API accepts
# the singular ``sort_col`` used here.
loc_df = get_dataframe(src_fp='keys_data/EuWs/model_loc_test.csv',
                       non_na_cols=(
                           'ID',
                           'LONGITUDE',
                           'LATITUDE',
                           'VULNERABILITY',
                       ),
                       col_dtypes={
                           'ID': int,
                           'LONGITUDE': float,
                           'LATITUDE': float,
                           'VULNERABILITY': int
                       },
                       sort_col='ID')

# Do a combined, peril and vuln lookup for an individual location
# the combined lookup does the peril and vuln lookup - the individual
# peril and vuln lookups should be consistent with the combined lookup
loc = loc_df.iloc[0]
print('\nloc: {}'.format(loc))

print('\nperil lookup: {}'.format(plookup.lookup(loc)))
print('\nvuln lookup: {}'.format(vlookup.lookup(loc)))
コード例 #20
0
ファイル: test_data.py プロジェクト: Parivesh123/OasisLMF
 def test_get_dataframe__no_src_fp_or_buf_or_data_provided__oasis_exception_is_raised(self):
     # With no file path, buffer, or data, get_dataframe must raise.
     with self.assertRaises(OasisException):
         get_dataframe(src_fp=None, src_buf=None, src_data=None)