Example #1
    def test_get_dataframe__from_csv_file_with_mixed_case_cols_and_missing_some_required_cols__set_required_cols_option_and_use_defaults_for_all_other_options__oasis_exception_is_raised(self, data, missing_cols):
        with NamedTemporaryFile('w') as fp:
            df = pd.DataFrame(data)
            df.drop(missing_cols, axis=1).to_csv(path_or_buf=fp.name, encoding='utf-8', index=False)

            with self.assertRaises(OasisException):
                get_dataframe(
                    src_fp=fp.name,
                    required_cols=df.columns.tolist()
                )
Example #2
    def test_get_dataframe__from_empty_csv_file__set_empty_data_err_msg_and_defaults_for_all_other_options__oasis_exception_is_raised_with_empty_data_err_msg(self, empty_data_err_msg):
        with NamedTemporaryFile('w') as fp:
            df = pd.DataFrame()
            df.to_csv(path_or_buf=fp.name)

            with self.assertRaises(OasisException):
                try:
                    get_dataframe(src_fp=fp.name, empty_data_error_msg=empty_data_err_msg)
                except OasisException as e:
                    self.assertEqual(str(e), empty_data_err_msg)
                    raise e
Example #3
def load_oed_dfs(oed_dir, show_all=False):
    """
    Load OED data files.
    """

    ri_info_df = None
    ri_scope_df = None
    do_reinsurance = True
    if oed_dir is not None:
        if not os.path.exists(oed_dir):
            print("Path does not exist: {}".format(oed_dir))
            exit(1)

        # RI files
        oed_ri_info_file = os.path.join(oed_dir, "ri_info.csv")
        oed_ri_scope_file = os.path.join(oed_dir, "ri_scope.csv")
        oed_ri_info_file_exists = os.path.exists(oed_ri_info_file)
        oed_ri_scope_file_exists = os.path.exists(oed_ri_scope_file)

        if not oed_ri_info_file_exists and not oed_ri_scope_file_exists:
            ri_info_df = None
            ri_scope_df = None
            do_reinsurance = False
        elif oed_ri_info_file_exists and oed_ri_scope_file_exists:
            ri_info_df = get_dataframe(oed_ri_info_file,
                                       lowercase_cols=False,
                                       required_cols=RI_INFO_REQUIRED_COLS,
                                       default_values=RI_INFO_DEFAULTS)
            ri_scope_df = get_dataframe(oed_ri_scope_file,
                                        lowercase_cols=False,
                                        required_cols=RI_SCOPE_REQUIRED_COLS,
                                        default_values=RI_SCOPE_DEFAULTS)
        else:
            print("Both reinsurance files must exist: {} {}".format(
                oed_ri_info_file, oed_ri_scope_file))
            exit(1)

        if do_reinsurance:
            ri_info_df = ri_info_df[OED_REINS_INFO_FIELDS].copy()
            ri_scope_df = ri_scope_df[OED_REINS_SCOPE_FIELDS].copy()

            # Ensure percent fields are float
            info_float_cols = ['CededPercent', 'PlacedPercent', 'TreatyShare']
            scope_float_cols = ['CededPercent']
            ri_info_df[info_float_cols] = ri_info_df[info_float_cols].astype(float)
            ri_scope_df[scope_float_cols] = ri_scope_df[scope_float_cols].astype(float)

    return (ri_info_df, ri_scope_df, do_reinsurance)
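
A minimal usage sketch for the loader above; the directory name is hypothetical and assumed to contain ri_info.csv and ri_scope.csv:

# 'oed_data' is an assumed directory name, for illustration only.
ri_info_df, ri_scope_df, do_reinsurance = load_oed_dfs('oed_data')
if do_reinsurance:
    # Both RI files were found and loaded with their required columns.
    print(ri_info_df[['CededPercent', 'PlacedPercent', 'TreatyShare']].head())
else:
    print('No reinsurance files found; direct losses only.')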
Example #4
    def test_get_dataframe__from_csv_file__use_default_options(self, data):
        with NamedTemporaryFile('w') as fp:
            df = pd.DataFrame(data)
            df.to_csv(path_or_buf=fp.name, columns=df.columns, encoding='utf-8', index=False)

            expected = df.copy(deep=True)

            result = get_dataframe(src_fp=fp.name)

            self.assertTrue(dataframes_are_identical(result, expected))
Example #5
    def test_missing_required_cols_in_csv_throws_exception(self):
        with self.assertRaises(OasisException):
            with NamedTemporaryFile('w') as f:
                f.writelines([
                    'a,b\n1,2\n3,4',
                ])
                f.flush()
                df = get_dataframe(f.name,
                                   index_col=False,
                                   required_cols=['a', 'b', 'c'])
Example #6
    def test_get_dataframe__from_csv_file_with_mixed_case_columns___set_lowercase_cols_option_to_false_and_use_defaults_for_all_other_options(self, data):
        with NamedTemporaryFile('w') as fp:
            df = pd.DataFrame(data)
            df.to_csv(path_or_buf=fp.name, columns=df.columns, encoding='utf-8', index=False)

            expected = df.copy(deep=True)

            result = get_dataframe(src_fp=fp.name, lowercase_cols=False)

            self.assertTrue(dataframes_are_identical(result, expected))
Example #7
    def test_get_dataframe__from_csv_file_with_mixed_case_columns__set_lowercase_col_option_to_false_and_col_dtypes_option_and_use_defaults_for_all_other_options(self, data, dtypes):
        with NamedTemporaryFile('w') as fp:
            df = pd.DataFrame(data)
            for col, dtype in viewitems(dtypes):
                df[col] = df[col].astype(dtype)
            df.to_csv(path_or_buf=fp.name, columns=df.columns, encoding='utf-8', index=False)

            expected = pd.read_csv(fp.name, dtype=dtypes)

            result = get_dataframe(src_fp=fp.name, col_dtypes=dtypes, lowercase_cols=False)

            self.assertTrue(dataframes_are_identical(result, expected))
Example #8
    def test_get_dataframe__from_csv_file__set_sort_cols_option_on_single_col_and_use_defaults_for_all_other_options(self, data):
        with NamedTemporaryFile('w') as fp:
            data = [{k: (v if k != 'int_col' else np.random.choice(range(10))) for k, v in viewitems(it)} for it in data]
            df = pd.DataFrame(data)
            df.to_csv(path_or_buf=fp.name, columns=df.columns, encoding='utf-8', index=False)

            sort_cols = ['int_col']
            expected = df.sort_values(sort_cols, axis=0)

            result = get_dataframe(src_fp=fp.name, sort_cols=sort_cols)

            self.assertTrue(dataframes_are_identical(result, expected))
Example #9
    def test_get_dataframe__from_csv_file__set_col_defaults_option_and_use_defaults_for_all_other_options(self, data, defaults):
        with NamedTemporaryFile('w') as fp: 
            df = pd.DataFrame(data)
            df.to_csv(path_or_buf=fp.name, columns=df.columns, encoding='utf-8', index=False)

            expected = df.copy(deep=True)
            for col, default in viewitems(defaults):
                expected[col] = expected[col].fillna(default)

            result = get_dataframe(src_fp=fp.name, col_defaults=defaults)

            self.assertTrue(dataframes_are_identical(result, expected))
Example #10
    def test_get_dataframe__from_csv_file_with_mixed_case_cols_and_nulls_in_some_columns__set_lowercase_cols_option_to_false_and_non_na_cols_option_and_use_defaults_for_all_other_options(self, data):
        with NamedTemporaryFile('w') as fp:
            data[-1]['int_col'] = np.nan
            data[-2]['STR_COL'] = np.nan
            df = pd.DataFrame(data)
            df.to_csv(path_or_buf=fp.name, columns=df.columns, encoding='utf-8', index=False)

            non_na_cols = ['int_col', 'STR_COL']
            expected = df.dropna(subset=non_na_cols, axis=0)

            result = get_dataframe(src_fp=fp.name, non_na_cols=non_na_cols, lowercase_cols=False)

            self.assertTrue(dataframes_are_identical(result, expected))
Example #11
    def test_get_dataframe__from_csv_file__set_subset_cols_option_and_use_defaults_for_all_other_options(self, data, subset_cols):
        with NamedTemporaryFile('w') as fp:
            df = pd.DataFrame(data)
            df.to_csv(path_or_buf=fp.name, columns=df.columns, encoding='utf-8', index=False)

            expected = df.drop([col for col in df.columns if col not in subset_cols], axis=1)

            result = get_dataframe(
                src_fp=fp.name,
                subset_cols=subset_cols
            )

            self.assertTrue(dataframes_are_identical(result, expected))
Example #12
    def test_basic_read_csv(self):
        with NamedTemporaryFile('w') as f:
            f.writelines([
                'a,b\n1,2\n3,4',
            ])
            f.flush()
            df = get_dataframe(f.name, index_col=False)

            ref_data = {'a': [1, 3], 'b': [2, 4]}

            ref_df = pd.DataFrame.from_dict(ref_data)

        assert_frame_equal(df, ref_df)
Example #13
    def test_get_dataframe__from_csv_file_with_mixed_case_cols__set_lowercase_cols_option_to_false_and_sort_cols_option_on_two_cols_and_use_defaults_for_all_other_options(self, data):
        with NamedTemporaryFile('w') as fp:
            data = [
                {k: (v if k not in ('IntCol', 'STR_COL') else (np.random.choice(range(10)) if k == 'IntCol' else np.random.choice(list(string.ascii_lowercase)))) for k, v in viewitems(it)}
                for it in data
            ]
            df = pd.DataFrame(data)
            df.to_csv(path_or_buf=fp.name, columns=df.columns, encoding='utf-8', index=False)

            sort_cols = ['IntCol', 'STR_COL']
            expected = df.sort_values(sort_cols, axis=0)

            result = get_dataframe(src_fp=fp.name, sort_cols=sort_cols, lowercase_cols=False)

            self.assertTrue(dataframes_are_identical(result, expected))
Example #14
    def test_all_add_default_str_in_csv(self):
        with NamedTemporaryFile('w') as f:
            f.writelines([
                'a,b\n1,2\n3,4',
            ])
            f.flush()
            df = get_dataframe(f.name,
                               index_col=False,
                               defaulted_cols={'c': 'abc'})

            ref_data = {'a': [1, 3], 'b': [2, 4], 'c': ['abc', 'abc']}

            ref_df = pd.DataFrame.from_dict(ref_data)

        assert_frame_equal(df, ref_df)
Example #15
    def test_all_required_cols_present_in_csv_case_insensitive(self):
        with NamedTemporaryFile('w') as f:
            f.writelines([
                'a,b\n1,2\n3,4',
            ])
            f.flush()
            df = get_dataframe(f.name,
                               index_col=False,
                               lowercase_cols=True,
                               required_cols=['A', 'B'])

            ref_data = {'a': [1, 3], 'b': [2, 4]}

            ref_df = pd.DataFrame.from_dict(ref_data)

        assert_frame_equal(df, ref_df)
Example #16
    def test_all_default_cols_present_in_csv(self):
        with NamedTemporaryFile('w') as f:
            f.writelines([
                'a,b\n1,2\n3,4',
            ])
            f.flush()
            df = get_dataframe(f.name,
                               index_col=False,
                               default_values={
                                   'a': 1,
                                   'b': 2
                               })

            ref_data = {
                'a': [1, 3],
                'b': [2, 4],
            }

            ref_df = pd.DataFrame.from_dict(ref_data)

        assert_frame_equal(df, ref_df)
Example #17
class TestReinsurance(unittest.TestCase):
    def _run_fm(self,
                input_name,
                output_name,
                xref_descriptions,
                allocation=oed.ALLOCATE_TO_ITEMS_BY_PREVIOUS_LEVEL_ALLOC_ID):

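        # fmcalc reads the <input_name>.bin stream, tee keeps a binary copy
        # of its output, and fmtocsv converts that output to CSV.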
        command = "fmcalc -p {0} -n -a {2} < {1}.bin | tee {0}.bin | fmtocsv > {0}.csv".format(
            output_name, input_name, allocation)
        print(command)
        proc = subprocess.Popen(command, shell=True)
        proc.wait()
        if proc.returncode != 0:
            raise Exception("Failed to run fm")
        losses_df = pd.read_csv("{}.csv".format(output_name))
        inputs_df = pd.read_csv("{}.csv".format(input_name))

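        # Keep only the first sample (sidx == 1) in both frames before
        # merging inputs onto losses by output_id.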
        losses_df.drop(losses_df[losses_df.sidx != 1].index, inplace=True)
        inputs_df.drop(inputs_df[inputs_df.sidx != 1].index, inplace=True)
        losses_df = pd.merge(inputs_df,
                             losses_df,
                             left_on='output_id',
                             right_on='output_id',
                             suffixes=('_pre', '_net'))

        losses_df = pd.merge(xref_descriptions,
                             losses_df,
                             left_on='xref_id',
                             right_on='output_id')

        del losses_df['event_id_pre']
        del losses_df['sidx_pre']
        del losses_df['event_id_net']
        del losses_df['sidx_net']
        del losses_df['output_id']
        del losses_df['xref_id']
        return losses_df

    def _run_test(self, account_df, location_df, ri_info_df, ri_scope_df,
                  loss_factor, do_reinsurance):
        """
        Run the direct and reinsurance layers through the Oasis FM.
        Returns an array of net loss data frames, the first for the direct layers
        and then one per inuring layer.
        """
        t_start = time.time()

        net_losses = OrderedDict()

        initial_dir = os.getcwd()
        try:

            with TemporaryDirectory() as run_dir:

                os.chdir(run_dir)

                direct_layer = DirectLayer(account_df, location_df)
                direct_layer.generate_oasis_structures()
                direct_layer.write_oasis_files()
                losses_df = direct_layer.apply_fm(
                    loss_percentage_of_tiv=loss_factor, net=False)
                net_losses['Direct'] = losses_df

                oed_validator = oed.OedValidator()
                if do_reinsurance:
                    (is_valid, error_msgs) = oed_validator.validate(
                        ri_info_df, ri_scope_df)
                    if not is_valid:
                        print(error_msgs)
                        exit(1)

                ri_layers = reinsurance_layer.generate_files_for_reinsurance(
                    items=direct_layer.items,
                    coverages=direct_layer.coverages,
                    fm_xrefs=direct_layer.fm_xrefs,
                    xref_descriptions=direct_layer.xref_descriptions,
                    gulsummaryxref=pd.DataFrame(),
                    fmsummaryxref=pd.DataFrame(),
                    ri_info_df=ri_info_df,
                    ri_scope_df=ri_scope_df,
                    direct_oasis_files_dir='',
                )

                for idx in ri_layers:
                    # Example ri_layers entries:
                    # {'inuring_priority': 1, 'risk_level': 'LOC', 'directory': 'run/RI_1'}
                    # {'inuring_priority': 1, 'risk_level': 'ACC', 'directory': 'run/RI_2'}
                    # {'inuring_priority': 2, 'risk_level': 'LOC', 'directory': 'run/RI_3'}
                    # {'inuring_priority': 3, 'risk_level': 'LOC', 'directory': 'run/RI_4'}
                    if idx < 2:
                        input_name = "ils"
                    else:
                        input_name = ri_layers[idx - 1]['directory']
                    bin.csv_to_bin(ri_layers[idx]['directory'],
                                   ri_layers[idx]['directory'],
                                   il=True)

                    reinsurance_layer_losses_df = self._run_fm(
                        input_name, ri_layers[idx]['directory'],
                        direct_layer.xref_descriptions)
                    output_name = "Inuring_priority:{} - Risk_level:{}".format(
                        ri_layers[idx]['inuring_priority'],
                        ri_layers[idx]['risk_level'])
                    net_losses[output_name] = reinsurance_layer_losses_df

                return net_losses

        finally:
            os.chdir(initial_dir)
            t_end = time.time()
            print("Exec time: {}".format(t_end - t_start))

    def _load_acc_and_loc_dfs(self, oed_dir):

        # Account file
        oed_account_file = os.path.join(oed_dir, "account.csv")
        if not os.path.exists(oed_account_file):
            print("Path does not exist: {}".format(oed_account_file))
            exit(1)
        account_df = pd.read_csv(oed_account_file)

        # Location file
        oed_location_file = os.path.join(oed_dir, "location.csv")
        if not os.path.exists(oed_location_file):
            print("Path does not exist: {}".format(oed_location_file))
            exit(1)
        location_df = pd.read_csv(oed_location_file)

        return account_df, location_df

    @parameterized.expand(test_cases)
    def test_fmcalc(self, case, case_dir, expected_dir):

        print("Test case: {}".format(case))

        loss_factor = 1.0

        (account_df, location_df) = self._load_acc_and_loc_dfs(case_dir)

        (ri_info_df, ri_scope_df, do_reinsurance) = oed.load_oed_dfs(case_dir)

        net_losses = self._run_test(
            account_df,
            location_df,
            ri_info_df,
            ri_scope_df,
            loss_factor,
            do_reinsurance,
        )

        for key in net_losses.keys():
            expected_file = os.path.join(
                expected_dir, "{}.csv".format(key.replace(' ', '_')))

            dtypes = {
                "portfolio_number": "str",
                "policy_number": "str",
                "account_number": "str",
                "location_number": "str",
                "location_group": "str",
                "cedant_name": "str",
                "producer_name": "str",
                "lob": "str",
                "country_code": "str",
                "reins_tag": "str",
                "coverage_type_id": "str",
                "peril_id": "str",
                "tiv": "float",
                "loss_gul": "float",
                "loss_il": "float",
                "loss_net": "float"
            }

            expected_df = get_dataframe(expected_file, index_col=False)

            found_df = net_losses[key]
            found_df.to_csv("{}.csv".format(key.replace(' ', '_')))

            expected_df = expected_df.replace(np.nan, '', regex=True)
            found_df = found_df.replace(np.nan, '', regex=True)

            set_col_dtypes(expected_df, dtypes)
            set_col_dtypes(found_df, dtypes)

            expected_df.to_csv("/tmp/expected.csv", index=False)

            print(found_df.dtypes)
            print(expected_df.dtypes)

            assert_frame_equal(found_df, expected_df)
Example #18
print('\ncentre of all the areas: {}'.format(plookup.peril_areas_centre))

# Get the vulnerability dict - should be consistent with the source file
# key columns are represented as tuple keys in the dict, and the vuln IDs
# are the values of keys in the dict
print('\nvuln dict: {}'.format(vlookup.vulnerabilities))
print('\nvuln file key columns: {}'.format(vlookup.key_cols))

# Create a locations dataframe from a test locations file
loc_df = get_dataframe(
    src_fp='../Catrisks/keys_data/MEEQ/catrisks_meeq_model_loc_test.csv',
    non_na_cols=(
        'Item_ID',
        'LONGITUDE',
        'LATITUDE',
    ),
    col_dtypes={
        'Item_ID': int,
        'LONGITUDE': float,
        'LATITUDE': float
    },
    sort_col='Item_ID')

# Do a combined, peril and vuln lookup for an individual location
# the combined lookup does the peril and vuln lookup - the individual
# peril and vuln lookups should be consistent with the combined lookup
loc = loc_df.iloc[0]
print('\nloc: {}'.format(loc))

print('\nperil lookup: {}'.format(plookup.lookup(loc)))
print('\nvuln lookup: {}'.format(vlookup.lookup(loc)))
Example #19
print('\ncentre of all the areas: {}'.format(plookup.peril_areas_centre))

# Get the vulnerability dict - should be consistent with the source file
# key columns are represented as tuple keys in the dict, and the vuln IDs
# are the values of keys in the dict
print('\nvuln dict: {}'.format(vlookup.vulnerabilities))
print('\nvuln file key columns: {}'.format(vlookup.key_cols))

# Create a locations dataframe from a test locations file
loc_df = get_dataframe(src_fp='keys_data/EuWs/model_loc_test.csv',
                       non_na_cols=(
                           'ID',
                           'LONGITUDE',
                           'LATITUDE',
                           'VULNERABILITY',
                       ),
                       col_dtypes={
                           'ID': int,
                           'LONGITUDE': float,
                           'LATITUDE': float,
                           'VULNERABILITY': int
                       },
                       sort_col='ID')

# Do a combined, peril and vuln lookup for an individual location
# the combined lookup does the peril and vuln lookup - the individual
# peril and vuln lookups should be consistent with the combined lookup
loc = loc_df.iloc[0]
print('\nloc: {}'.format(loc))

print('\nperil lookup: {}'.format(plookup.lookup(loc)))
print('\nvuln lookup: {}'.format(vlookup.lookup(loc)))
Example #20
    def test_get_dataframe__no_src_fp_or_buf_or_data_provided__oasis_exception_is_raised(self):
        with self.assertRaises(OasisException):
            get_dataframe(src_fp=None, src_buf=None, src_data=None)
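
For contrast with the failing call above, a minimal sketch of supplying one of the accepted sources, assuming src_data accepts an in-memory structure such as a list of dicts and src_buf accepts CSV text (the parameter names above confirm these options exist, not their exact types):

# Assumed: src_data may be any structure pandas can frame, e.g. a list of dicts.
df = get_dataframe(src_data=[{'a': 1, 'b': 2}, {'a': 3, 'b': 4}])

# Assumed: src_buf may be raw CSV text.
df = get_dataframe(src_buf='a,b\n1,2\n3,4')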