Example #1
    def cum_to_incr(self, inplace=False):
        """Method to convert an cumlative triangle into a incremental triangle.

        Parameters
        ----------
            inplace : bool
                Set to True to update the instance data attribute in place.

        Returns
        -------
            Updated instance of the triangle with incremental values
        """
        if inplace:
            v = self.valuation_date
            if self.is_cumulative or self.is_cumulative is None:
                if self.is_pattern:
                    xp = self.get_array_module()
                    self.values = xp.nan_to_num(self.values)
                    self.values[self.values == 0] = 1
                    diff = self.iloc[..., :-1] / self.iloc[..., 1:].values
                    self = concat((diff, self.iloc[..., -1],), axis=3,)
                    self.values = self.values * self.nan_triangle
                else:
                    diff = self.iloc[..., 1:] - self.iloc[..., :-1].values
                    self = concat((self.iloc[..., 0], diff,), axis=3,)
                self.is_cumulative = False
            self.valuation_date = v
            return self
        else:
            new_obj = self.copy()
            return new_obj.cum_to_incr(inplace=True)
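A minimal usage sketch, assuming the chainladder package and its bundled 'raa' sample triangle, which loads as cumulative:

import chainladder as cl

raa = cl.load_sample('raa')        # cumulative sample triangle
incremental = raa.cum_to_incr()    # returns a new incremental triangle
raa.cum_to_incr(inplace=True)      # or mutate the instance directly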
    def shift(self, periods=-1, axis=3):
        """ Shift elements along an axis by desired number of periods.

        Data that falls beyond the existing shape of the Triangle is eliminated
        and new cells default to zero.

        Parameters
        ----------
        periods : int
            Number of periods to shift. Can be positive or negative.
        axis : {2 or 'origin', 3 or 'development', None}, default 3
            Shift direction.

        Returns
        -------
        Triangle
            updated with shifted elements

        """
        axis = self._get_axis(axis)
        if axis < 2:
            raise AttributeError(
                "Lagging only supported for origin and development axes")
        if periods == 0:
            return self
        if periods > 0:
            if axis == 3:
                out = concat((self.iloc[..., 1:].rename(
                    'development', self.development[:-1]),
                              (self.iloc[..., -1:] * 0)),
                             axis=axis)
            else:
                out = concat(
                    (self.iloc[..., 1:, :].rename('origin', self.origin[:-1]),
                     (self.iloc[..., -1:, :] * 0)),
                    axis=axis)
        else:
            if axis == 3:
                out = concat((
                    (self.iloc[..., :1] * 0),
                    self.iloc[..., :-1].rename('development',
                                               self.development[1:]),
                ),
                             axis=axis)
            else:
                out = concat((
                    (self.iloc[..., :1, :] * 0),
                    self.iloc[..., :-1, :].rename('origin', self.origin[1:]),
                ),
                             axis=axis)
        if abs(periods) == 1:
            return out
        else:
            # Recurse on the shifted result, stepping periods toward zero.
            return out.shift(periods - 1 if periods > 0 else periods + 1, axis)
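A usage sketch, assuming a chainladder build that exposes this shift method on Triangle:

import chainladder as cl

raa = cl.load_sample('raa')
# Lag the development axis back one period; vacated cells become zero.
lagged = raa.shift(periods=-1, axis='development')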
Example #3
    def agg_func(self, *args, **kwargs):
        from chainladder.utils import concat

        xp = self.obj.get_array_module()

        # 'v' is the aggregation method name (e.g. 'sum'), captured from the
        # enclosing factory that generates this agg_func.
        values = [
            getattr(
                self.obj.iloc.__getitem__(
                    tuple([slice(None)] * self.axis + [i])),
                v)(self.axis, auto_sparse=False, keepdims=True)
            for i in self.groups.indices.values()
        ]
        self.obj = concat(values, axis=self.axis, ignore_index=True)

        if isinstance(self.groups.dtypes.index, pd.MultiIndex):
            index = pd.DataFrame(np.zeros(len(self.groups.dtypes.index)),
                                 index=self.groups.dtypes.index,
                                 columns=['_']).reset_index().iloc[:, :-1]
            self.obj.index = index
        else:
            index = pd.DataFrame(self.groups.dtypes.index)
            self.obj.key_labels = index.columns.tolist()
            self.obj.kdims = index.values
        self.obj._set_slicers()
        return self.obj
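This agg_func appears to back Triangle.groupby aggregations. A sketch assuming the bundled 'clrd' sample, whose index carries 'GRNAME' and 'LOB' labels:

import chainladder as cl

clrd = cl.load_sample('clrd')
by_lob = clrd.groupby('LOB').sum()   # dispatches to an agg_func like the one above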
    def _set_ldf(self, X):
        paid_tri = X[self.paid_to_incurred[0]]
        incurred_tri = X[self.paid_to_incurred[1]]
        case = incurred_tri-paid_tri
        original_val_date = case.valuation_date

        case_ldf_ = self.case_ldf_.copy()
        case_ldf_.valuation_date = pd.Timestamp(options.ULT_VAL)
        xp = case_ldf_.get_array_module()
        # Broadcast triangle shape
        case_ldf_ = case_ldf_ * case.latest_diagonal / case.latest_diagonal
        case_ldf_.odims = case.odims
        case_ldf_.is_pattern = False
        case_ldf_.values = xp.concatenate(
            (xp.ones(list(case_ldf_.shape[:-1])+[1]), case_ldf_.values),
            axis=-1)

        case_ldf_.ddims = case.ddims
        case_ldf_.valuation_date = case_ldf_.valuation.max()
        case_ldf_ = case_ldf_.dev_to_val().set_backend(self.case_ldf_.array_backend)

        # Will this work for sparse?
        forward = case_ldf_[case_ldf_.valuation>original_val_date].values
        forward[xp.isnan(forward)] = 1.0
        forward = xp.cumprod(forward, -1)

        backward = 1/case_ldf_[case_ldf_.valuation<=original_val_date].values
        backward[xp.isnan(backward)] = 1.0
        backward = xp.cumprod(backward[..., ::-1], -1)[..., ::-1][..., 1:]
        nans = case_ldf_/case_ldf_
        case_ldf_.values = xp.concatenate((backward, (case.latest_diagonal*0+1).values,  forward), -1)
        case = (case_ldf_*nans.values*case.latest_diagonal.values).val_to_dev().iloc[..., :len(case.ddims)]
        ld = case[case.valuation==X.valuation_date].sum('development').sum('origin')
        ld = ld / ld
        patterns = ((1-np.nan_to_num(X.nan_triangle[..., 1:]))*(self.paid_ldf_*ld).values)
        paid = (case.iloc[..., :-1]*patterns)
        paid.ddims = case.ddims[1:]
        paid.valuation_date = pd.Timestamp(options.ULT_VAL)
        # Create a full triangle of incurreds to support a multiplicative LDF
        paid = (paid_tri.cum_to_incr() + paid).incr_to_cum()
        inc = (case[case.valuation>X.valuation_date] +
               paid[paid.valuation>X.valuation_date] +
               incurred_tri)
        # Combine paid and incurred into a single object
        paid.columns = [self.paid_to_incurred[0]]
        inc.columns = [self.paid_to_incurred[1]]
        cols = X.columns[X.columns.isin([self.paid_to_incurred[0], self.paid_to_incurred[1]])]
        dev = concat((paid, inc), 1)[list(cols)]
        # Convert the paid/incurred to multiplicative LDF
        dev = (dev.iloc[..., -1]/dev).iloc[..., :-1]
        dev.valuation_date = pd.Timestamp(options.ULT_VAL)
        dev.ddims = X.link_ratio.ddims
        dev.is_pattern=True
        dev.is_cumulative=True
        self.case = case
        self.paid=paid
        return dev.cum_to_incr()
Example #5
    def _arithmetic_mapper(self, obj, other, f):
        """ Use Dask if available, otherwise basic list comprehension """
        if db and obj.obj.array_backend == 'sparse':
            bag = db.from_sequence(self._get_key_union(obj, other))
            bag = bag.map(f, self, obj, other)
            c = bag.compute(scheduler='threads')
        else:
            c = [
                f(k, self, obj, other)
                for k in self._get_key_union(obj, other)
            ]
        return concat(c, 0).sort_index()
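The db guard implies an optional Dask dependency bound at import time; a plausible binding (an assumption, not shown in this excerpt):

# Optional dependency: dask.bag supplies the parallel map when installed.
try:
    import dask.bag as db
except ImportError:
    db = None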
Example #6
    def agg_func(self, *args, **kwargs):
        from chainladder.utils import concat

        xp = self.obj.get_array_module()
        obj = self.obj.copy()
        auto_sparse = kwargs.pop("auto_sparse", True)
        if db and obj.array_backend == 'sparse':
            # 'v' is the aggregation method name (e.g. 'sum'), captured from
            # the enclosing factory that generates this agg_func.
            def aggregate(i, obj, axis, v):
                return getattr(
                    obj.iloc.__getitem__(tuple([slice(None)] * axis + [i])), v
                )(axis, auto_sparse=False, keepdims=True)
            bag = db.from_sequence(self.groups.indices.values())
            bag = bag.map(aggregate, obj, self.axis, v)
            values = bag.compute(scheduler='threads')
        else:
            values = [
                getattr(obj.iloc.__getitem__(tuple([slice(None)] * self.axis + [i])), v)(
                    self.axis, auto_sparse=False, keepdims=True
                )
                for i in self.groups.indices.values()
            ]
        obj = concat(values, axis=self.axis, ignore_index=True)
        if self.axis == 0:
            if isinstance(self.groups.dtypes.index, pd.MultiIndex):
                index = (
                    pd.DataFrame(
                        np.zeros(len(self.groups.dtypes.index)),
                        index=self.groups.dtypes.index,
                        columns=["_"],
                    )
                    .reset_index()
                    .iloc[:, :-1]
                )
                obj.index = index
            else:
                index = pd.DataFrame(self.groups.dtypes.index)
                obj.key_labels = index.columns.tolist()
                obj.kdims = index.values
        if self.axis == 1:
            obj.vdims = pd.DataFrame(self.groups.dtypes.index).values[:, 0]
        if self.axis == 2:
            odims = self.obj._to_datetime(
                pd.Series(self.groups.indices.keys()).to_frame(), [0])
            obj.origin_grain = self.obj._get_grain(odims)
            obj.origin_grain = 'S' if obj.origin_grain == '2Q' else obj.origin_grain
            obj.odims = odims.values
        obj._set_slicers()
        if auto_sparse:
            obj = obj._auto_sparse()
        return obj
Example #7
    def append(self, other):
        """ Append rows of other to the end of caller, returning a new object.

        Parameters
        ----------
        other : Triangle
            The data to append.

        Returns
        -------
            New Triangle with appended data.
        """
        from chainladder.utils.utility_functions import concat

        return concat((self, other), 0)
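A usage sketch with the bundled 'clrd' sample, splitting and re-stacking its index:

import chainladder as cl

clrd = cl.load_sample('clrd')
top, rest = clrd.iloc[:2], clrd.iloc[2:]
combined = top.append(rest)   # same index rows as the original clrd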
Example #8
    def agg_func(self, *args, **kwargs):
        from chainladder.utils import concat
        from chainladder.methods import Chainladder

        xp = self.obj.get_array_module()
        obj = self.obj.copy()
        # 'v' is the aggregation method name (e.g. 'sum'), captured from the
        # enclosing factory that generates this agg_func.
        values = [
            getattr(obj.iloc.__getitem__(tuple([slice(None)] * self.axis + [i])), v)(
                self.axis, auto_sparse=False, keepdims=True
            )
            for i in self.groups.indices.values()
        ]
        obj = concat(values, axis=self.axis, ignore_index=True)
        if self.axis == 0:
            if isinstance(self.groups.dtypes.index, pd.MultiIndex):
                index = (
                    pd.DataFrame(
                        np.zeros(len(self.groups.dtypes.index)),
                        index=self.groups.dtypes.index,
                        columns=["_"],
                    )
                    .reset_index()
                    .iloc[:, :-1]
                )
                obj.index = index
            else:
                index = pd.DataFrame(self.groups.dtypes.index)
                obj.key_labels = index.columns.tolist()
                obj.kdims = index.values
        else:
            index = pd.DataFrame(self.groups.dtypes.index).values[:, 0]
        if self.axis == 1:
            obj.vdims = index
        obj._set_slicers()
        if hasattr(obj, 'ldf_'):
            if len(obj.ldf_) > 1: # Bypass grouped ldf_ if there is only one anyway
                new_ldf = Chainladder().fit(self.obj).full_expectation_
                new_ldf = new_ldf.groupby(self.by).sum()  # Need to generalize sum
                new_ldf = new_ldf.link_ratio.iloc[..., :self.obj.ldf_.shape[-1]]
                if new_ldf.get_array_module().all(
                    (new_ldf.values.max(2) - new_ldf.values.min(2)) < 1e-6):
                    # if after grouping there is still only one, then compress to 1
                    new_ldf = new_ldf.iloc[..., 0, :]
                obj.ldf_ = new_ldf
        return obj
Example #9
    def dev_to_val(self, inplace=False):
        """ Converts triangle from a development lag triangle to a valuation
        triangle.

        Parameters
        ----------
        inplace : bool
            Whether to mutate the existing Triangle instance or return a new
            one.

        Returns
        -------
        Triangle
            Updated instance of the triangle with valuation periods.

        """
        if self.is_val_tri:
            if inplace:
                return self
            else:
                return self.copy()
        is_cumulative = self.is_cumulative
        if self.is_full:
            if is_cumulative:
                obj = self.cum_to_incr(inplace=inplace)
            else:
                obj = self.copy()
            if self.is_ultimate:
                ultimate = obj.iloc[..., -1:]
                obj = obj.iloc[..., :-1]
        else:
            obj = self
        obj = obj._val_dev(1, inplace)
        ddims = obj.valuation[obj.valuation <= obj.valuation_date]
        obj.ddims = ddims.drop_duplicates().sort_values()
        if self.is_full:
            if self.is_ultimate:
                ultimate.ddims = pd.DatetimeIndex(ultimate.valuation[0:1])
                obj = concat((obj, ultimate), -1)
            if is_cumulative:
                obj = obj.incr_to_cum(inplace=inplace)
        return obj
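A usage sketch with the 'raa' sample; development lags become valuation dates:

import chainladder as cl

raa = cl.load_sample('raa')
val_tri = raa.dev_to_val()   # ddims become a DatetimeIndex of valuation dates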
Example #10
    def val_to_dev(self, inplace=False):
        """ Converts triangle from a valuation triangle to a development lag
        triangle.

        Parameters
        ----------
        inplace : bool
            Whether to mutate the existing Triangle instance or return a new
            one.

        Returns
        -------
            Updated instance of triangle with development lags
        """
        if not self.is_val_tri:
            if inplace:
                return self
            else:
                return self.copy()
        if self.is_ultimate:
            ultimate = self.iloc[..., -1:]
            ultimate.ddims = np.array([9999])
            obj = self.iloc[..., :-1]._val_dev(-1, inplace)
        else:
            obj = self.copy()._val_dev(-1, inplace)
        val_0 = obj.valuation[0]
        if self.ddims.shape[-1] == 1 and self.ddims[0] == self.valuation_date:
            origin_0 = pd.to_datetime(obj.odims[-1])
        else:
            origin_0 = pd.to_datetime(obj.odims[0])
        lag_0 = (val_0.year -
                 origin_0.year) * 12 + val_0.month - origin_0.month + 1
        scale = {"Y": 12, "Q": 3, "M": 1}[obj.development_grain]
        obj.ddims = np.arange(obj.values.shape[-1]) * scale + lag_0
        prune = obj[obj.origin == obj.origin.max()]
        if self.is_ultimate:
            obj = obj.iloc[..., :(
                prune.valuation <= prune.valuation_date).sum()]
            obj = concat((obj, ultimate), -1)
        return obj
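A usage sketch converting a valuation triangle back to development lags:

import chainladder as cl

val_tri = cl.load_sample('raa').dev_to_val()
dev_tri = val_tri.val_to_dev()
dev_tri.development   # lags in months: 12, 24, ... for this annual sample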
Example #11
    def grain(self, grain="", trailing=False, inplace=False):
        """Changes the grain of a cumulative triangle.

        Parameters
        ----------
        grain : str
            The grain to which you want your triangle converted, specified as
            'OXDY' where X and Y can take on values of ``['Y', 'Q', 'M']``.
            For example, 'OYDY' for Origin Year/Development Year, 'OQDM'
            for Origin Quarter/Development Month, etc.
        trailing : bool
            For partial years/quarters, trailing will set the year/quarter end to
            that of the latest available from the data.
        inplace : bool
            Whether to mutate the existing Triangle instance or return a new
            one.

        Returns
        -------
            Triangle
        """
        ograin_old, ograin_new = self.origin_grain, grain[1:2]
        dgrain_old, dgrain_new = self.development_grain, grain[-1]
        valid = {"Y": ["Y"], "Q": ["Q", "Y"], "M": ["Y", "Q", "M"]}
        if ograin_new not in valid.get(
                ograin_old, []) or dgrain_new not in valid.get(dgrain_old, []):
            raise ValueError("New grain not compatible with existing grain")
        if (self.is_cumulative is None and dgrain_old != dgrain_new
                and self.shape[-1] > 1):
            raise AttributeError(
                "The is_cumulative attribute must be set before using grain method."
            )
        if valid["M"].index(ograin_new) > valid["M"].index(dgrain_new):
            raise ValueError(
                "Origin grain must be coarser than development grain")
        obj = self.dev_to_val()
        if ograin_new != ograin_old:
            if trailing:
                mn = self.origin[-1].strftime("%b").upper()
                freq = "Q-" if ograin_new == "Q" else "A-"
                o = pd.PeriodIndex(self.origin, freq=freq + mn)
                o = np.array(o.to_timestamp(how="s"))
            else:
                freq = "%YQ%q" if ograin_new == "Q" else "%Y"
                o = pd.to_datetime(self.origin.strftime(freq)).values
            values = [
                getattr(obj.loc[..., i, :], "sum")(2,
                                                   auto_sparse=False,
                                                   keepdims=True)
                for i in self.origin.groupby(o).values()
            ]
            obj = concat(values, axis=2, ignore_index=True)
            obj.odims = np.unique(o)
            obj.origin_grain = ograin_new
            if len(obj.ddims) > 1 and pd.Timestamp(obj.odims[0]).strftime(
                    "%Y%m") != obj.valuation[0].strftime("%Y%m"):
                addl_ts = (pd.period_range(
                    obj.odims[0], obj.valuation[0],
                    freq="M")[:-1].to_timestamp().values)
                addl = obj.iloc[..., -len(addl_ts):] * 0
                addl.ddims = addl_ts
                obj = concat((addl, obj), axis=-1)
        if dgrain_old != dgrain_new and obj.shape[-1] > 1:
            step = self._dstep()[dgrain_old][dgrain_new]
            d = np.sort(
                len(obj.development) -
                np.arange(0, len(obj.development), step) - 1)
            if obj.is_cumulative:
                obj = obj.iloc[..., d]
            else:
                ddims = obj.ddims[d]
                d2 = [d[0]] * (d[0] + 1) + list(
                    np.repeat(np.array(d[1:]), step))
                values = [
                    getattr(obj.iloc[..., i], "sum")(3,
                                                     auto_sparse=False,
                                                     keepdims=True)
                    for i in obj.development.groupby(d2).groups.values()
                ]
                obj = concat(values, axis=3, ignore_index=True)
                obj.ddims = ddims
            obj.development_grain = dgrain_new
        obj = obj.dev_to_val() if self.is_val_tri else obj.val_to_dev()
        if inplace:
            self = obj
            return self
        return obj
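A usage sketch with the bundled 'quarterly' sample, coarsening to annual grain:

import chainladder as cl

quarterly = cl.load_sample('quarterly')
annual = quarterly.grain('OYDY')   # origin year / development year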
    def grain(self, grain="", trailing=False, inplace=False):
        """Changes the grain of a cumulative triangle.

        Parameters
        ----------
        grain : str
            The grain to which you want your triangle converted, specified as
            'OXDY' where X and Y can take on values of ``['Y', 'S', 'Q', 'M']``.
            For example, 'OYDY' for Origin Year/Development Year, 'OQDM'
            for Origin Quarter/Development Month, etc.
        trailing : bool
            For partial origin years/quarters, trailing will set the year/quarter
            end to that of the latest available from the origin data.
        inplace : bool
            Whether to mutate the existing Triangle instance or return a new
            one.

        Returns
        -------
            Triangle
        """
        ograin_old, ograin_new = self.origin_grain, grain[1:2]
        dgrain_old, dgrain_new = self.development_grain, grain[-1]
        valid = {"Y": ["Y"], "Q": ["Q", "S", "Y"], "M": ["Y", "S", "Q", "M"],
                 "S": ["S", "Y"]}
        if ograin_new not in valid.get(ograin_old, []) or dgrain_new not in valid.get(
            dgrain_old, []
        ):
            raise ValueError("New grain not compatible with existing grain")
        if (
            self.is_cumulative is None
            and dgrain_old != dgrain_new
            and self.shape[-1] > 1
        ):
            raise AttributeError(
                "The is_cumulative attribute must be set before using grain method."
            )
        if valid["M"].index(ograin_new) > valid["M"].index(dgrain_new):
            raise ValueError("Origin grain must be coarser than development grain")
        if self.is_full and not self.is_ultimate and not self.is_val_tri:
            warnings.warn('Triangle includes extraneous development lags')
        else:
            d_limit = None  # note: d_limit is not referenced again in this excerpt
        obj = self.dev_to_val()
        if ograin_new != ograin_old:
            freq = {"Y": "A", "S": "2Q"}.get(ograin_new, ograin_new)
            mn = self.origin[-1].strftime("%b").upper() if trailing else "DEC"
            indices = pd.Series(
                range(len(self.origin)), index=self.origin).resample(
                    '-'.join([freq, mn])).indices
            groups = pd.concat([
                pd.Series([k]*len(v), index=v)
                 for k, v in indices.items()], axis=0).values
            obj = obj.groupby(groups, axis=2).sum()
            obj.origin_close = mn
            if len(obj.ddims) > 1 and pd.Timestamp(obj.odims[0]).strftime(
                "%Y%m"
            ) != obj.valuation[0].strftime("%Y%m"):
                addl_ts = (
                    pd.period_range(obj.odims[0], obj.valuation[0], freq="M")[:-1]
                    .to_timestamp()
                    .values
                )
                addl = obj.iloc[..., -len(addl_ts) :] * 0
                addl.ddims = addl_ts
                obj = concat((addl, obj), axis=-1)
                obj.values = num_to_nan(obj.values)
        if dgrain_old != dgrain_new and obj.shape[-1] > 1:
            step = self._dstep()[dgrain_old][dgrain_new]
            d = np.sort(len(obj.development) -
                        np.arange(0, len(obj.development), step) - 1)
            if obj.is_cumulative:
                obj = obj.iloc[..., d]
            else:
                ddims = obj.ddims[d]
                d2 = [d[0]] * (d[0] + 1) + list(np.repeat(np.array(d[1:]), step))
                obj = obj.groupby(d2, axis=3).sum()
                obj.ddims = ddims
            obj.development_grain = dgrain_new
        obj = obj.dev_to_val() if self.is_val_tri else obj.val_to_dev()
        if inplace:
            self = obj
            return self
        return obj
    def __init__(self, data=None, origin=None, development=None, columns=None,
                 index=None, origin_format=None, development_format=None,
                 cumulative=None, array_backend=None, pattern=False,
                 trailing=False, *args, **kwargs):
        if data is None:
            return
        index, columns, origin, development = self._input_validation(
            data, index, columns, origin, development)
        data, ult = self._split_ult(data, index, columns, origin, development)
        origin_date = self._to_datetime(
            data, origin, format=origin_format).rename('__origin__')
        self.origin_grain = self._get_grain(origin_date)
        self.origin_grain = 'S' if self.origin_grain == '2Q' else self.origin_grain
        development_date = self._set_development(
            data, development, development_format, origin_date)
        self.development_grain = (
            self._get_grain(development_date) if development_date.nunique() != 1
            else self.origin_grain)
        data_agg = self._aggregate_data(
            data, origin_date, development_date, index, columns)
        date_axes = self._get_date_axes(
            data_agg["__origin__"], data_agg["__development__"])
        # Deal with labels
        if not index:
            index = ["Total"]
            data_agg[index[0]] = "Total"
        self.kdims, key_idx = self._set_kdims(data_agg, index)
        self.vdims = np.array(columns)
        self.odims, orig_idx = self._set_odims(data_agg, date_axes)
        self.ddims, dev_idx = self._set_ddims(data_agg, date_axes)
        # Set the Triangle values
        coords, amts = self._set_values(data_agg, key_idx, columns, orig_idx, dev_idx)
        self.values = num_to_nan(
            sp(coords, amts, prune=True,
               has_duplicates=False, sorted=True,
               shape=(len(self.kdims), len(self.vdims),
                      len(self.odims), len(self.ddims))))
        # Set remaining triangle properties
        val_date = data_agg["__development__"].max()
        val_date = val_date.compute() if hasattr(val_date, 'compute') else val_date
        self.key_labels = index
        self.valuation_date = val_date
        self.is_cumulative = cumulative
        self.virtual_columns = VirtualColumns(self)
        self.is_pattern = pattern
        self.origin_close = 'DEC'
        if self.origin_grain != 'M' and trailing:
            self.origin_close = pd.to_datetime(self.odims[-1]).strftime('%b').upper()
        # Deal with array backend
        self.array_backend = "sparse"
        if array_backend is None:
            array_backend = options.ARRAY_BACKEND
        if not options.AUTO_SPARSE or array_backend == "cupy":
            self.set_backend(array_backend, inplace=True)
        else:
            self = self._auto_sparse()
        self._set_slicers()
        # Deal with special properties
        if self.is_pattern:
            obj = self.dropna()
            self.odims = obj.odims
            self.ddims = obj.ddims
            self.values = obj.values
        if ult:
            obj = concat((self.dev_to_val().iloc[..., :len(ult.odims), :], ult), -1)
            obj = obj.val_to_dev()
            self.odims = obj.odims
            self.ddims = obj.ddims
            self.values = obj.values
            self.valuation_date = pd.Timestamp(options.ULT_VAL)
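A construction sketch from long-format data (a hypothetical frame; the constructor aggregates one row per origin/valuation cell):

import pandas as pd
import chainladder as cl

df = pd.DataFrame({
    'origin': ['2000', '2000', '2001'],
    'valuation': ['2000-12-31', '2001-12-31', '2001-12-31'],
    'paid': [100.0, 150.0, 120.0],
})
tri = cl.Triangle(df, origin='origin', development='valuation',
                  columns='paid', cumulative=True)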
class CaseOutstanding(DevelopmentBase):
    """ A determinisic method based on outstanding case reserves.

    The CaseOutstanding method is a deterministic approach that develops
    patterns of incremental payments as a percent of previous period case
    reserves as well as patterns for case reserves as a percent of previous
    period case reserves.  Although the patterns produces by the approach
    approximate incremental payments and case outstanding, they are converted
    into comparable multiplicative patterns for usage with the various IBNR
    methods.

    .. versionadded:: 0.8.0

    Parameters
    ----------
    paid_to_incurred : tuple or list of tuples
        A tuple representing the paid and incurred ``columns`` of the triangles
        such as ``('paid', 'incurred')``
    paid_n_periods : integer, optional (default=-1)
        Number of origin periods to be used in the paid pattern averages. For
        all origin periods, set paid_n_periods=-1.
    case_n_periods : integer, optional (default=-1)
        Number of origin periods to be used in the case pattern averages. For
        all origin periods, set case_n_periods=-1.

    Attributes
    ----------
    ldf_ : Triangle
        The estimated (multiplicative) loss development patterns.
    cdf_ : Triangle
        The estimated (multiplicative) cumulative development patterns.
    case_to_prior_case_ : Triangle
        The case to prior case ratios used for fitting the estimator.
    case_ldf_ : Triangle
        The selected case to prior case ratios of the fitted estimator.
    paid_to_prior_case_ : Triangle
        The paid to prior case ratios used for fitting the estimator.
    paid_ldf_ : Triangle
        The selected paid to prior case ratios of the fitted estimator.
    """
    def __init__(self, paid_to_incurred=None, paid_n_periods=-1, case_n_periods=-1):
        self.paid_to_incurred = paid_to_incurred
        self.paid_n_periods = paid_n_periods
        self.case_n_periods = case_n_periods

    def fit(self, X, y=None, sample_weight=None):
        """Fit the model with X.

        Parameters
        ----------
        X : Triangle
            Triangle of paid and incurred amounts to which the method will be
            applied.
        y : Ignored
        sample_weight : Ignored

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        backend = "cupy" if X.array_backend == "cupy" else "numpy"
        self.X_ = X.copy()
        paid_tri = self.X_[self.paid_to_incurred[0]]
        incurred_tri = self.X_[self.paid_to_incurred[1]]
        self.paid_w_ = Development(n_periods=self.paid_n_periods).fit(self.X_.iloc[0,0]).w_
        self.case_w_ = Development(n_periods=self.case_n_periods).fit(self.X_.iloc[0,0]).w_
        # case_to_prior_case_ and paid_to_prior_case_ are derived properties
        # defined elsewhere on this estimator (not shown in this excerpt).
        self.case_ldf_ = self.case_to_prior_case_.mean(2)
        self.paid_ldf_ = self.paid_to_prior_case_.mean(2)

        case = incurred_tri-paid_tri
        patterns = ((1 - np.nan_to_num(case.nan_triangle[..., 1:])) *
                    self.case_ldf_.values)
        for i in range(np.isnan(case.nan_triangle[-1]).sum()):
            increment = (
                (case - case[case.valuation < case.valuation_date]).iloc[..., :-1] *
                patterns)
            increment.ddims = case.ddims[1:]
            increment.valuation_date = case.valuation[case.valuation>=case.valuation_date].drop_duplicates()[1]
            case = case + increment

        patterns = ((1-np.nan_to_num(self.X_.nan_triangle[..., 1:]))*self.paid_ldf_.values)

        paid = (case.iloc[..., :-1]*patterns)
        paid.ddims = case.ddims[1:]
        paid.valuation_date = pd.Timestamp(ULT_VAL)
        paid = (paid_tri.cum_to_incr() + paid).incr_to_cum()
        inc = (case[case.valuation>self.X_.valuation_date] +
               paid[paid.valuation>self.X_.valuation_date] +
               incurred_tri)
        paid.columns = [self.paid_to_incurred[0]]
        inc.columns = [self.paid_to_incurred[1]]
        cols = self.X_.columns[self.X_.columns.isin([self.paid_to_incurred[0], self.paid_to_incurred[1]])]
        dev = concat((paid, inc), 1)[list(cols)]
        self.dev_ = dev
        dev = (dev.iloc[..., -1]/dev).iloc[..., :-1]
        dev.valuation_date = pd.Timestamp(ULT_VAL)
        dev.ddims = self.X_.link_ratio.ddims
        dev.is_pattern=True
        dev.is_cumulative=True

        self.ldf_ = dev.cum_to_incr().set_backend(backend)
        return self
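A usage sketch, assuming the bundled 'mcl' sample, which carries 'paid' and 'incurred' columns:

import chainladder as cl

mcl = cl.load_sample('mcl')
model = cl.CaseOutstanding(paid_to_incurred=('paid', 'incurred')).fit(mcl)
model.ldf_   # multiplicative patterns usable by the IBNR methods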
Example #15
def _get_full_expectation(cdf_, ultimate_):
    """ Private method that builds full expectation"""
    from chainladder.utils.utility_functions import concat
    full = ultimate_ / cdf_
    return concat((full, ultimate_.copy().rename('development', [9999])),
                  axis=3)
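The public route to this helper appears to be the fitted estimator's full_expectation_ attribute:

import chainladder as cl

model = cl.Chainladder().fit(cl.load_sample('raa'))
model.full_expectation_   # ultimates divided back through the CDFs, plus an ultimate column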
    def fit(self, X, y=None, sample_weight=None):
        if X.shape[1] > 1:
            from chainladder.utils.utility_functions import concat
            out = [
                BootstrapODPSample(**self.get_params()).fit(X.iloc[:, i])
                for i in range(X.shape[1])
            ]
            xp = X.get_array_module(out[0].design_matrix_)
            self.design_matrix_ = xp.concatenate(
                [i.design_matrix_[None] for i in out], axis=0)
            self.hat_ = xp.concatenate([i.hat_[None] for i in out], axis=0)
            self.resampled_triangles_ = concat(
                [i.resampled_triangles_ for i in out], axis=1)
            self.scale_ = xp.array([i.scale_ for i in out])
            self.w_ = out[0].w_
        else:
            backend = X.array_backend
            if backend == "sparse":
                X = X.set_backend("numpy")
            else:
                X = X.copy()
            xp = X.get_array_module()
            if len(X) != 1:
                raise ValueError("Only single index triangles are supported")
            if type(X.ddims) != np.ndarray:
                raise ValueError(
                    "Triangle must be expressed with development lags")
            lag = {"M": 1, "Q": 3, "Y": 12}[X.development_grain]
            obj = Development(
                n_periods=self.n_periods,
                drop=self.drop,
                drop_high=self.drop_high,
                drop_low=self.drop_low,
                drop_valuation=self.drop_valuation,
            ).fit_transform(X)
            self.w_ = obj.w_
            obj = Chainladder().fit(obj)
            # Works for only a single triangle - can we generalize this
            exp_incr_triangle = obj.full_expectation_.cum_to_incr().values[
                0, 0, :, :X.shape[-1]]
            exp_incr_triangle = xp.nan_to_num(
                exp_incr_triangle) * obj.X_.nan_triangle
            self.design_matrix_ = self._get_design_matrix(X)
            if self.hat_adj:
                try:
                    self.hat_ = self._get_hat(X, exp_incr_triangle)
                except Exception:
                    warn(
                        "Could not compute hat matrix.  Setting hat_adj to False"
                    )
                    self.hat_adj = False
                    self.hat_ = None
            else:
                self.hat_ = None
            self.resampled_triangles_, self.scale_ = self._get_simulation(
                X, exp_incr_triangle)
            n_obs = xp.nansum(self.w_)
            n_origin_params = X.shape[2]
            n_dev_params = X.shape[3] - 1
            deg_free = n_obs - n_origin_params - n_dev_params
            deg_free_adj_fctr = xp.sqrt(n_obs / deg_free)
        return self
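A usage sketch of the estimator this fit appears to belong to, chainladder's BootstrapODPSample:

import chainladder as cl

raa = cl.load_sample('raa')
boot = cl.BootstrapODPSample(n_sims=500, random_state=42).fit(raa)
boot.resampled_triangles_   # one simulated triangle per simulation along the index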
Example #17
    def __init__(self,
                 data=None,
                 origin=None,
                 development=None,
                 columns=None,
                 index=None,
                 origin_format=None,
                 development_format=None,
                 cumulative=None,
                 array_backend=None,
                 pattern=False,
                 *args,
                 **kwargs):
        # Allow an empty Triangle so that we can piece it together programmatically
        if data is None:
            return

        # Check whether all columns are unique and numeric
        check = data[columns].dtypes
        check = [check] if isinstance(check, np.dtype) else check.to_list()
        columns = [columns] if type(columns) is not list else columns
        if "object" in check:
            raise TypeError("column attribute must be numeric.")
        if data[columns].shape[1] != len(columns):
            raise AttributeError("Columns are required to have unique names")

        # Sanitize all axis inputs to lists
        str_to_list = lambda *args: tuple(
            [arg] if type(arg) in [str, pd.Period] else arg for arg in args)
        index, columns, origin, development = str_to_list(
            index, columns, origin, development)

        # Determine desired array backend of the Triangle
        if array_backend is None:
            from chainladder import ARRAY_BACKEND

            array_backend = ARRAY_BACKEND
        if (development and len(development) == 1
                and data[development[0]].dtype == "<M8[ns]"):
            u = data[data[development[0]] == ULT_VAL].copy()
            if len(u) > 0 and len(u) != len(data):
                u = TriangleBase(
                    u,
                    origin=origin,
                    development=development,
                    columns=columns,
                    index=index,
                )
                data = data[data[development[0]] != ULT_VAL]
            else:
                u = None
        else:
            u = None
        # Initialize origin and its grain
        origin = development if origin is None else origin
        origin_date = TriangleBase._to_datetime(data,
                                                origin,
                                                format=origin_format)
        self.origin_grain = TriangleBase._get_grain(origin_date)
        origin_date = (pd.PeriodIndex(
            origin_date,
            freq=self.origin_grain).to_timestamp().rename("origin"))

        # Initialize development and its grain
        m_cnt = {"Y": 12, "Q": 3, "M": 1}
        has_dev = development and len(np.unique(data[development])) > 1
        if has_dev:
            development_date = TriangleBase._to_datetime(
                data, development, period_end=True, format=development_format)
            self.development_grain = TriangleBase._get_grain(development_date)
        else:
            development_date = pd.PeriodIndex(
                origin_date +
                pd.tseries.offsets.MonthEnd(m_cnt[self.origin_grain]),
                freq={
                    "Y": "A"
                }.get(self.origin_grain, self.origin_grain),
            ).to_timestamp(how="e")
            self.development_grain = self.origin_grain
        development_date.name = "development"

        # Summarize dataframe to the level specified in axes
        key_gr = [origin_date, development_date
                  ] + [data[item] for item in ([] if not index else index)]
        data_agg = data[columns].groupby(key_gr).sum().reset_index().fillna(0)
        if not index:
            index = ["Total"]
            data_agg[index[0]] = "Total"

        # Fill in any gaps in origin/development
        date_axes = self._get_date_axes(
            data_agg["origin"], data_agg["development"])  # cartesian product
        dev_lag = TriangleBase._development_lag(data_agg["origin"],
                                                data_agg["development"])

        # Grab unique index, origin, development
        dev_lag_unique = np.sort(
            TriangleBase._development_lag(date_axes["origin"],
                                          date_axes["development"]).unique())

        orig_unique = np.sort(date_axes["origin"].unique())
        kdims = data_agg[index].drop_duplicates().reset_index(
            drop=True).reset_index()

        # Map index, origin, development indices to data
        set_idx = (lambda col, unique: col.map(
            dict(zip(unique, range(len(unique))))).values[None].T)
        orig_idx = set_idx(data_agg["origin"], orig_unique)
        dev_idx = set_idx(dev_lag, dev_lag_unique)
        key_idx = (data_agg[index].merge(kdims, how="left",
                                         on=index)["index"].values[None].T)

        # origin <= development is required - truncate bad records if not true
        valid = data_agg["origin"] <= data_agg["development"]
        if sum(~valid) > 0:
            warnings.warn("Observations with development before " +
                          "origin start have been removed.")
        data_agg, orig_idx = data_agg[valid], orig_idx[valid]
        dev_idx, key_idx = dev_idx[valid], key_idx[valid]

        # All Triangles start out as sparse arrays
        val_idx = (((np.ones(len(data_agg))[None].T) *
                    range(len(columns))).reshape((1, -1), order="F").T)
        coords = np.concatenate(
            tuple([np.concatenate((orig_idx, dev_idx), 1)] * len(columns)), 0)
        coords = np.concatenate((np.concatenate(
            tuple([key_idx] * len(columns)), 0), val_idx, coords), 1)
        amts = data_agg[columns].unstack()
        amts = amts.values.astype("float64")
        self.array_backend = "sparse"
        self.values = num_to_nan(
            sp(
                coords.T.astype('int64'),
                amts,
                prune=True,
                has_duplicates=False,
                sorted=True,
                shape=(
                    len(kdims),
                    len(columns),
                    len(orig_unique),
                    len(dev_lag_unique) if has_dev else 1,
                ),
            ))

        # Set all axis values
        self.valuation_date = data_agg["development"].max()
        self.kdims = kdims.drop("index", 1).values
        self.odims = orig_unique
        self.ddims = dev_lag_unique if has_dev else dev_lag[0:1].values
        self.ddims = self.ddims * (m_cnt[self.development_grain])
        if development and not has_dev:
            self.ddims = pd.DatetimeIndex(
                TriangleBase._to_datetime(data,
                                          development,
                                          period_end=True,
                                          format=development_format)[0:1])
            self.valuation_date = self.ddims[0]
        self.vdims = np.array(columns)

        # Set remaining triangle properties
        self.key_labels = index
        self.is_cumulative = cumulative
        self.virtual_columns = VirtualColumns(self)
        self.is_pattern = pattern
        if not AUTO_SPARSE or array_backend == "cupy":
            self.set_backend(array_backend, inplace=True)
        else:
            self = self._auto_sparse()
        self._set_slicers()
        if self.is_pattern:
            obj = self.dropna()
            self.odims = obj.odims
            self.ddims = obj.ddims
            self.values = obj.values
        if u:
            obj = concat((self.dev_to_val().iloc[..., :len(u.odims), :], u),
                         -1)
            obj = obj.val_to_dev()
            self.odims = obj.odims
            self.ddims = obj.ddims
            self.values = obj.values
            self.valuation_date = pd.Timestamp(ULT_VAL)
    def fit(self, X, y=None, sample_weight=None):
        """Fit the model with X.

        Parameters
        ----------
        X : Triangle-like
            Data to which the model will be applied.
        y : Ignored
        sample_weight : Ignored

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        from chainladder.utils.utility_functions import parallelogram_olf, concat

        if X.array_backend == "sparse":
            obj = X.set_backend("numpy")
        else:
            obj = X.copy()

        groups = list(
            set(X.key_labels).intersection(self.rate_history.columns))

        if len(groups) == 0:
            idx = obj
        else:
            idx = obj.groupby(groups).sum()

        kw = dict(
            start_date=X.origin[0].to_timestamp(how="s"),
            end_date=X.origin[-1].to_timestamp(how="e"),
            grain=X.origin_grain,
            vertical_line=self.vertical_line,
        )

        if len(groups) > 0:
            tris = []
            for item in idx.index.set_index(groups).iterrows():
                r = self.rate_history.set_index(groups).loc[item[0]].copy()
                r[self.change_col] = r[self.change_col] + 1
                r = (r.groupby(self.date_col)[self.change_col].prod() -
                     1).reset_index()
                date = r[self.date_col]
                values = r[self.change_col]
                olf = parallelogram_olf(values=values, date=date,
                                        **kw).values[None, None]
                if X.array_backend == "cupy":
                    olf = X.get_array_module().array(olf)
                tris.append((idx.loc[item[0]] * 0 + 1) * olf)
            self.olf_ = concat(tris, 0).latest_diagonal
        else:
            r = self.rate_history.copy()
            r[self.change_col] = r[self.change_col] + 1
            r = (r.groupby(self.date_col)[self.change_col].prod() -
                 1).reset_index()
            date = r[self.date_col]
            values = r[self.change_col]
            olf = parallelogram_olf(values=values, date=date, **kw)
            self.olf_ = ((idx * 0 + 1) *
                         olf.values[None, None]).latest_diagonal
        return self
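The rate_history/change_col/date_col attributes suggest this fit belongs to chainladder's ParallelogramOLF estimator; a sketch with a hypothetical rate-change history:

import pandas as pd
import chainladder as cl

rate_history = pd.DataFrame({
    'date': ['2016-07-15', '2018-01-01'],   # hypothetical effective dates
    'rate_change': [0.05, -0.02],           # hypothetical rate changes
})
olf = cl.ParallelogramOLF(rate_history, change_col='rate_change',
                          date_col='date').fit(cl.load_sample('raa'))
olf.olf_   # latest-diagonal on-level factors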