def _fit_OLS_thru_orig(self):
        """Fit a weighted least-squares line constrained through the origin.

        Reads ``self.w``, ``self.x``, ``self.y`` and ``self.axis`` and stores
        the fitted slope, the residual sigma and the slope standard error on
        the instance as ``slope_``, ``sigma_`` and ``std_err_``, each with a
        trailing length-1 axis appended.

        Returns
        -------
        self : object
            The instance itself.
        """
        from chainladder.utils.utility_functions import num_to_nan

        xp = self.xp
        weights, x_vals, y_vals, ax = self.w, self.x, self.y, self.axis
        # 1 where y is finite, NaN elsewhere: keeps cells without a response
        # out of the NaN-aware sums below.
        observed = y_vals * 0 + 1

        sum_wxx = num_to_nan(
            xp.nansum(observed * weights * x_vals * x_vals, ax))
        slope = num_to_nan(xp.nansum(weights * x_vals * y_vals, ax)) / sum_wxx

        # Broadcast the slope back along the regression axis to get fitted
        # values on the same grid as x.
        slope_grid = xp.repeat(
            xp.expand_dims(slope, ax), x_vals.shape[ax], ax)
        predicted = slope_grid * x_vals * observed
        weighted_resid = (y_vals - predicted) * xp.sqrt(weights)
        rss = xp.nansum(weighted_resid**2, ax)

        # Denominator of the MSE: number of observed, non-zero-weight cells
        # minus one.
        dof = num_to_nan(xp.nansum(observed * (weights != 0), ax) - 1)
        mse = rss / dof

        std_err = xp.sqrt(num_to_nan(mse) / sum_wxx)[..., None]
        # `sp` comes from module scope (presumably the sparse array type);
        # the in-place masking is skipped for that backend.
        if xp != sp:
            std_err[std_err == 0] = xp.nan

        self.slope_ = slope[..., None]
        self.sigma_ = xp.sqrt(mse)[..., None]
        self.std_err_ = std_err
        return self
Exemple #2
0
 def _get_ultimate(self, X, expectation):
     """Blend the latest observed values with an expectation into ultimates.

     With ``q = 1 - 1 / cdf``, the result is
     ``latest * sum(q**k for k < n_iters) + expectation * q**n_iters``.

     Parameters
     ----------
     X :
         Triangle-like object supplying the observed data.
     expectation :
         Triangle-like object supplying the expected ultimate.

     Returns
     -------
     Whatever ``self._set_ult_attr`` returns for the computed ultimate.
     """
     from chainladder.utils.utility_functions import num_to_nan
     # `== False` rather than `not ...`: any other flag value, including
     # None, takes the cumulative branch.
     if X.is_cumulative == False:
         latest = X.sum('development')
         ult = latest.val_to_dev()
     else:
         latest = X.latest_diagonal
         ult = X.copy()

     aligned = self._align_cdf(ult, expectation)
     backend = aligned.array_backend
     xp = aligned.get_array_module()

     # Put every operand in the same index order before combining raw arrays.
     aligned = aligned.sort_index()
     latest = latest.sort_index()
     expectation = expectation.sort_index()
     ult = ult.sort_index()

     base = (1 - 1 / num_to_nan(aligned.values))[None]
     powers = xp.arange(self.n_iters + 1)
     powers = xp.reshape(powers, (len(powers),) + (1,) * 4)
     # The ratio is 1 where `base` is a number and NaN where it is NaN, so
     # NaNs in `base` carry into the exponent.
     unit = (base + 1e-16) / (base + 1e-16)
     terms = xp.nan_to_num(base ** (unit * powers))

     observed_part = xp.sum(terms[:-1, ...], 0) * xp.nan_to_num(
         latest.set_backend(backend).values)
     expected_part = terms[-1, ...] * xp.nan_to_num(
         expectation.set_backend(backend).values)

     ult.values = num_to_nan(observed_part + expected_part)
     ult.array_backend = backend
     ult.ddims = self.cdf_.ddims[:ult.shape[-1]]
     return self._set_ult_attr(ult)
    def _fit_OLS(self):
        """Fit a weighted regression with intercept along ``self.axis``.

        Works on copies of ``self.w``, ``self.x`` and ``self.y`` and stores
        the results as ``slope_`` and ``intercept_`` (each with a trailing
        length-1 axis appended).

        TODO:
            Make this work with n_periods = 1 without numpy warning.

        Returns
        -------
        self : object
            The instance itself.
        """
        from chainladder.utils.utility_functions import num_to_nan

        xp = self.xp
        axis = self.axis
        w = self.w.copy()
        x = self.x.copy()
        y = self.y.copy()

        # Blank out x and y wherever the weight is zero so the NaN-aware
        # reductions ignore those cells.
        if xp != sp:
            zero_weight = w == 0
            x[zero_weight] = xp.nan
            y[zero_weight] = xp.nan
        else:
            # Backend equal to the module-level `sp`: multiply by a copy of
            # the weights whose fill value is NaN instead of masking in place.
            w2 = w.copy()
            w2.fill_value = sp.nan
            x = x * sp(w2)
            y = y * sp(w2)

        mean_x = xp.nanmean(x, axis)
        mean_y = xp.nanmean(y, axis)
        sum_wx = xp.nansum(w * x, axis)

        numerator = num_to_nan(xp.nansum(w * x * y, axis) - sum_wx * mean_y)
        denominator = num_to_nan(
            xp.nansum(w * x * x, axis) - mean_x * sum_wx)
        slope = numerator / denominator
        intercept = mean_y - slope * mean_x

        self.slope_ = slope[..., None]
        self.intercept_ = intercept[..., None]
        return self
 def loglinear_interpolation(self, y):
     """Fill missing entries of ``y`` from a regression fitted on ``log(y)``.

     Use cases: generally for filling in the last element of sigma_.

     Entries of ``y`` that are already present are returned unchanged; the
     fitted value ``exp(x * slope + intercept)`` is only used where the log
     of ``y`` is NaN.
     """
     from chainladder.utils.utility_functions import num_to_nan
     xp = self.xp
     log_y = xp.log(num_to_nan(y))
     # 1 where log(y) is a number, 0 where it is NaN.
     present = xp.nan_to_num(log_y * 0 + 1)
     reg = WeightedRegression(self.axis, False, xp=xp).fit(
         None, log_y, present)
     fitted = xp.exp(reg.x * reg.slope_ + reg.intercept_)
     # Keep fitted values only in the gaps.
     gap_fill = fitted * (1 - present)
     return num_to_nan(xp.nan_to_num(y) + xp.nan_to_num(gap_fill))
    def mack_interpolation(self, y):
        """Fill the missing trailing element of ``y`` with Mack's approximation.

        This is the same as loglinear extrapolation using the two elements
        preceding the missing value. This function needs a recursive
        definition...

        The candidate fill is
        ``sqrt(|min(y[k]**4 / y[k-1]**2, min(y[k-1]**2, y[k]**2))|)`` and is
        applied only where ``y`` is NaN.
        """
        from chainladder.utils.utility_functions import num_to_nan

        xp = self.xp

        def along_axis(piece):
            # Full slice on all four axes except `self.axis`.
            index = [slice(None)] * 4
            index[self.axis] = piece
            return tuple(index)

        later_idx = along_axis(slice(1, -1, 1))
        earlier_idx = along_axis(slice(0, -2, 1))
        head_idx = along_axis(slice(0, 2, 1))

        # 1 where y is a number, 0 where it is NaN.
        present = xp.nan_to_num(y * 0 + 1)
        later, earlier = y[later_idx], y[earlier_idx]
        bounded = xp.minimum(
            later**4 / earlier**2,
            xp.minimum(earlier**2, later**2),
        )
        candidate = xp.sqrt(abs(bounded))
        # The first two positions have no predecessors to extrapolate from;
        # pad with the presence mask there so the shapes line up, then keep
        # only the positions where y is missing.
        padded = xp.concatenate(
            (present[head_idx], xp.nan_to_num(candidate)), axis=self.axis)
        fill_ = padded * (1 - present)
        return num_to_nan(xp.nan_to_num(y) + fill_)
Exemple #6
0
    def _get_ultimate(self, X, sample_weight):
        """Compute ultimates from the latest diagonal and an a-priori loss.

        The a-priori loss is ``sample_weight.values`` times ``self.apriori``.
        When ``self.apriori_sigma`` is non-zero, the factor is drawn from a
        normal distribution instead, one draw per entry of ``X.shape[0]``.
        The Benktander formula is then applied with ``self.n_iters``
        iterations.

        Returns
        -------
        Whatever ``self._set_ult_attr`` returns for the computed ultimate.
        """
        from chainladder.utils.utility_functions import num_to_nan

        xp = X.get_array_module()
        ultimate = copy.deepcopy(X)

        # A-priori factor: fixed, or sampled when a sigma is configured.
        if self.apriori_sigma == 0:
            factor = self.apriori
        else:
            rng = xp.random.RandomState(self.random_state)
            draws = rng.normal(self.apriori, self.apriori_sigma, X.shape[0])
            factor = draws.reshape(X.shape[0], -1)[..., None, None]
        apriori = sample_weight.values * factor

        # Benktander formula -> Triangle
        base = (1 - 1 / num_to_nan(
            self._align_cdf(ultimate, sample_weight)))[None]
        powers = xp.arange(self.n_iters + 1)
        powers = xp.reshape(powers, (len(powers),) + (1,) * 4)
        # Ratio is 1 where `base` is a number and NaN where it is NaN.
        unit = (base + 1e-16) / (base + 1e-16)
        terms = xp.nan_to_num(base**(unit * powers))

        observed_part = xp.sum(terms[:-1, ...], 0) * xp.nan_to_num(
            X.latest_diagonal.values)
        apriori_part = terms[-1, ...] * xp.nan_to_num(apriori)
        ultimate.values = observed_part + apriori_part
        return self._set_ult_attr(ultimate)
    def _align_cdf(self, ultimate, sample_weight=None):
        """ Vertically align CDF to ultimate vector to origin period latest
        diagonal.

        Parameters
        ----------
        ultimate :
            Triangle-like object; NOTE that its ``values`` attribute is
            overwritten in place by this method.
        sample_weight : optional
            When truthy, its values are added to ``ultimate.values`` before
            the presence mask is derived.

        Returns
        -------
        The ``values`` of the latest diagonal of the masked CDF.
        """
        xp = ultimate.get_array_module()
        from chainladder.utils.utility_functions import num_to_nan

        # When the fitted patterns and `ultimate` are keyed differently (and
        # there is more than one pattern row), look up the pattern row for
        # each row of `ultimate` via a left merge on the shared key labels.
        if self.cdf_.key_labels != ultimate.key_labels and len(
                self.ldf_.index) > 1:
            level = list(
                set(self.cdf_.key_labels).intersection(ultimate.key_labels))
            idx = (ultimate.index[level].merge(
                self.cdf_.index[level].reset_index(), how="left",
                on=level)["index"].values)
            cdf = self.cdf_.values[list(idx.astype(int)),
                                   ..., :ultimate.shape[-1]]
        else:
            cdf = self.cdf_.values[..., :ultimate.shape[-1]]
        # Build a single-slice template from the first index/column of
        # `ultimate` and add its nan_triangle to it.
        a = ultimate.iloc[0, 0] * 0
        a = a + a.nan_triangle
        if ultimate.array_backend == "sparse":
            # Sparse only: subtract the cells valued before the valuation date.
            a = a - a[a.valuation < a.valuation_date]
        a = a.set_backend(ultimate.array_backend)
        # NOTE(review): this relies on the truthiness of `sample_weight`
        # rather than an explicit `is not None` check -- confirm intended.
        if sample_weight:
            ultimate.values = xp.nan_to_num(
                ultimate.values * a.values) + xp.nan_to_num(
                    sample_weight.values * a.values)
        else:
            ultimate.values = xp.nan_to_num(ultimate.values * a.values)
        ultimate.values = num_to_nan(ultimate.values)
        # x / x: turns every remaining number into 1 (NaN stays NaN), giving
        # a mask to multiply the CDF array by.
        ultimate = ultimate / ultimate
        cdf = ultimate * cdf
        cdf = cdf.latest_diagonal.values
        return cdf
Exemple #8
0
 def agg_func(self, axis=None, *args, **kwargs):
     """Reduce a copy of the object along one axis with an array function.

     The reducing function is looked up by the name ``v`` on the array
     module; ``v`` is not defined in this block and is resolved from the
     enclosing scope.

     Parameters
     ----------
     axis : optional
         Axis to reduce. When None, the first axis whose length is not 1 is
         used; otherwise the value is translated by ``self._get_axis``.
     *args, **kwargs :
         Forwarded to the reducing function. ``auto_sparse`` (default True)
         is consumed here, and ``keepdims`` is always forced to True for the
         reduction itself.

     Returns
     -------
     The reduced object, or the bare scalar when the result has shape
     ``(1, 1, 1, 1)`` and the caller did not ask for ``keepdims``.
     """
     keepdims = kwargs.get("keepdims", None)
     auto_sparse = kwargs.pop("auto_sparse", True)
     obj = self.copy()
     if axis is None:
         axis = min(pos for pos, size in enumerate(obj.shape) if size != 1)
     else:
         axis = self._get_axis(axis)

     reducer = getattr(obj.get_array_module(), v)
     kwargs["keepdims"] = True
     obj.values = reducer(obj.values, axis=axis, *args, **kwargs)

     # Relabel the axis that was collapsed to a single entry.
     if axis == 0 and obj.values.shape[axis] == 1 and len(obj.kdims) > 1:
         obj.kdims = np.array([["(All)"] * len(obj.key_labels)])
     elif axis == 1 and obj.values.shape[axis] == 1 and len(obj.vdims) > 1:
         obj.vdims = np.array([0])
     elif axis == 2 and obj.values.shape[axis] == 1 and len(obj.odims) > 1:
         obj.odims = obj.odims[0:1]
     elif axis == 3 and obj.values.shape[axis] == 1 and len(obj.ddims) > 1:
         obj.ddims = pd.DatetimeIndex([self.valuation_date],
                                      dtype="datetime64[ns]",
                                      freq=None)

     if auto_sparse:
         obj._set_slicers()
     obj.values = num_to_nan(obj.values)

     if keepdims or obj.shape != (1, 1, 1, 1):
         return obj
     return obj.values[0, 0, 0, 0]
Exemple #9
0
    def cum_to_incr(self, inplace=False):
        """Convert a cumulative triangle into an incremental triangle.

        Parameters
        ----------
        inplace : bool
            When True the instance itself is updated; otherwise a copy is
            converted and returned.

        Returns
        -------
        The converted triangle. A triangle whose ``is_cumulative`` flag is
        False is returned unchanged; a flag of None is treated as cumulative.
        """
        xp = self.get_array_module()
        from chainladder.utils.utility_functions import num_to_nan

        if not inplace:
            return self.copy().cum_to_incr(inplace=True)
        if not (self.is_cumulative or self.is_cumulative is None):
            return self

        # First differences along the last axis, with the first column kept
        # as-is; NaNs are treated as zero while differencing.
        filled = xp.nan_to_num(self.values)
        steps = filled[..., 1:] - filled[..., :-1]
        first = xp.nan_to_num(self.values[..., 0:1])
        incremental = xp.concatenate((first, steps), axis=3)
        self.values = num_to_nan(incremental * self.nan_triangle)
        self.is_cumulative = False
        self._set_slicers()
        return self
Exemple #10
0
    def incr_to_cum(self, inplace=False):
        """Convert an incremental triangle into a cumulative triangle.

        Parameters
        ----------
        inplace : bool
            When True the instance itself is updated; otherwise a copy is
            converted and returned.

        Returns
        -------
        The converted triangle. A triangle whose ``is_cumulative`` flag is
        already truthy is returned unchanged.
        """
        from chainladder.utils.utility_functions import num_to_nan

        xp = self.get_array_module()
        if not inplace:
            return self.copy().incr_to_cum(inplace=True)
        if self.is_cumulative:
            return self

        # Running total along axis 3 with NaNs treated as zero, then
        # multiplied by the nan_triangle.
        running = xp.cumsum(xp.nan_to_num(self.values), axis=3)
        self.values = num_to_nan(running) * self.nan_triangle[None, None, ...]
        self.is_cumulative = True
        self._set_slicers()
        return self
    def fit(self, X, y=None, sample_weight=None):
        """Fit the model with X.

        Parameters
        ----------
        X : Triangle-like
            Triangle to which the incremental method is applied.  Triangle must
            be cumulative.
        y : Ignored
        sample_weight : Exposure used in the method.
            NOTE(review): although the default is None, the body dereferences
            ``sample_weight`` unconditionally, so it is effectively required.

        Returns
        -------
        self : object
            Returns the instance itself.

        Raises
        ------
        ValueError
            If ``X.ddims`` is not a numpy array.
        """
        from chainladder import ULT_VAL
        from chainladder.utils.utility_functions import num_to_nan
        if (type(X.ddims) != np.ndarray):
            raise ValueError('Triangle must be expressed with development lags')
        # Work on private copies; sparse inputs are converted to numpy.
        if X.array_backend == 'sparse':
            X = X.set_backend('numpy')
        else:
            X = copy.deepcopy(X)
        if sample_weight.array_backend == 'sparse':
            sample_weight = sample_weight.set_backend('numpy')
        else:
            sample_weight = copy.deepcopy(sample_weight)
        xp = X.get_array_module()
        sample_weight.is_cumulative = False
        # Incremental amounts per unit of exposure, then trended.
        obj = X.cum_to_incr()/sample_weight
        x = obj.trend(self.trend)
        # Borrow the weight array from a Development fit with one fewer
        # period, then append one more trailing column built from its last
        # column.
        w_ = Development(n_periods=self.n_periods-1).fit(x).w_
        w_ = num_to_nan(w_)
        w_ = xp.concatenate((w_, (w_[..., -1:]*x.nan_triangle)[..., -1:]),
                            axis=-1)
        # NOTE(review): `y_` is only assigned for 'simple' and 'volume'; any
        # other `self.average` leaves it unbound at the next use.
        if self.average == 'simple':
            y_ = xp.nanmean(w_*x.values, axis=-2)
        if self.average == 'volume':
            y_ = xp.nansum(w_*x.values*sample_weight.values, axis=-2)
            y_ = y_ / xp.nansum(w_*sample_weight.values, axis=-2)
        # Repeat the per-development average for every origin period.
        y_ = xp.repeat(y_[..., None, :], len(x.odims), -2)
        obj = copy.copy(x)
        # 1 on cells outside the nan_triangle plus 1 on the cells valued at
        # the valuation date.
        keeps = 1-xp.nan_to_num(x.nan_triangle) + \
            xp.nan_to_num(
                x[x.valuation==x.valuation_date].values[0, 0, ...]*0+1)
        # Trend factor raised to a flipped |i - j| index grid, times the
        # averages, restricted to `keeps`.
        obj.values = (1+self.trend) ** \
            xp.flip((xp.abs(xp.arange(obj.shape[-2])[None].T -
                     xp.arange(obj.shape[-2])[None])), 0)*y_*keeps
        # Keep the projection only outside the nan_triangle and add back the
        # observed incremental ratios.
        obj.values = obj.values*(1-xp.nan_to_num(x.nan_triangle)) + \
            xp.nan_to_num((X.cum_to_incr()/sample_weight).values)

        obj.values[obj.values == 0] = xp.nan
        obj._set_slicers()
        obj.valuation_date = pd.to_datetime(ULT_VAL)
        self.ldf_ = obj.incr_to_cum().link_ratio
        self.incremental_ = obj*sample_weight
        # sigma_ and std_err_ are the same all-zero object shaped like ldf_.
        self.sigma_ = self.std_err_ = 0*self.ldf_
        return self
Exemple #12
0
    def latest_diagonal(self):
        """The latest diagonal of the Triangle.

        Returns a copy whose values are, per origin, the NaN-aware sum of the
        cells valued exactly at ``self.valuation_date``, and whose development
        axis is a single timestamp equal to that valuation date.
        """
        from chainladder.utils.utility_functions import num_to_nan

        xp = self.get_array_module()
        diagonal = self.copy()

        # Boolean (origin x development) grid of cells on the latest diagonal.
        on_latest = self.valuation == self.valuation_date
        on_latest = on_latest.reshape(self.shape[-2:], order="F")
        mask = xp.array(np.nan_to_num(on_latest))

        selected = num_to_nan(mask * 1.0) * self.values
        diagonal.values = num_to_nan(
            xp.nansum(selected, axis=-1, keepdims=True))
        diagonal.ddims = pd.DatetimeIndex([self.valuation_date],
                                          dtype="datetime64[ns]",
                                          freq=None)
        return diagonal
Exemple #13
0
    def incr_to_cum(self, inplace=False):
        """Method to convert an incremental triangle into a cumulative triangle.

        Pattern triangles (``is_pattern`` truthy) are accumulated
        multiplicatively from the last development column backwards; all
        other triangles are accumulated additively along axis 3.

        Parameters
        ----------
        inplace: bool
            Set to True will update the instance data attribute inplace

        Returns
        -------
            The converted triangle; ``self`` when ``inplace`` is True,
            otherwise a converted copy. A triangle that is already
            cumulative is returned unchanged.
        """
        if inplace:
            xp = self.get_array_module()
            if not self.is_cumulative:
                if self.is_pattern:
                    # Reverse the last axis so the cumulative product runs
                    # from the right-most column backwards.
                    values = xp.nan_to_num(self.values[..., ::-1])
                    if self.array_backend == "sparse":
                        # NOTE(review): this replaces `values` with the
                        # un-reversed numpy-backend values, discarding the
                        # reversal and nan_to_num from the line above --
                        # confirm this is intended.
                        xp = np
                        values = self.set_backend("numpy").values
                    # Zeros become 1 so they are neutral in the product.
                    values[values == 0] = 1.0
                    values = xp.cumprod(values, -1)[..., ::-1]
                    self.values = values = values * self.nan_triangle
                    if self.array_backend == "sparse":
                        # Wrap the result back with the instance's own array
                        # module.
                        self.values = self.get_array_module()(self.values)
                else:
                    if self.array_backend != "sparse":
                        self.values = (
                            num_to_nan(xp.cumsum(xp.nan_to_num(self.values), 3))
                            * self.nan_triangle[None, None, ...]
                        )
                    else:
                        # Sparse path: build each cumulative column as the sum
                        # of the first i + 1 columns, multiplied by column i
                        # of the nan_triangle, then concatenate the columns.
                        values = xp.nan_to_num(self.values)
                        nan_triangle = xp.nan_to_num(self.nan_triangle)
                        l1 = lambda i: values[..., 0 : (i + 1)]
                        l2 = lambda i: l1(i) * nan_triangle[..., i : i + 1]
                        l3 = lambda i: l2(i).sum(3, keepdims=True)
                        out = [l3(i) for i in range(self.shape[-1])]
                        self.values = num_to_nan(xp.concatenate(out, axis=3))
                self.is_cumulative = True
            return self
        else:
            new_obj = self.copy()
            return new_obj.incr_to_cum(inplace=True)
Exemple #14
0
 def _get_ultimate(self, X, expectation):
     """Blend the latest diagonal of ``X`` with ``expectation`` into ultimates.

     With ``q = 1 - 1 / cdf``, the result is
     ``latest * sum(q**k for k < n_iters) + expectation * q**n_iters``.

     Returns
     -------
     Whatever ``self._set_ult_attr`` returns for the computed ultimate.
     """
     from chainladder.utils.utility_functions import num_to_nan
     xp = X.get_array_module()
     ultimate = X.copy()

     base = (1 - 1 / num_to_nan(self._align_cdf(ultimate, expectation)))[None]
     powers = xp.arange(self.n_iters + 1)
     powers = xp.reshape(powers, (len(powers),) + (1,) * 4)
     # Ratio is 1 where `base` is a number and NaN where it is NaN.
     unit = (base + 1e-16) / (base + 1e-16)
     terms = xp.nan_to_num(base**(unit * powers))

     observed_part = xp.sum(terms[:-1, ...], 0) * xp.nan_to_num(
         X.latest_diagonal.values)
     expected_part = terms[-1, ...] * xp.nan_to_num(
         expectation.set_backend(X.array_backend).values)
     ultimate.values = observed_part + expected_part
     return self._set_ult_attr(ultimate)
 def link_ratio(self):
     """Return age-to-age ratios of the triangle as a pattern triangle.

     A triangle that is already a pattern is returned as-is. Otherwise each
     development column is divided by the one before it, and the result is
     flagged ``is_pattern=True`` and ``is_cumulative=False``.
     """
     if self.is_pattern:
         return self

     ratios = (1 / self.iloc[..., :-1]) * self.iloc[..., 1:].values
     if not ratios.is_full:
         # Keep only the cells valued before the valuation date.
         ratios = ratios[ratios.valuation < ratios.valuation_date]
     if hasattr(ratios, "w_"):
         # Apply the stored weights only when they line up with the ratios.
         weights = ratios.w_[..., 0:1, : len(ratios.odims), :]
         if ratios.shape == weights.shape:
             ratios = ratios * weights
     ratios.is_pattern = True
     ratios.is_cumulative = False
     ratios.values = num_to_nan(ratios.values)
     return ratios
Exemple #16
0
    def _get_tail_stats(self, X):
        """Approximate the tail sigma and standard error.

        Uses log-linear extrapolation applied to the tail average period:
        a regression is fitted to the log of each statistic along axis 3 and
        evaluated at the tail weighted time period.

        Returns
        -------
        tuple
            ``(sigma_, std_err_)`` arrays.
        """
        from chainladder.utils.utility_functions import num_to_nan

        time_pd = self._get_tail_weighted_time_period(X)
        xp = X.sigma_.get_array_module()

        def extrapolate(values):
            # Fit log(values) along axis 3 and evaluate at the tail period.
            reg = WeightedRegression(axis=3, xp=xp).fit(
                None, xp.log(values), None)
            return xp.exp(time_pd * reg.slope_ + reg.intercept_)

        sigma_ = extrapolate(X.sigma_.values)
        std_err_ = extrapolate(num_to_nan(X.std_err_.values))
        return sigma_, std_err_
    def incr_to_cum(self, inplace=False):
        """Method to convert an incremental triangle into a cumulative triangle.

        Parameters
        ----------
        inplace: bool
            Set to True will update the instance data attribute inplace

        Returns
        -------
            Updated instance of triangle accumulated along the origin
        """
        if inplace:
            xp = self.get_array_module()
            if not self.is_cumulative:
                if self.is_pattern:
                    values = xp.nan_to_num(self.values[..., ::-1])
                    values = num_to_value(values, 1)
                    values = xp.cumprod(values, -1)[..., ::-1]
                    self.values = values * self.nan_triangle
                    values = num_to_value(values, self.get_array_module(values).nan)
                else:
                    if self.array_backend not in ["sparse", "dask"]:
                        self.values = (
                            xp.cumsum(xp.nan_to_num(self.values), 3)
                            * self.nan_triangle[None, None, ...])
                    else:
                        values = xp.nan_to_num(self.values)
                        nan_triangle = xp.nan_to_num(self.nan_triangle)
                        l1 = lambda i: values[..., 0 : i + 1]
                        l2 = lambda i: l1(i) * nan_triangle[..., i : i + 1]
                        l3 = lambda i: l2(i).sum(3, keepdims=True)
                        if db:
                            bag = db.from_sequence(range(self.shape[-1]))
                            bag = bag.map(l3)
                            out = bag.compute(scheduler='threads')
                        else:
                            out = [l3(i) for i in range(self.shape[-1])]
                        self.values = xp.concatenate(out, axis=3)
                    self.values = num_to_nan(self.values)
                self.is_cumulative = True
            return self
        else:
            new_obj = self.copy()
            return new_obj.incr_to_cum(inplace=True)
Exemple #18
0
    def _get_full_std_err_(self, X=None):
        """Build a triangle of sigma scaled by the full triangle's values.

        Each averaging method in ``X.average_`` is mapped to an exponent code
        (regression 0, volume 1, simple 2; other entries are used as given),
        and sigma is divided by ``sqrt(full ** (2 - code))``.

        Returns
        -------
        A copy of ``X`` carrying the scaled values and the full triangle's
        valuation date.
        """
        from chainladder.utils.utility_functions import num_to_nan

        obj = X.copy()
        xp = obj.get_array_module()
        lxp = X.ldf_.get_array_module()
        full = getattr(X, "_full_triangle_", self.full_triangle_)

        codes = {"regression": 0, "volume": 1, "simple": 2}
        exponents = [codes.get(item, item) for item in X.average_]
        # Repeat the last code once more so there is one per development
        # column.
        exponents.append(exponents[-1])
        val = xp.broadcast_to(xp.array(exponents), X.shape)

        scale = xp.sqrt(full.values[..., :len(X.ddims)]**(2 - val))
        obj.values = X.sigma_.values / num_to_nan(scale)

        # Extend the weights by one column of ones along axis 3 and treat
        # missing weights as 1.
        pad = lxp.ones((1, 1, val.shape[2], 1))
        w = lxp.concatenate((X.w_, pad), 3)
        w[xp.isnan(w)] = 1
        obj.values = xp.nan_to_num(obj.values) * xp.array(w)

        obj.valuation_date = full.valuation_date
        obj._set_slicers()
        return obj
Exemple #19
0
 def link_ratio(self):
     """Return the age-to-age ratios of the triangle.

     Each development column is divided by the one before it on a deep copy
     of the instance. The development labels of the result are
     ``'<from>-<to>'`` strings built from consecutive entries of ``ddims``.

     Returns
     -------
     The ratio triangle, multiplied by the stored ``w_`` weights when the
     object has them and their shape matches.
     """
     from chainladder.utils.utility_functions import num_to_nan
     xp = self.get_array_module()
     obj = copy.deepcopy(self)
     temp = num_to_nan(obj.values.copy())
     obj.ddims = np.array([
         '{}-{}'.format(start, end)
         for start, end in zip(obj.ddims[:-1], obj.ddims[1:])
     ])
     obj.values = temp[..., 1:] / temp[..., :-1]
     if self.array_backend == 'sparse':
         # Shrink the declared shape to the extent of the stored coordinates.
         obj.values.shape = tuple(obj.values.coords.max(1) + 1)
     else:
         # Drop the last origin row when it holds at most one observation
         # in every slice, i.e. it cannot form a ratio.
         if xp.max(xp.sum(~xp.isnan(self.values[..., -1, :]), 2) - 1) <= 0:
             obj.values = obj.values[..., :-1, :]
     obj.odims = obj.odims[:obj.values.shape[2]]
     if hasattr(obj, 'w_'):
         weights = obj.w_[..., 0:1, :len(obj.odims), :]
         if obj.shape == weights.shape:
             obj = obj * weights
     return obj
Exemple #20
0
    def _get_tail_stats(self, X):
        """Approximate the tail sigma and standard error.

        Uses log-linear extrapolation applied to the tail average period.
        When ``X`` has no ``sigma_`` attribute, ``self.sigma_`` and
        ``self.std_err_`` are set to None. Otherwise the last development
        column of ``self.sigma_.values`` and ``self.std_err_.values`` is
        replaced by the extrapolated value.
        """
        from chainladder.utils.utility_functions import num_to_nan
        if not hasattr(X, 'sigma_'):
            self.sigma_ = None
            self.std_err_ = None
            return

        time_pd = self._get_tail_weighted_time_period(X)
        xp = X.sigma_.get_array_module()

        def extrapolate(values):
            # Fit log(values) along axis 3 and evaluate at the tail period.
            reg = WeightedRegression(axis=3, xp=xp).fit(
                None, xp.log(values), None)
            return xp.exp(time_pd * reg.slope_ + reg.intercept_)

        sigma_ = extrapolate(X.sigma_.values)
        std_err_ = extrapolate(num_to_nan(X.std_err_.values))

        # Overwrite only the trailing column with the extrapolated value.
        self.sigma_.values = xp.concatenate(
            (self.sigma_.values[..., :-1], sigma_[..., -1:]), axis=-1)
        self.std_err_.values = xp.concatenate(
            (self.std_err_.values[..., :-1], std_err_[..., -1:]), axis=-1)
Exemple #21
0
 def full_std_err_(self):
     """Build a triangle of sigma scaled by the full triangle's values.

     Each averaging method in ``self.average_`` is mapped to an exponent
     code (regression 0, volume 1, simple 2; anything else 2), and
     ``self.X_.sigma_`` is divided by ``sqrt(full ** (2 - code))``.

     Returns
     -------
     A shallow copy of ``self.X_`` carrying the scaled values and the full
     triangle's valuation date.
     """
     from chainladder.utils.utility_functions import num_to_nan
     obj = copy.copy(self.X_)
     xp = obj.get_array_module()
     lxp = self.X_.ldf_.get_array_module()
     full = self.full_triangle_
     full_values = full.values

     codes = {'regression': 0, 'volume': 1, 'simple': 2}
     methods = self.average_ if type(
         self.average_) is list else list(self.average_)
     # Repeat the last method once more so there is one code per
     # development column.
     val = xp.array(
         [codes.get(name.lower(), 2) for name in methods + [methods[-1]]])
     val = xp.broadcast_to(val, self.X_.shape)

     scale = num_to_nan(
         xp.sqrt(full_values[..., :len(self.X_.ddims)]**(2 - val)))
     obj.values = self.X_.sigma_.values / scale

     # Extend the weights by one NaN column along axis 3, then treat every
     # missing weight as 1.
     pad = lxp.ones((*val.shape[:3], 1)) * xp.nan
     w = lxp.concatenate((self.X_.w_, pad), axis=3)
     w[xp.isnan(w)] = 1
     obj.values = xp.nan_to_num(obj.values) * xp.array(w)

     obj.valuation_date = full.valuation_date
     obj._set_slicers()
     return obj
Exemple #22
0
    def __init__(self,
                 data=None,
                 origin=None,
                 development=None,
                 columns=None,
                 index=None,
                 origin_format=None,
                 development_format=None,
                 cumulative=None,
                 array_backend=None,
                 pattern=False,
                 *args,
                 **kwargs):
        """Build a triangle from tabular data.

        Parameters
        ----------
        data : optional
            Table indexed by column label (presumably a pandas DataFrame).
            When None, initialization stops immediately and the instance is
            left empty.
        origin, development, columns, index : optional
            Column label(s) of ``data`` used for the origin axis, the
            development axis, the value columns and the key index. A bare
            ``str`` or ``pd.Period`` is wrapped in a list.
        origin_format, development_format : optional
            Passed as ``format`` to ``TriangleBase._to_datetime``.
        cumulative : optional
            Stored as ``is_cumulative``.
        array_backend : optional
            Target backend; defaults to ``chainladder.ARRAY_BACKEND``.
        pattern : bool
            Stored as ``is_pattern``; when truthy the triangle is reduced via
            ``dropna``.

        Raises
        ------
        TypeError
            If any of ``columns`` has object dtype.
        AttributeError
            If the selected column names are not unique.
        """
        # Allow Empty Triangle so that we can piece it together programatically
        if data is None:
            return

        # Check whether all columns are unique and numeric
        check = data[columns].dtypes
        check = [check] if isinstance(check, np.dtype) else check.to_list()
        columns = [columns] if type(columns) is not list else columns
        if "object" in check:
            raise TypeError("column attribute must be numeric.")
        if data[columns].shape[1] != len(columns):
            raise AttributeError("Columns are required to have unique names")

        # Sanitize all axis inputs to lists
        str_to_list = lambda *args: tuple(
            [arg] if type(arg) in [str, pd.Period] else arg for arg in args)
        index, columns, origin, development = str_to_list(
            index, columns, origin, development)

        # Determine desired array backend of the Triangle
        if array_backend is None:
            from chainladder import ARRAY_BACKEND

            array_backend = ARRAY_BACKEND
        # Rows whose (single, datetime) development equals ULT_VAL are split
        # off into their own triangle `u` and re-attached at the end.
        if (development and len(development) == 1
                and data[development[0]].dtype == "<M8[ns]"):
            u = data[data[development[0]] == ULT_VAL].copy()
            if len(u) > 0 and len(u) != len(data):
                u = TriangleBase(
                    u,
                    origin=origin,
                    development=development,
                    columns=columns,
                    index=index,
                )
                data = data[data[development[0]] != ULT_VAL]
            else:
                u = None
        else:
            u = None
        # Initialize origin and its grain
        origin = development if origin is None else origin
        origin_date = TriangleBase._to_datetime(data,
                                                origin,
                                                format=origin_format)
        self.origin_grain = TriangleBase._get_grain(origin_date)
        origin_date = (pd.PeriodIndex(
            origin_date,
            freq=self.origin_grain).to_timestamp().rename("origin"))

        # Initialize development and its grain
        m_cnt = {"Y": 12, "Q": 3, "M": 1}
        has_dev = development and len(np.unique(data[development])) > 1
        if has_dev:
            development_date = TriangleBase._to_datetime(
                data, development, period_end=True, format=development_format)
            self.development_grain = TriangleBase._get_grain(development_date)
        else:
            # No usable development axis: derive one period end per origin.
            development_date = pd.PeriodIndex(
                origin_date +
                pd.tseries.offsets.MonthEnd(m_cnt[self.origin_grain]),
                freq={
                    "Y": "A"
                }.get(self.origin_grain, self.origin_grain),
            ).to_timestamp(how="e")
            self.development_grain = self.origin_grain
        development_date.name = "development"

        # Summarize dataframe to the level specified in axes
        key_gr = [origin_date, development_date
                  ] + [data[item] for item in ([] if not index else index)]
        data_agg = data[columns].groupby(key_gr).sum().reset_index().fillna(0)
        if not index:
            index = ["Total"]
            data_agg[index[0]] = "Total"

        # Fill in any gaps in origin/development
        date_axes = self._get_date_axes(
            data_agg["origin"], data_agg["development"])  # cartesian product
        dev_lag = TriangleBase._development_lag(data_agg["origin"],
                                                data_agg["development"])

        # Grab unique index, origin, development
        dev_lag_unique = np.sort(
            TriangleBase._development_lag(date_axes["origin"],
                                          date_axes["development"]).unique())

        orig_unique = np.sort(date_axes["origin"].unique())
        kdims = data_agg[index].drop_duplicates().reset_index(
            drop=True).reset_index()

        # Map index, origin, development indices to data
        set_idx = (lambda col, unique: col.map(
            dict(zip(unique, range(len(unique))))).values[None].T)
        orig_idx = set_idx(data_agg["origin"], orig_unique)
        dev_idx = set_idx(dev_lag, dev_lag_unique)
        key_idx = (data_agg[index].merge(kdims, how="left",
                                         on=index)["index"].values[None].T)

        # origin <= development is required - truncate bad records if not true
        valid = data_agg["origin"] <= data_agg["development"]
        if sum(~valid) > 0:
            warnings.warn("Observations with development before " +
                          "origin start have been removed.")
        data_agg, orig_idx = data_agg[valid], orig_idx[valid]
        dev_idx, key_idx = dev_idx[valid], key_idx[valid]

        # All Triangles start out as sparse arrays
        val_idx = (((np.ones(len(data_agg))[None].T) *
                    range(len(columns))).reshape((1, -1), order="F").T)
        coords = np.concatenate(
            tuple([np.concatenate((orig_idx, dev_idx), 1)] * len(columns)), 0)
        coords = np.concatenate((np.concatenate(
            tuple([key_idx] * len(columns)), 0), val_idx, coords), 1)
        amts = data_agg[columns].unstack()
        amts = amts.values.astype("float64")
        self.array_backend = "sparse"
        self.values = num_to_nan(
            sp(
                coords.T.astype('int64'),
                amts,
                prune=True,
                has_duplicates=False,
                sorted=True,
                shape=(
                    len(kdims),
                    len(columns),
                    len(orig_unique),
                    len(dev_lag_unique) if has_dev else 1,
                ),
            ))

        # Set all axis values
        self.valuation_date = data_agg["development"].max()
        # Keyword form: the positional `axis` argument of DataFrame.drop was
        # removed in pandas 2.0.
        self.kdims = kdims.drop(columns="index").values
        self.odims = orig_unique
        self.ddims = dev_lag_unique if has_dev else dev_lag[0:1].values
        self.ddims = self.ddims * (m_cnt[self.development_grain])
        if development and not has_dev:
            self.ddims = pd.DatetimeIndex(
                TriangleBase._to_datetime(data,
                                          development,
                                          period_end=True,
                                          format=development_format)[0:1])
            self.valuation_date = self.ddims[0]
        self.vdims = np.array(columns)

        # Set remaining triangle properties
        self.key_labels = index
        self.is_cumulative = cumulative
        self.virtual_columns = VirtualColumns(self)
        self.is_pattern = pattern
        if not AUTO_SPARSE or array_backend == "cupy":
            self.set_backend(array_backend, inplace=True)
        else:
            self = self._auto_sparse()
        self._set_slicers()
        if self.is_pattern:
            obj = self.dropna()
            self.odims = obj.odims
            self.ddims = obj.ddims
            self.values = obj.values
        if u:
            # Re-attach the ULT_VAL rows as an extra valuation column.
            obj = concat((self.dev_to_val().iloc[..., :len(u.odims), :], u),
                         -1)
            obj = obj.val_to_dev()
            self.odims = obj.odims
            self.ddims = obj.ddims
            self.values = obj.values
            self.valuation_date = pd.Timestamp(ULT_VAL)
Exemple #23
0
    def fit(self, X, y=None, sample_weight=None):
        """Fit the model with X.

        Each column along the last axis of the triangle is regressed on the
        column before it (weighted, through the origin).  The slope, sigma
        and standard error of those regressions are stored as ``ldf_``,
        ``sigma_`` and ``std_err_``; the regression weights are kept in
        ``w_`` and the standardized residuals in ``std_residuals_``.

        Parameters
        ----------
        X : Triangle-like
            Triangle to fit.  It is copied (or converted from the sparse to
            the numpy backend) before being used.
        y : None
            Ignored
        sample_weight :
            Ignored

        Returns
        -------
        self : object
            Returns the instance itself.

        Raises
        ------
        ValueError
            If ``X.ddims`` is not a numpy array.
        """
        from chainladder.utils.utility_functions import num_to_nan

        # Never operate on the caller's triangle or on a sparse backend.
        X = X.set_backend("numpy") if X.array_backend == "sparse" else X.copy()
        xp = X.get_array_module()

        if type(X.ddims) != np.ndarray:
            raise ValueError(
                "Triangle must be expressed with development lags")

        raw = (X + self.fillna).values if self.fillna else X.values.copy()
        tri = num_to_nan(raw)
        n_links = tri.shape[-1] - 1

        # Scalar settings are broadcast to one entry per adjacent-column pair.
        averages = (self.average if type(self.average) is list
                    else [self.average] * n_links)
        self.average_ = np.array(averages)
        periods = (self.n_periods if type(self.n_periods) is list
                   else [self.n_periods] * n_links)
        self.n_periods_ = np.array(periods)

        # Named averages map to the exponent applied to the regressor in the
        # weights; anything that is not a known name is passed through as is.
        exponent_of = {"regression": 0, "volume": 1, "simple": 2}
        prior, latter = tri[..., :-1], tri[..., 1:]
        exponents = xp.array(
            [exponent_of.get(name, name) for name in self.average_])
        val = xp.nan_to_num(exponents[None, None, None] * (latter * 0 + 1))

        link_ratio = latter / prior
        self.w_ = xp.array(
            self._assign_n_periods_weight(X) *
            self._drop_adjustment(X, link_ratio))
        weights = self.w_ / (prior ** val)

        params = WeightedRegression(
            axis=2, thru_orig=True, xp=xp).fit(prior, latter, weights)
        if self.n_periods != 1:
            params = params.sigma_fill(self.sigma_interpolation)
        else:
            warnings.warn("Setting n_periods=1 does not allow enough degrees "
                          "of freedom to support calculation of all regression"
                          " statistics.  Only LDFs have been calculated.")

        # Where the regression produced no standard error (NaN), fall back to
        # sigma scaled by the first row of sqrt(prior ** (2 - val)).
        fitted_err = params.std_err_
        is_missing = 1 - xp.nan_to_num(fitted_err * 0 + 1)
        scale = xp.swapaxes(
            xp.sqrt(prior ** (2 - val))[..., 0:1, :], -1, -2)
        params.std_err_ = xp.nan_to_num(fitted_err) + xp.nan_to_num(
            is_missing * params.sigma_ / scale)

        stacked = xp.concatenate(
            (params.slope_, params.sigma_, params.std_err_), 3)
        stacked = xp.swapaxes(stacked, 2, 3)
        self.ldf_ = self._param_property(X, stacked, 0)
        self.sigma_ = self._param_property(X, stacked, 1)
        self.std_err_ = self._param_property(X, stacked, 2)

        # Residuals of the fitted link, standardized by sigma / sqrt(weight).
        raw_resid = (-X.iloc[..., :-1] * self.ldf_.values
                     + X.iloc[..., 1:].values)
        resid_scale = xp.sqrt(
            (1 / num_to_nan(weights)) * (self.sigma_**2).values)
        standardized = raw_resid / resid_scale
        self.std_residuals_ = standardized[
            standardized.valuation < X.valuation_date]
        return self
    def grain(self, grain="", trailing=False, inplace=False):
        """Changes the grain of a triangle.

        Parameters
        ----------
        grain : str
            The grain to which you want your triangle converted, specified as
            'OXDY' where X and Y can take on values of ``['Y', 'S', 'Q', 'M'
            ]`` For example, 'OYDY' for Origin Year/Development Year, 'OQDM'
            for Origin quarter/Development Month, etc.
        trailing : bool
            For partial origin years/quarters, trailing will set the year/quarter
            end to that of the latest available from the origin data.
        inplace : bool
            Accepted for API compatibility.  As written, the existing instance
            is not mutated: the converted triangle is returned whether or not
            this flag is set.

        Returns
        -------
            Triangle

        Raises
        ------
        ValueError
            If the requested grain is empty, is not reachable from the
            current grain, or asks for an origin grain finer than the
            development grain.
        AttributeError
            If ``is_cumulative`` is None while the development grain has to
            change on a triangle with more than one development column.
        """
        # Slices (not indexing) are used for both characters so that an empty
        # or too-short grain string reaches the ValueError below instead of
        # failing with an IndexError.
        ograin_old, ograin_new = self.origin_grain, grain[1:2]
        dgrain_old, dgrain_new = self.development_grain, grain[-1:]
        # For each existing grain, the grains it may be converted to.
        valid = {"Y": ["Y"], "Q": ["Q", "S", "Y"], "M": ["Y", "S", "Q", "M"],
                 "S": ["S", "Y"]}
        if ograin_new not in valid.get(ograin_old, []) or dgrain_new not in valid.get(
            dgrain_old, []
        ):
            raise ValueError("New grain not compatible with existing grain")
        if (
            self.is_cumulative is None
            and dgrain_old != dgrain_new
            and self.shape[-1] > 1
        ):
            raise AttributeError(
                "The is_cumulative attribute must be set before using grain method."
            )
        # valid["M"] is ordered coarsest -> finest, so a larger index means a
        # finer grain.
        if valid["M"].index(ograin_new) > valid["M"].index(dgrain_new):
            raise ValueError("Origin grain must be coarser than development grain")
        if self.is_full and not self.is_ultimate and not self.is_val_tri:
            warnings.warn('Triangle includes extraneous development lags')
        obj = self.dev_to_val()
        if ograin_new != ograin_old:
            # Translate the grain letter to the frequency alias handed to
            # pandas ``resample``; letters without a mapping are used as is.
            freq = {"Y": "A", "S": "2Q"}.get(ograin_new, ograin_new)
            mn = self.origin[-1].strftime("%b").upper() if trailing else "DEC"
            # ``indices`` maps each resampled period to the positions of the
            # origins falling in it; flatten that into one label per origin.
            indices = pd.Series(
                range(len(self.origin)), index=self.origin).resample(
                    '-'.join([freq, mn])).indices
            groups = pd.concat([
                pd.Series([k]*len(v), index=v)
                 for k, v in indices.items()], axis=0).values
            obj = obj.groupby(groups, axis=2).sum()
            obj.origin_close = mn
            # When the first origin and the first valuation fall in different
            # months, prepend zero-valued columns for the months in between.
            if len(obj.ddims) > 1 and pd.Timestamp(obj.odims[0]).strftime(
                "%Y%m"
            ) != obj.valuation[0].strftime("%Y%m"):
                addl_ts = (
                    pd.period_range(obj.odims[0], obj.valuation[0], freq="M")[:-1]
                    .to_timestamp()
                    .values
                )
                addl = obj.iloc[..., -len(addl_ts) :] * 0
                addl.ddims = addl_ts
                obj = concat((addl, obj), axis=-1)
                obj.values = num_to_nan(obj.values)
        if dgrain_old != dgrain_new and obj.shape[-1] > 1:
            step = self._dstep()[dgrain_old][dgrain_new]
            # Column positions kept: every ``step``-th column counted back
            # from the last one, in ascending order.
            d = np.sort(len(obj.development) -
                        np.arange(0, len(obj.development), step) - 1)
            if obj.is_cumulative:
                obj = obj.iloc[..., d]
            else:
                # Non-cumulative values are summed into the kept columns
                # instead of being sampled.
                ddims = obj.ddims[d]
                d2 = [d[0]] * (d[0] + 1) + list(np.repeat(np.array(d[1:]), step))
                obj = obj.groupby(d2, axis=3).sum()
                obj.ddims = ddims
            obj.development_grain = dgrain_new
        obj = obj.dev_to_val() if self.is_val_tri else obj.val_to_dev()
        # NOTE(review): the previous ``if inplace: self = obj`` only rebound
        # the local name and never changed the caller's instance, so both
        # settings of ``inplace`` already returned ``obj``.  Real in-place
        # support would need the instance state copied over from ``obj``.
        return obj
Exemple #25
0
    def fit(self, X, y=None, sample_weight=None):
        """Fit the model with X.

        Parameters
        ----------
        X : Triangle-like
            Triangle to which the incremental method is applied.  Triangle must
            be cumulative.
        y : None
            Ignored
        sample_weight :
            Exposure used in the method.  Required.

        Returns
        -------
        self : object
            Returns the instance itself.

        Raises
        ------
        ValueError
            If ``X.ddims`` is not a numpy array, if ``sample_weight`` is not
            supplied, or if ``self.average`` is neither ``"simple"`` nor
            ``"volume"``.
        """
        if type(X.ddims) != np.ndarray:
            raise ValueError("Triangle must be expressed with development lags")
        # Fail with a clear message instead of an AttributeError on None.
        if sample_weight is None:
            raise ValueError(
                "sample_weight is required: pass the exposure used by the method"
            )
        from chainladder.utils.utility_functions import num_to_nan

        if X.array_backend == "sparse":
            X = X.set_backend("numpy")
        else:
            X = X.copy()
        if sample_weight.array_backend == "sparse":
            sample_weight = sample_weight.set_backend("numpy")
        else:
            # ``is_cumulative`` is overwritten below; work on a copy so the
            # caller's exposure triangle is left untouched.
            sample_weight = sample_weight.copy()
        xp = X.get_array_module()
        sample_weight.is_cumulative = False
        # Incremental amounts per unit of exposure.
        obj = X.cum_to_incr() / sample_weight.values
        if hasattr(X, "trend_"):
            if self.trend != 0:
                warnings.warn(
                    "IncrementalAdditive Trend assumption is ignored when X has a trend_ property."
                )
            x = obj * obj.trend_.values
        else:
            x = obj.trend(self.trend, axis='valuation')

        w_ = Development(
            n_periods=self.n_periods - 1, drop=self.drop,
            drop_high=self.drop_high, drop_low=self.drop_low,
            drop_valuation=self.drop_valuation).fit(x).w_
        # This will miss drops on the latest diagonal
        w_ = num_to_nan(w_)
        # The fitted weights have one column fewer than ``x``; append a final
        # column built from the last weight column and x.nan_triangle.
        w_ = xp.concatenate((w_, (w_[..., -1:] * x.nan_triangle)[..., -1:]), axis=-1)
        if self.average == "simple":
            y_ = xp.nanmean(w_ * x.values, axis=-2)
        elif self.average == "volume":
            y_ = xp.nansum(w_ * x.values * sample_weight.values, axis=-2)
            y_ = y_ / xp.nansum(w_ * sample_weight.values, axis=-2)
        else:
            raise ValueError(
                "average must be 'simple' or 'volume', got {!r}".format(
                    self.average)
            )
        self.zeta_ = X.iloc[..., -1:, :]
        self.zeta_.values = y_[:, :, None, :]
        y_ = xp.repeat(y_[..., None, :], len(x.odims), -2)
        obj = x.copy()
        # 1 wherever x.nan_triangle is NaN, plus 1 on cells valued at the
        # valuation date.
        keeps = (
            1
            - xp.nan_to_num(x.nan_triangle)
            + xp.nan_to_num(
                x[x.valuation == x.valuation_date].values[0, 0, ...] * 0 + 1
            )
        )
        obj.values = y_ * keeps
        obj.valuation_date = obj.valuation.max()
        # Keep the averaged values only where x.nan_triangle is NaN and add
        # back the observed incremental-per-exposure amounts.
        obj.values = obj.values * (1 - xp.nan_to_num(x.nan_triangle)) + xp.nan_to_num(
            (X.cum_to_incr().values / sample_weight.values)
        )

        obj.values[obj.values == 0] = xp.nan
        obj._set_slicers()
        obj.valuation_date = obj.valuation.max()
        # NOTE(review): a future_trend of 0 is falsy and therefore falls back
        # to self.trend -- confirm that this is intended.
        future_trend = self.trend if not self.future_trend else self.future_trend
        self.incremental_ = obj * sample_weight.values
        self.incremental_ = self.incremental_.trend(
            1/(1+future_trend)-1, axis='valuation', start=X.valuation_date,
            end=self.incremental_.valuation_date)
        self.ldf_ = obj.incr_to_cum().link_ratio
        self.sigma_ = self.std_err_ = 0 * self.ldf_
        return self
 def __init__(self, data=None, origin=None, development=None, columns=None,
              index=None, origin_format=None, development_format=None,
              cumulative=None, array_backend=None, pattern=False,
              trailing=False, *args, **kwargs):
     """Build the triangle's axes and values from ``data``.

     When ``data`` is None the method returns immediately and sets no
     attributes.  Otherwise the inputs are validated, the origin and
     development dates are derived, the data is aggregated, and the four
     axes (``kdims``, ``vdims``, ``odims``, ``ddims``) and ``values`` are
     populated before the array backend is selected.

     Parameters
     ----------
     data :
         Source data handed to the validation/aggregation helpers.
         Presumably a DataFrame-like object -- TODO confirm.
     origin, development, columns, index :
         Axis specifications; normalized by ``_input_validation``.
     origin_format, development_format :
         Formats forwarded to ``_to_datetime`` / ``_set_development``.
     cumulative :
         Stored unchanged as ``is_cumulative``.
     array_backend :
         Backend name; falls back to ``options.ARRAY_BACKEND`` when None.
     pattern :
         Stored as ``is_pattern``.  When truthy, axes and values are replaced
         by those of ``self.dropna()``.
     trailing :
         When truthy and the origin grain is not 'M', ``origin_close`` is set
         to the upper-cased month abbreviation of the last origin instead of
         'DEC'.
     *args, **kwargs :
         Accepted but not used in this method.
     """
     if data is None:
         return
     index, columns, origin, development = self._input_validation(
         data, index, columns, origin, development)
     # Separate out the portion returned as ``ult``; it is appended again at
     # the end of this method when truthy.
     data, ult = self._split_ult(data, index, columns, origin, development)
     origin_date = self._to_datetime(
         data, origin, format=origin_format).rename('__origin__')
     self.origin_grain = self._get_grain(origin_date)
     # A '2Q' grain is recorded as 'S'.
     self.origin_grain = 'S' if self.origin_grain == '2Q' else self.origin_grain
     development_date = self._set_development(
         data, development, development_format, origin_date)
     # With a single distinct development date the development grain is
     # taken from the origin grain.
     self.development_grain = (
         self._get_grain(development_date) if development_date.nunique() != 1
         else self.origin_grain)
     data_agg = self._aggregate_data(
         data, origin_date, development_date, index, columns)
     date_axes = self._get_date_axes(
         data_agg["__origin__"], data_agg["__development__"])
     # Without an index, a single constant "Total" key column is used.
     if not index:
         index = ["Total"]
         data_agg[index[0]] = "Total"
     self.kdims, key_idx = self._set_kdims(data_agg, index)
     self.vdims = np.array(columns)
     self.odims, orig_idx = self._set_odims(data_agg, date_axes)
     self.ddims, dev_idx = self._set_ddims(data_agg, date_axes)
     # Set the Triangle values: a 4-axis array shaped
     # (len(kdims), len(vdims), len(odims), len(ddims)) built by ``sp`` from
     # coordinates and amounts.
     coords, amts = self._set_values(data_agg, key_idx, columns, orig_idx, dev_idx)
     self.values = num_to_nan(
         sp(coords, amts, prune=True,
            has_duplicates=False, sorted=True,
            shape=(len(self.kdims), len(self.vdims),
                   len(self.odims), len(self.ddims))))
     # Set remaining triangle properties
     val_date = data_agg["__development__"].max()
     # Materialize the maximum when it exposes ``compute`` (presumably a
     # lazily evaluated result -- TODO confirm).
     val_date = val_date.compute() if hasattr(val_date, 'compute') else val_date
     self.key_labels = index
     self.valuation_date = val_date
     self.is_cumulative = cumulative
     self.virtual_columns = VirtualColumns(self)
     self.is_pattern = pattern
     self.origin_close = 'DEC'
     if self.origin_grain != 'M' and trailing:
         self.origin_close = pd.to_datetime(self.odims[-1]).strftime('%b').upper()
     # Deal with array backend: values were just built by ``sp``, so the
     # instance starts out labelled "sparse".
     self.array_backend = "sparse"
     if array_backend is None:
         array_backend = options.ARRAY_BACKEND
     if not options.AUTO_SPARSE or array_backend == "cupy":
         self.set_backend(array_backend, inplace=True)
     else:
         # NOTE(review): this rebinds the local name ``self``; the remaining
         # statements act on whatever ``_auto_sparse`` returns -- confirm it
         # returns this same instance.
         self = self._auto_sparse()
     self._set_slicers()
     # Deal with special properties
     if self.is_pattern:
         obj = self.dropna()
         self.odims = obj.odims
         self.ddims = obj.ddims
         self.values = obj.values
     if ult:
         # Append ``ult`` as a final column in valuation form, convert back
         # to development form, and stamp the options.ULT_VAL valuation date.
         obj = concat((self.dev_to_val().iloc[..., :len(ult.odims), :], ult), -1)
         obj = obj.val_to_dev()
         self.odims = obj.odims
         self.ddims = obj.ddims
         self.values = obj.values
         self.valuation_date = pd.Timestamp(options.ULT_VAL)
Exemple #27
0
 def _arithmetic_cleanup(self, obj):
     """Shared post-processing applied to ``obj`` after an arithmetic operation.

     Multiplies ``obj.values`` by ``obj.nan_triangle`` with its NaNs replaced
     through ``nan_to_num``, passes the result through ``num_to_nan`` and
     returns the same ``obj``.
     """
     xp = obj.get_array_module()
     mask = xp.nan_to_num(obj.nan_triangle)
     obj.values = obj.values * mask
     obj.values = num_to_nan(obj.values)
     return obj
Exemple #28
0
 def __rtruediv__(self, other):
     """Return a copy of this object holding ``other / self.values``.

     The quotient is passed through ``num_to_nan`` before being returned.
     """
     result = self.copy()
     quotient = other / self.values
     result.values = quotient
     result.values = num_to_nan(result.values)
     return result
Exemple #29
0
    def fit(self, X, y=None, sample_weight=None):
        """Fit the model with X.

        Each column along the last axis of the triangle is regressed on the
        column before it (weighted, through the origin).  The slope, sigma
        and standard error of those regressions are stored as ``ldf_``,
        ``sigma_`` and ``std_err_``; the regression weights are kept in
        ``w_``.

        Parameters
        ----------
        X : Triangle-like
            Triangle to fit.  It is deep-copied (or converted from the sparse
            to the numpy backend) before being used.
        y : Ignored
        sample_weight : Ignored

        Returns
        -------
        self : object
            Returns the instance itself.

        Raises
        ------
        ValueError
            If ``X.ddims`` is not a numpy array.
        """
        from chainladder.utils.utility_functions import num_to_nan

        # Work on a private numpy-backed triangle.
        if X.array_backend != 'sparse':
            X = copy.deepcopy(X)
        else:
            X = X.set_backend('numpy')
        xp = X.get_array_module()

        if type(X.ddims) != np.ndarray:
            raise ValueError(
                'Triangle must be expressed with development lags')

        source = (X + self.fillna).values if self.fillna else X.values.copy()
        tri = num_to_nan(source)
        n_links = tri.shape[-1] - 1

        # Scalar settings are broadcast to one entry per adjacent-column pair.
        if type(self.average) is list:
            averages = self.average
        else:
            averages = [self.average] * n_links
        self.average_ = np.array(averages)
        if type(self.n_periods) is list:
            periods = self.n_periods
        else:
            periods = [self.n_periods] * n_links
        self.n_periods_ = np.array(periods)

        # Averaging names (case-insensitive) map to the exponent applied to
        # the regressor in the weights; unknown names use exponent 1.
        power_for = {'regression': 0, 'volume': 1, 'simple': 2}
        pred, succ = tri[..., :-1], tri[..., 1:]
        exponents = xp.array(
            [power_for.get(label.lower(), 1) for label in self.average_])
        # Stack the exponent vector along three new leading axes sized like
        # the triangle's axes 2, 1 and 0 (in that order).
        for dim in (2, 1, 0):
            exponents = xp.repeat(exponents[None], tri.shape[dim], axis=0)
        val = xp.nan_to_num(exponents * (succ * 0 + 1))

        link_ratio = succ / pred
        self.w_ = xp.array(
            self._assign_n_periods_weight(X) *
            self._drop_adjustment(X, link_ratio))
        weights = self.w_ / (pred ** val)

        params = WeightedRegression(
            axis=2, thru_orig=True, xp=xp).fit(pred, succ, weights)
        if self.n_periods != 1:
            params = params.sigma_fill(self.sigma_interpolation)
        else:
            warnings.warn('Setting n_periods=1 does not allow enough degrees '
                          'of freedom to support calculation of all regression'
                          ' statistics.  Only LDFs have been calculated.')

        # Where the regression produced no standard error (NaN), fall back to
        # sigma scaled by the first row of sqrt(pred ** (2 - val)).
        fitted_err = params.std_err_
        is_missing = 1 - xp.nan_to_num(fitted_err * 0 + 1)
        scale = xp.swapaxes(xp.sqrt(pred ** (2 - val))[..., 0:1, :], -1, -2)
        params.std_err_ = xp.nan_to_num(fitted_err) + xp.nan_to_num(
            is_missing * params.sigma_ / scale)

        stacked = xp.concatenate(
            (params.slope_, params.sigma_, params.std_err_), 3)
        stacked = xp.swapaxes(stacked, 2, 3)
        self.ldf_ = self._param_property(X, stacked, 0)
        self.sigma_ = self._param_property(X, stacked, 1)
        self.std_err_ = self._param_property(X, stacked, 2)
        return self
 def num_to_nan(self):
     """Replace ``self.values`` with the result of the ``num_to_nan`` utility."""
     # Imported locally; the utility shares its name with this method.
     from chainladder.utils.utility_functions import num_to_nan
     self.values = num_to_nan(self.values)