def _fit_OLS_thru_orig(self):
    from chainladder.utils.utility_functions import num_to_nan
    w, x, y, axis = self.w, self.x, self.y, self.axis
    xp = self.xp
    d = num_to_nan(xp.nansum((y * 0 + 1) * w * x * x, axis))
    coef = num_to_nan(xp.nansum(w * x * y, axis)) / d
    fitted_value = xp.repeat(xp.expand_dims(coef, axis), x.shape[axis], axis)
    fitted_value = fitted_value * x * (y * 0 + 1)
    residual = (y - fitted_value) * xp.sqrt(w)
    wss_residual = xp.nansum(residual ** 2, axis)
    mse_denom = xp.nansum((y * 0 + 1) * (w != 0), axis) - 1
    mse_denom = num_to_nan(mse_denom)
    mse = wss_residual / mse_denom
    std_err = xp.sqrt(num_to_nan(mse) / d)
    std_err = std_err[..., None]
    if xp != sp:
        std_err[std_err == 0] = xp.nan
    coef = coef[..., None]
    sigma = xp.sqrt(mse)[..., None]
    self.slope_ = coef
    self.sigma_ = sigma
    self.std_err_ = std_err
    return self
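# Illustrative sketch, not part of the library: the regression-through-origin
# slope above reduces to sum(w*x*y) / sum(w*x*x).  With w = 1/x this is the
# volume-weighted chain-ladder LDF; with w = 1/x**2 it is the simple average
# of the link ratios.  A minimal numpy check with hypothetical loss amounts:
import numpy as np

x = np.array([100., 200., 400.])   # losses at age k
y = np.array([150., 310., 590.])   # losses at age k + 1
w = 1 / x                          # volume-weighted regression weights
slope = np.nansum(w * x * y) / np.nansum(w * x * x)
assert np.isclose(slope, y.sum() / x.sum())   # 1050 / 700 = 1.5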
def _get_ultimate(self, X, expectation):
    from chainladder.utils.utility_functions import num_to_nan
    if X.is_cumulative == False:
        ld = X.sum('development')
        ultimate = ld.val_to_dev()
    else:
        ld = X.latest_diagonal
        ultimate = X.copy()
    cdf = self._align_cdf(ultimate, expectation)
    backend = cdf.array_backend
    xp = cdf.get_array_module()
    cdf = cdf.sort_index()
    ld = ld.sort_index()
    expectation = expectation.sort_index()
    ultimate = ultimate.sort_index()
    cdf = (1 - 1 / num_to_nan(cdf.values))[None]
    exponents = xp.arange(self.n_iters + 1)
    exponents = xp.reshape(exponents, tuple([len(exponents)] + [1] * 4))
    cdf = cdf ** (((cdf + 1e-16) / (cdf + 1e-16) * exponents))
    cdf = xp.nan_to_num(cdf)
    a = xp.sum(cdf[:-1, ...], 0) * xp.nan_to_num(ld.set_backend(backend).values)
    b = cdf[-1, ...] * xp.nan_to_num(expectation.set_backend(backend).values)
    ultimate.values = num_to_nan(a + b)
    ultimate.array_backend = backend
    ultimate.ddims = self.cdf_.ddims[:ultimate.shape[-1]]
    return self._set_ult_attr(ultimate)
def _fit_OLS(self):
    """ Given a set of w, x, y, and an axis, this function returns
        OLS slope and intercept.
        TODO: Make this work with n_periods = 1 without numpy warning.
    """
    from chainladder.utils.utility_functions import num_to_nan
    w, x, y, axis = self.w.copy(), self.x.copy(), self.y.copy(), self.axis
    xp = self.xp
    if xp != sp:
        x[w == 0] = xp.nan
        y[w == 0] = xp.nan
    else:
        w2 = w.copy()
        w2.fill_value = sp.nan
        x, y = x * sp(w2), y * sp(w2)
    slope = num_to_nan(
        xp.nansum(w * x * y, axis) - xp.nansum(x * w, axis) * xp.nanmean(y, axis)
    ) / num_to_nan(
        xp.nansum(w * x * x, axis) - xp.nanmean(x, axis) * xp.nansum(w * x, axis))
    intercept = xp.nanmean(y, axis) - slope * xp.nanmean(x, axis)
    self.slope_ = slope[..., None]
    self.intercept_ = intercept[..., None]
    return self
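# Illustrative sketch, not part of the library: with unit weights the slope
# formula above collapses to the textbook OLS slope
# (sum(x*y) - n*mean(x)*mean(y)) / (sum(x*x) - n*mean(x)**2), since
# sum(w*x) = sum(x) and nanmean(y) * sum(x) = n * mean(x) * mean(y).
import numpy as np

x = np.array([1., 2., 3., 4.])
y = np.array([2.1, 3.9, 6.2, 7.8])
w = np.ones_like(x)
slope = (np.nansum(w * x * y) - np.nansum(x * w) * np.nanmean(y)) / (
    np.nansum(w * x * x) - np.nanmean(x) * np.nansum(w * x))
intercept = np.nanmean(y) - slope * np.nanmean(x)
assert np.allclose([slope, intercept], np.polyfit(x, y, 1))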
def loglinear_interpolation(self, y):
    ''' Use Cases: generally for filling in last element of sigma_ '''
    from chainladder.utils.utility_functions import num_to_nan
    xp = self.xp
    ly = xp.log(num_to_nan(y))
    w = xp.nan_to_num(ly * 0 + 1)
    reg = WeightedRegression(self.axis, False, xp=xp).fit(None, ly, w)
    slope, intercept = reg.slope_, reg.intercept_
    fill_ = xp.exp(reg.x * slope + intercept) * (1 - w)
    out = xp.nan_to_num(y) + xp.nan_to_num(fill_)
    return num_to_nan(out)
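# Illustrative sketch, not part of the library: loglinear interpolation fits
# log(y) = slope * t + intercept on the observed entries and fills missing
# entries with exp(slope * t + intercept).  For a geometric decay pattern
# the fill is exact:
import numpy as np

y = np.array([8., 4., 2., np.nan])   # halving pattern, last element missing
t = np.arange(len(y))
mask = ~np.isnan(y)
slope, intercept = np.polyfit(t[mask], np.log(y[mask]), 1)
filled = np.where(mask, y, np.exp(slope * t + intercept))
assert np.isclose(filled[-1], 1.0)   # continues the halving pattern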
def mack_interpolation(self, y):
    """ Use Mack's approximation to fill the last element of sigma_, which is
        the same as loglinear extrapolation using the two elements preceding
        the missing value. This function needs a recursive definition...
    """
    from chainladder.utils.utility_functions import num_to_nan
    xp = self.xp
    w = xp.nan_to_num(y * 0 + 1)
    slicer_n, slicer_d, slicer_a = (
        [slice(None)] * 4,
        [slice(None)] * 4,
        [slice(None)] * 4,
    )
    slicer_n[self.axis], slicer_d[self.axis], slicer_a[self.axis] = (
        slice(1, -1, 1),
        slice(0, -2, 1),
        slice(0, 2, 1),
    )
    slicer_n, slicer_d, slicer_a = (
        tuple(slicer_n),
        tuple(slicer_d),
        tuple(slicer_a),
    )
    fill_ = xp.sqrt(
        abs(
            xp.minimum(
                (y[slicer_n] ** 4 / y[slicer_d] ** 2),
                xp.minimum(y[slicer_d] ** 2, y[slicer_n] ** 2),
            )))
    fill_ = xp.concatenate(
        (w[slicer_a], xp.nan_to_num(fill_)), axis=self.axis) * (1 - w)
    out = xp.nan_to_num(y) + fill_
    return num_to_nan(out)
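# Illustrative sketch, not part of the library: stripped of the axis slicers,
# the fill above is Mack's approximation for the final sigma,
#   sigma_n = sqrt(min(sigma_{n-1}**4 / sigma_{n-2}**2,
#                      min(sigma_{n-2}**2, sigma_{n-1}**2))),
# a loglinear continuation of the two preceding sigmas capped at their level:
import numpy as np

sigma = np.array([0.9, 0.6, 0.4, np.nan])
s1, s2 = sigma[-2], sigma[-3]   # the two elements preceding the missing one
fill = np.sqrt(min(s1 ** 4 / s2 ** 2, min(s2 ** 2, s1 ** 2)))
assert np.isclose(fill, 0.4 ** 2 / 0.6)   # ~0.2667, continuing the decay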
def _get_ultimate(self, X, sample_weight):
    xp = X.get_array_module()
    from chainladder.utils.utility_functions import num_to_nan
    ultimate = copy.deepcopy(X)
    # Apriori
    if self.apriori_sigma != 0:
        random_state = xp.random.RandomState(self.random_state)
        apriori = random_state.normal(
            self.apriori, self.apriori_sigma, X.shape[0])
        apriori = apriori.reshape(X.shape[0], -1)[..., None, None]
        apriori = sample_weight.values * apriori
    else:
        apriori = sample_weight.values * self.apriori
    # Benktander formula -> Triangle
    cdf = self._align_cdf(ultimate, sample_weight)
    cdf = (1 - 1 / num_to_nan(cdf))[None]
    exponents = xp.arange(self.n_iters + 1)
    exponents = xp.reshape(exponents, tuple([len(exponents)] + [1] * 4))
    cdf = cdf ** (((cdf + 1e-16) / (cdf + 1e-16) * exponents))
    cdf = xp.nan_to_num(cdf)
    ultimate.values = (
        xp.sum(cdf[:-1, ...], 0) * xp.nan_to_num(X.latest_diagonal.values)
        + cdf[-1, ...] * xp.nan_to_num(apriori))
    return self._set_ult_attr(ultimate)
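# Illustrative sketch, not part of the library: the exponent trick above
# evaluates the Benktander ultimate as a finite geometric series,
#   U = latest * (1 + q + ... + q**(n-1)) + apriori * q**n,  q = 1 - 1/CDF.
# n_iters=1 is Bornhuetter-Ferguson; as n_iters grows it converges to the
# chain-ladder ultimate.  With hypothetical scalars:
import numpy as np

latest, apriori, cdf, n_iters = 1000.0, 1800.0, 2.0, 1
q = 1 - 1 / cdf
powers = q ** np.arange(n_iters + 1)   # [q**0, ..., q**n]
ultimate = powers[:-1].sum() * latest + powers[-1] * apriori
assert ultimate == 1000 + 0.5 * 1800   # BF: paid + unreported% * apriori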
def _align_cdf(self, ultimate, sample_weight=None):
    """ Vertically align the CDF to the ultimate vector at each origin
        period's latest diagonal.
    """
    xp = ultimate.get_array_module()
    from chainladder.utils.utility_functions import num_to_nan
    if self.cdf_.key_labels != ultimate.key_labels and len(self.ldf_.index) > 1:
        level = list(set(self.cdf_.key_labels).intersection(ultimate.key_labels))
        idx = (ultimate.index[level]
               .merge(self.cdf_.index[level].reset_index(),
                      how="left", on=level)["index"].values)
        cdf = self.cdf_.values[list(idx.astype(int)), ..., :ultimate.shape[-1]]
    else:
        cdf = self.cdf_.values[..., :ultimate.shape[-1]]
    a = ultimate.iloc[0, 0] * 0
    a = a + a.nan_triangle
    if ultimate.array_backend == "sparse":
        a = a - a[a.valuation < a.valuation_date]
    a = a.set_backend(ultimate.array_backend)
    if sample_weight:
        ultimate.values = xp.nan_to_num(
            ultimate.values * a.values) + xp.nan_to_num(
            sample_weight.values * a.values)
    else:
        ultimate.values = xp.nan_to_num(ultimate.values * a.values)
    ultimate.values = num_to_nan(ultimate.values)
    ultimate = ultimate / ultimate
    cdf = ultimate * cdf
    cdf = cdf.latest_diagonal.values
    return cdf
def agg_func(self, axis=None, *args, **kwargs):
    # ``v`` is the name of the underlying array aggregation (e.g. 'sum',
    # 'mean'), captured from the enclosing factory scope that generates
    # these methods.
    from chainladder.utils.utility_functions import num_to_nan
    keepdims = kwargs.get("keepdims", None)
    obj = self.copy()
    auto_sparse = kwargs.pop("auto_sparse", True)
    if axis is None:
        axis = min([num for num, _ in enumerate(obj.shape) if _ != 1])
    else:
        axis = self._get_axis(axis)
    xp = obj.get_array_module()
    func = getattr(xp, v)
    kwargs.update({"keepdims": True})
    obj.values = func(obj.values, axis=axis, *args, **kwargs)
    if axis == 0 and obj.values.shape[axis] == 1 and len(obj.kdims) > 1:
        obj.kdims = np.array([["(All)"] * len(obj.key_labels)])
    if axis == 1 and obj.values.shape[axis] == 1 and len(obj.vdims) > 1:
        obj.vdims = np.array([0])
    if axis == 2 and obj.values.shape[axis] == 1 and len(obj.odims) > 1:
        obj.odims = obj.odims[0:1]
    if axis == 3 and obj.values.shape[axis] == 1 and len(obj.ddims) > 1:
        obj.ddims = pd.DatetimeIndex(
            [self.valuation_date], dtype="datetime64[ns]", freq=None)
    if auto_sparse:
        obj._set_slicers()
    obj.values = num_to_nan(obj.values)
    if not keepdims and obj.shape == (1, 1, 1, 1):
        return obj.values[0, 0, 0, 0]
    else:
        return obj
def cum_to_incr(self, inplace=False):
    """Method to convert a cumulative triangle into an incremental triangle.

    Parameters
    ----------
    inplace: bool
        If True, update the instance data attribute in place

    Returns
    -------
        Updated instance of the triangle decumulated along the development axis
    """
    xp = self.get_array_module()
    from chainladder.utils.utility_functions import num_to_nan
    if inplace:
        if self.is_cumulative or self.is_cumulative is None:
            temp = (xp.nan_to_num(self.values)[..., 1:]
                    - xp.nan_to_num(self.values)[..., :-1])
            temp = xp.concatenate(
                (xp.nan_to_num(self.values[..., 0:1]), temp), axis=3)
            self.values = num_to_nan(temp * self.nan_triangle)
            self.is_cumulative = False
        self._set_slicers()
        return self
    else:
        new_obj = self.copy()
        return new_obj.cum_to_incr(inplace=True)
def incr_to_cum(self, inplace=False):
    """Method to convert an incremental triangle into a cumulative triangle.

    Parameters
    ----------
    inplace: bool
        If True, update the instance data attribute in place

    Returns
    -------
        Updated instance of the triangle accumulated along the development axis
    """
    from chainladder.utils.utility_functions import num_to_nan
    xp = self.get_array_module()
    if inplace:
        if not self.is_cumulative:
            self.values = (
                num_to_nan(xp.cumsum(xp.nan_to_num(self.values), axis=3))
                * self.nan_triangle[None, None, ...])
            self.is_cumulative = True
        self._set_slicers()
        return self
    else:
        new_obj = self.copy()
        return new_obj.incr_to_cum(inplace=True)
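# Usage sketch, assuming the installed chainladder package, its bundled 'raa'
# sample, and that Triangle equality compares the underlying values:
import chainladder as cl

raa = cl.load_sample('raa')              # cumulative triangle
incremental = raa.cum_to_incr()          # decumulate...
assert incremental.incr_to_cum() == raa  # ...and round-trip back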
def fit(self, X, y=None, sample_weight=None):
    """Fit the model with X.

    Parameters
    ----------
    X : Triangle-like
        Triangle to which the incremental method is applied.  Triangle must
        be cumulative.
    y : Ignored
    sample_weight :
        Exposure used in the method.

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    from chainladder import ULT_VAL
    from chainladder.utils.utility_functions import num_to_nan
    if type(X.ddims) != np.ndarray:
        raise ValueError('Triangle must be expressed with development lags')
    if X.array_backend == 'sparse':
        X = X.set_backend('numpy')
    else:
        X = copy.deepcopy(X)
    if sample_weight.array_backend == 'sparse':
        sample_weight = sample_weight.set_backend('numpy')
    else:
        sample_weight = copy.deepcopy(sample_weight)
    xp = X.get_array_module()
    sample_weight.is_cumulative = False
    obj = X.cum_to_incr() / sample_weight
    x = obj.trend(self.trend)
    w_ = Development(n_periods=self.n_periods - 1).fit(x).w_
    w_ = num_to_nan(w_)
    w_ = xp.concatenate(
        (w_, (w_[..., -1:] * x.nan_triangle)[..., -1:]), axis=-1)
    if self.average == 'simple':
        y_ = xp.nanmean(w_ * x.values, axis=-2)
    if self.average == 'volume':
        y_ = xp.nansum(w_ * x.values * sample_weight.values, axis=-2)
        y_ = y_ / xp.nansum(w_ * sample_weight.values, axis=-2)
    y_ = xp.repeat(y_[..., None, :], len(x.odims), -2)
    obj = copy.copy(x)
    keeps = 1 - xp.nan_to_num(x.nan_triangle) + xp.nan_to_num(
        x[x.valuation == x.valuation_date].values[0, 0, ...] * 0 + 1)
    obj.values = (1 + self.trend) ** xp.flip(
        (xp.abs(xp.arange(obj.shape[-2])[None].T
                - xp.arange(obj.shape[-2])[None])), 0) * y_ * keeps
    obj.values = obj.values * (1 - xp.nan_to_num(x.nan_triangle)) + \
        xp.nan_to_num((X.cum_to_incr() / sample_weight).values)
    obj.values[obj.values == 0] = xp.nan
    obj._set_slicers()
    obj.valuation_date = pd.to_datetime(ULT_VAL)
    self.ldf_ = obj.incr_to_cum().link_ratio
    self.incremental_ = obj * sample_weight
    self.sigma_ = self.std_err_ = 0 * self.ldf_
    return self
def latest_diagonal(self):
    """ The latest diagonal of the Triangle """
    from chainladder.utils.utility_functions import num_to_nan
    obj = self.copy()
    xp = self.get_array_module()
    val = (self.valuation == self.valuation_date).reshape(
        self.shape[-2:], order="F")
    val = xp.array(np.nan_to_num(val))
    obj.values = num_to_nan(
        xp.nansum(num_to_nan(val * 1.0) * self.values, axis=-1, keepdims=True))
    obj.ddims = pd.DatetimeIndex(
        [self.valuation_date], dtype="datetime64[ns]", freq=None)
    return obj
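# Usage sketch, assuming the installed chainladder package: the latest
# diagonal collapses the development axis down to the valuation date, so the
# 10x10 'raa' sample yields a (1, 1, 10, 1) Triangle:
import chainladder as cl

raa = cl.load_sample('raa')
print(raa.latest_diagonal.shape)   # (1, 1, 10, 1)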
def incr_to_cum(self, inplace=False):
    """Method to convert an incremental triangle into a cumulative triangle.

    Parameters
    ----------
    inplace: bool
        If True, update the instance data attribute in place

    Returns
    -------
        Updated instance of the triangle accumulated along the development axis
    """
    from chainladder.utils.utility_functions import num_to_nan
    if inplace:
        xp = self.get_array_module()
        if not self.is_cumulative:
            if self.is_pattern:
                values = xp.nan_to_num(self.values[..., ::-1])
                if self.array_backend == "sparse":
                    xp = np
                    values = self.set_backend("numpy").values
                values[values == 0] = 1.0
                values = xp.cumprod(values, -1)[..., ::-1]
                self.values = values * self.nan_triangle
                if self.array_backend == "sparse":
                    self.values = self.get_array_module()(self.values)
            else:
                if self.array_backend != "sparse":
                    self.values = (
                        num_to_nan(xp.cumsum(xp.nan_to_num(self.values), 3))
                        * self.nan_triangle[None, None, ...])
                else:
                    values = xp.nan_to_num(self.values)
                    nan_triangle = xp.nan_to_num(self.nan_triangle)
                    l1 = lambda i: values[..., 0:(i + 1)]
                    l2 = lambda i: l1(i) * nan_triangle[..., i:i + 1]
                    l3 = lambda i: l2(i).sum(3, keepdims=True)
                    out = [l3(i) for i in range(self.shape[-1])]
                    self.values = num_to_nan(xp.concatenate(out, axis=3))
            self.is_cumulative = True
        return self
    else:
        new_obj = self.copy()
        return new_obj.incr_to_cum(inplace=True)
def _get_ultimate(self, X, expectation):
    xp = X.get_array_module()
    from chainladder.utils.utility_functions import num_to_nan
    ultimate = X.copy()
    cdf = self._align_cdf(ultimate, expectation)
    cdf = (1 - 1 / num_to_nan(cdf))[None]
    exponents = xp.arange(self.n_iters + 1)
    exponents = xp.reshape(exponents, tuple([len(exponents)] + [1] * 4))
    cdf = cdf ** (((cdf + 1e-16) / (cdf + 1e-16) * exponents))
    cdf = xp.nan_to_num(cdf)
    ultimate.values = xp.sum(cdf[:-1, ...], 0) * xp.nan_to_num(
        X.latest_diagonal.values) + cdf[-1, ...] * xp.nan_to_num(
        expectation.set_backend(X.array_backend).values)
    return self._set_ult_attr(ultimate)
def link_ratio(self):
    from chainladder.utils.utility_functions import num_to_nan
    if not self.is_pattern:
        obj = (1 / self.iloc[..., :-1]) * self.iloc[..., 1:].values
        if not obj.is_full:
            obj = obj[obj.valuation < obj.valuation_date]
        if hasattr(obj, "w_"):
            w_ = obj.w_[..., 0:1, :len(obj.odims), :]
            obj = obj * w_ if obj.shape == w_.shape else obj
        obj.is_pattern = True
        obj.is_cumulative = False
        obj.values = num_to_nan(obj.values)
        return obj
    else:
        return self
def _get_tail_stats(self, X):
    """ Method to approximate the tail sigma using log-linear extrapolation
        applied to tail average period
    """
    from chainladder.utils.utility_functions import num_to_nan
    time_pd = self._get_tail_weighted_time_period(X)
    xp = X.sigma_.get_array_module()
    reg = WeightedRegression(axis=3, xp=xp).fit(
        None, xp.log(X.sigma_.values), None)
    sigma_ = xp.exp(time_pd * reg.slope_ + reg.intercept_)
    y = X.std_err_.values
    y = num_to_nan(y)
    reg = WeightedRegression(axis=3, xp=xp).fit(None, xp.log(y), None)
    std_err_ = xp.exp(time_pd * reg.slope_ + reg.intercept_)
    return sigma_, std_err_
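# Illustrative sketch, not part of the library: the tail sigma is read off a
# log-linear fit of the observed sigmas, evaluated at a (possibly fractional)
# weighted time period.  time_pd below is a hypothetical stand-in for
# _get_tail_weighted_time_period:
import numpy as np

sigma = np.array([0.8, 0.4, 0.2, 0.1])   # halving by development period
t = np.arange(len(sigma))
slope, intercept = np.polyfit(t, np.log(sigma), 1)
time_pd = 4.5
tail_sigma = np.exp(time_pd * slope + intercept)
assert np.isclose(tail_sigma, 0.8 * 0.5 ** 4.5)   # continues the decay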
def incr_to_cum(self, inplace=False):
    """Method to convert an incremental triangle into a cumulative triangle.

    Parameters
    ----------
    inplace: bool
        If True, update the instance data attribute in place

    Returns
    -------
        Updated instance of the triangle accumulated along the development axis
    """
    # ``db`` is dask.bag, imported at module scope and set to None when dask
    # is not installed.
    from chainladder.utils.utility_functions import num_to_nan, num_to_value
    if inplace:
        xp = self.get_array_module()
        if not self.is_cumulative:
            if self.is_pattern:
                values = xp.nan_to_num(self.values[..., ::-1])
                values = num_to_value(values, 1)
                values = xp.cumprod(values, -1)[..., ::-1]
                self.values = values * self.nan_triangle
                values = num_to_value(values, self.get_array_module(values).nan)
            else:
                if self.array_backend not in ["sparse", "dask"]:
                    self.values = (
                        xp.cumsum(xp.nan_to_num(self.values), 3)
                        * self.nan_triangle[None, None, ...])
                else:
                    values = xp.nan_to_num(self.values)
                    nan_triangle = xp.nan_to_num(self.nan_triangle)
                    l1 = lambda i: values[..., 0:i + 1]
                    l2 = lambda i: l1(i) * nan_triangle[..., i:i + 1]
                    l3 = lambda i: l2(i).sum(3, keepdims=True)
                    if db:
                        bag = db.from_sequence(range(self.shape[-1]))
                        bag = bag.map(l3)
                        out = bag.compute(scheduler='threads')
                    else:
                        out = [l3(i) for i in range(self.shape[-1])]
                    self.values = xp.concatenate(out, axis=3)
                self.values = num_to_nan(self.values)
            self.is_cumulative = True
        return self
    else:
        new_obj = self.copy()
        return new_obj.incr_to_cum(inplace=True)
def _get_full_std_err_(self, X=None):
    from chainladder.utils.utility_functions import num_to_nan
    obj = X.copy()
    xp = obj.get_array_module()
    lxp = X.ldf_.get_array_module()
    full = getattr(X, "_full_triangle_", self.full_triangle_)
    avg = {"regression": 0, "volume": 1, "simple": 2}
    avg = [avg.get(item, item) for item in X.average_]
    val = xp.broadcast_to(xp.array(avg + [avg[-1]]), X.shape)
    weight = xp.sqrt(full.values[..., :len(X.ddims)] ** (2 - val))
    obj.values = X.sigma_.values / num_to_nan(weight)
    w = lxp.concatenate((X.w_, lxp.ones((1, 1, val.shape[2], 1))), 3)
    w[xp.isnan(w)] = 1
    obj.values = xp.nan_to_num(obj.values) * xp.array(w)
    obj.valuation_date = full.valuation_date
    obj._set_slicers()
    return obj
def link_ratio(self):
    from chainladder.utils.utility_functions import num_to_nan
    xp = self.get_array_module()
    obj = copy.deepcopy(self)
    temp = num_to_nan(obj.values.copy())
    val_array = obj.valuation.values.reshape(
        obj.shape[-2:], order='f')[:, 1:]
    obj.ddims = np.array([
        '{}-{}'.format(obj.ddims[i], obj.ddims[i + 1])
        for i in range(len(obj.ddims) - 1)])
    obj.values = temp[..., 1:] / temp[..., :-1]
    if self.array_backend == 'sparse':
        obj.values.shape = tuple(obj.values.coords.max(1) + 1)
    else:
        if xp.max(xp.sum(~xp.isnan(self.values[..., -1, :]), 2) - 1) <= 0:
            obj.values = obj.values[..., :-1, :]
    obj.odims = obj.odims[:obj.values.shape[2]]
    if hasattr(obj, 'w_'):
        if obj.shape == obj.w_[..., 0:1, :len(obj.odims), :].shape:
            obj = obj * obj.w_[..., 0:1, :len(obj.odims), :]
    return obj
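# Illustrative sketch, not part of the library: the core of link_ratio is a
# shift-and-divide along the development axis, values[..., 1:] divided by
# values[..., :-1], producing age-to-age factors.  On one origin row:
import numpy as np

row = np.array([100., 150., 180., np.nan])   # cumulative losses by age
print(row[1:] / row[:-1])                    # [1.5, 1.2, nan]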
def _get_tail_stats(self, X):
    """ Method to approximate the tail sigma using log-linear extrapolation
        applied to tail average period
    """
    from chainladder.utils.utility_functions import num_to_nan
    if not hasattr(X, 'sigma_'):
        self.sigma_ = None
        self.std_err_ = None
    else:
        time_pd = self._get_tail_weighted_time_period(X)
        xp = X.sigma_.get_array_module()
        reg = WeightedRegression(axis=3, xp=xp).fit(
            None, xp.log(X.sigma_.values), None)
        sigma_ = xp.exp(time_pd * reg.slope_ + reg.intercept_)
        y = X.std_err_.values
        y = num_to_nan(y)
        reg = WeightedRegression(axis=3, xp=xp).fit(None, xp.log(y), None)
        std_err_ = xp.exp(time_pd * reg.slope_ + reg.intercept_)
        self.sigma_.values = xp.concatenate(
            (self.sigma_.values[..., :-1], sigma_[..., -1:]), axis=-1)
        self.std_err_.values = xp.concatenate(
            (self.std_err_.values[..., :-1], std_err_[..., -1:]), axis=-1)
def full_std_err_(self):
    from chainladder.utils.utility_functions import num_to_nan
    obj = copy.copy(self.X_)
    xp = obj.get_array_module()
    lxp = self.X_.ldf_.get_array_module()
    full = self.full_triangle_
    tri_array = full.values
    weight_dict = {'regression': 0, 'volume': 1, 'simple': 2}
    avg = list(self.average_) if type(self.average_) is not list \
        else self.average_
    val = xp.array(
        [weight_dict.get(item.lower(), 2) for item in avg + [avg[-1]]])
    val = xp.broadcast_to(val, self.X_.shape)
    weight = num_to_nan(
        xp.sqrt(tri_array[..., :len(self.X_.ddims)] ** (2 - val)))
    obj.values = self.X_.sigma_.values / weight
    w = lxp.concatenate(
        (self.X_.w_, lxp.ones((*val.shape[:3], 1)) * xp.nan), axis=3)
    w[xp.isnan(w)] = 1
    obj.values = xp.nan_to_num(obj.values) * xp.array(w)
    obj.valuation_date = full.valuation_date
    obj._set_slicers()
    return obj
def __init__(self, data=None, origin=None, development=None, columns=None,
             index=None, origin_format=None, development_format=None,
             cumulative=None, array_backend=None, pattern=False,
             *args, **kwargs):
    from chainladder.utils.utility_functions import num_to_nan
    # Allow Empty Triangle so that we can piece it together programmatically
    if data is None:
        return
    # Check whether all columns are unique and numeric
    check = data[columns].dtypes
    check = [check] if isinstance(check, np.dtype) else check.to_list()
    columns = [columns] if type(columns) is not list else columns
    if "object" in check:
        raise TypeError("column attribute must be numeric.")
    if data[columns].shape[1] != len(columns):
        raise AttributeError("Columns are required to have unique names")
    # Sanitize all axis inputs to lists
    str_to_list = lambda *args: tuple(
        [arg] if type(arg) in [str, pd.Period] else arg for arg in args)
    index, columns, origin, development = str_to_list(
        index, columns, origin, development)
    # Determine desired array backend of the Triangle
    if array_backend is None:
        from chainladder import ARRAY_BACKEND
        array_backend = ARRAY_BACKEND
    if (development and len(development) == 1
            and data[development[0]].dtype == "<M8[ns]"):
        u = data[data[development[0]] == ULT_VAL].copy()
        if len(u) > 0 and len(u) != len(data):
            u = TriangleBase(
                u, origin=origin, development=development,
                columns=columns, index=index)
            data = data[data[development[0]] != ULT_VAL]
        else:
            u = None
    else:
        u = None
    # Initialize origin and its grain
    origin = development if origin is None else origin
    origin_date = TriangleBase._to_datetime(
        data, origin, format=origin_format)
    self.origin_grain = TriangleBase._get_grain(origin_date)
    origin_date = (pd.PeriodIndex(origin_date, freq=self.origin_grain)
                   .to_timestamp().rename("origin"))
    # Initialize development and its grain
    m_cnt = {"Y": 12, "Q": 3, "M": 1}
    has_dev = development and len(np.unique(data[development])) > 1
    if has_dev:
        development_date = TriangleBase._to_datetime(
            data, development, period_end=True, format=development_format)
        self.development_grain = TriangleBase._get_grain(development_date)
    else:
        development_date = pd.PeriodIndex(
            origin_date + pd.tseries.offsets.MonthEnd(
                m_cnt[self.origin_grain]),
            freq={"Y": "A"}.get(self.origin_grain, self.origin_grain),
        ).to_timestamp(how="e")
        self.development_grain = self.origin_grain
    development_date.name = "development"
    # Summarize dataframe to the level specified in axes
    key_gr = [origin_date, development_date] + [
        data[item] for item in ([] if not index else index)]
    data_agg = data[columns].groupby(key_gr).sum().reset_index().fillna(0)
    if not index:
        index = ["Total"]
        data_agg[index[0]] = "Total"
    # Fill in any gaps in origin/development
    date_axes = self._get_date_axes(
        data_agg["origin"], data_agg["development"])  # cartesian product
    dev_lag = TriangleBase._development_lag(
        data_agg["origin"], data_agg["development"])
    # Grab unique index, origin, development
    dev_lag_unique = np.sort(
        TriangleBase._development_lag(
            date_axes["origin"], date_axes["development"]).unique())
    orig_unique = np.sort(date_axes["origin"].unique())
    kdims = data_agg[index].drop_duplicates().reset_index(
        drop=True).reset_index()
    # Map index, origin, development indices to data
    set_idx = (lambda col, unique: col.map(
        dict(zip(unique, range(len(unique))))).values[None].T)
    orig_idx = set_idx(data_agg["origin"], orig_unique)
    dev_idx = set_idx(dev_lag, dev_lag_unique)
    key_idx = (data_agg[index].merge(
        kdims, how="left", on=index)["index"].values[None].T)
    # origin <= development is required - truncate bad records if not true
    valid = data_agg["origin"] <= data_agg["development"]
    if sum(~valid) > 0:
        warnings.warn("Observations with development before "
                      "origin start have been removed.")
        data_agg, orig_idx = data_agg[valid], orig_idx[valid]
        dev_idx, key_idx = dev_idx[valid], key_idx[valid]
    # All Triangles start out as sparse arrays
    val_idx = (((np.ones(len(data_agg))[None].T)
                * range(len(columns))).reshape((1, -1), order="F").T)
    coords = np.concatenate(
        tuple([np.concatenate((orig_idx, dev_idx), 1)] * len(columns)), 0)
    coords = np.concatenate(
        (np.concatenate(tuple([key_idx] * len(columns)), 0), val_idx, coords), 1)
    amts = data_agg[columns].unstack()
    amts = amts.values.astype("float64")
    self.array_backend = "sparse"
    self.values = num_to_nan(
        sp(coords.T.astype('int64'), amts, prune=True,
           has_duplicates=False, sorted=True,
           shape=(len(kdims), len(columns), len(orig_unique),
                  len(dev_lag_unique) if has_dev else 1)))
    # Set all axis values
    self.valuation_date = data_agg["development"].max()
    self.kdims = kdims.drop("index", 1).values
    self.odims = orig_unique
    self.ddims = dev_lag_unique if has_dev else dev_lag[0:1].values
    self.ddims = self.ddims * (m_cnt[self.development_grain])
    if development and not has_dev:
        self.ddims = pd.DatetimeIndex(
            TriangleBase._to_datetime(
                data, development, period_end=True,
                format=development_format)[0:1])
        self.valuation_date = self.ddims[0]
    self.vdims = np.array(columns)
    # Set remaining triangle properties
    self.key_labels = index
    self.is_cumulative = cumulative
    self.virtual_columns = VirtualColumns(self)
    self.is_pattern = pattern
    if not AUTO_SPARSE or array_backend == "cupy":
        self.set_backend(array_backend, inplace=True)
    else:
        self = self._auto_sparse()
    self._set_slicers()
    if self.is_pattern:
        obj = self.dropna()
        self.odims = obj.odims
        self.ddims = obj.ddims
        self.values = obj.values
    if u:
        obj = concat(
            (self.dev_to_val().iloc[..., :len(u.odims), :], u), -1)
        obj = obj.val_to_dev()
        self.odims = obj.odims
        self.ddims = obj.ddims
        self.values = obj.values
        self.valuation_date = pd.Timestamp(ULT_VAL)
def fit(self, X, y=None, sample_weight=None):
    """Fit the model with X.

    Parameters
    ----------
    X : Triangle-like
        Triangle to which the development patterns are fit.  Triangle must
        be expressed with development lags.
    y : None
        Ignored
    sample_weight : Ignored

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    if X.array_backend == "sparse":
        X = X.set_backend("numpy")
    else:
        X = X.copy()
    xp = X.get_array_module()
    from chainladder.utils.utility_functions import num_to_nan
    if type(X.ddims) != np.ndarray:
        raise ValueError("Triangle must be expressed with development lags")
    if self.fillna:
        tri_array = num_to_nan((X + self.fillna).values)
    else:
        tri_array = num_to_nan(X.values.copy())
    if type(self.average) is not list:
        self.average_ = np.array([self.average] * (tri_array.shape[-1] - 1))
    else:
        self.average_ = np.array(self.average)
    if type(self.n_periods) is not list:
        n_periods = [self.n_periods] * (tri_array.shape[-1] - 1)
    else:
        n_periods = self.n_periods
    n_periods = np.array(n_periods)
    self.n_periods_ = n_periods
    weight_dict = {"regression": 0, "volume": 1, "simple": 2}
    x, y = tri_array[..., :-1], tri_array[..., 1:]
    val = xp.nan_to_num(
        xp.array([weight_dict.get(item, item) for item in self.average_]
                 )[None, None, None] * (y * 0 + 1))
    link_ratio = y / x
    self.w_ = xp.array(
        self._assign_n_periods_weight(X)
        * self._drop_adjustment(X, link_ratio))
    w = self.w_ / (x ** (val))
    params = WeightedRegression(axis=2, thru_orig=True, xp=xp).fit(x, y, w)
    if self.n_periods != 1:
        params = params.sigma_fill(self.sigma_interpolation)
    else:
        warnings.warn("Setting n_periods=1 does not allow enough degrees "
                      "of freedom to support calculation of all regression"
                      " statistics.  Only LDFs have been calculated.")
    params.std_err_ = xp.nan_to_num(params.std_err_) + xp.nan_to_num(
        (1 - xp.nan_to_num(params.std_err_ * 0 + 1))
        * params.sigma_
        / xp.swapaxes(xp.sqrt(x ** (2 - val))[..., 0:1, :], -1, -2))
    params = xp.concatenate(
        (params.slope_, params.sigma_, params.std_err_), 3)
    params = xp.swapaxes(params, 2, 3)
    self.ldf_ = self._param_property(X, params, 0)
    self.sigma_ = self._param_property(X, params, 1)
    self.std_err_ = self._param_property(X, params, 2)
    resid = -X.iloc[..., :-1] * self.ldf_.values + X.iloc[..., 1:].values
    std = xp.sqrt((1 / num_to_nan(w)) * (self.sigma_ ** 2).values)
    resid = resid / std
    self.std_residuals_ = resid[resid.valuation < X.valuation_date]
    return self
def grain(self, grain="", trailing=False, inplace=False):
    """Changes the grain of a cumulative triangle.

    Parameters
    ----------
    grain : str
        The grain to which you want your triangle converted, specified as
        'OXDY' where X and Y can take on values of ``['Y', 'S', 'Q', 'M']``.
        For example, 'OYDY' for Origin Year/Development Year, 'OQDM' for
        Origin quarter/Development Month, etc.
    trailing : bool
        For partial origin years/quarters, trailing will set the
        year/quarter end to that of the latest available from the origin
        data.
    inplace : bool
        Whether to mutate the existing Triangle instance or return a new
        one.

    Returns
    -------
        Triangle
    """
    from chainladder.utils.utility_functions import num_to_nan
    ograin_old, ograin_new = self.origin_grain, grain[1:2]
    dgrain_old, dgrain_new = self.development_grain, grain[-1]
    valid = {"Y": ["Y"], "Q": ["Q", "S", "Y"],
             "M": ["Y", "S", "Q", "M"], "S": ["S", "Y"]}
    if ograin_new not in valid.get(ograin_old, []) or \
            dgrain_new not in valid.get(dgrain_old, []):
        raise ValueError("New grain not compatible with existing grain")
    if (self.is_cumulative is None and dgrain_old != dgrain_new
            and self.shape[-1] > 1):
        raise AttributeError(
            "The is_cumulative attribute must be set before using "
            "grain method.")
    if valid["M"].index(ograin_new) > valid["M"].index(dgrain_new):
        raise ValueError("Origin grain must be coarser than development grain")
    if self.is_full and not self.is_ultimate and not self.is_val_tri:
        warnings.warn('Triangle includes extraneous development lags')
    else:
        d_limit = None
    obj = self.dev_to_val()
    if ograin_new != ograin_old:
        freq = {"Y": "A", "S": "2Q"}.get(ograin_new, ograin_new)
        mn = self.origin[-1].strftime("%b").upper() if trailing else "DEC"
        indices = pd.Series(
            range(len(self.origin)), index=self.origin).resample(
            '-'.join([freq, mn])).indices
        groups = pd.concat(
            [pd.Series([k] * len(v), index=v) for k, v in indices.items()],
            axis=0).values
        obj = obj.groupby(groups, axis=2).sum()
        obj.origin_close = mn
        if len(obj.ddims) > 1 and pd.Timestamp(obj.odims[0]).strftime(
                "%Y%m") != obj.valuation[0].strftime("%Y%m"):
            addl_ts = (
                pd.period_range(obj.odims[0], obj.valuation[0], freq="M")[:-1]
                .to_timestamp().values)
            addl = obj.iloc[..., -len(addl_ts):] * 0
            addl.ddims = addl_ts
            obj = concat((addl, obj), axis=-1)
            obj.values = num_to_nan(obj.values)
    if dgrain_old != dgrain_new and obj.shape[-1] > 1:
        step = self._dstep()[dgrain_old][dgrain_new]
        d = np.sort(
            len(obj.development)
            - np.arange(0, len(obj.development), step) - 1)
        if obj.is_cumulative:
            obj = obj.iloc[..., d]
        else:
            ddims = obj.ddims[d]
            d2 = [d[0]] * (d[0] + 1) + list(np.repeat(np.array(d[1:]), step))
            obj = obj.groupby(d2, axis=3).sum()
            obj.ddims = ddims
        obj.development_grain = dgrain_new
    obj = obj.dev_to_val() if self.is_val_tri else obj.val_to_dev()
    if inplace:
        self = obj
        return self
    return obj
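# Usage sketch, assuming the installed chainladder package and its bundled
# 'quarterly' sample (origin years by development quarters):
import chainladder as cl

quarterly = cl.load_sample('quarterly')   # shape (1, 2, 12, 45)
annual = quarterly.grain('OYDY')          # development regrained to years
print(annual.shape)                       # development axis collapses 45 -> 12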
def fit(self, X, y=None, sample_weight=None):
    """Fit the model with X.

    Parameters
    ----------
    X : Triangle-like
        Triangle to which the incremental method is applied.  Triangle must
        be cumulative.
    y : None
        Ignored
    sample_weight :
        Exposure used in the method.

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    from chainladder import ULT_VAL
    from chainladder.utils.utility_functions import num_to_nan
    if type(X.ddims) != np.ndarray:
        raise ValueError("Triangle must be expressed with development lags")
    if X.array_backend == "sparse":
        X = X.set_backend("numpy")
    else:
        X = X.copy()
    if sample_weight.array_backend == "sparse":
        sample_weight = sample_weight.set_backend("numpy")
    xp = X.get_array_module()
    sample_weight.is_cumulative = False
    obj = X.cum_to_incr() / sample_weight.values
    if hasattr(X, "trend_"):
        if self.trend != 0:
            warnings.warn(
                "IncrementalAdditive Trend assumption is ignored when X "
                "has a trend_ property.")
        x = obj * obj.trend_.values
    else:
        x = obj.trend(self.trend, axis='valuation')
    w_ = Development(
        n_periods=self.n_periods - 1, drop=self.drop,
        drop_high=self.drop_high, drop_low=self.drop_low,
        drop_valuation=self.drop_valuation).fit(x).w_
    # This will miss drops on the latest diagonal
    w_ = num_to_nan(w_)
    w_ = xp.concatenate(
        (w_, (w_[..., -1:] * x.nan_triangle)[..., -1:]), axis=-1)
    if self.average == "simple":
        y_ = xp.nanmean(w_ * x.values, axis=-2)
    if self.average == "volume":
        y_ = xp.nansum(w_ * x.values * sample_weight.values, axis=-2)
        y_ = y_ / xp.nansum(w_ * sample_weight.values, axis=-2)
    self.zeta_ = X.iloc[..., -1:, :]
    self.zeta_.values = y_[:, :, None, :]
    y_ = xp.repeat(y_[..., None, :], len(x.odims), -2)
    obj = x.copy()
    keeps = (
        1 - xp.nan_to_num(x.nan_triangle)
        + xp.nan_to_num(
            x[x.valuation == x.valuation_date].values[0, 0, ...] * 0 + 1))
    obj.values = y_ * keeps
    obj.valuation_date = obj.valuation.max()
    obj.values = obj.values * (1 - xp.nan_to_num(x.nan_triangle)) + \
        xp.nan_to_num((X.cum_to_incr().values / sample_weight.values))
    obj.values[obj.values == 0] = xp.nan
    obj._set_slicers()
    obj.valuation_date = obj.valuation.max()
    future_trend = self.trend if not self.future_trend else self.future_trend
    self.incremental_ = obj * sample_weight.values
    self.incremental_ = self.incremental_.trend(
        1 / (1 + future_trend) - 1, axis='valuation',
        start=X.valuation_date, end=self.incremental_.valuation_date)
    self.ldf_ = obj.incr_to_cum().link_ratio
    self.sigma_ = self.std_err_ = 0 * self.ldf_
    return self
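# Illustrative sketch, not part of the library: the heart of the incremental
# additive method is zeta_, the column-wise average of incremental-loss to
# exposure ratios, later used to fill unobserved cells as exposure * zeta.
# With a hypothetical 3x3 triangle:
import numpy as np

incr = np.array([[40., 20., 10.],
                 [44., 22., np.nan],
                 [48., np.nan, np.nan]])   # incremental losses
expo = np.array([100., 110., 120.])        # exposure by origin year
zeta = np.nanmean(incr / expo[:, None], axis=0)   # simple average per column
assert np.allclose(zeta, [0.4, 0.2, 0.1])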
def __init__(self, data=None, origin=None, development=None, columns=None,
             index=None, origin_format=None, development_format=None,
             cumulative=None, array_backend=None, pattern=False,
             trailing=False, *args, **kwargs):
    from chainladder.utils.utility_functions import num_to_nan
    if data is None:
        return
    index, columns, origin, development = self._input_validation(
        data, index, columns, origin, development)
    data, ult = self._split_ult(data, index, columns, origin, development)
    origin_date = self._to_datetime(
        data, origin, format=origin_format).rename('__origin__')
    self.origin_grain = self._get_grain(origin_date)
    self.origin_grain = 'S' if self.origin_grain == '2Q' \
        else self.origin_grain
    development_date = self._set_development(
        data, development, development_format, origin_date)
    self.development_grain = (
        self._get_grain(development_date)
        if development_date.nunique() != 1 else self.origin_grain)
    data_agg = self._aggregate_data(
        data, origin_date, development_date, index, columns)
    date_axes = self._get_date_axes(
        data_agg["__origin__"], data_agg["__development__"])
    # Deal with labels
    if not index:
        index = ["Total"]
        data_agg[index[0]] = "Total"
    self.kdims, key_idx = self._set_kdims(data_agg, index)
    self.vdims = np.array(columns)
    self.odims, orig_idx = self._set_odims(data_agg, date_axes)
    self.ddims, dev_idx = self._set_ddims(data_agg, date_axes)
    # Set the Triangle values
    coords, amts = self._set_values(
        data_agg, key_idx, columns, orig_idx, dev_idx)
    self.values = num_to_nan(
        sp(coords, amts, prune=True, has_duplicates=False, sorted=True,
           shape=(len(self.kdims), len(self.vdims),
                  len(self.odims), len(self.ddims))))
    # Set remaining triangle properties
    val_date = data_agg["__development__"].max()
    val_date = val_date.compute() if hasattr(val_date, 'compute') \
        else val_date
    self.key_labels = index
    self.valuation_date = val_date
    self.is_cumulative = cumulative
    self.virtual_columns = VirtualColumns(self)
    self.is_pattern = pattern
    self.origin_close = 'DEC'
    if self.origin_grain != 'M' and trailing:
        self.origin_close = pd.to_datetime(
            self.odims[-1]).strftime('%b').upper()
    # Deal with array backend
    self.array_backend = "sparse"
    if array_backend is None:
        array_backend = options.ARRAY_BACKEND
    if not options.AUTO_SPARSE or array_backend == "cupy":
        self.set_backend(array_backend, inplace=True)
    else:
        self = self._auto_sparse()
    self._set_slicers()
    # Deal with special properties
    if self.is_pattern:
        obj = self.dropna()
        self.odims = obj.odims
        self.ddims = obj.ddims
        self.values = obj.values
    if ult:
        obj = concat(
            (self.dev_to_val().iloc[..., :len(ult.odims), :], ult), -1)
        obj = obj.val_to_dev()
        self.odims = obj.odims
        self.ddims = obj.ddims
        self.values = obj.values
        self.valuation_date = pd.Timestamp(options.ULT_VAL)
def _arithmetic_cleanup(self, obj):
    """ Common functionality AFTER arithmetic operations """
    from chainladder.utils.utility_functions import num_to_nan
    obj.values = obj.values * obj.get_array_module().nan_to_num(
        obj.nan_triangle)
    obj.values = num_to_nan(obj.values)
    return obj
def __rtruediv__(self, other):
    """ Right division, i.e. ``other / Triangle`` """
    from chainladder.utils.utility_functions import num_to_nan
    obj = self.copy()
    obj.values = other / self.values
    obj.values = num_to_nan(obj.values)
    return obj
def fit(self, X, y=None, sample_weight=None):
    """Fit the model with X.

    Parameters
    ----------
    X : Triangle-like
        Triangle to which the development patterns are fit.  Triangle must
        be expressed with development lags.
    y : Ignored
    sample_weight : Ignored

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    if X.array_backend == 'sparse':
        X = X.set_backend('numpy')
    else:
        X = copy.deepcopy(X)
    xp = X.get_array_module()
    from chainladder.utils.utility_functions import num_to_nan
    if type(X.ddims) != np.ndarray:
        raise ValueError('Triangle must be expressed with development lags')
    if self.fillna:
        tri_array = num_to_nan((X + self.fillna).values)
    else:
        tri_array = num_to_nan(X.values.copy())
    if type(self.average) is not list:
        average = [self.average] * (tri_array.shape[-1] - 1)
    else:
        average = self.average
    average = np.array(average)
    self.average_ = average
    if type(self.n_periods) is not list:
        n_periods = [self.n_periods] * (tri_array.shape[-1] - 1)
    else:
        n_periods = self.n_periods
    n_periods = np.array(n_periods)
    self.n_periods_ = n_periods
    weight_dict = {'regression': 0, 'volume': 1, 'simple': 2}
    x, y = tri_array[..., :-1], tri_array[..., 1:]
    val = xp.array([weight_dict.get(item.lower(), 1) for item in average])
    for i in [2, 1, 0]:
        val = xp.repeat(val[None], tri_array.shape[i], axis=0)
    val = xp.nan_to_num(val * (y * 0 + 1))
    link_ratio = y / x
    self.w_ = xp.array(
        self._assign_n_periods_weight(X)
        * self._drop_adjustment(X, link_ratio))
    w = self.w_ / (x ** (val))
    params = WeightedRegression(axis=2, thru_orig=True, xp=xp).fit(x, y, w)
    if self.n_periods != 1:
        params = params.sigma_fill(self.sigma_interpolation)
    else:
        warnings.warn('Setting n_periods=1 does not allow enough degrees '
                      'of freedom to support calculation of all regression'
                      ' statistics.  Only LDFs have been calculated.')
    params.std_err_ = xp.nan_to_num(params.std_err_) + xp.nan_to_num(
        (1 - xp.nan_to_num(params.std_err_ * 0 + 1))
        * params.sigma_
        / xp.swapaxes(xp.sqrt(x ** (2 - val))[..., 0:1, :], -1, -2))
    params = xp.concatenate(
        (params.slope_, params.sigma_, params.std_err_), 3)
    params = xp.swapaxes(params, 2, 3)
    self.ldf_ = self._param_property(X, params, 0)
    self.sigma_ = self._param_property(X, params, 1)
    self.std_err_ = self._param_property(X, params, 2)
    return self
def num_to_nan(self):
    """ Apply the num_to_nan utility to the values array in place. """
    from chainladder.utils.utility_functions import num_to_nan
    self.values = num_to_nan(self.values)
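# Illustrative sketch, not the library implementation: on dense backends
# num_to_nan behaves like a zero-to-nan conversion, keeping empty cells out
# of sums, products, and divisions:
import numpy as np

def num_to_nan_dense(arr):
    """Return a float copy of arr with exact zeros replaced by nan."""
    out = arr.astype(float).copy()
    out[out == 0] = np.nan
    return out

print(num_to_nan_dense(np.array([1.0, 0.0, 2.0])))   # [ 1. nan  2.]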