def cum_to_incr(self, inplace=False):
    """Convert a cumulative triangle into an incremental triangle.

    Parameters
    ----------
    inplace: bool
        Set to True will update the instance data attribute inplace

    Returns
    -------
    Updated instance of the triangle with incremental amounts along the
    development axis.
    """
    if inplace:
        # Preserve the valuation date; the concat/arithmetic below can
        # otherwise disturb it.
        v = self.valuation_date
        if self.is_cumulative or self.is_cumulative is None:
            if self.is_pattern:
                # Patterns are multiplicative: the incremental form is
                # the ratio of each factor to its successor.
                xp = self.get_array_module()
                self.values = xp.nan_to_num(self.values)
                # Zeros would blow up the division below; map them to 1.
                self.values[self.values == 0] = 1
                diff = self.iloc[..., :-1] / self.iloc[..., 1:].values
                self = concat((diff, self.iloc[..., -1],), axis=3,)
                # Restore the triangle's nan mask after the concat.
                self.values = self.values * self.nan_triangle
            else:
                # Additive triangles: first differences along development.
                diff = self.iloc[..., 1:] - self.iloc[..., :-1].values
                self = concat((self.iloc[..., 0], diff,), axis=3,)
            self.is_cumulative = False
        self.valuation_date = v
        return self
    else:
        new_obj = self.copy()
        return new_obj.cum_to_incr(inplace=True)
def shift(self, periods=-1, axis=3):
    """Shift elements along an axis by desired number of periods.

    Data that falls beyond the existing shape of the Triangle is
    eliminated and new cells default to zero.

    Parameters
    ----------
    periods : int
        Number of periods to shift. Can be positive or negative.
    axis : {2 or 'origin', 3 or 'development', None}, default 3
        Shift direction.

    Returns
    -------
    Triangle updated with shifted elements
    """
    axis = self._get_axis(axis)
    if axis < 2:
        raise AttributeError(
            "Lagging only supported for origin and development axes")
    if periods == 0:
        return self
    if periods > 0:
        # Positive shift: drop the first slice and append a zeroed slice
        # at the end, relabeling so axis labels stay aligned.
        if axis == 3:
            out = concat((self.iloc[..., 1:].rename(
                'development', self.development[:-1]),
                (self.iloc[..., -1:] * 0)), axis=axis)
        else:
            out = concat(
                (self.iloc[..., 1:, :].rename('origin', self.origin[:-1]),
                 (self.iloc[..., -1:, :] * 0)), axis=axis)
    else:
        # Negative shift: prepend a zeroed slice and drop the last slice.
        if axis == 3:
            out = concat((
                (self.iloc[..., :1] * 0),
                self.iloc[..., :-1].rename('development', self.development[1:]),
            ), axis=axis)
        else:
            out = concat((
                (self.iloc[..., :1, :] * 0),
                self.iloc[..., :-1, :].rename('origin', self.origin[1:]),
            ), axis=axis)
    if abs(periods) == 1:
        return out
    # BUG FIX: the original called ``self.shift(out, periods - 1, axis)``,
    # passing ``out`` as the ``periods`` argument. Recurse on the shifted
    # result with the remaining period count moved one step toward zero.
    return out.shift(periods - 1 if periods > 0 else periods + 1, axis)
def agg_func(self, *args, **kwargs):
    """Apply a groupby aggregation along ``self.axis`` and rebuild the index.

    NOTE(review): ``v`` is not defined in this body — it appears to be
    bound in an enclosing scope (presumably the aggregation method name,
    e.g. 'sum') where this function is generated; confirm against caller.
    """
    from chainladder.utils import concat
    xp = self.obj.get_array_module()  # NOTE(review): unused in this body
    # Aggregate each groupby partition independently along the axis.
    values = [
        getattr(
            self.obj.iloc.__getitem__(
                tuple([slice(None)] * self.axis + [i])),
            v)(self.axis, auto_sparse=False, keepdims=True)
        for i in self.groups.indices.values()
    ]
    self.obj = concat(values, axis=self.axis, ignore_index=True)
    if isinstance(self.groups.dtypes.index, pd.MultiIndex):
        # Flatten the MultiIndex of group keys into a plain DataFrame.
        index = pd.DataFrame(np.zeros(len(self.groups.dtypes.index)),
                             index=self.groups.dtypes.index,
                             columns=['_']).reset_index().iloc[:, :-1]
        self.obj.index = index
    else:
        index = pd.DataFrame(self.groups.dtypes.index)
    self.obj.key_labels = index.columns.tolist()
    self.obj.kdims = index.values
    self.obj._set_slicers()
    return self.obj
def _set_ldf(self, X):
    """Convert case/paid-to-prior-case patterns into multiplicative LDFs.

    Squares the case triangle using ``case_ldf_`` (backward and forward
    from the valuation diagonal), derives a full paid triangle from
    ``paid_ldf_``, and returns the combined paid/incurred development
    pattern as an incremental multiplicative LDF triangle.

    Parameters
    ----------
    X : Triangle
        Triangle containing the paid and incurred columns named in
        ``self.paid_to_incurred``.

    Returns
    -------
    Triangle of incremental multiplicative development factors.
    """
    paid_tri = X[self.paid_to_incurred[0]]
    incurred_tri = X[self.paid_to_incurred[1]]
    case = incurred_tri - paid_tri
    original_val_date = case.valuation_date
    case_ldf_ = self.case_ldf_.copy()
    case_ldf_.valuation_date = pd.Timestamp(options.ULT_VAL)
    xp = case_ldf_.get_array_module()
    # Broadcast triangle shape
    case_ldf_ = case_ldf_ * case.latest_diagonal / case.latest_diagonal
    case_ldf_.odims = case.odims
    case_ldf_.is_pattern = False
    # Prepend a unit factor so the pattern aligns with development lags.
    case_ldf_.values = xp.concatenate(
        (xp.ones(list(case_ldf_.shape[:-1]) + [1]),
         case_ldf_.values), axis=-1)
    case_ldf_.ddims = case.ddims
    case_ldf_.valuation_date = case_ldf_.valuation.max()
    case_ldf_ = case_ldf_.dev_to_val().set_backend(
        self.case_ldf_.array_backend)  # Will this work for sparse?
    # Cumulative factors forward of the valuation date...
    forward = case_ldf_[case_ldf_.valuation > original_val_date].values
    forward[xp.isnan(forward)] = 1.0
    forward = xp.cumprod(forward, -1)
    # ...and backward (reciprocal, accumulated right-to-left) behind it.
    # BUG FIX: removed a dead duplicate expression statement
    # ``1/case_ldf_[case_ldf_.valuation<=original_val_date]`` whose
    # result was discarded.
    backward = 1 / case_ldf_[case_ldf_.valuation <= original_val_date].values
    backward[xp.isnan(backward)] = 1.0
    backward = xp.cumprod(backward[..., ::-1], -1)[..., ::-1][..., 1:]
    nans = case_ldf_ / case_ldf_
    case_ldf_.values = xp.concatenate(
        (backward, (case.latest_diagonal * 0 + 1).values, forward), -1)
    case = (case_ldf_ * nans.values * case.latest_diagonal.values
            ).val_to_dev().iloc[..., :len(case.ddims)]
    ld = case[case.valuation == X.valuation_date].sum('development').sum('origin')
    ld = ld / ld
    patterns = ((1 - np.nan_to_num(X.nan_triangle[..., 1:]))
                * (self.paid_ldf_ * ld).values)
    paid = (case.iloc[..., :-1] * patterns)
    paid.ddims = case.ddims[1:]
    paid.valuation_date = pd.Timestamp(options.ULT_VAL)
    # Create a full triangle of incurreds to support a multiplicative LDF
    paid = (paid_tri.cum_to_incr() + paid).incr_to_cum()
    inc = (case[case.valuation > X.valuation_date]
           + paid[paid.valuation > X.valuation_date] + incurred_tri)
    # Combine paid and incurred into a single object
    paid.columns = [self.paid_to_incurred[0]]
    inc.columns = [self.paid_to_incurred[1]]
    cols = X.columns[X.columns.isin(
        [self.paid_to_incurred[0], self.paid_to_incurred[1]])]
    dev = concat((paid, inc), 1)[list(cols)]
    # Convert the paid/incurred to multiplicative LDF
    dev = (dev.iloc[..., -1] / dev).iloc[..., :-1]
    dev.valuation_date = pd.Timestamp(options.ULT_VAL)
    dev.ddims = X.link_ratio.ddims
    dev.is_pattern = True
    dev.is_cumulative = True
    self.case = case
    self.paid = paid
    return dev.cum_to_incr()
def _arithmetic_mapper(self, obj, other, f):
    """Map ``f`` over the union of keys, using Dask when available.

    Falls back to a plain list comprehension when Dask is absent or the
    backend is not sparse; results are concatenated and index-sorted.
    """
    keys = self._get_key_union(obj, other)
    if db and obj.obj.array_backend == 'sparse':
        pieces = (db.from_sequence(keys)
                    .map(f, self, obj, other)
                    .compute(scheduler='threads'))
    else:
        pieces = [f(key, self, obj, other) for key in keys]
    return concat(pieces, 0).sort_index()
def agg_func(self, *args, **kwargs):
    """Groupby aggregation along ``self.axis`` with optional Dask fan-out.

    NOTE(review): ``v`` is not defined in this body — it appears to be
    bound in an enclosing scope (presumably the aggregation method name,
    e.g. 'sum') where this function is generated; confirm against caller.
    """
    from chainladder.utils import concat
    xp = self.obj.get_array_module()  # NOTE(review): unused in this body
    obj = self.obj.copy()
    auto_sparse = kwargs.pop("auto_sparse", True)
    if db and obj.array_backend == 'sparse':
        # Fan the per-group aggregation out over a Dask bag of threads.
        def aggregate(i, obj, axis, v):
            return getattr(
                obj.iloc.__getitem__(tuple([slice(None)] * axis + [i])), v
            )(axis, auto_sparse=False, keepdims=True)
        bag = db.from_sequence(self.groups.indices.values())
        bag = bag.map(aggregate, obj, self.axis, v)
        values = bag.compute(scheduler='threads')
    else:
        values = [
            getattr(obj.iloc.__getitem__(tuple([slice(None)] * self.axis + [i])), v)(
                self.axis, auto_sparse=False, keepdims=True
            )
            for i in self.groups.indices.values()
        ]
    obj = concat(values, axis=self.axis, ignore_index=True)
    if self.axis == 0:
        # Rebuild the key index from the group labels.
        if isinstance(self.groups.dtypes.index, pd.MultiIndex):
            index = (
                pd.DataFrame(
                    np.zeros(len(self.groups.dtypes.index)),
                    index=self.groups.dtypes.index,
                    columns=["_"],
                )
                .reset_index()
                .iloc[:, :-1]
            )
            obj.index = index
        else:
            index = pd.DataFrame(self.groups.dtypes.index)
        obj.key_labels = index.columns.tolist()
        obj.kdims = index.values
    if self.axis == 1:
        obj.vdims = pd.DataFrame(self.groups.dtypes.index).values[:, 0]
    if self.axis == 2:
        # Grouping on origin: rebuild origin dates and re-derive the grain.
        odims = self.obj._to_datetime(
            pd.Series(self.groups.indices.keys()).to_frame(), [0])
        obj.origin_grain = self.obj._get_grain(odims)
        # '2Q' is represented as semester ('S') grain.
        obj.origin_grain = 'S' if obj.origin_grain == '2Q' else obj.origin_grain
        obj.odims = odims.values
    obj._set_slicers()
    if auto_sparse:
        obj = obj._auto_sparse()
    return obj
def append(self, other):
    """Append rows of ``other`` to the end of caller, returning a new object.

    Parameters
    ----------
    other : Triangle
        The data to append.

    Returns
    -------
    New Triangle with appended data.
    """
    from chainladder.utils.utility_functions import concat
    combined = concat((self, other), 0)
    return combined
def agg_func(self, *args, **kwargs):
    """Groupby aggregation that also regroups a fitted ``ldf_`` attribute.

    NOTE(review): ``v`` is not defined in this body — it appears to be
    bound in an enclosing scope (presumably the aggregation method name,
    e.g. 'sum') where this function is generated; confirm against caller.
    """
    from chainladder.utils import concat
    from chainladder.methods import Chainladder
    xp = self.obj.get_array_module()  # NOTE(review): unused in this body
    obj = self.obj.copy()
    # Aggregate each groupby partition independently along the axis.
    values = [
        getattr(obj.iloc.__getitem__(tuple([slice(None)] * self.axis + [i])), v)(
            self.axis, auto_sparse=False, keepdims=True
        )
        for i in self.groups.indices.values()
    ]
    obj = concat(values, axis=self.axis, ignore_index=True)
    if self.axis == 0:
        if isinstance(self.groups.dtypes.index, pd.MultiIndex):
            index = (
                pd.DataFrame(
                    np.zeros(len(self.groups.dtypes.index)),
                    index=self.groups.dtypes.index,
                    columns=["_"],
                )
                .reset_index()
                .iloc[:, :-1]
            )
            obj.index = index
        else:
            index = pd.DataFrame(self.groups.dtypes.index)
        obj.key_labels = index.columns.tolist()
        obj.kdims = index.values
    else:
        index = pd.DataFrame(self.groups.dtypes.index).values[:, 0]
        if self.axis == 1:
            obj.vdims = index
    obj._set_slicers()
    if hasattr(obj, 'ldf_'):
        if len(obj.ldf_) > 1:  # Bypass grouped ldf_ if there is only one anyway
            # Re-derive development patterns on the grouped data.
            new_ldf = Chainladder().fit(self.obj).full_expectation_
            new_ldf = new_ldf.groupby(self.by).sum()  # Need to generalize sum
            new_ldf = new_ldf.link_ratio.iloc[..., :self.obj.ldf_.shape[-1]]
            if new_ldf.get_array_module().all(
                    (new_ldf.values.max(2) - new_ldf.values.min(2)) < 1e-6):
                # if after grouping there is still only one, then compress to 1
                new_ldf = new_ldf.iloc[..., 0, :]
            obj.ldf_ = new_ldf
    return obj
def dev_to_val(self, inplace=False):
    """Convert a development-lag triangle to a valuation triangle.

    Parameters
    ----------
    inplace : bool
        Whether to mutate the existing Triangle instance or return a
        new one.

    Returns
    -------
    Triangle
        Updated instance of the triangle with valuation periods.
    """
    if self.is_val_tri:
        # Already a valuation triangle — nothing to do.
        if inplace:
            return self
        else:
            return self.copy()
    is_cumulative = self.is_cumulative
    if self.is_full:
        # Full triangles are converted in incremental form; the
        # ultimate column (if any) is split off and re-attached below.
        if is_cumulative:
            obj = self.cum_to_incr(inplace=inplace)
        else:
            obj = self.copy()
        if self.is_ultimate:
            ultimate = obj.iloc[..., -1:]
            obj = obj.iloc[..., :-1]
    else:
        obj = self
    obj = obj._val_dev(1, inplace)
    # Keep only valuations on or before the triangle's valuation date.
    ddims = obj.valuation[obj.valuation <= obj.valuation_date]
    obj.ddims = ddims.drop_duplicates().sort_values()
    if self.is_full:
        if self.is_ultimate:
            # Re-attach the ultimate as its own valuation column.
            ultimate.ddims = pd.DatetimeIndex(ultimate.valuation[0:1])
            obj = concat((obj, ultimate), -1)
        if is_cumulative:
            obj = obj.incr_to_cum(inplace=inplace)
    return obj
def val_to_dev(self, inplace=False):
    """Convert a valuation triangle to a development-lag triangle.

    Parameters
    ----------
    inplace : bool
        Whether to mutate the existing Triangle instance or return a
        new one.

    Returns
    -------
    Updated instance of triangle with development lags
    """
    if not self.is_val_tri:
        # Already a development triangle — nothing to do.
        if inplace:
            return self
        else:
            return self.copy()
    if self.is_ultimate:
        # Split off the ultimate column and label it with lag 9999.
        ultimate = self.iloc[..., -1:]
        ultimate.ddims = np.array([9999])
        obj = self.iloc[..., :-1]._val_dev(-1, inplace)
    else:
        obj = self.copy()._val_dev(-1, inplace)
    val_0 = obj.valuation[0]
    # A single-column triangle valued at the valuation date anchors to
    # the latest origin; otherwise anchor to the earliest origin.
    if self.ddims.shape[-1] == 1 and self.ddims[0] == self.valuation_date:
        origin_0 = pd.to_datetime(obj.odims[-1])
    else:
        origin_0 = pd.to_datetime(obj.odims[0])
    # First development lag in months, inclusive of the first period.
    lag_0 = (val_0.year - origin_0.year) * 12 + val_0.month - origin_0.month + 1
    scale = {"Y": 12, "Q": 3, "M": 1}[obj.development_grain]
    obj.ddims = np.arange(obj.values.shape[-1]) * scale + lag_0
    # Use the latest origin row to find where real data ends.
    prune = obj[obj.origin == obj.origin.max()]
    if self.is_ultimate:
        obj = obj.iloc[..., :(
            prune.valuation <= prune.valuation_date).sum()]
        obj = concat((obj, ultimate), -1)
    return obj
def grain(self, grain="", trailing=False, inplace=False):
    """Changes the grain of a cumulative triangle.

    Parameters
    ----------
    grain : str
        The grain to which you want your triangle converted, specified as
        'OXDY' where X and Y can take on values of ``['Y', 'Q', 'M']``
        For example, 'OYDY' for Origin Year/Development Year, 'OQDM' for
        Origin quarter/Development Month, etc.
    trailing : bool
        For partial years/quarters, trailing will set the year/quarter
        end to that of the latest available from the data.
    inplace : bool
        Whether to mutate the existing Triangle instance or return a
        new one.

    Returns
    -------
    Triangle
    """
    ograin_old, ograin_new = self.origin_grain, grain[1:2]
    dgrain_old, dgrain_new = self.development_grain, grain[-1]
    # Only coarsening of grain is supported.
    valid = {"Y": ["Y"], "Q": ["Q", "Y"], "M": ["Y", "Q", "M"]}
    if ograin_new not in valid.get(
            ograin_old, []) or dgrain_new not in valid.get(dgrain_old, []):
        raise ValueError("New grain not compatible with existing grain")
    if (self.is_cumulative is None and dgrain_old != dgrain_new
            and self.shape[-1] > 1):
        raise AttributeError(
            "The is_cumulative attribute must be set before using grain method."
        )
    if valid["M"].index(ograin_new) > valid["M"].index(dgrain_new):
        raise ValueError(
            "Origin grain must be coarser than development grain")
    # Work in valuation space while regrouping origins.
    obj = self.dev_to_val()
    if ograin_new != ograin_old:
        if trailing:
            # Anchor periods to the month of the latest origin.
            mn = self.origin[-1].strftime(
                "%b").upper() if trailing else "DEC"
            freq = "Q-" if ograin_new == "Q" else "A-"
            o = pd.PeriodIndex(self.origin, freq=freq + mn)
            o = np.array(o.to_timestamp(how="s"))
        else:
            freq = "%YQ%q" if ograin_new == "Q" else "%Y"
            o = pd.to_datetime(self.origin.strftime(freq)).values
        # Sum origin rows within each new (coarser) origin bucket.
        values = [
            getattr(obj.loc[..., i, :], "sum")(2, auto_sparse=False,
                                               keepdims=True)
            for i in self.origin.groupby(o).values()
        ]
        obj = concat(values, axis=2, ignore_index=True)
        obj.odims = np.unique(o)
        obj.origin_grain = ograin_new
        if len(obj.ddims) > 1 and pd.Timestamp(obj.odims[0]).strftime(
                "%Y%m") != obj.valuation[0].strftime("%Y%m"):
            # Pad zero-valued leading valuations so the first origin
            # aligns with the first valuation period.
            addl_ts = (pd.period_range(
                obj.odims[0], obj.valuation[0],
                freq="M")[:-1].to_timestamp().values)
            addl = obj.iloc[..., -len(addl_ts):] * 0
            addl.ddims = addl_ts
            obj = concat((addl, obj), axis=-1)
    if dgrain_old != dgrain_new and obj.shape[-1] > 1:
        step = self._dstep()[dgrain_old][dgrain_new]
        d = np.sort(
            len(obj.development) - np.arange(0, len(obj.development), step) - 1)
        if obj.is_cumulative:
            # Cumulative: just keep every step-th development column.
            obj = obj.iloc[..., d]
        else:
            # Incremental: sum the development columns within each bucket.
            ddims = obj.ddims[d]
            d2 = [d[0]] * (d[0] + 1) + list(
                np.repeat(np.array(d[1:]), step))
            values = [
                getattr(obj.iloc[..., i], "sum")(3, auto_sparse=False,
                                                 keepdims=True)
                for i in obj.development.groupby(d2).groups.values()
            ]
            obj = concat(values, axis=3, ignore_index=True)
            obj.ddims = ddims
        obj.development_grain = dgrain_new
    # Return in the same representation (valuation vs development) as self.
    obj = obj.dev_to_val() if self.is_val_tri else obj.val_to_dev()
    if inplace:
        self = obj
        return self
    return obj
def grain(self, grain="", trailing=False, inplace=False):
    """Changes the grain of a cumulative triangle.

    Parameters
    ----------
    grain : str
        The grain to which you want your triangle converted, specified as
        'OXDY' where X and Y can take on values of ``['Y', 'S', 'Q', 'M']``
        For example, 'OYDY' for Origin Year/Development Year, 'OQDM' for
        Origin quarter/Development Month, etc.
    trailing : bool
        For partial origin years/quarters, trailing will set the
        year/quarter end to that of the latest available from the origin
        data.
    inplace : bool
        Whether to mutate the existing Triangle instance or return a
        new one.

    Returns
    -------
    Triangle
    """
    ograin_old, ograin_new = self.origin_grain, grain[1:2]
    dgrain_old, dgrain_new = self.development_grain, grain[-1]
    # Only coarsening of grain is supported ('S' is semester grain).
    valid = {"Y": ["Y"], "Q": ["Q", "S", "Y"],
             "M": ["Y", "S", "Q", "M"], "S": ["S", "Y"]}
    if ograin_new not in valid.get(ograin_old, []) or dgrain_new not in valid.get(
        dgrain_old, []
    ):
        raise ValueError("New grain not compatible with existing grain")
    if (
        self.is_cumulative is None
        and dgrain_old != dgrain_new
        and self.shape[-1] > 1
    ):
        raise AttributeError(
            "The is_cumulative attribute must be set before using grain method."
        )
    if valid["M"].index(ograin_new) > valid["M"].index(dgrain_new):
        raise ValueError("Origin grain must be coarser than development grain")
    if self.is_full and not self.is_ultimate and not self.is_val_tri:
        warnings.warn('Triangle includes extraneous development lags')
    else:
        # NOTE(review): ``d_limit`` is assigned but never read afterwards —
        # looks like leftover code; confirm before removing.
        d_limit = None
    # Work in valuation space while regrouping origins.
    obj = self.dev_to_val()
    if ograin_new != ograin_old:
        freq = {"Y": "A", "S": "2Q"}.get(ograin_new, ograin_new)
        # Anchor periods to the latest origin month when trailing.
        mn = self.origin[-1].strftime("%b").upper() if trailing else "DEC"
        indices = pd.Series(
            range(len(self.origin)), index=self.origin).resample(
            '-'.join([freq, mn])).indices
        groups = pd.concat([
            pd.Series([k] * len(v), index=v)
            for k, v in indices.items()], axis=0).values
        obj = obj.groupby(groups, axis=2).sum()
        obj.origin_close = mn
        if len(obj.ddims) > 1 and pd.Timestamp(obj.odims[0]).strftime(
            "%Y%m"
        ) != obj.valuation[0].strftime("%Y%m"):
            # Pad zero-valued leading valuations so the first origin
            # aligns with the first valuation period.
            addl_ts = (
                pd.period_range(obj.odims[0], obj.valuation[0], freq="M")[:-1]
                .to_timestamp()
                .values
            )
            addl = obj.iloc[..., -len(addl_ts):] * 0
            addl.ddims = addl_ts
            obj = concat((addl, obj), axis=-1)
            obj.values = num_to_nan(obj.values)
    if dgrain_old != dgrain_new and obj.shape[-1] > 1:
        step = self._dstep()[dgrain_old][dgrain_new]
        d = np.sort(
            len(obj.development) - np.arange(0, len(obj.development), step) - 1)
        if obj.is_cumulative:
            # Cumulative: just keep every step-th development column.
            obj = obj.iloc[..., d]
        else:
            # Incremental: sum the development columns within each bucket.
            ddims = obj.ddims[d]
            d2 = [d[0]] * (d[0] + 1) + list(np.repeat(np.array(d[1:]), step))
            obj = obj.groupby(d2, axis=3).sum()
            obj.ddims = ddims
        obj.development_grain = dgrain_new
    # Return in the same representation (valuation vs development) as self.
    obj = obj.dev_to_val() if self.is_val_tri else obj.val_to_dev()
    if inplace:
        self = obj
        return self
    return obj
def __init__(self, data=None, origin=None, development=None,
             columns=None, index=None, origin_format=None,
             development_format=None, cumulative=None,
             array_backend=None, pattern=False, trailing=False,
             *args, **kwargs):
    """Build a Triangle from a long-format DataFrame.

    Validates the axis inputs, derives origin/development grains,
    aggregates the data to the requested level and stores the result as
    a (sparse) 4-D array with index/column/origin/development axes.
    """
    # Allow an empty Triangle so that it can be assembled
    # programmatically by other constructors.
    if data is None:
        return
    index, columns, origin, development = self._input_validation(
        data, index, columns, origin, development)
    # Split off any ultimate-valued rows; they are re-attached at the end.
    data, ult = self._split_ult(data, index, columns, origin, development)
    origin_date = self._to_datetime(
        data, origin, format=origin_format).rename('__origin__')
    self.origin_grain = self._get_grain(origin_date)
    # '2Q' is represented as semester ('S') grain.
    self.origin_grain = 'S' if self.origin_grain == '2Q' else self.origin_grain
    development_date = self._set_development(
        data, development, development_format, origin_date)
    # A single development date carries no grain information; fall back
    # to the origin grain.
    self.development_grain = (
        self._get_grain(development_date)
        if development_date.nunique() != 1 else self.origin_grain)
    data_agg = self._aggregate_data(
        data, origin_date, development_date, index, columns)
    date_axes = self._get_date_axes(
        data_agg["__origin__"], data_agg["__development__"])
    # Deal with labels
    if not index:
        index = ["Total"]
        data_agg[index[0]] = "Total"
    self.kdims, key_idx = self._set_kdims(data_agg, index)
    self.vdims = np.array(columns)
    self.odims, orig_idx = self._set_odims(data_agg, date_axes)
    self.ddims, dev_idx = self._set_ddims(data_agg, date_axes)
    # Set the Triangle values
    coords, amts = self._set_values(data_agg, key_idx, columns,
                                    orig_idx, dev_idx)
    self.values = num_to_nan(
        sp(coords, amts, prune=True, has_duplicates=False, sorted=True,
           shape=(len(self.kdims), len(self.vdims),
                  len(self.odims), len(self.ddims))))
    # Set remaining triangle properties
    val_date = data_agg["__development__"].max()
    # Materialize lazy (e.g. dask-backed) scalars before storing.
    val_date = val_date.compute() if hasattr(val_date, 'compute') else val_date
    self.key_labels = index
    self.valuation_date = val_date
    self.is_cumulative = cumulative
    self.virtual_columns = VirtualColumns(self)
    self.is_pattern = pattern
    self.origin_close = 'DEC'
    if self.origin_grain != 'M' and trailing:
        # Trailing periods close on the month of the latest origin.
        self.origin_close = pd.to_datetime(
            self.odims[-1]).strftime('%b').upper()
    # Deal with array backend
    self.array_backend = "sparse"
    if array_backend is None:
        array_backend = options.ARRAY_BACKEND
    if not options.AUTO_SPARSE or array_backend == "cupy":
        self.set_backend(array_backend, inplace=True)
    else:
        self = self._auto_sparse()
    self._set_slicers()
    # Deal with special properties
    if self.is_pattern:
        # Patterns drop fully-empty origin/development slices.
        obj = self.dropna()
        self.odims = obj.odims
        self.ddims = obj.ddims
        self.values = obj.values
    if ult:
        # Re-attach the ultimates as the final valuation column.
        obj = concat((self.dev_to_val().iloc[..., :len(ult.odims), :], ult), -1)
        obj = obj.val_to_dev()
        self.odims = obj.odims
        self.ddims = obj.ddims
        self.values = obj.values
        self.valuation_date = pd.Timestamp(options.ULT_VAL)
class CaseOutstanding(DevelopmentBase):
    """ A determinisic method based on outstanding case reserves.

    The CaseOutstanding method is a deterministic approach that develops
    patterns of incremental payments as a percent of previous period case
    reserves as well as patterns for case reserves as a percent of previous
    period case reserves.  Although the patterns produces by the approach
    approximate incremental payments and case outstanding, they are
    converted into comparable multiplicative patterns for usage with the
    various IBNR methods.

    .. versionadded:: 0.8.0

    Parameters
    ----------
    paid_to_incurred : tuple or list of tuples
        A tuple representing the paid and incurred ``columns`` of the
        triangles such as ``('paid', 'incurred')``
    paid_n_periods : integer, optional (default=-1)
        number of origin periods to be used in the paid pattern averages.
        For all origin periods, set paid_n_periods=-1
    case_n_periods : integer, optional (default=-1)
        number of origin periods to be used in the case pattern averages.
        For all origin periods, set paid_n_periods=-1

    Attributes
    ----------
    ldf_ : Triangle
        The estimated (multiplicative) loss development patterns.
    cdf_ : Triangle
        The estimated (multiplicative) cumulative development patterns.
    case_to_prior_case_ : Triangle
        The case to prior case ratios used for fitting the estimator
    case_ldf_ :
        The selected case to prior case ratios of the fitted estimator
    paid_to_prior_case_ : Triangle
        The paid to prior case ratios used for fitting the estimator
    paid_ldf_ :
        The selected paid to prior case ratios of the fitted estimator
    """

    def __init__(self, paid_to_incurred=None, paid_n_periods=-1,
                 case_n_periods=-1):
        self.paid_to_incurred = paid_to_incurred
        self.paid_n_periods = paid_n_periods
        self.case_n_periods = case_n_periods

    def fit(self, X, y=None, sample_weight=None):
        """Fit the model with X.

        Parameters
        ----------
        X : Triangle
            Triangle containing the paid and incurred columns from which
            the case outstanding development patterns will be estimated.
        y : Ignored
        sample_weight : Ignored

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        backend = "cupy" if X.array_backend == "cupy" else "numpy"
        self.X_ = X.copy()
        paid_tri = self.X_[self.paid_to_incurred[0]]
        incurred_tri = self.X_[self.paid_to_incurred[1]]
        # Weights controlling which origin periods enter the averages.
        self.paid_w_ = Development(
            n_periods=self.paid_n_periods).fit(self.X_.iloc[0, 0]).w_
        self.case_w_ = Development(
            n_periods=self.case_n_periods).fit(self.X_.iloc[0, 0]).w_
        # Average case-to-prior-case and paid-to-prior-case ratios.
        self.case_ldf_ = self.case_to_prior_case_.mean(2)
        self.paid_ldf_ = self.paid_to_prior_case_.mean(2)
        case = incurred_tri - paid_tri
        # Square the case triangle one diagonal at a time using the
        # selected case development pattern.
        patterns = ((1 - np.nan_to_num(case.nan_triangle[..., 1:]))
                    * self.case_ldf_.values)
        for i in range(np.isnan(case.nan_triangle[-1]).sum()):
            increment = (
                (case - case[case.valuation < case.valuation_date]).iloc[..., :-1]
                * patterns)
            increment.ddims = case.ddims[1:]
            increment.valuation_date = case.valuation[
                case.valuation >= case.valuation_date].drop_duplicates()[1]
            case = case + increment
        # Project future paid amounts from the squared case triangle.
        patterns = ((1 - np.nan_to_num(self.X_.nan_triangle[..., 1:]))
                    * self.paid_ldf_.values)
        paid = (case.iloc[..., :-1] * patterns)
        paid.ddims = case.ddims[1:]
        paid.valuation_date = pd.Timestamp(ULT_VAL)
        paid = (paid_tri.cum_to_incr() + paid).incr_to_cum()
        inc = (case[case.valuation > self.X_.valuation_date]
               + paid[paid.valuation > self.X_.valuation_date] + incurred_tri)
        # Combine paid and incurred into a single object.
        paid.columns = [self.paid_to_incurred[0]]
        inc.columns = [self.paid_to_incurred[1]]
        cols = self.X_.columns[self.X_.columns.isin(
            [self.paid_to_incurred[0], self.paid_to_incurred[1]])]
        dev = concat((paid, inc), 1)[list(cols)]
        self.dev_ = dev
        # Convert the squared triangles to multiplicative LDFs.
        dev = (dev.iloc[..., -1] / dev).iloc[..., :-1]
        dev.valuation_date = pd.Timestamp(ULT_VAL)
        dev.ddims = self.X_.link_ratio.ddims
        dev.is_pattern = True
        dev.is_cumulative = True
        self.ldf_ = dev.cum_to_incr().set_backend(backend)
        return self
def _get_full_expectation(cdf_, ultimate_):
    """Build the full expectation triangle.

    Divides the ultimate by the cumulative development factors and tacks
    the ultimate itself on as the final (9999) development period.
    """
    from chainladder.utils.utility_functions import concat
    ultimate_column = ultimate_.copy().rename('development', [9999])
    return concat((ultimate_ / cdf_, ultimate_column), axis=3)
def fit(self, X, y=None, sample_weight=None):
    """Fit the bootstrap ODP sampler to a Triangle.

    Multi-column triangles are fit one column at a time and the results
    stacked; single-column triangles go through the full design-matrix /
    hat-matrix / simulation pipeline.

    Parameters
    ----------
    X : Triangle
        Triangle to be resampled.
    y : Ignored
    sample_weight : Ignored

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    if X.shape[1] > 1:
        from chainladder.utils.utility_functions import concat
        # Fit each column independently, then stack the fitted pieces.
        out = [
            BootstrapODPSample(**self.get_params()).fit(X.iloc[:, i])
            for i in range(X.shape[1])
        ]
        xp = X.get_array_module(out[0].design_matrix_)
        self.design_matrix_ = xp.concatenate(
            [i.design_matrix_[None] for i in out], axis=0)
        self.hat_ = xp.concatenate([i.hat_[None] for i in out], axis=0)
        self.resampled_triangles_ = concat(
            [i.resampled_triangles_ for i in out], axis=1)
        self.scale_ = xp.array([i.scale_ for i in out])
        self.w_ = out[0].w_
    else:
        backend = X.array_backend
        if backend == "sparse":
            X = X.set_backend("numpy")
        else:
            X = X.copy()
        xp = X.get_array_module()
        if len(X) != 1:
            raise ValueError("Only single index triangles are supported")
        if type(X.ddims) != np.ndarray:
            raise ValueError(
                "Triangle must be expressed with development lags")
        lag = {"M": 1, "Q": 3, "Y": 12}[X.development_grain]
        obj = Development(
            n_periods=self.n_periods,
            drop=self.drop,
            drop_high=self.drop_high,
            drop_low=self.drop_low,
            drop_valuation=self.drop_valuation,
        ).fit_transform(X)
        self.w_ = obj.w_
        obj = Chainladder().fit(obj)
        # Works for only a single triangle - can we generalize this
        exp_incr_triangle = obj.full_expectation_.cum_to_incr().values[
            0, 0, :, :X.shape[-1]]
        exp_incr_triangle = xp.nan_to_num(
            exp_incr_triangle) * obj.X_.nan_triangle
        self.design_matrix_ = self._get_design_matrix(X)
        if self.hat_adj:
            try:
                self.hat_ = self._get_hat(X, exp_incr_triangle)
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are not swallowed.
            except Exception:
                warn(
                    "Could not compute hat matrix. Setting hat_adj to False"
                )
                # BUG FIX: was ``self.had_adj`` (typo), which left the
                # real ``hat_adj`` flag unchanged.
                self.hat_adj = False
                self.hat_ = None
        else:
            self.hat_ = None
        self.resampled_triangles_, self.scale_ = self._get_simulation(
            X, exp_incr_triangle)
        n_obs = xp.nansum(self.w_)
        n_origin_params = X.shape[2]
        n_dev_params = X.shape[3] - 1
        deg_free = n_obs - n_origin_params - n_dev_params
        # NOTE(review): computed but never stored or used — confirm
        # whether this should be assigned to an attribute.
        deg_free_adj_fctr = xp.sqrt(n_obs / deg_free)
    return self
def __init__(self, data=None, origin=None, development=None,
             columns=None, index=None, origin_format=None,
             development_format=None, cumulative=None,
             array_backend=None, pattern=False, *args, **kwargs):
    """Build a TriangleBase from a long-format DataFrame.

    Validates inputs, derives origin/development grains, aggregates the
    data and stores it as a sparse 4-D array indexed by
    (index, column, origin, development).
    """
    # Allow Empty Triangle so that we can piece it together programatically
    if data is None:
        return
    # Check whether all columns are unique and numeric
    check = data[columns].dtypes
    check = [check] if isinstance(check, np.dtype) else check.to_list()
    columns = [columns] if type(columns) is not list else columns
    if "object" in check:
        raise TypeError("column attribute must be numeric.")
    if data[columns].shape[1] != len(columns):
        raise AttributeError("Columns are required to have unique names")
    # Sanitize all axis inputs to lists
    str_to_list = lambda *args: tuple(
        [arg] if type(arg) in [str, pd.Period] else arg for arg in args)
    index, columns, origin, development = str_to_list(
        index, columns, origin, development)
    # Determine desired array backend of the Triangle
    if array_backend is None:
        from chainladder import ARRAY_BACKEND
        array_backend = ARRAY_BACKEND
    # Split off any ultimate-valued rows (development == ULT_VAL) into a
    # separate TriangleBase ``u`` to be re-attached at the end.
    if (development and len(development) == 1
            and data[development[0]].dtype == "<M8[ns]"):
        u = data[data[development[0]] == ULT_VAL].copy()
        if len(u) > 0 and len(u) != len(data):
            u = TriangleBase(
                u,
                origin=origin,
                development=development,
                columns=columns,
                index=index,
            )
            data = data[data[development[0]] != ULT_VAL]
        else:
            u = None
    else:
        u = None
    # Initialize origin and its grain
    origin = development if origin is None else origin
    origin_date = TriangleBase._to_datetime(data, origin,
                                            format=origin_format)
    self.origin_grain = TriangleBase._get_grain(origin_date)
    origin_date = (pd.PeriodIndex(
        origin_date,
        freq=self.origin_grain).to_timestamp().rename("origin"))
    # Initialize development and its grain
    m_cnt = {"Y": 12, "Q": 3, "M": 1}
    has_dev = development and len(np.unique(data[development])) > 1
    if has_dev:
        development_date = TriangleBase._to_datetime(
            data, development, period_end=True, format=development_format)
        self.development_grain = TriangleBase._get_grain(development_date)
    else:
        # No explicit development axis: synthesize one origin-period
        # after each origin date.
        development_date = pd.PeriodIndex(
            origin_date
            + pd.tseries.offsets.MonthEnd(m_cnt[self.origin_grain]),
            freq={
                "Y": "A"
            }.get(self.origin_grain, self.origin_grain),
        ).to_timestamp(how="e")
        self.development_grain = self.origin_grain
    development_date.name = "development"
    # Summarize dataframe to the level specified in axes
    key_gr = [origin_date, development_date
              ] + [data[item] for item in ([] if not index else index)]
    data_agg = data[columns].groupby(key_gr).sum().reset_index().fillna(0)
    if not index:
        index = ["Total"]
        data_agg[index[0]] = "Total"
    # Fill in any gaps in origin/development
    date_axes = self._get_date_axes(
        data_agg["origin"], data_agg["development"])  # cartesian product
    dev_lag = TriangleBase._development_lag(data_agg["origin"],
                                            data_agg["development"])
    # Grab unique index, origin, development
    dev_lag_unique = np.sort(
        TriangleBase._development_lag(date_axes["origin"],
                                      date_axes["development"]).unique())
    orig_unique = np.sort(date_axes["origin"].unique())
    kdims = data_agg[index].drop_duplicates().reset_index(
        drop=True).reset_index()
    # Map index, origin, development indices to data
    set_idx = (lambda col, unique: col.map(
        dict(zip(unique, range(len(unique))))).values[None].T)
    orig_idx = set_idx(data_agg["origin"], orig_unique)
    dev_idx = set_idx(dev_lag, dev_lag_unique)
    key_idx = (data_agg[index].merge(kdims, how="left",
                                     on=index)["index"].values[None].T)
    # origin <= development is required - truncate bad records if not true
    valid = data_agg["origin"] <= data_agg["development"]
    if sum(~valid) > 0:
        warnings.warn("Observations with development before " +
                      "origin start have been removed.")
        data_agg, orig_idx = data_agg[valid], orig_idx[valid]
        dev_idx, key_idx = dev_idx[valid], key_idx[valid]
    # All Triangles start out as sparse arrays
    val_idx = (((np.ones(len(data_agg))[None].T) *
                range(len(columns))).reshape((1, -1), order="F").T)
    coords = np.concatenate(
        tuple([np.concatenate((orig_idx, dev_idx), 1)] * len(columns)), 0)
    coords = np.concatenate((np.concatenate(
        tuple([key_idx] * len(columns)), 0), val_idx, coords), 1)
    amts = data_agg[columns].unstack()
    amts = amts.values.astype("float64")
    self.array_backend = "sparse"
    self.values = num_to_nan(
        sp(
            coords.T.astype('int64'),
            amts,
            prune=True,
            has_duplicates=False,
            sorted=True,
            shape=(
                len(kdims),
                len(columns),
                len(orig_unique),
                len(dev_lag_unique) if has_dev else 1,
            ),
        ))
    # Set all axis values
    self.valuation_date = data_agg["development"].max()
    self.kdims = kdims.drop("index", 1).values
    self.odims = orig_unique
    self.ddims = dev_lag_unique if has_dev else dev_lag[0:1].values
    # Express development lags in months.
    self.ddims = self.ddims * (m_cnt[self.development_grain])
    if development and not has_dev:
        # Single development date supplied: store it as a valuation.
        self.ddims = pd.DatetimeIndex(
            TriangleBase._to_datetime(data,
                                      development,
                                      period_end=True,
                                      format=development_format)[0:1])
        self.valuation_date = self.ddims[0]
    self.vdims = np.array(columns)
    # Set remaining triangle properties
    self.key_labels = index
    self.is_cumulative = cumulative
    self.virtual_columns = VirtualColumns(self)
    self.is_pattern = pattern
    if not AUTO_SPARSE or array_backend == "cupy":
        self.set_backend(array_backend, inplace=True)
    else:
        self = self._auto_sparse()
    self._set_slicers()
    if self.is_pattern:
        # Patterns drop fully-empty origin/development slices.
        obj = self.dropna()
        self.odims = obj.odims
        self.ddims = obj.ddims
        self.values = obj.values
    if u:
        # Re-attach the ultimates as the final valuation column.
        obj = concat((self.dev_to_val().iloc[..., :len(u.odims), :], u), -1)
        obj = obj.val_to_dev()
        self.odims = obj.odims
        self.ddims = obj.ddims
        self.values = obj.values
        self.valuation_date = pd.Timestamp(ULT_VAL)
def fit(self, X, y=None, sample_weight=None):
    """Fit the model with X.

    Computes on-level factors (``olf_``) from the supplied rate history
    using the parallelogram method, optionally per group when the rate
    history shares key labels with the triangle.

    Parameters
    ----------
    X : Triangle-like
        Data to which the model will be applied.
    y : Ignored
    sample_weight : Ignored

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    from chainladder.utils.utility_functions import parallelogram_olf, concat
    if X.array_backend == "sparse":
        obj = X.set_backend("numpy")
    else:
        obj = X.copy()
    # Group only on key labels that also appear in the rate history.
    groups = list(
        set(X.key_labels).intersection(self.rate_history.columns))
    if len(groups) == 0:
        idx = obj
    else:
        idx = obj.groupby(groups).sum()
    kw = dict(
        start_date=X.origin[0].to_timestamp(how="s"),
        end_date=X.origin[-1].to_timestamp(how="e"),
        grain=X.origin_grain,
        vertical_line=self.vertical_line,
    )
    if len(groups) > 0:
        # One on-level factor vector per group.
        tris = []
        for item in idx.index.set_index(groups).iterrows():
            r = self.rate_history.set_index(groups).loc[item[0]].copy()
            # Compound multiple changes on the same date into one factor.
            r[self.change_col] = r[self.change_col] + 1
            r = (r.groupby(self.date_col)[self.change_col].prod()
                 - 1).reset_index()
            date = r[self.date_col]
            values = r[self.change_col]
            olf = parallelogram_olf(values=values, date=date,
                                    **kw).values[None, None]
            if X.array_backend == "cupy":
                olf = X.get_array_module().array(olf)
            tris.append((idx.loc[item[0]] * 0 + 1) * olf)
        self.olf_ = concat(tris, 0).latest_diagonal
    else:
        r = self.rate_history.copy()
        # Compound multiple changes on the same date into one factor.
        r[self.change_col] = r[self.change_col] + 1
        r = (r.groupby(self.date_col)[self.change_col].prod()
             - 1).reset_index()
        date = r[self.date_col]
        values = r[self.change_col]
        olf = parallelogram_olf(values=values, date=date, **kw)
        self.olf_ = ((idx * 0 + 1) * olf.values[None, None]).latest_diagonal
    return self