def _fit_OLS(self):
    """Compute the weighted OLS slope and intercept along ``self.axis``.

    Operates on the instance's weights (``self.w``), regressors
    (``self.x``) and responses (``self.y``).  Results are stored on
    ``self.slope_`` and ``self.intercept_`` with a trailing axis added,
    and ``self`` is returned for chaining.

    TODO: Make this work with n_periods = 1 without numpy warning.
    """
    from chainladder.utils.utility_functions import num_to_nan
    xp = self.xp
    weight, xvar, yvar = self.w.copy(), self.x.copy(), self.y.copy()
    axis = self.axis
    if xp == sp:
        # Sparse backend: carry nan through the fill value rather than
        # assigning into the arrays, which COO does not support.
        masked = weight.copy()
        masked.fill_value = sp.nan
        xvar = xvar * sp(masked)
        yvar = yvar * sp(masked)
    else:
        # Dense backends allow boolean-mask assignment of nan where the
        # weight is zero, so those cells drop out of the nan-aware sums.
        xvar[weight == 0] = xp.nan
        yvar[weight == 0] = xp.nan
    numerator = xp.nansum(weight * xvar * yvar, axis) - \
        xp.nansum(xvar * weight, axis) * xp.nanmean(yvar, axis)
    denominator = xp.nansum(weight * xvar * xvar, axis) - \
        xp.nanmean(xvar, axis) * xp.nansum(weight * xvar, axis)
    # num_to_nan guards against a 0/0 producing spurious values.
    fitted_slope = num_to_nan(numerator) / num_to_nan(denominator)
    fitted_intercept = xp.nanmean(yvar, axis) - \
        fitted_slope * xp.nanmean(xvar, axis)
    self.slope_ = fitted_slope[..., None]
    self.intercept_ = fitted_intercept[..., None]
    return self
def _arithmetic_cleanup(self, obj, other):
    """Common functionality AFTER arithmetic operations.

    Re-applies the nan-triangle mask to ``obj.values`` and converts any
    remaining zeros back to nan before returning ``obj``.
    """
    backend = cp.get_array_module(obj.values)
    mask = obj._expand_dims(obj.nan_triangle)
    if backend == sp:
        # Sparse backend: coerce both operands to COO before masking.
        obj.values = sp(obj.values) * sp(mask)
    else:
        obj.values = obj.values * mask
    obj.num_to_nan()
    return obj
def __truediv__(self, other):
    """Element-wise division of two Triangles (or Triangle / scalar)."""
    backend = cp.get_array_module(self.values)
    obj, other = self._validate_arithmetic(other)
    if backend != sp:
        obj.values = backend.nan_to_num(obj.values) / other
    else:
        # Sparse: divide with a nan fill so missing cells propagate as
        # nan, then restore a zero fill value for storage efficiency.
        other.fill_value = backend.nan
        obj.values = sp(backend.nan_to_num(obj.values)) / sp(other)
        obj.values.fill_value = 0.0
    return self._arithmetic_cleanup(obj, other)
def link_ratio(self):
    """Age-to-age link ratios of the Triangle.

    Divides each development column by the preceding one, relabels
    ``ddims`` as "age-age" pairs (e.g. ``12-24``), and — on the dense
    path — drops the last origin period when it has no usable ratio.
    Existing fit weights (``w_``) are re-applied when shapes align.
    """
    xp = cp.get_array_module(self.values)
    obj = copy.deepcopy(self)
    temp = obj.values.copy()
    # NOTE(review): val_array appears unused below — candidate for removal.
    val_array = obj.valuation.values.reshape(obj.shape[-2:], order='f')[:, 1:]
    # Relabel development ages as from-to pairs, one fewer than before.
    obj.ddims = np.array([
        '{}-{}'.format(obj.ddims[i], obj.ddims[i + 1])
        for i in range(len(obj.ddims) - 1)
    ])
    if xp != sp:
        # Dense path: zeros would divide to inf, so map them to nan first.
        temp[temp == 0] = np.nan
        obj.values = temp[..., 1:] / temp[..., :-1]
        # Check whether we want to eliminate the last origin period
        if xp.max(xp.sum(~xp.isnan(self.values[..., -1, :]), 2) - 1) <= 0:
            obj.values = obj.values[..., :-1, :]
    else:
        # Sparse path: shift the fill value to nan for the division, then
        # prune explicit zeros and shrink the COO to its occupied extent.
        temp.fill_value = np.nan
        temp = temp[..., 1:] / temp[..., :-1]
        temp.fill_value = 0.0
        temp.coords = temp.coords[:, temp.data != 0]
        temp.data = temp.data[temp.data != 0]
        temp.shape = tuple(temp.coords.max(1) + 1)
        obj.values = sp(temp)
    # Keep origin labels in sync with however many rows survived.
    obj.odims = obj.odims[:obj.values.shape[2]]
    if hasattr(obj, 'w_'):
        # Re-apply previously assigned weights when they align with the
        # new shape.
        if obj.shape == obj.w_[..., 0:1, :len(obj.odims), :].shape:
            obj = obj * obj.w_[..., 0:1, :len(obj.odims), :]
    return obj
def num_to_nan(arr):
    """Replace every zero entry of ``arr`` with nan, preserving backend."""
    module_root = type(arr).__module__.split(".")[0]
    if module_root != "sparse":
        # Dense (numpy or cupy) arrays: assign nan in place and return.
        arr[arr == 0] = np.nan if module_root == "numpy" else cp.nan
        return arr
    if arr.fill_value == 0 or sp.isnan(arr.fill_value):
        # Drop explicit zeros and switch the fill value to nan.
        keep = arr.data != 0
        arr.fill_value = sp.nan
        arr.coords = arr.coords[:, keep]
        arr.data = arr.data[keep]
        return sp(arr)
    # Non-zero, non-nan fill value: densify, convert, re-sparsify with
    # a nan fill value.
    return sp(num_to_nan(np.nan_to_num(arr.todense())), fill_value=sp.nan)
def _slice_valuation(self, key):
    ''' private method for handling of valuation slicing

    ``key`` is a boolean mask over the flattened valuation vector.  The
    method masks out deselected cells, trims origin/development labels
    that become entirely nan, and slices the values to the surviving
    rows/columns (using contiguous slices when possible for speed).
    '''
    obj = copy.deepcopy(self)
    # Never advance the valuation date past the current one.
    obj.valuation_date = min(obj.valuation[key].max(), obj.valuation_date)
    key = key.reshape(self.shape[-2:], order='f')
    # Build an origin x development mask: 1 where selected, nan elsewhere.
    nan_tri = np.ones(self.shape[-2:])
    nan_tri = key * nan_tri
    nan_tri[nan_tri == 0] = np.nan
    o, d = nan_tri.shape
    # Rows/columns that are not entirely nan survive the slice.
    o_idx = np.arange(o)[list(np.sum(np.isnan(nan_tri), 1) != d)]
    d_idx = np.arange(d)[list(np.sum(np.isnan(nan_tri), 0) != o)]
    obj.odims = obj.odims[np.sum(np.isnan(nan_tri), 1) != d]
    if len(obj.ddims) > 1:
        obj.ddims = obj.ddims[np.sum(np.isnan(nan_tri), 0) != o]
    xp = cp.get_array_module(obj.values)
    # Move the mask onto the same backend as the values before multiplying.
    if xp == cp:
        nan_tri = cp.array(nan_tri)
    if xp == sp:
        nan_tri = sp(nan_tri)
    obj.values = (obj.values * nan_tri)
    # Collapse index arrays to slices when they are contiguous runs.
    if np.all(o_idx == np.array(range(o_idx[0], o_idx[-1] + 1))):
        o_idx = slice(o_idx[0], o_idx[-1] + 1)
    if np.all(d_idx == np.array(range(d_idx[0], d_idx[-1] + 1))):
        d_idx = slice(d_idx[0], d_idx[-1] + 1)
    if type(o_idx) is slice or type(d_idx) is slice:
        # If contiguous slices, this is faster
        obj.values = obj.values[..., o_idx, d_idx]
    else:
        # Fancy indexing fallback for non-contiguous selections.
        obj.values = xp.take(xp.take(obj.values, o_idx, -2), d_idx, -1)
    return obj
def num_to_value(arr, value):
    """ Function that turns all zeros to ``value`` in an array """
    # Backend is inferred from the array's defining module
    # ("sparse", "numpy", or "cupy").
    backend = arr.__class__.__module__.split(".")[0]
    if backend == "sparse":
        if arr.fill_value == 0 or sp.isnan(arr.fill_value):
            # Drop explicit zeros, then rebuild the COO with a nan fill.
            arr.coords = arr.coords[:, arr.data != 0]
            arr.data = arr.data[arr.data != 0]
            arr = sp(coords=arr.coords, data=arr.data,
                     fill_value=sp.nan, shape=arr.shape)
        else:
            # NOTE(review): this densifies and maps zeros to nan via
            # num_to_nan rather than to ``value``; only the fill_value
            # receives ``value``.  Confirm this asymmetry is intended.
            arr = sp(num_to_nan(np.nan_to_num(arr.todense())),
                     fill_value=value)
    else:
        # Dense backends: assign ``value`` in place wherever arr == 0.
        arr[arr == 0] = value
    return arr
def latest_diagonal(self):
    """Return a Triangle holding only the latest diagonal of ``self``."""
    backend = cp.get_array_module(self.values)
    result = copy.deepcopy(self)
    # Boolean mask marking the cells valued at the current valuation date.
    diag_mask = (self.valuation == self.valuation_date).reshape(
        self.shape[-2:], order='F')
    if backend == sp:
        diag_mask = sp(diag_mask)
    # Collapse the development axis down to the single masked diagonal.
    result.values = backend.nansum(
        diag_mask * self.values, axis=-1, keepdims=True)
    result.ddims = pd.DatetimeIndex(
        [self.valuation_date], dtype='datetime64[ns]', freq=None)
    return result
def _val_dev_chg(self):
    """Convert the Triangle between valuation and development formats.

    Builds a matrix of development ages in months for every cell, then
    sums the values belonging to each distinct age into its own column.
    """
    xp = cp.get_array_module(self.values)
    obj = copy.deepcopy(self)
    x = xp.nan_to_num(obj.values)
    # Age in months of each (origin, valuation) cell:
    # 12 * year-diff + month-diff + 1.
    val_mtrx = \
        (np.array(obj.valuation.year).reshape(obj.shape[-2:], order='f') -
         np.array(pd.DatetimeIndex(obj.odims).year)[..., None])*12 + \
        (np.array(obj.valuation.month).reshape(obj.shape[-2:], order='f') -
         np.array(pd.DatetimeIndex(obj.odims).month)[..., None]) + 1
    # Distinct positive ages become the new development columns.
    rng = np.sort(np.unique(val_mtrx.flatten()[val_mtrx.flatten() > 0]))
    if sp == xp:
        val_mtrx = sp(val_mtrx)
    # One summed column per age, then stitch them back together.
    x = [
        xp.sum((val_mtrx == item) * x, -1, keepdims=True)
        for item in xp.array(rng)
    ]
    x = xp.concatenate(x, -1)
    obj.values = x
    obj.num_to_nan()
    obj.ddims = np.array([item for item in rng])
    obj._set_slicers()
    return obj
def __init__(self, data=None, origin=None, development=None, columns=None,
             index=None, origin_format=None, development_format=None,
             cumulative=None, array_backend=None, pattern=False,
             *args, **kwargs):
    """Build a 4D (index x column x origin x development) Triangle from
    a long-format DataFrame.

    The raw data is validated, aggregated to the origin/development
    grains, mapped into sparse COO coordinates, and finally moved to the
    requested array backend.  Rows valued at ``ULT_VAL`` are split off
    and re-attached as an ultimate layer at the end.
    """
    # Allow Empty Triangle so that we can piece it together programatically
    if data is None:
        return
    # Check whether all columns are unique and numeric
    check = data[columns].dtypes
    check = [check] if isinstance(check, np.dtype) else check.to_list()
    columns = [columns] if type(columns) is not list else columns
    if "object" in check:
        raise TypeError("column attribute must be numeric.")
    if data[columns].shape[1] != len(columns):
        raise AttributeError("Columns are required to have unique names")
    # Sanitize all axis inputs to lists
    str_to_list = lambda *args: tuple(
        [arg] if type(arg) in [str, pd.Period] else arg for arg in args)
    index, columns, origin, development = str_to_list(
        index, columns, origin, development)
    # Determine desired array backend of the Triangle
    if array_backend is None:
        from chainladder import ARRAY_BACKEND
        array_backend = ARRAY_BACKEND
    # Split off ultimate-valued rows (ULT_VAL) into their own Triangle u
    # so they can be re-attached after the main triangle is built.
    if (development and len(development) == 1
            and data[development[0]].dtype == "<M8[ns]"):
        u = data[data[development[0]] == ULT_VAL].copy()
        if len(u) > 0 and len(u) != len(data):
            u = TriangleBase(
                u, origin=origin, development=development,
                columns=columns, index=index,
            )
            data = data[data[development[0]] != ULT_VAL]
        else:
            u = None
    else:
        u = None
    # Initialize origin and its grain
    origin = development if origin is None else origin
    origin_date = TriangleBase._to_datetime(data, origin,
                                            format=origin_format)
    self.origin_grain = TriangleBase._get_grain(origin_date)
    origin_date = (pd.PeriodIndex(
        origin_date, freq=self.origin_grain).to_timestamp().rename("origin"))
    # Initialize development and its grain
    m_cnt = {"Y": 12, "Q": 3, "M": 1}
    has_dev = development and len(np.unique(data[development])) > 1
    if has_dev:
        development_date = TriangleBase._to_datetime(
            data, development, period_end=True, format=development_format)
        self.development_grain = TriangleBase._get_grain(development_date)
    else:
        # No usable development column: synthesize a single period end.
        development_date = pd.PeriodIndex(
            origin_date +
            pd.tseries.offsets.MonthEnd(m_cnt[self.origin_grain]),
            freq={"Y": "A"}.get(self.origin_grain, self.origin_grain),
        ).to_timestamp(how="e")
        self.development_grain = self.origin_grain
    development_date.name = "development"
    # Summarize dataframe to the level specified in axes
    key_gr = [origin_date, development_date] + [
        data[item] for item in ([] if not index else index)]
    data_agg = data[columns].groupby(key_gr).sum().reset_index().fillna(0)
    if not index:
        index = ["Total"]
        data_agg[index[0]] = "Total"
    # Fill in any gaps in origin/development
    date_axes = self._get_date_axes(
        data_agg["origin"], data_agg["development"])  # cartesian product
    dev_lag = TriangleBase._development_lag(data_agg["origin"],
                                            data_agg["development"])
    # Grab unique index, origin, development
    dev_lag_unique = np.sort(
        TriangleBase._development_lag(
            date_axes["origin"], date_axes["development"]).unique())
    orig_unique = np.sort(date_axes["origin"].unique())
    kdims = data_agg[index].drop_duplicates().reset_index(
        drop=True).reset_index()
    # Map index, origin, development indices to data
    set_idx = (lambda col, unique: col.map(
        dict(zip(unique, range(len(unique))))).values[None].T)
    orig_idx = set_idx(data_agg["origin"], orig_unique)
    dev_idx = set_idx(dev_lag, dev_lag_unique)
    key_idx = (data_agg[index].merge(
        kdims, how="left", on=index)["index"].values[None].T)
    # origin <= development is required - truncate bad records if not true
    valid = data_agg["origin"] <= data_agg["development"]
    if sum(~valid) > 0:
        warnings.warn("Observations with development before " +
                      "origin start have been removed.")
        data_agg, orig_idx = data_agg[valid], orig_idx[valid]
        dev_idx, key_idx = dev_idx[valid], key_idx[valid]
    # All Triangles start out as sparse arrays
    val_idx = (((np.ones(len(data_agg))[None].T) *
                range(len(columns))).reshape((1, -1), order="F").T)
    coords = np.concatenate(
        tuple([np.concatenate((orig_idx, dev_idx), 1)] * len(columns)), 0)
    coords = np.concatenate((np.concatenate(
        tuple([key_idx] * len(columns)), 0), val_idx, coords), 1)
    amts = data_agg[columns].unstack()
    amts = amts.values.astype("float64")
    self.array_backend = "sparse"
    # num_to_nan treats zeros as missing cells in the sparse COO.
    self.values = num_to_nan(
        sp(
            coords.T.astype('int64'),
            amts,
            prune=True,
            has_duplicates=False,
            sorted=True,
            shape=(
                len(kdims),
                len(columns),
                len(orig_unique),
                len(dev_lag_unique) if has_dev else 1,
            ),
        ))
    # Set all axis values
    self.valuation_date = data_agg["development"].max()
    self.kdims = kdims.drop("index", 1).values
    self.odims = orig_unique
    self.ddims = dev_lag_unique if has_dev else dev_lag[0:1].values
    self.ddims = self.ddims * (m_cnt[self.development_grain])
    if development and not has_dev:
        # Single-valuation data: ddims becomes that one valuation date.
        self.ddims = pd.DatetimeIndex(
            TriangleBase._to_datetime(
                data, development, period_end=True,
                format=development_format)[0:1])
        self.valuation_date = self.ddims[0]
    self.vdims = np.array(columns)
    # Set remaining triangle properties
    self.key_labels = index
    self.is_cumulative = cumulative
    self.virtual_columns = VirtualColumns(self)
    self.is_pattern = pattern
    if not AUTO_SPARSE or array_backend == "cupy":
        self.set_backend(array_backend, inplace=True)
    else:
        self = self._auto_sparse()
    self._set_slicers()
    if self.is_pattern:
        # Patterns drop fully-empty rows/columns.
        obj = self.dropna()
        self.odims = obj.odims
        self.ddims = obj.ddims
        self.values = obj.values
    if u:
        # Re-attach the ultimate layer split off above.
        obj = concat((self.dev_to_val().iloc[..., :len(u.odims), :], u), -1)
        obj = obj.val_to_dev()
        self.odims = obj.odims
        self.ddims = obj.ddims
        self.values = obj.values
        self.valuation_date = pd.Timestamp(ULT_VAL)
def fit(self, X, y=None, sample_weight=None):
    """Fit the model with X.

    Parameters
    ----------
    X : Triangle-like
        Set of LDFs to which the munich adjustment will be applied.
    y : Ignored
    sample_weight : Ignored

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    xp = cp.get_array_module(X.values)
    if (type(X.ddims) != np.ndarray):
        raise ValueError(
            'Triangle must be expressed with development lags')
    if self.fillna:
        tri_array = (X + self.fillna).values
    else:
        tri_array = X.values.copy()
    if xp != sp:
        # Dense path: zeros are treated as missing.
        tri_array[tri_array == 0] = xp.nan
    # Broadcast scalar `average` / `n_periods` to one value per dev period.
    if type(self.average) is not list:
        average = [self.average] * (tri_array.shape[-1] - 1)
    else:
        average = self.average
    average = np.array(average)
    self.average_ = average
    if type(self.n_periods) is not list:
        n_periods = [self.n_periods] * (tri_array.shape[-1] - 1)
    else:
        n_periods = self.n_periods
    n_periods = np.array(n_periods)
    self.n_periods_ = n_periods
    # Map averaging method to the regression weight exponent:
    # regression -> 0, volume -> 1, simple -> 2 (default volume).
    weight_dict = {'regression': 0, 'volume': 1, 'simple': 2}
    x, y = tri_array[..., :-1], tri_array[..., 1:]
    val = xp.array([weight_dict.get(item.lower(), 1)
                    for item in average])
    # Expand exponents to the full 4D shape of the triangle.
    for i in [2, 1, 0]:
        val = xp.repeat(val[None], tri_array.shape[i], axis=0)
    val = xp.nan_to_num(val * (y * 0 + 1))
    if xp in [cp, sp]:
        link_ratio = y / x
    else:
        # numpy: guard against divide-by-zero warnings.
        link_ratio = xp.divide(y, x, where=xp.nan_to_num(x) != 0)
    if xp == sp:
        self.w_ = sp(self._assign_n_periods_weight(X) *
                     self._drop_adjustment(X, link_ratio))
    else:
        self.w_ = xp.array(self._assign_n_periods_weight(X) *
                           self._drop_adjustment(X, link_ratio))
    # Weighted regression through the origin yields the LDFs.
    w = self.w_ / (x**(val))
    params = WeightedRegression(axis=2, thru_orig=True).fit(x, y, w)
    if self.n_periods != 1:
        params = params.sigma_fill(self.sigma_interpolation)
    else:
        warnings.warn('Setting n_periods=1 does not allow enough degrees '
                      'of freedom to support calculation of all regression'
                      ' statistics. Only LDFs have been calculated.')
    # Backfill std_err where it is nan using sigma (Mack-style fallback).
    params.std_err_ = xp.nan_to_num(params.std_err_) + \
        xp.nan_to_num(
            (1-xp.nan_to_num(params.std_err_*0+1)) *
            params.sigma_ /
            xp.swapaxes(xp.sqrt(x**(2-val))[..., 0:1, :], -1, -2))
    # Stack slope/sigma/std_err and expose them as Triangle properties.
    params = xp.concatenate(
        (params.slope_, params.sigma_, params.std_err_), 3)
    params = xp.swapaxes(params, 2, 3)
    self.ldf_ = self._param_property(X, params, 0)
    self.sigma_ = self._param_property(X, params, 1)
    self.std_err_ = self._param_property(X, params, 2)
    return self
def __init__(self, data=None, origin=None, development=None,
             columns=None, index=None, origin_format=None,
             development_format=None, cumulative=None, array_backend=None,
             pattern=False, trailing=False, *args, **kwargs):
    """Build a 4D Triangle from a long-format DataFrame.

    This variant delegates validation/axis construction to private
    helpers (``_input_validation``, ``_set_odims``, ...), supports
    trailing (non-December) origin closings, and splits off
    ultimate-valued rows for re-attachment at the end.
    """
    # Allow an empty Triangle to be assembled programmatically.
    if data is None:
        return
    index, columns, origin, development = self._input_validation(
        data, index, columns, origin, development)
    # Split ultimate-valued rows into `ult` for later re-attachment.
    data, ult = self._split_ult(data, index, columns, origin, development)
    origin_date = self._to_datetime(
        data, origin, format=origin_format).rename('__origin__')
    self.origin_grain = self._get_grain(origin_date)
    # Semi-annual grain is labeled 'S' internally.
    self.origin_grain = 'S' if self.origin_grain == '2Q' else self.origin_grain
    development_date = self._set_development(
        data, development, development_format, origin_date)
    self.development_grain = (
        self._get_grain(development_date)
        if development_date.nunique() != 1 else self.origin_grain)
    data_agg = self._aggregate_data(
        data, origin_date, development_date, index, columns)
    date_axes = self._get_date_axes(
        data_agg["__origin__"], data_agg["__development__"])
    # Deal with labels
    if not index:
        index = ["Total"]
        data_agg[index[0]] = "Total"
    self.kdims, key_idx = self._set_kdims(data_agg, index)
    self.vdims = np.array(columns)
    self.odims, orig_idx = self._set_odims(data_agg, date_axes)
    self.ddims, dev_idx = self._set_ddims(data_agg, date_axes)
    # Set the Triangle values
    coords, amts = self._set_values(data_agg, key_idx, columns,
                                    orig_idx, dev_idx)
    # Zeros are treated as missing in the sparse representation.
    self.values = num_to_nan(
        sp(coords, amts, prune=True, has_duplicates=False, sorted=True,
           shape=(len(self.kdims), len(self.vdims),
                  len(self.odims), len(self.ddims))))
    # Set remaining triangle properties
    val_date = data_agg["__development__"].max()
    # Support lazy (e.g. dask-backed) frames that need .compute().
    val_date = val_date.compute() if hasattr(val_date, 'compute') else val_date
    self.key_labels = index
    self.valuation_date = val_date
    self.is_cumulative = cumulative
    self.virtual_columns = VirtualColumns(self)
    self.is_pattern = pattern
    self.origin_close = 'DEC'
    if self.origin_grain != 'M' and trailing:
        # Trailing periods close on the month of the last origin.
        self.origin_close = pd.to_datetime(
            self.odims[-1]).strftime('%b').upper()
    # Deal with array backend
    self.array_backend = "sparse"
    if array_backend is None:
        array_backend = options.ARRAY_BACKEND
    if not options.AUTO_SPARSE or array_backend == "cupy":
        self.set_backend(array_backend, inplace=True)
    else:
        self = self._auto_sparse()
    self._set_slicers()
    # Deal with special properties
    if self.is_pattern:
        obj = self.dropna()
        self.odims = obj.odims
        self.ddims = obj.ddims
        self.values = obj.values
    if ult:
        # Re-attach the ultimate layer split off above.
        obj = concat((self.dev_to_val().iloc[..., :len(ult.odims), :],
                      ult), -1)
        obj = obj.val_to_dev()
        self.odims = obj.odims
        self.ddims = obj.ddims
        self.values = obj.values
        self.valuation_date = pd.Timestamp(options.ULT_VAL)
def __init__(self, data=None, origin=None, development=None, columns=None,
             index=None, origin_format=None, development_format=None,
             cumulative=None, array_backend=None, *args, **kwargs):
    """Build a 4D Triangle from a long-format DataFrame.

    Aggregates the data to origin/development grains, assembles a
    sparse COO with a nan fill value, then chooses the final backend —
    optionally auto-selecting sparse vs numpy based on density/size.
    """
    from chainladder import AUTO_SPARSE
    self._set_array_backend(array_backend)
    # Allow an empty Triangle to be assembled programmatically.
    if data is None:
        return
    if columns:
        check = data[columns].dtypes
        check = [check] if check.__class__.__name__ == 'dtype' \
            else check.to_list()
        columns = [columns] if type(columns) is not list else columns
        if 'object' in check:
            raise TypeError("column attribute must be numeric.")
        if data[columns].shape[1] != len(columns):
            raise AttributeError(
                "Columns are required to have unique names")
    # Sanitize inputs
    str_to_list = lambda *args: tuple(
        [arg] if type(arg) in [str, pd.Period] else arg for arg in args)
    index, columns, origin, development = str_to_list(
        index, columns, origin, development)
    # Initialize origin and development dates and grains
    origin_date = TriangleBase._to_datetime(
        data, origin, format=origin_format)
    origin_date.name = 'origin'
    self.origin_grain = TriangleBase._get_grain(origin_date)
    origin_date = pd.PeriodIndex(
        origin_date, freq=self.origin_grain).to_timestamp()
    m_cnt = {'Y': 12, 'Q': 3, 'M': 1}
    if development:
        development_date = TriangleBase._to_datetime(
            data, development, period_end=True, format=development_format)
        self.development_grain = TriangleBase._get_grain(development_date)
        col = 'development'
    else:
        # No development column: synthesize one period end per origin.
        development_date = origin_date + \
            pd.tseries.offsets.MonthEnd(m_cnt[self.origin_grain])
        self.development_grain = self.origin_grain
        col = None
    development_date.name = 'development'
    # Aggregate data
    key_gr = [origin_date, development_date] + \
        [data[item] for item in self._flatten(index)]
    data_agg = data[columns].groupby(key_gr).sum().reset_index().fillna(0)
    if not index:
        index = ['Total']
        data_agg[index[0]] = 'Total'
    for item in index:
        # Index labels are kept as strings for stable keying.
        if pd.api.types.is_numeric_dtype(data_agg[item]):
            data_agg[item] = data_agg[item].astype(str)
    # Prep the data for 4D Triangle
    self.valuation_date = data_agg['development'].max()
    # Assign object properties
    date_axes = self._get_date_axes(
        data_agg['origin'], data_agg['development'])  # cartesian product
    dev_lag_unique = TriangleBase._development_lag(
        date_axes['origin'], date_axes['development'])
    dev_lag = TriangleBase._development_lag(
        data_agg['origin'], data_agg['development'])
    dev = np.sort(dev_lag_unique.unique())
    orig = np.sort(date_axes['origin'].unique())
    key = data_agg[index].drop_duplicates().reset_index(drop=True)
    # Lookup tables mapping labels to coordinate positions.
    dev = dict(zip(dev, range(len(dev))))
    orig = dict(zip(orig, range(len(orig))))
    kdims = {v: k for k, v in key.sum(axis=1).to_dict().items()}
    orig_idx = data_agg['origin'].map(orig).values[None].T
    if development:
        dev_idx = dev_lag.map(dev).values[None].T
    else:
        dev_idx = (dev_lag*0).values[None].T
    # origin <= development is required - truncate bad records.
    valid = data_agg['origin'] <= data_agg['development']
    data_agg, orig_idx, dev_idx = \
        data_agg[valid], orig_idx[valid], dev_idx[valid]
    # NOTE(review): this test runs on the already-filtered data_agg, so
    # the warning can never fire — the check likely belongs before the
    # filtering above.  Left unchanged here.
    if sum(data_agg['origin'] > data_agg['development']) > 0:
        warnings.warn("Observations with development before origin "
                      "start have been removed.")
    key_idx = data_agg[index].sum(axis=1).map(kdims).values[None].T
    # Build COO coordinates: (key, column, origin, development) per cell.
    val_idx = ((np.ones(len(data_agg))[None].T) *
               range(len(columns))).reshape((1, -1), order='F').T
    coords = np.concatenate(tuple(
        [np.concatenate((orig_idx, dev_idx), axis=1)] * len(columns)),
        axis=0)
    coords = np.concatenate((np.concatenate(
        tuple([key_idx] * len(columns)), axis=0), val_idx, coords), axis=1)
    amts = data_agg[columns].unstack()
    # Zeros are treated as missing cells.
    amts.loc[amts == 0] = sp.nan
    amts = amts.values.astype('float64')
    values = sp(coords.T, amts, prune=True, fill_value=sp.nan,
                shape=(len(key), len(columns), len(orig),
                       len(dev) if development else 1))
    self.kdims = np.array(key)
    self.key_labels = index
    for num, item in enumerate(index):
        # Restore original dtypes on numeric index labels.
        if item in data.columns:
            if pd.api.types.is_numeric_dtype(data[item]):
                self.kdims[:, num] = \
                    self.kdims[:, num].astype(data[item].dtype)
    self.odims = np.sort(date_axes['origin'].unique())
    if development:
        self.ddims = np.sort(dev_lag_unique.unique())
        # Express development ages in months.
        self.ddims = self.ddims*(m_cnt[self.development_grain])
    else:
        self.ddims = np.array([None])
    self.vdims = np.array(columns)
    # Create 4D Triangle
    if self.array_backend in ['numpy', 'sparse']:
        if AUTO_SPARSE:
            # Stay sparse only when density is low AND the dense array
            # would be large (rough MB heuristic); otherwise densify.
            if not(values.density < 0.2 and
                   np.prod(values.shape)/1e6*8 > 30):
                self.array_backend = 'numpy'
                self.values = np.array(values.todense(),
                                       dtype=kwargs.get('dtype', None))
            else:
                self.array_backend = 'sparse'
                self.values = values
        else:
            if self.array_backend == 'numpy':
                self.values = np.array(values.todense(),
                                       dtype=kwargs.get('dtype', None))
            elif self.array_backend == 'sparse':
                self.values = values
    else:
        # cupy requested; fall back to numpy if cupy failed to import.
        xp = cp
        if cp == np:
            warnings.warn('Unable to load CuPY. Using numpy instead.')
            self.array_backend = 'numpy'
            values = values.todense()
        self.values = xp.array(values, dtype=kwargs.get('dtype', None))
    self.is_cumulative = cumulative
    self._set_slicers()
def __init__(self, data=None, origin=None, development=None,
             columns=None, index=None, origin_format=None,
             development_format=None, cumulative=None,
             array_backend=None, *args, **kwargs):
    """Build a 4D Triangle from a long-format DataFrame.

    Older constructor variant: aggregates by the raw origin/development
    columns first, then maps labels to sparse COO coordinates and
    materializes on the requested backend.
    """
    if array_backend is None:
        from chainladder import ARRAY_BACKEND
        self.array_backend = ARRAY_BACKEND
    else:
        self.array_backend = array_backend
    if data is None:
        ' Instance with nothing set'
        return
    if columns:
        check = data[columns].dtypes
        check = [check] if check.__class__.__name__ == 'dtype' \
            else check.to_list()
        if 'object' in check:
            raise TypeError("column attribute must be numeric.")
    # Sanitize inputs
    index, columns, origin, development = self._str_to_list(
        index, columns, origin, development)
    key_gr = origin + self._flatten(development, index)
    # Aggregate data
    data_agg = data.groupby(key_gr).sum().reset_index().fillna(0)
    if not index:
        index = ['Total']
        data_agg[index[0]] = 'Total'
    for item in index:
        # Index labels are kept as strings for stable keying.
        if pd.api.types.is_numeric_dtype(data_agg[item]):
            data_agg[item] = data_agg[item].astype(str)
    # Initialize origin and development dates and grains
    origin_date = TriangleBase._to_datetime(data_agg, origin,
                                            format=origin_format)
    self.origin_grain = TriangleBase._get_grain(origin_date)
    m_cnt = {'Y': 12, 'Q': 3, 'M': 1}
    if development:
        development_date = TriangleBase._to_datetime(
            data_agg, development, period_end=True,
            format=development_format)
        self.development_grain = TriangleBase._get_grain(development_date)
        col = 'development'
    else:
        # No development column: synthesize one period end per origin.
        development_date = origin_date + \
            pd.tseries.offsets.MonthEnd(m_cnt[self.origin_grain])
        self.development_grain = self.origin_grain
        col = None
    # Prep the data for 4D Triangle
    self.valuation_date = development_date.max()
    origin_date = pd.PeriodIndex(
        origin_date, freq=self.origin_grain).to_timestamp()
    # Assign object properties
    date_axes = self._get_date_axes(
        origin_date, development_date)  # cartesian product
    dev_lag_unique = TriangleBase._development_lag(
        date_axes['origin'], date_axes['development'])
    dev_lag = TriangleBase._development_lag(pd.Series(origin_date),
                                            pd.Series(development_date))
    dev = np.sort(dev_lag_unique.unique())
    orig = np.sort(date_axes['origin'].unique())
    key = data_agg[index].drop_duplicates().reset_index(drop=True)
    # Lookup tables mapping labels to coordinate positions.
    dev = dict(zip(dev, range(len(dev))))
    orig = dict(zip(orig, range(len(orig))))
    kdims = {v: k for k, v in key.sum(axis=1).to_dict().items()}
    orig_idx = origin_date.map(orig).values[None].T
    if development:
        dev_idx = dev_lag.map(dev).values[None].T
    else:
        dev_idx = (dev_lag * 0).values[None].T
    # origin <= development is required - truncate bad records.
    data_agg = data_agg[origin_date <= development_date]
    orig_idx = orig_idx[origin_date <= development_date]
    dev_idx = dev_idx[origin_date <= development_date]
    if sum(origin_date > development_date) > 0:
        warnings.warn(
            "Observations with development before origin start have been removed."
        )
    key_idx = data_agg[index].sum(axis=1).map(kdims).values[None].T
    # Build COO coordinates: (key, column, origin, development) per cell.
    val_idx = ((np.ones(len(data_agg))[None].T) *
               range(len(columns))).reshape((1, -1), order='F').T
    coords = np.concatenate(tuple(
        [np.concatenate((orig_idx, dev_idx), axis=1)] * len(columns)),
        axis=0)
    coords = np.concatenate((np.concatenate(
        tuple([key_idx] * len(columns)), axis=0), val_idx, coords), axis=1)
    amts = data_agg[columns].unstack().values.astype('float64')
    values = sp(coords.T.astype('int32'), amts,
                shape=(len(key), len(columns), len(orig),
                       len(dev) if development else 1), prune=True)
    self.kdims = np.array(key)
    self.key_labels = index
    for num, item in enumerate(index):
        # Restore original dtypes on numeric index labels.
        if item in data.columns:
            if pd.api.types.is_numeric_dtype(data[item]):
                self.kdims[:, num] = \
                    self.kdims[:, num].astype(data[item].dtype)
    self.odims = np.sort(date_axes['origin'].unique())
    if development:
        self.ddims = np.sort(dev_lag_unique.unique())
        # Express development ages in months.
        self.ddims = self.ddims * (m_cnt[self.development_grain])
    else:
        self.ddims = np.array([None])
    self.vdims = np.array(columns)
    self._set_slicers()
    # Create 4D Triangle
    if self.array_backend == 'numpy':
        self.values = np.array(values.todense(),
                               dtype=kwargs.get('dtype', None))
        # Zeros are treated as missing cells on the dense backend.
        self.values[self.values == 0.] = np.nan
    elif self.array_backend == 'sparse':
        self.values = values
    else:
        # cupy requested; fall back to numpy if cupy failed to import.
        xp = cp
        if cp == np:
            warnings.warn('Unable to load CuPY. Using numpy instead.')
            self.array_backend = 'numpy'
        self.values = xp.array(values, dtype=kwargs.get('dtype', None))
    self.is_cumulative = cumulative