    def _fit_OLS(self):
        """ Given a set of w, x, y, and an axis, this function
            returns the OLS slope and intercept.
            TODO:
                Make this work with n_periods = 1 without a numpy warning.
        """
        from chainladder.utils.utility_functions import num_to_nan

        w, x, y, axis = self.w.copy(), self.x.copy(), self.y.copy(), self.axis
        xp = self.xp
        if xp != sp:
            x[w == 0] = xp.nan
            y[w == 0] = xp.nan
        else:
            w2 = w.copy()
            w2.fill_value = sp.nan
            x, y = x * sp(w2), y * sp(w2)
        slope = num_to_nan(
            xp.nansum(w * x * y, axis) -
            xp.nansum(x * w, axis) * xp.nanmean(y, axis)) / num_to_nan(
                xp.nansum(w * x * x, axis) -
                xp.nanmean(x, axis) * xp.nansum(w * x, axis))
        intercept = xp.nanmean(y, axis) - slope * xp.nanmean(x, axis)
        self.slope_ = slope[..., None]
        self.intercept_ = intercept[..., None]
        return self
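Usage note: the nan-aware slope/intercept formula above can be exercised in isolation with plain numpy. The arrays below are illustrative, not part of chainladder:

import numpy as np

# Toy weighted regression; a zero weight drops the outlier point entirely.
w = np.array([1.0, 1.0, 1.0, 0.0])
x = np.array([1.0, 2.0, 3.0, 4.0])
y = np.array([2.1, 3.9, 6.2, 99.0])
x = np.where(w == 0, np.nan, x)   # mask dropped points, as in the dense branch
y = np.where(w == 0, np.nan, y)
slope = (np.nansum(w * x * y) - np.nansum(x * w) * np.nanmean(y)) / \
        (np.nansum(w * x * x) - np.nanmean(x) * np.nansum(w * x))
intercept = np.nanmean(y) - slope * np.nanmean(x)   # ~2.05 and ~-0.03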
Example #2
 def _arithmetic_cleanup(self, obj, other):
     ''' Common functionality AFTER arithmetic operations '''
     xp = cp.get_array_module(obj.values)
     if xp == sp:
         obj.values = sp(obj.values) * sp(obj._expand_dims(
             obj.nan_triangle))
     else:
         obj.values = obj.values * obj._expand_dims(obj.nan_triangle)
     obj.num_to_nan()
     return obj
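A minimal dense-backend sketch of the nan_triangle masking step (arrays illustrative):

import numpy as np

# Multiplying by a 1/nan mask re-blanks cells outside the triangle
# after an arithmetic operation has filled them with numbers.
vals = np.array([[1., 2., 3.],
                 [4., 5., 6.]])
nan_tri = np.array([[1., 1., 1.],
                    [1., 1., np.nan]])
masked = vals * nan_tri   # the lower-right cell becomes nan again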
Example #3
 def __truediv__(self, other):
     xp = cp.get_array_module(self.values)
     obj, other = self._validate_arithmetic(other)
     if xp == sp:
         other.fill_value = xp.nan
         obj.values = sp(xp.nan_to_num(obj.values)) / sp(other)
         obj.values.fill_value = 0.0
     else:
         obj.values = xp.nan_to_num(obj.values) / other
     return self._arithmetic_cleanup(obj, other)
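For the dense branch above, a zero-guarded variant of the same division can be sketched with plain numpy (arrays illustrative; the snippet above divides directly and cleans up nans afterwards):

import numpy as np

a = np.array([[1.0, 0.0], [4.0, 9.0]])
b = np.array([[2.0, 3.0], [0.0, 3.0]])
# Treat nan in the numerator as 0 and leave nan wherever b == 0.
out = np.divide(np.nan_to_num(a), b,
                out=np.full_like(a, np.nan),
                where=b != 0)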
Example #4
 def link_ratio(self):
     xp = cp.get_array_module(self.values)
     obj = copy.deepcopy(self)
     temp = obj.values.copy()
     val_array = obj.valuation.values.reshape(obj.shape[-2:], order='f')[:, 1:]
     obj.ddims = np.array([
         '{}-{}'.format(obj.ddims[i], obj.ddims[i + 1])
         for i in range(len(obj.ddims) - 1)
     ])
     if xp != sp:
         temp[temp == 0] = np.nan
         obj.values = temp[..., 1:] / temp[..., :-1]
         # Check whether we want to eliminate the last origin period
         if xp.max(xp.sum(~xp.isnan(self.values[..., -1, :]), 2) - 1) <= 0:
             obj.values = obj.values[..., :-1, :]
     else:
         temp.fill_value = np.nan
         temp = temp[..., 1:] / temp[..., :-1]
         temp.fill_value = 0.0
         temp.coords = temp.coords[:, temp.data != 0]
         temp.data = temp.data[temp.data != 0]
         temp.shape = tuple(temp.coords.max(1) + 1)
         obj.values = sp(temp)
     obj.odims = obj.odims[:obj.values.shape[2]]
     if hasattr(obj, 'w_'):
         if obj.shape == obj.w_[..., 0:1, :len(obj.odims), :].shape:
             obj = obj * obj.w_[..., 0:1, :len(obj.odims), :]
     return obj
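On a small dense triangle, the core age-to-age computation reduces to one line (toy data):

import numpy as np

# Cumulative toy triangle: rows are origins, columns are development ages;
# nan marks the unobserved lower-right corner.
tri = np.array([[100., 150., 165.],
                [110., 160., np.nan],
                [120., np.nan, np.nan]])
ldf = tri[..., 1:] / tri[..., :-1]   # link ratio from age j to age j + 1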
Example #5
def num_to_nan(arr):
    """ Function that turns all zeros to nan values in an array """
    backend = arr.__class__.__module__.split(".")[0]
    if backend == "sparse":
        if arr.fill_value == 0 or sp.isnan(arr.fill_value):
            arr.fill_value = sp.nan
            arr.coords = arr.coords[:, arr.data != 0]
            arr.data = arr.data[arr.data != 0]
            arr = sp(arr)
        else:
            arr = sp(num_to_nan(np.nan_to_num(arr.todense())),
                     fill_value=sp.nan)
    else:
        nan = np.nan if backend == "numpy" else cp.nan
        arr[arr == 0] = nan
    return arr
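Usage is direct; on a dense numpy array the zeros are replaced in place:

import numpy as np
from chainladder.utils.utility_functions import num_to_nan

arr = np.array([[1.0, 0.0],
                [0.0, 4.0]])
num_to_nan(arr)   # array([[ 1., nan], [nan,  4.]])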
Example #6
 def _slice_valuation(self, key):
     ''' private method for handling of valuation slicing '''
     obj = copy.deepcopy(self)
     obj.valuation_date = min(obj.valuation[key].max(), obj.valuation_date)
     key = key.reshape(self.shape[-2:], order='f')
     nan_tri = np.ones(self.shape[-2:])
     nan_tri = key * nan_tri
     nan_tri[nan_tri == 0] = np.nan
     o, d = nan_tri.shape
     o_idx = np.arange(o)[list(np.sum(np.isnan(nan_tri), 1) != d)]
     d_idx = np.arange(d)[list(np.sum(np.isnan(nan_tri), 0) != o)]
     obj.odims = obj.odims[np.sum(np.isnan(nan_tri), 1) != d]
     if len(obj.ddims) > 1:
         obj.ddims = obj.ddims[np.sum(np.isnan(nan_tri), 0) != o]
     xp = cp.get_array_module(obj.values)
     if xp == cp:
         nan_tri = cp.array(nan_tri)
     if xp == sp:
         nan_tri = sp(nan_tri)
     obj.values = (obj.values * nan_tri)
     if np.all(o_idx == np.array(range(o_idx[0], o_idx[-1] + 1))):
         o_idx = slice(o_idx[0], o_idx[-1] + 1)
     if np.all(d_idx == np.array(range(d_idx[0], d_idx[-1] + 1))):
         d_idx = slice(d_idx[0], d_idx[-1] + 1)
     if type(o_idx) is slice or type(d_idx) is slice:
         # If contiguous slices, this is faster
         obj.values = obj.values[..., o_idx, d_idx]
     else:
         obj.values = xp.take(xp.take(obj.values, o_idx, -2), d_idx, -1)
     return obj
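The contiguous-index optimization at the end is worth noting: basic slicing returns a view, while xp.take materializes a copy. In isolation (illustrative):

import numpy as np

idx = np.array([2, 3, 4, 5])
if np.all(idx == np.arange(idx[0], idx[-1] + 1)):
    idx = slice(idx[0], idx[-1] + 1)   # slice(2, 6); indexing now avoids a copy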
Example #7
def num_to_value(arr, value):
    """ Function that turns all zeros to nan values in an array """
    backend = arr.__class__.__module__.split(".")[0]
    if backend == "sparse":
        if arr.fill_value == 0 or sp.isnan(arr.fill_value):
            arr.coords = arr.coords[:, arr.data != 0]
            arr.data = arr.data[arr.data != 0]
            arr = sp(coords=arr.coords,
                     data=arr.data,
                     fill_value=value,
                     shape=arr.shape)
    else:
        arr = sp(num_to_value(np.nan_to_num(arr.todense()), value),
                 fill_value=value)
    else:
        arr[arr == 0] = value
    return arr
Example #8
    def latest_diagonal(self):
        """ The latest diagonal of the Triangle """
        obj = copy.deepcopy(self)
        xp = cp.get_array_module(self.values)
        val = (self.valuation == self.valuation_date).reshape(self.shape[-2:],
                                                              order='F')
        if xp == sp:
            val = sp(val)

        obj.values = xp.nansum(val * self.values, axis=-1, keepdims=True)
        obj.ddims = pd.DatetimeIndex([self.valuation_date],
                                     dtype='datetime64[ns]',
                                     freq=None)
        return obj
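A dense sketch of the same diagonal extraction, using each origin's last observed cell in place of the valuation-date mask (toy data, not the library's exact mask):

import numpy as np

tri = np.array([[100., 150., 165.],
                [110., 160., np.nan],
                [120., np.nan, np.nan]])
# True only at each origin's most recent non-nan cell.
obs = ~np.isnan(tri)
mask = obs & (np.cumsum(obs, axis=1) == obs.sum(axis=1, keepdims=True))
latest = np.nansum(np.where(mask, tri, 0.0), axis=-1, keepdims=True)
# [[165.], [160.], [120.]]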
Example #9
 def _val_dev_chg(self):
     xp = cp.get_array_module(self.values)
     obj = copy.deepcopy(self)
     x = xp.nan_to_num(obj.values)
     val_mtrx = \
         (np.array(obj.valuation.year).reshape(obj.shape[-2:], order='f') -
          np.array(pd.DatetimeIndex(obj.odims).year)[..., None])*12 + \
         (np.array(obj.valuation.month).reshape(obj.shape[-2:], order='f') -
          np.array(pd.DatetimeIndex(obj.odims).month)[..., None]) + 1
     rng = np.sort(np.unique(val_mtrx.flatten()[val_mtrx.flatten() > 0]))
     if sp == xp:
         val_mtrx = sp(val_mtrx)
     x = [
         xp.sum((val_mtrx == item) * x, -1, keepdims=True)
         for item in xp.array(rng)
     ]
     x = xp.concatenate(x, -1)
     obj.values = x
     obj.num_to_nan()
     obj.ddims = np.array([item for item in rng])
     obj._set_slicers()
     return obj
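The val_mtrx construction, i.e. the number of months between each origin and each cell's valuation, can be checked on its own (dates illustrative):

import numpy as np
import pandas as pd

origins = pd.DatetimeIndex(['2019-01-01', '2020-01-01'])
valuations = pd.DatetimeIndex(['2019-12-31', '2020-12-31'])
val_mtrx = ((valuations.year.values[None] - origins.year.values[:, None]) * 12 +
            (valuations.month.values[None] - origins.month.values[:, None]) + 1)
# array([[12, 24],
#        [ 0, 12]])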
Example #10
    def __init__(self,
                 data=None,
                 origin=None,
                 development=None,
                 columns=None,
                 index=None,
                 origin_format=None,
                 development_format=None,
                 cumulative=None,
                 array_backend=None,
                 pattern=False,
                 *args,
                 **kwargs):
        # Allow an empty Triangle so that we can piece it together programmatically
        if data is None:
            return

        # Check whether all columns are unique and numeric
        check = data[columns].dtypes
        check = [check] if isinstance(check, np.dtype) else check.to_list()
        columns = [columns] if type(columns) is not list else columns
        if "object" in check:
            raise TypeError("column attribute must be numeric.")
        if data[columns].shape[1] != len(columns):
            raise AttributeError("Columns are required to have unique names")

        # Sanitize all axis inputs to lists
        str_to_list = lambda *args: tuple(
            [arg] if type(arg) in [str, pd.Period] else arg for arg in args)
        index, columns, origin, development = str_to_list(
            index, columns, origin, development)

        # Determine desired array backend of the Triangle
        if array_backend is None:
            from chainladder import ARRAY_BACKEND

            array_backend = ARRAY_BACKEND
        if (development and len(development) == 1
                and data[development[0]].dtype == "<M8[ns]"):
            u = data[data[development[0]] == ULT_VAL].copy()
            if len(u) > 0 and len(u) != len(data):
                u = TriangleBase(
                    u,
                    origin=origin,
                    development=development,
                    columns=columns,
                    index=index,
                )
                data = data[data[development[0]] != ULT_VAL]
            else:
                u = None
        else:
            u = None
        # Initialize origin and its grain
        origin = development if origin is None else origin
        origin_date = TriangleBase._to_datetime(data,
                                                origin,
                                                format=origin_format)
        self.origin_grain = TriangleBase._get_grain(origin_date)
        origin_date = (pd.PeriodIndex(
            origin_date,
            freq=self.origin_grain).to_timestamp().rename("origin"))

        # Initialize development and its grain
        m_cnt = {"Y": 12, "Q": 3, "M": 1}
        has_dev = development and len(np.unique(data[development])) > 1
        if has_dev:
            development_date = TriangleBase._to_datetime(
                data, development, period_end=True, format=development_format)
            self.development_grain = TriangleBase._get_grain(development_date)
        else:
            development_date = pd.PeriodIndex(
                origin_date +
                pd.tseries.offsets.MonthEnd(m_cnt[self.origin_grain]),
                freq={"Y": "A"}.get(self.origin_grain, self.origin_grain),
            ).to_timestamp(how="e")
            self.development_grain = self.origin_grain
        development_date.name = "development"

        # Summarize dataframe to the level specified in axes
        key_gr = ([origin_date, development_date] +
                  [data[item] for item in ([] if not index else index)])
        data_agg = data[columns].groupby(key_gr).sum().reset_index().fillna(0)
        if not index:
            index = ["Total"]
            data_agg[index[0]] = "Total"

        # Fill in any gaps in origin/development
        date_axes = self._get_date_axes(
            data_agg["origin"], data_agg["development"])  # cartesian product
        dev_lag = TriangleBase._development_lag(data_agg["origin"],
                                                data_agg["development"])

        # Grab unique index, origin, development
        dev_lag_unique = np.sort(
            TriangleBase._development_lag(date_axes["origin"],
                                          date_axes["development"]).unique())

        orig_unique = np.sort(date_axes["origin"].unique())
        kdims = data_agg[index].drop_duplicates().reset_index(
            drop=True).reset_index()

        # Map index, origin, development indices to data
        set_idx = (lambda col, unique: col.map(
            dict(zip(unique, range(len(unique))))).values[None].T)
        orig_idx = set_idx(data_agg["origin"], orig_unique)
        dev_idx = set_idx(dev_lag, dev_lag_unique)
        key_idx = (data_agg[index].merge(kdims, how="left",
                                         on=index)["index"].values[None].T)

        # origin <= development is required - truncate bad records if not true
        valid = data_agg["origin"] <= data_agg["development"]
        if sum(~valid) > 0:
            warnings.warn("Observations with development before " +
                          "origin start have been removed.")
        data_agg, orig_idx = data_agg[valid], orig_idx[valid]
        dev_idx, key_idx = dev_idx[valid], key_idx[valid]

        # All Triangles start out as sparse arrays
        val_idx = (((np.ones(len(data_agg))[None].T) *
                    range(len(columns))).reshape((1, -1), order="F").T)
        coords = np.concatenate(
            tuple([np.concatenate((orig_idx, dev_idx), 1)] * len(columns)), 0)
        coords = np.concatenate((np.concatenate(
            tuple([key_idx] * len(columns)), 0), val_idx, coords), 1)
        amts = data_agg[columns].unstack()
        amts = amts.values.astype("float64")
        self.array_backend = "sparse"
        self.values = num_to_nan(
            sp(
                coords.T.astype('int64'),
                amts,
                prune=True,
                has_duplicates=False,
                sorted=True,
                shape=(
                    len(kdims),
                    len(columns),
                    len(orig_unique),
                    len(dev_lag_unique) if has_dev else 1,
                ),
            ))

        # Set all axis values
        self.valuation_date = data_agg["development"].max()
        self.kdims = kdims.drop(columns="index").values
        self.odims = orig_unique
        self.ddims = dev_lag_unique if has_dev else dev_lag[0:1].values
        self.ddims = self.ddims * (m_cnt[self.development_grain])
        if development and not has_dev:
            self.ddims = pd.DatetimeIndex(
                TriangleBase._to_datetime(data,
                                          development,
                                          period_end=True,
                                          format=development_format)[0:1])
            self.valuation_date = self.ddims[0]
        self.vdims = np.array(columns)

        # Set remaining triangle properties
        self.key_labels = index
        self.is_cumulative = cumulative
        self.virtual_columns = VirtualColumns(self)
        self.is_pattern = pattern
        if not AUTO_SPARSE or array_backend == "cupy":
            self.set_backend(array_backend, inplace=True)
        else:
            self = self._auto_sparse()
        self._set_slicers()
        if self.is_pattern:
            obj = self.dropna()
            self.odims = obj.odims
            self.ddims = obj.ddims
            self.values = obj.values
        if u:
            obj = concat((self.dev_to_val().iloc[..., :len(u.odims), :], u),
                         -1)
            obj = obj.val_to_dev()
            self.odims = obj.odims
            self.ddims = obj.ddims
            self.values = obj.values
            self.valuation_date = pd.Timestamp(ULT_VAL)
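In practice this constructor is reached through the public Triangle class; a typical call looks like the following (the DataFrame and column names are illustrative):

import pandas as pd
import chainladder as cl

df = pd.DataFrame({
    'AccidentYear': [2019, 2019, 2020],
    'ValuationDate': ['2019-12-31', '2020-12-31', '2020-12-31'],
    'Paid': [100.0, 150.0, 110.0],
})
tri = cl.Triangle(df, origin='AccidentYear', development='ValuationDate',
                  columns='Paid', cumulative=True)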
Example #11
    def fit(self, X, y=None, sample_weight=None):
        """Fit the model with X.

        Parameters
        ----------
        X : Triangle-like
            Set of LDFs to which the munich adjustment will be applied.
        y : Ignored
        sample_weight : Ignored

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        xp = cp.get_array_module(X.values)
        if (type(X.ddims) != np.ndarray):
            raise ValueError(
                'Triangle must be expressed with development lags')
        if self.fillna:
            tri_array = (X + self.fillna).values
        else:
            tri_array = X.values.copy()
        if xp != sp:
            tri_array[tri_array == 0] = xp.nan
        if type(self.average) is not list:
            average = [self.average] * (tri_array.shape[-1] - 1)
        else:
            average = self.average
        average = np.array(average)
        self.average_ = average
        if type(self.n_periods) is not list:
            n_periods = [self.n_periods] * (tri_array.shape[-1] - 1)
        else:
            n_periods = self.n_periods
        n_periods = np.array(n_periods)
        self.n_periods_ = n_periods
        weight_dict = {'regression': 0, 'volume': 1, 'simple': 2}
        x, y = tri_array[..., :-1], tri_array[..., 1:]
        val = xp.array([weight_dict.get(item.lower(), 1) for item in average])
        for i in [2, 1, 0]:
            val = xp.repeat(val[None], tri_array.shape[i], axis=0)
        val = xp.nan_to_num(val * (y * 0 + 1))
        if xp in [cp, sp]:
            link_ratio = y / x
        else:
            link_ratio = xp.divide(y, x, where=xp.nan_to_num(x) != 0)
        if xp == sp:
            self.w_ = sp(
                self._assign_n_periods_weight(X) *
                self._drop_adjustment(X, link_ratio))
        else:
            self.w_ = xp.array(
                self._assign_n_periods_weight(X) *
                self._drop_adjustment(X, link_ratio))
        w = self.w_ / (x**(val))
        params = WeightedRegression(axis=2, thru_orig=True).fit(x, y, w)
        if self.n_periods != 1:
            params = params.sigma_fill(self.sigma_interpolation)
        else:
            warnings.warn('Setting n_periods=1 does not allow enough degrees '
                          'of freedom to support calculation of all regression'
                          ' statistics.  Only LDFs have been calculated.')
        params.std_err_ = xp.nan_to_num(params.std_err_) + \
            xp.nan_to_num(
                (1-xp.nan_to_num(params.std_err_*0+1)) *
                params.sigma_ /
                xp.swapaxes(xp.sqrt(x**(2-val))[..., 0:1, :], -1, -2))
        params = xp.concatenate(
            (params.slope_, params.sigma_, params.std_err_), 3)
        params = xp.swapaxes(params, 2, 3)
        self.ldf_ = self._param_property(X, params, 0)
        self.sigma_ = self._param_property(X, params, 1)
        self.std_err_ = self._param_property(X, params, 2)
        return self
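A typical fit, assuming a chainladder version that ships the load_sample helper:

import chainladder as cl

# 'volume' maps to weight_dict['volume'] == 1, i.e. w ~ 1 / x in the
# weighted regression above.
tri = cl.load_sample('raa')
dev = cl.Development(average='volume', n_periods=5).fit(tri)
dev.ldf_   # fitted age-to-age factors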
Example #12
 def __init__(self, data=None, origin=None, development=None, columns=None,
              index=None, origin_format=None, development_format=None,
              cumulative=None, array_backend=None, pattern=False,
              trailing=False, *args, **kwargs):
     if data is None:
         return
     index, columns, origin, development = self._input_validation(
         data, index, columns, origin, development)
     data, ult = self._split_ult(data, index, columns, origin, development)
     origin_date = self._to_datetime(
         data, origin, format=origin_format).rename('__origin__')
     self.origin_grain = self._get_grain(origin_date)
     self.origin_grain = 'S' if self.origin_grain == '2Q' else self.origin_grain
     development_date = self._set_development(
         data, development, development_format, origin_date)
     self.development_grain = (
         self._get_grain(development_date) if development_date.nunique() != 1
         else self.origin_grain)
     data_agg = self._aggregate_data(
         data, origin_date, development_date, index, columns)
     date_axes = self._get_date_axes(
         data_agg["__origin__"], data_agg["__development__"])
     # Deal with labels
     if not index:
         index = ["Total"]
         data_agg[index[0]] = "Total"
     self.kdims, key_idx = self._set_kdims(data_agg, index)
     self.vdims = np.array(columns)
     self.odims, orig_idx = self._set_odims(data_agg, date_axes)
     self.ddims, dev_idx = self._set_ddims(data_agg, date_axes)
     # Set the Triangle values
     coords, amts = self._set_values(data_agg, key_idx, columns, orig_idx, dev_idx)
     self.values = num_to_nan(
         sp(coords, amts, prune=True,
            has_duplicates=False, sorted=True,
            shape=(len(self.kdims), len(self.vdims),
                   len(self.odims), len(self.ddims))))
     # Set remaining triangle properties
     val_date = data_agg["__development__"].max()
     val_date = val_date.compute() if hasattr(val_date, 'compute') else val_date
     self.key_labels = index
     self.valuation_date = val_date
     self.is_cumulative = cumulative
     self.virtual_columns = VirtualColumns(self)
     self.is_pattern = pattern
     self.origin_close = 'DEC'
     if self.origin_grain != 'M' and trailing:
         self.origin_close = pd.to_datetime(self.odims[-1]).strftime('%b').upper()
     # Deal with array backend
     self.array_backend = "sparse"
     if array_backend is None:
         array_backend = options.ARRAY_BACKEND
     if not options.AUTO_SPARSE or array_backend == "cupy":
         self.set_backend(array_backend, inplace=True)
     else:
         self = self._auto_sparse()
     self._set_slicers()
     # Deal with special properties
     if self.is_pattern:
         obj = self.dropna()
         self.odims = obj.odims
         self.ddims = obj.ddims
         self.values = obj.values
     if ult:
         obj = concat((self.dev_to_val().iloc[..., :len(ult.odims), :], ult), -1)
         obj = obj.val_to_dev()
         self.odims = obj.odims
         self.ddims = obj.ddims
         self.values = obj.values
         self.valuation_date = pd.Timestamp(options.ULT_VAL)
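The origin_close inference near the end can be sketched on its own: with trailing, non-monthly origin periods, the closing month is read off the last origin date (date illustrative):

import pandas as pd

last_origin = pd.Timestamp('2020-06-30')
origin_close = last_origin.strftime('%b').upper()   # 'JUN'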
Example #13
    def __init__(self, data=None, origin=None, development=None,
                 columns=None, index=None, origin_format=None,
                 development_format=None, cumulative=None,
                 array_backend=None, *args, **kwargs):
        from chainladder import AUTO_SPARSE
        self._set_array_backend(array_backend)
        if data is None:
            return
        if columns:
            check = data[columns].dtypes
            check = [check] if check.__class__.__name__ == 'dtype' else check.to_list()
            columns = [columns] if type(columns) is not list else columns
            if 'object' in check:
                raise TypeError("column attribute must be numeric.")
            if data[columns].shape[1] != len(columns):
                raise AttributeError("Columns are required to have unique names")
        # Sanitize inputs
        str_to_list = lambda *args: tuple(
            [arg] if type(arg) in [str, pd.Period] else arg for arg in args)
        index, columns, origin, development = str_to_list(
            index, columns, origin, development)

        # Initialize origin and development dates and grains
        origin_date = TriangleBase._to_datetime(
            data, origin, format=origin_format)
        origin_date.name = 'origin'
        self.origin_grain = TriangleBase._get_grain(origin_date)
        origin_date = pd.PeriodIndex(origin_date, freq=self.origin_grain).to_timestamp()
        m_cnt = {'Y': 12, 'Q': 3, 'M': 1}
        if development:
            development_date = TriangleBase._to_datetime(
                data, development, period_end=True,
                format=development_format)
            self.development_grain = TriangleBase._get_grain(development_date)
            col = 'development'
        else:
            development_date = origin_date + \
                pd.tseries.offsets.MonthEnd(m_cnt[self.origin_grain])
            self.development_grain = self.origin_grain
            col = None
        development_date.name = 'development'

        # Aggregate data
        key_gr = [origin_date, development_date] + \
                 [data[item] for item in self._flatten(index)]
        data_agg = data[columns].groupby(key_gr).sum().reset_index().fillna(0)
        if not index:
            index = ['Total']
            data_agg[index[0]] = 'Total'
        for item in index:
            if pd.api.types.is_numeric_dtype(data_agg[item]):
                data_agg[item] = data_agg[item].astype(str)

        # Prep the data for 4D Triangle
        self.valuation_date = data_agg['development'].max()
        # Assign object properties
        date_axes = self._get_date_axes(data_agg['origin'], data_agg['development']) # cartesian product
        dev_lag_unique = TriangleBase._development_lag(date_axes['origin'], date_axes['development'])
        dev_lag = TriangleBase._development_lag(data_agg['origin'], data_agg['development'])
        dev = np.sort(dev_lag_unique.unique())
        orig = np.sort(date_axes['origin'].unique())
        key = data_agg[index].drop_duplicates().reset_index(drop=True)
        dev = dict(zip(dev, range(len(dev))))
        orig = dict(zip(orig, range(len(orig))))
        kdims = {v:k for k, v in key.sum(axis=1).to_dict().items()}
        orig_idx = data_agg['origin'].map(orig).values[None].T
        if development:
            dev_idx = dev_lag.map(dev).values[None].T
        else:
            dev_idx = (dev_lag*0).values[None].T
        valid = data_agg['origin'] <= data_agg['development']
        if (~valid).sum() > 0:
            warnings.warn("Observations with development before origin start have been removed.")
        data_agg, orig_idx, dev_idx = data_agg[valid], orig_idx[valid], dev_idx[valid]
        key_idx = data_agg[index].sum(axis=1).map(kdims).values[None].T
        val_idx = ((np.ones(len(data_agg))[None].T)*range(len(columns))).reshape((1,-1), order='F').T
        coords = np.concatenate(tuple([np.concatenate((orig_idx, dev_idx), axis=1)]*len(columns)),  axis=0)
        coords = np.concatenate((np.concatenate(tuple([key_idx]*len(columns)), axis=0), val_idx, coords), axis=1)
        amts = data_agg[columns].unstack()
        amts.loc[amts==0] = sp.nan
        amts = amts.values.astype('float64')
        values = sp(coords.T, amts, prune=True, fill_value=sp.nan,
                    shape=(len(key), len(columns), len(orig),
                           len(dev) if development else 1))
        self.kdims = np.array(key)
        self.key_labels = index
        for num, item in enumerate(index):
            if item in data.columns:
                if pd.api.types.is_numeric_dtype(data[item]):
                    self.kdims[:, num] = self.kdims[:, num].astype(data[item].dtype)
        self.odims = np.sort(date_axes['origin'].unique())
        if development:
            self.ddims = np.sort(dev_lag_unique.unique())
            self.ddims = self.ddims*(m_cnt[self.development_grain])
        else:
            self.ddims = np.array([None])
        self.vdims = np.array(columns)
        # Create 4D Triangle
        if self.array_backend in ['numpy', 'sparse']:
            if AUTO_SPARSE:
                if not (values.density < 0.2 and np.prod(values.shape) / 1e6 * 8 > 30):
                    self.array_backend = 'numpy'
                    self.values = np.array(values.todense(), dtype=kwargs.get('dtype', None))
                else:
                    self.array_backend = 'sparse'
                    self.values = values
            else:
                if self.array_backend == 'numpy':
                    self.values = np.array(values.todense(), dtype=kwargs.get('dtype', None))
                elif self.array_backend == 'sparse':
                    self.values = values
        else:
            xp = cp
            if cp == np:
                warnings.warn('Unable to load CuPY.  Using numpy instead.')
                self.array_backend = 'numpy'
            values = values.todense()
            self.values = xp.array(values, dtype=kwargs.get('dtype', None))
        self.is_cumulative = cumulative
        self._set_slicers()
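The AUTO_SPARSE heuristic above, in isolation: stay sparse only when the array is both sparse enough and large enough that a dense copy would hurt (numbers illustrative):

import numpy as np

shape = (10, 5, 100, 100)              # a hypothetical triangle shape
density = 0.05                         # fraction of non-fill entries
dense_mb = np.prod(shape) / 1e6 * 8    # float64 size if densified, in MB
use_sparse = density < 0.2 and dense_mb > 30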
Example #14
    def __init__(self,
                 data=None,
                 origin=None,
                 development=None,
                 columns=None,
                 index=None,
                 origin_format=None,
                 development_format=None,
                 cumulative=None,
                 array_backend=None,
                 *args,
                 **kwargs):
        if array_backend is None:
            from chainladder import ARRAY_BACKEND
            self.array_backend = ARRAY_BACKEND
        else:
            self.array_backend = array_backend
        if data is None:
            ' Instance with nothing set'
            return
        if columns:
            check = data[columns].dtypes
            check = [
                check
            ] if check.__class__.__name__ == 'dtype' else check.to_list()
            if 'object' in check:
                raise TypeError("column attribute must be numeric.")
        # Sanitize inputs
        index, columns, origin, development = self._str_to_list(
            index, columns, origin, development)
        key_gr = origin + self._flatten(development, index)
        # Aggregate data

        data_agg = data.groupby(key_gr).sum().reset_index().fillna(0)
        if not index:
            index = ['Total']
            data_agg[index[0]] = 'Total'
        for item in index:
            if pd.api.types.is_numeric_dtype(data_agg[item]):
                data_agg[item] = data_agg[item].astype(str)
        # Initialize origin and development dates and grains
        origin_date = TriangleBase._to_datetime(data_agg,
                                                origin,
                                                format=origin_format)
        self.origin_grain = TriangleBase._get_grain(origin_date)

        m_cnt = {'Y': 12, 'Q': 3, 'M': 1}
        if development:
            development_date = TriangleBase._to_datetime(
                data_agg,
                development,
                period_end=True,
                format=development_format)
            self.development_grain = TriangleBase._get_grain(development_date)
            col = 'development'
        else:
            development_date = origin_date + \
                pd.tseries.offsets.MonthEnd(m_cnt[self.origin_grain])
            self.development_grain = self.origin_grain
            col = None
        # Prep the data for 4D Triangle
        self.valuation_date = development_date.max()
        origin_date = pd.PeriodIndex(origin_date,
                                     freq=self.origin_grain).to_timestamp()
        # Assign object properties
        date_axes = self._get_date_axes(origin_date,
                                        development_date)  # cartesian product
        dev_lag_unique = TriangleBase._development_lag(
            date_axes['origin'], date_axes['development'])
        dev_lag = TriangleBase._development_lag(pd.Series(origin_date),
                                                pd.Series(development_date))
        dev = np.sort(dev_lag_unique.unique())
        orig = np.sort(date_axes['origin'].unique())
        key = data_agg[index].drop_duplicates().reset_index(drop=True)
        dev = dict(zip(dev, range(len(dev))))
        orig = dict(zip(orig, range(len(orig))))
        kdims = {v: k for k, v in key.sum(axis=1).to_dict().items()}
        orig_idx = origin_date.map(orig).values[None].T
        if development:
            dev_idx = dev_lag.map(dev).values[None].T
        else:
            dev_idx = (dev_lag * 0).values[None].T
        valid = origin_date <= development_date
        if (~valid).sum() > 0:
            warnings.warn(
                "Observations with development before origin start have been removed."
            )
        data_agg = data_agg[valid]
        orig_idx = orig_idx[valid]
        dev_idx = dev_idx[valid]
        key_idx = data_agg[index].sum(axis=1).map(kdims).values[None].T
        val_idx = ((np.ones(len(data_agg))[None].T) *
                   range(len(columns))).reshape((1, -1), order='F').T
        coords = np.concatenate(tuple(
            [np.concatenate((orig_idx, dev_idx), axis=1)] * len(columns)),
                                axis=0)
        coords = np.concatenate((np.concatenate(
            tuple([key_idx] * len(columns)), axis=0), val_idx, coords),
                                axis=1)
        amts = data_agg[columns].unstack().values.astype('float64')
        values = sp(coords.T.astype('int32'),
                    amts,
                    shape=(len(key), len(columns), len(orig),
                           len(dev) if development else 1),
                    prune=True)
        self.kdims = np.array(key)
        self.key_labels = index
        for num, item in enumerate(index):
            if item in data.columns:
                if pd.api.types.is_numeric_dtype(data[item]):
                    self.kdims[:, num] = self.kdims[:, num].astype(data[item].dtype)
        self.odims = np.sort(date_axes['origin'].unique())
        if development:
            self.ddims = np.sort(dev_lag_unique.unique())
            self.ddims = self.ddims * (m_cnt[self.development_grain])
        else:
            self.ddims = np.array([None])
        self.vdims = np.array(columns)
        self._set_slicers()
        # Create 4D Triangle
        if self.array_backend == 'numpy':
            self.values = np.array(values.todense(),
                                   dtype=kwargs.get('dtype', None))
            self.values[self.values == 0.] = np.nan
        elif self.array_backend == 'sparse':
            self.values = values
        else:
            xp = cp
            if cp == np:
                warnings.warn('Unable to load CuPY.  Using numpy instead.')
                self.array_backend = 'numpy'
            self.values = xp.array(values, dtype=kwargs.get('dtype', None))
        self.is_cumulative = cumulative
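After construction, a Triangle can be moved between backends explicitly with the public set_backend method (sample dataset assumed available via load_sample):

import chainladder as cl

tri = cl.load_sample('raa')
tri_sparse = tri.set_backend('sparse')
tri_sparse.array_backend   # 'sparse'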