def __init__(self, model, params, filter_results, cov_type='opg',
             cov_kwds=None, **kwargs):
    self.data = model.data

    tsbase.TimeSeriesModelResults.__init__(self, model, params,
                                           normalized_cov_params=None,
                                           scale=1.)

    # Save the state space representation output
    self.filter_results = filter_results

    # Dimensions
    self.nobs = model.nobs

    # Setup covariance matrix notes dictionary
    if not hasattr(self, 'cov_kwds'):
        self.cov_kwds = {}
    self.cov_type = cov_type

    # Setup the cache
    self._cache = resettable_cache()

    # Handle covariance matrix calculation
    if cov_kwds is None:
        cov_kwds = {}
    self._get_robustcov_results(cov_type=cov_type, use_self=True,
                                **cov_kwds)
def __init__(self, tables, shift_zeros=False):
    if isinstance(tables, np.ndarray):
        sp = tables.shape
        if (len(sp) != 3) or (sp[0] != 2) or (sp[1] != 2):
            raise ValueError("If an ndarray, argument must be 2x2xn")
        table = tables
    else:
        # Create a data cube
        table = np.dstack(tables).astype(np.float64)

    if shift_zeros:
        zx = (table == 0).sum(0).sum(0)
        ix = np.flatnonzero(zx > 0)
        if len(ix) > 0:
            table = table.copy()
            table[:, :, ix] += 0.5

    self.table = table
    self._cache = resettable_cache()

    # Quantities to precompute.  Table entries are [[a, b], [c, d]],
    # 'ad' is 'a * d', 'apb' is 'a + b', 'dma' is 'd - a', etc.
    self._apb = table[0, 0, :] + table[0, 1, :]
    self._apc = table[0, 0, :] + table[1, 0, :]
    self._bpd = table[0, 1, :] + table[1, 1, :]
    self._cpd = table[1, 0, :] + table[1, 1, :]
    self._ad = table[0, 0, :] * table[1, 1, :]
    self._bc = table[0, 1, :] * table[1, 0, :]
    self._apd = table[0, 0, :] + table[1, 1, :]
    self._dma = table[1, 1, :] - table[0, 0, :]
    self._n = table.sum(0).sum(0)
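A brief usage sketch for the constructor above. The enclosing class is not shown in the snippet, so the name `StratifiedTable` below is an assumption; the private attributes are the ones the constructor precomputes.

import numpy as np

# Two strata of a 2x2 table; the second stratum contains a zero cell.
strata = [np.array([[10, 5], [3, 12]]),
          np.array([[8, 0], [6, 9]])]

st = StratifiedTable(strata, shift_zeros=True)  # class name assumed
# np.dstack stacks the 2x2 tables into a 2x2xk cube, so table[:, :, i] is
# stratum i.  With shift_zeros=True, only strata containing a zero get 0.5
# added to every cell, which keeps the per-stratum odds ratios
# st._ad / st._bc finite.
print(st._ad / st._bc)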
def __init__(self, model):
    self.data = model.data

    # Save the model output
    self._endog_names = model.endog_names
    self._exog_names = model.exog_names
    self._params = model.params
    self._param_names = model.data.param_names
    self._model_names = model.model_names
    self._model_latex_names = model.model_latex_names

    # Associate the names with the true parameters
    params = pd.Series(self._params, index=self._param_names)

    # Initialize the Statsmodels model base
    tsbase.TimeSeriesModelResults.__init__(self, model, params,
                                           normalized_cov_params=None,
                                           scale=1.)

    # Initialize the statespace representation
    super(MLEResults, self).__init__(model)

    # Setup the cache
    self._cache = resettable_cache()
def __init__(self, results, get_margeff, derivative, dist=None,
             margeff_args=()):
    self._cache = resettable_cache()
    self.results = results
    self.dist = dist
    self._get_margeff = get_margeff
    self.get_margeff(margeff_args)
def __init__(self, endog, exog=None, missing='none', hasconst=None,
             **kwargs):
    if 'design_info' in kwargs:
        self.design_info = kwargs.pop('design_info')
    if 'formula' in kwargs:
        self.formula = kwargs.pop('formula')
    if missing != 'none':
        arrays, nan_idx = self.handle_missing(endog, exog, missing,
                                              **kwargs)
        self.missing_row_idx = nan_idx
        self.__dict__.update(arrays)  # attach all the data arrays
        self.orig_endog = self.endog
        self.orig_exog = self.exog
        self.endog, self.exog = self._convert_endog_exog(
            self.endog, self.exog)
    else:
        self.__dict__.update(kwargs)  # attach the extra arrays anyway
        self.orig_endog = endog
        self.orig_exog = exog
        self.endog, self.exog = self._convert_endog_exog(endog, exog)

    # this has side-effects, attaches k_constant and const_idx
    self._handle_constant(hasconst)
    self._check_integrity()
    self._cache = resettable_cache()
def __init__(self, model, params, normalized_cov_params, scale):
    super(RLMResults, self).__init__(model, params, normalized_cov_params,
                                     scale)
    self.model = model
    self.df_model = model.df_model
    self.df_resid = model.df_resid
    self.nobs = model.nobs
    self._cache = resettable_cache()
def __init__(self, data, dist=stats.norm, fit=False, distargs=(), a=0,
             loc=0, scale=1):
    self.data = data
    self.a = a
    self.nobs = data.shape[0]
    self.distargs = distargs
    self.fit = fit

    if isinstance(dist, basestring):
        dist = getattr(stats, dist)

    self.fit_params = dist.fit(data)
    if fit:
        self.loc = self.fit_params[-2]
        self.scale = self.fit_params[-1]
        if len(self.fit_params) > 2:
            self.dist = dist(*self.fit_params[:-2], loc=0, scale=1)
        else:
            self.dist = dist(loc=0, scale=1)
    elif distargs or loc != 0 or scale != 1:
        self.dist = dist(*distargs, loc=loc, scale=scale)
        self.loc = loc
        self.scale = scale
    else:
        self.dist = dist
        self.loc = loc
        self.scale = scale

    # properties
    self._cache = resettable_cache()
def __init__(self, data, dist=stats.norm, fit=False, distargs=(), a=0,
             loc=0, scale=1):
    self.data = data
    self.a = a
    self.nobs = data.shape[0]
    self.distargs = distargs
    self.fit = fit

    if isinstance(dist, string_types):
        dist = getattr(stats, dist)

    self.fit_params = dist.fit(data)
    if fit:
        self.loc = self.fit_params[-2]
        self.scale = self.fit_params[-1]
        if len(self.fit_params) > 2:
            self.dist = dist(*self.fit_params[:-2], loc=0, scale=1)
        else:
            self.dist = dist(loc=0, scale=1)
    elif distargs or loc != 0 or scale != 1:
        self.dist = dist(*distargs, loc=loc, scale=scale)
        self.loc = loc
        self.scale = scale
    else:
        self.dist = dist
        self.loc = loc
        self.scale = scale

    # properties
    self._cache = resettable_cache()
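The two snippets above are versions of the same constructor; the later one has the correct `!=` comparisons in the `elif` branch (the first snippet's `==` was a bug and is fixed above). A small sketch of what the `fit=True` branch buys you, using only scipy; the variable names here are illustrative, not from the original class:

import numpy as np
from scipy import stats

data = stats.t(df=5, loc=2.0, scale=3.0).rvs(size=500, random_state=0)
fit_params = stats.t.fit(data)            # (df, loc, scale)
loc, scale = fit_params[-2], fit_params[-1]
# Freeze the fitted shape parameters but standardize loc/scale, keeping
# loc and scale separately -- exactly what the fit=True branch does.
frozen = stats.t(*fit_params[:-2], loc=0, scale=1)
# Theoretical quantiles come from the standardized distribution and are
# de-standardized afterwards, which is what a probability plot needs.
theoretical = frozen.ppf(np.linspace(0.01, 0.99, 99)) * scale + loc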
def __init__(self, endog, exog=None, missing='none', hasconst=None,
             **kwargs):
    if 'design_info' in kwargs:
        self.design_info = kwargs.pop('design_info')
    if 'formula' in kwargs:
        self.formula = kwargs.pop('formula')
    if missing != 'none':
        arrays, nan_idx = self.handle_missing(endog, exog, missing,
                                              **kwargs)
        self.missing_row_idx = nan_idx
        self.__dict__.update(arrays)  # attach all the data arrays
        self.orig_endog = self.endog
        self.orig_exog = self.exog
        self.endog, self.exog = self._convert_endog_exog(self.endog,
                                                         self.exog)
    else:
        self.__dict__.update(kwargs)  # attach the extra arrays anyway
        self.orig_endog = endog
        self.orig_exog = exog
        self.endog, self.exog = self._convert_endog_exog(endog, exog)

    # this has side-effects, attaches k_constant and const_idx
    self._handle_constant(hasconst)
    self._check_integrity()
    self._cache = resettable_cache()
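A standalone sketch of what the `missing != 'none'` path above amounts to for `missing='drop'`: find rows with any NaN in endog or exog, record their indices, and drop them. This is an illustrative reimplementation, not the statsmodels `handle_missing` itself:

import numpy as np

def drop_missing(endog, exog=None):
    # rows with any NaN in endog ...
    endog = np.asarray(endog, dtype=float)
    mask = np.isnan(endog)
    if endog.ndim > 1:
        mask = mask.any(axis=1)
    # ... or any NaN in exog
    if exog is not None:
        exog = np.asarray(exog, dtype=float)
        exog_mask = np.isnan(exog)
        if exog.ndim > 1:
            exog_mask = exog_mask.any(axis=1)
        mask |= exog_mask
    nan_idx = np.flatnonzero(mask)        # becomes missing_row_idx
    arrays = {'endog': endog[~mask]}
    if exog is not None:
        arrays['exog'] = exog[~mask]
    return arrays, nan_idx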
def __init__(self, model, cov_type='opg', cov_kwds=None):
    self.data = model.data

    # Save the model output
    self._endog_names = model.endog_names
    self._exog_names = model.exog_names
    self._params = model.params.copy()
    self._param_names = model.data.param_names
    self._model_names = model.model_names
    self._model_latex_names = model.model_latex_names

    # Associate the names with the true parameters
    params = pd.Series(self._params, index=self._param_names)

    # Initialize the Statsmodels model base
    # TODO: does not pass cov_type to the parent right now; instead it is
    # set separately, see below.
    tsbase.TimeSeriesModelResults.__init__(self, model, params,
                                           normalized_cov_params=None,
                                           scale=1.)

    # Initialize the statespace representation
    super(MLEResults, self).__init__(model)

    # Setup the cache
    self._cache = resettable_cache()

    # Handle covariance matrix calculation
    if cov_kwds is None:
        cov_kwds = {}
    self._get_robustcov_results(cov_type=cov_type, use_self=True,
                                **cov_kwds)
def __init__(self, results, get_margeff, derivative, dist=None,
             margeff_args=()):
    self._cache = resettable_cache()
    self.results = results
    self.dist = dist
    self.get_margeff(margeff_args)
def __init__(self, model, params, normalized_cov_params, scale):
    super(RLMResults, self).__init__(model, params, normalized_cov_params,
                                     scale)
    self.model = model
    self.df_model = model.df_model
    self.df_resid = model.df_resid
    self.nobs = model.nobs
    self._cache = resettable_cache()
    # for remove_data
    self.data_in_cache = ['sresid']
def __init__(self, model, mlefit, optimize_dict=None):
    self.model = model
    self.estimator = model.estimator
    self.optimize_dict = optimize_dict
    self.nobs = model.nobs
    self.df_model = model.df_model
    self.df_resid = model.df_resid
    self._cache = resettable_cache()
    self.__dict__.update(mlefit.__dict__)
    self.param_names = model.param_names(params_type='long')
    self.nperiods = self.model.nperiods
def __init__(self, model, params, normalized_cov_params, scale):
    super(RLMResults, self).__init__(model, params, normalized_cov_params,
                                     scale)
    self.model = model
    self.df_model = model.df_model
    self.df_resid = model.df_resid
    self.nobs = model.nobs
    self._cache = resettable_cache()
    # for remove_data
    self.data_in_cache = ["sresid"]
    self.cov_params_default = self.bcov_scaled
def __init__(self, model):
    self.model = model
    self.mlefit = model.fit()
    self.nobs_bychoice = model.nobs
    self.nobs = model.endog.shape[0]
    self.alt = list(model.V.keys())  # list() so this works on Python 3 too
    self.freq_alt = model.endog_bychoices.sum(0).tolist()
    self.perc_alt = (model.endog_bychoices.sum(0) / model.nobs).tolist()
    self.__dict__.update(self.mlefit.__dict__)
    self._cache = resettable_cache()
def __init__(self, model, params, normalized_cov_params, scale):
    super(GLMResults, self).__init__(
        model, params, normalized_cov_params=normalized_cov_params,
        scale=scale)
    self.family = model.family
    self._endog = model.endog
    self.nobs = model.endog.shape[0]
    self.mu = model.mu
    self._data_weights = model.data_weights
    self.df_resid = model.df_resid
    self.df_model = model.df_model
    self.pinv_wexog = model.pinv_wexog
    self._cache = resettable_cache()
def __init__(self, model, params, normalized_cov_params, scale,
             cov_type='nonrobust', cov_kwds=None, use_t=None):
    super(GLMResults, self).__init__(
        model, params, normalized_cov_params=normalized_cov_params,
        scale=scale)
    self.family = model.family
    self._endog = model.endog
    self.nobs = model.endog.shape[0]
    self.mu = model.mu
    self._data_weights = model.data_weights
    self.df_resid = model.df_resid
    self.df_model = model.df_model
    self.pinv_wexog = model.pinv_wexog
    self._cache = resettable_cache()
    # are these intermediate results needed or can we just
    # call the model's attributes?

    # for remove data and pickle without large arrays
    self._data_attr.extend(['results_constrained'])
    self.data_in_cache = getattr(self, 'data_in_cache', [])
    self.data_in_cache.extend(['null'])

    # robust covariance
    from statsmodels.base.covtype import get_robustcov_results
    if use_t is None:
        self.use_t = False  # TODO: class default
    else:
        self.use_t = use_t
    if cov_type == 'nonrobust':
        self.cov_type = 'nonrobust'
        self.cov_kwds = {'description': 'Standard Errors assume that the '
                         'covariance matrix of the errors is correctly '
                         'specified.'}
    else:
        if cov_kwds is None:
            cov_kwds = {}
        get_robustcov_results(self, cov_type=cov_type, use_self=True,
                              use_t=use_t, **cov_kwds)
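How the `cov_type` dispatch above surfaces to a user, sketched with the public statsmodels API (GLM.fit has accepted `cov_type` since roughly statsmodels 0.6; treat the version claim as an assumption):

import numpy as np
import statsmodels.api as sm

rng = np.random.RandomState(0)
X = sm.add_constant(rng.normal(size=(200, 2)))
y = rng.poisson(np.exp(X @ np.array([0.5, 0.2, -0.1])))

# 'HC0' routes through get_robustcov_results; 'nonrobust' takes the
# default branch that just records the description string.
res = sm.GLM(y, X, family=sm.families.Poisson()).fit(cov_type='HC0')
print(res.cov_type)   # 'HC0'
print(res.bse)        # heteroskedasticity-robust standard errors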
def __init__(self, model, params, normalized_cov_params, scale):
    super(GLMResults, self).__init__(
        model, params, normalized_cov_params=normalized_cov_params,
        scale=scale)
    self.family = model.family
    self._endog = model.endog
    self.nobs = model.endog.shape[0]
    self.mu = model.mu
    self._data_weights = model.data_weights
    self.df_resid = model.df_resid
    self.df_model = model.df_model
    self.pinv_wexog = model.pinv_wexog
    self._cache = resettable_cache()
def __init__(self, params, resid, volatility, dep_var, names,
             loglikelihood, is_pandas, model):
    self._params = params
    self._resid = resid
    self._is_pandas = is_pandas
    self.model = model
    self._datetime = dt.datetime.now()
    self._cache = resettable_cache()
    self._dep_var = dep_var
    self._dep_name = dep_var.name
    self._names = names
    self._loglikelihood = loglikelihood
    self._nobs = model.nobs
    self._index = dep_var.index
    self._volatility = volatility
def __init__(self, datasets, paramgroup, basepath, figpath,
             showprogress=False, applyfilters=False, filtercount=5,
             filtercolumn='bmp'):
    self._cache = resettable_cache()
    self._applyfilters = applyfilters
    self.filtercount = filtercount
    self.filtercolumn = filtercolumn
    self._raw_datasets = [ds for ds in datasets if ds.effluent.include]
    self.basepath = basepath
    self.figpath = figpath
    self.showprogress = showprogress
    self.parameters = [ds.definition['parameter'] for ds in self.datasets]
    self.bmps = [ds.definition['category'] for ds in self.datasets]
    self.paramgroup = paramgroup
def test_resettable_cache():
    # This test was taken from the old __main__ section of decorators.py
    reset = dict(a=('b',), b=('c',))
    cache = resettable_cache(a=0, b=1, c=2, reset=reset)
    assert_equal(cache, dict(a=0, b=1, c=2))

    # Try resetting a
    cache['a'] = 1
    assert_equal(cache, dict(a=1, b=None, c=None))
    cache['c'] = 2
    assert_equal(cache, dict(a=1, b=None, c=2))
    cache['b'] = 0
    assert_equal(cache, dict(a=1, b=0, c=None))

    # Try deleting a
    del cache['a']
    assert_equal(cache, {})
def __init__(self, model, params, normalized_cov_params=None, scale=1.0):
    super(ARResults, self).__init__(model, params, normalized_cov_params,
                                    scale)
    self._cache = resettable_cache()
    self.nobs = model.nobs
    n_totobs = len(model.endog)
    self.n_totobs = n_totobs
    self.X = model.X  # copy?
    self.Y = model.Y
    k_ar = model.k_ar
    self.k_ar = k_ar
    k_trend = model.k_trend
    self.k_trend = k_trend
    trendorder = None
    if k_trend > 0:
        trendorder = k_trend - 1
    self.trendorder = trendorder
    # TODO: cmle vs mle?
    self.df_resid = self.model.df_resid = n_totobs - k_ar - k_trend
def test_resettable_cache():
    # This test was taken from the old __main__ section of decorators.py
    reset = dict(a=('b',), b=('c',))
    cache = resettable_cache(a=0, b=1, c=2, reset=reset)
    assert_equal(cache, dict(a=0, b=1, c=2))

    # Try resetting a
    cache['a'] = 1
    assert_equal(cache, dict(a=1, b=None, c=None))
    cache['c'] = 2
    assert_equal(cache, dict(a=1, b=None, c=2))
    cache['b'] = 0
    assert_equal(cache, dict(a=1, b=0, c=None))

    # Try deleting a
    del cache['a']
    assert_equal(cache, {})
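The two copies of this test pin down the semantics of `resettable_cache`: writing a key recursively resets its dependents to None, and deleting a key cascades the deletion down the dependency chain. A minimal sketch that satisfies the assertions above; the real implementation lives in statsmodels.tools.decorators, so this is illustrative:

class ResettableCache(dict):
    """dict that resets dependent keys when a key is written or deleted."""

    def __init__(self, reset=None, **items):
        self._resetdict = reset or {}
        dict.__init__(self, **items)   # initial items fire no resets

    def __setitem__(self, key, value):
        dict.__setitem__(self, key, value)
        # Invalidate everything that depends on `key`, recursively.
        for dependent in self._resetdict.get(key, ()):
            self[dependent] = None

    def __delitem__(self, key):
        dict.__delitem__(self, key)
        # Deletion cascades, so del cache['a'] empties a -> b -> c.
        for dependent in self._resetdict.get(key, ()):
            if dependent in self:
                del self[dependent]


def resettable_cache(reset=None, **items):
    return ResettableCache(reset=reset, **items)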
def __init__(self, endog, exog=None, missing='none', **kwargs):
    if missing != 'none':
        arrays, nan_idx = self._handle_missing(endog, exog, missing,
                                               **kwargs)
        self.missing_row_idx = nan_idx
        self.__dict__.update(arrays)  # attach all the data arrays
        self.orig_endog = self.endog
        self.orig_exog = self.exog
        self.endog, self.exog = self._convert_endog_exog(self.endog,
                                                         self.exog)
    else:
        self.__dict__.update(kwargs)  # attach the extra arrays anyway
        self.orig_endog = endog
        self.orig_exog = exog
        self.endog, self.exog = self._convert_endog_exog(endog, exog)
    self._check_integrity()
    self._cache = resettable_cache()
def __init__(self, model, params, normalized_cov_params=None, scale=1.0):
    super(ARMAResults, self).__init__(model, params,
                                      normalized_cov_params, scale)
    self.sigma2 = model.sigma2
    nobs = model.nobs
    self.nobs = nobs
    k_exog = model.k_exog
    self.k_exog = k_exog
    k_trend = model.k_trend
    self.k_trend = k_trend
    k_ar = model.k_ar
    self.k_ar = k_ar
    self.n_totobs = len(model.endog)
    k_ma = model.k_ma
    self.k_ma = k_ma
    df_model = k_exog + k_trend + k_ar + k_ma
    self.df_model = df_model
    self.df_resid = self.nobs - df_model
    self._cache = resettable_cache()
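For concreteness, the degrees-of-freedom bookkeeping above works out as follows: an ARMA(2, 1) with a constant and no exogenous regressors has df_model = k_exog + k_trend + k_ar + k_ma = 0 + 1 + 2 + 1 = 4, so with nobs = 100 effective observations, df_resid = 100 - 4 = 96.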
def __init__(self, endog, exog=None, missing='none', **kwargs):
    if missing != 'none':
        arrays, nan_idx = self._handle_missing(endog, exog, missing,
                                               **kwargs)
        self.missing_row_idx = nan_idx
        self.__dict__.update(arrays)  # attach all the data arrays
        self._orig_endog = self.endog
        self._orig_exog = self.exog
        self.endog, self.exog = self._convert_endog_exog(
            self.endog, self.exog)
    else:
        self.__dict__.update(kwargs)  # attach the extra arrays anyway
        self._orig_endog = endog
        self._orig_exog = exog
        self.endog, self.exog = self._convert_endog_exog(endog, exog)
    self._check_integrity()
    self._cache = resettable_cache()
def __init__(self, model, params, normalized_cov_params=None, scale=1.):
    super(ARMAResults, self).__init__(model, params,
                                      normalized_cov_params, scale)
    self.sigma2 = model.sigma2
    nobs = model.nobs
    self.nobs = nobs
    k_exog = model.k_exog
    self.k_exog = k_exog
    k_trend = model.k_trend
    self.k_trend = k_trend
    k_ar = model.k_ar
    self.k_ar = k_ar
    self.n_totobs = len(model.endog)
    k_ma = model.k_ma
    self.k_ma = k_ma
    df_model = k_exog + k_trend + k_ar + k_ma
    self.df_model = df_model
    self.df_resid = self.nobs - df_model
    self._cache = resettable_cache()
def __init__(self, model, params, normalized_cov_params=None, scale=1.):
    super(ARResults, self).__init__(model, params, normalized_cov_params,
                                    scale)
    self._cache = resettable_cache()
    self.nobs = model.nobs
    n_totobs = len(model.endog)
    self.n_totobs = n_totobs
    self.X = model.X  # copy?
    self.Y = model.Y
    k_ar = model.k_ar
    self.k_ar = k_ar
    k_trend = model.k_trend
    self.k_trend = k_trend
    trendorder = None
    if k_trend > 0:
        trendorder = k_trend - 1
    self.trendorder = trendorder
    # TODO: cmle vs mle?
    self.df_resid = self.model.df_resid = n_totobs - k_ar - k_trend
def __init__(self, model, params, normalized_cov_params, scale):
    super(GLMResults, self).__init__(
        model, params, normalized_cov_params=normalized_cov_params,
        scale=scale)
    self.family = model.family
    self._endog = model.endog
    self.nobs = model.endog.shape[0]
    self.mu = model.mu
    self._data_weights = model.data_weights
    self.df_resid = model.df_resid
    self.df_model = model.df_model
    self.pinv_wexog = model.pinv_wexog
    self._cache = resettable_cache()
    # are these intermediate results needed or can we just
    # call the model's attributes?

    # for remove data and pickle without large arrays
    self._data_attr.extend(['results_constrained'])
    self.data_in_cache = getattr(self, 'data_in_cache', [])
    self.data_in_cache.extend(['null'])
def __init__(self, model, params, normalized_cov_params, scale):
    super(GLMResults, self).__init__(
        model, params, normalized_cov_params=normalized_cov_params,
        scale=scale)
    self.family = model.family
    self._endog = model.endog
    self.nobs = model.endog.shape[0]
    self.mu = model.mu
    self._data_weights = model.data_weights
    self.df_resid = model.df_resid
    self.df_model = model.df_model
    self.pinv_wexog = model.pinv_wexog
    self._cache = resettable_cache()
    # are these intermediate results needed or can we just
    # call the model's attributes?

    # for remove data and pickle without large arrays
    self._data_attr.extend(['results_constrained'])
    self.data_in_cache = getattr(self, 'data_in_cache', [])
    self.data_in_cache.extend(['null'])
def __init__(self, model, params, normalized_cov_params, scale,
             cov_type='nonrobust', cov_kwds=None, use_t=None):
    super(GLMResults, self).__init__(
        model, params, normalized_cov_params=normalized_cov_params,
        scale=scale)
    self.family = model.family
    self._endog = model.endog
    self.nobs = model.endog.shape[0]
    self.mu = model.mu
    self._data_weights = model.data_weights
    self.df_resid = model.df_resid
    self.df_model = model.df_model
    self.pinv_wexog = model.pinv_wexog
    self._cache = resettable_cache()
    # are these intermediate results needed or can we just
    # call the model's attributes?

    # for remove data and pickle without large arrays
    self._data_attr.extend(['results_constrained'])
    self.data_in_cache = getattr(self, 'data_in_cache', [])
    self.data_in_cache.extend(['null'])

    # robust covariance
    from statsmodels.base.covtype import get_robustcov_results
    if use_t is None:
        self.use_t = False  # TODO: class default
    else:
        self.use_t = use_t
    if cov_type == 'nonrobust':
        self.cov_type = 'nonrobust'
        self.cov_kwds = {'description': 'Standard Errors assume that the '
                         'covariance matrix of the errors is correctly '
                         'specified.'}
    else:
        if cov_kwds is None:
            cov_kwds = {}
        get_robustcov_results(self, cov_type=cov_type, use_self=True,
                              use_t=use_t, **cov_kwds)
def __init__(self, endog, exog=None, **kwds):
    self._orig_endog = endog
    self._orig_exog = exog
    self.endog, self.exog = self._convert_endog_exog(endog, exog)
    self._check_integrity()
    self._cache = resettable_cache()
def _reset(self):
    self._cache = resettable_cache()
def __init__(self, results, args, kwargs=None):
    # use None instead of a mutable {} default argument
    if kwargs is None:
        kwargs = {}
    self._cache = resettable_cache()
    self.results = results
    self.get_margeff(*args, **kwargs)
class KDE(object):
    """
    Kernel Density Estimator

    Parameters
    ----------
    endog : array-like
        The variable for which the density estimate is desired.

    Notes
    -----
    If cdf, sf, cumhazard, or entropy are computed, they are computed based
    on the definition of the kernel rather than the FFT approximation, even
    if the density is fit with FFT = True.
    """
    _cache = resettable_cache()

    def __init__(self, endog):
        self.endog = np.asarray(endog)

    def fit(self, kernel="gau", bw="scott", fft=True, weights=None,
            gridsize=None, adjust=1, cut=3, clip=(-np.inf, np.inf)):
        """
        Attach the density estimate to the KDE class.

        Parameters
        ----------
        kernel : str
            The Kernel to be used. Choices are:

            - "biw" for biweight
            - "cos" for cosine
            - "epa" for Epanechnikov
            - "gau" for Gaussian
            - "tri" for triangular
            - "triw" for triweight
            - "uni" for uniform

        bw : str, float
            The bandwidth to use. Choices are:

            - "scott" - 1.059 * A * nobs ** (-1/5.), where A is
              `min(std(X), IQR/1.34)`
            - "silverman" - .9 * A * nobs ** (-1/5.), where A is
              `min(std(X), IQR/1.34)`
            - If a float is given, it is the bandwidth.

        fft : bool
            Whether or not to use FFT. The FFT implementation is more
            computationally efficient, but only the Gaussian kernel is
            implemented. If FFT is False, an 'nobs' x 'gridsize'
            intermediate array is created.
        gridsize : int
            If gridsize is None, max(len(X), 50) is used.
        cut : float
            Defines the length of the grid past the lowest and highest
            values of X so that the kernel goes to zero. The end points
            are -/+ cut*bw*{min(X) or max(X)}.
        adjust : float
            An adjustment factor for the bw. Bandwidth becomes
            bw * adjust.
        """
        try:
            bw = float(bw)
            self.bw_method = "user-given"
        except (ValueError, TypeError):
            self.bw_method = bw
        endog = self.endog

        if fft:
            if kernel != "gau":
                msg = "Only gaussian kernel is available for fft"
                raise NotImplementedError(msg)
            if weights is not None:
                msg = "Weights are not implemented for fft"
                raise NotImplementedError(msg)
            density, grid, bw = kdensityfft(endog, kernel=kernel, bw=bw,
                                            adjust=adjust, weights=weights,
                                            gridsize=gridsize, clip=clip,
                                            cut=cut)
        else:
            density, grid, bw = kdensity(endog, kernel=kernel, bw=bw,
                                         adjust=adjust, weights=weights,
                                         gridsize=gridsize, clip=clip,
                                         cut=cut)
        self.density = density
        self.support = grid
        self.bw = bw
        self.kernel = kernel_switch[kernel](h=bw)  # we instantiate twice,
        # should this be passed to the funcs?

    @cache_readonly
    def cdf(self):
        """
        Returns the cumulative distribution function evaluated at the
        support.

        Notes
        -----
        Will not work if fit has not been called.
        """
        _checkisfit(self)
        density = self.density
        kern = self.kernel
        if kern.domain is None:  # TODO: test for grid point at domain bound
            a, b = -np.inf, np.inf
        else:
            a, b = kern.domain
        func = lambda x, s: kern.density(s, x)
        support = self.support
        support = np.r_[a, support]
        gridsize = len(support)
        endog = self.endog
        probs = [integrate.quad(func, support[i - 1], support[i],
                                args=endog)[0]
                 for i in range(1, gridsize)]
        return np.cumsum(probs)

    @cache_readonly
    def cumhazard(self):
        """
        Returns the hazard function evaluated at the support.

        Notes
        -----
        Will not work if fit has not been called.
        """
        _checkisfit(self)
        return -np.log(self.sf)

    @cache_readonly
    def sf(self):
        """
        Returns the survival function evaluated at the support.

        Notes
        -----
        Will not work if fit has not been called.
        """
        _checkisfit(self)
        return 1 - self.cdf

    @cache_readonly
    def entropy(self):
        """
        Returns the differential entropy evaluated at the support.

        Notes
        -----
        Will not work if fit has not been called.

        1e-12 is added to each probability to ensure that log(0) is not
        called.
        """
        _checkisfit(self)

        def entr(x, s):
            pdf = kern.density(s, x)
            return pdf * np.log(pdf + 1e-12)

        pdf = self.density
        kern = self.kernel
        if kern.domain is not None:
            a, b = kern.domain
        else:
            a, b = -np.inf, np.inf
        endog = self.endog
        # TODO: below could run into integration problems,
        # cf. stats.dist._entropy
        return -integrate.quad(entr, a, b, args=(endog,))[0]

    @cache_readonly
    def icdf(self):
        """
        Inverse Cumulative Distribution (Quantile) Function

        Notes
        -----
        Will not work if fit has not been called. Uses
        `scipy.stats.mstats.mquantiles`.
        """
        _checkisfit(self)
        gridsize = len(self.density)
        return stats.mstats.mquantiles(self.endog,
                                       np.linspace(0, 1, gridsize))

    def evaluate(self, point):
        """
        Evaluate density at a single point.

        Parameters
        ----------
        point : float
            Point at which to evaluate the density.
        """
        _checkisfit(self)
        return self.kernel.density(self.endog, point)
def __init__(self):
    self._cache = resettable_cache()
    self.a = 0
def fit(self, kernel="gau", bw="normal_reference", fft=True, weights=None, gridsize=None, adjust=1, cut=3, clip=(-np.inf, np.inf)): """ Attach the density estimate to the KDEUnivariate class. Parameters ---------- kernel : str The Kernel to be used. Choices are: - "biw" for biweight - "cos" for cosine - "epa" for Epanechnikov - "gau" for Gaussian. - "tri" for triangular - "triw" for triweight - "uni" for uniform bw : str, float The bandwidth to use. Choices are: - "scott" - 1.059 * A * nobs ** (-1/5.), where A is `min(std(X),IQR/1.34)` - "silverman" - .9 * A * nobs ** (-1/5.), where A is `min(std(X),IQR/1.34)` - "normal_reference" - C * A * nobs ** (-1/5.), where C is calculated from the kernel. Equivalent (up to 2 dp) to the "scott" bandwidth for gaussian kernels. See bandwidths.py - If a float is given, it is the bandwidth. fft : bool Whether or not to use FFT. FFT implementation is more computationally efficient. However, only the Gaussian kernel is implemented. If FFT is False, then a 'nobs' x 'gridsize' intermediate array is created. gridsize : int If gridsize is None, max(len(X), 50) is used. cut : float Defines the length of the grid past the lowest and highest values of X so that the kernel goes to zero. The end points are -/+ cut*bw*{min(X) or max(X)} adjust : float An adjustment factor for the bw. Bandwidth becomes bw * adjust. """ try: bw = float(bw) self.bw_method = "user-given" except: self.bw_method = bw endog = self.endog if fft: if kernel != "gau": msg = "Only gaussian kernel is available for fft" raise NotImplementedError(msg) if weights is not None: msg = "Weights are not implemented for fft" raise NotImplementedError(msg) density, grid, bw = kdensityfft(endog, kernel=kernel, bw=bw, adjust=adjust, weights=weights, gridsize=gridsize, clip=clip, cut=cut) else: density, grid, bw = kdensity(endog, kernel=kernel, bw=bw, adjust=adjust, weights=weights, gridsize=gridsize, clip=clip, cut=cut) self.density = density self.support = grid self.bw = bw self.kernel = kernel_switch[kernel](h=bw) # we instantiate twice, # should this passed to funcs? # put here to ensure empty cache after re-fit with new options self.kernel.weights = weights if weights is not None: self.kernel.weights /= weights.sum() self._cache = resettable_cache()
def fit(self, kernel="gau", bw="scott", fft=True, weights=None, gridsize=None, adjust=1, cut=3, clip=(-np.inf, np.inf)): """ Attach the density estimate to the KDEUnivariate class. Parameters ---------- kernel : str The Kernel to be used. Choices are: - "biw" for biweight - "cos" for cosine - "epa" for Epanechnikov - "gau" for Gaussian. - "tri" for triangular - "triw" for triweight - "uni" for uniform bw : str, float The bandwidth to use. Choices are: - "scott" - 1.059 * A * nobs ** (-1/5.), where A is `min(std(X),IQR/1.34)` - "silverman" - .9 * A * nobs ** (-1/5.), where A is `min(std(X),IQR/1.34)` - If a float is given, it is the bandwidth. fft : bool Whether or not to use FFT. FFT implementation is more computationally efficient. However, only the Gaussian kernel is implemented. If FFT is False, then a 'nobs' x 'gridsize' intermediate array is created. gridsize : int If gridsize is None, max(len(X), 50) is used. cut : float Defines the length of the grid past the lowest and highest values of X so that the kernel goes to zero. The end points are -/+ cut*bw*{min(X) or max(X)} adjust : float An adjustment factor for the bw. Bandwidth becomes bw * adjust. """ try: bw = float(bw) self.bw_method = "user-given" except: self.bw_method = bw endog = self.endog if fft: if kernel != "gau": msg = "Only gaussian kernel is available for fft" raise NotImplementedError(msg) if weights is not None: msg = "Weights are not implemented for fft" raise NotImplementedError(msg) density, grid, bw = kdensityfft(endog, kernel=kernel, bw=bw, adjust=adjust, weights=weights, gridsize=gridsize, clip=clip, cut=cut) else: density, grid, bw = kdensity(endog, kernel=kernel, bw=bw, adjust=adjust, weights=weights, gridsize=gridsize, clip=clip, cut=cut) self.density = density self.support = grid self.bw = bw self.kernel = kernel_switch[kernel](h=bw) # we instantiate twice, # should this passed to funcs? # put here to ensure empty cache after re-fit with new options self._cache = resettable_cache()