import numpy as np
from scipy.stats import chi, norm


def chi_norm(emg_data, params):
    # norm_emg is defined elsewhere in the original module
    emg_data = norm_emg(emg_data)
    emg_data = np.abs(emg_data)
    # map each rectified channel through its fitted chi CDF, then through the
    # standard normal PPF (a per-channel Gaussianising transform)
    for chnl in range(len(emg_data)):
        arg = params[chnl][:-2]
        loc = params[chnl][-2]
        scale = params[chnl][-1]
        a_max = chi.ppf(0.9999999999999999, *arg, loc=loc, scale=scale)
        a_min = chi.ppf(0.00000000001, *arg, loc=loc, scale=scale)
        transf = np.clip(emg_data[chnl, :], a_min=a_min, a_max=a_max)
        transf = chi.cdf(transf, *arg, loc=loc, scale=scale)
        emg_data[chnl] = norm.ppf(transf)
    return emg_data
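# A minimal usage sketch for chi_norm (not from the original source). It assumes that the
# per-channel `params` come from scipy.stats.chi.fit on the rectified signal and uses a
# hypothetical stand-in for the external norm_emg (zero mean, unit variance per channel).
import numpy as np
from scipy.stats import chi


def norm_emg(x):
    # hypothetical stand-in for the EMG-specific normalisation used above
    return (x - x.mean(axis=1, keepdims=True)) / x.std(axis=1, keepdims=True)


rng = np.random.default_rng(0)
emg = rng.normal(size=(4, 1000))                                # 4 channels, 1000 samples
params = [chi.fit(np.abs(norm_emg(emg)[c])) for c in range(4)]  # per-channel chi fit
gaussianised = chi_norm(emg, params)                            # approximately N(0, 1) per channel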
def wald_test(tau, Sigma, alpha=0.05, max_condition=1e-6, pval=False):
    """
    Wald-type test based on the chi_d distribution.

    :param tau: observed test statistic (scaled with sqrt(n))
    :param Sigma: observed covariance matrix
    :param alpha: level of the test
    :param max_condition: threshold below which eigenvalues are treated as 0
    :param pval: if true, returns the p value instead of the test result
    :return: 1 (reject) or 0 (no reject); the p value if ``pval`` is true
    """
    # instead of regularizing we preprocess Sigma and tau to get rid of 0 eigenvalues
    tau, Sigma = preprocessing(tau, Sigma, max_condition=max_condition)
    d = len(tau)
    # compute matrix inverse
    Sigma_inv = np.linalg.inv(Sigma)
    # the quantity below is asymptotically chi_d-distributed under the null
    t_obs = np.sqrt(tau @ Sigma_inv @ tau)
    # compute the 1-alpha quantile of the chi distribution with d degrees of freedom
    threshold = chi.ppf(q=1 - alpha, df=d)
    if not pval:
        if t_obs > threshold:
            return 1
        else:
            return 0
    else:
        # return p value
        return 1 - chi.cdf(x=t_obs, df=d)
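# Side note (not from the original source): the chi threshold used above is equivalent to
# taking the square root of the more familiar chi-squared threshold, since t_obs**2 is
# asymptotically chi2(d) under H0 whenever t_obs is chi(d).
import numpy as np
from scipy.stats import chi, chi2

d, alpha = 3, 0.05
assert np.isclose(chi.ppf(1 - alpha, df=d), np.sqrt(chi2.ppf(1 - alpha, df=d)))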
def estimate_sigma(observed, df, upper_bound, factor=3, npts=50, nsample=2000):
    """
    Produce an estimate of $\sigma$ from a constrained
    error sum of squares. The relevant distribution is a
    scaled $\chi^2$ restricted to $[0,U]$ where $U$ is `upper_bound`.

    Parameters
    ----------
    observed : float
        The observed sum of squares.
    df : float
        Degrees of freedom of the sum of squares.
    upper_bound : float
        Upper limit of truncation interval.
    factor : float
        Range of candidate values is [observed/factor, observed*factor]
    npts : int
        How many candidate values for interpolator.
    nsample : int
        How many samples for each expected value of the truncated sum of squares.

    Returns
    -------
    sigma_hat : np.float
        Estimate of $\sigma$.
    """
    values = np.linspace(1. / factor, factor, npts) * observed
    expected = 0 * values
    for i, value in enumerate(values):
        P_upper = chidist.cdf(upper_bound * np.sqrt(df) / value, df)
        U = np.random.sample(nsample)
        sample = chidist.ppf(P_upper * U, df) * value
        expected[i] = np.mean(sample**2)

        if expected[i] >= 1.1 * (observed**2 * df + observed**2 * df**(0.5)):
            break

    interpolant = interp1d(values, expected + df**(0.5) * values**2)
    V = np.linspace(1. / factor, factor, 10 * npts) * observed

    # this solves for the solution to
    # expected(sigma) + sqrt(df) * sigma^2 = observed SS * (1 + sqrt(df))
    # the usual "MAP" estimator would have RHS just observed SS
    # but this factor seems to correct it.
    # it is such that if there were no selection it would be
    # the usual unbiased estimate
    sigma_hat = np.min(V[interpolant(V) >= observed**2 * df + observed**2 * df**(0.5)])
    return sigma_hat
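# Hypothetical call of estimate_sigma above (illustrative values only). It assumes the
# module-level aliases used by the function: chidist = scipy.stats.chi and
# interp1d = scipy.interpolate.interp1d.
import numpy as np
from scipy.interpolate import interp1d
from scipy.stats import chi as chidist

sigma_hat = estimate_sigma(observed=2.3, df=10, upper_bound=3.0)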
def _quantile_notTruncated(self, q, tol=1.e-6):
    """
    Compute the quantile for the non-truncated distribution

    Parameters
    ----------
    q : float
        quantile you want to compute. Between 0 and 1

    tol : float
        precision for the output

    Returns
    -------
    x : float
        x such that P(X < x) = q
    """
    scale = self._scale
    k = self._k
    dps = self._dps

    z_approx = scale * chi.ppf(q, k)

    epsilon = scale * 0.001
    lb = z_approx - epsilon
    ub = z_approx + epsilon

    f = lambda z: self._cdf_notTruncated(-np.inf, z, dps)

    z = find_root(f, q, lb, ub, tol)

    return z
def mean_min_dists(X, y):
    """
    Calculate min distances used for original bolstering sig, 1-sigma.
    X -> nxD dataset
    y -> label
    output -> 2xD array of sigmas: one row per class, with a single sigma value
    repeated across all D features of that class.
    """
    import sys
    from scipy.stats import chi
    ind1 = y == 0
    ind2 = y == 1
    X1 = X[ind1]
    X2 = X[ind2]
    n = X.shape[0]
    p = X.shape[1]
    n1 = X1.shape[0]
    n2 = X2.shape[0]
    tmp1 = np.zeros(n1)
    tmp2 = np.zeros(n2)
    for i in range(n1):
        dm = sys.float_info.max
        for j in range(i):
            e = X1[i] - X1[j]
            d = np.dot(e, e.T)
            if d < dm:
                dm = d
        for j in range(i + 1, n1):
            e = X1[i] - X1[j]
            d = np.dot(e, e.T)
            if d < dm:
                dm = d
        tmp1[i] = np.sqrt(dm)
    d1 = np.mean(tmp1)
    for i in range(n2):
        dm = sys.float_info.max
        for j in range(i):
            e = X2[i] - X2[j]
            d = np.dot(e, e.T)
            if d < dm:
                dm = d
        for j in range(i + 1, n2):
            e = X2[i] - X2[j]
            d = np.dot(e, e.T)
            if d < dm:
                dm = d
        tmp2[i] = np.sqrt(dm)
    d2 = np.mean(tmp2)
    cp = chi.ppf(PERCENTILE, p)
    sig1 = d1 * np.ones(p) / cp
    sig2 = d2 * np.ones(p) / cp
    return np.vstack((sig1, sig2))
def test_chi(self):
    from scipy.stats import chi
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1)

    df = 78
    mean, var, skew, kurt = chi.stats(df, moments='mvsk')

    x = np.linspace(chi.ppf(0.01, df), chi.ppf(0.99, df), 100)
    ax.plot(x, chi.pdf(x, df), 'r-', lw=5, alpha=0.6, label='chi pdf')

    rv = chi(df)
    ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

    vals = chi.ppf([0.001, 0.5, 0.999], df)
    np.allclose([0.001, 0.5, 0.999], chi.cdf(vals, df))

    r = chi.rvs(df, size=1000)
    ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
    ax.legend(loc='best', frameon=False)

    self.assertEqual(str(ax), "AxesSubplot(0.125,0.11;0.775x0.77)")
def mvstdtprob(a, b, R, df, ieps=1e-5, quadkwds=None, mvstkwds=None):
    '''probability of rectangular area of standard t distribution

    assumes mean is zero and R is correlation matrix

    Notes
    -----
    This function does not calculate the estimate of the combined error
    between the underlying multivariate normal probability calculations
    and the integration.
    '''
    kwds = dict(args=(a, b, R, df), epsabs=1e-4, epsrel=1e-2, limit=150)
    if quadkwds is not None:
        kwds.update(quadkwds)
    res, err = integrate.quad(funbgh2, *chi.ppf([ieps, 1 - ieps], df), **kwds)
    prob = res * bghfactor(df)
    return prob
def mvstdtprob(a, b, R, df, ieps=1e-5, quadkwds=None, mvstkwds=None):
    """
    Probability of rectangular area of standard t distribution

    assumes mean is zero and R is correlation matrix

    Notes
    -----
    This function does not calculate the estimate of the combined error
    between the underlying multivariate normal probability calculations
    and the integration.
    """
    kwds = dict(args=(a, b, R, df), epsabs=1e-4, epsrel=1e-2, limit=150)
    if quadkwds is not None:
        kwds.update(quadkwds)
    lower, upper = chi.ppf([ieps, 1 - ieps], df)
    res, err = integrate.quad(funbgh2, lower, upper, **kwds)
    prob = res * bghfactor(df)
    return prob
def min_dists_Dsigma(X, y):
    """
    Calculate min distances used for new bolstering sig, D-sigma.
    X -> nxD dataset
    y -> label
    output -> sig1, sig2, for D directions of class 1 and 2.
    """
    from scipy.stats import chi
    ind1 = y == 0
    ind2 = y == 1
    X1 = X[ind1]
    X2 = X[ind2]
    n = X.shape[0]
    D = X.shape[1]
    X1.sort(axis=0)
    X2.sort(axis=0)
    cp = chi.ppf(PERCENTILE, 1)  # one degree of freedom, for every dimension
    sig1 = np.mean(abs(X1[1:] - X1[:-1]), axis=0) / cp
    sig2 = np.mean(abs(X2[1:] - X2[:-1]), axis=0) / cp
    return np.vstack((sig1, sig2))
def estimate_g(radii, data_dimensionality, cdf_extension, cdf_precision):
    """
    Estimates the function g() which is used in a rescaling of the vectors

    Parameters
    ----------
    radii : ndarray
        A 1d array with n elements where n is the number of samples in the
        dataset and each entry is the l2 norm of the representation for that
        sample
    data_dimensionality : int
        The dimensionality of the datapoints that the radii pertain to. Used to
        set the degrees of freedom of the chi distribution
    cdf_extension : float
        The amount by which to extend the support of the CDF as a fraction of
        the range of radii present in the provided samples
    cdf_precision : int
        The number of samples to devote to evaluating the CDF in the range
        between the maximum and minimum radii

    Returns
    -------
    g_support : ndarray
        A 1d array giving the values of r (radius) at which g has been estimated
    g : ndarray
        A 1d array giving the value of g for each value in the support
    """
    g_support, cdf = estimate_radial_CDF(radii, cdf_extension, cdf_precision)
    # the l2 norm of a vector of n iid normal random variables is a random
    # variable distributed as a chi distribution with n degrees of freedom;
    # we want its inverse CDF, or percent point function.
    # Because feeding the inverse CDF a value of 1.0 will produce inf, we'll
    # just hackily fix this, slightly lowering the top value of the cdf
    cdf[cdf == 1.0] = cdf[cdf == 1.0] - (1. / len(radii))
    g = chi.ppf(cdf, data_dimensionality)
    return g_support, g
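# Quick check (not part of the original code) of the fact the comment above relies on:
# the l2 norm of an n-dimensional vector of iid standard normals follows a chi
# distribution with n degrees of freedom, so chi.ppf inverts its empirical CDF.
import numpy as np
from scipy.stats import chi

n, nsamples = 16, 100_000
rng = np.random.default_rng(0)
radii = np.linalg.norm(rng.normal(size=(nsamples, n)), axis=1)
print(np.quantile(radii, 0.9), chi.ppf(0.9, n))  # the two values should nearly agree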
print(res.wald_test_terms())
# Chi2 here means the chi-squared distribution: https://en.wikipedia.org/wiki/Chi-squared_distribution
#                   chi2                  P>chi2  df constraint
# Intercept     1.022117     0.31201739467110934              1
# sex          54.473978  1.5751991092226142e-13              1
# class_1      46.947935   7.289775024150576e-12              1
# class_2      33.212807   8.260467771755613e-09              1
# sex:class_1  21.853960  2.9420881191443115e-06              1
# sex:class_2  33.535868   6.996184344418819e-09              1
# The chi2 column holds the chi-squared statistic and P>chi2 the corresponding p-value.

# likelihood ratio test
LR = 2 * (loglikelihood_logit_2 - loglikelihood_logit_1)
# critical value of chi^2(0.95, 2): the squared chi quantile equals the chi-squared quantile
chi_crit = chi.ppf(0.95, 2) ** 2
print('likelihood ratio statistic {}, critical value chi^2(0.95, 2) {}'.format(LR, chi_crit))
if chi_crit < LR:
    print('Reject H0: (b4 = b5 = 0) based on the likelihood ratio test')
else:
    print('Do not reject H0: (b4 = b5 = 0) based on the likelihood ratio test')

###################################################################################################
############### Perhaps the contribution of sex to the odds of survival depended on class ########
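# Equivalent p-value formulation (not in the original source): with two restrictions the
# LR statistic is asymptotically chi-squared with 2 degrees of freedom, so the same
# decision can be read off the survival function.
from scipy.stats import chi2

p_value = chi2.sf(LR, df=2)
print('LR test p-value:', p_value)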
def estimate_sigma(observed, truncated_df, lower_bound, upper_bound, untruncated_df=0,
                   factor=3, npts=50, nsample=2000):
    """
    Produce an estimate of $\sigma$ from a constrained
    error sum of squares. The relevant distribution is a
    scaled $\chi^2$ restricted to $[0,U]$ where $U$ is `upper_bound`.

    Parameters
    ----------
    observed : float
        The observed sum of squares.
    truncated_df : float
        Degrees of freedom of the truncated $\chi^2$ in the sum of squares.
        The observed sum is assumed to be the sum
        of an independent untruncated $\chi^2$ and the truncated one.
    lower_bound : float
        Lower limit of truncation interval.
    upper_bound : float
        Upper limit of truncation interval.
    untruncated_df : float
        Degrees of freedom of the untruncated $\chi^2$ in the sum of squares.
    factor : float
        Range of candidate values is [observed/factor, observed*factor]
    npts : int
        How many candidate values for interpolator.
    nsample : int
        How many samples for each expected value of the truncated sum of squares.

    Returns
    -------
    sigma_hat : np.float
        Estimate of $\sigma$.
    """
    if untruncated_df < 50:
        linear_term = truncated_df**(0.5)
    else:
        linear_term = 0

    total_df = untruncated_df + truncated_df

    values = np.linspace(1./factor, factor, npts) * observed
    expected = 0 * values
    for i, value in enumerate(values):
        P_upper = chidist.cdf(upper_bound * np.sqrt(truncated_df) / value, truncated_df)
        P_lower = chidist.cdf(lower_bound * np.sqrt(truncated_df) / value, truncated_df)
        U = np.random.sample(nsample)
        if untruncated_df > 0:
            sample = (chidist.ppf((P_upper - P_lower) * U + P_lower, truncated_df)**2 +
                      chidist.rvs(untruncated_df, size=nsample)**2) * value**2
        else:
            sample = (chidist.ppf((P_upper - P_lower) * U + P_lower, truncated_df) * value)**2
        expected[i] = np.mean(sample)

        if expected[i] >= 1.5 * (observed**2 * total_df + observed**2 * linear_term):
            break

    interpolant = interp1d(values, expected + values**2 * linear_term)
    V = np.linspace(1./factor, factor, 10*npts) * observed

    # this solves for the solution to
    # expected(sigma) + sqrt(df) * sigma^2 = observed SS * (1 + sqrt(df))
    # the usual "MAP" estimator would have RHS just observed SS
    # but this factor seems to correct it.
    # it is such that if there were no selection it would be
    # the usual unbiased estimate
    try:
        sigma_hat = np.min(V[interpolant(V) >= observed**2 * total_df + observed**2 * linear_term])
    except ValueError:
        # no solution, just return observed
        sigma_hat = observed

    return sigma_hat
significancia = 0.05
confianca = 1 - significancia
k = 2  # number of possible outcomes
graus_de_liberdade = k - 1

# Step 1 - formulate the hypotheses H_0 and H_1
# H_0: F_{HEADS} = F_{TAILS}
# H_1: F_{HEADS} \neq F_{TAILS}

# Step 2 - fix the significance level of the test (\alpha)
from scipy.stats import chi

# chi_{\alpha}^2 (the squared chi quantile equals the chi-squared quantile)
chi_2_alpha = chi.ppf(confianca, graus_de_liberdade)**2
chi_2_alpha

# Step 3 - compute the test statistic and compare it with the acceptance and rejection regions of the test
# \chi^2 = \sum_{i=1}^{k}{\frac{(F_{i}^{Obs} - F_{i}^{Esp})^2}{F_{i}^{Esp}}}
chi_2 = 0
for i in range(k):
    chi_2 += (F_Observada[i] - F_Esperada[i])**2 / F_Esperada[i]
chi_2

# Step 4 - accept or reject the null hypothesis
# Critical value criterion
# Reject H_0 if \chi_{teste}^2 > \chi_{\alpha}^2
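# Hypothetical observed/expected counts for the coin example above (not from the original
# source), using new names so F_Observada / F_Esperada are left untouched, just to make
# the four steps runnable end to end.
obs = [45, 55]   # heads, tails observed in 100 flips
esp = [50, 50]   # expected frequencies under H_0
chi_2_exemplo = sum((o - e) ** 2 / e for o, e in zip(obs, esp))
print(chi_2_exemplo, chi_2_alpha,
      'reject H_0' if chi_2_exemplo > chi_2_alpha else 'fail to reject H_0')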
def ost_test(tau, Sigma, alpha=0.05, selection='discrete', max_condition=1e-6,
             accuracy=1e-6, constraints='Sigma', pval=False):
    """
    Runs the full test suggested in our paper.

    :param tau: observed statistic
    :param Sigma: covariance matrix
    :param alpha: level of test
    :param selection: continuous/discrete (discrete is not extensively tested)
    :param max_condition: at which condition number the covariance matrix is truncated.
    :param accuracy: threshold to determine whether an entry is zero
    :param constraints: if 'Sigma' we work with the constraints (Sigma beta) >= 0.
                        If 'positive' we work with beta >= 0
    :param pval: if true, returns the conditional p value instead of the test result
    :return: 1 (reject) or 0 (no reject); the conditional p value if ``pval`` is true
    """
    assert constraints == 'Sigma' or constraints == 'positive', 'Constraints are not implemented'

    # if the selection is discrete we don't want any transformations
    if selection == 'discrete':
        constraints = 'positive'

    # check if there are entries with 0 variance
    zeros = [i for i in range(len(tau)) if Sigma[i][i] < 1e-10]
    tau = np.delete(tau, zeros)
    Sigma = np.delete(Sigma, zeros, 0)
    Sigma = np.delete(Sigma, zeros, 1)

    if constraints == 'Sigma':
        # compute pseudoinverse to also handle singular covariances (see Appendix)
        r_cond = max_condition  # parameter which precision to use
        Sigma_inv = np.linalg.pinv(Sigma, rcond=r_cond, hermitian=True)
        # use Remark 1 to convert the problem
        tau = Sigma_inv @ tau
        Sigma = Sigma_inv

    # Apply Theorem 1 in the canonical form with beta >= 0 constraints
    beta_star = optimization(tau=tau, Sigma=Sigma, selection=selection)

    # determine active set
    non_zero = [1 if beta_i > accuracy else 0 for beta_i in beta_star]
    projector = np.diag(non_zero)
    effective_sigma = projector @ Sigma @ projector

    # Use the rank of effective Sigma to determine how many degrees of freedom the
    # covariance has after conditioning. For a non-singular original covariance, this is
    # the same number as the number of active dimensions |mathcal{U}|; however, for
    # singular cases using the rank is the right way to go.
    tol = max_condition * np.max(np.linalg.eigvalsh(Sigma))
    r = np.linalg.matrix_rank(effective_sigma, tol=tol, hermitian=True)

    # go back to notation used in the paper
    l = r
    if l > 1:
        test_statistic = beta_star @ tau / np.sqrt(beta_star @ Sigma @ beta_star)
        threshold = chi_stats.ppf(q=1 - alpha, df=l)
    else:
        vminus = truncation(beta_star=beta_star, tau=tau, Sigma=Sigma, accuracy=accuracy)
        threshold = truncated_gaussian(var=beta_star @ Sigma @ beta_star,
                                       v_minus=vminus, level=alpha)
        test_statistic = beta_star @ tau

    if not pval:
        if test_statistic > threshold:
            # reject
            return 1
        else:
            # cannot reject
            return 0

    if pval:
        if l > 1:
            test_statistic = beta_star @ tau / np.sqrt(beta_star @ Sigma @ beta_star)
            pvalue = 1 - chi_stats.cdf(x=test_statistic, df=l)
        else:
            test_statistic = beta_star @ tau / np.sqrt(beta_star @ Sigma @ beta_star)
            vminus = truncation(beta_star=beta_star, tau=tau, Sigma=Sigma, accuracy=accuracy) / \
                np.sqrt(beta_star @ Sigma @ beta_star)
            pvalue = 1 - (norm.cdf(x=test_statistic) - norm.cdf(x=vminus)) / (1 - norm.cdf(x=vminus))
        return pvalue
def penconst_denoise(shape, scale, regularizer):
    r"""Computes a suggestion for the penalty constant of a denoising problem.

    Assume that we have noisy observations of an operator :math:`\mathcal{A}`:

    .. math::
        \mathcal{Y} = \mathcal{A} + \mathcal{W},

    where :math:`\mathcal{W}` is Gaussian noise with scale :math:`\sigma`.
    To denoise this operator with the nuclear norm :math:`N` as a regularizer,
    we solve

    .. math::
        \operatorname{minimize}_\mathcal{X}
        \Vert \mathcal{X} - \mathcal{Y} \Vert^2_F + \lambda N(\mathcal{X}).

    The performance of the denoiser depends on the choice of :math:`\lambda > 0`,
    and we choose

    .. math::
        \lambda = \mathbb{E} N^*(\mathcal{W}),

    where :math:`N^*` is the dual of the nuclear norm :math:`N`. Note that this
    computation depends on the shape of :math:`\mathcal{A}`, the noise level
    :math:`\sigma`, and the choice of the nuclear norm :math:`N`.

    Note
    ----
    Not all choices of the nuclear norm :math:`N` lead to efficient computations
    of the dual norm :math:`N^*`. In cases where this computation is not
    feasible, we return a heuristic guess.

    Parameters
    ----------
    shape : tuple
        The shape of the operator to denoise.
    scale : float
        The standard deviation of the random Gaussian noise (:math:`\sigma`).
    regularizer : Regularizer
        The regularizer used in the denoising problem (:math:`N`).

    Returns
    -------
    float
        The suggested penalty constant (:math:`\lambda`).
    """
    from scipy.stats import chi, gumbel_r, norm

    def max_of_variates(n, ppf):
        return gumbel_r.mean(loc=ppf(1 - 1.0 / (n + 1)),
                             scale=(ppf(1 - 1.0 / (np.e * (n + 1))) -
                                    ppf(1 - 1.0 / (n + 1))))

    def max_of_gaussians(n):
        return max_of_variates(n, norm.ppf)

    # Warnings and errors
    def warn_guess():
        print('warning: guess penconst for {0}'.format(regularizer))

    def warn_default():
        print('warning: default penconst for {0}'.format(regularizer))
        return scale * (np.sqrt(lshape) + np.sqrt(rshape))

    def error_value():
        raise ValueError('Bad regularizer {0}'.format(regularizer))

    lshape = shape[0] * shape[1]  # total dimension of left factor
    rshape = shape[2] * shape[3]  # total dimension of right factor
    lrank = min(shape[0:2])
    rrank = min(shape[2:4])

    if isinstance(regularizer, NucNorm):
        lnorm = regularizer.lnorm
        rnorm = regularizer.rnorm

        # Choose appropriate penalty constant
        if lnorm is norm_l1:
            if rnorm is norm_l2:
                out = scale * max_of_variates(lshape, ppf=lambda q: chi.ppf(q, rshape))
            elif rnorm is norm_l1:
                # \ell_\infty norm of noise
                # doubling because we want max of folded Gaussians
                # --- maybe use chi instead? slower and probably not much difference
                out = scale * max_of_gaussians(2 * np.prod(shape))
            elif rnorm is norm_linf:
                # max of \ell_1 norms of rows of noise
                out = scale * np.mean([
                    np.max([np.sum(np.abs(np.random.normal(size=(rshape))))
                            for i in range(lshape)])
                    for j in range(1000)])
            elif rnorm is norm_s1:
                # max of S_\infty norms of right factors
                out = scale * np.mean([
                    np.max([np.linalg.norm(np.random.normal(size=shape[2:4]), ord=2)
                            for j in range(lshape)])
                    for i in range(1000)])
            elif rnorm is norm_sinf:
                # max of S_1 norms of right factors
                out = scale * np.mean([
                    np.max([np.linalg.norm(np.random.normal(size=shape[2:4]), ord='nuc')
                            for j in range(lshape)])
                    for i in range(1000)])
            else:
                out = warn_default()
        elif lnorm is norm_l2:
            if rnorm is norm_l2:
                # S_\infty norm of noise
                out = scale * (np.sqrt(lshape) + np.sqrt(rshape))
            elif rnorm is norm_l1:
                # max of \ell_2 norm of columns of noise
                out = scale * max_of_variates(rshape, ppf=lambda q: chi.ppf(q, lshape))
            elif rnorm is norm_linf:
                # guess for \ell_2, \ell_\infty
                warn_guess()
                out = scale * (np.sqrt(lshape) + np.sqrt(rshape)) * np.sqrt(rshape)  # SHAPE[2] correct?
            elif rnorm is norm_sinf:
                # guess for \ell_2, S_\infty
                warn_guess()
                out = scale * (np.sqrt(lshape) + np.sqrt(rshape)) * np.sqrt(rrank)
            else:
                out = warn_default()
        elif lnorm is norm_linf:
            if rnorm is norm_linf:
                # guess for \ell_\infty, \ell_\infty
                warn_guess()
                out = scale * (np.sqrt(lshape) + np.sqrt(rshape)) * np.sqrt(lshape * rshape)
            elif rnorm is norm_l1:
                # max of \ell_1 norm of columns of noise
                out = scale * np.mean([
                    np.max([np.sum(np.abs(np.random.normal(size=(lshape))))
                            for i in range(rshape)])
                    for j in range(1000)])
            elif rnorm is norm_l2:
                # guess for \ell_\infty, \ell_2
                warn_guess()
                out = scale * (np.sqrt(lshape) + np.sqrt(rshape)) * np.sqrt(lshape)
            elif rnorm is norm_s1:
                # guess for \ell_\infty, S_1
                warn_guess()
                out = scale * (np.sqrt(lshape) + np.sqrt(rshape)) * np.sqrt(lshape)
            elif rnorm is norm_sinf:
                # guess for \ell_\infty, S_\infty
                warn_guess()
                out = scale * (np.sqrt(lshape) + np.sqrt(rshape)) * np.sqrt(lshape * rrank)
            else:
                out = warn_default()
        elif lnorm is norm_s1:
            if rnorm is norm_l1:
                # max of S_\infty norms of left factors
                out = scale * np.mean([
                    np.max([np.linalg.norm(np.random.normal(size=shape[0:2]), ord=2)
                            for j in range(rshape)])
                    for i in range(1000)])
            elif rnorm is norm_linf:
                # guess for S_1, \ell_\infty
                warn_guess()
                out = scale * (np.sqrt(lshape) + np.sqrt(rshape)) * np.sqrt(rshape)
            elif rnorm is norm_sinf:
                # guess for S_1, S_\infty
                warn_guess()
                out = scale * (np.sqrt(lshape) + np.sqrt(rshape)) * np.sqrt(rrank)
            else:
                out = warn_default()
        elif lnorm is norm_sinf:
            if rnorm is norm_l1:
                # max of S_1 norms of left factors
                out = scale * np.mean([
                    np.max([np.linalg.norm(np.random.normal(size=shape[0:2]), ord='nuc')
                            for j in range(rshape)])
                    for i in range(1000)])
            elif rnorm is norm_l2:
                # guess for S_\infty, \ell_2
                warn_guess()
                out = scale * (np.sqrt(lshape) + np.sqrt(rshape)) * np.sqrt(lrank)
            elif rnorm is norm_linf:
                # guess for S_\infty, \ell_\infty
                warn_guess()
                out = scale * (np.sqrt(lshape) + np.sqrt(rshape)) * np.sqrt(lrank * rshape)
            elif rnorm is norm_s1:
                # guess for S_\infty, S_1
                warn_guess()
                out = scale * (np.sqrt(lshape) + np.sqrt(rshape)) * np.sqrt(lrank)
            elif rnorm is norm_sinf:
                # guess for S_\infty, S_\infty
                warn_guess()
                out = scale * (np.sqrt(lshape) + np.sqrt(rshape)) * np.sqrt(lrank * rrank)
            else:
                out = warn_default()
        else:
            out = warn_default()
    elif isinstance(regularizer, VectorNorm) and regularizer.p == 'inf':
        out = scale * np.mean([
            np.sum(np.abs(np.random.normal(size=(np.prod(shape)))))
            for i in range(1000)])
    elif isinstance(regularizer, MaxNorm):
        # guess for max-norm
        warn_guess()
        out = scale * (np.sqrt(lshape) + np.sqrt(rshape)) * np.sqrt(lshape * rshape)
    else:
        # otherwise, just use l2 \otimes l2
        out = warn_default()

    return out
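# Quick empirical sanity check (illustrative, not from the original source) of the
# Gumbel-based approximation that max_of_variates above uses for the expected maximum
# of n iid chi(k) variates.
import numpy as np
from scipy.stats import chi, gumbel_r

n, k = 200, 8
empirical = np.mean(np.max(chi.rvs(k, size=(10_000, n)), axis=1))
ppf = lambda q: chi.ppf(q, k)
approx = gumbel_r.mean(loc=ppf(1 - 1.0 / (n + 1)),
                       scale=ppf(1 - 1.0 / (np.e * (n + 1))) - ppf(1 - 1.0 / (n + 1)))
print(empirical, approx)  # the two numbers should be close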
from scipy.stats import chi
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:

df = 78
mean, var, skew, kurt = chi.stats(df, moments='mvsk')

# Display the probability density function (``pdf``):

x = np.linspace(chi.ppf(0.01, df), chi.ppf(0.99, df), 100)
ax.plot(x, chi.pdf(x, df), 'r-', lw=5, alpha=0.6, label='chi pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = chi(df)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = chi.ppf([0.001, 0.5, 0.999], df)
np.allclose([0.001, 0.5, 0.999], chi.cdf(vals, df))
# True

# Generate random numbers: