def density(self, y, mu, sigma, logpdf=False):
    """
    Density function of the logistic mixture model distribution

    Parameters
    ----------
    y : :py:class:`numpy.ndarray`
        predictor values of shape(len(observations), 1)
    mu : float
        location of the distribution
    sigma : float
        scale of the distribution
    logpdf : bool
        If truthy, log of the probability density function will be returned.

    Returns
    -------
    :py:class:`numpy.ndarray`
        Probability density function or log of it.
    """
    rv = logistic(loc=mu, scale=sigma)
    # Truthiness instead of ``is True`` so that ``numpy.bool_(True)`` or ``1``
    # are honoured rather than silently falling back to the plain pdf.
    if logpdf:
        return rv.logpdf(y)
    return rv.pdf(y)
def test_point_density(scale, dist_source):
    """
    Round-trip a logistic density through ``PointDensity`` and compare to SciPy.

    A logistic distribution centred on the middle of ``scale`` is evaluated at
    the de-normalized ``constants.target_xs``.  A ``PointDensity`` is then
    obtained through the route named by ``dist_source`` and its pdf, cdf and
    ppf are checked against the SciPy values within fixed tolerances.
    """
    centre = scale.low + scale.width / 2
    rv = logistic(loc=centre, scale=scale.width / 30)
    xs = scale.denormalize_points(constants.target_xs)
    orig_densities = rv.pdf(xs)
    orig_cdfs = rv.cdf(xs)

    def to_pairs(points, densities):
        # Pair format passed to PointDensity.from_pairs
        return [{"x": x, "density": d} for (x, d) in zip(points, densities)]

    direct_dist = PointDensity.from_pairs(to_pairs(xs, orig_densities), scale)

    if dist_source == "direct":
        dist = direct_dist
    elif dist_source == "from_pairs":
        dist = PointDensity.from_pairs(to_pairs(xs, orig_densities), scale)
    elif dist_source == "to_arrays":
        arr_xs, arr_density = direct_dist.to_arrays()
        dist = PointDensity.from_pairs(to_pairs(arr_xs, arr_density), scale)
    elif dist_source == "to_arrays/2":
        arr_xs, arr_density = direct_dist.to_arrays(
            num_xs=int(constants.point_density_default_num_points / 2),
            add_endpoints=True,
        )
        dist = PointDensity.from_pairs(to_pairs(arr_xs, arr_density), scale)
    elif dist_source == "structured":
        dist = PointDensity.structure(direct_dist.destructure())
    elif dist_source == "denormalized":
        dist = direct_dist.normalize().denormalize(scale)
    elif dist_source == "from_conditions":
        cond = CrossEntropyCondition(p_dist=direct_dist)
        dist = PointDensity.from_conditions([cond], scale=scale)

    # PDF: the half-resolution route gets a looser absolute tolerance
    pdf_tolerance = 0.08 if dist_source == "to_arrays/2" else 0.01
    dist_densities = np.array([float(dist.pdf(x)) for x in xs])
    assert dist_densities == pytest.approx(orig_densities, abs=pdf_tolerance)

    # CDF
    dist_cdfs = np.array([float(dist.cdf(x)) for x in xs])
    assert dist_cdfs == pytest.approx(orig_cdfs, abs=0.06)

    # PPF: only checked where the reference density exceeds the threshold
    MIN_CHECK_DENSITY = 1e-3
    check_idxs = [
        i
        for i in range(constants.point_density_default_num_points)
        if orig_densities[i] > MIN_CHECK_DENSITY
    ]
    dist_ppfs = np.array([float(dist.ppf(c)) for c in orig_cdfs[check_idxs]])
    assert dist_ppfs == pytest.approx(xs[check_idxs], rel=0.25)
def _get_dist(latent, theta):
    """
    Return the frozen SciPy distribution for a latent-variable family.

    Parameters
    ----------
    latent : str
        One of ``'normal'``, ``'logistic'`` or ``'log-logistic'``.
    theta : sequence
        Distribution parameters.  For ``'normal'`` and ``'logistic'`` they are
        unpacked positionally into the SciPy constructor; for
        ``'log-logistic'`` ``theta[0]`` is used as ``scale`` and ``theta[1]``
        as the shape ``c`` of ``scipy.stats.fisk``.

    Returns
    -------
    Frozen ``scipy.stats`` distribution.

    Raises
    ------
    ValueError
        If ``latent`` is not one of the supported names (previously this
        surfaced as an unbound-local error on ``dist``).
    """
    if latent == 'normal':
        return st.norm(*theta)
    if latent == 'logistic':
        return st.logistic(*theta)
    if latent == 'log-logistic':
        return st.fisk(c=theta[1], scale=theta[0])
    raise ValueError('Unknown latent distribution: %r' % (latent,))
def test_logistic_distribution(predictor):
    """
    Check the ``'logistic'`` family's density, posterior, theta and
    log-likelihood against values computed directly with SciPy/NumPy.
    """
    y = deepcopy(predictor)

    # initialize Logistic Mixture Model distribution
    family = families.initialize_family('logistic')

    # fixed mu and sigma; probability is the logistic transform of y
    mu, sigma = 1, 2
    prob = np.exp(y) / (1 + np.exp(y))

    # density must match SciPy exactly
    family_pdf = family.density(y, mu, sigma)
    comp1_pdf = logistic(loc=mu, scale=sigma).pdf(y)
    npt.assert_array_equal(family_pdf, comp1_pdf)

    # log-density must match as well
    family_logpdf = family.density(y, mu, sigma, logpdf=True)
    npt.assert_array_equal(family_logpdf,
                           logistic(loc=mu, scale=sigma).logpdf(y))

    # posterior of the second component
    comp2_pdf = logistic(loc=5, scale=4).pdf(y)
    params = {
        'mu1': 1,
        'logsd1': np.log(2),
        'mu2': 5,
        'logsd2': np.log(4)
    }
    post = family.posterior(y, prob, params)
    expected_post = prob * comp2_pdf / ((1 - prob) * comp1_pdf + prob * comp2_pdf)
    npt.assert_array_equal(post, expected_post)

    # theta with and without init=True
    theta = family.theta(y, post)
    theta_init = family.theta(y, post, init=True)
    for key in ('mu1', 'mu2'):
        npt.assert_equal(theta[key], theta_init[key])
    npt.assert_equal(theta_init['logsd1'], theta_init['logsd2'])
    assert theta['logsd1'] != theta['logsd2']

    # log-likelihood: the two parts must add up to the full value
    loli = family.loglik(y, post, prob, theta)
    npt.assert_equal(loli['component'] + loli['concomitant'], loli['full'])
def compactspace(scale, n):
    r"""
    Return ``n`` points on the real line that are equally spaced in
    logistic probability.

    The unit interval is sampled at ``n`` equally spaced interior levels (the
    end points 0 and 1 are dropped), and every level is mapped to
    :math:`(-\infty, \infty)` with the quantile function (``ppf``) of a
    zero-centred logistic distribution whose scale is ``scale``.
    """
    interior_levels = np.linspace(0, 1, n + 2)[1:-1]
    frozen = logistic(scale=scale)
    return frozen.ppf(interior_levels)
def test_survival():
    """
    Test the survival function against the SciPy logistic benchmark.
    """
    loc = np.array([3.0])
    scale = np.array([[2.0], [4.0]])
    expected = stats.logistic(loc, scale).sf([1.0, 2.0]).astype(np.float32)
    net = SF()
    result = net(Tensor([1.0, 2.0], dtype=dtype.float32))
    max_abs_err = 2e-5
    deviation = np.abs(result.asnumpy() - expected)
    assert (deviation < max_abs_err).all()
def test_entropy():
    """
    Test entropy against the SciPy logistic benchmark.
    """
    loc = np.array([3.0])
    scale = np.array([[2.0], [4.0]])
    expected = stats.logistic(loc, scale).entropy().astype(np.float32)
    net = EntropyH()
    result = net()
    max_abs_err = 1e-6
    deviation = np.abs(result.asnumpy() - expected)
    assert (deviation < max_abs_err).all()
def test_pdf():
    """
    Test pdf against the SciPy logistic benchmark.
    """
    loc = np.array([3.0])
    scale = np.array([[2.0], [4.0]])
    expected = stats.logistic(loc, scale).pdf([1.0, 2.0]).astype(np.float32)
    net = Prob()
    result = net(Tensor([1.0, 2.0], dtype=dtype.float32))
    max_abs_err = 1e-6
    deviation = np.abs(result.asnumpy() - expected)
    assert (deviation < max_abs_err).all()
def test_log_likelihood():
    """
    Test log_pdf against the SciPy logistic benchmark.
    """
    loc = np.array([3.0])
    scale = np.array([[2.0], [4.0]])
    expected = stats.logistic(loc, scale).logpdf([1.0, 2.0]).astype(np.float32)
    net = LogProb()
    result = net(Tensor([1.0, 2.0], dtype=dtype.float32))
    max_abs_err = 1e-6
    deviation = np.abs(result.asnumpy() - expected)
    assert (deviation < max_abs_err).all()
def test_log_cdf():
    """
    Test log cdf against the SciPy logistic benchmark.
    """
    loc = np.array([3.0])
    scale = np.array([[2.0], [4.0]])
    expected = stats.logistic(loc, scale).logcdf([1.0, 2.0]).astype(np.float32)
    net = LogCDF()
    result = net(Tensor([1.0, 2.0], dtype=dtype.float32))
    max_abs_err = 5e-5
    deviation = np.abs(result.asnumpy() - expected)
    assert (deviation < max_abs_err).all()
def Atualizar_Particulas(self):
    """
    Replace every dimension of every particle by a random bit.

    For each entry of ``dimensao`` of each particle in ``self.particulas`` the
    value ``logistic(entry)`` is compared with a fresh uniform draw; the entry
    becomes 1 when the draw is greater than or equal to that value and 0
    otherwise.  The particles are modified in place.
    """
    # NOTE(review): ``logistic`` is presumably a sigmoid helper defined
    # elsewhere in the file - confirm.
    for particle in self.particulas:
        for idx, current in enumerate(particle.dimensao):
            threshold = logistic(current)
            draw = np.random.uniform()
            particle.dimensao[idx] = 1 if draw >= threshold else 0
def point_density_from_scale(scale: Scale):
    """
    Build a ``PointDensity`` from a logistic distribution placed on ``scale``.

    The logistic is located at the midpoint of the scale with a spread of one
    thirtieth of the scale width, and is evaluated at the de-normalized
    ``constants.target_xs``.
    """
    midpoint = scale.low + scale.width / 2
    spread = scale.width / 30
    frozen = logistic(loc=midpoint, scale=spread)
    xs = scale.denormalize_points(constants.target_xs)
    pairs = []
    for x, density in zip(xs, frozen.pdf(xs)):
        pairs.append({"x": x, "density": density})
    return PointDensity.from_pairs(pairs, scale)
def simulate_new_Y_RNAseq(Xsim, t, p_dims, num_classes=10, noise_var=.2, dropout_dist=stats.logistic(0, 1), dropout_p=.1):
    """
    Simulate an RNAseq experiment.

    The columns of Y are split into groups; each group is drawn from a
    Gaussian process over ``np.c_[Xsim, t]`` with a freshly randomized sum of
    kernels.  Y is then z-score standardized per column.

    We model dropouts in RNA count space (E=exp(Y), where Y is z-score
    standardized) by a logistic distribution. You can adjust the distribution
    by passing in a different dropout probability distribution dropout_dist.

    An entry is dropped (set to 0) where ``dropout_dist.pdf`` of the shifted
    count, multiplied by a uniform(0, 1) draw, is bigger than dropout_p.

    Returns the tuple ``(Y, drop_fil)``: Y after ``log1p`` and the boolean
    dropout mask.
    """
    n_data = Xsim.shape[0]
    Y = np.empty((n_data, p_dims))
    # Random, sorted column indices at which the p_dims columns are split
    splits = np.random.choice(p_dims, replace=False, size=num_classes)
    splits.sort()
    for sub in np.array_split(range(p_dims), splits):
        # A new kernel with new random hyper-parameters for every column group
        ky_sim = (GPy.kern.RBF(1, variance=1e-8, lengthscale=5, active_dims=[2]) # switch time info off (variance of 1e-8)
                  + GPy.kern.Linear(2, variances=np.random.uniform(3, 5)) # Linear contribution
                  + GPy.kern.Matern32(2, ARD=True, variance=np.random.uniform(4, 6), lengthscale=np.random.uniform(20,25, size=2)) # long-term
                  + GPy.kern.Matern32(2, ARD=True, variance=np.random.uniform(40, 60), lengthscale=np.random.uniform(6,7, size=2)) # mid term
                  + GPy.kern.Matern32(2, ARD=True, variance=np.random.uniform(1, 2), lengthscale=np.random.uniform(1,2, size=2)) # short term
                  #+ GPy.kern.LogisticBasisFuncKernel(2, np.random.uniform(0,10), variance=1, slope=1, active_dims=[1,2])
                  + GPy.kern.White(3,variance=noise_var)
                  )
        Ky_sim = ky_sim.K(np.c_[Xsim, t])
        # Five GP draws are mixed with standard-normal weights, one weight
        # vector per column of the group
        Y[:, sub] = np.random.multivariate_normal(np.zeros(n_data), Ky_sim, 5).T.dot(np.random.normal(0,1,(5, sub.size)))
    # Disabled alternative: one kernel and one independent draw per column
    #Ky_sim = ky_sim.K(np.c_[Xsim, t])
    #Y = np.random.multivariate_normal(np.zeros(n_data), Ky_sim, p_dims).T
    # z-score standardize every column
    Y -= Y.mean(0)
    Y /= Y.std(0)
    # put dropouts in
    # First exponentiate the data
    Y = np.exp(Y)
    # shift so that the global minimum is 0
    Y -= Y.min()
    # dropout mask: density of the dropout distribution scaled by a uniform draw
    drop_fil = (dropout_dist.pdf(Y)*np.random.uniform(0,1,Y.shape)) > dropout_p
    Y[drop_fil] = 0
    # go back to log space (the final re-normalization is disabled):
    Y = np.log1p(Y)
    #Y -= Y.mean(0)
    #Y /= Y.std(0)
    return Y, drop_fil
def testLogisticSample(self):
    """
    Draw samples from ``tfd.Logistic`` and check, per location, that the
    Kolmogorov-Smirnov statistic against the SciPy logistic cdf is below 0.01.
    """
    locs = [3.0, 4.0, 2.0]
    scale = 1.0
    n_draws = int(15e3)
    dist = tfd.Logistic(locs, scale, validate_args=True)
    samples = dist.sample(n_draws, seed=test_util.test_seed())
    self.assertEqual(samples.shape, (n_draws, 3))
    drawn = self.evaluate(samples)
    for column, loc in enumerate(locs):
        reference_cdf = stats.logistic(loc=loc, scale=scale).cdf
        ks_statistic = stats.kstest(drawn[:, column], reference_cdf)[0]
        self.assertLess(ks_statistic, 0.01)
def get_submission_params(
    self, logistic_params: logistic.LogisticParams
) -> SubmissionLogisticParams:
    """
    Get the params needed to submit a logistic to Metaculus as part of a prediction.
    See comments for more explanation of how the params need to be transformed
    for Metaculus to accept them

    :param logistic_params: params for a logistic on the normalized scale
    :return: params to submit the logistic to Metaculus as part of a prediction
    """
    loc = logistic_params.loc
    scale = logistic_params.scale
    # The cdf values below come from the unclipped distribution
    distribution = stats.logistic(loc, scale)

    # The loc and scale have to be within a certain range for the Metaculus
    # API to accept the prediction.
    # Max loc of 3 and max scale of 10 were set based on the API response to a
    # prediction on
    # https://pandemic.metaculus.com/questions/3920/what-will-the-cbo-estimate-to-be-the-cost-of-the-emergency-telework-act-s3561/
    max_loc = 3
    min_scale = 0.01
    max_scale = 10
    clipped_loc = min(loc, max_loc)
    clipped_scale = min(max(scale, min_scale), max_scale)

    # We're not really sure what the deal with the low and high is.
    # Presumably they're supposed to be the points at which Metaculus
    # "cuts off" your distribution and ignores probability mass assigned
    # below/above. But we're not actually trying to use them to "cut off" our
    # distribution in a smart way; we're just trying to include as much of our
    # distribution as we can without the API getting unhappy (we believe that
    # if you set the low higher than the value below [or if you set the high
    # lower], then the API will reject the prediction, though we haven't
    # tested that extensively).
    min_open_low = 0.01
    low = max(distribution.cdf(0), min_open_low) if self.low_open else 0

    # Min high of (low + 0.01) set based on the API response for
    # https://www.metaculus.com/api2/questions/3961/predict/ --
    # {'prediction': ['high minus low must be at least 0.01']}
    min_open_high = low + 0.01
    max_open_high = 0.99
    if self.high_open:
        high = max(min(distribution.cdf(1), max_open_high), min_open_high)
    else:
        high = 1

    return SubmissionLogisticParams(clipped_loc, clipped_scale, low, high)
def sig(self, phi: OrderedDict[Subspace, Number]) -> Number:
    '''
    Algorithm:
        1. Fit the values of ``phi`` (in order, against positions 1..n) to a
           line with ``self.lr``
        2. Compute the goodness-of-fit r^2 and the residuals of that fit
        3. Fit a logistic distribution to all residuals except the first and
           return r^2 times the fitted cdf evaluated at the first residual
    '''
    values = np.array(list(phi.values()))
    positions = np.arange(1, len(values) + 1).reshape(-1, 1)

    model = self.lr.fit(positions, values)
    goodness = model.score(positions, values)
    residuals = values - model.predict(positions)

    first, rest = residuals[0], residuals[1:]
    loc, scale = logistic.fit(rest)
    return goodness * logistic(loc, scale).cdf(first)
def __score_obs(self, X, y, beta, mu, sigma):
    """
    Score contribution of one observation.

    Each of the ``len(beta) + 2`` components of the conditional-density
    gradient is integrated over ``w`` in ``[-3*sigma, 3*sigma]`` against the
    standard normal ('probit') or standard logistic ('logit') density, and the
    vector of integrals is divided by the observation's likelihood.

    ``X`` and ``y`` are modified in place: their indices are reset and the
    ``self.output`` column is dropped from ``X``.

    Raises ``ValueError`` for any other ``self.residual_dist``.
    """
    X.reset_index(drop=True, inplace=True)
    y.reset_index(drop=True, inplace=True)
    X.drop(self.output, axis=1, inplace=True)

    if self.residual_dist == 'probit':
        weight = st.norm(0, 1)
    elif self.residual_dist == 'logit':
        weight = st.logistic(0, 1)
    else:
        raise ValueError('Unknown value for argument residual_dist')

    lower, upper = -3*sigma, 3*sigma
    integrals = []
    for component in range(len(beta) + 2):
        value = spint.quad(
            lambda w: self.__grad_conditional_density_obs(X, w, y, beta, mu, sigma)[component]
            * weight.pdf(w),
            lower, upper)[0]
        integrals.append(value)

    result = np.array(integrals)
    return result / exp(self.__log_likelihood_obs(X, y, beta, mu, sigma))
def __log_likelihood_obs(self, X, y, beta, mu, sigma):
    """
    Log-likelihood of one observation.

    The conditional density is integrated over ``w`` in
    ``[-3*sigma, 3*sigma]`` against the standard normal ('probit') or standard
    logistic ('logit') density, and the log of the integral is returned.

    ``X`` and ``y`` are modified in place: their indices are reset and the
    ``self.output`` column is dropped from ``X`` when it is present.

    Raises ``ValueError`` for any other ``self.residual_dist``.
    """
    X.reset_index(drop=True, inplace=True)
    y.reset_index(drop=True, inplace=True)
    try:
        X.drop(self.output, axis=1, inplace=True)
    except KeyError:
        # The output column is already gone (for example __score_obs drops it
        # before calling this method).  Only this expected case is ignored;
        # a bare ``except`` would also hide unrelated failures.
        pass

    if self.residual_dist == 'probit':
        weight = st.norm(0, 1)
    elif self.residual_dist == 'logit':
        weight = st.logistic(0, 1)
    else:
        raise ValueError('Unknown value for argument residual_dist')

    result = spint.quad(
        lambda w: self.__conditional_density_obs(X, w, y, beta, mu, sigma)
        * weight.pdf(w),
        -3*sigma, 3*sigma)[0]
    return log(result)
def __init__(self, location, scale_parameter):
    """
    Store the logistic parameters and, for a positive scale, the derived
    quantities.

    When ``scale_parameter`` is ``None`` only the two parameters are stored.
    Otherwise the unbounded support is recorded, and if the scale is positive
    the frozen SciPy distribution, its first four moments and the grid used
    for plotting the pdf are set as well.
    """
    self.scale_parameter = scale_parameter
    self.location = location
    if self.scale_parameter is None:
        return

    self.bounds = np.array([-np.inf, np.inf])
    if not self.scale_parameter > 0:
        return

    moments = logistic.stats(loc=self.location,
                             scale=self.scale_parameter,
                             moments='mvsk')
    self.parent = logistic(loc=self.location, scale=self.scale_parameter)
    self.mean, self.variance, self.skewness, self.kurtosis = moments
    self.x_range_for_pdf = np.linspace(self.location - 10.0,
                                       20.0 + self.location,
                                       RECURRENCE_PDF_SAMPLES)
def __init__(self, loc=0, scale=1):
    """
    Set up a logistic distribution backed by a frozen SciPy object.

    Parameters
    ----------
    loc : float
        Location parameter
    scale : float, positive
        Scale parameter
    """
    assert scale > 0, "scale parameter must be positive"

    # Scipy backend, frozen with the checked parameters
    self.sp = logistic(loc=loc, scale=scale)

    # Parameters
    self.scale = scale
    self.loc = loc

    super().__init__()
def __init__(self, location, scale_parameter):
    """
    Set up a logistic distribution.

    Parameters
    ----------
    location : float or None
        Location of the distribution; ``None`` selects 0.0.
    scale_parameter : float or None
        Scale of the distribution; ``None`` selects 1.0.  Must be strictly
        positive.

    Raises
    ------
    ValueError
        If the scale is zero or negative.  A zero scale used to slip through
        the ``< 0`` check although the message requires a positive scale.
    """
    self.location = 0.0 if location is None else location
    self.scale_parameter = 1.0 if scale_parameter is None else scale_parameter
    self.bounds = np.array([-np.inf, np.inf])
    if self.scale_parameter <= 0:
        raise ValueError(
            'Invalid parameter in Logistic distribution. Scale should be positive.'
        )
    # Frozen SciPy distribution and its first four moments
    self.parent = logistic(loc=self.location, scale=self.scale_parameter)
    self.mean, self.variance, self.skewness, self.kurtosis = self.parent.stats(
        moments='mvsk')
    # Grid on which the pdf is tabulated
    self.x_range_for_pdf = np.linspace(self.location - 10.0,
                                       20.0 + self.location,
                                       RECURRENCE_PDF_SAMPLES)
def adaptive_integrate(f1, f2, key, value):
    '''Integrate f1(x) * f2(x) * pdf(x) over the support of a distribution.

    inputs:
        f1: function 1 of x, callable
        f2: function 2 of x, callable
        key: distribution type of random variable, string (matched by prefix)
        value: parameters of random distribution, scalar or tuple
               (see the comment of each branch)
    outputs:
        y: integral value (first element of the integrate.quad result)
    raises:
        ValueError: if key names no supported distribution.  The original
                    code printed a notice and then failed on an unbound
                    variable.
    '''
    if key.startswith('Uniform'):
        # stats.uniform(location, scale) is defined on [0, 1] by default;
        # shift it to [-1, 1] for the definition of the Legendre basis
        f_distr = stats.uniform(-1, 2)
        lower, upper = -1, 1
    elif key.startswith('Gaussian'):
        # standard normal, for the Hermite polynomial basis
        f_distr = stats.norm(0, 1)
        lower, upper = -npy.inf, npy.inf
    elif key.startswith('Gamma'):
        # value[0]: lambda, value[1]: k (a for stats.gamma);
        # stats.gamma takes a scale, i.e. 1/lambda
        f_distr = stats.gamma(value[1], 0, 1./value[0])
        lower, upper = 0, npy.inf
    elif key.startswith('Beta'):
        # value[0]: alpha, value[1]: beta, no "loc" or "scale" needed;
        # always in the range of [0, 1]
        f_distr = stats.beta(value[0], value[1])
        lower, upper = 0, 1
    elif key.startswith('Exponential'):
        # value: lambda
        f_distr = stats.expon(0, 1./value)
        lower, upper = 0, npy.inf
    elif key.startswith('Lognormal'):
        # value[0]: mu, value[1]: sigma
        f_distr = stats.lognorm(value[1], 0, npy.exp(value[0]))
        lower, upper = 0, npy.inf
    elif key.startswith('Gumbel'):
        # value[0]: mu, value[1]: beta
        f_distr = stats.gumbel_r(value[0], value[1])
        lower, upper = -npy.inf, npy.inf
    elif key.startswith('Weibull'):
        # value[0]: lambda, value[1]: k
        f_distr = stats.weibull_min(value[1], 0, value[0])
        lower, upper = 0, npy.inf
    elif key.startswith('Triangular'):
        # value: c, no "loc" and "scale" needed; always in the range of [0, 1]
        f_distr = stats.triang(value)
        lower, upper = 0, 1
    elif key.startswith('Logistic'):
        # value[0]: location, value[1]: scale
        f_distr = stats.logistic(value[0], value[1])
        lower, upper = -npy.inf, npy.inf
    elif key.startswith('Laplace'):
        # value[0]: location, value[1]: scale
        f_distr = stats.laplace(value[0], value[1])
        lower, upper = -npy.inf, npy.inf
    else:
        raise ValueError(
            'unsupported distribution type %r; other types of statistical '
            'distributions are coming soon ...' % (key,))

    # Product of the two functions weighted by the density
    f = lambda x: f1(x) * f2(x) * f_distr.pdf(x)
    y = integrate.quad(f, lower, upper)
    return y[0]
def logistic(x,N,mean,sigma):
    """Logistic probability density at ``x`` (location ``mean``, scale
    ``sigma``) multiplied by the amplitude ``N``."""
    frozen = stats.logistic(mean, sigma)
    density = frozen.pdf(x)
    return N * density
def all_dists():
    """
    Return a dict mapping ``scipy.stats`` distribution names to frozen
    instances of those distributions.

    The shape parameters were taken from the examples in the official
    scipy.stats documentation; ``loc`` is 0.0 and ``scale`` is 1.0 throughout.
    The dict has 87 entries (89 with the two disabled ``frechet`` entries).
    """
    # Newer SciPy releases renamed ``gilbrat`` to ``gibrat`` and dropped the
    # old name; use whichever exists, keeping the historical dict key.
    gibrat = getattr(stats, "gibrat", None) or stats.gilbrat
    return {
        "alpha": stats.alpha(a=3.57, loc=0.0, scale=1.0),
        "anglit": stats.anglit(loc=0.0, scale=1.0),
        "arcsine": stats.arcsine(loc=0.0, scale=1.0),
        "beta": stats.beta(a=2.31, b=0.627, loc=0.0, scale=1.0),
        "betaprime": stats.betaprime(a=5, b=6, loc=0.0, scale=1.0),
        "bradford": stats.bradford(c=0.299, loc=0.0, scale=1.0),
        "burr": stats.burr(c=10.5, d=4.3, loc=0.0, scale=1.0),
        "cauchy": stats.cauchy(loc=0.0, scale=1.0),
        "chi": stats.chi(df=78, loc=0.0, scale=1.0),
        "chi2": stats.chi2(df=55, loc=0.0, scale=1.0),
        "cosine": stats.cosine(loc=0.0, scale=1.0),
        "dgamma": stats.dgamma(a=1.1, loc=0.0, scale=1.0),
        "dweibull": stats.dweibull(c=2.07, loc=0.0, scale=1.0),
        "erlang": stats.erlang(a=2, loc=0.0, scale=1.0),
        "expon": stats.expon(loc=0.0, scale=1.0),
        "exponnorm": stats.exponnorm(K=1.5, loc=0.0, scale=1.0),
        "exponweib": stats.exponweib(a=2.89, c=1.95, loc=0.0, scale=1.0),
        "exponpow": stats.exponpow(b=2.7, loc=0.0, scale=1.0),
        "f": stats.f(dfn=29, dfd=18, loc=0.0, scale=1.0),
        "fatiguelife": stats.fatiguelife(c=29, loc=0.0, scale=1.0),
        "fisk": stats.fisk(c=3.09, loc=0.0, scale=1.0),
        "foldcauchy": stats.foldcauchy(c=4.72, loc=0.0, scale=1.0),
        "foldnorm": stats.foldnorm(c=1.95, loc=0.0, scale=1.0),
        # "frechet_r": stats.frechet_r(c=1.89, loc=0.0, scale=1.0),
        # "frechet_l": stats.frechet_l(c=3.63, loc=0.0, scale=1.0),
        "genlogistic": stats.genlogistic(c=0.412, loc=0.0, scale=1.0),
        "genpareto": stats.genpareto(c=0.1, loc=0.0, scale=1.0),
        "gennorm": stats.gennorm(beta=1.3, loc=0.0, scale=1.0),
        "genexpon": stats.genexpon(a=9.13, b=16.2, c=3.28, loc=0.0, scale=1.0),
        "genextreme": stats.genextreme(c=-0.1, loc=0.0, scale=1.0),
        "gausshyper": stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18,
                                       loc=0.0, scale=1.0),
        "gamma": stats.gamma(a=1.99, loc=0.0, scale=1.0),
        "gengamma": stats.gengamma(a=4.42, c=-3.12, loc=0.0, scale=1.0),
        "genhalflogistic": stats.genhalflogistic(c=0.773, loc=0.0, scale=1.0),
        "gilbrat": gibrat(loc=0.0, scale=1.0),
        "gompertz": stats.gompertz(c=0.947, loc=0.0, scale=1.0),
        "gumbel_r": stats.gumbel_r(loc=0.0, scale=1.0),
        "gumbel_l": stats.gumbel_l(loc=0.0, scale=1.0),
        "halfcauchy": stats.halfcauchy(loc=0.0, scale=1.0),
        "halflogistic": stats.halflogistic(loc=0.0, scale=1.0),
        "halfnorm": stats.halfnorm(loc=0.0, scale=1.0),
        "halfgennorm": stats.halfgennorm(beta=0.675, loc=0.0, scale=1.0),
        "hypsecant": stats.hypsecant(loc=0.0, scale=1.0),
        "invgamma": stats.invgamma(a=4.07, loc=0.0, scale=1.0),
        "invgauss": stats.invgauss(mu=0.145, loc=0.0, scale=1.0),
        "invweibull": stats.invweibull(c=10.6, loc=0.0, scale=1.0),
        "johnsonsb": stats.johnsonsb(a=4.32, b=3.18, loc=0.0, scale=1.0),
        "johnsonsu": stats.johnsonsu(a=2.55, b=2.25, loc=0.0, scale=1.0),
        "ksone": stats.ksone(n=1e03, loc=0.0, scale=1.0),
        "kstwobign": stats.kstwobign(loc=0.0, scale=1.0),
        "laplace": stats.laplace(loc=0.0, scale=1.0),
        "levy": stats.levy(loc=0.0, scale=1.0),
        "levy_l": stats.levy_l(loc=0.0, scale=1.0),
        "levy_stable": stats.levy_stable(alpha=0.357, beta=-0.675,
                                         loc=0.0, scale=1.0),
        "logistic": stats.logistic(loc=0.0, scale=1.0),
        "loggamma": stats.loggamma(c=0.414, loc=0.0, scale=1.0),
        "loglaplace": stats.loglaplace(c=3.25, loc=0.0, scale=1.0),
        "lognorm": stats.lognorm(s=0.954, loc=0.0, scale=1.0),
        "lomax": stats.lomax(c=1.88, loc=0.0, scale=1.0),
        "maxwell": stats.maxwell(loc=0.0, scale=1.0),
        "mielke": stats.mielke(k=10.4, s=3.6, loc=0.0, scale=1.0),
        "nakagami": stats.nakagami(nu=4.97, loc=0.0, scale=1.0),
        "ncx2": stats.ncx2(df=21, nc=1.06, loc=0.0, scale=1.0),
        "ncf": stats.ncf(dfn=27, dfd=27, nc=0.416, loc=0.0, scale=1.0),
        "nct": stats.nct(df=14, nc=0.24, loc=0.0, scale=1.0),
        "norm": stats.norm(loc=0.0, scale=1.0),
        "pareto": stats.pareto(b=2.62, loc=0.0, scale=1.0),
        "pearson3": stats.pearson3(skew=0.1, loc=0.0, scale=1.0),
        "powerlaw": stats.powerlaw(a=1.66, loc=0.0, scale=1.0),
        "powerlognorm": stats.powerlognorm(c=2.14, s=0.446, loc=0.0, scale=1.0),
        "powernorm": stats.powernorm(c=4.45, loc=0.0, scale=1.0),
        "rdist": stats.rdist(c=0.9, loc=0.0, scale=1.0),
        "reciprocal": stats.reciprocal(a=0.00623, b=1.01, loc=0.0, scale=1.0),
        "rayleigh": stats.rayleigh(loc=0.0, scale=1.0),
        "rice": stats.rice(b=0.775, loc=0.0, scale=1.0),
        "recipinvgauss": stats.recipinvgauss(mu=0.63, loc=0.0, scale=1.0),
        "semicircular": stats.semicircular(loc=0.0, scale=1.0),
        "t": stats.t(df=2.74, loc=0.0, scale=1.0),
        "triang": stats.triang(c=0.158, loc=0.0, scale=1.0),
        "truncexpon": stats.truncexpon(b=4.69, loc=0.0, scale=1.0),
        "truncnorm": stats.truncnorm(a=0.1, b=2, loc=0.0, scale=1.0),
        "tukeylambda": stats.tukeylambda(lam=3.13, loc=0.0, scale=1.0),
        "uniform": stats.uniform(loc=0.0, scale=1.0),
        "vonmises": stats.vonmises(kappa=3.99, loc=0.0, scale=1.0),
        "vonmises_line": stats.vonmises_line(kappa=3.99, loc=0.0, scale=1.0),
        "wald": stats.wald(loc=0.0, scale=1.0),
        "weibull_min": stats.weibull_min(c=1.79, loc=0.0, scale=1.0),
        "weibull_max": stats.weibull_max(c=2.87, loc=0.0, scale=1.0),
        "wrapcauchy": stats.wrapcauchy(c=0.0311, loc=0.0, scale=1.0),
    }
def logistic(x, N, mean, sigma):
    """Return ``N`` times the logistic pdf with location ``mean`` and scale
    ``sigma``, evaluated at ``x``."""
    pdf_at_x = stats.logistic(mean, sigma).pdf(x)
    scaled = N * pdf_at_x
    return scaled
def rnaseq_simulation(p_genes=220, n_divisions=6, num_classes=100, seed=None, split_prob=.01, dropout_dist=stats.logistic(0, 1), dropout_p=.1):
    """
    Set up a simulated RNAseq experiment.

    Cell division times are generated, a latent space is simulated from them
    and a zero-argument callable is returned that draws a new expression
    matrix through ``simulate_new_Y_RNAseq`` on every call.

    Returns ``(Xsim, simulate_new, t, c, labels, seed)``.
    """
    t, labels, seed = make_cell_division_times(
        n_divisions, n_replicates=9, seed=seed, std=.05, drop_p=.6)
    # c is computed from the labels returned above, before
    # simulate_latent_space replaces them
    c = np.log2(labels) / n_divisions
    #c = t
    Xsim, seed, labels, t = simulate_latent_space(
        t, labels, var=1., seed=seed, split_prob=split_prob, gap=1.)

    sampler_options = dict(num_classes=num_classes,
                           noise_var=.2,
                           dropout_dist=dropout_dist,
                           dropout_p=dropout_p)

    def simulate_new():
        return simulate_new_Y_RNAseq(Xsim, t, p_genes, **sampler_options)

    return Xsim, simulate_new, t, c, labels, seed

# Disabled predecessor, kept for reference:
# def guo_simulation_old(p_dims=48, n_divisions=6, seed=None):
#     t, labels, seed = make_cell_division_times(n_divisions, n_replicates=9, seed=seed, std=.03, drop_p=.6)
#     c = np.log2(labels) / n_divisions
#     #c = t
#     xvar = .6
#     Xsim, seed, labels = simulate_latent_space(t, labels, var=xvar, seed=seed, split_prob=.01)
#     def simulate_new():
#         return simulate_new_Y(Xsim, t, p_dims, num_classes=48, noise_var=.7)
#     return Xsim, simulate_new, t, c, labels, seed
# First four moments (mean, variance, skewness, kurtosis) of the standard
# logistic distribution:
mean, var, skew, kurt = logistic.stats(moments='mvsk')

# Plot the probability density function (``pdf``) between the 1% and the 99%
# quantile:
x_low = logistic.ppf(0.01)
x_high = logistic.ppf(0.99)
x = np.linspace(x_low, x_high, 100)
ax.plot(x, logistic.pdf(x), 'r-', lw=5, alpha=0.6, label='logistic pdf')

# Calling the distribution object fixes the shape, location and scale
# parameters and returns a "frozen" RV object holding them.
# Plot the ``pdf`` of the frozen distribution on top:
rv = logistic()
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Round trip through ``ppf`` and ``cdf`` to check their accuracy:
probabilities = [0.001, 0.5, 0.999]
vals = logistic.ppf(probabilities)
np.allclose(probabilities, logistic.cdf(vals))  # True

# Draw random numbers ...
r = logistic.rvs(size=1000)

# ... and compare their histogram with the curves above:
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
def generador_estadistica_2(base):
    """
    Add mean, standard deviation and 1000 random draws of a fitted
    distribution to ``base``.

    The first key of ``base`` names the distribution and its value holds the
    positional parameters passed to the matching ``scipy.stats`` constructor.
    The results are stored under the keys ``"mean"``, ``"std"`` and
    ``"generador"`` and ``base`` itself is returned.

    When the first key is ``"Action can not be performed"`` placeholder
    strings are stored instead.  The original code failed in that branch
    because it drew samples from a distribution that was never created.

    Raises
    ------
    ValueError
        If the distribution name is not supported (previously an
        unbound-variable error).
    """
    # distribution name -> (constructor, number of positional parameters)
    constructors = {
        "expon": (expon, 2),
        "exponnorm": (exponnorm, 3),
        "gamma": (gamma, 3),
        "logistic": (logistic, 2),
        "lognorm": (lognorm, 3),
        "norm": (norm, 2),
        "uniform": (uniform, 2),
        "triang": (triang, 3),
    }

    dis = next(iter(base))
    lista_parameters = np.asarray(base[dis])

    if dis in constructors:
        constructor, n_params = constructors[dis]
        # Explicit indexing keeps the IndexError for too few parameters
        seleccion = constructor(*[lista_parameters[k] for k in range(n_params)])
        lista_mean = seleccion.mean()
        lista_std = seleccion.std()
        lista_generador = seleccion.rvs(1000)
    elif dis == "Action can not be performed":
        lista_mean = "No data for mean"
        lista_std = "No data for std"
        lista_generador = "No data for generador"
    else:
        raise ValueError("Unsupported distribution: %r" % (dis,))

    base["mean"] = lista_mean
    base["std"] = lista_std
    base["generador"] = lista_generador
    return base
def plot_light_curves(self, model: str = 'de', figsize: tuple = (13, 8), fig=None, gridspec=None):
    """
    Plot the light curves with the fitted model, one column per light curve.

    Rows, top to bottom: residual histogram with a logistic density,
    observed flux with the flux model, flux divided by the baseline with the
    transit model, and residuals.

    Parameters
    ----------
    model : str
        ``'de'`` uses ``self.de.minimum_location``; ``'mc'`` uses up to 300
        permuted posterior samples and additionally draws the 16-84
        percentile band of the transit model.
    figsize : tuple
        Size of the figure created when ``fig`` is None.
    fig
        Existing figure to draw into; a new one is created when None.
    gridspec : dict, optional
        Entries overriding the default ``gridspec_kw`` (height ratios).

    Returns
    -------
    tuple
        ``(fig, axs)``.

    Raises
    ------
    NotImplementedError
        If ``model`` is neither ``'de'`` nor ``'mc'``.
    """
    if fig is None:
        fig = figure(figsize=figsize, constrained_layout=True)

    gs = dict(height_ratios=(0.5, 2, 2, 1))
    if gridspec:
        gs.update(gridspec)

    axs = fig.subplots(4, self.nlc, sharey='row', gridspec_kw=gs)

    if model == 'de':
        pv = self.de.minimum_location
        err = 10**pv[self._sl_err]
        if not self.photometry_frozen:
            self.set_ofluxa(pv)
    elif model == 'mc':
        fc = array(self.posterior_samples(include_ldc=True))
        pv = permutation(fc)[:300]
        err = 10**median(pv[:, self._sl_err], 0)
        if not self.photometry_frozen:
            self.set_ofluxa(median(pv, 0))
    else:
        raise NotImplementedError(
            "Light curve plotting `model` needs to be either `de` or `mc`")

    # Median first, then the 16th and 84th percentiles
    ps = [50, 16, 84]
    if self.with_transit:
        tm = percentile(atleast_2d(self.transit_model(pv)), ps, 0)
    else:
        tm = percentile(atleast_2d(ones(self.timea.size)), ps, 0)
    fm = percentile(atleast_2d(self.flux_model(pv)), ps, 0)
    bl = percentile(atleast_2d(self.baseline(pv)), ps, 0)

    for i, sl in enumerate(self.lcslices):
        t = self.timea[sl]
        axs[1, i].plot(t, self.ofluxa[sl], '.', alpha=0.5)
        axs[1, i].plot(t, fm[0][sl], 'k', lw=2)
        axs[2, i].plot(t, self.ofluxa[sl] / bl[0][sl], '.', alpha=0.5)
        if model == 'mc':
            axs[2, i].fill_between(t, tm[1][sl], tm[2][sl],
                                   facecolor='darkblue', alpha=0.25)
        axs[2, i].plot(t, tm[0][sl], 'k', lw=2)
        axs[3, i].plot(t, self.ofluxa[sl] - fm[0][sl], '.', alpha=0.5)
        res = self.ofluxa[sl] - fm[0][sl]
        # NOTE(review): ``err`` is indexed per light curve below, so this grid
        # is built from the whole ``err`` array - presumably ``err[i]`` was
        # intended here; confirm before changing the plot.
        x = linspace(-4 * err, 4 * err)
        axs[0, i].hist(1e3 * res, 'auto', density=True, alpha=0.5)
        axs[0, i].plot(1e3 * x, logistic(0, 1e3 * err[i]).pdf(1e3 * x), 'k')
        # Raw f-string: ``\s`` is an invalid escape in a normal string literal
        axs[0, i].text(0.05, 0.95,
                       rf"$\sigma$ = {(1e3 * err[i] * pi / sqrt(3)):5.2f} ppt",
                       transform=axs[0, i].transAxes, va='top')

    for ax, passband in zip(axs[0], self.passbands):
        ax.set_title(f"MuSCAT2 {passband}", size='large')
    for ax in axs[1:3].flat:
        setp(ax.get_xticklabels(), visible=False)
    setp(axs[1, 0], ylabel='Transit + Systematics')
    setp(axs[2, 0], ylabel='Transit - Systematics')
    setp(axs[3, 0], ylabel='Residuals')
    setp(axs[3, :], xlabel=f'Time - {self.t0:9.0f} [BJD]')
    setp(axs[0, :], xlabel='Residual [ppt]', yticks=[])
    for ax in axs[0]:
        sb.despine(ax=ax, offset=5, left=True)
    return fig, axs
def generador_estadistica(base):
    """Add the mean and standard deviation of each row's distribution to ``base``.

    Every entry of ``base["Parameters"]`` is either the marker string
    ``"Action can not be performed"`` or a mapping whose first key is a
    ``scipy.stats`` distribution name and whose value is the sequence of
    positional parameters for that distribution.  For each row the frozen
    distribution is built and its ``mean()`` and ``std()`` are collected;
    marker rows receive ``"No data for mean"`` / ``"No data for std"``.

    Supported names: ``expon``, ``logistic``, ``norm``, ``uniform`` (first two
    parameters are used) and ``exponnorm``, ``gamma``, ``lognorm``, ``triang``
    (first three parameters are used).

    Parameters
    ----------
    base
        Table with a ``"Parameters"`` column; presumably a pandas DataFrame
        (it is measured with ``len`` and gets two new columns assigned).
        It is modified in place.

    Returns
    -------
    The same ``base`` object with ``"mean"`` and ``"std"`` columns added.

    Raises
    ------
    ValueError
        If a row names a distribution that is not supported.  Skipping such a
        row silently would leave the result lists shorter than the table.
    """
    from scipy.stats import expon, exponnorm, gamma, logistic, lognorm, norm, uniform, triang

    not_performed = "Action can not be performed"
    # Distribution name -> (scipy factory, number of leading parameters passed).
    supported = {
        "expon": (expon, 2),
        "exponnorm": (exponnorm, 3),
        "gamma": (gamma, 3),
        "logistic": (logistic, 2),
        "lognorm": (lognorm, 3),
        "norm": (norm, 2),
        "uniform": (uniform, 2),
        "triang": (triang, 3),
    }

    lista_mean = []
    lista_std = []
    # Iterate the column itself (positional order) instead of looking rows up
    # by integer label, so tables with a non-default index work as well.
    for entry in base["Parameters"]:
        if entry == not_performed:
            lista_mean.append("No data for mean")
            lista_std.append("No data for std")
            continue

        # Only the first key of the mapping is considered.
        dist_name = [*entry][0]
        if dist_name not in supported:
            raise ValueError(
                f"Unsupported distribution {dist_name!r}; expected one of "
                f"{sorted(supported)}")

        factory, n_params = supported[dist_name]
        params = entry.get(dist_name)
        # Explicit indexing: too few parameters fail loudly (IndexError)
        # rather than silently building a different distribution.
        seleccion = factory(*[params[k] for k in range(n_params)])
        lista_mean.append(seleccion.mean())
        lista_std.append(seleccion.std())

    base["mean"] = lista_mean
    base["std"] = lista_std
    return base
def plot_distribution(
    *args,
    dist_type="logistic",
    comulative=False,
    ax=None,
    shaded=False,
    shade_alpha=0.5,
    line_alpha=1,
    vertical=False,
    flip_y=False,
    x_range=None,
    plot_kwargs=None,
    ax_kwargs=None,
    fill_offset=0,
    y_scale=1,
    **kwargs,
):
    """Plot the density (or cumulative distribution) of a named distribution.

    Parameters
    ----------
    *args, **kwargs
        Forwarded to the ``scipy.stats`` constructor of the chosen
        distribution (ignored for ``"exponential"``).
    dist_type : str
        ``"logistic"``, ``"gamma"``, ``"beta"``, ``"normal"`` / ``"gaussian"``
        / ``"norm"``, or ``"exponential"`` (which plots ``np.exp`` itself).
    comulative : bool
        Plot the cdf instead of the pdf.  Not available for
        ``"exponential"``, which has no ``cdf`` attribute.
    ax : optional
        Axes to draw on; a new figure and axes are created when None.
    shaded : bool
        Also fill between the curve and ``fill_offset``.
    shade_alpha, line_alpha : float
        Alpha of the filled area and of the line (shaded mode only).
    vertical : bool
        Swap the roles of the two axes.
    flip_y : bool
        Mirror the curve (negates the plotted function values, or the grid
        in vertical mode); the upper end of the grid is negated too.
    x_range : sequence of two numbers, optional
        Grid limits; read only for ``"exponential"``, where it is required.
    plot_kwargs : dict, optional
        Extra keyword arguments for ``ax.plot`` / ``ax.fill_between``.
    ax_kwargs : dict, optional
        Keyword arguments for ``ax.set``.
    fill_offset : float
        Baseline of the filled area and offset added to the curve
        (shaded mode only).
    y_scale : float
        Multiplier applied to the plotted values.

    Returns
    -------
    The axes that were drawn on.

    Raises
    ------
    NotImplementedError
        If ``dist_type`` is not one of the supported names.
    """
    plot_kwargs = {} if plot_kwargs is None else plot_kwargs
    ax_kwargs = {} if ax_kwargs is None else ax_kwargs

    # Get the distribution
    if dist_type == "logistic":
        dist = stats.logistic(*args, **kwargs)
    elif dist_type == "gamma":
        dist = stats.gamma(*args, **kwargs)
    elif dist_type == "beta":
        dist = stats.beta(*args, **kwargs)
    elif dist_type in ("normal", "gaussian", "norm"):
        dist = stats.norm(*args, **kwargs)
    elif dist_type == "exponential":
        dist = np.exp
    else:
        raise NotImplementedError(f"Unsupported dist_type: {dist_type!r}")

    # Pick the function to draw and the limits of the evaluation grid.
    # The grid is built for the cumulative case as well; previously it was
    # only assigned on the pdf path, so `comulative=True` always failed.
    if comulative or hasattr(dist, "pdf"):
        func = dist.cdf if comulative else dist.pdf
        lower, upper = dist.ppf(0.0001), dist.ppf(0.99999)
    else:
        # Plain callable (np.exp): no quantiles available, use x_range.
        func = dist
        lower, upper = x_range[0], x_range[1]
    x = np.linspace(lower, -upper if flip_y else upper, 100)

    # Plot
    if ax is None:
        _, ax = plt.subplots()

    sign = -1 if flip_y else 1
    values = func(x)

    if not shaded:
        if vertical:
            ax.plot(values, sign * x * y_scale, **plot_kwargs)
        else:
            ax.plot(x, sign * values * y_scale, **plot_kwargs)
    elif vertical:
        curve = values * y_scale + fill_offset
        ax.fill_between(curve, fill_offset, sign * x, alpha=shade_alpha, **plot_kwargs)
        ax.plot(curve, sign * x, alpha=line_alpha, **plot_kwargs)
    else:
        # The same expression is used for the fill edge and the line so the
        # shaded area always ends exactly on the drawn curve (the flipped
        # fill previously ignored y_scale due to operator precedence).
        curve = sign * (values + fill_offset) * y_scale
        ax.fill_between(x, fill_offset, curve, alpha=shade_alpha, **plot_kwargs)
        ax.plot(x, curve, alpha=line_alpha, **plot_kwargs)

    ax.set(**ax_kwargs)
    return ax
def rnaseq_simulation(
    p_genes=220,
    n_divisions=6,
    num_classes=100,
    seed=None,
    split_prob=.01,
    dropout_dist=stats.logistic(0, 1),
    dropout_p=.1,
):
    """Set up a simulated RNA-seq experiment and return a sampler for it.

    Cell division times and labels come from ``make_cell_division_times``;
    a latent space is then simulated from them with
    ``simulate_latent_space``, which also returns updated seed, labels and
    times.

    Parameters
    ----------
    p_genes
        Passed to ``simulate_new_Y_RNAseq`` by the returned sampler.
    n_divisions
        Passed to ``make_cell_division_times``; also the divisor of the
        returned ``c`` values.
    num_classes, dropout_dist, dropout_p
        Passed to ``simulate_new_Y_RNAseq`` by the returned sampler.  The
        default ``dropout_dist`` is a single frozen ``stats.logistic(0, 1)``
        created when the function is defined.
    seed
        Passed to ``make_cell_division_times``; the seed it returns is
        forwarded to ``simulate_latent_space``.
    split_prob
        Passed to ``simulate_latent_space``.

    Returns
    -------
    tuple
        ``(Xsim, simulate_new, t, c, labels, seed)`` where ``simulate_new``
        is a zero-argument callable returning the result of
        ``simulate_new_Y_RNAseq`` for the simulated latent space, and ``c`` is
        ``log2`` of the initial labels divided by ``n_divisions``.
    """
    division_times, cell_labels, rng_seed = make_cell_division_times(
        n_divisions,
        n_replicates=9,
        seed=seed,
        std=.05,
        drop_p=.6,
    )

    # Computed from the labels as first generated, before the latent-space
    # simulation replaces them.
    c = np.log2(cell_labels) / n_divisions

    Xsim, rng_seed, cell_labels, division_times = simulate_latent_space(
        division_times,
        cell_labels,
        var=1.,
        seed=rng_seed,
        split_prob=split_prob,
        gap=1.,
    )

    def simulate_new():
        return simulate_new_Y_RNAseq(
            Xsim,
            division_times,
            p_genes,
            num_classes=num_classes,
            noise_var=.2,
            dropout_dist=dropout_dist,
            dropout_p=dropout_p,
        )

    return Xsim, simulate_new, division_times, c, cell_labels, rng_seed