def makeFit_logratiosValues(df, numGenes, fractionImportant, noise):
    normNum = int(np.ceil(numGenes * (1 - fractionImportant)))
    phenNum = numGenes - normNum
    c1Vals = skewnorm.rvs(-2, scale=0.99, size=normNum, random_state=random_state)
    c1PhenoVals = skewnorm.rvs(0, scale=noise, size=phenNum, random_state=random_state)
    c2Vals = list(map(add, c1Vals,
                      skewnorm.rvs(0, scale=noise * 2, size=normNum, random_state=random_state)))
    c2PhenoVals = skewnorm.rvs(-20, scale=4, size=phenNum, random_state=random_state)
    df['setAS2 c1'] = np.append(c1Vals, c1PhenoVals)
    df['setAS3 c2'] = np.append(c2Vals, c2PhenoVals)
    return df
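# Usage sketch for makeFit_logratiosValues. It assumes `import numpy as np`,
# `import pandas as pd`, `from operator import add` and
# `from scipy.stats import skewnorm`; `random_state` is a module-level global
# above, taken here to be an integer seed.
random_state = 0
df = pd.DataFrame(index=range(100))
df = makeFit_logratiosValues(df, numGenes=100, fractionImportant=0.2, noise=0.5)
print(df['setAS2 c1'].describe())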
def gen_remainder(self, height, width, time, y, x):
    """
    Finds the values for the Dren and Vaelf Magics depending on the Magic
    values from the previous time step.

    :param height: The number of points in the y-dimension of the Map.
    :param width: The number of points in the x-dimension of the Map.
    :param time: The current time.
    :param y: The y-location of the given point.
    :param x: The x-location of the given point.
    """
    if time == 0:
        # Want higher end to be at 3.3; exp(1.1939) = 3.3
        # Want lowest end to be at 0.7; exp(-0.3567) = 0.7
        # Therefore use (1.1939 + 0.3567) / width
        decay_loc = np.exp(1.1939 - ((1.5506 * x) / width))
        growth_loc = np.exp(-0.3567 + ((1.5506 * x) / width))
        # Want higher end to be at 0.5; exp(-0.6931) = 0.5
        # Want lowest end to be at 0.25; exp(-1.3863) = 0.25
        # Therefore use (-1.3863 + 0.6931) / width
        decay_scale = np.exp(-0.6931 - ((0.6932 * x) / width))
        growth_scale = np.exp(-1.3863 + ((0.6932 * x) / width))
        self.magics[time, 8, y, x] = np.round(skewnorm.rvs(0, loc=growth_loc, scale=growth_scale))
        self.magics[time, 11, y, x] = np.round(skewnorm.rvs(0, loc=decay_loc, scale=decay_scale))
    else:
        log_avg = self.find_LR_average(height, time - 1, 8, y, x)
        exp_avg = self.find_LR_average(height, time - 1, 11, y, x)
        decay_loc = np.exp(1.1939 - ((1.5506 * x) / width))
        growth_loc = np.exp(-0.3567 + ((1.5506 * x) / width))
        decay_scale = np.exp(-0.6931 - ((0.6932 * x) / width))
        growth_scale = np.exp(-1.3863 + ((0.6932 * x) / width))
        decay_skew = 3 * (log_avg - decay_loc)
        growth_skew = 3 * (exp_avg - growth_loc)
        self.magics[time, 8, y, x] = np.round(skewnorm.rvs(growth_skew, loc=growth_loc, scale=growth_scale))
        self.magics[time, 11, y, x] = np.round(skewnorm.rvs(decay_skew, loc=decay_loc, scale=decay_scale))
def data_simulator4(ntrain, ntest, p, seeding, return_mean=False, corr=0):
    nobs = ntrain + ntest
    np.random.seed(seeding)
    if corr != 0:
        cov = gen_ar1_corr_matrix(corr, p)
        mvnorm_x = np.random.multivariate_normal([0] * p, cov, nobs)
        X = norm.cdf(mvnorm_x)
    else:
        X = np.random.uniform(size=(nobs, p))
    if not return_mean:
        Y = (10 * np.sin(2 * np.pi * X[:, 0] * X[:, 1]) + 10 * X[:, 3]
             + 20 * np.square(X[:, 2] - 0.5) + 5 * X[:, 4]) + skewnorm.rvs(-5, size=nobs)
    else:
        Y = (10 * np.sin(2 * np.pi * X[:, 0] * X[:, 1]) + 10 * X[:, 3]
             + 20 * np.square(X[:, 2] - 0.5) + 5 * X[:, 4])
    Y = Y.reshape(-1, 1)
    TrainX = X[:ntrain, :]
    TrainY = Y[:ntrain, :]
    TestX = X[ntrain:, :]
    TestY = Y[ntrain:, :]
    return TrainX, TrainY, TestX, TestY
def rand_frag_size(dist_string):
    """Draw one fragment size from a distribution encoded as a comma-separated
    string: '<name>,<loc>,<scale>[,<shape params>]'. For 'skewed-normal' the
    fourth field is the skew; for 'truncated-normal' the fourth and fifth
    fields are the lower and upper truncation bounds (in scale units)."""
    dist_list = dist_string.split(',')
    if dist_list[0] == 'skewed-normal':
        rand_size = abs(int(round(skewnormdist.rvs(float(dist_list[3]),
                                                   loc=float(dist_list[1]),
                                                   scale=float(dist_list[2])))))
    elif dist_list[0] == 'normal':
        rand_size = abs(int(round(normdist.rvs(loc=float(dist_list[1]),
                                               scale=float(dist_list[2])))))
    elif dist_list[0] == 'uniform':
        rand_size = abs(int(round(uniformdist.rvs(loc=float(dist_list[1]),
                                                  scale=float(dist_list[2])))))
    elif dist_list[0] == 'truncated-normal':
        rand_size = abs(int(round(truncnormdist.rvs(float(dist_list[3]),
                                                    float(dist_list[4]),
                                                    loc=float(dist_list[1]),
                                                    scale=float(dist_list[2])))))
    else:
        raise ValueError('Unknown distribution: {}'.format(dist_list[0]))
    return rand_size
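# Minimal usage sketch for rand_frag_size; it assumes the distribution aliases
# above come from scipy.stats, e.g.
#     from scipy.stats import skewnorm as skewnormdist, norm as normdist
#     from scipy.stats import uniform as uniformdist, truncnorm as truncnormdist
# The numbers below are illustrative only.
print(rand_frag_size('skewed-normal,9000,2500,-5'))       # loc, scale, skew
print(rand_frag_size('truncated-normal,9000,2500,-2,2'))  # loc, scale, a, b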
def get_data(n):
    # half exponential samples, half skew-normal samples
    data = np.concatenate((expon.rvs(scale=1, size=n // 2),
                           skewnorm.rvs(5, loc=3, size=n // 2)))
    # now shuffle the data
    np.random.shuffle(data)
    return data
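# Usage sketch: the mixture of Exp(1) and a skew-normal centred near 3 makes
# the sample clearly non-Gaussian; assumes `import numpy as np` and
# `from scipy.stats import expon, skewnorm`.
sample = get_data(1000)
print(sample.mean(), sample.std())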
def asymmetric_samples(mean, plus, minus, size=5000):
    if (plus == 0.0) and (minus == 0.0):
        samples = np.ones(size) * mean
        return samples
    x = np.array([mean - minus, mean, mean + plus])
    data = np.array([0.16, 0.5, 0.84])
    params = Parameters()
    params.add('mu', value=mean)
    params.add('sigma', value=np.mean([minus, plus]))
    params.add('alpha', value=0.0)
    try:
        minner = Minimizer(fnc2min, params, fcn_args=(x, data))
        result = minner.minimize()
        mu = result.params['mu'].value
        sigma = result.params['sigma'].value
        alpha = result.params['alpha'].value
        samples = skewnorm.rvs(a=alpha, loc=mu, scale=sigma, size=size)
    except ValueError:
        print("Problem producing the distribution or sampling from it.")
        print("\nReverting to a Normal Distribution")
        print("\nwith scale given by the average of the maximum and minimum errors.")
        samples = norm.rvs(loc=mean, scale=np.mean([minus, plus]), size=size)
    return samples
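# Usage sketch for asymmetric_samples, e.g. a measurement reported as
# 10.0 (+0.8 / -0.5). It assumes lmfit's Parameters/Minimizer and the residual
# function fnc2min used above are available in this module.
draws = asymmetric_samples(10.0, plus=0.8, minus=0.5, size=5000)
print(np.percentile(draws, [16, 50, 84]))  # should roughly recover 9.5, 10.0, 10.8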
def old():
    n, d = 10000, 100
    random = check_random_state(0)
    bg_weight = random.beta(1, 10, size=d)
    bg_weight /= np.linalg.norm(bg_weight)
    fg_weight = random.beta(1, 5, size=d)
    fg_weight /= np.linalg.norm(fg_weight)
    alpha = .5
    skew = 0
    loc = n / 5 * 4
    scale = n / 10 * 3 / 4
    bg_ts = uniform.rvs(0, n, size=int(n * (1 - alpha)), random_state=random)
    fg_ts = skewnorm.rvs(skew, loc, scale, size=int(n * alpha), random_state=random)
    bg_X = random.normal(scale=bg_weight, size=(len(bg_ts), len(bg_weight)))
    fg_X = random.normal(scale=fg_weight, size=(len(fg_ts), len(fg_weight)))
    ts = np.concatenate((bg_ts, fg_ts))
    X = np.concatenate((bg_X, fg_X), axis=0)
    order = np.argsort(ts)
    ts = ts[order]
    X = X[order]
    np.savetxt('ts.csv', ts, fmt='%.2f')
    np.savetxt('X.csv', X, fmt='%.5f')
def init_event_occuring(self, max_pos_skew=-5, max_neg_skew=5):
    """
    Initialise probabilities of an event occurring.

    Generates a matrix of probability distributions. Each column corresponds
    to the number of days past the due date to visit a location and holds
    skew-normal probability values scaled to 0-1; as the number of days past
    the due date increases, the distribution becomes more negatively skewed
    toward higher probability values, indicating increasing urgency of
    visiting that location.
    """
    # Define a list of distributions relating to the number of frames since
    # last visited; base it over 100 frames
    skewness_values = np.linspace(max_neg_skew, max_pos_skew, 100)
    # generate 1000 random values for each skewness setting
    dist_sample_size = 1000
    # initialize the probabilities array
    self._probabilities = np.zeros((dist_sample_size, len(skewness_values)))
    # Populate the array, changing the skewness of the distribution depending
    # on how long it is since the agent last visited a specific destination
    for idx, skewness in enumerate(skewness_values):
        # generate a probability distribution for each time frame
        self._probabilities[:, idx] = skewnorm.rvs(a=skewness, loc=100, size=dist_sample_size)
        # shift values so that the lowest value is 0
        self._probabilities[:, idx] = self._probabilities[:, idx] - np.min(self._probabilities[:, idx])
        # standardise all values between 0 and 1
        self._probabilities[:, idx] = self._probabilities[:, idx] / np.max(self._probabilities[:, idx])
    return self._probabilities
def create_pdf(sd, mean, alfa):
    # invert the sign of alfa to mirror the skew
    x = skewnorm.rvs(alfa, size=1000000)

    def calc(k, sd, mean):
        # rescale the standard skew-normal draws to the requested sd and mean
        return (k * sd) + mean

    x = calc(x, sd, mean)
    return x
def _get_age_dist_values(total_users, *, seed=None):
    """Return Numpy array of age values, given total_users"""
    if seed:
        np.random.seed(seed)
    # right-skewed skewnorm dist - age range of 16 to 60 ish
    return (27 + 10 * skewnorm.rvs(a=2, size=total_users)).astype(int).clip(min=16)
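# Usage sketch: ages for 1000 simulated users; a=2 gives a right-skewed
# distribution, so most values land roughly in the late 20s to 40s.
ages = _get_age_dist_values(1000, seed=42)
print(ages.min(), int(np.median(ages)), ages.max())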
def gen_cold_and_ice(self, height, width, time, y, x):
    """
    Determines the value of both Cold and Ice Magic at a point given the
    values of the points nearby in the previous time step.

    :param height: The number of points in the y-dimension of the Map.
    :param width: The number of points in the x-dimension of the Map.
    :param time: The value of time since the Map started its weather tracking.
    :param y: The y-location of the given point.
    :param x: The x-location of the given point.
    """
    if time == 0:
        # Want higher end to be at 3.3; exp(1.1939) = 3.3
        # Want lowest end to be at 0.7; exp(-0.3567) = 0.7
        # Therefore use (1.1939 + 0.3567) / height
        growth_loc = np.exp(-0.3567 + ((1.5506 * y) / height))
        # Want higher end to be at 0.5; exp(-0.6931) = 0.5
        # Want lowest end to be at 0.25; exp(-1.3863) = 0.25
        # Therefore use (-1.3863 + 0.6931) / height
        growth_scale = np.exp(-1.3863 + ((0.6932 * y) / height))
        self.magics[time, 5, y, x] = np.round(skewnorm.rvs(0, loc=growth_loc, scale=growth_scale))
        self.magics[time, 7, y, x] = np.round(skewnorm.rvs(0, loc=growth_loc, scale=growth_scale))
    else:
        average1 = self.find_BT_average(width, time - 1, 4, y, x)
        average2 = self.find_BT_average(width, time - 1, 6, y, x)
        growth_loc = np.exp(-0.3567 + ((1.5506 * y) / height))
        skew_value1 = 3 * (average1 - growth_loc)
        skew_value2 = 3 * (average2 - growth_loc)
        growth_scale = np.exp(-1.3863 + ((0.6932 * y) / height))
        seasonal_shift = 0.7 + 0.3 * np.cos(np.pi + 8.7266 * (10 ** -4) * (time % 7200))
        self.magics[time, 5, y, x] = np.round(skewnorm.rvs(skew_value1, loc=growth_loc * seasonal_shift, scale=growth_scale))
        self.magics[time, 7, y, x] = np.round(skewnorm.rvs(skew_value2, loc=growth_loc, scale=growth_scale))
def get_skew_distribution(a, num_vertices):
    # `a` is the skew; num_vertices tells how many samples to take
    r = skewnorm.rvs(a, size=num_vertices)
    lower, upper = skewnorm.interval(.999, a)
    # shifted = [x - lower for x in r]
    diff = upper - lower
    rescaled = [abs((x - lower) / diff) / 2 for x in r]
    return rescaled
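# Usage sketch: the rescaling maps the central 99.9% interval onto [0, 0.5],
# so the returned weights fall roughly in that range for any skew.
weights = get_skew_distribution(4, 100)
print(min(weights), max(weights))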
def conditional_sample_func4(X, nobs, seeding=1234):
    loc1 = (10 * np.sin(2 * np.pi * X[0] * X[1]) + 10 * X[3]
            + 20 * np.square(X[2] - 0.5) + 5 * X[4])
    np.random.seed(seed=seeding)
    samples = loc1 + skewnorm.rvs(-5, size=nobs)
    return samples
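# Usage sketch: 500 draws of Y conditional on a fixed covariate vector (X needs
# at least five entries); this pairs with data_simulator4 above, which uses the
# same mean function.
x0 = np.full(5, 0.5)
draws = conditional_sample_func4(x0, nobs=500)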
def test_joint_entropyND():
    """
    Test that our implemented function to return the entropy corresponds to
    Scipy's entropy method in multiple dimensions.
    """
    gridpoints = 10  # for KDE estimation

    ## Test 2D joint entropy:
    X = skewnorm.rvs(size=1000, a=-3, loc=0, scale=2)
    Y = skewnorm.rvs(size=1000, a=-3, loc=0, scale=2)
    data = pd.DataFrame({'X': X, 'Y': Y})

    ## So this is valid if test_get_pdf passes
    pdf = get_pdf(data, gridpoints=gridpoints)

    ## The estimated entropy should correspond to scipy's value (to 5 d.p.)
    assert_almost_equal(get_entropy(data, gridpoints=gridpoints),
                        entropy(pdf.flatten(), base=2), 5)
def ABCsimulation(param):  # param = [om, w0]
    if param[0] < 0.0 or param[0] > 1.0:
        return [None] * len(zbins)
    else:
        # om, ok, ol, wmodel, de_params, h0
        model_1_class = DistanceCalc(param[0], 0, 1 - param[0], 0, [param[1], 0], 0.7)
        data_abc = np.zeros(len(zbins))
        for i in range(len(zbins)):
            data_abc[i] = model_1_class.mu(zbins[i]) + skewnorm.rvs(a, loc=e, scale=w, size=1)
        return data_abc
def skewnorm_distribution_maker(bins, skew, focus=1.0, size=10000):
    distro = skewnorm.rvs(skew, 1, 1, size=size)
    distro_histro = np.histogram(distro, bins=bins, range=(0, 10), density=True)[0]
    distro_histro = distro_histro ** 0.25  # flatten the histogram; focus is currently unused
    distro_histro /= np.sum(distro_histro)
    return distro_histro
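# Usage sketch: build a 10-bin sampling distribution over the fixed (0, 10)
# histogram range and draw a bin index from it.
probs = skewnorm_distribution_maker(bins=10, skew=4)
choice = np.random.choice(np.arange(10), p=probs)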
def scale_obj(obj, mean_size=60, max_size=103, size_real=0, distance=0):
    """
    Inputs:
       obj - object to work with
       distance - distance in mm from camera, randomized if not set
       size_real - length in mm of object; if not set this is randomized
                   based on species mean and maximum sizes
       mean_size - common length of species
       max_size - maximum length of species

    Assuming a horizontal object, we scale it to the correct pixel size based
    on distance from camera and known fields of view.
    """
    max_distance = 700
    min_distance = 200
    if distance == 0:
        # cross section is a trapezoid, height increases towards back of image;
        # we want the distance distribution to reflect this
        s = skewnorm.rvs(-5, size=1, loc=650, scale=120)
        # clip the skew-normal draw directly; np.random.choice on a plain int
        # would instead sample uniformly from range(int)
        distance = int(np.clip(s[0], min_distance, max_distance))
    if size_real == 0:
        # add some variance
        var_size = (max_size - mean_size) / 3
        size_real = np.random.normal(mean_size, var_size)
    # in front of image, size is 1392px/339mm
    # in back of image, size is 1392px/1190mm
    pix_per_mm = 1392 / (339 + (distance - min_distance)
                         * ((1190 - 339) / (max_distance - min_distance)))
    new_length = int(pix_per_mm * size_real)
    ratio = new_length / int(np.shape(obj)[1])
    new_height = int(np.shape(obj)[0] * ratio)
    # ensure nothing is resized to 0
    new_length = max(new_length, 1)
    new_height = max(new_height, 1)
    new_obj = obj.resize((new_length, new_height))
    return new_obj, distance
def test_error_when_nan_introduced_during_transform():
    # test error when NA are introduced during the discretisation.
    rng = default_rng()

    # create dataframe with 2 variables, 1 normal and 1 skewed
    random = skewnorm.rvs(a=-50, loc=4, size=100)
    random = random - min(random)  # shift so the minimum value is equal to zero
    train = pd.concat(
        [
            pd.Series(rng.standard_normal(100)),
            pd.Series(random),
        ],
        axis=1,
    )
    train.columns = ["var_a", "var_b"]

    # create a dataframe with 2 variables normally distributed
    test = pd.concat(
        [
            pd.Series(rng.standard_normal(100)),
            pd.Series(rng.standard_normal(100)),
        ],
        axis=1,
    )
    test.columns = ["var_a", "var_b"]

    msg = ("During the discretisation, NaN values were introduced "
           "in the feature(s) var_b.")
    limits_dict = {"var_a": [-5, -2, 0, 2, 5], "var_b": [0, 2, 5]}

    # check for warning when errors equals 'ignore'
    with pytest.warns(UserWarning) as record:
        transformer = ArbitraryDiscretiser(binning_dict=limits_dict, errors="ignore")
        transformer.fit(train)
        transformer.transform(test)

    # check that only one warning was returned
    assert len(record) == 1
    # check that message matches
    assert record[0].message.args[0] == msg

    # check for error when errors equals 'raise'
    with pytest.raises(ValueError) as record:
        transformer = ArbitraryDiscretiser(binning_dict=limits_dict, errors="raise")
        transformer.fit(train)
        transformer.transform(test)

    # check that error message matches
    assert str(record.value) == msg
def skewNormGen(s1, xi_popts, ome_popts, alp_popts):
    """
    skewNormGen

    Generates S1 events using the effective marginalized pdf in S1.

    inputs: s1, xi optimums, omega optimums, alpha optimums
    """
    xi = FUNC_EPS(s1, *xi_popts)
    omega = FUNC_OME(s1, *ome_popts)
    alpha = FUNC_ALP(s1, *alp_popts)
    return skewnorm.rvs(alpha, loc=xi, scale=omega)
def get_samples_from_percentiles(val, ehi, elo, Nsamp=1e3, add_p5_p95=True, pltt=False):
    '''Given the 16, 50, and 84 percentiles of a parameter's distribution,
    fit a skew-normal CDF and sample it.'''
    # don't do anything if NaNs are input
    if np.any(np.isnan([val, ehi, elo])):
        return np.nan, np.nan, np.repeat(np.nan, int(Nsamp))

    # get percentiles
    p16, med, p84 = float(val - elo), float(val), float(val + ehi)
    assert p16 < med
    assert med < p84

    # add approximate percentiles to help with fitting the wings;
    # otherwise the tails of the fitted distribution tend to be poorly constrained
    if add_p5_p95:
        p5_approx = med - 2 * (med - p16)
        p95_approx = med + 2 * (p84 - med)
        xin = [p5_approx, p16, med, p84, p95_approx]
        yin = [.05, .16, .5, .84, .95]
    else:
        xin, yin = [p16, med, p84], [.16, .5, .84]

    # make initial parameter guess
    mu, sig = med, np.mean([abs(p16), abs(p84)])
    a = (abs(p16) - abs(p84)) / sig
    p0 = a, mu, sig
    popt, pcov = curve_fit(Skewnorm_CDF_func, xin, yin, p0=p0,
                           sigma=np.repeat(.01, len(yin)), absolute_sigma=False)

    # sample the fitted pdf
    samples = skewnorm.rvs(*popt, size=int(Nsamp))

    # plot distribution if desired
    if pltt:
        plt.hist(samples, bins=30, density=True, label='Sampled parameter posterior')
        plt.plot(np.sort(samples), skewnorm.pdf(np.sort(samples), *popt), '-',
                 label='Skew-normal fit: a=%.3f, m=%.3f, s=%.3f' % tuple(popt))
        plt.xlabel('Parameter values'), plt.legend(loc='upper right')
        plt.show()

    return p0, popt, samples
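# Usage sketch for get_samples_from_percentiles with a value reported as
# 1.00 (+0.15 / -0.10); Skewnorm_CDF_func is the external CDF model passed to
# curve_fit above.
p0, popt, samples = get_samples_from_percentiles(1.00, 0.15, 0.10, Nsamp=1e4)
print(np.percentile(samples, [16, 50, 84]))  # should roughly recover 0.90, 1.00, 1.15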
def generate(self) -> datetime:
    """
    Generate a random date taken from a skewed normal distribution.

    :return: datetime
    """
    timestamp = round(self.center.timestamp())
    generated = skewnorm.rvs(self.skew, loc=timestamp, scale=self.dispersion)
    return datetime.fromtimestamp(generated)
def next_transaction(self):
    # price
    rand_delta = skewnorm.rvs(self.skewness, size=1)[0]
    price = self.average_price * (rand_delta + 1)
    price = max(self.min_gas_price, price)
    if self.max_gas_price:
        price = min(self.max_gas_price, price)
    # gas consumed: 1 <= gas_consumed <= 2 * block_size / txs_per_block
    gas = random.randint(1, 2 * int(self.block_gas_size / self.txs_per_block))
    return Tx(gas, price)
def sim_data(chronological=True):
    a = random.choice([2., 3., 4.])
    ys = skewnorm.rvs(a, size=1000)
    outliers = [
        random.choice([-1., 1.]) * 10. if np.random.rand() < 0.05 else 0.
        for _ in ys
    ]
    xs = np.cumsum(0.1 * np.random.randn(1000))
    zs = [
        x + y * random.choice([-1., 1.]) + o
        for x, y, o in zip(xs, ys, outliers)
    ]
    return zs if chronological else list(reversed(zs))
def skewed_dist(max_value=10, min_value=0, num_values=10000, skewness=5, integers=False):
    """ generate skewed distribution """
    # Positive skewness gives a right-skewed distribution (long right tail);
    # negative skewness gives a left-skewed one (long left tail).
    random_list = skewnorm.rvs(a=skewness, loc=max_value, size=num_values)
    return scale_a_distribution(random_list, integers=integers,
                                max_value=max_value, min_value=min_value)
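# Usage sketch: 1000 right-skewed values rescaled into [0, 10] as integers;
# scale_a_distribution is the external rescaling helper used above.
scores = skewed_dist(max_value=10, min_value=0, num_values=1000,
                     skewness=5, integers=True)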
def skew():
    fig, ax = plt.subplots(1, 1)  # create a figure and a grid of subplots
    a = 5  # skewness parameter; when a = 0 the distribution is identical to a normal distribution
    # mean, variance, skew, kurt = skewnorm.stats(a, moments='mvsk')
    d = skewnorm.rvs(a, size=350000)
    ax.hist(d, density=True, histtype='stepfilled', alpha=0.2)  # histogram of generated numbers
    plt.show()
    return d  # return all the numbers generated with the skewed distribution
def rand_normal(loc=0, sigma=1, size=1, skew=0, decimals='all'):
    # Normal distribution centered at loc
    data = skewnorm.rvs(a=skew, loc=loc, scale=sigma, size=size)
    if decimals != 'all':
        data = np.around(data, decimals)
        if decimals == 0:
            data = data.astype(int)
    if size == 1:
        return data[0]
    return data
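# Usage sketch: a single symmetric draw, then a rounded right-skewed batch.
one = rand_normal(loc=5, sigma=2)                                  # scalar
batch = rand_normal(loc=5, sigma=2, size=100, skew=4, decimals=0)  # int array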
def generate_random_c_x1(self):
    '''Generate random c and x1 from either normal or skew-normal distributions.'''
    if bench:
        print('Drawing color and stretch rvs...')
    if self.c_pdf == 'Normal':
        self.simc = np.random.normal(loc=self.c_params[0],
                                     scale=self.c_params[1],
                                     size=len(self.totz))
    elif self.c_pdf == 'SkewNormal':
        self.simc = skewnorm.rvs(self.c_params[2],
                                 loc=self.c_params[0],
                                 scale=self.c_params[1],
                                 size=len(self.totz))
    if self.x1_pdf == 'Normal':
        self.simx1 = np.random.normal(loc=self.x1_params[0],
                                      scale=self.x1_params[1],
                                      size=len(self.totz))
    elif self.x1_pdf == 'SkewNormal':
        self.simx1 = skewnorm.rvs(self.x1_params[2],
                                  loc=self.x1_params[0],
                                  scale=self.x1_params[1],
                                  size=len(self.totz))
def gen_waxing_burst(self, time, y, x):
    """
    Generates points where Waxing Magic is extremely strong.

    :param time: The value of time since the Map started its weather tracking.
    :param y: The y-location of the given point.
    :param x: The x-location of the given point.
    """
    self.magics[time, 3, y, x] = np.round(skewnorm.rvs(4, loc=0, scale=1.4))
    if self.magics[time, 3, y, x] < 4:
        self.magics[time, 3, y, x] = 0
def simulate_PDF(median, lower_err, upper_err, size=1, plot=True):
    '''
    Simulates a draw of posterior samples from a value and asymmetric
    errorbars by assuming the underlying distribution is a skewed normal
    distribution.

    Developed to estimate PDFs from literature exoplanet parameters that did
    not report their MCMC chains.

    Inputs:
    -------
    median : float
        the median value that was reported
    lower_err : float
        the lower errorbar that was reported
    upper_err : float
        the upper errorbar that was reported
    size : int
        the number of samples to be drawn

    Returns:
    --------
    samples : array of float
        the samples drawn from the simulated skewed normal distribution
    '''
    sigma, omega, alpha = calculate_skewed_normal_params(median, lower_err, upper_err)
    samples = skewnorm.rvs(alpha, loc=sigma, scale=omega, size=size)

    if plot == False:
        return samples
    else:
        lower_err = np.abs(lower_err)
        upper_err = np.abs(upper_err)
        x = np.arange(median - 4 * lower_err, median + 4 * upper_err, 0.01)
        fig = plt.figure()
        for i in range(3):
            plt.axvline([median - lower_err, median, median + upper_err][i], color='k', lw=2)
        plt.plot(x, skewnorm.pdf(x, alpha, loc=sigma, scale=omega), 'r-', lw=2)
        fit_percentiles = skewnorm.ppf([0.16, 0.5, 0.84], alpha, loc=sigma, scale=omega)
        for i in range(3):
            plt.axvline(fit_percentiles[i], color='r', ls='--', lw=2)
        plt.hist(samples, density=True, color='red', alpha=0.5)
        return samples, fig
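# Usage sketch for simulate_PDF with a literature value of 1.23 (+0.05 / -0.03);
# calculate_skewed_normal_params is the external fitting helper called above.
samples, fig = simulate_PDF(1.23, 0.03, 0.05, size=10000, plot=True)
samples = simulate_PDF(1.23, 0.03, 0.05, size=10000, plot=False)  # samples only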
def draw_strain_skewstep(self, x, iter, beta):
    q = x.copy()
    lqxy = 0

    a = 2
    s = 1
    diff = skewnorm.rvs(a, scale=s)
    q[self.pnames.index('log10_h')] = x[self.pnames.index('log10_h')] - diff
    lqxy = skewnorm.logpdf(-diff, a, scale=s) - skewnorm.logpdf(diff, a, scale=s)

    return q, float(lqxy)
""" ############################################################################### # By default, ChainConsumer uses maximum likelihood statistics. Thus you do not # need to explicitly enable maximum likelihood statistics. If you want to # anyway, the keyword is `"max"`. import numpy as np from scipy.stats import skewnorm from chainconsumer import ChainConsumer # Lets create some data here to set things up np.random.seed(0) data = skewnorm.rvs(5, size=(1000000, 2)) parameters = ["$x$", "$y$"] # Now the normal way of giving data is passing a numpy array and parameter separately c = ChainConsumer().add_chain(data, parameters=parameters).configure(statistics="max") fig = c.plotter.plot() fig.set_size_inches(4.5 + fig.get_size_inches()) # Resize fig for doco. You don't need this. ############################################################################### # Or we can enable cumulative statistics c = ChainConsumer().add_chain(data, parameters=parameters).configure(statistics="cumulative") fig = c.plotter.plot() fig.set_size_inches(4.5 + fig.get_size_inches()) # Resize fig for doco. You don't need this.