import numpy as np
from numpy import empty, logical_not, logical_or, multiply
from scipy.stats import gamma, norm


def q2qnbinom(counts, input_mean, output_mean, dispersion):
    """Quantile-to-quantile mapping for negative binomial counts.

    Counts are mapped from the input-mean distribution to the equivalent
    quantiles of the output-mean distribution, averaging a normal and a
    gamma approximation.
    """
    zero = logical_or(input_mean < 1e-14, output_mean < 1e-14)
    input_mean[zero] = input_mean[zero] + 0.25
    output_mean[zero] = output_mean[zero] + 0.25

    # negative binomial variance: mu * (1 + dispersion * mu)
    ri = 1 + multiply(np.matrix(dispersion).T, input_mean)
    vi = multiply(input_mean, ri)
    rO = 1 + multiply(np.matrix(dispersion).T, output_mean)
    vO = multiply(output_mean, rO)

    i = counts >= input_mean
    low = logical_not(i)

    p1 = empty(counts.shape, dtype=np.float64)
    p2 = p1.copy()
    q1, q2 = p1.copy(), p1.copy()

    if i.any():
        # upper tail: use survival functions for numerical stability
        p1[i] = norm.logsf(counts[i], loc=input_mean[i], scale=np.sqrt(vi[i]))[0, :]
        p2[i] = gamma.logsf(counts[i], (input_mean / ri)[i], scale=ri[i])[0, :]
        q1[i] = norm.ppf(1 - np.exp(p1[i]), output_mean[i], np.sqrt(vO[i]))[0, :]
        q2[i] = gamma.ppf(1 - np.exp(p2[i]), np.divide(output_mean[i], rO[i]),
                          scale=rO[i])[0, :]

    if low.any():
        # lower tail: use the CDF directly
        p1[low] = norm.logcdf(counts[low], loc=input_mean[low], scale=np.sqrt(vi[low]))[0, :]
        p2[low] = gamma.logcdf(counts[low], input_mean[low] / ri[low], scale=ri[low])[0, :]
        q1[low] = norm.ppf(np.exp(p1[low]), loc=output_mean[low], scale=np.sqrt(vO[low]))[0, :]
        q2[low] = gamma.ppf(np.exp(p2[low]), output_mean[low] / rO[low], scale=rO[low])[0, :]

    return (q1 + q2) / 2
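# q2qnbinom above maps counts between two negative-binomial-like distributions
# by averaging a normal and a gamma approximation of the quantile match.
# A minimal self-contained sketch of the same idea for a single count; every
# name and value below is illustrative only, not part of q2qnbinom itself.
import numpy as np
from scipy.stats import gamma, norm

def match_one_count(count, in_mean, out_mean, dispersion):
    r_in = 1.0 + dispersion * in_mean      # plays the role of 1 + phi*mu
    r_out = 1.0 + dispersion * out_mean
    # upper-tail probability under a gamma approximation at the input mean
    p_upper = gamma.sf(count, in_mean / r_in, scale=r_in)
    # map that tail probability back to a count at the output mean
    q_gamma = gamma.isf(p_upper, out_mean / r_out, scale=r_out)
    # same idea with a normal approximation, then average the two answers
    p_upper_n = norm.sf(count, loc=in_mean, scale=np.sqrt(in_mean * r_in))
    q_norm = norm.isf(p_upper_n, loc=out_mean, scale=np.sqrt(out_mean * r_out))
    return 0.5 * (q_gamma + q_norm)

print(match_one_count(12, in_mean=10.0, out_mean=20.0, dispersion=0.1))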
def genGammaTable(self, randTable):
    """Generate the gamma table from the elicited parameters and randTable.

    The line

        myRows = trunc(gammaRows * R[0] / total_w)

    decides how many rows to allot to a given expert's opinion, in proportion
    to the (normalized) weight assigned to that expert's parametrization.
    """
    gammaRows = len(randTable)
    numExperts = len(self.elicited)
    numParams = len(self.elicited[0])
    randRow = 0

    # First, normalize the weights.
    total_w = sum([R[0] for R in self.elicited])
    for R in self.elicited:
        # Truncate after multiplying by gammaRows; truncating the normalized
        # weight itself would collapse every fractional weight to zero rows.
        myRows = trunc(gammaRows * R[0] / total_w)
        for r in range(int(myRows)):
            l = []
            for n in range(1, numParams):
                prob = randTable[randRow][n - 1]
                alpha = R[n]
                l.append(gamma.ppf(prob, alpha))
            l = norm_log(l)
            self.gammaTable.append(l)
            randRow += 1
    self.gammaTable = array(self.gammaTable)
def get_max_firing_rate(self):
    """Return the maximum firing rate of the neuron.

    The maximum firing rate is defined as the rate at which the CDF of the
    gamma tuning distribution reaches 0.99.

    :return: maximum firing rate of the neuron.
    """
    return gamma.ppf(0.99, self.a, scale=self.b, loc=0)
def __get_object_preference(self, cdf_loc):
    """Use the inverse CDF to get a random firing rate modifier, i.e. the
    normalized firing rate.

    :rtype: firing rate modifier
    """
    obj_pref = gamma.ppf(cdf_loc, self.a, scale=self.b, loc=0)
    return obj_pref / self.get_max_firing_rate()
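# get_max_firing_rate treats the 99th gamma percentile as the neuron's maximum
# rate, and __get_object_preference normalizes draws by it, so the returned
# modifier is <= 1 for roughly 99% of objects.  A standalone check; the shape
# and scale values (a=4.0, b=2.0) are illustrative only.
import numpy as np
from scipy.stats import gamma

a, b = 4.0, 2.0
max_rate = gamma.ppf(0.99, a, scale=b, loc=0)
cdf_locs = np.random.rand(100000)
prefs = gamma.ppf(cdf_locs, a, scale=b, loc=0) / max_rate
print(np.mean(prefs <= 1.0))   # ~0.99 by construction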
def ppf(self, U):
    '''
    Evaluates the percentile function (inverse c.d.f.) for a given array of quantiles.

    :param U: Percentiles for which the ppf will be computed.
    :type U: numpy.array
    :returns: A Data object containing the values of the ppf.
    :rtype: natter.DataModule.Data
    '''
    return Data(gamma.ppf(U, self.param['u'], scale=self.param['s'])
                ** (1 / self.param['p']))
def quantile_match(self): mask_indices = np.where(self.nc_patches["masks"] == 1) obj_values = self.nc_patches["obj_values"][mask_indices] obj_values = np.array(obj_values) percentiles = np.linspace(0.1, 99.9, 100) try: filename = self.size_distribution_training_path + '{0}_{1}_Size_Distribution.csv'.format(self.ensemble_name, self.watershed_obj) train_period_obj_per_vals = pd.read_csv(filename) train_period_obj_per_vals = train_period_obj_per_vals.loc[:,"Values"].values per_func = interp1d(train_period_obj_per_vals, percentiles / 100.0, bounds_error=False, fill_value=(0.1, 99.9)) except: obj_per_vals = np.percentile(obj_values, percentiles) per_func = interp1d(obj_per_vals, percentiles / 100.0, bounds_error=False, fill_value=(0.1, 99.9)) obj_percentiles = np.zeros(self.nc_patches["masks"].shape) obj_percentiles[mask_indices] = per_func(obj_values) obj_hail_sizes = np.zeros(obj_percentiles.shape) model_name = self.model_name.replace(" ", "-") self.units = "mm" self.data = np.zeros((self.forecast_hours.size, self.mapping_data["lon"].shape[0], self.mapping_data["lon"].shape[1]), dtype=np.float32) sh = self.forecast_hours.min() for p in range(obj_hail_sizes.shape[0]): if self.hail_forecast_table.loc[p, self.condition_model_name.replace(" ", "-") + "_conditionthresh"] > 0.5: patch_mask = np.where(self.nc_patches["masks"][p] == 1) obj_hail_sizes[p, patch_mask[0], patch_mask[1]] = gamma.ppf(obj_percentiles[p, patch_mask[0], patch_mask[1]], self.hail_forecast_table.loc[p, model_name + "_shape"], self.hail_forecast_table.loc[p, model_name + "_location"], self.hail_forecast_table.loc[p, model_name + "_scale"]) self.data[self.nc_patches["forecast_hour"][p] - sh, self.nc_patches["i"][p, patch_mask[0], patch_mask[1]], self.nc_patches["j"][p, patch_mask[0], patch_mask[1]]] = obj_hail_sizes[p, patch_mask[0], patch_mask[1]] return
def hsic_gam(X, Y, alph = 0.5): """ X, Y are numpy vectors with row - sample, col - dim alph is the significance level auto choose median to be the kernel width """ n = X.shape[0] # ----- width of X ----- Xmed = X G = np.sum(Xmed*Xmed, 1).reshape(n,1) Q = np.tile(G, (1, n) ) R = np.tile(G.T, (n, 1) ) dists = Q + R - 2* np.dot(Xmed, Xmed.T) dists = dists - np.tril(dists) dists = dists.reshape(n**2, 1) width_x = np.sqrt( 0.5 * np.median(dists[dists>0]) ) # ----- ----- # ----- width of X ----- Ymed = Y G = np.sum(Ymed*Ymed, 1).reshape(n,1) Q = np.tile(G, (1, n) ) R = np.tile(G.T, (n, 1) ) dists = Q + R - 2* np.dot(Ymed, Ymed.T) dists = dists - np.tril(dists) dists = dists.reshape(n**2, 1) width_y = np.sqrt( 0.5 * np.median(dists[dists>0]) ) # ----- ----- bone = np.ones((n, 1), dtype = float) H = np.identity(n) - np.ones((n,n), dtype = float) / n K = rbf_dot(X, X, width_x) L = rbf_dot(Y, Y, width_y) Kc = np.dot(np.dot(H, K), H) Lc = np.dot(np.dot(H, L), H) testStat = np.sum(Kc.T * Lc) / n varHSIC = (Kc * Lc / 6)**2 varHSIC = ( np.sum(varHSIC) - np.trace(varHSIC) ) / n / (n-1) varHSIC = varHSIC * 72 * (n-4) * (n-5) / n / (n-1) / (n-2) / (n-3) K = K - np.diag(np.diag(K)) L = L - np.diag(np.diag(L)) muX = np.dot(np.dot(bone.T, K), bone) / n / (n-1) muY = np.dot(np.dot(bone.T, L), bone) / n / (n-1) mHSIC = (1 + muX * muY - muX - muY) / n al = mHSIC**2 / varHSIC bet = varHSIC*n / mHSIC thresh = gamma.ppf(1-alph, al, scale=bet)[0][0] return (testStat, thresh)
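# hsic_gam above relies on an rbf_dot helper that is not shown in this snippet.
# Below is a minimal sketch consistent with how it is called (a Gaussian kernel
# matrix with bandwidth `deg`); this is an assumption about its behaviour, not
# the original implementation.
import numpy as np

def rbf_dot(pattern1, pattern2, deg):
    n1, n2 = pattern1.shape[0], pattern2.shape[0]
    G = np.sum(pattern1 * pattern1, axis=1).reshape(n1, 1)
    H = np.sum(pattern2 * pattern2, axis=1).reshape(n2, 1)
    Q = np.tile(G, (1, n2))
    R = np.tile(H.T, (n1, 1))
    # squared Euclidean distances between all pairs of rows
    dists = Q + R - 2 * np.dot(pattern1, pattern2.T)
    return np.exp(-dists / (2 * deg**2))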
def generate_data(Nsims, plot=True): # Get z1, ... , z10 where zi = (zi(1), zi(2)) and zi(1), zi(2) ~ U(0,1) z1 = np.random.uniform(size=10) z2 = np.random.uniform(size=10) z = list(zip(z1, z2)) # Get theta in (0, 0.4, 5) with probabilities (0.05, 0.6, 0.35) x = np.random.uniform(0, 1, size=Nsims) thetas = np.zeros(Nsims) thetas[np.where(x <= 0.05)] = 0 thetas[np.where((0.05 < x) & (x <= 0.65))] = 0.4 thetas[np.where(x > 0.65)] = 5 # Generate realizations of (L1, ..., L10) using Gaussian copulas x = np.zeros((Nsims, 10)) theta_labels = {5: [], 0.4: [], 0: []} for i in range(Nsims): theta = thetas[i] # Get the correlation # rho_ij = exp{-theta_i * ||zi - zj||} where ||.|| denotes the Euclidean distance Omega = np.zeros((10, 10)) for j in range(10): for k in range(10): Omega[j, k] = np.exp( -theta * np.linalg.norm(np.array(z[j]) - np.array(z[k]))) # Create samples from a correlated multivariate normal x0 = np.random.multivariate_normal(mean=np.zeros(10), cov=Omega) x[i, :] = x0 theta_labels[theta].append(i) # Get uniform marginals u = norm.cdf(x) # Marginal distributions Li ~ Gamma(5, 0.2i) with mean=25 L = np.zeros((Nsims, 10)) x_axis = np.linspace(25, 50, 200) means = np.zeros(10) for i in range(10): L_i = gamma.ppf(u[:, i], a=5, loc=25, scale=0.2 * (i + 1)) L[:, i] = L_i means[i] = np.mean(L_i) # Gamma distribution plot if plot: y_i = gamma.pdf(x_axis, a=5, loc=25, scale=0.2 * (i + 1)) plt.plot(x_axis, y_i, label=f"scale={0.2*(i+1)}") if plot: plt.legend() plt.savefig('Plots/ex/data_marginal_dist.pdf', format='pdf') plt.show() max_mean = np.max(means) min_mean = np.min(means) marker_sizes = (means - min_mean) / (max_mean - min_mean) * 150 # Location plot if plot: plt.scatter(z1, z2, marker='o', color='black', s=marker_sizes) plt.savefig('Plots/ex/data_location_by_mean.pdf', format='pdf') plt.show() # Define the data and get the bandwidths, density and CDF data = {"y": np.sum(L, axis=1), "x": L} return data, theta_labels
def relSDM(obs, mod, sce, cdf_threshold=0.9999999, lower_limit=0.1): '''relative scaled distribution mapping assuming a gamma distributed parameter (with lower limit zero) rewritten from pyCAT for 1D data obs :: observed variable time series mod :: modelled variable for same time series as obs sce :: to unbias modelled time series cdf_threshold :: upper and lower threshold of CDF lower_limit :: lower limit of data signal (values below will be masked!) returns corrected timeseries tested with pandas series. ''' obs_r = obs[obs >= lower_limit] mod_r = mod[mod >= lower_limit] sce_r = sce[sce >= lower_limit] obs_fr = 1. * len(obs_r) / len(obs) mod_fr = 1. * len(mod_r) / len(mod) sce_fr = 1. * len(sce_r) / len(sce) sce_argsort = np.argsort(sce) obs_gamma = gamma.fit(obs_r, floc=0) mod_gamma = gamma.fit(mod_r, floc=0) sce_gamma = gamma.fit(sce_r, floc=0) obs_cdf = gamma.cdf(np.sort(obs_r), *obs_gamma) mod_cdf = gamma.cdf(np.sort(mod_r), *mod_gamma) obs_cdf[obs_cdf > cdf_threshold] = cdf_threshold mod_cdf[mod_cdf > cdf_threshold] = cdf_threshold expected_sce_raindays = min( int(np.round(len(sce) * obs_fr * sce_fr / mod_fr)), len(sce)) sce_cdf = gamma.cdf(np.sort(sce_r), *sce_gamma) sce_cdf[sce_cdf > cdf_threshold] = cdf_threshold # interpolate cdf-values for obs and mod to the length of the scenario obs_cdf_intpol = np.interp(np.linspace(1, len(obs_r), len(sce_r)), np.linspace(1, len(obs_r), len(obs_r)), obs_cdf) mod_cdf_intpol = np.interp(np.linspace(1, len(mod_r), len(sce_r)), np.linspace(1, len(mod_r), len(mod_r)), mod_cdf) # adapt the observation cdfs obs_inverse = 1. / (1 - obs_cdf_intpol) mod_inverse = 1. / (1 - mod_cdf_intpol) sce_inverse = 1. / (1 - sce_cdf) adapted_cdf = 1 - 1. / (obs_inverse * sce_inverse / mod_inverse) adapted_cdf[adapted_cdf < 0.] = 0. # correct by adapted observation cdf-values xvals = gamma.ppf(np.sort(adapted_cdf), *obs_gamma) * gamma.ppf( sce_cdf, *sce_gamma) / gamma.ppf(sce_cdf, *mod_gamma) # interpolate to the expected length of future raindays correction = np.zeros(len(sce)) if len(sce_r) > expected_sce_raindays: xvals = np.interp(np.linspace(1, len(sce_r), expected_sce_raindays), np.linspace(1, len(sce_r), len(sce_r)), xvals) else: xvals = np.hstack( (np.zeros(expected_sce_raindays - len(sce_r)), xvals)) correction[sce_argsort[-expected_sce_raindays:]] = xvals return pd.Series(correction, index=sce.index)
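# A quick synthetic check of relSDM (assuming the function above is in scope):
# the corrected scenario keeps the dry/wet-day structure of `sce` while its
# wet-day distribution is pulled toward the observations.  All parameter
# values below are arbitrary test values.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
idx = pd.date_range("2000-01-01", periods=2000)
wet = lambda p: rng.random(2000) < p
obs = pd.Series(rng.gamma(2.0, 3.0, 2000) * wet(0.4), index=idx)
mod = pd.Series(rng.gamma(2.0, 4.0, 2000) * wet(0.5), index=idx)
sce = pd.Series(rng.gamma(2.0, 4.5, 2000) * wet(0.5), index=idx)

corrected = relSDM(obs, mod, sce)
print(corrected.describe())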
% (np.float(afb), np.float(bfb))) # Plot Max firing rates based on Lehky (non-optimal stimuli set) & the full (optimal stimuli set) n = 1000 shape_param_dist_f = gamma.rvs(afa, scale=bfa, loc=0, size=n) scale_param_dist_f = gamma.rvs(afb, scale=bfb, loc=0, size=n) shape_param_dist_l = gamma.rvs(ala, scale=bla, loc=0, size=n) scale_param_dist_l = gamma.rvs(alb, scale=blb, loc=0, size=n) max_rates_f = [] max_rates_l = [] for index in np.arange(n): max_rates_f.append( gamma.ppf(0.99, shape_param_dist_f[index], loc=0, scale=scale_param_dist_f[index])) max_rates_l.append( gamma.ppf(0.99, shape_param_dist_l[index], loc=0, scale=scale_param_dist_l[index])) plt.figure("Max Fire Rate Distributions") plt.subplot(211) plt.hist(max_rates_f) plt.title('Histogram of full (unscaled) max spike rates') plt.subplot(212) plt.hist(max_rates_l, label='method1') plt.title('Histogram of scaled (Lehky) max spike rates') # Method 2 of getting Lehky distribution from full spike rates # noinspection PyArgumentList scale_factors = np.random.rand(n)
# print(np.mean(deaths_5), np.mean(deaths_6), np.mean(deaths_7), np.mean(deaths_8))
deaths = [deaths_5, deaths_6, deaths_7, deaths_8]
deaths
# deaths_alt = deaths_5+deaths_6+deaths_7+deaths_8
# print(deaths_alt)

plt.figure(figsize=(16, 8))
death_sum = 0
i = 0
for d_i in deaths:
    death_sum += sum(d_i)
    alpha = death_sum + 1
    b = (i + 1) * 7 + (1 / beta)
    x = np.linspace(gamma.ppf(0.01, alpha, scale=1 / b),
                    gamma.ppf(0.99, alpha, scale=1 / b), 100)
    plt.title("Posterior Gamma distributions")
    label = "Week-" + str(i + 5) + " MAP(mean): " + str(alpha / b)
    plt.plot(x, gamma.pdf(x, alpha, scale=1 / b), label=label)
    plt.xlabel("Deaths")
    plt.ylabel("PDF of Gamma distribution")
    plt.legend()
    i += 1
plt.show()

# ### Observations:
#
# - From the above graphs, we can say that as the weeks progress the MAP estimate decreases, indicating a decline in the number of deaths.
# - We can also infer that, as time progresses, the number of deaths might saturate if the trend follows a similar pattern (rate).
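# The plotting loop above evaluates each posterior between its 1% and 99%
# quantiles.  The same quantile function gives a central credible interval
# directly; alpha=42 and b=8.5 below are placeholder values, not the ones
# computed from the death counts.
from scipy.stats import gamma

alpha, b = 42.0, 8.5
lo, hi = gamma.ppf([0.025, 0.975], alpha, scale=1 / b)
print(f"posterior mean {alpha / b:.2f}, 95% credible interval ({lo:.2f}, {hi:.2f})")
# equivalently: gamma.interval(0.95, alpha, scale=1/b)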
def relative_sdm(obs_cube, mod_cube, sce_cubes, *args, **kwargs): """ apply relative scaled distribution mapping to all scenario cubes assuming a gamma distributed parameter (with lower limit zero) if one of obs, mod or sce data has less than min_samplesize valid values, the correction will NOT be performed but the original data is output Args: * obs_cube (:class:`iris.cube.Cube`): the observational data * mod_cube (:class:`iris.cube.Cube`): the model data at the reference period * sce_cubes (:class:`iris.cube.CubeList`): the scenario data that shall be corrected Kwargs: * lower_limit (float): assume values below lower_limit to be zero (default: 0.1) * cdf_threshold (float): limit of the cdf-values (default: .99999999) * min_samplesize (int): minimal number of samples (e.g. wet days) for the gamma fit (default: 10) """ from scipy.stats import gamma lower_limit = kwargs.get('lower_limit', 0.1) cdf_threshold = kwargs.get('cdf_threshold', .99999999) min_samplesize = kwargs.get('min_samplesize', 10) obs_cube_mask = np.ma.getmask(obs_cube.data) cell_iterator = np.nditer(obs_cube.data[0], flags=['multi_index']) while not cell_iterator.finished: index_list = list(cell_iterator.multi_index) cell_iterator.iternext() index_list.insert(0, 0) index = tuple(index_list) # consider only cells with valid observational data if obs_cube_mask and obs_cube_mask[index]: continue index_list[0] = slice(0, None, 1) index = tuple(index_list) obs_data = obs_cube.data[index] mod_data = mod_cube.data[index] obs_raindays = obs_data[obs_data >= lower_limit] mod_raindays = mod_data[mod_data >= lower_limit] if obs_raindays.size < min_samplesize \ or mod_raindays.size < min_samplesize: continue obs_frequency = 1. * obs_raindays.shape[0] / obs_data.shape[0] mod_frequency = 1. * mod_raindays.shape[0] / mod_data.shape[0] obs_gamma = gamma.fit(obs_raindays, floc=0) mod_gamma = gamma.fit(mod_raindays, floc=0) obs_cdf = gamma.cdf(np.sort(obs_raindays), *obs_gamma) mod_cdf = gamma.cdf(np.sort(mod_raindays), *mod_gamma) obs_cdf[obs_cdf > cdf_threshold] = cdf_threshold mod_cdf[mod_cdf > cdf_threshold] = cdf_threshold for sce_cube in sce_cubes: sce_data = sce_cube[index].data sce_raindays = sce_data[sce_data >= lower_limit] if sce_raindays.size < min_samplesize: continue sce_frequency = 1. * sce_raindays.shape[0] / sce_data.shape[0] sce_argsort = np.argsort(sce_data) sce_gamma = gamma.fit(sce_raindays, floc=0) expected_sce_raindays = min( np.round( len(sce_data) * obs_frequency * sce_frequency / mod_frequency), len(sce_data)) sce_cdf = gamma.cdf(np.sort(sce_raindays), *sce_gamma) sce_cdf[sce_cdf > cdf_threshold] = cdf_threshold # interpolate cdf-values for obs and mod to the length of the # scenario obs_cdf_intpol = np.interp( np.linspace(1, len(obs_raindays), len(sce_raindays)), np.linspace(1, len(obs_raindays), len(obs_raindays)), obs_cdf) mod_cdf_intpol = np.interp( np.linspace(1, len(mod_raindays), len(sce_raindays)), np.linspace(1, len(mod_raindays), len(mod_raindays)), mod_cdf) # adapt the observation cdfs obs_inverse = 1. / (1 - obs_cdf_intpol) mod_inverse = 1. / (1 - mod_cdf_intpol) sce_inverse = 1. / (1 - sce_cdf) adapted_cdf = 1 - 1. / (obs_inverse * sce_inverse / mod_inverse) adapted_cdf[adapted_cdf < 0.] = 0. 
# correct by adapted observation cdf-values xvals = gamma.ppf(np.sort(adapted_cdf), *obs_gamma) *\ gamma.ppf(sce_cdf, *sce_gamma) /\ gamma.ppf(sce_cdf, *mod_gamma) # interpolate to the expected length of future raindays correction = np.zeros(len(sce_data)) if len(sce_raindays) > expected_sce_raindays: xvals = np.interp( np.linspace(1, len(sce_raindays), expected_sce_raindays), np.linspace(1, len(sce_raindays), len(sce_raindays)), xvals) else: xvals = np.hstack( (np.zeros(expected_sce_raindays - len(sce_raindays)), xvals)) correction[sce_argsort[-expected_sce_raindays:]] = xvals sce_cube.data[index] = correction
def getDelta(myA, myAlpha):
    q = gamma.ppf(myAlpha, myA, 0, 1 / myA)
    return (q - 1) * (myA + np.divide(1 - myA, q))
def posterior_ess(Y, M, Sigma, A, B, C, Beta=None, lam_gridsize=100, nburn=1000, nsamples=1000, nthin=1, nthreads=1, print_freq=100): # Filter out the unknown Y values Present = Y >= 0 if Beta is None: # Initialize beta to the approximate MLE where data is not missing # and the prior where data is missing Beta = M * (1 - Present) + Present * ( (Y - C[:, None]) / A[:, None] * B[:, None]).clip(1e-6, 1 - 1e-6) # Use a grid approximation for lambda integral Lam_grid, Lam_weights = [], [] for a, b, c in zip(A, B, C): grid = np.linspace(gamma.ppf(1e-3, a, scale=b), gamma.ppf(1 - 1e-3, a, scale=b), lam_gridsize)[np.newaxis, :] weights = gamma.pdf(grid, a, scale=b) weights /= weights.sum() Lam_grid.append(grid) Lam_weights.append(weights) Lam_grid = np.array(Lam_grid) Lam_weights = np.array(Lam_weights) # Create the results arrays Cur_log_likelihood = np.zeros(M.shape[0]) chol = np.linalg.cholesky(Sigma) Beta_samples = np.zeros((nsamples, Beta.shape[0], Beta.shape[1])) Loglikelihood_samples = np.zeros(nsamples) if nthreads == 1: ### Create a log-likelihood function for the ES sampler ### def log_likelihood_fn(proposal_beta, idx): if np.any(proposal_beta[:-1] > proposal_beta[1:]): return -np.inf present = Present[idx] y = Y[idx][present][:, np.newaxis] tau = ilogit(proposal_beta)[present][:, np.newaxis] grid = Lam_grid[idx] weights = Lam_weights[idx] c = C[idx] return np.log((poisson.pmf(y, grid * tau + c) * weights).clip( 1e-10, np.inf).sum(axis=1)).sum() # Run the MCMC sampler on a single thread for step in range(nburn + nsamples * nthin): if print_freq and step % print_freq == 0: if step > 0: sys.stdout.write("\033[F") # Cursor up one line print('MCMC step {}'.format(step)) # Ellipitical slice sample for each beta for idx, beta in enumerate(Beta): cur_ll = None if step == 0 else Cur_log_likelihood[idx] Beta[idx], Cur_log_likelihood[idx] = elliptical_slice( beta, chol, log_likelihood_fn, cur_log_like=cur_ll, ll_args=idx, mu=M[idx]) # Save this sample after burn-in and markov chain thinning if step < nburn or ((step - nburn) % nthin) != 0: continue # Save the samples sample_idx = (step - nburn) // nthin Beta_samples[sample_idx] = Beta Loglikelihood_samples[sample_idx] = Cur_log_likelihood.sum() else: from multiprocessing import Pool jobs = [(Y[idx][Present[idx]][:, np.newaxis], Present[idx], Lam_grid[idx], Lam_weights[idx], C[idx], M[idx], Beta[idx], chol, nburn, nsamples, nthin) for idx in range(Beta.shape[0])] # Calculate the posteriors in parallel with Pool(nthreads) as pool: results = pool.map(posterior_ess_helper, jobs) # Aggregate the results for idx in range(Beta.shape[0]): Beta_samples[:, idx] = results[idx][0] Loglikelihood_samples += results[idx][1] return Beta_samples, Loglikelihood_samples
def getRK(gBar, myA, myW, myP, myAlpha):
    q = gamma.ppf(myAlpha, myA, 0, 1 / myA)
    return gBar * myP * (1 - myW + myW * q)
def getK(gBar, myA, myW, myP, myAlpha):
    q = gamma.ppf(myAlpha, myA, 0, 1 / myA)
    return gBar * myP * myW * (q - 1)
def getW(myP, myA, myRho, myAlpha):
    num = th.computeP(myP, myRho, norm.ppf(1 - myAlpha)) - myP
    den = myP * (gamma.ppf(myAlpha, myA, 0, 1 / myA) - 1)
    return np.divide(num, den)
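# getDelta / getRK / getK / getW above all share the quantity
# q = gamma.ppf(alpha, a, 0, 1/a): the alpha-quantile of a unit-mean gamma
# (shape a, scale 1/a).  Two quick sanity checks of that building block:
import numpy as np
from scipy.stats import gamma

a, alpha = 4.0, 0.95
q = gamma.ppf(alpha, a, 0, 1 / a)
print(gamma.mean(a, 0, 1 / a))                             # 1.0: unit mean by construction
print(gamma.ppf(alpha, 1.0, 0, 1.0), -np.log(1 - alpha))   # a=1 reduces to the exponential quantile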
import numpy as np
from scipy.stats import gamma
import nibabel as nib

# probably redundant and have to find a better way to do this
img = nib.load('/Users/nanditharajamani/Desktop/IIT_delhi_stuff/Assignment_IIT_delhi/fsl_preprocessed/smoothed_img/sub-MSC01_ses-func01_task-motor_run-01_bold_mcf_filt_st_smooth.nii.gz')
img_data = img.get_fdata()  # get_fdata() replaces the deprecated get_data()
num_vols = img_data.shape[3]
header = img.header
find_pix = header['pixdim']
TR = find_pix[4]
TR = 2.2  # hard-coded override of the header value

t_list = np.arange(1, img_data.shape[3], TR)

# define the HRF model as a difference of gamma densities
# (pdf, not ppf: the quantile function is only defined on [0, 1])
h = gamma.pdf(t_list, 6) - 0.5 * gamma.pdf(t_list, 10)
h = h / max(h)

# read from the covariates file and determine the duration of each task
dur_of_each_task = 15.4
TRperStim = TR * dur_of_each_task
nREPS = 2  # number of times each stimulus is repeated. This can also be obtained from the
# number of rows in the covariates file, for each stimulus
nTRs = TRperStim * nREPS + len(h)
design_matrix = np.zeros((1, int(nTRs)))

# now, let's make each entry at the time point the stimulus was on 1; the rest will be zero
# left hand stimulus
left_hand_stim = design_matrix
# left_hand_stim(1:TRperStim:)   <- MATLAB-style indexing, not valid Python
elif (EARTH_QUESTION): print('EARTH_QUESTION') Lambda = np.arange(0.01,1,0.01) prior_alpha = 1 prior_beta = 30 prior = Gamma(Lambda, prior_alpha, prior_beta) data = [16,8,114,60,4,23,30,105] likelihood = Exponential(Lambda, data) posterior_alpha = prior_alpha + len(data) posterior_beta = prior_beta + sum(data) PPTLT = GammaDist.ppf(0.95,posterior_alpha,scale=1.0/posterior_beta) print(PPTLT) posterior = Gamma(Lambda, posterior_alpha, posterior_beta) # posterior predictive density given the data f(newData | oldData) # range of new data Y = np.arange(0,121,1) #notice that now we are considering the above posterior to be our #new prior #and we are calculating the new posterior for each possible new data predictive_posterior = [] for y in Y: predictive_posterior.append(max(Gamma(Lambda, posterior_alpha+1, posterior_beta+y))) plt.plot(Y,predictive_posterior)
function_of_gamma=f_gamma, range_gamma=range_gamma, k_opt_fcn=k_opt, range_of_k=range_of_k, beta=beta, tol=tol) R[rep, i] = dist c = limiting_dist_EQOPP(X=data, a=data_sensitive, y=data_label, beta=beta, marginals=marginals_rand) k = 1 / 2 theta = 2 * c if N * dist > gamma.ppf(.95, a=k, scale=theta): print('Reject') test_res[rep, i] = 1 else: print('Fail to reject') test_res[rep, i] = 0 end = time.time() time_elapsed = (end - start) * (replications - rep - 1) conversion = datetime.timedelta(seconds=time_elapsed) print('Replication====>' + str(rep) + '/' + str(replications) + ', Time remaining : ' + str(conversion)) np.savetxt( 'results_' + str(N_range) + '_iterations_' + str(replications) + '.out', R) np.savetxt(
def SMC2(td, beta_softmax=1., numberOfStateSamples=200, numberOfThetaSamples=200, numberOfBetaSamples=20, coefficient=.5, latin_hyp_sampling=True): print('\n') print('Forward Constant Volatility Model') print('number of theta samples ' + str(numberOfThetaSamples)) print('\n') #Start timer start_time_multi = time.time() # uniform distribution if latin_hyp_sampling: d0 = uniform() print('latin hypercube sampling') else: print('sobolev sampling') # Extract parameters from task description stimuli = td['S'] # Sequence of Stimuli numberOfActions = td['action_num'] # Number of Actions possible numberOfStimuli = td['state_num'] # Number of states or stimuli rewards = td['reward'] actions = td['A_chosen'] K = np.prod( np.arange(numberOfActions + 1)[-numberOfStimuli:]) # Number of possible Task Sets numberOfTrials = len(stimuli) # Number of Trials # verification if K == 2: if latin_hyp_sampling == False: raise ValueError( 'Why did you change the latin_hyp_sampling? By default, it is True and has no influence when K=2.' ) # Sampling and prior settings betaPrior = np.array([1, 1]) # Prior on Beta, the feedback noise parameter tauPrior = np.array([1, 1]) gammaPrior = np.ones(K) # Prior on Gamma, the Dirichlet parameter log_proba = 0. log_proba_ = 0. # Mapping from task set to correct action per stimulus mapping = get_mapping.Get_TaskSet_Stimulus_Mapping( state_num=numberOfStimuli, action_num=numberOfActions).T betaWeights = np.zeros(numberOfBetaSamples) betaLog = np.zeros(numberOfBetaSamples) logbetaWeights = np.zeros(numberOfBetaSamples) betaAncestors = np.arange(numberOfBetaSamples) # Probabilities of every actions updated at every time step -> Used to take the decision actionLikelihood = np.zeros([numberOfBetaSamples, numberOfActions]) sum_actionLik = np.zeros(numberOfBetaSamples) filt_actionLkd = np.zeros( [numberOfTrials, numberOfBetaSamples, numberOfActions]) # Keep track of probability correct/exploration after switches tsProbability = np.zeros([numberOfBetaSamples, K]) sum_tsProbability = np.zeros(numberOfBetaSamples) dirichletParamCandidates = np.zeros(K) # SMC particles initialisation muSamples = np.zeros( [numberOfBetaSamples, numberOfThetaSamples] ) #np.random.beta(betaPrior[0], betaPrior[1], [numberOfBetaSamples, numberOfThetaSamples]) gammaSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K]) tauSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples]) if K == 24: try: latin_hyp_samples = pickle.load( open('../../utils/sobol_200_26.pkl', 'rb')) except: latin_hyp_samples = pickle.load( open('../../models/utils/sobol_200_26.pkl', 'rb')) for beta_idx in range(numberOfBetaSamples): if latin_hyp_sampling: latin_hyp_samples = mcerp.lhd(dist=d0, size=numberOfThetaSamples, dims=K + 2) muSamples[beta_idx] = betalib.ppf(latin_hyp_samples[:, 0], betaPrior[0], betaPrior[1]) tauSamples[beta_idx] = betalib.ppf(latin_hyp_samples[:, 1], tauPrior[0], tauPrior[1]) gammaSamples[beta_idx] = gammalib.ppf(latin_hyp_samples[:, 2:], gammaPrior) gammaSamples[beta_idx] = np.transpose( gammaSamples[beta_idx].T / np.sum(gammaSamples[beta_idx], axis=1)) elif K == 2: muSamples = np.random.beta(betaPrior[0], betaPrior[1], [numberOfBetaSamples, numberOfThetaSamples]) tauSamples = np.random.beta( tauPrior[0], tauPrior[1], [numberOfBetaSamples, numberOfThetaSamples]) gammaSamples = np.random.dirichlet( gammaPrior, [numberOfBetaSamples, numberOfThetaSamples]) else: raise IndexError('Wrong number of task sets') logThetaWeights = np.zeros([numberOfBetaSamples, numberOfThetaSamples]) currentSamples = 
np.zeros( [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.intc) ancestorSamples = np.zeros( [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples], dtype=np.intc) weightsList = np.ones([numberOfThetaSamples, numberOfStateSamples ]) / numberOfStateSamples log_proba_corr = 0. ancestorsIndexes = np.zeros(numberOfStateSamples, dtype=np.intc) gammaAdaptedProba = np.zeros(K) likelihoods = np.zeros(K) positiveStates = np.zeros(K, dtype=np.intc) # Guided SMC variables muSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples]) tauSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples]) gammaSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K]) logThetaWeightsNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples]) normalisedThetaWeights = np.zeros( [numberOfBetaSamples, numberOfThetaSamples]) # Loop over trials for T in range(numberOfTrials): # Print progress if (T + 1) % 10 == 0: sys.stdout.write(' ' + str(T + 1)) sys.stdout.flush() if (T + 1) % 100 == 0: print('\n') for beta_idx in range(numberOfBetaSamples): ances = betaAncestors[beta_idx] smc_c.bootstrapUpdateStep_c(currentSamples[beta_idx], logThetaWeights[beta_idx], gammaSamples[ances], muSamples[ances]/2. + 1./2, tauSamples[ances]/2., T, ancestorSamples[ances], weightsList, \ np.ascontiguousarray(mapping), stimuli[T-1], actions[T-1], rewards[T-1], ancestorsIndexes, gammaAdaptedProba, likelihoods, positiveStates, 0) # Move step normalisedThetaWeights[ beta_idx] = useful_functions.to_normalized_weights( logThetaWeights[beta_idx]) ess = 1. / np.sum(normalisedThetaWeights[beta_idx]**2) if (ess < coefficient * numberOfThetaSamples): acceptanceProba = 0. tauMu = np.sum(normalisedThetaWeights[beta_idx] * tauSamples[ances]) tauVar = np.sum(normalisedThetaWeights[beta_idx] * (tauSamples[ances] - tauMu)**2) tauAlpha = ((1 - tauMu) / tauVar - 1 / tauMu) * tauMu**2 tauBeta = tauAlpha * (1 / tauMu - 1) assert (tauAlpha > 0) assert (tauBeta > 0) betaMu = np.sum(normalisedThetaWeights[beta_idx] * muSamples[ances]) betaVar = np.sum(normalisedThetaWeights[beta_idx] * (muSamples[ances] - betaMu)**2) betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2 betaBeta = betaAlpha * (1 / betaMu - 1) assert (betaAlpha > 0) assert (betaBeta > 0) dirichletMeans = np.sum(normalisedThetaWeights[beta_idx] * gammaSamples[ances].T, axis=1) dirichletVar = np.sum(normalisedThetaWeights[beta_idx] * (gammaSamples[ances]**2).T, axis=1) - dirichletMeans**2 dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2 ) / (np.sum(dirichletVar)) - 1 dirichletParamCandidates[:] = np.maximum( dirichletMeans * dirichletPrecision, 1.) assert ((dirichletParamCandidates > 0).all()) if K == 2: tauSamplesNew[beta_idx] = np.random.beta( tauAlpha, tauBeta, numberOfThetaSamples) muSamplesNew[beta_idx] = np.random.beta( betaAlpha, betaBeta, numberOfThetaSamples) gammaSamplesNew[beta_idx] = np.random.dirichlet( dirichletParamCandidates, numberOfThetaSamples) elif K == 24: if latin_hyp_sampling: latin_hyp_samples = mcerp.lhd( dist=d0, size=numberOfThetaSamples, dims=K + 2) muSamplesNew[beta_idx] = betalib.ppf( latin_hyp_samples[:, 0], betaAlpha, betaBeta) tauSamplesNew[beta_idx] = betalib.ppf( latin_hyp_samples[:, 1], tauAlpha, tauBeta) gammaSamplesNew[beta_idx] = gammalib.ppf( latin_hyp_samples[:, 2:], dirichletParamCandidates) gammaSamplesNew[beta_idx] = np.transpose( gammaSamplesNew[beta_idx].T / np.sum(gammaSamplesNew[beta_idx], axis=1)) logThetaWeightsNew[beta_idx] = 0. 
normalisedThetaWeights[beta_idx] = 1. / numberOfThetaSamples else: tauSamplesNew[beta_idx] = tauSamples[ances] muSamplesNew[beta_idx] = muSamples[ances] gammaSamplesNew[beta_idx] = gammaSamples[ances] logThetaWeightsNew[beta_idx] = logThetaWeights[beta_idx] # task set probability sum_tsProbability[:] = 0. for ts_idx in range(K): tsProbability[:, ts_idx] = np.sum(normalisedThetaWeights * np.sum( (currentSamples == ts_idx), axis=2), axis=1) sum_tsProbability += tsProbability[:, ts_idx] tsProbability[:] = np.transpose(tsProbability.T / sum_tsProbability) # Compute action likelihood sum_actionLik[:] = 0. for action_idx in range(numberOfActions): actionLikelihood[:, action_idx] = np.exp( np.log( np.sum(tsProbability[:, mapping[stimuli[T].astype(int)] == action_idx], axis=1)) * beta_softmax) sum_actionLik += actionLikelihood[:, action_idx] rewards[T] = td['reward'][T] actions[T] = td['A_chosen'][T] actionLikelihood[:] = np.transpose(actionLikelihood.T / sum_actionLik) betaWeights[:] = actionLikelihood[:, actions[T].astype(int)] filt_actionLkd[T] = actionLikelihood log_proba_ += np.log(sum(betaWeights) / numberOfBetaSamples) betaWeights = betaWeights / sum(betaWeights) betaAncestors[:] = useful_functions.stratified_resampling(betaWeights) # update particles muSamples[:] = muSamplesNew gammaSamples[:] = gammaSamplesNew tauSamples[:] = tauSamplesNew logThetaWeights[:] = logThetaWeightsNew[betaAncestors] ancestorSamples[:] = currentSamples elapsed_time = time.time() - start_time_multi return log_proba_, filt_actionLkd
def posterior_ess_Sigma(Y, M, A, B, C, Sigma=None, nu=None, Psi=None, Beta=None, lam_gridsize=100, nburn=500, nsamples=1000, nthin=1, print_freq=100): if nu is None: # Default degrees of freedom nu = M.shape[1] + 1 if Psi is None: # # Default squared exponential kernel prior # bandwidth, kernel_scale, noise_var = 2., 1., 0.5 # Psi = np.array([kernel_scale*(np.exp(-0.5*(i - np.arange(M.shape[1]))**2 / bandwidth**2)) for i in np.arange(M.shape[1])]) + noise_var*np.eye(M.shape[1]) Psi = np.eye(M.shape[1]) Psi *= nu - M.shape[1] + 1 if Sigma is None: # Sample from the prior to initialize Sigma Sigma = invwishart.rvs(nu, Psi) if Beta is None: Beta = np.copy(M) # Filter out the unknown Y values Present = Y >= 0 # Use a grid approximation for lambda integral Lam_grid, Lam_weights = [], [] for a, b, c in zip(A, B, C): grid = np.linspace(gamma.ppf(1e-3, a, scale=b), gamma.ppf(1 - 1e-3, a, scale=b), lam_gridsize)[np.newaxis, :] weights = gamma.pdf(grid, a, scale=b) weights /= weights.sum() Lam_grid.append(grid) Lam_weights.append(weights) Lam_grid = np.array(Lam_grid) Lam_weights = np.array(Lam_weights) ### Create a log-likelihood function for the ES sampler ### def log_likelihood_fn(proposal_beta, idx): if np.any(proposal_beta[:-1] > proposal_beta[1:] + 1e-6): return -np.inf present = Present[idx] y = Y[idx][present][:, np.newaxis] tau = ilogit(proposal_beta)[present][:, np.newaxis] grid = Lam_grid[idx] weights = Lam_weights[idx] c = C[idx] return np.log((poisson.pmf(y, grid * tau + c) * weights).clip( 1e-10, np.inf).sum(axis=1)).sum() # Initialize betas with draws from the prior Cur_log_likelihood = np.zeros(M.shape[0]) chol = np.linalg.cholesky(Sigma) # Create the results arrays Beta_samples = np.zeros((nsamples, Beta.shape[0], Beta.shape[1])) Sigma_samples = np.zeros((nsamples, Sigma.shape[0], Sigma.shape[1])) Loglikelihood_samples = np.zeros(nsamples) # Run the MCMC sampler for step in range(nburn + nsamples * nthin): if print_freq and step % print_freq == 0: if step > 0: sys.stdout.write("\033[F") # Cursor up one line print('MCMC step {}'.format(step)) # Ellipitical slice sample for each beta for idx, beta in enumerate(Beta): Beta[idx], Cur_log_likelihood[idx] = elliptical_slice( beta, chol, log_likelihood_fn, ll_args=idx, mu=M[idx]) # Cur_log_likelihood[idx] += mvn.logpdf(Beta[idx], M[idx], Sigma) # Conjugate prior update for Sigma Sigma = invwishart.rvs(nu + M.shape[0], Psi + (Beta - M).T.dot(Beta - M)) # Cholesky representation chol = np.linalg.cholesky(Sigma) # Save this sample after burn-in and markov chain thinning if step < nburn or ((step - nburn) % nthin) != 0: continue # Save the samples sample_idx = (step - nburn) // nthin Beta_samples[sample_idx] = Beta Sigma_samples[sample_idx] = Sigma Loglikelihood_samples[sample_idx] = Cur_log_likelihood.sum() return Beta_samples, Sigma_samples, Loglikelihood_samples
def noiselevel(self): if len(self.img.shape) < 3: self.img = np.expand_dims(self.img, 2) nlevel = np.ndarray(self.img.shape[2]) th = np.ndarray(self.img.shape[2]) num = np.ndarray(self.img.shape[2]) kh = np.expand_dims(np.expand_dims(np.array([-0.5, 0, 0.5]), 0),2) imgh = correlate(self.img, kh, mode='nearest') imgh = imgh[:, 1: imgh.shape[1] - 1, :] imgh = imgh * imgh kv = np.expand_dims(np.vstack(np.array([-0.5, 0, 0.5])), 2) imgv = correlate(self.img, kv, mode='nearest') imgv = imgv[1: imgv.shape[0] - 1, :, :] imgv = imgv * imgv Dh = np.matrix(self.convmtx2(np.squeeze(kh,2), self.patchsize, self.patchsize)) Dv = np.matrix(self.convmtx2(np.squeeze(kv,2), self.patchsize, self.patchsize)) DD = Dh.getH() * Dh + Dv.getH() * Dv r = np.double(np.linalg.matrix_rank(DD)) Dtr = np.trace(DD) tau0 = gamma.ppf(self.conf, r / 2, scale=(2 * Dtr / r)) for cha in range(self.img.shape[2]): X = view_as_windows(self.img[:, :, cha], (self.patchsize, self.patchsize)) X = X.reshape(np.int(X.size / self.patchsize ** 2), self.patchsize ** 2, order='F').transpose() Xh = view_as_windows(imgh[:, :, cha], (self.patchsize, self.patchsize - 2)) Xh = Xh.reshape(np.int(Xh.size / ((self.patchsize - 2) * self.patchsize)), ((self.patchsize - 2) * self.patchsize), order='F').transpose() Xv = view_as_windows(imgv[:, :, cha], (self.patchsize - 2, self.patchsize)) Xv = Xv.reshape(np.int(Xv.size / ((self.patchsize - 2) * self.patchsize)), ((self.patchsize - 2) * self.patchsize), order='F').transpose() Xtr = np.expand_dims(np.sum(np.concatenate((Xh, Xv), axis=0), axis=0), 0) if self.decim > 0: XtrX = np.transpose(np.concatenate((Xtr, X), axis=0)) XtrX = np.transpose(XtrX[XtrX[:, 0].argsort(),]) p = np.floor(XtrX.shape[1] / (self.decim + 1)) p = np.expand_dims(np.arange(0, p) * (self.decim + 1), 0) Xtr = XtrX[0, p.astype('int')] X = np.squeeze(XtrX[1:XtrX.shape[1], p.astype('int')]) # noise level estimation tau = np.inf if X.shape[1] < X.shape[0]: sig2 = 0 else: cov = (np.asmatrix(X) @ np.asmatrix(X).getH()) / (X.shape[1] - 1) d = np.flip(np.linalg.eig(cov)[0], axis=0) sig2 = d[0] for i in range(1, self.itr): # weak texture selection tau = sig2 * tau0 p = Xtr < tau Xtr = Xtr[p] X = X[:, np.squeeze(p)] # noise level estimation if X.shape[1] < X.shape[0]: break cov = (np.asmatrix(X) @ np.asmatrix(X).getH()) / (X.shape[1] - 1) d = np.flip(np.linalg.eig(cov)[0], axis=0) sig2 = d[0] nlevel[cha] = np.sqrt(sig2) th[cha] = tau num[cha] = X.shape[1] # clean up self.img = np.squeeze(self.img) return nlevel, th, num
def hsic_gam(X, Y, alph=0.5): """ X, Y are numpy vectors with row - sample, col - dim alph is the significance level auto choose median to be the kernel width """ n = X.shape[0] # ----- width of X ----- Xmed = X G = np.sum(Xmed * Xmed, 1).reshape(n, 1) Q = np.tile(G, (1, n)) R = np.tile(G.T, (n, 1)) dists = Q + R - 2 * np.dot(Xmed, Xmed.T) dists = dists - np.tril(dists) dists = dists.reshape(n**2, 1) width_x = np.sqrt(0.5 * np.median(dists[dists > 0])) # ----- ----- # ----- width of X ----- Ymed = Y G = np.sum(Ymed * Ymed, 1).reshape(n, 1) Q = np.tile(G, (1, n)) R = np.tile(G.T, (n, 1)) dists = Q + R - 2 * np.dot(Ymed, Ymed.T) dists = dists - np.tril(dists) dists = dists.reshape(n**2, 1) width_y = np.sqrt(0.5 * np.median(dists[dists > 0])) # ----- ----- bone = np.ones((n, 1), dtype=float) H = np.identity(n) - np.ones((n, n), dtype=float) / n K = rbf_dot(X, X, width_x) L = rbf_dot(Y, Y, width_y) Kc = np.dot(np.dot(H, K), H) Lc = np.dot(np.dot(H, L), H) testStat = np.sum(Kc.T * Lc) / n varHSIC = (Kc * Lc / 6)**2 varHSIC = (np.sum(varHSIC) - np.trace(varHSIC)) / n / (n - 1) varHSIC = varHSIC * 72 * (n - 4) * (n - 5) / n / (n - 1) / (n - 2) / (n - 3) K = K - np.diag(np.diag(K)) L = L - np.diag(np.diag(L)) muX = np.dot(np.dot(bone.T, K), bone) / n / (n - 1) muY = np.dot(np.dot(bone.T, L), bone) / n / (n - 1) mHSIC = (1 + muX * muY - muX - muY) / n al = mHSIC**2 / varHSIC bet = varHSIC * n / mHSIC thresh = gamma.ppf(1 - alph, al, scale=bet)[0][0] return (testStat, thresh)
def qGamma(p: float, location: np.ndarray, scale: np.ndarray):
    """Quantile function of the gamma distribution in the
    mean / coefficient-of-variation parameterization."""
    q = gamma.ppf(p, a=1 / scale**2, scale=location * scale**2)
    return q
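# qGamma uses shape = 1/scale**2 and scipy scale = location*scale**2, so its
# output should invert the matching CDF.  A quick roundtrip check with
# arbitrary values:
import numpy as np
from scipy.stats import gamma

location, scale = np.array([2.0, 5.0]), np.array([0.3, 0.8])
p = 0.9
q = qGamma(p, location, scale)
print(gamma.cdf(q, a=1 / scale**2, scale=location * scale**2))  # ~[0.9, 0.9]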
#Create frequency distribution with numpy freq, counts = FreqDist(testdata, x) ax.plot(x, freq, 'r.') ax.legend(loc='best', frameon=False) plt.xscale('log') plt.yscale('log') plt.ylim(0.001, 1) plt.show() #%% fig, ax = plt.subplots(1, 1) a = 1.99 mean, var, skew, kurt = gamma.stats(a, moments='mvsk') x = np.linspace(gamma.ppf(0.01, a), gamma.ppf(0.99, a), 100) ax.plot(x, gamma.pdf(x, a), 'r-', lw=5, alpha=0.6, label='gamma pdf') rv = gamma(a) ax.plot(x, rv.pdf(x), 'k-', lw=3, label='best-fit distribution') #%% testdata = WDists["15_set_10"] x = np.logspace(np.log10(0.01), np.log10(10)) def FreqDist(data, bins): counts = np.zeros(len(bins)) for i in range(len(bins)): if i == 0: lower = 0
#############################
# Gamma Distribution
# X ~ Gamma(k, theta); in scipy the default scale is theta = 1
# f(x;k, theta) = x**(k-1) * exp(-x / theta) / theta ** k / gamma_function(k)
#############################
from scipy.stats import gamma

k = 1
x = 1
pdf_value = gamma.pdf(x, k)
print(f"When X ~ Gamma({k}, 1),\t pdf(X = {x}) = {pdf_value}")

cdf_value = gamma.cdf(x, k)
print(f"When X ~ Gamma({k}, 1),\t cdf(X <= {x}) = {cdf_value}")

# ppf: percentage point function (inverse function of cdf)
p = 0.25
ppf_value = gamma.ppf(p, k)
print(f"When X ~ Gamma({k}, 1),\t ppf(p = {p}) = {ppf_value}")
print(f"When X ~ Gamma({k}, 1),\t IQR = [{gamma.ppf(0.25, k)}, {gamma.ppf(0.75, k)}]")

# rvs : random variates
sample_size = 10
print(f"Random Variates (size :{sample_size}) from X ~ Gamma({k}, 1)\n",
      gamma.rvs(k, size=sample_size))
print()

#%%
#############################
# Exponential Distribution
# X ~ Exp(lambd) ... (X ~ Gamma(1, 1 / lambda))
# f(x;lambda) = lambda * exp(-x * lambda)
# in scipy, scale = 1 / lambda
#############################
from scipy.stats import expon
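# Two quick checks of the relationships stated above: ppf inverts cdf, and
# Gamma(1, scale=1/lambda) has the same density as Exponential(lambda).
import numpy as np
from scipy.stats import gamma, expon

k, p = 1, 0.25
assert np.isclose(gamma.cdf(gamma.ppf(p, k), k), p)   # ppf is the inverse of cdf

lam, x = 2.0, 0.7
print(gamma.pdf(x, 1, scale=1 / lam), expon.pdf(x, scale=1 / lam))  # identical densities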
def dip_threshold(n, p_value):
    k = 21.642
    theta = 1.84157e-2 / numpy.sqrt(n)
    return gamma.ppf(1. - p_value, a=k, scale=theta)
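# dip_threshold returns the critical value of the dip statistic for a given
# sample size and p-value via a gamma approximation of the null distribution
# (k and theta above are fitted constants).  Example usage, assuming the
# function above is in scope:
print(dip_threshold(n=500, p_value=0.05))
print(dip_threshold(n=500, p_value=0.01))   # stricter test gives a larger threshold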
for i in range(2,len(TotalCases)): new_cases=float(TotalCases[i]-TotalCases[i-1]) old_new_cases=float(TotalCases[i-1]-TotalCases[i-2]) # This uses a conjugate prior as a Gamma distribution for b_t, with parameters alpha and beta alpha =alpha+new_cases beta=beta +old_new_cases valpha.append(alpha) vbeta.append(beta) mean = gamma.stats(a=alpha, scale=1/beta, moments='m') RRest=1.+infperiod*ln(mean) if (RRest<0.): RRest=0. predR.append(RRest) testRRM=1.+infperiod*ln( gamma.ppf(0.99, a=alpha, scale=1./beta) )# these are the boundaries of the 99% confidence interval for new cases if (testRRM <0.): testRRM=0. pstRRM.append(testRRM) testRRm=1.+infperiod*ln( gamma.ppf(0.01, a=alpha, scale=1./beta) ) if (testRRm <0.): testRRm=0. pstRRm.append(testRRm) #print('estimated RR=',RRest,testRRm,testRRM) # to see the numbers for the evolution of Rt if (new_cases==0. or old_new_cases==0.): pred.append(0.) pstdM.append(10.) pstdm.append(0.) NewCases.append(0.) if (new_cases>0. and old_new_cases>0.):
def ppf(p, a, b):
    q = gamma.ppf(p, a, loc=0, scale=b)
    return q
def u_to_x(self, u):
    return gamma.ppf(norm.cdf(u, 0, 1), a=self.k, scale=self.th)
def zradius(ndim, siglevel=6):
    q = 1 - 2.0 * norm.cdf(-siglevel)
    xx = gamma.ppf(q, ndim * 0.5)
    zz = np.sqrt(2 * xx)
    return zz
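# zradius converts a 1-D sigma level into the equivalent radius in ndim
# dimensions; since the squared norm of a standard normal vector is
# chi-square with ndim degrees of freedom (i.e. 2 * Gamma(ndim/2, scale=1)),
# it can be cross-checked against chi2.ppf:
import numpy as np
from scipy.stats import chi2, norm

ndim, siglevel = 3, 6
q = 1 - 2.0 * norm.cdf(-siglevel)
print(zradius(ndim, siglevel), np.sqrt(chi2.ppf(q, ndim)))   # should agree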
def Draw_samples_from_dist(num_samples, dist, **params): uniform_samps = Draw_samples_from_Uni(0, 1, num_samples) ## Using inverse transform sampling and the Box-Muller transform if dist == "Normal": if "mean" in params: mean = params["mean"] else: print("Please specify 'mean' parameter.") return if "std" in params: std = params["std"] else: print("Please specify 'std' parameter.") return uniform_samps_pairs = Draw_samples_from_Uni(0, 1, num_samples) z = np.zeros(num_samples) for i in range(num_samples): z0 = np.sqrt(-2 * np.log(uniform_samps[i])) * np.cos( 2 * np.pi * uniform_samps_pairs[i]) # z1 = np.sqrt(-2*np.log(uniform_samps[i]))*np.sin(2*np.pi*uniform_samps_pairs[i]) ## however we can just use z0, we don't need pairs z[i] = z0 * std + mean return z elif dist == "Exponential": if "lamb" in params: lamb = params["lamb"] else: print("Please specify 'lamb' parameter.") return x = np.zeros(num_samples) for i in range(num_samples): x[i] = -(1 / lamb) * ( np.log(1 - uniform_samps[i]) ) ## take the inverse Exponential CDF and apply inverse transform sampling return x elif dist == "Gamma": if "shape" in params: shape = params["shape"] else: print("Please specify 'shape' parameter.") return if "loc" in params: loc = params["loc"] else: print("Please specify 'loc' parameter.") return if "scale" in params: scale = params["scale"] else: print("Please specify 'scale' parameter.") return g = np.zeros(num_samples) for i in range(num_samples): g[i] = gamma.ppf(uniform_samps[i], shape, loc, scale) return g else: print( "Please input a correct distribution. Type either 'Normal', 'Exponential' or 'Gamma'" )
import matplotlib.pyplot as plt
from scipy.stats import gamma
import numpy as np

plt.style.use('seaborn-paper')

fig, ax = plt.subplots(1, 1)

# Parameters for the Gamma distribution; beta is treated as a rate here, so the
# scipy scale is 1/beta (scipy's third positional argument is loc, not scale).
alpha, beta = 6, 6

# Choose the x-axis to cover cumulative probability 1% ~ 99%
x = np.linspace(gamma.ppf(.01, alpha, scale=1 / beta),
                gamma.ppf(.99, alpha, scale=1 / beta), 100)

# Plot
ax.plot(x, gamma.pdf(x, alpha, scale=1 / beta),
        label='Gam({0}, {1})'.format(alpha, beta))
plt.title('Pdf of Gamma Distribution')
plt.legend(loc='best')
plt.tight_layout()
plt.show()
def run_platformqc(data_path, output_path, *, suffix=None, b_width=1000): if not suffix: suffix = "" else: suffix = "_" + suffix log_path = os.path.join(output_path, "log", "log_sequel_platformqc" + suffix + ".txt") fig_path = os.path.join(output_path, "fig", "fig_sequel_platformqc_length" + suffix + ".png") fig_path_bar = os.path.join( output_path, "fig", "fig_sequel_platformqc_adapter" + suffix + ".png") json_path = os.path.join(output_path, "QC_vals_sequel" + suffix + ".json") # json tobe_json = {} # output_path will be made too. if not os.path.isdir(os.path.join(output_path, "log")): os.makedirs(os.path.join(output_path, "log"), exist_ok=True) if not os.path.isdir(os.path.join(output_path, "fig")): os.makedirs(os.path.join(output_path, "fig"), exist_ok=True) ### logging conf ### logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) fh = logging.FileHandler(log_path, 'w') sh = logging.StreamHandler() formatter = logging.Formatter( '%(module)s:%(asctime)s:%(lineno)d:%(levelname)s:%(message)s') fh.setFormatter(formatter) sh.setFormatter(formatter) logger.addHandler(sh) logger.addHandler(fh) ##################### logger.info("Started sequel platform QC for %s" % data_path) # sequel xml_file = get_sts_xml_path(data_path, logger) if not xml_file: logger.warning("sts.xml is missing. Productivity won't be shown") [p0, p1, p2] = [None] * 3 else: [p0, p1, p2] = parse_sts_xml( xml_file, ns="http://pacificbiosciences.com/PacBioBaseDataModel.xsd") logger.info("Parsed sts.xml") [subr_bam_p, scrap_bam_p] = get_bam_path(data_path, logger) if subr_bam_p and scrap_bam_p: scrap_bam = pysam.AlignmentFile(scrap_bam_p, 'rb', check_sq=False) subr_bam = pysam.AlignmentFile(subr_bam_p, 'rb', check_sq=False) else: logger.ERROR("Platform QC failed due to missing bam files") return 1 bam_reads = {} snr = [[], [], [], []] hr_fraction = [] tot_lengths = [] hr_lengths = [] ad_num_stat = {} control_throughput = 0 if get_readtype(scrap_bam.header) == 'SCRAP': logger.info("Started to load scraps.bam...") control_throughput = set_scrap(bam_reads, scrap_bam, snr) else: logger.ERROR("the given scrap file has incorrect header.") logger.info("Scrap reads were loaded.") if get_readtype(subr_bam.header) == 'SUBREAD': logger.info("Started to load subreads.bam...") set_subreads(bam_reads, subr_bam, snr) else: logger.ERROR("the given subread file has incorrect header.") logger.info("Subreads were loaded.") for k, v in bam_reads.items(): #print(k) l = construct_polread(v) #print(l) if l[4]: hr_fraction.append(l[2] / l[3]) tot_lengths.append(l[3]) hr_lengths.append(l[2]) if l[5] in ad_num_stat: ad_num_stat[l[5]] += 1 else: ad_num_stat[l[5]] = 1 max_adnum = max(ad_num_stat.keys()) min_adnum = min(ad_num_stat.keys()) left = [] height = [] for i in range(min_adnum, max_adnum + 1): left.append(i) if i in ad_num_stat: height.append(ad_num_stat[i]) else: height.append(0) plt.bar(left, height) plt.savefig(fig_path_bar, bbox_inches="tight") plt.close() logger.info("Plotted bar plot for adpter occurence") (a, b) = lq_gamma.estimate_gamma_dist_scipy(hr_lengths) logger.info("Fitting by Gamma dist finished.") _max = np.array(hr_lengths).max() _mean = np.array(hr_lengths).mean() _n50 = get_N50(hr_lengths) _n90 = get_NXX(hr_lengths, 90) throughput = np.sum(hr_lengths) longest = np.max(hr_lengths) fracs = np.mean(hr_fraction) tobe_json["Productivity"] = {"P0": p0, "P1": p1, "P2": p2} tobe_json["Throughput"] = int(throughput) tobe_json["Throughput(Control)"] = int(control_throughput) tobe_json["Longest_read"] = int(_max) 
tobe_json["Num_of_reads"] = len(hr_lengths) tobe_json["polread_gamma_params"] = [float(a), float(b)] tobe_json["Mean_polread_length"] = float(_mean) tobe_json["N50_polread_length"] = float(_n50) tobe_json["Mean_HQ_fraction"] = float(np.mean(fracs)) tobe_json["Adapter_observation"] = ad_num_stat with open(json_path, "w") as f: logger.info("Quality measurements were written into a JSON file: %s" % json_path) json.dump(tobe_json, f, indent=4) x = np.linspace(0, gamma.ppf(0.99, a, 0, b)) est_dist = gamma(a, 0, b) plt.plot(x, est_dist.pdf(x), c=rgb(214, 39, 40)) plt.grid(True) plt.hist(hr_lengths, histtype='step', bins=np.arange(min(hr_lengths), _max + b_width, b_width), color=rgb(214, 39, 40), alpha=0.7, density=True) plt.xlabel('Read length') plt.ylabel('Probability density') if _mean >= 10000: # pol read mean is expected >= 10k and <= 15k, but omit the <= 15k condition. plt.axvline(x=_mean, linestyle='dashed', linewidth=2, color=rgb(44, 160, 44), alpha=0.8) else: plt.axvline(x=_mean, linestyle='dashed', linewidth=2, color=rgb(188, 189, 34), alpha=0.8) if _n50 >= 20000: plt.axvline(x=_n50, linewidth=2, color=rgb(44, 160, 44), alpha=0.8) else: plt.axvline(x=_n50, linewidth=2, color=rgb(188, 189, 34), alpha=0.8) plt.hist(tot_lengths, histtype='step', bins=np.arange(min(tot_lengths), max(tot_lengths) + b_width, b_width), color=rgb(31, 119, 180), alpha=0.7, density=True) ymin, ymax = plt.gca().get_ylim() xmin, xmax = plt.gca().get_xlim() plt.text(xmax * 0.6, ymax * 0.72, r'$\alpha=%.3f,\ \beta=%.3f$' % (a, b)) plt.text(xmax * 0.6, ymax * 0.77, r'Gamma dist params:') plt.text(xmax * 0.6, ymax * 0.85, r'sample mean: %.3f' % (_mean, )) plt.text(xmax * 0.6, ymax * 0.9, r'N50: %.3f' % (_n50, )) plt.text(xmax * 0.6, ymax * 0.95, r'N90: %.3f' % (_n90, )) plt.text(_mean, ymax * 0.85, r'Mean') plt.text(_n50, ymax * 0.9, r'N50') plt.savefig(fig_path, bbox_inches="tight") plt.close() #plt.show() logger.info("Figs were generated.") logger.info("Finished all processes.")
def get_rate_percentile(self, percentile):
    return gamma.ppf(percentile, self.alpha, scale=1 / float(self.beta))
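# get_rate_percentile inverts a Gamma(alpha, rate=beta) CDF.  A standalone
# equivalent of the call it makes, with placeholder alpha/beta values, plus a
# roundtrip check through the CDF:
from scipy.stats import gamma

alpha, beta = 3.0, 2.0   # placeholders for self.alpha / self.beta
median_rate = gamma.ppf(0.5, alpha, scale=1 / float(beta))
print(gamma.cdf(median_rate, alpha, scale=1 / float(beta)))   # ~0.5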
# plt.hist(abc.x,bins=20,label="$\epsilon$="+str(abc.epsilon),density=True,alpha=0.5) #pmc sequence for eps in abc.epsilon_list[1:]: abc.run() abc.check() tend = time.time() print(tend-tstart,"sec") #plotting... fig=plt.figure(figsize=(10,5)) ax=fig.add_subplot(211) ax.hist(abc.x,bins=30,label="$\epsilon$="+str(abc.epsilon),density=True,alpha=0.5) ax.hist(abc.xres(),bins=30,label="resampled",density=True,alpha=0.2) alpha=alpha0+abc.nsample beta=beta0+Ysum xl = np.linspace(gammafunc.ppf(0.0001, alpha,scale=1.0/beta),gammafunc.ppf(0.9999, alpha,scale=1.0/beta), 100) ax.plot(xl, gammafunc.pdf(xl, alpha, scale=1.0/beta),label="analytic") plt.xlabel("$\lambda$") plt.ylabel("$\pi_\mathrm{ABC}$") plt.legend() ax=fig.add_subplot(212) ax.plot(abc.x,abc.w,".") plt.xlabel("$\lambda$") plt.ylabel("$weight$") plt.savefig("abcpmc.png") plt.show()
def compile(alphabet, words, nonwords): print(' Generating all possible transitions...') from itertools import product all = [] for state_size in range(args.max_state_size + 1): all += product(product(alphabet, repeat = state_size), [*alphabet, None]) def of(string): for i in range(len(string)): yield string[max(0, i - args.max_state_size):i], string[i] yield string[max(0, len(string) - args.max_state_size):], None from collections import Counter counts = Counter() for word in tqdm(words, ' Counting transitions', leave = True): for state, symbol in of(word): counts[state, symbol] += 1 state_counts = Counter() for state, symbol in tqdm(counts, ' Counting states', leave = True): state_counts[state] += counts[state, symbol] import numpy as np logprobs = np.empty(len(all)) for i, (state, symbol) in enumerate(tqdm(all, ' Computing conditional transition probabilities', leave = True)): try: logprobs[i] = np.log(state_counts[state] / counts[state, symbol]) except ZeroDivisionError: logprobs[i] = np.inf print(' Fitting flattening distribution...') from scipy.stats import gamma params = gamma.fit(logprobs[logprobs != np.inf]) print(' Flattening...') logprobs = gamma.cdf(logprobs, *params) lower_bound = np.min(logprobs) upper_bound = np.max(logprobs[logprobs != 1]) new_logprobs = np.empty(len(logprobs), int) for i, logprob in enumerate(tqdm(logprobs, ' Discretizing', leave = True)): if logprob == 1: new_logprobs[i] = 2 ** args.transition_bits - 1 else: new_logprobs[i] = round((logprob - lower_bound) * ((2 ** args.transition_bits - 2) / (upper_bound - lower_bound))) logprobs = new_logprobs data = bytearray() bit_buffer = 0 bit_buffer_size = 0 for logprob in tqdm(logprobs, ' Packing', leave = True): bit_buffer = bit_buffer << args.transition_bits | int(logprob) bit_buffer_size += args.transition_bits if bit_buffer_size % 8 == 0: data += bit_buffer.to_bytes(bit_buffer_size // 8, 'big') bit_buffer = 0 bit_buffer_size = 0 while bit_buffer_size % 8 != 0: bit_buffer = bit_buffer << args.transition_bits bit_buffer_size += args.transition_bits data += bit_buffer.to_bytes(bit_buffer_size // 8, 'big') old_logprobs = np.empty(len(logprobs)) for i, logprob in enumerate(tqdm(logprobs, ' Undiscretizing...', leave = True)): if logprob == 2 ** args.transition_bits - 1: old_logprobs[i] = 1 else: old_logprobs[i] = lower_bound + logprob * ((upper_bound - lower_bound) / (2 ** args.transition_bits - 2)) print(' Unflattening...') old_logprobs = gamma.ppf(old_logprobs, *params) old_logprobs = dict(zip(all, old_logprobs)) def params_of(strings): strings_logprobs = np.empty(len(strings)) for i, string in enumerate(strings): strings_logprobs[i] = sum(old_logprobs[state, symbol] for state, symbol in of(string)) strings_params = gamma.fit(strings_logprobs[strings_logprobs != np.inf]) _, bins, _ = plt.hist(strings_logprobs[strings_logprobs != np.inf], 500, histtype = 'step', normed = True) plt.plot(bins, gamma.pdf(bins, *strings_params)) return strings_params print(' Fitting words distribution...') words_params = params_of(words) print(' Fitting nonwords distribution...') nonwords_params = params_of(nonwords) def minify(code): if args.minify: import subprocess p = subprocess.run([str(Path(__file__).parent / 'node_modules/uglify-js/bin/uglifyjs'), '--screw-ie8', '--mangle', 'sort,toplevel', '--compress', '--bare-returns', ], input = code.encode(), stdout = subprocess.PIPE, stderr = subprocess.PIPE) if p.returncode != 0: import sys sys.stderr.buffer.write(p.stderr) p.check_returncode() code = p.stdout.decode() return code print(' 
Generating JS code...') code = minify(r''' exports.init = function(buffer) { exports.test = (new Function('buffer', buffer.utf8Slice(''' + str(len(data)) + r''')))(buffer); }; ''').encode() data += minify(r''' var abs = Math.abs; var min = Math.min; var max = Math.max; var alphabet = [ ''' + r''' '''.join('"' + symbol + '",' for symbol in alphabet) + r''' ]; var of; (function() { function fold(string) { string = Array.from(string); for (var i = alphabet.length - 1; alphabet[i].length > 1; --i) { for (var j = 0; j <= string.length - alphabet[i].length; ++j) { if (string.slice(j, j + alphabet[i].length).join('') == alphabet[i]) { string.splice(j, alphabet[i].length, alphabet[i]); } } } return string; } of = function(string) { string = fold(string); var ofString = []; for (var i = 0; i < string.length; ++i) { ofString.push([string.slice(max(0, i - ''' + str(args.max_state_size) + r'''), i), string[i]]); } ofString.push([string.slice(max(0, string.length - ''' + str(args.max_state_size) + r''')), null]); return ofString; }; })(); var all; (function() { function product(xs, ys) { var result = []; for (var i = 0; i < xs.length; ++i) { for (var j = 0; j < ys.length; ++j) { result.push([xs[i], ys[j]]); } } return result; } function power(a, k) { if (k == 0) { return [[]]; } var result = []; for (var i = 0; i < a.length; ++i) { var b = power(a, k - 1); for (var j = 0; j < b.length; ++j) { result.push([a[i]].concat(b[j])); } } return result; } all = []; for (var stateSize = 0; stateSize <= ''' + str(args.max_state_size) + r'''; ++stateSize) { all = all.concat(product(power(alphabet, stateSize), alphabet.concat([null]))); } })(); var gammaPdf, gammaPpf; (function() { var pow = Math.pow; var exp = Math.exp; var log = Math.log; var sqrt = Math.sqrt; var cof = [ 76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5, ]; function ln(x) { var j = 0; var ser = 1.000000000190015; var xx, y, tmp; tmp = (y = xx = x) + 5.5; tmp -= (xx + 0.5) * log(tmp); for (; j < 6; j++) ser += cof[j] / ++y; return log(2.5066282746310005 * ser / xx) - tmp; } gammaPdf = function(x, a) { if (x < 0) return 0; if (x === 0 && a === 1) return 1; return exp((a - 1) * log(x) - x - ln(a)); }; function lowReg(a, x) { var aln = ln(a); var ap = a; var sum = 1 / a; var del = sum; var b = x + 1 - a; var c = 1 / 1.0e-30; var d = 1 / b; var h = d; var i = 1; var ITMAX = -~(log((a >= 1) ? a : 1 / a) * 8.5 + a * 0.4 + 17); var an, endval; if (x < 0 || a <= 0) { return NaN; } else if (x < a + 1) { for (; i <= ITMAX; i++) { sum += del *= x / ++ap; } return sum * exp(-x + a * log(x) - aln); } for (; i <= ITMAX; i++) { an = -i * (i - a); b += 2; d = an * d + b; c = b + an / c; d = 1 / d; h *= d * c; } return 1 - h * exp(-x + a * log(x) - aln); } gammaPpf = function(p, a) { var j = 0; var a1 = a - 1; var EPS = 1e-8; var gln = ln(a); var x, err, t, u, pp, lna1, afac; if (p > 1) return NaN; if (p == 1) return Infinity; if (p < 0) return NaN; if (p == 0) return 0; if (a > 1) { lna1 = log(a1); afac = exp(a1 * (lna1 - 1) - gln); pp = (p < 0.5) ? 
p : 1 - p; t = sqrt(-2 * log(pp)); x = (2.30753 + t * 0.27061) / (1 + t * (0.99229 + t * 0.04481)) - t; if (p < 0.5) x = -x; x = max(1e-3, a * pow(1 - 1 / (9 * a) - x / (3 * sqrt(a)), 3)); } else { t = 1 - a * (0.253 + a * 0.12); if (p < t) x = pow(p / t, 1 / a); else x = 1 - log(1 - (p - t) / (1 - t)); } for(; j < 12; j++) { if (x <= 0) return 0; err = lowReg(a, x) - p; if (a > 1) t = afac * exp(-(x - a1) + a1 * (log(x) - lna1)); else t = exp(-x + a1 * log(x) - gln); u = err / t; x -= (t = u / (1 - 0.5 * min(1, u * ((a - 1) / x - 1)))); if (x <= 0) x = 0.5 * (x + t); if (abs(t) < EPS * x) break; } return x; }; })(); var logprobs = {}; var bitBuffer = 0, bitBufferSize = 0; var bufferOffset = 0; for (var i = 0; i < all.length; ++i) { while (bitBufferSize < ''' + str(args.transition_bits) + r''') { bitBuffer = bitBuffer << 8 | buffer.readUInt8(bufferOffset++); bitBufferSize += 8; } var logprob = bitBuffer >> (bitBufferSize - ''' + str(args.transition_bits) + r''') & ''' + hex(2 ** args.transition_bits - 1) + r'''; bitBufferSize -= ''' + str(args.transition_bits) + r'''; if (logprob == ''' + str(2 ** args.transition_bits - 1) + r''') { logprob = 1; } else { logprob = ''' + str(lower_bound) + r''' + logprob * ''' + str((upper_bound - lower_bound) / (2 ** args.transition_bits - 2)) + r'''; } logprob = ''' + str(params[1]) + r''' + gammaPpf(logprob, ''' + str(params[0]) + r''') * ''' + str(params[2]) + r'''; logprobs[all[i]] = logprob; } return function(string) { var stringLogprob = 0; var ofString = of(string); for (var i = 0; i < ofString.length; ++i) { stringLogprob += logprobs[ofString[i]]; } if (stringLogprob == Infinity) { return false; } var wordsDensity = gammaPdf((stringLogprob - ''' + str(words_params[1]) + r''') / ''' + str(words_params[2]) + r''', ''' + str(words_params[0]) + r''') / ''' + str(words_params[2]) + r'''; var nonwordsDensity = gammaPdf((stringLogprob - ''' + str(nonwords_params[1]) + r''') / ''' + str(nonwords_params[2]) + r''', ''' + str(nonwords_params[0]) + r''') / ''' + str(nonwords_params[2]) + r'''; if (wordsDensity > nonwordsDensity) { return true; } if (wordsDensity < nonwordsDensity) { return false; } return Math.random() >= 0.5; }; ''').encode() data, is_gzipped = bytes(data), False if args.gzip: import gzip print(' Gzipping...') gzipped_data = gzip.compress(data) if len(gzipped_data) < len(data): data, is_gzipped = gzipped_data, True return code, data, is_gzipped
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gamma, norm
#from statsmodels.distributions.empirical_distribution import ECDF
from scipy.integrate import quad
from scipy.optimize import fsolve

a = 10    # shape
b = 2     # scale
n = 1000  # size

# task 1
samples_numpy = np.random.gamma(a, b, n)
samples_scipy = gamma.rvs(a=a, scale=b, size=n)

# task 2
print(gamma.ppf(0.01, a))
print(gamma.ppf(0.99, a))
x = np.linspace(gamma.ppf(0.000001, a), gamma.ppf(0.99999999999, a), n)
plt.plot(x, gamma.pdf(x, a, loc=0, scale=b))
plt.hist(samples_scipy, density=True)  # `normed` was removed from matplotlib; `density` is the replacement
plt.show()

loc = 30
scale = 3
samples_numpy = np.random.normal(loc, scale, n)
samples_scipy = norm.rvs(loc=loc, scale=scale, size=n)
print(norm.ppf(0.01, loc=loc, scale=scale))
print(norm.ppf(0.99, loc=loc, scale=scale))
x = np.linspace(norm.ppf(0.00000001, loc=loc, scale=scale),
                norm.ppf(0.99999999, loc=loc, scale=scale), n)
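Since the script above samples the same gamma with both numpy and scipy, a quick sanity check (a self-contained sketch, assuming scipy is available) is to confirm that the two samplers follow the same (shape, scale) parameterization with a KS test:

# Sketch: numpy's np.random.gamma(shape, scale) and scipy's
# gamma.rvs(a=shape, scale=scale) share the (shape, scale) parameterization,
# so both sample sets should be consistent with gamma(a=shape, scale=scale).
import numpy as np
from scipy.stats import gamma, kstest

shape, scale_, n = 10, 2, 1000
s_np = np.random.gamma(shape, scale_, n)
s_sp = gamma.rvs(a=shape, scale=scale_, size=n)
print(kstest(s_np, gamma(a=shape, scale=scale_).cdf))
print(kstest(s_sp, gamma(a=shape, scale=scale_).cdf))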
import cmath
from numpy import inf, nan, zeros, sqrt, sign, real, finfo, float64, double
from scipy import stats
from scipy.stats import norm, gamma, f, t
# `pearson4cdf` and `fcdf` are helper functions assumed to be defined elsewhere
# in the same module.


def pearscdf(X, mu, sigma, skew, kurt, method, k, output):
    # pearspdf
    #   [p,type,coefs] = pearspdf(X,mu,sigma,skew,kurt)
    #
    #   Returns the probability distribution density of the Pearson distribution
    #   with mean `mu`, standard deviation `sigma`, skewness `skew` and
    #   kurtosis `kurt`, evaluated at the values in X.
    #
    #   Some combinations of moments are not valid for any random variable, and in
    #   particular, the kurtosis must be greater than the square of the skewness
    #   plus 1. The kurtosis of the normal distribution is defined to be 3.
    #
    #   The seven distribution types in the Pearson system correspond to the
    #   following distributions:
    #
    #      Type 0: Normal distribution
    #      Type 1: Four-parameter beta
    #      Type 2: Symmetric four-parameter beta
    #      Type 3: Three-parameter gamma
    #      Type 4: Not related to any standard distribution. Density proportional
    #              to (1+((x-a)/b)^2)^(-c) * exp(-d*arctan((x-a)/b)).
    #      Type 5: Inverse gamma location-scale
    #      Type 6: F location-scale
    #      Type 7: Student's t location-scale
    #
    #   Examples
    #
    #   See also
    #       pearspdf pearsrnd mean std skewness kurtosis
    #
    #   References:
    #      [1] Johnson, N.L., S. Kotz, and N. Balakrishnan (1994) Continuous
    #          Univariate Distributions, Volume 1, Wiley-Interscience.
    #      [2] Devroye, L. (1986) Non-Uniform Random Variate Generation,
    #          Springer-Verlag.

    otpt = len(output)
    # outClass = superiorfloat(mu, sigma, skew, kurt)

    if X[1] == inf:
        cdist = 1
        limstate = X[0]
    elif X[0] == -inf:
        cdist = 2
        limstate = X[1]
    else:
        cdist = 3
        limstate = X

    if sigma == 0:
        print("Warning: The standard deviation of output distribution", k,
              "is zero. No distribution or correlation can be calculated for it.")
        if mu >= X[0] and mu <= X[1]:  # mean is in the limits
            return 1, None, inf, None, None, None, None, None, None, None, None
        else:  # mean is outside the limits
            return 0, None, inf, None, None, None, None, None, None, None, None

    X = (X - mu) / sigma  # Z-score

    if method == 'MCS':
        beta1 = 0
        beta2 = 3
        beta3 = sigma ** 2
    else:
        beta1 = skew ** 2
        beta2 = kurt
        beta3 = sigma ** 2

    # Return NaN for illegal parameter values.
    if (sigma < 0) or (beta2 <= beta1 + 1):
        p = zeros(otpt) + nan
        # p = zeros(sizeout)+nan
        dtype = nan
        coefs = zeros((1, 3)) + nan
        print('Illegal parameter values passed to pearscdf! (sigma:', sigma,
              ' beta1:', beta1, ' beta2:', beta2, ')')
        return

    # Classify the distribution and find the roots of c0 + c1*x + c2*x^2
    c0 = (4 * beta2 - 3 * beta1)        # ./ (10*beta2 - 12*beta1 - 18);
    c1 = skew * (beta2 + 3)             # ./ (10*beta2 - 12*beta1 - 18);
    c2 = (2 * beta2 - 3 * beta1 - 6)    # ./ (10*beta2 - 12*beta1 - 18);

    if c1 == 0:  # symmetric distributions
        if beta2 == 3:
            dtype = 0
            a1 = 0
            a2 = 0
        else:
            if beta2 < 3:
                dtype = 2
            elif beta2 > 3:
                dtype = 7
            a1 = -sqrt(abs(c0 / c2))
            a2 = -a1  # symmetric roots
    elif c2 == 0:  # kurt = 3 + 1.5*skew^2
        dtype = 3
        a1 = -c0 / c1  # single root
        a2 = a1
    else:
        kappa = c1 ** 2 / (4 * c0 * c2)
        if kappa < 0:
            dtype = 1
        elif kappa < 1 - finfo(float64).eps:
            dtype = 4
        elif kappa <= 1 + finfo(float64).eps:
            dtype = 5
        else:
            dtype = 6
        # Solve the quadratic for general roots a1 and a2 and sort by their real parts
        csq = c1 ** 2 - 4 * c0 * c2
        if c1 ** 2 - 4 * c0 * c2 < 0:
            tmp = -(c1 + sign(c1) * cmath.sqrt(c1 ** 2 - 4 * c0 * c2)) / 2
        else:
            tmp = -(c1 + sign(c1) * sqrt(c1 ** 2 - 4 * c0 * c2)) / 2
        a1 = tmp / c2
        a2 = c0 / tmp
        if real(a1) > real(a2):
            tmp = a1
            a1 = a2
            a2 = tmp

    denom = (10 * beta2 - 12 * beta1 - 18)
    if abs(denom) > sqrt(finfo(double).tiny):
        c0 = c0 / denom
        c1 = c1 / denom
        c2 = c2 / denom
        coefs = [c0, c1, c2]
    else:
        dtype = 1  # this should have happened already anyway
        # beta2 = 1.8 + 1.2*beta1, and c0, c1, and c2 -> Inf. But a1 and a2 are
        # still finite.
        coefs = zeros((1, 3)) + inf

    if method == 'MCS':
        dtype = 8

    # Generate standard (zero mean, unit variance) values
    if dtype == 0:
        # normal: standard support (-Inf,Inf)
        # m1 = zeros(outClass);
        # m2 = ones(outClass);
        m1 = 0
        m2 = 1
        p = norm.cdf(X[1], m1, m2) - norm.cdf(X[0], m1, m2)
        lo = norm.ppf(3.39767E-06, mu, sigma)
        hi = norm.ppf(0.999996602, mu, sigma)
        Inv1 = norm.ppf(p, 0, 1)
        # Inv1=norm.ppf( normcdf(X[0],m1,m2), 0,1 );
        # Inv2 = norm.ppf(normcdf(X[1], m1, m2), 0, 1)

    elif dtype == 1:
        # four-parameter beta: standard support (a1,a2)
        if abs(denom) > sqrt(finfo(double).tiny):
            m1 = (c1 + a1) / (c2 * (a2 - a1))
            m2 = -(c1 + a2) / (c2 * (a2 - a1))
        else:
            # c1 and c2 -> Inf, but c1/c2 has finite limit
            m1 = c1 / (c2 * (a2 - a1))
            m2 = -c1 / (c2 * (a2 - a1))
        # r = a1 + (a2 - a1) .* betarnd(m1+1,m2+1,sizeOut);
        X = (X - a1) / (a2 - a1)  # Transform to 0-1 interval
        # lambda = -(a2-a1)*(m1+1)./(m1+m1+2)-a1;
        # X = (X - lambda - a1)./(a2-a1);
        alph = m1 + 1
        beta = m2 + 1
        if alph < 1.001 and beta < 1.001:
            alph = 1.001
            beta = 1.001
        mode = (alph - 1) / (alph + beta - 2)
        if mode < 0.1:
            if alph > beta:
                alph = max(2.0, alph)
                beta = (alph - 1) / 0.9 - alph + 2
            elif beta > alph:
                beta = max(2.0, beta)
                alph = (0.1 * (beta - 2) + 1) / (1 - 0.1)
        elif mode > 0.9:
            if alph > beta:
                alph = max(2.0, alph)
                beta = (alph - 1) / 0.9 - alph + 2
            elif beta > alph:
                beta = max(2.0, beta)
                alph = (0.1 * (beta - 2) + 1) / (1 - 0.1)
        p = stats.beta.cdf(X[1], alph, beta) - stats.beta.cdf(X[0], alph, beta)
        lo = a1 * sigma + mu
        hi = a2 * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
        # Inv1=norm.ppf( beta.cdf(X[0],m1+1,m2+1), 0,1 );
        # Inv2 = norm.ppf(beta.cdf(X[1], m1 + 1, m2 + 1), 0, 1)
        # X = X*(a2-a1) + a1;   % Undo interval transformation
        # r = r + (0 - a1 - (a2-a1).*(m1+1)./(m1+m2+2));

    elif dtype == 2:
        # symmetric four-parameter beta: standard support (-a1,a1)
        m = (c1 + a1) / (c2 * 2 * abs(a1))
        m1 = m
        m2 = m
        X = (X - a1) / (2 * abs(a1))
        # r = a1 + 2*abs(a1) .* betapdf(X,m+1,m+1);
        alph = m + 1
        beta = m + 1
        if alph < 1.01:
            alph = 1.01
            beta = 1.01
        p = stats.beta.cdf(X[1], alph, beta) - stats.beta.cdf(X[0], alph, beta)
        lo = a1 * sigma + mu
        hi = a2 * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
        # Inv1=norm.ppf( beta.cdf(X[0],m+1,m+1), 0,1 );
        # Inv2 = norm.ppf(beta.cdf(X[1], m + 1, m + 1), 0, 1)
        # X = a1 + 2*abs(a1).*X;

    elif dtype == 3:
        # three-parameter gamma: standard support (a1,Inf) or (-Inf,a1)
        m = (c0 / c1 - c1) / c1
        m1 = m
        m2 = m
        X = (X - a1) / c1
        # r = c1 .* gampdf(X,m+1,1,sizeOut) + a1;
        p = gamma.cdf(X[1], m + 1, scale=1) - gamma.cdf(X[0], m + 1, scale=1)  # scale=1 (third positional arg would be loc)
        lo = (gamma.ppf(3.39767E-06, m + 1, scale=1) * c1 + a1) * sigma + mu
        hi = (gamma.ppf(0.999996602, m + 1, scale=1) * c1 + a1) * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
        # Inv1=norm.ppf( gamcdf(X[0],m+1,1), 0,1 );
        # Inv2 = norm.ppf(gamcdf(X[1], m + 1, 1), 0, 1)
        # X = c1 .* X + a1;

    elif dtype == 4:
        # Pearson IV is not a transformation of a standard distribution: density
        # proportional to (1+((x-lambda)/a)^2)^(-m) * exp(-nu*arctan((x-lambda)/a)),
        # standard support (-Inf,Inf)
        X = X * sigma + mu
        r = 6 * (beta2 - beta1 - 1) / (2 * beta2 - 3 * beta1 - 6)
        m = 1 + r / 2
        nu = -r * (r - 2) * skew / sqrt(16 * (r - 1) - beta1 * (r - 2) ** 2)
        a = sqrt(beta3 * (16 * (r - 1) - beta1 * (r - 2) ** 2)) / 4
        _lambda = mu - ((r - 2) * skew * sigma) / 4  # gives zero mean
        m1 = m
        m2 = nu
        # X = (X - lambda)./a;
        if cdist == 1:
            p = 1 - pearson4cdf(X[0], m, nu, a, _lambda, mu, sigma)
        elif cdist == 2:
            p = pearson4cdf(X[1], m, nu, a, _lambda, mu, sigma)
        elif cdist == 3:
            p = pearson4cdf(X[1], m, nu, a, _lambda, mu, sigma) - pearson4cdf(X[0], m, nu, a, _lambda, mu, sigma)
        lo = norm.ppf(3.39767E-06, mu, sigma)
        hi = norm.ppf(0.999996602, mu, sigma)
        Inv1 = norm.ppf(p, 0, 1)
        # Inv1=norm.ppf( pearson4cdf(X[0],m,nu,a,lambda,mu,sigma), 0,1 );
        # Inv2 = norm.ppf(pearson4cdf(X[1], m, nu, a, _lambda, mu, sigma), 0, 1)
        # C = X.*a + lambda;
        # C = diff(C);
        # C = C(1);
        # p = p./(sum(p)*C);

    elif dtype == 5:
        # inverse gamma location-scale: standard support (-C1,Inf) or (-Inf,-C1)
        C1 = c1 / (2 * c2)
        # r = -((c1 - C1) ./ c2) ./ gampdf(X,1./c2 - 1,1) - C1;
        X = -((c1 - C1) / c2) / (X + C1)
        m1 = c2
        m2 = 0
        p = gamma.cdf(X[1], 1. / c2 - 1, scale=1) - gamma.cdf(X[0], 1. / c2 - 1, scale=1)
        lo = (-((c1 - C1) / c2) / gamma.ppf(3.39767E-06, 1 / c2 - 1, scale=1) - C1) * sigma + mu
        hi = (-((c1 - C1) / c2) / gamma.ppf(0.999996602, 1 / c2 - 1, scale=1) - C1) * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
        # Inv1=norm.ppf( gamcdf(X[0],1./c2 - 1,1), 0,1 );
        # Inv2 = norm.ppf(gamcdf(X[1], 1. / c2 - 1, 1), 0, 1)
        # X = -((c1-C1)./c2)./X-C1;

    elif dtype == 6:
        # F location-scale: standard support (a2,Inf) or (-Inf,a1)
        m1 = (a1 + c1) / (c2 * (a2 - a1))
        m2 = -(a2 + c1) / (c2 * (a2 - a1))
        # a1 and a2 have the same sign, and they've been sorted so a1 < a2
        if a2 < 0:
            nu1 = 2 * (m2 + 1)
            nu2 = -2 * (m1 + m2 + 1)
            X = (X - a2) / (a2 - a1) * (nu2 / nu1)
            # r = a2 + (a2 - a1) .* (nu1./nu2) .* fpdf(X,nu1,nu2);
            p = fcdf(X[1], nu1, nu2) - fcdf(X[0], nu1, nu2)
            lo = (f.ppf(3.39767E-06, nu1, nu2) + a2) * sigma + mu
            hi = (f.ppf(0.999996602, nu1, nu2) + a2) * sigma + mu
            Inv1 = norm.ppf(p, 0, 1)
            # Inv1=norm.ppf( fcdf(X[0],nu1,nu2), 0,1 );
            # Inv2 = norm.ppf(fcdf(X[1], nu1, nu2), 0, 1)
            # X = a2 + (a2-a1).*(nu1./nu2).*X
        else:  # 0 < a1
            nu1 = 2 * (m1 + 1)
            nu2 = -2 * (m1 + m2 + 1)
            X = (X - a1) / (a1 - a2) * (nu2 / nu1)
            # r = a1 + (a1 - a2) .* (nu1./nu2) .* fpdf(X,nu1,nu2);
            p = -fcdf(X[1], nu1, nu2) + fcdf(X[0], nu1, nu2)
            hi = (-f.ppf(3.39767E-06, nu1, nu2) + a1) * sigma + mu
            lo = (-f.ppf(0.999996602, nu1, nu2) + a1) * sigma + mu
            Inv1 = norm.ppf(p, 0, 1)
            # Inv1=norm.ppf( fcdf(X[0],nu1,nu2), 0,1 );
            # Inv2 = norm.ppf(fcdf(X[1], nu1, nu2), 0, 1)
            # X = a1 + (a1-a2).*(nu1./nu2).*X;

    elif dtype == 7:
        # t location-scale: standard support (-Inf,Inf)
        nu = 1. / c2 - 1
        X = X / sqrt(c0 / (1 - c2))
        m1 = nu
        m2 = 0
        p = t.cdf(X[1], nu) - t.cdf(X[0], nu)
        lo = t.ppf(3.39767E-06, nu) * sqrt(c0 / (1 - c2)) * sigma + mu
        hi = t.ppf(0.999996602, nu) * sqrt(c0 / (1 - c2)) * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
        # Inv1=norm.ppf( tcdf(X[0],nu), 0,1 );
        # Inv2 = norm.ppf(tcdf(X[1], nu), 0, 1)
        # p = sqrt(c0./(1-c2)).*tpdf(X,nu);
        # X = sqrt(c0./(1-c2)).*X;

    else:
        print("ERROR: Unknown data type!")

    # elif dtype == 8:  # Monte Carlo Simulation Histogram
    #     out = kurt
    #     p = skew
    #     m1 = 0
    #     m2 = 0

    # scale and shift
    # X = X.*sigma + mu;  % Undo z-score

    if dtype != 1 and dtype != 2:
        mu_s = (mu - lo) / (hi - lo)
        sigma_s = sigma ** 2 / (hi - lo) ** 2
        alph = ((1 - mu_s) / sigma_s - 1 / mu_s) * mu_s ** 2
        beta = alph * (1 / mu_s - 1)
        if alph > 70 or beta > 70:
            alph = 70
            beta = 70
            lo = mu - 11.87434 * sigma
            hi = 2 * mu - lo

    return p, dtype, Inv1, m1, m2, a1, a2, alph, beta, lo, hi
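A rough usage sketch of the function above. The semantics of `method`, `k`, and `output` are inferred from the code (any value other than 'MCS' selects the moment-based branch, `k` is only used in the zero-variance warning, and `output` only supplies a length), so the values below are placeholders:

# Hypothetical call: probability that a response with mean 5, std 1, zero skew
# and kurtosis 3 (Pearson type 0, i.e. normal) falls in the interval [4, 6].
import numpy as np

X = np.array([4.0, 6.0])   # lower and upper limit state
p, dtype, Inv1, m1, m2, a1, a2, alph, beta, lo, hi = pearscdf(
    X, mu=5.0, sigma=1.0, skew=0.0, kurt=3.0,
    method='FFNI',         # placeholder: anything other than 'MCS' uses the moments
    k=1, output=[0.0])
print(p, dtype)            # p is approximately 0.6827, dtype == 0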
line = file.readline()
for _ in range(2):
    line = file.readline()
tree2 = skbio.read(StringIO(line), 'newick', skbio.TreeNode)
tree = get_tree(tree1, tree2)

# Load rate categories
with open(f'../asr_indel/out/{OGid}.iqtree') as file:
    line = file.readline()
    while not line.startswith('Model of rate heterogeneity:'):
        line = file.readline()
    num_categories = int(line.rstrip().split(' Gamma with ')[1][0])
    alpha = float(file.readline().rstrip().split(': ')[1])
igfs = []  # Incomplete gamma function evaluations
for i in range(num_categories + 1):
    x = gamma.ppf(i / num_categories, a=alpha, scale=1 / alpha)
    igfs.append(gammainc(alpha + 1, alpha * x))
rates = []  # Normalized rates
for i in range(num_categories):
    rate = num_categories * (igfs[i + 1] - igfs[i])
    rates.append((rate, 1 / num_categories))

# Load sequence and convert to vectors at tips of tree
mca = read_fasta(f'../asr_indel/out/{OGid}.mfa')
tips = {tip.name: tip for tip in tree.tips()}
for header, seq in mca:
    tip = tips[header[1:5]]
    conditional = np.zeros((2, len(seq)))
    for j, sym in enumerate(seq):
        conditional[int(sym), j] = 1
    tip.conditional = conditional
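The rate-category block above is the standard discrete-gamma construction (Yang 1994): categories are equiprobable quantile bins of a Gamma(alpha, 1/alpha) distribution, and each category's rate is the mean of the distribution within its bin, computed from differences of the regularized incomplete gamma function. A small self-contained sketch with a made-up alpha, whose category rates should average to 1:

# Sketch of the discrete-gamma rates; alpha here is hypothetical, not read
# from an .iqtree file.
import numpy as np
from scipy.stats import gamma
from scipy.special import gammainc

alpha, k = 0.75, 4
cuts = gamma.ppf(np.arange(k + 1) / k, a=alpha, scale=1 / alpha)  # bin edges
igfs = gammainc(alpha + 1, alpha * cuts)   # partial expectations at the edges
rates = k * np.diff(igfs)                  # mean rate within each quantile bin
print(rates, rates.mean())                 # mean of the category rates is ~1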