def do_mh(display=False):
    f1_pdf = norm(1, sqrt(25)).pdf
    f2_pdf = norm(-2, 1).pdf
    f3_pdf = norm(3, 2).pdf

    def f_pdf(x):
        return 0.2 * f1_pdf(x) + 0.3 * f2_pdf(x) + 0.5 * f3_pdf(x)

    mew, sigma = 0, 100
    samples = []
    num_accepts = 0
    num_samples = 500
    x = rand.normal(mew, sigma, 1)[0]  # initialize with a direct sample
    for i in range(num_samples):
        x_prime = x + rand.normal(mew, sigma, 1)[0]
        # p(x' -> x) / p(x -> x') = 1 b/c normal is symmetric
        alpha = (f_pdf(x_prime) / f_pdf(x))
        u = rand.uniform(0, 1, 1)[0]
        if u < alpha:
            # accept!
            samples.append(x_prime)
            x = x_prime
            num_accepts += 1
        else:
            # reject :(
            samples.append(x)
    print "Metropolis Hastings"
    print "\taccept_rate = {0}".format(float(num_accepts)/num_samples)
    plot_histogram(samples,
                   "Histogram of 500 Metropolis Hastings Samples with SD={0}".format(sigma),
                   "mh_sd-{0}.jpg".format(sigma),
                   display=display, bins=50, width=0.2)
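# plot_histogram is not shown in this snippet. A minimal sketch of what such a
# helper might look like, matching the call signature used above and assuming
# matplotlib and numpy are available (hypothetical reconstruction, not the
# original helper):
import numpy as np
import matplotlib.pyplot as plt

def plot_histogram(samples, title, filename, display=False, bins=50, width=0.2):
    # Bin the samples and draw a bar chart of the normalized counts.
    counts, edges = np.histogram(samples, bins=bins, density=True)
    centers = 0.5 * (edges[:-1] + edges[1:])
    plt.figure()
    plt.bar(centers, counts, width=width)
    plt.title(title)
    plt.savefig(filename)
    if display:
        plt.show()
    plt.close()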
def white_gaussian_polar(self, phase_dev, mag_dev, n_ports=1, **kwargs):
    '''
    Complex zero-mean gaussian white-noise network.

    Creates a network whose s-matrix is complex zero-mean gaussian
    white-noise, of given standard deviations for phase and magnitude
    components. This 'noise' network can be added to networks to
    simulate additive noise.

    Parameters
    ----------
    phase_dev : number
        standard deviation of phase
    mag_dev : number
        standard deviation of magnitude
    n_ports : int
        number of ports.
    \*\*kwargs : passed to :class:`~skrf.network.Network`
        initializer

    Returns
    -------
    result : :class:`~skrf.network.Network` object
        a noise network
    '''
    shape = (self.frequency.npoints, n_ports, n_ports)
    phase_rv = stats.norm(loc=0, scale=phase_dev).rvs(size=shape)
    mag_rv = stats.norm(loc=0, scale=mag_dev).rvs(size=shape)

    result = Network(**kwargs)
    result.frequency = self.frequency
    result.s = mag_rv * npy.exp(1j * phase_rv)
    return result
def sense_door(x, m):
    """ A door sensor returns either True (=door) or False (=no door) with some perception errors """
    # Set the (true) perception model: a gaussian mixture model with three modals (=peaks=models)
    multimodal = [(m["left-door"], 0.25), (m["middle-door"], 0.25), (m["right-door"], 0.25)]
    door_width = m["door-width"]

    # lamdas contains the weight to each element in probs
    # lamdas is normalized to sum up to one
    dists = [abs(modal[0] - x) for modal in multimodal]
    # for now, lamda is one for the nearest modal, otherwise zero
    lamdas = [1.0 if d == min(dists) else 0.0 for d in dists]

    p_total = 0
    for i, modal in enumerate(multimodal):
        cdf_lo = stat.norm(loc=modal[0], scale=modal[1]).cdf(x - door_width / 2)
        cdf_hi = stat.norm(loc=modal[0], scale=modal[1]).cdf(x + door_width / 2)
        p = cdf_hi - cdf_lo
        p_total = p_total + (lamdas[i] * p)

    # This is a bernoulli dist.
    door = np.random.binomial(n=1, p=p_total)
    return door
def mcmc_sym_dist(alignment, num_imp, dem_ratios, directory, length, burnin):
    acceptances = 0
    d = transprobs(TRANSITIONS, MARGINAL)
    pd = pdn(alignment)
    mins = np.array([sorted(i) for i in pd])
    nloc, nscale = norm.fit(mins)
    dist = norm(nloc, nscale)

    # Build first state of Markov chain
    print 'Imputing first alignment...'
    current = impute.imp_align(num_imp, alignment, dem_ratios)
    current.loglik = loglik(current) + math.log(distlik(current, num_imp, nloc, 1000))
    print '\t Log likelihood %2f' % current.loglik
    if not burnin:
        AlignIO.write(current, '%s/%d.fasta' % (directory, 0), 'fasta')

    # Run chain
    for i in xrange(1, length + 1):
        proposal = propose(current, num_imp, max(norm(loc=2, scale=1).rvs(), 1), d)
        l1 = loglik(proposal)
        l2 = math.log(distlik(proposal, num_imp, nloc, 1000))
        proposal.loglik = l1 + l2
        p = proposal.loglik - current.loglik
        print 'Current LLH: %2f; Proposed LLH: %2f' % (current.loglik, proposal.loglik)
        print '\tPhylogeny component: %2f; Distance component: %2f' % (l1, l2)
        print '\tAcceptance probability %e' % math.exp(p)
        if random.random() < math.exp(p):
            current = proposal
            acceptances += 1
            print '\tAccepted'
        else:
            print '\tNot accepted'
        if i > burnin:
            AlignIO.write(current, '%s/%d.fasta' % (directory, i - burnin), 'fasta')
    return float(acceptances) / length
def main():
    fig = plt.figure()
    # the number of samples
    for c, datapoints in enumerate([2, 4, 10, 100]):
        ds = normal(loc=0, scale=1, size=datapoints)
        # estimate arithmetic mean
        mu = np.mean(ds)
        # estimate standard deviation
        sigma = np.sqrt(np.var(ds))

        subplot = fig.add_subplot(2, 2, c + 1)
        subplot.set_title("N=%d" % datapoints)

        linex = np.arange(-10, 10.1, 0.1)
        orig = norm(loc=0, scale=1)
        # norm.pdf
        # Probability density function
        subplot.plot(linex, orig.pdf(linex), color='green', linestyle='--')

        est = norm(loc=mu, scale=np.sqrt(sigma))
        label = "Sigma=%.2f" % sigma
        subplot.plot(linex, est.pdf(linex), color="red", label=label)
        subplot.legend(loc=1)

        subplot.scatter(ds, orig.pdf(ds), marker='o', color='blue')
        subplot.set_xlim(-4, 4)
        subplot.set_ylim(0)
    fig.show()
def power_dist(self, speed, direction, sstd=None, dstd=None, n=None, normalise=False):
    """Returns an array of power interpolated from the speed and direction
    pairs, with an independent normal distribution applied to the speed and
    direction.

    Due to the implementation of the RectBivariateSpline, which requires
    that the data is monotonic increasing, this must loop through values
    and do them pairwise.

    Arguments:
        speed: 1d array of wind speed values
        direction: 1d array of wind direction values
        sstd: standard deviation to apply to wind speed values when
            sampling from the Normal distribution
        dstd: standard deviation to apply to wind direction values when
            sampling from the Normal distribution
        n: number of sample points to use when drawing from distributions

    Returns:
        ndarray with first dimension the same as speed and direction, and
        second dimension of size n if specified"""

    from scipy.stats import norm

    assert speed.shape == direction.shape

    # generate a randomly sampled speed distribution
    sdist = np.array([norm(loc=s, scale=sstd).rvs(n) for s in speed])
    ddist = np.array([norm(loc=d, scale=dstd).rvs(n) for d in direction])
    pdist = np.array([self._spline(s, d) for (s, d) in zip(sdist.flatten(), ddist.flatten())])

    if normalise:
        print '\n\n\n*********NORMALISING****************'
        pdist = pdist / float(np.max(self._pcentres))

    return pdist.reshape(speed.shape[0], n)
def pdf(self, sample):
    """ get the probability of a specific sample """
    v = sample[0]
    pv = ss.norm(0, 3).pdf(v)
    xs = sample[1:]
    pxs = [ss.norm(0, np.sqrt(np.e**v)).pdf(x_k) for x_k in xs]
    return np.array([pv] + pxs)
def main():
    # p_1 is the normal distribution for mu=4, sigma=1 and a P(C=1)=0.3.
    # p_2 is the normal distribution for mu=7, sigma=2 and a P(C=2)=0.7.
    x = sp.linspace(-4, 15, 100)
    p_1 = norm(loc=4, scale=1).pdf(x) * 0.3
    p_2 = norm(loc=7, scale=1.5).pdf(x) * 0.7

    # p_1 and p_2 are plotted.
    plt.plot(x, p_1, 'b-', label='$P_{XC}(x, C=1)$')
    plt.plot(x, p_2, 'r-', label='$P_{XC}(x, C=2)$')

    # P(C=1|x) and P(C=2|x) plotted.
    # P(C=1|x) = p_1 / (p_1 + p_2)
    # P(C=2|x) = p_2 / (p_1 + p_2)
    plt.plot(x, p_1 / (p_1 + p_2), 'b--', label='$P(C=1|X)$')
    plt.plot(x, p_2 / (p_1 + p_2), 'r--', label='$P(C=2|X)$')

    plt.title("Normal distributions of the example and their class boundaries"
              "\nFor P(C=1)=0.3, $\mu_1=4$ and $\sigma_1=1$"
              " and for P(C=2)=0.7, $\mu_2=7$ and $\sigma_2=2$")
    plt.legend()
    plt.xlabel("x")
    plt.xlim(-4, 15)
    plt.ylim(0, 1)
    plt.savefig("min_err_class.png")
def logllk(theta):
    # Estimands (declare and transfer to constrained parameters)
    betak = theta[0]
    sigmaeps = np.exp(theta[1])
    pibetan = theta[2]
    gamma = theta[3]
    sigmaeta = np.exp(theta[4])
    covepseta = (1/(1+np.exp(-theta[5])) - .5)*2*np.exp(theta[1])*np.exp(theta[4])

    # Locals
    covxieta = sigmaeta**2 - covepseta
    sigmaxi = np.sqrt(sigmaeta**2 + sigmaeps**2 - 2*covepseta)
    xi_star = z*gamma - n*(pibetan) - kappa*betak

    # l_0
    l_0 = norm(0, 1).cdf(-xi_star/sigmaxi)

    # l_1
    pdf_l1 = norm(0, 1).pdf((w - z*gamma)/sigmaeta)
    arg1_cdf_l1 = xi_star + (covxieta/(sigmaeta**2))*(w - z*gamma)
    arg2_cdf_l1 = np.sqrt(sigmaxi**2 - (covxieta**2)/(sigmaeta**2))
    cdf_l1 = norm(0, 1).cdf(arg1_cdf_l1/arg2_cdf_l1)
    l_1 = (1/sigmaeta)*pdf_l1*cdf_l1

    return -np.sum(d*np.log(l_1) + (1-d)*np.log(l_0))
def _get_random_field(self):
    '''simulates the Gaussian random field'''
    if self.seed == True:
        np.random.seed(101)
    # evaluate the eigenvalues and eigenvectors of the autocorrelation matrix
    _lambda, phi = self.eigenvalues
    # simulation points from 0 to 1 with an equidistant step for the LHS
    randsim = linspace(0, 1, len(self.xgrid) + 1) - 0.5 / (len(self.xgrid))
    randsim = randsim[1:]
    # shuffling points for the simulation
    shuffle(randsim)
    # matrix containing standard Gauss distributed random numbers
    xi = transpose(
        ones((self.nsim, len(self.xgrid))) * array([norm().ppf(randsim)]))
    # eigenvalue matrix
    LAMBDA = eye(len(self.xgrid)) * _lambda
    # cutting out the real part
    ydata = dot(dot(phi, (LAMBDA) ** 0.5), xi).real
    if self.distr_type == 'Gauss':
        # scaling the std. distribution
        scaled_ydata = ydata * self.stdev + self.mean
    elif self.distr_type == 'Weibull':
        # setting Weibull params
        Pf = norm().cdf(ydata)
        scaled_ydata = weibull_min(
            self.shape, scale=self.scale, loc=self.loc).ppf(Pf)
    self.reevaluate = False
    rf = reshape(scaled_ydata, len(self.xgrid))
    if self.non_negative_check == True:
        if (rf < 0).any():
            raise ValueError, 'negative value(s) in random field'
    return rf
def sample(self, model, evidence):
    g = evidence['g']
    h = evidence['h']
    C = evidence['C']
    z = evidence['z']
    shot_id = evidence['shot_id']
    noise_proportion = evidence['noise_proportion']
    observation_var_g = evidence['observation_var_g']
    observation_var_h = evidence['observation_var_h']
    canopy_cover = model.known_params['canopy_cover']
    z_min = model.known_params['z_min']
    z_max = model.known_params['z_max']
    prior_p = model.hyper_params['T']['p']
    N = len(z)
    T = zeros(N)
    noise_rv = stats.uniform(z_min, z_max - z_min)
    min_index = min(z.index)
    for i in shot_id.index:
        l = zeros(3)
        index = i - min_index
        shot_index = shot_id[i] - min(shot_id)
        l[0] = noise_proportion * noise_rv.pdf(z[i])
        g_norm = stats.norm(g[shot_index], sqrt(observation_var_g))
        C_i = canopy_cover[C[shot_index]]
        l[1] = (1 - noise_proportion) * (1 - C_i) * g_norm.pdf(z[i])
        h_norm = stats.norm(h[shot_index] + g[shot_index], sqrt(observation_var_h))
        if z[i] > g[shot_index] + 3:
            l[2] = (1 - noise_proportion) * (C_i) * h_norm.pdf(z[i])
        p = l / sum(l)
        T[index] = Categorical(p).rvs()
    return T
def __test():
    L = 17000000
    X_p = sst.norm.rvs(0., 1., size=L)
    X_q = sst.norm.rvs(5., 1., size=L)
    p = sst.norm(loc=0, scale=1).pdf
    q = sst.norm(loc=5, scale=1).pdf
    f = lambda x: x >= 5
    print "S_p =", np.mean(f(X_p))
    print "var_p =", np.var(f(X_p))
    print "S_q =", np.mean(p(X_q) / q(X_q) * f(X_q))
    print "var_q =", np.var(p(X_q) / q(X_q) * f(X_q))
    print "S =", scipy.integrate.quad(sst.norm.pdf, 5.0, np.inf)[0]

    #plt.figure(figsize=(8, 6))
    #plt.tick_params(labelbottom='off')
    #plt.tick_params(labelleft='off')
    #plt.gca().get_xaxis().set_ticks_position('none')
    #plt.gca().get_yaxis().set_ticks_position('none')
    #plt.xlim([0,22])
    #plt.ylim([0,13])
    x = np.arange(0.0, 35.0, 0.05)
def ergodic_chain(args):
    (burnin, run_length, alpha_count_slow, alpha_count_fast, alpha_mu_slow,
     alpha_mu_fast, buckets_slow, buckets_fast) = args
    np.random.seed((os.getpid() << 16) | (int(time.time()) & 0xFFFF))
    rh_slow = create_rolling_histogram_class(
        Bucket=create_bucket_class(
            alpha_mu=alpha_mu_slow, alpha_count=alpha_count_slow),
        target_buckets=buckets_slow)()
    rh_fast = create_rolling_histogram_class(
        Bucket=create_bucket_class(
            alpha_mu=alpha_mu_fast, alpha_count=alpha_count_fast),
        target_buckets=buckets_fast)()
    jagged = [stats.uniform(x, x + 1) for x in range(200)]
    #for val in gen_value(jagged, burnin):
    #for val in gen_value([stats.uniform(0, 1)], burnin):
    for val in gen_value([stats.norm(0, 1)], burnin):
    #for val in gen_value([stats.cauchy(0)], burnin):
        rh_slow.update(val)
        rh_fast.update(val)
    data = [[] for _ in range(len(FUNC_LIST))]
    #for val in gen_value(jagged, run_length):
    #for val in gen_value([stats.uniform(0, 1)], run_length):
    for val in gen_value([stats.norm(0, 1)], run_length):
    #for val in gen_value([stats.cauchy(0)], run_length):
        rh_slow.update(val)
        rh_fast.update(val)
    cdf_long = rh_slow.get_CDF()
    cdf_short = rh_fast.get_CDF()
    for i, func in enumerate(FUNC_LIST):
        data[i].append(func(cdf_short, cdf_long))
    return data
def v_posts_from_dataframe(df, N=1e4, alpha=0.23, l0=20, sigl=20):
    """
    names: Prot, e_Prot, R, e_R, vsini, e_vsini
    """
    vsini_posts = []
    veq_posts = []
    if 'ep_R' in df:
        for R, dR_p, dR_m, P, dP, v, dv in zip(df['R'], df['ep_R'], df['em_R'],
                                               df['Prot'], df['e_Prot'],
                                               df['vsini'], df['e_vsini']):
            vsini_posts.append(stats.norm(v, dv))
            if dR_p == dR_m:
                veq_posts.append(Veq_Posterior(R, dR_p, P, dP))
            else:
                R_dist = dists.fit_doublegauss(R, dR_m, dR_p)
                Prot_dist = stats.norm(P, dP)
                veq_posts.append(Veq_Posterior_General(R_dist, Prot_dist, N=N,
                                                       l0=l0, sigl=sigl))
    else:
        for R, dR, P, dP, v, dv in zip(df['R'], df['e_R'],
                                       df['Prot'], df['e_Prot'],
                                       df['vsini'], df['e_vsini']):
            vsini_posts.append(stats.norm(v, dv))
            veq_posts.append(Veq_Posterior(R, dR, P, dP))
    return vsini_posts, veq_posts
def draw_gauss(f, xset, yset, w, SD, beta, filename=None):
    def cal_std(x):
        psi = array([x**i for i in range(len(w))])
        std = (1/beta + mdotl(psi, SD, psi.T))**0.5
        return std

    step = linspace(0, 1, 100)
    # this change is made to match this particular polynomial setting
    xset = xset[:, 0]
    regression = [x for x in (sum([w[i]*x**i for i in range(len(w))]) for x in step)]
    std = list(map(cal_std, step))
    basis = list(map(f, step))
    Y1 = [norm(regression[i], std[i]).interval(0.95)[0] for i in range(len(step))]
    Y2 = [norm(regression[i], std[i]).interval(0.95)[1] for i in range(len(step))]

    plt.figure()
    plt.subplot(111)
    plt.xlim(0, 1)
    plt.ylim(-1.5, 1.5)
    plt.fill_between(step, Y1, Y2, color='pink')
    plt.scatter(xset, yset, color='b')
    plt.plot(step, basis, color='g')
    plt.plot(step, regression, color='r')
    if filename:
        plt.savefig(filename)
    plt.show()
def accuracy_vs_kth(self, n=100, trials=100):
    """Evaluate the accuracy of the algorithm as a function of k.

    Parameters
    ----------
    n : int
      Number of random samples.
    trials : int
      Number of independent drawing experiments.

    Returns
    -------
    (err, stddev) The mean error and standard deviation around the
    analytical value for different values of k from 1 to 15.
    """
    p = stats.norm(0, 1)
    q = stats.norm(0.2, 0.9)

    k = np.arange(1, 16)

    out = []
    for _ in range(trials):
        out.append(dd.kldiv(p.rvs(n), q.rvs(n), k))
    out = np.array(out)

    # Compare with analytical value
    err = out - analytical_KLDiv(p, q)

    # Return mean and standard deviation
    return err.mean(0), err.std(0)
def _random_different_mean(n1, n2, ncases, mean, std):
    """Return random samples from two populations with different standard
    deviation and different mean

    Generate a number of populations from Normal(mean, std^2) and Normal(0, 1).

    Parameters
    ----------
    n1 : number of samples in population 1
    n2 : number of samples in population 2
    ncases : number of populations to generate
    mean : mean of population 1
    std : standard deviation of population 1

    Returns
    -------
    pop1 : 2D array
        pop1[i, :] contains the `n1` samples from population number i
    pop2 : 2D array
        pop2[i, :] contains the `n2` samples from population number i
    """
    mean = mean*_DISTR_STD
    pop1_distr = stats.norm(loc=0., scale=_DISTR_STD*std)
    pop2_distr = stats.norm(loc=mean, scale=_DISTR_STD)
    pop1 = pop1_distr.rvs(size=(ncases, n1))
    pop2 = pop2_distr.rvs(size=(ncases, n2))
    return pop1, pop2
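# A quick usage sketch for the helper above (not part of the original module).
# It assumes the module-level constant _DISTR_STD is defined; a placeholder
# value is used here purely for illustration.
import numpy as np
from scipy import stats

_DISTR_STD = 1.0  # assumed value; the real module defines its own constant

pop1, pop2 = _random_different_mean(n1=30, n2=40, ncases=1000, mean=0.5, std=1.2)
print(pop1.shape, pop2.shape)    # (1000, 30) (1000, 40)
print(pop1.mean(), pop2.mean())  # roughly 0.0 and roughly 0.5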
def EmaxTm1(EyTm1, z, h, n, kappa, EyT):
    part1 = 1.0 - norm(0, 1).cdf(-xi_starTm1(z, h, n, kappa)/sigma_xi)
    pdf_ = norm(0, 1).pdf(-xi_starTm1(z, h, n, kappa)/sigma_xi)
    cdf_ = norm(0, 1).cdf(-xi_starTm1(z, h, n, kappa)/sigma_xi)
    part2 = W1(EyTm1, z, h, n) + delta*EmaxT(EyT, z, h + 1.0, n, kappa) + (sigma_eta_xi/sigma_xi)*(pdf_/(1.0 - cdf_))
    part3 = W0(EyTm1, kappa, n) + delta*EmaxT(EyT, z, h, n, kappa) - (sigma_eps_xi/sigma_xi)*(pdf_/cdf_)
    return part1*part2 + (1.0 - part1)*part3
def pdfPsi2(x, y):
    xDist1 = norm(-0.5, 1)
    yDist1 = norm(-1, 1)
    xDist2 = norm(1, 1)
    yDist2 = norm(0.5, 1)
    rVal = 0.5*(xDist1.pdf(x)*yDist1.pdf(y)) + 0.5*(xDist2.pdf(x)*yDist2.pdf(y))
    return rVal
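# A small sanity check, not in the original snippet: the two mixture weights
# above sum to 1, so numerically integrating pdfPsi2 over a wide grid should
# give approximately 1.
import numpy as np

xs = np.linspace(-8, 8, 401)
ys = np.linspace(-8, 8, 401)
X, Y = np.meshgrid(xs, ys)
Z = pdfPsi2(X, Y)  # norm(...).pdf broadcasts over arrays
total = np.trapz(np.trapz(Z, ys, axis=0), xs)
print(total)       # approximately 1.0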
def smear_with_gaussian_convolution(x, y, mean, sigma):
    npts = len(x)
    convolving_term = stats.norm(mean, sigma)
    convolving_pts = convolving_term.pdf(x)

    # Try adding another Gaussian to the convolving function.
    convolving_term_2 = stats.norm(2.0, 5.0)
    for i, pt in enumerate(convolving_pts):
        convolving_pts[i] += convolving_term_2.pdf(x[i])
        convolving_pts[i] /= 2.0

    convolved_function = signal.convolve(y/y.sum(), convolving_pts)

    # Have to carve out the middle of the curve, because
    # the returned array has too many points in it.
    znpts = len(convolved_function)
    begin = znpts/2 - npts/2
    end = znpts/2 + npts/2
    print "%d %d %d %d" % (npts, znpts, begin, end)

    return convolved_function[begin:end], convolving_pts
def EmaxT(EyT, z, h, n, kappa):
    part1 = 1.0 - norm(0, 1).cdf(-xi_starT(z, h, n, kappa)/sigma_xi)
    pdf_ = norm(0, 1).pdf(-xi_starT(z, h, n, kappa)/sigma_xi)
    cdf_ = norm(0, 1).cdf(-xi_starT(z, h, n, kappa)/sigma_xi)
    part2 = W1(EyT, z, h, n) + (sigma_eta_xi/sigma_xi)*(pdf_/(1-cdf_))
    part3 = W0(EyT, kappa, n) - (sigma_eps_xi/sigma_xi)*(pdf_/cdf_)
    return part1*part2 + (1.0 - part1)*part3
def frequency_response(self, N_points, freq_range=(0, 200), mirror=False):
    """ Frequency response curve of the sensor

    Args:
        freq_range (tuple): min and max frequency, defining the frequency range of the response
        N_points (int): number of points generated in the curve (length of the response arrays)
        mirror (bool): if true generates a mirror of the response for negative frequencies.
            The effective freq_range would be from -1*freq_range[1] to freq_range[1]

    Returns:
        list: with two arrays, one of the frequency range array and the other with the
            corresponding intensities, normalized from 0 to 1, 1 being the response in the
            resonant frequency.
    """
    if not mirror:
        f_array = np.linspace(*freq_range, N_points)
        freq_response = norm(scale=self.bandwidth/2, loc=self.resonant_freq).pdf(f_array)
        freq_response /= max(freq_response)
    else:
        f_array = np.linspace(*freq_range, N_points//2)
        freq_response = norm(scale=self.bandwidth/2, loc=self.resonant_freq).pdf(f_array)
        freq_response /= max(freq_response)
        mirrored = (np.flip(f_array*-1, 0), np.flip(freq_response, 0))
        f_array = np.hstack((mirrored[0], f_array))
        freq_response = np.hstack((freq_response, mirrored[1]))

    return [f_array, freq_response]
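# A minimal usage sketch for the method above (not part of the original class).
# It assumes the surrounding sensor object exposes bandwidth and resonant_freq
# attributes, so a hypothetical stand-in object is passed in place of self.
from types import SimpleNamespace

sensor = SimpleNamespace(bandwidth=10.0, resonant_freq=100.0)  # stand-in values

f, response = frequency_response(sensor, N_points=501, freq_range=(0, 200))
print(f.shape, response.max())  # (501,) 1.0 -- peak normalized at the resonant frequency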
def plotAlignments(outDir, alignments, scoreOptimal, label):
    fontSize = 25
    scores = [getAlignScore(a) for a in alignments]
    fig = pPlotUtil.figure(xSize=24, ySize=12)
    plt.subplot(1, 2, 1)
    meanScore, stdevScore, bins = plotScoreHistograms(scores, fontSize, 'k')
    plt.title("Shuffled DNA alignment Histogram", fontsize=fontSize)
    pdfFunc = norm(loc=meanScore, scale=stdevScore).pdf(bins)
    plotPDF = lambda: plt.plot(bins, pdfFunc, 'g--', linewidth=3.0,
                               label="Normal(mean,var)")
    plotPDF()
    plt.legend(fontsize=fontSize)
    ax = plt.subplot(1, 2, 2)
    plotScoreHistograms(scores, fontSize)
    plotPDF()
    zScore = (scoreOptimal - meanScore) / stdevScore
    print("Z Score for {:s} is {:.2f}".format(label, zScore))
    # ??? is this the real p Value? Dont think so
    extrProb = 1 - norm().cdf(zScore)
    plt.title(("Histogram of optimal alignment score for {:d} trials\n" +
               "Optimal score: {:d}*sigma from shuffled mean.\n"
               "P(shuffled score>=optimal) ~ {:.5g}").
              format(len(scores), int(zScore), extrProb), fontsize=fontSize)
    plt.axvline(scoreOptimal, color='r', linestyle='--',
                label="Optimal global alignment score using {:s}: {:d}".
                format(label, int(scoreOptimal)))
    plt.legend(loc='best', fontsize=fontSize)
    pPlotUtil.savefig(fig, outDir + "q2Histograms" + label)
def logllk(thetae):
    # Estimands
    theta = {}
    theta['betak'] = thetae[0]
    theta['sigmaeps'] = np.exp(thetae[1])
    theta['betan'] = thetae[2]
    theta['gamma1'] = thetae[3]
    theta['gamma2'] = thetae[4]
    theta['delta'] = thetae[5]
    theta['sigmaeta'] = np.exp(thetae[6])
    theta['covepseta'] = thetae[7]
    theta['pi'] = thetae[8]

    # Locals
    theta['sigmaxi'] = np.sqrt(theta['sigmaeta']**2 + theta['sigmaeps']**2 - 2.0*theta['covepseta'])
    theta['covxieta'] = theta['sigmaeta']**2 - theta['covepseta']

    xi_star = np.zeros((N, T))
    for t in range(0, T):
        exec("xi_star[:," + str(t) + "] = xi_starTm" + str(T-1-t) +
             "(z[:," + str(t) + "], h[:," + str(t) + "], n[:," + str(t) +
             "], kappa[:," + str(t) + "],theta)")

    # l_0
    l_0 = norm(0, 1).cdf(-xi_star/theta['sigmaxi'])

    # l_1
    pdf_l1 = norm(0, 1).pdf((w - z*theta['gamma1'] - h*theta['gamma2'])/theta['sigmaeta'])
    arg1_cdf_l1 = xi_star + (theta['covxieta']/(theta['sigmaeta']**2))*(w - z*theta['gamma1'] - h*theta['gamma2'])
    arg2_cdf_l1 = np.sqrt(theta['sigmaxi']**2 - (theta['covxieta']**2)/(theta['sigmaeta']**2))
    cdf_l1 = norm(0, 1).cdf(arg1_cdf_l1/arg2_cdf_l1)
    l_1 = (1/theta['sigmaeta'])*pdf_l1*cdf_l1

    return -np.sum(d*np.log(l_1) + (1.0-d)*np.log(l_0))
def expand_sf_data(inpat):
    dates = pd.date_range('1/1/2013', periods=30, freq='M')
    if len(inpat) > 3:
        mu = inpat['DiffDate'].mean()
        st = inpat['DiffDate'].std()
        obj = norm(loc=mu, scale=st)
    else:
        obj = norm(loc=pat_mu, scale=pat_std)

    outdata = pd.DataFrame(columns=['CohortSF', 'PatSF', 'LastVisit', 'DiffDays'],
                           index=pd.Index(dates, name='Date'))
    try:
        ldate = inpat.iloc[-2]['Date']
        lvisit = inpat.index[-2][1]
    except IndexError:
        lvisit = 'R01'
        ldate = inpat.iloc[0]['Date']

    outdata['LastVisit'] = lvisit

    for date in dates:
        diff_date = (date - ldate).days
        outdata.ix[date]['CohortSF'] = cohort_norm[lvisit].sf(diff_date)
        outdata.ix[date]['PatSF'] = obj.sf(diff_date)
        outdata.ix[date]['DiffDays'] = diff_date
    return outdata
def calcH2Continuous_twotails(XXT, phe, keepArr, prev, h2coeff):
    print 'computing h2 for a two-tails ascertained study...'

    XXT = XXT[np.ix_(keepArr, keepArr)]
    phe = phe[keepArr]

    t1 = stats.norm(0, 1).ppf(prev)
    t2 = stats.norm(0, 1).isf(prev)
    phit1 = stats.norm(0, 1).pdf(t1)
    phit2 = stats.norm(0, 1).pdf(t2)

    K1 = prev
    K2 = prev

    xCoeff = ((phit2*t2 - phit1*t1 + K1 + K2)**2 * (K1+K2)**2 - (phit2-phit1)**4) / (K1 + K2)**4
    intersect = ((phit2-phit1) / (K1+K2))**2

    pheMean = 0
    pheVar = 1

    x = (xCoeff * h2coeff) * XXT
    y = np.outer((phe-pheMean)/np.sqrt(pheVar), (phe-pheMean)/np.sqrt(pheVar))
    y -= intersect

    y = y[np.triu_indices(y.shape[0], 1)]
    x = x[np.triu_indices(x.shape[0], 1)]
    slope, intercept, rValue, pValue, stdErr = stats.linregress(x, y)
    return slope
def calcH2Continuous(XXT, phe, keepArr, prev, h2coeff):
    t = stats.norm(0, 1).isf(prev)
    phit = stats.norm(0, 1).pdf(t)

    K1 = 1 - prev
    K2 = 1 - K1

    P = np.sum(phe < t) / float(phe.shape[0])
    P2 = 1.0
    P1 = K2*P2*P / (K1*(1-P))
    R = P2 / P1

    XXT = XXT[np.ix_(keepArr, keepArr)]
    phe = phe[keepArr]

    xCoeff = (((R-1)*phit*t + K1 + R*K2)**2 * (K1+R*K2)**2 - ((R-1)*phit)**4) / (K1 + R*K2)**4
    x = (xCoeff * h2coeff) * XXT

    pheMean = 0
    pheVar = 1
    y = np.outer((phe-pheMean) / np.sqrt(pheVar), (phe-pheMean)/np.sqrt(pheVar))
    y -= ((R-1)*phit / (K1+R*K2))**2

    y = y[np.triu_indices(y.shape[0], 1)]
    x = x[np.triu_indices(x.shape[0], 1)]
    slope, intercept, rValue, pValue, stdErr = stats.linregress(x, y)
    return slope
def stress_vector(self, N_points, time_range=(0, 1), peek_value_relative_time=0.5, multiple_axis=True):
    """ Generates a temporal stress normal curve, simulating a vehicle passing at a point.

    Args:
        N_points (int): number of samples in the arrays.
        time_range (tuple): the start and stop time for the samples.
        peek_value_relative_time (float): the point where the max_tire_pressure occurs,
            relative to the time_range. Expected to be a value between 0 and 1.
        multiple_axis (bool): considers the effect of multiple tires passing through.
            Generates multiple stress pulses delayed by the tire_space and speed.

    Returns:
        list: list with two arrays, one corresponding to the time sample, and the other
            to the stress in one point.
    """
    t_array = np.linspace(*time_range, N_points)
    std, m = self.tire_contact_time/6, (time_range[1]-time_range[0])*peek_value_relative_time
    p_array = norm(scale=std, loc=m).pdf(t_array)

    if multiple_axis:
        delay = 0
        for _ in np.arange(self.N_axis-1):
            delay += self.tire_space/self.speed
            p_array += norm(scale=std, loc=m+delay).pdf(t_array)

    # normalizing and applying the max_stress to the peak
    p_array /= max(p_array)
    p_array *= self.max_tire_pressure

    return [t_array, p_array]
def calcH2Binary(XXT, phe, probs, thresholds, keepArr, prev, h2coeff):
    K = prev
    P = np.sum(phe > 0) / float(phe.shape[0])

    XXT = XXT[np.ix_(keepArr, keepArr)]
    phe = phe[keepArr]

    if (thresholds is None):
        t = stats.norm(0, 1).isf(K)
        phit = stats.norm(0, 1).pdf(t)
        xCoeff = P*(1-P) / (K**2 * (1-K)**2) * phit**2 * h2coeff
        y = np.outer((phe-P) / np.sqrt(P*(1-P)), (phe-P) / np.sqrt(P*(1-P)))
        x = xCoeff * XXT
    else:
        probs = probs[keepArr]
        thresholds = thresholds[keepArr]
        Ki = K*(1-P) / (P*(1-K)) * probs / (1 + K*(1-P) / (P*(1-K))*probs - probs)
        phit = stats.norm(0, 1).pdf(thresholds)
        probsInvOuter = np.outer(probs*(1-probs), probs*(1-probs))
        y = np.outer(phe-probs, phe-probs) / np.sqrt(probsInvOuter)
        sumProbs = np.tile(np.column_stack(probs).T, (1, probs.shape[0])) + np.tile(probs, (probs.shape[0], 1))
        Atag0 = np.outer(phit, phit) * (1 - (sumProbs)*(P-K)/(P*(1-K)) +
                                        np.outer(probs, probs)*(((P-K)/(P*(1-K)))**2)) / np.sqrt(probsInvOuter)
        B0 = np.outer(Ki + (1-Ki)*(K*(1-P))/(P*(1-K)), Ki + (1-Ki)*(K*(1-P))/(P*(1-K)))
        x = (Atag0 / B0 * h2coeff) * XXT

    y = y[np.triu_indices(y.shape[0], 1)]
    x = x[np.triu_indices(x.shape[0], 1)]
    slope, intercept, rValue, pValue, stdErr = stats.linregress(x, y)
    return slope
def learn(self, idx=None):
    self.normals = []
    # all indices used in the learning process
    if idx == None:
        idx = range(0, len(self.arr))
    items = np.array([self.arr[i] for i in idx])

    # mask for all males
    mask = np.ma.array(items[:, 0] < 1)
    subs = getRows(items, mask)

    # calculate one patch of normals (for males)
    self.means.append(np.mean(subs[:, 1:], axis=0))
    self.stds.append(np.std(subs[:, 1:], axis=0))
    self.normals.append([stats.norm(loc=self.means[0][i],
                                    scale=self.stds[0][i])
                         for i in range(0, len(self.means[0]))])

    # inverse mask. Mark for the ladies
    mask = [not i for i in mask]
    subs = getRows(items, mask)

    # Calc mean and stdev for females
    self.means.append(np.mean(subs[:, 1:], axis=0))
    self.stds.append(np.std(subs[:, 1:], axis=0))
    self.normals.append([stats.norm(loc=self.means[1][i],
                                    scale=self.stds[1][i])
                         for i in range(0, len(self.means[1]))])
def process_chromosome_values(chromosome, chromosome_values, mu, sigma, OUT, p_threshold=0.0001, merge_window=1000, window=51): ''' Go over depth values for a given chromosome in an input list and write to a list of filtered positions if a position is less or greater than threshhold values derived the cummulative distribution function of a normal distribution. chromosome -- The name of the chromosome. Used only in printing. chromosome_values -- list of chromosome depths at every position. mu -- Describes normal distribution, used to filter abnormal depths. sigma -- Describes normal distribution, used to filter abnormal depths. OUT -- File handle to write filtered positions with abnormal depths. window -- Window to smooth depth values. p_threshold -- Probability applied to the CDF of the normal distribution to generate depth thresholds for filtering. ''' def write_interval_to_filter(chromosome, start, end): OUT.write('{}\t{}\t{}\n'.format( chromosome, str(start), str(end + 1), )) d = int((window - 1) / 2) norm_dist = norm(mu, sigma) keep_threshold = [mu, mu] filter_threshold = [float('-inf'), float('inf')] first = float('inf') last = float('-inf') side = 0 last_side = 0 max = len(chromosome_values) for i in range(0, max): if i < d: window_start = 0 window_end = i + d + 1 elif i >= (max - d): window_start = i - d window_end = max else: window_start = i - d window_end = i + d + 1 window_depth = numpy.mean(chromosome_values[window_start:window_end]) if not (window_depth >= keep_threshold[0] and window_depth <= keep_threshold[1]): if (window_depth <= filter_threshold[0] or window_depth >= filter_threshold[1]): if window_depth < mu: side = -1 else: side = 1 if i - last > merge_window or last_side * side == -1: if last - first > 0: write_interval_to_filter( chromosome, first, last, ) first = i last = i last_side = side else: if window_depth < mu: side = -1 p = norm_dist.cdf(window_depth) if p >= p_threshold: keep_threshold[0] = window_depth else: filter_threshold[0] = window_depth if i - last > merge_window or last_side * side == -1: if last - first > 0: write_interval_to_filter( chromosome, first, last, ) first = i last = i last_side = side elif window_depth > mu: side = 1 p = 1. - norm_dist.cdf(window_depth) if p >= p_threshold: keep_threshold[1] = window_depth else: filter_threshold[1] = window_depth if i - last > merge_window or last_side * side == -1: if last - first > 0: write_interval_to_filter( chromosome, first, last, ) first = i last = i last_side = side if last - first > 0: write_interval_to_filter( chromosome, first, last, )
incident rate change based on their reproductive behaviors' change"""

"""First part: Reproductive behavior implementation in the past 15 years """

# *****************************************************************************
# Breastfeeding probability vector in 2000s
# Use the data from "The changes in female physical and childbearing
# characteristics in china and potential association with risk of breast cancer"

# Based on the research in 2012, for the cohort of women aged 25-34, the average
# number of parity they have is 1.01, and their accumulative breastfeeding period
# has a mean of 13.62 and SD 9.80. We can draw a normal distribution from the data
# to estimate the breastfeeding duration time per child for women in 2000s.

#fig = plt.figure()
mu1, sigma1 = 13.62, 9.80
pb01 = norm(loc=mu1, scale=sigma1)

"""
For every child a woman gives birth to, the probability of her devoting no
breastfeeding is pb01_0, probability of her devoting 0~12 months breastfeeding
is pb01_12, and 12~24, more than 24 months breastfeeding are pb01_24, pb01_36.
"""
pb01_0 = pb01.cdf(0)
pb01_12 = pb01.cdf(12) - pb01.cdf(0)
pb01_24 = pb01.cdf(24) - pb01.cdf(12)
pb01_36 = 1 - pb01.cdf(24)
pb00s_1 = [
    pb01_12 / (1 - pb01_0),
    pb01_24 / (1 - pb01_0),
    pb01_36 / (1 - pb01_0 - pb01_12),
    pb01_0
]

"""
If a woman bears two children, the probability of her devoting no breastfeeding
is pb02_0, probability of her devoting 0~12 months breastfeeding is pb02_12,
and 12~24, more than 24 months breastfeeding are pb02_24, pb02_36.
lnsig_air = 1e-8

x0, z0, r0 = -6., -4., 3.
x1, z1, r1 = 6., -4., 3.

ln_sigback = -5.
ln_sigc = -3.
ln_sigr = -7.

noisemean = 0.
noisevar = 0.0
overburden_extent = 0.
ln_over = -4.
#m = (lnsig_background)*np.ones(mesh.nC);
#mu =np.ones(mesh.nC);
mtrue = ln_sigback * np.ones(mesh.nC) + norm(noisemean, noisevar).rvs(mesh.nC)

overb = (mesh.gridCC[:, 1] > -overburden_extent) & (mesh.gridCC[:, 1] <= 0)
mtrue[overb] = ln_over * np.ones_like(mtrue[overb]) + norm(
    noisemean, noisevar).rvs(np.prod((mtrue[overb]).shape))

csph = (np.sqrt((mesh.gridCC[:, 1] - z0)**2. + (mesh.gridCC[:, 0] - x0)**2.)) < r0
mtrue[csph] = ln_sigc * np.ones_like(mtrue[csph]) + norm(
    noisemean, noisevar).rvs(np.prod((mtrue[csph]).shape))

# Define the sphere limit
rsph = (np.sqrt((mesh.gridCC[:, 1] - z1)**2. + (mesh.gridCC[:, 0] - x1)**2.)) < r1
mtrue[rsph] = ln_sigr * np.ones_like(mtrue[rsph]) + norm(
    noisemean, noisevar).rvs(np.prod((mtrue[rsph]).shape))
# D0 = 0.66e-9
# most sensitive Connectom-like scheme
# grad (T/m), DELTA (s), delta (s)
scheme_connectom = np.array([[0.3, 40e-3, 40e-3]])

# data SNR at B0
SNRs = [30.0, 300.0]

for isnr in range(len(SNRs)):
    SNR = SNRs[isnr]

    # significance level
    alphas = np.array([0.05])
    # compute sigma_bar for the diameter limit formula
    sigmabs = norm().ppf(1 - alphas) / SNR
    d_mins = nilsson_diameter(sigmabs, D0, scheme_connectom[0, 2], scheme_connectom[0, 0])

    # diameters in m
    diams = np.arange(0.1, 5.1, 0.05) * 1e-6

    # number of noise trials
    Ntrial = 10000
    np.random.seed(0)

    # when gaussian noise brings the signal above 1 (i.e. b0), you get an
    # imaginary diameter (to be discarded)
    fit_data = np.zeros((len(diams), Ntrial), dtype=np.complex)
from numpy import mean
from numpy import std
from matplotlib import pyplot as plt
from scipy.stats import norm

from preprocessing.oasis_subject import deserialize_bulk_from_csv_file

INDEX_SOURCE = '../../data/external_raw/csv/oasis_cross-sectional.csv'

subjects = deserialize_bulk_from_csv_file(INDEX_SOURCE)

rv = norm()


def draw_plot_for_freq_data(name, data, fig, subplot, _min, _max,
                            overscan=5, step=1, show_normal=True):
    fit = norm.pdf(data, mean(data), std(data))

    xvals = [x * step for x in range(_min, int(_max/step)+1)]

    freq = {}
    for i in xvals:
        freq[i] = 0
    for i in xvals:
        for datum in data:
            if datum == i:
                freq[i] += 1
    for i in xvals:
        freq[i] /= float(len(ages))

    freq_arr = freq.values()

    plt.figure(fig)
import math

import scipy.stats as ss

x = 1123.00   # number of responses
n = 1783.00   # sample size
alpha = .10   # confidence interval 1-alpha

# point estimate for a parameter of a normal distribution
# determine p_hat as the sample mean
p_hat = x/n

# compute the sample standard deviation
std = math.sqrt((p_hat*(1-p_hat))/n)

# standard error E
E = std / math.sqrt(n)

# create a normal distribution and get the z score of z sub alpha/2
pd = ss.norm(loc=0, scale=1)
z = abs(pd.ppf((alpha/2)))

# get the confidence intervals about the mean
lbound = p_hat - (z * std)
ubound = p_hat + (z * std)

print "p hat: " + "{0:.3f}".format(p_hat)
print "std: " + "{0:.3f}".format(std)
print("E: " + "{0:.3f}".format(E))
print("z score: " + "{0:.3f}".format(z))
print("lbound: " + "{0:3f}".format(lbound))
print("ubound: " + "{0:3f}".format(ubound))
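# As a cross-check (not part of the original script), the same bounds follow
# from the frozen distribution's interval method; for alpha = 0.10 the z score
# is about 1.645.
lo, hi = ss.norm(loc=p_hat, scale=std).interval(1 - alpha)
print("interval check: {0:.3f} {1:.3f}".format(lo, hi))  # matches lbound / ubound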
def quadratic_grad(params, x=rec.Fneu[4], y=rec.F[4], k=4, v=False):
    guess = params[0] + params[1] * x
    res = y - guess  # data points above line are positive
    outer_derivative = (res > 0)*(2*res) + (res < 0)*(k*res**(2))
    theta_0 = np.sum(outer_derivative)*(-1)
    theta_1 = np.sum(outer_derivative*(-x))
    grad = (theta_0, theta_1)
    return grad


def normalize(x, y):
    vector = np.array((x, y))
    norms = np.linalg.norm(vector, axis=0)
    return vector / norms


dirac = norm(0, 1e-2).pdf


def guess(x, y):
    slope, intercept, r_value, p_value, std_err = linregress(x, y)
    return (intercept, slope)


class ParabolicRegressor:
    default_k = 70

    @classmethod
    def loss(cls, params, x, y, k=None, v=False):
        if k is None:
            k = cls.default_k
        guess = params[0] + params[1] * x
        residuals = y - guess  # data points above line are positive
        upper_cost = np.sum(ramp(residuals)**2)
        lower_cost = np.sum(2*ramp(-residuals)**(2))
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# Define the distributions to be plotted
sigma_values = [0.5, 1.0, 2.0]
linestyles = ['-', '--', ':']
mu = 0
x = np.linspace(-10, 10, 1000)

#------------------------------------------------------------
# plot the distributions
fig, ax = plt.subplots(figsize=(5, 3.75))

for sigma, ls in zip(sigma_values, linestyles):
    # create a gaussian / normal distribution
    dist = norm(mu, sigma)

    plt.plot(x, dist.pdf(x), ls=ls, c='black',
             label=r'$\mu=%i,\ \sigma=%.1f$' % (mu, sigma))

plt.xlim(-5, 5)
plt.ylim(0, 0.85)

plt.xlabel('$x$')
plt.ylabel(r'$p(x|\mu,\sigma)$')
plt.title('Gaussian Distribution')

plt.legend()
def model(args): return {"y": st.norm(args['x'], sigma).rvs()}
    y2, Sigy2 = testModel(mod2, np.array([optimizerX[i, :]]))
    temp_x_pareto = np.vstack((copyxPareto, np.array([optimizerX[i, :]])))
    New_Weights_, New_Paretos_ = WeightPoints(temp_x_pareto, dataset, Kernels,
                                              Kinv_0, Kinv_1, points_)
    slide_size = len(SlidingY)
    for j in (range(slide_size)):
        temp_pareto = np.vstack(
            (New_Paretos_[:-1], np.array([SlidingY[j, 0], SlidingY[j, 1]])))
        found_temp_pareto = mPareto(temp_pareto)
        usef_weights = New_Weights_[parY_X(temp_pareto, found_temp_pareto)]
        Probs_dim1 = norm(y1, Sigy1).pdf(SlidingY[j, 0])
        Probs_dim2 = norm(y2, Sigy2).pdf(SlidingY[j, 1])
        EHVI_New = Expected_HVI(found_temp_pareto, usef_weights,
                                Faster[j]) * Probs_dim1 * Probs_dim2
        Total_HVI_diff += EHVI_New
    imp_log.append(Total_HVI_diff)

indx = imp_log.index(max(imp_log))
Best_x = x_log[indx][0]
Best_y = function(np.array([Best_x]))[0, 0], function(np.array([Best_x]))[0, 1]
if Best_x not in dataset.data:
    dataset.newData(Best_x)
    dataset.newOut(Best_y)
else:
    sys.exit("Couldn't go on, almost the same point! Why?!")
# Calculate their estimated Sharpe ratio (SR^). *It is "estimated" because it uses the historical returns as a prediction of the future returns.*

sr_st1 = estimated_sharpe_ratio(returns_st1)
print('SR st1 dist.:', sr_st1)

sr_ann_st1 = ann_estimated_sharpe_ratio(returns_st1, periods=52)
print('SR Annual. st1 dist.:', round(sr_ann_st1, 2))

# ## Strategy 2 - *Simulate a normal distribution of returns with better `mean` and same `std` as the strategy 1*

# By definition the SR, in a big data sample, of this strategy (this returns distribution) must be greater than the SR of Strategy 1...but with only a little data?

# +
EXTRA_EDGE = 0.0012

dist_st2 = norm(loc=true_mean + EXTRA_EDGE, scale=true_std)
# -

true_mean_st2 = dist_st2.stats('m').item()
true_std_st2 = np.sqrt(dist_st2.stats('v').item())
true_skew_st2 = dist_st2.stats('s').item()
true_kurt_st2 = dist_st2.stats('k').item() + 3

print('Long term true weekly mean returns st2: {:.2%}'.format(true_mean_st2))
print('Long term true std returns st2: {:.2%}'.format(true_std_st2))
print('Long term true skew returns st2: {:.2f}'.format(true_skew_st2))
print('Long term true kurt returns st2: {:.2f}'.format(true_kurt_st2))

# Generate random **weekly returns** and check their moment statistics (in a large sample the *mean* and *std* should be equal to the `moments_st1`, and the *skew* should be 0 and *kurtosis* 3)

# +
SEED_ST2 = 9563
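# estimated_sharpe_ratio and ann_estimated_sharpe_ratio are defined elsewhere in
# the notebook; a minimal sketch of what they presumably compute, assuming the
# usual mean-over-standard-deviation estimate and square-root-of-periods
# annualization (hypothetical, not the notebook's actual definitions):
import numpy as np

def estimated_sharpe_ratio(returns):
    # Per-period Sharpe ratio estimate: mean return over sample standard deviation.
    r = np.asarray(returns, dtype=float)
    return r.mean() / r.std(ddof=1)

def ann_estimated_sharpe_ratio(returns, periods=52):
    # Annualize by the square root of the number of periods per year (52 for weekly data).
    return estimated_sharpe_ratio(returns) * np.sqrt(periods)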
def p_y_given_model(mu_x_model):
    res = st.norm(mu_x_model, sp.sqrt(sigma**2 + sigma**2)).pdf(y_observed)
    return res
a = 0.0
b = 3.0
theta = 1.0
sigma = sqrt(theta/(2*(a+b+2)))
tscale = 0.05

invariant_distribution = poly1d([-1 for x in range(int(a))], True) * poly1d([1 for x in range(int(b))], True)


def eigenvalue(n):
    return theta*n*(n+a+b+1)/(a+b+2)


gaussian_var = norm()


def dW(dt):
    return norm.rvs() / sqrt(dt)


def random_walk(y0, tmax, dt, times=None):
    dt = dt * tscale

    def rhs(y, t):
        return -theta*(y-(a-b)/(a+b+2)) + sqrt(2*theta*(1-y*y)/(a+b+2))*dW(dt/tscale)

    if (times is None):
        times = arange(0, tmax, dt)
    y = zeros(shape=times.shape, dtype=float)
    y[0] = y0
    for i in range(1, y.shape[0]):
        y[i] = y[i-1] + rhs(y[i-1], times[i])*dt
        if abs(y[i]) > 1:
            y[i] = y[i] / abs(y[i])
def main(argv=None): parser = argparse.ArgumentParser( description="PINT tool for MCMC optimization of timing models using event data." ) parser.add_argument("eventfile", help="event file to use") parser.add_argument("parfile", help="par file to read model from") parser.add_argument("gaussianfile", help="gaussian file that defines template") parser.add_argument("--ft2", help="Path to FT2 file.", default=None) parser.add_argument( "--weightcol", help="name of weight column (or 'CALC' to have them computed", default=None, ) parser.add_argument( "--nwalkers", help="Number of MCMC walkers (def 200)", type=int, default=200 ) parser.add_argument( "--burnin", help="Number of MCMC steps for burn in (def 100)", type=int, default=100, ) parser.add_argument( "--nsteps", help="Number of MCMC steps to compute (def 1000)", type=int, default=1000, ) parser.add_argument( "--minMJD", help="Earliest MJD to use (def 54680)", type=float, default=54680.0 ) parser.add_argument( "--maxMJD", help="Latest MJD to use (def 57250)", type=float, default=57250.0 ) parser.add_argument( "--phs", help="Starting phase offset [0-1] (def is to measure)", type=float ) parser.add_argument( "--phserr", help="Error on starting phase", type=float, default=0.03 ) parser.add_argument( "--minWeight", help="Minimum weight to include (def 0.05)", type=float, default=0.05, ) parser.add_argument( "--wgtexp", help="Raise computed weights to this power (or 0.0 to disable any rescaling of weights)", type=float, default=0.0, ) parser.add_argument( "--testWeights", help="Make plots to evalute weight cuts?", default=False, action="store_true", ) parser.add_argument( "--doOpt", help="Run initial scipy opt before MCMC?", default=False, action="store_true", ) parser.add_argument( "--initerrfact", help="Multiply par file errors by this factor when initializing walker starting values", type=float, default=0.1, ) parser.add_argument( "--priorerrfact", help="Multiple par file errors by this factor when setting gaussian prior widths", type=float, default=10.0, ) parser.add_argument( "--usepickle", help="Read events from pickle file, if available?", default=False, action="store_true", ) global nwalkers, nsteps, ftr args = parser.parse_args(argv) eventfile = args.eventfile parfile = args.parfile gaussianfile = args.gaussianfile weightcol = args.weightcol if args.ft2 is not None: # Instantiate Fermi observatory once so it gets added to the observatory registry get_satellite_observatory("Fermi", args.ft2) nwalkers = args.nwalkers burnin = args.burnin nsteps = args.nsteps if burnin >= nsteps: log.error("burnin must be < nsteps") sys.exit(1) nbins = 256 # For likelihood calculation based on gaussians file outprof_nbins = 256 # in the text file, for pygaussfit.py, for instance minMJD = args.minMJD maxMJD = args.maxMJD # Usually set by coverage of IERS file minWeight = args.minWeight do_opt_first = args.doOpt wgtexp = args.wgtexp # Read in initial model modelin = pint.models.get_model(parfile) # The custom_timing version below is to manually construct the TimingModel # class, which allows it to be pickled. This is needed for parallelizing # the emcee call over a number of threads. So far, it isn't quite working # so it is disabled. The code above constructs the TimingModel class # dynamically, as usual. 
# modelin = custom_timing(parfile) # Remove the dispersion delay as it is unnecessary # modelin.delay_funcs['L1'].remove(modelin.dispersion_delay) # Set the target coords for automatic weighting if necessary if "ELONG" in modelin.params: tc = SkyCoord( modelin.ELONG.quantity, modelin.ELAT.quantity, frame="barycentrictrueecliptic", ) else: tc = SkyCoord(modelin.RAJ.quantity, modelin.DECJ.quantity, frame="icrs") target = tc if weightcol == "CALC" else None # TODO: make this properly handle long double ts = None if args.usepickle: try: ts = toa.load_pickle(eventfile) except IOError: pass if ts is None: # Read event file and return list of TOA objects tl = fermi.load_Fermi_TOAs( eventfile, weightcolumn=weightcol, targetcoord=target, minweight=minWeight ) # Limit the TOAs to ones in selected MJD range and above minWeight tl = [ tl[ii] for ii in range(len(tl)) if ( tl[ii].mjd.value > minMJD and tl[ii].mjd.value < maxMJD and (weightcol is None or float(tl[ii].flags["weight"]) > minWeight) ) ] log.info("There are %d events we will use" % len(tl)) # Now convert to TOAs object and compute TDBs and posvels ts = toa.TOAs(toalist=tl) ts.filename = eventfile ts.compute_TDBs() ts.compute_posvels(ephem="DE421", planets=False) toa.save_pickle(ts) if weightcol is not None: if weightcol == "CALC": weights = np.asarray([float(x["weight"]) for x in ts.table["flags"]]) log.info( "Original weights have min / max weights %.3f / %.3f" % (weights.min(), weights.max()) ) # Rescale the weights, if requested (by having wgtexp != 0.0) if wgtexp != 0.0: weights **= wgtexp wmx, wmn = weights.max(), weights.min() # make the highest weight = 1, but keep min weight the same weights = wmn + ((weights - wmn) * (1.0 - wmn) / (wmx - wmn)) for ii, x in enumerate(ts.table["flags"]): x["weight"] = str(weights[ii]) weights = np.asarray([float(x["weight"]) for x in ts.table["flags"]]) log.info( "There are %d events, with min / max weights %.3f / %.3f" % (len(weights), weights.min(), weights.max()) ) else: weights = None log.info("There are %d events, no weights are being used." % ts.ntoas) # Now load in the gaussian template and normalize it gtemplate = read_gaussfitfile(gaussianfile, nbins) gtemplate /= gtemplate.mean() # Set the priors on the parameters in the model, before # instantiating the emcee_fitter # Currently, this adds a gaussian prior on each parameter # with width equal to the par file uncertainty * priorerrfact, # and then puts in some special cases. 
# *** This should be replaced/supplemented with a way to specify # more general priors on parameters that need certain bounds phs = 0.0 if args.phs is None else args.phs fitkeys, fitvals, fiterrs = get_fit_keyvals(modelin, phs=phs, phserr=args.phserr) for key, v, e in zip(fitkeys[:-1], fitvals[:-1], fiterrs[:-1]): if key == "SINI" or key == "E" or key == "ECC": getattr(modelin, key).prior = Prior(uniform(0.0, 1.0)) elif key == "PX": getattr(modelin, key).prior = Prior(uniform(0.0, 10.0)) elif key.startswith("GLPH"): getattr(modelin, key).prior = Prior(uniform(-0.5, 1.0)) else: getattr(modelin, key).prior = Prior( norm(loc=float(v), scale=float(e * args.priorerrfact)) ) # Now define the requirements for emcee ftr = emcee_fitter(ts, modelin, gtemplate, weights, phs, args.phserr) # Use this if you want to see the effect of setting minWeight if args.testWeights: log.info("Checking H-test vs weights") ftr.prof_vs_weights(use_weights=True) ftr.prof_vs_weights(use_weights=False) sys.exit() # Now compute the photon phases and see if we see a pulse phss = ftr.get_event_phases() maxbin, like_start = marginalize_over_phase( phss, gtemplate, weights=ftr.weights, minimize=True, showplot=False ) log.info("Starting pulse likelihood: %f" % like_start) if args.phs is None: fitvals[-1] = 1.0 - maxbin[0] / float(len(gtemplate)) if fitvals[-1] > 1.0: fitvals[-1] -= 1.0 if fitvals[-1] < 0.0: fitvals[-1] += 1.0 log.info("Starting pulse phase: %f" % fitvals[-1]) else: log.warning( "Measured starting pulse phase is %f, but using %f" % (1.0 - maxbin / float(len(gtemplate)), args.phs) ) fitvals[-1] = args.phs ftr.fitvals[-1] = fitvals[-1] ftr.phaseogram(plotfile=ftr.model.PSR.value + "_pre.png") plt.close() # ftr.phaseogram() # Write out the starting pulse profile vs, xs = np.histogram( ftr.get_event_phases(), outprof_nbins, range=[0, 1], weights=ftr.weights ) f = open(ftr.model.PSR.value + "_prof_pre.txt", "w") for x, v in zip(xs, vs): f.write("%.5f %12.5f\n" % (x, v)) f.close() # Try normal optimization first to see how it goes if do_opt_first: result = op.minimize(ftr.minimize_func, np.zeros_like(ftr.fitvals)) newfitvals = np.asarray(result["x"]) * ftr.fiterrs + ftr.fitvals like_optmin = -result["fun"] log.info("Optimization likelihood: %f" % like_optmin) ftr.set_params(dict(zip(ftr.fitkeys, newfitvals))) ftr.phaseogram() else: like_optmin = -np.inf # Set up the initial conditions for the emcee walkers. Use the # scipy.optimize newfitvals instead if they are better ndim = ftr.n_fit_params if like_start > like_optmin: # Keep the starting deviations small... 
pos = [ ftr.fitvals + ftr.fiterrs * args.initerrfact * np.random.randn(ndim) for ii in range(nwalkers) ] # Set starting params for param in ["GLPH_1", "GLEP_1", "SINI", "M2", "E", "ECC", "PX", "A1"]: if param in ftr.fitkeys: idx = ftr.fitkeys.index(param) if param == "GLPH_1": svals = np.random.uniform(-0.5, 0.5, nwalkers) elif param == "GLEP_1": svals = np.random.uniform(minMJD + 100, maxMJD - 100, nwalkers) # svals = 55422.0 + np.random.randn(nwalkers) elif param == "SINI": svals = np.random.uniform(0.0, 1.0, nwalkers) elif param == "M2": svals = np.random.uniform(0.1, 0.6, nwalkers) elif param in ["E", "ECC", "PX", "A1"]: # Ensure all positive svals = np.fabs( ftr.fitvals[idx] + ftr.fiterrs[idx] * np.random.randn(nwalkers) ) if param in ["E", "ECC"]: svals[svals > 1.0] = 1.0 - (svals[svals > 1.0] - 1.0) for ii in range(nwalkers): pos[ii][idx] = svals[ii] else: pos = [ newfitvals + ftr.fiterrs * args.initerrfact * np.random.randn(ndim) for i in range(nwalkers) ] # Set the 0th walker to have the initial pre-fit solution # This way, one walker should always be in a good position pos[0] = ftr.fitvals import emcee # Following are for parallel processing tests... if 0: def unwrapped_lnpost(theta, ftr=ftr): return ftr.lnposterior(theta) import pathos.multiprocessing as mp pool = mp.ProcessPool(nodes=8) sampler = emcee.EnsembleSampler( nwalkers, ndim, unwrapped_lnpost, pool=pool, args=[ftr] ) else: sampler = emcee.EnsembleSampler(nwalkers, ndim, ftr.lnposterior) # The number is the number of points in the chain sampler.run_mcmc(pos, nsteps) def chains_to_dict(names, sampler): chains = [sampler.chain[:, :, ii].T for ii in range(len(names))] return dict(zip(names, chains)) def plot_chains(chain_dict, file=False): npts = len(chain_dict) fig, axes = plt.subplots(npts, 1, sharex=True, figsize=(8, 9)) for ii, name in enumerate(chain_dict.keys()): axes[ii].plot(chain_dict[name], color="k", alpha=0.3) axes[ii].set_ylabel(name) axes[npts - 1].set_xlabel("Step Number") fig.tight_layout() if file: fig.savefig(file) plt.close() else: plt.show() plt.close() chains = chains_to_dict(ftr.fitkeys, sampler) plot_chains(chains, file=ftr.model.PSR.value + "_chains.png") # Make the triangle plot. 
samples = sampler.chain[:, burnin:, :].reshape((-1, ndim)) try: import corner fig = corner.corner( samples, labels=ftr.fitkeys, bins=50, truths=ftr.maxpost_fitvals, plot_contours=True, ) fig.savefig(ftr.model.PSR.value + "_triangle.png") plt.close() except ImportError: pass # Plot the scaled prior probability alongside the initial gaussian probability distribution and the histogrammed samples ftr.plot_priors(chains, burnin, scale=True) plt.savefig(ftr.model.PSR.value + "_priors.png") plt.close() # Make a phaseogram with the 50th percentile values # ftr.set_params(dict(zip(ftr.fitkeys, np.percentile(samples, 50, axis=0)))) # Make a phaseogram with the best MCMC result ftr.set_params(dict(zip(ftr.fitkeys[:-1], ftr.maxpost_fitvals[:-1]))) ftr.phaseogram(plotfile=ftr.model.PSR.value + "_post.png") plt.close() # Write out the output pulse profile vs, xs = np.histogram( ftr.get_event_phases(), outprof_nbins, range=[0, 1], weights=ftr.weights ) f = open(ftr.model.PSR.value + "_prof_post.txt", "w") for x, v in zip(xs, vs): f.write("%.5f %12.5f\n" % (x, v)) f.close() # Write out the par file for the best MCMC parameter est f = open(ftr.model.PSR.value + "_post.par", "w") f.write(ftr.model.as_parfile()) f.close() # Print the best MCMC values and ranges ranges = map( lambda v: (v[1], v[2] - v[1], v[1] - v[0]), zip(*np.percentile(samples, [16, 50, 84], axis=0)), ) log.info("Post-MCMC values (50th percentile +/- (16th/84th percentile):") for name, vals in zip(ftr.fitkeys, ranges): log.info("%8s:" % name + "%25.15g (+ %12.5g / - %12.5g)" % vals) # Put the same stuff in a file f = open(ftr.model.PSR.value + "_results.txt", "w") f.write("Post-MCMC values (50th percentile +/- (16th/84th percentile):\n") for name, vals in zip(ftr.fitkeys, ranges): f.write("%8s:" % name + " %25.15g (+ %12.5g / - %12.5g)\n" % vals) f.write("\nMaximum likelihood par file:\n") f.write(ftr.model.as_parfile()) f.close() import pickle pickle.dump(samples, open(ftr.model.PSR.value + "_samples.pickle", "wb"))
def tmu_pvalue(tmu):
    z0 = np.sqrt(tmu)
    p0 = 2. * (1. - stats.norm(0., 1.).cdf(z0))
    return p0
#!/usr/bin/python3
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy import stats as stats

matplotlib.rcParams["axes.facecolor"] = "#23272E"
matplotlib.rcParams["figure.facecolor"] = "#23272E"
matplotlib.rcParams["axes.grid"] = False

gaussian = lambda x, mu=0, sigma=1: stats.norm(mu, sigma).pdf(x)

x = np.linspace(-10, 10, 1000)
g0 = gaussian(x, 0, 1.3)

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot()
ax.set_xlim(-4, 4)
ax.set_ylim(-4, 4)
ax.axis("off")

yshift = -3.0
yscale = 20
plt.plot(x, yscale * g0 + yshift, c="#5E81AC", lw=50)

plt.savefig("../public/logo.svg", dpi=400, bbox_inches="tight", transparent=True)
Pz = Wc[k] * np.outer(c, c)
covv_z = np.cov(c, c) + R
Pz += R  # as S
inv_covv_z = pinv(covv_z)

# Cross covariance between the weights before and after the unscented transform
Pxz = np.zeros(sigmas.shape)  # Tut
for k in range(kmax):
    cc = np.subtract(X_, Mz)  # as T
    Pxz = Wc[k] * np.outer(cc, c)
    covv_xz = np.cov(cc, c)

# Kalman gain
Kk = np.dot(covv_xz, inv_covv_z)
K1 = np.dot(Pxz, inv(P))
Knorm = np.reshape(norm(K1[:, 0], (-1, 1)), (-1, 1))  # just an extra
K = np.reshape(K1[:, 0], (-1, 1))

# innovation from Dash 2014
dash = np.dot(gamma**-2, np.identity(n))
# =============================================================================
# P2 = np.subtract(inv(P),P1)  # inverse based on the Dash 2014 paper
# P3 = np.dot(Pz,K.T)
# P = P2 - np.dot(K,P3)  # size of the error covariance matrix
# =============================================================================
# P = P - np.dot(Pz,K)
# np.dot(Pz,K.T))
P = P - np.dot(K, np.dot(Pz.sum(), K.T))
P += Q
#%%
def qmu_pvalue(qmu):
    z0 = np.sqrt(qmu)
    p0 = 1. - stats.norm(0., 1.).cdf(z0)
    return p0
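# A small worked check, not part of the original code: for a test statistic of
# 4, z0 = sqrt(4) = 2, so qmu_pvalue returns the one-sided tail probability
# (about 0.023) and tmu_pvalue the two-sided one (about 0.046); norm.isf
# recovers the significance from the one-sided p-value.
import numpy as np
from scipy import stats

q = 4.0                                # test statistic with z0 = sqrt(q) = 2
print(qmu_pvalue(q))                   # ~0.02275 (one-sided)
print(tmu_pvalue(q))                   # ~0.04550 (two-sided)
print(stats.norm.isf(qmu_pvalue(q)))   # ~2.0, recovers z0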
def erro_verdadeiro(self):
    erro_min = self.error_avg - (st.norm().ppf(self.confianca) * self.std_error)
    erro_max = self.error_avg + (st.norm().ppf(self.confianca) * self.std_error)
    return erro_min, erro_max
def main(_): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.85) with tf.device('/gpu:' + str(FLAGS.gpu)): with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True)) as sess: # setting seed for reproducibility if FLAGS.seed is not -1: tf.compat.v1.random.set_random_seed(FLAGS.seed) np.random.seed(FLAGS.seed) print("setting seed for reproducibility to " + str(FLAGS.seed)) # inputs gen_input = tf.placeholder(tf.float32, shape=[FLAGS.batch_size, 32]) # model g = models_cleaned.generator_32 dde = models_cleaned.dde_32 # generator avg_styles = None if FLAGS.avg_styles is not "": # load avg styles import pickle with open(FLAGS.avg_styles, 'rb') as f: avg_styles = pickle.load(f) avg_styles = avg_styles.astype(np.float32) noise_ph = get_noise_placeholders(FLAGS.batch_size) if FLAGS.g_act == 'lrelu': act = partial(leaky_relu, leak=0.2) elif FLAGS.g_act == 'relu': act = tf.nn.relu elif FLAGS.g_act == 'tanh': act = tf.nn.tanh elif FLAGS.g_act == 'swish': act = swish else: act = tf.nn.softplus fake_img = g(gen_input, FLAGS.batch_size, noises=noise_ph, avg_styles=avg_styles, psi=FLAGS.trun_factor, scope='generator', act=act, linear=FLAGS.linear, clip=FLAGS.clip, norm=FLAGS.normalize, reuse=False) if FLAGS.dde_act == 'lrelu': dde_act = partial(leaky_relu, leak=0.2) elif FLAGS.dde_act == 'relu': dde_act = tf.nn.relu elif FLAGS.dde_act == 'tanh': dde_act = tf.nn.tanh elif FLAGS.dde_act == 'swish': dde_act = swish else: dde_act = tf.nn.softplus # fake dde noisy_fake_img = fake_img + tf.random.normal(shape=tf.shape(fake_img), mean=0., stddev=FLAGS.sigma) fake_dde = dde(noisy_fake_img, act=dde_act, scope='fake_dde', reuse=False) grad_fake_dde = tf.gradients(fake_dde, noisy_fake_img)[0] denoised_fake_img = noisy_fake_img + grad_fake_dde * FLAGS.sigma ** 2 # load model sess.run(tf.initialize_all_variables()) saver = tf.train.Saver(max_to_keep=None) saver.restore(sess, FLAGS.model_name) print('loaded model from :' + FLAGS.model_name) mu = 0. sigma = 1. 
import scipy.stats as stats n = stats.norm(loc=mu, scale=sigma) # get some samples noise_batch = n.rvs(FLAGS.batch_size*32) layer_noise = get_layer_noise(FLAGS.batch_size, n) noise_batch = np.reshape(noise_batch, (FLAGS.batch_size, 32)) fake_imgs, fake_imgs_den = sess.run( [fake_img, denoised_fake_img], feed_dict={gen_input: noise_batch, noise_ph[0]: layer_noise[0], noise_ph[1]: layer_noise[1], noise_ph[2]: layer_noise[2], noise_ph[3]: layer_noise[3], noise_ph[4]: layer_noise[4], noise_ph[5]: layer_noise[5], noise_ph[6]: layer_noise[6], noise_ph[7]: layer_noise[7], noise_ph[8]: layer_noise[8]}) if FLAGS.store_single: print() for i in range(FLAGS.batch_size): img = fake_imgs[i] img = cv2.cvtColor(np.clip(img + 0.5, 0, 1), cv2.COLOR_BGR2RGB) cv2.imwrite(FLAGS.out_single_file % (i+1+FLAGS.id_offset), img * 255, [cv2.IMWRITE_PNG_COMPRESSION, 9]) print('\rwrote %d / %d images' % (i+1, FLAGS.batch_size), end='') print() else: out_img = imgrid(fake_imgs) # out_img_den = imgrid(fake_imgs_den) # data is in [-0.5, 0.5] out_img += 0.5 # out_img_den += 0.5 # we store clipped and normalized version out_img_norm = out_img - np.min(out_img) out_img_norm = out_img_norm / np.max(out_img_norm) out_img_norm = cv2.cvtColor(out_img_norm, cv2.COLOR_BGR2RGB) out_img_clip = cv2.cvtColor(np.clip(out_img, 0., 1.), cv2.COLOR_BGR2RGB) cv2.imwrite(FLAGS.out_file, out_img_clip*255, [cv2.IMWRITE_PNG_COMPRESSION, 9]) out_file_norm = FLAGS.out_file.replace('.png', '_norm.png') cv2.imwrite(out_file_norm, out_img_norm * 255, [cv2.IMWRITE_PNG_COMPRESSION, 9])
plt.hlines(pi_bar[1], 0, T, 'r', '--')
plt.xlabel('Time')
plt.title('Percent of Time Unemployed')
plt.tight_layout()
plt.savefig('example_averages.png')

#==============================================================================
# Now add McCall Search Model
#==============================================================================
from scipy.stats import norm

# using quarterly data
alpha_q = (1 - (1 - alpha)**3)  # alpha is monthly and alpha_q is quarterly
gamma = 1.
logw_dist = norm(np.log(20.), 1)
w = np.linspace(0., 175, 201)  # wage grid

# compute probability of each wage level
cdf = logw_dist.cdf(np.log(w))
pdf = cdf[1:] - cdf[:-1]
pdf /= pdf.sum()
w = (w[1:] + w[:-1]) / 2

# Find the equilibrium
LME = LakeModel_Equilibrium(alpha_q, gamma, 0.99, 2.00, pdf, w)

# possible levels of unemployment insurance
cvec = np.linspace(1., 75, 25)

T, W, U, EV, pi = map(np.vstack,
                      zip(*[LME.find_steady_state_tax(c) for c in cvec]))
from scipy.stats import norm
from scipy.integrate import quad

fi = norm()
value, error = quad(fi.pdf, -2, 2)
print(value)
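# The quadrature above computes the probability mass of a standard normal within two
# standard deviations, so it can be cross-checked directly against the CDF; both should
# print approximately 0.9545.
from scipy.stats import norm

print(norm().cdf(2) - norm().cdf(-2))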
def f1_prob_mass_Gauss(y, delta):
    return norm(1, 1).cdf(y) - norm(1, 1).cdf(y - delta)
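# Illustrative check: for small delta, the mass P(y - delta < Y <= y) returned above for
# Y ~ N(1, 1) is approximately pdf(y) * delta (the values below are only an example).
from scipy.stats import norm

y, delta = 1.5, 0.01
print(f1_prob_mass_Gauss(y, delta))   # exact interval mass
print(norm(1, 1).pdf(y) * delta)      # pdf-based approximation; the two agree closely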
def get_p_value(ydata, binvals, mask=[], verbose=0, plotfile=None, yerr=None,
                return_teststat=False, plotsys=True, myax=None):
    ydata = np.array(ydata)
    # Assume poisson is gaussian with N+1 variance
    if not yerr:
        yerr = np.sqrt(ydata + 1)
    else:
        yerr = np.array(yerr)

    def fit_func(x, p1, p2, p3):
        # see the ATLAS diboson resonance search: https://arxiv.org/pdf/1708.04445.pdf.
        xi = 0.
        y = x / 13000.
        return p1 * (1. - y)**(p2 - xi * p3) * y**-p3

    xdata = np.array([0.5 * (binvals[i] + binvals[i + 1]) for i in range(0, len(binvals) - 1)])
    xwidths = np.array([-binvals[i] + binvals[i + 1] for i in range(0, len(binvals) - 1)])
    # Assuming inputs are bin counts, this is needed to get densities. Important for variable-width bins
    ydata = np.array(ydata) * 100 / xwidths
    yerr = np.array(yerr) * 100 / np.array(xwidths)

    # Least square fit, masking out the signal region
    popt, pcov = curve_fit(fit_func, np.delete(xdata, mask), np.delete(ydata, mask),
                           sigma=np.delete(yerr, mask), maxfev=10000)
    if verbose:
        print('fit params: ', popt)
    ydata_fit = np.array([fit_func(x, popt[0], popt[1], popt[2]) for x in xdata])

    # Check that the function is a good fit to the sideband
    residuals = np.delete((ydata - ydata_fit) / yerr, mask)
    if verbose > 0:
        print("Goodness: ", kstest(residuals, norm(loc=0, scale=1).cdf))
        print(residuals)
        print(((ydata - ydata_fit) / yerr)[mask])
        print('\n')

    # The following code is used to get the bin errors by propagating the errors on the fit params
    def fit_func_array(parr):
        # see the ATLAS diboson resonance search: https://arxiv.org/pdf/1708.04445.pdf.
        p1, p2, p3 = parr
        xi = 0.
        return np.array([p1 * (1. - (x / 13000.))**(p2 - xi * p3) * (x / 13000.)**-p3
                         for x in xdata])

    jac = numdifftools.core.Jacobian(fit_func_array)
    x_cov = np.dot(np.dot(jac(popt), pcov), jac(popt).T)
    # For plot, take systematic error band as the diagonal of the covariance matrix
    y_unc = np.sqrt([row[i] for i, row in enumerate(x_cov)])

    if (plotfile != None) & (plotfile != 'ax'):
        if plotsys:
            plt.fill_between(xdata, ydata_fit + y_unc, ydata_fit - y_unc,
                             facecolor='gray', edgecolor=None, alpha=0.4)
        yerr2 = np.array(yerr)
        yerr2[yerr >= ydata] = yerr2[yerr >= ydata] * 0.8
        plt.errorbar(xdata, ydata, [yerr2, yerr], None, 'bo', label='data', markersize=4)
        plt.plot(xdata, ydata_fit, 'r--', label='data')
        plt.yscale('log', nonposy='clip')
    if plotfile == 'ax':
        if plotsys:
            myax.fill_between(xdata, ydata_fit + y_unc, ydata_fit - y_unc,
                              facecolor='gray', edgecolor=None, alpha=0.4)
        yerr2 = np.array(yerr)
        yerr2[yerr >= ydata] = yerr2[yerr >= ydata] * 0.8
        myax.errorbar(xdata, ydata, [yerr2, yerr], None, 'bo', label='data', markersize=4)
        myax.plot(xdata, ydata_fit, 'r--', label='data')
        myax.set_yscale('log', nonposy='clip')
    if plotfile == 'show':
        plt.show()
    elif plotfile:
        plt.savefig(plotfile)

    # Now, let's compute some statistics.
    # Will use asymptotic formulae for p0 from Cowan et al arXiv:1007.1727
    # and systematics procedure from https://cds.cern.ch/record/2242860/files/NOTE2017_001.pdf

    # First get systematics in the signal region
    # This function returns array of signal predictions in the signal region
    def signal_fit_func_array(parr):
        # see the ATLAS diboson resonance search: https://arxiv.org/pdf/1708.04445.pdf.
        p1, p2, p3 = parr
        xi = 0.
        return np.array([np.sum([p1 * (1. - (x / 13000.))**(p2 - xi * p3) * (x / 13000.)**-p3
                                 * xwidths[mask[i]] / 100
                                 for i, x in enumerate(xdata[mask])])])

    # Get covariance matrix of prediction uncertainties in the signal region
    jac = numdifftools.core.Jacobian(signal_fit_func_array)
    x_signal_cov = np.dot(np.dot(jac(popt), pcov), jac(popt).T)
    # Inverse signal region covariance matrix:
    inv_x_signal_cov = inv(x_signal_cov)

    # Get observed and predicted event counts in the signal region
    obs = np.array([np.sum(np.array(ydata)[mask] * np.array(xwidths)[mask] / 100)])
    expected = np.array([np.sum([fit_func(xdata[targetbin], popt[0], popt[1], popt[2])
                                 * xwidths[targetbin] / 100
                                 for targetbin in mask])])

    # Negative numerator of log likelihood ratio, for signal rate mu = 0
    def min_log_numerator(expected_nuis_arr):
        # expected_nuis_arr is the array of systematic background uncertainty nuisance parameters
        # These are event rate densities
        expected_nuis_arr = np.array(expected_nuis_arr)
        to_return = 0
        # Poisson terms
        for i, expected_nuis in enumerate(expected_nuis_arr):
            # Poisson lambda. Have to rescale nuisance contribution by bin width
            my_lambda = expected[i] + expected_nuis_arr[i]
            # Prevent negative predicted rates
            if my_lambda < 10**-10:
                my_lambda = 10**-10
            # Poisson term. Ignore the factorial piece which will cancel in likelihood ratio
            to_return = to_return + (obs[i] * np.log(my_lambda) - my_lambda)
        # Gaussian nuisance term
        nuisance_term = -0.5 * np.dot(np.dot(expected_nuis_arr, inv_x_signal_cov), expected_nuis_arr)
        to_return = to_return + nuisance_term
        return -to_return

    def jac_min_log_numerator(expected_nuis_arr):
        # expected_nuis_arr is the array of systematic background uncertainty nuisance parameters
        # These are event rate densities
        expected_nuis_arr = np.array(expected_nuis_arr)
        to_return = np.array([0.])
        # Poisson terms
        # Poisson lambda. Have to rescale nuisance contribution by bin width
        my_lambda = expected + expected_nuis_arr
        dmy_lambda = np.array([1.])
        # Prevent negative predicted rates
        my_lambda[my_lambda < 10**-10] = np.ones(len(my_lambda[my_lambda < 10**-10])) * 10**-10
        dmy_lambda[my_lambda < 10**-10] = 0
        # Poisson term. Ignore the factorial piece which will cancel in likelihood ratio
        to_return = to_return + (obs * dmy_lambda / my_lambda - dmy_lambda)
        # Gaussian nuisance term
        nuisance_term = -np.dot(inv_x_signal_cov, expected_nuis_arr)
        to_return = to_return + nuisance_term
        return -to_return

    # Initialization of nuisance params
    expected_nuis_array_init = [0.02]

    # shift log likelihood to help minimization algo
    def rescaled_min_log_numerator(expected_nuis_arr):
        return min_log_numerator(expected_nuis_arr) - min_log_numerator(expected_nuis_array_init)

    # Perform minimization over nuisance parameters. Set bounds for bg nuisance at around 8 sigma.
    bnds = [[-8 * y_unc[mask[0]], 8 * y_unc[mask[0]]]]
    minimize_log_numerator = minimize(rescaled_min_log_numerator, expected_nuis_array_init,
                                      jac=jac_min_log_numerator, bounds=bnds)
    if verbose:
        print("numerator: ", minimize_log_numerator.items(), '\n')

    # Now get likelihood ratio denominator
    def min_log_denom(nuis_arr):
        # nuis_arr contains the bg systematics and also the signal rate
        expected_nuis_arr = np.array(nuis_arr)[:1]
        # print(expected_nuis_arr)
        mu = nuis_arr[1]
        # Signal prediction
        pred = [mu]
        to_return = 0
        # Poisson terms
        for i, expected_nuis in enumerate(expected_nuis_arr):
            # Poisson lambda
            my_lambda = expected[i] + expected_nuis_arr[i] + pred[i]
            # Prevent prediction from going negative
            if my_lambda < 10**-10:
                my_lambda = 10**-10
            # Poisson term. Ignore the factorial piece which will cancel in likelihood ratio
            to_return = to_return + (obs[i] * np.log(my_lambda) - my_lambda)
        # Gaussian nuisance term
        nuisance_term = -0.5 * np.dot(np.dot(expected_nuis_arr, inv_x_signal_cov), expected_nuis_arr)
        to_return = to_return + nuisance_term
        return -to_return

    def jac_min_log_denom(nuis_arr):
        # expected_nuis_arr is the array of systematic background uncertainty nuisance parameters
        # These are event rate densities
        expected_nuis_arr = np.array(nuis_arr)[:1]
        mu = nuis_arr[1]
        pred = [mu]
        to_return_first = np.array([0.])
        # Poisson terms
        # Poisson lambda. Have to rescale nuisance contribution by bin width
        my_lambda = expected + expected_nuis_arr + pred
        dmy_lambda = np.array([1.])
        # Prevent prediction from going negative
        my_lambda[my_lambda < 10**-10] = np.ones(len(my_lambda[my_lambda < 10**-10])) * 10**-10
        dmy_lambda[my_lambda < 10**-10] = 0
        # Poisson term. Ignore the factorial piece which will cancel in likelihood ratio
        to_return_first = to_return_first + (obs * dmy_lambda / my_lambda - dmy_lambda)
        # Gaussian nuisance term
        nuisance_term = -np.dot(inv_x_signal_cov, expected_nuis_arr)
        to_return_first = to_return_first + nuisance_term

        to_return_last = np.array([0.])
        dpred = np.array([[1.]])
        my_lambda = expected + expected_nuis_arr + pred
        dmy_lambda = dpred
        to_return_last = np.dot((obs / my_lambda), dmy_lambda.T) - np.sum(dmy_lambda, axis=1)
        return -np.append(to_return_first, to_return_last)

    # initialization for minimization
    nuis_array_init = [0.01, 1.]

    # Shift log likelihood for helping minimization algo.
    def rescaled_min_log_denom(nuis_arr):
        return min_log_denom(nuis_arr) - min_log_denom(nuis_array_init)

    bnds = ((None, None), (None, None))
    minimize_log_denominator = minimize(rescaled_min_log_denom, nuis_array_init,
                                        jac=jac_min_log_denom, bounds=bnds)
    if verbose:
        print("Denominator: ", minimize_log_denominator.items(), '\n')

    if minimize_log_denominator.x[-1] < 0:
        Zval = 0
        neglognum = 0
        neglogden = 0
    else:
        neglognum = min_log_numerator(minimize_log_numerator.x)
        neglogden = min_log_denom(minimize_log_denominator.x)
        Zval = np.sqrt(2 * (neglognum - neglogden))

    p0 = 1 - norm.cdf(Zval)

    if verbose:
        print("z = ", Zval)
        print("p0 = ", p0)

    # plt.title(str(p0))
    # if plotfile == 'show':
    #     plt.show()
    # elif plotfile:
    #     plt.savefig(plotfile)

    if return_teststat:
        return p0, 2 * (neglognum - neglogden)
    else:
        return p0
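# The significance above follows the asymptotic formulae of Cowan et al. (arXiv:1007.1727),
# with the local p-value given by p0 = 1 - Phi(Z). A quick standalone check of that
# conversion with scipy:
from scipy.stats import norm

for Z in (1., 2., 3., 5.):
    print(Z, 1 - norm.cdf(Z))   # e.g. Z = 5 corresponds to p0 of roughly 2.9e-7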
_26 = Object()
_26.data = np.repeat((0, 1), (3, 6))

with pm.Model() as _26.na:
    _26.p = pm.Uniform('p', 0, 1)
    _26.w = pm.Binomial('w', n=len(_26.data), p=_26.p, observed=_26.data.sum())
    _26.mean_p = pm.find_MAP()
    _26.std_q = ((1 / pm.find_hessian(_26.mean_p, vars=[_26.p]))**0.5)[0]

_26.mean_p['p'], _26.std_q
# -

# Assuming the posterior is Gaussian, it's maximized at $0.67$ and its standard deviation is $0.16$.

# 89% confidence interval:
_26.norm_dist = stats.norm(_26.mean_p['p'], _26.std_q)
_26.z = stats.norm.ppf([(1 - .89) / 2, 1 - (1 - 0.89) / 2])
print("89% confidence interval:", _26.mean_p['p'] + _26.std_q * _26.z)

# # Medium

# ## 2M1

_2m1 = Object()
_2m1.NUM = 100
_2m1.p_grid = np.linspace(0, 1, _2m1.NUM)
_2m1.prior = np.repeat(1, _2m1.NUM)

# ### Item 1 | W, W, W
_2m1.item_1 = Object()
_2m1.item_1.likelihood = binom.pmf(k=3, n=3, p=_2m1.p_grid)
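# With the Uniform(0, 1) prior, the exact posterior for the 6-out-of-9 data used in the
# quadratic approximation above is Beta(7, 4), so that approximation can be compared
# against the exact 89% percentile interval (a sketch, independent of the PyMC objects):
from scipy import stats

exact_posterior = stats.beta(1 + 6, 1 + 3)
print("exact 89% interval:", exact_posterior.ppf([(1 - .89) / 2, 1 - (1 - .89) / 2]))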
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

plt.ion()

data = np.loadtxt('data.txt')
density = np.asarray(data[:, 0])

w, p = stats.shapiro(density)
wL, pL = stats.shapiro(np.log(density))

plt.subplot(2, 1, 1)
x = np.linspace(min(density) - 2, max(density) + 1, 100)
mu, sigma = stats.norm.fit(density)
new_norm = stats.norm(mu, sigma)
plt.hist(density, normed=1, label='Density Hist')
plt.plot(x, new_norm.pdf(x), label='Best Normal Fit')
plt.title('Shapiro-Wilk for Gaussianity')
plt.text(-1, 0.6, 'p value=%f' % (p))
plt.text(-1, 0.5, 'w value=%f' % (w))
plt.ylabel('Asteroid Density')
plt.legend()

plt.subplot(2, 1, 2)
muL, sigmaL = stats.norm.fit(np.log(density))
new_normL = stats.norm(muL, sigmaL)
y = np.linspace(min(np.log(density) - 1), max(np.log(density) + 1), 100)
plt.hist(np.log(density), normed=1, label='Log(Density) Hist')
plt.plot(y, new_normL.pdf(y), label='Best Normal fit')
plt.text(-1, 1.5, 'p value=%f' % (pL))
plt.text(-1, 1.3, 'w value=%f' % (wL))
plt.ylabel('Log(Density)')
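# A complementary visual normality check is a quantile-quantile plot; a brief sketch,
# assuming `density` is the array loaded above:
import matplotlib.pyplot as plt
from scipy import stats

plt.figure()
stats.probplot(density, dist="norm", plot=plt)
plt.title('Normal probability plot of asteroid density')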
from nose.tools import *
from os.path import abspath, dirname, join
import numpy as np
import pandas as pd
from scipy.stats import norm, lognorm
import wntr

testdir = dirname(abspath(str(__file__)))
datadir = join(testdir, '..', '..', 'tests', 'networks_for_testing')

FC1 = wntr.scenario.FragilityCurve()
FC1.add_state('Major', 2, {'Default': norm(loc=1, scale=2)})
FC1.add_state('Minor', 1, {'Default': norm(loc=0, scale=1)})

FC2 = wntr.scenario.FragilityCurve()
FC2.add_state('Minor', 1, {
    'Default': lognorm(0.25, loc=0, scale=1),
    '3': lognorm(0.2, loc=0, scale=1)
})
FC2.add_state('Major', 2, {'Default': lognorm(0.25, loc=1, scale=2)})

#x = np.linspace(-5,5,100)
#for name, state in FC2.states():
#    dist=state.distribution['Default']
#    plt.plot(x,dist.cdf(x), label=name)
#plt.ylim((0,1))
#plt.legend()


def test_get_priority_map():
    priority_map = FC1.get_priority_map()
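# The fragility states above wrap frozen scipy distributions, so the probability of
# reaching a damage state at a given demand x can be read directly from the state CDFs.
# A minimal sketch using the same distributions as FC1 (plain scipy, no wntr API assumed):
from scipy.stats import norm

x = 1.0
print("P(at least Minor):", norm(loc=0, scale=1).cdf(x))
print("P(at least Major):", norm(loc=1, scale=2).cdf(x))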
def kernel_smooth(spike_vector, sigma, edges, bin_size=None, padding='symmetric',
                  border_correction=False):
    """
    Receives an array of spike times (point-process like), and smooths it by
    convolving with a _gaussian_ kernel of width *sigma*.

    The time position will be allocated with a time precision that is a ratio of
    sigma, given by tp = sigma/precision_factor.

    Parameters
    ----------
    spike_vector : array
        Point process like spike times, *in milliseconds*
    sigma : int
        Width of the window, in ms
    edges : tuple
        Starting and ending time of the window of interest, in ms.
    precision_factor : int, default 10
        Factor of the precision ratio sigma/temporal_precision
    bin_size : int, default None
        The size (in ms) of each step in the returned smoothed data.
        By default it is the minimum, equal to 1 ms.
    padding : str, default None
        The kind of padding on array edges. Possible values are 'constant',
        'edge', 'maximum', 'mean', 'median', 'minimum', 'reflect', 'symmetric',
        'wrap', or a <function>.
    border_correction : bool, default False
        Whether to divide borders by the spike vector's true contribution.
        Raises a ValueError if used together with padding.

    Returns
    -------
    smoothed_data : array
        The estimated firing rate at each interval of bin_size, in *spikes per second*
    times : array
        The time at the left edge of each interval

    Notes
    -----
    Total kernel size is 6*sigma, 3 sigma on each side.

    See also
    --------
    numpy.pad for padding options and information.
    """
    tp = 1  # int(sigma/precision_factor)
    if bin_size is None:
        bin_size = tp
    try:
        assert float(bin_size) == bin_size  # Is multiple
    except AssertionError:
        raise ValueError("Bin size must be a multiple of temporal precision.")

    n_bins = int(bin_size * int((edges[1] - edges[0]) / bin_size))
    edges = (edges[0], bin_size * int(n_bins / bin_size) + edges[0])
    if edges[1] <= edges[0]:
        return ([], [])
    if sigma is None:
        return np.histogram(spike_vector, bins=int((edges[1] - edges[0]) / bin_size), range=edges)

    spike_count, times = np.histogram(spike_vector, bins=n_bins, range=edges)
    each_size_len = int(3 * sigma + 1)
    if padding is not None:
        if border_correction:
            raise ValueError('Padding and correction cannot be used together')
        spike_count = np.pad(spike_count, each_size_len, padding)

    s = sigma  # Just for one-lining below
    kernel = st.norm(0, s).pdf(np.linspace(-3 * s, 3 * s, 2 * each_size_len + 1))
    smoothed = np.convolve(spike_count, kernel, 'valid' if padding is not None else 'same')

    if border_correction:
        contrib = st.norm(0, s).cdf(np.linspace(0, 3 * s, each_size_len))
        smoothed[:each_size_len] /= contrib
        smoothed[-each_size_len:] /= contrib[::-1]

    cs = np.hstack((0, smoothed.cumsum())) * 1000 / bin_size
    return np.diff(cs[::bin_size]), times[:-bin_size:bin_size]
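# A brief usage sketch of kernel_smooth on a synthetic spike train (assumes numpy as np
# and scipy.stats as st are importable, as the function itself expects):
import numpy as np
import scipy.stats as st

rng = np.random.default_rng(0)
spikes = np.sort(rng.uniform(0, 1000, size=50))   # 50 spikes over a 1 s window, in ms
rate, t = kernel_smooth(spikes, sigma=20, edges=(0, 1000), bin_size=10)
print(rate.mean())                                # should be roughly 50 spikes per second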
def run(self):
    """Instantiate the random mu, pi and var"""
    self.mu = [-8, 8, 5]
    self.pi = [1 / 3, 1 / 3, 1 / 3]
    self.var = [5, 3, 1]

    """E-Step"""
    for iter in range(self.iterations):

        """Create the array r with dimensionality nxK"""
        r = np.zeros((len(X_tot), 3))

        """Probability for each datapoint x_i to belong to gaussian g"""
        for c, g, p in zip(range(3), [
                norm(loc=self.mu[0], scale=self.var[0]),
                norm(loc=self.mu[1], scale=self.var[1]),
                norm(loc=self.mu[2], scale=self.var[2])
        ], self.pi):
            # Write the probability that x belongs to gaussian c in column c.
            # Therewith we get a 60x3 array filled with the probability that each x_i
            # belongs to one of the gaussians
            r[:, c] = p * g.pdf(X_tot)

        """
        Normalize the probabilities such that each row of r sums to 1 and weight it by
        mu_c == the fraction of points belonging to cluster c
        """
        for i in range(len(r)):
            r[i] = r[i] / (np.sum(self.pi) * np.sum(r, axis=1)[i])

        """Plot the data"""
        fig = plt.figure(figsize=(10, 10))
        ax0 = fig.add_subplot(111)
        for i in range(len(r)):
            ax0.scatter(self.X[i], 0, c=np.array([r[i][0], r[i][1], r[i][2]]), s=100)

        """Plot the gaussians"""
        for g, c in zip([
                norm(loc=self.mu[0], scale=self.var[0]).pdf(np.linspace(-20, 20, num=60)),
                norm(loc=self.mu[1], scale=self.var[1]).pdf(np.linspace(-20, 20, num=60)),
                norm(loc=self.mu[2], scale=self.var[2]).pdf(np.linspace(-20, 20, num=60))
        ], ['r', 'g', 'b']):
            ax0.plot(np.linspace(-20, 20, num=60), g, c=c)

        """M-Step"""

        """calculate m_c"""
        m_c = []
        for c in range(len(r[0])):
            m = np.sum(r[:, c])
            m_c.append(m)  # For each cluster c, calculate the m_c and add it to the list m_c

        """calculate pi_c"""
        for k in range(len(m_c)):
            # For each cluster c, calculate the fraction of points pi_c which belongs to cluster c
            self.pi[k] = (m_c[k] / np.sum(m_c))

        """calculate mu_c"""
        self.mu = np.sum(self.X.reshape(len(self.X), 1) * r, axis=0) / m_c

        """calculate var_c"""
        var_c = []
        for c in range(len(r[0])):
            var_c.append((1 / m_c[c]) * np.dot(
                ((np.array(r[:, c]).reshape(60, 1)) * (self.X.reshape(len(self.X), 1) - self.mu[c])).T,
                (self.X.reshape(len(self.X), 1) - self.mu[c])))

        plt.draw()
        plt.show(block=False)
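# A self-contained check of a single E-step responsibility as computed above: for one point
# x, r_c is proportional to pi_c * N(x; mu_c, scale_c), and the row is then normalized to
# sum to 1 (the initial parameter values are reused here purely for illustration).
import numpy as np
from scipy.stats import norm

x = 2.0
mu, pi, scale = [-8, 8, 5], [1 / 3, 1 / 3, 1 / 3], [5, 3, 1]
unnorm = np.array([p * norm(loc=m, scale=s).pdf(x) for m, p, s in zip(mu, pi, scale)])
print(unnorm / unnorm.sum())   # responsibilities for the three components; sums to 1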
def smoothing_matrix(measure, vertids, fwhm, exclude=None, minpool=6):
    """Define a matrix to smooth voxels using surface geometry.

    If T is an n_voxel x n_tp timeseries matrix, the resulting object S can be
    used to smooth the timeseries with the matrix operation S * T.

    Parameters
    ----------
    measure : surface.SurfaceMeasure object
        Object for measuring distance along a cortical mesh.
    vertids : 1d numpy array
        Array of vertex IDs corresponding to each cortical voxel.
    fwhm : float
        Size of the smoothing kernel, in mm.
    exclude : 1d numpy array
        Binary array defining voxels that should be excluded and interpolated
        during smoothing.
    minpool : int
        Minimum number of neighborhood vertices to include in smoothing pool.

    Returns
    -------
    S : csr sparse matrix
        Matrix with smoothing weights.

    """
    # Define the weighting function
    if fwhm <= 0:
        raise ValueError("Smoothing kernel fwhm must be positive")
    sigma = fwhm / (2 * np.sqrt(2 * np.log(2)))
    norm = stats.norm(0, sigma)

    # Define the vertex ids that will be included in the smoothing
    if exclude is None:
        exclude = np.zeros_like(vertids)
    clean = ~(exclude.astype(bool))
    clean_verts = set(vertids[clean])

    # Define a mapping from vertex index to voxel index
    voxids = np.full(measure.n_v, -1, np.int)
    for i, v in enumerate(vertids):
        voxids[v] = i

    # Initialize the sparse smoothing matrix
    n_voxels = len(vertids)
    mat_size = n_voxels, n_voxels
    S = sparse.lil_matrix(mat_size)

    # Ensure that the minpool isn't larger than the surface
    minpool = min(minpool, clean.sum())

    # Build the matrix by rows
    for voxid, vertid in enumerate(vertids):

        # Find the distance to a minimum number of neighboring voxels
        factor = 4
        pool = 0
        while pool < minpool:
            all_dist = measure(vertid, sigma * factor)
            distmap = {v: d for v, d in all_dist.items() if v in clean_verts}
            pool = len(distmap)
            factor += 1
            if factor > 10:
                # TODO probably better not to fail but to return data with nans
                # (or at least make that an option) and handle downstream
                raise RuntimeError("Could not find enough neighbors in mesh")

        # Find weights for nearby voxels
        verts, distances = zip(*distmap.items())
        voxels = voxids[list(verts)]
        w = norm.pdf(distances)
        w /= w.sum()

        # Update the matrix
        S[voxid, voxels] = w

    return S.tocsr()
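# The fwhm-to-sigma conversion above, sigma = fwhm / (2 * sqrt(2 * ln 2)), follows from the
# Gaussian's full width at half maximum; a quick numerical check that the pdf at +/- fwhm/2
# is half of its peak value:
import numpy as np
from scipy import stats

fwhm = 6.0
sigma = fwhm / (2 * np.sqrt(2 * np.log(2)))
g = stats.norm(0, sigma)
print(g.pdf(fwhm / 2) / g.pdf(0))   # ~0.5 by construction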