def get_mu_from_cdf_probit_ln(self, b0, b1):
    # The standard mean of a CDF is where probability = 0.5
    mu = (sqrt(2)*erfinv(0) - b0)/b1
    x = 5
    sig = (log(x) - mu) / (sqrt(2) * erfinv(erf((b1*log(x) + b0)/sqrt(2))))
    return sig, mu
def histogramGauss2d(d1, d2, bins=256, sigmagauss=1., plotty=False, uniform=False):
    o1 = N.argsort(d1)
    o2 = N.argsort(d2)
    d1g = 0.*d1
    d2g = 0.*d2
    lenny = len(d1)
    step = 1./lenny
    if (uniform):
        d1g[o1] = N.arange(0.5*step, 1, step).astype(N.float32)
        d2g[o2] = N.arange(0.5*step, 1, step).astype(N.float32)
    else:
        d1g[o1] = (N.sqrt(2.)*sigmagauss*SSp.erfinv(2.*N.arange(0.5*step, 1, step)-1.)).astype(N.float32)
        d2g[o2] = N.sqrt(2.)*sigmagauss*SSp.erfinv(2.*N.arange(0.5*step, 1, step)-1.).astype(N.float32)
    print d1g[o1[-5:]], d2g[o2[-5:]]
    print d1[o1[-5:]], d2[o2[-5:]]
    hist2d, xedgesg, yedgesg = N.histogram2d(d1g, d2g, bins=bins, normed=True)
    where_xedgesg = d1g[o1].searchsorted(xedgesg)
    where_yedgesg = d2g[o2].searchsorted(yedgesg)
    xcenters = 0.*xedgesg[:-1]
    ycenters = 0.*yedgesg[:-1]
    xbinsize = 0.*xedgesg[:-1]
    ybinsize = 0.*yedgesg[:-1]
    xedges = d1[o1[where_xedgesg]]
    yedges = d2[o2[where_yedgesg]]
    print xedgesg[-5:]
    print where_xedgesg[-5:]
    print xedges[-5:]
    print hist2d[-5:, -5:]
    for i in range(len(xcenters)):
        xcenters[i] = N.mean(d1[o1[where_xedgesg[i]:where_xedgesg[i+1]]])
        ycenters[i] = N.mean(d2[o2[where_yedgesg[i]:where_yedgesg[i+1]]])
        xbinsize[i] = xedges[i+1]-xedges[i]
        ybinsize[i] = yedges[i+1]-yedges[i]
    if (plotty):
        M.subplot(221)
        M.pcolor(xedgesg, yedgesg, hist2d)
        M.colorbar()
        M.subplot(222)
        M.pcolor(xedgesg, yedgesg, hist2d*N.outer(xbinsize, ybinsize))
        M.colorbar()
        M.subplot(223)
        M.pcolor(xedges, yedges, hist2d)
    return hist2d, xedgesg, yedgesg, xedges, yedges, xcenters, ycenters
def getY(length):
    y = []
    yTemp = NP.array(range(length))
    yTemp = (yTemp+1.)/float(len(yTemp))
    for cp in yTemp:
        if cp < 0.5:
            map = -sq*SCI.erfinv(1. - 2.*cp)
        elif cp > 0.5:
            map = sq*SCI.erfinv(2.*cp - 1.)
        else:
            map = 0.
        y.append(map)
    return y
def extension(M1_amplitude, M1_slope, M1_offset, M2_amplitude, M2_slope, M2_offset,
              M1_days, M1_data_extension, M2_days, M2_data_extension, m2_m1_conversion):
    M1_height_max = 35.  # in millimeters
    M1_model_extension = (M1_amplitude * spec.erf(M1_slope * (M1_days - M1_offset))) + (M1_height_max - M1_amplitude)
    M1_initiation = (spec.erfinv((M1_amplitude - M1_height_max) / M1_amplitude) + M1_offset * M1_slope) / M1_slope
    M2_height_max = 41.  # in millimeters
    M2_model_extension = (M2_amplitude * spec.erf(M2_slope * (M2_days - M2_offset))) + (M2_height_max - M2_amplitude)
    M2_initiation = (spec.erfinv((M2_amplitude - M2_height_max) / M2_amplitude) + M2_offset * M2_slope) / M2_slope
    m2_m1_converted = convert(m2_m1_conversion, M1_amplitude, M1_slope, M1_offset, M2_amplitude, M2_slope, M2_offset)
    return M1_model_extension, M1_data_extension, M2_model_extension, M2_data_extension, m2_m1_converted, M1_initiation, M2_initiation
def convert(m2days):
    '''
    '''
    #m2height = 75.123*spec.erf(.0028302*(m2days+70.17))+(42-75.12266) # max at 42
    m2height = 44.182*spec.erf(.003736412*(m2days+53.0767))+(40.5-44.182)  # max at 40.5, optimized with full data set on nlopt
    m2height = 46.625*spec.erf(.0032506*(m2days+53.0767))+(42.46-46.625)  # max at 42.46, optimized with synchrotron data set on nlopt
    m2percent = m2height / 42
    m1height = m2percent * 36
    m1days = (25000000*spec.erfinv((50*m1height-283)/1517)-1577367)/152550
    m1days_a = 163.873*spec.erfinv(0.0292948*(75.123*spec.erf(0.0028302*(m2days+70.17))-33.123)-0.0186437)-10.5459
    m1days_b = 163.873*spec.erf(0.0282485*(75.123*spec.erf(0.0028302*(m2days+70.17))-33.123)-0.0186437)-10.5459
    return m1days
def capak_error(mag, mag_limit, zero_pt):
    flux_lim = astro.mag2flux(mag_limit, zero_pt)
    hold = 2.0 * np.random.random() - 1.0
    if hold > 0:
        hold = ss.erfinv(hold) * flux_lim
    else:
        hold = -1.0 * ss.erfinv(-1.0 * hold) * flux_lim
    flux = astro.mag2flux(mag, zero_pt) + hold
    fluxerror = (flux + flux_lim ** 2.0) ** 0.5 / 10.0
    obs_mag = astro.flux2mag(flux, zero_pt)
    mag_err = (2.5 / np.log(10.0)) * (fluxerror / flux)
    if flux < 0 or mag_err > 2.0:
        mag_err = mag_limit
    return obs_mag, mag_err
def II(x, a, t, method=5):
    if method == 0:
        sys = integrate.gsl_function(cdf, (a, t))
        integral = integrate.qagp(sys, [0., x], 1e-8, 1e-7, 100, w2)
    elif method == 2:
        integral = quad(cdf_eta, 1e-8, x, args=(a, t))
    elif method == 3:
        integral = x * cdf_eta(x, a, t) - quad(xpdf, 1e-10, x, args=(a, t))[0]
    elif method == 4:
        phi = a + 2 * sqrt(t) * erfinv(2. * x - 1.)
        integral = x * cdf_phi(phi, a, t) - quad(xr, -12., phi, args=(a, t))[0]
    elif method == 5:
        phi = a + 2 * sqrt(t) * erfinv(2. * x - 1.)
        integral = x / 2. * (1. + erf(phi / sqrt(2. * (1. - 2. * t)))) - quad(xr, -Inf, phi, args=(a, t))[0]
    return integral
def rankStandardizeNormal(X):
    """
    Gaussianize X: [samples x phenotypes]
    - each phenotype is converted to ranks and transformed back to normal using the inverse CDF
    """
    Is = X.argsort(axis=0)
    RV = SP.zeros_like(X)
    rank = SP.zeros_like(X)
    for i in xrange(X.shape[1]):
        x = X[:, i]
        i_nan = SP.isnan(x)
        if 0:
            Is = x.argsort()
            rank = SP.zeros_like(x)
            rank[Is] = SP.arange(X.shape[0])
            # add one to ensure nothing = 0
            rank += 1
        else:
            rank = st.rankdata(x[~i_nan])
        # divide by (N+1) which yields uniform [0,1]
        rank /= ((~i_nan).sum()+1)
        # apply inverse gaussian cdf
        RV[~i_nan, i] = SP.sqrt(2) * special.erfinv(2*rank-1)
        RV[i_nan, i] = x[i_nan]
    return RV
def inverse_deriv_numpy(self, y):
    abs_y = np.abs(y)
    dx = np.empty_like(y)
    dx[abs_y < self.b] = 0.5 / norm_pdf(erfinv(y[abs_y < self.b]) * 2**0.5)
    dx[abs_y >= self.b] = (2 * self.alpha * abs_y[abs_y >= self.b] + self.beta)
    return dx
def rsig(ndof_eff, alpha=0.95):
    """
    USAGE
    -----
    Rsig = rsig(ndof_eff, alpha=0.95)

    Computes the minimum (absolute) threshold value 'rsig' that the Pearson
    correlation coefficient r between two normally-distributed data sequences
    with 'ndof_eff' effective degrees of freedom has to exceed to be
    statistically significant at the 'alpha' (defaults to 0.95) confidence level.

    For example, if rsig(ndof_eff, alpha=0.95) = 0.7 for a given pair of
    NORMALLY-DISTRIBUTED samples with a correlation coefficient r > 0.7, there
    is a 95 % chance that the r estimated from the samples is significantly
    different from zero. In other words, there is a 5 % chance that two random
    sequences would have a correlation coefficient higher than 0.7.

    OBS: This assumes that the two data series have a normal distribution.

    Translated to Python from the original matlab code by Prof. Sarah Gille
    (significance.m), available at http://www-pord.ucsd.edu/~sgille/sio221c/

    References
    ----------
    Gille lecture notes on data analysis, available at
    http://www-pord.ucsd.edu/~sgille/mae127/lecture10.pdf

    Example
    -------
    TODO
    """
    rcrit_z = erfinv(alpha)*np.sqrt(2./ndof_eff)
    return rcrit_z
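# --- Usage sketch for rsig() (added for illustration, not part of the original source) ---
# Assumes rsig() above is in scope with numpy as np and scipy.special.erfinv imported.
# With ~30 effective degrees of freedom, any |r| above the returned threshold is
# significant at the 95% confidence level.
r_crit_example = rsig(30, alpha=0.95)
print("95%% significance threshold for |r|: %.3f" % r_crit_example)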
def tooth_timing_convert(conversion_times, a1, s1, o1, max1, a2, s2, o2, max2):
    '''
    Takes an array of events in days occurring in one tooth, calculates where
    these will appear spatially during tooth extension, then maps these events
    onto the spatial dimensions of a second tooth, and calculates when similar
    events would have occurred in days to produce this mapping in the second
    tooth.

    Inputs:
    conversion_times: a 1-dimensional numpy array with days to be converted.
    a1, s1, o1, max1: the amplitude, slope, offset and max height of the error
        function describing the first tooth's extension, in mm, over time in days.
    a2, s2, o2, max2: the amplitude, slope, offset and max height of the error
        function describing the second tooth's extension, in mm, over time in days.

    Returns: converted 1-dimensional numpy array of converted days.
    '''
    t1_ext = a1*spec.erf(s1*(conversion_times-o1))+(max1-a1)
    t1_pct = t1_ext / max1
    t2_ext = t1_pct * max2
    converted_times = (spec.erfinv((a2+t2_ext-max2)/a2) + (o2*s2)) / s2
    return converted_times
def forward_cpu(self, x):
    if not available_cpu:
        raise ImportError('SciPy is not available. Forward computation'
                          ' of erfinv in CPU can not be done.' + str(_import_error))
    self.retain_outputs((0,))
    return utils.force_array(special.erfinv(x[0]), dtype=x[0].dtype),
def normal(self, media, desviacion):
    p = ga.GeneradoresAleatorios().generar_wichmannHill(1, 77)
    x = 2*p - 1
    erfinv = sp.erfinv(x)
    normInv = media + desviacion*np.sqrt(2)*(erfinv)
    print normInv
    return normInv
def age_hs(T0, T1, T, d, kappa):
    """
    Gives the age of the plate when the temperature is T at depth d for a
    half-space cooling model with top and bottom boundary conditions T0 and T1
    and thermal diffusivity kappa.
    """
    return (1/kappa)*(d/(2*erfinv((T-T0)/(T1-T0))))**2
def median_absolute_deviation(values, scaletonormal=False, cenestimator=np.median):
    """
    Computes the median_absolute_deviation for the provided sequence of values,
    a more robust estimator than the variance.

    :param values: the values for which to compute the MAD
    :type values: array-like, will be treated as 1D
    :param scaletonormal: Rescale the MAD so that a normal distribution is 1
    :type scaletonormal: bool
    :param cenestimator:
        A function to estimate the center of the values from a 1D array of
        values. To actually be the "median" absolute deviation, this must be
        left as the default (median).
    :type cenestimator: callable

    :returns: the MAD as a float
    """
    from scipy.special import erfinv

    x = np.array(values, copy=False).ravel()
    res = np.median(np.abs(x - np.median(x)))

    if scaletonormal:
        nrm = (2**0.5*erfinv(.5))
        return res/nrm
    else:
        return res
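# --- Usage sketch for median_absolute_deviation() (illustrative, not from the original source) ---
# Assumes the function above is in scope with numpy imported as np. For normally
# distributed data, the normal-scaled MAD should approach the sample standard deviation.
vals_example = np.random.randn(10000)
mad_raw = median_absolute_deviation(vals_example)
mad_scaled = median_absolute_deviation(vals_example, scaletonormal=True)
print(mad_raw, mad_scaled)  # mad_scaled should be close to 1 for standard normal input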
def plot_normprob(d, snrs, outroot):
    """ Normal quantile plot compares observed SNR to expectation given frequency of occurrence.
    Includes negative SNRs, too.
    """
    outname = os.path.join(d["workdir"], "plot_" + outroot + "_normprob.png")

    # define norm quantile functions
    Z = lambda quan: n.sqrt(2) * erfinv(2 * quan - 1)
    quan = lambda ntrials, i: (ntrials + 1 / 2.0 - i) / ntrials

    # calc number of trials
    npix = d["npixx"] * d["npixy"]
    if d.has_key("goodintcount"):
        nints = d["goodintcount"]
    else:
        nints = d["nints"]
    ndms = len(d["dmarr"])
    dtfactor = n.sum([1.0 / i for i in d["dtarr"]])  # assumes dedisperse-all algorithm
    ntrials = npix * nints * ndms * dtfactor
    logger.info("Calculating normal probability distribution for npix*nints*ndms*dtfactor = %d" % (ntrials))

    # calc normal quantile
    if len(n.where(snrs > 0)[0]):
        snrsortpos = n.array(sorted(snrs[n.where(snrs > 0)], reverse=True))  # high-res snr
        Zsortpos = n.array([Z(quan(ntrials, j + 1)) for j in range(len(snrsortpos))])
        logger.info("SNR positive range = (%.1f, %.1f)" % (snrsortpos[-1], snrsortpos[0]))
        logger.info("Norm quantile positive range = (%.1f, %.1f)" % (Zsortpos[-1], Zsortpos[0]))
    if len(n.where(snrs < 0)[0]):
        snrsortneg = n.array(sorted(n.abs(snrs[n.where(snrs < 0)]), reverse=True))  # high-res snr
        Zsortneg = n.array([Z(quan(ntrials, j + 1)) for j in range(len(snrsortneg))])
        logger.info("SNR negative range = (%.1f, %.1f)" % (snrsortneg[-1], snrsortneg[0]))
        logger.info("Norm quantile negative range = (%.1f, %.1f)" % (Zsortneg[-1], Zsortneg[0]))

    # plot
    fig3 = plt.Figure(figsize=(10, 10))
    ax3 = fig3.add_subplot(111)
    if len(n.where(snrs < 0)[0]) and len(n.where(snrs > 0)[0]):
        logger.info("Plotting positive and negative cands")
        ax3.plot(snrsortpos, Zsortpos, "k.")
        ax3.plot(snrsortneg, Zsortneg, "kx")
        refl = n.linspace(
            min(snrsortpos.min(), Zsortpos.min(), snrsortneg.min(), Zsortneg.min()),
            max(snrsortpos.max(), Zsortpos.max(), snrsortneg.max(), Zsortneg.max()),
            2,
        )
    elif len(n.where(snrs > 0)[0]):
        logger.info("Plotting positive cands")
        refl = n.linspace(min(snrsortpos.min(), Zsortpos.min()), max(snrsortpos.max(), Zsortpos.max()), 2)
        ax3.plot(snrsortpos, Zsortpos, "k.")
    elif len(n.where(snrs < 0)[0]):
        logger.info("Plotting negative cands")
        refl = n.linspace(min(snrsortneg.min(), Zsortneg.min()), max(snrsortneg.max(), Zsortneg.max()), 2)
        ax3.plot(snrsortneg, Zsortneg, "kx")
    ax3.plot(refl, refl, "k--")
    ax3.set_xlabel("SNR")
    ax3.set_ylabel("Normal quantile SNR")
    canvas = FigureCanvasAgg(fig3)
    canvas.print_figure(outname)
def invGaussianPsy(p, alphax, betax, gammax, lambdax):
    """
    Compute the inverse gaussian psychometric function.

    Parameters
    ----------
    p :
        Proportion correct on the psychometric function.
    alphax :
        Mid-point(s) of the psychometric function.
    betax :
        The slope of the psychometric function.
    gammax :
        Lower limit of the psychometric function.
    lambdax :
        The lapse rate.

    Returns
    -------
    x :
        Stimulus level at which proportion correct equals `p` for the listener
        specified by the function.

    References
    ----------
    .. [1] Kingdom, F. A. A., & Prins, N. (2010). *Psychophysics: A Practical
       Introduction*. Academic Press.
    """
    out = alphax + sqrt(2*betax**2)*erfinv(2*(p-gammax)/(1-gammax-lambdax)-1)
    return out
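# --- Usage sketch for invGaussianPsy() (illustrative, not from the original source) ---
# Assumes the function above is in scope with sqrt and erfinv imported as used there.
# For a 2AFC-style function with gammax=0.5 and no lapses, p=0.75 should map back
# to the midpoint alphax, since the erfinv argument reduces to zero.
x_at_75 = invGaussianPsy(0.75, alphax=0.0, betax=1.0, gammax=0.5, lambdax=0.0)
print(x_at_75)  # expected to be ~0 (the midpoint) by construction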
def histClim(imData, cutoff=0.01, bins_=512):
    '''Compute display range based on a confidence interval-style, from a histogram
    (i.e. ignore the 'cutoff' proportion lowest/highest value pixels)'''
    if (cutoff <= 0.0):
        return imData.min(), imData.max()
    # compute image histogram
    hh, bins_ = imHist(imData, bins_)
    hh = hh.astype('float')

    # number of pixels
    Npx = np.sum(hh)
    hh_csum = np.cumsum(hh)

    # Find indices where hh_csum is < and > Npx*cutoff
    try:
        i_forward = np.argwhere(hh_csum < Npx*(1.0 - cutoff))[-1][0]
        i_backward = np.argwhere(hh_csum > Npx*cutoff)[0][0]
    except IndexError:
        print("histClim failed, returning confidence interval instead")
        from scipy.special import erfinv
        sigma = np.sqrt(2) * erfinv(1.0 - cutoff)
        return ciClim(imData, sigma)

    clim = np.array([bins_[i_backward], bins_[i_forward]])
    if clim[0] > clim[1]:
        clim = np.array([clim[1], clim[0]])
    return clim
def predict_percentile(self, X, ancillary_X=None, p=0.5):
    """
    Returns the median lifetimes for the individuals, by default. If the survival
    curve of an individual does not cross ``p``, then the result is infinity.
    http://stats.stackexchange.com/questions/102986/percentile-loss-functions

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns can
        be in any order. If a numpy array, columns must be in the same order as
        the training data.
    ancillary_X: numpy array or DataFrame, optional
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns can
        be in any order. If a numpy array, columns must be in the same order as
        the training data.
    p: float, optional (default=0.5)
        the percentile, must be between 0 and 1.

    Returns
    -------
    percentiles: DataFrame

    See Also
    --------
    predict_median
    """
    exp_mu_, sigma_ = self._prep_inputs_for_prediction_and_return_scores(X, ancillary_X)
    return pd.DataFrame(exp_mu_ * np.exp(np.sqrt(2) * sigma_ * erfinv(2 * p - 1)), index=_get_index(X))
def plot_fill(llr_cur, tkey, asimov_llr, hist_vals, bincen, fit_gauss, **kwargs):
    """
    Plots fill between the asimov llr value and the histogram values
    which represent an LLR distribution.
    """
    validate_key(tkey)

    expr = 'bincen < asimov_llr' if 'true_N' in tkey else 'bincen > asimov_llr'
    plt.fill_betweenx(hist_vals, bincen, x2=asimov_llr, where=eval(expr), **kwargs)

    pvalue = (1.0 - float(np.sum(llr_cur > asimov_llr))/len(llr_cur)
              if 'true_N' in tkey
              else (1.0 - float(np.sum(llr_cur < asimov_llr))/len(llr_cur)))

    sigma_fit = np.fabs(asimov_llr - fit_gauss[1])/fit_gauss[2]
    #logging.info(
    #    " For tkey: %s, gaussian computed mean (of alt MH): %.3f and sigma: %.3f"
    #    %(tkey,fit_gauss[1],fit_gauss[2]))
    pval_gauss = 1.0 - norm.cdf(sigma_fit)
    sigma_1side = np.sqrt(2.0)*erfinv(1.0 - pval_gauss)

    mctrue_row = [tkey, asimov_llr, llr_cur.mean(), pvalue, pval_gauss, sigma_fit,
                  sigma_1side]

    return mctrue_row
def normplot(x, *arg, **kwarg):
    y = erfinv(np.linspace(-1, 1, len(x)+2)[1:-1:])
    sx = np.sort(x, axis=None)
    mpl.plot(sx, y, *arg, **kwarg)
    p = np.array([0.001, 0.003, 0.01, 0.02, 0.05, 0.10, 0.25, 0.5, 0.75,
                  0.90, 0.95, 0.98, 0.99, 0.997, 0.999])
    label = np.array(['0.001', '0.003', '0.01', '0.02', '0.05', '0.10', '0.25',
                      '0.50', '0.75', '0.90', '0.95', '0.98', '0.99', '0.997',
                      '0.999'])
    position = erfinv(p*2-1)
    mpl.yticks(position, label)
    mpl.grid()
def s_bird(X, scales, n_runs, p_above, p_active=1, max_iter=100,
           random_state=None, n_jobs=1, memory=Memory(None), verbose=False):
    """ Multichannel version of BIRD (S-BIRD) seeking Structured Sparsity

    Parameters
    ----------
    X : array, shape (n_channels, n_times)
        The numpy n_channels-by-n_samples array to be denoised where n_channels
        is the number of sensors and n_samples the dimension
    scales : list of int
        The list of MDCT scales that will be used to build the dictionary Phi
    n_runs : int
        the number of runs (n_runs in the paper)
    p_above : float
        probability of appearance of the max above which the noise hypothesis
        is considered false
    p_active : float
        proportion of active channels (l in the paper)
    max_iter : int
        The maximum number of iterations in one pursuit.
    random_state : None | int | np.random.RandomState
        To specify the random generator state (seed).
    n_jobs : int
        The number of jobs to run in parallel.
    memory : instance of Memory
        The object to use to cache some computations. If cachedir is None, no
        caching is performed.
    verbose : bool
        verbose mode

    Returns
    -------
    X_denoise : array, shape (n_channels, n_times)
        The denoised data.
    """
    X, prepad = _pad(X)

    # Computing Lambda_W(Phi, p_above)
    n_channels = X.shape[0]
    n_samples = float(X.shape[1])
    # size of the full shift-invariant dictionary
    M = np.sum(np.array(scales) / 2) * n_samples
    sigma = sqrt((1.0 - (2.0 / np.pi)) / float(n_samples))
    Lambda_W = sigma * sqrt(2.0) * erfinv((1.0 - p_above) ** (1.0 / float(M)))

    lint = int(n_channels * p_active)
    this_stop_crit = partial(stop_crit, lint=lint)  # XXX : check lint here
    this_selection_rule = partial(selection_rule, lint=lint)

    print("Starting S-BIRD with MDCT dictionary of %d Atoms."
          " Lambda_W=%1.3f, n_runs=%d, p_active=%1.1f" % (M, Lambda_W, n_runs, p_active))

    denoised = _bird_core(X, scales, n_runs, Lambda_W, verbose=verbose,
                          stop_crit=this_stop_crit, n_jobs=n_jobs,
                          selection_rule=this_selection_rule,
                          max_iter=max_iter, indep=False, memory=memory)

    return denoised[:, prepad:]
def extension(M1_amplitude, M1_slope, M1_offset, M1_days, M1_data_extension):
    M1_height_max = 41.  # in millimeters
    M1_model_extension = (M1_amplitude * spec.erf(M1_slope * (M1_days - M1_offset))) + (M1_height_max - M1_amplitude)
    M1_initiation = (spec.erfinv((M1_amplitude - M1_height_max) / M1_amplitude) + M1_offset * M1_slope) / M1_slope
    return M1_model_extension, M1_data_extension, M1_initiation
def __init__(self, p0):
    Statistic._Distribution.__init__(self, p0)
    from scipy.special import erf, erfinv
    # popt: p[0] is s, p[1] is mu
    self.pdf = lambda x, *argv: 1. / (x * argv[0] * np.sqrt(2 * np.pi)) * np.exp(
        -(np.log(x) - argv[1]) ** 2 / (2 * argv[0] ** 2))
    self.cdf = lambda x, *argv: 0.5 * (1 + erf((np.log(x) - argv[1]) / (argv[0] * np.sqrt(2))))
    self.dcdf = lambda x, *argv: np.exp(np.sqrt(2) * argv[0] * erfinv(2 * x - 1) + argv[1])
def nSamples(y, deltaY, confidence=0.99):
    """
    Computes the number of Monte Carlo samples needed for obtaining a yield
    estimate that is within +-*deltaY* of *y* with confidence level given by
    *confidence*.
    """
    k_gamma = erfinv(confidence)*2**0.5
    return ceil(y*(1-y)*k_gamma**2/deltaY**2)
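# --- Usage sketch for nSamples() (illustrative, not from the original source) ---
# Assumes the function above is in scope with erfinv and ceil imported as used there.
# Estimate how many Monte Carlo runs are needed to pin a ~90% yield down to
# +-1% with 99% confidence.
n_needed = nSamples(0.9, 0.01, confidence=0.99)
print(n_needed)  # on the order of a few thousand samples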
def tooth_timing_convert_lin2curv(conversion_times, s1, o1, max1, a2, s2, o2, max2):
    t1_ext = (s1*conversion_times)+o1
    t1_pct = t1_ext / max1
    t2_ext = t1_pct * max2
    converted_times = (spec.erfinv((a2+t2_ext-max2)/a2) + (o2*s2)) / s2
    return converted_times
def maskUniform(x, sigx=0, sigy=0, h=100):
    nu = -np.sqrt(2)*sigy*special.erfinv(-sigx*(h)*np.exp(x**2 / (2.0*sigx**2)))
    if abs(nu) == np.inf:
        return 0.0
    elif nu < 0.0:
        return 0.0
    else:
        return nu
def TS2sigma(TS, dof, quiet=False):
    """ one-sided Chi^2 test """
    pval_1 = chi2.cdf(TS, dof)
    sigma = math.sqrt(2)*sp.erfinv(pval_1)
    if not quiet:
        print "TS=%.2f\t->\t%.2f sigma" % (TS, sigma)
    return sigma
def qnorm(probability):
    """
    A reimplementation of R's qnorm() function.

    This function calculates the quantile function of the normal distribution.
    (http://en.wikipedia.org/wiki/Normal_distribution#Quantile_function)

    Required is the erfinv() function, the inverse error function.
    (http://en.wikipedia.org/wiki/Error_function#Inverse_function)
    """
    if probability > 1 or probability <= 0:
        raise BaseException  # TODO: raise a standard/helpful error
    else:
        print("..?? : " + str(2*probability - 1))
        print("...?? 2 : " + str(erfinv(2*probability - 1)))
        return sqrt(2) * erfinv(2*probability - 1)
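# --- Usage sketch for qnorm() (illustrative, not from the original source) ---
# Assumes the function above is in scope with sqrt and erfinv imported as used there.
# qnorm(0.975) should return roughly 1.96, the familiar two-sided 95% z-value.
z_example = qnorm(0.975)
print(z_example)  # ~1.96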
def _inverse(yy, xShift, sd):
    global _chance
    yy = np.asarray(yy)
    # xx = (special.erfinv((yy-chance)/(1-chance)*2.0-1)+xShift)/xScale
    # NB: np.special.erfinv() goes from -1:1
    xx = (xShift + np.sqrt(2) * sd *
          special.erfinv(((yy - _chance) / (1 - _chance) - 0.5) * 2))
    return xx
def DependentSample(self, count=1):
    global PRIME_NUMBERS
    assert count >= 1, "Error. count must be at least 1."
    interval_length = 1. / count
    offset = interval_length / 2.
    samples = np.array([
        [offset + x * interval_length for x in range(count)]
    ] * self.dimension).T
    #uniform = random.uniform(low=-offset, high=offset,
    #                         size=count * self.dimension)
    #uniform = uniform.reshape(count, self.dimension)
    #samples = samples + uniform
    samples = np.sqrt(2) * special.erfinv(2. * samples - 1.) * np.exp(self.LogVariances() / 2.) + self.Means()
    for i in range(self.dimension):
        random.shuffle(samples[:, i])
    return samples
def fit_transform(self, X):
    from scipy.special import erfinv
    i = np.argsort(X, axis=0)
    j = np.argsort(i, axis=0)

    assert (j.min() == 0).all()
    assert (j.max() == len(j) - 1).all()

    j_range = len(j) - 1
    self.divider = j_range / self.range

    transformed = j / self.divider
    transformed = transformed - self.upper
    transformed = erfinv(transformed)

    return transformed
def residual_measures(res):
    """
    Compute quantities needed to evaluate the quality of the estimation, based
    solely on the residuals.

    :rtype: :py:class:`ResidualMeasures`
    :returns: the scaled residuals, their ordering, the theoretical quantile
        for each residual, and the expected value for each quantile.
    """
    IX = argsort(res)
    scaled_res = res[IX] / std(res)
    prob = (arange(len(scaled_res)) + 0.5) / len(scaled_res)
    normq = sqrt(2) * erfinv(2 * prob - 1)
    return ResidualMeasures(scaled_res, IX, prob, normq)
def rank_gauss_normalization(x):
    """
    Learned from the 1st place solution of the Porto competition.
    https://www.kaggle.com/c/porto-seguro-safe-driver-prediction/discussion/44629

    input: x, a numpy array.
    """
    N = x.shape[0]
    temp = x.argsort()
    rank_x = temp.argsort() / N
    rank_x -= rank_x.mean()
    rank_x *= 2  # rank_x.max(), rank_x.min() should be in (-1, 1)
    efi_x = erfinv(rank_x)  # np.sqrt(2)*erfinv(rank_x)
    efi_x -= efi_x.mean()
    ans = efi_x.astype(np.float32)
    return ans
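# --- Usage sketch for rank_gauss_normalization() (illustrative, not from the original source) ---
# Assumes the function above is in scope with numpy as np and scipy.special.erfinv
# imported. Any 1-D feature, however skewed, comes out roughly Gaussian in shape
# (rank transform followed by erfinv).
feature_example = np.random.exponential(size=1000)
gaussified = rank_gauss_normalization(feature_example)
print(gaussified.mean(), gaussified.std())  # mean ~0 after centering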
def _qnorm(self, probability):
    """
    A reimplementation of R's qnorm() function.

    This function calculates the quantile function of the normal distribution.
    (http://en.wikipedia.org/wiki/Normal_distribution#Quantile_function)

    Required is the erfinv() function, the inverse error function.
    (http://en.wikipedia.org/wiki/Error_function#Inverse_function)
    """
    if probability > 1 or probability <= 0:
        raise LSDProbabilityOutOfRange("Alpha-value out of range: '%s'" % (probability,))
    else:
        return sqrt(2) * erfinv(2 * probability - 1)
def normal_from_ci(p1, p2, f=None):
    β, α = [x for x in zip(p1, p2)]
    if f is not None:
        try:
            T = getattr(np, f)
        except AttributeError:
            T = getattr(special, f)
        β = [T(x) for x in β]
    ζ = [special.erfinv(2 * x - 1) for x in α]
    den = ζ[1] - ζ[0]
    σ = np.sqrt(.5) * (β[1] - β[0]) / den
    # μ = (β[1] * ζ[1] - β[0] * ζ[0]) / den
    μ = 0.5 * (β[1] + β[0]) + np.sqrt(0.5) * σ * (ζ[0] + ζ[1])
    return μ, σ
def pd_gaussian(signal_to_noise, probability_false_alarm):
    """
    Calculate the probability of detection for a given signal to noise ratio
    and probability of false alarm, when the noise is Gaussian (non-coherent
    detection).
    :param signal_to_noise: The signal to noise ratio.
    :param probability_false_alarm: The probability of false alarm.
    :return: The probability of detection.
    """
    # Calculate the voltage threshold
    voltage_threshold = erfinv(1.0 - 2.0 * probability_false_alarm) * sqrt(2.0)

    # Calculate the signal amplitude based on signal to noise ratio
    amplitude = sqrt(2.0 * signal_to_noise)

    # Calculate the probability of detection
    return 0.5 * (1.0 - erf((voltage_threshold - amplitude) / sqrt(2.0)))
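# --- Usage sketch for pd_gaussian() (illustrative, not from the original source) ---
# Assumes the function above is in scope with erf, erfinv and sqrt imported as used there.
# Probability of detection for a 10 dB SNR target at a 1e-6 false-alarm rate.
snr_linear = 10.0 ** (10.0 / 10.0)  # 10 dB expressed as a linear ratio
pd_example = pd_gaussian(snr_linear, 1.0e-6)
print(pd_example)  # a value between 0 and 1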
def xi_hat(self, xi_bar_hat):
    """
    A priori SNR estimate.

    Argument/s:
        xi_bar_hat - mapped a priori SNR estimate.

    Returns:
        A priori SNR estimate.
    """
    xi_db_hat = np.add(np.multiply(np.multiply(self.sigma, np.sqrt(2.0)),
                                   spsp.erfinv(np.subtract(np.multiply(2.0, xi_bar_hat), 1))), self.mu)
    return np.power(10.0, np.divide(xi_db_hat, 10.0))
def gaussian(n, seed):
    """
    Gaussian random number generator. mean = 0, variance = 1

    Input:
        n: size of random vector to be returned
        seed: seed for the generator

    Returns:
        y : vector of random gaussian numbers
    """
    m = 2**31 - 1
    a = 48271
    c = 0
    u = congruential(n, seed, m, a, c)
    y = np.sqrt(2) * erfinv(2 * u - 1)
    return y
def rank_gauss(df):
    df = df.rank()
    print('calc min ...')
    m = df.min()
    print('calc max ...')
    M = df.max()
    df = (df - m) / (M - m)
    assert all(df.max() == 1)
    assert all(df.min() == 0)
    df = (df - 0.5) * (2 - 1e-9)
    df = erfinv(df)
    print('calc mean ...')
    # df = df - df.mean()
    return df
def twosided_cl_to_dlnl(cl):
    """Compute the delta-loglikelihood value that corresponds to a two-sided
    interval of the given confidence level.

    Parameters
    ----------
    cl : float
        Confidence level.

    Returns
    -------
    dlnl : float
        Delta-loglikelihood value with respect to the maximum of the likelihood
        function.
    """
    return 0.5 * np.power(np.sqrt(2.) * special.erfinv(cl), 2)
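# --- Usage sketch for twosided_cl_to_dlnl() (illustrative, not from the original source) ---
# Assumes the function above is in scope with numpy as np and scipy.special imported
# as special. A 68.27% (1 sigma) two-sided interval should correspond to a
# delta-loglikelihood of ~0.5.
dlnl_example = twosided_cl_to_dlnl(0.6827)
print(dlnl_example)  # ~0.5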
async def random_album_popularity():
    """Returns a truncated normal random popularity between 0 and 1 that follows
    the PDF f(y) = exp( -a * (y - c)^2 ) where a is the curvature and c is the
    bell center. This is a truncated normal distribution. The random variable
    transformation g(x) : x -> y needs to be used where x is a uniform
    distribution and y is the f(x) distribution. g(x) = Fy^-1( F(x) ) where
    Fx = x and Fy = erf( sqrt(a)*(y - c) ) which are the corresponding CDFs of
    x and y. Solving, we find that g(x) = erfinv(x) / sqrt(a) + c."""
    center_popularity = 0.8
    curvature = 40
    lower_bound = -1
    upper_bound = erf(math.sqrt(curvature) * (1 - center_popularity))
    x = random.uniform(lower_bound, upper_bound)
    y = erfinv(x) / math.sqrt(curvature) + center_popularity
    return y
def uni_to_norm(x, a, b):
    """ Transform a uniform random variable to a standard (normal) random variable.

    Parameters
    ----------
    x : float
        coordinate in uniform variable space
    a, b : float
        lower and upper bounds of the uniform distribution

    Returns
    -------
    float, random variable in SRV space
    """
    return np.sqrt(2) * erfinv(2. * (x - a) / (b - a) - 1.0)
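# --- Usage sketch for uni_to_norm() (illustrative, not from the original source) ---
# Assumes the function above is in scope with numpy as np and erfinv imported.
# The midpoint of the uniform interval maps to 0, and the lower quartile maps to
# roughly -0.674 in standard-normal space.
print(uni_to_norm(0.5, 0.0, 1.0))   # 0.0
print(uni_to_norm(0.25, 0.0, 1.0))  # ~-0.674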
def CI(coverage, T_ProbA, T_ProbB):
    x = np.arange(0.01, 0.99, 0.01)
    y = np.arange(100, 101)
    T_ProbA = T_ProbA  #np.random.choice(x,1)[0]  #true prob of heads for coin a
    T_ProbB = T_ProbB  #np.random.choice(x,1)[0]  #true prob of heads for coin b
    T_Theta = T_ProbB / T_ProbA  #true value of theta
    Num_FlipsA = np.random.choice(y, 100)  #array of 100 random numbers of flips in a given experiment (of flipping coin a)
    Num_FlipsB = np.random.choice(y, 100)  #array of 100 random numbers of flips in a given experiment (of flipping coin b)
    p = np.zeros(100)  #estimated prob of heads for coin a in each experiment
    u = np.zeros(100)  #estimated prob of heads for coin b in each experiment
    q = np.zeros(100)  #estimated theta in each experiment
    for i in range(100):
        E_Num_HeadsA = np.random.binomial(Num_FlipsA[i], T_ProbA, 1)  #flip coin A Num_FlipsA[i] times, 100 times over.
        E_Num_HeadsB = np.random.binomial(Num_FlipsB[i], T_ProbB, 1)  #flip coin B Num_FlipsB[i] times, 100 times over.
        p[i] = np.float(E_Num_HeadsA[0]) / Num_FlipsA[i]
        u[i] = np.float(E_Num_HeadsB[0]) / Num_FlipsB[i]
        q[i] = THETA(np.float(Num_FlipsA[i]), np.float(Num_FlipsB[i]),
                     np.float(E_Num_HeadsA[0]), np.float(E_Num_HeadsB[0]))
    plt.hist(q, 16, normed=1)
    plt.show()
    Nsigma = np.sqrt(2) * erfinv(coverage)
    mu = q.mean()
    sigma_mu = (1.0) * q.size**(-0.5)
    print "True Theta is:", T_Theta
    print "True Prob Coin A is Heads", T_ProbA
    print "True Prob Coin B is Heads", T_ProbB
    print "Exp Theta is:", mu
    print "Prob Coin A is Heads", np.mean(p)
    print "Prob Coin B is Heads", np.mean(u)
    print "Lower Bound CI", mu - Nsigma * sigma_mu
    print "Upper Bound CI", mu + Nsigma * sigma_mu
    if T_Theta > mu - Nsigma * sigma_mu and T_Theta < mu + Nsigma * sigma_mu:
        print "CI Covers"
    else:
        print "CI Doesn't Cover"
    return
def normal_from_uniform(su, means=None, sigmas=None):
    # su is sample from a uniform distribution [0,1]
    # output is normally distributed
    oneD = False
    if len(np.shape(su)) == 1:
        oneD = True
        su = np.array(su, ndmin=2).T
    npts, dim = np.shape(su)
    if means is None:
        means = np.zeros(dim)
    if sigmas is None:
        sigmas = np.ones(dim)
    xs = np.sqrt(2) * erfinv(-1 + 2 * np.array(su, ndmin=2))
    xs = np.transpose(np.array(sigmas, ndmin=2).T * xs.T) + np.array(means)
    if oneD:
        xs = xs[:, 0]
    return xs
def __init__(self, params):
    super(GaussianRandomField, self).__init__(params)  # don't forget this line in our classes!

    # value of the correlation function at the origin
    self.corr_func_at_origin = self.frac_volume * (1.0 - self.frac_volume)

    # inverse slope of the normalized correlation function at the origin
    beta = np.sqrt(2) * erfinv(2 * (1 - self.frac_volume) - 1)

    # second derivative of the field acf at the origin
    acf_psi_doubleprime = -1.0 / 2 * ((1.0 / self.corr_length)**2
                                      + 1.0 / 3 * (2 * np.pi / self.repeat_distance)**2)

    SSA_tilde = 2.0 / np.pi * np.exp(-beta**2 / 2) * np.sqrt(-acf_psi_doubleprime) / self.frac_volume
    self.inv_slope_at_origin = 4.0 * (1 - self.frac_volume) / SSA_tilde
def igrcdf(norm, dim):
    """
    Inverse Gaussian radial CDF.

    @type  norm: array_like
    @param norm: norms of the data points

    @type  dim: integer
    @param dim: dimensionality of the Gaussian
    """
    if dim < 2:
        result = erfinv(norm)
        result[result > 6.] = 6.
        return sqrt(2.) * result
    else:
        return sqrt(2.) * sqrt(gammaincinv(dim / 2., norm))
def event(self, event):
    opts = ('', 0.)
    if len(self.variations) == 0:
        for df in self.chunk_events(event, opts=opts, chunksize=int(1e7)):
            df.to_hdf(
                self.path, self.name, format='table', append=True,
                complevel=9, complib='zlib',
            )

    for source, vlabel, vval in self.variations:
        if vlabel == "percentile":
            nsig = np.sqrt(2) * erfinv(2 * vval / 100. - 1)
            table_name = ("_".join([
                self.name, "{}{}".format(source, vval)
            ]) if source != "" else self.name)
        elif vlabel == "sigmaval":
            nsig = vval
            updown = "Up" if nsig >= 0. else "Down"
            updown = "{:.2f}".format(np.abs(nsig)).replace(".", "p") + updown
            table_name = ("_".join([
                self.name, "{}{}".format(source, updown)
            ]) if source != "" else self.name)
        elif vlabel.lower() in ["up", "down"]:
            nsig = 1. if vlabel.lower() == "up" else -1.
            table_name = ("_".join([
                self.name, "{}{}".format(source, vlabel)
            ]) if source != "" else self.name)
        else:
            nsig = 0.
            table_name = self.name

        opts = (source, nsig)
        for df in self.chunk_events(event, opts=opts, chunksize=int(1e7)):
            df.to_hdf(
                self.path, table_name, format='table', append=True,
                complevel=9, complib='zlib',
            )
def rank_gauss(x, title=None):
    # Trying to implement rankGauss in python, here are my steps
    # 1) Get the index of the series
    # 2) sort the series
    # 3) standardize the series between -1 and 1
    # 4) apply erfinv to the standardized series
    # 5) create a new series using the index
    # Am I missing something ??
    # I subtract the mean afterwards. And do not touch 1/0 (binary columns).
    # The basic idea of this "RankGauss" was to apply a rank trafo and then shape them like gaussians.
    # That's the basic idea. You can try your own variation of this.
    if (title != None):
        fig, axs = plt.subplots(3, 3, figsize=(8, 8))
        fig.suptitle(title)
        axs[0][0].hist(x)
    from scipy.special import erfinv
    N = x.shape[0]
    temp = x.argsort()
    if (title != None):
        print('1)', max(temp), min(temp))
        axs[0][1].hist(temp)
    rank_x = temp.argsort() / N
    if (title != None):
        print('2)', max(rank_x), min(rank_x))
        axs[0][2].hist(rank_x)
    rank_x -= rank_x.mean()
    if (title != None):
        print('3)', max(rank_x), min(rank_x))
        axs[1][0].hist(rank_x)
    rank_x *= 2
    if (title != None):
        print('4)', max(rank_x), min(rank_x))
        axs[1][1].hist(rank_x)
    efi_x = erfinv(rank_x)
    if (title != None):
        print('5)', max(efi_x), min(efi_x))
        axs[1][2].hist(efi_x)
    efi_x -= efi_x.mean()
    if (title != None):
        print('6)', max(efi_x), min(efi_x))
        axs[2][0].hist(efi_x)
        plt.show()
    return efi_x
def onesided_cl_to_dlnl(cl):
    """Compute the delta-loglikelihood value that corresponds to an upper limit
    of the given confidence level.

    Parameters
    ----------
    cl : float
        Confidence level.

    Returns
    -------
    dlnl : float
        Delta-loglikelihood value with respect to the maximum of the likelihood
        function.
    """
    alpha = 1.0 - cl
    return 0.5 * np.power(np.sqrt(2.) * special.erfinv(1 - 2 * alpha), 2.)
def __call__(self, M, S, step=None, grad=False):
    C = np.zeros(2 * self.num_states)
    r = math.pow(2, 0.5) * sp.erfinv(self.conf)
    if grad:
        dCdM = np.zeros(C.shape + M.shape)
        dCdS = np.zeros(C.shape + S.shape)
    for i in range(self.num_states):
        C[2 * i] = M[i] - r * np.sqrt(S[i, i]) - self.bounds[i, 0]
        C[2 * i + 1] = self.bounds[i, 1] - r * np.sqrt(S[i, i]) - M[i]
        if grad:
            dCdM[2 * i, i] = 1.
            dCdM[2 * i + 1, i] = -1.
            dCdS[2 * i, i] = -r / (2 * np.sqrt(S[i, i]))
            dCdS[2 * i + 1, i] = -r / (2 * np.sqrt(S[i, i]))
    return C if not grad else (C, dCdM, dCdS)
def uniform_to_normal(self, x):
    """converts a uniform random variable to a normal distribution """
    import copy
    uni_prior_up = self.uni_prior_range["up"]
    uni_prior_down = self.uni_prior_range["down"]
    length = len(x)
    output = copy.deepcopy(x)
    for i in range(length):
        limit = (uni_prior_up[i] - uni_prior_down[i])
        tmp = (output[i] - uni_prior_down[i]) / limit
        output[i] = sqrt(2.0) * erfinv(2.0 * tmp - 1.0)
    return output
def ccdf_inv(self, points):
    """
    Return the inverse ccdf of the points

    usage: Instance.ccdf_inv(points)

    Input
    -----
    points: 0 <= points[i] <= 1

    Output
    ------
    Return value: Numpy array which is the inverse of the ccdf
    """
    pts = np.array(points)
    accdfinv = np.exp(self.__mu + self.__sigma * math.sqrt(2.0) * spec.erfinv(1 - 2 * pts))
    return accdfinv
def test_literal_values(self):
    # calculated via https://keisan.casio.com/exec/system/1180573448
    # for y = 0, 0.1, ... , 0.9
    actual = sc.erfinv(np.linspace(0, 0.9, 10))
    expected = [
        0,
        0.08885599049425768701574,
        0.1791434546212916764928,
        0.27246271472675435562,
        0.3708071585935579290583,
        0.4769362762044698733814,
        0.5951160814499948500193,
        0.7328690779592168522188,
        0.9061938024368232200712,
        1.163087153676674086726,
    ]
    assert_allclose(actual, expected, rtol=0, atol=1e-15)
def _ppf(self, q, mu, sigma):
    r"""
    percent point function

    Parameters
    ----------
    mu : array_like
        mean of the logit of `x`
    sigma : array_like
        standard deviation of the logit of `x`

    Notes
    -----
    """
    return expit(np.sqrt(2.0) * sigma * erfinv(2.0 * q - 1.0) + mu)
def GaussianCdfInverse(p, mu=0, sigma=1):
    """Evaluates the inverse CDF of the gaussian distribution.

    See http://en.wikipedia.org/wiki/Normal_distribution#Quantile_function

    Args:
        p: float
        mu: mean parameter
        sigma: standard deviation parameter

    Returns:
        float
    """
    x = ROOT2 * erfinv(2 * p - 1)
    return mu + x * sigma
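# --- Usage sketch for GaussianCdfInverse() (illustrative, not from the original source) ---
# Assumes the function above is in scope with ROOT2 = sqrt(2) and erfinv imported as
# used there. The 97.5th percentile of N(0, 1) is ~1.96, and mu/sigma shift and
# rescale it accordingly.
print(GaussianCdfInverse(0.975))                  # ~1.96
print(GaussianCdfInverse(0.975, mu=10, sigma=2))  # ~13.92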
def solve_lambda(alpha, x, beta_hat, eps, tolerance=1e-8, verbose=False):
    if verbose:
        print('-----------')
    d = math.sqrt(2) * special.erfinv(2 * alpha - 1)
    A = np.outer(beta_hat, beta_hat) - d**2 * var_covar_matrix
    a = eps.dot(A).dot(eps)
    b = x.dot(A).dot(eps) + eps.dot(A).dot(x)
    c = x.dot(A).dot(x)
    #import pdb; pdb.set_trace()
    if verbose:
        print('value a: {0}'.format(a))
    delta = b**2 - 4 * a * c
    if delta < 0:
        if verbose:
            print('No real solution. Delta: {0}'.format(delta))
        return None
    elif delta == 0:
        if verbose:
            print('One solution')
        return -b / (2 * a)
    elif delta > 0:
        lambda1 = (-b - delta**0.5) / (2 * a)
        lambda2 = (-b + delta**0.5) / (2 * a)
        if verbose:
            print('Two solutions: {0}, {1}'.format(lambda2, lambda1))
        for lambda_star in [lambda1, lambda2]:  # TODO: check that this is robust to the ordering
            x_adv = x + lambda_star * eps
            eq = abs(x_adv.dot(beta_hat) + d * math.sqrt(x_adv.dot(var_covar_matrix).dot(x_adv)))
            # TODO: we are still far off
            if verbose:
                print('Value eq: {0}'.format(eq))
            eq2 = abs(x_adv.dot(A).dot(x_adv))
            #print('Value eq2: {0}'.format(eq2))
            #print('--')
            if eq < tolerance:
                if verbose:
                    print('----')
                return lambda_star
        import pdb
        pdb.set_trace()
        raise ValueError('Error when solving the 2nd degree eq')
def _compute(rot_ax, ang_idx, eta_eff, Cvec, derivs=None, second_derivs=None):
    """
    :param rot_ax: scitbx.matrix.col rotation axis
    :param ang_idx: list of indices corresponding to uniform samplings of the cumulative distribution function
    :param eta_eff: float, effective mosaicity in degrees
    :param Cvec: scitbx.matrix.col the vector of rotation axis along the projects
    :param derivs: list of d_etaEffective_d_eta derivative tensors, should be len 1 for isotropic models, else len 3
    :param second_derivs: same as derivs, yet dsquared_detaEffective_d_eta_squared
    :return: Umatrices, and their first and second derivatives w.r.t. eta
    """
    # store positive and negative rotation matrices in a list
    Us, Uprimes, Udblprimes = [], [], []

    # rotation amount
    factor = np.sqrt(2) * special.erfinv(ang_idx) * np.pi / 180.
    rot_ang = eta_eff * factor

    # first derivatives
    if derivs is not None:
        d_theta_d_eta = []
        dsquared_theta_d_eta_squared = []
        common_term = -(0.5 * eta_eff**3) * factor
        for d, d2 in zip(derivs, second_derivs):
            G = np.dot(Cvec, np.dot(d, Cvec))
            d_theta_d_eta.append(common_term*G)
            G2 = np.dot(Cvec, np.dot(d2, Cvec))
            dsquared_theta_d_eta_squared.append(common_term*(-1.5 * eta_eff**2 * G**2 + G2))

    # do for both positive and negative rotations for even distribution of Umats
    for rot_sign in [1, -1]:
        U = rot_ax.axis_and_angle_as_r3_rotation_matrix(rot_sign*rot_ang, deg=False)
        Us.append(U)

        if derivs is not None:
            dU_d_theta = rot_ax.axis_and_angle_as_r3_derivative_wrt_angle(rot_sign*rot_ang, deg=False)  # 1st deriv
            d2U_d_theta2 = rot_ax.axis_and_angle_as_r3_derivative_wrt_angle(rot_sign*rot_ang, deg=False, second_order=True)  # second deriv
            for d, d2 in zip(d_theta_d_eta, dsquared_theta_d_eta_squared):
                dU_d_eta = rot_sign*dU_d_theta*d
                d2U_d_eta2 = d2U_d_theta2*(d**2) + dU_d_theta*d2
                Uprimes.append(dU_d_eta)
                Udblprimes.append(d2U_d_eta2)

    return Us, Uprimes, Udblprimes
def custom_ndpac(pha, amp, p=.05):
    npts = amp.shape[-1]
    # Normalize amplitude :
    # Use the sample standard deviation, as in original Matlab code from author
    amp = np.subtract(amp, np.mean(amp, axis=-1, keepdims=True))
    amp = np.divide(amp, np.std(amp, ddof=1, axis=-1, keepdims=True))
    # Compute pac :
    pac = np.abs(np.einsum('i...j, k...j->ik...', amp, np.exp(1j * pha)))
    s = pac**2
    pac /= npts
    # Set to zero non-significant values:
    xlim = npts * erfinv(1 - p)**2
    pac_nt = pac.copy()
    pac[s <= 2 * xlim] = np.nan
    return pac_nt.squeeze(), pac.squeeze(), s.squeeze()
def wilson(dfr, nb_test, coverage):
    z = sqrt(2) * erfinv(coverage)
    if nb_test <= 0:
        return 0, 1
    if z * z - 1 / nb_test + 4 * nb_test * dfr * (1 - dfr) + (4 * dfr - 2) < 0:
        return 0, 1
    w_minus = max(0, (2 * nb_test * dfr + z * z
                      - z * sqrt(z * z - 1 / nb_test + 4 * nb_test * dfr * (1 - dfr) + (4 * dfr - 2)) + 1)
                  / (2 * (nb_test + z * z)))
    w_plus = min(1, (2 * nb_test * dfr + z * z
                     + z * sqrt(z * z - 1 / nb_test + 4 * nb_test * dfr * (1 - dfr) - (4 * dfr - 2)) + 1)
                 / (2 * (nb_test + z * z)))
    if w_minus == 0:
        return w_minus, w_plus
    return w_minus, w_plus