def broadgauss(x, y, sigma):
    '''Gaussian function for broadening '''
    bla = True
    plot = False
    c = 299792458.
    if bla:
        print " sigma = ", round(sigma, 4), " km/s"
    sigma = sigma * 1.0e3 / c * pl.mean(x)  # sigma in Å
    if bla:
        print " sigma = ", round(sigma, 3), " Å "
    xk = x - pl.mean(x)
    g = make_gauss(1, 0, sigma)
    yk = [g(i) for i in xk]
    if bla:
        print " Integral of the gaussian function: ", pl.trapz(yk, xk).__format__('5.3')
    if plot:
        pl.figure(2)
        pl.plot(xk, yk, '+-')
        pl.show()
    #if bla: print " size y:", y.size
    y = pl.convolve(y, yk, mode='same')
    #if bla: print " size y:", y.size
    return y / max(y)
def latent_simplex(X):
    """ TODO: describe this function"""
    N, T, J = X.shape

    alpha = []
    for t in range(T):
        alpha_t = []
        for j in range(J):
            mu_alpha_tj = pl.mean(X[:, t, j]) / pl.mean(X[:, t, :], 0).sum()
            alpha_t.append(mc.Normal('alpha_%d_%d' % (t, j), mu=0., tau=1.,
                                     value=pl.log(mu_alpha_tj)))
        alpha.append(alpha_t)

    @mc.deterministic
    def pi(alpha=alpha):
        pi = pl.zeros((T, J))
        for t in range(T):
            pi[t] = pl.reshape(pl.exp(alpha[t]), J) / pl.sum(pl.exp(alpha[t]))
        return pi

    @mc.observed
    def X_obs(pi=pi, value=X.mean(0), sigma=X.std(0), pow=2):
        """ TODO: experiment with different values of pow, although pow=2
        seems like a fine choice based on our limited experience."""
        return -((pl.absolute(pi - value) / sigma)**pow).sum()

    return vars()
def data_to_ch(data):
    ch = {}
    for ch_ind in range(1, 97):
        ch[ch_ind] = {}
        ch[ch_ind]["bl"] = data[ch_ind]["blanks"]
        ch[ch_ind]["bl_mu"] = pl.mean(ch[ch_ind]["bl"])
        ch[ch_ind]["bl_sem"] = pl.std(ch[ch_ind]["bl"]) / pl.sqrt(len(ch[ch_ind]["bl"]))
        for ind in sorted(data[ch_ind].keys()):
            if ind != "blanks":
                k = ind[0]
                if k not in ch[ch_ind]:
                    ch[ch_ind][k] = {}
                    ch[ch_ind][k]["fr"] = []
                    ch[ch_ind][k]["fr_mu"] = []
                    ch[ch_ind][k]["fr_sem"] = []
                    ch[ch_ind][k]["pos_y"] = []
                    ch[ch_ind][k]["dprime"] = []
                ch[ch_ind][k]["fr"].append(data[ch_ind][ind]["on"])
                ch[ch_ind][k]["fr_mu"].append(pl.mean(data[ch_ind][ind]["on"]))
                # use the current channel's trial count (the original
                # hard-coded data[1] here)
                ch[ch_ind][k]["fr_sem"].append(pl.std(data[ch_ind][ind]["on"])
                                               / pl.sqrt(len(data[ch_ind][ind]["on"])))
                ch[ch_ind][k]["pos_y"].append(ind[2])
                # print ch[ch_ind][k]['pos_y']
                # print pl.std(data[ch_ind][ind]['on'])
                ch[ch_ind][k]["dprime"].append(
                    (pl.mean(data[ch_ind][ind]["on"]) - ch[ch_ind]["bl_mu"])
                    / ((pl.std(ch[ch_ind]["bl"]) + pl.std(data[ch_ind][ind]["on"])) / 2)
                )
    # print ch[ch_ind]['OSImage_5']['pos_y']
    return ch
def scatter_stats(db, s1, s2, f1=None, f2=None, **kwargs):
    if f1 is None:
        f1 = lambda x: x  # identity function
    if f2 is None:
        f2 = f1

    x = []
    xerr = []
    y = []
    yerr = []
    for k in db:
        x_k = [f1(x_ki) for x_ki in db[k].__getattribute__(s1).gettrace()]
        y_k = [f2(y_ki) for y_ki in db[k].__getattribute__(s2).gettrace()]
        x.append(pl.mean(x_k))
        xerr.append(pl.std(x_k))
        y.append(pl.mean(y_k))
        yerr.append(pl.std(y_k))
        pl.text(x[-1], y[-1], " %s" % k, fontsize=8, alpha=0.4, zorder=-1)

    default_args = {"fmt": "o", "ms": 10}
    default_args.update(kwargs)
    pl.errorbar(x, y, xerr=xerr, yerr=yerr, **default_args)
    pl.xlabel(s1)
    pl.ylabel(s2)
def flow_rate_hist(sheets):
    ant_rates = []
    weights = []
    for sheet in sheets:
        ants, seconds, weight = flow_rate(sheet)
        ant_rate = seconds / ants
        #ant_rate = ants / seconds
        ant_rates.append(ant_rate)
        weights.append(float(weight))
        #weights.append(seconds)

    weights = pylab.array(weights)
    weights /= sum(weights)

    #print "ants per second"
    print "seconds per ant"
    mu = pylab.mean(ant_rates)
    print "mean", pylab.mean(ant_rates)
    wmean = pylab.average(ant_rates, weights=weights)
    print "weighted mean", wmean
    print "median", pylab.median(ant_rates)
    print "std", pylab.std(ant_rates, ddof=1)
    ant_rates = pylab.array(ant_rates)
    werror = (ant_rates - mu) * weights
    print "weighted std", (sum(werror ** 2)) ** 0.5
    print "weighted std 2", (pylab.average((ant_rates - mu)**2, weights=weights)) ** 0.5

    pylab.figure()
    pylab.hist(ant_rates)
    pylab.savefig('ant_flow_rates.pdf', format='pdf')
    pylab.close()
def compare_models(db, stoch="itn coverage", stat_func=None, plot_type="", **kwargs):
    if stat_func is None:
        stat_func = lambda x: x

    X = {}
    for k in sorted(db.keys()):
        c = k.split("_")[2]
        X[c] = []
    for k in sorted(db.keys()):
        c = k.split("_")[2]
        X[c].append([stat_func(x_ki) for x_ki in db[k].__getattribute__(stoch).gettrace()])

    x = pl.array([pl.mean(xc[0]) for xc in X.values()])
    xerr = pl.array([pl.std(xc[0]) for xc in X.values()])
    y = pl.array([pl.mean(xc[1]) for xc in X.values()])
    yerr = pl.array([pl.std(xc[1]) for xc in X.values()])

    if plot_type == "scatter":
        default_args = {"fmt": "o", "ms": 10}
        default_args.update(kwargs)
        for c in X.keys():
            pl.text(pl.mean(X[c][0]), pl.mean(X[c][1]), " %s" % c,
                    fontsize=8, alpha=0.4, zorder=-1)
        pl.errorbar(x, y, xerr=xerr, yerr=yerr, **default_args)
        pl.xlabel("First Model")
        pl.ylabel("Second Model")
        pl.plot([0, 1], [0, 1], alpha=0.5, linestyle="--", color="k", linewidth=2)
    elif plot_type == "rel_diff":
        d1 = sorted(100 * (x - y) / x)
        d2 = sorted(100 * (xerr - yerr) / xerr)
        pl.subplot(2, 1, 1)
        pl.title("Percent Model 2 deviates from Model 1")
        pl.plot(d1, "o")
        pl.xlabel("Countries sorted by deviation in mean")
        pl.ylabel("deviation in mean (%)")
        pl.subplot(2, 1, 2)
        pl.plot(d2, "o")
        pl.xlabel("Countries sorted by deviation in std err")
        pl.ylabel("deviation in std err (%)")
    elif plot_type == "abs_diff":
        d1 = sorted(x - y)
        d2 = sorted(xerr - yerr)
        pl.subplot(2, 1, 1)
        pl.title("Absolute deviation of Model 2 from Model 1")
        pl.plot(d1, "o")
        pl.xlabel("Countries sorted by deviation in mean")
        pl.ylabel("deviation in mean")
        pl.subplot(2, 1, 2)
        pl.plot(d2, "o")
        pl.xlabel("Countries sorted by deviation in std err")
        pl.ylabel("deviation in std err")
    else:
        assert 0, "plot_type must be abs_diff, rel_diff, or scatter"

    return pl.array([x, y, xerr, yerr])
def calZscore(self, core, surface, sampleSize):
    # z-score of the core mean against the distribution of means of
    # random samples drawn from the surface
    coreMean = mean(core)
    s = []
    for i in range(sampleSize):
        s.append(mean(sample(surface, len(core))))
    sig = sqrt(var(s))
    return (coreMean - mean(s)) / sig
def plot2():
    import pylab as pl
    hs, ds = [], []
    for event, time in load():
        if event == main_start:
            start_time = time
        elif event == main_end:
            d0, h0 = days_hours(start_time)
            d1, h1 = days_hours(time)
            hs.append((h0, h1))
            ds.append((d0, d1))
            pl.plot([d0, d1], [h0, h1], 'b')
    ihs, fhs = zip(*hs)
    ids, fds = zip(*ds)
    pl.plot(ids, ihs, 'g')
    pl.plot([ids[0], ids[-1]], [pl.mean(ihs)] * 2, 'g--')
    pl.plot(fds, fhs, 'r')
    pl.plot([fds[0], fds[-1]], [pl.mean(fhs)] * 2, 'r--')
    f, i = pl.mean(fhs), pl.mean(ihs)
    pl.plot([fds[0], fds[-1]], [(f + i) / 2] * 2, 'b--')
    print i, f, f - i, (f + i) / 2
    std_i, std_f = pl.std(ihs), pl.std(fhs)
    print std_i, std_f
    pl.xlim(ids[0], fds[-1])
    pl.ylim(4, 28)
    pl.grid(True)
    pl.xlabel('Time [day]')
    pl.ylabel('Day interval [hours]')
    pl.show()
def build_moving5(days, avg):
    # 5-day moving average; slice ends are exclusive, so a 5-point
    # window is avg[cday:cday + 5] (the original used 4-point slices)
    moving5 = array(zeros(len(days) - 4), dtype=float)
    moving5[0] = pylab.mean(avg[0:5])
    for cday in range(1, len(moving5)):
        moving5[cday] = pylab.mean(avg[cday:cday + 5])
    return moving5
def perlin_covariance_corr(delta, N=1000000, bound=1):
    ts = bound * pl.rand(N)
    tds = ts + delta
    ps = [p(t) for t in ts]
    pds = [p(td) for td in tds]
    #cov = pl.mean([pp*pd for pp, pd in zip(ps, pds)])
    cov = pl.mean([(pp - pd)**2 for pp, pd in zip(ps, pds)])
    corr = pl.mean([pp * pd for pp, pd in zip(ps, pds)])
    return cov, corr
def int_f(a, fs=1.):
    """
    A Fourier-based integrator.

    ===========
    Parameters:
    ===========
    a : *array* (1D)
        The array which should be integrated
    fs : *float*
        sampling time of the data

    ========
    Returns:
    ========
    y : *array* (1D)
        The integrated array
    """
    if False:
        # version with "mirrored" code
        xp = hstack([a, a[::-1]])
        int_fluc = int_f0(xp, float(fs))[:len(a)]
        baseline = mean(a) * arange(len(a)) / float(fs)
        return int_fluc + baseline - int_fluc[0]

    # old version
    baseline = mean(a) * arange(len(a)) / float(fs)
    int_fluc = int_f0(a, float(fs))
    return int_fluc + baseline - int_fluc[0]

    # old code - remove eventually (comment on 02/2014)
    # periodify
    if False:
        baseline = linspace(a[0], a[-1], len(a))
        a0 = a - baseline
        m = a0[-1] - a0[-2]
        b2 = linspace(0, -.5 * m, len(a))
        baseline -= b2
        a0 += b2
        a2 = hstack([a0, -1. * a0[1:][::-1]])  # "smooth" periodic signal

        dbase = baseline[1] - baseline[0]
        t_vec = arange(len(a)) / float(fs)
        baseint = baseline[0] * t_vec + .5 * dbase * t_vec ** 2

        # define frequencies
        T = len(a2) / float(fs)
        freqs = 1. / T * arange(len(a2))
        freqs[len(freqs) // 2 + 1:] -= float(fs)

        spec = fft.fft(a2)
        spec_i = zeros_like(spec, dtype=complex)
        spec_i[1:] = spec[1:] / (2j * pi * freqs[1:])
        res_int = fft.ifft(spec_i).real[:len(a0)] + baseint
        return res_int - res_int[0]
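# Minimal usage sketch for int_f, assuming int_f0 (the zero-baseline
# Fourier integrator called above) is available in this module.
# Integrating cos(2*pi*t) sampled at fs should recover sin(2*pi*t)/(2*pi)
# up to a constant offset:
#
#   from pylab import arange, cos, pi
#   fs = 100.
#   t = arange(0, 10, 1. / fs)
#   y = int_f(cos(2 * pi * t), fs=fs)
#   # y approximates sin(2 * pi * t) / (2 * pi)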
def correctBias(AllData):
    # correct for difficulty and plot each subject's percent correct vs confidence
    corrmatrix, confmatrix = returnConfMatrix(AllData)
    Qs, subjects = py.shape(corrmatrix)
    copts = [1, 2, 3, 4, 5]
    datamat = np.array(py.zeros([len(copts), subjects]))
    print(datamat)
    fig = py.figure()
    ax15 = fig.add_subplot(111)
    i = 0
    while i < subjects:
        c1, c2, c3, c4, c5 = [], [], [], [], []  # confidences for each subject
        j = 0
        while j < Qs:
            # get confidences and correctness for each question
            if confmatrix[j][i] == 1:
                c1.append(corrmatrix[j][i])
            elif confmatrix[j][i] == 2:
                c2.append(corrmatrix[j][i])
            elif confmatrix[j][i] == 3:
                c3.append(corrmatrix[j][i])
            elif confmatrix[j][i] == 4:
                c4.append(corrmatrix[j][i])
            elif confmatrix[j][i] == 5:
                c5.append(corrmatrix[j][i])
            else:
                print('bad num encountered')
            j += 1
        print('i is %d' % i)
        minconf = [py.mean(c1), py.mean(c2), py.mean(c3), py.mean(c4), py.mean(c5)]
        pmin = 10
        for p in minconf:
            if p < pmin and p != 0 and not math.isnan(p):
                pmin = p
        print(pmin)
        datamat[0][i] = py.mean(c1) / pmin
        datamat[1][i] = py.mean(c2) / pmin
        datamat[2][i] = py.mean(c3) / pmin
        datamat[3][i] = py.mean(c4) / pmin
        datamat[4][i] = py.mean(c5) / pmin
        # print(datamat)
        print(py.shape(datamat))
        print(len(datamat[:, i]))
        ax15.plot(range(1, 6), datamat[:, i], alpha=0.4, linewidth=4)
        i += 1
    ax15.set_ylabel('Modified Correct')
    ax15.set_xlabel('Confidence')
    ax15.set_title('All responses')
    ax15.set_xticks(np.arange(1, 6))
    ax15.set_xticklabels([1, 2, 3, 4, 5])
    ax15.set_xlim(0, 6)
def xyamb(xytab, qu, xyout=''):
    mytb = taskinit.tbtool()

    if not isinstance(qu, tuple):
        raise Exception, 'qu must be a tuple: (Q,U)'

    if xyout == '':
        xyout = xytab
    if xyout != xytab:
        os.system('cp -r ' + xytab + ' ' + xyout)

    QUexp = complex(qu[0], qu[1])
    print 'Expected QU = ', qu  # , ' (', pl.angle(QUexp)*180/pi, ')'

    mytb.open(xyout, nomodify=False)

    QU = mytb.getkeyword('QU')['QU']
    P = pl.sqrt(QU[0, :]**2 + QU[1, :]**2)

    nspw = P.shape[0]
    for ispw in range(nspw):
        st = mytb.query('SPECTRAL_WINDOW_ID==' + str(ispw))
        if st.nrows() > 0:
            q = QU[0, ispw]
            u = QU[1, ispw]
            qufound = complex(q, u)
            c = st.getcol('CPARAM')
            fl = st.getcol('FLAG')
            xyph0 = pl.angle(pl.mean(c[0, :, :][pl.logical_not(fl[0, :, :])]), True)
            print 'Spw = ' + str(ispw) + ': Found QU = ' + str(QU[:, ispw])  # +' ('+str(pl.angle(qufound)*180/pi)+')'
            #if ( (abs(q)>0.0 and abs(qu[0])>0.0 and (q/qu[0])<0.0) or
            #     (abs(u)>0.0 and abs(qu[1])>0.0 and (u/qu[1])<0.0) ):
            if pl.absolute(pl.angle(qufound / QUexp) * 180 / pi) > 90.0:
                c[0, :, :] *= -1.0
                xyph1 = pl.angle(pl.mean(c[0, :, :][pl.logical_not(fl[0, :, :])]), True)
                st.putcol('CPARAM', c)
                QU[:, ispw] *= -1
                print '  ...CONVERTING X-Y phase from ' + str(xyph0) + ' to ' + str(xyph1) + ' deg'
            else:
                print '  ...KEEPING X-Y phase ' + str(xyph0) + ' deg'
            st.close()

    QUr = {}
    QUr['QU'] = QU
    mytb.putkeyword('QU', QUr)
    mytb.close()

    QUm = pl.mean(QU[:, P > 0], 1)
    QUe = pl.std(QU[:, P > 0], 1)
    Pm = pl.sqrt(QUm[0]**2 + QUm[1]**2)
    Xm = 0.5 * atan2(QUm[1], QUm[0]) * 180 / pi

    print 'Ambiguity resolved (spw mean): Q=', QUm[0], 'U=', QUm[1], '(rms=', QUe[0], QUe[1], ')', 'P=', Pm, 'X=', Xm

    stokes = [1.0, QUm[0], QUm[1], 0.0]
    print 'Returning the following Stokes vector: ' + str(stokes)
    return stokes
def nrms(data_fit, data_true):
    """ Normalized root mean square error. """
    # root mean square error
    rms = pl.mean(pl.norm(data_fit - data_true, axis=0))
    # normalization factor is the max - min magnitude, or 2 times max
    # dist from mean; keep the per-row mean as a column so it
    # broadcasts against the (coords, samples) array
    norm_factor = 2 * pl.norm(data_true - pl.mean(data_true, axis=1)[:, None], axis=0).max()
    return (norm_factor - rms) / norm_factor
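# Minimal usage sketch for nrms, assuming the intended shapes are
# (coordinates, samples): rows are coordinates, columns are samples.
# A perfect fit scores 1; worse fits score lower.
#
#   import pylab as pl
#   truth = pl.randn(3, 100)                 # 3-D signal, 100 samples
#   noisy = truth + 0.01 * pl.randn(3, 100)
#   nrms(noisy, truth)                       # close to 1
#   nrms(pl.zeros((3, 100)), truth)          # much lower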
def ttest(X, Y):
    """
    Takes two lists of values, returns t value

    >>> ttest([2, 3, 7, 6, 10], [11,2,3,1,2])
    0.77459666924148329
    """
    if len(X) <= 1 or len(Y) <= 1:
        return 0.0
    return (pylab.mean(X) - pylab.mean(Y)) / stderr(X, Y)
def DFA(data, npoints=None, degree=1, use_median=False):
    """
    computes the detrended fluctuation analysis
    returns the fluctuation F and the corresponding window length L

    :args:
        data (n-by-1 array): the data from which to compute the DFA
        npoints (int): the number of points to evaluate; if omitted
            the log(n) will be used
        degree (int): degree of the polynomial to use for detrending
        use_median (bool): use median instead of mean fluctuation

    :returns:
        F, L: the fluctuation F as a function of the window length L
    """
    # max window length: n/4

    # 0th: compute integral
    integral = cumsum(data - mean(data))

    # 1st: compute different window lengths
    n_samples = npoints if npoints is not None else int(log(len(data)))
    lengths = sort(array(list(set(
        logspace(2, log(len(data) / 4.), n_samples, base=exp(1)).astype(int)
    ))))
    #print lengths

    all_flucs = []
    used_lengths = []
    for wlen in lengths:
        # compute the fluctuation of residuals from a polynomial fit
        # according to Kantz & Schreiber, ddof must be the degree of the
        # polynomial, i.e. 1 (or 2, if mean also counts? -> see in book)
        curr_fluc = []
        for startIdx in arange(0, len(integral), wlen):
            pt = integral[startIdx:startIdx + wlen]
            if len(pt) > 3 * (degree + 1):
                resids = pt - polyval(polyfit(arange(len(pt)), pt, degree),
                                      arange(len(pt)))
                curr_fluc.append(std(resids, ddof=degree + 1))
        if len(curr_fluc) > 0:
            if use_median:
                all_flucs.append(median(curr_fluc))
            else:
                all_flucs.append(mean(curr_fluc))
            used_lengths.append(wlen)

    return array(all_flucs), array(used_lengths)
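# Minimal usage sketch for DFA: for uncorrelated white noise the
# scaling exponent (the slope of log F vs log L) should be near 0.5.
#
#   from pylab import randn, log, polyfit
#   F, L = DFA(randn(10000), npoints=20)
#   alpha = polyfit(log(L), log(F), 1)[0]
#   # alpha ~ 0.5 for white noise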
def zoom(beg, end, x1_plot, y1_plot, z_plot, x2_plot, y2_plot, t_plot, KOP_plot, radical):
    # resize samples according to the zoom interval; 0.05 is the sampling
    # step, and slice indices must be ints
    b, e = int(beg / 0.05), int(end / 0.05)
    x1_plot = x1_plot[:, b:e]
    x2_plot = x2_plot[:, b:e]
    y1_plot = y1_plot[:, b:e]
    y2_plot = y2_plot[:, b:e]
    z_plot = z_plot[:, b:e]
    t_plot = t_plot[b:e]
    KOP_plot = KOP_plot[0, b:e]  # 0 because only k is needed, no psi

    nbn1 = x1_plot.shape[0]
    nbn2 = x2_plot.shape[0]
    x1bar_plot = pb.zeros(x1_plot.shape[1])
    x2bar_plot = pb.zeros(x2_plot.shape[1])
    zbar_plot = pb.zeros(z_plot.shape[1])
    for i in range(x1bar_plot.size):
        x1bar_plot[i] = pb.mean(x1_plot[:, i])
        x2bar_plot[i] = pb.mean(x2_plot[:, i])
        zbar_plot[i] = pb.mean(z_plot[:, i])

    # plotting
    fig = pb.figure(figsize=(20, 10))
    pb.hold(True)
    ax1 = pb.subplot(5, 1, 1); ax1.hold(True); ax1.set_title("x1 (thick black=x1bar)")
    ax2 = pb.subplot(5, 1, 2); ax2.hold(True); ax2.set_title("x2 (thick black=x2bar)")
    ax3 = pb.subplot(5, 1, 3); ax3.hold(True); ax3.set_title("x1bar - x2bar")
    ax4 = pb.subplot(5, 1, 4); ax4.hold(True); ax4.set_title("Z (thick black=zbar)")
    ax5 = pb.subplot(5, 1, 5); ax5.hold(True); ax5.set_title("Amplitude of the Kuramoto Order parameter")

    for i in range(nbn1):
        # time series, pop1 (i -> all neurons, 0 -> only neuron 0 ...)
        ax1.plot(t_plot, x1_plot[i, :])
        # time series, z
        ax4.plot(t_plot, z_plot[i, :], label=None)
    for j in range(nbn2):
        # time series, pop2
        ax2.plot(t_plot, x2_plot[j, :])

    # draw mean time series
    ax1.plot(t_plot, x1bar_plot, 'black', linewidth=1.5)
    ax2.plot(t_plot, x2bar_plot, 'black', linewidth=1.5)
    ax3.plot(t_plot, x2bar_plot - x1bar_plot, label='x2bar - x1bar')
    ax3.legend(prop={'size': 10})
    ax4.plot(t_plot, zbar_plot, 'black', linewidth=2., label="zbar")
    ax4.legend(prop={'size': 10})
    ax5.plot(t_plot, KOP_plot[:])
    #ax5.legend(prop={'size': 10})

    fig.savefig("epilepton" + radical + "_zoom.png", dpi=200)
def lsqReg(X, Y):
    """ Returns the least squares fit of Y = a*X + b. """
    m_x = pylab.mean(X)
    m_y = pylab.mean(Y)
    m_x2 = pylab.mean(X * X)
    m_xy = pylab.mean(X * Y)
    a = (m_xy - m_x * m_y) / (m_x2 - m_x * m_x)
    b = m_y - a * m_x
    return a, b
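# Minimal usage sketch for lsqReg: recover slope and intercept from
# noisy linear data (the data here is illustrative only).
#
#   import pylab
#   X = pylab.arange(100.)
#   Y = 2.0 * X + 1.0 + pylab.randn(100)
#   a, b = lsqReg(X, Y)   # a ~ 2.0, b ~ 1.0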
def sample(self, model, evidence):
    z = evidence['z']
    T = evidence['T']
    g = evidence['g']
    h = evidence['h']
    transition_var_g = evidence['transition_var_g']
    shot_id = evidence['shot_id']

    observation_var_g = model.known_params['observation_var_g']
    observation_var_h = model.known_params['observation_var_h']
    prior_mu_g = model.hyper_params['g']['mu']
    prior_cov_g = model.hyper_params['g']['cov']
    N = len(z)
    n = len(g)

    # Make g, h, and z vector valued to avoid ambiguity
    g = g.copy().reshape((n, 1))
    h = h.copy().reshape((n, 1))

    z_g = ma.asarray(nan + zeros((n, 1)))
    obs_cov = ma.asarray(inf + zeros((n, 1, 1)))
    for i in xrange(n):
        z_i = z[shot_id == i]
        T_i = T[shot_id == i]
        if 1 in T_i and 2 in T_i:
            # Sample mean and variance for multiple observations
            n_obs_g, n_obs_h = sum(T_i == 1), sum(T_i == 2)
            obs_cov_g, obs_cov_h = observation_var_g / n_obs_g, observation_var_h / n_obs_h
            z_g[i] = (mean(z_i[T_i == 1]) / obs_cov_g
                      + mean(z_i[T_i == 2] - h[i]) / obs_cov_h) / (1 / obs_cov_g + 1 / obs_cov_h)
            obs_cov[i] = 1 / (1 / obs_cov_g + 1 / obs_cov_h)
        elif 1 in T_i:
            n_obs_g = sum(T_i == 1)
            z_g[i] = mean(z_i[T_i == 1])
            obs_cov[i] = observation_var_g / n_obs_g
        elif 2 in T_i:
            n_obs_h = sum(T_i == 2)
            z_g[i] = mean(z_i[T_i == 2] - h[i])
            obs_cov[i] = observation_var_h / n_obs_h
    z_g[isnan(z_g)] = ma.masked
    obs_cov[isinf(obs_cov)] = ma.masked

    kalman = self._kalman
    kalman.initial_state_mean = array([prior_mu_g[0], ])
    kalman.initial_state_covariance = array([prior_cov_g[0], ])
    kalman.transition_matrices = eye(1)
    kalman.transition_covariance = array([transition_var_g, ])
    kalman.observation_matrices = eye(1)
    kalman.observation_covariance = obs_cov
    sampled_g = forward_filter_backward_sample(kalman, z_g, prior_mu_g, prior_cov_g)
    return sampled_g.reshape((n,))
def est_dtlnorm(x, thres, opt_method):
    def cond_dtlnorm(par):
        return m_nl_dtlnorm(x=x, mu=par[0], sigma=par[1], thres=thres)

    if opt_method in ['L-BFGS-B', 'SLSQP', 'TNC']:
        est_par = minimize(x0=[mean(log(x)), std(log(x))],
                           fun=cond_dtlnorm, method=opt_method,
                           bounds=[(log(thres[0]), log(thres[1])), (1e-16, Inf)]).x
    else:
        est_par = minimize(x0=[mean(log(x)), std(log(x))],
                           fun=cond_dtlnorm, method=opt_method).x
    return est_par
def data_info(sheets):
    times = []
    lengths = []
    for sheet in sheets:
        df = get_df(sheet)
        time = dataset_time(df)
        times.append(time)
        length = len(df.index)
        lengths.append(length)

    print "times"
    print "min", min(times), "max", max(times), "mean", mean(times)
    print "lengths"
    print "min", min(lengths), "max", max(lengths), "mean", mean(lengths), "total", sum(lengths)
def anneal_bdst(n=11, depth=10, phases=10, iters=1000):
    """ MCMC/simulated annealing to generate a random bounded-depth
    spanning tree

    Parameters
    ----------
    n : int, size of grid
    depth : int, optional, target bound on depth
    phases : int, optional, number of cooling phases
    iters : int, optional, number of MCMC steps per phase

    Returns
    -------
    T : nx.Graph, spanning tree with T.base_graph, possibly with
        depth bound satisfied
    """
    beta = mc.Uninformative('beta', value=1.)

    G = nx.grid_graph([n, n])
    root = ((n - 1) / 2, (n - 1) / 2)
    bdst = BDST(G, root, depth, beta)

    @mc.deterministic
    def max_depth(T=bdst, root=root):
        shortest_path_length = nx.shortest_path_length(T, root)
        T.max_depth = max(shortest_path_length.values())
        return T.max_depth

    mod_mc = mc.MCMC([beta, bdst, max_depth])
    mod_mc.use_step_method(STMetropolis, bdst)
    mod_mc.use_step_method(mc.NoStepper, beta)

    for i in range(phases):
        beta.value = i * 5
        mod_mc.sample(iters, thin=max(1, iters / 100))
        print 'cur depth', max_depth.value
        print 'pct of trace with max_depth <= depth', pl.mean(mod_mc.trace(max_depth)[:] <= depth)

    return bdst.value
def anneal_ldst(n=11, phases=10, iters=1000):
    """ MCMC/simulated annealing to generate a random low-degree
    spanning tree on a grid graph

    Parameters
    ----------
    n : int, size of grid
    phases : int, optional, number of cooling phases
    iters : int, optional, number of MCMC steps per phase

    Returns
    -------
    T : nx.Graph, spanning tree with T.base_graph, with few degree-3
        vertices
    """
    beta = mc.Uninformative('beta', value=1.)
    ldst = LDST(my_grid_graph([n, n]), beta=beta)

    mod_mc = mc.MCMC([beta, ldst])
    mod_mc.use_step_method(STMetropolis, ldst)
    mod_mc.use_step_method(mc.NoStepper, beta)

    for i in range(phases):
        print 'phase %d' % (i + 1),
        beta.value = i * 5
        mod_mc.sample(iters, burn=iters - 1)
        print 'frac of deg 2 vtx = %.2f' % pl.mean(pl.array(ldst.value.degree().values()) == 2)

    return ldst.value
def getMFigure(self, nodeBc, measureName, measureShorthand):
    # sort locations by their mean measure value, descending
    names = nodeBc.keys()
    meanNodeBc = {}
    for name in names:
        meanNodeBc[name] = pylab.mean(nodeBc[name])
    names.sort(key=lambda x: meanNodeBc[x], reverse=True)
    data = []
    for name in names:
        data.append(nodeBc[name])

    # distributions for the top 5 locations
    nTop = 5
    fig = pylab.Figure(figsize=(5, 8), dpi=80)
    fig.subplots_adjust(bottom=0.08, right=0.95, top=0.95)
    for nodeIndex in range(nTop):
        axes = fig.add_subplot(nTop, 1, nodeIndex + 1)
        axes.hist(data[nodeIndex], 100)
        axes.set_ylabel(names[nodeIndex], fontsize=8)
        for tick in axes.get_yticklabels():
            tick.set_fontsize(10)
            tick.set_fontname("Times")
        if nodeIndex == 0:
            axes.set_title("Distribution of " + measureShorthand
                           + "s for top " + str(nTop) + " locations")
    axes.set_xlabel(measureName)
    return fig
def plot(self, p_A=0.3, N_As=None):
    if N_As is None:
        N_As = pl.linspace(0, self.N_u, self.N_u + 1)
    posteriors = sample_posteriors_noloop(N_As, p_A, 1000)

    pl.figure()
    x = N_As / self.N_u
    pl.plot(x, pl.mean(posteriors, 0), label='A')
    pl.plot(x, 1 - pl.mean(posteriors, 0), label='B')
    #pl.plot([0, 1], [1 - p_A, 1 - p_A], 'k--', label='1 - p(A) and p(A)')
    #pl.plot([0, 1], [p_A, p_A], 'k--')
    pl.plot([0.5, 0.5], [0, 1], 'k-.', label='full ambiguity')
    pl.legend(loc='best')
    pl.title('p(on|correct)=%.1f p(on|incorrect)=%.1f'
             % (self.p_uA_given_A, self.p_uA_given_B))
    pl.xlabel('Fraction of A in ambiguous stimulus')
    pl.ylabel('Posterior probability')
def scatter_times(name, sheets):
    means = []
    medians = []
    delays = []
    mean_points = []
    med_points = []
    for sheet, delay in sheets:
        delays.append(delay)
        times = get_times(sheet)
        mean = pylab.mean(times)
        median = pylab.median(times)
        means.append(mean)
        medians.append(median)
        mean_points.append((mean, sheet))
        med_points.append((median, sheet))

    print "----------mean points-----------"
    for mean, sheet in sorted(mean_points):
        print mean, sheet
    print "----------median points-----------"
    for median, sheet in sorted(med_points):
        print median, sheet

    pylab.scatter(delays, means, color='r')
    pylab.scatter(delays, medians, color='b')
    print "show"
    pylab.show()
def errEval(self, lpamat, Smat, Tmat):
    '''
    Computes the relative mean squared error between lpamat and its
    factorized estimate dot(Smat, Tmat).

    Arguments
    ---------
    lpamat : array, the matrix being approximated
    Smat, Tmat : arrays, the factors of the approximation
    '''
    lpamat_est = pl.dot(Smat, Tmat)
    lpamat_diff = lpamat - lpamat_est
    err = pl.mean(lpamat_diff**2) / pl.mean(lpamat**2)
    return err
def plotUpdateLines(n_samples_per_update, ax, y_limits=[]):
    """ Plot vertical lines wherever a parameter update occurred during
    the optimization.
    \param[in] n_samples_per_update Vector specifying how many samples
        were used between updates.
    \param[in] ax Axis object to plot the lines in.
    \param[in] y_limits Limits of the y-axis. Default is [], in which
        case ax.get_ylim() is used.
    """
    if len(y_limits) == 0:
        y_limits = ax.get_ylim()

    # Find a good number of vertical update lines to plot
    updates = np.arange(0, len(n_samples_per_update))
    while len(n_samples_per_update) > 20:
        n_samples_per_update = n_samples_per_update[0:-1:5]
        updates = updates[0:-1:5]

    ax.plot([n_samples_per_update, n_samples_per_update], y_limits,
            '-', color='#bbbbbb', linewidth=0.5, zorder=0)
    for ii in range(len(n_samples_per_update) - 1):
        y = y_limits[0] + 0.9 * (y_limits[1] - y_limits[0])
        ax.text(n_samples_per_update[ii + 1], y, str(updates[ii + 1]),
                horizontalalignment='center', verticalalignment='top',
                rotation='vertical')

    y = y_limits[0] + 0.95 * (y_limits[1] - y_limits[0])
    ax.text(mean(ax.get_xlim()), y, 'number of updates',
            horizontalalignment='center', verticalalignment='top')

    ax.set_ylim(y_limits)
def store_mcmc_fit(dm, key, model_vars=None, rate_trace=None):
    """ Store the parameter estimates generated by an MCMC fit of the
    negative-binomial model in the disease_model object, keyed by key

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    key : str
    model_vars : dict of PyMC stochastic or deterministic variables

    Results
    -------
    Save a regional estimate of the model prediction, with uncertainty
    """
    if rate_trace is None:
        rate_trace = calc_rate_trace(dm, key, model_vars)

    rate_trace = pl.sort(rate_trace, axis=0)
    rate = {}
    for x in [2.5, 50, 97.5]:
        # index of the x-th percentile row (must be an int)
        rate[x] = rate_trace[int(x / 100. * len(rate_trace)), :]

    param_mesh = dm.get_param_age_mesh()
    age_mesh = dm.get_estimate_age_mesh()

    dm.set_mcmc('lower_ui', key, rate[2.5])
    dm.set_mcmc('median', key, rate[50])
    dm.set_mcmc('upper_ui', key, rate[97.5])
    dm.set_mcmc('mean', key, pl.mean(rate_trace, axis=0))

    if dm.vars[key].has_key('dispersion'):
        dm.set_mcmc('dispersion', key,
                    dm.vars[key]['dispersion'].stats()['quantiles'].values())
def testPlot1(trials=20):
    f = FunctionWrapper(trials, StochQuad(noiseLevel=0.2))
    ls = lossTraces(fwrap=f, aclass=SGD, dim=trials, maxsteps=100,
                    algoparams={'learning_rate': 0.2})
    pylab.plot(ls, 'b:')
    pylab.plot(pylab.mean(ls, axis=1), 'r-')
    pylab.semilogy()
    pylab.show()
def sensitivityLarge(runtype, input_list, reps):
    taxMeans = []
    taxSEs = []
    p['verboseDebugging'] = False
    p['singleRunGraphs'] = False
    p['interactiveGraphics'] = False
    outFile = open(runtype + ' GEMSA outputs large.txt', 'a')
    for run in xrange(len(input_list[0])):
        print("Running simulation number {}...".format(run))
        print("Number of reps: {}".format(reps))
        sim_list = np.array(input_list)
        print(sim_list)
        p['agingParentsMoveInWithKids'] = sim_list[0, run]
        print(p['agingParentsMoveInWithKids'])
        p['personCareProb'] = sim_list[1, run]
        p['retiredHours'] = sim_list[2, run]
        p['ageOfRetirement'] = sim_list[3, run]
        p['baseDieProb'] = sim_list[4, run]
        p['babyDieProb'] = sim_list[5, run]
        p['personCareProb'] = sim_list[6, run]
        p['maleAgeCareScaling'] = sim_list[7, run]
        p['femaleAgeCareScaling'] = sim_list[8, run]
        p['childHours'] = sim_list[9, run]
        p['homeAdultHours'] = sim_list[10, run]
        p['workingAdultHours'] = sim_list[11, run]
        p['lowCareHandicap'] = sim_list[12, run]
        p['growingPopBirthProb'] = sim_list[13, run]
        p['basicDivorceRate'] = sim_list[14, run]
        p['variableDivorce'] = sim_list[15, run]
        p['basicMaleMarriageProb'] = sim_list[16, run]
        p['basicFemaleMarriageProb'] = sim_list[17, run]
        p['probApartWillMoveTogether'] = sim_list[18, run]
        p['coupleMovesToExistingHousehold'] = sim_list[19, run]
        p['basicProbAdultMoveOut'] = sim_list[20, run]
        p['variableMoveBack'] = sim_list[21, run]

        taxList = []
        taxSum = 0.0
        for i in range(0, reps):
            print i,
            s = Sim(p)
            tax, seed = s.run()
            taxList.append(tax)
            taxSum += tax
            print tax
        taxMeans.append(pylab.mean(taxList))
        outFile.write(str(taxSum / reps) + "\n" + str(seed) + "\n")
        taxSEs.append(pylab.std(taxList) / math.sqrt(reps))
    outFile.close()
def analyzeRoutes(placeStats, RatingBounds, *RouteGroups):
    print '\tGroup\t\t', ('\t' * 4).join([RouteGroup[0] for RouteGroup in RouteGroups])
    print 'Fun Name\t', 'Entropy\t Mean\t StdDev| ' * len(RouteGroups)
    RouteGroupList = [RouteGroup[1].values() for RouteGroup in RouteGroups]
    for fn in (AtTarget, AtStart, Oth_Plc, AtModePl, AtOtMdPl, NavConf, DirRtng,
               NumPlace, NumPose, ManDist, Effic, EffAcc):
        print fn.__name__, '\t',
        for Routes in RouteGroupList:
            if RatingBounds:
                Routes = [route for route in Routes
                          if route.mean_rating > RatingBounds[0]
                          and route.mean_rating <= RatingBounds[1]]
            seq = fn(Routes)
            if len(seq) > 1:
                avg = pylab.mean(seq)
                stddev = pylab.std(seq)
            else:
                if len(seq):
                    avg = seq[0]
                else:
                    avg = 0.0
                stddev = 0.0
            print '%6.2f\t%6.2f\t%6.2f | ' % (frequency_entropy(seq), avg, stddev),
        print
    if not placeStats:
        return
    print 'Common Places:\t',
    for Routes in RouteGroupList:
        max_places = max([len(route.places) for route in Routes] + [1])
        if not Routes:
            continue
        commonPlaces = Routes[0].places
        for route in Routes[1:]:
            commonPlaces = commonPlaces.intersection(route.places)
        print '%d of %d\t%6.2f | ' % (len(commonPlaces), max_places,
                                      len(commonPlaces) / float(max_places)),
    print
    print 'Termination Place Entropy:\t',
    for Routes in RouteGroupList:
        print ' %6.2f |\t' % (frequency_entropy([route.term_place for route in Routes])),
    print
    if RouteGroupList and RouteGroupList[0] and RouteGroupList[0][0]:
        print 'Most common termination place (Target %s):\t' % str(RouteGroupList[0][0].target_place),
    else:
        print 'No routes found',
    for Routes in RouteGroupList:
        print ' %s |\t' % (str(mode([route.mode_term_place for route in Routes], 3))),
    print
def regional_average(derived_covariate, key, region, year, sex):
    """ handle region = iso3 code or region = clean(gbd_region) """
    # TODO: make regional average weighted by population
    if key not in derived_covariate:
        debug('WARNING: derived covariate %s not found' % key)
        return 0.

    if region == 'world':
        return 0.

    cov_vals = [derived_covariate[key]['%s+%s+%s' % (iso3, year, sex)]
                for iso3 in countries_for[region]
                if derived_covariate[key].has_key('%s+%s+%s' % (iso3, year, sex))]
    return pl.mean(cov_vals)
def moving_average(y, window_length):
    """
    Compute the moving average of y with specified window length.

    Args:
        y: a 1-d pylab array with length N, representing the
            y-coordinates of the N sample points
        window_length: an integer indicating the window length for
            computing moving average

    Returns:
        a 1-d pylab array with the same length as y, storing the moving
        average of the y-coordinates of the N sample points
    """
    res = []
    for i in range(len(y)):
        if i < window_length:
            mean = pylab.mean(y[0:i + 1])
        else:
            mean = pylab.mean(y[i - window_length + 1:i + 1])
        res.append(mean)
    return pylab.array(res)
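# Minimal usage sketch for moving_average: with window_length=2 each
# output is the mean of the current and previous sample (the window is
# shorter at the start of the array).
#
#   import pylab
#   y = pylab.array([1., 2., 3., 4.])
#   moving_average(y, 2)   # -> [1., 1.5, 2.5, 3.5]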
def gain_hist(self):
    mybins = pl.arange(-.4, 1.3, .15)
    pl.hist(self.gain, bins=mybins, color='0.5')
    pl.xlim(-.4, 1.1)
    pl.ylim(0, 8)
    ax = pl.gca()
    pl.text(0.9, 0.8, self.classid, horizontalalignment='center',
            transform=ax.transAxes, fontsize=14)
    pl.axvline(x=pl.mean(self.gain), color='k', ls='--')
def get_rate(session, ch, cl, condition, rwin=(0.05, 0.15)):
    spikes = session.get_spike_times(ch, cl)
    trials, _, _, _ = session.get_trials(condition)
    if len(trials) == 0:
        raise ValueError("no trials for condition: %s" % str(condition))
    s = [len(ts) for ts in
         physio.spikes.stats.event_lock(trials, spikes, rwin[0], rwin[1])]
    ms = pl.mean(s) / float(rwin[1] - rwin[0])
    ss = pl.std(s) / (float(rwin[1] - rwin[0]) * pl.sqrt(len(s)))
    nt = len(s)
    return ms, ss, nt
def post_hist(self):
    mybins = pl.arange(0, 34, 4)
    pl.hist(self.posttest, bins=mybins, color='0.5')
    pl.xlim(0, 32)
    pl.ylim(0, 7)
    ax = pl.gca()
    pl.text(0.9, 0.8, self.classid, horizontalalignment='center',
            transform=ax.transAxes, fontsize=14)
    pl.axvline(x=pl.mean(self.posttest), color='k', ls='--')
def fit_string(fit):
    popt, pcov = fit
    if type(pcov) is float and pcov == pylab.inf:
        return "no fit"
    center = popt[0:2]
    scale, fwhm = popt[2:4]
    center_error = pylab.mean([pylab.sqrt(pcov[0][0]), pylab.sqrt(pcov[1][1])])
    fwhm_error = pylab.sqrt(pcov[3][3])
    s = "center: %.2f %.2f degrees" % tuple(map(angles.to_degrees, center))
    s += ", fwhm: %.2f arcmin" % angles.to_arcmin(fwhm)
    s += ", center error: %.2f degrees" % angles.to_degrees(center_error)
    s += ", fwhm error: %.2f arcmin" % angles.to_arcmin(fwhm_error)
    return s
def boxdotplot(ax, data, color, ecolor, jitter=0.01, yshift=0):
    yc = jitter * pl.randn(len(data)) + yshift
    ax.scatter(yc, data, s=5, c=color, edgecolor=ecolor)

    prc = pl.prctile(data)
    ax.plot([yshift + 1] * 2, [prc[0], prc[-1]], 'k-', zorder=0)
    jitter *= 5
    ax.fill(yshift + pl.array([1 - jitter, 1 + jitter, 1 + jitter, 1 - jitter]),
            [prc[1], prc[1], prc[3], prc[3]],
            facecolor='w', edgecolor='k', zorder=1)
    ax.plot(yshift + pl.array([1 - jitter, 1 + jitter]), [prc[2]] * 2, 'k-', zorder=2)
    ax.plot([yshift + 1], [pl.mean(data)], 'o', color='w', markeredgecolor='k')

    ax = graphics.prepare_axes(ax, haveon=('left',))
def estimate_skew_angle(self, image, angles):
    param = self.param
    estimates = []
    for a in angles:
        v = mean(interpolation.rotate(image, a, order=0, mode='constant'), axis=1)
        v = var(v)
        estimates.append((v, a))
    if param['debug'] > 0:
        plot([y for x, y in estimates], [x for x, y in estimates])
        ginput(1, param['debug'])
    _, a = max(estimates)
    return a
def mode(self):
    # random search for the mode, refined by a local optimizer
    mo = mean(array(getRanges(self.Vars)), axis=0)
    fo = self(*[mo[i] for i in range(len(mo))])
    r = getRanges(self.Vars)
    for i in range(100):
        mi = zeros_like(mo)
        # draw one uniform sample per variable (r[0] holds the lower
        # bounds, r[1] the upper bounds)
        for j in range(len(mo)):
            mi[j] = float(UniformDistr(r[0][j], r[1][j]).rand(1))
        fi = self(*mi)
        if fo < fi:
            mo = mi
            fo = fi
    return maxprob(self, mo * 1.01, array(getRanges(self.Vars)).T)
def uncertainty_comparison():
    """ Plots the uncertainty in the dG of each reaction in KEGG, against
    the RMSE of the predictions (compared to the NIST measurements).

    The x-value is a function of the number of substrates and number of
    products of each reaction (excluding H2O and H+).
    The y-value of each dot is calculated by the RMSE of the observation
    vs. estimation across all measurements of the same reaction.
    """
    limits = [(-5, -3), (-5, -2), (-6, -2)]  # in log10 scale

    pylab.rcParams['text.usetex'] = True
    pylab.rcParams['legend.fontsize'] = 8
    pylab.rcParams['font.family'] = 'sans-serif'
    pylab.rcParams['font.size'] = 8
    pylab.rcParams['lines.linewidth'] = 0.4
    pylab.rcParams['lines.markersize'] = 3
    pylab.figure(figsize=(12, 3.5))

    for i in range(len(limits)):
        (min_C, max_C) = limits[i]
        rid_to_nist_rowids = map_rid_to_nist_rowids()
        data_mat = []
        for (rid, rowids) in rid_to_nist_rowids.iteritems():
            reaction = gc.kegg.rid2reaction(rid)
            try:
                error_mat = []
                for rowid in rowids:
                    row = nist.data[rowid]
                    #evaluation = row[3]  # A, B, C, D
                    dG0_est = [reaction.PredictReactionEnergy(predictor,
                                                              pH=row.pH, I=row.I, T=row.T)
                               for predictor in [A, H]]
                    error_mat.append([(row.dG0_r - x) for x in dG0_est])
                error_mat = pylab.array(error_mat)
                rmse = pylab.sqrt(pylab.mean(error_mat**2, 0))
                (ddG_min, ddG_max) = calculate_uncertainty(reaction,
                                                           min_C=10**min_C,
                                                           max_C=10**max_C, T=300)
                data_mat.append([ddG_max - ddG_min, rmse[0], rmse[1], rmse[2]])
            except MissingCompoundFormationEnergy:
                continue

        data_mat = pylab.matrix(data_mat)

        pylab.subplot(1, len(limits), i + 1)
        pylab.hold(True)
        pylab.plot(data_mat[:, 0], data_mat[:, 1:], '.')
        pylab.plot([0, 200], [0, 200], '--k')
        pylab.axis('scaled')
        pylab.xlabel(r"uncertainty in $\Delta_r G$ due to concentrations [kJ/mol]")
        if i == 0:
            pylab.ylabel(r"RMSE of $\Delta_r G^\circ$ estimation [kJ/mol]")
        pylab.title(r"$10^{%g}M$ $<$ [c] $<$ $10^{%g}M$" % (min_C, max_C))
        pylab.legend(['Alberty', 'Hatzimanikatis', 'Rugged'], loc="upper left")

    pylab.savefig('../res/compare_uncertainty.pdf', format='pdf')
def runtimes_stats():
    df = pd.read_csv('test_runtimes.csv', skipinitialspace=True)
    print "total trials"
    print len(df['algorithm']) / len(df['algorithm'].unique())
    ratios = []
    labels = []
    weights = []
    hist_algorithms = ['prim', 'khuller']
    algorithm_labels = {'prim': 'Karger', 'khuller': 'Khuller'}
    sns.set()
    pylab.figure()
    for algorithm, group in df.groupby('algorithm'):
        print algorithm
        comparisons = group['comparisons'].sum()
        dominated = group['dominated'].sum()
        print float(dominated) / float(comparisons), "(", dominated, "/", comparisons, ")"
        print binom_test(dominated, comparisons)
        group = group.groupby('points', as_index=False).agg(pylab.mean)
        pylab.plot(group['points'], group['runtime'], label=algorithm)
        ratio = group['cost ratio']
        ratio = ratio[~pylab.isnan(ratio)]
        ratio = ratio - 1
        print "cost comparisons", len(ratio)
        print "cost ratio", pylab.mean(ratio), "+/-", pylab.std(ratio, ddof=1)
        if algorithm in hist_algorithms:
            ratios.append(ratio)
            labels.append(algorithm_labels[algorithm])
            weight = pylab.ones_like(ratio) / float(len(ratio))
            weights.append(weight)
    pylab.legend(loc=2)
    pylab.xlabel('number of points')
    pylab.ylabel('runtime (minutes)')
    pylab.savefig('test_runtimes/runtimes.pdf', format='pdf')
    pylab.close()

    pylab.figure()
    pylab.hist(ratios, label=labels, weights=weights)
    pylab.xlabel('percent better/worse than Steiner', size=20)
    pylab.ylabel('proportion', size=20)
    pylab.legend()
    ax = pylab.gca()
    pylab.setp(ax.get_legend().get_texts(), fontsize=20)  # legend text size
    pylab.tight_layout()
    pylab.savefig('test_runtimes/cost_ratios_hist.pdf', format='pdf')
    pylab.close()
def check_page(image):
    if len(image.shape) == 3:
        return "input image is color image %s" % (image.shape,)
    if mean(image) < median(image):
        return "image may be inverted"
    h, w = image.shape
    if h < 600:
        return "image not tall enough for a page image %s" % (image.shape,)
    if h > 10000:
        return "image too tall for a page image %s" % (image.shape,)
    if w < 600:
        return "image too narrow for a page image %s" % (image.shape,)
    if w > 10000:
        return "image too wide for a page image %s" % (image.shape,)
    slots = int(w * h * 1.0 / (30 * 30))
    _, ncomps = measurements.label(image > mean(image))
    if ncomps < 10:
        return "too few connected components for a page image (got %d)" % (ncomps,)
    if ncomps > slots:
        return "too many connected components for a page image (%d > %d)" % (ncomps, slots)
    return None
def PLOT_DATA(arr, Xplot, Yplot):
    x = arr[:, OSZ_Labels[Xplot][0]]
    y = arr[:, OSZ_Labels[Yplot][0]]
    lb = OSZ_Labels[Yplot][1]
    print("\n\tMean {} = {} {}\n".format(lb, p.mean(y), OSZ_Labels[Yplot][2]))
    p.rcParams["font.family"] = "serif"
    p.figure(figsize=(10, 6))
    p.rcParams.update({"font.size": 14})
    p.xlabel(OSZ_Labels[Xplot][1] + OSZ_Labels[Xplot][2])
    p.ylabel(OSZ_Labels[Yplot][1] + OSZ_Labels[Yplot][2])
    p.title(OSZ_Labels[Yplot][3])
    p.plot(x, y, "r")
    p.show()
def truncation_stats():
    df = get_df()
    df = df.drop_duplicates(subset='name')
    df['swc'] = df['name'].str.slice(stop=-1)
    #df = df[['swc', 'neuron_type', 'points']]
    original_points = []
    new_points = []
    for name, group in df.groupby('swc'):
        points = group['points']
        neuron_types = group['neuron_type']
        index1 = neuron_types == 'axon'
        index2 = neuron_types == 'truncated axon'
        if 'axon' in neuron_types.values and 'truncated axon' in neuron_types.values:
            points1 = list(points[index1])[0]
            points2 = list(points[index2])[0]
            original_points.append(float(points1))
            new_points.append(float(points2))
    original_points = pylab.array(original_points)
    new_points = pylab.array(new_points)
    diff = original_points - new_points
    print pylab.mean(diff)
    print pylab.mean(diff / original_points)
def fit_quality(time, parameters, noise, repetitions):
    """
    Apply the fitting routine a number of times, as given by
    `repetitions`, and return information about the fit performance.
    """
    results = []
    errors = []

    from numpy.random import seed
    alpha_psp = AlphaPSP()

    for _ in range(repetitions):
        seed()
        value = noisy_psp(time=time, noise=noise, **parameters)
        fit_result = fit(alpha_psp, time, value, noise,
                         fail_on_negative_cov=[True, True, True, False, False])
        if fit_result is not None:
            result, error, chi2, success = fit_result
            if chi2 < 1.5 and success:
                print(chi2, result)
                results.append(result)
                errors.append(error)
        else:
            print("fit failed:", end=' ')
            print(fit_result)

    keys = alpha_psp.parameter_names()
    result_dict = dict((key, []) for key in keys)
    error_dict = dict((key, []) for key in keys)

    for result in results:
        for r, key in zip(result, keys):
            result_dict[key].append(r)

    for error in errors:
        for r, key in zip(p.diag(error), keys):
            error_dict[key].append(p.sqrt(r))
            if p.isnan(p.sqrt(r)):
                print("+++++++", r)

    return ([p.mean(result_dict[key]) for key in keys],
            [p.std(result_dict[key]) for key in keys],
            len(results),
            keys,
            [result_dict[key] for key in keys],
            [error_dict[key] for key in keys])
def run_simulation(num_robots, speed, capacity, width, height, dirt_amount,
                   min_coverage, num_trials, robot_type):
    """
    Runs num_trials trials of the simulation and returns the mean number of
    time-steps needed to clean the fraction min_coverage of the room.

    The simulation is run with num_robots robots of type robot_type, each
    with the input speed and capacity, in a room of dimensions
    width x height with dirt_amount on each tile.

    num_robots: an int (num_robots > 0)
    speed: a float (speed > 0)
    capacity: an int (capacity > 0)
    width: an int (width > 0)
    height: an int (height > 0)
    dirt_amount: an int
    min_coverage: a float (0 <= min_coverage <= 1.0)
    num_trials: an int (num_trials > 0)
    robot_type: class of robot to be instantiated (e.g. StandardRobot or
                FaultyRobot)
    """
    # Track the number of time-steps from each trial (we return their mean)
    trial_times = []
    for t in range(num_trials):
        # Initialize time-step tracker
        t_steps = 0
        # Create a new room for each trial
        room = EmptyRoom(width, height, dirt_amount)
        # Create a new fleet of robots for each room
        robots = []
        for i in range(num_robots):
            robots.append(robot_type(room, speed, capacity))
        # While the fraction of clean tiles is below min_coverage,
        # advance the simulation by one time-step
        while min_coverage > (room.get_num_cleaned_tiles() / room.get_num_tiles()):
            t_steps += 1
            for robot in robots:
                robot.update_position_and_clean()
        trial_times.append(t_steps)
    return pylab.mean(trial_times)
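# Minimal usage sketch for run_simulation, assuming EmptyRoom and
# StandardRobot are defined elsewhere in this module:
#
#   avg_steps = run_simulation(num_robots=1, speed=1.0, capacity=1,
#                              width=5, height=5, dirt_amount=3,
#                              min_coverage=0.8, num_trials=30,
#                              robot_type=StandardRobot)
#   print(avg_steps)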
def efficient_frontier(self, xi=0.01, xf=4, npts=100, scale=10):
    frontier = linspace(xi, xf, npts)
    rets = zeros(len(frontier))
    sharpe = zeros(len(frontier))
    for i, f in enumerate(frontier):
        w = self.efficient_frontier_w(f)
        tmp = self.ret_for_w(w)
        rets[i] = tmp.sum() * scale
        sharpe[i] = mean(tmp) / std(tmp) * sqrt(len(tmp))
    risk = rets / sharpe
    return pd.Series(rets, index=risk), sharpe.max()
def res_dist(x, y, e, n_runs=100, random_state=None):
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.4, random_state=random_state)
    test_res = []
    train_res = []
    start_time = time()
    for i in range(n_runs):
        e.fit(x_train, y_train)
        train_res.append(e.score(x_train, y_train))
        test_res.append(e.score(x_test, y_test))
        if i % (n_runs / 10) == 0:
            print("%d" % i, end=' ')
    print("\nTime: %.3f secs" % (time() - start_time))
    print("Test  Min: %.3f Mean: %.3f Max: %.3f SD: %.3f" %
          (min(test_res), mean(test_res), max(test_res), std(test_res)))
    print("Train Min: %.3f Mean: %.3f Max: %.3f SD: %.3f" %
          (min(train_res), mean(train_res), max(train_res), std(train_res)))
    print()
    return train_res, test_res
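# Minimal usage sketch for res_dist: `e` can be any scikit-learn-style
# estimator with fit/score; the dataset and estimator below are
# illustrative assumptions, not part of the original code.
#
#   from sklearn.datasets import load_digits
#   from sklearn.neural_network import MLPClassifier
#   digits = load_digits()
#   train_res, test_res = res_dist(digits.data, digits.target,
#                                  MLPClassifier(), n_runs=10,
#                                  random_state=0)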
def plot_edge_lengths():
    import pandas as pd
    df = pd.read_csv('imaris_lengths.csv', names=['neuron', 'length'])
    pylab.hist(df['length'])
    pylab.savefig('imaris/imaris_lengths.pdf', format='pdf')
    pylab.close()
    mu = pylab.mean(df['length'])
    sigma2 = pylab.var(df['length'], ddof=1)
    l = 1.0 / mu
    l2 = l ** 2
    print "mean", mu
    print "lambda", l
    print 'variance', sigma2
    print 'var-hat', 1.0 / l2
def measure(self, line):
    h, w = line.shape
    smoothed = filters.gaussian_filter(line, (h * 0.5, h * self.smoothness),
                                       mode='constant')
    smoothed += 0.001 * filters.uniform_filter(smoothed, (h * 0.5, w),
                                               mode='constant')
    self.shape = (h, w)
    a = argmax(smoothed, axis=0)
    a = filters.gaussian_filter(a, h * self.extra)
    self.center = array(a, 'i')
    deltas = abs(arange(h)[:, newaxis] - self.center[newaxis, :])
    self.mad = mean(deltas[line != 0])
    self.r = int(1 + self.range * self.mad)
def score(self, audio):
    nfft = int(self.window * audio.framerate)
    audio.calculate_specgram(nfft=nfft, noverlap=nfft // 2)
    freqs = np.where((audio.specgram_freqs >= self.lower_call_frequency)
                     * (audio.specgram_freqs <= self.upper_call_frequency))
    spec2 = mlab.specgram(mean(log(audio.specgram[freqs[0], ]), 0),
                          NFFT=1024, noverlap=512, Fs=2 / self.window)
    freqs2 = np.where((spec2[1] >= self.lower_syllable_frequency)
                      * (spec2[1] <= self.upper_syllable_frequency))
    max_kiwi = max(np.max(spec2[0][freqs2[0], :], 0))
    mean_kiwi = np.exp(np.mean(np.mean(np.log(spec2[0][freqs2[0], :]), 0)))
    return max_kiwi / mean_kiwi
def r_squared(y, estimated):
    """
    Calculate the R-squared error term.

    Args:
        y: a 1-d pylab array with length N, representing the
            y-coordinates of the N sample points
        estimated: a 1-d pylab array of values estimated by the
            regression model

    Returns:
        a float for the R-squared error term
    """
    return 1 - sum((y - estimated)**2) / sum((y - pylab.mean(y))**2)
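# Minimal usage sketch for r_squared: a perfect fit gives 1.0, and
# predicting the mean everywhere gives 0.0.
#
#   import pylab
#   y = pylab.array([1., 2., 3.])
#   r_squared(y, y)                          # -> 1.0
#   r_squared(y, pylab.array([2., 2., 2.]))  # -> 0.0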
def plot_nonlinear_model(m, color='green', label='Nonlinear'):
    X = pl.arange(0., 1., .01)
    tfr_trace = []
    for beta, gamma in zip(m.beta.trace(), m.gamma.trace()):
        y = beta[0] + beta[1] * X + pl.maximum(0., beta[2] * (X - gamma))
        pl.plot(X, y, color='gray', alpha=.75, zorder=-1)
        tfr_trace.append(y)
    pl.plot(X, pl.mean(tfr_trace, axis=0), color=color, linewidth=5, label=label)
    decorate_plot()
def boutons_plots():
    groups = defaultdict(list)
    for fname in os.listdir('boutons/swc_files'):
        group = fname[:-5]
        num = fname[-5]
        groups[group].append(num)

    all_edge_lengths = []
    for group in groups:
        G = None
        for num in groups[group]:
            fname = 'boutons/swc_files/' + group + num + '.swc'
            graphs = get_neuron_points(fname)
            H = graphs[0]
            if G is None:
                G = H
            else:
                G = nx.disjoint_union(G, H)
        viz_tree(G, group, 'boutons/drawings')
        edge_lengths = []
        for u, v in G.edges_iter():
            edge_lengths.append(G[u][v]['length'])
        print "group", group
        print pylab.mean(edge_lengths)
        pylab.figure()
        pylab.hist(edge_lengths)
        pylab.savefig('boutons/histograms/edge_lengths_%s.pdf' % group, format='pdf')
        pylab.close()
        all_edge_lengths += edge_lengths

    print "grand average"
    print pylab.mean(all_edge_lengths)
    pylab.figure()
    pylab.hist(all_edge_lengths)
    pylab.savefig('boutons/histograms/edge_lengths_all.pdf', format='pdf')
    pylab.close()
def __init__(self, data=None, X=None, Y=None, bins=None):
    """.. rubric:: **Constructor**

    One should provide either the parameter **data** alone, or the X and Y
    parameters, which are the histogram of some data sample.

    :param data: random data
    :param X: evenly spaced X data
    :param Y: probability density of the data
    :param bins: if data is provided, we will compute the probability using
        the hist function, and bins may be provided.
    """
    self.data = data
    if data:
        Y, X, _ = pylab.hist(self.data, bins=bins, density=True)
        self.N = len(X) - 1
        self.X = [(X[i] + X[i + 1]) / 2 for i in range(self.N)]
        self.Y = Y
        self.A = 1
        self.guess_std = pylab.std(self.data)
        self.guess_mean = pylab.mean(self.data)
        self.guess_amp = 1
    else:
        self.X = X
        self.Y = Y
        self.Y = self.Y / sum(self.Y)
        if len(self.X) == len(self.Y) + 1:
            self.X = [(X[i] + X[i + 1]) / 2 for i in range(len(X) - 1)]
        self.N = len(self.X)
        self.guess_mean = self.X[int(self.N / 2)]
        # cast X to an array so the element-wise arithmetic works even
        # when self.X is a plain list
        self.guess_std = sqrt(sum((array(self.X) - mean(self.X))**2) / self.N) / sqrt(2 * 3.14)
        self.guess_amp = 1.
    self.func = self._func_normal
def plotMeanWaves(data, labels=None, figureNumber=1):
    # can also be used without outliers (if labels are provided with -1)
    n = len(data)
    if labels is not None:
        outliers = pylab.find(labels == -1)
        notoutliers = list(set(range(n)) - set(outliers))
    else:
        notoutliers = range(n)
    # figure(...).clf() returns None, so keep the figure handle first
    fig = pylab.figure(figureNumber)
    fig.clf()
    ax = fig.add_subplot(111)
    c = [0, 0, 0]  # [.75, .75, .75]
    l = 2.
    # minimalist alternative:
    # map(lambda i: pl.plot(x[i, :], color=c, alpha=.45, linewidth=l),
    #     pl.arange(0, pl.shape(x)[0], 1))
    for i in range(len(data)):
        if i in notoutliers:
            #h2, = ax.plot(data[i], color=c, alpha=.15, linewidth=l)
            ax.plot(data[i], color=c, alpha=.15, linewidth=l)
        else:
            ax.plot(data[i], color=[1, 0, 0], alpha=.15, linewidth=l)
    pylab.xlabel('t [ms]')
    l = 2.
    c = [0, 0, 0]
    ax.plot(pylab.mean(data[notoutliers], 0) - pylab.std(data[notoutliers], 0),
            '--', color=c, linewidth=l)
    ax.plot(pylab.mean(data[notoutliers], 0) + pylab.std(data[notoutliers], 0),
            '--', color=c, linewidth=l)
    ax.plot(pylab.mean(data[notoutliers], 0), 'k', linewidth=5.)
    pylab.grid()
    handles, labels = ax.get_legend_handles_labels()
    #pylab.axis('off')
    pylab.axis('tight')
def parse_times(infile):
    if not os.path.exists(infile):
        exit(1)
    time_relative = {}
    time_delta = {}
    with open(infile) as f:
        for line in f:
            if line.find(" = ") != -1:
                rawinfo = line.split(" = ")
                if "Ping stat" in rawinfo[0]:
                    time_relative[rawinfo[0]] = ast.literal_eval(rawinfo[1])
                else:
                    time_relative[rawinfo[0]] = float(rawinfo[1])

    time_delta["test"] = time_relative["Stop Test"]
    time_delta["network-uptime"] = time_relative["Network stopped"] - time_relative["Network start"]
    time_delta["network-start"] = time_relative["Network started"] - time_relative["Network start"]
    time_delta["network-stop"] = time_relative["Network stopped"] - time_relative["Network stop"]
    time_delta["mote-uptime"] = time_relative["Mote stopped"] - time_relative["Mote start"]
    #time_delta["mote-start"] = time_relative["Mote detect start"] - time_relative["Mote start"]
    time_delta["mote-stop"] = time_relative["Mote stopped"] - time_relative["Mote reached"]
    time_delta["mote-detect"] = time_relative["Mote detected"] - time_relative["Mote start"]
    time_delta["ping1"] = time_relative["Mote reached"] - time_relative["Mote ping"]
    if time_relative.has_key("Moved mote ping"):
        time_delta["pingm"] = time_relative["Moved mote reached"] - time_relative["Moved mote ping"]
    elif time_relative.has_key("Mote ping2"):
        time_delta["ping2"] = time_relative["Mote reached2"] - time_relative["Mote ping2"]
    if time_relative.has_key("Ping stat"):
        #time_relative["Ping stat"] = ast.literal_eval(time_relative["Ping stat"])
        time_delta["ping2-stat"] = time_relative["Ping stat"]
        time_delta["ping2-mean"] = pylab.mean(time_relative["Ping stat"])
        time_delta["ping2-std"] = pylab.std(time_relative["Ping stat"])
        time_delta["ping2-var"] = pylab.var(time_relative["Ping stat"])
    return time_delta
def get_bootstrap_interval_for_list_of_samples(list_of_samples, num_reps=1000,
                                               conf_level=0.95, num_comparisons=1):
    '''Given a list of samples, determine the desired confidence interval
    (defined by conf_level*100%) by bootstrapping (i.e., sampling with
    random replacement). Correct the confidence limits for the number of
    simultaneous comparisons (num_comparisons).

    Parameters
    ----------
    list_of_samples : python list
        list of values calculated from the data (usually single-trial
        values for one cell/pair, or across-trial averages for a
        population of cells/pairs)
    num_reps : int
        number of times to repeat the re-sampling process
    conf_level : float
        confidence level of the bootstrap bands, BEFORE correcting for
        multiple comparisons
    num_comparisons : int
        number of simultaneous comparisons to be made with the resulting
        data; will be used to adjust the confidence level
    '''
    bootstrap = []
    alpha_uncorrected = 1.0 - conf_level  # 0.05
    random_distribution_means = []
    shuff_num = 0
    while shuff_num <= num_reps:
        random_dist = []
        while len(random_dist) <= len(list_of_samples):
            random_ndx = random.randint(0, len(list_of_samples) - 1)
            random_dist.append(list_of_samples[random_ndx])
        random_distribution_means.append(pylab.mean(random_dist))
        shuff_num += 1
    random_distribution_means = sorted(random_distribution_means)
    for k, val in enumerate(random_distribution_means):
        if (k + 1) == int((alpha_uncorrected / float(2 * num_comparisons))
                          * len(random_distribution_means)):
            bootstrap.append(val)
        elif (k + 1) == int((1 - (alpha_uncorrected / float(2 * num_comparisons)))
                            * len(random_distribution_means)):
            bootstrap.append(val)
    return bootstrap
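# Minimal usage sketch for the bootstrap interval, assuming the
# `random` and `pylab` imports used above; the returned list holds the
# lower and upper bound of the interval.
#
#   import random, pylab
#   samples = list(pylab.randn(200) + 5.0)
#   lo, hi = get_bootstrap_interval_for_list_of_samples(samples, num_reps=1000)
#   # lo and hi bracket the bootstrapped distribution of the mean at
#   # roughly the 2.5th and 97.5th percentiles (for conf_level=0.95)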