def runAll(ScaleDensity, b, f, Particle, Scale):
    """Patch ``mybody.ini``, run the halo finder and plot cumulative mass functions.

    The five arguments are written into the ini file on the lines that start
    with ``ScaleDensity = ``, ``b = ``, ``f = ``, ``LinkingLenghtScale = ``
    (``Scale``) and ``NrParticlesDouble = `` (``Particle``).  ``mpirun -n 2
    ./main`` is then executed and the masses found in ``./outData/`` are
    plotted against the Rockstar and AMIGA catalogues.  The figure is saved
    under ``./plots/compare/`` with all five settings encoded in its name.
    """
    import pylab as p
    import shutil
    import subprocess

    path = "./outData/"
    savepath = "./plots/compare/"
    inifilename = "mybody.ini"
    filenameMine = "mass.dat"
    filenameRockstar = "halos_0.ascii"
    filenameAmiga = "amiga.dat"

    # (line prefix in the ini file, replacement value)
    overrides = [
        ("ScaleDensity = ", ScaleDensity),
        ("b = ", b),
        ("f = ", f),
        ("LinkingLenghtScale = ", Scale),
        ("NrParticlesDouble = ", Particle),
    ]

    # Write the patched copy next to the original and swap it in afterwards;
    # the context manager closes both files even if writing fails.
    tmpname = inifilename + ".tmp"
    with open(inifilename, 'r') as inifile, open(tmpname, "w") as tmpinifile:
        for line in inifile:
            for prefix, value in overrides:
                if line.startswith(prefix):
                    # NOTE(review): the trailing space after the newline is
                    # kept from the original; it indents the following ini
                    # line by one character -- confirm this is intended.
                    line = prefix + str(value) + "\n "
            tmpinifile.write(line)
    shutil.move(tmpname, inifilename)

    # Run my halofinder
    subprocess.call(["mpirun", "-n", "2", "./main"])

    # Read data from files
    mineData = p.loadtxt(path + filenameMine)
    rockstarData = p.loadtxt(path + filenameRockstar)
    amigaData = p.loadtxt(path + filenameAmiga)

    # Masses in descending order, so rank k == number of halos with mass >= M_k
    sortedData = p.sort(mineData)[::-1]
    sortedDataR = p.sort(rockstarData[:, 2])[::-1]
    sortedDataA = p.sort(amigaData[:, 3])[::-1]

    p.figure()
    for masses in (sortedData, sortedDataR, sortedDataA):
        p.loglog(masses, range(1, len(masses) + 1))
    p.xlabel("log(Mass) Msun")
    p.ylabel("log(Nr of halos with mass >= Mass)")
    p.legend(("mine", "Rockstar", "AMIGA"))

    # The original referenced an undefined name ``double`` here; the value
    # written to ``NrParticlesDouble`` is ``Particle``.
    name = ("MassFunction_ScaleDensity=" + str(ScaleDensity) + "_b=" + str(b)
            + "_f=" + str(f) + "_Scale=" + str(Scale)
            + "_Double=" + str(Particle))
    p.savefig(savepath + name + ".png")
def disease_info(obj):
    '''Find a disease number from a disease name, or a disease name from a
    disease number.

    obj : str or int
        A ``str`` is treated as an outcome name and the matching DisMod model
        number is returned (an ``int``, a list of ``int`` when several models
        share the name, or ``[]`` when there is none).  An ``int`` is treated
        as a model number and the outcome name is returned (``[]`` when the
        number is unknown).  Any other type prints a message and returns ``[]``.

    The lookup table is read from the best-models CSV on every call.
    '''
    bm_path = '/snfs1/Project/GBD/dalynator/yld/best_models.csv'
    bm_csv = pandas.read_csv(bm_path,index_col=None)
    # one outcome name (the first listed) per model number
    dismod_models = bm_csv.groupby('dismod_model_number').apply(lambda df: df.ix[df.index[0], 'outcome_name'])
    dismod_models = dismod_models.drop([0], axis=0)
    # change Series object into dictionary {model number: outcome name}
    dismod_models = dict(pl.sort(dismod_models))

    if isinstance(obj, str):
        # reverse lookup: every model number whose outcome name matches
        num = [key for key, value in dismod_models.items() if value == obj]
        if not num:
            print('No DisMod-MR estimates for %s' % obj)
        elif len(num) > 1:
            print('DisMod-MR has more than one model for %s' % obj)
            num = [int(k) for k in num]
        else:
            num = int(num[0])
        return num
    elif isinstance(obj, int) and not isinstance(obj, bool):
        try:
            # model numbers are looked up as floats, as in the original code
            name = dismod_models[float(obj)]
        except KeyError:
            print('No DisMod-MR best model for %s' % obj)
            name = []
        return name
    else:
        print('Invalid entry. Please enter disease number or name')
        return []
def store_mcmc_fit(dm, key, model_vars=None, rate_trace=None):
    """ Store the parameter estimates generated by an MCMC fit of the
    negative-binomial model in the disease_model object, keyed by key

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    key : str
    model_vars : dict of PyMC stochastic or deterministic variable
      only used to compute the trace when ``rate_trace`` is not given
    rate_trace : 2-d array, optional
      posterior draws, one draw per row; computed with
      ``calc_rate_trace(dm, key, model_vars)`` when omitted

    Results
    -------
    Save a regional estimate of the model prediction, with uncertainty
    ('lower_ui', 'median', 'upper_ui', 'mean', and 'dispersion' when the
    model has a dispersion variable)
    """
    # ``== None`` on an array is an element-wise comparison; use identity.
    if rate_trace is None:
        rate_trace = calc_rate_trace(dm, key, model_vars)

    # Sort every column independently so that row r holds the r-th smallest
    # draw of each column.
    rate_trace = pl.sort(rate_trace, axis=0)
    n_draws = len(rate_trace)

    rate = {}
    for x in [2.5, 50, 97.5]:
        # array indices must be integers; truncate like the old float index did
        rate[x] = rate_trace[int(x / 100. * n_draws), :]

    dm.set_mcmc('lower_ui', key, rate[2.5])
    dm.set_mcmc('median', key, rate[50])
    dm.set_mcmc('upper_ui', key, rate[97.5])
    dm.set_mcmc('mean', key, pl.mean(rate_trace, axis=0))

    if 'dispersion' in dm.vars[key]:
        dm.set_mcmc('dispersion', key, dm.vars[key]['dispersion'].stats()['quantiles'].values())
def fwhm(x, y):
    """Estimate the full width at half maximum of the sampled profile y(x).

    The half maximum is ``max(y) / 2``.  The width is the distance in ``x``
    between the two samples whose ``y`` value lies closest to the half
    maximum; no interpolation is done, so the result is only as accurate as
    the sampling.

    NOTE(review): the two closest samples are not guaranteed to lie on
    opposite flanks of the peak -- confirm the profiles used are sampled
    densely enough for that.

    Returns ``(hm, width)`` where ``width`` is a length-1 array.
    Raises ``ValueError`` when fewer than two samples are given.
    """
    x = pl.asarray(x)
    y = pl.asarray(y)
    if y.size < 2:
        raise ValueError("fwhm: need at least two samples")

    hm = pl.amax(y / 2.0)
    y_diff = pl.absolute(y - hm)

    # Take the indices of the two smallest distances directly.  Looking the
    # sorted values up again with ``where(y_diff == value)`` returned the same
    # index pair twice whenever both flanks were equally close (any symmetric
    # profile), giving a width of zero.  Mergesort is stable, so ties keep
    # their original left-to-right order.
    nearest = pl.argsort(y_diff, kind='mergesort')
    i1 = nearest[0:1]
    i2 = nearest[1:2]
    width = pl.absolute(x[i1] - x[i2])
    return hm, width
def prec_rec(ranks):
    """
    ::

        Return precision and recall arrays for ranks array data

    With the ranks sorted ascending, entry k of the precision array is
    ``(k + 1) / (1 + rank_k)`` and entry k of the recall array is
    ``(k + 1) / size(ranks)``.
    """
    n_items = pylab.size(ranks)
    # 1, 2, ..., n as floats: number of items retrieved so far
    retrieved = pylab.arange(n_items) + 1.0
    ordered_ranks = pylab.sort(ranks)
    precision = retrieved / (ordered_ranks + 1.0)
    recall = retrieved / n_items
    return precision, recall
def loadCube(dir, data_dir, code=None, *args, **kw):
    """Load the first 120 files of ``data_dir + dir`` as one thresholded cube.

    The directory listing is sorted, the first 120 entries are loaded with
    ``iris.load_cube`` and passed through ``ExtractLocation`` (which receives
    ``*args`` and ``**kw``).  The returned cube's data is replaced by 1.0 where
    the value exceeds 0.00001 and 0.0 elsewhere.  ``code`` is accepted but not
    used.
    """
    first_files = sort(listdir_path(data_dir + dir))[0:120]
    cube = iris.load_cube(first_files)
    cube = ExtractLocation(cube, *args, **kw).cubes
    # boolean mask -> float 0.0 / 1.0
    exceeds = cube.data > 0.00001
    cube.data = exceeds / 1.0
    return cube
def histo(count, x_lim, data_label):
    """Plot a histogram of ``count`` on the module-level axes ``ax``.

    ``count`` is binned into ``max(count)`` equal-width bins and the bin
    populations are drawn against the bin number (starting at 1) on a
    logarithmic y axis, labelled ``data_label``.  The x axis is limited to
    ``[0, x_lim]``.  Returns the array of bin populations.
    """
    ordered = pylab.sort(pylab.array(count))
    n_bins = max(count)
    n, _edges = numpy.histogram(ordered, n_bins)
    bin_numbers = range(1, len(n) + 1)
    ax.semilogy(bin_numbers, n, label=data_label)
    ax.set_xlim([0, x_lim])
    return n
def DFA(data, npoints=None, degree=1, use_median=False):
    """
    computes the detrended fluctuation analysis
    returns the fluctuation F and the corresponding window length L

    :args:
        data (n-by-1 array): the data from which to compute the DFA
        npoints (int): the number of window lengths to try; if omitted,
            int(log(n)) is used
        degree (int): degree of the polynomial to use for detrending
        use_median (bool): use median instead of mean fluctuation

    :returns:
        F, L: the fluctuation F as function of the window length L
    """
    n_total = len(data)

    # step 0: integrated, mean-free profile
    profile = cumsum(data - mean(data))

    # step 1: distinct integer window lengths, log-spaced between e**2 and n/4
    if npoints is None:
        n_windows = int(log(n_total))
    else:
        n_windows = npoints
    candidates = logspace(2, log(n_total / 4.), n_windows, base=exp(1)).astype(int)
    window_sizes = sort(array(list(set(candidates))))

    average = median if use_median else mean
    # segments at or below this length are skipped (too short for the fit)
    min_points = 3 * (degree + 1)

    fluctuations = []
    kept_sizes = []
    for size in window_sizes:
        # fluctuation of the residuals from a polynomial fit per segment;
        # following Kantz & Schreiber, ddof is tied to the polynomial degree
        per_segment = []
        for begin in arange(0, len(profile), size):
            segment = profile[begin:begin + size]
            if len(segment) <= min_points:
                continue
            t = arange(len(segment))
            trend = polyval(polyfit(t, segment, degree), t)
            per_segment.append(std(segment - trend, ddof=degree + 1))
        if not per_segment:
            continue
        fluctuations.append(average(per_segment))
        kept_sizes.append(size)

    return array(fluctuations), array(kept_sizes)
def play(self, fileroot='Network', mspikes=None, gspikes=None, save_png=False, sim_step=10, windowsize=10):
    """Animate the spikes in the current mlab figure.

    fileroot : prefix of the PNG frames written to ``img/`` when ``save_png``
    mspikes, gspikes : spike arrays (first column is read as the spike time);
        ``self.mspikes`` / ``self.gspikes`` are used when omitted or empty
    save_png : show the time stamp as figure title and save each frame
    sim_step : plot every ``sim_step``-th distinct spike time
    windowsize : number of most recent frames whose points stay visible

    Only times strictly between ``self.sim_start`` and ``self.sim_end`` are
    shown.  Returns ``(mqueue, gqueue)``, the point sets still on display.
    """
    view = mlab.view()
    f = mlab.gcf()

    # ``not array`` raises for arrays with more than one element, so test for
    # "nothing supplied" explicitly (an empty sequence still means "default").
    if mspikes is None or len(mspikes) == 0:
        mspikes = self.mspikes
    if gspikes is None or len(gspikes) == 0:
        gspikes = self.gspikes

    img_counter = 0
    ftitle = None  # title object, created on the first saved frame

    ts = sort(array([t for t in set(gspikes[:,0])])) # can use either spike set
    ts = ts[(ts > self.sim_start) * (ts < self.sim_end)]

    mqueue = []
    gqueue = []
    for t in ts[::sim_step]:
        self.t = t
        mlab.gcf().scene.disable_render=True
        timestamp = u"Time: %.1f" % (t)
        print(timestamp)
        if save_png:
            # Display time stamp
            f.name = timestamp
            if ftitle is None:
                ftitle = mlab.title(timestamp)
            else:
                ftitle.text = timestamp

        # Delete old spheres
        if len(mqueue) >= windowsize:
            mpts=mqueue.pop(0)
            mpts.parent.parent.remove()
            gpts=gqueue.pop(0)
            gpts.parent.parent.remove()

        # It would be great to make previous arrays dimmer

        # Plot activate spheres
        mqueue.append(self.plot_points(self.mx, self.my, self.mz, mspikes, t=t, color=(1., 1., 1.), csize = self.mcsize))
        gqueue.append(self.plot_points(self.gx, self.gy, self.gz, gspikes, t=t, color=(1., 1., 1.), csize = self.gcsize))

        # restore the camera that was active when play() was called
        mlab.view(view [0], view [1], view [2], view [3])
        mlab.gcf().scene.disable_render=False
        if save_png:
            f.scene.save_png('img/%s_%03d.png' % (fileroot, img_counter))
            img_counter += 1
    return mqueue, gqueue
def load_wtc(idx=None, corpus=m21.__path__[0]+'/corpus/bach/bwv8[4-9][0-9]'):
    """
    Load items from a corpus, use given idx slice argument to select subsets

    ``corpus`` is a glob pattern; the matching paths are sorted, ``idx``
    selects among them (all of them when ``idx`` is None), and every entry
    found directly below each selected path is parsed with
    ``m21.converter.parse``.  Returns the list of parsed items.
    """
    work_dirs = sorted(glob.glob(corpus))
    if idx is None:
        idx = slice(0, len(work_dirs))
    return [m21.converter.parse(score_path)
            for work_dir in work_dirs[idx]
            for score_path in sort(glob.glob(work_dir + '/*'))]
def fwhm_2gauss(x, y, dx=0.001):
    '''
    Finds the FWHM for the profile y(x), with accuracy dx=0.001
    Uses a 2-Gauss 1D fit.

    The six parameters of ``gauss2`` are fitted to (x, y) with ``curve_fit``
    and the fitted curve is evaluated on a grid of spacing ``dx`` spanning
    ``[min(x), max(x)]``.  The width is the distance between the two grid
    points whose fitted value lies closest to half the fitted maximum.

    Returns ``(hm, fwhm, xx, ym)``: the half maximum, the width (length-1
    array), the evaluation grid and the fitted curve on that grid.
    '''
    popt, pcov = curve_fit(gauss2, x, y)
    xx = pl.arange(pl.amin(x), pl.amax(x) + dx, dx)
    ym = gauss2(xx, popt[0], popt[1], popt[2], popt[3], popt[4], popt[5])

    hm = pl.amax(ym / 2.0)
    y_diff = pl.absolute(ym - hm)

    # Indices of the two grid points closest to the half maximum.  Looking
    # the sorted values up again with ``where(y_diff == value)`` returned the
    # same index pair twice whenever two points were equally close, giving a
    # width of zero.  Mergesort is stable, so ties keep their grid order.
    nearest = pl.argsort(y_diff, kind='mergesort')
    i1 = nearest[0:1]
    i2 = nearest[1:2]
    width = pl.absolute(xx[i1] - xx[i2])
    return hm, width, xx, ym
def summary_table(db, table_start=2007, table_end=2010, parameter="itn coverage", midyear=True):
    """ Output a table of coverage estimates by country

    Each row is ``[country, estimate, "(lower, upper)", ...]`` with one
    estimate/interval pair per year from ``table_start`` to ``table_end``
    inclusive.  The estimate is the median of the posterior draws and the
    interval runs from the 2.5th to the 97.5th percentile.  With
    ``midyear=True`` the draws of year y and year y+1 are averaged first;
    otherwise the draws of year y are used as they are.

    ``db`` maps keys of the form ``<a>_<b>_<country>...`` to model objects
    whose attribute named ``parameter`` has a ``gettrace()`` method returning
    draws with one column per year, starting at ``settings.year_start``.

    Example
    -------
    >>> db = explore.load_pickles('/home/j/Project/Models/bednets/2010_07_09/')
    >>> tab = explore.summary_table(db)
    >>> f = open('/home/j/Project/Models/bednets/2010_08_05/best_case.csv', 'w')
    >>> import csv
    >>> cf = csv.writer(f)
    >>> cf.writerows(tab)
    >>> f.close()
    """
    import settings
    from pylab import sort

    years = list(range(table_start, table_end + 1))

    headers = ["Country"]
    for y in years:
        headers += [y, "ui"]
    tab = [headers]

    for k, p in sorted(db.items()):
        row = [k.split("_")[2]]  # TODO: refactor k.split into function
        cov = getattr(p, parameter).gettrace()
        for y in years:
            i = y - settings.year_start
            if midyear:
                c_y = sort(0.5 * (cov[:, i] + cov[:, i + 1]))  # compute mid-year estimate from posterior draws
            else:
                c_y = sort(cov[:, i])  # compute jan 1 / whole-year estimate posterior draws
            n = len(c_y)
            # array indices must be integers; truncate the fractional rank
            median, lower, upper = (c_y[int(q * n)] for q in (0.5, 0.025, 0.975))
            row += ["%f" % median, "(%f, %f)" % (lower, upper)]
        tab.append(row)
    return tab
def boot_curvefit(x,y,fit, p0, ci = .05, bootstraps=2000):
    """use of bootstrapping to perform curve fitting.
    Inputs:
      x - x values
      y - corresponding y values
      fit - a packaged fitting function
      p0 - initial parameter list that fit will use
      ci - total tail probability left outside the interval (0.05 -> 95%)
      bootstraps - number of resampled fits; with 1 the data are fitted once,
        unresampled

      fit should be a function of the form
        p1 = fit(x, y, p0)
      with p1 being the optimized parameter vector

    Outputs:
      p_ci - 3xn array (n = number of parameters).  Row 0 is the median of
        the bootstrapped values, row 1 is median minus the lower bound and
        row 2 is the upper bound minus median (i.e. error-bar lengths)
      booted_p - an nxb array of parameter values (b = number of bootstraps)

    Raises ValueError when bootstraps < 1.

    An example fit function is:

    def fit(x, y, p0):
      func = lambda p, t: p[0]*pylab.exp(-t/abs(p[1])) + p[2]
      errfunc = lambda p, t, y: func(p, t) - y
      p1, success = optimize.leastsq(errfunc, p0, args=(x, y))
      return p1
    """
    if bootstraps < 1:
        raise ValueError("boot_curvefit: bootstraps must be at least 1")

    p0 = pylab.array(p0) #Make it an array in case it isn't one
    if bootstraps > 1:
        # one column of resampled (with replacement) indices per bootstrap
        idx = pylab.randint(x.size, size=(x.size, bootstraps))
    else:
        idx = pylab.zeros((x.size,1),dtype=int)
        idx[:,0] = pylab.arange(x.size)

    booted_p = pylab.zeros((p0.size, bootstraps))
    for n in range(bootstraps):
        booted_p[:,n] = fit(x[idx[:,n]], y[idx[:,n]], p0)

    # Ranks of the interval bounds; clamped so that very small ``ci`` values
    # cannot index past the last sample.
    idx_lo = min(int(bootstraps * ci/2.0), bootstraps - 1)
    idx_hi = min(int(bootstraps * (1.0-ci/2)), bootstraps - 1)

    p_ci = pylab.zeros((3, p0.size))
    for p in range(p0.size):
        booted_samp = pylab.sort(booted_p[p])
        med = pylab.median(booted_samp)
        p_ci[:,p] = [med, med-booted_samp[idx_lo], booted_samp[idx_hi]-med]

    return p_ci, booted_p
def _make_fig_colorbar(logp):
    """Add a small horizontal copper colorbar to the current figure.

    The colour range runs from the smallest negative log likelihood to the
    value at 98% of the way through the sorted negative log likelihoods, so
    the worst 2% do not stretch the scale.  Only the two end points are
    labelled.

    Returns ``(vmin, vmax, cmap)`` so callers can colour their data with the
    same mapping.
    """
    import matplotlib as mpl
    import pylab

    # Alternatives considered for the range: min..min+4,
    # min..min+log10(number of samples), or the full range.
    neg_logp = pylab.sort(-logp)
    cutoff = int(0.98*(len(neg_logp)-1))
    vmin = neg_logp[0]
    vmax = neg_logp[cutoff]  # robust range

    colorbar_axes = pylab.gcf().add_axes([0.60, 0.95, 0.35, 0.05])
    cmap = mpl.cm.copper

    # Normalisation matching the data the colorbar stands for.
    norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)

    class MinDigitsFormatter(mpl.ticker.Formatter):
        # Formats tick values via format_value(), passing the tick span.
        def __init__(self, low, high):
            self.delta = high - low

        def __call__(self, x, pos=None):
            return format_value(x, self.delta)

    # ColorbarBase draws a standalone colorbar into the given axes; the
    # returned object itself is not needed afterwards.
    mpl.colorbar.ColorbarBase(colorbar_axes, cmap=cmap, norm=norm,
                              ticks=(vmin, vmax),
                              format=MinDigitsFormatter(vmin, vmax),
                              orientation='horizontal')
    return vmin, vmax, cmap
def main():
    """Plot the sorted sizes of the intervals between empty forest sites.

    The forest is read (at peak yield) from the file named on the command
    line; every entry different from 1 counts as an empty site.  The
    intervals are: the stretch after the last empty site, the distance
    between each pair of neighbouring empty sites, and the stretch before the
    first empty site.  They are sorted by size and plotted against their
    rank.
    """
    # read in the forest as peak yield
    args = parseCMD()
    fileName = args.fileN
    forest = pl.loadtxt(fileName)

    # determine which sites were left unTreed at maximum yield
    # (highest index first, as floats, matching the original bookkeeping)
    sitesLeft = pl.flatnonzero(forest != 1)[::-1].astype(float)

    # determine intervals and sort them in order of size
    if sitesLeft.size:
        after_last = forest.size - 1 - sitesLeft[0]
        # Every neighbouring pair is included.  The original loop started at
        # index 1 and silently dropped the gap between the two highest empty
        # sites.
        between = sitesLeft[:-1] - sitesLeft[1:]
        before_first = sitesLeft[-1]
        intervals = pl.concatenate(([after_last], between, [before_first]))
    else:
        # no empty site at all: the whole forest is a single interval
        intervals = pl.array([float(forest.size)])
    intervals = pl.sort(intervals)

    # assign each interval a number
    nums = pl.arange(1, intervals.size + 1)

    # main plot
    fig1 = pl.figure(1)
    ax = fig1.add_subplot(111)
    pl.ylabel('Interval Size', fontsize=20)
    pl.xlabel('(Sorted) Inverval Number', fontsize=20)

    # plot the intervals, labelled with the value extracted from the file name
    newt = getD(fileName)
    ax.plot(nums, intervals, label='(D=%s)' % (newt),
            marker='o', linewidth=0,
            markerfacecolor='None', markeredgecolor='Navy')

    # put labels into legend
    ax.legend(loc='upper left', shadow=True)
    pl.show()
def norm_hist_bins(y, bins=10, normed='height'):
    """Just like the matplotlib mlab.hist, but can normalize by height.

    normed can be 'area' (produces matplotlib behavior, area is 1),
    any False value (no normalization), or any True value (normalization
    by height: the returned values sum to 1).

    Original docs from matplotlib:

    Return the histogram of y with bins equally sized bins.  If bins
    is an array, use the bins.  Return value is
    (n,x) where n is the count for each bin in x

    If normed is False, return the counts in the first element of the
    return tuple.  If normed is 'area', return the probability density
    n/(len(y)*dbin)

    If y has rank>1, it will be raveled
    Credits: the Numeric 22 documentation
    """
    y = asarray(y)
    if len(y.shape)>1: y = ravel(y)

    if not iterable(bins):
        ymin, ymax = min(y), max(y)
        if ymin==ymax:
            ymin -= 0.5
            ymax += 0.5

        # NOTE(review): inherited from the matplotlib original -- asking for
        # one bin yields ``ymax`` bins instead; confirm before relying on it.
        if bins==1: bins=ymax
        # float() forces true division; with integer data Python 2 would
        # otherwise floor the bin width
        dy = (ymax-ymin)/float(bins)
        bins = ymin + dy*arange(bins)

    # left bin edges -> cumulative counts -> per-bin counts
    n = searchsorted(sort(y), bins)
    n = diff(concatenate([n, [len(y)]]))
    if normed:
        if normed == 'area':
            db = bins[1]-bins[0]
        else:
            db = 1.0
        # 1.0 (not 1) so integer bin edges cannot truncate the factor to 0
        return 1.0/(len(y)*db)*n, bins
    else:
        return n, bins
def plot_net_survival(db, country_list):
    """Plot posterior net-survival bands for the countries in country_list.

    For every entry of ``db`` whose key (``<a>_<b>_<country>...``) names a
    country in ``country_list``, the 2.5th and 97.5th percentile of the
    posterior draws of ``Pr[net is lost]`` are turned into two survival
    curves ``100 * (1 - pr) ** t`` (set to zero from t = 3 years on).  The
    band between them is drawn once as a coloured outline and once as a
    translucent fill.  The figure is saved as ``net_survival.png`` under
    ``settings.PATH``.
    """
    import pylab as pl
    import settings

    pl.clf()
    t = pl.arange(0, 5, 0.1)
    # closed outline: forward along one curve, back along the other
    outline_t = pl.concatenate((t, t[::-1]))

    ii = 0.0
    for k, p in sorted(db.items()):
        country = k.split("_")[2]  # TODO: refactor k.split into function
        if country not in country_list:
            continue

        pr = pl.sort(getattr(p, "Pr[net is lost]").gettrace())
        # array indices must be integers; truncate the fractional rank
        pr0 = pr[int(0.025 * len(pr))]
        pr1 = pr[int(0.975 * len(pr))]

        pct0 = 100.0 * pl.where(t < 3, (1 - pr0) ** t, 0.0)
        pct1 = 100.0 * pl.where(t < 3, (1 - pr1) ** t, 0.0)
        outline_pct = pl.concatenate((pct0, pct1[::-1]))

        # NOTE(review): newer Matplotlib releases appear to name this
        # colormap ``nipy_spectral`` -- confirm against the installed version.
        color = pl.cm.spectral(ii / len(country_list))
        pl.fill(
            outline_t,
            outline_pct,
            alpha=0.9,
            linewidth=3,
            facecolor="none",
            edgecolor=color,
            label=country,
        )
        pl.fill(
            outline_t,
            outline_pct,
            alpha=0.5,
            linewidth=0,
            facecolor=color,
            zorder=-ii,
        )
        ii += 1.0

    pl.legend()
    pl.ylabel("Nets Remaining (%)")
    pl.xlabel("Time in household (years)")
    pl.title("LLIN Survival Curve Posteriors")
    pl.savefig(settings.PATH + "net_survival.png")
def remove_outlier(value, sigma_th=100.0, width=200, ntrim=20):
    """
    Replace outlying (> sigma_th times robust std deviation) values by NaN.

    Robust (trimmed) standard deviation and average are calculated for each
    subsample whose size is "width" and within which the smallest and the
    largest "ntrim" samples are trimmed.  Consecutive windows of "width"
    samples are used; the last window is shifted back so that it ends exactly
    on the last sample.

    The input is never modified.  Statistics are always taken from the
    original data, not from already-cleaned values.  A list input yields a
    list, anything else a float ndarray.  When there are fewer than "width"
    samples a warning is printed and the input is returned unchanged.

    Raises ValueError when width <= 2 * ntrim.
    """
    ndata = len(value)
    if width <= 2 * ntrim:
        raise ValueError("remove_outlier: width should be greater than 2*ntrim.")
    if ndata < width:
        print("Warning: Not enough number of samples to remove outliers.")
        return value

    # Private float copies: NaN needs a float dtype, and slicing an ndarray
    # with [:] would only alias (and so overwrite) the caller's data.
    data = pl.array(value, dtype=float)
    result = data.copy()

    last_start = ndata - width
    for start in range(0, ndata, width):
        # Pull the final window back so it ends on the last sample.  The
        # original used ndata - width - 1, which never examined the last
        # sample and produced an empty window when ndata == width.
        start = min(start, last_start)
        subarr = data[start : start + width]
        tsubarr = pl.sort(subarr)[ntrim : width - ntrim]
        outliers = abs(subarr - tsubarr.mean()) > sigma_th * tsubarr.std()
        window = result[start : start + width]  # view into result
        window[outliers] = pl.nan

    if isinstance(value, list):
        return list(result)
    return result
def store_mcmc_fit(dm, key, model_vars):
    """ Store the parameter estimates generated by an MCMC fit of the
    normal model in the disease_model object, keyed by key

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    key : str
    model_vars : dict of PyMC variables
      must contain 'rate_stoch', whose trace is summarised

    Results
    -------
    Save a sketch of the distribution of rate_stoch keyed by key
    ('lower_ui', 'median', 'upper_ui' and 'mean'), each interpolated from the
    parameter age mesh onto the estimate age mesh, plus 'dispersion' when the
    model has a dispersion variable.
    """
    rate_trace = model_vars['rate_stoch'].trace()
    # Sort every column independently so that row r holds the r-th smallest
    # draw of each column.
    rate_trace = pl.sort(rate_trace, axis=0)
    n_draws = len(rate_trace)

    rate = {}
    for x in [2.5, 50, 97.5]:
        # array indices must be integers; truncate like the old float index did
        rate[x] = rate_trace[int(x / 100. * n_draws), :]

    param_mesh = dm.get_param_age_mesh()
    age_mesh = dm.get_estimate_age_mesh()

    def on_age_mesh(values):
        # values at the parameter mesh points, interpolated to the output mesh
        return dismod3.utils.interpolate(param_mesh, values[param_mesh], age_mesh)

    dm.set_mcmc('lower_ui', key, on_age_mesh(rate[2.5]))
    dm.set_mcmc('median', key, on_age_mesh(rate[50]))
    dm.set_mcmc('upper_ui', key, on_age_mesh(rate[97.5]))
    dm.set_mcmc('mean', key, on_age_mesh(pl.mean(rate_trace, axis=0)))

    if 'dispersion' in dm.vars[key]:
        dm.set_mcmc('dispersion', key, dm.vars[key]['dispersion'].stats()['quantiles'].values())
def indepVarList(estimFiles, canonical, reduceVar):
    '''
    Make list of all independent variable, in order, based on ensemble.
    This is set up to be temperature or chemical potential. If another
    variable is desired, then proper adjustments may need to be made.

    The value is cut out of each file name at a fixed character position that
    depends on ``reduceVar`` ('T' or 'u') and on ``canonical``.  Duplicates
    are dropped and the distinct substrings are returned as a sorted array.
    The values stay strings, so the order is lexicographic, not numeric.

    Raises ValueError for any other ``reduceVar``.
    '''
    # (start, stop) of the value inside the file name
    if reduceVar == 'T':
        start, stop = (13, 19) if canonical else (14, 20)
    elif reduceVar == 'u':
        start, stop = (28, 35) if canonical else (29, 36)
    else:
        # previously this fell through and failed on an unbound local
        raise ValueError("indepVarList: reduceVar must be 'T' or 'u', got %r"
                         % (reduceVar,))

    # A set removes duplicates in one pass and avoids comparing strings
    # against the initially empty float array.
    distinct = set(f[start:stop] for f in estimFiles)
    return pl.sort(pl.array(list(distinct)))
def refine(self, edge_errors, gamma=1.4):
    """
    Refine ``self.mesh`` by bisecting the edges with the largest errors and
    store the refined mesh back in ``self.mesh``.

    Edges whose error exceeds ``gamma`` times the mean edge error are flagged.
    They are visited in order of decreasing error; an edge is split at its
    midpoint only if none of its adjacent cells has already been replaced.
    Each adjacent cell of a split edge is replaced by two cells that share
    the new midpoint vertex.

    :param edge_errors : Dolfin edge function containing edge errors of
                         the current mesh.
    :param gamma       : Scaling factor for determining which edges need be
                         refined.  An edge is refined when its error is
                         larger than gamma times the average of edge_errors.
    """
    mesh = self.mesh

    # build the edge->cell, vertex->cell and vertex->edge connectivities
    mesh.init(1,2)
    mesh.init(0,2)
    mesh.init(0,1)

    avg_error = edge_errors.array().mean()
    # edge indices, largest error first
    error_sorted_edge_indices = p.argsort(edge_errors.array())[::-1]
    refine_edge = FacetFunction('bool', mesh)
    for e in edges(mesh):
        refine_edge[e] = edge_errors[e] > gamma*avg_error

    coordinates = p.copy(self.mesh.coordinates())
    # index the next newly created vertex will receive
    current_new_vertex = len(coordinates)
    cells_to_delete = []
    new_cells = []

    # one pass per flagged edge; each pass looks for the facet that currently
    # heads the error-sorted list
    for iteration in range(refine_edge.array().sum()):
        for e in facets(self.mesh):
            if refine_edge[e] and (e.index()==error_sorted_edge_indices[0]):
                adjacent_cells = e.entities(2)
                adjacent_vertices = e.entities(0)
                # skip edges touching a cell that has already been split
                if not any([c in cells_to_delete for c in adjacent_cells]):
                    new_x,new_y = e.midpoint().x(),e.midpoint().y()
                    coordinates = p.vstack((coordinates,[new_x,new_y]))
                    for c in adjacent_cells:
                        # the cell vertex that does not lie on the split edge
                        off_facet_vertex = list(self.mesh.cells()[c])
                        [off_facet_vertex.remove(ii) for ii in adjacent_vertices]
                        # one new cell per end point of the split edge
                        for on_facet_vertex in adjacent_vertices:
                            new_cell = p.sort([current_new_vertex,off_facet_vertex[0],on_facet_vertex])
                            new_cells.append(new_cell)
                        cells_to_delete.append(c)
                    current_new_vertex+=1
        # NOTE(review): the nesting level of this statement could not be
        # recovered with certainty from the flattened source; it is placed
        # once per pass here -- confirm against the original file.
        error_sorted_edge_indices = error_sorted_edge_indices[1:]

    # keep every old cell that was not split, then append the new cells
    old_cells = self.mesh.cells()
    keep_cell = p.ones(len(old_cells))
    keep_cell[cells_to_delete] = 0
    old_cells_parsed = old_cells[keep_cell.astype('bool')]
    all_cells = p.vstack((old_cells_parsed,new_cells))
    n_cells = len(all_cells)

    # assemble the refined mesh from the vertex and cell lists
    e = MeshEditor()
    refined_mesh = Mesh()
    e.open(refined_mesh,self.mesh.geometry().dim(),self.mesh.topology().dim())
    e.init_vertices(current_new_vertex)
    for index,x in enumerate(coordinates):
        e.add_vertex(index,x[0],x[1])
    e.init_cells(n_cells)
    for index,c in enumerate(all_cells):
        e.add_cell(index,c.astype('uintc'))
    e.close()
    refined_mesh.order()
    self.mesh = refined_mesh
#datasett = ["fofr4.bin", "fofr5.bin", "fofr6.bin"] nrBins = 1000 NrParticles = 20 sigma = 2 method = 2 legendnames = [] for n in datasett: legendnames.append(n[:-4]) for name in datasett: MData = p.loadtxt(path + "Mass_" + name[:-4] + "_UnbindingMethod=" + str(method) + ".dat") RData = p.loadtxt(path + name[:-4] + "Rockstar.dat") MData = (p.sort(MData)[::-1]) RData = (p.sort(RData[:, 2])[::-1]) MData = MData[p.where(MData >= NrParticles * 9.26490e9)] RData = RData[p.where(RData >= NrParticles * 9.26490e9)] histMdata = (p.histogram(MData, nrBins)[0]) histRdata = (p.histogram(RData, nrBins)[0]) x = (p.histogram(MData, nrBins)[1][1:]) massM = p.zeros(nrBins) massR = p.zeros(nrBins) massM[-1] = histMdata[-1] massR[-1] = histRdata[-1] for i in range(nrBins - 2, -1, -1): massM[i] = histMdata[i] + massM[i + 1]
# finish the current figure and save it as PDF and PNG
pl.xlabel('Age (years)')
pl.ylabel('Consumption (kg/d)')
# tick labels are ten times the tick positions
pl.yticks([0, .015, .03, .045, .06], [0, 0.15, 0.30, 0.45, 0.6])
my_axis(.075)
pl.savefig('book/graphics/fruit-we_rate_type.pdf')
pl.savefig('book/graphics/fruit-we_rate_type.png')

# qq plot distribution comparison
pl.figure(**book_graphics.full_plus_page_params)
# every 2.5%-th index label between the first and the last label of GRC_data
ix = pl.arange(GRC_data.index[0], GRC_data.index[-1], int(GRC_data.index[-1]*.025))
for i,c in enumerate(['GRC', 'ISL']): #in ['we_model', 'we_log_model', 'we_norm_model']:
    # top row: column '0' of the Greek data against column str(i+1)
    pl.subplot(2,2,i+1)
    pl.plot(pl.sort(pl.array(GRC_data['0'])[ix]), pl.sort(pl.array(GRC_data[str(i+1)])[ix]), 'ko')
    # identity line for reference
    pl.plot([-1,1],[-1,1],'k-')
    pl.yticks([.025, .03, .035, .04], [.25, .30, .35, .40])
    pl.xticks([.025, .03, .035, .04], [.25, .30, .35, .40])
    pl.axis([.023, .042, .023, .041])
    pl.xlabel('Negative-binomial')
    if i + 1 == 1:
        pl.ylabel('Greece logormal')
        book_graphics.subtitle('(a)')
    elif i + 1 == 2:
        pl.ylabel('Greece normal')
        book_graphics.subtitle('(b)')

    # bottom row: the same comparison for ISL_data
    pl.subplot(2,2,i+3)
    pl.plot(pl.sort(pl.array(ISL_data['0'])[ix]), pl.sort(pl.array(ISL_data[str(i+1)])[ix]), 'ko')
    pl.plot([-1,1],[-1,1],'k-')
def detailed_summary_table(db):
    """ Output a table that duplicates the summary information generated by individual runs

    One row is produced per country and year from ``settings.year_start`` up
    to (but not including) ``settings.year_end``.  For each quantity the row
    holds the posterior mean followed by a lower and an upper bound taken
    from the 2.5th and 97.5th percentile draws.  Net counts are scaled by
    0.001 (thousands), coverages by 100 (percent).  In the last year the
    shipped and distributed columns are filled with -99.

    Example
    -------
    >>> db = explore.load_pickles('/home/j/Project/Models/bednets/2010_09_23/')
    >>> tab = explore.detailed_summary_table(db)
    >>> f = open('/home/j/Project/Models/bednets/2010_09_23/summary.csv', 'w')
    >>> import csv
    >>> cf = csv.writer(f)
    >>> cf.writerows(tab)
    >>> f.close()
    """
    import settings
    from pylab import mean, sort

    # save results in output file
    headers = [
        'Country', 'Year', 'Population',
        'LLINs Shipped (Thousands)', 'LLINs Shipped Lower CI', 'LLINs Shipped Upper CI',
        'LLINs Distributed (Thousands)', 'LLINs Distributed Lower CI', 'LLINs Distributed Upper CI',
        'LLINs Not Owned Warehouse (Thousands)', 'LLINs Not Owned Lower CI', 'LLINs Not Owned Upper CI',
        'LLINs Owned (Thousands)', 'LLINs Owned Lower CI', 'LLINs Owned Upper CI',
        'non-LLIN ITNs Owned (Thousands)', 'non-LLIN ITNs Owned Lower CI', 'non-LLIN ITNs Owned Upper CI',
        'ITNs Owned (Thousands)', 'ITNs Owned Lower CI', 'ITNs Owned Upper CI',
        'LLIN Coverage (Percent)', 'LLIN Coverage Lower CI', 'LLIN Coverage Upper CI',
        'ITN Coverage (Percent)', 'ITN Coverage Lower CI', 'ITN Coverage Upper CI',
    ]
    tab = [headers]

    year_start = settings.year_start
    year_end = settings.year_end
    n_years = year_end - year_start

    from data import Data
    data = Data()

    from pymc.utils import hpd

    stoch_names = [
        'llins shipped', 'llins distributed', 'llin warehouse net stock',
        'household llin stock', 'non-llin household net stock',
        'household itn stock', 'llin coverage', 'itn coverage',
    ]

    def my_summary(stoch, i, li, ui, factor=.001):
        # [mean, lower, upper] of year column i, scaled by factor; reads the
        # current country's sorted draws from the enclosing ``trace``
        row = []
        row += [mean(trace[stoch][:, i]) * factor]
        row += list(hpd(trace[stoch][[li, ui], i], .05) * factor)
        return row

    for k, p in sorted(db.items()):
        # draws sorted per year column, so a row index is a rank
        trace = {}
        for stoch in stoch_names:
            trace[stoch] = sort(getattr(p, stoch).gettrace(), axis=0)

        # ranks of the 2.5th / 97.5th percentile draws; array indices must
        # be integers, so the fractional rank is truncated
        n_draws = len(trace['llins shipped'][:, 0])
        li = int(.025 * n_draws)
        ui = int(.975 * n_draws)

        c = k.split('_')[2]  # TODO: refactor k.split into function
        pop = data.population_for(c, year_start, year_end)

        for i in range(n_years):
            row = [c, year_start + i, pop[i]]
            if i == n_years - 1:
                # shipped / distributed are reported as -99 in the last year
                row += [-99, -99, -99]
                row += [-99, -99, -99]
            else:
                row += my_summary('llins shipped', i, li, ui)
                row += my_summary('llins distributed', i, li, ui)
            row += my_summary('llin warehouse net stock', i, li, ui)
            row += my_summary('household llin stock', i, li, ui)
            row += my_summary('non-llin household net stock', i, li, ui)
            row += my_summary('household itn stock', i, li, ui)
            row += my_summary('llin coverage', i, li, ui, 100)
            row += my_summary('itn coverage', i, li, ui, 100)
            tab.append(row)

    return tab
def detailed_summary_table(db):
    """Output a table that duplicates the summary information generated by individual runs

    One row is produced per country and year from ``settings.year_start`` up
    to (but not including) ``settings.year_end``.  For each quantity the row
    holds the posterior mean followed by a lower and an upper bound taken
    from the 2.5th and 97.5th percentile draws.  Net counts are scaled by
    0.001 (thousands), coverages by 100 (percent).  In the last year the
    shipped and distributed columns are filled with -99.

    Example
    -------
    >>> db = explore.load_pickles('/home/j/Project/Models/bednets/2010_09_23/')
    >>> tab = explore.detailed_summary_table(db)
    >>> f = open('/home/j/Project/Models/bednets/2010_09_23/summary.csv', 'w')
    >>> import csv
    >>> cf = csv.writer(f)
    >>> cf.writerows(tab)
    >>> f.close()
    """
    import settings
    from pylab import mean, sort

    # save results in output file
    headers = [
        "Country",
        "Year",
        "Population",
        "LLINs Shipped (Thousands)",
        "LLINs Shipped Lower CI",
        "LLINs Shipped Upper CI",
        "LLINs Distributed (Thousands)",
        "LLINs Distributed Lower CI",
        "LLINs Distributed Upper CI",
        "LLINs Not Owned Warehouse (Thousands)",
        "LLINs Not Owned Lower CI",
        "LLINs Not Owned Upper CI",
        "LLINs Owned (Thousands)",
        "LLINs Owned Lower CI",
        "LLINs Owned Upper CI",
        "non-LLIN ITNs Owned (Thousands)",
        "non-LLIN ITNs Owned Lower CI",
        "non-LLIN ITNs Owned Upper CI",
        "ITNs Owned (Thousands)",
        "ITNs Owned Lower CI",
        "ITNs Owned Upper CI",
        "LLIN Coverage (Percent)",
        "LLIN Coverage Lower CI",
        "LLIN Coverage Upper CI",
        "ITN Coverage (Percent)",
        "ITN Coverage Lower CI",
        "ITN Coverage Upper CI",
    ]
    tab = [headers]

    year_start = settings.year_start
    year_end = settings.year_end
    n_years = year_end - year_start

    from data import Data

    data = Data()

    from pymc.utils import hpd

    stoch_names = [
        "llins shipped",
        "llins distributed",
        "llin warehouse net stock",
        "household llin stock",
        "non-llin household net stock",
        "household itn stock",
        "llin coverage",
        "itn coverage",
    ]

    def my_summary(stoch, i, li, ui, factor=0.001):
        # [mean, lower, upper] of year column i, scaled by factor; reads the
        # current country's sorted draws from the enclosing ``trace``
        row = []
        row += [mean(trace[stoch][:, i]) * factor]
        row += list(hpd(trace[stoch][[li, ui], i], 0.05) * factor)
        return row

    for k, p in sorted(db.items()):
        # draws sorted per year column, so a row index is a rank
        trace = {}
        for stoch in stoch_names:
            trace[stoch] = sort(getattr(p, stoch).gettrace(), axis=0)

        # ranks of the 2.5th / 97.5th percentile draws; array indices must
        # be integers, so the fractional rank is truncated
        n_draws = len(trace["llins shipped"][:, 0])
        li = int(0.025 * n_draws)
        ui = int(0.975 * n_draws)

        c = k.split("_")[2]  # TODO: refactor k.split into function
        pop = data.population_for(c, year_start, year_end)

        for i in range(n_years):
            row = [c, year_start + i, pop[i]]
            if i == n_years - 1:
                # shipped / distributed are reported as -99 in the last year
                row += [-99, -99, -99]
                row += [-99, -99, -99]
            else:
                row += my_summary("llins shipped", i, li, ui)
                row += my_summary("llins distributed", i, li, ui)
            row += my_summary("llin warehouse net stock", i, li, ui)
            row += my_summary("household llin stock", i, li, ui)
            row += my_summary("non-llin household net stock", i, li, ui)
            row += my_summary("household itn stock", i, li, ui)
            row += my_summary("llin coverage", i, li, ui, 100)
            row += my_summary("itn coverage", i, li, ui, 100)
            tab.append(row)

    return tab
def otheridx(idx,max_idx):
    """
    returns a sorted, integer array containing all numbers in a range from 0
    to max_idx-1 that are not in idx

    The result has an integer dtype even when it is empty, so it can always
    be used as an index array.
    """
    remaining = set(arange(max_idx)) - set(idx)
    # sort() of an empty list yields a float array; force the integer dtype
    return sort(list(remaining)).astype(int)
print("D doesn't look normal") else: print("D looks normal") k2, p = ss.normaltest(A) print("p= {:g}".format(p)) if p < alpha: # null hypothesis: x comes from a normal distribution print("A doesn't look normal") else: print("A looks normal") k2, p = ss.normaltest(B) print("p= {:g}".format(p)) if p < alpha: # null hypothesis: x comes from a normal distribution print("B doesn't look normal") else: print("B looks normal") #fig, (ax1,ax2,ax3) = pp.subplots(1,3) #ax1.hist(a,bins = 1000) #ax2.hist(A,bins = 1000) #ax3.hist(B,bins = 1000) #pp.show() z = np.polyfit(pp.sort(norm), pp.sort(B), 1) p = np.poly1d(z) pp.figure() pp.plot(pp.sort(norm), pp.sort(B)) pp.plot(pp.sort(norm), p(pp.sort(norm)), "k--", linewidth=2) pp.show()
def _loadData(self, ):
    """
    **internally used function**

    Here, the data is loaded when __init__ is called.

    Sets ``self.dl``, ``self.dr``, ``self.labels`` and ``self.ndim``.  A cache
    file in the ``cache`` directory is used when one exists for this subject
    and marker set and its stored marker list matches ``self.markers``.
    Otherwise the data are rebuilt from the database, passed through
    ``mi.dt_movingavg``, stripped of rows with very high amplitude, and
    written to the cache.
    """
    self.LOG('setting data')
    # cache file name: s<sid>t1_<markers joined>_symd.dict
    cfilename = ''.join([ ('s%it1_' % self.sid), ''.join(self.markers), '_symd.dict'])
    cfilefound = False
    if cfilename in os.listdir('cache'):
        res = mload(os.sep.join(['cache', cfilename]))
        # only trust the cache when it was built for the same marker list
        if not res['markers'] == self.markers:
            self.LOG('Cache file is inconsistent - accessing database')
        else:
            self.dl = res['dl']
            self.dr = res['dr']
            self.labels = res['labels']
            self.ndim = self.dl.shape[1]
            cfilefound = True

    if not cfilefound:
        import subjData as sd
        a = sd.sdata()
        cfilefound = False
        self.LOG('No valid cache file - accessing database')
        # per marker: left/right x, y, z relative to com, then CoM itself
        selection = []
        for elem in self.markers:
            selection.append('l_' + elem + '_x - com_x')
            selection.append('r_' + elem + '_x - com_x')
            selection.append('l_' + elem + '_y - com_y')
            selection.append('r_' + elem + '_y - com_y')
            selection.append('l_' + elem + '_z - com_z')
            selection.append('r_' + elem + '_z - com_z')
        selection.extend( ['CoM_x', 'CoM_y', 'CoM_z', ])
        a.selection = selection
        ndim0 = len(selection)
        sl, sr, pl, pr, idict = get_data(self.sid, self.tid, datdir='./SLIP_params2/', detrend=False)
        dat_l, dat_r = a.get_kin_from_idict(self.sid, 1, idict)
        # columns to keep: all of 0 .. 2*ndim0-1 except ndim0-3 and ndim0-2
        # (presumably the CoM_x / CoM_y position columns -- TODO confirm
        # the column order returned by get_kin_from_idict)
        rmslice = array(list(set(arange(2 * ndim0)) - set([ndim0 - 2, ndim0 - 3])))
        rmslice.sort()
        # labels: marker names with the trailing ' - com_?' removed, then
        # 'com_z', then the same names prefixed with 'v_'
        labels = [x[:-8] for x in selection[:-3]]
        labels.append('com_z')
        labels.extend(['v_' + x[:-8] for x in selection[:-3]])
        labels.extend(['v_' + x for x in selection[-3:]])
        ndim = len(rmslice)
        dat_l = hstack(dat_l).T[:, rmslice]
        dat_r = hstack(dat_r).T[:, rmslice]
        #import misc as fda
        dl = mi.dt_movingavg(dat_l, 30)
        dr = mi.dt_movingavg(dat_r, 30)
        # find 'good' indices - remove strides with very high data amplitude
        # (amplitude measured in column standard deviations)
        dln = dl / std(dl, axis=0)
        drn = dr / std(dr, axis=0)
        mal = array([max(abs(x)) for x in dln])
        mar = array([max(abs(x)) for x in drn])
        # a row is bad when either side exceeds the threshold
        badidx = set(find(mal > self.sthresh)) | set( find(mar > self.sthresh))
        goodidx = sort(list(set(arange(dln.shape[0])) - badidx))
        # attention: strictly speaking, these data will now be inconsistent. There are
        # some data points whose successors have been removed - consequently, their
        # (new) successors will not be their true successors, and a regression on these
        # data points will produce inconsistent results. However, when only a small
        # fraction of points is removed, this effect should be rather small.
        self.dl = dl[goodidx, :]
        self.dr = dr[goodidx, :]
        self.ndim = ndim
        self.labels = labels
        # optional step - rescale all velocities. A factor of ~11. leads to roughly
        # similar variance in positions and velocities across all subjects.
        # This does not alter results at all.
        #vscale = 1. / 11.
        #dl[:, ndim0 - 2:] /= 11.
        #dr[:, ndim0 - 2:] /= 11.
        # store results if they were not cached
        self.LOG('storing data in cache file')
        res = {'dl' : self.dl, 'dr' : self.dr, 'markers' : self.markers, 'labels' : self.labels}
        msave(os.sep.join(['cache', cfilename]), res)
elapsed[row['label']][threads].append(int(row['elapsed'])) timestamps[row['label']][threads].append(int(row['timeStamp'])) starttimes[row['label']][threads].append(int(row['timeStamp']) - int(row['elapsed'])) if (row['success'] != 'true'): errors[row['label']][threads].append(int(row['elapsed'])) # Draw a separate figure for each label found in the results. for label in elapsed: # Transform the lists for plotting plot_data = [] throughput_data = [None] error_x = [] error_y = [] plot_labels = [] column = 1 for thread_count in pylab.sort(elapsed[label].keys()): plot_data.append(elapsed[label][thread_count]) plot_labels.append(thread_count) test_start = min(starttimes[label][thread_count]) test_end = max(timestamps[label][thread_count]) test_length = (test_end - test_start) / 1000 num_requests = len(timestamps[label][thread_count]) - len(errors[label][thread_count]) if (test_length > 0): throughput_data.append(num_requests / float(test_length)) else: throughput_data.append(0) for error in errors[label][thread_count]: error_x.append(column) error_y.append(error) column += 1
def visualize_single_step(mod, i, alpha=0.0, description_str=""):
    """Show how a random walk in a two dimensional space has progressed up to step i.

    Parameters
    ----------
    mod : object
        Must provide ``mod.X.trace()`` returning a 2-d array indexed as
        ``X[step, dimension]``.  The optional attributes ``shape``,
        ``plot_distribution``, ``true_mean`` and ``true_iqr`` are used
        when present.
    i : int
        Current step; the summary statistics use the second half of the
        trace up to this step.
    alpha : float
        Interpolation weight between step ``i - 1`` (0.0) and step ``i``
        (1.0) for the highlighted point.
    description_str : str
        Title text drawn at the top of the figure.

    Everything is drawn on the current pylab figure, which is cleared first.
    """
    X = mod.X.trace()
    # Slice bounds and indices must be integers: the float expressions
    # used previously (``i / 2.0``) are rejected by current NumPy/Python.
    half = i // 2

    pl.clf()

    sq_size = 0.3

    # show 2d trace
    pl.axes([0.05, 0.05, sq_size, sq_size])

    pl.plot(X[:i, 0], X[:i, 1], "b.-", alpha=0.1)

    Y = alpha * X[i, :] + (1 - alpha) * X[i - 1, :]
    pl.plot([Y[0], Y[0]], [Y[1], 2.0], "k-", alpha=0.5)
    pl.plot([Y[0], 2], [Y[1], Y[1]], "k-", alpha=0.5)
    pl.plot(Y[0], Y[1], "go")

    if hasattr(mod, "shape"):
        pl.fill(mod.shape[:, 0], mod.shape[:, 1], color="b", alpha=0.2)
    if hasattr(mod, "plot_distribution"):
        mod.plot_distribution()

    pl.axis([-1.1, 1.1, -1.1, 1.1])
    pl.xticks([])
    pl.yticks([])

    # show 1d marginals

    ## X[0] is horizontal position
    pl.axes([0.05, 0.05 + sq_size, sq_size, 1.0 - 0.1 - sq_size])
    pl.plot(X[: (i + 1), 0], i + 1 - pl.arange(i + 1), "k-")
    pl.axis([-1.1, 1.1, 0, 1000])
    pl.xticks([])
    pl.yticks([])
    pl.text(-1, 0.1, "$X_0$")

    ## X[1] is vertical position
    pl.axes([0.05 + sq_size, 0.05, 1.0 - 0.1 - sq_size, sq_size])
    pl.plot(i + 1 - pl.arange(i + 1), X[: (i + 1), 1], "k-")
    pl.axis([0, 1000, -1.1, 1.1])
    pl.xticks([])
    pl.yticks([])
    pl.text(10, -1.0, "$X_1$")

    ## show X[i, j] acorr (only once enough steps have accumulated)
    N, D = X.shape
    if i > 250:
        for j in range(D):
            pl.axes(
                [
                    1 - 0.1 - 1.5 * sq_size * (1 - j * D ** -1.0),
                    1.0 - 0.1 - 1.5 * sq_size * D ** -1,
                    1.5 * sq_size * D ** -1.0,
                    1.5 * sq_size * D ** -1.0,
                ]
            )
            # every 10th sample of the second half of the trace
            pl.acorr(X[half:i:10, j], detrend=pl.mlab.detrend_mean)
            pl.xlabel("$X_%d$" % j)
            if j == 0:
                pl.ylabel("autocorr")
            pl.xticks([])
            pl.yticks([])
            pl.axis([-10, 10, -0.1, 1])

    ## textual information (named ``text`` so the builtin ``str`` is not shadowed)
    text = ""
    text += "t = %d\n" % i
    # a step is counted as rejected when X[., 0] did not change
    text += "acceptance rate = %.2f\n\n" % (1.0 - pl.mean(pl.diff(X[half:i, 0]) == 0.0))

    text += "mean(X) = %s" % pretty_array(X[half:i, :].mean(0))
    if hasattr(mod, "true_mean"):
        text += " / true mean = %s\n" % pretty_array(mod.true_mean)
    else:
        text += "\n"

    if i > 10:
        # Rows of the column-wise sorted second half at the 25% / 75%
        # positions; int() truncates exactly as the old float indexing did.
        lower_row = int(0.25 * (i / 2.0))
        upper_row = int(0.75 * (i / 2.0))
        iqr = pl.sort(X[half:i, :], axis=0)[[lower_row, upper_row], :].T

        for j in range(D):
            text += "IQR(X[%d]) = (%.2f, %.2f)" % (j, iqr[j, 0], iqr[j, 1])
            if hasattr(mod, "true_iqr"):
                text += " / true IQR = %s\n" % mod.true_iqr[j]
            else:
                text += "\n"

    pl.figtext(0.05 + 0.01 + sq_size, 0.05 + 0.01 + sq_size, text, va="bottom", ha="left")

    pl.figtext(sq_size + 0.5 * (1.0 - sq_size), 0.96, description_str, va="top", ha="center", size=32)

    pl.figtext(0.95, 0.05, "healthyalgorithms.wordpress.com", ha="right")
def format_contour_array(data, points_per_cell=20, bulk=0.8):
    """Formats [x,y] series of data into x_bins, y_bins and data for contour().

    data: 2 x n array of float representing x,y coordinates

    points_per_cell: average points per unit cell in the bulk of the data,
    default 20

    bulk: fraction containing the 'bulk' of the data in x and y, default
    0.8 (i.e. 80% of the data will be used in the calculation).

    returns: x-bin, y-bin, and a square matrix of frequencies to be plotted
    (the same bin array is returned for x and y).

    WARNING: Assumes x and y are in the range 0-1.

    NOTE: empty input and data with zero spread inside the bulk (zero
    area) are not handled and still raise.
    """
    #bind x and y data
    data_x = sort(data[0]) #note: numpy sort returns a sorted copy
    data_y = sort(data[1])
    num_points = len(data_x)

    #calculate the x and y bounds holding the bulk of the data
    low_prob = (1 - bulk) / 2.0
    low_tail = int(num_points * low_prob)
    #clamp: with bulk == 1.0 the upper index would otherwise be num_points,
    #one past the last valid position
    high_tail = min(int(num_points * (1 - low_prob)), num_points - 1)

    x_low = data_x[low_tail]
    x_high = data_x[high_tail]
    y_low = data_y[low_tail]
    y_high = data_y[high_tail]

    #calculate the side length in the bulk that holds the right number of
    #points
    delta_x = x_high - x_low
    delta_y = y_high - y_low

    points_in_bulk = num_points * bulk #approximate: assumes no correlation
    area_of_bulk = delta_x * delta_y
    points_per_area = points_in_bulk / area_of_bulk
    side_length = sqrt(points_per_cell / points_per_area)

    #correct the side length so we get an integer number of bins.
    #sparse data can ask for a cell larger than the unit square; fall back
    #to a single bin instead of dividing by zero below.
    num_bins = max(int(1 / side_length), 1)
    corrected_side_length = 1.0 / num_bins

    #figure out how many items are in each grid square in x and y
    #
    #this is the tricky part, because contour() takes as its data matrix
    #the points at the vertices of each cell, rather than the points at
    #the centers of each cell. this means that if we were going to make
    #a 3 x 3 grid, we actually have to estimate a 4 x 4 matrix that's offset
    #by half a unit cell in both x and y.
    #
    #if the data are between 0 and 1, the first and last bin in our range are
    #superfluous because searchsorted will put items before the first
    #bin into bin 0, and items after the last bin into bin n+1, where
    #n is the maximum index in the original array. for example, if we
    #have 3 bins, the values .33 and .66 would suffice to find the centers,
    #because anything below .33 gets index 0 and anything above .66 gets index
    #2 (anything between them gets index 1). incidentally, this prevents
    #issues with floating-point error and values slightly below 0 or above 1
    #that might otherwise arise.
    #
    #however, for our 3 x 3 case, we actually want to start estimating at the
    #cell centered at 0, i.e. starting at -.33/2, so that we get the four
    #estimates centered at (rather than starting at) 0, .33, .66, and 1.
    #because the data are constrained to be between 0 and 1, we will need to
    #double the counts at the edges (and quadruple them at the corners) to get
    #a fair estimate of the density.
    csl = corrected_side_length #save typing below
    eps = csl / 10 #don't ever want max value to be in the list precisely
    half_csl = .5 * csl
    bins = arange(half_csl, 1 + half_csl - eps, csl)
    x_coords = searchsorted(bins, data[0])
    y_coords = searchsorted(bins, data[1])

    #matrix has dimension 1 more than num bins, b/c can be above largest
    matrix = zeros((num_bins + 1, num_bins + 1))

    #rows are indexed by y and columns by x so the result lines up with a
    #normal scatter plot
    for coord in zip(y_coords, x_coords):
        matrix[coord] += 1

    #we now have estimates of the densities at the edge of each of the
    #n x n cells in the grid. for example, if we have a 3 x 3 grid, we have
    #16 densities, one at the center of each grid cell (0, .33, .66, 1 in each
    #dimension). need to double the counts at edges to reflect places where
    #we can't observe data because of range restrictions.
    matrix[0] *= 2
    matrix[:, 0] *= 2
    matrix[-1] *= 2
    matrix[:, -1] *= 2

    #return adjusted_bins as centers, rather than boundaries, of the range
    x_bins = csl * arange(num_bins + 1)
    return x_bins, x_bins, matrix
NrParticles = 20 sigma = 2 method = 2 legendnames = [] for n in datasett: legendnames.append(n[:-4]) for name in datasett: MData = p.loadtxt(path + "Mass_" + name[:-4] + "_UnbindingMethod=" + str(method) + ".dat") RData = p.loadtxt(path + name[:-4] + "Rockstar.dat") MData = (p.sort(MData)[::-1]) RData = (p.sort(RData[:,2])[::-1]) MData = MData[p.where(MData >= NrParticles*9.26490e9)] RData = RData[p.where(RData >= NrParticles*9.26490e9)] histMdata = (p.histogram(MData,nrBins)[0]) histRdata = (p.histogram(RData,nrBins)[0]) x = (p.histogram(MData,nrBins)[1][1:]) massM = p.zeros(nrBins) massR = p.zeros(nrBins) massM[-1] = histMdata[-1] massR[-1] = histRdata[-1] for i in range(nrBins-2,-1,-1):
import pylab as p

# Directory and file names of the three catalogues that are compared.
path = "/home/simen/Master/mybody-mpi/outData/"
filenameMine = "mass.dat"
filenameRockstar = "rockstar128.dat"
filenameAmiga = "amiga.dat"

# Read data from files
mineData = p.loadtxt(path + filenameMine)
rockstarData = p.loadtxt(path + filenameRockstar)
amigaData = p.loadtxt(path + filenameAmiga)

# Sort each series in descending order.  The Rockstar values are taken
# from column 2 and the AMIGA values from column 3 of their files
# (presumably the mass columns -- confirm against the catalogue formats).
sortedDataR = p.sort(rockstarData[:, 2])[::-1]
sortedData = p.sort(mineData)[::-1]
sortedDataA = p.sort(amigaData[:, 3])[::-1]

# One log-log curve per catalogue: sorted value against its 1-based rank.
# The plotting order must match the order of the legend entries below.
for descending in (sortedData, sortedDataR, sortedDataA):
    p.loglog(descending, range(1, len(descending) + 1))

p.legend(("mine", "Rockstar", "AMIGA"))
p.show()
def format_contour_array(data, points_per_cell=20, bulk=0.8):
    """Formats [x,y] series of data into x_bins, y_bins and data for contour().

    data: 2 x n array of float representing x,y coordinates

    points_per_cell: average points per unit cell in the bulk of the data,
    default 20

    bulk: fraction containing the 'bulk' of the data in x and y, default
    0.8 (i.e. 80% of the data will be used in the calculation).

    returns: x-bin, y-bin, and a square matrix of frequencies to be plotted
    (the same bin array is returned for x and y).

    WARNING: Assumes x and y are in the range 0-1.

    NOTE: empty input and data with zero spread inside the bulk (zero
    area) are not handled and still raise.
    """
    #bind x and y data
    data_x = sort(data[0]) #note: numpy sort returns a sorted copy
    data_y = sort(data[1])
    num_points = len(data_x)

    #calculate the x and y bounds holding the bulk of the data
    low_prob = (1 - bulk) / 2.0
    low_tail = int(num_points * low_prob)
    #clamp: with bulk == 1.0 the upper index would otherwise be num_points,
    #one past the last valid position
    high_tail = min(int(num_points * (1 - low_prob)), num_points - 1)

    x_low = data_x[low_tail]
    x_high = data_x[high_tail]
    y_low = data_y[low_tail]
    y_high = data_y[high_tail]

    #calculate the side length in the bulk that holds the right number of
    #points
    delta_x = x_high - x_low
    delta_y = y_high - y_low

    points_in_bulk = num_points * bulk #approximate: assumes no correlation
    area_of_bulk = delta_x * delta_y
    points_per_area = points_in_bulk / area_of_bulk
    side_length = sqrt(points_per_cell / points_per_area)

    #correct the side length so we get an integer number of bins.
    #sparse data can ask for a cell larger than the unit square; fall back
    #to a single bin instead of dividing by zero below.
    num_bins = max(int(1 / side_length), 1)
    corrected_side_length = 1.0 / num_bins

    #figure out how many items are in each grid square in x and y
    #
    #this is the tricky part, because contour() takes as its data matrix
    #the points at the vertices of each cell, rather than the points at
    #the centers of each cell. this means that if we were going to make
    #a 3 x 3 grid, we actually have to estimate a 4 x 4 matrix that's offset
    #by half a unit cell in both x and y.
    #
    #if the data are between 0 and 1, the first and last bin in our range are
    #superfluous because searchsorted will put items before the first
    #bin into bin 0, and items after the last bin into bin n+1, where
    #n is the maximum index in the original array. for example, if we
    #have 3 bins, the values .33 and .66 would suffice to find the centers,
    #because anything below .33 gets index 0 and anything above .66 gets index
    #2 (anything between them gets index 1). incidentally, this prevents
    #issues with floating-point error and values slightly below 0 or above 1
    #that might otherwise arise.
    #
    #however, for our 3 x 3 case, we actually want to start estimating at the
    #cell centered at 0, i.e. starting at -.33/2, so that we get the four
    #estimates centered at (rather than starting at) 0, .33, .66, and 1.
    #because the data are constrained to be between 0 and 1, we will need to
    #double the counts at the edges (and quadruple them at the corners) to get
    #a fair estimate of the density.
    csl = corrected_side_length #save typing below
    eps = csl / 10 #don't ever want max value to be in the list precisely
    half_csl = .5 * csl
    bins = arange(half_csl, 1 + half_csl - eps, csl)
    x_coords = searchsorted(bins, data[0])
    y_coords = searchsorted(bins, data[1])

    #matrix has dimension 1 more than num bins, b/c can be above largest
    matrix = zeros((num_bins + 1, num_bins + 1))

    #rows are indexed by y and columns by x so the result lines up with a
    #normal scatter plot
    for coord in zip(y_coords, x_coords):
        matrix[coord] += 1

    #we now have estimates of the densities at the edge of each of the
    #n x n cells in the grid. for example, if we have a 3 x 3 grid, we have
    #16 densities, one at the center of each grid cell (0, .33, .66, 1 in each
    #dimension). need to double the counts at edges to reflect places where
    #we can't observe data because of range restrictions.
    matrix[0] *= 2
    matrix[:, 0] *= 2
    matrix[-1] *= 2
    matrix[:, -1] *= 2

    #return adjusted_bins as centers, rather than boundaries, of the range
    x_bins = csl * arange(num_bins + 1)
    return x_bins, x_bins, matrix
def main():
    """Command-line driver: set up, run and evaluate finite-difference vibrations.

    Two positional arguments are required: ``name`` (used as a tag in all
    generated file and folder names) and ``mode``:

    * ``'0'`` or ``'2'``: create one folder per displaced geometry (each
      non-constrained atom, each Cartesian direction, +/- delta), write
      ``geometry.in`` / ``control.in`` there and optionally run the binary
      or prepare a submission script.
    * ``'1'`` or ``'2'``: read the outputs back, assemble the Hessian and the
      dipole gradient, diagonalize, and write the masses, Hessian,
      dipole-gradient, xyz, IR and (optionally) molden files.

    With ``--plot`` a broadened IR spectrum is saved as a PDF.  The
    ``control.in`` template and ``geometry.in`` are read from the current
    working directory.
    """
    import optparse
    from numpy import sum

    # Parse command line
    parser = optparse.OptionParser(usage=USAGE)
    parser.add_option("-p", "--plot", action="store_true",
                      help="Generate pdf with IR-spectrum, broadened with Lorentzian")
    parser.add_option("-i", "--info", action="store_true",
                      help="Set up/ Calculate vibrations & quit")
    parser.add_option("-s", "--suffix", action="store",
                      help="Call suffix for binary e.g. 'mpirun -n 4 '",
                      default='')
    parser.add_option("-r", "--run", action="store",
                      help="path to FHI-aims binary",default='')
    parser.add_option("-x", "--relax", action="store_true",
                      help="Relax initial geometry")
    parser.add_option("-m", "--molden", action="store_true",
                      help="Output in molden format")
    parser.add_option("-w", "--distort", action="store_true",
                      help="Output geometry distorted along imaginary modes")
    parser.add_option("-t", "--submit", action="store",
                      help="""\
Path to submission script, string <jobname> will be replaced by name + counter, string <outfile> will be replaced by filename""")
    parser.add_option("-d", "--delta", action="store", type="float",
                      help="Displacement", default=0.0025)
    parser.add_option("-b", "--broadening", action="store", type="float",
                      help="Broadening for IR-spectrum in cm^{-1}", default=5)

    options, args = parser.parse_args()
    if options.info:
        print(__doc__)
        sys.exit(0)
    if len(args) != 2:
        parser.error("Need exactly two arguments")

    AIMS_CALL=options.suffix+' '+options.run
    hessian_thresh = -1
    name=args[0]
    mode=args[1]
    delta=options.delta
    broadening=options.broadening

    run_aims=False
    if options.run!='':
        run_aims=True

    submit_script = options.submit is not None

    if options.plot:
        import matplotlib as mpl
        mpl.use('Agg')
        from pylab import figure

    if options.plot or mode=='1' or mode=='2':
        from pylab import savetxt, transpose, eig, argsort, sort,\
                   sign, pi, dot, sum, linspace, argmin, r_, convolve

    # Constant from scipy.constants
    bohr=constants.value('Bohr radius')*1.e10
    hartree=constants.value('Hartree energy in eV')
    at_u=constants.value('atomic mass unit-kilogram relationship')
    eV=constants.value('electron volt-joule relationship')
    c=constants.value('speed of light in vacuum')
    Ang=1.0e-10
    hbar=constants.value('Planck constant over 2 pi')
    Avo=constants.value('Avogadro constant')
    kb=constants.value('Boltzmann constant in eV/K')
    pi=constants.pi
    hessian_factor   = eV/(at_u*Ang*Ang)
    grad_dipole_factor=(eV/(1./(10*c)))/Ang  #(eV/Ang -> D/Ang)
    ir_factor = 1

    # Assign all filenames
    inputgeomerty = 'geometry.in.'+name
    inputcontrol  = 'control.in.'+name
    atomicmasses  = 'masses.'+name+'.dat'
    xyzfile       = name+'.xyz'
    moldenname    = name+'.molden'
    hessianname   = 'hessian.'+name+'.dat'
    graddipolename = 'grad_dipole.'+name+'.dat'
    irname        = 'ir.'+name+'.dat'
    deltas=array([-delta,delta])
    coeff=array([-1,1])
    # Prefactor of the two-point finite difference; computed here, before
    # the displacement loop below rebinds ``delta``.
    c_zero = - 1. / (2. * delta)

    # Read control.in template.  ``with`` guarantees the handle is closed
    # (the previous ``f.close`` without parentheses never closed it).
    with open('control.in','r') as f:
        template_control=f.read()

    if submit_script:
        # Read submission script template
        with open(options.submit,'r') as f:
            template_job=f.read()

    folder=''                                  # Dummy

    ########### Central Point ##################################################
    if options.relax and (mode=='0' or mode=='2'):
        # First relax input geometry
        filename=name+'.out'
        folder=name+'_relaxation'
        if not os.path.exists(folder):
            os.mkdir(folder)                            # Create folder
        shutil.copy('geometry.in', folder+'/geometry.in')  # Copy geometry
        new_control=open(folder+'/control.in','w')
        new_control.write(template_control+'relax_geometry trm 1E-3\n') # Relax!
        new_control.close()
        os.chdir(folder)                             # Change directory
        print('Central Point')
        if run_aims:
            os.system(AIMS_CALL+' > '+filename)      # Run aims and pipe the output
                                                     # into a file named 'filename'
        if submit_script:
            replace_submission(template_job, name, 0, filename)
        os.chdir('..')

    ############################################################################
    # Check for relaxed geometry
    if os.path.exists(folder+'/geometry.in.next_step'):
        geometry=open(folder+'/geometry.in.next_step','r')
    else:
        geometry=open('geometry.in','r')

    # Read input geometry
    n_line=0
    struc=structure()
    lines=geometry.readlines()

    for line in lines:
        n_line= n_line+1
        if line.rfind('set_vacuum_level')!=-1:   # Vacuum Level
            struc.vacuum_level=float(split_line(line)[-1])
        if line.rfind('lattice_vector')!=-1:    # Lattice vectors and periodic
            lat=split_line(line)[1:]
            struc.lattic_vector=append(struc.lattic_vector,float64(array(lat))
                                       [newaxis,:],axis=0)
            struc.periodic=True
        if line.rfind('atom')!=-1:              # Set atoms
            line_vals=split_line(line)
            at=Atom(line_vals[-1],line_vals[1:-1])
            # An atom is flagged as constrained when the line that follows
            # it mentions 'constrain_relaxation'.
            if n_line<len(lines):
                nextline=lines[n_line]
                if nextline.rfind('constrain_relaxation')!=-1: # constrained?
                    at=Atom(line_vals[-1],line_vals[1:-1],True)
                else:
                    at=Atom(line_vals[-1],line_vals[1:-1])
            struc.join(at)
    geometry.close()
    n_atoms= struc.n()
    # NOTE: despite its name this is n_atoms minus the sum of the
    # ``constrained`` flags, i.e. presumably the number of atoms that are
    # free to move (confirm the type of ``struc.constrained``).
    n_constrained=n_atoms-sum(struc.constrained)

    # Atomic mass file
    mass_file=open(atomicmasses,'w')
    mass_vector=zeros([0])
    for at_unconstrained in struc.atoms[struc.constrained==False]:
        mass_vector=append(mass_vector,ones(3)*1./sqrt(at_unconstrained.mass()))
        line='{0:10.5f}'.format(at_unconstrained.mass())
        for i in range(3):
            line=line+'{0:11.4f}'.format(at_unconstrained.coord[i])
        line=line+'{0:}\n'.format(at_unconstrained.kind)
        mass_file.writelines(line)
    mass_file.close()

    # Init
    dip = zeros([n_constrained*3,3])
    hessian = zeros([n_constrained*3,n_constrained*3])
    index=0      # row of hessian/dip: one per (atom, coordinate)
    counter=1    # running job number: one per single displacement

    # Set up / Read folders for displaced atoms
    for atom in arange(n_atoms)[struc.constrained==False]:
        for coord in arange(3):
            for delta in deltas:
                filename=name+'.i_atom_'+str(atom)+'.i_coord_'+str(coord)+'.displ_'+\
                    str(delta)+'.out'
                folder=name+'.i_atom_'+str(atom)+'.i_coord_'+str(coord)+'.displ_'+\
                    str(delta)
                if mode=='0' or mode=='2':   # Put new geometry and control.in into folder
                    struc_new=copy.deepcopy(struc)
                    struc_new.atoms[atom].coord[coord]=\
                        struc_new.atoms[atom].coord[coord]+delta
                    geoname='geometry.i_atom_'+str(atom)+'.i_coord_'+str(coord)+\
                        '.displ_'+str(delta)+'.in'
                    if not os.path.exists(folder):
                        os.mkdir(folder)
                    new_geo=open(folder+'/geometry.in','w')
                    newline='#\n# temporary structure-file for finite-difference '+\
                        'calculation of forces\n'
                    # '\\#' is the same two characters the former '\#' produced,
                    # without relying on an invalid escape sequence.
                    newline=newline+'# displacement {0:8.4f} of \\# atom '.format(delta)+\
                        '{0:5} direction {1:5}\n#\n'.format(atom,coord)
                    new_geo.writelines(newline+struc_new.to_str())
                    new_geo.close()
                    new_control=open(folder+'/control.in','w')
                    # displaced geometries must not be relaxed again
                    template_control=template_control.replace('relax_geometry',
                                                              '#relax_geometry')
                    new_control.write(template_control+'compute_forces .true. \n'+\
                                      'final_forces_cleaned '+\
                                      '.true. \noutput dipole \n')
                    new_control.close()
                    os.chdir(folder)                          # Change directory
                    print('Processing atom: '+str(atom+1)+'/'+str(n_atoms)+', coord.: '+\
                          str(coord+1)+'/'+str(3)+', delta: '+str(delta))
                    if run_aims:
                        os.system(AIMS_CALL+' > '+filename)   # Run aims and pipe the output
                                                              # into a file named 'filename'
                    if submit_script:
                        replace_submission(template_job, name, counter, filename)
                    # os.system('qsub job.sh') # Mind the environment variables
                    os.chdir('..')

                if mode=='1' or mode=='2':   # Read output
                    forces_reached=False
                    atom_count=0
                    data=open(folder+'/'+filename)
                    for line in data.readlines():
                        if line.rfind('Dipole correction potential jump')!=-1:
                            dip_jump = float(split_line(line)[-2]) # Periodic
                        if line.rfind('| Total dipole moment [eAng]')!=-1:
                            dip_jump = float64(split_line(line)[-3:]) # Cluster
                        if forces_reached and atom_count<n_atoms: # Read Forces
                            struc.atoms[atom_count].force=float64(split_line(line)[2:])
                            atom_count=atom_count+1
                            if atom_count==n_atoms:
                                forces_reached=False
                        if line.rfind('Total atomic forces')!=-1:
                            forces_reached=True
                    data.close()
                    # NOTE(review): dip_jump is only bound if one of the two
                    # dipole lines was found in the output file.
                    if struc.periodic:
                        dip[index,2]=dip[index,2]+dip_jump*coeff[deltas==delta]*c_zero
                    else:
                        dip[index,:]=dip[index,:]+dip_jump*coeff[deltas==delta]*c_zero
                    forces=array([])
                    for at_unconstrained in struc.atoms[struc.constrained==False]:
                        forces=append(forces,coeff[deltas==delta]*at_unconstrained.force)
                    hessian[index,:]=hessian[index,:]+forces*c_zero
                counter=counter+1
            index=index+1

    if mode=='1' or mode=='2': # Calculate vibrations
        print('Entering hessian diagonalization')
        print('Number of atoms = '+str(n_atoms))
        print('Name of Hessian input file = '+hessianname)
        print('Name of grad dipole input file = '+graddipolename)
        print('Name of Masses input file = '+atomicmasses)
        print('Name of XYZ output file = '+xyzfile)
        print('Threshold for Matrix elements = '+str(hessian_thresh))
        if (hessian_thresh < 0.0):
            print(' All matrix elements are taken'+\
                  ' into account by default\n')
        savetxt(hessianname,hessian)
        savetxt(graddipolename,dip)

        # Mass-weight and symmetrize the Hessian
        mass_mat=mass_vector[:,newaxis]*mass_vector[newaxis,:]
        hessian[abs(hessian)<hessian_thresh]=0.0
        hessian=hessian*mass_mat*hessian_factor
        hessian=(hessian+transpose(hessian))/2.

        # Diagonalize hessian (scipy)
        print('Solving eigenvalue system for Hessian Matrix')
        freq, eig_vec = eig(hessian)
        print('Done ... ')
        eig_vec=eig_vec[:,argsort(freq)]
        # signed square root keeps negative eigenvalues visible as
        # negative frequencies
        freq=sort(sign(freq)*sqrt(abs(freq)))
        ZPE=hbar*(freq)/(2.0*eV)
        freq = (freq)/(200.*pi*c)

        grad_dipole = dip * grad_dipole_factor
        eig_vec = eig_vec*mass_vector[:,newaxis]*ones(len(mass_vector))[newaxis,:]
        infrared_intensity = sum(dot(transpose(grad_dipole),eig_vec)**2,axis=0)*\
            ir_factor
        reduced_mass=sum(eig_vec**2,axis=0)
        norm = sqrt(reduced_mass)
        eig_vec = eig_vec/norm

        # The rest is output, xyz, IR,...
        print('Results\n')
        print('List of all frequencies found:')
        print('Mode number Frequency [cm^(-1)] Zero point energy [eV] '+\
              'IR-intensity [D^2/Ang^2]')
        for i in range(len(freq)):
            print('{0:11}{1:25.8f}{2:25.8f}{3:25.8f}'.format(i+1,freq[i],ZPE[i],
                                                             infrared_intensity[i]))
        print('\n')
        print('Summary of zero point energy for entire system:')
        print('| Cumulative ZPE = {0:15.8f} eV'.format(sum(ZPE)))
        print('| without first six eigenmodes = {0:15.8f} eV\n'.format(sum(ZPE)-
                                                                       sum(ZPE[:6])))
        print('Stability checking - eigenvalues should all be positive for a '+\
              'stable structure. ')
        print('The six smallest frequencies should be (almost) zero:')
        string=''
        for zz in ZPE[:6]:
            string=string+'{0:25.8f}'.format(zz)
        print(string)
        print('Compare this with the largest eigenvalue, ')
        print('{0:25.8f}'.format(freq[-1]))

        nums=arange(n_atoms)[struc.constrained==False]   # displaced atoms
        nums2=arange(n_atoms)[struc.constrained]         # fixed atoms
        newline=''
        newline_ir='[INT]\n'
        if options.molden:
            newline_molden='[Molden Format]\n[GEOMETRIES] XYZ\n'
            newline_molden=newline_molden+'{0:6}\n'.format(n_atoms)+'\n'
            for i_atoms in range(n_constrained):
                newline_molden=newline_molden+'{0:6}'.format(
                    struc.atoms[nums[i_atoms]].kind)
                for i_coord in range(3):
                    newline_molden=newline_molden+'{0:10.4f}'.format(
                        struc.atoms[nums[i_atoms]].coord[i_coord])
                newline_molden=newline_molden+'\n'
            newline_molden=newline_molden+'[FREQ]\n'
            for i in range(len(freq)):
                newline_molden=newline_molden+'{0:10.3f}\n'.format(freq[i])
            newline_molden=newline_molden+'[INT]\n'
            for i in range(len(freq)):
                newline_molden=newline_molden+'{0:17.6e}\n'.format(
                    infrared_intensity[i])
            newline_molden=newline_molden+'[FR-COORD]\n'
            newline_molden=newline_molden+'{0:6}\n'.format(n_atoms)+'\n'
            for i_atoms in range(n_constrained):
                newline_molden=newline_molden+'{0:6}'.format(
                    struc.atoms[nums[i_atoms]].kind)
                for i_coord in range(3):
                    newline_molden=newline_molden+'{0:10.4f}'.format(
                        struc.atoms[nums[i_atoms]].coord[i_coord]/bohr)
                newline_molden=newline_molden+'\n'
            newline_molden=newline_molden+'[FR-NORM-COORD]\n'

        for i in range(len(freq)):
            newline=newline+'{0:6}\n'.format(n_atoms)
            if freq[i]>0:
                newline=newline+'stable frequency at '
            elif freq[i]<0:
                newline=newline+'unstable frequency at '
                if options.distort and freq[i]<-50:
                    # NOTE(review): atoms are addressed by i_atoms here, not
                    # by nums[i_atoms] as in the output loops below -- this
                    # differs when constrained atoms precede free ones.
                    struc_new=copy.deepcopy(struc)
                    for i_atoms in range(n_constrained):
                        for i_coord in range(3):
                            struc_new.atoms[i_atoms].coord[i_coord]=\
                                struc_new.atoms[i_atoms].coord[i_coord]+\
                                eig_vec[(i_atoms)*3+i_coord,i]
                    geoname=name+'.distorted.vibration_'+str(i+1)+'.geometry.in'
                    new_geo=open(geoname,'w')
                    newline_geo='#\n# distorted structure-file for based on eigenmodes\n'
                    newline_geo=newline_geo+\
                        '# vibration {0:5} :{1:10.3f} 1/cm\n#\n'.format(i+1,freq[i])
                    new_geo.writelines(newline_geo+struc_new.to_str())
                    new_geo.close()
            elif freq[i]==0:
                newline=newline+'translation or rotation '
            newline=newline+'{0:10.3f} 1/cm IR int. is '.format(freq[i])
            newline=newline+'{0:10.4e} D^2/Ang^2; red. mass is '.format(
                infrared_intensity[i])
            newline=newline+'{0:5.3f} a.m.u.; force const. is '.format(
                1.0/reduced_mass[i])
            newline=newline+'{0:5.3f} mDyne/Ang.\n'.format(((freq[i]*(200*pi*c))**2)*
                                                           (1.0/reduced_mass[i])*at_u*1.e-2)
            if options.molden:
                newline_molden=newline_molden+\
                    'vibration {0:6}\n'.format(i+1)
            # displaced atoms: position followed by the mode displacement
            for i_atoms in range(n_constrained):
                newline=newline+'{0:6}'.format(struc.atoms[nums[i_atoms]].kind)
                for i_coord in range(3):
                    newline=newline+'{0:10.4f}'.format(
                        struc.atoms[nums[i_atoms]].coord[i_coord])
                for i_coord in range(3):
                    newline=newline+'{0:10.4f}'.format(eig_vec[(i_atoms)*3+i_coord,i])
                    if options.molden:
                        newline_molden=newline_molden+'{0:10.4f}'.format(
                            eig_vec[(i_atoms)*3+i_coord,i]/bohr)
                newline=newline+'\n'
                if options.molden:
                    newline_molden=newline_molden+'\n'
            # fixed atoms: position followed by a zero displacement
            for i_atoms in range(n_atoms-n_constrained):
                newline=newline+'{0:6}'.format(struc.atoms[nums2[i_atoms]].kind)
                for i_coord in range(3):
                    newline=newline+'{0:10.4f}'.format(
                        struc.atoms[nums2[i_atoms]].coord[i_coord])
                for i_coord in range(3):
                    newline=newline+'{0:10.4f}'.format(0.0)
                newline=newline+'\n'
            newline_ir=newline_ir+'{0:10.4e}\n'.format(infrared_intensity[i])

        xyz=open(xyzfile,'w')
        xyz.writelines(newline)
        xyz.close()
        ir=open(irname,'w')
        ir.writelines(newline_ir)
        ir.close()
        if options.molden:
            molden=open(moldenname,'w')
            molden.writelines(newline_molden)
            molden.close()

    if (mode=='1' or mode=='2') and options.plot:
        # Stick spectrum on a regular grid, then broadened by convolution
        x=linspace(freq.min()-500,freq.max()+500,1000)
        z=zeros(len(x))
        for i in range(len(freq)):
            z[argmin(abs(x-freq[i]))]=infrared_intensity[i]
        window_len=150
        lorentzian=lorentz(pi,broadening,arange(250))#signal.gaussian(window_len,broadening)
        # mirror-pad both ends before convolving, then cut the padding off
        s=r_[z[window_len-1:0:-1],z,z[-1:-window_len:-1]]
        z_convolve=convolve(lorentzian/lorentzian.sum(),s,mode='same')[
            window_len-1:-window_len+1]
        fig=figure(0)
        ax=fig.add_subplot(111)
        ax.plot(x,z_convolve,'r',lw=2)
        ax.set_xlim([freq.min()-500,freq.max()+500])
        ax.set_ylim([-0.01,ax.get_ylim()[1]])
        ax.set_yticks([])
        ax.set_xlabel('Frequency [1/cm]',size=20)
        ax.set_ylabel('Intensity [a.u.]',size=20)
        fig.savefig(name+'_IR_spectrum.pdf')

    print('\n Done. ')
def ChoosePointsInFBZ(self, nkp, type=0):
    """Choose the k-points in the first Brillouin zone (1BZ) that will be used.

    nkp:  number of mesh points per reciprocal direction (type == 0), or
          the approximate number of points along the path (type != 0; it
          is replaced by 4*int(nkp/4.)+1).
    type: 0 builds a mesh covering the whole 1BZ and reduces it to
          irreducible k-points (for SC calculation); any other value
          builds the path Gamma - X - L - Gamma - K (for plotting).

    Results are stored on the instance: self.kp always; self.wkp
    (normalized weights) for type == 0; self.Points (label, index) for
    the path.  Reads self.b0, self.b1, self.b2 for the mesh and
    self.GPoint, self.XPoint, self.LPoint, self.KPoint for the path.

    The progress messages are single pre-formatted strings passed to
    print(...), which reads the same as a Python 2 statement and as a
    Python 3 call.
    """

    def kv0(iq, q):
        # fractional coordinate of mesh point iq on a q-point mesh
        return (iq - int((q + 1.5) / 2) + 1) / (q + 0.0)

    if type == 0:  # Choose mesh in the 1BZ to cover the whole space - for SC calculation
        kp = []
        for i0 in range(nkp):
            r0 = kv0(i0, nkp)
            print('r0 =  %s' % (r0,))
            for i1 in range(nkp):
                r1 = kv0(i1, nkp)
                for i2 in range(nkp):
                    r2 = kv0(i2, nkp)
                    k = self.b0 * r0 + self.b1 * r1 + self.b2 * r2
                    kp.append(k)

        print("Number of all k-points = %s" % (len(kp),))

        # Components are sorted so that permutations of one another
        # compare equal without trying every permutation.
        kpc = [sort(k) for k in kp]

        # ChooseIrreducible k-points only
        # The function performs all symmetry operations of a cubic point-group to each k-point and
        # keeps only those k-points which can not be obtained from another k-point by group operation.
        # These k-points are obviously irreducible.
        irkp = []  # temporary list where irreducible k points will be stored
        wkp = []  # temporary weights
        while len(kpc) > 0:  # continues until all k-points are grouped into irreducible classes
            tk = kpc[0]  # we concentrate on the k-point which is the first in the list
            irkp.append(tk)  # the first can be stored as irreducible
            wkp.append(0)  # and the weights for this irreducible k-point is set to zero

            # We go over 48 symmetry operations of cubic system:
            # Each vector component can change sign: 2^3=8 possibilities
            # All permutations of components: 3!=6
            # Since the operations are independent, we have 3!*2^3=48 operations == number of cubic point group operations
            for ix in [-1, 1]:  # three loops for all possible sign changes
                for iy in [-1, 1]:
                    for iz in [-1, 1]:
                        # sorted so that we do not need to try all permutations
                        nk = sort([ix * tk[0], iy * tk[1], iz * tk[2]])
                        # Every remaining k-point equal to this image belongs
                        # to the current class; the others are kept for later.
                        remaining = []
                        for other in kpc:
                            if sum(abs(nk - other)) < 1e-6:
                                wkp[-1] += 1.
                            else:
                                remaining.append(other)
                        kpc = remaining

        # irreducible k-points are stored in the output vectors
        self.wkp = array(wkp) / sum(wkp)
        self.kp = array(irkp)

        print("Number of irreducible k points is %s" % (len(self.kp),))
        #for ik,k in enumerate(self.kmesh):
        #    print "%10.6f"*3 % tuple(k), '  ', self.wkp[ik]

    else:  # Choose one particular path in the 1BZ - for plotting purposes
        nkp = 4 * int(nkp / 4.) + 1
        print("number of k-points = %s" % (nkp,))
        self.kp = zeros((nkp, 3), dtype=float)
        # Points per path segment; must stay an integer because it is used
        # in range() and as an array index.
        N0 = nkp // 4

        self.Points = [(r'$\Gamma$', 0), ('X', N0), ('L', 2 * N0), (r'$\Gamma$', 3 * N0), ('K', 4 * N0)]
        for i in range(N0):
            self.kp[i, :] = self.GPoint + (self.XPoint - self.GPoint) * i / (N0 - 0.)
        for i in range(N0):
            self.kp[N0 + i, :] = self.XPoint + (self.LPoint - self.XPoint) * i / (N0 - 0.)
        for i in range(N0):
            self.kp[N0 * 2 + i, :] = self.LPoint + (self.GPoint - self.LPoint) * i / (N0 - 0.)
        for i in range(N0):
            self.kp[N0 * 3 + i, :] = self.GPoint + (self.KPoint - self.GPoint) * i / (N0 - 0.)
        self.kp[4 * N0] = self.KPoint
logret=(logreturnf(i)) return (logret) with open('MICRO.csv') as csvdata:#gets the stoke price microsoft from dec20, 2016 to dec 20, 2019 read=csv.reader(csvdata,delimiter=',') a1=[] for r in read: S=r[1] a1.append(float(S)) S1 = a1 N=len(a1) Normal=pl.normal(0,1,size=len(f2(1)))#normal estimates fig,ax=pl.subplots(1,3) ax[0].plot(a1) ax[1].plot(pl.sort(Normal),f2(1)) #QQ plot ax[1].set_xlabel('normal distribution') ax[1].set_ylabel('normalized logret') ax[1].legend(('QQ plot'),loc='upper right') ax[2].acorr(f3(1),maxlags=60) #AUTOCORRELATION PLOT ax[2].set_xlabel('autocorrelation plot') pl.tight_layout() pl.show() S1=a1 N=len(S1) print("[mu,sigma]",f1(1)) #QUESTION NO (c) r=1.5/100#US treasury(1 year) sigma=0.33953053460635674 #from question (a)
def fit_emp_prior(dm, param_type, iter=100000, thin=50, burn=50000, dbname='/dev/null', map_only=False, store_results=True):
    """ Generate an empirical prior distribution for a single disease parameter

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      The object containing all the data, (hyper)-priors, and additional
      information (like input and output age-mesh).

    param_type : str, one of 'incidence', 'prevalence', 'remission', 'excess-mortality'
      The disease parameter to work with

    iter, thin, burn, dbname :
      Accepted for interface compatibility but not read by this function;
      the sampler below runs with fixed settings (iter=10000, burn=5000,
      thin=5).

    map_only : bool
      If true, return after the MAP fits without running MCMC.

    store_results : bool
      If false, return after the MCMC fit without writing anything into
      ``dm``'s empirical prior / mcmc storage.

    Returns
    -------
    None.  Returns early (also None) when there is no data for
    ``param_type``.

    Notes
    -----
    The results of this fit are stored in the disease model's params
    hash for use when fitting multiple parameter types together

    Example
    -------
    $ python2.5 gbd_fit.py 231 -t incidence
    """
    data = [d for d in dm.data
            if d['data_type'] == '%s data' % param_type
            and d.get('ignore') != -1]

    dm.clear_empirical_prior()

    dm.calc_effective_sample_size(data)
    dm.fit_initial_estimate(param_type, data)
    dm.vars = setup(dm, param_type, data)

    # don't do anything if there is no data for this parameter type
    if not dm.vars['data']:
        return

    debug('i: %s' % ', '.join(['%.2f' % x for x in dm.vars['rate_stoch'].value[::10]]))
    sys.stdout.flush()

    # Nodes that take part in every fit, whatever stochastics are selected.
    def shared_nodes():
        return [dm.vars['observed_counts'], dm.vars['rate_potential'], dm.vars['priors']]

    # fit the model
    def map_fit(stoch_names):
        print('\nfitting %s' % ' '.join(stoch_names))
        map_model = mc.MAP([dm.vars[key] for key in stoch_names] + shared_nodes())
        try:
            map_model.fit(method='fmin_powell', verbose=verbose)
        except KeyboardInterrupt:
            debug('User halted optimization routine before optimal value found')
        for key in stoch_names:
            print('%s %s' % (key, dm.vars[key].value.round(2)))
        sys.stdout.flush()

    def mcmc_fit(stoch_names):
        print('\nfitting %s' % ' '.join(stoch_names))
        mcmc = mc.MCMC([dm.vars[key] for key in stoch_names] + shared_nodes())

        mcmc.use_step_method(mc.Metropolis, dm.vars['log_dispersion'],
                             proposal_sd=dm.vars['dispersion_step_sd'])

        # TODO: make a wrapper function for handling this adaptive metropolis setup
        stoch_list = [dm.vars['study_coeffs'], dm.vars['region_coeffs'], dm.vars['age_coeffs_mesh']]
        d1 = len(dm.vars['study_coeffs'].value)
        d2 = len(dm.vars['region_coeffs_step_cov'])
        d3 = len(dm.vars['age_coeffs_mesh_step_cov'])
        # Block-diagonal proposal covariance: identity for the study
        # coefficients, the stored step covariances for the other two
        # blocks, all scaled by 0.01.
        C = pl.eye(d1+d2+d3)
        C[d1:(d1+d2), d1:(d1+d2)] = dm.vars['region_coeffs_step_cov']
        C[(d1+d2):(d1+d2+d3), (d1+d2):(d1+d2+d3)] = dm.vars['age_coeffs_mesh_step_cov']
        C *= .01
        mcmc.use_step_method(mc.AdaptiveMetropolis, stoch_list, cov=C)

        # more step methods
        mcmc.use_step_method(mc.AdaptiveMetropolis, dm.vars['study_coeffs'])
        mcmc.use_step_method(mc.AdaptiveMetropolis, dm.vars['region_coeffs'], cov=dm.vars['region_coeffs_step_cov'])
        mcmc.use_step_method(mc.AdaptiveMetropolis, dm.vars['age_coeffs_mesh'], cov=dm.vars['age_coeffs_mesh_step_cov'])

        try:
            # NOTE: fixed settings; the iter/burn/thin arguments of
            # fit_emp_prior are not used here.
            mcmc.sample(iter=10000, burn=5000, thin=5, verbose=verbose)
        except KeyboardInterrupt:
            debug('User halted optimization routine before optimal value found')
        sys.stdout.flush()

        # reset stoch values to sample mean
        for key in stoch_names:
            mean = dm.vars[key].stats()['mean']
            if isinstance(dm.vars[key], mc.Stochastic):
                dm.vars[key].value = mean
            print('%s %s' % (key, mean.round(2)))

    # Read by map_fit/mcmc_fit through their closure, so it must be bound
    # before either of them is called.
    verbose = 1
    stoch_names = 'region_coeffs age_coeffs_mesh study_coeffs'.split()

    ## start by optimizing parameters separately
    for key in stoch_names:
        map_fit([key])
    ## then fit them all together
    map_fit(stoch_names)
    # now find the over-dispersion parameter that matches these values
    map_fit(['log_dispersion'])

    if map_only:
        return

    # make pymc warnings go to stdout
    mc.warnings.warn = sys.stdout.write
    mcmc_fit(['log_dispersion', 'dispersion', 'study_coeffs', 'region_coeffs',
              'age_coeffs_mesh', 'age_coeffs',
              'predicted_rates', 'expected_rates', 'rate_stoch'])

    alpha = dm.vars['region_coeffs'].stats()['mean']
    beta = dm.vars['study_coeffs'].stats()['mean']
    gamma_mesh = dm.vars['age_coeffs_mesh'].stats()['mean']

    debug('a: %s' % ', '.join(['%.2f' % x for x in alpha]))
    debug('b: %s' % ', '.join(['%.2f' % x for x in pl.atleast_1d(beta)]))
    debug('g: %s' % ', '.join(['%.2f' % x for x in gamma_mesh]))
    debug('d: %.2f' % dm.vars['dispersion'].stats()['mean'])

    covariates_dict = dm.get_covariates()
    derived_covariate = dm.get_derived_covariate_values()
    X = covariates(data[0], covariates_dict)
    debug('p: %s' % ', '.join(['%.2f' % x for x in predict_rate(X, alpha, beta, gamma_mesh, dm.vars['bounds_func'], dm.get_param_age_mesh())]))

    if not store_results:
        return

    # save the results in the param_hash
    prior_vals = dict(
        alpha=list(dm.vars['region_coeffs'].stats()['mean']),
        beta=list(pl.atleast_1d(dm.vars['study_coeffs'].stats()['mean'])),
        gamma=list(dm.vars['age_coeffs'].stats()['mean']),
        delta=float(dm.vars['dispersion'].stats()['mean']))

    prior_vals.update(
        sigma_alpha=list(dm.vars['region_coeffs'].stats()['standard deviation']),
        sigma_beta=list(pl.atleast_1d(dm.vars['study_coeffs'].stats()['standard deviation'])),
        sigma_gamma=list(dm.vars['age_coeffs'].stats()['standard deviation']),
        sigma_delta=float(dm.vars['dispersion'].stats()['standard deviation']))
    dm.set_empirical_prior(param_type, prior_vals)

    median_sample_size = pl.median([values_from(dm, d)[3] for d in dm.vars['data']] + [1000])
    debug('median effective sample size: %.1f' % median_sample_size)

    param_mesh = dm.get_param_age_mesh()
    age_mesh = dm.get_estimate_age_mesh()

    # Every 5th joint draw; list() is needed because zip may be lazy and
    # lazy iterators cannot be sliced.
    trace = list(zip(dm.vars['region_coeffs'].trace(),
                     dm.vars['study_coeffs'].trace(),
                     dm.vars['age_coeffs'].trace()))[::5]

    for r in dismod3.gbd_regions:
        debug('predicting rates for %s' % r)
        for y in dismod3.gbd_years:
            for s in dismod3.gbd_sexes:
                key = dismod3.utils.gbd_key_for(param_type, r, y, s)
                rate_trace = [
                    predict_region_rate(key,
                                        alpha=a,
                                        beta=b,
                                        gamma=g,
                                        covariates_dict=covariates_dict,
                                        derived_covariate=derived_covariate,
                                        bounds_func=dm.vars['bounds_func'],
                                        ages=dm.get_estimate_age_mesh())
                    for a, b, g in trace]
                mu = dismod3.utils.interpolate(param_mesh, pl.mean(rate_trace, axis=0)[param_mesh], age_mesh)
                dm.set_initial_value(key, mu)
                dm.set_mcmc('emp_prior_mean', key, mu)

                # similar to saving upper_ui and lower_ui in function store_mcmc_fit below
                rate_trace = pl.sort(rate_trace, axis=0)
                # Row indices must be integers: float indices are rejected
                # by current NumPy (older versions truncated them, which
                # int() reproduces).
                upper_row = int(.975 * len(rate_trace))
                lower_row = int(.025 * len(rate_trace))
                dm.set_mcmc('emp_prior_upper_ui', key, dismod3.utils.interpolate(param_mesh, rate_trace[upper_row, :][param_mesh], age_mesh))
                dm.set_mcmc('emp_prior_lower_ui', key, dismod3.utils.interpolate(param_mesh, rate_trace[lower_row, :][param_mesh], age_mesh))
from libs.which import * from libs.nanRound import * from libs.grid_area import grid_area data_dir = 'data/u-aj523/' tileFrac_file = 'data/N96e_GA7_17_tile_cci_reorder.anc' tile_lev = np.array([101 ,102 ,103 ,201 ,202 ,3 ,301 ,302 ,4 ,401 , 402 ,501 ,502 ,6 ,7 ,8 ,9]) tile_nme = np.array(['BD','TBE','tBE','NLD','NLE','C3G','C3C','C3P','C4G','C4C', 'C4P','SHD','SHE','Urban','Lake','Bare Soil','Ice']) var_name = ['VIS', 'NIR'] stashCde = ['m01s01i270', 'm01s01i271'] files = sort(listdir_path(data_dir)) def plotBox(dat, weights, N, n, title = '', maxy = 2, xlab = None): fig = plt.subplot(N, 1, n) plt.subplots_adjust(left=0.075, right=0.95, top=0.9, bottom=0.25) bp, dat = weightedBoxplot(dat, weights, notch=0, sym='+', vert=1, whis=1.5) ax1 = plt.gca() plt.setp(bp['boxes'], color='black') plt.setp(bp['whiskers'], color='black') plt.setp(bp['fliers'], color='red', marker='+') # Add a horizontal grid to the plot, but make it very light in color # so we can use it for reading data values but not be distracting
plt.figure(figsize=(15, 5 * (len(dat) - 1))) plot_cubes(dat, title, cmap) plt.gcf().text(.05, .95, git, rotation=90) plt.savefig(fig_name) dat[-1].long_name = title return dat[-1] ############################################################################# ## Run ## ############################################################################# files = sort(listdir_path(data_dir + mod_out)) soil = open_plot_and_return(soil_fignm, soil_title, soil_codes, soil_names, soil_units, soil_cmap, scale=soil_scale) wood = open_plot_and_return(Wood_fignm, Wood_title, Wood_codes, Wood_names, Wood_units, Wood_cmap,
# Report how many descriptor matches were found in total.
print '>> matches:', len(matches)

# Choose a threshold for selecting "good" matches:
# everything within 60% of the largest observed distance counts as good.
dist = [m.distance for m in matches]
#thresh_dist = (sum(dist) / len(dist))  # mean distance as threshold
thresh_dist = max(dist)*0.6

# Summary statistics of the match distances.
# NOTE(review): max()/min() and the mean fail on an empty `matches` list;
# nothing visible here guards against that.
print 'distance: min: %.3f' % min(dist)
print 'distance: mean: %.3f' % (sum(dist) / len(dist))
print 'distance: max: %.3f' % max(dist)

# Split the matches at the threshold (a distance equal to it is "good").
good_matches = [m for m in matches if m.distance <= thresh_dist]
poor_matches = [m for m in matches if m.distance > thresh_dist]

print '>> selected matches:', len(good_matches)

# Visualization: one image per group, drawn by the imgUtils helper.
goodMatchImage = imgUtils.drawMatches(firstImg,firstKP,secondImg,secondKP,good_matches)
poorMatchImage = imgUtils.drawMatches(firstImg,firstKP,secondImg,secondKP,poor_matches)
cv2.imshow("goodMatchImage", goodMatchImage)
cv2.imshow("poorMatchImage", poorMatchImage)
# Blocks until a key is pressed, then closes both windows.
cv2.waitKey()
cv2.destroyAllWindows()
# Save both visualizations into the current working directory.
cv2.imwrite('./SURF_goodMatches1.jpg',goodMatchImage)
cv2.imwrite('./SURF_poorMatches1.jpg',poorMatchImage)

# Plot the match distances in ascending order against their rank.
figure()
plot(arange(len(dist)),sort(dist))  # plot(xData,yData)
show()
def fts_fft(cts, optv,rate, pks2, length, band=[], hann=False, bolo=0, plt=False,absv=False, phaseb=[], chop=False, crange=[], notquiet=False):
    '''Fourier-transform the scans around each white-light peak of an FTS time stream.

    For every peak in ``pks2`` a window symmetric about the peak is cut out
    of ``cts``, a linear trend and the mean are removed, the data are
    rotated so the peak sits at the start, FFT'd, phase corrected with a
    linear fit of the phase inside ``phaseb`` and reduced to the
    non-negative frequencies.  (Ported from an IDL routine.)

    Parameters
    ----------
    cts : array
        Time stream; indexed with integer arrays, so it must be a numpy
        array.
    optv, rate : float
        Position of sample k is ``k * optv / rate``.  Presumably the
        optical velocity and the sample rate -- confirm against the caller.
    pks2 : integer array
        Sample indices of the white-light peaks (sorted internally).
    length : float
        Half-width of the window around each peak, in position units.
    band : sequence of 2 numbers
        Frequency limits (same unit as ``30 * icm``, labelled GHz in the
        plots) used for plotting/normalisation.  Empty means [50, 700].
        The default list is never modified.
    hann : bool
        Apply a Hanning window before the FFT.
    bolo :
        Only used in the plot title.
    plt : bool
        Plot the spectra.
    absv : bool
        Plot absolute values, and build the returned interferogram from the
        complex FFT instead of its real part.
    phaseb : sequence of 2 numbers
        Band used for the phase fit; empty means "same as ``band``".
    chop : bool
        Data are chopped: locate the chopper line and shift the frequency
        axis so that it becomes zero frequency.
    crange : sequence of 2 numbers
        (low, high) search range for the chopper line; empty means (8, 15).
    notquiet : bool
        Print/plot chopper diagnostics.

    Returns
    -------
    dict with keys
        'freq', 'real_FFT', 'im_FFT', 'abs' : (max_len, n_scans) arrays,
            one column per scan, zero padded at the end;
        'xint', 'intf' : (max_len, n_scans) arrays holding the output of
            ``create_interf``, zero padded at the start;
        'time_scan' : peak sample index / rate for every scan;
        'whitel' : sorted peak indices;
        'scan_length' : number of spectral points per scan.

    Raises
    ------
    ValueError
        If ``pks2`` is empty.
    '''
    # Resolve the plotting band first: phaseb falls back to it, and with
    # both left empty the phase selection below would index an empty list.
    if len(band) == 0:
        band = [50, 700]
    if len(phaseb) == 0:
        phaseb = band

    pks = pl.sort(pks2)
    if len(pks) == 0:
        raise ValueError('fts_fft needs at least one white light peak')
    pos = pl.arange(len(cts))*optv/rate
    peak_time = pks/rate
    ptitle = 'FTS Data for bolo ' + str(bolo)

    def gaussian(x, a0, a1, a2, a3):
        # Mimics IDL's gaussfit with 'nterms' = 4
        z = (x-a1)/a2
        return a0*pl.exp(-(z**2)/2) + a3

    def linfit(x, a, b):
        return a + b*x

    # Per-scan results, collected in lists and assembled after the loop.
    freq_parts, real_parts, imag_parts = [], [], []
    xint_parts, intf_parts = [], []

    for i, pk in enumerate(pks):
        # grab the data within length of each white light peak and make
        # the scan symmetric around the peak
        dd = pl.where((pos > (pos[pk] - length)) & (pos < pos[pk] + length))[0]
        d1 = pl.where(pos[dd] > pos[pk])[0]
        d2 = pl.where(pos[dd] < pos[pk])[0]
        # one half may be shorter than the other (peak close to an edge)
        mmin = min(len(d1), len(d2))
        cts_range = pl.arange(2*mmin) + pk - mmin

        # take out the mean and slope:
        xxtemp = pl.arange(len(cts_range))
        rrtemp = pl.polyfit(xxtemp, cts[cts_range], 1)
        yr = cts[cts_range] - rrtemp[0]*xxtemp
        yr = yr - yr.mean()

        # Need even length for FFT
        if len(yr) % 2 != 0:
            yr = yr[1:]

        # deal with possible chopping now:
        # basic idea: find the chopper peak frequency - then we'll take the
        # FFT, and set this chopper peak to 0 frequency.  Use the negative
        # side band as our signal (lose 1/2 the signal, but then don't have
        # to deal with potentially large change in frequency response
        # between the two side bands).
        if chop:
            if len(crange) == 0:
                print(crange)
                chophi = 15
                choplow = 8
            else:
                chophi = crange[1]
                choplow = crange[0]

            # fit the chopped signal FFT with a gaussian around the chopper peak
            qout = time_fft(yr, samplerate=rate, hann=True)
            fr = pl.where((qout['freq'] > choplow) & (qout['freq'] < chophi))[0]
            # NOTE (from the original port): this fit can come back with an
            # infinite covariance matrix.
            fit, pcov = curve_fit(gaussian, qout['freq'][fr], qout['abs'][fr])
            pkf = fit[1]  # found the peak response frequency.
            chspec_f = 30.0*pkf/optv  # this is where it maps in GHz
            if notquiet:
                print("Chopper at "+str(pkf)+ " Hz")
                print("Spectra: "+str(chspec_f)+ " GHz")
                print('3rd Harmonic in subtracted scan at ' + str(2*chspec_f) + " GHz")

        if hann:
            yr = yr*pl.hanning(len(yr))

        # Rotate so the white light peak (centre of the window) moves to the
        # start of the array; same shift the deque.rotate call used.
        yr = pl.roll(yr, int(-len(yr)/2.0 + 1))

        # FFT-ordered index: 0..N/2 followed by -(N/2-1)..-1
        n = pl.arange(len(yr)/2. + 1)
        n2 = pl.concatenate((n, -(n[1:len(n)-1])[::-1]))
        icm = n2/len(yr)/(optv/rate)
        icm0 = icm
        icm2 = icm
        raw_fft = pl.fft(yr)
        FFT_r = raw_fft

        # okay now let's do the shifting and such with the chopper:
        if chop:
            chspec_icm = chspec_f/30.0
            icm = -1.0*(icm - chspec_icm)
            if notquiet:
                pl.figure()
                pl.plot(30*icm, abs(FFT_r), label='Not Demodulated')
                pl.plot(30*icm2, abs(FFT_r), label='Lower Side Band')
                pl.plot(-30*icm, abs(FFT_r), label='Upper Side Band')
                pl.xlim(50, 300)
                pl.title('Chopped data, abs value')
                pl.ylim(0, .5)

        # now to take out a phase:
        phase = pl.arctan2(FFT_r.imag, FFT_r.real)
        tt = pl.where((icm > phaseb[0]/30.) & (icm < phaseb[1]/30.))[0]
        if len(tt) < 2:
            # A straight line cannot be fitted to fewer than two points:
            # apply no phase correction.
            r = [0, 0]
        else:
            # 'sigma' here was 'measure_errors' in IDL
            r, pcov = curve_fit(linfit, icm[tt], phase[tt], sigma=1/abs(FFT_r[tt])**2)

        # apply phase correction:
        pc = r[0] + r[1]*icm
        FFT_r = FFT_r*pl.exp(-pc*1j)

        # create the interferogram to feed back (built from the
        # uncorrected FFT, as in the original):
        if absv:
            intf = create_interf(30*icm, raw_fft)
        else:
            intf = create_interf(30*icm0, raw_fft.real)
        xint_parts.append(intf['x'])
        intf_parts.append(intf['intf'])

        # keep only positive frequencies, in ascending order:
        qq = pl.where(icm >= 0)
        icm = icm[qq]
        FFT_r = FFT_r[qq]
        qqsort = icm.argsort()
        icm = icm[qqsort]
        FFT_r = FFT_r[qqsort]

        freq_parts.append(30.0*icm)
        real_parts.append(FFT_r.real)
        imag_parts.append(FFT_r.imag)

        if plt:
            if i == 0:
                pl.figure()
            inband = pl.where((30*icm > band[0]) & (30*icm < band[1]))[0]
            if len(inband) == 0:
                inband = pl.arange(len(icm))
            scan_label = '%.4f cm' % (peak_time[i]*optv)
            if not absv:
                norm = max((FFT_r[inband]).real)
                if i == 0:
                    pl.plot(30*icm[1:], (FFT_r[1:]).real/norm, 'k-', label=scan_label)
                    pl.plot(30*icm[1:], (FFT_r[1:]).imag/norm, 'k--')
                    pl.xlabel('Frequency (GHz)')
                    pl.ylabel('Normalized Spectra')
                    pl.xlim(band[0], band[1])
                    pl.title(ptitle)
                    pl.ylim(-0.5, 1)
                else:
                    pl.plot(30*icm[1:], (FFT_r[1:]).real/norm, color=pl.cm.jet(.15*i), label=scan_label)
                    pl.plot(30*icm[1:], (FFT_r[1:]).imag/norm, '--', color=pl.cm.jet(.15*i))
            else:
                norm = max(abs(FFT_r[inband]))
                if i == 0:
                    pl.plot(30*icm[1:], abs(FFT_r[1:])/norm)
                    pl.xlabel('Frequency (GHz)')
                    pl.ylabel('Normalized Abs Value of Spectra')
                    pl.xlim(band)
                    pl.title(ptitle)
                    # y-range of the normalised spectrum (was a second
                    # xlim call that overrode the band limits)
                    pl.ylim(-.1, 1)
                else:
                    pl.plot(30*icm[1:], abs(FFT_r[1:])/norm, color=pl.cm.jet(.15*i), label=scan_label)

    if plt:
        pl.legend(loc=4)
        pl.grid()

    # okay, let's get this result in some reasonable form:
    # one column per scan, shorter scans padded with zeros at the end
    nscans = len(pks)
    scanl = pl.array([len(part) for part in freq_parts])
    maxl = max(scanl)
    freq_a = pl.zeros((maxl, nscans))
    real_a = pl.zeros((maxl, nscans))
    im_a = pl.zeros((maxl, nscans))
    for i in range(nscans):
        freq_a[:scanl[i], i] = freq_parts[i]
        real_a[:scanl[i], i] = real_parts[i]
        im_a[:scanl[i], i] = imag_parts[i]
    abs_a = pl.sqrt(real_a**2 + im_a**2)

    # now to deal with the interferograms:
    # shorter ones are padded with zeros at the START
    length_inter = [len(part) for part in intf_parts]
    maxl_int = max(length_inter)
    xint_a = pl.zeros((maxl_int, nscans))
    int_a = pl.zeros((maxl_int, nscans))
    for i in range(nscans):
        first = maxl_int - length_inter[i]
        xint_a[first:, i] = xint_parts[i]
        int_a[first:, i] = intf_parts[i]

    timeout = pl.array([peak_time[i] for i in range(nscans)])

    result = {'freq': freq_a, 'real_FFT': real_a, 'im_FFT': im_a, 'abs': abs_a,
              'time_scan': timeout, 'whitel': pks, 'xint': xint_a, 'intf': int_a,
              'scan_length': scanl}
    return result