def cv1(x, bws, model='gaussian', plot=False, n_folds=10):
    """
    Score each bandwidth with leave-one-out cross validation.

    For every bandwidth in `bws` the sample is split with ``KFold(N)``
    (N = number of objects, i.e. one object per test fold), a kernel density
    model is fitted to the training part and the log likelihood of the test
    part is accumulated.

    Parameters
    ----------
    x : array indexed as ``x[rows, :]``
        The sample, one object per row.
    bws : sequence
        Bandwidths to evaluate.
    model : str
        Kernel name handed to the density estimator.
    plot : bool
        If True, show a grid (one row per bandwidth, one column per fold) of
        the training sample, the test sample and the KDE fitted at each step.
        You might need to modify the code if you want a nicer layout.
    n_folds : int
        Not used by the computation (the split is always leave-one-out);
        kept so existing callers keep working.

    Returns
    -------
    cv_1 : ndarray
        For each bandwidth, the summed test log likelihood divided by N.
    """
    # Get the number of bandwidths to check and the number of objects
    N_bw = len(bws)
    N = len(x)
    cv_1 = np.zeros(N_bw)

    # If plotting is requested, set up the plot region
    if plot:
        # KFold(N) produces N folds, so the grid needs N columns; the old
        # ceil(N / n_folds) column count was overrun by the fold counter.
        # squeeze=False keeps `axes` two-dimensional for a single bandwidth.
        fig, axes = plt.subplots(N_bw, N, figsize=(15, 8), squeeze=False)
        xplot = np.linspace(-3, 8, 1000)

    # Loop over each band-width and calculate the probability of the
    # test set for this band-width
    for i, bw in enumerate(bws):
        kf = KFold(N)

        # lnP accumulates the log likelihood of the test sets
        lnP = 0.0

        # Loop over each fold; i_k is only used to pick the plot column
        for i_k, (train, test) in enumerate(kf.split(x)):
            x_train = x[train, :]
            x_test = x[test, :]

            # Create the kernel density model for this bandwidth and fit
            # to the training set.
            kde = KD(kernel=model, bandwidth=bw).fit(x_train)

            # score evaluates the log likelihood of a dataset given the
            # fitted KDE.
            log_prob = kde.score(x_test)

            if plot:
                ax = axes[i][i_k]
                # Note that the test sample is hard to see here.
                hist(x_train, bins=10, ax=ax, color='red')
                hist(x_test, bins=10, ax=ax, color='blue')
                ax.plot(xplot, np.exp(kde.score_samples(xplot[:, np.newaxis])))

            lnP += log_prob

        # Calculate the average likelihood
        cv_1[i] = lnP/N

    return cv_1
def Plot_hist(self, KeyTicks=None):
    """
    Plot the histogram of the data, with a legend and axis labels.

    When ``self.Mass`` is True the mass data are drawn over ``self.m_range``
    and `KeyTicks`, if given, is used as the x tick positions; otherwise the
    contrast data are drawn over ``self.c_range`` and `KeyTicks` is ignored.
    The figure is shown with ``plt.show()``.
    """
    legend_text = (self.Protein + "(" + self.Conc + "," + self.Buffer + ","
                   + self.Nucleotide + ")")

    plt.figure()
    if self.Mass == True:
        hist(self.mass_data, bins=self.bins, align='left', label=legend_text)
        plt.legend()
        plt.xlabel("Mass (kDa)")
        plt.ylabel("Particle Frequency")
        plt.xlim(self.m_range)
        # Identity test: `KeyTicks == None` is evaluated element-wise for a
        # numpy array and `not <array>` then raises for more than one tick.
        if KeyTicks is not None:
            plt.xticks(KeyTicks)
    else:
        hist(self.contrast, bins=self.bins, align='left', label=legend_text)
        plt.legend()
        plt.xlabel("Contrast")
        plt.ylabel("Particle Frequency")
        plt.xlim(self.c_range)
    plt.show()
def generateToy():
    """
    Compare a fixed 100-bin histogram with a Bayesian-blocks histogram.

    The values are read from 'values2.dat' the first time and cached as a
    pickle in 'values2.p'; later runs load the pickle. The first 20000 values
    are histogrammed on a logarithmic y axis, the figure is written to
    'z_data_hist_comp.png' and then shown.
    """
    print('loading values')
    if not os.path.isfile('values2.p'):
        z_data = np.loadtxt('values2.dat')
        # Context managers make sure the cache file is flushed and closed.
        with open('values2.p', "wb") as cache_file:
            pkl.dump(z_data, cache_file, pkl.HIGHEST_PROTOCOL)
    else:
        # NOTE: unpickling runs arbitrary code; 'values2.p' must only ever be
        # the cache written by the branch above.
        with open('values2.p', "rb") as cache_file:
            z_data = pkl.load(cache_file)
    print('loaded')

    #x = np.random.normal(size=1000)
    z_data_subset = z_data[0:20000]
    plot_range = [50, 400]
    # Single formatted argument: prints the same text under Python 2 and 3.
    print('max %s min %s' % (max(z_data_subset), min(z_data_subset)))

    plt.yscale('log', nonposy='clip')
    plt.axes().set_ylim(0.0000001, 0.17)

    hist(z_data_subset, range=plot_range, bins=100, normed=1,
         histtype='stepfilled', color=['lightgrey'], label=['100 bins'])
    #hist(z_data_subset,range=plot_range,bins='knuth',normed=1,histtype='step',linewidth=1.5,
    #     color=['navy'], label=['knuth'])
    hist(z_data_subset, range=plot_range, bins='blocks', normed=1,
         histtype='step', linewidth=2.0, color=['crimson'],
         label=['b blocks'])
    plt.legend()
    #plt.yscale('log', nonposy='clip')
    #plt.axes().set_ylim(0.0000001,0.17)
    plt.xlabel(r'$m_{\ell\ell}$ (GeV)')
    plt.ylabel('A.U.')
    plt.title(r'Z$\to\mu\mu$ Data')
    plt.savefig('z_data_hist_comp.png')
    plt.show()
def triangle(self):
    """
    Draw a normalised step histogram (Knuth binning) of the flattened trace.

    Requires ``self.sample_invoked`` to be True; otherwise the assertion
    fails with a hint to call ``sample(iter, burn)`` first.
    """
    assert self.sample_invoked == True, \
        'Must sample first! Use sample(iter, burn)'

    axis = plt.figure(figsize=(4, 4)).add_subplot(111)
    samples = self.trace[:].flatten()
    hist(samples, bins='knuth', normed=True, histtype='step', color='k',
         ax=axis)
    plt.xlabel('$b$')
def histogram(totalDF, column, axis, binN=10, incl_untrunc=True):
    """
    Draw weighted histograms of `column` for truncated (and untruncated) rows.

    Only rows with ``h2 > 0`` and ``M1 < 50`` are used and missing values of
    `column` are dropped. Every entry is weighted by 1/len(sample), so the bar
    heights of each sample sum to one. The untruncated sample is drawn only
    when `incl_untrunc` is True; when it is False the truncated sample is
    used in its place to derive the bins.
    """
    def _select(untruncated_flag):
        rows = ((totalDF.untruncated == untruncated_flag)
                & (totalDF.h2 > 0) & (totalDF.M1 < 50))
        return totalDF[rows][column].dropna()

    def _equal_weights(sample):
        size = len(sample)
        return [1. / size] * size

    trunc = _select(False)
    untrunc = _select(True) if incl_untrunc else trunc

    bins = get_bins(binN, untrunc, trunc)
    wuntrunc = _equal_weights(untrunc)
    wtrunc = _equal_weights(trunc)

    hist(trunc, bins, ax=axis, alpha=0.5, label='Truncated', weights=wtrunc)
    if incl_untrunc:
        hist(untrunc, bins, ax=axis, alpha=0.5, label='Untruncated',
             weights=wuntrunc)
def Batch_Fit(Path, Files, Rescaled=False, bins='knuth', Auto=True, Mass=False, m=1.0, b=0.0):
    """
    Fit Gaussians to the data of every file in `Files`.

    For each file, ``Path + File + "Cf.npy"`` (or ``"Cf_rescaled.npy"`` when
    `Rescaled` is True) is loaded; with `Mass` True the values are converted
    as ``abs(Cf) * m + b``. The starting parameters come from ``Auto_Gauss``
    or, when `Auto` is False, are typed in by the user after a histogram has
    been shown. Returns four lists (centres, amplitudes, widths, optimal
    parameters) with one entry per file, as returned by ``Fit_Gaussian``.
    """
    ctr, amp, wid, popt = [], [], [], []

    for File in Files:
        print(File)

        suffix = "Cf_rescaled.npy" if Rescaled == True else "Cf.npy"
        Cf = np.load(Path + File + suffix)
        if Mass == True:
            Cf = abs(Cf) * m + b

        if Auto == True:
            p_guess = Auto_Gauss(Path, File, Cf)
        else:
            # Show the data so the user can read off the peaks.
            plt.figure()
            hist(abs(Cf), bins=bins, normed=True, align='mid')
            plt.show()

            print("Number of Gaussians:")
            No_gauss = int(input())
            print('\n')

            p_guess = []
            for number in range(1, No_gauss + 1):
                print("Gaussian %i" % number)
                for prompt in ("Centre:", "Amplitude:", "Width:"):
                    print(prompt)
                    p_guess.append(float(input()))
            print("Parameter Guess:", p_guess)

        fitted = Fit_Gaussian(abs(Cf), p_guess, bins, Mass)
        center, amplitude, width, param_opt = fitted
        ctr.append(center)
        amp.append(amplitude)
        wid.append(width)
        popt.append(param_opt)

    return ctr, amp, wid, popt
def make_hist(pan, subpairs, ax=None, **kwargs):
    """
    Histogram the differenced row 30 of each item in `subpairs` on shared bins.

    The bin edges are taken from a throw-away Scott-rule histogram of the
    first column (that figure is closed again). If `ax` is None a new figure
    is created with `kwargs`. Returns whatever the final ``hist`` call returns.
    """
    columns = [pan.xs(x, axis='items').diff().iloc[30] for x in subpairs]
    t = pd.concat(columns, axis=1)
    t.columns = map(str, subpairs)

    # Throw-away histogram, used only for its bin edges.
    first_column = t[t.columns[0]]
    _, idx1, fig = hist(first_column, bins='scott', alpha=.35, width=.0005)
    plt.close()

    if ax is None:
        fig = plt.figure(**kwargs)
        ax = fig.add_subplot(111)

    labels = t.columns.tolist()
    return hist(t.values, bins=idx1, ax=ax, histtype='bar', label=labels)
def bin_edges_f(bin_method, mag_col_cl):
    '''
    Obtain bin edges for each photometric dimension using the cluster region
    diagram.

    Parameters
    ----------
    bin_method : str
        'sturges' or 'sqrt' derive one bin count from the length of the first
        dimension and use it for every dimension; 'bb' uses a fixed bin width
        of 0.25 for the first dimension and 0.5 for the second; any other
        value is handed to ``hist`` as its ``bins`` argument.
    mag_col_cl : sequence of sequences
        One sequence of values per photometric dimension.

    Returns
    -------
    bin_edges : list
        One array of bin edges per dimension.
    '''
    bin_edges = []

    if bin_method in ('sturges', 'sqrt'):
        n_values = len(mag_col_cl[0])
        if bin_method == 'sturges':
            b_num = 1 + np.log2(n_values)
        else:
            b_num = np.sqrt(n_values)
        # np.histogram requires an integer bin count; truncate the rule's
        # float result and never ask for fewer than one bin.
        b_num = max(1, int(b_num))

        for mag_col in mag_col_cl:
            bin_edges.append(np.histogram(mag_col, bins=b_num)[1])

    elif bin_method == 'bb':
        # Based on Bonatto & Bica (2007) 377, 3, 1301-1323. Fixed bin width
        # of 0.25 for colors and 0.5 for magnitudes.
        b_num = [
            max(1, int((max(mag_col_cl[0]) - min(mag_col_cl[0])) / 0.25)),
            max(1, int((max(mag_col_cl[1]) - min(mag_col_cl[1])) / 0.5)),
        ]

        for i, mag_col in enumerate(mag_col_cl):
            bin_edges.append(np.histogram(mag_col, bins=b_num[i])[1])

    else:
        for mag_col in mag_col_cl:
            bin_edges.append(hist(mag_col, bins=bin_method)[1])

    return bin_edges
def get_index(df):
    """
    Return Scott-rule bin edges for the one-step difference at label 30.

    A bit of a wasteful, hackish way to get an index to use for the various
    lags: a histogram is drawn just to obtain its bin edges.
    """
    one_step_change = df.diff(1).loc[30]
    drawn = hist(one_step_change, bins='scott', alpha=.35)
    _, idx, _ = drawn
    return idx
def plot_labeled_histogram(style, data, name, x, pdf_true, ax=None, hide_x=False, hide_y=False):
    """
    Draw a normalised step histogram of `data` over the filled true pdf.

    Parameters
    ----------
    style : passed to ``hist`` as its ``bins`` argument.
    data : values to histogram.
    name : str
        Text shown, together with the number of bins, in the top-right corner.
    x, pdf_true : coordinates of the reference curve drawn as a grey fill.
    ax : optional
        Passed through ``plt.axes``; when None the current axes are used.
    hide_x, hide_y : bool
        Suppress the tick labels of the corresponding axis.

    Returns
    -------
    The axes that were drawn on.
    """
    if ax is None:
        # Previously `ax` stayed None here and the first `ax.text` call
        # failed; fall back to the axes `hist` draws on by default.
        ax = plt.gca()
    else:
        ax = plt.axes(ax)

    counts, bins, patches = hist(data, bins=style, ax=ax, color='k',
                                 histtype='step', normed=True)
    ax.text(0.99, 0.95, '%s:\n%i bins' % (name, len(counts)),
            transform=ax.transAxes, ha='right', va='top', fontsize=12)
    ax.fill(x, pdf_true, '-', color='#CCCCCC', zorder=0)

    if hide_x:
        ax.xaxis.set_major_formatter(plt.NullFormatter())
    if hide_y:
        ax.yaxis.set_major_formatter(plt.NullFormatter())

    ax.set_xlim(-5, 5)
    return ax
def Auto_Gauss(Path, File, Cf):
    """
    Seed Gaussian parameters from the relative maxima of a Knuth histogram.

    A histogram of ``abs(Cf)`` is shown, its relative maxima (order 3) are
    located, and for each maximum the width is estimated as twice the
    distance from the peak bin edge to the first bin on its right whose count
    is not above half the peak count.

    Parameters
    ----------
    Path, File : unused here; kept for interface compatibility.
    Cf : array of values to histogram.

    Returns
    -------
    p_guess : ndarray
        Flat array ``[centre, amplitude, width, centre, amplitude, ...]``.
    """
    # `plt.figure` without parentheses was a no-op; open a fresh figure.
    plt.figure()
    n, bins, pat = hist(abs(Cf), bins='knuth', align='left')
    plt.show()

    Rel_Max = argrelextrema(n, np.greater, order=3)
    peaks = Rel_Max[0]
    ctr = bins[peaks]
    amp = n[peaks]

    Wid = np.zeros(len(peaks))
    for j, idx in enumerate(peaks):
        i = 1
        # Stop at the last bin: a peak whose right flank never drops below
        # half maximum used to run past the end of `n`. `bins` has one more
        # entry than `n`, so bins[idx + i] stays valid.
        while idx + i < len(n) and n[idx] / 2.0 < n[idx + i]:
            i += 1
        Wid[j] = (bins[idx + i] - bins[idx]) * 2

    # Interleave as centre, amplitude, width for every peak.
    p_guess = np.column_stack((ctr, amp, Wid)).ravel()

    print("Auto p_guess:", p_guess)
    return p_guess
def Fit_Gaussian(self):
    """
    Fit multiple Gaussians starting from ``self.p_guess`` and plot the result.

    The normalised histogram of the mass data (``self.Mass`` True) or of the
    contrast data is fitted with ``self.func``. Whenever ``curve_fit`` raises
    RuntimeError, ``self.order`` is increased by one, ``self.Auto_Gauss()`` is
    run again and the fit is retried. The fitted curve is stored in
    ``self.x`` / ``self.fit`` and the figure is shown.
    """
    plt.figure()
    mass_mode = self.Mass == True
    sample = self.mass_data if mass_mode else self.contrast
    n, bins, pathces = hist(sample, bins=self.bins, normed=True, align='mid')

    # Retry until curve_fit converges, re-seeding the guess each time.
    converged = False
    while not converged:
        try:
            self.popt, pcov = curve_fit(self.func, bins[:-1], n,
                                        p0=self.p_guess)
        except RuntimeError:
            print("Error - curve_fit failed, re-running Auto_Gauss")
            self.order += 1
            self.Auto_Gauss()
        else:
            converged = True

    limits = self.m_range if mass_mode else self.c_range
    self.x = np.linspace(limits[0], limits[1], 3000)
    self.fit = self.func(self.x, *self.popt)

    plt.plot(self.x, self.fit, 'b--')
    plt.yticks([])
    if mass_mode:
        plt.xlabel("Mass (kDa)", fontsize=12, color='blue')
    else:
        plt.xlabel("Contrast", fontsize=12, color='blue')
        # NOTE(review): the flattened source does not show whether this
        # second contrast histogram belongs to the else branch or runs in
        # both modes - confirm against the original layout.
        n, bins, pathces = hist(self.contrast, bins=self.bins, normed=True,
                                align='mid')
    plt.ylabel("Probability Density", fontsize=12, color='blue')
    plt.show()
def normalisation(self):
    """
    Store a unit-area version of the histogram counts.

    The mass data (``self.Mass`` True) or the contrast data are histogrammed
    over their range; the counts are divided by the summed count * bin width
    and stored in ``self.norm_n``, the bin edges in ``self.norm_bins``.
    """
    if self.Mass == True:
        counts, edges, _ = hist(self.mass_data, bins=self.bins,
                                range=self.m_range)
    else:
        counts, edges, _ = hist(self.contrast, bins=self.bins,
                                range=self.c_range)

    # Area under the histogram: count times width, summed bin by bin.
    area = sum(height * (right - left)
               for height, left, right in zip(counts, edges[:-1], edges[1:]))

    self.norm_n = counts * (1.0 / area)
    self.norm_bins = edges
def Manual_Gauss(self):
    """
    Seed the fit parameters from manual input.

    A histogram of the contrast data is shown, then the user is asked for
    the number of Gaussians and for the centre, amplitude and width of each.
    The values are stored in ``self.p_guess`` as a flat array
    ``[centre, amplitude, width, centre, ...]``.
    """
    # `plt.figure` without parentheses never opened a new figure.
    plt.figure()
    hist(self.contrast, bins=self.bins, align='left')
    plt.show()

    print("Number of Gaussians:")
    No_gauss = int(input())

    temp_guess = []
    for i in range(No_gauss):
        print("Gaussian %i" % (i + 1))
        for prompt in ("Centre:", "Amplitude:", "Width:"):
            print(prompt)
            temp_guess.append(float(input()))

    self.p_guess = np.array(temp_guess)
def Manual_Gauss(self):
    """
    Seed the fit parameters from manual input.

    A histogram of the mass data (``self.Mass`` True) or of the contrast data
    is shown, then the user is asked for the number of Gaussians and for the
    centre, amplitude and width of each. The entered values are appended to
    ``self.p_guess`` in the order ``centre, amplitude, width``.
    """
    # `plt.figure` without parentheses never opened a new figure.
    plt.figure()
    if self.Mass == True:
        hist(self.mass_data, bins=self.bins, align='left')
    else:
        hist(self.contrast, bins=self.bins, align='left')
    plt.show()

    print("Number of Gaussians:")
    No_gauss = int(input())

    entered = []
    for i in range(No_gauss):
        print("Gaussian %i" % (i + 1))
        for prompt in ("Centre:", "Amplitude:", "Width:"):
            print(prompt)
            entered.append(float(input()))

    # np.append returns a new array instead of modifying its argument; the
    # result was previously thrown away, so nothing was ever stored.
    self.p_guess = np.append(self.p_guess, entered)
def generateToy():
    """
    Compare a fixed 100-bin histogram with a Bayesian-blocks histogram.

    The values are read from 'values2.dat' the first time and cached as a
    pickle in 'values2.p'; later runs load the pickle. The first 20000 values
    are histogrammed on a logarithmic y axis, the figure is written to
    'z_data_hist_comp.png' and then shown.
    """
    print('loading values')
    if not os.path.isfile('values2.p'):
        z_data = np.loadtxt('values2.dat')
        # Context managers make sure the cache file is flushed and closed.
        with open('values2.p', "wb") as cache_file:
            pkl.dump(z_data, cache_file, pkl.HIGHEST_PROTOCOL)
    else:
        # NOTE: unpickling runs arbitrary code; 'values2.p' must only ever be
        # the cache written by the branch above.
        with open('values2.p', "rb") as cache_file:
            z_data = pkl.load(cache_file)
    print('loaded')

    #x = np.random.normal(size=1000)
    z_data_subset = z_data[0:20000]
    plot_range = [50, 400]
    print('max', max(z_data_subset), 'min', min(z_data_subset))

    plt.yscale('log', nonposy='clip')
    plt.axes().set_ylim(0.0000001, 0.17)

    hist(z_data_subset, range=plot_range, bins=100, normed=1,
         histtype='stepfilled', color=['lightgrey'], label=['100 bins'])
    #hist(z_data_subset,range=plot_range,bins='knuth',normed=1,histtype='step',linewidth=1.5,
    #     color=['navy'], label=['knuth'])
    hist(z_data_subset, range=plot_range, bins='blocks', normed=1,
         histtype='step', linewidth=2.0, color=['crimson'],
         label=['b blocks'])
    plt.legend()
    #plt.yscale('log', nonposy='clip')
    #plt.axes().set_ylim(0.0000001,0.17)
    plt.xlabel(r'$m_{\ell\ell}$ (GeV)')
    plt.ylabel('A.U.')
    plt.title(r'Z$\to\mu\mu$ Data')
    plt.savefig('z_data_hist_comp.png')
    plt.show()
def Auto_Gauss(self):
    """
    Automatically seed the fit parameters from the relative maxima.

    The mass data (``self.Mass`` True) or the contrast data are histogrammed,
    relative maxima of order ``self.order`` are located and maxima whose
    count is 3.0 or less are discarded. For each remaining maximum the width
    is estimated as twice the distance from the peak bin edge to the first
    bin on its right whose count is not above half the peak count. The result
    is stored in ``self.p_guess`` as ``[centre, amplitude, width, ...]`` and
    the peaks (red) and half-maximum points (green) are plotted and shown.
    """
    # `plt.figure` without parentheses never opened a new figure.
    plt.figure()
    if self.Mass == True:
        n, bins, pat = hist(self.mass_data, bins=self.bins, align='left')
    else:
        n, bins, pat = hist(self.contrast, bins=self.bins, align='left')

    Rel_Max = argrelextrema(n, np.greater, order=self.order)[0]
    # Keep only the maxima with a count above 3.0.
    Rel_Max = Rel_Max[n[Rel_Max] > 3.0]

    ctr = bins[Rel_Max]
    amp = n[Rel_Max]

    Wid = np.zeros(len(amp))
    for j, idx in enumerate(Rel_Max):
        i = 1
        # Stop at the last bin: a peak whose right flank never drops below
        # half maximum used to run past the end of `n`. `bins` has one more
        # entry than `n`, so bins[idx + i] stays valid.
        while idx + i < len(n) and n[idx] / 2.0 < n[idx + i]:
            i += 1
        Wid[j] = (bins[idx + i] - bins[idx]) * 2

    # Interleave as centre, amplitude, width for every peak.
    self.p_guess = np.column_stack((ctr, amp, Wid)).ravel()

    plt.plot(ctr, amp, 'r.')
    plt.plot(ctr - Wid / 2.0, amp / 2.0, 'g.')
    plt.plot(ctr + Wid / 2.0, amp / 2.0, 'g.')
    plt.show()
def Fit_Gaussian(self):
    """
    Fit multiple Gaussians to the contrast histogram from ``self.p_guess``.

    The normalised, left-aligned histogram of ``self.contrast`` is fitted
    with ``self.func``; the optimal parameters are stored in ``self.popt``.
    ``self.ctr`` ends up holding every third parameter in turn, i.e. the last
    one visited once the loop has finished.
    """
    plt.figure()
    heights, edges, _patches = hist(self.contrast, bins=self.bins,
                                    normed=True, align='left')
    left_edges = edges[:-1]
    self.popt, _pcov = curve_fit(self.func, left_edges, heights,
                                 p0=self.p_guess)

    # NOTE(review): each pass overwrites self.ctr, so only the final centre
    # survives - confirm whether a list of centres was intended.
    for centre in self.popt[::3]:
        self.ctr = centre
def histo(self):
    """
    Show ``self.entropy`` with fixed and adaptive histogram binnings.

    The first figure holds two fixed-width histograms (15 and 200 bins). The
    second figure holds one panel for Knuth's rule and one for Bayesian
    blocks, each drawn over a 200-bin background histogram.
    """
    #------------------------------------------------------------
    # First figure: show normal histogram binning
    fig = plt.figure(figsize=(10, 4))
    fig.subplots_adjust(left=0.1, right=0.95, bottom=0.15)

    ax1 = fig.add_subplot(121)
    ax1.hist(self.entropy, bins=15, histtype='stepfilled', alpha=0.2,
             normed=True)
    ax1.set_xlabel('entropy bins=15')
    ax1.set_ylabel('Count(t)')

    ax2 = fig.add_subplot(122)
    ax2.hist(self.entropy, bins=200, histtype='stepfilled', alpha=0.2,
             normed=True)
    ax2.set_xlabel('entropy bins=200')
    ax2.set_ylabel('Count(t)')

    #------------------------------------------------------------
    # Second & Third figure: Knuth bins & Bayesian Blocks
    fig = plt.figure(figsize=(10, 4))
    fig.subplots_adjust(left=0.1, right=0.95, bottom=0.15)

    for bins, title, subplot in zip(['knuth', 'blocks'],
                                    ["Knuth's rule-fixed bin-width",
                                     'Bayesian blocks variable width'],
                                    [121, 122]):
        ax = fig.add_subplot(subplot)

        # plot a standard histogram in the background, with alpha transparency
        hist(self.entropy, bins=200, histtype='stepfilled', alpha=0.2,
             normed=True, label='standard histogram')

        # plot an adaptive-width histogram on top. Use the loop's binning
        # rule: it was hard-coded to 'blocks', so the panel titled as Knuth's
        # rule showed Bayesian blocks as well.
        hist(self.entropy, bins=bins, ax=ax, color='black', histtype='step',
             normed=True, label=title)

        ax.legend(prop=dict(size=12))
        ax.set_xlabel('entropy bins')
        ax.set_ylabel('C(t)')

    plt.show()
def auto_discretize(self, num_data, method, range_min_max):
    """
    Perform automatic discretization of a selected feature.

    A method (bayesian blocks, scott method or fixed bin number) along the
    desired data range is passed to a special version of hist which gives
    cutpoints for discretization.

    Parameters
    ----------
    num_data : numeric data offering ``.max()``; the values to categorize.
    method : passed to ``hist`` as its ``bins`` argument.
    range_min_max : passed to ``hist`` as its ``range`` argument.

    Returns
    -------
    pandas.Series of str
        The "categorized" version of the data, with labels '1', '2', ...

    Note: every open matplotlib figure is closed as a side effect.
    """
    hist_data = hist(num_data, bins=method, range=range_min_max)
    plt.close("all")

    cutpoints = hist_data[1]
    leng = len(cutpoints)

    # fix cutoff to make sure outliers are properly categorized as well if
    # necessary
    cutpoints[leng - 1] = num_data.max()
    # cutpoints[0] = num_data.min()

    # automatically assign category labels of '1','2',etc
    # include_lowest is a boolean flag; the former string "TRUE" only worked
    # because any non-empty string is truthy.
    cat_data = pandas.cut(num_data, cutpoints, labels=range(1, leng),
                          include_lowest=True)
    return pandas.Series(cat_data).astype(str)
def noise(fname, x0 = 100, y0 = 100, maxrad = 30):
    """
    Plot and test the pixel distribution in a ring around (x0, y0).

    The image in the first HDU of `fname` is masked to the pixels whose
    distance R from (int(x0), int(y0)) satisfies maxrad < R < maxrad + 20;
    pixels equal to zero are dropped. The remaining values are histogrammed
    three ways (Bayesian blocks, 50 bins, Knuth's rule), and the
    Anderson-Darling, normality, skewness and kurtosis tests as well as the
    mean and median are printed.
    """
    from astroML.plotting import hist

    hdulist = pf.open(fname)
    try:
        im = hdulist[0].data

        xi, yi = np.indices(im.shape)
        R = np.sqrt( (yi - int(y0))**2. + (xi - int(x0))**2. )

        # Multiplying by the boolean ring mask zeroes everything outside it.
        bmasked = im * ((R > maxrad) * (R < maxrad + 20.))
        bdata = bmasked.flatten()
    finally:
        # The file handle used to stay open for the rest of the process.
        hdulist.close()

    # Select the non-zero pixels once instead of before every use.
    ring = bdata[bdata != 0.]

    plt.subplot(3, 1, 1)
    hist(ring, bins = 'blocks')
    plt.xlabel('Flux')
    plt.ylabel('(Bayesian Blocks)')
    plt.title('Noise')

    plt.subplot(3, 1, 2)
    hist(ring, bins = 50)
    plt.xlabel('Flux')
    plt.ylabel('(50 bins)')

    plt.subplot(3, 1, 3)
    hist(ring, bins = 'knuth')
    plt.xlabel('Flux')
    plt.ylabel('(Knuth\'s Rule)')
    plt.show()

    # Single-argument print calls give the same output on Python 2 and 3.
    A2, crit, sig = anderson(ring, dist = 'norm')
    print('A-D Statistic: %s' % (A2,))
    print(' CVs \t Sig.')
    print(np.vstack((crit, sig)).T)

    normality = normaltest(ring)
    print('Normality: %s' % (normality,))

    skewness = skewtest(ring)
    print('Skewness: %s' % (skewness,))

    kurtosis = kurtosistest(ring)
    print('Kurtosis: %s' % (kurtosis,))

    print('Mean: %s' % (np.mean(ring),))
    print('Median: %s' % (np.median(ring),))
def plot_wage_change_dist(df, pi, lambda_, nperiods=4, log=True, figkwargs=None, axkwargs=None):
    """
    Make the figure for the distribution of wage changes.

    Parameters
    ----------
    df : DataFrame of wages; differenced over lags 1..nperiods.
    pi, lambda_ : floats shown in the title.
    nperiods : int
        Number of lags to plot.
    log : bool
        If True the natural log of `df` is differenced instead of `df`.
    figkwargs : dict, optional
        Passed to the fig constructor (defaults to a 13 x 8 figure).
    axkwargs : dict, optional
        Extra keyword arguments for the histogram call.

    Returns
    -------
    fig, ax
    """
    # Bin edges are derived from the data before any log transform.
    idx = get_index(df)
    SOME_SS = 30  # just some period in the steady state.
    diffs = range(1, nperiods + 1)

    if log:
        df = np.log(df)
        strlog = ', (log scale),'  # see title formatting
    else:
        strlog = ''

    t = pd.concat([df.diff(x).iloc[SOME_SS] for x in diffs], axis=1,
                  keys=diffs)

    _figkwargs = {'figsize': (13, 8)}  # Leading _ is internal.
    if figkwargs is not None:
        _figkwargs.update(figkwargs)

    _axkwargs = {}
    if axkwargs is not None:
        _axkwargs.update(axkwargs)

    fig, ax = plt.subplots(**_figkwargs)
    cts, idx, other = hist(t.values, histtype='bar', bins=idx,
                           label=['lag={}'.format(i) for i in diffs],
                           ax=ax, normed=True, **_axkwargs)

    # Raw string: "\p" and "\l" are not valid escape sequences. The text
    # passed to matplotlib is unchanged.
    ax.set_title(r'Across Periods{0} $\pi={1:.3f}$, $\lambda={2:.3f}$'.format(
        strlog, pi, lambda_))
    ax.legend()
    return fig, ax
def Fit_Gaussian(Cf, p_guess, bins, Mass=False):
    """
    Fit a sum of Gaussians to the normalised histogram of ``abs(Cf)``.

    Each time ``curve_fit`` raises RuntimeError the last three entries of
    `p_guess` are dropped and the fit is retried. The fitted curve is plotted
    over 0..1500 (`Mass` True) or 0..0.2 and the figure is shown.

    Returns the centres, amplitudes and widths as arrays (every first, second
    and third optimal parameter respectively) plus the full parameter vector.
    """
    plt.figure()
    (n, bins, pathces) = hist(abs(Cf), bins=bins, normed=True, align='mid')
    print("Number of Bins:", len(bins))

    # Retry with one Gaussian fewer whenever the optimiser gives up.
    converged = False
    while not converged:
        try:
            popt, pcov = curve_fit(func, bins[:-1], n, p0=p_guess)
            converged = True
        except RuntimeError:
            print("Error - curve_fit failed")
            p_guess = p_guess[:-3]

    mass_mode = Mass == True
    upper_limit = 1500 if mass_mode else 0.2
    x = np.linspace(0.0, upper_limit, 1500)
    fit = func(x, *popt)

    plt.plot(x, fit, 'b--')
    plt.yticks([])
    axis_label = "Mass (kDa)" if mass_mode else "Contrast"
    plt.xlabel(axis_label, fontsize=12, color='blue')
    plt.ylabel("Probability Density", fontsize=12, color='blue')
    plt.show()

    # Parameters are stored as consecutive (centre, amplitude, width) triples.
    starts = range(0, len(popt), 3)
    ctr_opt = np.array([popt[k] for k in starts])
    amp_opt = np.array([popt[k + 1] for k in starts])
    wid_opt = np.array([popt[k + 2] for k in starts])

    return ctr_opt, amp_opt, wid_opt, popt
def main():
    """
    Print a "DoublePDF[...]" expression for the data in a text file.

    The file named by the first command-line argument is read with
    ``np.genfromtxt`` and histogrammed with Bayesian blocks. The output pairs
    every bin boundary with a probability: 0 for the first boundary, then the
    count of the bin ending at that boundary divided by the total count.
    Without an argument a usage message is printed and the process exits with
    status 1.
    """
    import sys

    # Only a missing argument is expected here; a bare `except` would also
    # have hidden unrelated errors.
    if len(sys.argv) < 2:
        print("usage: " + sys.argv[0] + " <path_to_csv_file>")
        sys.exit(1)
    csv_path = sys.argv[1]

    df = np.genfromtxt(csv_path)
    h = hist(df, bins='blocks', histtype='step', normed=False,
             label='standard histogram')

    frequencies = [0] + h[0].tolist()
    boundaries = h[1].tolist()

    # The total is loop-invariant; compute it once.
    total = sum(frequencies)
    pairs = ["(" + str(b) + ";" + str(f / total) + ")"
             for f, b in zip(frequencies, boundaries)]
    stoex = "DoublePDF[" + "".join(pairs) + "]"
    print(stoex)
def plot_redshifts():
    """
    Show normalised step histograms of the finite redshifts in the GZ2 table.

    The 'redshift' column of HDU 1 is read from a hard-coded FITS path and
    drawn with the 'scotts' (blue), 'freedman' (yellow) and 'knuth' (yellow)
    binning rules on one set of axes. Returns None.
    """
    gz2main_fitsfile = '/Users/willettk/Astronomy/Research/GalaxyZoo/fits/gz2main_table_sample.fits'

    hdulist = pyfits.open(gz2main_fitsfile)
    gz2main = hdulist[1].data
    hdulist.close()

    redshift_all = gz2main['redshift']
    finite_mask = np.isfinite(redshift_all)
    redshift_finite = redshift_all[finite_mask]

    ax = plt.figure().add_subplot(111)

    # One histogram per binning rule, in drawing order.
    for rule, colour in (('scotts', 'b'), ('freedman', 'y'), ('knuth', 'y')):
        hist(redshift_finite, bins=rule, ax=ax, histtype='step',
             color=colour, normed=True)

    ax.set_xlabel('Redshift')
    ax.set_ylabel('Frequency')

    plt.show()

    return None
neighbordist = neighbordist[:,1] # ignore self-match neighbori = neighbori[:,1] # ignore self-match # find 3D neighbors with Z_Mpcnopec coordsnopec = np.array([X_Mpcnopec, Y_Mpcnopec, Z_Mpcnopec]).T kdtnopec = cKDTree(coordsnopec) neighbordistnopec, neighborinopec = kdtnopec.query(coordsnopec, k=2) neighbordistnopec = neighbordistnopec[:,1] # ignore self-match neighborinopec = neighborinopec[:,1] # ignore self-match # plot histograms of distances with optimal binning plt.figure(1) plt.clf() #hist(neighbordist,bins='freedman',label='freedman',normed=1,histtype='stepfilled',color='green',alpha=0.5) #hist(neighbordist,bins='scott',label='scott',normed=1,histtype='step',color='purple',alpha=0.5,hatch='///') hist(neighbordist,bins='knuth',label='knuth',normed=1,histtype='stepfilled',color='blue',alpha=0.25) plt.xlim(0,6) plt.xlabel("dist (Mpc)") plt.title("Allowing peculiar motions, false Delta Z-dist within groups") plt.legend(loc="best") plt.figure(2) plt.clf() #hist(neighbordistnopec,bins='freedman',label='freedman',normed=1,histtype='stepfilled',color='green',alpha=0.5) #hist(neighbordistnopec,bins='scott',label='scott',normed=1,histtype='step',color='purple',alpha=0.5,hatch='///') n0, bins0, patches0 = hist(neighbordistnopec,bins='knuth',label='knuth',normed=1,histtype='stepfilled',color='blue',alpha=0.25) plt.xlim(0,6) plt.xlabel("dist (Mpc)") plt.title("No peculiar motions, zero Delta Z-dist within groups") plt.legend(loc="best")
t = np.linspace(-10, 30, 1000) # Compute density with KDE kde = KDE('gaussian', h=0.1).fit(xN[:, None]) dens_kde = kde.eval(t[:, None]) / N # Compute density with Bayesian nearest neighbors nbrs = KNeighborsDensity('bayesian', n_neighbors=k).fit(xN[:, None]) dens_nbrs = nbrs.eval(t[:, None]) / N # plot the results #ax.plot(t, true_pdf(t), ':', color='black', zorder=3, # label="Generating Distribution") ax.plot(xN, -0.005 * np.ones(len(xN)), '|k', lw=1.5) hist(xN, bins='blocks', ax=ax, normed=True, zorder=1, histtype='stepfilled', lw=1.5, color='k', alpha=0.2, label="Bayesian Blocks") ax.plot(t, dens_nbrs, '-', lw=2, color='gray', zorder=2, label="Nearest Neighbors (k=%i)" % k) ax.plot(t, dens_kde, '-', color='black', zorder=3, label="Kernel Density (h=0.1)") # label the plot ax.text(0.02, 0.95, "%i points" % N, ha='left', va='top', transform=ax.transAxes) ax.set_ylabel('$p(x)$') ax.legend(loc='upper right', prop=dict(size=12)) if subplot == 212: ax.set_xlabel('$x$')
# print j # peakabsmagvalue = [peakabsmagvalueb,peakabsmagvaluec] # print peakabsmagvalueb # hist(peakabsmagvalueb, bins = 'knuth', label = str(ib) + ' Ib datapoints', color = 'blue', histtype='stepfilled', alpha=0.2)#, stacked=True) # hist(peakabsmagvaluec, bins = 'knuth', label = str(ic) + ' Ic datapoints', color ='green', histtype='stepfilled', alpha=0.2, des)#, stacked=True) # plotting best fit gaussian plt.subplot(221) result = hist( peakabsmagvalueb, bins="knuth", label=str(ib) + " Ib datapoints", color="blue", histtype="stepfilled", alpha=0.2 ) # , stacked=True) mean = np.mean(peakabsmagvalueb) variance = np.var(peakabsmagvalueb) sigma = np.sqrt(variance) x = np.linspace(-23, -13, 100) dx = result[1][1] - result[1][0] scale = len(peakabsmagvalueb) * dx plt.plot( x, mlab.normpdf(x, mean, sigma) * scale, label="Best fit, mean: " + str(round(mean, 3)) + " sigma: " + str(round(sigma, 3)), )
# Pack the image catalogue positions into an (N, 2) array: ra, dec.
imX = np.empty((len(image_data), 2), dtype=np.float64)
imX[:, 0] = image_data['ra']
imX[:, 1] = image_data['dec']

# get standard stars
standards_data = fetch_sdss_S82standards()
stX = np.empty((len(standards_data), 2), dtype=np.float64)
stX[:, 0] = standards_data['RA']
stX[:, 1] = standards_data['DEC']

# crossmatch catalogs
max_radius = 1. / 3600 # 1 arcsec
dist, ind = crossmatch_angular(imX, stX, max_radius)

# Entries with an infinite distance are treated as unmatched.
match = ~np.isinf(dist)

# Distances of the matched objects, scaled by 3600 (the axis label below
# gives them in arcsec).
dist_match = dist[match]
dist_match *= 3600

ax = plt.axes()
hist(dist_match, bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='#AAAAAA')
ax.set_xlabel('radius of match (arcsec)')
ax.set_ylabel('N(r, r+dr)')
ax.text(0.95, 0.95, "Total objects: %i\nNumber with match: %i" % (imX.shape[0], np.sum(match)), ha='right', va='top', transform=ax.transAxes)
ax.set_xlim(0, 0.2)
plt.show()
def plot_hist(x, filename):
    """
    Save a Scott-rule histogram of `x` to `filename`.

    A 10 x 10 figure is created for the plot and closed again afterwards,
    also when drawing or saving fails.
    """
    fig = plt.figure(figsize=(10, 10))
    try:
        hist(x, bins='scott')
        plt.savefig(filename)
    finally:
        # Close the figure even on failure so it does not stay in memory.
        plt.close(fig)
#------------------------------------------------------------ # plot the results fig = plt.figure(figsize=(8, 8)) fig.subplots_adjust() N_values = (500, 5000) subplots = (211, 212) for N, subplot in zip(N_values, subplots): ax = fig.add_subplot(subplot) xN = x[:N] t = np.linspace(-10, 30, 1000) # plot the results ax.plot(xN, -0.005 * np.ones(len(xN)), '|k', lw=1.5) hist(xN, bins='knuth', ax=ax, normed=True, histtype='stepfilled', alpha=0.3, label='Knuth Histogram') hist(xN, bins='blocks', ax=ax, normed=True, histtype='step', lw=1.5, color='k', label="Bayesian Blocks") ax.plot(t, true_pdf(t), '-', color='black', label="Generating Distribution") # label the plot ax.text(0.02, 0.95, "%i points" % N, ha='left', va='top', transform=ax.transAxes) ax.set_ylabel('$p(x)$') ax.legend(loc='upper right', prop=dict(size=12)) if subplot == 212: ax.set_xlabel('$x$')
def bayes_block(x_data, filename, format, x_label = '', title = '',
                log_x = False, log_y = False):
    '''
    Save a normalised Bayesian Blocks histogram of the given data.

    Input
    x_data: The data to be graphed. It must offer a flatten() method (e.g. a
            Numpy array); it is flattened to one dimension before the
            histogram is made.
    filename: The filename (including extension) used when saving the image.
    format: The format (e.g. png, jpeg) in which the image is saved.
    x_label: Label of the x-axis. Defaults to an empty string.
    title: Title of the graph. Defaults to an empty string.
    log_x: If True, the x-axis of the figure is set to a logarithmic scale.
           Only the axis scale changes; the bins are still derived from the
           data as given.
    log_y: Passed to the histogram call as its log argument.

    Output
    The figure is saved under the given filename, a confirmation message is
    printed and the figure is closed. 1 is returned on completion.
    '''
    # New default-size figure with a single set of axes
    figure = plt.figure()
    axis = figure.add_subplot(111)

    # Switch the x-axis to a logarithmic scale on request
    if log_x == True:
        axis.set_xscale('log')

    # Normalised Bayesian Blocks histogram of the flattened data
    flat_data = x_data.flatten()
    aML.hist(flat_data, bins = 'blocks', normed = True, log = log_y)

    # Labels and title
    plt.xlabel(x_label)
    plt.ylabel('Counts')
    plt.title(title)

    # Write the image, report success and release the figure
    plt.savefig(filename, format = format)
    print(filename + ' created successfully.\n')
    plt.close()

    return 1
'GRPCZ', 'FC', 'LOGMH', 'DEN1MPC'. """
# Read the catalogue as a structured array with named columns.
data = np.genfromtxt("ECO_dr1_subset.csv", delimiter=",", dtype=None, names=True)
name = data['NAME']
logmstar = data ['LOGMSTAR']
urcolor = data['MODELU_RCORR']
cz = data['CZ']

# Keep entries with a colour above -99 and LOGMSTAR above 10.
goodur = (urcolor > -99) & (logmstar > 10.)
colors=urcolor[goodur]

# First plot histograms of u-r color with different bin width "rules"
plt.figure(1)
plt.clf()
hist(colors,bins='freedman',label='freedman',normed=1,histtype='stepfilled',color='green',alpha=0.5)
hist(colors,bins='scott',label='scott',normed=1,histtype='step',color='purple',alpha=0.5,hatch='///')
# note the different format used below so as to save the bin info for Knuth's rule
n0, bins0, patches0 = hist(colors,bins='knuth',label='knuth',normed=1,histtype='stepfilled',color='blue',alpha=0.25)
plt.xlim(0,3)
plt.xlabel("u-r color (mag)")
plt.title("Galaxy Color Distribution")
plt.legend(loc="best")
# As in Fig. 5.20 (p. 227), Scott's rule makes broader bins.

# Now give Kernel Density Estimation a try. KDE is shown in Ivezic+ Fig. 6.1
# but we'll use this newer version: sklearn.neighbors.KernelDensity -- see
# http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
# Bandwidth: half the spacing between the second and third Knuth bin edges.
bw = 0.5*(bins0[2]-bins0[1]) # initially using 0.5*Knuth binsize from above as bandwidth; should test other values
ax1.set_ylabel('P(entropy)')

# Right-hand panel of the first figure: 1000 fixed-width bins.
ax2 = fig.add_subplot(122)
ax2.hist(ent, bins=1000, histtype='stepfilled', alpha=0.2, normed=True)
ax2.set_xlabel('entropy')
ax2.set_ylabel('P(entropy)')

#------------------------------------------------------------
# Second & Third figure: Knuth bins & Bayesian Blocks
fig = plt.figure(figsize=(10, 4))
fig.subplots_adjust(left=0.1, right=0.95, bottom=0.15)

# One panel per adaptive binning rule.
for bins, title, subplot in zip(['knuth', 'blocks'], ["Knuth's rule", 'Bayesian blocks'], [121, 122]):
    ax = fig.add_subplot(subplot)

    # plot a standard histogram in the background, with alpha transparency
    hist(ent, bins=200, histtype='stepfilled', alpha=0.2, normed=True, label='standard histogram')

    # plot an adaptive-width histogram on top
    hist(ent, bins=bins, ax=ax, color='black', histtype='step', normed=True, label=title)

    ax.legend(prop=dict(size=12))
    ax.set_xlabel('WTS')
    ax.set_ylabel('P(WTS)')

plt.show()
#------------------------------------------------------------ # plot the r vs u-r color-magnitude diagram u = data['modelMag_u'] r = data['modelMag_r'] rPetro = data['petroMag_r'] plt.figure() ax = plt.axes() plt.scatter(u - r, rPetro, s=1, lw=0, c=data['z'], cmap=plt.cm.copper, vmin=0, vmax=0.4) plt.colorbar(ticks=np.linspace(0, 0.4, 9)).set_label('redshift') plt.xlim(0.5, 5.5) plt.ylim(18, 12.5) plt.xlabel('u-r') plt.ylabel('rPetrosian') #------------------------------------------------------------ # plot a histogram of the redshift from astroML.plotting import hist plt.figure() hist(data['z'], bins='knuth', histtype='stepfilled', ec='k', fc='#F5CCB0') plt.xlim(0, 0.4) plt.xlabel('z (redshift)') plt.ylabel('dN/dz(z)') plt.show()
# crossmatch catalogs max_radius = 3600. / 3600 # 1 arcsec dist, ind = crossmatch_angular(imX, stX, max_radius) #dist, ind = crossmatch_angular(c1, c2, max_radius) match = ~np.isinf(dist) dist_match = dist[match] dist_match *= 3600 print('Number with match:', np.sum(match)) print('PN:', imX[match]) sys.exit() ax = plt.axes() hist(dist_match, bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='#AAAAAA') ax.set_xlabel('radius of match (arcsec)') ax.set_ylabel('N(r, r+dr)') ax.text(0.95, 0.95, "Total objects: %i\nNumber with match: %i" % (imX.shape[0], np.sum(match)), ha='right', va='top', transform=ax.transAxes) ax.set_xlim(0, 2500) plt.savefig("number-macht-jplusstripe82-hash.pdf")
""" Start with the same data as in dists1.py """
# Read the catalogue as a structured array with named columns.
data = np.genfromtxt("ECO_DR1_withradec.csv", delimiter=",", dtype=None, names=True)
name = data['NAME']
urcolor = data['MODELU_RCORR']

# Entries with a colour above -99 are used.
goodur = (urcolor > -99)

''' In the previous activity we used Kernel Density Estimation from scikit-learn, specifically sklearn.neighbors.KernelDensity, but we chose the bandwidth in an ad hoc way, taking half of the Knuth histogram bin width. '''
n0, bins0, patches0 = hist(urcolor[np.where(goodur)],bins='knuth',label='knuth',normed=1,histtype='stepfilled',color='blue',alpha=0.25)

# Knuth bin width, taken as the spacing between the second and third edges.
knuthbw = (bins0[2]-bins0[1])
bw = 0.5*knuthbw

''' Ivezic et al. suggested that cross-validation could be a good way to optimize the bandwidth, so let's try it. '''
# NOTE(review): `input` shadows the Python builtin of the same name from
# here on.
input = np.load("crossvalidationflag.npz")
flag12 = input['flag12']

''' Instead of leave-one-out cross-validation as described on p. 254, we'll use regular cross-validation, with sample 1 as the training set (50% of data) and samples 2a and 2b as the cross-validation and test sets (25% of data each).
# One panel per sample size: the first N points of x are shown with a Knuth
# histogram, a Bayesian-blocks histogram and the curve true_pdf(t).
# NOTE(review): x and true_pdf are defined outside this fragment.
fig = plt.figure(figsize=(5, 5))
fig.subplots_adjust(bottom=0.08, top=0.95, right=0.95, hspace=0.1)
N_values = (500, 5000)
subplots = (211, 212)

for N, subplot in zip(N_values, subplots):
    ax = fig.add_subplot(subplot)

    xN = x[:N]
    t = np.linspace(-10, 30, 1000)

    # plot the results
    # Tick marks just below zero mark the individual data points.
    ax.plot(xN, -0.005 * np.ones(len(xN)), '|k')
    hist(xN, bins='knuth', ax=ax, normed=True, histtype='stepfilled', alpha=0.3, label='Knuth Histogram')
    hist(xN, bins='blocks', ax=ax, normed=True, histtype='step', color='k', label="Bayesian Blocks")
    ax.plot(t, true_pdf(t), '-', color='black', label="Generating Distribution")
names=True)
name = data['NAME']
logmstar = data['LOGMSTAR']
urcolor = data['MODELU_RCORR']
cz = data['CZ']

# Keep entries with a colour above -99 and LOGMSTAR above 10.
goodur = (urcolor > -99) & (logmstar > 10.)
colors = urcolor[goodur]

# First plot histograms of u-r color with different bin width "rules"
plt.figure(1)
plt.clf()
hist(colors, bins='freedman', label='freedman', normed=1, histtype='stepfilled', color='green', alpha=0.5)
hist(colors, bins='scott', label='scott', normed=1, histtype='step', color='purple', alpha=0.5, hatch='///')
# note the different format used below so as to save the bin info for Knuth's rule
n0, bins0, patches0 = hist(colors, bins='knuth', label='knuth',
import mpl_toolkits

pl.rcParams['font.size'] = 20

# Axis label used for log H2 density
nh2 = r'$\log(n(H_2))$ [cm$^{-3}$]'

# The same FITS file is read twice: once as a raw array, once as a
# pyspeckit Cube.
dens = fits.getdata('W51_H2CO11_to_22_logdensity_supersampled.fits')
cube = pyspeckit.Cube('W51_H2CO11_to_22_logdensity_supersampled.fits')

# NaN != NaN, so this boolean mask selects the non-NaN voxels
densOK = dens==dens

pl.figure(2)
pl.clf()
ax = pl.subplot(1,2,1)
densp = dens[densOK]
counts,bins,patches = ampl.hist(densp, bins=100, log=True, histtype='step',
                                linewidth=2, alpha=0.8, color='k')
# Remember the y-limits as set by the histogram
ylim = ax.get_ylim()

# Treat the histogram (bin centers vs. counts) as a spectrum and fit it.
# NOTE(review): the nine guesses look like three (amplitude, center, width)
# triples for a three-component fit -- confirm against pyspeckit's default
# fitter parameter order.
sp = pyspeckit.Spectrum(xarr=(bins[1:]+bins[:-1])/2.,data=counts)
sp.specfit(guesses= [660.23122694399035, 3.1516848752486522, 0.33836811902343894,
                     396.62714060001434, 2.5539176548294318, 0.32129608858734149,
                     199.13259679527025, 3.730112763513838, 0.4073913996012487])

def ntuples(lst, n):
    """
    Group `lst` into consecutive n-tuples.

    For each offset i in range(n) the strided slice lst[i::n] is taken and
    lst[:i:n] is appended to it, then the n slices are zipped together.
    Because i < n, the appended part is at most the first element of `lst`,
    which pads an incomplete final tuple.

    NOTE(review): `+` is used for concatenation, so this assumes `lst` is a
    list/tuple (for a numpy array `+` would add element-wise) -- confirm
    against callers.
    """
    return zip(*[lst[i::n]+lst[:i:n] for i in range(n)])
cm = cm.get_cmap('jet_r') #a/b, axis ratio, compared with RAS #ba = convert(db.dbUtils.getFromDB('ba', dbDir+'CALIFA.sqlite', 'nadine')) ba = convert(db.dbUtils.getFromDB('isoB_r/isoA_r', dbDir+'CALIFA.sqlite', 'mothersample')) ba = np.reshape(ba, (ba.shape[0], )) ba_ras = convert(db.dbUtils.getFromDB('isoB_r/isoA_r', dbDir+'RAS.sqlite', 'RAS', ' where isoA_r > 15 and petroMag_r < 20')) ba_ras = np.reshape(ba_ras, (ba_ras.shape[0], )) fig = plt.figure(figsize=(8, 8)) ax = fig.add_subplot(111) fig.subplots_adjust(left=0.1, right=0.95, bottom=0.15) n, bins, patches = hist(ba, normed=True, bins='knuth', color='red', alpha=0) hist(ba_ras, normed=True, bins=bins, label='RAS b/a', histtype='stepfilled', color='grey', alpha=1, hatch='o') hist(ba, normed=True, bins=bins, alpha=0.8, label='CALIFA b/a', color='red') plt.legend() plt.xlabel("Isophotal b/a") plt.savefig('ba_hist_RAS', bbox_inches='tight') exit() #apparent r magnitude comparison concentration = convert(db.dbUtils.getFromDB('re/r90', dbDir+'CALIFA.sqlite', 'nadine', ' where califa_id in '+petro_ids)) r_mag = convert(db.dbUtils.getFromDB('el_mag', dbDir+'CALIFA.sqlite', 'r_tot', ' where califa_id in '+petro_ids)) petroMag_r = convert(db.dbUtils.getFromDB('petromag_r', dbDir+'CALIFA.sqlite', 'mothersample', ' where califa_id in'+petro_ids)) r_mag = np.reshape(r_mag, (r_mag.shape[0], )) petroMag_r = np.reshape(petroMag_r, (petroMag_r.shape[0], )) fig = plt.figure(figsize=(8, 8))
nbrs = KNeighborsDensity('bayesian', n_neighbors=k).fit(xN[:, None]) dens_nbrs = nbrs.eval(t[:, None]) / N # plot the results ax.plot(t, true_pdf(t), ':', color='black', zorder=3, label="Generating Distribution") ax.plot(xN, -0.005 * np.ones(len(xN)), '|k') hist(xN, bins='blocks', ax=ax, normed=True, zorder=1, histtype='stepfilled', color='k', alpha=0.2, label="Bayesian Blocks") ax.plot(t, dens_nbrs, '-', lw=1.5, color='gray', zorder=2, label="Nearest Neighbors (k=%i)" % k) ax.plot(t, dens_kde, '-', color='black',
#Introduce some noise with both measurement uncertainties # and non-trivial correlated errors. yerr = 0.1 + 0.4 * np.random.rand(N) iid_cov = np.diag(yerr**2) true_cov = 0.5 * np.exp(-0.5 * (x[:, None] - x[None, :])**2 / 1.3**2) + np.diag(yerr** 2) y = np.random.multivariate_normal(y, true_cov) #y = np.random.multivariate_normal(y, iid_cov) ''' Plot I: #Make a histogram of the noise ''' fig = plt.figure(figsize=(8, 8)) fig.subplots_adjust(left=0.11, right=0.95, wspace=0.3, bottom=0.17, top=0.9) ax = fig.add_subplot(111) hist(yerr, bins='knuth', ax=ax, normed=True, histtype='stepfilled', alpha=0.4) ax.set_xlabel('$y_{err}$') ax.set_ylabel('$p(y_{err})$') plt.savefig("figures/yerr.pdf") plt.close() ''' Plot II: #Make an image of the noise ''' #Visualize the covariance fig, ax = plt.subplots(1) ppl.pcolormesh(fig, ax, true_cov) fig.savefig('figures/pplLineCov.png') #plt.show() plt.close() ''' Plot III: #Data vs 'truth'
'e5', 'e4', 'e6', 'e1', 'e3', 'e2', ] pl.figure(2) pl.clf() for ii, frq in enumerate(fluxes.keys()): pl.subplot(3, 3, ii + 1) d = np.array([fluxes[frq][x] for x in pointsources]) ampl.hist(3 + np.log10(d[d > 0]), bins=10, log=True, alpha=0.5, histtype='stepfilled') pl.title(frq) pl.figure(3) pl.clf() for ii, frq in enumerate(peaks.keys()): pl.subplot(3, 3, ii + 1) d = np.array([peaks[frq][x] for x in pointsources]) ampl.hist(3 + np.log10(d[d > 0]), bins=10, log=True, alpha=0.5, histtype='stepfilled') pl.title(frq)
tck = interpolate.splrep(Px_cuml, x) # sample evenly along the cumulative distribution, and interpolate Px_cuml_sample = np.linspace(0, 1, 10 * Ndata) x_sample = interpolate.splev(Px_cuml_sample, tck) #------------------------------------------------------------ # Plot the cloned distribution and the procedure for obtaining it fig = plt.figure(figsize=(5, 5)) fig.subplots_adjust(hspace=0.3, left=0.1, right=0.95, bottom=0.08, top=0.92) indices = np.linspace(0, Ndata - 1, 20).astype(int) # plot a histogram of the input ax = fig.add_subplot(221) hist(x, bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='#AAAAAA') ax.set_ylim(0, 300) ax.set_title('Input data distribution') ax.set_xlabel('$x$') ax.set_ylabel('$N(x)$') # plot the cumulative distribution ax = fig.add_subplot(222) ax.scatter(x[indices], Px_cuml[indices], lw=0, c='k', s=9) ax.plot(x, Px_cuml, '-k') ax.set_xlim(-3, 3) ax.set_ylim(-0.05, 1.05) ax.set_title('Cumulative Distribution') ax.set_xlabel('$x$') ax.set_ylabel('$p(<x)$')
from paths import dpath,fpath

pl.rcParams['font.size'] = 20

# Load the two optical-depth cubes and their ratio cube
h2co11 = fits.getdata(dpath('W51_H2CO11_taucube_supersampled.fits'))
h2co22 = fits.getdata(dpath('W51_H2CO22_pyproc_taucube_lores_supersampled.fits'))
ratio = fits.getdata(dpath('W51_H2CO11_to_22_tau_ratio_supersampled_neighbors.fits'))

# Start from a fresh figure 1
pl.close(1)
pl.figure(1, figsize=(10,10))
pl.clf()

# --- Panel 1: histogram of the 1-1 cube ---
ax1 = pl.subplot(3,1,1)
# NaN != NaN, so this keeps only the non-NaN voxels
oneone = h2co11[h2co11==h2co11]
counts, bins, patches = ampl.hist(oneone, bins=100, log=True, histtype='step',
                                  linewidth=2, alpha=0.8, color='k')
# Save the y-limits so the Gaussian overlay does not rescale the axis
ylim = ax1.get_ylim()
med, mad = np.median(oneone),MAD(oneone)
# Gaussian centered on the median with width `mad`, scaled to the peak count
pl.plot(bins,counts.max()*np.exp(-(bins-med)**2/(2*mad**2)),'r--')
ax1.set_ylim(*ylim)
ax1.set_xlabel("$\\tau_{obs}($H$_2$CO 1-1$)$", labelpad=10)
ax1.set_ylabel("$N($voxels$)$")

# --- Panel 2: same procedure for the 2-2 cube ---
ax2 = pl.subplot(3,1,2)
twotwo = h2co22[h2co22==h2co22]
counts, bins, patches = ampl.hist(twotwo, bins=100, log=True, histtype='step',
                                  linewidth=2, alpha=0.8, color='k')
ylim = ax2.get_ylim()
med, mad = np.median(twotwo),MAD(twotwo)
pl.plot(bins,counts.max()*np.exp(-(bins-med)**2/(2*mad**2)),'r--')
ax2.set_ylim(*ylim)
ax2.set_xlabel("$\\tau_{obs}($H$_2$CO 2-2$)$", labelpad=10)
def showSkyHist(skypix, skypix2=None, skypix3=None, sbExpt=None,
                pngName='skyhist.png', skyAvg=None, skyStd=None,
                skyMed=None, skySkw=None, savePng=True):
    """
    Plot the distribution of sky pixels.

    Draws a normalized Knuth-binned histogram of `skypix`, optionally
    overlays up to two more pixel samples, marks zero and the median with
    vertical lines, and annotates the panel with summary statistics.

    Parameters:
        skypix  : Array of sky pixel values; drawn as a filled cyan
                  histogram and used for every statistic computed here.
        skypix2 : Optional second sample, drawn as a solid black step
                  histogram.
        skypix3 : Optional third sample, drawn as a dashed black step
                  histogram.
        sbExpt  : Optional number printed on the plot as "S.B"
                  (presumably an expected surface brightness -- confirm
                  with the caller).
        pngName : Output file name used when `savePng` is True.
        skyAvg, skyStd, skyMed, skySkw :
                  Optional pre-computed mean, standard deviation, median
                  and skewness.  Each one that is None is computed from
                  `skypix`, ignoring NaN values.
        savePng : If True, save the figure to `pngName` and close it.
                  If False, the figure is left open.

    Returns:
        None
    """
    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111)
    fig.subplots_adjust(hspace=0.1, wspace=0.1,
                        top=0.95, right=0.95)

    fontsize = 18
    ax.minorticks_on()
    for axis in (ax.xaxis, ax.yaxis):
        for tick in axis.get_major_ticks():
            tick.label1.set_fontsize(fontsize)

    # The (counts, bins, patches) return values are not needed here
    hist(skypix, bins='knuth', ax=ax, alpha=0.4, color='cyan',
         histtype='stepfilled', normed=True)
    if skypix2 is not None:
        hist(skypix2, bins='knuth', ax=ax, alpha=0.9, color='k',
             histtype='step', normed=True, linewidth=2)
    if skypix3 is not None:
        hist(skypix3, bins='knuth', ax=ax, alpha=0.8, color='k',
             histtype='step', normed=True, linewidth=2,
             linestyle='dashed')

    # Vertical reference line at a pixel value of zero
    ax.axvline(0.0, linestyle='-', color='k', linewidth=1.5)

    # Basic properties of the sky pixels
    skyMin = np.nanmin(skypix)
    skyMax = np.nanmax(skypix)
    if skyAvg is None:
        skyAvg = np.nanmean(skypix)
    if skyStd is None:
        skyStd = np.nanstd(skypix)
    if skyMed is None:
        skyMed = np.nanmedian(skypix)
    if not np.isfinite(skyMed):
        skyMed = np.median(skypix)
    if skySkw is None:
        # scipy.stats.skew propagates NaN by default, which would make the
        # skewness NaN even though every other statistic above ignores NaN.
        # Compute it on the finite pixels only, flattened so that a
        # multi-dimensional input still yields a single number.
        finitePix = np.asarray(skypix, dtype=float).ravel()
        finitePix = finitePix[np.isfinite(finitePix)]
        skySkw = scipy.stats.skew(finitePix)

    # Highlight the median of the sky pixel distribution
    ax.axvline(skyMed, linestyle='--', color='b', linewidth=1.5)

    ax.set_xlabel('Pixel Value', fontsize=20)
    # Asymmetric window around the mean: 4 sigma below, 5 sigma above
    ax.set_xlim(skyAvg - 4.0 * skyStd, skyAvg + 5.0 * skyStd)

    # Show the statistics as a column of text, one row every 0.1 in axes
    # coordinates starting at y = 0.9
    annotations = [
        ("Min : %8.4f", skyMin),
        ("Max : %8.4f", skyMax),
        ("Avg : %8.4f", skyAvg),
        ("Std : %8.4f", skyStd),
        ("Med : %8.4f", skyMed),
        ("Skew: %8.4f", skySkw),
    ]
    if sbExpt is not None:
        annotations.append(("S.B : %8.5f", sbExpt))
    for row, (template, value) in enumerate(annotations):
        ax.text(0.7, (9 - row) / 10.0, template % value,
                fontsize=21, transform=ax.transAxes)

    if savePng:
        fig.savefig(pngName, dpi=70)
        plt.close(fig)
ctr = params[i] amp = params[i+1] wid = params[i+2] y = y + amp * np.exp( -((x - ctr)/wid)**2) return y guess = [0.0055,4000,0.001,0.0105,700,0.002,0.0145,200,0.002,0.019,60,0.002,0.025,15,0.002] LS_output = np.zeros(len(guess)) ODR_output = np.zeros(len(guess)) X_tot = np.zeros((5,500)) Y_tot = np.zeros((5,500)) plt.figure() for i in range(5): Cf_temp = Cf*10**i n, bins, p = hist(Cf_temp,bins='knuth') #data = Data(bins[:-1],n) #model = Model(func) new_guess = [] for j in range(0,len(guess),3): new_guess.append(guess[j]*10**i) new_guess.append(guess[j+1]) new_guess.append(guess[j+2]*10**i) print(new_guess) popt, pcov = curve_fit(func,bins[:-1],n,p0=new_guess) xn = np.linspace(0,0.03*(10**i),500) yn = func(xn,*popt)
]) logp2 = get_logp(S2, model2) return trace1, logp1, trace2, logp2 trace1, logp1, trace2, logp2 = compute_MCMC_models() #------------------------------------------------------------ # Compute Odds ratio with density estimation technique BF1, dBF1 = estimate_bayes_factor(trace1, logp1, r=0.02) BF1_list = estimate_bayes_factor(trace1, logp1, r=0.02, return_list=True) BF2, dBF2 = estimate_bayes_factor(trace2, logp2, r=0.05) BF2_list = estimate_bayes_factor(trace2, logp2, r=0.05, return_list=True) print "Bayes Factor (Single Gaussian): Median = {0:.3f}, p75-p25 = {1:.3f}".format( BF1, dBF1) print "Bayes Factor (Double Gaussian): Median = {0:.3f}, p75-p25 = {1:.3f}".format( BF2, dBF2) print np.sum(BF1_list), np.sum(BF2_list) BF1_list_plot = BF1_list[(BF1_list >= BF1 - 1. * dBF1) & (BF1_list <= BF1 + 1. * dBF1)] BF2_list_plot = BF2_list[(BF2_list >= BF2 - 1. * dBF2) & (BF2_list <= BF2 + 1. * dBF2)] ax = plt.figure().add_subplot(111) hist(BF1_list_plot, bins='knuth', ax=ax, normed=True, color='red', alpha=0.25) hist(BF2_list, bins='knuth', ax=ax, normed=True, color='green', alpha=0.25) ax.figure.savefig('figure_5-24_BFhist.png', dpi=300)
def convert_catalog(cat_table): ''' This function selects stars from a size-mag catalogue Inputs: cat_table: directory of catalogue of all sources on an image. Must contain FWHM_IMAGE or FLUX_RADIUS; MAG_APER or FLUX_APER; FLAGS; VIGNET; etc cat_4PSFEx_table: FITS_LDAC catalogue ready to be processed by PSFEx. ''' path = os.getcwd() direc = cat_table.split('/')[-1] if not os.path.exists(direc): os.makedirs(direc) os.chdir(direc) hdu = p.open(cat_table) data = hdu[2].data reff = data['FLUX_RADIUS'] flux = data['FLUX_APER'] mags = 30 - 2.5 * n.log10(flux) flags = data['FLAGS'] mask = n.where((flux > 0) & (flags < 4) & (reff > 0))[0] reff1 = reff[mask] mags1 = mags[mask] s = n.argsort(mags1) mags1 = mags1[s] reff1 = reff1[s] perc = 0.1 i = 1 medians = [] mad_stds = [] gammas = [] chisq = [] centers = [] sigmas = [] while perc < 5: mask2 = n.where(mags1 < n.percentile(mags1, perc))[0] reff2 = reff1[mask2] mags2 = mags1[mask2] # N,bins,patches=hist(reff2,bins='scotts',label='clean, n_obj='+str(len(reff2)),normed=True,histtype='step') # hist(reff1,bins=bins,label='not so clean, n_obj='+str(len(reff1)),normed=True,histtype='step') # hist(reff,bins=bins,label='not clean at all, n_obj='+str(len(reff)),normed=True,histtype='step') # pl.legend() # pl.xlim(bins[0],bins[-1]) # pl.savefig('hist_'+str(i)+'.png') # pl.clf() # print 'done hist' medians.append(n.median(reff2)) mad_stds.append(mad_std(reff2)) N, bins, patches = hist(reff2, bins='scotts', histtype='stepfilled', color='g', normed=False) pl.axvline(n.median(reff2), color='red') pl.axvline(n.median(reff2) + mad_std(reff2), color='black') pl.axvline(n.median(reff2) - mad_std(reff2), color='black') pl.axvline(n.median(reff2) + n.std(reff2), color='grey') pl.axvline(n.median(reff2) - n.std(reff2), color='grey') X = reff2[:, n.newaxis] X_plot = n.linspace(min(X)[0], max(X)[0], 10000)[:, n.newaxis] kde = KDE(kernel='gaussian', bandwidth=min(mad_stds)).fit(X) ld = kde.score_samples(X_plot) pl.plot(X_plot[:, 0], n.exp(ld) * 
min(mad_stds) * len(reff2), lw=3) # pl.xscale('log') # pl.yscale('log') ''' model = SkewedGaussianModel() x = n.array([0.5 * (bins[i] + bins[i+1]) for i in xrange(len(bins)-1)]) pars = model.guess(N, x=x) result=model.fit(N,pars,x=x) print(result.fit_report()) pl.plot(x, result.best_fit,'k--',lw=3) ''' pl.savefig('hist_clean_log_' + str(i) + '.png') pl.clf() mask3 = n.where((reff2 >= n.median(reff2) - mad_std(reff2)) & (reff2 <= n.median(reff2) + mad_std(reff2)))[0] reff3 = reff2[mask3] N, bins, patches = hist(reff3, bins='scotts', histtype='stepfilled', color='g', alpha=.5, normed=True) x = n.array( [0.5 * (bins[j] + bins[j + 1]) for j in xrange(len(bins) - 1)]) #X=reff3[:,n.newaxis] x_plot = n.linspace(min(bins), max(bins), 100) #kde=KDE(kernel='epanechnikov',bandwidth=min(mad_stds)).fit(X) #ld=kde.score_samples(X_plot) #pl.plot(X_plot[:,0],n.exp(ld),lw=3) model = SkewedGaussianModel() pars = model.guess(N, x=x) result = model.fit(N, pars, x=x) gammas.append(result.params['gamma'].value) chisq.append(result.redchi) centers.append(result.params['center'].value) sigmas.append(result.params['sigma'].value) #print result.fit_report() pl.plot(x_plot, result.eval(x=x_plot), 'k--', lw=3) # pl.axvline(result.params['center'].value,color='red') # pl.axvline(result.params['center'].value+result.params['sigma'].value,color='grey') # pl.axvline(result.params['center'].value-result.params['sigma'].value,color='grey') pl.axvline(result.params['center'].value, color='red') pl.axvline(result.params['center'].value + result.params['sigma'].value, color='black') pl.axvline(result.params['center'].value - result.params['sigma'].value, color='black') mu, sigma = norm.fit(reff3, loc=max(result.eval(x=x_plot)), scale=result.params['sigma'].value) pdf = norm.pdf(x_plot, loc=mu, scale=sigma) pl.plot(x_plot, pdf, color='yellow') pl.savefig('hist_stellarseq_' + str(i) + '.png') pl.clf() print 'done hist stellarseq' pl.plot(reff, mags, 'b.', label='not clean at all', alpha=.5) 
#pl.plot(reff1,mags1,'k.',label='not so clean',alpha=.5) pl.plot(reff2, mags2, 'r.', label='clean, mag_lims = ' + str(n.min(mags2)) + ', ' + str(n.max(mags2)), alpha=.5) pl.axvline(n.median(reff2), color='green') pl.axhline(n.max(mags2), color='black') pl.xlim([0, 20]) pl.ylim([n.percentile(mags1, 99), n.percentile(mags1, 0)]) pl.legend() pl.savefig('magsize_' + str(i) + '.png') pl.clf() print 'done all' perc = perc + 0.05 print 'end loop ' + str(i) i = i + 1 pl.clf() f, axes = pl.subplots(2, 3, sharex=True) names = ('median', 'mad_std', 'gamma', 'redchi', 'center', 'sigma') for ax, ind, name in zip( axes.flat, (medians, mad_stds, gammas, chisq, centers, sigmas), names): ax.plot(ind, 'o') ax.set_title(name) # plt.show() # pl.plot(medians/n.max(medians),'o',label='median') # pl.plot(mad_stds/n.max(mad_stds),'o',label='mad_std') # pl.plot(gammas/n.max(gammas),'o',label='gammas') # pl.plot(chisq/n.max(chisq),'o',label='chisqr') # pl.legend(loc='best') pl.savefig('param_variations.png') pl.clf() os.chdir(path) '''
r = ratio_to_dens(ratio) dcs[abund][sigma] = r[r == r] pl.figure(3) pl.clf() ax = pl.axes([0.1, 0.1, 0.65, 0.8]) for abund in abunds: for sigma in (0, 1.0): rr = dcs[abund][sigma] counts, bins, patches = ampl.hist( rr, bins=100, log=True, histtype="step", linewidth=2, alpha=1.0, color=colors.next(), label=r"$X=%s, \sigma=%i$" % (abund, sigma), ) pl.xlabel(r"$\log(n(H_2))$ [cm$^{-3}$]") ax.set_ylim(1, 2e3) ax.set_xlim(1.5, 6) ax.set_ylabel("$N$(voxels)") pl.legend(bbox_to_anchor=(1.0, 1.0), fontsize=18, loc="upper left") pl.savefig("/Users/adam/work/h2co/maps/paper/figures/cube_histograms_density_ppv_multimodel.pdf", bbox_inches="tight")
def bayes_block(x_data, filename, format, x_label = '', title = '',\
    log_x = False, log_y = False):
    '''
    Description
        This function takes the given data and produces a Bayesian Block
        histogram of it. The given axis label and title are applied, and
        then the histogram is saved using the given filename and format.

    Required Input
        x_data: The data array to be graphed. Numpy array or list of floats.
                It is converted to an array and flattened to one dimension
                before creating the histogram.
        filename: The filename (including extension) to use when saving the
                  image. Provide as a string.
        format: The format (e.g. png, jpeg) in which to save the image.
                This is a string.
        x_label: String specifying the x-axis label.
        title: String specifying the title of the graph.
        log_x: If this is True, the x-axis of the saved image uses a
               logarithmic scale. The bin edges themselves are still chosen
               by the Bayesian Blocks algorithm from the data values. If
               this is False (default) the x-axis is linear.
        log_y: If this is True, then a logarithmic scale is used for the
               y-axis of the histogram. If this is False (default), then
               a linear scale is used.

    Output
        A histogram is automatically saved using the given data and labels,
        in the specified format. 1 is returned if the code performs to
        completion.
    '''
    # Function-scope import so this block does not depend on how (or
    # whether) the enclosing module imports numpy.
    import numpy as np

    # Accept plain lists as well as arrays (lists have no .flatten()), and
    # reduce the data to one dimension.
    flat_data = np.asanyarray(x_data).flatten()

    # First make a figure object with matplotlib (default size)
    fig = plt.figure()

    # Create an axis object to go with this figure
    ax = fig.add_subplot(111)

    # Check to see if the x-axis of the histogram needs to be logarithmic
    if log_x:
        # Set the x-axis scale of the histogram to be logarithmic
        ax.set_xscale('log')

    # Make the Bayesian Blocks histogram on this figure's own axes rather
    # than on whichever axes happen to be current. Bayesian Blocks bins
    # have unequal widths, so the histogram is normalized to a density to
    # keep bins of different width comparable.
    aML.hist(flat_data, bins='blocks', ax=ax, normed=True, log=log_y)

    # Add the specified x-axis label to the plot
    ax.set_xlabel(x_label)

    # Add a y-axis label to the plot
    # NOTE(review): the histogram is normalized, so the y-axis shows a
    # density rather than raw counts; label text left unchanged.
    ax.set_ylabel('Counts')

    # Add the specified title to the plot
    ax.set_title(title)

    # Save the figure using the filename given by the user
    fig.savefig(filename, format=format)

    # Print a message to the screen saying that the image was created
    print(filename + ' created successfully.\n')

    # Close this specific figure so that it does not take up memory
    plt.close(fig)

    # Now that the graph has been produced, return 1
    return 1
# sample evenly along the cumulative distribution, and interpolate Px_cuml_sample = np.linspace(0, 1, 10 * Ndata) x_sample = interpolate.splev(Px_cuml_sample, tck) #------------------------------------------------------------ # Plot the cloned distribution and the procedure for obtaining it fig = plt.figure(figsize=(10, 10)) fig.subplots_adjust(hspace=0.25, left=0.1, right=0.95, bottom=0.08, top=0.92) indices = np.linspace(0, Ndata - 1, 20).astype(int) # plot a histogram of the input ax = fig.add_subplot(221) hist(x, bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='#AAAAAA') ax.set_ylim(0, 300) ax.set_title('Input data distribution') ax.set_xlabel('x') ax.set_ylabel('N(x)') # plot the cumulative distribution ax = fig.add_subplot(222) ax.scatter(x[indices], Px_cuml[indices], lw=0, c='k') ax.plot(x, Px_cuml, '-k') ax.set_xlim(-3, 3) ax.set_ylim(-0.05, 1.05) ax.set_title('Cumulative Distribution') ax.set_xlabel('x') ax.set_ylabel('p(<x)')
#peakabsmagvalue = [peakabsmagvalueb,peakabsmagvaluec] #print peakabsmagvalueb #hist(peakabsmagvalueb, bins = 'knuth', label = str(ib) + ' Ib datapoints', color = 'blue', histtype='stepfilled', alpha=0.2)#, stacked=True) #hist(peakabsmagvaluec, bins = 'knuth', label = str(ic) + ' Ic datapoints', color ='green', histtype='stepfilled', alpha=0.2, des)#, stacked=True) #plotting best fit gaussian plt.subplot(221) result = hist(peakabsmagvalueb, bins = 'knuth', label = str(ib) + ' Ib datapoints', color = 'blue', histtype='stepfilled', alpha=0.2)#, stacked=True) mean = np.mean(peakabsmagvalueb) variance = np.var(peakabsmagvalueb) sigma = np.sqrt(variance) x = np.linspace(-21, -13,100) dx = result[1][1] - result[1][0] scale = len(peakabsmagvalueb)*dx plt.plot(x, mlab.normpdf(x,mean,sigma)*scale, label = 'Best fit, mean: ' +str(round(mean, 3))+' sigma: ' + str(round(sigma, 3))) print '\n','\n', 'Ib mean:', mean, 'sigma:', sigma plt.title("Peak Absolute Magnitude Histogram of TypeIb and TypeIc using the s11-2005hm model") plt.xlabel("Absolute Magnitude") plt.ylabel("Frequency")
# Apply identical display settings to both figures F1 and F2
# (`F1`, `F2`, `nh2` and `dens_peak` are defined outside this fragment).
for F in (F1,F2):
    F.show_colorscale()
    F.recenter(49.23,-0.28,width=1,height=0.5)
    F.add_colorbar()
    F.colorbar.set_axis_label_text(nh2)
    F.colorbar.set_axis_label_rotation(270)
    F.colorbar.set_axis_label_pad(25)
    # Recenter again after the colorbar has been added
    F.recenter(49.23,-0.28,width=1,height=0.5)

# NOTE(review): reaches into private attributes to read each image axes'
# bounding box; presumably the vertical extent in figure coordinates, so the
# histogram inset can be given the same height -- confirm.
ymin1,ymax1 = F1._ax1.bbox._bbox._points.T[1]
ymin2,ymax2 = F2._ax1.bbox._bbox._points.T[1]

pl.figure(1)
# New axes placed at x=0.68 with the vertical extent read from F1
ax1 = pl.axes([0.68,ymin1,0.25,ymax1-ymin1])
# dens_peak==dens_peak is False for NaN, so only non-NaN values are binned
counts,bins,patches = ampl.hist(dens_peak[dens_peak==dens_peak], bins=100,
                                log=True, histtype='step', linewidth=2,
                                alpha=0.8, color='k', ax=ax1)

# Fit the histogram (bin centers vs. counts) as a spectrum
sp = pyspeckit.Spectrum(xarr=(bins[1:]+bins[:-1])/2.,data=counts)
sp.specfit(guesses=[200,3,1])
# Fitted parameters, used below as peak (p), mean (m) and width (w)
p,m,w = sp.specfit.parinfo.values
g = p*np.exp(-(bins-m)**2/(2*(w)**2))
pl.plot(bins, g,'r--', label=r'$\mu=%0.1f, \sigma=%0.1f$' % (m,w))
pl.fill_between(bins,0,g,color='r',alpha=0.1,hatch='//',facecolor='none')
pl.annotate(r'$\mu=%0.1f$' % m ,(4.5,200),)
pl.annotate(r'$\sigma=%0.1f$' % w,(4.5,100),)
#pl.legend(loc='upper right',fontsize=18, bbox_to_anchor=(1.0,1.25))

ax1.yaxis.set_ticks_position('right')
ax1.set_xticks([2,3,4,5])
ax1.set_ylim(0.8,400)
ax1.set_xlim(1.5,6)
ax1.set_xlabel(nh2)