def ex3(exclude=sc.array([1,2,3,4]),plotfilename='ex3.png',
        bovyprintargs={}):
    """ex3: solve exercise 3

    Weighted least-squares fit of the quadratic y = b + m x + q x^2 to the
    points read from 'data_yerr.dat', followed by a plot of the data, the
    best-fit curve, and a label giving the coefficients with the square
    roots of the diagonal of their covariance matrix.

    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       bovyprintargs - keyword arguments handed to plot.bovy_print
                       (only read, never modified)
    Output:
       plot; returns 0
    History:
       2009-05-27 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_yerr.dat')
    #Select the points to fit by actually filtering on the ID, so that an
    #ID in 'exclude' that is absent from the file cannot overrun the arrays
    kept= [point for point in data if not sc.any(exclude == point[0])]
    nsample= len(kept)
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    A= sc.ones((nsample,3))#columns: 1, x, x^2
    C= sc.zeros((nsample,nsample))#diagonal data covariance
    yerr= sc.zeros(nsample)
    for jj, point in enumerate(kept):
        Y[jj]= point[1][1]
        A[jj,1]= point[1][0]
        A[jj,2]= point[1][0]**2.
        C[jj,jj]= point[2]**2.
        yerr[jj]= point[2]
    #Now compute the best fit and the uncertainties:
    #var = (A^T C^-1 A)^-1, bestfit = var A^T C^-1 Y
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y)))
    #Now plot the solution
    plot.bovy_print(**bovyprintargs)
    #plot bestfit
    xlimits=[0,300]
    ylimits=[0,700]
    nsamples= 1001
    xs= sc.linspace(xlimits[0],xlimits[1],nsamples)
    ys= bestfit[0]+bestfit[1]*xs+bestfit[2]*xs**2.
    plot.bovy_plot(xs,ys,'k-',xrange=xlimits,yrange=ylimits,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    #Plot data
    errorbar(A[:,1],Y,yerr,marker='o',color='k',linestyle='None',zorder=1)
    #Put in a label with the best fit; raw strings keep the TeX
    #backslashes from being read as (invalid) Python escapes
    coefficients= (r'%4.4f \pm %4.4f)\,x^2 + ( %4.2f \pm %4.2f )\,x'
                   r'+ ( %4.0f\pm %4.0f'
                   % (bestfit[2], m.sqrt(bestfitvar[2,2]),
                      bestfit[1], m.sqrt(bestfitvar[1,1]),
                      bestfit[0], m.sqrt(bestfitvar[0,0])))
    text(5,30,r'$y = ('+coefficients+r')$')
    plot.bovy_end_print(plotfilename)
    return 0
def ex15(exclude=sc.array([1,2,3,4]),plotfilename='ex15.png',
         bovyprintargs={}):
    """ex15: solve exercise 15

    Fits a straight line through the (x,y) points of 'data_allerr.dat' by
    taking the principal axis (eigenvector of the sample covariance with
    the largest eigenvalue) through the sample mean, and plots the line on
    top of the data.

    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       bovyprintargs - keyword arguments handed to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_allerr.dat',allerr=True)
    #Filter on the ID itself so a missing excluded ID cannot overrun arrays
    kept= [point for point in data if not sc.any(exclude == point[0])]
    nsample= len(kept)
    #Put the data in the appropriate arrays and matrices
    Z= sc.zeros((nsample,2))
    for jj, point in enumerate(kept):
        Z[jj,0]= point[1][0]
        Z[jj,1]= point[1][1]
    X= Z[:,0]
    Y= Z[:,1]
    #Now compute the PCA solution
    Zm= sc.mean(Z,axis=0)
    Q= sc.cov(Z.T)
    #Q is symmetric: eigh returns real eigenvalues and orthonormal
    #eigenvectors stored as the COLUMNS of the second return value
    eigvals, eigvecs= linalg.eigh(Q)
    V= eigvecs[:,sc.argmax(eigvals)]
    #Slope of the principal axis; the ratio keeps the sign, which
    #sqrt(1/V[0]**2-1) would throw away
    slope= V[1]/V[0]
    bestfit= sc.array([Zm[1]-slope*Zm[0],slope])
    #Plot result
    plot.bovy_print(**bovyprintargs)
    xlimits=[0,300]
    ylimits=[0,700]
    plot.bovy_plot(sc.array(xlimits),bestfit[1]*sc.array(xlimits)+bestfit[0],
                   'k--',xrange=xlimits,yrange=ylimits,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    plot.bovy_plot(X,Y,marker='o',color='k',linestyle='None',
                   zorder=0,overplot=True)
    #%+4.0f always prints the sign, so a positive intercept reads "x +34"
    plot.bovy_text(r'$y = %4.2f \,x %+4.0f' % (bestfit[1], bestfit[0])+r'$',
                   bottom_right=True)
    plot.bovy_end_print(plotfilename)
def plot_data_yerr():
    """plot_data_yerr: Plot the data with the error bars in the y-direction

    Reads 'data_yerr.dat' through read_data and writes an error-bar plot of
    y against x to 'data_yerr.png'.

    Output:
       plot; returns 0
    History:
       2009-05-20 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #Put the data into x, y, and yerr
    x = sc.zeros(ndata)
    y = sc.zeros(ndata)
    yerr = sc.zeros(ndata)
    for ii in range(ndata):
        x[ii] = data[ii][1][0]
        y[ii] = data[ii][1][1]
        yerr[ii] = data[ii][2]
    plotfilename = 'data_yerr.png'
    fig_width = 7.5
    fig_height = 7.5
    fig_size = [fig_width, fig_height]
    params = {
        'axes.labelsize': 12,
        #'font.size' replaces the former 'text.fontsize' key, which
        #current matplotlib rejects
        'font.size': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size,
    }
    rcParams.update(params)
    errorbar(x, y, yerr, marker='o', color='k', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    savefig(plotfilename, format='png')
    return 0
def plot_data_yerr():
    """plot_data_yerr: Plot the data with the error bars in the y-direction

    The points come from read_data('data_yerr.dat'); the figure is saved
    as 'data_yerr.png'.

    Output:
       plot; returns 0
    History:
       2009-05-20 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    #Put the data into x, y, and yerr
    x= sc.zeros(ndata)
    y= sc.zeros(ndata)
    yerr= sc.zeros(ndata)
    for ii, point in enumerate(data):
        x[ii]= point[1][0]
        y[ii]= point[1][1]
        yerr[ii]= point[2]
    plotfilename='data_yerr.png'
    fig_size= [7.5,7.5]
    #'text.fontsize' is no longer a valid matplotlib rc key; 'font.size'
    #is its replacement
    params= {'axes.labelsize': 12,
             'font.size': 11,
             'legend.fontsize': 12,
             'xtick.labelsize':10,
             'ytick.labelsize':10,
             'text.usetex': True,
             'figure.figsize': fig_size}
    rcParams.update(params)
    errorbar(x,y,yerr,marker='o',color='k',linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    savefig(plotfilename,format='png')
    return 0
# Decision rules under test (only the "hard" Bayes rules are active below).
from informative_priors import loss_diff, loss_ratio, bayes_p

BAYES_PROBS = [0.90, 0.95, 0.99]

# One rule per probability, created with bayes_p(p, True) and renamed so
# that it can be told apart in the results table.
BAYES_P_LIST = []
for p in BAYES_PROBS:
    b = bayes_p(p, True)
    b.__name__ = 'bayes_%s_hard' % str(int(p * 100))
    BAYES_P_LIST.append(b)

# The loss_diff / loss_ratio rules and the bayes_p(p) variants built
# without the second argument are currently left out of the evaluation.
FUNCTION_LIST = BAYES_P_LIST

results = []
DATA_FILE = 'test_data.pkl'
data_dict = read_data(DATA_FILE)

# Evaluate every rule on the same data and remember which rule produced
# each result row.
for i, f in enumerate(FUNCTION_LIST):
    print('Evaluting decision function %s: %s' % (str(i + 1), f.__name__))
    d = evaluate_all(data_dict, f)
    d['Rule name'] = f.__name__
    results.append(d)

COLS = [
    'Rule name',
    'Average loss',
    'Average number of samples',
    'Estimate bias',
    'Estimate MSE',
    'TP',
    'TN',
    'FP',
    'FN',
]

# Rank the rules by average loss, keep the report columns in COLS order,
# then write the table to disk and echo it.
results_df = (
    pd.DataFrame(results)
    .sort_values('Average loss')
    .reset_index(drop=True)
    .loc[:, COLS]
)
results_df.to_csv('results_informative_priors.csv', index=False)
print(results_df)
def ex8(plotfilename='ex8.png',
        nburn=1000,
        nsamples=10000,
        parsigma=[.075, 2., 0.1]):
    """ex8: solve exercise 8 using...?

    Samples a straight line y = m x + b together with a per-point flag q
    (1 = drawn from the line, 0 = drawn from a broad background Gaussian)
    and the good fraction pgood.  The flags are Gibbs sampled, (m, b,
    pgood) get a Gaussian Metropolis proposal, and the highest-posterior
    state encountered is plotted over the data.

    Input:
       plotfilename - filename for the output plot
       nburn - number of burn-in samples
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution width (Gaussian) for m, b, pgood
                  (only read, never modified)
    Output:
       plot; returns 0
    History:
       2009-06-25 -- hacked from Bovy code - Hogg (NYU)
    """
    #In the interest of reproducibility (if that's a word)
    #NOTE(review): recent NumPy releases reject negative seeds -- confirm
    #against the NumPy version in use before changing the value
    sc.random.seed(-1)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #Put the data in the appropriate arrays and matrices
    X = sc.zeros(ndata)
    Y = sc.zeros(ndata)
    A = sc.ones((ndata, 2))
    Yivar = sc.zeros(ndata)
    C = sc.zeros((ndata, ndata))
    yerr = sc.zeros(ndata)
    for ii in range(ndata):
        X[ii] = data[ii][1][0]
        Y[ii] = data[ii][1][1]
        A[ii, 1] = data[ii][1][0]
        Yivar[ii] = 1.0 / (data[ii][2]**2)
        C[ii, ii] = data[ii][2]**2
        yerr[ii] = data[ii][2]
    #First find the chi-squared solution, which we will use as an
    #initial guess
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y)))
    m = bestfit[1]
    b = bestfit[0]
    #Start with every point flagged good except the first four
    q = sc.array([1 for cc in range(ndata)])
    q[0:4] = 0
    pgood = 0.9
    #pgood=0.999759#3 sigma for uncertainty~50
    initialguess = [m, b, q, pgood]
    print(initialguess)
    #With this initial guess start off the sampling procedure
    bgmean = sc.mean(Y)
    bgivar = 1.0 / sc.sum((Y - bgmean)**2)
    initialX = lnposterior(X, Y, Yivar, m, b, q, pgood, bgmean, bgivar)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    for jj in range(nburn + nsamples):
        #Work on a deep copy so the q array of the current state is not
        #modified when the proposal ends up rejected
        thisguess = c.deepcopy(currentguess)
        m = thisguess[0]
        b = thisguess[1]
        q = thisguess[2]
        pgood = thisguess[3]
        #First Gibbs sample each q
        for ii in range(ndata):
            thisdatagood = ma.sqrt(Yivar[ii] / (2. * math.pi)) * ma.exp(
                -.5 * (Y[ii] - m * X[ii] - b)**2. * Yivar[ii]) * pgood
            thisdatabad = ma.sqrt(bgivar / (2. * math.pi)) * ma.exp(
                -.5 * (Y[ii] - bgmean)**2. * bgivar) * (1.0 - pgood)
            a = thisdatagood / (thisdatagood + thisdatabad)
            u = stats.uniform.rvs()
            if u < a:
                q[ii] = 1
            else:
                q[ii] = 0
        #Then Metropolis sample m and b
        m += stats.norm.rvs() * parsigma[0]
        b += stats.norm.rvs() * parsigma[1]
        pgood += stats.norm.rvs() * parsigma[2]
        #Clamp pgood to [1 - MAXP, MAXP] (MAXP is a module-level constant)
        if pgood > MAXP:
            pgood = MAXP
        if pgood < (1.0 - MAXP):
            pgood = (1.0 - MAXP)
        newsample = [m, b, q, pgood]
        #Calculate the objective function for the newsample
        newX = lnposterior(X, Y, Yivar, m, b, q, pgood, bgmean, bgivar)
        #Accept or reject
        #Reject with the appropriate probability
        u = stats.uniform.rvs()
        try:
            acceptprob = ma.exp(newX - currentX)
        except OverflowError:
            #A huge improvement overflows exp(); it is a certain accept
            acceptprob = 2.
        if u < acceptprob:
            #Accept
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
            if currentX > bestX:
                print(currentguess)
                bestfit = currentguess
                bestX = currentX
    print("Acceptance ratio was " + str(double(naccept) / (nburn + nsamples)))
    #Now plot the best solution
    fig_width = 5
    fig_height = 5
    fig_size = [fig_width, fig_height]
    params = {
        'axes.labelsize': 12,
        #'font.size' replaces 'text.fontsize', which current matplotlib
        #no longer accepts
        'font.size': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    rcParams.update(params)
    #Plot data (the colour was previously passed twice: a SyntaxError)
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    xlim(0, 300)
    ylim(0, 700)
    xmin, xmax = xlim()
    (m, b, q, pgood) = bestfit
    print(bestfit)
    print(m)
    xs = sc.linspace(xmin, xmax, 3)
    ys = m * xs + b
    if b < 0:
        sgn_str = '-'
    else:
        sgn_str = '+'
    label = r'$y = %4.2f\, x' % m + sgn_str + '%4.0f ' % ma.fabs(
        b) + '$'  #+r'; X = '+ '%3.1f' % bestX+'$'
    plot(xs, ys, color='k', ls='--', label=label)
    l = legend(loc=(.3, .1), numpoints=8)
    l.draw_frame(False)
    xlim(0, 300)
    ylim(0, 700)
    savefig(plotfilename, format='png')
    return 0
def ex9(exclude=sc.array([1,2,3,4]),plotfilename='ex9.png',zoom=False,
        bovyprintargs={}):
    """ex9: solve exercise 9

    For a grid of values S, refits the straight line with every point
    given the same variance S and records chi^2 of the fit; plots chi^2
    against S together with a horizontal line at nsample-2 and a vertical
    line at the mean of the quoted variances.

    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       zoom - zoom in
       bovyprintargs - keyword arguments handed to plot.bovy_print
    Output:
       plot; returns 0
    History:
       2009-05-27 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_yerr.dat')
    #Filter on the ID itself so a missing excluded ID cannot overrun arrays
    kept= [point for point in data if not sc.any(exclude == point[0])]
    nsample= len(kept)
    #Put the data in the appropriate arrays and matrices; none of this
    #depends on S, so it is done once instead of once per grid point
    Y= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    yerr= sc.zeros(nsample)
    for jj, point in enumerate(kept):
        Y[jj]= point[1][1]
        A[jj,1]= point[1][0]
        yerr[jj]= point[2]
    nSs= 1001
    if zoom:
        Srange=[900,1000]
    else:
        Srange=[0.001,1500]
    Ss= sc.linspace(Srange[0],Srange[1],nSs)
    chi2s= sc.zeros(nSs)
    for kk in range(nSs):
        #Every point gets the same variance S
        C= sc.zeros((nsample,nsample))
        for jj in range(nsample):
            C[jj,jj]= Ss[kk]
        #Now compute the best fit and the uncertainties
        Cinv= linalg.inv(C)
        bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
        bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y)))
        chi2s[kk]= chi2(bestfit,A,Y,C)
    #Now plot the solution
    plot.bovy_print(**bovyprintargs)
    #Plot chi^2 as a function of S
    if zoom:
        ylimits=[nsample-4,nsample]
    else:
        ylimits=[nsample-10,nsample+8]
    plot.bovy_plot(Ss,chi2s,'k-',xrange=Srange,yrange=ylimits,
                   xlabel=r'$S$',ylabel=r'$\chi^2$',zorder=1)
    #Horizontal reference line at nsample-2
    plot.bovy_plot(sc.array(Srange),sc.array([nsample-2,nsample-2]),
                   'k--',zorder=2,overplot=True)
    #Vertical reference line at the mean quoted variance
    meanvar= sc.mean(yerr**2.)
    plot.bovy_plot(sc.array([meanvar,meanvar]),
                   sc.array(ylimits),color='0.75',overplot=True)
    plot.bovy_end_print(plotfilename)
    return 0
# Duty columns offered by every "choose a duty" sub-menu, in menu order.
_DUTY_FIELDS = (
    "general_betting_duty",
    "pool_betting_duty",
    "gaming_duty",
    "amusement_machine_licence",
    "bingo",
    "machine_games_duty",
    "lottery_duty",
)


def _read_int(prompt):
    """Ask for an integer; return it, or report and return None if the reply is not one."""
    reply = input(prompt)
    try:
        return int(reply)
    except ValueError:
        print("Error number")
        return None


def _read_years():
    """Ask how many years to compare and then for each year; None on bad input."""
    count = _read_int("enter number of years you want to compare: ")
    if count is None:
        return None
    years = []
    for _ in range(count):
        year = _read_int("Enter year: ")
        if year is None:
            return None
        years.append(year)
    return years


def _choose_duty(title):
    """Show the numbered duty list under *title*; return the chosen column name or None."""
    print("")
    print("        " + title)
    for number, field in enumerate(_DUTY_FIELDS, start=1):
        print("        %d - %s" % (number, field))
    choice = _read_int("enter command: ")
    if choice is None:
        return None
    if 1 <= choice <= len(_DUTY_FIELDS):
        return _DUTY_FIELDS[choice - 1]
    print("Error number")
    return None


def _analyse_menu():
    """Run one pass through the 'Analyze' sub-menu, dispatching to the ad module."""
    print("""
        Analyze
        1 - Total receipt of a specific year
        2 - Total receipt of range of years
        3 - full data of certain duty
        4 - Receipt of a duty in a certain year
        5 - Receipt of a duty in a range of years
        6 - Total sum of receipts of a duty
        7 - Total Betting
        8 - Total Gaming
        9 - quit
        """)
    b = _read_int("Enter command: ")
    if b is None or b == 9:
        return
    if b == 1:
        year = _read_int("Enter year: ")
        if year is not None:
            ad.total_receipt(year)
    elif b == 2:
        years = _read_years()
        if years is not None:
            ad.total_receipt_years(years)
    elif b == 3:
        field = _choose_duty("Analyze")
        if field is not None:
            ad.column_full(field)
    elif b == 4:
        year = _read_int("enter year: ")
        if year is None:
            return
        field = _choose_duty("Analyze")
        if field is not None:
            ad.column_year(field, year)
    elif b == 5:
        years = _read_years()
        if years is None:
            return
        field = _choose_duty("Analyze")
        if field is not None:
            ad.column_multiple_years(field, years)
    elif b == 6:
        field = _choose_duty("Analyze")
        if field is not None:
            ad.annual_receipt(field)
    elif b == 7:
        ad.annual_betting()
    elif b == 8:
        ad.annual_gaming()
    else:
        print("Error number")


def _filter_menu():
    """Run one pass through the 'Filter' sub-menu, dispatching to the fd module."""
    print("""
        Filter
        1 - Year
        2 - Year >
        3 - Year <
        4 - Month
        5 - Month >
        6 - Month <
        7 - Date
        8 - Other fields
        9 - quit
        """)
    b = _read_int("Enter command:")
    if b is None or b == 9:
        return
    if 1 <= b <= 6:
        # 1-3 filter on the year, 4-6 on the month; within each group the
        # order is "=", ">", "<".  All three now receive an int (the "<"
        # variants used to be handed the raw input string).
        part = "year" if b <= 3 else "month"
        value = _read_int("Enter %s:" % part)
        if value is None:
            return
        comparison = (b - 1) % 3
        if comparison == 0:
            fd.filter_date(part, value)
        elif comparison == 1:
            fd.filter_date_gt(part, value)
        else:
            fd.filter_date_ls(part, value)
    elif b == 7:
        c = input("Enter date:(yyyy-mm)")
        # The data is filtered on a full date, so pin the day to the 1st.
        fullDate = "%s-01" % c
        print(fullDate)
        fd.filter_data("date", fullDate)
    elif b == 8:
        print("""
        operation
        1 - =
        2 - >
        3 - <
        """)
        op = _read_int("Enter operation: ")
        if op is None:
            return
        if op not in (1, 2, 3):
            print("Error number")
            return
        val = _read_int("Enter Value: ")
        if val is None:
            return
        field = _choose_duty("Field")
        if field is None:
            return
        if op == 1:
            fd.filter_data(field, val)
        elif op == 2:
            fd.filter_data_gt(field, val)
        else:
            fd.filter_data_ls(field, val)
    else:
        print("Error number")


def start():
    """Run the interactive text menu until the user chooses 'exit'.

    Main commands: 1 opens the filter sub-menu (fd module), 2 the analyse
    sub-menu (ad module), 3 calls gd.read_data(), 4 asks for a start year
    and calls gd.generate_data(year), 5 leaves the loop.  Each sub-menu
    handles one command and then returns here; its '9 - quit' entry
    returns without doing anything.  A reply that is not a number, or a
    number that is not on the menu, prints "Error number" and the menu is
    shown again instead of the program crashing.

    Returns None.
    """
    while True:
        print("""Commands:
        1-filter,
        2-analyse
        3-read_data
        4-gen_data
        5-exit
        """)
        a = _read_int("Enter command:")
        if a is None:
            continue
        if a == 1:
            _filter_menu()
        elif a == 2:
            _analyse_menu()
        elif a == 3:
            gd.read_data()
        elif a == 4:
            c = _read_int("Enter start year: ")
            if c is not None:
                gd.generate_data(c)
        elif a == 5:
            break
        else:
            print("Error number")
def ex6b(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex6b.png'):
    """ex6b: solve exercise 6 using a basinhopping optimization

    The chi-squared straight-line fit provides the starting point; the
    objective logbiexp is then minimised with scipy's basinhopping in ten
    differently seeded runs, and the run with the lowest objective value
    is plotted over the data.

    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
    Output:
       plot; returns 0
    History:
       2009-06-02 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_yerr.dat')
    npoints = len(data)
    nsample = npoints - len(exclude)
    #Fill the vectors and matrices for the chi-squared fit, skipping the
    #excluded IDs
    X = sc.zeros(nsample)
    Y = sc.zeros(nsample)
    yerr = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    row = 0
    for idx in range(npoints):
        record = data[idx]
        if sc.any(exclude == record[0]):
            continue
        Y[row] = record[1][1]
        X[row] = record[1][0]
        A[row, 1] = record[1][0]
        C[row, row] = record[2]**2.
        yerr[row] = record[2]
        row += 1
    #Chi-squared solution: var = (A^T C^-1 A)^-1, fit = var A^T C^-1 Y
    weightedY = sc.dot(A.T, sc.dot(linalg.inv(C), Y.T))
    linvar = linalg.inv(sc.dot(A.T, sc.dot(linalg.inv(C), A)))
    linfit = sc.dot(linvar, weightedY)
    initialguess = sc.array([linfit[0], linfit[1]])
    #Start from this guess; a run only replaces the current best when its
    #objective value is lower
    chisq = nsample * 10.
    bestfit = initialguess
    print(
        "Performing 10 runs of the simulating basinhopping optimization algorithm"
    )
    for run in range(10):
        #In the interest of reproducibility (if that's a word)
        sc.random.seed(run + 1)
        hopresult = optimize.basinhopping(
            logbiexp,
            x0=initialguess,
            minimizer_kwargs={"args": (X, Y, yerr)},
            niter=100)
        #basinhopping returns an OptimizeResult: .x holds the location of
        #the minimum found and .fun the objective value there
        print(hopresult)
        if hopresult.fun < chisq:
            bestfit = hopresult.x
            chisq = hopresult.fun
    intercept = bestfit[0]
    slope = bestfit[1]
    #Now plot the solution
    rcParams.update({
        'axes.labelsize': 12,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': [5, 5],
    })
    #Plot data
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    xlim(0, 300)
    ylim(0, 700)
    xmin, xmax = xlim()
    xs = sc.linspace(xmin, xmax, 1001)
    ys = intercept + slope * xs
    sgn_str = '-' if intercept < 0 else '+'
    label = ''.join([
        r'$y = %4.2f\, x' % (slope),
        sgn_str,
        '%4.0f ' % m.fabs(intercept),
        r'; X = ',
        '%3.1f' % chisq,
        '$',
    ])
    plot(xs, ys, color='k', ls='--', label=label)
    leg = legend(loc=(.3, .1), numpoints=8)
    leg.draw_frame(False)
    xlim(0, 300)
    ylim(0, 700)
    print('Creating: ', plotfilename)
    savefig(plotfilename, format='png')
    return 0
def exMix1(
    exclude=None,
    plotfilenameA="exMix1a.png",
    plotfilenameB="exMix1b.png",
    plotfilenameC="exMix1c.png",
    nburn=20000,
    nsamples=1000000,
    parsigma=[5, 0.075, 0.2, 1, 0.1],
    dsigma=1.0,
    bovyprintargs={},
    sampledata=None,
):
    """exMix1: solve exercise 5 (mixture model) using MCMC sampling

    Produces three plots: the 2D histogram of the (b, m) samples, the data
    with the histogram-peak line and the returned sample lines, and the
    histogram of the bad-point probability.

    Input:
       exclude - ID numbers to exclude from the analysis (can be None)
       plotfilename* - filenames for the output plot
       nburn - number of burn-in samples
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution width (Gaussian)
       dsigma - divide uncertainties by this amount
       bovyprintargs - keyword arguments handed to plot.bovy_print; the
                       caller's dictionary is not modified
       sampledata - result of an earlier call; when given, the sampler is
                    not run again
    Output:
       plot; returns the sampler output tuple
       (histmb, edges, mbsamples, pbhist, pbedges)
    History:
       2010-04-28 - Written - Bovy (NYU)
    """
    # Work on a private copy: 'text_fontsize' is inserted below, and doing
    # that on the shared default dict (or the caller's dict) would leak
    # into every later call.
    bovyprintargs = dict(bovyprintargs)
    # In the interest of reproducibility (if that's a word)
    # NOTE(review): recent NumPy releases reject negative seeds -- confirm
    # against the NumPy version in use before changing the value
    sc.random.seed(-1)
    # Read the data
    data = read_data("data_yerr.dat")
    # 'is None' rather than '== None': comparing an array with == gives an
    # array, whose truth value is ambiguous
    if exclude is None:
        kept = list(data)
    else:
        kept = [point for point in data if not sc.any(exclude == point[0])]
    nsample = len(kept)
    # Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    for jj, point in enumerate(kept):
        Y[jj] = point[1][1]
        X[jj] = point[1][0]
        A[jj, 1] = point[1][0]
        C[jj, jj] = point[2] ** 2.0 / dsigma ** 2.0
        yerr[jj] = point[2] / dsigma
    brange = [-120, 120]
    mrange = [1.5, 3.2]
    # This matches the order of the parameters in the "samples" vector
    mbrange = [brange, mrange]
    if sampledata is None:
        sampledata = runSampler(X, Y, A, C, yerr, nburn, nsamples, parsigma, mbrange)
    (histmb, edges, mbsamples, pbhist, pbedges) = sampledata
    # Hack -- produce fake Pbad samples from Pbad histogram: repeat each
    # bin centre as many times as the bin has counts.
    pbcentres = (pbedges[:-1] + pbedges[1:]) / 2
    pbsamples = hstack([array([x] * N) for x, N in zip(pbcentres, pbhist)])
    # Location of the histogram peak along each axis
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    print(edges[0][indxi - 1], edges[1][indxj - 1])
    print(edges[0][indxi], edges[1][indxj])
    print(edges[0][indxi + 1], edges[1][indxj + 1])
    # 2D histogram
    plot.bovy_print(**bovyprintargs)
    levels = special.erf(0.5 * sc.arange(1, 4))
    xe = [edges[0][0], edges[0][-1]]
    ye = [edges[1][0], edges[1][-1]]
    aspect = (xe[1] - xe[0]) / (ye[1] - ye[0])
    plot.bovy_dens2d(
        histmb.T,
        origin="lower",
        cmap=cm.gist_yarg,
        interpolation="nearest",
        contours=True,
        cntrmass=True,
        extent=xe + ye,
        levels=levels,
        aspect=aspect,
        xlabel=r"$b$",
        ylabel=r"$m$",
    )
    xlim(brange)
    ylim(mrange)
    plot.bovy_end_print(plotfilenameA)
    # Data with MAP line and sampling
    plot.bovy_print(**bovyprintargs)
    bestb = edges[0][indxi]
    bestm = edges[1][indxj]
    xlimits = [0, 300]
    ylimits = [0, 700]
    plot.bovy_plot(
        xlimits,
        bestm * sc.array(xlimits) + bestb,
        "k-",
        xrange=xlimits,
        yrange=ylimits,
        xlabel=r"$x$",
        ylabel=r"$y$",
        zorder=2,
    )
    errorbar(X, Y, yerr, marker="o", color="k", linestyle="None", zorder=1)
    # NOTE(review): each row is unpacked as (slope, intercept) although
    # mbrange above is ordered (b, m) -- confirm against runSampler.
    for slope, intercept in mbsamples:
        plot.bovy_plot(
            xlimits,
            slope * sc.array(xlimits) + intercept,
            overplot=True,
            xrange=xlimits,
            yrange=ylimits,
            xlabel=r"$x$",
            ylabel=r"$y$",
            color="0.75",
            zorder=1,
        )
    plot.bovy_end_print(plotfilenameB)
    # Pb plot
    if "text_fontsize" not in bovyprintargs:
        bovyprintargs["text_fontsize"] = 11
    plot.bovy_print(**bovyprintargs)
    plot.bovy_hist(
        pbsamples,
        # a bin count has to be an integer
        bins=int(round(sc.sqrt(nsamples) / 5.0)),
        xlabel=r"$P_\mathrm{b}$",
        normed=True,
        histtype="step",
        range=[0, 1],
        edgecolor="k",
    )
    ylim(0, 4.0)
    if dsigma == 1.0:
        plot.bovy_text(r"$\mathrm{using\ correct\ data\ uncertainties}$", top_right=True)
    else:
        plot.bovy_text(r"$\mathrm{using\ data\ uncertainties\ /\ 2}$", top_left=True)
    plot.bovy_end_print(plotfilenameC)
    return sampledata
def ex14(exclude=sc.array([1, 2, 3, 4]),
         plotfilename='ex14.png',
         bovyprintargs={}):
    """ex14: solve exercise 14

    Fits a straight line twice by weighted least squares: "forward"
    (y against x, weighted with the y uncertainties) and "reverse"
    (x against y, weighted with the x uncertainties).  The reverse fit is
    converted to the form y = m x + b with linear error propagation, and
    both lines are plotted over the data.

    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       bovyprintargs - keyword arguments handed to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    #Filter on the ID itself so a missing excluded ID cannot overrun arrays
    kept = [point for point in data if not sc.any(exclude == point[0])]
    nsample = len(kept)
    #Put the data in the appropriate arrays and matrices
    #(1: y as a function of x, 2: x as a function of y)
    Y1 = sc.zeros(nsample)
    A1 = sc.ones((nsample, 2))
    C1 = sc.zeros((nsample, nsample))
    Y2 = sc.zeros(nsample)
    A2 = sc.ones((nsample, 2))
    C2 = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    xerr = sc.zeros(nsample)
    for jj, point in enumerate(kept):
        Y1[jj] = point[1][1]
        A1[jj, 1] = point[1][0]
        C1[jj, jj] = point[2]**2.
        yerr[jj] = point[2]
        Y2[jj] = point[1][0]
        A2[jj, 1] = point[1][1]
        C2[jj, jj] = point[3]**2.
        xerr[jj] = point[3]
    #Now compute the best fit and the uncertainties: forward
    C1inv = linalg.inv(C1)
    bestfitvar1 = linalg.inv(sc.dot(A1.T, sc.dot(C1inv, A1)))
    bestfit1 = sc.dot(bestfitvar1, sc.dot(A1.T, sc.dot(C1inv, Y1)))
    #Now compute the best fit and the uncertainties: backward
    C2inv = linalg.inv(C2)
    bestfitvar2 = linalg.inv(sc.dot(A2.T, sc.dot(C2inv, A2)))
    bestfit2 = sc.dot(bestfitvar2, sc.dot(A2.T, sc.dot(C2inv, Y2)))
    #Propagate to y=mx+b: with x = m' y + b' the line is
    #y = x/m' - b'/m', and linerrprop is the Jacobian of
    #(-b'/m', 1/m') with respect to (b', m')
    linerrprop = sc.array([[-1. / bestfit2[1], bestfit2[0] / bestfit2[1]**2],
                           [0., -1. / bestfit2[1]**2.]])
    bestfit2 = sc.array([-bestfit2[0] / bestfit2[1], 1. / bestfit2[1]])
    bestfitvar2 = sc.dot(linerrprop, sc.dot(bestfitvar2, linerrprop.T))
    #Plot result
    plot.bovy_print(**bovyprintargs)
    xlimits = [0, 300]
    ylimits = [0, 700]
    plot.bovy_plot(sc.array(xlimits),
                   bestfit1[1] * sc.array(xlimits) + bestfit1[0],
                   'k--',
                   xrange=xlimits,
                   yrange=ylimits,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    plot.bovy_plot(sc.array(xlimits),
                   bestfit2[1] * sc.array(xlimits) + bestfit2[0],
                   'k-.',
                   overplot=True,
                   zorder=2)
    #Plot data
    errorbar(A1[:, 1],
             Y1,
             yerr,
             xerr,
             color='k',
             marker='o',
             linestyle='None',
             zorder=0)
    #Raw string: the TeX backslashes must not be read as Python escapes
    linefmt = r'%4.2f \pm %4.2f )\,x+ ( %4.0f\pm %4.0f'
    forward = linefmt % (bestfit1[1], m.sqrt(bestfitvar1[1, 1]),
                         bestfit1[0], m.sqrt(bestfitvar1[0, 0]))
    reverse = linefmt % (bestfit2[1], m.sqrt(bestfitvar2[1, 1]),
                         bestfit2[0], m.sqrt(bestfitvar2[0, 0]))
    plot.bovy_text(r'$\mathrm{forward}\ ---\:\ y = ( ' + forward + r')$' +
                   '\n' + r'$\mathrm{reverse}\ -\cdot -\:\ y = ( ' + reverse +
                   r')$',
                   bottom_right=True)
    plot.bovy_end_print(plotfilename)
def ex13(exclude=sc.array([1, 2, 3, 4]),
         plotfilename='ex13.png',
         nburn=1000,
         nsamples=100000,
         parsigma=[1, m.pi / 200., .01, .5, 1., .05, .1, .005],
         bovyprintargs={}):
    """ex13: solve exercise 13 by MCMC

    Runs a Metropolis sampler over an 8-parameter model whose first two
    parameters are b*cos(theta) and theta (line y = tan(theta) x + b),
    scored by the module-level function objective.  Plots the line at the
    peak of the 2D histogram of those two parameters, ten randomly chosen
    sample lines, the data with their error ellipses, and next to each
    point the average over the chain of Pbad for that point.

    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       nburn - number of burn-in samples
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution width (Gaussian), one per parameter
       bovyprintargs - keyword arguments handed to plot.bovy_print
    Output:
       plot
    History:
       2010-05-06 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    #Filter on the ID itself so a missing excluded ID cannot overrun arrays
    kept = [point for point in data if not sc.any(exclude == point[0])]
    nsample = len(kept)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    Z = sc.zeros((nsample, 2))
    ycovar = sc.zeros((2, nsample, 2))  #Makes the sc.dot easier
    for jj, point in enumerate(kept):
        Y[jj] = point[1][1]
        X[jj] = point[1][0]
        Z[jj, 0] = X[jj]
        Z[jj, 1] = Y[jj]
        A[jj, 1] = point[1][0]
        C[jj, jj] = point[2]**2.
        #Per-point 2x2 covariance; point[4] multiplies sqrt(var_x var_y),
        #presumably the x-y correlation coefficient -- confirm with
        #read_data
        ycovar[0, jj, 0] = point[3]**2.
        ycovar[1, jj, 1] = point[2]**2.
        ycovar[0, jj, 1] = point[4] * m.sqrt(
            ycovar[0, jj, 0] * ycovar[1, jj, 1])
        ycovar[1, jj, 0] = ycovar[0, jj, 1]
    #Now compute the best fit and the uncertainties
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y)))
    #Now sample
    inittheta = m.acos(1. / m.sqrt(1. + bestfit[1]**2.))
    if bestfit[1] < 0.:
        inittheta = m.pi - inittheta
    #NOTE(review): the first entry is cos(theta), not b*cos(theta), and
    #var(X) is used for both log-variance entries -- only a starting
    #point, but confirm this is intended
    initialguess = sc.array([
        m.cos(inittheta), inittheta, 0.,
        sc.mean(X),
        sc.mean(Y),
        m.log(sc.var(X)),
        m.log(sc.var(X)), 0.
    ])
    nparams = 8
    #With this initial guess start off the sampling procedure
    initialX = objective(initialguess, Z, ycovar)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    samples = []
    samples.append(currentguess)
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution (one Gaussian
        #deviate per parameter, in parameter order)
        newsample = sc.zeros(nparams)
        for kk in range(nparams):
            newsample[kk] = currentguess[kk] + stats.norm.rvs() * parsigma[kk]
        #Calculate the objective function for the newsample
        newX = objective(newsample, Z, ycovar)
        #Accept or reject
        #Reject with the appropriate probability
        u = stats.uniform.rvs()
        try:
            test = m.exp(newX - currentX)
        except OverflowError:
            #A huge improvement overflows exp(); it is a certain accept
            test = 2.
        if u < test:
            #Accept
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
            if currentX > bestX:
                bestfit = currentguess
                bestX = currentX
        samples.append(currentguess)
    acceptratio = float(naccept) / (nburn + nsamples)
    if acceptratio < .5 or acceptratio > .8:
        print("Acceptance ratio was " + str(acceptratio))
    #Drop the burn-in (and the last entry): one column per kept sample
    samples = sc.array(samples).T[:, nburn:-1]
    print("Best-fit, overall")
    print(bestfit, sc.mean(samples[2, :]), sc.median(samples[2, :]))
    #A bin count has to be an integer
    histmb, edges = sc.histogramdd(samples.T[:, 0:2],
                                   bins=int(round(sc.sqrt(nsamples) / 2.)))
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    print(edges[0][indxi - 1], edges[1][indxj - 1])
    print(edges[0][indxi], edges[1][indxj])
    print(edges[0][indxi + 1], edges[1][indxj + 1])
    t = edges[1][indxj]
    bcost = edges[0][indxi]
    #slope = tan(theta); sqrt(1/cos^2 - 1) equals |tan(theta)| and would
    #drop the sign of a negative slope
    mf = m.tan(t)
    b = bcost / m.cos(t)
    print(b, mf)
    #Plot result
    plot.bovy_print(**bovyprintargs)
    xlimits = [0, 300]
    ylimits = [0, 700]
    plot.bovy_plot(sc.array(xlimits),
                   mf * sc.array(xlimits) + b,
                   'k-',
                   xrange=xlimits,
                   yrange=ylimits,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    for ii in range(10):
        #Random sample
        ransample = sc.floor((stats.uniform.rvs() * nsamples)).astype('int')
        ransample = samples.T[ransample, 0:2]
        bestm = m.tan(ransample[1])
        bestb = ransample[0] / m.cos(ransample[1])
        plot.bovy_plot(sc.array(xlimits),
                       bestm * sc.array(xlimits) + bestb,
                       overplot=True,
                       color='0.75',
                       zorder=0)
    #Add labels: chain average of Pbad for every data point
    nkept = samples.shape[1]
    for ii in range(nsample):
        Pb = 0.
        for jj in range(nkept):
            Pb += Pbad(samples[:, jj], Z[ii, :], ycovar[:, ii, :])
        Pb /= nkept
        text(Z[ii, 0] + 5, Z[ii, 1] + 5, '%.1f' % Pb, color='0.5', zorder=3)
    #Create the error ellipses from the per-point covariances filled in
    #above (the same fields of the same records; no second read needed)
    ellipses = []
    for jj in range(nsample):
        #Calculate the eigenvalues and the rotation angle
        eigs = linalg.eig(ycovar[:, jj, :])
        angle = m.atan(-eigs[1][0, 1] / eigs[1][1, 1]) / m.pi * 180.
        thisellipse = Ellipse(sc.array([X[jj], Y[jj]]),
                              2 * m.sqrt(eigs[0][0]),
                              2 * m.sqrt(eigs[0][1]), angle)
        ellipses.append(thisellipse)
    #Add the error ellipses
    ax = gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    ax.plot(X, Y, color='k', marker='o', linestyle='None')
    plot.bovy_end_print(plotfilename)
def marginalize_mixture(mixture=True, short=False):
    """marginalize_mixture: Metropolis-sample the straight-line model for the
    data with full 2D uncertainties and plot the chain

    The line is parametrized by its angle theta and perpendicular offset
    bperp; slope and intercept are tan(theta) and bperp/cos(theta).

    Input:
       mixture - if True sample the good/bad mixture model; if False Pbad is
                 clamped to zero in every proposal
       short - if True run half the usual number of MCMC steps
    Output:
       plots <prefix>-data.pdf, <prefix>-xy.pdf, <prefix>-mb.pdf and (mixture
       only) <prefix>-xy-bg.pdf, with <prefix> 'mixture2d' or 'nomixture2d'
    """
    if mixture:
        prefix = 'mixture2d'
    else:
        prefix = 'nomixture2d'
    # NOTE(review): recent numpy versions reject negative seeds -- confirm
    random.seed(-1)  #In the interest of reproducibility (if that's a word)

    # Read the data
    data = read_data('data_allerr.dat', True)
    ndata = len(data)
    # Create the ellipses and the data points
    x = zeros(ndata)
    y = zeros(ndata)
    ellipses = []
    yvar = zeros((ndata, 2, 2))
    for ii in range(ndata):
        x[ii] = data[ii][1][0]
        y[ii] = data[ii][1][1]
        #Calculate the eigenvalues and the rotation angle
        yvar[ii, 0, 0] = data[ii][3]**2.
        yvar[ii, 1, 1] = data[ii][2]**2.
        yvar[ii, 0, 1] = data[ii][4] * sqrt(yvar[ii, 0, 0] * yvar[ii, 1, 1])
        yvar[ii, 1, 0] = yvar[ii, 0, 1]
        eigs = linalg.eig(yvar[ii, :, :])
        angle = arctan(-eigs[1][0, 1] / eigs[1][1, 1]) / pi * 180.
        ellipses.append(Ellipse(array([x[ii], y[ii]]),
                                2 * sqrt(eigs[0][0]),
                                2 * sqrt(eigs[0][1]),
                                angle=angle))

    # initialize parameters from the line through data points 7 and 9
    theta = arctan2(y[7] - y[9], x[7] - x[9])
    bperp = (y[7] - tan(theta) * x[7]) * cos(theta)  # bad at theta = 0.5 * pi
    if mixture:
        Pbad = 0.5
    else:
        Pbad = 0.
    Ybad = mean(y)
    Vbad = mean((y - Ybad)**2)

    p = posterior(x, y, yvar, theta, bperp, Pbad, Ybad, Vbad)
    print('starting p= %s' % (p,))

    chain = []
    oldp = p
    oldparams = (theta, bperp, Pbad, Ybad, Vbad)
    bestparams = oldparams
    bestp = oldp
    nsteps = 0
    naccepts = 0
    NSTEPS = 100000
    if short:
        # floor division keeps the step count an integer
        NSTEPS //= 2
    print('doing %s steps of MCMC...' % (NSTEPS,))
    while nsteps < NSTEPS:
        newparams = pick_new_parameters(nsteps, *oldparams)
        if not mixture:
            # clamp Pbad to zero.
            (theta, bperp, Pbad, Ybad, Vbad) = newparams
            newparams = (theta, bperp, 0, Ybad, Vbad)
        p = posterior(x, y, yvar, *newparams)
        if p / oldp > random.uniform():
            chain.append((p, newparams))
            oldparams = newparams
            oldp = p
            if p > bestp:
                bestp = p
                bestparams = newparams
            naccepts += 1
        else:
            chain.append((oldp, oldparams))
            # keep oldparams, oldp
        nsteps += 1
        if (nsteps % 5000 == 1):
            print('%s %s %s %s %s %s' % (nsteps, naccepts,
                                         (naccepts / float(nsteps)),
                                         oldp, bestp, bestparams))
    print('acceptance fraction %s' % ((naccepts / float(nsteps)),))

    # plot a sample
    fig_width = 5
    fig_height = 5
    fig_size = [fig_width, fig_height]
    params = {
        'axes.labelsize': 12,
        # 'text.fontsize' was the old alias of 'font.size'
        'font.size': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size,
        'image.interpolation': 'nearest',
        'image.origin': 'lower',
    }
    rcParams.update(params)

    # Plot data
    clf()
    ax = gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    xlabel(r'$x$')
    ylabel(r'$y$')
    xlim(0, 300)
    ylim(0, 700)
    savefig(prefix + '-data.pdf')

    a = axis()
    xmin, xmax = xlim()
    ymin, ymax = ylim()
    xs = linspace(xmin, xmax, 2)
    Nchain = len(chain)
    half = Nchain // 2  # start of the second half of the chain
    if mixture:
        # select 10 samples at random from the second half of the chain.
        I = half + random.permutation(half)[:10]
    else:
        I = array([argmax([p for (p, params) in chain])])
    for i in I:
        (p, params) = chain[i]
        (theta, bperp, Pbad, Ybad, Vbad) = params
        # replace this with smarter linear algebra
        ys = tan(theta) * xs + bperp / cos(theta)
        plot(xs, ys, color='k', alpha=0.3)
    axis(a)
    savefig(prefix + '-xy.pdf')

    if mixture:
        # Accumulate background and foreground likelihoods per data point
        bgp = zeros(len(x))
        fgp = zeros(len(x))
        for (p, params) in chain[half:]:
            (theta, bperp, Pbad, Ybad, Vbad) = params
            bgp += Pbad * single_point_likelihoods(x, y, yvar, theta, bperp,
                                                   1, Ybad, Vbad)
            fgp += (1. - Pbad) * single_point_likelihoods(
                x, y, yvar, theta, bperp, 0, Ybad, Vbad)
        bgodds = bgp / fgp
        dxl = (xmax - xmin) * 0.01
        dyl = (ymax - ymin) * 0.01
        for i, bgo in enumerate(bgodds):
            if bgo < 1:
                continue
            # label points whose background odds are at least 1
            text(x[i] + dxl, y[i] + dyl, '%.1f' % log10(bgo),
                 horizontalalignment='left',
                 verticalalignment='bottom',
                 alpha=0.3)
        # NOTE(review): assumed to belong to the mixture branch -- confirm
        savefig(prefix + '-xy-bg.pdf')

    clf()
    # note horrifying theta = 0.5 * pi behavior!
    ms = array([tan(theta)
                for (p, (theta, bperp, Pbad, Ybad, Vbad)) in chain[half:]])
    bs = array([bperp / cos(theta)
                for (p, (theta, bperp, Pbad, Ybad, Vbad)) in chain[half:]])
    #plot(ms, bs, 'k,', alpha=0.1)
    xlabel('slope $m$')
    ylabel('intercept $b$')
    #savefig(prefix + '-mb-scatter.pdf')

    clf()
    (H, xe, ye) = histogram2d(ms, bs, bins=(100, 100))
    imshow(log(1 + H.T),
           extent=(xe.min(), xe.max(), ye.min(), ye.max()),
           aspect='auto',
           cmap=antigray)
    xlabel('slope $m$')
    ylabel('intercept $b$')
    savefig(prefix + '-mb.pdf')
def marginalize_mixture(mixture=True, thirds=False, short=False):
    """marginalize_mixture: Metropolis-sample the straight-line model (slope m,
    intercept b) for the data with y uncertainties and plot the chain

    Input:
       mixture - if True sample the good/bad mixture model; if False Pbad is
                 clamped to zero in every proposal
       thirds - if True divide the y variances by 9 (filenames get '-thirds')
       short - if True run half the usual number of MCMC steps
    Output:
       plots <prefix>-data.pdf, <prefix>-xy.pdf, <prefix>-mb.pdf and (mixture
       only) <prefix>-xy-bg.pdf, with <prefix> 'mixture' or 'nomixture'
    """
    if mixture:
        prefix = 'mixture'
    else:
        prefix = 'nomixture'
    if thirds:
        prefix += '-thirds'
    # NOTE(review): recent numpy versions reject negative seeds -- confirm
    random.seed(-1)  #In the interest of reproducibility (if that's a word)

    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #Put the data in the appropriate arrays and matrices
    x = zeros(ndata)
    y = zeros(ndata)
    yvar = zeros(ndata)
    for ii in range(ndata):
        x[ii] = data[ii][1][0]
        y[ii] = data[ii][1][1]
        yvar[ii] = data[ii][2]**2
    if thirds:
        yvar /= 9.

    # initialize parameters from the line through data points 7 and 9
    m = (y[7] - y[9]) / (x[7] - x[9])
    b = y[7] - m * x[7]
    if mixture:
        Pbad = 0.5
    else:
        Pbad = 0.
    Ybad = mean(y)
    Vbad = mean((y - Ybad)**2)

    p = posterior(x, y, yvar, m, b, Pbad, Ybad, Vbad)
    print('starting p= %s' % (p,))

    chain = []
    oldp = p
    oldparams = (m, b, Pbad, Ybad, Vbad)
    bestparams = oldparams
    bestp = oldp
    nsteps = 0
    naccepts = 0
    NSTEPS = 100000
    if short:
        # floor division keeps the step count an integer
        NSTEPS //= 2
    print('doing %s steps of MCMC...' % (NSTEPS,))
    while nsteps < NSTEPS:
        newparams = pick_new_parameters(nsteps, *oldparams)
        if not mixture:
            # clamp Pbad to zero.
            (m, b, Pbad, Ybad, Vbad) = newparams
            newparams = (m, b, 0, Ybad, Vbad)
        p = posterior(x, y, yvar, *newparams)
        if p / oldp > random.uniform():
            chain.append((p, newparams))
            oldparams = newparams
            oldp = p
            if p > bestp:
                bestp = p
                bestparams = newparams
            naccepts += 1
        else:
            chain.append((oldp, oldparams))
            # keep oldparams, oldp
        nsteps += 1
        if (nsteps % 5000 == 1):
            print('%s %s %s %s %s %s' % (nsteps, naccepts,
                                         (naccepts / float(nsteps)),
                                         oldp, bestp, bestparams))
    print('acceptance fraction %s' % ((naccepts / float(nsteps)),))

    # plot a sample
    # Plot data
    errorbar(x, y, sqrt(yvar), color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    xlim(0, 300)
    ylim(0, 700)
    savefig(prefix + '-data.pdf')

    a = axis()
    xmin, xmax = xlim()
    ymin, ymax = ylim()
    xs = linspace(xmin, xmax, 2)
    Nchain = len(chain)
    half = Nchain // 2  # start of the second half of the chain
    if mixture:
        # select 10 samples at random from the second half of the chain.
        I = half + random.permutation(half)[:10]
    else:
        I = array([argmax([p for (p, params) in chain])])
    for i in I:
        (p, params) = chain[i]
        (m, b, Pbad, Ybad, Vbad) = params
        ys = m * xs + b
        plot(xs, ys, color='k', alpha=0.3)
    axis(a)
    savefig(prefix + '-xy.pdf')

    if mixture:
        # Accumulate background and foreground likelihoods per data point
        bgp = zeros(len(x))
        fgp = zeros(len(x))
        for (p, params) in chain[half:]:
            (m, b, Pbad, Ybad, Vbad) = params
            bgp += Pbad * single_point_likelihoods(x, y, yvar, m, b,
                                                   1, Ybad, Vbad)
            fgp += (1. - Pbad) * single_point_likelihoods(x, y, yvar, m, b,
                                                          0, Ybad, Vbad)
        bgodds = bgp / fgp
        dxl = (xmax - xmin) * 0.01
        dyl = (ymax - ymin) * 0.01
        for i, bgo in enumerate(bgodds):
            if bgo < 1:
                continue
            # label points whose background odds are at least 1
            text(x[i] + dxl, y[i] + dyl, '%.1f' % log10(bgo),
                 horizontalalignment='left',
                 verticalalignment='bottom',
                 alpha=0.3)
        # NOTE(review): assumed to belong to the mixture branch -- confirm
        savefig(prefix + '-xy-bg.pdf')

    clf()
    ms = array([m for (p, (m, b, Pbad, Ybad, Vbad)) in chain[half:]])
    bs = array([b for (p, (m, b, Pbad, Ybad, Vbad)) in chain[half:]])
    #plot(ms, bs, 'k,', alpha=0.1)
    #xlabel('slope $m$')
    #ylabel('intercept $b$')
    #savefig(prefix + '-mb-scatter.pdf')

    clf()
    (H, xe, ye) = histogram2d(ms, bs, bins=(100, 100))
    print('max H: %s' % (H.max(),))
    imshow(log(1 + H.T),
           extent=(xe.min(), xe.max(), ye.min(), ye.max()),
           aspect='auto',
           cmap=antigray)
    xlabel('slope $m$')
    ylabel('intercept $b$')
    savefig(prefix + '-mb.pdf')
def plot_data_allerr():
    """plot_data_allerr: Plot the data with full error ellipses
    Input:
       (none; reads 'data_allerr.dat')
    Output:
       plot written to 'data_allerr.png'; returns 0
    History:
       2009-05-20 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', True)
    ndata = len(data)
    #Create the ellipses and the data points
    x = sc.zeros(ndata)
    y = sc.zeros(ndata)
    ellipses = []
    ymin, ymax = 0, 0
    xmin, xmax = 0, 0
    for ii in range(ndata):
        x[ii] = data[ii][1][0]
        y[ii] = data[ii][1][1]
        #Calculate the eigenvalues and the rotation angle
        ycovar = sc.zeros((2, 2))
        ycovar[0, 0] = data[ii][3]**2.
        ycovar[1, 1] = data[ii][2]**2.
        ycovar[0, 1] = data[ii][4] * m.sqrt(ycovar[0, 0] * ycovar[1, 1])
        ycovar[1, 0] = ycovar[0, 1]
        eigs = linalg.eig(ycovar)
        angle = m.atan(-eigs[1][0, 1] / eigs[1][1, 1]) / m.pi * 180.
        ellipses.append(Ellipse(sc.array([x[ii], y[ii]]),
                                2 * m.sqrt(eigs[0][0]),
                                2 * m.sqrt(eigs[0][1]),
                                angle=angle))
        #Grow the plot limits to hold each point plus/minus its 1-sigma error
        xsig = m.sqrt(ycovar[0, 0])
        ysig = m.sqrt(ycovar[1, 1])
        if (x[ii] + xsig) > xmax:
            xmax = (x[ii] + xsig)
        if (x[ii] - xsig) < xmin:
            xmin = (x[ii] - xsig)
        if (y[ii] + ysig) > ymax:
            ymax = (y[ii] + ysig)
        if (y[ii] - ysig) < ymin:
            ymin = (y[ii] - ysig)

    plotfilename = 'data_allerr.png'
    fig_width = 7.5
    fig_height = 7.5
    fig_size = [fig_width, fig_height]
    params = {
        'axes.labelsize': 12,
        # 'text.fontsize' was the old alias of 'font.size'
        'font.size': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    rcParams.update(params)
    fig = figure()
    ax = fig.add_subplot(111)
    #Add the error ellipses
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    ax.plot(x, y, color='k', marker='o', linestyle='None')
    ax.set_xlabel(r'$x$')
    ax.set_ylabel(r'$y$')
    ax.set_xlim((xmin, xmax))
    ax.set_ylim((ymin, ymax))
    savefig(plotfilename, format='png')

    return 0
def ex8(plotfilename='ex8.png',nburn=1000,nsamples=10000,parsigma=[.075,2.,0.1]):
    """ex8: solve exercise 8 by Gibbs sampling the per-point good/bad flags q
    and Metropolis sampling the slope m, intercept b and pgood
    Input:
       plotfilename - filename for the output plot
       nburn - number of burn-in samples (only adds to the iteration count)
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution widths (Gaussian) for m, b, pgood
    Output:
       plot; returns 0
    History:
       2009-06-25 -- hacked from Bovy code - Hogg (NYU)
    """
    # NOTE(review): recent numpy versions reject negative seeds -- confirm
    sc.random.seed(-1) #In the interest of reproducibility (if that's a word)
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    #Put the data in the appropriate arrays and matrices
    X= sc.zeros(ndata)
    Y= sc.zeros(ndata)
    A= sc.ones((ndata,2))
    Yivar= sc.zeros(ndata)
    C= sc.zeros((ndata,ndata))
    yerr= sc.zeros(ndata)
    for ii in range(ndata):
        X[ii]= data[ii][1][0]
        Y[ii]= data[ii][1][1]
        A[ii,1]= data[ii][1][0]
        Yivar[ii]= 1.0/(data[ii][2]**2)
        C[ii,ii]= data[ii][2]**2
        yerr[ii]= data[ii][2]

    #First find the chi-squared solution, which we will use as an
    #initial guess
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y.T)))
    m= bestfit[1]
    b= bestfit[0]
    #Start with the first four points flagged bad, all others good
    q= sc.ones(ndata,dtype=int)
    q[0:4] = 0
    pgood= 0.9
    #pgood=0.999759#3 sigma for uncertainty~50
    initialguess= [m,b,q,pgood]
    print(initialguess)

    #With this initial guess start off the sampling procedure
    bgmean= sc.mean(Y)
    bgivar= 1.0/sc.sum((Y-bgmean)**2)
    initialX= lnposterior(X,Y,Yivar,m,b,q,pgood,bgmean,bgivar)
    currentX= initialX
    bestX= initialX
    bestfit= initialguess
    currentguess= initialguess
    naccept= 0
    for jj in range(nburn+nsamples):
        #Draw a sample from the proposal distribution
        #(deep copy so a rejected proposal leaves currentguess untouched)
        thisguess = c.deepcopy(currentguess)
        m= thisguess[0]
        b= thisguess[1]
        q= thisguess[2]
        pgood= thisguess[3]
        #First Gibbs sample each q
        for ii in range(ndata):
            thisdatagood= ma.sqrt(Yivar[ii]/(2.*math.pi))*ma.exp(-.5*(Y[ii]-m*X[ii]-b)**2.*Yivar[ii])*pgood
            thisdatabad= ma.sqrt(bgivar/(2.*math.pi))*ma.exp(-.5*(Y[ii]-bgmean)**2.*bgivar)*(1.0-pgood)
            a= thisdatagood/(thisdatagood+thisdatabad)
            u= stats.uniform.rvs()
            if u<a:
                q[ii]= 1
            else:
                q[ii]= 0
        #Then Metropolis sample m and b
        m += stats.norm.rvs()*parsigma[0]
        b += stats.norm.rvs()*parsigma[1]
        pgood += stats.norm.rvs()*parsigma[2]
        #Keep pgood inside [1-MAXP, MAXP]
        if pgood > MAXP:
            pgood = MAXP
        if pgood < (1.0-MAXP):
            pgood = (1.0-MAXP)
        newsample = [m,b,q,pgood]
        #Calculate the objective function for the newsample
        newX= lnposterior(X,Y,Yivar,m,b,q,pgood,bgmean,bgivar)
        #Accept or reject
        #Reject with the appropriate probability
        u= stats.uniform.rvs()
        deltaX= newX-currentX
        #An improvement is always accepted; testing the sign first keeps
        #exp() from overflowing on a large improvement
        if deltaX >= 0 or u < ma.exp(deltaX):
            #Accept
            currentX= newX
            currentguess= newsample
            naccept= naccept+1
        if currentX > bestX:
            print(currentguess)
            bestfit= currentguess
            bestX= currentX
    print("Acceptance ratio was "+str(double(naccept)/(nburn+nsamples)))

    #Now plot the best solution
    fig_width=5
    fig_height=5
    fig_size = [fig_width,fig_height]
    params = {'axes.labelsize': 12,
              # 'text.fontsize' was the old alias of 'font.size'
              'font.size': 11,
              'legend.fontsize': 12,
              'xtick.labelsize':10,
              'ytick.labelsize':10,
              'text.usetex': True,
              'figure.figsize': fig_size}
    rcParams.update(params)
    #Plot data (color given once; a repeated keyword is a syntax error)
    errorbar(X,Y,yerr,color='k',marker='o',linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    xlim(0,300)
    ylim(0,700)
    xmin, xmax= xlim()
    (m,b,q,pgood) = bestfit
    print(bestfit)
    print(m)
    xs= sc.linspace(xmin,xmax,3)
    ys= m*xs+b
    if b < 0:
        sgn_str= '-'
    else:
        sgn_str= '+'
    label= r'$y = %4.2f\, x'% m+sgn_str+ '%4.0f ' % ma.fabs(b)+'$'#+r'; X = '+ '%3.1f' % bestX+'$'
    plot(xs,ys,color='k',ls='--',label=label)
    l=legend(loc=(.3,.1),numpoints=8)
    l.draw_frame(False)
    xlim(0,300)
    ylim(0,700)
    savefig(plotfilename,format='png')

    return 0
def ex9(exclude=sc.array([1, 2, 3, 4]),
        plotfilename='ex9.png',
        zoom=False,
        bovyprintargs={}):
    """ex9: solve exercise 9
    Fits a straight line with every data variance set to the same value S,
    for a grid of S values, and plots chi^2 as a function of S.
    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       zoom - zoom in (S in [900,1000] instead of [0.001,1500])
       bovyprintargs - keyword arguments passed to plot.bovy_print
    Output:
       plot; returns 0
    History:
       2009-05-27 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    nsample = ndata - len(exclude)
    nSs = 1001
    if zoom:
        Srange = [900, 1000]
    else:
        Srange = [0.001, 1500]
    Ss = sc.linspace(Srange[0], Srange[1], nSs)
    chi2s = sc.zeros(nSs)
    #Put the data in the appropriate arrays and matrices; none of these
    #depend on S, so they are built once outside the loop over S
    Y = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        A[jj, 1] = data[ii][1][0]
        yerr[jj] = data[ii][2]
        jj = jj + 1
    for kk in range(nSs):
        #Every point gets the same variance S
        C = sc.zeros((nsample, nsample))
        for jj in range(nsample):
            C[jj, jj] = Ss[kk]
        #Now compute the best fit and the uncertainties
        Cinv = linalg.inv(C)
        bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
        bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y.T)))
        chi2s[kk] = chi2(bestfit, A, Y, C)

    #Now plot the solution
    plot.bovy_print(**bovyprintargs)
    #Plot chi^2 against S
    if zoom:
        chi2range = [nsample - 4, nsample]
    else:
        chi2range = [nsample - 10, nsample + 8]
    plot.bovy_plot(Ss,
                   chi2s,
                   'k-',
                   xrange=Srange,
                   yrange=chi2range,
                   xlabel=r'$S$',
                   ylabel=r'$\chi^2$',
                   zorder=1)
    #Horizontal dashed line at nsample-2
    plot.bovy_plot(sc.array(Srange),
                   sc.array([nsample - 2, nsample - 2]),
                   'k--',
                   zorder=2,
                   overplot=True)
    #plot.bovy_plot(sc.array([sc.median(yerr**2.),sc.median(yerr**2.)]),
    #               sc.array(chi2range),color='0.75',overplot=True)
    #Vertical line at the mean of the quoted variances
    plot.bovy_plot(sc.array([sc.mean(yerr**2.), sc.mean(yerr**2.)]),
                   sc.array(chi2range),
                   color='0.75',
                   overplot=True)
    plot.bovy_end_print(plotfilename)

    return 0
def ex7c(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex7c.png'):
    """ex7c: solve exercise 7 using a simulated annealing optimization
    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
    Output:
       plot; returns 0, or -1 if the user answers 'n' at the prompt
    History:
       2009-06-02 - Written - Bovy (NYU)
    """
    try:
        ask = raw_input
    except NameError:  # Python 3 has no raw_input
        ask = input
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the soft chi-squared optimization
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        jj = jj + 1
    #Now compute the best fit and the uncertainties
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y.T)))
    initialguess = sc.array([bestfit[0], bestfit[1]])
    #With this initial guess start off the annealing procedure
    Qs = [1., 2.]
    bestfitssoft = sc.zeros((2, len(Qs)))
    initialchisq = 0.
    for jj in range(nsample):
        initialchisq = initialchisq + (Y[jj] - X[jj] * initialguess[1] -
                                       initialguess[0])**2 / (yerr[jj]**2)
    chisqQ = sc.zeros(len(Qs))
    for ii in range(len(Qs)):
        chisqQ[ii] = initialchisq
        bestfit = initialguess
        nonglobal = True
        print("Working on Q = " + str(Qs[ii]))
        print("Performing 10 runs of the simulating annealing optimization algorithm")
        for jj in range(10):  #Do ten runs of the sa algorithm
            sc.random.seed(jj + 1)  #In the interest of reproducibility (if that's a word)
            # NOTE(review): optimize.anneal was removed in SciPy 0.16, so
            # this needs an older SciPy or a replacement optimizer
            bestfitsoft = optimize.anneal(softchisquared,
                                          initialguess,
                                          (X, Y, yerr, Qs[ii]),
                                          schedule='boltzmann',
                                          full_output=1)  #,dwell=200,maxiter=1000)
            #Keep the run with the lowest objective value
            if bestfitsoft[1] < chisqQ[ii]:
                bestfit = bestfitsoft[0]
                chisqQ[ii] = bestfitsoft[1]
            # NOTE(review): nesting reconstructed; assumed to be checked for
            # every run -- confirm
            if bestfitsoft[6] == 0:
                nonglobal = False
        if nonglobal:
            print("Did not cool to the global optimum")
            try:
                answer = ask('continue to plot? [yn]\n')
            except EOFError:
                print("Since you are in non-interactive mode I will assume 'y'")
                answer = 'y'
            if answer == 'n':
                print("returning...")
                return -1
        bestfitssoft[:, ii] = bestfit

    #Now plot the solution
    fig_width = 5
    fig_height = 5
    fig_size = [fig_width, fig_height]
    params = {
        'axes.labelsize': 12,
        # 'text.fontsize' was the old alias of 'font.size'
        'font.size': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    rcParams.update(params)
    #Plot data (color given once; a repeated keyword is a syntax error)
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles = ('--', ':', '-.')
    for jj in range(len(Qs)):
        xlim(0, 300)
        ylim(0, 700)
        xmin, xmax = xlim()
        nsamples = 1001
        xs = sc.linspace(xmin, xmax, nsamples)
        ys = sc.zeros(nsamples)
        for ii in range(nsamples):
            ys[ii] = bestfitssoft[0, jj] + bestfitssoft[1, jj] * xs[ii]
        if bestfitssoft[0, jj] < 0:
            sgn_str = '-'
        else:
            sgn_str = '+'
        label = r'$Q= ' + r'%i: y = %4.2f\, x' % (
            Qs[jj], bestfitssoft[1, jj]) + sgn_str + '%4.0f ' % m.fabs(
                bestfitssoft[0, jj]) + r'; \chi^2_Q = ' + '%3.1f' % chisqQ[jj] + '$'
        plot(xs, ys, color='k', ls=linestyles[jj], label=label)
    l = legend(loc=(.2, .1), numpoints=8)
    l.draw_frame(False)
    xlim(0, 300)
    ylim(0, 700)
    savefig(plotfilename, format='png')

    return 0
def ex6c(exclude=sc.array([1, 2, 3, 4]),
         plotfilename='ex6c.png',
         nburn=100,
         nsamples=10000,
         parsigma=[5, .075]):
    """ex6c: solve exercise 6 using MCMC sampling
    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       nburn - number of burn-in samples (only adds to the iteration count)
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution widths (Gaussian) for the intercept
                  and the slope
    Output:
       plot; returns 0
    History:
       2009-06-02 - Written - Bovy (NYU)
    """
    sc.random.seed(100)  #In the interest of reproducibility (if that's a word)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the bi-exponential optimization
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        jj = jj + 1
    #Now compute the best fit and the uncertainties
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y.T)))
    initialguess = sc.array([bestfit[0], bestfit[1]])
    #With this initial guess start off the sampling procedure
    #Objective at the starting point: sum of |residual|/sigma
    initialX = 0.
    for jj in range(nsample):
        initialX = initialX + m.fabs(Y[jj] - bestfit[1] * X[jj] -
                                     bestfit[0]) / yerr[jj]
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution
        newsample = sc.zeros(2)
        newsample[0] = currentguess[0] + stats.norm.rvs() * parsigma[0]
        newsample[1] = currentguess[1] + stats.norm.rvs() * parsigma[1]
        #Calculate the objective function for the newsample
        newX = logbiexp(newsample, X, Y, yerr)
        #Accept or reject
        #Reject with the appropriate probability
        u = stats.uniform.rvs()
        deltaX = currentX - newX
        #A lower objective is always accepted; testing the sign first keeps
        #exp() from overflowing on a large improvement
        if deltaX >= 0 or u < m.exp(deltaX):
            #Accept
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
        if currentX < bestX:
            bestfit = currentguess
            bestX = currentX
    bestfitsbiexp = bestfit
    #Only report the acceptance ratio when it falls outside [.5,.8]
    acceptance = double(naccept) / (nburn + nsamples)
    if acceptance < .5 or acceptance > .8:
        print("Acceptance ratio was " + str(acceptance))

    #Now plot the solution
    fig_width = 5
    fig_height = 5
    fig_size = [fig_width, fig_height]
    params = {
        'axes.labelsize': 12,
        #'text.fontsize': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    rcParams.update(params)
    #Plot data
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    xlim(0, 300)
    ylim(0, 700)
    xmin, xmax = xlim()
    #Grid for drawing the best-fit line
    nplot = 1001
    xs = sc.linspace(xmin, xmax, nplot)
    ys = sc.zeros(nplot)
    for ii in range(nplot):
        ys[ii] = bestfitsbiexp[0] + bestfitsbiexp[1] * xs[ii]
    if bestfitsbiexp[0] < 0:
        sgn_str = '-'
    else:
        sgn_str = '+'
    label = r'$y = %4.2f\, x' % (
        bestfitsbiexp[1]) + sgn_str + '%4.0f ' % m.fabs(
            bestfitsbiexp[0]) + r'; X = ' + '%3.1f' % bestX + '$'
    plot(xs, ys, color='k', ls='--', label=label)
    l = legend(loc=(.3, .1), numpoints=8)
    l.draw_frame(False)
    xlim(0, 300)
    ylim(0, 700)
    print('Creating: ', plotfilename)
    savefig(plotfilename, format='png')

    return 0
def ex7a(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex7a.png'):
    """ex7a: solve exercise 7 using non-linear optimization
    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
    Output:
       plot; returns 0, or -1 if the user answers 'n' at the prompt
    History:
       2009-06-01 - Written - Bovy (NYU)
    """
    try:
        ask = raw_input
    except NameError:  # Python 3 has no raw_input
        ask = input
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the soft chi-squared optimization
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        jj = jj + 1
    #Now compute the best fit and the uncertainties
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y.T)))
    initialguess = sc.array([bestfit[0], bestfit[1]])
    #Now optimize the soft chi-squared objective function
    Qs = [1., 2.]
    bestfitssoft = sc.zeros((2, len(Qs)))
    chisqQ = sc.zeros(len(Qs))
    for ii in range(len(Qs)):
        print("Working on Q = " + str(Qs[ii]))
        bestfitsoft1 = optimize.fmin(softchisquared,
                                     initialguess, (X, Y, yerr, Qs[ii]),
                                     disp=False)
        #Restart the optimization once using a different method
        bestfitsoft = optimize.fmin_powell(softchisquared,
                                           bestfitsoft1, (X, Y, yerr, Qs[ii]),
                                           disp=False)
        difference = linalg.norm(bestfitsoft - bestfitsoft1)
        if difference > 10**-12:
            if difference < 10**-6:
                print("Different optimizers give slightly different results...")
            else:
                # NOTE(review): nesting reconstructed; the prompt is assumed
                # to apply only to the "rather different" case -- confirm
                print("Different optimizers give rather different results...")
                print("The norm of the results differs by %g" % difference)
                try:
                    answer = ask('continue to plot? [yn]\n')
                except EOFError:
                    print("Since you are in non-interactive mode I will assume 'y'")
                    answer = 'y'
                if answer == 'n':
                    print("returning...")
                    return -1
        bestfitssoft[:, ii] = bestfitsoft
        #Calculate chi^2_Q; the residual is y - slope*x - intercept, where
        #the intercept is element 0 and the slope is element 1
        for jj in range(nsample):
            residual = Y[jj] - X[jj] * bestfitsoft[1] - bestfitsoft[0]
            chisqQ[ii] = chisqQ[ii] + 1. / (yerr[jj]**2 / residual**2 +
                                            1. / Qs[ii]**2)

    #Now plot the solution
    fig_width = 5
    fig_height = 5
    fig_size = [fig_width, fig_height]
    params = {
        'axes.labelsize': 12,
        # 'text.fontsize' was the old alias of 'font.size'
        'font.size': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    rcParams.update(params)
    #Plot data (color given once; a repeated keyword is a syntax error)
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles = ('--', ':', '-.')
    for jj in range(len(Qs)):
        xlim(0, 300)
        ylim(0, 700)
        xmin, xmax = xlim()
        nsamples = 1001
        xs = sc.linspace(xmin, xmax, nsamples)
        ys = sc.zeros(nsamples)
        for ii in range(nsamples):
            ys[ii] = bestfitssoft[0, jj] + bestfitssoft[1, jj] * xs[ii]
        if bestfitssoft[0, jj] < 0:
            sgn_str = '-'
        else:
            sgn_str = '+'
        label = r'$Q= ' + r'%i: y = %4.2f\, x' % (
            Qs[jj], bestfitssoft[1, jj]) + sgn_str + '%4.0f ' % m.fabs(
                bestfitssoft[0, jj]) + r'; \chi^2_Q = ' + '%3.1f' % chisqQ[jj] + '$'
        plot(xs, ys, color='k', ls=linestyles[jj], label=label)
    l = legend(loc=(.2, .1), numpoints=8)
    l.draw_frame(False)
    xlim(0, 300)
    ylim(0, 700)
    savefig(plotfilename, format='png')

    return 0
def ex7c(exclude=sc.array([1,2,3,4]),plotfilename='ex7c.png'):
    """ex7c: solve exercise 7 using a simulated annealing optimization
    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
    Output:
       plot; returns 0, or -1 if the user answers 'n' at the prompt
    History:
       2009-06-02 - Written - Bovy (NYU)
    """
    try:
        ask= raw_input
    except NameError: # Python 3 has no raw_input
        ask= input
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    nsample= ndata- len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the soft chi-squared optimization
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2.
        yerr[jj]= data[ii][2]
        jj= jj+1
    #Now compute the best fit and the uncertainties
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y.T)))
    initialguess= sc.array([bestfit[0],bestfit[1]])
    #With this initial guess start off the annealing procedure
    Qs= [1.,2.]
    bestfitssoft= sc.zeros((2,len(Qs)))
    initialchisq= 0.
    for jj in range(nsample):
        initialchisq= initialchisq+(Y[jj]-X[jj]*initialguess[1]-initialguess[0])**2/(yerr[jj]**2)
    chisqQ= sc.zeros(len(Qs))
    for ii in range(len(Qs)):
        chisqQ[ii]= initialchisq
        bestfit= initialguess
        nonglobal= True
        print("Working on Q = "+str(Qs[ii]))
        print("Performing 10 runs of the simulating annealing optimization algorithm")
        for jj in range(10):#Do ten runs of the sa algorithm
            sc.random.seed(jj+1) #In the interest of reproducibility (if that's a word)
            # NOTE(review): optimize.anneal was removed in SciPy 0.16, so
            # this needs an older SciPy or a replacement optimizer
            bestfitsoft= optimize.anneal(softchisquared,initialguess,(X,Y,yerr,Qs[ii]),
                                         schedule='boltzmann',full_output=1)#,dwell=200,maxiter=1000)
            #Keep the run with the lowest objective value
            if bestfitsoft[1] < chisqQ[ii]:
                bestfit= bestfitsoft[0]
                chisqQ[ii]= bestfitsoft[1]
            # NOTE(review): nesting reconstructed; assumed to be checked for
            # every run -- confirm
            if bestfitsoft[6] == 0:
                nonglobal= False
        if nonglobal:
            print("Did not cool to the global optimum")
            try:
                answer= ask('continue to plot? [yn]\n')
            except EOFError:
                print("Since you are in non-interactive mode I will assume 'y'")
                answer= 'y'
            if answer == 'n':
                print("returning...")
                return -1
        bestfitssoft[:,ii]= bestfit

    #Now plot the solution
    fig_width=5
    fig_height=5
    fig_size = [fig_width,fig_height]
    params = {'axes.labelsize': 12,
              # 'text.fontsize' was the old alias of 'font.size'
              'font.size': 11,
              'legend.fontsize': 12,
              'xtick.labelsize':10,
              'ytick.labelsize':10,
              'text.usetex': True,
              'figure.figsize': fig_size}
    rcParams.update(params)
    #Plot data (color given once; a repeated keyword is a syntax error)
    errorbar(X,Y,yerr,color='k',marker='o',linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles= ('--',':', '-.')
    for jj in range(len(Qs)):
        xlim(0,300)
        ylim(0,700)
        xmin, xmax= xlim()
        nsamples= 1001
        xs= sc.linspace(xmin,xmax,nsamples)
        ys= sc.zeros(nsamples)
        for ii in range(nsamples):
            ys[ii]= bestfitssoft[0,jj]+bestfitssoft[1,jj]*xs[ii]
        if bestfitssoft[0,jj] < 0:
            sgn_str= '-'
        else:
            sgn_str= '+'
        label= r'$Q= '+r'%i: y = %4.2f\, x'% (Qs[jj], bestfitssoft[1,jj]) +sgn_str+ '%4.0f ' % m.fabs(bestfitssoft[0,jj])+r'; \chi^2_Q = '+ '%3.1f' % chisqQ[jj]+'$'
        plot(xs,ys,color='k',ls=linestyles[jj],label=label)
    l=legend(loc=(.2,.1),numpoints=8)
    l.draw_frame(False)
    xlim(0,300)
    ylim(0,700)
    savefig(plotfilename,format='png')

    return 0
def ex16(exclude=sc.array([3]), plotfilename='ex16.png', bovyprintargs={}):
    """ex16: solve exercise 16 by optimization of the objective function
    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       bovyprintargs - keyword arguments passed to plot.bovy_print
    Output:
       plot; returns -1 if the user answers 'n' at the prompt
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    try:
        ask = raw_input
    except NameError:  # Python 3 has no raw_input
        ask = input
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    Z = sc.zeros((nsample, 2))
    yerr = sc.zeros(nsample)
    #Point index sits in the middle: ycovar[:,jj,:] is point jj's 2x2
    #covariance (makes the sc.dot easier)
    ycovar = sc.zeros((2, nsample, 2))
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        Z[jj, 0] = X[jj]
        Z[jj, 1] = Y[jj]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        ycovar[0, jj, 0] = data[ii][3]**2.
        ycovar[1, jj, 1] = data[ii][2]**2.
        ycovar[0, jj, 1] = data[ii][4] * m.sqrt(
            ycovar[0, jj, 0] * ycovar[1, jj, 1])
        ycovar[1, jj, 0] = ycovar[0, jj, 1]
        jj = jj + 1
    #Now compute the best fit and the uncertainties
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y.T)))
    #Now optimize
    initial = sc.array([bestfit[0], bestfit[1], sc.log(100)])
    bestfit2d1 = optimize.fmin(objective, initial, (Z, ycovar), disp=False)
    #Run the optimization again using a different method, from the same start
    bestfit2d = optimize.fmin_powell(objective,
                                     initial, (Z, ycovar),
                                     disp=False)
    difference = linalg.norm(bestfit2d - bestfit2d1)
    if difference > 10**-12:
        if difference < 10**-6:
            print("Different optimizers give slightly different results...")
        else:
            # NOTE(review): nesting reconstructed; the prompt is assumed to
            # apply only to the "rather different" case -- confirm
            print("Different optimizers give rather different results...")
            print("The norm of the results differs by %g" % difference)
            try:
                answer = ask('continue to plot? [yn]\n')
            except EOFError:
                print("Since you are in non-interactive mode I will assume 'y'")
                answer = 'y'
            if answer == 'n':
                print("returning...")
                return -1
    b = bestfit2d[0]
    mf = bestfit2d[1]
    V = sc.exp(bestfit2d[2] / 2.)
    cost = 1. / sc.sqrt(1 + mf**2.)
    bcost = b * cost
    #Plot result: two dashed lines offset by +V and -V
    plot.bovy_print(**bovyprintargs)
    xlimits = [0, 300]
    ylimits = [0, 700]
    plot.bovy_plot(sc.array(xlimits),
                   bestfit2d[1] * sc.array(xlimits) + bcost + V,
                   'k--',
                   xrange=xlimits,
                   yrange=ylimits,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    plot.bovy_plot(sc.array(xlimits),
                   bestfit2d[1] * sc.array(xlimits) + bcost - V,
                   'k--',
                   zorder=2,
                   overplot=True)

    #Create the error ellipses from the covariances collected above
    ellipses = []
    for jj in range(nsample):
        #Calculate the eigenvalues and the rotation angle
        eigs = linalg.eig(ycovar[:, jj, :])
        angle = m.atan(-eigs[1][0, 1] / eigs[1][1, 1]) / m.pi * 180.
        ellipses.append(Ellipse(sc.array([X[jj], Y[jj]]),
                                2 * m.sqrt(eigs[0][0]),
                                2 * m.sqrt(eigs[0][1]),
                                angle=angle))
    #Add the error ellipses
    ax = gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    ax.plot(X, Y, color='k', marker='o', linestyle='None')
    plot.bovy_end_print(plotfilename)
def ex7a(exclude=sc.array([1,2,3,4]),plotfilename='ex7a.png'):
    """ex7a: solve exercise 7 using non-linear optimization

    A straight line y = m x + b is first fit by weighted least squares;
    that solution seeds the minimization of the soft chi-squared
    objective (softchisquared) for Q = 1 and Q = 2. Each minimization is
    run with optimize.fmin and then restarted with optimize.fmin_powell.
    The data and the fitted lines are saved to plotfilename.

    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
    Output:
       plot; returns 0, or -1 when the two optimizers disagree by more
       than 10**-6 and the user answers 'n' to the prompt
    History:
       2009-06-01 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    nsample= ndata- len(exclude)
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2.
        yerr[jj]= data[ii][2]
        jj= jj+1
    #Chi-squared solution [A^T C^-1 A]^-1 [A^T C^-1 Y]; bestfit is (b,m)
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y.T)))
    initialguess= sc.array([bestfit[0],bestfit[1]])
    #Now optimize the soft chi-squared objective function
    Qs= [1.,2.]
    bestfitssoft= sc.zeros((2,len(Qs)))
    chisqQ= sc.zeros(len(Qs))
    for ii in range(len(Qs)):
        print("Working on Q = "+str(Qs[ii]))
        bestfitsoft1= optimize.fmin(softchisquared,initialguess,
                                    (X,Y,yerr,Qs[ii]),disp=False)
        #Restart the optimization once using a different method
        bestfitsoft= optimize.fmin_powell(softchisquared,bestfitsoft1,
                                          (X,Y,yerr,Qs[ii]),disp=False)
        optdiff= linalg.norm(bestfitsoft-bestfitsoft1)
        if optdiff > 10**-12:
            if optdiff < 10**-6:
                print("Different optimizers give slightly different results...")
            else:
                print("Different optimizers give rather different results...")
                print("The norm of the results differs by %g" % optdiff)
                #raw_input only exists on Python 2; fall back to input
                try:
                    ask= raw_input
                except NameError:
                    ask= input
                try:
                    x= ask('continue to plot? [yn]\n')
                except EOFError:
                    print("Since you are in non-interactive mode I will assume 'y'")
                    x= 'y'
                if x == 'n':
                    print("returning...")
                    return -1
        bestfitssoft[:,ii]= bestfitsoft
        #Calculate chi^2_Q; the residual is y - m x - b with b= [0], m= [1]
        #(the intercept index was [1] before, subtracting the slope twice)
        resid= Y-X*bestfitsoft[1]-bestfitsoft[0]
        chisqQ[ii]= (1./(yerr**2/resid**2+1./Qs[ii]**2)).sum()
    #Now plot the solution
    fig_width=5
    fig_height=5
    fig_size = [fig_width,fig_height]
    #'font.size' replaces the obsolete 'text.fontsize' rcParam
    params = {'axes.labelsize': 12,
              'font.size': 11,
              'legend.fontsize': 12,
              'xtick.labelsize':10,
              'ytick.labelsize':10,
              'text.usetex': True,
              'figure.figsize': fig_size}
    rcParams.update(params)
    #Plot data (color was passed twice before, which is a syntax error)
    errorbar(X,Y,yerr,color='k',marker='o',linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles= ('--',':', '-.')
    xlim(0,300)
    ylim(0,700)
    xmin, xmax= xlim()
    xs= sc.linspace(xmin,xmax,1001)
    for jj in range(len(Qs)):
        ys= bestfitssoft[0,jj]+bestfitssoft[1,jj]*xs
        if bestfitssoft[0,jj] < 0:
            sgn_str= '-'
        else:
            sgn_str= '+'
        label= r'$Q= %i: y = %4.2f\, x%s%4.0f ; \chi^2_Q = %3.1f$' % (
            Qs[jj],bestfitssoft[1,jj],sgn_str,
            m.fabs(bestfitssoft[0,jj]),chisqQ[jj])
        plot(xs,ys,color='k',ls=linestyles[jj],label=label)
    l=legend(loc=(.2,.1),numpoints=8)
    l.draw_frame(False)
    xlim(0,300)
    ylim(0,700)
    savefig(plotfilename,format='png')
    return 0
def ex6a(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex6a.png'):
    """ex6a: solve exercise 6 by optimization of the objective function

    The weighted least-squares line is used as the starting point for
    minimizing logbiexp, first with optimize.fmin and then again with
    optimize.fmin_powell. The data, the fitted line and the summed
    absolute scaled residual X are written to plotfilename.

    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
    Output:
       plot; returns 0
    History:
       2009-06-01 - Written - Bovy (NYU)
    """
    #Read the data and keep the points whose ID is not excluded
    rows = read_data('data_yerr.dat')
    nsample = len(rows) - len(exclude)
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    yerr = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    kept = 0
    for row in rows:
        if sc.any(exclude == row[0]):
            continue
        X[kept] = row[1][0]
        Y[kept] = row[1][1]
        yerr[kept] = row[2]
        A[kept, 1] = row[1][0]
        C[kept, kept] = row[2]**2.
        kept += 1

    #Chi-squared solution, used as the initial guess: bestfit is (b, m)
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y.T)))

    #Optimize the bi-exponential objective with two different methods
    fitargs = (X, Y, yerr)
    bestfitbiexp1 = optimize.fmin(logbiexp, bestfit, fitargs, disp=False)
    bestfitbiexp = optimize.fmin_powell(logbiexp, bestfitbiexp1, fitargs,
                                        disp=False)
    mismatch = linalg.norm(bestfitbiexp - bestfitbiexp1)
    if mismatch > 10**-12:
        if mismatch < 10**-6:
            print("Different optimizers give slightly different results...")
        else:
            print("Different optimizers give rather different results...")
            print("The norm of the results differs by %g" % mismatch)

    #Calculate X, the sum of |residual|/sigma over the kept points
    intercept, slope = bestfitbiexp[0], bestfitbiexp[1]
    XX = sum((m.fabs(Y[k] - slope * X[k] - intercept) / yerr[k]
              for k in range(nsample)), 0.)

    #Figure set-up ('text.fontsize': 11 is deliberately left out)
    rcParams.update({
        'axes.labelsize': 12,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': [5, 5],
    })

    #Plot data
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')

    #Plot the best fit line
    xlim(0, 300)
    ylim(0, 700)
    xmin, xmax = xlim()
    xs = sc.linspace(xmin, xmax, 1001)
    ys = intercept + slope * xs
    sgn_str = '-' if intercept < 0 else '+'
    label = r'$y = %4.2f\, x%s%4.0f ; X = %3.1f$' % (
        slope, sgn_str, m.fabs(intercept), XX)
    plot(xs, ys, color='k', ls='--', label=label)
    l = legend(loc=(.3, .1), numpoints=8)
    l.draw_frame(False)
    plot(xs, ys, 'k--')
    xlim(0, 300)
    ylim(0, 700)
    print('Creating: ', plotfilename)
    savefig(plotfilename, format='png')
    return 0
def ex13(exclude=sc.array([1,2,3,4]),plotfilename='ex13.png',
         nburn=1000,nsamples=100000,
         parsigma=[1,m.pi/200.,.01,.5,1.,.05,.1,.005],
         bovyprintargs={}):
    """ex13: solve exercise 13 by MCMC

    Runs a Metropolis sampler over an 8-parameter vector scored by
    objective(params,Z,ycovar), starting from a guess derived from the
    weighted least-squares line. The plot shows the line at the peak of
    the 2D histogram of the first two parameters (used below as
    b*cos(theta) and theta), ten lines drawn at random from the chain,
    the per-point average of Pbad over the chain as a text label, and
    the data with their error ellipses.

    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       nburn - number of burn-in samples
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution width (Gaussian), one per parameter
       bovyprintargs - keyword arguments passed on to plot.bovy_print
    Output:
       plot
    History:
       2010-05-06 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_allerr.dat',allerr=True)
    ndata= len(data)
    nsample= ndata- len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    Z= sc.zeros((nsample,2))
    yerr= sc.zeros(nsample)
    ycovar= sc.zeros((2,nsample,2))#Makes the sc.dot easier
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        Z[jj,0]= X[jj]
        Z[jj,1]= Y[jj]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2.
        yerr[jj]= data[ii][2]
        ycovar[0,jj,0]= data[ii][3]**2.
        ycovar[1,jj,1]= data[ii][2]**2.
        ycovar[0,jj,1]= data[ii][4]*m.sqrt(ycovar[0,jj,0]*ycovar[1,jj,1])
        ycovar[1,jj,0]= ycovar[0,jj,1]
        jj= jj+1
    #Now compute the best fit and the uncertainties
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y.T)))
    #Now sample
    inittheta= m.acos(1./m.sqrt(1.+bestfit[1]**2.))
    if bestfit[1] < 0.:
        inittheta= m.pi- inittheta
    #NOTE(review): entry 0 is cos(theta) rather than b*cos(theta), and
    #entries 5 and 6 both use var(X) (the second may have been meant to be
    #var(Y)); kept unchanged because it only sets the chain's start point
    initialguess= sc.array([m.cos(inittheta),inittheta,0.,
                            sc.mean(X),sc.mean(Y),
                            m.log(sc.var(X)),m.log(sc.var(X)),0.])
    npar= len(initialguess)
    #With this initial guess start off the sampling procedure
    initialX= objective(initialguess,Z,ycovar)
    currentX= initialX
    bestX= initialX
    bestfit= initialguess
    currentguess= initialguess
    naccept= 0
    samples= []
    samples.append(currentguess)
    for jj in range(nburn+nsamples):
        #Draw a sample from the proposal distribution
        newsample= sc.zeros(npar)
        for kk in range(npar):
            newsample[kk]= currentguess[kk]+stats.norm.rvs()*parsigma[kk]
        #Calculate the objective function for the newsample
        newX= objective(newsample,Z,ycovar)
        #Accept or reject
        #Reject with the appropriate probability
        u= stats.uniform.rvs()
        try:
            test= m.exp(newX-currentX)
        except OverflowError:
            test= 2. #Ratio too large to represent: always accept
        if u < test:
            #Accept
            currentX= newX
            currentguess= newsample
            naccept= naccept+1
        if currentX > bestX:
            bestfit= currentguess
            bestX= currentX
        samples.append(currentguess)
    acceptratio= float(naccept)/(nburn+nsamples)
    if acceptratio < .5 or acceptratio > .8:
        print("Acceptance ratio was "+str(acceptratio))
    #Drop the burn-in; rows are parameters, columns are samples
    samples= sc.array(samples).T[:,nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit,sc.mean(samples[2,:]),
                        sc.median(samples[2,:])))
    #The number of bins must be an integer
    nbins= int(round(sc.sqrt(nsamples)/2.))
    histmb,edges= sc.histogramdd(samples.T[:,0:2],bins=nbins)
    indxi= sc.argmax(sc.amax(histmb,axis=1))
    indxj= sc.argmax(sc.amax(histmb,axis=0))
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi-1],edges[1][indxj-1]))
    print("%s %s" % (edges[0][indxi],edges[1][indxj]))
    print("%s %s" % (edges[0][indxi+1],edges[1][indxj+1]))
    t= edges[1][indxj]
    bcost= edges[0][indxi]
    mf= m.sqrt(1./m.cos(t)**2.-1.)
    b= bcost/m.cos(t)
    print("%s %s" % (b,mf))
    #Plot result
    plot.bovy_print(**bovyprintargs)
    xrange=[0,300]
    yrange=[0,700]
    plot.bovy_plot(sc.array(xrange),mf*sc.array(xrange)+b,
                   'k-',xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    for ii in range(10):
        #Random sample; the index has to be an integer, not a float
        ranindx= int(stats.uniform.rvs()*nsamples)
        ransample= samples.T[ranindx,0:2]
        bestm= m.sqrt(1./m.cos(ransample[1])**2.-1.)
        bestb= ransample[0]/m.cos(ransample[1])
        plot.bovy_plot(sc.array(xrange),bestm*sc.array(xrange)+bestb,
                       overplot=True,color='0.75',zorder=0)
    #Add labels: Pbad averaged over the retained chain, for every point
    nkept= samples.shape[1]
    for ii in range(nsample):
        Pb= 0.
        for jj in range(nkept):
            Pb+= Pbad(samples[:,jj],Z[ii,:],ycovar[:,ii,:])
        Pb/= nkept
        text(Z[ii,0]+5,Z[ii,1]+5,'%.1f'%Pb,color='0.5',zorder=3)
    #Plot the data with error ellipses. X, Y and ycovar already hold the
    #positions and 2x2 covariances of the retained points, so the data
    #file is not read a second time
    ax= gca()
    for jj in range(nsample):
        #Calculate the eigenvalues and the rotation angle
        eigs= linalg.eig(ycovar[:,jj,:])
        angle= m.atan(-eigs[1][0,1]/eigs[1][1,1])/m.pi*180.
        thisellipse= Ellipse(sc.array([X[jj],Y[jj]]),
                             2*m.sqrt(eigs[0][0]),
                             2*m.sqrt(eigs[0][1]),angle=angle)
        ax.add_artist(thisellipse)
        thisellipse.set_facecolor('none')
    ax.plot(X,Y,color='k',marker='o',linestyle='None')
    plot.bovy_end_print(plotfilename)
def ex7b(exclude=sc.array([1,2,3,4]),plotfilename='ex7b.png'):
    """ex7b: solve exercise 7 using an iterative procedure

    Starting from the weighted least-squares line, the fit is repeated
    with per-point variances sigma^2 + (y-mx-b)^2/Q^2 evaluated at the
    previous solution, until the relative change in (b,m) drops below
    the tolerance. This is done for Q = 1 and Q = 2 and the resulting
    lines are plotted over the data.

    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
    Output:
       plot; returns 0
    History:
       2009-06-01 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    nsample= ndata- len(exclude)
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2.
        yerr[jj]= data[ii][2]
        jj= jj+1
    #Chi-squared solution, used as the initial guess: bestfit is (b,m)
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y.T)))
    initialguess= sc.array([bestfit[0],bestfit[1]])
    #With this initial guess start the iteration, using as the weights
    #Q^2/(sigma^2*Q^2+(y-mx-b)^2)
    tol= 10**-10.
    Qs= [1.,2.]
    bestfitssoft= sc.zeros((2,len(Qs)))
    chisqQ= sc.zeros(len(Qs))
    diagindx= list(range(nsample))
    for jj in range(len(Qs)):
        currentguess= initialguess
        diff= 2*tol
        while diff > tol:
            oldguess= currentguess
            #Update C with the residuals of the previous iteration
            oldresid= Y-oldguess[1]*X-oldguess[0]
            C[diagindx,diagindx]= yerr**2.+oldresid**2/Qs[jj]**2.
            #Re-fit
            Cinv= linalg.inv(C)
            bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
            bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y.T)))
            currentguess= sc.array([bestfit[0],bestfit[1]])
            diff= m.sqrt((currentguess[0]-oldguess[0])**2/oldguess[0]**2.
                         +(currentguess[1]-oldguess[1])**2/oldguess[1]**2.)
        bestfitssoft[0,jj]= currentguess[0]
        bestfitssoft[1,jj]= currentguess[1]
        #Calculate chi^2_Q; the residual is y - m x - b with b= [0], m= [1]
        #(the intercept index was [1] before, subtracting the slope twice)
        resid= Y-X*currentguess[1]-currentguess[0]
        chisqQ[jj]= (1./(yerr**2/resid**2+1./Qs[jj]**2)).sum()
    #Now plot the solution
    fig_width=5
    fig_height=5
    fig_size = [fig_width,fig_height]
    #'font.size' replaces the obsolete 'text.fontsize' rcParam
    params = {'axes.labelsize': 12,
              'font.size': 11,
              'legend.fontsize': 12,
              'xtick.labelsize':10,
              'ytick.labelsize':10,
              'text.usetex': True,
              'figure.figsize': fig_size}
    rcParams.update(params)
    #Plot data (color was passed twice before, which is a syntax error)
    errorbar(X,Y,yerr,color='k',marker='o',linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles= ('--',':', '-.')
    xlim(0,300)
    ylim(0,700)
    xmin, xmax= xlim()
    xs= sc.linspace(xmin,xmax,1001)
    for jj in range(len(Qs)):
        ys= bestfitssoft[0,jj]+bestfitssoft[1,jj]*xs
        if bestfitssoft[0,jj] < 0:
            sgn_str= '-'
        else:
            sgn_str= '+'
        label= r'$Q= %i: y = %4.2f\, x%s%4.0f ; \chi^2_Q = %3.1f$' % (
            Qs[jj],bestfitssoft[1,jj],sgn_str,
            m.fabs(bestfitssoft[0,jj]),chisqQ[jj])
        plot(xs,ys,color='k',ls=linestyles[jj],label=label)
    l=legend(loc=(.2,.1),numpoints=8)
    l.draw_frame(False)
    xlim(0,300)
    ylim(0,700)
    savefig(plotfilename,format='png')
    return 0
def plot_data_allerr():
    """plot_data_allerr: Plot the data with full error ellipses

    Reads 'data_allerr.dat', draws every point with the ellipse given by
    its 2x2 covariance matrix and saves the figure as 'data_allerr.png'.
    The axis limits span zero and the one-sigma extent of the points.

    Output:
       plot; returns 0
    History:
       2009-05-20 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_allerr.dat',True)
    ndata= len(data)
    #Create the ellipses and the data points
    x= sc.zeros(ndata)
    y= sc.zeros(ndata)
    ellipses=[]
    ymin, ymax= 0, 0
    xmin, xmax= 0,0
    for ii in range(ndata):
        x[ii]= data[ii][1][0]
        y[ii]= data[ii][1][1]
        #Calculate the eigenvalues and the rotation angle
        ycovar= sc.zeros((2,2))
        ycovar[0,0]= data[ii][3]**2.
        ycovar[1,1]= data[ii][2]**2.
        ycovar[0,1]= data[ii][4]*m.sqrt(ycovar[0,0]*ycovar[1,1])
        ycovar[1,0]= ycovar[0,1]
        eigs= linalg.eig(ycovar)
        angle= m.atan(-eigs[1][0,1]/eigs[1][1,1])/m.pi*180.
        thisellipse= Ellipse(sc.array([x[ii],y[ii]]),2*m.sqrt(eigs[0][0]),
                             2*m.sqrt(eigs[0][1]),angle=angle)
        ellipses.append(thisellipse)
        #Grow the plot range to include this point's one-sigma extent
        xsig= m.sqrt(ycovar[0,0])
        ysig= m.sqrt(ycovar[1,1])
        xmax= max(xmax,x[ii]+xsig)
        xmin= min(xmin,x[ii]-xsig)
        ymax= max(ymax,y[ii]+ysig)
        ymin= min(ymin,y[ii]-ysig)
    plotfilename='data_allerr.png'
    fig_width=7.5
    fig_height=7.5
    fig_size = [fig_width,fig_height]
    #'font.size' replaces the obsolete 'text.fontsize' rcParam
    params = {'axes.labelsize': 12,
              'font.size': 11,
              'legend.fontsize': 12,
              'xtick.labelsize':10,
              'ytick.labelsize':10,
              'text.usetex': True,
              'figure.figsize': fig_size}
    rcParams.update(params)
    fig= figure()
    ax= fig.add_subplot(111)
    #Add the error ellipses
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    ax.plot(x,y,color='k',marker='o',linestyle='None')
    ax.set_xlabel(r'$x$')
    ax.set_ylabel(r'$y$')
    ax.set_xlim((xmin,xmax))
    ax.set_ylim((ymin,ymax))
    savefig(plotfilename,format='png')
    return 0
def ex7d(exclude=sc.array([1,2,3,4]),plotfilename='ex7d.png',nburn=100,nsamples=10000,parsigma=[5.,.075]):
    """ex7d: solve exercise 7 using MCMC sampling

    For Q = 1 and Q = 2 a Metropolis chain is run on (b,m), using
    softchisquared as the quantity to minimize and starting from the
    weighted least-squares line. The lowest-valued sample found for each
    Q is plotted over the data.

    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       nburn - number of burn-in samples
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution width (Gaussian) for (b,m)
    Output:
       plot; returns 0
    History:
       2009-06-02 - Written - Bovy (NYU)
    """
    #In the interest of reproducibility. The seed used to be -1, which
    #current NumPy rejects (it must lie in [0, 2**32-1]).
    #NOTE(review): 2**32-1 is what -1 is believed to have wrapped to in
    #old NumPy releases; confirm if bit-identical chains matter
    sc.random.seed(2**32-1)
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    nsample= ndata- len(exclude)
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2.
        yerr[jj]= data[ii][2]
        jj= jj+1
    #Chi-squared solution, used as the initial guess: bestfit is (b,m)
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y.T)))
    initialguess= sc.array([bestfit[0],bestfit[1]])
    #With this initial guess start off the sampling procedure
    Qs= [1.,2.]
    bestfitssoft= sc.zeros((2,len(Qs)))
    chisqQ= sc.zeros(len(Qs))
    for kk in range(len(Qs)):
        print("Working on Q = "+str(Qs[kk]))
        initialchisqQ= softchisquared(initialguess,X,Y,yerr,Qs[kk])
        bestfit= initialguess
        currentchisqQ= initialchisqQ
        bestchisqQ= initialchisqQ
        currentguess= initialguess
        naccept= 0
        for jj in range(nburn+nsamples):
            #Draw a sample from the proposal distribution
            newsample= sc.zeros(2)
            newsample[0]= currentguess[0]+stats.norm.rvs()*parsigma[0]
            newsample[1]= currentguess[1]+stats.norm.rvs()*parsigma[1]
            #Calculate the objective function for the newsample
            newchisqQ= softchisquared(newsample,X,Y,yerr,Qs[kk])
            #Accept or reject
            #Reject with the appropriate probability
            u= stats.uniform.rvs()
            try:
                accept= u < m.exp(currentchisqQ-newchisqQ)
            except OverflowError:
                #Ratio too large to represent: always accept
                accept= True
            if accept:
                currentchisqQ= newchisqQ
                currentguess= newsample
                naccept= naccept+1
            if currentchisqQ < bestchisqQ:
                bestfit= currentguess
                bestchisqQ= currentchisqQ
        bestfitssoft[:,kk]= bestfit
        chisqQ[kk]= bestchisqQ
        acceptratio= float(naccept)/(nburn+nsamples)
        if acceptratio < .5 or acceptratio > .8:
            print("Acceptance ratio was "+str(acceptratio))
    #Now plot the solution
    fig_width=5
    fig_height=5
    fig_size = [fig_width,fig_height]
    #'font.size' replaces the obsolete 'text.fontsize' rcParam
    params = {'axes.labelsize': 12,
              'font.size': 11,
              'legend.fontsize': 12,
              'xtick.labelsize':10,
              'ytick.labelsize':10,
              'text.usetex': True,
              'figure.figsize': fig_size}
    rcParams.update(params)
    #Plot data (color was passed twice before, which is a syntax error)
    errorbar(X,Y,yerr,color='k',marker='o',linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles= ('--',':', '-.')
    xlim(0,300)
    ylim(0,700)
    xmin, xmax= xlim()
    xs= sc.linspace(xmin,xmax,1001)
    for jj in range(len(Qs)):
        ys= bestfitssoft[0,jj]+bestfitssoft[1,jj]*xs
        if bestfitssoft[0,jj] < 0:
            sgn_str= '-'
        else:
            sgn_str= '+'
        label= r'$Q= %i: y = %4.2f\, x%s%4.0f ; \chi^2_Q = %3.1f$' % (
            Qs[jj],bestfitssoft[1,jj],sgn_str,
            m.fabs(bestfitssoft[0,jj]),chisqQ[jj])
        plot(xs,ys,color='k',ls=linestyles[jj],label=label)
    l=legend(loc=(.2,.1),numpoints=8)
    l.draw_frame(False)
    xlim(0,300)
    ylim(0,700)
    savefig(plotfilename,format='png')
    return 0
def ex14(exclude=sc.array([1,2,3,4]),plotfilename='ex14.png',
         bovyprintargs={}):
    """ex14: solve exercise 14

    Fits the line twice by weighted least squares: "forward" (y against
    x, weighted by the y uncertainties) and "reverse" (x against y,
    weighted by the x uncertainties). The reverse fit x = a + c y is
    converted to y = -a/c + x/c and its covariance is propagated
    linearly. Both lines are plotted over the data with error bars.

    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       bovyprintargs - keyword arguments passed on to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    def _weighted_fit(design,covar,values):
        #Return ([A^T C^-1 A]^-1 [A^T C^-1 Y], [A^T C^-1 A]^-1)
        cinv= linalg.inv(covar)
        var= linalg.inv(sc.dot(design.T,sc.dot(cinv,design)))
        return sc.dot(var,sc.dot(design.T,sc.dot(cinv,values))), var

    #Read the data
    data= read_data('data_allerr.dat',allerr=True)
    ndata= len(data)
    nsample= ndata- len(exclude)
    #Put the data in the appropriate arrays and matrices
    #(1: forward, y as a function of x; 2: reverse, x as a function of y)
    Y1= sc.zeros(nsample)
    A1= sc.ones((nsample,2))
    C1= sc.zeros((nsample,nsample))
    Y2= sc.zeros(nsample)
    A2= sc.ones((nsample,2))
    C2= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    xerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y1[jj]= data[ii][1][1]
        A1[jj,1]= data[ii][1][0]
        C1[jj,jj]= data[ii][2]**2.
        yerr[jj]= data[ii][2]
        Y2[jj]= data[ii][1][0]
        A2[jj,1]= data[ii][1][1]
        C2[jj,jj]= data[ii][3]**2.
        xerr[jj]= data[ii][3]
        jj= jj+1
    #Now compute the best fit and the uncertainties: forward
    bestfit1, bestfitvar1= _weighted_fit(A1,C1,Y1)
    #Now compute the best fit and the uncertainties: backward
    bestfit2, bestfitvar2= _weighted_fit(A2,C2,Y2)
    #Propagate to y=mx+b; linerrprop is the Jacobian of (-a/c, 1/c)
    #with respect to (a,c)
    linerrprop= sc.array([[-1./bestfit2[1],bestfit2[0]/bestfit2[1]**2],
                          [0.,-1./bestfit2[1]**2.]])
    bestfit2= sc.array([-bestfit2[0]/bestfit2[1],1./bestfit2[1]])
    bestfitvar2= sc.dot(linerrprop,sc.dot(bestfitvar2,linerrprop.T))
    #Plot result
    plot.bovy_print(**bovyprintargs)
    xrange=[0,300]
    yrange=[0,700]
    plot.bovy_plot(sc.array(xrange),bestfit1[1]*sc.array(xrange)+bestfit1[0],
                   'k--',xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    plot.bovy_plot(sc.array(xrange),bestfit2[1]*sc.array(xrange)+bestfit2[0],
                   'k-.',overplot=True,zorder=2)
    #Plot data
    errorbar(A1[:,1],Y1,yerr,xerr,color='k',marker='o',
             linestyle='None',zorder=0)
    #Raw string so that the TeX backslashes are not read as escapes
    linefmt= r'%4.2f \pm %4.2f )\,x+ ( %4.0f\pm %4.0f'
    forward= (r'$\mathrm{forward}\ ---\:\ y = ( '
              +linefmt % (bestfit1[1],m.sqrt(bestfitvar1[1,1]),
                          bestfit1[0],m.sqrt(bestfitvar1[0,0]))
              +r')$')
    reverse= (r'$\mathrm{reverse}\ -\cdot -\:\ y = ( '
              +linefmt % (bestfit2[1],m.sqrt(bestfitvar2[1,1]),
                          bestfit2[0],m.sqrt(bestfitvar2[0,0]))
              +r')$')
    plot.bovy_text(forward+'\n'+reverse,bottom_right=True)
    plot.bovy_end_print(plotfilename)
def ex15(
        exclude=sc.array([1, 2, 3, 4]),
        plotfilename='ex15.png',
        bovyprintargs={}):
    """ex15: solve exercise 15

    Fits a line by principal component analysis: the line passes through
    the mean of the (x, y) points along the eigenvector of their sample
    covariance matrix that has the largest eigenvalue. The measurement
    uncertainties in the data file are not used.

    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       bovyprintargs - keyword arguments passed on to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    ndata = len(data)
    nsample = ndata - len(exclude)
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    Z = sc.zeros((nsample, 2))
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        Z[jj, 0] = X[jj]
        Z[jj, 1] = Y[jj]
        jj = jj + 1
    #Now compute the PCA solution
    Zm = sc.mean(Z, axis=0)
    Q = sc.cov(Z.T)
    eigs = linalg.eig(Q)
    maxindx = sc.argmax(eigs[0])
    #Eigenvectors are the columns of eigs[1] (a row was taken before)
    V = eigs[1][:, maxindx]
    V = V / linalg.norm(V)
    #V[1]/V[0] equals the former sqrt(1/V[0]**2-1) in magnitude but keeps
    #the sign, so a decreasing trend gets a negative slope
    slope = V[1] / V[0]
    bestfit = sc.array([-slope * Zm[0] + Zm[1], slope])
    #Plot result
    plot.bovy_print(**bovyprintargs)
    xrange = [0, 300]
    yrange = [0, 700]
    plot.bovy_plot(sc.array(xrange),
                   bestfit[1] * sc.array(xrange) + bestfit[0],
                   'k--',
                   xrange=xrange,
                   yrange=yrange,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    plot.bovy_plot(X,
                   Y,
                   marker='o',
                   color='k',
                   linestyle='None',
                   zorder=0,
                   overplot=True)
    plot.bovy_text(r'$y = %4.2f \,x %4.0f' % (bestfit[1], bestfit[0]) + r'$',
                   bottom_right=True)
    plot.bovy_end_print(plotfilename)
def ex10(exclude=sc.array([1, 2, 3, 4]),
         plotfilenameA='ex10a.png',
         plotfilenameB='ex10b.png',
         nburn=1000,
         nsamples=200000,
         parsigma=[5, .075, 0.1],
         bovyprintargs={}):
    """ex10: solve exercise 10 using MCMC sampling

    Runs a Metropolis chain over (b, m, logS), scored by
    objective(params, X, Y, yerr) and started at the weighted
    least-squares line with logS = 0. Two plots are written: the line of
    the best sample found (plotfilenameA) and the line at the peak of
    the (b, m) histogram together with the peak of the logS histogram
    (plotfilenameB). Both draw error bars of size exp(logS/2).

    Input:
       exclude - ID numbers to exclude from the analysis (can be None)
       plotfilenameA - filename for the plot of the best sample
       plotfilenameB - filename for the plot of the histogram peaks
       nburn - number of burn-in samples
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution width (Gaussian) for (b, m, logS)
       bovyprintargs - keyword arguments passed on to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #In the interest of reproducibility. The seed used to be -1, which
    #current NumPy rejects (it must lie in [0, 2**32-1]).
    #NOTE(review): 2**32-1 is what -1 is believed to have wrapped to in
    #old NumPy releases; confirm if bit-identical chains matter
    sc.random.seed(2**32 - 1)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #'exclude == None' compares element-wise for arrays, so test identity
    if exclude is not None:
        nsample = ndata - len(exclude)
    else:
        nsample = ndata
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if exclude is not None and sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        jj = jj + 1
    #Now compute the best fit and the uncertainties
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y.T)))
    initialguess = sc.array([bestfit[0], bestfit[1], 0.])  #(b,m,logS)
    #With this initial guess start off the sampling procedure
    initialX = objective(initialguess, X, Y, yerr)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    samples = []
    samples.append(currentguess)
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution
        newsample = sc.zeros(3)
        newsample[0] = currentguess[0] + stats.norm.rvs() * parsigma[0]
        newsample[1] = currentguess[1] + stats.norm.rvs() * parsigma[1]
        newsample[2] = currentguess[2] + stats.norm.rvs() * parsigma[2]
        #Calculate the objective function for the newsample
        newX = objective(newsample, X, Y, yerr)
        #Accept or reject
        #Reject with the appropriate probability
        u = stats.uniform.rvs()
        try:
            accept = u < m.exp(newX - currentX)
        except OverflowError:
            #Ratio too large to represent: always accept
            accept = True
        if accept:
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
        if currentX > bestX:
            bestfit = currentguess
            bestX = currentX
        samples.append(currentguess)
    acceptratio = float(naccept) / (nburn + nsamples)
    if acceptratio < .5 or acceptratio > .8:
        print("Acceptance ratio was " + str(acceptratio))
    #Drop the burn-in; rows are parameters, columns are samples
    samples = sc.array(samples).T[:, nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit, sc.mean(samples[2, :]),
                        sc.median(samples[2, :])))
    #The number of bins must be an integer
    nbins = int(round(sc.sqrt(nsamples) / 2.))
    histmb, edges = sc.histogramdd(samples.T[:, 0:2], bins=nbins)
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi - 1], edges[1][indxj - 1]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi + 1], edges[1][indxj + 1]))
    print("Best-fit for S marginalized")
    histS, edgesS = sc.histogram(samples.T[:, 2], bins=nbins)
    indx = sc.argmax(histS)
    #Data with the line of the best sample
    plot.bovy_print(**bovyprintargs)
    bestb = bestfit[0]
    bestm = bestfit[1]
    xrange = [0, 300]
    yrange = [0, 700]
    plot.bovy_plot(xrange,
                   bestm * sc.array(xrange) + bestb,
                   'k-',
                   xrange=xrange,
                   yrange=yrange,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    errorbar(X,
             Y,
             sc.exp(bestfit[2] / 2.),
             marker='o',
             color='k',
             linestyle='None',
             zorder=1)
    plot.bovy_text(r'$\mathrm{MAP}\ :\ y = %4.2f \,x+ %4.0f' %
                   (bestfit[1], bestfit[0]) + r'$' + '\n' +
                   r'$\mathrm{MAP}\ :\ \sqrt{S} = %3.1f$' %
                   (sc.exp(bestfit[2] / 2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameA)
    #Data with the line at the histogram peaks
    plot.bovy_print(**bovyprintargs)
    bestb = edges[0][indxi]
    bestm = edges[1][indxj]
    bestS = edgesS[indx]
    plot.bovy_plot(xrange,
                   bestm * sc.array(xrange) + bestb,
                   'k-',
                   xrange=xrange,
                   yrange=yrange,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    errorbar(X,
             Y,
             sc.exp(bestS / 2.),
             marker='o',
             color='k',
             linestyle='None',
             zorder=1)
    plot.bovy_text(
        r'$\mathrm{marginalized\ over\ S}\ :\ y = %4.2f \,x+ %4.0f' %
        (bestm, bestb) + r'$' + '\n' +
        r'$\mathrm{marginalized\ over}\ (m,b)\ :\ \sqrt{S} = %3.1f$' %
        (sc.exp(bestS / 2.)),
        bottom_right=True)
    plot.bovy_end_print(plotfilenameB)
    return
def ex10(exclude=sc.array([1,2,3,4]),
         plotfilenameA='ex10a.png',
         plotfilenameB='ex10b.png',
         nburn=1000,nsamples=200000,
         parsigma=[5,.075,0.1],
         bovyprintargs={}):
    """ex10: solve exercise 10 using MCMC sampling

    Runs a Metropolis chain over (b,m,logS), scored by
    objective(params,X,Y,yerr) and started at the weighted least-squares
    line with logS = 0. Two plots are written: the line of the best
    sample found (plotfilenameA) and the line at the peak of the (b,m)
    histogram together with the peak of the logS histogram
    (plotfilenameB). Both draw error bars of size exp(logS/2).

    Input:
       exclude - ID numbers to exclude from the analysis (can be None)
       plotfilenameA - filename for the plot of the best sample
       plotfilenameB - filename for the plot of the histogram peaks
       nburn - number of burn-in samples
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution width (Gaussian) for (b,m,logS)
       bovyprintargs - keyword arguments passed on to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #In the interest of reproducibility. The seed used to be -1, which
    #current NumPy rejects (it must lie in [0, 2**32-1]).
    #NOTE(review): 2**32-1 is what -1 is believed to have wrapped to in
    #old NumPy releases; confirm if bit-identical chains matter
    sc.random.seed(2**32-1)
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    #'exclude == None' compares element-wise for arrays, so test identity
    if exclude is not None:
        nsample= ndata- len(exclude)
    else:
        nsample= ndata
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if exclude is not None and sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2.
        yerr[jj]= data[ii][2]
        jj= jj+1
    #Now compute the best fit and the uncertainties
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y.T)))
    initialguess= sc.array([bestfit[0],bestfit[1],0.])#(b,m,logS)
    #With this initial guess start off the sampling procedure
    initialX= objective(initialguess,X,Y,yerr)
    currentX= initialX
    bestX= initialX
    bestfit= initialguess
    currentguess= initialguess
    naccept= 0
    samples= []
    samples.append(currentguess)
    for jj in range(nburn+nsamples):
        #Draw a sample from the proposal distribution
        newsample= sc.zeros(3)
        newsample[0]= currentguess[0]+stats.norm.rvs()*parsigma[0]
        newsample[1]= currentguess[1]+stats.norm.rvs()*parsigma[1]
        newsample[2]= currentguess[2]+stats.norm.rvs()*parsigma[2]
        #Calculate the objective function for the newsample
        newX= objective(newsample,X,Y,yerr)
        #Accept or reject
        #Reject with the appropriate probability
        u= stats.uniform.rvs()
        try:
            accept= u < m.exp(newX-currentX)
        except OverflowError:
            #Ratio too large to represent: always accept
            accept= True
        if accept:
            currentX= newX
            currentguess= newsample
            naccept= naccept+1
        if currentX > bestX:
            bestfit= currentguess
            bestX= currentX
        samples.append(currentguess)
    acceptratio= float(naccept)/(nburn+nsamples)
    if acceptratio < .5 or acceptratio > .8:
        print("Acceptance ratio was "+str(acceptratio))
    #Drop the burn-in; rows are parameters, columns are samples
    samples= sc.array(samples).T[:,nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit,sc.mean(samples[2,:]),
                        sc.median(samples[2,:])))
    #The number of bins must be an integer
    nbins= int(round(sc.sqrt(nsamples)/2.))
    histmb,edges= sc.histogramdd(samples.T[:,0:2],bins=nbins)
    indxi= sc.argmax(sc.amax(histmb,axis=1))
    indxj= sc.argmax(sc.amax(histmb,axis=0))
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi-1],edges[1][indxj-1]))
    print("%s %s" % (edges[0][indxi],edges[1][indxj]))
    print("%s %s" % (edges[0][indxi+1],edges[1][indxj+1]))
    print("Best-fit for S marginalized")
    histS,edgesS= sc.histogram(samples.T[:,2],bins=nbins)
    indx= sc.argmax(histS)
    #Data with the line of the best sample
    plot.bovy_print(**bovyprintargs)
    bestb= bestfit[0]
    bestm= bestfit[1]
    xrange=[0,300]
    yrange=[0,700]
    plot.bovy_plot(xrange,bestm*sc.array(xrange)+bestb,'k-',
                   xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    errorbar(X,Y,sc.exp(bestfit[2]/2.),
             marker='o',color='k',linestyle='None',zorder=1)
    plot.bovy_text(r'$\mathrm{MAP}\ :\ y = %4.2f \,x+ %4.0f' % (bestfit[1], bestfit[0])+r'$'+'\n'+r'$\mathrm{MAP}\ :\ \sqrt{S} = %3.1f$' % (sc.exp(bestfit[2]/2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameA)
    #Data with the line at the histogram peaks
    plot.bovy_print(**bovyprintargs)
    bestb= edges[0][indxi]
    bestm= edges[1][indxj]
    bestS= edgesS[indx]
    plot.bovy_plot(xrange,bestm*sc.array(xrange)+bestb,'k-',
                   xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    errorbar(X,Y,sc.exp(bestS/2.),
             marker='o',color='k',linestyle='None',zorder=1)
    plot.bovy_text(r'$\mathrm{marginalized\ over\ S}\ :\ y = %4.2f \,x+ %4.0f' % (bestm, bestb)+r'$'+'\n'+r'$\mathrm{marginalized\ over}\ (m,b)\ :\ \sqrt{S} = %3.1f$' % (sc.exp(bestS/2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameB)
    return
def marginalize_mixture(mixture=True, short=False):
    """marginalize_mixture: run an MCMC chain for the straight-line fit to
    data with two-dimensional uncertainties and save diagnostic plots
    Input:
       mixture - if True, sample all five parameters
                 (theta, bperp, Pbad, Ybad, Vbad) starting from Pbad = 0.5;
                 if False, Pbad is forced to 0 at every step
       short   - if True, halve the number of MCMC steps (100000 -> 50000)
    Output:
       none returned; writes '<prefix>-data.pdf', '<prefix>-xy.pdf',
       '<prefix>-mb.pdf' and, when mixture is True, '<prefix>-xy-bg.pdf',
       where <prefix> is 'mixture2d' or 'nomixture2d'
    """
    # Output files are prefixed according to the model being sampled
    if mixture:
        prefix = 'mixture2d'
    else:
        prefix = 'nomixture2d'
    random.seed(-1) #In the interest of reproducibility (if that's a word)
    # Read the data
    data= read_data('data_allerr.dat',True)
    ndata= len(data)
    # Create the ellipses and the data points
    x= zeros(ndata)
    y= zeros(ndata)
    ellipses=[]
    # One 2x2 covariance matrix per data point
    yvar= zeros((ndata,2,2))
    for ii in range(ndata):
        x[ii]= data[ii][1][0]
        y[ii]= data[ii][1][1]
        #Calculate the eigenvalues and the rotation angle
        # presumably data[ii][3] is sigma_x, data[ii][2] is sigma_y and
        # data[ii][4] is the x-y correlation coefficient -- verify against
        # read_data
        yvar[ii,0,0]= data[ii][3]**2.
        yvar[ii,1,1]= data[ii][2]**2.
        yvar[ii,0,1]= data[ii][4]*sqrt(yvar[ii,0,0]*yvar[ii,1,1])
        yvar[ii,1,0]= yvar[ii,0,1]
        eigs= linalg.eig(yvar[ii,:,:])
        # Rotation angle in degrees, taken from the second eigenvector column
        angle= arctan(-eigs[1][0,1]/eigs[1][1,1])/pi*180.
        # Ellipse width/height are twice the square roots of the eigenvalues
        thisellipse= Ellipse(array([x[ii],y[ii]]),2*sqrt(eigs[0][0]),
                             2*sqrt(eigs[0][1]),angle)
        ellipses.append(thisellipse)
    # initialize parameters
    # The starting line passes through the data points at indices 7 and 9
    theta = arctan2(y[7]-y[9],x[7]-x[9])
    bperp = (y[7] - tan(theta) * x[7]) * cos(theta) # bad at theta = 0.5 * pi
    if mixture:
        Pbad = 0.5
    else:
        Pbad = 0.
    # Start the Ybad/Vbad parameters at the sample mean and variance of y
    Ybad = mean(y)
    Vbad = mean((y-Ybad)**2)
    p = posterior(x, y, yvar, theta, bperp, Pbad, Ybad, Vbad)
    print 'starting p=', p
    # chain holds one (posterior value, parameter tuple) entry per step
    chain = []
    oldp = p
    oldparams = (theta, bperp, Pbad, Ybad, Vbad)
    bestparams = oldparams
    bestp = oldp
    nsteps = 0
    naccepts = 0
    NSTEPS = 100000
    if short:
        NSTEPS /= 2
    print 'doing', NSTEPS, 'steps of MCMC...'
    while nsteps < NSTEPS:
        newparams = pick_new_parameters(nsteps, *oldparams)
        if not mixture:
            # clamp Pbad to zero.
            (theta, bperp, Pbad, Ybad, Vbad) = newparams
            newparams = (theta, bperp, 0, Ybad, Vbad)
        p = posterior(x, y, yvar, *newparams)
        # Accept when the posterior ratio exceeds a uniform random draw; the
        # ratio is taken directly, so posterior() is used here as a
        # probability rather than a log-probability
        if p/oldp > random.uniform():
            chain.append((p,newparams))
            oldparams = newparams
            oldp = p
            # Track the highest-posterior sample seen so far
            if p > bestp:
                bestp = p
                bestparams = newparams
            naccepts += 1
        else:
            chain.append((oldp,oldparams))
            # keep oldparams, oldp
        nsteps += 1
        # Progress report every 5000 steps
        if (nsteps % 5000 == 1):
            print nsteps, naccepts, (naccepts/float(nsteps)), oldp, bestp, bestparams
    print 'acceptance fraction', (naccepts/float(nsteps))
    # plot a sample
    fig_width=5
    fig_height=5
    fig_size = [fig_width,fig_height]
    params = {'axes.labelsize': 12,
              'text.fontsize': 11,
              'legend.fontsize': 12,
              'xtick.labelsize':10,
              'ytick.labelsize':10,
              'text.usetex': True,
              'figure.figsize': fig_size,
              'image.interpolation':'nearest',
              'image.origin':'lower',
              }
    rcParams.update(params)
    # Plot data
    clf()
    ax = gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    xlabel(r'$x$')
    ylabel(r'$y$')
    xlim(0,300)
    ylim(0,700)
    savefig(prefix + '-data.pdf')
    # Remember the axis limits so they can be restored after drawing lines
    a = axis()
    xmin, xmax = xlim()
    ymin, ymax = ylim()
    xs = linspace(xmin, xmax, 2)
    Nchain = len(chain)
    if mixture:
        # select 10 samples at random from the second half of the chain.
        I = Nchain/2 + random.permutation(Nchain/2)[:10]
    else:
        # Without the mixture only the highest-posterior sample is drawn
        I = array([argmax([p for (p, params) in chain])])
    for i in I:
        (p,params) = chain[i]
        (theta, bperp, Pbad, Ybad, Vbad) = params
        # Convert (theta, bperp) to slope tan(theta), intercept bperp/cos(theta)
        ys = tan(theta) * xs + bperp / cos(theta) # replace this with smarter linear algebra
        plot(xs, ys, color='k', alpha=0.3)
    axis(a)
    savefig(prefix + '-xy.pdf')
    if mixture:
        # Accumulate, over the second half of the chain, each point's
        # likelihood with the Pbad argument set to 1 (weighted by Pbad) and
        # set to 0 (weighted by 1-Pbad)
        bgp = zeros(len(x))
        fgp = zeros(len(x))
        for (p,params) in chain[Nchain/2:]:
            (theta, bperp, Pbad, Ybad, Vbad) = params
            bgp += Pbad * single_point_likelihoods(x, y, yvar, theta, bperp, 1, Ybad, Vbad)
            fgp += (1.-Pbad) * single_point_likelihoods(x, y, yvar, theta, bperp, 0, Ybad, Vbad)
        bgodds = bgp / fgp
        # Label every point whose odds ratio is at least 1 with log10(odds)
        for i,bgo in enumerate(bgodds):
            if bgo < 1:
                continue
            dxl = (xmax-xmin) * 0.01
            dyl = (ymax-ymin) * 0.01
            t = text(x[i]+dxl, y[i]+dyl, '%.1f' % log10(bgo),
                     horizontalalignment='left',
                     verticalalignment='bottom', alpha=0.3)
        savefig(prefix + '-xy-bg.pdf')
    clf()
    # note horrifying theta = 0.5 * pi behavior!
    # Slope and intercept for every sample in the second half of the chain
    ms = array([tan(theta) for (p, (theta, bperp, Pbad, Ybad, Vbad)) in chain[Nchain/2:]])
    bs = array([bperp / cos(theta) for (p, (theta, bperp, Pbad, Ybad, Vbad)) in chain[Nchain/2:]])
    #plot(ms, bs, 'k,', alpha=0.1)
    xlabel('slope $m$')
    ylabel('intercept $b$')
    #savefig(prefix + '-mb-scatter.pdf')
    # The scatter plot above is disabled; the figure is cleared again and
    # replaced by a log-scaled 100x100 two-dimensional histogram
    clf()
    (H, xe, ye) = histogram2d(ms, bs, bins=(100,100))
    imshow(log(1+H.T), extent=(xe.min(), xe.max(), ye.min(), ye.max()), aspect='auto',
           cmap=antigray)
    xlabel('slope $m$')
    ylabel('intercept $b$')
    savefig(prefix + '-mb.pdf')
def ex16(exclude=sc.array([3]),plotfilename='ex16.png',
         bovyprintargs={}):
    """ex16: solve exercise 16 by optimization of the objective function
    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
       bovyprintargs  - keyword arguments passed on to plot.bovy_print
    Output:
       plot; returns -1 if the user answers 'n' at the 'continue to plot?'
       prompt, otherwise falls off the end (returns None)
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_allerr.dat',allerr=True)
    ndata= len(data)
    nsample= ndata- len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    Z= sc.zeros((nsample,2))
    yerr= sc.zeros(nsample)
    ycovar= sc.zeros((2,nsample,2))#Makes the sc.dot easier
    jj= 0
    for ii in range(ndata):
        #Skip every data point whose ID is listed in exclude
        if sc.any(exclude == data[ii][0]):
            pass
        else:
            Y[jj]= data[ii][1][1]
            X[jj]= data[ii][1][0]
            Z[jj,0]= X[jj]
            Z[jj,1]= Y[jj]
            A[jj,1]= data[ii][1][0]
            C[jj,jj]= data[ii][2]**2.
            yerr[jj]= data[ii][2]
            #Per-point 2x2 covariance stored as ycovar[:,jj,:];
            #presumably data[ii][3] is sigma_x and data[ii][4] the x-y
            #correlation coefficient -- verify against read_data
            ycovar[0,jj,0]= data[ii][3]**2.
            ycovar[1,jj,1]= data[ii][2]**2.
            ycovar[0,jj,1]= data[ii][4]*m.sqrt(ycovar[0,jj,0]*ycovar[1,jj,1])
            ycovar[1,jj,0]= ycovar[0,jj,1]
            jj= jj+1
    #Now compute the best fit and the uncertainties
    #bestfit= (A^T C^-1 A)^-1 A^T C^-1 Y, built up step by step
    bestfit= sc.dot(linalg.inv(C),Y.T)
    bestfit= sc.dot(A.T,bestfit)
    bestfitvar= sc.dot(linalg.inv(C),A)
    bestfitvar= sc.dot(A.T,bestfitvar)
    bestfitvar= linalg.inv(bestfitvar)
    bestfit= sc.dot(bestfitvar,bestfit)
    #Now optimize
    #Start from the chi-squared line; the third parameter starts at log(100)
    initial= sc.array([bestfit[0],bestfit[1],sc.log(100)])
    bestfit2d1= optimize.fmin(objective,initial,(Z,ycovar),disp=False)
    #Restart the optimization once using a different method
    bestfit2d= optimize.fmin_powell(objective,initial,
                                    (Z,ycovar),disp=False)
    #Compare the two optimizers and let the user abort if they disagree
    if linalg.norm(bestfit2d-bestfit2d1) > 10**-12:
        if linalg.norm(bestfit2d-bestfit2d1) < 10**-6:
            print "Different optimizers give slightly different results..."
        else:
            print "Different optimizers give rather different results..."
        print "The norm of the results differs by %g" % linalg.norm(bestfit2d-bestfit2d1)
        try:
            x=raw_input('continue to plot? [yn]\n')
        except EOFError:
            print "Since you are in non-interactive mode I will assume 'y'"
            x='y'
        if x == 'n':
            print "returning..."
            return -1
    b= bestfit2d[0]
    mf= bestfit2d[1]
    #The third fitted parameter is used as a log-variance: V= exp(par/2)
    V=sc.exp(bestfit2d[2]/2.)
    cost= 1./sc.sqrt(1+mf**2.)
    bcost= b*cost
    #Plot result
    plot.bovy_print(**bovyprintargs)
    xrange=[0,300]
    yrange=[0,700]
    #Two dashed lines with the fitted slope, offset by +V and -V
    #NOTE(review): the intercept drawn is b*cos(theta) +/- V -- confirm that
    #this matches the parametrization used inside objective
    plot.bovy_plot(sc.array(xrange),bestfit2d[1]*sc.array(xrange)+bcost+V,
                   'k--',xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    plot.bovy_plot(sc.array(xrange),bestfit2d[1]*sc.array(xrange)+bcost-V,
                   'k--',zorder=2,overplot=True)
    #Plot the data OMG straight from plot_data.py
    data= read_data('data_allerr.dat',True)
    ndata= len(data)
    #Create the ellipses and the data points
    id= sc.zeros(nsample)
    x= sc.zeros(nsample)
    y= sc.zeros(nsample)
    ellipses=[]
    #Running extent of the data plus/minus one sigma (not used further below)
    ymin, ymax= 0, 0
    xmin, xmax= 0,0
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        id[jj]= data[ii][0]
        x[jj]= data[ii][1][0]
        y[jj]= data[ii][1][1]
        #Calculate the eigenvalues and the rotation angle
        #ycovar is rebound here to a single 2x2 matrix for this point
        ycovar= sc.zeros((2,2))
        ycovar[0,0]= data[ii][3]**2.
        ycovar[1,1]= data[ii][2]**2.
        ycovar[0,1]= data[ii][4]*m.sqrt(ycovar[0,0]*ycovar[1,1])
        ycovar[1,0]= ycovar[0,1]
        eigs= linalg.eig(ycovar)
        #Rotation angle in degrees, from the second eigenvector column
        angle= m.atan(-eigs[1][0,1]/eigs[1][1,1])/m.pi*180.
        thisellipse= Ellipse(sc.array([x[jj],y[jj]]),2*m.sqrt(eigs[0][0]),
                             2*m.sqrt(eigs[0][1]),angle)
        ellipses.append(thisellipse)
        if (x[jj]+m.sqrt(ycovar[0,0])) > xmax:
            xmax= (x[jj]+m.sqrt(ycovar[0,0]))
        if (x[jj]-m.sqrt(ycovar[0,0])) < xmin:
            xmin= (x[jj]-m.sqrt(ycovar[0,0]))
        if (y[jj]+m.sqrt(ycovar[1,1])) > ymax:
            ymax= (y[jj]+m.sqrt(ycovar[1,1]))
        if (y[jj]-m.sqrt(ycovar[1,1])) < ymin:
            ymin= (y[jj]-m.sqrt(ycovar[1,1]))
        jj= jj+1
    #Add the error ellipses
    ax=gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    ax.plot(x,y,color='k',marker='o',linestyle='None')
    plot.bovy_end_print(plotfilename)
def _ex7b_weighted_fit(A, C, Y):
    """Return the weighted least-squares solution (A^T C^-1 A)^-1 A^T C^-1 Y"""
    Cinv = linalg.inv(C)
    fitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    return sc.dot(fitvar, sc.dot(A.T, sc.dot(Cinv, Y)))


def ex7b(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex7b.png'):
    """ex7b: solve exercise 7 using an iterative procedure
    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot (written as PNG)
    Output:
       plot; returns 0
    History:
       2009-06-01 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the iteration
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        jj = jj + 1
    #Chi-squared fit; element 0 is the intercept b, element 1 the slope m
    #(the first column of A is all ones)
    initialguess = _ex7b_weighted_fit(A, C, Y)
    #With this initial guess start the iteration, using as the variances
    #sigma^2 + (y-mx-b)^2/Q^2
    tol = 10**-10.
    Qs = [1., 2.]
    bestfitssoft = sc.zeros((2, len(Qs)))
    chisqQ = sc.zeros(len(Qs))
    for jj in range(len(Qs)):
        currentguess = initialguess
        diff = 2 * tol
        while diff > tol:
            oldguess = currentguess
            #Calculate the weights based on the previous iteration
            resid = Y - oldguess[1] * X - oldguess[0]
            for ii in range(nsample):
                C[ii, ii] = yerr[ii]**2. + resid[ii]**2 / Qs[jj]**2.
            #Re-fit
            currentguess = _ex7b_weighted_fit(A, C, Y)
            #Relative change of the parameters between two iterations
            diff = m.sqrt(
                (currentguess[0] - oldguess[0])**2 / oldguess[0]**2. +
                (currentguess[1] - oldguess[1])**2 / oldguess[1]**2.)
        bestfitssoft[0, jj] = currentguess[0]
        bestfitssoft[1, jj] = currentguess[1]
        #Calculate chi^2_Q = sum 1/(sigma^2/r^2 + 1/Q^2) with r = y-mx-b.
        #The residual must use the intercept currentguess[0]; the sum is
        #written as r^2/(sigma^2 + r^2/Q^2) so that a point lying exactly
        #on the line does not cause a division by zero.
        resid = Y - currentguess[1] * X - currentguess[0]
        for ii in range(nsample):
            chisqQ[jj] = chisqQ[jj] + resid[ii]**2 / (
                yerr[ii]**2 + resid[ii]**2 / Qs[jj]**2)
    #Now plot the solution
    fig_width = 5
    fig_height = 5
    fig_size = [fig_width, fig_height]
    #NOTE(review): 'text.fontsize' may not be accepted by recent matplotlib
    #releases -- confirm against the installed version
    params = {
        'axes.labelsize': 12,
        'text.fontsize': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    rcParams.update(params)
    #Plot data (the color keyword used to be passed twice, which is a
    #syntax error)
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles = ('--', ':', '-.')
    nsamples = 1001
    for jj in range(len(Qs)):
        xlim(0, 300)
        ylim(0, 700)
        xmin, xmax = xlim()
        xs = sc.linspace(xmin, xmax, nsamples)
        ys = bestfitssoft[0, jj] + bestfitssoft[1, jj] * xs
        if bestfitssoft[0, jj] < 0:
            sgn_str = '-'
        else:
            sgn_str = '+'
        label = (r'$Q= ' +
                 r'%i: y = %4.2f\, x' % (Qs[jj], bestfitssoft[1, jj]) +
                 sgn_str +
                 '%4.0f ' % m.fabs(bestfitssoft[0, jj]) +
                 r'; \chi^2_Q = ' +
                 '%3.1f' % chisqQ[jj] + '$')
        plot(xs, ys, color='k', ls=linestyles[jj], label=label)
    l = legend(loc=(.2, .1), numpoints=8)
    l.draw_frame(False)
    xlim(0, 300)
    ylim(0, 700)
    savefig(plotfilename, format='png')
    return 0
def exNew(exclude=sc.array([1,2,3,4]),
          plotfilename='exNew.png',nburn=20000,nsamples=200000,
          parsigma=[5,.075,.01,1,.1],dsigma=1.):
    """exNew: solve the new exercise using MCMC sampling
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilename   - not used: the three plots are always written to
                        'exNew1{a,b,c}.png' (dsigma == 1) or
                        'exNew2{a,b,c}.png' (otherwise)
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian), one entry
                        for each of the five parameters
       dsigma         - divide uncertainties by this amount
    Output:
       plot
    History:
       2010-04-28 - Written - Bovy (NYU)
    """
    sc.random.seed(1) #In the interest of reproducibility (if that's a word)
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    #Test identity with None: 'exclude == None' compares an array
    #element by element and cannot be used as a truth value
    if exclude is not None:
        nsample= ndata- len(exclude)
    else:
        nsample= ndata
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if exclude is not None and sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2./dsigma**2.
        yerr[jj]= data[ii][2]/dsigma
        jj= jj+1
    #Now compute the best fit and the uncertainties
    bestfit= sc.dot(linalg.inv(C),Y.T)
    bestfit= sc.dot(A.T,bestfit)
    bestfitvar= sc.dot(linalg.inv(C),A)
    bestfitvar= sc.dot(A.T,bestfitvar)
    bestfitvar= linalg.inv(bestfitvar)
    bestfit= sc.dot(bestfitvar,bestfit)
    #Parameter order is (b,m,Pb,Yb,log Vb): the first column of A is all
    #ones, so bestfit[0] is the intercept and bestfit[1] the slope
    initialguess= sc.array([bestfit[0],bestfit[1],0.,sc.mean(Y),
                            m.log(sc.var(Y))])
    #With this initial guess start off the sampling procedure
    initialX= objective(initialguess,X,Y,yerr)
    currentX= initialX
    bestX= initialX
    bestfit= initialguess
    currentguess= initialguess
    naccept= 0
    samples= []
    samples.append(currentguess)
    nparams= 5
    for jj in range(nburn+nsamples):
        #Draw a sample from the proposal distribution (one normal deviate
        #per parameter, drawn in parameter order)
        newsample= sc.zeros(nparams)
        for kk in range(nparams):
            newsample[kk]= currentguess[kk]+stats.norm.rvs()*parsigma[kk]
        #Calculate the objective function for the newsample
        newX= objective(newsample,X,Y,yerr)
        #Accept or reject
        #Reject with the appropriate probability
        u= stats.uniform.rvs()
        try:
            accept= u < m.exp(newX-currentX)
        except OverflowError:
            #The ratio is too large to represent, so it certainly exceeds u
            accept= True
        if accept:
            #Accept
            currentX= newX
            currentguess= newsample
            naccept= naccept+1
        if currentX > bestX:
            bestfit= currentguess
            bestX= currentX
        samples.append(currentguess)
    acceptratio= float(naccept)/(nburn+nsamples)
    if acceptratio < .2 or acceptratio > .6:
        print("Acceptance ratio was "+str(acceptratio))
    #Drop the burn-in; rows are parameters, columns are samples
    samples= sc.array(samples).T[:,nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit, sc.mean(samples[2,:]),
                        sc.median(samples[2,:])))
    #The number of bins has to be an integer
    nbins= int(round(sc.sqrt(nsamples)/5.))
    histmb,edges= sc.histogramdd(samples.T[:,0:2],bins=nbins)
    indxi= sc.argmax(sc.amax(histmb,axis=1))
    indxj= sc.argmax(sc.amax(histmb,axis=0))
    #Remember the MAP bin now: 'edges' is replaced by the edges of a
    #different histogram below, for which indxi/indxj are not valid
    mapb= edges[0][indxi]
    mapm= edges[1][indxj]
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi-1], edges[1][indxj-1]))
    print("%s %s" % (mapb, mapm))
    print("%s %s" % (edges[0][indxi+1], edges[1][indxj+1]))
    #Label and filename tag shared by the three plots
    if dsigma == 1.:
        label= r'$\mathrm{using\ correct\ data\ uncertainties}$'
        tag= '1'
    else:
        label= r'$\mathrm{using\ data\ uncertainties\ /\ 2}$'
        tag= '2'
    #2D histogram
    plot.bovy_print()
    levels= special.erf(0.5*sc.arange(1,4))
    brange=[-120,120]
    mrange=[1.5,3.2]
    #Re-bin on the fixed plotting range, scaling the number of bins so the
    #bin width stays roughly that of the histogram above
    nbinsb= max(1,int(nbins/(edges[0][-1]-edges[0][0])
                      *(brange[1]-brange[0])))
    nbinsm= max(1,int(nbins/(edges[1][-1]-edges[1][0])
                      *(mrange[1]-mrange[0])))
    histmb,edges= sc.histogramdd(samples.T[:,0:2],
                                 range=[brange,mrange],
                                 bins=(nbinsb,nbinsm))
    aspect=(brange[1]-brange[0])/(mrange[1]-mrange[0])
    plot.bovy_dens2d(histmb.T,origin='lower',cmap='gist_yarg',
                     contours=True,cntrmass=True,
                     xrange=brange,yrange=mrange,
                     levels=levels,
                     aspect=aspect,
                     xlabel=r'$b$',ylabel=r'$m$')
    plot.bovy_text(label,top_right=True)
    plot.bovy_end_print('exNew'+tag+'a.png')
    #Data with MAP line and sampling
    plot.bovy_print()
    xlims=[0,300]
    ylims=[0,700]
    plot.bovy_plot(xlims,mapm*sc.array(xlims)+mapb,'k-',
                   xrange=xlims,yrange=ylims,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    #(the color keyword used to be passed twice, which is a syntax error)
    errorbar(X,Y,yerr,color='k',marker='o',linestyle='None',zorder=1)
    for ii in range(10):
        #Random sample; the index has to be an integer
        ransample= int(sc.floor(stats.uniform.rvs()*nsamples))
        thisb, thism= samples.T[ransample,0:2]
        plot.bovy_plot(xlims,thism*sc.array(xlims)+thisb,
                       overplot=True,xrange=xlims,yrange=ylims,
                       xlabel=r'$x$',ylabel=r'$y$',color='0.75',zorder=1)
    plot.bovy_text(label,top_right=True)
    plot.bovy_end_print('exNew'+tag+'b.png')
    #Pb plot
    plot.bovy_print()
    plot.bovy_hist(samples.T[:,2],color='k',bins=nbins,
                   xlabel=r'$P_\mathrm{b}$',normed=True,histtype='step',
                   range=[0,1])
    plot.bovy_text(label,top_right=True)
    plot.bovy_end_print('exNew'+tag+'c.png')
    return
def exMix1(exclude=None,
           plotfilenameA='exMix1a.png',
           plotfilenameB='exMix1b.png',
           plotfilenameC='exMix1c.png',
           nburn=20000,
           nsamples=1000000,
           parsigma=[5, .075, .2, 1, .1],
           dsigma=1.,
           bovyprintargs={},
           sampledata=None):
    """exMix1: solve exercise 5 (mixture model) using MCMC sampling
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilename*  - filenames for the output plots (A: (b,m) density,
                        B: data with sampled lines, C: Pb histogram)
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian)
       dsigma         - divide uncertainties by this amount
       bovyprintargs  - keyword arguments for plot.bovy_print (not modified)
       sampledata     - result of an earlier call, the tuple
                        (histmb, edges, mbsamples, pbhist, pbedges); when
                        given, the sampler is not run again
    Output:
       plot; returns sampledata so that it can be reused
    History:
       2010-04-28 - Written - Bovy (NYU)
    """
    #Work on a private copy: a default 'text_fontsize' is added for the Pb
    #plot below, and writing it into the argument itself would change the
    #shared default dictionary (and the caller's dictionary) for later calls
    bovyprintargs = dict(bovyprintargs)
    #NOTE(review): recent NumPy releases may reject a negative seed --
    #confirm against the installed version
    sc.random.seed(-1) #In the interest of reproducibility (if that's a word)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #Test identity with None: 'exclude == None' compares an array
    #element by element and cannot be used as a truth value
    if exclude is not None:
        nsample = ndata - len(exclude)
    else:
        nsample = ndata
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if exclude is not None and sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2. / dsigma**2.
        yerr[jj] = data[ii][2] / dsigma
        jj = jj + 1
    brange = [-120, 120]
    mrange = [1.5, 3.2]
    # This matches the order of the parameters in the "samples" vector
    mbrange = [brange, mrange]
    if sampledata is None:
        sampledata = runSampler(X, Y, A, C, yerr, nburn, nsamples, parsigma,
                                mbrange)
    (histmb, edges, mbsamples, pbhist, pbedges) = sampledata
    # Hack -- produce fake Pbad samples from Pbad histogram: repeat every
    # bin center as often as the bin's count
    pbcenters = (pbedges[:-1] + pbedges[1:]) / 2
    pbsamples = hstack([
        array([center] * int(count))
        for center, count in zip(pbcenters, pbhist)
    ])
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi - 1], edges[1][indxj - 1]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi + 1], edges[1][indxj + 1]))
    #2D histogram
    plot.bovy_print(**bovyprintargs)
    levels = special.erf(0.5 * sc.arange(1, 4))
    xe = [edges[0][0], edges[0][-1]]
    ye = [edges[1][0], edges[1][-1]]
    aspect = (xe[1] - xe[0]) / (ye[1] - ye[0])
    plot.bovy_dens2d(histmb.T,
                     origin='lower',
                     cmap=cm.gist_yarg,
                     interpolation='nearest',
                     contours=True,
                     cntrmass=True,
                     extent=xe + ye,
                     levels=levels,
                     aspect=aspect,
                     xlabel=r'$b$',
                     ylabel=r'$m$')
    xlim(brange)
    ylim(mrange)
    plot.bovy_end_print(plotfilenameA)
    #Data with MAP line and sampling
    plot.bovy_print(**bovyprintargs)
    bestb = edges[0][indxi]
    bestm = edges[1][indxj]
    xlims = [0, 300]
    ylims = [0, 700]
    plot.bovy_plot(xlims,
                   bestm * sc.array(xlims) + bestb,
                   'k-',
                   xrange=xlims,
                   yrange=ylims,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    errorbar(X, Y, yerr, marker='o', color='k', linestyle='None', zorder=1)
    #Each row is unpacked as (slope, intercept).
    #NOTE(review): mbrange above is ordered (b, m) -- confirm that
    #runSampler really returns mbsamples as (m, b) pairs
    for thism, thisb in mbsamples:
        plot.bovy_plot(xlims,
                       thism * sc.array(xlims) + thisb,
                       overplot=True,
                       xrange=xlims,
                       yrange=ylims,
                       xlabel=r'$x$',
                       ylabel=r'$y$',
                       color='0.75',
                       zorder=1)
    plot.bovy_end_print(plotfilenameB)
    #Pb plot
    if 'text_fontsize' not in bovyprintargs:
        bovyprintargs['text_fontsize'] = 11
    plot.bovy_print(**bovyprintargs)
    #The number of bins has to be an integer
    plot.bovy_hist(pbsamples,
                   bins=int(round(sc.sqrt(nsamples) / 5.)),
                   xlabel=r'$P_\mathrm{b}$',
                   normed=True,
                   histtype='step',
                   range=[0, 1],
                   edgecolor='k')
    ylim(0, 4.)
    if dsigma == 1.:
        plot.bovy_text(r'$\mathrm{using\ correct\ data\ uncertainties}$',
                       top_right=True)
    else:
        plot.bovy_text(r'$\mathrm{using\ data\ uncertainties\ /\ 2}$',
                       top_left=True)
    plot.bovy_end_print(plotfilenameC)
    return sampledata