# Assumed imports (the original excerpt does not show them); bovy_plot and the
# helper routines read_data, objective, runSampler, probDistance,
# bar_detectability_convolve, and the module constants _XWIDTH, _YWIDTH and
# _RADTODEG are assumed to be defined elsewhere in this repository.
import os
import pickle
import math as m
import scipy as sc
from scipy import linalg, optimize, special, stats
from numpy import array, double, hstack
from matplotlib import cm
from matplotlib.patches import Ellipse
from matplotlib.pyplot import contour, errorbar, gca, xlim, ylim
import bovy_plot as plot

def ex15(exclude=sc.array([1,2,3,4]),plotfilename='ex15.png',
         bovyprintargs={}):
    """ex15: solve exercise 15
    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_allerr.dat',allerr=True)
    ndata= len(data)
    nsample= ndata- len(exclude)
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    Z= sc.zeros((nsample,2))
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            pass
        else:
            Y[jj]= data[ii][1][1]
            X[jj]= data[ii][1][0]
            Z[jj,0]= X[jj]
            Z[jj,1]= Y[jj]
            jj= jj+1
    #Now compute the PCA solution
    Zm= sc.mean(Z,axis=0)
    Q= sc.cov(Z.T)
    eigs= linalg.eig(Q)
    maxindx= sc.argmax(eigs[0])
    V= eigs[1][maxindx]
    V= V/linalg.norm(V)
    m= sc.sqrt(1/V[0]**2.-1)
    bestfit= sc.array([-m*Zm[0]+Zm[1],m])
    #Plot result
    plot.bovy_print(**bovyprintargs)
    xrange=[0,300]
    yrange=[0,700]
    plot.bovy_plot(sc.array(xrange),bestfit[1]*sc.array(xrange)+bestfit[0],
                   'k--',xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    plot.bovy_plot(X,Y,marker='o',color='k',linestyle='None',
                   zorder=0,overplot=True)
    plot.bovy_text(r'$y = %4.2f \,x %4.0f' % (bestfit[1], bestfit[0])+r'$',
                   bottom_right=True)
    plot.bovy_end_print(plotfilename)

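# A minimal sketch (not part of the original file) of the PCA idea used in
# ex15 above: the best-fit line direction is the leading eigenvector of the
# sample covariance of the (x, y) points.  The function name and the synthetic
# data are illustrative only.
def _pca_line_demo():
    import numpy as np
    rng = np.random.RandomState(0)
    x = np.linspace(0., 10., 50)
    y = 2.3 * x + 40. + rng.normal(scale=1., size=x.shape)
    Z = np.column_stack((x, y))
    Zm = Z.mean(axis=0)
    evals, evecs = np.linalg.eigh(np.cov(Z.T))
    v = evecs[:, np.argmax(evals)]        # leading eigenvector (a column)
    slope = v[1] / v[0]
    intercept = Zm[1] - slope * Zm[0]
    return slope, intercept               # roughly (2.3, 40.) for this small scatter
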
def exNew(exclude=sc.array([1,2,3,4]),
          plotfilename='exNew.png',nburn=20000,nsamples=200000,
          parsigma=[5,.075,.01,1,.1],dsigma=1.):
    """exNew: solve the new exercise using MCMC sampling
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilename   - filename for the output plot
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian)
       dsigma         - divide uncertainties by this amount
    Output:
       plot
    History:
       2010-04-28 - Written - Bovy (NYU)
    """
    sc.random.seed(1) #In the interest of reproducibility (if that's a word)
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    if exclude is not None:
        nsample= ndata- len(exclude)
    else:
        nsample= ndata
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if exclude is not None and sc.any(exclude == data[ii][0]):
            pass
        else:
            Y[jj]= data[ii][1][1]
            X[jj]= data[ii][1][0]
            A[jj,1]= data[ii][1][0]
            C[jj,jj]= data[ii][2]**2./dsigma**2.
            yerr[jj]= data[ii][2]/dsigma
            jj= jj+1
    #Now compute the best fit and the uncertainties
    bestfit= sc.dot(linalg.inv(C),Y.T)
    bestfit= sc.dot(A.T,bestfit)
    bestfitvar= sc.dot(linalg.inv(C),A)
    bestfitvar= sc.dot(A.T,bestfitvar)
    bestfitvar= linalg.inv(bestfitvar)
    bestfit= sc.dot(bestfitvar,bestfit)
    initialguess= sc.array([bestfit[0],bestfit[1],0.,sc.mean(Y),
                            m.log(sc.var(Y))]) #(b,m,Pb,Yb,lnVb)
    #With this initial guess start off the sampling procedure
    initialX= objective(initialguess,X,Y,yerr)
    currentX= initialX
    bestX= initialX
    bestfit= initialguess
    currentguess= initialguess
    naccept= 0
    samples= []
    samples.append(currentguess)
    for jj in range(nburn+nsamples):
        #Draw a sample from the proposal distribution
        newsample= sc.zeros(5)
        newsample[0]= currentguess[0]+stats.norm.rvs()*parsigma[0]
        newsample[1]= currentguess[1]+stats.norm.rvs()*parsigma[1]
        #newsample[2]= stats.uniform.rvs()
        newsample[2]= currentguess[2]+stats.norm.rvs()*parsigma[2]
        newsample[3]= currentguess[3]+stats.norm.rvs()*parsigma[3]
        newsample[4]= currentguess[4]+stats.norm.rvs()*parsigma[4]
        #Calculate the objective function for the newsample
        newX= objective(newsample,X,Y,yerr)
        #Accept or reject
        #Reject with the appropriate probability
        u= stats.uniform.rvs()
        if u < m.exp(newX-currentX):
            #Accept
            currentX= newX
            currentguess= newsample
            naccept= naccept+1
        if currentX > bestX:
            bestfit= currentguess
            bestX= currentX
        samples.append(currentguess)
    if double(naccept)/(nburn+nsamples) < .2 or double(naccept)/(nburn+nsamples) > .6:
        print "Acceptance ratio was "+str(double(naccept)/(nburn+nsamples))
    samples= sc.array(samples).T[:,nburn:-1]
    print "Best-fit, overall"
    print bestfit, sc.mean(samples[2,:]), sc.median(samples[2,:])
    histmb,edges= sc.histogramdd(samples.T[:,0:2],
                                 bins=round(sc.sqrt(nsamples)/5.))
    indxi= sc.argmax(sc.amax(histmb,axis=1))
    indxj= sc.argmax(sc.amax(histmb,axis=0))
    print "Best-fit, marginalized"
    print edges[0][indxi-1], edges[1][indxj-1]
    print edges[0][indxi], edges[1][indxj]
    print edges[0][indxi+1], edges[1][indxj+1]
    #2D histogram
    plot.bovy_print()
    levels= special.erf(0.5*sc.arange(1,4))
    #xrange=[edges[0][0],edges[0][-1]]
    #yrange=[edges[1][0],edges[1][-1]]
    xrange=[-120,120]
    yrange=[1.5,3.2]
    histmb,edges= sc.histogramdd(samples.T[:,0:2],
                                 range=[[-120,120],[1.5,3.2]],
                                 bins=(round(sc.sqrt(nsamples)/5.)/(edges[0][-1]-edges[0][0])*(xrange[1]-xrange[0]),
                                       round(sc.sqrt(nsamples)/5.)/(edges[1][-1]-edges[1][0])*(yrange[1]-yrange[0])))
    aspect= (xrange[1]-xrange[0])/(yrange[1]-yrange[0])
    plot.bovy_dens2d(histmb.T,origin='lower',cmap='gist_yarg',
                     contours=True,cntrmass=True,
                     xrange=xrange,yrange=yrange,
                     levels=levels,
                     aspect=aspect,
                     xlabel=r'$b$',ylabel=r'$m$')
    if dsigma == 1.:
        plot.bovy_text(r'$\mathrm{using\ correct\ data\ uncertainties}$',
                       top_right=True)
    else:
        plot.bovy_text(r'$\mathrm{using\ data\ uncertainties\ /\ 2}$',
                       top_right=True)
    if dsigma == 1.:
        plot.bovy_end_print('exNew1a.png')
    else:
        plot.bovy_end_print('exNew2a.png')
    #Data with MAP line and sampling
    plot.bovy_print()
    bestb= edges[0][indxi]
    bestm= edges[1][indxj]
    xrange=[0,300]
    yrange=[0,700]
    plot.bovy_plot(xrange,bestm*sc.array(xrange)+bestb,'k-',
                   xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    errorbar(X,Y,yerr,color='k',marker='o',linestyle='None',zorder=1)
    for ii in range(10):
        #Pick one of the post-burn-in samples at random and overplot its line
        ransample= int(sc.floor(stats.uniform.rvs()*nsamples))
        ransample= samples.T[ransample,0:2]
        bestb= ransample[0]
        bestm= ransample[1]
        plot.bovy_plot(xrange,bestm*sc.array(xrange)+bestb,
                       overplot=True,xrange=xrange,yrange=yrange,
                       xlabel=r'$x$',ylabel=r'$y$',color='0.75',zorder=1)
    if dsigma == 1.:
        plot.bovy_text(r'$\mathrm{using\ correct\ data\ uncertainties}$',
                       top_right=True)
    else:
        plot.bovy_text(r'$\mathrm{using\ data\ uncertainties\ /\ 2}$',
                       top_right=True)
    if dsigma == 1.:
        plot.bovy_end_print('exNew1b.png')
    else:
        plot.bovy_end_print('exNew2b.png')
    #Pb plot
    plot.bovy_print()
    plot.bovy_hist(samples.T[:,2],color='k',bins=round(sc.sqrt(nsamples)/5.),
                   xlabel=r'$P_\mathrm{b}$',normed=True,histtype='step',
                   range=[0,1])
    if dsigma == 1.:
        plot.bovy_text(r'$\mathrm{using\ correct\ data\ uncertainties}$',
                       top_right=True)
    else:
        plot.bovy_text(r'$\mathrm{using\ data\ uncertainties\ /\ 2}$',
                       top_right=True)
    if dsigma == 1.:
        plot.bovy_end_print('exNew1c.png')
    else:
        plot.bovy_end_print('exNew2c.png')
    return

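# A minimal sketch (illustrative, not from the original file) of the
# Metropolis accept/reject rule used in exNew and ex10, applied to a toy 1-D
# Gaussian log-density.  The `logpost` target and the proposal width are
# stand-ins for the `objective` function and `parsigma` of the exercises.
def _metropolis_demo(nsamples=10000, propsigma=1.0):
    import numpy as np
    rng = np.random.RandomState(4)
    logpost = lambda theta: -0.5 * theta**2           # standard-normal target
    current, logp_current = 0.0, logpost(0.0)
    chain = []
    for _ in range(nsamples):
        proposal = current + propsigma * rng.normal()
        logp_prop = logpost(proposal)
        # accept with probability min(1, exp(logp_prop - logp_current))
        if rng.uniform() < np.exp(min(0.0, logp_prop - logp_current)):
            current, logp_current = proposal, logp_prop
        chain.append(current)
    return np.array(chain)                             # mean ~ 0, std ~ 1
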
def ex10(exclude=sc.array([1,2,3,4]),
         plotfilenameA='ex10a.png',
         plotfilenameB='ex10b.png',
         nburn=1000,nsamples=200000,
         parsigma=[5,.075,0.1],
         bovyprintargs={}):
    """ex10: solve exercise 10 using MCMC sampling
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilename   - filename for the output plot
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian)
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    sc.random.seed(-1) #In the interest of reproducibility (if that's a word)
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    if exclude is not None:
        nsample= ndata- len(exclude)
    else:
        nsample= ndata
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if exclude is not None and sc.any(exclude == data[ii][0]):
            pass
        else:
            Y[jj]= data[ii][1][1]
            X[jj]= data[ii][1][0]
            A[jj,1]= data[ii][1][0]
            C[jj,jj]= data[ii][2]**2.
            yerr[jj]= data[ii][2]
            jj= jj+1
    #Now compute the best fit and the uncertainties
    bestfit= sc.dot(linalg.inv(C),Y.T)
    bestfit= sc.dot(A.T,bestfit)
    bestfitvar= sc.dot(linalg.inv(C),A)
    bestfitvar= sc.dot(A.T,bestfitvar)
    bestfitvar= linalg.inv(bestfitvar)
    bestfit= sc.dot(bestfitvar,bestfit)
    initialguess= sc.array([bestfit[0],bestfit[1],0.]) #(b,m,lnS)
    #With this initial guess start off the sampling procedure
    initialX= objective(initialguess,X,Y,yerr)
    currentX= initialX
    bestX= initialX
    bestfit= initialguess
    currentguess= initialguess
    naccept= 0
    samples= []
    samples.append(currentguess)
    for jj in range(nburn+nsamples):
        #Draw a sample from the proposal distribution
        newsample= sc.zeros(3)
        newsample[0]= currentguess[0]+stats.norm.rvs()*parsigma[0]
        newsample[1]= currentguess[1]+stats.norm.rvs()*parsigma[1]
        newsample[2]= currentguess[2]+stats.norm.rvs()*parsigma[2]
        #Calculate the objective function for the newsample
        newX= objective(newsample,X,Y,yerr)
        #Accept or reject
        #Reject with the appropriate probability
        u= stats.uniform.rvs()
        accept= False
        try:
            test= m.exp(newX-currentX)
            if u < test:
                accept= True
        except OverflowError:
            accept= True
        if accept:
            #Accept
            currentX= newX
            currentguess= newsample
            naccept= naccept+1
        if currentX > bestX:
            bestfit= currentguess
            bestX= currentX
        samples.append(currentguess)
    if double(naccept)/(nburn+nsamples) < .5 or double(naccept)/(nburn+nsamples) > .8:
        print "Acceptance ratio was "+str(double(naccept)/(nburn+nsamples))
    samples= sc.array(samples).T[:,nburn:-1]
    print "Best-fit, overall"
    print bestfit, sc.mean(samples[2,:]), sc.median(samples[2,:])
    histmb,edges= sc.histogramdd(samples.T[:,0:2],
                                 bins=round(sc.sqrt(nsamples)/2.))
    indxi= sc.argmax(sc.amax(histmb,axis=1))
    indxj= sc.argmax(sc.amax(histmb,axis=0))
    print "Best-fit, marginalized"
    print edges[0][indxi-1], edges[1][indxj-1]
    print edges[0][indxi], edges[1][indxj]
    print edges[0][indxi+1], edges[1][indxj+1]
    print "Best-fit for S marginalized"
    histS,edgesS= sc.histogram(samples.T[:,2],bins=round(sc.sqrt(nsamples)/2.))
    indx= sc.argmax(histS)
    #Data with MAP line and sampling
    plot.bovy_print(**bovyprintargs)
    bestb= bestfit[0]
    bestm= bestfit[1]
    xrange=[0,300]
    yrange=[0,700]
    plot.bovy_plot(xrange,bestm*sc.array(xrange)+bestb,'k-',
                   xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    errorbar(X,Y,sc.exp(bestfit[2]/2.),
             marker='o',color='k',linestyle='None',zorder=1)
    plot.bovy_text(r'$\mathrm{MAP}\ :\ y = %4.2f \,x+ %4.0f' % (bestfit[1], bestfit[0])+r'$'+'\n'+
                   r'$\mathrm{MAP}\ :\ \sqrt{S} = %3.1f$' % (sc.exp(bestfit[2]/2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameA)
    #Data with marginalized line and sampling
    plot.bovy_print(**bovyprintargs)
    bestb= edges[0][indxi]
    bestm= edges[1][indxj]
    bestS= edgesS[indx]
    xrange=[0,300]
    yrange=[0,700]
    plot.bovy_plot(xrange,bestm*sc.array(xrange)+bestb,'k-',
                   xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    errorbar(X,Y,sc.exp(bestS/2.),
             marker='o',color='k',linestyle='None',zorder=1)
    plot.bovy_text(r'$\mathrm{marginalized\ over\ S}\ :\ y = %4.2f \,x+ %4.0f' % (bestm, bestb)+r'$'+'\n'+
                   r'$\mathrm{marginalized\ over}\ (m,b)\ :\ \sqrt{S} = %3.1f$' % (sc.exp(bestS/2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameB)
    return

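# A small standalone sketch (not from the original file) of the weighted
# least-squares step that ex10 and exNew use for their initial guess:
# [b, m] = (A^T C^-1 A)^-1 A^T C^-1 Y, with A = [1, x] and C the diagonal
# covariance matrix of the y uncertainties.  The data values are made up.
def _chi2_line_demo():
    import numpy as np
    x = np.array([50., 100., 150., 200., 250.])
    y = np.array([160., 280., 390., 500., 630.])
    sigma_y = np.array([10., 15., 10., 20., 15.])
    A = np.vander(x, 2, increasing=True)        # columns [1, x]
    Cinv = np.diag(1. / sigma_y**2)
    cov = np.linalg.inv(A.T.dot(Cinv).dot(A))   # parameter covariance
    b, m_slope = cov.dot(A.T.dot(Cinv).dot(y))
    return (b, m_slope), cov
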
def ex14(exclude=sc.array([1,2,3,4]),plotfilename='ex14.png',
         bovyprintargs={}):
    """ex14: solve exercise 14
    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_allerr.dat',allerr=True)
    ndata= len(data)
    nsample= ndata- len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y1= sc.zeros(nsample)
    X1= sc.zeros(nsample)
    A1= sc.ones((nsample,2))
    C1= sc.zeros((nsample,nsample))
    Y2= sc.zeros(nsample)
    X2= sc.zeros(nsample)
    A2= sc.ones((nsample,2))
    C2= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    xerr= sc.zeros(nsample)
    ycovar= sc.zeros((2,nsample,2)) #Makes the sc.dot easier
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            pass
        else:
            Y1[jj]= data[ii][1][1]
            X1[jj]= data[ii][1][0]
            A1[jj,1]= data[ii][1][0]
            C1[jj,jj]= data[ii][2]**2.
            yerr[jj]= data[ii][2]
            Y2[jj]= data[ii][1][0]
            X2[jj]= data[ii][1][1]
            A2[jj,1]= data[ii][1][1]
            C2[jj,jj]= data[ii][3]**2.
            xerr[jj]= data[ii][3]
            jj= jj+1
    #Now compute the best fit and the uncertainties: forward
    bestfit1= sc.dot(linalg.inv(C1),Y1.T)
    bestfit1= sc.dot(A1.T,bestfit1)
    bestfitvar1= sc.dot(linalg.inv(C1),A1)
    bestfitvar1= sc.dot(A1.T,bestfitvar1)
    bestfitvar1= linalg.inv(bestfitvar1)
    bestfit1= sc.dot(bestfitvar1,bestfit1)
    #Now compute the best fit and the uncertainties: backward
    bestfit2= sc.dot(linalg.inv(C2),Y2.T)
    bestfit2= sc.dot(A2.T,bestfit2)
    bestfitvar2= sc.dot(linalg.inv(C2),A2)
    bestfitvar2= sc.dot(A2.T,bestfitvar2)
    bestfitvar2= linalg.inv(bestfitvar2)
    bestfit2= sc.dot(bestfitvar2,bestfit2)
    #Propagate to y=mx+b
    linerrprop= sc.array([[-1./bestfit2[1],bestfit2[0]/bestfit2[1]**2],
                          [0.,-1./bestfit2[1]**2.]])
    bestfit2= sc.array([-bestfit2[0]/bestfit2[1],1./bestfit2[1]])
    bestfitvar2= sc.dot(linerrprop,sc.dot(bestfitvar2,linerrprop.T))
    #Plot result
    plot.bovy_print(**bovyprintargs)
    xrange=[0,300]
    yrange=[0,700]
    plot.bovy_plot(sc.array(xrange),bestfit1[1]*sc.array(xrange)+bestfit1[0],
                   'k--',xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    plot.bovy_plot(sc.array(xrange),bestfit2[1]*sc.array(xrange)+bestfit2[0],
                   'k-.',overplot=True,zorder=2)
    #Plot data
    errorbar(A1[:,1],Y1,yerr,xerr,color='k',marker='o',
             linestyle='None',zorder=0)
    plot.bovy_text(r'$\mathrm{forward}\ ---\:\ y = ( '+'%4.2f \pm %4.2f )\,x+ ( %4.0f\pm %4.0f' % (bestfit1[1], m.sqrt(bestfitvar1[1,1]), bestfit1[0],m.sqrt(bestfitvar1[0,0]))+r')$'+'\n'+
                   r'$\mathrm{reverse}\ -\cdot -\:\ y = ( '+'%4.2f \pm %4.2f )\,x+ ( %4.0f\pm %4.0f' % (bestfit2[1], m.sqrt(bestfitvar2[1,1]), bestfit2[0],m.sqrt(bestfitvar2[0,0]))+r')$',
                   bottom_right=True)
    plot.bovy_end_print(plotfilename)

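# A brief sketch (illustrative only) of the error propagation used in ex14 to
# turn the reverse fit x = b' + m' y into y = b + m x: with b = -b'/m' and
# m = 1/m', the covariance transforms as J C J^T, where J is the Jacobian
# d(b, m)/d(b', m').  The numbers below are made up.
def _reverse_fit_propagation_demo():
    import numpy as np
    bprime, mprime = -10., 0.45             # reverse-fit intercept and slope
    cov_prime = np.array([[25., 0.1],
                          [0.1, 0.004]])    # reverse-fit covariance of (b', m')
    J = np.array([[-1. / mprime, bprime / mprime**2],
                  [0., -1. / mprime**2]])
    b, m_slope = -bprime / mprime, 1. / mprime
    cov = J.dot(cov_prime).dot(J.T)
    return (b, m_slope), cov
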
def ex12(exclude=sc.array([1,2,3,4]),plotfilename='ex12.png',
         bovyprintargs={}):
    """ex12: solve exercise 12 by optimization of the objective function
    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
    Output:
       plot
    History:
       2010-05-06 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_allerr.dat',allerr=True)
    ndata= len(data)
    nsample= ndata- len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    Z= sc.zeros((nsample,2))
    yerr= sc.zeros(nsample)
    ycovar= sc.zeros((2,nsample,2)) #Makes the sc.dot easier
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            pass
        else:
            Y[jj]= data[ii][1][1]
            X[jj]= data[ii][1][0]
            Z[jj,0]= X[jj]
            Z[jj,1]= Y[jj]
            A[jj,1]= data[ii][1][0]
            C[jj,jj]= data[ii][2]**2.
            yerr[jj]= data[ii][2]
            ycovar[0,jj,0]= data[ii][3]**2.
            ycovar[1,jj,1]= data[ii][2]**2.
            ycovar[0,jj,1]= data[ii][4]*m.sqrt(ycovar[0,jj,0]*ycovar[1,jj,1])
            ycovar[1,jj,0]= ycovar[0,jj,1]
            jj= jj+1
    #Now compute the best fit and the uncertainties
    bestfit= sc.dot(linalg.inv(C),Y.T)
    bestfit= sc.dot(A.T,bestfit)
    bestfitvar= sc.dot(linalg.inv(C),A)
    bestfitvar= sc.dot(A.T,bestfitvar)
    bestfitvar= linalg.inv(bestfitvar)
    bestfit= sc.dot(bestfitvar,bestfit)
    #Now optimize
    bestfit2d1= optimize.fmin(objective,bestfit,(Z,ycovar),disp=False)
    #Restart the optimization once using a different method
    bestfit2d= optimize.fmin_powell(objective,bestfit,
                                    (Z,ycovar),disp=False)
    if linalg.norm(bestfit2d-bestfit2d1) > 10**-12:
        if linalg.norm(bestfit2d-bestfit2d1) < 10**-6:
            print "Different optimizers give slightly different results..."
        else:
            print "Different optimizers give rather different results..."
        print "The norm of the results differs by %g" % linalg.norm(bestfit2d-bestfit2d1)
        try:
            x= raw_input('continue to plot? [yn]\n')
        except EOFError:
            print "Since you are in non-interactive mode I will assume 'y'"
            x= 'y'
        if x == 'n':
            print "returning..."
            return -1
    #Plot result
    plot.bovy_print(**bovyprintargs)
    xrange=[0,300]
    yrange=[0,700]
    plot.bovy_plot(sc.array(xrange),bestfit2d[1]*sc.array(xrange)+bestfit2d[0],
                   'k-',xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    #Plot the data OMG straight from plot_data.py
    data= read_data('data_allerr.dat',True)
    ndata= len(data)
    #Create the ellipses and the data points
    id= sc.zeros(nsample)
    x= sc.zeros(nsample)
    y= sc.zeros(nsample)
    ellipses=[]
    ymin, ymax= 0, 0
    xmin, xmax= 0, 0
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        id[jj]= data[ii][0]
        x[jj]= data[ii][1][0]
        y[jj]= data[ii][1][1]
        #Calculate the eigenvalues and the rotation angle
        ycovar= sc.zeros((2,2))
        ycovar[0,0]= data[ii][3]**2.
        ycovar[1,1]= data[ii][2]**2.
        ycovar[0,1]= data[ii][4]*m.sqrt(ycovar[0,0]*ycovar[1,1])
        ycovar[1,0]= ycovar[0,1]
        eigs= linalg.eig(ycovar)
        angle= m.atan(-eigs[1][0,1]/eigs[1][1,1])/m.pi*180.
        thisellipse= Ellipse(sc.array([x[jj],y[jj]]),2*m.sqrt(eigs[0][0]),
                             2*m.sqrt(eigs[0][1]),angle)
        ellipses.append(thisellipse)
        if (x[jj]+m.sqrt(ycovar[0,0])) > xmax:
            xmax= (x[jj]+m.sqrt(ycovar[0,0]))
        if (x[jj]-m.sqrt(ycovar[0,0])) < xmin:
            xmin= (x[jj]-m.sqrt(ycovar[0,0]))
        if (y[jj]+m.sqrt(ycovar[1,1])) > ymax:
            ymax= (y[jj]+m.sqrt(ycovar[1,1]))
        if (y[jj]-m.sqrt(ycovar[1,1])) < ymin:
            ymin= (y[jj]-m.sqrt(ycovar[1,1]))
        jj= jj+1
    #Add the error ellipses
    ax= gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    ax.plot(x,y,color='k',marker='o',linestyle='None')
    plot.bovy_text(r'$y = %4.2f \,x+ %4.0f' % (bestfit2d[1], bestfit2d[0])+r'$',
                   bottom_right=True)
    plot.bovy_end_print(plotfilename)

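# An illustrative sketch (not from the original file) of the error-ellipse
# construction used at the end of ex12: the 1-sigma ellipse of a 2x2
# covariance matrix has semi-axes sqrt(eigenvalue) along the eigenvectors.
# The default covariance below is made up.
def _error_ellipse_demo(cov=None, center=(0., 0.)):
    import numpy as np
    from matplotlib.patches import Ellipse
    if cov is None:
        cov = np.array([[9., 3.], [3., 4.]])
    evals, evecs = np.linalg.eigh(cov)
    # angle of the eigenvector belonging to the largest eigenvalue, in degrees
    angle = np.degrees(np.arctan2(evecs[1, -1], evecs[0, -1]))
    width, height = 2. * np.sqrt(evals[-1]), 2. * np.sqrt(evals[0])
    return Ellipse(center, width, height, angle=angle)
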
def exMix1(exclude=None,
           plotfilenameA="exMix1a.png",
           plotfilenameB="exMix1b.png",
           plotfilenameC="exMix1c.png",
           nburn=20000,
           nsamples=1000000,
           parsigma=[5, 0.075, 0.2, 1, 0.1],
           dsigma=1.0,
           bovyprintargs={},
           sampledata=None):
    """exMix1: solve exercise 5 (mixture model) using MCMC sampling
    Input:
       exclude         - ID numbers to exclude from the analysis (can be None)
       plotfilename*   - filenames for the output plots
       nburn           - number of burn-in samples
       nsamples        - number of samples to take after burn-in
       parsigma        - proposal distribution width (Gaussian)
       dsigma          - divide uncertainties by this amount
    Output:
       plot
    History:
       2010-04-28 - Written - Bovy (NYU)
    """
    sc.random.seed(-1)  # In the interest of reproducibility (if that's a word)
    # Read the data
    data = read_data("data_yerr.dat")
    ndata = len(data)
    if exclude is not None:
        nsample = ndata - len(exclude)
    else:
        nsample = ndata
    # First find the chi-squared solution, which we will use as an
    # initial guess
    # Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if exclude is not None and sc.any(exclude == data[ii][0]):
            pass
        else:
            Y[jj] = data[ii][1][1]
            X[jj] = data[ii][1][0]
            A[jj, 1] = data[ii][1][0]
            C[jj, jj] = data[ii][2] ** 2.0 / dsigma ** 2.0
            yerr[jj] = data[ii][2] / dsigma
            jj = jj + 1
    brange = [-120, 120]
    mrange = [1.5, 3.2]
    # This matches the order of the parameters in the "samples" vector
    mbrange = [brange, mrange]
    if sampledata is None:
        sampledata = runSampler(X, Y, A, C, yerr, nburn, nsamples, parsigma,
                                mbrange)
    (histmb, edges, mbsamples, pbhist, pbedges) = sampledata
    # Hack -- produce fake Pbad samples from the Pbad histogram.
    pbsamples = hstack([array([x] * N)
                        for x, N in zip((pbedges[:-1] + pbedges[1:]) / 2,
                                        pbhist)])
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print "Best-fit, marginalized"
    print edges[0][indxi - 1], edges[1][indxj - 1]
    print edges[0][indxi], edges[1][indxj]
    print edges[0][indxi + 1], edges[1][indxj + 1]
    # 2D histogram
    plot.bovy_print(**bovyprintargs)
    levels = special.erf(0.5 * sc.arange(1, 4))
    xe = [edges[0][0], edges[0][-1]]
    ye = [edges[1][0], edges[1][-1]]
    aspect = (xe[1] - xe[0]) / (ye[1] - ye[0])
    plot.bovy_dens2d(histmb.T, origin="lower", cmap=cm.gist_yarg,
                     interpolation="nearest",
                     contours=True, cntrmass=True,
                     extent=xe + ye, levels=levels,
                     aspect=aspect, xlabel=r"$b$", ylabel=r"$m$")
    xlim(brange)
    ylim(mrange)
    plot.bovy_end_print(plotfilenameA)
    # Data with MAP line and sampling
    plot.bovy_print(**bovyprintargs)
    bestb = edges[0][indxi]
    bestm = edges[1][indxj]
    xrange = [0, 300]
    yrange = [0, 700]
    plot.bovy_plot(xrange, bestm * sc.array(xrange) + bestb, "k-",
                   xrange=xrange, yrange=yrange,
                   xlabel=r"$x$", ylabel=r"$y$", zorder=2)
    errorbar(X, Y, yerr, marker="o", color="k", linestyle="None", zorder=1)
    for m, b in mbsamples:
        plot.bovy_plot(xrange, m * sc.array(xrange) + b,
                       overplot=True, xrange=xrange, yrange=yrange,
                       xlabel=r"$x$", ylabel=r"$y$", color="0.75", zorder=1)
    plot.bovy_end_print(plotfilenameB)
    # Pb plot
    if not "text_fontsize" in bovyprintargs:
        bovyprintargs["text_fontsize"] = 11
    plot.bovy_print(**bovyprintargs)
    plot.bovy_hist(pbsamples, bins=round(sc.sqrt(nsamples) / 5.0),
                   xlabel=r"$P_\mathrm{b}$", normed=True, histtype="step",
                   range=[0, 1], edgecolor="k")
    ylim(0, 4.0)
    if dsigma == 1.0:
        plot.bovy_text(r"$\mathrm{using\ correct\ data\ uncertainties}$",
                       top_right=True)
    else:
        plot.bovy_text(r"$\mathrm{using\ data\ uncertainties\ /\ 2}$",
                       top_left=True)
    plot.bovy_end_print(plotfilenameC)
    return sampledata

def bar_detectability(parser,dx=_XWIDTH/20.,dy=_YWIDTH/20.,
                      nx=100,ny=20,
                      ngrid=201,rrange=[0.7,1.3],
                      phirange=[-m.pi/2.,m.pi/2.],
                      saveDir='../bar/1dLarge/'):
    """
    NAME:
       bar_detectability
    PURPOSE:
       analyze the detectability of the Hercules moving group in the
       los-distribution around the Galaxy
    INPUT:
       nx - number of plots in the x-direction
       ny - number of plots in the y direction
       dx - x-spacing
       dy - y-spacing
       ngrid - number of gridpoints to evaluate the density on
       rrange - range of Galactocentric radii to consider
       phirange - range of Galactic azimuths to consider
       saveDir - directory to save the pickles in
    OUTPUT:
       plot in plotfilename
    HISTORY:
       2010-05-09 - Written - Bovy (NYU)
    """
    (options,args)= parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        return
    if options.convolve is not None:
        bar_detectability_convolve(parser,dx=dx,dy=dy,nx=nx,ny=ny,ngrid=ngrid,
                                   rrange=rrange,phirange=phirange,
                                   saveDir=saveDir)
        return
    vloslinspace= (-.9,.9,ngrid)
    vloss= sc.linspace(*vloslinspace)
    picklebasename= '1d_%i_%i_%i_%.1f_%.1f_%.1f_%.1f' % (nx,ny,ngrid,rrange[0],rrange[1],phirange[0],phirange[1])
    detect= sc.zeros((nx,ny))
    losd= sc.zeros((nx,ny))
    gall= sc.zeros((nx,ny))
    for ii in range(nx):
        for jj in range(ny):
            thisR= (rrange[0]+(rrange[1]-rrange[0])/
                    (ny*_YWIDTH+(ny-1)*dy)*(jj*(_YWIDTH+dy)+_YWIDTH/2.))
            thisphi= (phirange[0]+(phirange[1]-phirange[0])/
                      (nx*_XWIDTH+(nx-1)*dx)*(ii*(_XWIDTH+dx)+_XWIDTH/2.))
            thissavefilename= os.path.join(saveDir,picklebasename+'_%i_%i.sav' % (ii,jj))
            if os.path.exists(thissavefilename):
                print "Restoring los-velocity distribution at %.2f, %.2f ..." % (thisR,thisphi)
                savefile= open(thissavefilename,'r')
                vlosd= pickle.load(savefile)
                axivlosd= pickle.load(savefile)
                savefile.close()
            else:
                print "Did not find the los-velocity distribution at %.2f, %.2f ..." % (thisR,thisphi)
                print "returning ..."
                return
            ddx= 1./sc.sum(axivlosd)
            #skipCenter
            if not options.skipCenter == 0.:
                skipIndx= (sc.fabs(vloss) < options.skipCenter)
                indx= (sc.fabs(vloss) >= options.skipCenter)
                vlosd= vlosd/sc.sum(vlosd[indx])/ddx
                axivlosd= axivlosd/sc.sum(axivlosd[indx])/ddx
                vlosd[skipIndx]= 1.
                axivlosd[skipIndx]= 1.
            #Guard against zeros before taking the log in the KL divergence
            vlosd_zeroindx= (vlosd == 0.)
            axivlosd_zeroindx= (axivlosd == 0.)
            vlosd[vlosd_zeroindx]= 1.
            axivlosd[vlosd_zeroindx]= 1.
            vlosd[axivlosd_zeroindx]= 1.
            axivlosd[axivlosd_zeroindx]= 1.
            detect[ii,jj]= probDistance.kullbackLeibler(vlosd,axivlosd,ddx,nan=True)
            #los distance and Galactic longitude
            d= m.sqrt(thisR**2.+1.-2.*thisR*m.cos(thisphi))
            losd[ii,jj]= d
            if 1./m.cos(thisphi) < thisR and m.cos(thisphi) > 0.:
                l= m.pi-m.asin(thisR/d*m.sin(thisphi))
            else:
                l= m.asin(thisR/d*m.sin(thisphi))
            gall[ii,jj]= l
    #Find maximum, further than 3 kpc away
    detectformax= detect.flatten()
    detectformax[losd.flatten() < 3./8.2]= 0.
    x= sc.argmax(detectformax)
    indx= sc.unravel_index(x,detect.shape)
    maxR= (rrange[0]+(rrange[1]-rrange[0])/
           (ny*_YWIDTH+(ny-1)*dy)*(indx[1]*(_YWIDTH+dy)+_YWIDTH/2.))
    maxphi= (phirange[0]+(phirange[1]-phirange[0])/
             (nx*_XWIDTH+(nx-1)*dx)*(indx[0]*(_XWIDTH+dx)+_XWIDTH/2.))
    print maxR, maxphi, losd[indx[0],indx[1]], detect[indx[0],indx[1]], gall[indx[0],indx[1]]*180./sc.pi
    #Now plot
    plot.bovy_print()
    plot.bovy_dens2d(detect.T,origin='lower',#interpolation='nearest',
                     xlabel=r'$\mathrm{Galactocentric\ azimuth}\ [\mathrm{deg}]$',
                     ylabel=r'$\mathrm{Galactocentric\ radius}\ /R_0$',
                     cmap='gist_yarg',xrange=sc.array(phirange)*_RADTODEG,
                     yrange=rrange,
                     aspect=(phirange[1]-phirange[0])*_RADTODEG/(rrange[1]-rrange[0]))
    #Contour the los distance and the Galactic longitude
    #plot.bovy_text(-22.,1.1,r'$\mathrm{apogee}$',color='w',
    #               rotation=105.)
    plot.bovy_text(-18.,1.1,r'$\mathrm{APOGEE}$',color='w',
                   rotation=285.)
    levels= [2/8.2*(ii+1/2.) for ii in range(10)]
    contour(losd.T,levels,colors='0.25',origin='lower',linestyles='--',
            aspect=(phirange[1]-phirange[0])*_RADTODEG/(rrange[1]-rrange[0]),
            extent=(phirange[0]*_RADTODEG,phirange[1]*_RADTODEG,
                    rrange[0],rrange[1]))
    gall[gall < 0.]+= sc.pi*2.
    levels= [0.,sc.pi/2.,sc.pi,3.*sc.pi/2.]
    contour(gall.T,levels,colors='w',origin='lower',linestyles='--',
            aspect=(phirange[1]-phirange[0])*_RADTODEG/(rrange[1]-rrange[0]),
            extent=(phirange[0]*_RADTODEG,phirange[1]*_RADTODEG,
                    rrange[0],rrange[1]))
    levels= [-5/180.*sc.pi,250/180.*sc.pi]
    contour(gall.T,levels,colors='w',origin='lower',linestyles='-.',
            aspect=(phirange[1]-phirange[0])*_RADTODEG/(rrange[1]-rrange[0]),
            extent=(phirange[0]*_RADTODEG,phirange[1]*_RADTODEG,
                    rrange[0],rrange[1]))
    if options.skipCenter == 0.:
        plot.bovy_text(r'$\mathrm{KL\ divergence\ / \ all}\ v_{\mathrm{los}}$',
                       title=True)
    else:
        plot.bovy_text(r'$\mathrm{KL\ divergence\ / }\ |v_{\mathrm{los}}| \geq %.2f \ v_0$' % options.skipCenter,
                       title=True)
    plot.bovy_end_print(args[0])

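# probDistance.kullbackLeibler is defined elsewhere in this repository; the
# helper below is a minimal sketch of what a discrete KL divergence of this
# kind looks like (illustrative only -- the actual implementation may handle
# normalization and NaNs differently).
def _kullback_leibler(p, q, dx):
    """Approximate KL(p||q) = int p ln(p/q) dv for densities sampled on a grid
    with spacing dx; assumes p and q are strictly positive on the grid."""
    import numpy as np
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    return dx * np.sum(p * np.log(p / q))
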