def ex15(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex15.png',
         bovyprintargs={}):
    """ex15: solve exercise 15 (straight line along the principal
    component of the (x,y) data)
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilename   - filename for the output plot
       bovyprintargs  - keyword arguments passed on to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    #Keep the points whose ID is not excluded. Counting the kept points
    #directly (instead of ndata-len(exclude)) stays correct when an
    #excluded ID does not occur in the data
    if exclude is None:
        kept = [data[ii] for ii in range(len(data))]
    else:
        excluded = sc.array(exclude)
        kept = [data[ii] for ii in range(len(data))
                if not sc.any(excluded == data[ii][0])]
    nsample = len(kept)
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    Z = sc.zeros((nsample, 2))
    for jj, point in enumerate(kept):
        X[jj] = point[1][0]
        Y[jj] = point[1][1]
        Z[jj, 0] = X[jj]
        Z[jj, 1] = Y[jj]
    #Now compute the PCA solution
    Zm = sc.mean(Z, axis=0)
    Q = sc.cov(Z.T)
    #Q is symmetric, so eigh applies; the eigenvectors are the *columns*
    #of the returned matrix
    eigvals, eigvecs = linalg.eigh(Q)
    V = eigvecs[:, sc.argmax(eigvals)]
    #Slope of the principal axis; the ratio of the components keeps the
    #sign of the slope (sqrt(1/V[0]**2-1) only gives its absolute value)
    slope = V[1] / V[0]
    #bestfit = (intercept, slope); the line passes through the data mean
    bestfit = sc.array([-slope * Zm[0] + Zm[1], slope])

    #Plot result
    plot.bovy_print(**bovyprintargs)
    xlim = [0, 300]
    ylim = [0, 700]
    plot.bovy_plot(sc.array(xlim), bestfit[1] * sc.array(xlim) + bestfit[0],
                   'k--', xrange=xlim, yrange=ylim,
                   xlabel=r'$x$', ylabel=r'$y$', zorder=2)
    plot.bovy_plot(X, Y, marker='o', color='k', linestyle='None',
                   zorder=0, overplot=True)

    plot.bovy_text(r'$y = %4.2f \,x %4.0f' % (bestfit[1], bestfit[0]) + r'$',
                   bottom_right=True)
    plot.bovy_end_print(plotfilename)
Example #2
0
def exNew(exclude=sc.array([1, 2, 3, 4]),
          plotfilename='exNew.png', nburn=20000, nsamples=200000,
          parsigma=[5, .075, .01, 1, .1], dsigma=1.):
    """exNew: solve the new exercise using MCMC sampling
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilename   - accepted but not used: the plots are written to
                        exNew1[abc].png (dsigma == 1) or exNew2[abc].png
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution widths (Gaussian), one per
                        sampled parameter
       dsigma         - divide uncertainties by this amount
    Output:
       plots
    History:
       2010-04-28 - Written - Bovy (NYU)
    """
    sc.random.seed(1)  #In the interest of reproducibility (if that's a word)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #Keep the points whose ID is not excluded. 'is None' is used because
    #comparing an array to None with == is an elementwise comparison
    if exclude is None:
        kept = [data[ii] for ii in range(ndata)]
    else:
        excluded = sc.array(exclude)
        kept = [data[ii] for ii in range(ndata)
                if not sc.any(excluded == data[ii][0])]
    nsample = len(kept)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    for jj, point in enumerate(kept):
        Y[jj] = point[1][1]
        X[jj] = point[1][0]
        A[jj, 1] = point[1][0]
        C[jj, jj] = point[2]**2. / dsigma**2.
        yerr[jj] = point[2] / dsigma
    #Now compute the best fit and the uncertainties
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y.T)))
    #The first column of A is all ones, so bestfit[0] is the intercept b
    #and bestfit[1] the slope m: the parameters are (b,m,Pb,Yb,log Vb)
    initialguess = sc.array([bestfit[0], bestfit[1], 0., sc.mean(Y),
                             m.log(sc.var(Y))])
    #With this initial guess start off the sampling procedure
    initialX = objective(initialguess, X, Y, yerr)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    samples = []
    samples.append(currentguess)
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution (the parameters are
        #perturbed in index order, one normal deviate each)
        newsample = sc.zeros(5)
        for kk in range(5):
            newsample[kk] = currentguess[kk] + stats.norm.rvs() * parsigma[kk]
        #Calculate the objective function for the newsample
        newX = objective(newsample, X, Y, yerr)
        #Accept or reject
        u = stats.uniform.rvs()
        try:
            accept = u < m.exp(newX - currentX)
        except OverflowError:
            #exp overflows only for a huge improvement: always accept
            accept = True
        if accept:
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
        if currentX > bestX:
            bestfit = currentguess
            bestX = currentX
        samples.append(currentguess)
    acceptance = double(naccept) / (nburn + nsamples)
    if acceptance < .2 or acceptance > .6:
        print("Acceptance ratio was " + str(acceptance))

    #Drop the burn-in; rows of 'samples' are parameters, columns are steps
    samples = sc.array(samples).T[:, nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit, sc.mean(samples[2, :]),
                        sc.median(samples[2, :])))

    #The number of bins must be an integer
    nbins = int(round(sc.sqrt(nsamples) / 5.))
    histmb, edges = sc.histogramdd(samples.T[:, 0:2], bins=nbins)
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi - 1], edges[1][indxj - 1]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi + 1], edges[1][indxj + 1]))
    #Remember the marginalized best fit now: indxi/indxj refer to *this*
    #histogram, and 'edges' is replaced by the plotting histogram below
    mapb = edges[0][indxi]
    mapm = edges[1][indxj]

    #Annotation and output-file prefix depend only on dsigma
    if dsigma == 1.:
        label = r'$\mathrm{using\ correct\ data\ uncertainties}$'
        prefix = 'exNew1'
    else:
        label = r'$\mathrm{using\ data\ uncertainties\ /\ 2}$'
        prefix = 'exNew2'

    #2D histogram
    plot.bovy_print()
    levels = special.erf(0.5 * sc.arange(1, 4))
    brange = [-120, 120]
    mrange = [1.5, 3.2]
    #Scale the number of bins so that the bin width over the fixed plotting
    #range matches that of the first histogram
    nbinsb = max(1, int(nbins / (edges[0][-1] - edges[0][0])
                        * (brange[1] - brange[0])))
    nbinsm = max(1, int(nbins / (edges[1][-1] - edges[1][0])
                        * (mrange[1] - mrange[0])))
    histmb, edges = sc.histogramdd(samples.T[:, 0:2],
                                   range=[brange, mrange],
                                   bins=(nbinsb, nbinsm))
    aspect = (brange[1] - brange[0]) / (mrange[1] - mrange[0])
    plot.bovy_dens2d(histmb.T, origin='lower', cmap='gist_yarg',
                     contours=True, cntrmass=True,
                     xrange=brange, yrange=mrange,
                     levels=levels,
                     aspect=aspect,
                     xlabel=r'$b$', ylabel=r'$m$')
    plot.bovy_text(label, top_right=True)
    plot.bovy_end_print(prefix + 'a.png')

    #Data with MAP line and sampling
    plot.bovy_print()
    xlim = [0, 300]
    ylim = [0, 700]
    plot.bovy_plot(xlim, mapm * sc.array(xlim) + mapb, 'k-',
                   xrange=xlim, yrange=ylim,
                   xlabel=r'$x$', ylabel=r'$y$', zorder=2)
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None', zorder=1)
    for ii in range(10):
        #Random sample (array indices must be integers)
        indx = int(sc.floor(stats.uniform.rvs() * nsamples))
        thisb = samples.T[indx, 0]
        thism = samples.T[indx, 1]
        plot.bovy_plot(xlim, thism * sc.array(xlim) + thisb,
                       overplot=True, xrange=xlim, yrange=ylim,
                       xlabel=r'$x$', ylabel=r'$y$', color='0.75', zorder=1)
    plot.bovy_text(label, top_right=True)
    plot.bovy_end_print(prefix + 'b.png')

    #Pb plot
    plot.bovy_print()
    plot.bovy_hist(samples.T[:, 2], color='k', bins=nbins,
                   xlabel=r'$P_\mathrm{b}$', normed=True, histtype='step',
                   range=[0, 1])
    plot.bovy_text(label, top_right=True)
    plot.bovy_end_print(prefix + 'c.png')

    return
def ex10(exclude=sc.array([1, 2, 3, 4]),
         plotfilenameA='ex10a.png',
         plotfilenameB='ex10b.png',
         nburn=1000, nsamples=200000,
         parsigma=[5, .075, 0.1],
         bovyprintargs={}):
    """ex10: solve exercise 10 using MCMC sampling
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilenameA  - filename for the plot of the overall best fit
       plotfilenameB  - filename for the plot of the marginalized best fit
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution widths (Gaussian), one per
                        sampled parameter
       bovyprintargs  - keyword arguments passed on to plot.bovy_print
    Output:
       plots
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #NOTE(review): recent NumPy versions reject negative seeds -- confirm
    #the intended seed before changing it
    sc.random.seed(-1)  #In the interest of reproducibility (if that's a word)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #Keep the points whose ID is not excluded. 'is None' is used because
    #comparing an array to None with == is an elementwise comparison
    if exclude is None:
        kept = [data[ii] for ii in range(ndata)]
    else:
        excluded = sc.array(exclude)
        kept = [data[ii] for ii in range(ndata)
                if not sc.any(excluded == data[ii][0])]
    nsample = len(kept)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    for jj, point in enumerate(kept):
        Y[jj] = point[1][1]
        X[jj] = point[1][0]
        A[jj, 1] = point[1][0]
        C[jj, jj] = point[2]**2.
        yerr[jj] = point[2]
    #Now compute the best fit and the uncertainties
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y.T)))
    #The first column of A is all ones, so bestfit[0] is the intercept b
    #and bestfit[1] the slope m: the parameters are (b,m,logS)
    initialguess = sc.array([bestfit[0], bestfit[1], 0.])
    #With this initial guess start off the sampling procedure
    initialX = objective(initialguess, X, Y, yerr)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    samples = []
    samples.append(currentguess)
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution (the parameters are
        #perturbed in index order, one normal deviate each)
        newsample = sc.zeros(3)
        for kk in range(3):
            newsample[kk] = currentguess[kk] + stats.norm.rvs() * parsigma[kk]
        #Calculate the objective function for the newsample
        newX = objective(newsample, X, Y, yerr)
        #Accept or reject
        u = stats.uniform.rvs()
        try:
            accept = u < m.exp(newX - currentX)
        except OverflowError:
            #exp overflows only for a huge improvement: always accept
            accept = True
        if accept:
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
        if currentX > bestX:
            bestfit = currentguess
            bestX = currentX
        samples.append(currentguess)
    acceptance = double(naccept) / (nburn + nsamples)
    if acceptance < .5 or acceptance > .8:
        print("Acceptance ratio was " + str(acceptance))

    #Drop the burn-in; rows of 'samples' are parameters, columns are steps
    samples = sc.array(samples).T[:, nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit, sc.mean(samples[2, :]),
                        sc.median(samples[2, :])))

    #The number of bins must be an integer
    nbins = int(round(sc.sqrt(nsamples) / 2.))
    histmb, edges = sc.histogramdd(samples.T[:, 0:2], bins=nbins)
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi - 1], edges[1][indxj - 1]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi + 1], edges[1][indxj + 1]))

    print("Best-fit for S marginalized")
    histS, edgesS = sc.histogram(samples.T[:, 2], bins=nbins)
    indx = sc.argmax(histS)

    xlim = [0, 300]
    ylim = [0, 700]

    #Data with the overall best-fit line; the error bars are sqrt(S)
    plot.bovy_print(**bovyprintargs)
    bestb = bestfit[0]
    bestm = bestfit[1]
    plot.bovy_plot(xlim, bestm * sc.array(xlim) + bestb, 'k-',
                   xrange=xlim, yrange=ylim,
                   xlabel=r'$x$', ylabel=r'$y$', zorder=2)
    errorbar(X, Y, sc.exp(bestfit[2] / 2.),
             marker='o', color='k', linestyle='None', zorder=1)
    plot.bovy_text(r'$\mathrm{MAP}\ :\ y = %4.2f \,x+ %4.0f'
                   % (bestfit[1], bestfit[0]) + r'$' + '\n'
                   + r'$\mathrm{MAP}\ :\ \sqrt{S} = %3.1f$'
                   % (sc.exp(bestfit[2] / 2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameA)

    #Data with the marginalized best-fit line
    plot.bovy_print(**bovyprintargs)
    bestb = edges[0][indxi]
    bestm = edges[1][indxj]
    bestS = edgesS[indx]
    plot.bovy_plot(xlim, bestm * sc.array(xlim) + bestb, 'k-',
                   xrange=xlim, yrange=ylim,
                   xlabel=r'$x$', ylabel=r'$y$', zorder=2)
    errorbar(X, Y, sc.exp(bestS / 2.),
             marker='o', color='k', linestyle='None', zorder=1)
    plot.bovy_text(r'$\mathrm{marginalized\ over\ S}\ :\ y = %4.2f \,x+ %4.0f'
                   % (bestm, bestb) + r'$' + '\n'
                   + r'$\mathrm{marginalized\ over}\ (m,b)\ :\ \sqrt{S} = %3.1f$'
                   % (sc.exp(bestS / 2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameB)

    return
Example #4
0
def ex15(
        exclude=sc.array([1, 2, 3, 4]), plotfilename='ex15.png',
        bovyprintargs={}):
    """ex15: solve exercise 15 (straight line along the principal
    component of the (x,y) data)
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilename   - filename for the output plot
       bovyprintargs  - keyword arguments passed on to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    #Keep the points whose ID is not excluded. Counting the kept points
    #directly (instead of ndata-len(exclude)) stays correct when an
    #excluded ID does not occur in the data
    if exclude is None:
        kept = [data[ii] for ii in range(len(data))]
    else:
        excluded = sc.array(exclude)
        kept = [data[ii] for ii in range(len(data))
                if not sc.any(excluded == data[ii][0])]
    nsample = len(kept)
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    Z = sc.zeros((nsample, 2))
    for jj, point in enumerate(kept):
        X[jj] = point[1][0]
        Y[jj] = point[1][1]
        Z[jj, 0] = X[jj]
        Z[jj, 1] = Y[jj]
    #Now compute the PCA solution
    Zm = sc.mean(Z, axis=0)
    Q = sc.cov(Z.T)
    #Q is symmetric, so eigh applies; the eigenvectors are the *columns*
    #of the returned matrix
    eigvals, eigvecs = linalg.eigh(Q)
    V = eigvecs[:, sc.argmax(eigvals)]
    #Slope of the principal axis; the ratio of the components keeps the
    #sign of the slope (sqrt(1/V[0]**2-1) only gives its absolute value)
    slope = V[1] / V[0]
    #bestfit = (intercept, slope); the line passes through the data mean
    bestfit = sc.array([-slope * Zm[0] + Zm[1], slope])

    #Plot result
    plot.bovy_print(**bovyprintargs)
    xlim = [0, 300]
    ylim = [0, 700]
    plot.bovy_plot(sc.array(xlim),
                   bestfit[1] * sc.array(xlim) + bestfit[0],
                   'k--',
                   xrange=xlim,
                   yrange=ylim,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    plot.bovy_plot(X,
                   Y,
                   marker='o',
                   color='k',
                   linestyle='None',
                   zorder=0,
                   overplot=True)

    plot.bovy_text(r'$y = %4.2f \,x %4.0f' % (bestfit[1], bestfit[0]) + r'$',
                   bottom_right=True)
    plot.bovy_end_print(plotfilename)
Example #5
0
def ex10(exclude=sc.array([1, 2, 3, 4]),
         plotfilenameA='ex10a.png',
         plotfilenameB='ex10b.png',
         nburn=1000,
         nsamples=200000,
         parsigma=[5, .075, 0.1],
         bovyprintargs={}):
    """ex10: solve exercise 10 using MCMC sampling
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilenameA  - filename for the plot of the overall best fit
       plotfilenameB  - filename for the plot of the marginalized best fit
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution widths (Gaussian), one per
                        sampled parameter
       bovyprintargs  - keyword arguments passed on to plot.bovy_print
    Output:
       plots
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #NOTE(review): recent NumPy versions reject negative seeds -- confirm
    #the intended seed before changing it
    sc.random.seed(-1)  #In the interest of reproducibility (if that's a word)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #Keep the points whose ID is not excluded. 'is None' is used because
    #comparing an array to None with == is an elementwise comparison
    if exclude is None:
        kept = [data[ii] for ii in range(ndata)]
    else:
        excluded = sc.array(exclude)
        kept = [data[ii] for ii in range(ndata)
                if not sc.any(excluded == data[ii][0])]
    nsample = len(kept)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    for jj, point in enumerate(kept):
        Y[jj] = point[1][1]
        X[jj] = point[1][0]
        A[jj, 1] = point[1][0]
        C[jj, jj] = point[2]**2.
        yerr[jj] = point[2]
    #Now compute the best fit and the uncertainties
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y.T)))
    #The first column of A is all ones, so bestfit[0] is the intercept b
    #and bestfit[1] the slope m: the parameters are (b,m,logS)
    initialguess = sc.array([bestfit[0], bestfit[1], 0.])
    #With this initial guess start off the sampling procedure
    initialX = objective(initialguess, X, Y, yerr)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    samples = []
    samples.append(currentguess)
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution (the parameters are
        #perturbed in index order, one normal deviate each)
        newsample = sc.zeros(3)
        for kk in range(3):
            newsample[kk] = currentguess[kk] + stats.norm.rvs() * parsigma[kk]
        #Calculate the objective function for the newsample
        newX = objective(newsample, X, Y, yerr)
        #Accept or reject
        u = stats.uniform.rvs()
        try:
            accept = u < m.exp(newX - currentX)
        except OverflowError:
            #exp overflows only for a huge improvement: always accept
            accept = True
        if accept:
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
        if currentX > bestX:
            bestfit = currentguess
            bestX = currentX
        samples.append(currentguess)
    acceptance = double(naccept) / (nburn + nsamples)
    if acceptance < .5 or acceptance > .8:
        print("Acceptance ratio was " + str(acceptance))

    #Drop the burn-in; rows of 'samples' are parameters, columns are steps
    samples = sc.array(samples).T[:, nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit, sc.mean(samples[2, :]),
                        sc.median(samples[2, :])))

    #The number of bins must be an integer
    nbins = int(round(sc.sqrt(nsamples) / 2.))
    histmb, edges = sc.histogramdd(samples.T[:, 0:2], bins=nbins)
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi - 1], edges[1][indxj - 1]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi + 1], edges[1][indxj + 1]))

    print("Best-fit for S marginalized")
    histS, edgesS = sc.histogram(samples.T[:, 2], bins=nbins)
    indx = sc.argmax(histS)

    xlim = [0, 300]
    ylim = [0, 700]

    #Data with the overall best-fit line; the error bars are sqrt(S)
    plot.bovy_print(**bovyprintargs)
    bestb = bestfit[0]
    bestm = bestfit[1]
    plot.bovy_plot(xlim,
                   bestm * sc.array(xlim) + bestb,
                   'k-',
                   xrange=xlim,
                   yrange=ylim,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    errorbar(X,
             Y,
             sc.exp(bestfit[2] / 2.),
             marker='o',
             color='k',
             linestyle='None',
             zorder=1)
    plot.bovy_text(r'$\mathrm{MAP}\ :\ y = %4.2f \,x+ %4.0f' %
                   (bestfit[1], bestfit[0]) + r'$' + '\n' +
                   r'$\mathrm{MAP}\ :\ \sqrt{S} = %3.1f$' %
                   (sc.exp(bestfit[2] / 2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameA)

    #Data with the marginalized best-fit line
    plot.bovy_print(**bovyprintargs)
    bestb = edges[0][indxi]
    bestm = edges[1][indxj]
    bestS = edgesS[indx]
    plot.bovy_plot(xlim,
                   bestm * sc.array(xlim) + bestb,
                   'k-',
                   xrange=xlim,
                   yrange=ylim,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    errorbar(X,
             Y,
             sc.exp(bestS / 2.),
             marker='o',
             color='k',
             linestyle='None',
             zorder=1)
    plot.bovy_text(
        r'$\mathrm{marginalized\ over\ S}\ :\ y = %4.2f \,x+ %4.0f' %
        (bestm, bestb) + r'$' + '\n' +
        r'$\mathrm{marginalized\ over}\ (m,b)\ :\ \sqrt{S} = %3.1f$' %
        (sc.exp(bestS / 2.)),
        bottom_right=True)
    plot.bovy_end_print(plotfilenameB)

    return
def ex14(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex14.png',
         bovyprintargs={}):
    """ex14: solve exercise 14 (forward fit of y on x and reverse fit of
    x on y, both shown as lines y = m x + b)
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilename   - filename for the output plot
       bovyprintargs  - keyword arguments passed on to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    #Keep the points whose ID is not excluded. Counting the kept points
    #directly (instead of ndata-len(exclude)) stays correct when an
    #excluded ID does not occur in the data
    if exclude is None:
        kept = [data[ii] for ii in range(len(data))]
    else:
        excluded = sc.array(exclude)
        kept = [data[ii] for ii in range(len(data))
                if not sc.any(excluded == data[ii][0])]
    nsample = len(kept)
    #Put the data in the appropriate arrays and matrices:
    #suffix 1 is the forward fit (y given x), suffix 2 the reverse fit
    #(x given y)
    Y1 = sc.zeros(nsample)
    A1 = sc.ones((nsample, 2))
    C1 = sc.zeros((nsample, nsample))
    Y2 = sc.zeros(nsample)
    A2 = sc.ones((nsample, 2))
    C2 = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    xerr = sc.zeros(nsample)
    for jj, point in enumerate(kept):
        Y1[jj] = point[1][1]
        A1[jj, 1] = point[1][0]
        C1[jj, jj] = point[2]**2.
        yerr[jj] = point[2]
        Y2[jj] = point[1][0]
        A2[jj, 1] = point[1][1]
        C2[jj, jj] = point[3]**2.
        xerr[jj] = point[3]
    #Now compute the best fit and the uncertainties: forward
    C1inv = linalg.inv(C1)
    bestfitvar1 = linalg.inv(sc.dot(A1.T, sc.dot(C1inv, A1)))
    bestfit1 = sc.dot(bestfitvar1, sc.dot(A1.T, sc.dot(C1inv, Y1.T)))
    #Now compute the best fit and the uncertainties: backward
    C2inv = linalg.inv(C2)
    bestfitvar2 = linalg.inv(sc.dot(A2.T, sc.dot(C2inv, A2)))
    bestfit2 = sc.dot(bestfitvar2, sc.dot(A2.T, sc.dot(C2inv, Y2.T)))
    #Propagate to y=mx+b: for x = b2 + m2 y the line is
    #y = -b2/m2 + x/m2; linerrprop is the Jacobian of that mapping
    linerrprop = sc.array([[-1. / bestfit2[1], bestfit2[0] / bestfit2[1]**2],
                           [0., -1. / bestfit2[1]**2.]])
    bestfit2 = sc.array([-bestfit2[0] / bestfit2[1], 1. / bestfit2[1]])
    bestfitvar2 = sc.dot(linerrprop, sc.dot(bestfitvar2, linerrprop.T))

    #Plot result
    plot.bovy_print(**bovyprintargs)
    xlim = [0, 300]
    ylim = [0, 700]
    plot.bovy_plot(sc.array(xlim), bestfit1[1] * sc.array(xlim) + bestfit1[0],
                   'k--', xrange=xlim, yrange=ylim,
                   xlabel=r'$x$', ylabel=r'$y$', zorder=2)
    plot.bovy_plot(sc.array(xlim), bestfit2[1] * sc.array(xlim) + bestfit2[0],
                   'k-.', overplot=True, zorder=2)

    #Plot data
    errorbar(A1[:, 1], Y1, yerr, xerr, color='k', marker='o',
             linestyle='None', zorder=0)
    #Raw string: same characters as before, without relying on invalid
    #escape sequences (\p, \,) being passed through
    fitfmt = r'%4.2f \pm %4.2f )\,x+ ( %4.0f\pm %4.0f'
    forwardtxt = (r'$\mathrm{forward}\ ---\:\ y = ( '
                  + fitfmt % (bestfit1[1], m.sqrt(bestfitvar1[1, 1]),
                              bestfit1[0], m.sqrt(bestfitvar1[0, 0]))
                  + r')$')
    reversetxt = (r'$\mathrm{reverse}\ -\cdot -\:\ y = ( '
                  + fitfmt % (bestfit2[1], m.sqrt(bestfitvar2[1, 1]),
                              bestfit2[0], m.sqrt(bestfitvar2[0, 0]))
                  + r')$')
    plot.bovy_text(forwardtxt + '\n' + reversetxt, bottom_right=True)
    plot.bovy_end_print(plotfilename)
def ex14(
        exclude=sc.array([1, 2, 3, 4]), plotfilename='ex14.png',
        bovyprintargs={}):
    """ex14: solve exercise 14 (forward fit of y on x and reverse fit of
    x on y, both shown as lines y = m x + b)
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilename   - filename for the output plot
       bovyprintargs  - keyword arguments passed on to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    #Keep the points whose ID is not excluded. Counting the kept points
    #directly (instead of ndata-len(exclude)) stays correct when an
    #excluded ID does not occur in the data
    if exclude is None:
        kept = [data[ii] for ii in range(len(data))]
    else:
        excluded = sc.array(exclude)
        kept = [data[ii] for ii in range(len(data))
                if not sc.any(excluded == data[ii][0])]
    nsample = len(kept)
    #Put the data in the appropriate arrays and matrices:
    #suffix 1 is the forward fit (y given x), suffix 2 the reverse fit
    #(x given y)
    Y1 = sc.zeros(nsample)
    A1 = sc.ones((nsample, 2))
    C1 = sc.zeros((nsample, nsample))
    Y2 = sc.zeros(nsample)
    A2 = sc.ones((nsample, 2))
    C2 = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    xerr = sc.zeros(nsample)
    for jj, point in enumerate(kept):
        Y1[jj] = point[1][1]
        A1[jj, 1] = point[1][0]
        C1[jj, jj] = point[2]**2.
        yerr[jj] = point[2]
        Y2[jj] = point[1][0]
        A2[jj, 1] = point[1][1]
        C2[jj, jj] = point[3]**2.
        xerr[jj] = point[3]
    #Now compute the best fit and the uncertainties: forward
    C1inv = linalg.inv(C1)
    bestfitvar1 = linalg.inv(sc.dot(A1.T, sc.dot(C1inv, A1)))
    bestfit1 = sc.dot(bestfitvar1, sc.dot(A1.T, sc.dot(C1inv, Y1.T)))
    #Now compute the best fit and the uncertainties: backward
    C2inv = linalg.inv(C2)
    bestfitvar2 = linalg.inv(sc.dot(A2.T, sc.dot(C2inv, A2)))
    bestfit2 = sc.dot(bestfitvar2, sc.dot(A2.T, sc.dot(C2inv, Y2.T)))
    #Propagate to y=mx+b: for x = b2 + m2 y the line is
    #y = -b2/m2 + x/m2; linerrprop is the Jacobian of that mapping
    linerrprop = sc.array([[-1. / bestfit2[1], bestfit2[0] / bestfit2[1]**2],
                           [0., -1. / bestfit2[1]**2.]])
    bestfit2 = sc.array([-bestfit2[0] / bestfit2[1], 1. / bestfit2[1]])
    bestfitvar2 = sc.dot(linerrprop, sc.dot(bestfitvar2, linerrprop.T))

    #Plot result
    plot.bovy_print(**bovyprintargs)
    xlim = [0, 300]
    ylim = [0, 700]
    plot.bovy_plot(sc.array(xlim),
                   bestfit1[1] * sc.array(xlim) + bestfit1[0],
                   'k--',
                   xrange=xlim,
                   yrange=ylim,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    plot.bovy_plot(sc.array(xlim),
                   bestfit2[1] * sc.array(xlim) + bestfit2[0],
                   'k-.',
                   overplot=True,
                   zorder=2)

    #Plot data
    errorbar(A1[:, 1],
             Y1,
             yerr,
             xerr,
             color='k',
             marker='o',
             linestyle='None',
             zorder=0)
    #Raw string: same characters as before, without relying on invalid
    #escape sequences (\p, \,) being passed through
    fitfmt = r'%4.2f \pm %4.2f )\,x+ ( %4.0f\pm %4.0f'
    forwardtxt = (r'$\mathrm{forward}\ ---\:\ y = ( '
                  + fitfmt % (bestfit1[1], m.sqrt(bestfitvar1[1, 1]),
                              bestfit1[0], m.sqrt(bestfitvar1[0, 0]))
                  + r')$')
    reversetxt = (r'$\mathrm{reverse}\ -\cdot -\:\ y = ( '
                  + fitfmt % (bestfit2[1], m.sqrt(bestfitvar2[1, 1]),
                              bestfit2[0], m.sqrt(bestfitvar2[0, 0]))
                  + r')$')
    plot.bovy_text(forwardtxt + '\n' + reversetxt, bottom_right=True)
    plot.bovy_end_print(plotfilename)
def ex12(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex12.png',
         bovyprintargs={}):
    """ex12: solve exercise 12 by optimization of the objective function
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilename   - filename for the output plot
       bovyprintargs  - keyword arguments passed on to plot.bovy_print
    Output:
       plot; returns -1 (without plotting) when the two optimizers disagree
       and the user answers 'n' to the prompt
    History:
       2010-05-06 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    #Keep the points whose ID is not excluded. Counting the kept points
    #directly (instead of ndata-len(exclude)) stays correct when an
    #excluded ID does not occur in the data
    if exclude is None:
        kept = [data[ii] for ii in range(len(data))]
    else:
        excluded = sc.array(exclude)
        kept = [data[ii] for ii in range(len(data))
                if not sc.any(excluded == data[ii][0])]
    nsample = len(kept)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    Z = sc.zeros((nsample, 2))
    #ycovar[:,jj,:] is the 2x2 (x,y) covariance matrix of point jj; this
    #layout makes the sc.dot easier
    ycovar = sc.zeros((2, nsample, 2))
    for jj, point in enumerate(kept):
        Y[jj] = point[1][1]
        X[jj] = point[1][0]
        Z[jj, 0] = X[jj]
        Z[jj, 1] = Y[jj]
        A[jj, 1] = point[1][0]
        C[jj, jj] = point[2]**2.
        ycovar[0, jj, 0] = point[3]**2.
        ycovar[1, jj, 1] = point[2]**2.
        #point[4] scales sqrt(var_x*var_y) into the off-diagonal term
        ycovar[0, jj, 1] = point[4] * m.sqrt(ycovar[0, jj, 0]
                                             * ycovar[1, jj, 1])
        ycovar[1, jj, 0] = ycovar[0, jj, 1]
    #Now compute the best fit and the uncertainties
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y.T)))
    #Now optimize
    bestfit2d1 = optimize.fmin(objective, bestfit, (Z, ycovar), disp=False)
    #Restart the optimization once using a different method
    bestfit2d = optimize.fmin_powell(objective, bestfit,
                                     (Z, ycovar), disp=False)
    difference = linalg.norm(bestfit2d - bestfit2d1)
    if difference > 10**-12:
        if difference < 10**-6:
            print("Different optimizers give slightly different results...")
        else:
            print("Different optimizers give rather different results...")
        print("The norm of the results differs by %g" % difference)
        try:
            answer = raw_input('continue to plot? [yn]\n')
        except EOFError:
            print("Since you are in non-interactive mode I will assume 'y'")
            answer = 'y'
        if answer == 'n':
            print("returning...")
            return -1

    #Plot result
    plot.bovy_print(**bovyprintargs)
    xlim = [0, 300]
    ylim = [0, 700]
    plot.bovy_plot(sc.array(xlim), bestfit2d[1] * sc.array(xlim) + bestfit2d[0],
                   'k-', xrange=xlim, yrange=ylim,
                   xlabel=r'$x$', ylabel=r'$y$', zorder=2)

    #Create the error ellipses from the per-point covariance matrices that
    #were already assembled above (no need to read the file again)
    ellipses = []
    for jj in range(nsample):
        #Calculate the eigenvalues and the rotation angle
        eigs = linalg.eig(ycovar[:, jj, :])
        angle = m.atan(-eigs[1][0, 1] / eigs[1][1, 1]) / m.pi * 180.
        thisellipse = Ellipse(sc.array([X[jj], Y[jj]]),
                              2 * m.sqrt(eigs[0][0]),
                              2 * m.sqrt(eigs[0][1]), angle)
        ellipses.append(thisellipse)

    #Add the error ellipses
    ax = gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    ax.plot(X, Y, color='k', marker='o', linestyle='None')

    plot.bovy_text(r'$y = %4.2f \,x+ %4.0f' % (bestfit2d[1], bestfit2d[0]) + r'$',
                   bottom_right=True)
    plot.bovy_end_print(plotfilename)
Example #9
0
def exMix1(
    exclude=None,
    plotfilenameA="exMix1a.png",
    plotfilenameB="exMix1b.png",
    plotfilenameC="exMix1c.png",
    nburn=20000,
    nsamples=1000000,
    parsigma=[5, 0.075, 0.2, 1, 0.1],
    dsigma=1.0,
    bovyprintargs={},
    sampledata=None,
):
    """exMix1: solve exercise 5 (mixture model) using MCMC sampling
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilenameA  - filename for the 2D histogram of the (b, m) samples
       plotfilenameB  - filename for the data + best-fit line + sampled lines
       plotfilenameC  - filename for the P_b histogram
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian)
       dsigma         - divide uncertainties by this amount
       bovyprintargs  - keyword arguments passed on to plot.bovy_print
                        (a private copy is used; the argument is not modified)
       sampledata     - result of an earlier call; when given, the sampler is
                        not run again and this tuple is plotted instead
    Output:
       three plots; returns sampledata, i.e. the tuple
       (histmb, edges, mbsamples, pbhist, pbedges)
    History:
       2010-04-28 - Written - Bovy (NYU)
    """
    # Work on a private copy: the P_b plot below adds a default
    # 'text_fontsize', which must not leak into the caller's dict nor into
    # the shared default argument (and from there into every later call).
    bovyprintargs = dict(bovyprintargs)

    # NOTE(review): numpy documents seeds as non-negative; -1 may be rejected
    # by recent releases -- confirm before changing, it alters the chain.
    sc.random.seed(-1)  # In the interest of reproducibility (if that's a word)
    # Read the data
    data = read_data("data_yerr.dat")
    ndata = len(data)
    # 'is not None' instead of '== None': the latter is elementwise (and
    # therefore ambiguous in a boolean context) when exclude is an array.
    if exclude is not None:
        nsample = ndata - len(exclude)
        # Make the ID comparison below elementwise for plain lists/tuples too
        exclude = sc.array(exclude)
    else:
        nsample = ndata
    # First find the chi-squared solution, which we will use as an
    # initial guess
    # Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        # data[ii][0] is the point's ID, data[ii][1] its (x, y) pair and
        # data[ii][2] is used as the uncertainty on y
        if exclude is not None and sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2] ** 2.0 / dsigma ** 2.0
        yerr[jj] = data[ii][2] / dsigma
        jj = jj + 1

    brange = [-120, 120]
    mrange = [1.5, 3.2]

    # This matches the order of the parameters in the "samples" vector
    mbrange = [brange, mrange]

    if sampledata is None:
        sampledata = runSampler(X, Y, A, C, yerr, nburn, nsamples, parsigma, mbrange)

    (histmb, edges, mbsamples, pbhist, pbedges) = sampledata

    # Hack -- produce fake Pbad samples from Pbad histogram.
    pbsamples = hstack([array([x] * N) for x, N in zip((pbedges[:-1] + pbedges[1:]) / 2, pbhist)])

    # Bin with the highest count along each axis of the 2D histogram
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    # Single-argument print() behaves the same under Python 2 and Python 3
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi - 1], edges[1][indxj - 1]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi + 1], edges[1][indxj + 1]))

    # 2D histogram
    plot.bovy_print(**bovyprintargs)
    levels = special.erf(0.5 * sc.arange(1, 4))
    xe = [edges[0][0], edges[0][-1]]
    ye = [edges[1][0], edges[1][-1]]
    aspect = (xe[1] - xe[0]) / (ye[1] - ye[0])
    plot.bovy_dens2d(
        histmb.T,
        origin="lower",
        cmap=cm.gist_yarg,
        interpolation="nearest",
        contours=True,
        cntrmass=True,
        extent=xe + ye,
        levels=levels,
        aspect=aspect,
        xlabel=r"$b$",
        ylabel=r"$m$",
    )
    xlim(brange)
    ylim(mrange)

    plot.bovy_end_print(plotfilenameA)

    # Data with MAP line and sampling
    plot.bovy_print(**bovyprintargs)
    bestb = edges[0][indxi]
    bestm = edges[1][indxj]
    xlimits = [0, 300]
    ylimits = [0, 700]
    plot.bovy_plot(
        xlimits,
        bestm * sc.array(xlimits) + bestb,
        "k-",
        xrange=xlimits,
        yrange=ylimits,
        xlabel=r"$x$",
        ylabel=r"$y$",
        zorder=2,
    )
    errorbar(X, Y, yerr, marker="o", color="k", linestyle="None", zorder=1)

    # One grey line per stored sample; the first entry of each pair is used
    # as the slope and the second as the intercept.
    for slope, intercept in mbsamples:
        plot.bovy_plot(
            xlimits,
            slope * sc.array(xlimits) + intercept,
            overplot=True,
            xrange=xlimits,
            yrange=ylimits,
            xlabel=r"$x$",
            ylabel=r"$y$",
            color="0.75",
            zorder=1,
        )

    plot.bovy_end_print(plotfilenameB)

    # Pb plot
    bovyprintargs.setdefault("text_fontsize", 11)
    plot.bovy_print(**bovyprintargs)
    plot.bovy_hist(
        pbsamples,
        # round() alone yields a float bin count; histogramming needs an int
        bins=int(round(sc.sqrt(nsamples) / 5.0)),
        xlabel=r"$P_\mathrm{b}$",
        normed=True,
        histtype="step",
        range=[0, 1],
        edgecolor="k",
    )
    ylim(0, 4.0)
    if dsigma == 1.0:
        plot.bovy_text(r"$\mathrm{using\ correct\ data\ uncertainties}$", top_right=True)
    else:
        plot.bovy_text(r"$\mathrm{using\ data\ uncertainties\ /\ 2}$", top_left=True)

    plot.bovy_end_print(plotfilenameC)

    return sampledata
# Example #10
# 0
def ex12(
        exclude=sc.array([1, 2, 3, 4]), plotfilename='ex12.png',
        bovyprintargs={}):
    """ex12: solve exercise 12 by optimization of the objective function
    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
       bovyprintargs  - keyword arguments passed on to plot.bovy_print
    Output:
       plot; returns -1 (without plotting) when the two optimizers disagree
       and the user answers 'n' at the prompt, None otherwise
    History:
       2010-05-06 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    Z = sc.zeros((nsample, 2))
    ycovar = sc.zeros((2, nsample, 2))  #Makes the sc.dot easier
    jj = 0
    for ii in range(ndata):
        #data[ii][0] is the ID and data[ii][1] the (x,y) pair; data[ii][2]
        #and data[ii][3] are used as the y and x uncertainties and
        #data[ii][4] as their correlation coefficient
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        Z[jj, 0] = data[ii][1][0]
        Z[jj, 1] = Y[jj]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        ycovar[0, jj, 0] = data[ii][3]**2.
        ycovar[1, jj, 1] = data[ii][2]**2.
        ycovar[0, jj, 1] = data[ii][4] * m.sqrt(
            ycovar[0, jj, 0] * ycovar[1, jj, 1])
        ycovar[1, jj, 0] = ycovar[0, jj, 1]
        jj = jj + 1
    #Now compute the best fit and the uncertainties:
    #bestfit= (A^T C^-1 A)^-1 A^T C^-1 Y
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    #Now optimize
    bestfit2d1 = optimize.fmin(objective, bestfit, (Z, ycovar), disp=False)
    #Restart the optimization once using a different method
    bestfit2d = optimize.fmin_powell(objective,
                                     bestfit, (Z, ycovar),
                                     disp=False)
    optdiff = linalg.norm(bestfit2d - bestfit2d1)
    if optdiff > 10**-12:
        if optdiff < 10**-6:
            print("Different optimizers give slightly different results...")
        else:
            print("Different optimizers give rather different results...")
        print("The norm of the results differs by %g" % optdiff)
        #raw_input only exists in Python 2; fall back to input on Python 3
        try:
            ask = raw_input
        except NameError:
            ask = input
        try:
            answer = ask('continue to plot? [yn]\n')
        except EOFError:
            print("Since you are in non-interactive mode I will assume 'y'")
            answer = 'y'
        if answer == 'n':
            print("returning...")
            return -1

    #Plot result
    plot.bovy_print(**bovyprintargs)
    xlimits = [0, 300]
    ylimits = [0, 700]
    plot.bovy_plot(sc.array(xlimits),
                   bestfit2d[1] * sc.array(xlimits) + bestfit2d[0],
                   'k-',
                   xrange=xlimits,
                   yrange=ylimits,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)

    #Plot the data OMG straight from plot_data.py
    data = read_data('data_allerr.dat', True)
    ndata = len(data)
    #Create the ellipses and the data points
    x = sc.zeros(nsample)
    y = sc.zeros(nsample)
    ellipses = []
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        x[jj] = data[ii][1][0]
        y[jj] = data[ii][1][1]
        #Calculate the eigenvalues and the rotation angle of this point's
        #2x2 covariance matrix
        pointcovar = sc.zeros((2, 2))
        pointcovar[0, 0] = data[ii][3]**2.
        pointcovar[1, 1] = data[ii][2]**2.
        pointcovar[0, 1] = data[ii][4] * m.sqrt(
            pointcovar[0, 0] * pointcovar[1, 1])
        pointcovar[1, 0] = pointcovar[0, 1]
        eigs = linalg.eig(pointcovar)
        angle = m.atan(-eigs[1][0, 1] / eigs[1][1, 1]) / m.pi * 180.
        #Ellipse axes are twice the square root of the eigenvalues
        ellipses.append(Ellipse(sc.array([x[jj], y[jj]]),
                                2 * m.sqrt(eigs[0][0]),
                                2 * m.sqrt(eigs[0][1]), angle))
        jj = jj + 1

    #Add the error ellipses
    ax = gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    ax.plot(x, y, color='k', marker='o', linestyle='None')

    plot.bovy_text(r'$y = %4.2f \,x+ %4.0f' % (bestfit2d[1], bestfit2d[0]) +
                   r'$',
                   bottom_right=True)
    plot.bovy_end_print(plotfilename)
def bar_detectability(parser,
                      dx=_XWIDTH/20.,dy=_YWIDTH/20.,
                      nx=100,ny=20,
                      ngrid=201,rrange=[0.7,1.3],
                      phirange=[-m.pi/2.,m.pi/2.],
                      saveDir='../bar/1dLarge/'):
    """
    NAME:
       bar_detectability
    PURPOSE:
       analyze the detectability of the Hercules moving group in the 
       los-distribution around the Galaxy
    INPUT:
       parser - option parser; parser.parse_args() must return (options,args)
                where options has the attributes 'convolve' and 'skipCenter'
                and args[0] is the filename of the output plot
       nx - number of plots in the x-direction
       ny - number of plots in the y direction
       dx - x-spacing
       dy - y-spacing
       ngrid - number of gridpoints to evaluate the density on
       rrange - range of Galactocentric radii to consider
       phirange - range of Galactic azimuths to consider
       saveDir - directory to save the pickles in
    OUTPUT:
       plot in args[0]; returns None. Nothing is plotted when no positional
       argument is given (the help is printed instead), when
       options.convolve is set (the work is handed to
       bar_detectability_convolve), or when a saved los-velocity
       distribution is missing
    HISTORY:
       2010-05-09 - Written - Bovy (NYU)
    """
    (options,args)= parser.parse_args()
    if len(args) == 0:
        parser.print_help()
        return 
    
    if options.convolve is not None:
        bar_detectability_convolve(parser,dx=dx,dy=dy,nx=nx,ny=ny,ngrid=ngrid,
                                   rrange=rrange,phirange=phirange,
                                   saveDir=saveDir)
        return

    def _cell_radius(jj):
        #Galactocentric radius at the center of row jj of the grid of plots
        return (rrange[0]+(rrange[1]-rrange[0])/
                (ny*_YWIDTH+(ny-1)*dy)*(jj*(_YWIDTH+dy)+_YWIDTH/2.))

    def _cell_azimuth(ii):
        #Galactocentric azimuth at the center of column ii of the grid
        return (phirange[0]+(phirange[1]-phirange[0])/
                (nx*_XWIDTH+(nx-1)*dx)*(ii*(_XWIDTH+dx)+_XWIDTH/2.))

    vloss= sc.linspace(-.9,.9,ngrid)

    picklebasename= '1d_%i_%i_%i_%.1f_%.1f_%.1f_%.1f' % (nx,ny,ngrid,rrange[0],rrange[1],phirange[0],phirange[1])

    detect= sc.zeros((nx,ny))
    losd= sc.zeros((nx,ny))
    gall= sc.zeros((nx,ny))
    for ii in range(nx):
        for jj in range(ny):
            thisR= _cell_radius(jj)
            thisphi= _cell_azimuth(ii)
            thissavefilename= os.path.join(saveDir,picklebasename+'_%i_%i.sav' %(ii,jj))
            if not os.path.exists(thissavefilename):
                print("Did not find the los-velocity distribution at %.2f, %.2f ..." %(thisR,thisphi))
                print("returning ...")
                return
            print("Restoring los-velocity distribution at %.2f, %.2f ..." %(thisR,thisphi))
            #Pickles are binary data: open in binary mode and make sure the
            #file is closed even when unpickling fails.
            #NOTE(review): pickle.load executes whatever the file encodes;
            #only point saveDir at trusted files.
            with open(thissavefilename,'rb') as savefile:
                vlosd= pickle.load(savefile)
                axivlosd= pickle.load(savefile)
            ddx= 1./sc.sum(axivlosd)
            #skipCenter
            if options.skipCenter != 0.:
                skipIndx= (sc.fabs(vloss) < options.skipCenter)
                indx= (sc.fabs(vloss) >= options.skipCenter)
                #Renormalize over the velocities that are kept ...
                vlosd= vlosd/sc.sum(vlosd[indx])/ddx
                axivlosd= axivlosd/sc.sum(axivlosd[indx])/ddx
                #... and set both distributions to the same value in the
                #skipped center
                vlosd[skipIndx]= 1.
                axivlosd[skipIndx]= 1.
            #Wherever either distribution is zero, set both to one
            vlosd_zeroindx= (vlosd == 0.)
            axivlosd_zeroindx= (axivlosd == 0.)
            vlosd[vlosd_zeroindx]= 1.
            axivlosd[vlosd_zeroindx]= 1.
            vlosd[axivlosd_zeroindx]= 1.
            axivlosd[axivlosd_zeroindx]= 1.
            detect[ii,jj]= probDistance.kullbackLeibler(vlosd,axivlosd,ddx,nan=True)
            #los distance and Galactic longitude
            d= m.sqrt(thisR**2.+1.-2.*thisR*m.cos(thisphi))
            losd[ii,jj]= d
            #Test the sign first so that 1/cos is never evaluated at cos = 0
            cosphi= m.cos(thisphi)
            if cosphi > 0. and 1./cosphi < thisR:
                l= m.pi-m.asin(thisR/d*m.sin(thisphi))
            else:
                l= m.asin(thisR/d*m.sin(thisphi))
            gall[ii,jj]= l

    #Find maximum, further than 3 kpc away
    #NOTE(review): 8.2 is presumably R_0 in kpc (distances above are in
    #units of R_0) -- confirm
    detectformax= detect.flatten()
    detectformax[losd.flatten() < 3./8.2]= 0.
    indx = sc.unravel_index(sc.argmax(detectformax),detect.shape)
    maxR= _cell_radius(indx[1])
    maxphi= _cell_azimuth(indx[0])
    print("%s %s %s %s %s" % (maxR, maxphi, losd[indx[0],indx[1]], detect[indx[0],indx[1]], gall[indx[0],indx[1]]*180./sc.pi))

    #Now plot
    aspect= (phirange[1]-phirange[0])*_RADTODEG/(rrange[1]-rrange[0])
    extent= (phirange[0]*_RADTODEG,phirange[1]*_RADTODEG,
             rrange[0],rrange[1])
    plot.bovy_print()
    plot.bovy_dens2d(detect.T,origin='lower',#interpolation='nearest',
                     xlabel=r'$\mathrm{Galactocentric\ azimuth}\ [\mathrm{deg}]$',
                     ylabel=r'$\mathrm{Galactocentric\ radius}\ /R_0$',
                     cmap='gist_yarg',xrange=sc.array(phirange)*_RADTODEG,
                     yrange=rrange,
                     aspect=aspect)
    #contour the los distance and gall
    #plot.bovy_text(-22.,1.1,r'$\mathrm{apogee}$',color='w',
    #                rotation=105.)
    plot.bovy_text(-18.,1.1,r'$\mathrm{APOGEE}$',color='w',
                    rotation=285.)
    levels= [2/8.2*(ii+1/2.) for ii in range(10)]
    contour(losd.T,levels,colors='0.25',origin='lower',linestyles='--',
            aspect=aspect,
            extent=extent)
    #Map negative longitudes onto [0,2 pi) before contouring
    gall[gall < 0.]+= sc.pi*2.
    levels= [0.,sc.pi/2.,sc.pi,3.*sc.pi/2.]
    contour(gall.T,levels,colors='w',origin='lower',linestyles='--',
            aspect=aspect,
            extent=extent)
    levels= [-5/180.*sc.pi,250/180.*sc.pi]
    contour(gall.T,levels,colors='w',origin='lower',linestyles='-.',
            aspect=aspect,
            extent=extent)
    if options.skipCenter == 0.:
        plot.bovy_text(r'$\mathrm{KL\ divergence\ / \ all}\ v_{\mathrm{los}}$',
                       title=True)
    else:
        plot.bovy_text(r'$\mathrm{KL\ divergence\ / }\ |v_{\mathrm{los}}| \geq %.2f \ v_0$' % options.skipCenter,
                       title=True)
    plot.bovy_end_print(args[0])
def exMix1(exclude=None,
           plotfilenameA='exMix1a.png',
           plotfilenameB='exMix1b.png',
           plotfilenameC='exMix1c.png',
           nburn=20000,
           nsamples=1000000,
           parsigma=[5, .075, .2, 1, .1],
           dsigma=1.,
           bovyprintargs={},
           sampledata=None):
    """exMix1: solve exercise 5 (mixture model) using MCMC sampling
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilenameA  - filename for the 2D histogram of the (b, m) samples
       plotfilenameB  - filename for the data + best-fit line + sampled lines
       plotfilenameC  - filename for the P_b histogram
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian)
       dsigma         - divide uncertainties by this amount
       bovyprintargs  - keyword arguments passed on to plot.bovy_print
                        (a private copy is used; the argument is not modified)
       sampledata     - result of an earlier call; when given, the sampler is
                        not run again and this tuple is plotted instead
    Output:
       three plots; returns sampledata, i.e. the tuple
       (histmb, edges, mbsamples, pbhist, pbedges)
    History:
       2010-04-28 - Written - Bovy (NYU)
    """
    #Work on a private copy: the P_b plot below adds a default
    #'text_fontsize', which must not leak into the caller's dict nor into
    #the shared default argument (and from there into every later call).
    bovyprintargs = dict(bovyprintargs)

    #NOTE(review): numpy documents seeds as non-negative; -1 may be rejected
    #by recent releases -- confirm before changing, it alters the chain.
    sc.random.seed(-1)  #In the interest of reproducibility (if that's a word)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #'is not None' instead of '== None': the latter is elementwise (and
    #therefore ambiguous in a boolean context) when exclude is an array.
    if exclude is not None:
        nsample = ndata - len(exclude)
        #Make the ID comparison below elementwise for plain lists/tuples too
        exclude = sc.array(exclude)
    else:
        nsample = ndata
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        #data[ii][0] is the point's ID, data[ii][1] its (x, y) pair and
        #data[ii][2] is used as the uncertainty on y
        if exclude is not None and sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2. / dsigma**2.
        yerr[jj] = data[ii][2] / dsigma
        jj = jj + 1

    brange = [-120, 120]
    mrange = [1.5, 3.2]

    # This matches the order of the parameters in the "samples" vector
    mbrange = [brange, mrange]

    if sampledata is None:
        sampledata = runSampler(X, Y, A, C, yerr, nburn, nsamples, parsigma,
                                mbrange)

    (histmb, edges, mbsamples, pbhist, pbedges) = sampledata

    # Hack -- produce fake Pbad samples from Pbad histogram.
    pbsamples = hstack([
        array([x] * N)
        for x, N in zip((pbedges[:-1] + pbedges[1:]) / 2, pbhist)
    ])

    #Bin with the highest count along each axis of the 2D histogram
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    #Single-argument print() behaves the same under Python 2 and Python 3
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi - 1], edges[1][indxj - 1]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi + 1], edges[1][indxj + 1]))

    #2D histogram
    plot.bovy_print(**bovyprintargs)
    levels = special.erf(0.5 * sc.arange(1, 4))
    xe = [edges[0][0], edges[0][-1]]
    ye = [edges[1][0], edges[1][-1]]
    aspect = (xe[1] - xe[0]) / (ye[1] - ye[0])
    plot.bovy_dens2d(histmb.T,
                     origin='lower',
                     cmap=cm.gist_yarg,
                     interpolation='nearest',
                     contours=True,
                     cntrmass=True,
                     extent=xe + ye,
                     levels=levels,
                     aspect=aspect,
                     xlabel=r'$b$',
                     ylabel=r'$m$')
    xlim(brange)
    ylim(mrange)

    plot.bovy_end_print(plotfilenameA)

    #Data with MAP line and sampling
    plot.bovy_print(**bovyprintargs)
    bestb = edges[0][indxi]
    bestm = edges[1][indxj]
    xlimits = [0, 300]
    ylimits = [0, 700]
    plot.bovy_plot(xlimits,
                   bestm * sc.array(xlimits) + bestb,
                   'k-',
                   xrange=xlimits,
                   yrange=ylimits,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    errorbar(X, Y, yerr, marker='o', color='k', linestyle='None', zorder=1)

    #One grey line per stored sample; the first entry of each pair is used
    #as the slope and the second as the intercept.
    for slope, intercept in mbsamples:
        plot.bovy_plot(xlimits,
                       slope * sc.array(xlimits) + intercept,
                       overplot=True,
                       xrange=xlimits,
                       yrange=ylimits,
                       xlabel=r'$x$',
                       ylabel=r'$y$',
                       color='0.75',
                       zorder=1)

    plot.bovy_end_print(plotfilenameB)

    #Pb plot
    bovyprintargs.setdefault('text_fontsize', 11)
    plot.bovy_print(**bovyprintargs)
    #round() alone yields a float bin count; histogramming needs an int
    plot.bovy_hist(pbsamples,
                   bins=int(round(sc.sqrt(nsamples) / 5.)),
                   xlabel=r'$P_\mathrm{b}$',
                   normed=True,
                   histtype='step',
                   range=[0, 1],
                   edgecolor='k')
    ylim(0, 4.)
    if dsigma == 1.:
        plot.bovy_text(r'$\mathrm{using\ correct\ data\ uncertainties}$',
                       top_right=True)
    else:
        plot.bovy_text(r'$\mathrm{using\ data\ uncertainties\ /\ 2}$',
                       top_left=True)

    plot.bovy_end_print(plotfilenameC)

    return sampledata