コード例 #1
0
ファイル: ex3.py プロジェクト: bhishanpdl/AstroSeminar_OU
def ex3(exclude=sc.array([1,2,3,4]),plotfilename='ex3.png', bovyprintargs={}):
    """ex3: solve exercise 3

    Fits the quadratic y = b0 + b1 x + b2 x^2 to the points read from
    'data_yerr.dat' by weighted linear least squares (weights from the
    y-uncertainties), then plots the data, the best-fit curve and a label
    giving the fitted coefficients with their uncertainties.

    Input:
       exclude       - ID numbers to exclude from the analysis
       plotfilename  - filename for the output plot
       bovyprintargs - keyword arguments forwarded to plot.bovy_print
    Output:
       plot (the function itself returns 0)
    History:
       2009-05-27 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    #Select the points to fit. The arrays are sized from this selection
    #(not from ndata-len(exclude)) so that they stay consistent even when
    #an excluded ID does not occur in the data
    kept= [data[ii] for ii in range(ndata)
           if not sc.any(exclude == data[ii][0])]
    nsample= len(kept)
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    A= sc.ones((nsample,3))   #columns: 1, x, x^2
    C= sc.zeros((nsample,nsample))   #diagonal data covariance
    yerr= sc.zeros(nsample)
    for jj, point in enumerate(kept):
        Y[jj]= point[1][1]
        A[jj,1]= point[1][0]
        A[jj,2]= point[1][0]**2.
        C[jj,jj]= point[2]**2.
        yerr[jj]= point[2]
    #Now compute the best fit and the uncertainties:
    #   bestfitvar= (A^T C^-1 A)^-1,  bestfit= bestfitvar A^T C^-1 Y
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y)))

    #Now plot the solution
    plot.bovy_print(**bovyprintargs)
    #plot bestfit
    xlimits=[0,300]
    ylimits=[0,700]
    nsamples= 1001
    xs= sc.linspace(xlimits[0],xlimits[1],nsamples)
    ys= bestfit[0]+bestfit[1]*xs+bestfit[2]*xs**2.
    plot.bovy_plot(xs,ys,'k-',xrange=xlimits,yrange=ylimits,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    #Plot data
    errorbar(A[:,1],Y,yerr,marker='o',color='k',linestyle='None',zorder=1)
    #Put in a label with the best fit; raw strings keep the TeX backslashes
    #without relying on invalid escape sequences
    coefficients= (bestfit[2], m.sqrt(bestfitvar[2,2]),
                   bestfit[1], m.sqrt(bestfitvar[1,1]),
                   bestfit[0], m.sqrt(bestfitvar[0,0]))
    label= (r'$y = ('
            + r'%4.4f \pm %4.4f)\,x^2 + ( %4.2f \pm %4.2f )\,x+ ( %4.0f\pm %4.0f'
            % coefficients
            + r')$')
    text(5,30,label)
    plot.bovy_end_print(plotfilename)

    return 0
コード例 #2
0
def ex3(exclude=sc.array([1,2,3,4]),plotfilename='ex3.png', bovyprintargs={}):
    """ex3: solve exercise 3

    Fits the quadratic y = b0 + b1 x + b2 x^2 to the points read from
    'data_yerr.dat' by weighted linear least squares (weights from the
    y-uncertainties), then plots the data, the best-fit curve and a label
    giving the fitted coefficients with their uncertainties.

    Input:
       exclude       - ID numbers to exclude from the analysis
       plotfilename  - filename for the output plot
       bovyprintargs - keyword arguments forwarded to plot.bovy_print
    Output:
       plot (the function itself returns 0)
    History:
       2009-05-27 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    #Select the points to fit. The arrays are sized from this selection
    #(not from ndata-len(exclude)) so that they stay consistent even when
    #an excluded ID does not occur in the data
    kept= [data[ii] for ii in range(ndata)
           if not sc.any(exclude == data[ii][0])]
    nsample= len(kept)
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    A= sc.ones((nsample,3))   #columns: 1, x, x^2
    C= sc.zeros((nsample,nsample))   #diagonal data covariance
    yerr= sc.zeros(nsample)
    for jj, point in enumerate(kept):
        Y[jj]= point[1][1]
        A[jj,1]= point[1][0]
        A[jj,2]= point[1][0]**2.
        C[jj,jj]= point[2]**2.
        yerr[jj]= point[2]
    #Now compute the best fit and the uncertainties:
    #   bestfitvar= (A^T C^-1 A)^-1,  bestfit= bestfitvar A^T C^-1 Y
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y)))

    #Now plot the solution
    plot.bovy_print(**bovyprintargs)
    #plot bestfit
    xlimits=[0,300]
    ylimits=[0,700]
    nsamples= 1001
    xs= sc.linspace(xlimits[0],xlimits[1],nsamples)
    ys= bestfit[0]+bestfit[1]*xs+bestfit[2]*xs**2.
    plot.bovy_plot(xs,ys,'k-',xrange=xlimits,yrange=ylimits,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    #Plot data
    errorbar(A[:,1],Y,yerr,marker='o',color='k',linestyle='None',zorder=1)
    #Put in a label with the best fit; raw strings keep the TeX backslashes
    #without relying on invalid escape sequences
    coefficients= (bestfit[2], m.sqrt(bestfitvar[2,2]),
                   bestfit[1], m.sqrt(bestfitvar[1,1]),
                   bestfit[0], m.sqrt(bestfitvar[0,0]))
    label= (r'$y = ('
            + r'%4.4f \pm %4.4f)\,x^2 + ( %4.2f \pm %4.2f )\,x+ ( %4.0f\pm %4.0f'
            % coefficients
            + r')$')
    text(5,30,label)
    plot.bovy_end_print(plotfilename)

    return 0
コード例 #3
0
def ex15(exclude=sc.array([1,2,3,4]),plotfilename='ex15.png',
         bovyprintargs={}):
    """ex15: solve exercise 15

    Fits a straight line to the (x, y) points of 'data_allerr.dat' by
    principal component analysis: the line passes through the mean of the
    points along the eigenvector of their covariance matrix that has the
    largest eigenvalue. Plots the data and the fitted line.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
       bovyprintargs  - keyword arguments forwarded to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_allerr.dat',allerr=True)
    ndata= len(data)
    #Select the points to fit; sizing the arrays from the selection keeps
    #them consistent even if an excluded ID does not occur in the data
    kept= [data[ii] for ii in range(ndata)
           if not sc.any(exclude == data[ii][0])]
    nsample= len(kept)
    #Put the data in the appropriate arrays and matrices
    X= sc.zeros(nsample)
    Y= sc.zeros(nsample)
    for jj, point in enumerate(kept):
        X[jj]= point[1][0]
        Y[jj]= point[1][1]
    Z= sc.zeros((nsample,2))
    Z[:,0]= X
    Z[:,1]= Y
    #Now compute the PCA solution
    Zm= sc.mean(Z,axis=0)
    Q= sc.cov(Z.T)
    #Q is symmetric, so eigh applies; eigenvectors are the COLUMNS of the
    #returned matrix
    eigvals, eigvecs= linalg.eigh(Q)
    V= eigvecs[:,sc.argmax(eigvals)]
    #Slope of the principal direction; the ratio keeps the sign, which
    #sqrt(1/V[0]**2-1) would discard
    slope= V[1]/V[0]
    bestfit= sc.array([-slope*Zm[0]+Zm[1],slope])

    #Plot result
    plot.bovy_print(**bovyprintargs)
    xlimits=[0,300]
    ylimits=[0,700]
    plot.bovy_plot(sc.array(xlimits),bestfit[1]*sc.array(xlimits)+bestfit[0],
                   'k--',xrange=xlimits,yrange=ylimits,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    plot.bovy_plot(X,Y,marker='o',color='k',linestyle='None',
                   zorder=0,overplot=True)

    #Label with an explicit sign between the slope and intercept terms
    if bestfit[0] < 0:
        sgn_str= '-'
    else:
        sgn_str= '+'
    label= (r'$y = %4.2f \,x ' % bestfit[1] + sgn_str
            + r' %4.0f' % abs(bestfit[0]) + r'$')
    plot.bovy_text(label,bottom_right=True)
    plot.bovy_end_print(plotfilename)
コード例 #4
0
def plot_data_yerr():
    """plot_data_yerr: Plot the data with the error bars in the y-direction

    Reads 'data_yerr.dat' and saves an error-bar plot of y versus x to
    'data_yerr.png'. Returns 0.

    History:
       2009-05-20 - Written - Bovy (NYU)

    """
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #Put the data into x, y, and yerr
    x = sc.zeros(ndata)
    y = sc.zeros(ndata)
    yerr = sc.zeros(ndata)
    for ii in range(ndata):
        x[ii] = data[ii][1][0]
        y[ii] = data[ii][1][1]
        yerr[ii] = data[ii][2]

    plotfilename = 'data_yerr.png'
    fig_width = 7.5
    fig_height = 7.5
    fig_size = [fig_width, fig_height]
    params = {
        'axes.labelsize': 12,
        #'text.fontsize' is no longer an accepted rcParams key;
        #'font.size' is its replacement
        'font.size': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    rcParams.update(params)
    errorbar(x, y, yerr, marker='o', color='k', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    savefig(plotfilename, format='png')

    return 0
コード例 #5
0
def plot_data_yerr():
    """plot_data_yerr: Plot the data with the error bars in the y-direction

    Reads 'data_yerr.dat' and saves an error-bar plot of y versus x to
    'data_yerr.png'. Returns 0.

    History:
       2009-05-20 - Written - Bovy (NYU)

    """
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    #Put the data into x, y, and yerr
    x= sc.zeros(ndata)
    y= sc.zeros(ndata)
    yerr= sc.zeros(ndata)
    for ii in range(ndata):
        x[ii]= data[ii][1][0]
        y[ii]= data[ii][1][1]
        yerr[ii]= data[ii][2]

    plotfilename='data_yerr.png'
    fig_width=7.5
    fig_height=7.5
    fig_size =  [fig_width,fig_height]
    #'text.fontsize' is no longer an accepted rcParams key; 'font.size' is
    #its replacement
    params = {'axes.labelsize': 12,
              'font.size': 11,
              'legend.fontsize': 12,
              'xtick.labelsize':10,
              'ytick.labelsize':10,
              'text.usetex': True,
              'figure.figsize': fig_size}
    rcParams.update(params)
    errorbar(x,y,yerr,marker='o',color='k',linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    savefig(plotfilename,format='png')

    return 0
コード例 #6
0
# These are the functions we will be testing
from informative_priors import loss_diff, loss_ratio, bayes_p

BAYES_PROBS = [0.90, 0.95, 0.99]

# One decision rule per probability, built with bayes_p(p, True) and named
# 'bayes_<percent>_hard' so that it can be identified in the results table.
# (Rules built with plain bayes_p(p) are currently not generated.)
BAYES_P_LIST = []
for p in BAYES_PROBS:
    b = bayes_p(p, True)
    b.__name__ = 'bayes_%d_hard' % int(p * 100)
    BAYES_P_LIST.append(b)

# Only the Bayes rules are evaluated; loss_diff and loss_ratio are imported
# but currently left out of the list.
FUNCTION_LIST = BAYES_P_LIST

DATA_FILE = 'test_data.pkl'
data_dict = read_data(DATA_FILE)

# Evaluate every rule on the test data and collect one result dict per rule.
results = []
for i, f in enumerate(FUNCTION_LIST):
    print('Evaluting decision function {}: {}'.format(i + 1, f.__name__))
    d = evaluate_all(data_dict, f)
    d['Rule name'] = f.__name__
    results.append(d)

# Columns kept in the report, in output order.
COLS = [
    'Rule name',
    'Average loss',
    'Average number of samples',
    'Estimate bias',
    'Estimate MSE',
    'TP',
    'TN',
    'FP',
    'FN',
]

# Rank the rules by average loss, renumber the rows, keep the report columns.
results_df = pd.DataFrame(results)
results_df = results_df.sort_values('Average loss')
results_df = results_df.reset_index(drop=True)
results_df = results_df.loc[:, COLS]
results_df.to_csv('results_informative_priors.csv', index=False)
print(results_df)
コード例 #7
0
def ex8(plotfilename='ex8.png',
        nburn=1000,
        nsamples=10000,
        parsigma=[.075, 2., 0.1]):
    """ex8: solve exercise 8 by sampling a straight-line + outlier model

    Each data point ii carries a flag q[ii] (1 = good, 0 = bad). Good points
    are modelled as y = m x + b with the quoted y-uncertainty; bad points
    are modelled as a Gaussian with the mean and 1/sum-of-squares of all y
    values; pgood is the prior probability that a point is good. Every
    iteration first Gibbs-samples each q[ii] and then proposes a Gaussian
    step in (m, b, pgood) that is accepted or rejected with the Metropolis
    rule on lnposterior. The highest-lnposterior sample is plotted.

    Input:
       plotfilename   - filename for the output plot
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian) for
                        m, b and pgood, in that order
    Output:
       plot (the function itself returns 0)
    History:
       2009-06-25 -- hacked from Bovy code - Hogg (NYU)
    """
    # NOTE(review): recent numpy versions reject a negative seed; kept
    # unchanged so the random stream of existing runs is not altered.
    sc.random.seed(-1)  #In the interest of reproducibility (if that's a word)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #Put the data in the appropriate arrays and matrices
    X = sc.zeros(ndata)
    Y = sc.zeros(ndata)
    A = sc.ones((ndata, 2))
    Yivar = sc.zeros(ndata)
    C = sc.zeros((ndata, ndata))
    yerr = sc.zeros(ndata)
    for ii in range(ndata):
        X[ii] = data[ii][1][0]
        Y[ii] = data[ii][1][1]
        A[ii, 1] = data[ii][1][0]
        Yivar[ii] = 1.0 / (data[ii][2]**2)
        C[ii, ii] = data[ii][2]**2
        yerr[ii] = data[ii][2]
    #First find the chi-squared solution, which we will use as an
    #initial guess
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    m = bestfit[1]
    b = bestfit[0]
    #Start with the first four points flagged as bad, all others as good
    q = sc.array([1 for cc in range(ndata)])
    q[0:4] = 0
    pgood = 0.9
    #pgood=0.999759#3 sigma for uncertainty~50
    initialguess = [m, b, q, pgood]
    print(initialguess)
    #With this initial guess start off the sampling procedure
    bgmean = sc.mean(Y)
    bgivar = 1.0 / sc.sum((Y - bgmean)**2)
    initialX = lnposterior(X, Y, Yivar, m, b, q, pgood, bgmean, bgivar)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution
        #(deep copy so that updating q in place leaves currentguess intact)
        thisguess = c.deepcopy(currentguess)
        m = thisguess[0]
        b = thisguess[1]
        q = thisguess[2]
        pgood = thisguess[3]
        #First Gibbs sample each q
        for ii in range(ndata):
            thisdatagood = ma.sqrt(Yivar[ii] / (2. * math.pi)) * ma.exp(
                -.5 * (Y[ii] - m * X[ii] - b)**2. * Yivar[ii]) * pgood
            thisdatabad = ma.sqrt(bgivar / (2. * math.pi)) * ma.exp(
                -.5 * (Y[ii] - bgmean)**2. * bgivar) * (1.0 - pgood)
            a = thisdatagood / (thisdatagood + thisdatabad)
            u = stats.uniform.rvs()
            if u < a:
                q[ii] = 1
            else:
                q[ii] = 0
        #Then Metropolis sample m and b
        m += stats.norm.rvs() * parsigma[0]
        b += stats.norm.rvs() * parsigma[1]
        pgood += stats.norm.rvs() * parsigma[2]
        #Clamp pgood to [1-MAXP, MAXP]
        if pgood > MAXP: pgood = MAXP
        if pgood < (1.0 - MAXP): pgood = (1.0 - MAXP)
        newsample = [m, b, q, pgood]
        #Calculate the objective function for the newsample
        newX = lnposterior(X, Y, Yivar, m, b, q, pgood, bgmean, bgivar)
        #Accept or reject
        #Reject with the appropriate probability
        u = stats.uniform.rvs()
        if u < ma.exp(newX - currentX):
            #Accept
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
        if currentX > bestX:
            print(currentguess)
            bestfit = currentguess
            bestX = currentX
    print("Acceptance ratio was " + str(double(naccept) / (nburn + nsamples)))

    #Now plot the best solution
    fig_width = 5
    fig_height = 5
    fig_size = [fig_width, fig_height]
    params = {
        'axes.labelsize': 12,
        #'text.fontsize' is no longer an accepted rcParams key;
        #'font.size' is its replacement
        'font.size': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    rcParams.update(params)
    #Plot data (the keyword color='k' used to be passed twice, which is a
    #SyntaxError)
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    xlim(0, 300)
    ylim(0, 700)
    xmin, xmax = xlim()
    (m, b, q, pgood) = bestfit
    print(bestfit)
    print(m)
    xs = sc.linspace(xmin, xmax, 3)
    ys = m * xs + b
    if b < 0:
        sgn_str = '-'
    else:
        sgn_str = '+'
    label = r'$y = %4.2f\, x' % m + sgn_str + '%4.0f ' % ma.fabs(
        b) + '$'  #+r'; X = '+ '%3.1f' % bestX+'$'
    plot(xs, ys, color='k', ls='--', label=label)
    l = legend(loc=(.3, .1), numpoints=8)
    l.draw_frame(False)
    xlim(0, 300)
    ylim(0, 700)
    savefig(plotfilename, format='png')

    return 0
コード例 #8
0
def ex9(exclude=sc.array([1,2,3,4]),plotfilename='ex9.png',zoom=False,
        bovyprintargs={}):
    """ex9: solve exercise 9

    For each value S on a grid, assigns the variance S to every data point,
    fits a straight line by linear least squares and records chi^2 of that
    fit (via chi2). Plots chi^2 against S, with a dashed horizontal line at
    nsample-2 and a gray vertical line at the mean of the quoted yerr**2.

    Input:
       exclude       - ID numbers to exclude from the analysis
       plotfilename  - filename for the output plot
       zoom          - zoom in (S grid on [900,1000] instead of [0.001,1500])
       bovyprintargs - keyword arguments forwarded to plot.bovy_print
    Output:
       plot (the function itself returns 0)
    History:
       2009-05-27 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    #Select the points to fit; sizing the arrays from the selection keeps
    #them consistent even if an excluded ID does not occur in the data
    kept= [data[ii] for ii in range(ndata)
           if not sc.any(exclude == data[ii][0])]
    nsample= len(kept)
    #Put the data in the appropriate arrays and matrices. None of these
    #depend on S, so they are built once instead of once per grid point
    Y= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    yerr= sc.zeros(nsample)
    for jj, point in enumerate(kept):
        Y[jj]= point[1][1]
        A[jj,1]= point[1][0]
        yerr[jj]= point[2]
    nSs= 1001
    if zoom:
        Srange=[900,1000]
    else:
        Srange=[0.001,1500]
    Ss= sc.linspace(Srange[0],Srange[1],nSs)
    chi2s= sc.zeros(nSs)
    for kk in range(nSs):
        #Every point gets the same trial variance S
        C= sc.zeros((nsample,nsample))
        for jj in range(nsample):
            C[jj,jj]= Ss[kk]
        #Now compute the best fit and the uncertainties
        Cinv= linalg.inv(C)
        bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
        bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y)))
        chi2s[kk]= chi2(bestfit,A,Y,C)

    #Now plot the solution
    plot.bovy_print(**bovyprintargs)
    #Plot chi^2 as a function of S
    if zoom:
        ylimits=[nsample-4,nsample]
    else:
        ylimits=[nsample-10,nsample+8]
    plot.bovy_plot(Ss,
                   chi2s,
                   'k-',xrange=Srange,yrange=ylimits,
                   xlabel=r'$S$',ylabel=r'$\chi^2$',zorder=1)
    #Horizontal reference line at nsample-2
    plot.bovy_plot(sc.array(Srange),sc.array([nsample-2,nsample-2]),
                   'k--',zorder=2,overplot=True)
    #Vertical reference line at the mean quoted variance
    meanvar= sc.mean(yerr**2.)
    plot.bovy_plot(sc.array([meanvar,meanvar]),
                   sc.array(ylimits),color='0.75',overplot=True)
    plot.bovy_end_print(plotfilename)

    return 0
コード例 #9
0
ファイル: main.py プロジェクト: bbehrang-bot/course3
def start():
    """Run the interactive text menu until the user chooses to exit.

    The main menu dispatches to the filter functions (fd), the analysis
    functions (ad) and the data reading / generation functions (gd). All
    choices are read with int(input(...)), so a non-numeric entry raises
    ValueError. Choosing 5 in the main menu, or 15 in the filter menu,
    leaves the loop.
    """
    # Menu number -> duty column name, shared by every sub-menu that asks
    # the user to pick a duty.
    duty_columns = {
        1: "general_betting_duty",
        2: "pool_betting_duty",
        3: "gaming_duty",
        4: "amusement_machine_licence",
        5: "bingo",
        6: "machine_games_duty",
        7: "lottery_duty",
    }
    while True:
        print("""Comands:
        1-filter,
        2-analyse 
        3-read_data
        4-gen_data
        5-exit
         """)
        a = int(input("Enter command:"))
        if a == 2:
            # The quit entry used to be numbered 8 as well, clashing with
            # "Total Gaming"; any unlisted number returns to the main menu.
            print(""" Analyze
                        1 - Total receipt of a specific year
                        2 - Total receipt of range of years
                        3 - full data of certain duty
                        4 - Receipt of a duty in a certain year
                        5 - Receipt of a duty in a range of years
                        6 - Total sum of receipts of a duty
                        7-  Total Betting 
                        8-  Total Gaming
                        9 - quit

                        """)
            b = int(input("Enter command: "))
            if b == 1:
                c = int((input("Enter year: ")))
                ad.total_receipt(c)
            elif b == 2:
                u = int(input("enter number of years you want to compare: "))
                years = [int(input("Enter year")) for i in range(u)]
                ad.total_receipt_years(years)
            elif b == 3:
                print(""" Analyze
                        1 - general_betting_duty
                        2 - pool_betting_duty
                        3 - gaming_duty
                        4 - amusement_machine_licence
                        5 - bingo
                        6 - machine_games_duty
                        7 - lottery_duty
                                        """)
                column = duty_columns.get(int(input("enter command: ")))
                if column is not None:
                    ad.column_full(column)
            elif b == 4:
                yy = int(input("enter year: "))
                print(""" Analyze
                        1 - general_betting_duty
                        2 - pool_betting_duty
                        3 - gaming_duty
                        4 - amusement_machine_licence
                        5 - bingo
                        6 - machine_games_duty
                        7 - lottery_duty
                                                       """)
                column = duty_columns.get(int(input("enter command: ")))
                if column is not None:
                    ad.column_year(column, yy)
            elif b == 5:
                u = int(input("enter number of years you want to compare: "))
                years = [int(input("Enter year: ")) for i in range(u)]
                print(""" Analyze
                        1 - general_betting_duty
                        2 - pool_betting_duty
                        3 - gaming_duty
                        4 - amusement_machine_licence
                        5 - bingo
                        6 - machine_games_duty
                        7 - lottery_duty
                                                                       """)
                column = duty_columns.get(int(input("enter command: ")))
                if column is not None:
                    ad.column_multiple_years(column, years)
            elif b == 6:
                print(""" Analyze
                        1 - general_betting_duty
                        2 - pool_betting_duty
                        3 - gaming_duty
                        4 - amusement_machine_licence
                        5 - bingo
                        6 - machine_games_duty
                        7 - lottery_duty
                                                                                       """
                      )
                column = duty_columns.get(int(input("enter command: ")))
                if column is not None:
                    ad.annual_receipt(column)
            elif b == 7:
                ad.annual_betting()
            elif b == 8:
                ad.annual_gaming()
        elif a == 1:
            print(""" Filter
            1 - Year
            2 - Year >
            3 - Year <
            4 - Month
            5 - Month >
            6 - Month <
            7 - Date
            8 - Other fields
            9 - quit
            
            """)

            b = int(input("Enter command:"))

            if b == 1:
                c = int((input("Enter year:")))
                fd.filter_date("year", c)
            elif b == 2:
                c = int((input("Enter year:")))
                fd.filter_date_gt("year", c)
            elif b == 3:
                # Converted to int like every other year/month filter; the
                # raw input string used to be passed here.
                c = int(input("Enter year:"))
                fd.filter_date_ls("year", c)
            elif b == 4:
                c = int((input("Enter month:")))
                fd.filter_date("month", c)
            elif b == 5:
                c = int((input("Enter month:")))
                fd.filter_date_gt("month", c)
            elif b == 6:
                # Same int conversion as for option 3.
                c = int(input("Enter month:"))
                fd.filter_date_ls("month", c)
            elif b == 7:
                c = input("Enter date:(yyyy-mm)")
                # Complete the year-month entry to the first day of the month.
                fullDate = "%s-01" % c
                print(fullDate)
                fd.filter_data("date", fullDate)
            elif b == 8:

                print(""" operation
                         1 - =
                         2 - >
                         3 - <
                              """)
                op = int(input("Enter operation: "))
                val = int(input("Enter Value: "))
                print(""" Field
                        1 - general_betting_duty
                        2 - pool_betting_duty
                        3 - gaming_duty
                        4 - amusement_machine_licence
                        5 - bingo
                        6 - machine_games_duty
                        7 - lottery_duty
                                                       """)
                j = int(input("enter command"))
                # An unlisted field number falls back to the empty string.
                s = duty_columns.get(j, "")
                if op == 1:
                    fd.filter_data(s, val)
                elif op == 2:
                    fd.filter_data_gt(s, val)
                elif op == 3:
                    fd.filter_data_ls(s, val)
            elif b == 15:
                break
            if (b < 1 or b > 15):
                print("Error number")

        elif a == 3:
            gd.read_data()
        elif a == 4:
            c = int(input("Enter start year: "))
            gd.generate_data(c)
        elif a == 5:
            break
コード例 #10
0
def ex6b(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex6b.png'):
    """ex6b: solve exercise 6 using a basinhopping optimization

    Starts from the weighted linear least-squares straight-line fit and
    minimizes logbiexp with ten differently seeded runs of
    optimize.basinhopping, keeping the run with the lowest objective value.
    Plots the data together with the best line found.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
    Output:
       plot (the function itself returns 0)
    History:
       2009-06-02 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the bi-exponential optimization
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            pass
        else:
            Y[jj] = data[ii][1][1]
            X[jj] = data[ii][1][0]
            A[jj, 1] = data[ii][1][0]
            C[jj, jj] = data[ii][2]**2.
            yerr[jj] = data[ii][2]
            jj = jj + 1
    #Now compute the best fit and the uncertainties
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    initialguess = sc.array([bestfit[0], bestfit[1]])
    #With this initial guess start off the optimization procedure
    initialchisq = nsample * 10.
    chisq = initialchisq
    bestfit = initialguess
    print(
        "Performing 10 runs of the simulating basinhopping optimization algorithm"
    )
    minimizer_kwargs = {"args": (X, Y, yerr)}
    for jj in range(10):  #Do ten runs of the algorithm
        sc.random.seed(
            jj + 1)  #In the interest of reproducibility (if that's a word)
        bestfitbiexp = optimize.basinhopping(logbiexp,
                                             x0=initialguess,
                                             minimizer_kwargs=minimizer_kwargs,
                                             niter=100)
        #basinhopping returns a result object: .x is the minimum found and
        #.fun the objective value there. Keep the best of the ten runs;
        #this comparison used to sit after the loop, so only the final run
        #was ever considered
        if bestfitbiexp.fun < chisq:
            bestfit = bestfitbiexp.x
            chisq = bestfitbiexp.fun

    #Result object of the last run
    print(bestfitbiexp)

    bestfitsbiexp = bestfit

    #Now plot the solution
    fig_width = 5
    fig_height = 5
    fig_size = [fig_width, fig_height]
    params = {
        'axes.labelsize': 12,
        #'text.fontsize': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    rcParams.update(params)
    #Plot data
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    xlim(0, 300)
    ylim(0, 700)
    xmin, xmax = xlim()
    nsamples = 1001
    xs = sc.linspace(xmin, xmax, nsamples)
    ys = sc.zeros(nsamples)
    for ii in range(nsamples):
        ys[ii] = bestfitsbiexp[0] + bestfitsbiexp[1] * xs[ii]
    if bestfitsbiexp[0] < 0:
        sgn_str = '-'
    else:
        sgn_str = '+'
    label = r'$y = %4.2f\, x' % (
        bestfitsbiexp[1]) + sgn_str + '%4.0f ' % m.fabs(
            bestfitsbiexp[0]) + r'; X = ' + '%3.1f' % chisq + '$'
    plot(xs, ys, color='k', ls='--', label=label)
    l = legend(loc=(.3, .1), numpoints=8)
    l.draw_frame(False)
    xlim(0, 300)
    ylim(0, 700)

    print('Creating: ', plotfilename)
    savefig(plotfilename, format='png')

    return 0
コード例 #11
0
ファイル: exMix1.py プロジェクト: azadag/DataAnalysisRecipes
def exMix1(
    exclude=None,
    plotfilenameA="exMix1a.png",
    plotfilenameB="exMix1b.png",
    plotfilenameC="exMix1c.png",
    nburn=20000,
    nsamples=1000000,
    parsigma=[5, 0.075, 0.2, 1, 0.1],
    dsigma=1.0,
    bovyprintargs={},
    sampledata=None,
):
    """exMix1: solve exercise 5 (mixture model) using MCMC sampling

    Produces three plots: the 2D histogram of the (b, m) samples, the data
    with the best-fit line and the sampled lines, and the histogram of Pbad.

    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilename*  - filenames for the output plot
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian)
       dsigma         - divide uncertainties by this amount
       bovyprintargs  - keyword arguments forwarded to plot.bovy_print
                        (not modified)
       sampledata     - result of an earlier run, as returned by this
                        function; when given, the sampler is not run again
    Output:
       plot; returns the sampler output
       (histmb, edges, mbsamples, pbhist, pbedges)
    History:
       2010-04-28 - Written - Bovy (NYU)
    """
    # NOTE(review): recent numpy versions reject a negative seed; kept
    # unchanged so the random stream of existing runs is not altered.
    sc.random.seed(-1)  # In the interest of reproducibility (if that's a word)
    # Work on a copy: a default is added further down, and writing it into
    # the argument would leak into the shared default dict (and into the
    # caller's dict) for all later calls
    bovyprintargs = dict(bovyprintargs)
    # Read the data
    data = read_data("data_yerr.dat")
    ndata = len(data)
    # Select the points to use ("is None" because comparing an array with
    # "== None" does not give a single truth value)
    kept = [
        data[ii]
        for ii in range(ndata)
        if exclude is None or not sc.any(exclude == data[ii][0])
    ]
    nsample = len(kept)
    # Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    for jj, point in enumerate(kept):
        Y[jj] = point[1][1]
        X[jj] = point[1][0]
        A[jj, 1] = point[1][0]
        C[jj, jj] = point[2] ** 2.0 / dsigma ** 2.0
        yerr[jj] = point[2] / dsigma

    brange = [-120, 120]
    mrange = [1.5, 3.2]

    # This matches the order of the parameters in the "samples" vector
    mbrange = [brange, mrange]

    if sampledata is None:
        sampledata = runSampler(X, Y, A, C, yerr, nburn, nsamples, parsigma, mbrange)

    (histmb, edges, mbsamples, pbhist, pbedges) = sampledata

    # Hack -- produce fake Pbad samples from Pbad histogram.
    pbsamples = hstack([array([x] * N) for x, N in zip((pbedges[:-1] + pbedges[1:]) / 2, pbhist)])

    # Location of the highest bin of the 2D (b, m) histogram
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi - 1], edges[1][indxj - 1]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi + 1], edges[1][indxj + 1]))

    # 2D histogram
    plot.bovy_print(**bovyprintargs)
    levels = special.erf(0.5 * sc.arange(1, 4))
    xe = [edges[0][0], edges[0][-1]]
    ye = [edges[1][0], edges[1][-1]]
    aspect = (xe[1] - xe[0]) / (ye[1] - ye[0])
    plot.bovy_dens2d(
        histmb.T,
        origin="lower",
        cmap=cm.gist_yarg,
        interpolation="nearest",
        contours=True,
        cntrmass=True,
        extent=xe + ye,
        levels=levels,
        aspect=aspect,
        xlabel=r"$b$",
        ylabel=r"$m$",
    )
    xlim(brange)
    ylim(mrange)

    plot.bovy_end_print(plotfilenameA)

    # Data with MAP line and sampling
    plot.bovy_print(**bovyprintargs)
    bestb = edges[0][indxi]
    bestm = edges[1][indxj]
    xlimits = [0, 300]
    ylimits = [0, 700]
    plot.bovy_plot(
        xlimits,
        bestm * sc.array(xlimits) + bestb,
        "k-",
        xrange=xlimits,
        yrange=ylimits,
        xlabel=r"$x$",
        ylabel=r"$y$",
        zorder=2,
    )
    errorbar(X, Y, yerr, marker="o", color="k", linestyle="None", zorder=1)

    for m, b in mbsamples:
        plot.bovy_plot(
            xlimits,
            m * sc.array(xlimits) + b,
            overplot=True,
            xrange=xlimits,
            yrange=ylimits,
            xlabel=r"$x$",
            ylabel=r"$y$",
            color="0.75",
            zorder=1,
        )

    plot.bovy_end_print(plotfilenameB)

    # Pb plot
    if "text_fontsize" not in bovyprintargs:
        bovyprintargs["text_fontsize"] = 11
    plot.bovy_print(**bovyprintargs)
    plot.bovy_hist(
        pbsamples,
        # the number of bins has to be an integer
        bins=int(round(sc.sqrt(nsamples) / 5.0)),
        xlabel=r"$P_\mathrm{b}$",
        normed=True,
        histtype="step",
        range=[0, 1],
        edgecolor="k",
    )
    ylim(0, 4.0)
    if dsigma == 1.0:
        plot.bovy_text(r"$\mathrm{using\ correct\ data\ uncertainties}$", top_right=True)
    else:
        plot.bovy_text(r"$\mathrm{using\ data\ uncertainties\ /\ 2}$", top_left=True)

    plot.bovy_end_print(plotfilenameC)

    return sampledata
コード例 #12
0
def _ex14_weighted_fit(A, C, Y):
    """_ex14_weighted_fit: weighted linear least-squares fit

    Evaluates [A^T C^-1 A]^-1 [A^T C^-1 Y] and the matrix [A^T C^-1 A]^-1.

    Input:
       A - design matrix
       C - data covariance matrix
       Y - vector of measurements
    Output:
       (bestfit, bestfitvar) - parameter vector and its covariance matrix
    """
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y.T)))
    return bestfit, bestfitvar


def ex14(
        exclude=sc.array([1, 2, 3, 4]), plotfilename='ex14.png',
        bovyprintargs={}):
    """ex14: solve exercise 14

    Fits a straight line to the data twice with weighted least squares:
    "forward" (y as a function of x, weighted by the y uncertainties) and
    "reverse" (x as a function of y, weighted by the x uncertainties).
    The reverse fit is rewritten in the form y = m x + b, its covariance
    is propagated linearly, and both lines are drawn over the data.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
       bovyprintargs  - keyword arguments handed to plot.bovy_print
                        (only read here, never modified)
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    ndata = len(data)
    nsample = ndata - len(exclude)
    #Put the data in the appropriate arrays and matrices
    #(suffix 1: forward problem, suffix 2: reverse problem)
    Y1 = sc.zeros(nsample)
    A1 = sc.ones((nsample, 2))
    C1 = sc.zeros((nsample, nsample))
    Y2 = sc.zeros(nsample)
    A2 = sc.ones((nsample, 2))
    C2 = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    xerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        xval = data[ii][1][0]
        yval = data[ii][1][1]
        #Forward: y is the dependent variable, weighted by its uncertainty
        Y1[jj] = yval
        A1[jj, 1] = xval
        C1[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        #Reverse: x is the dependent variable, weighted by its uncertainty
        Y2[jj] = xval
        A2[jj, 1] = yval
        C2[jj, jj] = data[ii][3]**2.
        xerr[jj] = data[ii][3]
        jj = jj + 1
    #Now compute the best fit and the uncertainties: forward
    bestfit1, bestfitvar1 = _ex14_weighted_fit(A1, C1, Y1)
    #Now compute the best fit and the uncertainties: backward
    bestfit2, bestfitvar2 = _ex14_weighted_fit(A2, C2, Y2)
    #Propagate to y=mx+b: the reverse fit is x = b' + m' y, so
    #b = -b'/m' and m = 1/m'; linerrprop is the Jacobian d(b,m)/d(b',m')
    linerrprop = sc.array([[-1. / bestfit2[1], bestfit2[0] / bestfit2[1]**2],
                           [0., -1. / bestfit2[1]**2.]])
    bestfit2 = sc.array([-bestfit2[0] / bestfit2[1], 1. / bestfit2[1]])
    bestfitvar2 = sc.dot(linerrprop, sc.dot(bestfitvar2, linerrprop.T))

    #Plot result
    plot.bovy_print(**bovyprintargs)
    xlimits = [0, 300]
    ylimits = [0, 700]
    plot.bovy_plot(sc.array(xlimits),
                   bestfit1[1] * sc.array(xlimits) + bestfit1[0],
                   'k--',
                   xrange=xlimits,
                   yrange=ylimits,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    plot.bovy_plot(sc.array(xlimits),
                   bestfit2[1] * sc.array(xlimits) + bestfit2[0],
                   'k-.',
                   overplot=True,
                   zorder=2)

    #Plot data
    errorbar(A1[:, 1],
             Y1,
             yerr,
             xerr,
             color='k',
             marker='o',
             linestyle='None',
             zorder=0)
    #Label both fits; raw strings keep the LaTeX backslashes without
    #relying on invalid escape sequences
    plot.bovy_text(r'$\mathrm{forward}\ ---\:\ y = ( ' +
                   r'%4.2f \pm %4.2f )\,x+ ( %4.0f\pm %4.0f' %
                   (bestfit1[1], m.sqrt(bestfitvar1[1, 1]), bestfit1[0],
                    m.sqrt(bestfitvar1[0, 0])) + r')$' + '\n' +
                   r'$\mathrm{reverse}\ -\cdot -\:\ y = ( ' +
                   r'%4.2f \pm %4.2f )\,x+ ( %4.0f\pm %4.0f' %
                   (bestfit2[1], m.sqrt(bestfitvar2[1, 1]), bestfit2[0],
                    m.sqrt(bestfitvar2[0, 0])) + r')$',
                   bottom_right=True)
    plot.bovy_end_print(plotfilename)
コード例 #13
0
def ex13(exclude=sc.array([1, 2, 3, 4]),
         plotfilename='ex13.png',
         nburn=1000,
         nsamples=100000,
         parsigma=[1, m.pi / 200., .01, .5, 1., .05, .1, .005],
         bovyprintargs={}):
    """ex13: solve exercise 13 by MCMC

    Runs a Metropolis sampler over an 8-parameter vector, prints the best
    sample and the peak of the 2-d histogram of the first two parameters,
    and plots the corresponding line, ten randomly drawn sample lines,
    a per-point label computed with Pbad, and the data with error ellipses.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian), one entry
                        per parameter (8 entries are read)
       bovyprintargs  - keyword arguments passed to plot.bovy_print
    Output:
       plot
    History:
       2010-05-06 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    Z = sc.zeros((nsample, 2))  #(x,y) pairs, one row per data point
    yerr = sc.zeros(nsample)
    ycovar = sc.zeros((2, nsample, 2))  #Makes the sc.dot easier
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            pass
        else:
            Y[jj] = data[ii][1][1]
            X[jj] = data[ii][1][0]
            Z[jj, 0] = X[jj]
            Z[jj, 1] = Y[jj]
            A[jj, 1] = data[ii][1][0]
            C[jj, jj] = data[ii][2]**2.
            yerr[jj] = data[ii][2]
            #Per-point 2x2 covariance stored as ycovar[:, jj, :]:
            #data[ii][3]**2 in the (0,0) slot, data[ii][2]**2 in the (1,1)
            #slot, and data[ii][4] times their geometric mean off-diagonal
            ycovar[0, jj, 0] = data[ii][3]**2.
            ycovar[1, jj, 1] = data[ii][2]**2.
            ycovar[0, jj, 1] = data[ii][4] * m.sqrt(
                ycovar[0, jj, 0] * ycovar[1, jj, 1])
            ycovar[1, jj, 0] = ycovar[0, jj, 1]
            jj = jj + 1
    #Now compute the best fit and the uncertainties:
    #[A^T C^-1 A]^-1 [A^T C^-1 Y]
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    #Now sample
    #Angle of the chi-squared line, folded into (pi/2, pi) for negative slopes
    inittheta = m.acos(1. / m.sqrt(1. + bestfit[1]**2.))
    if bestfit[1] < 0.:
        inittheta = m.pi - inittheta
    #Eight starting values. Further down, entry 0 is divided by
    #cos(entry 1) to obtain an intercept and entry 1 is used as an angle.
    #NOTE(review): entry 0 starts at cos(inittheta), without the fitted
    #intercept bestfit[0] - confirm this is intended
    #NOTE(review): both log-variance entries use sc.var(X); the second
    #may have been meant to use sc.var(Y) - confirm
    #The meaning of entries 2-7 is defined by objective/Pbad (not shown here)
    initialguess = sc.array([
        m.cos(inittheta), inittheta, 0.,
        sc.mean(X),
        sc.mean(Y),
        m.log(sc.var(X)),
        m.log(sc.var(X)), 0.
    ])
    #With this initial guess start off the sampling procedure
    initialX = objective(initialguess, Z, ycovar)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    samples = []
    samples.append(currentguess)
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution: an independent
        #Gaussian step of width parsigma[k] in each parameter
        newsample = sc.zeros(8)
        newsample[0] = currentguess[0] + stats.norm.rvs() * parsigma[0]
        newsample[1] = currentguess[1] + stats.norm.rvs() * parsigma[1]
        newsample[2] = currentguess[2] + stats.norm.rvs() * parsigma[2]
        newsample[3] = currentguess[3] + stats.norm.rvs() * parsigma[3]
        newsample[4] = currentguess[4] + stats.norm.rvs() * parsigma[4]
        newsample[5] = currentguess[5] + stats.norm.rvs() * parsigma[5]
        newsample[6] = currentguess[6] + stats.norm.rvs() * parsigma[6]
        newsample[7] = currentguess[7] + stats.norm.rvs() * parsigma[7]
        #Calculate the objective function for the newsample
        newX = objective(newsample, Z, ycovar)
        #Accept or reject
        #Reject with the appropriate probability
        #(objective is presumably a log-probability, since the difference
        #is exponentiated - verify against objective)
        u = stats.uniform.rvs()
        try:
            test = m.exp(newX - currentX)
        except OverflowError:
            #Huge improvement: any value above 1 forces acceptance
            test = 2.
        if u < test:
            #Accept
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
        if currentX > bestX:
            bestfit = currentguess
            bestX = currentX
        #The current state is recorded every step, accepted or not
        samples.append(currentguess)
    #Only report the acceptance ratio when it falls outside [0.5, 0.8]
    if double(naccept) / (nburn + nsamples) < .5 or double(naccept) / (
            nburn + nsamples) > .8:
        print "Acceptance ratio was " + str(
            double(naccept) / (nburn + nsamples))

    #Drop the first nburn entries and the final one; rows are parameters,
    #columns are samples
    samples = sc.array(samples).T[:, nburn:-1]
    print "Best-fit, overall"
    print bestfit, sc.mean(samples[2, :]), sc.median(samples[2, :])

    #2-d histogram of the first two parameters; take the bin with the
    #largest count along each axis
    histmb, edges = sc.histogramdd(samples.T[:, 0:2],
                                   bins=round(sc.sqrt(nsamples) / 2.))
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print "Best-fit, marginalized"
    #NOTE(review): indx - 1 wraps to the last edge when the peak is in
    #the first bin
    print edges[0][indxi - 1], edges[1][indxj - 1]
    print edges[0][indxi], edges[1][indxj]
    print edges[0][indxi + 1], edges[1][indxj + 1]

    #Convert (b cos(t), t) to intercept b and slope mf
    t = edges[1][indxj]
    bcost = edges[0][indxi]
    #NOTE(review): sqrt(1/cos^2 - 1) equals |tan(t)|, so the sign of the
    #slope is lost for t > pi/2 - confirm
    mf = m.sqrt(1. / m.cos(t)**2. - 1.)
    b = bcost / m.cos(t)
    print b, mf

    #Plot result
    plot.bovy_print(**bovyprintargs)
    xrange = [0, 300]
    yrange = [0, 700]
    plot.bovy_plot(sc.array(xrange),
                   mf * sc.array(xrange) + b,
                   'k-',
                   xrange=xrange,
                   yrange=yrange,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    #Overplot ten lines drawn at random from the chain, in grey
    for ii in range(10):
        #Random sample
        ransample = sc.floor((stats.uniform.rvs() * nsamples)).astype('int')
        ransample = samples.T[ransample, 0:2]
        mf = m.sqrt(1. / m.cos(ransample[1])**2. - 1.)
        b = ransample[0] / m.cos(ransample[1])
        bestb = b
        bestm = mf
        plot.bovy_plot(sc.array(xrange),
                       bestm * sc.array(xrange) + bestb,
                       overplot=True,
                       color='0.75',
                       zorder=0)

    #Add labels: for every data point, the average of Pbad over all kept
    #samples, printed next to the point
    nsamples = samples.shape[1]
    for ii in range(nsample):
        Pb = 0.
        for jj in range(nsamples):
            Pb += Pbad(samples[:, jj], Z[ii, :], ycovar[:, ii, :])
        Pb /= nsamples
        text(Z[ii, 0] + 5, Z[ii, 1] + 5, '%.1f' % Pb, color='0.5', zorder=3)

    #Plot the data (adapted from plot_data.py)
    data = read_data('data_allerr.dat', True)
    ndata = len(data)
    #Create the ellipses and the data points
    id = sc.zeros(nsample)
    x = sc.zeros(nsample)
    y = sc.zeros(nsample)
    ellipses = []
    #xmin/xmax/ymin/ymax are accumulated below but not used afterwards
    #in this function
    ymin, ymax = 0, 0
    xmin, xmax = 0, 0
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        id[jj] = data[ii][0]
        x[jj] = data[ii][1][0]
        y[jj] = data[ii][1][1]
        #Calculate the eigenvalues and the rotation angle
        #(ycovar is rebound here to a single 2x2 matrix; the per-point
        #array built at the top is no longer needed at this stage)
        ycovar = sc.zeros((2, 2))
        ycovar[0, 0] = data[ii][3]**2.
        ycovar[1, 1] = data[ii][2]**2.
        ycovar[0, 1] = data[ii][4] * m.sqrt(ycovar[0, 0] * ycovar[1, 1])
        ycovar[1, 0] = ycovar[0, 1]
        eigs = linalg.eig(ycovar)
        angle = m.atan(-eigs[1][0, 1] / eigs[1][1, 1]) / m.pi * 180.
        #Ellipse width and height are twice the square roots of the
        #eigenvalues
        thisellipse = Ellipse(sc.array([x[jj], y[jj]]), 2 * m.sqrt(eigs[0][0]),
                              2 * m.sqrt(eigs[0][1]), angle)
        ellipses.append(thisellipse)
        if (x[jj] + m.sqrt(ycovar[0, 0])) > xmax:
            xmax = (x[jj] + m.sqrt(ycovar[0, 0]))
        if (x[jj] - m.sqrt(ycovar[0, 0])) < xmin:
            xmin = (x[jj] - m.sqrt(ycovar[0, 0]))
        if (y[jj] + m.sqrt(ycovar[1, 1])) > ymax:
            ymax = (y[jj] + m.sqrt(ycovar[1, 1]))
        if (y[jj] - m.sqrt(ycovar[1, 1])) < ymin:
            ymin = (y[jj] - m.sqrt(ycovar[1, 1]))
        jj = jj + 1

    #Add the error ellipses
    ax = gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    ax.plot(x, y, color='k', marker='o', linestyle='None')

    plot.bovy_end_print(plotfilename)
コード例 #14
0
def marginalize_mixture(mixture=True, short=False):
    """marginalize_mixture: MCMC line fit to data with 2-d uncertainties

    Samples the parameters (theta, bperp, Pbad, Ybad, Vbad) with a
    Metropolis chain and writes several PDF figures whose names start
    with 'mixture2d' or 'nomixture2d'.

    Input:
       mixture  - if False, Pbad is clamped to zero at every step and the
                  'nomixture2d' prefix is used
       short    - if True, run half the number of MCMC steps
    Output:
       prefix-data.pdf, prefix-xy.pdf, prefix-mb.pdf and, when mixture is
       True, prefix-xy-bg.pdf
    """
    if mixture:
        prefix = 'mixture2d'
    else:
        prefix = 'nomixture2d'

    random.seed(-1)  #In the interest of reproducibility (if that's a word)
    # Read the data
    data = read_data('data_allerr.dat', True)
    ndata = len(data)
    # Create the ellipses and the data points
    x = zeros(ndata)
    y = zeros(ndata)
    ellipses = []
    yvar = zeros((ndata, 2, 2))  # one 2x2 covariance matrix per point
    for ii in range(ndata):
        x[ii] = data[ii][1][0]
        y[ii] = data[ii][1][1]
        #Calculate the eigenvalues and the rotation angle
        yvar[ii, 0, 0] = data[ii][3]**2.
        yvar[ii, 1, 1] = data[ii][2]**2.
        yvar[ii, 0, 1] = data[ii][4] * sqrt(yvar[ii, 0, 0] * yvar[ii, 1, 1])
        yvar[ii, 1, 0] = yvar[ii, 0, 1]
        eigs = linalg.eig(yvar[ii, :, :])
        angle = arctan(-eigs[1][0, 1] / eigs[1][1, 1]) / pi * 180.
        # ellipse width and height are twice the square roots of the
        # eigenvalues
        thisellipse = Ellipse(array([x[ii], y[ii]]), 2 * sqrt(eigs[0][0]),
                              2 * sqrt(eigs[0][1]), angle)
        ellipses.append(thisellipse)

    # initialize parameters: start from the line through the points with
    # index 7 and 9
    theta = arctan2(y[7] - y[9], x[7] - x[9])
    bperp = (y[7] - tan(theta) * x[7]) * cos(theta)  # bad at theta = 0.5 * pi
    if mixture:
        Pbad = 0.5
    else:
        Pbad = 0.
    # mean and variance of all y values
    Ybad = mean(y)
    Vbad = mean((y - Ybad)**2)

    p = posterior(x, y, yvar, theta, bperp, Pbad, Ybad, Vbad)
    print 'starting p=', p

    chain = []
    oldp = p
    oldparams = (theta, bperp, Pbad, Ybad, Vbad)
    bestparams = oldparams
    bestp = oldp

    nsteps = 0
    naccepts = 0

    NSTEPS = 100000
    if short:
        NSTEPS /= 2
    print 'doing', NSTEPS, 'steps of MCMC...'
    while nsteps < NSTEPS:
        newparams = pick_new_parameters(nsteps, *oldparams)
        if not mixture:
            # clamp Pbad to zero.
            (theta, bperp, Pbad, Ybad, Vbad) = newparams
            newparams = (theta, bperp, 0, Ybad, Vbad)

        p = posterior(x, y, yvar, *newparams)
        # accept when the ratio of new to old posterior beats a uniform draw
        # (posterior is presumably a plain, non-log probability since a
        # ratio is taken - verify against posterior)
        if p / oldp > random.uniform():
            chain.append((p, newparams))
            oldparams = newparams
            oldp = p
            if p > bestp:
                bestp = p
                bestparams = newparams
            naccepts += 1
        else:
            # rejected: the previous state is recorded again
            chain.append((oldp, oldparams))
            # keep oldparams, oldp
        nsteps += 1
        # progress report every 5000 steps
        if (nsteps % 5000 == 1):
            print nsteps, naccepts, (naccepts /
                                     float(nsteps)), oldp, bestp, bestparams

    print 'acceptance fraction', (naccepts / float(nsteps))

    # plot a sample

    fig_width = 5
    fig_height = 5
    fig_size = [fig_width, fig_height]
    params = {
        'axes.labelsize': 12,
        'text.fontsize': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size,
        'image.interpolation': 'nearest',
        'image.origin': 'lower',
    }
    rcParams.update(params)

    # Plot data
    clf()
    ax = gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    xlabel(r'$x$')
    ylabel(r'$y$')
    xlim(0, 300)
    ylim(0, 700)
    savefig(prefix + '-data.pdf')

    # remember the axis limits so they can be restored after plotting lines
    a = axis()
    xmin, xmax = xlim()
    ymin, ymax = ylim()
    xs = linspace(xmin, xmax, 2)
    Nchain = len(chain)
    if mixture:
        # select 10 samples at random from the second half of the chain.
        I = Nchain / 2 + random.permutation(Nchain / 2)[:10]
    else:
        # only the chain entry with the highest posterior value
        I = array([argmax([p for (p, params) in chain])])
    for i in I:
        (p, params) = chain[i]
        (theta, bperp, Pbad, Ybad, Vbad) = params
        # slope tan(theta), intercept bperp / cos(theta)
        ys = tan(theta) * xs + bperp / cos(
            theta)  # replace this with smarter linear algebra
        plot(xs, ys, color='k', alpha=0.3)
    axis(a)
    savefig(prefix + '-xy.pdf')

    if mixture:
        # accumulate, over the second half of the chain, the per-point
        # likelihood terms evaluated with the Pbad argument set to 1
        # (weighted by Pbad) and set to 0 (weighted by 1 - Pbad)
        bgp = zeros(len(x))
        fgp = zeros(len(x))
        for (p, params) in chain[Nchain / 2:]:
            (theta, bperp, Pbad, Ybad, Vbad) = params
            bgp += Pbad * single_point_likelihoods(x, y, yvar, theta, bperp, 1,
                                                   Ybad, Vbad)
            fgp += (1. - Pbad) * single_point_likelihoods(
                x, y, yvar, theta, bperp, 0, Ybad, Vbad)
        bgodds = bgp / fgp
        # label only the points whose odds ratio is at least 1, with its
        # base-10 logarithm
        for i, bgo in enumerate(bgodds):
            if bgo < 1:
                continue
            dxl = (xmax - xmin) * 0.01
            dyl = (ymax - ymin) * 0.01
            t = text(x[i] + dxl,
                     y[i] + dyl,
                     '%.1f' % log10(bgo),
                     horizontalalignment='left',
                     verticalalignment='bottom',
                     alpha=0.3)
        savefig(prefix + '-xy-bg.pdf')

    clf()
    # note horrifying theta = 0.5 * pi behavior!
    # slopes and intercepts for the second half of the chain
    ms = array([
        tan(theta)
        for (p, (theta, bperp, Pbad, Ybad, Vbad)) in chain[Nchain / 2:]
    ])
    bs = array([
        bperp / cos(theta)
        for (p, (theta, bperp, Pbad, Ybad, Vbad)) in chain[Nchain / 2:]
    ])
    #plot(ms, bs, 'k,', alpha=0.1)
    # NOTE(review): these two labels are wiped by the clf() just below
    xlabel('slope $m$')
    ylabel('intercept $b$')
    #savefig(prefix + '-mb-scatter.pdf')

    clf()
    # 2-d histogram of (slope, intercept), shown on a log(1 + count) scale
    (H, xe, ye) = histogram2d(ms, bs, bins=(100, 100))
    imshow(log(1 + H.T),
           extent=(xe.min(), xe.max(), ye.min(), ye.max()),
           aspect='auto',
           cmap=antigray)
    xlabel('slope $m$')
    ylabel('intercept $b$')
    savefig(prefix + '-mb.pdf')
コード例 #15
0
def marginalize_mixture(mixture=True, thirds=False, short=False):
	"""marginalize_mixture: MCMC line fit to data with y uncertainties

	Samples the parameters (m, b, Pbad, Ybad, Vbad) with a Metropolis
	chain and writes several PDF figures whose names start with
	'mixture' or 'nomixture' (plus '-thirds' when thirds is True).

	Input:
	   mixture  - if False, Pbad is clamped to zero at every step and the
	              'nomixture' prefix is used
	   thirds   - if True, divide all y variances by 9
	   short    - if True, run half the number of MCMC steps
	Output:
	   prefix-data.pdf, prefix-xy.pdf, prefix-mb.pdf and, when mixture is
	   True, prefix-xy-bg.pdf
	"""
	if mixture:
		prefix = 'mixture'
	else:
		prefix = 'nomixture'

	if thirds:
		prefix += '-thirds'

	random.seed(-1) #In the interest of reproducibility (if that's a word)
	#Read the data
	data= read_data('data_yerr.dat')
	ndata= len(data)
	#Put the data in the appropriate arrays and matrices
	x= zeros(ndata)
	y= zeros(ndata)
	yvar= zeros(ndata)
	jj= 0
	for ii in arange(ndata):
		x[jj]= data[ii][1][0]
		y[jj]= data[ii][1][1]
		yvar[jj]= data[ii][2]**2
		jj= jj+1
	#No point is skipped above, so jj equals the original ndata here and
	#the slices below keep everything
	ndata= jj
	x= x[0:ndata]
	y= y[0:ndata]
	yvar= yvar[0:ndata]

	if thirds:
		#variances / 9, i.e. uncertainties / 3
		yvar /= 9.

	# initialize parameters: start from the line through the points with
	# index 7 and 9
	m = (y[7]-y[9]) / (x[7]-x[9])
	b = y[7] - m * x[7]
	if mixture:
		Pbad = 0.5
	else:
		Pbad = 0.
	# mean and variance of all y values
	Ybad = mean(y)
	Vbad = mean((y-Ybad)**2)

	p = posterior(x, y, yvar, m, b, Pbad, Ybad, Vbad)
	print 'starting p=', p

	chain = []
	oldp = p
	oldparams = (m, b, Pbad, Ybad, Vbad)
	bestparams = oldparams
	bestp = oldp

	nsteps = 0
	naccepts = 0

	NSTEPS = 100000
	if short:
		NSTEPS /= 2
	print 'doing', NSTEPS, 'steps of MCMC...'
	while nsteps < NSTEPS:
		newparams = pick_new_parameters(nsteps, *oldparams)
		if not mixture:
			# clamp Pbad to zero.
			(m, b, Pbad, Ybad, Vbad) = newparams
			newparams = (m, b, 0, Ybad, Vbad)

		p = posterior(x, y, yvar, *newparams)
		# accept when the ratio of new to old posterior beats a uniform draw
		# (posterior is presumably a plain, non-log probability since a
		# ratio is taken - verify against posterior)
		if p/oldp > random.uniform():
			chain.append((p,newparams))
			oldparams = newparams
			oldp = p
			if p > bestp:
				bestp = p
				bestparams = newparams
			naccepts += 1
		else:
			# rejected: the previous state is recorded again
			chain.append((oldp,oldparams))
			# keep oldparams, oldp
		nsteps += 1
		# progress report every 5000 steps
		if (nsteps % 5000 == 1):
			print nsteps, naccepts, (naccepts/float(nsteps)), oldp, bestp, bestparams

	print 'acceptance fraction', (naccepts/float(nsteps))

	# plot a sample
	

	# Plot data
	errorbar(x, y, sqrt(yvar), color='k', marker='o', linestyle='None')
	xlabel(r'$x$')
	ylabel(r'$y$')
	xlim(0,300)
	ylim(0,700)
	savefig(prefix + '-data.pdf')

	# remember the axis limits so they can be restored after plotting lines
	a = axis()
	xmin, xmax = xlim()
	ymin, ymax = ylim()
	xs = linspace(xmin, xmax, 2)
	# select 10 samples at random from the second half of the chain.
	Nchain = len(chain)
	if mixture:
		I = Nchain/2 + random.permutation(Nchain/2)[:10]
	else:
		# only the chain entry with the highest posterior value
		I = array([argmax([p for (p, params) in chain])])
	for i in I:
		(p,params) = chain[i]
		(m, b, Pbad, Ybad, Vbad) = params
		ys = m * xs + b
		plot(xs, ys, color='k', alpha=0.3)
	axis(a)
	savefig(prefix + '-xy.pdf')

	if mixture:
		# accumulate, over the second half of the chain, the per-point
		# likelihood terms evaluated with the Pbad argument set to 1
		# (weighted by Pbad) and set to 0 (weighted by 1 - Pbad)
		bgp = zeros(len(x))
		fgp = zeros(len(x))
		for (p,params) in chain[Nchain/2:]:
			(m, b, Pbad, Ybad, Vbad) = params
			bgp += Pbad      * single_point_likelihoods(x, y, yvar, m, b, 1, Ybad, Vbad)
			fgp += (1.-Pbad) * single_point_likelihoods(x, y, yvar, m, b, 0, Ybad, Vbad)
		bgodds = bgp / fgp
		# label only the points whose odds ratio is at least 1, with its
		# base-10 logarithm
		for i,bgo in enumerate(bgodds):
			if bgo < 1:
				continue
			dxl = (xmax-xmin) * 0.01
			dyl = (ymax-ymin) * 0.01
			t = text(x[i]+dxl, y[i]+dyl, '%.1f' % log10(bgo),
					 horizontalalignment='left',
					 verticalalignment='bottom', alpha=0.3)
		savefig(prefix + '-xy-bg.pdf')

	clf()
	# slopes and intercepts for the second half of the chain
	ms = array([m for (p, (m, b, Pbad, Ybad, Vbad)) in chain[Nchain/2:]])
	bs = array([b for (p, (m, b, Pbad, Ybad, Vbad)) in chain[Nchain/2:]])
	#plot(ms, bs, 'k,', alpha=0.1)
	#xlabel('slope $m$')
	#ylabel('intercept $b$')
	#savefig(prefix + '-mb-scatter.pdf')

	clf()
	# 2-d histogram of (slope, intercept), shown on a log(1 + count) scale
	(H, xe, ye) = histogram2d(ms, bs, bins=(100,100))
	print 'max H:', H.max()
	imshow(log(1 + H.T), extent=(xe.min(), xe.max(), ye.min(), ye.max()), aspect='auto',
		   cmap=antigray)
	xlabel('slope $m$')
	ylabel('intercept $b$')
	savefig(prefix + '-mb.pdf')
コード例 #16
0
def plot_data_allerr():
    """plot_data_allerr: Draw every data point with its full error ellipse

    Reads 'data_allerr.dat', builds one covariance ellipse per data point
    and saves the resulting figure as 'data_allerr.png'.

    Output:
       0 (the figure is written to disk)
    History:
       2009-05-20 - Written - Bovy (NYU)

    """
    #Load the measurements
    points = read_data('data_allerr.dat', True)
    npoints = len(points)
    #Storage for the points, their ellipses and the plot limits
    ids = sc.zeros(npoints)
    xs = sc.zeros(npoints)
    ys = sc.zeros(npoints)
    ellipses = []
    xlo, xhi = 0, 0
    ylo, yhi = 0, 0
    for kk in range(npoints):
        point = points[kk]
        ids[kk] = point[0]
        xs[kk] = point[1][0]
        ys[kk] = point[1][1]
        #2x2 covariance matrix of this point
        covar = sc.zeros((2, 2))
        covar[0, 0] = point[3]**2.
        covar[1, 1] = point[2]**2.
        covar[0, 1] = point[4] * m.sqrt(covar[0, 0] * covar[1, 1])
        covar[1, 0] = covar[0, 1]
        #Its eigen-decomposition gives the axis lengths and the rotation
        evals, evecs = linalg.eig(covar)
        angle = m.atan(-evecs[0, 1] / evecs[1, 1]) / m.pi * 180.
        ellipses.append(
            Ellipse(sc.array([xs[kk], ys[kk]]), 2 * m.sqrt(evals[0]),
                    2 * m.sqrt(evals[1]), angle))
        #Grow the plot limits to hold the point plus or minus the square
        #root of the matching diagonal covariance entry
        xhi = max(xhi, xs[kk] + m.sqrt(covar[0, 0]))
        xlo = min(xlo, xs[kk] - m.sqrt(covar[0, 0]))
        yhi = max(yhi, ys[kk] + m.sqrt(covar[1, 1]))
        ylo = min(ylo, ys[kk] - m.sqrt(covar[1, 1]))

    #Figure set-up
    rcParams.update({
        'axes.labelsize': 12,
        'text.fontsize': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': [7.5, 7.5]
    })
    fig = figure()
    ax = fig.add_subplot(111)
    #Unfilled error ellipses first, then the points themselves
    for ell in ellipses:
        ax.add_artist(ell)
        ell.set_facecolor('none')
    ax.plot(xs, ys, color='k', marker='o', linestyle='None')
    ax.set_xlabel(r'$x$')
    ax.set_ylabel(r'$y$')
    ax.set_xlim((xlo, xhi))
    ax.set_ylim((ylo, yhi))
    savefig('data_allerr.png', format='png')

    return 0
コード例 #17
0
def ex8(plotfilename='ex8.png',nburn=1000,nsamples=10000,parsigma=[.075,2.,0.1]):
    """ex8: solve exercise 8 by sampling a good/bad mixture model

    Starting from the chi-squared straight-line fit, every iteration
    redraws the per-point flags q (1 = good, 0 = bad) one at a time,
    perturbs m, b and pgood with Gaussian steps, and accepts or rejects
    the new state by comparing lnposterior values. The best state seen
    is plotted as a dashed line over the data.

    Input:
       plotfilename   - filename for the output plot
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian) for
                        m, b and pgood (only read, never modified)
    Output:
       plot; returns 0
    History:
       2009-06-25 -- hacked from Bovy code - Hogg (NYU)
    """
    sc.random.seed(-1) #In the interest of reproducibility (if that's a word)
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    #Put the data in the appropriate arrays and matrices
    X= sc.zeros(ndata)
    Y= sc.zeros(ndata)
    A= sc.ones((ndata,2))
    Yivar= sc.zeros(ndata)
    C= sc.zeros((ndata,ndata))
    yerr= sc.zeros(ndata)
    for ii in range(ndata):
        X[ii]= data[ii][1][0]
        Y[ii]= data[ii][1][1]
        A[ii,1]= data[ii][1][0]
        Yivar[ii]= 1.0/(data[ii][2]**2)
        C[ii,ii]= data[ii][2]**2
        yerr[ii]= data[ii][2]
    #First find the chi-squared solution, which we will use as an
    #initial guess
    bestfit= sc.dot(linalg.inv(C),Y.T)
    bestfit= sc.dot(A.T,bestfit)
    bestfitvar= sc.dot(linalg.inv(C),A)
    bestfitvar= sc.dot(A.T,bestfitvar)
    bestfitvar= linalg.inv(bestfitvar)
    bestfit= sc.dot(bestfitvar,bestfit)
    m= bestfit[1]
    b= bestfit[0]
    #Start with the first four points flagged bad, all others good
    q= sc.array([1 for cc in range(ndata)])
    q[0:4] = 0
    pgood= 0.9
    #pgood=0.999759#3 sigma for uncertainty~50
    initialguess= [m,b,q,pgood]
    #Single-argument print(...) behaves the same on Python 2 and 3
    print(initialguess)
    #With this initial guess start off the sampling procedure
    bgmean= sc.mean(Y)
    bgivar= 1.0/sc.sum((Y-bgmean)**2)
    initialX= lnposterior(X,Y,Yivar,m,b,q,pgood,bgmean,bgivar)
    currentX= initialX
    bestX= initialX
    bestfit= initialguess
    currentguess= initialguess
    naccept= 0
    for jj in range(nburn+nsamples):
        #Draw a sample from the proposal distribution
        #(deep copy so that editing q in place leaves currentguess intact)
        thisguess = c.deepcopy(currentguess)
        m= thisguess[0]
        b= thisguess[1]
        q= thisguess[2]
        pgood= thisguess[3]
        #First Gibbs sample each q
        for ii in range(ndata):
            thisdatagood= ma.sqrt(Yivar[ii]/(2.*math.pi))*ma.exp(-.5*(Y[ii]-m*X[ii]-b)**2.*Yivar[ii])*pgood
            thisdatabad= ma.sqrt(bgivar/(2.*math.pi))*ma.exp(-.5*(Y[ii]-bgmean)**2.*bgivar)*(1.0-pgood)
            a= thisdatagood/(thisdatagood+thisdatabad)
            u= stats.uniform.rvs()
            if u<a:
                q[ii]= 1
            else:
                q[ii]= 0
        #Then Metropolis sample m and b
        m += stats.norm.rvs()*parsigma[0]
        b += stats.norm.rvs()*parsigma[1]
        pgood += stats.norm.rvs()*parsigma[2]
        #Keep pgood inside [1-MAXP, MAXP]
        if pgood > MAXP: pgood = MAXP
        if pgood < (1.0-MAXP): pgood = (1.0-MAXP)
        newsample = [m,b,q,pgood]
        #Calculate the objective function for the newsample
        newX= lnposterior(X,Y,Yivar,m,b,q,pgood,bgmean,bgivar)
        #Accept or reject
        #Reject with the appropriate probability
        u= stats.uniform.rvs()
        try:
            test= ma.exp(newX-currentX)
        except OverflowError:
            #Huge improvement: any value above 1 forces acceptance
            test= 2.
        if u < test:
            #Accept
            currentX= newX
            currentguess= newsample
            naccept= naccept+1
        if currentX > bestX:
            print(currentguess)
            bestfit= currentguess
            bestX= currentX
    print("Acceptance ratio was "+str(double(naccept)/(nburn+nsamples)))

    #Now plot the best solution
    fig_width=5
    fig_height=5
    fig_size =  [fig_width,fig_height]
    params = {'axes.labelsize': 12,
              'text.fontsize': 11,
              'legend.fontsize': 12,
              'xtick.labelsize':10,
              'ytick.labelsize':10,
              'text.usetex': True,
              'figure.figsize': fig_size}
    rcParams.update(params)
    #Plot data (color used to be passed twice, which is a SyntaxError)
    errorbar(X,Y,yerr,color='k',marker='o',linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    xlim(0,300)
    ylim(0,700)
    xmin, xmax= xlim()
    (m,b,q,pgood) = bestfit
    print(bestfit)
    print(m)
    xs= sc.linspace(xmin,xmax,3)
    ys= m*xs+b
    #Legend text of the form "y = m x +/- |b|"
    if b < 0:
        sgn_str= '-'
    else:
        sgn_str= '+'
    label= r'$y = %4.2f\, x'% m+sgn_str+ '%4.0f ' % ma.fabs(b)+'$'
    plot(xs,ys,color='k',ls='--',label=label)
    l=legend(loc=(.3,.1),numpoints=8)
    l.draw_frame(False)
    xlim(0,300)
    ylim(0,700)
    savefig(plotfilename,format='png')
    
    return 0
コード例 #18
0
def ex9(exclude=sc.array([1, 2, 3, 4]),
        plotfilename='ex9.png',
        zoom=False,
        bovyprintargs={}):
    """ex9: solve exercise 9

    For a grid of trial values S, every data point is given the same
    variance S, a straight line is fitted by least squares and chi2 of
    that fit is stored. The curve chi2(S) is plotted together with a
    dashed horizontal line at nsample - 2 and a grey vertical line at
    the mean of the squared quoted uncertainties.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
       zoom           - zoom in (S in [900, 1000] instead of [0.001, 1500])
       bovyprintargs  - keyword arguments handed to plot.bovy_print
                        (only read here, never modified)
    Output:
       plot; returns 0
    History:
       2009-05-27 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    nsample = ndata - len(exclude)
    nSs = 1001
    if zoom:
        Srange = [900, 1000]
    else:
        Srange = [0.001, 1500]
    Ss = sc.linspace(Srange[0], Srange[1], nSs)
    #The data arrays do not depend on S, so fill them once, outside the
    #loop over S
    Y = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        A[jj, 1] = data[ii][1][0]
        yerr[jj] = data[ii][2]
        jj = jj + 1
    nfilled = jj  #number of rows actually populated
    chi2s = sc.zeros(nSs)
    for kk in range(nSs):
        #Covariance matrix with the trial variance on the diagonal
        C = sc.zeros((nsample, nsample))
        for jj in range(nfilled):
            C[jj, jj] = Ss[kk]
        #Now compute the best fit and the uncertainties
        Cinv = linalg.inv(C)
        bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
        bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y.T)))
        chi2s[kk] = chi2(bestfit, A, Y, C)

    #Now plot the solution
    plot.bovy_print(**bovyprintargs)
    #Plot chi2 as a function of S
    xlimits = Srange
    if zoom:
        ylimits = [nsample - 4, nsample]
    else:
        ylimits = [nsample - 10, nsample + 8]
    plot.bovy_plot(Ss,
                   chi2s,
                   'k-',
                   xrange=xlimits,
                   yrange=ylimits,
                   xlabel=r'$S$',
                   ylabel=r'$\chi^2$',
                   zorder=1)
    #Horizontal reference line at nsample - 2
    plot.bovy_plot(sc.array(Srange),
                   sc.array([nsample - 2, nsample - 2]),
                   'k--',
                   zorder=2,
                   overplot=True)
    #Vertical reference line at the mean squared quoted uncertainty
    plot.bovy_plot(sc.array([sc.mean(yerr**2.),
                             sc.mean(yerr**2.)]),
                   sc.array(ylimits),
                   color='0.75',
                   overplot=True)
    plot.bovy_end_print(plotfilename)

    return 0
コード例 #19
0
def ex7c(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex7c.png'):
    """ex7c: solve exercise 7 using a simulated annealing optimization

    The weighted least-squares straight line is used as the starting point.
    For each Q in (1, 2) ten seeded runs of optimize.anneal are performed on
    softchisquared and the parameters with the lowest objective value are
    kept; the data and the resulting lines are then plotted.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
    Output:
       plot (returns 0, or -1 when the user answers 'n' at the prompt)
    History:
       2009-06-02 - Written - Bovy (NYU)
    """
    #raw_input only exists in Python 2; fall back to input in Python 3
    try:
        ask = raw_input
    except NameError:
        ask = input
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the annealing runs
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        jj = jj + 1
    #Now compute the best fit: (A^T C^-1 A)^-1 A^T C^-1 Y
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    initialguess = sc.array([bestfit[0], bestfit[1]])
    #With this initial guess start off the annealing procedure
    Qs = [1., 2.]
    bestfitssoft = sc.zeros((2, len(Qs)))
    #Ordinary chi-squared of the initial guess; an annealing run only
    #replaces the initial guess when its objective value is below this
    initialchisq = 0.
    for jj in range(nsample):
        initialchisq = initialchisq + (Y[jj] - X[jj] * initialguess[1] -
                                       initialguess[0])**2 / (yerr[jj]**2)
    chisqQ = sc.zeros(len(Qs))
    for ii in range(len(Qs)):
        chisqQ[ii] = initialchisq
        bestfit = initialguess
        nonglobal = True
        print("Working on Q = " + str(Qs[ii]))
        print("Performing 10 runs of the simulating annealing "
              "optimization algorithm")
        for jj in range(10):  #Do ten runs of the sa algorithm
            #Fixed seeds, in the interest of reproducibility
            sc.random.seed(jj + 1)
            bestfitsoft = optimize.anneal(
                softchisquared,
                initialguess, (X, Y, yerr, Qs[ii]),
                schedule='boltzmann',
                full_output=1)  #,dwell=200,maxiter=1000)
            #With full_output, element 0 holds the parameters and
            #element 1 the objective value
            if bestfitsoft[1] < chisqQ[ii]:
                bestfit = bestfitsoft[0]
                chisqQ[ii] = bestfitsoft[1]
            #Element 6 is the status flag; 0 is treated here as
            #"cooled to the global optimum" -- TODO confirm against the
            #documentation of the SciPy version in use
            if bestfitsoft[6] == 0:
                nonglobal = False
        if nonglobal:
            print("Did not cool to the global optimum")
        try:
            answer = ask('continue to plot? [yn]\n')
        except EOFError:
            print("Since you are in non-interactive mode I will assume 'y'")
            answer = 'y'
        if answer == 'n':
            print("returning...")
            return -1
        bestfitssoft[:, ii] = bestfit

    #Now plot the solution
    fig_width = 5
    fig_height = 5
    fig_size = [fig_width, fig_height]
    #NOTE(review): 'text.fontsize' appears not to be a valid rcParams key
    #in recent matplotlib releases -- confirm for the version in use
    params = {
        'axes.labelsize': 12,
        'text.fontsize': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    rcParams.update(params)
    #Plot data (color used to be passed twice, which is a SyntaxError)
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles = ('--', ':', '-.')
    for jj in range(len(Qs)):
        xlim(0, 300)
        ylim(0, 700)
        xmin, xmax = xlim()
        nsamples = 1001
        xs = sc.linspace(xmin, xmax, nsamples)
        ys = sc.zeros(nsamples)
        for ii in range(nsamples):
            ys[ii] = bestfitssoft[0, jj] + bestfitssoft[1, jj] * xs[ii]
        #The sign of the intercept only depends on jj, so set it once
        if bestfitssoft[0, jj] < 0:
            sgn_str = '-'
        else:
            sgn_str = '+'
        label = (r'$Q= ' +
                 r'%i: y = %4.2f\, x' % (Qs[jj], bestfitssoft[1, jj]) +
                 sgn_str +
                 '%4.0f ' % m.fabs(bestfitssoft[0, jj]) +
                 r'; \chi^2_Q = ' +
                 '%3.1f' % chisqQ[jj] +
                 '$')
        plot(xs, ys, color='k', ls=linestyles[jj], label=label)
    l = legend(loc=(.2, .1), numpoints=8)
    l.draw_frame(False)
    xlim(0, 300)
    ylim(0, 700)
    savefig(plotfilename, format='png')

    return 0
コード例 #20
0
def ex6c(exclude=sc.array([1, 2, 3, 4]),
         plotfilename='ex6c.png',
         nburn=100,
         nsamples=10000,
         parsigma=[5, .075]):
    """ex6c: solve exercise 6 using MCMC sampling

    Starting from the weighted least-squares line, a Metropolis random walk
    with independent Gaussian proposals is run for nburn + nsamples steps on
    the logbiexp objective; the sample with the lowest objective value is
    plotted on top of the data.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian), as
                        [width for parameter 0, width for parameter 1]
    Output:
       plot (returns 0)
    History:
       2009-06-02 - Written - Bovy (NYU)
    """
    sc.random.seed(100)  #In the interest of reproducibility (if that's a word)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the sampling
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        jj = jj + 1
    #Now compute the best fit: (A^T C^-1 A)^-1 A^T C^-1 Y
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    initialguess = sc.array([bestfit[0], bestfit[1]])
    #With this initial guess start off the sampling procedure
    #NOTE(review): the starting objective is computed by hand as
    #sum(|residual|/yerr); this presumes logbiexp returns the same
    #quantity -- confirm against its definition
    initialX = 0.
    for jj in range(nsample):
        initialX = initialX + m.fabs(Y[jj] - bestfit[1] * X[jj] -
                                     bestfit[0]) / yerr[jj]
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution
        newsample = sc.zeros(2)
        newsample[0] = currentguess[0] + stats.norm.rvs() * parsigma[0]
        newsample[1] = currentguess[1] + stats.norm.rvs() * parsigma[1]
        #Calculate the objective function for the newsample
        newX = logbiexp(newsample, X, Y, yerr)
        #Accept or reject
        #The uniform deviate is always drawn so that the random stream does
        #not depend on the outcome. A proposal that does not increase the
        #objective is always accepted (exp(...) >= 1 > u); testing that
        #first keeps m.exp from overflowing on a very large improvement
        u = stats.uniform.rvs()
        if newX <= currentX or u < m.exp(currentX - newX):
            #Accept
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
        if currentX < bestX:
            bestfit = currentguess
            bestX = currentX
    bestfitsbiexp = bestfit
    #Report the acceptance ratio when it falls outside [.5, .8]
    acceptratio = double(naccept) / (nburn + nsamples)
    if acceptratio < .5 or acceptratio > .8:
        print("Acceptance ratio was " + str(acceptratio))

    #Now plot the solution
    fig_width = 5
    fig_height = 5
    fig_size = [fig_width, fig_height]
    params = {
        'axes.labelsize': 12,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    rcParams.update(params)
    #Plot data
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    xlim(0, 300)
    ylim(0, 700)
    xmin, xmax = xlim()
    #Resolution of the plotted line; kept separate from the nsamples
    #argument, which counts MCMC samples
    nplot = 1001
    xs = sc.linspace(xmin, xmax, nplot)
    ys = sc.zeros(nplot)
    for ii in range(nplot):
        ys[ii] = bestfitsbiexp[0] + bestfitsbiexp[1] * xs[ii]
    if bestfitsbiexp[0] < 0:
        sgn_str = '-'
    else:
        sgn_str = '+'
    label = r'$y = %4.2f\, x' % (
        bestfitsbiexp[1]) + sgn_str + '%4.0f ' % m.fabs(
            bestfitsbiexp[0]) + r'; X = ' + '%3.1f' % bestX + '$'
    plot(xs, ys, color='k', ls='--', label=label)
    l = legend(loc=(.3, .1), numpoints=8)
    l.draw_frame(False)
    xlim(0, 300)
    ylim(0, 700)
    print('Creating: ', plotfilename)
    savefig(plotfilename, format='png')

    return 0
コード例 #21
0
def ex7a(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex7a.png'):
    """ex7a: solve exercise 7 using non-linear optimization

    The weighted least-squares straight line is used as the starting point.
    For each Q in (1, 2) softchisquared is minimized with optimize.fmin and
    the result is refined with optimize.fmin_powell; the data and the
    resulting lines are then plotted.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
    Output:
       plot (returns 0, or -1 when the user answers 'n' at the prompt)
    History:
       2009-06-01 - Written - Bovy (NYU)
    """
    #raw_input only exists in Python 2; fall back to input in Python 3
    try:
        ask = raw_input
    except NameError:
        ask = input
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the soft chi-squared optimization
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        jj = jj + 1
    #Now compute the best fit: (A^T C^-1 A)^-1 A^T C^-1 Y
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    initialguess = sc.array([bestfit[0], bestfit[1]])
    #Now optimize the soft chi-squared objective function
    Qs = [1., 2.]
    bestfitssoft = sc.zeros((2, len(Qs)))
    chisqQ = sc.zeros(len(Qs))
    for ii in range(len(Qs)):
        print("Working on Q = " + str(Qs[ii]))
        bestfitsoft1 = optimize.fmin(softchisquared,
                                     initialguess, (X, Y, yerr, Qs[ii]),
                                     disp=False)
        #Restart the optimization once using a different method
        bestfitsoft = optimize.fmin_powell(softchisquared,
                                           bestfitsoft1, (X, Y, yerr, Qs[ii]),
                                           disp=False)
        #Warn (and ask whether to go on) when the two optimizers disagree
        difference = linalg.norm(bestfitsoft - bestfitsoft1)
        if difference > 10**-12:
            if difference < 10**-6:
                print("Different optimizers give slightly different "
                      "results...")
            else:
                print("Different optimizers give rather different results...")
            print("The norm of the results differs by %g" % difference)
            try:
                answer = ask('continue to plot? [yn]\n')
            except EOFError:
                print("Since you are in non-interactive mode I will "
                      "assume 'y'")
                answer = 'y'
            if answer == 'n':
                print("returning...")
                return -1
        bestfitssoft[:, ii] = bestfitsoft
        #Calculate chi^2_Q = sum 1/(yerr^2/residual^2 + 1/Q^2)
        #The residual uses element 1 as slope and element 0 as intercept,
        #the same convention as the plotted line below (the intercept used
        #to be taken from element 1 by mistake)
        for jj in range(nsample):
            residual = Y[jj] - X[jj] * bestfitsoft[1] - bestfitsoft[0]
            chisqQ[ii] = chisqQ[ii] + 1. / (
                yerr[jj]**2 / residual**2 + 1. / Qs[ii]**2)

    #Now plot the solution
    fig_width = 5
    fig_height = 5
    fig_size = [fig_width, fig_height]
    #NOTE(review): 'text.fontsize' appears not to be a valid rcParams key
    #in recent matplotlib releases -- confirm for the version in use
    params = {
        'axes.labelsize': 12,
        'text.fontsize': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    rcParams.update(params)
    #Plot data (color used to be passed twice, which is a SyntaxError)
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles = ('--', ':', '-.')
    for jj in range(len(Qs)):
        xlim(0, 300)
        ylim(0, 700)
        xmin, xmax = xlim()
        nsamples = 1001
        xs = sc.linspace(xmin, xmax, nsamples)
        ys = sc.zeros(nsamples)
        for ii in range(nsamples):
            ys[ii] = bestfitssoft[0, jj] + bestfitssoft[1, jj] * xs[ii]
        #The sign of the intercept only depends on jj, so set it once
        if bestfitssoft[0, jj] < 0:
            sgn_str = '-'
        else:
            sgn_str = '+'
        label = (r'$Q= ' +
                 r'%i: y = %4.2f\, x' % (Qs[jj], bestfitssoft[1, jj]) +
                 sgn_str +
                 '%4.0f ' % m.fabs(bestfitssoft[0, jj]) +
                 r'; \chi^2_Q = ' +
                 '%3.1f' % chisqQ[jj] +
                 '$')
        plot(xs, ys, color='k', ls=linestyles[jj], label=label)
    l = legend(loc=(.2, .1), numpoints=8)
    l.draw_frame(False)
    xlim(0, 300)
    ylim(0, 700)
    savefig(plotfilename, format='png')

    return 0
コード例 #22
0
def ex7c(exclude=sc.array([1,2,3,4]),plotfilename='ex7c.png'):
    """ex7c: solve exercise 7 using a simulated annealing optimization

    The weighted least-squares straight line is used as the starting point.
    For each Q in (1, 2) ten seeded runs of optimize.anneal are performed on
    softchisquared and the parameters with the lowest objective value are
    kept; the data and the resulting lines are then plotted.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
    Output:
       plot (returns 0, or -1 when the user answers 'n' at the prompt)
    History:
       2009-06-02 - Written - Bovy (NYU)
    """
    #raw_input only exists in Python 2; fall back to input in Python 3
    try:
        ask= raw_input
    except NameError:
        ask= input
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    nsample= ndata- len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the annealing runs
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2.
        yerr[jj]= data[ii][2]
        jj= jj+1
    #Now compute the best fit: (A^T C^-1 A)^-1 A^T C^-1 Y
    bestfit= sc.dot(linalg.inv(C),Y.T)
    bestfit= sc.dot(A.T,bestfit)
    bestfitvar= sc.dot(linalg.inv(C),A)
    bestfitvar= sc.dot(A.T,bestfitvar)
    bestfitvar= linalg.inv(bestfitvar)
    bestfit= sc.dot(bestfitvar,bestfit)
    initialguess= sc.array([bestfit[0],bestfit[1]])
    #With this initial guess start off the annealing procedure
    Qs= [1.,2.]
    bestfitssoft= sc.zeros((2,len(Qs)))
    #Ordinary chi-squared of the initial guess; an annealing run only
    #replaces the initial guess when its objective value is below this
    initialchisq= 0.
    for jj in range(nsample):
        initialchisq= initialchisq+(Y[jj]-X[jj]*initialguess[1]
                                    -initialguess[0])**2/(yerr[jj]**2)
    chisqQ= sc.zeros(len(Qs))
    for ii in range(len(Qs)):
        chisqQ[ii]= initialchisq
        bestfit= initialguess
        nonglobal= True
        print("Working on Q = "+str(Qs[ii]))
        print("Performing 10 runs of the simulating annealing "
              "optimization algorithm")
        for jj in range(10):#Do ten runs of the sa algorithm
            #Fixed seeds, in the interest of reproducibility
            sc.random.seed(jj+1)
            bestfitsoft= optimize.anneal(softchisquared,initialguess,
                                         (X,Y,yerr,Qs[ii]),
                                         schedule='boltzmann',
                                         full_output=1)#,dwell=200,maxiter=1000)
            #With full_output, element 0 holds the parameters and
            #element 1 the objective value
            if bestfitsoft[1] < chisqQ[ii]:
                bestfit= bestfitsoft[0]
                chisqQ[ii]= bestfitsoft[1]
            #Element 6 is the status flag; 0 is treated here as
            #"cooled to the global optimum" -- TODO confirm against the
            #documentation of the SciPy version in use
            if bestfitsoft[6] == 0:
                nonglobal= False
        if nonglobal:
            print("Did not cool to the global optimum")
        try:
            answer= ask('continue to plot? [yn]\n')
        except EOFError:
            print("Since you are in non-interactive mode I will assume 'y'")
            answer= 'y'
        if answer == 'n':
            print("returning...")
            return -1
        bestfitssoft[:,ii]= bestfit

    #Now plot the solution
    fig_width=5
    fig_height=5
    fig_size =  [fig_width,fig_height]
    #NOTE(review): 'text.fontsize' appears not to be a valid rcParams key
    #in recent matplotlib releases -- confirm for the version in use
    params = {'axes.labelsize': 12,
              'text.fontsize': 11,
              'legend.fontsize': 12,
              'xtick.labelsize':10,
              'ytick.labelsize':10,
              'text.usetex': True,
              'figure.figsize': fig_size}
    rcParams.update(params)
    #Plot data (color used to be passed twice, which is a SyntaxError)
    errorbar(X,Y,yerr,color='k',marker='o',linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles= ('--',':', '-.')
    for jj in range(len(Qs)):
        xlim(0,300)
        ylim(0,700)
        xmin, xmax= xlim()
        nsamples= 1001
        xs= sc.linspace(xmin,xmax,nsamples)
        ys= sc.zeros(nsamples)
        for ii in range(nsamples):
            ys[ii]= bestfitssoft[0,jj]+bestfitssoft[1,jj]*xs[ii]
        #The sign of the intercept only depends on jj, so set it once
        if bestfitssoft[0,jj] < 0:
            sgn_str= '-'
        else:
            sgn_str= '+'
        label= (r'$Q= '
                +r'%i: y = %4.2f\, x' % (Qs[jj], bestfitssoft[1,jj])
                +sgn_str
                +'%4.0f ' % m.fabs(bestfitssoft[0,jj])
                +r'; \chi^2_Q = '
                +'%3.1f' % chisqQ[jj]
                +'$')
        plot(xs,ys,color='k',ls=linestyles[jj],label=label)
    l=legend(loc=(.2,.1),numpoints=8)
    l.draw_frame(False)
    xlim(0,300)
    ylim(0,700)
    savefig(plotfilename,format='png')

    return 0
コード例 #23
0
ファイル: ex16.py プロジェクト: dstndstn/DataAnalysisRecipes
def ex16(exclude=sc.array([3]), plotfilename='ex16.png', bovyprintargs={}):
    """ex16: solve exercise 16 by optimization of the objective function

    The weighted least-squares straight line (y uncertainties only) seeds a
    three-parameter optimization of objective, run with both optimize.fmin
    and optimize.fmin_powell. The Powell result is plotted as two dashed
    lines together with the data points and their error ellipses.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
       bovyprintargs  - keyword arguments passed on to plot.bovy_print
    Output:
       plot (returns -1 without plotting when the user answers 'n' at the
       prompt)
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #raw_input only exists in Python 2; fall back to input in Python 3
    try:
        ask = raw_input
    except NameError:
        ask = input
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    Z = sc.zeros((nsample, 2))
    yerr = sc.zeros(nsample)
    ycovar = sc.zeros((2, nsample, 2))  #Makes the sc.dot easier
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        Z[jj, 0] = X[jj]
        Z[jj, 1] = Y[jj]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        #2x2 covariance of point jj, stored as ycovar[:, jj, :]; the
        #off-diagonal term is data[ii][4] times the product of the two
        #standard deviations
        ycovar[0, jj, 0] = data[ii][3]**2.
        ycovar[1, jj, 1] = data[ii][2]**2.
        ycovar[0, jj, 1] = data[ii][4] * m.sqrt(
            ycovar[0, jj, 0] * ycovar[1, jj, 1])
        ycovar[1, jj, 0] = ycovar[0, jj, 1]
        jj = jj + 1
    #Now compute the best fit: (A^T C^-1 A)^-1 A^T C^-1 Y
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    #Now optimize
    initial = sc.array([bestfit[0], bestfit[1], sc.log(100)])
    bestfit2d1 = optimize.fmin(objective, initial, (Z, ycovar), disp=False)
    #Run the optimization again using a different method
    #NOTE(review): this starts from initial rather than from bestfit2d1;
    #confirm that an independent run (not a restart) is intended
    bestfit2d = optimize.fmin_powell(objective,
                                     initial, (Z, ycovar),
                                     disp=False)
    #Warn (and ask whether to go on) when the two optimizers disagree
    difference = linalg.norm(bestfit2d - bestfit2d1)
    if difference > 10**-12:
        if difference < 10**-6:
            print("Different optimizers give slightly different results...")
        else:
            print("Different optimizers give rather different results...")
        print("The norm of the results differs by %g" % difference)
        try:
            answer = ask('continue to plot? [yn]\n')
        except EOFError:
            print("Since you are in non-interactive mode I will assume 'y'")
            answer = 'y'
        if answer == 'n':
            print("returning...")
            return -1

    b = bestfit2d[0]
    mf = bestfit2d[1]
    V = sc.exp(bestfit2d[2] / 2.)
    cost = 1. / sc.sqrt(1 + mf**2.)
    bcost = b * cost

    #Plot result
    plot.bovy_print(**bovyprintargs)
    xlimits = [0, 300]
    ylimits = [0, 700]
    plot.bovy_plot(sc.array(xlimits),
                   bestfit2d[1] * sc.array(xlimits) + bcost + V,
                   'k--',
                   xrange=xlimits,
                   yrange=ylimits,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    plot.bovy_plot(sc.array(xlimits),
                   bestfit2d[1] * sc.array(xlimits) + bcost - V,
                   'k--',
                   zorder=2,
                   overplot=True)

    #Plot the data OMG straight from plot_data.py
    data = read_data('data_allerr.dat', True)
    ndata = len(data)
    #Create the ellipses and the data points
    xpoints = sc.zeros(nsample)
    ypoints = sc.zeros(nsample)
    ellipses = []
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        xpoints[jj] = data[ii][1][0]
        ypoints[jj] = data[ii][1][1]
        #Calculate the eigenvalues and the rotation angle
        pointcovar = sc.zeros((2, 2))
        pointcovar[0, 0] = data[ii][3]**2.
        pointcovar[1, 1] = data[ii][2]**2.
        pointcovar[0, 1] = data[ii][4] * m.sqrt(
            pointcovar[0, 0] * pointcovar[1, 1])
        pointcovar[1, 0] = pointcovar[0, 1]
        #eigs[0] holds the eigenvalues, eigs[1] the eigenvectors
        eigs = linalg.eig(pointcovar)
        angle = m.atan(-eigs[1][0, 1] / eigs[1][1, 1]) / m.pi * 180.
        #Full axis lengths are twice the square root of the eigenvalues
        thisellipse = Ellipse(sc.array([xpoints[jj], ypoints[jj]]),
                              2 * m.sqrt(eigs[0][0]),
                              2 * m.sqrt(eigs[0][1]), angle)
        ellipses.append(thisellipse)
        jj = jj + 1

    #Add the error ellipses
    ax = gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    ax.plot(xpoints, ypoints, color='k', marker='o', linestyle='None')

    plot.bovy_end_print(plotfilename)
コード例 #24
0
def ex7a(exclude=sc.array([1,2,3,4]),plotfilename='ex7a.png'):
    """ex7a: solve exercise 7 using non-linear optimization

    The weighted least-squares straight line is used as the starting point.
    For each Q in (1, 2) softchisquared is minimized with optimize.fmin and
    the result is refined with optimize.fmin_powell; the data and the
    resulting lines are then plotted.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
    Output:
       plot (returns 0, or -1 when the user answers 'n' at the prompt)
    History:
       2009-06-01 - Written - Bovy (NYU)
    """
    #raw_input only exists in Python 2; fall back to input in Python 3
    try:
        ask= raw_input
    except NameError:
        ask= input
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    nsample= ndata- len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the soft chi-squared optimization
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2.
        yerr[jj]= data[ii][2]
        jj= jj+1
    #Now compute the best fit: (A^T C^-1 A)^-1 A^T C^-1 Y
    bestfit= sc.dot(linalg.inv(C),Y.T)
    bestfit= sc.dot(A.T,bestfit)
    bestfitvar= sc.dot(linalg.inv(C),A)
    bestfitvar= sc.dot(A.T,bestfitvar)
    bestfitvar= linalg.inv(bestfitvar)
    bestfit= sc.dot(bestfitvar,bestfit)
    initialguess= sc.array([bestfit[0],bestfit[1]])
    #Now optimize the soft chi-squared objective function
    Qs= [1.,2.]
    bestfitssoft= sc.zeros((2,len(Qs)))
    chisqQ= sc.zeros(len(Qs))
    for ii in range(len(Qs)):
        print("Working on Q = "+str(Qs[ii]))
        bestfitsoft1= optimize.fmin(softchisquared,initialguess,
                                    (X,Y,yerr,Qs[ii]),disp=False)
        #Restart the optimization once using a different method
        bestfitsoft= optimize.fmin_powell(softchisquared,bestfitsoft1,
                                          (X,Y,yerr,Qs[ii]),disp=False)
        #Warn (and ask whether to go on) when the two optimizers disagree
        difference= linalg.norm(bestfitsoft-bestfitsoft1)
        if difference > 10**-12:
            if difference < 10**-6:
                print("Different optimizers give slightly different "
                      "results...")
            else:
                print("Different optimizers give rather different results...")
            print("The norm of the results differs by %g" % difference)
            try:
                answer= ask('continue to plot? [yn]\n')
            except EOFError:
                print("Since you are in non-interactive mode I will "
                      "assume 'y'")
                answer= 'y'
            if answer == 'n':
                print("returning...")
                return -1
        bestfitssoft[:,ii]= bestfitsoft
        #Calculate chi^2_Q = sum 1/(yerr^2/residual^2 + 1/Q^2)
        #The residual uses element 1 as slope and element 0 as intercept,
        #the same convention as the plotted line below (the intercept used
        #to be taken from element 1 by mistake)
        for jj in range(nsample):
            residual= Y[jj]-X[jj]*bestfitsoft[1]-bestfitsoft[0]
            chisqQ[ii]= chisqQ[ii]+1./(yerr[jj]**2/residual**2
                                       +1./Qs[ii]**2)

    #Now plot the solution
    fig_width=5
    fig_height=5
    fig_size =  [fig_width,fig_height]
    #NOTE(review): 'text.fontsize' appears not to be a valid rcParams key
    #in recent matplotlib releases -- confirm for the version in use
    params = {'axes.labelsize': 12,
              'text.fontsize': 11,
              'legend.fontsize': 12,
              'xtick.labelsize':10,
              'ytick.labelsize':10,
              'text.usetex': True,
              'figure.figsize': fig_size}
    rcParams.update(params)
    #Plot data (color used to be passed twice, which is a SyntaxError)
    errorbar(X,Y,yerr,color='k',marker='o',linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles= ('--',':', '-.')
    for jj in range(len(Qs)):
        xlim(0,300)
        ylim(0,700)
        xmin, xmax= xlim()
        nsamples= 1001
        xs= sc.linspace(xmin,xmax,nsamples)
        ys= sc.zeros(nsamples)
        for ii in range(nsamples):
            ys[ii]= bestfitssoft[0,jj]+bestfitssoft[1,jj]*xs[ii]
        #The sign of the intercept only depends on jj, so set it once
        if bestfitssoft[0,jj] < 0:
            sgn_str= '-'
        else:
            sgn_str= '+'
        label= (r'$Q= '
                +r'%i: y = %4.2f\, x' % (Qs[jj], bestfitssoft[1,jj])
                +sgn_str
                +'%4.0f ' % m.fabs(bestfitssoft[0,jj])
                +r'; \chi^2_Q = '
                +'%3.1f' % chisqQ[jj]
                +'$')
        plot(xs,ys,color='k',ls=linestyles[jj],label=label)
    l=legend(loc=(.2,.1),numpoints=8)
    l.draw_frame(False)
    xlim(0,300)
    ylim(0,700)
    savefig(plotfilename,format='png')

    return 0
コード例 #25
0
def ex6a(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex6a.png'):
    """ex6a: solve exercise 6 by optimization of the objective function

    A weighted least-squares straight line seeds optimize.fmin on logbiexp;
    the result is handed to optimize.fmin_powell, and the Powell solution is
    drawn on top of the data.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
    Output:
       plot (returns 0)
    History:
       2009-06-01 - Written - Bovy (NYU)
    """
    #Load the measurements
    rows = read_data('data_yerr.dat')
    nrows = len(rows)
    npoints = nrows - len(exclude)
    #Containers for the least-squares problem that provides the
    #starting point of the bi-exponential optimization
    ydata = sc.zeros(npoints)
    xdata = sc.zeros(npoints)
    design = sc.ones((npoints, 2))
    covar = sc.zeros((npoints, npoints))
    sigma = sc.zeros(npoints)
    slot = 0
    for irow in range(nrows):
        #Rows whose ID is listed in exclude are left out
        if sc.any(exclude == rows[irow][0]):
            continue
        ydata[slot] = rows[irow][1][1]
        xdata[slot] = rows[irow][1][0]
        design[slot, 1] = rows[irow][1][0]
        covar[slot, slot] = rows[irow][2]**2.
        sigma[slot] = rows[irow][2]
        slot += 1
    #Least-squares solution (A^T C^-1 A)^-1 A^T C^-1 Y
    invcovar = linalg.inv(covar)
    rhs = sc.dot(design.T, sc.dot(invcovar, ydata.T))
    fitvar = linalg.inv(sc.dot(design.T, sc.dot(invcovar, design)))
    lsqfit = sc.dot(fitvar, rhs)
    #Minimize the bi-exponential objective, then run a second optimizer
    #starting from the first optimizer's answer
    fitargs = (xdata, ydata, sigma)
    bestfitbiexp1 = optimize.fmin(logbiexp, lsqfit, fitargs, disp=False)
    bestfitbiexp = optimize.fmin_powell(logbiexp,
                                        bestfitbiexp1,
                                        fitargs,
                                        disp=False)
    #Report any disagreement between the two optimizers
    mismatch = linalg.norm(bestfitbiexp - bestfitbiexp1)
    if mismatch > 10**-12:
        if mismatch < 10**-6:
            print("Different optimizers give slightly different results...")
        else:
            print("Different optimizers give rather different results...")
        print("The norm of the results differs by %g" % mismatch)

    #Sum of absolute residuals in units of the y uncertainties
    XX = 0.
    for kk in range(npoints):
        XX += m.fabs(ydata[kk] - bestfitbiexp[1] * xdata[kk] -
                     bestfitbiexp[0]) / sigma[kk]

    #Figure set-up
    rcParams.update({
        'axes.labelsize': 12,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': [5, 5]
    })
    #Data points with error bars
    errorbar(xdata, ydata, sigma, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Best-fit line sampled across the x range of the plot
    xlim(0, 300)
    ylim(0, 700)
    xlow, xhigh = xlim()
    xs = sc.linspace(xlow, xhigh, 1001)
    ys = bestfitbiexp[0] + bestfitbiexp[1] * xs
    sgn_str = '-' if bestfitbiexp[0] < 0 else '+'
    slope_txt = r'$y = %4.2f\, x' % (bestfitbiexp[1])
    offset_txt = '%4.0f ' % m.fabs(bestfitbiexp[0])
    label = (slope_txt + sgn_str + offset_txt + r'; X = ' + '%3.1f' % XX +
             '$')
    plot(xs, ys, color='k', ls='--', label=label)
    frame = legend(loc=(.3, .1), numpoints=8)
    frame.draw_frame(False)
    plot(xs, ys, 'k--')
    xlim(0, 300)
    ylim(0, 700)

    print('Creating: ', plotfilename)
    savefig(plotfilename, format='png')

    return 0
コード例 #26
0
def ex13(exclude=sc.array([1,2,3,4]),plotfilename='ex13.png',
         nburn=1000,nsamples=100000,
         parsigma=[1,m.pi/200.,.01,.5,1.,.05,.1,.005],
         bovyprintargs={}):
    """ex13: solve exercise 13 by MCMC

    Reads 'data_allerr.dat', computes the weighted least-squares line to
    seed the sampler, then runs a Metropolis random walk over eight
    parameters scored by the module-level objective(). The mode of the 2D
    histogram of the first two parameters is converted to a line and
    plotted together with ten randomly drawn sample lines, a per-point
    label computed with Pbad(), and the error ellipses of the data.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian), one entry
                        per sampled parameter (8 in total)
       bovyprintargs  - keyword arguments handed to plot.bovy_print
    Output:
       plot
    History:
       2010-05-06 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_allerr.dat',allerr=True)
    ndata= len(data)
    nsample= ndata- len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    Z= sc.zeros((nsample,2))
    ycovar= sc.zeros((2,nsample,2))#Makes the sc.dot easier
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        X[jj]= data[ii][1][0]
        Y[jj]= data[ii][1][1]
        Z[jj,0]= X[jj]
        Z[jj,1]= Y[jj]
        A[jj,1]= X[jj]
        C[jj,jj]= data[ii][2]**2.
        #Full 2x2 covariance of point jj: x variance, y variance and the
        #off-diagonal term built from the correlation in column 4
        ycovar[0,jj,0]= data[ii][3]**2.
        ycovar[1,jj,1]= data[ii][2]**2.
        ycovar[0,jj,1]= data[ii][4]*m.sqrt(ycovar[0,jj,0]*ycovar[1,jj,1])
        ycovar[1,jj,0]= ycovar[0,jj,1]
        jj= jj+1
    #Now compute the best fit and the uncertainties
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y)))
    #Now sample
    #Angle of the least-squares line, folded into [0,pi] by the slope's sign
    inittheta= m.acos(1./m.sqrt(1.+bestfit[1]**2.))
    if bestfit[1] < 0.:
        inittheta= m.pi- inittheta
    #The first two entries are read back below as (b*cos(theta), theta).
    #NOTE(review): the meaning of the remaining six entries is fixed by
    #objective()/Pbad(), which are defined elsewhere; log(var(X)) appears
    #twice -- confirm the second was not meant to be log(var(Y)).
    initialguess= sc.array([m.cos(inittheta),inittheta,0.,sc.mean(X),sc.mean(Y),m.log(sc.var(X)),m.log(sc.var(X)),0.])
    npar= len(initialguess)
    #With this initial guess start off the sampling procedure
    currentguess= initialguess
    currentX= objective(initialguess,Z,ycovar)
    bestfit= initialguess
    bestX= currentX
    naccept= 0
    samples= [currentguess]
    for jj in range(nburn+nsamples):
        #Draw a sample from the proposal distribution
        newsample= sc.zeros(npar)
        for kk in range(npar):
            newsample[kk]= currentguess[kk]+stats.norm.rvs()*parsigma[kk]
        #Calculate the objective function for the newsample
        newX= objective(newsample,Z,ycovar)
        #Accept or reject
        #Reject with the appropriate probability
        u= stats.uniform.rvs()
        try:
            test= m.exp(newX-currentX)
        except OverflowError:
            #exp overflowed, i.e. the new point is far better: always accept
            test= 2.
        if u < test:
            #Accept
            currentX= newX
            currentguess= newsample
            naccept= naccept+1
        if currentX > bestX:
            bestfit= currentguess
            bestX= currentX
        samples.append(currentguess)
    acceptance= float(naccept)/(nburn+nsamples)
    if acceptance < .5 or acceptance > .8:
        print("Acceptance ratio was "+str(acceptance))

    #Drop the burn-in; rows are parameters, columns are samples
    samples= sc.array(samples).T[:,nburn:-1]
    nkept= samples.shape[1]
    print("Best-fit, overall")
    print(bestfit, sc.mean(samples[2,:]), sc.median(samples[2,:]))

    #The number of bins has to be an integer
    nbins= int(round(m.sqrt(nsamples)/2.))
    histmb,edges= sc.histogramdd(samples.T[:,0:2],bins=nbins)
    indxi= sc.argmax(sc.amax(histmb,axis=1))
    indxj= sc.argmax(sc.amax(histmb,axis=0))
    print("Best-fit, marginalized")
    print(edges[0][indxi-1], edges[1][indxj-1])
    print(edges[0][indxi], edges[1][indxj])
    print(edges[0][indxi+1], edges[1][indxj+1])

    #Convert the histogram mode (b*cos(theta), theta) to slope and intercept
    t= edges[1][indxj]
    bcost= edges[0][indxi]
    mf= m.sqrt(1./m.cos(t)**2.-1.)
    b= bcost/m.cos(t)
    print(b, mf)

    #Plot result
    plot.bovy_print(**bovyprintargs)
    xrange=[0,300]
    yrange=[0,700]
    plot.bovy_plot(sc.array(xrange),mf*sc.array(xrange)+b,
                   'k-',xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    for ii in range(10):
        #Random sample; the index must be an integer to be usable
        ranindx= int(stats.uniform.rvs()*nkept)
        ransample= samples[0:2,ranindx]
        ranm= m.sqrt(1./m.cos(ransample[1])**2.-1.)
        ranb= ransample[0]/m.cos(ransample[1])
        plot.bovy_plot(sc.array(xrange),ranm*sc.array(xrange)+ranb,
                       overplot=True,color='0.75',zorder=0)

    #Add labels: Pbad() averaged over all kept samples, for each data point
    for ii in range(nsample):
        Pb= 0.
        for jj in range(nkept):
            Pb+= Pbad(samples[:,jj],Z[ii,:],ycovar[:,ii,:])
        Pb/= nkept
        text(Z[ii,0]+5,Z[ii,1]+5,'%.1f'%Pb,color='0.5',zorder=3)

    #Add the error ellipses and the data points; the covariances collected
    #above are reused instead of reading the data file a second time
    ax=gca()
    for jj in range(nsample):
        #Calculate the eigenvalues and the rotation angle
        eigs= linalg.eig(ycovar[:,jj,:])
        angle= m.atan(-eigs[1][0,1]/eigs[1][1,1])/m.pi*180.
        thisellipse= Ellipse(sc.array([X[jj],Y[jj]]),2*m.sqrt(eigs[0][0]),
                             2*m.sqrt(eigs[0][1]),angle=angle)
        ax.add_artist(thisellipse)
        thisellipse.set_facecolor('none')
    ax.plot(X,Y,color='k',marker='o',linestyle='None')

    plot.bovy_end_print(plotfilename)
コード例 #27
0
def ex7b(exclude=sc.array([1,2,3,4]),plotfilename='ex7b.png'):
    """ex7b: solve exercise 7 using an iterative procedure

    Starting from the weighted least-squares line, the fit is repeated
    with per-point variances sigma^2 + (y-mx-b)^2/Q^2 evaluated at the
    previous solution until the relative change of (b,m) drops below a
    tolerance. This is done for each Q in [1,2]; the resulting lines are
    plotted over the data and labelled with their chi^2_Q.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
    Output:
       plot; returns 0
    History:
       2009-06-01 - Written - Bovy (NYU)
    """
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    nsample= ndata- len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the iteration
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2.
        yerr[jj]= data[ii][2]
        jj= jj+1
    #Now compute the best fit and the uncertainties
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y)))
    initialguess= sc.array([bestfit[0],bestfit[1]])
    #With this initial guess start the iteration, using as the variance of
    #each point sigma^2+(y-mx-b)^2/Q^2
    tol= 10**-10.
    Qs= [1.,2.]
    bestfitssoft= sc.zeros((2,len(Qs)))
    chisqQ= sc.zeros(len(Qs))
    for jj in range(len(Qs)):
        currentguess= initialguess
        diff= 2*tol
        while diff > tol:
            oldguess= currentguess
            #Calculate the weight based on the previous iteration
            for ii in range(nsample):
                #Update C
                C[ii,ii]= (yerr[ii]**2.+(Y[ii]-oldguess[1]*X[ii]-oldguess[0])**2/Qs[jj]**2.)
            #Re-fit
            Cinv= linalg.inv(C)
            bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
            bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y)))
            currentguess= sc.array([bestfit[0],bestfit[1]])
            #Relative change of intercept and slope, added in quadrature
            diff= m.sqrt((currentguess[0]-oldguess[0])**2/oldguess[0]**2.+(currentguess[1]-oldguess[1])**2/oldguess[1]**2.)
        bestfitssoft[0,jj]= currentguess[0]
        bestfitssoft[1,jj]= currentguess[1]
        #Calculate chi^2_Q; the residual is y-m*x-b with b= currentguess[0]
        #(the slope used to be subtracted a second time here instead of b)
        for ii in range(nsample):
            resid= Y[ii]-X[ii]*currentguess[1]-currentguess[0]
            chisqQ[jj]= chisqQ[jj]+1./(yerr[ii]**2/resid**2+1./Qs[jj]**2)

    #Now plot the solution
    fig_width=5
    fig_height=5
    fig_size =  [fig_width,fig_height]
    #'font.size' replaces the 'text.fontsize' key, which newer matplotlib
    #releases no longer accept
    params = {'axes.labelsize': 12,
              'font.size': 11,
              'legend.fontsize': 12,
              'xtick.labelsize':10,
              'ytick.labelsize':10,
              'text.usetex': True,
              'figure.figsize': fig_size}
    rcParams.update(params)
    #Plot data (color used to be passed twice, which is a syntax error)
    errorbar(X,Y,yerr,color='k',marker='o',linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles= ('--',':', '-.')
    for jj in range(len(Qs)):
        xlim(0,300)
        ylim(0,700)
        xmin, xmax= xlim()
        npts= 1001
        xs= sc.linspace(xmin,xmax,npts)
        ys= bestfitssoft[0,jj]+bestfitssoft[1,jj]*xs
        #The intercept is printed as an absolute value after its sign
        if bestfitssoft[0,jj] < 0:
            sgn_str= '-'
        else:
            sgn_str= '+'
        label= r'$Q= '+r'%i: y = %4.2f\, x'% (Qs[jj], bestfitssoft[1,jj]) +sgn_str+ '%4.0f ' % m.fabs(bestfitssoft[0,jj])+r'; \chi^2_Q = '+ '%3.1f' % chisqQ[jj]+'$'
        plot(xs,ys,color='k',ls=linestyles[jj],label=label)
    l=legend(loc=(.2,.1),numpoints=8)
    l.draw_frame(False)
    xlim(0,300)
    ylim(0,700)
    savefig(plotfilename,format='png')

    return 0
コード例 #28
0
def plot_data_allerr():
    """plot_data_allerr: draw every data point with its error ellipse

    Reads 'data_allerr.dat', builds one ellipse per point from the
    eigen-decomposition of its 2x2 covariance matrix, and saves the
    figure as 'data_allerr.png'. The axis limits enclose every point
    plus/minus the square root of the diagonal covariance terms, and
    always include the origin.

    Output:
       plot; returns 0
    History:
       2009-05-20 - Written - Bovy (NYU)

    """
    #Read the data
    rows= read_data('data_allerr.dat',True)
    npoints= len(rows)
    #Containers for the data points and their ellipses
    ids= sc.zeros(npoints)
    x= sc.zeros(npoints)
    y= sc.zeros(npoints)
    ellipses= []
    #Running plot limits, starting from the origin
    xlo, xhi= 0, 0
    ylo, yhi= 0, 0
    for kk in range(npoints):
        row= rows[kk]
        ids[kk]= row[0]
        x[kk]= row[1][0]
        y[kk]= row[1][1]
        #Covariance matrix of this point
        covar= sc.zeros((2,2))
        covar[0,0]= row[3]**2.
        covar[1,1]= row[2]**2.
        covar[0,1]= row[4]*m.sqrt(covar[0,0]*covar[1,1])
        covar[1,0]= covar[0,1]
        #Eigenvalues give the axis lengths, eigenvectors the rotation
        evals, evecs= linalg.eig(covar)
        rotation= m.atan(-evecs[0,1]/evecs[1,1])/m.pi*180.
        center= sc.array([x[kk],y[kk]])
        ellipses.append(Ellipse(center,2*m.sqrt(evals[0]),
                                2*m.sqrt(evals[1]),rotation))
        #Grow the limits where this point sticks out
        xhi= max(xhi,x[kk]+m.sqrt(covar[0,0]))
        xlo= min(xlo,x[kk]-m.sqrt(covar[0,0]))
        yhi= max(yhi,y[kk]+m.sqrt(covar[1,1]))
        ylo= min(ylo,y[kk]-m.sqrt(covar[1,1]))

    #Figure set-up
    outname= 'data_allerr.png'
    side= 7.5
    rcParams.update({'axes.labelsize': 12,
                     'text.fontsize': 11,
                     'legend.fontsize': 12,
                     'xtick.labelsize':10,
                     'ytick.labelsize':10,
                     'text.usetex': True,
                     'figure.figsize': [side,side]})
    fig= figure()
    ax= fig.add_subplot(111)
    #Unfilled ellipses first, then the points on top
    for ell in ellipses:
        ax.add_artist(ell)
        ell.set_facecolor('none')
    ax.plot(x,y,color='k',marker='o',linestyle='None')
    ax.set_xlabel(r'$x$')
    ax.set_ylabel(r'$y$')
    ax.set_xlim((xlo,xhi))
    ax.set_ylim((ylo,yhi))
    savefig(outname,format='png')

    return 0
コード例 #29
0
def ex7d(exclude=sc.array([1,2,3,4]),plotfilename='ex7d.png',nburn=100,nsamples=10000,parsigma=[5.,.075]):
    """ex7d: solve exercise 7 using MCMC sampling

    For each Q in [1,2] a Metropolis random walk over (b,m) is started
    from the weighted least-squares line and scored with the module-level
    softchisquared(); the sample with the lowest value is kept as the
    best fit. The resulting lines are plotted over the data.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian) for
                        the intercept and the slope
    Output:
       plot; returns 0
    History:
       2009-06-02 - Written - Bovy (NYU)
    """
    #NOTE(review): numpy documents non-negative seeds only -- confirm that
    #-1 is accepted by the installed version
    sc.random.seed(-1) #In the interest of reproducibility (if that's a word)
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    nsample= ndata- len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the sampling
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2.
        yerr[jj]= data[ii][2]
        jj= jj+1
    #Now compute the best fit and the uncertainties
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y)))
    initialguess= sc.array([bestfit[0],bestfit[1]])
    #With this initial guess start off the sampling procedure
    Qs= [1.,2.]
    bestfitssoft= sc.zeros((2,len(Qs)))
    chisqQ= sc.zeros(len(Qs))
    for kk in range(len(Qs)):
        print("Working on Q = "+str(Qs[kk]))
        initialchisqQ= softchisquared(initialguess,X,Y,yerr,Qs[kk])
        bestfit= initialguess
        currentchisqQ= initialchisqQ
        bestchisqQ= initialchisqQ
        currentguess= initialguess
        naccept= 0
        for jj in range(nburn+nsamples):
            #Draw a sample from the proposal distribution
            newsample= sc.zeros(2)
            newsample[0]= currentguess[0]+stats.norm.rvs()*parsigma[0]
            newsample[1]= currentguess[1]+stats.norm.rvs()*parsigma[1]
            #Calculate the objective function for the newsample
            newchisqQ= softchisquared(newsample,X,Y,yerr,Qs[kk])
            #Accept or reject
            #Reject with the appropriate probability
            u= stats.uniform.rvs()
            try:
                test= m.exp(currentchisqQ-newchisqQ)
            except OverflowError:
                #exp overflowed, i.e. the new point is far better: always
                #accept (same handling as the other samplers in this file)
                test= 2.
            if u < test:
                #Accept
                currentchisqQ= newchisqQ
                currentguess= newsample
                naccept= naccept+1
            if currentchisqQ < bestchisqQ:
                bestfit= currentguess
                bestchisqQ= currentchisqQ
        bestfitssoft[:,kk]= bestfit
        chisqQ[kk]= bestchisqQ
        acceptance= float(naccept)/(nburn+nsamples)
        if acceptance < .5 or acceptance > .8:
            print("Acceptance ratio was "+str(acceptance))

    #Now plot the solution
    fig_width=5
    fig_height=5
    fig_size =  [fig_width,fig_height]
    #'font.size' replaces the 'text.fontsize' key, which newer matplotlib
    #releases no longer accept
    params = {'axes.labelsize': 12,
              'font.size': 11,
              'legend.fontsize': 12,
              'xtick.labelsize':10,
              'ytick.labelsize':10,
              'text.usetex': True,
              'figure.figsize': fig_size}
    rcParams.update(params)
    #Plot data (color used to be passed twice, which is a syntax error)
    errorbar(X,Y,yerr,color='k',marker='o',linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles= ('--',':', '-.')
    for jj in range(len(Qs)):
        xlim(0,300)
        ylim(0,700)
        xmin, xmax= xlim()
        npts= 1001
        xs= sc.linspace(xmin,xmax,npts)
        ys= bestfitssoft[0,jj]+bestfitssoft[1,jj]*xs
        #The intercept is printed as an absolute value after its sign
        if bestfitssoft[0,jj] < 0:
            sgn_str= '-'
        else:
            sgn_str= '+'
        label= r'$Q= '+r'%i: y = %4.2f\, x'% (Qs[jj], bestfitssoft[1,jj]) +sgn_str+ '%4.0f ' % m.fabs(bestfitssoft[0,jj])+r'; \chi^2_Q = '+ '%3.1f' % chisqQ[jj]+'$'
        plot(xs,ys,color='k',ls=linestyles[jj],label=label)
    l=legend(loc=(.2,.1),numpoints=8)
    l.draw_frame(False)
    xlim(0,300)
    ylim(0,700)
    savefig(plotfilename,format='png')

    return 0
コード例 #30
0
def ex14(exclude=sc.array([1,2,3,4]),plotfilename='ex14.png',
         bovyprintargs={}):
    """ex14: solve exercise 14

    Fits the data of 'data_allerr.dat' twice by weighted least squares:
    "forward" (y against x, weighted by the y errors) and "reverse"
    (x against y, weighted by the x errors). The reverse fit is
    re-expressed as y = m x + b, with its covariance propagated
    linearly, and both lines are plotted over the data.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
       bovyprintargs  - keyword arguments handed to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    def _weighted_fit(design,covar,target):
        #Weighted least squares: returns (parameters, parameter covariance)
        invcovar= linalg.inv(covar)
        parvar= linalg.inv(sc.dot(design.T,sc.dot(invcovar,design)))
        pars= sc.dot(parvar,sc.dot(design.T,sc.dot(invcovar,target.T)))
        return pars, parvar

    #Read the data
    data= read_data('data_allerr.dat',allerr=True)
    nsample= len(data)- len(exclude)
    #Targets, design matrices and covariances for both directions
    Y1= sc.zeros(nsample)
    A1= sc.ones((nsample,2))
    C1= sc.zeros((nsample,nsample))
    Y2= sc.zeros(nsample)
    A2= sc.ones((nsample,2))
    C2= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    xerr= sc.zeros(nsample)
    kept= 0
    for entry in data:
        if sc.any(exclude == entry[0]):
            continue
        #Forward: y as a function of x
        Y1[kept]= entry[1][1]
        A1[kept,1]= entry[1][0]
        C1[kept,kept]= entry[2]**2.
        yerr[kept]= entry[2]
        #Reverse: x as a function of y
        Y2[kept]= entry[1][0]
        A2[kept,1]= entry[1][1]
        C2[kept,kept]= entry[3]**2.
        xerr[kept]= entry[3]
        kept+= 1
    #Best fit and uncertainties in both directions
    bestfit1, bestfitvar1= _weighted_fit(A1,C1,Y1)
    bestfit2, bestfitvar2= _weighted_fit(A2,C2,Y2)
    #Propagate the reverse fit x = b' + m' y to y = m x + b:
    #b = -b'/m', m = 1/m'; linerrprop is the Jacobian of that map
    revb, revm= bestfit2[0], bestfit2[1]
    linerrprop= sc.array([[-1./revm,revb/revm**2],
                          [0.,-1./revm**2.]])
    bestfit2= sc.array([-revb/revm,1./revm])
    bestfitvar2= sc.dot(linerrprop,sc.dot(bestfitvar2,linerrprop.T))

    #Plot both lines
    plot.bovy_print(**bovyprintargs)
    xrange=[0,300]
    yrange=[0,700]
    xends= sc.array(xrange)
    plot.bovy_plot(xends,bestfit1[1]*sc.array(xrange)+bestfit1[0],
                   'k--',xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    plot.bovy_plot(sc.array(xrange),bestfit2[1]*sc.array(xrange)+bestfit2[0],
                   'k-.',overplot=True,zorder=2)

    #Plot data with error bars in both coordinates
    errorbar(A1[:,1],Y1,yerr,xerr,color='k',marker='o',
             linestyle='None',zorder=0)
    #One label line per fit: slope and intercept with their uncertainties
    linefmt= r'%4.2f \pm %4.2f )\,x+ ( %4.0f\pm %4.0f'
    forward= linefmt % (bestfit1[1], m.sqrt(bestfitvar1[1,1]),
                        bestfit1[0], m.sqrt(bestfitvar1[0,0]))
    reverse= linefmt % (bestfit2[1], m.sqrt(bestfitvar2[1,1]),
                        bestfit2[0], m.sqrt(bestfitvar2[0,0]))
    plot.bovy_text(r'$\mathrm{forward}\ ---\:\ y = ( '+forward+r')$'+'\n'+
                   r'$\mathrm{reverse}\ -\cdot -\:\ y = ( '+reverse+r')$',
                   bottom_right=True)
    plot.bovy_end_print(plotfilename)
コード例 #31
0
def ex15(
        exclude=sc.array([1, 2, 3, 4]), plotfilename='ex15.png',
        bovyprintargs={}):
    """ex15: solve exercise 15

    Fits a line to the points of 'data_allerr.dat' by principal component
    analysis: the line passes through the mean of the points along the
    eigenvector of their sample covariance with the largest eigenvalue.
    The line and the data are plotted.

    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
       bovyprintargs  - keyword arguments handed to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    ndata = len(data)
    nsample = ndata - len(exclude)
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    Z = sc.zeros((nsample, 2))
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        Z[jj, 0] = X[jj]
        Z[jj, 1] = Y[jj]
        jj = jj + 1
    #Now compute the PCA solution
    Zm = sc.mean(Z, axis=0)
    Q = sc.cov(Z.T)
    eigs = linalg.eig(Q)
    maxindx = sc.argmax(eigs[0])
    #Eigenvectors are the COLUMNS of eigs[1], so take column maxindx
    V = eigs[1][:, maxindx]
    #Slope of the principal direction; the ratio keeps the sign, which
    #sqrt(1/V[0]**2-1) discarded (and the name no longer hides the math
    #module imported as m)
    slope = V[1] / V[0]
    bestfit = sc.array([-slope * Zm[0] + Zm[1], slope])

    #Plot result
    plot.bovy_print(**bovyprintargs)
    xrange = [0, 300]
    yrange = [0, 700]
    plot.bovy_plot(sc.array(xrange),
                   bestfit[1] * sc.array(xrange) + bestfit[0],
                   'k--',
                   xrange=xrange,
                   yrange=yrange,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    plot.bovy_plot(X,
                   Y,
                   marker='o',
                   color='k',
                   linestyle='None',
                   zorder=0,
                   overplot=True)

    #'%+4.0f' always writes the sign, so a positive intercept is no
    #longer printed as if it were multiplied with x
    plot.bovy_text(r'$y = %4.2f \,x %+4.0f' % (bestfit[1], bestfit[0]) + r'$',
                   bottom_right=True)
    plot.bovy_end_print(plotfilename)
コード例 #32
0
def ex10(exclude=sc.array([1, 2, 3, 4]),
         plotfilenameA='ex10a.png',
         plotfilenameB='ex10b.png',
         nburn=1000,
         nsamples=200000,
         parsigma=[5, .075, 0.1],
         bovyprintargs={}):
    """ex10: solve exercise 10 using MCMC sampling

    Runs a Metropolis random walk over (b, m, log S), started from the
    weighted least-squares line and scored by the module-level
    objective(). Two figures are written: the data with the best sample
    found, and the data with the modes of the histograms of (b, m) and of
    log S. In both figures every point gets the error bar exp(log S / 2).

    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilenameA  - filename for the plot of the best sample
       plotfilenameB  - filename for the plot of the histogram modes
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian), one entry
                        per sampled parameter
       bovyprintargs  - keyword arguments handed to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #NOTE(review): numpy documents non-negative seeds only -- confirm that
    #-1 is accepted by the installed version
    sc.random.seed(-1)  #In the interest of reproducibility (if that's a word)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #'is None' on purpose: '== None' on an array compares element by
    #element and its truth value is ambiguous
    if exclude is None:
        nsample = ndata
    else:
        nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if exclude is not None and sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        jj = jj + 1
    #Now compute the best fit and the uncertainties
    Cinv = linalg.inv(C)
    bestfitvar = linalg.inv(sc.dot(A.T, sc.dot(Cinv, A)))
    bestfit = sc.dot(bestfitvar, sc.dot(A.T, sc.dot(Cinv, Y)))
    initialguess = sc.array([bestfit[0], bestfit[1], 0.])  #(b,m,logS)
    #With this initial guess start off the sampling procedure
    initialX = objective(initialguess, X, Y, yerr)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    samples = [currentguess]
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution
        newsample = sc.zeros(3)
        newsample[0] = currentguess[0] + stats.norm.rvs() * parsigma[0]
        newsample[1] = currentguess[1] + stats.norm.rvs() * parsigma[1]
        newsample[2] = currentguess[2] + stats.norm.rvs() * parsigma[2]
        #Calculate the objective function for the newsample
        newX = objective(newsample, X, Y, yerr)
        #Accept or reject
        #Reject with the appropriate probability
        u = stats.uniform.rvs()
        try:
            accept = u < m.exp(newX - currentX)
        except OverflowError:
            #exp overflowed, i.e. the new point is far better: always accept
            accept = True
        if accept:
            #Accept
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
        if currentX > bestX:
            bestfit = currentguess
            bestX = currentX
        samples.append(currentguess)
    acceptance = float(naccept) / (nburn + nsamples)
    if acceptance < .5 or acceptance > .8:
        print("Acceptance ratio was " + str(acceptance))

    #Drop the burn-in; rows are parameters, columns are samples
    samples = sc.array(samples).T[:, nburn:-1]
    print("Best-fit, overall")
    print(bestfit, sc.mean(samples[2, :]), sc.median(samples[2, :]))

    #The number of bins has to be an integer
    nbins = int(round(m.sqrt(nsamples) / 2.))
    histmb, edges = sc.histogramdd(samples.T[:, 0:2], bins=nbins)
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    print(edges[0][indxi - 1], edges[1][indxj - 1])
    print(edges[0][indxi], edges[1][indxj])
    print(edges[0][indxi + 1], edges[1][indxj + 1])

    print("Best-fit for S marginalized")
    histS, edgesS = sc.histogram(samples.T[:, 2], bins=nbins)
    indx = sc.argmax(histS)

    #Data with the best sample found
    plot.bovy_print(**bovyprintargs)
    bestb = bestfit[0]
    bestm = bestfit[1]
    xrange = [0, 300]
    yrange = [0, 700]
    plot.bovy_plot(xrange,
                   bestm * sc.array(xrange) + bestb,
                   'k-',
                   xrange=xrange,
                   yrange=yrange,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    errorbar(X,
             Y,
             sc.exp(bestfit[2] / 2.),
             marker='o',
             color='k',
             linestyle='None',
             zorder=1)
    plot.bovy_text(r'$\mathrm{MAP}\ :\ y = %4.2f \,x+ %4.0f' %
                   (bestfit[1], bestfit[0]) + r'$' + '\n' +
                   r'$\mathrm{MAP}\ :\ \sqrt{S} = %3.1f$' %
                   (sc.exp(bestfit[2] / 2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameA)

    #Data with the histogram modes (lower edge of the fullest bin)
    plot.bovy_print(**bovyprintargs)
    bestb = edges[0][indxi]
    bestm = edges[1][indxj]
    bestS = edgesS[indx]
    xrange = [0, 300]
    yrange = [0, 700]
    plot.bovy_plot(xrange,
                   bestm * sc.array(xrange) + bestb,
                   'k-',
                   xrange=xrange,
                   yrange=yrange,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    errorbar(X,
             Y,
             sc.exp(bestS / 2.),
             marker='o',
             color='k',
             linestyle='None',
             zorder=1)
    plot.bovy_text(
        r'$\mathrm{marginalized\ over\ S}\ :\ y = %4.2f \,x+ %4.0f' %
        (bestm, bestb) + r'$' + '\n' +
        r'$\mathrm{marginalized\ over}\ (m,b)\ :\ \sqrt{S} = %3.1f$' %
        (sc.exp(bestS / 2.)),
        bottom_right=True)
    plot.bovy_end_print(plotfilenameB)

    return
コード例 #33
0
def ex10(exclude=sc.array([1,2,3,4]),
         plotfilenameA='ex10a.png',
         plotfilenameB='ex10b.png',
         nburn=1000,nsamples=200000,
         parsigma=[5,.075,0.1],
         bovyprintargs={}):
    """ex10: solve exercise 10 using MCMC sampling

    A Metropolis random walk over (b, m, log S) is started from the
    weighted least-squares line and scored by the module-level
    objective(). The first plot shows the data with the best sample
    found, the second the data with the modes of the (b, m) histogram
    and of the log S histogram; in both, every point is drawn with the
    error bar exp(log S / 2).

    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilenameA  - filename for the plot of the best sample
       plotfilenameB  - filename for the plot of the histogram modes
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian), one entry
                        per sampled parameter
       bovyprintargs  - keyword arguments handed to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #NOTE(review): numpy documents non-negative seeds only -- confirm that
    #-1 is accepted by the installed version
    sc.random.seed(-1) #In the interest of reproducibility (if that's a word)
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    #Test for None by identity: comparing an array to None with == works
    #element by element and cannot be used as a condition
    useall= exclude is None
    if useall:
        nsample= ndata
    else:
        nsample= ndata- len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if not useall and sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2.
        yerr[jj]= data[ii][2]
        jj= jj+1
    #Now compute the best fit and the uncertainties
    Cinv= linalg.inv(C)
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,sc.dot(A.T,sc.dot(Cinv,Y)))
    initialguess= sc.array([bestfit[0],bestfit[1],0.])#(b,m,logS)
    #With this initial guess start off the sampling procedure
    initialX= objective(initialguess,X,Y,yerr)
    currentX= initialX
    bestX= initialX
    bestfit= initialguess
    currentguess= initialguess
    naccept= 0
    samples= [currentguess]
    for jj in range(nburn+nsamples):
        #Draw a sample from the proposal distribution
        newsample= sc.zeros(3)
        newsample[0]= currentguess[0]+stats.norm.rvs()*parsigma[0]
        newsample[1]= currentguess[1]+stats.norm.rvs()*parsigma[1]
        newsample[2]= currentguess[2]+stats.norm.rvs()*parsigma[2]
        #Calculate the objective function for the newsample
        newX= objective(newsample,X,Y,yerr)
        #Accept or reject
        #Reject with the appropriate probability
        u= stats.uniform.rvs()
        try:
            accept= u < m.exp(newX-currentX)
        except OverflowError:
            #exp overflowed, i.e. the new point is far better: always accept
            accept= True
        if accept:
            #Accept
            currentX= newX
            currentguess= newsample
            naccept= naccept+1
        if currentX > bestX:
            bestfit= currentguess
            bestX= currentX
        samples.append(currentguess)
    acceptance= float(naccept)/(nburn+nsamples)
    if acceptance < .5 or acceptance > .8:
        print("Acceptance ratio was "+str(acceptance))

    #Drop the burn-in; rows are parameters, columns are samples
    samples= sc.array(samples).T[:,nburn:-1]
    print("Best-fit, overall")
    print(bestfit, sc.mean(samples[2,:]), sc.median(samples[2,:]))

    #The number of bins has to be an integer
    nbins= int(round(m.sqrt(nsamples)/2.))
    histmb,edges= sc.histogramdd(samples.T[:,0:2],bins=nbins)
    indxi= sc.argmax(sc.amax(histmb,axis=1))
    indxj= sc.argmax(sc.amax(histmb,axis=0))
    print("Best-fit, marginalized")
    print(edges[0][indxi-1], edges[1][indxj-1])
    print(edges[0][indxi], edges[1][indxj])
    print(edges[0][indxi+1], edges[1][indxj+1])

    print("Best-fit for S marginalized")
    histS,edgesS= sc.histogram(samples.T[:,2],bins=nbins)
    indx= sc.argmax(histS)

    #Data with the best sample found
    plot.bovy_print(**bovyprintargs)
    bestb= bestfit[0]
    bestm= bestfit[1]
    xrange=[0,300]
    yrange=[0,700]
    plot.bovy_plot(xrange,bestm*sc.array(xrange)+bestb,'k-',
                   xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    errorbar(X,Y,sc.exp(bestfit[2]/2.),
             marker='o',color='k',linestyle='None',zorder=1)
    plot.bovy_text(r'$\mathrm{MAP}\ :\ y = %4.2f \,x+ %4.0f' % (bestfit[1], bestfit[0])+r'$'+'\n'+r'$\mathrm{MAP}\ :\ \sqrt{S} = %3.1f$' % (sc.exp(bestfit[2]/2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameA)

    #Data with the histogram modes (lower edge of the fullest bin)
    plot.bovy_print(**bovyprintargs)
    bestb= edges[0][indxi]
    bestm= edges[1][indxj]
    bestS= edgesS[indx]
    xrange=[0,300]
    yrange=[0,700]
    plot.bovy_plot(xrange,bestm*sc.array(xrange)+bestb,'k-',
                   xrange=xrange,yrange=yrange,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    errorbar(X,Y,sc.exp(bestS/2.),
             marker='o',color='k',linestyle='None',zorder=1)
    plot.bovy_text(r'$\mathrm{marginalized\ over\ S}\ :\ y = %4.2f \,x+ %4.0f' % (bestm, bestb)+r'$'+'\n'+r'$\mathrm{marginalized\ over}\ (m,b)\ :\ \sqrt{S} = %3.1f$' % (sc.exp(bestS/2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameB)

    return
コード例 #34
0
def marginalize_mixture(mixture=True, short=False):
	"""Run a Metropolis sampler over (theta, bperp, Pbad, Ybad, Vbad) and save plots.

	Reads 'data_allerr.dat' through read_data, proposes new parameters with
	pick_new_parameters(), scores them with posterior(), and writes PDF
	figures whose names start with 'mixture2d' or 'nomixture2d'.

	Input:
	   mixture - if True use the prefix 'mixture2d', start Pbad at 0.5 and
	             also write the '-xy-bg.pdf' figure; if False use the prefix
	             'nomixture2d' and force Pbad to 0 in every proposal
	   short   - if True run half of the default 100000 steps
	Output:
	   '<prefix>-data.pdf', '<prefix>-xy.pdf', '<prefix>-mb.pdf' and, for
	   mixture=True, '<prefix>-xy-bg.pdf'; nothing is returned
	"""
	if mixture:
		prefix = 'mixture2d'
	else:
		prefix = 'nomixture2d'

	# NOTE(review): newer numpy versions may reject a negative seed -- confirm
	random.seed(-1) #In the interest of reproducibility (if that's a word)
	# Read the data
	data= read_data('data_allerr.dat',True)
	ndata= len(data)
	# Create the ellipses and the data points
	x= zeros(ndata)
	y= zeros(ndata)
	ellipses=[]
	yvar= zeros((ndata,2,2))
	for ii in range(ndata):
		x[ii]= data[ii][1][0]
		y[ii]= data[ii][1][1]
		#Calculate the eigenvalues and the rotation angle
		# Row layout as used here: [2] is squared into the y variance, [3] into
		# the x variance, and [4] scales sqrt(var_x*var_y) for the off-diagonal
		yvar[ii,0,0]= data[ii][3]**2.
		yvar[ii,1,1]= data[ii][2]**2.
		yvar[ii,0,1]= data[ii][4]*sqrt(yvar[ii,0,0]*yvar[ii,1,1])
		yvar[ii,1,0]= yvar[ii,0,1]
		eigs= linalg.eig(yvar[ii,:,:])
		angle= arctan(-eigs[1][0,1]/eigs[1][1,1])/pi*180.
		# Ellipse width/height are twice the square roots of the eigenvalues
		thisellipse= Ellipse(array([x[ii],y[ii]]),2*sqrt(eigs[0][0]),
							 2*sqrt(eigs[0][1]),angle)
		ellipses.append(thisellipse)

	# initialize parameters
	# The starting line passes through data points 7 and 9 (0-based indices)
	theta = arctan2(y[7]-y[9],x[7]-x[9])
	bperp = (y[7] - tan(theta) * x[7]) * cos(theta) # bad at theta = 0.5 * pi
	if mixture:
		Pbad = 0.5
	else:
		Pbad = 0.
	# Ybad and Vbad start at the mean and the (population) variance of y
	Ybad = mean(y)
	Vbad = mean((y-Ybad)**2)

	p = posterior(x, y, yvar, theta, bperp, Pbad, Ybad, Vbad)
	print 'starting p=', p

	# chain holds one (p, params) tuple per step, repeating the old state on rejection
	chain = []
	oldp = p
	oldparams = (theta, bperp, Pbad, Ybad, Vbad)
	bestparams = oldparams
	bestp = oldp

	nsteps = 0
	naccepts = 0

	NSTEPS = 100000
	if short:
		# integer halving under the Python 2 semantics this file is written for
		NSTEPS /= 2
	print 'doing', NSTEPS, 'steps of MCMC...'
	while nsteps < NSTEPS:
		newparams = pick_new_parameters(nsteps, *oldparams)
		if not mixture:
			# clamp Pbad to zero.
			(theta, bperp, Pbad, Ybad, Vbad) = newparams
			newparams = (theta, bperp, 0, Ybad, Vbad)

		p = posterior(x, y, yvar, *newparams)
		# Accept when the ratio p/oldp beats a uniform draw; presumably
		# posterior() returns a probability rather than a log -- TODO confirm
		if p/oldp > random.uniform():
			chain.append((p,newparams))
			oldparams = newparams
			oldp = p
			if p > bestp:
				bestp = p
				bestparams = newparams
			naccepts += 1
		else:
			chain.append((oldp,oldparams))
			# keep oldparams, oldp
		nsteps += 1
		# progress report on steps 1, 5001, 10001, ...
		if (nsteps % 5000 == 1):
			print nsteps, naccepts, (naccepts/float(nsteps)), oldp, bestp, bestparams

	print 'acceptance fraction', (naccepts/float(nsteps))

	# plot a sample
	
	fig_width=5
	fig_height=5
	fig_size =	[fig_width,fig_height]
	params = {'axes.labelsize': 12,
			  'text.fontsize': 11,
			  'legend.fontsize': 12,
			  'xtick.labelsize':10,
			  'ytick.labelsize':10,
			  'text.usetex': True,
			  'figure.figsize': fig_size,
			  'image.interpolation':'nearest',
			  'image.origin':'lower',
			  }
	rcParams.update(params)

	# Plot data
	clf()
	ax = gca()
	for e in ellipses:
		ax.add_artist(e)
		e.set_facecolor('none')
	xlabel(r'$x$')
	ylabel(r'$y$')
	xlim(0,300)
	ylim(0,700)
	savefig(prefix + '-data.pdf')

	# Remember the axis limits so they can be restored after the lines are drawn
	a = axis()
	xmin, xmax = xlim()
	ymin, ymax = ylim()
	xs = linspace(xmin, xmax, 2)
	Nchain = len(chain)
	if mixture:
		# select 10 samples at random from the second half of the chain.
		# Nchain/2 is used as an index, so this relies on integer division
		I = Nchain/2 + random.permutation(Nchain/2)[:10]
	else:
		# only the chain entry with the largest p is drawn
		I = array([argmax([p for (p, params) in chain])])
	for i in I:
		(p,params) = chain[i]
		(theta, bperp, Pbad, Ybad, Vbad) = params
		ys = tan(theta) * xs + bperp / cos(theta) # replace this with smarter linear algebra
		plot(xs, ys, color='k', alpha=0.3)
	axis(a)
	savefig(prefix + '-xy.pdf')

	if mixture:
		# Accumulate, over the second half of the chain, each point's
		# Pbad-weighted likelihood with the Pbad argument set to 1 (bgp) and
		# its (1-Pbad)-weighted likelihood with it set to 0 (fgp)
		bgp = zeros(len(x))
		fgp = zeros(len(x))
		for (p,params) in chain[Nchain/2:]:
			(theta, bperp, Pbad, Ybad, Vbad) = params
			bgp += Pbad		 * single_point_likelihoods(x, y, yvar, theta, bperp, 1, Ybad, Vbad)
			fgp += (1.-Pbad) * single_point_likelihoods(x, y, yvar, theta, bperp, 0, Ybad, Vbad)
		bgodds = bgp / fgp
		# Label only the points whose odds ratio is at least 1, with log10(odds)
		for i,bgo in enumerate(bgodds):
			if bgo < 1:
				continue
			dxl = (xmax-xmin) * 0.01
			dyl = (ymax-ymin) * 0.01
			t = text(x[i]+dxl, y[i]+dyl, '%.1f' % log10(bgo),
					 horizontalalignment='left',
					 verticalalignment='bottom', alpha=0.3)
		savefig(prefix + '-xy-bg.pdf')

	clf()
	# note horrifying theta = 0.5 * pi behavior!
	# Slope and intercept for every entry in the second half of the chain
	ms = array([tan(theta) for (p, (theta, bperp, Pbad, Ybad, Vbad)) in chain[Nchain/2:]])
	bs = array([bperp / cos(theta) for (p, (theta, bperp, Pbad, Ybad, Vbad)) in chain[Nchain/2:]])
	#plot(ms, bs, 'k,', alpha=0.1)
	# The scatter plot is disabled: these labels are followed directly by
	# another clf(), and only ms and bs are used afterwards
	xlabel('slope $m$')
	ylabel('intercept $b$')
	#savefig(prefix + '-mb-scatter.pdf')

	clf()
	# 100x100 histogram of (m, b), shown as log(1 + counts)
	(H, xe, ye) = histogram2d(ms, bs, bins=(100,100))
	imshow(log(1+H.T), extent=(xe.min(), xe.max(), ye.min(), ye.max()), aspect='auto',
		   cmap=antigray)
	xlabel('slope $m$')
	ylabel('intercept $b$')
	savefig(prefix + '-mb.pdf')
コード例 #35
0
def ex16(exclude=sc.array([3]),plotfilename='ex16.png',
         bovyprintargs={}):
    """ex16: solve exercise 16 by optimization of the objective function
    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
       bovyprintargs  - keyword arguments passed on to plot.bovy_print
                        (only unpacked here, never modified)
    Output:
       plot; returns -1 when the two optimizers disagree and the user
       answers 'n' to the prompt, otherwise returns None
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #Read the data once and keep only the points that are not excluded.
    #Each row is used as [ID, (x, y), sigma_y, sigma_x, rho_xy]
    data= read_data('data_allerr.dat',allerr=True)
    kept= [point for point in data if not sc.any(exclude == point[0])]
    #Counting the kept rows (rather than ndata-len(exclude)) keeps the
    #arrays the right size even if an excluded ID is not in the file
    nsample= len(kept)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    Z= sc.zeros((nsample,2))
    ycovar= sc.zeros((2,nsample,2))#Makes the sc.dot easier
    for jj, point in enumerate(kept):
        X[jj]= point[1][0]
        Y[jj]= point[1][1]
        Z[jj,0]= X[jj]
        Z[jj,1]= Y[jj]
        A[jj,1]= X[jj]
        C[jj,jj]= point[2]**2.
        ycovar[0,jj,0]= point[3]**2.
        ycovar[1,jj,1]= point[2]**2.
        ycovar[0,jj,1]= point[4]*m.sqrt(ycovar[0,jj,0]*ycovar[1,jj,1])
        ycovar[1,jj,0]= ycovar[0,jj,1]
    #Now compute the best fit and the uncertainties:
    #bestfit = (A^T C^-1 A)^-1 A^T C^-1 Y
    Cinv= linalg.inv(C)
    bestfit= sc.dot(A.T,sc.dot(Cinv,Y.T))
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,bestfit)
    #Now optimize
    initial= sc.array([bestfit[0],bestfit[1],sc.log(100)])
    bestfit2d1= optimize.fmin(objective,initial,(Z,ycovar),disp=False)
    #Restart the optimization once using a different method
    bestfit2d= optimize.fmin_powell(objective,initial,
                                    (Z,ycovar),disp=False)
    normdiff= linalg.norm(bestfit2d-bestfit2d1)
    if normdiff > 10**-12:
        if normdiff < 10**-6:
            print("Different optimizers give slightly different results...")
        else:
            print("Different optimizers give rather different results...")
        print("The norm of the results differs by %g" % normdiff)
        try:
            answer=raw_input('continue to plot? [yn]\n')
        except EOFError:
            print("Since you are in non-interactive mode I will assume 'y'")
            answer='y'
        if answer == 'n':
            print("returning...")
            return -1

    b= bestfit2d[0]
    mf= bestfit2d[1]
    V=sc.exp(bestfit2d[2]/2.)
    cost= 1./sc.sqrt(1+mf**2.)
    bcost= b*cost

    #Plot result
    #NOTE(review): the dashed lines are drawn at mf*x + b*cos(theta) +/- V;
    #whether that offset is the intended one depends on how objective
    #parametrizes the line, which is not visible here -- confirm
    plot.bovy_print(**bovyprintargs)
    xlimits=[0,300]
    ylimits=[0,700]
    plot.bovy_plot(sc.array(xlimits),mf*sc.array(xlimits)+bcost+V,
                   'k--',xrange=xlimits,yrange=ylimits,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    plot.bovy_plot(sc.array(xlimits),mf*sc.array(xlimits)+bcost-V,
                   'k--',zorder=2,overplot=True)

    #Create the error ellipses from the per-point covariance matrices
    #(built after bovy_print so they are created under the same plot settings
    #as before)
    ellipses=[]
    for jj in range(nsample):
        pointcovar= ycovar[:,jj,:]
        #Calculate the eigenvalues and the rotation angle
        eigs= linalg.eig(pointcovar)
        angle= m.atan(-eigs[1][0,1]/eigs[1][1,1])/m.pi*180.
        ellipses.append(Ellipse(sc.array([X[jj],Y[jj]]),
                                2*m.sqrt(eigs[0][0]),
                                2*m.sqrt(eigs[0][1]),
                                angle=angle))

    #Add the error ellipses
    ax=gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    ax.plot(X,Y,color='k',marker='o',linestyle='None')

    plot.bovy_end_print(plotfilename)
コード例 #36
0
def ex7b(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex7b.png'):
    """ex7b: solve exercise 7 using an iterative procedure
    Input:
       exclude        - ID numbers to exclude from the analysis
       plotfilename   - filename for the output plot
    Output:
       plot (saved as PNG); returns 0
    History:
       2009-06-01 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess for the iterative re-weighting
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        jj = jj + 1

    def refit(covar):
        #Weighted least squares: (A^T C^-1 A)^-1 A^T C^-1 Y,
        #returned as (intercept, slope) because A's first column is ones
        cinv = linalg.inv(covar)
        fitvar = linalg.inv(sc.dot(A.T, sc.dot(cinv, A)))
        return sc.dot(fitvar, sc.dot(A.T, sc.dot(cinv, Y.T)))

    #Now compute the best fit
    bestfit = refit(C)
    initialguess = sc.array([bestfit[0], bestfit[1]])
    #With this initial guess start the iteration, using as the weights
    #Q^2/(sigma^2*Q^2+(y-mx-b)^2)
    tol = 10**-10.
    Qs = [1., 2.]
    bestfitssoft = sc.zeros((2, len(Qs)))
    chisqQ = sc.zeros(len(Qs))
    for jj, Q in enumerate(Qs):
        currentguess = initialguess
        diff = 2 * tol
        while diff > tol:
            oldguess = currentguess
            #Calculate the weight based on the previous iteration
            resid = Y - oldguess[1] * X - oldguess[0]
            C = sc.diag(yerr**2. + resid**2 / Q**2.)
            #Re-fit
            bestfit = refit(C)
            currentguess = sc.array([bestfit[0], bestfit[1]])
            #Relative change of (intercept, slope) between iterations
            diff = m.sqrt((currentguess[0] - oldguess[0])**2 /
                          oldguess[0]**2. +
                          (currentguess[1] - oldguess[1])**2 / oldguess[1]**2.)
        bestfitssoft[0, jj] = currentguess[0]
        bestfitssoft[1, jj] = currentguess[1]
        #Calculate chi^2_Q = sum r^2 Q^2/(sigma^2 Q^2 + r^2), with the
        #residual r = y - m x - b (slope is index 1, intercept index 0).
        #This is algebraically 1/(sigma^2/r^2 + 1/Q^2) but does not divide
        #by r, so a point lying exactly on the line contributes 0
        resid = Y - currentguess[1] * X - currentguess[0]
        chisqQ[jj] = sc.sum(resid**2 * Q**2 / (yerr**2 * Q**2 + resid**2))

    #Now plot the solution
    fig_width = 5
    fig_height = 5
    fig_size = [fig_width, fig_height]
    params = {
        'axes.labelsize': 12,
        'text.fontsize': 11,
        'legend.fontsize': 12,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,
        'figure.figsize': fig_size
    }
    rcParams.update(params)
    #Plot data
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None')
    xlabel(r'$x$')
    ylabel(r'$y$')
    #Plot the best fit line for the different Qs
    linestyles = ('--', ':', '-.')
    nsamples = 1001
    for jj in range(len(Qs)):
        xlim(0, 300)
        ylim(0, 700)
        xmin, xmax = xlim()
        xs = sc.linspace(xmin, xmax, nsamples)
        ys = bestfitssoft[0, jj] + bestfitssoft[1, jj] * xs
        #The intercept is printed as an absolute value with an explicit sign
        if bestfitssoft[0, jj] < 0:
            sgn_str = '-'
        else:
            sgn_str = '+'
        label = (r'$Q= ' +
                 r'%i: y = %4.2f\, x' % (Qs[jj], bestfitssoft[1, jj]) +
                 sgn_str +
                 '%4.0f ' % m.fabs(bestfitssoft[0, jj]) +
                 r'; \chi^2_Q = ' + '%3.1f' % chisqQ[jj] + '$')
        plot(xs, ys, color='k', ls=linestyles[jj], label=label)
    l = legend(loc=(.2, .1), numpoints=8)
    l.draw_frame(False)
    xlim(0, 300)
    ylim(0, 700)
    savefig(plotfilename, format='png')

    return 0
コード例 #37
0
def exNew(exclude=sc.array([1,2,3,4]),
          plotfilename='exNew.png',nburn=20000,nsamples=200000,
          parsigma=[5,.075,.01,1,.1],dsigma=1.):
    """exNew: solve the new exercise using MCMC sampling
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilename   - filename for the output plot (currently unused: the
                        figures are written to exNew1[abc].png when
                        dsigma == 1 and to exNew2[abc].png otherwise)
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian), one entry
                        per parameter (b,m,Pb,Yb,log Vb); only read
       dsigma         - divide uncertainties by this amount
    Output:
       plots
    History:
       2010-04-28 - Written - Bovy (NYU)
    """
    sc.random.seed(1) #In the interest of reproducibility (if that's a word)
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    #'is not None' because '== None' on an array compares elementwise
    if exclude is not None:
        nsample= ndata- len(exclude)
    else:
        nsample= ndata
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if exclude is not None and sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2./dsigma**2.
        yerr[jj]= data[ii][2]/dsigma
        jj= jj+1
    #Now compute the best fit and the uncertainties
    Cinv= linalg.inv(C)
    bestfit= sc.dot(A.T,sc.dot(Cinv,Y.T))
    bestfitvar= linalg.inv(sc.dot(A.T,sc.dot(Cinv,A)))
    bestfit= sc.dot(bestfitvar,bestfit)
    #A's first column is ones, so bestfit[0] is the intercept b
    initialguess= sc.array([bestfit[0],bestfit[1],0.,sc.mean(Y),m.log(sc.var(Y))])#(b,m,Pb,Yb,log Vb)
    #With this initial guess start off the sampling procedure
    initialX= objective(initialguess,X,Y,yerr)
    currentX= initialX
    bestX= initialX
    bestfit= initialguess
    currentguess= initialguess
    naccept= 0
    samples= []
    samples.append(currentguess)
    for jj in range(nburn+nsamples):
        #Draw a sample from the proposal distribution (one normal draw per
        #parameter, in parameter order)
        newsample= sc.zeros(5)
        for kk in range(5):
            newsample[kk]= currentguess[kk]+stats.norm.rvs()*parsigma[kk]
        #Calculate the objective function for the newsample
        newX= objective(newsample,X,Y,yerr)
        #Accept or reject
        #Reject with the appropriate probability; the objective difference
        #is exponentiated, i.e. it is treated as a log-probability
        u= stats.uniform.rvs()
        dX= newX-currentX
        #dX >= 0 always accepts (exp(dX) >= 1 > u); testing it first keeps
        #m.exp from overflowing on a large improvement
        if dX >= 0 or u < m.exp(dX):
            #Accept
            currentX= newX
            currentguess= newsample
            naccept= naccept+1
        if currentX > bestX:
            bestfit= currentguess
            bestX= currentX
        samples.append(currentguess)
    acceptratio= float(naccept)/(nburn+nsamples)
    if acceptratio < .2 or acceptratio > .6:
        print("Acceptance ratio was "+str(acceptratio))

    samples= sc.array(samples).T[:,nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit, sc.mean(samples[2,:]), sc.median(samples[2,:])))

    #Bin counts must be integers
    nbins= int(round(sc.sqrt(nsamples)/5.))
    histmb,edges= sc.histogramdd(samples.T[:,0:2],bins=nbins)
    indxi= sc.argmax(sc.amax(histmb,axis=1))
    indxj= sc.argmax(sc.amax(histmb,axis=0))
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi-1], edges[1][indxj-1]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi+1], edges[1][indxj+1]))
    #Remember the marginalized best fit now: indxi/indxj only match the
    #edges of this histogram, not those of the re-binned one below
    bestb= edges[0][indxi]
    bestm= edges[1][indxj]

    #Label and file-name tag depend on whether the uncertainties were scaled
    if dsigma == 1.:
        sigmalabel= r'$\mathrm{using\ correct\ data\ uncertainties}$'
        filetag= '1'
    else:
        sigmalabel= r'$\mathrm{using\ data\ uncertainties\ /\ 2}$'
        filetag= '2'

    #2D histogram
    plot.bovy_print()
    levels= special.erf(0.5*sc.arange(1,4))
    brange=[-120,120]
    mrange=[1.5,3.2]
    #Re-bin on the fixed plotting range with the same bin width as above
    nbinsb= int(nbins/(edges[0][-1]-edges[0][0])*(brange[1]-brange[0]))
    nbinsm= int(nbins/(edges[1][-1]-edges[1][0])*(mrange[1]-mrange[0]))
    histmb,edges= sc.histogramdd(samples.T[:,0:2],
                                 range=[brange,mrange],
                                 bins=(nbinsb,nbinsm))
    aspect=(brange[1]-brange[0])/(mrange[1]-mrange[0])
    plot.bovy_dens2d(histmb.T,origin='lower',cmap='gist_yarg',
                     contours=True,cntrmass=True,
                     xrange=brange,yrange=mrange,
                     levels=levels,
                     aspect=aspect,
                     xlabel=r'$b$',ylabel=r'$m$')
    plot.bovy_text(sigmalabel,top_right=True)
    plot.bovy_end_print('exNew'+filetag+'a.png')

    #Data with MAP line and sampling
    plot.bovy_print()
    xlimits=[0,300]
    ylimits=[0,700]
    plot.bovy_plot(xlimits,bestm*sc.array(xlimits)+bestb,'k-',
                   xrange=xlimits,yrange=ylimits,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    errorbar(X,Y,yerr,color='k',marker='o',linestyle='None',zorder=1)
    for ii in range(10):
        #Random sample (the row index has to be an integer)
        ransample= int(sc.floor(stats.uniform.rvs()*nsamples))
        thisb, thism= samples.T[ransample,0:2]
        plot.bovy_plot(xlimits,thism*sc.array(xlimits)+thisb,
                       overplot=True,xrange=xlimits,yrange=ylimits,
                       xlabel=r'$x$',ylabel=r'$y$',color='0.75',zorder=1)
    plot.bovy_text(sigmalabel,top_right=True)
    plot.bovy_end_print('exNew'+filetag+'b.png')

    #Pb plot
    plot.bovy_print()
    plot.bovy_hist(samples.T[:,2],color='k',bins=nbins,
                   xlabel=r'$P_\mathrm{b}$',normed=True,histtype='step',
                   range=[0,1])
    plot.bovy_text(sigmalabel,top_right=True)
    plot.bovy_end_print('exNew'+filetag+'c.png')

    return
コード例 #38
0
def exMix1(exclude=None,
           plotfilenameA='exMix1a.png',
           plotfilenameB='exMix1b.png',
           plotfilenameC='exMix1c.png',
           nburn=20000,
           nsamples=1000000,
           parsigma=[5, .075, .2, 1, .1],
           dsigma=1.,
           bovyprintargs={},
           sampledata=None):
    """exMix1: solve exercise 5 (mixture model) using MCMC sampling
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilename*  - filenames for the output plot
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian)
       dsigma         - divide uncertainties by this amount
       bovyprintargs  - keyword arguments passed on to plot.bovy_print;
                        not modified (the Pb plot adds text_fontsize=11 to
                        a private copy when it is not given)
       sampledata     - (histmb, edges, mbsamples, pbhist, pbedges) tuple
                        from an earlier run; when None, runSampler is
                        called to produce it
    Output:
       plots; returns the sampledata tuple so it can be passed back in
    History:
       2010-04-28 - Written - Bovy (NYU)
    """
    #NOTE(review): newer numpy versions may reject a negative seed -- confirm
    sc.random.seed(-1)  #In the interest of reproducibility (if that's a word)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #'is not None' because '== None' on an array compares elementwise
    if exclude is not None:
        nsample = ndata - len(exclude)
    else:
        nsample = ndata
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if exclude is not None and sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2. / dsigma**2.
        yerr[jj] = data[ii][2] / dsigma
        jj = jj + 1

    brange = [-120, 120]
    mrange = [1.5, 3.2]

    # This matches the order of the parameters in the "samples" vector
    mbrange = [brange, mrange]

    if sampledata is None:
        sampledata = runSampler(X, Y, A, C, yerr, nburn, nsamples, parsigma,
                                mbrange)

    (histmb, edges, mbsamples, pbhist, pbedges) = sampledata

    # Hack -- produce fake Pbad samples from Pbad histogram: repeat each
    # bin centre as many times as its bin count.
    pbcenters = (pbedges[:-1] + pbedges[1:]) / 2
    pbsamples = hstack([
        array([center] * count)
        for center, count in zip(pbcenters, pbhist)
    ])

    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi - 1], edges[1][indxj - 1]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi + 1], edges[1][indxj + 1]))

    #2D histogram
    plot.bovy_print(**bovyprintargs)
    levels = special.erf(0.5 * sc.arange(1, 4))
    xe = [edges[0][0], edges[0][-1]]
    ye = [edges[1][0], edges[1][-1]]
    aspect = (xe[1] - xe[0]) / (ye[1] - ye[0])
    plot.bovy_dens2d(histmb.T,
                     origin='lower',
                     cmap=cm.gist_yarg,
                     interpolation='nearest',
                     contours=True,
                     cntrmass=True,
                     extent=xe + ye,
                     levels=levels,
                     aspect=aspect,
                     xlabel=r'$b$',
                     ylabel=r'$m$')
    xlim(brange)
    ylim(mrange)

    plot.bovy_end_print(plotfilenameA)

    #Data with MAP line and sampling
    plot.bovy_print(**bovyprintargs)
    bestb = edges[0][indxi]
    bestm = edges[1][indxj]
    xlimits = [0, 300]
    ylimits = [0, 700]
    plot.bovy_plot(xlimits,
                   bestm * sc.array(xlimits) + bestb,
                   'k-',
                   xrange=xlimits,
                   yrange=ylimits,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    errorbar(X, Y, yerr, marker='o', color='k', linestyle='None', zorder=1)

    # NOTE(review): each pair is unpacked as (slope, intercept), the reverse
    # of the (b, m) order used for edges/mbrange above -- confirm against
    # what runSampler stores in mbsamples.
    for slope, intercept in mbsamples:
        plot.bovy_plot(xlimits,
                       slope * sc.array(xlimits) + intercept,
                       overplot=True,
                       xrange=xlimits,
                       yrange=ylimits,
                       xlabel=r'$x$',
                       ylabel=r'$y$',
                       color='0.75',
                       zorder=1)

    plot.bovy_end_print(plotfilenameB)

    #Pb plot
    #Work on a copy so neither the caller's dict nor the shared default
    #argument picks up the text_fontsize entry
    pbprintargs = dict(bovyprintargs)
    pbprintargs.setdefault('text_fontsize', 11)
    plot.bovy_print(**pbprintargs)
    plot.bovy_hist(pbsamples,
                   bins=int(round(sc.sqrt(nsamples) / 5.)),
                   xlabel=r'$P_\mathrm{b}$',
                   normed=True,
                   histtype='step',
                   range=[0, 1],
                   edgecolor='k')
    ylim(0, 4.)
    if dsigma == 1.:
        plot.bovy_text(r'$\mathrm{using\ correct\ data\ uncertainties}$',
                       top_right=True)
    else:
        plot.bovy_text(r'$\mathrm{using\ data\ uncertainties\ /\ 2}$',
                       top_left=True)

    plot.bovy_end_print(plotfilenameC)

    return sampledata