Example #1
def coreTable3_1(c, exp_sig2, exp_beta, var_beta):
    #Set up subplots
    f, ax = plt.subplots(1, 1, figsize=(6, 4))
    f.suptitle('Bayesian Core Table 3.1', fontsize=14)
    log.info("Creating Bayesian Core Table 3.2")

    # hide axes
    f.patch.set_visible(False)
    ax.axis('off')

    cell_data = np.array([c, exp_sig2, exp_beta, var_beta]).T
    cell_text = util.npArrayToStrList(cell_data, '{0:.4f}')
    col_labels = [
        r'c', r'$E^{\pi}(\sigma^{2}|Y,X)$', r'$E^{\pi}(\beta_{0}|Y,X)$',
        r'$V^{\pi}(\beta_{0}|Y,X)$'
    ]

    tab = ax.table(cellText=cell_text,
                   colLabels=col_labels,
                   cellLoc='center',
                   loc='center')

    tab.set_fontsize(16)
    tab.scale(1, 2)
    log.log("Saving Figure...")
    plt.savefig('Table3_1.png')
    log.sucess("Figure created and successfully saved")
Example #2
def mleRegression(x_data, t_data, std):
    '''
    Performs an MLE linear regression for the given data
    Args:
        x_data (np.array): array of input data
        t_data (np.array): array of training data
        std: unused
    '''
    log.log("Calculating MLE regression")
    (N,K) = x_data.shape
    x = np.zeros((N,K+1)) + 1
    x[:,1:] = x_data #Prepend a column of ones for the bias (intercept) term
    #(X'*X)^(-1)*(X')*Y
    xtx_i = np.linalg.inv(x.T.dot(x))
    beta_hat = xtx_i.dot(x.T).dot(t_data)
    #Residuals and the unbiased noise variance estimate
    y_star = (t_data - x.dot(beta_hat))

    s2 = y_star.dot(y_star)
    sig_hat = 1.0/(N-K-1)*s2
    std_err = np.sqrt(np.diag(sig_hat*xtx_i)) #Standard error of each weight
    
    t = (beta_hat/std_err)
    #Two-tailed p-value from the Student-t distribution
    p_right = 1.0-sc.stats.t.cdf(np.abs(t),N-K-1)
    p_left = sc.stats.t.cdf(-np.abs(t),N-K-1)
    p = p_right+p_left

    log.sucess("MLE Regression complete")
    return beta_hat, std_err, t, p
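A minimal usage sketch (synthetic data; assumes the numpy, scipy (sc), and log imports used by mleRegression are in scope):

np.random.seed(0)
x_data = np.random.randn(100, 1)  #N=100 samples, K=1 covariate
t_data = 2.0 + 3.0*x_data[:, 0] + 0.1*np.random.randn(100)
beta_hat, std_err, t, p = mleRegression(x_data, t_data, None)  #std is unused
print(beta_hat)  #Approximately [2., 3.]: intercept then slope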
Example #3
def readFileData(fileName):
    '''
    Reads in caterpillar data from files
    @params:
        fileName(string): File name
    Returns:
        data (np.array): array of data read from file
    '''
    log.info("Reading in data file")
    #Attempt to read the text file and extract data into a list
    try:
        with open(str(fileName), "r") as file_handle:
            file_object = file_handle.read().splitlines()
        data_list = [[float(x.strip()) for x in my_string.split()]
                     for my_string in file_object]
    except (OSError, IOError) as err:
        log.error("File read error: {0}".format(err))
        return
    except:
        log.error("Unexpected error: {0}".format(sys.exc_info()[0]))
        return

    data = np.asarray(data_list)
    log.sucess("Catapillar data successfully read from file")
    return data
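Hypothetical usage, assuming a whitespace-delimited text file of floats (the file name is illustrative):

data = readFileData('caterpillar.txt')
if data is not None:
    print(data.shape)  #(rows, columns)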
Example #4
def coreTable3_11(gibs_info_beta, gibs_noninfo_beta, c):
    #Set up subplots
    f, ax = plt.subplots(1, 1, figsize=(6, 5))
    f.suptitle('Bayesian Core Table 3.11, C=%d' % (c), fontsize=14)
    ax.set_title('Left: Informative, Right: Non-Informative')
    log.info("Creating Bayesian Core Table 3.11")

    # hide axes
    f.patch.set_visible(False)
    ax.axis('off')

    cell_data = np.array([gibs_info_beta, gibs_noninfo_beta]).T
    cell_text = util.npArrayToStrList(cell_data, '{0:.5f}')

    col_labels = [
        r'$\gamma_{i}$', r'$\hat{P}^{\pi}(\gamma_{i}=1|Y,X)$',
        r'$\hat{P}^{\pi}(\gamma_{i}=1|Y,X)$'
    ]
    gamma_labels = []
    for i, val in enumerate(cell_text):
        gamma_labels.append(r'$\gamma_{' + str(i) + '}$')
    cell_text = util.appendListColumn(cell_text, gamma_labels, 0)

    tab = ax.table(cellText=cell_text,
                   colLabels=col_labels,
                   colWidths=[0.2, 0.3, 0.3],
                   cellLoc='center',
                   loc='center')

    tab.set_fontsize(12)
    tab.scale(1, 1.75)
    log.log("Saving Figure...")
    plt.savefig('Table3_11.png')
    log.sucess("Figure created and successfully saved")
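The util helpers in these examples are project-specific; this self-contained sketch shows the same matplotlib table pattern with placeholder labels and values:

import matplotlib.pyplot as plt

f, ax = plt.subplots(1, 1, figsize=(6, 4))
f.patch.set_visible(False)  #Hide the figure background
ax.axis('off')  #Hide the axes so only the table renders
cell_text = [['0.1000', '0.2000'], ['0.3000', '0.4000']]
tab = ax.table(cellText=cell_text,
               colLabels=['A', 'B'],
               cellLoc='center',
               loc='center')
tab.set_fontsize(12)
tab.scale(1, 1.75)  #Stretch row heights for readability
plt.savefig('demo_table.png')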
Example #5
def coreFigure3_2(beta_hat, std_err, t, p):
    #Set up subplots
    f, ax = plt.subplots(1, 1, figsize=(6, 6))
    f.suptitle('Bayesian Core Figure 3.2', fontsize=14)
    log.info("Creating Bayesian Core Figure 3.2")

    # hide axes
    f.patch.set_visible(False)
    ax.axis('off')
    ax.axis('tight')

    cell_data = np.array([beta_hat, std_err, t, p]).T
    cell_text = util.npArrayToStrList(cell_data, '{0:.6f}')

    col_labels = ['Estimate', 'Std. Error', 't-value', r'$Pr(>|t|)$']
    row_labels = ['intercept']
    for i, val in enumerate(beta_hat[1:]):
        row_labels.append('XV' + str(i + 1))  #Number covariates from 1; row 0 is the intercept

    tab = ax.table(cellText=cell_text,
                   rowLabels=row_labels,
                   colLabels=col_labels,
                   cellLoc='center',
                   loc='center',
                   bbox=[0.15, 0.2, 0.9, 0.7])

    tab.set_fontsize(16)
    tab.scale(1, 2)
    log.log("Saving Figure...")
    plt.savefig('Figure3_2.png')
    log.sucess("Figure created and successfully saved")
Example #6
def coreTable3_5(hpd):
    #Set up subplots
    f, ax = plt.subplots(1, 1, figsize=(6, 5))
    f.suptitle('Bayesian Core Table 3.5', fontsize=14)
    log.info("Creating Bayesian Core Table 3.5")

    # hide axes
    f.patch.set_visible(False)
    ax.axis('off')

    cell_text0 = util.npArrayToStrList(hpd, '{0:.4f}')
    cell_text1 = [
        str(row).replace('\'', '') for i, row in enumerate(cell_text0)
    ]
    cell_text = [[row] for row in cell_text1]  #A list, not a map iterator, so it can be traversed twice below

    col_labels = [r'$\beta_{i}$', 'HPD Interval']
    beta_labels = []
    for i, val in enumerate(cell_text):
        beta_labels.append(r'$\beta_{' + str(i) + '}$')
    cell_text = util.appendListColumn(cell_text, beta_labels, 0)

    tab = ax.table(cellText=cell_text,
                   colLabels=col_labels,
                   colWidths=[0.2, 0.3],
                   cellLoc='center',
                   loc='center')

    tab.set_fontsize(16)
    tab.scale(1, 2)
    log.log("Saving Figure...")
    plt.savefig('Table3_5.png')
    log.sucess("Figure created and successfully saved")
Example #7
def coreTable_B10(exp_beta, var_beta, log_b10, c):
    #Set up subplots
    f, ax = plt.subplots(1, 1, figsize=(6, 5))
    f.suptitle('Bayes\' Factor, C=' + str(c), fontsize=14)
    log.info("Creating Bayes Factor Table")

    # hide axes
    f.patch.set_visible(False)
    ax.axis('off')

    cell_data = np.array([exp_beta, var_beta, log_b10]).T
    cell_text = util.npArrayToStrList(cell_data, '{0:.4f}')
    col_labels = [
        r'$\beta_{i}$', r'$E^{\pi}(\beta_{i}|Y,X)$',
        r'$V^{\pi}(\beta_{i}|Y,X)$', r'$\log_{10}(BF)$'
    ]
    beta_labels = []
    for i, val in enumerate(exp_beta):
        beta_labels.append(r'$\beta_{' + str(i) + '}$')
    cell_text = util.appendListColumn(cell_text, beta_labels, 0)

    tab = ax.table(cellText=cell_text,
                   colLabels=col_labels,
                   colWidths=[0.2, 0.3, 0.3, 0.3],
                   cellLoc='center',
                   loc='center')

    tab.set_fontsize(16)
    tab.scale(1, 2)
    log.log("Saving Figure...")
    plt.savefig('Table_BayesFactor.png')
    log.sucess("Figure created and successfully saved")
Example #8
def coreGibbsModelEvidenceTable(model_evid, gibbs_evid, fignum_str, K, c=None):
    #Set up subplots
    f, ax = plt.subplots(1, 1, figsize=(6, 7))
    if c is not None:
        f.suptitle('Bayesian Core Table ' + fignum_str + ', C = %d' % (c),
                   fontsize=14)
    else:
        f.suptitle('Bayesian Core Table ' + fignum_str, fontsize=14)
    log.info("Creating Bayesian Core Table " + fignum_str)

    t_gamma = []
    for e0 in gibbs_evid:
        #e0 contains [model id, evidence]
        t_gamma_np, q = util.getGammaIndexes(K, int(e0[0]))
        model_label = str(t_gamma_np.tolist())
        t_gamma.append(re.sub('[^0-9 ,]+', '', model_label))

    # hide axes
    f.patch.set_visible(False)
    ax.axis('off')

    cell_data = np.array([model_evid[:, 1], gibbs_evid[:, 1]]).T
    cell_text = util.npArrayToStrList(cell_data, '{0:.5f}')

    col_labels = [
        r'$t_{1}(\gamma)$', r'$\pi(\gamma|Y,X)$', r'$\hat{\pi}(\gamma|Y,X)$'
    ]
    cell_text = util.appendListColumn(cell_text, t_gamma, 0)

    tab = ax.table(cellText=cell_text,
                   colLabels=col_labels,
                   colWidths=[0.4, 0.3, 0.3],
                   cellLoc='center',
                   loc='center')

    tab.set_fontsize(12)
    tab.scale(1, 1.75)
    log.log("Saving Figure...")
    plt.savefig('Table' + fignum_str.replace('.', '_') + '.png')
    log.sucess("Figure created and successfully saved")
Example #9
def coreFigure3_1(data):
    #Set up subplots
    f, ax = plt.subplots(2, 5, figsize=(6, 6))
    f.suptitle('Bayesian Core Figure 3.1', fontsize=14)
    log.info("Creating Bayesian Core Figure 3.1")

    n = 0
    for (i, j), ax0 in np.ndenumerate(ax):
        ax0.plot(data[:, n], data[:, -1], 'o', markersize=3.5)
        ax0.set_yscale('log')
        # Get rid of the ticks
        ax0.set_xticks([])
        ax0.set_yticks([])
        ax0.minorticks_off()
        #Axis label
        ax0.set_xlabel(r'$x_' + str(n + 1) + '$')
        n += 1

    plt.tight_layout(rect=[0, 0, 1.0, 0.93])
    log.log("Saving Figure...")
    plt.savefig('Figure3_1.png')
    log.sucess("Figure created and successfully saved")
Example #10
def priorExpectations(x_data, t_data, beta_hat, beta_tilde, a, b, c):
    '''
    Computes the expectation and variance of the regression weights under the conjugate prior
    @params:
        x_data (np.array): array of input data
        t_data (np.array): array of training data
        beta_hat (np.array): MLE linear regression weights
        beta_tilde (np.array): prior hyperparameter
        a (float): hyper-parameter
        b (float): hyper-parameter
        c (float): hyper-parameter
    '''
    log.log("Computing exp and var for conjugate priors")
    log.log("Hyper-parameters a:%.2f b:%.2f c:%.2f"%(a, b, c))

    (N,K) = x_data.shape
    x = np.zeros((N,K+1)) + 1
    x[:,1:] = x_data #Prepend a column of ones for the bias (intercept) term
    
    y_star = (t_data - x.dot(beta_hat))
    s2 = y_star.dot(y_star)
    #(X'*X)^(-1)
    xtx_i = np.linalg.inv(x.T.dot(x))
    M = np.eye(K+1)/c

    c1 = np.linalg.inv(np.linalg.inv(M)+np.linalg.inv(x.T.dot(x)))
    c1 = (beta_tilde - beta_hat).T.dot(c1).dot(beta_tilde - beta_hat)
    c2 = np.linalg.inv(M+x.T.dot(x))

    exp_sig2 = (2*b + s2 + c1)/(N + 2*a - 2)
    exp_beta = c2.dot(x.T.dot(x).dot(beta_hat) + M.dot(beta_tilde))
    covar_beta = (exp_sig2)*c2
    var_beta = np.diag(covar_beta) #Get the variance (diagonal) elements

    log.sucess("MLE Regression complete")
    return exp_sig2, exp_beta, var_beta
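A sketch of chaining the MLE fit into priorExpectations (the zero prior mean and the hyper-parameter values are illustrative assumptions):

beta_hat, std_err, t, p = mleRegression(x_data, t_data, None)
beta_tilde = np.zeros(x_data.shape[1] + 1)  #Hypothetical prior mean of length K+1
exp_sig2, exp_beta, var_beta = priorExpectations(x_data, t_data, beta_hat,
                                                 beta_tilde, a=2.1, b=2.0, c=100)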
Example #11
def coreTableExpVar(exp_beta, var_beta, fignum_str, c=None):
    #Set up subplots
    f, ax = plt.subplots(1, 1, figsize=(6, 5))
    if c is not None:
        f.suptitle('Bayesian Core Table ' + fignum_str + ', C = %d' % (c),
                   fontsize=14)
    else:
        f.suptitle('Bayesian Core Table ' + fignum_str, fontsize=14)
    log.info("Creating Bayesian Core Table " + fignum_str)

    # hide axes
    f.patch.set_visible(False)
    ax.axis('off')

    cell_data = np.array([exp_beta, var_beta]).T
    cell_text = util.npArrayToStrList(cell_data, '{0:.4f}')
    col_labels = [
        r'$\beta_{i}$', r'$E^{\pi}(\beta_{i}|Y,X)$',
        r'$V^{\pi}(\beta_{i}|Y,X)$'
    ]
    beta_labels = []
    for i, val in enumerate(exp_beta):
        beta_labels.append(r'$\beta_{' + str(i) + '}$')
    cell_text = util.appendListColumn(cell_text, beta_labels, 0)

    tab = ax.table(cellText=cell_text,
                   colLabels=col_labels,
                   colWidths=[0.2, 0.3, 0.3],
                   cellLoc='center',
                   loc='center')

    tab.set_fontsize(16)
    tab.scale(1, 2)
    log.log("Saving Figure...")
    plt.savefig('Table' + fignum_str.replace('.', '_') + '.png')
    log.sucess("Figure created and successfully saved")
Example #12
def gibbsSamplingInformative(x0, y0, beta_tilde, c, T, T0):
    '''
    Performs Gibbs sampling for Zellner's informative prior
    @params:
        x0 (np.array): array of input data
        y0 (np.array): array of training data
        beta_tilde (np.array): prior hyperparameter
        c (float): prior hyperparameter
        T (int): number of samples to take
        T0 (int): "burn in" samples to ignore
    '''
    T = int(T)
    T0 = int(T0)
    (N, K) = x0.shape
    x = np.zeros((N, K + 1)) + 1
    x[:, 1:] = x0  #Prepend a column of ones for the bias (intercept) term

    zeller_evid = np.zeros(2)
    gammas = np.zeros(T)
    post_informative = np.zeros((2**K, 2))
    beta_evidence = np.zeros((K))
    post_informative[:, 0] = range(2**K)

    gam_index = random.randint(0, 2**K - 1)  #randint is inclusive on both ends
    (t_gamma0, q) = util.getGammaIndexes(K, int(gam_index))

    log.log("Staring Zeller Informative Prior Gibb's sampling")
    log.warning("This will take a while...")

    for i in range(T):

        if (i % 100 == 0):
            log.print_progress(i, T)

        for j in range(K):
            gam_i0 = gam_index
            gam_i1 = gam_index
            if (int(gam_index / 2**j) % 2 == 0):
                gam_i1 = gam_index + 2**j  #With the current model parameter
            else:
                gam_i0 = gam_index - 2**j  #Without the current model parameter

            for i0, gam0 in enumerate([int(gam_i0), int(gam_i1)]):
                (t_gamma, q) = util.getGammaIndexes(K, gam0)
                x_g = x[:, t_gamma]
                bt_g = beta_tilde[t_gamma]

                xtxi = np.linalg.inv(x_g.T.dot(x_g))
                c0 = y0.T.dot(x_g.dot(xtxi).dot(x_g.T)).dot(y0)
                c1 = y0.T.dot(y0)-c/(c+1.0)*c0+1/(c+1.0)*bt_g.T.dot(x_g.T.dot(x_g)).dot(bt_g) -\
                    2.0/(c+1.0)*y0.T.dot(x_g).dot(bt_g)

                zeller_evid[i0] = (c + 1.0)**(-0.5 * (q + 1)) * c1**(-0.5 * N)

            zeller_evid_norm = zeller_evid[0] / np.sum(zeller_evid, 0)
            if (random.random() < zeller_evid_norm):
                gam_index = gam_i0
            else:
                gam_index = gam_i1
        gammas[i] = gam_index

    log.print_progress(T, T)
    log.sucess("Gibb's Sampling for informative prior complete!")
    log.log("Calculating Model Evidence")

    #Count all instances of a certain model to get model evidence
    for i in range(2**K):
        count = np.count_nonzero(gammas[T0:T] == i)
        post_informative[i, 1] = count / float(T - T0)  #gammas[T0:T] holds T-T0 samples

    post_informative_sort = post_informative[post_informative[:, 1].argsort()
                                             [::-1]]

    #Count all instances of each variable to get variable inclusion evidence
    for i in range(K):
        count = np.where((gammas[T0:T] / (2**i)).astype(int) %
                         2 == 1)[0].shape[0]
        beta_evidence[i] = count / float(T - T0)

    return post_informative_sort, beta_evidence
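A hypothetical run on data loaded by readFileData (the prior mean, c, and sample counts are illustrative):

beta_tilde = np.zeros(x0.shape[1] + 1)  #Hypothetical prior mean of length K+1
post_sort, beta_evid = gibbsSamplingInformative(x0, y0, beta_tilde,
                                                c=100, T=20000, T0=5000)
Example #13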
def variableSelection(x0, y0, beta_tilde, c, clim):
    '''
    Performs variable (model) selection by computing the evidence of each model
    @params:
        x0 (np.array): array of input data
        y0 (np.array): array of training data
        beta_tilde (np.array): prior hyperparameter
        c (float): prior hyperparameter
        clim (int): upper limit of the c summation for the non-informative prior
    '''
    (N, K) = x0.shape
    x = np.zeros((N, K + 1)) + 1
    x[:, 1:] = x0  #Prepend a column of ones for the bias (intercept) term

    zeller_posterior = np.zeros((2**K, 2))
    post_informative = np.zeros((2**K, 2))
    post_noninformative = np.zeros((2**K, 2))
    #Save the index of each model in the array for when we sort it by evidence
    post_informative[:, 0] = range(2**K)
    post_noninformative[:, 0] = range(2**K)

    log.log("Starting model variable selection for both priors")

    for i in range(2**K):
        (gamma_index, q) = util.getGammaIndexes(K, i)

        x_g = x[:, gamma_index]
        bt_g = beta_tilde[gamma_index]

        xtxi = np.linalg.inv(x_g.T.dot(x_g))
        c0 = y0.T.dot(x_g.dot(xtxi).dot(x_g.T)).dot(y0)
        c1 = y0.T.dot(y0)-c/(c+1.0)*c0+1/(c+1.0)*bt_g.T.dot(x_g.T.dot(x_g)).dot(bt_g) -\
            2.0/(c+1.0)*y0.T.dot(x_g).dot(bt_g)

        zeller_posterior[i, 0] = (c + 1.0)**(-0.5 * (q + 1)) * c1**(-0.5 * N)

        zeller_noninform = 0
        for ci in range(1, int(clim)):
            c1 = (1.0 / ci) * (ci + 1)**(-0.5 * (q + 1))
            zeller_noninform += c1 * (y0.T.dot(y0) - ci /
                                      (ci + 1.0) * c0)**(-0.5 * N)

        zeller_posterior[i, 1] = zeller_noninform

    log.sucess("Variable selection posteriors calculated")
    log.log("Normalizing and sorting Model Evidence")

    #Normalize model evidence
    post_informative[:, 1] = zeller_posterior[:, 0] / np.sum(
        zeller_posterior[:, 0])
    post_noninformative[:, 1] = zeller_posterior[:, 1] / np.sum(
        zeller_posterior[:, 1])

    #Sort array largest to smallest based on model evidence
    #https://stackoverflow.com/questions/2828059/sorting-arrays-in-numpy-by-column
    post_informative_sort = post_informative[post_informative[:, 1].argsort()
                                             [::-1]]
    post_noninformative_sort = post_noninformative[
        post_noninformative[:, 1].argsort()[::-1]]

    return post_informative_sort, post_noninformative_sort
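Hypothetical usage, ranking every model under both priors (argument values are illustrative):

post_info, post_noninfo = variableSelection(x0, y0, beta_tilde, c=100, clim=1e4)
for model_id, evidence in post_info[:5]:
    print(int(model_id), evidence)  #Top-5 models by normalized evidence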