コード例 #1
0
def _plot_theta_posterior(ax, hdf5Name, position, line_style, label_fmt):
    """Draw one model's Beta variational distribution of theta on ``ax``.

    The model in ``hdf5Name`` is loaded with ``rvd3.load_model``.  The two
    Beta parameters are read from ``q['delta'][0][position, 0]`` and
    ``q['delta'][0][position, 1]``.  The density is drawn between the 0.1%
    and 99.9% quantiles, together with a normalised histogram of 1000 random
    draws from the same distribution.

    ``label_fmt`` is a ``%d`` format string; it receives the integer median
    of the second array returned by ``load_model`` (called ``N`` here,
    presumably the read depth -- confirm against ``rvd3``).
    """
    _, N, _, q, _, _ = rvd3.load_model(hdf5Name)
    delta = q['delta']
    # NOTE(review): index 0 looks like "first replicate" -- confirm.
    a = delta[0][position, 0]
    b = delta[0][position, 1]

    depth = int(np.median(N))
    # ppf (percentage point function) is the inverse CDF.
    x = np.linspace(beta.ppf(0.001, a, b), beta.ppf(0.999, a, b), 100)
    ax.plot(x,
            beta.pdf(x, a, b),
            line_style,
            lw=4,
            alpha=0.8,
            label=label_fmt % depth)

    # Histogram of random draws as a visual check of the analytic density.
    # ``density=True`` replaces ``normed=True``, which Matplotlib removed.
    samples = beta.rvs(a, b, size=1000)
    ax.hist(samples, density=True, histtype='stepfilled', alpha=0.2)


def plot(caseHDF5Name, controlHDF5Name, position):
    """Plot the case and control theta distributions at one position.

    Args:
        caseHDF5Name: path of the case model file for ``rvd3.load_model``.
        controlHDF5Name: path of the control model file.
        position: row index into the ``delta`` parameter array.

    The figure is shown interactively with ``plt.show()``; nothing is
    returned or saved.
    """
    print("Plot theta of position %d" % (position))

    fig, ax = plt.subplots()
    _plot_theta_posterior(ax, caseHDF5Name, position, 'b-',
                          "Case, Depth=%d")
    _plot_theta_posterior(ax, controlHDF5Name, position, 'g-',
                          'Control, Depth=%d')

    ax.legend(loc='best', frameon=False)
    ax.set_title(
        '$\\beta$ variational distribution of $\\theta$ at position %d' %
        (position))
    ax.set_xlabel('$\\theta$', fontsize=20)
    ax.set_ylabel('PDF', fontsize=18)

    plt.show()
コード例 #2
0
ファイル: plot_M.py プロジェクト: fzhangcode/rvd2-variational
def main():
    """Plot coverage and M across positions for the control and each dilution.

    Builds a grid with one row per sample (control first, then one row per
    dilution) and two columns: coverage on the left, the row-wise mean of
    ``phi['M']`` on a log scale on the right, with ``phi['M0']`` drawn as a
    dashed red reference line.  The figure is written to
    ``M_dsample=10.png``.
    """
    dilutionList = (0.1, 0.3, 1.0, 10.0, 100.0)
    folder = '2015-09-28_Run_rvd3_synthetic_data_set/hdf5/10'

    fig = plt.figure(figsize=(12, 20))
    n_rows = len(dilutionList) + 1
    last_row = len(dilutionList) - 1

    # ---- control row -------------------------------------------------
    controlFile = "../%s/Control.hdf5" % folder
    model = rvd3.load_model(controlFile)
    controlN, controlPhi, raw_loc = model[1], model[2], model[4]

    cov_ax = fig.add_subplot(n_rows, 2, 1)
    # Location strings are split on ':' and the second field is used as the
    # numeric x coordinate.
    controlLoc = [int(name.split(':')[1]) for name in raw_loc]
    cov_ax.plot(controlLoc, controlN.T)
    cov_ax.set_title('Control')
    cov_ax.set_ylabel('Coverage')
    cov_ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))

    m_ax = fig.add_subplot(n_rows, 2, 2)
    m_ax.semilogy(controlLoc, np.mean(controlPhi['M'], axis=1))
    m_ax.set_title('Control')
    m_ax.set_ylabel('M')
    m0 = controlPhi['M0']
    m_ax.semilogy([controlLoc[0], controlLoc[-1]], [m0, m0],
                  color='r',
                  ls='--')

    # ---- one row per dilution ----------------------------------------
    for row, d in enumerate(dilutionList):
        logging.debug("Processing dilution: %0.1f%%" % d)
        base_name = "Case%s.hdf5" % str(d).replace(".", "_")
        caseFile = "../%(folder)s/%(file)s" % {
            'folder': folder,
            'file': base_name
        }
        model = rvd3.load_model(caseFile)
        caseN, casePhi, raw_loc = model[1], model[2], model[4]
        is_last = row == last_row
        title = "Dilution %0.1f%%" % d

        cov_ax = fig.add_subplot(n_rows, 2, 2 * row + 3)
        caseLoc = [int(name.split(':')[1]) for name in raw_loc]
        cov_ax.plot(caseLoc, caseN.T)
        cov_ax.set_title(title)
        if is_last:
            cov_ax.set_xlabel('Position')
        cov_ax.set_ylabel('Coverage')
        cov_ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))

        m_ax = fig.add_subplot(n_rows, 2, 2 * row + 4)
        m_ax.semilogy(caseLoc, np.mean(casePhi['M'], axis=1))
        m_ax.set_title(title)
        if is_last:
            m_ax.set_xlabel('Position')
        m_ax.set_ylabel('M')
        m0 = casePhi['M0']
        m_ax.semilogy([caseLoc[0], caseLoc[-1]], [m0, m0],
                      color='r',
                      ls='--')

    plt.savefig('M_dsample=10.png')
コード例 #3
0
def bayestest(caseHDF5Name, controlHDF5Name, position):
    """Print a normal-approximation test of case vs control at one position.

    For each model, the Beta parameters ``q['gam'][position, :]`` give a mean
    that is offset by ``phi['mu0']``.  The difference of the two offset means
    is compared against the threshold ``tau = 0`` using a normal
    approximation whose variance is the sum of the two Beta variances.

    Args:
        caseHDF5Name: path of the case model file for ``rvd3.load_model``.
        controlHDF5Name: path of the control model file.
        position: row index into the ``gam`` parameter arrays.

    Prints the z-score and then ``p[0]``, where ``p`` is the normal CDF
    evaluated at that z-score.  Nothing is returned.
    """
    tau = 0
    _, _, casephi, caseq, _, _ = rvd3.load_model(caseHDF5Name)
    casegam = caseq['gam']
    _, _, controlphi, controlq, _, _ = rvd3.load_model(controlHDF5Name)
    controlgam = controlq['gam']

    def beta_mean(p):
        # Mean of Beta(a, b) is a / (a + b).
        return p[0] * 1.0 / np.sum(p)

    def beta_var(p):
        # Variance of Beta(a, b) is a*b / ((a+b)^2 * (a+b+1)).
        s = np.sum(p)
        return p[0] * p[1] / (s**2 * (s + 1))

    mu = (beta_mean(casegam[position, :]) - casephi['mu0']) - (
        beta_mean(controlgam[position, :]) - controlphi['mu0'])
    variance = (beta_var(casegam[position, :]) +
                beta_var(controlgam[position, :]))
    # A z-score standardises by the standard deviation, so take the square
    # root of the summed variance (the previous code divided by the variance).
    z = (tau - mu) / np.sqrt(variance)
    print(z)
    p = ss.norm.cdf(z)
    # NOTE(review): indexing p[0] assumes mu0 is array-like with at least one
    # element -- confirm against what rvd3.load_model returns.
    print(p[0])
コード例 #4
0
def read(filename, pos):
    """Return the offset Beta mean at ``pos`` and its error bar, in percent.

    The model in ``filename`` is loaded with ``rvd3.load_model``.  The mean
    of the Beta distribution parameterised by ``q['gam'][pos, :]`` is
    computed and ``phi['mu0']`` is subtracted from it.

    Args:
        filename: path of the model file for ``rvd3.load_model``.
        pos: row index into the ``gam`` parameter array.

    Returns:
        A pair ``(100 * mu, 100 * err)`` where ``err`` is a two-element array
        ``[cred, cred]`` (lower and upper error-bar lengths).
    """
    def beta_mean(p):
        # Mean of Beta(a, b) is a / (a + b).
        return p[0] * 1.0 / np.sum(p)

    _, _, casephi, caseq, _, _ = rvd3.load_model(filename)
    casegam = caseq['gam']
    caseMu = beta_mean(casegam[pos, :]) - casephi['mu0']

    # Lower and upper bounds are taken symmetrically around the mean.
    # NOTE(review): the half-width is mu * alpha / 2, which is not a Beta
    # quantile -- confirm this is the intended "credible" interval.
    alpha = 0.05
    cred = caseMu * alpha / 2
    conf_l = caseMu - cred
    conf_u = caseMu + cred

    # np.array's second positional argument is the dtype, so the two error
    # bar lengths must be passed as a single list.
    err = np.array([cred, cred])
    # One pre-formatted argument gives the same output on Python 2 and 3.
    print("%s %s %s" % (100 * caseMu, conf_l, conf_u))

    return 100 * caseMu, 100 * err
コード例 #5
0
def main():
    """Write per-method classification statistics to ``statistics_no_chi2.xls``.

    For every method in ``method`` and every (dilution, depth) combination,
    the called positions are read from the corresponding VCF file and turned
    into a 400-element 0/1 prediction vector.  That vector is compared with
    the reference vector (ones at positions 85, 105, ..., 345) through
    ``characteristics``.  Results go to four sheets:

    * ``TPR_TNR``        -- "TPR/TNR" per cell
    * ``Multi-measures`` -- all nine measures, nine columns per method
    * ``FDR``            -- FDR, skipped when it is NaN
    * ``MCC``            -- MCC, skipped when it is NaN

    The first two columns of each sheet hold the dilution label and the
    median of the case ``N`` array loaded from the matching HDF5 model.
    """
    book = xlwt.Workbook(encoding="utf-8")
    sheet1 = book.add_sheet("TPR_TNR")
    sheet1.write(0, 0, "VAF")
    sheet1.write(0, 1, "Median Depth")

    sheet2 = book.add_sheet("Multi-measures")
    sheet2.write(1, 0, "VAF")
    sheet2.write(1, 1, "Median Depth")

    sheet3 = book.add_sheet("FDR")
    sheet3.write(0, 0, "VAF")
    sheet3.write(0, 1, "Median Depth")

    sheet4 = book.add_sheet("MCC")
    sheet4.write(0, 0, "VAF")
    sheet4.write(0, 1, "Median Depth")

    # Alternative method tables kept for reference:
    # method = {'RVD2(T*)(R=6)':'./../2013-12-20_experiment_set_gibbs_Qsd_mu_1_mu_over_10_minus_mu0/six_replicates_synthetic_optT/vcf/MCC',
    #           'RVD2(T*)(R=1)':'./../2013-12-20_experiment_set_gibbs_Qsd_mu_1_mu_over_10_minus_mu0/one_replicate_synthetic_optT/vcf/MCC',
    #           'RVD2(T=0)(R=6)':'./../2013-12-20_experiment_set_gibbs_Qsd_mu_1_mu_over_10_minus_mu0/six_replicates_synthetic_T0/vcf',
    #           'RVD2(T=0)(R=1)':'./../2013-12-20_experiment_set_gibbs_Qsd_mu_1_mu_over_10_minus_mu0/one_replicate_synthetic_T0/vcf',
    #           'VarScan2 somatic':'./../2013-09-23_SNP_calling_using_varscan2_somatic/vcf',
    #           'SAMtools':'./../2013-09-10_SNP_calling_using_samtools/vcf',
    #           'GATK':'./../2013-09-13_SNP_calling_using_GATK/vcf',
    #           'MuTect':'./../2013-10-02_SNP_calling_using_MuTect/work',
    #           'Strelka':'./../2013-10-01_SNP_calling_using_strelka/vcf',
    #           'VarScan2 mpileup':'./../2013-09-20_SNP_calling_using_varscan2/vcf'}

    # method = {'RVD2_MCMC(T=0,R=6)':'./../2013-12-20_experiment_set_gibbs_Qsd_mu_1_mu_over_10_minus_mu0/six_replicates_synthetic_T0/vcf',
    #           'MuTect':'./../2013-10-02_SNP_calling_using_MuTect/work',
    #           'RVD2_Var(T=0,R=6)':'./vcf'
    #           }

    method = {'RVD3(T=0,R=6)': './vcf'}

    DilutionList = (0.1, 0.3, 1.0, 10.0, 100.0)
    DepthList = (10000, 1000, 100, 10)
    character = ('Sensitiviy', 'Specificity', 'FPR', 'FNR', 'PPV', 'NPV',
                 'FDR', 'ACC', 'MCC')
    n_depth = len(DepthList)

    # .items() and range() work on both Python 2 and Python 3.
    for i, (k, v) in enumerate(method.items(), start=1):
        print('Method %(number)d: %(method)s' % {'number': i, 'method': k})
        sheet1.write(0, i + 1, k)
        sheet2.write(0, 9 * (i - 1) + 6, k)
        sheet3.write(0, i + 1, k)
        sheet4.write(0, i + 1, k)
        for j, name in enumerate(character):
            sheet2.write(1, 9 * (i - 1) + j + 2, name)

        for d_idx, d in enumerate(DilutionList):
            block_row = d_idx * n_depth
            # Row labels are written only once, while handling the first
            # method, so that a cell is never written twice.
            if i == 1:
                sheet1.write(block_row + 1, 0, "%0.1f%%" % d)
                sheet2.write(block_row + 2, 0, "%0.1f%%" % d)
                sheet3.write(block_row + 1, 0, "%0.1f%%" % d)
                sheet4.write(block_row + 1, 0, "%0.1f%%" % d)

            for r_idx, r in enumerate(DepthList):
                row = block_row + r_idx

                # read in the median coverage
                hdf5Dir = './hdf5/%s' % str(r)
                caseFile = 'Case%s.hdf5' % str(d).replace('.', '_')
                caseFile = "%(dir)s/%(file)s" % {'dir': hdf5Dir,
                                                 'file': caseFile}
                (_, caseN, _, _, _, _) = rvd3.load_model(caseFile)
                cov = int(np.median(caseN))

                if i == 1:
                    sheet1.write(row + 1, 1, "%s" % str(cov))
                    sheet2.write(row + 2, 1, "%s" % str(cov))
                    sheet3.write(row + 1, 1, "%s" % str(cov))
                    sheet4.write(row + 1, 1, "%s" % str(cov))

                # read in called positions from vcf files
                vcfFile = os.path.join(v, "%s" % r,
                                       "vcf%s.vcf" % str(d).replace('.', '_'))
                logging.debug(vcfFile)

                # The handle is closed as soon as the records are consumed.
                with open(vcfFile, 'r') as vcf_handle:
                    vcf_reader = vcf.Reader(vcf_handle)
                    callpos = np.array([record.POS for record in vcf_reader])

                # prediction classification (positions are 1-based)
                PredictClass = np.zeros(400)
                if len(callpos) != 0:
                    PredictClass[callpos - 1] = 1

                # actual classification
                RefClass = np.zeros(400)
                pos = np.arange(85, 346, 20)
                RefClass[pos - 1] = 1

                # characteristics computation
                ncharacter = tuple(characteristics(RefClass, PredictClass))
                TPR, TNR, FPR, FNR, PPV, NPV, FDR, ACC, MCC = ncharacter

                # write characteristics
                sheet1.write(row + 1, i + 1,
                             "%(TPR)0.2f/%(TNR)0.2f" % {'TPR': TPR,
                                                       'TNR': TNR})
                for j, value in enumerate(ncharacter):
                    sheet2.write(row + 2, 9 * (i - 1) + j + 2,
                                 '%0.2f' % value)
                if not np.isnan(FDR):
                    sheet3.write(row + 1, i + 1, "%0.2f" % FDR)
                # The MCC cell is guarded by MCC itself; the previous code
                # tested FDR here, a copy-paste slip.
                if not np.isnan(MCC):
                    sheet4.write(row + 1, i + 1, "%0.2f" % MCC)
    book.save('statistics_no_chi2.xls')
コード例 #6
0
def _plot_mu_panel(ax, caseFile, controlFile, position, size, xticks, ymax,
                   show_xlabel):
    """Draw the case and control Beta distributions of mu on one panel.

    Each model is loaded with ``rvd3.load_model``.  Its Beta parameters come
    from ``get_a_b(position, q['gam'], phi['mu0'])``.  For each model the
    density is drawn between the 0.1% and 99.9% quantiles, together with a
    normalised histogram of 1000 random draws.  The panel title shows the
    integer median of the case ``N`` array.  Both ``mu0`` values are printed
    after the panel is drawn.
    """
    _, caseN, casephi, caseq, _, _ = rvd3.load_model(caseFile)
    a, b = get_a_b(position, caseq['gam'], casephi['mu0'])
    cov_case = int(np.median(caseN))
    x_case = np.linspace(beta.ppf(0.001, a, b), beta.ppf(0.999, a, b), 100)
    ax.plot(x_case,
            beta.pdf(x_case, a, b),
            'b-',
            lw=5,
            alpha=0.8,
            label="Case")
    # ``density=True`` replaces ``normed=True``, which Matplotlib removed.
    ax.hist(beta.rvs(a, b, size=1000),
            density=True,
            histtype='stepfilled',
            alpha=0.2)

    _, _, controlphi, controlq, _, _ = rvd3.load_model(controlFile)
    a, b = get_a_b(position, controlq['gam'], controlphi['mu0'])
    x_control = np.linspace(beta.ppf(0.001, a, b), beta.ppf(0.999, a, b), 100)
    ax.plot(x_control,
            beta.pdf(x_control, a, b),
            'g-',
            lw=5,
            alpha=0.8,
            label='Control')
    ax.hist(beta.rvs(a, b, size=1000),
            density=True,
            histtype='stepfilled',
            alpha=0.2)

    ax.legend(loc='best', frameon=False)
    ax.set_title('Depth=%d' % cov_case, fontsize=size)
    ax.set_xticks(xticks)
    if show_xlabel:
        ax.set_xlabel(r'$\mu$', fontsize=size)
    ax.set_ylim(0, ymax)

    print('mu0^control:', controlphi['mu0'])
    print('mu0^case', casephi['mu0'], '\n')


def plot(case_10000, case_1000, case_100, case_10, control_10000, control_1000,
         control_100, control_10, position):
    """Plot mu distributions for four downsampling levels in a 2x2 grid.

    Args:
        case_10000, case_1000, case_100, case_10: case model files, one per
            downsampling level, for ``rvd3.load_model``.
        control_10000, control_1000, control_100, control_10: the matching
            control model files.
        position: zero-based position index; ``position + 1`` is used in the
            printed message, the figure title and the output file name.

    The figure is saved as ``mu_<position+1>_VAF=1.0.png``.
    """
    print("Plot mu of position %d" % (position + 1))
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)

    xstep = 0.002
    xmax = 0.0152
    ymax = 1200
    size = 15
    xticks = np.arange(0, xmax, xstep)

    # Only the bottom row carries an x-axis label.
    panels = (
        (ax1, case_10000, control_10000, False),
        (ax2, case_1000, control_1000, False),
        (ax3, case_100, control_100, True),
        (ax4, case_10, control_10, True),
    )
    for ax, caseFile, controlFile, show_xlabel in panels:
        _plot_mu_panel(ax, caseFile, controlFile, position, size, xticks,
                       ymax, show_xlabel)

    # Raw string: same text as before, without invalid escape sequences.
    plt.suptitle(
        r'$\beta$ variational distribution of $\mu$ at position %d when VAF=1.0%% '
        % (position + 1),
        fontsize=size)
    # manually adjust the spacing of suptitle
    plt.subplots_adjust(top=0.9)

    fig.set_size_inches(12, 8)
    plt.savefig('mu_%d_VAF=1.0.png' % (position + 1))
コード例 #7
0
def plot(f2HDF5Name, f1HDF5Name, f3HDF5Name, f4HDF5Name, position):
    """Overlay the Beta distribution at ``position`` for four model files.

    Each file is loaded with ``rvd3.load_model``.  The Beta parameters are
    ``q['gam'][position, 0]`` and ``q['gam'][position, 1]``.  Each curve is
    labelled with the model's ``phi['M0']`` value.  Curves are drawn in the
    order f2, f1, f3, f4 and the figure is shown with ``plt.show()``.

    Args:
        f2HDF5Name, f1HDF5Name, f3HDF5Name, f4HDF5Name: model file paths.
        position: row index into the ``gam`` parameter arrays.
    """
    # (file, line style, label format); each file keeps its own precision
    # for the M0 label.
    curves = (
        (f2HDF5Name, 'g--', "M0=%.4f"),
        (f1HDF5Name, 'b-', 'M0=%.3f'),
        (f3HDF5Name, 'm-.', 'M0=%.2f'),
        (f4HDF5Name, 'yo:', 'M0=%.1f'),
    )

    fig, ax = plt.subplots()
    for hdf5Name, line_style, label_fmt in curves:
        _, _, phi, q, _, _ = rvd3.load_model(hdf5Name)
        gam = q['gam']
        a = gam[position, 0]
        b = gam[position, 1]
        # Evaluate the density between the 0.1% and 99.9% quantiles.
        x = np.linspace(beta.ppf(0.001, a, b), beta.ppf(0.999, a, b), 100)
        ax.plot(x,
                beta.pdf(x, a, b),
                line_style,
                lw=8,
                alpha=0.8,
                label=label_fmt % phi['M0'])

    legend = ax.legend(loc='upper left')
    for label in legend.get_texts():
        label.set_fontsize(38)

    # Raw string: same text as before, without invalid escape sequences.
    ax.set_xlabel(r'$\hat{\mu}_{1,014,740}$', fontsize=38)
    ax.set_xlim([0.95, 1])
    plt.setp(plt.gca().get_xticklabels(), fontsize=35)
    plt.setp(plt.gca().get_yticklabels(), fontsize=35)

    plt.show()
コード例 #8
0
def main():
    """Compare MCMC samples of mu with the variational Beta distribution.

    Reads the ``mu`` datasets from the HDF5 files named by the module-level
    ``control_mcmc`` and ``case_mcmc``, and the variational models named by
    ``case_var`` and ``control_var``.  For every entry of the module-level
    ``position`` sequence it draws one figure holding:

    * histograms of the MCMC samples for case and control,
    * the variational Beta density and a histogram of 2000 draws from it,
      using ``q['gam'][pos, 0]`` and ``q['gam'][pos, 1]`` as parameters.

    Each figure is saved as
    ``position_<pos+1>_<median depth>_mcmc_vs_var.png``.
    """
    ################### Read mu of MCMC (rvd2) ################################
    with h5py.File(control_mcmc, 'r') as f:
        muControl = f['mu'][...]
    with h5py.File(case_mcmc, 'r') as f:
        muCase = f['mu'][...]

    idx = list(position)
    if not idx:
        return
    muControl1 = muControl[idx]
    muCase1 = muCase[idx]

    # The variational models do not depend on the position, so load them once
    # rather than on every loop iteration.
    _, caseN, _, caseq, _, _ = rvd3.load_model(case_var)
    casegam = caseq['gam']
    cov_case = int(np.median(caseN))
    _, _, _, controlq, _, _ = rvd3.load_model(control_var)
    controlgam = controlq['gam']

    num_bins = 25
    for i, pos in enumerate(idx):
        plt.figure(figsize=(12, 8))

        ########### Plot mu of MCMC (rvd2) ####################################
        # density=True: the histogram integrates to 1.  It replaces
        # normed=True, which Matplotlib removed.
        plt.hist(muCase1[i, :].T,
                 num_bins,
                 density=True,
                 facecolor='r',
                 alpha=0.5,
                 label='Case (MCMC)')
        plt.hist(muControl1[i, :].T,
                 num_bins,
                 density=True,
                 facecolor='k',
                 alpha=0.5,
                 label='Control (MCMC)')

        ############# Plot mu of Variational (rvd3) ###########################
        # Use this figure's own position; indexing with the whole ``position``
        # sequence drew every position's curve on every figure.
        a = casegam[pos, 0]
        b = casegam[pos, 1]
        x_case = np.linspace(beta.ppf(0.001, a, b), beta.ppf(0.999, a, b), 100)
        plt.plot(x_case,
                 beta.pdf(x_case, a, b),
                 'r--',
                 lw=4,
                 alpha=1.0,
                 label="Case (Variational)")
        r_case = beta.rvs(a, b, size=2000)
        plt.hist(r_case,
                 num_bins,
                 density=True,
                 histtype='stepfilled',
                 alpha=0.2,
                 facecolor='r')

        a = controlgam[pos, 0]
        b = controlgam[pos, 1]
        x_control = np.linspace(beta.ppf(0.001, a, b), beta.ppf(0.999, a, b),
                                100)
        plt.plot(x_control,
                 beta.pdf(x_control, a, b),
                 'k--',
                 lw=4,
                 alpha=1.0,
                 label='Control (Variational)')
        r_control = beta.rvs(a, b, size=2000)
        plt.hist(r_control,
                 num_bins,
                 density=True,
                 histtype='stepfilled',
                 alpha=0.2,
                 facecolor='k')

        plt.xlim(0, 0.012)
        plt.legend(loc='best', frameon=False)
        # Raw strings: same text as before, without invalid escape sequences.
        plt.xlabel(r'$\hat{\mu} = \mu-\mu_0$', fontsize=20)
        plt.xticks(rotation=25)
        plt.title(r'$\hat{\mu}$ at position %s when median depth is %d' %
                  ((pos + 1), cov_case),
                  fontsize=18)
        # Lay the figure out before saving so the saved file is affected.
        plt.tight_layout()
        plt.savefig('position_%s_%d_mcmc_vs_var.png' %
                    ((pos + 1), cov_case))
コード例 #9
0
def ROCpoints(controlFile, caseFile, d, N, P, chi2):
    """Compute ROC points (FPR, TPR) over a sweep of 300 thresholds.

    Steps, as implemented below:

    1. Load both models with ``rvd3.load_model`` and draw 4000 Beta samples
       per location from ``q['gam']``.
    2. Keep only locations present in both case and control.
    3. Pass the ``mu0``-offset samples to ``sample_post_diff`` to get ``Z``.
    4. For 300 thresholds spanning ``[min(Z), max(Z)]``, call ``bayes_test``
       on ``(t, inf)`` and mark a location as called where the result
       exceeds ``P``.
    5. If ``chi2`` is true, additionally require a Fisher-combined
       ``rvd3.chi2test`` p-value below ``0.05 / J``.
    6. Compare the calls against the known positive positions
       85, 105, ..., 345.

    Args:
        controlFile: control model file for ``rvd3.load_model``.
        caseFile: case model file for ``rvd3.load_model``.
        d: unused here; kept so existing callers keep working.
        N: forwarded as the third argument of ``sample_post_diff``.
        P: probability cut-off applied to the ``bayes_test`` output.
        chi2: when true, also apply the chi-square filter.

    Returns:
        ``(fpr, tpr, cov)``: two arrays with one entry per threshold, and the
        median of the case ``N`` array.
    """
    n_samples = 4000

    # Load the model samples
    _, _, controlphi, controlq, controlLoc, _ = rvd3.load_model(controlFile)
    controlgam = controlq['gam']

    caseR, caseN, casephi, caseq, caseLoc, _ = rvd3.load_model(caseFile)
    casegam = caseq['gam']

    # Draw random samples from the Beta distributions.  Each array is sized
    # by its own number of locations; the previous code used the control
    # count for both.  The draws stay interleaved (control, then case) so the
    # random stream is unchanged when both models have the same length.
    n_control = len(controlLoc)
    n_case = len(caseLoc)
    controlMu = np.zeros(shape=(n_control, n_samples))
    caseMu = np.zeros(shape=(n_case, n_samples))
    for j in range(max(n_control, n_case)):
        if j < n_control:
            controlMu[j] = np.random.beta(controlgam[j, 0], controlgam[j, 1],
                                          n_samples)
        if j < n_case:
            caseMu[j] = np.random.beta(casegam[j, 0], casegam[j, 1],
                                       n_samples)

    # Extract the common locations in case and control.  Sets make each
    # membership test O(1).
    control_names = set(controlLoc)
    case_names = set(caseLoc)
    caseLocIdx = [i for i, name in enumerate(caseLoc)
                  if name in control_names]
    controlLocIdx = [i for i, name in enumerate(controlLoc)
                     if name in case_names]

    caseMu = caseMu[caseLocIdx, :]
    controlMu = controlMu[controlLocIdx, :]

    loc = np.asarray(caseLoc)[caseLocIdx]
    J = len(loc)
    true_positions = set(range(85, 346, 20))
    # NOTE(review): the slice [8:] assumes an 8-character prefix before the
    # numeric position in each location string -- confirm against the data.
    posidx = [i for i in range(J) if int(loc[i][8:]) in true_positions]

    # Sample from the posterior Z = muCase - muControl
    (Z, caseMuS, controlMuS) = sample_post_diff(caseMu - casephi['mu0'],
                                                controlMu - controlphi['mu0'],
                                                N)

    # Compute cumulative posterior probability for regions (Threshold, inf)
    T = np.linspace(np.min(Z), np.max(Z), num=300)
    pList = [bayes_test(Z, [(t, np.inf)]) for t in T]

    # mutation classification: rows are thresholds, columns are locations
    clsList = np.array((np.array(pList) > P).astype(int))
    clsList = clsList.reshape((clsList.shape[0], clsList.shape[1]))

    # chi2 test for goodness-of-fit to a uniform distribution for non-ref bases
    if chi2:
        # Restrict the counts to the common locations so that column j lines
        # up with column j of clsList.
        caseR_common = caseR[:, caseLocIdx, :]
        nRep = caseR_common.shape[0]
        chi2Prep = np.zeros((J, nRep))
        chi2P = np.zeros(J)
        for j in range(J):
            chi2Prep[j, :] = np.array(
                [rvd3.chi2test(caseR_common[i, j, :]) for i in range(nRep)])
            if np.any(np.isnan(chi2Prep[j, :])):
                chi2P[j] = 1
            else:
                # combine p-values using Fisher's Method
                chi2P[j] = 1 - ss.chi2.cdf(
                    -2 * np.sum(np.log(chi2Prep[j, :] + np.finfo(float).eps)),
                    2 * nRep)

        clsList2 = np.array((np.array(chi2P) < 0.05 / J).astype(int))
        clsList2 = np.tile(clsList2, (clsList.shape[0], 1))
        # A location is called only when both tests call it.
        clsList = np.array(((clsList + clsList2) == 2).astype(int))

    called = clsList.sum(axis=1)
    called_true = clsList[:, posidx].sum(axis=1)

    # false positive rate
    fpr = (called - called_true) / float(clsList.shape[1] - len(posidx))

    # true positive rate
    tpr = called_true / float(len(posidx))

    cov = np.median(caseN)

    # A disabled draft that picked an optimal threshold and wrote a VCF file
    # used to follow here; it referenced names that do not exist in this
    # function and was removed.
    return fpr, tpr, cov