Example #1
0
def step4_SCpeakbias(conf_dict,logfile):
    """Load chromosome sequences and run the peak-level bias scan.

    The first whitespace-separated field of every non-empty line of
    ``conf_dict['options']['csize']`` is taken as a chromosome name; its
    sequence is fetched with ``fetchseq_2bit_chrom`` and stored in
    ``conf_dict['results']['seqdict']``.  ``bias_peakXcell_mat`` is then
    called with the settings collected in ``conf_dict``.

    Args:
        conf_dict: nested configuration/result dictionary with the
            'General', 'options' and 'results' sections.
        logfile: log destination handed to ``wlog``.

    Returns:
        The same ``conf_dict`` object, with ``results['seqdict']`` filled in.
    """
    general = conf_dict['General']
    options = conf_dict['options']

    wlog('readin sequence from 2bit',logfile)
    seq_dict = {}
    # Context manager guarantees the handle is closed even if fetching fails.
    with open(options['csize']) as inf:
        for line in inf:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no chromosome.
                continue
            chrm = fields[0]
            seq_dict[chrm] = fetchseq_2bit_chrom(general['twoBitToFa'],
                                                 general['sequence'],
                                                 chrm)
    conf_dict['results']['seqdict'] = seq_dict

    wlog('scan peak level bias',logfile)

    # The return value was never used by the original code, so it is not bound.
    bias_peakXcell_mat(general['outname'],
                       general['bedtools'],
                       options['chromosome'],
                       options['kmer'],
                       conf_dict['results']['biasMat'],
                       conf_dict['results']['seqdict'],
                       conf_dict['results']['finalcells'],
                       general['datatype'],
                       options['peakminreads'],
                       options['peakmaxreads'])

    return conf_dict
Example #2
0
def step1_QC_format(conf_dict, logfile):
    """Summarize per-chromosome read counts and, in sc mode, filter cells.

    Stores the chrM read count and the summed read count of all other
    chromosomes in ``conf_dict['QC']``.  When ``General/mode`` is "sc" the
    result of ``filter_highQcell_reads`` is additionally unpacked by index
    into ``conf_dict['results']`` and ``conf_dict['QC']``.

    Args:
        conf_dict: nested configuration/result dictionary.
        logfile: log destination handed to ``wlog`` / ``ewlog``.

    Returns:
        The same ``conf_dict`` object after it has been updated.
    """
    wlog('summarize reads count distribution', logfile)
    general = conf_dict['General']
    options = conf_dict['options']

    # Per-chromosome read counts keyed by chromosome name.
    chrom_reads = split_chromosome_reads(general['fragments'],
                                         general['outname'],
                                         options['scATAC10x'],
                                         options['chromosome'])
    conf_dict['QC']["chrM_reads"] = (
        chrom_reads["chrM"] if "chrM" in chrom_reads else 0)
    conf_dict['QC']["chromatin_reads"] = sum(
        count for name, count in chrom_reads.items() if name != "chrM")

    # Everything below only applies to single-cell runs.
    if general['mode'] != "sc":
        return conf_dict

    wlog('filter high quality single cells', logfile)
    cutoff = options['readcutoff']
    filtered = filter_highQcell_reads(general['outname'], int(cutoff),
                                      options["usecells"])
    if filtered == "fail":
        ewlog('obtain < 100 high quality cell with reads >= %s.' % (cutoff),
              logfile)

    if len(options["usecells"]) == 0:
        wlog('no specified cellname list inputed', logfile)
    elif filtered[4] == "highQ":
        wlog(
            'obtain < 100 cell left after highQ + cellname filtering, use highQ cell only',
            logfile)

    highq_cellnum = filtered[1]
    final_readnum = filtered[2]
    wlog(
        'obtain %s cells from filtering, containing %s reads' %
        (highq_cellnum, final_readnum), logfile)

    conf_dict['results']['finalcells'] = filtered[0]
    qc = conf_dict['QC']
    qc['totalcellnum'] = filtered[3]
    qc['highQcellnum'] = highq_cellnum
    qc['finalusecellnum'] = len(conf_dict['results']['finalcells'])
    qc['finalreadnum'] = final_readnum

    return conf_dict
Example #3
0
def step2_biasMat(conf_dict, logfile):
    """Populate ``conf_dict['results']['biasMat']`` from one of two sources.

    * ``options/bias == "naked"``: read the matrix from ``options/biasfile``.
    * fewer than 500000 chrM reads: read the same file if it exists,
      otherwise report the problem through ``ewlog``.
    * otherwise: estimate a naive k-mer bias from chrM reads, then run
      ``simplex_encoding`` on it, writing to ``<outname>_bias.txt``.

    Args:
        conf_dict: nested configuration/result dictionary.
        logfile: log destination handed to ``wlog`` / ``ewlog``.

    Returns:
        The same ``conf_dict`` object after it has been updated.
    """
    options = conf_dict['options']
    results = conf_dict['results']

    # Case 1: the user explicitly asked for the naked-DNA matrix.
    if options['bias'] == "naked":
        wlog('obtain pre-processed bias matrix from naked DNA data', logfile)
        results['biasMat'] = readBias(options['biasfile'])
        return conf_dict

    # Case 2: too few chrM reads, fall back to the naked-DNA matrix.
    if conf_dict['QC']['chrM_reads'] < 500000:
        wlog(
            'chrM reads number < 500k, obtain pre-processed bias matrix from naked DNA data',
            logfile)
        if os.path.isfile(options['biasfile']):
            results['biasMat'] = readBias(options['biasfile'])
        else:
            ewlog("no naked DNA bias matrix, cannot estimate bias", logfile)
        return conf_dict

    # Case 3: enough chrM reads, estimate the matrix from the data itself.
    wlog('estimate bias matrix from mtDNA(chrM) data', logfile)
    general = conf_dict['General']
    results['biasMatNaive'] = naive_kmerBias_chrM(general['outname'],
                                                  general['sequence'],
                                                  options['kmer'],
                                                  general['twoBitToFa'],
                                                  general['format'])
    results['biasfile'] = "%s_bias.txt" % (general['outname'])
    results['biasMat'] = simplex_encoding(results['biasMatNaive'],
                                          results['biasfile'])
    return conf_dict
Example #4
0
def step3_callpeak(conf_dict, logfile):
    """Produce the extended peak file, from external peaks or via macs3.

    The output path ``<outname>_summitEXT.bed`` is recorded in
    ``conf_dict['results']['peakfile']``.  If ``options/peak`` is not "NA"
    the external peaks are extended with ``extExternal``; when that yields
    at least 1000 peaks macs3 is skipped.  Otherwise ``macs3 callpeak`` is
    run on ``<outname>_chromatin.bed`` and the summits are extended with
    ``extsummit``.  The peak count is stored in
    ``conf_dict['QC']['peaknumTotal']``.

    Args:
        conf_dict: nested configuration/result dictionary.
        logfile: log destination handed to ``wlog`` / ``ewlog``.

    Returns:
        The same ``conf_dict`` object after it has been updated.
    """
    general = conf_dict['General']
    options = conf_dict['options']
    outname = general['outname']

    conf_dict['results']['peakfile'] = outname + "_summitEXT.bed"

    need_macs3 = True
    if options['peak'] != "NA":
        conf_dict['QC']['peaknumTotal'] = extExternal(
            options['peak'], conf_dict['results']['peakfile'],
            int(options['extend']))
        external_count = conf_dict['QC']['peaknumTotal']
        if external_count >= 1000:
            wlog("obtain %s peaks from (-p) inputted" % external_count,
                 logfile)
            need_macs3 = False
        else:
            wlog(
                "obtain < 1000 (%s) external inputted peaks, use macs3 to detect peaks"
                % external_count, logfile)

    if not need_macs3:
        return conf_dict

    if general['macs3'] == "NA":
        ewlog(
            "macs3 was not installed. SELMA requires macs3 installed in the default path ($PATH) for peak calling",
            logfile)

    # Genome-size tag passed to macs3 through -g.
    gtag = "hs" if general['genome'] == "hg38" else "mm"

    if general['format'] == "PE":
        template = "macs3 callpeak -t %s -n %s -f BEDPE -g %s -q %s --keep-dup 1"
    else:
        template = "macs3 callpeak -t %s -n %s -f BED -g %s -q %s --keep-dup 1 --nomodel --extsize 100"
    macs3cmd = template % (outname + "_chromatin.bed", outname, gtag,
                           options['peakqval'])

    wlog("peak calling with macs3: %s" % macs3cmd, logfile)
    sp(macs3cmd)

    # Extend each summit to a fixed-width peak.
    wlog("extend peak summits to +/- %sbp" % options['extend'], logfile)
    summit_file = outname + "_summits.bed"
    if not os.path.isfile(summit_file):
        ewlog(
            "no macs3 results detected, check whether macs3 was correctly installed.",
            logfile)
    conf_dict['QC']['peaknumTotal'] = extsummit(
        summit_file, conf_dict['results']['peakfile'],
        int(options['extend']))
    called_count = conf_dict['QC']['peaknumTotal']
    if called_count < 1000:
        ewlog("obtain < 1000 (%s) peaks, SELMA terminated" % called_count,
              logfile)
    else:
        wlog("obtain %s peaks" % called_count, logfile)

    return conf_dict
Example #5
0
def step4_BULKcleavageBias(conf_dict, logfile):
    """Build observed and bias-expected cleavage tracks for bulk data.

    Steps, in order:
      1. split ``<outname>_chromatin.bed`` into plus/minus cleavage BED files
         with awk (fragment ends for "PE", strand column otherwise);
      2. for each strand run ``macs3 pileup``, sort the bedGraph and convert
         it to bigWig with the configured ``bedGraphToBigWig`` tool;
      3. sort and merge the extended peak file with bedtools;
      4. load chromosome sequences for every chromosome listed in
         ``options/csize`` into ``conf_dict['results']['seqdict']``;
      5. call ``bias_exp_cleavage_DNase`` or ``bias_exp_cleavage_ATAC``
         depending on ``General/datatype``;
      6. sort the ``_biasExpCuts_*`` bedGraphs and convert them to bigWig.

    Args:
        conf_dict: nested configuration/result dictionary.
        logfile: log destination handed to ``wlog``.

    Returns:
        The same ``conf_dict`` object, with ``results['seqdict']`` filled in.
    """
    general = conf_dict['General']
    options = conf_dict['options']
    outname = general['outname']
    chromatin_bed = outname + "_chromatin.bed"
    merged_peaks = outname + "_summitEXTmerge.bed"
    strands = ("plus", "minus")

    def sort_and_convert(prefix):
        # Sort <prefix>.bdg by chromosome/start, then convert it to bigWig.
        sp("sort -k1,1 -k2,2n %s > %s" %
           (prefix + ".bdg", prefix + "_sorted.bdg"))
        sp("%s %s %s %s" %
           (general['bedGraphToBigWig'], prefix + "_sorted.bdg",
            options['csize'], prefix + ".bw"))

    wlog('split fragments to strand specific cleavage sites', logfile)
    if general['format'] == "PE":
        # Paired-end: the fragment start is the + cut, the fragment end the - cut.
        awk_plus = """awk '{OFS="\\t";print $1,$2,$2+1,".",".","+"}' %s > %s"""
        awk_minus = """awk '{OFS="\\t";print $1,$3-1,$3,".",".","-"}' %s > %s"""
    else:
        # Single-end: keep the reads as they are, split by the strand column.
        awk_plus = """awk '{if($6=="+") print $0}' %s > %s"""
        awk_minus = """awk '{if($6=="-") print $0}' %s > %s"""
    sp(awk_plus % (chromatin_bed, outname + "_cleavage_plus.bed"))
    sp(awk_minus % (chromatin_bed, outname + "_cleavage_minus.bed"))

    wlog('pile up cleavage sites', logfile)
    for strand in strands:
        prefix = outname + "_cleavage_" + strand
        sp("macs3 pileup -i %s -f BED --extsize 1 -o %s " %
           (prefix + ".bed", prefix + ".bdg"))
        sort_and_convert(prefix)

    wlog("remove redundant position from the extended peak file", logfile)
    cmduni = """sort -k 1,1 -k 2,2g -k 3,3g %s | %s merge -i - > %s""" % (
        conf_dict['results']['peakfile'], general['bedtools'], merged_peaks)
    sp(cmduni)

    wlog('readin sequence from 2bit', logfile)
    seq_dict = {}
    # Context manager guarantees the handle is closed even if fetching fails.
    with open(options['csize']) as inf:
        for line in inf:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no chromosome.
                continue
            chrm = fields[0]
            seq_dict[chrm] = fetchseq_2bit_chrom(general['twoBitToFa'],
                                                 general['sequence'], chrm)
    conf_dict['results']['seqdict'] = seq_dict

    wlog('calculate bias expected cleavages', logfile)
    # Both implementations take the same argument list.
    if general['datatype'] == "DNase":
        bias_exp_cleavage = bias_exp_cleavage_DNase
    else:
        bias_exp_cleavage = bias_exp_cleavage_ATAC
    bias_exp_cleavage(outname, merged_peaks,
                      conf_dict['results']['biasMat'], options['kmer'],
                      general['bigWigSummary'], general['bedGraphToBigWig'],
                      conf_dict['results']['seqdict'])

    for strand in strands:
        sort_and_convert(outname + "_biasExpCuts_" + strand)
    return conf_dict
Example #6
0
def step5_SCcellClustering(conf_dict,logfile):
    """Run the configured single-cell clustering method and count clusters.

    ``options/clustermethod`` selects one of PCAkm, Seurat, ArchR, APEC or
    Cicero; the value returned by the matching ``scClustering_*`` helper is
    stored in ``conf_dict['General']['scPackage']`` and a message is logged
    when it equals "noPackage".  Any other method name runs nothing.

    Afterwards ``<outname>_scClusters.txt`` is read if it exists: the number
    of distinct values in its second tab-separated column (ignoring the
    literal header value "cluster") is stored in
    ``conf_dict['QC']['scClusters']``; 0 is stored when the file is absent.

    Args:
        conf_dict: nested configuration/result dictionary.
        logfile: log destination handed to ``wlog``.

    Returns:
        The same ``conf_dict`` object after it has been updated.
    """
    wlog('single-cell clustering analysis',logfile)
    general = conf_dict['General']
    options = conf_dict['options']
    outname = general['outname']
    method = options['clustermethod']

    # Message to log if the selected helper reports "noPackage".
    missing_msg = None
    if method == "PCAkm":
        general['scPackage'] = scClustering_PCAkm(outname,
                                                  options['lowbiaspeak'],
                                                  options['clusterNum'],
                                                  options['topDim'],
                                                  int(options['UMAP']))
        missing_msg = "umap was not installed, UMAP scatter plot will not be generated"
    elif method == "Seurat":
        general['scPackage'] = scClustering_Seurat(outname,
                                                   options['lowbiaspeak'],
                                                   options['topDim'],
                                                   int(options['UMAP']))
        missing_msg = "Seurat related packages were not installed, skip single-cell clustering step"
    elif method == "ArchR":
        general['scPackage'] = scClustering_ArchR(outname,
                                                  general['genome'],
                                                  options['lowbiaspeak'],
                                                  options['topDim'],
                                                  int(options['UMAP']))
        missing_msg = "ArchR related packages were not installed, skip single-cell clustering step"
    elif method == "APEC":
        general['scPackage'] = scClustering_APEC(outname,
                                                 options['lowbiaspeak'],
                                                 int(options['UMAP']))
        missing_msg = "APEC related packages were not installed, skip single-cell clustering step"
    elif method == "Cicero":
        general['scPackage'] = scClustering_Cicero(outname,
                                                   options['lowbiaspeak'],
                                                   int(options['UMAP']))
        missing_msg = "Cicero related packages were not installed, skip single-cell clustering step"

    if missing_msg is not None and general['scPackage'] == "noPackage":
        wlog(missing_msg,logfile)

    cluster_file = "%s_scClusters.txt"%(outname)
    labels = set()
    if os.path.isfile(cluster_file):
        with open(cluster_file) as inf:
            for line in inf:
                fields = line.strip().split("\t")
                # Skip blank/short lines and the header row.
                if len(fields) > 1 and fields[1] != "cluster":
                    labels.add(fields[1])
    conf_dict['QC']['scClusters'] = len(labels)
    return conf_dict
Example #7
0
def stepFinal_summary(conf_dict, logfile):
    """Collect result files and write the text / LaTeX summary reports.

    The function
      * moves final results into ``summary/`` and intermediate files into
        ``tmpResults/`` (the latter is removed unless ``options/keeptmp`` is
        truthy);
      * writes ``<outname>_summaryReports.txt`` (tab-separated key/value);
      * writes ``<outname>_summaryReports.tex`` and, if ``which pdflatex``
        prints a path, compiles it twice and removes the .aux/.log/.toc
        by-products.

    Changes relative to the previous revision: the keeptmp log message now
    matches the branch taken, the duplicated move of ``_highQcellReads.bed``
    is gone, the LaTeX dimension-reduction row lists the plot file instead
    of ``_scClusters.txt``, the fourth table is introduced as "Table 4",
    and every LaTeX backslash is written as a valid Python escape.

    Args:
        conf_dict: nested configuration/result dictionary.
        logfile: log destination handed to ``wlog``.

    Returns:
        The same ``conf_dict`` object; ``results['umap']`` is set to the
        plot file name or "NA".
    """
    general = conf_dict['General']
    options = conf_dict['options']
    qc = conf_dict['QC']
    results = conf_dict['results']
    outname = general['outname']
    is_bulk = general['mode'] == "bulk"
    is_sc = general['mode'] == "sc"

    def move(suffixes, dest):
        # Move <outname><suffix> into dest for every suffix, in order.
        for suffix in suffixes:
            sp("mv %s%s %s" % (outname, suffix, dest))

    # ------------------------------------------------------------------
    # final results -> summary/
    # ------------------------------------------------------------------
    wlog('Collect results', logfile)
    summarydir = 'summary/'
    createDIR(summarydir)
    if "biasfile" in results and os.path.isfile(results['biasfile']):
        sp("mv %s %s" % (results['biasfile'], summarydir))
    move(["_summitEXT.bed"], summarydir)

    results['umap'] = "NA"
    if is_bulk:
        move(["_cleavage_plus.bw", "_cleavage_minus.bw",
              "_biasExpCuts_plus.bw", "_biasExpCuts_minus.bw"], summarydir)
    else:
        sp("gzip %s_peakXcellMat.txt" % (outname))
        sp("gzip %s_peakFeatures.txt" % (outname))
        move(["_peakXcellMat.txt.gz", "_peakFeatures.txt.gz",
              "_scClusters.txt"], summarydir)

        if os.path.isfile("%s_clusteringUMAP.pdf" % (outname)):
            if options['clustermethod'] in ["APEC", "Cicero"]:
                # These methods produce a t-SNE plot; rename accordingly.
                sp("mv %s_clusteringUMAP.pdf %s/%s_clusteringTSNE.pdf" %
                   (outname, summarydir, outname))
                results['umap'] = "%s_clusteringTSNE.pdf" % (outname)
            else:
                move(["_clusteringUMAP.pdf"], summarydir)
                results['umap'] = "%s_clusteringUMAP.pdf" % (outname)

    # ------------------------------------------------------------------
    # intermediate files -> tmpResults/
    # ------------------------------------------------------------------
    tmpresult = 'tmpResults/'
    createDIR(tmpresult)
    move(["_chromatin.bed", "_chrM.bed", "_summits.bed", "_peaks.xls",
          "_peaks.narrowPeak"], tmpresult)

    if is_bulk:
        move(["_cleavage_plus.bed", "_cleavage_minus.bed",
              "_cleavage_plus.bdg", "_cleavage_minus.bdg",
              "_cleavage_plus_sorted.bdg", "_cleavage_minus_sorted.bdg",
              "_biasExpCuts_plus.bdg", "_biasExpCuts_minus.bdg",
              "_biasExpCuts_plus_sorted.bdg", "_biasExpCuts_minus_sorted.bdg",
              "_summitEXTmerge.bed"], tmpresult)
    else:
        move(["_highQcellReads.bed", "_tmpSCreads.bed", "_tmpSCpeaks.bed",
              "_scOVcleavage.bed", "_scRscript.r"], tmpresult)
        # Working directories created by some clustering back-ends.
        if options['clustermethod'] in ("ArchR", "APEC", "Cicero"):
            move(["_" + options['clustermethod']], tmpresult)

    if options['keeptmp']:
        wlog('--keeptmp was set, keep intermediate results', logfile)
    else:
        wlog('--keeptmp was not set, remove intermediate results', logfile)
        sp("rm -r %s" % tmpresult)

    # The moves are done; these facts are reused by both reports.
    has_cluster_file = os.path.isfile("%s%s_scClusters.txt" %
                                      (summarydir, outname))
    has_umap = os.path.isfile(summarydir + results['umap'])
    if has_umap:
        if options['clustermethod'] in ["APEC", "Cicero"]:
            dimRedTerm = "t-SNE"
        else:
            dimRedTerm = "UMAP"

    # ------------------------------------------------------------------
    # plain-text report
    # ------------------------------------------------------------------
    wlog('Generate summary reports', logfile)
    with open("%s_summaryReports.txt" % outname, 'w') as outf:
        outf.write("#settings\n")
        outf.write("mode\t%s\n" % (general['mode']))
        outf.write("fragments\t%s\n" % (general['fragments']))
        outf.write("data format\t%s\n" % (general['format']))
        outf.write("data type\t%s\n" % (general['datatype']))
        outf.write("genome version\t%s\n" % (general['genome']))
        outf.write("output name\t%s\n" % (outname))

        outf.write("\n#parameters\n")
        outf.write("peak extend size\t%s\n" % (options['extend']))
        outf.write("peak qvalue\t%s\n" % (options['peakqval']))
        outf.write("bias source\t%s\n" % (options['bias']))
        outf.write("k-mer\t%s\n" % (options['kmer']))
        if is_sc:
            outf.write("[sc]reads cutoff\t%s\n" % (options['readcutoff']))
            # Concatenation, not %-formatting: the label itself contains '%'.
            outf.write("[sc]%low biaspeak\t" + str(options['lowbiaspeak']) +
                       "\n")
            outf.write("[sc]peak min reads\t%s\n" % (options['peakminreads']))
            outf.write("[sc]peak max reads\t%s\n" % (options['peakmaxreads']))
            outf.write("[sc]topN dimensions\t%s\n" % (options['topDim']))
            outf.write("[sc]clustering method\t%s\n" %
                       (options['clustermethod']))

        outf.write("\n#QC\n")
        outf.write("total reads\t%s\n" %
                   (qc['chrM_reads'] + qc['chromatin_reads']))
        outf.write("chromatin reads\t%s\n" % (qc['chromatin_reads']))
        outf.write("mtDNA reads\t%s\n" % (qc['chrM_reads']))
        outf.write("total peaks\t%s\n" % (qc['peaknumTotal']))
        if is_sc:
            outf.write("total single-cells\t%s\n" % (qc['totalcellnum']))
            outf.write("high quality single-cells\t%s\n" %
                       (qc['highQcellnum']))
            outf.write("single-cells for clustering\t%s\n" %
                       (qc['finalusecellnum']))
            outf.write("reads in single-cells for clustering\t%s\n" %
                       (qc['finalreadnum']))
            if qc['scClusters'] > 0:
                outf.write("cluster number\t%s\n" % (qc['scClusters']))

        outf.write("\n#output results\n")
        outf.write("peaks (accessible regions)\t%s_summitEXT.bed\n" %
                   (outname))
        if is_bulk:
            outf.write("observed cleavage (+ strand)\t%s_cleavage_plus.bw\n" %
                       (outname))
            outf.write("observed cleavage (- strand)\t%s_cleavage_minus.bw\n" %
                       (outname))
            outf.write(
                "bias expected cleavage (+ strand)\t%s_biasExpCuts_plus.bw\n" %
                (outname))
            outf.write(
                "bias expected cleavage (- strand)\t%s_biasExpCuts_minus.bw\n" %
                (outname))
        else:
            outf.write("peak bias features\t%s_peakFeatures.txt.gz\n" %
                       (outname))
            outf.write("peakXcell count\t%s_peakXcellMat.txt.gz\n" %
                       (outname))
            if has_cluster_file:
                outf.write("single-cell cluster\t%s_scClusters.txt\n" %
                           (outname))
            if has_umap:
                if dimRedTerm == "t-SNE":
                    outf.write("sc-cluster t-SNE\t%s\n" % (results['umap']))
                else:
                    outf.write("sc-cluster UMAP\t%s\n" % (results['umap']))

    # ------------------------------------------------------------------
    # LaTeX report
    # ------------------------------------------------------------------
    # Every LaTeX backslash is doubled so Python sees only valid escapes;
    # '\\\\' therefore yields the LaTeX row terminator '\\'.
    QCdoc = """\\documentclass[11pt,a4paper]{article}
\\usepackage{tabularx}
\\usepackage[english]{babel}
\\usepackage{array}
\\usepackage{graphicx}
\\usepackage{color}
\\DeclareGraphicsExtensions{.eps,.png,.pdf,.ps}
\\begin{document}
\\title{SELMA summary reports for: %s}

\\vspace{-1cm}
\\maketitle
\\tableofcontents
\\newpage
\\newpage
\\section{Summary description}
\\begin{quotation}
Table 1 describes the input files and settings.
\\end{quotation}
\\begin{table}[h]
\\small
\\caption{ settings }\\label{bstable}
\\begin{tabularx}{\\textwidth}{ |X|l| }

""" % (strlatexformat(outname))
    ### table1 prepare parameter
    QCdoc += """      
\\hline
parameter & value  \\\\
\\hline
mode & %s \\\\
\\hline
fragment file & %s \\\\
\\hline
data format & %s \\\\
\\hline
data type & %s  \\\\
\\hline
genome version & %s  \\\\
\\hline
output name & %s  \\\\
\\hline
\\end{tabularx}
\\end{table}
""" % (strlatexformat(general['mode']),
       strlatexformat(general['fragments'].split("/")[-1]),
       strlatexformat(general['format']),
       strlatexformat(general['datatype']),
       strlatexformat(general['genome']),
       strlatexformat(outname))

    ### table2 parameters and options
    QCdoc += """
\\newpage
\\newpage
\\section{parameters and options}
\\begin{quotation}
Table 2 describes the parameters and options.
\\end{quotation}
\\begin{table}[h]
\\small
\\caption{parameters and options}\\label{bstable}
\\begin{tabularx}{\\textwidth}{ |X|l| }
\\hline
parameter & value  \\\\
\\hline
peak extend size & %s \\\\
\\hline
peak qvalue & %s \\\\
\\hline
bias source & %s \\\\
\\hline
k-mer & %s  \\\\
\\hline
""" % (strlatexformat(options['extend']),
       strlatexformat(options['peakqval']),
       strlatexformat(options['bias']),
       strlatexformat(options['kmer']))
    if is_sc:
        QCdoc += """
[sc]reads cutoff & %s  \\\\
\\hline
[sc]lowbias peak percent & %s  \\\\
\\hline
[sc]peak min reads & %s  \\\\
\\hline
[sc]peak max reads & %s  \\\\
\\hline
[sc]topN dimensions & %s  \\\\
\\hline
[sc]cluster methods & %s  \\\\
\\hline
""" % (
            strlatexformat(options['readcutoff']),
            strlatexformat(options['lowbiaspeak']),
            strlatexformat(options['peakminreads']),
            strlatexformat(options['peakmaxreads']),
            strlatexformat(options['topDim']),
            strlatexformat(options['clustermethod']),
        )
    QCdoc += """
\\end{tabularx}
\\end{table}
"""

    ### table3 data quality
    QCdoc += """
\\newpage
\\newpage
\\section{data quality}
\\begin{quotation}
Table 3 describes data Quality.
\\end{quotation}
\\begin{table}[h]
\\small
\\caption{data quality}\\label{bstable}
\\begin{tabularx}{\\textwidth}{ |X|l| }
\\hline
parameter & value  \\\\
\\hline
total reads & %s \\\\
\\hline
chromatin reads & %s \\\\
\\hline
mtDNA reads & %s \\\\
\\hline
total peaks & %s  \\\\
\\hline
""" % (strlatexformat(qc['chrM_reads'] + qc['chromatin_reads']),
       strlatexformat(qc['chromatin_reads']),
       strlatexformat(qc['chrM_reads']),
       strlatexformat(qc['peaknumTotal']))
    if is_sc:
        QCdoc += """
[sc]total single-cells(sc) & %s  \\\\
\\hline
[sc]high quality sc & %s  \\\\
\\hline
[sc]sc for clustering & %s  \\\\
\\hline
[sc]reads in sc for clustering & %s  \\\\
\\hline
""" % (strlatexformat(qc['totalcellnum']),
        strlatexformat(qc['highQcellnum']),
        strlatexformat(qc['finalusecellnum']),
        strlatexformat(qc['finalreadnum']))
        if qc['scClusters'] > 0:
            QCdoc += """[sc]number of cluster & %s  \\\\
\\hline           
""" % (strlatexformat(qc['scClusters']))
    QCdoc += """
\\end{tabularx}
\\end{table}
"""

    ### table4 output results
    QCdoc += """
\\newpage
\\newpage
\\section{output results}
\\begin{quotation}
Table 4 describes output results (in the summary/ folder).
\\end{quotation}
\\begin{table}[h]
\\small
\\caption{output results}\\label{bstable}
\\begin{tabularx}{\\textwidth}{ |X|l| }
\\hline
parameter & value  \\\\
\\hline
peaks (accessible regions) & %s \\\\
\\hline
""" % (strlatexformat(outname + "_summitEXT.bed"))

    if is_bulk:
        QCdoc += """
observed cleavage(+) & %s \\\\
\\hline
observed cleavage(-) & %s  \\\\
\\hline
bias expected cleavage(+) & %s \\\\
\\hline
bias expected cleavage(-) & %s  \\\\
\\hline
""" % (strlatexformat(outname + "_cleavage_plus.bw"),
        strlatexformat(outname + "_cleavage_minus.bw"),
        strlatexformat(outname + "_biasExpCuts_plus.bw"),
        strlatexformat(outname + "_biasExpCuts_minus.bw"))
    else:
        QCdoc += """
peak bias feature & %s \\\\
\\hline
peakXcell count & %s  \\\\
\\hline
""" % (strlatexformat(outname + "_peakFeatures.txt.gz"),
        strlatexformat(outname + "_peakXcellMat.txt.gz"))
        if has_cluster_file:
            QCdoc += """single-cell cluster & %s \\\\
\\hline
""" % (strlatexformat(outname + "_scClusters.txt"))
        if has_umap:
            # List the plot file itself (previously this row repeated the
            # _scClusters.txt name).
            QCdoc += """sc-cluster %s & %s \\\\
\\hline
""" % (dimRedTerm, strlatexformat(results['umap']))
    QCdoc += """
\\end{tabularx}
\\end{table}
"""

    if has_umap:
        # NOTE(review): the figure caption text below does not describe a
        # clustering scatter plot; it looks carried over from another
        # report. Left unchanged, confirm the intended wording.
        QCdoc += """
\\newpage
\\newpage
\\section{%s scatter plot}
The 2-dim scatter plot represent the %s results. Each dot represents an individual cell and the color represents cluster labels  
\\begin{figure}[h]
        \\caption{cross-validation curve for lambda decision} \\label{fig:profileunion}
        \\setlength{\\abovecaptionskip}{0pt}
        \\setlength{\\belowcaptionskip}{10pt}
        \\centering
        {\\includegraphics[width=0.8\\textwidth]{%s}}
\\end{figure}
""" % (dimRedTerm, dimRedTerm, summarydir + results['umap'])

    QCdoc += """
\\end{document} 
"""
    latexfile = outname + '_summaryReports.tex'

    with open(latexfile, 'w') as outf:
        outf.write(QCdoc)

    ### check pdflatex
    check_latex = sp('which pdflatex')
    if check_latex[0].decode("ascii") == "":
        wlog(
            'pdflatex was not installed, SELMA will not generate pdf version of summary report. Please copy the %s to an environment with pdflatex installed and complie the pdf file'
            % (outname + '_summaryReports.tex'),
            logfile)
    else:
        cmd = "pdflatex %s" % (latexfile)
        # Two passes so the table of contents is resolved.
        sp(cmd)
        sp(cmd)
        for ext in ("aux", "log", "toc"):
            sp("rm %s_summaryReports.%s" % (outname, ext))

    return conf_dict