Python wlog Examples

Programming Language: Python

Namespace/Package Name: SELMApipe.Utility

Method/Function: wlog

Examples at hotexamples.com: 7

Python wlog - 7 examples found. These are the top-rated real-world Python examples of SELMApipe.Utility.wlog, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

def step4_SCpeakbias(conf_dict,logfile):
    """Load per-chromosome sequences and run the peak-level bias scan.

    Reads the chromosome names from the first column of the file at
    ``conf_dict['options']['csize']``, fetches one sequence per chromosome
    with ``fetchseq_2bit_chrom`` and stores the mapping under
    ``conf_dict['results']['seqdict']``.  Then calls ``bias_peakXcell_mat``
    with the settings collected in ``conf_dict``.

    Args:
        conf_dict: nested pipeline configuration; the 'General', 'options'
            and 'results' sections are read, 'results' is updated in place.
        logfile: passed through to ``wlog`` for progress messages.

    Returns:
        The same ``conf_dict`` object, updated.
    """
    general = conf_dict['General']
    options = conf_dict['options']
    results = conf_dict['results']

    wlog('readin sequence from 2bit',logfile)
    seq_dict = {}
    # ``with`` guarantees the handle is closed even if fetching a sequence fails.
    with open(options['csize']) as inf:
        for line in inf:
            fields = line.split()
            if not fields:
                # Blank line (e.g. trailing newline at end of file): nothing to fetch.
                continue
            chrm = fields[0]
            seq_dict[chrm] = fetchseq_2bit_chrom(general['twoBitToFa'],
                                                 general['sequence'],
                                                 chrm)
    results['seqdict'] = seq_dict

    wlog('scan peak level bias',logfile)
    # The return value was never used by the original code, so it is not kept.
    bias_peakXcell_mat(general['outname'],
                       general['bedtools'],
                       options['chromosome'],
                       options['kmer'],
                       results['biasMat'],
                       results['seqdict'],
                       results['finalcells'],
                       general['datatype'],
                       options['peakminreads'],
                       options['peakmaxreads'])

    return conf_dict

Example #2

Show file

def step1_QC_format(conf_dict, logfile):
    """Record read-count QC and, in single-cell mode, the selected cells.

    Splits the fragments per chromosome via ``split_chromosome_reads`` and
    stores the chrM count and the summed non-chrM count in
    ``conf_dict['QC']``.  When ``conf_dict['General']['mode']`` is "sc", it
    additionally runs ``filter_highQcell_reads`` and copies the elements of
    its result into ``conf_dict['results']`` and ``conf_dict['QC']``.

    Args:
        conf_dict: nested pipeline configuration, updated in place.
        logfile: passed through to ``wlog`` / ``ewlog``.

    Returns:
        The same ``conf_dict`` object, updated.
    """
    wlog('summarize reads count distribution', logfile)
    general = conf_dict['General']
    options = conf_dict['options']
    chrom_reads = split_chromosome_reads(general['fragments'],
                                         general['outname'],
                                         options['scATAC10x'],
                                         options['chromosome'])

    # Mitochondrial reads are tracked separately from all other chromosomes.
    conf_dict['QC']["chrM_reads"] = (chrom_reads["chrM"]
                                     if "chrM" in chrom_reads else 0)
    conf_dict['QC']["chromatin_reads"] = sum(
        chrom_reads[name] for name in chrom_reads.keys() if name != "chrM")

    if general['mode'] != "sc":
        return conf_dict

    wlog('filter high quality single cells', logfile)
    cell_filter = filter_highQcell_reads(general['outname'],
                                         int(options['readcutoff']),
                                         options["usecells"])
    if cell_filter == "fail":
        # NOTE(review): ewlog presumably aborts the run; otherwise the
        # indexing below would operate on the string "fail" — confirm.
        ewlog('obtain < 100 high quality cell with reads >= %s.' %
              (options['readcutoff']), logfile)

    if len(options["usecells"]) == 0:
        wlog('no specified cellname list inputed', logfile)
    elif cell_filter[4] == "highQ":
        wlog('obtain < 100 cell left after highQ + cellname filtering, use highQ cell only',
             logfile)

    final_cells, highq_count, read_count, total_count = cell_filter[:4]
    wlog('obtain %s cells from filtering, containing %s reads' %
         (highq_count, read_count), logfile)

    conf_dict['results']['finalcells'] = final_cells
    qc = conf_dict['QC']
    qc['totalcellnum'] = total_count
    qc['highQcellnum'] = highq_count
    qc['finalusecellnum'] = len(conf_dict['results']['finalcells'])
    qc['finalreadnum'] = read_count

    return conf_dict

Example #3

Show file

File: step2_biasMat.py Project: Tarela/SELMA

def step2_biasMat(conf_dict, logfile):
    """Populate ``conf_dict['results']['biasMat']`` with a bias matrix.

    Three cases, checked in order:

    1. ``options['bias'] == "naked"``: read the matrix from
       ``options['biasfile']``.
    2. fewer than 500000 chrM reads: read the same file if it exists,
       otherwise report the problem through ``ewlog``.
    3. otherwise: estimate a matrix from chrM reads with
       ``naive_kmerBias_chrM`` and post-process it with ``simplex_encoding``;
       the output path ``<outname>_bias.txt`` is recorded in
       ``results['biasfile']``.

    Args:
        conf_dict: nested pipeline configuration, updated in place.
        logfile: passed through to ``wlog`` / ``ewlog``.

    Returns:
        The same ``conf_dict`` object, updated.
    """
    options = conf_dict['options']

    if options['bias'] == "naked":
        wlog('obtain pre-processed bias matrix from naked DNA data', logfile)
        conf_dict['results']['biasMat'] = readBias(options['biasfile'])
        return conf_dict

    if conf_dict['QC']['chrM_reads'] < 500000:
        wlog('chrM reads number < 500k, obtain pre-processed bias matrix from naked DNA data',
             logfile)
        if os.path.isfile(options['biasfile']):
            conf_dict['results']['biasMat'] = readBias(options['biasfile'])
        else:
            ewlog("no naked DNA bias matrix, cannot estimate bias", logfile)
        return conf_dict

    wlog('estimate bias matrix from mtDNA(chrM) data', logfile)
    general = conf_dict['General']
    naive_matrix = naive_kmerBias_chrM(general['outname'],
                                       general['sequence'],
                                       options['kmer'],
                                       general['twoBitToFa'],
                                       general['format'])
    results = conf_dict['results']
    results['biasMatNaive'] = naive_matrix
    results['biasfile'] = "%s_bias.txt" % (general['outname'])
    results['biasMat'] = simplex_encoding(results['biasMatNaive'],
                                          results['biasfile'])

    return conf_dict

Example #4

Show file

File: step3_callpeak.py Project: Tarela/SELMA

def step3_callpeak(conf_dict, logfile):
    """Produce the extended peak file, from external peaks or from macs3.

    The target file ``<outname>_summitEXT.bed`` is recorded in
    ``conf_dict['results']['peakfile']``.  If an external peak file is given
    (``options['peak'] != "NA"``) it is extended with ``extExternal``; when
    that yields at least 1000 peaks the function returns without running
    macs3.  Otherwise macs3 is run through ``sp`` and its summits are
    extended with ``extsummit``.  The peak count is stored in
    ``conf_dict['QC']['peaknumTotal']``.

    Args:
        conf_dict: nested pipeline configuration, updated in place.
        logfile: passed through to ``wlog`` / ``ewlog``.

    Returns:
        The same ``conf_dict`` object, updated.
    """
    conf_dict['results']['peakfile'] = (conf_dict['General']['outname'] +
                                        "_summitEXT.bed")

    need_macs3 = True
    if conf_dict['options']['peak'] != "NA":
        conf_dict['QC']['peaknumTotal'] = extExternal(
            conf_dict['options']['peak'],
            conf_dict['results']['peakfile'],
            int(conf_dict['options']['extend']))
        external_peaks = conf_dict['QC']['peaknumTotal']
        if external_peaks < 1000:
            wlog("obtain < 1000 (%s) external inputted peaks, use macs3 to detect peaks"
                 % external_peaks, logfile)
        else:
            wlog("obtain %s peaks from (-p) inputted" % external_peaks,
                 logfile)
            need_macs3 = False

    if not need_macs3:
        return conf_dict

    if conf_dict['General']['macs3'] == "NA":
        ewlog("macs3 was not installed. SELMA requires macs3 installed in the default path ($PATH) for peak calling",
              logfile)

    # Every genome other than hg38 is given the "mm" genome-size tag.
    gtag = "hs" if conf_dict['General']['genome'] == "hg38" else "mm"

    # Paired-end input uses BEDPE; anything else uses BED with a fixed
    # extension size and no model building.
    if conf_dict['General']['format'] == "PE":
        cmd_template = "macs3 callpeak -t %s -n %s -f BEDPE -g %s -q %s --keep-dup 1"
    else:
        cmd_template = "macs3 callpeak -t %s -n %s -f BED -g %s -q %s --keep-dup 1 --nomodel --extsize 100"
    macs3cmd = cmd_template % (
        conf_dict['General']['outname'] + "_chromatin.bed",
        conf_dict['General']['outname'],
        gtag,
        conf_dict['options']['peakqval'])

    wlog("peak calling with macs3: %s" % macs3cmd, logfile)
    sp(macs3cmd)

    wlog("extend peak summits to +/- %sbp" % conf_dict['options']['extend'],
         logfile)
    summit_bed = conf_dict['General']['outname'] + "_summits.bed"
    if not os.path.isfile(summit_bed):
        ewlog("no macs3 results detected, check whether macs3 was correctly installed.",
              logfile)
    conf_dict['QC']['peaknumTotal'] = extsummit(
        conf_dict['General']['outname'] + "_summits.bed",
        conf_dict['results']['peakfile'],
        int(conf_dict['options']['extend']))

    called_peaks = conf_dict['QC']['peaknumTotal']
    if called_peaks < 1000:
        ewlog("obtain < 1000 (%s) peaks, SELMA terminated" % called_peaks,
              logfile)
    else:
        wlog("obtain %s peaks" % called_peaks, logfile)

    return conf_dict

Example #5

Show file

def _bdg_to_sorted_bigwig(bedGraphToBigWig, prefix, csize):
    """Sort ``<prefix>.bdg`` into ``<prefix>_sorted.bdg`` and convert it to ``<prefix>.bw``."""
    sorted_bdg = prefix + "_sorted.bdg"
    sp("sort -k1,1 -k2,2n %s > %s" % (prefix + ".bdg", sorted_bdg))
    sp("%s %s %s %s" % (bedGraphToBigWig, sorted_bdg, csize, prefix + ".bw"))


def step4_BULKcleavageBias(conf_dict, logfile):
    """Build observed and bias-expected cleavage tracks for bulk data.

    Steps, all shell commands being run through ``sp``:

    1. split ``<outname>_chromatin.bed`` into plus/minus strand cleavage BED
       files (awk command depends on ``General['format']``);
    2. pile the cleavage sites up with macs3 and convert each strand to bigWig;
    3. merge the extended peak file into ``<outname>_summitEXTmerge.bed``;
    4. load one sequence per chromosome listed in ``options['csize']`` into
       ``conf_dict['results']['seqdict']``;
    5. call ``bias_exp_cleavage_DNase`` or ``bias_exp_cleavage_ATAC``
       (chosen by ``General['datatype']``) and convert the resulting
       ``_biasExpCuts_{plus,minus}.bdg`` files to bigWig.

    Args:
        conf_dict: nested pipeline configuration; 'results' is updated in place.
        logfile: passed through to ``wlog`` for progress messages.

    Returns:
        The same ``conf_dict`` object, updated.
    """
    general = conf_dict['General']
    options = conf_dict['options']
    results = conf_dict['results']
    outname = general['outname']
    chromatin_bed = outname + "_chromatin.bed"
    merged_peaks = outname + "_summitEXTmerge.bed"
    strands = ("plus", "minus")

    wlog('split fragments to strand specific cleavage sites', logfile)
    if general['format'] == "PE":
        # Paired-end fragments: the 5' end of each mate is a cleavage site.
        awk_templates = {
            "plus": """awk '{OFS="\\t";print $1,$2,$2+1,".",".","+"}' %s > %s""",
            "minus": """awk '{OFS="\\t";print $1,$3-1,$3,".",".","-"}' %s > %s""",
        }
    else:
        # Single-end reads: split the records by the strand column.
        awk_templates = {
            "plus": """awk '{if($6=="+") print $0}' %s > %s""",
            "minus": """awk '{if($6=="-") print $0}' %s > %s""",
        }
    for strand in strands:
        sp(awk_templates[strand] %
           (chromatin_bed, "%s_cleavage_%s.bed" % (outname, strand)))

    wlog('pile up cleavage sites', logfile)
    for strand in strands:
        prefix = "%s_cleavage_%s" % (outname, strand)
        sp("macs3 pileup -i %s -f BED --extsize 1 -o %s " %
           (prefix + ".bed", prefix + ".bdg"))
        _bdg_to_sorted_bigwig(general['bedGraphToBigWig'], prefix,
                              options['csize'])

    wlog("remove redundant position from the extended peak file", logfile)
    sp("""sort -k 1,1 -k 2,2g -k 3,3g %s | %s merge -i - > %s""" %
       (results['peakfile'], general['bedtools'], merged_peaks))

    wlog('readin sequence from 2bit', logfile)
    seq_dict = {}
    # ``with`` guarantees the handle is closed even if fetching a sequence fails.
    with open(options['csize']) as inf:
        for line in inf:
            fields = line.split()
            if not fields:
                # Blank line (e.g. trailing newline at end of file): skip it.
                continue
            chrm = fields[0]
            seq_dict[chrm] = fetchseq_2bit_chrom(general['twoBitToFa'],
                                                 general['sequence'], chrm)
    results['seqdict'] = seq_dict

    wlog('calculate bias expected cleavages', logfile)
    if general['datatype'] == "DNase":
        bias_exp_cleavage = bias_exp_cleavage_DNase
    else:
        bias_exp_cleavage = bias_exp_cleavage_ATAC
    bias_exp_cleavage(outname,
                      merged_peaks,
                      results['biasMat'],
                      options['kmer'],
                      general['bigWigSummary'],
                      general['bedGraphToBigWig'],
                      results['seqdict'])

    for strand in strands:
        _bdg_to_sorted_bigwig(general['bedGraphToBigWig'],
                              "%s_biasExpCuts_%s" % (outname, strand),
                              options['csize'])
    return conf_dict

Example #6

Show file

def step5_SCcellClustering(conf_dict,logfile):
    """Run the configured single-cell clustering method and count clusters.

    Dispatches on ``conf_dict['options']['clustermethod']`` ("PCAkm",
    "Seurat", "ArchR", "APEC" or "Cicero"); the value returned by the
    clustering helper is stored in ``conf_dict['General']['scPackage']`` and
    a message is logged when it equals "noPackage".  An unrecognised method
    runs nothing.  Afterwards the number of distinct labels in the second
    column of ``<outname>_scClusters.txt`` is stored in
    ``conf_dict['QC']['scClusters']`` (0 if the file does not exist).

    Args:
        conf_dict: nested pipeline configuration, updated in place.
        logfile: passed through to ``wlog`` for progress messages.

    Returns:
        The same ``conf_dict`` object, updated.
    """
    wlog('single-cell clustering analysis',logfile)
    general = conf_dict['General']
    options = conf_dict['options']
    outname = general['outname']
    method = options['clustermethod']

    # Each branch sets the message to log if the helper reports "noPackage".
    missing_msg = None
    if method == "PCAkm":
        general['scPackage'] = scClustering_PCAkm(outname,
                                                  options['lowbiaspeak'],
                                                  options['clusterNum'],
                                                  options['topDim'],
                                                  int(options['UMAP']))
        missing_msg = "umap was not installed, UMAP scatter plot will not be generated"
    elif method == "Seurat":
        general['scPackage'] = scClustering_Seurat(outname,
                                                   options['lowbiaspeak'],
                                                   options['topDim'],
                                                   int(options['UMAP']))
        missing_msg = "Seurat related packages were not installed, skip single-cell clustering step"
    elif method == "ArchR":
        general['scPackage'] = scClustering_ArchR(outname,
                                                  general['genome'],
                                                  options['lowbiaspeak'],
                                                  options['topDim'],
                                                  int(options['UMAP']))
        missing_msg = "ArchR related packages were not installed, skip single-cell clustering step"
    elif method == "APEC":
        general['scPackage'] = scClustering_APEC(outname,
                                                 options['lowbiaspeak'],
                                                 int(options['UMAP']))
        missing_msg = "APEC related packages were not installed, skip single-cell clustering step"
    elif method == "Cicero":
        general['scPackage'] = scClustering_Cicero(outname,
                                                   options['lowbiaspeak'],
                                                   int(options['UMAP']))
        missing_msg = "Cicero related packages were not installed, skip single-cell clustering step"

    if missing_msg is not None and general['scPackage'] == "noPackage":
        wlog(missing_msg,logfile)

    cluster_file = "%s_scClusters.txt"%(outname)
    if os.path.isfile(cluster_file):
        labels = set()
        with open(cluster_file) as inf:
            for line in inf:
                fields = line.strip().split("\t")
                if len(fields) < 2:
                    # Blank or single-column line: no cluster label to read.
                    continue
                # The literal "cluster" is the header cell, not a label.
                if fields[1] != "cluster":
                    labels.add(fields[1])
        conf_dict['QC']['scClusters'] = len(labels)
    else:
        conf_dict['QC']['scClusters'] = 0
    return conf_dict

Example #7

Show file

File: stepFinal_summary.py Project: Tarela/SELMA

# LaTeX preamble of the PDF report; the single %s is the (escaped) output name.
_LATEX_PREAMBLE = r"""\documentclass[11pt,a4paper]{article}
\usepackage{tabularx}
\usepackage[english]{babel}
\usepackage{array}
\usepackage{graphicx}
\usepackage{color}
\DeclareGraphicsExtensions{.eps,.png,.pdf,.ps}
\begin{document}
\title{SELMA summary reports for: %s}

\vspace{-1cm}
\maketitle
\tableofcontents
"""

# Closes a table opened by _latex_table_open().
_LATEX_TABLE_CLOSE = "\\end{tabularx}\n\\end{table}\n"

# Scatter-plot page; placeholders: term, term, term, path of the plot file.
_LATEX_FIGURE = r"""
\newpage
\newpage
\section{%s scatter plot}
The 2-dim scatter plot represent the %s results. Each dot represents an individual cell and the color represents cluster labels
\begin{figure}[h]
        \caption{%s scatter plot of single cells colored by cluster label} \label{fig:scatter}
        \setlength{\abovecaptionskip}{0pt}
        \setlength{\belowcaptionskip}{10pt}
        \centering
        {\includegraphics[width=0.8\textwidth]{%s}}
\end{figure}
"""


def _latex_table_open(section, intro, caption, label):
    """Return LaTeX starting a new section and opening a 'parameter | value' table."""
    return ("\\newpage\n"
            "\\newpage\n"
            "\\section{%s}\n"
            "\\begin{quotation}\n"
            "%s\n"
            "\\end{quotation}\n"
            "\\begin{table}[h]\n"
            "\\small\n"
            "\\caption{%s}\\label{%s}\n"
            "\\begin{tabularx}{\\textwidth}{ |X|l| }\n"
            "\\hline\n"
            "parameter & value  \\\\\n"
            "\\hline\n") % (section, intro, caption, label)


def _latex_table_rows(pairs):
    """Return one 'label & value' row per pair; values go through strlatexformat."""
    return "".join("%s & %s \\\\\n\\hline\n" % (label, strlatexformat(value))
                   for label, value in pairs)


def stepFinal_summary(conf_dict, logfile):
    """Collect output files and write the text, LaTeX and (optionally) PDF reports.

    * Moves final results into ``summary/`` and intermediate files into
      ``tmpResults/`` (all through ``sp`` shell commands); ``tmpResults/`` is
      removed unless ``conf_dict['options']['keeptmp']`` is truthy.
    * Writes ``<outname>_summaryReports.txt`` and
      ``<outname>_summaryReports.tex``.
    * If ``which pdflatex`` prints a path, compiles the ``.tex`` file and
      removes the ``.aux``/``.log``/``.toc`` by-products.

    ``conf_dict['results']['umap']`` is set to the name of the clustering
    plot placed in ``summary/`` or to "NA" if there is none.

    Args:
        conf_dict: nested pipeline configuration, updated in place.
        logfile: passed through to ``wlog`` for progress messages.

    Returns:
        The same ``conf_dict`` object, updated.
    """
    wlog('Collect results', logfile)
    general = conf_dict['General']
    options = conf_dict['options']
    results = conf_dict['results']
    qc = conf_dict['QC']
    outname = general['outname']
    # The two flags are kept separate on purpose: file handling treats every
    # non-bulk mode like "sc", while sc-only report rows require mode == "sc".
    bulk_mode = general['mode'] == "bulk"
    sc_mode = general['mode'] == "sc"

    def move(suffixes, destination):
        """Run ``mv <outname><suffix> <destination>`` for each suffix, in order."""
        for suffix in suffixes:
            sp("mv %s%s %s" % (outname, suffix, destination))

    # ---- final deliverables -> summary/ -------------------------------------
    summarydir = 'summary/'
    createDIR(summarydir)
    if "biasfile" in results and os.path.isfile(results['biasfile']):
        sp("mv %s %s" % (results['biasfile'], summarydir))
    move(["_summitEXT.bed"], summarydir)

    results['umap'] = "NA"
    if bulk_mode:
        move(["_cleavage_plus.bw", "_cleavage_minus.bw",
              "_biasExpCuts_plus.bw", "_biasExpCuts_minus.bw"], summarydir)
    else:
        sp("gzip %s_peakXcellMat.txt" % (outname))
        sp("gzip %s_peakFeatures.txt" % (outname))
        move(["_peakXcellMat.txt.gz", "_peakFeatures.txt.gz",
              "_scClusters.txt"], summarydir)

        if os.path.isfile("%s_clusteringUMAP.pdf" % (outname)):
            if options['clustermethod'] in ["APEC", "Cicero"]:
                # For these two methods the plot is reported as t-SNE, so it
                # is renamed while being moved.
                sp("mv %s_clusteringUMAP.pdf %s/%s_clusteringTSNE.pdf" %
                   (outname, summarydir, outname))
                results['umap'] = "%s_clusteringTSNE.pdf" % (outname)
            else:
                move(["_clusteringUMAP.pdf"], summarydir)
                results['umap'] = "%s_clusteringUMAP.pdf" % (outname)

    # ---- intermediate files -> tmpResults/ ----------------------------------
    tmpresult = 'tmpResults/'
    createDIR(tmpresult)
    move(["_chromatin.bed", "_chrM.bed", "_summits.bed", "_peaks.xls",
          "_peaks.narrowPeak"], tmpresult)

    if bulk_mode:
        move(["_cleavage_plus.bed", "_cleavage_minus.bed",
              "_cleavage_plus.bdg", "_cleavage_minus.bdg",
              "_cleavage_plus_sorted.bdg", "_cleavage_minus_sorted.bdg",
              "_biasExpCuts_plus.bdg", "_biasExpCuts_minus.bdg",
              "_biasExpCuts_plus_sorted.bdg", "_biasExpCuts_minus_sorted.bdg",
              "_summitEXTmerge.bed"], tmpresult)
    else:
        # _highQcellReads.bed was previously moved twice; once is enough.
        move(["_highQcellReads.bed", "_tmpSCreads.bed", "_tmpSCpeaks.bed",
              "_scOVcleavage.bed", "_scRscript.r"], tmpresult)
        if options['clustermethod'] in ("ArchR", "APEC", "Cicero"):
            # These methods leave a working directory named after themselves.
            move(["_" + options['clustermethod']], tmpresult)

    if options['keeptmp']:
        wlog('--keeptmp was set, keep intermediate results', logfile)
    else:
        wlog('--keeptmp was not setting, remove intermediate results', logfile)
        sp("rm -r %s" % tmpresult)

    # Facts about summary/ that both report formats need.
    has_clusters = os.path.isfile("%s%s_scClusters.txt" %
                                  (summarydir, outname))
    has_umap = os.path.isfile(summarydir + results['umap'])
    if has_umap:
        if options['clustermethod'] in ["APEC", "Cicero"]:
            dim_red_term = "t-SNE"
        else:
            dim_red_term = "UMAP"
    total_reads = qc['chrM_reads'] + qc['chromatin_reads']

    # ---- plain-text report ---------------------------------------------------
    wlog('Generate summary reports', logfile)
    with open("%s_summaryReports.txt" % outname, 'w') as outf:

        def write_rows(pairs):
            """Write one tab-separated 'label<TAB>value' line per pair."""
            for label, value in pairs:
                outf.write("%s\t%s\n" % (label, value))

        outf.write("#settings\n")
        write_rows([
            ("mode", general['mode']),
            ("fragments", general['fragments']),
            ("data format", general['format']),
            ("data type", general['datatype']),
            ("genome version", general['genome']),
            ("output name", outname),
        ])

        outf.write("\n#parameters\n")
        write_rows([
            ("peak extend size", options['extend']),
            ("peak qvalue", options['peakqval']),
            ("bias source", options['bias']),
            ("k-mer", options['kmer']),
        ])
        if sc_mode:
            write_rows([
                ("[sc]reads cutoff", options['readcutoff']),
                ("[sc]%low biaspeak", options['lowbiaspeak']),
                ("[sc]peak min reads", options['peakminreads']),
                ("[sc]peak max reads", options['peakmaxreads']),
                ("[sc]topN dimensions", options['topDim']),
                ("[sc]clustering method", options['clustermethod']),
            ])

        outf.write("\n#QC\n")
        write_rows([
            ("total reads", total_reads),
            ("chromatin reads", qc['chromatin_reads']),
            ("mtDNA reads", qc['chrM_reads']),
            ("total peaks", qc['peaknumTotal']),
        ])
        if sc_mode:
            write_rows([
                ("total single-cells", qc['totalcellnum']),
                ("high quality single-cells", qc['highQcellnum']),
                ("single-cells for clustering", qc['finalusecellnum']),
                ("reads in single-cells for clustering", qc['finalreadnum']),
            ])
            if qc['scClusters'] > 0:
                write_rows([("cluster number", qc['scClusters'])])

        outf.write("\n#output results\n")
        write_rows([("peaks (accessible regions)", outname + "_summitEXT.bed")])
        if bulk_mode:
            write_rows([
                ("observed cleavage (+ strand)", outname + "_cleavage_plus.bw"),
                ("observed cleavage (- strand)", outname + "_cleavage_minus.bw"),
                ("bias expected cleavage (+ strand)",
                 outname + "_biasExpCuts_plus.bw"),
                ("bias expected cleavage (- strand)",
                 outname + "_biasExpCuts_minus.bw"),
            ])
        else:
            write_rows([
                ("peak bias features", outname + "_peakFeatures.txt.gz"),
                ("peakXcell count", outname + "_peakXcellMat.txt.gz"),
            ])
            if has_clusters:
                write_rows([("single-cell cluster", outname + "_scClusters.txt")])
            if has_umap:
                write_rows([("sc-cluster %s" % dim_red_term, results['umap'])])

    # ---- LaTeX report --------------------------------------------------------
    QCdoc = _LATEX_PREAMBLE % (strlatexformat(outname),)

    # Table 1: settings.  Each table gets its own label; they used to share
    # one, which makes LaTeX report multiply-defined labels.
    QCdoc += _latex_table_open(
        "Summary description",
        "Table 1 describes the input files and settings.",
        " settings ", "tab:settings")
    QCdoc += _latex_table_rows([
        ("mode", general['mode']),
        ("fragment file", general['fragments'].split("/")[-1]),
        ("data format", general['format']),
        ("data type", general['datatype']),
        ("genome version", general['genome']),
        ("output name", outname),
    ])
    QCdoc += _LATEX_TABLE_CLOSE

    # Table 2: parameters and options.
    QCdoc += _latex_table_open(
        "parameters and options",
        "Table 2 describes the parameters and options.",
        "parameters and options", "tab:parameters")
    QCdoc += _latex_table_rows([
        ("peak extend size", options['extend']),
        ("peak qvalue", options['peakqval']),
        ("bias source", options['bias']),
        ("k-mer", options['kmer']),
    ])
    if sc_mode:
        QCdoc += _latex_table_rows([
            ("[sc]reads cutoff", options['readcutoff']),
            ("[sc]lowbias peak percent", options['lowbiaspeak']),
            ("[sc]peak min reads", options['peakminreads']),
            ("[sc]peak max reads", options['peakmaxreads']),
            ("[sc]topN dimensions", options['topDim']),
            ("[sc]cluster methods", options['clustermethod']),
        ])
    QCdoc += _LATEX_TABLE_CLOSE

    # Table 3: data quality.
    QCdoc += _latex_table_open(
        "data quality",
        "Table 3 describes data Quality.",
        "data quality", "tab:quality")
    QCdoc += _latex_table_rows([
        ("total reads", total_reads),
        ("chromatin reads", qc['chromatin_reads']),
        ("mtDNA reads", qc['chrM_reads']),
        ("total peaks", qc['peaknumTotal']),
    ])
    if sc_mode:
        QCdoc += _latex_table_rows([
            ("[sc]total single-cells(sc)", qc['totalcellnum']),
            ("[sc]high quality sc", qc['highQcellnum']),
            ("[sc]sc for clustering", qc['finalusecellnum']),
            ("[sc]reads in sc for clustering", qc['finalreadnum']),
        ])
        if qc['scClusters'] > 0:
            QCdoc += _latex_table_rows([
                ("[sc]number of cluster", qc['scClusters']),
            ])
    QCdoc += _LATEX_TABLE_CLOSE

    # Table 4: output files (the intro text used to say "Table 3").
    QCdoc += _latex_table_open(
        "output results",
        "Table 4 describes output results (in the summary/ folder).",
        "output results", "tab:outputs")
    QCdoc += _latex_table_rows([
        ("peaks (accessible regions)", outname + "_summitEXT.bed"),
    ])
    if bulk_mode:
        QCdoc += _latex_table_rows([
            ("observed cleavage(+)", outname + "_cleavage_plus.bw"),
            ("observed cleavage(-)", outname + "_cleavage_minus.bw"),
            ("bias expected cleavage(+)", outname + "_biasExpCuts_plus.bw"),
            ("bias expected cleavage(-)", outname + "_biasExpCuts_minus.bw"),
        ])
    else:
        QCdoc += _latex_table_rows([
            ("peak bias feature", outname + "_peakFeatures.txt.gz"),
            ("peakXcell count", outname + "_peakXcellMat.txt.gz"),
        ])
        if has_clusters:
            QCdoc += _latex_table_rows([
                ("single-cell cluster", outname + "_scClusters.txt"),
            ])
        if has_umap:
            # Report the plot file itself, as the text report does; this row
            # used to repeat the _scClusters.txt name.
            QCdoc += _latex_table_rows([
                ("sc-cluster %s" % dim_red_term, results['umap']),
            ])
    QCdoc += _LATEX_TABLE_CLOSE

    if has_umap:
        QCdoc += _LATEX_FIGURE % (dim_red_term, dim_red_term, dim_red_term,
                                  summarydir + results['umap'])

    QCdoc += "\n\\end{document}\n"

    latexfile = outname + '_summaryReports.tex'
    with open(latexfile, 'w') as outf:
        outf.write(QCdoc)

    # ---- optional PDF --------------------------------------------------------
    # sp() output is indexed and decoded here, so element 0 is taken to be the
    # captured stdout bytes; empty output means pdflatex was not found.
    check_latex = sp('which pdflatex')
    if check_latex[0].decode("ascii") == "":
        wlog(
            'pdflatex was not installed, SELMA will not generate pdf version of summary report. Please copy the %s to an environment with pdflatex installed and complie the pdf file'
            % (latexfile), logfile)
    else:
        cmd = "pdflatex %s" % (latexfile)
        # Two passes, presumably so that \tableofcontents is resolved.
        sp(cmd)
        sp(cmd)
        for extension in ("aux", "log", "toc"):
            sp("rm %s_summaryReports.%s" % (outname, extension))

    return conf_dict