コード例 #1
0
ファイル: run_prog.py プロジェクト: lemene/mbio
def rp_mecat_cns(argv):
    '''Correct raw reads with MECAT (mecat2pw followed by mecat2cns).

    Usage:
        rp_mecat_cns rawreads cnsreads [threads(4)]

    Args:
        argv: list of positional command-line arguments:
            argv[0] -- raw reads file given to mecat2pw and mecat2cns.
            argv[1] -- output file for the corrected reads.
            argv[2] -- optional number of threads (default 4).

    Equivalent shell commands:
        mecat2pw -j 0 -d $rawreads -o rawreads.pm.can -w wrk_dir -t $threads
        mecat2cns -i 0 -t $threads rawreads.pm.can $rawreads $cnsreads
        rm -rf rawreads.pm.can* wrk_dir

    Returns:
        None. Any ordinary error (missing argument, non-integer thread
        count, failure raised by the command runner) is reported by
        printing the traceback followed by this help text; it is not
        re-raised.
    '''
    try:
        rawreads = argv[0]
        cnsreads = argv[1]
        threads = int(argv[2]) if len(argv) >= 3 else 4

        # Intermediate file written by mecat2pw and consumed by mecat2cns.
        pm = "rawreads.pm.can"

        # NOTE(review): tl.run_if_modified presumably skips the command when
        # the outputs are newer than the inputs -- confirm against tl.
        cmd = "mecat2pw -j 0 -d %s -o %s -w wrk_dir -t %d" % (rawreads, pm,
                                                              threads)
        tl.run_if_modified([rawreads], [pm], cmd)

        cmd = "mecat2cns -i 0 -t %d %s %s %s" % (threads, pm, rawreads,
                                                 cnsreads)
        tl.run_if_modified([pm], [cnsreads], cmd)

        # Remove the intermediate file(s) and the mecat2pw working directory.
        tl.run("rm -rf %s* wrk_dir" % pm)
    except Exception:
        # Only ordinary errors are reported here; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        traceback.print_exc()
        print("----------------")
        print(rp_mecat_cns.__doc__)
コード例 #2
0
ファイル: run_prog.py プロジェクト: lemene/mbio
def rp_racon(argv):
    '''Polish contigs by running minimap2 and then racon.

    Usage:
        rp_racon contigs reads polished [threads(4)] [minimap2_options(-x map-pb)] [racon_options()]

    Args:
        argv: list of positional command-line arguments:
            argv[0] -- contigs file to polish.
            argv[1] -- reads file used for polishing.
            argv[2] -- output file receiving racon's standard output.
            argv[3] -- optional number of threads (default 4).
            argv[4] -- optional extra minimap2 options (default "-x map-pb").
            argv[5] -- optional extra racon options (default empty).

    The read-to-contig alignments are written to "__rd2ctg__.paf" in the
    current directory; that file is left in place afterwards.

    Returns:
        None. Any ordinary error is reported by printing the traceback
        followed by this help text; it is not re-raised.
    '''
    try:
        contigs = argv[0]
        reads = argv[1]
        polished = argv[2]
        threads = int(argv[3]) if len(argv) >= 4 else 4
        minimap2_options = argv[4] if len(argv) >= 5 else "-x map-pb"
        racon_options = argv[5] if len(argv) >= 6 else ""

        # Intermediate alignment file shared by the two steps.
        rd2ctg = "__rd2ctg__.paf"

        # Step 1: map the reads onto the contigs.
        cmd = "minimap2 %s -t %d %s %s > %s" % (minimap2_options, threads,
                                                contigs, reads, rd2ctg)
        tl.run_if_modified([contigs, reads], [rd2ctg], cmd)

        # Step 2: polish the contigs using those alignments.
        cmd = "racon %s -t %d %s %s %s > %s" % (racon_options, threads, reads,
                                                rd2ctg, contigs, polished)
        tl.run_if_modified([reads, contigs, rd2ctg], [polished], cmd)
    except Exception:
        # Only ordinary errors are reported here; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        traceback.print_exc()
        print("----------------")
        print(rp_racon.__doc__)
コード例 #3
0
ファイル: run_prog.py プロジェクト: lemene/mbio
def rp_mummerplot(argv):
    '''Compare contigs with a reference using MUMmer and plot the alignment.

    Usage:
        rp_mummerplot reference contigs

    Args:
        argv: list of command-line arguments parsed with argparse:
            reference -- reference sequence file.
            contigs   -- contigs file.

    Runs, in order, nucmer, delta-filter (out.delta -> out.best.delta),
    dnadiff and mummerplot (PNG output) in the current directory.

    Returns:
        None. Invalid arguments are reported by argparse and the function
        returns without running anything. Any other ordinary error is
        reported by printing the traceback followed by this help text.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument("reference", type=str)
    parser.add_argument("contigs", type=str)

    try:
        args = parser.parse_args(argv)
    except SystemExit:
        # argparse has already printed its usage/error (or --help) message.
        # Return instead of terminating the process so that callers keep the
        # historical "never raises on bad arguments" behaviour.
        return

    try:
        contigs = args.contigs
        refs = args.reference

        # NOTE(review): the contigs are passed before the reference here,
        # although nucmer documents its arguments as <reference> <query>.
        # Kept as in the original -- confirm that this order is intended.
        cmd = f"nucmer  -l 100 -c 1000 -d 10 --banded -D 5 {contigs} {refs}"
        # Empty input/output lists are passed for every step, as in the
        # original; presumably this makes the helper always run the command.
        tl.run_if_modified([], [], cmd)

        cmd = "delta-filter -i 95 -o 95 out.delta > out.best.delta"
        tl.run_if_modified([], [], cmd)

        cmd = "dnadiff -d out.best.delta"
        tl.run_if_modified([], [], cmd)

        cmd = "mummerplot out.best.delta --fat -f -png"
        tl.run_if_modified([], [], cmd)
    except Exception:
        # Print this function's own help text (the original printed the
        # docstring of rp_racon by mistake).
        traceback.print_exc()
        print("----------------")
        print(rp_mummerplot.__doc__)
コード例 #4
0
ファイル: run_prog.py プロジェクト: lemene/mbio
def rp_miniasm(argv):
    '''Assemble reads with minimap2 (all-vs-all overlaps) and miniasm.

    Usage:
        rp_miniasm reads contigs [threads(4)] [minimap2_options(-x ava-pb)]

    Args:
        argv: list of positional command-line arguments:
            argv[0] -- reads file to assemble.
            argv[1] -- output FASTA file for the assembled contigs.
            argv[2] -- optional number of threads (default 4).
            argv[3] -- optional minimap2 options (default "-x ava-pb").

    The intermediate files "__overlaps.paf" (gzip-compressed overlaps) and
    "__graph.gfa" are created in the current directory and removed at the
    end.

    Returns:
        None. Any ordinary error is reported by printing the traceback
        followed by this help text; it is not re-raised.
    '''
    try:
        reads = argv[0]
        contigs = argv[1]
        # The original read both the thread count and the minimap2 options
        # from argv[2], which always produced a malformed minimap2 command.
        threads = int(argv[2]) if len(argv) >= 3 else 4
        minimap2_options = argv[3] if len(argv) >= 4 else "-x ava-pb"

        overlaps = "__overlaps.paf"
        graph = "__graph.gfa"

        # Step 1: all-vs-all read overlaps, gzip-compressed.
        cmd = f"minimap2 -t {threads} {minimap2_options} {reads} {reads} | gzip -1 > {overlaps}"
        tl.run_if_modified([reads], [overlaps], cmd)

        # Step 2: build the assembly graph.
        cmd = f"miniasm -f {reads} {overlaps} > {graph}"
        tl.run_if_modified([reads, overlaps], [graph], cmd)

        # Step 3: convert the GFA "S" (segment) lines to FASTA, i.e.
        #   awk '/^S/{print ">"$2"\n"$3}' $graph > $contigs
        # The backslash is doubled so that awk receives the two characters
        # "\n"; a real newline inside an awk string literal is a syntax error.
        cmd = f"""awk '/^S/{{print ">"$2"\\n"$3}}' {graph} > {contigs}"""
        tl.run_if_modified([graph], [contigs], cmd)

        tl.run(f"rm -f {overlaps} {graph}")
    except Exception:
        # Only ordinary errors are reported here; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        traceback.print_exc()
        print("----------------")
        print(rp_miniasm.__doc__)
コード例 #5
0
ファイル: evaluate.py プロジェクト: lemene/mbio
def eval_read_accuracy(argv):
    '''Evaluate the accuracy of a set of reads.

    The reads are first sampled (100M), the sample is aligned to the
    reference with minimap2 (--eqx), and finally the error rate is evaluated
    from the alignments.

    Usage:
        eval_read_accuracy reads stub

    Args:
        argv: list of positional command-line arguments:
            argv[0] -- reads file to evaluate.
            argv[1] -- suffix used in the name of the result file.

    The reference is always read from "./ref.fna". All intermediate and
    result files are written to the "__wrkdir" directory, which is created
    if necessary.

    Returns:
        None. Any ordinary error is reported by printing the traceback
        followed by this help text; it is not re-raised.
    '''
    try:
        reads = argv[0]
        stub = argv[1]
        ref = "./ref.fna"

        workdir = "__wrkdir"
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(workdir, exist_ok=True)

        # Base name of the reads file without directory and extension.
        name, _ = os.path.splitext(os.path.split(reads)[1])
        sample_size = 100000000
        subreads = "%s/%s-%dM.fasta" % (workdir, name, sample_size // 1000000)
        rd2ref = "%s/%s-rd2ref.paf" % (workdir, name)
        result = "%s/%s-%s" % (workdir, name, stub)

        # NOTE(review): get_this_script_folder() is defined elsewhere;
        # presumably it returns the directory holding read.py and paffile.py.
        script_dir = get_this_script_folder()

        # Step 1: randomly sample the reads.
        cmd = "python3 %s/read.py rd_sample random %s %s %d" % (script_dir, reads, subreads, sample_size)
        tl.run_if_modified([reads], [subreads], cmd)

        # Step 2: align the sample to the reference.
        cmd = "minimap2 -x asm20 -t 40 %s %s -c --eqx > %s" % (ref, subreads, rd2ref)
        tl.run_if_modified([subreads, ref], [rd2ref], cmd)

        # Step 3: compute the accuracy and keep a copy of the report.
        cmd = "python3 %s/paffile.py paf_accuracy %s 2>&1 | tee %s" % (script_dir, rd2ref, result)
        print(cmd)
        tl.run_if_modified([rd2ref], [result], cmd)
        # Show the stored report.
        os.system("cat %s" % result)

    except Exception:
        # Only ordinary errors are reported here; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        traceback.print_exc()
        print("----------------")
        print(eval_read_accuracy.__doc__)
コード例 #6
0
ファイル: run_prog.py プロジェクト: lemene/mbio
def rp_sam4igv(argv):
    '''Convert a SAM file into the sorted, indexed BAM files IGV can display.

    Produces xxx.sorted.bam and xxx.sorted.bam.bai next to the input.

    Usage:
        rp_sam4igv xxx.sam

    Args:
        argv: list of positional command-line arguments:
            argv[0] -- input file; its name must end with ".sam".

    Returns:
        None. Any ordinary error (missing argument, wrong file extension,
        failure raised by the command runner) is reported by printing the
        traceback followed by this help text; it is not re-raised.
    '''
    try:
        samfname = argv[0]
        # Explicit check instead of `assert`, which is removed under
        # `python -O` and would let a wrong name be silently truncated.
        if not samfname.endswith(".sam"):
            raise ValueError("expected a file name ending with .sam: %r" %
                             samfname)
        name = samfname[:-4]

        bam = name + ".bam"
        sorted_bam = name + ".sorted.bam"

        # File names are passed as one-element lists, the form used by the
        # other run_if_modified call sites. NOTE(review): the original passed
        # bare strings here -- confirm the helper does not treat them
        # differently.
        cmd = "samtools view -bS %s > %s.bam" % (samfname, name)
        tl.run_if_modified([samfname], [bam], cmd)

        cmd = "samtools sort %s.bam -o %s.sorted.bam" % (name, name)
        tl.run_if_modified([bam], [sorted_bam], cmd)

        cmd = "samtools index %s.sorted.bam" % name
        tl.run_if_modified([sorted_bam], [sorted_bam + ".bai"], cmd)

        # Remove the unsorted intermediate BAM.
        tl.run("rm -f %s.bam" % name)
    except Exception:
        # Only ordinary errors are reported here; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        traceback.print_exc()
        print("----------------")
        print(rp_sam4igv.__doc__)
コード例 #7
0
ファイル: run_prog.py プロジェクト: lemene/mbio
def rp_purge_dups(argv):
    '''Run the purge_dups pipeline on a set of contigs.

    Usage:
        rp_purge_dups reads contigs [--threads N(20)]

    Args:
        argv: list of command-line arguments parsed with argparse:
            reads     -- reads file mapped onto the contigs.
            contigs   -- contigs file to purge.
            --threads -- number of minimap2 threads (default 20).

    Steps (all files are written to the current directory):
        1. minimap2 reads -> contigs            => rd2ctg.paf.gz
        2. pbcstat                              => PB.base.cov, PB.stat
        3. calcuts                              => cutoffs
        4. split_fa contigs                     => <contigs name>.split
        5. minimap2 split contigs vs themselves => ctg2ctg.paf.gz
        6. purge_dups                           => dups.bed
        7. get_seqs (declared output: purged.fa)

    Returns:
        None. Invalid arguments are reported by argparse and the function
        returns without running anything. Any other ordinary error is
        reported by printing the traceback followed by this help text
        (the original silently discarded every error).
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument("reads", type=str)
    parser.add_argument("contigs", type=str)
    parser.add_argument("--threads", type=int, default=20)

    try:
        args = parser.parse_args(argv)
    except SystemExit:
        # argparse has already printed its usage/error (or --help) message.
        # Return instead of terminating the process, as the original did.
        return

    try:
        contigs = args.contigs
        reads = args.reads
        threads = args.threads

        # Step 1: map the reads onto the contigs.
        cmd = "minimap2 -x map-pb -t %d %s %s | gzip -c - > rd2ctg.paf.gz" % (
            threads, contigs, reads)
        tl.run_if_modified([contigs, reads], ["rd2ctg.paf.gz"], cmd)

        # Step 2: coverage statistics from the read alignments.
        cmd = "pbcstat rd2ctg.paf.gz"
        tl.run_if_modified(["rd2ctg.paf.gz"], ["PB.base.cov", "PB.stat"], cmd)

        # Step 3: coverage cutoffs.
        cmd = "calcuts PB.stat > cutoffs 2>calcults.log"
        tl.run_if_modified(["PB.stat"], ["cutoffs"], cmd)

        # The split contigs are written to the current directory, named after
        # the contigs file without its directory part.
        _, name = os.path.split(contigs)
        split_contigs = name + ".split"

        # Step 4: split the contigs.
        cmd = "split_fa %s > %s" % (contigs, split_contigs)
        tl.run_if_modified([contigs], [split_contigs], cmd)

        # Step 5: self-alignment of the split contigs.
        cmd = "minimap2 -x asm20 -DP -t %d %s %s | gzip -c - > ctg2ctg.paf.gz" % (
            threads, split_contigs, split_contigs)
        tl.run_if_modified([split_contigs], ["ctg2ctg.paf.gz"], cmd)

        # Step 6: identify the duplicated regions.
        cmd = "purge_dups -2 -T cutoffs -c PB.base.cov ctg2ctg.paf.gz > dups.bed 2> purge_dups.log"
        tl.run_if_modified(["PB.base.cov", "ctg2ctg.paf.gz", "cutoffs"],
                           ["dups.bed"], cmd)

        # Step 7: extract the purged sequences.
        cmd = "get_seqs dups.bed %s" % contigs
        tl.run_if_modified(["dups.bed", contigs], ["purged.fa"], cmd)

    except Exception:
        # Report the failure instead of hiding it; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        traceback.print_exc()
        print("----------------")
        print(rp_purge_dups.__doc__)