Exemple #1
0
def split_aln(man, pltfm, ref, core_lim, mem_lim, queue, out_dir, bin_dir,
              spid):
    """Split ``ref`` with ``split_fa`` and self-align the result with minimap2.

    The split job is submitted first; the self-alignment job is submitted
    only when ``man.start`` returns a falsy value for the split job. The
    function does not return: it terminates the process with the value
    returned by the last ``man.start`` call.

    Args:
        man: job manager exposing ``start(jobs)``.
        pltfm: platform name forwarded to ``hpc``.
        ref: path of the reference FASTA.
        core_lim: core count requested for the minimap2 job.
        mem_lim: memory requested for the minimap2 job.
        queue: unused; kept so existing callers keep working.
        out_dir: output directory, passed to ``mkdir`` before use.
        bin_dir: directory that holds the ``split_fa`` executable.
        spid: identifier embedded in the job names.

    Raises:
        SystemExit: always, carrying the last job status.
    """
    # ``exit`` is installed by the ``site`` module for interactive sessions
    # and may be missing (e.g. ``python -S``); ``sys.exit`` always exists.
    import sys

    mkdir(out_dir)
    prefix = get_rm_prefix(ref)

    split_fn = "{0}/{1}.split.fa".format(out_dir, prefix)
    split_jn = "split_{}".format(spid)
    split_job = hpc(pltfm,
                    cmd="{0}/split_fa {1} > {2}".format(bin_dir, ref, split_fn),
                    core=1,
                    mem=5000,
                    jn=split_jn,
                    out="{0}/{1}_%J.o".format(out_dir, split_jn),
                    err="{0}/{1}_%J.e".format(out_dir, split_jn))
    rtn = man.start([split_job])
    if rtn:
        # The split failed, so there is nothing to align.
        sys.exit(rtn)

    paf_fn = "{0}/{1}.split.paf".format(out_dir, prefix)
    aln_jn = "self_aln_{}".format(spid)
    aln_job = hpc(pltfm,
                  cmd="minimap2 -xasm5 -DP {0} {0} > {1}".format(split_fn,
                                                                  paf_fn),
                  core=core_lim,
                  mem=mem_lim,
                  jn=aln_jn,
                  out="{0}/{1}_%J.o".format(out_dir, aln_jn),
                  err="{0}/{1}_%J.e".format(out_dir, aln_jn))
    sys.exit(man.start([aln_job]))
Exemple #2
0
def assess_bn(man, ref, fofn_list, core_lim, mem_lim, queue, out_dir, bin_dir,
              spid, skip_bn):
    """Run ``assess_bn_core`` on every Bionano input and merge the BED outputs.

    One worker process is started per entry of ``fofn_list``. When every
    worker returns 0 the per-input BED files are combined into
    ``<out_dir>/bn.bed``: through the ``union`` tool in ``bin_dir`` when
    there are several inputs, or through ``cp`` when there is exactly one.

    Args:
        man: job manager exposing ``start(jobs, flag)``.
        ref: reference path forwarded to each worker.
        fofn_list: sequence of Bionano input files, one worker per entry.
        core_lim: core limit forwarded to each worker.
        mem_lim: memory limit forwarded to each worker.
        queue: queue name forwarded to each worker.
        out_dir: directory receiving ``bionano_<i>.bed`` and ``bn.bed``.
        bin_dir: directory that holds the ``union`` executable.
        spid: identifier forwarded to each worker.
        skip_bn: when equal to 1 the whole step is skipped.

    Returns:
        None.
    """
    if skip_bn == 1:
        return
    if not fofn_list:
        # Nothing to assess: Pool(processes=0) raises ValueError and the
        # single-input branch below would index an empty list.
        return

    out_fns = ["{0}/bionano_{1}.bed".format(out_dir, idx)
               for idx in range(len(fofn_list))]
    params = [[man, mem_lim, core_lim, queue, ref, in_fn, out_dir, out_fn,
               bin_dir, spid]
              for in_fn, out_fn in zip(fofn_list, out_fns)]

    pl = Pool(processes=len(fofn_list))
    try:
        rtvn = pl.map(assess_bn_core, params)
    finally:
        # Release the worker processes whether or not map() raised.
        pl.close()
        pl.join()

    if not all(v == 0 for v in rtvn):
        return

    merged_fn = "{}/bn.bed".format(out_dir)
    if len(out_fns) > 1:
        # The job's output file is the merged BED, so no shell redirect.
        j = hpc(cmd=[bin_dir + '/union'] + out_fns, out=merged_fn, jn="union")
    else:
        j = hpc(cmd=['cp', out_fns[0], merged_fn], jn="cp")
    man.start([j], True)
Exemple #3
0
def assess_pb(man, ref, fofn, core_lim, mem_lim, queue, out_dir, bin_dir, spid, skip_pb):
    """Map every read file listed in ``fofn`` with minimap2, then run ``ast_pb``.

    One ``minimap2 -x map-pb`` job is created per non-blank line of ``fofn``.
    When ``man.start`` returns a falsy value for that batch, a single
    ``ast_pb`` job consumes all PAF files and writes ``<out_dir>/pb.bed``.

    Args:
        man: job manager exposing ``start(jobs[, flag])``.
        ref: reference path given to minimap2.
        fofn: text file listing one read file per line.
        core_lim: thread/core count for each minimap2 job.
        mem_lim: memory requested for every job.
        queue: unused; kept so existing callers keep working.
        out_dir: directory receiving PAF files, ``pb.bed`` and job logs.
        bin_dir: directory that holds the ``ast_pb`` executable.
        spid: identifier embedded in the ``ast_pb`` job name.
        skip_pb: when equal to 1 the whole step is skipped.

    Returns:
        None.
    """
    if skip_pb == 1:
        return

    jobs = []
    out_fns = []
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(fofn, "r") as f:
        for fl in f:
            reads_fn = fl.strip()
            if not reads_fn:
                # A blank (e.g. trailing) line names no read file.
                continue
            fn_prefix = get_lm_prefix(getfn(reads_fn))
            out_fn = "{0}/{1}.paf".format(out_dir, fn_prefix)
            out_fns.append(out_fn)

            jcmd = "minimap2 -x map-pb -t {0} {1} {2} >{3}".format(core_lim, ref, reads_fn, out_fn)
            jjn = "minimap_{}".format(fn_prefix)
            jout = "{0}/{1}_%J.o".format(out_dir, jjn)
            jerr = "{0}/{1}_%J.e".format(out_dir, jjn)
            jobs.append(hpc("lsf", cmd=jcmd, core=core_lim, mem=mem_lim, jn=jjn, out=jout, err=jerr))

    if man.start(jobs):
        # At least one mapping job failed; skip the assessment.
        return

    jcmd = "{0}/ast_pb -O {2} {1} >{2}/pb.bed".format(bin_dir, " ".join(out_fns), out_dir)
    jjn = "ast_pb_{}".format(spid)
    jout = "{0}/{1}.o".format(out_dir, jjn)
    jerr = "{0}/{1}.e".format(out_dir, jjn)
    j = hpc("lsf", cmd=jcmd, core=1, mem=mem_lim, jn=jjn, out=jout, err=jerr)
    man.start([j], True)
Exemple #4
0
def trim_aln_10x(p):
    """Optionally trim one 10x read pair, then align it with ``bwa mem``.

    ``p`` is a nine-item sequence: ``man, bin_dir, ref, fns, core_lim,
    mem_lim, queue, out_fn, skip_trim``. ``fns`` holds the two read files
    separated by a tab. Unless ``skip_trim`` equals 1, a ``10x`` trimming job
    runs first and the alignment reads ``<pref>_1.fq.gz`` / ``<pref>_2.fq.gz``
    from the directory of ``out_fn``; otherwise the given read files are
    aligned directly.

    Returns:
        The status of the trimming job when it is truthy, otherwise the
        status returned by ``man.start`` for the alignment job.
    """
    man, bin_dir, ref, fns, core_lim, mem_lim, queue, out_fn, skip_trim = p
    out_dir = getd(out_fn)
    r1, r2 = fns.split('\t')
    pref = getfn(r1).split(".")[0]

    if skip_trim == 1:
        fq1, fq2 = r1, r2
    else:
        trim_jn = "10x_trim"
        trim_job = hpc("lsf",
                       cmd=" ".join(['10x', '-p', pref, '-o', out_dir, '-c', r1, r2]),
                       jn=trim_jn,
                       out="{0}/{1}.o".format(out_dir, trim_jn),
                       err="{0}/{1}.e".format(out_dir, trim_jn))
        trim_rtn = man.start([trim_job])
        if trim_rtn:
            # Trimming failed; report its status without aligning.
            return trim_rtn
        fq1 = "{0}/{1}_1.fq.gz".format(out_dir, pref)
        fq2 = "{0}/{1}_2.fq.gz".format(out_dir, pref)

    aln_jn = "bwa_mem_{}".format(pref)
    aln_job = hpc("lsf",
                  cmd="bwa mem -t {0} {1} {2} {3} | samtools view -b -o - >{4}".format(
                      core_lim, ref, fq1, fq2, out_fn),
                  core=core_lim,
                  mem=mem_lim,
                  queue=queue,
                  jn=aln_jn,
                  out="{0}/{1}_%J.o".format(out_dir, aln_jn),
                  err="{0}/{1}_%J.e".format(out_dir, aln_jn))
    return man.start([aln_job])
Exemple #5
0
def assess_hic2(man, ref, fofn, core_lim, mem_lim, queue, out_dir, bin_dir, spid, skip_hic):
    """Align Hi-C read pairs to ``<out_dir>/split.fa`` and derive ``hic2.bed``.

    Stages, each run only when the previous ``man.start`` call returned a
    falsy value:

    1. one ``bwa mem -SP -B10`` job per non-blank line of ``fofn``;
    2. ``samtools faidx`` on the split reference;
    3. ``col_conts`` over all BAM files, writing ``links.mat``;
    4. ``ast_hic2`` on the ``.fai`` index and ``links.mat``, writing
       ``hic2.bed``.

    Args:
        man: job manager exposing ``start(jobs[, flag])``.
        ref: unused here; the alignment target is ``<out_dir>/split.fa``.
        fofn: text file with one tab-separated read pair per line.
        core_lim: thread/core count for each bwa job.
        mem_lim: memory requested for each bwa job.
        queue: queue name for the bwa jobs.
        out_dir: directory holding ``split.fa`` and receiving all outputs.
        bin_dir: directory that holds ``col_conts`` and ``ast_hic2``.
        spid: identifier embedded in job names.
        skip_hic: when equal to 1 the whole step is skipped.

    Returns:
        None.
    """
    if skip_hic == 1:
        return

    split_ref = "{}/split.fa".format(out_dir)
    jobs = []
    out_fns = []
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(fofn) as f:
        for fl in f:
            pair = fl.strip()
            if not pair:
                # A blank line cannot be unpacked into two read files.
                continue
            r1, r2 = pair.split('\t')
            pref = get_lm_prefix(getfn(r1))
            out_fn = "{0}/{1}.split.bam".format(out_dir, pref)
            out_fns.append(out_fn)

            jcmd = "bwa mem -SP -B10 -t {0} {1} {2} {3} | samtools view -b -o - >{4}".format(core_lim, split_ref, r1, r2, out_fn)
            jjn = "bwa_mem_{}".format(pref)
            jout = "{0}/{1}_%J.o".format(out_dir, jjn)
            jerr = "{0}/{1}_%J.e".format(out_dir, jjn)
            jobs.append(hpc("lsf", cmd=jcmd, core=core_lim, mem=mem_lim, queue=queue, jn=jjn, out=jout, err=jerr))

    if man.start(jobs):
        return

    # Index the split reference; ast_hic2 reads the resulting .fai file.
    jjn = "faidx"
    j = hpc(cmd=['samtools', 'faidx', split_ref],
            jn=jjn,
            out="{0}/{1}.o".format(out_dir, jjn),
            err="{0}/{1}.e".format(out_dir, jjn))
    if man.start([j]):
        return
    faidx_fn = "{}.fai".format(split_ref)

    jcmd = "{0}/col_conts {1} >{2}/links.mat".format(bin_dir, " ".join(out_fns), out_dir)
    jjn = "col_conts_{}".format(spid)
    jout = "{0}/{1}_%J.o".format(out_dir, jjn)
    jerr = "{0}/{1}_%J.e".format(out_dir, jjn)
    j = hpc("lsf", cmd=jcmd, core=1, mem=20000, jn=jjn, out=jout, err=jerr)
    if man.start([j], True):
        return

    jcmd = "{0}/ast_hic2 {1} {2}/links.mat >{2}/hic2.bed".format(bin_dir, faidx_fn, out_dir)
    jjn = "ast_hic2_{}".format(spid)
    jout = "{0}/{1}_%J.o".format(out_dir, jjn)
    jerr = "{0}/{1}_%J.e".format(out_dir, jjn)
    j = hpc("lsf", cmd=jcmd, core=1, mem=20000, jn=jjn, out=jout, err=jerr)
    man.start([j], True)
Exemple #6
0
def assess_bnx_core(p):
    """Align one Bionano BNX file to the reference and run ``ast_bion_bnx``.

    ``p`` is an eleven-item sequence: ``man, mem_lim, core_lim, queue, ref,
    fn, out_dir, out_fn, bin_dir, spid, ishap``. Four jobs run in order, each
    only when the previous ``man.start`` call returned a falsy value:
    ``fa2cmap_multi_color.pl`` on ``ref``, a ``cp`` of ``fn`` into
    ``out_dir``, ``align_bnx_to_cmap.py``, and finally ``ast_bion_bnx`` whose
    stdout goes to ``out_fn``.

    Returns:
        The value returned by the last ``man.start`` call that was made.
    """
    [man, mem_lim, core_lim, queue, ref, fn, out_dir, out_fn, bin_dir, spid, ishap] = p
    _fn_pref, tech, enzyme = get_bn_details(get_rm_prefix(fn))

    # Bionano Solve installation used for every tool below (site-specific
    # path; earlier revisions pointed at the 3.2.1 and 3.4 releases).
    solve_dir = "/nfs/users/nfs_d/dg30/luster_dg30/dg30/projects/vgp/tools/Solve3.3_10252018/"
    jcmd = " ".join(["perl", solve_dir+"HybridScaffold/10252018/scripts/fa2cmap_multi_color.pl", "-e", enzyme, "1", "-i", ref, '-o', out_dir+"/fa2cmap"])

    jjn = "fa2cmap"
    # The log files used to be named after ``i``, a name that is not defined
    # in this function; use the job name like the other job logs do.
    jout = "{0}/{1}.o".format(out_dir, jjn)
    jerr = "{0}/{1}.e".format(out_dir, jjn)

    j = hpc("lsf", cmd=jcmd, mem=1000, jn=jjn, out=jout, err=jerr)
    rtn = man.start([j])
    print ("fa2cmap return value {}".format(rtn))
    if rtn:
        return rtn

    # Copy the molecule file next to the outputs before aligning it.
    j = hpc(cmd=['cp', fn, out_dir], out="{}/cp.o".format(out_dir))
    rtn = man.start([j])
    if rtn:
        return rtn

    ref_prefix = get_rm_prefix(ref)
    ref_cmap = "{0}/fa2cmap/{1}_{2}_0kb_0labels.cmap".format(out_dir, ref_prefix, enzyme.upper())
    key_fn = "{0}/fa2cmap/{1}_{2}_0kb_0labels_key.txt".format(out_dir, ref_prefix, enzyme.upper())
    query_map = "{0}/{1}".format(out_dir, getfn(fn))
    optn = "DLE1_{}".format(tech.lower()) if enzyme == "DLE1" else tech.lower()
    align_dir = out_dir + "/alignref_{}".format(enzyme)
    jcmd = " ".join(["python2",  solve_dir+"Pipeline/10252018/align_bnx_to_cmap.py","--prefix", enzyme, "--mol", query_map,  "--ref", ref_cmap, "--ra", solve_dir+"RefAligner/7915.7989rel/", "--nthreads", "12", "--pipeline", solve_dir+"Pipeline/10252018/", "--optArgs", solve_dir+"RefAligner/7915.7989rel/optArguments_{1}_{0}.xml".format(optn, "haplotype" if ishap else "nonhaplotype"), "--output", align_dir])
    # Twelve cores are requested to match the fixed --nthreads value above.
    j = hpc("lsf", cpu="avx", mem=mem_lim, core=12, queue=queue, cmd=jcmd, jn="bnx_refalign", out="{0}/bnx_refalign_{1}.o".format(out_dir, enzyme[0:4]))
    rtn = man.start([j])
    if rtn:
        return rtn

    map_path = "{0}/alignref_{2}/{1}".format(out_dir, "contigs/alignmolvref/merge/exp_refineFinal1",  enzyme)
    rmap_fn = "{}_r.cmap".format(map_path)
    qmap_fn = "{}_q.cmap".format(map_path)
    xmap_fn = "{}.xmap".format(map_path)

    jcmd = " ".join([bin_dir+"/ast_bion_bnx", rmap_fn, qmap_fn, xmap_fn, key_fn, '-O', align_dir])
    # stdout becomes the result BED; stderr is captured in a per-tech file.
    jcmd += " >"+out_fn + " 2>" + "{0}/{1}_{2}.bed".format(out_dir, tech, enzyme)
    ast_jn = "ast_bion_bnx"
    jout = "{0}/{1}.o".format(out_dir, ast_jn)
    jerr = "{0}/{1}.e".format(out_dir, ast_jn)
    j = hpc("lsf", mem=5000, cmd=jcmd, jn=ast_jn, out=jout, err=jerr)
    return man.start([j], True)
Exemple #7
0
def aln_10x(p):
    """Align one 10x read pair to ``ref`` with ``bwa mem`` and write a BAM.

    ``p`` is a six-item sequence: ``man, ref, fns, core_lim, mem_lim,
    out_fn``; ``fns`` holds the two read files separated by a tab. Job logs
    are written to the directory of ``out_fn``.

    Returns:
        The value returned by ``man.start`` for the alignment job.
    """
    man, ref, fns, core_lim, mem_lim, out_fn = p
    log_dir = getd(out_fn)
    r1, r2 = fns.split('\t')

    # The job is named after the part of the first read's file name that
    # precedes its first dot.
    job_name = "bwa_mem_{}".format(getfn(r1).split(".")[0])
    job = hpc(
        "lsf",
        cmd="bwa mem -t {0} {1} {2} {3} | samtools view -b -o - >{4}".format(
            core_lim, ref, r1, r2, out_fn),
        core=core_lim,
        mem=mem_lim,
        jn=job_name,
        out="{0}/{1}_%J.o".format(log_dir, job_name),
        err="{0}/{1}_%J.e".format(log_dir, job_name),
    )
    return man.start([job])
Exemple #8
0
def acc(man, ref, out_dir, bin_dir, spid):
    """Accumulate the contig-level evidence BED files into ``pb_bn.bed``.

    Collects whichever of ``pb.bed`` and ``bn.bed`` pass ``checkf`` in
    ``out_dir`` and runs the ``acc`` tool on ``gaps.bed`` plus those files,
    redirecting its output to ``<out_dir>/pb_bn.bed``.

    Args:
        man: job manager exposing ``start(jobs, flag)``.
        ref: unused in the visible body.
        out_dir: directory holding the input BED files and receiving output.
        bin_dir: directory that holds the ``acc`` executable.
        spid: identifier embedded in the job name and log file names.
    """
    # An earlier variant (previously kept here as commented-out code) merged
    # gaps/10x/bn/hic/pb BED files into a single acc.bed.
    beds = []
    for fn in ["pb.bed", "bn.bed"]:
        fpath = "{0}/{1}".format(out_dir, fn)
        if checkf(fpath):
            beds.append(fpath)
    jcmd = "{0}/acc {2}/gaps.bed {1} > {2}/pb_bn.bed".format(bin_dir, " ".join(beds), out_dir)
    jjn = "acc_contig_{}".format(spid)
    jout = "{0}/acc_contig_{1}.o".format(out_dir, spid)
    jerr = "{0}/acc_contig_{1}.e".format(out_dir, spid)
    j = hpc("lsf", cmd=jcmd, core=1, mem=20000, jn=jjn, out=jout, err=jerr)
    man.start([j], True)

    # This line was tab-indented inside a space-indented body, which Python 3
    # rejects with TabError; it is now indented with spaces.
    # NOTE(review): the reset is not used by anything in the visible body.
    beds = []
Exemple #9
0
def assess_10x(man, ref, fofn, core_lim, mem_lim, queue, out_dir, bin_dir, spid, skip_trim, skip_10x):
    """Trim/align every 10x read pair in parallel, then run ``ast_10x``.

    One ``trim_aln_10x`` worker process is started per non-blank line of
    ``fofn``. When every worker returns 0, a single ``ast_10x`` job consumes
    ``gaps.bed`` and all BAM files and writes ``<out_dir>/10x.bed``.

    Args:
        man: job manager exposing ``start(jobs, flag)``.
        ref: reference path forwarded to each worker.
        fofn: text file with one tab-separated read pair per line.
        core_lim: core limit forwarded to each worker.
        mem_lim: memory limit forwarded to each worker.
        queue: queue name forwarded to each worker.
        out_dir: directory receiving BAM files, ``10x.bed`` and job logs.
        bin_dir: directory that holds the ``ast_10x`` executable.
        spid: identifier embedded in the ``ast_10x`` job name.
        skip_trim: forwarded to each worker.
        skip_10x: when equal to 1 the whole step is skipped.

    Returns:
        None.
    """
    if skip_10x == 1:
        return

    out_fns = []
    params = []
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(fofn, "r") as f:
        for fl in f:
            pair = fl.strip()
            if not pair:
                # A blank line holds no read pair; the worker could not
                # unpack it into two file names.
                continue
            out_fn = "{0}/{1}.bam".format(out_dir, get_lm_prefix(getfn(pair.split('\t')[0])))
            out_fns.append(out_fn)
            params.append([man, bin_dir, ref, pair, core_lim, mem_lim, queue, out_fn, skip_trim])

    if not params:
        # Pool(processes=0) raises ValueError; there is nothing to align.
        return

    pl = Pool(processes=len(params))
    try:
        rtvs = pl.map(trim_aln_10x, params)
    finally:
        # Release the worker processes whether or not map() raised.
        pl.close()
        pl.join()

    if not all(v == 0 for v in rtvs):
        return

    jcmd = "{0}/ast_10x -O {2} {2}/gaps.bed {1} >{2}/10x.bed".format(bin_dir, " ".join(out_fns), out_dir)
    jjn = "ast_10x_{}".format(spid)
    jout = "{0}/{1}_%J.o".format(out_dir, jjn)
    jerr = "{0}/{1}_%J.e".format(out_dir, jjn)
    j = hpc("lsf", cmd=jcmd, core=1, mem=20000, queue="long", jn=jjn, out=jout, err=jerr)
    man.start([j], True)
Exemple #10
0
def punchlist(man, out_dir, bin_dir, spid):
    """Run ``pchlst`` on ``acc.bed`` to produce ``<out_dir>/pchlst.bed``.

    Nothing is submitted when ``checkf`` rejects ``<out_dir>/acc.bed``.
    """
    acc_bed = "{}/acc.bed".format(out_dir)
    if not checkf(acc_bed):
        return

    job = hpc(
        cmd="{0}/pchlst {1}/gaps.bed {2} > {1}/pchlst.bed".format(
            bin_dir, out_dir, acc_bed),
        out="{0}/pchlst_{1}.o".format(out_dir, spid),
        err="{0}/pchlst_{1}.e".format(out_dir, spid),
        jn="pchlst_{}".format(spid),
    )
    man.start([job], True)
Exemple #11
0
def assess_bn_core(p):
    """Align one Bionano CMAP file to the reference and run ``ast_bion``.

    ``p`` is an eleven-item sequence: ``man, mem_lim, core_lim, queue, ref,
    fn, out_dir, out_fn, bin_dir, spid, ishap``. Four jobs run in order, each
    only when the previous ``man.start`` call returned a falsy value:
    ``fa2cmap.pl`` on ``ref``, a ``cp`` of ``fn`` into ``out_dir``,
    ``runCharacterize.py``, and finally ``ast_bion`` whose stdout goes to
    ``out_fn``.

    Returns:
        The value returned by the last ``man.start`` call that was made.
    """
    [man, mem_lim, core_lim, queue, ref, fn, out_dir, out_fn, bin_dir, spid, ishap] = p
    _fn_pref, tech, enzyme = get_bn_details(get_rm_prefix(fn))

    # Bionano Solve installation used for every tool below (site-specific).
    solve_dir = "/nfs/users/nfs_d/dg30/luster_dg30/dg30/projects/vgp/tools/Solve3.2.1_04122018/"
    jcmd = " ".join(["perl", solve_dir+"HybridScaffold/04122018/scripts/fa2cmap.pl", "-n", enzyme[0:4], "-i", ref, '-o', out_dir])

    jjn = "fa2cmap"
    # The log files used to be named after ``i``, a name that is not defined
    # in this function; use the job name like the other job logs do.
    jout = "{0}/{1}.o".format(out_dir, jjn)
    jerr = "{0}/{1}.e".format(out_dir, jjn)

    j = hpc("lsf", cmd=jcmd, mem=1000, jn=jjn, out=jout, err=jerr)
    rtn = man.start([j])
    if rtn:
        return rtn

    # Copy the query map next to the outputs before aligning it.
    j = hpc(cmd=['cp', fn, out_dir], out="{}/cp.o".format(out_dir))
    rtn = man.start([j])
    if rtn:
        return rtn

    ref_prefix = get_rm_prefix(ref)
    ref_cmap = "{0}/fa2cmap/{1}_{2}_0Kb_0labels.cmap".format(out_dir, ref_prefix, enzyme)
    key_fn = "{0}/fa2cmap/{1}_{2}_0Kb_0labels_key.txt".format(out_dir, ref_prefix, enzyme)
    query_cmap = "{0}/{1}".format(out_dir, getfn(fn))
    optn = "DLE1_{}".format(tech.lower()) if enzyme == "DLE1" else tech.lower()
    jcmd = " ".join(["python2",  solve_dir+"Pipeline/04122018/runCharacterize.py","-t",  solve_dir+"RefAligner/7437.7523rel/RefAligner","-q", query_cmap, "-r", ref_cmap, "-p", solve_dir+"Pipeline/04122018/", "-a", solve_dir+"RefAligner/7437.7523rel/optArguments_{1}_{0}.xml".format(optn, "haplotype" if ishap else "nonhaplotype"), "-n","2"])

    j = hpc("lsf", cpu="avx", mem=mem_lim, core=core_lim, queue=queue, cmd=jcmd, jn="bn_refalign", out="{0}/bn_refalign_{1}.o".format(out_dir, enzyme[0:4]))
    rtn = man.start([j])
    if rtn:
        return rtn

    map_path = "{0}/alignref/{1}".format(out_dir, get_rm_prefix(fn))
    rmap_fn = "{}_r.cmap".format(map_path)
    qmap_fn = "{}_q.cmap".format(map_path)
    xmap_fn = "{}.xmap".format(map_path)

    jcmd = " ".join([bin_dir+"/ast_bion", rmap_fn, qmap_fn, xmap_fn, key_fn])
    # stdout becomes the result BED; stderr is captured in a per-tech file.
    jcmd += " >"+out_fn + " 2>" + "{0}/{1}_{2}.bed".format(out_dir, tech, enzyme)
    ast_jn = "ast_bion"
    jout = "{0}/{1}.o".format(out_dir, ast_jn)
    jerr = "{0}/{1}.e".format(out_dir, ast_jn)
    j = hpc("lsf", mem=5000, cmd=jcmd, jn=ast_jn, out=jout, err=jerr)
    return man.start([j], True)
Exemple #12
0
def bwa_index(p):
    """Submit a ``bwa index`` job for a reference.

    ``p`` is a four-item sequence: ``man, ref, out_dir, spid``. Job logs are
    written to ``out_dir``.

    Returns:
        The value returned by ``man.start`` for the indexing job.
    """
    man, ref, out_dir, spid = p
    job_name = "bwa_index_{}".format(spid)
    index_job = hpc(
        "lsf",
        cmd="bwa index {}".format(ref),
        core=1,
        mem=20000,
        jn=job_name,
        out="{0}/{1}_%J.o".format(out_dir, job_name),
        err="{0}/{1}_%J.e".format(out_dir, job_name),
    )
    return man.start([index_job])
Exemple #13
0
def bwa_index(p):
    """Submit ``bwa index`` jobs for the reference and for ``split.fa``.

    ``p`` is a four-item sequence: ``man, ref, out_dir, spid``. Both jobs are
    handed to the manager in a single ``start`` call; ``split.fa`` is looked
    up inside ``out_dir``.

    Returns:
        The value returned by ``man.start`` for the two jobs.
    """
    man, ref, out_dir, spid = p

    # (target to index, job name) for each job, in submission order.
    targets = [
        (ref, "bwa_index_{}".format(spid)),
        ("{}/split.fa".format(out_dir), "bwa_index_split_{}".format(spid)),
    ]
    index_jobs = [
        hpc("lsf",
            cmd="bwa index {}".format(target),
            core=1,
            mem=20000,
            jn=job_name,
            out="{0}/{1}_%J.o".format(out_dir, job_name),
            err="{0}/{1}_%J.e".format(out_dir, job_name))
        for target, job_name in targets
    ]
    return man.start(index_jobs)
Exemple #14
0
def punchlist(man, ref, out_dir, bin_dir, spid):
    """Build contig, scaffold and chromosome punch lists in ``out_dir``.

    * ``pb_bn.bed`` present        -> ``pchlst -c`` writes ``pchlst_ctg.bed``
    * ``10x_hic2_bn.bed`` present  -> ``pchlst`` writes ``pchlst_scaf.bed``
    * both inputs present          -> ``union_brks -x 1000`` merges the two
      punch lists into ``pchlst_chrom.bed``

    Presence is decided by ``checkf``. ``ref`` is accepted but does not
    influence any command.
    """
    def _submit(cmd, tag):
        # Every stage shares the same naming scheme for job and log files.
        job = hpc(cmd=cmd,
                  out="{0}/{1}_{2}.o".format(out_dir, tag, spid),
                  err="{0}/{1}_{2}.e".format(out_dir, tag, spid),
                  jn="{0}_{1}".format(tag, spid))
        man.start([job], True)

    contig_acc = "{}/pb_bn.bed".format(out_dir)
    if checkf(contig_acc):
        _submit("{0}/pchlst -c {1}/gaps.bed {2} > {1}/pchlst_ctg.bed".format(
            bin_dir, out_dir, contig_acc), "pchlst_ctg")

    scaf_acc = "{}/10x_hic2_bn.bed".format(out_dir)
    if checkf(scaf_acc):
        _submit("{0}/pchlst  {1}/gaps.bed {2} > {1}/pchlst_scaf.bed".format(
            bin_dir, out_dir, scaf_acc), "pchlst_scf")

    # The merge is gated on the accumulated inputs, checked again here.
    if not (checkf(scaf_acc) and checkf(contig_acc)):
        return
    ctg_pchlst = "{}/pchlst_ctg.bed".format(out_dir)
    scf_pchlst = "{}/pchlst_scaf.bed".format(out_dir)
    _submit("{0}/union_brks -x 1000 {1}/gaps.bed {2} {3} > {1}/pchlst_chrom.bed".format(
        bin_dir, out_dir, ctg_pchlst, scf_pchlst), "pchlst_chrom")
Exemple #15
0
def postproc(man, fofn, out_dir, bin_dir, spid):
    """Run ``ast_postproc`` on the punch list and the per-input PAF files.

    For every non-blank line of ``fofn`` the matching
    ``<out_dir>/<prefix>.paf`` path is derived with ``get_lm_prefix`` and
    ``getfn``. When ``checkf`` accepts ``<out_dir>/pchlst.bed``, one
    ``ast_postproc`` job writes ``<out_dir>/post_pchlst.bed``.

    Args:
        man: job manager exposing ``start(jobs)``.
        fofn: text file listing one read file per line.
        out_dir: directory holding the PAF files and receiving the output.
        bin_dir: directory that holds the ``ast_postproc`` executable.
        spid: unused; kept so existing callers keep working.

    Returns:
        None.
    """
    pchlst_bed = "{}/pchlst.bed".format(out_dir)
    # The ``with`` block closes the file; blank lines name no input and are
    # skipped so they do not turn into a bogus ".paf" argument.
    with open(fofn, "r") as f:
        in_fns = [
            "{0}/{1}.paf".format(out_dir, get_lm_prefix(getfn(entry)))
            for entry in (fl.strip() for fl in f)
            if entry
        ]

    if not checkf(pchlst_bed):
        return

    jcmd = "{0}/ast_postproc {1} {2} > {3}/post_pchlst.bed".format(bin_dir, pchlst_bed, " ".join(in_fns), out_dir)
    jout = "{}/postproc.o".format(out_dir)
    jerr = "{}/postproc.e".format(out_dir)
    j = hpc("lsf", cmd=jcmd, core=1, mem=3000, jn="ast_postproc", out=jout, err=jerr)
    man.start([j])
Exemple #16
0
def get_seqs(man, pltfm, ref, dups_fn, core_lim, mem_lim, out_dir, bin_dir,
             spid):
    """Run ``get_seqs`` on ``ref`` using the duplicates listed in ``dups_fn``.

    The tool's stdout is redirected to ``<prefix>.purged.fa`` and its stderr
    to ``<prefix>.red.fa``, both inside ``out_dir`` (created via ``mkdir``),
    where ``<prefix>`` is ``get_lm_prefix(ref)``.

    Returns:
        The value returned by ``man.start`` for the job.
    """
    mkdir(out_dir)

    ref_prefix = get_lm_prefix(ref)
    purged_fa = "{0}/{1}.purged.fa".format(out_dir, ref_prefix)
    redundant_fa = "{0}/{1}.red.fa".format(out_dir, ref_prefix)

    job_name = "get_seqs_{}".format(spid)
    job = hpc(
        pltfm,
        cmd="{0}/get_seqs {1} {2} >{3} 2>{4}".format(
            bin_dir, dups_fn, ref, purged_fa, redundant_fa),
        core=core_lim,
        mem=mem_lim,
        jn=job_name,
        out="{0}/{1}_%J.o".format(out_dir, job_name),
        err="{0}/{1}_%J.e".format(out_dir, job_name),
    )
    return man.start([job], True)
Exemple #17
0
def run_kcm(man, pltfm, skip, spid, fasta, mem, core, kmer, reads, prefix,
            tmpdir):
    """Submit the ``run_kcm`` command on the ``normal`` queue.

    Returns:
        0 when ``skip`` equals 1 (nothing is submitted), otherwise the value
        returned by ``man.start`` for the job.
    """
    if skip == 1:
        return 0

    job_name = "kcm_{}".format(spid)
    command = "run_kcm {0} {1} {2} {3} {4} {5} {6} {7}".format(
        spid, mem, core, kmer, reads, fasta, prefix, tmpdir)
    # Log file names carry no directory component.
    job = hpc(
        pltfm,
        cmd=command,
        core=core,
        mem=mem,
        queue="normal",
        jn=job_name,
        out="{}.o".format(job_name),
        err="{}.e".format(job_name),
    )
    return man.start([job])
Exemple #18
0
def run_busco(man, pltfm, skip, workdir, spid, fasta, mem, core, queue, prefix,
              lineage, tmpdir):
    """Change into ``workdir`` and submit the ``run_busco2`` command.

    Returns:
        0 when ``skip`` equals 1 (no directory change, nothing submitted),
        otherwise the value returned by ``man.start`` for the job.
    """
    if skip == 1:
        return 0

    # Log file names carry no directory component, and the working directory
    # is switched before the job is created.
    os.chdir(workdir)

    job_name = "busco_{}".format(spid)
    command = "run_busco2 {0} {1} {2} {3} {4}".format(
        fasta, core, prefix, lineage, tmpdir)
    job = hpc(
        pltfm,
        cmd=command,
        core=core,
        mem=mem,
        queue=queue,
        jn=job_name,
        out="{}.o".format(job_name),
        err="{}.e".format(job_name),
    )
    return man.start([job], True)
Exemple #19
0
def purge_dups(man, pltfm, paf_fn, base_cov_fn, cutoff_fn, core_lim, mem_lim,
               queue, out_dir, bin_dir, spid):
    """Run ``purge_dups -2`` on ``paf_fn`` and write ``<out_dir>/dups.bed``.

    ``-c <base_cov_fn> -T <cutoff_fn>`` is added to the command only when
    both file names are non-empty strings. ``queue`` is accepted but not
    passed on to the job.

    Returns:
        The value returned by ``man.start`` for the job.
    """
    mkdir(out_dir)

    if base_cov_fn == "" or cutoff_fn == "":
        cov_opts = ""
    else:
        cov_opts = "-c {0} -T {1}".format(base_cov_fn, cutoff_fn)

    dups_bed = "{}/dups.bed".format(out_dir)
    job_name = "purge_dups_{}".format(spid)
    job = hpc(
        pltfm,
        cmd="{0}/purge_dups -2 {1} {2} > {3}".format(
            bin_dir, cov_opts, paf_fn, dups_bed),
        core=core_lim,
        mem=mem_lim,
        jn=job_name,
        out="{0}/{1}_%J.o".format(out_dir, job_name),
        err="{0}/{1}_%J.e".format(out_dir, job_name),
    )
    return man.start([job], True)
Exemple #20
0
        f = open(config_fn, "r")
        config_dict = json.load(f)

        out_dir = config_dict["out_dir"]
        ref = config_dict["ref"]
        if not checkd(out_dir):
            mkdir(out_dir)

        man = manager(retries=2)

        jcmd = "{0}/detgaps {1} > {2}/gaps.bed".format(bin_dir, ref, out_dir)
        jout = "{}/detgaps.o".format(out_dir)
        jerr = "{}/detgaps.e".format(out_dir)

        j = hpc("lsf", cmd=jcmd, jn="detgaps", out=jout, err=jerr)
        if man.start([j], True):
            print("fail to generate gaps for {}".format(ref))
            sys.exit(1)
        jcmd = "{0}/split_fa {1} > {2}/split.fa".format(bin_dir, ref, out_dir)
        jout = "{}/split_ref.o".format(out_dir)
        jerr = "{}/split_ref.e".format(out_dir)

        j = hpc("lsf", cmd=jcmd, jn="split_ref", out=jout, err=jerr)
        if man.start([j], True):
            print("fail to split_ref for {}".format(ref))
            sys.exit(1)

        procs = []

        # func_list = [assess_pb, assess_10x_hic, assess_bn, assess_bnx]
Exemple #21
0
        f = open(config_fn, "r")
        config_dict = json.load(f)

        out_dir = config_dict["out_dir"]
        ref = config_dict["ref"]
        if not checkd(out_dir):
            mkdir(out_dir)

        man = manager(retries=2)

        jcmd = "{0}/detgaps {1} > {2}/gaps.bed".format(bin_dir, ref, out_dir)
        jout = "{}/detgaps.o".format(out_dir)
        jerr = "{}/detgaps.e".format(out_dir)

        j = hpc("lsf", cmd=jcmd, jn="detgaps", out=jout, err=jerr)
        if man.start([j], True):
            print("fail to generate gaps for {}".format(ref))
            sys.exit(1)

        procs = []

        func_list = [assess_pb, assess_10x_hic, assess_bn]
        # func_list = [aassess_bn]
        key_list = ["pb", "10x_hic", "bn"]
        # key_list = ["bn"]
        for i in range(len(func_list)):
            cur_d = config_dict[key_list[i]]
            if i == 1:
                p = Process(
                    target=func_list[i],
Exemple #22
0
    p = hpc("lsf", cmd="./test 1400000000", mem=1000, out="test_c2.o", err="test_c2.e" )
    procs.append(p)
    man.start(procs)
def func1(man):
    """Announce itself, then submit the ``./test 130000`` LSF job via ``man``."""
    print ("func1 running")
    job = hpc("lsf", cmd="./test 130000", mem=1000, out="test_c1.o", err="test_c1.e")
    man.start([job])


if __name__ == '__main__':
    # Manual smoke test: describe and submit one LSF job that lists the
    # current directory into the file "fls".
    m = manager(wait=10, retries=5)

    # Alternative job definitions kept for reference:
    #   p = hpc(cmd="ls -l > files")
    #   p = hpc(cmd=["ls", '-l'], out="files")
    p = hpc("LSF", cmd="ls -l > fls", cpu="avx", hosts="bc-1-01-3", mem=1000, out="test_1.o", err="test_1.e")
    # p.chgq(m.sys[p.platform]["queues"])
    p.speak()

    procs = [p]
    m.start(procs)

    # Running func1/func2 concurrently through multiprocessing:
    #   procs2 = [Process(target=func2, args=(m,)),
    #             Process(target=func1, args=(m,))]
    #   for p in procs2:
    #       p.start()
    #   for p in procs2:
    #       p.join()
Exemple #23
0
def cal_cov(man, pltfm, ref, ispb, isdip, fofn, core_lim, mem_lim, queue,
            mnmp_opt, bwa_opt, skip, out_dir, bin_dir, spid, ispurged):
    """Map reads to ``ref``, compute coverage statistics and derive cutoffs.

    Stages, each run only when the previous ``man.start`` call returned a
    falsy value:

    1. mapping: one ``minimap2`` job per non-blank line of ``fofn`` when
       ``ispb`` is truthy, otherwise ``bwa_index`` followed by one
       ``bwa mem | samtools view`` job per tab-separated read pair;
    2. statistics: ``pbcstat`` (``ispb``) or ``ngscstat`` over all mapping
       outputs, writing into ``out_dir``;
    3. ``calcuts`` on ``<out_dir>/PB.stat``, writing ``<out_dir>/cutoffs``.

    The function never returns: it terminates the process with status 1 when
    ``skip`` equals 1 or indexing fails, otherwise with the value returned by
    the last ``man.start`` call.

    Args:
        man: job manager exposing ``start(jobs[, flag])``.
        pltfm: platform name forwarded to ``hpc`` and ``bwa_index``.
        ref: reference path.
        ispb: truthy for the minimap2 path, falsy for the bwa path.
        isdip: unused in this function.
        fofn: text file listing the inputs, one (pair) per line.
        core_lim: thread count for the mappers; the bwa jobs request one
            core more than this.
        mem_lim: memory requested for the mapping jobs.
        queue: queue name for the bwa jobs.
        mnmp_opt: minimap2 options replacing the default ``-x map-pb`` when
            non-empty.
        bwa_opt: extra ``bwa mem`` options, inserted when non-empty.
        skip: when equal to 1 the process exits with status 1 right after
            ``out_dir`` is created.
        out_dir: output directory, passed to ``mkdir`` before use.
        bin_dir: directory holding ``pbcstat``, ``ngscstat`` and ``calcuts``.
        spid: identifier embedded in job names.
        ispurged: unused in this function.

    Raises:
        SystemExit: always, as described above.
    """
    # ``exit`` is installed by the ``site`` module for interactive sessions
    # and may be missing (e.g. ``python -S``); ``sys.exit`` always exists.
    import sys

    mkdir(out_dir)
    if skip == 1:
        sys.exit(1)
    if not ispb:
        # The bwa path needs an index of the reference first.
        if bwa_index([man, pltfm, ref, out_dir, spid]):
            sys.exit(1)

    jobs = []
    out_fns = []
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(fofn, "r") as f:
        for fl in f:
            fl_strip = fl.strip()
            if not fl_strip:
                # A blank line names no input and cannot be unpacked into a
                # read pair.
                continue
            if ispb:
                fn_prefix = get_rm_prefix(getfn(fl_strip))
                out_fn = "{0}/{1}.paf".format(out_dir, fn_prefix)
                out_fns.append(out_fn)
                # Pick the minimap2 -I value from the reference file size.
                idx_opt = "-I {}".format(
                    "4G" if os.path.getsize(ref) < 4e9 else "10G")
                preset = mnmp_opt if mnmp_opt != "" else "-x map-pb"
                jcmd = "minimap2 {4} {5} -t {0} {1} {2} >{3}".format(
                    core_lim, ref, fl_strip, out_fn, idx_opt, preset)
                jjn = "minimap_{}".format(fn_prefix)
                jobs.append(hpc(pltfm,
                                cmd=jcmd,
                                core=core_lim,
                                mem=mem_lim,
                                jn=jjn,
                                out="{0}/{1}_%J.o".format(out_dir, jjn),
                                err="{0}/{1}_%J.e".format(out_dir, jjn)))
            else:
                r1, r2 = fl_strip.split('\t')
                fn_prefix = get_rm_prefix(getfn(r1))
                out_fn = "{0}/{1}.bam".format(out_dir, fn_prefix)
                out_fns.append(out_fn)
                if bwa_opt != "":
                    jcmd = "bwa mem -t {0} {5} {1} {2} {3} | samtools view -b - >{4}".format(
                        core_lim, ref, r1, r2, out_fn, bwa_opt)
                else:
                    jcmd = "bwa mem -t {0} {1} {2} {3} | samtools view -b - >{4}".format(
                        core_lim, ref, r1, r2, out_fn)
                jjn = "bwa_mem_{}".format(fn_prefix)
                jobs.append(hpc(pltfm,
                                cmd=jcmd,
                                core=core_lim + 1,
                                mem=mem_lim,
                                queue=queue,
                                jn=jjn,
                                out="{0}/{1}_%J.o".format(out_dir, jjn),
                                err="{0}/{1}_%J.e".format(out_dir, jjn)))
    rtn = man.start(jobs)

    if not rtn:
        # The two statistics tools differ only in name and memory request.
        if ispb:
            stat_tool, stat_mem = "pbcstat", 10000
        else:
            stat_tool, stat_mem = "ngscstat", 30000
        jcmd = "{0}/{3} -O {2} {1}".format(bin_dir, " ".join(out_fns),
                                           out_dir, stat_tool)
        jjn = "{0}_{1}".format(stat_tool, spid)
        j = hpc(pltfm,
                cmd=jcmd,
                core=1,
                mem=stat_mem,
                jn=jjn,
                out="{0}/{1}.o".format(out_dir, jjn),
                err="{0}/{1}.e".format(out_dir, jjn))
        rtn = man.start([j], True)

    if not rtn:
        # NOTE(review): PB.stat is read regardless of ``ispb`` - confirm that
        # ngscstat produces a file with this name as well.
        in_fn = "{}/PB.stat".format(out_dir)
        out_fn = "{}/cutoffs".format(out_dir)
        # The empty fourth argument is a placeholder for calcuts options.
        jcmd = "{0}/calcuts {3} {1} > {2}".format(bin_dir, in_fn, out_fn, "")
        jjn = "calcuts_{}".format(spid)
        j = hpc(pltfm,
                cmd=jcmd,
                core=1,
                mem=2000,
                jn=jjn,
                out="{0}/{1}.o".format(out_dir, jjn),
                err="{0}/{1}.e".format(out_dir, jjn))
        rtn = man.start([j], True)
    sys.exit(rtn)
Exemple #24
0
def func2(man):
    """Announce itself, then submit the ``./test 1400000000`` LSF job via ``man``."""
    print ("func2 running")
    job = hpc("lsf", cmd="./test 1400000000", mem=1000, out="test_c2.o", err="test_c2.e" )
    man.start([job])
Exemple #25
0
    jjn = "acc_contig_{}".format(spid)
    jout = "{0}/acc_contig_{1}.o".format(out_dir, spid)
    jerr = "{0}/acc_contig_{1}.e".format(out_dir, spid)
    j = hpc("lsf", cmd=jcmd, core=1, mem = 20000, jn=jjn, out=jout, err=jerr)
    man.start([j], True)
	
	beds = []
    for fn in ["10x.bed", "hic2.bed", "bn.bed"]:
        fpath = "{0}/{1}".format(out_dir,fn)
        if checkf(fpath):
            beds.append(fpath)
    jcmd = "{0}/acc {2}/gaps.bed {1} > {2}/10x_hic2_bn.bed".format(bin_dir, " ".join(beds), out_dir) 
    jjn = "acc_scaf_{}".format(spid)
    jout = "{0}/acc_scaf_{1}.o".format(out_dir, spid)
    jerr = "{0}/acc_scaf_{1}.e".format(out_dir, spid)
    j = hpc("lsf", cmd=jcmd, core=1, mem = 20000, jn=jjn, out=jout, err=jerr)
    man.start([j], True)
    

def punchlist(man, ref, out_dir, bin_dir, spid):
    # all_bed = "{}/acc.bed".format(out_dir)
    # if checkf(all_bed):
        # jcmd = "{0}/pchlst {1}/gaps.bed {2} > {1}/pchlst.bed".format(bin_dir, out_dir, all_bed) 
        # jjn = "pchlst_{}".format(spid)
        # jout = "{0}/pchlst_{1}.o".format(out_dir, spid)
        # jerr = "{0}/pchlst_{1}.e".format(out_dir, spid)
        # j = hpc(cmd = jcmd, out=jout, err=jerr, jn=jjn)
        # man.start([j], True)
    contig_acc = "{}/pb_bn.bed".format(out_dir)
    if checkf(contig_acc):
        jcmd = "{0}/pchlst -c {1}/gaps.bed {2} > {1}/pchlst_ctg.bed".format(bin_dir, out_dir, contig_acc)