Example #1
0
def task_track_reads(self):
    """Generate ``track_reads_h.sh`` in the working directory and submit it.

    ``wd`` and ``config`` are read from ``self.parameters``; the config
    supplies ``input_bam_fofn``, ``sge_track_reads`` and ``job_type``.

    The generated script runs ``fc_get_read_hctg_map.py``,
    ``fc_rr_hctg_track.py`` and ``fc_select_reads_from_bam.py`` (the latter on
    ``input_bam_fofn``).  Its EXIT trap touches ``<job_done>.exit`` and its
    final command touches the ``job_done`` path, which is then passed to
    ``wait_for_file``.
    """
    done_path = fn(self.job_done)
    wd = self.parameters["wd"]
    config = self.parameters["config"]
    bam_fofn = config["input_bam_fofn"]
    sge_option = config["sge_track_reads"]
    script_fn = os.path.join(wd, "track_reads_h.sh")

    # One shell command per entry.  "cd" is emitted twice, both before and
    # after the hostname/date banner.
    commands = [
        "set -vex",
        "trap 'touch {0}.exit' EXIT".format(done_path),
        "cd {0}".format(wd),
        "hostname",
        "date",
        "cd {0}".format(wd),
        "fc_get_read_hctg_map.py",
        "fc_rr_hctg_track.py",
        "mkdir -p 4-quiver/reads/",
        "fc_select_reads_from_bam.py {0}".format(bam_fofn),
        "date",
        "touch {0}".format(done_path),
    ]

    with open(script_fn, "w") as script_file:
        # Every command, including the last, is newline-terminated.
        script_file.writelines(cmd + "\n" for cmd in commands)

    job_data = support.make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_option
    run_script(job_data, job_type=config["job_type"])
    wait_for_file(done_path, task=self, job_name=job_data["job_name"])
Example #2
0
def task_track_reads(self):
    """Generate ``track_reads.sh`` in the working directory and submit it.

    ``wd`` and ``config`` are read from ``self.parameters``; the config
    supplies ``sge_track_reads`` and ``job_type``.

    The generated script runs the ``falcon_kit.mains`` modules
    ``get_read_ctg_map``, ``rr_ctg_track``, ``pr_ctg_track`` and
    ``fetch_reads`` in that order.  Its EXIT trap touches ``<job_done>.exit``
    and its final command touches the ``job_done`` path, which is then passed
    to ``wait_for_file``.
    """
    done_path = fn(self.job_done)
    wd = self.parameters["wd"]
    config = self.parameters["config"]
    sge_option = config["sge_track_reads"]
    script_fn = os.path.join(wd, "track_reads.sh")

    # The trailing empty entry makes the joined text end with a newline.
    template = "\n".join((
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT",
        "cd {wd}",
        "hostname",
        "date",
        "cd {wd}",
        "python -m falcon_kit.mains.get_read_ctg_map",
        "python -m falcon_kit.mains.rr_ctg_track",
        "python -m falcon_kit.mains.pr_ctg_track",
        "mkdir -p 3-unzip/reads/",
        "python -m falcon_kit.mains.fetch_reads",
        "date",
        "touch {job_done}",
        "",
    ))
    script_text = template.format(job_done=done_path, wd=wd)

    with open(script_fn, "w") as script_file:
        script_file.write(script_text)

    job_data = support.make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_option
    run_script(job_data, job_type=config["job_type"])
    wait_for_file(done_path, task=self, job_name=job_data["job_name"])
Example #3
0
def task_hasm(self):
    """Generate ``hasm.sh`` in the working directory and submit it.

    ``wd`` and ``config`` are read from ``self.parameters``; the config
    supplies ``sge_hasm`` and ``job_type``.

    The generated script has three parts:

    1. A header that runs ``fc_ovlp_filter_with_phase.py``,
       ``fc_phased_ovlp_to_graph.py`` and ``fc_graphs_to_h_tigs.py`` inside
       ``wd``.
    2. A per-contig loop over ``../reads/ctg_list`` running
       ``fc_dedup_h_tigs.py``, followed by concatenation of the per-contig
       outputs into ``all_*`` files one directory above ``wd``.
    3. A final ``touch`` of the ``job_done`` path, which this function then
       passes to ``wait_for_file``.

    An EXIT trap in the script touches ``<job_done>.exit``.
    """
    job_done = fn(self.job_done)
    config = self.parameters["config"]
    sge_hasm = config["sge_hasm"]
    wd = self.parameters["wd"]
    job_type = config["job_type"]

    script_fn = os.path.join(wd, "hasm.sh")

    # Only job_done and wd are substituted here; they are passed explicitly
    # instead of through locals() so the template's inputs are visible.
    header = """\
set -vex
trap 'touch {job_done}.exit' EXIT
hostname
date
cd {wd}

fc_ovlp_filter_with_phase.py --fofn ../../2-asm-falcon/las.fofn --max_diff 120 --max_cov 120 --min_cov 1 --n_core 12 --min_len 2500 --db ../../1-preads_ovl/preads.db --rid_phase_map ./rid_to_phase.all > preads.p_ovl
fc_phased_ovlp_to_graph.py preads.p_ovl --min_len 2500 > fc.log
ln -sf db2falcon/preads4falcon.fasta ../../1-preads_ovl/
fc_graphs_to_h_tigs.py --fc_asm_path ../../2-asm-falcon/ --fc_hasm_path ./ --ctg_id all --rid_phase_map ./rid_to_phase.all --fasta ../../1-preads_ovl/preads4falcon.fasta
""".format(job_done=job_done, wd=wd)

    # Plain shell, deliberately NOT passed through str.format.
    # - "do" must directly follow the for-loop word list, so the mkdir lives
    #   inside the loop body.
    # - Every all_* file appended to with ">>" below is removed first
    #   (including all_h_ctg_edges) so a rerun does not duplicate content.
    aggregate = """
WD=$PWD
for f in `cat ../reads/ctg_list `; do mkdir -p $WD/$f; cd $WD/$f; fc_dedup_h_tigs.py $f; done

## prepare for quviering the haplotig
cd $WD/..
if [ -e "all_phased_reads" ]; then rm all_phased_reads; fi
if [ -e "all_h_ctg_ids" ]; then rm all_h_ctg_ids; fi
if [ -e "all_p_ctg_edges" ]; then rm all_p_ctg_edges; fi
if [ -e "all_h_ctg_edges" ]; then rm all_h_ctg_edges; fi
if [ -e "all_p_ctg.fa" ]; then rm all_p_ctg.fa; fi
if [ -e "all_h_ctg.fa" ]; then rm all_h_ctg.fa; fi

find 0-phasing -name "phased_reads" | sort | xargs cat >> all_phased_reads
find 1-hasm -name "h_ctg_ids.*" | sort | xargs cat >> all_h_ctg_ids
find 1-hasm -name "p_ctg_edges.*" | sort | xargs cat >> all_p_ctg_edges
find 1-hasm -name "h_ctg_edges.*" | sort | xargs cat >> all_h_ctg_edges
find 1-hasm -name "p_ctg.*.fa" | sort | xargs cat >> all_p_ctg.fa
find 1-hasm -name "h_ctg.*.fa" | sort | xargs cat >> all_h_ctg.fa
cd ../
date
"""
    footer = "touch {job_done}\n".format(job_done=job_done)

    with open(script_fn, "w") as script_file:
        script_file.write(header + aggregate + footer)

    job_data = support.make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_hasm
    run_script(job_data, job_type=job_type)
    wait_for_file(job_done, task=self, job_name=job_data['job_name'])
Example #4
0
def task_run_quiver(self):
    """Generate ``cns_<ctg_id>.sh`` in the working directory and submit it.

    Input/output paths come from ``fn()`` applied to ``self.ref_fasta``,
    ``self.read_sam``, ``self.cns_fasta``, ``self.cns_fastq`` and
    ``self.job_done``.  ``wd``, ``ctg_id`` and ``config`` come from
    ``self.parameters``; tool paths are built under ``config["smrt_bin"]``.

    The generated script indexes the reference with ``samtools faidx``,
    converts ``read_sam`` to ``<ctg_id>.bam``, aligns with ``pbalign`` and
    runs ``variantCaller`` writing ``cns_fasta`` and ``cns_fastq``.  A
    ``variantCaller`` failure only echoes a message (``|| echo``), so the
    script still reaches the final ``touch`` of ``job_done``.
    """
    ref_fasta = fn(self.ref_fasta)
    read_sam = fn(self.read_sam)
    cns_fasta = fn(self.cns_fasta)
    cns_fastq = fn(self.cns_fastq)
    job_done = fn(self.job_done)

    params = self.parameters
    job_uid = params["job_uid"]  # not used in the script; lookup retained
    wd = params["wd"]
    config = params["config"]
    ctg_id = params["ctg_id"]

    smrt_bin = config["smrt_bin"]
    sge_option = config["sge_quiver"]
    job_type = config["job_type"]

    # Everything the command templates below may substitute.
    fields = {
        "job_done": job_done,
        "wd": wd,
        "ctg_id": ctg_id,
        "ref_fasta": ref_fasta,
        "read_sam": read_sam,
        "cns_fasta": cns_fasta,
        "cns_fastq": cns_fastq,
        "samtools": os.path.join(smrt_bin, "samtools"),
        "pbalign": os.path.join(smrt_bin, "pbalign"),
        "makePbi": os.path.join(smrt_bin, "makePbi"),
        "variantCaller": os.path.join(smrt_bin, "variantCaller"),
    }

    # 12-space gap separating option groups inside the long command lines.
    gap = " " * 12

    templates = [
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT",
        "cd {wd}",
        "hostname",
        "date",
        "cd {wd}",
        "{samtools} faidx {ref_fasta}",
        "{samtools} view -b -S {read_sam} > {ctg_id}.bam",
        "{pbalign} --tmpDir=/localdisk/scratch/ --nproc=24"
        " --minAccuracy=0.75 --minLength=50"
        + gap +
        "--minAnchorSize=12 --maxDivergence=30 --concordant --algorithm=blasr"
        + gap +
        "--algorithmOptions=-useQuality --maxHits=1 --hitPolicy=random --seed=1"
        + gap +
        "{ctg_id}.bam {ref_fasta} aln-{ctg_id}.bam",
        # Leading '#' makes this a shell comment in the generated script.
        "#{makePbi} --referenceFasta {ref_fasta} aln-{ctg_id}.bam",
        "({variantCaller} -x 5 -X 120 -q 20 -j 24 -r {ref_fasta} aln-{ctg_id}.bam"
        + gap +
        "-o {cns_fasta} -o {cns_fastq}) || echo quvier failed",
        "date",
        "touch {job_done}",
    ]
    lines = [template.format(**fields) for template in templates]

    script_fn = os.path.join(wd, "cns_{0}.sh".format(ctg_id))
    with open(script_fn, "w") as script_file:
        script_file.writelines(line + "\n" for line in lines)

    job_data = support.make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_option
    run_script(job_data, job_type=job_type)
    wait_for_file(job_done, task=self, job_name=job_data["job_name"])
Example #5
0
def task_run_blasr(self):
    """Generate ``aln_<ctg_id>.sh`` in the working directory and submit it.

    Paths come from ``fn()`` applied to ``self.job_done``, ``self.ref_fasta``
    and ``self.read_fasta``.  ``wd``, ``ctg_id`` and ``config`` come from
    ``self.parameters``; ``blasr`` and ``samtools`` are located under
    ``config["smrt_bin"]``.

    The generated script aligns ``read_fasta`` against ``ref_fasta`` with
    ``blasr`` into ``tmp_aln.sam``, pipes it through ``samtools view`` and
    ``samtools sort`` to ``<ctg_id>_sorted``, indexes ``<ctg_id>_sorted.bam``,
    removes the temporary SAM and finally touches ``job_done``.
    """
    done_path = fn(self.job_done)
    ref_fasta = fn(self.ref_fasta)
    read_fasta = fn(self.read_fasta)

    params = self.parameters
    job_uid = params["job_uid"]  # not used in the script; lookup retained
    wd = params["wd"]
    ctg_id = params["ctg_id"]

    config = params["config"]
    smrt_bin = config["smrt_bin"]
    sge_option = config["sge_blasr_aln"]
    job_type = config["job_type"]
    blasr = os.path.join(smrt_bin, "blasr")
    samtools = os.path.join(smrt_bin, "samtools")

    script_fn = os.path.join(wd, "aln_%s.sh" % (ctg_id,))

    # The trailing empty entry makes the joined text end with a newline.
    template = "\n".join((
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT",
        "cd {wd}",
        "hostname",
        "date",
        "cd {wd}",
        "time {blasr} {read_fasta} {ref_fasta} -noSplitSubreads"
        " -clipping subread"
        " -hitPolicy randombest -randomSeed 42 -bestn 1 -minPctIdentity 70.0"
        " -minMatch 12  -nproc 24 -sam -out tmp_aln.sam",
        "{samtools} view -bS tmp_aln.sam | {samtools} sort - {ctg_id}_sorted",
        "{samtools} index {ctg_id}_sorted.bam",
        "rm tmp_aln.sam",
        "date",
        "touch {job_done}",
        "",
    ))
    script_text = template.format(
        job_done=done_path,
        wd=wd,
        blasr=blasr,
        samtools=samtools,
        read_fasta=read_fasta,
        ref_fasta=ref_fasta,
        ctg_id=ctg_id,
    )

    with open(script_fn, "w") as script_file:
        script_file.write(script_text)

    job_data = support.make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_option
    run_script(job_data, job_type=job_type)
    wait_for_file(done_path, task=self, job_name=job_data["job_name"])
Example #6
0
def task_phasing(self):
    """Generate ``p_<ctg_id>.sh`` in the working directory and submit it.

    Paths come from ``fn()`` applied to ``self.ref_fasta``, ``self.aln_bam``
    and ``self.job_done``.  ``wd``, ``ctg_id`` and ``config`` come from
    ``self.parameters``; the config supplies ``sge_phasing`` and ``job_type``.

    The generated script runs ``fc_phasing.py`` followed by
    ``fc_phasing_readmap.py`` for the contig, then touches ``job_done``.  An
    EXIT trap touches ``<job_done>.exit``.
    """
    ref_fasta = fn(self.ref_fasta)
    aln_bam = fn(self.aln_bam)
    job_done = fn(self.job_done)

    params = self.parameters
    job_uid = params["job_uid"]  # not used in the script; lookup retained
    wd = params["wd"]
    ctg_id = params["ctg_id"]

    config = params["config"]
    sge_option = config["sge_phasing"]
    job_type = config["job_type"]

    fields = {
        "job_done": job_done,
        "wd": wd,
        "aln_bam": aln_bam,
        "ref_fasta": ref_fasta,
        "ctg_id": ctg_id,
    }
    templates = (
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT",
        "cd {wd}",
        "hostname",
        "date",
        "cd {wd}",
        "fc_phasing.py --bam {aln_bam} --fasta {ref_fasta}"
        " --ctg_id {ctg_id} --base_dir ../",
        "fc_phasing_readmap.py --ctg_id {ctg_id}"
        " --read_map_dir ../../../2-asm-falcon/read_maps"
        " --phased_reads phased_reads",
        "date",
        "touch {job_done}",
    )
    lines = [template.format(**fields) for template in templates]

    script_fn = os.path.join(wd, "p_{0}.sh".format(ctg_id))
    with open(script_fn, "w") as script_file:
        script_file.writelines(line + "\n" for line in lines)

    job_data = support.make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_option
    run_script(job_data, job_type=job_type)
    wait_for_file(job_done, task=self, job_name=job_data["job_name"])
Example #7
0
def task_run_blasr(self):
    """Write and submit the blasr alignment script for one contig.

    The script is written to ``<wd>/aln_<ctg_id>.sh``.  It runs ``blasr`` on
    ``read_fasta`` vs ``ref_fasta`` producing ``tmp_aln.sam``, converts and
    sorts it with ``samtools`` into ``<ctg_id>_sorted``, indexes
    ``<ctg_id>_sorted.bam``, deletes ``tmp_aln.sam`` and touches the
    ``job_done`` path.  An EXIT trap touches ``<job_done>.exit``.

    After submission through ``run_script`` the ``job_done`` path is passed
    to ``wait_for_file``.
    """
    job_done = fn(self.job_done)
    ref_fasta = fn(self.ref_fasta)
    read_fasta = fn(self.read_fasta)

    job_uid = self.parameters["job_uid"]  # not used below; lookup retained
    wd = self.parameters["wd"]
    ctg_id = self.parameters["ctg_id"]

    config = self.parameters["config"]
    smrt_bin = config["smrt_bin"]
    sge_option = config["sge_blasr_aln"]
    job_type = config["job_type"]

    # Values referenced by name in the %-style templates below.
    values = {
        "job_done": job_done,
        "wd": wd,
        "ctg_id": ctg_id,
        "read_fasta": read_fasta,
        "ref_fasta": ref_fasta,
        "blasr": os.path.join(smrt_bin, "blasr"),
        "samtools": os.path.join(smrt_bin, "samtools"),
    }

    blasr_cmd = (
        "time %(blasr)s %(read_fasta)s %(ref_fasta)s"
        " -noSplitSubreads -clipping subread"
        " -hitPolicy randombest -randomSeed 42 -bestn 1 -minPctIdentity 70.0"
        " -minMatch 12  -nproc 24 -sam -out tmp_aln.sam"
    ) % values
    sort_cmd = (
        "%(samtools)s view -bS tmp_aln.sam"
        " | %(samtools)s sort - %(ctg_id)s_sorted"
    ) % values
    index_cmd = "%(samtools)s index %(ctg_id)s_sorted.bam" % values

    lines = [
        "set -vex",
        "trap 'touch %(job_done)s.exit' EXIT" % values,
        "cd %(wd)s" % values,
        "hostname",
        "date",
        "cd %(wd)s" % values,
        blasr_cmd,
        sort_cmd,
        index_cmd,
        "rm tmp_aln.sam",
        "date",
        "touch %(job_done)s" % values,
    ]

    script_fn = os.path.join(wd, "aln_{0}.sh".format(ctg_id))
    with open(script_fn, "w") as script_file:
        script_file.writelines(line + "\n" for line in lines)

    job_data = support.make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_option
    run_script(job_data, job_type=job_type)
    wait_for_file(job_done, task=self, job_name=job_data["job_name"])
Example #8
0
def task_phasing(self):
    """Write and submit the phasing script for one contig.

    The script is written to ``<wd>/p_<ctg_id>.sh``.  It runs
    ``fc_phasing.py`` on ``aln_bam`` / ``ref_fasta`` and then
    ``fc_phasing_readmap.py`` for the same contig, and finishes by touching
    the ``job_done`` path.  An EXIT trap touches ``<job_done>.exit``.

    After submission through ``run_script`` the ``job_done`` path is passed
    to ``wait_for_file``.
    """
    ref_fasta = fn(self.ref_fasta)
    aln_bam = fn(self.aln_bam)
    done_path = fn(self.job_done)

    job_uid = self.parameters["job_uid"]  # not used below; lookup retained
    wd = self.parameters["wd"]
    ctg_id = self.parameters["ctg_id"]

    config = self.parameters["config"]
    sge_option = config["sge_phasing"]
    job_type = config["job_type"]

    script_fn = os.path.join(wd, "p_{0}.sh".format(ctg_id))

    # The trailing empty entry makes the joined text end with a newline.
    template = "\n".join((
        "set -vex",
        "trap 'touch {job_done}.exit' EXIT",
        "cd {wd}",
        "hostname",
        "date",
        "cd {wd}",
        "fc_phasing.py --bam {aln_bam} --fasta {ref_fasta}"
        " --ctg_id {ctg_id} --base_dir ../",
        "fc_phasing_readmap.py --ctg_id {ctg_id}"
        " --read_map_dir ../../../2-asm-falcon/read_maps"
        " --phased_reads phased_reads",
        "date",
        "touch {job_done}",
        "",
    ))
    script_text = template.format(
        job_done=done_path,
        wd=wd,
        aln_bam=aln_bam,
        ref_fasta=ref_fasta,
        ctg_id=ctg_id,
    )

    with open(script_fn, "w") as script_file:
        script_file.write(script_text)

    job_data = support.make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_option
    run_script(job_data, job_type=job_type)
    wait_for_file(done_path, task=self, job_name=job_data["job_name"])
Example #9
0
def task_run_quiver(self):
    """Write and submit the consensus-calling script for one contig.

    The script is written to ``<wd>/cns_<ctg_id>.sh``.  It runs
    ``samtools faidx`` on the reference, converts ``read_sam`` into
    ``<ctg_id>.bam``, aligns it with ``pbalign`` into ``aln-<ctg_id>.bam`` and
    calls ``variantCaller`` with ``cns_fasta`` and ``cns_fastq`` as outputs.
    Because the ``variantCaller`` command is followed by ``|| echo``, its
    failure does not stop the script before ``job_done`` is touched.

    All tools are taken from ``config["smrt_bin"]``.  After submission through
    ``run_script`` the ``job_done`` path is passed to ``wait_for_file``.
    """
    ref_fasta = fn(self.ref_fasta)
    read_sam = fn(self.read_sam)

    cns_fasta = fn(self.cns_fasta)
    cns_fastq = fn(self.cns_fastq)
    done_path = fn(self.job_done)

    job_uid = self.parameters["job_uid"]  # not used below; lookup retained
    wd = self.parameters["wd"]
    config = self.parameters["config"]
    ctg_id = self.parameters["ctg_id"]

    smrt_bin = config["smrt_bin"]
    sge_option = config["sge_quiver"]
    job_type = config["job_type"]

    # Values referenced by name in the %-style templates below.
    values = {
        "job_done": done_path,
        "wd": wd,
        "ctg_id": ctg_id,
        "ref_fasta": ref_fasta,
        "read_sam": read_sam,
        "cns_fasta": cns_fasta,
        "cns_fastq": cns_fastq,
        "samtools": os.path.join(smrt_bin, "samtools"),
        "pbalign": os.path.join(smrt_bin, "pbalign"),
        "makePbi": os.path.join(smrt_bin, "makePbi"),
        "variantCaller": os.path.join(smrt_bin, "variantCaller"),
    }

    # 12-space gap separating option groups inside the long command lines.
    gap = " " * 12

    pbalign_cmd = gap.join((
        "%(pbalign)s --tmpDir=/localdisk/scratch/ --nproc=24"
        " --minAccuracy=0.75 --minLength=50",
        "--minAnchorSize=12 --maxDivergence=30 --concordant --algorithm=blasr",
        "--algorithmOptions=-useQuality --maxHits=1 --hitPolicy=random --seed=1",
        "%(ctg_id)s.bam %(ref_fasta)s aln-%(ctg_id)s.bam",
    )) % values
    quiver_cmd = gap.join((
        "(%(variantCaller)s -x 5 -X 120 -q 20 -j 24 -r %(ref_fasta)s"
        " aln-%(ctg_id)s.bam",
        "-o %(cns_fasta)s -o %(cns_fastq)s) || echo quvier failed",
    )) % values

    lines = [
        "set -vex",
        "trap 'touch %(job_done)s.exit' EXIT" % values,
        "cd %(wd)s" % values,
        "hostname",
        "date",
        "cd %(wd)s" % values,
        "%(samtools)s faidx %(ref_fasta)s" % values,
        "%(samtools)s view -b -S %(read_sam)s > %(ctg_id)s.bam" % values,
        pbalign_cmd,
        # Leading '#' makes this a shell comment in the generated script.
        "#%(makePbi)s --referenceFasta %(ref_fasta)s aln-%(ctg_id)s.bam"
        % values,
        quiver_cmd,
        "date",
        "touch %(job_done)s" % values,
    ]

    script_fn = os.path.join(wd, "cns_{0}.sh".format(ctg_id))
    with open(script_fn, "w") as script_file:
        script_file.writelines(line + "\n" for line in lines)

    job_data = support.make_job_data(self.URL, script_fn)
    job_data["sge_option"] = sge_option
    run_script(job_data, job_type=job_type)
    wait_for_file(done_path, task=self, job_name=job_data["job_name"])