Example #1
def main1(prog_name, input_config_fn, logger_config_fn=None):
    global fc_run_logger
    fc_run_logger = support.setup_logger(logger_config_fn)

    fc_run_logger.info( "fc_run started with configuration %s", input_config_fn ) 
    config = support.get_config(support.parse_config(input_config_fn))
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir  = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        support.make_dirs(d)

    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(os.path.basename(config["input_fofn_fn"]))
    rawread_fofn_plf = makePypeLocalFile(os.path.join(rawread_dir, os.path.basename(config["input_fofn_fn"])))
    make_fofn_abs_task = PypeTask(inputs = {"i_fofn": input_fofn_plf},
                                  outputs = {"o_fofn": rawread_fofn_plf},
                                  parameters = {},
                                  TaskType = PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_raw)

    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile( os.path.join( rawread_dir, "sleep_done") )
        rdb_build_done = makePypeLocalFile( os.path.join( rawread_dir, "rdb_build_done") ) 
        run_jobs = makePypeLocalFile( os.path.join( rawread_dir, "run_jobs.sh") ) 
        parameters = {"work_dir": rawread_dir,
                      "config": config}

        make_build_rdb_task = PypeTask(inputs = {"input_fofn": rawread_fofn_plf},
                                      outputs = {"rdb_build_done": rdb_build_done,
                                                 "run_jobs": run_jobs}, 
                                      parameters = parameters,
                                      TaskType = PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(task_build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done]) 

        db_file = makePypeLocalFile(os.path.join( rawread_dir, "%s.db" % "raw_reads" ))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs), rawread_dir, "raw_reads", db_file, rdb_build_done, config) 

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile( os.path.join( rawread_dir, "da_done") )

        @PypeTask( inputs = daligner_out, 
                   outputs =  {"da_done":r_da_done},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/rda_check" )
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))
        
        wf.addTask(check_r_da_task)
        wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed
        
        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(fn(run_jobs), rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks( merge_tasks )
        if config["target"] == "overlapping":
            wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed
            sys.exit(0)
        wf.addTasks( consensus_tasks )

        r_cns_done = makePypeLocalFile( os.path.join( rawread_dir, "cns_done") )
        pread_fofn = makePypeLocalFile( os.path.join( pread_dir,  "input_preads.fofn" ) )

        @PypeTask( inputs = consensus_out, 
                   outputs =  {"cns_done":r_cns_done, "pread_fofn": pread_fofn},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/cns_check" )
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn),  "w") as f:
                fn_list =  glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >>f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, os.path.basename(config["input_fofn_fn"])))
        make_fofn_abs_task = PypeTask(inputs = {"i_fofn": rawread_fofn_plf},
                                     outputs = {"o_fofn": pread_fofn},
                                     parameters = {},
                                     TaskType = PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile( os.path.join( pread_dir, "pdb_build_done") ) 
    parameters = {"work_dir": pread_dir,
                  "config": config}

    run_jobs = makePypeLocalFile(os.path.join(pread_dir, 'run_jobs.sh'))
    make_build_pdb_task  = PypeTask(inputs = { "pread_fofn": pread_fofn },
                                    outputs = { "pdb_build_done": pdb_build_done,
                                                "run_jobs": run_jobs},
                                    parameters = parameters,
                                    TaskType = PypeThreadTaskBase,
                                    URL = "task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(task_build_pdb)

    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done]) 



    db_file = makePypeLocalFile(os.path.join( pread_dir, "%s.db" % "preads" ))
    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs), pread_dir, "preads", db_file, pdb_build_done, config, pread_aln= True) 
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq = 30) # larger number better for more jobs

    p_da_done = makePypeLocalFile( os.path.join( pread_dir, "da_done") )

    @PypeTask( inputs = daligner_out, 
               outputs =  {"da_done":p_da_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pda_check" )
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))
    
    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(fn(run_jobs), pread_dir, "preads", p_da_done, config)
    wf.addTasks( merge_tasks )
    #wf.refreshTargets(updateFreq = 30) #all

    p_merge_done = makePypeLocalFile( os.path.join( pread_dir, "p_merge_done") )

    @PypeTask( inputs = merge_out, 
               outputs =  {"p_merge_done":p_merge_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pmerge_check" )
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))
    
    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq = wait_time) #all

    
    falcon_asm_done = makePypeLocalFile( os.path.join( falcon_asm_dir, "falcon_asm_done") )
    make_run_falcon_asm = PypeTask(
               inputs = {"p_merge_done": p_merge_done, "db_file":db_file},
               outputs =  {"falcon_asm_done":falcon_asm_done},
               parameters = {"wd": falcon_asm_dir,
                             "config": config,
                             "pread_dir": pread_dir},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/falcon" )
    wf.addTask(make_run_falcon_asm(task_run_falcon_asm))
    wf.refreshTargets(updateFreq = wait_time) #all
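All of these examples repeat one pypeFLOW idiom: PypeTask(...) builds a task factory, the factory wraps a plain function (or is applied directly as a decorator, as with check_r_da_task above), wf.addTask/addTasks registers the result, and wf.refreshTargets(...) runs whatever is needed to materialize the requested files, with completion signalled by touching sentinel files. Below is a minimal, self-contained sketch of that idiom; the import paths follow the legacy pypeFLOW layout these FALCON scripts use, and the in.txt/out.txt names are illustrative assumptions, not taken from the example above.

# Minimal sketch of the task pattern above, assuming the legacy pypeFLOW
# modules (pypeflow.data, pypeflow.task, pypeflow.controller) are installed.
import os
from pypeflow.data import makePypeLocalFile, fn
from pypeflow.task import PypeTask, PypeThreadTaskBase
from pypeflow.controller import PypeThreadWorkflow

with open("in.txt", "w") as f:  # trivial input file for the sketch
    f.write("hello\n")
in_plf = makePypeLocalFile(os.path.abspath("in.txt"))
out_plf = makePypeLocalFile(os.path.abspath("out.txt"))

def task_copy(self):
    # The wrapped function receives the task object; declared inputs and
    # outputs become attributes, and fn() maps a PypeLocalFile to its path.
    os.system("cp %s %s" % (fn(self.i_file), fn(self.o_file)))

PypeThreadWorkflow.setNumThreadAllowed(2, 2)
wf = PypeThreadWorkflow()
make_copy_task = PypeTask(inputs={"i_file": in_plf},
                          outputs={"o_file": out_plf},
                          parameters={},
                          TaskType=PypeThreadTaskBase,
                          URL="task://localhost/copy")
wf.addTask(make_copy_task(task_copy))
wf.refreshTargets([out_plf])  # runs any task whose outputs are missing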
Example #2
def main1(prog_name, input_config_fn, logger_config_fn=None):
    setup_logger(logger_config_fn)

    fc_run_logger.info( "fc_run started with configuration %s", input_config_fn ) 
    config = get_config(parse_config(input_config_fn))
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir  = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        make_dirs(d)

    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(os.path.basename(config["input_fofn_fn"]))
    rawread_fofn_plf = makePypeLocalFile(os.path.join(rawread_dir, os.path.basename(config["input_fofn_fn"])))
    make_fofn_abs_task = PypeTask(inputs = {"i_fofn": input_fofn_plf},
                                  outputs = {"o_fofn": rawread_fofn_plf},
                                  parameters = {},
                                  TaskType = PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(make_fofn_abs_raw)
    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile( os.path.join( rawread_dir, "sleep_done") )
        rdb_build_done = makePypeLocalFile( os.path.join( rawread_dir, "rdb_build_done") ) 
        parameters = {"work_dir": rawread_dir,
                      "config": config}

        make_build_rdb_task = PypeTask(inputs = {"input_fofn": rawread_fofn_plf},
                                      outputs = {"rdb_build_done": rdb_build_done}, 
                                      parameters = parameters,
                                      TaskType = PypeThreadTaskBase)

        build_rdb_task = make_build_rdb_task(build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done]) 
        

        db_file = makePypeLocalFile(os.path.join( rawread_dir, "%s.db" % "raw_reads" ))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks( rawread_dir, "raw_reads", db_file, rdb_build_done, config) 

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile( os.path.join( rawread_dir, "da_done") )

        @PypeTask( inputs = daligner_out, 
                   outputs =  {"da_done":r_da_done},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/rda_check" )
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))
        
        wf.addTask(check_r_da_task)
        wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed
        
        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks( rawread_dir, "raw_reads", r_da_done, config )
        wf.addTasks( merge_tasks )
        if config["target"] == "overlapping":
            wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed
            sys.exit(0)
        wf.addTasks( consensus_tasks )

        r_cns_done = makePypeLocalFile( os.path.join( rawread_dir, "cns_done") )
        pread_fofn = makePypeLocalFile( os.path.join( pread_dir,  "input_preads.fofn" ) )

        @PypeTask( inputs = consensus_out, 
                   outputs =  {"cns_done":r_cns_done, "pread_fofn": pread_fofn},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/cns_check" )
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn),  "w") as f:
                fn_list =  glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >>f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, os.path.basename(config["input_fofn_fn"])))
        make_fofn_abs_task = PypeTask(inputs = {"i_fofn": rawread_fofn_plf},
                                     outputs = {"o_fofn": pread_fofn},
                                     parameters = {},
                                     TaskType = PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile( os.path.join( pread_dir, "pdb_build_done") ) 
    parameters = {"work_dir": pread_dir,
                  "config": config}

    make_build_pdb_task  = PypeTask( inputs = { "pread_fofn": pread_fofn },
                                    outputs = { "pdb_build_done": pdb_build_done },
                                    parameters = parameters,
                                    TaskType = PypeThreadTaskBase,
                                    URL = "task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(build_pdb)

    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done]) 



    db_file = makePypeLocalFile(os.path.join( pread_dir, "%s.db" % "preads" ))
    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks( pread_dir, "preads", db_file, pdb_build_done, config, pread_aln= True) 
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq = 30) # larger number better for more jobs

    p_da_done = makePypeLocalFile( os.path.join( pread_dir, "da_done") )

    @PypeTask( inputs = daligner_out, 
               outputs =  {"da_done":p_da_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pda_check" )
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))
    
    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks( pread_dir, "preads", p_da_done, config )
    wf.addTasks( merge_tasks )
    #wf.refreshTargets(updateFreq = 30) #all            

    p_merge_done = makePypeLocalFile( os.path.join( pread_dir, "p_merge_done") )

    @PypeTask( inputs = merge_out, 
               outputs =  {"p_merge_done":p_merge_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pmerge_check" )
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))
    
    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq = wait_time) #all            

    
    falcon_asm_done = makePypeLocalFile( os.path.join( falcon_asm_dir, "falcon_asm_done") )
    @PypeTask( inputs = {"p_merge_done": p_merge_done, "db_file":db_file}, 
               outputs =  {"falcon_asm_done":falcon_asm_done},
               parameters = {"wd": falcon_asm_dir,
                             "config": config,
                             "pread_dir": pread_dir},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/falcon" )

    def run_falcon_asm_task(self):
        wd = self.parameters["wd"]
        config = self.parameters["config"]
        install_prefix = config["install_prefix"]
        pread_dir = self.parameters["pread_dir"]
        script_dir = os.path.join( wd )
        script_fn =  os.path.join( script_dir ,"run_falcon_asm.sh" )
        
        script = []
        script.append( "set -vex" )
        script.append( "trap 'touch %s.exit' EXIT" % fn(self.falcon_asm_done) )
        script.append( "source {install_prefix}/bin/activate".format(install_prefix = install_prefix) )
        script.append( "cd %s" % pread_dir )
        # Write preads4falcon.fasta, in 1-preads_ovl:
        script.append( "DB2Falcon -U preads")
        script.append( "cd %s" % wd )
        script.append( """find %s/las_files -name "*.las" > las.fofn """ % pread_dir )
        overlap_filtering_setting = config["overlap_filtering_setting"]
        length_cutoff_pr = config["length_cutoff_pr"]
        script.append( """fc_ovlp_filter.py --db %s --fofn las.fofn %s --min_len %d > preads.ovl""" %\
                (fn(db_file), overlap_filtering_setting, length_cutoff_pr) )
        script.append( "ln -sf %s/preads4falcon.fasta ." % pread_dir)
        script.append( """fc_ovlp_to_graph.py preads.ovl --min_len %d > fc_ovlp_to_graph.log""" % length_cutoff_pr) # TODO: drop this logfile
        # Write 'p_ctg.fa' and 'a_ctg.fa':
        script.append( """fc_graph_to_contig.py""" )
        script.append( """touch %s""" % fn(self.falcon_asm_done))

        with open(script_fn, "w") as script_file:
            script_file.write("\n".join(script))

        job_data = make_job_data(self.URL, script_fn)
        job_data["sge_option"] = config["sge_option_fc"]
        run_script(job_data, job_type = config["job_type"])
        wait_for_file(fn(self.falcon_asm_done), task=self, job_name=job_data['job_name'])
    
    wf.addTask( run_falcon_asm_task )
    wf.refreshTargets(updateFreq = wait_time) #all            
Example #3
                "q_sn": q_sn,
                "t_sn": t_sn,
                "config": config
            }

            #for testing
            #task_decorator = PypeTask(inputs = inputs, outputs = outputs, parameters = parameters, TaskType = PypeTaskBase )
            #task()
            make_mapping_task = PypeTask(
                inputs=inputs,
                outputs=outputs,
                parameters=parameters,
                TaskType=PypeThreadTaskBase,
                URL="task://localhost/mapping_task_q%05d_t%05d" % (q_sn, t_sn))
            mapping_task = make_mapping_task(blasr_align)
            wf.addTask(mapping_task)

        qf_out = os.path.join(mapping_data_dir, "qf%05d.m4" % q_sn)
        qf_out = makePypeLocalFile(qf_out)

        all_qf_out["qf_out_%s" % q_sn] = qf_out

        job_done = os.path.join(mapping_data_dir, "qf%05d_done" % q_sn)
        job_done = makePypeLocalFile(job_done)

        all_qf_out["qf_done_%s" % q_sn] = job_done
        parameters = {"mapping_data_dir": mapping_data_dir, "q_sn": q_sn}
        make_qf_task = PypeTask(inputs=query_group_done,
                                outputs={
                                    "qf_out": qf_out,
                                    "job_done": job_done
Example #4
    with open("run_jobs.sh") as f:
        for l in f:
            l = l.strip().split()
            if l[0] == "daligner":
                try:
                    os.makedirs("./job_%05d" % job_id)
                except OSError:
                    pass
                os.system(
                    "cd ./job_%05d;ln -s ../.%s.bps .; ln -s ../.%s.idx .; ln -s ../%s.db ."
                    % (job_id, prefix, prefix, prefix))
                job_done = makePypeLocalFile(
                    os.path.abspath("./job_%05d/job_%05d_done" %
                                    (job_id, job_id)))
                parameters = {
                    "daligner_cmd": " ".join(l),
                    "cwd": os.path.join(os.getcwd(), "job_%05d" % job_id),
                    "job_id": job_id
                }
                make_daligner_task = PypeTask(
                    inputs={"db_file": db_file},
                    outputs={"job_done": job_done},
                    parameters=parameters,
                    TaskType=PypeThreadTaskBase,
                    URL="task://localhost/mtask_%05d" % job_id)
                daligner_task = make_daligner_task(run_daligner)
                wf.addTask(daligner_task)
                job_id += 1
                print job_id
    wf.refreshTargets(updateFreq=45)  #all
Example #5
def main(argv=sys.argv):

    global fc_run_logger
    fc_run_logger = support.setup_logger(None)

    if len(argv) < 2:
        print "you need to provide a configuration file to specify the cluster running environment"
        sys.exit(1)

    config_fn = argv[1]

    config = ConfigParser.ConfigParser()
    config.read(config_fn)

    job_type = "SGE"
    if config.has_option('General', 'job_type'):
        job_type = config.get('General', 'job_type')

    sge_track_reads = " -pe smp 12 -q bigmem"
    if config.has_option('Unzip', 'sge_track_reads'):
        sge_track_reads = config.get('Unzip', 'sge_track_reads')

    sge_quiver = " -pe smp 24 -q bigmem "
    if config.has_option('Unzip', 'sge_quiver'):
        sge_quiver = config.get('Unzip', 'sge_quiver')

    smrt_bin = "/mnt/secondary/builds/full/3.0.0/prod/smrtanalysis_3.0.0.153854/smrtcmds/bin/"
    if config.has_option('Unzip', 'smrt_bin'):
        smrt_bin = config.get('Unzip', 'smrt_bin')

    input_bam_fofn = "input_bam.fofn"
    if config.has_option('Unzip', 'input_bam_fofn'):
        input_bam_fofn = config.get('Unzip', 'input_bam_fofn')

    quiver_concurrent_jobs = 8
    if config.has_option('Unzip', 'quiver_concurrent_jobs'):
        quiver_concurrent_jobs = config.getint('Unzip',
                                               'quiver_concurrent_jobs')

    config = {
        "job_type": job_type,
        "sge_quiver": sge_quiver,
        "sge_track_reads": sge_track_reads,
        "input_bam_fofn": input_bam_fofn,
        "smrt_bin": smrt_bin
    }

    support.job_type = "SGE"  #tmp hack until we have a configuration parser

    ctg_ids = []

    PypeThreadWorkflow.setNumThreadAllowed(quiver_concurrent_jobs,
                                           quiver_concurrent_jobs)
    wf = PypeThreadWorkflow()

    parameters = {"wd": os.path.abspath("."), "config": config}
    hasm_done = makePypeLocalFile("./3-unzip/1-hasm/hasm_done")
    job_done = makePypeLocalFile(
        os.path.join(parameters["wd"], "track_reads_h_done"))
    make_track_reads_task = PypeTask(inputs={"hasm_done": hasm_done},
                                     outputs={"job_done": job_done},
                                     parameters=parameters,
                                     TaskType=PypeThreadTaskBase,
                                     URL="task://localhost/track_reads_h")
    track_reads_task = make_track_reads_task(task_track_reads)

    wf.addTask(track_reads_task)
    wf.refreshTargets()  #force refresh now, will put proper dependence later

    ref_seq_data = {}
    p_ctg_fa = FastaReader("./3-unzip/all_p_ctg.fa")
    ctg_types = {}
    for r in p_ctg_fa:
        rid = r.name.split()[0]
        ref_seq_data[rid] = r.sequence
        ctg_types[rid] = "p"

    h_ctg_fa = FastaReader("./3-unzip/all_h_ctg.fa")
    for r in h_ctg_fa:
        rid = r.name.split()[0]
        ref_seq_data[rid] = r.sequence
        ctg_types[rid] = "h"

    ctg_ids = sorted(ref_seq_data.keys())
    p_ctg_out = []
    h_ctg_out = []
    for ctg_id in ctg_ids:
        sequence = ref_seq_data[ctg_id]
        m_ctg_id = ctg_id.split("-")[0]
        wd = os.path.join(os.getcwd(), "./4-quiver/", m_ctg_id)
        mkdir(wd)
        ref_fasta = makePypeLocalFile(
            os.path.join(wd, "{ctg_id}_ref.fa".format(ctg_id=ctg_id)))
        read_sam = makePypeLocalFile(
            os.path.join(
                os.getcwd(), "./4-quiver/reads/"
                "{ctg_id}.sam".format(ctg_id=ctg_id)))
        cns_fasta = makePypeLocalFile(
            os.path.join(wd, "cns-{ctg_id}.fasta.gz".format(ctg_id=ctg_id)))
        cns_fastq = makePypeLocalFile(
            os.path.join(wd, "cns-{ctg_id}.fastq.gz".format(ctg_id=ctg_id)))
        job_done = makePypeLocalFile(
            os.path.join(wd, "{ctg_id}_quiver_done".format(ctg_id=ctg_id)))

        if os.path.exists(fn(read_sam)):
            if ctg_types[ctg_id] == "p":
                p_ctg_out.append((cns_fasta, cns_fastq))
            if ctg_types[ctg_id] == "h":
                h_ctg_out.append((cns_fasta, cns_fastq))
            if not os.path.exists(fn(ref_fasta)):
                with open(fn(ref_fasta), "w") as f:
                    print >> f, ">" + ctg_id
                    print >> f, sequence
            parameters = {
                "job_uid": "q-" + ctg_id,
                "wd": wd,
                "config": config,
                "ctg_id": ctg_id
            }
            make_quiver_task = PypeTask(
                inputs={
                    "ref_fasta": ref_fasta,
                    "read_sam": read_sam
                },
                outputs={
                    "cns_fasta": cns_fasta,
                    "cns_fastq": cns_fastq,
                    "job_done": job_done
                },
                parameters=parameters,
                TaskType=PypeThreadTaskBase,
                URL="task://localhost/q_{ctg_id}".format(ctg_id=ctg_id))
            quiver_task = make_quiver_task(task_run_quiver)
            wf.addTask(quiver_task)

    wf.refreshTargets()
    os.system("sleep 30")

    mkdir("./4-quiver/cns_output")
    os.system("rm ./4-quiver/cns_output/cns_p_ctg.fasta")
    os.system("rm ./4-quiver/cns_output/cns_p_ctg.fastq")
    for cns_fasta, cns_fastq in sorted(p_ctg_out):
        os.system(
            "zcat {cns_fasta} >> ./4-quiver/cns_output/cns_p_ctg.fasta".format(
                cns_fasta=fn(cns_fasta)))
        os.system(
            "zcat {cns_fastq} >> ./4-quiver/cns_output/cns_p_ctg.fastq".format(
                cns_fastq=fn(cns_fastq)))

    os.system("rm ./4-quiver/cns_output/cns_h_ctg.fasta")
    os.system("rm ./4-quiver/cns_output/cns_h_ctg.fastq")
    for cns_fasta, cns_fastq in sorted(h_ctg_out):
        os.system(
            "zcat {cns_fasta} >> ./4-quiver/cns_output/cns_h_ctg.fasta".format(
                cns_fasta=fn(cns_fasta)))
        os.system(
            "zcat {cns_fastq} >> ./4-quiver/cns_output/cns_h_ctg.fastq".format(
                cns_fastq=fn(cns_fastq)))
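The final merge step above deletes any previous combined outputs with rm and appends each per-contig gzipped consensus with zcat. A Python-native sketch of the same concatenation is shown below; the concat_gz helper name and its arguments are assumptions for illustration, not part of the example.

import gzip
import shutil

def concat_gz(gz_paths, out_path):
    # Hypothetical helper: opening the output in "wb" truncates it, which
    # replaces the `rm` calls; each gzipped input is then decompressed and
    # appended in order, mirroring the `zcat ... >>` loop above.
    with open(out_path, "wb") as out_f:
        for gz_path in sorted(gz_paths):
            with gzip.open(gz_path, "rb") as in_f:
                shutil.copyfileobj(in_f, out_f)

# e.g.: concat_gz([fn(fa) for fa, _ in p_ctg_out],
#                 "./4-quiver/cns_output/cns_p_ctg.fasta")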
Example #6
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks( rawread_dir, "raw_reads", db_file, rdb_build_done, config) 

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile( os.path.join( rawread_dir, "da_done") )

        @PypeTask( inputs = daligner_out, 
                   outputs =  {"da_done":r_da_done},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/rda_check" )
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))
        
        wf.addTask(check_r_da_task)
        wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed
        
        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks( rawread_dir, "raw_reads", r_da_done, config )
        wf.addTasks( merge_tasks )
        if config["target"] == "overlapping":
            wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed
            exit(0)
        wf.addTasks( consensus_tasks )

        r_cns_done = makePypeLocalFile( os.path.join( rawread_dir, "cns_done") )
        pread_fofn = makePypeLocalFile( os.path.join( pread_dir,  "input_preads.fofn" ) )

        @PypeTask( inputs = consensus_out, 
Example #7
def main(*argv):
    setup_logger()
    if len(argv) < 2:
        print "you need to specify a configuration file"
        print "example: HGAP.py HGAP_run.cfg"
        sys.exit(1)

    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        try:
            os.makedirs(d)
        except OSError:
            pass

    config = get_config(argv[1])
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        input_h5_fofn = makePypeLocalFile(
            os.path.abspath(config["input_fofn_fn"]))
        rdb_build_done = makePypeLocalFile(
            os.path.join(rawread_dir, "rdb_build_done"))
        parameters = {"work_dir": rawread_dir, "config": config}

        make_build_rdb_task = PypeTask(
            inputs={"input_fofn": input_h5_fofn},
            outputs={"rdb_build_done": rdb_build_done},
            parameters=parameters,
            TaskType=PypeThreadTaskBase)

        build_rdb_task = make_build_rdb_task(build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        db_file = makePypeLocalFile(
            os.path.join(rawread_dir, "%s.db" % "raw_reads"))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
            rawread_dir, "raw_reads", db_file, rdb_build_done, config)

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))

        @PypeTask(inputs=daligner_out,
                  outputs={"da_done": r_da_done},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/rda_check")
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))

        wf.addTask(check_r_da_task)
        wf.refreshTargets(
            updateFreq=wait_time
        )  # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs,
                                               concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
            rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        if config["target"] == "overlapping":
            wf.refreshTargets(
                updateFreq=wait_time
            )  # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed
            exit(0)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={
                      "cns_done": r_cns_done,
                      "pread_fofn": pread_fofn
                  },
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fa" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >> f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(
            updateFreq=wait_time)  # larger number better for more jobs

    if config["target"] == "pre-assembly":
        exit(0)

    if config["input_type"] == "preads":
        if not os.path.exists("%s/input_preads.fofn" % pread_dir):
            os.system("cp %s %s/input_preads.fofn" %
                      (os.path.abspath(config["input_fofn_fn"]), pread_dir))
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, "input_preads.fofn"))

    rdb_build_done = makePypeLocalFile(
        os.path.join(pread_dir, "rdb_build_done"))

    @PypeTask(inputs={"pread_fofn": pread_fofn},
              outputs={"rdb_build_done": rdb_build_done},
              parameters={
                  "config": config,
                  "pread_dir": pread_dir
              },
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/build_p_rdb")
    def build_p_rdb_task(self):
        config = self.parameters["config"]
        pread_dir = self.parameters["pread_dir"]
        fa_serial = 0
        for fa_fn in open(fn(self.pread_fofn)).readlines():
            fa_fn = fa_fn.strip()
            c = 0
            fa_serial += 1
            with open("%s/preads_norm_%05d.fasta" % (pread_dir, fa_serial),
                      "w") as p_norm:
                f = FastaReader(fa_fn)
                for r in f:
                    if len(r.sequence) < config["length_cutoff_pr"]:
                        continue
                    name = r.name
                    name = name.replace("_", "")
                    ignore_read = False
                    for cc in r.sequence:
                        if cc not in ["A", "C", "G", "T"]:
                            ignore_read = True
                            break
                    if ignore_read:
                        continue
                    print >> p_norm, ">prolog_%05d/%d/%d_%d" % (
                        fa_serial, c, 0, len(r.sequence))
                    # emit 80-column FASTA lines; stepping by 80 also covers
                    # reads shorter than 80 bp (the old loop left i unbound)
                    for i in range(0, len(r.sequence), 80):
                        print >> p_norm, r.sequence[i:i + 80]
                    c += 1
            os.system("cd %s; fasta2DB preads preads_norm_%05d.fasta" %
                      (pread_dir, fa_serial))

        os.system("cd %s; DBsplit %s preads" %
                  (pread_dir, config["ovlp_DBsplit_option"]))
        os.system("cd %s; HPCdaligner %s preads > run_jobs.sh" %
                  (pread_dir, config["ovlp_HPCdaligner_option"]))
        os.system("cd %s; touch rdb_build_done" % pread_dir)

    wf.addTask(build_p_rdb_task)
    wf.refreshTargets(
        updateFreq=wait_time)  # larger number better for more jobs

    db_file = makePypeLocalFile(os.path.join(pread_dir, "%s.db" % "preads"))
    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(pread_dir,
                                                         "preads",
                                                         db_file,
                                                         rdb_build_done,
                                                         config,
                                                         pread_aln=True)
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq = 30) # larger number better for more jobs

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))

    @PypeTask(inputs=daligner_out,
              outputs={"da_done": p_da_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pda_check")
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))

    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
        pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)
    #wf.refreshTargets(updateFreq = 30) #all

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq=wait_time)  #all

    falcon_asm_done = makePypeLocalFile(
        os.path.join(falcon_asm_dir, "falcon_asm_done"))

    @PypeTask(inputs={"p_merge_done": p_merge_done},
              outputs={"falcon_asm_done": falcon_asm_done},
              parameters={
                  "wd": falcon_asm_dir,
                  "config": config,
                  "pread_dir": pread_dir
              },
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/falcon")
    def run_falcon_asm_task(self):
        wd = self.parameters["wd"]
        config = self.parameters["config"]
        install_prefix = config["install_prefix"]
        pread_dir = self.parameters["pread_dir"]
        script_dir = os.path.join(wd)
        script_fn = os.path.join(script_dir, "run_falcon_asm.sh")

        script = []
        script.append("source {install_prefix}/bin/activate".format(
            install_prefix=install_prefix))
        script.append("cd %s" % pread_dir)
        script.append("DB2Falcon preads")
        script.append("cd %s" % wd)
        script.append("""find %s/las_files -name "*.las" > las.fofn """ %
                      pread_dir)
        overlap_filtering_setting = config["overlap_filtering_setting"]
        length_cutoff_pr = config["length_cutoff_pr"]
        script.append("""fc_ovlp_filter.py --fofn las.fofn %s \
                                 --n_core 24 --min_len %d > preads.ovl""" %
                      (overlap_filtering_setting, length_cutoff_pr))

        script.append("ln -sf %s/preads4falcon.fasta ." % pread_dir)
        script.append("""fc_ovlp_to_graph.py preads.ovl > fc.log""")
        script.append("""fc_graph_to_contig.py""")
        script.append("""touch %s\n""" % fn(self.falcon_asm_done))

        with open(script_fn, "w") as script_file:
            script_file.write("\n".join(script))

        job_name = self.URL.split("/")[-1]
        job_name += "-" + str(uuid.uuid1())[:8]
        job_data = {
            "job_name": job_name,
            "cwd": wd,
            "sge_option": config["sge_option_fc"],
            "script_fn": script_fn
        }
        run_script(job_data, job_type="SGE")
        wait_for_file(fn(self.falcon_asm_done), task=self, job_name=job_name)

    wf.addTask(run_falcon_asm_task)
    wf.refreshTargets(updateFreq=wait_time)  #all
Example #8
def main(*argv):
    setup_logger()
    if len(argv) < 2:
        print "you need to specify a configuration file"
        print "example: HGAP.py HGAP_run.cfg"
        sys.exit(1)
    
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir  = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        try:
            os.makedirs(d)
        except OSError:
            pass

    config = get_config(argv[1])
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        input_h5_fofn = makePypeLocalFile( os.path.abspath( config["input_fofn_fn"] ) )
        rdb_build_done = makePypeLocalFile( os.path.join( rawread_dir, "rdb_build_done") ) 
        parameters = {"work_dir": rawread_dir,
                      "config": config}

        make_build_rdb_task = PypeTask(inputs = {"input_fofn": input_h5_fofn},
                                      outputs = {"rdb_build_done": rdb_build_done},
                                      parameters = parameters,
                                      TaskType = PypeThreadTaskBase)

        build_rdb_task = make_build_rdb_task(build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done]) 
        

        db_file = makePypeLocalFile(os.path.join( rawread_dir, "%s.db" % "raw_reads" ))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks( rawread_dir, "raw_reads", db_file, rdb_build_done, config) 

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile( os.path.join( rawread_dir, "da_done") )

        @PypeTask( inputs = daligner_out, 
                   outputs =  {"da_done":r_da_done},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/rda_check" )
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))
        
        wf.addTask(check_r_da_task)
        wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed
        
        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks( rawread_dir, "raw_reads", r_da_done, config )
        wf.addTasks( merge_tasks )
        if config["target"] == "overlapping":
            wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed
            exit(0)
        wf.addTasks( consensus_tasks )

        r_cns_done = makePypeLocalFile( os.path.join( rawread_dir, "cns_done") )
        pread_fofn = makePypeLocalFile( os.path.join( pread_dir,  "input_preads.fofn" ) )

        @PypeTask( inputs = consensus_out, 
                   outputs =  {"cns_done":r_cns_done, "pread_fofn": pread_fofn},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/cns_check" )
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn),  "w") as f:
                fn_list =  glob.glob("%s/preads/out*.fa" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >>f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs

    if config["target"] == "pre-assembly":
        exit(0)
    
    if config["input_type"] == "preads":
        if not os.path.exists( "%s/input_preads.fofn" % pread_dir):
            os.system( "cp %s %s/input_preads.fofn" % (os.path.abspath( config["input_fofn_fn"] ), pread_dir) )
        pread_fofn = makePypeLocalFile( os.path.join( pread_dir,  "input_preads.fofn" ) )

    rdb_build_done = makePypeLocalFile( os.path.join( pread_dir, "rdb_build_done") ) 
    @PypeTask( inputs = { "pread_fofn": pread_fofn },
               outputs = { "rdb_build_done": rdb_build_done },
               parameters = {"config": config, "pread_dir": pread_dir},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/build_p_rdb")
    def build_p_rdb_task(self):
        config = self.parameters["config"]
        pread_dir = self.parameters["pread_dir"]
        fa_serial = 0
        for fa_fn in open(fn(self.pread_fofn)).readlines():
            fa_fn = fa_fn.strip()
            c = 0
            fa_serial += 1
            with open("%s/preads_norm_%05d.fasta" % (pread_dir, fa_serial), "w") as p_norm:
                f = FastaReader(fa_fn)
                for r in f:
                    if len(r.sequence) < config["length_cutoff_pr"]:
                        continue
                    name = r.name
                    name = name.replace("_","")
                    ignore_read = False
                    for  cc in r.sequence:
                        if cc not in ["A","C","G","T"]:
                            ignore_read = True
                            break
                    if ignore_read:
                        continue
                    print >> p_norm, ">prolog_%05d/%d/%d_%d" % (fa_serial, c, 0, len(r.sequence) )
                    # emit 80-column FASTA lines; stepping by 80 also covers
                    # reads shorter than 80 bp (the old loop left i unbound)
                    for i in range(0, len(r.sequence), 80):
                        print >> p_norm, r.sequence[i:i + 80]
                    c += 1
            os.system("cd %s; fasta2DB preads preads_norm_%05d.fasta" % (pread_dir, fa_serial) )

        os.system("cd %s; DBsplit %s preads" % (pread_dir, config["ovlp_DBsplit_option"]))
        os.system("cd %s; HPCdaligner %s preads > run_jobs.sh" % (pread_dir, config["ovlp_HPCdaligner_option"]))
        os.system("cd %s; touch rdb_build_done" % pread_dir)

    wf.addTask(build_p_rdb_task)
    wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs

    db_file = makePypeLocalFile(os.path.join( pread_dir, "%s.db" % "preads" ))
    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks( pread_dir, "preads", db_file, rdb_build_done, config, pread_aln= True) 
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq = 30) # larger number better for more jobs

    p_da_done = makePypeLocalFile( os.path.join( pread_dir, "da_done") )

    @PypeTask( inputs = daligner_out, 
               outputs =  {"da_done":p_da_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pda_check" )
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))
    
    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks( pread_dir, "preads", p_da_done, config )
    wf.addTasks( merge_tasks )
    #wf.refreshTargets(updateFreq = 30) #all            

    p_merge_done = makePypeLocalFile( os.path.join( pread_dir, "p_merge_done") )

    @PypeTask( inputs = merge_out, 
               outputs =  {"p_merge_done":p_merge_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pmerge_check" )
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))
    
    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq = wait_time) #all            

    
    falcon_asm_done = makePypeLocalFile( os.path.join( falcon_asm_dir, "falcon_asm_done") )
    @PypeTask( inputs = {"p_merge_done": p_merge_done}, 
               outputs =  {"falcon_asm_done":falcon_asm_done},
               parameters = {"wd": falcon_asm_dir,
                             "config": config,
                             "pread_dir": pread_dir},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/falcon" )
    def run_falcon_asm_task(self):
        wd = self.parameters["wd"]
        config = self.parameters["config"]
        install_prefix = config["install_prefix"]
        pread_dir = self.parameters["pread_dir"]
        script_dir = os.path.join( wd )
        script_fn =  os.path.join( script_dir ,"run_falcon_asm.sh" )
        
        script = []
        script.append( "source {install_prefix}/bin/activate".format(install_prefix = install_prefix) )
        script.append( "cd %s" % pread_dir )
        script.append( "DB2Falcon preads")
        script.append( "cd %s" % wd )
        script.append( """find %s/las_files -name "*.las" > las.fofn """ % pread_dir )
        overlap_filtering_setting = config["overlap_filtering_setting"]
        length_cutoff_pr = config["length_cutoff_pr"]
        script.append( """fc_ovlp_filter.py --fofn las.fofn %s \
                                 --n_core 24 --min_len %d > preads.ovl""" % (overlap_filtering_setting, length_cutoff_pr) )

        script.append( "ln -sf %s/preads4falcon.fasta ." % pread_dir)
        script.append( """fc_ovlp_to_graph.py preads.ovl > fc.log""" )
        script.append( """fc_graph_to_contig.py""" )
        script.append( """touch %s\n""" % fn(self.falcon_asm_done))

        with open(script_fn, "w") as script_file:
            script_file.write("\n".join(script))

        job_name = self.URL.split("/")[-1]
        job_name += "-"+str(uuid.uuid1())[:8]
        job_data = {"job_name": job_name,
                    "cwd": wd,
                    "sge_option": config["sge_option_fc"],
                    "script_fn": script_fn }
        run_script(job_data, job_type = "SGE")
        wait_for_file( fn(self.falcon_asm_done), task=self, job_name=job_name )
    
    wf.addTask( run_falcon_asm_task )
    wf.refreshTargets(updateFreq = wait_time) #all            
Example #9
def unzip_all(config):
    unzip_concurrent_jobs = config["unzip_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(unzip_concurrent_jobs,
                                           unzip_concurrent_jobs)
    wf = PypeThreadWorkflow()

    ctg_list_file = makePypeLocalFile("./3-unzip/reads/ctg_list")
    falcon_asm_done = makePypeLocalFile("./2-asm-falcon/falcon_asm_done")
    parameters = {"wd": os.path.abspath("."), "config": config}

    job_done = makePypeLocalFile(
        os.path.join(parameters["wd"], "track_reads_done"))
    make_track_reads_task = PypeTask(
        inputs={"falcon_asm_done": falcon_asm_done},
        outputs={
            "job_done": job_done,
            "ctg_list_file": ctg_list_file
        },
        parameters=parameters,
        TaskType=PypeThreadTaskBase,
        URL="task://localhost/track_reads")
    track_reads_task = make_track_reads_task(task_track_reads)

    wf.addTask(track_reads_task)
    wf.refreshTargets()  #force refresh now, will put proper dependence later

    ctg_ids = []
    with open("./3-unzip/reads/ctg_list") as f:
        for row in f:
            row = row.strip()
            ctg_ids.append(row)

    aln1_outs = {}

    all_ctg_out = {}

    for ctg_id in ctg_ids:
        # inputs
        ref_fasta = makePypeLocalFile(
            "./3-unzip/reads/{ctg_id}_ref.fa".format(ctg_id=ctg_id))
        read_fasta = makePypeLocalFile(
            "./3-unzip/reads/{ctg_id}_reads.fa".format(ctg_id=ctg_id))

        # outputs
        wd = os.path.join(
            os.getcwd(), "./3-unzip/0-phasing/{ctg_id}/".format(ctg_id=ctg_id))
        mkdir(wd)
        ctg_aln_out = makePypeLocalFile(
            os.path.join(wd, "{ctg_id}_sorted.bam".format(ctg_id=ctg_id)))
        job_done = makePypeLocalFile(
            os.path.join(wd, "aln_{ctg_id}_done".format(ctg_id=ctg_id)))

        parameters = {
            "job_uid": "aln-" + ctg_id,
            "wd": wd,
            "config": config,
            "ctg_id": ctg_id
        }
        make_blasr_task = PypeTask(
            inputs={
                "ref_fasta": ref_fasta,
                "read_fasta": read_fasta
            },
            outputs={
                "ctg_aln_out": ctg_aln_out,
                "job_done": job_done
            },
            parameters=parameters,
            TaskType=PypeThreadTaskBase,
            URL="task://localhost/aln_{ctg_id}".format(ctg_id=ctg_id))
        blasr_task = make_blasr_task(task_run_blasr)
        aln1_outs[ctg_id] = (ctg_aln_out, job_done)
        wf.addTask(blasr_task)

        job_done = makePypeLocalFile(
            os.path.join(wd, "p_{ctg_id}_done".format(ctg_id=ctg_id)))
        rid_to_phase_out = makePypeLocalFile(
            os.path.join(wd, "rid_to_phase.{ctg_id}".format(ctg_id=ctg_id)))
        all_ctg_out["r2p.{ctg_id}".format(ctg_id=ctg_id)] = rid_to_phase_out

        parameters = {
            "job_uid": "ha-" + ctg_id,
            "wd": wd,
            "config": config,
            "ctg_id": ctg_id
        }
        make_phasing_task = PypeTask(
            inputs={
                "ref_fasta": ref_fasta,
                "aln_bam": ctg_aln_out
            },
            outputs={"job_done": job_done},
            parameters=parameters,
            TaskType=PypeThreadTaskBase,
            URL="task://localhost/p_{ctg_id}".format(ctg_id=ctg_id))
        phasing_task = make_phasing_task(task_phasing)
        wf.addTask(phasing_task)

    wf.refreshTargets()

    hasm_wd = os.path.abspath("./3-unzip/1-hasm/")
    mkdir(hasm_wd)
    rid_to_phase_all = makePypeLocalFile(
        os.path.join(hasm_wd, "rid_to_phase.all"))

    @PypeTask(inputs=all_ctg_out,
              outputs={"rid_to_phase_all": rid_to_phase_all},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/rid_to_phase_all")
    def get_rid_to_phase_all(self):
        rid_to_phase_all_fn = fn(self.rid_to_phase_all)
        inputs_fn = [fn(f) for f in self.inputs.values()]
        inputs_fn.sort()
        output = []
        for fname in inputs_fn:
            output.append(open(fname).read())  # collect each file's text once, not char-by-char

        out = open(rid_to_phase_all_fn, "w")
        out.write("".join(output))
        out.close()

    wf.addTask(get_rid_to_phase_all)

    parameters["wd"] = hasm_wd
    job_done = makePypeLocalFile(os.path.join(hasm_wd, "hasm_done"))
    make_hasm_task = PypeTask(inputs={"rid_to_phase_all": rid_to_phase_all},
                              outputs={"job_done": job_done},
                              parameters=parameters,
                              TaskType=PypeThreadTaskBase,
                              URL="task://localhost/hasm")
    hasm_task = make_hasm_task(task_hasm)

    wf.addTask(hasm_task)

    wf.refreshTargets()
Example #10
        daligner_tasks, daligner_out = create_daligner_tasks(
            rawread_dir, "raw_reads", db_file, rdb_build_done, config)

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))

        @PypeTask(inputs=daligner_out,
                  outputs={"da_done": r_da_done},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/rda_check")
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))

        wf.addTask(check_r_da_task)
        wf.refreshTargets(
            updateFreq=wait_time
        )  # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs,
                                               concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
            rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        if config["target"] == "overlapping":
            wf.refreshTargets(
                updateFreq=wait_time
            )  # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed
            exit(0)
Example #11
def main1(prog_name, input_config_fn, logger_config_fn=None):
    setup_logger(logger_config_fn)

    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    config = get_config(parse_config(input_config_fn))
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        make_dirs(d)

    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(
        os.path.basename(config["input_fofn_fn"]))
    rawread_fofn_plf = makePypeLocalFile(
        os.path.join(rawread_dir, os.path.basename(config["input_fofn_fn"])))
    make_fofn_abs_task = PypeTask(inputs={"i_fofn": input_fofn_plf},
                                  outputs={"o_fofn": rawread_fofn_plf},
                                  parameters={},
                                  TaskType=PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(make_fofn_abs_raw)
    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile(os.path.join(rawread_dir, "sleep_done"))
        rdb_build_done = makePypeLocalFile(
            os.path.join(rawread_dir, "rdb_build_done"))
        parameters = {"work_dir": rawread_dir, "config": config}

        make_build_rdb_task = PypeTask(
            inputs={"input_fofn": rawread_fofn_plf},
            outputs={"rdb_build_done": rdb_build_done},
            parameters=parameters,
            TaskType=PypeThreadTaskBase)

        build_rdb_task = make_build_rdb_task(build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        db_file = makePypeLocalFile(
            os.path.join(rawread_dir, "%s.db" % "raw_reads"))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
            rawread_dir, "raw_reads", db_file, rdb_build_done, config)

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))

        @PypeTask(inputs=daligner_out,
                  outputs={"da_done": r_da_done},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/rda_check")
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))

        wf.addTask(check_r_da_task)
        wf.refreshTargets(
            updateFreq=wait_time
        )  # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs,
                                               concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
            rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        if config["target"] == "overlapping":
            wf.refreshTargets(
                updateFreq=wait_time
            )  # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed
            sys.exit(0)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={
                      "cns_done": r_cns_done,
                      "pread_fofn": pread_fofn
                  },
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >> f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(
            updateFreq=wait_time)  # larger number better for more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, os.path.basename(config["input_fofn_fn"])))
        make_fofn_abs_task = PypeTask(inputs={"i_fofn": rawread_fofn_plf},
                                      outputs={"o_fofn": pread_fofn},
                                      parameters={},
                                      TaskType=PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile(
        os.path.join(pread_dir, "pdb_build_done"))
    parameters = {"work_dir": pread_dir, "config": config}

    make_build_pdb_task = PypeTask(inputs={"pread_fofn": pread_fofn},
                                   outputs={"pdb_build_done": pdb_build_done},
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(build_pdb)

    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done])

    db_file = makePypeLocalFile(os.path.join(pread_dir, "%s.db" % "preads"))
    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(pread_dir,
                                                         "preads",
                                                         db_file,
                                                         pdb_build_done,
                                                         config,
                                                         pread_aln=True)
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq = 30) # larger number better for more jobs

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))

    @PypeTask(inputs=daligner_out,
              outputs={"da_done": p_da_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pda_check")
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))

    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
        pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)
    #wf.refreshTargets(updateFreq = 30) #all

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq=wait_time)  #all

    falcon_asm_done = makePypeLocalFile(
        os.path.join(falcon_asm_dir, "falcon_asm_done"))

    @PypeTask(inputs={
        "p_merge_done": p_merge_done,
        "db_file": db_file
    },
              outputs={"falcon_asm_done": falcon_asm_done},
              parameters={
                  "wd": falcon_asm_dir,
                  "config": config,
                  "pread_dir": pread_dir
              },
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/falcon")
    def run_falcon_asm_task(self):
        wd = self.parameters["wd"]
        config = self.parameters["config"]
        install_prefix = config["install_prefix"]
        pread_dir = self.parameters["pread_dir"]
        script_dir = wd
        script_fn = os.path.join(script_dir, "run_falcon_asm.sh")

        script = []
        script.append("set -vex")
        script.append("trap 'touch %s.exit' EXIT" % fn(self.falcon_asm_done))
        script.append("source {install_prefix}/bin/activate".format(
            install_prefix=install_prefix))
        script.append("cd %s" % pread_dir)
        # Write preads4falcon.fasta, in 1-preads_ovl:
        script.append("DB2Falcon -U preads")
        script.append("cd %s" % wd)
        script.append("""find %s/las_files -name "*.las" > las.fofn """ %
                      pread_dir)
        overlap_filtering_setting = config["overlap_filtering_setting"]
        length_cutoff_pr = config["length_cutoff_pr"]
        script.append( """fc_ovlp_filter.py --db %s --fofn las.fofn %s --min_len %d > preads.ovl""" %\
                (fn(db_file), overlap_filtering_setting, length_cutoff_pr) )
        script.append("ln -sf %s/preads4falcon.fasta ." % pread_dir)
        script.append(
            """fc_ovlp_to_graph.py preads.ovl --min_len %d > fc_ovlp_to_graph.log"""
            % length_cutoff_pr)  # TODO: drop this logfile
        # Write 'p_ctg.fa' and 'a_ctg.fa':
        script.append("""fc_graph_to_contig.py""")
        script.append("""touch %s""" % fn(self.falcon_asm_done))

        with open(script_fn, "w") as script_file:
            script_file.write("\n".join(script))

        job_name = self.URL.split("/")[-1]
        job_name += "-" + str(uuid.uuid4())[:8]
        job_data = {
            "job_name": job_name,
            "cwd": wd,
            "sge_option": config["sge_option_fc"],
            "script_fn": script_fn
        }
        run_script(job_data, job_type=config["job_type"])
        wait_for_file(fn(self.falcon_asm_done), task=self, job_name=job_name)

    wf.addTask(run_falcon_asm_task)
    wf.refreshTargets(updateFreq=wait_time)  #all
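
# A hedged aside: run_falcon_asm_task above follows the pattern used all
# through these examples: write a shell script, hand it to run_script()
# (local process or SGE, per config["job_type"]), then block until a
# sentinel file appears via wait_for_file(). A local-only sketch of that
# submit-and-wait loop (these helpers are stand-ins, not the falcon_kit API):
import os
import subprocess
import time

def run_script_local(script_fn, cwd):
    # Local stand-in for run_script(): execute the generated shell script.
    subprocess.check_call(["bash", script_fn], cwd=cwd)

def wait_for_file_local(path, timeout=3600, poll=10):
    # Poll until the sentinel appears, as wait_for_file() does for SGE jobs.
    waited = 0
    while not os.path.exists(path):
        if waited >= timeout:
            raise RuntimeError("timed out waiting for %s" % path)
        time.sleep(poll)
        waited += poll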
Example #12
            }
            make_quiver_task = PypeTask(
                inputs={
                    "ref_fasta": ref_fasta,
                    "read_sam": read_sam
                },
                outputs={
                    "cns_fasta": cns_fasta,
                    "cns_fastq": cns_fastq,
                    "job_done": job_done
                },
                parameters=parameters,
                TaskType=PypeThreadTaskBase,
                URL="task://localhost/q_{ctg_id}".format(ctg_id=ctg_id))
            quiver_task = make_quiver_task(task_run_quiver)
            wf.addTask(quiver_task)

    wf.refreshTargets()
    os.system("sleep 30")

    mkdir("./4-quiver/cns_output")
    os.system("rm ./4-quiver/cns_output/cns_p_ctg.fasta")
    os.system("rm ./4-quiver/cns_output/cns_p_ctg.fastq")
    for cns_fasta, cns_fastq in sorted(p_ctg_out):
        os.system(
            "zcat {cns_fasta} >> ./4-quiver/cns_output/cns_p_ctg.fasta".format(
                cns_fasta=fn(cns_fasta)))
        os.system(
            "zcat {cns_fastq} >> ./4-quiver/cns_output/cns_p_ctg.fastq".format(
                cns_fastq=fn(cns_fastq)))
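
# A hedged aside: the zcat loop above shells out once per contig. The same
# concatenation can stay in Python via the gzip module, which also fails
# loudly if an input is missing instead of silently appending nothing:
import gzip
import shutil

def concat_gzipped(input_fns, output_fn):
    # Decompress each gzipped input and append it to one plain output file.
    with open(output_fn, "wb") as out:
        for fname in sorted(input_fns):
            with gzip.open(fname, "rb") as f:
                shutil.copyfileobj(f, out)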
Example #13
    prefix = sys.argv[1]
    concurrent_jobs = 64
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    job_id = 0
    db_file = makePypeLocalFile(os.path.abspath( "./%s.db" % prefix ))
    with open("run_jobs.sh") as f :
        for l in f :
            l = l.strip().split()
            if l[0] == "daligner":
                try:
                    os.makedirs("./job_%05d" % job_id)
                except OSError:
                    pass
                os.system("cd ./job_%05d;ln -s ../.%s.bps .; ln -s ../.%s.idx .; ln -s ../%s.db ." % (job_id, prefix, prefix, prefix) )
                job_done = makePypeLocalFile(os.path.abspath( "./job_%05d/job_%05d_done" % (job_id,job_id)  ))
                parameters =  {"daligner_cmd": " ".join(l),
                               "cwd": os.path.join(os.getcwd(), "job_%05d" % job_id),
                               "job_id": job_id}
                make_daligner_task = PypeTask( inputs = {"db_file": db_file},
                                               outputs = {"job_done": job_done},
                                               parameters = parameters,
                                               TaskType = PypeThreadTaskBase,
                                               URL = "task://localhost/mtask_%05d" % job_id )
                daligner_task = make_daligner_task ( run_daligner )
                wf.addTask(daligner_task)
                job_id += 1
                print job_id
    wf.refreshTargets(updateFreq = 45) #all
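
# A hedged aside: this example drives everything from daligner's own
# run_jobs.sh; each `daligner` line becomes one task with its own job
# directory that symlinks the DAZZ_DB files. The os.system() setup can be
# done more defensively in pure Python, e.g.:
import errno
import os

def setup_job_dir(job_id, prefix):
    # Create ./job_NNNNN and symlink the database files into it.
    job_dir = "./job_%05d" % job_id
    try:
        os.makedirs(job_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
    for name in (".%s.bps" % prefix, ".%s.idx" % prefix, "%s.db" % prefix):
        dst = os.path.join(job_dir, name)
        if not os.path.lexists(dst):
            os.symlink(os.path.join("..", name), dst)
    return job_dir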
Example #14
def main1(prog_name, input_config_fn, logger_config_fn=None):
    global fc_run_logger
    fc_run_logger = support.setup_logger(logger_config_fn)

    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    try:
        config = support.get_dict_from_old_falcon_cfg(support.parse_config(input_config_fn))
    except Exception:
        fc_run_logger.exception('Failed to parse config "{}".'.format(input_config_fn))
        raise
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir  = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        support.make_dirs(d)

    exitOnFailure = config['stop_all_jobs_on_failure']  # only matters for parallel jobs
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(config["input_fofn"])
    rawread_fofn_plf = makePypeLocalFile(os.path.join(rawread_dir, os.path.basename(config["input_fofn"])))
    make_fofn_abs_task = PypeTask(inputs = {"i_fofn": input_fofn_plf},
                                  outputs = {"o_fofn": rawread_fofn_plf},
                                  parameters = {},
                                  TaskType = PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_raw)

    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile( os.path.join( rawread_dir, "sleep_done") )
        rdb_build_done = makePypeLocalFile( os.path.join( rawread_dir, "rdb_build_done") )
        run_jobs = makePypeLocalFile( os.path.join( rawread_dir, "run_jobs.sh") )
        parameters = {"work_dir": rawread_dir,
                      "config": config}

        raw_reads_db_plf = makePypeLocalFile(os.path.join(rawread_dir, "%s.db" % "raw_reads"))
        make_build_rdb_task = PypeTask(inputs = {"input_fofn": rawread_fofn_plf},
                                      outputs = {"rdb_build_done": rdb_build_done,
                                                 "raw_reads_db": raw_reads_db_plf,
                                                 "run_jobs": run_jobs,
                                      },
                                      parameters = parameters,
                                      TaskType = PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(task_build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        raw_reads_nblock = support.get_nblock(fn(raw_reads_db_plf))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs), rawread_dir, "raw_reads", rdb_build_done,
                nblock=raw_reads_nblock, config=config)

        wf.addTasks(daligner_tasks)
        r_da_done = makePypeLocalFile( os.path.join( rawread_dir, "da_done") )

        parameters =  {
                "nblock": raw_reads_nblock,
        }
        make_daligner_gather = PypeTask(
                   inputs = daligner_out,
                   outputs =  {"da_done":r_da_done},
                   parameters = parameters,
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/rda_check" )
        check_r_da_task = make_daligner_gather(task_daligner_gather)
        wf.addTask(check_r_da_task)
        wf.refreshTargets(exitOnFailure=exitOnFailure)

        merge_tasks, merge_out, p_ids_merge_job_done = create_merge_tasks(fn(run_jobs), rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks( merge_tasks )
        wf.refreshTargets(exitOnFailure=exitOnFailure)

        if config["target"] == "overlapping":
            sys.exit(0)
        consensus_tasks, consensus_out = create_consensus_tasks(rawread_dir, "raw_reads", config, p_ids_merge_job_done)
        wf.addTasks( consensus_tasks )

        r_cns_done = makePypeLocalFile( os.path.join( rawread_dir, "cns_done") )
        pread_fofn = makePypeLocalFile( os.path.join( pread_dir,  "input_preads.fofn" ) )

        @PypeTask( inputs = consensus_out,
                   outputs =  {"cns_done":r_cns_done, "pread_fofn": pread_fofn},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/cns_check" )
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn),  "w") as f:
                fn_list =  glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >>f, fa_fn
            system("touch %s" % fn(self.cns_done))
        wf.addTask(check_r_cns_task)

        length_cutoff_plf = makePypeLocalFile(os.path.join(rawread_dir, "length_cutoff"))
        pre_assembly_report_plf = makePypeLocalFile(os.path.join(rawread_dir, "pre_assembly_stats.json"))  # though technically it also needs pread_fofn
        make_task = PypeTask(
                inputs = {"length_cutoff_fn": length_cutoff_plf,
                          "raw_reads_db": raw_reads_db_plf,
                          "preads_fofn": pread_fofn, },
                outputs = {"pre_assembly_report": pre_assembly_report_plf, },
                parameters = config,
                TaskType = PypeThreadTaskBase,
                URL = "task://localhost/report_pre_assembly")
        task = make_task(task_report_pre_assembly)
        wf.addTask(task)

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        wf.refreshTargets(exitOnFailure=exitOnFailure)

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, os.path.basename(config["input_fofn"])))
        make_fofn_abs_task = PypeTask(inputs = {"i_fofn": rawread_fofn_plf},
                                     outputs = {"o_fofn": pread_fofn},
                                     parameters = {},
                                     TaskType = PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile( os.path.join( pread_dir, "pdb_build_done") )
    parameters = {"work_dir": pread_dir,
                  "config": config}

    run_jobs = makePypeLocalFile(os.path.join(pread_dir, 'run_jobs.sh'))
    preads_db = makePypeLocalFile(os.path.join(pread_dir, 'preads.db')) # Also .preads.*, of course.
    make_build_pdb_task  = PypeTask(inputs = {"pread_fofn": pread_fofn },
                                    outputs = {"pdb_build_done": pdb_build_done,
                                               "preads_db": preads_db,
                                               "run_jobs": run_jobs},
                                    parameters = parameters,
                                    TaskType = PypeThreadTaskBase,
                                    URL = "task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(task_build_pdb)

    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done])


    preads_nblock = support.get_nblock(fn(preads_db))
    #### run daligner
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs), pread_dir, "preads", pdb_build_done,
                nblock=preads_nblock, config=config, pread_aln=True)
    wf.addTasks(daligner_tasks)

    p_da_done = makePypeLocalFile(os.path.join( pread_dir, "da_done"))
    parameters =  {
            "nblock": preads_nblock,
    }
    make_daligner_gather = PypeTask(
                inputs = daligner_out,
                outputs =  {"da_done":p_da_done},
                parameters = parameters,
                TaskType = PypeThreadTaskBase,
                URL = "task://localhost/pda_check" )
    check_p_da_task = make_daligner_gather(task_daligner_gather)
    wf.addTask(check_p_da_task)
    wf.refreshTargets(exitOnFailure=exitOnFailure)

    merge_tasks, merge_out, _ = create_merge_tasks(fn(run_jobs), pread_dir, "preads", p_da_done, config)
    wf.addTasks( merge_tasks )

    p_merge_done = makePypeLocalFile(os.path.join( pread_dir, "p_merge_done"))

    @PypeTask( inputs = merge_out,
               outputs =  {"p_merge_done": p_merge_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pmerge_check" )
    def check_p_merge_check_task(self):
        system("touch %s" % fn(self.p_merge_done))
    wf.addTask(check_p_merge_check_task)

    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)

    wf.refreshTargets(exitOnFailure=exitOnFailure)


    db2falcon_done = makePypeLocalFile( os.path.join(pread_dir, "db2falcon_done"))
    make_run_db2falcon = PypeTask(
               inputs = {"p_merge_done": p_merge_done,},
               outputs =  {"db2falcon_done": db2falcon_done},
               parameters = {"wd": pread_dir,
                             "config": config,
                            },
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/db2falcon" )
    wf.addTask(make_run_db2falcon(task_run_db2falcon))

    falcon_asm_done = makePypeLocalFile( os.path.join( falcon_asm_dir, "falcon_asm_done") )
    make_run_falcon_asm = PypeTask(
               inputs = {"db2falcon_done": db2falcon_done, "db_file": preads_db},
               outputs =  {"falcon_asm_done": falcon_asm_done},
               parameters = {"wd": falcon_asm_dir,
                             "config": config,
                             "pread_dir": pread_dir},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/falcon" )
    wf.addTask(make_run_falcon_asm(task_run_falcon_asm))
    wf.refreshTargets()
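
# A hedged aside: unlike the earlier variants, the gather tasks here are
# parameterized by nblock, the number of blocks the database was split into,
# read via support.get_nblock(). Assuming the usual DAZZ_DB text header
# (DBsplit writes a "blocks = N" line), such a reader could look like:
def get_nblock_sketch(db_fn):
    # Parse "blocks = N" from a DAZZ_DB .db file; assume one block if absent.
    with open(db_fn) as f:
        for line in f:
            if line.strip().startswith("blocks"):
                return int(line.split("=")[1])
    return 1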
Example #15
            query_group_done["q%05d_t%05d_done" % (q_sn, t_sn)] = job_done

            inputs = { "query_fofn" : q_fofn, "target_fa": t_fa, "target_sa": t_sa, "gather_targets_done": gather_targets_done }
            outputs = { "job_done": job_done }
            parameters = { "mapping_data_dir": mapping_data_dir, "q_sn": q_sn, "t_sn": t_sn, "config":config }
        
            #for testing 
            #task_decorator = PypeTask(inputs = inputs, outputs = outputs, parameters = parameters, TaskType = PypeTaskBase )
            #task() 
            make_mapping_task = PypeTask(inputs = inputs, 
                                 outputs = outputs, 
                                 parameters = parameters, 
                                 TaskType = PypeThreadTaskBase,
                                 URL = "task://localhost/mapping_task_q%05d_t%05d" % (q_sn, t_sn) )
            mapping_task = make_mapping_task ( blasr_align )
            wf.addTask(mapping_task)

        qf_out = os.path.join( mapping_data_dir, "qf%05d.m4" % q_sn ) 
        qf_out = makePypeLocalFile( qf_out )

        all_qf_out["qf_out_%s" % q_sn] = qf_out

        job_done = os.path.join( mapping_data_dir, "qf%05d_done" % q_sn ) 
        job_done = makePypeLocalFile(job_done)

        all_qf_out["qf_done_%s" % q_sn] = job_done
        parameters = { "mapping_data_dir": mapping_data_dir, "q_sn": q_sn }
        make_qf_task = PypeTask(inputs = query_group_done,
                                outputs = {"qf_out": qf_out, "job_done": job_done},
                                TaskType = PypeThreadTaskBase,
                                URL = "task://localhost/qf_task_q%05d" % q_sn,
Example #16
            if ctg_types[ctg_id] == "p":
                p_ctg_out.append( (cns_fasta, cns_fastq) )
            if ctg_types[ctg_id] == "h":
                h_ctg_out.append( (cns_fasta, cns_fastq) )
            if not os.path.exists(fn(ref_fasta)):
                with open(fn(ref_fasta),"w") as f:
                    print >>f, ">"+ctg_id
                    print >>f, sequence
            parameters = {"job_uid":"q-"+ctg_id, "wd": wd, "config":config, "ctg_id": ctg_id } 
            make_quiver_task = PypeTask(inputs = {"ref_fasta": ref_fasta, "read_sam": read_sam},
                                       outputs = {"cns_fasta": cns_fasta, "cns_fastq": cns_fastq, "job_done": job_done},
                                       parameters = parameters,
                                       TaskType = PypeThreadTaskBase,
                                       URL = "task://localhost/q_{ctg_id}".format( ctg_id = ctg_id ) )
            quiver_task = make_quiver_task(task_run_quiver)
            wf.addTask( quiver_task )

    wf.refreshTargets()
    os.system("sleep 30")

    mkdir( "./4-quiver/cns_output" )
    os.system("rm ./4-quiver/cns_output/cns_p_ctg.fasta")
    os.system("rm ./4-quiver/cns_output/cns_p_ctg.fastq")
    for cns_fasta, cns_fastq in sorted(p_ctg_out):
        os.system("zcat {cns_fasta} >> ./4-quiver/cns_output/cns_p_ctg.fasta".format( cns_fasta=fn(cns_fasta) ) )
        os.system("zcat {cns_fastq} >> ./4-quiver/cns_output/cns_p_ctg.fastq".format( cns_fastq=fn(cns_fastq) ) )


        
Example #17
def main1(prog_name, input_config_fn, logger_config_fn=None):
    global fc_run_logger
    fc_run_logger = support.setup_logger(logger_config_fn)

    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    config = support.get_dict_from_old_falcon_cfg(support.parse_config(input_config_fn))
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir  = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        support.make_dirs(d)

    exitOnFailure = config['stop_all_jobs_on_failure']  # only matters for parallel jobs
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(config["input_fofn"])
    rawread_fofn_plf = makePypeLocalFile(os.path.join(rawread_dir, os.path.basename(config["input_fofn"])))
    make_fofn_abs_task = PypeTask(inputs = {"i_fofn": input_fofn_plf},
                                  outputs = {"o_fofn": rawread_fofn_plf},
                                  parameters = {},
                                  TaskType = PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_raw)

    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile( os.path.join( rawread_dir, "sleep_done") )
        rdb_build_done = makePypeLocalFile( os.path.join( rawread_dir, "rdb_build_done") ) 
        run_jobs = makePypeLocalFile( os.path.join( rawread_dir, "run_jobs.sh") ) 
        parameters = {"work_dir": rawread_dir,
                      "config": config}

        raw_reads_db = makePypeLocalFile(os.path.join( rawread_dir, "%s.db" % "raw_reads" ))
        make_build_rdb_task = PypeTask(inputs = {"input_fofn": rawread_fofn_plf},
                                      outputs = {"rdb_build_done": rdb_build_done,
                                                 "raw_reads.db": raw_reads_db,
                                                 "run_jobs": run_jobs}, 
                                      parameters = parameters,
                                      TaskType = PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(task_build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done]) 

        raw_reads_nblock = support.get_nblock(fn(raw_reads_db))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs), rawread_dir, "raw_reads", rdb_build_done, config) 

        wf.addTasks(daligner_tasks)
        r_da_done = makePypeLocalFile( os.path.join( rawread_dir, "da_done") )

        parameters =  {
                "nblock": raw_reads_nblock,
        }
        make_daligner_gather = PypeTask(
                   inputs = daligner_out, 
                   outputs =  {"da_done":r_da_done},
                   parameters = parameters,
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/rda_check" )
        check_r_da_task = make_daligner_gather(task_daligner_gather)
        wf.addTask(check_r_da_task)
        wf.refreshTargets(exitOnFailure=exitOnFailure)
        
        merge_tasks, merge_out, p_ids_merge_job_done = create_merge_tasks(fn(run_jobs), rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks( merge_tasks )
        wf.refreshTargets(exitOnFailure=exitOnFailure)

        if config["target"] == "overlapping":
            sys.exit(0)
        consensus_tasks, consensus_out = create_consensus_tasks(rawread_dir, "raw_reads", config, p_ids_merge_job_done)
        wf.addTasks( consensus_tasks )

        r_cns_done = makePypeLocalFile( os.path.join( rawread_dir, "cns_done") )
        pread_fofn = makePypeLocalFile( os.path.join( pread_dir,  "input_preads.fofn" ) )

        @PypeTask( inputs = consensus_out, 
                   outputs =  {"cns_done":r_cns_done, "pread_fofn": pread_fofn},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/cns_check" )
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn),  "w") as f:
                fn_list =  glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >>f, fa_fn
            system("touch %s" % fn(self.cns_done))
        wf.addTask(check_r_cns_task)

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        wf.refreshTargets(exitOnFailure=exitOnFailure)

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, os.path.basename(config["input_fofn"])))
        make_fofn_abs_task = PypeTask(inputs = {"i_fofn": rawread_fofn_plf},
                                     outputs = {"o_fofn": pread_fofn},
                                     parameters = {},
                                     TaskType = PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile( os.path.join( pread_dir, "pdb_build_done") ) 
    parameters = {"work_dir": pread_dir,
                  "config": config}

    run_jobs = makePypeLocalFile(os.path.join(pread_dir, 'run_jobs.sh'))
    preads_db = makePypeLocalFile(os.path.join(pread_dir, 'preads.db')) # Also .preads.*, of course.
    make_build_pdb_task  = PypeTask(inputs = {"pread_fofn": pread_fofn },
                                    outputs = {"pdb_build_done": pdb_build_done,
                                               "preads_db": preads_db,
                                               "run_jobs": run_jobs},
                                    parameters = parameters,
                                    TaskType = PypeThreadTaskBase,
                                    URL = "task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(task_build_pdb)

    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done]) 


    preads_nblock = support.get_nblock(fn(preads_db))
    #### run daligner
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs), pread_dir, "preads", pdb_build_done, config, pread_aln=True)
    wf.addTasks(daligner_tasks)

    p_da_done = makePypeLocalFile(os.path.join( pread_dir, "da_done"))
    parameters =  {
            "nblock": preads_nblock,
    }
    make_daligner_gather = PypeTask(
                inputs = daligner_out, 
                outputs =  {"da_done":p_da_done},
                parameters = parameters,
                TaskType = PypeThreadTaskBase,
                URL = "task://localhost/pda_check" )
    check_p_da_task = make_daligner_gather(task_daligner_gather)
    wf.addTask(check_p_da_task)
    wf.refreshTargets(exitOnFailure=exitOnFailure)

    merge_tasks, merge_out, _ = create_merge_tasks(fn(run_jobs), pread_dir, "preads", p_da_done, config)
    wf.addTasks( merge_tasks )

    p_merge_done = makePypeLocalFile(os.path.join( pread_dir, "p_merge_done"))

    @PypeTask( inputs = merge_out, 
               outputs =  {"p_merge_done": p_merge_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pmerge_check" )
    def check_p_merge_check_task(self):
        system("touch %s" % fn(self.p_merge_done))
    wf.addTask(check_p_merge_check_task)

    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)

    wf.refreshTargets(exitOnFailure=exitOnFailure)

    
    db2falcon_done = makePypeLocalFile( os.path.join(pread_dir, "db2falcon_done"))
    make_run_db2falcon = PypeTask(
               inputs = {"p_merge_done": p_merge_done,},
               outputs =  {"db2falcon_done": db2falcon_done},
               parameters = {"wd": pread_dir,
                             "config": config,
                            },
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/db2falcon" )
    wf.addTask(make_run_db2falcon(task_run_db2falcon))

    falcon_asm_done = makePypeLocalFile( os.path.join( falcon_asm_dir, "falcon_asm_done") )
    make_run_falcon_asm = PypeTask(
               inputs = {"db2falcon_done": db2falcon_done, "db_file": preads_db},
               outputs =  {"falcon_asm_done": falcon_asm_done},
               parameters = {"wd": falcon_asm_dir,
                             "config": config,
                             "pread_dir": pread_dir},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/falcon" )
    wf.addTask(make_run_falcon_asm(task_run_falcon_asm))
    wf.refreshTargets()
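
# A hedged aside: task_make_fofn_abs_raw / task_make_fofn_abs_preads are not
# shown in these snippets. Judging by their names and wiring, they rewrite
# the input file-of-filenames so every entry is an absolute path, keeping
# later stages independent of the working directory. A plausible sketch
# (not the falcon_kit source):
import os

def make_fofn_abs(i_fofn_fn, o_fofn_fn):
    with open(i_fofn_fn) as i_f, open(o_fofn_fn, "w") as o_f:
        for line in i_f:
            line = line.strip()
            if line:
                o_f.write(os.path.abspath(line) + "\n")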
Example #18
def main1(prog_name, input_config_fn, logger_config_fn=None):
    global fc_run_logger
    fc_run_logger = support.setup_logger(logger_config_fn)

    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    config = support.get_dict_from_old_falcon_cfg(
        support.parse_config(input_config_fn))
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        support.make_dirs(d)

    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(
        os.path.basename(config["input_fofn_fn"]))
    rawread_fofn_plf = makePypeLocalFile(
        os.path.join(rawread_dir, os.path.basename(config["input_fofn_fn"])))
    make_fofn_abs_task = PypeTask(inputs={"i_fofn": input_fofn_plf},
                                  outputs={"o_fofn": rawread_fofn_plf},
                                  parameters={},
                                  TaskType=PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_raw)

    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile(os.path.join(rawread_dir, "sleep_done"))
        rdb_build_done = makePypeLocalFile(
            os.path.join(rawread_dir, "rdb_build_done"))
        run_jobs = makePypeLocalFile(os.path.join(rawread_dir, "run_jobs.sh"))
        parameters = {"work_dir": rawread_dir, "config": config}

        make_build_rdb_task = PypeTask(inputs={"input_fofn": rawread_fofn_plf},
                                       outputs={
                                           "rdb_build_done": rdb_build_done,
                                           "run_jobs": run_jobs
                                       },
                                       parameters=parameters,
                                       TaskType=PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(task_build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        db_file = makePypeLocalFile(
            os.path.join(rawread_dir, "%s.db" % "raw_reads"))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
            fn(run_jobs), rawread_dir, "raw_reads", db_file, rdb_build_done,
            config)

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))

        @PypeTask(inputs=daligner_out,
                  outputs={"da_done": r_da_done},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/rda_check")
        def check_r_da_task(self):
            system("touch %s" % fn(self.da_done))

        wf.addTask(check_r_da_task)
        wf.refreshTargets(
            updateFreq=wait_time
        )  # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs,
                                               concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
            fn(run_jobs), rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        if config["target"] == "overlapping":
            wf.refreshTargets(
                updateFreq=wait_time
            )  # larger number better for more jobs, need to call to run jobs here or the # of concurrency is changed
            sys.exit(0)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={
                      "cns_done": r_cns_done,
                      "pread_fofn": pread_fofn
                  },
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >> f, fa_fn
            system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(
            updateFreq=wait_time)  # larger number better for more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, os.path.basename(config["input_fofn_fn"])))
        make_fofn_abs_task = PypeTask(inputs={"i_fofn": rawread_fofn_plf},
                                      outputs={"o_fofn": pread_fofn},
                                      parameters={},
                                      TaskType=PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile(
        os.path.join(pread_dir, "pdb_build_done"))
    parameters = {"work_dir": pread_dir, "config": config}

    run_jobs = makePypeLocalFile(os.path.join(pread_dir, 'run_jobs.sh'))
    make_build_pdb_task = PypeTask(inputs={"pread_fofn": pread_fofn},
                                   outputs={
                                       "pdb_build_done": pdb_build_done,
                                       "run_jobs": run_jobs
                                   },
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(task_build_pdb)

    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done])

    db_file = makePypeLocalFile(os.path.join(pread_dir, "%s.db" % "preads"))
    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs),
                                                         pread_dir,
                                                         "preads",
                                                         db_file,
                                                         pdb_build_done,
                                                         config,
                                                         pread_aln=True)
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq = 30) # larger number better for more jobs

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))

    @PypeTask(inputs=daligner_out,
              outputs={"da_done": p_da_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pda_check")
    def check_p_da_task(self):
        system("touch %s" % fn(self.da_done))

    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
        fn(run_jobs), pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)
    #wf.refreshTargets(updateFreq = 30) #all

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq=wait_time)  #all

    falcon_asm_done = makePypeLocalFile(
        os.path.join(falcon_asm_dir, "falcon_asm_done"))
    make_run_falcon_asm = PypeTask(
        inputs={
            "p_merge_done": p_merge_done,
            "db_file": db_file
        },
        outputs={"falcon_asm_done": falcon_asm_done},
        parameters={
            "wd": falcon_asm_dir,
            "config": config,
            "pread_dir": pread_dir
        },
        TaskType=PypeThreadTaskBase,
        URL="task://localhost/falcon")
    wf.addTask(make_run_falcon_asm(task_run_falcon_asm))
    wf.refreshTargets(updateFreq=wait_time)  #all
Example #19
                print >> p_script, l
                print >> p_script, "mv %s.%d.las ../las_files" % (prefix, p_id) 
            
        p_file = os.path.abspath( "./p_%05d/p_%05d.sh" % (p_id, p_id) )
        job_done = makePypeLocalFile(os.path.abspath( "./p_%05d/p_%05d_done" % (p_id,p_id)  ))
        parameters =  {"p_file": p_file, 
                       "cwd": os.path.join(os.getcwd(), "p_%05d" % p_id),
                       "job_id": p_id}
        make_p_task = PypeTask( inputs = {"db_file": db_file},
                                       outputs = {"job_done": job_done},
                                       parameters = parameters,
                                       TaskType = PypeThreadTaskBase,
                                       URL = "task://localhost/ptask_%05d" % p_id )
        p_task = make_p_task ( run_p_task )

        wf.addTask(p_task)


        out_file = makePypeLocalFile(os.path.abspath( "./preads/out.%04d.fa" % p_id  ))
        parameters =  {"cwd": os.path.join(os.getcwd(), "preads" ),
                       "job_id": p_id}
        make_c_task = PypeTask( inputs = {"job_done": job_done},
                                outputs = {"out_file": out_file },
                                parameters = parameters,
                                TaskType = PypeThreadTaskBase,
                                URL = "task://localhost/ct_%05d" % p_id )
        
        c_task = make_c_task( run_consensus_task )
        wf.addTask(c_task)
        print p_id
    wf.refreshTargets(updateFreq = 15) #all            
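
# A hedged aside: the dependency edge between p_task and c_task above is
# declared purely by sharing a file -- job_done is an output of one task and
# an input of the next. Stripped of pypeFLOW, refreshTargets() amounts to
# running tasks in dependency order, roughly:
def run_in_order(tasks):
    # tasks: {name: (dep_names, func)}; run each task once its deps are done.
    done = set()
    while len(done) < len(tasks):
        progressed = False
        for name, (deps, func) in tasks.items():
            if name not in done and all(d in done for d in deps):
                func()
                done.add(name)
                progressed = True
        if not progressed:
            raise RuntimeError("dependency cycle among remaining tasks")
# e.g. run_in_order({"daligner": ((), run_p), "consensus": (("daligner",), run_c)})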
Example #20
        
        # outputs
        wd = os.path.join( os.getcwd(),  "./3-unzip/0-phasing/{ctg_id}/".format( ctg_id = ctg_id ) )
        mkdir(wd)
        ctg_aln_out = makePypeLocalFile( os.path.join( wd, "{ctg_id}_sorted.bam".format( ctg_id = ctg_id ) ) )
        job_done = makePypeLocalFile( os.path.join( wd, "aln_{ctg_id}_done".format( ctg_id = ctg_id ) ) )
    
        parameters = {"job_uid":"aln-"+ctg_id, "wd": wd, "config":config, "ctg_id": ctg_id} 
        make_blasr_task = PypeTask(inputs = {"ref_fasta": ref_fasta, "read_fasta": read_fasta},
                                   outputs = {"ctg_aln_out": ctg_aln_out, "job_done": job_done},
                                   parameters = parameters,
                                   TaskType = PypeThreadTaskBase,
                                   URL = "task://localhost/aln_{ctg_id}".format( ctg_id = ctg_id ) )
        blasr_task = make_blasr_task(task_run_blasr)
        aln1_outs[ctg_id] = (ctg_aln_out, job_done)
        wf.addTask(blasr_task)

        job_done = makePypeLocalFile( os.path.join( wd, "p_{ctg_id}_done".format( ctg_id = ctg_id ) ) )
        parameters = {"job_uid":"ha-"+ctg_id, "wd": wd, "config":config, "ctg_id": ctg_id} 
        make_phasing_task = PypeTask(inputs = {"ref_fasta": ref_fasta, "aln_bam":ctg_aln_out},
                                   outputs = {"job_done": job_done},
                                   parameters = parameters,
                                   TaskType = PypeThreadTaskBase,
                                   URL = "task://localhost/p_{ctg_id}".format( ctg_id = ctg_id ) )
        phasing_task = make_phasing_task(task_phasing)
        wf.addTask(phasing_task)
    #print aln1_outs
    wf.refreshTargets()
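
# A hedged aside on the idiom used throughout these examples: PypeTask(...)
# returns a factory, not a task. It is either applied to a plain function
# (make_x_task = PypeTask(...); x_task = make_x_task(func)) or used directly
# as a decorator (@PypeTask(...)). A minimal illustration of that two-step
# shape with an ordinary decorator, no pypeFLOW required:
def task_factory(inputs, outputs, parameters=None):
    # Mimic the PypeTask(...) call: capture the wiring, return a wrapper.
    def wrap(func):
        def task():
            return func(inputs, outputs, parameters or {})
        task.inputs, task.outputs = inputs, outputs
        return task
    return wrap

make_task = task_factory({"i": "in.txt"}, {"o": "out.txt"})   # factory form
my_task = make_task(lambda i, o, p: None)

@task_factory({"i": "in.txt"}, {"o": "out.txt"})              # decorator form
def my_other_task(inputs, outputs, parameters):
    pass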