Example #1
def main1(prog_name, input_config_fn, logger_config_fn=None):
    global fc_run_logger
    fc_run_logger = support.setup_logger(logger_config_fn)

    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    config = support.get_dict_from_old_falcon_cfg(
        support.parse_config(input_config_fn))
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        support.make_dirs(d)

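    # Concurrency cap: at most pa_concurrent_jobs tasks run at once in this phase.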
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

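    # Stage the input FOFN into 0-rawreads, with paths rewritten as absolute.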
    input_fofn_plf = makePypeLocalFile(
        os.path.basename(config["input_fofn_fn"]))
    rawread_fofn_plf = makePypeLocalFile(
        os.path.join(rawread_dir, os.path.basename(config["input_fofn_fn"])))
    make_fofn_abs_task = PypeTask(inputs={"i_fofn": input_fofn_plf},
                                  outputs={"o_fofn": rawread_fofn_plf},
                                  parameters={},
                                  TaskType=PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_raw)

    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile(os.path.join(rawread_dir, "sleep_done"))
        rdb_build_done = makePypeLocalFile(
            os.path.join(rawread_dir, "rdb_build_done"))
        run_jobs = makePypeLocalFile(os.path.join(rawread_dir, "run_jobs.sh"))
        parameters = {"work_dir": rawread_dir, "config": config}

        make_build_rdb_task = PypeTask(inputs={"input_fofn": rawread_fofn_plf},
                                       outputs={
                                           "rdb_build_done": rdb_build_done,
                                           "run_jobs": run_jobs
                                       },
                                       parameters=parameters,
                                       TaskType=PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(task_build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        db_file = makePypeLocalFile(
            os.path.join(rawread_dir, "%s.db" % "raw_reads"))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
            fn(run_jobs), rawread_dir, "raw_reads", db_file, rdb_build_done,
            config)

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))

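        # Gather step: touch a sentinel file once all daligner outputs exist.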
        @PypeTask(inputs=daligner_out,
                  outputs={"da_done": r_da_done},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/rda_check")
        def check_r_da_task(self):
            system("touch %s" % fn(self.da_done))

        wf.addTask(check_r_da_task)
        wf.refreshTargets(
            updateFreq=wait_time
        )  # a larger updateFreq suits more jobs; jobs must run here, before the concurrency level is changed below

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs,
                                               concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
            fn(run_jobs), rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        if config["target"] == "overlapping":
            wf.refreshTargets(
                updateFreq=wait_time
            )  # a larger updateFreq suits more jobs; jobs must run here, before the concurrency level is changed below
            sys.exit(0)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={
                      "cns_done": r_cns_done,
                      "pread_fofn": pread_fofn
                  },
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >> f, fa_fn
            system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(
            updateFreq=wait_time)  # a larger updateFreq suits more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, os.path.basename(config["input_fofn_fn"])))
        make_fofn_abs_task = PypeTask(inputs={"i_fofn": rawread_fofn_plf},
                                      outputs={"o_fofn": pread_fofn},
                                      parameters={},
                                      TaskType=PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile(
        os.path.join(pread_dir, "pdb_build_done"))
    parameters = {"work_dir": pread_dir, "config": config}

    run_jobs = makePypeLocalFile(os.path.join(pread_dir, 'run_jobs.sh'))
    make_build_pdb_task = PypeTask(inputs={"pread_fofn": pread_fofn},
                                   outputs={
                                       "pdb_build_done": pdb_build_done,
                                       "run_jobs": run_jobs
                                   },
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(task_build_pdb)

    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done])

    db_file = makePypeLocalFile(os.path.join(pread_dir, "%s.db" % "preads"))
    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs),
                                                         pread_dir,
                                                         "preads",
                                                         db_file,
                                                         pdb_build_done,
                                                         config,
                                                         pread_aln=True)
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq = 30) # larger number better for more jobs

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))

    @PypeTask(inputs=daligner_out,
              outputs={"da_done": p_da_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pda_check")
    def check_p_da_task(self):
        system("touch %s" % fn(self.da_done))

    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
        fn(run_jobs), pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)
    #wf.refreshTargets(updateFreq = 30) #all

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq=wait_time)  #all

    falcon_asm_done = makePypeLocalFile(
        os.path.join(falcon_asm_dir, "falcon_asm_done"))
    make_run_falcon_asm = PypeTask(
        inputs={
            "p_merge_done": p_merge_done,
            "db_file": db_file
        },
        outputs={"falcon_asm_done": falcon_asm_done},
        parameters={
            "wd": falcon_asm_dir,
            "config": config,
            "pread_dir": pread_dir
        },
        TaskType=PypeThreadTaskBase,
        URL="task://localhost/falcon")
    wf.addTask(make_run_falcon_asm(task_run_falcon_asm))
    wf.refreshTargets(updateFreq=wait_time)  #all
Example #2
ch.setFormatter(formatter)
logger.addHandler(ch)

inputs = {"input": makePypeLocalFile("/tmp/test1_input")}
outputs = {"output": makePypeLocalFile("/tmp/test1_output")}
os.system("touch /tmp/test1_input")

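# Toy task: ticks in 0.1 s steps, stopping early if the workflow's shutdown
# event is set; the output file is only touched on a clean (non-shutdown) run.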
@PypeTask(inputs = inputs, outputs = outputs, TaskType = PypeThreadTaskBase)
def f(self):
    i = 0
    while True:
        time.sleep(0.1)
        if self.shutdown_event is not None and self.shutdown_event.is_set():
            break
        if i > 10:
            break
        i += 1
    if self.shutdown_event is None or not self.shutdown_event.is_set():
        os.system("touch %s" % fn(self.output))

wf = PypeThreadWorkflow()
wf.addTasks([f])
wf.refreshTargets()

Example #3
def main(*argv):
    setup_logger()
    if len(argv) < 2:
        print "you need to specify a configuration file"
        print "example: HGAP.py HGAP_run.cfg"
        sys.exit(1)

    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        try:
            os.makedirs(d)
        except OSError:
            pass  # already exists

    config = get_config(argv[1])
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        input_h5_fofn = makePypeLocalFile(
            os.path.abspath(config["input_fofn_fn"]))
        rdb_build_done = makePypeLocalFile(
            os.path.join(rawread_dir, "rdb_build_done"))
        parameters = {"work_dir": rawread_dir, "config": config}

        make_build_rdb_task = PypeTask(
            inputs={"input_fofn": input_h5_fofn},
            outputs={"rdb_build_done": rdb_build_done},
            parameters=parameters,
            TaskType=PypeThreadTaskBase)

        build_rdb_task = make_build_rdb_task(build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        db_file = makePypeLocalFile(
            os.path.join(rawread_dir, "%s.db" % "raw_reads"))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
            rawread_dir, "raw_reads", db_file, rdb_build_done, config)

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))

        @PypeTask(inputs=daligner_out,
                  outputs={"da_done": r_da_done},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/rda_check")
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))

        wf.addTask(check_r_da_task)
        wf.refreshTargets(
            updateFreq=wait_time
        )  # a larger updateFreq suits more jobs; jobs must run here, before the concurrency level is changed below

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs,
                                               concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
            rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        if config["target"] == "overlapping":
            wf.refreshTargets(
                updateFreq=wait_time
            )  # a larger updateFreq suits more jobs; jobs must run here, before the concurrency level is changed below
            sys.exit(0)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={
                      "cns_done": r_cns_done,
                      "pread_fofn": pread_fofn
                  },
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fa" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >> f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(
            updateFreq=wait_time)  # a larger updateFreq suits more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    if config["input_type"] == "preads":
        if not os.path.exists("%s/input_preads.fofn" % pread_dir):
            os.system("cp %s %s/input_preads.fofn" %
                      (os.path.abspath(config["input_fofn_fn"]), pread_dir))
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, "input_preads.fofn"))

    rdb_build_done = makePypeLocalFile(
        os.path.join(pread_dir, "rdb_build_done"))

    @PypeTask(inputs={"pread_fofn": pread_fofn},
              outputs={"rdb_build_done": rdb_build_done},
              parameters={
                  "config": config,
                  "pread_dir": pread_dir
              },
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/build_p_rdb")
    def build_p_rdb_task(self):
        config = self.parameters["config"]
        pread_dir = self.parameters["pread_dir"]
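        # Normalize each pread FASTA: skip reads below length_cutoff_pr or with
        # non-ACGT bases, rewrap sequences at 80 columns under generated names,
        # and import each normalized file into the dazzler "preads" DB.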
        fa_serial = 0
        for fa_fn in open(fn(self.pread_fofn)).readlines():
            fa_fn = fa_fn.strip()
            c = 0
            fa_serial += 1
            with open("%s/preads_norm_%05d.fasta" % (pread_dir, fa_serial),
                      "w") as p_norm:
                f = FastaReader(fa_fn)
                for r in f:
                    if len(r.sequence) < config["length_cutoff_pr"]:
                        continue
                    name = r.name
                    name = name.replace("_", "")
                    ignore_read = False
                    for cc in r.sequence:
                        if cc not in ["A", "C", "G", "T"]:
                            ignore_read = True
                            break
                    if ignore_read:
                        continue
                    print >> p_norm, ">prolog_%05d/%d/%d_%d" % (
                        fa_serial, c, 0, len(r.sequence))
                    for i in range(0, len(r.sequence), 80):
                        print >> p_norm, r.sequence[i:i + 80]
                    c += 1
            os.system("cd %s; fasta2DB preads preads_norm_%05d.fasta" %
                      (pread_dir, fa_serial))

        os.system("cd %s; DBsplit %s preads" %
                  (pread_dir, config["ovlp_DBsplit_option"]))
        os.system("cd %s; HPCdaligner %s preads > run_jobs.sh" %
                  (pread_dir, config["ovlp_HPCdaligner_option"]))
        os.system("cd %s; touch rdb_build_done" % pread_dir)

    wf.addTask(build_p_rdb_task)
    wf.refreshTargets(
        updateFreq=wait_time)  # a larger updateFreq suits more jobs

    db_file = makePypeLocalFile(os.path.join(pread_dir, "%s.db" % "preads"))
    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(pread_dir,
                                                         "preads",
                                                         db_file,
                                                         rdb_build_done,
                                                         config,
                                                         pread_aln=True)
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq = 30) # larger number better for more jobs

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))

    @PypeTask(inputs=daligner_out,
              outputs={"da_done": p_da_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pda_check")
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))

    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
        pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)
    #wf.refreshTargets(updateFreq = 30) #all

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq=wait_time)  #all

    falcon_asm_done = makePypeLocalFile(
        os.path.join(falcon_asm_dir, "falcon_asm_done"))

    @PypeTask(inputs={"p_merge_done": p_merge_done},
              outputs={"falcon_asm_done": falcon_asm_done},
              parameters={
                  "wd": falcon_asm_dir,
                  "config": config,
                  "pread_dir": pread_dir
              },
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/falcon")
    def run_falcon_asm_task(self):
        wd = self.parameters["wd"]
        config = self.parameters["config"]
        install_prefix = config["install_prefix"]
        pread_dir = self.parameters["pread_dir"]
        script_dir = os.path.join(wd)
        script_fn = os.path.join(script_dir, "run_falcon_asm.sh")

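        # Build a shell script that exports preads from the DB, filters
        # overlaps, and runs the string-graph assembly, then submit it to SGE.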
        script = []
        script.append("source {install_prefix}/bin/activate".format(
            install_prefix=install_prefix))
        script.append("cd %s" % pread_dir)
        script.append("DB2Falcon preads")
        script.append("cd %s" % wd)
        script.append("""find %s/las_files -name "*.las" > las.fofn """ %
                      pread_dir)
        overlap_filtering_setting = config["overlap_filtering_setting"]
        length_cutoff_pr = config["length_cutoff_pr"]
        script.append("""fc_ovlp_filter.py --fofn las.fofn %s \
                                 --n_core 24 --min_len %d > preads.ovl""" %
                      (overlap_filtering_setting, length_cutoff_pr))

        script.append("ln -sf %s/preads4falcon.fasta ." % pread_dir)
        script.append("""fc_ovlp_to_graph.py preads.ovl > fc.log""")
        script.append("""fc_graph_to_contig.py""")
        script.append("""touch %s\n""" % fn(self.falcon_asm_done))

        with open(script_fn, "w") as script_file:
            script_file.write("\n".join(script))

        job_name = self.URL.split("/")[-1]
        job_name += "-" + str(uuid.uuid1())[:8]
        job_data = {
            "job_name": job_name,
            "cwd": wd,
            "sge_option": config["sge_option_fc"],
            "script_fn": script_fn
        }
        run_script(job_data, job_type="SGE")
        wait_for_file(fn(self.falcon_asm_done), task=self, job_name=job_name)

    wf.addTask(run_falcon_asm_task)
    wf.refreshTargets(updateFreq=wait_time)  #all
Example #4
        smrt_bin = config.get('Unzip', 'smrt_bin')

    quiver_concurrent_jobs = 8
    if config.has_option('Unzip', 'quiver_concurrent_jobs'):
        quiver_concurrent_jobs = config.getint('Unzip', 'quiver_concurrent_jobs')

    config = {"job_type": job_type,
              "sge_quiver": sge_quiver,
              "smrt_bin": smrt_bin}

    support.job_type = "SGE"  # temporary hack until we have a configuration parser

    ctg_ids = []


    PypeThreadWorkflow.setNumThreadAllowed(quiver_concurrent_jobs, quiver_concurrent_jobs)
    wf = PypeThreadWorkflow()

    ref_seq_data = {}
    p_ctg_fa = FastaReader("./3-unzip/all_p_ctg.fa")
    ctg_types = {}
    for r in p_ctg_fa:
        rid = r.name.split()[0]
        ref_seq_data[rid] = r.sequence
        ctg_types[rid] = "p"


    h_ctg_fa = FastaReader("./3-unzip/all_h_ctg.fa")
    for r in h_ctg_fa:
        rid = r.name.split()[0]
        ref_seq_data[rid] = r.sequence
Example #5
    fasta_dir = os.path.abspath("./0-fasta_files")
    dist_map_dir = os.path.abspath("./1-dist_map")
    pa_dir = os.path.abspath("./2-preads")
    celera_asm_dir = os.path.abspath("./3-CA")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (dist_map_dir, fasta_dir, pa_dir, script_dir, celera_asm_dir, sge_log_dir):
        try:
            os.makedirs(d)
        except OSError:
            pass  # already exists

    config = get_config(sys.argv[1])
    concurrent_jobs = config["concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()


    #### Task to convert bas.h5 and bax.h5 files to FASTA; it generates two fofn files, one for the queries and one for the targets
    input_h5_fofn = makePypeLocalFile(os.path.abspath( config["input_fofn_fn"] ))
    query_fa_fofn = makePypeLocalFile( os.path.join( fasta_dir, "queries.fofn" ) )
    target_fa_fofn = makePypeLocalFile( os.path.join( fasta_dir, "targets.fofn" ) )
    fasta_dump_done = makePypeLocalFile(os.path.abspath( os.path.join( fasta_dir, "fasta_dump_done") ) )
    parameters = {"fasta_dir": fasta_dir,
                  "min_length": config["length_cutoff"],
                  "min_read_score": config["RQ_threshold"]}

    @PypeTask(inputs = {"input_fofn": input_h5_fofn},
              outputs = {"fasta_dump_done": fasta_dump_done, 
                         "target_fa_fofn": target_fa_fofn,
Example #6
    fc_run_logger.info("fc_run started with configuration %s", sys.argv[1])
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        try:
            os.makedirs(d)
        except OSError:
            pass  # already exists

    config = get_config(sys.argv[1])
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        input_h5_fofn = makePypeLocalFile( os.path.abspath( config["input_fofn_fn"] ) )
        rdb_build_done = makePypeLocalFile( os.path.join( rawread_dir, "rdb_build_done") ) 
        parameters = {"work_dir": rawread_dir,
                      "config": config}

        make_build_rdb_task = PypeTask(inputs = {"input_fofn": input_h5_fofn},
                                       outputs = {"rdb_build_done": rdb_build_done},
                                       parameters = parameters,
                                       TaskType = PypeThreadTaskBase)

        build_rdb_task = make_build_rdb_task(build_rdb)
Example #7
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        try:
            os.makedirs(d)
        except OSError:
            pass  # already exists

    config = get_config(sys.argv[1])
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        input_h5_fofn = makePypeLocalFile(
            os.path.abspath(config["input_fofn_fn"]))
        rdb_build_done = makePypeLocalFile(
            os.path.join(rawread_dir, "rdb_build_done"))
        parameters = {"work_dir": rawread_dir, "config": config}

        make_build_rdb_task = PypeTask(
            inputs={"input_fofn": input_h5_fofn},
            outputs={"rdb_build_done": rdb_build_done},
            parameters=parameters,
            TaskType=PypeThreadTaskBase)
Example #8
    fasta_dir = os.path.abspath("./0-fasta_files")
    dist_map_dir = os.path.abspath("./1-dist_map")
    pa_dir = os.path.abspath("./2-preads")
    celera_asm_dir = os.path.abspath("./3-CA")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (dist_map_dir, fasta_dir, pa_dir, script_dir, celera_asm_dir,
              sge_log_dir):
        try:
            os.makedirs(d)
        except OSError:
            pass  # already exists

    config = get_config(sys.argv[1])
    concurrent_jobs = config["concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    #### Task to convert bas.h5 and bax.h5 files to FASTA; it generates two fofn files, one for the queries and one for the targets
    input_h5_fofn = makePypeLocalFile(os.path.abspath(config["input_fofn_fn"]))
    query_fa_fofn = makePypeLocalFile(os.path.join(fasta_dir, "queries.fofn"))
    target_fa_fofn = makePypeLocalFile(os.path.join(fasta_dir, "targets.fofn"))
    fasta_dump_done = makePypeLocalFile(
        os.path.abspath(os.path.join(fasta_dir, "fasta_dump_done")))
    parameters = {
        "fasta_dir": fasta_dir,
        "min_length": config["length_cutoff"],
        "min_read_score": config["RQ_threshold"]
    }

    @PypeTask(inputs={"input_fofn": input_h5_fofn},
Example #9
    quiver_concurrent_jobs = 8
    if config.has_option('Unzip', 'quiver_concurrent_jobs'):
        quiver_concurrent_jobs = config.getint('Unzip',
                                               'quiver_concurrent_jobs')

    config = {
        "job_type": job_type,
        "sge_quiver": sge_quiver,
        "smrt_bin": smrt_bin
    }

    support.job_type = "SGE"  # temporary hack until we have a configuration parser

    ctg_ids = []

    PypeThreadWorkflow.setNumThreadAllowed(quiver_concurrent_jobs,
                                           quiver_concurrent_jobs)
    wf = PypeThreadWorkflow()

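    # Load primary (p) and haplotig (h) contig sequences, recording each contig's type.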
    ref_seq_data = {}
    p_ctg_fa = FastaReader("./3-unzip/all_p_ctg.fa")
    ctg_types = {}
    for r in p_ctg_fa:
        rid = r.name.split()[0]
        ref_seq_data[rid] = r.sequence
        ctg_types[rid] = "p"

    h_ctg_fa = FastaReader("./3-unzip/all_h_ctg.fa")
    for r in h_ctg_fa:
        rid = r.name.split()[0]
        ref_seq_data[rid] = r.sequence
        ctg_types[rid] = "h"
Example #10
    parser.add_argument('--base_dir', type=str, default="./", help='the output base_dir, default to current working directory')

    args = parser.parse_args()
    bam_fn = args.bam
    fasta_fn = args.fasta
    ctg_id = args.ctg_id
    base_dir = args.base_dir
    
    ref_seq = "" 
    for r in FastaReader(fasta_fn):
        rid = r.name.split()[0]
        if rid != ctg_id:
            continue
        ref_seq = r.sequence.upper()

    PypeThreadWorkflow.setNumThreadAllowed(1, 1)
    wf = PypeThreadWorkflow()

    bam_file = makePypeLocalFile(bam_fn)
    vmap_file = makePypeLocalFile( os.path.join(base_dir, ctg_id, "variant_map") )
    vpos_file = makePypeLocalFile( os.path.join(base_dir, ctg_id, "variant_pos") )
    q_id_map_file = makePypeLocalFile( os.path.join(base_dir, ctg_id, "q_id_map") )
    parameters = {}
    parameters["ctg_id"] = ctg_id
    parameters["ref_seq"] = ref_seq
    parameters["base_dir"] = base_dir
    
    make_het_call_task = PypeTask( inputs = { "bam_file": bam_file },
                         outputs = { "vmap_file": vmap_file, "vpos_file": vpos_file, "q_id_map_file": q_id_map_file },
Example #11
def phasing(args):
    bam_fn = args.bam
    fasta_fn = args.fasta
    ctg_id = args.ctg_id
    base_dir = args.base_dir

    ref_seq = ""
    for r in FastaReader(fasta_fn):
        rid = r.name.split()[0]
        if rid != ctg_id:
            continue
        ref_seq = r.sequence.upper()

    PypeThreadWorkflow.setNumThreadAllowed(1, 1)
    wf = PypeThreadWorkflow()

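    # Four-stage phasing pipeline: het-variant calling from the BAM, variant
    # association table, phased-block derivation, and read-to-phase assignment.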
    bam_file = makePypeLocalFile(bam_fn)
    vmap_file = makePypeLocalFile(os.path.join(base_dir, ctg_id,
                                               "variant_map"))
    vpos_file = makePypeLocalFile(os.path.join(base_dir, ctg_id,
                                               "variant_pos"))
    q_id_map_file = makePypeLocalFile(
        os.path.join(base_dir, ctg_id, "q_id_map"))
    parameters = {}
    parameters["ctg_id"] = ctg_id
    parameters["ref_seq"] = ref_seq
    parameters["base_dir"] = base_dir

    make_het_call_task = PypeTask(
        inputs={"bam_file": bam_file},
        outputs={
            "vmap_file": vmap_file,
            "vpos_file": vpos_file,
            "q_id_map_file": q_id_map_file
        },
        parameters=parameters,
        TaskType=PypeThreadTaskBase,
        URL="task://localhost/het_call")(make_het_call)

    wf.addTasks([make_het_call_task])

    atable_file = makePypeLocalFile(os.path.join(base_dir, ctg_id, "atable"))
    parameters = {}
    parameters["ctg_id"] = ctg_id
    parameters["base_dir"] = base_dir
    generate_association_table_task = PypeTask(
        inputs={"vmap_file": vmap_file},
        outputs={"atable_file": atable_file},
        parameters=parameters,
        TaskType=PypeThreadTaskBase,
        URL="task://localhost/g_atable")(generate_association_table)

    wf.addTasks([generate_association_table_task])

    phased_variant_file = makePypeLocalFile(
        os.path.join(base_dir, ctg_id, "phased_variants"))
    get_phased_blocks_task = PypeTask(
        inputs={
            "vmap_file": vmap_file,
            "atable_file": atable_file
        },
        outputs={"phased_variant_file": phased_variant_file},
        TaskType=PypeThreadTaskBase,
        URL="task://localhost/get_phased_blocks")(get_phased_blocks)
    wf.addTasks([get_phased_blocks_task])

    phased_read_file = makePypeLocalFile(
        os.path.join(base_dir, ctg_id, "phased_reads"))
    get_phased_reads_task = PypeTask(
        inputs={
            "vmap_file": vmap_file,
            "q_id_map_file": q_id_map_file,
            "phased_variant_file": phased_variant_file
        },
        outputs={"phased_read_file": phased_read_file},
        parameters={"ctg_id": ctg_id},
        TaskType=PypeThreadTaskBase,
        URL="task://localhost/get_phased_reads")(get_phased_reads)
    wf.addTasks([get_phased_reads_task])

    wf.refreshTargets()
Example #12
def main1(prog_name, input_config_fn, logger_config_fn=None):
    global fc_run_logger
    fc_run_logger = support.setup_logger(logger_config_fn)

    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    try:
        config = support.get_dict_from_old_falcon_cfg(support.parse_config(input_config_fn))
    except Exception:
        fc_run_logger.exception('Failed to parse config "{}".'.format(input_config_fn))
        raise
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        support.make_dirs(d)

    exitOnFailure = config['stop_all_jobs_on_failure']  # only matters for parallel jobs
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(config["input_fofn"])
    rawread_fofn_plf = makePypeLocalFile(os.path.join(rawread_dir, os.path.basename(config["input_fofn"])))
    make_fofn_abs_task = PypeTask(inputs = {"i_fofn": input_fofn_plf},
                                  outputs = {"o_fofn": rawread_fofn_plf},
                                  parameters = {},
                                  TaskType = PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_raw)

    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile( os.path.join( rawread_dir, "sleep_done") )
        rdb_build_done = makePypeLocalFile( os.path.join( rawread_dir, "rdb_build_done") )
        run_jobs = makePypeLocalFile( os.path.join( rawread_dir, "run_jobs.sh") )
        parameters = {"work_dir": rawread_dir,
                      "config": config}

        raw_reads_db_plf = makePypeLocalFile(os.path.join(rawread_dir, "%s.db" % "raw_reads"))
        make_build_rdb_task = PypeTask(inputs = {"input_fofn": rawread_fofn_plf},
                                      outputs = {"rdb_build_done": rdb_build_done,
                                                 "raw_reads_db": raw_reads_db_plf,
                                                 "run_jobs": run_jobs,
                                      },
                                      parameters = parameters,
                                      TaskType = PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(task_build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

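        # nblock: the number of blocks in the dazzler DB; it sizes the daligner
        # fan-out and the gather step below.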
        raw_reads_nblock = support.get_nblock(fn(raw_reads_db_plf))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs), rawread_dir, "raw_reads", rdb_build_done,
                nblock=raw_reads_nblock, config=config)

        wf.addTasks(daligner_tasks)
        r_da_done = makePypeLocalFile( os.path.join( rawread_dir, "da_done") )

        parameters =  {
                "nblock": raw_reads_nblock,
        }
        make_daligner_gather = PypeTask(
                   inputs = daligner_out,
                   outputs =  {"da_done":r_da_done},
                   parameters = parameters,
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/rda_check" )
        check_r_da_task = make_daligner_gather(task_daligner_gather)
        wf.addTask(check_r_da_task)
        wf.refreshTargets(exitOnFailure=exitOnFailure)

        merge_tasks, merge_out, p_ids_merge_job_done = create_merge_tasks(fn(run_jobs), rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks( merge_tasks )
        wf.refreshTargets(exitOnFailure=exitOnFailure)

        if config["target"] == "overlapping":
            sys.exit(0)
        consensus_tasks, consensus_out = create_consensus_tasks(rawread_dir, "raw_reads", config, p_ids_merge_job_done)
        wf.addTasks( consensus_tasks )

        r_cns_done = makePypeLocalFile( os.path.join( rawread_dir, "cns_done") )
        pread_fofn = makePypeLocalFile( os.path.join( pread_dir,  "input_preads.fofn" ) )

        @PypeTask( inputs = consensus_out,
                   outputs =  {"cns_done":r_cns_done, "pread_fofn": pread_fofn},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/cns_check" )
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn),  "w") as f:
                fn_list =  glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >>f, fa_fn
            system("touch %s" % fn(self.cns_done))
        wf.addTask(check_r_cns_task)

        length_cutoff_plf = makePypeLocalFile(os.path.join(rawread_dir, "length_cutoff"))
        pre_assembly_report_plf = makePypeLocalFile(os.path.join(rawread_dir, "pre_assembly_stats.json"))  # though technically it also needs pread_fofn
        make_task = PypeTask(
                inputs = {"length_cutoff_fn": length_cutoff_plf,
                          "raw_reads_db": raw_reads_db_plf,
                          "preads_fofn": pread_fofn, },
                outputs = {"pre_assembly_report": pre_assembly_report_plf, },
                parameters = config,
                TaskType = PypeThreadTaskBase,
                URL = "task://localhost/report_pre_assembly")
        task = make_task(task_report_pre_assembly)
        wf.addTask(task)

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        wf.refreshTargets(exitOnFailure=exitOnFailure)

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, os.path.basename(config["input_fofn"])))
        make_fofn_abs_task = PypeTask(inputs = {"i_fofn": rawread_fofn_plf},
                                     outputs = {"o_fofn": pread_fofn},
                                     parameters = {},
                                     TaskType = PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile( os.path.join( pread_dir, "pdb_build_done") )
    parameters = {"work_dir": pread_dir,
                  "config": config}

    run_jobs = makePypeLocalFile(os.path.join(pread_dir, 'run_jobs.sh'))
    preads_db = makePypeLocalFile(os.path.join(pread_dir, 'preads.db')) # Also .preads.*, of course.
    make_build_pdb_task  = PypeTask(inputs = {"pread_fofn": pread_fofn },
                                    outputs = {"pdb_build_done": pdb_build_done,
                                               "preads_db": preads_db,
                                               "run_jobs": run_jobs},
                                    parameters = parameters,
                                    TaskType = PypeThreadTaskBase,
                                    URL = "task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(task_build_pdb)

    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done])


    preads_nblock = support.get_nblock(fn(preads_db))
    #### run daligner
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs), pread_dir, "preads", pdb_build_done,
                nblock=preads_nblock, config=config, pread_aln=True)
    wf.addTasks(daligner_tasks)

    p_da_done = makePypeLocalFile(os.path.join( pread_dir, "da_done"))
    parameters =  {
            "nblock": preads_nblock,
    }
    make_daligner_gather = PypeTask(
                inputs = daligner_out,
                outputs =  {"da_done":p_da_done},
                parameters = parameters,
                TaskType = PypeThreadTaskBase,
                URL = "task://localhost/pda_check" )
    check_p_da_task = make_daligner_gather(task_daligner_gather)
    wf.addTask(check_p_da_task)
    wf.refreshTargets(exitOnFailure=exitOnFailure)

    merge_tasks, merge_out, _ = create_merge_tasks(fn(run_jobs), pread_dir, "preads", p_da_done, config)
    wf.addTasks( merge_tasks )

    p_merge_done = makePypeLocalFile(os.path.join( pread_dir, "p_merge_done"))

    @PypeTask( inputs = merge_out,
               outputs =  {"p_merge_done": p_merge_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pmerge_check" )
    def check_p_merge_check_task(self):
        system("touch %s" % fn(self.p_merge_done))
    wf.addTask(check_p_merge_check_task)

    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)

    wf.refreshTargets(exitOnFailure=exitOnFailure)


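    # DB2Falcon exports preads4falcon.fasta from the preads DB; the final
    # assembly task depends on it via db2falcon_done.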
    db2falcon_done = makePypeLocalFile( os.path.join(pread_dir, "db2falcon_done"))
    make_run_db2falcon = PypeTask(
               inputs = {"p_merge_done": p_merge_done,},
               outputs =  {"db2falcon_done": db2falcon_done},
               parameters = {"wd": pread_dir,
                             "config": config,
                            },
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/db2falcon" )
    wf.addTask(make_run_db2falcon(task_run_db2falcon))

    falcon_asm_done = makePypeLocalFile( os.path.join( falcon_asm_dir, "falcon_asm_done") )
    make_run_falcon_asm = PypeTask(
               inputs = {"db2falcon_done": db2falcon_done, "db_file": preads_db},
               outputs =  {"falcon_asm_done": falcon_asm_done},
               parameters = {"wd": falcon_asm_dir,
                             "config": config,
                             "pread_dir": pread_dir},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/falcon" )
    wf.addTask(make_run_falcon_asm(task_run_falcon_asm))
    wf.refreshTargets()
Example #13
        help='the output base_dir, default to current working directory')

    args = parser.parse_args()
    bam_fn = args.bam
    fasta_fn = args.fasta
    ctg_id = args.ctg_id
    base_dir = args.base_dir

    ref_seq = ""
    for r in FastaReader(fasta_fn):
        rid = r.name.split()[0]
        if rid != ctg_id:
            continue
        ref_seq = r.sequence.upper()

    PypeThreadWorkflow.setNumThreadAllowed(1, 1)
    wf = PypeThreadWorkflow()

    bam_file = makePypeLocalFile(bam_fn)
    vmap_file = makePypeLocalFile(os.path.join(base_dir, ctg_id,
                                               "variant_map"))
    vpos_file = makePypeLocalFile(os.path.join(base_dir, ctg_id,
                                               "variant_pos"))
    q_id_map_file = makePypeLocalFile(
        os.path.join(base_dir, ctg_id, "q_id_map"))
    parameters = {}
    parameters["ctg_id"] = ctg_id
    parameters["ref_seq"] = ref_seq
    parameters["base_dir"] = base_dir

    make_het_call_task = PypeTask(
Example #14
def main(argv=sys.argv):

    global fc_run_logger
    fc_run_logger = support.setup_logger(None)

    if len(argv) < 2:
        print "you need to provide a configuration file to specify the cluster running environment"
        sys.exit(1)

    config_fn = argv[1]

    config = ConfigParser.ConfigParser()
    config.read(config_fn)

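    # Pull runtime settings from the [General] and [Unzip] sections, keeping
    # the hard-coded defaults when an option is absent.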
    job_type = "SGE"
    if config.has_option('General', 'job_type'):
        job_type = config.get('General', 'job_type')

    sge_track_reads = " -pe smp 12 -q bigmem"
    if config.has_option('Unzip', 'sge_track_reads'):
        sge_track_reads = config.get('Unzip', 'sge_track_reads')

    sge_quiver = " -pe smp 24 -q bigmem "
    if config.has_option('Unzip', 'sge_quiver'):
        sge_quiver = config.get('Unzip', 'sge_quiver')

    smrt_bin = "/mnt/secondary/builds/full/3.0.0/prod/smrtanalysis_3.0.0.153854/smrtcmds/bin/"
    if config.has_option('Unzip', 'smrt_bin'):
        smrt_bin = config.get('Unzip', 'smrt_bin')

    input_bam_fofn = "input_bam.fofn"
    if config.has_option('Unzip', 'input_bam_fofn'):
        input_bam_fofn = config.get('Unzip', 'input_bam_fofn')

    quiver_concurrent_jobs = 8
    if config.has_option('Unzip', 'quiver_concurrent_jobs'):
        quiver_concurrent_jobs = config.getint('Unzip',
                                               'quiver_concurrent_jobs')

    config = {
        "job_type": job_type,
        "sge_quiver": sge_quiver,
        "sge_track_reads": sge_track_reads,
        "input_bam_fofn": input_bam_fofn,
        "smrt_bin": smrt_bin
    }

    support.job_type = "SGE"  # temporary hack until we have a configuration parser

    ctg_ids = []

    PypeThreadWorkflow.setNumThreadAllowed(quiver_concurrent_jobs,
                                           quiver_concurrent_jobs)
    wf = PypeThreadWorkflow()

    parameters = {"wd": os.path.abspath("."), "config": config}
    hasm_done = makePypeLocalFile("./3-unzip/1-hasm/hasm_done")
    job_done = makePypeLocalFile(
        os.path.join(parameters["wd"], "track_reads_h_done"))
    make_track_reads_task = PypeTask(inputs={"hasm_done": hasm_done},
                                     outputs={"job_done": job_done},
                                     parameters=parameters,
                                     TaskType=PypeThreadTaskBase,
                                     URL="task://localhost/track_reads_h")
    track_reads_task = make_track_reads_task(task_track_reads)

    wf.addTask(track_reads_task)
    wf.refreshTargets()  # force refresh now; proper dependences will be added later

    ref_seq_data = {}
    p_ctg_fa = FastaReader("./3-unzip/all_p_ctg.fa")
    ctg_types = {}
    for r in p_ctg_fa:
        rid = r.name.split()[0]
        ref_seq_data[rid] = r.sequence
        ctg_types[rid] = "p"

    h_ctg_fa = FastaReader("./3-unzip/all_h_ctg.fa")
    for r in h_ctg_fa:
        rid = r.name.split()[0]
        ref_seq_data[rid] = r.sequence
        ctg_types[rid] = "h"

    ctg_ids = sorted(ref_seq_data.keys())
    p_ctg_out = []
    h_ctg_out = []
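    # One quiver consensus task per contig; contigs without an aligned-read
    # SAM file are skipped.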
    for ctg_id in ctg_ids:
        sequence = ref_seq_data[ctg_id]
        m_ctg_id = ctg_id.split("-")[0]
        wd = os.path.join(os.getcwd(), "./4-quiver/", m_ctg_id)
        mkdir(wd)
        ref_fasta = makePypeLocalFile(
            os.path.join(wd, "{ctg_id}_ref.fa".format(ctg_id=ctg_id)))
        read_sam = makePypeLocalFile(
            os.path.join(
                os.getcwd(), "./4-quiver/reads/"
                "{ctg_id}.sam".format(ctg_id=ctg_id)))
        cns_fasta = makePypeLocalFile(
            os.path.join(wd, "cns-{ctg_id}.fasta.gz".format(ctg_id=ctg_id)))
        cns_fastq = makePypeLocalFile(
            os.path.join(wd, "cns-{ctg_id}.fastq.gz".format(ctg_id=ctg_id)))
        job_done = makePypeLocalFile(
            os.path.join(wd, "{ctg_id}_quiver_done".format(ctg_id=ctg_id)))

        if os.path.exists(fn(read_sam)):
            if ctg_types[ctg_id] == "p":
                p_ctg_out.append((cns_fasta, cns_fastq))
            if ctg_types[ctg_id] == "h":
                h_ctg_out.append((cns_fasta, cns_fastq))
            if not os.path.exists(fn(ref_fasta)):
                with open(fn(ref_fasta), "w") as f:
                    print >> f, ">" + ctg_id
                    print >> f, sequence
            parameters = {
                "job_uid": "q-" + ctg_id,
                "wd": wd,
                "config": config,
                "ctg_id": ctg_id
            }
            make_quiver_task = PypeTask(
                inputs={
                    "ref_fasta": ref_fasta,
                    "read_sam": read_sam
                },
                outputs={
                    "cns_fasta": cns_fasta,
                    "cns_fastq": cns_fastq,
                    "job_done": job_done
                },
                parameters=parameters,
                TaskType=PypeThreadTaskBase,
                URL="task://localhost/q_{ctg_id}".format(ctg_id=ctg_id))
            quiver_task = make_quiver_task(task_run_quiver)
            wf.addTask(quiver_task)

    wf.refreshTargets()
    os.system("sleep 30")

    mkdir("./4-quiver/cns_output")
    os.system("rm ./4-quiver/cns_output/cns_p_ctg.fasta")
    os.system("rm ./4-quiver/cns_output/cns_p_ctg.fastq")
    for cns_fasta, cns_fastq in sorted(p_ctg_out):
        os.system(
            "zcat {cns_fasta} >> ./4-quiver/cns_output/cns_p_ctg.fasta".format(
                cns_fasta=fn(cns_fasta)))
        os.system(
            "zcat {cns_fastq} >> ./4-quiver/cns_output/cns_p_ctg.fastq".format(
                cns_fastq=fn(cns_fastq)))

    os.system("rm ./4-quiver/cns_output/cns_h_ctg.fasta")
    os.system("rm ./4-quiver/cns_output/cns_h_ctg.fastq")
    for cns_fasta, cns_fastq in sorted(h_ctg_out):
        os.system(
            "zcat {cns_fasta} >> ./4-quiver/cns_output/cns_h_ctg.fasta".format(
                cns_fasta=fn(cns_fasta)))
        os.system(
            "zcat {cns_fastq} >> ./4-quiver/cns_output/cns_h_ctg.fastq".format(
                cns_fastq=fn(cns_fastq)))
Example #15
def main1(prog_name, input_config_fn, logger_config_fn=None):
    setup_logger(logger_config_fn)

    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    config = get_config(parse_config(input_config_fn))
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        make_dirs(d)

    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(
        os.path.basename(config["input_fofn_fn"]))
    rawread_fofn_plf = makePypeLocalFile(
        os.path.join(rawread_dir, os.path.basename(config["input_fofn_fn"])))
    make_fofn_abs_task = PypeTask(inputs={"i_fofn": input_fofn_plf},
                                  outputs={"o_fofn": rawread_fofn_plf},
                                  parameters={},
                                  TaskType=PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(make_fofn_abs_raw)
    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile(os.path.join(rawread_dir, "sleep_done"))
        rdb_build_done = makePypeLocalFile(
            os.path.join(rawread_dir, "rdb_build_done"))
        parameters = {"work_dir": rawread_dir, "config": config}

        make_build_rdb_task = PypeTask(
            inputs={"input_fofn": rawread_fofn_plf},
            outputs={"rdb_build_done": rdb_build_done},
            parameters=parameters,
            TaskType=PypeThreadTaskBase)

        build_rdb_task = make_build_rdb_task(build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done])

        db_file = makePypeLocalFile(
            os.path.join(rawread_dir, "%s.db" % "raw_reads"))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(
            rawread_dir, "raw_reads", db_file, rdb_build_done, config)

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile(os.path.join(rawread_dir, "da_done"))

        @PypeTask(inputs=daligner_out,
                  outputs={"da_done": r_da_done},
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/rda_check")
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))

        wf.addTask(check_r_da_task)
        wf.refreshTargets(
            updateFreq=wait_time
        )  # a larger updateFreq suits more jobs; jobs must run here, before the concurrency level is changed below

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs,
                                               concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
            rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks(merge_tasks)
        if config["target"] == "overlapping":
            wf.refreshTargets(
                updateFreq=wait_time
            )  # a larger updateFreq suits more jobs; jobs must run here, before the concurrency level is changed below
            sys.exit(0)
        wf.addTasks(consensus_tasks)

        r_cns_done = makePypeLocalFile(os.path.join(rawread_dir, "cns_done"))
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, "input_preads.fofn"))

        @PypeTask(inputs=consensus_out,
                  outputs={
                      "cns_done": r_cns_done,
                      "pread_fofn": pread_fofn
                  },
                  TaskType=PypeThreadTaskBase,
                  URL="task://localhost/cns_check")
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn), "w") as f:
                fn_list = glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >> f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(
            updateFreq=wait_time)  # a larger updateFreq suits more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(
            os.path.join(pread_dir, os.path.basename(config["input_fofn_fn"])))
        make_fofn_abs_task = PypeTask(inputs={"i_fofn": rawread_fofn_plf},
                                      outputs={"o_fofn": pread_fofn},
                                      parameters={},
                                      TaskType=PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile(
        os.path.join(pread_dir, "pdb_build_done"))
    parameters = {"work_dir": pread_dir, "config": config}

    make_build_pdb_task = PypeTask(inputs={"pread_fofn": pread_fofn},
                                   outputs={"pdb_build_done": pdb_build_done},
                                   parameters=parameters,
                                   TaskType=PypeThreadTaskBase,
                                   URL="task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(build_pdb)

    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done])

    db_file = makePypeLocalFile(os.path.join(pread_dir, "%s.db" % "preads"))
    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(pread_dir,
                                                         "preads",
                                                         db_file,
                                                         pdb_build_done,
                                                         config,
                                                         pread_aln=True)
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq = 30) # larger number better for more jobs

    p_da_done = makePypeLocalFile(os.path.join(pread_dir, "da_done"))

    @PypeTask(inputs=daligner_out,
              outputs={"da_done": p_da_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pda_check")
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))

    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(
        pread_dir, "preads", p_da_done, config)
    wf.addTasks(merge_tasks)
    #wf.refreshTargets(updateFreq = 30) #all

    p_merge_done = makePypeLocalFile(os.path.join(pread_dir, "p_merge_done"))

    @PypeTask(inputs=merge_out,
              outputs={"p_merge_done": p_merge_done},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/pmerge_check")
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))

    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq=wait_time)  #all

    falcon_asm_done = makePypeLocalFile(
        os.path.join(falcon_asm_dir, "falcon_asm_done"))

    @PypeTask(inputs={
        "p_merge_done": p_merge_done,
        "db_file": db_file
    },
              outputs={"falcon_asm_done": falcon_asm_done},
              parameters={
                  "wd": falcon_asm_dir,
                  "config": config,
                  "pread_dir": pread_dir
              },
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/falcon")
    def run_falcon_asm_task(self):
        wd = self.parameters["wd"]
        config = self.parameters["config"]
        install_prefix = config["install_prefix"]
        pread_dir = self.parameters["pread_dir"]
        script_dir = os.path.join(wd)
        script_fn = os.path.join(script_dir, "run_falcon_asm.sh")

        script = []
        script.append("set -vex")
        script.append("trap 'touch %s.exit' EXIT" % fn(self.falcon_asm_done))
        script.append("source {install_prefix}/bin/activate".format(
            install_prefix=install_prefix))
        script.append("cd %s" % pread_dir)
        # Write preads4falcon.fasta, in 1-preads_ovl:
        script.append("DB2Falcon -U preads")
        script.append("cd %s" % wd)
        script.append("""find %s/las_files -name "*.las" > las.fofn """ %
                      pread_dir)
        overlap_filtering_setting = config["overlap_filtering_setting"]
        length_cutoff_pr = config["length_cutoff_pr"]
        script.append( """fc_ovlp_filter.py --db %s --fofn las.fofn %s --min_len %d > preads.ovl""" %\
                (fn(db_file), overlap_filtering_setting, length_cutoff_pr) )
        script.append("ln -sf %s/preads4falcon.fasta ." % pread_dir)
        script.append(
            """fc_ovlp_to_graph.py preads.ovl --min_len %d > fc_ovlp_to_graph.log"""
            % length_cutoff_pr)  # TODO: drop this logfile
        # Write 'p_ctg.fa' and 'a_ctg.fa':
        script.append("""fc_graph_to_contig.py""")
        script.append("""touch %s""" % fn(self.falcon_asm_done))

        with open(script_fn, "w") as script_file:
            script_file.write("\n".join(script))

        job_name = self.URL.split("/")[-1]
        job_name += "-" + str(uuid.uuid4())[:8]
        job_data = {
            "job_name": job_name,
            "cwd": wd,
            "sge_option": config["sge_option_fc"],
            "script_fn": script_fn
        }
        run_script(job_data, job_type=config["job_type"])
        wait_for_file(fn(self.falcon_asm_done), task=self, job_name=job_name)

    wf.addTask(run_falcon_asm_task)
    wf.refreshTargets(updateFreq=wait_time)  #all
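
The `trap 'touch %s.exit' EXIT` line in the generated script pairs with wait_for_file(): the shell touches "<done>.exit" on any exit, so a watcher can tell a job that died from one still running. A minimal sketch of such a watcher, assuming that convention (the real helper lives in FALCON's support module; the unused `task` argument and the poll interval are guesses):

import os
import time

def wait_for_file_sketch(path, task=None, job_name="", poll_s=30):
    # Poll until `path` exists; if "<path>.exit" appears first, the job's
    # EXIT trap fired without producing the done-file, i.e. it failed.
    # Note the trap also fires on success, so test for the done-file first.
    exit_path = path + ".exit"
    while not os.path.exists(path):
        if os.path.exists(exit_path):
            raise Exception("job %r exited without producing %s" % (job_name, path))
        time.sleep(poll_s)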
Example #16
0
def main1(prog_name, input_config_fn, logger_config_fn=None):
    setup_logger(logger_config_fn)

    fc_run_logger.info( "fc_run started with configuration %s", input_config_fn ) 
    config = get_config(parse_config(input_config_fn))
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir  = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        make_dirs(d)

    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(os.path.basename(config["input_fofn_fn"]))
    rawread_fofn_plf = makePypeLocalFile(os.path.join(rawread_dir, os.path.basename(config["input_fofn_fn"])))
    make_fofn_abs_task = PypeTask(inputs = {"i_fofn": input_fofn_plf},
                                  outputs = {"o_fofn": rawread_fofn_plf},
                                  parameters = {},
                                  TaskType = PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(make_fofn_abs_raw)
    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile( os.path.join( rawread_dir, "sleep_done") )
        rdb_build_done = makePypeLocalFile( os.path.join( rawread_dir, "rdb_build_done") ) 
        parameters = {"work_dir": rawread_dir,
                      "config": config}

        make_build_rdb_task = PypeTask(inputs = {"input_fofn": rawread_fofn_plf},
                                      outputs = {"rdb_build_done": rdb_build_done}, 
                                      parameters = parameters,
                                      TaskType = PypeThreadTaskBase)

        build_rdb_task = make_build_rdb_task(build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done]) 
        

        db_file = makePypeLocalFile(os.path.join( rawread_dir, "%s.db" % "raw_reads" ))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks( rawread_dir, "raw_reads", db_file, rdb_build_done, config) 

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile( os.path.join( rawread_dir, "da_done") )

        @PypeTask( inputs = daligner_out, 
                   outputs =  {"da_done":r_da_done},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/rda_check" )
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))
        
        wf.addTask(check_r_da_task)
        wf.refreshTargets(updateFreq = wait_time) # larger updateFreq suits many jobs; refreshTargets must be called here so the queued jobs run before the concurrency setting changes
        
        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks( rawread_dir, "raw_reads", r_da_done, config )
        wf.addTasks( merge_tasks )
        if config["target"] == "overlapping":
            wf.refreshTargets(updateFreq = wait_time) # larger updateFreq suits many jobs; refreshTargets must be called here so the queued jobs run before the concurrency setting changes
            sys.exit(0)
        wf.addTasks( consensus_tasks )

        r_cns_done = makePypeLocalFile( os.path.join( rawread_dir, "cns_done") )
        pread_fofn = makePypeLocalFile( os.path.join( pread_dir,  "input_preads.fofn" ) )

        @PypeTask( inputs = consensus_out, 
                   outputs =  {"cns_done":r_cns_done, "pread_fofn": pread_fofn},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/cns_check" )
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn),  "w") as f:
                fn_list =  glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >>f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, os.path.basename(config["input_fofn_fn"])))
        make_fofn_abs_task = PypeTask(inputs = {"i_fofn": rawread_fofn_plf},
                                     outputs = {"o_fofn": pread_fofn},
                                     parameters = {},
                                     TaskType = PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile( os.path.join( pread_dir, "pdb_build_done") ) 
    parameters = {"work_dir": pread_dir,
                  "config": config}

    make_build_pdb_task  = PypeTask( inputs = { "pread_fofn": pread_fofn },
                                    outputs = { "pdb_build_done": pdb_build_done },
                                    parameters = parameters,
                                    TaskType = PypeThreadTaskBase,
                                    URL = "task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(build_pdb)

    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done]) 

    db_file = makePypeLocalFile(os.path.join( pread_dir, "%s.db" % "preads" ))
    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks( pread_dir, "preads", db_file, pdb_build_done, config, pread_aln= True) 
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq = 30) # larger number better for more jobs

    p_da_done = makePypeLocalFile( os.path.join( pread_dir, "da_done") )

    @PypeTask( inputs = daligner_out, 
               outputs =  {"da_done":p_da_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pda_check" )
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))
    
    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks( pread_dir, "preads", p_da_done, config )
    wf.addTasks( merge_tasks )
    #wf.refreshTargets(updateFreq = 30) #all            

    p_merge_done = makePypeLocalFile( os.path.join( pread_dir, "p_merge_done") )

    @PypeTask( inputs = merge_out, 
               outputs =  {"p_merge_done":p_merge_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pmerge_check" )
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))
    
    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq = wait_time) #all            

    
    falcon_asm_done = makePypeLocalFile( os.path.join( falcon_asm_dir, "falcon_asm_done") )
    @PypeTask( inputs = {"p_merge_done": p_merge_done, "db_file":db_file}, 
               outputs =  {"falcon_asm_done":falcon_asm_done},
               parameters = {"wd": falcon_asm_dir,
                             "config": config,
                             "pread_dir": pread_dir},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/falcon" )
    def run_falcon_asm_task(self):
        wd = self.parameters["wd"]
        config = self.parameters["config"]
        install_prefix = config["install_prefix"]
        pread_dir = self.parameters["pread_dir"]
        script_dir = os.path.join( wd )
        script_fn =  os.path.join( script_dir ,"run_falcon_asm.sh" )
        
        script = []
        script.append( "set -vex" )
        script.append( "trap 'touch %s.exit' EXIT" % fn(self.falcon_asm_done) )
        script.append( "source {install_prefix}/bin/activate".format(install_prefix = install_prefix) )
        script.append( "cd %s" % pread_dir )
        # Write preads4falcon.fasta, in 1-preads_ovl:
        script.append( "DB2Falcon -U preads")
        script.append( "cd %s" % wd )
        script.append( """find %s/las_files -name "*.las" > las.fofn """ % pread_dir )
        overlap_filtering_setting = config["overlap_filtering_setting"]
        length_cutoff_pr = config["length_cutoff_pr"]
        script.append( """fc_ovlp_filter.py --db %s --fofn las.fofn %s --min_len %d > preads.ovl""" %\
                (fn(db_file), overlap_filtering_setting, length_cutoff_pr) )
        script.append( "ln -sf %s/preads4falcon.fasta ." % pread_dir)
        script.append( """fc_ovlp_to_graph.py preads.ovl --min_len %d > fc_ovlp_to_graph.log""" % length_cutoff_pr) # TODO: drop this logfile
        # Write 'p_ctg.fa' and 'a_ctg.fa':
        script.append( """fc_graph_to_contig.py""" )
        script.append( """touch %s""" % fn(self.falcon_asm_done))

        with open(script_fn, "w") as script_file:
            script_file.write("\n".join(script))

        job_data = make_job_data(self.URL, script_fn)
        job_data["sge_option"] = config["sge_option_fc"]
        run_script(job_data, job_type = config["job_type"])
        wait_for_file(fn(self.falcon_asm_done), task=self, job_name=job_data['job_name'])
    
    wf.addTask( run_falcon_asm_task )
    wf.refreshTargets(updateFreq = wait_time) #all            
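
Unlike the previous example, this variant builds its queue entry with a make_job_data() helper and only patches in the SGE option afterwards. Judging from the inline versions elsewhere in these examples (task-URL suffix plus a short uuid), it plausibly looks like the sketch below; the cwd choice is a guess, since the inline versions pass the working directory explicitly:

import os
import uuid

def make_job_data(url, script_fn):
    # Hypothetical reconstruction mirroring the inline pattern in the other
    # examples; not the actual FALCON helper.
    job_name = url.split("/")[-1] + "-" + str(uuid.uuid4())[:8]
    return {
        "job_name": job_name,
        "cwd": os.path.dirname(script_fn),  # assumption
        "script_fn": script_fn,
    }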
Example #17
0
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir  = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        try:
            os.makedirs(d)
        except OSError:
            pass

    config = get_config(sys.argv[1])
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        input_h5_fofn = makePypeLocalFile( os.path.abspath( config["input_fofn_fn"] ) )
        rdb_build_done = makePypeLocalFile( os.path.join( rawread_dir, "rdb_build_done") ) 
        parameters = {"work_dir": rawread_dir,
                      "config": config} 

        make_build_rdb_task = PypeTask(inputs = {"input_fofn": input_h5_fofn},
                                      outputs = {"rdb_build_done": rdb_build_done}, 
                                      parameters = parameters,
                                      TaskType = PypeThreadTaskBase)

        build_rdb_task = make_build_rdb_task(build_rdb)
Example #18
0
def main1(prog_name, input_config_fn, logger_config_fn=None):
    global fc_run_logger
    fc_run_logger = support.setup_logger(logger_config_fn)

    fc_run_logger.info( "fc_run started with configuration %s", input_config_fn ) 
    config = support.get_config(support.parse_config(input_config_fn))
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir  = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        support.make_dirs(d)

    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(os.path.basename(config["input_fofn_fn"]))
    rawread_fofn_plf = makePypeLocalFile(os.path.join(rawread_dir, os.path.basename(config["input_fofn_fn"])))
    make_fofn_abs_task = PypeTask(inputs = {"i_fofn": input_fofn_plf},
                                  outputs = {"o_fofn": rawread_fofn_plf},
                                  parameters = {},
                                  TaskType = PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_raw)

    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile( os.path.join( rawread_dir, "sleep_done") )
        rdb_build_done = makePypeLocalFile( os.path.join( rawread_dir, "rdb_build_done") ) 
        run_jobs = makePypeLocalFile( os.path.join( rawread_dir, "run_jobs.sh") ) 
        parameters = {"work_dir": rawread_dir,
                      "config": config}

        make_build_rdb_task = PypeTask(inputs = {"input_fofn": rawread_fofn_plf},
                                      outputs = {"rdb_build_done": rdb_build_done,
                                                 "run_jobs": run_jobs}, 
                                      parameters = parameters,
                                      TaskType = PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(task_build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done]) 

        db_file = makePypeLocalFile(os.path.join( rawread_dir, "%s.db" % "raw_reads" ))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs), rawread_dir, "raw_reads", db_file, rdb_build_done, config) 

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile( os.path.join( rawread_dir, "da_done") )

        @PypeTask( inputs = daligner_out, 
                   outputs =  {"da_done":r_da_done},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/rda_check" )
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))
        
        wf.addTask(check_r_da_task)
        wf.refreshTargets(updateFreq = wait_time) # larger updateFreq suits many jobs; refreshTargets must be called here so the queued jobs run before the concurrency setting changes
        
        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(fn(run_jobs), rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks( merge_tasks )
        if config["target"] == "overlapping":
            wf.refreshTargets(updateFreq = wait_time) # larger updateFreq suits many jobs; refreshTargets must be called here so the queued jobs run before the concurrency setting changes
            sys.exit(0)
        wf.addTasks( consensus_tasks )

        r_cns_done = makePypeLocalFile( os.path.join( rawread_dir, "cns_done") )
        pread_fofn = makePypeLocalFile( os.path.join( pread_dir,  "input_preads.fofn" ) )

        @PypeTask( inputs = consensus_out, 
                   outputs =  {"cns_done":r_cns_done, "pread_fofn": pread_fofn},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/cns_check" )
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn),  "w") as f:
                fn_list =  glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >>f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, os.path.basename(config["input_fofn_fn"])))
        make_fofn_abs_task = PypeTask(inputs = {"i_fofn": rawread_fofn_plf},
                                     outputs = {"o_fofn": pread_fofn},
                                     parameters = {},
                                     TaskType = PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile( os.path.join( pread_dir, "pdb_build_done") ) 
    parameters = {"work_dir": pread_dir,
                  "config": config}

    run_jobs = makePypeLocalFile(os.path.join(pread_dir, 'run_jobs.sh'))
    make_build_pdb_task  = PypeTask(inputs = { "pread_fofn": pread_fofn },
                                    outputs = { "pdb_build_done": pdb_build_done,
                                                "run_jobs": run_jobs},
                                    parameters = parameters,
                                    TaskType = PypeThreadTaskBase,
                                    URL = "task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(task_build_pdb)

    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done]) 

    db_file = makePypeLocalFile(os.path.join( pread_dir, "%s.db" % "preads" ))
    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs), pread_dir, "preads", db_file, pdb_build_done, config, pread_aln= True) 
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq = 30) # larger number better for more jobs

    p_da_done = makePypeLocalFile( os.path.join( pread_dir, "da_done") )

    @PypeTask( inputs = daligner_out, 
               outputs =  {"da_done":p_da_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pda_check" )
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))
    
    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks(fn(run_jobs), pread_dir, "preads", p_da_done, config)
    wf.addTasks( merge_tasks )
    #wf.refreshTargets(updateFreq = 30) #all

    p_merge_done = makePypeLocalFile( os.path.join( pread_dir, "p_merge_done") )

    @PypeTask( inputs = merge_out, 
               outputs =  {"p_merge_done":p_merge_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pmerge_check" )
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))
    
    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq = wait_time) #all

    
    falcon_asm_done = makePypeLocalFile( os.path.join( falcon_asm_dir, "falcon_asm_done") )
    make_run_falcon_asm = PypeTask(
               inputs = {"p_merge_done": p_merge_done, "db_file":db_file},
               outputs =  {"falcon_asm_done":falcon_asm_done},
               parameters = {"wd": falcon_asm_dir,
                             "config": config,
                             "pread_dir": pread_dir},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/falcon" )
    wf.addTask(make_run_falcon_asm(task_run_falcon_asm))
    wf.refreshTargets(updateFreq = wait_time) #all
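
Two equivalent spellings of task creation appear across these examples: the decorator form @PypeTask(...) directly above a local function, and the factory form used here, make_t = PypeTask(...); task = make_t(task_fn), which lets reusable module-level task functions (task_run_falcon_asm, task_build_pdb, ...) be bound to different inputs. A toy illustration of why both forms are the same call (not the real pypeFLOW implementation):

def pype_task_sketch(inputs, outputs, **kwds):
    # PypeTask(...) returns a callable; applying it to a function yields the
    # task. Decorator syntax is just that application.
    def bind(func):
        task = {"func": func, "inputs": inputs, "outputs": outputs}
        task.update(kwds)
        return task
    return bind

make_task = pype_task_sketch({}, {}, URL="task://localhost/demo")   # factory form
demo_task = make_task(lambda self: None)

@pype_task_sketch({}, {}, URL="task://localhost/demo2")             # decorator form
def demo_task2(self):
    pass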
Example #19
0
def main(*argv):
    setup_logger()
    if len(argv) < 2:
        print "you need to specify a configuration file"
        print "example: HGAP.py HGAP_run.cfg"
        sys.exit(1)
    
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir  = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        try:
            os.makedirs(d)
        except OSError:
            pass

    config = get_config(argv[1])
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        input_h5_fofn = makePypeLocalFile( os.path.abspath( config["input_fofn_fn"] ) )
        rdb_build_done = makePypeLocalFile( os.path.join( rawread_dir, "rdb_build_done") ) 
        parameters = {"work_dir": rawread_dir,
                      "config": config}

        make_build_rdb_task = PypeTask(inputs = {"input_fofn": input_h5_fofn},
                                      outputs = {"rdb_build_done": rdb_build_done}, 
                                      parameters = parameters,
                                      TaskType = PypeThreadTaskBase)

        build_rdb_task = make_build_rdb_task(build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done]) 
        

        db_file = makePypeLocalFile(os.path.join( rawread_dir, "%s.db" % "raw_reads" ))
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks( rawread_dir, "raw_reads", db_file, rdb_build_done, config) 

        wf.addTasks(daligner_tasks)
        #wf.refreshTargets(updateFreq = 60) # larger number better for more jobs

        r_da_done = makePypeLocalFile( os.path.join( rawread_dir, "da_done") )

        @PypeTask( inputs = daligner_out, 
                   outputs =  {"da_done":r_da_done},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/rda_check" )
        def check_r_da_task(self):
            os.system("touch %s" % fn(self.da_done))
        
        wf.addTask(check_r_da_task)
        wf.refreshTargets(updateFreq = wait_time) # larger updateFreq suits many jobs; refreshTargets must be called here so the queued jobs run before the concurrency setting changes
        
        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks( rawread_dir, "raw_reads", r_da_done, config )
        wf.addTasks( merge_tasks )
        if config["target"] == "overlapping":
            wf.refreshTargets(updateFreq = wait_time) # larger updateFreq suits many jobs; refreshTargets must be called here so the queued jobs run before the concurrency setting changes
            sys.exit(0)
        wf.addTasks( consensus_tasks )

        r_cns_done = makePypeLocalFile( os.path.join( rawread_dir, "cns_done") )
        pread_fofn = makePypeLocalFile( os.path.join( pread_dir,  "input_preads.fofn" ) )

        @PypeTask( inputs = consensus_out, 
                   outputs =  {"cns_done":r_cns_done, "pread_fofn": pread_fofn},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/cns_check" )
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn),  "w") as f:
                fn_list =  glob.glob("%s/preads/out*.fa" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >>f, fa_fn
            os.system("touch %s" % fn(self.cns_done))

        wf.addTask(check_r_cns_task)
        wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs

    if config["target"] == "pre-assembly":
        sys.exit(0)
    
    if config["input_type"] == "preads":
        if not os.path.exists( "%s/input_preads.fofn" % pread_dir):
            os.system( "cp %s %s/input_preads.fofn" % (os.path.abspath( config["input_fofn_fn"] ), pread_dir) )
        pread_fofn = makePypeLocalFile( os.path.join( pread_dir,  "input_preads.fofn" ) )

    rdb_build_done = makePypeLocalFile( os.path.join( pread_dir, "rdb_build_done") ) 
    @PypeTask( inputs = { "pread_fofn": pread_fofn },
               outputs = { "rdb_build_done": rdb_build_done },
               parameters = {"config": config, "pread_dir": pread_dir},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/build_p_rdb")
    def build_p_rdb_task(self):
        config = self.parameters["config"]
        pread_dir = self.parameters["pread_dir"]
        fa_serial = 0
        for fa_fn in open(fn(self.pread_fofn)).readlines():
            fa_fn = fa_fn.strip()
            c = 0
            fa_serial += 1
            with open("%s/preads_norm_%05d.fasta" % (pread_dir, fa_serial), "w") as p_norm:
                f = FastaReader(fa_fn)
                for r in f:
                    if len(r.sequence) < config["length_cutoff_pr"]:
                        continue
                    name = r.name
                    name = name.replace("_","")
                    ignore_read = False
                    for  cc in r.sequence:
                        if cc not in ["A","C","G","T"]:
                            ignore_read = True
                            break
                    if ignore_read:
                        continue
                    print >> p_norm, ">prolog_%05d/%d/%d_%d" % (fa_serial, c, 0, len(r.sequence) )
                    for i in range(0, len(r.sequence)/80):
                        print >> p_norm, r.sequence[ i *80 : (i + 1) * 80]
                    print >> p_norm, r.sequence[(i+1)*80:]
                    c += 1
            os.system("cd %s; fasta2DB preads preads_norm_%05d.fasta" % (pread_dir, fa_serial) )

        os.system("cd %s; DBsplit %s preads" % (pread_dir, config["ovlp_DBsplit_option"]))
        os.system("cd %s; HPCdaligner %s preads > run_jobs.sh" % (pread_dir, config["ovlp_HPCdaligner_option"]))
        os.system("cd %s; touch rdb_build_done" % pread_dir)

    wf.addTask(build_p_rdb_task)
    wf.refreshTargets(updateFreq = wait_time) # larger number better for more jobs

    db_file = makePypeLocalFile(os.path.join( pread_dir, "%s.db" % "preads" ))
    #### run daligner
    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks( pread_dir, "preads", db_file, rdb_build_done, config, pread_aln= True) 
    wf.addTasks(daligner_tasks)
    #wf.refreshTargets(updateFreq = 30) # larger number better for more jobs

    p_da_done = makePypeLocalFile( os.path.join( pread_dir, "da_done") )

    @PypeTask( inputs = daligner_out, 
               outputs =  {"da_done":p_da_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pda_check" )
    def check_p_da_task(self):
        os.system("touch %s" % fn(self.da_done))
    
    wf.addTask(check_p_da_task)

    merge_tasks, merge_out, consensus_tasks, consensus_out = create_merge_tasks( pread_dir, "preads", p_da_done, config )
    wf.addTasks( merge_tasks )
    #wf.refreshTargets(updateFreq = 30) #all            

    p_merge_done = makePypeLocalFile( os.path.join( pread_dir, "p_merge_done") )

    @PypeTask( inputs = merge_out, 
               outputs =  {"p_merge_done":p_merge_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pmerge_check" )
    def check_p_merge_check_task(self):
        os.system("touch %s" % fn(self.p_merge_done))
    
    wf.addTask(check_p_merge_check_task)
    wf.refreshTargets(updateFreq = wait_time) #all            

    
    falcon_asm_done = makePypeLocalFile( os.path.join( falcon_asm_dir, "falcon_asm_done") )
    @PypeTask( inputs = {"p_merge_done": p_merge_done}, 
               outputs =  {"falcon_asm_done":falcon_asm_done},
               parameters = {"wd": falcon_asm_dir,
                             "config": config,
                             "pread_dir": pread_dir},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/falcon" )
    def run_falcon_asm_task(self):
        wd = self.parameters["wd"]
        config = self.parameters["config"]
        install_prefix = config["install_prefix"]
        pread_dir = self.parameters["pread_dir"]
        script_dir = os.path.join( wd )
        script_fn =  os.path.join( script_dir ,"run_falcon_asm.sh" )
        
        script = []
        script.append( "source {install_prefix}/bin/activate".format(install_prefix = install_prefix) )
        script.append( "cd %s" % pread_dir )
        script.append( "DB2Falcon preads")
        script.append( "cd %s" % wd )
        script.append( """find %s/las_files -name "*.las" > las.fofn """ % pread_dir )
        overlap_filtering_setting = config["overlap_filtering_setting"]
        length_cutoff_pr = config["length_cutoff_pr"]
        script.append( """fc_ovlp_filter.py --fofn las.fofn %s \
                                 --n_core 24 --min_len %d > preads.ovl""" % (overlap_filtering_setting, length_cutoff_pr) )

        script.append( "ln -sf %s/preads4falcon.fasta ." % pread_dir)
        script.append( """fc_ovlp_to_graph.py preads.ovl > fc.log""" )
        script.append( """fc_graph_to_contig.py""" )
        script.append( """touch %s\n""" % fn(self.falcon_asm_done))

        with open(script_fn, "w") as script_file:
            script_file.write("\n".join(script))

        job_name = self.URL.split("/")[-1]
        job_name += "-"+str(uuid.uuid1())[:8]
        job_data = {"job_name": job_name,
                    "cwd": wd,
                    "sge_option": config["sge_option_fc"],
                    "script_fn": script_fn }
        run_script(job_data, job_type = "SGE")
        wait_for_file( fn(self.falcon_asm_done), task=self, job_name=job_name )
    
    wf.addTask( run_falcon_asm_task )
    wf.refreshTargets(updateFreq = wait_time) #all            
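
The read-normalization loop in build_p_rdb_task above is worth isolating. A standalone sketch under the same assumptions (records expose .sequence the way FastaReader's do; the ">prolog..." header scheme is simplified here by dropping the per-file serial):

def write_normalized_fasta(records, out, min_len, width=80):
    # Filter short reads and reads with ambiguous bases, then emit
    # 80-column-wrapped FASTA, as the task above does per input file.
    c = 0
    for r in records:
        seq = r.sequence
        if len(seq) < min_len:
            continue
        if any(b not in "ACGT" for b in seq):
            continue
        out.write(">prolog/%d/%d_%d\n" % (c, 0, len(seq)))
        for i in range(0, len(seq), width):
            out.write(seq[i:i + width] + "\n")
        c += 1
    return c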
Example #20
0
def main1(prog_name, input_config_fn, logger_config_fn=None):
    global fc_run_logger
    fc_run_logger = support.setup_logger(logger_config_fn)

    fc_run_logger.info("fc_run started with configuration %s", input_config_fn)
    config = support.get_dict_from_old_falcon_cfg(support.parse_config(input_config_fn))
    rawread_dir = os.path.abspath("./0-rawreads")
    pread_dir = os.path.abspath("./1-preads_ovl")
    falcon_asm_dir  = os.path.abspath("./2-asm-falcon")
    script_dir = os.path.abspath("./scripts")
    sge_log_dir = os.path.abspath("./sge_log")

    for d in (rawread_dir, pread_dir, falcon_asm_dir, script_dir, sge_log_dir):
        support.make_dirs(d)

    exitOnFailure=config['stop_all_jobs_on_failure'] # only matters for parallel jobs
    concurrent_jobs = config["pa_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    input_fofn_plf = makePypeLocalFile(config["input_fofn"])
    rawread_fofn_plf = makePypeLocalFile(os.path.join(rawread_dir, os.path.basename(config["input_fofn"])))
    make_fofn_abs_task = PypeTask(inputs = {"i_fofn": input_fofn_plf},
                                  outputs = {"o_fofn": rawread_fofn_plf},
                                  parameters = {},
                                  TaskType = PypeThreadTaskBase)
    fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_raw)

    wf.addTasks([fofn_abs_task])
    wf.refreshTargets([fofn_abs_task])

    if config["input_type"] == "raw":
        #### import sequences into daligner DB
        sleep_done = makePypeLocalFile( os.path.join( rawread_dir, "sleep_done") )
        rdb_build_done = makePypeLocalFile( os.path.join( rawread_dir, "rdb_build_done") ) 
        run_jobs = makePypeLocalFile( os.path.join( rawread_dir, "run_jobs.sh") ) 
        parameters = {"work_dir": rawread_dir,
                      "config": config}

        raw_reads_db = makePypeLocalFile(os.path.join( rawread_dir, "%s.db" % "raw_reads" ))
        make_build_rdb_task = PypeTask(inputs = {"input_fofn": rawread_fofn_plf},
                                      outputs = {"rdb_build_done": rdb_build_done,
                                                 "raw_reads.db": raw_reads_db,
                                                 "run_jobs": run_jobs}, 
                                      parameters = parameters,
                                      TaskType = PypeThreadTaskBase)
        build_rdb_task = make_build_rdb_task(task_build_rdb)

        wf.addTasks([build_rdb_task])
        wf.refreshTargets([rdb_build_done]) 

        raw_reads_nblock = support.get_nblock(fn(raw_reads_db))
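        # nblock = number of DBsplit blocks in the DAZZ_DB; the gather tasks
        # below receive it, presumably to check that every per-block daligner
        # job delivered its output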
        #### run daligner
        daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs), rawread_dir, "raw_reads", rdb_build_done, config) 

        wf.addTasks(daligner_tasks)
        r_da_done = makePypeLocalFile( os.path.join( rawread_dir, "da_done") )

        parameters =  {
                "nblock": raw_reads_nblock,
        }
        make_daligner_gather = PypeTask(
                   inputs = daligner_out, 
                   outputs =  {"da_done":r_da_done},
                   parameters = parameters,
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/rda_check" )
        check_r_da_task = make_daligner_gather(task_daligner_gather)
        wf.addTask(check_r_da_task)
        wf.refreshTargets(exitOnFailure=exitOnFailure)
        
        merge_tasks, merge_out, p_ids_merge_job_done = create_merge_tasks(fn(run_jobs), rawread_dir, "raw_reads", r_da_done, config)
        wf.addTasks( merge_tasks )
        wf.refreshTargets(exitOnFailure=exitOnFailure)

        if config["target"] == "overlapping":
            sys.exit(0)
        consensus_tasks, consensus_out = create_consensus_tasks(rawread_dir, "raw_reads", config, p_ids_merge_job_done)
        wf.addTasks( consensus_tasks )

        r_cns_done = makePypeLocalFile( os.path.join( rawread_dir, "cns_done") )
        pread_fofn = makePypeLocalFile( os.path.join( pread_dir,  "input_preads.fofn" ) )

        @PypeTask( inputs = consensus_out, 
                   outputs =  {"cns_done":r_cns_done, "pread_fofn": pread_fofn},
                   TaskType = PypeThreadTaskBase,
                   URL = "task://localhost/cns_check" )
        def check_r_cns_task(self):
            with open(fn(self.pread_fofn),  "w") as f:
                fn_list =  glob.glob("%s/preads/out*.fasta" % rawread_dir)
                fn_list.sort()
                for fa_fn in fn_list:
                    print >>f, fa_fn
            system("touch %s" % fn(self.cns_done))
        wf.addTask(check_r_cns_task)

        concurrent_jobs = config["cns_concurrent_jobs"]
        PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
        wf.refreshTargets(exitOnFailure=exitOnFailure)

    if config["target"] == "pre-assembly":
        sys.exit(0)

    # build pread database
    if config["input_type"] == "preads":
        pread_fofn = makePypeLocalFile(os.path.join(pread_dir, os.path.basename(config["input_fofn"])))
        make_fofn_abs_task = PypeTask(inputs = {"i_fofn": rawread_fofn_plf},
                                     outputs = {"o_fofn": pread_fofn},
                                     parameters = {},
                                     TaskType = PypeThreadTaskBase)
        fofn_abs_task = make_fofn_abs_task(task_make_fofn_abs_preads)
        wf.addTasks([fofn_abs_task])
        wf.refreshTargets([fofn_abs_task])

    pdb_build_done = makePypeLocalFile( os.path.join( pread_dir, "pdb_build_done") ) 
    parameters = {"work_dir": pread_dir,
                  "config": config}

    run_jobs = makePypeLocalFile(os.path.join(pread_dir, 'run_jobs.sh'))
    preads_db = makePypeLocalFile(os.path.join(pread_dir, 'preads.db')) # Also .preads.*, of course.
    make_build_pdb_task  = PypeTask(inputs = {"pread_fofn": pread_fofn },
                                    outputs = {"pdb_build_done": pdb_build_done,
                                               "preads_db": preads_db,
                                               "run_jobs": run_jobs},
                                    parameters = parameters,
                                    TaskType = PypeThreadTaskBase,
                                    URL = "task://localhost/build_pdb")
    build_pdb_task = make_build_pdb_task(task_build_pdb)

    wf.addTasks([build_pdb_task])
    wf.refreshTargets([pdb_build_done]) 


    preads_nblock = support.get_nblock(fn(preads_db))
    #### run daligner
    config["sge_option_da"] = config["sge_option_pda"]
    config["sge_option_la"] = config["sge_option_pla"]
    daligner_tasks, daligner_out = create_daligner_tasks(fn(run_jobs), pread_dir, "preads", pdb_build_done, config, pread_aln=True)
    wf.addTasks(daligner_tasks)

    p_da_done = makePypeLocalFile(os.path.join( pread_dir, "da_done"))
    parameters =  {
            "nblock": preads_nblock,
    }
    make_daligner_gather = PypeTask(
                inputs = daligner_out, 
                outputs =  {"da_done":p_da_done},
                parameters = parameters,
                TaskType = PypeThreadTaskBase,
                URL = "task://localhost/pda_check" )
    check_p_da_task = make_daligner_gather(task_daligner_gather)
    wf.addTask(check_p_da_task)
    wf.refreshTargets(exitOnFailure=exitOnFailure)

    merge_tasks, merge_out, _ = create_merge_tasks(fn(run_jobs), pread_dir, "preads", p_da_done, config)
    wf.addTasks( merge_tasks )

    p_merge_done = makePypeLocalFile(os.path.join( pread_dir, "p_merge_done"))

    @PypeTask( inputs = merge_out, 
               outputs =  {"p_merge_done": p_merge_done},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/pmerge_check" )
    def check_p_merge_check_task(self):
        system("touch %s" % fn(self.p_merge_done))
    wf.addTask(check_p_merge_check_task)

    concurrent_jobs = config["ovlp_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)

    wf.refreshTargets(exitOnFailure=exitOnFailure)

    
    db2falcon_done = makePypeLocalFile( os.path.join(pread_dir, "db2falcon_done"))
    make_run_db2falcon = PypeTask(
               inputs = {"p_merge_done": p_merge_done,},
               outputs =  {"db2falcon_done": db2falcon_done},
               parameters = {"wd": pread_dir,
                             "config": config,
                            },
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/db2falcon" )
    wf.addTask(make_run_db2falcon(task_run_db2falcon))

    falcon_asm_done = makePypeLocalFile( os.path.join( falcon_asm_dir, "falcon_asm_done") )
    make_run_falcon_asm = PypeTask(
               inputs = {"db2falcon_done": db2falcon_done, "db_file": preads_db},
               outputs =  {"falcon_asm_done": falcon_asm_done},
               parameters = {"wd": falcon_asm_dir,
                             "config": config,
                             "pread_dir": pread_dir},
               TaskType = PypeThreadTaskBase,
               URL = "task://localhost/falcon" )
    wf.addTask(make_run_falcon_asm(task_run_falcon_asm))
    wf.refreshTargets()
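
This version threads the DBsplit block count through support.get_nblock(). A plausible sketch, assuming the DAZZ_DB .db stub is the small text index in which DBsplit records a "blocks = N" line (assumed format, not FALCON's actual code):

import re

def get_nblock_sketch(db_fn):
    # Scan the .db stub for the "blocks = N" header line that DBsplit adds.
    with open(db_fn) as f:
        for line in f:
            m = re.match(r"blocks\s*=\s*(\d+)", line.strip())
            if m:
                return int(m.group(1))
    return 1  # assumption: an unsplit database behaves as a single block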
Example #21
0
def unzip_all(config):
    unzip_concurrent_jobs = config["unzip_concurrent_jobs"]
    PypeThreadWorkflow.setNumThreadAllowed(unzip_concurrent_jobs,
                                           unzip_concurrent_jobs)
    wf = PypeThreadWorkflow()

    ctg_list_file = makePypeLocalFile("./3-unzip/reads/ctg_list")
    falcon_asm_done = makePypeLocalFile("./2-asm-falcon/falcon_asm_done")
    parameters = {"wd": os.path.abspath("."), "config": config}

    job_done = makePypeLocalFile(
        os.path.join(parameters["wd"], "track_reads_done"))
    make_track_reads_task = PypeTask(
        inputs={"falcon_asm_done": falcon_asm_done},
        outputs={
            "job_done": job_done,
            "ctg_list_file": ctg_list_file
        },
        parameters=parameters,
        TaskType=PypeThreadTaskBase,
        URL="task://localhost/track_reads")
    track_reads_task = make_track_reads_task(task_track_reads)

    wf.addTask(track_reads_task)
    wf.refreshTargets()  #force refresh now, will put proper dependence later

    ctg_ids = []
    with open("./3-unzip/reads/ctg_list") as f:
        for row in f:
            row = row.strip()
            ctg_ids.append(row)

    aln1_outs = {}

    all_ctg_out = {}

    for ctg_id in ctg_ids:
        # inputs
        ref_fasta = makePypeLocalFile(
            "./3-unzip/reads/{ctg_id}_ref.fa".format(ctg_id=ctg_id))
        read_fasta = makePypeLocalFile(
            "./3-unzip/reads/{ctg_id}_reads.fa".format(ctg_id=ctg_id))

        # outputs
        wd = os.path.join(
            os.getcwd(), "./3-unzip/0-phasing/{ctg_id}/".format(ctg_id=ctg_id))
        mkdir(wd)
        ctg_aln_out = makePypeLocalFile(
            os.path.join(wd, "{ctg_id}_sorted.bam".format(ctg_id=ctg_id)))
        job_done = makePypeLocalFile(
            os.path.join(wd, "aln_{ctg_id}_done".format(ctg_id=ctg_id)))

        parameters = {
            "job_uid": "aln-" + ctg_id,
            "wd": wd,
            "config": config,
            "ctg_id": ctg_id
        }
        make_blasr_task = PypeTask(
            inputs={
                "ref_fasta": ref_fasta,
                "read_fasta": read_fasta
            },
            outputs={
                "ctg_aln_out": ctg_aln_out,
                "job_done": job_done
            },
            parameters=parameters,
            TaskType=PypeThreadTaskBase,
            URL="task://localhost/aln_{ctg_id}".format(ctg_id=ctg_id))
        blasr_task = make_blasr_task(task_run_blasr)
        aln1_outs[ctg_id] = (ctg_aln_out, job_done)
        wf.addTask(blasr_task)

        job_done = makePypeLocalFile(
            os.path.join(wd, "p_{ctg_id}_done".format(ctg_id=ctg_id)))
        rid_to_phase_out = makePypeLocalFile(
            os.path.join(wd, "rid_to_phase.{ctg_id}".format(ctg_id=ctg_id)))
        all_ctg_out["r2p.{ctg_id}".format(ctg_id=ctg_id)] = rid_to_phase_out

        parameters = {
            "job_uid": "ha-" + ctg_id,
            "wd": wd,
            "config": config,
            "ctg_id": ctg_id
        }
        make_phasing_task = PypeTask(
            inputs={
                "ref_fasta": ref_fasta,
                "aln_bam": ctg_aln_out
            },
            outputs={"job_done": job_done},
            parameters=parameters,
            TaskType=PypeThreadTaskBase,
            URL="task://localhost/p_{ctg_id}".format(ctg_id=ctg_id))
        phasing_task = make_phasing_task(task_phasing)
        wf.addTask(phasing_task)

    wf.refreshTargets()

    hasm_wd = os.path.abspath("./3-unzip/1-hasm/")
    mkdir(hasm_wd)
    rid_to_phase_all = makePypeLocalFile(
        os.path.join(hasm_wd, "rid_to_phase.all"))

    @PypeTask(inputs=all_ctg_out,
              outputs={"rid_to_phase_all": rid_to_phase_all},
              TaskType=PypeThreadTaskBase,
              URL="task://localhost/rid_to_phase_all")
    def get_rid_to_phase_all(self):
        rid_to_phase_all_fn = fn(self.rid_to_phase_all)
        inputs_fn = [fn(f) for f in self.inputs.values()]
        inputs_fn.sort()
        output = []
        for fname in inputs_fn:
            output.append(open(fname).read())

        with open(rid_to_phase_all_fn, "w") as out:
            out.write("".join(output))

    wf.addTask(get_rid_to_phase_all)

    parameters["wd"] = hasm_wd
    job_done = makePypeLocalFile(os.path.join(hasm_wd, "hasm_done"))
    make_hasm_task = PypeTask(inputs={"rid_to_phase_all": rid_to_phase_all},
                              outputs={"job_done": job_done},
                              parameters=parameters,
                              TaskType=PypeThreadTaskBase,
                              URL="task://localhost/hasm")
    hasm_task = make_hasm_task(task_hasm)

    wf.addTask(hasm_task)

    wf.refreshTargets()
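
The mkdir() used throughout this example tolerates pre-existing directories; Examples #17 and #19 inline the same idea as a bare try/except around os.makedirs. A likely equivalent that swallows only the "already exists" error instead of everything:

import errno
import os

def mkdir(path):
    # makedirs, but ignore EEXIST and re-raise anything else
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise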
Example #22
0
    job_name = self.URL.split("/")[-1]
    job_name += "-"+str(uuid.uuid1())[:8]
    job_data = {"job_name": job_name,
                "cwd": cwd,
                "sge_option": " -pe smp 6 -q huasm ",
                "script_fn": script_fn }
    run_script(job_data, job_type = "SGE")
    wait_for_file( os.path.join(cwd,"c_%05d_done" % job_id) , task=self, job_name=job_name )


if __name__ == "__main__":

    prefix = sys.argv[1]

    concurrent_jobs = 16
    PypeThreadWorkflow.setNumThreadAllowed(concurrent_jobs, concurrent_jobs)
    wf = PypeThreadWorkflow()

    mjob_data = {}

    with open("run_jobs.sh") as f:
        for l in f:
            l = l.strip().split()
            if l[0] not in ( "LAsort", "LAmerge" ):
                continue
            if l[0] == "LAsort":
                p_id = int( l[2].split(".")[1] )
                mjob_data.setdefault( p_id, [] )
                mjob_data[p_id].append(  " ".join(l) )
            if l[0] == "LAmerge":
                l2 = l[2].split(".")
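
A self-contained illustration of the setdefault grouping idiom this fragment uses: bucket the LAsort command lines of a daligner-generated run_jobs.sh by block id (shown for the LAsort case only; the function name is illustrative):

def group_sort_jobs(lines):
    mjob_data = {}
    for l in lines:
        parts = l.strip().split()
        if not parts or parts[0] != "LAsort":
            continue
        p_id = int(parts[2].split(".")[1])   # block id from "<db>.<block>..."
        mjob_data.setdefault(p_id, [])
        mjob_data[p_id].append(" ".join(parts))
    return mjob_data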
Example #23
0
        unzip_concurrent_jobs = config.getint('Unzip', 'unzip_concurrent_jobs')

    config = {"job_type": job_type,
              "sge_blasr_aln": sge_blasr_aln,
              "smrt_bin": smrt_bin,
              "sge_phasing": sge_phasing}

    support.job_type = "SGE" #tmp hack until we have a configuration parser

    ctg_ids = []
    with open("./3-unzip/reads/ctg_list") as f:
        for row in f:
            row = row.strip()
            ctg_ids.append( row )

    PypeThreadWorkflow.setNumThreadAllowed(unzip_concurrent_jobs, unzip_concurrent_jobs)
    wf = PypeThreadWorkflow()

    ctg_list_file = makePypeLocalFile("./3-unzip/reads/ctg_list")

    aln1_outs = {}
    for ctg_id in ctg_ids:
        # inputs
        ref_fasta = makePypeLocalFile("./3-unzip/reads/{ctg_id}_ref.fa".format(ctg_id = ctg_id))
        read_fasta = makePypeLocalFile("./3-unzip/reads/{ctg_id}_reads.fa".format(ctg_id = ctg_id))
        
        # outputs
        wd = os.path.join( os.getcwd(),  "./3-unzip/0-phasing/{ctg_id}/".format( ctg_id = ctg_id ) )
        mkdir(wd)
        ctg_aln_out = makePypeLocalFile( os.path.join( wd, "{ctg_id}_sorted.bam".format( ctg_id = ctg_id ) ) )
        job_done = makePypeLocalFile( os.path.join( wd, "aln_{ctg_id}_done".format( ctg_id = ctg_id ) ) )