def run_tasks(tasks, cpu=4):
    """Run database tasks under a Supervisor, then delete their log files.

    Every task is given ``stdout``/``stderr`` paths inside ``PATH_DATABASES``
    named after the task. Once ``Supervisor.run()`` has returned, whichever
    of those files exist are removed.

    Args:
        tasks: task objects exposing a ``name`` attribute. The collection is
            iterated more than once, so pass a list rather than a generator.
        cpu: forwarded to the Supervisor's ``cpu`` argument.
    """
    # The original mixed tabs and spaces in this body, which Python 3 rejects
    # with TabError; everything is now indented with four spaces.
    for t in tasks:
        print(t.name)
        t.stdout = os.path.join(PATH_DATABASES, t.name + '.stdout')
        t.stderr = os.path.join(PATH_DATABASES, t.name + '.stderr')

    s = Supervisor(tasks=tasks,
                   force_run=False,
                   log=database_supervisor_log,
                   cpu=cpu)
    s.run()
    # Only reached when run() returns normally: the per-task logs are no
    # longer needed, so remove them.
    for t in tasks:
        if os.path.exists(t.stdout):
            os.remove(t.stdout)
        if os.path.exists(t.stderr):
            os.remove(t.stderr)
def run_tasks(tasks, cpu=4):
    """Execute tool tasks through a Supervisor and tidy up their logs.

    Each task's ``stdout`` and ``stderr`` attributes are pointed at files in
    ``PATH_TOOLS`` named ``<task.name>.stdout`` / ``<task.name>.stderr``.
    After the Supervisor run returns, existing log files are deleted.

    Args:
        tasks: task objects with a ``name`` attribute (iterated twice).
        cpu: passed straight to the Supervisor's ``cpu`` argument.
    """
    for task in tasks:
        print(task.name)
        for stream in ('stdout', 'stderr'):
            setattr(task, stream, join(PATH_TOOLS, task.name + '.' + stream))

    supervisor = Supervisor(tasks=tasks,
                            force_run=False,
                            log=tool_supervisor_log,
                            cpu=cpu)
    supervisor.run()

    # Reaching this point means run() returned; drop the per-task logs.
    for task in tasks:
        for stream in ('stdout', 'stderr'):
            log_path = getattr(task, stream)
            if exists(log_path):
                os.remove(log_path)
def run_tasks(tasks, cpu=4):
    """Run the given tool tasks and remove their stdout/stderr captures.

    Log destinations live in ``PATH_TOOLS`` and are derived from each task's
    ``name``. The Supervisor is created with ``force_run=False`` and the
    ``tool_supervisor_log`` log target.

    Args:
        tasks: task objects carrying a ``name`` attribute (iterated twice).
        cpu: value handed to the Supervisor as ``cpu``.
    """
    for job in tasks:
        print(job.name)
        job.stdout, job.stderr = (join(PATH_TOOLS, job.name + '.stdout'),
                                  join(PATH_TOOLS, job.name + '.stderr'))

    runner = Supervisor(tasks=tasks,
                        force_run=False,
                        log=tool_supervisor_log,
                        cpu=cpu)
    runner.run()

    # run() came back, so the captured output can be discarded.
    for job in tasks:
        if exists(job.stdout):
            os.remove(job.stdout)
        if exists(job.stderr):
            os.remove(job.stderr)
def run_tasks(tasks, cpu=4):
    """Execute database tasks through a Supervisor and clean up their logs.

    ``stdout``/``stderr`` of every task are redirected to
    ``PATH_DATABASES/<task.name>.stdout`` and ``.stderr``. Those files are
    deleted, when present, after the Supervisor run has returned.

    Args:
        tasks: task objects with a ``name`` attribute (iterated twice).
        cpu: passed through to the Supervisor's ``cpu`` argument.
    """
    for task in tasks:
        print(task.name)
        for stream in ('stdout', 'stderr'):
            destination = os.path.join(PATH_DATABASES,
                                       task.name + '.' + stream)
            setattr(task, stream, destination)

    supervisor = Supervisor(tasks=tasks, force_run=False,
                            log=database_supervisor_log, cpu=cpu)
    supervisor.run()

    # Nothing raised during run(): the task logs are disposable now.
    for task in tasks:
        for stream in ('stdout', 'stderr'):
            log_path = getattr(task, stream)
            if os.path.exists(log_path):
                os.remove(log_path)
Beispiel #5
0
def gen_rapclust_supervisor(opc, fastq1, fastq2, paired_names, unpaired,
                            unpaired_names, assembly_path, assembly_name,
                            bowtie2_index, gene_trans_map, sample_info, model,
                            out_dir, cpu_cap, deps):
    """Build a Supervisor holding the bowtie2 alignments of the rapclust step.

    A ``rapclust_bt2`` directory task is created under ``out_dir``; one
    bowtie2 task per paired read set and one per unpaired read file is then
    generated, writing into that directory.

    Args:
        opc: options object forwarded to the task factories.
        fastq1, fastq2: parallel lists of paired-end read files.
        paired_names: output name for each read pair (same indexing).
        unpaired: list of single-end read files.
        unpaired_names: output name for each unpaired file (same indexing).
        bowtie2_index: index handed to every bowtie2 task.
        out_dir: parent directory for the ``rapclust_bt2`` folder.
        cpu_cap: CPU budget; each alignment gets ``fg.round_div(cpu_cap, 2)``.
        deps: dependency list passed to each bowtie2 task.
        assembly_path, assembly_name, gene_trans_map, sample_info, model:
            accepted for signature parity with the other builders; not read
            here.

    Returns:
        Supervisor over the directory task followed by all bowtie2 tasks.
    """
    rc_dir = fg.make_dir_task(os.path.join(out_dir, 'rapclust_bt2'))
    out_dir = rc_dir.targets[0]

    alignments = []
    for idx, reads_1 in enumerate(fastq1):
        sample = paired_names[idx]
        alignments.append(
            fex.bowtie2_task(opc, bowtie2_index, out_dir, reads_1,
                             fastq2[idx], sample, 2,
                             fg.round_div(cpu_cap, 2), deps))
    for idx, reads in enumerate(unpaired):
        sample = unpaired_names[idx]
        alignments.append(
            fex.bowtie2_unpaired_task(opc, bowtie2_index, out_dir, reads,
                                      sample, 2, fg.round_div(cpu_cap, 2),
                                      deps))

    # No tasks downstream of the alignments are scheduled here.
    return Supervisor(tasks=[rc_dir] + alignments)
Beispiel #6
0
def gen_db_supervisor(force=False,
                      sprot=False,
                      uniref90=False,
                      nr=False,
                      busco_args=busco_defaults,
                      blast_plus=False,
                      idmapping=False,
                      cpu=float('inf'),
                      pfam=True,
                      nog_functions=True,
                      dep=[]):
    """Assemble the Supervisor that downloads and builds reference databases.

    Args:
        force: passed to ``get_dbs(defaults=...)`` and to
            ``gen_dmnd_blast_tasks``.
        sprot, uniref90, nr: enable the corresponding protein database
            (download plus diamond, and optionally BLAST, builds).
        busco_args: mapping of BUSCO database suffix to a truthy/falsy switch;
            each enabled entry downloads ``dbs['busco_' + suffix]``.
        blast_plus: forwarded to ``gen_dmnd_blast_tasks``.
        idmapping: download the id-mapping files and add the subset task.
        cpu: forwarded to the returned Supervisor.
        pfam: download Pfam and add the build task that depends on it.
        nog_functions: download the ``nog_functions`` database.
        dep: not read by this function.

    Returns:
        Supervisor over every non-None task that was generated.
    """
    check_db_dir()
    dbs = get_dbs(defaults=force)
    tasks = []

    # Protein databases that need diamond (and optionally BLAST) indexes.
    for db_key, wanted in (('uniprot_sprot', sprot),
                           ('uniref90', uniref90),
                           ('nr', nr)):
        if wanted:
            tasks.append(gen_dmnd_blast_tasks(dbs[db_key], force, blast_plus))

    for busco_db in busco_args:
        if busco_args[busco_db]:
            tasks.append(download_task_wrapper(dbs['busco_' + busco_db], []))

    if pfam:
        pfam_task = download_task_wrapper(dbs['pfam'], [])
        hmmpress = fdb.pfam_build_task(dbs['pfam'].download_location,
                                       dbs['pfam'].call_path, [pfam_task])
        tasks.extend([pfam_task, hmmpress])

    if nog_functions:
        tasks.append(download_task_wrapper(dbs['nog_functions'], []))

    if idmapping:
        idmap_task = download_task_wrapper(dbs['id_mapping'], [])
        tasks.append(idmap_task)
        tasks.append(download_task_wrapper(dbs['id_mapping_selected'], []))
        subset_task = fdb.subset_idmapping_task(
            dbs['id_mapping'].download_location,
            dbs['id_mapping_biocyc'].call_path,
            dbs['id_mapping_eggnog'].call_path,
            dbs['id_mapping_ko'].call_path,
            dbs['id_mapping_orthodb'].call_path,
            [idmap_task])
        tasks.append(subset_task)

    # Databases handled by a switch above, or deliberately left out of the
    # catch-all download loop below.
    special_dbs = {
        'uniprot_sprot', 'uniref90', 'nr', 'swiss_enzyme', 'orthology_pathway',
        'nog_categories', 'nog_functions', 'pfam'
    }
    for db_string in dbs:
        already_handled = (db_string in special_dbs
                           or db_string.startswith(('busco_', 'id_mapping')))
        if not already_handled:
            tasks.append(download_task_wrapper(dbs[db_string], []))

    # Task factories may hand back None; those entries are dropped.
    tasks = [task for task in tasks if task is not None]
    return Supervisor(tasks, cpu=cpu)
Beispiel #7
0
def gen_salmon_supervisor(opc, fastq1, fastq2, paired_names, unpaired,
                          unpaired_names, assembly_path, assembly_name,
                          gene_trans_map, sample_info, model, out_dir, cpu_cap,
                          deps):
    """Build the Supervisor for salmon quantification plus DESeq2 analysis.

    Creates a ``salmon`` directory under ``out_dir``, a salmon index build,
    an optional gene-map task (only when ``gene_trans_map`` is non-empty),
    one salmon task per paired/unpaired read set, a counts-to-table task and
    two DESeq2 tasks (transcript level and gene level).

    Args:
        opc: options object forwarded to the task factories.
        fastq1, fastq2: parallel lists of paired-end read files.
        paired_names: output name for each read pair (same indexing).
        unpaired: list of single-end read files.
        unpaired_names: output name for each unpaired file (same indexing).
        assembly_path, assembly_name: assembly to quantify against.
        gene_trans_map: gene/transcript map; may be empty.
        sample_info, model: forwarded to the DESeq2 tasks.
        out_dir: parent directory for the ``salmon`` folder.
        cpu_cap: CPU budget; tasks get ``fg.round_div(cpu_cap, 2)``.
        deps: upstream dependencies; the list is copied, never mutated.

    Returns:
        Supervisor over all generated tasks.
    """
    salmon_dir = fg.make_dir_task(os.path.join(out_dir, 'salmon'))
    out_dir = salmon_dir.targets[0]
    build_salmon = fex.build_salmon_task(opc, assembly_path, assembly_name,
                                         out_dir, fg.round_div(cpu_cap, 2),
                                         [salmon_dir])
    deps = deps + [build_salmon]

    # The gene map task only exists when a gene/transcript map was supplied.
    # The original referenced it unconditionally in the final task list and
    # raised UnboundLocalError for an empty map.
    salmon_gene_map = None
    salmon_trans_gene_map = ''
    if len(gene_trans_map) > 0:
        salmon_gene_map = fex.salmon_gene_map_task(opc, out_dir, assembly_name,
                                                   gene_trans_map,
                                                   [salmon_dir])
        salmon_trans_gene_map = salmon_gene_map.targets[0]
        deps = deps + [salmon_gene_map]

    salmon_tasks = []
    for i, reads_1 in enumerate(fastq1):
        filename = paired_names[i]
        salmon = fex.salmon_task(opc, build_salmon.targets[0], reads_1,
                                 fastq2[i], filename, salmon_trans_gene_map,
                                 out_dir, fg.round_div(cpu_cap, 2), deps)
        salmon_tasks.append(salmon)
    for i, reads in enumerate(unpaired):
        filename = unpaired_names[i]
        salmon = fex.salmon_unpaired_task(opc, build_salmon.targets[0],
                                          reads, filename,
                                          salmon_trans_gene_map, out_dir,
                                          fg.round_div(cpu_cap, 2), deps)
        salmon_tasks.append(salmon)

    transcriptName = assembly_name
    geneName = assembly_name + '_gene'
    counts_to_table_salmon = fex.counts_to_table_task(
        opc, assembly_name, gene_trans_map, out_dir,
        [t.targets[0] for t in salmon_tasks], transcriptName, '--salmon',
        salmon_tasks)
    deseq2_salmon = fex.deseq2_task(opc, assembly_name, out_dir,
                                    counts_to_table_salmon.targets[0],
                                    sample_info, transcriptName, model,
                                    [counts_to_table_salmon])
    deseq2_salmon_gene = fex.deseq2_task(opc, assembly_name, out_dir,
                                         counts_to_table_salmon.targets[1],
                                         sample_info, geneName, model,
                                         [counts_to_table_salmon])

    setup_tasks = [salmon_dir, build_salmon]
    if salmon_gene_map is not None:
        setup_tasks.append(salmon_gene_map)
    all_tasks = setup_tasks + [
        counts_to_table_salmon, deseq2_salmon, deseq2_salmon_gene
    ] + salmon_tasks
    return Supervisor(tasks=all_tasks)
Beispiel #8
0
def gen_paired_prinseq_supervisor(opc, out_dir, fastq1, fastq2, unpaired,
                                  dependency_set, rmdup):
    """Create one prinseq task per read pair and wrap them in a Supervisor.

    Args:
        opc: not read by this function.
        out_dir: output directory handed to each prinseq task.
        fastq1, fastq2: paired-end read files, combined with ``zip`` (extra
            entries in the longer list are ignored).
        unpaired: not read by this function.
        dependency_set: not read by this function; every task is created
            with an empty dependency list.
        rmdup: when truthy, prinseq is given the ``--derep 14`` option.

    Returns:
        Supervisor over the prinseq tasks, named ``prinseq_output_<n>`` in
        input order starting at 0.
    """
    prinseq_opts = '--derep 14' if rmdup else ''
    tasks = [
        fa.prinseq_task(out_dir, reads_1, reads_2,
                        'prinseq_output_' + str(number), prinseq_opts, [])
        for number, (reads_1, reads_2) in enumerate(zip(fastq1, fastq2))
    ]
    return Supervisor(tasks=tasks)
Beispiel #9
0
def gen_paired_trimmomatic_supervisor(opc, out_dir, fq1, fq2, unpaired,
                                      dependency_set, cpu_cap):
    """Create one trimmomatic task per read pair, sharing the CPU budget.

    Args:
        opc: options object forwarded to each trimmomatic task.
        out_dir: output directory for the trimmed reads.
        fq1, fq2: paired-end read files, combined with ``zip``.
        unpaired: not read by this function.
        dependency_set: dependencies given to every trimmomatic task.
        cpu_cap: total CPU budget, split evenly across the read pairs.

    Returns:
        Supervisor over the trimmomatic tasks, named
        ``trimmomatic_output_<n>`` in input order starting at 0. With no
        read pairs the Supervisor has an empty task list.
    """
    pair_count = len(fq1)
    # Each task must get at least one thread. The original rounded to 0 when
    # there were many more read pairs than CPUs (round(0.4) == 0) and raised
    # ZeroDivisionError for an empty fq1.
    if pair_count:
        cpu_mod = max(1, int(round(float(cpu_cap) / pair_count)))
    else:
        cpu_mod = 1
    tasks = [
        fa.trimmomatic_task(opc, out_dir, i1, i2, cpu_mod,
                            'trimmomatic_output_' + str(count),
                            dependency_set)
        for count, (i1, i2) in enumerate(zip(fq1, fq2))
    ]
    return Supervisor(tasks=tasks)
Beispiel #10
0
def gen_dmnd_blast_tasks(db, force, blast_plus):
    """Bundle the download and index-building tasks for one protein database.

    Args:
        db: database record exposing ``download_location`` and ``call_path``.
        force: not read by this function.
        blast_plus: when truthy, a BLAST protein index build is added next to
            the diamond build.

    Returns:
        Supervisor over the download task, the diamond build, the optional
        BLAST build and the db2stitle task, in that order. Every build step
        depends only on the download.
    """
    download = download_task_wrapper(db, [])
    build_steps = [
        download,
        fdb.build_diamond_task(db.download_location, db.call_path,
                               [download]),
    ]
    if blast_plus:
        build_steps.append(
            fdb.build_blast_task(db.download_location, db.call_path, 'prot',
                                 [download]))
    build_steps.append(fdb.db2stitle_task(db.download_location, [download]))
    return Supervisor(build_steps)
Beispiel #11
0
def gen_unpaired_trimmomatic_supervisor(opc, out_dir, fq1, fq2, unpaired,
                                        dependency_set, cpu_cap):
    """Create one trimmomatic task per unpaired read file.

    Args:
        opc: options object forwarded to each trimmomatic task.
        out_dir: output directory for the trimmed reads.
        fq1: paired-end read files; only its length is used, as the starting
            number for output names so they continue after the paired ones.
        fq2: not read by this function.
        unpaired: single-end read files to trim.
        dependency_set: dependencies given to every trimmomatic task.
        cpu_cap: total CPU budget, split evenly across the unpaired files.

    Returns:
        Supervisor over the trimmomatic tasks, named
        ``trimmomatic_output_<n>`` with ``n`` starting at ``len(fq1)``. With
        no unpaired files the Supervisor has an empty task list.
    """
    file_count = len(unpaired)
    # Each task must get at least one thread. The original rounded to 0 when
    # there were many more files than CPUs and raised ZeroDivisionError for
    # an empty ``unpaired``.
    if file_count:
        cpu_mod = max(1, int(round(float(cpu_cap) / file_count)))
    else:
        cpu_mod = 1
    tasks = [
        fa.trimmomatic_unpaired_task(opc, out_dir, reads, cpu_mod,
                                     'trimmomatic_output_' + str(count),
                                     dependency_set)
        for count, reads in enumerate(unpaired, len(fq1))
    ]
    return Supervisor(tasks=tasks)
Beispiel #12
0
def gen_filter_supervisor(opc,
                          dbs,
                          main_path_assembly,
                          main_assembly_name,
                          out_dir,
                          transrate_task,
                          dependency_set,
                          tpm_threshold=1):
    """Build the Supervisor that filters assemblies by TPM threshold.

    Two filter tasks are generated: one for the main assembly and one for
    ``transrate_task.targets[1]``, named ``good.<main_assembly_name>``. Both
    receive ``transrate_task.targets[2]`` and depend on the transrate task.

    Args:
        opc, dbs: not read by this function.
        main_path_assembly, main_assembly_name: assembly to filter.
        out_dir: output directory for the filtered assemblies.
        transrate_task: upstream task whose targets feed the filters.
        dependency_set: dependencies of the returned Supervisor.
        tpm_threshold: threshold forwarded to ``fg.filter_task``.

    Returns:
        Supervisor over the filter tasks.
    """
    # NOTE(review): transrate_task.targets is dereferenced right here, so a
    # None transrate_task raises AttributeError before the None test below
    # can take effect. Confirm whether None is meant to be supported.
    tasks = [
        fg.filter_task(main_path_assembly, main_assembly_name, out_dir,
                       [transrate_task.targets[2]], tpm_threshold, 2,
                       [transrate_task])
    ]
    if transrate_task is None:
        return Supervisor(tasks=tasks, dependencies=dependency_set)

    tasks.append(
        fg.filter_task(transrate_task.targets[1],
                       'good.' + main_assembly_name, out_dir,
                       [transrate_task.targets[2]], tpm_threshold, 2,
                       [transrate_task]))
    return Supervisor(tasks=tasks, dependencies=dependency_set)
Beispiel #13
0
def gen_quality_supervisor(opc, dbs, transrate_fq1, transrate_fq2,
                           dependency_set, busco_refs, assembly_name,
                           assembly_path, out_dir, transrate_dir, reads_dir,
                           filter_dir, cp_transrate=True, cpu=12,
                           cegma_flag=False, transrate_ref=''):
    """Build the Supervisor that runs assembly quality assessment.

    Schedules one BUSCO task per reference, a transrate task, an
    assembly-stats task, optionally a copy of transrate's second target into
    ``filter_dir`` and optionally a CEGMA task.

    Args:
        opc, dbs: forwarded to the task factories.
        transrate_fq1, transrate_fq2: read files for transrate; ``None`` is
            treated as an empty list.
        dependency_set: dependencies of the returned Supervisor.
        busco_refs: iterable of BUSCO references, one task each.
        assembly_name, assembly_path: assembly under evaluation.
        out_dir: output directory for quality results.
        transrate_dir, reads_dir: forwarded to the transrate task.
        filter_dir: destination folder for the ``good.<assembly_name>`` copy.
        cp_transrate: when truthy, add the copy task.
        cpu: CPU budget; BUSCO tasks get ``int(cpu / 2)``.
        cegma_flag: when truthy, add a CEGMA task.
        transrate_ref: reference forwarded to the transrate task.

    Returns:
        Supervisor over the generated tasks.
    """
    tasks = [
        fq.busco_task(opc, dbs, assembly_path, assembly_name, out_dir,
                      busco_ref, int(cpu / 2), [])
        for busco_ref in busco_refs
    ]
    assembly_stats = fq.assembly_stats_task(opc, out_dir, assembly_path, [])

    # Identity comparison is the correct None test (was ``== None``).
    if transrate_fq1 is None:
        transrate_fq1 = []
    if transrate_fq2 is None:
        transrate_fq2 = []

    # NOTE(review): round(float(cpu), 4) rounds to four decimals and int()
    # then truncates, so this is effectively int(cpu). Confirm a division by
    # 4 was not intended.
    transrate = fq.transrate_task(opc, reads_dir, assembly_path,
                                  assembly_name, transrate_fq1, transrate_fq2,
                                  out_dir, transrate_dir,
                                  int(round(float(cpu), 4)), [],
                                  transrate_ref)
    tasks.append(transrate)
    tasks.append(assembly_stats)

    if cp_transrate:
        tasks.append(
            fg.cp_assembly_task(join(filter_dir, 'good.' + assembly_name),
                                transrate.targets[1], [transrate]))
    if cegma_flag:
        cegma = fq.cegma_task(out_dir, assembly_path, cpu, [])
        tasks.append(cegma)
    return Supervisor(tasks=tasks, dependencies=dependency_set)
Beispiel #14
0
def gen_assembly_supervisor(opc,
                            dbs,
                            fastq1,
                            fastq2,
                            unpaired,
                            dependency_set,
                            no_trim=False,
                            rnaSPAdes=False,
                            rmdup=False,
                            subset_size=50000000,
                            cpu=12,
                            subset_seed='I am a seed value',
                            normalize_flag=False,
                            truncate_opt=-1,
                            trimmomatic_flag=True,
                            trinity_memory=100):
    """Build the Supervisor for read trimming followed by de novo assembly.

    Reads are first passed through ``gen_trimming_supervisor``; its outputs
    feed either rnaSPAdes or Trinity. The Trinity path also schedules a
    gene/transcript map task.

    Args:
        opc: options object; supplies ``path_assembly_files`` (working
            directory) and ``path_assembly`` (assembly location).
        dbs: not read by this function.
        fastq1, fastq2, unpaired: input read files.
        dependency_set: not read by this function.
        no_trim, rmdup, subset_size, subset_seed, truncate_opt,
        trimmomatic_flag: forwarded to ``gen_trimming_supervisor``.
        rnaSPAdes: assemble with rnaSPAdes instead of Trinity.
        cpu: CPU budget for trimming and assembly.
        normalize_flag: forwarded to the Trinity task.
        trinity_memory: passed twice to the Trinity task.

    Returns:
        Supervisor over the trimming supervisor and the assembly tasks.
    """
    out_dir = opc.path_assembly_files
    path_assembly = opc.path_assembly

    trim_reads, fastq1, fastq2, unpaired = gen_trimming_supervisor(
        opc, out_dir, fastq1, fastq2, unpaired, no_trim, trimmomatic_flag,
        rmdup, subset_size, subset_seed, truncate_opt, [], cpu)
    tasks = [trim_reads]

    if rnaSPAdes:
        tasks.append(
            fa.rnaspades_task(path_assembly, out_dir, fastq1, fastq2,
                              unpaired, cpu, [trim_reads]))
        return Supervisor(tasks=tasks)

    trinity = fa.trinity_task(opc, path_assembly, out_dir, fastq1, fastq2,
                              unpaired, cpu, int(cpu / 2), trinity_memory,
                              trinity_memory, normalize_flag, [trim_reads])
    tasks.append(trinity)
    tasks.append(
        fan.gene_trans_map_task(opc, path_assembly, out_dir, [trinity]))
    return Supervisor(tasks=tasks)
Beispiel #15
0
def gen_expression_supervisor(opc,
                              dbs,
                              fastq1,
                              fastq2,
                              paired_names,
                              unpaired,
                              unpaired_names,
                              cpu,
                              sample_info,
                              model,
                              gene_trans_map,
                              dependency_set,
                              assembly_name,
                              assembly_path,
                              out_dir,
                              run_salmon=True,
                              run_express=False,
                              run_intersectbed=False,
                              run_rapclust=False):
    """Build the Supervisor covering the selected expression pipelines.

    Salmon is handled on its own. The express, intersectBed and rapclust
    pipelines share a single bowtie2 index build, which is scheduled only if
    at least one of them is enabled.

    Args:
        opc: options object forwarded to the task factories.
        dbs: not read by this function.
        fastq1, fastq2, paired_names: paired-end reads and their names.
        unpaired, unpaired_names: single-end reads and their names.
        cpu: CPU budget forwarded to each sub-supervisor.
        sample_info, model, gene_trans_map: forwarded to the sub-supervisors.
        dependency_set: dependencies of the returned Supervisor.
        assembly_name, assembly_path: assembly to quantify against.
        out_dir: output directory for expression results.
        run_salmon, run_express, run_intersectbed, run_rapclust: pipeline
            switches.

    Returns:
        Supervisor over the sub-supervisors and the bowtie build task.
    """
    all_tasks, deps = [], []

    # Read trimming is hard-wired off; the branch is kept for reference.
    # NOTE(review): if re-enabled, truncate_opt=False compares equal to 0 --
    # confirm that is the intended "no truncation" value.
    trim_reads = False
    if trim_reads:
        trimmomatic_flag, rmdup, truncate_opt = True, False, False
        trim_tasks, fastq1, fastq2, unpaired = assemb.gen_trimming_supervisor(
            opc, out_dir, fastq1, fastq2, unpaired, False, trimmomatic_flag,
            rmdup, 10**15, 0, truncate_opt, [], cpu)
        all_tasks.append(trim_tasks)
        deps.append(trim_tasks)

    if run_salmon:
        all_tasks.append(
            gen_salmon_supervisor(opc, fastq1, fastq2, paired_names, unpaired,
                                  unpaired_names, assembly_path, assembly_name,
                                  gene_trans_map, sample_info, model, out_dir,
                                  cpu, deps))

    needs_bowtie = run_express or run_intersectbed or run_rapclust
    if not needs_bowtie:
        return Supervisor(tasks=all_tasks, dependencies=dependency_set)

    build_bowtie = fex.build_bowtie_task(opc, assembly_path, assembly_name,
                                         out_dir, [])
    # The index prefix is the first build target with everything after the
    # first dot of its file name removed.
    first_target = build_bowtie.targets[0]
    bowtie2_index = join(dirname(first_target),
                         basename(first_target).split('.')[0])
    all_tasks.append(build_bowtie)

    if run_express:
        all_tasks.append(
            gen_express_supervisor(opc, fastq1, fastq2, paired_names,
                                   unpaired, unpaired_names, assembly_path,
                                   assembly_name, bowtie2_index,
                                   gene_trans_map, sample_info, model,
                                   out_dir, cpu, [build_bowtie]))
    if run_rapclust:
        all_tasks.append(
            gen_rapclust_supervisor(opc, fastq1, fastq2, paired_names,
                                    unpaired, unpaired_names, assembly_path,
                                    assembly_name, bowtie2_index,
                                    gene_trans_map, sample_info, model,
                                    out_dir, cpu, [build_bowtie]))
    if run_intersectbed:
        all_tasks.append(
            gen_intersect_supervisor(opc, fastq1, fastq2, paired_names,
                                     unpaired, unpaired_names, assembly_path,
                                     assembly_name, bowtie2_index,
                                     gene_trans_map, sample_info, model,
                                     out_dir, cpu, [build_bowtie]))
    return Supervisor(tasks=all_tasks, dependencies=dependency_set)
Beispiel #16
0
def gen_intersect_supervisor(opc, fq1, fq2, paired_names, unpaired,
                             unpaired_names, assembly_path, assembly_name,
                             bowtie2_index, gene_trans_map, sample_info, model,
                             out_dir, cpu_cap, deps):
    """Build the Supervisor for the intersectBed expression pipeline.

    For every read set the chain is bowtie2 -> sam sort -> intersectBed
    against a BED rendering of the assembly. The per-sample results are then
    merged by a counts-to-table task and analysed by two DESeq2 tasks
    (transcript level and gene level).

    Args:
        opc: options object forwarded to the task factories.
        fq1, fq2: parallel lists of paired-end read files.
        paired_names: output name for each read pair (same indexing).
        unpaired: list of single-end read files.
        unpaired_names: output name for each unpaired file (same indexing).
        assembly_path, assembly_name: assembly to quantify against.
        bowtie2_index: index handed to every bowtie2 task.
        gene_trans_map: forwarded to the counts-to-table task.
        sample_info, model: forwarded to the DESeq2 tasks.
        out_dir: parent directory for the ``intersectBed`` folder.
        cpu_cap: CPU budget; alignments get ``fg.round_div(cpu_cap, 2)``.
        deps: upstream dependencies for the alignments; not modified.

    Returns:
        Supervisor over all generated tasks.
    """
    intersect_tasks, bowtie_i_tasks, sam_sort_tasks = [], [], []
    intersect_dir = fg.make_dir_task(os.path.join(out_dir, 'intersectBed'))
    out_dir = intersect_dir.targets[0]
    # Work on a copy: the original appended to the caller's list in place,
    # leaking intersect_dir into whatever else shared that list.
    deps = list(deps) + [intersect_dir]
    fasta_to_bed = fan.assembly_to_bed_task(opc, assembly_path, out_dir,
                                            [intersect_dir])

    for i, reads_1 in enumerate(fq1):
        filename = paired_names[i]
        bowtie_i = fex.bowtie2_task(opc, bowtie2_index, out_dir, reads_1,
                                    fq2[i], filename, 1,
                                    fg.round_div(cpu_cap, 2), deps)
        sam_sort = fex.sam_sort_task(opc, out_dir, bowtie_i.targets[0],
                                     filename + '_sorted', [bowtie_i])
        intersect_bed = fex.intersect_bed_task(opc, out_dir,
                                               sam_sort.targets[0],
                                               fasta_to_bed.targets[0],
                                               paired_names[i],
                                               [sam_sort, fasta_to_bed])
        bowtie_i_tasks.append(bowtie_i)
        sam_sort_tasks.append(sam_sort)
        intersect_tasks.append(intersect_bed)

    for i, reads in enumerate(unpaired):
        filename = unpaired_names[i]
        bowtie_i = fex.bowtie2_unpaired_task(opc, bowtie2_index, out_dir,
                                             reads, filename, 1,
                                             fg.round_div(cpu_cap, 2), deps)
        sam_sort = fex.sam_sort_task(opc, out_dir, bowtie_i.targets[0],
                                     filename + '_sorted', [bowtie_i])
        intersect_bed = fex.intersect_bed_task(opc, out_dir,
                                               sam_sort.targets[0],
                                               fasta_to_bed.targets[0],
                                               unpaired_names[i],
                                               [sam_sort, fasta_to_bed])
        bowtie_i_tasks.append(bowtie_i)
        sam_sort_tasks.append(sam_sort)
        intersect_tasks.append(intersect_bed)

    transcriptName = assembly_name
    geneName = assembly_name + '_gene'
    counts_to_table_intersect = fex.counts_to_table_task(
        opc, assembly_name, gene_trans_map, out_dir,
        [t.targets[0] for t in intersect_tasks], transcriptName, '',
        intersect_tasks)
    deseq2_intersect = fex.deseq2_task(opc, assembly_name, out_dir,
                                       counts_to_table_intersect.targets[0],
                                       sample_info, transcriptName, model,
                                       [counts_to_table_intersect])
    deseq2_intersect_gene = fex.deseq2_task(
        opc, assembly_name, out_dir, counts_to_table_intersect.targets[1],
        sample_info, geneName, model, [counts_to_table_intersect])

    i_tasks = [
        intersect_dir, fasta_to_bed, counts_to_table_intersect,
        deseq2_intersect, deseq2_intersect_gene
    ] + bowtie_i_tasks + sam_sort_tasks + intersect_tasks
    return Supervisor(tasks=i_tasks)
Beispiel #17
0
def gen_annotation_supervisor(opc,
                              dbs,
                              cpu,
                              uniref90_flag,
                              nr_flag,
                              blast_flag,
                              signalp_flag,
                              tmhmm_flag,
                              rnammer_flag,
                              dependency_set,
                              gene_trans_map,
                              path_assembly,
                              assembly_name,
                              out_dir,
                              improve_orfs=False):
    """Build the Supervisor that annotates an assembly.

    Stages, in the order they are scheduled:

    1. TransDecoder long-ORF detection, optionally followed by blastp and
       Pfam searches on the long ORFs (``improve_orfs``), then ORF prediction.
    2. Pfam search on the predicted ORFs.
    3. Homology searches against uniprot_sprot, plus uniref90 and nr when
       their flags are set, using BLAST when ``blast_flag`` is truthy and
       diamond followed by a blast-augment step otherwise.
    4. Optional tmhmm and signalP tasks.
    5. Annotation table, GFF3, pipeplot and KEGG tasks.

    Args:
        opc: options object; ``path_dir`` and ``assembly_name`` name the GFF3
            output, and it is forwarded to the task factories.
        dbs: mapping of database name to a record with a ``call_path``.
        cpu: CPU budget, divided per task via ``cpumod`` or ``int(cpu / 2)``.
        uniref90_flag, nr_flag: enable searches against those databases.
        blast_flag: choose BLAST (truthy) or diamond (falsy) for the searches.
        signalp_flag, tmhmm_flag: enable the respective tasks.
        rnammer_flag: not read by this function.
        dependency_set: dependencies of the returned Supervisor.
        gene_trans_map: stored in the annotation-table options under
            ``'geneTransMap'``.
        path_assembly: assembly to annotate.
        assembly_name: not read by this function (the GFF3 name comes from
            ``opc.assembly_name``).
        out_dir: output directory for annotation results.
        improve_orfs: run the extra blastp/Pfam searches that feed ORF
            prediction.

    Returns:
        Supervisor over all generated tasks.
    """
    tasks = []
    # Keyword -> result-file mapping consumed by the annotation table task.
    annot_table_opts = {'geneTransMap': gene_trans_map}
    # Tasks and result files consumed by the GFF3 task.
    gff3_dependencies = []
    gff3_opts = {}

    def task_insert(task, name=None, index=0, gff3_flag=False):
        """Schedule ``task`` and optionally register one of its targets.

        ``task.targets[index]`` is stored under ``name`` for the annotation
        table when ``name`` is given, and also for the GFF3 task when
        ``gff3_flag`` is set. Note that ``gff3_flag`` with ``name=None`` would
        store the target under the key ``None``.
        """
        tasks.append(task)
        if (name != None):
            annot_table_opts[name] = task.targets[index]
        if (gff3_flag):
            gff3_dependencies.append(task)
            gff3_opts[name] = task.targets[index]

    # Redundant: the same key/value pair was set when the dict was created.
    annot_table_opts['geneTransMap'] = gene_trans_map
    transd_dir = os.path.join(out_dir, 'transdecoder')
    longorfs = fan.transdecoder_longorfs_task(opc, path_assembly, transd_dir,
                                              cpumod(cpu, 2), [])
    tasks.append(longorfs)
    if improve_orfs:
        # Search the long ORFs first so ORF prediction can use the hits.
        # NOTE(review): this is the only call using int(cpu / 2) rather than
        # cpumod(cpu, 2) -- confirm whether that difference is intended.
        blastp_transd = fan.blast_task(opc, 'blastp', transd_dir,
                                       longorfs.targets[0],
                                       dbs['uniprot_sprot'].call_path,
                                       int(cpu / 2), [longorfs])
        pfam_transd = fan.pfam_task(opc, dbs, longorfs.targets[0], transd_dir,
                                    cpumod(cpu, 2), [longorfs])
        tasks.extend([blastp_transd, pfam_transd])
        predict_orfs = fan.transdecoder_predict_orfs_task(
            opc, path_assembly, transd_dir,
            [longorfs, pfam_transd, blastp_transd], pfam_transd.targets[0],
            blastp_transd.targets[0])
    else:
        predict_orfs = fan.transdecoder_predict_orfs_task(
            opc, path_assembly, transd_dir, [longorfs])
    # predict_orfs is wired into the GFF3 inputs by hand because it uses a
    # different target (index 2) there than for the annotation table
    # (index 1); targets[0] is the input of the ORF-based searches below.
    gff3_dependencies.append(predict_orfs)
    gff3_opts['transdecoder_gff3'] = predict_orfs.targets[2]
    task_insert(predict_orfs, 'transdecoder', 1)
    pfam = fan.pfam_task(opc, dbs, predict_orfs.targets[0], out_dir,
                         cpumod(cpu, 4), [predict_orfs])
    #pfam = fan.pfam_task(predict_orfs.targets[0], out_dir,cpu, [predict_orfs])
    task_insert(pfam, 'pfam', gff3_flag=True)
    if (blast_flag):
        # BLAST path: blastx runs on the assembly with no dependencies,
        # blastp runs on the predicted ORFs.
        blastx_sprot = fan.blast_task(opc, 'blastx', out_dir, path_assembly,
                                      dbs['uniprot_sprot'].call_path,
                                      cpumod(cpu, 2), [])
        task_insert(blastx_sprot, 'spX', gff3_flag=True)
        blastp_sprot = fan.blast_task(opc, 'blastp', out_dir,
                                      predict_orfs.targets[0],
                                      dbs['uniprot_sprot'].call_path,
                                      cpumod(cpu, 2), [predict_orfs])
        task_insert(blastp_sprot, 'spP', gff3_flag=True)

        if (uniref90_flag):
            blastx_ur90 = fan.blast_task(opc, 'blastx', out_dir, path_assembly,
                                         dbs['uniref90'].call_path,
                                         cpumod(cpu, 2), [])
            task_insert(blastx_ur90, 'ur90X', gff3_flag=True)
            blastp_ur90 = fan.blast_task(opc, 'blastp', out_dir,
                                         predict_orfs.targets[0],
                                         dbs['uniref90'].call_path,
                                         cpumod(cpu, 2), [predict_orfs])
            task_insert(blastp_ur90, 'ur90P', gff3_flag=True)
        if (nr_flag):
            blastx_nr = fan.blast_task(opc, 'blastx', out_dir, path_assembly,
                                       dbs['nr'].call_path, cpumod(cpu, 2), [])
            task_insert(blastx_nr, 'nrX', gff3_flag=True)
            blastp_nr = fan.blast_task(opc, 'blastp', out_dir,
                                       predict_orfs.targets[0],
                                       dbs['nr'].call_path, cpumod(cpu, 2),
                                       [predict_orfs])
            task_insert(blastp_nr, 'nrP', gff3_flag=True)
    else:
        # Diamond path. Every diamond task is created with a snapshot of the
        # diamond tasks created before it as dependencies, so each depends on
        # all earlier ones -- presumably to keep diamond runs from executing
        # concurrently (TODO confirm). Statement order therefore matters.
        dmnd_dependencies = []

        def dmnd_task_insert(task, name=None):
            """Schedule a diamond task and record it as a dependency of the
            diamond tasks created afterwards."""
            dmnd_dependencies.append(task)
            task_insert(task, name)

        dmnd_xsprot = fan.diamond_task(opc, 'blastx', out_dir, path_assembly,
                                       dbs['uniprot_sprot'].call_path,
                                       cpumod(cpu, 2), dmnd_dependencies[:])
        dmnd_task_insert(dmnd_xsprot)
        # The augment task's target, not the raw diamond output, is what gets
        # registered under the result key.
        expand = fan.blast_augment_task(opc, dbs['uniprot_sprot'].call_path,
                                        dmnd_xsprot.targets[0], [dmnd_xsprot])
        task_insert(expand, 'spX', gff3_flag=True)
        dmnd_psprot = fan.diamond_task(opc, 'blastp', out_dir,
                                       predict_orfs.targets[0],
                                       dbs['uniprot_sprot'].call_path,
                                       cpumod(cpu, 2),
                                       dmnd_dependencies + [predict_orfs])
        dmnd_task_insert(dmnd_psprot)
        expand = fan.blast_augment_task(opc, dbs['uniprot_sprot'].call_path,
                                        dmnd_psprot.targets[0], [dmnd_psprot])
        task_insert(expand, 'spP', gff3_flag=True)
        if (uniref90_flag):
            dmnd_xur90 = fan.diamond_task(opc, 'blastx', out_dir,
                                          path_assembly,
                                          dbs['uniref90'].call_path,
                                          cpumod(cpu, 2), dmnd_dependencies[:])
            dmnd_task_insert(dmnd_xur90)
            expand = fan.blast_augment_task(opc, dbs['uniref90'].call_path,
                                            dmnd_xur90.targets[0],
                                            [dmnd_xur90])
            task_insert(expand, 'ur90X', gff3_flag=True)
            dmnd_pur90 = fan.diamond_task(opc, 'blastp', out_dir,
                                          predict_orfs.targets[0],
                                          dbs['uniref90'].call_path,
                                          cpumod(cpu, 2),
                                          dmnd_dependencies + [predict_orfs])
            dmnd_task_insert(dmnd_pur90)
            expand = fan.blast_augment_task(opc, dbs['uniref90'].call_path,
                                            dmnd_pur90.targets[0],
                                            [dmnd_pur90])
            task_insert(expand, 'ur90P', gff3_flag=True)
        if (nr_flag):
            dmnd_xnr = fan.diamond_task(opc, 'blastx', out_dir,
                                        path_assembly, dbs['nr'].call_path,
                                        cpumod(cpu, 2), dmnd_dependencies[:])
            dmnd_task_insert(dmnd_xnr)
            expand = fan.blast_augment_task(opc, dbs['nr'].call_path,
                                            dmnd_xnr.targets[0], [dmnd_xnr])
            task_insert(expand, 'nrX', gff3_flag=True)
            dmnd_pnr = fan.diamond_task(opc, 'blastp', out_dir,
                                        predict_orfs.targets[0],
                                        dbs['nr'].call_path, cpumod(cpu, 2),
                                        dmnd_dependencies + [predict_orfs])
            dmnd_task_insert(dmnd_pnr)
            expand = fan.blast_augment_task(opc, dbs['nr'].call_path,
                                            dmnd_pnr.targets[0], [dmnd_pnr])
            task_insert(expand, 'nrP', gff3_flag=True)
    if (tmhmm_flag):
        tmhmm = fan.tmhmm_task(opc, predict_orfs.targets[0], out_dir,
                               [predict_orfs])
        task_insert(tmhmm, 'tmhmm')
    if (signalp_flag):
        signalp = fan.signalp_task(opc, predict_orfs.targets[0], out_dir,
                                   [predict_orfs])
        task_insert(signalp, 'signalP')
    # TODO: the annotation table depends on a snapshot of every task scheduled
    # so far; it should be smarter -- if pfam fails, for example, a table
    # could still be generated.
    annot = fan.annot_table_task(opc, dbs, path_assembly, out_dir,
                                 annot_table_opts, tasks[:])
    tasks.append(annot)
    # The GFF3 file is written to opc.path_dir rather than out_dir.
    gff3_output = os.path.join(opc.path_dir, opc.assembly_name + '.gff3')
    gff3 = fan.gff3_task(opc, path_assembly, gff3_output, gff3_opts,
                         gff3_dependencies)
    tasks.append(gff3)
    # Both remaining tasks consume the annotation table.
    pipeplot = fan.pipeplot_task(opc, dbs, annot.targets[0], out_dir, [annot])
    tasks.append(pipeplot)
    kegg = fan.kegg_task(opc, annot.targets[0], out_dir, [annot])
    tasks.append(kegg)
    return Supervisor(tasks=tasks, dependencies=dependency_set)
Beispiel #18
0
def gen_trimming_supervisor(opc, out_dir, fq1, fq2, unpaired, no_trim,
                            trimmomatic_flag, rmdup, subset_size, subset_seed,
                            truncate_opt, dependency_set, cpu_cap):
    """Build the read pre-processing Supervisor and report its output files.

    Unless ``no_trim`` is set, reads are quality-checked with FastQC, trimmed
    (trimmomatic or prinseq) and checked again. Paired reads are then subset
    and, when ``truncate_opt >= 0``, truncated. A final FastQC task runs on
    whatever read files result.

    Args:
        opc: options object; ``path_assembly_files`` receives FastQC output.
        out_dir: output directory for the processed reads.
        fq1, fq2: lists of paired-end read files.
        unpaired: list of single-end read files.
        no_trim: skip the trimming stage and its FastQC runs.
        trimmomatic_flag: trim with trimmomatic (truthy) or prinseq (falsy).
        rmdup: forwarded to the prinseq supervisors.
        subset_size, subset_seed: forwarded to the subset task.
        truncate_opt: truncation setting; truncation runs only when it is
            ``>= 0``.
        dependency_set: dependencies of the returned Supervisor and of the
            trimming sub-supervisors.
        cpu_cap: CPU budget.

    Returns:
        Tuple ``(supervisor, fq1, fq2, unpaired)`` where the three lists name
        the read files produced by the last stage that ran.
    """
    tasks = []
    deps = []
    if not no_trim:
        all_reads = fq1 + fq2 + unpaired
        tasks.append(
            fa.fastqc_task(opc, opc.path_assembly_files, all_reads,
                           'pre_trimming', min(cpu_cap, len(all_reads)), []))
        if fq1 != []:
            if trimmomatic_flag:
                paired_sup = gen_paired_trimmomatic_supervisor(
                    opc, out_dir, fq1, fq2, unpaired, dependency_set, cpu_cap)
            else:
                paired_sup = gen_paired_prinseq_supervisor(
                    opc, out_dir, fq1, fq2, unpaired, dependency_set, rmdup)
            # The supervisor's targets alternate: even indices become the new
            # fq1 list, odd indices the new fq2 list.
            fq1 = paired_sup.targets[0::2]
            fq2 = paired_sup.targets[1::2]
            tasks.append(paired_sup)
            tasks.append(
                fa.fastqc_task(opc, opc.path_assembly_files, fq1 + fq2,
                               'post_trimming_paired',
                               int(round(float(cpu_cap) / 2)), [paired_sup]))
            deps.append(paired_sup)
        if unpaired != []:
            if trimmomatic_flag:
                unpaired_sup = gen_unpaired_trimmomatic_supervisor(
                    opc, out_dir, fq1, fq2, unpaired, dependency_set, cpu_cap)
            else:
                unpaired_sup = gen_unpaired_prinseq_supervisor(
                    opc, out_dir, fq1, fq2, unpaired, dependency_set, rmdup)
            unpaired = unpaired_sup.targets
            tasks.append(unpaired_sup)
            tasks.append(
                fa.fastqc_task(opc, opc.path_assembly_files, unpaired,
                               'post_trimming_unpaired',
                               int(round(float(cpu_cap) / 2)), [unpaired_sup]))
            deps.append(unpaired_sup)

    # TODO: subsetting and truncation only cover paired reads; unpaired reads
    # pass through unchanged.
    if fq1 != []:
        subset = fa.subset_task(opc, out_dir, fq1, fq2, 'final_reads',
                                subset_size, subset_seed, deps)
        fq1 = [subset.targets[0]]
        fq2 = [subset.targets[1]]
        tasks.append(subset)
        if truncate_opt >= 0:
            # Fixed: this used the undefined names fastq1/fastq2 and raised
            # NameError; the subset outputs held in fq1/fq2 are the inputs.
            truncate = fa.truncate_task(out_dir, fq1[0], fq2[0],
                                        truncate_opt, [subset])
            fq1 = [truncate.targets[0]]
            fq2 = [truncate.targets[1]]
            deps.append(truncate)
            tasks.append(truncate)

    late_fastqc = fa.fastqc_task(opc, out_dir, fq1 + fq2 + unpaired,
                                 'final_reads_paired', cpu_cap, deps)
    tasks.append(late_fastqc)
    return (Supervisor(tasks=tasks,
                       dependencies=dependency_set), fq1, fq2, unpaired)