def run_tasks(tasks, cpu=4):
    """Run ``tasks`` under a Supervisor, then delete their log files.

    Before the run, each task's ``stdout``/``stderr`` attribute is pointed at
    ``<task.name>.stdout`` / ``<task.name>.stderr`` inside PATH_DATABASES.
    The Supervisor is built with ``force_run=False`` and
    ``log=database_supervisor_log``.  The cleanup loop is only reached when
    ``run()`` returns without raising.

    Args:
        tasks: task objects exposing a ``name`` attribute.
        cpu: forwarded to the Supervisor as its ``cpu`` argument.
    """
    for task in tasks:
        print(task.name)
        task.stdout = os.path.join(PATH_DATABASES, task.name + '.stdout')
        task.stderr = os.path.join(PATH_DATABASES, task.name + '.stderr')

    Supervisor(tasks=tasks, force_run=False, log=database_supervisor_log,
               cpu=cpu).run()

    # If everything executed properly, remove the per-task logs.
    for task in tasks:
        for log_path in (task.stdout, task.stderr):
            if os.path.exists(log_path):
                os.remove(log_path)
def run_tasks(tasks, cpu=4):
    """Run tool-installation ``tasks`` and remove their logs afterwards.

    Each task gets ``stdout``/``stderr`` paths named after the task inside
    PATH_TOOLS, the tasks are executed by a Supervisor created with
    ``force_run=False`` and ``log=tool_supervisor_log``, and once ``run()``
    has returned the log files that exist are deleted.

    Args:
        tasks: task objects exposing a ``name`` attribute.
        cpu: forwarded to the Supervisor as its ``cpu`` argument.
    """

    def discard(log_path):
        # Only remove logs that were actually written.
        if exists(log_path):
            os.remove(log_path)

    for job in tasks:
        print(job.name)
        job.stdout = join(PATH_TOOLS, job.name + '.stdout')
        job.stderr = join(PATH_TOOLS, job.name + '.stderr')

    supervisor = Supervisor(tasks=tasks, force_run=False,
                            log=tool_supervisor_log, cpu=cpu)
    supervisor.run()

    # If everything executed properly, remove the task logs.
    for job in tasks:
        discard(job.stdout)
        discard(job.stderr)
def run_tasks(tasks, cpu=4):
    """Execute ``tasks`` through a Supervisor and tidy up their logs.

    Every task first has its ``stdout`` and ``stderr`` attributes set to
    ``<task.name>.stdout`` and ``<task.name>.stderr`` under PATH_DATABASES.
    A Supervisor (``force_run=False``, ``log=database_supervisor_log``) then
    runs them.  If ``run()`` returns, any of those log files that exist are
    removed.

    Args:
        tasks: task objects exposing a ``name`` attribute.
        cpu: forwarded to the Supervisor as its ``cpu`` argument.
    """
    for entry in tasks:
        print(entry.name)
        out_log = os.path.join(PATH_DATABASES, entry.name + '.stdout')
        entry.stdout = out_log
        err_log = os.path.join(PATH_DATABASES, entry.name + '.stderr')
        entry.stderr = err_log

    runner = Supervisor(
        tasks=tasks,
        force_run=False,
        log=database_supervisor_log,
        cpu=cpu,
    )
    runner.run()

    # Reaching this point means the run did not raise: drop the task logs.
    for entry in tasks:
        for leftover in (entry.stdout, entry.stderr):
            if not os.path.exists(leftover):
                continue
            os.remove(leftover)
def gen_rapclust_supervisor(opc, fastq1, fastq2, paired_names, unpaired,
                            unpaired_names, assembly_path, assembly_name,
                            bowtie2_index, gene_trans_map, sample_info, model,
                            out_dir, cpu_cap, deps):
    """Build the Supervisor holding the bowtie2 alignments for rapclust.

    A directory task for ``<out_dir>/rapclust_bt2`` is created, followed by
    one ``fex.bowtie2_task`` per paired sample and one
    ``fex.bowtie2_unpaired_task`` per unpaired file.  All alignments write
    into the new directory's first target and receive ``deps`` as their
    dependencies.

    Args:
        opc: options object handed through to the task factories.
        fastq1, fastq2: parallel sequences of paired read files.
        paired_names: output names, indexed like ``fastq1``.
        unpaired: unpaired read files.
        unpaired_names: output names, indexed like ``unpaired``.
        bowtie2_index: index passed to every bowtie2 task.
        out_dir: parent directory of the ``rapclust_bt2`` directory.
        cpu_cap: CPU budget; each task gets ``fg.round_div(cpu_cap, 2)``.
        deps: dependency list passed to every bowtie2 task.
        assembly_path, assembly_name, gene_trans_map, sample_info, model:
            accepted for signature parity but unused here.

    Returns:
        Supervisor over the directory task followed by the alignment tasks
        (paired first, then unpaired).
    """
    rc_dir = fg.make_dir_task(os.path.join(out_dir, 'rapclust_bt2'))
    out_dir = rc_dir.targets[0]

    alignments = []
    for idx, reads1 in enumerate(fastq1):
        sample = paired_names[idx]
        # NOTE(review): the literal 2 is an opaque positional argument of
        # fex.bowtie2_task -- confirm its meaning against that helper.
        alignments.append(
            fex.bowtie2_task(opc, bowtie2_index, out_dir, reads1,
                             fastq2[idx], sample, 2,
                             fg.round_div(cpu_cap, 2), deps))

    for idx, reads in enumerate(unpaired):
        sample = unpaired_names[idx]
        alignments.append(
            fex.bowtie2_unpaired_task(opc, bowtie2_index, out_dir, reads,
                                      sample, 2, fg.round_div(cpu_cap, 2),
                                      deps))

    return Supervisor(tasks=[rc_dir] + alignments)
def gen_db_supervisor(force=False, sprot=False, uniref90=False, nr=False,
                      busco_args=busco_defaults, blast_plus=False,
                      idmapping=False, cpu=float('inf'), pfam=True,
                      nog_functions=True, dep=[]):
    """Assemble the Supervisor that downloads and builds the databases.

    Args:
        force: passed to ``get_dbs`` as ``defaults`` and on to
            ``gen_dmnd_blast_tasks``.
        sprot, uniref90, nr: when true, add the download/build supervisor for
            ``uniprot_sprot`` / ``uniref90`` / ``nr``.
        busco_args: mapping of busco database suffix to an enable flag; each
            enabled entry downloads ``dbs['busco_' + suffix]``.
        blast_plus: forwarded to ``gen_dmnd_blast_tasks``.
        idmapping: when true, download ``id_mapping`` and
            ``id_mapping_selected`` and add the id-mapping subset task.
        cpu: forwarded to the returned Supervisor.
        pfam: when true, download pfam and add its build task.
        nog_functions: when true, download ``nog_functions``.
        dep: unused.

    Returns:
        Supervisor over every task that is not ``None``.
    """
    check_db_dir()
    dbs = get_dbs(defaults=force)
    tasks = []

    # Protein databases get a download + index-building sub-supervisor.
    protein_dbs = ((sprot, 'uniprot_sprot'), (uniref90, 'uniref90'), (nr, 'nr'))
    for wanted, db_name in protein_dbs:
        if wanted:
            tasks.append(gen_dmnd_blast_tasks(dbs[db_name], force, blast_plus))

    for busco_db in busco_args:
        if not busco_args[busco_db]:
            continue
        tasks.append(download_task_wrapper(dbs['busco_' + busco_db], []))

    if pfam:
        pfam_task = download_task_wrapper(dbs['pfam'], [])
        hmmpress = fdb.pfam_build_task(dbs['pfam'].download_location,
                                       dbs['pfam'].call_path, [pfam_task])
        tasks.extend([pfam_task, hmmpress])

    if nog_functions:
        tasks.append(download_task_wrapper(dbs['nog_functions'], []))

    if idmapping:
        idmap_task = download_task_wrapper(dbs['id_mapping'], [])
        tasks.append(idmap_task)
        tasks.append(download_task_wrapper(dbs['id_mapping_selected'], []))
        subset_idmap = fdb.subset_idmapping_task(
            dbs['id_mapping'].download_location,
            dbs['id_mapping_biocyc'].call_path,
            dbs['id_mapping_eggnog'].call_path,
            dbs['id_mapping_ko'].call_path,
            dbs['id_mapping_orthodb'].call_path,
            [idmap_task])
        tasks.append(subset_idmap)

    # Every remaining database is a plain download.  The names below, plus
    # the busco_* and id_mapping* families, are skipped by this loop.
    special_dbs = {
        'uniprot_sprot', 'uniref90', 'nr', 'swiss_enzyme',
        'orthology_pathway', 'nog_categories', 'nog_functions', 'pfam',
    }
    for db_string in dbs:
        if db_string in special_dbs:
            continue
        if db_string.startswith(('busco_', 'id_mapping')):
            continue
        tasks.append(download_task_wrapper(dbs[db_string], []))

    # Entries that are None are dropped before building the Supervisor.
    return Supervisor([t for t in tasks if t is not None], cpu=cpu)
def gen_salmon_supervisor(opc, fastq1, fastq2, paired_names, unpaired,
                          unpaired_names, assembly_path, assembly_name,
                          gene_trans_map, sample_info, model, out_dir,
                          cpu_cap, deps):
    """Build the Supervisor for salmon quantification and DESeq2.

    Creates ``<out_dir>/salmon``, builds the salmon index, optionally builds
    a salmon gene map (only when ``gene_trans_map`` is non-empty), quantifies
    every paired sample and unpaired file, merges the counts into tables and
    runs DESeq2 on the transcript- and gene-level tables.

    Args:
        opc: options object handed through to the task factories.
        fastq1, fastq2: parallel sequences of paired read files.
        paired_names: output names, indexed like ``fastq1``.
        unpaired: unpaired read files.
        unpaired_names: output names, indexed like ``unpaired``.
        assembly_path, assembly_name: assembly to index and quantify against.
        gene_trans_map: gene/transcript map; when empty (``len() == 0``) no
            gene-map task is created and salmon gets ``''`` as its map.
        sample_info, model: forwarded to the DESeq2 tasks.
        out_dir: parent directory of the ``salmon`` directory.
        cpu_cap: CPU budget; tasks get ``fg.round_div(cpu_cap, 2)``.
        deps: extra dependencies for every salmon quantification task (the
            caller's list is not modified).

    Returns:
        Supervisor over the setup tasks, the counts table, both DESeq2 tasks
        and the per-sample salmon tasks.
    """
    salmon_dir = fg.make_dir_task(os.path.join(out_dir, 'salmon'))
    out_dir = salmon_dir.targets[0]
    build_salmon = fex.build_salmon_task(opc, assembly_path, assembly_name,
                                         out_dir, fg.round_div(cpu_cap, 2),
                                         [salmon_dir])
    deps = deps + [build_salmon]
    setup_tasks = [salmon_dir, build_salmon]

    salmon_trans_gene_map = ''
    if len(gene_trans_map) > 0:
        salmon_gene_map = fex.salmon_gene_map_task(opc, out_dir, assembly_name,
                                                   gene_trans_map, [salmon_dir])
        salmon_trans_gene_map = salmon_gene_map.targets[0]
        deps = deps + [salmon_gene_map]
        # Register the gene-map task only when it exists; referencing it
        # unconditionally raised UnboundLocalError for an empty map.
        setup_tasks.append(salmon_gene_map)

    salmon_tasks = []
    for i, reads1 in enumerate(fastq1):
        filename = paired_names[i]
        salmon_tasks.append(
            fex.salmon_task(opc, build_salmon.targets[0], reads1, fastq2[i],
                            filename, salmon_trans_gene_map, out_dir,
                            fg.round_div(cpu_cap, 2), deps))
    for i, reads in enumerate(unpaired):
        filename = unpaired_names[i]
        salmon_tasks.append(
            fex.salmon_unpaired_task(opc, build_salmon.targets[0], reads,
                                     filename, salmon_trans_gene_map, out_dir,
                                     fg.round_div(cpu_cap, 2), deps))

    transcriptName = assembly_name
    geneName = assembly_name + '_gene'
    counts_to_table_salmon = fex.counts_to_table_task(
        opc, assembly_name, gene_trans_map, out_dir,
        [t.targets[0] for t in salmon_tasks], transcriptName, '--salmon',
        salmon_tasks)
    deseq2_salmon = fex.deseq2_task(opc, assembly_name, out_dir,
                                    counts_to_table_salmon.targets[0],
                                    sample_info, transcriptName, model,
                                    [counts_to_table_salmon])
    deseq2_salmon_gene = fex.deseq2_task(opc, assembly_name, out_dir,
                                         counts_to_table_salmon.targets[1],
                                         sample_info, geneName, model,
                                         [counts_to_table_salmon])

    all_tasks = setup_tasks + [
        counts_to_table_salmon, deseq2_salmon, deseq2_salmon_gene
    ] + salmon_tasks
    return Supervisor(tasks=all_tasks)
def gen_paired_prinseq_supervisor(opc, out_dir, fastq1, fastq2, unpaired,
                                  dependency_set, rmdup):
    """Build a Supervisor with one prinseq task per paired-end file pair.

    Pairs are formed with ``zip(fastq1, fastq2)``; the n-th pair (counting
    from 0) writes to the base name ``prinseq_output_<n>``.  Every task is
    created with an empty dependency list.

    Args:
        out_dir: output directory passed to ``fa.prinseq_task``.
        fastq1, fastq2: parallel sequences of paired read files.
        rmdup: when true, prinseq is given the options ``'--derep 14'``;
            otherwise an empty option string.
        opc, unpaired, dependency_set: unused.

    Returns:
        Supervisor over the prinseq tasks.
    """
    if rmdup:
        prinseq_opts = '--derep 14'
    else:
        prinseq_opts = ''

    tasks = [
        fa.prinseq_task(out_dir, reads1, reads2,
                        'prinseq_output_' + str(index), prinseq_opts, [])
        for index, (reads1, reads2) in enumerate(zip(fastq1, fastq2))
    ]
    return Supervisor(tasks=tasks)
def gen_paired_trimmomatic_supervisor(opc, out_dir, fq1, fq2, unpaired,
                                      dependency_set, cpu_cap):
    """Build a Supervisor with one trimmomatic task per paired-end pair.

    Pairs are formed with ``zip(fq1, fq2)``; the n-th pair (counting from 0)
    writes to the base name ``trimmomatic_output_<n>``.

    Args:
        opc: options object handed through to ``fa.trimmomatic_task``.
        out_dir: output directory for the trimmed reads.
        fq1, fq2: parallel sequences of paired read files.
        unpaired: unused.
        dependency_set: dependencies given to every trimmomatic task.
        cpu_cap: total CPU budget, split evenly across the pairs.

    Returns:
        Supervisor over the trimmomatic tasks (empty when ``fq1`` is empty).
    """
    # Share the budget across the pairs.  Clamp both sides: an empty fq1
    # used to divide by zero, and more pairs than CPUs rounded the per-task
    # thread count down to 0.
    cpu_mod = max(1, int(round(float(cpu_cap) / max(len(fq1), 1))))
    tasks = [
        fa.trimmomatic_task(opc, out_dir, i1, i2, cpu_mod,
                            'trimmomatic_output_' + str(count),
                            dependency_set)
        for count, (i1, i2) in enumerate(zip(fq1, fq2))
    ]
    return Supervisor(tasks=tasks)
def gen_dmnd_blast_tasks(db, force, blast_plus):
    """Build the Supervisor that downloads ``db`` and builds its indices.

    The download task comes first; the diamond build, the optional BLAST+
    build and the db2stitle task all list it as their only dependency.

    Args:
        db: database descriptor exposing ``download_location`` and
            ``call_path``.
        force: unused.
        blast_plus: when true, also add a protein (``'prot'``) BLAST build.

    Returns:
        Supervisor over the download and build tasks.
    """
    download = download_task_wrapper(db, [])
    tasks = [
        download,
        fdb.build_diamond_task(db.download_location, db.call_path, [download]),
    ]
    if blast_plus:
        tasks.append(
            fdb.build_blast_task(db.download_location, db.call_path, 'prot',
                                 [download]))
    tasks.append(fdb.db2stitle_task(db.download_location, [download]))
    return Supervisor(tasks)
def gen_unpaired_trimmomatic_supervisor(opc, out_dir, fq1, fq2, unpaired,
                                        dependency_set, cpu_cap):
    """Build a Supervisor with one trimmomatic task per unpaired file.

    Output base names are ``trimmomatic_output_<n>`` where ``n`` starts at
    ``len(fq1)``, so the numbering continues after the paired-end outputs.

    Args:
        opc: options object handed through to the task factory.
        out_dir: output directory for the trimmed reads.
        fq1: paired read files; only its length is used (numbering offset).
        fq2: unused.
        unpaired: unpaired read files to trim.
        dependency_set: dependencies given to every trimmomatic task.
        cpu_cap: total CPU budget, split evenly across the unpaired files.

    Returns:
        Supervisor over the trimmomatic tasks (empty when ``unpaired`` is
        empty).
    """
    # Share the budget across the files.  Clamp both sides: an empty input
    # used to divide by zero, and more files than CPUs rounded the per-task
    # thread count down to 0.
    cpu_mod = max(1, int(round(float(cpu_cap) / max(len(unpaired), 1))))
    tasks = [
        fa.trimmomatic_unpaired_task(opc, out_dir, reads, cpu_mod,
                                     'trimmomatic_output_' + str(count),
                                     dependency_set)
        for count, reads in enumerate(unpaired, len(fq1))
    ]
    return Supervisor(tasks=tasks)
def gen_filter_supervisor(opc, dbs, main_path_assembly, main_assembly_name,
                          out_dir, transrate_task, dependency_set,
                          tpm_threshold=1):
    """Build the Supervisor that filters assemblies by TPM threshold.

    Two ``fg.filter_task`` instances are created, both depending on
    ``transrate_task`` and both given ``[transrate_task.targets[2]]``: one
    for the main assembly and one for ``transrate_task.targets[1]`` under
    the name ``'good.' + main_assembly_name``.

    Args:
        opc, dbs: unused.
        main_path_assembly, main_assembly_name: the full assembly to filter.
        out_dir: output directory for the filtered assemblies.
        transrate_task: task whose ``targets[1]`` and ``targets[2]`` feed the
            filters.
        dependency_set: dependencies of the returned Supervisor.
        tpm_threshold: threshold forwarded to ``fg.filter_task``.

    Returns:
        Supervisor over the filter tasks.
    """

    def make_filter(assembly, name):
        # NOTE(review): the literal 2 is an opaque positional argument of
        # fg.filter_task -- confirm its meaning against that helper.
        return fg.filter_task(assembly, name, out_dir,
                              [transrate_task.targets[2]], tpm_threshold, 2,
                              [transrate_task])

    # NOTE(review): transrate_task is dereferenced here, before the None
    # check below, so passing None raises AttributeError.
    tasks = [make_filter(main_path_assembly, main_assembly_name)]
    if transrate_task is not None:
        tasks.append(
            make_filter(transrate_task.targets[1],
                        'good.' + main_assembly_name))
    return Supervisor(tasks=tasks, dependencies=dependency_set)
def gen_quality_supervisor(opc, dbs, transrate_fq1, transrate_fq2,
                           dependency_set, busco_refs, assembly_name,
                           assembly_path, out_dir, transrate_dir, reads_dir,
                           filter_dir, cp_transrate=True, cpu=12,
                           cegma_flag=False, transrate_ref=''):
    """Build the Supervisor for the assembly quality-assessment tasks.

    Adds one BUSCO task per entry in ``busco_refs``, a transrate task, an
    assembly-stats task, optionally a task copying transrate's second target
    to ``<filter_dir>/good.<assembly_name>``, and optionally a CEGMA task.

    Args:
        opc, dbs: handed through to the task factories.
        transrate_fq1, transrate_fq2: read files for transrate; ``None`` is
            treated as an empty list.
        dependency_set: dependencies of the returned Supervisor.
        busco_refs: BUSCO reference names.
        assembly_name, assembly_path: assembly under evaluation.
        out_dir: output directory for the quality tasks.
        transrate_dir, reads_dir: forwarded to ``fq.transrate_task``.
        filter_dir: destination directory of the copy task.
        cp_transrate: when true, add the copy task.
        cpu: CPU budget; BUSCO tasks get ``int(cpu / 2)``.
        cegma_flag: when true, add a CEGMA task.
        transrate_ref: forwarded to ``fq.transrate_task``.

    Returns:
        Supervisor over the quality tasks.
    """
    tasks = [
        fq.busco_task(opc, dbs, assembly_path, assembly_name, out_dir,
                      busco_ref, int(cpu / 2), [])
        for busco_ref in busco_refs
    ]
    assembly_stats = fq.assembly_stats_task(opc, out_dir, assembly_path, [])

    # Identity test rather than ``== None``: equality can be overridden by
    # the operand's type, identity cannot.
    if transrate_fq1 is None:
        transrate_fq1 = []
    if transrate_fq2 is None:
        transrate_fq2 = []

    # NOTE(review): round(float(cpu), 4) rounds to four decimals, which for
    # an integral ``cpu`` hands transrate the full budget -- confirm that a
    # division was not intended.
    transrate = fq.transrate_task(opc, reads_dir, assembly_path,
                                  assembly_name, transrate_fq1, transrate_fq2,
                                  out_dir, transrate_dir,
                                  int(round(float(cpu), 4)), [],
                                  transrate_ref)
    tasks.append(transrate)
    tasks.append(assembly_stats)

    if cp_transrate:
        tasks.append(
            fg.cp_assembly_task(join(filter_dir, 'good.' + assembly_name),
                                transrate.targets[1], [transrate]))
    if cegma_flag:
        tasks.append(fq.cegma_task(out_dir, assembly_path, cpu, []))

    return Supervisor(tasks=tasks, dependencies=dependency_set)
def gen_assembly_supervisor(opc, dbs, fastq1, fastq2, unpaired,
                            dependency_set, no_trim=False, rnaSPAdes=False,
                            rmdup=False, subset_size=50000000, cpu=12,
                            subset_seed='I am a seed value',
                            normalize_flag=False, truncate_opt=-1,
                            trimmomatic_flag=True, trinity_memory=100):
    """Build the Supervisor that trims reads and assembles them.

    The reads are first run through ``gen_trimming_supervisor`` (with an
    empty dependency list); its returned read lists feed the assembler.
    With ``rnaSPAdes`` set the assembler is rnaSPAdes, otherwise Trinity
    followed by a gene/transcript map task that depends on it.

    Args:
        opc: options object; ``path_assembly_files`` is the working
            directory and ``path_assembly`` the assembly path.
        dbs, dependency_set: unused.
        fastq1, fastq2, unpaired: input read files.
        no_trim, rmdup, subset_size, subset_seed, truncate_opt,
        trimmomatic_flag: forwarded to ``gen_trimming_supervisor``.
        rnaSPAdes: pick rnaSPAdes instead of Trinity.
        cpu: CPU budget; Trinity additionally receives ``int(cpu / 2)``.
        normalize_flag: forwarded to the Trinity task.
        trinity_memory: passed twice to the Trinity task.

    Returns:
        Supervisor over the trimming supervisor and the assembly tasks.
    """
    out_dir = opc.path_assembly_files
    path_assembly = opc.path_assembly

    trim_reads, fastq1, fastq2, unpaired = gen_trimming_supervisor(
        opc, out_dir, fastq1, fastq2, unpaired, no_trim, trimmomatic_flag,
        rmdup, subset_size, subset_seed, truncate_opt, [], cpu)
    tasks = [trim_reads]

    if rnaSPAdes:
        tasks.append(
            fa.rnaspades_task(path_assembly, out_dir, fastq1, fastq2,
                              unpaired, cpu, [trim_reads]))
        return Supervisor(tasks=tasks)

    trinity = fa.trinity_task(opc, path_assembly, out_dir, fastq1, fastq2,
                              unpaired, cpu, int(cpu / 2), trinity_memory,
                              trinity_memory, normalize_flag, [trim_reads])
    tasks.append(trinity)
    # The gene/transcript map is derived from the Trinity output.
    tasks.append(
        fan.gene_trans_map_task(opc, path_assembly, out_dir, [trinity]))
    return Supervisor(tasks=tasks)
def gen_expression_supervisor(opc, dbs, fastq1, fastq2, paired_names,
                              unpaired, unpaired_names, cpu, sample_info,
                              model, gene_trans_map, dependency_set,
                              assembly_name, assembly_path, out_dir,
                              run_salmon=True, run_express=False,
                              run_intersectbed=False, run_rapclust=False):
    """Build the Supervisor for the requested expression-analysis methods.

    Salmon gets its own sub-supervisor.  eXpress, rapclust and intersectBed
    share one bowtie2 index build; each of their sub-supervisors depends on
    that build task.

    Args:
        opc: options object handed through to the task factories.
        dbs: unused.
        fastq1, fastq2, paired_names: paired read files and their names.
        unpaired, unpaired_names: unpaired read files and their names.
        cpu: CPU budget forwarded to the sub-supervisors.
        sample_info, model, gene_trans_map: forwarded to the sub-supervisors.
        dependency_set: dependencies of the returned Supervisor.
        assembly_name, assembly_path: assembly to quantify against.
        out_dir: output directory.
        run_salmon, run_express, run_intersectbed, run_rapclust: method
            switches.

    Returns:
        Supervisor over the selected tasks and sub-supervisors.
    """
    all_tasks = []
    deps = []

    # Read trimming is switched off by this hard-coded flag, so the branch
    # below does not run as written.
    trim_reads = False
    if trim_reads:
        trimmomatic_flag = True
        rmdup = False
        truncate_opt = False
        trim_tasks, fastq1, fastq2, unpaired = assemb.gen_trimming_supervisor(
            opc, out_dir, fastq1, fastq2, unpaired, False, trimmomatic_flag,
            rmdup, 10**15, 0, truncate_opt, [], cpu)
        all_tasks.append(trim_tasks)
        deps.append(trim_tasks)

    if run_salmon:
        all_tasks.append(
            gen_salmon_supervisor(opc, fastq1, fastq2, paired_names, unpaired,
                                  unpaired_names, assembly_path,
                                  assembly_name, gene_trans_map, sample_info,
                                  model, out_dir, cpu, deps))

    if not (run_express or run_intersectbed or run_rapclust):
        return Supervisor(tasks=all_tasks, dependencies=dependency_set)

    # The remaining methods align with bowtie2 and share a single index.
    build_bowtie = fex.build_bowtie_task(opc, assembly_path, assembly_name,
                                         out_dir, [])
    index_target = build_bowtie.targets[0]
    # Index prefix: the target's directory joined with its base name up to
    # the first '.'.
    bowtie2_index = join(dirname(index_target),
                         basename(index_target).split('.')[0])
    all_tasks.append(build_bowtie)

    def bowtie_based(make_supervisor):
        # Each sub-supervisor receives its own fresh dependency list.
        return make_supervisor(opc, fastq1, fastq2, paired_names, unpaired,
                               unpaired_names, assembly_path, assembly_name,
                               bowtie2_index, gene_trans_map, sample_info,
                               model, out_dir, cpu, [build_bowtie])

    if run_express:
        all_tasks.append(bowtie_based(gen_express_supervisor))
    if run_rapclust:
        all_tasks.append(bowtie_based(gen_rapclust_supervisor))
    if run_intersectbed:
        all_tasks.append(bowtie_based(gen_intersect_supervisor))

    return Supervisor(tasks=all_tasks, dependencies=dependency_set)
def gen_intersect_supervisor(opc, fq1, fq2, paired_names, unpaired,
                             unpaired_names, assembly_path, assembly_name,
                             bowtie2_index, gene_trans_map, sample_info,
                             model, out_dir, cpu_cap, deps):
    """Build the Supervisor for intersectBed-based expression counting.

    Creates ``<out_dir>/intersectBed``, converts the assembly to BED, and for
    every paired sample and unpaired file chains bowtie2 -> SAM sort ->
    intersectBed.  The per-sample counts are merged into tables and DESeq2
    is run on the transcript- and gene-level tables.

    Args:
        opc: options object handed through to the task factories.
        fq1, fq2: parallel sequences of paired read files.
        paired_names: sample names, indexed like ``fq1``.
        unpaired: unpaired read files.
        unpaired_names: sample names, indexed like ``unpaired``.
        assembly_path, assembly_name: assembly to count against.
        bowtie2_index: index passed to every bowtie2 task.
        gene_trans_map: forwarded to the counts-to-table task.
        sample_info, model: forwarded to the DESeq2 tasks.
        out_dir: parent directory of the ``intersectBed`` directory.
        cpu_cap: CPU budget; bowtie2 tasks get ``fg.round_div(cpu_cap, 2)``.
        deps: dependencies for every bowtie2 task; the directory task is
            added to a copy, the caller's list is left untouched.

    Returns:
        Supervisor over the setup, table and DESeq2 tasks followed by all
        bowtie2, sort and intersectBed tasks.
    """
    intersect_dir = fg.make_dir_task(os.path.join(out_dir, 'intersectBed'))
    out_dir = intersect_dir.targets[0]
    # Copy instead of ``deps.append(...)`` so the caller's list is not
    # mutated as a side effect.
    deps = list(deps) + [intersect_dir]
    fasta_to_bed = fan.assembly_to_bed_task(opc, assembly_path, out_dir,
                                            [intersect_dir])

    bowtie_i_tasks, sam_sort_tasks, intersect_tasks = [], [], []

    def queue_sample(bowtie_i, sample_name):
        """Register one alignment plus its sort and intersectBed steps."""
        sam_sort = fex.sam_sort_task(opc, out_dir, bowtie_i.targets[0],
                                     sample_name + '_sorted', [bowtie_i])
        intersect_bed = fex.intersect_bed_task(
            opc, out_dir, sam_sort.targets[0], fasta_to_bed.targets[0],
            sample_name, [sam_sort, fasta_to_bed])
        bowtie_i_tasks.append(bowtie_i)
        sam_sort_tasks.append(sam_sort)
        intersect_tasks.append(intersect_bed)

    for i, reads1 in enumerate(fq1):
        filename = paired_names[i]
        # NOTE(review): the literal 1 is an opaque positional argument of
        # the bowtie2 task factories -- confirm its meaning there.
        bowtie_i = fex.bowtie2_task(opc, bowtie2_index, out_dir, reads1,
                                    fq2[i], filename, 1,
                                    fg.round_div(cpu_cap, 2), deps)
        queue_sample(bowtie_i, filename)

    for i, reads in enumerate(unpaired):
        filename = unpaired_names[i]
        bowtie_i = fex.bowtie2_unpaired_task(opc, bowtie2_index, out_dir,
                                             reads, filename, 1,
                                             fg.round_div(cpu_cap, 2), deps)
        queue_sample(bowtie_i, filename)

    transcriptName = assembly_name
    geneName = assembly_name + '_gene'
    counts_to_table_intersect = fex.counts_to_table_task(
        opc, assembly_name, gene_trans_map, out_dir,
        [t.targets[0] for t in intersect_tasks], transcriptName, '',
        intersect_tasks)
    deseq2_intersect = fex.deseq2_task(
        opc, assembly_name, out_dir, counts_to_table_intersect.targets[0],
        sample_info, transcriptName, model, [counts_to_table_intersect])
    deseq2_intersect_gene = fex.deseq2_task(
        opc, assembly_name, out_dir, counts_to_table_intersect.targets[1],
        sample_info, geneName, model, [counts_to_table_intersect])

    i_tasks = [
        intersect_dir, fasta_to_bed, counts_to_table_intersect,
        deseq2_intersect, deseq2_intersect_gene
    ] + bowtie_i_tasks + sam_sort_tasks + intersect_tasks
    return Supervisor(tasks=i_tasks)
def gen_annotation_supervisor(opc, dbs, cpu, uniref90_flag, nr_flag,
                              blast_flag, signalp_flag, tmhmm_flag,
                              rnammer_flag, dependency_set, gene_trans_map,
                              path_assembly, assembly_name, out_dir,
                              improve_orfs=False):
    """Build the Supervisor for the annotation pipeline.

    Steps, in creation order: TransDecoder long-ORF detection (optionally
    with blastp/pfam evidence) and ORF prediction, a pfam search on the
    predicted ORFs, blastx/blastp-style searches against uniprot_sprot and
    optionally uniref90 and nr, optional tmhmm and signalP, then the
    annotation table, GFF3, pipeplot and KEGG tasks.

    With ``blast_flag`` the searches use ``fan.blast_task``.  Otherwise each
    uses ``fan.diamond_task`` followed by ``fan.blast_augment_task``, and
    every diamond task depends on all diamond tasks created before it.

    Args:
        opc: options object; ``path_dir`` and ``assembly_name`` name the
            GFF3 output file.
        dbs: database descriptors indexed by name.
        cpu: CPU budget, scaled per task through ``cpumod``.
        uniref90_flag, nr_flag: enable the uniref90 / nr searches.
        blast_flag: use BLAST instead of diamond.
        signalp_flag, tmhmm_flag: enable signalP / tmhmm.
        rnammer_flag, assembly_name: unused.
        dependency_set: dependencies of the returned Supervisor.
        gene_trans_map: stored under ``'geneTransMap'`` in the annotation
            table options.
        path_assembly: assembly to annotate.
        out_dir: output directory for the annotation tasks.
        improve_orfs: feed blastp and pfam hits on the long ORFs into the
            TransDecoder prediction step.

    Returns:
        Supervisor over all annotation tasks.
    """
    tasks = []
    annot_table_opts = {'geneTransMap': gene_trans_map}
    gff3_dependencies = []
    gff3_opts = {}

    def task_insert(task, name=None, index=0, gff3_flag=False):
        """Queue ``task``; optionally expose one target to table/GFF3."""
        tasks.append(task)
        if name is not None:
            annot_table_opts[name] = task.targets[index]
        if gff3_flag:
            gff3_dependencies.append(task)
            gff3_opts[name] = task.targets[index]

    # --- TransDecoder -----------------------------------------------------
    transd_dir = os.path.join(out_dir, 'transdecoder')
    longorfs = fan.transdecoder_longorfs_task(opc, path_assembly, transd_dir,
                                              cpumod(cpu, 2), [])
    tasks.append(longorfs)

    predict_deps = [longorfs]
    predict_evidence = ()
    if improve_orfs:
        blastp_transd = fan.blast_task(opc, 'blastp', transd_dir,
                                       longorfs.targets[0],
                                       dbs['uniprot_sprot'].call_path,
                                       int(cpu / 2), [longorfs])
        pfam_transd = fan.pfam_task(opc, dbs, longorfs.targets[0], transd_dir,
                                    cpumod(cpu, 2), [longorfs])
        tasks.extend([blastp_transd, pfam_transd])
        predict_deps = [longorfs, pfam_transd, blastp_transd]
        predict_evidence = (pfam_transd.targets[0], blastp_transd.targets[0])
    predict_orfs = fan.transdecoder_predict_orfs_task(
        opc, path_assembly, transd_dir, predict_deps, *predict_evidence)

    gff3_dependencies.append(predict_orfs)
    gff3_opts['transdecoder_gff3'] = predict_orfs.targets[2]
    task_insert(predict_orfs, 'transdecoder', 1)
    orf_target = predict_orfs.targets[0]

    # --- pfam on the predicted ORFs --------------------------------------
    pfam = fan.pfam_task(opc, dbs, orf_target, out_dir, cpumod(cpu, 4),
                         [predict_orfs])
    task_insert(pfam, 'pfam', gff3_flag=True)

    # --- homology searches -----------------------------------------------
    # (database key, blastx result name, blastp result name)
    searches = [('uniprot_sprot', 'spX', 'spP')]
    if uniref90_flag:
        searches.append(('uniref90', 'ur90X', 'ur90P'))
    if nr_flag:
        searches.append(('nr', 'nrX', 'nrP'))

    dmnd_dependencies = []
    for db_key, blastx_name, blastp_name in searches:
        db_path = dbs[db_key].call_path
        queries = (
            ('blastx', path_assembly, [], blastx_name),
            ('blastp', orf_target, [predict_orfs], blastp_name),
        )
        for program, query, query_deps, result_name in queries:
            if blast_flag:
                hits = fan.blast_task(opc, program, out_dir, query, db_path,
                                      cpumod(cpu, 2), list(query_deps))
            else:
                # Depend on every diamond task queued so far.
                dmnd = fan.diamond_task(opc, program, out_dir, query, db_path,
                                        cpumod(cpu, 2),
                                        dmnd_dependencies + query_deps)
                dmnd_dependencies.append(dmnd)
                task_insert(dmnd)
                hits = fan.blast_augment_task(opc, db_path, dmnd.targets[0],
                                              [dmnd])
            task_insert(hits, result_name, gff3_flag=True)

    # --- optional predictors ---------------------------------------------
    if tmhmm_flag:
        task_insert(
            fan.tmhmm_task(opc, orf_target, out_dir, [predict_orfs]), 'tmhmm')
    if signalp_flag:
        task_insert(
            fan.signalp_task(opc, orf_target, out_dir, [predict_orfs]),
            'signalP')

    # --- reports -----------------------------------------------------------
    # TODO: need a more intelligent annot table -- if pfam fails, for
    # example, we can still generate an annot table.
    annot = fan.annot_table_task(opc, dbs, path_assembly, out_dir,
                                 annot_table_opts, tasks[:])
    tasks.append(annot)

    gff3_output = os.path.join(opc.path_dir, opc.assembly_name + '.gff3')
    tasks.append(
        fan.gff3_task(opc, path_assembly, gff3_output, gff3_opts,
                      gff3_dependencies))
    tasks.append(
        fan.pipeplot_task(opc, dbs, annot.targets[0], out_dir, [annot]))
    tasks.append(fan.kegg_task(opc, annot.targets[0], out_dir, [annot]))

    return Supervisor(tasks=tasks, dependencies=dependency_set)
def gen_trimming_supervisor(opc, out_dir, fq1, fq2, unpaired, no_trim,
                            trimmomatic_flag, rmdup, subset_size, subset_seed,
                            truncate_opt, dependency_set, cpu_cap):
    """Build the read-preprocessing Supervisor and report the final reads.

    Unless ``no_trim`` is set, the reads get a pre-trimming FastQC run and
    are trimmed (trimmomatic or prinseq), each followed by a post-trimming
    FastQC run.  Paired reads are then subset and, when ``truncate_opt`` is
    non-negative, truncated.  A final FastQC run covers whatever read files
    result.

    Args:
        opc: options object; ``path_assembly_files`` receives the pre- and
            post-trimming FastQC output.
        out_dir: output directory for trimmed/subset/truncated reads.
        fq1, fq2: lists of paired read files.
        unpaired: list of unpaired read files.
        no_trim: skip trimming and its FastQC runs.
        trimmomatic_flag: trim with trimmomatic when true, prinseq otherwise.
        rmdup: forwarded to the prinseq supervisors.
        subset_size, subset_seed: forwarded to ``fa.subset_task``.
        truncate_opt: truncation setting; the step runs when it is ``>= 0``.
        dependency_set: dependencies of the trimming sub-supervisors and of
            the returned Supervisor.
        cpu_cap: CPU budget.

    Returns:
        Tuple ``(supervisor, fq1, fq2, unpaired)`` where the read lists are
        the outputs of the last step applied to them.
    """
    tasks = []
    deps = []

    if not no_trim:
        all_reads = fq1 + fq2 + unpaired
        tasks.append(
            fa.fastqc_task(opc, opc.path_assembly_files, all_reads,
                           'pre_trimming', min(cpu_cap, len(all_reads)), []))

        if fq1 != []:
            if trimmomatic_flag:
                paired_sup = gen_paired_trimmomatic_supervisor(
                    opc, out_dir, fq1, fq2, unpaired, dependency_set, cpu_cap)
            else:
                paired_sup = gen_paired_prinseq_supervisor(
                    opc, out_dir, fq1, fq2, unpaired, dependency_set, rmdup)
            # Targets alternate: even positions become fq1, odd ones fq2.
            fq1 = list(paired_sup.targets[0::2])
            fq2 = list(paired_sup.targets[1::2])
            tasks.append(paired_sup)
            tasks.append(
                fa.fastqc_task(opc, opc.path_assembly_files, fq1 + fq2,
                               'post_trimming_paired',
                               int(round(float(cpu_cap) / 2)), [paired_sup]))
            deps.append(paired_sup)

        if unpaired != []:
            if trimmomatic_flag:
                unpaired_sup = gen_unpaired_trimmomatic_supervisor(
                    opc, out_dir, fq1, fq2, unpaired, dependency_set, cpu_cap)
            else:
                unpaired_sup = gen_unpaired_prinseq_supervisor(
                    opc, out_dir, fq1, fq2, unpaired, dependency_set, rmdup)
            unpaired = unpaired_sup.targets
            tasks.append(unpaired_sup)
            tasks.append(
                fa.fastqc_task(opc, opc.path_assembly_files, unpaired,
                               'post_trimming_unpaired',
                               int(round(float(cpu_cap) / 2)),
                               [unpaired_sup]))
            deps.append(unpaired_sup)

    # TODO: subsetting/truncation only supports paired reads so far.
    if fq1 != []:
        subset = fa.subset_task(opc, out_dir, fq1, fq2, 'final_reads',
                                subset_size, subset_seed, deps)
        fq1 = [subset.targets[0]]
        fq2 = [subset.targets[1]]
        tasks.append(subset)
        if truncate_opt >= 0:
            # Truncate the subset output.  This used to reference the
            # undefined names fastq1/fastq2 and raised NameError.
            truncate = fa.truncate_task(out_dir, fq1[0], fq2[0],
                                        truncate_opt, [subset])
            fq1 = [truncate.targets[0]]
            fq2 = [truncate.targets[1]]
            deps.append(truncate)
            tasks.append(truncate)

    late_fastqc = fa.fastqc_task(opc, out_dir, fq1 + fq2 + unpaired,
                                 'final_reads_paired', cpu_cap, deps)
    tasks.append(late_fastqc)
    return (Supervisor(tasks=tasks, dependencies=dependency_set), fq1, fq2,
            unpaired)