def split_454_style(self, seqfiles_to_split):
    tasks, demuxed = list(), list()
    self.sample_metadata = sorted(self.sample_metadata, key=firstitem)
    for sample_id, sample_group in groupby(self.sample_metadata, firstitem):
        sample_dir = join(self.products_dir, sample_id)
        sample_group = list(sample_group)
        map_fname = util.new_file("map.txt", basedir=sample_dir)
        tasks.append(
            sixteen.write_map(sample_group, sample_dir,
                              **self.options.get('write_map', dict())))
        files_list = self._filter_files_for_sample(seqfiles_to_split,
                                                   sample_group)
        fasta_fname = util.new_file(sample_id + ".fa", basedir=sample_dir)
        qual_fname = util.new_file(sample_id + ".qual", basedir=sample_dir)
        tasks.append(
            general.fastq_split(files_list, fasta_fname, qual_fname,
                                **self.options.get('fastq_split', dict())))
        qiime_opts = self.options['demultiplex'].pop('qiime_opts', {})
        if 'barcode-type' not in qiime_opts:
            qiime_opts['barcode-type'] = _determine_barcode_type(sample_group)
        demuxed_fname = util.addtag(fasta_fname, "demuxed")
        tasks.append(
            sixteen.demultiplex(map_fname, fasta_fname, qual_fname,
                                demuxed_fname, qiime_opts=qiime_opts,
                                **self.options.get('demultiplex', dict())))
        demuxed.append(demuxed_fname)
    return demuxed, tasks
def _configure(self):
    if self.options['infer_pairs'].get('infer'):
        paired, notpaired = infer_pairs(self.raw_seq_files)
        self.raw_seq_files = paired + notpaired

    maybe_tasks = list()
    for maybe_pair in self.raw_seq_files:
        is_pair = type(maybe_pair) in (tuple, list)
        if is_pair:
            pair, tasks = maybe_convert_to_fastq(maybe_pair,
                                                 self.products_dir)
            self.paired_fastq_files.append(pair)
            maybe_tasks.extend(tasks)
        elif util.guess_seq_filetype(maybe_pair) == 'bam':
            prefix = util.new_file(util.rmext(basename(maybe_pair)),
                                   basedir=self.products_dir)
            t = samtools.to_paired_fastq(maybe_pair, prefix)
            paired, single = t['targets'][:2], t['targets'][2]
            self.paired_fastq_files.append(paired)
            self.unpaired_fastq_files.append(single)
            maybe_tasks.append(t)
        else:
            single, tasks = maybe_convert_to_fastq([maybe_pair],
                                                   self.products_dir)
            self.unpaired_fastq_files.append(single[0])
            maybe_tasks.extend(tasks)

    for task in maybe_tasks:
        yield task

    for pair in self.paired_fastq_files:
        align_sam = util.new_file(_to_merged(basename(pair[0]), tag="align"),
                                  basedir=self.products_dir)
        align_sam += ".sam"
        self.align_sams.append(align_sam)
        yield subread.align(pair, align_sam,
                            self.options.get('subread_align', dict()))

    for single in self.unpaired_fastq_files:
        align_sam = util.new_file(util.addtag(basename(single), "align"),
                                  basedir=self.products_dir)
        align_sam += ".sam"
        self.align_sams.append(align_sam)
        yield subread.align(single, align_sam,
                            self.options.get('subread_align', dict()))

    for align_sam in self.align_sams:
        count_table = util.new_file(util.addtag(basename(align_sam), "count"),
                                    basedir=self.products_dir)
        self.count_tables.append(count_table)
        yield subread.featureCounts([align_sam], count_table,
                                    self.options.get('featureCounts', dict()))
def group_by_sampleid(large_fastas, output_dir, sample_ids):
    output_fnames = [new_file(s + "_demuxed.fa", basedir=output_dir)
                     for s in sample_ids]
    if type(large_fastas) is str:
        large_fastas = [large_fastas]

    def _run():
        import contextlib
        from Bio import SeqIO
        from itertools import chain
        records = chain.from_iterable(
            iter(SeqIO.parse(fname, "fasta") for fname in large_fastas))
        files = dict([(s_id, open(f, 'w'))
                      for f, s_id in zip(output_fnames, sample_ids)])
        with contextlib.nested(*list(files.values())):
            for rec in records:
                sample_id = "_".join(rec.id.split("_")[:-2])
                SeqIO.write(rec, files[sample_id], "fasta")

    return {
        "name": "group_by_sampleid: " + large_fastas[0],
        "actions": [_run],
        "targets": output_fnames,
        "file_dep": large_fastas
    }
def maybe_stitch(maybe_pairs, products_dir,
                 barcode_files=list(), drop_unpaired=False):
    pairs, singles = split_pairs(maybe_pairs)
    tasks = list()
    barcodes = list()
    if not pairs:
        return singles, barcode_files, tasks

    pairs = sorted(pairs, key=firstitem)
    barcode_files = sorted(barcode_files)
    for pair, maybe_barcode in izip_longest(pairs, barcode_files):
        (forward, reverse), maybe_tasks = maybe_convert_to_fastq(
            pair, products_dir)
        tasks.extend(maybe_tasks)
        output = util.new_file(_to_merged(forward), basedir=products_dir)
        singles.append(output)
        if maybe_barcode and drop_unpaired:
            tasks.append(
                general.fastq_join(forward, reverse, output,
                                   options={'drop_unpaired': drop_unpaired}))
            filtered_barcode = util.new_file(
                util.addtag(maybe_barcode, "filtered"), basedir=products_dir)
            pairtask = general.sequence_pair(
                maybe_barcode, output, outfname1=filtered_barcode,
                options={"inner_join": "right"})
            barcodes.append(filtered_barcode)
            tasks.append(pairtask)
        else:
            tasks.append(
                general.fastq_join(forward, reverse, output, maybe_barcode,
                                   {'drop_unpaired': drop_unpaired}))
            barcodes.append(maybe_barcode)
    return singles, barcodes, tasks
def split_illumina_style(self, seqfiles_to_split, barcode_seqfiles):
    demuxed, tasks = list(), list()
    bcode_pairs = zip(seqfiles_to_split, barcode_seqfiles)

    options = self.options.get("demultiplex_illumina", dict())
    if 'barcode_type' not in options:
        options['barcode_type'] = _determine_barcode_type(
            self.sample_metadata)
    do_groupby = options.pop("group_by_sampleid", False)

    for seqfile, bcode_file in bcode_pairs:
        sample_dir = join(self.products_dir, basename(seqfile) + "_split")
        map_fname = util.new_file("map.txt", basedir=sample_dir)
        sample_group = self._filter_samples_for_file(
            self.sample_metadata, seqfile, key=lambda val: val.Run_accession)
        tasks.append(
            sixteen.write_map(sample_group, sample_dir,
                              **self.options.get('write_map', dict())))
        outfile = util.new_file(
            util.rmext(basename(seqfile)) + "_demuxed.fna",
            basedir=sample_dir)
        tasks.append(
            sixteen.demultiplex_illumina([seqfile], [bcode_file], map_fname,
                                         outfile, qiime_opts=options))
        demuxed.append(outfile)

    if do_groupby:
        output_dir = join(self.products_dir, "demuxed_by-sampleid")
        sample_ids = [s[0] for s in sample_group]
        task_dict = general.group_by_sampleid(demuxed, output_dir, sample_ids)
        demuxed = task_dict['targets']
        tasks.append(task_dict)

    return demuxed, tasks
def maybe_convert_to_fastq(fnames, products_dir):
    new_fnames, tasks = list(), list()
    for f in fnames:
        guess = util.guess_seq_filetype(f)
        if guess != "fastq" or util.is_compressed(f):
            fastq_file = util.new_file(f + ".fastq", basedir=products_dir)
            new_fnames.append(fastq_file)
            tasks.append(general.sequence_convert([f], fastq_file))
        else:
            new_fnames.append(f)
    return new_fnames, tasks
def _configure(self):
    if self.otu_tables:
        merged_name = util.addtag(self.otu_tables[0], "merged")
        merged_file = util.new_file(merged_name, basedir=self.products_dir)
        yield sixteen.merge_otu_tables(
            self.otu_tables,
            name=merged_file
        )
        meta_biom_name = util.addtag(merged_file, "meta")
        yield biom.add_metadata(
            merged_file, meta_biom_name,
            self._get_or_create_sample_metadata()
        )
        self.merged_otu_tables.append(meta_biom_name)

    for otu_table in self.merged_otu_tables:
        barchart_path = util.new_file(
            otu_table+"_barcharts", basedir=self.products_dir)
        yield visualization.stacked_bar_chart(
            otu_table, barchart_path,
            **self.options.get('stacked_bar_chart', {}))

        tsv_filename = otu_table+".tsv"
        yield association.biom_to_tsv(otu_table, tsv_filename)
        nice_tsv_filename = util.addtag(tsv_filename, 'maaslin')
        yield association.qiime_to_maaslin(tsv_filename, nice_tsv_filename)
        pcl_filename = otu_table+".pcl"
        yield association.merge_otu_metadata(
            nice_tsv_filename,
            self._get_or_create_sample_metadata(),
            pcl_filename
        )
        self.pcl_files.append(pcl_filename)

    for pcl_file in self.pcl_files:
        yield visualization.breadcrumbs_pcoa_plot(
            pcl_file,
            pcl_file+"_pcoa_plot.png",
            CoordinatesMatrix=pcl_file+"_pcoa_coords.txt",
            **self.options.get('breadcrumbs_pcoa_plot', {})
        )
def _configure(self):
    yield self._handle_raw_seqs_and_demultiplex()

    # merge all of the demultiplexed files into a single file
    merged_fasta = util.new_file("all_samples.fa",
                                 basedir=self.products_dir)
    yield general.cat(self.demuxed_fasta_files, merged_fasta)

    otu_table_biom = util.new_file("all_samples_otu_tax.biom",
                                   basedir=self.products_dir)
    otu_table_tsv = util.new_file("all_samples_otu_tax.tsv",
                                  basedir=self.products_dir)
    # run closed reference picking
    yield pick_otus_closed_ref(
        merged_fasta, otu_table_biom, out_tsv=otu_table_tsv,
        **self.options.get('pick_otus_closed_ref', dict()))

    # infer genes and pathways with picrust
    yield sixteen.picrust(otu_table_biom,
                          **self.options.get('picrust', dict()))
def maybe_concatenate(maybe_pairs, products_dir):
    pairs, singles = split_pairs(maybe_pairs)
    tasks = list()
    if not pairs:
        return singles, tasks

    for pair in pairs:
        catted_fname = util.new_file(
            _to_merged(pair[0], tag="cat", strip_ext=False),
            basedir=products_dir
        )
        simply_cat = all(
            util.guess_seq_filetype(s) in ('fastq', 'fasta')
            for s in pair
        )
        if simply_cat:
            tasks.append(general.cat(pair, catted_fname))
        else:
            tasks.append(general.sequence_convert(pair, catted_fname))
        singles.append(catted_fname)

    return singles, tasks
def write_map(sample_group, sample_dir):
    """Workflow to write a new map.txt file from a list of samples. The
    resultant map.txt file is always named 'map.txt' and is placed in
    the ``sample_dir`` directory

    :param sample_group: List of namedtuples; A list of samples as
                         deserialized by anadama.util.deserialize_map_file
    :param sample_dir: String; Directory path indicating where to write
                       the map.txt file

    """
    map_fname = new_file("map.txt", basedir=sample_dir)

    def _write(targets):
        with open(map_fname, 'w') as map_file:
            # print the headers first
            print >> map_file, "#"+"\t".join(sample_group[0]._fields)

            for _, samples_bycode in itertools.groupby(
                    sample_group, operator.attrgetter("BarcodeSequence")):
                # get the first (hopefully only) sample from the samples
                # grouped by ID then barcode. Ignore any other samples
                # under the same ID for the same barcode
                sample = samples_bycode.next()
                bcode = sample.BarcodeSequence
                # uniq-ify to make qiime happy
                sample = list(sample)
                sample[0] += "_" + bcode
                print >> map_file, "\t".join(sample)

    return {
        "name": "write_map:"+map_fname,
        "actions": [_write],
        "targets": [map_fname],
        "title": lambda t: t.name+" Estimated mem=200 time=5 threads=1"
    }
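# Hedged usage sketch for write_map (not part of the original source): the
# map file path and output directory below are hypothetical; the only
# assumptions are what the docstring states (samples come from
# anadama.util.deserialize_map_file) and the doit-style task dict shape.
def _example_write_map():
    from anadama.util import deserialize_map_file
    with open("existing_map.txt") as f:               # hypothetical input map
        samples = list(deserialize_map_file(f))
    task = write_map(samples, "products/sample_01")   # hypothetical directory
    # The task's single action writes products/sample_01/map.txt; a task
    # runner would normally execute it and track task["targets"].
    for action in task["actions"]:
        action(task["targets"])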
def metaphlan2(files_list, scratch=None, **opts):
    """Workflow to perform taxonomic profiling from whole metagenome
    shotgun sequences. Additional keyword options are used directly as
    metaphlan2.py command-line flags.

    :param files_list: List of strings; File paths to input sequences,
                       in fastq format.

    External dependencies
      - Metaphlan2 @tip: https://bitbucket.org/biobakery/metaphlan2

    Resource utilization:
      - Ram: 1.5-3.0G

    """
    def_base = opts.get("output_file") or files_list[0]
    all_opts = {
        'bt2_ps': 'very-sensitive',
        'bowtie2db': settings.workflows.metaphlan2.bowtie2db,
        'mpa_pkl': settings.workflows.metaphlan2.mpa_pkl,
        "bowtie2out": new_file(addext(def_base, "bowtie2out.txt")),
        "output_file": new_file(addext(def_base, "metaphlan2"))
    }
    all_opts.update(opts)

    if 'input_type' not in all_opts:
        guessed = guess_seq_filetype(files_list[0])
        if guessed not in ('fasta', 'fastq'):
            raise ValueError("Need sequences in fasta or fastq format, "
                             "or provide keyword 'input_type'")
        all_opts['input_type'] = biopython_to_metaphlan[guessed]

    targets = [all_opts['output_file'], all_opts['bowtie2out']]
    if 'biom' in opts:
        targets.append(opts['biom'])

    cmd = starters.cat(files_list, guess_from=files_list[0])

    if scratch:
        db, pkl = all_opts['bowtie2db'], all_opts['mpa_pkl']
        all_opts.pop('bowtie2db', None), all_opts.pop('mpa_pkl', None)
        dbbase, pklbase = map(os.path.basename, (db, pkl))
        cmd += (" | metaphlan2.py" + " " + dict_to_cmd_opts(all_opts))
        actions = [
            """
            tdir=$(mktemp -d -p {sdir});
            cd ${{tdir}};
            mkdir -pv ${{tdir}}/dbs;
            cp {db}* {pkl} ${{tdir}}/dbs;
            {cmd} --mpa_pkl ${{tdir}}/dbs/{pklbase} \
                  --bowtie2db ${{tdir}}/dbs/{dbbase};
            rm -rvf ${{tdir}};
            """.format(sdir=scratch, pkl=pkl, db=db, cmd=cmd,
                       pklbase=pklbase, dbbase=dbbase)
        ]
    else:
        cmd += (" | metaphlan2.py" + " " + dict_to_cmd_opts(all_opts))
        actions = [cmd]

    def _perfhint(task):
        threads = int(all_opts.get('nproc', 1))
        insize = sum(os.stat(f).st_size for f in files_list)
        return "{n} Estimated mem={mem:.0f} time={time:.0f} threads={threads:.0f}".format(
            n=task.name,
            mem=1.5 * 1024,
            time=15 + (insize / 1.2e9 / threads),
            threads=threads)

    return dict(
        name="metaphlan2:" + all_opts['output_file'],
        actions=actions,
        file_dep=files_list,
        targets=targets,
        title=_perfhint,
    )
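# Hedged usage sketch for the metaphlan2 workflow (illustrative, not from the
# original module): the fastq names are hypothetical, and only keywords the
# function already handles above (output_file, biom, nproc) are passed.
def _example_metaphlan2():
    task = metaphlan2(["sample_R1.fastq", "sample_R2.fastq"],
                      output_file="sample.metaphlan2.tsv",
                      biom="sample.metaphlan2.biom",
                      nproc=4)
    # The returned doit-style dict has one shell-pipeline action
    # (cat ... | metaphlan2.py ...); targets holds the profile plus the
    # bowtie2out file, and title reports the memory/time hint.
    print(task["targets"])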
def _join(s):
    file, ext = os.path.splitext(seqfile_in)
    file = file + '.' + suffix
    return new_file(addtag(file, s), basedir=default_opts['output'])
def picrust(file, output_dir=None, verbose=True, **opts):
    """Workflow to predict metagenome functional content from
    16S OTU tables.

    :param file: String; input OTU table.

    :keyword tab_in: Boolean; True if the input is a tabulated file
                     (default:0)
    :keyword tab_out: Boolean; True if the output file is to be tabulated
                      (default:False)
    :keyword gg_version: String; the greengenes version to be used
                         (default:most recent version)
    :keyword t: String; option to use a different type of prediction
                (default:KO)
    :keyword with_confidence: Boolean; Set to True to output confidence
                              intervals (default:0)
    :keyword custom: String; specify a file containing a custom trait to
                     predict metagenomes

    External Dependencies:
      - PICRUSt: Version 1.0.0,
        http://picrust.github.io/picrust/install.html#install

    """
    norm_out = new_file(addtag(file, "normalized_otus"), basedir=output_dir)
    predict_out = new_file(addtag(file, "picrust"), basedir=output_dir)

    all_opts = {'tab_in': 0, 'tab_out': 0, 'gg_version': '', 't': '',
                'with_confidence': 0, 'custom': '', 'drop_unknown': True}
    all_opts.update(opts)
    drop_unknown = all_opts.pop("drop_unknown", True)

    _copy_fname = settings.workflows.picrust.copy_number

    def _drop_unknown():
        import os
        import gzip
        import json
        from biom.table import DenseOTUTable
        from biom.parse import (
            OBS_META_TYPES,
            parse_biom_table,
            parse_classic_table_to_rich_table
        )
        idx = set([row.strip().split('\t')[0]
                   for row in gzip.open(_copy_fname)])
        filter_func = lambda a, otu_id, c: str(otu_id) in idx
        tmpfile = file+"_tmp.biom"
        with open(file) as f, open(tmpfile, 'w') as f_out:
            try:
                table = parse_biom_table(f)
            except Exception as e:
                table = parse_classic_table_to_rich_table(
                    f, None, None, OBS_META_TYPES['taxonomy'], DenseOTUTable)
            table = table.filterObservations(filter_func)
            json.dump(table.getBiomFormatObject("AnADAMA"), f_out)
        os.rename(file, addtag(file, "unfiltered"))
        os.rename(tmpfile, file)

    cmd1 = ("normalize_by_copy_number.py "
            + "-i %s"
            + " -o " + norm_out)
    if all_opts['gg_version']:
        cmd1 += " -g " + all_opts['gg_version']
    if all_opts['tab_in']:
        cmd1 += " -f"

    cmd2 = ("predict_metagenomes.py "
            + "-i %s"
            + " -o " + predict_out)
    if all_opts['gg_version']:
        cmd2 += " -g " + all_opts['gg_version']
    if all_opts['tab_out']:
        cmd2 += " -f"
    if all_opts['t']:
        cmd2 += " -t " + all_opts['t']
    if all_opts['with_confidence']:
        cmd2 += " --with_confidence"
    if all_opts['custom']:
        cmd2 += " -c " + all_opts['custom']

    converted = addtag(file, "json")
    format_cmd = CmdAction('biom convert --table-type="OTU table"'
                           ' --header-key taxonomy --to-json'
                           ' -i {} -o {} '.format(file, converted),
                           verbose=verbose)

    def run(targets):
        # try to run without converting to json, if that fails,
        # convert first, then run on the json-converted biom file
        if os.stat(file).st_size < 1:
            for target in targets:
                open(target, "w").close()
            return True
        return strategies.backup(
            (strategies.Group(CmdAction(cmd1%(file), verbose=verbose),
                              CmdAction(cmd2%(norm_out), verbose=verbose)),
             strategies.Group(format_cmd,
                              CmdAction(cmd1%(converted), verbose=verbose),
                              CmdAction(cmd2%(norm_out), verbose=verbose)))
        )

    actions = [run]
    if drop_unknown:
        actions = [_drop_unknown, run]

    def _rusage(task):
        msg = task.name+" Estimated mem={mem} time={time} threads=1"
        s = os.stat(list(task.file_dep)[0]).st_size
        return msg.format(
            mem=100+(s/1024.),
            time=100+(s*2.5e-4)
        )

    return dict(
        name="picrust:"+predict_out,
        actions=actions,
        file_dep=[file],
        targets=[predict_out, norm_out],
        title=_rusage,
    )
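# Hedged usage sketch for the picrust workflow (illustrative only; the OTU
# table path and output directory are hypothetical, and gg_version is an
# assumed greengenes tag rather than a value taken from the source).
def _example_picrust():
    task = picrust("all_samples_otu_tax.biom",
                   output_dir="products",
                   gg_version="13_5",
                   with_confidence=0)
    # actions defaults to [_drop_unknown, run]; run() first tries the biom
    # table as-is and falls back to a json-converted copy, producing both
    # the copy-number-normalized table and the metagenome prediction.
    print(task["targets"])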
def pick_otus_open_ref(input_fname, output_dir, verbose=None, qiime_opts={}):
    """Workflow to perform open-reference OTU picking. Similar to
    closed-reference OTU picking, this workflow generates a
    biom-formatted OTU table from demultiplexed 16S reads. This
    workflow (in general terms) wraps qiime's
    pick_open_reference_otus.py, which itself wraps either uclust or
    usearch. Note that uclust and usearch require a fairly large memory
    footprint (1.5-2.0G in some cases).

    :param input_fname: String; File path to the input,
                        fasta-formatted 16S sequences
    :param output_dir: String; Path to the directory where the output OTU
                       table will be saved as 'otu_table.biom'. Other
                       qiime-specific logs will go there, too.
    :keyword verbose: Boolean; set to true to print the commands that are
                      run as they are run
    :keyword qiime_opts: Dictionary; A dictionary of command line options to
                         be passed to the wrapped pick_open_reference_otus.py
                         script. No - or -- flags are necessary; the correct
                         - or -- flags are inferred based on the length of
                         the option. For boolean options, use the key/value
                         pattern of { "my-option": "" }.

    External dependencies:
      - Qiime 1.8.0: https://github.com/qiime/qiime-deploy
      - USEARCH: (only if using the usearch option)
        http://www.drive5.com/usearch/

    Resource utilization:
      - RAM: >1.5 G

    """
    output_fname = new_file("otu_table.biom", basedir=output_dir)
    revcomp_fname = new_file(
        "revcomp.fna", basedir=os.path.dirname(input_fname))

    verbose = settings.workflows.verbose if verbose is None else verbose

    default_opts = {
        "reference_fp": settings.workflows.sixteen.otu_refseq
    }
    default_opts.update(qiime_opts)
    opts = dict_to_cmd_opts(default_opts)

    cmd = (" pick_open_reference_otus.py"
           + " --input_fp={}"
           + " --output_dir=" + output_dir
           + " -f"
           + " " + opts)

    revcomp_cmd = ("sequence_convert"
                   + " --format=fasta"
                   + " --to=fasta "
                   + " -r"
                   + " " + input_fname
                   + " > " + revcomp_fname)

    def run(targets):
        strategies.backup(
            (CmdAction(cmd.format(input_fname), verbose=verbose),
             strategies.Group(
                 CmdAction(revcomp_cmd),
                 CmdAction(cmd.format(revcomp_fname), verbose=verbose))),
            extra_conditions=[
                lambda ret, output_fname: os.stat(output_fname).st_size == 0
            ],
            output_fname=output_fname
        )

    return {
        "name": "pick_otus_open_ref:"+input_fname,
        "actions": [run],
        "targets": [output_fname],
        "file_dep": [input_fname],
    }
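# Hedged usage sketch for pick_otus_open_ref (not from the original source;
# paths are hypothetical and the extra qiime option is an assumption, passed
# without leading dashes as the docstring describes).
def _example_pick_otus_open_ref():
    task = pick_otus_open_ref("all_samples_demuxed.fna",
                              "products/otus_open_ref",
                              qiime_opts={"min_otu_size": "2"})
    # The single action retries with reverse-complemented input whenever the
    # first pick_open_reference_otus.py run leaves an empty otu_table.biom.
    print(task["targets"])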
def demultiplex_illumina(fastq_fnames, barcode_fnames, map_fname, output_fname,
                         verbose=True, qiime_opts={}):
    output_dir, output_basename = os.path.split(output_fname)
    default_opts = {
        "i": ",".join(fastq_fnames),
        "b": ",".join(barcode_fnames),
        "m": map_fname,
        "o": output_dir
    }
    default_opts.update(qiime_opts)
    opts = dict_to_cmd_opts(default_opts)

    cmd = "split_libraries_fastq.py "

    revcomp_map_fname = new_file(addtag(map_fname, "revcomp"),
                                 basedir=output_dir)
    revcomp_opts = default_opts.copy()
    revcomp_opts['m'] = revcomp_map_fname
    revcomp_opts = dict_to_cmd_opts(revcomp_opts)

    def _revcomp():
        from anadama.util import deserialize_map_file, serialize_map_file
        from Bio.Seq import Seq

        def _reverse(sample):
            seq = Seq(sample.BarcodeSequence).reverse_complement()
            return sample._replace(BarcodeSequence=str(seq))

        with open(map_fname) as from_map:
            from_samples = deserialize_map_file(from_map)
            serialize_map_file(
                (_reverse(s) for s in from_samples),
                revcomp_map_fname
            )

    default_out = os.path.join(output_dir, "seqs.fna")
    output_exists = lambda *args, **kwargs: (
        not os.path.exists(default_out)
        or not os.stat(default_out).st_size > 1
    )

    def run():
        return strategies.backup(
            (CmdAction(cmd+opts, verbose=verbose),
             strategies.Group(
                 PythonAction(_revcomp),
                 CmdAction(cmd+revcomp_opts, verbose=verbose))),
            extra_conditions=[output_exists]
        )

    actions = [run]
    if output_basename != "seqs.fna":
        actions.append("mv '%s' '%s'" % (default_out, output_fname))

    return {
        "name": "demultiplex_illumina:"+output_fname,
        "actions": actions,
        "file_dep": list(fastq_fnames) + list(barcode_fnames) + [map_fname],
        "targets": [output_fname],
        "title": lambda t: t.name+" Estimated time=%.2f" % (
            sum(os.stat(f).st_size for f in t.file_dep)/1024./1024/5)
    }
def _decompress(f):
    unz_f = os.path.splitext(f)[0]
    unz_f = util.new_file(os.path.basename(unz_f), basedir=products_dir)
    tasks.append(general.extract(f, unz_f))
    return unz_f
def upload(files_16s, files_wgs, sub_fname, ready_fname, keyfile,
           remote_path, remote_srv, user, products_dir):
    """Upload raw sequence files and xml.

    :param keyfile: String; absolute filepath to private SSH keyfile for
                    access to NCBI's submission server
    :param remote_path: String; the directory on the NCBI submission server
                        where to upload data. If unset, the remote_path is
                        automatically determined.
    :param remote_srv: String; TLD of NCBI's submission server
    :param user: String; username used to access NCBI's submission server

    """
    to_upload = [f for f in list(files_16s) + list(files_wgs)
                 if not f.endswith(".complete")]

    ssh_session = ssh.SSHConnection(user, remote_srv, keyfile, remote_path)
    uptodate = [ssh_session.uptodate]

    def _upload(local_fname, complete_fname, blithely=False):
        def _u():
            ret = asp.upload_file(remote_srv, user, None, local_fname,
                                  remote_path, keyfile=keyfile)
            if blithely or ret:
                open(complete_fname, 'w').close()
            return blithely or ret  # return True if blithely is True
        return _u

    complete_fnames = [new_file(f + ".complete", basedir=products_dir)
                       for f in to_upload]
    for f, complete_fname in zip(to_upload, complete_fnames):
        yield {
            "name": "upload: " + basename(f),
            "actions": [_upload(f, complete_fname)],
            "file_dep": [f],
            "uptodate": uptodate,
            "targets": [complete_fname]
        }

    yield {
        "name": "upload: " + basename(sub_fname),
        "actions": [_upload(sub_fname, sub_fname + ".complete")],
        "file_dep": complete_fnames,
        "targets": [sub_fname + ".complete"]
    }

    yield {
        "name": "upload: " + basename(ready_fname),
        "actions": [_upload(ready_fname, ready_fname + ".complete", True)],
        "file_dep": complete_fnames + [sub_fname + ".complete"],
        "targets": [ready_fname + ".complete"]
    }
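# Hedged usage sketch for the upload generator (illustrative; every path,
# host name, and account below is hypothetical).
def _example_upload():
    tasks = upload(files_16s=["s1_16s.fastq"], files_wgs=["s1_wgs.fastq"],
                   sub_fname="submission.xml", ready_fname="submit.ready",
                   keyfile="/home/user/.ssh/ncbi_key",
                   remote_path="submit/Test",
                   remote_srv="upload.example.org",
                   user="asp-user", products_dir="products")
    # One task per raw file, then one for submission.xml, then one for the
    # ready file; the .complete targets chain file_dep so the ready marker
    # uploads last.
    for task in tasks:
        print(task["name"])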
def _configure(self):
    for attr in self.sequence_attrs:
        seq_set = getattr(self, attr)
        if self.options['infer_pairs'].get('infer'):
            paired, notpaired = infer_pairs(seq_set)
            seq_set = paired + notpaired
        seq_set, maybe_tasks = maybe_concatenate(seq_set, self.products_dir)
        setattr(self, attr, seq_set)
        for t in maybe_tasks:
            yield t

    for file_ in self.raw_seq_files:
        if util.guess_seq_filetype(file_) != "fastq":
            fastq_file = util.new_file(
                basename(file_)+"_filtered.fastq",
                basedir=self.products_dir
            )
            yield general.sequence_convert(
                [file_], fastq_file,
                **self.options.get('sequence_convert', dict())
            )
        else:
            fastq_file = file_
        self.intermediate_fastq_files.append(fastq_file)

    for fastq_file in self.intermediate_fastq_files:
        name_base = util.new_file(util.rmext(fastq_file, all=True),
                                  basedir=self.products_dir)
        task_dict = next(wgs.knead_data(
            [fastq_file], name_base,
            **self.options.get('decontaminate', {})
        ))
        decontaminated_fastq = task_dict['targets'][0]
        self.decontaminated_fastq_files.append(decontaminated_fastq)
        yield task_dict

    for d_fastq in self.decontaminated_fastq_files:
        metaphlan_file = util.new_file(
            basename(d_fastq)+".metaphlan2.tsv",
            basedir=self.products_dir
        )
        otu_table = metaphlan_file.replace('.tsv', '.biom')
        yield wgs.metaphlan2(
            [d_fastq],
            output_file=metaphlan_file,
            biom=otu_table,
            # first index is for first item in list of samples
            # second index is to get the sample id from the sample
            sample_id=self._filter_samples_for_file(self.sample_metadata,
                                                    d_fastq)[0][0],
            input_type="multifastq",
            **self.options.get('metaphlan2', dict())
        )
        self.metaphlan_results.append(metaphlan_file)
        self.otu_tables.append(otu_table)

        # Finally, HUMAnN all alignment files
        humann_output_dir = util.new_file(
            util.rmext(basename(d_fastq), all=True)+"_humann",
            basedir=self.products_dir
        )
        yield wgs.humann2(
            d_fastq, humann_output_dir,
            **self.options.get('humann', dict())
        )