def get_batch_ids_raw(runname, config, tag=None, checkpoints=None):
    tag_barcode = get_tag_barcode(tag, runname, config) if tag else None
    if tag_barcode and checkpoints:
        if hasattr(checkpoints, config['demux_default'] + '_barcode'):
            barcode_batch_dir = getattr(
                checkpoints, config['demux_default'] + '_barcode').get(
                    runname=runname).output.barcodes
        else:
            raise NotImplementedError(
                "Demultiplexing with {} is not implemented.".format(
                    config['demux_default']))
        barcode_batch = os.path.join(barcode_batch_dir, tag_barcode, '{id}.txt')
        batches_txt, = glob_wildcards(barcode_batch)
        return batches_txt
    else:
        batches_tar, = glob_wildcards(
            "{datadir}/{runname}/reads/{{id}}.tar".format(
                datadir=config["storage_data_raw"], runname=runname))
        batches_fast5, = glob_wildcards(
            "{datadir}/{runname}/reads/{{id}}.fast5".format(
                datadir=config["storage_data_raw"], runname=runname))
        return batches_tar + batches_fast5
def predict_genes_genomes(in_dir, out_dir, log):
    path = os.path.join(in_dir, '{genome}.fasta')
    os.makedirs(out_dir, exist_ok=True)
    for genome in glob_wildcards(path).genome:
        predict_genes(genome, path.format(genome=genome), out_dir, log)
def rename_genomes(input_folder, mapfile_genomes, mapfile_contigs, output_dir):
    file_name = f"{input_folder}/{{binid}}.fasta"
    bin_ids, = glob_wildcards(file_name)
    old2new_name = dict(
        zip(bin_ids, utils.gen_names_for_range(len(bin_ids), prefix='MAG')))
    os.makedirs(output_dir)

    with open(mapfile_contigs, 'w') as out_contigs, \
            open(mapfile_genomes, 'w') as old2new_mapping_file:
        old2new_mapping_file.write("BinID\tMAG\n")
        for binid in bin_ids:
            fasta_in = file_name.format(binid=binid)
            new_name = old2new_name[binid]
            old2new_mapping_file.write(f"{binid}\t{new_name}\n")
            fasta_out = os.path.join(output_dir, f"{new_name}.fasta")

            # write names of contigs in mapping file
            with open(fasta_in) as ffi, open(fasta_out, 'w') as ffo:
                Nseq = 0
                for line in ffi:
                    if line[0] == ">":
                        Nseq += 1
                        new_header = f'{new_name}_{Nseq}'
                        out_contigs.write(f"{new_header}\t{new_name}\n")
                        ffo.write(f">{new_header}\n")
                    else:
                        ffo.write(line)
def get_wildcards(inputmap, wildcard_constraints):
    """Given a list of snakemake IO filenames, extract the wildcards.

    Params:
        inputmap (list): list of input wildcard/filename tuples
    """
    d = {}
    try:
        all_wc = []
        all_files = []
        for wc, filename in inputmap:
            try:
                wc = eval(wc)
            except:
                pass
            wc = update_wildcard_constraints(wc, wildcard_constraints, {})
            all_wc.append(wc)
            if filename is None:
                continue
            if isinstance(filename, str):
                filename = [filename]
            all_files = all_files + filename
        for f in all_files:
            for wc in all_wc:
                wildcards = glob_wildcards(wc, [os.path.basename(f)])
                for k, v in wildcards._asdict().items():
                    if len(v) > 0:
                        d[k] = v[0]
    except:
        logger.debug("Failed to get wildcards for inputmap {}".format(inputmap))
        raise
    return d
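# Side note on the call above: glob_wildcards accepts an optional second
# argument, a list of file names to match the pattern against in memory
# instead of scanning the filesystem, which is what get_wildcards relies on.
# A minimal sketch; the pattern and file name below are hypothetical:
from snakemake.io import glob_wildcards

wc = glob_wildcards("{sample}_R1.fastq.gz", ["foo_R1.fastq.gz"])
print(wc.sample)  # -> ['foo']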
def aggregate_input(wildcards):
    ops = base_checkpoint_obj.get(**wildcards).output
    checkpoint_output = _output_accessor(ops, output_key)
    expand_base_rule = os.path.join(checkpoint_output, base_rule)
    expand_target_rule = target_rule or expand_base_rule
    return expand(expand_target_rule,
                  **glob_wildcards(expand_base_rule)._asdict())
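# The aggregate_input closure above follows the standard Snakemake checkpoint
# idiom: resolve the checkpoint's output with .get(), glob it with
# glob_wildcards, and expand a target pattern over whatever files appeared.
# A minimal self-contained sketch of that wiring, assuming hypothetical rule
# names and the pattern "clustered/{i}.txt" (not taken from the snippet above):
checkpoint cluster:
    output:
        directory("clustered")  # files such as clustered/<i>.txt appear only at runtime
    shell:
        "mkdir -p {output} && touch {output}/a.txt {output}/b.txt"

def aggregate_input(wildcards):
    # Block until the checkpoint has run, then re-evaluate what exists on disk.
    out_dir = checkpoints.cluster.get(**wildcards).output[0]
    ids = glob_wildcards(f"{out_dir}/{{i}}.txt").i
    return expand("clustered/{i}.txt", i=ids)

rule aggregate:
    input:
        aggregate_input
    output:
        "aggregated.txt"
    shell:
        "cat {input} > {output}"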
def get_batch_ids_raw(runname, config, tag=None, checkpoints=None):
    tag_barcode = get_tag_barcode(tag, runname, config) if tag else None
    if tag_barcode and checkpoints:
        barcode_batch_dir = checkpoints.demux_split_barcodes.get(
            demultiplexer=config['demux_default'],
            runname=runname).output.barcodes
        barcode_batch = os.path.join(barcode_batch_dir, tag_barcode, '{id}.txt')
        batches_txt, = glob_wildcards(barcode_batch)
        return batches_txt
    else:
        batches_tar, = glob_wildcards(
            "{datadir}/{runname}/reads/{{id}}.tar".format(
                datadir=config["storage_data_raw"], runname=runname))
        batches_fast5, = glob_wildcards(
            "{datadir}/{runname}/reads/{{id}}.fast5".format(
                datadir=config["storage_data_raw"], runname=runname))
        return batches_tar + batches_fast5
def expand_basenames(source_folder: str, source_extension: str, years=None):
    opts_digits: str = r'\d*'
    any_digits: str = r'\d+'
    if isinstance(years, (int, str)):
        year_constraint = rf"{{year,{years}\d*}}"
    elif isinstance(years, list):
        year_constraint = f"{{year,{'|'.join(f'{y}{opts_digits}' for y in years)}}}"
    else:
        year_constraint = rf"{{year,{any_digits}}}"
    source_years, target_basenames = glob_wildcards(
        jj(source_folder, year_constraint, f"{{file}}.{source_extension}"))
    return source_years, target_basenames
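# Note on the pattern built above: glob_wildcards honours {name,regex}
# constraints, so the year_constraint string narrows which directories match.
# A minimal sketch under assumed inputs (the folder layout and values below
# are illustrative, not taken from the snippet):
#
#   corpus/1867/abc.xml
#   corpus/1959/def.xml
#
# expand_basenames("corpus", "xml", years=[1867])
#   -> (['1867'], ['abc'])                   # 1959 filtered out by {year,1867\d*}
# expand_basenames("corpus", "xml")
#   -> (['1867', '1959'], ['abc', 'def'])    # {year,\d+} matches any year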
def test_expand_call_arguments():
    target_folder = nj(
        "/data/riksdagen_corpus_data/riksdagen-corpus-exports/speech_xml")
    source_folder = nj("/data/riksdagen_corpus_data/riksdagen-corpus/corpus/")
    extension = "xml"
    years, basenames = glob_wildcards(
        jj(source_folder, "{year}", f"{{file}}.{extension}"))
    filenames = expand(jj(target_folder, '{year}', f'{{basename}}.{extension}'),
                       zip,
                       year=years,
                       basename=basenames)
    assert len(filenames) == len(years)
def auto_report(dag, path, stylesheet=None):
    try:
        from jinja2 import Template, Environment, PackageLoader
    except ImportError as e:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports."
        )

    mode_embedded = True
    if path.endswith(".zip"):
        mode_embedded = False
    elif not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html or .zip")

    custom_stylesheet = None
    if stylesheet is not None:
        try:
            with open(stylesheet) as s:
                custom_stylesheet = s.read()
        except (Exception, BaseException) as e:
            raise WorkflowError("Unable to read custom report stylesheet.", e)

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    persistence = dag.workflow.persistence
    results = defaultdict(lambda: defaultdict(list))
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError(
                        "File {} marked for report but does "
                        "not exist.".format(f)
                    )
                report_obj = get_flag_value(f, "report")

                def register_file(
                    f, wildcards_overwrite=None, aux_files=None, name_overwrite=None
                ):
                    wildcards = wildcards_overwrite or job.wildcards
                    category = Category(
                        report_obj.category, wildcards=wildcards, job=job
                    )
                    subcategory = Category(
                        report_obj.subcategory, wildcards=wildcards, job=job
                    )

                    results[category][subcategory].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            dag.workflow,
                            wildcards_overwrite=wildcards_overwrite,
                            mode_embedded=mode_embedded,
                            aux_files=aux_files,
                            name_overwrite=name_overwrite,
                        )
                    )
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                elif os.path.isdir(f):
                    if report_obj.htmlindex:
                        if mode_embedded:
                            raise WorkflowError(
                                "Directory marked for report specifies htmlindex. "
                                "This is unsupported when requesting a pure HTML report. "
                                "Please use store as zip instead (--report report.zip)."
                            )
                        aux_files = []
                        index_found = False
                        for root, dirs, files in os.walk(f):
                            for name in files:
                                if name != ".snakemake_timestamp":
                                    filepath = os.path.join(root, name)
                                    if (
                                        os.path.relpath(filepath, f)
                                        != report_obj.htmlindex
                                    ):
                                        aux_files.append(filepath)
                                    else:
                                        index_found = True
                        if not index_found:
                            raise WorkflowError(
                                "Given htmlindex {} not found in directory "
                                "marked for report".format(report_obj.htmlindex)
                            )
                        register_file(
                            os.path.join(f, report_obj.htmlindex),
                            aux_files=aux_files,
                            name_overwrite="{}.html".format(os.path.basename(f)),
                        )
                    elif report_obj.patterns:
                        if not isinstance(report_obj.patterns, list):
                            raise WorkflowError(
                                "Invalid patterns given for report. Must be list.",
                                rule=job.rule,
                            )
                        for pattern in report_obj.patterns:
                            pattern = os.path.join(f, pattern)
                            wildcards = glob_wildcards(pattern)._asdict()
                            names = wildcards.keys()
                            for w in zip(*wildcards.values()):
                                w = dict(zip(names, w))
                                w.update(job.wildcards_dict)
                                w = Wildcards(fromdict=w)
                                f = apply_wildcards(pattern, w)
                                register_file(f, wildcards_overwrite=w)
                    else:
                        raise WorkflowError(
                            "Directory marked for report but neither file patterns "
                            "given via patterns=[...], nor htmlindex given. "
                            "See report documentation.",
                            rule=job.rule,
                        )

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning(
                    "Missing metadata for file {}. Maybe metadata "
                    "was deleted or it was created using an older "
                    "version of Snakemake. This is a non critical "
                    "warning.".format(f)
                )
                continue

            def get_time(rectime, metatime, sel_func):
                if metatime is None:
                    return rectime
                return sel_func(metatime, rectime)

            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = get_time(rec.starttime, meta["starttime"], min)
                rec.endtime = get_time(rec.endtime, meta["endtime"], max)
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                print(e)
                logger.warning(
                    "Metadata for file {} was created with a too "
                    "old Snakemake version.".format(f)
                )

    for subcats in results.values():
        for catresults in subcats.values():
            catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [
        {"rule": rec.rule, "runtime": rec.endtime - rec.starttime}
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    def get_datetime(rectime):
        try:
            return datetime.datetime.fromtimestamp(rectime).isoformat()
        except OSError:
            return None

    # prepare end times
    timeline = [
        {
            "rule": rec.rule,
            "starttime": get_datetime(rec.starttime),
            "endtime": get_datetime(rec.endtime),
        }
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    # prepare per-rule information
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_d3_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    seen = set()
    files = [
        seen.add(res.target) or res
        for cat in results.values()
        for subcat in cat.values()
        for res in subcat
        if res.target not in seen
    ]

    rst_links = textwrap.dedent(
        """
    .. _Workflow: javascript:show_panel('workflow')
    .. _Statistics: javascript:show_panel('statistics')
    {% for cat, catresults in categories|dictsort %}
    .. _{{ cat.name }}: javascript:show_panel("{{ cat.id }}")
    {% endfor %}
    {% for res in files %}
    .. _{{ res.target }}: javascript:show_panel("{{ res.category.id }}")
    {% endfor %}
    """
    )

    for cat, subcats in results.items():
        for subcat, catresults in subcats.items():
            for res in catresults:
                res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with dag.workflow.sourcecache.open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links
            text = publish_parts(
                env.from_string(text).render(
                    snakemake=Snakemake, categories=results, files=files
                ),
                writer_name="html",
            )["body"]

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(
        res.size
        for cat in results.values()
        for subcat in cat.values()
        for res in subcat
    )

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports."
        )

    template = env.get_template("report.html.jinja2")

    logger.info("Downloading resources and rendering HTML.")

    rendered = template.render(
        results=results,
        results_size=results_size,
        configfiles=configfiles,
        text=text,
        rulegraph_nodes=rulegraph["nodes"],
        rulegraph_links=rulegraph["links"],
        rulegraph_width=xmax + 20,
        rulegraph_height=ymax + 20,
        runtimes=runtimes,
        timeline=timeline,
        rules=[rec for recs in rules.values() for rec in recs],
        version=__version__,
        now=now,
        pygments_css=HtmlFormatter(style="trac").get_style_defs(".source"),
        custom_stylesheet=custom_stylesheet,
        mode_embedded=mode_embedded,
    )

    # TODO look into supporting .WARC format, also see (https://webrecorder.io)
    if not mode_embedded:
        with ZipFile(path, compression=ZIP_DEFLATED, mode="w") as zipout:
            folder = Path(Path(path).stem)
            # store results in data folder
            for subcats in results.values():
                for catresults in subcats.values():
                    for result in catresults:
                        # write raw data
                        zipout.write(
                            result.path, str(folder.joinpath(result.data_uri))
                        )
                        # write thumbnail
                        if result.is_img and result.png_content:
                            zipout.writestr(
                                str(folder.joinpath(result.png_uri)),
                                result.png_content,
                            )
                        # write aux files
                        parent = folder.joinpath(result.data_uri).parent
                        for aux_path in result.aux_files:
                            zipout.write(
                                aux_path,
                                str(
                                    parent.joinpath(
                                        os.path.relpath(
                                            aux_path, os.path.dirname(result.path)
                                        )
                                    )
                                ),
                            )

            # write report html
            zipout.writestr(str(folder.joinpath("report.html")), rendered)
    else:
        with open(path, "w", encoding="utf-8") as htmlout:
            htmlout.write(rendered)

    logger.info("Report created: {}.".format(path))
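# For context on the directory branch above: a directory output is marked for
# the report with the report() flag, and patterns=[...] tells auto_report which
# files inside it to register (each {name} placeholder becomes a per-file
# wildcard resolved via glob_wildcards). A minimal hypothetical rule; the rule
# name, paths, and category are assumptions, not taken from the source:
rule plots:
    output:
        report(
            directory("results/plots"),
            patterns=["{name}.png"],
            category="Plots",
        )
    shell:
        "mkdir -p {output} && touch {output}/a.png {output}/b.png"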
import os
from snakemake.shell import shell
from snakemake.io import glob_wildcards
from multiprocessing.dummy import Pool

pool = Pool(snakemake.threads)

for dir in snakemake.params.dirs:
    if not os.path.exists(dir):
        os.makedirs(dir)

path = os.path.join(snakemake.input[0],
                    "{genome}" + snakemake.params.fasta_extension)
all_genomes = glob_wildcards(path).genome

print(
    f"Calling genes of {len(all_genomes)} genomes in {snakemake.threads} threads."
)


def callgenes(genome):
    fasta = path.format(genome=genome)
    if not os.path.exists(f"annotations/faa/{genome}.faa.gz"):
        shell("callgenes.sh in={fasta} outa=annotations/faa/{genome}.faa.gz"
              " out=annotations/gff/{genome}.gff.gz"
              " out16S=annotations/16S/{genome}.fasta"
              " stats=annotations/stats/{genome}.json json=t ow > /dev/null")


# dispatch the per-genome calls across the thread pool
pool.map(callgenes, all_genomes)
pool.close()
pool.join()
def auto_report(dag, path):
    try:
        from jinja2 import Template, Environment, PackageLoader
    except ImportError as e:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports.")

    if not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html")

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    persistence = dag.workflow.persistence
    results = defaultdict(list)
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError("File {} marked for report but does "
                                        "not exist.".format(f))
                report_obj = get_flag_value(f, "report")
                category = Category(report_obj.category)

                def register_file(f, wildcards_overwrite=None):
                    results[category].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            wildcards_overwrite=wildcards_overwrite,
                        ))
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                if os.path.isdir(f):
                    if not isinstance(report_obj.patterns, list):
                        raise WorkflowError(
                            "Invalid patterns given for report. Must be list.",
                            rule=job.rule,
                        )
                    if not report_obj.patterns:
                        raise WorkflowError(
                            "Directory marked for report but no file patterns given via patterns=[...]. "
                            "See report documentation.",
                            rule=job.rule,
                        )
                    for pattern in report_obj.patterns:
                        pattern = os.path.join(f, pattern)
                        wildcards = glob_wildcards(pattern)._asdict()
                        names = wildcards.keys()
                        for w in zip(*wildcards.values()):
                            w = dict(zip(names, w))
                            w.update(job.wildcards_dict)
                            w = Wildcards(fromdict=w)
                            f = apply_wildcards(pattern, w)
                            register_file(f, wildcards_overwrite=w)

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning("Missing metadata for file {}. Maybe metadata "
                               "was deleted or it was created using an older "
                               "version of Snakemake. This is a non critical "
                               "warning.".format(f))
                continue
            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = min(rec.starttime, meta["starttime"])
                rec.endtime = max(rec.endtime, meta["endtime"])
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                print(e)
                logger.warning("Metadata for file {} was created with a too "
                               "old Snakemake version.".format(f))

    for catresults in results.values():
        catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [{
        "rule": rec.rule,
        "runtime": rec.endtime - rec.starttime
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    # prepare end times
    timeline = [{
        "rule": rec.rule,
        "starttime": datetime.datetime.fromtimestamp(rec.starttime).isoformat(),
        "endtime": datetime.datetime.fromtimestamp(rec.endtime).isoformat(),
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    # prepare per-rule information
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_d3_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    seen = set()
    files = [
        seen.add(res.target) or res for cat in results.values() for res in cat
        if res.target not in seen
    ]

    rst_links = textwrap.dedent("""

    .. _Results: #results
    .. _Rules: #rules
    .. _Statistics: #stats
    {% for cat, catresults in categories|dictsort %}
    .. _{{ cat.name }}: #{{ cat.id }}
    {% for res in files %}
    .. _{{ res.target }}: #{{ res.id }}
    {% endfor %}
    {% endfor %}
    .. _
    """)

    for cat, catresults in results.items():
        for res in catresults:
            res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links
            text = publish_parts(
                env.from_string(text).render(snakemake=Snakemake,
                                             categories=results,
                                             files=files),
                writer_name="html",
            )["body"]

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(res.size for cat in results.values() for res in cat)

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports.")

    # render HTML
    template = env.get_template("report.html")
    with open(path, "w", encoding="utf-8") as out:
        out.write(
            template.render(
                results=results,
                results_size=results_size,
                configfiles=configfiles,
                text=text,
                rulegraph_nodes=rulegraph["nodes"],
                rulegraph_links=rulegraph["links"],
                rulegraph_width=xmax + 20,
                rulegraph_height=ymax + 20,
                runtimes=runtimes,
                timeline=timeline,
                rules=[rec for recs in rules.values() for rec in recs],
                version=__version__,
                now=now,
                pygments_css=HtmlFormatter(
                    style="trac").get_style_defs(".source"),
            ))
    logger.info("Report created.")
def __check_config_dic(self):
    """Configuration file checking"""
    # check output mandatory directory
    self._check_dir_or_string(level1="DATA", level2="OUTPUT")
    self.reference = self.get_config_value('DATA', 'REFERENCE_FILE')
    self.bam_path = self.get_config_value(level1="DATA", level2="BAM")
    self.vcf_path = self.get_config_value(level1="DATA", level2="VCF")

    # check cleaning activation
    self.list_cleaning_tool_activated = self.__build_tools_activated(
        "CLEANING", AVAIL_CLEANING)
    if len(self.list_cleaning_tool_activated) > 1:
        raise ValueError(
            'CONFIG FILE CHECKING FAIL for section "CLEANING": please activate only one of the available cleaning tools\n'
        )
    elif len(self.list_cleaning_tool_activated) > 0:
        self.cleaning_tool = "_" + self.list_cleaning_tool_activated[0]
        self.cleaning_activated = True
        self._check_file_or_string(level1="DATA",
                                   level2="REFERENCE_FILE",
                                   mandatory=["CLEANING"])

    # check mapping activation; if not activated, the folder name is used to set
    # self.mapping_tool_activated instead of the mapping tool
    self.mapping_activated = var_2_bool(tool="MAPPING",
                                        key="ACTIVATE",
                                        to_convert=self.get_config_value(
                                            "MAPPING", "ACTIVATE"))
    self.mapping_stats_activated = var_2_bool(
        tool="MAPPING",
        key="BUILD_STATS",
        to_convert=self.get_config_value("MAPPING", "BUILD_STATS"))
    if self.mapping_activated:
        self.mapping_tool_activated = self.get_config_value("MAPPING", "TOOL")
        self._check_file_or_string(level1="DATA",
                                   level2="REFERENCE_FILE",
                                   mandatory=[self.mapping_tool_activated])
        if self.mapping_tool_activated not in AVAIL_MAPPING:
            raise ValueError(
                f'CONFIG FILE CHECKING FAIL for section "MAPPING" key "TOOL": {self.mapping_tool_activated} not avail on RattleSNP\n'
            )
    elif self.mapping_stats_activated:
        raise ValueError(
            'CONFIG FILE CHECKING FAIL for section "MAPPING": key "BUILD_STATS" is "True" but no mapping is activated, please change "ACTIVATE" to "True"\n'
        )

    # if cleaning or mapping is activated, check the fastq path and files
    if self.cleaning_activated or self.mapping_activated:
        self._check_dir_or_string(level1="DATA", level2="FASTQ")
        self.__check_fastq_files()
        self.samples, = glob_wildcards(
            f"{self.fastq_path}{{fastq,[^/]+}}_R1{self.fastq_files_ext}",
            followlinks=True)
        for sample in self.samples:
            if not Path(
                    f"{self.fastq_path}{sample}_R2{self.fastq_files_ext}"
            ).exists():
                raise ValueError(
                    f"DATA CHECKING FAIL : sample '{sample}' is single-end, please only use paired data\n"
                )
        self._check_file_or_string(level1="DATA",
                                   level2="REFERENCE_FILE",
                                   mandatory=[])

    # check SNP calling activation:
    self.calling_activated = var_2_bool(
        tool="SNPCALLING",
        key="",
        to_convert=self.get_config_value(level1="SNPCALLING"))

    if not self.mapping_activated and self.calling_activated:
        self._check_dir_or_string(level1="DATA", level2="BAM")
        self.samples, = glob_wildcards(f"{self.bam_path}{{bam,[^/]+}}.bam",
                                       followlinks=True)
        self._check_file_or_string(level1="DATA",
                                   level2="REFERENCE_FILE",
                                   mandatory=["SNPCALLING"])

    # check VCF filter activation
    self.vcf_filter_activated = var_2_bool(
        tool="FILTER",
        key="",
        to_convert=self.get_config_value(level1="FILTER"))
    # if only VCF filtration is requested, get the vcf path
    if not self.mapping_activated and not self.calling_activated and self.vcf_filter_activated:
        self._check_file_or_string(level1="DATA",
                                   level2="VCF",
                                   mandatory=["VCFTOOL FILTER"])

    self.run_RAXML = var_2_bool(
        tool="RAXML", key="",
        to_convert=self.get_config_value(level1="RAXML"))
    self.run_RAXML_NG = var_2_bool(
        tool="RAXML_NG", key="",
        to_convert=self.get_config_value(level1="RAXML_NG"))

    # check that the mitochondrial name is in the fasta file if it is not None
    if self.cleaning_activated or self.mapping_activated or self.calling_activated:
        self.mito_name = self.get_config_value('PARAMS', 'MITOCHONDRIAL_NAME')
        self.CHROMOSOMES = get_list_chromosome_names(
            self.get_config_value('DATA', 'REFERENCE_FILE'))
        if self.mito_name and self.mito_name not in self.CHROMOSOMES:
            raise NameError(
                f'CONFIG FILE CHECKING FAIL : in the "PARAMS" section, "MITOCHONDRIAL_NAME" key: the name "{self.mito_name}" is not in fasta file {self.get_config_value("DATA", "REFERENCE_FILE")}\n'
            )
        self.CHROMOSOMES_WITHOUT_MITO = self.CHROMOSOMES.copy()
        if self.mito_name and self.mito_name in self.CHROMOSOMES:
            self.CHROMOSOMES_WITHOUT_MITO.remove(self.mito_name)

    if self.calling_activated and self.mapping_activated and self.bam_path:
        raise ValueError(
            f"CONFIG FILE CHECKING FAIL : You want to run mapping with {self.mapping_tool_activated} but provided bam path '{self.bam_path}'\n"
        )

    # check VCF filter activation if raxml or raxml_ng is requested
    self.raxml_activated = var_2_bool(
        tool="RAXML", key="",
        to_convert=self.get_config_value(level1="RAXML"))
    self.raxml_ng_activated = var_2_bool(
        tool="RAXML_NG", key="",
        to_convert=self.get_config_value(level1="RAXML_NG"))
    if (self.raxml_activated or self.raxml_ng_activated) and not self.vcf_filter_activated:
        self._check_file_or_string(level1="DATA",
                                   level2="VCF",
                                   mandatory=["FILTER", "RAXML"])
def get_batches(wildcards, config):
    batches, = glob_wildcards(
        "{datadir}/{wildcards.runname}/reads/{{id}}.tar".format(
            datadir=config["storage_data_raw"], wildcards=wildcards))
    return batches
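# All of the helpers above lean on the same glob_wildcards contract: the
# literal part of the pattern is globbed on disk and each {name} placeholder
# comes back as a list field of a namedtuple, index-aligned across fields.
# A minimal sketch with hypothetical files (the paths are assumptions):
#
#   data/run1/reads/0.tar
#   data/run1/reads/1.tar
#
from snakemake.io import glob_wildcards

batches, = glob_wildcards("data/run1/reads/{id}.tar")
# batches == ['0', '1']

hits = glob_wildcards("data/{runname}/reads/{id}.tar")
# zip keeps the per-file pairing: [('run1', '0'), ('run1', '1')]
pairs = list(zip(hits.runname, hits.id))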