def main(args):
    get_pipeline_log("config/pipeline_log.yaml")
    make_sample_sheet(args.input, "config/sample_sheet.yaml")
    resources = get_resources(args.cores, args.queue)
    check_databases(args.serobadb)
    snakemake.snakemake(
        "Snakefile",
        workdir=pathlib.Path(__file__).parent.absolute(),
        config={
            "out": str(args.output),
            "sample_sheet": "config/sample_sheet.yaml",
            "seroba_db": str(args.serobadb),
            "kmer_size": int(args.kmersize),
            "min_cov": int(args.mincov),
        },
        cores=resources['cores'],
        nodes=resources['cores'],
        use_conda=True,
        conda_frontend="mamba",
        dryrun=args.dryrun,
        jobname="seroba_{name}.jobid{jobid}",
        keepgoing=True,
        printshellcmds=True,
        unlock=args.unlock,
        force_incomplete=args.rerunincomplete,
        configfiles=["config/pipeline_parameters.yaml"],
        drmaa=(
            " -q bio -n {threads}"
            " -o %s/log/drmaa/{name}_{wildcards}_{jobid}.out"
            " -e %s/log/drmaa/{name}_{wildcards}_{jobid}.err"
            " -R \"span[hosts=1]\" -R \"rusage[mem={resources.mem_mb}]\" "
            % (str(args.output), str(args.output))))
def run_pipeline(config_file):
    conf, errors = config.read_config(config_file)
    pipeline = conf['pipeline']
    del conf['pipeline']
    output_dir = conf['output']['output_dir']
    template = _TEMPLATE_ENV.get_template('pipeline_template.sm')
    misc.make_sure_dir_exists(output_dir)

    # write config file for pipeline
    config_file_name = 'config.yaml'
    pipeline_config_file = os.path.join(output_dir, config_file_name)
    config.write_config(conf, pipeline_config_file)

    # write snakemake file for pipeline
    snakemake_file = os.path.join(output_dir, 'Snakefile')
    with open(snakemake_file, 'w') as ofh:
        ofh.write(template.render(config_file=config_file_name))

    num_threads = conf['parameters']['num_threads']
    snakemake.snakemake(snakemake_file, workdir=output_dir, cores=num_threads)
def print_graph(snakefile, config, dag_prefix):
    # store old stdout
    stdout = sys.stdout
    # call the snakemake api and capture its output
    sys.stdout = io.StringIO()
    snakemake.snakemake(
        snakefile,
        config=config,
        targets=config['targets'],
        dryrun=True,
        printdag=True)
    output = sys.stdout.getvalue()
    # restore sys.stdout
    sys.stdout = stdout
    # write output
    if shutil.which('dot'):
        svg_file = '{}.svg'.format(dag_prefix)
        # pipe the output to dot
        with open(svg_file, 'wb') as svg:
            dot_process = subprocess.Popen(
                ['dot', '-Tsvg'], stdin=subprocess.PIPE, stdout=svg)
            dot_process.communicate(input=output.encode())
    else:
        # write the file as dag
        dag_file = '{}.dag'.format(dag_prefix)
        with open(dag_file, 'wt') as file:
            file.write(output)
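# A hedged sketch of the same stdout-capture trick using
# contextlib.redirect_stdout, which restores sys.stdout even if the
# snakemake call raises. The function name capture_dag and its arguments
# are illustrative, not from the project above; the snakemake keywords
# (dryrun, printdag) are the ones used there.
import contextlib
import io

import snakemake

def capture_dag(snakefile, config):
    buf = io.StringIO()
    with contextlib.redirect_stdout(buf):
        snakemake.snakemake(snakefile, config=config, dryrun=True, printdag=True)
    return buf.getvalue()  # DOT source, e.g. for piping into `dot -Tsvg`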
def run_salmon(param):
    import snakemake
    snakefile = os.path.join(
        os.path.dirname(__file__),
        "snakemake/Snakefile.paired" if param["paired"] else "snakemake/Snakefile.single")
    snakemake.snakemake(
        snakefile=snakefile,
        config={
            "input_path": param["inpath"],
            "output_path": param["--outpath"],
            "index": param["--reference"],
            "salmon": os.path.join(os.path.dirname(__file__), "salmon/{}/bin/salmon"),
            "num_threads": param["--num_threads"],
            "exprtype": param["--exprtype"]
        })
    # collect the sample ids from the expression matrix header
    with open(os.path.join(param["--outpath"], "EXPR.csv"), "r") as inp:
        sample_ids = inp.readline().strip().split(',')[1:]
    # write a phenotype stub with one NA row per sample
    with open(os.path.join(param["--outpath"], "phenotype.csv"), "w") as oup:
        oup.write("SampleID,phenotype\n")
        oup.write("\n".join([s + "," + "NA" for s in sample_ids]) + "\n")
def test_keep_logger():
    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "Snakefile")
        with open(path, "w") as f:
            print("rule:\n output: 'result.txt'\n shell: 'touch {output}'", file=f)
        snakemake(path, workdir=tmpdir, keep_logger=True)
def test_snakemake_word_frequency():
    test_protocols: List[str] = [
        'prot-1936--ak--8.xml',
        'prot-197778--160.xml',
    ]
    workdir = aj("./tests/output/work_folder")
    config_filename = aj("./tests/test_data/test_config_output.yml")
    rmtree(workdir, ignore_errors=True)
    makedirs(workdir, exist_ok=True)
    makedirs(jj(workdir, "logs"), exist_ok=True)
    setup_parlaclarin_repository(test_protocols, workdir, "riksdagen-corpus")
    setup_work_folder_for_tagging_with_stanza(workdir)
    snakefile = jj('workflow', 'Snakefile')
    snakemake.snakemake(
        snakefile,
        config=dict(config_filename=config_filename, processes=4),
        debug=True,
        # workdir=workdir,
        keep_target_files=True,
        cores=1,
        verbose=True,
        targets=['word_frequency'],
    )
    assert isfile(jj(workdir, "riksdagen-corpus-term-frequencies.pkl"))
def generate_graph(config_collection_dict, config_path):
    """Generate DAG graph using snakemake stdout output"""
    with CaptureStdout() as graph_dot:
        snakemake.snakemake(
            snakefile=get_snakefile(
                analysis_type=config_collection_dict["analysis"]["analysis_type"],
                sequencing_type=config_collection_dict["analysis"]["sequencing_type"],
            ),
            dryrun=True,
            configfiles=[config_path],
            printrulegraph=True,
        )

    graph_title = "_".join([
        "BALSAMIC",
        BALSAMIC.__version__,
        config_collection_dict["analysis"]["case_id"],
    ])

    graph_dot = "".join(graph_dot).replace(
        "snakemake_dag {",
        'BALSAMIC { label="' + graph_title + '";labelloc="t";')

    graph_obj = graphviz.Source(
        graph_dot,
        filename=".".join(
            config_collection_dict["analysis"]["dag"].split(".")[:-1]),
        format="pdf",
        engine="dot",
    )
    graph_obj.render(cleanup=True)
def RNA_illumina(args):
    """Configure and run the RNA illumina workflow through the snakemake API"""
    # Get and check config files
    log.warning("CHECKING CONFIGURATION FILES")
    snakefile = get_snakefile_fn(workflow_dir=WORKFLOW_DIR, workflow=args.subcommands)
    configfile = get_config_fn(config=args.config)

    # Store additional options to pass to snakemake
    log.info("Build config dict for snakemake")
    config = {
        "genome": required_option("genome", args.genome),
        "transcriptome": required_option("transcriptome", args.transcriptome),
        "annotation": required_option("annotation", args.annotation),
        "sample_sheet": get_sample_sheet(
            sample_sheet=args.sample_sheet,
            required_fields=["sample_id", "fastq1", "fastq2"]),
    }
    log.debug(config)

    # Filter the remaining args options compatible with the snakemake API
    kwargs = filter_valid_snakemake_options(args)
    log.debug(kwargs)

    # Run Snakemake through the API
    log.warning("RUNNING SNAKEMAKE PIPELINE")
    snakemake(
        snakefile=snakefile,
        configfile=configfile,
        config=config,
        use_conda=True,
        **kwargs)
def run_inspect(args):
    kwargs = {
        "snakefile": os.path.join(os.path.dirname(__file__), "Snakefile"),
        "workdir": args.work_dir,
        "summary": True,
        "verbose": True,
    }
    snakemake.snakemake(**kwargs)
def run_snakemake_with_config(snakefile_path, config):
    # The snakemake() function can only handle "flat" dicts through the direct
    # config= parameter, so write the config dict to a temporary file and pass
    # it in via configfiles= instead.
    f = tempfile.NamedTemporaryFile(mode='w', delete=False)
    try:
        yaml.dump(config, f, default_flow_style=False)
        f.close()  # flush to disk before snakemake reads the file by name
        snakemake_api.snakemake(snakefile=snakefile_path, configfiles=[f.name])
    finally:
        os.unlink(f.name)
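# Hypothetical call to the helper above (the path and config keys are
# illustrative): the nested dict survives the round trip through the YAML
# file, which the flat config= parameter could not express.
run_snakemake_with_config(
    "workflow/Snakefile",
    {"samples": {"A": "A_R1.fastq.gz", "B": "B_R1.fastq.gz"},
     "params": {"min_cov": 10}})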
def test_dryrun_all(self):
    ''' Test that the "all" rule dryruns properly '''
    self.assertTrue(snakemake.snakemake(
        os.path.join(self.workdir, 'Snakefile'),
        #configfile=os.path.join(self.workdir, 'config.json'),
        workdir=self.workdir,
        dryrun=True))
    self.assertTrue(snakemake.snakemake(
        os.path.join(self.workdir, 'Snakefile'),
        #configfile=os.path.join(self.workdir, 'config.json'),
        workdir=self.workdir,
        dryrun=True,
        targets=['all']))
def main(args):
    indir = os.path.realpath(args.biokit_dir)
    refgenomes_dir = os.path.realpath(args.refgenomes_dir)
    outdir = args.outdir
    if outdir is None:
        outdir = os.path.join(indir, 'HBVouroboros')
    if not os.path.exists(outdir):
        makedirs(outdir, mode=0o775, exist_ok=True)

    unmapped_sample_annotation = os.path.join(outdir, 'unmapped_samples.txt')
    biokit_status = snakemake.snakemake(
        snakefile=biokit_snakefile,
        config={
            'biokit_dir': indir,
            'output_file': unmapped_sample_annotation
        },
        workdir=outdir)
    if not biokit_status:
        raise Exception('Failed to derive sample annotation files '
                        'for unmapped reads in directory {}'.format(indir))

    cluster_logs_dir = os.path.join(outdir, 'cluster-logs')
    makedirs(cluster_logs_dir, mode=0o775, exist_ok=True)
    cluster_out_pattern = os.path.join(cluster_logs_dir, 'slurm-%x-%j.out')
    cluster_err_pattern = os.path.join(cluster_logs_dir, 'slurm-%x-%j.err')

    if not args.local:
        cluster_comm = ('sbatch -t {cluster.time} -c {cluster.cpu} '
                        '-N {cluster.nodes} --mem={cluster.mem} '
                        '--ntasks-per-node={cluster.ntasks_per_node}'
                        ' -o ' + cluster_out_pattern +
                        ' -e ' + cluster_err_pattern)
        cluster_config = align_clusterfile
    else:
        cluster_comm = None
        cluster_config = None

    status = snakemake.snakemake(
        align_snakefile,
        cluster=cluster_comm,
        cluster_config=cluster_config,
        cores=128,
        nodes=128,
        local_cores=4,
        config={
            'sample_annotation': unmapped_sample_annotation,
            'refgenomes_dir': refgenomes_dir
        },
        workdir=outdir,
        restart_times=3,
        printshellcmds=True)

    # translate "success" into a shell exit code of 0
    if status:
        return 0
    return 1
def status(context, sample_config, show_only_missing, print_files):
    """cli for status sub-command."""
    LOG.info(f"BALSAMIC started with log level {context.obj['loglevel']}.")
    LOG.debug("Reading input sample config")
    with open(sample_config, "r") as fn:
        sample_config_dict = json.load(fn)

    result_dir = get_result_dir(sample_config_dict)
    analysis_type = sample_config_dict["analysis"]["analysis_type"]
    sequencing_type = sample_config_dict["analysis"]["sequencing_type"]
    snakefile = get_snakefile(analysis_type, sequencing_type)

    with CaptureStdout() as summary:
        snakemake.snakemake(
            snakefile=snakefile,
            dryrun=True,
            summary=True,
            configfiles=[sample_config],
            quiet=True,
        )
    summary = [i.split("\t") for i in summary]
    summary_dict = [dict(zip(summary[0], value)) for value in summary[1:]]

    if not os.path.isfile(os.path.join(result_dir, "analysis_finish")):
        LOG.warning(
            "analysis_finish file is missing. Analysis might be incomplete or running."
        )

    existing_files = set()
    missing_files = set()

    for entries in summary_dict:
        delivery_file = entries["output_file"]
        file_status_str, file_status = get_file_status_string(delivery_file)
        if file_status and print_files:
            click.echo(file_status_str)
        if not file_status and (show_only_missing or print_files):
            click.echo(file_status_str)
        if file_status:
            existing_files.add(delivery_file)
        else:
            missing_files.add(delivery_file)

    finish_file_count = 'Finished file count: {}'.format(len(existing_files))
    missing_file_count = 'Missing file count: {}'.format(len(missing_files))
    click.echo(Color('{yellow}Final tally:{/yellow}'))
    click.echo(Color('{yellow}\t' + finish_file_count + '{/yellow}'))
    click.echo(Color('{yellow}\t' + missing_file_count + '{/yellow}'))
def test_run_script_directive():
    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, 'Snakefile')
        with open(path, 'w') as f:
            print(dedent("""
                rule:
                    output: 'result.txt'
                    run:
                        with open(output[0], 'w') as f:
                            print("hello", file=f)
                """), file=f)
        snakemake(path, workdir=tmpdir)
def run_pipeline(config_f):
    import snakemake
    import os
    sgp_home = os.path.dirname(os.path.realpath(__file__))
    snakemake.snakemake(
        snakefile=os.path.join(sgp_home, 'main.smk'),
        use_conda=True,
        conda_prefix=os.path.join(sgp_home, 'env'),
        restart_times=3,
        configfile=config_f,
        workdir=os.path.dirname(config_f),
        printshellcmds=True,
        force_incomplete=True,
        notemp=True)
def test_run_single_task(simple_merfish_task):
    simple_merfish_task.save()
    assert not simple_merfish_task.is_complete()
    snakeRule = snakewriter.SnakemakeRule(simple_merfish_task)
    with open('temp.Snakefile', 'w') as outFile:
        outFile.write('rule all: \n\tinput: ' + snakeRule.full_output() + '\n\n')
        outFile.write(snakeRule.as_string())
    snakemake.snakemake('temp.Snakefile')
    os.remove('temp.Snakefile')
    shutil.rmtree('.snakemake')
    assert simple_merfish_task.is_complete()
def cme():
    """The main function call for running the CME application"""
    parser = argparse.ArgumentParser(prog='CME',
                                     description='Computational Modeling Engine')
    parser.add_argument('-v', '--version', action='version',
                        version='%(prog)s 1.0', help='print version and exit')
    parser.add_argument('--prepare', action='store_true', default=False,
                        dest='prepare', help='set prepare structure switch to true')
    parser.add_argument('--inchi', action='store_true', default=False,
                        dest='inchi', help='set inchi file creation switch to true')
    parser.add_argument('--configfile', default='config.yaml',
                        help='snakemake configuration file')
    parser.add_argument('--dft', action='store_true', default=False,
                        dest='dft', help='run DFT calculation')
    parser.add_argument('--md', action='store_true', default=False,
                        dest='md', help='run MD simulation')
    args = parser.parse_args()

    if args.inchi:
        print('Creating .inchi files from inchi strings in .csv file')
        snakemake(resource_filename('cme', 'rules/MD-pipeline.snakemake'),
                  configfile=args.configfile)
    if args.prepare:
        print('Preparing initial structure files for DFT and MD runs')
        snakemake(resource_filename('cme', 'rules/2.snakemake'),
                  configfile=args.configfile)
    if args.dft:
        print('Starting DFT runs')
        snakemake(resource_filename('cme', 'rules/rj-dft.snakemake'),
                  configfile=args.configfile)
    if args.md:
        print('Starting MD runs')
        snakemake(resource_filename('cme', 'rules/rj-md.snakemake'),
                  configfile=args.configfile)
    # if args.md2:
    #     print('Starting MD-2 runs')
    #     snakemake(resource_filename('cme', 'rules/rj-md2.snakemake'),
    #               configfile=args.configfile)
    print(args)
def download_human_genomes():
    config = {
        "output": {
            "hg19": os.path.join(EXPLOSIG_DATA_DIR, "genomes", "hg19.fa"),
            "hg38": os.path.join(EXPLOSIG_DATA_DIR, "genomes", "hg38.fa")
        }
    }
    # The snakemake() function can only handle "flat" dicts through the direct
    # config= parameter, so write the config dict to a temporary file and pass
    # it in via configfiles= instead.
    with tempfile.NamedTemporaryFile(mode='w') as temp:
        yaml.dump(config, temp, default_flow_style=False)
        temp.flush()  # make sure the config is on disk before snakemake reads it
        snakefile = os.path.join(os.path.dirname(__file__),
                                 'snakefiles', 'genomes', 'human.smk')
        snakemake_api.snakemake(snakefile=snakefile, configfiles=[temp.name])
def main():
    snakefile = get_snakefile()
    options = parse_args()
    snakemake.snakemake(
        snakefile=snakefile,
        configfiles=options.configfiles,
        config=options.config,
        nodes=options.jobs,
        cores=options.jobs,
        keepgoing=options.keep_going,
        notemp=options.notemp,
        dryrun=options.dry_run,
        quiet=options.quiet,
        forceall=options.forceall,
        forcerun=options.forcerun)
def test_dryrun_all(self):
    ''' Test that the "all" rule dryruns properly '''
    self.assertTrue(
        snakemake.snakemake(
            os.path.join(self.workdir, 'Snakefile'),
            #configfile=os.path.join(self.workdir, 'config.yaml'),
            workdir=self.workdir,
            dryrun=True))
    self.assertTrue(
        snakemake.snakemake(
            os.path.join(self.workdir, 'Snakefile'),
            #configfile=os.path.join(self.workdir, 'config.yaml'),
            workdir=self.workdir,
            dryrun=True,
            targets=['all']))
def run_neoantigen_pipeline(args, parsed_config, configfile):
    configfile.seek(0)
    output_dir = get_output_dir(parsed_config)
    stats_file = join(output_dir, "stats.json")
    # only run targets in the output directory (exclude reference processing)
    targets = [x for x in get_and_check_targets(args, parsed_config)
               if x.startswith(output_dir)]
    if not targets:
        logger.info("No output targets specified")
        return

    config_extension = make_config_extension_dict(args, parsed_config)
    logger.info("Running neoantigen pipeline with targets %s" % targets)
    start_time = datetime.datetime.now()
    if not snakemake.snakemake(
            'pipeline/Snakefile',
            cores=args.cores,
            resources={'mem_mb': int(1024 * args.memory)},
            config=config_extension,
            configfile=configfile.name,
            printshellcmds=True,
            dryrun=args.dry_run,
            targets=targets,
            workdir=parsed_config["workdir"],
            stats=stats_file):
        raise ValueError("Pipeline failed, see Snakemake error message for details")
    end_time = datetime.datetime.now()
    logger.info("--- Pipeline running time: %s ---" % str(end_time - start_time))
def infer_topology(
    obj, working_directory, tree_method, rooting_method, lsd_output_format
):
    if working_directory is None:
        working_directory = obj.output_path.parents[0]
    with importlib.resources.path("treeflow_pipeline", "topology.smk") as snakefile:
        success = snakemake.snakemake(
            snakefile,
            config=dict(
                alignment=obj.alignment_path,
                output=obj.output_path,
                working_directory=working_directory,
                tree_method=tree_method,
                rooting_method=rooting_method,
                subst_model=obj.model.subst_model,
                site_model=obj.model.site_model,
                clock_model=obj.model.clock_model,
                lsd_output_format=lsd_output_format,
                seed=obj.seed,
            ),
            targets=["tree", "starting_values"],
            lock=False,
        )
    if not success:
        raise click.UsageError(
            "Topology inference pipeline was unsuccessful, check inputs"
        )
def run(
    dryrun: bool = False,
    cores: int = 4,
    keepgoing: bool = False,
    unlock: bool = False,
    printdag: bool = False,
    targets=None,
    workdir=None,
):
    # snakemake sets up its own logging, and this cannot be easily changed
    # (setting keep_logger=True crashes), so remove our own log handler
    # for now
    logger.root.handlers = []
    snakefile_path = pkg_resources.resource_filename("dbspro", "rules.smk")
    success = snakemake(
        snakefile_path,
        snakemakepath="snakemake",  # needed in snakemake 3.9.0
        dryrun=dryrun,
        printdag=printdag,
        quiet=printdag,  # suppress regular output when printing the DAG
        cores=cores,
        keepgoing=keepgoing,
        unlock=unlock,
        printshellcmds=True,
        targets=targets,
        workdir=workdir)
    if not success:
        raise SnakemakeError()
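# A hedged alternative to the handler-clearing workaround above: stash the
# root logger's handlers and restore them after snakemake has run, so the
# host application's logging setup survives the call. run_workflow is an
# illustrative name, not part of the package above.
import logging

import snakemake

def run_workflow(snakefile, **kwargs):
    saved_handlers = logging.root.handlers[:]
    logging.root.handlers = []  # let snakemake install its own logging
    try:
        return snakemake.snakemake(snakefile, **kwargs)
    finally:
        logging.root.handlers = saved_handlers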
def run(path, shouldfail=False, snakefile="Snakefile", **params):
    """
    Test the Snakefile in path.
    There must be a Snakefile in the path and a subdirectory named
    expected-results.
    """
    results_dir = join(path, 'expected-results')
    snakefile = join(path, snakefile)
    assert os.path.exists(snakefile)
    assert os.path.exists(results_dir) and os.path.isdir(results_dir), \
        '{} does not exist'.format(results_dir)
    tmpdir = mkdtemp()
    try:
        call('cp `find {} -maxdepth 1 -type f` {}'.format(path, tmpdir),
             shell=True)
        success = snakemake(snakefile,
                            cores=3,
                            workdir=tmpdir,
                            stats="stats.txt",
                            snakemakepath=SCRIPTPATH,
                            **params)
        if shouldfail:
            assert not success, "expected error on execution"
        else:
            assert success, "expected successful execution"
            for resultfile in os.listdir(results_dir):
                # check inside results_dir, not the cwd (skips .svn dirs etc.)
                if not os.path.isfile(join(results_dir, resultfile)):
                    continue
                targetfile = join(tmpdir, resultfile)
                expectedfile = join(results_dir, resultfile)
                assert os.path.exists(targetfile), \
                    'expected file "{}" not produced'.format(resultfile)
                assert md5sum(targetfile) == md5sum(expectedfile), \
                    'wrong result produced for file "{}"'.format(resultfile)
    finally:
        call(['rm', '-rf', tmpdir])
def test_workflow_compiles(self):
    chdir(self._get_pipeline_dir_path())
    self.assertTrue(
        snakemake.snakemake(
            'Snakefile',
            cores=20,
            resources={'mem_mb': 160000},
            configfile=self.config_tmpfile.name,
            config={
                'num_threads': 22,
                'mem_gb': 160,
                'contigs': ['2']
            },
            dryrun=True,
            printshellcmds=True,
            targets=[
                join(self.workdir.name, 'idh1-test-sample',
                     'vaccine-peptide-report_netmhcpan-iedb_mutect-strelka.txt'),
                join(self.workdir.name, 'idh1-test-sample', 'rna_final.bam'),
            ],
            stats=join(self.workdir.name, 'idh1-test-sample', 'stats.json')))
def run_snakemake(self):
    """Run Snakemake and display result files afterwards"""
    print('\nRunning Snakemake\n=================\n', file=sys.stderr)
    snakemake.snakemake(
        snakefile=os.path.join(self.args.work_dir, 'Snakefile'),
        workdir=self.args.work_dir,
    )
    # TODO: check Snakemake result
    print('\nThe End\n=======\n', file=sys.stderr)
    print('\n'.join(textwrap.wrap(textwrap.dedent(r"""
        You can find the results in the "{}/report.txt" file.
        """).format(self.args.work_dir).lstrip())),
        file=sys.stderr)
def test_snakemake_execute():
    config_filename = aj("./tests/test_data/test_config.yml")
    cfg: Config = load_typed_config(config_name=config_filename)
    snakefile = jj('workflow', 'Snakefile')
    rmtree(cfg.annotated_folder, ignore_errors=True)
    makedirs(cfg.annotated_folder, exist_ok=True)
    success = snakemake.snakemake(
        snakefile,
        config=dict(config_filename=config_filename),
        debug=True,
        # workdir=workdir,
        keep_target_files=True,
        cores=1,
        verbose=True,
    )
    assert success
    source_files: List[str] = glob.glob(
        jj(cfg.data_folder, 'riksdagen-corpus/corpus/**/prot*.xml'),
        recursive=True)
    for filename in source_files:
        document_name: str = strip_path_and_extension(filename)
        target_dir: str = jj(cfg.annotated_folder, document_name.split('-')[1])
        assert isfile(jj(target_dir, f"{document_name}.zip"))
def call_snakemake(workdir, targets=None):
    return snakemake.snakemake(
        os.path.join(workdir, 'Snakefile'),
        configfile=os.path.join(workdir, 'config.yaml'),
        workdir=workdir,
        dryrun=True,
        targets=targets)
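# Illustrative use of the wrapper above (the directory is hypothetical):
# the dry run succeeds only if the workflow can build a DAG for the
# requested targets, which makes this a cheap configuration check.
assert call_snakemake("/path/to/analysis_dir", targets=["all"])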
def process_reference(args, parsed_config, configfile):
    configfile.seek(0)
    reference_genome_dir = get_reference_genome_dir(parsed_config)
    stats_file = join(reference_genome_dir, "stats.json")
    targets = [
        x for x in get_and_check_targets(args, parsed_config)
        if x.startswith(reference_genome_dir)]
    if not targets:
        targets = [parsed_config["reference"]["genome"] + '.done']

    logger.info("Processing reference with targets: %s" % targets)
    start_time = datetime.datetime.now()
    if not snakemake.snakemake(
            'pipeline/reference_Snakefile',
            cores=args.cores,
            resources={'mem_mb': int(1024 * args.memory)},
            config={'num_threads': args.cores, 'mem_gb': args.memory},
            configfile=configfile.name,
            printshellcmds=True,
            dryrun=args.dry_run,
            targets=targets,
            stats=stats_file):
        raise ValueError(
            "Reference processing failed, see Snakemake error message for details")
    end_time = datetime.datetime.now()
    logger.info("--- Reference processing time: %s ---" % str(end_time - start_time))
def run_snakemake_from_config(dry_run, config_yaml, cores, cluster_command,
                              nodes, delete_all_output=False):
    print("[INFO] Invoking Snakemake with config {} and {} cores.".format(
        config_yaml, cores))
    finished_successfully = snakemake.snakemake(
        snakefile=snakefile_location,
        configfiles=[config_yaml],
        dryrun=dry_run,
        cores=cores,
        local_cores=cores,
        nodes=nodes,
        printshellcmds=True,
        delete_all_output=delete_all_output,
        use_conda=True,
        conda_prefix=conda_prefix,
        cluster=cluster_command)
    if not finished_successfully:
        os.sys.exit(os.EX_SOFTWARE)
def run_neoantigen_pipeline(args, parsed_config, configfile):
    configfile.seek(0)
    output_dir = get_output_dir(parsed_config)
    stats_file = join(output_dir, "stats.json")
    # parse out targets that start with the output directory (not reference)
    targets = [x for x in get_and_check_targets(args, parsed_config)
               if x.startswith(output_dir)]
    if not targets:
        return

    logger.info("Running neoantigen pipeline with targets %s" % targets)
    # include all relevant contigs in the pipeline config
    with open(parsed_config["reference"]["genome"] + ".contigs") as f:
        contigs = [x.strip() for x in f.readlines()]

    start_time = datetime.datetime.now()
    if not snakemake.snakemake(
            'pipeline/Snakefile',
            cores=args.cores,
            resources={'mem_mb': int(1024 * args.memory)},
            config={'num_threads': args.cores,
                    'mem_gb': args.memory,
                    'contigs': contigs},
            configfile=configfile.name,
            printshellcmds=True,
            dryrun=args.dry_run,
            targets=targets,
            stats=stats_file):
        raise ValueError("Pipeline failed, see Snakemake error message for details")
    end_time = datetime.datetime.now()
    logger.info("--- Pipeline running time: %s ---" % str(end_time - start_time))
def test_missing_merge_inputs(self):
    add_to_sample_list(self.workdir, 'assembly', 'G_missing')
    res = snakemake.snakemake(
        os.path.join(self.workdir, 'Snakefile'),
        workdir=self.workdir,
        dryrun=True,
        targets=['all_assemble'])
    assert not res
def run(self, result):
    result.startTest(self)
    if snakemake(self.snakefile, dryrun=True, workdir=self.testdir):
        result.addSuccess(self)
    else:
        result.addFailure(self, "")
    result.stopTest(self)
def main():
    p = get_argument_parser()
    args = p.parse_args()
    stranded = "reverse" if args.reverse_stranded else "yes"
    workflow_fpath = get_workflow_file_fpath()
    success = snakemake(
        snakefile=workflow_fpath,
        targets=args.target,
        configfile=args.config_file,
        config={
            'output_dir': args.output_dir,
            'experiment_table': args.experiment_table,
            'stranded': stranded,
            'run_celseq2_to_st': args.celseq2_to_st,
            'keep_intermediate': args.keep_temp,
        },
        printshellcmds=True,
        printreason=True,
        # timestamp=True,  # deprecated since snakemake 5.2.2
        latency_wait=300,
        jobname="celseq2_job.{rulename}.{jobid}.sh",
        keepgoing=False,
        restart_times=2,
        dryrun=args.dryrun,
        lock=not args.nolock,
        unlock=args.unlock,
        cluster=args.cluster,
        cores=args.cores,
        nodes=args.cores,
        force_incomplete=args.rerun_incomplete,
        ignore_incomplete=args.ignore_incomplete,
        notemp=args.keep_temp)
    sys.exit(0 if success else 1)
def test_dryrun_all_metagenomics(self):
    ''' Test that the "all_metagenomics" rule dryruns properly '''
    self.assertTrue(snakemake.snakemake(
        os.path.join(self.workdir, 'Snakefile'),
        #configfile=os.path.join(self.workdir, 'config.yaml'),
        workdir=self.workdir,
        dryrun=True,
        targets=['all_metagenomics']))
def run(path,
        shouldfail=False,
        needs_connection=False,
        snakefile="Snakefile",
        subpath=None,
        check_md5=True,
        **params):
    """
    Test the Snakefile in path.
    There must be a Snakefile in the path and a subdirectory named
    expected-results.
    """
    if needs_connection and not is_connected():
        print("Skipping test because of missing internet connection",
              file=sys.stderr)
        return False

    results_dir = join(path, "expected-results")
    snakefile = join(path, snakefile)
    assert os.path.exists(snakefile)
    assert os.path.exists(results_dir) and os.path.isdir(results_dir), \
        "{} does not exist".format(results_dir)
    tmpdir = mkdtemp()
    try:
        config = {}
        if subpath is not None:
            # set up a working directory for the subworkflow and pass it in `config`
            # for now, only one subworkflow is supported
            assert os.path.exists(subpath) and os.path.isdir(subpath), \
                "{} does not exist".format(subpath)
            subworkdir = os.path.join(tmpdir, "subworkdir")
            os.mkdir(subworkdir)
            call("cp `find {} -maxdepth 1 -type f` {}".format(subpath, subworkdir),
                 shell=True)
            config["subworkdir"] = subworkdir

        call("cp `find {} -maxdepth 1 -type f` {}".format(path, tmpdir),
             shell=True)
        success = snakemake(snakefile,
                            cores=3,
                            workdir=tmpdir,
                            stats="stats.txt",
                            snakemakepath=SCRIPTPATH,
                            config=config,
                            **params)
        if shouldfail:
            assert not success, "expected error on execution"
        else:
            assert success, "expected successful execution"
            for resultfile in os.listdir(results_dir):
                if resultfile == ".gitignore" or not os.path.isfile(
                        os.path.join(results_dir, resultfile)):
                    # this means tests cannot use directories as output files
                    continue
                targetfile = join(tmpdir, resultfile)
                expectedfile = join(results_dir, resultfile)
                assert os.path.exists(targetfile), \
                    'expected file "{}" not produced'.format(resultfile)
                if check_md5:
                    assert md5sum(targetfile) == md5sum(expectedfile), \
                        'wrong result produced for file "{}"'.format(resultfile)
    finally:
        rmtree(tmpdir)
def main():
    parser = argparse.ArgumentParser(
        description='run snakemake workflows for spacegraphcats',
        usage='''spacegraphcats <configfile.yaml> [<target1> ...]

Run workflows for spacegraphcats, using the given config file.

Targets:

   build           - builds catlas (default)
   searchquick     - do a quick search (only a few files)
   search          - do a full search for this data set (many files)
   extract_reads   - extract reads for search results (many files)
   extract_contigs - extract contigs for search results (many files)
   clean           - remove the primary catlas build files
   show            - parse and display the config file

For a quickstart, run this:

   spacegraphcats dory-test searchquick

For an example config file, run:

   spacegraphcats dory-test show

from the main spacegraphcats directory.
''')
    parser.add_argument('configfile')
    parser.add_argument('targets', nargs='*', default=['build'])
    parser.add_argument('-n', '--dry-run', action='store_true')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('-d', '--debug', action='store_true')
    parser.add_argument('--nolock', action='store_true')
    parser.add_argument('--overhead', type=float, default=None)
    parser.add_argument('--experiment', default=None)
    parser.add_argument('--radius', type=int, default=None)
    parser.add_argument('--cdbg-only', action='store_true',
                        help='for paper evaluation purposes')
    args = parser.parse_args()

    # first, find the Snakefile
    snakefile = os.path.join(thisdir, './conf', 'Snakefile')
    if not os.path.exists(snakefile):
        sys.stderr.write('Error: cannot find Snakefile at {}\n'.format(snakefile))
        sys.exit(-1)

    # next, find the config file
    configfile = None
    if os.path.exists(args.configfile) and not os.path.isdir(args.configfile):
        configfile = args.configfile
    else:
        for suffix in ('', '.json', '.yaml'):
            tryfile = os.path.join(thisdir, './conf', args.configfile + suffix)
            if os.path.exists(tryfile) and not os.path.isdir(tryfile):
                sys.stderr.write('Found configfile at {}\n'.format(tryfile))
                configfile = tryfile
                break

    if not configfile:
        sys.stderr.write('Error: cannot find configfile {}\n'.format(args.configfile))
        sys.exit(-1)

    # build config override dict
    config = dict()
    if args.overhead is not None:
        config['overhead'] = args.overhead
    if args.experiment is not None:
        config['experiment'] = args.experiment
    if args.radius is not None:
        config['radius'] = args.radius
    if args.cdbg_only:
        config['cdbg_only'] = True

    print('--------', file=sys.stderr)
    print('details!', file=sys.stderr)
    print('\tsnakefile: {}'.format(snakefile), file=sys.stderr)
    print('\tconfig: {}'.format(configfile), file=sys.stderr)
    print('\ttargets: {}'.format(repr(args.targets)), file=sys.stderr)
    if config:
        print('\toverride: {}'.format(pprint.pformat(config)), file=sys.stderr)
    print('--------', file=sys.stderr)

    if 'show' in args.targets:
        if configfile.endswith('json'):
            print(yaml.dump(
                yaml.load(json.dumps(json.loads(open(configfile).read()))),
                default_flow_style=False))
        else:
            print(yaml.dump(yaml.load(open(configfile).read()),
                            default_flow_style=False))
        return 0

    # run!!
    status = snakemake.snakemake(snakefile,
                                 configfile=configfile,
                                 targets=args.targets,
                                 printshellcmds=True,
                                 dryrun=args.dry_run,
                                 lock=not args.nolock,
                                 config=config,
                                 verbose=args.verbose,
                                 debug_dag=args.debug)

    # translate "success" into a shell exit code of 0
    if status:
        return 0
    return 1
def assemble_transcripts_pipeline(args):
    """
    This section of Daijin is focused on creating the necessary configuration
    for driving the pipeline.
    :param args:
    :return:
    """
    if args.config.endswith("json"):
        loader = json.load
    else:
        loader = yaml.load
    with open(args.config, 'r') as _:
        doc = loader(_)

    if args.exe and os.path.exists(args.exe):
        if args.exe.endswith("json"):
            loader = json.load
        else:
            loader = yaml.load
        with open(args.exe) as _:
            doc["load"] = loader(_)

    # Check the configuration
    check_config(doc)

    # pylint: disable=invalid-name
    if "short_reads" not in doc and "long_reads" not in doc:
        print("Neither a short read nor a long read section was present in the "
              "configuration. Please include your samples and try again.")
        exit(1)

    LABELS = []
    R1 = []
    R2 = []
    LR_LABELS = []
    LR_FILES = []

    if "short_reads" in doc:
        LABELS = doc["short_reads"]["samples"]
        R1 = doc["short_reads"]["r1"]
        R2 = doc["short_reads"]["r2"]
    if "long_reads" in doc:
        LR_LABELS = doc["long_reads"]["samples"]
        LR_FILES = doc["long_reads"]["files"]

    READS_DIR = doc["out_dir"] + "/1-reads"
    SCHEDULER = doc["scheduler"] if doc["scheduler"] else ""
    CWD = os.path.abspath(".")
    # pylint: enable=invalid-name

    res_cmd, sub_cmd = get_sub_commands(SCHEDULER, args.prefix)

    # Create log folder
    if not os.path.exists("daijin_logs"):
        os.makedirs("daijin_logs")
    elif not os.path.isdir("daijin_logs"):
        raise OSError("{} is not a directory!".format("daijin_logs"))

    if (len(R1) != len(R2)) and (len(R1) != len(LABELS)):
        print("The R1, R2 and LABELS lists are not the same length. "
              "Please check and try again.")
        exit(1)

    if len(LR_LABELS) != len(LR_FILES):
        print("The long read sample and file arrays in the configuration file "
              "are not the same length. Please check and try again.")
        exit(1)

    if not os.path.exists(READS_DIR):
        os.makedirs(READS_DIR)

    # symlink the short reads into the reads directory
    for read1, read2, label in zip(R1, R2, LABELS):
        suffix = read1.split(".")[-1]
        if suffix not in ("gz", "bz2"):
            suffix = ""
        else:
            suffix = ".{}".format(suffix)
        r1out = READS_DIR + "/" + label + ".R1.fq{}".format(suffix)
        r2out = READS_DIR + "/" + label + ".R2.fq{}".format(suffix)
        if not os.path.islink(r1out):
            os.symlink(os.path.abspath(read1), r1out)
        if not os.path.islink(r2out):
            os.symlink(os.path.abspath(read2), r2out)

    # symlink the long reads, normalising the file extension
    for lr_file, label in zip(LR_FILES, LR_LABELS):
        suffix = lr_file.split(".")[-1]
        if suffix in ("fa", "fna", "fasta"):
            suffix = ".fa"
        elif suffix in ("fq", "fastq"):
            suffix = ".fq"
        else:
            suffix = ".{}".format(suffix)
        out = READS_DIR + "/" + label + ".long{}".format(suffix)
        if not os.path.islink(out):
            os.symlink(os.path.abspath(lr_file), out)

    # Launch using Snakemake
    assert pkg_resources.resource_exists("Mikado",
                                         os.path.join("daijin", "tr.snakefile"))

    additional_config = {}
    if args.threads is not None:
        additional_config["threads"] = args.threads

    cluster_var = None
    if args.no_drmaa is True and sub_cmd:
        cluster_var = sub_cmd + res_cmd

    drmaa_var = None
    if args.no_drmaa is False and res_cmd:
        drmaa_var = res_cmd

    if drmaa_var or cluster_var:
        if os.path.exists(args.hpc_conf):
            hpc_conf = args.hpc_conf
        else:
            hpc_conf = system_hpc_yaml
    else:
        hpc_conf = None

    snakemake.snakemake(
        pkg_resources.resource_filename("Mikado",
                                        os.path.join("daijin", "tr.snakefile")),
        dryrun=args.dryrun,
        cores=args.cores,
        nodes=args.jobs,
        configfile=args.config,
        config=additional_config,
        workdir=CWD,
        cluster_config=hpc_conf,
        cluster=cluster_var,
        drmaa=drmaa_var,
        printshellcmds=True,
        snakemakepath=shutil.which("snakemake"),
        stats="daijin_tr_" + NOW + ".stats",
        force_incomplete=args.rerun_incomplete,
        detailed_summary=args.detailed_summary,
        list_resources=args.list,
        latency_wait=60 if SCHEDULER else 1,
        printdag=args.dag,
        forceall=args.dag,
        forcerun=args.forcerun,
        lock=(not args.nolock))
def mikado_pipeline(args):
    """
    This function launches the sub-section dedicated to the Mikado pipeline.
    :param args:
    :return:
    """
    if args.config.endswith("json"):
        loader = json.load
    else:
        loader = yaml.load
    with open(args.config, 'r') as _:
        doc = loader(_)

    additional_config = {}
    if args.threads is not None:
        additional_config["threads"] = args.threads

    if args.exe and os.path.exists(args.exe):
        if args.exe.endswith("json"):
            loader = json.load
        else:
            loader = yaml.load
        with open(args.exe) as _:
            additional_config["load"] = loader(_)

    check_config(doc)

    # pylint: disable=invalid-name
    SCHEDULER = doc["scheduler"] if ("scheduler" in doc and doc["scheduler"]) else ""
    CWD = os.path.abspath(".")
    # pylint: enable=invalid-name

    res_cmd, sub_cmd = get_sub_commands(SCHEDULER, args.prefix)

    if not os.path.exists("daijin_logs"):
        os.makedirs("daijin_logs")
    elif not os.path.isdir("daijin_logs"):
        raise OSError("{} is not a directory!".format("daijin_logs"))

    # Launch using Snakemake
    assert pkg_resources.resource_exists("Mikado",
                                         os.path.join("daijin", "mikado.snakefile"))

    cluster_var = None
    if args.no_drmaa is True and sub_cmd:
        cluster_var = sub_cmd + res_cmd

    drmaa_var = None
    if args.no_drmaa is False and res_cmd:
        drmaa_var = res_cmd

    if drmaa_var or cluster_var:
        if os.path.exists(args.hpc_conf):
            hpc_conf = args.hpc_conf
        else:
            hpc_conf = system_hpc_yaml
    else:
        hpc_conf = None

    snakemake.snakemake(
        pkg_resources.resource_filename("Mikado",
                                        os.path.join("daijin", "mikado.snakefile")),
        ignore_ambiguity=False,
        cores=args.cores,
        dryrun=args.dryrun,
        nodes=args.jobs,
        configfile=args.config,
        config=additional_config,
        workdir=CWD,
        cluster_config=hpc_conf,
        cluster=cluster_var,
        drmaa=drmaa_var,
        printshellcmds=True,
        snakemakepath=shutil.which("snakemake"),
        stats="daijin_tr_" + NOW + ".stats",
        force_incomplete=args.rerun_incomplete,
        detailed_summary=args.detailed_summary,
        list_resources=args.list,
        latency_wait=60 if SCHEDULER else 1,
        printdag=args.dag,
        forceall=args.dag,
        forcerun=args.forcerun,
        lock=(not args.nolock))