예제 #1
0
def main(args):
    """Prepare the run configuration and launch the Snakemake workflow.

    Calls ``get_pipeline_log``, ``make_sample_sheet``, ``get_resources`` and
    ``check_databases`` in that order, then runs ``Snakefile`` with the
    directory containing this file as working directory.

    Args:
        args: Parsed command-line options. The attributes read here are
            ``input``, ``output``, ``cores``, ``queue``, ``serobadb``,
            ``kmersize``, ``mincov``, ``dryrun``, ``unlock`` and
            ``rerunincomplete``.
    """
    sample_sheet_path = "config/sample_sheet.yaml"

    get_pipeline_log("config/pipeline_log.yaml")
    make_sample_sheet(args.input, sample_sheet_path)
    resources = get_resources(args.cores, args.queue)
    check_databases(args.serobadb)

    pipeline_dir = pathlib.Path(__file__).parent.absolute()
    workflow_config = {
        "out": str(args.output),
        "sample_sheet": sample_sheet_path,
        "seroba_db": str(args.serobadb),
        "kmer_size": int(args.kmersize),
        "min_cov": int(args.mincov),
    }
    core_count = resources['cores']

    # Both %s slots receive the output directory; the {...} fields are left
    # as-is by the % formatting.
    # NOTE(review): the queue is fixed to "bio" here although args.queue is
    # handed to get_resources() -- confirm this is intended.
    drmaa_template = " -q bio -n {threads} -o %s/log/drmaa/{name}_{wildcards}_{jobid}.out -e %s/log/drmaa/{name}_{wildcards}_{jobid}.err -R \"span[hosts=1]\" -R \"rusage[mem={resources.mem_mb}]\" "
    drmaa_args = drmaa_template % (str(args.output), str(args.output))

    snakemake.snakemake(
        "Snakefile",
        workdir=pipeline_dir,
        config=workflow_config,
        cores=core_count,
        nodes=core_count,
        use_conda=True,
        conda_frontend="mamba",
        dryrun=args.dryrun,
        jobname="seroba_{name}.jobid{jobid}",
        keepgoing=True,
        printshellcmds=True,
        unlock=args.unlock,
        force_incomplete=args.rerunincomplete,
        configfiles=["config/pipeline_parameters.yaml"],
        drmaa=drmaa_args,
    )
예제 #2
0
파일: pipeline.py 프로젝트: xjyx/singlecell
def run_pipeline(config_file):
    """Write the pipeline config and Snakefile to the output directory and run them.

    Args:
        config_file: Path handed to ``config.read_config``.
    """
    conf, _errors = config.read_config(config_file)

    # The 'pipeline' section is removed before the config is written back out.
    del conf['pipeline']

    out_dir = conf['output']['output_dir']
    template = _TEMPLATE_ENV.get_template('pipeline_template.sm')

    misc.make_sure_dir_exists(out_dir)

    # Config file consumed by the generated Snakefile.
    config_file_name = 'config.yaml'
    config.write_config(conf, os.path.join(out_dir, config_file_name))

    # Snakefile rendered from the template, pointing at the config by name.
    snakemake_file = os.path.join(out_dir, 'Snakefile')
    rendered = template.render(config_file=config_file_name)
    with open(snakemake_file, 'w') as snakefile_handle:
        snakefile_handle.write(rendered)

    snakemake.snakemake(
        snakemake_file,
        workdir=out_dir,
        cores=conf['parameters']['num_threads'],
    )
예제 #3
0
def print_graph(snakefile, config, dag_prefix):
    """Capture the workflow DAG printed by Snakemake and save it to disk.

    Snakemake is called as a dry run with ``printdag=True`` while
    ``sys.stdout`` is temporarily replaced by an in-memory buffer. If a
    ``dot`` executable is on the PATH the captured text is piped through
    ``dot -Tsvg`` into ``<dag_prefix>.svg``; otherwise it is written
    unchanged to ``<dag_prefix>.dag``.

    Args:
        snakefile: Snakefile passed to ``snakemake.snakemake``.
        config: Workflow config; its ``'targets'`` entry is used as targets.
        dag_prefix: Output path without extension.
    """
    saved_stdout = sys.stdout
    capture = io.StringIO()
    sys.stdout = capture
    try:
        snakemake.snakemake(snakefile,
                            config=config,
                            targets=config['targets'],
                            dryrun=True,
                            printdag=True)
    finally:
        # Always restore stdout, even if Snakemake (or the 'targets' lookup)
        # raises; otherwise all later output would vanish into the buffer.
        sys.stdout = saved_stdout
    output = capture.getvalue()

    if shutil.which('dot'):
        svg_file = '{}.svg'.format(dag_prefix)
        # pipe the captured DAG description to dot
        with open(svg_file, 'wb') as svg:
            dot_process = subprocess.Popen(['dot', '-Tsvg'],
                                           stdin=subprocess.PIPE,
                                           stdout=svg)
            dot_process.communicate(input=output.encode())
    else:
        # no dot available: keep the raw DAG description
        dag_file = '{}.dag'.format(dag_prefix)
        with open(dag_file, 'wt') as file:
            file.write(output)
예제 #4
0
def run_salmon(param):
    """Run the paired or single Snakefile and write a placeholder phenotype table.

    After the workflow call, the header line of ``EXPR.csv`` in the output
    directory is read and every column after the first is written to
    ``phenotype.csv`` as a sample id with phenotype ``NA``.

    Args:
        param: Mapping of options; the keys read here are ``"paired"``,
            ``"inpath"``, ``"--outpath"``, ``"--reference"``,
            ``"--num_threads"`` and ``"--exprtype"``.
    """
    import snakemake

    package_dir = os.path.dirname(__file__)
    if param["paired"]:
        workflow_name = "snakemake/Snakefile.paired"
    else:
        workflow_name = "snakemake/Snakefile.single"
    snakefile = os.path.join(package_dir, workflow_name)

    workflow_config = {
        "input_path": param["inpath"],
        "output_path": param["--outpath"],
        "index": param["--reference"],
        # NOTE(review): the "{}" placeholder is passed on unformatted;
        # presumably the Snakefile fills it in -- confirm.
        "salmon": os.path.join(os.path.dirname(__file__),
                               "salmon/{}/bin/salmon"),
        "num_threads": param["--num_threads"],
        "exprtype": param["--exprtype"],
    }
    snakemake.snakemake(snakefile=snakefile, config=workflow_config)

    with open(os.path.join(param["--outpath"], "EXPR.csv"), "r") as inp:
        header_fields = inp.readline().strip().split(',')
    sample_ids = header_fields[1:]

    phenotype_rows = [s + "," + "NA" for s in sample_ids]
    with open(os.path.join(param["--outpath"], "phenotype.csv"), "w") as oup:
        oup.write("SampleID,phenotype\n")
        oup.write("\n".join(phenotype_rows) + "\n")
예제 #5
0
def test_keep_logger():
    """Run a one-rule workflow in a scratch directory with ``keep_logger=True``."""
    rule_text = "rule:\n  output: 'result.txt'\n  shell: 'touch {output}'"
    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "Snakefile")
        with open(path, "w") as snakefile_handle:
            snakefile_handle.write(rule_text + "\n")
        snakemake(path, workdir=tmpdir, keep_logger=True)
예제 #6
0
def test_snakemake_word_frequency():
    """Run the ``word_frequency`` target and expect the term-frequency pickle in the work folder."""
    test_protocols: List[str] = ['prot-1936--ak--8.xml', 'prot-197778--160.xml']
    workdir = aj("./tests/output/work_folder")
    config_filename = aj("./tests/test_data/test_config_output.yml")

    # Start from an empty work folder that already contains a logs/ directory.
    rmtree(workdir, ignore_errors=True)
    for folder in (workdir, jj(workdir, "logs")):
        makedirs(folder, exist_ok=True)

    setup_parlaclarin_repository(test_protocols, workdir, "riksdagen-corpus")
    setup_work_folder_for_tagging_with_stanza(workdir)

    # The workflow is not given a workdir argument.
    run_options = {
        "config": {"config_filename": config_filename, "processes": 4},
        "debug": True,
        "keep_target_files": True,
        "cores": 1,
        "verbose": True,
        "targets": ['word_frequency'],
    }
    snakemake.snakemake(jj('workflow', 'Snakefile'), **run_options)

    expected_output = jj(workdir, "riksdagen-corpus-term-frequencies.pkl")
    assert isfile(expected_output)
예제 #7
0
파일: cli.py 프로젝트: fossabot/BALSAMIC
def generate_graph(config_collection_dict, config_path):
    """Render the workflow rule graph captured from Snakemake's stdout as a PDF.

    Args:
        config_collection_dict: Config mapping; its ``"analysis"`` section
            supplies ``analysis_type``, ``sequencing_type``, ``case_id``
            and ``dag``.
        config_path: Config file passed to Snakemake via ``configfiles``.
    """
    with CaptureStdout() as captured_lines:
        analysis = config_collection_dict["analysis"]
        workflow_file = get_snakefile(
            analysis_type=analysis["analysis_type"],
            sequencing_type=analysis["sequencing_type"],
        )
        snakemake.snakemake(
            snakefile=workflow_file,
            dryrun=True,
            configfiles=[config_path],
            printrulegraph=True,
        )

    title_parts = ["BALSAMIC", BALSAMIC.__version__, analysis["case_id"]]
    graph_title = "_".join(title_parts)

    # Rename the graph and inject a title label placed at the top.
    titled_header = 'BALSAMIC { label="' + graph_title + '";labelloc="t";'
    dot_source = "".join(captured_lines).replace("snakemake_dag {", titled_header)

    # Output name is the configured dag path with its last dot-suffix removed.
    output_stem = ".".join(analysis["dag"].split(".")[:-1])
    graphviz.Source(
        dot_source,
        filename=output_stem,
        format="pdf",
        engine="dot",
    ).render(cleanup=True)
예제 #8
0
def RNA_illumina(args):
    """Build the config from ``args`` and run the workflow named by ``args.subcommands``."""
    # Locate the Snakefile and config file
    log.warning("CHECKING CONFIGURATION FILES")
    snakefile = get_snakefile_fn(workflow_dir=WORKFLOW_DIR,
                                 workflow=args.subcommands)
    configfile = get_config_fn(config=args.config)

    # Extra options handed to the workflow through config=
    log.info("Build config dict for snakemake")
    config = {}
    for option_name in ("genome", "transcriptome", "annotation"):
        config[option_name] = required_option(option_name,
                                              getattr(args, option_name))
    config["sample_sheet"] = get_sample_sheet(
        sample_sheet=args.sample_sheet,
        required_fields=["sample_id", "fastq1", "fastq2"])
    log.debug(config)

    # Remaining args that the snakemake API accepts
    kwargs = filter_valid_snakemake_options(args)
    log.debug(kwargs)

    # Hand over to the Snakemake API
    log.warning("RUNNING SNAKEMAKE PIPELINE")
    snakemake(snakefile=snakefile,
              configfile=configfile,
              config=config,
              use_conda=True,
              **kwargs)
예제 #9
0
def run_inspect(args):
    """Call Snakemake with ``summary=True`` on the Snakefile next to this module, using ``args.work_dir``."""
    bundled_snakefile = os.path.join(os.path.dirname(__file__), "Snakefile")
    snakemake.snakemake(
        snakefile=bundled_snakefile,
        workdir=args.work_dir,
        summary=True,
        verbose=True,
    )
예제 #10
0
파일: utils.py 프로젝트: lrgr/explosig-data
def run_snakemake_with_config(snakefile_path, config):
    """Run a Snakefile with ``config`` supplied through a temporary YAML file.

    Since the snakemake() function can only handle "flat" dicts using the
    direct config= parameter, the config dict is written to a temporary file
    and passed in through configfiles= instead.

    Args:
        snakefile_path: Path of the Snakefile to run.
        config: Config dict to serialise as YAML.
    """
    # Created outside the try block so the cleanup below never refers to an
    # unbound name if creation itself fails. delete=False keeps the file on
    # disk after it is closed.
    f = tempfile.NamedTemporaryFile(mode='w', delete=False)
    try:
        with f:
            yaml.dump(config, f, default_flow_style=False)
        # The file is closed (hence fully flushed) before Snakemake reads it
        # by name; previously the buffered YAML might not have been on disk.
        snakemake_api.snakemake(snakefile=snakefile_path, configfiles=[f.name])
    finally:
        os.unlink(f.name)
예제 #11
0
 def test_dryrun_all(self):
     """Dry-run the workflow with default targets and with the explicit "all" target."""
     # No configfile is passed; the config.json argument is disabled.
     snakefile = os.path.join(self.workdir, 'Snakefile')
     default_ok = snakemake.snakemake(
         snakefile, workdir=self.workdir, dryrun=True)
     self.assertTrue(default_ok)

     snakefile = os.path.join(self.workdir, 'Snakefile')
     all_ok = snakemake.snakemake(
         snakefile, workdir=self.workdir, dryrun=True, targets=['all'])
     self.assertTrue(all_ok)
def main(args):
    """Run the biokit annotation workflow, then the alignment workflow.

    Args:
        args: Parsed options; reads ``biokit_dir``, ``refgenomes_dir``,
            ``outdir`` and ``local``.

    Returns:
        0 if the alignment workflow reports success, otherwise 1.

    Raises:
        Exception: If the first workflow (sample annotation) fails.
    """
    indir = os.path.realpath(args.biokit_dir)
    refgenomes_dir = os.path.realpath(args.refgenomes_dir)

    # Default output location lives inside the input directory.
    if args.outdir is None:
        outdir = os.path.join(indir, 'HBVouroboros')
    else:
        outdir = args.outdir
    if not os.path.exists(outdir):
        makedirs(outdir, mode=0o775, exist_ok=True)

    unmapped_sample_annotation = os.path.join(outdir, 'unmapped_samples.txt')
    biokit_config = {
        'biokit_dir': indir,
        'output_file': unmapped_sample_annotation,
    }
    biokit_status = snakemake.snakemake(
        snakefile=biokit_snakefile,
        config=biokit_config,
        workdir=outdir)
    if not biokit_status:
        raise Exception('Failed to derive sample annotation files '
            'for unmapped reads in directory {}'.format(indir))

    cluster_logs_dir = os.path.join(outdir, 'cluster-logs')
    makedirs(cluster_logs_dir, mode=0o775, exist_ok=True)
    cluster_out_pattern = os.path.join(cluster_logs_dir, 'slurm-%x-%j.out')
    cluster_err_pattern = os.path.join(cluster_logs_dir, 'slurm-%x-%j.err')

    if args.local:
        cluster_comm = None
        cluster_config = None
    else:
        sbatch_prefix = ('sbatch -t {cluster.time} -c {cluster.cpu} '
                         '-N {cluster.nodes} --mem={cluster.mem} '
                         '--ntasks-per-node={cluster.ntasks_per_node}')
        cluster_comm = (sbatch_prefix
                        + ' -o ' + cluster_out_pattern
                        + ' -e ' + cluster_err_pattern)
        cluster_config = align_clusterfile

    align_config = {
        'sample_annotation': unmapped_sample_annotation,
        'refgenomes_dir': refgenomes_dir,
    }
    status = snakemake.snakemake(
        align_snakefile,
        cluster=cluster_comm,
        cluster_config=cluster_config,
        cores=128,
        nodes=128,
        local_cores=4,
        config=align_config,
        workdir=outdir,
        restart_times=3,
        printshellcmds=True)

    # translate "success" into a shell exit code of 0
    return 0 if status else 1
예제 #13
0
def status(context, sample_config, show_only_missing, print_files):
    """
    cli for status sub-command.

    Loads the sample config JSON, captures the output of a Snakemake summary
    dry run, checks every ``output_file`` entry with
    ``get_file_status_string`` and prints a tally of existing and missing
    files.
    """
    LOG.info(f"BALSAMIC started with log level {context.obj['loglevel']}.")
    LOG.debug("Reading input sample config")
    with open(sample_config, "r") as config_handle:
        sample_config_dict = json.load(config_handle)

    result_dir = get_result_dir(sample_config_dict)
    analysis = sample_config_dict["analysis"]
    snakefile = get_snakefile(analysis["analysis_type"],
                              analysis["sequencing_type"])

    with CaptureStdout() as captured:
        snakemake.snakemake(
            snakefile=snakefile,
            dryrun=True,
            summary=True,
            configfiles=[sample_config],
            quiet=True,
        )
    # The first captured line supplies the tab-separated column names for
    # every following line.
    table = [line.split("\t") for line in captured]
    records = [dict(zip(table[0], row)) for row in table[1:]]

    finish_marker = os.path.join(result_dir, "analysis_finish")
    if not os.path.isfile(finish_marker):
        LOG.warning(
            "analysis_finish file is missing. Analysis might be incomplete or running."
        )

    existing_files = set()
    missing_files = set()

    for record in records:
        delivery_file = record["output_file"]
        file_status_str, file_status = get_file_status_string(delivery_file)

        if file_status:
            if print_files:
                click.echo(file_status_str)
            existing_files.add(delivery_file)
        else:
            if show_only_missing or print_files:
                click.echo(file_status_str)
            missing_files.add(delivery_file)

    tally_lines = (
        'Finished file count: {}'.format(len(existing_files)),
        'Missing file count: {}'.format(len(missing_files)),
    )
    click.echo(Color('{yellow}Final tally:{/yellow}'))
    for tally_line in tally_lines:
        click.echo(Color('{yellow}\t' + tally_line + '{/yellow}'))
예제 #14
0
def test_run_script_directive():
    """Run a workflow whose single rule uses a ``run:`` block, inside a scratch directory."""
    snakefile_text = dedent("""
                rule:
                    output: 'result.txt'
                    run:
                        with open(output[0], 'w') as f:
                            print("hello", file=f)
                """)
    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, 'Snakefile')
        with open(path, 'w') as snakefile_handle:
            snakefile_handle.write(snakefile_text + "\n")
        snakemake(path, workdir=tmpdir)
예제 #15
0
def run_pipeline(config_f):
    """Run ``main.smk`` from this module's directory, using the config file's directory as workdir.

    Args:
        config_f: Path of the config file passed to Snakemake.
    """
    import snakemake
    import os

    sgp_home = os.path.dirname(os.path.realpath(__file__))
    run_options = {
        "snakefile": os.path.join(sgp_home, 'main.smk'),
        "use_conda": True,
        "conda_prefix": os.path.join(sgp_home, 'env'),
        "restart_times": 3,
        "configfile": config_f,
        "workdir": os.path.dirname(config_f),
        "printshellcmds": True,
        "force_incomplete": True,
        "notemp": True,
    }
    snakemake.snakemake(**run_options)
예제 #16
0
def test_run_single_task(simple_merfish_task):
    """Run one task through a generated Snakefile and expect it to complete.

    The Snakefile is written to ``temp.Snakefile`` in the current directory.
    It and Snakemake's ``.snakemake`` directory are removed afterwards, even
    when the run raises, so a failing run does not leave files behind.
    """
    simple_merfish_task.save()
    assert not simple_merfish_task.is_complete()
    snake_rule = snakewriter.SnakemakeRule(simple_merfish_task)
    try:
        with open('temp.Snakefile', 'w') as out_file:
            out_file.write('rule all: \n\tinput: ' + snake_rule.full_output() +
                           '\n\n')
            out_file.write(snake_rule.as_string())

        snakemake.snakemake('temp.Snakefile')
    finally:
        if os.path.exists('temp.Snakefile'):
            os.remove('temp.Snakefile')
        # ignore_errors: the directory may not exist if Snakemake failed early,
        # and a cleanup error must not mask the original exception.
        shutil.rmtree('.snakemake', ignore_errors=True)

    assert simple_merfish_task.is_complete()
예제 #17
0
def cme():
    """Parse the CME command line and run each requested Snakemake stage in turn."""
    parser = argparse.ArgumentParser(prog='CME',
                                     description='Computational Modeling Engine')
    parser.add_argument('-v', '--version', action='version',
                        version='%(prog)s 1.0', help='print version and exit')
    parser.add_argument('--prepare', action='store_true', default=False,
                        dest='prepare',
                        help='set prepare structure switch to true')
    parser.add_argument('--inchi', action='store_true', default=False,
                        dest='inchi',
                        help='set inchi file creation switch to true')
    parser.add_argument('--configfile', default='config.yaml',
                        help='snakemake configuration file')
    parser.add_argument('--dft', action='store_true', default=False,
                        dest='dft', help='run DFT calculation')
    parser.add_argument('--md', action='store_true', default=False,
                        dest='md', help='run MD simulation')

    args = parser.parse_args()

    # (switch, progress message, rule file) in execution order.
    # An MD-2 stage (rules/rj-md2.snakemake) is currently disabled.
    stages = (
        (args.inchi,
         'Creating .inchi files from inchi strings in .csv file',
         'rules/MD-pipeline.snakemake'),
        (args.prepare,
         'Preparing initial structure files for DFT and MD runs',
         'rules/2.snakemake'),
        (args.dft, 'Starting DFT runs', 'rules/rj-dft.snakemake'),
        (args.md, 'Starting MD runs', 'rules/rj-md.snakemake'),
    )
    for requested, message, rule_file in stages:
        if requested:
            print(message)
            snakemake(resource_filename('cme', rule_file),
                      configfile=args.configfile)
    print(args)
예제 #18
0
def download_human_genomes():
    """Run the human-genome Snakefile with hg19/hg38 output paths under ``EXPLOSIG_DATA_DIR``.

    Since the snakemake() function can only handle "flat" dicts using the
    direct config= parameter, the nested config dict is written to a
    temporary YAML file and passed in through configfiles= instead.
    """
    config = {
        "output": {
            "hg19": os.path.join(EXPLOSIG_DATA_DIR, "genomes", "hg19.fa"),
            "hg38": os.path.join(EXPLOSIG_DATA_DIR, "genomes", "hg38.fa")
        }
    }
    snakefile = os.path.join(os.path.dirname(__file__), 'snakefiles',
                             'genomes', 'human.smk')

    with tempfile.NamedTemporaryFile(mode='w') as temp:
        yaml.dump(config, temp, default_flow_style=False)
        # Snakemake re-opens the file by name, so the buffered YAML must be
        # on disk first; without the flush it may read an empty or
        # truncated config.
        temp.flush()
        snakemake_api.snakemake(snakefile=snakefile, configfiles=[temp.name])
예제 #19
0
def main():
    """Run the workflow with the options returned by ``parse_args()``."""
    snakefile = get_snakefile()
    options = parse_args()

    # The job count is used for both nodes and cores.
    run_options = {
        "snakefile": snakefile,
        "configfiles": options.configfiles,
        "config": options.config,
        "nodes": options.jobs,
        "cores": options.jobs,
        "keepgoing": options.keep_going,
        "notemp": options.notemp,
        "dryrun": options.dry_run,
        "quiet": options.quiet,
        "forceall": options.forceall,
        "forcerun": options.forcerun,
    }
    snakemake.snakemake(**run_options)
예제 #20
0
 def test_dryrun_all(self):
     """Dry-run the workflow with default targets and with the explicit "all" target."""
     # No configfile is passed; the config.yaml argument is disabled.
     default_ok = snakemake.snakemake(
         os.path.join(self.workdir, 'Snakefile'),
         workdir=self.workdir,
         dryrun=True)
     self.assertTrue(default_ok)

     all_ok = snakemake.snakemake(
         os.path.join(self.workdir, 'Snakefile'),
         workdir=self.workdir,
         dryrun=True,
         targets=['all'])
     self.assertTrue(all_ok)
def run_neoantigen_pipeline(args, parsed_config, configfile):
    """Run ``pipeline/Snakefile`` for the requested targets inside the output directory.

    Args:
        args: Parsed options; reads ``cores``, ``memory`` and ``dry_run``.
        parsed_config: Parsed config mapping; its ``"workdir"`` entry is used
            as Snakemake working directory.
        configfile: Open config file object; rewound here and passed to
            Snakemake by ``name``.

    Raises:
        ValueError: If Snakemake reports failure.
    """
    configfile.seek(0)

    output_dir = get_output_dir(parsed_config)
    stats_file = join(output_dir, "stats.json")

    # Keep only targets under the output directory (reference processing is
    # excluded).
    requested = get_and_check_targets(args, parsed_config)
    targets = [target for target in requested if target.startswith(output_dir)]
    if not targets:
        logger.info("No output targets specified")
        return

    config_extension = make_config_extension_dict(args, parsed_config)

    logger.info("Running neoantigen pipeline with targets %s " % targets)
    start_time = datetime.datetime.now()
    succeeded = snakemake.snakemake(
        'pipeline/Snakefile',
        cores=args.cores,
        resources={'mem_mb': int(1024 * args.memory)},
        config=config_extension,
        configfile=configfile.name,
        printshellcmds=True,
        dryrun=args.dry_run,
        targets=targets,
        workdir=parsed_config["workdir"],
        stats=stats_file)
    if not succeeded:
        raise ValueError("Pipeline failed, see Snakemake error message for details")

    elapsed = datetime.datetime.now() - start_time
    logger.info("--- Pipeline running time: %s ---" % (str(elapsed)))
예제 #22
0
def infer_topology(
    obj, working_directory, tree_method, rooting_method, lsd_output_format
):
    """Run the packaged ``topology.smk`` workflow for the ``tree`` and ``starting_values`` targets.

    Args:
        obj: Object providing ``alignment_path``, ``output_path``, ``model``
            and ``seed``.
        working_directory: Working directory for the workflow; when ``None``
            the first parent of ``obj.output_path`` is used.
        tree_method: Passed through in the workflow config.
        rooting_method: Passed through in the workflow config.
        lsd_output_format: Passed through in the workflow config.

    Raises:
        click.UsageError: If Snakemake reports failure.
    """
    if working_directory is None:
        working_directory = obj.output_path.parents[0]

    with importlib.resources.path("treeflow_pipeline", "topology.smk") as snakefile:
        workflow_config = {
            "alignment": obj.alignment_path,
            "output": obj.output_path,
            "working_directory": working_directory,
            "tree_method": tree_method,
            "rooting_method": rooting_method,
            "subst_model": obj.model.subst_model,
            "site_model": obj.model.site_model,
            "clock_model": obj.model.clock_model,
            "lsd_output_format": lsd_output_format,
            "seed": obj.seed,
        }
        success = snakemake.snakemake(
            snakefile,
            config=workflow_config,
            targets=["tree", "starting_values"],
            lock=False,
        )

    if success:
        return
    raise click.UsageError(
        "Topology inference pipeline was unsuccessful, check inputs"
    )
예제 #23
0
파일: run.py 프로젝트: AfshinLab/DBS-Pro
def run(
        dryrun: bool = False,
        cores: int = 4,
        keepgoing: bool = False,
        unlock: bool = False,
        printdag: bool = False,
        targets=None,
        workdir=None,
):
    """Run the packaged ``rules.smk`` workflow.

    Output is quiet exactly when ``printdag`` is set.

    Raises:
        SnakemakeError: If Snakemake reports failure.
    """
    # snakemake installs its own logging and this cannot be changed easily
    # (keep_logger=True crashes), so our own log handlers are dropped for now
    logger.root.handlers = []

    run_options = dict(
        snakemakepath="snakemake",  # Needed in snakemake 3.9.0
        dryrun=dryrun,
        printdag=printdag,
        quiet=bool(printdag),
        cores=cores,
        keepgoing=keepgoing,
        unlock=unlock,
        printshellcmds=True,
        targets=targets,
        workdir=workdir,
    )
    snakefile_path = pkg_resources.resource_filename("dbspro", "rules.smk")
    if not snakemake(snakefile_path, **run_options):
        raise SnakemakeError()
예제 #24
0
파일: tests.py 프로젝트: Kirill84/snakemake
def run(path, shouldfail=False, snakefile="Snakefile", **params):
    """
    Test the Snakefile in path.
    There must be a Snakefile in the path and a subdirectory named
    expected-results.

    The top-level files of ``path`` are copied into a temporary working
    directory, the workflow is run there, and (unless ``shouldfail`` is set)
    every regular file in ``expected-results`` must have been produced with
    an identical md5 sum. Extra keyword arguments are forwarded to
    ``snakemake``.
    """
    results_dir = join(path, 'expected-results')
    snakefile = join(path, snakefile)
    assert os.path.exists(snakefile)
    assert os.path.exists(results_dir) and os.path.isdir(results_dir), \
        '{} does not exist'.format(results_dir)
    tmpdir = mkdtemp()
    try:
        call('cp `find {} -maxdepth 1 -type f` {}'.format(path, tmpdir), shell=True)
        success = snakemake(snakefile, cores=3, workdir=tmpdir, stats="stats.txt", snakemakepath=SCRIPTPATH, **params)
        if shouldfail:
            assert not success, "expected error on execution"
        else:
            assert success, "expected successful execution"
            for resultfile in os.listdir(results_dir):
                expectedfile = join(results_dir, resultfile)
                # os.listdir returns bare names, so the check must be made
                # against the path inside results_dir. Testing the bare name
                # resolved it against the current directory, which skipped
                # expected files and left results unverified.
                if not os.path.isfile(expectedfile):
                    continue  # skip .svn dirs etc.
                targetfile = join(tmpdir, resultfile)
                assert os.path.exists(targetfile), 'expected file "{}" not produced'.format(resultfile)
                assert md5sum(targetfile) == md5sum(expectedfile), 'wrong result produced for file "{}"'.format(resultfile)
    finally:
        call(['rm', '-rf', tmpdir])
예제 #25
0
 def test_workflow_compiles(self):
     """Dry-run the pipeline Snakefile for two sample targets and expect success."""
     chdir(self._get_pipeline_dir_path())

     config_overrides = {
         'num_threads': 22,
         'mem_gb': 160,
         'contigs': ['2'],
     }
     sample_dir = join(self.workdir.name, 'idh1-test-sample')
     targets = [
         join(sample_dir,
              'vaccine-peptide-report_netmhcpan-iedb_mutect-strelka.txt'),
         join(sample_dir, 'rna_final.bam'),
     ]

     compiled = snakemake.snakemake(
         'Snakefile',
         cores=20,
         resources={'mem_mb': 160000},
         configfile=self.config_tmpfile.name,
         config=config_overrides,
         dryrun=True,
         printshellcmds=True,
         targets=targets,
         stats=join(sample_dir, 'stats.json'))
     self.assertTrue(compiled)
예제 #26
0
파일: app.py 프로젝트: Biocodings/hlama
    def run_snakemake(self):
        """Run Snakemake in the work directory, then print where the report can be found."""
        print('\nRunning Snakemake\n=================\n', file=sys.stderr)
        work_dir = self.args.work_dir
        snakemake.snakemake(
            snakefile=os.path.join(work_dir, 'Snakefile'),
            workdir=work_dir,
        )
        # TODO: check Snakemake result

        print('\nThe End\n=======\n', file=sys.stderr)
        closing_note = textwrap.dedent(r"""
            You can find the results in the "{}/report.txt" file.
            """).format(self.args.work_dir).lstrip()
        wrapped_lines = textwrap.wrap(closing_note)
        print('\n'.join(wrapped_lines), file=sys.stderr)
예제 #27
0
def test_snakemake_execute():
    """Run the workflow and expect a ``<document>.zip`` for every source protocol file."""
    config_filename = aj("./tests/test_data/test_config.yml")
    cfg: Config = load_typed_config(config_name=config_filename)
    snakefile = jj('workflow', 'Snakefile')

    # Start from an empty annotated folder.
    rmtree(cfg.annotated_folder, ignore_errors=True)
    makedirs(cfg.annotated_folder, exist_ok=True)

    # The workflow is not given a workdir argument.
    run_options = {
        "config": {"config_filename": config_filename},
        "debug": True,
        "keep_target_files": True,
        "cores": 1,
        "verbose": True,
    }
    success = snakemake.snakemake(snakefile, **run_options)
    assert success

    source_pattern = jj(cfg.data_folder, 'riksdagen-corpus/corpus/**/prot*.xml')
    source_files: List[str] = glob.glob(source_pattern, recursive=True)

    for filename in source_files:
        document_name: str = strip_path_and_extension(filename)
        # Target sub-folder is the second dash-separated part of the name.
        sub_folder = document_name.split('-')[1]
        target_dir: str = jj(cfg.annotated_folder, sub_folder)
        assert isfile(jj(target_dir, f"{document_name}.zip"))
예제 #28
0
def call_snakemake(workdir, targets=None):
    """Dry-run the Snakefile in ``workdir`` with its ``config.yaml`` and return Snakemake's result."""
    snakefile = os.path.join(workdir, 'Snakefile')
    configfile = os.path.join(workdir, 'config.yaml')
    outcome = snakemake.snakemake(
        snakefile,
        configfile=configfile,
        workdir=workdir,
        dryrun=True,
        targets=targets)
    return outcome
예제 #29
0
def process_reference(args, parsed_config, configfile):
    """Run ``pipeline/reference_Snakefile`` for targets inside the reference genome directory.

    When no requested target lies in that directory, the target defaults to
    the reference genome path with ``.done`` appended.

    Args:
        args: Parsed options; reads ``cores``, ``memory`` and ``dry_run``.
        parsed_config: Parsed config mapping.
        configfile: Open config file object; rewound here and passed to
            Snakemake by ``name``.

    Raises:
        ValueError: If Snakemake reports failure.
    """
    configfile.seek(0)

    reference_genome_dir = get_reference_genome_dir(parsed_config)
    stats_file = join(reference_genome_dir, "stats.json")

    requested = get_and_check_targets(args, parsed_config)
    targets = [
        target for target in requested
        if target.startswith(reference_genome_dir)
    ]
    if not targets:
        targets = [parsed_config["reference"]["genome"] + '.done']
    logger.info("Processing reference with targets: %s" % targets)

    start_time = datetime.datetime.now()
    succeeded = snakemake.snakemake(
        'pipeline/reference_Snakefile',
        cores=args.cores,
        resources={'mem_mb': int(1024 * args.memory)},
        config={'num_threads': args.cores, 'mem_gb': args.memory},
        configfile=configfile.name,
        printshellcmds=True,
        dryrun=args.dry_run,
        targets=targets,
        stats=stats_file)
    if not succeeded:
        raise ValueError("Reference processing failed, see Snakemake error message for details")

    elapsed = datetime.datetime.now() - start_time
    logger.info("--- Reference processing time: %s ---" % (str(elapsed)))
예제 #30
0
def run_snakemake_from_config(dry_run,
                              config_yaml,
                              cores,
                              cluster_command,
                              nodes,
                              delete_all_output=False):
    """Run the workflow with the given config file; exit with ``os.EX_SOFTWARE`` on failure.

    ``cores`` is used for both ``cores`` and ``local_cores``.
    """
    announcement = "[INFO] Invoking Snakemake with config {} and {} cores.".format(
        config_yaml, cores)
    print(announcement)

    run_options = dict(
        snakefile=snakefile_location,
        configfiles=[config_yaml],
        dryrun=dry_run,
        cores=cores,
        local_cores=cores,
        nodes=nodes,
        printshellcmds=True,
        delete_all_output=delete_all_output,
        use_conda=True,
        conda_prefix=conda_prefix,
        cluster=cluster_command,
    )
    if snakemake.snakemake(**run_options):
        return
    os.sys.exit(os.EX_SOFTWARE)
예제 #31
0
def run_neoantigen_pipeline(args, parsed_config, configfile):
    """Run ``pipeline/Snakefile`` for the requested targets inside the output directory.

    Returns without running anything when no requested target lies in the
    output directory. The contig names read from ``<genome>.contigs`` are
    added to the workflow config.

    Args:
        args: Parsed options; reads ``cores``, ``memory`` and ``dry_run``.
        parsed_config: Parsed config mapping.
        configfile: Open config file object; rewound here and passed to
            Snakemake by ``name``.

    Raises:
        ValueError: If Snakemake reports failure.
    """
    configfile.seek(0)

    output_dir = get_output_dir(parsed_config)
    stats_file = join(output_dir, "stats.json")

    # Keep only targets that start with the output directory (not reference).
    requested = get_and_check_targets(args, parsed_config)
    targets = [target for target in requested if target.startswith(output_dir)]
    if not targets:
        return

    logger.info("Running neoantigen pipeline with targets %s " % targets)

    # include all relevant contigs in the pipeline config
    contigs_path = parsed_config["reference"]["genome"] + ".contigs"
    with open(contigs_path) as contigs_handle:
        contigs = [line.strip() for line in contigs_handle]

    start_time = datetime.datetime.now()
    workflow_config = {
        'num_threads': args.cores,
        'mem_gb': args.memory,
        'contigs': contigs,
    }
    succeeded = snakemake.snakemake(
        'pipeline/Snakefile',
        cores=args.cores,
        resources={'mem_mb': int(1024 * args.memory)},
        config=workflow_config,
        configfile=configfile.name,
        printshellcmds=True,
        dryrun=args.dry_run,
        targets=targets,
        stats=stats_file)
    if not succeeded:
        raise ValueError("Pipeline failed, see Snakemake error message for details")

    elapsed = datetime.datetime.now() - start_time
    logger.info("--- Pipeline running time: %s ---" % (str(elapsed)))
예제 #32
0
 def test_missing_merge_inputs(self):
     """After adding the 'G_missing' assembly sample, the 'all_assemble' dry run must report failure."""
     add_to_sample_list(self.workdir, 'assembly', 'G_missing')
     snakefile = os.path.join(self.workdir, 'Snakefile')
     res = snakemake.snakemake(
         snakefile,
         workdir=self.workdir,
         dryrun=True,
         targets=['all_assemble'],
     )
     assert res == False
예제 #33
0
 def run(self, result):
     """Dry-run the Snakefile in the test directory and record success or failure on ``result``."""
     result.startTest(self)
     dryrun_ok = snakemake(self.snakefile, dryrun=True, workdir=self.testdir)
     if not dryrun_ok:
         result.addFailure(self, "")
     else:
         result.addSuccess(self)
     result.stopTest(self)
예제 #34
0
파일: celseq2.py 프로젝트: yanailab/celseq2
def main():
    """Parse the command line, run the workflow and exit with 0 on success, 1 on failure."""
    args = get_argument_parser().parse_args()
    stranded = "reverse" if args.reverse_stranded else "yes"
    workflow_fpath = get_workflow_file_fpath()

    workflow_config = {
        'output_dir': args.output_dir,
        'experiment_table': args.experiment_table,
        'stranded': stranded,
        'run_celseq2_to_st': args.celseq2_to_st,
        'keep_intermediate': args.keep_temp,
    }
    run_options = dict(
        snakefile=workflow_fpath,
        targets=args.target,
        configfile=args.config_file,
        config=workflow_config,
        printshellcmds=True,
        printreason=True,
        # timestamp=True is not passed: deprecated since snakemake 5.2.2
        latency_wait=300,
        jobname="celseq2_job.{rulename}.{jobid}.sh",
        keepgoing=False,
        restart_times=2,
        dryrun=args.dryrun,
        lock=not args.nolock,
        unlock=args.unlock,
        cluster=args.cluster,
        # the core count is used for both cores and nodes
        cores=args.cores,
        nodes=args.cores,
        force_incomplete=args.rerun_incomplete,
        ignore_incomplete=args.ignore_incomplete,
        notemp=args.keep_temp,
    )
    success = snakemake(**run_options)

    exit_code = 0 if success else 1
    sys.exit(exit_code)
예제 #35
0
 def test_missing_merge_inputs(self):
     """After adding the 'G_missing' assembly sample, the 'all_assemble' dry run must report failure."""
     add_to_sample_list(self.workdir, 'assembly', 'G_missing')
     dryrun_options = {
         "workdir": self.workdir,
         "dryrun": True,
         "targets": ['all_assemble'],
     }
     res = snakemake.snakemake(
         os.path.join(self.workdir, 'Snakefile'), **dryrun_options)
     assert res == False
예제 #36
0
 def test_dryrun_all_metagenomics(self):
     """Dry-run the "all_metagenomics" target and expect success."""
     # No configfile is passed; the config.yaml argument is disabled.
     snakefile = os.path.join(self.workdir, 'Snakefile')
     dryrun_ok = snakemake.snakemake(
         snakefile,
         workdir=self.workdir,
         dryrun=True,
         targets=['all_metagenomics'])
     self.assertTrue(dryrun_ok)
예제 #37
0
def _copy_regular_files(src_dir, dst_dir):
    """Copy the regular files found directly inside src_dir into dst_dir.

    Directories and symbolic links are skipped, which is the same selection
    ``find src_dir -maxdepth 1 -type f`` makes. Copying from Python instead
    of through a shell command keeps paths that contain spaces or shell
    metacharacters intact and turns copy failures into exceptions.
    """
    # Imported here because the module name `shutil` may not be bound at file level.
    import shutil

    for name in os.listdir(src_dir):
        candidate = os.path.join(src_dir, name)
        if os.path.isfile(candidate) and not os.path.islink(candidate):
            shutil.copy(candidate, dst_dir)


def run(path, shouldfail=False, needs_connection=False, snakefile="Snakefile", subpath=None, check_md5=True, **params):
    """
    Test the Snakefile in path.
    There must be a Snakefile in the path and a subdirectory named
    expected-results.

    The regular files of ``path`` are copied into a fresh temporary
    directory, the workflow is executed there, and every regular file in
    ``expected-results`` (except ``.gitignore``) is compared against the file
    of the same name that the run produced. The temporary directory is
    always removed afterwards.

    Args:
        path: Directory holding the Snakefile and ``expected-results``.
        shouldfail: If true, the workflow is expected to fail and no result
            files are compared.
        needs_connection: If true, the test is skipped when
            ``is_connected()`` reports no internet connection.
        snakefile: Name of the Snakefile, relative to ``path``.
        subpath: Optional directory of a subworkflow; its regular files are
            copied to a ``subworkdir`` inside the temporary directory, whose
            location is passed to the workflow as ``config["subworkdir"]``.
        check_md5: If true, produced files must match the expected files by
            md5 checksum; otherwise only their existence is checked.
        **params: Extra keyword arguments forwarded to ``snakemake``.

    Returns:
        False when the test was skipped for lack of a connection, otherwise
        None.

    Raises:
        AssertionError: If a precondition or an expectation is not met.
    """
    if needs_connection and not is_connected():
        print("Skipping test because of missing internet connection", file=sys.stderr)
        return False

    results_dir = join(path, "expected-results")
    snakefile = join(path, snakefile)
    assert os.path.exists(snakefile)
    assert os.path.exists(results_dir) and os.path.isdir(results_dir), "{} does not exist".format(results_dir)
    tmpdir = mkdtemp()
    try:
        config = {}
        if subpath is not None:
            # set up a working directory for the subworkflow and pass it in `config`
            # for now, only one subworkflow is supported
            assert os.path.exists(subpath) and os.path.isdir(subpath), "{} does not exist".format(subpath)
            subworkdir = os.path.join(tmpdir, "subworkdir")
            os.mkdir(subworkdir)
            _copy_regular_files(subpath, subworkdir)
            config["subworkdir"] = subworkdir

        _copy_regular_files(path, tmpdir)
        success = snakemake(
            snakefile, cores=3, workdir=tmpdir, stats="stats.txt", snakemakepath=SCRIPTPATH, config=config, **params
        )
        if shouldfail:
            assert not success, "expected error on execution"
        else:
            assert success, "expected successful execution"
            for resultfile in os.listdir(results_dir):
                if resultfile == ".gitignore" or not os.path.isfile(os.path.join(results_dir, resultfile)):
                    # this means tests cannot use directories as output files
                    continue
                targetfile = join(tmpdir, resultfile)
                expectedfile = join(results_dir, resultfile)
                assert os.path.exists(targetfile), 'expected file "{}" not produced'.format(resultfile)
                if check_md5:
                    assert md5sum(targetfile) == md5sum(expectedfile), 'wrong result produced for file "{}"'.format(
                        resultfile
                    )
    finally:
        rmtree(tmpdir)
예제 #38
0
def test_keep_logger():
    """Run a one-rule workflow in a scratch directory with ``keep_logger=True``."""
    rule_text = "rule:\n  output: 'result.txt'\n  shell: 'touch {output}'"
    with tempfile.TemporaryDirectory() as scratch:
        snakefile_path = os.path.join(scratch, 'Snakefile')
        with open(snakefile_path, 'w') as handle:
            # Same bytes as print(rule_text, file=handle): the text plus a newline.
            handle.write(rule_text + "\n")
        snakemake(snakefile_path, workdir=scratch, keep_logger=True)
예제 #39
0
def main():
    """Command-line entry point that runs spacegraphcats snakemake workflows.

    Locates the bundled Snakefile and the requested config file, collects
    config overrides from the command line, and either prints the parsed
    config (target ``show``) or hands the targets to snakemake.

    Returns:
        0 on success (or after ``show``), 1 when snakemake reports failure.

    Raises:
        SystemExit: With status -1 when the Snakefile or the config file
            cannot be found.
    """
    parser = argparse.ArgumentParser(description='run snakemake workflows for spacegraphcats', usage='''spacegraphcats <configfile.yaml> [<target1> ...]

Run workflows for spacegraphcats, using the given config file.

Targets:

   build            - builds catlas (default)
   searchquick      - do a quick search (only a few files)
   search           - do a full search for this data set (many files)
   extract_reads    - extract reads for search results (many files)
   extract_contigs  - extract contigs for search results (many files)
   clean            - remove the primary catlas build files
   show             - parse and display the config file

For a quickstart, run this:

   spacegraphcats dory-test searchquick

For an example config file, run:

   spacegraphcats dory-test show

from the main spacegraphcats directory.
.
''')

    parser.add_argument('configfile')
    parser.add_argument('targets', nargs='*', default=['build'])
    parser.add_argument('-n', '--dry-run', action='store_true')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('-d', '--debug', action='store_true')
    parser.add_argument('--nolock', action='store_true')
    parser.add_argument('--overhead', type=float, default=None)
    parser.add_argument('--experiment', default=None)
    parser.add_argument('--radius', type=int, default=None)
    parser.add_argument('--cdbg-only', action='store_true',
                        help='for paper evaluation purposes')
    args = parser.parse_args()

    # first, find the Snakefile
    snakefile = os.path.join(thisdir, './conf', 'Snakefile')
    if not os.path.exists(snakefile):
        sys.stderr.write('Error: cannot find Snakefile at {}\n'.format(snakefile))
        sys.exit(-1)

    # next, find the config file: an existing file path wins, otherwise look
    # in the bundled conf directory, trying the bare name and known suffixes
    configfile = None
    if os.path.exists(args.configfile) and not os.path.isdir(args.configfile):
        configfile = args.configfile
    else:
        for suffix in ('', '.json', '.yaml'):
            tryfile = os.path.join(thisdir, './conf', args.configfile + suffix)
            if os.path.exists(tryfile) and not os.path.isdir(tryfile):
                sys.stderr.write('Found configfile at {}\n'.format(tryfile))
                configfile = tryfile
                break

    if not configfile:
        sys.stderr.write('Error: cannot find configfile {}\n'.format(args.configfile))
        sys.exit(-1)

    # build config override dict; only options given on the command line
    # are included, so values from the config file are otherwise kept
    config = dict()
    if args.overhead is not None:
        config['overhead'] = args.overhead
    if args.experiment is not None:
        config['experiment'] = args.experiment
    if args.radius is not None:
        config['radius'] = args.radius
    if args.cdbg_only:
        config['cdbg_only'] = True

    print('--------', file=sys.stderr)
    print('details!', file=sys.stderr)
    print('\tsnakefile: {}'.format(snakefile), file=sys.stderr)
    print('\tconfig: {}'.format(configfile), file=sys.stderr)
    print('\ttargets: {}'.format(repr(args.targets)), file=sys.stderr)
    if config:
        print('\toverride: {}'.format(pprint.pformat(config)), file=sys.stderr)
    print('--------', file=sys.stderr)

    if 'show' in args.targets:
        # Parse JSON with the json module and anything else with the safe
        # YAML loader (a bare yaml.load() needs an explicit Loader on current
        # PyYAML), closing the file when done.
        with open(configfile) as config_fp:
            if configfile.endswith('json'):
                parsed = json.load(config_fp)
            else:
                parsed = yaml.safe_load(config_fp)
        print(yaml.dump(parsed, default_flow_style=False))
        return 0

    # run!!
    status = snakemake.snakemake(snakefile, configfile=configfile,
                                 targets=args.targets, printshellcmds=True,
                                 dryrun=args.dry_run,
                                 lock=not args.nolock, config=config,
                                 verbose=args.verbose, debug_dag=args.debug)

    if status:  # translate "success" into shell exit code of 0
        return 0
    return 1
예제 #40
0
파일: __init__.py 프로젝트: Jamure/Mikado
def assemble_transcripts_pipeline(args):
    """
    This section of Daijin is focused on creating the necessary configuration for
    driving the pipeline.

    It loads the configuration file (JSON or YAML, chosen by file name),
    symlinks the configured read files into ``<out_dir>/1-reads`` and then
    launches the ``tr.snakefile`` workflow through snakemake.

    :param args: parsed command-line namespace. The attributes read here are
        config, exe, prefix, threads, no_drmaa, hpc_conf, dryrun, cores, jobs,
        rerun_incomplete, detailed_summary, list, dag, forcerun and nolock.
    :return: None
    :raises SystemExit: with status 1 when the configuration has no reads
        section or its sample/file lists differ in length.
    :raises OSError: when ``daijin_logs`` exists but is not a directory.
    """

    # safe_load instead of a bare yaml.load(): the latter needs an explicit
    # Loader on current PyYAML and can construct arbitrary objects.
    if args.config.endswith("json"):
        loader = json.load
    else:
        loader = yaml.safe_load

    with open(args.config, 'r') as _:
        doc = loader(_)

    # Optional second file whose parsed content is stored under doc["load"].
    if args.exe and os.path.exists(args.exe):
        if args.exe.endswith("json"):
            loader = json.load
        else:
            loader = yaml.safe_load
        with open(args.exe) as _:
            doc["load"] = loader(_)

    # Check the configuration
    check_config(doc)

    # pylint: disable=invalid-name

    if "short_reads" not in doc and "long_reads" not in doc:
        print("No short reads section or long reads sections was present in the configuration.  Please include your samples and try again")
        raise SystemExit(1)

    LABELS = []
    R1 = []
    R2 = []
    LR_LABELS = []
    LR_FILES = []

    if "short_reads" in doc:
        LABELS = doc["short_reads"]["samples"]
        R1 = doc["short_reads"]["r1"]
        R2 = doc["short_reads"]["r2"]

    if "long_reads" in doc:
        LR_LABELS = doc["long_reads"]["samples"]
        LR_FILES = doc["long_reads"]["files"]
    READS_DIR = doc["out_dir"] + "/1-reads"
    # A missing or empty scheduler entry means "no scheduler".
    SCHEDULER = doc.get("scheduler") or ""
    CWD = os.path.abspath(".")
    # pylint: enable=invalid-name

    res_cmd, sub_cmd = get_sub_commands(SCHEDULER, args.prefix)

    # Create log folder
    if not os.path.exists("daijin_logs"):
        os.makedirs("daijin_logs")
    elif not os.path.isdir("daijin_logs"):
        raise OSError("{} is not a directory!".format("daijin_logs"))

    # All three lists are zipped together below, so ANY length mismatch would
    # silently drop samples; reject it up front.
    if len(R1) != len(R2) or len(R1) != len(LABELS):
        print("R1, R2 and LABELS lists are not the same length.  Please check and try again")
        raise SystemExit(1)

    if len(LR_LABELS) != len(LR_FILES):
        print("long read samples and file arrays in the configuration file are not the same length.  Please check and try again")
        raise SystemExit(1)

    if not os.path.exists(READS_DIR):
        os.makedirs(READS_DIR)

    # Link paired short reads as <label>.R1.fq / <label>.R2.fq, keeping a
    # gz/bz2 compression suffix (taken from the R1 file name) when present.
    for read1, read2, label in zip(R1, R2, LABELS):
        suffix = read1.split(".")[-1]
        if suffix not in ("gz", "bz2"):
            suffix = ""
        else:
            suffix = ".{}".format(suffix)

        r1out = READS_DIR + "/" + label + ".R1.fq{}".format(suffix)
        r2out = READS_DIR + "/" + label + ".R2.fq{}".format(suffix)
        if not os.path.islink(r1out):
            os.symlink(os.path.abspath(read1), r1out)

        if not os.path.islink(r2out):
            os.symlink(os.path.abspath(read2), r2out)

    # Link long reads as <label>.long<suffix>, normalising FASTA/FASTQ
    # extensions to .fa/.fq and keeping any other extension as is.
    for lr_file, label in zip(LR_FILES, LR_LABELS):
        suffix = lr_file.split(".")[-1]
        if suffix in ("fa", "fna", "fasta"):
            suffix = ".fa"
        elif suffix in ("fq", "fastq"):
            suffix = ".fq"
        else:
            suffix = ".{}".format(suffix)

        out = READS_DIR + "/" + label + ".long{}".format(suffix)
        if not os.path.islink(out):
            os.symlink(os.path.abspath(lr_file), out)

    # Launch using SnakeMake
    assert pkg_resources.resource_exists("Mikado",
                                         os.path.join("daijin", "tr.snakefile"))

    additional_config = {}
    if args.threads is not None:
        additional_config["threads"] = args.threads

    # Without DRMAA, jobs are submitted through the scheduler's submit
    # command; with DRMAA only the resource string is passed on.
    cluster_var = None
    if args.no_drmaa is True and sub_cmd:
        cluster_var = sub_cmd + res_cmd

    drmaa_var = None
    if args.no_drmaa is False and res_cmd:
        drmaa_var = res_cmd

    # A cluster configuration is only needed when jobs are submitted.
    if drmaa_var or cluster_var:
        if os.path.exists(args.hpc_conf):
            hpc_conf = args.hpc_conf
        else:
            hpc_conf = system_hpc_yaml
    else:
        hpc_conf = None

    snakemake.snakemake(
        pkg_resources.resource_filename("Mikado",
                                        os.path.join("daijin", "tr.snakefile")),
        dryrun=args.dryrun,
        cores=args.cores,
        nodes=args.jobs,
        configfile=args.config,
        config=additional_config,
        workdir=CWD,
        cluster_config=hpc_conf,
        cluster=cluster_var,
        drmaa=drmaa_var,
        printshellcmds=True,
        snakemakepath=shutil.which("snakemake"),
        stats="daijin_tr_" + NOW + ".stats",
        force_incomplete=args.rerun_incomplete,
        detailed_summary=args.detailed_summary,
        list_resources=args.list,
        latency_wait=60 if SCHEDULER else 1,
        printdag=args.dag,
        forceall=args.dag,
        forcerun=args.forcerun,
        lock=(not args.nolock))
예제 #41
0
파일: __init__.py 프로젝트: Jamure/Mikado
def mikado_pipeline(args):
    """
    This function launches the sub-section dedicated to the Mikado pipeline.

    It loads and checks the configuration file (JSON or YAML, chosen by file
    name), prepares the ``daijin_logs`` folder and runs the
    ``mikado.snakefile`` workflow through snakemake.

    :param args: parsed command-line namespace. The attributes read here are
        config, exe, prefix, threads, no_drmaa, hpc_conf, dryrun, cores, jobs,
        rerun_incomplete, detailed_summary, list, dag, forcerun and nolock.
    :return: None
    :raises OSError: when ``daijin_logs`` exists but is not a directory.
    """

    # safe_load instead of a bare yaml.load(): the latter needs an explicit
    # Loader on current PyYAML and can construct arbitrary objects.
    if args.config.endswith("json"):
        loader = json.load
    else:
        loader = yaml.safe_load

    with open(args.config, 'r') as _:
        doc = loader(_)

    additional_config = {}
    if args.threads is not None:
        additional_config["threads"] = args.threads

    # Optional second file whose parsed content is passed to the workflow
    # as the "load" config entry.
    if args.exe and os.path.exists(args.exe):
        if args.exe.endswith("json"):
            loader = json.load
        else:
            loader = yaml.safe_load
        with open(args.exe) as _:
            additional_config["load"] = loader(_)

    check_config(doc)

    # pylint: disable=invalid-name
    # A missing or empty scheduler entry means "no scheduler".
    SCHEDULER = doc.get("scheduler") or ""
    CWD = os.path.abspath(".")
    # pylint: enable=invalid-name

    res_cmd, sub_cmd = get_sub_commands(SCHEDULER, args.prefix)

    if not os.path.exists("daijin_logs"):
        os.makedirs("daijin_logs")
    elif not os.path.isdir("daijin_logs"):
        raise OSError("{} is not a directory!".format("daijin_logs"))

    # Launch using SnakeMake
    assert pkg_resources.resource_exists("Mikado",
                                         os.path.join("daijin", "mikado.snakefile"))

    # Without DRMAA, jobs are submitted through the scheduler's submit
    # command; with DRMAA only the resource string is passed on.
    cluster_var = None
    if args.no_drmaa is True and sub_cmd:
        cluster_var = sub_cmd + res_cmd

    drmaa_var = None
    if args.no_drmaa is False and res_cmd:
        drmaa_var = res_cmd

    # A cluster configuration is only needed when jobs are submitted.
    if drmaa_var or cluster_var:
        if os.path.exists(args.hpc_conf):
            hpc_conf = args.hpc_conf
        else:
            hpc_conf = system_hpc_yaml
    else:
        hpc_conf = None

    snakemake.snakemake(
        pkg_resources.resource_filename("Mikado",
                                        os.path.join("daijin", "mikado.snakefile")),
        ignore_ambiguity=False,
        cores=args.cores,
        dryrun=args.dryrun,
        nodes=args.jobs,
        configfile=args.config,
        config=additional_config,
        workdir=CWD,
        cluster_config=hpc_conf,
        cluster=cluster_var,
        drmaa=drmaa_var,
        printshellcmds=True,
        snakemakepath=shutil.which("snakemake"),
        # NOTE(review): the "daijin_tr_" prefix looks carried over from the
        # transcript-assembly step; kept unchanged so file names stay stable.
        stats="daijin_tr_" + NOW + ".stats",
        force_incomplete=args.rerun_incomplete,
        detailed_summary=args.detailed_summary,
        list_resources=args.list,
        latency_wait=60 if SCHEDULER else 1,
        printdag=args.dag,
        forceall=args.dag,
        forcerun=args.forcerun,
        lock=(not args.nolock))