def main():
    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="local",
    )
    cosmos.initdb()

    sp.check_call("mkdir -p analysis_output/ex1", shell=True)
    os.chdir("analysis_output/ex1")
    workflow = cosmos.start("Example1", restart=True, skip_confirm=True)

    t = workflow.add_task(
        func=say,
        params=dict(text="Hello World", out_file="out.txt"),
        uid="my_task",
        time_req=None,
        core_req=1,
        mem_req=1024,
    )

    print(("task.params", t.params))
    print(("task.input_map", t.input_map))
    print(("task.output_map", t.output_map))
    print(("task.core_req", t.core_req))
    print(("task.time_req", t.time_req))
    print(("task.drm", t.drm))
    print(("task.uid", t.uid))

    workflow.run()

    sys.exit(0 if workflow.successful else 1)
def main():
    # start cosmos engine
    cosmos = Cosmos(
        database_url="sqlite://",
        default_drm="local",
        # default_drm="ge",
        default_queue="dev-short",
        default_drm_options={},
        get_submit_args=partial(default_get_submit_args, parallel_env="smp"),
    )
    cosmos.initdb()

    # create cosmos workflow
    workflow = cosmos.start(
        # NOTE cosmos will make dirs in this path
        # primary_log_path=os.path.join("logs", "cosmos.log"),
        name="blah",
        restart=True,
        skip_confirm=True,
        fail_fast=True,
    )

    for i in range(100):
        print("add {}".format(i))
        silly_recipe(workflow, i, 100)

    workflow.make_output_dirs()

    # run cosmos workflow
    # with SGESignalHandler(workflow):
    workflow.run()
def main():
    cosmos = Cosmos("cosmos.sqlite").initdb()
    workflow = cosmos.start("ex1", skip_confirm=True)

    t = workflow.add_task(
        func=say,
        params=dict(text="Hello World", out_file="out.txt"),
        uid="my_task",
        time_req=None,
        core_req=1,
        mem_req=1024,
    )

    print(("task.params", t.params))
    print(("task.input_map", t.input_map))
    print(("task.output_map", t.output_map))
    print(("task.core_req", t.core_req))
    print(("task.time_req", t.time_req))
    print(("task.drm", t.drm))
    print(("task.uid", t.uid))

    workflow.run(cmd_wrapper=py_call)

    sys.exit(0 if workflow.successful else 1)
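# This variant runs with cmd_wrapper=py_call, so `say` is expected to be a plain Python
# function that Cosmos calls directly (rather than one returning a shell-command string).
# A minimal sketch, assuming it only needs to write its text argument to out_file:
def say(text, out_file):
    with open(out_file, "w") as fp:
        fp.write(text)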
def main():
    cosmos = Cosmos()
    cosmos.initdb()
    workflow = cosmos.start("env_variables", skip_confirm=True)
    workflow.add_task(
        func=command_with_env_variables,
        environment_variables=environment_variables_dict,
        uid="special",
    )
    workflow.run(cmd_wrapper=py_call)
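# Hypothetical definitions for the two names the snippet above assumes: a dict of
# environment variables Cosmos should export for the task, and a task function that
# reads one of them back. The variable name and value are illustrative only.
environment_variables_dict = dict(MY_SETTING="some value")


def command_with_env_variables():
    import os
    # the variables passed via environment_variables should be visible here
    print(os.environ["MY_SETTING"])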
def test_zero_tasks():
    cosmos = Cosmos()
    cosmos.initdb()
    temp_dir = tempfile.mkdtemp()
    with cd(temp_dir):
        workflow = cosmos.start('workflow', skip_confirm=True)
        workflow.run(set_successful=False)
        workflow.run(cmd_wrapper=py_call)
    shutil.rmtree(temp_dir)
def main():
    p = argparse.ArgumentParser()
    p.add_argument("-drm", default="local", help="", choices=("local", "drmaa:ge", "ge", "slurm"))
    p.add_argument("-j", "--job-class", help="Submit to this job class if the DRM supports it")
    p.add_argument("-q", "--queue", help="Submit to this queue if the DRM supports it")
    args = p.parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        # example of how to change arguments if you're not using default_drm='local'
        get_submit_args=partial(default_get_submit_args, parallel_env="smp"),
        default_drm=args.drm,
        default_max_attempts=2,
        default_job_class=args.job_class,
        default_queue=args.queue,
    )
    cosmos.initdb()

    sp.check_call("mkdir -p analysis_output/1000tasks/", shell=True)
    os.chdir("analysis_output/1000tasks/")
    workflow = cosmos.start("1000_tasks", restart=True, skip_confirm=True)

    recipe(workflow)

    workflow.make_output_dirs()
    workflow.run(max_cores=100)

    # Noting here that if you wanted to look at the outputs of any Tasks to decide how to generate the rest of a DAG
    # you can do so here, proceed to add more tasks via workflow.add_task(), and then call workflow.run() again.
    # Yes, it does require running all Tasks in the dag to get the outputs of any Task, and we hope to address
    # that limitation at some point in the future.

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(workflow.stage_graph(), "/tmp/ex1_task_graph.png", format="png")
        draw_task_graph(workflow.task_graph(), "/tmp/ex1_stage_graph.png", format="png")
    else:
        print("Pygraphviz is not available :(")

    sys.exit(0 if workflow.successful else 1)
def main():
    cosmos = Cosmos()
    cosmos.initdb()
    workflow = cosmos.start('test', skip_confirm=True)

    for i, num_gpus in enumerate([1, 1, 2, 2, 3]):
        task = workflow.add_task(
            use_cuda_device,
            dict(some_arg=i, num_gpus=num_gpus),
            gpu_req=num_gpus,
            uid=str(i),
        )

    workflow.run(
        max_gpus=len(os.environ['COSMOS_LOCAL_GPU_DEVICES'].split(',')),
        cmd_wrapper=py_call,
        cleanup_at_exit=False,
    )
def main():
    cosmos = Cosmos().initdb()
    workflow = cosmos.start("gpu", skip_confirm=True)

    for i, num_gpus in enumerate([1, 1, 2, 2, 3]):
        task = workflow.add_task(
            use_cuda_device,
            dict(some_arg=i, num_gpus=num_gpus),
            gpu_req=num_gpus,
            uid=str(i),
        )

    workflow.run(
        max_gpus=len(os.environ["COSMOS_LOCAL_GPU_DEVICES"].split(",")),
        cmd_wrapper=py_call,
    )
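# `use_cuda_device` is not shown in these snippets. A minimal sketch, assuming Cosmos
# exposes the devices it allots to a gpu_req task through CUDA_VISIBLE_DEVICES (the
# COSMOS_LOCAL_GPU_DEVICES usage above suggests a local pool of GPUs being divided up):
def use_cuda_device(some_arg, num_gpus):
    import os
    print("task %s allotted gpus: %s" % (some_arg, os.environ.get("CUDA_VISIBLE_DEVICES")))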
def __init__(self, config_path, reference_version):
    assert os.path.exists(config_path), '%s does not exist' % config_path
    assert reference_version in ['hg38', 'b37'], 'bad reference_version: %s' % reference_version

    self.config_path = config_path
    self.config = ConfigParser(interpolation=ExtendedInterpolation())
    self.config.read(config_path)
    self.config.add_section('ref')
    for k, v in self.config['ref_%s' % reference_version].items():
        self.config.set('ref', k, v)
    assert len(self.config['ref'].items()) > 1

    # set_env_aws_credentials()
    os.environ['REQUESTS_CA_BUNDLE'] = '/etc/ssl/certs/ca-certificates.crt'
    # export REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt

    from cosmos.web.gemon.views import bprint as gemon_bprint
    from cosmos.api import Cosmos, default_get_submit_args
    from functools import partial
    from flask import Flask

    flask_app = Flask('genomekey', template_folder=os.path.join(library_path, 'web/templates'))
    flask_app.secret_key = '\x16\x89\xf5-\tK`\xf5FY.\xb9\x9c\xb4qX\xfdm\x19\xbd\xdd\xef\xa9\xe2'
    flask_app.register_blueprint(gemon_bprint, url_prefix='/gemon')
    self.flask_app = flask_app
    self.cosmos_app = Cosmos(
        self.config['gk']['database_url'],
        default_drm=self.config['gk']['default_drm'],
        flask_app=flask_app,
        get_submit_args=partial(default_get_submit_args, parallel_env='orte'),
    )
def main():
    p = ArgumentParser()
    p.add_argument("--sleep", default=0, type=int)
    args = p.parse_args()

    cosmos = Cosmos("cosmos.sqlite").initdb()
    workflow = cosmos.start("ex3", restart=True, skip_confirm=True)

    t1 = workflow.add_task(func=say, params=dict(text="Hello World", out_file="out.txt"), uid="my_task")
    t2 = workflow.add_task(func=sleep, params=dict(secs=args.sleep), uid="my_task")

    workflow.make_output_dirs()
    workflow.run(cmd_wrapper=py_call)
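# With cmd_wrapper=py_call, `sleep` is assumed to be a plain Python task function, e.g. a
# thin wrapper over time.sleep; `say` could be the same sketch shown after the earlier
# py_call example.
def sleep(secs):
    import time
    time.sleep(secs)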
def main():
    p = argparse.ArgumentParser()
    p.add_argument("-drm", default="local", help="", choices=("local", "awsbatch", "slurm", "drmaa:ge", "ge"))
    p.add_argument("-q", "--queue", help="Submit to this queue if the DRM supports it")
    args = p.parse_args()

    cosmos = Cosmos("cosmos.sqlite", default_drm=args.drm, default_max_attempts=2, default_queue=args.queue)
    cosmos.initdb()

    workflow = cosmos.start("Example2", skip_confirm=True)
    recipe(workflow)

    # any parameters that start with out_ are output directories, and will be created if
    # the user calls workflow.make_output_dirs
    workflow.make_output_dirs()
    workflow.run(max_cores=10, cmd_wrapper=py_call)

    # Noting here that if you wanted to look at the outputs of any Tasks to decide how to generate the rest of a DAG
    # you can do so here, proceed to add more tasks via workflow.add_task(), and then call workflow.run() again.
    # Yes, it does require running all Tasks in the dag to get the outputs of any Task, and we hope to address
    # that limitation at some point in the future.

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(workflow.stage_graph(), "/tmp/ex1_task_graph.png", format="png")
        draw_task_graph(workflow.task_graph(), "/tmp/ex1_stage_graph.png", format="png")
    else:
        print("Pygraphviz is not available :(")

    sys.exit(0 if workflow.successful else 1)
def main():
    args = parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="awsbatch",
        default_drm_options=dict(
            container_image=args.container_image,
            s3_prefix_for_command_script_temp_files=args.s3_prefix_for_command_script_temp_files,
            # only retry on spot instance death
            retry_only_if_status_reason_matches="Host EC2 .+ terminated.",
        ),
        default_queue=args.default_queue,
    )
    cosmos.initdb()

    sp.check_call("mkdir -p analysis_output/ex1", shell=True)
    os.chdir("analysis_output/ex1")
    workflow = cosmos.start("Example1", restart=True, skip_confirm=True)

    t = workflow.add_task(
        func=get_instance_info,
        params=dict(out_s3_uri=args.out_s3_uri, sleep=args.sleep),
        uid="",
        time_req=None,
        max_attempts=args.max_attempts,
        core_req=args.core_req,
        mem_req=1024,
    )
    workflow.run()

    print(("task.params", t.params))
    print(("task.input_map", t.input_map))
    print(("task.output_map", t.output_map))
    print(("task.core_req", t.core_req))
    print(("task.time_req", t.time_req))
    print(("task.drm", t.drm))
    print(("task.uid", t.uid))
    print(("task.drm_options", t.drm_options))
    print(("task.queue", t.queue))

    sys.exit(0 if workflow.successful else 1)
def main(output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    cosmos = Cosmos()
    cosmos.initdb()
    workflow = cosmos.start(
        "test",
        skip_confirm=True,
        primary_log_path=os.path.join(output_dir, "workflow.log"),
    )

    for i, num_gpus in enumerate([1, 1, 2, 2, 3]):
        task = workflow.add_task(
            use_cuda_device,
            dict(some_arg=i, num_gpus=num_gpus),
            gpu_req=num_gpus,
            uid=str(i),
        )

    workflow.run(
        max_gpus=len(os.environ["COSMOS_LOCAL_GPU_DEVICES"].split(",")),
        cmd_wrapper=py_call_cmd_wrapper,
        do_cleanup_atexit=False,
        log_out_dir_func=partial(default_task_log_output_dir, prefix="%s" % output_dir),
    )
def main():
    p = ArgumentParser()
    p.add_argument("--sleep", default=0, type=int)
    args = p.parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="local",
    )
    cosmos.initdb()
    workflow = cosmos.start("Example3", restart=True, skip_confirm=True)

    t1 = workflow.add_task(func=say, params=dict(text="Hello World", out_file="out.txt"), uid="my_task")
    t2 = workflow.add_task(func=sleep, params=dict(secs=args.sleep), uid="my_task")

    workflow.make_output_dirs()
    workflow.run(cmd_wrapper=py_call_cmd_wrapper)
def __init__(self, config, drm, restart):
    self.config = config
    self.cosmos = Cosmos(
        database_url='sqlite:///{}'.format(self.config.db),
        get_submit_args=default_get_submit_args,
        default_drm=drm,
    )
    self.cosmos.initdb()

    primary_logfile = os.path.join(
        self.config.rootdir,
        '{}.log'.format(self.config.project_name),
    )

    self.workflow = self.cosmos.start(
        self.config.project_name,
        primary_log_path=primary_logfile,
        restart=restart,
    )

    self.setup_pipeline()
def main():
    cosmos = Cosmos().initdb()
    workflow = cosmos.start("duplicate_uids", skip_confirm=True)

    task = workflow.add_task(func=prepare_data, params=dict(a=1), uid="x")

    # normally you can't add a task with the same uid to the same stage
    with pytest.raises(DuplicateUid):
        workflow.add_task(func=prepare_data, params=dict(a=1), uid="x")

    # normally you can't add a task with the same uid to the same stage
    with pytest.raises(DuplicateUid):
        workflow.add_task(func=prepare_data, params=dict(a=1), uid="x")

    # pass if_duplicate="return" to get the same task back that you already added
    task2 = workflow.add_task(func=prepare_data, params=dict(a=1), uid="x", if_duplicate="return")
    assert task == task2

    # this can be especially useful in loops to avoid repeating computation
    for _ in range(3):
        task = workflow.add_task(func=prepare_data, params=dict(a=1), uid="x", if_duplicate="return")
        workflow.add_task(
            func=train_machine_learning_model,
            params=dict(a=1),
            uid="x",
            if_duplicate="return",
            parents=task,
        )

    # NOTE: parameters must be identical when using this feature
    with pytest.raises(InvalidParams):
        workflow.add_task(func=prepare_data, params=dict(a=1000), uid="x", if_duplicate="return")
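# Hypothetical stubs for the two task functions the snippet above assumes; any picklable
# callables whose signatures match the params dicts would behave the same for this test.
def prepare_data(a):
    return a


def train_machine_learning_model(a):
    return a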
               help='', choices=('local', 'drmaa:ge', 'ge', 'slurm'))
p.add_argument('-j', '--job-class', help='Submit to this job class if the DRM supports it')
p.add_argument('-q', '--queue', help='Submit to this queue if the DRM supports it')
args = p.parse_args()

cosmos = Cosmos(
    'sqlite:///%s/sqlite.db?check_same_thread=False' % os.path.dirname(os.path.abspath(__file__)),
    # example of how to change arguments if you're not using default_drm='local'
    get_submit_args=partial(default_get_submit_args, parallel_env='smp'),
    default_drm=args.drm,
    default_max_attempts=2,
    default_job_class=args.job_class,
    default_queue=args.queue)
cosmos.initdb()

sp.check_call('mkdir -p analysis_output/ex2', shell=True)
os.chdir('analysis_output/ex2')
workflow = cosmos.start('Example2', restart=True, skip_confirm=True)
recipe(workflow)

workflow.make_output_dirs()
workflow.run(max_cores=10)
def make_app(database_url):
    cosmos = Cosmos(database_url)
    flask = cosmos.init_flask()
    return flask
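# A possible way to serve the app returned by make_app, assuming the Flask development
# server is acceptable; the database URL below is only an example.
if __name__ == "__main__":
    app = make_app("sqlite:///cosmos.sqlite")
    app.run(host="0.0.0.0", port=5000)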
import os

from cosmos.api import Execution, add_execution_args, Cosmos
from configparser import ConfigParser
from cosmos.util.helpers import mkdir

root_path = os.path.dirname(os.path.realpath(__file__))

config = ConfigParser()
config.read(os.path.join(root_path, 'settings.conf'))
settings = config['main']

if __name__ == '__main__':
    import sys
    # sys.path.append('./src')
    # from src import testxx

    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-g', '--growl', action='store_true',
                        help='sends a growl notification on execution status changes')
    parser.add_argument('-d', '--debug', action='store_true', help='launch ipdb on exception')
    sps = parser.add_subparsers(title="Commands", metavar="<command>")

    sp = sps.add_parser('resetdb', help=cosmos.resetdb.__doc__)
    sp.set_defaults(func=cosmos.resetdb)
    sp = sps.add_parser('initdb', help=cosmos.initdb.__doc__)
# all of the WordCounts (a many2one relationship).
summarize = execution.add_task(cat, dict(), word_counts, '', 'Summary_Analysis')

if pygraphviz_available:
    # These images can also be seen on the fly in the web-interface
    draw_stage_graph(execution.stage_graph(), '/tmp/ex2_task_graph.png', format='png')
    draw_task_graph(execution.task_graph(), '/tmp/ex2_stage_graph.png', format='png')
else:
    print('Pygraphviz is not available :(')

execution.run()


if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()
    execution = cosmos.start('Example2', 'analysis_output/ex2', max_attempts=1, restart=True,
                             skip_confirm=True, max_cpus=10)
    run_ex2(execution)
""" Basic demonstration the structure of a Task instance """ import subprocess as sp import os import sys from cosmos.api import Cosmos, default_get_submit_args from functools import partial cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)), get_submit_args=partial(default_get_submit_args, parallel_env='smp'), default_drm='local') cosmos.initdb() sp.check_call('mkdir -p analysis_output/ex1', shell=True) os.chdir('analysis_output/ex1') workflow = cosmos.start('Example1', restart=True, skip_confirm=True) def say(text, out_file, core_req=1): return r""" echo "{text}" > {out_file} """.format(text=text, out_file=out_file) t = workflow.add_task(func=say, params=dict(text='Hello World', out_file='out.txt', core_req=2),
""" Basic demonstration the structure of a Task instance """ import subprocess as sp import os import sys from cosmos.api import Cosmos cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)), default_drm='local') cosmos.initdb() sp.check_call('mkdir -p analysis_output/ex1', shell=True) os.chdir('analysis_output/ex1') workflow = cosmos.start('Example1', restart=True, skip_confirm=True) def say(text, out_file): return r""" echo "{text}" > {out_file} """.format(text=text, out_file=out_file) t = workflow.add_task(func=say, params=dict( text='Hello World', out_file='out.txt', ), uid='my_task', time_req=None,
from cosmos.api import Cosmos, signal_execution_status_change, ExecutionStatus
from ex1 import run_ex1
import os
from cosmos.util.helpers import mkdir


def run_ex3(execution):
    @signal_execution_status_change.connect
    def sig(ex):
        msg = "%s %s" % (ex, ex.status)
        if ex.status in [ExecutionStatus.successful, ExecutionStatus.failed, ExecutionStatus.killed]:
            text_message(msg)
            ex.log.info('Sent a text message')

    def text_message(message):
        from twilio.rest import TwilioRestClient
        account = "XYZ"
        token = "XYZ"
        client = TwilioRestClient(account, token)
        message = client.messages.create(to="+1231231234", from_="+1231231234", body=message)

    run_ex1(execution)


if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()
    execution = cosmos.start('Example_Email', 'analysis_output/ex3', max_attempts=2, restart=True,
                             skip_confirm=True)
    run_ex1(execution)
def variant_call(execution, bam_path, target_bed_path, max_complex_gap):
    """
    Bioinformatics variant calling workflow
    """
    contigs = sp.check_output("cat %s |cut -f1|uniq" % target_bed_path, shell=True).strip().split("\n")

    bed_tasks = [execution.add_task(tools.filter_bed_by_contig,
                                    tags=dict(in_bam=bam_path, in_bed=target_bed_path, contig=contig),
                                    out_dir='work/{contig}')
                 for contig in contigs]

    freebayes_tasks = one2one(tools.freebayes, bed_tasks, dict(max_complex_gap=max_complex_gap))

    merge_vcf_tasks = many2one(tools.vcf_concat_parts, freebayes_tasks)

    execution.run()


if __name__ == '__main__':
    p = argparse.ArgumentParser()
    p.add_argument('bam_path')
    p.add_argument('target_bed_path')
    p.add_argument('--max_complex_gap', type=int, default=2)
    add_execution_args(p)
    start_kwargs, variant_call_args = pop_execution_args(vars(p.parse_args()))

    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()
    execution = cosmos.start(output_dir='../analysis_output/variant_calling', **start_kwargs)
    variant_call(execution, **variant_call_args)
""" Basic demonstration the structure of a Task instance """ import os from cosmos.api import Cosmos cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__))) cosmos.initdb() execution = cosmos.start('Example1', 'analysis_output/ex3', restart=True, skip_confirm=True) def cmd(in_files, out_file): return r""" echo "{in_files}" > {out_file} """.format(**locals()) t = execution.add_task(cmd, tags=dict(in_files=[('a', 'b', 'in_file')], out_file='out.txt')) print 'Task:', t print 'task.tags', t.tags print 'task.input_files', t.input_files print 'task.output_files', t.output_files #execution.run()
                   tags=dict(chars=True, **cat_task.tags),
                   parents=[cat_task],
                   out_dir='{word}/{n}')
               for cat_task in cats]

# Cat the contents of all word_counts into one file. Only one node is being created whose parents are
# all of the WordCounts (a many2one relationship).
summarize = execution.add_task(cat, tags=dict(), parents=word_counts, out_dir='',
                               stage_name='Summary_Analysis')

if pygraphviz_available:
    # These images can also be seen on the fly in the web-interface
    draw_stage_graph(execution.stage_graph(), '/tmp/ex1_task_graph.png', format='png')
    draw_task_graph(execution.task_graph(), '/tmp/ex1_stage_graph.png', format='png')
else:
    print('Pygraphviz is not available :(')

execution.run()


if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()
    execution = cosmos.start('Example1', 'analysis_output/ex1', max_attempts=1, restart=True,
                             skip_confirm=True, max_cpus=10)
    run_ex1(execution)
        with open(out_file) as fp:
            i = int(fp.read())
    else:
        i = 0

    with open(out_file, "w") as fp:
        fp.write(str(i + 1))

    if i < 2:
        # fail the first 2 times
        raise


if __name__ == "__main__":
    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="local",
    )
    cosmos.initdb()
    workflow = cosmos.start("ExampleReattempt", restart=True, skip_confirm=True)

    if os.path.exists("out.txt"):
        os.unlink("out.txt")

    t = workflow.add_task(func=add_one, params=dict(out_file="out.txt"), uid="my_task", max_attempts=3)

    workflow.make_output_dirs()
load_dotenv()


def set_env_variables():
    return "\n".join([
        f'export {variable}="{os.getenv(variable)}"' for variable in env_variables
    ])


cosmos = Cosmos(
    RDS_COSMOS_DATABASE,
    default_drm="awsbatch",
    default_drm_options=dict(
        container_image=os.getenv("ECR_CONTAINER_IMAGE"),
        s3_prefix_for_command_script_temp_files=os.path.join(S3_BUCKET_PATH, "cosmos-tmp"),
        shm_size=int(args.mem_req * 0.75),
        # only retry on spot instance death
        retry_only_if_status_reason_matches="Host EC2 .+ terminated.",
    ),
    default_queue=os.getenv("BATCH_QUEUE_NAME"),
)
cosmos.initdb()

workflow_name = f"{args.name}-{uuid1().hex}"
workflow = cosmos.start(workflow_name, restart=True, skip_confirm=True)

task_name = uuid1().hex
workflow.add_task(
    func=pretrain,
class Pipeline(object):
    def __init__(self, config, drm, restart, skip_confirm):
        self.config = config
        self.cosmos = Cosmos(database_url='sqlite:///{}'.format(self.config.db),
                             get_submit_args=default_get_submit_args,
                             default_drm=drm)
        self.cosmos.initdb()

        primary_logfile = os.path.join(
            self.config.rootdir,
            '{}.log'.format(self.config.project_name),
        )

        self.workflow = self.cosmos.start(
            self.config.project_name,
            primary_log_path=primary_logfile,
            restart=restart,
            skip_confirm=skip_confirm,
        )

        self.setup_pipeline()

    def setup_pipeline(self):
        self.construct_pipeline()
        self.workflow.make_output_dirs()

    def run(self, task_flush):
        # put set_successful to False if you intend to add more tasks to the
        # pipeline later
        custom_log_dir = lambda task: os.path.join(self.config.rootdir, 'logs', task.stage.name, task.uid)
        self.workflow.run(set_successful=False,
                          log_out_dir_func=custom_log_dir,
                          db_task_flush=task_flush)

    def construct_pipeline(self):
        # 1. remove unused alternates
        remove_ac_0_tasks = self.create_remove_ac_0_tasks(1)
        # 2. calculate sample missingness (counting phase)
        count_sample_missingness_tasks = self.create_count_sample_missingness_tasks(remove_ac_0_tasks, 2)
        # 2.1 calculate sample missingness (merge and calculation phase)
        calculate_sample_missingness_task = self.create_calculate_sample_missingness_task(
            count_sample_missingness_tasks, 2.1)
        # 3. denormalize, decompose, and uniq
        dnu_tasks = self.create_decompose_normalize_unique_tasks(remove_ac_0_tasks, 3)
        # 4. remove symbolic alleles
        rsa_tasks = self.create_remove_symbolic_deletion_tasks(dnu_tasks, 4)
        # 5. filter missingness
        filter_variant_missingness_tasks = self.create_filter_variant_missingness_tasks(rsa_tasks, 5)
        # 6. annotate allele balances
        allele_balance_annotation_tasks = self.create_allele_balance_annotation_tasks(
            filter_variant_missingness_tasks, 6)
        # 7. annotate with 1000G
        annotate_1000G_tasks = self.create_1000G_annotation_tasks(allele_balance_annotation_tasks, 7)
        # 8. annotate with ExAC
        annotate_ExAC_tasks = self.create_ExAC_annotation_tasks(annotate_1000G_tasks, 8)
        # 9. VEP annotation
        annotate_vep_cadd_tasks = self.create_vep_cadd_annotation_tasks(annotate_ExAC_tasks, 9)
        # 10. VCF concatenation
        concatenated_vcfs = self.create_concatenate_vcfs_task(annotate_vep_cadd_tasks, 10)
        # 11. bcftools stats
        bcftools_stats_tasks = self.create_bcftools_stats_tasks(annotate_ExAC_tasks, 11)
        # 11.1 Merge & Plot bcftools stats
        bcftools_stats_summary_task = self.create_bcftools_stats_summary_task(bcftools_stats_tasks, 11.1)
        # 12. GATK VariantEval
        variant_eval_tasks = self.create_variant_eval_tasks(annotate_ExAC_tasks, 12)
        # 12.1 Merge & Plot GATK VariantEval stats
        variant_eval_summary_task = self.create_variant_eval_summary_task(variant_eval_tasks, 12.1)

    def create_bcftools_stats_summary_task(self, parent_tasks, step_number):
        stage = self._construct_task_name('bcftools-stats-summary', step_number)
        output_dir = os.path.join(self.config.rootdir, stage)
        prior_stage_name = parent_tasks[0].stage.name
        input_dir = os.path.join(self.config.rootdir, prior_stage_name)
        lsf_params = get_lsf_params(bcftools_stats_summary_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)
        task = {'func': bcftools_stats_summary,
                'params': {'in_dir': input_dir, 'out_dir': output_dir},
                'stage_name': stage,
                'uid': 'all-chroms',
                'drm_params': lsf_params_json,
                'parents': parent_tasks}
        summary_task = self.workflow.add_task(**task)
        return summary_task

    def create_concatenate_vcfs_task(self, parent_tasks, step_number):
        tasks = list()
        stage = self._construct_task_name('concat-vcfs', step_number)
        output_dir = os.path.join(self.config.rootdir, stage)
        lsf_params = get_lsf_params(concatenate_vcfs_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        def region_key(task):
            reference_fai = os.path.join(
                '/gscmnt/ams1102/info/model_data/2869585698/build106942997',
                'all_sequences.fa.fai')
            return Region(reference_fai, task.params['in_chrom'])

        def chromosome_key(task):
            reference_fai = os.path.join(
                '/gscmnt/ams1102/info/model_data/2869585698/build106942997',
                'all_sequences.fa.fai')
            return Region(reference_fai, task.params['in_chrom']).chrom

        for ref_chrom, chrom_tasks in groupby(sorted(parent_tasks, key=region_key), key=chromosome_key):
            ptasks = list(chrom_tasks)
            input_vcfs = [x.params['out_vcf'] for x in ptasks]
            output_vcf = 'concatenated.c{}.vcf.gz'.format(ref_chrom)
            output_log = 'concatenate.{}.log'.format(ref_chrom)
            task = {'func': concatenate_vcfs,
                    'params': {'in_vcfs': input_vcfs,
                               'in_chrom': ref_chrom,
                               'out_vcf': os.path.join(output_dir, ref_chrom, output_vcf),
                               'out_log': os.path.join(output_dir, ref_chrom, output_log)},
                    'stage_name': stage,
                    'uid': '{chrom}'.format(chrom=ref_chrom),
                    'drm_params': lsf_params_json,
                    'parents': ptasks}
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def create_variant_eval_summary_task(self, parent_tasks, step_number):
        stage = self._construct_task_name('gatk-variant-eval-summary', step_number)
        output_dir = os.path.join(self.config.rootdir, stage)
        prior_stage_name = parent_tasks[0].stage.name
        input_dir = os.path.join(self.config.rootdir, prior_stage_name)
        lsf_params = get_lsf_params(variant_eval_summary_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)
        task = {'func': variant_eval_summary,
                'params': {'in_dir': input_dir, 'out_dir': output_dir},
                'stage_name': stage,
                'uid': 'all-chroms',
                'drm_params': lsf_params_json,
                'parents': parent_tasks}
        summary_task = self.workflow.add_task(**task)
        return summary_task

    def create_bcftools_stats_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('bcftools-stats', step_number)
        basedir = os.path.join(self.config.rootdir, stage)
        lsf_params = get_lsf_params(bcftools_stats_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)
        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_stats = '{}.stats.out'.format(chrom)
            task = {'func': bcftools_stats,
                    'params': {'in_vcf': ptask.params['out_vcf'],
                               'in_chrom': chrom,
                               'out_stats': os.path.join(basedir, chrom, output_stats)},
                    'stage_name': stage,
                    'uid': '{chrom}'.format(chrom=chrom),
                    'drm_params': lsf_params_json,
                    'parents': [ptask]}
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def create_variant_eval_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('gatk-variant-eval', step_number)
        basedir = os.path.join(self.config.rootdir, stage)
        lsf_params = get_lsf_params(gatk_variant_eval_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)
        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_stats = 'chrom-{}-variant-eval.out'.format(chrom)
            output_log = 'chrom-{}-variant-eval.log'.format(chrom)
            task = {'func': gatk_variant_eval,
                    'params': {'in_vcf': ptask.params['out_vcf'],
                               'in_chrom': chrom,
                               'out_stats': os.path.join(basedir, chrom, output_stats),
                               'out_log': os.path.join(basedir, chrom, output_log)},
                    'stage_name': stage,
                    'uid': '{chrom}'.format(chrom=chrom),
                    'drm_params': lsf_params_json,
                    'parents': [ptask]}
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def create_vep_cadd_annotation_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('vep-cadd-annotation', step_number)
        basedir = os.path.join(self.config.rootdir, stage)
        lsf_params = get_lsf_params(annotation_vep_cadd_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)
        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'annotated.vep.cadd.c{}.vcf.gz'.format(chrom)
            output_log = 'vep.cadd.annotation.{}.log'.format(chrom)
            task = {'func': annotation_vep_cadd,
                    'params': {'in_vcf': ptask.params['out_vcf'],
                               'in_chrom': chrom,
                               'out_vcf': os.path.join(basedir, chrom, output_vcf),
                               'out_log': os.path.join(basedir, chrom, output_log)},
                    'stage_name': stage,
                    'uid': '{chrom}'.format(chrom=chrom),
                    'drm_params': lsf_params_json,
                    'parents': [ptask]}
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def create_ExAC_annotation_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('annotate-w-ExAC', step_number)
        basedir = os.path.join(self.config.rootdir, stage)
        lsf_params = get_lsf_params(annotation_ExAC_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)
        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'ExAC-annotated.c{}.vcf.gz'.format(chrom)
            output_log = 'ExAC-annotate.{}.log'.format(chrom)
            task = {'func': annotation_ExAC,
                    'params': {'in_vcf': ptask.params['out_vcf'],
                               'in_chrom': chrom,
                               'out_vcf': os.path.join(basedir, chrom, output_vcf),
                               'out_log': os.path.join(basedir, chrom, output_log)},
                    'stage_name': stage,
                    'uid': '{chrom}'.format(chrom=chrom),
                    'drm_params': lsf_params_json,
                    'parents': [ptask]}
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def create_1000G_annotation_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('annotate-w-1000G', step_number)
        basedir = os.path.join(self.config.rootdir, stage)
        lsf_params = get_lsf_params(annotation_1000G_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)
        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = '1kg-annotated.c{}.vcf.gz'.format(chrom)
            output_log = '1000G-annotate.{}.log'.format(chrom)
            task = {'func': annotation_1000G,
                    'params': {'in_vcf': ptask.params['out_vcf'],
                               'in_chrom': chrom,
                               'out_vcf': os.path.join(basedir, chrom, output_vcf),
                               'out_log': os.path.join(basedir, chrom, output_log)},
                    'stage_name': stage,
                    'uid': '{chrom}'.format(chrom=chrom),
                    'drm_params': lsf_params_json,
                    'parents': [ptask]}
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def create_allele_balance_annotation_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('allele-balance-annotation', step_number)
        basedir = os.path.join(self.config.rootdir, stage)
        lsf_params = get_lsf_params(annotate_allele_balances_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)
        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'allele-balance-{}.log'.format(chrom)
            task = {'func': annotate_allele_balances,
                    'params': {'in_vcf': ptask.params['out_vcf'],
                               'in_chrom': chrom,
                               'out_vcf': os.path.join(basedir, chrom, output_vcf),
                               'out_log': os.path.join(basedir, chrom, output_log)},
                    'stage_name': stage,
                    'uid': '{chrom}'.format(chrom=chrom),
                    'drm_params': lsf_params_json,
                    'parents': [ptask]}
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def create_filter_variant_missingness_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('filter-variant-missingness', step_number)
        basedir = os.path.join(self.config.rootdir, stage)
        lsf_params = get_lsf_params(filter_variant_missingness_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)
        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'filter-missingness-{}.log'.format(chrom)
            task = {'func': filter_variant_missingness,
                    'params': {'in_vcf': ptask.params['out_vcf'],
                               'in_chrom': chrom,
                               'out_vcf': os.path.join(basedir, chrom, output_vcf),
                               'out_log': os.path.join(basedir, chrom, output_log)},
                    'stage_name': stage,
                    'uid': '{chrom}'.format(chrom=chrom),
                    'drm_params': lsf_params_json,
                    'parents': [ptask]}
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def create_remove_symbolic_deletion_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('remove-symbolic-alleles', step_number)
        basedir = os.path.join(self.config.rootdir, stage)
        lsf_params = get_lsf_params(remove_symbolic_deletion_alleles_lsf_params, self.config.email,
                                    self.config.docker)
        lsf_params_json = to_json(lsf_params)
        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'remove-symbolic-alleles-chrom-{}.log'.format(chrom)
            task = {'func': remove_symbolic_deletion_alleles,
                    'params': {'in_vcf': ptask.params['out_vcf'],
                               'in_chrom': chrom,
                               'out_vcf': os.path.join(basedir, chrom, output_vcf),
                               'out_log': os.path.join(basedir, chrom, output_log)},
                    'stage_name': stage,
                    'uid': '{chrom}'.format(chrom=chrom),
                    'drm_params': lsf_params_json,
                    'parents': [ptask]}
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def create_decompose_normalize_unique_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('decompose-normalize-uniq', step_number)
        basedir = os.path.join(self.config.rootdir, stage)
        lsf_params = get_lsf_params(normalize_decompose_unique_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)
        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'decompose-normalize-unique-{}.log'.format(chrom)
            task = {'func': normalize_decompose_unique,
                    'params': {'in_vcf': ptask.params['out_vcf'],
                               'in_chrom': chrom,
                               'out_vcf': os.path.join(basedir, chrom, output_vcf),
                               'out_log': os.path.join(basedir, chrom, output_log)},
                    'stage_name': stage,
                    'uid': '{chrom}'.format(chrom=chrom),
                    'drm_params': lsf_params_json,
                    'parents': [ptask]}
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def create_calculate_sample_missingness_task(self, parent_tasks, step_number):
        stage = self._construct_task_name('calculate-sample-missingness', step_number)
        output_dir = os.path.join(self.config.rootdir, stage)
        prior_stage_name = parent_tasks[0].stage.name
        input_dir = os.path.join(self.config.rootdir, prior_stage_name)
        input_json_wildcard_path = os.path.join(input_dir, '*', '*.json')
        lsf_params = get_lsf_params(calculate_sample_missingness_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)
        task = {'func': calculate_sample_missingness,
                'params': {'in_json': input_json_wildcard_path,
                           'out_stats': os.path.join(output_dir, 'sample-missingness-pct.dat'),
                           'out_log': os.path.join(output_dir, 'sample-missingness-pct.dat.log')},
                'stage_name': stage,
                'uid': '1-22',
                'drm_params': lsf_params_json,
                'parents': parent_tasks}
        summary_task = self.workflow.add_task(**task)
        return summary_task

    def create_count_sample_missingness_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('count-sample-missingness', step_number)
        basedir = os.path.join(self.config.rootdir, stage)
        lsf_params = get_lsf_params(count_sample_missingness_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)
        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            # only count missing genotypes on chromosomes 1-22 (not X, Y, or MT)
            if not chrom[0].isdigit():
                continue
            output_json = '{chrom}-sample-missingness-counts.json'.format(chrom=chrom)
            output_log = '{}-sample-missingness-counts.log'.format(chrom)
            task = {'func': count_sample_missingness,
                    'params': {'in_vcf': ptask.params['out_vcf'],
                               'in_chrom': chrom,
                               'out_json': os.path.join(basedir, chrom, output_json),
                               'out_log': os.path.join(basedir, chrom, output_log)},
                    'stage_name': stage,
                    'uid': '{chrom}'.format(chrom=chrom),
                    'drm_params': lsf_params_json,
                    'parents': [ptask]}
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def create_remove_ac_0_tasks(self, step_number):
        tasks = []
        stage = self._construct_task_name('select-variants-ac-0-removal', step_number)
        basedir = os.path.join(self.config.rootdir, stage)
        lsf_params = get_lsf_params(gatk_select_variants_remove_ac_0_lsf_params, self.config.email,
                                    self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)
        for chrom in self.config.chroms:
            vcf = self.config.vcfs[chrom]
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'select-variants-chrom-{}-gatk.log'.format(chrom)
            task = {'func': gatk_select_variants_remove_ac_0,
                    'params': {'in_chrom': chrom,
                               'in_vcf': vcf,
                               'out_vcf': os.path.join(basedir, chrom, output_vcf),
                               'out_log': os.path.join(basedir, chrom, output_log)},
                    'stage_name': stage,
                    'uid': '{chrom}'.format(chrom=chrom),
                    'drm_params': lsf_params_json}
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def _construct_task_name(self, name, number):
        task_name = '{}-{}'.format(number, name)
        return task_name
                             stage_name='Summary_Analysis',
                             uid='')  # It's the only Task in this Stage, so doesn't need a specific uid

if __name__ == '__main__':
    import argparse

    p = argparse.ArgumentParser()
    p.add_argument('-drm', default='local', help='', choices=('local', 'drmaa:ge', 'ge'))
    p.add_argument('-q', '--queue', help='Submit to this queue if the DRM supports it')
    args = p.parse_args()

    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)),
                    # example of how to change arguments if you're NOT using default_drm='local'
                    get_submit_args=partial(default_get_submit_args, parallel_env='smp'),
                    default_drm=args.drm,
                    default_queue=args.queue)
    cosmos.initdb()

    sp.check_call('mkdir -p analysis_output/ex2', shell=True)
    os.chdir('analysis_output/ex2')
    workflow = cosmos.start('Example2', restart=True, skip_confirm=True)
    recipe(workflow)

    workflow.make_output_dirs()
    workflow.run(max_attempts=1, max_cores=10)

    if pygraphviz_available:
from cosmos.api import Cosmos, signal_workflow_status_change, WorkflowStatus
from ex1 import run_ex1
import os


def run_ex3(workflow):
    @signal_workflow_status_change.connect
    def sig(ex):
        msg = "%s %s" % (ex, ex.status)
        if ex.status in [WorkflowStatus.successful, WorkflowStatus.failed, WorkflowStatus.killed]:
            text_message(msg)
            ex.log.info('Sent a text message')

    def text_message(message):
        from twilio.rest import TwilioRestClient
        account = "XYZ"
        token = "XYZ"
        client = TwilioRestClient(account, token)
        message = client.messages.create(to="+1231231234", from_="+1231231234", body=message)

    run_ex1(workflow)


if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()
    workflow = cosmos.start('Example_Email', 'analysis_output/ex3', restart=True, skip_confirm=True)
    run_ex1(workflow)
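# Note: TwilioRestClient is the legacy Twilio client; with current releases of the twilio
# package the equivalent helper would look roughly like this (placeholder credentials and
# phone numbers, kept identical to the snippet above):
def text_message_current_twilio(message):
    from twilio.rest import Client
    client = Client("XYZ", "XYZ")
    client.messages.create(to="+1231231234", from_="+1231231234", body=message)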
""" Bioinformatics variant calling workflow """ contigs = sp.check_output("cat %s |cut -f1|uniq" % target_bed_path, shell=True).strip().split("\n") freebayes_tasks = [] for contig in contigs: bed_task = execution.add_task(tools.filter_bed_by_contig, tags=dict(in_bam=bam_path, in_bed=target_bed_path, contig=contig), out_dir='work/{contig}') freebayes_task = execution.add_task(tools.freebayes, tags=dict(max_complex_gap=max_complex_gap), parents=bed_task, out_dir='work/{contig}') freebayes_tasks.append(freebayes_task) merge_vcf_tasks = many2one(tools.vcf_concat_parts, parents=freebayes_tasks) execution.run(max_attempts=max_attempts, max_cores=max_cores) if __name__ == '__main__': p = argparse.ArgumentParser() p.add_argument('bam_path') p.add_argument('target_bed_path') p.add_argument('--max_complex_gap', type=int, default=2) add_execution_args(p) args = p.parse_args() cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__))) cosmos.initdb() execution = cosmos.start(name=args.name, output_dir='../analysis_output/variant_calling', restart=args.restart, skip_confirm=args.skip_confirm) variant_call(execution, args.max_attempts, args.max_cores, args.bam_path, args.target_bed_path, args.max_complex_gap)
def main():
    args = parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="awsbatch",
        default_drm_options=dict(
            container_image=args.container_image,
            s3_prefix_for_command_script_temp_files=args.s3_prefix_for_command_script_temp_files,
            # only retry on spot instance death
            retry_only_if_status_reason_matches="Host EC2 .+ terminated.",
        ),
        default_queue=args.default_queue,
    )
    cosmos.initdb()

    # sp.check_call("mkdir -p analysis_output/ex1", shell=True)
    # os.chdir("analysis_output/ex1")
    workflow = cosmos.start(f"Evaluate_{args.id}", restart=True, skip_confirm=True)

    parameters = np.load(f"optimize_awsbatch/parameters/{args.id}.npy")
    for i, par in enumerate(parameters):
        parameters_ = dict(
            mean_weight=par[0],
            c_w=par[1],
            tau_pos=par[2],
            tau_neg=par[3],
            A_pos=par[4],
            A_neg=par[5],
            weight_decay=par[6],
            n_filters=25,
            time_max=250,
            crop=20,
            kernel_size=16,
            stride=4,
            intensity=127.5,
            c_w_min=None,
            c_l=True,
            network_type="LC_SNN",
        )
        workflow.add_task(
            func=evaluate,
            params=dict(
                parameters=parameters_,
                out_s3_uri=f"{args.out_s3_uri}/scores/{args.id}/{i}.json",
                sleep=args.sleep,
                train=args.train,
                calibrate=args.calibrate,
                test=args.test,
            ),
            uid=str(i),
            time_req=None,
            max_attempts=args.max_attempts,
            core_req=args.core_req,
            mem_req=args.mem_req,
        )

    workflow.run()
    sys.exit(0 if workflow.successful else 1)
import subprocess as sp
import os
import sys

from cosmos.api import Cosmos

cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)),
                default_drm='local')
cosmos.initdb()

sp.check_call('mkdir -p analysis_output/ex1', shell=True)
os.chdir('analysis_output/ex1')
workflow = cosmos.start('Example1', restart=True, skip_confirm=True)


def say(text, out_file):
    return r"""
    echo "{text}" > {out_file}
    """.format(text=text, out_file=out_file)


t = workflow.add_task(func=say,
                      params=dict(text='Hello World', out_file='out.txt'),
                      uid='my_task',
                      time_req=None,
                      core_req=1,
                      mem_req=1024)

print('task.params', t.params)
print('task.input_map', t.input_map)
print('task.output_map', t.output_map)
print('task.core_req', t.core_req)
print('task.time_req', t.time_req)
print('task.drm', t.drm)
print('task.uid', t.uid)
""" Basic demonstration the structure of a Task instance """ import os from cosmos.api import Cosmos cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__))) cosmos.initdb() execution = cosmos.start('Example1', 'analysis_output/ex3', restart=True, skip_confirm=True) def cmd(in_files, out_file): return r""" echo "{in_files}" > {out_file} """.format(**locals()) t = execution.add_task(cmd, tags=dict(in_files=[('a', 'b', 'in_file')], out_file='out.txt')) print 'Task:', t print 'task.tags', t.tags print 'task.input_files', t.input_files print 'task.output_files', t.output_files
    # --------------------
    # Generate the workflow diagrams (if pygraphviz is installed)
    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(execution.stage_graph(), 'testing/workflow_info/test_task_graph.png', format='png')
        draw_task_graph(execution.task_graph(), 'testing/workflow_info/test_stage_graph.png', format='png')
    else:
        print('Pygraphviz is not available :(')

    execution.run(max_attempts=1, max_cores=10)


# ------------------------
# RUN THE WORKFLOW
if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()
    subprocess.check_call('mkdir -p testing testing/data testing/results testing/workflow_info', shell=True)
    # subprocess.check_call('cp extdata/4.fastq testing/data', shell=False)
    execution = cosmos.start('Testx', 'testing', restart=True, skip_confirm=True)
    run_test(execution)

"""else:
    # --------------------
    # Connect to the cosmos services (database)
    cosmos = Cosmos('sqlite:///sqlite.db')
    cosmos.initdb()

    # --------------------
    # Create the results subdirectories
    subprocess.check_call('mkdir -p testing testing/data testing/results testing/workflow_info', shell=True)
import os

from cosmos.api import Execution, add_execution_args, Cosmos
from configparser import ConfigParser
from cosmos.util.helpers import mkdir

root_path = os.path.dirname(os.path.realpath(__file__))

config = ConfigParser()
config.read(os.path.join(root_path, 'settings.conf'))
settings = config['main']

if __name__ == '__main__':
    import ex1
    import ex_fail
    import ex_email

    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-g', '--growl', action='store_true',
                        help='sends a growl notification on execution status changes')
    parser.add_argument('-d', '--debug', action='store_true', help='launch ipdb on exception')
    sps = parser.add_subparsers(title="Commands", metavar="<command>")
    execution.add_task(tools.filter_bed_by_contig,
                       tags=dict(in_bam=bam_path, in_bed=target_bed_path, contig=contig),
                       out_dir='work/{contig}')
    for contig in contigs]

    freebayes_tasks = one2one(tools.freebayes, bed_tasks, dict(max_complex_gap=max_complex_gap))

    merge_vcf_tasks = many2one(tools.vcf_concat_parts, freebayes_tasks)

    execution.run()


if __name__ == '__main__':
    p = argparse.ArgumentParser()
    p.add_argument('bam_path')
    p.add_argument('target_bed_path')
    p.add_argument('--max_complex_gap', type=int, default=2)
    add_execution_args(p)
    start_kwargs, variant_call_args = pop_execution_args(vars(p.parse_args()))

    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()
    execution = cosmos.start(output_dir='../analysis_output/variant_calling', **start_kwargs)
    variant_call(execution, **variant_call_args)
import os

from cosmos.api import Cosmos

cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
cosmos.runweb('0.0.0.0', 5151)