def main():
    """CLI entry point for the 1000-task example workflow.

    Parses DRM/queue options, configures a Cosmos instance backed by a
    SQLite db next to this file, builds the DAG via ``recipe``, runs it,
    and exits 0 on success / 1 on failure.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-drm", default="local", help="", choices=("local", "drmaa:ge", "ge", "slurm"))
    parser.add_argument("-j", "--job-class", help="Submit to this job class if the DRM supports it")
    parser.add_argument("-q", "--queue", help="Submit to this queue if the DRM supports it")
    opts = parser.parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        # example of how to change arguments if you're not using default_drm='local'
        get_submit_args=partial(default_get_submit_args, parallel_env="smp"),
        default_drm=opts.drm,
        default_max_attempts=2,
        default_job_class=opts.job_class,
        default_queue=opts.queue,
    )
    cosmos.initdb()

    # Run everything inside a dedicated output directory.
    sp.check_call("mkdir -p analysis_output/1000tasks/", shell=True)
    os.chdir("analysis_output/1000tasks/")

    workflow = cosmos.start("1000_tasks", restart=True, skip_confirm=True)
    recipe(workflow)
    workflow.make_output_dirs()
    workflow.run(max_cores=100)

    # Noting here that if you wanted to look at the outputs of any Tasks to decide how to
    # generate the rest of a DAG you can do so here, proceed to add more tasks via
    # workflow.add_task(), and then call workflow.run() again. Yes, it does require running
    # all Tasks in the dag to get the outputs of any Task, and we hope to address that
    # limitation at some point in the future.
    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(workflow.stage_graph(), "/tmp/ex1_task_graph.png", format="png")
        draw_task_graph(workflow.task_graph(), "/tmp/ex1_stage_graph.png", format="png")
    else:
        print("Pygraphviz is not available :(")

    sys.exit(0 if workflow.successful else 1)
def run_ex1(execution):
    """Build the example DAG on *execution* and run it.

    Demonstrates the three stage-dependency patterns: one2many (echo -> cat),
    one2one (cat -> word_count), and many2one (all word_counts -> summary cat),
    then draws the stage/task graphs when pygraphviz is installed.

    FIX: the trailing ``print`` was a Python 2 statement (a SyntaxError under
    Python 3, which the rest of this file targets); converted to a call, which
    is valid in both Python 2 and 3.
    """
    # Create two Tasks that echo "hello" and "world" respectively (these are source nodes in the dag).
    echos = [execution.add_task(echo, tags=dict(word=word), out_dir='{word}')
             for word in ['hello', 'world']]

    # Split each echo into two dependent Tasks (a one2many relationship).
    cats = [execution.add_task(cat, tags=dict(n=n, **echo_task.tags),
                               parents=[echo_task], out_dir='{word}/{n}')
            for echo_task in echos
            for n in [1, 2]]

    # Count the words in the previous stage. An example of a one2one relationship,
    # the most common stage dependency pattern. For each task in StageA, there is
    # a single dependent task in StageB.
    word_counts = [execution.add_task(word_count, tags=dict(chars=True, **cat_task.tags),
                                      parents=[cat_task], out_dir='{word}/{n}')
                   for cat_task in cats]

    # Cat the contents of all word_counts into one file. Only one node is being
    # created whose parents are all of the WordCounts (a many2one relationship).
    summarize = execution.add_task(cat, tags=dict(), parents=word_counts,
                                   out_dir='', stage_name='Summary_Analysis')

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(execution.stage_graph(), '/tmp/ex1_task_graph.png', format='png')
        draw_task_graph(execution.task_graph(), '/tmp/ex1_stage_graph.png', format='png')
    else:
        print('Pygraphviz is not available :(')

    execution.run()
def main():
    """CLI entry point for the Example2 workflow.

    Parses DRM/queue options, runs the ``recipe`` DAG through Cosmos, and
    exits 0 on success / 1 on failure.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-drm", default="local", help="",
                            choices=("local", "awsbatch", "slurm", "drmaa:ge", "ge"))
    arg_parser.add_argument("-q", "--queue", help="Submit to this queue if the DRM supports it")
    options = arg_parser.parse_args()

    cosmos = Cosmos("cosmos.sqlite",
                    default_drm=options.drm,
                    default_max_attempts=2,
                    default_queue=options.queue)
    cosmos.initdb()

    workflow = cosmos.start("Example2", skip_confirm=True)
    recipe(workflow)

    # any parameters that start with out_ are output directories, and will be
    # created if the user calls workflow.make_output_dirs
    workflow.make_output_dirs()
    workflow.run(max_cores=10, cmd_wrapper=py_call)

    # Noting here that if you wanted to look at the outputs of any Tasks to decide how to
    # generate the rest of a DAG you can do so here, proceed to add more tasks via
    # workflow.add_task(), and then call workflow.run() again. Yes, it does require running
    # all Tasks in the dag to get the outputs of any Task, and we hope to address that
    # limitation at some point in the future.
    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(workflow.stage_graph(), "/tmp/ex1_task_graph.png", format="png")
        draw_task_graph(workflow.task_graph(), "/tmp/ex1_stage_graph.png", format="png")
    else:
        print("Pygraphviz is not available :(")

    sys.exit(0 if workflow.successful else 1)
def run_ex1(execution):
    """Build and run the echo/cat/word_count example DAG on *execution*.

    Stages: echo (source nodes) -> cat (one2many) -> word_count (one2one)
    -> summary cat (many2one). Draws the stage/task graphs when pygraphviz
    is available.

    FIX: the trailing ``print`` was a Python 2 statement (a SyntaxError under
    Python 3, which the rest of this file targets); converted to a call, which
    is valid in both Python 2 and 3.
    """
    # Create two Tasks that echo "hello" and "world" respectively (these are source nodes in the dag).
    echos = [execution.add_task(echo, tags=dict(word=word), out_dir='{word}')
             for word in ['hello', 'world']]

    # Split each echo into two dependent Tasks (a one2many relationship).
    cats = [execution.add_task(cat, tags=dict(n=n, **echo_task.tags),
                               parents=[echo_task], out_dir='{word}/{n}')
            for echo_task in echos
            for n in [1, 2]]

    # Count the words in the previous stage. An example of a one2one relationship,
    # the most common stage dependency pattern. For each task in StageA, there is
    # a single dependent task in StageB.
    word_counts = [execution.add_task(word_count, tags=dict(chars=True, **cat_task.tags),
                                      parents=[cat_task], out_dir='{word}/{n}')
                   for cat_task in cats]

    # Cat the contents of all word_counts into one file. Only one node is being
    # created whose parents are all of the WordCounts (a many2one relationship).
    summarize = execution.add_task(cat, tags=dict(), parents=word_counts,
                                   out_dir='', stage_name='Summary_Analysis')

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(execution.stage_graph(), '/tmp/ex1_task_graph.png', format='png')
        draw_task_graph(execution.task_graph(), '/tmp/ex1_stage_graph.png', format='png')
    else:
        print('Pygraphviz is not available :(')

    execution.run()
# NOTE(review): this is a fragment — `p` (the ArgumentParser) is defined
# outside this chunk; presumably this is the body of a main(). Verify against
# the surrounding file.
p.add_argument('-drm', default='local', help='', choices=('local', 'drmaa:ge', 'ge'))
# FIX: typo in user-facing help text ("of the DRM" -> "if the DRM").
p.add_argument('-q', '--queue', help='Submit to this queue if the DRM supports it')
args = p.parse_args()

cosmos = Cosmos(
    'sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)),
    # example of how to change arguments if you're NOT using default_drm='local'
    get_submit_args=partial(default_get_submit_args, parallel_env='smp'),
    default_drm=args.drm,
    default_queue=args.queue,
)
cosmos.initdb()

# Run everything inside a dedicated output directory.
sp.check_call('mkdir -p analysis_output/ex2', shell=True)
os.chdir('analysis_output/ex2')

workflow = cosmos.start('Example2', restart=True, skip_confirm=True)
recipe(workflow)
workflow.make_output_dirs()
workflow.run(max_attempts=1, max_cores=10)

if pygraphviz_available:
    # These images can also be seen on the fly in the web-interface
    draw_stage_graph(workflow.stage_graph(), '/tmp/ex1_task_graph.png', format='png')
    draw_task_graph(workflow.task_graph(), '/tmp/ex1_stage_graph.png', format='png')
else:
    # FIX: was a Python 2 print statement (SyntaxError under Python 3).
    print('Pygraphviz is not available :(')

sys.exit(0 if workflow.successful else 1)
# example of how to change arguments if you're NOT using default_drm='local' get_submit_args=partial(default_get_submit_args, parallel_env='smp'), default_drm=args.drm, default_job_class=args.job_class, default_queue=args.queue) cosmos.initdb() sp.check_call('mkdir -p analysis_output/ex2', shell=True) os.chdir('analysis_output/ex2') workflow = cosmos.start('Example2', restart=True, skip_confirm=True) recipe(workflow) workflow.make_output_dirs() workflow.run(max_cores=10) # Noting here that if you wanted to look at the outputs of any Tasks to decide how to generate the rest of a DAG # you can do so here, proceed to add more tasks via workflow.add_task(), and then call workflow.run() again. # Yes, it does require running all Tasks in the dag to get the outputs of any Task, and we hope to address # that limitation at some point in the future. if pygraphviz_available: # These images can also be seen on the fly in the web-interface draw_stage_graph(workflow.stage_graph(), '/tmp/ex1_task_graph.png', format='png') draw_task_graph(workflow.task_graph(), '/tmp/ex1_stage_graph.png', format='png') else: print 'Pygraphviz is not available :(' sys.exit(0 if workflow.successful else 1)
def run_test(execution):
    """Build and run the FASTQ test pipeline on *execution*.

    Pipeline: plot quality/length stats of the raw FASTQ, apply a quality
    treatment (re-plotting stats), then a length treatment (re-plotting
    stats), and finally convert the result to FASTA. Draws workflow
    schematics when pygraphviz is installed.

    FIX: the ``print`` near the end was a Python 2 statement (a SyntaxError
    under Python 3); converted to a call, valid in both Python 2 and 3.
    Comments translated from French to English.
    """
    # --------------------
    # Generate the initial graphs (quality scores and sequence lengths of the raw FASTQ).
    Format0 = [execution.add_task(format_files,
                                  tags=dict(path2input="testing/data/4.fastq",
                                            path2output="testing/data/formated_initial",
                                            ftype=ftype))
               for ftype in ['quality', 'length']]
    GraphGen0_a = [execution.add_task(r_call,
                                      tags=dict(path2script="plot_quality_scores.R",
                                                path2input="testing/data/formated_initial_quality.txt",
                                                path2output="testing/results/initial_quality_scores.pdf"),
                                      parents=Format0)]
    GraphGen0_b = [execution.add_task(r_call,
                                      tags=dict(path2script="plot_sequence_length.R",
                                                path2input="testing/data/formated_initial_length.txt",
                                                path2output="testing/results/initial_sequence_length.pdf"),
                                      parents=Format0)]

    # --------------------
    # Quality treatment, then graphs After Quality Treatment (AQT).
    QualTR = [execution.add_task(quality_treatment,
                                 tags=dict(path2file="testing/data/4.fastq",
                                           path2output="testing/data/quality_treatment_results.fastq",
                                           threshold=15))]
    Format1 = [execution.add_task(format_files,
                                  tags=dict(path2input="testing/data/quality_treatment_results.fastq",
                                            path2output="testing/data/formated_AQT",
                                            ftype=ftype),
                                  parents=QualTR)
               for ftype in ['quality', 'length']]
    GraphGen1_a = [execution.add_task(r_call,
                                      tags=dict(path2script="plot_quality_scores.R",
                                                path2input="testing/data/formated_AQT_quality.txt",
                                                path2output="testing/results/AQT_quality_scores.pdf"),
                                      parents=Format1)]
    GraphGen1_b = [execution.add_task(r_call,
                                      tags=dict(path2script="plot_sequence_length.R",
                                                path2input="testing/data/formated_AQT_length.txt",
                                                path2output="testing/results/AQT_sequence_length.pdf"),
                                      parents=Format1)]

    # --------------------
    # Sequence-length treatment, then graphs After Length Treatment (ALT).
    LenTR = [execution.add_task(length_treatment,
                                tags=dict(path2file="testing/data/quality_treatment_results.fastq",
                                          path2output="testing/data/length_treatment_results.fastq",
                                          threshold=60),
                                parents=QualTR)]
    Format2 = [execution.add_task(format_files,
                                  tags=dict(path2input="testing/data/length_treatment_results.fastq",
                                            path2output="testing/data/formated_ALT",
                                            ftype=ftype),
                                  parents=LenTR)
               for ftype in ['quality', 'length']]
    GraphGen2_a = [execution.add_task(r_call,
                                      tags=dict(path2script="plot_quality_scores.R",
                                                path2input="testing/data/formated_ALT_quality.txt",
                                                path2output="testing/results/ALT_quality_scores.pdf"),
                                      parents=Format2)]
    GraphGen2_b = [execution.add_task(r_call,
                                      tags=dict(path2script="plot_sequence_length.R",
                                                path2input="testing/data/formated_ALT_length.txt",
                                                path2output="testing/results/ALT_sequence_length.pdf"),
                                      parents=Format2)]

    # --------------------
    # Generate the final FASTA output file.
    FastaFormat = [execution.add_task(fastq2fasta,
                                      tags=dict(path2input="testing/data/length_treatment_results.fastq",
                                                path2output="testing/results/Final.fasta"),
                                      parents=LenTR)]

    # --------------------
    # Draw workflow schematics (if pygraphviz is installed).
    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(execution.stage_graph(), 'testing/workflow_info/test_task_graph.png', format='png')
        draw_task_graph(execution.task_graph(), 'testing/workflow_info/test_stage_graph.png', format='png')
    else:
        print('Pygraphviz is not available :(')

    execution.run(max_attempts=1, max_cores=10)