Example #1
def main():
    p = argparse.ArgumentParser()
    p.add_argument("-drm",
                   default="local",
                   help="",
                   choices=("local", "drmaa:ge", "ge", "slurm"))
    p.add_argument("-j",
                   "--job-class",
                   help="Submit to this job class if the DRM supports it")
    p.add_argument("-q",
                   "--queue",
                   help="Submit to this queue if the DRM supports it")

    args = p.parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        # example of how to change arguments if you're not using default_drm='local'
        get_submit_args=partial(default_get_submit_args, parallel_env="smp"),
        default_drm=args.drm,
        default_max_attempts=2,
        default_job_class=args.job_class,
        default_queue=args.queue,
    )
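    # get_submit_args is the hook Cosmos uses to build the DRM-specific submission flags
    # for each Task.  Instead of partial(default_get_submit_args, ...), a custom callable
    # can be supplied.  A hypothetical sketch (my_submit_args and task.core_req are
    # illustrative assumptions, not part of this example):
    #
    #   def my_submit_args(task, parallel_env="smp"):
    #       # e.g. request a parallel environment sized to the Task's core requirement
    #       return "-pe %s %s" % (parallel_env, task.core_req)
    #
    #   cosmos = Cosmos(..., get_submit_args=my_submit_args)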
    cosmos.initdb()

    sp.check_call("mkdir -p analysis_output/1000tasks/", shell=True)
    os.chdir("analysis_output/1000tasks/")

    workflow = cosmos.start("1000_tasks", restart=True, skip_confirm=True)

    recipe(workflow)

    workflow.make_output_dirs()
    workflow.run(max_cores=100)

    # Note that if you want to inspect the outputs of any Tasks to decide how to generate the rest of the DAG,
    # you can do so here: add more tasks via workflow.add_task(), then call workflow.run() again.
    # It does currently require running all Tasks in the DAG to get the outputs of any Task,
    # a limitation we hope to address at some point in the future.
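    # A hypothetical sketch of that pattern (needs_more_work and follow_up are illustrative
    # names; it is assumed the finished Tasks can be reached via workflow.tasks):
    #
    #   for task in list(workflow.tasks):
    #       if needs_more_work(task):
    #           workflow.add_task(follow_up, parents=[task], ...)
    #   workflow.run(max_cores=100)  # picks up the newly added Tasks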

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(workflow.stage_graph(),
                         "/tmp/ex1_stage_graph.png",
                         format="png")
        draw_task_graph(workflow.task_graph(),
                        "/tmp/ex1_task_graph.png",
                        format="png")
    else:
        print("Pygraphviz is not available :(")

    sys.exit(0 if workflow.successful else 1)
Example #2
def run_ex1(execution):
    # Create two Tasks that echo "hello" and "world" respectively (these are source nodes in the dag).
    echos = [
        execution.add_task(echo, tags=dict(word=word), out_dir='{word}')
        for word in ['hello', 'world']
    ]

    # Split each echo into two dependent Tasks (a one2many relationship).
    cats = [
        execution.add_task(cat,
                           tags=dict(n=n, **echo_task.tags),
                           parents=[echo_task],
                           out_dir='{word}/{n}') for echo_task in echos
        for n in [1, 2]
    ]

    # Count the words in the previous stage.  An example of a one2one relationship,
    # the most common stage dependency pattern.  For each task in StageA, there is a single dependent task in StageB.
    word_counts = [
        execution.add_task(word_count,
                           tags=dict(chars=True, **cat_task.tags),
                           parents=[cat_task],
                           out_dir='{word}/{n}') for cat_task in cats
    ]

    # Cat the contents of all word_counts into one file.  Only one node is created, whose parents
    # are all of the WordCounts (a many2one relationship).
    summarize = execution.add_task(cat,
                                   tags=dict(),
                                   parents=word_counts,
                                   out_dir='',
                                   stage_name='Summary_Analysis')

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(execution.stage_graph(),
                         '/tmp/ex1_stage_graph.png',
                         format='png')
        draw_task_graph(execution.task_graph(),
                        '/tmp/ex1_task_graph.png',
                        format='png')
    else:
        print('Pygraphviz is not available :(')

    execution.run()
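
The task functions referenced above (echo, cat, word_count) are defined elsewhere in the project this example comes from; in Cosmos they are typically plain Python functions that return the shell command to execute. The sketch below is an assumption about their general shape (parameter names such as in_txts and out_txt are illustrative, not taken from this listing):

def echo(word, out_txt):
    # write the word to a text file
    return 'echo {word} > {out_txt}'.format(word=word, out_txt=out_txt)


def word_count(in_txts, out_txt, chars=False):
    # count words (or characters when chars=True) in the parents' outputs
    flag = '-c' if chars else '-w'
    return 'wc {flag} {inputs} > {out_txt}'.format(
        flag=flag, inputs=' '.join(map(str, in_txts)), out_txt=out_txt)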
Example #3
def main():
    p = argparse.ArgumentParser()
    p.add_argument("-drm",
                   default="local",
                   help="",
                   choices=("local", "awsbatch", "slurm", "drmaa:ge", "ge"))
    p.add_argument("-q",
                   "--queue",
                   help="Submit to this queue if the DRM supports it")

    args = p.parse_args()

    cosmos = Cosmos("cosmos.sqlite",
                    default_drm=args.drm,
                    default_max_attempts=2,
                    default_queue=args.queue)
    cosmos.initdb()

    workflow = cosmos.start("Example2", skip_confirm=True)

    recipe(workflow)

    # any parameters that start with out_ are output directories, and will be created if
    # the user calls workflow.make_output_dirs
    workflow.make_output_dirs()
    workflow.run(max_cores=10, cmd_wrapper=py_call)
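    # With cmd_wrapper=py_call, the task functions added by recipe() are executed as real
    # Python calls instead of shell-command strings.  A hypothetical task illustrating the
    # out_ naming convention mentioned above (say_hello, out_dir and the add_task keyword
    # names are assumptions, not taken from this example):
    #
    #   def say_hello(name, out_dir):
    #       # out_dir starts with "out_", so make_output_dirs() creates it before the Task runs
    #       with open(os.path.join(out_dir, "hello.txt"), "w") as fp:
    #           fp.write("hello %s\n" % name)
    #
    #   workflow.add_task(func=say_hello, params=dict(name="world", out_dir="hello/"), uid="hello")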

    # Note that if you want to inspect the outputs of any Tasks to decide how to generate the rest of the DAG,
    # you can do so here: add more tasks via workflow.add_task(), then call workflow.run() again.
    # It does currently require running all Tasks in the DAG to get the outputs of any Task,
    # a limitation we hope to address at some point in the future.

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(workflow.stage_graph(),
                         "/tmp/ex1_stage_graph.png",
                         format="png")
        draw_task_graph(workflow.task_graph(),
                        "/tmp/ex1_task_graph.png",
                        format="png")
    else:
        print("Pygraphviz is not available :(")

    sys.exit(0 if workflow.successful else 1)
Example #4
def run_ex1(execution):
    # Create two Tasks that echo "hello" and "world" respectively (these are source nodes in the dag).
    echos = [execution.add_task(echo,
                                tags=dict(word=word),
                                out_dir='{word}')
             for word in ['hello', 'world']]

    # Split each echo into two dependent Tasks (a one2many relationship).
    cats = [execution.add_task(cat,
                               tags=dict(n=n, **echo_task.tags),
                               parents=[echo_task],
                               out_dir='{word}/{n}')
            for echo_task in echos
            for n in [1, 2]]

    # Count the words in the previous stage.  An example of a one2one relationship,
    # the most common stage dependency pattern.  For each task in StageA, there is a single dependent task in StageB.
    word_counts = [execution.add_task(word_count,
                                      tags=dict(chars=True, **cat_task.tags),
                                      parents=[cat_task],
                                      out_dir='{word}/{n}')
                   for cat_task in cats]

    # Cat the contents of all word_counts into one file.  Only one node is created, whose parents
    # are all of the WordCounts (a many2one relationship).
    summarize = execution.add_task(cat,
                                   tags=dict(),
                                   parents=word_counts,
                                   out_dir='',
                                   stage_name='Summary_Analysis')

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(execution.stage_graph(), '/tmp/ex1_stage_graph.png', format='png')
        draw_task_graph(execution.task_graph(), '/tmp/ex1_task_graph.png', format='png')
    else:
        print('Pygraphviz is not available :(')

    execution.run()
Example #5
    p.add_argument('-drm', default='local', help='', choices=('local', 'drmaa:ge', 'ge'))
    p.add_argument('-q', '--queue', help='Submit to this queue if the DRM supports it')

    args = p.parse_args()

    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)),
                    # example of how to change arguments if you're NOT using default_drm='local'
                    get_submit_args=partial(default_get_submit_args, parallel_env='smp'),
                    default_drm=args.drm,
                    default_queue=args.queue)
    cosmos.initdb()

    sp.check_call('mkdir -p analysis_output/ex2', shell=True)
    os.chdir('analysis_output/ex2')

    workflow = cosmos.start('Example2', restart=True, skip_confirm=True)

    recipe(workflow)

    workflow.make_output_dirs()
    workflow.run(max_attempts=1, max_cores=10)

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(workflow.stage_graph(), '/tmp/ex1_stage_graph.png', format='png')
        draw_task_graph(workflow.task_graph(), '/tmp/ex1_task_graph.png', format='png')
    else:
        print('Pygraphviz is not available :(')

    sys.exit(0 if workflow.successful else 1)
Example #6
                    # example of how to change arguments if you're NOT using default_drm='local'
                    get_submit_args=partial(default_get_submit_args, parallel_env='smp'),
                    default_drm=args.drm,
                    default_job_class=args.job_class,
                    default_queue=args.queue)
    cosmos.initdb()

    sp.check_call('mkdir -p analysis_output/ex2', shell=True)
    os.chdir('analysis_output/ex2')

    workflow = cosmos.start('Example2', restart=True, skip_confirm=True)

    recipe(workflow)

    workflow.make_output_dirs()
    workflow.run(max_cores=10)

    # Note that if you want to inspect the outputs of any Tasks to decide how to generate the rest of the DAG,
    # you can do so here: add more tasks via workflow.add_task(), then call workflow.run() again.
    # It does currently require running all Tasks in the DAG to get the outputs of any Task,
    # a limitation we hope to address at some point in the future.

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(workflow.stage_graph(), '/tmp/ex1_stage_graph.png', format='png')
        draw_task_graph(workflow.task_graph(), '/tmp/ex1_task_graph.png', format='png')
    else:
        print('Pygraphviz is not available :(')

    sys.exit(0 if workflow.successful else 1)
Example #7
def run_test(execution):

    # --------------------
    # Generate the initial graphs

    Format0 = [execution.add_task(format_files,
                                  tags=dict(path2input="testing/data/4.fastq",
                                            path2output="testing/data/formated_initial",
                                            ftype=ftype))
               for ftype in ['quality', 'length']]
    GraphGen0_a = [execution.add_task(r_call,
                                      tags=dict(path2script="plot_quality_scores.R",
                                                path2input="testing/data/formated_initial_quality.txt",
                                                path2output="testing/results/initial_quality_scores.pdf"),
                                      parents=Format0)]
    GraphGen0_b = [execution.add_task(r_call,
                                      tags=dict(path2script="plot_sequence_length.R",
                                                path2input="testing/data/formated_initial_length.txt",
                                                path2output="testing/results/initial_sequence_length.pdf"),
                                      parents=Format0)]
    
    # --------------------
    # Quality treatment and generation of the After Quality Treatment (AQT) graphs

    QualTR = [execution.add_task(quality_treatment,
                                 tags=dict(path2file="testing/data/4.fastq",
                                           path2output="testing/data/quality_treatment_results.fastq",
                                           threshold=15))]

    Format1 = [execution.add_task(format_files,
                                  tags=dict(path2input="testing/data/quality_treatment_results.fastq",
                                            path2output="testing/data/formated_AQT",
                                            ftype=ftype),
                                  parents=QualTR)
               for ftype in ['quality', 'length']]
    GraphGen1_a = [execution.add_task(r_call,
                                      tags=dict(path2script="plot_quality_scores.R",
                                                path2input="testing/data/formated_AQT_quality.txt",
                                                path2output="testing/results/AQT_quality_scores.pdf"),
                                      parents=Format1)]
    GraphGen1_b = [execution.add_task(r_call,
                                      tags=dict(path2script="plot_sequence_length.R",
                                                path2input="testing/data/formated_AQT_length.txt",
                                                path2output="testing/results/AQT_sequence_length.pdf"),
                                      parents=Format1)]
   
    # --------------------
    # Sequence-length treatment and generation of the After Length Treatment (ALT) graphs

    LenTR = [execution.add_task(length_treatment,
                                tags=dict(path2file="testing/data/quality_treatment_results.fastq",
                                          path2output="testing/data/length_treatment_results.fastq",
                                          threshold=60),
                                parents=QualTR)]

    Format2 = [execution.add_task(format_files,
                                  tags=dict(path2input="testing/data/length_treatment_results.fastq",
                                            path2output="testing/data/formated_ALT",
                                            ftype=ftype),
                                  parents=LenTR)
               for ftype in ['quality', 'length']]

    GraphGen2_a = [execution.add_task(r_call,
                                      tags=dict(path2script="plot_quality_scores.R",
                                                path2input="testing/data/formated_ALT_quality.txt",
                                                path2output="testing/results/ALT_quality_scores.pdf"),
                                      parents=Format2)]
    GraphGen2_b = [execution.add_task(r_call,
                                      tags=dict(path2script="plot_sequence_length.R",
                                                path2input="testing/data/formated_ALT_length.txt",
                                                path2output="testing/results/ALT_sequence_length.pdf"),
                                      parents=Format2)]
   
    # --------------------
    # Generate a FASTA output file

    FastaFormat = [execution.add_task(fastq2fasta,
                                      tags=dict(path2input="testing/data/length_treatment_results.fastq",
                                                path2output="testing/results/Final.fasta"),
                                      parents=LenTR)]
    # --------------------
    # Draw the workflow diagrams (if pygraphviz is installed)
    
    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(execution.stage_graph(), 'testing/workflow_info/test_stage_graph.png', format='png')
        draw_task_graph(execution.task_graph(), 'testing/workflow_info/test_task_graph.png', format='png')
    else:
        print('Pygraphviz is not available :(')

    execution.run(max_attempts=1, max_cores=10)
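
The helpers used throughout this example (format_files, r_call, quality_treatment, length_treatment, fastq2fasta) are project-specific and not shown in this listing. As a rough illustration only, an r_call-style task function would typically just assemble the command line from its tags; the definition below is a hypothetical sketch, not the project's actual code:

def r_call(path2script, path2input, path2output):
    # invoke an R plotting script on the formatted table and write the figure
    return 'Rscript {path2script} {path2input} {path2output}'.format(
        path2script=path2script, path2input=path2input, path2output=path2output)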