Code Example #1
def main():
    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="local",
    )
    cosmos.initdb()

    sp.check_call("mkdir -p analysis_output/ex1", shell=True)
    os.chdir("analysis_output/ex1")
    workflow = cosmos.start("Example1", restart=True, skip_confirm=True)

    t = workflow.add_task(
        func=say,
        params=dict(text="Hello World", out_file="out.txt"),
        uid="my_task",
        time_req=None,
        core_req=1,
        mem_req=1024,
    )

    print(("task.params", t.params))
    print(("task.input_map", t.input_map))
    print(("task.output_map", t.output_map))
    print(("task.core_req", t.core_req))
    print(("task.time_req", t.time_req))
    print(("task.drm", t.drm))
    print(("task.uid", t.uid))

    workflow.run()

    sys.exit(0 if workflow.successful else 1)
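Example #1 passes func=say without defining it in the excerpt. Examples #22, #23, and #35 further down this page show the definition: a plain function that returns the shell command Cosmos executes for the task.

def say(text, out_file):
    # the returned string is run as a shell command by the task
    return r"""
        echo "{text}" > {out_file}
    """.format(text=text, out_file=out_file)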
Code Example #2
def main():
    # start cosmos engine
    cosmos = Cosmos(
        database_url="sqlite://",
        default_drm="local",
        # default_drm="ge",
        default_queue="dev-short",
        default_drm_options={},
        get_submit_args=partial(default_get_submit_args, parallel_env="smp"),
    )
    cosmos.initdb()

    # create cosmos workflow
    workflow = cosmos.start(
        # NOTE cosmos will make dirs in this path
        # primary_log_path=os.path.join("logs", "cosmos.log"),
        name="blah",
        restart=True,
        skip_confirm=True,
        fail_fast=True,
    )

    for i in range(100):
        print("add {}".format(i))
        silly_recipe(workflow, i, 100)

    workflow.make_output_dirs()

    # run cosmos workflow
    # with SGESignalHandler(workflow):
    workflow.run()
Code Example #3
def main():
    cosmos = Cosmos("cosmos.sqlite").initdb()

    workflow = cosmos.start("ex1", skip_confirm=True)

    t = workflow.add_task(
        func=say,
        params=dict(text="Hello World", out_file="out.txt"),
        uid="my_task",
        time_req=None,
        core_req=1,
        mem_req=1024,
    )

    print(("task.params", t.params))
    print(("task.input_map", t.input_map))
    print(("task.output_map", t.output_map))
    print(("task.core_req", t.core_req))
    print(("task.time_req", t.time_req))
    print(("task.drm", t.drm))
    print(("task.uid", t.uid))

    workflow.run(cmd_wrapper=py_call)

    sys.exit(0 if workflow.successful else 1)
Code Example #4
File: env_variables.py  Project: indraniel/COSMOS2
def main():
    cosmos = Cosmos()
    cosmos.initdb()
    workflow = cosmos.start("env_variables", skip_confirm=True)
    workflow.add_task(func=command_with_env_variables,
                      environment_variables=environment_variables_dict,
                      uid="special")
    workflow.run(cmd_wrapper=py_call)
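The excerpt above references command_with_env_variables and environment_variables_dict without showing them. A minimal, purely hypothetical sketch of what they might look like (these bodies and values are assumptions, not the original file):

import os

# hypothetical values; the original dictionary is not shown in the excerpt
environment_variables_dict = {"MY_SETTING": "42"}

def command_with_env_variables():
    # with cmd_wrapper=py_call the task runs as a Python function; the variables
    # passed via environment_variables are expected to be visible in os.environ
    # (assumption based on the parameter name)
    assert os.environ.get("MY_SETTING") == "42"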
Code Example #5
def test_zero_tasks():
    cosmos = Cosmos()
    cosmos.initdb()
    temp_dir = tempfile.mkdtemp()
    with cd(temp_dir):
        workflow = cosmos.start('workflow', skip_confirm=True)
        workflow.run(set_successful=False)
        workflow.run(cmd_wrapper=py_call)

    shutil.rmtree(temp_dir)
Code Example #6
def main():
    p = argparse.ArgumentParser()
    p.add_argument("-drm",
                   default="local",
                   help="",
                   choices=("local", "drmaa:ge", "ge", "slurm"))
    p.add_argument("-j",
                   "--job-class",
                   help="Submit to this job class if the DRM supports it")
    p.add_argument("-q",
                   "--queue",
                   help="Submit to this queue if the DRM supports it")

    args = p.parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        # example of how to change arguments if you're not using default_drm='local'
        get_submit_args=partial(default_get_submit_args, parallel_env="smp"),
        default_drm=args.drm,
        default_max_attempts=2,
        default_job_class=args.job_class,
        default_queue=args.queue,
    )
    cosmos.initdb()

    sp.check_call("mkdir -p analysis_output/1000tasks/", shell=True)
    os.chdir("analysis_output/1000tasks/")

    workflow = cosmos.start("1000_tasks", restart=True, skip_confirm=True)

    recipe(workflow)

    workflow.make_output_dirs()
    workflow.run(max_cores=100)

    # Note: if you want to inspect the outputs of finished Tasks in order to generate the rest of a DAG,
    # you can do so here: add more tasks via workflow.add_task() and then call workflow.run() again
    # (a short sketch follows this example).
    # This currently requires running every Task in the DAG before any Task's outputs are available,
    # a limitation we hope to address in the future.

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(workflow.stage_graph(),
                         "/tmp/ex1_task_graph.png",
                         format="png")
        draw_task_graph(workflow.task_graph(),
                        "/tmp/ex1_stage_graph.png",
                        format="png")
    else:
        print("Pygraphviz is not available :(")

    sys.exit(0 if workflow.successful else 1)
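The comment above about generating the rest of a DAG from Task outputs can be made concrete with a small editorial sketch (not part of the original example). It assumes workflow.tasks exposes the Tasks added so far and uses a hypothetical follow-up task function summarize_output; set_successful=False keeps the workflow open for more tasks, as in Example #5.

# editor's sketch: grow the DAG after a first run(), then run() again
workflow.run(max_cores=100, set_successful=False)
for task in workflow.tasks:
    workflow.add_task(
        func=summarize_output,  # hypothetical follow-up task function
        params=dict(inputs=task.output_map, out_file="summary_%s.txt" % task.uid),
        uid="summary_%s" % task.uid,
        parents=[task],
    )
workflow.run(max_cores=100)  # executes only the newly added tasks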
Code Example #7
def main():
    cosmos = Cosmos()
    cosmos.initdb()
    workflow = cosmos.start('test', skip_confirm=True)
    for i, num_gpus in enumerate([1, 1, 2, 2, 3]):
        task = workflow.add_task(use_cuda_device,
                                 dict(some_arg=i, num_gpus=num_gpus),
                                 gpu_req=num_gpus,
                                 uid=str(i))

    workflow.run(max_gpus=len(
        os.environ['COSMOS_LOCAL_GPU_DEVICES'].split(',')),
                 cmd_wrapper=py_call,
                 cleanup_at_exit=False)
Code Example #8
File: local_gpus.py  Project: indraniel/COSMOS2
def main():
    cosmos = Cosmos().initdb()
    workflow = cosmos.start("gpu", skip_confirm=True)

    for i, num_gpus in enumerate([1, 1, 2, 2, 3]):
        task = workflow.add_task(
            use_cuda_device,
            dict(some_arg=i, num_gpus=num_gpus),
            gpu_req=num_gpus,
            uid=str(i),
        )

    workflow.run(
        max_gpus=len(os.environ["COSMOS_LOCAL_GPU_DEVICES"].split(",")),
        cmd_wrapper=py_call,
    )
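Examples #7 and #8 schedule tasks by gpu_req against COSMOS_LOCAL_GPU_DEVICES but do not include use_cuda_device. A hypothetical sketch of such a task function (an assumption; the real definition is not in these excerpts):

import os

def use_cuda_device(some_arg, num_gpus):
    # a real task would launch CUDA work here; the GPUs reserved for the task
    # are assumed to be exposed via CUDA_VISIBLE_DEVICES
    devices = os.environ.get("CUDA_VISIBLE_DEVICES", "")
    print("task %s requested %d GPU(s), sees: %s" % (some_arg, num_gpus, devices))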
Code Example #9
File: environment.py  Project: egafni/GenomeKey2
    def __init__(self, config_path, reference_version):
        assert os.path.exists(config_path), '%s does not exist' % config_path
        assert reference_version in [
            'hg38', 'b37'
        ], 'bad reference_version: %s' % reference_version
        self.config_path = config_path
        self.config = ConfigParser(interpolation=ExtendedInterpolation())
        self.config.read(config_path)
        self.config.add_section('ref')
        for k, v in self.config['ref_%s' % reference_version].items():
            self.config.set('ref', k, v)

        assert len(self.config['ref'].items()) > 1
        # set_env_aws_credentials()

        os.environ['REQUESTS_CA_BUNDLE'] = '/etc/ssl/certs/ca-certificates.crt'
        # export REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
        from cosmos.web.gemon.views import bprint as gemon_bprint
        from cosmos.api import Cosmos, default_get_submit_args
        from functools import partial
        from flask import Flask

        flask_app = Flask('genomekey',
                          template_folder=os.path.join(library_path,
                                                       'web/templates'))

        flask_app.secret_key = '\x16\x89\xf5-\tK`\xf5FY.\xb9\x9c\xb4qX\xfdm\x19\xbd\xdd\xef\xa9\xe2'
        flask_app.register_blueprint(gemon_bprint, url_prefix='/gemon')
        self.flask_app = flask_app
        self.cosmos_app = Cosmos(self.config['gk']['database_url'],
                                 default_drm=self.config['gk']['default_drm'],
                                 flask_app=flask_app,
                                 get_submit_args=partial(
                                     default_get_submit_args,
                                     parallel_env='orte'))
Code Example #10
File: ex3_pycall.py  Project: indraniel/COSMOS2
def main():
    p = ArgumentParser()
    p.add_argument("--sleep", default=0, type=int)
    args = p.parse_args()

    cosmos = Cosmos("cosmos.sqlite").initdb()
    workflow = cosmos.start("ex3", restart=True, skip_confirm=True)

    t1 = workflow.add_task(func=say,
                           params=dict(text="Hello World", out_file="out.txt"),
                           uid="my_task")
    t2 = workflow.add_task(func=sleep,
                           params=dict(secs=args.sleep),
                           uid="my_task")

    workflow.make_output_dirs()
    workflow.run(cmd_wrapper=py_call)
Code Example #11
File: ex2_complete.py  Project: indraniel/COSMOS2
def main():
    p = argparse.ArgumentParser()
    p.add_argument("-drm",
                   default="local",
                   help="",
                   choices=("local", "awsbatch", "slurm", "drmaa:ge", "ge"))
    p.add_argument("-q",
                   "--queue",
                   help="Submit to this queue if the DRM supports it")

    args = p.parse_args()

    cosmos = Cosmos("cosmos.sqlite",
                    default_drm=args.drm,
                    default_max_attempts=2,
                    default_queue=args.queue)
    cosmos.initdb()

    workflow = cosmos.start("Example2", skip_confirm=True)

    recipe(workflow)

    # any parameters that start with out_ are output directories, and will be created if
    # the user calls workflow.make_output_dirs
    workflow.make_output_dirs()
    workflow.run(max_cores=10, cmd_wrapper=py_call)

    # Note: if you want to inspect the outputs of finished Tasks in order to generate the rest of a DAG,
    # you can do so here: add more tasks via workflow.add_task() and then call workflow.run() again.
    # This currently requires running every Task in the DAG before any Task's outputs are available,
    # a limitation we hope to address in the future.

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(workflow.stage_graph(),
                         "/tmp/ex1_task_graph.png",
                         format="png")
        draw_task_graph(workflow.task_graph(),
                        "/tmp/ex1_stage_graph.png",
                        format="png")
    else:
        print("Pygraphviz is not available :(")

    sys.exit(0 if workflow.successful else 1)
Code Example #12
File: ex_awsbatch.py  Project: egafni/COSMOS2
def main():
    args = parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="awsbatch",
        default_drm_options=dict(
            container_image=args.container_image,
            s3_prefix_for_command_script_temp_files=args.s3_prefix_for_command_script_temp_files,
            # only retry on spot instance death
            retry_only_if_status_reason_matches="Host EC2 .+ terminated.",
        ),
        default_queue=args.default_queue,
    )
    cosmos.initdb()

    sp.check_call("mkdir -p analysis_output/ex1", shell=True)
    os.chdir("analysis_output/ex1")
    workflow = cosmos.start("Example1", restart=True, skip_confirm=True)

    t = workflow.add_task(
        func=get_instance_info,
        params=dict(out_s3_uri=args.out_s3_uri, sleep=args.sleep),
        uid="",
        time_req=None,
        max_attempts=args.max_attempts,
        core_req=args.core_req,
        mem_req=1024,
    )
    workflow.run()

    print(("task.params", t.params))
    print(("task.input_map", t.input_map))
    print(("task.output_map", t.output_map))
    print(("task.core_req", t.core_req))
    print(("task.time_req", t.time_req))
    print(("task.drm", t.drm))
    print(("task.uid", t.uid))
    print(("task.drm_options", t.drm_options))
    print(("task.queue", t.queue))

    sys.exit(0 if workflow.successful else 1)
Code Example #13
File: ex_gpu.py  Project: egafni/COSMOS2
def main(output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    cosmos = Cosmos()
    cosmos.initdb()
    workflow = cosmos.start(
        "test", skip_confirm=True, primary_log_path=os.path.join(output_dir, "workflow.log"),
    )
    for i, num_gpus in enumerate([1, 1, 2, 2, 3]):
        task = workflow.add_task(
            use_cuda_device, dict(some_arg=i, num_gpus=num_gpus), gpu_req=num_gpus, uid=str(i),
        )

    workflow.run(
        max_gpus=len(os.environ["COSMOS_LOCAL_GPU_DEVICES"].split(",")),
        cmd_wrapper=py_call_cmd_wrapper,
        do_cleanup_atexit=False,
        log_out_dir_func=partial(default_task_log_output_dir, prefix="%s" % output_dir),
    )
Code Example #14
File: ex3.py  Project: egafni/COSMOS2
def main():
    p = ArgumentParser()
    p.add_argument("--sleep", default=0, type=int)
    args = p.parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="local",
    )
    cosmos.initdb()
    workflow = cosmos.start("Example3", restart=True, skip_confirm=True)

    t1 = workflow.add_task(func=say,
                           params=dict(text="Hello World", out_file="out.txt"),
                           uid="my_task")
    t2 = workflow.add_task(func=sleep,
                           params=dict(secs=args.sleep),
                           uid="my_task")

    workflow.make_output_dirs()
    workflow.run(cmd_wrapper=py_call_cmd_wrapper)
Code Example #15
File: mie.py  Project: vifehe/yaps2
    def __init__(self, config, drm, restart):
        self.config = config

        self.cosmos = Cosmos(database_url='sqlite:///{}'.format(
            self.config.db),
                             get_submit_args=default_get_submit_args,
                             default_drm=drm)

        self.cosmos.initdb()

        primary_logfile = os.path.join(
            self.config.rootdir,
            '{}.log'.format(self.config.project_name),
        )

        self.workflow = self.cosmos.start(
            self.config.project_name,
            primary_log_path=primary_logfile,
            restart=restart,
        )

        self.setup_pipeline()
Code Example #16
File: duplicate_uids.py  Project: indraniel/COSMOS2
def main():
    cosmos = Cosmos().initdb()
    workflow = cosmos.start("duplicate_uids", skip_confirm=True)
    task = workflow.add_task(func=prepare_data, params=dict(a=1), uid="x")

    # normally you can't add a task with the same uid to the same stage
    with pytest.raises(DuplicateUid):
        workflow.add_task(func=prepare_data, params=dict(a=1), uid="x")

    # set if_duplicate="return" to get back the same task that was originally added
    task2 = workflow.add_task(func=prepare_data,
                              params=dict(a=1),
                              uid="x",
                              if_duplicate="return")
    assert task == task2

    # this can be especially useful in loops to avoid repeating computation
    for _ in range(3):
        task = workflow.add_task(func=prepare_data,
                                 params=dict(a=1),
                                 uid="x",
                                 if_duplicate="return")
        workflow.add_task(func=train_machine_learning_model,
                          params=dict(a=1),
                          uid="x",
                          if_duplicate="return",
                          parents=task)

    # NOTE: parameters must be identical when using this feature
    with pytest.raises(InvalidParams):
        workflow.add_task(func=prepare_data,
                          params=dict(a=1000),
                          uid="x",
                          if_duplicate="return")
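The test above relies on prepare_data and train_machine_learning_model, which are not included in the excerpt. Trivial, hypothetical stand-ins such as the following would let it run:

def prepare_data(a):
    # hypothetical stand-in; any callable with a matching signature works for the test
    return a

def train_machine_learning_model(a):
    # hypothetical stand-in
    return a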
Code Example #17
                   help='',
                   choices=('local', 'drmaa:ge', 'ge', 'slurm'))
    p.add_argument('-j',
                   '--job-class',
                   help='Submit to this job class if the DRM supports it')
    p.add_argument('-q',
                   '--queue',
                   help='Submit to this queue if the DRM supports it')

    args = p.parse_args()

    cosmos = Cosmos(
        'sqlite:///%s/sqlite.db?check_same_thread=False' %
        os.path.dirname(os.path.abspath(__file__)),
        # example of how to change arguments if you're not using default_drm='local'
        get_submit_args=partial(default_get_submit_args, parallel_env='smp'),
        default_drm=args.drm,
        default_max_attempts=2,
        default_job_class=args.job_class,
        default_queue=args.queue)
    cosmos.initdb()

    sp.check_call('mkdir -p analysis_output/ex2', shell=True)
    os.chdir('analysis_output/ex2')

    workflow = cosmos.start('Example2', restart=True, skip_confirm=True)

    recipe(workflow)

    workflow.make_output_dirs()
    workflow.run(max_cores=10)
Code Example #18
def make_app(database_url):
    cosmos = Cosmos(database_url)
    flask = cosmos.init_flask()
    return flask
Code Example #19
File: main.py  Project: Romain-B/Cosmos_tests
from cosmos.api import Execution, add_execution_args, Cosmos
from configparser import ConfigParser
from cosmos.util.helpers import mkdir

root_path = os.path.dirname(os.path.realpath(__file__))
config = ConfigParser()
config.read(os.path.join(root_path, 'settings.conf'))
settings = config['main']

if __name__ == '__main__':

    import sys
    #sys.path.append('./src') 
    #from src import testxx

    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-g', '--growl', action='store_true',
                        help='sends a growl notification on execution status changes')
    parser.add_argument('-d', '--debug', action='store_true',
                        help='launch ipdb on exception')
    sps = parser.add_subparsers(title="Commands", metavar="<command>")

    sp = sps.add_parser('resetdb', help=cosmos.resetdb.__doc__)
    sp.set_defaults(func=cosmos.resetdb)

    sp = sps.add_parser('initdb', help=cosmos.initdb.__doc__)
Code Example #20
    # all of the WordCounts (a many2one relationship).
    summarize = execution.add_task(cat, dict(), word_counts, '',
                                   'Summary_Analysis')

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(execution.stage_graph(),
                         '/tmp/ex2_task_graph.png',
                         format='png')
        draw_task_graph(execution.task_graph(),
                        '/tmp/ex2_stage_graph.png',
                        format='png')
    else:
        print('Pygraphviz is not available :(')

    execution.run()


if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' %
                    os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    execution = cosmos.start('Example2',
                             'analysis_output/ex2',
                             max_attempts=1,
                             restart=True,
                             skip_confirm=True,
                             max_cpus=10)
    run_ex2(execution)
Code Example #21
File: gunicorn.py  Project: LPM-HMS/COSMOS2
def make_app(database_url):
    cosmos = Cosmos(database_url)
    flask = cosmos.init_flask()
    return flask
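Examples #18 and #21 only build the Flask app object. For a quick local look at the web interface you could serve it with Flask's development server (a sketch; the host, port, and database path are arbitrary choices):

if __name__ == "__main__":
    app = make_app("sqlite:///cosmos.sqlite")
    app.run(host="0.0.0.0", port=5000)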
Code Example #22
"""
Basic demonstration of the structure of a Task instance
"""
import subprocess as sp
import os
import sys
from cosmos.api import Cosmos, default_get_submit_args
from functools import partial

cosmos = Cosmos('sqlite:///%s/sqlite.db' %
                os.path.dirname(os.path.abspath(__file__)),
                get_submit_args=partial(default_get_submit_args,
                                        parallel_env='smp'),
                default_drm='local')
cosmos.initdb()

sp.check_call('mkdir -p analysis_output/ex1', shell=True)
os.chdir('analysis_output/ex1')
workflow = cosmos.start('Example1', restart=True, skip_confirm=True)


def say(text, out_file, core_req=1):
    return r"""
        echo "{text}" > {out_file}
    """.format(text=text, out_file=out_file)


t = workflow.add_task(func=say,
                      params=dict(text='Hello World',
                                  out_file='out.txt',
                                  core_req=2),
Code Example #23
"""
Basic demonstration of the structure of a Task instance
"""
import subprocess as sp
import os
import sys
from cosmos.api import Cosmos

cosmos = Cosmos('sqlite:///%s/sqlite.db' %
                os.path.dirname(os.path.abspath(__file__)),
                default_drm='local')
cosmos.initdb()

sp.check_call('mkdir -p analysis_output/ex1', shell=True)
os.chdir('analysis_output/ex1')
workflow = cosmos.start('Example1', restart=True, skip_confirm=True)


def say(text, out_file):
    return r"""
        echo "{text}" > {out_file}
    """.format(text=text, out_file=out_file)


t = workflow.add_task(func=say,
                      params=dict(
                          text='Hello World',
                          out_file='out.txt',
                      ),
                      uid='my_task',
                      time_req=None,
Code Example #24
File: ex_email.py  Project: yanding/COSMOS-2.0
from cosmos.api import Cosmos, signal_execution_status_change, ExecutionStatus
from ex1 import run_ex1
import os
from cosmos.util.helpers import mkdir

def run_ex3(execution):
    @signal_execution_status_change.connect
    def sig(ex):
        msg = "%s %s" % (ex, ex.status)
        if ex.status in [ExecutionStatus.successful, ExecutionStatus.failed, ExecutionStatus.killed]:
            text_message(msg)
            ex.log.info('Sent a text message')

    def text_message(message):
        from twilio.rest import TwilioRestClient

        account = "XYZ"
        token = "XYZ"
        client = TwilioRestClient(account, token)

        message = client.messages.create(to="+1231231234", from_="+1231231234", body=message)

    run_ex1(execution)


if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    execution = cosmos.start('Example_Email', 'analysis_output/ex3', max_attempts=2, restart=True, skip_confirm=True)
    run_ex1(execution)
Code Example #25
File: recipe.py  Project: yanding/COSMOS-2.0
def variant_call(execution, bam_path, target_bed_path, max_complex_gap):
    """
    Bioinformatics variant calling workflow
    """
    contigs = sp.check_output("cat %s |cut -f1|uniq" % target_bed_path, shell=True).strip().split("\n")

    bed_tasks = [execution.add_task(tools.filter_bed_by_contig, tags=dict(in_bam=bam_path, in_bed=target_bed_path, contig=contig), out_dir='work/{contig}')
                 for contig in contigs ]

    freebayes_tasks = one2one(tools.freebayes, bed_tasks, dict(max_complex_gap=max_complex_gap))

    merge_vcf_tasks = many2one(tools.vcf_concat_parts, freebayes_tasks)

    execution.run()


if __name__ == '__main__':
    p = argparse.ArgumentParser()
    p.add_argument('bam_path')
    p.add_argument('target_bed_path')
    p.add_argument('--max_complex_gap', type=int, default=2)
    add_execution_args(p)
    start_kwargs, variant_call_args = pop_execution_args(vars(p.parse_args()))

    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()
    execution = cosmos.start(output_dir='../analysis_output/variant_calling', **start_kwargs)

    variant_call(execution, **variant_call_args)
Code Example #26
File: ex3.py  Project: qqss88/Cosmos2
"""
Basic demonstration of the structure of a Task instance
"""
import os
from cosmos.api import Cosmos

cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
cosmos.initdb()

execution = cosmos.start('Example1', 'analysis_output/ex3', restart=True, skip_confirm=True)


def cmd(in_files, out_file):
    return r"""
        echo "{in_files}" > {out_file}
    """.format(**locals())


t = execution.add_task(cmd, tags=dict(in_files=[('a', 'b', 'in_file')], out_file='out.txt'))

print('Task:', t)
print('task.tags', t.tags)
print('task.input_files', t.input_files)
print('task.output_files', t.output_files)

#execution.run()

Code Example #27
File: ex1.py  Project: yanding/COSMOS-2.0
                                      tags=dict(chars=True, **cat_task.tags),
                                      parents=[cat_task],
                                      out_dir='{word}/{n}')
                   for cat_task in cats]

    # Cat the contents of all word_counts into one file.  Only one node is being created, whose parents are
    # all of the WordCounts (a many2one relationship).
    summarize = execution.add_task(cat,
                                   tags=dict(),
                                   parents=word_counts,
                                   out_dir='',
                                   stage_name='Summary_Analysis')

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(execution.stage_graph(), '/tmp/ex1_task_graph.png', format='png')
        draw_task_graph(execution.task_graph(), '/tmp/ex1_stage_graph.png', format='png')
    else:
        print('Pygraphviz is not available :(')

    execution.run()


if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    execution = cosmos.start('Example1', 'analysis_output/ex1', max_attempts=1, restart=True, skip_confirm=True,
                             max_cpus=10)
    run_ex1(execution)
Code Example #28
        with open(out_file) as fp:
            i = int(fp.read())
    else:
        i = 0

    with open(out_file, "w") as fp:
        fp.write(str(i + 1))

    if i < 2:
        # fail the first two attempts on purpose (an explicit exception is clearer than a bare raise)
        raise RuntimeError("intentional failure to demonstrate reattempts")


if __name__ == "__main__":
    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="local",
    )
    cosmos.initdb()
    workflow = cosmos.start("ExampleReattempt",
                            restart=True,
                            skip_confirm=True)

    if os.path.exists("out.txt"):
        os.unlink("out.txt")

    t = workflow.add_task(func=add_one,
                          params=dict(out_file="out.txt"),
                          uid="my_task",
                          max_attempts=3)

    workflow.make_output_dirs()
Code Example #29
    load_dotenv()

    def set_env_variables():

        return "\n".join([
            f'export {variable}="{os.getenv(variable)}"'
            for variable in env_variables
        ])

    cosmos = Cosmos(
        RDS_COSMOS_DATABASE,
        default_drm="awsbatch",
        default_drm_options=dict(
            container_image=os.getenv("ECR_CONTAINER_IMAGE"),
            s3_prefix_for_command_script_temp_files=os.path.join(
                S3_BUCKET_PATH, "cosmos-tmp"),
            shm_size=int(args.mem_req * 0.75),
            retry_only_if_status_reason_matches="Host EC2 .+ terminated.",  # only retry on spot instance death
        ),
        default_queue=os.getenv("BATCH_QUEUE_NAME"),
    )
    cosmos.initdb()

    workflow_name = f"{args.name}-{uuid1().hex}"
    workflow = cosmos.start(workflow_name, restart=True, skip_confirm=True)

    task_name = uuid1().hex

    workflow.add_task(
        func=pretrain,
Code Example #30
File: postvqsr.py  Project: vifehe/yaps2
class Pipeline(object):
    def __init__(self, config, drm, restart, skip_confirm):
        self.config = config

        self.cosmos = Cosmos(database_url='sqlite:///{}'.format(
            self.config.db),
                             get_submit_args=default_get_submit_args,
                             default_drm=drm)

        self.cosmos.initdb()

        primary_logfile = os.path.join(
            self.config.rootdir,
            '{}.log'.format(self.config.project_name),
        )

        self.workflow = self.cosmos.start(
            self.config.project_name,
            primary_log_path=primary_logfile,
            restart=restart,
            skip_confirm=skip_confirm,
        )

        self.setup_pipeline()

    def setup_pipeline(self):
        self.construct_pipeline()
        self.workflow.make_output_dirs()

    def run(self, task_flush):
        # put set_successful to False if you intend to add more tasks to the
        # pipeline later
        custom_log_dir = lambda task: os.path.join(self.config.rootdir, 'logs',
                                                   task.stage.name, task.uid)
        self.workflow.run(set_successful=False,
                          log_out_dir_func=custom_log_dir,
                          db_task_flush=task_flush)

    def construct_pipeline(self):
        # 1. remove unused alternates
        remove_ac_0_tasks = self.create_remove_ac_0_tasks(1)
        # 2. calculate sample missingness (counting phase)
        count_sample_missingness_tasks = self.create_count_sample_missingness_tasks(
            remove_ac_0_tasks, 2)
        # 2.1 calculate sample missingness (merge and calculation phase)
        calculate_sample_missingness_task = self.create_calculate_sample_missingness_task(
            count_sample_missingness_tasks, 2.1)
        # 3. denormalize, decompose, and uniq
        dnu_tasks = self.create_decompose_normalize_unique_tasks(
            remove_ac_0_tasks, 3)
        # 4. remove symbolic alleles
        rsa_tasks = self.create_remove_symbolic_deletion_tasks(dnu_tasks, 4)
        # 5. filter missingness
        filter_variant_missingness_tasks = self.create_filter_variant_missingness_tasks(
            rsa_tasks, 5)
        # 6. annotate allele balances
        allele_balance_annotation_tasks = self.create_allele_balance_annotation_tasks(
            filter_variant_missingness_tasks, 6)
        # 7. annotate with 1000G
        annotate_1000G_tasks = self.create_1000G_annotation_tasks(
            allele_balance_annotation_tasks, 7)
        # 8. annotate with ExAC
        annotate_ExAC_tasks = self.create_ExAC_annotation_tasks(
            annotate_1000G_tasks, 8)
        # 9. VEP annotation
        annotate_vep_cadd_tasks = self.create_vep_cadd_annotation_tasks(
            annotate_ExAC_tasks, 9)
        # 10. VCF concatenation
        concatenated_vcfs = self.create_concatenate_vcfs_task(
            annotate_vep_cadd_tasks, 10)
        # 11. bcftools stats
        bcftools_stats_tasks = self.create_bcftools_stats_tasks(
            annotate_ExAC_tasks, 11)
        # 11.1 Merge & Plot bcftools stats
        bcftools_stats_summary_task = self.create_bcftools_stats_summary_task(
            bcftools_stats_tasks, 11.1)
        # 12. GATK VariantEval
        variant_eval_tasks = self.create_variant_eval_tasks(
            annotate_ExAC_tasks, 12)
        # 12.1. Merge & Plot GATK VariantEval Stats
        variant_eval_summary_task = self.create_variant_eval_summary_task(
            variant_eval_tasks, 12.1)

    def create_bcftools_stats_summary_task(self, parent_tasks, step_number):
        stage = self._construct_task_name('bcftools-stats-summary',
                                          step_number)
        output_dir = os.path.join(self.config.rootdir, stage)

        prior_stage_name = parent_tasks[0].stage.name
        input_dir = os.path.join(self.config.rootdir, prior_stage_name)

        lsf_params = get_lsf_params(bcftools_stats_summary_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        task = {
            'func': bcftools_stats_summary,
            'params': {
                'in_dir': input_dir,
                'out_dir': output_dir,
            },
            'stage_name': stage,
            'uid': 'all-chroms',
            'drm_params': lsf_params_json,
            'parents': parent_tasks,
        }

        summary_task = self.workflow.add_task(**task)
        return summary_task

    def create_concatenate_vcfs_task(self, parent_tasks, step_number):
        tasks = list()
        stage = self._construct_task_name('concat-vcfs', step_number)
        output_dir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(concatenate_vcfs_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        def region_key(task):
            reference_fai = os.path.join(
                '/gscmnt/ams1102/info/model_data/2869585698/build106942997',
                'all_sequences.fa.fai')
            return Region(reference_fai, task.params['in_chrom'])

        def chromosome_key(task):
            reference_fai = os.path.join(
                '/gscmnt/ams1102/info/model_data/2869585698/build106942997',
                'all_sequences.fa.fai')
            return Region(reference_fai, task.params['in_chrom']).chrom

        for ref_chrom, chrom_tasks in groupby(sorted(parent_tasks,
                                                     key=region_key),
                                              key=chromosome_key):
            ptasks = list(chrom_tasks)
            input_vcfs = [x.params['out_vcf'] for x in ptasks]
            output_vcf = 'concatenated.c{}.vcf.gz'.format(ref_chrom)
            output_log = 'concatenate.{}.log'.format(ref_chrom)
            task = {
                'func': concatenate_vcfs,
                'params': {
                    'in_vcfs': input_vcfs,
                    'in_chrom': ref_chrom,
                    'out_vcf': os.path.join(output_dir, ref_chrom, output_vcf),
                    'out_log': os.path.join(output_dir, ref_chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=ref_chrom),
                'drm_params': lsf_params_json,
                'parents': ptasks,
            }
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def create_variant_eval_summary_task(self, parent_tasks, step_number):
        stage = self._construct_task_name('gatk-variant-eval-summary',
                                          step_number)
        output_dir = os.path.join(self.config.rootdir, stage)

        prior_stage_name = parent_tasks[0].stage.name
        input_dir = os.path.join(self.config.rootdir, prior_stage_name)

        lsf_params = get_lsf_params(variant_eval_summary_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        task = {
            'func': variant_eval_summary,
            'params': {
                'in_dir': input_dir,
                'out_dir': output_dir,
            },
            'stage_name': stage,
            'uid': 'all-chroms',
            'drm_params': lsf_params_json,
            'parents': parent_tasks,
        }

        summary_task = self.workflow.add_task(**task)
        return summary_task

    def create_bcftools_stats_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('bcftools-stats', step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(bcftools_stats_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_stats = '{}.stats.out'.format(chrom)
            task = {
                'func': bcftools_stats,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_stats': os.path.join(basedir, chrom, output_stats),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_variant_eval_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('gatk-variant-eval', step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(gatk_variant_eval_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_stats = 'chrom-{}-variant-eval.out'.format(chrom)
            output_log = 'chrom-{}-variant-eval.log'.format(chrom)
            task = {
                'func': gatk_variant_eval,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_stats': os.path.join(basedir, chrom, output_stats),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_vep_cadd_annotation_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('vep-cadd-annotation', step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(annotation_vep_cadd_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'annotated.vep.cadd.c{}.vcf.gz'.format(chrom)
            output_log = 'vep.cadd.annotation.{}.log'.format(chrom)
            task = {
                'func': annotation_vep_cadd,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_ExAC_annotation_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('annotate-w-ExAC', step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(annotation_ExAC_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'ExAC-annotated.c{}.vcf.gz'.format(chrom)
            output_log = 'ExAC-annotate.{}.log'.format(chrom)
            task = {
                'func': annotation_ExAC,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_1000G_annotation_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('annotate-w-1000G', step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(annotation_1000G_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = '1kg-annotated.c{}.vcf.gz'.format(chrom)
            output_log = '1000G-annotate.{}.log'.format(chrom)
            task = {
                'func': annotation_1000G,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_allele_balance_annotation_tasks(self, parent_tasks,
                                               step_number):
        tasks = []
        stage = self._construct_task_name('allele-balance-annotation',
                                          step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(annotate_allele_balances_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'allele-balance-{}.log'.format(chrom)
            task = {
                'func': annotate_allele_balances,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_filter_variant_missingness_tasks(self, parent_tasks,
                                                step_number):
        tasks = []
        stage = self._construct_task_name('filter-variant-missingness',
                                          step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(filter_variant_missingness_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'filter-missingness-{}.log'.format(chrom)
            task = {
                'func': filter_variant_missingness,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_remove_symbolic_deletion_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('remove-symbolic-alleles',
                                          step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(
            remove_symbolic_deletion_alleles_lsf_params, self.config.email,
            self.config.docker)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'remove-symbolic-alleles-chrom-{}.log'.format(chrom)
            task = {
                'func': remove_symbolic_deletion_alleles,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_decompose_normalize_unique_tasks(self, parent_tasks,
                                                step_number):
        tasks = []
        stage = self._construct_task_name('decompose-normalize-uniq',
                                          step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(normalize_decompose_unique_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'decompose-normalize-unique-{}.log'.format(chrom)
            task = {
                'func': normalize_decompose_unique,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_calculate_sample_missingness_task(self, parent_tasks,
                                                 step_number):
        stage = self._construct_task_name('calculate-sample-missingness',
                                          step_number)
        output_dir = os.path.join(self.config.rootdir, stage)

        prior_stage_name = parent_tasks[0].stage.name
        input_dir = os.path.join(self.config.rootdir, prior_stage_name)
        input_json_wildcard_path = os.path.join(input_dir, '*', '*.json')

        lsf_params = get_lsf_params(calculate_sample_missingness_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        task = {
            'func': calculate_sample_missingness,
            'params': {
                'in_json':
                input_json_wildcard_path,
                'out_stats':
                os.path.join(output_dir, 'sample-missingness-pct.dat'),
                'out_log':
                os.path.join(output_dir, 'sample-missingness-pct.dat.log'),
            },
            'stage_name': stage,
            'uid': '1-22',
            'drm_params': lsf_params_json,
            'parents': parent_tasks,
        }

        summary_task = self.workflow.add_task(**task)
        return summary_task

    def create_count_sample_missingness_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('count-sample-missingness',
                                          step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(count_sample_missingness_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']

            # only count missing genotypes on chromosomes 1-22 (not X, Y, or MT)
            if not chrom[0].isdigit(): continue

            output_json = '{chrom}-sample-missingness-counts.json'.format(
                chrom=chrom)
            output_log = '{}-sample-missingness-counts.log'.format(chrom)
            task = {
                'func': count_sample_missingness,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_json': os.path.join(basedir, chrom, output_json),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_remove_ac_0_tasks(self, step_number):
        tasks = []
        stage = self._construct_task_name('select-variants-ac-0-removal',
                                          step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(
            gatk_select_variants_remove_ac_0_lsf_params, self.config.email,
            self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for chrom in self.config.chroms:
            vcf = self.config.vcfs[chrom]
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'select-variants-chrom-{}-gatk.log'.format(chrom)
            task = {
                'func': gatk_select_variants_remove_ac_0,
                'params': {
                    'in_chrom': chrom,
                    'in_vcf': vcf,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def _construct_task_name(self, name, number):
        task_name = '{}-{}'.format(number, name)
        return task_name
Code Example #31
File: ex2.py  Project: alliemclean/COSMOS2
        stage_name='Summary_Analysis',
        uid='')  # It's the only Task in this Stage, so doesn't need a specific uid


if __name__ == '__main__':
    import argparse

    p = argparse.ArgumentParser()
    p.add_argument('-drm', default='local', help='', choices=('local', 'drmaa:ge', 'ge'))
    p.add_argument('-q', '--queue', help='Submit to this queue if the DRM supports it')

    args = p.parse_args()

    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)),
                    # example of how to change arguments if you're NOT using default_drm='local'
                    get_submit_args=partial(default_get_submit_args, parallel_env='smp'),
                    default_drm=args.drm,
                    default_queue=args.queue)
    cosmos.initdb()

    sp.check_call('mkdir -p analysis_output/ex2', shell=True)
    os.chdir('analysis_output/ex2')

    workflow = cosmos.start('Example2', restart=True, skip_confirm=True)

    recipe(workflow)

    workflow.make_output_dirs()
    workflow.run(max_attempts=1, max_cores=10)

    if pygraphviz_available:
Code Example #32
File: ex_email.py  Project: LPM-HMS/COSMOS2
from cosmos.api import Cosmos, signal_workflow_status_change, WorkflowStatus
from ex1 import run_ex1
import os

def run_ex3(workflow):
    @signal_workflow_status_change.connect
    def sig(ex):
        msg = "%s %s" % (ex, ex.status)
        if ex.status in [WorkflowStatus.successful, WorkflowStatus.failed, WorkflowStatus.killed]:
            text_message(msg)
            ex.log.info('Sent a text message')

    def text_message(message):
        from twilio.rest import TwilioRestClient

        account = "XYZ"
        token = "XYZ"
        client = TwilioRestClient(account, token)

        message = client.messages.create(to="+1231231234", from_="+1231231234", body=message)

    run_ex1(workflow)


if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    workflow = cosmos.start('Example_Email', 'analysis_output/ex3', restart=True, skip_confirm=True)
    run_ex1(workflow)
Code Example #33
    """
    Bioinformatics variant calling workflow
    """
    contigs = sp.check_output("cat %s |cut -f1|uniq" % target_bed_path, shell=True).strip().split("\n")

    freebayes_tasks = []
    for contig in contigs:
        bed_task = execution.add_task(tools.filter_bed_by_contig, tags=dict(in_bam=bam_path, in_bed=target_bed_path, contig=contig), out_dir='work/{contig}')
        freebayes_task = execution.add_task(tools.freebayes, tags=dict(max_complex_gap=max_complex_gap), parents=bed_task, out_dir='work/{contig}')
        freebayes_tasks.append(freebayes_task)

    merge_vcf_tasks = many2one(tools.vcf_concat_parts, parents=freebayes_tasks)

    execution.run(max_attempts=max_attempts, max_cores=max_cores)


if __name__ == '__main__':
    p = argparse.ArgumentParser()
    p.add_argument('bam_path')
    p.add_argument('target_bed_path')
    p.add_argument('--max_complex_gap', type=int, default=2)
    add_execution_args(p)
    args = p.parse_args()

    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()
    execution = cosmos.start(name=args.name, output_dir='../analysis_output/variant_calling',
                             restart=args.restart, skip_confirm=args.skip_confirm)

    variant_call(execution, args.max_attempts, args.max_cores, args.bam_path, args.target_bed_path, args.max_complex_gap)
Code Example #34
def main():
    args = parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="awsbatch",
        default_drm_options=dict(
            container_image=args.container_image,
            s3_prefix_for_command_script_temp_files=args.s3_prefix_for_command_script_temp_files,
            # only retry on spot instance death
            retry_only_if_status_reason_matches="Host EC2 .+ terminated.",
        ),
        default_queue=args.default_queue,
    )

    cosmos.initdb()

    # sp.check_call("mkdir -p analysis_output/ex1", shell=True)
    # os.chdir("analysis_output/ex1")
    workflow = cosmos.start(f"Evaluate_{args.id}", restart=True, skip_confirm=True)

    parameters = np.load(f"optimize_awsbatch/parameters/{args.id}.npy")

    for i, par in enumerate(parameters):
        parameters_ = dict(
            mean_weight=par[0],
            c_w=par[1],
            tau_pos=par[2],
            tau_neg=par[3],
            A_pos=par[4],
            A_neg=par[5],
            weight_decay=par[6],
            n_filters=25,
            time_max=250,
            crop=20,
            kernel_size=16,
            stride=4,
            intensity=127.5,
            c_w_min=None,
            c_l=True,
            network_type="LC_SNN",

        )
        workflow.add_task(
            func=evaluate,
            params=dict(
                parameters=parameters_,
                out_s3_uri=f"{args.out_s3_uri}/scores/{args.id}/{i}.json",
                sleep=args.sleep,
                train=args.train,
                calibrate=args.calibrate,
                test=args.test
            ),
            uid=str(i),
            time_req=None,
            max_attempts=args.max_attempts,
            core_req=args.core_req,
            mem_req=args.mem_req,
        )
    workflow.run()

    sys.exit(0 if workflow.successful else 1)
Code Example #35
File: ex1.py  Project: LPM-HMS/COSMOS2
import subprocess as sp
import os
import sys
from cosmos.api import Cosmos

cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)),
                default_drm='local')
cosmos.initdb()

sp.check_call('mkdir -p analysis_output/ex1', shell=True)
os.chdir('analysis_output/ex1')
workflow = cosmos.start('Example1', restart=True, skip_confirm=True)


def say(text, out_file):
    return r"""
        echo "{text}" > {out_file}
    """.format(text=text, out_file=out_file)


t = workflow.add_task(func=say,
                      params=dict(text='Hello World', out_file='out.txt',),
                      uid='my_task', time_req=None, core_req=1, mem_req=1024)

print('task.params', t.params)
print('task.input_map', t.input_map)
print('task.output_map', t.output_map)
print('task.core_req', t.core_req)
print('task.time_req', t.time_req)
print('task.drm', t.drm)
print('task.uid', t.uid)
Code Example #36
"""
Basic demonstration of the structure of a Task instance
"""
import os
from cosmos.api import Cosmos

cosmos = Cosmos('sqlite:///%s/sqlite.db' %
                os.path.dirname(os.path.abspath(__file__)))
cosmos.initdb()

execution = cosmos.start('Example1',
                         'analysis_output/ex3',
                         restart=True,
                         skip_confirm=True)


def cmd(in_files, out_file):
    return r"""
        echo "{in_files}" > {out_file}
    """.format(**locals())


t = execution.add_task(cmd,
                       tags=dict(in_files=[('a', 'b', 'in_file')],
                                 out_file='out.txt'))

print('Task:', t)
print('task.tags', t.tags)
print('task.input_files', t.input_files)
print('task.output_files', t.output_files)
Code Example #37
File: testxx.py  Project: Romain-B/Cosmos_tests
    #--------------------
    # Generate the workflow diagrams (if pygraphviz is installed)
    
    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(execution.stage_graph(), 'testing/workflow_info/test_task_graph.png', format='png')
        draw_task_graph(execution.task_graph(), 'testing/workflow_info/test_stage_graph.png', format='png')
    else:
        print('Pygraphviz is not available :(')

    execution.run(max_attempts=1, max_cores=10)
    
#------------------------
# RUN THE WORKFLOW
if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    subprocess.check_call('mkdir -p testing testing/data testing/results testing/workflow_info', shell=True)
    #subprocess.check_call('cp extdata/4.fastq testing/data', shell=False)
    execution = cosmos.start('Testx', 'testing',restart=True, skip_confirm=True)
    run_test(execution)
"""else:   
    #--------------------
    # Connect to the Cosmos services (database)
	cosmos = Cosmos('sqlite:///sqlite.db')
	cosmos.initdb()
	    
    #--------------------
    # Create the results subdirectories
	subprocess.check_call('mkdir -p testing testing/data testing/results testing/workflow_info', shell=True)
Code Example #38
from cosmos.api import Execution, add_execution_args, Cosmos
from configparser import ConfigParser
from cosmos.util.helpers import mkdir

root_path = os.path.dirname(os.path.realpath(__file__))
config = ConfigParser()
config.read(os.path.join(root_path, 'settings.conf'))
settings = config['main']

if __name__ == '__main__':
    import ex1
    import ex_fail
    import ex_email

    cosmos = Cosmos('sqlite:///%s/sqlite.db' %
                    os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-g',
        '--growl',
        action='store_true',
        help='sends a growl notification on execution status changes')
    parser.add_argument('-d',
                        '--debug',
                        action='store_true',
                        help='launch ipdb on exception')
    sps = parser.add_subparsers(title="Commands", metavar="<command>")
Code Example #39
        execution.add_task(tools.filter_bed_by_contig,
                           tags=dict(in_bam=bam_path,
                                     in_bed=target_bed_path,
                                     contig=contig),
                           out_dir='work/{contig}') for contig in contigs
    ]

    freebayes_tasks = one2one(tools.freebayes, bed_tasks,
                              dict(max_complex_gap=max_complex_gap))

    merge_vcf_tasks = many2one(tools.vcf_concat_parts, freebayes_tasks)

    execution.run()


if __name__ == '__main__':
    p = argparse.ArgumentParser()
    p.add_argument('bam_path')
    p.add_argument('target_bed_path')
    p.add_argument('--max_complex_gap', type=int, default=2)
    add_execution_args(p)
    start_kwargs, variant_call_args = pop_execution_args(vars(p.parse_args()))

    cosmos = Cosmos('sqlite:///%s/sqlite.db' %
                    os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()
    execution = cosmos.start(output_dir='../analysis_output/variant_calling',
                             **start_kwargs)

    variant_call(execution, **variant_call_args)
Code Example #40
File: runweb.py  Project: qqss88/Cosmos2
import os
from cosmos.api import Cosmos
cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
cosmos.runweb('0.0.0.0', 5151)