Code Example #1
def main():
    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="local",
    )
    cosmos.initdb()

    sp.check_call("mkdir -p analysis_output/ex1", shell=True)
    os.chdir("analysis_output/ex1")
    workflow = cosmos.start("Example1", restart=True, skip_confirm=True)

    t = workflow.add_task(
        func=say,
        params=dict(text="Hello World", out_file="out.txt"),
        uid="my_task",
        time_req=None,
        core_req=1,
        mem_req=1024,
    )

    print(("task.params", t.params))
    print(("task.input_map", t.input_map))
    print(("task.output_map", t.output_map))
    print(("task.core_req", t.core_req))
    print(("task.time_req", t.time_req))
    print(("task.drm", t.drm))
    print(("task.uid", t.uid))

    workflow.run()

    sys.exit(0 if workflow.successful else 1)
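
The say task function is not shown in this snippet; since no cmd_wrapper is passed to workflow.run(), it is expected to return a bash command string. Code Example #20 below (what appears to be an older version of the same script) defines it as follows, reproduced here for convenience:

def say(text, out_file):
    # returns a shell command for Cosmos to execute
    return r"""
        echo "{text}" > {out_file}
    """.format(text=text, out_file=out_file)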
Code Example #2
def main():
    # start cosmos engine
    cosmos = Cosmos(
        database_url="sqlite://",
        default_drm="local",
        # default_drm="ge",
        default_queue="dev-short",
        default_drm_options={},
        get_submit_args=partial(default_get_submit_args, parallel_env="smp"),
    )
    cosmos.initdb()

    # create cosmos workflow
    workflow = cosmos.start(
        # NOTE cosmos will make dirs in this path
        # primary_log_path=os.path.join("logs", "cosmos.log"),
        name="blah",
        restart=True,
        skip_confirm=True,
        fail_fast=True,
    )

    for i in range(100):
        print("add {}".format(i))
        silly_recipe(workflow, i, 100)

    workflow.make_output_dirs()

    # run cosmos workflow
    # with SGESignalHandler(workflow):
    workflow.run()
Code Example #3
def main():
    cosmos = Cosmos("cosmos.sqlite").initdb()

    workflow = cosmos.start("ex1", skip_confirm=True)

    t = workflow.add_task(
        func=say,
        params=dict(text="Hello World", out_file="out.txt"),
        uid="my_task",
        time_req=None,
        core_req=1,
        mem_req=1024,
    )

    print(("task.params", t.params))
    print(("task.input_map", t.input_map))
    print(("task.output_map", t.output_map))
    print(("task.core_req", t.core_req))
    print(("task.time_req", t.time_req))
    print(("task.drm", t.drm))
    print(("task.uid", t.uid))

    workflow.run(cmd_wrapper=py_call)

    sys.exit(0 if workflow.successful else 1)
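
Unlike Code Example #1, this workflow runs with cmd_wrapper=py_call, so say is expected to be an ordinary Python function that does the work directly instead of returning a shell command. A hypothetical definition consistent with how it is used here:

def say(text, out_file):
    # with py_call the function body itself runs on the execution host
    with open(out_file, "w") as fp:
        fp.write(text + "\n")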
Code Example #4
File: env_variables.py Project: indraniel/COSMOS2
def main():
    cosmos = Cosmos()
    cosmos.initdb()
    workflow = cosmos.start("env_variables", skip_confirm=True)
    workflow.add_task(func=command_with_env_variables,
                      environment_variables=environment_variables_dict,
                      uid="special")
    workflow.run(cmd_wrapper=py_call)
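
The helpers command_with_env_variables and environment_variables_dict are defined elsewhere in the example script; a minimal hypothetical pair consistent with this usage might look like:

import os

# hypothetical: the variables Cosmos should export into the task's environment
environment_variables_dict = {"MY_VAR": "hello", "OTHER_VAR": "world"}

def command_with_env_variables():
    # hypothetical task body: verify the variables were injected into os.environ
    for key in environment_variables_dict:
        print(key, "=", os.environ.get(key))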
Code Example #5
def test_zero_tasks():
    cosmos = Cosmos()
    cosmos.initdb()
    temp_dir = tempfile.mkdtemp()
    with cd(temp_dir):
        workflow = cosmos.start('workflow', skip_confirm=True)
        workflow.run(set_successful=False)
        workflow.run(cmd_wrapper=py_call)

    shutil.rmtree(temp_dir)
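
The cd helper used above is presumably a small context manager that temporarily changes the working directory; a minimal sketch under that assumption:

import contextlib
import os

@contextlib.contextmanager
def cd(path):
    # hypothetical helper: chdir into path for the duration of the with-block
    previous = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(previous)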
Code Example #6
def main():
    p = argparse.ArgumentParser()
    p.add_argument("-drm",
                   default="local",
                   help="",
                   choices=("local", "drmaa:ge", "ge", "slurm"))
    p.add_argument("-j",
                   "--job-class",
                   help="Submit to this job class if the DRM supports it")
    p.add_argument("-q",
                   "--queue",
                   help="Submit to this queue if the DRM supports it")

    args = p.parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        # example of how to change arguments if you're not using default_drm='local'
        get_submit_args=partial(default_get_submit_args, parallel_env="smp"),
        default_drm=args.drm,
        default_max_attempts=2,
        default_job_class=args.job_class,
        default_queue=args.queue,
    )
    cosmos.initdb()

    sp.check_call("mkdir -p analysis_output/1000tasks/", shell=True)
    os.chdir("analysis_output/1000tasks/")

    workflow = cosmos.start("1000_tasks", restart=True, skip_confirm=True)

    recipe(workflow)

    workflow.make_output_dirs()
    workflow.run(max_cores=100)

    # Noting here that if you wanted to look at the outputs of any Tasks to decide how to generate the rest of a DAG
    # you can do so here, proceed to add more tasks via workflow.add_task(), and then call workflow.run() again.
    # Yes, it does require running all Tasks in the dag to get the outputs of any Task, and we hope to address
    # that limitation at some point in the future.

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(workflow.stage_graph(),
                         "/tmp/ex1_task_graph.png",
                         format="png")
        draw_task_graph(workflow.task_graph(),
                        "/tmp/ex1_stage_graph.png",
                        format="png")
    else:
        print("Pygraphviz is not available :(")

    sys.exit(0 if workflow.successful else 1)
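
The comment above about generating the rest of a DAG from Task outputs corresponds to running the workflow in phases: call run() with set_successful=False, inspect the results, add more tasks, then call run() again. A rough hypothetical sketch of that pattern (reusing the workflow object above; first_pass and second_pass are placeholder task functions):

def first_pass(out_file):
    return "echo 3 > %s" % out_file

def second_pass(i):
    return "echo processing shard %d" % i

t1 = workflow.add_task(func=first_pass, params=dict(out_file="counts.txt"), uid="phase1")
workflow.run(set_successful=False)  # execute what has been added so far, but don't mark the workflow successful yet

# inspect phase-1 output to decide how much more of the DAG to build
with open("counts.txt") as fp:
    n = int(fp.read())
for i in range(n):
    workflow.add_task(func=second_pass, params=dict(i=i), uid=str(i), parents=[t1])

workflow.run()  # finishes the DAG and marks the workflow successful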
Code Example #7
def main():
    cosmos = Cosmos()
    cosmos.initdb()
    workflow = cosmos.start('test', skip_confirm=True)
    for i, num_gpus in enumerate([1, 1, 2, 2, 3]):
        task = workflow.add_task(use_cuda_device,
                                 dict(some_arg=i, num_gpus=num_gpus),
                                 gpu_req=num_gpus,
                                 uid=str(i))

    workflow.run(max_gpus=len(os.environ['COSMOS_LOCAL_GPU_DEVICES'].split(',')),
                 cmd_wrapper=py_call,
                 cleanup_at_exit=False)
Code Example #8
File: local_gpus.py Project: indraniel/COSMOS2
def main():
    cosmos = Cosmos().initdb()
    workflow = cosmos.start("gpu", skip_confirm=True)

    for i, num_gpus in enumerate([1, 1, 2, 2, 3]):
        task = workflow.add_task(
            use_cuda_device,
            dict(some_arg=i, num_gpus=num_gpus),
            gpu_req=num_gpus,
            uid=str(i),
        )

    workflow.run(
        max_gpus=len(os.environ["COSMOS_LOCAL_GPU_DEVICES"].split(",")),
        cmd_wrapper=py_call,
    )
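
COSMOS_LOCAL_GPU_DEVICES is read by the script itself to compute max_gpus, so it must list the device IDs available on the machine (e.g. COSMOS_LOCAL_GPU_DEVICES=0,1,2). The use_cuda_device function is not shown; a hypothetical version that simply reports the devices a task was given could be:

import os

# the script expects e.g. COSMOS_LOCAL_GPU_DEVICES=0,1,2 in the environment before it runs
os.environ.setdefault("COSMOS_LOCAL_GPU_DEVICES", "0,1")

def use_cuda_device(some_arg, num_gpus):
    # hypothetical task body; assumes Cosmos exposes the devices assigned to this
    # task via CUDA_VISIBLE_DEVICES (an assumption, not confirmed by this snippet)
    devices = os.environ.get("CUDA_VISIBLE_DEVICES", "")
    print("task", some_arg, "requested", num_gpus, "gpu(s); assigned:", devices)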
Code Example #9
File: ex3_pycall.py Project: indraniel/COSMOS2
def main():
    p = ArgumentParser()
    p.add_argument("--sleep", default=0, type=int)
    args = p.parse_args()

    cosmos = Cosmos("cosmos.sqlite").initdb()
    workflow = cosmos.start("ex3", restart=True, skip_confirm=True)

    t1 = workflow.add_task(func=say,
                           params=dict(text="Hello World", out_file="out.txt"),
                           uid="my_task")
    t2 = workflow.add_task(func=sleep,
                           params=dict(secs=args.sleep),
                           uid="my_task")

    workflow.make_output_dirs()
    workflow.run(cmd_wrapper=py_call)
Code Example #10
File: ex2_complete.py Project: indraniel/COSMOS2
def main():
    p = argparse.ArgumentParser()
    p.add_argument("-drm",
                   default="local",
                   help="",
                   choices=("local", "awsbatch", "slurm", "drmaa:ge", "ge"))
    p.add_argument("-q",
                   "--queue",
                   help="Submit to this queue if the DRM supports it")

    args = p.parse_args()

    cosmos = Cosmos("cosmos.sqlite",
                    default_drm=args.drm,
                    default_max_attempts=2,
                    default_queue=args.queue)
    cosmos.initdb()

    workflow = cosmos.start("Example2", skip_confirm=True)

    recipe(workflow)

    # any parameters that start with out_ are output directories, and will be created if
    # the user calls workflow.make_output_dirs
    workflow.make_output_dirs()
    workflow.run(max_cores=10, cmd_wrapper=py_call)

    # Noting here that if you wanted to look at the outputs of any Tasks to decide how to generate the rest of a DAG
    # you can do so here, proceed to add more tasks via workflow.add_task(), and then call workflow.run() again.
    # Yes, it does require running all Tasks in the dag to get the outputs of any Task, and we hope to address
    # that limitation at some point in the future.

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(workflow.stage_graph(),
                         "/tmp/ex1_task_graph.png",
                         format="png")
        draw_task_graph(workflow.task_graph(),
                        "/tmp/ex1_stage_graph.png",
                        format="png")
    else:
        print("Pygraphviz is not available :(")

    sys.exit(0 if workflow.successful else 1)
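
Per the comment above, parameters whose names begin with out_ mark outputs whose directories workflow.make_output_dirs() will create before anything runs. A small hypothetical illustration (reusing the workflow object from this example):

def summarize(in_file, out_summary):
    # hypothetical py_call-style task writing into a nested output directory
    with open(out_summary, "w") as fp:
        fp.write("summary of " + in_file + "\n")

workflow.add_task(
    func=summarize,
    params=dict(in_file="data.txt", out_summary="results/summaries/data.summary.txt"),
    uid="data",
)
# assumption: this creates results/summaries/ (the directory portion of out_summary)
workflow.make_output_dirs()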
Code Example #11
File: ex_gpu.py Project: egafni/COSMOS2
def main(output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    cosmos = Cosmos()
    cosmos.initdb()
    workflow = cosmos.start(
        "test", skip_confirm=True, primary_log_path=os.path.join(output_dir, "workflow.log"),
    )
    for i, num_gpus in enumerate([1, 1, 2, 2, 3]):
        task = workflow.add_task(
            use_cuda_device, dict(some_arg=i, num_gpus=num_gpus), gpu_req=num_gpus, uid=str(i),
        )

    workflow.run(
        max_gpus=len(os.environ["COSMOS_LOCAL_GPU_DEVICES"].split(",")),
        cmd_wrapper=py_call_cmd_wrapper,
        do_cleanup_atexit=False,
        log_out_dir_func=partial(default_task_log_output_dir, prefix="%s" % output_dir),
    )
Code Example #12
File: ex_awsbatch.py Project: egafni/COSMOS2
def main():
    args = parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="awsbatch",
        default_drm_options=dict(
            container_image=args.container_image,
            s3_prefix_for_command_script_temp_files=args.s3_prefix_for_command_script_temp_files,
            # only retry on spot instance death
            retry_only_if_status_reason_matches="Host EC2 .+ terminated.",
        ),
        default_queue=args.default_queue,
    )
    cosmos.initdb()

    sp.check_call("mkdir -p analysis_output/ex1", shell=True)
    os.chdir("analysis_output/ex1")
    workflow = cosmos.start("Example1", restart=True, skip_confirm=True)

    t = workflow.add_task(
        func=get_instance_info,
        params=dict(out_s3_uri=args.out_s3_uri, sleep=args.sleep),
        uid="",
        time_req=None,
        max_attempts=args.max_attempts,
        core_req=args.core_req,
        mem_req=1024,
    )
    workflow.run()

    print(("task.params", t.params))
    print(("task.input_map", t.input_map))
    print(("task.output_map", t.output_map))
    print(("task.core_req", t.core_req))
    print(("task.time_req", t.time_req))
    print(("task.drm", t.drm))
    print(("task.uid", t.uid))
    print(("task.drm_options", t.drm_options))
    print(("task.queue", t.queue))

    sys.exit(0 if workflow.successful else 1)
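
parse_args() is defined elsewhere in the script; judging from the attributes accessed above, it presumably builds an argparse parser roughly like the following (flag names inferred, not confirmed):

import argparse

def parse_args():
    # hypothetical reconstruction based on the attributes used above
    p = argparse.ArgumentParser()
    p.add_argument("--container-image", required=True)
    p.add_argument("--s3-prefix-for-command-script-temp-files", required=True)
    p.add_argument("--default-queue", required=True)
    p.add_argument("--out-s3-uri", required=True)
    p.add_argument("--sleep", type=int, default=0)
    p.add_argument("--max-attempts", type=int, default=1)
    p.add_argument("--core-req", type=int, default=1)
    return p.parse_args()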
Code Example #13
File: ex3.py Project: egafni/COSMOS2
def main():
    p = ArgumentParser()
    p.add_argument("--sleep", default=0, type=int)
    args = p.parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="local",
    )
    cosmos.initdb()
    workflow = cosmos.start("Example3", restart=True, skip_confirm=True)

    t1 = workflow.add_task(func=say,
                           params=dict(text="Hello World", out_file="out.txt"),
                           uid="my_task")
    t2 = workflow.add_task(func=sleep,
                           params=dict(secs=args.sleep),
                           uid="my_task")

    workflow.make_output_dirs()
    workflow.run(cmd_wrapper=py_call_cmd_wrapper)
Code Example #14
File: duplicate_uids.py Project: indraniel/COSMOS2
def main():
    cosmos = Cosmos().initdb()
    workflow = cosmos.start("duplicate_uids", skip_confirm=True)
    task = workflow.add_task(func=prepare_data, params=dict(a=1), uid="x")

    # normally you can't add a task with the same uid to the same stage
    with pytest.raises(DuplicateUid):
        workflow.add_task(func=prepare_data, params=dict(a=1), uid="x")

    # set if_duplicate="return" to get back the same task that you already added
    task2 = workflow.add_task(func=prepare_data,
                              params=dict(a=1),
                              uid="x",
                              if_duplicate="return")
    assert task == task2

    # this can be especially useful in loops to avoid repeating computation
    for _ in range(3):
        task = workflow.add_task(func=prepare_data,
                                 params=dict(a=1),
                                 uid="x",
                                 if_duplicate="return")
        workflow.add_task(func=train_machine_learning_model,
                          params=dict(a=1),
                          uid="x",
                          if_duplicate="return",
                          parents=task)

    # NOTE: parameters must be identical when using this feature
    with pytest.raises(InvalidParams):
        workflow.add_task(func=prepare_data,
                          params=dict(a=1000),
                          uid="x",
                          if_duplicate="return")
Code Example #15
"""
Basic demonstration of the structure of a Task instance
"""
import os
from cosmos.api import Cosmos

cosmos = Cosmos('sqlite:///%s/sqlite.db' %
                os.path.dirname(os.path.abspath(__file__)))
cosmos.initdb()

execution = cosmos.start('Example1',
                         'analysis_output/ex3',
                         restart=True,
                         skip_confirm=True)


def cmd(in_files, out_file):
    return r"""
        echo "{in_files}" > {out_file}
    """.format(**locals())


t = execution.add_task(cmd,
                       tags=dict(in_files=[('a', 'b', 'in_file')],
                                 out_file='out.txt'))

print('Task:', t)
print('task.tags', t.tags)
print('task.input_files', t.input_files)
print('task.output_files', t.output_files)
Code Example #16
from cosmos.api import Cosmos, signal_workflow_status_change, WorkflowStatus
from .ex1 import run_ex1
import os

def run_ex3(workflow):
    @signal_workflow_status_change.connect
    def sig(ex):
        msg = "%s %s" % (ex, ex.status)
        if ex.status in [WorkflowStatus.successful, WorkflowStatus.failed, WorkflowStatus.killed]:
            text_message(msg)
            ex.log.info('Sent a text message')

    def text_message(message):
        from twilio.rest import TwilioRestClient

        account = "XYZ"
        token = "XYZ"
        client = TwilioRestClient(account, token)

        message = client.messages.create(to="+1231231234", from_="+1231231234", body=message)

    run_ex1(workflow)


if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    workflow = cosmos.start('Example_Email', 'analysis_output/ex3', restart=True, skip_confirm=True)
    run_ex1(workflow)
Code Example #17
File: ex_email.py Project: LPM-HMS/COSMOS2
from cosmos.api import Cosmos, signal_workflow_status_change, WorkflowStatus
from ex1 import run_ex1
import os

def run_ex3(workflow):
    @signal_workflow_status_change.connect
    def sig(ex):
        msg = "%s %s" % (ex, ex.status)
        if ex.status in [WorkflowStatus.successful, WorkflowStatus.failed, WorkflowStatus.killed]:
            text_message(msg)
            ex.log.info('Sent a text message')

    def text_message(message):
        from twilio.rest import TwilioRestClient

        account = "XYZ"
        token = "XYZ"
        client = TwilioRestClient(account, token)

        message = client.messages.create(to="+1231231234", from_="+1231231234", body=message)

    run_ex1(workflow)


if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    workflow = cosmos.start('Example_Email', 'analysis_output/ex3', restart=True, skip_confirm=True)
    run_ex1(workflow)
Code Example #18
File: postvqsr.py Project: vifehe/yaps2
class Pipeline(object):
    def __init__(self, config, drm, restart, skip_confirm):
        self.config = config

        self.cosmos = Cosmos(database_url='sqlite:///{}'.format(
            self.config.db),
                             get_submit_args=default_get_submit_args,
                             default_drm=drm)

        self.cosmos.initdb()

        primary_logfile = os.path.join(
            self.config.rootdir,
            '{}.log'.format(self.config.project_name),
        )

        self.workflow = self.cosmos.start(
            self.config.project_name,
            primary_log_path=primary_logfile,
            restart=restart,
            skip_confirm=skip_confirm,
        )

        self.setup_pipeline()

    def setup_pipeline(self):
        self.construct_pipeline()
        self.workflow.make_output_dirs()

    def run(self, task_flush):
        # set set_successful=False if you intend to add more tasks to the
        # pipeline later
        custom_log_dir = lambda task: os.path.join(self.config.rootdir, 'logs',
                                                   task.stage.name, task.uid)
        self.workflow.run(set_successful=False,
                          log_out_dir_func=custom_log_dir,
                          db_task_flush=task_flush)

    def construct_pipeline(self):
        # 1. remove unused alternates
        remove_ac_0_tasks = self.create_remove_ac_0_tasks(1)
        # 2. calculate sample missingness (counting phase)
        count_sample_missingness_tasks = self.create_count_sample_missingness_tasks(
            remove_ac_0_tasks, 2)
        # 2.1 calculate sample missingness (merge and calculation phase)
        calculate_sample_missingness_task = self.create_calculate_sample_missingness_task(
            count_sample_missingness_tasks, 2.1)
        # 3. denormalize, decompose, and uniq
        dnu_tasks = self.create_decompose_normalize_unique_tasks(
            remove_ac_0_tasks, 3)
        # 4. remove symbolic alleles
        rsa_tasks = self.create_remove_symbolic_deletion_tasks(dnu_tasks, 4)
        # 5. filter missingness
        filter_variant_missingness_tasks = self.create_filter_variant_missingness_tasks(
            rsa_tasks, 5)
        # 6. annotate allele balances
        allele_balance_annotation_tasks = self.create_allele_balance_annotation_tasks(
            filter_variant_missingness_tasks, 6)
        # 7. annotate with 1000G
        annotate_1000G_tasks = self.create_1000G_annotation_tasks(
            allele_balance_annotation_tasks, 7)
        # 8. annotate with ExAC
        annotate_ExAC_tasks = self.create_ExAC_annotation_tasks(
            annotate_1000G_tasks, 8)
        # 9. VEP annotation
        annotate_vep_cadd_tasks = self.create_vep_cadd_annotation_tasks(
            annotate_ExAC_tasks, 9)
        # 10. VCF concatenation
        concatenated_vcfs = self.create_concatenate_vcfs_task(
            annotate_vep_cadd_tasks, 10)
        # 11. bcftools stats
        bcftools_stats_tasks = self.create_bcftools_stats_tasks(
            annotate_ExAC_tasks, 11)
        # 11.1 Merge & Plot bcftools stats
        bcftools_stats_summary_task = self.create_bcftools_stats_summary_task(
            bcftools_stats_tasks, 11.1)
        # 12. GATK VariantEval
        variant_eval_tasks = self.create_variant_eval_tasks(
            annotate_ExAC_tasks, 12)
        # 12.1. Merge & Plot GATK VariantEval Stats
        variant_eval_summary_task = self.create_variant_eval_summary_task(
            variant_eval_tasks, 12.1)

    def create_bcftools_stats_summary_task(self, parent_tasks, step_number):
        stage = self._construct_task_name('bcftools-stats-summary',
                                          step_number)
        output_dir = os.path.join(self.config.rootdir, stage)

        prior_stage_name = parent_tasks[0].stage.name
        input_dir = os.path.join(self.config.rootdir, prior_stage_name)

        lsf_params = get_lsf_params(bcftools_stats_summary_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        task = {
            'func': bcftools_stats_summary,
            'params': {
                'in_dir': input_dir,
                'out_dir': output_dir,
            },
            'stage_name': stage,
            'uid': 'all-chroms',
            'drm_params': lsf_params_json,
            'parents': parent_tasks,
        }

        summary_task = self.workflow.add_task(**task)
        return summary_task

    def create_concatenate_vcfs_task(self, parent_tasks, step_number):
        tasks = list()
        stage = self._construct_task_name('concat-vcfs', step_number)
        output_dir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(concatenate_vcfs_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        def region_key(task):
            reference_fai = os.path.join(
                '/gscmnt/ams1102/info/model_data/2869585698/build106942997',
                'all_sequences.fa.fai')
            return Region(reference_fai, task.params['in_chrom'])

        def chromosome_key(task):
            reference_fai = os.path.join(
                '/gscmnt/ams1102/info/model_data/2869585698/build106942997',
                'all_sequences.fa.fai')
            return Region(reference_fai, task.params['in_chrom']).chrom

        for ref_chrom, chrom_tasks in groupby(sorted(parent_tasks,
                                                     key=region_key),
                                              key=chromosome_key):
            ptasks = list(chrom_tasks)
            input_vcfs = [x.params['out_vcf'] for x in ptasks]
            output_vcf = 'concatenated.c{}.vcf.gz'.format(ref_chrom)
            output_log = 'concatenate.{}.log'.format(ref_chrom)
            task = {
                'func': concatenate_vcfs,
                'params': {
                    'in_vcfs': input_vcfs,
                    'in_chrom': ref_chrom,
                    'out_vcf': os.path.join(output_dir, ref_chrom, output_vcf),
                    'out_log': os.path.join(output_dir, ref_chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=ref_chrom),
                'drm_params': lsf_params_json,
                'parents': ptasks,
            }
            tasks.append(self.workflow.add_task(**task))
        return tasks

    def create_variant_eval_summary_task(self, parent_tasks, step_number):
        stage = self._construct_task_name('gatk-variant-eval-summary',
                                          step_number)
        output_dir = os.path.join(self.config.rootdir, stage)

        prior_stage_name = parent_tasks[0].stage.name
        input_dir = os.path.join(self.config.rootdir, prior_stage_name)

        lsf_params = get_lsf_params(variant_eval_summary_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        task = {
            'func': variant_eval_summary,
            'params': {
                'in_dir': input_dir,
                'out_dir': output_dir,
            },
            'stage_name': stage,
            'uid': 'all-chroms',
            'drm_params': lsf_params_json,
            'parents': parent_tasks,
        }

        summary_task = self.workflow.add_task(**task)
        return summary_task

    def create_bcftools_stats_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('bcftools-stats', step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(bcftools_stats_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_stats = '{}.stats.out'.format(chrom)
            task = {
                'func': bcftools_stats,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_stats': os.path.join(basedir, chrom, output_stats),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_variant_eval_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('gatk-variant-eval', step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(gatk_variant_eval_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_stats = 'chrom-{}-variant-eval.out'.format(chrom)
            output_log = 'chrom-{}-variant-eval.log'.format(chrom)
            task = {
                'func': gatk_variant_eval,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_stats': os.path.join(basedir, chrom, output_stats),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_vep_cadd_annotation_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('vep-cadd-annotation', step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(annotation_vep_cadd_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'annotated.vep.cadd.c{}.vcf.gz'.format(chrom)
            output_log = 'vep.cadd.annotation.{}.log'.format(chrom)
            task = {
                'func': annotation_vep_cadd,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_ExAC_annotation_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('annotate-w-ExAC', step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(annotation_ExAC_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'ExAC-annotated.c{}.vcf.gz'.format(chrom)
            output_log = 'ExAC-annotate.{}.log'.format(chrom)
            task = {
                'func': annotation_ExAC,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_1000G_annotation_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('annotate-w-1000G', step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(annotation_1000G_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = '1kg-annotated.c{}.vcf.gz'.format(chrom)
            output_log = '1000G-annotate.{}.log'.format(chrom)
            task = {
                'func': annotation_1000G,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_allele_balance_annotation_tasks(self, parent_tasks,
                                               step_number):
        tasks = []
        stage = self._construct_task_name('allele-balance-annotation',
                                          step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(annotate_allele_balances_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'allele-balance-{}.log'.format(chrom)
            task = {
                'func': annotate_allele_balances,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_filter_variant_missingness_tasks(self, parent_tasks,
                                                step_number):
        tasks = []
        stage = self._construct_task_name('filter-variant-missingness',
                                          step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(filter_variant_missingness_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'filter-missingness-{}.log'.format(chrom)
            task = {
                'func': filter_variant_missingness,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_remove_symbolic_deletion_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('remove-symbolic-alleles',
                                          step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(
            remove_symbolic_deletion_alleles_lsf_params, self.config.email,
            self.config.docker)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'remove-symbolic-alleles-chrom-{}.log'.format(chrom)
            task = {
                'func': remove_symbolic_deletion_alleles,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_decompose_normalize_unique_tasks(self, parent_tasks,
                                                step_number):
        tasks = []
        stage = self._construct_task_name('decompose-normalize-uniq',
                                          step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(normalize_decompose_unique_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'decompose-normalize-unique-{}.log'.format(chrom)
            task = {
                'func': normalize_decompose_unique,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_calculate_sample_missingness_task(self, parent_tasks,
                                                 step_number):
        stage = self._construct_task_name('calculate-sample-missingness',
                                          step_number)
        output_dir = os.path.join(self.config.rootdir, stage)

        prior_stage_name = parent_tasks[0].stage.name
        input_dir = os.path.join(self.config.rootdir, prior_stage_name)
        input_json_wildcard_path = os.path.join(input_dir, '*', '*.json')

        lsf_params = get_lsf_params(calculate_sample_missingness_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        task = {
            'func': calculate_sample_missingness,
            'params': {
                'in_json':
                input_json_wildcard_path,
                'out_stats':
                os.path.join(output_dir, 'sample-missingness-pct.dat'),
                'out_log':
                os.path.join(output_dir, 'sample-missingness-pct.dat.log'),
            },
            'stage_name': stage,
            'uid': '1-22',
            'drm_params': lsf_params_json,
            'parents': parent_tasks,
        }

        summary_task = self.workflow.add_task(**task)
        return summary_task

    def create_count_sample_missingness_tasks(self, parent_tasks, step_number):
        tasks = []
        stage = self._construct_task_name('count-sample-missingness',
                                          step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(count_sample_missingness_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for ptask in parent_tasks:
            chrom = ptask.params['in_chrom']

            # only count missing genotypes on chromosomes 1-22 (not X, Y, or MT)
            if not chrom[0].isdigit(): continue

            output_json = '{chrom}-sample-missingness-counts.json'.format(
                chrom=chrom)
            output_log = '{}-sample-missingness-counts.log'.format(chrom)
            task = {
                'func': count_sample_missingness,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_json': os.path.join(basedir, chrom, output_json),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_remove_ac_0_tasks(self, step_number):
        tasks = []
        stage = self._construct_task_name('select-variants-ac-0-removal',
                                          step_number)
        basedir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(
            gatk_select_variants_remove_ac_0_lsf_params, self.config.email,
            self.config.docker, self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        for chrom in self.config.chroms:
            vcf = self.config.vcfs[chrom]
            output_vcf = 'combined.c{chrom}.vcf.gz'.format(chrom=chrom)
            output_log = 'select-variants-chrom-{}-gatk.log'.format(chrom)
            task = {
                'func': gatk_select_variants_remove_ac_0,
                'params': {
                    'in_chrom': chrom,
                    'in_vcf': vcf,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'out_log': os.path.join(basedir, chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=chrom),
                'drm_params': lsf_params_json,
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def _construct_task_name(self, name, number):
        task_name = '{}-{}'.format(number, name)
        return task_name
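
to_json and get_lsf_params come from the project's helper modules and are not shown here; to_json is presumably a thin json.dumps wrapper used to serialize the LSF submission parameters for drm_params, e.g.:

import json

def to_json(params):
    # assumed helper: serialize the LSF/DRM parameter dict passed as drm_params
    return json.dumps(params)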
Code Example #19
File: recipe.py Project: yanding/COSMOS-2.0
def variant_call(execution, bam_path, target_bed_path, max_complex_gap):
    """
    Bioinformatics variant calling workflow
    """
    contigs = sp.check_output("cat %s |cut -f1|uniq" % target_bed_path, shell=True).strip().split("\n")

    bed_tasks = [execution.add_task(tools.filter_bed_by_contig, tags=dict(in_bam=bam_path, in_bed=target_bed_path, contig=contig), out_dir='work/{contig}')
                 for contig in contigs ]

    freebayes_tasks = one2one(tools.freebayes, bed_tasks, dict(max_complex_gap=max_complex_gap))

    merge_vcf_tasks = many2one(tools.vcf_concat_parts, freebayes_tasks)

    execution.run()


if __name__ == '__main__':
    p = argparse.ArgumentParser()
    p.add_argument('bam_path')
    p.add_argument('target_bed_path')
    p.add_argument('--max_complex_gap', type=int, default=2)
    add_execution_args(p)
    start_kwargs, variant_call_args = pop_execution_args(vars(p.parse_args()))

    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()
    execution = cosmos.start(output_dir='../analysis_output/variant_calling', **start_kwargs)

    variant_call(execution, **variant_call_args)
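
one2one and many2one are DAG helpers from the older COSMOS recipe API used here: one2one adds one child task per parent task, and many2one adds a single task whose parents are all of the given tasks (the "many2one relationship" mentioned in Code Example #23). A conceptual, hypothetical sketch only, not the library's actual implementation (it assumes access to the execution object above):

def one2one(tool, parent_tasks, extra_tags=None):
    # hypothetical: one child task per parent, inheriting (and extending) the parent's tags
    return [execution.add_task(tool,
                               tags=dict(parent.tags, **(extra_tags or {})),
                               parents=[parent])
            for parent in parent_tasks]


def many2one(tool, parent_tasks):
    # hypothetical: a single child task that depends on every parent task
    return execution.add_task(tool, tags=dict(), parents=list(parent_tasks))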
Code Example #20
"""
Basic demonstration of the structure of a Task instance
"""
import subprocess as sp
import os
import sys
from cosmos.api import Cosmos

cosmos = Cosmos('sqlite:///%s/sqlite.db' %
                os.path.dirname(os.path.abspath(__file__)),
                default_drm='local')
cosmos.initdb()

sp.check_call('mkdir -p analysis_output/ex1', shell=True)
os.chdir('analysis_output/ex1')
workflow = cosmos.start('Example1', restart=True, skip_confirm=True)


def say(text, out_file):
    return r"""
        echo "{text}" > {out_file}
    """.format(text=text, out_file=out_file)


t = workflow.add_task(func=say,
                      params=dict(
                          text='Hello World',
                          out_file='out.txt',
                      ),
                      uid='my_task',
                      time_req=None,
                      core_req=1,
                      mem_req=1024)

workflow.run()

sys.exit(0 if workflow.successful else 1)
Code Example #21
File: mie.py Project: vifehe/yaps2
class Pipeline(object):
    def __init__(self, config, drm, restart):
        self.config = config

        self.cosmos = Cosmos(database_url='sqlite:///{}'.format(
            self.config.db),
                             get_submit_args=default_get_submit_args,
                             default_drm=drm)

        self.cosmos.initdb()

        primary_logfile = os.path.join(
            self.config.rootdir,
            '{}.log'.format(self.config.project_name),
        )

        self.workflow = self.cosmos.start(
            self.config.project_name,
            primary_log_path=primary_logfile,
            restart=restart,
        )

        self.setup_pipeline()

    def setup_pipeline(self):
        self.construct_pipeline()
        self.workflow.make_output_dirs()

    def run(self):
        # set set_successful=False if you intend to add more tasks to the
        # pipeline later
        custom_log_dir = lambda task: os.path.join(self.config.rootdir, 'logs',
                                                   task.stage.name, task.uid)
        self.workflow.run(set_successful=False,
                          log_out_dir_func=custom_log_dir)

    def construct_pipeline(self):
        partition_tasks = self.create_vcf_partition_tasks()
        plink_pipeline_tasks = self.create_plink_pipeline_tasks(
            partition_tasks)
        aggregate_mie_stats_tasks = self.create_aggregate_mie_stats_tasks(
            plink_pipeline_tasks)

    def create_aggregate_mie_stats_tasks(self, parent_tasks):
        tasks = []
        stage = '3-aggregate-mie-stats'
        basedir = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        input_dir = os.path.join(self.config.rootdir,
                                 parent_tasks[0].stage.name)

        parent_snp_tranche_tasks = \
            [task for task in parent_tasks \
                  if (task.params['type'] == 'snps' and task.params['method'] == 'tranche') ]

        parent_snp_percentile_tasks = \
            [task for task in parent_tasks \
                  if (task.params['type'] == 'snps' and task.params['method'] == 'percentile') ]

        parent_indel_tranche_tasks = \
            [task for task in parent_tasks \
                  if (task.params['type'] == 'indels' and task.params['method'] == 'tranche') ]

        parent_indel_percentile_tasks = \
            [task for task in parent_tasks \
                  if (task.params['type'] == 'indels' and task.params['method'] == 'percentile') ]

        task_groups = (parent_snp_tranche_tasks, parent_snp_percentile_tasks,
                       parent_indel_tranche_tasks,
                       parent_indel_percentile_tasks)

        for tgroup in task_groups:
            category = tgroup[0].params['type']
            method = tgroup[0].params['method']
            out_filename = '.'.join([category, method, 'tsv'])
            output_file = os.path.join(basedir, out_filename)
            task = {
                'func':
                aggregate_mie_statistics,
                'params': {
                    'in_category': category,
                    'in_method': method,
                    'in_dir': input_dir,
                    'out_file': output_file,
                },
                'stage_name':
                stage,
                'uid':
                '{category}:{method}'.format(method=method, category=category),
                'drm_params':
                to_json(aggregate_mie_statistics_lsf_params(email)),
                'parents':
                tgroup,
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_plink_pipeline_tasks(self, parent_tasks):
        tasks = []
        stage = '2-plink-pipeline'
        basedir = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        for ptask in sorted(parent_tasks, key=lambda t: t.id):
            chrom = ptask.params['in_chrom']
            label = ptask.params['in_label']
            method = ptask.params['in_method']
            category = ptask.params['in_type']

            output_dir = os.path.join(basedir, category, method, label, chrom)
            #            ensure_directory(output_dir)

            task = {
                'func':
                plink_pipeline,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_trio_fam': self.config.plink_fam_file,
                    'chrom': chrom,
                    'type': category,
                    'method': method,
                    'label': label,
                    'out_dir': output_dir,
                },
                'stage_name':
                stage,
                'uid':
                '{category}:{method}:{label}:{chrom}'.format(chrom=chrom,
                                                             method=method,
                                                             category=category,
                                                             label=label),
                'drm_params':
                to_json(plink_pipeline_lsf_params(email)),
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks

    def create_vcf_partition_tasks(self):
        all_tasks = []
        cases = (
            ('tranche', 'snps', self.config.tranche_intervals['snps']),
            ('tranche', 'indels', self.config.tranche_intervals['indels']),
            ('percentile', 'snps', self.config.percentiles['snps']),
            ('percentile', 'indels', self.config.percentiles['indels']),
        )

        for case in cases:
            tasks = self.generate_vcf_partition_tasks(*case)
            all_tasks.extend(tasks)

        return all_tasks

    def generate_vcf_partition_tasks(self, method, category, intervals):
        # method: 'tranche' or 'percentile'
        # category: 'snps' or 'indels'
        # label:
        #     tranche : 1, 2 or 3
        #     percentile : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100
        tasks = []
        for label in sorted(intervals.keys()):
            interval = intervals[label]
            partition_tasks = self.create_vcf_partition_chromosome_tasks(
                method=method,
                label=str(label),
                category=category,
                interval=interval,
            )
            tasks.extend(partition_tasks)

        return tasks

    def create_vcf_partition_chromosome_tasks(self, method, label, category,
                                              interval):
        tasks = []
        stage = '1-partition-vcfs'
        basedir = os.path.join(self.config.rootdir, stage, category, method,
                               label)
        email = self.config.email

        for chrom in self.config.chroms:
            vcf = self.config.vcfs[chrom]
            output_vcf = 'selected.c{chrom}.vcf.gz'.format(chrom=chrom)
            task = {
                'func':
                vcf_partition,
                'params': {
                    'in_vcf': vcf,
                    'out_vcf': os.path.join(basedir, chrom, output_vcf),
                    'in_min_vqslod': interval[0],
                    'in_max_vqslod': interval[1],
                    'in_samples': self.config.control_samples_file,
                    'in_type': category,
                    'in_method': method,
                    'in_chrom': chrom,
                    'in_label': label,
                },
                'stage_name':
                stage,
                'uid':
                '{category}:{method}:{label}:{chrom}'.format(chrom=chrom,
                                                             method=method,
                                                             category=category,
                                                             label=label),
                'drm_params':
                to_json(vcf_partition_lsf_params(email)),
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks
Code Example #22
File: ex3.py Project: qqss88/Cosmos2
"""
Basic demonstration of the structure of a Task instance
"""
import os
from cosmos.api import Cosmos

cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
cosmos.initdb()

execution = cosmos.start('Example1', 'analysis_output/ex3', restart=True, skip_confirm=True)


def cmd(in_files, out_file):
    return r"""
        echo "{in_files}" > {out_file}
    """.format(**locals())


t = execution.add_task(cmd, tags=dict(in_files=[('a', 'b', 'in_file')], out_file='out.txt'))

print('Task:', t)
print('task.tags', t.tags)
print('task.input_files', t.input_files)
print('task.output_files', t.output_files)

#execution.run()

Code Example #23
File: ex1.py Project: yanding/COSMOS-2.0
                                      tags=dict(chars=True, **cat_task.tags),
                                      parents=[cat_task],
                                      out_dir='{word}/{n}')
                   for cat_task in cats]

    # Cat the contents of all word_counts into one file.  Only one node is being created whose parents are
    # all of the WordCounts (a many2one relationship).
    summarize = execution.add_task(cat,
                                   tags=dict(),
                                   parents=word_counts,
                                   out_dir='',
                                   stage_name='Summary_Analysis')

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(execution.stage_graph(), '/tmp/ex1_stage_graph.png', format='png')
        draw_task_graph(execution.task_graph(), '/tmp/ex1_task_graph.png', format='png')
    else:
        print('Pygraphviz is not available :(')

    execution.run()


if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    execution = cosmos.start('Example1', 'analysis_output/ex1', max_attempts=1, restart=True, skip_confirm=True,
                             max_cpus=10)
    run_ex1(execution)
Code Example #24
        from cosmos.util import growl
        from cosmos.api import signal_execution_status_change, ExecutionStatus

        @signal_execution_status_change.connect
        def growl_signal(execution):
            if execution.status != ExecutionStatus.running:
                growl.send('%s %s' % (execution, execution.status))

    if func.__module__.startswith('ex'):
        execution_params = {
            n: kwargs.pop(n, None)
            for n in [
                'name', 'restart', 'skip_confirm', 'max_cpus', 'max_attempts',
                'output_dir'
            ]
        }
        if not execution_params['output_dir']:
            mkdir(os.path.join(root_path, 'out_dir'))
            execution_params['output_dir'] = os.path.join(
                root_path, 'out_dir', execution_params['name'])

        ex = cosmos.start(**execution_params)
        kwargs['execution'] = ex

    if debug:
        import ipdb
        with ipdb.launch_ipdb_on_exception():
            func(**kwargs)
    else:
        func(**kwargs)
Code Example #25
        i = 0

    with open(out_file, "w") as fp:
        fp.write(str(i + 1))

    if i < 2:
        # fail the first 2 times
        raise


if __name__ == "__main__":
    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="local",
    )
    cosmos.initdb()
    workflow = cosmos.start("ExampleReattempt",
                            restart=True,
                            skip_confirm=True)

    if os.path.exists("out.txt"):
        os.unlink("out.txt")

    t = workflow.add_task(func=add_one,
                          params=dict(out_file="out.txt"),
                          uid="my_task",
                          max_attempts=3)

    workflow.make_output_dirs()
    workflow.run(cmd_wrapper=py_call_cmd_wrapper)
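
The top of add_one is cut off above; given that the script deletes out.txt, writes str(i + 1), and allows max_attempts=3 while failing for i < 2, the missing lines presumably read the current counter from out_file if it exists. A sketch of the full function under that assumption:

import os

def add_one(out_file):
    # assumed reconstruction of the truncated portion: load the previous attempt's counter
    if os.path.exists(out_file):
        with open(out_file) as fp:
            i = int(fp.read())
    else:
        i = 0

    with open(out_file, "w") as fp:
        fp.write(str(i + 1))

    if i < 2:
        # fail the first 2 times so Cosmos retries the task
        raise Exception("intentional failure, attempt %d" % (i + 1))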
Code Example #26
File: main.py Project: Romain-B/Cosmos_tests
    args = parser.parse_args()
    kwargs = dict(args._get_kwargs())
    func = kwargs.pop('func')
    growl = kwargs.pop('growl')
    debug = kwargs.pop('debug')
    if growl:
        from cosmos.util import growl
        from cosmos.api import signal_execution_status_change, ExecutionStatus

        @signal_execution_status_change.connect
        def growl_signal(execution):
            if execution.status != ExecutionStatus.running:
                growl.send('%s %s' % (execution, execution.status))

    if func.__module__.startswith('t'):
        execution_params = {n: kwargs.pop(n, None) for n in
                            ['name', 'restart', 'skip_confirm', 'max_cores', 'max_attempts', 'output_dir']}
        if not execution_params['output_dir']:
            mkdir(os.path.join(root_path, 'out_dir'))
            execution_params['output_dir'] = os.path.join(root_path, 'out_dir', execution_params['name'])

        ex = cosmos.start(**execution_params)
        kwargs['execution'] = ex

    if debug:
        import ipdb
        with ipdb.launch_ipdb_on_exception():
            func(**kwargs)
    else:
        func(**kwargs)
Code Example #27
        RDS_COSMOS_DATABASE,
        default_drm="awsbatch",
        default_drm_options=dict(
            container_image=os.getenv("ECR_CONTAINER_IMAGE"),
            s3_prefix_for_command_script_temp_files=os.path.join(
                S3_BUCKET_PATH, "cosmos-tmp"),
            shm_size=int(args.mem_req * 0.75),
            retry_only_if_status_reason_matches="Host EC2 .+ terminated.",  # only retry on spot instance death
        ),
        default_queue=os.getenv("BATCH_QUEUE_NAME"),
    )
    cosmos.initdb()

    workflow_name = f"{args.name}-{uuid1().hex}"
    workflow = cosmos.start(workflow_name, restart=True, skip_confirm=True)

    task_name = uuid1().hex

    workflow.add_task(
        func=pretrain,
        params=dict(
            version=task_name,
            max_epochs=args.max_epochs,
            num_workers=args.core_req - 1,
            batch_size=args.batch_size,
            multiplier=args.multiplier,
            size=args.size,
        ),
        uid=task_name,
        time_req=None,
Code Example #28
File: pca.py Project: vifehe/yaps2
class Pipeline(object):
    def __init__(self, config, drm, restart):
        self.config = config

        self.cosmos = Cosmos(
            database_url='sqlite:///{}'.format(self.config.db),
            get_submit_args=default_get_submit_args,
            default_drm=drm
        )

        self.cosmos.initdb()

        primary_logfile = os.path.join(
            self.config.rootdir,
            '{}.log'.format(self.config.project_name),
        )

        self.workflow = self.cosmos.start(
            self.config.project_name,
            primary_log_path=primary_logfile,
            restart=restart,
        )

        self.setup_pipeline()

    def setup_pipeline(self):
        self.construct_pipeline()
        self.workflow.make_output_dirs()

    def run(self):
        # pass set_successful=False if you intend to add more tasks to the
        # pipeline later
        custom_log_dir = lambda task: os.path.join(self.config.rootdir, 'logs', task.stage.name, task.uid)
        self.workflow.run(set_successful=False, log_out_dir_func=custom_log_dir)

    def construct_pipeline(self):
        filter_biallelic_snps_tasks = self.create_filter_biallelic_snps_tasks()
        plink_binary_tasks = self.create_plink_binary_tasks(filter_biallelic_snps_tasks)
        plink_ld_prune_tasks = self.create_plink_ld_prune_tasks(plink_binary_tasks)
        plink_extract_prune_tasks = self.create_plink_extract_prune_tasks(plink_ld_prune_tasks)
        plink_merge_prune_files_task = self.create_plink_merge_prune_file_task(plink_extract_prune_tasks)
        eigenstrat_task = self.create_eigenstrat_smartpca_task(plink_merge_prune_files_task)
        data_frame_task = self.create_data_frame_task(eigenstrat_task)

    def create_data_frame_task(self, parent_task):
        stage = '7-make-data-frame'
        basedir = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        pca_evec_file = os.path.join(
            parent_task.params['out_prj_dir'],
            'merged.eigenstrat.pca.evec',
        )

        out_file = os.path.join(basedir, 'merged.eigenstrat.pca.evec.tsv')

        task = {
            'func' : create_evec_data_frame,
            'params' : {
                'in_file' : pca_evec_file,
                'out_file' : out_file,
            },
            'stage_name' : stage,
            'uid' : 'all-chroms',
            'drm_params' :
                to_json(create_evec_data_frame_lsf_params(email)),
            'parents' : [ parent_task ],
        }

        df_task = self.workflow.add_task(**task)

        return df_task

    def create_eigenstrat_smartpca_task(self, parent_task):
        stage = '6-eigenstrat-smartpca'
        basedir = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        ped_file = "{}.ped".format(parent_task.params['out_path'])
        map_file = "{}.map".format(parent_task.params['out_path'])

        task = {
            'func' : eigenstrat_smartpca_analysis,
            'params' : {
                'in_ped_file' : ped_file,
                'in_map_file' : map_file,
                'out_prj_dir' : basedir,
            },
            'stage_name' : stage,
            'uid' : 'all-chroms',
            'drm_params' :
                to_json(eigenstrat_smartpca_analysis_lsf_params(email)),
            'parents' : [ parent_task ],
        }

        eigenstrat_task = self.workflow.add_task(**task)

        return eigenstrat_task

    def create_plink_merge_prune_file_task(self, parent_tasks):
        stage = '5-plink-merge-prune-files'
        basedir = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        parent_tasks_sorted = sorted(parent_tasks, key=lambda t: t.id)

        first_task = parent_tasks_sorted[0]
        remaining_tasks = parent_tasks_sorted[1:]

        merge_list_file = os.path.join(basedir, 'allfiles.txt')
        self._create_merge_list(merge_list_file, remaining_tasks)

        output_path = os.path.join(basedir, 'merged')

        task = {
            'func' : plink_merge_pruned_files,
            'params' : {
                'in_ref' : first_task.params['out_path'],
                'in_merge_file' : merge_list_file,
                'out_path' : output_path,
            },
            'stage_name' : stage,
            'uid' : 'all-chroms',
            'drm_params' :
                to_json(plink_merge_pruned_files_lsf_params(email)),
            'parents' : parent_tasks_sorted,
        }

        merge_task = self.workflow.add_task(**task)

        return merge_task


    def _create_merge_list(self, merge_file, tasks):
        ensure_directory(os.path.dirname(merge_file))
        with open(merge_file, 'w') as f:
            for t in tasks:
                print(t.params['out_path'], file=f)

    def create_plink_extract_prune_tasks(self, parent_tasks):
        tasks = []
        stage = '4-plink-extract-prune'
        basedir = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        for ptask in sorted(parent_tasks, key=lambda t: t.id):
            plink_extract_file = "{}.prune.in".format(ptask.params['out_path'])
            orig_binary_data = ptask.params['in_path']
            chrom = ptask.params['chrom']
            output_path = os.path.join(basedir, chrom, 'c{}.extracted'.format(chrom))

            task = {
                'func' : plink_extract_prune,
                'params' : {
                    'in_path' : orig_binary_data,
                    'in_extract' : plink_extract_file,
                    'out_path' : output_path,
                    'chrom' : chrom,
                },
                'stage_name' : stage,
                'uid' : '{chrom}'.format(chrom=chrom),
                'drm_params' :
                    to_json(plink_extract_prune_lsf_params(email)),
                'parents' : [ptask],
            }
            tasks.append( self.workflow.add_task(**task) )

        return tasks

    def create_plink_ld_prune_tasks(self, parent_tasks):
        tasks = []
        stage = '3-plink-ld-prune'
        basedir = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        for ptask in sorted(parent_tasks, key=lambda t: t.id):
            chrom = ptask.params['chrom']
            output_path = os.path.join(basedir, chrom, 'c{}-pruned'.format(chrom))

            task = {
                'func' : plink_ld_prune,
                'params' : {
                    'in_path' : ptask.params['out_path'],
                    'out_path' : output_path,
                    'chrom' : chrom,
                },
                'stage_name' : stage,
                'uid' : '{chrom}'.format(chrom=chrom),
                'drm_params' :
                    to_json(plink_ld_prune_lsf_params(email)),
                'parents' : [ptask],
            }
            tasks.append( self.workflow.add_task(**task) )

        return tasks

    def create_plink_binary_tasks(self, parent_tasks):
        tasks = []
        stage = '2-plink-binaries'
        basedir = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        for ptask in sorted(parent_tasks, key=lambda t: t.id):
            chrom = ptask.params['chrom']
            output_path = os.path.join(basedir, chrom, 'c{}'.format(chrom))

            task = {
                'func' : plink_binary,
                'params' : {
                    'in_vcf' : ptask.params['out_vcf'],
                    'out_path' : output_path,
                    'chrom' : chrom,
                },
                'stage_name' : stage,
                'uid' : '{chrom}'.format(chrom=chrom),
                'drm_params' :
                    to_json(plink_binary_lsf_params(email)),
                'parents' : [ptask],
            }
            tasks.append( self.workflow.add_task(**task) )

        return tasks

    def create_filter_biallelic_snps_tasks(self):
        tasks = []
        stage = '1-filter-biallelic-snps'
        basedir = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        for chrom in self.config.chroms:
            vcf = self.config.vcfs[chrom]
            output_vcf = 'filtered.snps.c{chrom}.vcf.gz'.format(chrom=chrom)
            task = {
                'func'   : filter_biallelic_snps,
                'params' : {
                    'chrom' : chrom,
                    'in_vcf' : vcf,
                    'out_vcf' : os.path.join(basedir, chrom, output_vcf),
                    'in_min_vqslod' : self.config.vqslod_threshold,
                },
                'stage_name' : stage,
                'uid' : '{chrom}'.format(chrom=chrom),
                'drm_params' :
                    to_json(filter_biallelic_snps_lsf_params(email)),
            }
            tasks.append( self.workflow.add_task(**task) )

        return tasks
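
The listing does not show how this Pipeline class gets instantiated; a plausible driver, sketched here under assumptions (the Config loader and its attributes are hypothetical, and the DRM string depends on your cluster), would look something like:

# Hypothetical driver for the Pipeline class above; Config is assumed to expose
# .db, .rootdir, .project_name, .email, .chroms, .vcfs, and .vqslod_threshold,
# matching the attributes the pipeline reads.
import argparse

def main():
    p = argparse.ArgumentParser()
    p.add_argument('--config', required=True, help='path to the pipeline config file')
    p.add_argument('--drm', default='lsf', help="DRM to submit jobs with (e.g. 'lsf' or 'local')")
    p.add_argument('--restart', action='store_true')
    args = p.parse_args()

    config = Config(args.config)  # hypothetical config loader
    pipeline = Pipeline(config, drm=args.drm, restart=args.restart)
    pipeline.run()

if __name__ == '__main__':
    main()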
コード例 #29
0
    """
    Bioinformatics variant calling workflow
    """
    contigs = sp.check_output("cat %s |cut -f1|uniq" % target_bed_path, shell=True).strip().split("\n")

    freebayes_tasks = []
    for contig in contigs:
        bed_task = execution.add_task(tools.filter_bed_by_contig,
                                      tags=dict(in_bam=bam_path, in_bed=target_bed_path, contig=contig),
                                      out_dir='work/{contig}')
        freebayes_task = execution.add_task(tools.freebayes,
                                            tags=dict(max_complex_gap=max_complex_gap),
                                            parents=bed_task,
                                            out_dir='work/{contig}')
        freebayes_tasks.append(freebayes_task)

    merge_vcf_tasks = many2one(tools.vcf_concat_parts, parents=freebayes_tasks)

    execution.run(max_attempts=max_attempts, max_cores=max_cores)


if __name__ == '__main__':
    p = argparse.ArgumentParser()
    p.add_argument('bam_path')
    p.add_argument('target_bed_path')
    p.add_argument('--max_complex_gap', type=int, default=2)
    add_execution_args(p)
    args = p.parse_args()

    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()
    execution = cosmos.start(name=args.name, output_dir='../analysis_output/variant_calling',
                             restart=args.restart, skip_confirm=args.skip_confirm)

    variant_call(execution, args.max_attempts, args.max_cores, args.bam_path, args.target_bed_path, args.max_complex_gap)
コード例 #30
0
ファイル: ex_email.py プロジェクト: nh13/COSMOS-2.0
from cosmos.api import Cosmos, signal_execution_status_change, ExecutionStatus
from ex1 import run_ex1
import os
from cosmos.util.helpers import mkdir

def run_ex3(execution):
    @signal_execution_status_change.connect
    def sig(ex):
        msg = "%s %s" % (ex, ex.status)
        if ex.status in [ExecutionStatus.successful, ExecutionStatus.failed, ExecutionStatus.killed]:
            text_message(msg)
            ex.log.info('Sent a text message')

    def text_message(message):
        from twilio.rest import TwilioRestClient

        account = "XYZ"
        token = "XYZ"
        client = TwilioRestClient(account, token)

        message = client.messages.create(to="+1231231234", from_="+1231231234", body=message)

    run_ex1(execution)


if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    execution = cosmos.start('Example_Email', 'analysis_output/ex3', max_attempts=2, restart=True, skip_confirm=True)
    run_ex3(execution)
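
The TwilioRestClient class used above comes from an older twilio release; with current versions of the twilio package the same helper would look roughly like this (a sketch only; the account credentials and phone numbers are placeholders, just as in the original):

from twilio.rest import Client

def text_message(message):
    # placeholder credentials and numbers, as in the snippet above
    account = "XYZ"
    token = "XYZ"
    client = Client(account, token)
    client.messages.create(to="+1231231234", from_="+1231231234", body=message)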
コード例 #31
0
ファイル: ex1.py プロジェクト: LPM-HMS/COSMOS2
import subprocess as sp
import os
import sys
from cosmos.api import Cosmos

cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)),
                default_drm='local')
cosmos.initdb()

sp.check_call('mkdir -p analysis_output/ex1', shell=True)
os.chdir('analysis_output/ex1')
workflow = cosmos.start('Example1', restart=True, skip_confirm=True)


def say(text, out_file):
    return r"""
        echo "{text}" > {out_file}
    """.format(text=text, out_file=out_file)


t = workflow.add_task(func=say,
                      params=dict(text='Hello World', out_file='out.txt',),
                      uid='my_task', time_req=None, core_req=1, mem_req=1024)

print('task.params', t.params)
print('task.input_map', t.input_map)
print('task.output_map', t.output_map)
print('task.core_req', t.core_req)
print('task.time_req', t.time_req)
print('task.drm', t.drm)
print('task.uid', t.uid)
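
The listing stops after printing the task attributes; to actually execute the workflow, the remaining lines would plausibly be (a sketch, mirroring how the other variants of this example finish, not part of the original file):

workflow.run()

sys.exit(0 if workflow.successful else 1)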
コード例 #32
0
def main():
    args = parse_args()

    cosmos = Cosmos(
        "sqlite:///%s/sqlite.db" % os.path.dirname(os.path.abspath(__file__)),
        default_drm="awsbatch",
        default_drm_options=dict(
            container_image=args.container_image,
            s3_prefix_for_command_script_temp_files=args.s3_prefix_for_command_script_temp_files,
            # only retry on spot instance death
            retry_only_if_status_reason_matches="Host EC2 .+ terminated.",
        ),
        default_queue=args.default_queue,
    )

    cosmos.initdb()

    # sp.check_call("mkdir -p analysis_output/ex1", shell=True)
    # os.chdir("analysis_output/ex1")
    workflow = cosmos.start(f"Evaluate_{args.id}", restart=True, skip_confirm=True)

    parameters = np.load(f"optimize_awsbatch/parameters/{args.id}.npy")

    for i, par in enumerate(parameters):
        parameters_ = dict(
            mean_weight=par[0],
            c_w=par[1],
            tau_pos=par[2],
            tau_neg=par[3],
            A_pos=par[4],
            A_neg=par[5],
            weight_decay=par[6],
            n_filters=25,
            time_max=250,
            crop=20,
            kernel_size=16,
            stride=4,
            intensity=127.5,
            c_w_min=None,
            c_l=True,
            network_type="LC_SNN",

        )
        workflow.add_task(
            func=evaluate,
            params=dict(
                parameters=parameters_,
                out_s3_uri=f"{args.out_s3_uri}/scores/{args.id}/{i}.json",
                sleep=args.sleep,
                train=args.train,
                calibrate=args.calibrate,
                test=args.test
            ),
            uid=str(i),
            time_req=None,
            max_attempts=args.max_attempts,
            core_req=args.core_req,
            mem_req=args.mem_req,
        )
    workflow.run()

    sys.exit(0 if workflow.successful else 1)
コード例 #33
0
ファイル: ex1.py プロジェクト: nh13/COSMOS-2.0
                                   parents=word_counts,
                                   out_dir='',
                                   stage_name='Summary_Analysis')

    if pygraphviz_available:
        # These images can also be seen on the fly in the web-interface
        draw_stage_graph(execution.stage_graph(),
                         '/tmp/ex1_stage_graph.png',
                         format='png')
        draw_task_graph(execution.task_graph(),
                        '/tmp/ex1_task_graph.png',
                        format='png')
    else:
        print('Pygraphviz is not available :(')

    execution.run()


if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' %
                    os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    execution = cosmos.start('Example1',
                             'analysis_output/ex1',
                             max_attempts=1,
                             restart=True,
                             skip_confirm=True,
                             max_cpus=10)
    run_ex1(execution)
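
The pygraphviz_available flag used above is set earlier in the original file; a typical way to define it would be the following (a sketch; the variable name is taken from the snippet, the try/except is an assumption):

try:
    import pygraphviz
    pygraphviz_available = True
except ImportError:
    pygraphviz_available = False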
コード例 #34
0
ファイル: testxx.py プロジェクト: Romain-B/Cosmos_tests
        draw_stage_graph(execution.stage_graph(), 'testing/workflow_info/test_stage_graph.png', format='png')
        draw_task_graph(execution.task_graph(), 'testing/workflow_info/test_task_graph.png', format='png')
    else:
        print('Pygraphviz is not available :(')

    execution.run(max_attempts=1, max_cores=10)

#------------------------
# WORKFLOW EXECUTION
if __name__ == '__main__':
    cosmos = Cosmos('sqlite:///%s/sqlite.db' % os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()

    subprocess.check_call('mkdir -p testing testing/data testing/results testing/workflow_info', shell=True)
    #subprocess.check_call('cp extdata/4.fastq testing/data', shell=False)
    execution = cosmos.start('Testx', 'testing', restart=True, skip_confirm=True)
    run_test(execution)
"""else:   
    #--------------------
    # Connexion aux services cosmos (BDD)    
	cosmos = Cosmos('sqlite:///sqlite.db')
	cosmos.initdb()
	    
    #--------------------
    # Creation des sous-dossiers resultats
	subprocess.check_call('mkdir -p testing testing/data testing/results testing/workflow_info', shell=True)
	#subprocess.check_call('cp extdata/4.fastq testing/data', shell=False, stderr=subprocess.STDOUT)

    #--------------------
    # Definition de la tache et execution
	execution = cosmos.start('Test1', 'testing',restart=True, skip_confirm=True)
コード例 #35
0
    bed_tasks = [
        execution.add_task(tools.filter_bed_by_contig,
                           tags=dict(in_bam=bam_path,
                                     in_bed=target_bed_path,
                                     contig=contig),
                           out_dir='work/{contig}') for contig in contigs
    ]

    freebayes_tasks = one2one(tools.freebayes, bed_tasks,
                              dict(max_complex_gap=max_complex_gap))

    merge_vcf_tasks = many2one(tools.vcf_concat_parts, freebayes_tasks)

    execution.run()


if __name__ == '__main__':
    p = argparse.ArgumentParser()
    p.add_argument('bam_path')
    p.add_argument('target_bed_path')
    p.add_argument('--max_complex_gap', type=int, default=2)
    add_execution_args(p)
    start_kwargs, variant_call_args = pop_execution_args(vars(p.parse_args()))

    cosmos = Cosmos('sqlite:///%s/sqlite.db' %
                    os.path.dirname(os.path.abspath(__file__)))
    cosmos.initdb()
    execution = cosmos.start(output_dir='../analysis_output/variant_calling',
                             **start_kwargs)

    variant_call(execution, **variant_call_args)
コード例 #36
0
class Pipeline(object):
    def __init__(self, config, drm, restart):
        self.config = config

        self.cosmos = Cosmos(
            database_url='sqlite:///{}'.format(self.config.db),
            get_submit_args=default_get_submit_args,
            default_drm=drm
        )

        self.cosmos.initdb()

        primary_logfile = os.path.join(
            self.config.rootdir,
            '{}.log'.format(self.config.project_name),
        )

        self.workflow = self.cosmos.start(
            self.config.project_name,
            primary_log_path=primary_logfile,
            restart=restart,
        )

        self.setup_pipeline()

    def setup_pipeline(self):
        self.construct_pipeline()
        self.workflow.make_output_dirs()

    def run(self):
        # pass set_successful=False if you intend to add more tasks to the
        # pipeline later
        custom_log_dir = lambda task: os.path.join(self.config.rootdir, 'logs', task.stage.name, task.uid)
        self.workflow.run(set_successful=False, log_out_dir_func=custom_log_dir)

    def construct_pipeline(self):
        speedseq_tasks = self.create_speedseq_realign_tasks()

    def create_speedseq_realign_tasks(self):
        tasks = []
        stage = '1-exec-speedseq-realign'
        basedir = os.path.join(self.config.rootdir, stage)
        email = self.config.email
        lsf_job_group = self.config.drm_job_group
        sample_data = self.config.sample_data

        for sample_id in sample_data.keys():
            bam_paths = sample_data[sample_id]['bams']
            sample_name = sample_data[sample_id]['meta']['original-name']
            output_prefix = os.path.join(basedir, sample_id, "{}.b38.realign".format(sample_id))
            tmpdir = os.path.join(basedir, sample_id, 'tmpdir')
            input_bams = ' '.join(bam_paths)

            task = {
                'func'   : exec_speedseq,
                'params' : {
                    'output_prefix' : output_prefix,
                    'tmpdir' : tmpdir,
                    'input_bams' : input_bams,
                },
                'stage_name' : stage,
                'uid' : sample_id,
                'drm_params' :
                    to_json(exec_speedseq_lsf_params(email, lsf_job_group)),
            }
            tasks.append( self.workflow.add_task(**task) )

        return tasks