Example 1
    def testMesosLaunch(self):
        input_file_1 = Target("c39ded10-6073-11e4-9803-0800200c9a66")
        input_file_2 = Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")

        doc = FileDocStore(file_path="./test_tmp/docstore")
        logging.info("Adding files to object store")
        sync_doc_dir("examples/simple_galaxy/",
                     doc,
                     uuid_set=[
                         "c39ded10-6073-11e4-9803-0800200c9a66",
                         "26fd12a2-9096-4af2-a989-9e2f1cb692fe"
                     ])

        task_1 = MD5Task(input_file_1)

        md5_service = nebula.service.md5_service.MD5Service(doc)

        sched = nebula.scheduler.Scheduler({})
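        # Wrap the scheduler in a Mesos DRMS runner, deploy the MD5 service onto
        # it, then submit the task and wait for the job to complete.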
        mesos = nebula.drms.mesos_runner.MesosDRMS(
            sched, {"mesos": "%s:%s" % (self.host_ip, CONFIG_PARENT_PORT)})
        mesos.start()
        mesos_md5_service = mesos.deploy_service(md5_service)
        job_1 = mesos_md5_service.submit(task_1)
        mesos_md5_service.wait([job_1])
        print job_1
        logging.info("Sleeping for 15")
        time.sleep(15)
        mesos.stop()
Example 2
    def testNebulaLaunch(self):
        input = {
            "input_file_1" :
                Target("c39ded10-6073-11e4-9803-0800200c9a66"),
            "input_file_2" :
                Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")
        }
        parameters = {
            "tail_select" : {
                "lineNum" : 3
            }
        }

        doc = FileDocStore(
            file_path=get_abspath("../test_tmp/docstore")
        )
        logging.info("Adding files to object store")
        sync_doc_dir("examples/simple_galaxy/", doc,
            uuid_set=["c39ded10-6073-11e4-9803-0800200c9a66", "26fd12a2-9096-4af2-a989-9e2f1cb692fe"]
        )
        logging.info("Creating Task")
        workflow = GalaxyWorkflow(ga_file=get_abspath("../examples/simple_galaxy/SimpleWorkflow.ga"))
        task = nebula.tasks.GalaxyWorkflowTask(
            "test_workflow",
            workflow,
            inputs=input,
            parameters=parameters
        )

        service = GalaxyService(
            docstore=doc,
            name="nosetest_galaxy",
            galaxy="bgruening/galaxy-stable:dev",
            port=20022
        )

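        # Serialize the task group and the service configuration to disk so the
        # nebula command-line runner invoked below can load them.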
        task_path = get_abspath("../test_tmp/test.tasks")
        service_path = get_abspath("../test_tmp/test.service")
        taskset = TaskGroup()
        taskset.append(task)
        with open(task_path, "w") as handle:
            taskset.store(handle)

        with open(service_path, "w") as handle:
            service.get_config().set_docstore_config(cache_path=get_abspath("../test_tmp/cache")).store(handle)

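        # Run the stored service and tasks through the nebula CLI in a
        # subprocess, with the package root added to PYTHONPATH.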
        env = dict(os.environ)
        if 'PYTHONPATH' in env:
            env['PYTHONPATH'] += ":" + get_abspath("../")
        else:
            env['PYTHONPATH'] = get_abspath("../")
        subprocess.check_call([get_abspath("../bin/nebula"), "run", service_path, task_path], env=env)

        for i in doc.filter():
            print json.dumps(i, indent=4)
Example 3
    def testNebulaLaunch(self):
        input = {
            "input_file_1": Target("c39ded10-6073-11e4-9803-0800200c9a66"),
            "input_file_2": Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")
        }
        parameters = {"tail_select": {"lineNum": 3}}

        doc = FileDocStore(file_path=get_abspath("../test_tmp/docstore"))
        logging.info("Adding files to object store")
        sync_doc_dir("examples/simple_galaxy/",
                     doc,
                     uuid_set=[
                         "c39ded10-6073-11e4-9803-0800200c9a66",
                         "26fd12a2-9096-4af2-a989-9e2f1cb692fe"
                     ])
        logging.info("Creating Task")
        workflow = GalaxyWorkflow(
            ga_file=get_abspath("../examples/simple_galaxy/SimpleWorkflow.ga"))
        task = nebula.tasks.GalaxyWorkflowTask("test_workflow",
                                               workflow,
                                               inputs=input,
                                               parameters=parameters)

        service = GalaxyService(docstore=doc,
                                name="nosetest_galaxy",
                                galaxy="bgruening/galaxy-stable:dev",
                                port=20022)

        task_path = get_abspath("../test_tmp/test.tasks")
        service_path = get_abspath("../test_tmp/test.service")
        taskset = TaskGroup()
        taskset.append(task)
        with open(task_path, "w") as handle:
            taskset.store(handle)

        with open(service_path, "w") as handle:
            service.get_config().set_docstore_config(
                cache_path=get_abspath("../test_tmp/cache")).store(handle)

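        # Hand the stored service and task files to the nebula CLI, making the
        # package importable by extending PYTHONPATH.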
        env = dict(os.environ)
        if 'PYTHONPATH' in env:
            env['PYTHONPATH'] += ":" + get_abspath("../")
        else:
            env['PYTHONPATH'] = get_abspath("../")
        subprocess.check_call(
            [get_abspath("../bin/nebula"), "run", service_path, task_path],
            env=env)

        for i in doc.filter():
            print json.dumps(i, indent=4)
Example 4
    def testToolTagging(self):

        doc = FileDocStore(file_path=get_abspath("../test_tmp/docstore"))
        sync_doc_dir(get_abspath("../examples/simple_galaxy/"),
                     doc,
                     uuid_set=[
                         "c39ded10-6073-11e4-9803-0800200c9a66",
                         "26fd12a2-9096-4af2-a989-9e2f1cb692fe"
                     ])

        input_file_1 = Target(uuid="c39ded10-6073-11e4-9803-0800200c9a66")
        input_file_2 = Target(uuid="26fd12a2-9096-4af2-a989-9e2f1cb692fe")
        workflow = GalaxyWorkflow(
            ga_file=get_abspath("../examples/simple_galaxy/SimpleWorkflow.ga"))
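        # tool_tags attaches file tags to named tool outputs so the resulting
        # datasets can later be looked up in the docstore by tag.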
        task_tag = nebula.tasks.GalaxyWorkflowTask(
            "workflow_ok",
            workflow,
            inputs={
                'input_file_1': input_file_1,
                'input_file_2': input_file_2
            },
            parameters={"tail_select": {"lineNum": 3}},
            tags=["run:testing"],
            tool_tags={
                "tail_select": {
                    "out_file1": ["file:tail"]
                },
                "concat_out": {
                    "out_file1": ["file:output"]
                }
            })
        print "Starting Service"
        service = GalaxyService(docstore=doc,
                                name="nosetest_galaxy",
                                galaxy="bgruening/galaxy-stable:dev",
                                force=True,
                                port=20022)
        service.start()
        self.service = service
        job = service.submit(task_tag)
        print "JOB", job.get_status()
        service.wait([job])
        self.assertIn(job.get_status(), ['ok'])
        self.assertFalse(service.in_error())
        print service.in_error()
Example 5
def run_gen(args):
    syn = synapseclient.Synapse()
    syn.login()

    docstore = from_url(args.out_base)

    data_mapping = {
        "db_snp": "dbsnp_132_b37.leftAligned.vcf",
        "centromere": "centromere_hg19.bed",
        "reference_genome": "Homo_sapiens_assembly19.fasta",
        "cosmic": "b37_cosmic_v54_120711.vcf"
    }

    if args.ref_download:
        syn_sync(syn, REFDATA_PROJECT, docstore, data_mapping.values())

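    # Resolve each reference file name to the UUID of its entry in the docstore.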
    dm = {}
    for k, v in data_mapping.items():
        hit = None
        for a in docstore.filter(name=v):
            hit = a[0]
        if hit is None:
            raise Exception("%s not found" % (v))
        dm[k] = {"uuid": hit}

    if args.sample is not None:
        sync_doc_dir(os.path.join(os.path.dirname(__file__), "..",
                                  "testexomes"),
                     docstore,
                     filter=lambda x: x['donorId'] in args.sample)
    else:
        sync_doc_dir(
            os.path.join(os.path.dirname(__file__), "..", "testexomes"),
            docstore)

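    # Index tumour and normal samples by participant id so they can be paired.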
    tumor_uuids = {}
    normal_uuids = {}

    for id, ent in docstore.filter(sampleType="tumour"):
        tumor_uuids[ent['participant_id']] = id

    for id, ent in docstore.filter(sampleType="normal"):
        normal_uuids[ent['participant_id']] = id

    mc3_workflow = GalaxyWorkflow(
        ga_file="workflows/Galaxy-Workflow-MC3_Pipeline_Test.ga")

    reference_id = None
    for a in docstore.filter(name="Homo_sapiens_assembly19.fasta"):
        reference_id = a[0]

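    # Build one MC3 workflow task for every donor that has both a tumour and a
    # normal sample.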
    tasks = TaskGroup()
    for donor in tumor_uuids:
        if donor in normal_uuids:
            print "participant", donor

            donor_name = None
            for k, v in fake_metadata.items():
                if v['participant_id'] == donor:
                    donor_name = k

            workflow_dm = dict(dm)
            workflow_dm['tumor_bam'] = {"uuid": tumor_uuids[donor]}
            workflow_dm['normal_bam'] = {"uuid": normal_uuids[donor]}

            task = GalaxyWorkflowTask(
                "workflow_%s" % (donor),
                mc3_workflow,
                inputs=workflow_dm,
                parameters={
                    "reheader_config": {
                        "platform": "Illumina",
                        "center": "OHSU",
                        "reference_genome": "Homo_sapiens_assembly19.fasta",
                        "participant_uuid": fake_metadata[donor_name]['participant_id'],
                        "disease_code": fake_metadata[donor_name]['disease'],
                        "filedate": datetime.datetime.now().strftime("%Y%m%d"),
                        "normal_analysis_uuid": fake_metadata[donor_name]['normal']['uuid'],
                        "normal_bam_name": fake_metadata[donor_name]['normal']['file_name'],
                        "normal_aliquot_uuid": fake_metadata[donor_name]['normal']['aliquot_id'],
                        "normal_aliquot_barcode": fake_metadata[donor_name]['normal']['barcode'],
                        "tumor_analysis_uuid": fake_metadata[donor_name]['tumour']['uuid'],
                        "tumor_bam_name": fake_metadata[donor_name]['tumour']['file_name'],
                        "tumor_aliquot_uuid": fake_metadata[donor_name]['tumour']['aliquot_id'],
                        "tumor_aliquot_barcode": fake_metadata[donor_name]['tumour']['barcode'],
                    }
                },
                tags=["donor:%s" % (donor)],
            )
            tasks.append(task)

    if not os.path.exists("%s.tasks" % (args.out_base)):
        os.mkdir("%s.tasks" % (args.out_base))
    for data in tasks:
        with open("%s.tasks/%s" % (args.out_base, data.task_id),
                  "w") as handle:
            handle.write(json.dumps(data.to_dict()))

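    # Optionally write out a Galaxy service definition alongside the task files.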
    if args.create_service:
        service = GalaxyService(docstore=docstore,
                                galaxy=args.galaxy,
                                sudo=args.sudo,
                                tool_data=args.tool_data,
                                tool_dir=args.tool_dir,
                                work_dir=args.work_dir,
                                smp=[["gatk_bqsr", 12], ["gatk_indel", 24],
                                     ["MuSE", 8], ["pindel", 8], ["mutect", 8],
                                     ["delly", 4], ["gatk_bqsr", 12],
                                     ["gatk_indel", 12], ["bwa_mem", 12],
                                     ["radia", 8], ['radia_filter', 8]])
        with open("%s.service" % (args.out_base), "w") as handle:
            s = service.get_config()
            if args.scratch:
                print "Using scratch", args.scratch
                s.set_docstore_config(cache_path=args.scratch, open_perms=True)
            s.store(handle)
Example 6
def run_gen(args):
    syn = synapseclient.Synapse()
    syn.login()

    docstore = from_url(args.out_base)

    data_mapping = {
        "db_snp" : "dbsnp_132_b37.leftAligned.vcf",
        "centromere" : "centromere_hg19.bed",
        "reference_genome" : "Homo_sapiens_assembly19.fasta",
        "cosmic" : "b37_cosmic_v54_120711.vcf"
    }

    if args.ref_download:
        syn_sync(syn, REFDATA_PROJECT, docstore, data_mapping.values())

    dm = {}
    for k,v in data_mapping.items():
        hit = None
        for a in docstore.filter(name=v):
            hit = a[0]
        if hit is None:
            raise Exception("%s not found" % (v))
        dm[k] = { "uuid" : hit }

    if args.sample is not None:
        sync_doc_dir(
            os.path.join( os.path.dirname(__file__), "..", "testexomes" ), docstore,
            filter=lambda x: x['donorId'] in args.sample
        )
    else:
        sync_doc_dir( os.path.join( os.path.dirname(__file__), "..", "testexomes" ), docstore)

    tumor_uuids = {}
    normal_uuids = {}

    for id, ent in docstore.filter(sampleType="tumour"):
        tumor_uuids[ent['participant_id']] = id

    for id, ent in docstore.filter(sampleType="normal"):
        normal_uuids[ent['participant_id']] = id

    mc3_workflow = GalaxyWorkflow(ga_file="workflows/Galaxy-Workflow-MC3_Pipeline_Test.ga")

    reference_id = None
    for a in docstore.filter(name="Homo_sapiens_assembly19.fasta"):
        reference_id = a[0]

    tasks = TaskGroup()
    for donor in tumor_uuids:
        if donor in normal_uuids:
            print "participant", donor

            donor_name = None
            for k,v in fake_metadata.items():
                if v['participant_id'] == donor:
                    donor_name = k

            workflow_dm = dict(dm)
            workflow_dm['tumor_bam'] = { "uuid" : tumor_uuids[donor] }
            workflow_dm['normal_bam'] = { "uuid" : normal_uuids[donor] }

            task = GalaxyWorkflowTask("workflow_%s" % (donor),
                mc3_workflow,
                inputs=workflow_dm,
                parameters={
                    "reheader_config" : {
                        "platform" : "Illumina",
                        "center" : "OHSU",
                        "reference_genome" : "Homo_sapiens_assembly19.fasta",
                        "participant_uuid" : fake_metadata[donor_name]['participant_id'],
                        "disease_code" : fake_metadata[donor_name]['disease'],
                        "filedate" : datetime.datetime.now().strftime("%Y%m%d"),
                        "normal_analysis_uuid" : fake_metadata[donor_name]['normal']['uuid'],
                        "normal_bam_name" : fake_metadata[donor_name]['normal']['file_name'],
                        "normal_aliquot_uuid" : fake_metadata[donor_name]['normal']['aliquot_id'],
                        "normal_aliquot_barcode": fake_metadata[donor_name]['normal']['barcode'],
                        "tumor_analysis_uuid" : fake_metadata[donor_name]['tumour']['uuid'],
                        "tumor_bam_name" : fake_metadata[donor_name]['tumour']['file_name'],
                        "tumor_aliquot_uuid" : fake_metadata[donor_name]['tumour']['aliquot_id'],
                        "tumor_aliquot_barcode" : fake_metadata[donor_name]['tumour']['barcode'],
                    }
                },
                tags=[ "donor:%s" % (donor) ],
            )
            tasks.append(task)

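    # Write each generated task out as a JSON file under the <out_base>.tasks directory.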
    if not os.path.exists("%s.tasks" % (args.out_base)):
        os.mkdir("%s.tasks" % (args.out_base))
    for data in tasks:
        with open("%s.tasks/%s" % (args.out_base, data.task_id), "w") as handle:
            handle.write(json.dumps(data.to_dict()))

    if args.create_service:
        service = GalaxyService(
            docstore=docstore,
            galaxy=args.galaxy,
            sudo=args.sudo,
            tool_data=args.tool_data,
            tool_dir=args.tool_dir,
            work_dir=args.work_dir,
            smp=[
                ["gatk_bqsr", 12],
                ["gatk_indel", 24],
                ["MuSE", 8],
                ["pindel", 8],
                ["mutect", 8],
                ["delly", 4],
                ["gatk_bqsr", 12],
                ["gatk_indel", 12],
                ["bwa_mem", 12],
                ["radia", 8],
                ['radia_filter', 8]
            ]
        )
        with open("%s.service" % (args.out_base), "w") as handle:
            s = service.get_config()
            if args.scratch:
                print "Using scratch", args.scratch
                s.set_docstore_config(cache_path=args.scratch, open_perms=True)
            s.store(handle)
Example 7
    def testWorkflowCaching(self):
        input = {
            "input_file_1": Target("c39ded10-6073-11e4-9803-0800200c9a66"),
            "input_file_2": Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")
        }
        parameters = {"tail_select": {"lineNum": 3}}

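        # The docstore is given a local cache directory; this test exercises
        # caching of workflow results.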
        doc = nebula.docstore.FileDocStore(
            get_abspath("../test_tmp/docstore"),
            cache_path=get_abspath("../test_tmp/cache"))

        logging.info("Adding files to object store")
        sync_doc_dir(get_abspath("../examples/simple_galaxy/"),
                     doc,
                     uuid_set=[
                         "c39ded10-6073-11e4-9803-0800200c9a66",
                         "26fd12a2-9096-4af2-a989-9e2f1cb692fe"
                     ])
        logging.info("Creating Task")
        workflow = GalaxyWorkflow(
            ga_file=get_abspath("../examples/simple_galaxy/SimpleWorkflow.ga"))
        task = nebula.tasks.GalaxyWorkflowTask("test_workflow",
                                               workflow,
                                               inputs=input,
                                               parameters=parameters,
                                               tags=["run:testing"],
                                               tool_tags={
                                                   "tail_select": {
                                                       "out_file1": ["file:tail"]
                                                   },
                                                   "concat_out": {
                                                       "out_file1": ["file:output"]
                                                   }
                                               })

        service = GalaxyService(docstore=doc,
                                name="nosetest_galaxy",
                                galaxy="bgruening/galaxy-stable:dev",
                                force=True,
                                port=20022)
        self.service = service

        logging.info("Starting Service")
        print "Starting service"
        service.start()
        self.assertFalse(service.in_error())
        logging.info("Starting Tasks")
        job = service.submit(task)
        self.assertTrue(isinstance(job, TaskJob))
        self.assertFalse(service.in_error())
        #logging.info("Waiting")
        service.wait([job])
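        # Check that at least one output tagged "file:output" landed in the
        # docstore and is non-empty.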
        found = False
        for id, info in doc.filter(tags="file:output"):
            logging.info("Found result object: %s size: %d" %
                         (id, doc.size(info)))
            self.assertTrue(doc.size(info) > 0)
            found = True
        self.assertTrue(found)
        self.assertFalse(service.in_error())
        self.assertIn(job.get_status(), ['ok'])
Example 8
    def testWorkflowCaching(self):
        input = {
            "input_file_1" : Target("c39ded10-6073-11e4-9803-0800200c9a66"),
            "input_file_2" : Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")
        }
        parameters = {
            "tail_select" : {
                "lineNum" : 3
            }
        }

        doc = nebula.docstore.FileDocStore(
            get_abspath("../test_tmp/docstore"),
            cache_path=get_abspath("../test_tmp/cache")
        )

        logging.info("Adding files to object store")
        sync_doc_dir(get_abspath("../examples/simple_galaxy/"), doc,
            uuid_set=["c39ded10-6073-11e4-9803-0800200c9a66", "26fd12a2-9096-4af2-a989-9e2f1cb692fe"]
        )
        logging.info("Creating Task")
        workflow = GalaxyWorkflow(ga_file=get_abspath("../examples/simple_galaxy/SimpleWorkflow.ga"))
        task = nebula.tasks.GalaxyWorkflowTask(
            "test_workflow", workflow,
            inputs=input,
            parameters=parameters,
            tags = [
                "run:testing"
            ],
            tool_tags= {
                "tail_select" : {
                    "out_file1" : [
                        "file:tail"
                    ]
                },
                "concat_out" : {
                    "out_file1" : ["file:output"]
                }
            }
        )

        service = GalaxyService(
            docstore=doc,
            name="nosetest_galaxy",
            galaxy="bgruening/galaxy-stable:dev",
            force=True,
            port=20022
        )
        self.service = service

        logging.info("Starting Service")
        print "Starting service"
        service.start()
        self.assertFalse( service.in_error() )
        logging.info("Starting Tasks")
        job = service.submit(task)
        self.assertTrue( isinstance(job, TaskJob) )
        self.assertFalse( service.in_error() )
        #logging.info("Waiting")
        service.wait([job])
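        # As above, look for non-empty docstore entries tagged "file:output".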
        found = False
        for id, info in doc.filter(tags="file:output"):
            logging.info("Found result object: %s size: %d" % (id, doc.size(info)))
            self.assertTrue( doc.size(info) > 0 )
            found = True
        self.assertTrue(found)
        self.assertFalse( service.in_error() )
        self.assertIn(job.get_status(), ['ok'])
Example 9
    def testRunSimple(self):
        input = {
            "input_file_1" :
                Target("c39ded10-6073-11e4-9803-0800200c9a66"),
            "input_file_2" :
                Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")
        }
        parameters = {
            "tail_select" : {
                "lineNum" : 3
            }
        }
        bad_parameters = dict(parameters)
        del bad_parameters['tail_select']

        doc = FileDocStore(file_path="./test_tmp/docstore")
        logging.info("Adding files to object store")
        sync_doc_dir("examples/simple_galaxy/", doc,
            uuid_set=["c39ded10-6073-11e4-9803-0800200c9a66", "26fd12a2-9096-4af2-a989-9e2f1cb692fe"]
        )
        logging.info("Creating Task")
        workflow = GalaxyWorkflow(ga_file="examples/simple_galaxy/SimpleWorkflow.ga")
        task = nebula.tasks.GalaxyWorkflowTask(
            "test_workflow", workflow,
            inputs=input,
            parameters=parameters
        )

        task_data = task.to_dict()
        #make sure the task data can be serialized
        task_data_str = json.dumps(task_data)

        service = GalaxyService(
            docstore=doc,
            name="nosetest_galaxy",
            galaxy="bgruening/galaxy-stable",
            force=True,
            port=20022
        )
        self.service = service

        #make sure the generated task is serializable
        new_task_data = json.loads(task_data_str)
        new_task = nebula.tasks.from_dict(new_task_data)

        logging.info("Starting Service")
        print "Starting service"
        service.start()
        self.assertFalse( service.in_error() )
        logging.info("Starting Tasks")
        job = service.submit(new_task)
        self.assertTrue( isinstance(job, TaskJob) )
        self.assertFalse( service.in_error() )
        #logging.info("Waiting")
        service.wait([job])
        self.assertIn(job.get_status(), ['ok'])

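        # A task missing its "tail_select" parameters should end in an error
        # status without putting the service itself into an error state.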
        bad_task = nebula.tasks.GalaxyWorkflowTask(
            "test_workflow_bad",
            workflow,
            inputs=input,
            parameters=bad_parameters
        )
        job = service.submit(bad_task)
        service.wait([job])
        self.assertIn(job.get_status(), ['error'])

        self.assertFalse( service.in_error() )