def testMesosLaunch(self):
    # The stray trailing commas in the original made these a tuple; they are
    # two plain Target assignments. input_file_2 is synced into the docstore
    # below but only input_file_1 feeds the MD5 task.
    input_file_1 = Target("c39ded10-6073-11e4-9803-0800200c9a66")
    input_file_2 = Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")
    doc = FileDocStore(file_path="./test_tmp/docstore")
    logging.info("Adding files to object store")
    sync_doc_dir("examples/simple_galaxy/", doc,
                 uuid_set=[
                     "c39ded10-6073-11e4-9803-0800200c9a66",
                     "26fd12a2-9096-4af2-a989-9e2f1cb692fe"
                 ])

    task_1 = MD5Task(input_file_1)
    md5_service = nebula.service.md5_service.MD5Service(doc)

    # Stand up a Mesos-backed DRMS pointed at the test master and deploy
    # the MD5 service onto it
    sched = nebula.scheduler.Scheduler({})
    mesos = nebula.drms.mesos_runner.MesosDRMS(
        sched, {"mesos": "%s:%s" % (self.host_ip, CONFIG_PARENT_PORT)})
    mesos.start()

    mesos_md5_service = mesos.deploy_service(md5_service)
    job_1 = mesos_md5_service.submit(task_1)
    mesos_md5_service.wait([job_1])
    print job_1
    logging.info("Sleeping for 15")
    time.sleep(15)
    mesos.stop()
def testNebulaLaunch(self):
    input = {
        "input_file_1": Target("c39ded10-6073-11e4-9803-0800200c9a66"),
        "input_file_2": Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")
    }
    parameters = {"tail_select": {"lineNum": 3}}
    doc = FileDocStore(file_path=get_abspath("../test_tmp/docstore"))
    logging.info("Adding files to object store")
    sync_doc_dir("examples/simple_galaxy/", doc,
                 uuid_set=[
                     "c39ded10-6073-11e4-9803-0800200c9a66",
                     "26fd12a2-9096-4af2-a989-9e2f1cb692fe"
                 ])
    logging.info("Creating Task")
    workflow = GalaxyWorkflow(
        ga_file=get_abspath("../examples/simple_galaxy/SimpleWorkflow.ga"))
    task = nebula.tasks.GalaxyWorkflowTask("test_workflow", workflow,
                                           inputs=input,
                                           parameters=parameters)
    service = GalaxyService(docstore=doc,
                            name="nosetest_galaxy",
                            galaxy="bgruening/galaxy-stable:dev",
                            port=20022)

    task_path = get_abspath("../test_tmp/test.tasks")
    service_path = get_abspath("../test_tmp/test.service")
    taskset = TaskGroup()
    taskset.append(task)
    with open(task_path, "w") as handle:
        taskset.store(handle)
    with open(service_path, "w") as handle:
        service.get_config().set_docstore_config(
            cache_path=get_abspath("../test_tmp/cache")).store(handle)

    env = dict(os.environ)
    if 'PYTHONPATH' in env:
        env['PYTHONPATH'] += ":" + get_abspath("../")
    else:
        env['PYTHONPATH'] = get_abspath("../")
    subprocess.check_call(
        [get_abspath("../bin/nebula"), "run", service_path, task_path],
        env=env)

    for i in doc.filter():
        print json.dumps(i, indent=4)
def testToolTagging(self):
    doc = FileDocStore(file_path=get_abspath("../test_tmp/docstore"))
    sync_doc_dir(get_abspath("../examples/simple_galaxy/"), doc,
                 uuid_set=[
                     "c39ded10-6073-11e4-9803-0800200c9a66",
                     "26fd12a2-9096-4af2-a989-9e2f1cb692fe"
                 ])
    input_file_1 = Target(uuid="c39ded10-6073-11e4-9803-0800200c9a66")
    input_file_2 = Target(uuid="26fd12a2-9096-4af2-a989-9e2f1cb692fe")
    workflow = GalaxyWorkflow(
        ga_file=get_abspath("../examples/simple_galaxy/SimpleWorkflow.ga"))
    # tags labels the run as a whole; tool_tags maps tool step ->
    # output name -> list of file tags to attach to that output
    task_tag = nebula.tasks.GalaxyWorkflowTask(
        "workflow_ok", workflow,
        inputs={
            'input_file_1': input_file_1,
            'input_file_2': input_file_2
        },
        parameters={"tail_select": {"lineNum": 3}},
        tags=["run:testing"],
        tool_tags={
            "tail_select": {"out_file1": ["file:tail"]},
            "concat_out": {"out_file1": ["file:output"]}
        })

    print "Starting Service"
    service = GalaxyService(docstore=doc,
                            name="nosetest_galaxy",
                            galaxy="bgruening/galaxy-stable:dev",
                            force=True,
                            port=20022)
    service.start()
    self.service = service
    job = service.submit(task_tag)
    print "JOB", job.get_status()
    service.wait([job])
    self.assertIn(job.get_status(), ['ok'])
    self.assertFalse(service.in_error())
    print service.in_error()
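# A sketch of a helper for pulling tagged outputs back out of the docstore
# after a run like the one above. It reuses only calls that already appear
# in this file (doc.filter(tags=...) and doc.size(info), see
# testWorkflowCaching below); the helper itself is illustrative and not part
# of the original test suite.
def _find_tagged_outputs(doc, tag="file:output"):
    """Yield (id, size) for every docstore entry carrying the given tag."""
    for id, info in doc.filter(tags=tag):
        yield id, doc.size(info)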
def run_gen(args):
    syn = synapseclient.Synapse()
    syn.login()

    docstore = from_url(args.out_base)

    data_mapping = {
        "db_snp": "dbsnp_132_b37.leftAligned.vcf",
        "centromere": "centromere_hg19.bed",
        "reference_genome": "Homo_sapiens_assembly19.fasta",
        "cosmic": "b37_cosmic_v54_120711.vcf"
    }

    if args.ref_download:
        syn_sync(syn, REFDATA_PROJECT, docstore, data_mapping.values())

    # Resolve each reference file name to its docstore UUID
    dm = {}
    for k, v in data_mapping.items():
        hit = None
        for a in docstore.filter(name=v):
            hit = a[0]
        if hit is None:
            raise Exception("%s not found" % (v))
        dm[k] = {"uuid": hit}

    if args.sample is not None:
        sync_doc_dir(
            os.path.join(os.path.dirname(__file__), "..", "testexomes"),
            docstore,
            filter=lambda x: x['donorId'] in args.sample)
    else:
        sync_doc_dir(
            os.path.join(os.path.dirname(__file__), "..", "testexomes"),
            docstore)

    # Pair tumour and normal BAMs by participant id
    tumor_uuids = {}
    normal_uuids = {}
    for id, ent in docstore.filter(sampleType="tumour"):
        tumor_uuids[ent['participant_id']] = id
    for id, ent in docstore.filter(sampleType="normal"):
        normal_uuids[ent['participant_id']] = id

    mc3_workflow = GalaxyWorkflow(
        ga_file="workflows/Galaxy-Workflow-MC3_Pipeline_Test.ga")

    # NOTE: reference_id is looked up here but not used below
    reference_id = None
    for a in docstore.filter(name="Homo_sapiens_assembly19.fasta"):
        reference_id = a[0]

    # Build one workflow task per donor that has both a tumour and a normal
    tasks = TaskGroup()
    for donor in tumor_uuids:
        if donor in normal_uuids:
            print "participant", donor
            donor_name = None
            for k, v in fake_metadata.items():
                if v['participant_id'] == donor:
                    donor_name = k
            workflow_dm = dict(dm)
            workflow_dm['tumor_bam'] = {"uuid": tumor_uuids[donor]}
            workflow_dm['normal_bam'] = {"uuid": normal_uuids[donor]}
            task = GalaxyWorkflowTask(
                "workflow_%s" % (donor), mc3_workflow,
                inputs=workflow_dm,
                parameters={
                    "reheader_config": {
                        "platform": "Illumina",
                        "center": "OHSU",
                        "reference_genome": "Homo_sapiens_assembly19.fasta",
                        "participant_uuid": fake_metadata[donor_name]['participant_id'],
                        "disease_code": fake_metadata[donor_name]['disease'],
                        "filedate": datetime.datetime.now().strftime("%Y%m%d"),
                        "normal_analysis_uuid": fake_metadata[donor_name]['normal']['uuid'],
                        "normal_bam_name": fake_metadata[donor_name]['normal']['file_name'],
                        "normal_aliquot_uuid": fake_metadata[donor_name]['normal']['aliquot_id'],
                        "normal_aliquot_barcode": fake_metadata[donor_name]['normal']['barcode'],
                        "tumor_analysis_uuid": fake_metadata[donor_name]['tumour']['uuid'],
                        "tumor_bam_name": fake_metadata[donor_name]['tumour']['file_name'],
                        "tumor_aliquot_uuid": fake_metadata[donor_name]['tumour']['aliquot_id'],
                        "tumor_aliquot_barcode": fake_metadata[donor_name]['tumour']['barcode'],
                    }
                },
                tags=["donor:%s" % (donor)],
            )
            tasks.append(task)

    if not os.path.exists("%s.tasks" % (args.out_base)):
        os.mkdir("%s.tasks" % (args.out_base))
    for data in tasks:
        with open("%s.tasks/%s" % (args.out_base, data.task_id), "w") as handle:
            handle.write(json.dumps(data.to_dict()))

    if args.create_service:
        service = GalaxyService(
            docstore=docstore,
            galaxy=args.galaxy,
            sudo=args.sudo,
            tool_data=args.tool_data,
            tool_dir=args.tool_dir,
            work_dir=args.work_dir,
            # NOTE: gatk_bqsr and gatk_indel each appear twice in the
            # original smp list with different core counts; kept as-is
            smp=[["gatk_bqsr", 12], ["gatk_indel", 24], ["MuSE", 8],
                 ["pindel", 8], ["mutect", 8], ["delly", 4],
                 ["gatk_bqsr", 12], ["gatk_indel", 12], ["bwa_mem", 12],
                 ["radia", 8], ["radia_filter", 8]])
        with open("%s.service" % (args.out_base), "w") as handle:
            s = service.get_config()
            if args.scratch:
                print "Using scratch", args.scratch
                s.set_docstore_config(cache_path=args.scratch, open_perms=True)
            s.store(handle)
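# Hedged sketch of the argparse wiring run_gen expects. The flag names are
# inferred from the attributes read above (args.out_base, args.ref_download,
# args.sample, args.create_service, args.galaxy, args.sudo, args.tool_data,
# args.tool_dir, args.work_dir, args.scratch) and may not match the original
# script's CLI exactly.
def _build_parser():
    import argparse
    parser = argparse.ArgumentParser(description="Generate MC3 test tasks")
    parser.add_argument("out_base",
                        help="base path for docstore/task/service output")
    parser.add_argument("--ref-download", dest="ref_download",
                        action="store_true",
                        help="pull reference files down from Synapse")
    parser.add_argument("--sample", action="append", default=None,
                        help="restrict the sync to these donor ids")
    parser.add_argument("--create-service", dest="create_service",
                        action="store_true")
    parser.add_argument("--galaxy", default="bgruening/galaxy-stable")
    parser.add_argument("--sudo", action="store_true")
    parser.add_argument("--tool-data", dest="tool_data", default=None)
    parser.add_argument("--tool-dir", dest="tool_dir", default=None)
    parser.add_argument("--work-dir", dest="work_dir", default=None)
    parser.add_argument("--scratch", default=None)
    return parser

# Typical invocation under the assumptions above:
#   run_gen(_build_parser().parse_args())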
def testWorkflowCaching(self):
    input = {
        "input_file_1": Target("c39ded10-6073-11e4-9803-0800200c9a66"),
        "input_file_2": Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")
    }
    parameters = {"tail_select": {"lineNum": 3}}
    # Docstore with a local cache directory, exercising the caching path
    doc = nebula.docstore.FileDocStore(
        get_abspath("../test_tmp/docstore"),
        cache_path=get_abspath("../test_tmp/cache"))
    logging.info("Adding files to object store")
    sync_doc_dir(get_abspath("../examples/simple_galaxy/"), doc,
                 uuid_set=[
                     "c39ded10-6073-11e4-9803-0800200c9a66",
                     "26fd12a2-9096-4af2-a989-9e2f1cb692fe"
                 ])
    logging.info("Creating Task")
    workflow = GalaxyWorkflow(
        ga_file=get_abspath("../examples/simple_galaxy/SimpleWorkflow.ga"))
    task = nebula.tasks.GalaxyWorkflowTask(
        "test_workflow", workflow,
        inputs=input,
        parameters=parameters,
        tags=["run:testing"],
        tool_tags={
            "tail_select": {"out_file1": ["file:tail"]},
            "concat_out": {"out_file1": ["file:output"]}
        })

    service = GalaxyService(docstore=doc,
                            name="nosetest_galaxy",
                            galaxy="bgruening/galaxy-stable:dev",
                            force=True,
                            port=20022)
    self.service = service
    logging.info("Starting Service")
    print "Starting service"
    service.start()
    self.assertFalse(service.in_error())
    logging.info("Starting Tasks")
    job = service.submit(task)
    self.assertTrue(isinstance(job, TaskJob))
    self.assertFalse(service.in_error())
    service.wait([job])

    # The tagged workflow output should land in the docstore and be non-empty
    found = False
    for id, info in doc.filter(tags="file:output"):
        logging.info("Found result object: %s size: %d" % (id, doc.size(info)))
        self.assertTrue(doc.size(info) > 0)
        found = True
    self.assertTrue(found)
    self.assertFalse(service.in_error())
    self.assertIn(job.get_status(), ['ok'])
def testRunSimple(self):
    input = {
        "input_file_1": Target("c39ded10-6073-11e4-9803-0800200c9a66"),
        "input_file_2": Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")
    }
    parameters = {"tail_select": {"lineNum": 3}}
    # A deliberately broken copy of the parameters, used at the end to check
    # error reporting
    bad_parameters = dict(parameters)
    del bad_parameters['tail_select']

    doc = FileDocStore(file_path="./test_tmp/docstore")
    logging.info("Adding files to object store")
    sync_doc_dir("examples/simple_galaxy/", doc,
                 uuid_set=[
                     "c39ded10-6073-11e4-9803-0800200c9a66",
                     "26fd12a2-9096-4af2-a989-9e2f1cb692fe"
                 ])
    logging.info("Creating Task")
    workflow = GalaxyWorkflow(ga_file="examples/simple_galaxy/SimpleWorkflow.ga")
    task = nebula.tasks.GalaxyWorkflowTask("test_workflow", workflow,
                                           inputs=input,
                                           parameters=parameters)
    # Make sure the task data can be serialized
    task_data = task.to_dict()
    task_data_str = json.dumps(task_data)

    service = GalaxyService(docstore=doc,
                            name="nosetest_galaxy",
                            galaxy="bgruening/galaxy-stable",
                            force=True,
                            port=20022)
    self.service = service

    # Make sure the serialized task round-trips back into a runnable task
    new_task_data = json.loads(task_data_str)
    new_task = nebula.tasks.from_dict(new_task_data)

    logging.info("Starting Service")
    print "Starting service"
    service.start()
    self.assertFalse(service.in_error())
    logging.info("Starting Tasks")
    job = service.submit(new_task)
    self.assertTrue(isinstance(job, TaskJob))
    self.assertFalse(service.in_error())
    service.wait([job])
    self.assertIn(job.get_status(), ['ok'])

    # A task with missing parameters should fail cleanly: the job errors,
    # but the service itself stays healthy
    bad_task = nebula.tasks.GalaxyWorkflowTask("test_workflow_bad", workflow,
                                               inputs=input,
                                               parameters=bad_parameters)
    job = service.submit(bad_task)
    service.wait([job])
    self.assertIn(job.get_status(), ['error'])
    self.assertFalse(service.in_error())
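# The test methods above are excerpts from a larger unittest module whose
# imports and TestCase class did not survive in this extract. What follows is
# a hedged sketch of the scaffolding they assume: the import list is
# reconstructed from names used in the tests, get_abspath is a guess at its
# obvious implementation, service.stop() is assumed to exist as the
# counterpart of service.start(), and tearDown cleans up self.service because
# several tests assign it right after starting the service.
import os
import json
import time
import logging
import subprocess
import unittest

def get_abspath(path):
    # Assumed behavior: resolve a path relative to this test file's directory
    return os.path.join(os.path.dirname(__file__), path)

class TestNebulaWorkflows(unittest.TestCase):
    def tearDown(self):
        # Shut the Galaxy service down even when an assertion fails mid-test
        if getattr(self, "service", None) is not None:
            self.service.stop()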