Code Example #1
File: test_generatework.py Project: kellrott/nebula
 def testServiceGenerate(self):
     doc = FileDocStore(file_path=get_abspath("../test_tmp/docstore"))
     service = GalaxyService(docstore=doc,
                             name="nosetest_galaxy",
                             galaxy="bgruening/galaxy-stable",
                             port=20022)
     json.dumps(service.to_dict())
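
The examples in this section all share one life cycle: stage files into a document store, wrap a Galaxy workflow (.ga file), bind inputs and parameters into a task, then run the task against a Dockerized Galaxy service. The sketch below condenses that pattern using only calls that appear in these snippets; the import lines are assumptions, since the excerpts omit their import blocks.

# A condensed sketch of the pattern the examples in this section share.
# The import paths below are assumptions (the snippets omit their imports);
# nebula.docstore.FileDocStore and nebula.tasks are the only module paths
# the excerpts show directly.
import nebula.tasks
from nebula.docstore import FileDocStore                  # assumed path
from nebula.docstore.util import sync_doc_dir             # assumed path
from nebula.galaxy import GalaxyWorkflow, GalaxyService   # assumed path
from nebula.target import Target                          # assumed path

doc = FileDocStore(file_path="test_tmp/docstore")
# 1. Stage input files into the document store, keyed by UUID.
sync_doc_dir("examples/simple_galaxy/", doc,
             uuid_set=["c39ded10-6073-11e4-9803-0800200c9a66",
                       "26fd12a2-9096-4af2-a989-9e2f1cb692fe"])
# 2. Wrap a Galaxy workflow definition and bind inputs/parameters.
workflow = GalaxyWorkflow(ga_file="examples/simple_galaxy/SimpleWorkflow.ga")
task = nebula.tasks.GalaxyWorkflowTask(
    "example_task", workflow,
    inputs={"input_file_1": Target("c39ded10-6073-11e4-9803-0800200c9a66"),
            "input_file_2": Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")},
    parameters={"tail_select": {"lineNum": 3}})
# 3. Run the task against a Dockerized Galaxy instance and wait for it.
service = GalaxyService(docstore=doc, name="sketch_galaxy",
                        galaxy="bgruening/galaxy-stable", port=20022)
service.start()
job = service.submit(task)
service.wait([job])
assert job.get_status() == 'ok'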
Code Example #2
    def testToolTagging(self):

        doc = FileDocStore(file_path=get_abspath("../test_tmp/docstore"))
        sync_doc_dir(get_abspath("../examples/simple_galaxy/"),
                     doc,
                     uuid_set=[
                         "c39ded10-6073-11e4-9803-0800200c9a66",
                         "26fd12a2-9096-4af2-a989-9e2f1cb692fe"
                     ])

        input_file_1 = Target(uuid="c39ded10-6073-11e4-9803-0800200c9a66")
        input_file_2 = Target(uuid="26fd12a2-9096-4af2-a989-9e2f1cb692fe")
        workflow = GalaxyWorkflow(
            ga_file=get_abspath("../examples/simple_galaxy/SimpleWorkflow.ga"))
        task_tag = nebula.tasks.GalaxyWorkflowTask(
            "workflow_ok",
            workflow,
            inputs={
                'input_file_1': input_file_1,
                'input_file_2': input_file_2
            },
            parameters={"tail_select": {"lineNum": 3}},
            tags=["run:testing"],
            tool_tags={
                "tail_select": {
                    "out_file1": ["file:tail"]
                },
                "concat_out": {
                    "out_file1": ["file:output"]
                }
            })
        print "Starting Service"
        service = GalaxyService(docstore=doc,
                                name="nosetest_galaxy",
                                galaxy="bgruening/galaxy-stable:dev",
                                force=True,
                                port=20022)
        service.start()
        self.service = service
        job = service.submit(task_tag)
        print "JOB", job.get_status()
        service.wait([job])
        self.assertIn(job.get_status(), ['ok'])
        self.assertFalse(service.in_error())
        print service.in_error()
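
The tool_tags argument above labels individual tool outputs as they land in the docstore; those labels are what the caching test near the end of this section queries. A minimal retrieval sketch, reusing the doc store from this test:

# Pull back the outputs that tool_tags labeled "file:output"
# (the same doc.filter(tags=...) call the caching test uses).
for id, info in doc.filter(tags="file:output"):
    assert doc.size(info) > 0   # each tagged result should be non-empty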
Code Example #3
File: test_commandlaunch.py Project: kellrott/nebula
    def testNebulaLaunch(self):
        input = {
            "input_file_1": Target("c39ded10-6073-11e4-9803-0800200c9a66"),
            "input_file_2": Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")
        }
        parameters = {"tail_select": {"lineNum": 3}}

        doc = FileDocStore(file_path=get_abspath("../test_tmp/docstore"))
        logging.info("Adding files to object store")
        sync_doc_dir("examples/simple_galaxy/",
                     doc,
                     uuid_set=[
                         "c39ded10-6073-11e4-9803-0800200c9a66",
                         "26fd12a2-9096-4af2-a989-9e2f1cb692fe"
                     ])
        logging.info("Creating Task")
        workflow = GalaxyWorkflow(
            ga_file=get_abspath("../examples/simple_galaxy/SimpleWorkflow.ga"))
        task = nebula.tasks.GalaxyWorkflowTask("test_workflow",
                                               workflow,
                                               inputs=input,
                                               parameters=parameters)

        service = GalaxyService(docstore=doc,
                                name="nosetest_galaxy",
                                galaxy="bgruening/galaxy-stable:dev",
                                port=20022)

        task_path = get_abspath("../test_tmp/test.tasks")
        service_path = get_abspath("../test_tmp/test.service")
        taskset = TaskGroup()
        taskset.append(task)
        with open(task_path, "w") as handle:
            taskset.store(handle)

        with open(service_path, "w") as handle:
            service.get_config().set_docstore_config(
                cache_path=get_abspath("../test_tmp/cache")).store(handle)

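        # Make the local checkout importable inside the child process that runs bin/nebula.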
        env = dict(os.environ)
        if 'PYTHONPATH' in env:
            env['PYTHONPATH'] += ":" + get_abspath("../")
        else:
            env['PYTHONPATH'] = get_abspath("../")
        subprocess.check_call(
            [get_abspath("../bin/nebula"), "run", service_path, task_path],
            env=env)

        for i in doc.filter():
            print json.dumps(i, indent=4)
Code Example #4
File: test_wf_gen.py Project: curoverse/mc3
def run_gen(args):
    syn = synapseclient.Synapse()
    syn.login()

    docstore = from_url(args.out_base)

    data_mapping = {
        "db_snp" : "dbsnp_132_b37.leftAligned.vcf",
        "centromere" : "centromere_hg19.bed",
        "reference_genome" : "Homo_sapiens_assembly19.fasta",
        "cosmic" : "b37_cosmic_v54_120711.vcf"
    }

    if args.ref_download:
        syn_sync(syn, REFDATA_PROJECT, docstore, data_mapping.values())

    dm = {}
    for k,v in data_mapping.items():
        hit = None
        for a in docstore.filter(name=v):
            hit = a[0]
        if hit is None:
            raise Exception("%s not found" % (v))
        dm[k] = { "uuid" : hit }

    if args.sample is not None:
        sync_doc_dir(
            os.path.join( os.path.dirname(__file__), "..", "testexomes" ), docstore,
            filter=lambda x: x['donorId'] in args.sample
        )
    else:
        sync_doc_dir( os.path.join( os.path.dirname(__file__), "..", "testexomes" ), docstore)

    tumor_uuids = {}
    normal_uuids = {}

    for id, ent in docstore.filter(sampleType="tumour"):
        tumor_uuids[ent['participant_id']] = id

    for id, ent in docstore.filter(sampleType="normal"):
        normal_uuids[ent['participant_id']] = id

    mc3_workflow = GalaxyWorkflow(ga_file="workflows/Galaxy-Workflow-MC3_Pipeline_Test.ga")

    reference_id = None
    for a in docstore.filter(name="Homo_sapiens_assembly19.fasta"):
        reference_id = a[0]

    tasks = TaskGroup()
    for donor in tumor_uuids:
        if donor in normal_uuids:
            print "participant", donor

            donor_name = None
            for k,v in fake_metadata.items():
                if v['participant_id'] == donor:
                    donor_name = k

            workflow_dm = dict(dm)
            workflow_dm['tumor_bam'] = { "uuid" : tumor_uuids[donor] }
            workflow_dm['normal_bam'] = { "uuid" : normal_uuids[donor] }

            task = GalaxyWorkflowTask("workflow_%s" % (donor),
                mc3_workflow,
                inputs=workflow_dm,
                parameters={
                    "reheader_config" : {
                        "platform" : "Illumina",
                        "center" : "OHSU",
                        "reference_genome" : "Homo_sapiens_assembly19.fasta",
                        "participant_uuid" : fake_metadata[donor_name]['participant_id'],
                        "disease_code" : fake_metadata[donor_name]['disease'],
                        "filedate" : datetime.datetime.now().strftime("%Y%m%d"),
                        "normal_analysis_uuid" : fake_metadata[donor_name]['normal']['uuid'],
                        "normal_bam_name" : fake_metadata[donor_name]['normal']['file_name'],
                        "normal_aliquot_uuid" : fake_metadata[donor_name]['normal']['aliquot_id'],
                        "normal_aliquot_barcode": fake_metadata[donor_name]['normal']['barcode'],
                        "tumor_analysis_uuid" : fake_metadata[donor_name]['tumour']['uuid'],
                        "tumor_bam_name" : fake_metadata[donor_name]['tumour']['file_name'],
                        "tumor_aliquot_uuid" : fake_metadata[donor_name]['tumour']['aliquot_id'],
                        "tumor_aliquot_barcode" : fake_metadata[donor_name]['tumour']['barcode'],
                    }
                },
                tags=[ "donor:%s" % (donor) ],
            )
            tasks.append(task)

    if not os.path.exists("%s.tasks" % (args.out_base)):
        os.mkdir("%s.tasks" % (args.out_base))
    for data in tasks:
        with open("%s.tasks/%s" % (args.out_base, data.task_id), "w") as handle:
            handle.write(json.dumps(data.to_dict()))

    if args.create_service:
        service = GalaxyService(
            docstore=docstore,
            galaxy=args.galaxy,
            sudo=args.sudo,
            tool_data=args.tool_data,
            tool_dir=args.tool_dir,
            work_dir=args.work_dir,
            smp=[
                ["gatk_bqsr", 12],
                ["gatk_indel", 24],
                ["MuSE", 8],
                ["pindel", 8],
                ["mutect", 8],
                ["delly", 4],
                ["gatk_bqsr", 12],
                ["gatk_indel", 12],
                ["bwa_mem", 12],
                ["radia", 8],
                ['radia_filter', 8]
            ]
        )
        with open("%s.service" % (args.out_base), "w") as handle:
            s = service.get_config()
            if args.scratch:
                print "Using scratch", args.scratch
                s.set_docstore_config(cache_path=args.scratch, open_perms=True)
            s.store(handle)
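
Each run_gen variant in this section repeats the same linear scan to resolve a reference file name to its docstore UUID. A small helper could factor it out; this is a sketch, not part of the source, and lookup_uuid is a hypothetical name:

def lookup_uuid(docstore, name):
    # Mirror the inline loops above: keep the last matching entry's id.
    hit = None
    for a in docstore.filter(name=name):
        hit = a[0]
    if hit is None:
        raise Exception("%s not found" % (name,))
    return hit

dm = dict((k, {"uuid": lookup_uuid(docstore, v)})
          for k, v in data_mapping.items())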
Code Example #5
File: mc3_gatk_wf_gen.py Project: gaurav-kaushik/mc3
def run_gen(args):
    syn = synapseclient.Synapse()
    syn.login()

    if args.alt_table is not None:
        config['table_id'] = args.alt_table

    docstore = from_url(args.out_base)

    if args.ref_download:
        #download reference files from Synapse and populate the document store
        for a in syn.chunkedQuery('select * from entity where parentId=="%s"' % (REFDATA_PROJECT)):
            ent = syn.get(a['entity.id'])

            id = ent.annotations['uuid'][0]
            t = Target(uuid=id)
            docstore.create(t)
            path = docstore.get_filename(t)
            name = ent.name
            if 'dataPrep' in ent.annotations:
                if ent.annotations['dataPrep'][0] == 'gunzip':
                    subprocess.check_call("gunzip -c %s > %s" % (ent.path, path), shell=True)
                    name = name.replace(".gz", "")
                else:
                    print "Unknown DataPrep"
            else:
                shutil.copy(ent.path, path)
            docstore.update_from_file(t)
            meta = {}
            meta['name'] = name
            meta['uuid'] = id
            if 'dataPrep' in meta:
                del meta['dataPrep']
            docstore.put(id, meta)

    data_mapping = {
        "dbsnp" : "dbsnp_132_b37.leftAligned.vcf",
        "cosmic" : "b37_cosmic_v54_120711.vcf",
        "gold_indels" : "Mills_and_1000G_gold_standard.indels.hg19.sites.fixed.vcf",
        "phase_one_indels" : "1000G_phase1.indels.hg19.sites.fixed.vcf"
    }

    dm = {}
    for k,v in data_mapping.items():
        hit = None
        for a in docstore.filter(name=v):
            hit = a[0]
        if hit is None:
            raise Exception("%s not found" % (v))
        dm[k] = { "uuid" : hit }

    workflow_2 = GalaxyWorkflow(ga_file="workflows/Galaxy-Workflow-GATK_CGHub_2.ga")
    workflow_3 = GalaxyWorkflow(ga_file="workflows/Galaxy-Workflow-GATK_CGHub_3.ga")

    ref_rename = {
        "HG19_Broad_variant" : "Homo_sapiens_assembly19"
    }

    tasks = TaskGroup()

    for ent in synqueue.listAssignments(syn, **config):
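        # Collect the CGHub BAM analysis ids ("id_*" keys) and their reference assemblies from the assignment metadata.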
        bam_set = list( a[1] for a in ent['meta'].items() if a[0].startswith("id_") and isinstance(a[1], basestring)  )

        ref_set = set( a[1] for a in ent['meta'].items() if a[0].startswith("ref_assembly_") and isinstance(a[1], basestring) )
        assert(len(ref_set) == 1)
        ref_name = ref_set.pop()
        if ref_name in ref_rename:
            ref_name = ref_rename[ref_name]

        hit = None
        for a in docstore.filter(name=ref_name + ".fasta"):
            hit = a[0]
        for a in docstore.filter(name=ref_name + ".fa"):
            hit = a[0]
        if hit is None:
            raise Exception("%s not found" % (ref_name))
        workflow_dm = dict(dm)
        workflow_dm['reference_genome'] = { "uuid" : hit }
        if len(bam_set) == 2:
            task = GalaxyWorkflowTask("workflow_%s" % (ent['id']),
                workflow_2,
                inputs=workflow_dm,
                parameters={
                    'INPUT_BAM_1' : {
                        "uuid" : bam_set[0],
                        "gnos_endpoint" : "cghub.ucsc.edu",
                        "cred_file" : "/tool_data/files/cghub.key"
                    },
                    'INPUT_BAM_2' : {
                        "uuid" : bam_set[1],
                        "gnos_endpoint" : "cghub.ucsc.edu",
                        "cred_file" : "/tool_data/files/cghub.key"
                    }
                },
                tags=[ "donor:%s" % (ent['meta']['participant_id']) ],
                tool_tags = {
                    "BQSR_1" : {
                        "output_bam" : [ "original_bam:%s" % (bam_set[0]) ]
                    },
                    "BQSR_2" : {
                        "output_bam" : [ "original_bam:%s" % (bam_set[1]) ]
                    }
                }
            )
            tasks.append(task)
        elif len(bam_set) == 3:
            task = GalaxyWorkflowTask("workflow_%s" % (ent['id']),
                workflow_3,
                inputs=workflow_dm,
                parameters={
                    'INPUT_BAM_1' : {
                        "uuid" : bam_set[0],
                        "gnos_endpoint" : "cghub.ucsc.edu",
                        "cred_file" : "/tool_data/files/cghub.key"
                    },
                    'INPUT_BAM_2' : {
                        "uuid" : bam_set[1],
                        "gnos_endpoint" : "cghub.ucsc.edu",
                        "cred_file" : "/tool_data/files/cghub.key"
                    },
                    'INPUT_BAM_3' : {
                        "uuid" : bam_set[2],
                        "gnos_endpoint" : "cghub.ucsc.edu",
                        "cred_file" : "/tool_data/files/cghub.key"
                    }
                },
                tags=[ "donor:%s" % (ent['meta']['participant_id']) ],
                tool_tags = {
                    "BQSR_1" : {
                        "output_bam" : [ "original_bam:%s" % (bam_set[0]) ]
                    },
                    "BQSR_2" : {
                        "output_bam" : [ "original_bam:%s" % (bam_set[1]) ]
                    },
                    "BQSR_3" : {
                        "output_bam" : [ "original_bam:%s" % (bam_set[2]) ]
                    }
                }
            )
            tasks.append(task)


    if not os.path.exists("%s.tasks" % (args.out_base)):
        os.mkdir("%s.tasks" % (args.out_base))
    for data in tasks:
        with open("%s.tasks/%s" % (args.out_base, data.task_id), "w") as handle:
            handle.write(json.dumps(data.to_dict()))

    if args.create_service:

        service = GalaxyService(
            docstore=docstore,
            galaxy="bgruening/galaxy-stable",
            sudo=True,
            tool_data=args.tool_data,
            tool_dir=args.tool_dir,
            work_dir=args.work_dir,
            smp=[
                ["gatk_bqsr", 12],
                ["gatk_indel", 24]
            ]
        )
        with open("%s.service" % (args.out_base), "w") as handle:
            s = service.get_config()
            if args.scratch:
                print "Using scratch", args.scratch
                s.set_docstore_config(cache_path=args.scratch, open_perms=True)
            s.store(handle)
Code Example #6
File: mc3_mut.py Project: ucscCancer/mc3
def run_gen(args):
    syn = synapseclient.Synapse()
    syn.login()

    docstore = from_url(args.out_base)

    data_mapping = {
        "db_snp": "dbsnp_132_b37.leftAligned.vcf",
        "centromere": "centromere_hg19.bed",
        "cosmic": "b37_cosmic_v54_120711.vcf",
    }

    ref_genomes = [
        "Homo_sapiens_assembly19.fasta",
        "GRCh37-lite.fa",
        "GRCh37-lite-+-HPV_Redux-build.fa",
        "GRCh37-lite_WUGSC_variant_1.fa.gz",
        "GRCh37-lite_WUGSC_variant_2.fa.gz",
        "hg19_M_rCRS.fa.gz",
    ]

    if args.ref_download:
        syn_sync(syn, REFDATA_PROJECT, docstore, data_mapping.values() + ref_genomes)

    dm = {}
    for k, v in data_mapping.items():
        hit = None
        for a in docstore.filter(name=v):
            hit = a[0]
        if hit is None:
            raise Exception("%s not found" % (v))
        dm[k] = {"uuid": hit}

    mc3_dna_workflow = GalaxyWorkflow(ga_file="workflows/Galaxy-Workflow-MC3_Pipeline_CGHub_DNA.ga")
    mc3_dnarna_workflow = GalaxyWorkflow(ga_file="workflows/Galaxy-Workflow-MC3_Pipeline_CGHub_DNA_RNA.ga")

    rna_hit = None
    for a in docstore.filter(name="hg19_M_rCRS.fa"):
        rna_hit = a[0]

    tasks = TaskGroup()
    assembly_hits = {}
    with open(args.joblist) as handle:
        reader = csv.DictReader(handle, delimiter="\t")
        for row in reader:
            if row["normal_assembly"] != row["tumor_assembly"]:
                print "Row Mismatch", row["normal_assembly"], row["tumor_assembly"]
                # raise Exception("Mismatch reference")
            ref_name = row["normal_assembly"]
            if ref_name in ref_rename:
                ref_name = ref_rename[ref_name]
            if ref_name in assembly_hits:
                hit = assembly_hits[ref_name]
            else:
                hit = None
                for a in docstore.filter(name=ref_name + ".fasta"):
                    hit = a[0]
                for a in docstore.filter(name=ref_name + ".fa"):
                    hit = a[0]
                if hit is None:
                    raise Exception("%s not found" % (ref_name))
                assembly_hits[ref_name] = hit
            workflow_dm = dict(dm)
            workflow_dm["reference_genome"] = {"uuid": hit}

            params = {
                "tumor_bam": {
                    "uuid": row["tumor_analysis_id"],
                    "gnos_endpoint": "cghub.ucsc.edu",
                    "cred_file": "/tool_data/files/cghub.key",
                },
                "normal_bam": {
                    "uuid": row["normal_analysis_id"],
                    "gnos_endpoint": "cghub.ucsc.edu",
                    "cred_file": "/tool_data/files/cghub.key",
                },
                "reheader_config": {
                    "platform": "Illumina",
                    "center": "OHSU",
                    "reference_genome": ref_name,
                    "participant_uuid": row["participant_id"],
                    "disease_code": row["disease"],
                    "filedate": datetime.datetime.now().strftime("%Y%m%d"),
                    "normal_analysis_uuid": row["normal_analysis_id"],
                    "normal_bam_name": row["normal_filename"],
                    "normal_aliquot_uuid": row["normal_aliquot_id"],
                    "normal_aliquot_barcode": row["normal_barcode"],
                    "tumor_analysis_uuid": row["tumor_analysis_id"],
                    "tumor_bam_name": row["tumor_filename"],
                    "tumor_aliquot_uuid": row["tumor_aliquot_id"],
                    "tumor_aliquot_barcode": row["tumor_barcode"],
                },
            }

            if row["rna_analysis_id"] != "NA":
                params["rna_tumor_bam"] = {
                    "uuid": row["rna_analysis_id"],
                    "gnos_endpoint": "cghub.ucsc.edu",
                    "cred_file": "/tool_data/files/cghub.key",
                }
                workflow_dm["rna_reference_genome"] = {"uuid": rna_hit}
                task = GalaxyWorkflowTask(
                    "workflow_%s" % (row["job_id"]),
                    mc3_dnarna_workflow,
                    inputs=workflow_dm,
                    parameters=params,
                    tags=["donor:%s" % (row["participant_id"])],
                )
            else:
                task = GalaxyWorkflowTask(
                    "workflow_%s" % (row["job_id"]),
                    mc3_dna_workflow,
                    inputs=workflow_dm,
                    parameters=params,
                    tags=["donor:%s" % (row["participant_id"])],
                )
            tasks.append(task)

    if not os.path.exists("%s.tasks" % (args.out_base)):
        os.mkdir("%s.tasks" % (args.out_base))
    for data in tasks:
        with open("%s.tasks/%s" % (args.out_base, data.task_id), "w") as handle:
            handle.write(json.dumps(data.to_dict()))

    service = GalaxyService(
        docstore=docstore,
        galaxy=args.galaxy,
        sudo=args.sudo,
        tool_data=args.tool_data,
        tool_dir=args.tool_dir,
        work_dir=args.work_dir,
        smp=[
            ["gatk_bqsr", 12],
            ["gatk_indel", 24],
            ["MuSE", 8],
            ["pindel", 8],
            ["mutect", 8],
            ["delly", 4],
            ["gatk_bqsr", 12],
            ["gatk_indel", 12],
            ["bwa_mem", 12],
            ["radia", 8],
            ["radia_filter", 8],
        ],
    )
    with open("%s.service" % (args.out_base), "w") as handle:
        s = service.get_config()
        if args.scratch:
            print "Using scratch", args.scratch
            s.set_docstore_config(cache_path=args.scratch, open_perms=True)
        s.store(handle)
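
For reference, args.joblist above is a tab-separated file read with csv.DictReader; the column names below are inferred from the row[...] lookups in this generator and are the minimum it touches.

# Columns the mc3_mut generator reads from args.joblist (inferred from
# the row[...] accesses above; the real file may carry more).
JOBLIST_COLUMNS = [
    "job_id", "participant_id", "disease",
    "normal_assembly", "tumor_assembly",
    "normal_analysis_id", "normal_filename",
    "normal_aliquot_id", "normal_barcode",
    "tumor_analysis_id", "tumor_filename",
    "tumor_aliquot_id", "tumor_barcode",
    "rna_analysis_id",  # "NA" when there is no RNA tumor BAM
]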
Code Example #7
File: pcawg_wf_gen.py Project: jhl667/pcawg_tools
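# (Excerpt: the tail of a task-construction loop; the enclosing run_gen-style
# function and the opening of the GalaxyWorkflowTask(...) call are not shown.)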
            tags=[ "sample:%s" % (ent['meta']['Donor_ID']) ]
        )
        tasks.append(task)

    if not os.path.exists("pcawg.tasks"):
        os.mkdir("pcawg.tasks")
    for data in tasks:
        with open("pcawg.tasks/%s" % (data.task_id), "w") as handle:
            handle.write(json.dumps(data.to_dict()))

    if args.create_service:
        service = GalaxyService(
            docstore=docstore,
            galaxy="bgruening/galaxy-stable:dev",
            sudo=True,
            tool_data=os.path.abspath("tool_data"),
            tool_dir=os.path.abspath("tools"),
            smp=[
                ["MuSE", 8],
                ["pindel", 8],
                ["muTect", 8],
                ["delly", 4],
                ["gatk_bqsr", 12],
                ["gatk_indel", 12],
                ["bwa_mem", 12],
                ["broad_variant_pipline", 28]
            ]
        )
        with open("pcawg.service", "w") as handle:
            service.get_config().store(handle)
Code Example #8
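# (Excerpt: task serialization and service setup from a run_gen-style generator;
# the enclosing function and the os.path.exists guard before this os.mkdir are not shown.)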
        os.mkdir("%s.tasks" % (args.out_base))

    for data in tasks:
        with open("%s.tasks/%s" % (args.out_base, data.task_id), "w") as handle:
            handle.write(json.dumps(data.to_dict()))

    if args.create_service:
        service = GalaxyService(
            docstore=docstore,
            galaxy="bgruening/galaxy-stable",
            sudo=True,
            tool_data=os.path.abspath("tool_data"),
            tool_dir=os.path.abspath("tools"),
            smp=[
                ["MuSE", 8],
                ["pindel", 8],
                ["muTect", 8],
                ["delly", 4],
                ["gatk_bqsr", 12],
                ["gatk_indel", 24],
                ["bwa_mem", 12],
                ["broad_variant_pipline", 24]
            ]
        )
        with open("%s.service" % (args.out_base), "w") as handle:
            s = service.get_config()
            if args.scratch:
                print "Using scratch", args.scratch
                s.set_docstore_config(cache_path=args.scratch, open_perms=True)
            s.store(handle)
Code Example #9
def run_gen(args):
    args = parser.parse_args()  # NOTE: re-parses sys.argv and overwrites the args passed in (relies on a module-level parser)

    syn = synapseclient.Synapse()
    syn.login()

    docstore = from_url(args.out_base)

    data_mapping = {
        "reference_genome" : "genome.fa",
        "dbsnp" : "dbsnp_132_b37.leftAligned.vcf",
        "cosmic" : "b37_cosmic_v54_120711.vcf",
        "gold_indels" : "Mills_and_1000G_gold_standard.indels.hg19.sites.fixed.vcf",
        "phase_one_indels" : "1000G_phase1.indels.hg19.sites.fixed.vcf",
        "centromere" : "centromere_hg19.bed"
    }

    if args.ref_download:
        #download reference files from Synapse and populate the document store
        for a in syn.chunkedQuery('select * from entity where parentId=="%s"' % (REFDATA_PROJECT)):
            print "found",  a['entity.name']
            if a['entity.name'] in data_mapping.values() or a['entity.name'].replace(".gz", "") in data_mapping.values():
                print "loading"
                ent = syn.get(a['entity.id'])
                id = ent.annotations['uuid'][0]
                t = Target(uuid=id)
                docstore.create(t)
                path = docstore.get_filename(t)
                name = ent.name
                if 'dataPrep' in ent.annotations:
                    if ent.annotations['dataPrep'][0] == 'gunzip':
                        subprocess.check_call("gunzip -c %s > %s" % (ent.path, path), shell=True)
                        name = name.replace(".gz", "")
                    else:
                        print "Unknown DataPrep"
                else:
                    shutil.copy(ent.path, path)
                docstore.update_from_file(t)
                meta = {}
                meta['name'] = name
                meta['uuid'] = id
                if 'dataPrep' in meta:
                    del meta['dataPrep']
                docstore.put(id, meta)

    dm = {}
    for k,v in data_mapping.items():
        hit = None
        for a in docstore.filter(name=v):
            hit = a[0]
        if hit is None:
            raise Exception("%s not found" % (v))
        dm[k] = { "uuid" : hit }

    workflow = GalaxyWorkflow(ga_file="workflows/Galaxy-Workflow-PCAWG_CGHUB.ga")
    tasks = TaskGroup()
    for ent in synqueue.listAssignments(syn, **config):
        #print "'%s'" % (ent['state']), ent['state'] == 'nan', type(ent['state']), type('nan')
        if not isinstance(ent['state'], basestring) and isnan(ent['state']):
            gnos_endpoint = urlparse(ent['meta']['Normal_WGS_alignment_GNOS_repos']).netloc
            task = GalaxyWorkflowTask("workflow_%s" % (ent['id']),
                workflow,
                inputs=dm,
                parameters={
                    'normal_bam_download' : {
                        "uuid" : ent['meta']['Normal_WGS_alignment_GNOS_analysis_ID'],
                        "gnos_endpoint" : gnos_endpoint,
                        "cred_file" : key_map[gnos_endpoint]
                    },
                    'tumor_bam_download' : {
                        "uuid" : ent['meta']['Tumour_WGS_alignment_GNOS_analysis_IDs'],
                        "gnos_endpoint" : gnos_endpoint,
                        "cred_file" : key_map[gnos_endpoint]
                    },
                    'broad_variant_pipeline' : {
                        "broad_ref_dir" : "/tool_data/files/refdata",
                        "sample_id" : ent['meta']['Submitter_donor_ID']
                    }
                },
                tags=[ "donor:%s" % (ent['meta']['Submitter_donor_ID']) ]
            )
            tasks.append(task)

    if not os.path.exists("%s.tasks" % (args.out_base)):
        os.mkdir("%s.tasks" % (args.out_base))

    for data in tasks:
        with open("%s.tasks/%s" % (args.out_base, data.task_id), "w") as handle:
            handle.write(json.dumps(data.to_dict()))
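            # Clear any stale state file so the task is treated as a fresh run.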
            state_file = "%s.tasks/%s.state" % (args.out_base, data.task_id)
            if os.path.exists( state_file ):
                os.unlink( state_file )

    print "Tasks Created: %s" % (len(tasks))

    if args.create_service:
        service = GalaxyService(
            docstore=docstore,
            galaxy="bgruening/galaxy-stable",
            sudo=args.sudo,
            tool_data=os.path.abspath("tool_data"),
            tool_dir=os.path.abspath("tools"),
            work_dir=args.work_dir,
            smp=[
                ["MuSE", 8],
                ["pindel", 8],
                ["muTect", 8],
                ["delly", 4],
                ["gatk_bqsr", 12],
                ["gatk_indel", 24],
                ["bwa_mem", 12],
                ["broad_variant_pipline", 24]
            ]
        )
        with open("%s.service" % (args.out_base), "w") as handle:
            s = service.get_config()
            if args.scratch:
                print "Using scratch", args.scratch
                s.set_docstore_config(cache_path=args.scratch, open_perms=True)
            s.store(handle)
Code Example #10
File: test_objectstore.py Project: kellrott/nebula
    def testWorkflowCaching(self):
        input = {
            "input_file_1" : Target("c39ded10-6073-11e4-9803-0800200c9a66"),
            "input_file_2" : Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")
        }
        parameters = {
            "tail_select" : {
                "lineNum" : 3
            }
        }

        doc = nebula.docstore.FileDocStore(
            get_abspath("../test_tmp/docstore"),
            cache_path=get_abspath("../test_tmp/cache")
        )

        logging.info("Adding files to object store")
        sync_doc_dir(get_abspath("../examples/simple_galaxy/"), doc,
            uuid_set=["c39ded10-6073-11e4-9803-0800200c9a66", "26fd12a2-9096-4af2-a989-9e2f1cb692fe"]
        )
        logging.info("Creating Task")
        workflow = GalaxyWorkflow(ga_file=get_abspath("../examples/simple_galaxy/SimpleWorkflow.ga"))
        task = nebula.tasks.GalaxyWorkflowTask(
            "test_workflow", workflow,
            inputs=input,
            parameters=parameters,
            tags = [
                "run:testing"
            ],
            tool_tags= {
                "tail_select" : {
                    "out_file1" : [
                        "file:tail"
                    ]
                },
                "concat_out" : {
                    "out_file1" : ["file:output"]
                }
            }
        )

        service = GalaxyService(
            docstore=doc,
            name="nosetest_galaxy",
            galaxy="bgruening/galaxy-stable:dev",
            force=True,
            port=20022
        )
        self.service = service

        logging.info("Starting Service")
        print "Starting service"
        service.start()
        self.assertFalse( service.in_error() )
        logging.info("Starting Tasks")
        job = service.submit(task)
        self.assertTrue( isinstance(job, TaskJob) )
        self.assertFalse( service.in_error() )
        #logging.info("Waiting")
        service.wait([job])
        found = False
        for id, info in doc.filter(tags="file:output"):
            logging.info("Found result object: %s size: %d" % (id, doc.size(info)))
            self.assertTrue( doc.size(info) > 0 )
            found = True
        self.assertTrue(found)
        self.assertFalse( service.in_error() )
        self.assertIn(job.get_status(), ['ok'])
Code Example #11
File: test_runworkflow.py Project: kellrott/nebula
    def testRunSimple(self):
        input = {
            "input_file_1": Target("c39ded10-6073-11e4-9803-0800200c9a66"),
            "input_file_2": Target("26fd12a2-9096-4af2-a989-9e2f1cb692fe")
        }
        parameters = {
            "tail_select" : {
                "lineNum" : 3
            }
        }
        bad_parameters = dict(parameters)
        del bad_parameters['tail_select']

        doc = FileDocStore(file_path="./test_tmp/docstore")
        logging.info("Adding files to object store")
        sync_doc_dir("examples/simple_galaxy/", doc,
            uuid_set=["c39ded10-6073-11e4-9803-0800200c9a66", "26fd12a2-9096-4af2-a989-9e2f1cb692fe"]
        )
        logging.info("Creating Task")
        workflow = GalaxyWorkflow(ga_file="examples/simple_galaxy/SimpleWorkflow.ga")
        task = nebula.tasks.GalaxyWorkflowTask(
            "test_workflow", workflow,
            inputs=input,
            parameters=parameters
        )

        task_data = task.to_dict()
        #make sure the task data can be serialized
        task_data_str = json.dumps(task_data)

        service = GalaxyService(
            docstore=doc,
            name="nosetest_galaxy",
            galaxy="bgruening/galaxy-stable",
            force=True,
            port=20022
        )
        self.service = service

        #make sure the generated task is serializable
        new_task_data = json.loads(task_data_str)
        new_task = nebula.tasks.from_dict(new_task_data)

        logging.info("Starting Service")
        print "Starting service"
        service.start()
        self.assertFalse( service.in_error() )
        logging.info("Starting Tasks")
        job = service.submit(new_task)
        self.assertTrue( isinstance(job, TaskJob) )
        self.assertFalse( service.in_error() )
        #logging.info("Waiting")
        service.wait([job])
        self.assertIn(job.get_status(), ['ok'])

        bad_task = nebula.tasks.GalaxyWorkflowTask(
            "test_workflow_bad",
            workflow,
            inputs=input,
            parameters=bad_parameters
        )
        job = service.submit(bad_task)
        service.wait([job])
        self.assertIn(job.get_status(), ['error'])

        self.assertFalse( service.in_error() )