# Task-generation entry points from the MC3/PCAWG pipeline scripts. Each
# run_gen below was evidently the entry point of a separate generator script;
# they are kept together here as distinct variants.

# Stdlib imports used by the functions below.
import csv
import datetime
import json
import os
import shutil
import subprocess
from math import isnan
from urlparse import urlparse  # Python 2; urllib.parse on Python 3

import synapseclient

# The following names are supplied by the surrounding project (docstore,
# workflow, and queue helpers); exact import paths depend on the repo layout:
#   from_url, Target, sync_doc_dir, syn_sync, synqueue, GalaxyWorkflow,
#   GalaxyWorkflowTask, TaskGroup, GalaxyService,
#   REFDATA_PROJECT, config, key_map, fake_metadata


def run_gen(args):
    syn = synapseclient.Synapse()
    syn.login()

    docstore = from_url(args.out_base)

    data_mapping = {
        "db_snp": "dbsnp_132_b37.leftAligned.vcf",
        "centromere": "centromere_hg19.bed",
        "reference_genome": "Homo_sapiens_assembly19.fasta",
        "cosmic": "b37_cosmic_v54_120711.vcf"
    }

    if args.ref_download:
        syn_sync(syn, REFDATA_PROJECT, docstore, data_mapping.values())

    dm = {}
    for k, v in data_mapping.items():
        hit = None
        for a in docstore.filter(name=v):
            hit = a[0]
        if hit is None:
            raise Exception("%s not found" % (v))
        dm[k] = {"uuid": hit}

    if args.sample is not None:
        sync_doc_dir(
            os.path.join(os.path.dirname(__file__), "..", "testexomes"), docstore,
            filter=lambda x: x['donorId'] in args.sample
        )
    else:
        sync_doc_dir(
            os.path.join(os.path.dirname(__file__), "..", "testexomes"), docstore
        )

    tumor_uuids = {}
    normal_uuids = {}
    for id, ent in docstore.filter(sampleType="tumour"):
        tumor_uuids[ent['participant_id']] = id
    for id, ent in docstore.filter(sampleType="normal"):
        normal_uuids[ent['participant_id']] = id

    mc3_workflow = GalaxyWorkflow(ga_file="workflows/Galaxy-Workflow-MC3_Pipeline_Test.ga")

    reference_id = None
    for a in docstore.filter(name="Homo_sapiens_assembly19.fasta"):
        reference_id = a[0]

    tasks = TaskGroup()
    for donor in tumor_uuids:
        if donor in normal_uuids:
            print "participant", donor
            donor_name = None
            for k, v in fake_metadata.items():
                if v['participant_id'] == donor:
                    donor_name = k
            workflow_dm = dict(dm)
            workflow_dm['tumor_bam'] = {"uuid": tumor_uuids[donor]}
            workflow_dm['normal_bam'] = {"uuid": normal_uuids[donor]}
            task = GalaxyWorkflowTask(
                "workflow_%s" % (donor), mc3_workflow,
                inputs=workflow_dm,
                parameters={
                    "reheader_config": {
                        "platform": "Illumina",
                        "center": "OHSU",
                        "reference_genome": "Homo_sapiens_assembly19.fasta",
                        "participant_uuid": fake_metadata[donor_name]['participant_id'],
                        "disease_code": fake_metadata[donor_name]['disease'],
                        "filedate": datetime.datetime.now().strftime("%Y%m%d"),
                        "normal_analysis_uuid": fake_metadata[donor_name]['normal']['uuid'],
                        "normal_bam_name": fake_metadata[donor_name]['normal']['file_name'],
                        "normal_aliquot_uuid": fake_metadata[donor_name]['normal']['aliquot_id'],
                        "normal_aliquot_barcode": fake_metadata[donor_name]['normal']['barcode'],
                        "tumor_analysis_uuid": fake_metadata[donor_name]['tumour']['uuid'],
                        "tumor_bam_name": fake_metadata[donor_name]['tumour']['file_name'],
                        "tumor_aliquot_uuid": fake_metadata[donor_name]['tumour']['aliquot_id'],
                        "tumor_aliquot_barcode": fake_metadata[donor_name]['tumour']['barcode'],
                    }
                },
                tags=["donor:%s" % (donor)],
            )
            tasks.append(task)

    if not os.path.exists("%s.tasks" % (args.out_base)):
        os.mkdir("%s.tasks" % (args.out_base))
    for data in tasks:
        with open("%s.tasks/%s" % (args.out_base, data.task_id), "w") as handle:
            handle.write(json.dumps(data.to_dict()))

    if args.create_service:
        service = GalaxyService(
            docstore=docstore,
            galaxy=args.galaxy,
            sudo=args.sudo,
            tool_data=args.tool_data,
            tool_dir=args.tool_dir,
            work_dir=args.work_dir,
            # Duplicate smp entries collapsed: the original listed gatk_bqsr
            # twice (both 12) and gatk_indel at both 24 and 12; the first
            # value of each is kept.
            smp=[
                ["gatk_bqsr", 12],
                ["gatk_indel", 24],
                ["MuSE", 8],
                ["pindel", 8],
                ["mutect", 8],
                ["delly", 4],
                ["bwa_mem", 12],
                ["radia", 8],
                ["radia_filter", 8]
            ]
        )
        with open("%s.service" % (args.out_base), "w") as handle:
            s = service.get_config()
            if args.scratch:
                print "Using scratch", args.scratch
                s.set_docstore_config(cache_path=args.scratch, open_perms=True)
            s.store(handle)
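# The flags consumed by the variants in this file (out_base, ref_download,
# sample, create_service, galaxy, sudo, tool_data, tool_dir, work_dir,
# scratch, alt_table, joblist) imply roughly the following argparse wiring.
# This is a sketch inferred from the attribute accesses, not the repo's
# actual CLI definition; flag spellings and defaults are assumptions.
def _example_parser():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--out-base", default="mc3_run")            # docstore URL / output prefix
    parser.add_argument("--ref-download", action="store_true")      # pull reference files from Synapse
    parser.add_argument("--sample", action="append", default=None)  # restrict to specific donor IDs
    parser.add_argument("--create-service", action="store_true")
    parser.add_argument("--galaxy", default="bgruening/galaxy-stable")
    parser.add_argument("--sudo", action="store_true")
    parser.add_argument("--tool-data", default=None)
    parser.add_argument("--tool-dir", default=None)
    parser.add_argument("--work-dir", default=None)
    parser.add_argument("--scratch", default=None)
    parser.add_argument("--alt-table", default=None)  # used by the GATK CGHub variant below
    parser.add_argument("--joblist", default=None)    # TSV consumed by the MC3 CGHub variant below
    return parser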
def run_gen(args):
    syn = synapseclient.Synapse()
    syn.login()

    if args.alt_table is not None:
        config['table_id'] = args.alt_table

    docstore = from_url(args.out_base)

    if args.ref_download:
        # Download reference files from Synapse and populate the document store.
        for a in syn.chunkedQuery('select * from entity where parentId=="%s"' % (REFDATA_PROJECT)):
            ent = syn.get(a['entity.id'])
            id = ent.annotations['uuid'][0]
            t = Target(uuid=id)
            docstore.create(t)
            path = docstore.get_filename(t)
            name = ent.name
            if 'dataPrep' in ent.annotations:
                if ent.annotations['dataPrep'][0] == 'gunzip':
                    subprocess.check_call("gunzip -c %s > %s" % (ent.path, path), shell=True)
                    name = name.replace(".gz", "")
                else:
                    print "Unknown DataPrep"
            else:
                shutil.copy(ent.path, path)
            docstore.update_from_file(t)
            meta = {}
            meta['name'] = name
            meta['uuid'] = id
            if 'dataPrep' in meta:
                del meta['dataPrep']
            docstore.put(id, meta)

    data_mapping = {
        "dbsnp": "dbsnp_132_b37.leftAligned.vcf",
        "cosmic": "b37_cosmic_v54_120711.vcf",
        "gold_indels": "Mills_and_1000G_gold_standard.indels.hg19.sites.fixed.vcf",
        "phase_one_indels": "1000G_phase1.indels.hg19.sites.fixed.vcf"
    }

    dm = {}
    for k, v in data_mapping.items():
        hit = None
        for a in docstore.filter(name=v):
            hit = a[0]
        if hit is None:
            raise Exception("%s not found" % (v))
        dm[k] = {"uuid": hit}

    workflow_2 = GalaxyWorkflow(ga_file="workflows/Galaxy-Workflow-GATK_CGHub_2.ga")
    workflow_3 = GalaxyWorkflow(ga_file="workflows/Galaxy-Workflow-GATK_CGHub_3.ga")

    ref_rename = {"HG19_Broad_variant": "Homo_sapiens_assembly19"}

    tasks = TaskGroup()
    for ent in synqueue.listAssignments(syn, **config):
        bam_set = list(a[1] for a in ent['meta'].items()
                       if a[0].startswith("id_") and isinstance(a[1], basestring))
        ref_set = set(a[1] for a in ent['meta'].items()
                      if a[0].startswith("ref_assembly_") and isinstance(a[1], basestring))
        assert len(ref_set) == 1
        ref_name = ref_set.pop()
        if ref_name in ref_rename:
            ref_name = ref_rename[ref_name]
        hit = None
        for a in docstore.filter(name=ref_name + ".fasta"):
            hit = a[0]
        for a in docstore.filter(name=ref_name + ".fa"):
            hit = a[0]
        if hit is None:
            raise Exception("%s not found" % (ref_name))
        workflow_dm = dict(dm)
        workflow_dm['reference_genome'] = {"uuid": hit}
        if len(bam_set) == 2:
            task = GalaxyWorkflowTask(
                "workflow_%s" % (ent['id']), workflow_2,
                inputs=workflow_dm,
                parameters={
                    'INPUT_BAM_1': {
                        "uuid": bam_set[0],
                        "gnos_endpoint": "cghub.ucsc.edu",
                        "cred_file": "/tool_data/files/cghub.key"
                    },
                    'INPUT_BAM_2': {
                        "uuid": bam_set[1],
                        "gnos_endpoint": "cghub.ucsc.edu",
                        "cred_file": "/tool_data/files/cghub.key"
                    }
                },
                tags=["donor:%s" % (ent['meta']['participant_id'])],
                tool_tags={
                    "BQSR_1": {"output_bam": ["original_bam:%s" % (bam_set[0])]},
                    "BQSR_2": {"output_bam": ["original_bam:%s" % (bam_set[1])]}
                }
            )
            tasks.append(task)
        elif len(bam_set) == 3:
            task = GalaxyWorkflowTask(
                "workflow_%s" % (ent['id']), workflow_3,
                inputs=workflow_dm,
                parameters={
                    'INPUT_BAM_1': {
                        "uuid": bam_set[0],
                        "gnos_endpoint": "cghub.ucsc.edu",
                        "cred_file": "/tool_data/files/cghub.key"
                    },
                    'INPUT_BAM_2': {
                        "uuid": bam_set[1],
                        "gnos_endpoint": "cghub.ucsc.edu",
                        "cred_file": "/tool_data/files/cghub.key"
                    },
                    'INPUT_BAM_3': {
                        "uuid": bam_set[2],
                        "gnos_endpoint": "cghub.ucsc.edu",
                        "cred_file": "/tool_data/files/cghub.key"
                    }
                },
                tags=["donor:%s" % (ent['meta']['participant_id'])],
                tool_tags={
                    "BQSR_1": {"output_bam": ["original_bam:%s" % (bam_set[0])]},
                    "BQSR_2": {"output_bam": ["original_bam:%s" % (bam_set[1])]},
                    "BQSR_3": {"output_bam": ["original_bam:%s" % (bam_set[2])]}
                }
            )
            tasks.append(task)

    if not os.path.exists("%s.tasks" % (args.out_base)):
        os.mkdir("%s.tasks" % (args.out_base))
    for data in tasks:
        with open("%s.tasks/%s" % (args.out_base, data.task_id), "w") as handle:
            handle.write(json.dumps(data.to_dict()))

    if args.create_service:
        service = GalaxyService(
            docstore=docstore,
            galaxy="bgruening/galaxy-stable",
            sudo=True,
            tool_data=args.tool_data,
            tool_dir=args.tool_dir,
            work_dir=args.work_dir,
            smp=[
                ["gatk_bqsr", 12],
                ["gatk_indel", 24]
            ]
        )
        with open("%s.service" % (args.out_base), "w") as handle:
            s = service.get_config()
            if args.scratch:
                print "Using scratch", args.scratch
                s.set_docstore_config(cache_path=args.scratch, open_perms=True)
            s.store(handle)
def run_gen(args):
    syn = synapseclient.Synapse()
    syn.login()

    docstore = from_url(args.out_base)

    data_mapping = {
        "db_snp": "dbsnp_132_b37.leftAligned.vcf",
        "centromere": "centromere_hg19.bed",
        "cosmic": "b37_cosmic_v54_120711.vcf"
    }

    ref_genomes = [
        "Homo_sapiens_assembly19.fasta",
        "GRCh37-lite.fa",
        "GRCh37-lite-+-HPV_Redux-build.fa",
        "GRCh37-lite_WUGSC_variant_1.fa.gz",
        "GRCh37-lite_WUGSC_variant_2.fa.gz",
        "hg19_M_rCRS.fa.gz"
    ]

    # Maps CGHub assembly labels to reference basenames in the docstore.
    # This was referenced but never defined in this variant; the definition
    # is taken from the GATK CGHub variant above.
    ref_rename = {"HG19_Broad_variant": "Homo_sapiens_assembly19"}

    if args.ref_download:
        syn_sync(syn, REFDATA_PROJECT, docstore, data_mapping.values() + ref_genomes)

    dm = {}
    for k, v in data_mapping.items():
        hit = None
        for a in docstore.filter(name=v):
            hit = a[0]
        if hit is None:
            raise Exception("%s not found" % (v))
        dm[k] = {"uuid": hit}

    mc3_dna_workflow = GalaxyWorkflow(ga_file="workflows/Galaxy-Workflow-MC3_Pipeline_CGHub_DNA.ga")
    mc3_dnarna_workflow = GalaxyWorkflow(ga_file="workflows/Galaxy-Workflow-MC3_Pipeline_CGHub_DNA_RNA.ga")

    rna_hit = None
    for a in docstore.filter(name="hg19_M_rCRS.fa"):
        rna_hit = a[0]

    tasks = TaskGroup()
    assembly_hits = {}
    with open(args.joblist) as handle:
        reader = csv.DictReader(handle, delimiter="\t")
        for row in reader:
            if row['normal_assembly'] != row['tumor_assembly']:
                print "Row Mismatch", row['normal_assembly'], row['tumor_assembly']
                #raise Exception("Mismatch reference")
            ref_name = row['normal_assembly']
            if ref_name in ref_rename:
                ref_name = ref_rename[ref_name]
            if ref_name in assembly_hits:
                hit = assembly_hits[ref_name]
            else:
                hit = None
                for a in docstore.filter(name=ref_name + ".fasta"):
                    hit = a[0]
                for a in docstore.filter(name=ref_name + ".fa"):
                    hit = a[0]
                if hit is None:
                    raise Exception("%s not found" % (ref_name))
                assembly_hits[ref_name] = hit
            workflow_dm = dict(dm)
            workflow_dm['reference_genome'] = {"uuid": hit}
            params = {
                'tumor_bam': {
                    "uuid": row['tumor_analysis_id'],
                    "gnos_endpoint": "cghub.ucsc.edu",
                    "cred_file": "/tool_data/files/cghub.key"
                },
                'normal_bam': {
                    "uuid": row['normal_analysis_id'],
                    "gnos_endpoint": "cghub.ucsc.edu",
                    "cred_file": "/tool_data/files/cghub.key"
                },
                "reheader_config": {
                    "platform": "Illumina",
                    "center": "OHSU",
                    "reference_genome": ref_name,
                    "participant_uuid": row['participant_id'],
                    "disease_code": row['disease'],
                    "filedate": datetime.datetime.now().strftime("%Y%m%d"),
                    "normal_analysis_uuid": row['normal_analysis_id'],
                    "normal_bam_name": row['normal_filename'],
                    "normal_aliquot_uuid": row['normal_aliquot_id'],
                    "normal_aliquot_barcode": row['normal_barcode'],
                    "tumor_analysis_uuid": row['tumor_analysis_id'],
                    "tumor_bam_name": row['tumor_filename'],
                    "tumor_aliquot_uuid": row['tumor_aliquot_id'],
                    "tumor_aliquot_barcode": row['tumor_barcode'],
                }
            }
            if row['rna_analysis_id'] != "NA":
                params['rna_tumor_bam'] = {
                    "uuid": row['rna_analysis_id'],
                    "gnos_endpoint": "cghub.ucsc.edu",
                    "cred_file": "/tool_data/files/cghub.key"
                }
                workflow_dm['rna_reference_genome'] = {"uuid": rna_hit}
                task = GalaxyWorkflowTask(
                    "workflow_%s" % (row['job_id']), mc3_dnarna_workflow,
                    inputs=workflow_dm,
                    parameters=params,
                    tags=["donor:%s" % (row['participant_id'])],
                )
            else:
                task = GalaxyWorkflowTask(
                    "workflow_%s" % (row['job_id']), mc3_dna_workflow,
                    inputs=workflow_dm,
                    parameters=params,
                    tags=["donor:%s" % (row['participant_id'])],
                )
            tasks.append(task)

    if not os.path.exists("%s.tasks" % (args.out_base)):
        os.mkdir("%s.tasks" % (args.out_base))
    for data in tasks:
        with open("%s.tasks/%s" % (args.out_base, data.task_id), "w") as handle:
            handle.write(json.dumps(data.to_dict()))

    service = GalaxyService(
        docstore=docstore,
        galaxy=args.galaxy,
        sudo=args.sudo,
        tool_data=args.tool_data,
        tool_dir=args.tool_dir,
        work_dir=args.work_dir,
        # Duplicate smp entries collapsed as in the test-exome variant above
        # (gatk_bqsr listed twice, gatk_indel at both 24 and 12).
        smp=[
            ["gatk_bqsr", 12],
            ["gatk_indel", 24],
            ["MuSE", 8],
            ["pindel", 8],
            ["mutect", 8],
            ["delly", 4],
            ["bwa_mem", 12],
            ["radia", 8],
            ["radia_filter", 8]
        ]
    )
    with open("%s.service" % (args.out_base), "w") as handle:
        s = service.get_config()
        if args.scratch:
            print "Using scratch", args.scratch
            s.set_docstore_config(cache_path=args.scratch, open_perms=True)
        s.store(handle)
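# The joblist consumed above is a tab-separated file with one row per
# tumor/normal pair. The required column names are taken directly from the
# row[...] accesses in the loop; column order does not matter to
# csv.DictReader:
#
#   job_id, participant_id, disease,
#   normal_assembly, tumor_assembly,
#   normal_analysis_id, normal_filename, normal_aliquot_id, normal_barcode,
#   tumor_analysis_id, tumor_filename, tumor_aliquot_id, tumor_barcode,
#   rna_analysis_id   (the literal string "NA" when no tumor RNA BAM exists)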
def run_gen(args):
    # A stray `args = parser.parse_args()` was removed here: run_gen already
    # receives its parsed args, and `parser` is not in scope.
    syn = synapseclient.Synapse()
    syn.login()

    docstore = from_url(args.out_base)

    data_mapping = {
        "reference_genome": "genome.fa",
        "dbsnp": "dbsnp_132_b37.leftAligned.vcf",
        "cosmic": "b37_cosmic_v54_120711.vcf",
        "gold_indels": "Mills_and_1000G_gold_standard.indels.hg19.sites.fixed.vcf",
        "phase_one_indels": "1000G_phase1.indels.hg19.sites.fixed.vcf",
        "centromere": "centromere_hg19.bed"
    }

    if args.ref_download:
        # Download reference files from Synapse and populate the document store.
        for a in syn.chunkedQuery('select * from entity where parentId=="%s"' % (REFDATA_PROJECT)):
            print "found", a['entity.name']
            if a['entity.name'] in data_mapping.values() or a['entity.name'].replace(".gz", "") in data_mapping.values():
                print "loading"
                ent = syn.get(a['entity.id'])
                id = ent.annotations['uuid'][0]
                t = Target(uuid=id)
                docstore.create(t)
                path = docstore.get_filename(t)
                name = ent.name
                if 'dataPrep' in ent.annotations:
                    if ent.annotations['dataPrep'][0] == 'gunzip':
                        subprocess.check_call("gunzip -c %s > %s" % (ent.path, path), shell=True)
                        name = name.replace(".gz", "")
                    else:
                        print "Unknown DataPrep"
                else:
                    shutil.copy(ent.path, path)
                docstore.update_from_file(t)
                meta = {}
                meta['name'] = name
                meta['uuid'] = id
                if 'dataPrep' in meta:
                    del meta['dataPrep']
                docstore.put(id, meta)

    dm = {}
    for k, v in data_mapping.items():
        hit = None
        for a in docstore.filter(name=v):
            hit = a[0]
        if hit is None:
            raise Exception("%s not found" % (v))
        dm[k] = {"uuid": hit}

    workflow = GalaxyWorkflow(ga_file="workflows/Galaxy-Workflow-PCAWG_CGHUB.ga")

    tasks = TaskGroup()
    for ent in synqueue.listAssignments(syn, **config):
        # Only queue donors whose assignment state is still unset (NaN in the table).
        if not isinstance(ent['state'], basestring) and isnan(ent['state']):
            gnos_endpoint = urlparse(ent['meta']['Normal_WGS_alignment_GNOS_repos']).netloc
            task = GalaxyWorkflowTask(
                "workflow_%s" % (ent['id']), workflow,
                inputs=dm,
                parameters={
                    'normal_bam_download': {
                        "uuid": ent['meta']['Normal_WGS_alignment_GNOS_analysis_ID'],
                        "gnos_endpoint": gnos_endpoint,
                        "cred_file": key_map[gnos_endpoint]
                    },
                    'tumor_bam_download': {
                        "uuid": ent['meta']['Tumour_WGS_alignment_GNOS_analysis_IDs'],
                        "gnos_endpoint": gnos_endpoint,
                        "cred_file": key_map[gnos_endpoint]
                    },
                    'broad_variant_pipeline': {
                        "broad_ref_dir": "/tool_data/files/refdata",
                        "sample_id": ent['meta']['Submitter_donor_ID']
                    }
                },
                tags=["donor:%s" % (ent['meta']['Submitter_donor_ID'])]
            )
            tasks.append(task)

    if not os.path.exists("%s.tasks" % (args.out_base)):
        os.mkdir("%s.tasks" % (args.out_base))
    for data in tasks:
        with open("%s.tasks/%s" % (args.out_base, data.task_id), "w") as handle:
            handle.write(json.dumps(data.to_dict()))
        state_file = "%s.tasks/%s.state" % (args.out_base, data.task_id)
        if os.path.exists(state_file):
            os.unlink(state_file)
    print "Tasks Created: %s" % (len(tasks))

    if args.create_service:
        service = GalaxyService(
            docstore=docstore,
            galaxy="bgruening/galaxy-stable",
            sudo=args.sudo,
            tool_data=os.path.abspath("tool_data"),
            tool_dir=os.path.abspath("tools"),
            work_dir=args.work_dir,
            smp=[
                ["MuSE", 8],
                ["pindel", 8],
                ["muTect", 8],
                ["delly", 4],
                ["gatk_bqsr", 12],
                ["gatk_indel", 24],
                ["bwa_mem", 12],
                ["broad_variant_pipline", 24]
            ]
        )
        with open("%s.service" % (args.out_base), "w") as handle:
            s = service.get_config()
            if args.scratch:
                print "Using scratch", args.scratch
                s.set_docstore_config(cache_path=args.scratch, open_perms=True)
            s.store(handle)
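# `config` (the kwargs for synqueue.listAssignments) and `key_map` (GNOS
# endpoint -> credential file) are globals defined elsewhere in the repo.
# A hypothetical sketch of their shape, inferred from the call sites above;
# every value here is a placeholder, except the cghub.ucsc.edu key path,
# which appears verbatim in the CGHub variants:
example_config = {
    "table_id": "syn0000000",  # Synapse table backing the assignment queue (placeholder)
}
example_key_map = {
    "cghub.ucsc.edu": "/tool_data/files/cghub.key",
}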
tags=[ "sample:%s" % (ent['meta']['Donor_ID']) ] ) tasks.append(task) if not os.path.exists("pcawg.tasks"): os.mkdir("pcawg.tasks") for data in tasks: with open("pcawg.tasks/%s" % (data.task_id), "w") as handle: handle.write(json.dumps(data.to_dict())) if args.create_service: service = GalaxyService( docstore=docstore, galaxy="bgruening/galaxy-stable:dev", sudo=True, tool_data=os.path.abspath("tool_data"), tool_dir=os.path.abspath("tools"), smp=[ ["MuSE", 8], ["pindel", 8], ["muTect", 8], ["delly", 4], ["gatk_bqsr", 12], ["gatk_indel", 12], ["bwa_mem", 12], ["broad_variant_pipline", 28] ] ) with open("pcawg.service", "w") as handle: service.get_config().store(handle)
os.mkdir("%s.tasks" % (args.out_base)) for data in tasks: with open("%s.tasks/%s" % (args.out_base, data.task_id), "w") as handle: handle.write(json.dumps(data.to_dict())) if args.create_service: service = GalaxyService( docstore=docstore, galaxy="bgruening/galaxy-stable", sudo=True, tool_data=os.path.abspath("tool_data"), tool_dir=os.path.abspath("tools"), smp=[ ["MuSE", 8], ["pindel", 8], ["muTect", 8], ["delly", 4], ["gatk_bqsr", 12], ["gatk_indel", 24], ["bwa_mem", 12], ["broad_variant_pipline", 24] ] ) with open("%s.service" % (args.out_base), "w") as handle: s = service.get_config() if args.scratch: print "Using scratch", args.scratch s.set_docstore_config(cache_path=args.scratch, open_perms=True) s.store(handle)