Beispiel #1
0
def main(argv):
    """Run a single-row BigQuery discovery query through a FASPRunner.

    Creates a FASPRunner, configures it with a BigQuery search client, a
    BioDataCatalyst DRS client and a GCPLSsamtools client built from the
    runner's settings, and then asks the runner to execute the query.

    Args:
        argv: command-line arguments; not read by this function.
    """
    runner = FASPRunner(pauseSecs=0)
    config = runner.settings

    # Step 1 - Discovery: the search client that will find the DRS objects.
    bqClient = BigQuerySearchClient()

    # An alternative query against `isbcgc-216220.COPDGene.phenotype_drs`
    # (subject_id, read_drs_id where weight_kg between 91.8 and 93.0, LIMIT 1)
    # was previously used here; the 1000 Genomes query below is the active one.
    query = """
		SELECT submitter_id, read_drs_id
		FROM `isbcgc-216220.onek_genomes.ssd_drs`
		where population = 'BEB'
		LIMIT 1"""

    # Step 2 - DRS: BioDataCatalyst client, constructed with 'gs'.
    bdcClient = bdcDRSClient('~/.keys/bdc_credentials.json', 'gs')

    # Step 3 - Compute: the location string and output bucket both come from
    # the runner's settings.
    project = config['GCPProject']
    region = config['GCPPipelineRegion']
    pipelineLocation = 'projects/{}/locations/{}'.format(project, region)
    samtoolsClient = GCPLSsamtools(pipelineLocation, config['GCPOutputBucket'])

    runner.configure(bqClient, bdcClient, samtoolsClient)
    runner.runQuery(query, 'One k  query ')
Beispiel #2
0
	def __init__(self, debug=False):
		"""Create the prefix -> DRS client table and empty registry state.

		Args:
			debug: stored on the instance as ``self.debug``.
		"""
		# Map of short prefix -> DRS client instance. Each prefix is listed
		# exactly once: a repeated key in a dict literal silently overwrites
		# the earlier value after constructing a client that is thrown away.
		self.drsClients = {
			"insdc": sdlDRSClient('~/.keys/prj_11218_D17199.ngc'),
			"crdc": crdcDRSClient('~/.keys/crdc_credentials.json','s3'),
			"bdc": bdcDRSClient('~/.keys/bdc_credentials.json','gs'),
			"anv": anvilDRSClient('~/.keys/anvil_credentials.json', '', 'gs'),
			"sbcgc": sbcgcDRSClient('~/.keys/sevenbridges_keys.json','s3'),
			"sbcav": cavaticaDRSClient('~/.keys/sevenbridges_keys.json','s3'),
			"srapub": DRSClient('https://locate.ncbi.nlm.nih.gov', debug=False)
		}
		# Both start empty; nothing in this constructor populates them.
		self.registeredClients = []
		self.hostNameIndex = {}
		self.debug = debug
Beispiel #3
0
	def DRSClientFromRegistryEntry(self, service, prefix):
		"""Return a DRS client for ``prefix``.

		Prefixes with a dedicated client class get a freshly constructed
		instance of that class; any other prefix is delegated to
		``DRSClient.fromRegistryEntry(service)``.

		Args:
			service: registry entry, only used for prefixes without a
				dedicated client class.
			prefix: short identifier selecting the client class.

		Returns:
			The newly created DRS client.
		"""
		if prefix == "crdc":
			return crdcDRSClient('~/.keys/crdc_credentials.json','s3')
		if prefix == "bdc":
			return bdcDRSClient('~/.keys/bdc_credentials.json','gs')
		if prefix == "insdc":
			return sdlDRSClient('~/.keys/prj_11218_D17199.ngc')
		if prefix == "sbcgc":
			return sbcgcDRSClient('~/.keys/sevenbridges_keys.json','s3')
		if prefix == "sbcav":
			return cavaticaDRSClient('~/.keys/sevenbridges_keys.json','s3')
		# No dedicated class for this prefix: build from the registry entry.
		return DRSClient.fromRegistryEntry(service)
    def __init__(self, debug=False, getReg=True):
        """Create the prefix -> DRS client table and optionally load services.

        Args:
            debug: stored on the instance as ``self.debug``.
            getReg: when true, ``self.getRegisteredDRSServices()`` is called
                at the end of construction.
        """
        # Map of short prefix -> DRS client instance. Each prefix is listed
        # exactly once: a repeated key in a dict literal silently overwrites
        # the earlier value after constructing a client that is thrown away.
        self.drsClients = {
            "insdc": sdlDRSClient('~/.keys/prj_11218_D17199.ngc'),
            "crdc": crdcDRSClient('~/.keys/crdc_credentials.json', 's3'),
            "bdc": bdcDRSClient('~/.keys/bdc_credentials.json', 'gs'),
            "anv": anvilDRSClient('~/.keys/anvil_credentials.json', '', 'gs'),
            "sbcgc": sbcgcDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
            "sbcav": cavaticaDRSClient('~/.keys/sevenbridges_keys.json', 'gs'),
            'sbbdc': sbbdcDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
            "sradrs": SRADRSClient('https://locate.be-md.ncbi.nlm.nih.gov'),
        }
        # Both start empty; this constructor itself does not fill them.
        self.registeredClients = []
        self.hostNameIndex = {}
        self.debug = debug

        if getReg:
            self.getRegisteredDRSServices()
Beispiel #5
0
def main(argv):
    """Run a three-row discovery query through a FASPRunner using WES.

    Configures a FASPRunner with a DiscoverySearchClient, a bdcDRSClient and
    a DNAStackWESClient, then asks the runner to execute the query.

    Args:
        argv: command-line arguments; not read by this function.
    """
    runner = FASPRunner(pauseSecs=0)

    # Step 1 - Discovery: search endpoint plus the query for DRS objects.
    searchUrl = 'https://ga4gh-search-adapter-presto-public.prod.dnastack.com/'
    discovery = DiscoverySearchClient(searchUrl)
    query = (
        "select submitter_id, read_drs_id drsid "
        "from thousand_genomes.onek_genomes.ssd_drs "
        "where population = 'ACB' limit 3"
    )

    # Step 2 - DRS: client used to resolve the DRS ids returned by the query.
    drs = bdcDRSClient('~/.keys/bdc_credentials.json', 'gs')

    # Step 3 - Compute: client that runs the workflow for us.
    wes = DNAStackWESClient('~/.keys/dnastack_wes_credentials.json')

    runner.configure(discovery, drs, wes)
    runner.runQuery(query, 'One k query using Search and WES')
Beispiel #6
0
def main(argv):
	"""Run a three-row discovery query through a FASPRunner.

	Configures a FASPRunner with a DiscoverySearchClient, a bdcDRSClient and
	a GCPLSsamtools client built from the runner's settings, then asks the
	runner to execute the query.

	Args:
		argv: command-line arguments; not read by this function.
	"""
	runner = FASPRunner(pauseSecs=0)
	config = runner.settings

	# Step 1 - Discovery: search endpoint plus the query for DRS objects.
	searchUrl = 'https://ga4gh-search-adapter-presto-public.prod.dnastack.com/'
	discovery = DiscoverySearchClient(searchUrl)
	query = (
		"select submitter_id, read_drs_id drsid "
		"from thousand_genomes.onek_genomes.ssd_drs "
		"where population = 'BEB' limit 3"
	)

	# Step 2 - DRS: BioDataCatalyst client, constructed with 'gs'.
	drs = bdcDRSClient('~/.keys/bdc_credentials.json', 'gs')

	# Step 3 - Compute: samtools client, given the location string and the
	# output bucket taken from the runner's settings.
	project = config['GCPProject']
	region = config['GCPPipelineRegion']
	pipelineLocation = 'projects/{}/locations/{}'.format(project, region)
	samtools = GCPLSsamtools(pipelineLocation, config['GCPOutputBucket'])

	runner.configure(discovery, drs, samtools)
	runner.runQuery(query, 'One k query using Search')
Beispiel #7
0
def main(argv):
	"""Query two sources, resolve each DRS id and run a workflow per file.

	Two discovery queries are run (one per search client) and their rows are
	concatenated. The second column of each row is expected to look like
	``<prefix>:<drs id>``; the prefix selects the DRS client, the search
	client and the workflow client used for that row. Rows whose access URL
	comes back as ``None`` are reported and skipped.

	Args:
		argv: command-line arguments; not read by this function.
	"""
	faspRunner = FASPRunner(pauseSecs=0)
	creditor = faspRunner.creditor
	settings = faspRunner.settings

	# set your Seven Bridges CGC project using what you have put in FASP Settings
	sbProject = settings['SevenBridgesProject']
	sbInstance = settings['SevenBridgesInstance']

	# Step 1 - Discovery
	# query for relevant DRS objects
	discoveryClients = {
		"sb": DiscoverySearchClient('https://ga4gh-search-adapter-presto-public.prod.dnastack.com/'),
		"bdc": BigQuerySearchClient()
	}

	crdcquery = "SELECT sp.dbGaP_Subject_ID,  'sb:'||sb_drs_id FROM dbgap_demo.scr_gecco_susceptibility.subject_phenotypes_multi sp join dbgap_demo.scr_gecco_susceptibility.sample_multi sm on sm.dbgap_subject_id = sp.dbgap_subject_id join dbgap_demo.scr_gecco_susceptibility.sb_drs_index di on di.sample_id = sm.sample_id where AGE between 45 and 55 and sex = 'Female' and file_type = 'cram' limit 3"

	bdcquery = """
		SELECT sp.dbGaP_Subject_ID,  'bdc:'||read_drs_id
		FROM `isbcgc-216220.COPDGene.Subject_MULTI` sm
		join `isbcgc-216220.COPDGene.Subject_Phenotypes_HMB` sp on sp.dbgap_subject_id = sm.dbgap_subject_id
		join `isbcgc-216220.COPDGene.COPD_DRS` drs on drs.su_submitter_id = sm.subject_id
 		where gender = '2'
 		and Age_Enroll between 45 and 55
 		LIMIT 3"""

	results = discoveryClients['sb'].runQuery(crdcquery)  # Send the query
	creditor.creditFromList('dbGapSSD')
	creditor.creditClass(discoveryClients['sb'])
	results += discoveryClients['bdc'].runQuery(bdcquery)
	creditor.creditFromList('BDCData')

	# Step 2 - DRS - set up DRS Clients, keyed by the same prefixes the
	# queries put in front of each DRS id.
	drsClients = {
		"sb": sbcgcDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
		"bdc": bdcDRSClient('~/.keys/bdc_credentials.json', 'gs')
	}
	print('setting credentials ')
	creditor.creditFromList('dbGaPFence')

	# Step 3 - set up a class that runs samtools for us
	# providing the location for the results
	location = 'projects/{}/locations/{}'.format(settings['GCPProject'], settings['GCPPipelineRegion'])
	sam2 = GCPLSsamtools(location, settings['GCPOutputBucket'])
	samClients = {
		"sb": samtoolsSBClient(sbInstance, sbProject),
		"bdc": sam2
	}

	# repeat steps 2 and 3 for each row of the query
	for row in results:

		print("subject={}, drsID={}".format(row[0], row[1]))
		# NOTE(review): resRow is filled in with a status below but is not
		# read anywhere in the visible code - confirm whether it is needed.
		resRow = [row[0], row[1]]

		# Step 2 - Use DRS to get the URL
		# The text before the first ':' picks the clients for this row.
		prefix, drsid = row[1].split(":", 1)
		drsClient = drsClients[prefix]
		searchClient = discoveryClients[prefix]
		creditor.creditClass(drsClient)
		url = drsClient.getAccessURL(drsid)
		print(url)

		# The object's size is not looked up here (a drsClient.getObject
		# call was previously used for that), so 0 is logged as the size.
		fileSize = 0

		# Identity test: None is a singleton, so compare with "is".
		if url is None:
			print('could not get DRS url')
			resRow.append('unauthorized')
			continue

		# Step 3 - Run a pipeline on the file at the drs url
		outfile = "{}.txt".format(row[0])
		mysam = samClients[prefix]
		creditor.creditClass(mysam)
		via = 'sh'
		note = 'Two dbGaP sources'
		# Named "timestamp" so the local does not shadow the stdlib
		# "time" module name.
		timestamp = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
		run_id = mysam.runWorkflow(url, outfile)
		faspRunner.logRun(timestamp, via, note,  run_id, outfile, fileSize,
			searchClient, drsClient, mysam)
		resRow.append('OK')
Beispiel #8
0
def main(argv):
	"""Query two BigQuery sources and run a workflow on every returned file.

	Both queries go through one BigQuerySearchClient and their rows are
	concatenated. The second column of each row is expected to look like
	``<prefix>:<drs id>``; the prefix selects which DRS client resolves the
	id. Every row is passed to the same GCPLSsamtools client and logged via
	the FASPRunner.

	Args:
		argv: command-line arguments; not read by this function.
	"""
	faspRunner = FASPRunner(pauseSecs=0)
	creditor = faspRunner.creditor
	settings = faspRunner.settings

	# Step 1 - Discovery: one search client serves both queries.
	searchClient = BigQuerySearchClient()

	# TCGA query - CRDC
	crdcquery = """
     	SELECT 'case_'||associated_entities__case_gdc_id , 'crdc:'||file_id
		FROM `isb-cgc.GDC_metadata.rel24_fileData_active` 
		where data_format = 'BAM' 
		and project_disease_type = 'Breast Invasive Carcinoma'
		limit 3"""

	# COPD query - Topmed
	bdcquery = """
  		SELECT SUBJECT_ID, 'bdc:'||read_drs_id
  		FROM `isbcgc-216220.COPDGene.phenotype_drs`
      	where Weight_KG between 92.5 and 93.0
      	LIMIT 3"""

	results = searchClient.runQuery(crdcquery)
	creditor.creditFromList('ISBGDCData')
	results += searchClient.runQuery(bdcquery)
	creditor.creditFromList('BDCData')

	# Step 2 - DRS: one client per prefix used in the queries above.
	drsClients = {
		"crdc": crdcDRSClient('~/.keys/crdc_credentials.json', ''),
		"bdc": bdcDRSClient('~/.keys/bdc_credentials.json', '')
	}
	print('setting credentials ')
	creditor.creditFromList('dbGaPFence')

	# Step 3 - Compute: samtools client, given the location string and the
	# output bucket taken from the runner's settings.
	project = settings['GCPProject']
	region = settings['GCPPipelineRegion']
	location = 'projects/{}/locations/{}'.format(project, region)
	mysam = GCPLSsamtools(location, settings['GCPOutputBucket'])

	# Fixed values logged with every run.
	# NOTE(review): pipeline_id is a literal placeholder; the value returned
	# by runWorkflow is not captured here - confirm whether it should be.
	via = 'sh'
	pipeline_id = 'paste here'
	note = 'Two sources'

	# Steps 2 and 3 are repeated for each row of the combined results.
	for row in results:
		subject = row[0]
		prefixedId = row[1]
		print("subject={}, drsID={}".format(subject, prefixedId))

		# The text before the first ':' picks the DRS client for this row.
		prefix, drsid = prefixedId.split(":", 1)
		drsClient = drsClients[prefix]
		url = drsClient.getAccessURL(drsid, 'gs')
		creditor.creditClass(drsClient)
		fileSize = drsClient.getObject(drsid)['size']

		# Run the workflow on the file at the DRS url, then log the run.
		outfile = "{}.txt".format(subject)
		mysam.runWorkflow(url, outfile)
		creditor.creditClass(mysam)
		time = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
		faspRunner.logRun(time, via, note, pipeline_id, outfile, fileSize,
			searchClient, drsClient, mysam)

	creditor.creditFromList('FASPScript2_sdrf', closeImage=False)