Example #1
0
	def __init__(self, debug=False):
		"""Set up the pre-configured DRS clients and empty registry state.

		Args:
			debug: stored unchanged on the instance as ``self.debug``.
		"""
		# Map of short prefix -> ready-to-use DRS client.
		# Each prefix is listed exactly once: a repeated key in a dict literal
		# silently replaces the earlier entry and builds a throwaway client.
		self.drsClients = {
			"insdc": sdlDRSClient('~/.keys/prj_11218_D17199.ngc'),
			"crdc": crdcDRSClient('~/.keys/crdc_credentials.json', 's3'),
			"bdc": bdcDRSClient('~/.keys/bdc_credentials.json', 'gs'),
			"anv": anvilDRSClient('~/.keys/anvil_credentials.json', '', 'gs'),
			"sbcgc": sbcgcDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
			"sbcav": cavaticaDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
			# NOTE(review): debug is hard-coded to False here rather than
			# forwarding the ``debug`` argument - confirm that is intended.
			"srapub": DRSClient('https://locate.ncbi.nlm.nih.gov', debug=False),
		}
		# Both start empty; nothing in this constructor populates them.
		self.registeredClients = []
		self.hostNameIndex = {}
		self.debug = debug
    def __init__(self, debug=False, getReg=True):
        """Set up the pre-configured DRS clients and optionally load the registry.

        Args:
            debug: stored unchanged on the instance as ``self.debug``.
            getReg: when true, ``self.getRegisteredDRSServices()`` is called
                once the attributes below have been initialised.
        """
        # Map of short prefix -> ready-to-use DRS client.
        # Each prefix is listed exactly once: a repeated key in a dict literal
        # silently replaces the earlier entry and builds a throwaway client.
        self.drsClients = {
            "insdc": sdlDRSClient('~/.keys/prj_11218_D17199.ngc'),
            "crdc": crdcDRSClient('~/.keys/crdc_credentials.json', 's3'),
            "bdc": bdcDRSClient('~/.keys/bdc_credentials.json', 'gs'),
            "anv": anvilDRSClient('~/.keys/anvil_credentials.json', '', 'gs'),
            "sbcgc": sbcgcDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
            "sbcav": cavaticaDRSClient('~/.keys/sevenbridges_keys.json', 'gs'),
            "sbbdc": sbbdcDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
            "sradrs": SRADRSClient('https://locate.be-md.ncbi.nlm.nih.gov'),
        }
        # Both start empty before the optional registry lookup below.
        self.registeredClients = []
        self.hostNameIndex = {}
        self.debug = debug

        if getReg:
            self.getRegisteredDRSServices()
Example #3
0
	def DRSClientFromRegistryEntry(self, service, prefix):
		"""Return a DRS client appropriate for ``prefix``.

		The prefixes handled explicitly below get a client built from a local
		credentials file; any other prefix falls back to
		``DRSClient.fromRegistryEntry(service)``.

		Args:
			service: registry entry, only used for the fallback client.
			prefix: short identifier compared against the known prefixes.
		"""
		if prefix == "crdc":
			return crdcDRSClient('~/.keys/crdc_credentials.json', 's3')
		if prefix == "bdc":
			return bdcDRSClient('~/.keys/bdc_credentials.json', 'gs')
		if prefix == "insdc":
			return sdlDRSClient('~/.keys/prj_11218_D17199.ngc')
		if prefix == "sbcgc":
			return sbcgcDRSClient('~/.keys/sevenbridges_keys.json', 's3')
		if prefix == "sbcav":
			return cavaticaDRSClient('~/.keys/sevenbridges_keys.json', 's3')
		# No dedicated client for this prefix: build one from the entry itself.
		return DRSClient.fromRegistryEntry(service)
Example #4
0
def main(argv):
    """Submit a GCPLSsamtools workflow for every DRS object found by the search.

    For each row of the query result the object is looked up through the SDL
    DRS client, its 'gs.us' access URL is resolved, a workflow is submitted on
    that URL and the submission is recorded with ``FASPRunner.logRun``.

    Args:
        argv: command-line arguments; not used by this function.
    """
    # edit the following line for where you put your ngc credentials file from dbGaP
    credentials_file = '~/.keys/prj_14565.ngc'

    faspRunner = FASPRunner(pauseSecs=0)
    settings = faspRunner.settings

    # Step 1 - Discovery
    # query for relevant DRS objects
    searchClient = localSearchClient()
    query_job = searchClient.runQuery('')

    drsClient = sdlDRSClient(credentials_file, debug=True)

    location = 'projects/{}/locations/{}'.format(settings['GCPProject'],
                                                 settings['GCPPipelineRegion'])
    mysam = GCPLSsamtools(location, settings['GCPOutputBucket'], debug=True)

    # NOTE(review): this replaces the pauseSecs=0 runner created above with a
    # default-configured one, so the runs are logged through the default
    # instance. Kept as in the original - confirm which runner is intended.
    faspRunner = FASPRunner()

    # The same for every run, so set once outside the loop.
    via = ''
    note = 'Anvil GTEX Test via SDL'

    # repeat steps 2 and 3 for each row of the query
    for row in query_job:
        subject, drs_id = row[0], row[1]
        print("subject={}, drsID={}".format(subject, drs_id))

        # Step 2 - Use DRS to get the URL
        objInfo = drsClient.getObject(drs_id)
        fileSize = objInfo['size']
        print(fileSize)
        # we've predetermined we want to use the gs copy in this case
        res = drsClient.getAccessURL(drs_id, 'gs.us')
        url = res['url']
        print(url)

        # Step 3 - Run a pipeline on the file at the drs url
        outfile = "{}.txt".format(subject)
        pipeline_id = mysam.runWorkflow(url, outfile)
        print('submitted:{}'.format(pipeline_id))

        # Named ``timestamp`` so the local does not shadow the ``time`` module.
        timestamp = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        faspRunner.logRun(timestamp, via, note, pipeline_id, outfile,
                          str(fileSize), searchClient, drsClient, mysam)
Example #5
0
def main(argv):
    """Submit a WES workflow for a fixed test accession, once per query row.

    The search result only drives the number of iterations and the output
    file names; the DRS lookups always use the hard-coded test accession.

    Args:
        argv: command-line arguments; not used by this function.
    """
    # Step 1 - Discovery: query for relevant DRS objects
    searchClient = DiscoverySearchClient(
        'https://ga4gh-search-adapter-presto-public.prod.dnastack.com/')
    query = "select submitter_id, read_drs_id drsid from thousand_genomes.onek_genomes.ssd_drs where population = 'ACB' limit 1"
    query_job = searchClient.runQuery(query)

    # Step 2 - DRS client built from an .ngc credentials file
    drsClient = sdlDRSClient('~/.keys/prj_11218_D17199.ngc')

    # Step 3 - client that submits the compute for us
    wesClient = DNAStackWESClient('~/.keys/DNAStackWESkey.json')

    # A log is helpful to keep track of the computes we've submitted
    faspRunner = FASPRunner()

    # Fixed values shared by every iteration.
    acc = 'SRR5368359.sra'  # test accession used instead of row[1]
    via = 'WES'
    note = 'WES MD5 on NCBI SDL'

    for row in query_job:
        subject_id = row[0]
        print("subject={}, drsID={}".format(subject_id, row[1]))

        # Resolve size and access URL for the test accession.
        fileSize = drsClient.getObject(acc)['size']
        print(fileSize)
        # we've predetermined we want to use the gs copy in this case
        url = drsClient.getAccessURL(acc, 'gs.us')['url']
        print(url)

        # Run the pipeline on the file at the DRS url.
        outfile = "{}.txt".format(subject_id)
        pipeline_id = wesClient.runWorkflow(url, outfile)
        print('submitted:{}'.format(pipeline_id))

        submitted_at = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        faspRunner.logRun(submitted_at, via, note, pipeline_id, outfile,
                          str(fileSize), searchClient, drsClient, wesClient)
Example #6
0
def main(argv):
	"""Submit a WES workflow for each sample returned by a BigQuery search.

	Each result row supplies a sample id (row[0]) and a DRS id (row[1]); the
	DRS id is resolved to its 'gs.us' access URL, a workflow is submitted on
	that URL and the submission is recorded with ``FASPRunner.logRun``.

	Args:
		argv: command-line arguments; not used by this function.
	"""
	faspRunner = FASPRunner(pauseSecs=0)

	# Step 1 - Discovery: query for relevant DRS objects
	searchClient = BigQuerySearchClient()
	query = """SELECT sra.biosample, sra.acc||'.cram'
		FROM `isbcgc-216220.GECCO_CRC_Susceptibility.Subject_Phenotypes` sp
		join `isbcgc-216220.GECCO_CRC_Susceptibility.Sample_MULTI` sm on
		sm.dbgap_subject_id = sp.dbgap_subject_id
		join `nih-sra-datastore.sra.metadata` sra on sm.BioSample_Accession = sra.biosample
		where AGE between 45 and 55 and sex = 'Female' limit 3"""
	query_job = searchClient.runQuery(query)

	# Step 2 - DRS client built from an .ngc credentials file
	drsClient = sdlDRSClient('~/.keys/prj_14565.ngc', True)

	# Step 3 - client that submits the compute for us
	wesClient = DNAStackWESClient('~/.keys/dnastack_wes_credentials.json')

	# Fixed values shared by every logged run.
	via = 'WES'
	note = 'WES MD5 on NCBI SDL'

	for row in query_job:
		sample, drs_id = row[0], row[1]
		print("sample={}, drsID={}".format(sample, drs_id))

		# Resolve size and access URL through DRS.
		fileSize = drsClient.getObject(drs_id)['size']
		print(fileSize)
		# we've predetermined we want to use the gs copy in this case
		url = drsClient.getAccessURL(drs_id, 'gs.us')['url']
		print(url)

		# Run the pipeline on the file at the DRS url.
		outfile = "{}.txt".format(sample)
		pipeline_id = wesClient.runWorkflow(url, outfile)
		print('submitted:{}'.format(pipeline_id))

		submitted_at = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
		faspRunner.logRun(submitted_at, via, note, pipeline_id, outfile,
			str(fileSize), searchClient, drsClient, wesClient)
    print('in download')
    file_name = 'test_file.bam'

    req = requests.get(url)
    print(req)
    file = open(file_name, 'wb')
    for chunk in req.iter_content(100000):
        print('writing a chunk')
        file.write(chunk)
    file.close()


if __name__ == "__main__":
    # Manual smoke test: fetch object info and a 'gs.us' access URL for one
    # accession, then download the file behind that URL.
    # The same sequence can be tried with the '~/.keys/prj_14565.ngc' key and
    # the 'SRR1999478.bam' accession.
    accession = 'SRR5368359.sra'
    sdl_client = sdlDRSClient('~/.keys/prj_11218_D17199.ngc', debug=True)

    object_info = sdl_client.getObject(accession)
    print('--Get Info--')
    print(object_info)

    print('--Get a URL--')
    access = sdl_client.getAccessURL(accession, 'gs.us')
    download_url = access['url']
    print(download_url)
    download(download_url)