def __init__(self, debug=False):
    """Build the table of pre-configured DRS clients keyed by id prefix.

    :param debug: when True, enables verbose output on this resolver
        (individual clients keep their own debug settings).
    """
    # Each key is the prefix used in compact DRS ids; the value is a client
    # pre-configured with the credentials file for that repository.
    # NOTE: the original dict listed "insdc" twice with the same value; the
    # duplicate has been removed (the second entry silently overwrote the
    # first and constructed sdlDRSClient twice).
    self.drsClients = {
        "insdc": sdlDRSClient('~/.keys/prj_11218_D17199.ngc'),
        "crdc": crdcDRSClient('~/.keys/crdc_credentials.json', 's3'),
        "bdc": bdcDRSClient('~/.keys/bdc_credentials.json', 'gs'),
        "anv": anvilDRSClient('~/.keys/anvil_credentials.json', '', 'gs'),
        "sbcgc": sbcgcDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
        "sbcav": cavaticaDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
        "srapub": DRSClient('https://locate.ncbi.nlm.nih.gov', debug=False)
    }
    self.registeredClients = []   # clients discovered from a registry
    self.hostNameIndex = {}       # host name -> client lookup
    self.debug = debug
def __init__(self, debug=False, getReg=True):
    """Build the table of pre-configured DRS clients keyed by id prefix.

    :param debug: when True, enables verbose output on this resolver.
    :param getReg: when True, also query the GA4GH service registry for
        additional DRS services.
    """
    # Each key is the prefix used in compact DRS ids; the value is a client
    # pre-configured with the credentials file for that repository.
    # NOTE: the original dict listed "insdc" twice with the same value; the
    # duplicate has been removed (the second entry silently overwrote the
    # first and constructed sdlDRSClient twice).
    self.drsClients = {
        "insdc": sdlDRSClient('~/.keys/prj_11218_D17199.ngc'),
        "crdc": crdcDRSClient('~/.keys/crdc_credentials.json', 's3'),
        "bdc": bdcDRSClient('~/.keys/bdc_credentials.json', 'gs'),
        "anv": anvilDRSClient('~/.keys/anvil_credentials.json', '', 'gs'),
        "sbcgc": sbcgcDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
        "sbcav": cavaticaDRSClient('~/.keys/sevenbridges_keys.json', 'gs'),
        'sbbdc': sbbdcDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
        "sradrs": SRADRSClient('https://locate.be-md.ncbi.nlm.nih.gov')
    }
    self.registeredClients = []   # clients discovered from the registry
    self.hostNameIndex = {}       # host name -> client lookup
    self.debug = debug
    if getReg:
        self.getRegisteredDRSServices()
def DRSClientFromRegistryEntry(self, service, prefix):
    """Return a DRS client for a registry entry.

    Known prefixes get a client pre-configured with local credentials;
    anything else falls back to a generic client built from the registry
    entry itself.

    :param service: the raw service-registry entry (used for the fallback)
    :param prefix: compact-id prefix identifying the repository
    :return: a configured DRS client instance
    """
    # Lazy factories: only the selected client is actually constructed.
    factories = {
        "crdc": lambda: crdcDRSClient('~/.keys/crdc_credentials.json', 's3'),
        "bdc": lambda: bdcDRSClient('~/.keys/bdc_credentials.json', 'gs'),
        "insdc": lambda: sdlDRSClient('~/.keys/prj_11218_D17199.ngc'),
        "sbcgc": lambda: sbcgcDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
        "sbcav": lambda: cavaticaDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
    }
    make_client = factories.get(prefix)
    if make_client is not None:
        return make_client()
    return DRSClient.fromRegistryEntry(service)
def main(argv):
    """Run a DRS-to-compute demo: resolve DRS ids via NCBI SDL and submit
    a samtools job on GCP for each result row.

    Fixes over the original:
      * removed the accidental double assignment ``drsClient = drsClient = ...``
      * removed the second ``faspRunner = FASPRunner()`` which silently
        discarded the runner created with ``pauseSecs=0``
    """
    # edit the following line for where you put your ngc credentials file from dbGaP
    credentials_file = '~/.keys/prj_14565.ngc'

    faspRunner = FASPRunner(pauseSecs=0)
    settings = faspRunner.settings

    # Step 1 - Discovery
    # query for relevant DRS objects
    searchClient = localSearchClient()
    query_job = searchClient.runQuery('')

    drsClient = sdlDRSClient(credentials_file, debug=True)

    location = 'projects/{}/locations/{}'.format(
        settings['GCPProject'], settings['GCPPipelineRegion'])
    mysam = GCPLSsamtools(location, settings['GCPOutputBucket'], debug=True)

    # repeat steps 2 and 3 for each row of the query
    for row in query_job:
        print("subject={}, drsID={}".format(row[0], row[1]))

        # Step 2 - Use DRS to get the URL
        objInfo = drsClient.getObject(row[1])
        fileSize = objInfo['size']
        print(fileSize)
        # we've predetermined we want to use the gs copy in this case
        res = drsClient.getAccessURL(row[1], 'gs.us')
        url = res['url']
        print(url)

        # Step 3 - Run a pipeline on the file at the drs url
        outfile = "{}.txt".format(row[0])
        pipeline_id = mysam.runWorkflow(url, outfile)
        print('submitted:{}'.format(pipeline_id))

        via = ''
        note = 'Anvil GTEX Test via SDL'
        time = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        faspRunner.logRun(time, via, note, pipeline_id, outfile, str(fileSize),
                          searchClient, drsClient, mysam)
def main(argv):
    """Demo run: search for DRS ids, resolve a fixed SDL accession, and
    submit an MD5 WES workflow for each search row.

    Note: the DRS lookups deliberately use the hard-coded accession
    'SRR5368359.sra' (for testing) rather than the drs id from each row.
    """
    # Step 1 - Discovery: query for relevant DRS objects
    searchClient = DiscoverySearchClient(
        'https://ga4gh-search-adapter-presto-public.prod.dnastack.com/')
    query = "select submitter_id, read_drs_id drsid from thousand_genomes.onek_genomes.ssd_drs where population = 'ACB' limit 1"
    query_job = searchClient.runQuery(query)

    # Step 2 - a DRS client for the NCBI SDL service
    drsClient = sdlDRSClient('~/.keys/prj_11218_D17199.ngc')

    # Step 3 - a class that runs a compute for us
    wesClient = DNAStackWESClient('~/.keys/DNAStackWESkey.json')

    # A log is helpful to keep track of the computes we've submitted
    faspRunner = FASPRunner()

    # repeat steps 2 and 3 for each row of the query
    for row in query_job:
        print("subject={}, drsID={}".format(row[0], row[1]))

        # Step 2 - Use DRS to get the URL (fixed accession, for testing)
        acc = 'SRR5368359.sra'
        objInfo = drsClient.getObject(acc)
        fileSize = objInfo['size']
        print(fileSize)

        # we've predetermined we want to use the gs copy in this case
        res = drsClient.getAccessURL(acc, 'gs.us')
        url = res['url']
        print(url)

        # Step 3 - Run a pipeline on the file at the drs url
        outfile = "{}.txt".format(row[0])
        pipeline_id = wesClient.runWorkflow(url, outfile)
        print('submitted:{}'.format(pipeline_id))

        via = 'WES'
        note = 'WES MD5 on NCBI SDL'
        time = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        faspRunner.logRun(time, via, note, pipeline_id, outfile, str(fileSize),
                          searchClient, drsClient, wesClient)
def main(argv):
    """Join dbGaP phenotype tables to SRA metadata in BigQuery, then run an
    MD5 WES workflow on each matching CRAM via the NCBI SDL DRS service."""
    faspRunner = FASPRunner(pauseSecs=0)

    # Step 1 - Discovery: query for relevant DRS objects
    searchClient = BigQuerySearchClient()
    query = """SELECT sra.biosample, sra.acc||'.cram' FROM `isbcgc-216220.GECCO_CRC_Susceptibility.Subject_Phenotypes` sp join `isbcgc-216220.GECCO_CRC_Susceptibility.Sample_MULTI` sm on sm.dbgap_subject_id = sp.dbgap_subject_id join `nih-sra-datastore.sra.metadata` sra on sm.BioSample_Accession = sra.biosample where AGE between 45 and 55 and sex = 'Female' limit 3"""
    query_job = searchClient.runQuery(query)

    # Step 2 - a DRS client for the NCBI SDL service
    drsClient = sdlDRSClient('~/.keys/prj_14565.ngc', True)

    # Step 3 - a class that runs a compute for us
    wesClient = DNAStackWESClient('~/.keys/dnastack_wes_credentials.json')

    # repeat steps 2 and 3 for each row of the query
    for row in query_job:
        print("sample={}, drsID={}".format(row[0], row[1]))

        # Step 2 - Use DRS to get the URL
        objInfo = drsClient.getObject(row[1])
        fileSize = objInfo['size']
        print(fileSize)

        # we've predetermined we want to use the gs copy in this case
        res = drsClient.getAccessURL(row[1], 'gs.us')
        url = res['url']
        print(url)

        # Step 3 - Run a pipeline on the file at the drs url
        outfile = "{}.txt".format(row[0])
        pipeline_id = wesClient.runWorkflow(url, outfile)
        print('submitted:{}'.format(pipeline_id))

        via = 'WES'
        note = 'WES MD5 on NCBI SDL'
        time = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        faspRunner.logRun(time, via, note, pipeline_id, outfile, str(fileSize),
                          searchClient, drsClient, wesClient)
def download(url):
    """Download *url* to ./test_file.bam in ~100 kB chunks.

    NOTE(review): the original ``def`` line was not visible in this chunk;
    the single ``url`` parameter is reconstructed from the body and from the
    ``download(res['url'])`` call below — confirm against the full file.

    Fixes over the original:
      * ``stream=True`` so the response body is not read fully into memory
        before the chunked write loop runs
      * ``with`` blocks guarantee the file (and connection) are closed even
        if a chunk write raises
    """
    print('in download')
    file_name = 'test_file.bam'
    with requests.get(url, stream=True) as req:
        print(req)
        with open(file_name, 'wb') as file:
            for chunk in req.iter_content(100000):
                print('writing a chunk')
                file.write(chunk)


if __name__ == "__main__":
    # Earlier variant kept for reference:
    # client1 = sdlDRSClient('~/.keys/prj_14565.ngc')
    # res = client1.getObject('SRR1999478.bam')
    # print('--Get Info--')
    # print (res)
    # print('--Get a URL--')
    # res = client1.getAccessURL('SRR1999478.bam','gs.us')
    # print (res)
    # print ('-----------------')
    client2 = sdlDRSClient('~/.keys/prj_11218_D17199.ngc', debug=True)
    res = client2.getObject('SRR5368359.sra')
    print('--Get Info--')
    print(res)
    print('--Get a URL--')
    res = client2.getAccessURL('SRR5368359.sra', 'gs.us')
    #print (json.dumps(res, indent=2))
    print(res['url'])
    download(res['url'])