# Query the Kids First NCPI disease table and summarise the disease codes used.
import pandas as pd

from fasp.search import DiscoverySearchClient


def main(argv):
    searchClient = DiscoverySearchClient('https://ga4gh-search-adapter-presto-public.prod.dnastack.com')
    query = 'select id, disease from kidsfirst.ga4gh_tables.ncpi_disease'
    res = searchClient.runQuery(query)

    # build a dictionary of disease code -> display text
    diseases = {}
    for r in res:
        disease = r[1]
        dName = disease['identifier'][0]['value']
        code = disease['code']['coding'][0]['code']
        text = disease['code']['text']
        diseases[code] = text

    disease_df = pd.DataFrame.from_dict(diseases, orient='index', columns=['Term'])
    for k, v in diseases.items():
        print(k, v)
    print("found {} disease records".format(len(res)))
    print("There were {} disease codes used".format(len(diseases)))
    disease_df.to_csv('~/ncpi_kf_disease_terms.tsv', sep='\t')
def main(argv): searchClient = DiscoverySearchClient( 'https://ga4gh-search-adapter-presto-public.prod.dnastack.com') query = """select id, patient from kidsfirst.ga4gh_tables.patient where json_extract_scalar(patient, '$.extension[0].url') = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity' limit 3""" #TODO query on the value of ethnicity showDetail = True res = searchClient.runQuery(query) if showDetail: print(json.dumps(res, indent=2)) for r in res: patient = r[1] print(patient['id'], patient['gender']) for e in patient['extension']: print(e['url']) print(e['extension'][0]['url']) vc = e['extension'][0]['valueCoding'] print(vc['code'], vc['display'])
# Cross-check subject lists: GA4GH Search (phenopackets) vs BigQuery (GECCO CRC Susceptibility).
from fasp.search import DiscoverySearchClient, BigQuerySearchClient


def main(argv):
    searchClient = DiscoverySearchClient('https://ga4gh-search-adapter-presto-public.prod.dnastack.com/')
    #query = "select id, phenopacket from sample_phenopackets.ga4gh_tables.gecco_phenopackets limit 10"
    query = "select id from sample_phenopackets.ga4gh_tables.gecco_phenopackets where json_extract_scalar(phenopacket, '$.subject.sex') = 'MALE'"

    bqSearchClient = BigQuerySearchClient()
    crdcquery = """
        SELECT BioSample_Accession id
        FROM `isbcgc-216220.GECCO_CRC_Susceptibility.Subject_Phenotypes` sp
        join `isbcgc-216220.GECCO_CRC_Susceptibility.Sample_MULTI` sm
        on sm.dbgap_subject_id = sp.dbgap_subject_id
        and sex = 'Male'"""

    dbList = []
    results = bqSearchClient.runQuery(crdcquery)
    print(len(results))
    for r in results:
        dbList.append(r['id'])

    ppList = []
    query_job = searchClient.runQuery(query)  # Send the query
    print(len(query_job))
    for r in query_job:
        ppList.append(r[0])

    # compare the sorted lists of ids from the two sources
    dbList.sort()
    ppList.sort()
    if dbList == ppList:
        print("The lists dbList and ppList are the same")
    else:
        print("The lists dbList and ppList are not the same")
# Retrieve a few Kids First patient records and print them as JSON.
import json

from fasp.search import DiscoverySearchClient


def main(argv):
    searchClient = DiscoverySearchClient('https://ga4gh-search-adapter-presto-public.prod.dnastack.com')
    query = 'select id, patient from kidsfirst.ga4gh_tables.patient limit 3'
    res = searchClient.runQuery(query)
    print(json.dumps(res, indent=2))
def main(argv): searchClient = DiscoverySearchClient( 'https://ga4gh-search-adapter-presto-public.prod.dnastack.com', debug=False) query = """SELECT s.su_submitter_id, drs_id FROM thousand_genomes.onek_genomes.ssd_drs s join thousand_genomes.onek_genomes.sra_drs_files f on f.sample_name = s.su_submitter_id where filetype = 'bam' and mapped = 'mapped' and sequencing_type ='exome' and population = 'JPT' LIMIT 3""" searchClient.runQuery(query)
def main(argv): # Step 1 - Discovery # query for relevant DRS objects searchClient = DiscoverySearchClient( 'https://ga4gh-search-adapter-presto-public.prod.dnastack.com') query = """select submitter_id, read_drs_id drsid from thousand_genomes.onek_genomes.ssd_drs where population = 'ACB' limit 3""" res = searchClient.runQuery(query) print(res)
# Find EGA files for pancreatic adenocarcinoma samples via GA4GH Search.
from fasp.search import DiscoverySearchClient


def main():
    searchClient = DiscoverySearchClient('https://ga4gh-search-adapter-presto-public.prod.dnastack.com')
    query = """SELECT sample_submitter_id, fileid, filename
    FROM dbgap_demo.scr_ega.scr_egapancreatic_sample_multi p
    join dbgap_demo.scr_ega.scr_egapancreatic_files f on f.sample_primary_id = p.sample_primary_id
    where phenotype = 'pancreatic adenocarcinoma' limit 3"""
    query_job = searchClient.runQuery(query)
    for row in query_job:
        print("sample={}, EGAFileID={}".format(row[0], row[1]))
import datetime

from fasp.runner import FASPRunner
from fasp.search import DiscoverySearchClient
# module paths below are assumed from the fasp package layout
from fasp.loc import DRSMetaResolver
from fasp.workflow import DNAStackWESClient


def main(argv):
    faspRunner = FASPRunner(pauseSecs=0)
    creditor = faspRunner.creditor

    # Step 1 - Discovery
    # query for relevant DRS objects
    searchClient = DiscoverySearchClient('https://ga4gh-search-adapter-presto-public.prod.dnastack.com/', debug=False)
    query = "SELECT file_name, compact_drs_id, hostbased_drs_id, drs_id from thousand_genomes.onek_genomes.onek_recal_variants_drs where chromosome = 'chr21' and annotated = false"
    print(query)
    query_job = searchClient.runQuery(query)  # Send the query
    creditor.creditClass(searchClient)

    # Step 2 - DRS - use the MetaResolver to send drs ids to the right service
    drsResolver = DRSMetaResolver(getReg=False)

    # Step 3 - set up a class that runs a compute for us
    wesClient = DNAStackWESClient('~/.keys/dnastack_wes_credentials.json')

    # repeat steps 2 and 3 for each row of the query
    # this example should find ids for the same file in both BioDataCatalyst and Anvil
    for row in query_job:
        drs_id = row[1]
        print("vcffile={}, compact drsID={}".format(row[0], drs_id))

        # Step 2 - Use DRS to get the URL
        objInfo = drsResolver.getObject(drs_id)
        drsClient, localid = drsResolver.getClient(drs_id)
        print(drsClient)
        creditor.creditClass(drsClient)
        fileSize = objInfo['size']
        vcfurl = drsResolver.getAccessURL(drs_id, 'gs')

        # Step 3 - Run a pipeline on the file at the drs url
        pipeline_id = wesClient.runGWASWorkflow(vcfurl, 'gs://dnastack-public-bucket/thousand_genomes_meta.csv')
        creditor.creditClass(wesClient)
        print('submitted:{}'.format(pipeline_id))

        outfile = ''
        via = 'WES'
        note = 'GWAS'
        time = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        faspRunner.logRun(time, via, note, pipeline_id, outfile, str(fileSize),
                          searchClient, drsClient, wesClient)
def main(argv): # Step 1 - Discovery # query for relevant DRS objects searchClient = DiscoverySearchClient( 'https://ga4gh-search-adapter-presto-public.prod.dnastack.com') query = """select submitter_id, read_drs_id drsid from thousand_genomes.onek_genomes.ssd_drs where population = 'ACB' limit 1""" query_job = searchClient.runQuery(query) for row in query_job: print("subject={}, drsID={}".format(row[0], row[1]))
from fasp.search import DiscoverySearchClient, BigQuerySearchClient


def main(argv):
    # Step 1 - Discovery
    # query two sources for relevant DRS objects
    discoveryClients = {
        "sb": DiscoverySearchClient('https://ga4gh-search-adapter-presto-public.prod.dnastack.com'),
        "bdc": BigQuerySearchClient()
    }

    crdcquery = """SELECT sp.dbGaP_Subject_ID, 'sb:'||sb_drs_id
    FROM dbgap_demo.scr_gecco_susceptibility.subject_phenotypes_multi sp
    join dbgap_demo.scr_gecco_susceptibility.sample_multi sm on sm.dbgap_subject_id = sp.dbgap_subject_id
    join dbgap_demo.scr_gecco_susceptibility.sb_drs_index di on di.sample_id = sm.sample_id
    where AGE between 45 and 55 and sex = 'Female' and file_type = 'cram' limit 3"""

    bdcquery = """SELECT sp.dbGaP_Subject_ID, 'bdc:'||read_drs_id
    FROM `isbcgc-216220.COPDGene.Subject_MULTI` sm
    join `isbcgc-216220.COPDGene.Subject_Phenotypes_HMB` sp on sp.dbgap_subject_id = sm.dbgap_subject_id
    join `isbcgc-216220.COPDGene.COPD_DRS` drs on drs.su_submitter_id = sm.subject_id
    where gender = '2' and Age_Enroll between 45 and 55
    LIMIT 3"""

    results = discoveryClients['sb'].runQuery(crdcquery)  # Send the query
    results += discoveryClients['bdc'].runQuery(bdcquery)

    # repeat steps 2 and 3 for each row of the query
    for row in results:
        print("subject={}, drsID={}".format(row[0], row[1]))
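# Illustrative sketch (not part of the original script): the 'sb:' and 'bdc:' prefixes added
# in the two queries above exist so that a later step can route each DRS id to the matching
# DRS client, as the combined dbGaP example at the end of this section does. Assuming a
# drsClients dict keyed by the same prefixes, the routing would look like:
#
#   for row in results:
#       prefix, drsid = row[1].split(":", 1)
#       url = drsClients[prefix].getAccessURL(drsid)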
def main(argv): # Step 1 - Discovery # query for relevant DRS objects searchClient = DiscoverySearchClient( 'https://ga4gh-search-adapter-presto-public.prod.dnastack.com/') query = "select submitter_id, read_drs_id drsid from thousand_genomes.onek_genomes.ssd_drs where population = 'ACB' limit 1" query_job = searchClient.runQuery(query) # Step 2 - DRS - set up a DRS Client # CRDC drsClient = sdlDRSClient('~/.keys/prj_11218_D17199.ngc') # Step 3 - set up a class that run a compute for us wesClient = DNAStackWESClient('~/.keys/DNAStackWESkey.json') # A log is helpful to keep track of the computes we've submitted faspRunner = FASPRunner() # repeat steps 2 and 3 for each row of the query for row in query_job: print("subject={}, drsID={}".format(row[0], row[1])) # Step 2 - Use DRS to get the URL #objInfo = drsClient.getObject(row[1]) # for testing acc = 'SRR5368359.sra' objInfo = drsClient.getObject(acc) fileSize = objInfo['size'] print(fileSize) # we've predetermined we want to use the gs copy in this case #url = drsClient.getAccessURL(row[1], 'gs') res = drsClient.getAccessURL(acc, 'gs.us') url = res['url'] print(url) # Step 3 - Run a pipeline on the file at the drs url outfile = "{}.txt".format(row[0]) pipeline_id = wesClient.runWorkflow(url, outfile) print('submitted:{}'.format(pipeline_id)) via = 'WES' note = 'WES MD5 on NCBI SDL' time = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S") faspRunner.logRun(time, via, note, pipeline_id, outfile, str(fileSize), searchClient, drsClient, wesClient)
import datetime

from fasp.runner import FASPRunner
from fasp.search import DiscoverySearchClient
# module paths below are assumed from the fasp package layout
from fasp.loc import EGAhtsget
from fasp.workflow import GCPLSsamtools


def main(argv):
    faspRunner = FASPRunner(pauseSecs=0)
    creditor = faspRunner.creditor
    settings = faspRunner.settings

    # Step 1 - Discovery
    # query for relevant files
    searchClient = DiscoverySearchClient('https://ga4gh-search-adapter-presto-public.prod.dnastack.com/')
    query = "SELECT sample_submitter_id, fileid, filename FROM dbgap_demo.scr_ega.scr_egapancreatic_sample_multi p join dbgap_demo.scr_ega.scr_egapancreatic_files f on f.sample_primary_id = p.sample_primary_id where phenotype = 'pancreatic adenocarcinoma' limit 3"
    query_job = searchClient.runQuery(query)

    # Step 2 - Use htsget at EGA
    htsgetClient = EGAhtsget('~/.keys/ega.credentials')

    # Step 3 - set up a class that runs a compute for us
    location = 'projects/{}/locations/{}'.format(settings['GCPProject'], settings['GCPPipelineRegion'])
    wesClient = GCPLSsamtools(location, settings['GCPOutputBucket'])

    # repeat steps 2 and 3 for each row of the query
    for row in query_job:
        print("sample={}, EGAFileID={}".format(row[0], row[1]))

        # Step 2 - Use htsget to determine the file size
        fileSize = htsgetClient.getSize(row[1])
        print(fileSize)
        # we've predetermined we want to use the gs copy in this case
        #url = drsClient.getAccessURL(row[1], 'gs')
        #htsgetClient.htsget(row[1], 'chr1', 100000, 102000, 'BAM', row[2])
        localfile = 'NA19377.unmapped.ILLUMINA.bwa.LWK.low_coverage.20120522.bam'  #row[2]

        # Step 3 - Run a pipeline on the local file
        outfile = "{}.txt".format(row[0])
        pipeline_id = wesClient.runWorkflow(localfile, outfile)
        #print('submitted:{}'.format(pipeline_id))

        via = 'local'
        note = 'samtools on htsget BAM'
        time = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        faspRunner.logRun(time, via, note, pipeline_id, outfile, str(fileSize),
                          searchClient, htsgetClient, wesClient)
from fasp.search import DiscoverySearchClient


def main(argv):
    # Step 1 - Discovery
    # query for relevant sequence records
    searchClient = DiscoverySearchClient('https://search-presto-public-covid19.prod.dnastack.com')

    # List tables
    #searchClient.listTables()
    # List table schema
    #searchClient.listTableInfo('coronavirus_dnastack_curated.covid_cloud_production.sequences')

    query = 'select accession, biosample, genus, species from coronavirus_dnastack_curated.covid_cloud_production.sequences limit 10'
    res = searchClient.runQuery(query, returnType='dataframe')
    print(res)
def main(argv): searchClient = DiscoverySearchClient( 'https://ga4gh-search-adapter-presto-public.prod.dnastack.com') query = """select id, patient from kidsfirst.ga4gh_tables.patient where json_extract_scalar(patient, '$.gender') = 'female' limit 3""" showDetail = True res = searchClient.runQuery(query) if showDetail: print(json.dumps(res, indent=2)) for r in res: patient = r[1] print(patient['id'], patient['gender'])
from fasp.search import DiscoverySearchClient


def main(argv):
    # Step 1 - Discovery
    # explore the tables available on this Search service
    searchClient = DiscoverySearchClient('https://search-presto-public-covid19.prod.dnastack.com')

    # List tables
    tList = searchClient.listTables(verbose=False)

    # List each table's schema description
    for t in tList:
        res = searchClient.listTableInfo(t, verbose=False)
        print(t)
        if 'data_model' in res:
            print(res['data_model']['description'])
        else:
            print('No data model')
def main(argv): pp_dbgap_join = """SELECT sp.dbGaP_Subject_ID, 'sbcgc:'||sb_drs_id FROM dbgap_demo.scr_gecco_susceptibility.subject_phenotypes_multi sp join dbgap_demo.scr_gecco_susceptibility.sample_multi sm on sm.dbgap_subject_id = sp.dbgap_subject_id join dbgap_demo.scr_gecco_susceptibility.sb_drs_index di on di.sample_id = sm.sample_id join sample_phenopackets.ga4gh_tables.gecco_phenopackets pp on pp.id = sm.biosample_accession where json_extract_scalar(pp.phenopacket, '$.subject.sex') = 'MALE' and file_type = 'cram' limit 3""" # Step 1 - Discovery # query for relevant DRS objects searchClient = DiscoverySearchClient( 'https://ga4gh-search-adapter-presto-public.prod.dnastack.com', debug=False) results = searchClient.runQuery(pp_dbgap_join) # repeat steps 2 and 3 for each row of the query for row in results: print("subject={}, drsID={}".format(row[0], row[1]))
import json

from fasp.search import DiscoverySearchClient


def main(argv):
    # Step 1 - Discovery
    # query for a specific patient record
    searchClient = DiscoverySearchClient('https://ga4gh-search-adapter-presto-public.prod.dnastack.com')

    # List tables
    #searchClient.listTables()
    # List table schema
    #searchClient.listTableInfo('coronavirus_dnastack_curated.covid_cloud_production.sequences')

    query = """select id, patient from kidsfirst.ga4gh_tables.patient
    where json_extract_scalar(patient, '$.id') = '451133' limit 3"""
    res = searchClient.runQuery(query)
    print(json.dumps(res, indent=2))
from fasp.runner import FASPRunner
from fasp.search import DiscoverySearchClient
# module paths below are assumed from the fasp package layout
from fasp.loc import DRSClient
from fasp.workflow import GCPLSsamtools


def main(argv):
    faspRunner = FASPRunner(pauseSecs=0)
    settings = faspRunner.settings

    # Step 1 - Discovery
    # query for relevant DRS objects
    searchClient = DiscoverySearchClient('https://ga4gh-search-adapter-presto-public.prod.dnastack.com/', debug=True)
    query = "SELECT s.su_submitter_id, drs_id FROM thousand_genomes.onek_genomes.ssd_drs s join thousand_genomes.onek_genomes.sra_drs_files f on f.sample_name = s.su_submitter_id where filetype = 'bam' and mapped = 'mapped' and sequencing_type ='exome' and population = 'JPT' LIMIT 3"

    # Step 2 - DRS - a generic DRS client pointed at the NCBI locate service
    drsClient = DRSClient('https://locate.ncbi.nlm.nih.gov', access_id='2', debug=True, public=True)

    # Step 3 - set up a class that runs samtools for us
    location = 'projects/{}/locations/{}'.format(settings['GCPProject'], settings['GCPPipelineRegion'])
    mysam = GCPLSsamtools(location, settings['GCPOutputBucket'])

    faspRunner.configure(searchClient, drsClient, mysam)
    faspRunner.runQuery(query, 'One k query SRA DRS')
def main(argv): faspRunner = FASPRunner(pauseSecs=0) # Step 1 - Discovery # query for relevant DRS objects searchClient = DiscoverySearchClient( 'https://ga4gh-search-adapter-presto-public.prod.dnastack.com/') query = "select submitter_id, read_drs_id drsid from thousand_genomes.onek_genomes.ssd_drs where population = 'ACB' limit 3" # Step 2 - DRS - set up a DRS Client drsClient = bdcDRSClient('~/.keys/bdc_credentials.json', 'gs') # Step 3 - set up a class that run a compute for us wesClient = DNAStackWESClient('~/.keys/dnastack_wes_credentials.json') faspRunner.configure(searchClient, drsClient, wesClient) faspRunner.runQuery(query, 'One k query using Search and WES')
def main(argv): faspRunner = FASPRunner(pauseSecs=0) pp_dbgap_join = "SELECT sp.dbGaP_Subject_ID, 'sbcgc:'||sb_drs_id FROM dbgap_demo.scr_gecco_susceptibility.subject_phenotypes_multi sp join dbgap_demo.scr_gecco_susceptibility.sample_multi sm on sm.dbgap_subject_id = sp.dbgap_subject_id join dbgap_demo.scr_gecco_susceptibility.sb_drs_index di on di.sample_id = sm.sample_id join sample_phenopackets.ga4gh_tables.gecco_phenopackets pp on pp.id = sm.biosample_accession where json_extract_scalar(pp.phenopacket, '$.subject.sex') = 'MALE' and file_type = 'cram' limit 3" # Step 1 - Discovery # query for relevant DRS objects searchClient = DiscoverySearchClient( 'https://ga4gh-search-adapter-presto-public.prod.dnastack.com/', debug=True) # Step 2 - DRS - a metaresolver will deal with which drs server is required drsClient = DRSMetaResolver() # Step 3 - set up a class that run a compute for us wesClient = DNAStackWESClient('~/.keys/dnastack_wes_credentials.json') faspRunner.configure(searchClient, drsClient, wesClient) faspRunner.runQuery(pp_dbgap_join, 'Phenopacket Gecco')
from fasp.runner import FASPRunner
from fasp.search import DiscoverySearchClient
# module paths below are assumed from the fasp package layout
from fasp.loc import bdcDRSClient
from fasp.workflow import GCPLSsamtools


def main(argv):
    faspRunner = FASPRunner(pauseSecs=0)
    settings = faspRunner.settings

    # Step 1 - Discovery
    # query for relevant DRS objects
    searchClient = DiscoverySearchClient('https://ga4gh-search-adapter-presto-public.prod.dnastack.com/')
    query = "select submitter_id, read_drs_id drsid from thousand_genomes.onek_genomes.ssd_drs where population = 'BEB' limit 3"

    # Step 2 - DRS - set up a DRS Client
    # BioDataCatalyst
    drsClient = bdcDRSClient('~/.keys/bdc_credentials.json', 'gs')

    # Step 3 - set up a class that runs samtools for us
    # providing the location where we want the results to go
    location = 'projects/{}/locations/{}'.format(settings['GCPProject'], settings['GCPPipelineRegion'])
    mysam = GCPLSsamtools(location, settings['GCPOutputBucket'])

    faspRunner.configure(searchClient, drsClient, mysam)
    faspRunner.runQuery(query, 'One k query using Search')
from fasp.search import DiscoverySearchClient


def getMapping(searchClient, table, column):
    """Return a dict mapping coded values to display values for the given table and column."""
    query = ("select valueString, maptoValue from search_cloud.cshcodeathon.md_value_map "
             "where table_name = '{}' and column_name='{}'").format(table, column)
    mapping = searchClient.runQuery(query)
    mapDict = {}
    for row in mapping:
        mapDict[row[0]] = row[1]
    return mapDict


searchClient = DiscoverySearchClient('https://ga4gh-search-adapter-presto-public.prod.dnastack.com', debug=False)

table_name = 'search_cloud.cshcodeathon.organoid_profiling_pc_subject_phenotypes_gru'
map_col = 'sex'
mapping = getMapping(searchClient, table_name, map_col)
print(mapping)

res = searchClient.runOneTableQuery(
    column_list=['dbgap_subject_id', 'age', 'race', 'sex'],
    table=table_name,
    limit=100)
print(res)

# apply the mapping to recode the values in the mapped column
res[map_col] = res[map_col].replace(list(mapping.keys()), list(mapping.values()))
# imports
from fasp.search import DiscoverySearchClient

searchClient = DiscoverySearchClient('https://search-presto-public-covid19.prod.dnastack.com')

res = searchClient.runOneTableQuery(
    column_list=['accession', 'biosample', 'genus', 'species'],
    table='coronavirus_dnastack_curated.covid_cloud_production.sequences',
    limit=15)
print(res)
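# The same result can also be fetched with a full SQL query; a sketch based on the
# runQuery(returnType='dataframe') form used elsewhere in these examples:
res2 = searchClient.runQuery(
    "select accession, biosample, genus, species "
    "from coronavirus_dnastack_curated.covid_cloud_production.sequences limit 15",
    returnType='dataframe')
print(res2)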
import datetime

from fasp.runner import FASPRunner
from fasp.search import DiscoverySearchClient, BigQuerySearchClient
# module paths below are assumed from the fasp package layout
from fasp.loc import sbcgcDRSClient, bdcDRSClient
from fasp.workflow import GCPLSsamtools, samtoolsSBClient


def main(argv):
    faspRunner = FASPRunner(pauseSecs=0)
    creditor = faspRunner.creditor
    settings = faspRunner.settings

    # set your Seven Bridges CGC project using what you have put in FASP Settings
    sbProject = settings['SevenBridgesProject']
    sbInstance = settings['SevenBridgesInstance']

    # Step 1 - Discovery
    # query for relevant DRS objects
    discoveryClients = {
        "sb": DiscoverySearchClient('https://ga4gh-search-adapter-presto-public.prod.dnastack.com/'),
        "bdc": BigQuerySearchClient()
    }

    crdcquery = "SELECT sp.dbGaP_Subject_ID, 'sb:'||sb_drs_id FROM dbgap_demo.scr_gecco_susceptibility.subject_phenotypes_multi sp join dbgap_demo.scr_gecco_susceptibility.sample_multi sm on sm.dbgap_subject_id = sp.dbgap_subject_id join dbgap_demo.scr_gecco_susceptibility.sb_drs_index di on di.sample_id = sm.sample_id where AGE between 45 and 55 and sex = 'Female' and file_type = 'cram' limit 3"

    bdcquery = """SELECT sp.dbGaP_Subject_ID, 'bdc:'||read_drs_id
    FROM `isbcgc-216220.COPDGene.Subject_MULTI` sm
    join `isbcgc-216220.COPDGene.Subject_Phenotypes_HMB` sp on sp.dbgap_subject_id = sm.dbgap_subject_id
    join `isbcgc-216220.COPDGene.COPD_DRS` drs on drs.su_submitter_id = sm.subject_id
    where gender = '2' and Age_Enroll between 45 and 55
    LIMIT 3"""

    results = discoveryClients['sb'].runQuery(crdcquery)  # Send the query
    creditor.creditFromList('dbGapSSD')
    creditor.creditClass(discoveryClients['sb'])
    results += discoveryClients['bdc'].runQuery(bdcquery)
    creditor.creditFromList('BDCData')

    # Step 2 - DRS - set up DRS Clients
    drsClients = {
        "sb": sbcgcDRSClient('~/.keys/sevenbridges_keys.json', 's3'),
        "bdc": bdcDRSClient('~/.keys/bdc_credentials.json', 'gs')
    }
    print('setting credentials ')
    creditor.creditFromList('dbGaPFence')

    # Step 3 - set up a class that runs samtools for us
    # providing the location for the results
    location = 'projects/{}/locations/{}'.format(settings['GCPProject'], settings['GCPPipelineRegion'])
    sam2 = GCPLSsamtools(location, settings['GCPOutputBucket'])
    samClients = {
        "sb": samtoolsSBClient(sbInstance, sbProject),
        "bdc": sam2
    }

    # repeat steps 2 and 3 for each row of the query
    for row in results:
        print("subject={}, drsID={}".format(row[0], row[1]))
        resRow = [row[0], row[1]]

        # Step 2 - Use DRS to get the URL
        # the prefix identifies which DRS, search and compute client to use
        prefix, drsid = row[1].split(":", 1)
        drsClient = drsClients[prefix]
        searchClient = discoveryClients[prefix]
        creditor.creditClass(drsClient)
        url = drsClient.getAccessURL(drsid)
        print(url)
        #objInfo = drsClient.getObject(drsid)
        #print (objInfo)
        #fileSize = objInfo['size']
        fileSize = 0

        # Step 3 - Run a pipeline on the file at the drs url
        if url is not None:
            outfile = "{}.txt".format(row[0])
            mysam = samClients[prefix]
            creditor.creditClass(mysam)
            via = 'sh'
            note = 'Two dbGaP sources'
            time = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
            run_id = mysam.runWorkflow(url, outfile)
            faspRunner.logRun(time, via, note, run_id, outfile, fileSize,
                              searchClient, drsClient, mysam)
            resRow.append('OK')
        else:
            print('could not get DRS url')
            resRow.append('unauthorized')