def results(res, field, mark):
    """Emit one tab-separated line per hit document.

    Each line carries, in order: the value stored under ``field`` in the
    hit's ``_source``, the hit's ``_id`` and the hit's ``_index``.

    res   -- iterable of hit dicts (iterated directly, not a full response).
    field -- key to read from each hit's ``_source``.
    mark  -- part of the signature but not used by this function.
    """
    for hit in res:
        hit_id, value, hit_index = hit['_id'], hit['_source'][field], hit['_index']
        line = '%s\t%s\t%s' % (value, hit_id, hit_index)
        output_formatter.output(line, output, True)
def sample_query(*argv):
    """Report subject, study and alias details for each sample ID.

    The sample IDs may be passed as one list or as separate positional
    arguments.  For every sample the records are selected through the
    module-level ``lv`` handle in this order: Sample -> SampleFamily ->
    Subject -> Cohort, followed by all SDIAlias rows whose ``KEYID1`` is the
    subject ID.  One line is written per alias.

    The whole loop runs under a single ``try``: the first exception stops
    processing of the remaining samples and is reported through
    ``output_formatter`` (the same channel as every other line).
    """
    samples = argv[0] if isinstance(argv[0], list) else argv
    output_formatter.output('Query: %s' % ' '.join(samples), output, False)
    try:
        for samp in samples:
            output_formatter.output(samp, output, True)
            # NOTE(review): the header names five columns but each row below
            # carries four values (no sample ID) -- confirm which is intended.
            output_formatter.output(
                'S_SAMPLEID\tS_SUBJECTID\tS_STUDYID\tALIASTYPE\tALIASID',
                output, True)

            sample = Sample()
            sample.select(lv, S_SAMPLEID=samp)
            sf = SampleFamily()
            sf.select(lv, S_SAMPLEFAMILYID=sample.SAMPLEFAMILYID)
            subject = Subject()
            subject.select(lv, S_SUBJECTID=sf.SUBJECTID)
            cohort = Cohort()
            cohort.select(lv, SUBJECTID=subject.S_SUBJECTID)

            aliases = SDIAlias.selectAll(lv, KEYID1=subject.S_SUBJECTID)
            for alias in aliases:
                output_formatter.output(
                    '%s\t%s\t%s\t%s' % (subject.S_SUBJECTID, cohort.STUDYID,
                                        alias['ALIASTYPE'], alias['ALIASID']),
                    output, True)
    except Exception as e:
        # Route the error through the formatter instead of a print statement.
        output_formatter.output(e, output, True)
def redmine_query(*argv):
    """List the 'Dataset' issues found for each keyword (used as project_id).

    The keywords may be passed as one list or as separate positional
    arguments.  For every keyword, ``Issue.find`` is called with
    ``tracker='Dataset'`` and ``project_id=keyword``; the first result is
    serialised with ``to_json()`` and parsed, and one line per entry of
    ``['issue']['issues']`` is written with its id, category name, subject
    and the value of the custom field named 'URL' ('N/A' when absent).

    Any failure while fetching or formatting a keyword's issues is reported
    as "No results found for <keyword>" and the next keyword is processed.
    """
    keywords = argv[0] if isinstance(argv[0], list) else argv
    output_formatter.output('Query: %s\n' % ' '.join(keywords), output, False)
    output_formatter.output('ID\tCategory\tSubject\tPath', output, True)
    for keyword in keywords:
        try:
            datasets = Issue.find(None, None, tracker='Dataset', project_id=keyword)
            datasets = json.loads(datasets[0].to_json())
            for ds in datasets['issue']['issues']:
                path = 'N/A'
                # If several 'URL' custom fields exist, the last one wins.
                for cf in ds.get('custom_fields', []):
                    if cf['name'] == 'URL':
                        path = cf['value']
                category = ds['category']['name'] if 'category' in ds else 'N/A'
                output_formatter.output(
                    '%s\t%s\t"%s"\t(%s)' % (ds['id'], category, ds['subject'], path),
                    output, True)
        except Exception:
            # Report the keyword that failed (the handler previously formatted
            # an undefined name, which raised NameError instead).
            output_formatter.output('No results found for %s' % keyword, output, True)
def marker_query(*argv):
    """Search 'vcf_record' documents by marker ID and print the matches.

    The markers may be passed as one list or as separate positional
    arguments.  Each marker is matched against the ``id`` field in the
    module-level ``index``; the response is passed through ``strictCheck``
    before being handed to ``results``.
    """
    markers = argv[0] if type(argv[0]) == list else argv

    output_formatter.output('Query: %s' % ' '.join(markers), output, False)
    output_formatter.output('Marker\tIndex ID\tIndex', output, True)

    for marker in markers:
        body = {'query': {'bool': {'must': [{'match': {'id': marker}}]}}}
        response = es.search(index=index, doc_type=['vcf_record'], body=body)
        matched = strictCheck(response, 'id', marker)
        results(matched, 'id', 'marker')
def results(res):
    """Print one tab-separated line per hit of a search response.

    ``res`` must expose its hits under ``res['hits']['hits']``.  For each hit
    the line contains: ``id``, ``chrom:start_pos``, the reference allele, the
    alternative alleles and the hit's ``_index``.

    Allele entries of length 2 supply the reference allele (the last such
    entry wins, ``None`` if there is none); every other entry contributes an
    alternative allele.
    """
    for doc in res['hits']['hits']:
        source = doc['_source']
        rsid = source['id']
        chrom = source['chrom']
        start = source['start_pos']

        ref = None
        alts_list = []
        for allele in source['alleles']:
            # NOTE(review): the entry length is what separates ref from alt;
            # the allele entry schema is not visible here -- confirm.
            if len(allele) == 2:
                ref = allele.get('allele')
            else:
                alts_list.append(allele.get('allele'))

        # NOTE(review): alternatives are concatenated with no separator.
        alts = ''.join(alts_list)
        ind = doc['_index']
        output_formatter.output(
            '%s\t%s:%s\t%s\t%s\t%s' % (rsid, chrom, start, ref, alts, ind),
            output, True)
def subject_query(*argv):
    """Search 'plink_fam_record' documents by subject ID and print the matches.

    The subject IDs may be passed as one list or as separate positional
    arguments.  Each ID is matched against ``subject_id`` in the module-level
    ``index``.  When the module-level ``strict`` flag is truthy the response
    is passed through ``strictCheck``; otherwise the raw hit list is used.
    """
    subjects = argv[0] if type(argv[0]) == list else argv

    output_formatter.output('Query: %s' % ' '.join(subjects), output, False)
    output_formatter.output('Subject\tIndex ID\tIndex', output, False)

    for subject in subjects:
        body = {'query': {'bool': {'must': [{'match': {'subject_id': subject}}]}}}
        response = es.search(index=index, doc_type=['plink_fam_record'], body=body)
        if not strict:
            hits = response['hits']['hits']
        else:
            hits = strictCheck(response, 'subject_id', subject)
        results(hits, 'subject_id', 'subjectID')
def marker_query(*argv):
    """Look up rs IDs in an index and print the matching records.

    If the first argument starts with ``rs<digits>`` every argument is an
    rs ID and the index comes from ``getCurrentIndex()``; otherwise the first
    argument is the index name and the rest are rs IDs.  Each rs ID is
    matched against the ``id`` field (up to 10000 hits requested) and the
    response is printed by ``results``.
    """
    # Raw string: '\d' inside a normal literal is an invalid escape sequence.
    if re.match(r'^rs\d+', argv[0]):
        index = getCurrentIndex()
        rsids = argv
    else:
        index = argv[0]
        rsids = argv[1:]

    output_formatter.output('Query: %s' % ' '.join(argv), output, False)
    output_formatter.output('rs number\tlocation\treference allele\talternative allele\tindex', output, True)

    for rsid in rsids:
        q = {'query': {'bool': {'must': [{'match': {'id': rsid}}]}}}
        res = es.search(index=index, size=10000, body=q)
        results(res)
def keyword_query(*argv):
    """List documents from every index whose name contains a keyword.

    The keywords may be passed as one list or as separate positional
    arguments.  For each keyword, every index name returned by
    ``es.indices.get_alias("*")`` that contains the keyword
    (case-insensitively) is searched with no query body, and one line per
    returned hit is written: ``_id``, ``_type``, ``_index`` and the hit's
    ``path`` ('N/A' when the hit has no such key).
    """
    keywords = argv[0] if isinstance(argv[0], list) else argv
    output_formatter.output('Query: %s\n' % ' '.join(keywords), output, False)
    output_formatter.output('ID\tType\tIndex\tPath', output, True)

    for keyword in keywords:
        needle = keyword.lower()
        # All matching indices are searched before any hit is written.
        responses = [
            es.search(index=index_name)
            for index_name in es.indices.get_alias("*")
            if needle in index_name.lower()
        ]
        for response in responses:
            for hit in response['hits']['hits']:
                # NOTE(review): 'path' is looked up on the hit itself, not in
                # its '_source' -- confirm that is where it lives.
                path = hit['path'] if 'path' in hit else 'N/A'
                output_formatter.output(
                    '%s\t%s\t"%s"\t(%s)' % (hit['_id'], hit['_type'], hit['_index'], path),
                    output, True)
def subject_query(*argv):
    """Dump the Subject and Cohort attributes for each subject ID.

    The subject IDs may be passed as one list or as separate positional
    arguments.  For every ID a Subject and a Cohort are selected through the
    module-level ``lv`` handle, then the ID is written followed by one
    ``name<TAB>value`` line per instance attribute of the Subject and then
    of the Cohort.  An exception for one ID is written to the output and the
    loop moves on to the next ID.
    """
    subjects = argv[0] if type(argv[0]) == list else argv
    output_formatter.output('Query: %s' % ' '.join(subjects), output, False)

    for sub in subjects:
        try:
            subject = Subject()
            subject.select(lv, S_SUBJECTID=sub)
            cohort = Cohort()
            cohort.select(lv, SUBJECTID=sub)

            output_formatter.output(sub, output, True)
            # Subject attributes first, then Cohort attributes.
            for record in (subject, cohort):
                for (name, value) in record.__dict__.items():
                    output_formatter.output('%s\t%s' % (name, value), output, True)
        except Exception as e:
            output_formatter.output(e, output, True)
def chrom_query(*argv):
    """Look up ``chrom:position`` locations in an index and print the records.

    If the first argument looks like ``<chrom>:<digits>`` every argument is
    a location and the index comes from ``getCurrentIndex()``; otherwise the
    first argument is the index name and the rest are locations.

    For each location the query requires a match on ``chrom`` and, through
    ``minimum_should_match``, a match on ``start_pos``; up to 10000 hits are
    requested and the response is printed by ``results``.

    Raises ValueError (from tuple unpacking) if a location does not contain
    exactly one ':'.
    """
    # Accept multi-character chromosome names (10-22, X, Y, ...); matching a
    # single digit only made e.g. '10:12345' be taken for an index name.
    if re.match(r'^[0-9A-Za-z]+:\d+', argv[0]):
        index = getCurrentIndex()
        locs = argv
    else:
        index = argv[0]
        locs = argv[1:]

    output_formatter.output('Query: %s' % ' '.join(argv), output, False)
    output_formatter.output('rs number\tlocation\treference allele\talternative allele\tindex', output, True)

    for loc in locs:
        (chrom, position) = loc.split(':')
        q = {
            'query': {
                'bool': {
                    'minimum_should_match': '1',
                    'must': [{'match': {'chrom': chrom}}],
                    'should': [{'match': {'start_pos': position}}],
                }
            }
        }
        res = es.search(index=index, size=10000, body=q)
        # Print every location's hits; returning here reported only one.
        results(res)
def alias_query(*argv):
    """Resolve alias IDs to sample/subject IDs and their study.

    The alias IDs may be passed as one list or as separate positional
    arguments.  For every alias, all SDIAlias rows with that ``ALIASID`` are
    fetched through the module-level ``lv`` handle and one line is written
    per row: the stripped ``KEYID1``, the subject ID and the study.

    * ``KEYID1`` starting with 'S-' is resolved to a subject through Sample
      and SampleFamily.
    * ``KEYID1`` starting with 'ST-' is used as the subject ID directly.
    * The study is ``KEYID2``; when that is empty or '(null)' it is taken
      from the subject's Cohort, or 'N/A' if that lookup fails.

    Raises ValueError(KEYID1) for any other ``KEYID1`` prefix.
    """
    aliases = argv[0] if isinstance(argv[0], list) else argv
    output_formatter.output('Query: %s' % ' '.join(aliases), output, False)

    for alias in aliases:
        output_formatter.output(alias, output, True)
        output_formatter.output('S_SAMPLEID\tS_SUBJECTID\tS_STUDYID', output, True)

        alias_list = SDIAlias().selectAll(lv, ALIASID=alias)
        for a in alias_list:
            sid = a['KEYID1'].strip()
            study = a['KEYID2']

            if sid.startswith('S-'):
                s = Sample()
                s.select(lv, S_SAMPLEID=sid)
                sf = SampleFamily()
                sf.select(lv, S_SAMPLEFAMILYID=s.SAMPLEFAMILYID)
                subj = sf.SUBJECTID
            elif sid.startswith('ST-'):
                subj = sid
            else:
                raise ValueError(sid)

            if not study or study == '(null)':
                try:
                    coh = Cohort()
                    coh.select(lv, SUBJECTID=subj)
                    study = coh.STUDYID
                except Exception:
                    # Best effort only; a failed lookup should not stop the
                    # report, but KeyboardInterrupt/SystemExit must get out.
                    study = 'N/A'

            output_formatter.output('%s\t%s\t%s' % (sid, subj, study), output, True)
def bim_query(*argv):
    """Look up every rs ID listed in one or more .bim files.

    If the first argument ends with '.bim' every argument is a file and the
    index comes from ``getCurrentIndex()``; otherwise the first argument is
    the index name and the rest are files.

    Each file is read with ``numpy.genfromtxt`` as strings and its second
    column (index 1) is taken as the list of rs IDs.  Each ID is matched
    against the ``id`` field (up to 10000 hits requested) and the response
    is printed by ``results``.
    """
    if argv[0].endswith('.bim'):
        index = getCurrentIndex()
        bims = argv
    else:
        index = argv[0]
        bims = argv[1:]

    output_formatter.output('Query: %s' % ' '.join(argv), output, False)

    for bim in bims:
        # genfromtxt returns a 1-D array for a one-row file, which made the
        # [:, 1] column slice raise IndexError; normalise to 2-D first.
        # Assumes a multi-column file such as the standard .bim layout.
        table = numpy.atleast_2d(numpy.genfromtxt(bim, dtype='str'))
        # An empty file yields no columns, hence no IDs to look up.
        rsids = table[:, 1].tolist() if table.shape[1] > 1 else []

        output_formatter.output('%s' % bim, output, True)
        output_formatter.output('rs number\tlocation\treference allele\talternative allele\tindex', output, True)

        for rsid in rsids:
            q = {'query': {'bool': {'must': [{'match': {'id': rsid}}]}}}
            res = es.search(index=index, size=10000, body=q)
            results(res)