def build_id_query(self, vid, scopes=None):
    '''Build the query body used to look up a single id.

    :param vid: the id value to search for; it is rendered to text with
        ``"{}".format(vid)`` before being placed in the query.
    :param scopes: field(s) to match against. A string produces a
        ``match`` query on that one field; a sequence (as judged by
        ``is_seq``) produces a ``multi_match`` query over all of them.
        Falsy values fall back to ``'_id'``.
    :returns: a dict holding the ``"query"`` clause merged with the
        entries of ``self._query_options``.
    :raises ValueError: if ``scopes`` is neither a string nor a sequence.

    Side effect: any ``"query"`` key is removed from
    ``self._query_options`` in place.
    '''
    _default_scopes = '_id'
    scopes = scopes or _default_scopes
    if is_str(scopes):
        _query = {
            "match": {
                scopes: {
                    "query": "{}".format(vid),
                    "operator": "and"
                }
            }
        }
    elif is_seq(scopes):
        _query = {
            "multi_match": {
                "query": "{}".format(vid),
                "fields": scopes,
                "operator": "and"
            }
        }
    else:
        # Use a "{}" placeholder: the previous "%s" was never substituted
        # by str.format, so the offending type was missing from the message.
        raise ValueError(
            '"scopes" cannot be "{}" type'.format(type(scopes)))
    _q = {"query": _query}
    # avoid "query" being overwritten by self._query_options
    self._query_options.pop("query", None)
    _q.update(self._query_options)
    return _q
def build_id_query(self, vid, scopes=None):
    '''Build the query body used to look up a single id.

    :param vid: the id value to search for; it is rendered to text with
        ``"{}".format(vid)`` before being placed in the query.
    :param scopes: field(s) to match against. A string produces a
        ``match`` query on that one field; a sequence (as judged by
        ``is_seq``) produces a ``multi_match`` query over all of them.
        Falsy values fall back to ``'_id'``.
    :returns: a dict holding the ``"query"`` clause merged with the
        entries of ``self._query_options``.
    :raises ValueError: if ``scopes`` is neither a string nor a sequence.

    Side effect: any ``"query"`` key is removed from
    ``self._query_options`` in place.
    '''
    # NOTE: a broader default scope list is kept here for reference but is
    # currently disabled:
    # _default_scopes = [
    #     '_id',
    #     'rsid', "dbnsfp.rsid", "dbsnp.rsid", "evs.rsid", "mutdb.rsid"  # for rsid
    #     "dbsnp.gene.symbol", 'evs.gene.symbol', 'clinvar.gene.symbol',
    #     'dbnsfp.genename', "cadd.gene.genename", "docm.genename",  # for gene symbols
    # ]
    _default_scopes = '_id'
    scopes = scopes or _default_scopes
    if is_str(scopes):
        _query = {
            "match": {
                scopes: {
                    "query": "{}".format(vid),
                    "operator": "and"
                }
            }
        }
    elif is_seq(scopes):
        _query = {
            "multi_match": {
                "query": "{}".format(vid),
                "fields": scopes,
                "operator": "and"
            }
        }
    else:
        # Use a "{}" placeholder: the previous "%s" was never substituted
        # by str.format, so the offending type was missing from the message.
        raise ValueError(
            '"scopes" cannot be "{}" type'.format(type(scopes)))
    _q = {"query": _query}
    # avoid "query" being overwritten by self._query_options
    self._query_options.pop("query", None)
    _q.update(self._query_options)
    return _q
def build_id_query(self, vid, scopes=None):
    '''Build the query body used to look up a single id.

    :param vid: the id value to search for; it is rendered to text with
        ``"{}".format(vid)`` before being placed in the query.
    :param scopes: field(s) to match against. A string produces a
        ``match`` query on that one field; a sequence (as judged by
        ``is_seq``) produces a ``multi_match`` query over all of them.
        Falsy values fall back to ``'_id'``.
    :returns: a dict holding the ``"query"`` clause merged with the
        entries of ``self._query_options``.
    :raises ValueError: if ``scopes`` is neither a string nor a sequence.

    Side effect: any ``"query"`` key is removed from
    ``self._query_options`` in place.
    '''
    _default_scopes = '_id'
    scopes = scopes or _default_scopes
    if is_str(scopes):
        _query = {
            "match": {
                scopes: {
                    "query": "{}".format(vid),
                    "operator": "and"
                }
            }
        }
    elif is_seq(scopes):
        _query = {
            "multi_match": {
                "query": "{}".format(vid),
                "fields": scopes,
                "operator": "and"
            }
        }
    else:
        # Use a "{}" placeholder: the previous "%s" was never substituted
        # by str.format, so the offending type was missing from the message.
        raise ValueError(
            '"scopes" cannot be "{}" type'.format(type(scopes)))
    _q = {"query": _query}
    # avoid "query" being overwritten by self._query_options
    self._query_options.pop("query", None)
    _q.update(self._query_options)
    return _q
def build_id_query(self, vid, scopes=None):
    '''Build the query body used to look up a single id.

    :param vid: the id value to search for; it is rendered to text with
        ``"{}".format(vid)`` before being placed in the query.
    :param scopes: field(s) to match against. A string produces a
        ``match`` query on that one field; a sequence (as judged by
        ``is_seq``) produces a ``multi_match`` query over all of them.
        Falsy values fall back to ``'_id'``.
    :returns: a dict holding the ``"query"`` clause merged with the
        entries of ``self._query_options``.
    :raises ValueError: if ``scopes`` is neither a string nor a sequence.

    Side effect: any ``"query"`` key is removed from
    ``self._query_options`` in place.
    '''
    # NOTE: a broader default scope list is kept here for reference but is
    # currently disabled:
    # _default_scopes = [
    #     '_id',
    #     'rsid', "dbnsfp.rsid", "dbsnp.rsid", "evs.rsid", "mutdb.rsid"  # for rsid
    #     "dbsnp.gene.symbol", 'evs.gene.symbol', 'clinvar.gene.symbol',
    #     'dbnsfp.genename', "cadd.gene.genename", "docm.genename",  # for gene symbols
    # ]
    _default_scopes = '_id'
    scopes = scopes or _default_scopes
    if is_str(scopes):
        _query = {
            "match": {
                scopes: {
                    "query": "{}".format(vid),
                    "operator": "and"
                }
            }
        }
    elif is_seq(scopes):
        _query = {
            "multi_match": {
                "query": "{}".format(vid),
                "fields": scopes,
                "operator": "and"
            }
        }
    else:
        # Use a "{}" placeholder: the previous "%s" was never substituted
        # by str.format, so the offending type was missing from the message.
        raise ValueError(
            '"scopes" cannot be "{}" type'.format(type(scopes)))
    _q = {"query": _query}
    # avoid "query" being overwritten by self._query_options
    self._query_options.pop("query", None)
    _q.update(self._query_options)
    return _q
def to_number(val):
    """Turn a numeric-looking string into an int or, failing that, a float.

    Values that are not strings (per ``is_str``) and strings that neither
    ``int`` nor ``float`` can parse are handed back untouched.
    """
    if not is_str(val):
        return val
    # Try the narrower conversion first so "3" becomes 3 rather than 3.0.
    for convert in (int, float):
        try:
            return convert(val)
        except ValueError:
            continue
    return val
def validate_src(self,
                 collection,
                 return_false=False,
                 return_none=False,
                 return_true=False,
                 verbose=False,
                 flag_invalid=False):
    '''Validate hgvs ids from a src collection.

    :param collection: either a collection name (looked up in the database
        returned by ``get_src_db()``) or an already-resolved collection
        object.
    :param return_false: if true, collect ids whose validation result is
        ``False`` under the ``False`` key of the output.
    :param return_none: same, for ids whose validation result is ``None``.
    :param return_true: same, for ids whose validation result is ``True``.
    :param verbose: forwarded to ``self.validate_hgvs``.
    :param flag_invalid: if true, every document whose id validates as
        ``False`` is updated with ``unmatched_ref`` set to the string
        ``"True"``.
    :returns: a dict with a ``'summary'`` entry holding the per-result
        counts, plus one list of ids for each requested result key.

    The counts are also printed to stdout.
    '''
    return_dict = {
        False: return_false,
        True: return_true,
        None: return_none
    }

    # read in the collection from mongodb
    if is_str(collection):
        src = get_src_db()
        _coll = src[collection]
    else:
        _coll = collection
    cursor = doc_feeder(_coll, step=10000)

    # output dictionary: one list per requested result key; stays empty
    # (print-only mode) when no return_* flag is set
    out = {key: [] for key, wanted in return_dict.items() if wanted}

    # initialize the count; assumes validate_hgvs only yields
    # True/False/None -- TODO confirm
    cnt_d = {True: 0, False: 0, None: 0}

    # validate each item in the cursor
    for item in cursor:
        _id = item['_id']
        valid = self.validate_hgvs(_id, verbose=verbose)
        # "== False" on purpose: a None result must not be flagged.
        if valid == False and flag_invalid:  # noqa: E712
            # Update through the resolved collection object; "collection"
            # may be just a name string, which has no update() method.
            _coll.update({"_id": _id},
                         {'$set': {"unmatched_ref": "True"}})
        cnt_d[valid] += 1
        if return_dict[valid]:
            out[valid].append(_id)

    # print out counts
    print("\n# of VALID HGVS IDs:\t{0}".format(cnt_d[True]))
    print("# of INVALID HGVS IDs:\t{0}".format(cnt_d[False]))
    print("# of HGVS IDs skipped:\t {0}".format(cnt_d[None]))

    out['summary'] = cnt_d
    return out
def _cleaned_fields(self, fields):
    '''Normalise the ``fields`` parameter.

    The result is ``None`` (meaning all fields), a list of field names,
    ``self._default_fields`` when nothing was supplied, or the input
    itself when it is a truthy non-string value.
    '''
    # Nothing supplied: fall back to the configured defaults.
    if not fields:
        return self._default_fields
    # Truthy non-string values are passed through untouched.
    if not is_str(fields):
        return fields
    # "all" (any letter case) means all fields will be returned.
    if fields.lower() == 'all':
        return None
    return [name.strip() for name in fields.split(',')]
def _cleaned_scopes(self, scopes):
    '''Normalise the ``scopes`` parameter.

    The result is a single scope string, a list of scope fields, or
    ``None`` when the input is falsy or is not a string/sequence.
    '''
    if not scopes:
        return None
    # A comma-separated string is split into its individual scope names.
    if is_str(scopes):
        scopes = [part.strip() for part in scopes.split(',')]
    if not is_seq(scopes):
        return None
    # Drop empty entries; a lone survivor is unwrapped to a plain value.
    kept = [scope for scope in scopes if scope]
    return kept[0] if len(kept) == 1 else kept
def validate_src(self,
                 collection,
                 return_false=False,
                 return_none=False,
                 return_true=False,
                 verbose=False,
                 flag_invalid=False,
                 generator=False):
    '''Validate hgvs ids from a src collection.

    :param collection: either a collection name (looked up in the database
        returned by ``get_src_db()``) or an already-resolved collection
        object.
    :param return_false: if true, collect ids whose validation result is
        ``False`` under the ``False`` key of the output.
    :param return_none: same, for ids whose validation result is ``None``.
    :param return_true: same, for ids whose validation result is ``True``.
    :param verbose: forwarded to ``self.validate_hgvs``.
    :param flag_invalid: if true, every document whose id validates as
        ``False`` is updated with ``unmatched_ref`` set to the string
        ``"True"``.
    :param generator: accepted for interface compatibility; not used by
        this method.
    :returns: a dict with a ``'summary'`` entry holding the per-result
        counts, plus one list of ids for each requested result key.

    The counts are also printed to stdout.
    '''
    return_dict = {
        False: return_false,
        True: return_true,
        None: return_none
    }

    # read in the collection from mongodb
    if is_str(collection):
        src = get_src_db()
        _coll = src[collection]
    else:
        _coll = collection
    cursor = doc_feeder(_coll, step=10000)

    # output dictionary: one list per requested result key; stays empty
    # (print-only mode) when no return_* flag is set
    out = {key: [] for key, wanted in return_dict.items() if wanted}

    # initialize the count; assumes validate_hgvs only yields
    # True/False/None -- TODO confirm
    cnt_d = {True: 0, False: 0, None: 0}

    # validate each item in the cursor
    for item in cursor:
        _id = item['_id']
        valid = self.validate_hgvs(_id, verbose=verbose)
        # "== False" on purpose: a None result must not be flagged.
        if valid == False and flag_invalid:  # noqa: E712
            # Update through the resolved collection object; "collection"
            # may be just a name string, which has no update() method.
            _coll.update({"_id": _id},
                         {'$set': {"unmatched_ref": "True"}})
        cnt_d[valid] += 1
        if return_dict[valid]:
            out[valid].append(_id)

    # print out counts
    print("\n# of VALID HGVS IDs:\t{0}".format(cnt_d[True]))
    print("# of INVALID HGVS IDs:\t{0}".format(cnt_d[False]))
    print("# of HGVS IDs skipped:\t {0}".format(cnt_d[None]))

    out['summary'] = cnt_d
    return out