Esempio n. 1
0
 def build_id_query(self, vid, scopes=None):
     """Build an Elasticsearch-style query body that looks up ``vid``.

     ``scopes`` selects the field(s) to search:

     * a single field name (string) produces a ``match`` query;
     * a sequence of field names produces a ``multi_match`` query;
     * an empty value or ``None`` falls back to the ``_id`` field.

     Every entry of ``self._query_options`` is merged into the returned
     dict. As a side effect, a ``"query"`` key is removed from
     ``self._query_options`` so that it cannot overwrite the query built
     here.

     Raises:
         ValueError: if ``scopes`` is neither a string nor a sequence
             (as decided by ``is_str`` / ``is_seq``).
     """
     scopes = scopes or '_id'
     if is_str(scopes):
         _query = {
             "match": {
                 scopes: {"query": "{}".format(vid), "operator": "and"},
             },
         }
     elif is_seq(scopes):
         _query = {
             "multi_match": {
                 "query": "{}".format(vid),
                 "fields": scopes,
                 "operator": "and",
             },
         }
     else:
         # Use a "{}" placeholder: a "%s" placeholder is ignored by
         # str.format(), which left the type out of the message.
         raise ValueError(
             '"scopes" cannot be "{}" type'.format(type(scopes)))

     _q = {"query": _query}
     # Drop any stored "query" option so it cannot clobber the one above.
     self._query_options.pop("query", None)
     _q.update(self._query_options)
     return _q
Esempio n. 2
0
 def build_id_query(self, vid, scopes=None):
     """Return an Elasticsearch-style query dict matching ``vid``.

     A string ``scopes`` yields a ``match`` query on that one field; a
     sequence of field names yields a ``multi_match`` query over all of
     them. When ``scopes`` is empty or ``None``, ``_id`` is searched.

     The options stored in ``self._query_options`` are merged into the
     result. A ``"query"`` entry is popped from ``self._query_options``
     first (mutating it) so the query built here is never overwritten.

     Raises:
         ValueError: if ``scopes`` is neither a string nor a sequence
             (as decided by ``is_str`` / ``is_seq``).
     """
     # Disabled alternative: a broader default scope list, kept for reference.
     # _default_scopes = [
     #     '_id',
     #     'rsid', "dbnsfp.rsid", "dbsnp.rsid", "evs.rsid", "mutdb.rsid"  # for rsid
     #     "dbsnp.gene.symbol", 'evs.gene.symbol', 'clinvar.gene.symbol',
     #     'dbnsfp.genename', "cadd.gene.genename", "docm.genename",      # for gene symbols
     # ]
     _default_scopes = '_id'
     scopes = scopes or _default_scopes
     if is_str(scopes):
         _query = {
             "match": {
                 scopes: {
                     "query": "{}".format(vid),
                     "operator": "and"
                 }
             }
         }
     elif is_seq(scopes):
         _query = {
             "multi_match": {
                 "query": "{}".format(vid),
                 "fields": scopes,
                 "operator": "and"
             }
         }
     else:
         # "{}" (not "%s") is required for str.format() to insert the type.
         raise ValueError('"scopes" cannot be "{}" type'.format(type(scopes)))
     _q = {"query": _query}
     # Avoid "query" being overwritten by self._query_options.
     self._query_options.pop("query", None)
     _q.update(self._query_options)
     return _q
Esempio n. 3
0
 def build_id_query(self, vid, scopes=None):
     """Assemble the query body used to fetch a document by ``vid``.

     Args:
         vid: the id value to search for; it is converted to text with
             ``"{}".format(vid)`` before being placed in the query.
         scopes: a field name (string) or a sequence of field names to
             search in. Empty/``None`` means the ``_id`` field.

     Returns:
         A dict with a ``"query"`` key holding either a ``match`` query
         (string ``scopes``) or a ``multi_match`` query (sequence
         ``scopes``), updated with the entries of
         ``self._query_options``.

     Raises:
         ValueError: if ``scopes`` is neither a string nor a sequence
             (as decided by ``is_str`` / ``is_seq``).

     Side effects:
         Removes the ``"query"`` key from ``self._query_options``, if
         present, so it cannot replace the query built here.
     """
     scopes = scopes or '_id'

     if is_str(scopes):
         clause = {scopes: {"query": "{}".format(vid), "operator": "and"}}
         _query = {"match": clause}
     elif is_seq(scopes):
         clause = {
             "query": "{}".format(vid),
             "fields": scopes,
             "operator": "and",
         }
         _query = {"multi_match": clause}
     else:
         # The placeholder must be "{}": str.format() leaves "%s" untouched,
         # so the offending type would otherwise be missing from the message.
         raise ValueError('"scopes" cannot be "{}" type'.format(type(scopes)))

     # Avoid "query" being overwritten by self._query_options.
     self._query_options.pop("query", None)
     _q = {"query": _query}
     _q.update(self._query_options)
     return _q
Esempio n. 4
0
 def build_id_query(self, vid, scopes=None):
     """Build the query dict for looking up ``vid`` in the given scopes.

     ``scopes`` may be one field name (string), giving a ``match``
     query, or a sequence of field names, giving a ``multi_match``
     query. An empty or ``None`` value defaults to ``'_id'``.

     The returned dict also carries every entry of
     ``self._query_options``. Note the side effect: ``"query"`` is
     popped from ``self._query_options`` so that it cannot overwrite the
     query assembled here.

     Raises:
         ValueError: if ``scopes`` is neither a string nor a sequence
             (as decided by ``is_str`` / ``is_seq``).
     """
     # Disabled alternative: a broader default scope list, kept for reference.
     # _default_scopes = [
     #     '_id',
     #     'rsid', "dbnsfp.rsid", "dbsnp.rsid", "evs.rsid", "mutdb.rsid"  # for rsid
     #     "dbsnp.gene.symbol", 'evs.gene.symbol', 'clinvar.gene.symbol',
     #     'dbnsfp.genename', "cadd.gene.genename", "docm.genename",      # for gene symbols
     # ]
     _default_scopes = '_id'
     scopes = scopes or _default_scopes
     if is_str(scopes):
         _query = {
             "match": {
                 scopes: {
                     "query": "{}".format(vid),
                     "operator": "and"
                 }
             }
         }
     elif is_seq(scopes):
         _query = {
             "multi_match": {
                 "query": "{}".format(vid),
                 "fields": scopes,
                 "operator": "and"
             }
         }
     else:
         # str.format() only substitutes "{}" fields; a "%s" placeholder
         # would be emitted literally and hide the actual type.
         raise ValueError('"scopes" cannot be "{}" type'.format(
             type(scopes)))
     _q = {"query": _query}
     # Avoid "query" being overwritten by self._query_options.
     self._query_options.pop("query", None)
     _q.update(self._query_options)
     return _q
Esempio n. 5
0
def to_number(val):
    """Convert a string to an int or, failing that, a float.

    Anything that is not a string (per ``is_str``) is returned
    unchanged, as is a string that parses as neither an int nor a float.
    """
    if not is_str(val):
        return val
    # Try the stricter conversion first so "3" becomes 3 rather than 3.0.
    for convert in (int, float):
        try:
            return convert(val)
        except ValueError:
            continue
    return val
Esempio n. 6
0
def to_number(val):
    """Return ``val`` parsed as a number when it is a numeric string.

    String input (per ``is_str``) is tried as an int first, then as a
    float. Non-string input, and strings that parse as neither, are
    returned as-is.
    """
    if is_str(val):
        for parse in (int, float):
            try:
                number = parse(val)
            except ValueError:
                # Not parseable by this converter; fall through to the next.
                continue
            return number
    return val
Esempio n. 7
0
    def validate_src(self,
                     collection,
                     return_false=False,
                     return_none=False,
                     return_true=False,
                     verbose=False,
                     flag_invalid=False):
        '''Validate the hgvs ids (``_id`` values) of a src collection.

        Each document's ``_id`` is passed to ``self.validate_hgvs`` and the
        result (``True``, ``False`` or ``None``) is tallied. The tallies are
        printed and also returned.

        Args:
            collection: either a collection name (string), which is looked
                up in the database returned by ``get_src_db()``, or an
                already-resolved collection object.
            return_false: if true, collect the ids that validated to
                ``False`` under the ``False`` key of the result.
            return_none: same, for ids whose result was ``None``.
            return_true: same, for ids whose result was ``True``.
            verbose: forwarded to ``self.validate_hgvs``.
            flag_invalid: if true, every document whose id validated to
                ``False`` is updated with ``unmatched_ref`` set to the
                string ``"True"``.

        Returns:
            A dict with a ``'summary'`` key mapping ``True`` / ``False`` /
            ``None`` to their counts, plus one list of ids for each
            requested outcome, keyed by ``True`` / ``False`` / ``None``.
        '''
        return_dict = {
            False: return_false,
            True: return_true,
            None: return_none
        }

        # Resolve a collection name to the actual collection object.
        if is_str(collection):
            _coll = get_src_db()[collection]
        else:
            _coll = collection
        cursor = doc_feeder(_coll, step=10000)

        # One id list per requested outcome; keys are True/False/None.
        out = {key: [] for key, wanted in return_dict.items() if wanted}

        cnt_d = {True: 0, False: 0, None: 0}
        for item in cursor:
            _id = item['_id']
            valid = self.validate_hgvs(_id, verbose=verbose)
            # Equality (not identity) is kept on purpose: it flags exactly
            # the values that are counted under the False key below.
            if valid == False and flag_invalid:  # noqa: E712
                # Update through the resolved collection: ``collection``
                # may be a plain name, which has no usable update().
                # NOTE(review): Collection.update() is a legacy PyMongo
                # call; consider update_one() if the driver supports it.
                _coll.update({"_id": _id},
                             {'$set': {
                                 "unmatched_ref": "True"
                             }})
            cnt_d[valid] += 1
            if return_dict[valid]:
                out[valid].append(_id)

        # print out counts
        print("\n# of VALID HGVS IDs:\t{0}".format(cnt_d[True]))
        print("# of INVALID HGVS IDs:\t{0}".format(cnt_d[False]))
        print("# of HGVS IDs skipped:\t {0}".format(cnt_d[None]))

        out['summary'] = cnt_d
        return out
Esempio n. 8
0
 def _cleaned_fields(self, fields):
     '''Normalize the ``fields`` parameter.

     Returns ``None`` (meaning all fields) when ``fields`` is the string
     "all" in any letter case, a list of stripped names when it is a
     comma-separated string, and ``self._default_fields`` when it is
     empty. Any other non-empty value is returned untouched.
     '''
     if not fields:
         return self._default_fields
     if not is_str(fields):
         return fields
     if fields.lower() == 'all':
         # None signals that all fields will be returned.
         return None
     return [name.strip() for name in fields.split(',')]
Esempio n. 9
0
 def _cleaned_fields(self, fields):
     '''Return a cleaned ``fields`` parameter.

     The result is ``None`` (return all fields), a list of field names,
     or whatever non-string value was passed in. An empty ``fields``
     falls back to ``self._default_fields``.
     '''
     if not fields:
         fields = self._default_fields
     elif is_str(fields):
         wants_everything = fields.lower() == 'all'
         # None means all fields will be returned.
         fields = None if wants_everything else [
             piece.strip() for piece in fields.split(',')
         ]
     return fields
Esempio n. 10
0
 def _cleaned_scopes(self, scopes):
     '''Normalize the ``scopes`` parameter.

     A comma-separated string is split into stripped names. Empty
     entries are dropped from the resulting (or given) sequence, and a
     sequence left with exactly one entry is unwrapped to that entry.
     Empty input, or input that is neither a string nor a sequence
     (per ``is_str`` / ``is_seq``), yields ``None``.
     '''
     if not scopes:
         return None
     if is_str(scopes):
         scopes = [part.strip() for part in scopes.split(',')]
     if not is_seq(scopes):
         return None
     kept = [scope for scope in scopes if scope]
     return kept[0] if len(kept) == 1 else kept
Esempio n. 11
0
 def _cleaned_scopes(self, scopes):
     '''Return a cleaned ``scopes`` parameter.

     The result is a single scope field, a list of scope fields, or
     ``None`` when ``scopes`` is empty or of an unsupported type.
     '''
     cleaned = None
     if scopes:
         candidates = scopes
         if is_str(candidates):
             # "a, b" -> ["a", "b"]
             candidates = [piece.strip() for piece in candidates.split(',')]
         if is_seq(candidates):
             # Drop empty entries, then unwrap a lone survivor.
             cleaned = list(filter(None, candidates))
             if len(cleaned) == 1:
                 cleaned = cleaned[0]
     return cleaned
Esempio n. 12
0
    def validate_src(self, collection, return_false=False,
                     return_none=False, return_true=False, verbose=False, flag_invalid=False, generator=False):
        '''Validate the hgvs ids (``_id`` values) of a src collection.

        Every document's ``_id`` is checked with ``self.validate_hgvs``;
        the outcomes (``True``, ``False`` or ``None``) are counted, printed
        and returned.

        Args:
            collection: a collection name (string), resolved through
                ``get_src_db()``, or an already-resolved collection object.
            return_false: if true, return the ids that validated to
                ``False`` under the ``False`` key of the result.
            return_none: same, for ids whose result was ``None``.
            return_true: same, for ids whose result was ``True``.
            verbose: forwarded to ``self.validate_hgvs``.
            flag_invalid: if true, each document whose id validated to
                ``False`` is updated with ``unmatched_ref`` set to the
                string ``"True"``.
            generator: not used by this method; kept so existing callers
                that pass it keep working.

        Returns:
            A dict with a ``'summary'`` key mapping ``True`` / ``False`` /
            ``None`` to their counts, plus one list of ids for each
            requested outcome, keyed by ``True`` / ``False`` / ``None``.
        '''
        return_dict = {
            False: return_false,
            True: return_true,
            None: return_none
        }

        # Resolve a collection name to the actual collection object.
        if is_str(collection):
            src = get_src_db()
            _coll = src[collection]
        else:
            _coll = collection
        cursor = doc_feeder(_coll, step=10000)

        # One id list per requested outcome; keys are True/False/None.
        out = {}
        for outcome, wanted in return_dict.items():
            if wanted:
                out[outcome] = []

        cnt_d = {True: 0, False: 0, None: 0}
        for item in cursor:
            _id = item['_id']
            valid = self.validate_hgvs(_id, verbose=verbose)
            # Equality (not identity) is kept on purpose: it flags exactly
            # the values that are counted under the False key below.
            if valid == False and flag_invalid:  # noqa: E712
                # Go through the resolved ``_coll``: ``collection`` may be a
                # plain name, on which the update call would fail.
                # NOTE(review): Collection.update() is a legacy PyMongo
                # call; consider update_one() if the driver supports it.
                _coll.update({"_id": _id}, {'$set': {"unmatched_ref": "True"}})
            cnt_d[valid] += 1
            if return_dict[valid]:
                out[valid].append(_id)

        # print out counts
        print("\n# of VALID HGVS IDs:\t{0}".format(cnt_d[True]))
        print("# of INVALID HGVS IDs:\t{0}".format(cnt_d[False]))
        print("# of HGVS IDs skipped:\t {0}".format(cnt_d[None]))

        out['summary'] = cnt_d
        return out