Python Database Examples

Programming Language: Python

Namespace/Package Name: gene.database

Class/Type: Database

Examples at hotexamples.com: 7

Python Database - 7 examples found. These are the top-rated real-world Python examples of gene.database.Database, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Database(6)

create_db_tables(1)

delete_all_db_tables(1)

get_record_by_id(1)

get_records_by_type(1)

Example #1

Show file

 class DB:
     """Bundle a Database handle with a Merge helper built on top of it."""

     def __init__(self):
         """Create the database and merge handles.

         When ``is_test_env`` is truthy, every table is deleted and then
         re-created so the instance starts from freshly created tables.
         """
         database = Database()
         self.db = database
         self.merge = Merge(database=database)
         if not is_test_env:
             return
         database.delete_all_db_tables()
         database.create_db_tables()

Example #2

Show file

File: query.py Project: cancervariants/gene-normalization

    def __init__(self, db_url: str = '', db_region: str = 'us-east-2'):
        """Create the handler and store a Database instance on ``self.db``.

        :param str db_url: URL to database source; forwarded to ``Database``
            as ``db_url``.
        :param str db_region: AWS default region; forwarded to ``Database``
            as ``region_name``.
        """
        connection_args = {'db_url': db_url, 'region_name': db_region}
        self.db = Database(**connection_args)

Example #3

Show file

File: cli.py Project: cancervariants/gene-normalization

    def update_normalizer_db(normalizer, prod, db_url, update_all,
                             update_merged):
        """Update selected normalizer source(s) in the gene database.

        The database is chosen first: the default ``Database()`` when ``prod``
        is set (after flagging ``GENE_NORM_PROD`` in the environment),
        otherwise the endpoint given by ``db_url``, then the
        ``GENE_NORM_DB_URL`` environment variable, then
        ``http://localhost:8000``.

        Afterwards either every entry of ``SOURCES`` is updated
        (``update_all``), the help message is shown (no ``normalizer``), or
        the whitespace-separated names in ``normalizer`` are validated
        against ``SOURCES`` and updated.

        :raises Exception: if ``normalizer`` contains no names, or contains a
            name that is not in ``SOURCES``
        """
        if prod:
            environ['GENE_NORM_PROD'] = "TRUE"
            db: Database = Database()
        else:
            fallback_url = environ.get('GENE_NORM_DB_URL',
                                       'http://localhost:8000')
            db: Database = Database(db_url=db_url or fallback_url)

        if update_all:
            CLI()._update_normalizers(list(SOURCES), db, update_merged)
            return

        if not normalizer:
            CLI()._help_msg()
            return

        requested = normalizer.lower().split()
        if not requested:
            raise Exception("Must enter a normalizer")

        non_sources = set(requested) - set(SOURCES)
        if non_sources:
            raise Exception(f"Not valid source(s): {non_sources}")

        CLI()._update_normalizers(requested, db, update_merged)

Example #4

Show file

def add_other_id_refs():
    """Add other_id reference for other_identifiers attribute.

    Scans the genes table page by page. For every identity record (one whose
    ``label_and_type`` ends in ``##identity``) it writes one ``##other_id``
    reference item per entry of the record's ``other_identifiers`` list,
    pointing back at the record's concept ID and source name.
    """
    db = Database()
    scan_kwargs = {}

    # The batch writer buffers puts and only sends them in batches. Using it
    # as a context manager guarantees that whatever is still buffered when
    # the scan finishes is flushed instead of being silently dropped.
    with db.genes.batch_writer() as batch:
        while True:
            response = db.genes.scan(**scan_kwargs)

            for record in response['Items']:
                if not record['label_and_type'].endswith('##identity'):
                    continue
                for other_id in record.get('other_identifiers', []):
                    batch.put_item(
                        Item={
                            'label_and_type': f"{other_id.lower()}##other_id",
                            'concept_id': record['concept_id'].lower(),
                            'src_name': record['src_name']
                        })

            # A missing LastEvaluatedKey means the final page was just read.
            last_evaluated_key = response.get('LastEvaluatedKey')
            if not last_evaluated_key:
                break
            scan_kwargs['ExclusiveStartKey'] = last_evaluated_key

Example #5

Show file

    def update_normalizer_db(normalizer, prod, db_url, update_all,
                             update_merged):
        """Update selected normalizer source(s) in the gene database.

        If ``GENE_NORM_EB_PROD`` is present in the environment, the user is
        first asked to confirm use of the AWS database. The database is then
        chosen: the default ``Database()`` when ``prod`` is set (after
        flagging ``GENE_NORM_PROD`` in the environment), otherwise the
        endpoint given by ``db_url``, then the ``GENE_NORM_DB_URL``
        environment variable, then ``http://localhost:8000``.

        Afterwards either every entry of ``SOURCES`` is updated
        (``update_all``); or, with no ``normalizer``, merged records are
        loaded when ``update_merged`` is set and the help message is shown
        otherwise; or the whitespace-separated names in ``normalizer`` are
        validated against ``SOURCES`` and updated.

        :raises Exception: if ``normalizer`` contains no names, or contains a
            name that is not in ``SOURCES``
        """
        # Sometimes GENE_NORM_EB_PROD is accidentally set. We should verify that
        # it should actually be used in CLI
        if "GENE_NORM_EB_PROD" in environ:
            confirm_aws_db_use("PROD")

        if prod:
            environ['GENE_NORM_PROD'] = "TRUE"
            db: Database = Database()
        else:
            fallback_url = environ.get('GENE_NORM_DB_URL',
                                       'http://localhost:8000')
            db: Database = Database(db_url=db_url or fallback_url)

        if update_all:
            CLI()._update_normalizers(list(SOURCES), db, update_merged)
            return

        if not normalizer:
            if update_merged:
                CLI()._load_merge(db, [])
            else:
                CLI()._help_msg()
            return

        requested = normalizer.lower().split()
        if not requested:
            raise Exception("Must enter a normalizer")

        non_sources = set(requested) - set(SOURCES)
        if non_sources:
            raise Exception(f"Not valid source(s): {non_sources}")

        CLI()._update_normalizers(requested, db, update_merged)

Example #6

Show file

File: query.py Project: cancervariants/gene-normalization

class QueryHandler:
    """Class for normalizer management. Stores reference to database instance
    and normalizes query input.
    """
    def __init__(self, db_url: str = '', db_region: str = 'us-east-2'):
        """Create the handler and store a Database instance on ``self.db``.

        :param str db_url: URL to database source; forwarded to ``Database``
            as ``db_url``.
        :param str db_region: AWS default region; forwarded to ``Database``
            as ``region_name``.
        """
        connection_args = {'db_url': db_url, 'region_name': db_region}
        self.db = Database(**connection_args)

    @staticmethod
    def emit_warnings(query_str: str) -> List:
        """Emit warnings if query contains non breaking space characters.

        :param str query_str: query string
        :return: List of warnings
        """
        warnings = []
        nbsp = re.search('\xa0|&nbsp;', query_str)
        if nbsp:
            warnings = [{
                "non_breaking_space_characters":
                "Query contains non-breaking space characters"
            }]
            logger.warning(
                f'Query ({query_str}) contains non-breaking space characters.')
        return warnings

    def fetch_meta(self, src_name: str) -> SourceMeta:
        """Fetch metadata for src_name.

        :param str src_name: name of source to get metadata for
        :return: SourceMeta object containing source metadata
        """
        if src_name in self.db.cached_sources.keys():
            return self.db.cached_sources[src_name]
        else:
            try:
                db_response = self.db.metadata.get_item(
                    Key={'src_name': src_name})
                response = SourceMeta(**db_response['Item'])
                self.db.cached_sources[src_name] = response
                return response
            except ClientError as e:
                logger.error(e.response['Error']['Message'])

    @staticmethod
    def _cast_location_ints(record: Dict) -> Dict:
        """Ensure Locations are formatted correctly -- interval start and end need to
        be recast to ints from how they're structured in DynamoDB

        :param Dict record: original record
        :return: record with corrected locations attributes, if applicable
        """
        if 'locations' in record:
            for loc in record['locations']:
                if loc['type'] == 'SequenceLocation':
                    loc['start']['value'] = int(loc['start']['value'])
                    loc['end']['value'] = int(loc['end']['value'])
        return record

    def add_record(self, response: Dict[str, Dict], item: Dict,
                   match_type: MatchType) -> (Dict, str):
        """Convert a DynamoDB item to a Gene and file it under its source.

        The item is modified in place: its ``label_and_type`` key is removed,
        location bounds are recast to ints and ``match_type`` is set. The
        resulting Gene is only stored when the item's source is one of the
        keys of ``response['source_matches']``.

        :param Dict[str, Dict] response: in-progress response object to return
            to client
        :param Dict item: Item retrieved from DynamoDB
        :param MatchType match_type: match type for query
        :return: Tuple containing updated response object, and string
            containing name of the source of the match
        """
        del item['label_and_type']
        # DynamoDB Numbers get converted to Decimal
        item = self._cast_location_ints(item)
        item["match_type"] = match_type
        gene = Gene(**item)
        src_name = item['src_name']

        matches = response['source_matches']
        if src_name in matches.keys():
            slot = matches[src_name]
            if slot is None:
                # first record for this source: create the slot with metadata
                matches[src_name] = {
                    'records': [gene],
                    'source_meta_': self.fetch_meta(src_name)
                }
            else:
                slot['records'].append(gene)

        return response, src_name

    def fetch_record(self, response: Dict[str, Dict], concept_id: str,
                     match_type: MatchType) -> None:
        """Add fetched record to response

        Queries the genes table for the ``<concept_id>##identity`` key and
        adds the first returned item to the response. A ClientError raised
        along the way is logged rather than propagated. If the query returns
        no items, an error is logged and the response is left unchanged.

        :param Dict[str, Dict] response: in-progress response object to return
            to client.
        :param str concept_id: Concept id to fetch record for.
            Should be all lower-case.
        :param MatchType match_type: match type for record
        """
        try:
            pk = f'{concept_id}##identity'
            filter_exp = Key('label_and_type').eq(pk)
            result = self.db.genes.query(KeyConditionExpression=filter_exp)
            items = result['Items']
            if not items:
                # A reference can point at an identity record that is absent;
                # indexing [0] here used to raise an uncaught IndexError.
                logger.error(
                    f"No identity record found for concept ID {concept_id}")
                return
            self.add_record(response, items[0], match_type)
        except ClientError as e:
            logger.error(e.response['Error']['Message'])

    def post_process_resp(self, resp: Dict) -> Dict:
        """Fill all empty source_matches slots with NO_MATCH results and
        sort source records by descending `match_type`.

        :param Dict resp: incoming response object
        :return: response object with empty source slots filled with
                NO_MATCH results and corresponding source metadata
        """
        for src_name in resp['source_matches'].keys():
            if resp['source_matches'][src_name] is None:
                resp['source_matches'][src_name] = {
                    'match_type': MatchType.NO_MATCH,
                    'records': [],
                    'source_meta_': self.fetch_meta(src_name)
                }
            else:
                records = resp['source_matches'][src_name]['records']
                if len(records) > 1:
                    records = sorted(records,
                                     key=lambda k: k.match_type,
                                     reverse=True)
        return resp

    def response_keyed(self, query: str, sources: Set[str]) -> Dict:
        """Build the response keyed by source name. Corresponds to the
        `keyed=true` API parameter.

        Candidate partition keys are derived from the lower-cased query: the
        query itself as a concept ID (if it starts with a known prefix), the
        query behind each matching namespace, and the query under every item
        type. Each key is looked up in the genes table; a concept ID is only
        added the first time it is seen.

        :param str query: string to match against
        :param Set[str] sources: sources to match from
        :return: completed response object to return to client
        """
        resp = {
            'query': query,
            'warnings': self.emit_warnings(query),
            'source_matches': dict.fromkeys(sources)
        }
        if query == '':
            return self.post_process_resp(resp)
        query_l = query.lower()

        lookup_keys = []
        if any(query_l.startswith(p) for p in PREFIX_LOOKUP.keys()):
            lookup_keys.append(f'{query_l}##identity')
        lookup_keys.extend(
            f'{NAMESPACE_LOOKUP[prefix].lower()}:{query_l}##identity'
            for prefix in NAMESPACE_LOOKUP.keys()
            if query_l.startswith(prefix))
        lookup_keys.extend(
            f'{query_l}##{item_type}' for item_type in ITEM_TYPES.values())

        seen_concept_ids = []
        for lookup_key in lookup_keys:
            try:
                condition = Key('label_and_type').eq(lookup_key)
                query_resp = self.db.genes.query(
                    KeyConditionExpression=condition)
                for record in query_resp['Items']:
                    concept_id = record['concept_id']
                    if concept_id in seen_concept_ids:
                        continue
                    if record['item_type'] == "identity":
                        self.add_record(resp, record, MatchType.CONCEPT_ID)
                    else:
                        # reference item: pull in the identity record it
                        # points at
                        self.fetch_record(
                            resp, concept_id,
                            MatchType[record['item_type'].upper()])
                    seen_concept_ids.append(concept_id)
            except ClientError as e:
                logger.error(e.response['Error']['Message'])

        # remaining sources get no match
        return self.post_process_resp(resp)

    def response_list(self, query: str, sources: Set[str]) -> Dict:
        """Return response as list, where the first key-value in each item
        is the source name. Corresponds to `keyed=false` API parameter.

        :param str query: string to match against
        :param List[str] sources: sources to match from
        :return: completed response object to return to client
        """
        response_dict = self.response_keyed(query, sources)
        source_list = []
        for src_name in response_dict['source_matches'].keys():
            src = {
                "source": src_name,
            }
            to_merge = response_dict['source_matches'][src_name]
            src.update(to_merge)

            source_list.append(src)
        response_dict['source_matches'] = source_list

        return response_dict

    @staticmethod
    def _get_service_meta() -> ServiceMeta:
        """Build service metadata stamped with the current time.

        :return: ServiceMeta carrying the package version and the current
            datetime rendered as a string
        """
        meta_fields = {
            "version": __version__,
            "response_datetime": str(datetime.now()),
        }
        return ServiceMeta(**meta_fields)

    def search(self,
               query_str: str,
               keyed: bool = False,
               incl: str = '',
               excl: str = '',
               **params) -> SearchService:
        """Return highest match for each source.

        Only sources that have an item in the metadata table are considered
        available.

        :param str query_str: query, a string, to search for
        :param bool keyed: if true, return response as dict keying source names
            to source objects; otherwise, return list of source objects
        :param str incl: str containing comma-separated names of sources to
            use. Will exclude all other sources. Case-insensitive. Raises
            InvalidParameterException if both incl and excl args are
            provided, or if invalid source names are given.
        :param str excl: str containing comma-separated names of source to
            exclude. Will include all other source. Case-insensitive. Raises
            InvalidParameterException if both incl and excl args are
            provided, or if invalid source names are given.
        :return: SearchService class containing all matches found in sources.
        """
        possible_sources = {
            member.value.lower(): member.value
            for member in SourceName.__members__.values()
        }
        # keep only the sources that have a metadata item in the database
        sources = {
            key: name
            for key, name in possible_sources.items()
            if self.db.metadata.get_item(Key={'src_name': name}).get('Item')
        }

        if incl and excl:
            detail = "Cannot request both source inclusions and exclusions."
            raise InvalidParameterException(detail)

        if incl:
            query_sources = set()
            invalid_sources = []
            for source in (n.strip() for n in incl.split(',')):
                source_l = source.lower()
                if source_l in sources.keys():
                    query_sources.add(sources[source_l])
                else:
                    invalid_sources.append(source)
            if invalid_sources:
                detail = f"Invalid source name(s): {invalid_sources}"
                raise InvalidParameterException(detail)
        elif excl:
            req_excl_dict = {
                r.lower(): r
                for r in (n.strip() for n in excl.lower().split(','))
            }
            invalid_sources = [
                req for req_l, req in req_excl_dict.items()
                if req_l not in sources.keys()
            ]
            query_sources = {
                src for src_l, src in sources.items()
                if src_l not in req_excl_dict.keys()
            }
            if invalid_sources:
                detail = f"Invalid source name(s): {invalid_sources}"
                raise InvalidParameterException(detail)
        else:
            query_sources = set(sources.values())

        query_str = query_str.strip()

        build_response = self.response_keyed if keyed else self.response_list
        resp = build_response(query_str, query_sources)

        resp['service_meta_'] = self._get_service_meta()
        return SearchService(**resp)

    def _add_merged_meta(self, response: NormalizeService) -> NormalizeService:
        """Attach metadata for every source behind the gene descriptor.

        The source of each concept ID (the descriptor's gene ID plus its
        xrefs) is determined from the ID's prefix; metadata is fetched once
        per distinct source and stored on ``response.source_meta_``.

        :param NormalizeService response: in-progress response object
        :return: completed response object.
        """
        descriptor = response.gene_descriptor
        xrefs = descriptor.xrefs or []  # type: ignore
        concept_ids = [descriptor.gene_id] + xrefs  # type: ignore

        sources_meta = {}
        for concept_id in concept_ids:
            namespace = concept_id.partition(':')[0]
            src_name = PREFIX_LOOKUP[namespace.lower()]
            if src_name in sources_meta:
                continue
            sources_meta[src_name] = self.fetch_meta(src_name)

        response.source_meta_ = sources_meta
        return response

    def _add_alt_matches(self, response: NormService, record: Dict,
                         possible_concepts: List[str]) -> NormService:
        """Add alternate matches warning to response object

        :param NormService response: in-progress response object
        :param Dict record: normalized record
        :param List[str] possible_concepts: other possible matches
        :return: updated response object
        """
        norm_concepts = set()
        for concept_id in possible_concepts:
            r = self.db.get_record_by_id(concept_id, True)
            if r:
                merge_ref = r.get("merge_ref")
                if merge_ref:
                    norm_concepts.add(merge_ref)
        norm_concepts = norm_concepts - {record["concept_id"]}
        if norm_concepts:
            response.warnings.append(
                {"multiple_normalized_concepts_found": list(norm_concepts)})
        return response

    def add_gene_descriptor(
            self,
            response: NormalizeService,
            record: Dict,
            match_type: MatchType,
            possible_concepts: Optional[List[str]] = None) -> NormalizeService:
        """Add gene descriptor to response.

        Builds a GeneDescriptor from the record (ID, label, xrefs, alternate
        labels and extensions), stores it on the response together with the
        source metadata and match type, and adds an alternate-matches warning
        when other possible concepts are given. The record itself is not
        modified.

        :param NormalizeService response: Response object
        :param Dict record: Gene record
        :param MatchType match_type: query's match type
        :param Optional[List[str]] possible_concepts: List of other normalized
            concepts found
        :return: Response with gene descriptor
        """
        params = {
            "id": f"normalize.gene:{quote(response.query)}",
            "label": record["symbol"],
            "gene_id": record["concept_id"]
        }

        # xrefs
        if "xrefs" in record and record["xrefs"]:
            params["xrefs"] = record["xrefs"]

        # alternate labels
        alt_labels = set()
        for key in ["previous_symbols", "aliases"]:
            if key in record and record[key]:
                val = record[key]
                if isinstance(val, str):
                    val = [val]
                alt_labels.update(val)
        if alt_labels:
            params["alternate_labels"] = list(alt_labels)

        # extensions
        extensions = list()
        extension_and_record_labels = [
            ("symbol_status", "symbol_status"),
            ("approved_name", "label"),
            ("chromosome_location", "locations"),
            ("associated_with", "associated_with"),
            ("previous_symbols", "previous_symbols"),
        ]
        for ext_label, record_label in extension_and_record_labels:
            value = record.get(record_label)
            if not value:
                continue
            if ext_label == 'chromosome_location':
                # Only the first location is reported. Take it into a local
                # instead of writing it back, so the caller's record keeps
                # its full locations list.
                value = value[0]
            extensions.append(Extension(name=ext_label, value=value))
        # handle gene types separately because they're wonky
        if record["item_type"] == "identity":
            gene_type = record.get("gene_type")
            if gene_type:
                extensions.append(
                    Extension(name=GeneTypeFieldName[
                        record["src_name"].upper()].value,
                              value=gene_type))
        else:
            for f in GeneTypeFieldName:
                field_name = f.value
                values = record.get(field_name, [])
                for value in values:
                    extensions.append(Extension(name=field_name, value=value))
        if extensions:
            params["extensions"] = extensions

        # add warnings
        if possible_concepts:
            response = self._add_alt_matches(response, record,
                                             possible_concepts)

        response.gene_descriptor = GeneDescriptor(**params)
        response = self._add_merged_meta(response)
        response.match_type = match_type
        return response

    @staticmethod
    def _record_order(record: Dict) -> (int, str):
        """Produce the sort key used to prioritise records when matching.

        Records are ordered by the priority of their source first and by
        concept ID second. Intended for use as a ``key`` function.

        :param Dict record: individual record item in iterable to sort
        :return: tuple with rank value and concept ID
        """
        source_key = record['src_name'].upper()
        return SourcePriority[source_key], record['concept_id']

    @staticmethod
    def _handle_failed_merge_ref(record, response, query) -> Dict:
        """Record a failed merge reference lookup and mark the response as
        having no match.

        :param Dict record: record containing failed merge_ref
        :param Dict response: in-progress response object
        :param str query: original query value
        :return: response with no match
        """
        message = (f"Merge ref lookup failed for ref {record['merge_ref']} "
                   f"in record {record['concept_id']} from query {query}")
        logger.error(message)
        response['match_type'] = MatchType.NO_MATCH
        return response

    def _prepare_normalized_response(self, query: str) -> Dict[str, Any]:
        """Build the fields shared by every normalize endpoint response.

        The result starts out as a NO_MATCH response carrying the query, any
        warnings raised for it, and service metadata stamped with the current
        time.

        :param str query: user-provided query
        :return: basic normalization response boilerplate
        """
        base: Dict[str, Any] = {"query": query}
        base["match_type"] = MatchType.NO_MATCH
        base["warnings"] = self.emit_warnings(query)
        base["service_meta_"] = ServiceMeta(
            version=__version__, response_datetime=str(datetime.now()))
        return base

    def normalize(self, query: str) -> NormalizeService:
        """Find the normalized concept for a query and describe it with a
        gene descriptor.

        :param str query: String to find normalized concept for
        :return: Normalized gene concept
        """
        base_fields = self._prepare_normalized_response(query)
        response = NormalizeService(**base_fields)
        builder = self.add_gene_descriptor
        return self._perform_normalized_lookup(response, query, builder)

    def _resolve_merge(
            self,
            response: NormService,
            record: Dict,
            match_type: MatchType,
            callback: Callable,
            possible_concepts: Optional[List[str]] = None) -> NormService:
        """Given a record, return the corresponding normalized record

        :param NormalizationService response: in-progress response object
        :param Dict record: record to retrieve normalized concept for
        :param MatchType match_type: type of match that returned these records
        :param Callable callback: response constructor method
        :param Optional[List[str]] possible_concepts: alternate possible matches
        :return: Normalized response object
        """
        merge_ref = record.get("merge_ref")
        if merge_ref:
            # follow merge_ref
            merge = self.db.get_record_by_id(merge_ref, False, True)
            if merge is None:
                query = response.query
                logger.error(
                    f"Merge ref lookup failed for ref {record['merge_ref']} "
                    f"in record {record['concept_id']} from query `{query}`")
                return response
            else:
                return callback(response, merge, match_type, possible_concepts)
        else:
            # record is sole member of concept group
            return callback(response, record, match_type, possible_concepts)

    def _get_matches_by_type(self, query: str, match_type: str) -> List[Dict]:
        """Get matches list for match tier.
        :param str query: user-provided query
        :param str match_type: keyword of match type to check
        :return: List of records matching the query and match level
        """
        matching_refs = self.db.get_records_by_type(query, match_type)
        matching_records = [
            self.db.get_record_by_id(m["concept_id"], False)
            for m in matching_refs
        ]
        return sorted(matching_records, key=self._record_order)  # type: ignore

    def _perform_normalized_lookup(self, response: NormService, query: str,
                                   response_builder: Callable) -> NormService:
        """Retrieve normalized concept, for use in normalization endpoints
        :param NormService response: in-progress response object
        :param str query: user-provided query
        :param Callable response_builder: response constructor callback method
        :return: completed service response object
        """
        if query == "":
            return response
        query_str = query.lower().strip()

        # check merged concept ID match
        record = self.db.get_record_by_id(query_str,
                                          case_sensitive=False,
                                          merge=True)
        if record:
            return response_builder(response, record, MatchType.CONCEPT_ID)

        # check concept ID match
        record = self.db.get_record_by_id(query_str, case_sensitive=False)
        if record:
            return self._resolve_merge(response, record, MatchType.CONCEPT_ID,
                                       response_builder)

        for match_type in ITEM_TYPES.values():
            # get matches list for match tier
            matching_refs = self.db.get_records_by_type(query_str, match_type)
            matching_records = \
                [self.db.get_record_by_id(m['concept_id'], False)
                 for m in matching_refs]
            matching_records.sort(key=self._record_order)  # type: ignore

            if len(matching_refs) > 1:
                possible_concepts = \
                    [ref["concept_id"] for ref in matching_refs]
            else:
                possible_concepts = None

            # attempt merge ref resolution until successful
            for match in matching_records:
                assert match is not None
                record = self.db.get_record_by_id(match["concept_id"], False)
                if record:
                    match_type_value = MatchType[match_type.upper()]
                    return self._resolve_merge(response, record,
                                               match_type_value,
                                               response_builder,
                                               possible_concepts)
        return response

    def _add_normalized_records(
        self,
        response: UnmergedNormalizationService,
        normalized_record: Dict,
        match_type: MatchType,
        possible_concepts: Optional[List[str]] = None
    ) -> UnmergedNormalizationService:
        """Fill an unmerged normalize response with source records.

        An identity record is stored on its own under its source. Any other
        record is treated as a merged concept: the record for its concept ID
        and for each of its xrefs is looked up and grouped by source, skipping
        IDs for which no record is found.

        :param UnmergedNormalizationService response: in-progress response
        :param Dict normalized_record: record associated with normalized concept,
            either merged or single identity
        :param MatchType match_type: type of match achieved
        :param Optional[List[str]] possible_concepts: other possible results
        :return: Completed response object
        """
        response.match_type = match_type
        response.normalized_concept_id = normalized_record["concept_id"]

        if normalized_record["item_type"] == "identity":
            source = SourceName[normalized_record["src_name"].upper()]
            response.source_matches[source] = MatchesNormalized(
                records=[
                    BaseGene(**self._cast_location_ints(normalized_record))
                ],
                source_meta_=self.fetch_meta(source.value))
        else:
            member_ids = [normalized_record["concept_id"]]
            member_ids = member_ids + normalized_record.get("xrefs", [])
            for member_id in member_ids:
                member = self.db.get_record_by_id(member_id,
                                                  case_sensitive=False)
                if not member:
                    continue
                source = SourceName[member["src_name"].upper()]
                gene = BaseGene(**self._cast_location_ints(member))
                if source not in response.source_matches:
                    response.source_matches[source] = MatchesNormalized(
                        records=[gene],
                        source_meta_=self.fetch_meta(source.value))
                else:
                    response.source_matches[source].records.append(gene)

        if possible_concepts:
            response = self._add_alt_matches(response, normalized_record,
                                             possible_concepts)
        return response

    def normalize_unmerged(self, query: str) -> UnmergedNormalizationService:
        """Find the normalized concept for a query and return every source
        record that belongs to it.

        :param str query: string to search against
        :return: Normalized response object
        """
        base_fields = self._prepare_normalized_response(query)
        response = UnmergedNormalizationService(source_matches={},
                                                **base_fields)
        builder = self._add_normalized_records
        return self._perform_normalized_lookup(response, query, builder)

Example #7

Show file

File: add_item_type.py Project: cancervariants/gene-normalization

import sys
from pathlib import Path
from timeit import default_timer as timer
import click
from botocore.exceptions import ClientError
import logging

# Parent of the directory that contains this file. It is appended to sys.path
# before the import below -- presumably so the `gene` package resolves when
# this file is run as a standalone script (confirm against the repo layout).
APP_ROOT = Path(__file__).resolve().parents[1]
sys.path.append(f"{APP_ROOT}")

# Must come after the sys.path change above, hence the E402 suppression.
from gene.database import Database  # noqa: E402

# Logger named 'gene', set to emit messages at DEBUG level and above.
logger = logging.getLogger('gene')
logger.setLevel(logging.DEBUG)

# Module-level database handle, created as soon as this module is loaded.
db = Database()


def add_item_type(label_and_type: str, concept_id: str, item_type: str):
    """Set the ``item_type`` attribute on a single item of the genes table.

    The item is addressed by its ``label_and_type`` / ``concept_id`` key. A
    ClientError raised by the update is logged rather than propagated.

    :param str label_and_type: ``label_and_type`` key value of the item
    :param str concept_id: ``concept_id`` key value of the item
    :param str item_type: value to store in the item's ``item_type`` attribute
    """
    try:
        db.genes.update_item(
            Key={'label_and_type': label_and_type, 'concept_id': concept_id},
            UpdateExpression="set item_type=:r",
            ExpressionAttributeValues={':r': item_type})
    except ClientError as e:
        message = e.response['Error']['Message']
        logger.error(f"boto3 client error in `database.update_record()`: "
                     f"{message}")