예제 #1
0
 def __init__(self, graph: Graph):
     """Keep the graph handle and wrap it in an ``MdrGraph``."""
     self.graph = graph
     self.mdr_graph = MdrGraph(graph)
"""CCDH Concept References: classes and endpoints"""
from fastapi import Request
from ccdh.api.cache import cache
from starlette.responses import StreamingResponse
from tccm_api.routers import concept_reference

from ccdh.api.routers.mappings import generate_sssom_tsv
from ccdh.api.routers.models import MappingSet
from ccdh.config import neo4j_graph
from ccdh.db.mdr_graph import MdrGraph

# TODO(joeflack4, 2021/10/27): Architecturally, why does 'concept_reference'
#  live in TCCM-API when so much else lives in the CCDH terminology service?
#  What decides what does and does not belong in TCCM?
# Reuse the router object exported by tccm_api's concept_reference module; the
# endpoint declared below is registered on that same router.
router = concept_reference.router
# Built once at import time from neo4j_graph(); the endpoint below queries it.
mdr_graph = MdrGraph(neo4j_graph())


@router.get(
    '/{curie}/mappings',
    description=
    'An SSSOM TSV file depending on request headers, otherwise a list of mappings',
    response_model=MappingSet)
@cache()
def get_concept_reference_mappings(curie: str, request: Request):
    """Return the mappings of the concept reference identified by ``curie``.

    Args:
        curie: Identifier taken from the URL path and passed unchanged to
            ``mdr_graph.find_mappings_of_concept_reference``.
        request: Incoming request; only its ``Accept`` header is inspected.

    Returns:
        A ``StreamingResponse`` produced by ``generate_sssom_tsv`` when the
        ``Accept`` header is exactly ``text/tab-separated-values+sssom``;
        otherwise the ``MappingSet`` itself, which matches the route's
        declared ``response_model``.
    """
    mapping_set = mdr_graph.find_mappings_of_concept_reference(curie)
    mappings = MappingSet.parse_obj(mapping_set.__dict__)
    # .get() instead of [...]: a request without an Accept header must fall
    # through to the default representation rather than raise KeyError.
    if request.headers.get('accept') == 'text/tab-separated-values+sssom':
        return StreamingResponse(generate_sssom_tsv(mappings),
                                 media_type='text/tab-separated-values+sssom')
    # Without this explicit return the endpoint yields None for every other
    # Accept value, contradicting response_model=MappingSet.
    return mappings
예제 #3
0
class Importer:
    """Load node attributes, harmonized attributes and mappings into the graph.

    Raw Cypher and transactions go through ``self.graph``; node look-ups and
    node construction go through ``self.mdr_graph``.
    """

    def __init__(self, graph: Graph):
        """Keep the graph handle and wrap it in an ``MdrGraph``."""
        self.graph = graph
        self.mdr_graph = MdrGraph(graph)

    def _create_subgraph(self, subgraph):
        """Create ``subgraph`` in a single transaction and commit it."""
        tx = self.graph.begin()
        tx.create(subgraph)
        self.graph.commit(tx)

    def import_node_attribute(self, node_attribute):
        """Create one NodeAttribute with its Enumeration and PermissibleValues.

        Nothing is written when ``get_node_attribute`` already returns a node
        for the same system/entity/attribute.

        Args:
            node_attribute: Mapping with the keys ``entity``, ``attribute``,
                ``system``, ``definition`` and ``permissible_values`` (a
                mapping of value -> description); ``cadsr_cde`` is optional.
        """
        entity = node_attribute['entity']
        attribute = node_attribute['attribute']
        system = node_attribute['system']
        logger.info(
            f'Importing NodeAttribute {system}.{entity}.{attribute} ...')

        na_node = self.mdr_graph.get_node_attribute(system, entity, attribute)
        if na_node is not None:  # already exists. Skip
            # TODO: Update the node
            return

        na_node = self.mdr_graph.create_node_attribute(system, entity,
                                                       attribute)
        na_node['definition'] = node_attribute['definition']
        # Only attach a reference link when a non-empty caDSR CDE id is given.
        cadsr_cde = node_attribute.get('cadsr_cde')
        if cadsr_cde:
            na_node[
                'reference'] = f'https://cdebrowser.nci.nih.gov/cdebrowserClient/cdeBrowser.html#/search?version=2.0&publicId={cadsr_cde}'
        subgraph = Subgraph([na_node])

        enum_node = self.mdr_graph.create_enumeration()
        subgraph |= enum_node
        subgraph |= Relationship(na_node, 'USES', enum_node)

        for value, description in node_attribute['permissible_values'].items():
            pv_node = self.mdr_graph.create_permissible_value(
                value, description)
            subgraph |= pv_node
            subgraph |= Relationship(enum_node, 'HAS_PERMISSIBLE_VALUE',
                                     pv_node)

        self._create_subgraph(subgraph)

        logger.info(f'Importing {system}.{entity}.{attribute} was successful')

    def import_node_attributes(self, node_attributes: Dict[str, Dict]):
        """Import every value of ``node_attributes``; the keys are not used."""
        for node_attribute in node_attributes.values():
            self.import_node_attribute(node_attribute)

    def import_harmonized_attributes(self, harmonized_attributes: Dict[str,
                                                                       Dict]):
        """Import every value of ``harmonized_attributes``; keys are not used."""
        logger.info("Importing CRDC-H model -- started")
        for harmonized_attribute in harmonized_attributes.values():
            self.import_harmonized_attribute(harmonized_attribute)
        logger.info(f"Processed attributes: {len(harmonized_attributes)}")
        logger.info("Importing CRDC-H model -- completed")

    def import_harmonized_attribute(self, harmonized_attribute):
        """Create one HarmonizedAttribute, its CodeSet and its MAPS_TO links.

        Nothing is written when ``get_harmonized_attribute`` already returns a
        node for the same system/entity/attribute.

        Args:
            harmonized_attribute: Mapping with the keys ``system``, ``entity``,
                ``attribute`` and ``definition``. The optional key
                ``node_attributes`` lists names of the form
                ``<MODEL>:<ENTITY>.<ATTR>``; each one that resolves to an
                existing NodeAttribute gets a MAPS_TO relationship to the new
                node. Unparseable names are logged and skipped; names that do
                not resolve are logged as warnings.
        """
        system = harmonized_attribute['system']
        entity = harmonized_attribute['entity']
        attribute = harmonized_attribute['attribute']
        logger.info(
            f'Importing HarmonizedAttribute {system}.{entity}.{attribute} ...')

        ha_node = self.mdr_graph.get_harmonized_attribute(
            system, entity, attribute)

        if ha_node is not None:  # already exists. Skip
            # TODO: Update
            return
        ha_node = self.mdr_graph.create_harmonized_attribute(
            system, entity, attribute)
        ha_node['definition'] = harmonized_attribute['definition']

        subgraph = Subgraph([ha_node])

        # to-do: What's created here is empty. Is it updated later anywhere? If not,
        # ...is this just something that Dazhi never got to? - joeflack4 2021/11/30
        cs_node = self.mdr_graph.create_code_set()
        subgraph |= cs_node
        subgraph |= Relationship(ha_node, 'HAS_MEANING', cs_node)

        # node_attributes: Looks like will only be mappings, of the format:
        # ...<MODEL>:<ENTITY>.<ATTR> - joeflack4 2021/11/19
        # TODO: Shouldn't exact_mapping be nested within node_attributes or
        #  ...node_attributes/mappings instead? (updated here and in
        #  ...CRDCH.import_harmonized_attributes())
        for node_attribute in harmonized_attribute.get('node_attributes', ()):
            try:
                # Dedicated na_* names: reusing system/entity/attribute here
                # would make the final success log report the last mapped
                # node attribute instead of the harmonized attribute.
                na_system, entity_attribute = node_attribute.split(':')
                na_entity, na_attribute = entity_attribute.split('.')
            except ValueError as e:
                logger.error(
                    f'Failed to parse the mapping attribute name: {node_attribute}'
                )
                logger.error(e)
                continue
            na_node = self.mdr_graph.get_node_attribute(
                na_system, na_entity, na_attribute)

            if na_node is None:
                logger.warning(f'{node_attribute} not found in database')
            else:
                subgraph |= Relationship(na_node, 'MAPS_TO', ha_node)

        self._create_subgraph(subgraph)

        logger.info(
            f'Importing HarmonizedAttribute {system}.{entity}.{attribute} was successful'
        )

    def import_ncit_mapping(self,
                            gdc_ncit_mappings: Dict[str, Dict[str, List[str]]],
                            system: str):
        """Run one mapping query per entry of ``gdc_ncit_mappings``.

        Each innermost list must hold exactly five items in the order
        ``code, pref_label, predicate_id, attribute, pv_label``; any other
        length raises ``ValueError`` on unpacking. The predicate labels
        'Has Synonym' and 'Related To' are replaced by ``SKOS.exactMatch`` and
        ``SKOS.relatedMatch``; any other value is passed through as a string.

        Cypher query notes:
        In ON CREATE SET / ON MATCH SET, "SET" is not the mathematical concept
        of a set; it means "set the value of a property". Read it as
        "on create/match, set property x to value y". ON CREATE applies when
        the MERGE created the pattern, ON MATCH when it matched an existing one.

        Args:
            gdc_ncit_mappings: Two-level mapping whose keys are ignored and
                whose innermost values are the five-item lists described above.
            system: Value for the ``$system`` query parameter (matched against
                ``NodeAttribute.system``).
        """
        query = '''
        MATCH (cs:CodeSet:Resource)<-[:HAS_MEANING]-
          (:HarmonizedAttribute)<-[:MAPS_TO]-
          (:NodeAttribute {system: $system, attribute: $attribute})-[:USES]->
          (:Enumeration)-[:HAS_PERMISSIBLE_VALUE]->(pv:PermissibleValue {pref_label: $pv_label})
        MATCH (cr:ConceptReference:Resource {uri: $cr_uri})
        MERGE (cr)<-[:MAPPED_TO]-(m:Mapping:Resource)-[:MAPPED_FROM]->(pv)
        ON CREATE SET m.predicate_id = $predicate_id, m.creator_id = $creator_id
        ON MATCH SET m.predicate_id = $predicate_id, m.creator_id = $creator_id
        MERGE (cs)-[:HAS_MEMBER]->(cr)
        RETURN cr
        '''
        for attr in gdc_ncit_mappings.values():
            for value in attr.values():
                code, _pref_label, predicate_id, attribute, pv_label = value[:5]
                if predicate_id == 'Has Synonym':
                    predicate_id = SKOS.exactMatch
                elif predicate_id == 'Related To':
                    predicate_id = SKOS.relatedMatch
                params = {
                    'system': system,
                    'attribute': attribute,
                    'predicate_id': str(predicate_id),
                    'pv_label': pv_label,
                    'creator_id': 'https://gdc.cancer.gov',
                    'cr_uri':
                    f'http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#{code}',
                }
                self.graph.run(query, **params)

    def import_mapping_set(self, mapping_set: MappingSet):
        """Used by endpoint: /mappings/upload/

        Imports every entry of ``mapping_set.mappings`` via ``import_mapping``.
        """
        for mapping in mapping_set.mappings:
            self.import_mapping(mapping)

    def import_mapping(self, mapping: Mapping):
        """Used by: self.import_mapping_set()

        With a non-empty ``mapping.object_id`` the permissible value is linked
        to a ConceptReference merged on the decoded URI. Without one, a
        non-empty ``mapping.comment`` is appended to the permissible value's
        ``comment`` property; otherwise nothing is written.

        Args:
            mapping: Object whose ``subject_match_field`` and
                ``object_match_field`` are each three dot-separated parts
                (system.entity.attribute). Also read: ``object_id``,
                ``object_label``, ``subject_label``, ``predicate_id``,
                ``creator_id`` and ``comment``.

        Raises:
            ValueError: If a match field does not split into three parts, or
                ``object_id`` contains no ':'.
        """
        de_system, entity, attribute = mapping.subject_match_field.split('.')
        dec_system, dec_entity, dec_attribute = mapping.object_match_field.split(
            '.')
        curie = mapping.object_id
        if curie:  # an object id is present, i.e. the value is mapped
            logger.debug(curie)
            # Split on the first ':' only so a local part containing ':' does
            # not break the unpacking.
            in_scheme, notation = curie.split(':', 1)
            vm_uri = decode_uri(curie)
            # The merged concept reference is bound as `vm` and keyed on
            # $vm_uri, matching the SET clauses and the parameters below. The
            # relationship is merged onto the matched `pv`. Previously the
            # query bound `cr`/$cr_uri and an unbound `p`, so it referenced an
            # undefined variable and a parameter that was never supplied.
            query = '''
            MATCH (cd:CodeSet:Resource)<-[:HAS_MEANING]-
              (c:HarmonizedAttribute {system: $dec_system, entity: $dec_entity, attribute: $dec_attribute})<-[:MAPS_TO]-
              (de:NodeAttribute {system: $de_system, entity: $entity, attribute: $attribute})-[:USES]->
              (vd:Enumeration)-[:HAS_PERMISSIBLE_VALUE]->(pv:PermissibleValue {pref_label: $pv_pref_label})
            MERGE (vm:ConceptReference:Resource {uri: $vm_uri})
            ON CREATE SET vm.pref_label = $vm_pref_label, vm.notation = $vm_notation, vm.scheme = $vm_in_scheme
            ON MATCH SET vm.pref_label = $vm_pref_label, vm.notation = $vm_notation, vm.scheme = $vm_in_scheme
            MERGE (vm)<-[:HAS_MEMBER]-(cd)
            MERGE (pv)<-[rpr:HAS_REPRESENTATION]-(vm)
            ON CREATE SET rpr.predicate_id = $predicate_id, rpr.creator_id = $creator_id, rpr.comment = $comment
            ON MATCH SET rpr.predicate_id = $predicate_id, rpr.creator_id = $creator_id, rpr.comment = $comment
            '''
            params = {
                'dec_system': dec_system,
                'entity': entity,
                'attribute': attribute,
                'de_system': de_system,
                'dec_entity': dec_entity,
                'dec_attribute': dec_attribute,
                'predicate_id': mapping.predicate_id,
                'pv_pref_label': mapping.subject_label,
                'vm_pref_label': mapping.object_label,
                'vm_notation': notation,
                'vm_in_scheme': in_scheme,
                'vm_uri': vm_uri,
                'creator_id': mapping.creator_id,
                'comment': mapping.comment or '',
            }
            self.graph.run(query, **params)
        elif mapping.comment:
            # NOTE(review): this query uses the labels ConceptualDomain /
            # DataElementConcept / DataElement / ValueDomain, while the rest
            # of this class uses CodeSet / HarmonizedAttribute /
            # NodeAttribute / Enumeration. Confirm it still matches the
            # current graph schema.
            query = '''
                    MATCH (cd:ConceptualDomain:Resource:CodeSet)<-[:USES]-
                      (c:DataElementConcept {system: $dec_system, entity: $dec_entity, attribute: $dec_attribute})<-[:HAS_MEANING]-
                      (de:DataElement {system: $de_system, entity: $entity, attribute: $attribute})-[:USES]->
                      (vd:ValueDomain)-[:HAS_MEMBER]->(p:PermissibleValue {pref_label: $pv_pref_label})
                    SET p.comment = CASE EXISTS(p.comment) WHEN True THEN p.comment + $comment else [$comment] END
                    '''
            params = {
                'dec_system': dec_system,
                'entity': entity,
                'attribute': attribute,
                'de_system': de_system,
                'dec_entity': dec_entity,
                'dec_attribute': dec_attribute,
                'predicate_id': mapping.predicate_id,
                'pv_pref_label': mapping.subject_label,
                'comment': mapping.comment,
            }
            self.graph.run(query, **params)

    def import_ncit(self):
        """Call the ``n10s.rdf.import.fetch`` procedure on the NCIt Turtle file.

        Equivalent Cypher:
        CALL n10s.rdf.import.fetch("file:///var/lib/neo4j/import/ncit-termci.ttl", "Turtle", {predicateExclusionList : [ "https://hotecosystem.org/termci/contents"] })
        """
        # to-do: How to make this import work? Saw error message upon closer examination. Check GitHub issue: #125
        path = 'file:///var/lib/neo4j/import/ncit-termci.ttl'  # neo4j container location
        # Alternative location inside the ccdh-api container:
        #   file:///app/data/tccm/ncit-termci.ttl
        self.graph.call('n10s.rdf.import.fetch', path, "Turtle", {
            'predicateExclusionList':
            ["https://hotecosystem.org/termci/contents"]
        })

    @staticmethod
    def import_all():
        """Import data from all nodes / data sources

        Order: NCIt, then PDC and GDC node attributes, then the harmonized
        attributes, then the NCIt mappings for 'GDC' and 'PDC'. Each step
        uses a fresh ``Importer(neo4j_graph())``.
        """
        Importer(neo4j_graph()).import_ncit()
        Importer(neo4j_graph()).import_node_attributes(
            PdcImporter.read_data_dictionary())
        Importer(neo4j_graph()).import_node_attributes(
            GdcImporter.read_data_dictionary())
        Importer(neo4j_graph()).import_harmonized_attributes(
            CrdcHImporter.read_harmonized_attributes())
        Importer(neo4j_graph()).import_ncit_mapping(
            GdcImporter.read_ncit_mappings(), 'GDC')
        # NOTE(review): the 'PDC' pass reads the GDC mapping file as well --
        # presumably intentional reuse; confirm.
        Importer(neo4j_graph()).import_ncit_mapping(
            GdcImporter.read_ncit_mappings(), 'PDC')