예제 #1
0
    def __init__(self,
                 fms="vs",
                 systemBase="http://livevista.caregraf.info/",
                 k3Base="http://datasets.caregraf.org/"):
        """
        Record the naming bases, create the RDF builder and start with empty
        bookkeeping sets.

        fms: mnemonic substituted into "http://datasets.caregraf.org/<fms>/";
            it is also handed to the builder paired with that namespace.
        systemBase: stored as given on the instance.
        k3Base: stored as given on the instance.
        """
        self.fms, self.systemBase, self.k3Base = fms, systemBase, k3Base

        # The same namespace string is passed twice to RDFBuilder: inside the
        # first (list) argument and again as the second argument.
        schemaNS = "http://datasets.caregraf.org/%s/" % fms
        extraNamespaces = [
            ["fms", "http://datasets.caregraf.org/fms/"],
            ["xsd", "http://www.w3.org/2001/XMLSchema#"],
            ["dc", "http://purl.org/dc/elements/1.1/"],
        ]
        self.rdfb = RDFBuilder(["%s" % fms, schemaNS],
                               schemaNS,
                               extraNSInfos=extraNamespaces)

        # Bookkeeping filled in as the graph is built; all start empty
        self.fileTypes = set()
        self.outsideReferences = set()
        self.codedValueReferences = set()
        # recheck outside refs as build graph
        self.described = set()
예제 #2
0
 def __init__(self, fms="vs", systemBase="http://livevista.caregraf.info/", k3Base="http://schemes.caregraf.info/"):
     """
     Record the naming bases, create the RDF builder and start with empty
     bookkeeping sets.

     fms: mnemonic substituted into "http://datasets.caregraf.org/<fms>/";
         it is also handed to the builder paired with that namespace.
     systemBase: stored as given on the instance.
     k3Base: stored as given on the instance.
     """
     self.fms, self.systemBase, self.k3Base = fms, systemBase, k3Base

     # The same namespace string is passed twice to RDFBuilder: inside the
     # first (list) argument and again as the second argument.
     schemaNS = "http://datasets.caregraf.org/%s/" % fms
     extraNamespaces = [
         ["fms", "http://datasets.caregraf.org/fms/"],
         ["xsd", "http://www.w3.org/2001/XMLSchema#"],
         ["dc", "http://purl.org/dc/elements/1.1/"],
     ]
     self.rdfb = RDFBuilder(["%s" % fms, schemaNS], schemaNS, extraNSInfos=extraNamespaces)

     # Bookkeeping sets; all start empty
     self.fileTypes = set()
     self.outsideReferences = set()
     self.described = set()
예제 #3
0
class DescribeRepliesToSGraph:
    """
    Produce a self contained (all URI's labeled) graph from one or more
    Describe Replies.

    This builds on DescribeReplyToRDF: processReply() runs each reply through
    it into one shared RDFBuilder, and done() then adds label/type nodes for
    the outside references, sameAs targets, coded values and file types that
    were gathered along the way. These replies may be about a Patient or a
    Ward or System information.

    TODO:
    - FIX TO DescribeReply: needs to count Enum values in outside references
    - add support for SAMEAS described things ie/ even though described, still sameas
    - add a Dataset/Graph header
    - SUPPORT exposure of outside refs ie/ to non same as types
    - DO DUMMY/SIMPLE rdfb
    """

    # Tmp map til FMQL v1: scheme mnemonic found in a sameAs value -> scheme
    # name used when building the expanded URI
    SCHEMEMNMAP = {"ICD9": "ICD9CM", "PROVIDER": "HPTC"}

    def __init__(self,
                 fms="vs",
                 systemBase="http://livevista.caregraf.info/",
                 k3Base="http://datasets.caregraf.org/"):
        """
        fms: mnemonic substituted into "http://datasets.caregraf.org/<fms>/"
            and used as prefix for coded value types.
        systemBase: prefixed to reference ids (and to LOCAL:<id> sameAs ids).
        k3Base: prefixed to expanded (non LOCAL) sameAs URIs.
        """
        self.fms = fms
        self.systemBase = systemBase
        self.k3Base = k3Base
        fmsNS = "http://datasets.caregraf.org/%s/" % fms
        self.rdfb = RDFBuilder(
            ["%s" % fms, fmsNS],
            fmsNS,
            extraNSInfos=[["fms", "http://datasets.caregraf.org/fms/"],
                          ["xsd", "http://www.w3.org/2001/XMLSchema#"],
                          ["dc", "http://purl.org/dc/elements/1.1/"]])
        self.fileTypes = set()
        self.outsideReferences = set()
        self.codedValueReferences = set()
        self.described = set()  # recheck outside refs as build graph

    def processReply(self, describeReply):
        """
        Add one Describe Reply to the graph and remember what it references.

        describeReply must offer fileTypes(), outsideReferences() and
        codedValueReferences() (each result is unioned into a set here) and
        records(), whose items offer asReference().
        """
        drrdf = DescribeReplyToRDF(self.rdfb, self.fms, self.systemBase)
        drrdf.processReply(describeReply)

        self.fileTypes |= describeReply.fileTypes()
        # Track outside references as go; done() drops the ones that turned
        # out to be completely defined by a record
        self.outsideReferences |= describeReply.outsideReferences()
        # outside by defn
        self.codedValueReferences |= describeReply.codedValueReferences()
        self.described |= {record.asReference()
                           for record in describeReply.records()}

    def done(self):
        """
        Define every referenced-but-undescribed resource and finish the graph.

        Adds, in order: outside references (label, type, optional owl:sameAs),
        the sameAs targets themselves, coded values and file types. Returns
        whatever self.rdfb.done() returns.

        Raises Exception if the builder refuses to start a node for an
        outside reference (startNode returned a false value).
        """
        self.outsideReferences -= self.described

        # Loop invariant: namespace of this schema
        fmsNS = "http://datasets.caregraf.org/%s/" % self.fms

        sameAss = set()
        for reference in self.outsideReferences:
            if not self.rdfb.startNode(self.systemBase + reference.id, ""):
                raise Exception(
                    "Unexpected to try to reference define the fully defined: "
                    + str(reference))
            self.rdfb.addLiteralAssertion("rdfs:label",
                                          {"value": reference.label})
            self.rdfb.addURIAssertion(
                "rdf:type", {"value": fmsNS + reference.fileType})
            if reference.sameAs:
                eSameAs = self.__expandSameAsURI(reference.sameAs)
                # "" means there is no target (bare "LOCAL" marker or a value
                # that is not scheme:code): asserting sameAs to an empty URI
                # and later defining a node for it would be wrong.
                if eSameAs:
                    self.rdfb.addURIAssertion("owl:sameAs",
                                              {"value": eSameAs})
                    sameAss.add((eSameAs, reference.sameAsLabel))
            self.rdfb.endNode()

        # These used to be tagged with rdf:type fms:OutsideConcept
        for sameAsURI, sameAsLabel in sameAss:
            if not self.rdfb.startNode(sameAsURI, ""):
                # for now skip second definition of same sameas ex/ > 1 80 to
                # an ICD. Should fix TODO intercept before this point
                continue
            self.rdfb.addLiteralAssertion("rdfs:label", {"value": sameAsLabel})
            self.rdfb.addURIAssertion(
                "rdf:type",
                {"value": "http://datasets.caregraf.org/fms/CommonConcept"})
            self.rdfb.endNode()

        for codedValueReference in self.codedValueReferences:
            self.rdfb.startNode(fmsNS + codedValueReference.id, "")
            self.rdfb.addLiteralAssertion("rdfs:label", {
                "type": "literal",
                "value": codedValueReference.label
            })
            # Rem: in schema, Enum == instance of Class defined with owl:oneOf
            # ex/ http://datasets.caregraf.org/chcss/2__02_E has
            # http://datasets.caregraf.org/chcss/2__02_E-M with label MALE
            enumType = (self.fms + ":" +
                        codedValueReference.id.split("_E-")[0] + "_E")
            self.rdfb.addURIAssertion("rdf:type", {
                "type": "uri",
                "value": enumType
            })
            self.rdfb.endNode()

        # Each file type is indexed as [0] = id (appended to the namespace)
        # and [1] = label
        for fileType in self.fileTypes:
            self.rdfb.startNode(fmsNS + fileType[0], "")
            self.rdfb.addLiteralAssertion("rdfs:label", {
                "type": "literal",
                "value": fileType[1]
            })
            self.rdfb.addURIAssertion("rdf:type", {
                "type": "uri",
                "value": "owl:Class"
            })
            self.rdfb.endNode()

        return self.rdfb.done()

    def __expandSameAsURI(self, sameAsURI):
        """
        Expand a "SCHEME:code" sameAs value into a full URI.

        Returns "" when there is nothing to point at: the bare marker "LOCAL"
        or a value that does not have the scheme:code shape. "LOCAL:<id>"
        expands to systemBase + <id>; anything else expands to
        k3Base + lower-cased scheme name + "/" + code.
        """
        # in mixed files (local only and sameas'ed terminologies), the local
        # only entries are marked "LOCAL" in their same as fields
        if sameAsURI == "LOCAL":
            return ""
        uriMatch = re.match(r'([^:]+):(.+)$', sameAsURI)
        if not uriMatch:
            return ""
        scheme, code = uriMatch.groups()
        # TODO: what if define. Should process this earlier
        if scheme == "LOCAL":
            return self.systemBase + code
        # Tmp map til FMQL v1
        schemeMN = self.SCHEMEMNMAP.get(scheme, scheme)
        if schemeMN == "ICD9CM":
            # code form to kGraf form: '.' becomes '_', no trailing '_'
            code = re.sub(r'\_$', '', re.sub(r'\.', '_', code))
        return self.k3Base + schemeMN.lower() + "/" + code
예제 #4
0
class DescribeRepliesToSGraph:
    """
    Produce a self contained (all URI's labeled) graph from one or more Describe Replies.

    This builds on DescribeReplyToRDF: processReply() runs each reply through it into one
    shared RDFBuilder, and done() then adds label/type nodes for the outside references,
    sameAs targets, coded values and file types that were gathered along the way. These
    replies may be about a Patient or a Ward or System information.

    TODO:
    - FIX TO DescribeReply: needs to count Enum values in outside references
    - add support for SAMEAS described things ie/ even though described, still sameas
    - add a Dataset/Graph header
    - SUPPORT exposure of outside refs ie/ to non same as types
    - DO DUMMY/SIMPLE rdfb
    """

    # Tmp map til FMQL v1: scheme mnemonic found in a sameAs value -> scheme name used in the expanded URI
    SCHEMEMNMAP = {"ICD9": "ICD9CM", "PROVIDER": "HPTC"}

    def __init__(self, fms="vs", systemBase="http://livevista.caregraf.info/", k3Base="http://datasets.caregraf.org/"):
        """
        fms: mnemonic substituted into "http://datasets.caregraf.org/<fms>/" and used as prefix for coded value types.
        systemBase: prefixed to reference ids (and to LOCAL:<id> sameAs ids).
        k3Base: prefixed to expanded (non LOCAL) sameAs URIs.
        """
        self.fms = fms
        self.systemBase = systemBase
        self.k3Base = k3Base
        fmsNS = "http://datasets.caregraf.org/%s/" % fms
        self.rdfb = RDFBuilder(["%s" % fms, fmsNS], fmsNS, extraNSInfos=[["fms", "http://datasets.caregraf.org/fms/"], ["xsd", "http://www.w3.org/2001/XMLSchema#"], ["dc", "http://purl.org/dc/elements/1.1/"]])
        self.fileTypes = set()
        self.outsideReferences = set()
        self.codedValueReferences = set()
        self.described = set() # recheck outside refs as build graph

    def processReply(self, describeReply):
        """
        Add one Describe Reply to the graph and remember what it references.

        describeReply must offer fileTypes(), outsideReferences() and codedValueReferences()
        (each result is unioned into a set here) and records(), whose items offer asReference().
        """
        drrdf = DescribeReplyToRDF(self.rdfb, self.fms, self.systemBase)
        drrdf.processReply(describeReply)

        self.fileTypes |= describeReply.fileTypes()
        # Track outside references as go; done() drops the ones that turned out to be completely defined by a record
        self.outsideReferences |= describeReply.outsideReferences()
        self.codedValueReferences |= describeReply.codedValueReferences() # outside by defn
        self.described |= {record.asReference() for record in describeReply.records()}

    def done(self):
        """
        Define every referenced-but-undescribed resource and finish the graph.

        Adds, in order: outside references (label, type, optional owl:sameAs), the sameAs
        targets themselves, coded values and file types. Returns whatever self.rdfb.done() returns.

        Raises Exception if the builder refuses to start a node for an outside reference
        (startNode returned a false value).
        """
        self.outsideReferences -= self.described

        # Loop invariant: namespace of this schema
        fmsNS = "http://datasets.caregraf.org/%s/" % self.fms

        sameAss = set()
        for reference in self.outsideReferences:
            if not self.rdfb.startNode(self.systemBase + reference.id, ""):
                raise Exception("Unexpected to try to reference define the fully defined: " + str(reference))
            self.rdfb.addLiteralAssertion("rdfs:label", {"value": reference.label})
            self.rdfb.addURIAssertion("rdf:type", {"value": fmsNS + reference.fileType})
            if reference.sameAs:
                eSameAs = self.__expandSameAsURI(reference.sameAs)
                # "" means there is no target (bare "LOCAL" marker or a value that is not scheme:code):
                # asserting sameAs to an empty URI and later defining a node for it would be wrong.
                if eSameAs:
                    self.rdfb.addURIAssertion("owl:sameAs", {"value": eSameAs})
                    sameAss.add((eSameAs, reference.sameAsLabel))
            self.rdfb.endNode()

        # These used to be tagged with rdf:type fms:OutsideConcept
        for sameAsURI, sameAsLabel in sameAss:
            if not self.rdfb.startNode(sameAsURI, ""):
                continue # for now skip second definition of same sameas ex/ > 1 80 to an ICD. Should fix TODO intercept before this point
            self.rdfb.addLiteralAssertion("rdfs:label", {"value": sameAsLabel})
            self.rdfb.addURIAssertion("rdf:type", {"value": "http://datasets.caregraf.org/fms/CommonConcept"})
            self.rdfb.endNode()

        for codedValueReference in self.codedValueReferences:
            self.rdfb.startNode(fmsNS + codedValueReference.id, "")
            self.rdfb.addLiteralAssertion("rdfs:label", {"type": "literal", "value": codedValueReference.label})
            # Rem: in schema, Enum == instance of Class defined with owl:oneOf
            # ex/ http://datasets.caregraf.org/chcss/2__02_E has
            # http://datasets.caregraf.org/chcss/2__02_E-M with label MALE
            self.rdfb.addURIAssertion("rdf:type", {"type": "uri", "value": self.fms + ":" + codedValueReference.id.split("_E-")[0] + "_E"})
            self.rdfb.endNode()

        # Each file type is indexed as [0] = id (appended to the namespace) and [1] = label
        for fileType in self.fileTypes:
            self.rdfb.startNode(fmsNS + fileType[0], "")
            self.rdfb.addLiteralAssertion("rdfs:label", {"type": "literal", "value": fileType[1]})
            self.rdfb.addURIAssertion("rdf:type", {"type": "uri", "value": "owl:Class"})
            self.rdfb.endNode()

        return self.rdfb.done()

    def __expandSameAsURI(self, sameAsURI):
        """
        Expand a "SCHEME:code" sameAs value into a full URI.

        Returns "" when there is nothing to point at: the bare marker "LOCAL" or a value that
        does not have the scheme:code shape. "LOCAL:<id>" expands to systemBase + <id>; anything
        else expands to k3Base + lower-cased scheme name + "/" + code.
        """
        # in mixed files (local only and sameas'ed terminologies), the local only entries are marked "LOCAL" in their same as fields
        if sameAsURI == "LOCAL":
            return ""
        uriMatch = re.match(r'([^:]+):(.+)$', sameAsURI)
        if not uriMatch:
            return ""
        scheme, code = uriMatch.groups()
        # TODO: what if define. Should process this earlier
        if scheme == "LOCAL":
            return self.systemBase + code
        # Tmp map til FMQL v1
        schemeMN = self.SCHEMEMNMAP.get(scheme, scheme)
        if schemeMN == "ICD9CM":
            code = re.sub(r'\_$', '', re.sub(r'\.', '_', code)) # code form to kGraf form, no trailing .'s
        return self.k3Base + schemeMN.lower() + "/" + code