def from_rdf(self, json_ld_node): # Expects a label and a concept URI within the json_ld_node # FIXME: SHOULD be able to handle cases when the label is not supplied, # or if the label does not match any label from the ConceptValue # Either by instantiating a keyword without a concept_id or by # or by looking for say an external identifier attached to the concept and # building upon that. try: # assume a list, and as this is a ConceptDataType, assume a single entry json_ld_node = json_ld_node[0] except KeyError as e: pass concept_uri = json_ld_node.get('@id') label_node = json_ld_node.get(str(RDFS.label)) # Consume the labels, such that we don't recurse into them if label_node: del json_ld_node[str(RDFS.label)] concept_id = lang = None import re p = re.compile( r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$" ) m = p.match(concept_uri) if m is not None: concept_id = m.groupdict().get("concept_id") else: # could be an external id, rather than an Arches only URI hits = [ ident for ident in models.Value.objects.all().filter( value__exact=str(concept_uri), valuetype__category="identifiers") ] # print("Could be external URI - hits from RDM: {0}".format(len(hits))) if len(hits) == 1: concept_id = hits[0].concept_id # Still need to find the label or prefLabel for this concept else: print( "ERROR: Multiple hits for {0} external identifier in RDM:". format(concept_uri)) for hit in hits: print("ConceptValue {0}, Concept {1} - '{2}'".format( hit.valueid, hit.conceptid, hit.value)) # print("Trying to get a label from the concept node.") if label_node: label, lang = get_value_from_jsonld(label_node) if label: # Could be: # - Blank node E55_Type with a label - a Keyword # - Concept ID URI, with a label - a conventional Concept # - Concept ID via an external URI, hosted in Arches # find a matching Concept Value to the label values = get_valueids_from_concept_label( label, concept_id, lang) if values: return values[0]["id"] else: if concept_id: # print("FAILED TO FIND MATCHING LABEL '{0}'@{2} FOR CONCEPT '{1}' in ES").format( # label, concept_id, lang) # print("Attempting a match from label via the DB:") hits = [ ident for ident in models.Value.objects.all().filter( value__exact=label) ] if hits and len(hits) == 1: # print "FOUND: %s" % hits[0].pk return str(hits[0].pk) label = None else: print("No Concept ID URI supplied for rdf") else: label = None if concept_id and label is None: # got a concept URI but the label is nonexistant # or cannot be resolved in Arches value = get_preflabel_from_conceptid(concept_id, lang=lang) return value['id'] if concept_id is None and (label is None or label == ""): print( "Concept lookup in from_rdf FAILED: No concept id found and no label either" ) # a keyword of some type. If the code execution gets here their either # was no RDFS:label literal value to note or the keyword cannot be found # amongst the current Arches ConceptValues pass
def from_rdf(self, json_ld_node): # Expects a label and a concept URI within the json_ld_node # But might not always get them both. try: # assume a list, and as this is a ConceptDataType, assume a single entry json_ld_node = json_ld_node[0] except KeyError as e: pass concept_uri = json_ld_node.get("@id") label_node = json_ld_node.get(str(RDFS.label)) concept_id = lang = None import re # FIXME: This should use settings for host and check for UUID p = re.compile(r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$") m = p.match(concept_uri) if m is not None: concept_id = m.groupdict().get("concept_id") else: # could be an external id, rather than an Arches only URI hits = [ident for ident in models.Value.objects.all().filter(value__exact=str(concept_uri), valuetype__category="identifiers")] if len(hits) == 1: concept_id = hits[0].concept_id else: print("ERROR: Multiple hits for {0} external identifier in RDM:".format(concept_uri)) for hit in hits: print("ConceptValue {0}, Concept {1} - '{2}'".format(hit.valueid, hit.conceptid, hit.value)) # Just try the first one and hope concept_id = hits[0].concept_id if label_node: label, lang = get_value_from_jsonld(label_node) if label: values = get_valueids_from_concept_label(label, concept_id, lang) if values: return values[0]["id"] else: if concept_id: hits = [ident for ident in models.Value.objects.all().filter(value__exact=label)] if hits and len(hits) == 1: return str(hits[0].pk) label = None else: print("No Concept ID URI supplied for rdf") else: label = None if concept_id and label is None: value = get_preflabel_from_conceptid(concept_id, lang=lang) if value["id"]: return value["id"] else: hits = [ident for ident in models.Value.objects.all()] if hits: return str(hits[0].pk) else: print(f"No labels for concept: {concept_id}!") return None else: # No concept_id means not in RDM at all return None