def from_rdf(self, json_ld_node):
    """Resolve a JSON-LD concept node to the id of a matching concept value.

    Expects a label and a concept URI within ``json_ld_node``, but copes with
    either one being absent.

    Args:
        json_ld_node: Mapping read with ``.get``. Its ``'id'`` entry is
            treated as the concept URI and its ``str(RDFS.label)`` entry as
            the label.

    Returns:
        The ``"id"`` of the first value returned by
        ``get_valueids_from_concept_label`` when the label matches; otherwise
        the ``'id'`` returned by ``get_preflabel_from_conceptid`` when a
        concept id could be parsed from the URI; otherwise ``None``.
    """
    # FIXME: SHOULD be able to handle cases when the label is not supplied,
    # or if the label does not match any label from the ConceptValue,
    # either by instantiating a keyword without a concept_id or by looking
    # for, say, an external identifier attached to the concept and building
    # upon that.
    import re

    concept_uri = json_ld_node.get('id')
    label = json_ld_node.get(str(RDFS.label))
    concept_id = None

    p = re.compile(
        r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$"
    )
    # A node without an id yields None here; re.match(None) would raise
    # TypeError, so only attempt the match when there is a URI to parse.
    m = p.match(concept_uri) if concept_uri else None
    if m is not None:
        concept_id = m.groupdict().get("concept_id")

    # FIXME: assert that the type of this node is a E55_Type?
    # FIXME when pyld supports uppercase lang in strings, include
    # language handling here.
    if label:
        # Could be:
        #  - Blank node E55_Type with a label - a Keyword
        #  - Concept ID URI, with a label - a conventional Concept
        # find a matching Concept Value to the label
        values = get_valueids_from_concept_label(label, concept_id)
        if values:
            return values[0]["id"]
        if concept_id:
            # .format() must be applied to the string, not to print()'s
            # return value (None), otherwise this line raises AttributeError
            # and the prefLabel fallback below is never reached.
            print(
                "FAILED TO FIND MATCHING LABEL '{0}' FOR CONCEPT '{1}'".format(
                    label, concept_id
                )
            )
            label = None
        else:
            print("No Concept ID URI supplied for rdf")

    if concept_id and not label:
        # Got a concept URI but the label is nonexistent (None or empty)
        # or cannot be resolved in Arches: fall back to the preferred label.
        value = get_preflabel_from_conceptid(concept_id, lang=None)
        return value['id']

    # A keyword of some type. If execution gets here, there either was no
    # RDFS:label literal value to note or the keyword cannot be found
    # amongst the current Arches ConceptValues.
    return None
def from_rdf(self, json_ld_node):
    """Resolve a JSON-LD concept node to the id of a matching concept value.

    Expects a label and a concept URI within ``json_ld_node``, but copes with
    either one being absent. The URI may be an Arches ``/concepts/<id>`` URI
    or an external identifier stored as a ``Value`` in the ``identifiers``
    category.

    Args:
        json_ld_node: Either a single node mapping or a list whose first
            entry is that mapping. The ``str(RDFS.label)`` entry, when
            present and truthy, is deleted from the mapping (it is consumed
            here so that it is not recursed into later).

    Returns:
        The id of the matching value (from the label lookup, from a unique
        exact-label ``Value`` row, or from the concept's preferred label),
        or ``None`` when nothing could be resolved.
    """
    # FIXME: SHOULD be able to handle cases when the label is not supplied,
    # or if the label does not match any label from the ConceptValue,
    # either by instantiating a keyword without a concept_id or by looking
    # for, say, an external identifier attached to the concept and building
    # upon that.
    import re

    try:
        # assume a list, and as this is a ConceptDataType, assume a single entry
        json_ld_node = json_ld_node[0]
    except KeyError:
        # Already a single node mapping (it has no key 0).
        pass
    except IndexError:
        # Empty list: there is no node to resolve.
        return None

    concept_uri = json_ld_node.get('@id')
    label_node = json_ld_node.get(str(RDFS.label))
    # Consume the labels, such that we don't recurse into them
    if label_node:
        del json_ld_node[str(RDFS.label)]

    concept_id = lang = None
    label = None

    p = re.compile(
        r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$"
    )
    # re.match(None) raises TypeError; only parse when a URI was supplied.
    m = p.match(concept_uri) if concept_uri else None
    if m is not None:
        concept_id = m.groupdict().get("concept_id")
    elif concept_uri:
        # could be an external id, rather than an Arches only URI
        hits = list(
            models.Value.objects.filter(
                value__exact=str(concept_uri),
                valuetype__category="identifiers",
            )
        )
        if len(hits) == 1:
            # Still need to find the label or prefLabel for this concept
            concept_id = hits[0].concept_id
        elif hits:
            print(
                "ERROR: Multiple hits for {0} external identifier in RDM:".format(
                    concept_uri
                )
            )
            for hit in hits:
                # Reads concept_id, the same attribute the single-hit path
                # above relies on.
                print(
                    "ConceptValue {0}, Concept {1} - '{2}'".format(
                        hit.valueid, hit.concept_id, hit.value
                    )
                )
        else:
            # Previously reported as "Multiple hits", which was misleading.
            print(
                "ERROR: No hits for {0} external identifier in RDM".format(
                    concept_uri
                )
            )

    if label_node:
        label, lang = get_value_from_jsonld(label_node)

    if label:
        # Could be:
        #  - Blank node E55_Type with a label - a Keyword
        #  - Concept ID URI, with a label - a conventional Concept
        #  - Concept ID via an external URI, hosted in Arches
        # find a matching Concept Value to the label
        values = get_valueids_from_concept_label(label, concept_id, lang)
        if values:
            return values[0]["id"]
        if concept_id:
            # The label lookup failed; attempt an exact match via the DB and
            # accept it only when it is unambiguous.
            hits = list(models.Value.objects.filter(value__exact=label))
            if len(hits) == 1:
                return str(hits[0].pk)
            label = None
        else:
            print("No Concept ID URI supplied for rdf")

    if concept_id and not label:
        # Got a concept URI but the label is nonexistent (None or empty)
        # or cannot be resolved in Arches: fall back to the preferred label.
        value = get_preflabel_from_conceptid(concept_id, lang=lang)
        return value['id']

    if concept_id is None and (label is None or label == ""):
        # A keyword of some type. If execution gets here, there either was
        # no RDFS:label literal value to note or the keyword cannot be found
        # amongst the current Arches ConceptValues.
        print(
            "Concept lookup in from_rdf FAILED: No concept id found and no label either"
        )
    return None
def search_terms(request):
    """Return term and concept suggestions matching the ``q`` query string.

    For each of the ``"terms"`` and ``"concepts"`` indexes a bool query of
    three ``should`` clauses is run (phrase-prefix on ``value``,
    phrase-prefix on ``value.folded``, fuzzy on ``value.folded``), and the
    ``value_agg`` aggregation buckets are turned into suggestion entries.

    Args:
        request: The HTTP request. ``GET["q"]`` is the search text
            (default ``""``) and ``GET["lang"]`` the language for concept
            labels (default ``request.LANGUAGE_CODE``).

    Returns:
        A ``JSONResponse`` wrapping a dict that maps each index name to a
        list of entries with the keys ``type``, ``context``,
        ``context_label``, ``id``, ``text`` and ``value``. ``id`` is a
        running counter shared across both indexes.
    """
    lang = request.GET.get("lang", request.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    # Lower-case once; the same text feeds all three match clauses.
    search_text = request.GET.get("q", "").lower()
    user_is_reviewer = user_is_resource_reviewer(request.user)

    i = 0
    ret = {}
    for index in ["terms", "concepts"]:
        query = Query(se, start=0, limit=0)
        boolquery = Bool()
        boolquery.should(
            Match(field="value", query=search_text, type="phrase_prefix")
        )
        boolquery.should(
            Match(field="value.folded", query=search_text, type="phrase_prefix")
        )
        boolquery.should(
            Match(
                field="value.folded",
                query=search_text,
                fuzziness="AUTO",
                prefix_length=settings.SEARCH_TERM_SENSITIVITY,
            )
        )

        # Only the "terms" index is filtered on the provisional flag, and
        # only for users who are not reviewers.
        if user_is_reviewer is False and index == "terms":
            boolquery.filter(Terms(field="provisional", terms=["false"]))

        query.add_query(boolquery)

        base_agg = Aggregation(
            name="value_agg",
            type="terms",
            field="value.raw",
            size=settings.SEARCH_DROPDOWN_LENGTH,
            order={"max_score": "desc"},
        )
        nodegroupid_agg = Aggregation(
            name="nodegroupid", type="terms", field="nodegroupid"
        )
        top_concept_agg = Aggregation(
            name="top_concept", type="terms", field="top_concept"
        )
        conceptid_agg = Aggregation(
            name="conceptid", type="terms", field="conceptid"
        )
        max_score_agg = MaxAgg(name="max_score", script="_score")

        top_concept_agg.add_aggregation(conceptid_agg)
        base_agg.add_aggregation(max_score_agg)
        base_agg.add_aggregation(top_concept_agg)
        base_agg.add_aggregation(nodegroupid_agg)
        query.add_aggregation(base_agg)

        entries = ret[index] = []
        results = query.search(index=index)
        if results is None:
            # No search result for this index: leave its list empty.
            continue

        for result in results["aggregations"]["value_agg"]["buckets"]:
            top_concepts = result["top_concept"]["buckets"]
            if top_concepts:
                for top_concept in top_concepts:
                    top_concept_id = top_concept["key"]
                    top_concept_label = get_preflabel_from_conceptid(
                        top_concept_id, lang
                    )["value"]
                    for concept in top_concept["conceptid"]["buckets"]:
                        entries.append(
                            {
                                "type": "concept",
                                "context": top_concept_id,
                                "context_label": top_concept_label,
                                "id": i,
                                "text": result["key"],
                                "value": concept["key"],
                            }
                        )
                        # One id per appended entry keeps ids unique.
                        i += 1
            else:
                entries.append(
                    {
                        "type": "term",
                        "context": "",
                        "context_label": get_resource_model_label(result),
                        "id": i,
                        "text": result["key"],
                        "value": result["key"],
                    }
                )
                i += 1

    return JSONResponse(ret)
def from_rdf(self, json_ld_node):
    """Resolve a JSON-LD concept node to the id of a matching concept value.

    Expects a label and a concept URI within ``json_ld_node``, but might not
    always get them both. The URI may be an Arches ``/concepts/<id>`` URI or
    an external identifier stored as a ``Value`` in the ``identifiers``
    category.

    Args:
        json_ld_node: Either a single node mapping or a list whose first
            entry is that mapping.

    Returns:
        The id of the matching value (from the label lookup, from a unique
        exact-label ``Value`` row, from the concept's preferred label, or
        from any ``Value`` row of the concept), or ``None`` when nothing
        could be resolved.
    """
    import re

    try:
        # assume a list, and as this is a ConceptDataType, assume a single entry
        json_ld_node = json_ld_node[0]
    except KeyError:
        # Already a single node mapping (it has no key 0).
        pass
    except IndexError:
        # Empty list: there is no node to resolve.
        return None

    concept_uri = json_ld_node.get("@id")
    label_node = json_ld_node.get(str(RDFS.label))
    concept_id = lang = None
    label = None

    # FIXME: This should use settings for host and check for UUID
    p = re.compile(r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$")
    # re.match(None) raises TypeError; only parse when a URI was supplied.
    m = p.match(concept_uri) if concept_uri else None
    if m is not None:
        concept_id = m.groupdict().get("concept_id")
    elif concept_uri:
        # could be an external id, rather than an Arches only URI
        hits = list(
            models.Value.objects.filter(
                value__exact=str(concept_uri),
                valuetype__category="identifiers",
            )
        )
        if not hits:
            # Previously reported as "Multiple hits", which was misleading.
            print("ERROR: No hits for {0} external identifier in RDM".format(concept_uri))
        else:
            if len(hits) > 1:
                print("ERROR: Multiple hits for {0} external identifier in RDM:".format(concept_uri))
                for hit in hits:
                    # Reads concept_id, the same attribute used for the
                    # assignment below.
                    print("ConceptValue {0}, Concept {1} - '{2}'".format(hit.valueid, hit.concept_id, hit.value))
            # With several hits: just try the first one and hope.
            concept_id = hits[0].concept_id

    if label_node:
        label, lang = get_value_from_jsonld(label_node)

    if label:
        values = get_valueids_from_concept_label(label, concept_id, lang)
        if values:
            return values[0]["id"]
        if concept_id:
            # The label lookup failed; attempt an exact match via the DB and
            # accept it only when it is unambiguous.
            hits = list(models.Value.objects.filter(value__exact=label))
            if len(hits) == 1:
                return str(hits[0].pk)
        else:
            print("No Concept ID URI supplied for rdf")

    if not concept_id:
        # No concept_id means not in RDM at all
        return None

    # Reaching here means the label was missing, empty, or unresolvable:
    # fall back to the concept's preferred label.
    value = get_preflabel_from_conceptid(concept_id, lang=lang)
    if value["id"]:
        return value["id"]

    # Last resort: any value belonging to *this* concept. The lookup is
    # restricted by concept_id so an unrelated row is never returned.
    hit = models.Value.objects.filter(concept_id=concept_id).first()
    if hit is not None:
        return str(hit.pk)

    print(f"No labels for concept: {concept_id}!")
    return None
def search_terms(request):
    """Return term and concept suggestions matching the ``q`` query string.

    For each of the ``'terms'`` and ``'concepts'`` indexes a bool query of
    three ``should`` clauses is run (phrase-prefix on ``value``,
    phrase-prefix on ``value.folded``, fuzzy on ``value.folded``), and the
    ``value_agg`` aggregation buckets are turned into suggestion entries.

    Args:
        request: The HTTP request. ``GET['q']`` is the search text
            (default ``''``) and ``GET['lang']`` the language for concept
            labels (default ``settings.LANGUAGE_CODE``). The user counts as
            a reviewer when they belong to the ``'Resource Reviewer'`` group.

    Returns:
        A ``JSONResponse`` wrapping a dict that maps each index name to a
        list of entries with the keys ``type``, ``context``,
        ``context_label``, ``id``, ``text`` and ``value``. ``id`` is a
        running counter shared across both indexes.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    # Lower-case once; the same text feeds all three match clauses.
    search_text = request.GET.get('q', '').lower()
    user_is_reviewer = request.user.groups.filter(
        name='Resource Reviewer').exists()

    i = 0
    ret = {}
    for index in ['terms', 'concepts']:
        query = Query(se, start=0, limit=0)
        boolquery = Bool()
        boolquery.should(
            Match(field='value', query=search_text, type='phrase_prefix'))
        boolquery.should(
            Match(field='value.folded', query=search_text,
                  type='phrase_prefix'))
        boolquery.should(
            Match(field='value.folded', query=search_text, fuzziness='AUTO',
                  prefix_length=settings.SEARCH_TERM_SENSITIVITY))

        # Only the 'terms' index is filtered on the provisional flag, and
        # only for users who are not reviewers.
        if user_is_reviewer is False and index == 'terms':
            boolquery.filter(Terms(field='provisional', terms=['false']))

        query.add_query(boolquery)

        base_agg = Aggregation(name='value_agg', type='terms',
                               field='value.raw',
                               size=settings.SEARCH_DROPDOWN_LENGTH,
                               order={"max_score": "desc"})
        nodegroupid_agg = Aggregation(name='nodegroupid', type='terms',
                                      field='nodegroupid')
        top_concept_agg = Aggregation(name='top_concept', type='terms',
                                      field='top_concept')
        conceptid_agg = Aggregation(name='conceptid', type='terms',
                                    field='conceptid')
        max_score_agg = MaxAgg(name='max_score', script='_score')

        top_concept_agg.add_aggregation(conceptid_agg)
        base_agg.add_aggregation(max_score_agg)
        base_agg.add_aggregation(top_concept_agg)
        base_agg.add_aggregation(nodegroupid_agg)
        query.add_aggregation(base_agg)

        entries = ret[index] = []
        results = query.search(index=index)
        if results is None:
            # A None result would raise TypeError when subscripted below;
            # leave this index's list empty instead.
            continue

        for result in results['aggregations']['value_agg']['buckets']:
            top_concepts = result['top_concept']['buckets']
            if top_concepts:
                for top_concept in top_concepts:
                    top_concept_id = top_concept['key']
                    top_concept_label = get_preflabel_from_conceptid(
                        top_concept_id, lang)['value']
                    for concept in top_concept['conceptid']['buckets']:
                        entries.append({
                            'type': 'concept',
                            'context': top_concept_id,
                            'context_label': top_concept_label,
                            'id': i,
                            'text': result['key'],
                            'value': concept['key'],
                        })
                        # One id per appended entry keeps ids unique.
                        i += 1
            else:
                entries.append({
                    'type': 'term',
                    'context': '',
                    'context_label': get_resource_model_label(result),
                    'id': i,
                    'text': result['key'],
                    'value': result['key'],
                })
                i += 1

    return JSONResponse(ret)