예제 #1
0
    def from_rdf(self, json_ld_node):
        """Resolve a JSON-LD concept node to a concept value id.

        The concept URI is read from the node's ``'id'`` key and the label
        from its ``str(RDFS.label)`` key. When the URI has the form
        ``http(s)://<host>/concepts/<hex-and-dash id>`` the trailing segment
        is used as the concept id.

        Args:
            json_ld_node: mapping-like object exposing ``get``.

        Returns:
            ``values[0]["id"]`` from ``get_valueids_from_concept_label`` when
            a label is present and that lookup yields results; otherwise,
            when a concept id is known and no usable label remains, the
            ``'id'`` entry of ``get_preflabel_from_conceptid``'s result;
            otherwise ``None``.
        """
        # FIXME: SHOULD be able to handle cases when the label is not supplied,
        # or if the label does not match any label from the ConceptValue.
        # Either by instantiating a keyword without a concept_id or by
        # looking for, say, an external identifier attached to the concept
        # and building upon that.
        import re

        concept_uri = json_ld_node.get('id')
        label = json_ld_node.get(str(RDFS.label))

        concept_id = None
        # A blank node carries no URI; re.match() would raise TypeError on
        # None, so only attempt the match when a URI was supplied.
        if concept_uri:
            m = re.match(
                r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$",
                concept_uri,
            )
            if m is not None:
                concept_id = m.group("concept_id")

        # FIXME: assert that the type of this node is a E55_Type?

        # FIXME when pyld supports uppercase lang in strings, include
        # language handling here.

        if label:
            # Could be:
            #  - Blank node E55_Type with a label - a Keyword
            #  - Concept ID URI, with a label - a conventional Concept
            # Find a matching Concept Value for the label.
            values = get_valueids_from_concept_label(label, concept_id)
            if values:
                return values[0]["id"]

            if concept_id:
                # Format the message *before* printing: calling .format() on
                # print()'s return value (None) fails on Python 3.
                print(
                    "FAILED TO FIND MATCHING LABEL '{0}' FOR CONCEPT '{1}'".format(
                        label, concept_id
                    )
                )
                # Drop the unusable label so the prefLabel fallback runs.
                label = None
            else:
                print("No Concept ID URI supplied for rdf")

        if concept_id and label is None:
            # Got a concept URI but the label is nonexistent
            # or cannot be resolved in Arches.
            value = get_preflabel_from_conceptid(concept_id, lang=None)
            return value['id']

        # A keyword of some type: either there was no RDFS:label literal value
        # to note, or the keyword cannot be found amongst the current Arches
        # ConceptValues. Nothing can be resolved.
        return None
예제 #2
0
    def from_rdf(self, json_ld_node):
        """Resolve a JSON-LD concept node to a concept value id.

        Accepts either a single node or a list of nodes (only the first entry
        is used). The concept URI is read from ``'@id'`` and the label node
        from ``str(RDFS.label)``; a truthy label node is removed from
        ``json_ld_node`` so that callers do not recurse into it.

        The concept id is taken from an ``http(s)://<host>/concepts/<id>`` URI
        when the URI has that shape; otherwise the URI is looked up as an
        external identifier among ``models.Value`` rows whose value type
        category is ``"identifiers"``.

        Args:
            json_ld_node: mapping-like node, or a list whose first element is
                such a node.

        Returns:
            The id of a matching value (from the label search, a unique exact
            label match in the database, or the concept's prefLabel), or
            ``None`` when nothing can be resolved or the list is empty.
        """
        # FIXME: SHOULD be able to handle cases when the label is not supplied,
        # or if the label does not match any label from the ConceptValue.
        # Either by instantiating a keyword without a concept_id or by
        # looking for, say, an external identifier attached to the concept
        # and building upon that.
        import re

        try:
            # assume a list, and as this is a ConceptDataType, assume a single entry
            json_ld_node = json_ld_node[0]
        except KeyError:
            # Already a single mapping-like node.
            pass
        except IndexError:
            # Empty list: there is no node to resolve.
            return None

        concept_uri = json_ld_node.get('@id')
        label_node = json_ld_node.get(str(RDFS.label))

        # Consume the labels, such that we don't recurse into them
        if label_node:
            del json_ld_node[str(RDFS.label)]

        concept_id = lang = None
        # Blank nodes may carry no '@id'; re.match() raises TypeError on None.
        m = None
        if concept_uri:
            m = re.match(
                r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$",
                concept_uri,
            )
        if m is not None:
            concept_id = m.group("concept_id")
        elif concept_uri:
            # could be an external id, rather than an Arches only URI
            hits = list(
                models.Value.objects.filter(
                    value__exact=str(concept_uri),
                    valuetype__category="identifiers",
                )
            )
            if len(hits) == 1:
                concept_id = hits[0].concept_id
                # Still need to find the label or prefLabel for this concept
            elif hits:
                print(
                    "ERROR: Multiple hits for {0} external identifier in RDM:".
                    format(concept_uri))
                for hit in hits:
                    # Reads concept_id, the same attribute the single-hit
                    # branch above relies on.
                    print("ConceptValue {0}, Concept {1} - '{2}'".format(
                        hit.valueid, hit.concept_id, hit.value))
            else:
                # Previously reported as "Multiple hits", which was misleading.
                print(
                    "No hits for {0} external identifier in RDM".format(
                        concept_uri))

        label = None
        if label_node:
            label, lang = get_value_from_jsonld(label_node)
            if label:
                # Could be:
                #  - Blank node E55_Type with a label - a Keyword
                #  - Concept ID URI, with a label - a conventional Concept
                #  - Concept ID via an external URI, hosted in Arches
                # Find a matching Concept Value for the label.
                values = get_valueids_from_concept_label(
                    label, concept_id, lang)
                if values:
                    return values[0]["id"]

                if concept_id:
                    # The label search found nothing; try an exact match on
                    # the label in the database and accept it only if unique.
                    hits = list(models.Value.objects.filter(value__exact=label))
                    if len(hits) == 1:
                        return str(hits[0].pk)
                    # Drop the unusable label so the prefLabel fallback runs.
                    label = None
                else:
                    print("No Concept ID URI supplied for rdf")

        if concept_id and label is None:
            # Got a concept URI but the label is nonexistent
            # or cannot be resolved in Arches.
            value = get_preflabel_from_conceptid(concept_id, lang=lang)
            return value['id']

        if concept_id is None and (label is None or label == ""):
            # A keyword of some type: either there was no RDFS:label literal
            # value to note, or the keyword cannot be found amongst the
            # current Arches ConceptValues.
            print(
                "Concept lookup in from_rdf FAILED: No concept id found and no label either"
            )
        return None
예제 #3
0
def search_terms(request):
    """Return search suggestions for the ``q`` GET parameter as JSON.

    The "terms" and "concepts" indexes are each queried with a prefix match
    on ``value``, a prefix match on ``value.folded`` and a fuzzy match on
    ``value.folded``. When ``user_is_resource_reviewer`` returns ``False``
    the "terms" query is additionally filtered to ``provisional == "false"``.

    Each bucket of the ``value_agg`` aggregation becomes either one "term"
    entry (no top-concept buckets) or one "concept" entry per concept bucket
    under each top concept. The running ``id`` is shared by all entries
    produced for the same top concept and continues across both indexes.

    Args:
        request: the incoming request; ``GET``, ``LANGUAGE_CODE`` and
            ``user`` are read from it.

    Returns:
        ``JSONResponse`` wrapping ``{"terms": [...], "concepts": [...]}``.
    """
    language = request.GET.get("lang", request.LANGUAGE_CODE)
    engine = SearchEngineFactory().create()
    raw_term = request.GET.get("q", "")
    reviewer = user_is_resource_reviewer(request.user)

    needle = raw_term.lower()
    next_id = 0
    payload = {}
    for index in ("terms", "concepts"):
        dsl = Query(engine, start=0, limit=0)

        clauses = Bool()
        clauses.should(Match(field="value", query=needle, type="phrase_prefix"))
        clauses.should(Match(field="value.folded", query=needle, type="phrase_prefix"))
        clauses.should(
            Match(
                field="value.folded",
                query=needle,
                fuzziness="AUTO",
                prefix_length=settings.SEARCH_TERM_SENSITIVITY,
            )
        )
        if index == "terms" and reviewer is False:
            clauses.filter(Terms(field="provisional", terms=["false"]))
        dsl.add_query(clauses)

        # Aggregation tree: value -> (max score, top concept -> concept id, nodegroup)
        by_value = Aggregation(
            name="value_agg",
            type="terms",
            field="value.raw",
            size=settings.SEARCH_DROPDOWN_LENGTH,
            order={"max_score": "desc"},
        )
        by_nodegroup = Aggregation(name="nodegroupid", type="terms", field="nodegroupid")
        by_top_concept = Aggregation(name="top_concept", type="terms", field="top_concept")
        by_concept = Aggregation(name="conceptid", type="terms", field="conceptid")
        best_score = MaxAgg(name="max_score", script="_score")

        by_top_concept.add_aggregation(by_concept)
        by_value.add_aggregation(best_score)
        by_value.add_aggregation(by_top_concept)
        by_value.add_aggregation(by_nodegroup)
        dsl.add_aggregation(by_value)

        payload[index] = []
        response = dsl.search(index=index)
        if response is None:
            continue

        entries = payload[index]
        for bucket in response["aggregations"]["value_agg"]["buckets"]:
            if len(bucket["top_concept"]["buckets"]) == 0:
                # No concept context: report the bucket as a plain term.
                entries.append({
                    "type": "term",
                    "context": "",
                    "context_label": get_resource_model_label(bucket),
                    "id": next_id,
                    "text": bucket["key"],
                    "value": bucket["key"],
                })
                next_id += 1
                continue

            for top in bucket["top_concept"]["buckets"]:
                top_id = top["key"]
                top_label = get_preflabel_from_conceptid(top["key"], language)["value"]
                for concept in top["conceptid"]["buckets"]:
                    entries.append({
                        "type": "concept",
                        "context": top_id,
                        "context_label": top_label,
                        "id": next_id,
                        "text": bucket["key"],
                        "value": concept["key"],
                    })
                # One id per top concept, shared by all of its concept entries.
                next_id += 1

    return JSONResponse(payload)
예제 #4
0
    def from_rdf(self, json_ld_node):
        """Resolve a JSON-LD concept node to a concept value id.

        Expects a label and a concept URI within ``json_ld_node``, but might
        not always get them both. Accepts either a single node or a list of
        nodes (only the first entry is used).

        The concept id is taken from an ``http(s)://<host>/concepts/<id>`` URI
        when ``'@id'`` has that shape; otherwise the URI is looked up as an
        external identifier among ``models.Value`` rows whose value type
        category is ``"identifiers"``. If several rows match, the first one
        is used.

        Args:
            json_ld_node: mapping-like node, or a list whose first element is
                such a node.

        Returns:
            The id of a matching value (from the label search, a unique exact
            label match in the database, the concept's prefLabel, or failing
            that any value stored for the concept), or ``None`` when nothing
            can be resolved.
        """
        import re

        try:
            # assume a list, and as this is a ConceptDataType, assume a single entry
            json_ld_node = json_ld_node[0]
        except KeyError:
            # Already a single mapping-like node.
            pass
        except IndexError:
            # Empty list: there is no node to resolve.
            return None

        concept_uri = json_ld_node.get("@id")
        label_node = json_ld_node.get(str(RDFS.label))
        concept_id = lang = None

        # FIXME: This should use settings for host and check for UUID
        # Nodes may carry no '@id'; re.match() raises TypeError on None.
        m = None
        if concept_uri:
            m = re.match(
                r"(http|https)://(?P<host>[^/]*)/concepts/(?P<concept_id>[A-Fa-f0-9\-]*)/?$",
                concept_uri,
            )
        if m is not None:
            concept_id = m.group("concept_id")
        elif concept_uri:
            # could be an external id, rather than an Arches only URI
            hits = list(
                models.Value.objects.filter(
                    value__exact=str(concept_uri),
                    valuetype__category="identifiers",
                )
            )
            if len(hits) > 1:
                print("ERROR: Multiple hits for {0} external identifier in RDM:".format(concept_uri))
                for hit in hits:
                    # Reads concept_id, the same attribute used to pick the
                    # concept below.
                    print("ConceptValue {0}, Concept {1} - '{2}'".format(hit.valueid, hit.concept_id, hit.value))
            if hits:
                # Single hit, or several: just try the first one and hope.
                concept_id = hits[0].concept_id
            else:
                # Indexing hits[0] here used to raise IndexError.
                print("No hits for {0} external identifier in RDM".format(concept_uri))

        label = None
        if label_node:
            label, lang = get_value_from_jsonld(label_node)
            if label:
                values = get_valueids_from_concept_label(label, concept_id, lang)
                if values:
                    return values[0]["id"]

                if concept_id:
                    # The label search found nothing; try an exact match on
                    # the label in the database and accept it only if unique.
                    hits = list(models.Value.objects.filter(value__exact=label))
                    if len(hits) == 1:
                        return str(hits[0].pk)
                    # Drop the unusable label so the prefLabel fallback runs.
                    label = None
                else:
                    print("No Concept ID URI supplied for rdf")

        if concept_id and label is None:
            value = get_preflabel_from_conceptid(concept_id, lang=lang)
            if value["id"]:
                return value["id"]

            # No prefLabel: fall back to any value stored for this concept.
            # The query is restricted to the concept; unfiltered, it returned
            # the first Value in the whole table regardless of concept.
            # NOTE(review): assumes Value rows can be filtered on concept_id,
            # the attribute read from the hits above - confirm against the model.
            hits = list(models.Value.objects.filter(concept_id=concept_id))
            if hits:
                return str(hits[0].pk)
            print(f"No labels for concept: {concept_id}!")
            return None

        # No concept_id means not in RDM at all
        return None
예제 #5
0
def search_terms(request):
    """Return search suggestions for the ``q`` GET parameter as JSON.

    The 'terms' and 'concepts' indexes are each queried with a prefix match
    on ``value``, a prefix match on ``value.folded`` and a fuzzy match on
    ``value.folded``. Users outside the 'Resource Reviewer' group have the
    'terms' query additionally filtered to ``provisional == 'false'``.

    Each bucket of the ``value_agg`` aggregation becomes either one 'term'
    entry (no top-concept buckets) or one 'concept' entry per concept bucket
    under each top concept. The running ``id`` is shared by all entries
    produced for the same top concept and continues across both indexes.

    Args:
        request: the incoming request; ``GET`` and ``user`` are read from it.

    Returns:
        ``JSONResponse`` wrapping ``{'terms': [...], 'concepts': [...]}``.
        An index whose search returns ``None`` contributes an empty list.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    searchString = request.GET.get('q', '')
    user_is_reviewer = request.user.groups.filter(
        name='Resource Reviewer').exists()

    # Lower-case once; all three match clauses use the same text.
    search_text = searchString.lower()

    i = 0
    ret = {}
    for index in ['terms', 'concepts']:
        query = Query(se, start=0, limit=0)
        boolquery = Bool()
        boolquery.should(
            Match(field='value', query=search_text, type='phrase_prefix'))
        boolquery.should(
            Match(field='value.folded',
                  query=search_text,
                  type='phrase_prefix'))
        boolquery.should(
            Match(field='value.folded',
                  query=search_text,
                  fuzziness='AUTO',
                  prefix_length=settings.SEARCH_TERM_SENSITIVITY))

        if user_is_reviewer is False and index == 'terms':
            boolquery.filter(Terms(field='provisional', terms=['false']))

        query.add_query(boolquery)
        base_agg = Aggregation(name='value_agg',
                               type='terms',
                               field='value.raw',
                               size=settings.SEARCH_DROPDOWN_LENGTH,
                               order={"max_score": "desc"})
        nodegroupid_agg = Aggregation(name='nodegroupid',
                                      type='terms',
                                      field='nodegroupid')
        top_concept_agg = Aggregation(name='top_concept',
                                      type='terms',
                                      field='top_concept')
        conceptid_agg = Aggregation(name='conceptid',
                                    type='terms',
                                    field='conceptid')
        max_score_agg = MaxAgg(name='max_score', script='_score')

        top_concept_agg.add_aggregation(conceptid_agg)
        base_agg.add_aggregation(max_score_agg)
        base_agg.add_aggregation(top_concept_agg)
        base_agg.add_aggregation(nodegroupid_agg)
        query.add_aggregation(base_agg)

        ret[index] = []
        results = query.search(index=index)
        if results is None:
            # The search produced no response; subscripting None would raise
            # TypeError, so leave this index's list empty.
            continue

        for result in results['aggregations']['value_agg']['buckets']:
            top_concepts = result['top_concept']['buckets']
            if top_concepts:
                for top_concept in top_concepts:
                    top_concept_id = top_concept['key']
                    top_concept_label = get_preflabel_from_conceptid(
                        top_concept_id, lang)['value']
                    for concept in top_concept['conceptid']['buckets']:
                        ret[index].append({
                            'type': 'concept',
                            'context': top_concept_id,
                            'context_label': top_concept_label,
                            'id': i,
                            'text': result['key'],
                            'value': concept['key'],
                        })
                    # One id per top concept, shared by its concept entries.
                    i += 1
            else:
                # No concept context: report the bucket as a plain term.
                ret[index].append({
                    'type': 'term',
                    'context': '',
                    'context_label': get_resource_model_label(result),
                    'id': i,
                    'text': result['key'],
                    'value': result['key'],
                })
                i += 1

    return JSONResponse(ret)