Beispiel #1
0
def manage_parents(request, conceptid):
    if request.method == 'POST':
        json = request.body
        if json != None:
            data = JSONDeserializer().deserialize(json)

            with transaction.atomic():
                if len(data['deleted']) > 0:
                    concept = Concept().get(id=conceptid, include=None)
                    for deleted in data['deleted']:
                        concept.addparent(deleted)

                    concept.delete()
                    concept.bulk_index()

                if len(data['added']) > 0:
                    concept = Concept().get(id=conceptid)
                    for added in data['added']:
                        concept.addparent(added)

                    concept.save()
                    concept.bulk_index()

            return JSONResponse(data)

    else:
        return HttpResponseNotAllowed(['POST'])

    return HttpResponseNotFound()
Beispiel #2
0
def manage_parents(request, conceptid):
    if request.method == 'POST':
        json = request.body
        if json != None:
            data = JSONDeserializer().deserialize(json)

            with transaction.atomic():
                if len(data['deleted']) > 0:
                    concept = Concept().get(id=conceptid, include=None)
                    for deleted in data['deleted']:
                        concept.addparent(deleted)

                    concept.delete()
                    concept.bulk_index()

                if len(data['added']) > 0:
                    concept = Concept().get(id=conceptid)
                    for added in data['added']:
                        concept.addparent(added)

                    concept.save()
                    concept.bulk_index()

            return JSONResponse(data)

    else:
        return HttpResponseNotAllowed(['POST'])

    return HttpResponseNotFound()
Beispiel #3
0
    def save_concepts_from_skos(self,
                                graph,
                                overwrite_options="overwrite",
                                staging_options="keep",
                                bulk_load=False,
                                path=""):
        """
        given an RDF graph, tries to save the concpets to the system

        Keyword arguments:
        overwrite_options -- 'overwrite', 'ignore'
        staging_options -- 'stage', 'keep'

        """

        baseuuid = uuid.uuid4()
        allowed_languages = models.DLanguage.objects.values_list("pk",
                                                                 flat=True)
        default_lang = settings.LANGUAGE_CODE
        if bulk_load is True:
            self.logger.setLevel(logging.ERROR)

        value_types = models.DValueType.objects.all()
        skos_value_types = value_types.filter(
            Q(namespace="skos") | Q(namespace="arches"))
        skos_value_types_list = list(
            skos_value_types.values_list("valuetype", flat=True))
        skos_value_types = {
            valuetype.valuetype: valuetype
            for valuetype in skos_value_types
        }
        dcterms_value_types = value_types.filter(namespace="dcterms")
        dcterms_identifier_type = dcterms_value_types.get(
            valuetype=str(DCTERMS.identifier).replace(str(DCTERMS), ""))

        # if the graph is of the type rdflib.graph.Graph
        if isinstance(graph, Graph):
            values = []

            # Search for ConceptSchemes first
            for scheme, v, o in graph.triples(
                (None, RDF.type, SKOS.ConceptScheme)):
                identifier = self.unwrapJsonLiteral(str(scheme))
                scheme_id = self.generate_uuid_from_subject(baseuuid, scheme)
                if bulk_load is True:
                    concept_scheme = models.Concept(
                        pk=scheme_id,
                        legacyoid=str(scheme),
                        nodetype_id="ConceptScheme")
                else:
                    concept_scheme = Concept({
                        'id': scheme_id,
                        'legacyoid': str(scheme),
                        'nodetype': 'ConceptScheme'
                    })

                for predicate, object in graph.predicate_objects(
                        subject=scheme):
                    if str(DCTERMS) in predicate and predicate.replace(
                            DCTERMS, "") in dcterms_value_types.values_list(
                                "valuetype", flat=True):
                        if not self.language_exists(object, allowed_languages):
                            allowed_languages = models.DLanguage.objects.values_list(
                                "pk", flat=True)

                        try:
                            # first try and get any values associated with the concept_scheme
                            # predicate.replace(SKOS, '') should yield something like 'prefLabel' or 'scopeNote', etc..
                            value_type = dcterms_value_types.get(
                                valuetype=predicate.replace(DCTERMS, ""))
                            val = self.unwrapJsonLiteral(object)
                            if predicate == DCTERMS.title:
                                if bulk_load is True:
                                    values.append(
                                        models.Value(
                                            pk=val["value_id"] if
                                            (val["value_id"] != ""
                                             and val["value_id"] is not None)
                                            else str(uuid.uuid4()),
                                            concept_id=concept_scheme.pk,
                                            value=val["value"],
                                            language_id=object.language
                                            or default_lang,
                                            valuetype_id="prefLabel",
                                        ))
                                else:
                                    concept_scheme.addvalue({
                                        'id':
                                        val['value_id'],
                                        'value':
                                        val['value'],
                                        'language':
                                        object.language or default_lang,
                                        'type':
                                        'prefLabel',
                                        'category':
                                        value_type.category
                                    })
                                # print('Casting dcterms:title to skos:prefLabel')
                            elif predicate == DCTERMS.description:
                                if bulk_load is True:
                                    values.append(
                                        models.Value(
                                            pk=val["value_id"] if
                                            (val["value_id"] != ""
                                             and val["value_id"] is not None)
                                            else str(uuid.uuid4()),
                                            concept_id=concept_scheme.pk,
                                            value=val["value"],
                                            language_id=object.language
                                            or default_lang,
                                            valuetype_id="scopeNote",
                                        ))
                                else:
                                    concept_scheme.addvalue({
                                        'id':
                                        val['value_id'],
                                        'value':
                                        val['value'],
                                        'language':
                                        object.language or default_lang,
                                        'type':
                                        'scopeNote',
                                        'category':
                                        value_type.category
                                    })
                                # print('Casting dcterms:description to skos:scopeNote')
                            elif predicate == DCTERMS.identifier:
                                identifier = self.unwrapJsonLiteral(
                                    str(object))
                        except:
                            pass

                    if str(SKOS) in predicate:
                        # print predicate
                        if predicate == SKOS.hasTopConcept:
                            top_concept_id = self.generate_uuid_from_subject(
                                baseuuid, object)
                            self.relations.append({
                                "source": scheme_id,
                                "type": "hasTopConcept",
                                "target": top_concept_id,
                            })

                if bulk_load is True:
                    values.append(
                        models.Value(
                            pk=identifier["value_id"] if
                            (identifier["value_id"] != ""
                             and identifier["value_id"] is not None) else str(
                                 uuid.uuid4()),
                            concept_id=concept_scheme.pk,
                            value=identifier["value"],
                            language_id=default_lang,
                            valuetype_id=dcterms_identifier_type.valuetype,
                        ))
                else:
                    concept_scheme.addvalue({
                        'id':
                        identifier['value_id'],
                        'value':
                        identifier['value'],
                        'language':
                        default_lang,
                        'type':
                        dcterms_identifier_type.valuetype,
                        'category':
                        dcterms_identifier_type.category
                    })
                self.nodes.append(concept_scheme)

                # Search for Concepts
                for s, v, o in graph.triples((None, SKOS.inScheme, scheme)):
                    identifier = self.unwrapJsonLiteral(str(s))
                    if bulk_load is True:
                        concept = models.Concept(
                            pk=self.generate_uuid_from_subject(baseuuid, s),
                            legacyoid=str(s),
                            nodetype_id="Concept",
                        )
                    else:
                        concept = Concept({
                            'id':
                            self.generate_uuid_from_subject(baseuuid, s),
                            'legacyoid':
                            str(s),
                            'nodetype':
                            'Concept'
                        })

                    # loop through all the elements within a <skos:Concept> element
                    for predicate, object in graph.predicate_objects(
                            subject=s):
                        if str(SKOS) in predicate or str(ARCHES) in predicate:
                            if not self.language_exists(
                                    object, allowed_languages):
                                allowed_languages = models.DLanguage.objects.values_list(
                                    "pk", flat=True)

                            # this is essentially the skos element type within a <skos:Concept>
                            # element (eg: prefLabel, broader, etc...)
                            relation_or_value_type = predicate.replace(
                                SKOS, "").replace(ARCHES, "")

                            if relation_or_value_type in skos_value_types_list:
                                value_type = skos_value_types[
                                    relation_or_value_type]
                                val = self.unwrapJsonLiteral(object)
                                if bulk_load is True:
                                    values.append(
                                        models.Value(
                                            pk=val["value_id"] if
                                            (val["value_id"] != ""
                                             and val["value_id"] is not None)
                                            else str(uuid.uuid4()),
                                            concept_id=concept.pk,
                                            value=val["value"],
                                            language_id=object.language
                                            or default_lang,
                                            valuetype_id=value_type.valuetype,
                                        ))
                                else:
                                    concept.addvalue({
                                        'id':
                                        val['value_id'],
                                        'value':
                                        val['value'],
                                        'language':
                                        object.language or default_lang,
                                        'type':
                                        value_type.valuetype,
                                        'category':
                                        value_type.category
                                    })
                            elif predicate == SKOS.broader:
                                self.relations.append({
                                    "source":
                                    self.generate_uuid_from_subject(
                                        baseuuid, object),
                                    "type":
                                    "narrower",
                                    "target":
                                    self.generate_uuid_from_subject(
                                        baseuuid, s),
                                })
                            elif predicate == SKOS.narrower:
                                self.relations.append({
                                    "source":
                                    self.generate_uuid_from_subject(
                                        baseuuid, s),
                                    "type":
                                    relation_or_value_type,
                                    "target":
                                    self.generate_uuid_from_subject(
                                        baseuuid, object),
                                })
                            elif predicate == SKOS.related:
                                self.relations.append({
                                    "source":
                                    self.generate_uuid_from_subject(
                                        baseuuid, s),
                                    "type":
                                    relation_or_value_type,
                                    "target":
                                    self.generate_uuid_from_subject(
                                        baseuuid, object),
                                })

                        elif predicate == DCTERMS.identifier:
                            identifier = self.unwrapJsonLiteral(str(object))

                    if bulk_load is True:
                        values.append(
                            models.Value(
                                pk=identifier["value_id"] if
                                (identifier["value_id"] != ""
                                 and identifier["value_id"] is not None) else
                                str(uuid.uuid4()),
                                concept_id=concept.pk,
                                value=identifier["value"],
                                language_id=default_lang,
                                valuetype_id=dcterms_identifier_type.valuetype,
                            ))
                    else:
                        concept.addvalue({
                            'id':
                            identifier['value_id'],
                            'value':
                            identifier['value'],
                            'language':
                            default_lang,
                            'type':
                            dcterms_identifier_type.valuetype,
                            'category':
                            dcterms_identifier_type.category
                        })
                    self.nodes.append(concept)

            # Search for SKOS.Collections
            for s, v, o in graph.triples((None, RDF.type, SKOS.Collection)):
                # print "%s %s %s " % (s,v,o)
                if bulk_load is True:
                    concept = models.Concept(
                        pk=self.generate_uuid_from_subject(baseuuid, s),
                        legacyoid=str(s),
                        nodetype_id="Collection",
                    )
                else:
                    concept = Concept({
                        'id':
                        self.generate_uuid_from_subject(baseuuid, s),
                        'legacyoid':
                        str(s),
                        'nodetype':
                        'Collection'
                    })
                # loop through all the elements within a <skos:Concept> element
                for predicate, object in graph.predicate_objects(subject=s):
                    if str(SKOS) in predicate or str(ARCHES) in predicate:
                        if not self.language_exists(object, allowed_languages):
                            allowed_languages = models.DLanguage.objects.values_list(
                                "pk", flat=True)

                        # this is essentially the skos element type within a <skos:Concept>
                        # element (eg: prefLabel, broader, etc...)
                        relation_or_value_type = predicate.replace(
                            SKOS, "").replace(ARCHES, "")

                        if relation_or_value_type in skos_value_types_list:
                            value_type = skos_value_types[
                                relation_or_value_type]
                            val = self.unwrapJsonLiteral(object)
                            if bulk_load is True:
                                values.append(
                                    models.Value(
                                        pk=val["value_id"],
                                        concept_id=concept.pk,
                                        value=val["value"],
                                        language_id=object.language
                                        or default_lang,
                                        valuetype_id=value_type.valuetype,
                                    ))
                            else:
                                concept.addvalue({
                                    'id':
                                    val['value_id'],
                                    'value':
                                    val['value'],
                                    'language':
                                    object.language or default_lang,
                                    'type':
                                    value_type.valuetype,
                                    'category':
                                    value_type.category
                                })

                self.nodes.append(concept)

            for s, v, o in graph.triples((None, SKOS.member, None)):
                # print "%s %s %s " % (s,v,o)
                self.member_relations.append({
                    "source":
                    self.generate_uuid_from_subject(baseuuid, s),
                    "type":
                    "member",
                    "target":
                    self.generate_uuid_from_subject(baseuuid, o),
                })

            # insert and index the concpets
            scheme_node = None
            orphaned_concepts = {}
            concepts = []
            # bulk_create() does NOT call the object's save() method, nor pre_save/post_save
            # TODO: figure out how to ensure functions get called with bulk_create()
            with transaction.atomic():
                if bulk_load is True:
                    models.Concept.objects.bulk_create(self.nodes,
                                                       ignore_conflicts=True)
                    models.Value.objects.bulk_create(values,
                                                     ignore_conflicts=True)
                    self.logger.info(
                        f"Bulk created: {len(self.nodes)} concepts and {len(values)} values from {path}"
                    )
                    for node in self.nodes:
                        if node.nodetype.nodetype == "ConceptScheme":
                            scheme_node = Concept({
                                "id": node.conceptid,
                                "legacyoid": str(scheme),
                                "nodetype": "ConceptScheme",
                            })
                        elif node.nodetype.nodetype == "Concept":
                            orphaned_concepts[str(node.conceptid)] = node
                        if staging_options == "stage":
                            try:
                                models.Concept.objects.get(pk=node.conceptid)
                            except:
                                # this is a new concept, so add a reference to it in the Candiates schema
                                if node.nodetype.nodetype != "ConceptScheme":
                                    self.relations.append({
                                        "source":
                                        "00000000-0000-0000-0000-000000000006",
                                        "type":
                                        "narrower",
                                        "target":
                                        node.conceptid,
                                    })

                        if overwrite_options == "overwrite":
                            node.save()
                            # concepts.append(node)
                        elif overwrite_options == "ignore":
                            try:
                                # don't do anything if the concept already exists
                                models.Concept.objects.get(pk=node.conceptid)
                            except:
                                # else save it
                                node.save()
                                # concepts.append(node)
                else:
                    for node in self.nodes:
                        if node.nodetype == 'ConceptScheme':
                            scheme_node = node
                        elif node.nodetype == 'Concept':
                            orphaned_concepts[str(node.id)] = node
                        if staging_options == 'stage':
                            try:
                                models.Concept.objects.get(pk=node.id)
                            except:
                                # this is a new concept, so add a reference to it in the Candiates schema
                                if node.nodetype != 'ConceptScheme':
                                    self.relations.append({
                                        'source':
                                        '00000000-0000-0000-0000-000000000006',
                                        'type': 'narrower',
                                        'target': node.id
                                    })

                        if overwrite_options == 'overwrite':
                            node.save()
                        elif overwrite_options == 'ignore':
                            try:
                                # don't do anything if the concept already exists
                                models.Concept.objects.get(pk=node.id)
                            except:
                                # else save it
                                node.save()

                # Concept().bulk_save(concepts, None)

                # insert the concept relations
                # TODO: make sure this still works with code commented out, then remove
                # relation_objs = []
                for relation in self.relations:
                    newrelation, created = models.Relation.objects.get_or_create(
                        conceptfrom_id=relation["source"],
                        conceptto_id=relation["target"],
                        relationtype_id=relation["type"],
                    )
                    # models.Relation.objects.bulk_create(relation_objs)
                    # check for orphaned concepts, every concept except the concept scheme should have an edge pointing to it
                    if (relation["type"] == "narrower" or relation["type"]
                            == "hasTopConcept") and orphaned_concepts.get(
                                relation["target"]) is not None:
                        orphaned_concepts.pop(str(relation["target"]))
                    # relation_objs.append(newrelation)

                if len(orphaned_concepts.keys()) > 0:
                    if scheme_node:
                        orphaned_scheme = Concept({
                            "id": uuid.uuid4(),
                            "legacyoid": uuid.uuid4(),
                            "nodetype": "ConceptScheme",
                        })
                        orphaned_scheme_value = None
                        for value in scheme_node.values:
                            if value.type == "prefLabel":
                                orphaned_scheme.addvalue({
                                    "id":
                                    uuid.uuid4(),
                                    "value":
                                    "ORPHANS - " + value.value,
                                    "language":
                                    value.language,
                                    "type":
                                    value.type,
                                    "category":
                                    value.category,
                                })
                        orphaned_scheme.save()
                        for (
                                orphaned_concept_id,
                                orphaned_concept,
                        ) in orphaned_concepts.items():
                            models.Relation.objects.create(
                                conceptfrom_id=str(orphaned_scheme.id),
                                conceptto_id=orphaned_concept_id,
                                relationtype_id="narrower",
                            )
                        self.logger.warning(
                            "The SKOS file appears to have orphaned concepts.")

                # need to index after the concepts and relations have been entered into the db
                # so that the proper context gets indexed with the concept
                if scheme_node:
                    scheme_node.bulk_index()

            # insert the concept collection relations
            # we do this outide a transaction so that we can load incomplete collections
            # relation_objs = []
            # TODO: debug bulk_create to speed up this section of skos
            for relation in self.member_relations:
                try:
                    # if bulk_load is True:
                    # newrelation = models.Relation(
                    #     conceptfrom_id=relation['source'],
                    #     conceptto_id=relation['target'],
                    #     relationtype_id=relation['type']
                    # )
                    # relation_objs.append(newrelation)
                    # else:
                    newrelation, created = models.Relation.objects.get_or_create(
                        conceptfrom_id=relation["source"],
                        conceptto_id=relation["target"],
                        relationtype_id=relation["type"],
                    )
                except IntegrityError as e:
                    self.logger.warning(e)
                    pass

            # if bulk_load is True:
            #   models.Relation.objects.bulk_create(relation_objs, ignore_conflicts=True)

            return scheme_node
        else:
            raise Exception(
                "graph argument should be of type rdflib.graph.Graph")