def get_concepts(self, uris):
    """
    Get a list of concepts given a list of AAT uris like http://vocab.getty.edu/aat/300380087

    Arguments:
    uris -- a comma separated string of AAT concept uris

    Returns a list with one Concept per uri.  Each concept carries the
    prefLabel and scopeNote values returned by the SPARQL endpoint for the
    allowed languages, plus a dcterms identifier value holding the uri itself.

    Raises an Exception (with an HTML formatted message) as soon as the query
    for one of the uris returns no results.

    """

    default_lang = settings.LANGUAGE_CODE
    dcterms_identifier_type = DValueType.objects.get(
        valuetype=str(DCTERMS.identifier).replace(str(DCTERMS), ''),
        namespace='dcterms')

    # the AAT expects language codes to be all lower case
    langs = ','.join('"%s"' % lang.lower() for lang in self.allowed_languages)

    concepts = []
    for uri in uris.split(','):
        query = (
            " SELECT ?value ?type WHERE {"
            " { <%s> skos:prefLabel ?value . BIND('prefLabel' AS ?type) }"
            " UNION"
            " { <%s> skos:scopeNote [rdf:value ?value] . BIND('scopeNote' AS ?type) }"
            " FILTER (lang(?value) in (%s)) }"
        ) % (uri, uri, langs)
        bindings = self.perform_sparql_query(query)["results"]["bindings"]

        if len(bindings) == 0:
            # the query is shown inside a <pre> element, so its own markup
            # (eg: <http://...>) has to be escaped or the browser will eat it
            escaped_query = query.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
            raise Exception(
                _("<strong>Error in SPARQL query:</strong><br>"
                  "Test this query directly by pasting the query below into the Getty's own SPARQL endpoint at "
                  "<a href='http://vocab.getty.edu/sparql' target='_blank'>http://vocab.getty.edu/sparql</a>"
                  "<i><pre>%s</pre></i>"
                  "Query returned 0 results, please check the query for errors. "
                  "You may need to add the appropriate languages into the database for this query to work<br><br>"
                  ) % escaped_query)

        concept = Concept()
        concept.nodetype = 'Concept'
        for result in bindings:
            concept.addvalue({
                'type': result["type"]["value"],
                'value': result["value"]["value"],
                'language': result["value"]["xml:lang"]
            })
        # keep a reference back to the uri the concept was loaded from
        concept.addvalue({
            'value': uri,
            'language': default_lang,
            'type': dcterms_identifier_type.valuetype,
            'category': dcterms_identifier_type.category
        })
        concepts.append(concept)

    return concepts
def get_concepts(self, uris):
    """
    Get a list of concepts given a list of AAT uris like http://vocab.getty.edu/aat/300380087

    Arguments:
    uris -- a comma separated string of AAT concept uris

    Returns a list with one Concept per uri, each carrying the prefLabel and
    scopeNote values returned by the SPARQL endpoint for the allowed languages.

    Raises an Exception (with an HTML formatted message) as soon as the query
    for one of the uris returns no results.

    """

    # language codes are lower cased before they go into the FILTER clause
    langs = ','.join('"%s"' % lang.lower() for lang in self.allowed_languages)

    concepts = []
    for uri in uris.split(','):
        query = (
            " SELECT ?value ?type WHERE {"
            " { <%s> skos:prefLabel ?value . BIND('prefLabel' AS ?type) }"
            " UNION"
            " { <%s> skos:scopeNote [rdf:value ?value] . BIND('scopeNote' AS ?type) }"
            " FILTER (lang(?value) in (%s)) }"
        ) % (uri, uri, langs)
        bindings = self.perform_sparql_query(query)["results"]["bindings"]

        if len(bindings) == 0:
            # the query is shown inside a <pre> element, so its own markup
            # (eg: <http://...>) has to be escaped or the browser will eat it
            escaped_query = query.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
            raise Exception(
                _("<strong>Error in SPARQL query:</strong><br>"
                  "Test this query directly by pasting the query below into the Getty's own SPARQL endpoint at "
                  "<a href='http://vocab.getty.edu/sparql' target='_blank'>http://vocab.getty.edu/sparql</a>"
                  "<i><pre>%s</pre></i>"
                  "Query returned 0 results, please check the query for errors. "
                  "You may need to add the appropriate languages into the database for this query to work<br><br>"
                  ) % escaped_query)

        concept = Concept()
        concept.nodetype = 'Concept'
        for result in bindings:
            concept.addvalue({
                'type': result["type"]["value"],
                'value': result["value"]["value"],
                'language': result["value"]["xml:lang"]
            })
        concepts.append(concept)

    return concepts
def save_concepts_from_skos(self, graph):
    """
    given an RDF graph, tries to save the concepts to the system

    Arguments:
    graph -- an rdflib.graph.Graph holding the SKOS data

    Every skos:ConceptScheme in the graph, and every subject that is
    skos:inScheme of it, is turned into a Concept node.  hasTopConcept,
    broader, narrower and related statements are collected as relations.
    Nodes and relations are then saved and the nodes indexed.

    Returns self.

    Raises an Exception if graph is not an rdflib Graph, or if self.nodes is
    still empty once the search for ConceptSchemes has finished.

    """

    baseuuid = uuid.uuid4()
    allowed_languages = models.DLanguage.objects.values_list('pk', flat=True)

    value_types = models.DValueType.objects.all()
    skos_value_types = value_types.filter(namespace='skos')
    skos_value_types_list = skos_value_types.values_list('valuetype', flat=True)
    dcterms_value_types = value_types.filter(namespace='dcterms')
    # evaluated once here rather than once for every predicate in the graph
    dcterms_value_types_list = list(dcterms_value_types.values_list('valuetype', flat=True))

    def register_language(literal, known_languages):
        """
        adds the language of the literal to the system if it isn't known yet
        and returns the (possibly refreshed) list of known language ids

        """

        if hasattr(literal, 'language') and literal.language not in known_languages:
            newlang = models.DLanguage()
            newlang.pk = literal.language
            newlang.languagename = literal.language
            newlang.isdefault = False
            newlang.save()
            return models.DLanguage.objects.values_list('pk', flat=True)
        return known_languages

    # if the graph is of the type rdflib.graph.Graph
    if not isinstance(graph, Graph):
        raise Exception('graph argument should be of type rdflib.graph.Graph')

    # Search for ConceptSchemes first
    for scheme, v, o in graph.triples((None, RDF.type, SKOS.ConceptScheme)):
        scheme_id = self.generate_uuid_from_subject(baseuuid, scheme)
        concept_scheme = Concept({
            'id': scheme_id,
            'legacyoid': str(scheme),
            'nodetype': 'ConceptScheme'
        })

        for predicate, obj in graph.predicate_objects(subject=scheme):
            if str(DCTERMS) in predicate and predicate.replace(DCTERMS, '') in dcterms_value_types_list:
                allowed_languages = register_language(obj, allowed_languages)

                try:
                    # first try and get any values associated with the concept_scheme
                    # predicate.replace(DCTERMS, '') should yield something like 'title' or 'description', etc..
                    value_type = dcterms_value_types.get(valuetype=predicate.replace(DCTERMS, ''))
                    if predicate == DCTERMS.title:
                        concept_scheme.addvalue({'value': obj, 'language': obj.language, 'type': 'prefLabel', 'category': value_type.category})
                        print('Casting dcterms:title to skos:prefLabel')
                    if predicate == DCTERMS.description:
                        concept_scheme.addvalue({'value': obj, 'language': obj.language, 'type': 'scopeNote', 'category': value_type.category})
                        print('Casting dcterms:description to skos:scopeNote')
                except Exception:
                    # best effort: a scheme value that can't be read is skipped
                    pass

            if str(SKOS) in predicate:
                if predicate == SKOS.hasTopConcept:
                    self.relations.append({'source': scheme_id, 'type': 'hasTopConcept', 'target': self.generate_uuid_from_subject(baseuuid, obj)})

        self.nodes.append(concept_scheme)

        # Search for Concepts
        for s, v, o in graph.triples((None, SKOS.inScheme, scheme)):
            concept = Concept({
                'id': self.generate_uuid_from_subject(baseuuid, s),
                'legacyoid': str(s),
                'nodetype': 'Concept'
            })

            # loop through all the elements within a <skos:Concept> element
            for predicate, obj in graph.predicate_objects(subject=s):
                if str(SKOS) not in predicate:
                    continue

                allowed_languages = register_language(obj, allowed_languages)

                # this is essentially the skos element type within a <skos:Concept> element (eg: prefLabel, broader, etc...)
                relation_or_value_type = predicate.replace(SKOS, '')

                if relation_or_value_type in skos_value_types_list:
                    value_type = skos_value_types.get(valuetype=relation_or_value_type)
                    concept.addvalue({'value': obj, 'language': obj.language, 'type': value_type.valuetype, 'category': value_type.category})
                elif predicate == SKOS.broader:
                    # a broader statement is stored as the inverse narrower relation
                    self.relations.append({'source': self.generate_uuid_from_subject(baseuuid, obj), 'type': 'narrower', 'target': self.generate_uuid_from_subject(baseuuid, s)})
                elif predicate == SKOS.narrower:
                    self.relations.append({'source': self.generate_uuid_from_subject(baseuuid, s), 'type': relation_or_value_type, 'target': self.generate_uuid_from_subject(baseuuid, obj)})
                elif predicate == SKOS.related:
                    self.relations.append({'source': self.generate_uuid_from_subject(baseuuid, s), 'type': relation_or_value_type, 'target': self.generate_uuid_from_subject(baseuuid, obj)})

            self.nodes.append(concept)

    # checked once the whole graph has been searched, so that it can actually fire
    if len(self.nodes) == 0:
        raise Exception('No ConceptScheme found in file.')

    # insert and index the concepts
    with transaction.atomic():
        for node in self.nodes:
            node.save()

        # insert the concept relations
        for relation in self.relations:
            newrelation = models.Relation()
            newrelation.relationid = str(uuid.uuid4())
            newrelation.conceptfrom_id = relation['source']
            newrelation.conceptto_id = relation['target']
            newrelation.relationtype_id = relation['type']
            newrelation.save()

        # need to index after the concepts and relations have been entered into the db
        # so that the proper context gets indexed with the concept
        for node in self.nodes:
            node.index()

    return self
def load_authority_file(cursor, path_to_authority_files, filename, auth_file_to_entity_concept_mapping):
    """
    Loads the concepts of one authority document csv file (and of its
    optional companion '<name>.values.csv' file) into the system

    Arguments:
    cursor -- db cursor used to insert the concept relations
    path_to_authority_files -- directory that holds the authority files
    filename -- name of the authority document csv file
    auth_file_to_entity_concept_mapping -- dict keyed by authority file name, each value
        a list of dicts with an 'ENTITYTYPE_CONCEPTID' entry

    Returns the list of error messages collected along the way (empty if
    nothing went wrong).  Errors are collected rather than raised.

    """

    print(filename.upper())
    value_types = models.ValueTypes.objects.all()
    filepath = os.path.join(path_to_authority_files, filename)
    values_filepath = filepath.replace('.csv', '.values.csv')
    # sys.maxint only exists on python 2
    unicodecsv.field_size_limit(getattr(sys, 'maxint', sys.maxsize))
    errors = []
    lookups = Lookups()

    # create nodes for each authority document file and relate them to the authority document node in the concept schema
    auth_doc_file_name = str(filename)
    display_file_name = string.capwords(auth_doc_file_name.replace('_', ' ').replace('AUTHORITY DOCUMENT.csv', '').strip())
    if auth_doc_file_name.upper() != 'ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES.E32.CSV':
        top_concept = Concept()
        top_concept.id = str(uuid.uuid4())
        top_concept.nodetype = 'Concept'
        top_concept.legacyoid = auth_doc_file_name
        top_concept.addvalue({'value': display_file_name, 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel', 'category': 'label'})
        lookups.add_relationship(source='00000000-0000-0000-0000-000000000001', type='hasTopConcept', target=top_concept.id)
    else:
        top_concept = Concept().get(id='00000000-0000-0000-0000-000000000005')
        top_concept.legacyoid = 'ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES.E32.csv'

    # rows refer to the top concept through the file name in their PARENTCONCEPTID column
    lookups.add_lookup(concept=top_concept, rownum=0)

    try:
        # NOTE(review): the 'rU' mode is python 2 era -- confirm the mode unicodecsv needs before running on python 3
        with open(filepath, 'rU') as f:
            rows = unicodecsv.DictReader(f, fieldnames=['CONCEPTID', 'PREFLABEL', 'ALTLABELS', 'PARENTCONCEPTID', 'CONCEPTTYPE', 'PROVIDER'],
                encoding='utf-8-sig', delimiter=',', restkey='ADDITIONAL', restval='MISSING')
            next(rows)  # skip header row
            for row in rows:
                try:
                    # NOTE(review): restval puts 'MISSING' into the *values* of a short row, while this
                    # tests the row's *keys*, so the branch looks like it can never fire -- confirm the
                    # intent before changing it, short rows are currently loaded
                    if 'MISSING' in row:
                        raise Exception('The row wasn\'t parsed properly. Missing %s' % (row['MISSING']))
                    else:
                        legacyoid = row[u'CONCEPTID']
                        concept = Concept()
                        concept.id = legacyoid if is_uuid(legacyoid) == True else str(uuid.uuid4())
                        concept.nodetype = 'Concept'
                        concept.legacyoid = row[u'CONCEPTID']
                        concept.addvalue({'value': row[u'PREFLABEL'], 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel', 'category': 'label'})
                        if row['CONCEPTTYPE'].lower() == 'collector':
                            concept.addvalue({'value': row[u'PREFLABEL'], 'language': settings.LANGUAGE_CODE, 'type': 'collector', 'category': 'label'})
                        if row[u'ALTLABELS'] != '':
                            for altlabel in row[u'ALTLABELS'].split(';'):
                                concept.addvalue({'value': altlabel, 'language': settings.LANGUAGE_CODE, 'type': 'altLabel', 'category': 'label'})

                        parent_concept_id = lookups.get_lookup(legacyoid=row[u'PARENTCONCEPTID']).id
                        lookups.add_relationship(source=parent_concept_id, type='narrower', target=concept.id, rownum=rows.line_num)
                        # don't add a member relationship between a top concept and its children
                        if parent_concept_id != top_concept.id:
                            lookups.add_relationship(source=parent_concept_id, type='member', target=concept.id, rownum=rows.line_num)

                        # add the member relationship from the E55 type (typically) to their top members
                        if auth_doc_file_name in auth_file_to_entity_concept_mapping and row[u'PARENTCONCEPTID'] == auth_doc_file_name:
                            for entitytype_info in auth_file_to_entity_concept_mapping[auth_doc_file_name]:
                                lookups.add_relationship(source=entitytype_info['ENTITYTYPE_CONCEPTID'], type='member', target=concept.id, rownum=rows.line_num)

                        if row[u'PARENTCONCEPTID'] == '' or (row[u'CONCEPTTYPE'].upper() != 'INDEX' and row[u'CONCEPTTYPE'].upper() != 'COLLECTOR'):
                            raise Exception('The row has invalid values.')

                        lookups.add_lookup(concept=concept, rownum=rows.line_num)

                except Exception as e:
                    errors.append('ERROR in row %s: %s' % (rows.line_num, str(e)))

    except UnicodeDecodeError as e:
        errors.append('ERROR: Make sure the file is saved with UTF-8 encoding\n%s\n%s' % (str(e), traceback.format_exc()))
    except Exception as e:
        errors.append('ERROR: %s\n%s' % (str(e), traceback.format_exc()))

    if len(errors) > 0:
        errors.insert(0, 'ERRORS IN FILE: %s\n' % (filename))
        errors.append('\n\n\n\n')

    try:
        # try and open the values file if it exists
        if exists(values_filepath):
            with open(values_filepath, 'rU') as f:
                rows = unicodecsv.DictReader(f, fieldnames=['CONCEPTID', 'VALUE', 'VALUETYPE', 'PROVIDER'],
                    encoding='utf-8-sig', delimiter=',', restkey='ADDITIONAL', restval='MISSING')
                next(rows)  # skip header row
                # fetched once and extended below, rather than queried again for every row
                known_valuetypes = set(value_types.values_list('valuetype', flat=True))
                for row in rows:
                    try:
                        if 'ADDITIONAL' in row:
                            raise Exception('The row wasn\'t parsed properly. Additional fields found %s.  Add quotes to values that have commas in them.' % (row['ADDITIONAL']))
                        else:
                            row_valuetype = row[u'VALUETYPE'].strip()
                            if row_valuetype not in known_valuetypes:
                                valuetype = models.ValueTypes()
                                valuetype.valuetype = row_valuetype
                                valuetype.category = 'undefined'
                                valuetype.namespace = 'arches'
                                valuetype.save()

                                value_types = models.ValueTypes.objects.all()
                                known_valuetypes.add(row_valuetype)

                            concept = lookups.get_lookup(legacyoid=row[u'CONCEPTID'])
                            category = value_types.get(valuetype=row_valuetype).category
                            # store the same (stripped) value type that was looked up / created above
                            concept.addvalue({'value': row[u'VALUE'], 'type': row_valuetype, 'category': category})

                    except Exception as e:
                        errors.append('ERROR in row %s (%s): %s' % (rows.line_num, str(e), row))

    except UnicodeDecodeError as e:
        errors.append('ERROR: Make sure the file is saved with UTF-8 encoding\n%s\n%s' % (str(e), traceback.format_exc()))
    except Exception as e:
        errors.append('ERROR: %s\n%s' % (str(e), traceback.format_exc()))

    if len(errors) > 0:
        errors.insert(0, 'ERRORS IN FILE: %s\n' % (filename.replace('.csv', '.values.csv')))
        errors.append('\n\n\n\n')

    # insert and index the concepts
    for key in lookups.lookup:
        try:
            lookups.lookup[key]['concept'].save()
        except Exception as e:
            errors.append('ERROR in row %s (%s):\n%s\n' % (lookups.lookup[key]['rownum'], str(e), traceback.format_exc()))

        lookups.lookup[key]['concept'].index(scheme=top_concept)

    # insert the concept relations
    # the values are handed to the driver as parameters instead of being pasted into the statement
    sql = """
        INSERT INTO concepts.relations(conceptidfrom, conceptidto, relationtype)
        VALUES (%s, %s, %s);
    """
    for relation in lookups.concept_relationships:
        try:
            cursor.execute(sql, [relation['source'], relation['target'], relation['type']])
        except Exception as e:
            errors.append('ERROR in row %s (%s):\n%s\n' % (relation['rownum'], str(e), traceback.format_exc()))

    if len(errors) > 0:
        errors.insert(0, 'ERRORS IN FILE: %s\n' % (filename))
        errors.append('\n\n\n\n')

    return errors
def save_concepts_from_skos(self, graph, overwrite_options="overwrite", staging_options="keep", bulk_load=False, path=""):
    """
    given an RDF graph, tries to save the concepts to the system

    Keyword arguments:
    overwrite_options -- 'overwrite', 'ignore'
    staging_options -- 'stage', 'keep'
    bulk_load -- when True, concepts and values are built as model instances and
        inserted with bulk_create instead of being built as Concept objects
    path -- only used in the log message written after a bulk insert

    Returns the node of the ConceptScheme found in the graph, or None if
    self.nodes holds no ConceptScheme.

    Raises an Exception if graph is not an rdflib Graph.

    """

    baseuuid = uuid.uuid4()
    allowed_languages = models.DLanguage.objects.values_list("pk", flat=True)
    default_lang = settings.LANGUAGE_CODE
    if bulk_load is True:
        self.logger.setLevel(logging.ERROR)

    value_types = models.DValueType.objects.all()
    # skos and arches value types, keyed by their name (eg: 'prefLabel')
    skos_value_types = {
        valuetype.valuetype: valuetype
        for valuetype in value_types.filter(Q(namespace="skos") | Q(namespace="arches"))
    }
    dcterms_value_types = value_types.filter(namespace="dcterms")
    # evaluated once here rather than once for every predicate of a scheme
    dcterms_value_types_list = list(dcterms_value_types.values_list("valuetype", flat=True))
    dcterms_identifier_type = dcterms_value_types.get(
        valuetype=str(DCTERMS.identifier).replace(str(DCTERMS), ""))

    # if the graph is of the type rdflib.graph.Graph
    if not isinstance(graph, Graph):
        raise Exception("graph argument should be of type rdflib.graph.Graph")

    # model instances that get inserted with bulk_create (only filled when bulk loading)
    values = []

    def value_pk(unwrapped):
        """returns the value id of an unwrapped literal, or a new uuid if it doesn't have one"""
        value_id = unwrapped["value_id"]
        if value_id != "" and value_id is not None:
            return value_id
        return str(uuid.uuid4())

    def new_node(node_id, subject, nodetype):
        """builds the node for a subject: a model instance when bulk loading, a Concept otherwise"""
        if bulk_load is True:
            return models.Concept(pk=node_id, legacyoid=str(subject), nodetype_id=nodetype)
        return Concept({'id': node_id, 'legacyoid': str(subject), 'nodetype': nodetype})

    def add_value(node, unwrapped, language, valuetype, category):
        """attaches a value to a node: queued for bulk_create when bulk loading, added to the Concept otherwise"""
        if bulk_load is True:
            values.append(
                models.Value(
                    pk=value_pk(unwrapped),
                    concept_id=node.pk,
                    value=unwrapped["value"],
                    language_id=language,
                    valuetype_id=valuetype,
                ))
        else:
            node.addvalue({
                'id': unwrapped['value_id'],
                'value': unwrapped['value'],
                'language': language,
                'type': valuetype,
                'category': category
            })

    def add_identifier(node, identifier):
        """attaches the dcterms identifier value to a node"""
        add_value(node, identifier, default_lang,
                  dcterms_identifier_type.valuetype, dcterms_identifier_type.category)

    # Search for ConceptSchemes first
    for scheme, v, o in graph.triples((None, RDF.type, SKOS.ConceptScheme)):
        identifier = self.unwrapJsonLiteral(str(scheme))
        scheme_id = self.generate_uuid_from_subject(baseuuid, scheme)
        concept_scheme = new_node(scheme_id, scheme, "ConceptScheme")

        for predicate, obj in graph.predicate_objects(subject=scheme):
            if str(DCTERMS) in predicate and predicate.replace(DCTERMS, "") in dcterms_value_types_list:
                if not self.language_exists(obj, allowed_languages):
                    allowed_languages = models.DLanguage.objects.values_list("pk", flat=True)

                try:
                    # first try and get any values associated with the concept_scheme
                    # predicate.replace(DCTERMS, '') should yield something like 'title' or 'description', etc..
                    value_type = dcterms_value_types.get(valuetype=predicate.replace(DCTERMS, ""))
                    val = self.unwrapJsonLiteral(obj)
                    if predicate == DCTERMS.title:
                        # dcterms:title is cast to skos:prefLabel
                        add_value(concept_scheme, val, obj.language or default_lang, "prefLabel", value_type.category)
                    elif predicate == DCTERMS.description:
                        # dcterms:description is cast to skos:scopeNote
                        add_value(concept_scheme, val, obj.language or default_lang, "scopeNote", value_type.category)
                    elif predicate == DCTERMS.identifier:
                        identifier = self.unwrapJsonLiteral(str(obj))
                except Exception as e:
                    # best effort: a scheme value that can't be read is skipped, but no longer silently
                    self.logger.warning("Skipping %s of concept scheme %s: %s", predicate, scheme, e)

            if str(SKOS) in predicate:
                if predicate == SKOS.hasTopConcept:
                    top_concept_id = self.generate_uuid_from_subject(baseuuid, obj)
                    self.relations.append({
                        "source": scheme_id,
                        "type": "hasTopConcept",
                        "target": top_concept_id,
                    })

        add_identifier(concept_scheme, identifier)
        self.nodes.append(concept_scheme)

        # Search for Concepts
        for s, v, o in graph.triples((None, SKOS.inScheme, scheme)):
            identifier = self.unwrapJsonLiteral(str(s))
            concept = new_node(self.generate_uuid_from_subject(baseuuid, s), s, "Concept")

            # loop through all the elements within a <skos:Concept> element
            for predicate, obj in graph.predicate_objects(subject=s):
                if str(SKOS) in predicate or str(ARCHES) in predicate:
                    if not self.language_exists(obj, allowed_languages):
                        allowed_languages = models.DLanguage.objects.values_list("pk", flat=True)

                    # this is essentially the skos element type within a <skos:Concept>
                    # element (eg: prefLabel, broader, etc...)
                    relation_or_value_type = predicate.replace(SKOS, "").replace(ARCHES, "")
                    value_type = skos_value_types.get(relation_or_value_type)

                    if value_type is not None:
                        add_value(concept, self.unwrapJsonLiteral(obj), obj.language or default_lang,
                                  value_type.valuetype, value_type.category)
                    elif predicate == SKOS.broader:
                        # a broader statement is stored as the inverse narrower relation
                        self.relations.append({
                            "source": self.generate_uuid_from_subject(baseuuid, obj),
                            "type": "narrower",
                            "target": self.generate_uuid_from_subject(baseuuid, s),
                        })
                    elif predicate == SKOS.narrower or predicate == SKOS.related:
                        self.relations.append({
                            "source": self.generate_uuid_from_subject(baseuuid, s),
                            "type": relation_or_value_type,
                            "target": self.generate_uuid_from_subject(baseuuid, obj),
                        })

                elif predicate == DCTERMS.identifier:
                    identifier = self.unwrapJsonLiteral(str(obj))

            add_identifier(concept, identifier)
            self.nodes.append(concept)

    # Search for SKOS.Collections
    for s, v, o in graph.triples((None, RDF.type, SKOS.Collection)):
        concept = new_node(self.generate_uuid_from_subject(baseuuid, s), s, "Collection")

        # loop through all the elements within a <skos:Collection> element
        for predicate, obj in graph.predicate_objects(subject=s):
            if str(SKOS) in predicate or str(ARCHES) in predicate:
                if not self.language_exists(obj, allowed_languages):
                    allowed_languages = models.DLanguage.objects.values_list("pk", flat=True)

                relation_or_value_type = predicate.replace(SKOS, "").replace(ARCHES, "")
                value_type = skos_value_types.get(relation_or_value_type)

                if value_type is not None:
                    # goes through the same helper as every other value, so a value
                    # without an id gets a fresh uuid here as well when bulk loading
                    add_value(concept, self.unwrapJsonLiteral(obj), obj.language or default_lang,
                              value_type.valuetype, value_type.category)

        self.nodes.append(concept)

    for s, v, o in graph.triples((None, SKOS.member, None)):
        self.member_relations.append({
            "source": self.generate_uuid_from_subject(baseuuid, s),
            "type": "member",
            "target": self.generate_uuid_from_subject(baseuuid, o),
        })

    # insert and index the concepts
    scheme_node = None
    # concepts that no 'narrower' or 'hasTopConcept' relation points to (yet), keyed by str(id)
    orphaned_concepts = {}

    # bulk_create() does NOT call the object's save() method, nor pre_save/post_save
    # TODO: figure out how to ensure functions get called with bulk_create()
    with transaction.atomic():
        if bulk_load is True:
            models.Concept.objects.bulk_create(self.nodes, ignore_conflicts=True)
            models.Value.objects.bulk_create(values, ignore_conflicts=True)
            self.logger.info(
                f"Bulk created: {len(self.nodes)} concepts and {len(values)} values from {path}"
            )

        for node in self.nodes:
            # model instances and Concept objects name these two attributes differently
            if bulk_load is True:
                nodetype, node_id = node.nodetype.nodetype, node.conceptid
            else:
                nodetype, node_id = node.nodetype, node.id

            if nodetype == "ConceptScheme":
                if bulk_load is True:
                    scheme_node = Concept({
                        "id": node_id,
                        "legacyoid": str(scheme),
                        "nodetype": "ConceptScheme",
                    })
                else:
                    scheme_node = node
            elif nodetype == "Concept":
                orphaned_concepts[str(node_id)] = node

            if staging_options == "stage":
                try:
                    models.Concept.objects.get(pk=node_id)
                except Exception:
                    # this is a new concept, so add a reference to it in the Candidates schema
                    if nodetype != "ConceptScheme":
                        self.relations.append({
                            "source": "00000000-0000-0000-0000-000000000006",
                            "type": "narrower",
                            "target": node_id,
                        })

            if overwrite_options == "overwrite":
                node.save()
            elif overwrite_options == "ignore":
                try:
                    # don't do anything if the concept already exists
                    models.Concept.objects.get(pk=node_id)
                except Exception:
                    # else save it
                    node.save()

        # insert the concept relations
        for relation in self.relations:
            models.Relation.objects.get_or_create(
                conceptfrom_id=relation["source"],
                conceptto_id=relation["target"],
                relationtype_id=relation["type"],
            )
            # check for orphaned concepts, every concept except the concept scheme should have an edge pointing to it
            if relation["type"] in ("narrower", "hasTopConcept"):
                # the dict is keyed by str(id), so the target has to be looked up as a string too
                orphaned_concepts.pop(str(relation["target"]), None)

        if len(orphaned_concepts) > 0:
            if scheme_node:
                # park the orphans under a scheme of their own, named after the real one
                orphaned_scheme = Concept({
                    "id": uuid.uuid4(),
                    "legacyoid": uuid.uuid4(),
                    "nodetype": "ConceptScheme",
                })
                for value in scheme_node.values:
                    if value.type == "prefLabel":
                        orphaned_scheme.addvalue({
                            "id": uuid.uuid4(),
                            "value": "ORPHANS - " + value.value,
                            "language": value.language,
                            "type": value.type,
                            "category": value.category,
                        })
                orphaned_scheme.save()
                for orphaned_concept_id in orphaned_concepts:
                    models.Relation.objects.create(
                        conceptfrom_id=str(orphaned_scheme.id),
                        conceptto_id=orphaned_concept_id,
                        relationtype_id="narrower",
                    )
            self.logger.warning("The SKOS file appears to have orphaned concepts.")

    # need to index after the concepts and relations have been entered into the db
    # so that the proper context gets indexed with the concept
    if scheme_node:
        scheme_node.bulk_index()

    # insert the concept collection relations
    # we do this outside a transaction so that we can load incomplete collections
    # TODO: debug bulk_create to speed up this section of skos
    for relation in self.member_relations:
        try:
            models.Relation.objects.get_or_create(
                conceptfrom_id=relation["source"],
                conceptto_id=relation["target"],
                relationtype_id=relation["type"],
            )
        except IntegrityError as e:
            self.logger.warning(e)

    return scheme_node
def create_reference_data(new_concepts, create_collections):
    """
    Creates the reference data needed for values that were found during an
    import: collections, top concepts and concepts for concept nodes, and
    domain options for domain-value nodes

    Arguments:
    new_concepts -- dict keyed by node id; each value is a dict of
        {conceptid (or domainid): value} for that node
    create_collections -- when equal to True a new collection is created for each
        concept node, otherwise the concepts are added to the collection already
        assigned to the node (one is created if the node has none)

    Returns nothing.  Warnings are added to self.errors.

    """

    # NOTE(review): `self` is not a parameter of this function -- presumably it is
    # supplied by an enclosing scope; confirm against the surrounding code

    errors = []

    def warn(message):
        """records a warning once, both locally and on self.errors"""
        warning = {'type': 'WARNING', 'message': message}
        errors.append(warning)
        self.errors.append(warning)

    def create_collection(node, collectionid):
        """creates and saves a new import collection and assigns it to the node"""
        collection = Concept({
            'id': collectionid,
            'legacyoid': node.name + '_' + str(node.graph_id) + '_import',
            'nodetype': 'Collection'
        })
        collection.addvalue({'id': str(uuid.uuid4()), 'value': node.name + '_import', 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
        node.config['rdmCollection'] = collectionid
        node.save()
        collection.save()
        return collection

    candidates = Concept().get(id='00000000-0000-0000-0000-000000000006')
    for arches_nodeid, concepts in new_concepts.items():
        collectionid = str(uuid.uuid4())
        topconceptid = str(uuid.uuid4())
        node = Node.objects.get(nodeid=arches_nodeid)

        # if node.datatype is concept or concept-list create concepts and collections
        if node.datatype in ['concept', 'concept-list']:
            # create collection if create_collections = create, otherwise append to collection already assigned to node
            # (compared with == on purpose: a truthiness test would treat any non empty value as 'create')
            if create_collections == True:
                collection_legacyoid = node.name + '_' + str(node.graph_id) + '_import'
                # check to see that there is not already a collection for this node
                if node.config['rdmCollection'] is not None:
                    warn('A collection already exists for the {0} node. Use the add option to add concepts to this collection.'.format(node.name))
                    collection = None
                else:
                    # if there is no collection assigned to this node, create one and assign it to the node
                    try:
                        # check to see that a collection with this legacyid does not already exist
                        collection = Concept().get(legacyoid=collection_legacyoid)
                    except Exception:
                        collection = create_collection(node, collectionid)
                    else:
                        warn('A collection with the legacyid {0} already exists.'.format(collection_legacyoid))
            else:
                # if create collection = add check that there is a collection associated with node,
                # if no collection associated with node create a collection and associated with the node
                try:
                    collection = Concept().get(id=node.config['rdmCollection'])
                except Exception:
                    collection = create_collection(node, collectionid)

            if collection is not None:
                topconcept_legacyoid = node.name + '_' + str(node.graph_id)
                # Check if top concept already exists, if not create it and add to candidates scheme
                try:
                    topconcept = Concept().get(legacyoid=topconcept_legacyoid)
                except Exception:
                    topconcept = Concept({
                        'id': topconceptid,
                        'legacyoid': topconcept_legacyoid,
                        'nodetype': 'Concept'
                    })
                    topconcept.addvalue({'id': str(uuid.uuid4()), 'value': node.name + '_import', 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
                    topconcept.save()
                candidates.add_relation(topconcept, 'narrower')

                # create child concepts and relate to top concept and collection accordingly
                for conceptid, value in concepts.items():
                    concept_legacyoid = value + '_' + node.name + '_' + str(node.graph_id)
                    # check if concept already exists, if not create and add to topconcept and collection
                    try:
                        # the first child of the top concept whose second item equals the value is reused
                        conceptid = [child for child in topconcept.get_child_concepts(topconcept.id) if child[1] == value][0][0]
                        concept = Concept().get(id=conceptid)
                    except Exception:
                        concept = Concept({
                            'id': conceptid,
                            'legacyoid': concept_legacyoid,
                            'nodetype': 'Concept'
                        })
                        concept.addvalue({'id': str(uuid.uuid4()), 'value': value, 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel'})
                        concept.save()
                    collection.add_relation(concept, 'member')
                    topconcept.add_relation(concept, 'narrower')

        # if node.datatype is domain or domain-list create options array in node.config
        elif node.datatype in ['domain-value', 'domain-value-list']:
            for domainid, value in concepts.items():
                # check if value already exists in domain
                if not any(option['text'] == value for option in node.config['options']):
                    domainvalue = {
                        "text": value,
                        "selected": False,
                        "id": domainid
                    }
                    node.config['options'].append(domainvalue)
                    node.save()
def save_concepts_from_skos(self, graph, overwrite_options='overwrite', staging_options='keep'):
    """
    given an RDF graph, tries to save the concepts to the system

    Keyword arguments:
    overwrite_options -- 'overwrite' saves every parsed node, 'ignore' saves only
                         the nodes that are not in the database yet
    staging_options -- 'stage' relates every node that is new to the database
                       (ConceptSchemes excepted) to the Candidates scheme,
                       'keep' adds no such relation

    Returns the last ConceptScheme found in self.nodes (None if there is none).
    Raises Exception if graph is not an rdflib.graph.Graph.

    """
    if not isinstance(graph, Graph):
        raise Exception('graph argument should be of type rdflib.graph.Graph')

    baseuuid = uuid.uuid4()
    allowed_languages = models.DLanguage.objects.values_list('pk', flat=True)
    default_lang = settings.LANGUAGE_CODE

    value_types = models.DValueType.objects.all()
    skos_value_types = {
        value_type.valuetype: value_type
        for value_type in value_types.filter(Q(namespace='skos') | Q(namespace='arches'))
    }
    dcterms_value_types = value_types.filter(namespace='dcterms')
    # evaluated once here rather than once per predicate in the loop below
    dcterms_value_type_names = set(dcterms_value_types.values_list('valuetype', flat=True))
    dcterms_identifier_type = dcterms_value_types.get(
        valuetype=str(DCTERMS.identifier).replace(str(DCTERMS), ''))

    # Search for ConceptSchemes first
    for scheme, v, o in graph.triples((None, RDF.type, SKOS.ConceptScheme)):
        # the subject URI is the identifier unless a dcterms:identifier overrides it
        identifier = self.unwrapJsonLiteral(str(scheme))
        scheme_id = self.generate_uuid_from_subject(baseuuid, scheme)
        concept_scheme = Concept({
            'id': scheme_id,
            'legacyoid': str(scheme),
            'nodetype': 'ConceptScheme'
        })

        for predicate, obj in graph.predicate_objects(subject=scheme):
            if str(DCTERMS) in predicate and predicate.replace(DCTERMS, '') in dcterms_value_type_names:
                if not self.language_exists(obj, allowed_languages):
                    allowed_languages = models.DLanguage.objects.values_list('pk', flat=True)

                try:
                    # first try and get any values associated with the concept_scheme
                    # predicate.replace(DCTERMS, '') yields something like 'title' or 'description'
                    value_type = dcterms_value_types.get(valuetype=predicate.replace(DCTERMS, ''))
                    val = self.unwrapJsonLiteral(obj)
                    if predicate == DCTERMS.title:
                        concept_scheme.addvalue({
                            'id': val['value_id'],
                            'value': val['value'],
                            'language': obj.language or default_lang,
                            'type': 'prefLabel',
                            'category': value_type.category
                        })
                        print('Casting dcterms:title to skos:prefLabel')
                    elif predicate == DCTERMS.description:
                        concept_scheme.addvalue({
                            'id': val['value_id'],
                            'value': val['value'],
                            'language': obj.language or default_lang,
                            'type': 'scopeNote',
                            'category': value_type.category
                        })
                        print('Casting dcterms:description to skos:scopeNote')
                    elif predicate == DCTERMS.identifier:
                        identifier = self.unwrapJsonLiteral(str(obj))
                except Exception:
                    # best effort: a dcterms value that cannot be read is skipped,
                    # but KeyboardInterrupt/SystemExit are no longer swallowed
                    pass

            if predicate == SKOS.hasTopConcept:
                self.relations.append({
                    'source': scheme_id,
                    'type': 'hasTopConcept',
                    'target': self.generate_uuid_from_subject(baseuuid, obj)
                })

        concept_scheme.addvalue({
            'id': identifier['value_id'],
            'value': identifier['value'],
            'language': default_lang,
            'type': dcterms_identifier_type.valuetype,
            'category': dcterms_identifier_type.category
        })
        self.nodes.append(concept_scheme)

        # Search for Concepts that belong to this scheme
        for s, v, o in graph.triples((None, SKOS.inScheme, scheme)):
            identifier = self.unwrapJsonLiteral(str(s))
            concept_id = self.generate_uuid_from_subject(baseuuid, s)
            concept = Concept({
                'id': concept_id,
                'legacyoid': str(s),
                'nodetype': 'Concept'
            })

            # loop through all the elements within a <skos:Concept> element
            for predicate, obj in graph.predicate_objects(subject=s):
                if str(SKOS) in predicate or str(ARCHES) in predicate:
                    if not self.language_exists(obj, allowed_languages):
                        allowed_languages = models.DLanguage.objects.values_list('pk', flat=True)

                    # this is essentially the skos element type within a <skos:Concept>
                    # element (eg: prefLabel, broader, etc...)
                    relation_or_value_type = predicate.replace(SKOS, '').replace(ARCHES, '')

                    if relation_or_value_type in skos_value_types:
                        value_type = skos_value_types[relation_or_value_type]
                        val = self.unwrapJsonLiteral(obj)
                        concept.addvalue({
                            'id': val['value_id'],
                            'value': val['value'],
                            'language': obj.language or default_lang,
                            'type': value_type.valuetype,
                            'category': value_type.category
                        })
                    elif predicate == SKOS.broader:
                        # "s broader obj" is stored as "obj narrower s"
                        self.relations.append({
                            'source': self.generate_uuid_from_subject(baseuuid, obj),
                            'type': 'narrower',
                            'target': concept_id
                        })
                    elif predicate in (SKOS.narrower, SKOS.related):
                        self.relations.append({
                            'source': concept_id,
                            'type': relation_or_value_type,
                            'target': self.generate_uuid_from_subject(baseuuid, obj)
                        })
                elif predicate == DCTERMS.identifier:
                    identifier = self.unwrapJsonLiteral(str(obj))

            concept.addvalue({
                'id': identifier['value_id'],
                'value': identifier['value'],
                'language': default_lang,
                'type': dcterms_identifier_type.valuetype,
                'category': dcterms_identifier_type.category
            })
            self.nodes.append(concept)

    # Search for SKOS.Collections
    for s, v, o in graph.triples((None, RDF.type, SKOS.Collection)):
        collection = Concept({
            'id': self.generate_uuid_from_subject(baseuuid, s),
            'legacyoid': str(s),
            'nodetype': 'Collection'
        })

        # loop through all the elements within a <skos:Collection> element
        for predicate, obj in graph.predicate_objects(subject=s):
            if str(SKOS) in predicate or str(ARCHES) in predicate:
                if not self.language_exists(obj, allowed_languages):
                    allowed_languages = models.DLanguage.objects.values_list('pk', flat=True)

                relation_or_value_type = predicate.replace(SKOS, '').replace(ARCHES, '')
                if relation_or_value_type in skos_value_types:
                    value_type = skos_value_types[relation_or_value_type]
                    val = self.unwrapJsonLiteral(obj)
                    collection.addvalue({
                        'id': val['value_id'],
                        'value': val['value'],
                        'language': obj.language or default_lang,
                        'type': value_type.valuetype,
                        'category': value_type.category
                    })

        self.nodes.append(collection)

    for s, v, o in graph.triples((None, SKOS.member, None)):
        self.member_relations.append({
            'source': self.generate_uuid_from_subject(baseuuid, s),
            'type': 'member',
            'target': self.generate_uuid_from_subject(baseuuid, o)
        })

    # insert and index the concepts
    scheme_node = None
    needs_lookup = staging_options == 'stage' or overwrite_options == 'ignore'
    with transaction.atomic():
        for node in self.nodes:
            if node.nodetype == 'ConceptScheme':
                scheme_node = node

            # one lookup, done before the node is saved, serves both options
            already_saved = needs_lookup and models.Concept.objects.filter(pk=node.id).exists()

            if staging_options == 'stage' and not already_saved and node.nodetype != 'ConceptScheme':
                # this is a new concept, so add a reference to it in the Candidates scheme
                self.relations.append({
                    'source': '00000000-0000-0000-0000-000000000006',
                    'type': 'narrower',
                    'target': node.id
                })

            if overwrite_options == 'overwrite':
                node.save()
            elif overwrite_options == 'ignore' and not already_saved:
                # don't do anything if the concept already exists
                node.save()

        # insert the concept relations
        for relation in self.relations:
            models.Relation.objects.get_or_create(
                conceptfrom_id=relation['source'],
                conceptto_id=relation['target'],
                relationtype_id=relation['type'])

        # need to index after the concepts and relations have been entered into the db
        # so that the proper context gets indexed with the concept
        if scheme_node:
            scheme_node.bulk_index()

    # insert the concept collection relations
    # we do this outside a transaction so that we can load incomplete collections
    for relation in self.member_relations:
        try:
            models.Relation.objects.get_or_create(
                conceptfrom_id=relation['source'],
                conceptto_id=relation['target'],
                relationtype_id=relation['type'])
        except IntegrityError as e:
            # log the exception itself: e.message is empty for multi-argument
            # exceptions and does not exist on Python 3
            self.logger.warning(e)

    return scheme_node
def save_concepts_from_skos(self, graph):
    """
    given an RDF graph, tries to save the concepts to the system

    Every skos:ConceptScheme of the graph and the concepts that are skos:inScheme
    of it are appended to self.nodes, their relations to self.relations, and
    everything is then saved and indexed.

    Returns self.
    Raises Exception if graph is not an rdflib.graph.Graph or if no node was
    collected once the ConceptSchemes have been read.

    """
    if not isinstance(graph, Graph):
        raise Exception('graph argument should be of type rdflib.graph.Graph')

    def refresh_languages(literal, known_languages):
        # registers the language of the literal when it is not known yet and
        # returns the language ids to check against from now on
        if hasattr(literal, 'language') and literal.language not in known_languages:
            newlang = models.DLanguages()
            newlang.pk = literal.language
            newlang.languagename = literal.language
            newlang.isdefault = False
            newlang.save()
            return models.DLanguages.objects.values_list('pk', flat=True)
        return known_languages

    baseuuid = uuid.uuid4()
    allowed_languages = models.DLanguages.objects.values_list('pk', flat=True)
    value_types = models.ValueTypes.objects.all()
    # loaded once so that reading a value does not cost a query per literal
    skos_value_types = {
        value_type.valuetype: value_type
        for value_type in value_types.filter(namespace='skos')
    }
    dcterms_value_types = value_types.filter(namespace='dcterms')
    dcterms_value_type_names = set(dcterms_value_types.values_list('valuetype', flat=True))

    # Search for ConceptSchemes first
    schemes = []
    for scheme, v, o in graph.triples((None, RDF.type, SKOS.ConceptScheme)):
        schemes.append(scheme)
        scheme_id = self.generate_uuid_from_subject(baseuuid, scheme)
        concept_scheme = Concept({
            'id': scheme_id,
            'legacyoid': str(scheme),
            'nodetype': 'ConceptScheme'
        })

        for predicate, obj in graph.predicate_objects(subject=scheme):
            if str(DCTERMS) in predicate and predicate.replace(DCTERMS, '') in dcterms_value_type_names:
                allowed_languages = refresh_languages(obj, allowed_languages)

                try:
                    # first try and get any values associated with the concept_scheme
                    # predicate.replace(DCTERMS, '') yields something like 'title' or 'description'
                    value_type = dcterms_value_types.get(valuetype=predicate.replace(DCTERMS, ''))
                    if predicate == DCTERMS.title:
                        concept_scheme.addvalue({
                            'value': obj,
                            'language': obj.language,
                            'type': 'prefLabel',
                            'category': value_type.category
                        })
                        print('Casting dcterms:title to skos:prefLabel')
                    if predicate == DCTERMS.description:
                        concept_scheme.addvalue({
                            'value': obj,
                            'language': obj.language,
                            'type': 'scopeNote',
                            'category': value_type.category
                        })
                        print('Casting dcterms:description to skos:scopeNote')
                except Exception:
                    # best effort: a dcterms value that cannot be read is skipped,
                    # but KeyboardInterrupt/SystemExit are no longer swallowed
                    pass

            if predicate == SKOS.hasTopConcept:
                self.relations.append({
                    'source': scheme_id,
                    'type': 'hasTopConcept',
                    'target': self.generate_uuid_from_subject(baseuuid, obj)
                })

        self.nodes.append(concept_scheme)

    if len(self.nodes) == 0:
        raise Exception('No ConceptScheme found in file.')

    # Search for Concepts of every scheme found above, not only of the last one
    for scheme in schemes:
        for s, v, o in graph.triples((None, SKOS.inScheme, scheme)):
            concept_id = self.generate_uuid_from_subject(baseuuid, s)
            concept = Concept({
                'id': concept_id,
                'legacyoid': str(s),
                'nodetype': 'Concept'
            })

            # loop through all the elements within a <skos:Concept> element
            for predicate, obj in graph.predicate_objects(subject=s):
                if str(SKOS) not in predicate:
                    continue

                allowed_languages = refresh_languages(obj, allowed_languages)

                # this is essentially the skos element type within a <skos:Concept>
                # element (eg: prefLabel, broader, etc...)
                relation_or_value_type = predicate.replace(SKOS, '')

                if relation_or_value_type in skos_value_types:
                    value_type = skos_value_types[relation_or_value_type]
                    concept.addvalue({
                        'value': obj,
                        'language': obj.language,
                        'type': value_type.valuetype,
                        'category': value_type.category
                    })
                elif predicate == SKOS.broader:
                    # "s broader obj" is stored as "obj narrower s"
                    self.relations.append({
                        'source': self.generate_uuid_from_subject(baseuuid, obj),
                        'type': 'narrower',
                        'target': concept_id
                    })
                elif predicate in (SKOS.narrower, SKOS.related):
                    self.relations.append({
                        'source': concept_id,
                        'type': relation_or_value_type,
                        'target': self.generate_uuid_from_subject(baseuuid, obj)
                    })

            self.nodes.append(concept)

    # insert and index the concepts
    with transaction.atomic():
        for node in self.nodes:
            node.save()

        # insert the concept relations
        for relation in self.relations:
            newrelation = models.ConceptRelations()
            newrelation.relationid = str(uuid.uuid4())
            newrelation.conceptidfrom_id = relation['source']
            newrelation.conceptidto_id = relation['target']
            newrelation.relationtype_id = relation['type']
            newrelation.save()

        # need to index after the concepts and relations have been entered into the db
        # so that the proper context gets indexed with the concept
        for node in self.nodes:
            node.index()

    return self
def save_concepts_from_skos(self, graph, overwrite_options='overwrite', staging_options='keep'):
    """
    given an RDF graph, tries to save the concepts to the system

    Keyword arguments:
    overwrite_options -- 'overwrite' saves every parsed node, 'ignore' saves only
                         the nodes that are not in the database yet
    staging_options -- 'stage' relates every node that is new to the database
                       (ConceptSchemes excepted) to the Candidates scheme,
                       'keep' adds no such relation

    Returns self.
    Raises Exception if graph is not an rdflib.graph.Graph or if no node was
    collected once the ConceptSchemes have been read.

    """
    if not isinstance(graph, Graph):
        raise Exception('graph argument should be of type rdflib.graph.Graph')

    baseuuid = uuid.uuid4()
    allowed_languages = models.DLanguage.objects.values_list('pk', flat=True)
    default_lang = settings.LANGUAGE_CODE

    value_types = models.DValueType.objects.all()
    skos_value_types = {
        value_type.valuetype: value_type
        for value_type in value_types.filter(Q(namespace='skos') | Q(namespace='arches'))
    }
    dcterms_value_types = value_types.filter(namespace='dcterms')
    # evaluated once here rather than once per predicate in the loop below
    dcterms_value_type_names = set(dcterms_value_types.values_list('valuetype', flat=True))

    # Search for ConceptSchemes first
    schemes = []
    for scheme, v, o in graph.triples((None, RDF.type, SKOS.ConceptScheme)):
        schemes.append(scheme)
        scheme_id = self.generate_uuid_from_subject(baseuuid, scheme)
        concept_scheme = Concept({
            'id': scheme_id,
            'legacyoid': str(scheme),
            'nodetype': 'ConceptScheme'
        })

        for predicate, obj in graph.predicate_objects(subject=scheme):
            if str(DCTERMS) in predicate and predicate.replace(DCTERMS, '') in dcterms_value_type_names:
                if not self.language_exists(obj, allowed_languages):
                    allowed_languages = models.DLanguage.objects.values_list('pk', flat=True)

                try:
                    # first try and get any values associated with the concept_scheme
                    # predicate.replace(DCTERMS, '') yields something like 'title' or 'description'
                    value_type = dcterms_value_types.get(valuetype=predicate.replace(DCTERMS, ''))
                    val = self.unwrapJsonLiteral(obj)
                    if predicate == DCTERMS.title:
                        concept_scheme.addvalue({
                            'id': val['value_id'],
                            'value': val['value'],
                            'language': obj.language or default_lang,
                            'type': 'prefLabel',
                            'category': value_type.category
                        })
                        print('Casting dcterms:title to skos:prefLabel')
                    elif predicate == DCTERMS.description:
                        concept_scheme.addvalue({
                            'id': val['value_id'],
                            'value': val['value'],
                            'language': obj.language or default_lang,
                            'type': 'scopeNote',
                            'category': value_type.category
                        })
                        print('Casting dcterms:description to skos:scopeNote')
                except Exception:
                    # best effort: a dcterms value that cannot be read is skipped,
                    # but KeyboardInterrupt/SystemExit are no longer swallowed
                    pass

            if predicate == SKOS.hasTopConcept:
                self.relations.append({
                    'source': scheme_id,
                    'type': 'hasTopConcept',
                    'target': self.generate_uuid_from_subject(baseuuid, obj)
                })

        self.nodes.append(concept_scheme)

    if len(self.nodes) == 0:
        raise Exception('No ConceptScheme found in file.')

    # Search for Concepts of every scheme found above, not only of the last one
    for scheme in schemes:
        for s, v, o in graph.triples((None, SKOS.inScheme, scheme)):
            concept_id = self.generate_uuid_from_subject(baseuuid, s)
            concept = Concept({
                'id': concept_id,
                'legacyoid': str(s),
                'nodetype': 'Concept'
            })

            # loop through all the elements within a <skos:Concept> element
            for predicate, obj in graph.predicate_objects(subject=s):
                if not (str(SKOS) in predicate or str(ARCHES) in predicate):
                    continue

                if not self.language_exists(obj, allowed_languages):
                    allowed_languages = models.DLanguage.objects.values_list('pk', flat=True)

                # this is essentially the skos element type within a <skos:Concept>
                # element (eg: prefLabel, broader, etc...)
                relation_or_value_type = predicate.replace(SKOS, '').replace(ARCHES, '')

                if relation_or_value_type in skos_value_types:
                    value_type = skos_value_types[relation_or_value_type]
                    val = self.unwrapJsonLiteral(obj)
                    concept.addvalue({
                        'id': val['value_id'],
                        'value': val['value'],
                        'language': obj.language or default_lang,
                        'type': value_type.valuetype,
                        'category': value_type.category
                    })
                elif predicate == SKOS.broader:
                    # "s broader obj" is stored as "obj narrower s"
                    self.relations.append({
                        'source': self.generate_uuid_from_subject(baseuuid, obj),
                        'type': 'narrower',
                        'target': concept_id
                    })
                elif predicate in (SKOS.narrower, SKOS.related):
                    self.relations.append({
                        'source': concept_id,
                        'type': relation_or_value_type,
                        'target': self.generate_uuid_from_subject(baseuuid, obj)
                    })

            self.nodes.append(concept)

    # Search for SKOS.Collections
    for s, v, o in graph.triples((None, RDF.type, SKOS.Collection)):
        collection = Concept({
            'id': self.generate_uuid_from_subject(baseuuid, s),
            'legacyoid': str(s),
            'nodetype': 'Collection'
        })

        # loop through all the elements within a <skos:Collection> element
        for predicate, obj in graph.predicate_objects(subject=s):
            if str(SKOS) in predicate or str(ARCHES) in predicate:
                if not self.language_exists(obj, allowed_languages):
                    allowed_languages = models.DLanguage.objects.values_list('pk', flat=True)

                relation_or_value_type = predicate.replace(SKOS, '').replace(ARCHES, '')
                if relation_or_value_type in skos_value_types:
                    value_type = skos_value_types[relation_or_value_type]
                    val = self.unwrapJsonLiteral(obj)
                    collection.addvalue({
                        'id': val['value_id'],
                        'value': val['value'],
                        'language': obj.language or default_lang,
                        'type': value_type.valuetype,
                        'category': value_type.category
                    })

        self.nodes.append(collection)

    for s, v, o in graph.triples((None, SKOS.member, None)):
        self.relations.append({
            'source': self.generate_uuid_from_subject(baseuuid, s),
            'type': 'member',
            'target': self.generate_uuid_from_subject(baseuuid, o)
        })

    # insert and index the concepts
    needs_lookup = staging_options == 'stage' or overwrite_options == 'ignore'
    with transaction.atomic():
        for node in self.nodes:
            # one lookup, done before the node is saved, serves both options
            already_saved = needs_lookup and models.Concept.objects.filter(pk=node.id).exists()

            if staging_options == 'stage' and not already_saved and node.nodetype != 'ConceptScheme':
                # this is a new concept, so add a reference to it in the Candidates scheme
                self.relations.append({
                    'source': '00000000-0000-0000-0000-000000000006',
                    'type': 'narrower',
                    'target': node.id
                })

            if overwrite_options == 'overwrite':
                node.save()
            elif overwrite_options == 'ignore' and not already_saved:
                # don't do anything if the concept already exists
                node.save()

        # insert the concept relations
        for relation in self.relations:
            models.Relation.objects.get_or_create(
                conceptfrom_id=relation['source'],
                conceptto_id=relation['target'],
                relationtype_id=relation['type']
            )

        # need to index after the concepts and relations have been entered into the db
        # so that the proper context gets indexed with the concept
        for node in self.nodes:
            node.index()

    return self
def save_concepts_from_skos(self, graph, overwrite_options="overwrite", staging_options="keep", prevent_indexing=False):
    """
    given an RDF graph, tries to save the concepts to the system

    Keyword arguments:
    overwrite_options -- 'overwrite' saves every parsed node, 'ignore' saves only
                         the nodes that are not in the database yet
    staging_options -- 'stage' relates every node that is new to the database
                       (ConceptSchemes excepted) to the Candidates scheme,
                       'keep' adds no such relation
    prevent_indexing -- True to prevent indexing of concepts

    Concepts that no 'narrower' or 'hasTopConcept' relation points to are
    treated as orphans: when a ConceptScheme was found they are attached to a
    new "ORPHANS - ..." scheme and a warning is logged.

    Returns the last ConceptScheme found in self.nodes (None if there is none).
    Raises Exception if graph is not an rdflib.graph.Graph.

    """
    if not isinstance(graph, Graph):
        raise Exception("graph argument should be of type rdflib.graph.Graph")

    baseuuid = uuid.uuid4()
    allowed_languages = models.DLanguage.objects.values_list("pk", flat=True)
    default_lang = settings.LANGUAGE_CODE

    value_types = models.DValueType.objects.all()
    skos_value_types = {
        value_type.valuetype: value_type
        for value_type in value_types.filter(Q(namespace="skos") | Q(namespace="arches"))
    }
    dcterms_value_types = value_types.filter(namespace="dcterms")
    # evaluated once here rather than once per predicate in the loop below
    dcterms_value_type_names = set(dcterms_value_types.values_list("valuetype", flat=True))
    dcterms_identifier_type = dcterms_value_types.get(valuetype=str(DCTERMS.identifier).replace(str(DCTERMS), ""))

    # Search for ConceptSchemes first
    for scheme, v, o in graph.triples((None, RDF.type, SKOS.ConceptScheme)):
        # the subject URI is the identifier unless a dcterms:identifier overrides it
        identifier = self.unwrapJsonLiteral(str(scheme))
        scheme_id = self.generate_uuid_from_subject(baseuuid, scheme)
        concept_scheme = Concept({"id": scheme_id, "legacyoid": str(scheme), "nodetype": "ConceptScheme"})

        for predicate, obj in graph.predicate_objects(subject=scheme):
            if str(DCTERMS) in predicate and predicate.replace(DCTERMS, "") in dcterms_value_type_names:
                if not self.language_exists(obj, allowed_languages):
                    allowed_languages = models.DLanguage.objects.values_list("pk", flat=True)

                try:
                    # first try and get any values associated with the concept_scheme
                    # predicate.replace(DCTERMS, '') yields something like 'title' or 'description'
                    value_type = dcterms_value_types.get(valuetype=predicate.replace(DCTERMS, ""))
                    val = self.unwrapJsonLiteral(obj)
                    if predicate == DCTERMS.title:
                        # dcterms:title is cast to skos:prefLabel
                        concept_scheme.addvalue(
                            {
                                "id": val["value_id"],
                                "value": val["value"],
                                "language": obj.language or default_lang,
                                "type": "prefLabel",
                                "category": value_type.category,
                            }
                        )
                    elif predicate == DCTERMS.description:
                        # dcterms:description is cast to skos:scopeNote
                        concept_scheme.addvalue(
                            {
                                "id": val["value_id"],
                                "value": val["value"],
                                "language": obj.language or default_lang,
                                "type": "scopeNote",
                                "category": value_type.category,
                            }
                        )
                    elif predicate == DCTERMS.identifier:
                        identifier = self.unwrapJsonLiteral(str(obj))
                except Exception:
                    # best effort: a dcterms value that cannot be read is skipped,
                    # but KeyboardInterrupt/SystemExit are no longer swallowed
                    pass

            if predicate == SKOS.hasTopConcept:
                top_concept_id = self.generate_uuid_from_subject(baseuuid, obj)
                self.relations.append({"source": scheme_id, "type": "hasTopConcept", "target": top_concept_id})

        concept_scheme.addvalue(
            {
                "id": identifier["value_id"],
                "value": identifier["value"],
                "language": default_lang,
                "type": dcterms_identifier_type.valuetype,
                "category": dcterms_identifier_type.category,
            }
        )
        self.nodes.append(concept_scheme)

        # Search for Concepts that belong to this scheme
        for s, v, o in graph.triples((None, SKOS.inScheme, scheme)):
            identifier = self.unwrapJsonLiteral(str(s))
            concept_id = self.generate_uuid_from_subject(baseuuid, s)
            concept = Concept({"id": concept_id, "legacyoid": str(s), "nodetype": "Concept"})

            # loop through all the elements within a <skos:Concept> element
            for predicate, obj in graph.predicate_objects(subject=s):
                if str(SKOS) in predicate or str(ARCHES) in predicate:
                    if not self.language_exists(obj, allowed_languages):
                        allowed_languages = models.DLanguage.objects.values_list("pk", flat=True)

                    # this is essentially the skos element type within a <skos:Concept>
                    # element (eg: prefLabel, broader, etc...)
                    relation_or_value_type = predicate.replace(SKOS, "").replace(ARCHES, "")

                    if relation_or_value_type in skos_value_types:
                        value_type = skos_value_types[relation_or_value_type]
                        val = self.unwrapJsonLiteral(obj)
                        concept.addvalue(
                            {
                                "id": val["value_id"],
                                "value": val["value"],
                                "language": obj.language or default_lang,
                                "type": value_type.valuetype,
                                "category": value_type.category,
                            }
                        )
                    elif predicate == SKOS.broader:
                        # "s broader obj" is stored as "obj narrower s"
                        self.relations.append(
                            {
                                "source": self.generate_uuid_from_subject(baseuuid, obj),
                                "type": "narrower",
                                "target": concept_id,
                            }
                        )
                    elif predicate in (SKOS.narrower, SKOS.related):
                        self.relations.append(
                            {
                                "source": concept_id,
                                "type": relation_or_value_type,
                                "target": self.generate_uuid_from_subject(baseuuid, obj),
                            }
                        )
                elif predicate == DCTERMS.identifier:
                    identifier = self.unwrapJsonLiteral(str(obj))

            concept.addvalue(
                {
                    "id": identifier["value_id"],
                    "value": identifier["value"],
                    "language": default_lang,
                    "type": dcterms_identifier_type.valuetype,
                    "category": dcterms_identifier_type.category,
                }
            )
            self.nodes.append(concept)

    # Search for SKOS.Collections
    for s, v, o in graph.triples((None, RDF.type, SKOS.Collection)):
        collection = Concept({"id": self.generate_uuid_from_subject(baseuuid, s), "legacyoid": str(s), "nodetype": "Collection"})

        # loop through all the elements within a <skos:Collection> element
        for predicate, obj in graph.predicate_objects(subject=s):
            if str(SKOS) in predicate or str(ARCHES) in predicate:
                if not self.language_exists(obj, allowed_languages):
                    allowed_languages = models.DLanguage.objects.values_list("pk", flat=True)

                relation_or_value_type = predicate.replace(SKOS, "").replace(ARCHES, "")
                if relation_or_value_type in skos_value_types:
                    value_type = skos_value_types[relation_or_value_type]
                    val = self.unwrapJsonLiteral(obj)
                    collection.addvalue(
                        {
                            "id": val["value_id"],
                            "value": val["value"],
                            "language": obj.language or default_lang,
                            "type": value_type.valuetype,
                            "category": value_type.category,
                        }
                    )

        self.nodes.append(collection)

    for s, v, o in graph.triples((None, SKOS.member, None)):
        self.member_relations.append(
            {
                "source": self.generate_uuid_from_subject(baseuuid, s),
                "type": "member",
                "target": self.generate_uuid_from_subject(baseuuid, o),
            }
        )

    # insert and index the concepts
    scheme_node = None
    orphaned_concepts = {}
    needs_lookup = staging_options == "stage" or overwrite_options == "ignore"
    with transaction.atomic():
        for node in self.nodes:
            if node.nodetype == "ConceptScheme":
                scheme_node = node
            elif node.nodetype == "Concept":
                # every concept is an orphan until a relation is found that points to it
                orphaned_concepts[str(node.id)] = node

            # one lookup, done before the node is saved, serves both options
            already_saved = needs_lookup and models.Concept.objects.filter(pk=node.id).exists()

            if staging_options == "stage" and not already_saved and node.nodetype != "ConceptScheme":
                # this is a new concept, so add a reference to it in the Candidates scheme
                self.relations.append({"source": "00000000-0000-0000-0000-000000000006", "type": "narrower", "target": node.id})

            if overwrite_options == "overwrite":
                node.save()
            elif overwrite_options == "ignore" and not already_saved:
                # don't do anything if the concept already exists
                node.save()

        # insert the concept relations
        for relation in self.relations:
            models.Relation.objects.get_or_create(
                conceptfrom_id=relation["source"], conceptto_id=relation["target"], relationtype_id=relation["type"]
            )
            # check for orphaned concepts, every concept except the concept scheme should have an edge pointing to it
            if relation["type"] in ("narrower", "hasTopConcept"):
                # the keys are str(node.id), so the target is normalised the same way
                orphaned_concepts.pop(str(relation["target"]), None)

        if orphaned_concepts and scheme_node:
            orphaned_scheme = Concept({"id": uuid.uuid4(), "legacyoid": uuid.uuid4(), "nodetype": "ConceptScheme"})
            # the new scheme reuses the prefLabels of the imported scheme, prefixed with "ORPHANS - "
            for value in scheme_node.values:
                if value.type == "prefLabel":
                    orphaned_scheme.addvalue(
                        {
                            "id": uuid.uuid4(),
                            "value": "ORPHANS - " + value.value,
                            "language": value.language,
                            "type": value.type,
                            "category": value.category,
                        }
                    )
            orphaned_scheme.save()
            for orphaned_concept_id in orphaned_concepts:
                models.Relation.objects.create(
                    conceptfrom_id=str(orphaned_scheme.id), conceptto_id=orphaned_concept_id, relationtype_id="narrower"
                )
            self.logger.warning(f'\nThe SKOS file "{os.path.split(self.path_to_file)[1]}" appears to have orphaned concepts.')

        # need to index after the concepts and relations have been entered into the db
        # so that the proper context gets indexed with the concept
        if scheme_node and not prevent_indexing:
            scheme_node.bulk_index()

    # insert the concept collection relations
    # we do this outside a transaction so that we can load incomplete collections
    for relation in self.member_relations:
        try:
            models.Relation.objects.get_or_create(
                conceptfrom_id=relation["source"], conceptto_id=relation["target"], relationtype_id=relation["type"]
            )
        except IntegrityError as e:
            self.logger.warning(e)

    return scheme_node