def update_ontology_doc(self, filename):
    """ Changes categories in the ontology document

    Reads ``filename`` from ``self.root_export_dir``, rewrites every
    category listed in ``self.REVISION_LIST`` (both its file-prefixed form
    and its full URI form) and saves the result beside the original as
    ``'rev-' + filename``, encoded as UTF-8. The source file itself is
    not modified.

    :param str filename: name of the ontology file inside
        ``self.root_export_dir``
    :return: None; the outcome is reported with print()
    """
    filepath = self.root_export_dir + '/' + filename
    newfilepath = self.root_export_dir + '/rev-' + filename
    if not os.path.isfile(filepath):
        print('Ouch! Cannot find: ' + filepath)
        return
    print('Found: ' + filepath)
    with open(filepath, 'r') as myfile:
        data = myfile.read()
    for revision in self.REVISION_LIST:
        search_old_db = revision['old']
        replace_db = revision['new']
        # The document uses a different namespace prefix than the
        # database, so translate the prefix before searching the text.
        search_old_file = search_old_db.replace(
            self.PREFIXING['db-prefix'],
            self.PREFIXING['file-prefix'])
        replace_file = replace_db.replace(
            self.PREFIXING['db-prefix'],
            self.PREFIXING['file-prefix'])
        data = data.replace(search_old_file, replace_file)
        # Also swap any fully expanded URIs of the same categories.
        old_uri = URImanagement.convert_prefix_to_full_uri(search_old_db)
        new_uri = URImanagement.convert_prefix_to_full_uri(replace_db)
        data = data.replace(old_uri, new_uri)
    # The context manager closes the output file even if write() fails.
    with codecs.open(newfilepath, 'w', 'utf-8') as outfile:
        outfile.write(data)
def mass_revise_category_uris(self):
    """ Revises category uris in a mass edit

    For every old -> new pair in self.REVISION_LIST the stored
    references are rewritten in the Manifest (class_uri),
    LinkAnnotation (subject and object_uri, in both prefixed and full
    URI form) and LinkEntity (uri, full URI form) tables.
    """
    for revision in self.REVISION_LIST:
        old_prefixed = revision['old']
        new_prefixed = revision['new']
        old_full = URImanagement.convert_prefix_to_full_uri(old_prefixed)
        new_full = URImanagement.convert_prefix_to_full_uri(new_prefixed)
        # (model, field, value to find, replacement value)
        edits = (
            (Manifest, 'class_uri', old_prefixed, new_prefixed),
            (LinkAnnotation, 'subject', old_prefixed, new_prefixed),
            (LinkAnnotation, 'subject', old_full, new_full),
            (LinkAnnotation, 'object_uri', old_prefixed, new_prefixed),
            (LinkAnnotation, 'object_uri', old_full, new_full),
            (LinkEntity, 'uri', old_full, new_full),
        )
        for model, field, old_value, new_value in edits:
            matches = model.objects.filter(**{field: old_value})
            matches.update(**{field: new_value})
def update_ontology_doc(self, filename):
    """ Changes categories in the ontology document

    The file ``filename`` in ``self.root_export_dir`` is read, each
    revision in ``self.REVISION_LIST`` is applied as a text replacement
    (first on the file-prefixed identifier, then on the full URI), and
    the revised text is written as UTF-8 to ``'rev-' + filename`` in the
    same directory.

    :param str filename: name of the ontology file inside
        ``self.root_export_dir``
    :return: None; the outcome is reported with print()
    """
    filepath = self.root_export_dir + '/' + filename
    newfilepath = self.root_export_dir + '/rev-' + filename
    if not os.path.isfile(filepath):
        print('Ouch! Cannot find: ' + filepath)
        return
    print('Found: ' + filepath)
    with open(filepath, 'r') as myfile:
        data = myfile.read()
    for revision in self.REVISION_LIST:
        search_old_db = revision['old']
        replace_db = revision['new']
        # Identifiers in the document carry the file prefix rather than
        # the database prefix.
        search_old_file = search_old_db.replace(
            self.PREFIXING['db-prefix'],
            self.PREFIXING['file-prefix'])
        replace_file = replace_db.replace(
            self.PREFIXING['db-prefix'],
            self.PREFIXING['file-prefix'])
        data = data.replace(search_old_file, replace_file)
        # Full URI spellings of the same categories are revised as well.
        old_uri = URImanagement.convert_prefix_to_full_uri(search_old_db)
        new_uri = URImanagement.convert_prefix_to_full_uri(replace_db)
        data = data.replace(old_uri, new_uri)
    # Using "with" guarantees the handle is released if the write raises.
    with codecs.open(newfilepath, 'w', 'utf-8') as revised_file:
        revised_file.write(data)
def get_identifier_list_variants(self, id_list):
    """ makes different variants of identifiers for a list of identifiers

    Each identifier is kept as given and followed by its alternative
    spellings: for a web URI the Open Context uuid parsed from it (or,
    failing that, its prefixed form); for a prefixed identifier the
    full URI; for anything else the URI (and prefixed URI, if
    different) of the entity it dereferences to.

    :param id_list: a list of identifiers; any other value is
        converted to a string and treated as a one-item list
    :return: list of the identifiers and their variants
    """
    if not isinstance(id_list, list):
        id_list = [str(id_list)]
    variants = []
    for item in id_list:
        variants.append(item)
        is_web_uri = item[:7] == 'http://' or item[:8] == 'https://'
        if is_web_uri:
            uuid = URImanagement.get_uuid_from_oc_uri(item)
            if uuid is False:
                # not an Open Context URI, so offer the prefixed form
                variants.append(URImanagement.prefix_common_uri(item))
            else:
                variants.append(uuid)
        elif ':' in item:
            variants.append(URImanagement.convert_prefix_to_full_uri(item))
        else:
            # probably an open context uuid or a slug
            entity = Entity()
            if entity.dereference(item):
                entity_uri = entity.uri
                variants.append(entity_uri)
                short_uri = URImanagement.prefix_common_uri(entity_uri)
                if short_uri != entity_uri:
                    variants.append(short_uri)
    return variants
def save_icons(self, predicate_uri='oc-gen:hasIcon'):
    """ Saves icons in the general Open Context namespace

    Collects (subject, object) pairs from self.graph for the icon
    predicate and stores each one as a LinkAnnotation sourced from
    self.vocabulary_uri.

    :param str predicate_uri: prefixed predicate; triples are only
        collected when it is 'oc-gen:hasIcon'
    :return: False when self.graph or self.vocabulary_uri is False,
        otherwise the list of {'s': ..., 'o': ...} dicts that were saved
    """
    if self.graph is False or self.vocabulary_uri is False:
        return False
    if self.replace_old:
        # delete old relations from this vocabulary using this predicate
        LinkAnnotation.objects.filter(
            source_id=self.vocabulary_uri,
            predicate_uri=predicate_uri).delete()
    icon_pairs = []
    if predicate_uri == 'oc-gen:hasIcon':
        icon_pred = URIRef(
            URImanagement.convert_prefix_to_full_uri(predicate_uri))
        for subj, _pred, obj in self.graph.triples((None, icon_pred, None)):
            subject_uri = str(subj)
            object_uri = str(obj)
            # skip self-referencing triples
            if subject_uri != object_uri:
                icon_pairs.append({'s': subject_uri, 'o': object_uri})
    for pair in icon_pairs:
        annotation = LinkAnnotation()
        # make the subject a prefixed URI if common
        annotation.subject = URImanagement.prefix_common_uri(pair['s'])
        annotation.subject_type = 'uri'
        annotation.project_uuid = '0'
        annotation.source_id = self.vocabulary_uri
        annotation.predicate_uri = predicate_uri
        annotation.object_uri = pair['o']
        annotation.save()
    return icon_pairs
def get_identifier_list_variants(self, id_list):
    """ makes different variants of identifiers for a list of identifiers

    The output always contains each input identifier, immediately
    followed by whichever alternative forms can be derived for it
    (Open Context uuid, prefixed URI or full URI).

    :param id_list: a list of identifiers; any other value is
        converted to a string and treated as a one-item list
    :return: list of the identifiers and their variants
    """
    if not isinstance(id_list, list):
        id_list = [str(id_list)]
    collected = []
    for ident in id_list:
        collected.append(ident)
        if ident[:7] == 'http://' or ident[:8] == 'https://':
            parsed_uuid = URImanagement.get_uuid_from_oc_uri(ident)
            if parsed_uuid is not False:
                collected.append(parsed_uuid)
            else:
                collected.append(URImanagement.prefix_common_uri(ident))
            continue
        if ':' in ident:
            collected.append(
                URImanagement.convert_prefix_to_full_uri(ident))
            continue
        # probably an open context uuid or a slug
        lookup = Entity()
        if not lookup.dereference(ident):
            continue
        resolved_uri = lookup.uri
        collected.append(resolved_uri)
        prefixed_uri = URImanagement.prefix_common_uri(resolved_uri)
        if prefixed_uri != resolved_uri:
            collected.append(prefixed_uri)
    return collected
def dereference(self, identifier, link_entity_slug=False):
    """ Dereferences an entity identified by an identifier.

    The identifier is expanded to a full URI and checked for an Open
    Context uuid. Without a uuid, a linked data entity is looked up
    first; if that fails (or a uuid was found) the OC manifest is
    checked.

    :param identifier: identifier to resolve; anything that is not a
        string is rejected
    :param link_entity_slug: passed through to
        self.dereference_linked_data
    :return: True if a linked data entity or a manifest item was
        found, otherwise False
    """
    # Only try to dereference if the identifier is a string.
    if not isinstance(identifier, str):
        return False
    identifier = URImanagement.convert_prefix_to_full_uri(identifier)
    oc_uuid = URImanagement.get_uuid_from_oc_uri(identifier)
    if not oc_uuid:
        tables_root = settings.CANONICAL_HOST + '/tables/'
        if tables_root in identifier:
            # Special case for probable open context table item.
            oc_uuid = identifier.replace(tables_root, '')
    if oc_uuid:
        # A uuid parsed from the URI is what the manifest is keyed on.
        identifier = oc_uuid
    elif self.dereference_linked_data(
            identifier, link_entity_slug=link_entity_slug):
        # Found a linked data entity, nothing more to look up.
        return True
    # No linked data entity (or none attempted): try manifest items.
    return bool(self.dereference_manifest_item(identifier))
def save_icons(self, predicate_uri='oc-gen:hasIcon'):
    """ Saves icons in the general Open Context namespace

    Icon triples found in self.graph are turned into LinkAnnotation
    records whose source is self.vocabulary_uri.

    :param str predicate_uri: prefixed predicate; triples are only
        collected when it is 'oc-gen:hasIcon'
    :return: False when self.graph or self.vocabulary_uri is False,
        otherwise the list of {'s': ..., 'o': ...} dicts that were saved
    """
    ready = self.graph is not False and self.vocabulary_uri is not False
    if not ready:
        return False
    if self.replace_old:
        # delete old relations from this vocabulary using this predicate
        stale = LinkAnnotation.objects.filter(
            source_id=self.vocabulary_uri,
            predicate_uri=predicate_uri)
        stale.delete()
    data = []
    if predicate_uri == 'oc-gen:hasIcon':
        full_pred_uri = URImanagement.convert_prefix_to_full_uri(
            predicate_uri)
        triples = self.graph.triples((None, URIRef(full_pred_uri), None))
        # URIs of the subject and the object as plain strings
        uri_pairs = ((str(s), str(o)) for s, p, o in triples)
        data = [{'s': s_uri, 'o': o_uri}
                for s_uri, o_uri in uri_pairs
                if s_uri != o_uri]
    for act_t in data:
        record = LinkAnnotation()
        # make the subject a prefixed URI if common
        record.subject = URImanagement.prefix_common_uri(act_t['s'])
        record.subject_type = 'uri'
        record.project_uuid = '0'
        record.source_id = self.vocabulary_uri
        record.predicate_uri = predicate_uri
        record.object_uri = act_t['o']
        record.save()
    return data
def get_identifier_list_variants(self, id_list):
    """ makes different variants of identifiers for a list of identifiers

    Every identifier is emitted as given, followed by the alternative
    forms that can be derived for it: uuid and prefixed form for web
    URIs, the full URI for prefixed identifiers, and the URI (plus
    prefixed URI, if different) of the cached entity for anything else.

    :param id_list: a list of identifiers; any other value is
        converted to a string and treated as a one-item list
    :return: list of the identifiers and their variants
    """
    if not isinstance(id_list, list):
        id_list = [str(id_list)]
    variants = []
    for ident in id_list:
        variants.append(ident)
        if ident.startswith(('http://', 'https://')):
            # add the uuid, then the prefixed form, whenever available
            for derive in (URImanagement.get_uuid_from_oc_uri,
                           URImanagement.prefix_common_uri):
                derived = derive(ident)
                if derived:
                    variants.append(derived)
            continue
        if ':' in ident:
            variants.append(
                URImanagement.convert_prefix_to_full_uri(ident))
            continue
        # probably an open context uuid or a slug
        entity = MemoryCache().get_entity(ident)
        if not entity:
            continue
        entity_uri = entity.uri
        variants.append(entity_uri)
        short_uri = URImanagement.prefix_common_uri(entity_uri)
        if short_uri != entity_uri:
            variants.append(short_uri)
    return variants
def make_alt_uri(self, uri):
    """ makes an alternative URI: a full (http / https) uri is changed
        to its prefixed form, anything else is treated as a prefixed
        uri and changed to a full uri

        :param uri: the uri to convert
        :return: whatever the matching URImanagement conversion returns
    """
    is_full_uri = uri[:7] == 'http://' or uri[:8] == 'https://'
    if is_full_uri:
        return URImanagement.prefix_common_uri(uri)
    return URImanagement.convert_prefix_to_full_uri(uri)
def make_alt_uri(self, uri):
    """ makes an alternative URI, swapping between the two spellings:
        full uri -> prefixed uri, prefixed uri -> full uri

        :param uri: the uri to convert
        :return: whatever the matching URImanagement conversion returns
    """
    looks_full = (uri[:7] == 'http://' or uri[:8] == 'https://')
    # pick the conversion that goes the other way
    convert = (URImanagement.prefix_common_uri
               if looks_full
               else URImanagement.convert_prefix_to_full_uri)
    return convert(uri)
def make_uri_equivalence_list(raw_term, alt_suffix="/"):
    """ Makes Prefixed, HTTP, HTTPS and '/' ending options list for URLs

    :param raw_term: a web URL or a prefixed identifier
    :param str alt_suffix: trailing suffix to make with / without
        variants for; it is also trimmed from prefixed IDs
    :return: None if raw_term is not a string, otherwise a list that
        starts with raw_term followed by its distinct variants
    """
    # NOTE: Open Context often references Web URL/URIs to "linked data"
    # entities. Open Context considers http:// and https:// URLs to be
    # equivalent. This function takes a raw term and makes http://
    # https:// variants. It also makes a prefixed URL if a namespace
    # is recognized in URImanagement. Finally, it will by default,
    # make variants that have and do not have a trailing "/".
    if not isinstance(raw_term, str):
        return None
    output_list = [raw_term]
    url_terms = []
    if raw_term.startswith(('http://', 'https://')):
        # NOTE: The raw_term looks like a Web URL. We need to make
        # variants that start with http, https, and end in a slash, and
        # do not end in a slash.
        url_terms = make_suffix_no_suffix_list(raw_term, suffix=alt_suffix)
    elif raw_term.count(':') == 1:
        full_uri = URImanagement.convert_prefix_to_full_uri(raw_term)
        if full_uri:
            url_terms = make_suffix_no_suffix_list(
                full_uri, suffix=alt_suffix)
        url_terms.append(raw_term)
    for term in url_terms:
        http_alts = make_alternative_prefix_list(
            term, alt_prefixes=('http://', 'https://',))
        if not http_alts:
            continue
        for http_alt in http_alts:
            if http_alt not in output_list:
                output_list.append(http_alt)
            prefix_id = URImanagement.prefix_common_uri(http_alt)
            if not prefix_id:
                # No prefixed form available. Check this before any
                # string method is called on the result.
                continue
            if alt_suffix and prefix_id.endswith(alt_suffix):
                # Remove any trailing slash with prefixed IDs.
                prefix_id = prefix_id[:-len(alt_suffix)]
            if prefix_id and prefix_id not in output_list:
                output_list.append(prefix_id)
    return output_list
def make_alternative_prefix_list(raw_term, alt_prefixes=('http://', 'https://',)):
    """Makes a two item list of a term and the same term with its
    leading prefix swapped for the other of the two alt_prefixes

    :param raw_term: a URL, or a prefixed identifier that is first
        expanded to a full URI when URImanagement can do so
    :param alt_prefixes: the two interchangeable leading strings
    :return: [term, term with the other prefix], or None if raw_term
        is not a string or starts with neither prefix
    """
    if not isinstance(raw_term, str):
        return None
    is_web_url = raw_term.startswith(('http://', 'https://'))
    if ':' in raw_term and not is_web_url:
        # looks like a prefixed identifier, try to expand it
        expanded = URImanagement.convert_prefix_to_full_uri(raw_term)
        raw_term = expanded or raw_term
    first, second = alt_prefixes[0], alt_prefixes[1]
    alt_term = None
    for own, other in ((first, second), (second, first)):
        if raw_term.startswith(own):
            alt_term = other + raw_term[len(own):]
            break
    if not alt_term:
        return None
    return [raw_term, alt_term]
def dereference(self, identifier, link_entity_slug=False):
    """ Dereferences an entity identified by an identifier, checks if a URI,
        if, not a URI, then looks in the OC manifest for the item

        On success the details of the matching record are copied onto
        attributes of this object (uri, slug, label, item_type, ...).

        :param identifier: prefixed URI, full URI, uuid or slug to look up
        :param link_entity_slug: if truthy, the linked entity table is
            searched even when the identifier does not look like a web URI
        :return: True if a linked entity or manifest item was found,
            otherwise False
    """
    output = False
    try_manifest = True
    identifier = URImanagement.convert_prefix_to_full_uri(identifier)
    if(link_entity_slug or (len(identifier) > 8)):
        if(link_entity_slug or (identifier[:7] == 'http://' or identifier[:8] == 'https://')):
            # Looks like a web URI (or the caller asked for a slug
            # lookup), so search the linked entity table first.
            # NOTE(review): only DoesNotExist is handled for this get().
            try:
                try_manifest = False
                ld_entity = LinkEntity.objects.get(Q(uri=identifier) | Q(slug=identifier))
            except LinkEntity.DoesNotExist:
                ld_entity = False
            if(ld_entity is not False):
                output = True
                self.uri = ld_entity.uri
                self.slug = ld_entity.slug
                self.label = ld_entity.label
                self.item_type = 'uri'
                self.alt_label = ld_entity.alt_label
                self.entity_type = ld_entity.ent_type
                self.vocab_uri = ld_entity.vocab_uri
                self.ld_object_ok = True
                # The vocabulary is itself a linked entity; its label is
                # used as the vocabulary name when it can be found.
                try:
                    vocab_entity = LinkEntity.objects.get(uri=self.vocab_uri)
                except LinkEntity.DoesNotExist:
                    vocab_entity = False
                if(vocab_entity is not False):
                    self.vocabulary = vocab_entity.label
                if self.get_icon:
                    # An icon annotation may be stored against any of
                    # the spellings of the entity's URI.
                    prefix_uri = URImanagement.prefix_common_uri(ld_entity.uri)
                    icon_anno = LinkAnnotation.objects\
                                              .filter(Q(subject=ld_entity.uri)
                                                      | Q(subject=identifier)
                                                      | Q(subject=prefix_uri),
                                                      predicate_uri='oc-gen:hasIcon')[:1]
                    if len(icon_anno) > 0:
                        self.icon = icon_anno[0].object_uri
            else:
                try_manifest = True
                # couldn't find the item in the linked entities table
                # NOTE(review): the identifier is replaced by whatever
                # get_uuid_from_oc_uri returns, even for a slug lookup;
                # confirm that is intended.
                identifier = URImanagement.get_uuid_from_oc_uri(identifier)
    if(try_manifest):
        # Fall back to Open Context's own manifest, by uuid or slug.
        try:
            manifest_item = Manifest.objects.get(Q(uuid=identifier) | Q(slug=identifier))
        except Manifest.DoesNotExist:
            manifest_item = False
        if(manifest_item is not False):
            output = True
            self.uri = URImanagement.make_oc_uri(manifest_item.uuid, manifest_item.item_type)
            self.uuid = manifest_item.uuid
            self.slug = manifest_item.slug
            self.label = manifest_item.label
            self.item_type = manifest_item.item_type
            self.class_uri = manifest_item.class_uri
            self.project_uuid = manifest_item.project_uuid
            # Extra details depend on the item type; at most one of the
            # branches below runs.
            if(manifest_item.item_type == 'media' and self.get_thumbnail):
                # a media item. get information about its thumbnail.
                try:
                    thumb_obj = Mediafile.objects.get(uuid=manifest_item.uuid,
                                                      file_type='oc-gen:thumbnail')
                except Mediafile.DoesNotExist:
                    thumb_obj = False
                if thumb_obj is not False:
                    self.thumbnail_media = thumb_obj
                    self.thumbnail_uri = thumb_obj.file_uri
            elif(manifest_item.item_type == 'types'):
                # the type's content is looked up by the identifier given
                tl = TypeLookup()
                tl.get_octype_without_manifest(identifier)
                self.content = tl.content
            elif(manifest_item.item_type == 'predicates'):
                try:
                    oc_pred = Predicate.objects.get(uuid=manifest_item.uuid)
                except Predicate.DoesNotExist:
                    oc_pred = False
                if(oc_pred is not False):
                    self.data_type = oc_pred.data_type
            elif(manifest_item.item_type == 'subjects' and self.get_context):
                try:
                    subj = Subject.objects.get(uuid=manifest_item.uuid)
                except Subject.DoesNotExist:
                    subj = False
                if subj is not False:
                    self.context = subj.context
    return output
def dereference(self, identifier, link_entity_slug=False):
    """ Dereferences an entity identified by an identifier, checks if a URI,
        if, not a URI, then looks in the OC manifest for the item

        On success the details of the matching record are copied onto
        attributes of this object (uri, slug, label, item_type, ...).

        :param identifier: prefixed URI, full URI, uuid or slug to look
            up; anything that is not a string is not dereferenced
        :param link_entity_slug: if truthy, the linked entity table is
            searched even when the identifier does not look like a web URI
        :return: True if a linked entity or manifest item was found,
            otherwise False
    """
    output = False
    if isinstance(identifier, str):
        # only try to dereference if the identifier is a string.
        try_manifest = True
        identifier = URImanagement.convert_prefix_to_full_uri(identifier)
        if (settings.CANONICAL_HOST + '/tables/') in identifier:
            # strip the table URL root so only the remainder is looked up
            identifier = identifier.replace((settings.CANONICAL_HOST + '/tables/'), '')
        if link_entity_slug or (len(identifier) > 8):
            if link_entity_slug or (identifier[:7] == 'http://' or identifier[:8] == 'https://'):
                # Looks like a web URI (or the caller asked for a slug
                # lookup): search linked entities using the URI variants
                # from EntityEquivalents, taking the first match.
                ent_equivs = EntityEquivalents()
                uris = ent_equivs.make_uri_variants(identifier)
                ld_entities = LinkEntity.objects.filter(Q(uri__in=uris) | Q(slug=identifier))[:1]
                if len(ld_entities) > 0:
                    ld_entity = ld_entities[0]
                else:
                    ld_entity = False
                if ld_entity is not False:
                    output = True
                    self.uri = ld_entity.uri
                    self.slug = ld_entity.slug
                    self.label = ld_entity.label
                    self.item_type = 'uri'
                    self.alt_label = ld_entity.alt_label
                    self.entity_type = ld_entity.ent_type
                    self.vocab_uri = ld_entity.vocab_uri
                    self.ld_object_ok = True
                    # Look the vocabulary up under both its http and
                    # https spelling.
                    # NOTE(review): only DoesNotExist is handled for
                    # this get().
                    try:
                        if 'https://' in self.vocab_uri:
                            alt_vocab_uri = self.vocab_uri.replace('https://', 'http://')
                        else:
                            alt_vocab_uri = self.vocab_uri.replace('http://', 'https://')
                        vocab_entity = LinkEntity.objects.get(Q(uri=self.vocab_uri) | Q(uri=alt_vocab_uri))
                    except LinkEntity.DoesNotExist:
                        vocab_entity = False
                    if vocab_entity is not False:
                        self.vocabulary = vocab_entity.label
                    if self.get_icon:
                        # An icon annotation may be stored against any
                        # of the spellings of the entity's URI.
                        prefix_uri = URImanagement.prefix_common_uri(ld_entity.uri)
                        icon_anno = LinkAnnotation.objects\
                                                  .filter(Q(subject=ld_entity.uri)
                                                          | Q(subject=identifier)
                                                          | Q(subject=prefix_uri),
                                                          predicate_uri='oc-gen:hasIcon')[:1]
                        if len(icon_anno) > 0:
                            self.icon = icon_anno[0].object_uri
                else:
                    try_manifest = True
                    # couldn't find the item in the linked entities table
                    identifier = URImanagement.get_uuid_from_oc_uri(identifier)
        if try_manifest:
            # Look in Open Context's own manifest, by uuid or slug.
            try:
                manifest_item = Manifest.objects.get(Q(uuid=identifier) | Q(slug=identifier))
            except Manifest.DoesNotExist:
                manifest_item = False
            if manifest_item is not False:
                output = True
                self.uri = URImanagement.make_oc_uri(manifest_item.uuid, manifest_item.item_type)
                self.uuid = manifest_item.uuid
                self.slug = manifest_item.slug
                self.label = manifest_item.label
                self.item_type = manifest_item.item_type
                self.class_uri = manifest_item.class_uri
                self.project_uuid = manifest_item.project_uuid
                # Extra details depend on the item type; this is a single
                # if / elif chain, so at most one branch below runs.
                if manifest_item.item_type == 'media' and self.get_thumbnail:
                    # a media item. get information about its thumbnail.
                    try:
                        thumb_obj = Mediafile.objects.get(uuid=manifest_item.uuid,
                                                          file_type='oc-gen:thumbnail')
                    except Mediafile.DoesNotExist:
                        thumb_obj = False
                    if thumb_obj is not False:
                        self.thumbnail_media = thumb_obj
                        self.thumbnail_uri = thumb_obj.file_uri
                elif manifest_item.item_type in ['persons', 'projects', 'tables'] \
                   or self.get_stable_ids:
                    # get stable identifiers for persons or projects by default
                    # NOTE(review): when self.get_stable_ids is truthy
                    # this branch matches every item type that reaches
                    # it, so the type-specific branches further down
                    # are skipped.
                    stable_ids = StableIdentifer.objects.filter(uuid=manifest_item.uuid)
                    if len(stable_ids) > 0:
                        self.stable_id_uris = []
                        doi_uris = []
                        orcid_uris = []
                        other_uris = []
                        for stable_id in stable_ids:
                            if stable_id.stable_type in StableIdentifer.ID_TYPE_PREFIXES:
                                prefix = StableIdentifer.ID_TYPE_PREFIXES[stable_id.stable_type]
                            else:
                                prefix = ''
                            stable_uri = prefix + stable_id.stable_id
                            if stable_id.stable_type == 'orcid':
                                orcid_uris.append(stable_uri)
                            elif stable_id.stable_type == 'doi':
                                doi_uris.append(stable_uri)
                            else:
                                other_uris.append(stable_uri)
                        # now list URIs in order of importance, with ORCIDs and DOIs
                        # first, followed by other stable URI types (Arks or something else)
                        self.stable_id_uris = orcid_uris + doi_uris + other_uris
                elif manifest_item.item_type == 'types':
                    # the type's content is looked up by the identifier given
                    tl = TypeLookup()
                    tl.get_octype_without_manifest(identifier)
                    self.content = tl.content
                elif manifest_item.item_type == 'predicates':
                    try:
                        oc_pred = Predicate.objects.get(uuid=manifest_item.uuid)
                    except Predicate.DoesNotExist:
                        oc_pred = False
                    if oc_pred is not False:
                        self.data_type = oc_pred.data_type
                        self.sort = oc_pred.sort
                        self.slug_uri = 'oc-pred:' + str(self.slug)
                elif manifest_item.item_type == 'projects':
                    # get a manifest object for the parent of a project, if it exists
                    # NOTE(review): this branch looks unreachable,
                    # because 'projects' is already matched by the
                    # stable identifier branch above; confirm intent.
                    # NOTE(review): the WHERE clause is built by string
                    # concatenation with self.uuid.
                    ch_tab = '"oc_projects" AS "child"'
                    filters = 'child.project_uuid=oc_manifest.uuid '\
                              ' AND child.uuid=\'' + self.uuid + '\' ' \
                              ' AND child.project_uuid != \'' + self.uuid + '\' '
                    par_rows = Manifest.objects\
                                       .filter(item_type='projects')\
                                       .exclude(uuid=self.uuid)\
                                       .extra(tables=[ch_tab], where=[filters])[:1]
                    if len(par_rows) > 0:
                        self.par_proj_man_obj = par_rows[0]
                elif manifest_item.item_type == 'subjects' and self.get_context:
                    try:
                        subj = Subject.objects.get(uuid=manifest_item.uuid)
                    except Subject.DoesNotExist:
                        subj = False
                    if subj is not False:
                        self.context = subj.context
    return output
def dereference(self, identifier, link_entity_slug=False):
    """ Dereferences an entity identified by an identifier, checks if a URI,
        if, not a URI, then looks in the OC manifest for the item

        On success the details of the matching record are copied onto
        attributes of this object (uri, slug, label, item_type, ...).

        :param identifier: prefixed URI, full URI, uuid or slug to look up
        :param link_entity_slug: if truthy, the linked entity table is
            searched even when the identifier does not look like a web URI
        :return: True if a linked entity or manifest item was found,
            otherwise False
    """
    output = False
    try_manifest = True
    identifier = URImanagement.convert_prefix_to_full_uri(identifier)
    if (link_entity_slug or (len(identifier) > 8)):
        if (link_entity_slug or (identifier[:7] == 'http://'
                                 or identifier[:8] == 'https://')):
            # Web URI (or explicit slug lookup): the linked entity
            # table is searched before the manifest.
            # NOTE(review): only DoesNotExist is handled for this get().
            try:
                try_manifest = False
                ld_entity = LinkEntity.objects.get(
                    Q(uri=identifier) | Q(slug=identifier))
            except LinkEntity.DoesNotExist:
                ld_entity = False
            if (ld_entity is not False):
                output = True
                self.uri = ld_entity.uri
                self.slug = ld_entity.slug
                self.label = ld_entity.label
                self.item_type = 'uri'
                self.alt_label = ld_entity.alt_label
                self.entity_type = ld_entity.ent_type
                self.vocab_uri = ld_entity.vocab_uri
                self.ld_object_ok = True
                # The vocabulary's own linked entity record supplies the
                # vocabulary label, when it exists.
                try:
                    vocab_entity = LinkEntity.objects.get(
                        uri=self.vocab_uri)
                except LinkEntity.DoesNotExist:
                    vocab_entity = False
                if (vocab_entity is not False):
                    self.vocabulary = vocab_entity.label
                if self.get_icon:
                    # Check every spelling of the URI for an icon
                    # annotation and keep the first one found.
                    prefix_uri = URImanagement.prefix_common_uri(
                        ld_entity.uri)
                    icon_anno = LinkAnnotation.objects\
                                              .filter(Q(subject=ld_entity.uri)
                                                      | Q(subject=identifier)
                                                      | Q(subject=prefix_uri),
                                                      predicate_uri='oc-gen:hasIcon')[:1]
                    if len(icon_anno) > 0:
                        self.icon = icon_anno[0].object_uri
            else:
                try_manifest = True
                # couldn't find the item in the linked entities table
                # NOTE(review): the identifier is replaced by whatever
                # get_uuid_from_oc_uri returns, even for a slug lookup;
                # confirm that is intended.
                identifier = URImanagement.get_uuid_from_oc_uri(identifier)
    if (try_manifest):
        # Fall back to Open Context's own manifest, by uuid or slug.
        try:
            manifest_item = Manifest.objects.get(
                Q(uuid=identifier) | Q(slug=identifier))
        except Manifest.DoesNotExist:
            manifest_item = False
        if (manifest_item is not False):
            output = True
            self.uri = URImanagement.make_oc_uri(manifest_item.uuid,
                                                 manifest_item.item_type)
            self.uuid = manifest_item.uuid
            self.slug = manifest_item.slug
            self.label = manifest_item.label
            self.item_type = manifest_item.item_type
            self.class_uri = manifest_item.class_uri
            self.project_uuid = manifest_item.project_uuid
            # Type-specific details; at most one branch below runs.
            if (manifest_item.item_type == 'media' and self.get_thumbnail):
                # a media item. get information about its thumbnail.
                try:
                    thumb_obj = Mediafile.objects.get(
                        uuid=manifest_item.uuid,
                        file_type='oc-gen:thumbnail')
                except Mediafile.DoesNotExist:
                    thumb_obj = False
                if thumb_obj is not False:
                    self.thumbnail_media = thumb_obj
                    self.thumbnail_uri = thumb_obj.file_uri
            elif (manifest_item.item_type == 'types'):
                # the type's content is looked up by the identifier given
                tl = TypeLookup()
                tl.get_octype_without_manifest(identifier)
                self.content = tl.content
            elif (manifest_item.item_type == 'predicates'):
                try:
                    oc_pred = Predicate.objects.get(
                        uuid=manifest_item.uuid)
                except Predicate.DoesNotExist:
                    oc_pred = False
                if (oc_pred is not False):
                    self.data_type = oc_pred.data_type
            elif (manifest_item.item_type == 'subjects'
                  and self.get_context):
                try:
                    subj = Subject.objects.get(uuid=manifest_item.uuid)
                except Subject.DoesNotExist:
                    subj = False
                if subj is not False:
                    self.context = subj.context
    return output