def update_ontology_doc(self, filename):
    """Rewrites category identifiers in an ontology document file.

    Reads root_export_dir/filename, applies every substitution in
    self.REVISION_LIST (both the file-prefixed form and the full URI
    form of each identifier), and writes the result to a new file
    named 'rev-' + filename. Prints a message if the file is missing.

    :param filename: name of the ontology file inside root_export_dir
    """
    filepath = self.root_export_dir + '/' + filename
    newfilepath = self.root_export_dir + '/rev-' + filename
    if not os.path.isfile(filepath):
        print('Ouch! Cannot find: ' + filepath)
        return
    print('Found: ' + filepath)
    with open(filepath, 'r') as myfile:
        data = myfile.read()
    for revision in self.REVISION_LIST:
        search_old_db = revision['old']
        search_old_file = search_old_db.replace(
            self.PREFIXING['db-prefix'],
            self.PREFIXING['file-prefix'])
        replace_db = revision['new']
        replace_file = replace_db.replace(
            self.PREFIXING['db-prefix'],
            self.PREFIXING['file-prefix'])
        data = data.replace(search_old_file, replace_file)
        old_uri = URImanagement.convert_prefix_to_full_uri(search_old_db)
        new_uri = URImanagement.convert_prefix_to_full_uri(replace_db)
        data = data.replace(old_uri, new_uri)
    # fix: use a context manager (instead of a bare codecs.open that
    # shadowed the 'file' builtin) so the output file is always closed,
    # even if write() raises
    with codecs.open(newfilepath, 'w', 'utf-8') as outfile:
        outfile.write(data)
def get_identifier_list_variants(self, id_list):
    """Makes different variants of identifiers for a list of identifiers.

    For each identifier: full URIs get their Open Context UUID (or a
    prefixed form) appended; prefixed ids get the full URI appended;
    anything else is dereferenced as a UUID/slug and its URI variants
    are appended. The original identifiers are always kept.

    :param id_list: a list of identifier strings (a non-list input is
        coerced to a single-element list)
    :return: list of identifiers plus their variants
    """
    output_list = []
    if not isinstance(id_list, list):
        id_list = [str(id_list)]
    for identifier in id_list:
        output_list.append(identifier)
        # idiom fix: tuple form of startswith replaces the slice
        # comparisons identifier[:7] / identifier[:8]
        if identifier.startswith(('http://', 'https://')):
            oc_uuid = URImanagement.get_uuid_from_oc_uri(identifier)
            if oc_uuid is not False:
                output_list.append(oc_uuid)
            else:
                prefix_id = URImanagement.prefix_common_uri(identifier)
                output_list.append(prefix_id)
        elif ':' in identifier:
            full_uri = URImanagement.convert_prefix_to_full_uri(identifier)
            output_list.append(full_uri)
        else:
            # probably an open context uuid or a slug
            ent = Entity()
            found = ent.dereference(identifier)
            if found:
                full_uri = ent.uri
                output_list.append(full_uri)
                prefix_uri = URImanagement.prefix_common_uri(full_uri)
                if prefix_uri != full_uri:
                    output_list.append(prefix_uri)
    return output_list
def save_icons(self, predicate_uri='oc-gen:hasIcon'):
    """Saves icon annotations from the vocabulary graph.

    Collects (subject, object) pairs linked by the icon predicate and
    stores each pair as a LinkAnnotation. Returns the collected pair
    dicts, or False when no graph / vocabulary URI is configured.
    """
    if self.graph is False or self.vocabulary_uri is False:
        return False
    data = []
    if self.replace_old:
        # clear previous relations from this vocabulary for this predicate
        LinkAnnotation.objects.filter(
            source_id=self.vocabulary_uri,
            predicate_uri=predicate_uri).delete()
    if predicate_uri == 'oc-gen:hasIcon':
        # for subClassOf predicates
        full_pred_uri = URImanagement.convert_prefix_to_full_uri(
            predicate_uri)
        icon_pred = URIRef(full_pred_uri)
        for subj, _pred, obj in self.graph.triples((None, icon_pred, None)):
            subject_uri = str(subj)  # URI of the subject as a string
            object_uri = str(obj)  # URI of the object as a string
            if subject_uri != object_uri:
                data.append({'s': subject_uri, 'o': object_uri})
    for act_t in data:
        newr = LinkAnnotation()
        # make the subject a prefixed URI if common
        newr.subject = URImanagement.prefix_common_uri(act_t['s'])
        newr.subject_type = 'uri'
        newr.project_uuid = '0'
        newr.source_id = self.vocabulary_uri
        newr.predicate_uri = predicate_uri
        newr.object_uri = act_t['o']
        newr.save()
    return data
def mass_revise_category_uris(self):
    """Revises category uris in a mass edit.

    For every entry in REVISION_LIST, swaps the old prefixed id and its
    full-URI form for the new ones across Manifest, LinkAnnotation, and
    LinkEntity records.
    """
    for revision in self.REVISION_LIST:
        old_prefixed = revision['old']
        new_prefixed = revision['new']
        old_full = URImanagement.convert_prefix_to_full_uri(old_prefixed)
        new_full = URImanagement.convert_prefix_to_full_uri(new_prefixed)
        # manifest class assignments use the prefixed form
        Manifest.objects.filter(
            class_uri=old_prefixed).update(class_uri=new_prefixed)
        # link annotation subjects may be stored either way
        LinkAnnotation.objects.filter(
            subject=old_prefixed).update(subject=new_prefixed)
        LinkAnnotation.objects.filter(
            subject=old_full).update(subject=new_full)
        # link annotation objects may also be stored either way
        LinkAnnotation.objects.filter(
            object_uri=old_prefixed).update(object_uri=new_prefixed)
        LinkAnnotation.objects.filter(
            object_uri=old_full).update(object_uri=new_full)
        # link entities only use the full uri form
        LinkEntity.objects.filter(uri=old_full).update(uri=new_full)
def update_ontology_doc(self, filename):
    """Rewrites category identifiers in an ontology document file.

    Reads root_export_dir/filename, applies every substitution from
    self.REVISION_LIST (file-prefixed form and full URI form), and
    writes the revised text to 'rev-' + filename. Prints a message if
    the input file does not exist.

    :param filename: name of the ontology file inside root_export_dir
    """
    filepath = self.root_export_dir + '/' + filename
    newfilepath = self.root_export_dir + '/rev-' + filename
    if not os.path.isfile(filepath):
        print('Ouch! Cannot find: ' + filepath)
        return
    print('Found: ' + filepath)
    with open(filepath, 'r') as myfile:
        data = myfile.read()
    for revision in self.REVISION_LIST:
        search_old_db = revision['old']
        search_old_file = search_old_db.replace(
            self.PREFIXING['db-prefix'],
            self.PREFIXING['file-prefix'])
        replace_db = revision['new']
        replace_file = replace_db.replace(
            self.PREFIXING['db-prefix'],
            self.PREFIXING['file-prefix'])
        data = data.replace(search_old_file, replace_file)
        old_uri = URImanagement.convert_prefix_to_full_uri(search_old_db)
        new_uri = URImanagement.convert_prefix_to_full_uri(replace_db)
        data = data.replace(old_uri, new_uri)
    # fix: a context manager closes the output file even when write()
    # raises; the original codecs.open also shadowed the 'file' builtin
    with codecs.open(newfilepath, 'w', 'utf-8') as outfile:
        outfile.write(data)
def add_project_predicates_and_annotations_to_graph(self, graph):
    """Gets the project predicates and their annotations with
    database calls, appending one dict per predicate to the graph.
    """
    pred_sql_dict_list = self.get_working_project_predicates()
    la_preds = self.get_link_annotations_for_preds(pred_sql_dict_list)
    if not isinstance(pred_sql_dict_list, list):
        return graph
    for sql_dict in pred_sql_dict_list:
        act_pred = LastUpdatedOrderedDict()
        act_pred['@id'] = 'oc-pred:' + sql_dict['slug']
        act_pred['owl:sameAs'] = URImanagement.make_oc_uri(
            sql_dict['predicate_uuid'], 'predicates')
        act_pred['label'] = sql_dict['label']
        act_pred['uuid'] = sql_dict['predicate_uuid']
        act_pred['slug'] = sql_dict['slug']
        if isinstance(sql_dict['class_uri'], str) \
                and len(sql_dict['class_uri']) > 0:
            act_pred['oc-gen:predType'] = sql_dict['class_uri']
        matched = False
        for la_pred in la_preds:
            if la_pred.subject != sql_dict['predicate_uuid']:
                if matched:
                    # la_preds is sorted by subject, so once we leave the
                    # matching run there are no more annotations for this
                    # predicate item
                    break
                continue
            matched = True
            # prefix common URIs for the predicate of the link annotation
            la_pred_uri = URImanagement.prefix_common_uri(
                la_pred.predicate_uri)
            if la_pred_uri not in act_pred:
                act_pred[la_pred_uri] = []
            act_pred[la_pred_uri].append(
                self.make_object_dict_item(la_pred.object_uri))
        graph.append(act_pred)
    return graph
def dereference(self, identifier, link_entity_slug=False):
    """Dereferences an entity identified by an identifier; checks if
    it is a URI and, if not, looks in the OC manifest for the item.

    Returns True when the entity is found, False otherwise.
    """
    output = False
    if not isinstance(identifier, str):
        # only strings can be dereferenced
        return output
    identifier = URImanagement.convert_prefix_to_full_uri(identifier)
    oc_uuid = URImanagement.get_uuid_from_oc_uri(identifier)
    tables_prefix = settings.CANONICAL_HOST + '/tables/'
    if not oc_uuid and tables_prefix in identifier:
        # special case for a probable open context table item
        oc_uuid = identifier.replace(tables_prefix, '')
    if not oc_uuid:
        # no Open Context UUID, so try to look up a linked data entity
        if self.dereference_linked_data(
                identifier, link_entity_slug=link_entity_slug):
            # found what we want; skip the rest
            return True
    if oc_uuid:
        # a UUID parsed from a URI is the identifier to look up
        identifier = oc_uuid
    if self.dereference_manifest_item(identifier):
        return True
    return output
def get_identifier_list_variants(self, id_list):
    """Makes different variants of identifiers for a list of
    identifiers, keeping the originals in the output."""
    if not isinstance(id_list, list):
        id_list = [str(id_list)]
    output_list = []
    for identifier in id_list:
        output_list.append(identifier)
        is_http = identifier[:7] == 'http://'
        is_https = identifier[:8] == 'https://'
        if is_http or is_https:
            oc_uuid = URImanagement.get_uuid_from_oc_uri(identifier)
            if oc_uuid is False:
                # not an Open Context URI; keep a prefixed form instead
                output_list.append(
                    URImanagement.prefix_common_uri(identifier))
            else:
                output_list.append(oc_uuid)
        elif ':' in identifier:
            # a prefixed identifier; expand it to the full URI
            output_list.append(
                URImanagement.convert_prefix_to_full_uri(identifier))
        else:
            # probably an open context uuid or a slug
            ent = Entity()
            if ent.dereference(identifier):
                uri = ent.uri
                output_list.append(uri)
                prefixed = URImanagement.prefix_common_uri(uri)
                if prefixed != uri:
                    output_list.append(prefixed)
    return output_list
def get_identifier_list_variants(self, id_list):
    """Makes different variants of identifiers for a list of
    identifiers, using the memory cache for entity lookups."""
    if not isinstance(id_list, list):
        id_list = [str(id_list)]
    output_list = []
    for identifier in id_list:
        output_list.append(identifier)
        if identifier.startswith(('http://', 'https://')):
            oc_uuid = URImanagement.get_uuid_from_oc_uri(identifier)
            if oc_uuid:
                output_list.append(oc_uuid)
            prefix_id = URImanagement.prefix_common_uri(identifier)
            if prefix_id:
                output_list.append(prefix_id)
        elif ':' in identifier:
            # a prefixed identifier; expand it to the full URI
            output_list.append(
                URImanagement.convert_prefix_to_full_uri(identifier))
        else:
            # probably an open context uuid or a slug
            ent = MemoryCache().get_entity(identifier)
            if ent:
                uri = ent.uri
                output_list.append(uri)
                prefixed = URImanagement.prefix_common_uri(uri)
                if prefixed != uri:
                    output_list.append(prefixed)
    return output_list
def save_icons(self, predicate_uri='oc-gen:hasIcon'):
    """Saves icons in the general Open Context namespace.

    Returns the list of (subject, object) pair dicts stored, or False
    when no graph / vocabulary URI is configured.
    """
    if self.graph is False or self.vocabulary_uri is False:
        return False
    if self.replace_old:
        # delete old relations from this vocabulary using this predicate
        LinkAnnotation.objects.filter(
            source_id=self.vocabulary_uri,
            predicate_uri=predicate_uri).delete()
    data = []
    if predicate_uri == 'oc-gen:hasIcon':
        # for subClassOf predicates
        icon_pred = URIRef(
            URImanagement.convert_prefix_to_full_uri(predicate_uri))
        for subj, _pred, obj in self.graph.triples((None, icon_pred, None)):
            pair = {'s': str(subj), 'o': str(obj)}
            if pair['s'] != pair['o']:
                data.append(pair)
    for pair in data:
        annot = LinkAnnotation()
        # make the subject a prefixed URI if common
        annot.subject = URImanagement.prefix_common_uri(pair['s'])
        annot.subject_type = 'uri'
        annot.project_uuid = '0'
        annot.source_id = self.vocabulary_uri
        annot.predicate_uri = predicate_uri
        annot.object_uri = pair['o']
        annot.save()
    return data
def make_list_cite_projects(self, json_ld):
    """Makes a string for citation of projects.

    Also stores the full project list on self.projects_list and the
    joined citation string on self.cite_projects, which is returned.
    """
    projects_list = []
    cite_projects_list = []
    for item in json_ld.get('dc-terms:source', []):
        cite_projects_list.append(item['label'])
        if 'rdfs:isDefinedBy' in item:
            uri = item['rdfs:isDefinedBy']
        else:
            uri = item['id']
        proj_item = {}
        proj_item['uuid'] = URImanagement.get_uuid_from_oc_uri(uri, False)
        proj_item['uri'] = uri
        proj_item['label'] = item['label']
        proj_item['count'] = item['count'] if 'count' in item else False
        projects_list.append(proj_item)
    self.cite_projects = ', '.join(cite_projects_list)
    self.projects_list = projects_list
    return self.cite_projects
def mass_revise_category_uris(self):
    """Revises category uris in a mass edit across Manifest,
    LinkAnnotation, and LinkEntity records."""
    for rev in self.REVISION_LIST:
        prefixed_old = rev['old']
        prefixed_new = rev['new']
        full_old = URImanagement.convert_prefix_to_full_uri(prefixed_old)
        full_new = URImanagement.convert_prefix_to_full_uri(prefixed_new)
        # manifest class assignments use the prefixed form
        Manifest.objects.filter(
            class_uri=prefixed_old).update(class_uri=prefixed_new)
        # subjects and objects of link annotations may be stored in
        # either the prefixed or the full uri form
        for old_val, new_val in ((prefixed_old, prefixed_new),
                                 (full_old, full_new)):
            LinkAnnotation.objects.filter(
                subject=old_val).update(subject=new_val)
            LinkAnnotation.objects.filter(
                object_uri=old_val).update(object_uri=new_val)
        # link entities only store the full uri form
        LinkEntity.objects.filter(uri=full_old).update(uri=full_new)
def add_project_types_with_annotations_to_graph(self, graph):
    """Adds project types that have annotations to the graph."""
    type_sql_dict_list = self.get_working_project_types()
    if not isinstance(type_sql_dict_list, list):
        return graph
    # consolidate things so a given type appears only once in the graph;
    # first gather everything into all_types keyed by the type uri
    all_types = LastUpdatedOrderedDict()
    for sql_dict in type_sql_dict_list:
        type_uri = URImanagement.make_oc_uri(sql_dict['type_uuid'], 'types')
        if type_uri in all_types:
            act_type = all_types[type_uri]
        else:
            act_type = LastUpdatedOrderedDict()
            act_type['@id'] = type_uri
            act_type['label'] = sql_dict['type_label']
            act_type['owl:sameAs'] = URImanagement.make_oc_uri(
                sql_dict['type_slug'], 'types')
            act_type['uuid'] = sql_dict['type_uuid']
            act_type['slug'] = sql_dict['type_slug']
        la_pred_uri = URImanagement.prefix_common_uri(
            sql_dict['predicate_uri'])
        if la_pred_uri not in act_type:
            act_type[la_pred_uri] = []
        act_type[la_pred_uri].append(
            self.make_object_dict_item(sql_dict['object_uri']))
        all_types[type_uri] = act_type
    for act_type in all_types.values():
        graph.append(act_type)
    return graph
def add_project_types_with_annotations_to_graph(self, graph):
    """Adds project types that have annotations to the graph,
    de-duplicating annotation objects per predicate."""
    type_sql_dict_list = self.get_working_project_types()
    if not isinstance(type_sql_dict_list, list):
        return graph
    # consolidate rows so each type appears only once in the graph
    all_types = LastUpdatedOrderedDict()
    for sql_dict in type_sql_dict_list:
        type_uri = URImanagement.make_oc_uri(sql_dict['type_uuid'], 'types')
        act_type = all_types.get(type_uri)
        if act_type is None:
            act_type = LastUpdatedOrderedDict()
            act_type['@id'] = type_uri
            act_type['label'] = sql_dict['type_label']
            act_type['owl:sameAs'] = URImanagement.make_oc_uri(
                sql_dict['type_slug'], 'types')
            act_type['uuid'] = sql_dict['type_uuid']
            act_type['slug'] = sql_dict['type_slug']
        la_pred_uri = URImanagement.prefix_common_uri(
            sql_dict['predicate_uri'])
        # add the object, de-duplicating within this predicate
        act_type = self.add_unique_object_dict_to_pred(
            act_type, la_pred_uri, sql_dict['object_uri'])
        all_types[type_uri] = act_type
    for act_type in all_types.values():
        graph.append(act_type)
    return graph
def add_project_predicates_and_annotations_to_graph(self, graph):
    """Gets the project predicates and their annotations with
    database calls, appending one dict per predicate to the graph."""
    pred_sql_dict_list = self.get_working_project_predicates()
    la_preds = self.get_link_annotations_for_preds(pred_sql_dict_list)
    if not isinstance(pred_sql_dict_list, list):
        # no predicates in the project; weird, but possible
        return graph
    # group link annotations by their subject (the predicate uuid)
    annotated_pred_uuids = {}
    for la in la_preds:
        annotated_pred_uuids.setdefault(la.subject, []).append(la)
    for sql_dict in pred_sql_dict_list:
        act_pred = LastUpdatedOrderedDict()
        act_pred['@id'] = 'oc-pred:' + str(sql_dict['slug'])
        act_pred['owl:sameAs'] = URImanagement.make_oc_uri(
            sql_dict['predicate_uuid'], 'predicates')
        act_pred['label'] = sql_dict['label']
        act_pred['uuid'] = sql_dict['predicate_uuid']
        act_pred['slug'] = sql_dict['slug']
        class_uri = sql_dict['class_uri']
        if isinstance(class_uri, str) and len(class_uri) > 0:
            act_pred['oc-gen:predType'] = class_uri
        for la_pred in annotated_pred_uuids.get(
                sql_dict['predicate_uuid'], []):
            la_pred_uri = URImanagement.prefix_common_uri(
                la_pred.predicate_uri)
            act_pred = self.add_unique_object_dict_to_pred(
                act_pred, la_pred_uri, la_pred.object_uri)
        graph.append(act_pred)
    return graph
def make_list_cite_projects(self, json_ld):
    """Makes a string for citation of projects; also stores the
    project list on self.projects_list."""
    projects_list = []
    cite_labels = []
    if 'dc-terms:source' in json_ld:
        for src in json_ld['dc-terms:source']:
            cite_labels.append(src['label'])
            if 'rdfs:isDefinedBy' in src:
                src_uri = src['rdfs:isDefinedBy']
            else:
                src_uri = src['id']
            proj_item = {
                'uuid': URImanagement.get_uuid_from_oc_uri(src_uri, False),
                'uri': src_uri,
                'label': src['label'],
                'count': src['count'] if 'count' in src else False,
            }
            projects_list.append(proj_item)
    self.cite_projects = ', '.join(cite_labels)
    self.projects_list = projects_list
    return self.cite_projects
def get_entity_parents(self, identifier):
    """Gets parent concepts for a given URI or UUID identified entity.

    Recursively walks up the hierarchy (capped at 50 levels via
    self.loop_count), accumulating results in self.parent_entities.
    """
    self.loop_count += 1
    lequiv = LinkEquivalence()
    identifiers = lequiv.get_identifier_list_variants(identifier)
    preds_for_superobjs = lequiv.get_identifier_list_variants(
        LinkAnnotation.PREDS_SBJ_IS_SUB_OF_OBJ)
    preds_for_subobjs = lequiv.get_identifier_list_variants(
        LinkAnnotation.PREDS_SBJ_IS_SUPER_OF_OBJ)

    def follow_parent(parent_id):
        # normalize an Open Context URI to its uuid, record it, and
        # recurse to find its own parents (capped at 50 levels)
        if parent_id.count('/') > 1:
            oc_uuid = URImanagement.get_uuid_from_oc_uri(parent_id)
            if oc_uuid is not False:
                parent_id = oc_uuid
        if parent_id not in self.parent_entities:
            self.parent_entities.append(parent_id)
        if self.loop_count <= 50:
            self.parent_entities = self.get_entity_parents(parent_id)

    try:
        # look for superior items in the objects of the assertion,
        # sorting by sort so we can privelage a certain hierarchy path
        superobjs_anno = LinkAnnotation.objects.filter(
            subject__in=identifiers,
            predicate_uri__in=preds_for_superobjs)\
            .exclude(object_uri__in=identifiers)\
            .order_by('sort', 'object_uri')[:1]
        if len(superobjs_anno) < 1:
            superobjs_anno = False
    except LinkAnnotation.DoesNotExist:
        superobjs_anno = False
    if superobjs_anno is not False:
        follow_parent(superobjs_anno[0].object_uri)
    try:
        # now look for superior entities in the subject, not the object,
        # sorting by sort so we can privelage a certain hierarchy path
        supersubj_anno = LinkAnnotation.objects.filter(
            object_uri__in=identifiers,
            predicate_uri__in=preds_for_subobjs)\
            .exclude(subject__in=identifiers)\
            .order_by('sort', 'subject')[:1]
        if len(supersubj_anno) < 1:
            supersubj_anno = False
    except LinkAnnotation.DoesNotExist:
        supersubj_anno = False
    if supersubj_anno is not False:
        follow_parent(supersubj_anno[0].subject)
    return self.parent_entities
def make_alt_uri(self, uri):
    """Makes an alternative URI: compacts a full URI to a prefixed id,
    or expands a prefixed id to a full URI.

    :param uri: a full 'http(s)://' URI or a prefixed identifier
    :return: the alternate form (may equal the input when no common
        prefix mapping applies)
    """
    # idiom fix: tuple form of startswith replaces the slice
    # comparisons; the dead initial 'output = uri' assignment is gone
    if uri.startswith(('http://', 'https://')):
        # full URI: compact it to a common prefixed form
        return URImanagement.prefix_common_uri(uri)
    # prefixed id: expand to the full URI
    return URImanagement.convert_prefix_to_full_uri(uri)
def make_alt_uri(self, uri):
    """Makes an alternative URI, changing a prefixed id to a full uri
    or a full uri to a prefixed id."""
    is_full_uri = uri[:7] == 'http://' or uri[:8] == 'https://'
    if is_full_uri:
        # full URI: compact it to a common prefixed form
        output = URImanagement.prefix_common_uri(uri)
    else:
        # prefixed id: expand to the full URI
        output = URImanagement.convert_prefix_to_full_uri(uri)
    return output
def prep_delete_uuid(self, delete_uuid):
    """Prepares some information needed to delete a uuid.

    Returns True when the manifest item exists and delete state was
    prepared; False otherwise.
    """
    delete_obj = self.get_manifest(delete_uuid)
    if delete_obj is False:
        return False
    self.delete_manifest_obj = delete_obj
    self.delete_uri = URImanagement.make_oc_uri(
        delete_uuid, delete_obj.item_type)
    self.delete_prefix_uri = URImanagement.prefix_common_uri(
        self.delete_uri)
    return True
def prep_merge_uuid(self, merge_into_uuid):
    """Prepares some information needed to merge into a uuid.

    Returns True when the manifest item exists and merge state was
    prepared; False otherwise.
    """
    merge_obj = self.get_manifest(merge_into_uuid)
    if merge_obj is False:
        return False
    self.merge_manifest_obj = merge_obj
    self.merge_uri = URImanagement.make_oc_uri(
        merge_into_uuid, merge_obj.item_type)
    self.merge_prefix_uri = URImanagement.prefix_common_uri(
        self.merge_uri)
    return True
def prep_delete_uuid(self, delete_uuid):
    """Prepares some information needed to delete a uuid."""
    ok_delete = False
    manifest_obj = self.get_manifest(delete_uuid)
    if manifest_obj is not False:
        ok_delete = True
        # stash the manifest object and its uri forms for later use
        self.delete_manifest_obj = manifest_obj
        self.delete_uri = URImanagement.make_oc_uri(
            delete_uuid,
            manifest_obj.item_type,
        )
        self.delete_prefix_uri = URImanagement.prefix_common_uri(
            self.delete_uri,
        )
    return ok_delete
def prep_merge_uuid(self, merge_into_uuid):
    """Prepares some information needed to merge into a uuid."""
    ok_merge = False
    manifest_obj = self.get_manifest(merge_into_uuid)
    if manifest_obj is not False:
        ok_merge = True
        # stash the manifest object and its uri forms for later use
        self.merge_manifest_obj = manifest_obj
        self.merge_uri = URImanagement.make_oc_uri(
            merge_into_uuid,
            manifest_obj.item_type,
        )
        self.merge_prefix_uri = URImanagement.prefix_common_uri(
            self.merge_uri,
        )
    return ok_merge
def get_entity_parents(self, identifier):
    """Gets parent concepts for a given URI or UUID identified entity.

    Recursively walks up the hierarchy (capped at 50 levels via
    self.loop_count), accumulating results in self.parent_entities.
    """
    self.loop_count += 1
    lequiv = LinkEquivalence()
    identifiers = lequiv.get_identifier_list_variants(identifier)
    preds_for_superobjs = lequiv.get_identifier_list_variants(
        LinkAnnotation.PREDS_SBJ_IS_SUB_OF_OBJ)
    preds_for_subobjs = lequiv.get_identifier_list_variants(
        LinkAnnotation.PREDS_SBJ_IS_SUPER_OF_OBJ)

    def record_and_recurse(parent_id):
        # normalize an Open Context URI to its uuid, record it, and
        # recurse to find its own parents (capped at 50 levels)
        if parent_id.count('/') > 1:
            oc_uuid = URImanagement.get_uuid_from_oc_uri(parent_id)
            if oc_uuid is not False:
                parent_id = oc_uuid
        if parent_id not in self.parent_entities:
            self.parent_entities.append(parent_id)
        if self.loop_count <= 50:
            self.parent_entities = self.get_entity_parents(parent_id)

    try:
        # look for superior items in the objects of the assertion
        superobjs_anno = LinkAnnotation.objects.filter(
            subject__in=identifiers,
            predicate_uri__in=preds_for_superobjs)\
            .exclude(object_uri__in=identifiers)[:1]
        if len(superobjs_anno) < 1:
            superobjs_anno = False
    except LinkAnnotation.DoesNotExist:
        superobjs_anno = False
    if superobjs_anno is not False:
        record_and_recurse(superobjs_anno[0].object_uri)
    try:
        # now look for superior entities in the subject, not the object
        supersubj_anno = LinkAnnotation.objects.filter(
            object_uri__in=identifiers,
            predicate_uri__in=preds_for_subobjs)\
            .exclude(subject__in=identifiers)[:1]
        if len(supersubj_anno) < 1:
            supersubj_anno = False
    except LinkAnnotation.DoesNotExist:
        supersubj_anno = False
    if supersubj_anno is not False:
        record_and_recurse(supersubj_anno[0].subject)
    return self.parent_entities
def add_json_ld_link_annotations(self, json_ld):
    """Adds linked data annotations (typically referencing URIs from
    outside Open Context) to the json_ld dict."""
    if not self.link_annotations or not len(self.link_annotations):
        return json_ld
    parts_json_ld = PartsJsonLD()
    parts_json_ld.proj_context_json_ld = self.proj_context_json_ld
    parts_json_ld.manifest_obj_dict = self.manifest_obj_dict
    for la in self.link_annotations:
        tcheck = URImanagement.get_uuid_from_oc_uri(la.object_uri, True)
        # item_type is False when the object is NOT from open context
        item_type = tcheck['item_type'] if tcheck else False
        if item_type == 'persons':
            # add a stable ID to person items, but only if they are
            # ORCID IDs
            parts_json_ld.stable_id_predicate = \
                ItemKeys.PREDICATES_FOAF_PRIMARYTOPICOF
            parts_json_ld.stable_id_prefix_limit = \
                StableIdentifer.ID_TYPE_PREFIXES['orcid']
        # shorten URIs in item-context declared namespaces to make a
        # compact (prefixed) URI used as the act_pred
        act_pred = URImanagement.prefix_common_uri(la.predicate_uri)
        is_dc_pred = (act_pred in self.dc_author_preds
                      or act_pred in self.dc_inherit_preds
                      or act_pred in self.dc_metadata_preds)
        if is_dc_pred:
            # dublin core assertions are cached in self.dc_assertions
            # so they get added LAST, after other assertions
            self.dc_assertions = parts_json_ld.addto_predicate_list(
                self.dc_assertions, act_pred, la.object_uri, item_type)
            continue
        if not biological_taxonomy_validation(act_pred, la.object_uri):
            # invalid act_pred / object_uri combination; skip it
            continue
        json_ld = parts_json_ld.addto_predicate_list(
            json_ld, act_pred, la.object_uri, item_type)
    return json_ld
def add_json_ld_link_annotations(self, json_ld):
    """Adds linked data annotations (typically referencing URIs from
    outside Open Context) to the json_ld dict."""
    if not self.link_annotations or not len(self.link_annotations):
        # no link annotations, so skip out
        return json_ld
    parts_json_ld = PartsJsonLD()
    parts_json_ld.proj_context_json_ld = self.proj_context_json_ld
    parts_json_ld.manifest_obj_dict = self.manifest_obj_dict
    for la in self.link_annotations:
        tcheck = URImanagement.get_uuid_from_oc_uri(la.object_uri, True)
        if tcheck:
            # an Open Context item
            item_type = tcheck['item_type']
        else:
            # this item is NOT from open context
            item_type = False
        if item_type == 'persons':
            # add a stable ID to person items, but only for ORCID IDs
            parts_json_ld.stable_id_predicate = \
                ItemKeys.PREDICATES_FOAF_PRIMARYTOPICOF
            parts_json_ld.stable_id_prefix_limit = \
                StableIdentifer.ID_TYPE_PREFIXES['orcid']
        # shorten URIs in item-context declared namespaces to make a
        # compact (prefixed) URI, used as the act_pred
        act_pred = URImanagement.prefix_common_uri(la.predicate_uri)
        is_dc_pred = (act_pred in self.dc_author_preds
                      or act_pred in self.dc_inherit_preds
                      or act_pred in self.dc_metadata_preds)
        if is_dc_pred:
            # dublin core assertions are cached in self.dc_assertions
            # so they get added LAST, after other assertions
            self.dc_assertions = parts_json_ld.addto_predicate_list(
                self.dc_assertions, act_pred, la.object_uri, item_type)
        else:
            json_ld = parts_json_ld.addto_predicate_list(
                json_ld, act_pred, la.object_uri, item_type)
    return json_ld
def add_general_json_ld(self):
    """Adds general (manifest) information to the JSON-LD object,
    then dispatches to the item-type specific builder."""
    self.json_ld['id'] = URImanagement.make_oc_uri(self.uuid, self.item_type)
    self.json_ld['uuid'] = self.uuid
    self.json_ld['slug'] = self.slug
    self.json_ld['label'] = self.label
    # add multilingual alternative labels
    if isinstance(self.manifest.localized_json, dict) \
            and len(self.manifest.localized_json) > 0:
        # fix: the original assigned to a bare 'json_ld' name that does
        # not exist in this scope (NameError); self.json_ld is intended
        self.json_ld['skos:altLabel'] = self.manifest.localized_json
    if self.manifest.item_type in PartsJsonLD.ITEM_TYPE_CLASS_LIST \
            and len(self.manifest.class_uri) > 1:
        self.json_ld['category'] = [self.manifest.class_uri]
    if self.manifest.item_type == 'projects':
        # now add the project specific data to the JSON-LD
        self.add_project_json_ld()
    elif self.manifest.item_type == 'documents':
        # now add document specific information to the JSON-LD
        self.add_document_json_ld()
    elif self.manifest.item_type == 'predicates':
        # now add the predicate specific data to the JSON-LD
        self.add_predicate_json_ld()
    elif self.manifest.item_type == 'types':
        self.add_type_json_ld()
def make_save_ark_by_uuid(self, uuid, metadata=None):
    """Makes and saves an ARK identifier for an item identified by uuid.

    Skips items that already have an ARK stable id.

    :param uuid: uuid of the item to identify
    :param metadata: optional pre-built ARK metadata dict; generated
        from the item when None
    :return: True when a new ARK id was minted and saved, else False
    """
    ok = False
    oc_uri = None
    arks = StableIdentifer.objects.filter(uuid=uuid, stable_type='ark')[:1]
    if len(arks) < 1:
        # the item doesn't yet have an ARK id, so make one!
        oc_item = OCitem()
        # check_exists sets oc_item.exists as a side effect; the
        # original stored its return in an unused 'exists' local
        oc_item.check_exists(uuid)
        if oc_item.exists:
            if metadata is None:
                metadata = self.make_ark_metadata_by_uuid(uuid, oc_item)
            if isinstance(metadata, dict):
                if '_target' in metadata:
                    oc_uri = metadata['_target']
                else:
                    oc_uri = URImanagement.make_oc_uri(
                        oc_item.manifest.uuid, oc_item.item_type)
                if isinstance(oc_uri, str):
                    print('Make ARK id for: ' + oc_uri)
                    ark_id = self.ezid.mint_identifier(
                        oc_uri, metadata, 'ark')
                    if isinstance(ark_id, str):
                        # success! we have an ARK id!
                        stable_id = ark_id.replace('ark:/', '')
                        ok = self.save_oc_item_stable_id(
                            oc_item, stable_id, 'ark')
    return ok
def make_table_dc_creator_list(self, proj_uuid_counts):
    """Makes a list of dublin core creators from a project
    uuid + counts list.

    :param proj_uuid_counts: iterable of dicts with 'project_uuid'
        and 'num_uuids' keys
    :return: list of dicts with 'id' (person URI) and 'count'
    """
    dc_creators = []
    for proj_uuid_count in proj_uuid_counts:
        project_uuid = proj_uuid_count['project_uuid']
        proj_count = proj_uuid_count['num_uuids']
        auth = Authorship()
        auth.get_project_authors(project_uuid)
        if len(auth.creators) < 1 and len(auth.contributors) > 0:
            # no creators; fall back to the contributors
            auth.creators = auth.contributors
        # fix: removed the unused local 'i' and the redundant
        # len(auth.creators) > 0 guard (an empty loop is a no-op)
        for auth_uuid in auth.creators:
            try:
                auth_man = Manifest.objects.get(uuid=auth_uuid)
            except Manifest.DoesNotExist:
                auth_man = False
            if auth_man is False:
                continue
            item = LastUpdatedOrderedDict()
            item['id'] = URImanagement.make_oc_uri(auth_uuid, 'persons')
            item['count'] = proj_count
            dc_creators.append(item)
    return dc_creators
def process_id(self, identifier):
    """Makes a solr query dict for an identifier, matching it against
    uuid and persistent uri (DOI, ARK, ORCID) variants."""
    query_dict = {'fq': [], 'facet.field': []}
    escape_id = self.escape_solr_arg(identifier)
    fq_terms = ['persistent_uri:' + escape_id]
    # also try the identifier as a naked DOI, ARK, or ORCID
    for uri_prefix in ('http://dx.doi.org/',
                       'http://n2t.net/',
                       'http://orcid.org/'):
        fq_terms.append(
            'persistent_uri:'
            + self.escape_solr_arg(uri_prefix + identifier))
    fq_terms.append('uuid:' + escape_id)
    tcheck = URImanagement.get_uuid_from_oc_uri(identifier, True)
    if tcheck is not False:
        # the identifier is an Open Context URI; search its uuid too
        fq_terms.append('uuid:' + tcheck['uuid'])
    fq_final = '(' + ' OR '.join(fq_terms) + ')'
    query_dict['fq'].append(fq_final)
    return query_dict
def make_json_ld(self):
    """Makes a JSON-LD object for the table metadata.

    Needs the oc-table namespace, the cc-rel namespace, and the
    http://www.w3.org/2003/01/geo/ namespace as geo:lat, geo:lon.
    """
    json_ld = LastUpdatedOrderedDict()
    if self.exp_tab is False:
        # no export table loaded; return the empty dict
        return json_ld
    json_ld['id'] = URImanagement.make_oc_uri(
        self.public_table_id, 'tables')
    json_ld['uuid'] = self.public_table_id
    json_ld['label'] = self.exp_tab.label
    json_ld['fields'] = self.exp_tab.field_count
    json_ld['rows'] = self.exp_tab.row_count
    json_ld['dc-terms:identifier'] = self.table_id
    json_ld['dc-terms:issued'] = self.exp_tab.created.date().isoformat()
    json_ld['dc-terms:modified'] = self.exp_tab.updated.date().isoformat()
    json_ld['dc-terms:abstract'] = self.exp_tab.abstract
    json_ld = self.get_link_annotations(json_ld)
    stable_ids = self.get_stable_ids()
    if len(stable_ids) > 0:
        json_ld['owl:sameAs'] = stable_ids
    json_ld['has-fields'] = self.get_field_list()
    return json_ld
def make_trinomial_from_site_labels(self, project_uuid, state_prefix=''):
    """Makes trinomial identifiers from the site labels of a project.

    :param project_uuid: uuid of the project whose sites to process
    :param state_prefix: optional prefix prepended to each site label
    """
    ent = Entity()
    if not ent.dereference(project_uuid):
        return
    proj_label = ent.label
    site_objs = Manifest.objects.filter(
        project_uuid=project_uuid,
        class_uri='oc-gen:cat-site')
    for site in site_objs:
        trinomial = str(state_prefix) + site.label
        parts = self.parse_trinomial(trinomial)
        dt = Trinomial()
        dt.uri = URImanagement.make_oc_uri(site.uuid, site.item_type)
        dt.uuid = site.uuid
        dt.label = site.label
        dt.project_label = proj_label
        dt.trinomial = trinomial
        dt.state = parts['state']
        dt.county = parts['county']
        dt.site = parts['site']
        dt.save()
        print('Trinomial: ' + trinomial + ', from: ' + site.label)
def make_table_dc_creator_list(self, proj_uuid_counts):
    """Makes a list of dublin core creators from a project
    uuid + counts list.

    :param proj_uuid_counts: iterable of dicts with 'project_uuid'
        and 'num_uuids' keys
    :return: list of dicts with 'id' (person URI) and 'count'
    """
    dc_creators = []
    for proj_uuid_count in proj_uuid_counts:
        project_uuid = proj_uuid_count['project_uuid']
        proj_count = proj_uuid_count['num_uuids']
        auth = Authorship()
        auth.get_project_authors(project_uuid)
        if len(auth.creators) < 1 and len(auth.contributors) > 0:
            # no creators; fall back to the contributors
            auth.creators = auth.contributors
        # fix: removed the unused local 'i' and the redundant
        # len(auth.creators) > 0 guard (an empty loop is a no-op)
        for auth_uuid in auth.creators:
            try:
                auth_man = Manifest.objects.get(uuid=auth_uuid)
            except Manifest.DoesNotExist:
                auth_man = False
            if auth_man is False:
                continue
            item = LastUpdatedOrderedDict()
            item['id'] = URImanagement.make_oc_uri(auth_uuid, 'persons')
            item['count'] = proj_count
            dc_creators.append(item)
    return dc_creators
def add_when_json(self, act_dict, uuid, item_type, event):
    """ adds when (time interval or instant) data """
    when = LastUpdatedOrderedDict()
    when['id'] = '#event-when-' + str(event.event_id)
    when['type'] = event.when_type
    # NOTE(review): the assignment below immediately overwrites the
    # 'type' value set from event.when_type above — confirm which of
    # when_type / meta_type is actually intended here
    when['type'] = event.meta_type
    if (event.earliest != event.start):
        # when['earliest'] = int(event.earliest)
        pass
    when['start'] = ISOyears().make_iso_from_float(event.start)
    when['stop'] = ISOyears().make_iso_from_float(event.stop)
    if event.latest != event.stop:
        # when['latest'] = int(event.latest)
        pass
    if event.uuid != uuid:
        # we're inheriting / inferring event metadata from a parent context
        when['reference-type'] = 'inferred'
        when['reference-uri'] = URImanagement.make_oc_uri(
            event.uuid, 'subjects', self.cannonical_uris)
        rel_meta = self.item_gen_cache.get_entity(event.uuid)
        if rel_meta is not False:
            when['reference-label'] = rel_meta.label
    else:
        # metadata is specified for this specific item
        when['reference-type'] = 'specified'
        when['reference-label'] = self.manifest.label
    if self.assertion_hashes:
        # expose the assertion hash id for editing interfaces
        when['hash_id'] = event.hash_id
    act_dict['when'] = when
    return act_dict
def get_item_media_files(self, man_obj):
    """Collect media file URIs for archiving, keyed by (fragment-free) URI.

    Returns an ordered dict: file_uri -> {'filename', 'dc-terms:isPartOf',
    'type': [file types]}; empty when man_obj is not a Manifest instance.
    """
    files_dict = LastUpdatedOrderedDict()
    if not isinstance(man_obj, Manifest):
        return files_dict
    med_files = Mediafile.objects.filter(
        uuid=man_obj.uuid,
        file_type__in=self.ARCHIVE_FILE_TYPES
    ).order_by('-filesize')
    # iterate types in ARCHIVE_FILE_TYPES order so preferred types come first
    for act_type in self.ARCHIVE_FILE_TYPES:
        for med_file in med_files:
            if med_file.file_type != act_type:
                continue
            file_uri = med_file.file_uri
            if '#' in file_uri:
                # drop any fragment identifier from the URI
                file_uri = file_uri.split('#')[0]
            if file_uri not in files_dict:
                act_dict = LastUpdatedOrderedDict()
                act_dict['filename'] = self.make_archival_file_name(
                    med_file.file_type,
                    man_obj.slug,
                    file_uri
                )
                act_dict['dc-terms:isPartOf'] = URImanagement.make_oc_uri(
                    man_obj.uuid,
                    man_obj.item_type
                )
                act_dict['type'] = []
                files_dict[file_uri] = act_dict
            files_dict[file_uri]['type'].append(med_file.file_type)
    return files_dict
def make_trinomial_from_site_labels(self, project_uuid, state_prefix=''):
    """Make trinomial identifiers from site labels in a project.

    Handles North Carolina labels (only the part before '*' is the
    trinomial) and Tennessee labels (site part may be non-numeric).
    Saves a Trinomial record per site; invalid trinomials are reported.
    """
    ent = Entity()
    found = ent.dereference(project_uuid)
    if found:
        proj_label = ent.label
        sites = Manifest.objects\
                        .filter(project_uuid=project_uuid,
                                class_uri='oc-gen:cat-site')
        for site in sites:
            trinomial = str(state_prefix) + site.label
            if '*' in trinomial:
                # for North Carolina, only the part before the '*' is a trinomial
                trinomial = trinomial.split('*')[0]
            print('working on (' + site.uuid + '): ' + trinomial)
            parts = self.parse_trinomial(trinomial)
            if 'Tennessee' in proj_label:
                trinomial = parts['state'] + parts['county'] + str(parts['site'])
            dt = Trinomial()
            dt.uri = URImanagement.make_oc_uri(site.uuid, site.item_type)
            dt.uuid = site.uuid
            dt.label = site.label
            dt.project_label = proj_label
            dt.trinomial = trinomial
            dt.state = parts['state']
            dt.county = parts['county']
            dt.site = parts['site']
            try:
                dt.save()
                print('Trinomial: ' + trinomial + ', from: ' + site.label)
            except Exception:
                # was a bare "except:"; narrowed so SystemExit and
                # KeyboardInterrupt are no longer swallowed
                print('Trinomial: ' + trinomial + ' not valid as a trinomial')
def save_context(self, row_num, man, parent_list):
    """Save context info for a row; adds new context fields as needed.

    Writes the context URI into fixed field 13, then one cell per parent
    context label (most general first) into dynamically numbered fields.
    """
    context_uri = ''
    reversed_parents = False
    if isinstance(parent_list, list) and len(parent_list) > 0:
        context_uri = URImanagement.make_oc_uri(parent_list[0], 'subjects')
        # reverse so the most general context comes first
        reversed_parents = parent_list[::-1]
    # save a record of the context URI
    uri_cell = ExpCell()
    uri_cell.table_id = self.table_id
    uri_cell.uuid = man.uuid
    uri_cell.project_uuid = man.project_uuid
    uri_cell.row_num = row_num
    uri_cell.field_num = 13
    uri_cell.record = context_uri
    uri_cell.save()
    if reversed_parents is False:
        return
    for pindex, parent_uuid in enumerate(reversed_parents, start=1):
        context_label = self.deref_entity_label(parent_uuid)
        field_num = self.get_add_context_field_number(pindex)
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = field_num
        cell.record = context_label
        cell.save()
def _get_entity_children_db(self, identifier, recursive=True):
    """ Gets child concepts for a given URI or UUID identified entity

    Results are memoized in self.child_entities; when recursive is True
    a cached entry is returned directly.
    """
    if not self.child_entities:
        self.child_entities = LastUpdatedOrderedDict()
    if identifier in self.child_entities and recursive:
        output = self.child_entities[identifier]
    else:
        act_children = []
        p_for_superobjs = LinkAnnotation.PREDS_SBJ_IS_SUB_OF_OBJ
        p_for_subobjs = LinkAnnotation.PREDS_SBJ_IS_SUPER_OF_OBJ
        lequiv = LinkEquivalence()
        # expand the identifier into all its equivalent forms
        identifiers = lequiv.get_identifier_list_variants(identifier)
        try:
            # look for child items in the objects of the assertion
            subobjs_anno = LinkAnnotation.objects.filter(
                subject__in=identifiers,
                predicate_uri__in=p_for_subobjs)
            if(len(subobjs_anno) < 1):
                subobjs_anno = False
        except LinkAnnotation.DoesNotExist:
            subobjs_anno = False
        if subobjs_anno is not False:
            for sub_obj in subobjs_anno:
                child_id = sub_obj.object_uri
                act_children.append(child_id)
        try:
            """ Now look for subordinate entities in the subject, not the object """
            subsubj_anno = LinkAnnotation.objects.filter(
                object_uri__in=identifiers,
                predicate_uri__in=p_for_superobjs)
            if len(subsubj_anno) < 1:
                subsubj_anno = False
        except LinkAnnotation.DoesNotExist:
            subsubj_anno = False
        if subsubj_anno is not False:
            for sub_sub in subsubj_anno:
                child_id = sub_sub.subject
                act_children.append(child_id)
        if len(act_children) > 0:
            identifier_children = []
            for child_id in act_children:
                if child_id.count('/') > 1:
                    # looks like a URI; reduce to a uuid when possible
                    oc_uuid = URImanagement.get_uuid_from_oc_uri(child_id)
                    if oc_uuid:
                        child_id = oc_uuid
                identifier_children.append(child_id)
                # recursively get the children of the child
                if recursive:
                    self.get_entity_children(child_id, recursive)
            # save the list of children of the current identified item
            if identifier not in self.child_entities:
                self.child_entities[identifier] = identifier_children
        else:
            # cache an empty list: the current identified item has no children
            if identifier not in self.child_entities:
                self.child_entities[identifier] = []
        output = self.child_entities[identifier]
    return output
def get_item_basics(self, solr_rec):
    """Populate basic item metadata attributes from a solr record dict.

    Returns True when the 'slug_type_uri_label' value parsed successfully.
    """
    output = False
    if not isinstance(solr_rec, dict):
        return output
    if 'uuid' in solr_rec:
        self.uuid = solr_rec['uuid']
    if 'slug_type_uri_label' in solr_rec:
        id_parts = self.parse_solr_value_parts(solr_rec['slug_type_uri_label'])
        if id_parts is not False:
            output = True
            self.uri = self.make_url_from_val_string(id_parts['uri'], True)
            self.href = self.make_url_from_val_string(id_parts['uri'], False)
            item_type_output = URImanagement.get_uuid_from_oc_uri(self.uri, True)
            self.item_type = item_type_output['item_type']
            self.label = id_parts['label']
    if 'updated' in solr_rec:
        self.updated = solr_rec['updated']
    if 'published' in solr_rec:
        self.published = solr_rec['published']
    if 'human_remains' in solr_rec:
        # flag the record when it relates to human remains
        if solr_rec['human_remains'] > 0:
            self.human_remains_flagged = True
    return output
def add_general_json_ld(self):
    """Add general (manifest) information to the item's JSON-LD object."""
    self.json_ld['id'] = URImanagement.make_oc_uri(self.uuid, self.item_type)
    self.json_ld['uuid'] = self.uuid
    self.json_ld['slug'] = self.slug
    self.json_ld['label'] = self.label
    # add multilingual alternative labels
    if isinstance(self.manifest.localized_json, dict):
        if len(self.manifest.localized_json) > 0:
            # fixed: previously assigned to an undefined local "json_ld",
            # raising NameError whenever localized labels existed
            self.json_ld['skos:altLabel'] = self.manifest.localized_json
    if self.manifest.item_type in PartsJsonLD.ITEM_TYPE_CLASS_LIST \
       and len(self.manifest.class_uri) > 1:
        self.json_ld['category'] = [
            self.manifest.class_uri
        ]
    if self.manifest.item_type == 'projects':
        # now add the project specific data to the JSON-LD
        self.add_project_json_ld()
    elif self.manifest.item_type == 'documents':
        # now add document specific information to the JSON-LD
        self.add_document_json_ld()
    elif self.manifest.item_type == 'predicates':
        # now add the predicate specific data to the JSON-LD
        self.add_predicate_json_ld()
    elif self.manifest.item_type == 'types':
        self.add_type_json_ld()
def make_trinomial_from_site_labels(self, project_uuid, state_prefix=''):
    """Make trinomial identifiers from site labels in a project.

    North Carolina labels keep only the part before '*'; Tennessee
    labels may have non-numeric site parts. Saves one Trinomial record
    per site; invalid trinomials are reported, not saved.
    """
    ent = Entity()
    found = ent.dereference(project_uuid)
    if found:
        proj_label = ent.label
        sites = Manifest.objects\
                        .filter(project_uuid=project_uuid,
                                class_uri='oc-gen:cat-site')
        for site in sites:
            trinomial = str(state_prefix) + site.label
            if '*' in trinomial:
                # for North Carolina, only the part before the '*' is a trinomial
                trinomial = trinomial.split('*')[0]
            print('working on (' + site.uuid + '): ' + trinomial)
            parts = self.parse_trinomial(trinomial)
            if 'Tennessee' in proj_label:
                trinomial = parts['state'] + parts['county'] + str(
                    parts['site'])
            dt = Trinomial()
            dt.uri = URImanagement.make_oc_uri(site.uuid, site.item_type)
            dt.uuid = site.uuid
            dt.label = site.label
            dt.project_label = proj_label
            dt.trinomial = trinomial
            dt.state = parts['state']
            dt.county = parts['county']
            dt.site = parts['site']
            try:
                dt.save()
                print('Trinomial: ' + trinomial + ', from: ' + site.label)
            except Exception:
                # was a bare "except:"; narrowed so SystemExit and
                # KeyboardInterrupt are no longer swallowed
                print('Trinomial: ' + trinomial + ' not valid as a trinomial')
def make_save_doi_by_uuid(self, uuid, metadata=None):
    """Make and save a DOI identifier for an item identified by uuid.

    Mints a DOI via EZID when the item does not yet have one.
    :param uuid: uuid of the item to get a DOI
    :param metadata: optional pre-built DOI metadata dict
    :return: True when a DOI was minted and saved, False otherwise
    """
    ok = False
    oc_uri = None
    dois = StableIdentifer.objects.filter(uuid=uuid, stable_type='doi')[:1]
    if len(dois) < 1:
        # the item doesn't yet have a DOI, so make one!
        # (previous comment said "ARK id"; this method mints DOIs)
        oc_item = OCitem()
        # return value unused; the call populates oc_item.exists
        oc_item.check_exists(uuid)
        if oc_item.exists:
            if metadata is None:
                metadata = self.make_doi_metadata_by_uuid(uuid, oc_item)
            if isinstance(metadata, dict):
                if '_target' in metadata:
                    oc_uri = metadata['_target']
                else:
                    oc_uri = URImanagement.make_oc_uri(oc_item.manifest.uuid,
                                                       oc_item.item_type)
            if isinstance(oc_uri, str):
                print('Make DOI id for: ' + oc_uri)
                ezid_response = self.ezid.mint_identifier(oc_uri, metadata, 'doi')
                if self.do_test:
                    print('EZID response: ' + str(ezid_response))
                if isinstance(ezid_response, str):
                    if '|' in ezid_response:
                        # EZID can return several ids separated by '|'
                        for resp_id in ezid_response.split('|'):
                            if 'doi:' in resp_id:
                                ok = self.save_oc_item_stable_id(oc_item,
                                                                 resp_id,
                                                                 'doi')
                    else:
                        ok = self.save_oc_item_stable_id(oc_item,
                                                         ezid_response,
                                                         'doi')
    return ok
def _get_parent_id_db(self, identifier):
    """Get the parent id for the current identifier """
    parent_id = None
    lequiv = LinkEquivalence()
    # expand the identifier into all its equivalent forms
    identifiers = lequiv.get_identifier_list_variants(identifier)
    # print('identifiers: {}'.format(identifiers))
    p_for_superobjs = LinkAnnotation.PREDS_SBJ_IS_SUB_OF_OBJ
    preds_for_superobjs = lequiv.get_identifier_list_variants(p_for_superobjs)
    p_for_subobjs = LinkAnnotation.PREDS_SBJ_IS_SUPER_OF_OBJ
    preds_for_subobjs = lequiv.get_identifier_list_variants(p_for_subobjs)
    try:
        # look for superior items in the objects of the assertion
        # sorting by sort so we can privelage a certain hierarchy path
        superobjs_anno = LinkAnnotation.objects.filter(subject__in=identifiers,
                                                       predicate_uri__in=preds_for_superobjs)\
                                               .exclude(object_uri__in=identifiers)\
                                               .order_by('sort', 'object_uri')[:1]
        if len(superobjs_anno) < 1:
            superobjs_anno = False
    except LinkAnnotation.DoesNotExist:
        superobjs_anno = False
    if superobjs_anno:
        parent_id = superobjs_anno[0].object_uri
        # print('Subject {} is child of {}'.format(identifiers, parent_id))
        oc_uuid = URImanagement.get_uuid_from_oc_uri(parent_id)
        if oc_uuid:
            parent_id = oc_uuid
    try:
        """ Now look for superior entities in the subject, not the object
            sorting by sort so we can privelage a certain hierarchy path
        """
        supersubj_anno = LinkAnnotation.objects.filter(object_uri__in=identifiers,
                                                       predicate_uri__in=preds_for_subobjs)\
                                               .exclude(subject__in=identifiers)\
                                               .order_by('sort', 'subject')[:1]
        if len(supersubj_anno) < 1:
            supersubj_anno = False
    except LinkAnnotation.DoesNotExist:
        supersubj_anno = False
    if supersubj_anno:
        # NOTE(review): this overwrites any parent found above, so the
        # subject-side match takes precedence — confirm this is intended
        parent_id = supersubj_anno[0].subject
        # print('Subject {} is parent of {}'.format(parent_id, identifiers))
        oc_uuid = URImanagement.get_uuid_from_oc_uri(parent_id)
        if oc_uuid:
            parent_id = oc_uuid
    return parent_id
def augment_projects(self):
    """ adds some additional information about projects
        to make them easier to display

        Bulk-loads Project objects and Dublin Core annotations for all
        raw records, then attaches them to each record before appending
        the record to self.records.
    """
    uuids = []
    for proj_r in self.raw_records:
        uuids.append(proj_r.uuid)
    # now query the database for all the records with these uuids
    proj_objs = Project.objects\
                       .filter(uuid__in=uuids)
    # now make a dict object to easily get project info by a UUID key
    proj_obj_dict = {}
    for proj_obj in proj_objs:
        proj_obj_dict[proj_obj.uuid] = proj_obj
    # now query the database for all of the dc related predicates
    le = LinkEquivalence()
    subjects = le.get_identifier_list_variants(uuids)
    predicates = le.get_identifier_list_variants(self.DC_META_PREDS)
    dc_annos = LinkAnnotation.objects\
                             .filter(subject__in=subjects,
                                     predicate_uri__in=predicates)
    # now make a dict object to easily get annotations by UUID key
    dc_anno_dict = {}
    for dc_anno in dc_annos:
        dc_pred = URImanagement.prefix_common_uri(dc_anno.predicate_uri)
        dc_pred = dc_pred.replace('dc-terms:', '')  # remove namespace prefix
        if dc_anno.subject not in dc_anno_dict:
            dc_anno_dict[dc_anno.subject] = {}
        if dc_pred not in dc_anno_dict[dc_anno.subject]:
            dc_anno_dict[dc_anno.subject][dc_pred] = []
        if dc_anno.object_uri not in dc_anno_dict[dc_anno.subject][dc_pred]:
            dc_anno_dict[dc_anno.subject][dc_pred].append(dc_anno.object_uri)
    # now add information we got from queries and organized into dicts
    # to the project response objects
    for proj_r in self.raw_records:
        if proj_r.uuid in proj_obj_dict:
            # add projects objects from the database
            proj_r.extra = proj_obj_dict[proj_r.uuid]
        if proj_r.uuid in dc_anno_dict:
            # add annotations from the database
            proj_r.dc = {'meta': []}
            for pred, object_uris in dc_anno_dict[proj_r.uuid].items():
                proj_r.dc[pred] = []
                for object_uri in object_uris:
                    ent = Entity()
                    found = ent.dereference(object_uri)
                    if found:
                        obj_obj = {'id': object_uri,
                                   'label': ent.label}
                        if ent.item_type == 'uri':
                            obj_obj['href'] = ent.uri
                        else:
                            obj_obj['href'] = self.base_url \
                                + '/' + ent.item_type \
                                + '/' + ent.uuid
                        proj_r.dc[pred].append(obj_obj)
                        # creator and temporal annotations are kept out of
                        # the general 'meta' list
                        if pred != 'creator' and pred != 'temporal':
                            proj_r.dc['meta'].append(obj_obj)
        self.records.append(proj_r)  # now append the augmented record
def process_equivalent_linked_data(self):
    """ Types are useful for entity reconciliation
        this checks for linked data associated with a type

        Populates solr fields for skos:closeMatch equivalences and
        skos:related relations found in the item's JSON-LD.
    """
    # equivalences to external vocabularies, indexed as skos:closeMatch
    for equiv_uri in self.LD_EQUIVALENT_PREDICATES:
        if equiv_uri in self.oc_item.json_ld and "foaf" not in equiv_uri:
            # for now, default to a close match
            fname = "skos_closematch___pred_id"
            allname = "obj_all___skos_closematch___pred_id"
            if fname not in self.fields:
                self.fields[fname] = []
            if self.ROOT_LINK_DATA_SOLR not in self.fields:
                self.fields[self.ROOT_LINK_DATA_SOLR] = []
            item = self._concat_solr_string_value(
                "skos-closematch",
                "id",
                "http://www.w3.org/2004/02/skos/core#closeMatch",
                "Close Match"
            )
            self.fields[self.ROOT_LINK_DATA_SOLR].append(item)
            if allname not in self.fields:
                self.fields[allname] = []
            for entity in self.oc_item.json_ld[equiv_uri]:
                # only index entities referenced by Web URI
                if "http://" in entity["id"] or "https://" in entity["id"]:
                    self.fields["text"] += entity["label"] + "\n"
                    self.fields["text"] += entity["id"] + "\n"
                    item = self._concat_solr_string_value(entity["slug"],
                                                          "id",
                                                          entity["id"],
                                                          entity["label"])
                    self.fields[fname].append(item)
                    self.fields[allname].append(item)
                    self.process_object_uri(entity["id"])
    # skos:related relations
    if "skos:related" in self.oc_item.json_ld:
        fname = "skos_related___pred_id"
        allname = "obj_all___skos_related___pred_id"
        if fname not in self.fields:
            self.fields[fname] = []
        if self.ROOT_LINK_DATA_SOLR not in self.fields:
            self.fields[self.ROOT_LINK_DATA_SOLR] = []
        item = self._concat_solr_string_value(
            "skos-related",
            "id",
            "http://www.w3.org/2004/02/skos/core#related",
            "Related"
        )
        self.fields[self.ROOT_LINK_DATA_SOLR].append(item)
        if allname not in self.fields:
            self.fields[allname] = []
        for entity in self.oc_item.json_ld["skos:related"]:
            if "http://" in entity["id"] or "https://" in entity["id"]:
                self.fields["text"] += entity["label"] + "\n"
                self.fields["text"] += entity["id"] + "\n"
                item = self._concat_solr_string_value(entity["slug"],
                                                      "id",
                                                      entity["id"],
                                                      entity["label"])
                self.fields[fname].append(item)
                self.fields[allname].append(item)
                self.process_object_uri(entity["id"])
            elif "oc-pred:" in entity["id"] and "owl:sameAs" in entity:
                # internal predicate reference; index by its uuid path
                pred_uuid = URImanagement.get_uuid_from_oc_uri(entity["owl:sameAs"])
                self.fields["text"] += entity["label"] + "\n"
                self.fields["text"] += entity["id"] + "\n"
                item = self._concat_solr_string_value(
                    entity["slug"],
                    "id",
                    "/predicates/" + pred_uuid,
                    entity["label"]
                )
                self.fields[fname].append(item)
                self.fields[allname].append(item)
def parse_json_record(self, json_rec):
    """ parses json for a geo-json feature of the record

        Populates record attributes (id, label, dates, snippet, etc.)
        from the feature's 'properties' dict, or from the record itself
        when no 'properties' key is present.
    """
    if 'properties' in json_rec:
        props = json_rec['properties']
    else:
        props = json_rec
    if isinstance(props, dict):
        if 'id' in props:
            self.id = props['id'].replace('#', '')
        if 'label' in props:
            self.label = props['label']
        if 'href' in props:
            self.href = props['href']
        if 'uri' in props:
            item_type_output = URImanagement.get_uuid_from_oc_uri(props['uri'], True)
            if isinstance(item_type_output, dict):
                self.item_type = item_type_output['item_type']
                self.uuid = item_type_output['uuid']
        if 'project label' in props:
            self.project = props['project label']
        if 'context label' in props:
            self.context = props['context label']
        if 'early bce/ce' in props:
            # negative years are BCE; store absolute value plus a suffix
            self.early_bce_ce = props['early bce/ce']
            if self.early_bce_ce < 0:
                self.early_bce_ce = int(round(self.early_bce_ce * -1, 0))
                self.early_suffix = 'BCE'
            else:
                self.early_bce_ce = int(round(self.early_bce_ce, 0))
                self.early_suffix = False
        if 'late bce/ce' in props:
            self.late_bce_ce = props['late bce/ce']
            if self.late_bce_ce < 0:
                self.late_bce_ce = int(round(self.late_bce_ce * -1, 0))
                self.late_suffix = 'BCE'
            else:
                self.late_bce_ce = int(round(self.late_bce_ce, 0))
                self.late_suffix = False
        if 'item category' in props:
            self.category = props['item category']
        if 'snippet' in props:
            # protect search-hit <em> markers, strip all other markup,
            # then restore the markers as <mark> tags
            self.snippet = props['snippet']
            self.snippet = self.snippet.replace('<em>', '[[[[mark]]]]')
            self.snippet = self.snippet.replace('</em>', '[[[[/mark]]]]')
            self.snippet = strip_tags(self.snippet)
            self.snippet = self.snippet.replace('</', '')
            self.snippet = self.snippet.replace('<', '')
            self.snippet = self.snippet.replace('>', '')
            self.snippet = self.snippet.replace('[[[[mark]]]]', '<mark>')
            self.snippet = self.snippet.replace('[[[[/mark]]]]', '</mark>')
        if 'thumbnail' in props:
            self.thumbnail = props['thumbnail']
        if 'published' in props:
            self.published = QueryMaker().make_human_readable_date(props['published'])
        if 'updated' in props:
            self.updated = QueryMaker().make_human_readable_date(props['updated'])
def get_assertion_values(self, uuid, obs_num, predicate_uuid):
    """ gets the current assertion values in a format
        for easy use by the item-field.js object

        Returns a list of ordered dicts with keys: hash_id, id, uuid,
        slug, label, literal. Lookup failures are recorded in
        self.errors keyed by an obs/pred/value position string.
    """
    output = []
    ass_list = Assertion.objects\
                        .filter(uuid=uuid,
                                obs_num=obs_num,
                                predicate_uuid=predicate_uuid)
    i = 0
    for ass in ass_list:
        i += 1
        error_key = 'obs-' + str(
            obs_num) + '-pred-' + predicate_uuid + '-val-' + str(i)
        item = LastUpdatedOrderedDict()
        item['hash_id'] = ass.hash_id
        item['id'] = None
        item['uuid'] = None
        item['slug'] = None
        item['label'] = None
        item['literal'] = None
        item_ok = False
        # NOTE(review): this tests ass.object_type for membership in each
        # entry of settings.ITEM_TYPES — confirm the membership direction
        # is intended (vs. "ass.object_type in settings.ITEM_TYPES")
        if any(ass.object_type in item_type for item_type in settings.ITEM_TYPES):
            id_man = self.get_manifest_item(ass.object_uuid)
            if id_man is not False:
                item_ok = True
                item['id'] = URImanagement.make_oc_uri(
                    id_man.uuid, id_man.item_type)
                item['uuid'] = id_man.uuid
                item['slug'] = id_man.slug
                item['label'] = id_man.label
            else:
                self.errors[
                    error_key] = 'Cannot find object_uuid: ' + ass.object_uuid
        elif ass.object_type == 'xsd:string':
            try:
                act_string = OCstring.objects.get(uuid=ass.object_uuid)
            except OCstring.DoesNotExist:
                act_string = False
            if act_string is not False:
                item_ok = True
                item['uuid'] = act_string.uuid
                item['literal'] = act_string.content
            else:
                self.errors[
                    error_key] = 'Cannot find string_uuid: ' + ass.object_uuid
        elif ass.object_type == 'xsd:date':
            item_ok = True
            item['literal'] = ass.data_date.date().isoformat()
        else:
            # numeric / boolean assertion values
            item_ok = True
            item['literal'] = ass.data_num
        if item_ok:
            output.append(item)
    return output
def get_project_authors(self, project_uuid):
    """Gather creator and contributor ids for a project.

    Falls back to the parent project's creators when the project has
    none of its own. Returns True when any authors were found.
    """
    output = False
    creator_links = LinkAnnotation.objects\
                                  .filter(Q(subject=project_uuid),
                                          Q(predicate_uri=self.URI_DC_CREATE) |
                                          Q(predicate_uri=self.PRF_DC_CREATE))\
                                  .order_by('sort')
    if len(creator_links) < 1:
        # look for creators from the parent project
        par_proj = Project.objects\
                          .filter(uuid=project_uuid)\
                          .exclude(project_uuid=project_uuid)[:1]
        if len(par_proj) > 0:
            creator_links = LinkAnnotation.objects\
                                          .filter(Q(subject=par_proj[0].project_uuid),
                                                  Q(predicate_uri=self.URI_DC_CREATE) |
                                                  Q(predicate_uri=self.PRF_DC_CREATE))\
                                          .order_by('sort')
    for creator in creator_links:
        pid = URImanagement.get_uuid_from_oc_uri(creator.object_uri)
        if pid is False:
            # not an Open Context URI; keep the raw object URI
            pid = creator.object_uri
        if pid not in self.creators:
            self.creators.append(pid)
    contrib_links = LinkAnnotation.objects\
                                  .filter(Q(subject=project_uuid),
                                          Q(predicate_uri=self.URI_DC_CONTRIB) |
                                          Q(predicate_uri=self.PRF_DC_CONTRIB))\
                                  .order_by('sort')
    for contrib in contrib_links:
        pid = URImanagement.get_uuid_from_oc_uri(contrib.object_uri)
        if pid is False:
            pid = contrib.object_uri
        if pid in self.contributors:
            continue
        # add to contributors when not already a creator (unless
        # authorship consolidation is off or the link is explicitly sorted)
        if (pid not in self.creators
                or self.consolidate_authorship is False
                or contrib.sort > 0):
            self.contributors.append(pid)
    if len(self.contributors) > 0 or len(self.creators) > 0:
        output = True
    return output
def make_uri_equivalence_list(raw_term, alt_suffix="/"): """ Makes Prefixed, HTTP, HTTPS and '/' ending options list for URLs """ # NOTE: Open Context often references Web URL/URIs to "linked data" # entities. Open Context considers http:// and https:// URLs to be # equivalent. This function takes a raw term and makes http:// # https:// variants. It also makes a prefixed URL if a namespace # is recognized in URImanagement. Finally, it will by default, # make variants that have and do not have a trailing "/". output_list = [] if not isinstance(raw_term, str): return None output_list.append(raw_term) url_terms = [] if raw_term.startswith('http://') or raw_term.startswith('https://'): # NOTE: The raw_term looks like a Web URL. We need to make # variants that start with http, https, and end in a slash, and # do not end in a slash. url_terms = make_suffix_no_suffix_list(raw_term, suffix=alt_suffix) elif raw_term.count(':') == 1: full_uri = URImanagement.convert_prefix_to_full_uri(raw_term) if full_uri: url_terms = make_suffix_no_suffix_list(full_uri, suffix=alt_suffix) url_terms.append(raw_term) for term in url_terms: http_alts = make_alternative_prefix_list(term, alt_prefixes=( 'http://', 'https://', )) if not http_alts: continue for http_alt in http_alts: if http_alt not in output_list: output_list.append(http_alt) prefix_id = URImanagement.prefix_common_uri(http_alt) if alt_suffix and prefix_id.endswith(alt_suffix): # Remove any trailing slash with prefixed IDs. prefix_id = prefix_id[:-len(alt_suffix)] if prefix_id and prefix_id not in output_list: output_list.append(prefix_id) return output_list
def get_project_authors(self, project_uuid):
    """Collect creator and contributor identifiers for a project.

    When the project declares no creators, the parent project's creator
    links are used instead. Returns True when any authors were found.
    """
    creator_links = LinkAnnotation.objects\
                                  .filter(Q(subject=project_uuid),
                                          Q(predicate_uri=self.URI_DC_CREATE) |
                                          Q(predicate_uri=self.PRF_DC_CREATE))\
                                  .order_by('sort')
    if len(creator_links) < 1:
        # look for creators from the parent project
        par_proj = Project.objects\
                          .filter(uuid=project_uuid)\
                          .exclude(project_uuid=project_uuid)[:1]
        if len(par_proj) > 0:
            creator_links = LinkAnnotation.objects\
                                          .filter(Q(subject=par_proj[0].project_uuid),
                                                  Q(predicate_uri=self.URI_DC_CREATE) |
                                                  Q(predicate_uri=self.PRF_DC_CREATE))\
                                          .order_by('sort')
    for creator_link in creator_links:
        person_id = URImanagement.get_uuid_from_oc_uri(creator_link.object_uri)
        if not person_id:
            # not an Open Context URI; keep the raw object URI
            person_id = creator_link.object_uri
        if person_id not in self.creators:
            self.creators.append(person_id)
    contrib_links = LinkAnnotation.objects\
                                  .filter(Q(subject=project_uuid),
                                          Q(predicate_uri=self.URI_DC_CONTRIB) |
                                          Q(predicate_uri=self.PRF_DC_CONTRIB))\
                                  .order_by('sort')
    for contrib_link in contrib_links:
        person_id = URImanagement.get_uuid_from_oc_uri(contrib_link.object_uri)
        if not person_id:
            person_id = contrib_link.object_uri
        if person_id in self.contributors:
            continue
        # add to contributors if not a creator (unless consolidation is
        # disabled or the link carries an explicit sort)
        if (person_id not in self.creators
                or self.consolidate_authorship is False
                or contrib_link.sort > 0):
            self.contributors.append(person_id)
    return len(self.contributors) > 0 or len(self.creators) > 0
def prep_item_dc_metadata(self):
    """ prepared dublin core metadata for an item,
        this needs to happen before we prep dc metadata
        for associated items and sets of items

        Builds self.uri, self.project_uri, self.title and a prose
        self.description for an archaeological site record.
    """
    if self.is_valid:
        # make some uris
        self.uri = URImanagement.make_oc_uri(self.manifest.uuid,
                                             self.manifest.item_type)
        self.project_uri = URImanagement.make_oc_uri(self.manifest.project_uuid,
                                                     'projects')
        project_ent = self.get_entity(self.manifest.project_uuid)
        if not isinstance(self.label, str):
            self.label = self.manifest.label
        self.title = self.make_dcterms_title(self.manifest.label,
                                             self.context)
        self.description = 'An archaeological site record'
        context = self.remove_label_from_context(self.manifest.label,
                                                 self.context)
        if isinstance(context, str):
            self.description += ' from: ' + context
        if project_ent is not False:
            self.parent_project_uri = URImanagement.make_oc_uri(project_ent.parent_project_uuid,
                                                                'projects')
            self.description += '; part of the "' + project_ent.label
            self.description += '" data publication.'
        if self.geo_meta is not None and self.geo_meta is not False:
            if len(self.geo_meta) > 0:
                # use the first (primary) geospatial record
                geo = self.geo_meta[0]
                if isinstance(geo.note, str):
                    if len(geo.note) > 0:
                        # (disabled) appending the geospatial note
                        # self.description += ' ' + geo.note
                        pass
                if geo.specificity < 0:
                    # negative specificity means intentionally obscured location
                    self.description += ' Location data approximated as a security precaution.'
                if self.manifest.uuid != geo.uuid:
                    # geospatial data inherited from a related place
                    rel_meta = self.get_entity(geo.uuid)
                    if rel_meta is not False:
                        self.description += ' Location data provided through relationship to the'
                        self.description += ' related place: ' + rel_meta.label
                        self.description += ' (' + rel_meta.uri + ')'
def make_type_relations(self, sub_type_pred_uuid, sub_type_f_num,
                        rel_pred, obj_type_pred_uuid, obj_type_f_num):
    """Make semantic relationship annotations between types in an import.

    Pairs subject-type cells (field sub_type_f_num) with object-type
    cells (field obj_type_f_num) on the same row, then saves one
    LinkAnnotation per distinct (subject, object) pair using rel_pred.
    """
    rels = {}
    sub_cells = ImportCell.objects\
                          .filter(source_id=self.source_id,
                                  field_num=sub_type_f_num)
    for sub_cell in sub_cells:
        sub_text = sub_cell.record
        if len(sub_text) < 1:
            continue
        tm = TypeManagement()
        tm.project_uuid = self.project_uuid
        tm.source_id = self.source_id
        sub_type = tm.get_make_type_within_pred_uuid(sub_type_pred_uuid,
                                                     sub_text)
        obj_cells = ImportCell.objects\
                              .filter(source_id=self.source_id,
                                      field_num=obj_type_f_num,
                                      row_num=sub_cell.row_num)[:1]
        if len(obj_cells) < 1:
            continue
        obj_text = obj_cells[0].record
        if len(obj_text) < 1 or sub_text == obj_text:
            continue
        tmo = TypeManagement()
        tmo.project_uuid = self.project_uuid
        tmo.source_id = self.source_id
        obj_type = tmo.get_make_type_within_pred_uuid(obj_type_pred_uuid,
                                                      obj_text)
        # a URI is needed because we're making a link assertion
        obj_uri = URImanagement.make_oc_uri(obj_type.uuid, 'types')
        # key by the pair so the same link assertion is made only once
        rel_id = str(sub_type.uuid) + ' ' + str(obj_type.uuid)
        if rel_id not in rels:
            rels[rel_id] = {'subject': sub_type.uuid,
                            'object_uri': obj_uri}
    # now make the link data annotations relating these types
    for rel_id, rel in rels.items():
        new_la = LinkAnnotation()
        new_la.subject = rel['subject']
        new_la.subject_type = 'types'
        new_la.project_uuid = self.project_uuid
        new_la.source_id = self.source_id
        new_la.predicate_uri = rel_pred
        new_la.object_uri = rel['object_uri']
        new_la.creator_uuid = ''
        new_la.save()
def make_type_relations(self, sub_type_pred_uuid, sub_type_f_num,
                        rel_pred, obj_type_pred_uuid, obj_type_f_num):
    """Make semantic relationships between different types in an import.

    For each row with a non-empty subject-type cell and a different,
    non-empty object-type cell, saves one LinkAnnotation (predicate
    rel_pred) per distinct (subject, object) type pair.
    """
    pair_rels = {}
    subject_cells = ImportCell.objects.filter(
        source_id=self.source_id,
        field_num=sub_type_f_num
    )
    for subject_cell in subject_cells:
        subject_text = subject_cell.record
        if len(subject_text) < 1:
            continue
        sub_tm = TypeManagement()
        sub_tm.project_uuid = self.project_uuid
        sub_tm.source_id = self.source_id
        subject_type = sub_tm.get_make_type_within_pred_uuid(
            sub_type_pred_uuid,
            subject_text
        )
        object_cells = ImportCell.objects.filter(
            source_id=self.source_id,
            field_num=obj_type_f_num,
            row_num=subject_cell.row_num
        )[:1]
        if len(object_cells) < 1:
            continue
        object_text = object_cells[0].record
        if len(object_text) < 1 or subject_text == object_text:
            continue
        obj_tm = TypeManagement()
        obj_tm.project_uuid = self.project_uuid
        obj_tm.source_id = self.source_id
        object_type = obj_tm.get_make_type_within_pred_uuid(
            obj_type_pred_uuid,
            object_text
        )
        # make a uri for this, since we're making a link assertion
        object_uri = URImanagement.make_oc_uri(object_type.uuid, 'types')
        # dedupe on the pair so identical assertions aren't repeated
        pair_key = str(subject_type.uuid) + ' ' + str(object_type.uuid)
        if pair_key not in pair_rels:
            pair_rels[pair_key] = {
                'subject': subject_type.uuid,
                'object_uri': object_uri
            }
    # now save the link data annotations relating these types
    for pair_key, rel in pair_rels.items():
        annotation = LinkAnnotation()
        annotation.subject = rel['subject']
        annotation.subject_type = 'types'
        annotation.project_uuid = self.project_uuid
        annotation.source_id = self.source_id
        annotation.predicate_uri = rel_pred
        annotation.object_uri = rel['object_uri']
        annotation.creator_uuid = ''
        annotation.save()
def get_entity_children(self, identifier, recurive=True):
    """ Gets child concepts for a given URI or UUID identified entity

    Caches results in self.child_entities (list of children, or False
    when the entity has no children). Returns None.
    """
    # NOTE(review): parameter name "recurive" is a typo for "recursive";
    # kept as-is because callers may pass it by keyword
    act_children = []
    p_for_superobjs = LinkAnnotation.PREDS_SBJ_IS_SUB_OF_OBJ
    p_for_subobjs = LinkAnnotation.PREDS_SBJ_IS_SUPER_OF_OBJ
    lequiv = LinkEquivalence()
    # expand the identifier into all its equivalent forms
    identifiers = lequiv.get_identifier_list_variants(identifier)
    try:
        # look for child items in the objects of the assertion
        subobjs_anno = LinkAnnotation.objects.filter(
            subject__in=identifiers,
            predicate_uri__in=p_for_subobjs)
        if (len(subobjs_anno) < 1):
            subobjs_anno = False
    except LinkAnnotation.DoesNotExist:
        subobjs_anno = False
    if subobjs_anno is not False:
        for sub_obj in subobjs_anno:
            child_id = sub_obj.object_uri
            act_children.append(child_id)
    try:
        """ Now look for subordinate entities in the subject, not the object """
        subsubj_anno = LinkAnnotation.objects.filter(
            object_uri__in=identifiers,
            predicate_uri__in=p_for_superobjs)
        if len(subsubj_anno) < 1:
            subsubj_anno = False
    except LinkAnnotation.DoesNotExist:
        subsubj_anno = False
    if subsubj_anno is not False:
        for sub_sub in subsubj_anno:
            child_id = sub_sub.subject
            act_children.append(child_id)
    if len(act_children) > 0:
        identifier_children = []
        for child_id in act_children:
            if child_id.count('/') > 1:
                # looks like a URI; reduce to a uuid when possible
                oc_uuid = URImanagement.get_uuid_from_oc_uri(child_id)
                if oc_uuid is not False:
                    child_id = oc_uuid
            identifier_children.append(child_id)
            # recursively get the children of the child
            # NOTE(review): recursion happens even when recurive is False,
            # and there is no visited-set guard — a cyclic hierarchy would
            # recurse without bound; confirm the data is acyclic
            self.get_entity_children(child_id, recurive)
        # save the list of children of the current identified item
        if identifier not in self.child_entities:
            self.child_entities[identifier] = identifier_children
    else:
        # save a False for the current identified item. it has no children
        # NOTE(review): the sibling DB variant stores an empty list here
        # instead of False — confirm which sentinel consumers expect
        if identifier not in self.child_entities:
            self.child_entities[identifier] = False
def prep_item_dc_metadata(self):
    """ prepared dublin core metadata for an item,
        this needs to happen before we prep dc metadata
        for associated items and sets of items

        Builds self.uri, self.project_uri, self.title and a prose
        self.description based on the item's type and category.
    """
    if self.is_valid:
        # make some uris
        self.uri = URImanagement.make_oc_uri(self.manifest.uuid,
                                             self.manifest.item_type)
        self.project_uri = URImanagement.make_oc_uri(
            self.manifest.project_uuid, 'projects')
        # get data about entities describing the item
        category_ent = self.get_entity(self.manifest.class_uri)
        if category_ent is not False:
            self.class_label = category_ent.label
            self.class_slug = category_ent.slug
        project_ent = self.get_entity(self.manifest.project_uuid)
        self.title = self.make_dcterms_title(self.manifest.label,
                                             self.context)
        item_type = self.manifest.item_type
        if item_type == 'subjects':
            # subjects get a category-led description with their context
            if category_ent is not False:
                self.description = category_ent.label
            if item_type in PelagiosData.ITEM_TYPE_DESCRIPTIONS:
                self.description += ' ' + PelagiosData.ITEM_TYPE_DESCRIPTIONS[
                    item_type].lower()
            context = self.remove_label_from_context(
                self.manifest.label, self.context)
            if isinstance(context, str):
                self.description += ' from the context: ' + context
            if project_ent is not False:
                self.description += '; part of the "' + project_ent.label
                self.description += '" data publication.'
        else:
            # other item types get a generic type-based description
            self.description = 'A ' + PelagiosData.ITEM_TYPE_DESCRIPTIONS[
                item_type]
            if project_ent is not False and item_type != 'projects':
                self.description += '; part of the "' + project_ent.label
                self.description += '" data publication.'
def check_opencontext_uri(self, cell):
    """Check a cell for a valid Open Context URI.

    Returns the matching Manifest object when the cell holds an Open
    Context URI for an item in the database, otherwise False.
    """
    oc_item = False
    looks_like_oc = ('http://opencontext.' in cell
                     or 'https://opencontext.' in cell)
    if looks_like_oc:
        uuid = URImanagement.get_uuid_from_oc_uri(cell)
        if uuid is not False:
            # appears to be an Open Context URI
            # now check we actually have that entity in the database
            try:
                oc_item = Manifest.objects.get(uuid=uuid)
            except Manifest.DoesNotExist:
                oc_item = False
    return oc_item