def save_icons(self, predicate_uri='oc-gen:hasIcon'):
    """Store icon assertions from the vocabulary graph as LinkAnnotations.

    Returns False when either ``self.graph`` or ``self.vocabulary_uri`` is
    False. Otherwise returns the list of ``{'s': ..., 'o': ...}`` dicts that
    were saved. Triples are only harvested when ``predicate_uri`` is
    'oc-gen:hasIcon'; any other value yields an empty list.
    """
    if self.graph is False or self.vocabulary_uri is False:
        return False
    if self.replace_old:
        # drop what this vocabulary asserted earlier with this predicate
        LinkAnnotation.objects.filter(
            source_id=self.vocabulary_uri,
            predicate_uri=predicate_uri,
        ).delete()
    icon_triples = []
    if predicate_uri == 'oc-gen:hasIcon':
        # the graph is queried with the full (un-prefixed) predicate URI
        icon_pred = URIRef(
            URImanagement.convert_prefix_to_full_uri(predicate_uri))
        uri_pairs = (
            (str(subj), str(obj))
            for subj, _pred, obj in self.graph.triples((None, icon_pred, None))
        )
        # self-referencing triples are skipped
        icon_triples = [
            {'s': subj_uri, 'o': obj_uri}
            for subj_uri, obj_uri in uri_pairs
            if subj_uri != obj_uri
        ]
    for triple in icon_triples:
        annotation = LinkAnnotation()
        # make the subject a prefixed URI if common
        annotation.subject = URImanagement.prefix_common_uri(triple['s'])
        annotation.subject_type = 'uri'
        annotation.project_uuid = '0'
        annotation.source_id = self.vocabulary_uri
        annotation.predicate_uri = predicate_uri
        annotation.object_uri = triple['o']
        annotation.save()
    return icon_triples
def add_skos_hierarachy(self, parent_uri, child_uri):
    """Assert that the child entity is subordinate to the parent entity.

    Both URIs must resolve to existing LinkEntity records, and the child
    must not already have parents according to LinkRecursion; otherwise a
    message is printed and nothing is saved.
    """
    try:
        parent = LinkEntity.objects.get(uri=parent_uri)
    except LinkEntity.DoesNotExist:
        parent = False
    try:
        child = LinkEntity.objects.get(uri=child_uri)
    except LinkEntity.DoesNotExist:
        child = False
    if parent is False or child is False:
        print('Cannot find parent or child')
        return
    known_parents = LinkRecursion().get_entity_parents(child_uri)
    if len(known_parents) >= 1:
        print('Child has parents: ' + str(known_parents))
        return
    # the child is not in a hierarchy yet, so it is OK to place it in one
    annotation = LinkAnnotation()
    annotation.subject = child.uri  # the subordinate is the subject
    annotation.subject_type = 'uri'
    annotation.project_uuid = self.project_uuid
    annotation.source_id = self.source_id + '-hierarchy'
    annotation.predicate_uri = self.PRED_SBJ_IS_SUB_OF_OBJ
    annotation.object_uri = parent.uri  # the parent is the object
    annotation.save()
    print('Made: ' + child.uri + ' child of: ' + parent.uri)
def make_type_ld_annotations(self, sub_type_pred_uuid, sub_type_f_num,
                             rel_pred, obj_le_f_num):
    """Annotate imported types with linked-data URIs from another field.

    Cells of field ``sub_type_f_num`` are grouped by ``rec_hash``. For every
    non-blank type label, the records of field ``obj_le_f_num`` on the same
    rows are read; each one that looks like an http(s) URI becomes a
    LinkAnnotation (predicate ``rel_pred``) on the type, and the URI is
    passed to ``WebLinkEntity.check_add_link_entity``.
    """
    type_cells = ImportCell.objects\
        .filter(source_id=self.source_id, field_num=sub_type_f_num)
    # one group per record hash: the first cell seen plus every row number
    groups = {}
    for cell in type_cells:
        group = groups.setdefault(
            cell.rec_hash, {'rows': [], 'imp_cell_obj': cell})
        group['rows'].append(cell.row_num)

    rels = []
    for group in groups.values():
        type_label = group['imp_cell_obj'].record
        if len(type_label) == 0:
            # blank type, nothing to annotate
            continue
        cell_proc = ProcessCells(self.source_id, 0)
        ld_entities = cell_proc.get_field_records(obj_le_f_num, group['rows'])
        for distinct_ld in ld_entities.values():
            obj_uri = distinct_ld['imp_cell_obj'].record
            if len(obj_uri) <= 8:
                continue
            if not (obj_uri[:7] == 'http://' or obj_uri[:8] == 'https://'):
                continue
            # a usable linked data URI; get (or make) the type it describes
            type_manager = TypeManagement()
            type_manager.project_uuid = self.project_uuid
            type_manager.source_id = self.source_id
            sub_type = type_manager.get_make_type_within_pred_uuid(
                sub_type_pred_uuid, type_label)
            rels.append({'subject_label': type_label,
                         'subject': sub_type.uuid,
                         'object_uri': obj_uri})

    for rel in rels:
        annotation = LinkAnnotation()
        annotation.subject = rel['subject']
        annotation.subject_type = 'types'
        annotation.project_uuid = self.project_uuid
        annotation.source_id = self.source_id
        annotation.predicate_uri = rel_pred
        annotation.object_uri = rel['object_uri']
        annotation.creator_uuid = ''
        annotation.save()
        WebLinkEntity().check_add_link_entity(rel['object_uri'])
def make_dinaa_link_assertions(self):
    """Relate DINAA manifest items to matching federal registry documents.

    Every cached keyword-search result whose ``document_number`` equals the
    ``doc`` of a DINAA match is linked to the manifest item named by the
    match's ``uuid``. A LinkEntity for the document is created when missing,
    then a LinkAnnotation using ``self.DC_TERMS_REF_BY`` is saved.

    Saving the annotation is best effort: a failure is reported and the
    loop continues. Only ``Exception`` is caught, so KeyboardInterrupt and
    SystemExit still propagate (the previous bare ``except`` hid them).
    """
    self.make_fed_reg_vocab_entity()
    fed_api = FederalRegistryAPI()
    search_key_list = fed_api.get_list_cached_keyword_searches()
    dinaa_matches = fed_api.get_dict_from_file(self.dinaa_matches_key)
    for s_key in search_key_list:
        s_json = fed_api.get_dict_from_file(s_key)
        if 'results' not in s_json:
            continue
        for match in dinaa_matches:
            for s_result in s_json['results']:
                if s_result['document_number'] != match['doc']:
                    continue
                print('Found match for ' + match['doc'])
                try:
                    man_obj = Manifest.objects.get(uuid=match['uuid'])
                except Manifest.DoesNotExist:
                    # the matched item is not in the manifest; skip it
                    continue
                fed_uri = s_result['html_url']
                try:
                    LinkEntity.objects.get(uri=fed_uri)
                    entity_exists = True
                except LinkEntity.DoesNotExist:
                    entity_exists = False
                if not entity_exists:
                    print('Saving entity: ' + s_result['title'])
                    title = s_result['title']
                    if len(title) > 175:
                        title = title[0:175] + '...'
                    le = LinkEntity()
                    le.uri = fed_uri
                    le.label = title
                    le.alt_label = s_result['document_number']
                    le.vocab_uri = self.FEDERAL_REG_URI
                    le.ent_type = 'instance'
                    le.slug = 'fed-reg-docs-' + s_result['document_number']
                    le.save()
                # Now save the link annotation
                print('Adding ref link to ' + man_obj.label)
                la = LinkAnnotation()
                la.subject = man_obj.uuid
                la.subject_type = man_obj.item_type
                la.project_uuid = man_obj.project_uuid
                la.source_id = self.source_id
                la.predicate_uri = self.DC_TERMS_REF_BY
                la.object_uri = fed_uri
                try:
                    la.save()
                except Exception as err:
                    # NOTE(review): presumably fails when the same annotation
                    # already exists -- confirm against LinkAnnotation.save
                    print('Could not save ref link to ' + str(fed_uri)
                          + ': ' + str(err))
def save_icons(self, predicate_uri='oc-gen:hasIcon'):
    """Save icon relations read from the vocabulary graph.

    Returns False when ``self.graph`` or ``self.vocabulary_uri`` is False,
    otherwise the list of saved ``{'s': subject, 'o': object}`` dicts.
    Only the 'oc-gen:hasIcon' predicate is read from the graph, so any
    other ``predicate_uri`` gives an empty list.
    """
    if self.graph is False or self.vocabulary_uri is False:
        return False
    if self.replace_old:
        # remove this vocabulary's earlier relations for the predicate
        stale = LinkAnnotation.objects.filter(
            source_id=self.vocabulary_uri, predicate_uri=predicate_uri)
        stale.delete()
    found = []
    if predicate_uri == 'oc-gen:hasIcon':
        full_uri = URImanagement.convert_prefix_to_full_uri(predicate_uri)
        pattern = (None, URIRef(full_uri), None)
        for subj, _pred, obj in self.graph.triples(pattern):
            subj_uri, obj_uri = str(subj), str(obj)
            if subj_uri == obj_uri:
                # ignore self-referencing triples
                continue
            found.append({'s': subj_uri, 'o': obj_uri})
    for item in found:
        record = LinkAnnotation()
        # make the subject a prefixed URI if common
        record.subject = URImanagement.prefix_common_uri(item['s'])
        record.subject_type = 'uri'
        record.project_uuid = '0'
        record.source_id = self.vocabulary_uri
        record.predicate_uri = predicate_uri
        record.object_uri = item['o']
        record.save()
    return found
def make_type_relations(self, sub_type_pred_uuid, sub_type_f_num, rel_pred,
                        obj_type_pred_uuid, obj_type_f_num):
    """Relate types from two import fields that share a row.

    For each non-blank cell in field ``sub_type_f_num`` the subject type is
    fetched or made, then the cell of field ``obj_type_f_num`` on the same
    row supplies the object type. Each distinct subject/object pair is
    saved once as a LinkAnnotation with predicate ``rel_pred``.
    """
    def type_for(pred_uuid, label):
        # get or make the type for a label within a predicate
        manager = TypeManagement()
        manager.project_uuid = self.project_uuid
        manager.source_id = self.source_id
        return manager.get_make_type_within_pred_uuid(pred_uuid, label)

    unique_rels = {}
    subject_cells = ImportCell.objects\
        .filter(source_id=self.source_id, field_num=sub_type_f_num)
    for subject_cell in subject_cells:
        sub_type_text = subject_cell.record
        if len(sub_type_text) == 0:
            continue
        sub_type = type_for(sub_type_pred_uuid, sub_type_text)
        object_cells = ImportCell.objects\
            .filter(source_id=self.source_id,
                    field_num=obj_type_f_num,
                    row_num=subject_cell.row_num)[:1]
        if len(object_cells) == 0:
            continue
        obj_type_text = object_cells[0].record
        if len(obj_type_text) == 0 or sub_type_text == obj_type_text:
            continue
        obj_type = type_for(obj_type_pred_uuid, obj_type_text)
        # the object of a link assertion is a URI
        obj_uri = URImanagement.make_oc_uri(obj_type.uuid, 'types')
        # key on both uuids so the same assertion is only made once
        rel_id = str(sub_type.uuid) + ' ' + str(obj_type.uuid)
        if rel_id not in unique_rels:
            unique_rels[rel_id] = {'subject': sub_type.uuid,
                                   'object_uri': obj_uri}

    # now save the link annotations relating these types
    for rel in unique_rels.values():
        annotation = LinkAnnotation()
        annotation.subject = rel['subject']
        annotation.subject_type = 'types'
        annotation.project_uuid = self.project_uuid
        annotation.source_id = self.source_id
        annotation.predicate_uri = rel_pred
        annotation.object_uri = rel['object_uri']
        annotation.creator_uuid = ''
        annotation.save()
def save_new_ref_by_annotation(self, oc_item):
    """Save a referenced-by annotation for ``oc_item`` if it is new.

    ``self.check_new_annotation`` decides whether the link from the item to
    ``self.referrer.uri`` is new. When it is, the ``self.new_annotations``
    counter is incremented, the annotation is saved and a progress line is
    printed.
    """
    referrer_uri = self.referrer.uri
    if not self.check_new_annotation(oc_item.uuid, referrer_uri):
        return
    self.new_annotations += 1
    annotation = LinkAnnotation()
    annotation.subject = oc_item.uuid
    annotation.subject_type = oc_item.item_type
    annotation.project_uuid = oc_item.project_uuid
    annotation.source_id = self.source_id
    annotation.predicate_uri = self.DC_TERMS_REFERENCED_BY
    annotation.object_uri = self.referrer.uri
    annotation.creator_uuid = ''
    annotation.save()
    print('[' + str(self.new_annotations) + '] annotated: ' + oc_item.uuid)
def make_dinaa_link_assertions(self):
    """Link DINAA manifest items to federal registry documents.

    Walks every cached keyword search. Each result whose
    ``document_number`` equals a DINAA match's ``doc`` is linked to the
    manifest item with the match's ``uuid``: a LinkEntity is created for
    the document URL when missing, then a LinkAnnotation with predicate
    ``self.DC_TERMS_REF_BY`` is saved.

    The annotation save is best effort. It now catches ``Exception``
    instead of using a bare ``except`` (which also swallowed
    KeyboardInterrupt/SystemExit) and prints the failure.
    """
    self.make_fed_reg_vocab_entity()
    fed_api = FederalRegistryAPI()
    search_key_list = fed_api.get_list_cached_keyword_searches()
    dinaa_matches = fed_api.get_dict_from_file(self.dinaa_matches_key)
    for s_key in search_key_list:
        s_json = fed_api.get_dict_from_file(s_key)
        if 'results' not in s_json:
            continue
        for match in dinaa_matches:
            for s_result in s_json['results']:
                if s_result['document_number'] != match['doc']:
                    continue
                print('Found match for ' + match['doc'])
                try:
                    man_obj = Manifest.objects.get(uuid=match['uuid'])
                except Manifest.DoesNotExist:
                    # no manifest item for this match; nothing to link
                    continue
                fed_uri = s_result['html_url']
                try:
                    LinkEntity.objects.get(uri=fed_uri)
                    entity_exists = True
                except LinkEntity.DoesNotExist:
                    entity_exists = False
                if not entity_exists:
                    print('Saving entity: ' + s_result['title'])
                    title = s_result['title']
                    if len(title) > 175:
                        title = title[0:175] + '...'
                    le = LinkEntity()
                    le.uri = fed_uri
                    le.label = title
                    le.alt_label = s_result['document_number']
                    le.vocab_uri = self.FEDERAL_REG_URI
                    le.ent_type = 'instance'
                    le.slug = 'fed-reg-docs-' + s_result['document_number']
                    le.save()
                # Now save the link annotation
                print('Adding ref link to ' + man_obj.label)
                la = LinkAnnotation()
                la.subject = man_obj.uuid
                la.subject_type = man_obj.item_type
                la.project_uuid = man_obj.project_uuid
                la.source_id = self.source_id
                la.predicate_uri = self.DC_TERMS_REF_BY
                la.object_uri = fed_uri
                try:
                    la.save()
                except Exception as err:
                    # NOTE(review): presumably a duplicate annotation --
                    # confirm against LinkAnnotation.save
                    print('Could not save ref link to ' + str(fed_uri)
                          + ': ' + str(err))
def save_hierarchy(self, predicate_uri='rdfs:subClassOf'):
    """Save hierarchy relations from the vocabulary graph.

    Handles 'rdfs:subClassOf' (the default) and 'rdfs:subPropertyOf'; any
    other predicate saves nothing. Returns False when ``self.graph`` or
    ``self.vocabulary_uri`` is False, otherwise the list of saved
    ``{'s': subject_uri, 'o': object_uri}`` dicts.
    """
    if self.graph is False or self.vocabulary_uri is False:
        return False
    if self.replace_old:
        # delete old relations from this vocabulary using this predicate
        LinkAnnotation.objects.filter(
            source_id=self.vocabulary_uri,
            predicate_uri=predicate_uri,
        ).delete()

    # pick the RDF term to look for in the graph
    if predicate_uri == 'rdfs:subClassOf':
        rdf_pred = RDFS.subClassOf
    elif predicate_uri == 'rdfs:subPropertyOf':
        rdf_pred = RDFS.subPropertyOf
    else:
        rdf_pred = None

    relations = []
    if rdf_pred is not None:
        for subj, _pred, obj in self.graph.triples((None, rdf_pred, None)):
            subject_uri = str(subj)
            object_uri = str(obj)
            if subject_uri != object_uri:
                # self-referencing relations are skipped
                relations.append({'s': subject_uri, 'o': object_uri})

    for relation in relations:
        annotation = LinkAnnotation()
        # make the subject a prefixed URI if common
        annotation.subject = URImanagement.prefix_common_uri(relation['s'])
        annotation.subject_type = 'uri'
        annotation.project_uuid = '0'
        annotation.source_id = self.vocabulary_uri
        annotation.predicate_uri = predicate_uri
        annotation.object_uri = relation['o']
        annotation.save()
    return relations
def create_pred_parents(self, new_hierachic_list):
    """Create parent predicates implied by delimited predicate labels.

    Each manifest label is split on ``self.HIERARCHY_DELIM``. A predicate
    is fetched or made for every cumulative label prefix, and each
    consecutive pair of prefixes is recorded as a parent/child pair. A
    LinkAnnotation (predicate ``self.p_for_superobjs``) is then saved for
    every pair.

    Returns the list of ``{"parent": uuid, "child": uuid}`` dicts.

    Fixes over the previous version:
    * each annotation gets the project_uuid of the manifest its pair came
      from; before, the loop variable ``manifest`` was read after the loop,
      so every annotation got the last manifest's project.
    * a prefix whose predicate could not be made (``ppred is False``) is
      skipped instead of raising AttributeError on ``ppred.uuid``.
    """
    parent_children_pairs = []
    # project_uuid per pair, kept separately so the returned dicts keep
    # their original two-key shape
    pair_project_uuids = []
    for manifest in new_hierachic_list:
        try:
            oc_pred = Predicate.objects.get(uuid=manifest.uuid)
        except Predicate.DoesNotExist:
            continue
        child_parts = manifest.label.split(self.HIERARCHY_DELIM)
        current_parent = False
        for depth in range(1, len(child_parts) + 1):
            # cumulative label: first part, first two parts, and so on
            act_new_label = self.HIERARCHY_DELIM.join(child_parts[:depth])
            pred_manage = PredicateManagement()
            pred_manage.project_uuid = manifest.project_uuid
            pred_manage.source_id = self.source_id
            pred_manage.sort = oc_pred.sort
            pred_manage.data_type = oc_pred.data_type
            ppred = pred_manage.get_make_predicate(act_new_label,
                                                   manifest.class_uri)
            if ppred is False:
                # this level is unavailable; the next level will link to
                # the nearest ancestor that was available
                continue
            if current_parent is not False:
                parent_children_pairs.append({"parent": current_parent,
                                              "child": ppred.uuid})
                pair_project_uuids.append(manifest.project_uuid)
            current_parent = ppred.uuid

    # now make the linked data annotations
    for parent_child, project_uuid in zip(parent_children_pairs,
                                          pair_project_uuids):
        new_la = LinkAnnotation()
        new_la.subject = parent_child["child"]
        new_la.subject_type = "predicates"
        new_la.project_uuid = project_uuid
        new_la.source_id = self.source_id
        new_la.predicate_uri = self.p_for_superobjs
        new_la.object_uri = URImanagement.make_oc_uri(
            parent_child["parent"], "predicates")
        new_la.creator_uuid = ""
        new_la.save()
    return parent_children_pairs
def create_concept_parents(self, new_hierachic_list):
    """Create parent types implied by delimited type labels.

    Each manifest label is split on ``self.HIERARCHY_DELIM``. A type is
    fetched or made (within the original type's predicate) for every
    cumulative label prefix, and each consecutive pair of prefixes is
    recorded as a parent/child pair. A LinkAnnotation (predicate
    ``self.p_for_superobjs``) is then saved for every pair.

    Returns the list of ``{'parent': uuid, 'child': uuid}`` dicts.

    Fix over the previous version: the annotation loop read
    ``oc_type.project_uuid`` from the variable left over by the first
    loop. That used the last item's project for every pair, and raised
    AttributeError when the last lookup had failed (``oc_type = False``).
    The project_uuid is now remembered per pair.
    """
    parent_children_pairs = []
    # project_uuid per pair, kept separately so the returned dicts keep
    # their original two-key shape
    pair_project_uuids = []
    for manifest in new_hierachic_list:
        try:
            oc_type = OCtype.objects.get(uuid=manifest.uuid)
        except OCtype.DoesNotExist:
            continue
        child_parts = manifest.label.split(self.HIERARCHY_DELIM)
        current_parent = False
        for depth in range(1, len(child_parts) + 1):
            # cumulative label: first part, first two parts, and so on
            act_new_label = self.HIERARCHY_DELIM.join(child_parts[:depth])
            type_manage = TypeManagement()
            type_manage.project_uuid = oc_type.project_uuid
            type_manage.source_id = self.source_id
            ptype = type_manage.get_make_type_within_pred_uuid(
                oc_type.predicate_uuid, act_new_label)
            if current_parent is not False:
                parent_children_pairs.append({'parent': current_parent,
                                              'child': ptype.uuid})
                pair_project_uuids.append(oc_type.project_uuid)
            current_parent = ptype.uuid

    # now make the linked data annotations
    for parent_child, project_uuid in zip(parent_children_pairs,
                                          pair_project_uuids):
        new_la = LinkAnnotation()
        new_la.subject = parent_child['child']
        new_la.subject_type = 'types'
        new_la.project_uuid = project_uuid
        new_la.source_id = self.source_id
        new_la.predicate_uri = self.p_for_superobjs
        new_la.object_uri = URImanagement.make_oc_uri(
            parent_child['parent'], 'types')
        new_la.creator_uuid = ''
        new_la.save()
    return parent_children_pairs
def skos_relate_old_new_predicates(self, project_uuid, source_id,
                                   predicate_uuid, new_pred_uuid):
    """Relate a new predicate to an existing one with 'skos:related'.

    The new predicate is the subject; the object is the Open Context URI
    made from ``predicate_uuid``.

    Returns True when the annotation was saved, False when saving raised.
    Only ``Exception`` is caught, so KeyboardInterrupt and SystemExit are
    no longer swallowed as they were by the previous bare ``except``.
    """
    la = LinkAnnotation()
    la.subject = new_pred_uuid
    la.subject_type = 'predicates'
    la.project_uuid = project_uuid
    la.source_id = source_id
    la.predicate_uri = 'skos:related'
    la.object_uri = URImanagement.make_oc_uri(predicate_uuid, 'predicates')
    try:
        la.save()
    except Exception:
        return False
    return True
def save_entity_comments(self):
    """Save rdfs:comment values from the graph as LinkAnnotations.

    Does nothing when ``self.graph`` or ``self.vocabulary_uri`` is False.
    When ``self.replace_old`` is set, existing comment annotations on this
    vocabulary's entities are deleted first. A comment is only saved for a
    subject that exists as a LinkEntity; the text is stored in
    ``obj_extra`` under ``Languages().DEFAULT_LANGUAGE``.

    Fix over the previous version: the entity URIs (strings) are passed to
    ``get_identifier_list_variants`` and on to ``subject__in``. Before, a
    QuerySet of LinkEntity objects was passed, which cannot match the
    string ``subject`` field.
    NOTE(review): assumes the helper takes a list of identifier strings,
    as its name and the 'rdfs:comment' call suggest -- confirm.
    """
    if self.graph is False or self.vocabulary_uri is False:
        return
    # get all the variants of rdfs:comment
    comment_uris = LinkEquivalence()\
        .get_identifier_list_variants('rdfs:comment')
    # URIs of this vocabulary's entities (possible comment subjects)
    raw_subject_uris = list(
        LinkEntity.objects
        .filter(vocab_uri=self.vocabulary_uri)
        .values_list('uri', flat=True)
    )
    subject_uris = LinkEquivalence()\
        .get_identifier_list_variants(raw_subject_uris)
    if self.replace_old:
        # delete the old comments
        LinkAnnotation.objects\
            .filter(subject__in=subject_uris,
                    predicate_uri__in=comment_uris)\
            .delete()
    # loop-invariant, so created once rather than per triple
    lang = Languages()
    for subj, _pred, obj in self.graph.triples((None, RDFS.comment, None)):
        subject_uri = str(subj)
        comment = str(obj)
        try:
            LinkEntity.objects.get(uri=subject_uri)
        except LinkEntity.DoesNotExist:
            # only known entities get a comment
            continue
        newr = LinkAnnotation()
        # make the subject a prefixed URI if common
        newr.subject = URImanagement.prefix_common_uri(subject_uri)
        newr.subject_type = 'uri'
        newr.project_uuid = '0'
        newr.source_id = self.vocabulary_uri
        newr.predicate_uri = 'rdfs:comment'
        newr.obj_extra = {lang.DEFAULT_LANGUAGE: comment}
        newr.save()
def save_hierarchy(self, predicate_uri='rdfs:subClassOf'):
    """Store subClassOf / subPropertyOf relations found in the graph.

    Returns False when ``self.graph`` or ``self.vocabulary_uri`` is False.
    Otherwise returns the list of ``{'s': subject_uri, 'o': object_uri}``
    dicts that were saved. That list is empty for any predicate other than
    'rdfs:subClassOf' and 'rdfs:subPropertyOf'.
    """
    if self.graph is False or self.vocabulary_uri is False:
        return False
    if self.replace_old:
        # delete old relations from this vocabulary using this predicate
        stale = LinkAnnotation.objects.filter(
            source_id=self.vocabulary_uri, predicate_uri=predicate_uri)
        stale.delete()

    # choose which RDF term to search the graph for
    if predicate_uri == 'rdfs:subClassOf':
        graph_term = RDFS.subClassOf
    elif predicate_uri == 'rdfs:subPropertyOf':
        graph_term = RDFS.subPropertyOf
    else:
        graph_term = None

    found = []
    if graph_term is not None:
        for subj, _pred, obj in self.graph.triples((None, graph_term, None)):
            subj_uri, obj_uri = str(subj), str(obj)
            if subj_uri == obj_uri:
                # skip self-referencing relations
                continue
            found.append({'s': subj_uri, 'o': obj_uri})

    for item in found:
        record = LinkAnnotation()
        # make the subject a prefixed URI if common
        record.subject = URImanagement.prefix_common_uri(item['s'])
        record.subject_type = 'uri'
        record.project_uuid = '0'
        record.source_id = self.vocabulary_uri
        record.predicate_uri = predicate_uri
        record.object_uri = item['o']
        record.save()
    return found
def validate_make_eol_hierarchy(self, child_uri, parent_uri):
    """Make a child-to-parent relation for EOL entities when allowed.

    Both URIs are cleaned first. If the child dereferences to a known
    entity, the relation is only made when it has no parents yet. If the
    child is unknown, its label is fetched through
    ``self.get_save_entity_label`` and the relation is made.
    """
    uri_cleaner = LinkEntityGeneration()
    child_uri = uri_cleaner.make_clean_uri(child_uri)  # strip any cruft
    parent_uri = uri_cleaner.make_clean_uri(parent_uri)
    if Entity().dereference(child_uri):
        parents = LinkRecursion().get_jsonldish_entity_parents(child_uri,
                                                               False)
        # no parents (False or empty) means it is OK to make an assertion
        ok_create = parents is False or len(parents) == 0
    else:
        # the child does not exist yet, so OK to make the relation
        ok_create = True
        print('Getting missing data for: ' + child_uri)
        self.get_save_entity_label(child_uri)
    if not ok_create:
        print('Already in hierarchy: ' + child_uri)
        return
    print('OK, make rel for: ' + child_uri + ' in ' + parent_uri)
    annotation = LinkAnnotation()
    annotation.subject = child_uri
    annotation.subject_type = 'uri'
    annotation.project_uuid = '0'
    annotation.source_id = 'manual-eol-manage'
    annotation.predicate_uri = self.CHILD_PARENT_REL
    annotation.object_uri = parent_uri
    annotation.sort = 1
    annotation.save()
def link_table_to_projects(self, table_id):
    """Link an export table to each project that has records in it.

    The projects come from ``self.get_table_project_uuid_counts``. A
    LinkAnnotation from project to table URI is only created when no
    annotation between the two exists yet.
    """
    table_ids = ExpTableIdentifiers()
    table_ids.make_all_identifiers(table_id)
    for count_rec in self.get_table_project_uuid_counts(table_ids.table_id):
        project_uuid = count_rec['project_uuid']
        existing = LinkAnnotation.objects\
            .filter(subject=project_uuid, object_uri=table_ids.uri)[:1]
        if len(existing) >= 1:
            # this project is already related to the table
            continue
        annotation = LinkAnnotation()
        annotation.subject = project_uuid
        annotation.subject_type = 'projects'
        annotation.project_uuid = project_uuid
        annotation.source_id = 'exp-tables-management'
        annotation.predicate_uri = self.project_to_table_predicate
        annotation.object_uri = table_ids.uri
        annotation.creator_uuid = ''
        annotation.save()
        print('Linked project: ' + project_uuid + ' to ' + table_ids.uri)
def save_entity_comments(self):
    """Save rdfs:comment values from the graph as LinkAnnotations.

    Does nothing when ``self.graph`` or ``self.vocabulary_uri`` is False.
    When ``self.replace_old`` is set, existing comment annotations on this
    vocabulary's entities are deleted first. A comment is only saved for a
    subject that exists as a LinkEntity; the text is stored in
    ``obj_extra`` under ``Languages().DEFAULT_LANGUAGE``.

    Fix over the previous version: the entity URIs (strings) are passed to
    ``get_identifier_list_variants`` and on to ``subject__in``. Before, a
    QuerySet of LinkEntity objects was passed, which cannot match the
    string ``subject`` field.
    NOTE(review): assumes the helper takes a list of identifier strings,
    as its name and the 'rdfs:comment' call suggest -- confirm.
    """
    if self.graph is False or self.vocabulary_uri is False:
        return
    # get all the variants of rdfs:comment
    comment_uris = LinkEquivalence()\
        .get_identifier_list_variants('rdfs:comment')
    # URIs of this vocabulary's entities (possible comment subjects)
    raw_subject_uris = list(
        LinkEntity.objects
        .filter(vocab_uri=self.vocabulary_uri)
        .values_list('uri', flat=True)
    )
    subject_uris = LinkEquivalence()\
        .get_identifier_list_variants(raw_subject_uris)
    if self.replace_old:
        # delete the old comments
        LinkAnnotation.objects\
            .filter(subject__in=subject_uris,
                    predicate_uri__in=comment_uris)\
            .delete()
    # loop-invariant, so created once rather than per triple
    lang = Languages()
    for subj, _pred, obj in self.graph.triples((None, RDFS.comment, None)):
        subject_uri = str(subj)
        comment = str(obj)
        try:
            LinkEntity.objects.get(uri=subject_uri)
        except LinkEntity.DoesNotExist:
            # only known entities get a comment
            continue
        newr = LinkAnnotation()
        # make the subject a prefixed URI if common
        newr.subject = URImanagement.prefix_common_uri(subject_uri)
        newr.subject_type = 'uri'
        newr.project_uuid = '0'
        newr.source_id = self.vocabulary_uri
        newr.predicate_uri = 'rdfs:comment'
        newr.obj_extra = {lang.DEFAULT_LANGUAGE: comment}
        newr.save()
def make_von_den_driesch_equiv(self, project_uuid,
                               equiv_pred='skos:closeMatch'):
    """Match a project's numeric predicates to zooarch ontology entities.

    For each 'xsd:double' predicate of the project whose manifest label
    equals the label of an entity in the open-context-zooarch vocabulary,
    two LinkAnnotations are saved with source 'label-match': one relating
    the predicate to the entity with ``equiv_pred``, and one rdfs:range
    assertion pointing at http://www.wikidata.org/wiki/Q174789.
    """
    zooarch_vocab = 'http://opencontext.org/vocabularies/open-context-zooarch/'
    numeric_preds = Predicate.objects\
        .filter(project_uuid=project_uuid, data_type='xsd:double')
    for pred in numeric_preds:
        try:
            # try to find the manifest item
            man_obj = Manifest.objects.get(uuid=pred.uuid)
        except Manifest.DoesNotExist:
            continue
        matches = LinkEntity.objects\
            .filter(label=man_obj.label, vocab_uri=zooarch_vocab)[:1]
        if len(matches) == 0:
            continue
        # a match! make the equivalence, then record the unit of measurement
        uri = matches[0].uri
        print(str(man_obj.label) + ' matches ' + uri)
        assertions = (
            (equiv_pred, uri),
            ('http://www.w3.org/2000/01/rdf-schema#range',
             'http://www.wikidata.org/wiki/Q174789'),
        )
        for pred_uri, obj_uri in assertions:
            annotation = LinkAnnotation()
            annotation.subject = man_obj.uuid
            annotation.subject_type = man_obj.item_type
            annotation.project_uuid = man_obj.project_uuid
            annotation.source_id = 'label-match'
            annotation.predicate_uri = pred_uri
            annotation.object_uri = obj_uri
            annotation.save()
def make_naa_annotations(self, project_uuid, naa_annotated_proj_uuid,
                         equiv_pred='skos:closeMatch'):
    """Copy NAA (Neutron Activation Analysis) annotations between projects.

    Step 1: for every 'xsd:double' predicate of ``naa_annotated_proj_uuid``
    with a label shorter than 4 characters, find a predicate with the same
    label in ``project_uuid`` and copy the old predicate's LinkAnnotations
    onto it (source 'naa-link-annotations-method'). Copies are best
    effort: a failed save is reported and skipped.

    Step 2: for every 'xsd:double' predicate of ``project_uuid`` whose
    label matches an entity in the open-context-zooarch vocabulary, save
    an ``equiv_pred`` annotation and an rdfs:range annotation.

    ``equiv_pred`` is a new optional parameter. Step 2 referenced this
    name without defining it, so the first label match raised NameError.
    The default is the one make_von_den_driesch_equiv uses.
    """
    old_pred_uuids = [
        old_pred.uuid
        for old_pred in Predicate.objects.filter(
            project_uuid=naa_annotated_proj_uuid, data_type='xsd:double')
    ]
    old_pred_mans = Manifest.objects\
        .filter(uuid__in=old_pred_uuids,
                project_uuid=naa_annotated_proj_uuid)\
        .order_by('label')
    for old_pred_man in old_pred_mans:
        if len(old_pred_man.label) >= 4:
            # only short labels are treated as likely chemical elements
            continue
        new_man_preds = Manifest.objects\
            .filter(item_type='predicates',
                    project_uuid=project_uuid,
                    label=old_pred_man.label)[:1]
        if len(new_man_preds) < 1:
            continue
        # the new project has a predicate with a matching label
        new_man_pred = new_man_preds[0]
        print('-----------------------------')
        print('Copy annotations from: ' + old_pred_man.label
              + ' (' + old_pred_man.uuid + ')')
        print('To: ' + new_man_pred.uuid)
        print('-----------------------------')
        old_link_annos = LinkAnnotation.objects\
            .filter(subject=old_pred_man.uuid)
        for old_link_anno in old_link_annos:
            # re-point the loaded record at the new predicate and clear
            # hash_id before saving
            # NOTE(review): presumably hash_id is the primary key, so this
            # inserts a copy instead of updating the old row -- confirm
            new_link_anno = old_link_anno
            new_link_anno.hash_id = None
            new_link_anno.subject = new_man_pred.uuid
            new_link_anno.subject_type = new_man_pred.item_type
            new_link_anno.project_uuid = new_man_pred.project_uuid
            new_link_anno.source_id = 'naa-link-annotations-method'
            try:
                new_link_anno.save()
            except Exception as err:
                # best effort; no longer hides KeyboardInterrupt/SystemExit
                print('Skipped annotation copy: ' + str(err))

    zooarch_vocab = 'http://opencontext.org/vocabularies/open-context-zooarch/'
    preds = Predicate.objects\
        .filter(project_uuid=project_uuid, data_type='xsd:double')
    for pred in preds:
        try:
            # try to find the manifest item
            man_obj = Manifest.objects.get(uuid=pred.uuid)
        except Manifest.DoesNotExist:
            continue
        l_ents = LinkEntity.objects\
            .filter(label=man_obj.label, vocab_uri=zooarch_vocab)[:1]
        if len(l_ents) < 1:
            continue
        # a match! make a close match assertion
        uri = l_ents[0].uri
        print(str(man_obj.label) + ' matches ' + uri)
        la = LinkAnnotation()
        la.subject = man_obj.uuid
        la.subject_type = man_obj.item_type
        la.project_uuid = man_obj.project_uuid
        la.source_id = 'label-match'
        la.predicate_uri = equiv_pred
        la.object_uri = uri
        la.save()
        # save also the unit of measurement (rdfs:range)
        la = LinkAnnotation()
        la.subject = man_obj.uuid
        la.subject_type = man_obj.item_type
        la.project_uuid = man_obj.project_uuid
        la.source_id = 'label-match'
        la.predicate_uri = 'http://www.w3.org/2000/01/rdf-schema#range'
        la.object_uri = 'http://www.wikidata.org/wiki/Q174789'
        la.save()
def create_concept_parents(self, new_hierachic_list):
    """Create parent types implied by delimited type labels.

    Each manifest label is split on ``self.HIERARCHY_DELIM``. A type is
    fetched or made (within the original type's predicate) for every
    cumulative label prefix, and each consecutive pair of prefixes is
    recorded as a parent/child pair. A LinkAnnotation (predicate
    ``self.p_for_superobjs``) is then saved for every pair.

    Returns the list of ``{'parent': uuid, 'child': uuid}`` dicts.

    Fix over the previous version: the annotation loop read
    ``oc_type.project_uuid`` from the variable left over by the first
    loop. That used the last item's project for every pair, and raised
    AttributeError when the last lookup had failed (``oc_type = False``).
    The project_uuid is now remembered per pair.
    """
    parent_children_pairs = []
    # project_uuid per pair, kept separately so the returned dicts keep
    # their original two-key shape
    pair_project_uuids = []
    for manifest in new_hierachic_list:
        try:
            oc_type = OCtype.objects.get(uuid=manifest.uuid)
        except OCtype.DoesNotExist:
            continue
        child_parts = manifest.label.split(self.HIERARCHY_DELIM)
        current_parent = False
        for depth in range(1, len(child_parts) + 1):
            # cumulative label: first part, first two parts, and so on
            act_new_label = self.HIERARCHY_DELIM.join(child_parts[:depth])
            type_manage = TypeManagement()
            type_manage.project_uuid = oc_type.project_uuid
            type_manage.source_id = self.source_id
            ptype = type_manage.get_make_type_within_pred_uuid(
                oc_type.predicate_uuid, act_new_label)
            if current_parent is not False:
                parent_children_pairs.append({'parent': current_parent,
                                              'child': ptype.uuid})
                pair_project_uuids.append(oc_type.project_uuid)
            current_parent = ptype.uuid

    # now make the linked data annotations
    for parent_child, project_uuid in zip(parent_children_pairs,
                                          pair_project_uuids):
        new_la = LinkAnnotation()
        new_la.subject = parent_child['child']
        new_la.subject_type = 'types'
        new_la.project_uuid = project_uuid
        new_la.source_id = self.source_id
        new_la.predicate_uri = self.p_for_superobjs
        new_la.object_uri = URImanagement.make_oc_uri(
            parent_child['parent'], 'types')
        new_la.creator_uuid = ''
        new_la.save()
    return parent_children_pairs
def add_period_annoation(self, p_ref):
    """Annotate an Open Context item as referenced by a period.

    ``p_ref['oc-uri']`` is dereferenced to an entity. When that succeeds, a
    'dc-terms:isReferencedBy' annotation pointing at
    ``p_ref['period-meta']['uri']`` is saved.

    Returns the result of the dereference call.
    """
    entity = Entity()
    found = entity.dereference(p_ref['oc-uri'])
    if not found:
        return found
    annotation = LinkAnnotation()
    annotation.subject = entity.uuid
    annotation.subject_type = entity.item_type
    annotation.project_uuid = entity.project_uuid
    annotation.source_id = self.source_id
    annotation.predicate_uri = 'dc-terms:isReferencedBy'
    annotation.object_uri = p_ref['period-meta']['uri']
    annotation.creator_uuid = ''
    annotation.save()
    return found
def make_type_relations(self, sub_type_pred_uuid, sub_type_f_num, rel_pred,
                        obj_type_pred_uuid, obj_type_f_num):
    """Make semantic relations between types found on the same import row.

    Each non-blank cell of field ``sub_type_f_num`` gives a subject type;
    the cell of field ``obj_type_f_num`` on the same row gives the object
    type. Distinct subject/object pairs are each saved once as a
    LinkAnnotation using ``rel_pred``.
    """
    def lookup_type(pred_uuid, label):
        # get or make the type for this label under the given predicate
        manager = TypeManagement()
        manager.project_uuid = self.project_uuid
        manager.source_id = self.source_id
        return manager.get_make_type_within_pred_uuid(pred_uuid, label)

    pending = {}
    subject_cells = ImportCell.objects\
        .filter(source_id=self.source_id, field_num=sub_type_f_num)
    for subject_cell in subject_cells:
        sub_type_text = subject_cell.record
        if len(sub_type_text) == 0:
            continue
        sub_type = lookup_type(sub_type_pred_uuid, sub_type_text)
        same_row_cells = ImportCell.objects\
            .filter(source_id=self.source_id,
                    field_num=obj_type_f_num,
                    row_num=subject_cell.row_num)[:1]
        if len(same_row_cells) == 0:
            continue
        obj_type_text = same_row_cells[0].record
        if len(obj_type_text) == 0 or sub_type_text == obj_type_text:
            continue
        obj_type = lookup_type(obj_type_pred_uuid, obj_type_text)
        # make a uri for this, since we're making a link assertion
        obj_uri = URImanagement.make_oc_uri(obj_type.uuid, 'types')
        # keyed on both uuids so the same assertion is not repeated
        rel_id = str(sub_type.uuid) + ' ' + str(obj_type.uuid)
        if rel_id not in pending:
            pending[rel_id] = {'subject': sub_type.uuid,
                               'object_uri': obj_uri}

    # now make the link data annotations relating these types
    for rel in pending.values():
        record = LinkAnnotation()
        record.subject = rel['subject']
        record.subject_type = 'types'
        record.project_uuid = self.project_uuid
        record.source_id = self.source_id
        record.predicate_uri = rel_pred
        record.object_uri = rel['object_uri']
        record.creator_uuid = ''
        record.save()
def link_sites_from_filecache(self):
    """Save link entities and site annotations found in the file cache.

    Reads ``self.cache_filekey`` from ``self.filecache``. For each entry
    under 'trinomial_refs' a LinkEntity is created for the record URI when
    missing. Then, for each trinomial that matches a site in the manifest,
    a 'skos:broader' annotation (record URI to source URI) and, when not
    already present, a 'dc-terms:isReferencedBy' annotation (site to
    record URI) are saved.

    Does nothing when there is no file cache or the cached object is not a
    dict with 'trinomial_refs'.

    Fix over the previous version: each annotation was saved once
    unguarded and then again inside ``try/except: pass``. The unguarded
    call defeated the best-effort guard, and the second call was a
    redundant write. Each annotation is now saved once, best effort,
    catching only ``Exception``.
    """
    def save_best_effort(annotation):
        # NOTE(review): failures are presumably duplicate annotations --
        # confirm against LinkAnnotation.save
        try:
            annotation.save()
        except Exception as err:
            print('Could not save annotation: ' + str(err))

    if self.filecache is None:
        return
    self.filecache.working_dir = self.working_dir
    json_obj = self.filecache.get_dict_from_file(self.cache_filekey)
    if not isinstance(json_obj, dict) or 'trinomial_refs' not in json_obj:
        return
    for tri_ref in json_obj['trinomial_refs']:
        uri = tri_ref['rec_uri']
        title = tri_ref['title']
        if len(title) > 194:
            title = title[0:190] + '... '
        l_exists = LinkEntity.objects.filter(uri=uri)[:1]
        if len(l_exists) < 1:
            l_ent = LinkEntity()
            l_ent.uri = uri
            l_ent.label = title
            l_ent.alt_label = title
            l_ent.vocab_uri = tri_ref['source_uri']
            l_ent.ent_type = 'class'
            l_ent.save()
        for trinomial in tri_ref['trinomials']:
            man_objs = Manifest.objects.filter(label=trinomial,
                                               class_uri='oc-gen:cat-site')
            if len(man_objs) < 1:
                continue
            man_obj = man_objs[0]
            la = LinkAnnotation()
            la.subject = uri  # the subordinate is the subject
            la.subject_type = 'uri'
            la.project_uuid = man_obj.project_uuid
            la.source_id = self.source_id
            la.predicate_uri = "skos:broader"
            la.object_uri = tri_ref['source_uri']
            save_best_effort(la)
            links = LinkAnnotation.objects\
                .filter(subject=man_obj.uuid, object_uri=uri)[:1]
            if len(links) < 1:
                print('Link ' + man_obj.label
                      + ' (' + man_obj.uuid + ') to ' + uri)
                la = LinkAnnotation()
                la.subject = man_obj.uuid
                la.subject_type = man_obj.item_type
                la.project_uuid = man_obj.project_uuid
                la.source_id = self.source_id
                la.predicate_uri = 'dc-terms:isReferencedBy'
                la.object_uri = uri
                save_best_effort(la)
def store_records(self, act_table, recs):
    """Save each record in ``recs`` as a row of the model for ``act_table``.

    ``recs`` is a mapping whose values are keyword-argument dicts for the
    model constructor. Records for a table name that is not listed below
    are ignored.
    """
    for record in recs.values():
        # choose the model class for the table name
        if act_table == 'link_annotations':
            model = LinkAnnotation
        elif act_table == 'link_entities':
            model = LinkEntity
        elif act_table == 'link_hierarchies':
            model = LinkHierarchy
        elif act_table == 'oc_chronology':
            model = Chronology
        elif act_table == 'oc_geodata':
            model = Geodata
        elif act_table == 'oc_mediafiles':
            model = Mediafile
        elif act_table == 'oc_documents':
            model = OCdocument
        elif act_table == 'oc_persons':
            model = Person
        elif act_table == 'oc_projects':
            model = Project
        elif act_table == 'oc_strings':
            model = OCstring
        elif act_table == 'oc_types':
            model = OCtype
        elif act_table == 'oc_events':
            model = Event
        elif act_table == 'oc_predicates':
            model = Predicate
        elif act_table == 'oc_identifiers':
            model = StableIdentifer
        elif act_table == 'oc_obsmetadata':
            model = ObsMetadata
        else:
            continue
        new_row = model(**record)
        new_row.save()
def add_table_file_download(self, table_id, file_uri):
    """ Registers the file_uri of a pre-cached download for a table.

    Looks up the ExpTable with the given table_id. When
    ManageMediafiles.get_head_info(file_uri) reports success, an item
    holding the file URI, its mime-type URI and its size is appended
    to the table's ExpTable.PREDICATE_DUMP list in meta_json and the
    table is saved. If a Manifest record has the table_id as its uuid,
    a LinkAnnotation relating that record to the file is saved too.
    Returns None.
    """
    dump_pred = ExpTable.PREDICATE_DUMP
    for ex_tab in ExpTable.objects.filter(table_id=table_id)[:1]:
        # reuse the list of known dump files, or start a new one
        if dump_pred in ex_tab.meta_json:
            dump_list = ex_tab.meta_json[dump_pred]
        else:
            dump_list = []
        media_info = ManageMediafiles()
        if not media_info.get_head_info(file_uri):
            # no usable information about the file, nothing to record
            continue
        dump_item = LastUpdatedOrderedDict()
        dump_item['id'] = file_uri
        dump_item['dc-terms:hasFormat'] = media_info.mime_type_uri
        dump_item['dcat:size'] = float(media_info.filesize)
        print('Found: ' + str(dump_item))
        dump_list.append(dump_item)
        ex_tab.meta_json[dump_pred] = dump_list
        ex_tab.save()
        man_items = Manifest.objects.filter(uuid=table_id)[:1]
        if len(man_items) < 1:
            continue
        man_obj = man_items[0]
        new_anno = LinkAnnotation()
        new_anno.subject = man_obj.uuid
        new_anno.subject_type = man_obj.item_type
        new_anno.project_uuid = man_obj.project_uuid
        new_anno.source_id = 'download-file-relate'
        new_anno.predicate_uri = dump_pred
        new_anno.object_uri = file_uri
        # the position of the new file within the dump list
        new_anno.sort = len(dump_list)
        new_anno.obj_extra = dump_item
        new_anno.save()
def get_add_gbif_parent(child_uri, child_le=None):
    """Finds, or creates, the parent relation of a GBIF entity

    Returns a tuple: (parent link entity, is new relationship). The
    parent is None when the GBIF API gives no parent key for the
    child. The flag is True only when a new SKOS_BROADER annotation
    was saved by this call.
    """
    child_le = child_le or add_get_gbif_link_entity(child_uri)
    known_link = LinkAnnotation.objects.filter(
        subject=child_le.uri,
        predicate_uri=SKOS_BROADER,
    ).first()
    if known_link:
        # The child already links to a parent, so just report it.
        return add_get_gbif_link_entity(known_link.object_uri), False
    api = gbifAPI()
    parent_id = api.get_gbif_parent_key(
        get_gbif_species_id_from_uri(child_le.uri)
    )
    if not parent_id:
        # No parent key: we are at the top of the hierarchy.
        return None, False
    parent_le = add_get_gbif_link_entity(GBIF_BASE_URI + str(parent_id))
    message = 'Make {} ({}) a child of: {} ({})'.format(
        child_le.uri,
        child_le.label,
        parent_le.uri,
        parent_le.label,
    )
    print(message)
    new_link = LinkAnnotation()
    new_link.subject = child_le.uri
    new_link.subject_type = 'uri'
    new_link.project_uuid = '0'
    new_link.source_id = HIERARCHY_SOURCE
    new_link.predicate_uri = SKOS_BROADER
    new_link.object_uri = parent_le.uri
    new_link.creator_uuid = ''
    new_link.save()
    return parent_le, True
def generate_table_metadata(self, table_id, overwrite=False):
    """ Makes metadata annotations for a specific export table.

    For every predicate in self.metadata_predicates, annotations are
    (re)made when overwrite is True (the old ones are deleted first)
    or when the table has no annotation with that predicate yet.
    Contributors, creators, sources and subjects each get one
    LinkAnnotation per item, numbered from 1 in 'sort' and carrying
    the item's count in 'obj_extra'. Nothing is made unless both the
    ExpTable and its Manifest record exist. Returns None.
    """
    ex_id = ExpTableIdentifiers()
    ex_id.make_all_identifiers(table_id)
    table_ids = [ex_id.table_id, ex_id.public_table_id]
    try:
        ex_tab = ExpTable.objects.get(table_id=table_id)
    except ExpTable.DoesNotExist:
        print('No ExpTable object for: ' + ex_id.public_table_id)
        ex_tab = None
    try:
        man_obj = Manifest.objects.get(uuid=ex_id.public_table_id)
    except Manifest.DoesNotExist:
        print('No manifest object for: ' + ex_id.public_table_id)
        man_obj = None
    if ex_tab is None or man_obj is None:
        return

    def save_counted_annotation(predicate, count, object_uri, sort):
        # saves one annotation of the table that carries a count
        obj_extra = LastUpdatedOrderedDict()
        obj_extra['count'] = count
        la = LinkAnnotation()
        la.subject = man_obj.uuid
        la.subject_type = man_obj.item_type
        la.project_uuid = man_obj.project_uuid
        la.source_id = 'exp-table-manage'
        la.predicate_uri = predicate
        la.object_uri = object_uri
        la.creator_uuid = '0'
        la.sort = sort
        la.obj_extra = obj_extra
        la.save()

    # project counts serve both creators and sources, so get them once
    proj_uuid_counts = None
    for meta_pred in self.metadata_predicates:
        if overwrite:
            num_old_delete = LinkAnnotation.objects\
                                           .filter(subject__in=table_ids,
                                                   predicate_uri=meta_pred)\
                                           .delete()
            print('Deleted annoations ' + str(num_old_delete) + ' for ' + meta_pred)
        else:
            num_exists = LinkAnnotation.objects\
                                       .filter(subject__in=table_ids,
                                               predicate_uri=meta_pred)[:1]
            if len(num_exists) >= 1:
                # already annotated with this predicate, leave it alone
                continue
        if meta_pred == 'dc-terms:contributor':
            print('Getting contributors for ' + table_id)
            sorted_author_list = self.get_table_author_counts(table_id)
            for contrib_sort, s_author in enumerate(sorted_author_list, 1):
                save_counted_annotation(
                    meta_pred,
                    s_author['count'],
                    URImanagement.make_oc_uri(s_author['uuid'], 'persons'),
                    contrib_sort)
        elif meta_pred in ['dc-terms:creator', 'dc-terms:source']:
            if proj_uuid_counts is None:
                print('Getting projects for ' + table_id)
                proj_uuid_counts = self.get_table_project_uuid_counts(table_id)
            if meta_pred == 'dc-terms:creator':
                print('Getting creators for ' + table_id)
                dc_creator_list = self.make_table_dc_creator_list(proj_uuid_counts)
                for create_sort, dc_creator in enumerate(dc_creator_list, 1):
                    save_counted_annotation(
                        meta_pred,
                        dc_creator['count'],
                        dc_creator['id'],
                        create_sort)
            else:
                print('Getting sources for ' + table_id)
                for proj_sort, proj_uuid_count in enumerate(proj_uuid_counts, 1):
                    save_counted_annotation(
                        meta_pred,
                        proj_uuid_count['num_uuids'],
                        URImanagement.make_oc_uri(
                            proj_uuid_count['project_uuid'], 'projects'),
                        proj_sort)
        elif meta_pred == 'dc-terms:subject':
            print('Getting subjects for ' + table_id)
            dc_subject_list = self.make_table_dc_subject_category_list(table_id)
            for subj_sort, dc_subject in enumerate(dc_subject_list, 1):
                save_counted_annotation(
                    meta_pred,
                    dc_subject['count'],
                    dc_subject['id'],
                    subj_sort)
def match_trinomial_obj(self, tri):
    """ Tries to match the trinomial object 'tri' against tDAR.

    Skipped when the item has no Manifest record or already has a
    'dc-terms:subject' annotation containing self.TDAR_VOCAB. A tDAR
    result counts as a match when its label equals the trinomial, or
    equals it once the site part is left-padded with zeros (up to 5
    characters). For each match the tDAR LinkEntity is created if
    missing and a LinkAnnotation is saved. Failed HTTP requests make
    the process sleep for a growing wait, and exit once the wait
    exceeds self.max_wait. Returns the number of matches found.
    """
    try:
        manifest = Manifest.objects.get(uuid=tri.uuid)
    except Manifest.DoesNotExist:
        manifest = False
    la_check = LinkAnnotation.objects\
                             .filter(subject=tri.uuid,
                                     predicate_uri='dc-terms:subject',
                                     object_uri__contains=self.TDAR_VOCAB)[:1]
    found_matches = 0
    if len(la_check) >= 1 or manifest is False:
        # already has a tDAR id, or is not in the manifest
        return found_matches
    tri_man = TrinomialManage()
    request_keywords = [tri.trinomial]
    if self.lead_zero_check:
        # also ask for each zero-padded form of the site part
        tri_parts = tri_man.parse_trinomial(tri.trinomial)
        site = tri_parts['site']
        while len(site) < 4:
            site = '0' + site
            request_keywords.append(
                tri_parts['state'] + tri_parts['county'] + site)
    for keyword in request_keywords:
        tdar_api = tdarAPI()
        results = tdar_api.get_site_keyword(keyword)
        if isinstance(results, list):
            for result in results[:self.max_results]:
                # an exact label match is real, anything else is
                # assumed spurious until a padded form matches
                match_real = result['label'] == tri.trinomial
                if not match_real:
                    tri_parts = tri_man.parse_trinomial(tri.trinomial)
                    site = tri_parts['site']
                    while len(site) < 5:
                        site = '0' + site
                        new_trinomial = tri_parts['state'] \
                            + tri_parts['county'] + site
                        if new_trinomial == result['label']:
                            # differs only in leading zeros
                            match_real = True
                if not match_real:
                    print('Almost! ' + result['label'] + ' is not exactly: ' + tri.trinomial)
                    continue
                found_matches += 1
                # first make sure the tDAR entity is a linked entity
                try:
                    LinkEntity.objects.get(uri=result['id'])
                except LinkEntity.DoesNotExist:
                    le = LinkEntity()
                    le.uri = result['id']
                    le.label = result['label']
                    le.alt_label = result['label']
                    le.vocab_uri = self.TDAR_VOCAB
                    le.ent_type = 'type'
                    le.save()
                # then save the annotation for the item
                la = LinkAnnotation()
                la.subject = tri.uuid
                la.subject_type = manifest.item_type
                la.project_uuid = manifest.project_uuid
                la.source_id = 'tdar-api-lookup'
                la.predicate_uri = self.DC_TERMS_SUBJECT
                la.object_uri = result['id']
                la.save()
        if not tdar_api.request_error:
            self.request_error = False
            if self.error_wait >= self.base_wait:
                print('HTTP requests resumed OK, will continue.')
                self.error_wait = 0
            continue
        self.request_error = True
        print('HTTP request to tDAR failed!')
        self.error_wait += self.base_wait
        if self.error_wait > self.max_wait:
            print('Too many failures, quiting...')
            sys.exit('Quitting process')
        # wait longer after each failure before trying again
        print('Will try again in ' + str(self.error_wait) + ' seconds...')
        sleep(self.error_wait)
    return found_matches
def match_california_site(self, site_uuid):
    """ Tries to match California site names with tDAR site keywords.

    Skipped when the item does not exist or already has a
    'dc-terms:subject' annotation containing self.TDAR_VOCAB. The
    alternate site or place names found in the observations of the
    item's JSON-LD are looked up in tDAR; a result matches when its
    label equals the name, ignoring case. For each match the tDAR
    LinkEntity is created if missing and a LinkAnnotation is saved.
    Failed HTTP requests make the process sleep for a growing wait,
    and exit once the wait exceeds self.max_wait. Returns the number
    of matches found.
    """
    found_matches = 0
    oc_item = OCitem()
    if not oc_item.check_exists(site_uuid):
        return found_matches
    la_check = LinkAnnotation.objects\
                             .filter(subject=site_uuid,
                                     predicate_uri='dc-terms:subject',
                                     object_uri__contains=self.TDAR_VOCAB)[:1]
    if len(la_check) >= 1:
        # the item already has a tDAR id
        return found_matches
    # the names to look up come from the item's JSON-LD
    oc_item.generate_json_ld()
    name_pred = 'oc-pred:52-alternate-site-or-place-name'
    observations = []
    if 'oc-gen:has-obs' in oc_item.json_ld:
        if isinstance(oc_item.json_ld['oc-gen:has-obs'], list):
            observations = oc_item.json_ld['oc-gen:has-obs']
    request_keywords = []
    for obs in observations:
        if name_pred not in obs:
            continue
        if not isinstance(obs[name_pred], list):
            continue
        for name_obj in obs[name_pred]:
            if 'xsd:string' not in name_obj:
                continue
            if isinstance(name_obj['xsd:string'], str):
                request_keywords.append(name_obj['xsd:string'])
    print('Checking names in tDAR: ' + '; '.join(request_keywords))
    for keyword in request_keywords:
        tdar_api = tdarAPI()
        results = tdar_api.get_site_keyword(keyword)
        if isinstance(results, list):
            for result in results[:self.max_results]:
                # only a case-insensitive exact match is accepted
                if result['label'].lower() != keyword.lower():
                    print('Almost! ' + result['label'] + ' is not exactly: ' + keyword)
                    continue
                print('FOUND ' + result['label'])
                found_matches += 1
                # first make sure the tDAR entity is a linked entity
                try:
                    LinkEntity.objects.get(uri=result['id'])
                except LinkEntity.DoesNotExist:
                    le = LinkEntity()
                    le.uri = result['id']
                    le.label = result['label']
                    le.alt_label = result['label']
                    le.vocab_uri = self.TDAR_VOCAB
                    le.ent_type = 'type'
                    le.save()
                # then save the annotation for the item
                la = LinkAnnotation()
                la.subject = oc_item.manifest.uuid
                la.subject_type = oc_item.manifest.item_type
                la.project_uuid = oc_item.manifest.project_uuid
                la.source_id = 'tdar-api-lookup'
                la.predicate_uri = self.DC_TERMS_SUBJECT
                la.object_uri = result['id']
                la.save()
        if not tdar_api.request_error:
            self.request_error = False
            if self.error_wait >= self.base_wait:
                print('HTTP requests resumed OK, will continue.')
                self.error_wait = 0
            continue
        self.request_error = True
        print('HTTP request to tDAR failed!')
        self.error_wait += self.base_wait
        if self.error_wait > self.max_wait:
            print('Too many failures, quiting...')
            sys.exit('Quitting process')
        # wait longer after each failure before trying again
        print('Will try again in ' + str(self.error_wait) + ' seconds...')
        sleep(self.error_wait)
    return found_matches
def make_type_ld_annotations(self,
                             sub_type_pred_uuid,
                             sub_type_f_num,
                             rel_pred,
                             obj_le_f_num):
    """ Makes linked data annotations for the types in an import.

    The import cells of field sub_type_f_num are grouped by record
    hash, giving the distinct type labels and the rows each occurs
    in. For every non-blank label, the records of field obj_le_f_num
    in those rows that look like http(s) URIs become the objects of
    annotations: the type is found (or made) within the predicate
    sub_type_pred_uuid and related to each URI with rel_pred. All
    relations are collected first and saved afterwards. Returns None.
    """
    rels = []
    sub_type_list = ImportCell.objects\
                              .filter(source_id=self.source_id,
                                      field_num=sub_type_f_num)
    # group the cells by record hash, keeping the rows of each group
    distinct_records = {}
    for cell in sub_type_list:
        group = distinct_records.setdefault(
            cell.rec_hash,
            {'rows': [], 'imp_cell_obj': cell})
        group['rows'].append(cell.row_num)
    for distinct_type in distinct_records.values():
        type_label = distinct_type['imp_cell_obj'].record
        if len(type_label) < 1:
            # blank types cannot be annotated
            continue
        pc = ProcessCells(self.source_id, 0)
        ld_entities = pc.get_field_records(obj_le_f_num,
                                           distinct_type['rows'])
        for distinct_ld in ld_entities.values():
            obj_uri = distinct_ld['imp_cell_obj'].record
            if len(obj_uri) <= 8:
                continue
            if obj_uri[:7] != 'http://' and obj_uri[:8] != 'https://':
                # not a linked data entity
                continue
            # now get the UUID for the type
            tm = TypeManagement()
            tm.project_uuid = self.project_uuid
            tm.source_id = self.source_id
            sub_type = tm.get_make_type_within_pred_uuid(sub_type_pred_uuid,
                                                         type_label)
            rels.append({'subject_label': type_label,
                         'subject': sub_type.uuid,
                         'object_uri': obj_uri})
    for rel in rels:
        new_la = LinkAnnotation()
        new_la.subject = rel['subject']
        new_la.subject_type = 'types'
        new_la.project_uuid = self.project_uuid
        new_la.source_id = self.source_id
        new_la.predicate_uri = rel_pred
        new_la.object_uri = rel['object_uri']
        new_la.creator_uuid = ''
        new_la.save()
def add_item_annotation(self, post_data):
    """ Adds a linked data annotation to an item.

    Reads 'predicate_uri' and 'object_uri' from post_data. Both must
    dereference to entities, except that a predicate from the list of
    accepted predicates below is allowed even if it does not. The
    annotation is saved, and the caches cleared, only when no
    equivalent annotation of the item exists yet. Stores the outcome
    in self.response and returns it: a dict with 'action', 'ok' and a
    'change' dict holding a 'note' that explains a failure.
    """
    ok_predicates = ['dc-terms:creator',
                     'dc-terms:contributor',
                     'dc-terms:subject',
                     'dc-terms:coverage',
                     'dc-terms:temporal',
                     'dc-terms:references',
                     'dc-terms:isReferencedBy',
                     'dc-terms:license',
                     'skos:closeMatch',
                     'skos:exactMatch',
                     'owl:sameAs',
                     'skos:broader',
                     'skos:related',
                     'skos:example',
                     'rdfs:isDefinedBy',
                     'http://www.w3.org/2000/01/rdf-schema#range']
    ok, note = True, ''
    predicate_uri = self.request_param_val(post_data, 'predicate_uri')
    object_uri = self.request_param_val(post_data, 'object_uri')
    if predicate_uri is False or object_uri is False:
        ok, note = False, self.errors['params']
    else:
        found_p = Entity().dereference(predicate_uri)
        if found_p is False and predicate_uri in ok_predicates:
            # accepted predicates do not need to be known entities
            found_p = True
        o_entity = Entity()
        found_o = o_entity.dereference(object_uri)
        if not (found_p and found_o):
            ok, note = False, 'Missing a predicate or object entity'
        else:
            # look for the annotation in all its equivalent spellings
            lequiv = LinkEquivalence()
            pred_list = lequiv.get_identifier_list_variants(predicate_uri)
            obj_list = lequiv.get_identifier_list_variants(object_uri)
            la_exist = LinkAnnotation.objects\
                                     .filter(subject=self.uuid,
                                             predicate_uri__in=pred_list,
                                             object_uri__in=obj_list)[:1]
            if len(la_exist) >= 1:
                ok, note = False, 'This annotation already exists.'
            else:
                # we don't have an annotation like this yet
                object_uri = o_entity.uri
                new_la = LinkAnnotation()
                new_la.subject = self.manifest.uuid
                new_la.subject_type = self.manifest.item_type
                new_la.project_uuid = self.manifest.project_uuid
                new_la.source_id = self.request_param_val(post_data,
                                                          'source_id',
                                                          'manual-web-form',
                                                          False)
                new_la.sort = self.request_param_val(post_data,
                                                     'sort',
                                                     0,
                                                     False)
                new_la.predicate_uri = predicate_uri
                new_la.object_uri = object_uri
                new_la.creator_uuid = self.creator_uuid
                new_la.save()
                # a change was made, so the caches are stale
                self.clear_caches()
    self.response = {'action': 'add-item-annotation',
                     'ok': ok,
                     'change': {'note': note}}
    return self.response
def find_related_geonames(self, username='******'):
    """ Links region items to the GeoNames entities that match them.

    Goes through the 'oc-gen:cat-region' subjects of project '0'.
    The context path of each region, with slashes turned into spaces,
    is searched in GeoNames at an admin level equal to the number of
    slashes in the path. The first result is made a LinkEntity if it
    is not one yet, and is related to the region with a
    'skos:closeMatch' annotation unless the region already has an
    annotation pointing at it. Returns None.
    """
    man_objs = Manifest.objects\
                       .filter(project_uuid='0',
                               class_uri='oc-gen:cat-region',
                               item_type='subjects')
    for man_obj in man_objs:
        print('Checking slug: ' + man_obj.slug)
        context = Subject.objects.get(uuid=man_obj.uuid).context
        # each level of the context path is one admin level deeper
        if '/' in context:
            admin_level = max(len(context.split('/')) - 1, 0)
        else:
            admin_level = 0
        q_str = context.replace('/', ' ')
        geo_api = GeonamesAPI()
        json_r = geo_api.search_admin_entity(q_str,
                                             admin_level,
                                             username)
        if not isinstance(json_r, dict):
            continue
        # we found a result from GeoNames!
        print('Geonames result found.')
        if 'geonames' not in json_r:
            continue
        if len(json_r['geonames']) < 1:
            continue
        # only the first result is used
        top_hit = json_r['geonames'][0]
        geo_id = top_hit['geonameId']
        label = top_hit['name']
        alt_label = top_hit['toponymName']
        geonames_uri = 'http://www.geonames.org/' + str(geo_id)
        l_ents = LinkEntity.objects\
                           .filter(uri=geonames_uri)[:1]
        if len(l_ents) < 1:
            # we need to create this entity
            ent = LinkEntity()
            ent.uri = geonames_uri
            ent.label = label
            ent.alt_label = alt_label
            ent.vocab_uri = GeonamesAPI().VOCAB_URI
            ent.ent_type = 'class'
            ent.save()
        print(geonames_uri)
        annos = LinkAnnotation.objects\
                              .filter(subject=man_obj.uuid,
                                      object_uri=geonames_uri)[:1]
        if len(annos) >= 1:
            print('Relation already known.')
            continue
        # we need to add the annotation linking this item
        print('Adding new annotation!')
        new_la = LinkAnnotation()
        new_la.subject = man_obj.uuid
        new_la.subject_type = man_obj.item_type
        new_la.project_uuid = man_obj.project_uuid
        new_la.source_id = man_obj.source_id
        new_la.predicate_uri = 'skos:closeMatch'
        new_la.object_uri = geonames_uri
        new_la.creator_uuid = ''
        new_la.save()
def generate_table_metadata(self, table_id, overwrite=False):
    """ Makes metadata annotations for a specific export table.

    Each predicate of self.metadata_predicates is handled when
    overwrite is True (its old annotations are deleted first) or when
    the table has no annotation with it yet. The contributors,
    creators, sources or subjects of the table are then saved as
    LinkAnnotations of the table's Manifest record, numbered from 1
    in 'sort' with the count of each kept in 'obj_extra'. Does
    nothing beyond printing a message when the ExpTable or the
    Manifest record is missing. Returns None.
    """
    ex_id = ExpTableIdentifiers()
    ex_id.make_all_identifiers(table_id)
    table_ids = [ex_id.table_id, ex_id.public_table_id]
    try:
        ex_tab = ExpTable.objects.get(table_id=table_id)
    except ExpTable.DoesNotExist:
        print('No ExpTable object for: ' + ex_id.public_table_id)
        ex_tab = None
    try:
        man_obj = Manifest.objects.get(uuid=ex_id.public_table_id)
    except Manifest.DoesNotExist:
        print('No manifest object for: ' + ex_id.public_table_id)
        man_obj = None
    if ex_tab is None or man_obj is None:
        return
    # fetched at most once, shared by creators and sources
    proj_uuid_counts = None
    for meta_pred in self.metadata_predicates:
        if overwrite:
            num_old_delete = LinkAnnotation.objects\
                                           .filter(subject__in=table_ids,
                                                   predicate_uri=meta_pred)\
                                           .delete()
            print('Deleted annoations ' + str(num_old_delete) + ' for ' + meta_pred)
            add_meta_for_pred = True
        else:
            num_exists = LinkAnnotation.objects\
                                       .filter(subject__in=table_ids,
                                               predicate_uri=meta_pred)[:1]
            add_meta_for_pred = len(num_exists) < 1
        if not add_meta_for_pred:
            continue
        # lazily yields one (count, object_uri) pair per annotation,
        # so each pair is only worked out right before it is saved
        counted_uris = ()
        if meta_pred == 'dc-terms:contributor':
            print('Getting contributors for ' + table_id)
            authors = self.get_table_author_counts(table_id)
            counted_uris = (
                (author['count'],
                 URImanagement.make_oc_uri(author['uuid'], 'persons'))
                for author in authors
            )
        elif meta_pred in ['dc-terms:creator', 'dc-terms:source']:
            if proj_uuid_counts is None:
                print('Getting projects for ' + table_id)
                proj_uuid_counts = self.get_table_project_uuid_counts(
                    table_id)
            if meta_pred == 'dc-terms:creator':
                print('Getting creators for ' + table_id)
                creators = self.make_table_dc_creator_list(
                    proj_uuid_counts)
                counted_uris = (
                    (creator['count'], creator['id'])
                    for creator in creators
                )
            else:
                print('Getting sources for ' + table_id)
                counted_uris = (
                    (proj['num_uuids'],
                     URImanagement.make_oc_uri(proj['project_uuid'],
                                               'projects'))
                    for proj in proj_uuid_counts
                )
        elif meta_pred == 'dc-terms:subject':
            print('Getting subjects for ' + table_id)
            subjects = self.make_table_dc_subject_category_list(
                table_id)
            counted_uris = (
                (subject['count'], subject['id'])
                for subject in subjects
            )
        for sort, (count, object_uri) in enumerate(counted_uris, 1):
            obj_extra = LastUpdatedOrderedDict()
            obj_extra['count'] = count
            la = LinkAnnotation()
            la.subject = man_obj.uuid
            la.subject_type = man_obj.item_type
            la.project_uuid = man_obj.project_uuid
            la.source_id = 'exp-table-manage'
            la.predicate_uri = meta_pred
            la.object_uri = object_uri
            la.creator_uuid = '0'
            la.sort = sort
            la.obj_extra = obj_extra
            la.save()
def validate_make_eol_hierarchy(self, child_uri, parent_uri):
    """ Makes a hierarchy relation for EOL entities, if still needed.

    Both URIs are cleaned first. A child that is a known entity and
    already has parents is left alone. A child without parents gets
    an annotation that makes it a child of parent_uri. A child that
    is not a known entity gets the same annotation, after
    self.get_save_entity_label is called for it. Returns None.
    """
    le_gen = LinkEntityGeneration()
    child_uri = le_gen.make_clean_uri(child_uri)  # strip off any cruft in the URI
    parent_uri = le_gen.make_clean_uri(parent_uri)
    ent = Entity()
    if ent.dereference(child_uri):
        lr = LinkRecursion()
        parents = lr.get_jsonldish_entity_parents(child_uri, False)
        # no parents (False or empty), so OK to make an assertion
        ok_create = parents is False or len(parents) == 0
    else:
        # the child does not yet exist, so OK to make the relation
        ok_create = True
        print('Getting missing data for: ' + child_uri)
        self.get_save_entity_label(child_uri)
    if not ok_create:
        print('Already in hierarchy: ' + child_uri)
        return
    print('OK, make rel for: ' + child_uri + ' in ' + parent_uri)
    la = LinkAnnotation()
    la.subject = child_uri
    la.subject_type = 'uri'
    la.project_uuid = '0'
    la.source_id = 'manual-eol-manage'
    la.predicate_uri = self.CHILD_PARENT_REL
    la.object_uri = parent_uri
    la.sort = 1
    la.save()
def store_records(self, act_table, recs):
    """ Saves the records retrieved for one table.

    recs is a dict; each of its values is a dict of keyword arguments
    used to build an instance of the model that goes with act_table,
    which is then saved. The keys of recs are not used. Nothing is
    saved, and nothing is reported, for a table name without a model
    in the list below.
    """
    def model_for(table):
        # gives the model class for a table name, None if unknown
        if table == "link_annotations":
            return LinkAnnotation
        if table == "link_entities":
            return LinkEntity
        if table == "link_hierarchies":
            return LinkHierarchy
        if table == "oc_chronology":
            return Chronology
        if table == "oc_geodata":
            return Geodata
        if table == "oc_mediafiles":
            return Mediafile
        if table == "oc_documents":
            return OCdocument
        if table == "oc_persons":
            return Person
        if table == "oc_projects":
            return Project
        if table == "oc_strings":
            return OCstring
        if table == "oc_types":
            return OCtype
        if table == "oc_events":
            return Event
        if table == "oc_predicates":
            return Predicate
        if table == "oc_identifiers":
            return StableIdentifer
        if table == "oc_obsmetadata":
            return ObsMetadata
        return None

    for record in recs.values():
        model = model_for(act_table)
        if model is None:
            continue
        newr = model(**record)
        newr.save()
def add_period_coverage(self, uuid, period_uri):
    """ Adds a PeriodO URI annotation to an item.

    The PeriodO data is fetched through the API on first use and kept
    in self.periodo_data for later calls. The period found for
    period_uri, and its collection, are saved as entities when
    needed. The item is then annotated with self.DC_PERIOD_PRED and
    the period URI, unless an equivalent annotation exists already.
    Returns True when a new annotation was saved, otherwise False.
    """
    po_api = PeriodoAPI()
    if isinstance(self.periodo_data, dict):
        # reuse the data kept from an earlier call
        po_api.periodo_data = self.periodo_data
    else:
        self.check_add_period_pred()
        po_api.get_periodo_data()
        self.periodo_data = po_api.periodo_data
    if not isinstance(po_api.periodo_data, dict):
        return False
    period = po_api.get_period_by_uri(period_uri)
    if not isinstance(period, dict):
        return False
    # we found the period, now check the UUID is found
    entity = Entity()
    if not entity.dereference(uuid):
        return False
    # save the period collection entity to database, if needed
    self.check_add_period_collection(period)
    # save the period entity to the database, if needed
    self.check_add_period(period)
    # the annotation may exist under any of the variant ways of
    # expressing its subject, predicate and object, so check them all
    period_obj_uri = period['period-meta']['uri']
    lequiv = LinkEquivalence()
    subjects = lequiv.get_identifier_list_variants(uuid)
    predicates = lequiv.get_identifier_list_variants(self.DC_PERIOD_PRED)
    objects = lequiv.get_identifier_list_variants(period_obj_uri)
    la_exists = LinkAnnotation.objects\
                              .filter(subject__in=subjects,
                                      predicate_uri__in=predicates,
                                      object_uri__in=objects)[:1]
    if len(la_exists) >= 1:
        return False
    # OK, safe to make the annotation
    new_la = LinkAnnotation()
    new_la.subject = entity.uuid
    new_la.subject_type = entity.item_type
    new_la.project_uuid = entity.project_uuid
    new_la.source_id = self.source_id
    new_la.predicate_uri = self.DC_PERIOD_PRED
    new_la.object_uri = period['period-meta']['uri']
    new_la.creator_uuid = ''
    new_la.save()
    return True
def match_california_site(self, site_uuid):
    """ Tries to match California site names with tDAR site keywords.

    Nothing is looked up when the item does not exist or when it
    already has a 'dc-terms:subject' annotation whose object contains
    self.TDAR_VOCAB. Otherwise the alternate site or place names in
    the observations of the item's JSON-LD are searched in tDAR. A
    result whose label equals a name (ignoring case) is a match: its
    LinkEntity is created if missing and a LinkAnnotation is saved
    for the item. After a failed HTTP request the process sleeps for
    a growing wait, or exits once that wait exceeds self.max_wait.
    Returns the number of matches found.
    """
    found_matches = 0
    oc_item = OCitem()
    exists = oc_item.check_exists(site_uuid)
    if not exists:
        return found_matches
    la_check = LinkAnnotation.objects\
                             .filter(subject=site_uuid,
                                     predicate_uri='dc-terms:subject',
                                     object_uri__contains=self.TDAR_VOCAB)[:1]
    if len(la_check) >= 1:
        return found_matches

    def save_match(result):
        # saves the linked entity (if new) and the item's annotation
        try:
            LinkEntity.objects.get(uri=result['id'])
        except LinkEntity.DoesNotExist:
            le = LinkEntity()
            le.uri = result['id']
            le.label = result['label']
            le.alt_label = result['label']
            le.vocab_uri = self.TDAR_VOCAB
            le.ent_type = 'type'
            le.save()
        la = LinkAnnotation()
        la.subject = oc_item.manifest.uuid
        la.subject_type = oc_item.manifest.item_type
        la.project_uuid = oc_item.manifest.project_uuid
        la.source_id = 'tdar-api-lookup'
        la.predicate_uri = self.DC_TERMS_SUBJECT
        la.object_uri = result['id']
        la.save()

    def track_request_status(api):
        # backs off after a failed request, resets after a good one
        if api.request_error:
            self.request_error = True
            print('HTTP request to tDAR failed!')
            self.error_wait += self.base_wait
            if self.error_wait > self.max_wait:
                print('Too many failures, quiting...')
                sys.exit('Quitting process')
            print('Will try again in ' + str(self.error_wait) + ' seconds...')
            sleep(self.error_wait)
        else:
            self.request_error = False
            if self.error_wait >= self.base_wait:
                print('HTTP requests resumed OK, will continue.')
                self.error_wait = 0

    # we don't already have a tDAR id for this item, so generate the
    # item's JSON-LD and gather the names to ask tDAR about
    oc_item.generate_json_ld()
    json_ld = oc_item.json_ld
    names_key = 'oc-pred:52-alternate-site-or-place-name'
    observations = []
    if 'oc-gen:has-obs' in json_ld \
       and isinstance(json_ld['oc-gen:has-obs'], list):
        observations = json_ld['oc-gen:has-obs']
    request_keywords = [
        name_obj['xsd:string']
        for obs in observations
        if names_key in obs and isinstance(obs[names_key], list)
        for name_obj in obs[names_key]
        if 'xsd:string' in name_obj
        and isinstance(name_obj['xsd:string'], str)
    ]
    print('Checking names in tDAR: ' + '; '.join(request_keywords))
    for keyword in request_keywords:
        tdar_api = tdarAPI()
        results = tdar_api.get_site_keyword(keyword)
        if isinstance(results, list):
            for result in results[:self.max_results]:
                lw_result = result['label'].lower()
                lw_keyword = keyword.lower()
                if lw_result == lw_keyword:
                    # the name and the tDAR result match
                    print('FOUND ' + result['label'])
                    found_matches += 1
                    save_match(result)
                else:
                    print('Almost! ' + result['label'] + ' is not exactly: ' + keyword)
        track_request_status(tdar_api)
    return found_matches
def store_records(self, act_table, recs):
    """ Saves the records retrieved for one table, where allowed.

    recs is an iterable of dicts with the keyword arguments of the
    model that goes with act_table. Every record is passed through
    self.check_allow_write and self.prep_update_keep_old first. A
    record that may not be written is reported and skipped, unless
    self.update_keep_old is set. Records of a table name without a
    model below are skipped silently. A failing save is reported
    with print() and does not stop the loop. Returns None.
    """
    for i, record in enumerate(recs, 1):
        allow_write = self.check_allow_write(act_table, record)
        record = self.prep_update_keep_old(act_table, record)
        if allow_write is False and self.update_keep_old is False:
            print('\n Not allowed to overwite record.' + str(i))
            continue
        # pick the model class first, the record is built below
        model_class = None
        if (act_table == 'link_annotations'):
            model_class = LinkAnnotation
        elif (act_table == 'link_entities'):
            model_class = LinkEntity
        elif (act_table == 'oc_assertions'):
            model_class = Assertion
        elif (act_table == 'oc_manifest'):
            model_class = Manifest
        elif (act_table == 'oc_subjects'):
            model_class = Subject
        elif (act_table == 'oc_mediafiles'):
            model_class = Mediafile
        elif (act_table == 'oc_documents'):
            model_class = OCdocument
        elif (act_table == 'oc_persons'):
            model_class = Person
        elif (act_table == 'oc_projects'):
            model_class = Project
        elif (act_table == 'oc_strings'):
            model_class = OCstring
        elif (act_table == 'oc_types'):
            model_class = OCtype
        elif (act_table == 'oc_geospace'):
            model_class = Geospace
        elif (act_table == 'oc_events'):
            model_class = Event
        elif (act_table == 'oc_predicates'):
            model_class = Predicate
        elif (act_table == 'oc_identifiers'):
            model_class = StableIdentifer
        elif (act_table == 'oc_obsmetadata'):
            model_class = ObsMetadata
        if model_class is None:
            continue
        newr = model_class(**record)
        try:
            newr.save(force_insert=self.force_insert,
                      force_update=self.update_keep_old)
        except Exception as error:
            print('Something slipped past in ' + act_table + '...' + str(error))
def match_trinomial_obj(self, tri):
    """ Tries to match the trinomial object 'tri' against tDAR.

    Nothing is looked up when the item is missing from the Manifest
    or already has a 'dc-terms:subject' annotation whose object
    contains self.TDAR_VOCAB. The trinomial is searched in tDAR, and
    with self.lead_zero_check so are its forms with a zero-padded
    site part. A result is a match when its label equals the
    trinomial or one of its zero-padded forms: its LinkEntity is
    created if missing and a LinkAnnotation is saved for the item.
    After a failed HTTP request the process sleeps for a growing
    wait, or exits once that wait exceeds self.max_wait. Returns the
    number of matches found.
    """
    found_matches = 0
    try:
        manifest = Manifest.objects.get(uuid=tri.uuid)
    except Manifest.DoesNotExist:
        manifest = False
    la_check = LinkAnnotation.objects\
                             .filter(subject=tri.uuid,
                                     predicate_uri='dc-terms:subject',
                                     object_uri__contains=self.TDAR_VOCAB)[:1]
    if not (len(la_check) < 1 and manifest is not False):
        return found_matches
    tri_man = TrinomialManage()

    def zero_padded_forms(width):
        # yields the trinomial once for every zero added to the site
        # part, until that part is 'width' characters long
        tri_parts = tri_man.parse_trinomial(tri.trinomial)
        site = tri_parts['site']
        while len(site) < width:
            site = '0' + site
            yield tri_parts['state'] + tri_parts['county'] + site

    def save_match(result):
        # saves the linked entity (if new) and the item's annotation
        try:
            LinkEntity.objects.get(uri=result['id'])
        except LinkEntity.DoesNotExist:
            le = LinkEntity()
            le.uri = result['id']
            le.label = result['label']
            le.alt_label = result['label']
            le.vocab_uri = self.TDAR_VOCAB
            le.ent_type = 'type'
            le.save()
        la = LinkAnnotation()
        la.subject = tri.uuid
        la.subject_type = manifest.item_type
        la.project_uuid = manifest.project_uuid
        la.source_id = 'tdar-api-lookup'
        la.predicate_uri = self.DC_TERMS_SUBJECT
        la.object_uri = result['id']
        la.save()

    def track_request_status(api):
        # backs off after a failed request, resets after a good one
        if api.request_error:
            self.request_error = True
            print('HTTP request to tDAR failed!')
            self.error_wait += self.base_wait
            if self.error_wait > self.max_wait:
                print('Too many failures, quiting...')
                sys.exit('Quitting process')
            print('Will try again in ' + str(self.error_wait) + ' seconds...')
            sleep(self.error_wait)
        else:
            self.request_error = False
            if self.error_wait >= self.base_wait:
                print('HTTP requests resumed OK, will continue.')
                self.error_wait = 0

    # we don't already have a tDAR id for this item
    request_keywords = [tri.trinomial]
    if self.lead_zero_check:
        # check multiple leading zeros
        request_keywords.extend(zero_padded_forms(4))
    for keyword in request_keywords:
        tdar_api = tdarAPI()
        results = tdar_api.get_site_keyword(keyword)
        if isinstance(results, list):
            for result in results[:self.max_results]:
                if result['label'] == tri.trinomial:
                    # the trinomial and the tDAR result exactly match
                    match_real = True
                else:
                    # good too if they differ only in leading zeros
                    match_real = any(
                        padded == result['label']
                        for padded in zero_padded_forms(5)
                    )
                if match_real:
                    found_matches += 1
                    save_match(result)
                else:
                    print('Almost! ' + result['label'] + ' is not exactly: ' + tri.trinomial)
        track_request_status(tdar_api)
    return found_matches
def add_missing_containing_regions(project_uuid='0', source_id=SOURCE_ID):
    """Adds missing containing regions that have site counts

    For each entry of ADD_REGIONS the region is loaded as a context row
    under its state. A LinkEntity for the entry's URI and a
    'skos:closeMatch' LinkAnnotation from the region to that URI are
    then created, each only if one is not stored yet.
    """
    for _state, parent_uuid, region_label, region_uuid, region_uri in ADD_REGIONS:
        load_context_row(
            project_uuid=project_uuid,
            source_id=source_id,
            row={
                'parent_uuid': parent_uuid,
                'context_uuid': region_uuid,
                'label': region_label,
                'class_uri': 'oc-gen:cat-region',
            },
        )

        # make sure the linked entity for the region URI is known
        known_entity = LinkEntity.objects.filter(uri=region_uri).first()
        if not known_entity:
            entity = LinkEntity()
            entity.uri = region_uri
            entity.label = region_label
            entity.alt_label = region_label
            entity.vocab_uri = GeonamesAPI().VOCAB_URI
            entity.ent_type = 'class'
            entity.save()

        # make sure the region is annotated with that URI
        known_annotation = LinkAnnotation.objects.filter(
            subject=region_uuid,
            object_uri=region_uri,
        ).first()
        if not known_annotation:
            annotation = LinkAnnotation()
            annotation.subject = region_uuid
            annotation.subject_type = 'subjects'
            annotation.project_uuid = project_uuid
            annotation.source_id = source_id
            annotation.predicate_uri = 'skos:closeMatch'
            annotation.object_uri = region_uri
            annotation.creator_uuid = ''
            annotation.save()
def link_sites_from_filecache(self):
    """ updates Open Context to save new sites
        and annotations from the file cache

    Reads the dict stored under self.cache_filekey from self.filecache.
    For every entry in its 'trinomial_refs' list this:
      * creates a LinkEntity for the record URI if none exists;
      * for each trinomial with a matching site in the Manifest, saves
        a 'skos:broader' annotation from the record URI to its source
        URI, and a 'dc-terms:isReferencedBy' annotation from the site
        to the record URI (the latter only if not already present).

    Annotation saves are best-effort: a failed save is reported with
    print() and processing continues. Nothing is returned.
    """
    if self.filecache is None:
        return
    # print('Cache update !: ' + self.cache_filekey)
    self.filecache.working_dir = self.working_dir
    json_obj = self.filecache.get_dict_from_file(self.cache_filekey)
    if not isinstance(json_obj, dict):
        return
    if 'trinomial_refs' not in json_obj:
        return
    for tri_ref in json_obj['trinomial_refs']:
        uri = tri_ref['rec_uri']
        title = tri_ref['title']
        if len(title) > 194:
            # shorten over-long titles before using them as labels
            title = title[0:190] + '... '
        if not LinkEntity.objects.filter(uri=uri).exists():
            l_ent = LinkEntity()
            l_ent.uri = uri
            l_ent.label = title
            l_ent.alt_label = title
            l_ent.vocab_uri = tri_ref['source_uri']
            l_ent.ent_type = 'class'
            l_ent.save()
        for trinomial in tri_ref['trinomials']:
            man_objs = Manifest.objects.filter(
                label=trinomial,
                class_uri='oc-gen:cat-site')
            if not man_objs:
                # no site with this trinomial label, nothing to link
                continue
            man_obj = man_objs[0]
            la = LinkAnnotation()
            la.subject = uri  # the subordinate is the subject
            la.subject_type = 'uri'
            la.project_uuid = man_obj.project_uuid
            la.source_id = self.source_id
            la.predicate_uri = "skos:broader"
            la.object_uri = tri_ref['source_uri']
            # single best-effort save; the same annotation is attempted
            # again for every trinomial of this reference
            try:
                la.save()
            except Exception as err:
                print('Could not save annotation for ' + uri
                      + ': ' + str(err))
            already_linked = LinkAnnotation.objects.filter(
                subject=man_obj.uuid,
                object_uri=uri).exists()
            if already_linked:
                continue
            print('Link ' + man_obj.label
                  + ' (' + man_obj.uuid + ') to ' + uri)
            la = LinkAnnotation()
            la.subject = man_obj.uuid  # the subordinate is the subject
            la.subject_type = man_obj.item_type
            la.project_uuid = man_obj.project_uuid
            la.source_id = self.source_id
            la.predicate_uri = 'dc-terms:isReferencedBy'
            la.object_uri = uri
            try:
                la.save()
            except Exception as err:
                print('Could not save annotation for ' + man_obj.uuid
                      + ': ' + str(err))