Ejemplo n.º 1
0
 def save_icons(self, predicate_uri='oc-gen:hasIcon'):
     """ Saves icon relations from the vocabulary graph as annotations
         in the general Open Context namespace.

         Returns False if no graph or vocabulary URI is available,
         otherwise the list of {'s': subject, 'o': object} dicts that
         were saved as LinkAnnotation records.
     """
     if self.graph is False or self.vocabulary_uri is False:
         # nothing has been loaded, so there is nothing to save
         return False
     if self.replace_old:
         # remove earlier annotations made from this vocabulary
         # with this predicate
         stale = LinkAnnotation.objects.filter(
             source_id=self.vocabulary_uri,
             predicate_uri=predicate_uri)
         stale.delete()
     icon_triples = []
     if predicate_uri == 'oc-gen:hasIcon':
         # query the graph with the full (un-prefixed) predicate URI
         icon_ref = URIRef(
             URImanagement.convert_prefix_to_full_uri(predicate_uri))
         for subj, _pred, obj in self.graph.triples((None, icon_ref, None)):
             subject_uri = str(subj)
             object_uri = str(obj)
             if subject_uri == object_uri:
                 # ignore self-referencing triples
                 continue
             icon_triples.append({'s': subject_uri, 'o': object_uri})
     for triple in icon_triples:
         annotation = LinkAnnotation()
         # make the subject a prefixed URI if common
         annotation.subject = URImanagement.prefix_common_uri(triple['s'])
         annotation.subject_type = 'uri'
         annotation.project_uuid = '0'
         annotation.source_id = self.vocabulary_uri
         annotation.predicate_uri = predicate_uri
         annotation.object_uri = triple['o']
         annotation.save()
     return icon_triples
Ejemplo n.º 2
0
 def add_skos_hierarachy(self, parent_uri, child_uri):
     """ Adds a hierarchy assertion between two linked entities.

         Both URIs must already exist as LinkEntity records and the
         child must not have any parents yet; otherwise a message is
         printed and nothing is saved.
     """
     try:
         parent_ent = LinkEntity.objects.get(uri=parent_uri)
     except LinkEntity.DoesNotExist:
         parent_ent = False
     try:
         child_ent = LinkEntity.objects.get(uri=child_uri)
     except LinkEntity.DoesNotExist:
         child_ent = False
     if parent_ent is False or child_ent is False:
         print('Cannot find parent or child')
         return
     recursion = LinkRecursion()
     existing_parents = recursion.get_entity_parents(child_uri)
     if len(existing_parents) >= 1:
         # the child already sits in a hierarchy, leave it alone
         print('Child has parents: ' + str(existing_parents))
         return
     annotation = LinkAnnotation()
     annotation.subject = child_ent.uri  # the subordinate is the subject
     annotation.subject_type = 'uri'
     annotation.project_uuid = self.project_uuid
     annotation.source_id = self.source_id + '-hierarchy'
     annotation.predicate_uri = self.PRED_SBJ_IS_SUB_OF_OBJ
     annotation.object_uri = parent_ent.uri  # the parent is the object
     annotation.save()
     print('Made: ' + child_ent.uri + ' child of: ' + parent_ent.uri)
Ejemplo n.º 3
0
 def make_type_ld_annotations(self,
                              sub_type_pred_uuid,
                              sub_type_f_num,
                              rel_pred,
                              obj_le_f_num):
     """ Annotates imported types with linked data entities.

         Cells of field sub_type_f_num supply the type labels. For the
         rows of each distinct label, the records of field obj_le_f_num
         are fetched and every http(s) URI among them is related to the
         type with the rel_pred predicate.
     """
     type_cells = ImportCell.objects.filter(source_id=self.source_id,
                                            field_num=sub_type_f_num)
     # group the cells by record hash, keeping the first cell seen and
     # every row number the record occurs on
     records = {}
     for cell in type_cells:
         if cell.rec_hash not in records:
             records[cell.rec_hash] = {'rows': [], 'imp_cell_obj': cell}
         records[cell.rec_hash]['rows'].append(cell.row_num)
     pending = []
     for record in records.values():
         type_label = record['imp_cell_obj'].record
         if len(type_label) < 1:
             # blank type, nothing to annotate
             continue
         cell_proc = ProcessCells(self.source_id, 0)
         ld_entities = cell_proc.get_field_records(obj_le_f_num,
                                                   record['rows'])
         for _ld_hash, ld_record in ld_entities.items():
             obj_uri = ld_record['imp_cell_obj'].record
             if len(obj_uri) <= 8:
                 continue
             if obj_uri[:7] != 'http://' and obj_uri[:8] != 'https://':
                 # not a web URI, so not usable as a linked data entity
                 continue
             # get (or make) the type so its UUID can be the subject
             type_manage = TypeManagement()
             type_manage.project_uuid = self.project_uuid
             type_manage.source_id = self.source_id
             sub_type = type_manage.get_make_type_within_pred_uuid(
                 sub_type_pred_uuid, type_label)
             pending.append({'subject_label': type_label,
                             'subject': sub_type.uuid,
                             'object_uri': obj_uri})
     for rel in pending:
         annotation = LinkAnnotation()
         annotation.subject = rel['subject']
         annotation.subject_type = 'types'
         annotation.project_uuid = self.project_uuid
         annotation.source_id = self.source_id
         annotation.predicate_uri = rel_pred
         annotation.object_uri = rel['object_uri']
         annotation.creator_uuid = ''
         annotation.save()
         # presumably records the linked entity if it is not known yet;
         # verify against WebLinkEntity
         web_entity = WebLinkEntity()
         web_entity.check_add_link_entity(rel['object_uri'])
Ejemplo n.º 4
0
 def make_dinaa_link_assertions(self):
     """ Makes assertions to relate DINAA URIs with federal
         registry documents.

         Every cached keyword-search result whose document number
         matches an entry loaded via self.dinaa_matches_key gets a
         LinkEntity (created if missing) and a LinkAnnotation from the
         matched Manifest item to the document URI. Saving an annotation
         is best-effort: a failed save is reported and the run continues.
     """
     self.make_fed_reg_vocab_entity()
     fed_api = FederalRegistryAPI()
     search_key_list = fed_api.get_list_cached_keyword_searches()
     dinaa_matches = fed_api.get_dict_from_file(self.dinaa_matches_key)
     for s_key in search_key_list:
         s_json = fed_api.get_dict_from_file(s_key)
         if 'results' not in s_json:
             continue
         for match in dinaa_matches:
             for s_result in s_json['results']:
                 if s_result['document_number'] != match['doc']:
                     continue
                 print('Found match for ' + match['doc'])
                 try:
                     man_obj = Manifest.objects.get(uuid=match['uuid'])
                 except Manifest.DoesNotExist:
                     # no manifest item to annotate for this match
                     continue
                 fed_uri = s_result['html_url']
                 try:
                     LinkEntity.objects.get(uri=fed_uri)
                     entity_exists = True
                 except LinkEntity.DoesNotExist:
                     entity_exists = False
                 if not entity_exists:
                     print('Saving entity: ' + s_result['title'])
                     title = s_result['title']
                     if len(title) > 175:
                         # keep overly long titles to a bounded label
                         title = title[0:175] + '...'
                     le = LinkEntity()
                     le.uri = fed_uri
                     le.label = title
                     le.alt_label = s_result['document_number']
                     le.vocab_uri = self.FEDERAL_REG_URI
                     le.ent_type = 'instance'
                     le.slug = 'fed-reg-docs-' + s_result['document_number']
                     le.save()
                 # Now save the link annotation
                 print('Adding ref link to ' + man_obj.label)
                 la = LinkAnnotation()
                 la.subject = man_obj.uuid
                 la.subject_type = man_obj.item_type
                 la.project_uuid = man_obj.project_uuid
                 la.source_id = self.source_id
                 la.predicate_uri = self.DC_TERMS_REF_BY
                 la.object_uri = fed_uri
                 try:
                     la.save()
                 except Exception as err:
                     # Still best-effort, but no longer a bare except:
                     # KeyboardInterrupt / SystemExit now propagate and
                     # the failure is reported instead of hidden.
                     print('Could not save ref link to ' +
                           str(man_obj.label) + ': ' + str(err))
Ejemplo n.º 5
0
 def add_skos_hierarachy(self, parent_uri, child_uri):
     """ Makes the child linked entity subordinate to the parent one.

         Nothing is saved, and only a message is printed, when either
         URI has no LinkEntity record or the child already has parents.
     """
     try:
         parent_entity = LinkEntity.objects.get(uri=parent_uri)
     except LinkEntity.DoesNotExist:
         parent_entity = False
     try:
         child_entity = LinkEntity.objects.get(uri=child_uri)
     except LinkEntity.DoesNotExist:
         child_entity = False
     if parent_entity is False or child_entity is False:
         print('Cannot find parent or child')
         return
     current_parents = LinkRecursion().get_entity_parents(child_uri)
     if len(current_parents) >= 1:
         # already placed in a hierarchy, so do not add another parent
         print('Child has parents: ' + str(current_parents))
         return
     link = LinkAnnotation()
     link.subject = child_entity.uri  # the subordinate is the subject
     link.subject_type = 'uri'
     link.project_uuid = self.project_uuid
     link.source_id = self.source_id + '-hierarchy'
     link.predicate_uri = self.PRED_SBJ_IS_SUB_OF_OBJ
     link.object_uri = parent_entity.uri  # the parent is the object
     link.save()
     print('Made: ' + child_entity.uri + ' child of: ' + parent_entity.uri)
Ejemplo n.º 6
0
 def save_icons(self, predicate_uri='oc-gen:hasIcon'):
     """ Saves icons in the general Open Context namespace.

         Returns False when the graph or vocabulary URI is missing,
         otherwise a list of the {'s': ..., 'o': ...} subject / object
         URI pairs that were stored as LinkAnnotation records.
     """
     if self.graph is False or self.vocabulary_uri is False:
         return False
     if self.replace_old:
         # delete old relations from this vocabulary using this predicate
         LinkAnnotation.objects.filter(
             source_id=self.vocabulary_uri,
             predicate_uri=predicate_uri,
         ).delete()
     found = []
     if predicate_uri == 'oc-gen:hasIcon':
         # search the graph using the expanded form of the predicate
         full_uri = URImanagement.convert_prefix_to_full_uri(predicate_uri)
         pattern = (None, URIRef(full_uri), None)
         uri_pairs = ((str(subj), str(obj))
                      for subj, _pred, obj in self.graph.triples(pattern))
         # a subject pointing at itself is not a useful relation
         found = [{'s': s_uri, 'o': o_uri}
                  for s_uri, o_uri in uri_pairs
                  if s_uri != o_uri]
     for pair in found:
         record = LinkAnnotation()
         # make the subject a prefixed URI if common
         record.subject = URImanagement.prefix_common_uri(pair['s'])
         record.subject_type = 'uri'
         record.project_uuid = '0'
         record.source_id = self.vocabulary_uri
         record.predicate_uri = predicate_uri
         record.object_uri = pair['o']
         record.save()
     return found
Ejemplo n.º 7
0
 def make_type_relations(self,
                         sub_type_pred_uuid,
                         sub_type_f_num,
                         rel_pred,
                         obj_type_pred_uuid,
                         obj_type_f_num):
     """ Relates types from one import field to types from another.

         For each non-blank cell of field sub_type_f_num, the cell of
         field obj_type_f_num on the same row is looked up; when it is
         non-blank and has different text, the two types are related
         with rel_pred. Each subject / object pair is saved once.
     """
     unique_rels = {}
     subject_cells = ImportCell.objects.filter(source_id=self.source_id,
                                               field_num=sub_type_f_num)
     for subject_cell in subject_cells:
         subject_text = subject_cell.record
         if len(subject_text) < 1:
             continue
         subject_tm = TypeManagement()
         subject_tm.project_uuid = self.project_uuid
         subject_tm.source_id = self.source_id
         subject_type = subject_tm.get_make_type_within_pred_uuid(
             sub_type_pred_uuid, subject_text)
         object_cells = ImportCell.objects.filter(
             source_id=self.source_id,
             field_num=obj_type_f_num,
             row_num=subject_cell.row_num)[:1]
         if len(object_cells) < 1:
             continue
         object_text = object_cells[0].record
         if len(object_text) < 1 or subject_text == object_text:
             continue
         object_tm = TypeManagement()
         object_tm.project_uuid = self.project_uuid
         object_tm.source_id = self.source_id
         object_type = object_tm.get_make_type_within_pred_uuid(
             obj_type_pred_uuid, object_text)
         # the object of a link assertion is a URI, not a bare UUID
         object_uri = URImanagement.make_oc_uri(object_type.uuid, 'types')
         # key on both UUIDs so the same assertion is not made
         # over and over
         rel_key = str(subject_type.uuid) + ' ' + str(object_type.uuid)
         if rel_key in unique_rels:
             continue
         unique_rels[rel_key] = {'subject': subject_type.uuid,
                                 'object_uri': object_uri}
     # now save one link annotation per distinct pair of types
     for rel in unique_rels.values():
         annotation = LinkAnnotation()
         annotation.subject = rel['subject']
         annotation.subject_type = 'types'
         annotation.project_uuid = self.project_uuid
         annotation.source_id = self.source_id
         annotation.predicate_uri = rel_pred
         annotation.object_uri = rel['object_uri']
         annotation.creator_uuid = ''
         annotation.save()
Ejemplo n.º 8
0
 def save_new_ref_by_annotation(self, oc_item):
     """ Saves a referenced-by annotation linking the item to the
         current referrer, but only if self.check_new_annotation
         reports it as new. Also counts and prints each new annotation.
     """
     if not self.check_new_annotation(oc_item.uuid, self.referrer.uri):
         # nothing to do when the check does not report it as new
         return
     self.new_annotations += 1
     annotation = LinkAnnotation()
     annotation.subject = oc_item.uuid
     annotation.subject_type = oc_item.item_type
     annotation.project_uuid = oc_item.project_uuid
     annotation.source_id = self.source_id
     annotation.predicate_uri = self.DC_TERMS_REFERENCED_BY
     annotation.object_uri = self.referrer.uri
     annotation.creator_uuid = ''
     annotation.save()
     counter = str(self.new_annotations)
     print('[' + counter + '] annotated: ' + oc_item.uuid)
Ejemplo n.º 9
0
 def make_dinaa_link_assertions(self):
     """ Makes assertions to relate DINAA URIs with federal
         registry documents.

         For each cached keyword search, every result whose document
         number equals the 'doc' of an entry loaded via
         self.dinaa_matches_key is linked: a LinkEntity for the document
         is created if missing, then a LinkAnnotation from the matched
         Manifest item to the document URI is saved. Annotation saves
         are best-effort; a failure is printed and processing goes on.
     """
     self.make_fed_reg_vocab_entity()
     fed_api = FederalRegistryAPI()
     search_key_list = fed_api.get_list_cached_keyword_searches()
     dinaa_matches = fed_api.get_dict_from_file(self.dinaa_matches_key)
     for s_key in search_key_list:
         s_json = fed_api.get_dict_from_file(s_key)
         if 'results' not in s_json:
             continue
         for match in dinaa_matches:
             for s_result in s_json['results']:
                 if s_result['document_number'] != match['doc']:
                     continue
                 print('Found match for ' + match['doc'])
                 try:
                     man_obj = Manifest.objects.get(uuid=match['uuid'])
                 except Manifest.DoesNotExist:
                     # the matched item is not in the manifest, skip it
                     continue
                 fed_uri = s_result['html_url']
                 try:
                     LinkEntity.objects.get(uri=fed_uri)
                     entity_exists = True
                 except LinkEntity.DoesNotExist:
                     entity_exists = False
                 if not entity_exists:
                     print('Saving entity: ' + s_result['title'])
                     title = s_result['title']
                     if len(title) > 175:
                         # truncate very long titles for the label
                         title = title[0:175] + '...'
                     le = LinkEntity()
                     le.uri = fed_uri
                     le.label = title
                     le.alt_label = s_result['document_number']
                     le.vocab_uri = self.FEDERAL_REG_URI
                     le.ent_type = 'instance'
                     le.slug = 'fed-reg-docs-' + s_result['document_number']
                     le.save()
                 # Now save the link annotation
                 print('Adding ref link to ' + man_obj.label)
                 la = LinkAnnotation()
                 la.subject = man_obj.uuid
                 la.subject_type = man_obj.item_type
                 la.project_uuid = man_obj.project_uuid
                 la.source_id = self.source_id
                 la.predicate_uri = self.DC_TERMS_REF_BY
                 la.object_uri = fed_uri
                 try:
                     la.save()
                 except Exception as err:
                     # A bare except here used to hide every failure and
                     # also trapped KeyboardInterrupt / SystemExit. Keep
                     # going on ordinary errors, but say what happened.
                     print('Could not save ref link to ' +
                           str(man_obj.label) + ': ' + str(err))
Ejemplo n.º 10
0
 def save_hierarchy(self, predicate_uri='rdfs:subClassOf'):
     """ Saves hierarchic relations from a vocabulary,
     defaulting to subClassOf predicates.

     Only 'rdfs:subClassOf' and 'rdfs:subPropertyOf' are read from the
     graph; any other predicate yields an empty list. Returns False when
     the graph or vocabulary URI is missing, otherwise the list of
     {'s': ..., 'o': ...} dicts that were saved.
     """
     if self.graph is False or self.vocabulary_uri is False:
         return False
     if self.replace_old:
         # delete old relations from this vocabulary using this predicate
         stale = LinkAnnotation.objects.filter(
             source_id=self.vocabulary_uri,
             predicate_uri=predicate_uri)
         stale.delete()
     # pick the RDFS term to look for in the graph
     if predicate_uri == 'rdfs:subClassOf':
         rdfs_term = RDFS.subClassOf
     elif predicate_uri == 'rdfs:subPropertyOf':
         rdfs_term = RDFS.subPropertyOf
     else:
         rdfs_term = None
     relations = []
     if rdfs_term is not None:
         for subj, _pred, obj in self.graph.triples((None, rdfs_term, None)):
             subject_uri = str(subj)
             object_uri = str(obj)
             if subject_uri == object_uri:
                 # a term is not its own parent
                 continue
             relations.append({'s': subject_uri, 'o': object_uri})
     for relation in relations:
         annotation = LinkAnnotation()
         # make the subject a prefixed URI if common
         annotation.subject = URImanagement.prefix_common_uri(relation['s'])
         annotation.subject_type = 'uri'
         annotation.project_uuid = '0'
         annotation.source_id = self.vocabulary_uri
         annotation.predicate_uri = predicate_uri
         annotation.object_uri = relation['o']
         annotation.save()
     return relations
Ejemplo n.º 11
0
 def create_pred_parents(self, new_hierachic_list):
     """ Creates superior (more general) predicates from a list of
     manifest items whose labels have hierarchies implicit in them.

     Each label is split on self.HIERARCHY_DELIM. A predicate is fetched
     or made for every cumulative label prefix, and a LinkAnnotation is
     saved relating each prefix to the one before it.

     :param new_hierachic_list: manifest items; uuid, label,
         project_uuid and class_uri are read from each
     :return: list of {"parent": uuid, "child": uuid} dicts covering
         all manifests processed
     """
     parent_children_pairs = []
     for manifest in new_hierachic_list:
         try:
             oc_pred = Predicate.objects.get(uuid=manifest.uuid)
         except Predicate.DoesNotExist:
             # not a known predicate, nothing to build for it
             continue
         # Pairs found for THIS manifest only. Saving from the cumulative
         # list re-saved earlier manifests' pairs on every later
         # iteration, stamped with the later manifest's project_uuid.
         manifest_pairs = []
         act_delim = ""
         act_new_label = ""
         current_parent = False
         for label_part in manifest.label.split(self.HIERARCHY_DELIM):
             act_new_label = act_new_label + act_delim + label_part
             act_delim = self.HIERARCHY_DELIM
             pred_manage = PredicateManagement()
             pred_manage.project_uuid = manifest.project_uuid
             pred_manage.source_id = self.source_id
             pred_manage.sort = oc_pred.sort
             pred_manage.data_type = oc_pred.data_type
             ppred = pred_manage.get_make_predicate(act_new_label,
                                                    manifest.class_uri)
             if ppred is False:
                 # The original tested for False and then read
                 # ppred.uuid anyway. Stop here so deeper levels are not
                 # attached to the wrong parent.
                 break
             if current_parent is not False:
                 manifest_pairs.append({"parent": current_parent,
                                        "child": ppred.uuid})
             current_parent = ppred.uuid
         # now make the linked data annotations for this manifest
         for parent_child in manifest_pairs:
             new_la = LinkAnnotation()
             new_la.subject = parent_child["child"]
             new_la.subject_type = "predicates"
             new_la.project_uuid = manifest.project_uuid
             new_la.source_id = self.source_id
             new_la.predicate_uri = self.p_for_superobjs
             new_la.object_uri = URImanagement.make_oc_uri(
                 parent_child["parent"], "predicates")
             new_la.creator_uuid = ""
             new_la.save()
         parent_children_pairs.extend(manifest_pairs)
     return parent_children_pairs
Ejemplo n.º 12
0
 def create_concept_parents(self, new_hierachic_list):
     """ Creates superior (more general) types from a list of manifest
     items whose labels have hierarchies implicit in them.

     Each label is split on self.HIERARCHY_DELIM. A type is fetched or
     made for every cumulative label prefix, and a LinkAnnotation is
     saved relating each prefix to the one before it.

     :param new_hierachic_list: manifest items; uuid and label are read
         from each
     :return: list of {'parent': uuid, 'child': uuid} dicts covering
         all manifests processed
     """
     parent_children_pairs = []
     for manifest in new_hierachic_list:
         try:
             oc_type = OCtype.objects.get(uuid=manifest.uuid)
         except OCtype.DoesNotExist:
             # not a known type, nothing to build for it
             continue
         # Pairs found for THIS manifest only. Saving from the cumulative
         # list re-saved earlier manifests' pairs on every later
         # iteration, stamped with the later type's project_uuid.
         manifest_pairs = []
         act_delim = ''
         act_new_label = ''
         current_parent = False
         for label_part in manifest.label.split(self.HIERARCHY_DELIM):
             act_new_label = act_new_label + act_delim + label_part
             act_delim = self.HIERARCHY_DELIM
             type_manage = TypeManagement()
             type_manage.project_uuid = oc_type.project_uuid
             type_manage.source_id = self.source_id
             ptype = type_manage.get_make_type_within_pred_uuid(
                 oc_type.predicate_uuid, act_new_label)
             if current_parent is not False:
                 manifest_pairs.append({'parent': current_parent,
                                        'child': ptype.uuid})
             current_parent = ptype.uuid
         # now make the linked data annotations for this manifest
         for parent_child in manifest_pairs:
             new_la = LinkAnnotation()
             new_la.subject = parent_child['child']
             new_la.subject_type = 'types'
             new_la.project_uuid = oc_type.project_uuid
             new_la.source_id = self.source_id
             new_la.predicate_uri = self.p_for_superobjs
             new_la.object_uri = URImanagement.make_oc_uri(
                 parent_child['parent'], 'types')
             new_la.creator_uuid = ''
             new_la.save()
         parent_children_pairs.extend(manifest_pairs)
     return parent_children_pairs
Ejemplo n.º 13
0
 def skos_relate_old_new_predicates(self, project_uuid, source_id,
                                    predicate_uuid, new_pred_uuid):
     """ Makes a new Link Annotation to relate a new predicate_uuid with an
         existing predicate

         The new predicate is the subject and the existing predicate,
         as an Open Context URI, is the object of a 'skos:related'
         assertion.

         :return: True if the annotation was saved, False if saving
             raised an error
     """
     la = LinkAnnotation()
     la.subject = new_pred_uuid
     la.subject_type = 'predicates'
     la.project_uuid = project_uuid
     la.source_id = source_id
     la.predicate_uri = 'skos:related'
     la.object_uri = URImanagement.make_oc_uri(predicate_uuid, 'predicates')
     try:
         la.save()
     except Exception:
         # Failure is still reported through the return value, but a
         # bare except would also trap KeyboardInterrupt / SystemExit.
         return False
     return True
Ejemplo n.º 14
0
 def save_entity_comments(self):
     """ Saves rdfs:comment values from the vocabulary graph as
         LinkAnnotation records, for subjects that already have a
         LinkEntity record. Does nothing without a graph and a
         vocabulary URI.
     """
     if self.graph is False or self.vocabulary_uri is False:
         return
     # get all the variants of rdfs:comment
     comment_equiv = LinkEquivalence()
     comment_uris = comment_equiv.get_identifier_list_variants('rdfs:comment')
     # entities of this vocabulary (that may be the subject of a comment)
     vocab_entities = LinkEntity.objects.filter(
         vocab_uri=self.vocabulary_uri)
     # NOTE(review): this passes a queryset of LinkEntity objects, not a
     # list of URI strings - confirm the helper accepts that
     subject_equiv = LinkEquivalence()
     subject_uris = subject_equiv.get_identifier_list_variants(vocab_entities)
     if self.replace_old:
         # delete the old comments
         old_comments = LinkAnnotation.objects.filter(
             subject__in=subject_uris,
             predicate_uri__in=comment_uris)
         old_comments.delete()
     for subj, _pred, obj in self.graph.triples((None, RDFS.comment, None)):
         subject_uri = str(subj)
         comment = str(obj)
         try:
             LinkEntity.objects.get(uri=subject_uri)
         except LinkEntity.DoesNotExist:
             # only comment on entities that are already known
             continue
         lang = Languages()
         annotation = LinkAnnotation()
         # make the subject a prefixed URI if common
         annotation.subject = URImanagement.prefix_common_uri(subject_uri)
         annotation.subject_type = 'uri'
         annotation.project_uuid = '0'
         annotation.source_id = self.vocabulary_uri
         annotation.predicate_uri = 'rdfs:comment'
         # the comment text is stored keyed by the default language
         annotation.obj_extra = {}
         annotation.obj_extra[lang.DEFAULT_LANGUAGE] = comment
         annotation.save()
Ejemplo n.º 15
0
 def save_hierarchy(self, predicate_uri='rdfs:subClassOf'):
     """ Saves hierarchic relations from a vocabulary,
     defaulting to subClassOf predicates.

     Reads 'rdfs:subClassOf' or 'rdfs:subPropertyOf' triples from the
     graph (other predicates give an empty result) and stores each as a
     LinkAnnotation. Returns False without a graph or vocabulary URI,
     otherwise the list of {'s': ..., 'o': ...} dicts saved.
     """
     if self.graph is False or self.vocabulary_uri is False:
         return False
     if self.replace_old:
         # delete old relations from this vocabulary using this predicate
         LinkAnnotation.objects.filter(
             source_id=self.vocabulary_uri,
             predicate_uri=predicate_uri,
         ).delete()
     # decide which RDFS term, if any, to search the graph for
     if predicate_uri == 'rdfs:subClassOf':
         wanted = RDFS.subClassOf
     elif predicate_uri == 'rdfs:subPropertyOf':
         wanted = RDFS.subPropertyOf
     else:
         wanted = None
     found = []
     if wanted is not None:
         uri_pairs = ((str(subj), str(obj))
                      for subj, _pred, obj
                      in self.graph.triples((None, wanted, None)))
         # drop triples where a term is listed as its own parent
         found = [{'s': s_uri, 'o': o_uri}
                  for s_uri, o_uri in uri_pairs
                  if s_uri != o_uri]
     for pair in found:
         record = LinkAnnotation()
         # make the subject a prefixed URI if common
         record.subject = URImanagement.prefix_common_uri(pair['s'])
         record.subject_type = 'uri'
         record.project_uuid = '0'
         record.source_id = self.vocabulary_uri
         record.predicate_uri = predicate_uri
         record.object_uri = pair['o']
         record.save()
     return found
Ejemplo n.º 16
0
 def validate_make_eol_hierarchy(self, child_uri, parent_uri):
     """ Validates and makes a hierarchy relation for EOL entities.

         If the child can be dereferenced and already has parents,
         nothing is saved. If it has no parents, or cannot be
         dereferenced yet (in which case its label is fetched first),
         a child-to-parent LinkAnnotation is created.
     """
     uri_cleaner = LinkEntityGeneration()
     # strip off any cruft in the URIs
     child_uri = uri_cleaner.make_clean_uri(child_uri)
     parent_uri = uri_cleaner.make_clean_uri(parent_uri)
     entity = Entity()
     if entity.dereference(child_uri):
         recursion = LinkRecursion()
         parents = recursion.get_jsonldish_entity_parents(child_uri, False)
         # either False or an empty result means there are no parents
         ok_create = parents is False or len(parents) == 0
     else:
         # the child does not yet exist, so OK to make the relation
         ok_create = True
         print('Getting missing data for: ' + child_uri)
         self.get_save_entity_label(child_uri)
     if not ok_create:
         print('Already in hierarchy: ' + child_uri)
         return
     print('OK, make rel for: ' + child_uri + ' in ' + parent_uri)
     annotation = LinkAnnotation()
     annotation.subject = child_uri
     annotation.subject_type = 'uri'
     annotation.project_uuid = '0'
     annotation.source_id = 'manual-eol-manage'
     annotation.predicate_uri = self.CHILD_PARENT_REL
     annotation.object_uri = parent_uri
     annotation.sort = 1
     annotation.save()
Ejemplo n.º 17
0
 def link_table_to_projects(self, table_id):
     """ Links a table to each project returned by
         self.get_table_project_uuid_counts, unless an annotation
         between that project and the table URI already exists.
     """
     ex_id = ExpTableIdentifiers()
     ex_id.make_all_identifiers(table_id)
     for count_rec in self.get_table_project_uuid_counts(ex_id.table_id):
         project_uuid = count_rec['project_uuid']
         # any annotation from the project to the table URI counts as
         # already linked; the predicate is not part of this check
         existing = LinkAnnotation.objects.filter(
             subject=project_uuid,
             object_uri=ex_id.uri)[:1]
         if len(existing) >= 1:
             continue
         # no relationship between this project and this table yet,
         # so OK to create it
         annotation = LinkAnnotation()
         annotation.subject = project_uuid
         annotation.subject_type = 'projects'
         annotation.project_uuid = project_uuid
         annotation.source_id = 'exp-tables-management'
         annotation.predicate_uri = self.project_to_table_predicate
         annotation.object_uri = ex_id.uri
         annotation.creator_uuid = ''
         annotation.save()
         print('Linked project: ' + project_uuid + ' to ' + ex_id.uri)
Ejemplo n.º 18
0
 def skos_relate_old_new_predicates(self,
                                    project_uuid,
                                    source_id,
                                    predicate_uuid,
                                    new_pred_uuid):
     """ Makes a new Link Annotation to relate a new predicate_uuid with an
         existing predicate

         The new predicate (new_pred_uuid) is the subject, the existing
         predicate (predicate_uuid, turned into an Open Context URI) is
         the object, and the relation is 'skos:related'.

         Returns True if the annotation saved, False if saving raised
         an error.
     """
     la = LinkAnnotation()
     la.subject = new_pred_uuid
     la.subject_type = 'predicates'
     la.project_uuid = project_uuid
     la.source_id = source_id
     la.predicate_uri = 'skos:related'
     la.object_uri = URImanagement.make_oc_uri(predicate_uuid, 'predicates')
     try:
         la.save()
     except Exception:
         # Saving is best-effort and failure is reported through the
         # return value. Catch Exception rather than using a bare except
         # so KeyboardInterrupt / SystemExit are not swallowed.
         return False
     return True
Ejemplo n.º 19
0
 def save_entity_comments(self):
     """ Saves the rdfs:comment triples of the graph as
         LinkAnnotation records.

         Does nothing when either self.graph or self.vocabulary_uri
         is False. When self.replace_old is set, existing comment
         annotations for this vocabulary's subjects are deleted first.
         A comment is only saved if its subject URI has a LinkEntity.
     """
     if self.graph is False or self.vocabulary_uri is False:
         return
     # all the variants of rdfs:comment
     comment_equiv = LinkEquivalence()
     comment_uris = comment_equiv.get_identifier_list_variants('rdfs:comment')
     # entities from this vocabulary (that may be the subject of a comment)
     vocab_entities = LinkEntity.objects.filter(vocab_uri=self.vocabulary_uri)
     subject_equiv = LinkEquivalence()
     subject_uris = subject_equiv.get_identifier_list_variants(vocab_entities)
     if self.replace_old:
         # delete the old comments
         stale = LinkAnnotation.objects.filter(subject__in=subject_uris,
                                               predicate_uri__in=comment_uris)
         stale.delete()
     comment_pattern = (None, RDFS.comment, None)
     for subj, _pred, obj in self.graph.triples(comment_pattern):
         subject_uri = str(subj)  # the URI of the subject as a string
         comment = str(obj)  # the comment text as a string
         try:
             LinkEntity.objects.get(uri=subject_uri)
         except LinkEntity.DoesNotExist:
             # no known entity for this subject, so skip the comment
             continue
         lang = Languages()
         comment_anno = LinkAnnotation()
         # make the subject a prefixed URI if common
         comment_anno.subject = URImanagement.prefix_common_uri(subject_uri)
         comment_anno.subject_type = 'uri'
         comment_anno.project_uuid = '0'
         comment_anno.source_id = self.vocabulary_uri
         comment_anno.predicate_uri = 'rdfs:comment'
         comment_anno.obj_extra = {lang.DEFAULT_LANGUAGE: comment}
         comment_anno.save()
Ejemplo n.º 20
0
 def link_table_to_projects(self, table_id):
     """ Links an export table to each project returned by
         get_table_project_uuid_counts for that table.

         An annotation is only saved when no LinkAnnotation with the
         project as subject and the table URI as object exists yet.
     """
     table_ids = ExpTableIdentifiers()
     table_ids.make_all_identifiers(table_id)
     for count_rec in self.get_table_project_uuid_counts(table_ids.table_id):
         project_uuid = count_rec['project_uuid']
         existing = LinkAnnotation.objects\
                                  .filter(subject=project_uuid,
                                          object_uri=table_ids.uri)[:1]
         if len(existing) >= 1:
             # this project is already related to the table
             continue
         table_link = LinkAnnotation()
         table_link.subject = project_uuid
         table_link.subject_type = 'projects'
         table_link.project_uuid = project_uuid
         table_link.source_id = 'exp-tables-management'
         table_link.predicate_uri = self.project_to_table_predicate
         table_link.object_uri = table_ids.uri
         table_link.creator_uuid = ''
         table_link.save()
         print('Linked project: ' + project_uuid + ' to ' + table_ids.uri)
Ejemplo n.º 21
0
 def make_von_den_driesch_equiv(self,
                                project_uuid,
                                equiv_pred='skos:closeMatch'):
     """ Relates a project's 'xsd:double' predicates to entities of the
         zooarch measurement ontology that share the same label.

         For each label match two annotations are saved: one using
         equiv_pred (skos:closeMatch by default) that points to the
         matching entity, and one rdfs:range annotation recording the
         unit of measurement.
     """
     zooarch_vocab = 'http://opencontext.org/vocabularies/open-context-zooarch/'
     numeric_preds = Predicate.objects\
                              .filter(project_uuid=project_uuid,
                                      data_type='xsd:double')
     for numeric_pred in numeric_preds:
         try:
             # try to find the manifest item
             man_obj = Manifest.objects.get(uuid=numeric_pred.uuid)
         except Manifest.DoesNotExist:
             continue
         matches = LinkEntity.objects\
                             .filter(label=man_obj.label,
                                     vocab_uri=zooarch_vocab)[:1]
         if len(matches) == 0:
             continue
         # a Match! Now let's make a close match assertion
         uri = matches[0].uri
         print(str(man_obj.label) + ' matches ' + uri)
         assertions = (
             (equiv_pred, uri),
             # save also that the unit of measurement is in MM
             ('http://www.w3.org/2000/01/rdf-schema#range',
              'http://www.wikidata.org/wiki/Q174789'),
         )
         for predicate_uri, object_uri in assertions:
             la = LinkAnnotation()
             la.subject = man_obj.uuid  # the subordinate is the subject
             la.subject_type = man_obj.item_type
             la.project_uuid = man_obj.project_uuid
             la.source_id = 'label-match'
             la.predicate_uri = predicate_uri
             la.object_uri = object_uri
             la.save()
Ejemplo n.º 22
0
    def make_naa_annotations(self, project_uuid, naa_annotated_proj_uuid,
                             equiv_pred='skos:closeMatch'):
        """ Makes annotations to describe NAA
            (Neutron Activation Analysis) attributes by
            copying annotations from another project
            with NAA attributes.

            Step 1: for every 'xsd:double' predicate of the already
            annotated project whose label is shorter than 4 characters,
            look for a predicate with the same label in project_uuid and
            copy the old predicate's link annotations to it.

            Step 2: for every 'xsd:double' predicate of project_uuid
            whose label matches an entity of the open-context-zooarch
            vocabulary, save an equiv_pred annotation to that entity and
            an rdfs:range annotation for the unit of measurement.

            project_uuid: the project that receives the annotations
            naa_annotated_proj_uuid: the project to copy annotations from
            equiv_pred: predicate used for the label matches of step 2.
                This keyword was added because the body used the name
                without ever defining it, which raised a NameError on
                the first label match.
        """
        old_preds = Predicate.objects\
                             .filter(project_uuid=naa_annotated_proj_uuid,
                                     data_type='xsd:double')
        old_pred_uuids = [old_pred.uuid for old_pred in old_preds]
        old_pred_mans = Manifest.objects\
                                .filter(uuid__in=old_pred_uuids,
                                        project_uuid=naa_annotated_proj_uuid)\
                                .order_by('label')
        for old_pred_man in old_pred_mans:
            if len(old_pred_man.label) >= 4:
                # only short labels are treated as likely to be about a
                # chemical element
                continue
            new_man_preds = Manifest.objects\
                                    .filter(item_type='predicates',
                                            project_uuid=project_uuid,
                                            label=old_pred_man.label)[:1]
            if len(new_man_preds) < 1:
                # the new project has no predicate with a matching label
                continue
            new_man_pred = new_man_preds[0]
            # we have a match between a predicate label in the old NAA
            # project and the new project
            print('-----------------------------')
            print('Copy annotations from: ' + old_pred_man.label + ' (' +
                  old_pred_man.uuid + ')')
            print('To: ' + new_man_pred.uuid)
            print('-----------------------------')
            old_link_annos = LinkAnnotation.objects\
                                           .filter(subject=old_pred_man.uuid)
            for link_anno in old_link_annos:
                # Re-point the loaded record at the new predicate and
                # clear its hash_id before saving.
                # NOTE(review): presumably save() regenerates hash_id so
                # this stores a new record -- confirm in LinkAnnotation.
                link_anno.hash_id = None
                link_anno.subject = new_man_pred.uuid
                link_anno.subject_type = new_man_pred.item_type
                link_anno.project_uuid = new_man_pred.project_uuid
                link_anno.source_id = 'naa-link-annotations-method'
                try:
                    link_anno.save()
                except Exception as err:
                    # copying stays best-effort, but no longer hides
                    # KeyboardInterrupt / SystemExit or fails silently
                    print('Could not copy annotation to ' +
                          str(new_man_pred.uuid) + ': ' + str(err))

        preds = Predicate.objects\
                         .filter(project_uuid=project_uuid,
                                 data_type='xsd:double')
        for pred in preds:
            try:
                # try to find the manifest item
                man_obj = Manifest.objects.get(uuid=pred.uuid)
            except Manifest.DoesNotExist:
                continue
            l_ents = LinkEntity.objects\
                               .filter(label=man_obj.label,
                                       vocab_uri='http://opencontext.org/vocabularies/open-context-zooarch/')[:1]
            if len(l_ents) < 1:
                continue
            # a Match! Now let's make a close match assertion
            uri = l_ents[0].uri
            print(str(man_obj.label) + ' matches ' + uri)
            la = LinkAnnotation()
            la.subject = man_obj.uuid  # the subordinate is the subject
            la.subject_type = man_obj.item_type
            la.project_uuid = man_obj.project_uuid
            la.source_id = 'label-match'
            la.predicate_uri = equiv_pred
            la.object_uri = uri
            la.save()
            # save also that the unit of measurement is in MM
            la = LinkAnnotation()
            la.subject = man_obj.uuid  # the subordinate is the subject
            la.subject_type = man_obj.item_type
            la.project_uuid = man_obj.project_uuid
            la.source_id = 'label-match'
            la.predicate_uri = 'http://www.w3.org/2000/01/rdf-schema#range'
            la.object_uri = 'http://www.wikidata.org/wiki/Q174789'
            la.save()
Ejemplo n.º 23
0
 def create_concept_parents(self, new_hierachic_list):
     """ Creates new types for
     superior (more general) types from a list
     of types that have hierarchies implicit in their labels.
     Once the superior types are created,
     linked data annotations noting hierarchy are stored.

     Each manifest label is split on self.HIERARCHY_DELIM; every
     cumulative prefix of the label is looked up or created as a type
     within the same predicate, and each consecutive (parent, child)
     pair of those types is saved as a LinkAnnotation.

     Manifest items without an OCtype record are skipped.

     Returns the list of all {'parent': uuid, 'child': uuid} dicts
     made for the whole input list.
     """
     parent_children_pairs = []
     for manifest in new_hierachic_list:
         try:
             oc_type = OCtype.objects.get(uuid=manifest.uuid)
         except OCtype.DoesNotExist:
             continue
         # Pairs found for this manifest only. Previously the annotation
         # loop ran over the cumulative list for every manifest, so pairs
         # of earlier manifests were saved again each time (and with the
         # later manifest's project_uuid).
         manifest_pairs = []
         act_delim = ''
         act_new_label = ''
         current_parent = False
         for label_part in manifest.label.split(self.HIERARCHY_DELIM):
             act_new_label = act_new_label + act_delim + label_part
             act_delim = self.HIERARCHY_DELIM
             type_manage = TypeManagement()
             type_manage.project_uuid = oc_type.project_uuid
             type_manage.source_id = self.source_id
             ptype = type_manage.get_make_type_within_pred_uuid(
                 oc_type.predicate_uuid, act_new_label)
             if current_parent is not False:
                 manifest_pairs.append({
                     'parent': current_parent,
                     'child': ptype.uuid
                 })
             current_parent = ptype.uuid
         # now make some linked data annotations
         for parent_child in manifest_pairs:
             new_la = LinkAnnotation()
             new_la.subject = parent_child['child']
             new_la.subject_type = 'types'
             new_la.project_uuid = oc_type.project_uuid
             new_la.source_id = self.source_id
             new_la.predicate_uri = self.p_for_superobjs
             new_la.object_uri = URImanagement.make_oc_uri(
                 parent_child['parent'], 'types')
             new_la.creator_uuid = ''
             new_la.save()
         parent_children_pairs.extend(manifest_pairs)
     return parent_children_pairs
Ejemplo n.º 24
0
 def add_period_annoation(self, p_ref):
     """ Adds a 'dc-terms:isReferencedBy' annotation from the item at
         p_ref['oc-uri'] to the period URI in p_ref['period-meta']['uri'].

         Returns the result of dereferencing the item; nothing is
         saved when the item can not be dereferenced.
     """
     entity = Entity()
     found = entity.dereference(p_ref['oc-uri'])
     if not found:
         return found
     period_anno = LinkAnnotation()
     field_values = (
         ('subject', entity.uuid),
         ('subject_type', entity.item_type),
         ('project_uuid', entity.project_uuid),
         ('source_id', self.source_id),
         ('predicate_uri', 'dc-terms:isReferencedBy'),
         ('object_uri', p_ref['period-meta']['uri']),
         ('creator_uuid', ''),
     )
     for field_name, field_value in field_values:
         setattr(period_anno, field_name, field_value)
     period_anno.save()
     return found
Ejemplo n.º 25
0
 def make_type_relations(self, sub_type_pred_uuid, sub_type_f_num, rel_pred,
                         obj_type_pred_uuid, obj_type_f_num):
     """ Makes semantic relationships between
         different types in an import

         Subject type labels are read from import field sub_type_f_num
         and object type labels from field obj_type_f_num of the same
         row. Rows with an empty label on either side, or with the same
         text on both sides, are skipped. Each distinct subject / object
         type pair gets one LinkAnnotation that uses rel_pred.
     """
     rels = {}
     subject_cells = ImportCell.objects\
                               .filter(source_id=self.source_id,
                                       field_num=sub_type_f_num)
     for subject_cell in subject_cells:
         sub_type_text = subject_cell.record
         row = subject_cell.row_num
         if len(sub_type_text) == 0:
             continue
         sub_manager = TypeManagement()
         sub_manager.project_uuid = self.project_uuid
         sub_manager.source_id = self.source_id
         sub_type = sub_manager.get_make_type_within_pred_uuid(
             sub_type_pred_uuid, sub_type_text)
         object_cells = ImportCell.objects\
                                  .filter(source_id=self.source_id,
                                          field_num=obj_type_f_num,
                                          row_num=row)[:1]
         if len(object_cells) == 0:
             continue
         obj_type_text = object_cells[0].record
         if len(obj_type_text) == 0 or sub_type_text == obj_type_text:
             continue
         obj_manager = TypeManagement()
         obj_manager.project_uuid = self.project_uuid
         obj_manager.source_id = self.source_id
         obj_type = obj_manager.get_make_type_within_pred_uuid(
             obj_type_pred_uuid, obj_type_text)
         # make a uri for this, since we're making a link assertion
         obj_uri = URImanagement.make_oc_uri(obj_type.uuid, 'types')
         # keyed by the uuid pair so that the same link assertion
         # is not made over and over; the first one found is kept
         rel_id = str(sub_type.uuid) + ' ' + str(obj_type.uuid)
         rels.setdefault(rel_id, {'subject': sub_type.uuid,
                                  'object_uri': obj_uri})
     # now make the link data annotation relating these types.
     for rel in rels.values():
         type_link = LinkAnnotation()
         type_link.subject = rel['subject']
         type_link.subject_type = 'types'
         type_link.project_uuid = self.project_uuid
         type_link.source_id = self.source_id
         type_link.predicate_uri = rel_pred
         type_link.object_uri = rel['object_uri']
         type_link.creator_uuid = ''
         type_link.save()
Ejemplo n.º 26
0
 def link_sites_from_filecache(self):
     """ Updates Open Context to save new sites
         and annotations from the file cache.

         Reads the dict stored under self.cache_filekey. For every item
         of its 'trinomial_refs' list:
         - a LinkEntity is made for the item's 'rec_uri' if none exists
           (the title is shortened when longer than 194 characters)
         - for every trinomial that matches the label of a Manifest
           item of class 'oc-gen:cat-site', a 'skos:broader' annotation
           from the record URI to the item's 'source_uri' is saved, and
           a 'dc-terms:isReferencedBy' annotation from the site to the
           record URI is saved if the two are not linked yet.

         Does nothing when there is no file cache, when the cached
         object is not a dict, or when it has no 'trinomial_refs' key.

         Each annotation is now saved exactly once. The earlier code
         saved it unguarded and then saved it a second time inside a
         bare try/except, which only added a redundant write.
     """
     if self.filecache is None:
         return
     self.filecache.working_dir = self.working_dir
     json_obj = self.filecache.get_dict_from_file(self.cache_filekey)
     if not isinstance(json_obj, dict) or 'trinomial_refs' not in json_obj:
         return
     for tri_ref in json_obj['trinomial_refs']:
         uri = tri_ref['rec_uri']
         title = tri_ref['title']
         if len(title) > 194:
             title = title[0:190] + '... '
         l_exists = LinkEntity.objects.filter(uri=uri)[:1]
         if len(l_exists) < 1:
             l_ent = LinkEntity()
             l_ent.uri = uri
             l_ent.label = title
             l_ent.alt_label = title
             l_ent.vocab_uri = tri_ref['source_uri']
             l_ent.ent_type = 'class'
             l_ent.save()
         for trinomial in tri_ref['trinomials']:
             man_objs = Manifest.objects.filter(label=trinomial,
                                                class_uri='oc-gen:cat-site')
             if len(man_objs) < 1:
                 continue
             man_obj = man_objs[0]
             la = LinkAnnotation()
             la.subject = uri  # the subordinate is the subject
             la.subject_type = 'uri'
             la.project_uuid = man_obj.project_uuid
             la.source_id = self.source_id
             la.predicate_uri = "skos:broader"
             la.object_uri = tri_ref['source_uri']
             la.save()
             links = LinkAnnotation.objects\
                                   .filter(subject=man_obj.uuid,
                                           object_uri=uri)[:1]
             if len(links) < 1:
                 print('Link ' + man_obj.label + ' (' + man_obj.uuid + ') to ' + uri)
                 la = LinkAnnotation()
                 la.subject = man_obj.uuid  # the subordinate is the subject
                 la.subject_type = man_obj.item_type
                 la.project_uuid = man_obj.project_uuid
                 la.source_id = self.source_id
                 la.predicate_uri = 'dc-terms:isReferencedBy'
                 la.object_uri = uri
                 la.save()
Ejemplo n.º 27
0
 def save_new_ref_by_annotation(self, oc_item):
     """ Saves a referenced-by annotation from oc_item to the referrer
         URI, but only if check_new_annotation reports it as new.

         Also counts the annotation in self.new_annotations and prints
         a progress line.
     """
     if not self.check_new_annotation(oc_item.uuid, self.referrer.uri):
         return
     self.new_annotations += 1
     ref_anno = LinkAnnotation()
     field_values = (
         ('subject', oc_item.uuid),
         ('subject_type', oc_item.item_type),
         ('project_uuid', oc_item.project_uuid),
         ('source_id', self.source_id),
         ('predicate_uri', self.DC_TERMS_REFERENCED_BY),
         ('object_uri', self.referrer.uri),
         ('creator_uuid', ''),
     )
     for field_name, field_value in field_values:
         setattr(ref_anno, field_name, field_value)
     ref_anno.save()
     progress = '[' + str(self.new_annotations) + '] annotated: ' + oc_item.uuid
     print(progress)
Ejemplo n.º 28
0
 def make_naa_annotations(self,
                          project_uuid,
                          naa_annotated_proj_uuid,
                          equiv_pred='skos:closeMatch'):
     """ Makes annotations to describe NAA
         (Neutron Activation Analysis) attributes by
         copying annotations from another project
         with NAA attributes.

         Step 1: for every 'xsd:double' predicate of the already
         annotated project whose label is shorter than 4 characters,
         look for a predicate with the same label in project_uuid and
         copy the old predicate's link annotations to it.

         Step 2: for every 'xsd:double' predicate of project_uuid
         whose label matches an entity of the open-context-zooarch
         vocabulary, save an equiv_pred annotation to that entity and
         an rdfs:range annotation for the unit of measurement.

         project_uuid: the project that receives the annotations
         naa_annotated_proj_uuid: the project to copy annotations from
         equiv_pred: predicate used for the label matches of step 2.
             This keyword was added because the body used the name
             without ever defining it, which raised a NameError on
             the first label match.
     """
     old_preds = Predicate.objects\
                          .filter(project_uuid=naa_annotated_proj_uuid,
                                  data_type='xsd:double')
     old_pred_uuids = [old_pred.uuid for old_pred in old_preds]
     old_pred_mans = Manifest.objects\
                             .filter(uuid__in=old_pred_uuids,
                                     project_uuid=naa_annotated_proj_uuid)\
                             .order_by('label')
     for old_pred_man in old_pred_mans:
         if len(old_pred_man.label) >= 4:
             # only short labels are treated as likely to be about a
             # chemical element
             continue
         new_man_preds = Manifest.objects\
                                 .filter(item_type='predicates',
                                         project_uuid=project_uuid,
                                         label=old_pred_man.label)[:1]
         if len(new_man_preds) < 1:
             # the new project has no predicate with a matching label
             continue
         new_man_pred = new_man_preds[0]
         # we have a match between a predicate label in the old NAA
         # project and the new project
         print('-----------------------------')
         print('Copy annotations from: ' + old_pred_man.label + ' (' + old_pred_man.uuid + ')')
         print('To: ' + new_man_pred.uuid)
         print('-----------------------------')
         old_link_annos = LinkAnnotation.objects\
                                        .filter(subject=old_pred_man.uuid)
         for link_anno in old_link_annos:
             # Re-point the loaded record at the new predicate and
             # clear its hash_id before saving.
             # NOTE(review): presumably save() regenerates hash_id so
             # this stores a new record -- confirm in LinkAnnotation.
             link_anno.hash_id = None
             link_anno.subject = new_man_pred.uuid
             link_anno.subject_type = new_man_pred.item_type
             link_anno.project_uuid = new_man_pred.project_uuid
             link_anno.source_id = 'naa-link-annotations-method'
             try:
                 link_anno.save()
             except Exception as err:
                 # copying stays best-effort, but no longer hides
                 # KeyboardInterrupt / SystemExit or fails silently
                 print('Could not copy annotation to ' +
                       str(new_man_pred.uuid) + ': ' + str(err))

     preds = Predicate.objects\
                      .filter(project_uuid=project_uuid,
                              data_type='xsd:double')
     for pred in preds:
         try:
             # try to find the manifest item
             man_obj = Manifest.objects.get(uuid=pred.uuid)
         except Manifest.DoesNotExist:
             continue
         l_ents = LinkEntity.objects\
                            .filter(label=man_obj.label,
                                    vocab_uri='http://opencontext.org/vocabularies/open-context-zooarch/')[:1]
         if len(l_ents) < 1:
             continue
         # a Match! Now let's make a close match assertion
         uri = l_ents[0].uri
         print(str(man_obj.label) + ' matches ' + uri)
         la = LinkAnnotation()
         la.subject = man_obj.uuid  # the subordinate is the subject
         la.subject_type = man_obj.item_type
         la.project_uuid = man_obj.project_uuid
         la.source_id = 'label-match'
         la.predicate_uri = equiv_pred
         la.object_uri = uri
         la.save()
         # save also that the unit of measurement is in MM
         la = LinkAnnotation()
         la.subject = man_obj.uuid  # the subordinate is the subject
         la.subject_type = man_obj.item_type
         la.project_uuid = man_obj.project_uuid
         la.source_id = 'label-match'
         la.predicate_uri = 'http://www.w3.org/2000/01/rdf-schema#range'
         la.object_uri = 'http://www.wikidata.org/wiki/Q174789'
         la.save()
Ejemplo n.º 29
0
 def store_records(self, act_table, recs):
     """
     Stores the records retrieved for a given table.

     act_table names the table and so selects the model class; every
     value of the recs dict is passed as keyword arguments to that
     class and the new object is saved. Records for a table name that
     is not listed here are ignored.
     """
     model = None
     if act_table == 'link_annotations':
         model = LinkAnnotation
     elif act_table == 'link_entities':
         model = LinkEntity
     elif act_table == 'link_hierarchies':
         model = LinkHierarchy
     elif act_table == 'oc_chronology':
         model = Chronology
     elif act_table == 'oc_geodata':
         model = Geodata
     elif act_table == 'oc_mediafiles':
         model = Mediafile
     elif act_table == 'oc_documents':
         model = OCdocument
     elif act_table == 'oc_persons':
         model = Person
     elif act_table == 'oc_projects':
         model = Project
     elif act_table == 'oc_strings':
         model = OCstring
     elif act_table == 'oc_types':
         model = OCtype
     elif act_table == 'oc_events':
         model = Event
     elif act_table == 'oc_predicates':
         model = Predicate
     elif act_table == 'oc_identifiers':
         model = StableIdentifer
     elif act_table == 'oc_obsmetadata':
         model = ObsMetadata
     for record in recs.values():
         if model is not None:
             model(**record).save()
Ejemplo n.º 30
0
 def add_table_file_download(self, table_id, file_uri):
     """ Adds a file_uri for a pre-cached table download.

         If get_head_info succeeds for the file, an item with its id,
         format and size is appended to the table's dump list in
         meta_json and the table is saved. If a Manifest item exists
         for the table, a LinkAnnotation to the file is saved as well.
     """
     dump_key = ExpTable.PREDICATE_DUMP
     for ex_tab in ExpTable.objects.filter(table_id=table_id)[:1]:
         # start a new list if the table has no data dumps recorded yet
         dump_list = ex_tab.meta_json[dump_key] if dump_key in ex_tab.meta_json else []
         mm = ManageMediafiles()
         if not mm.get_head_info(file_uri):
             continue
         dump_item = LastUpdatedOrderedDict()
         dump_item['id'] = file_uri
         dump_item['dc-terms:hasFormat'] = mm.mime_type_uri
         dump_item['dcat:size'] = float(mm.filesize)
         print('Found: ' + str(dump_item))
         dump_list.append(dump_item)
         ex_tab.meta_json[dump_key] = dump_list
         ex_tab.save()
         man_items = Manifest.objects.filter(uuid=table_id)[:1]
         if len(man_items) == 0:
             continue
         man_obj = man_items[0]
         file_anno = LinkAnnotation()
         file_anno.subject = man_obj.uuid
         file_anno.subject_type = man_obj.item_type
         file_anno.project_uuid = man_obj.project_uuid
         file_anno.source_id = 'download-file-relate'
         file_anno.predicate_uri = dump_key
         file_anno.object_uri = file_uri
         file_anno.sort = len(dump_list)
         file_anno.obj_extra = dump_item
         file_anno.save()
Ejemplo n.º 31
0
def get_add_gbif_parent(child_uri, child_le=None):
    """Makes sure a GBIF entity is linked to its GBIF parent.

    If child_le is not given (or is falsy) the child link entity is
    obtained with add_get_gbif_link_entity(child_uri). When the child
    already has a LinkAnnotation with the SKOS_BROADER predicate, that
    existing parent is returned. Otherwise the parent key is requested
    from the GBIF API and a new annotation is saved.

    Returns a tuple:
    (parent_link_entity, is_new_relationship)

    parent_link_entity is None when the GBIF API reports no parent
    for the child.
    """
    child_le = child_le or add_get_gbif_link_entity(child_uri)
    known_link = LinkAnnotation.objects.filter(
        subject=child_le.uri,
        predicate_uri=SKOS_BROADER,
    ).first()
    if known_link:
        # The child already has a parent relation; report it
        # without creating anything new.
        return add_get_gbif_link_entity(known_link.object_uri), False

    api = gbifAPI()
    parent_id = api.get_gbif_parent_key(
        get_gbif_species_id_from_uri(child_le.uri)
    )
    if not parent_id:
        # No parent key came back: top of the hierarchy.
        return None, False

    parent_le = add_get_gbif_link_entity(GBIF_BASE_URI + str(parent_id))
    message = 'Make {} ({}) a child of: {} ({})'.format(
        child_le.uri,
        child_le.label,
        parent_le.uri,
        parent_le.label,
    )
    print(message)

    new_link = LinkAnnotation()
    new_link.subject = child_le.uri
    new_link.subject_type = 'uri'
    new_link.project_uuid = '0'
    new_link.source_id = HIERARCHY_SOURCE
    new_link.predicate_uri = SKOS_BROADER
    new_link.object_uri = parent_le.uri
    new_link.creator_uuid = ''
    new_link.save()
    return parent_le, True
Ejemplo n.º 32
0
 def add_table_file_download(self, table_id, file_uri):
     """ Adds file_uri as a pre-cached download of an export table.

         Nothing happens when no ExpTable has the given table_id or
         when ManageMediafiles.get_head_info() does not report success
         for file_uri. Otherwise the file is described (id, format,
         size) in the table's meta_json under ExpTable.PREDICATE_DUMP,
         the table is saved, and, if a Manifest record with
         uuid == table_id exists, a matching LinkAnnotation is saved.
         Returns None.
     """
     found_tables = ExpTable.objects.filter(table_id=table_id)[:1]
     if len(found_tables) < 1:
         return
     table = found_tables[0]
     pred_key = ExpTable.PREDICATE_DUMP
     has_dumps = pred_key in table.meta_json
     # reuse the recorded list of dumps, or begin a new one
     file_list = table.meta_json[pred_key] if has_dumps else []
     head_checker = ManageMediafiles()
     head_ok = head_checker.get_head_info(file_uri)
     if not head_ok:
         return
     file_item = LastUpdatedOrderedDict()
     file_item['id'] = file_uri
     file_item['dc-terms:hasFormat'] = head_checker.mime_type_uri
     file_item['dcat:size'] = float(head_checker.filesize)
     print('Found: ' + str(file_item))
     file_list.append(file_item)
     table.meta_json[pred_key] = file_list
     table.save()
     manifests = Manifest.objects.filter(uuid=table_id)[:1]
     if len(manifests) > 0:
         table_manifest = manifests[0]
         file_anno = LinkAnnotation()
         file_anno.subject = table_manifest.uuid
         file_anno.subject_type = table_manifest.item_type
         file_anno.project_uuid = table_manifest.project_uuid
         file_anno.source_id = 'download-file-relate'
         file_anno.predicate_uri = pred_key
         file_anno.object_uri = file_uri
         # the new file is last in the list, so its rank is the length
         file_anno.sort = len(file_list)
         file_anno.obj_extra = file_item
         file_anno.save()
Ejemplo n.º 33
0
 def generate_table_metadata(self, table_id, overwrite=False):
     """ Makes metadata annotations for a specific export table.

         For each predicate in self.metadata_predicates, annotations
         are (re)generated for the table's manifest item:

         - 'dc-terms:contributor': from get_table_author_counts()
         - 'dc-terms:creator': from make_table_dc_creator_list()
         - 'dc-terms:source': from get_table_project_uuid_counts()
         - 'dc-terms:subject': from
           make_table_dc_subject_category_list()

         :param table_id: identifier of the export table
         :param overwrite: when True, existing annotations for a
             predicate are deleted and regenerated; when False, a
             predicate that already has at least one annotation is
             left untouched.

         Does nothing (beyond printing a notice) when either the
         ExpTable or the Manifest record cannot be found.
         Returns None.
     """
     ex_id = ExpTableIdentifiers()
     ex_id.make_all_identifiers(table_id)
     table_ids = [ex_id.table_id,
                  ex_id.public_table_id]
     try:
         ex_tab = ExpTable.objects.get(table_id=table_id)
     except ExpTable.DoesNotExist:
         print('No ExpTable object for: ' + ex_id.public_table_id)
         ex_tab = None
     try:
         man_obj = Manifest.objects.get(uuid=ex_id.public_table_id)
     except Manifest.DoesNotExist:
         print('No manifest object for: ' + ex_id.public_table_id)
         man_obj = None
     if ex_tab is None or man_obj is None:
         return
     # project counts feed both the creator and the source predicates,
     # so they are fetched lazily and only once
     proj_uuid_counts = None
     for meta_pred in self.metadata_predicates:
         old_annos = LinkAnnotation.objects\
                                   .filter(subject__in=table_ids,
                                           predicate_uri=meta_pred)
         if overwrite:
             num_old_delete = old_annos.delete()
             print('Deleted annoations ' + str(num_old_delete) + ' for ' + meta_pred)
         elif len(old_annos[:1]) > 0:
             # annotations exist and we may not overwrite them
             continue
         if meta_pred == 'dc-terms:contributor':
             print('Getting contributors for ' + table_id)
             authors = self.get_table_author_counts(table_id)
             self._save_table_meta_annotations(
                 man_obj,
                 meta_pred,
                 ((URImanagement.make_oc_uri(author['uuid'], 'persons'),
                   author['count'])
                  for author in authors))
         elif meta_pred in ('dc-terms:creator', 'dc-terms:source'):
             if proj_uuid_counts is None:
                 # only get this if not gotten yet
                 print('Getting projects for ' + table_id)
                 proj_uuid_counts = self.get_table_project_uuid_counts(table_id)
             if meta_pred == 'dc-terms:creator':
                 print('Getting creators for ' + table_id)
                 creators = self.make_table_dc_creator_list(proj_uuid_counts)
                 self._save_table_meta_annotations(
                     man_obj,
                     meta_pred,
                     ((creator['id'], creator['count'])
                      for creator in creators))
             else:
                 print('Getting sources for ' + table_id)
                 self._save_table_meta_annotations(
                     man_obj,
                     meta_pred,
                     ((URImanagement.make_oc_uri(proj['project_uuid'],
                                                 'projects'),
                       proj['num_uuids'])
                      for proj in proj_uuid_counts))
         elif meta_pred == 'dc-terms:subject':
             print('Getting subjects for ' + table_id)
             subjects = self.make_table_dc_subject_category_list(table_id)
             self._save_table_meta_annotations(
                 man_obj,
                 meta_pred,
                 ((subject['id'], subject['count'])
                  for subject in subjects))

 def _save_table_meta_annotations(self, man_obj, meta_pred, uri_counts):
     """ Saves one LinkAnnotation per (object_uri, count) pair.

         :param man_obj: manifest item that is the annotation subject
         :param meta_pred: predicate URI of the annotations
         :param uri_counts: iterable of (object_uri, count) tuples, in
             the order the annotations should be sorted

         The 'sort' of each annotation is its 1-based position in
         uri_counts and the count is stored in obj_extra['count'].
     """
     for sort_rank, (object_uri, count) in enumerate(uri_counts, 1):
         obj_extra = LastUpdatedOrderedDict()
         obj_extra['count'] = count
         la = LinkAnnotation()
         la.subject = man_obj.uuid
         la.subject_type = man_obj.item_type
         la.project_uuid = man_obj.project_uuid
         la.source_id = 'exp-table-manage'
         la.predicate_uri = meta_pred
         la.object_uri = object_uri
         la.creator_uuid = '0'
         la.sort = sort_rank
         la.obj_extra = obj_extra
         la.save()
Ejemplo n.º 34
0
 def match_trinomial_obj(self, tri):
     """ Tries to match the trinomial object 'tri' against tDAR site
         keywords, unless the item already has a tDAR annotation.

         A tDAR result counts as a match when its label equals the
         trinomial exactly, or equals the trinomial with extra leading
         zeros added to the site part. For each match a LinkEntity is
         created if missing and a LinkAnnotation is saved.

         After each keyword request the error state of the API object
         is checked: on failure the method sleeps (or exits the process
         once self.max_wait is exceeded), on success the wait is reset.

         Returns the number of matches found.
     """
     def zero_padded(parts, max_len):
         # yields the trinomial with one more leading zero on the site
         # part each time, while the site part is shorter than max_len
         site = parts['site']
         while len(site) < max_len:
             site = '0' + site
             yield parts['state'] + parts['county'] + site

     found_matches = 0
     try:
         manifest = Manifest.objects.get(uuid=tri.uuid)
     except Manifest.DoesNotExist:
         manifest = False
     already_linked = LinkAnnotation.objects\
                                    .filter(subject=tri.uuid,
                                            predicate_uri='dc-terms:subject',
                                            object_uri__contains=self.TDAR_VOCAB)[:1]
     if len(already_linked) >= 1 or manifest is False:
         # already has a tDAR id, or no manifest record to annotate
         return found_matches
     tri_man = TrinomialManage()
     request_keywords = [tri.trinomial]
     if self.lead_zero_check:
         # also ask tDAR about variants with extra leading zeros
         request_keywords.extend(
             zero_padded(tri_man.parse_trinomial(tri.trinomial), 4))
     for keyword in request_keywords:
         tdar_api = tdarAPI()
         results = tdar_api.get_site_keyword(keyword)
         if isinstance(results, list):
             for result in results[:self.max_results]:
                 label = result['label']
                 # exact match first, otherwise allow a difference
                 # that is only in leading zeros
                 is_match = label == tri.trinomial
                 if not is_match:
                     parts = tri_man.parse_trinomial(tri.trinomial)
                     is_match = any(variant == label
                                    for variant in zero_padded(parts, 5))
                 if not is_match:
                     print('Almost! ' + label + ' is not exactly: ' + tri.trinomial)
                     continue
                 found_matches += 1
                 # make sure the linked entity is in the link entity table
                 try:
                     LinkEntity.objects.get(uri=result['id'])
                     entity_known = True
                 except LinkEntity.DoesNotExist:
                     entity_known = False
                 if not entity_known:
                     le = LinkEntity()
                     le.uri = result['id']
                     le.label = result['label']
                     le.alt_label = result['label']
                     le.vocab_uri = self.TDAR_VOCAB
                     le.ent_type = 'type'
                     le.save()
                 # now relate the item to the tDAR entity
                 la = LinkAnnotation()
                 la.subject = tri.uuid
                 la.subject_type = manifest.item_type
                 la.project_uuid = manifest.project_uuid
                 la.source_id = 'tdar-api-lookup'
                 la.predicate_uri = self.DC_TERMS_SUBJECT
                 la.object_uri = result['id']
                 la.save()
         if not tdar_api.request_error:
             self.request_error = False
             if self.error_wait >= self.base_wait:
                 print('HTTP requests resumed OK, will continue.')
                 self.error_wait = 0
             continue
         self.request_error = True
         print('HTTP request to tDAR failed!')
         # back off a little longer after every failure
         self.error_wait += self.base_wait
         if self.error_wait > self.max_wait:
             print('Too many failures, quiting...')
             sys.exit('Quitting process')
         else:
             print('Will try again in ' + str(self.error_wait) + ' seconds...')
             sleep(self.error_wait)
     return found_matches
Ejemplo n.º 35
0
 def match_california_site(self, site_uuid):
     """ Tries to match the alternate names of a California site
         against tDAR site keywords.

         Nothing is attempted when the item does not exist or already
         has a tDAR annotation. Otherwise the item's JSON-LD is
         generated, every string value of
         'oc-pred:52-alternate-site-or-place-name' in its observations
         is sent to tDAR, and results whose label equals the name
         (ignoring case) are saved as a LinkEntity (if missing) plus a
         LinkAnnotation.

         Returns the number of matches found.
     """
     name_pred = 'oc-pred:52-alternate-site-or-place-name'
     found_matches = 0
     oc_item = OCitem()
     if not oc_item.check_exists(site_uuid):
         return found_matches
     tdar_annos = LinkAnnotation.objects\
                                .filter(subject=site_uuid,
                                        predicate_uri='dc-terms:subject',
                                        object_uri__contains=self.TDAR_VOCAB)[:1]
     if len(tdar_annos) >= 1:
         # a tDAR id is already recorded for this item
         return found_matches
     # the alternate names are read out of the item's JSON-LD
     oc_item.generate_json_ld()
     observations = []
     if 'oc-gen:has-obs' in oc_item.json_ld \
        and isinstance(oc_item.json_ld['oc-gen:has-obs'], list):
         observations = oc_item.json_ld['oc-gen:has-obs']
     request_keywords = []
     for obs in observations:
         if name_pred not in obs:
             continue
         if not isinstance(obs[name_pred], list):
             continue
         for name_obj in obs[name_pred]:
             if 'xsd:string' not in name_obj:
                 continue
             if isinstance(name_obj['xsd:string'], str):
                 request_keywords.append(name_obj['xsd:string'])
     print('Checking names in tDAR: ' + '; '.join(request_keywords))
     for keyword in request_keywords:
         tdar_api = tdarAPI()
         results = tdar_api.get_site_keyword(keyword)
         if isinstance(results, list):
             for result in results[:self.max_results]:
                 # only a case-insensitive exact match on the label counts
                 if result['label'].lower() != keyword.lower():
                     print('Almost! ' + result['label'] + ' is not exactly: ' + keyword)
                     continue
                 print('FOUND ' + result['label'])
                 found_matches += 1
                 # make sure the linked entity is in the link entity table
                 try:
                     LinkEntity.objects.get(uri=result['id'])
                     entity_known = True
                 except LinkEntity.DoesNotExist:
                     entity_known = False
                 if not entity_known:
                     le = LinkEntity()
                     le.uri = result['id']
                     le.label = result['label']
                     le.alt_label = result['label']
                     le.vocab_uri = self.TDAR_VOCAB
                     le.ent_type = 'type'
                     le.save()
                 # now relate the item to the tDAR entity
                 la = LinkAnnotation()
                 la.subject = oc_item.manifest.uuid
                 la.subject_type = oc_item.manifest.item_type
                 la.project_uuid = oc_item.manifest.project_uuid
                 la.source_id = 'tdar-api-lookup'
                 la.predicate_uri = self.DC_TERMS_SUBJECT
                 la.object_uri = result['id']
                 la.save()
         if not tdar_api.request_error:
             self.request_error = False
             if self.error_wait >= self.base_wait:
                 print('HTTP requests resumed OK, will continue.')
                 self.error_wait = 0
             continue
         self.request_error = True
         print('HTTP request to tDAR failed!')
         # back off a little longer after every failure
         self.error_wait += self.base_wait
         if self.error_wait > self.max_wait:
             print('Too many failures, quiting...')
             sys.exit('Quitting process')
         else:
             print('Will try again in ' + str(self.error_wait) + ' seconds...')
             sleep(self.error_wait)
     return found_matches
Ejemplo n.º 36
0
 def make_type_ld_annotations(self, sub_type_pred_uuid, sub_type_f_num,
                              rel_pred, obj_le_f_num):
     """ Creates linked data annotations for the types of an import.

         The cells of field sub_type_f_num are grouped by record hash
         into distinct type labels. For every non-blank label, the
         records of field obj_le_f_num on the same rows are read, and
         each one that looks like an http(s) URI is related to the
         type (made or found within predicate sub_type_pred_uuid)
         through a LinkAnnotation that uses rel_pred as its predicate.
         Returns None.
     """
     rels = []
     type_cells = ImportCell.objects\
                            .filter(source_id=self.source_id,
                                    field_num=sub_type_f_num)
     # group rows by record hash; the first cell seen stands for the group
     by_hash = {}
     for cell in type_cells:
         group = by_hash.setdefault(cell.rec_hash,
                                    {'rows': [], 'imp_cell_obj': cell})
         group['rows'].append(cell.row_num)
     for group in by_hash.values():
         type_label = group['imp_cell_obj'].record
         if len(type_label) < 1:
             # a blank type can't be annotated
             continue
         pc = ProcessCells(self.source_id, 0)
         ld_entities = pc.get_field_records(obj_le_f_num, group['rows'])
         for distinct_ld in ld_entities.values():
             obj_uri = distinct_ld['imp_cell_obj'].record
             if len(obj_uri) <= 8:
                 continue
             if obj_uri[:7] != 'http://' and obj_uri[:8] != 'https://':
                 # not a web URI, so not a usable linked data entity
                 continue
             # get (or make) the type that is the annotation subject
             tm = TypeManagement()
             tm.project_uuid = self.project_uuid
             tm.source_id = self.source_id
             sub_type = tm.get_make_type_within_pred_uuid(
                 sub_type_pred_uuid, type_label)
             rels.append({
                 'subject_label': type_label,
                 'subject': sub_type.uuid,
                 'object_uri': obj_uri
             })
     # all relations are collected first, then saved
     for rel in rels:
         new_la = LinkAnnotation()
         new_la.subject = rel['subject']
         new_la.subject_type = 'types'
         new_la.project_uuid = self.project_uuid
         new_la.source_id = self.source_id
         new_la.predicate_uri = rel_pred
         new_la.object_uri = rel['object_uri']
         new_la.creator_uuid = ''
         new_la.save()
Ejemplo n.º 37
0
 def add_item_annotation(self, post_data):
     """ Adds a linked data annotation to the current item.

         Reads 'predicate_uri' and 'object_uri' (and optionally
         'source_id' and 'sort') from post_data through
         self.request_param_val(). The annotation is saved only when
         both parameters are present, the object can be dereferenced,
         the predicate can be dereferenced or is on the list of
         accepted predicates, and no equivalent annotation already
         exists for the item.

         Returns (and stores in self.response) a dict with keys
         'action', 'ok' and 'change' (holding an explanatory 'note').
     """
     # predicates accepted even when they can't be dereferenced
     ok_predicates = [
         'dc-terms:creator',
         'dc-terms:contributor',
         'dc-terms:subject',
         'dc-terms:coverage',
         'dc-terms:temporal',
         'dc-terms:references',
         'dc-terms:isReferencedBy',
         'dc-terms:license',
         'skos:closeMatch',
         'skos:exactMatch',
         'owl:sameAs',
         'skos:broader',
         'skos:related',
         'skos:example',
         'rdfs:isDefinedBy',
         'http://www.w3.org/2000/01/rdf-schema#range',
     ]
     # pessimistic defaults; only a saved annotation flips 'ok'
     ok = False
     note = ''
     predicate_uri = self.request_param_val(post_data, 'predicate_uri')
     object_uri = self.request_param_val(post_data, 'object_uri')
     if predicate_uri is False or object_uri is False:
         note = self.errors['params']
     else:
         p_entity = Entity()
         found_p = p_entity.dereference(predicate_uri)
         if found_p is False and predicate_uri in ok_predicates:
             found_p = True
         o_entity = Entity()
         found_o = o_entity.dereference(object_uri)
         if not (found_p and found_o):
             note = 'Missing a predicate or object entity'
         else:
             # look for an existing annotation using any equivalent
             # form of the predicate and object identifiers
             lequiv = LinkEquivalence()
             pred_list = lequiv.get_identifier_list_variants(predicate_uri)
             obj_list = lequiv.get_identifier_list_variants(object_uri)
             la_exist = LinkAnnotation.objects\
                                      .filter(subject=self.uuid,
                                              predicate_uri__in=pred_list,
                                              object_uri__in=obj_list)[:1]
             if len(la_exist) >= 1:
                 note = 'This annotation already exists.'
             else:
                 new_la = LinkAnnotation()
                 new_la.subject = self.manifest.uuid
                 new_la.subject_type = self.manifest.item_type
                 new_la.project_uuid = self.manifest.project_uuid
                 new_la.source_id = self.request_param_val(
                     post_data, 'source_id', 'manual-web-form', False)
                 new_la.sort = self.request_param_val(
                     post_data, 'sort', 0, False)
                 new_la.predicate_uri = predicate_uri
                 # store the URI as resolved by the object entity
                 new_la.object_uri = o_entity.uri
                 new_la.creator_uuid = self.creator_uuid
                 new_la.save()
                 # a change was made, so cached data is stale
                 self.clear_caches()
                 ok = True
     self.response = {'action': 'add-item-annotation',
                      'ok': ok,
                      'change': {'note': note}}
     return self.response
Ejemplo n.º 38
0
 def make_von_den_driesch_equiv(self,
                                project_uuid,
                                equiv_pred='skos:closeMatch'):
     """ Relates a project's 'xsd:double' predicates to entities of
         the zooarch measurement ontology that have the same label.

         For every matching predicate two LinkAnnotations are saved:
         one with equiv_pred pointing at the ontology entity, and one
         rdfs:range assertion that (per the original author's note)
         records that the unit of measurement is millimeters.
         Returns None.
     """
     zooarch_vocab = 'http://opencontext.org/vocabularies/open-context-zooarch/'
     measure_preds = Predicate.objects\
                              .filter(project_uuid=project_uuid,
                                      data_type='xsd:double')
     for pred in measure_preds:
         try:
             man_obj = Manifest.objects.get(uuid=pred.uuid)
         except Manifest.DoesNotExist:
             # no manifest item, so no label to compare
             continue
         l_ents = LinkEntity.objects\
                            .filter(label=man_obj.label,
                                    vocab_uri=zooarch_vocab)[:1]
         if len(l_ents) < 1:
             continue
         # a match on the label: assert the equivalence, then the unit
         uri = l_ents[0].uri
         print(str(man_obj.label) + ' matches ' + uri)
         assertions = (
             (equiv_pred, uri),
             ('http://www.w3.org/2000/01/rdf-schema#range',
              'http://www.wikidata.org/wiki/Q174789'),
         )
         for assert_pred, assert_obj in assertions:
             la = LinkAnnotation()
             la.subject = man_obj.uuid  # the subordinate is the subject
             la.subject_type = man_obj.item_type
             la.project_uuid = man_obj.project_uuid
             la.source_id = 'label-match'
             la.predicate_uri = assert_pred
             la.object_uri = assert_obj
             la.save()
Ejemplo n.º 39
0
 def find_related_geonames(self, username='******'):
     """ Looks up GeoNames entities for region items and links them.

         Every manifest item of project '0' with class
         'oc-gen:cat-region' and item type 'subjects' is searched in
         GeoNames by its context path; the number of '/' separators
         in the context is passed as the admin level. The first
         GeoNames hit is stored as a LinkEntity (if missing) and
         related to the item with a 'skos:closeMatch' LinkAnnotation
         (if not already related). Returns None.

         :param username: passed through to
             GeonamesAPI.search_admin_entity()
     """
     regions = Manifest.objects\
                       .filter(project_uuid='0',
                               class_uri='oc-gen:cat-region',
                               item_type='subjects')
     for man_obj in regions:
         print('Checking slug: ' + man_obj.slug)
         context = Subject.objects.get(uuid=man_obj.uuid).context
         # depth of the item in the context path
         if '/' in context:
             admin_level = max(len(context.split('/')) - 1, 0)
         else:
             admin_level = 0
         geo_api = GeonamesAPI()
         json_r = geo_api.search_admin_entity(context.replace('/', ' '),
                                              admin_level,
                                              username)
         if not isinstance(json_r, dict):
             continue
         print('Geonames result found.')
         if 'geonames' not in json_r or len(json_r['geonames']) < 1:
             # a response, but without any hits
             continue
         # only the first hit is used
         top_hit = json_r['geonames'][0]
         geo_id = top_hit['geonameId']
         label = top_hit['name']
         alt_label = top_hit['toponymName']
         geonames_uri = 'http://www.geonames.org/' + str(geo_id)
         known_ents = LinkEntity.objects.filter(uri=geonames_uri)[:1]
         if len(known_ents) < 1:
             # the entity is new to us, so create it
             ent = LinkEntity()
             ent.uri = geonames_uri
             ent.label = label
             ent.alt_label = alt_label
             ent.vocab_uri = GeonamesAPI().VOCAB_URI
             ent.ent_type = 'class'
             ent.save()
         print(geonames_uri)
         known_annos = LinkAnnotation.objects\
                                     .filter(subject=man_obj.uuid,
                                             object_uri=geonames_uri)[:1]
         if len(known_annos) >= 1:
             print('Relation already known.')
             continue
         print('Adding new annotation!')
         new_la = LinkAnnotation()
         new_la.subject = man_obj.uuid
         new_la.subject_type = man_obj.item_type
         new_la.project_uuid = man_obj.project_uuid
         new_la.source_id = man_obj.source_id
         new_la.predicate_uri = 'skos:closeMatch'
         new_la.object_uri = geonames_uri
         new_la.creator_uuid = ''
         new_la.save()
Ejemplo n.º 40
0
 def generate_table_metadata(self, table_id, overwrite=False):
     """ Creates Dublin Core link annotations describing one export table.

     Looks up the ExpTable and Manifest records for the table and then,
     for each predicate in self.metadata_predicates, saves LinkAnnotation
     records on the manifest item for contributors, creators, source
     projects or subject categories. Nothing is written when either
     record is missing.

     :param table_id: identifier of the export table; expanded into its
         internal and public forms with ExpTableIdentifiers
     :param overwrite: if True, existing annotations for a predicate are
         deleted and regenerated; if False, a predicate that already has
         at least one annotation on the table is skipped
     :return: None
     """
     identifiers = ExpTableIdentifiers()
     identifiers.make_all_identifiers(table_id)
     subject_ids = [identifiers.table_id, identifiers.public_table_id]
     try:
         exp_table = ExpTable.objects.get(table_id=table_id)
     except ExpTable.DoesNotExist:
         print('No ExpTable object for: ' + identifiers.public_table_id)
         exp_table = None
     try:
         manifest = Manifest.objects.get(uuid=identifiers.public_table_id)
     except Manifest.DoesNotExist:
         print('No manifest object for: ' + identifiers.public_table_id)
         manifest = None
     if exp_table is None or manifest is None:
         # both records are required before any metadata is written
         return

     def save_annotation(pred, sort, count, object_uri):
         # saves one annotation on the table's manifest item; the count
         # travels along in the obj_extra field
         extra = LastUpdatedOrderedDict()
         extra['count'] = count
         anno = LinkAnnotation()
         anno.subject = manifest.uuid
         anno.subject_type = manifest.item_type
         anno.project_uuid = manifest.project_uuid
         anno.source_id = 'exp-table-manage'
         anno.predicate_uri = pred
         anno.object_uri = object_uri
         anno.creator_uuid = '0'
         anno.sort = sort
         anno.obj_extra = extra
         anno.save()

     # project counts are fetched at most once and shared by the
     # creator and source predicates
     project_counts = None
     for meta_pred in self.metadata_predicates:
         existing = LinkAnnotation.objects.filter(
             subject__in=subject_ids, predicate_uri=meta_pred)
         if overwrite:
             deleted = existing.delete()
             print('Deleted annoations ' + str(deleted) +
                   ' for ' + meta_pred)
         elif len(existing[:1]) > 0:
             # metadata for this predicate is already present
             continue
         if meta_pred == 'dc-terms:contributor':
             print('Getting contributors for ' + table_id)
             authors = self.get_table_author_counts(table_id)
             for sort, author in enumerate(authors, start=1):
                 save_annotation(
                     meta_pred,
                     sort,
                     author['count'],
                     URImanagement.make_oc_uri(author['uuid'], 'persons'))
         elif meta_pred in ('dc-terms:creator', 'dc-terms:source'):
             if project_counts is None:
                 print('Getting projects for ' + table_id)
                 project_counts = self.get_table_project_uuid_counts(
                     table_id)
             if meta_pred == 'dc-terms:creator':
                 print('Getting creators for ' + table_id)
                 creators = self.make_table_dc_creator_list(project_counts)
                 for sort, creator in enumerate(creators, start=1):
                     save_annotation(
                         meta_pred, sort, creator['count'], creator['id'])
             else:
                 print('Getting sources for ' + table_id)
                 for sort, proj in enumerate(project_counts, start=1):
                     save_annotation(
                         meta_pred,
                         sort,
                         proj['num_uuids'],
                         URImanagement.make_oc_uri(proj['project_uuid'],
                                                   'projects'))
         elif meta_pred == 'dc-terms:subject':
             print('Getting subjects for ' + table_id)
             subjects = self.make_table_dc_subject_category_list(table_id)
             for sort, subject in enumerate(subjects, start=1):
                 save_annotation(
                     meta_pred, sort, subject['count'], subject['id'])
Ejemplo n.º 41
0
 def validate_make_eol_hierarchy(self, child_uri, parent_uri):
     """ Links an EOL child entity to a parent unless it already has one.

     Both URIs are cleaned first. If the child dereferences to a known
     entity that already has parents, only a message is printed. If the
     child is unknown, its label data is fetched and saved with
     self.get_save_entity_label and the relation is created. The
     relation is stored as a LinkAnnotation using self.CHILD_PARENT_REL.

     :param child_uri: URI of the child entity
     :param parent_uri: URI of the proposed parent entity
     :return: None
     """
     uri_cleaner = LinkEntityGeneration()
     # strip off any cruft in the URIs
     child_uri = uri_cleaner.make_clean_uri(child_uri)
     parent_uri = uri_cleaner.make_clean_uri(parent_uri)
     if Entity().dereference(child_uri):
         parents = LinkRecursion().get_jsonldish_entity_parents(child_uri,
                                                                False)
         # False or an empty result both mean "no parents yet"
         needs_parent = parents is False or len(parents) == 0
     else:
         # the child does not yet exist, so OK to make the relation
         needs_parent = True
         print('Getting missing data for: ' + child_uri)
         self.get_save_entity_label(child_uri)
     if not needs_parent:
         print('Already in hierarchy: ' + child_uri)
         return
     print('OK, make rel for: ' + child_uri + ' in ' + parent_uri)
     relation = LinkAnnotation()
     relation.subject = child_uri
     relation.subject_type = 'uri'
     relation.project_uuid = '0'
     relation.source_id = 'manual-eol-manage'
     relation.predicate_uri = self.CHILD_PARENT_REL
     relation.object_uri = parent_uri
     relation.sort = 1
     relation.save()
Ejemplo n.º 42
0
 def store_records(self, act_table, recs):
     """
     Saves each record retrieved for a given table with the model
     class that matches the table name.

     :param act_table: name of the table the records belong to
     :param recs: dict whose values are dicts of model field values;
         the keys are not used
     :return: None

     Records for a table name that is not listed below are skipped
     without being saved.
     """
     for record in recs.values():
         if act_table == "link_annotations":
             model = LinkAnnotation
         elif act_table == "link_entities":
             model = LinkEntity
         elif act_table == "link_hierarchies":
             model = LinkHierarchy
         elif act_table == "oc_chronology":
             model = Chronology
         elif act_table == "oc_geodata":
             model = Geodata
         elif act_table == "oc_mediafiles":
             model = Mediafile
         elif act_table == "oc_documents":
             model = OCdocument
         elif act_table == "oc_persons":
             model = Person
         elif act_table == "oc_projects":
             model = Project
         elif act_table == "oc_strings":
             model = OCstring
         elif act_table == "oc_types":
             model = OCtype
         elif act_table == "oc_events":
             model = Event
         elif act_table == "oc_predicates":
             model = Predicate
         elif act_table == "oc_identifiers":
             model = StableIdentifer
         elif act_table == "oc_obsmetadata":
             model = ObsMetadata
         else:
             # no model for this table name, nothing to save
             continue
         model(**record).save()
Ejemplo n.º 43
0
 def add_period_coverage(self, uuid, period_uri):
     """ Adds a PeriodO period annotation to an item.

     PeriodO data is loaded through PeriodoAPI the first time it is
     needed and kept on self.periodo_data for later calls. When both the
     period and the item are found, the period collection and period
     entities are saved if needed, and a LinkAnnotation with
     self.DC_PERIOD_PRED is created unless an equivalent one (in any
     identifier variant) already exists.

     :param uuid: identifier of the item to annotate
     :param period_uri: URI of the PeriodO period
     :return: True only when a new annotation was saved, otherwise False
     """
     periodo = PeriodoAPI()
     if isinstance(self.periodo_data, dict):
         # reuse the data kept from an earlier call
         periodo.periodo_data = self.periodo_data
     else:
         self.check_add_period_pred()
         periodo.get_periodo_data()
         self.periodo_data = periodo.periodo_data
     if not isinstance(periodo.periodo_data, dict):
         return False
     period = periodo.get_period_by_uri(period_uri)
     if not isinstance(period, dict):
         return False
     # we found the period, now check that the item is found
     entity = Entity()
     if not entity.dereference(uuid):
         return False
     # save the period collection entity to database, if needed
     self.check_add_period_collection(period)
     # save the period entity to the database, if needed
     self.check_add_period(period)
     # look for the annotation under every variant of expressing
     # its subject, predicate and object
     equivalence = LinkEquivalence()
     subject_variants = equivalence.get_identifier_list_variants(uuid)
     predicate_variants = equivalence.get_identifier_list_variants(
         self.DC_PERIOD_PRED)
     object_variants = equivalence.get_identifier_list_variants(
         period['period-meta']['uri'])
     already_linked = LinkAnnotation.objects.filter(
         subject__in=subject_variants,
         predicate_uri__in=predicate_variants,
         object_uri__in=object_variants)[:1]
     if len(already_linked) > 0:
         return False
     # OK, safe to make the annotation
     annotation = LinkAnnotation()
     annotation.subject = entity.uuid
     annotation.subject_type = entity.item_type
     annotation.project_uuid = entity.project_uuid
     annotation.source_id = self.source_id
     annotation.predicate_uri = self.DC_PERIOD_PRED
     annotation.object_uri = period['period-meta']['uri']
     annotation.creator_uuid = ''
     annotation.save()
     return True
Ejemplo n.º 44
0
 def find_related_geonames(self, username='******'):
     """ Links region items to GeoNames entities found by searching.

     For every manifest item in project '0' with class
     'oc-gen:cat-region' and item type 'subjects', the item's context
     path is sent to the GeoNames admin-entity search. The first result
     is saved as a LinkEntity if it is not yet known, and a
     'skos:closeMatch' LinkAnnotation is added to the item unless the
     item already has an annotation pointing at that GeoNames URI.

     :param username: user name passed on to the GeoNames search
     :return: None
     """
     regions = Manifest.objects.filter(project_uuid='0',
                                       class_uri='oc-gen:cat-region',
                                       item_type='subjects')
     for region in regions:
         print('Checking slug: ' + region.slug)
         context = Subject.objects.get(uuid=region.uuid).context
         # number of '/' separators in the context path, 0 when none
         admin_level = len(context.split('/')) - 1 if '/' in context else 0
         response = GeonamesAPI().search_admin_entity(
             context.replace('/', ' '), admin_level, username)
         if not isinstance(response, dict):
             continue
         # we found a result from GeoNames!
         print('Geonames result found.')
         if 'geonames' not in response or len(response['geonames']) < 1:
             continue
         # only the first result is used
         geo_id = response['geonames'][0]['geonameId']
         label = response['geonames'][0]['name']
         alt_label = response['geonames'][0]['toponymName']
         geonames_uri = 'http://www.geonames.org/' + str(geo_id)
         known_entities = LinkEntity.objects.filter(uri=geonames_uri)[:1]
         if len(known_entities) < 1:
             # we need to create this entity
             entity = LinkEntity()
             entity.uri = geonames_uri
             entity.label = label
             entity.alt_label = alt_label
             entity.vocab_uri = GeonamesAPI().VOCAB_URI
             entity.ent_type = 'class'
             entity.save()
         print(geonames_uri)
         known_links = LinkAnnotation.objects.filter(
             subject=region.uuid, object_uri=geonames_uri)[:1]
         if len(known_links) > 0:
             print('Relation already known.')
             continue
         # we need to add the annotation linking this item
         print('Adding new annotation!')
         link = LinkAnnotation()
         link.subject = region.uuid
         link.subject_type = region.item_type
         link.project_uuid = region.project_uuid
         link.source_id = region.source_id
         link.predicate_uri = 'skos:closeMatch'
         link.object_uri = geonames_uri
         link.creator_uuid = ''
         link.save()
Ejemplo n.º 45
0
 def match_california_site(self, site_uuid):
     """ Matches a California site's alternate names against tDAR
         site keywords.

     Items that do not exist, or that already have a 'dc-terms:subject'
     annotation containing self.TDAR_VOCAB, are skipped. Otherwise the
     alternate site or place names are read from the item's JSON-LD
     observations and each one is looked up in tDAR. A result counts as
     a match when its label equals the name, ignoring case. Each match
     is saved as a LinkEntity (if new) plus a LinkAnnotation.

     After every lookup the request error state is updated: a failed
     request lengthens self.error_wait and sleeps that long, and the
     process exits once the wait exceeds self.max_wait.

     :param site_uuid: uuid of the site item to match
     :return: number of matching tDAR results found
     """
     oc_item = OCitem()
     if not oc_item.check_exists(site_uuid):
         return 0
     prior_links = LinkAnnotation.objects.filter(
         subject=site_uuid,
         predicate_uri='dc-terms:subject',
         object_uri__contains=self.TDAR_VOCAB)[:1]
     if len(prior_links) > 0:
         # this item already has a tDAR id
         return 0
     # first, generate the item's JSON-LD
     oc_item.generate_json_ld()
     alt_name_pred = 'oc-pred:52-alternate-site-or-place-name'
     observations = []
     if 'oc-gen:has-obs' in oc_item.json_ld:
         if isinstance(oc_item.json_ld['oc-gen:has-obs'], list):
             observations = oc_item.json_ld['oc-gen:has-obs']
     request_keywords = []
     for obs in observations:
         if alt_name_pred not in obs:
             continue
         if not isinstance(obs[alt_name_pred], list):
             continue
         for name_obj in obs[alt_name_pred]:
             if 'xsd:string' not in name_obj:
                 continue
             if isinstance(name_obj['xsd:string'], str):
                 request_keywords.append(name_obj['xsd:string'])
     print('Checking names in tDAR: ' + '; '.join(request_keywords))
     found_matches = 0
     for keyword in request_keywords:
         tdar_api = tdarAPI()
         results = tdar_api.get_site_keyword(keyword)
         if isinstance(results, list):
             keyword_lower = keyword.lower()
             for result in results[:self.max_results]:
                 # only a case-insensitive exact match is accepted
                 if result['label'].lower() != keyword_lower:
                     print('Almost! ' + result['label'] +
                           ' is not exactly: ' + keyword)
                     continue
                 print('FOUND ' + result['label'])
                 found_matches += 1
                 # first save the linked entity, if it is not yet known
                 try:
                     LinkEntity.objects.get(uri=result['id'])
                 except LinkEntity.DoesNotExist:
                     entity_missing = True
                 else:
                     entity_missing = False
                 if entity_missing:
                     entity = LinkEntity()
                     entity.uri = result['id']
                     entity.label = result['label']
                     entity.alt_label = result['label']
                     entity.vocab_uri = self.TDAR_VOCAB
                     entity.ent_type = 'type'
                     entity.save()
                 # now save the link annotation
                 link = LinkAnnotation()
                 link.subject = oc_item.manifest.uuid
                 link.subject_type = oc_item.manifest.item_type
                 link.project_uuid = oc_item.manifest.project_uuid
                 link.source_id = 'tdar-api-lookup'
                 link.predicate_uri = self.DC_TERMS_SUBJECT
                 link.object_uri = result['id']
                 link.save()
         if not tdar_api.request_error:
             self.request_error = False
             if self.error_wait >= self.base_wait:
                 print('HTTP requests resumed OK, will continue.')
                 self.error_wait = 0
             continue
         self.request_error = True
         print('HTTP request to tDAR failed!')
         self.error_wait += self.base_wait
         if self.error_wait > self.max_wait:
             print('Too many failures, quiting...')
             sys.exit('Quitting process')
         # sleep before moving on to the next keyword
         print('Will try again in ' + str(self.error_wait) +
               ' seconds...')
         sleep(self.error_wait)
     return found_matches
Ejemplo n.º 46
0
 def store_records(self, act_table, recs):
     """
     Saves each record retrieved for a given table, subject to the
     write checks configured on this object.

     :param act_table: name of the table the records belong to
     :param recs: iterable of dicts of model field values
     :return: None

     Every record is first passed to self.check_allow_write and
     self.prep_update_keep_old. A record is skipped with a message when
     writing is not allowed and self.update_keep_old is False. Records
     for an unlisted table name are skipped silently. Errors raised
     while saving are printed and do not stop the loop.
     """
     for rec_num, raw_record in enumerate(recs, start=1):
         allow_write = self.check_allow_write(act_table, raw_record)
         record = self.prep_update_keep_old(act_table, raw_record)
         if allow_write is False and self.update_keep_old is False:
             print('\n Not allowed to overwite record.' + str(rec_num))
             continue
         if act_table == 'link_annotations':
             model = LinkAnnotation
         elif act_table == 'link_entities':
             model = LinkEntity
         elif act_table == 'oc_assertions':
             model = Assertion
         elif act_table == 'oc_manifest':
             model = Manifest
         elif act_table == 'oc_subjects':
             model = Subject
         elif act_table == 'oc_mediafiles':
             model = Mediafile
         elif act_table == 'oc_documents':
             model = OCdocument
         elif act_table == 'oc_persons':
             model = Person
         elif act_table == 'oc_projects':
             model = Project
         elif act_table == 'oc_strings':
             model = OCstring
         elif act_table == 'oc_types':
             model = OCtype
         elif act_table == 'oc_geospace':
             model = Geospace
         elif act_table == 'oc_events':
             model = Event
         elif act_table == 'oc_predicates':
             model = Predicate
         elif act_table == 'oc_identifiers':
             model = StableIdentifer
         elif act_table == 'oc_obsmetadata':
             model = ObsMetadata
         else:
             # no model for this table name, nothing to save
             continue
         new_obj = model(**record)
         try:
             new_obj.save(force_insert=self.force_insert,
                          force_update=self.update_keep_old)
         except Exception as error:
             print('Something slipped past in ' + act_table +
                   '...' + str(error))
Ejemplo n.º 47
0
 def match_trinomial_obj(self, tri):
     """ Matches a trinomial object against tDAR site keywords, if it
         has not been matched yet.

     Nothing is done when the item has no Manifest record or already
     has a 'dc-terms:subject' annotation containing self.TDAR_VOCAB.
     Otherwise the trinomial (plus zero-padded variants when
     self.lead_zero_check is set) is looked up in tDAR. A result counts
     as a match when its label equals the trinomial, or differs from it
     only by leading zeros in the site part. Each match is saved as a
     LinkEntity (if new) plus a LinkAnnotation.

     After every lookup the request error state is updated: a failed
     request lengthens self.error_wait and sleeps that long, and the
     process exits once the wait exceeds self.max_wait.

     :param tri: object providing the 'uuid' and 'trinomial' attributes
     :return: number of matching tDAR results found
     """
     try:
         manifest = Manifest.objects.get(uuid=tri.uuid)
     except Manifest.DoesNotExist:
         manifest = None
     prior_links = LinkAnnotation.objects.filter(
         subject=tri.uuid,
         predicate_uri='dc-terms:subject',
         object_uri__contains=self.TDAR_VOCAB)[:1]
     if len(prior_links) > 0 or manifest is None:
         return 0

     def zero_padded(parts, max_site_len):
         # yields state + county + site, adding one more leading zero
         # to the site part each time, while the site part is still
         # shorter than max_site_len
         site = parts['site']
         while len(site) < max_site_len:
             site = '0' + site
             yield parts['state'] + parts['county'] + site

     found_matches = 0
     tri_man = TrinomialManage()
     request_keywords = [tri.trinomial]
     if self.lead_zero_check:
         # also look up variants with extra leading zeros
         request_keywords.extend(
             zero_padded(tri_man.parse_trinomial(tri.trinomial), 4))
     for keyword in request_keywords:
         tdar_api = tdarAPI()
         results = tdar_api.get_site_keyword(keyword)
         if isinstance(results, list):
             for result in results[:self.max_results]:
                 if result['label'] == tri.trinomial:
                     # the trinomial and the tDAR result exactly match
                     match_real = True
                 else:
                     # accept a difference in leading zeros only
                     parts = tri_man.parse_trinomial(tri.trinomial)
                     match_real = any(
                         variant == result['label']
                         for variant in zero_padded(parts, 5))
                 if not match_real:
                     print('Almost! ' + result['label'] +
                           ' is not exactly: ' + tri.trinomial)
                     continue
                 found_matches += 1
                 # first save the linked entity, if it is not yet known
                 try:
                     LinkEntity.objects.get(uri=result['id'])
                 except LinkEntity.DoesNotExist:
                     entity_missing = True
                 else:
                     entity_missing = False
                 if entity_missing:
                     entity = LinkEntity()
                     entity.uri = result['id']
                     entity.label = result['label']
                     entity.alt_label = result['label']
                     entity.vocab_uri = self.TDAR_VOCAB
                     entity.ent_type = 'type'
                     entity.save()
                 # now save the link annotation
                 link = LinkAnnotation()
                 link.subject = tri.uuid
                 link.subject_type = manifest.item_type
                 link.project_uuid = manifest.project_uuid
                 link.source_id = 'tdar-api-lookup'
                 link.predicate_uri = self.DC_TERMS_SUBJECT
                 link.object_uri = result['id']
                 link.save()
         if not tdar_api.request_error:
             self.request_error = False
             if self.error_wait >= self.base_wait:
                 print('HTTP requests resumed OK, will continue.')
                 self.error_wait = 0
             continue
         self.request_error = True
         print('HTTP request to tDAR failed!')
         self.error_wait += self.base_wait
         if self.error_wait > self.max_wait:
             print('Too many failures, quiting...')
             sys.exit('Quitting process')
         # sleep before moving on to the next keyword
         print('Will try again in ' + str(self.error_wait) +
               ' seconds...')
         sleep(self.error_wait)
     return found_matches
Ejemplo n.º 48
0
def add_missing_containing_regions(project_uuid='0', source_id=SOURCE_ID):
    """Adds the regions listed in ADD_REGIONS with their GeoNames links.

    For each entry a context row of class 'oc-gen:cat-region' is loaded
    under the given parent uuid with load_context_row. The GeoNames URI
    is then saved as a LinkEntity if it is not yet known, and a
    'skos:closeMatch' LinkAnnotation from the new region to that URI is
    saved unless one already exists.

    :param project_uuid: project the rows and annotations are saved under
    :param source_id: source id recorded on the rows and annotations
    :return: None
    """
    for (_state, parent_uuid, region_label,
         region_uuid, geonames_uri) in ADD_REGIONS:
        load_context_row(
            project_uuid=project_uuid,
            source_id=source_id,
            row={
                'parent_uuid': parent_uuid,
                'context_uuid': region_uuid,
                'label': region_label,
                'class_uri': 'oc-gen:cat-region',
            })
        known_entity = LinkEntity.objects.filter(uri=geonames_uri).first()
        if not known_entity:
            entity = LinkEntity()
            entity.uri = geonames_uri
            entity.label = region_label
            entity.alt_label = region_label
            entity.vocab_uri = GeonamesAPI().VOCAB_URI
            entity.ent_type = 'class'
            entity.save()
        known_link = LinkAnnotation.objects.filter(
            subject=region_uuid, object_uri=geonames_uri).first()
        if known_link:
            continue
        link = LinkAnnotation()
        link.subject = region_uuid
        link.subject_type = 'subjects'
        link.project_uuid = project_uuid
        link.source_id = source_id
        link.predicate_uri = 'skos:closeMatch'
        link.object_uri = geonames_uri
        link.creator_uuid = ''
        link.save()
Ejemplo n.º 49
0
 def link_sites_from_filecache(self):
     """ Updates Open Context with linked entities and annotations for
         the trinomial references stored in the file cache.

     Reads the dict cached under self.cache_filekey. For every entry in
     its 'trinomial_refs' list, the referenced record is saved as a
     LinkEntity if it is not yet known (titles longer than 194
     characters are shortened). For every trinomial that matches the
     label of a site in the manifest, a 'skos:broader' annotation from
     the record to its source is saved, and the site gets a
     'dc-terms:isReferencedBy' annotation to the record unless it is
     already linked to it.

     Annotation saves are best effort: each annotation is saved once,
     and a failure is printed and skipped rather than raised.

     :return: None
     """

     def save_best_effort(annotation):
         # one save attempt; report failures without stopping the run
         try:
             annotation.save()
         except Exception as error:
             print('Could not save annotation for ' +
                   str(annotation.subject) + ': ' + str(error))

     if self.filecache is not None:
         self.filecache.working_dir = self.working_dir
         json_obj = self.filecache.get_dict_from_file(self.cache_filekey)
         if isinstance(json_obj, dict):
             if 'trinomial_refs' in json_obj:
                 for tri_ref in json_obj['trinomial_refs']:
                     uri = tri_ref['rec_uri']
                     title = tri_ref['title']
                     if len(title) > 194:
                         # shorten long titles and mark the cut
                         title = title[0:190] + '... '
                     l_exists = LinkEntity.objects.filter(uri=uri)[:1]
                     if len(l_exists) < 1:
                         l_ent = LinkEntity()
                         l_ent.uri = uri
                         l_ent.label = title
                         l_ent.alt_label = title
                         l_ent.vocab_uri = tri_ref['source_uri']
                         l_ent.ent_type = 'class'
                         l_ent.save()
                     for trinomial in tri_ref['trinomials']:
                         man_objs = Manifest.objects.filter(
                             label=trinomial, class_uri='oc-gen:cat-site')
                         if len(man_objs) > 0:
                             # only the first matching site is linked
                             man_obj = man_objs[0]
                             la = LinkAnnotation()
                             la.subject = uri  # the subordinate is the subject
                             la.subject_type = 'uri'
                             la.project_uuid = man_obj.project_uuid
                             la.source_id = self.source_id
                             la.predicate_uri = "skos:broader"
                             la.object_uri = tri_ref['source_uri']
                             save_best_effort(la)
                             links = LinkAnnotation.objects\
                                                   .filter(subject=man_obj.uuid,
                                                           object_uri=uri)[:1]
                             if len(links) < 1:
                                 print('Link ' + man_obj.label + ' (' +
                                       man_obj.uuid + ') to ' + uri)
                                 la = LinkAnnotation()
                                 la.subject = man_obj.uuid  # the subordinate is the subject
                                 la.subject_type = man_obj.item_type
                                 la.project_uuid = man_obj.project_uuid
                                 la.source_id = self.source_id
                                 la.predicate_uri = 'dc-terms:isReferencedBy'
                                 la.object_uri = uri
                                 save_best_effort(la)