def add_project_types_with_annotations_to_graph(self, graph):
    """Adds project types that have annotations to the graph list.

    :param graph: list of JSON-LD item dicts; mutated in place.
    :return: the graph list.
    """
    type_sql_dict_list = self.get_working_project_types()
    if isinstance(type_sql_dict_list, list):
        # consolidate things so a given type is given once in the list
        # of a graph. To do so, we first put everything in a all_types
        # dict, keyed by the type's URI.
        all_types = LastUpdatedOrderedDict()
        for sql_dict in type_sql_dict_list:
            type_uri = URImanagement.make_oc_uri(sql_dict['type_uuid'],
                                                 'types')
            if type_uri not in all_types:
                # first row for this type; make its base record
                act_type = LastUpdatedOrderedDict()
                act_type['@id'] = type_uri
                act_type['label'] = sql_dict['type_label']
                act_type['owl:sameAs'] = URImanagement.make_oc_uri(
                    sql_dict['type_slug'], 'types')
                act_type['uuid'] = sql_dict['type_uuid']
                act_type['slug'] = sql_dict['type_slug']
            else:
                # continue the record already started for this type
                act_type = all_types[type_uri]
            # prefix common URIs for the annotation's predicate
            la_pred_uri = URImanagement.prefix_common_uri(
                sql_dict['predicate_uri'])
            # add the annotation's object, de-duplicated by the helper
            act_type = self.add_unique_object_dict_to_pred(
                act_type, la_pred_uri, sql_dict['object_uri'])
            all_types[type_uri] = act_type
        for type_uri, act_type in all_types.items():
            graph.append(act_type)
    return graph
def add_project_types_with_annotations_to_graph(self, graph):
    """Adds project types that have annotations to the graph list.

    :param graph: list of JSON-LD item dicts; mutated in place.
    :return: the graph list.
    """
    type_rows = self.get_working_project_types()
    if isinstance(type_rows, list):
        # A given type can appear on several rows, so merge the rows
        # into one record per type URI before touching the graph.
        merged = LastUpdatedOrderedDict()
        for row in type_rows:
            type_uri = URImanagement.make_oc_uri(row['type_uuid'], 'types')
            if type_uri in merged:
                record = merged[type_uri]
            else:
                # first row for this type; start its base record
                record = LastUpdatedOrderedDict()
                record['@id'] = type_uri
                record['label'] = row['type_label']
                record['owl:sameAs'] = URImanagement.make_oc_uri(
                    row['type_slug'], 'types')
                record['uuid'] = row['type_uuid']
                record['slug'] = row['type_slug']
            # prefix common URIs for the annotation's predicate, then
            # append this row's object under that predicate key
            anno_pred = URImanagement.prefix_common_uri(row['predicate_uri'])
            record.setdefault(anno_pred, [])
            record[anno_pred].append(
                self.make_object_dict_item(row['object_uri']))
            merged[type_uri] = record
        for record in merged.values():
            graph.append(record)
    return graph
def make_save_doi_by_uuid(self, uuid, metadata=None):
    """Makes and saves a DOI identifier for a uuid.

    Mints a DOI via EZID only when the item does not already have
    one. Returns True when a new DOI was saved, False otherwise.

    :param uuid: uuid of the item to get a DOI.
    :param metadata: optional dict of DOI metadata; built from the
        item when None.
    """
    ok = False
    oc_uri = None
    dois = StableIdentifer.objects.filter(uuid=uuid, stable_type='doi')[:1]
    if len(dois) < 1:
        # the item doesn't yet have a DOI, so make one!
        # (original comment wrongly said "ARK id" here)
        oc_item = OCitem()
        oc_item.check_exists(uuid)  # sets oc_item.exists
        if oc_item.exists:
            if metadata is None:
                metadata = self.make_doi_metadata_by_uuid(uuid, oc_item)
            if isinstance(metadata, dict):
                if '_target' in metadata:
                    oc_uri = metadata['_target']
                else:
                    oc_uri = URImanagement.make_oc_uri(
                        oc_item.manifest.uuid, oc_item.item_type)
            if isinstance(oc_uri, str):
                print('Make DOI id for: ' + oc_uri)
                ezid_response = self.ezid.mint_identifier(oc_uri, metadata, 'doi')
                if self.do_test:
                    print('EZID response: ' + str(ezid_response))
                if isinstance(ezid_response, str):
                    if '|' in ezid_response:
                        # EZID can return several ids separated by '|';
                        # save only the DOI id among them
                        for resp_id in ezid_response.split('|'):
                            if 'doi:' in resp_id:
                                ok = self.save_oc_item_stable_id(
                                    oc_item, resp_id, 'doi')
                    else:
                        ok = self.save_oc_item_stable_id(
                            oc_item, ezid_response, 'doi')
    return ok
def make_table_dc_creator_list(self, proj_uuid_counts):
    """Makes a list of Dublin Core creators from a
    project uuid + counts list.

    :param proj_uuid_counts: list of dicts with 'project_uuid'
        and 'num_uuids' keys.
    :return: list of dicts with 'id' (person URI) and 'count'.
    """
    dc_creators = []
    for proj_uuid_count in proj_uuid_counts:
        project_uuid = proj_uuid_count['project_uuid']
        proj_count = proj_uuid_count['num_uuids']
        auth = Authorship()
        auth.get_project_authors(project_uuid)
        if len(auth.creators) < 1 and len(auth.contributors) > 0:
            # no creators for the project; fall back to contributors
            auth.creators = auth.contributors
        # (removed an unused loop counter `i` from the original)
        for auth_uuid in auth.creators:
            auth_man = False
            try:
                auth_man = Manifest.objects.get(uuid=auth_uuid)
            except Manifest.DoesNotExist:
                auth_man = False
            if auth_man is not False:
                item = LastUpdatedOrderedDict()
                item['id'] = URImanagement.make_oc_uri(auth_uuid, 'persons')
                item['count'] = proj_count
                dc_creators.append(item)
    return dc_creators
def make_table_dc_creator_list(self, proj_uuid_counts):
    """Makes a list of Dublin Core creators from a
    project uuid + counts list.

    :param proj_uuid_counts: list of dicts with 'project_uuid'
        and 'num_uuids' keys.
    :return: list of dicts with 'id' (person URI) and 'count'.
    """
    dc_creators = []
    for proj_uuid_count in proj_uuid_counts:
        project_uuid = proj_uuid_count['project_uuid']
        proj_count = proj_uuid_count['num_uuids']
        auth = Authorship()
        auth.get_project_authors(project_uuid)
        if len(auth.creators) < 1 and len(auth.contributors) > 0:
            # no creators for the project; fall back to contributors
            auth.creators = auth.contributors
        # (removed an unused loop counter `i` from the original)
        for auth_uuid in auth.creators:
            try:
                Manifest.objects.get(uuid=auth_uuid)
            except Manifest.DoesNotExist:
                # skip creators that lack a manifest record
                continue
            item = LastUpdatedOrderedDict()
            item['id'] = URImanagement.make_oc_uri(auth_uuid, 'persons')
            item['count'] = proj_count
            dc_creators.append(item)
    return dc_creators
def make_json_ld(self):
    """Makes a JSON-LD object for the export table metadata.

    NOTE(review): still needs an oc-table namespace, the cc-rel
    namespace, and http://www.w3.org/2003/01/geo/ as
    geo:lat, geo:lon.

    :return: LastUpdatedOrderedDict of table metadata (empty when
        self.exp_tab is False).
    """
    json_ld = LastUpdatedOrderedDict()
    if self.exp_tab is not False:
        json_ld['id'] = URImanagement.make_oc_uri(self.public_table_id,
                                                  'tables')
        json_ld['uuid'] = self.public_table_id
        json_ld['label'] = self.exp_tab.label
        json_ld['fields'] = self.exp_tab.field_count
        json_ld['rows'] = self.exp_tab.row_count
        json_ld['dc-terms:identifier'] = self.table_id
        json_ld['dc-terms:issued'] = self.exp_tab.created.date().isoformat()
        json_ld['dc-terms:modified'] = self.exp_tab.updated.date().isoformat()
        json_ld['dc-terms:abstract'] = self.exp_tab.abstract
        json_ld = self.get_link_annotations(json_ld)
        stable_ids = self.get_stable_ids()
        if len(stable_ids) > 0:
            json_ld['owl:sameAs'] = stable_ids
        json_ld['has-fields'] = self.get_field_list()
        # removed a dead commented-out block that copied
        # self.exp_tab.meta_json items into json_ld
    return json_ld
def add_general_json_ld(self):
    """Adds general (manifest) information to the JSON-LD object,
    then dispatches to the item-type specific builders.
    """
    self.json_ld['id'] = URImanagement.make_oc_uri(self.uuid, self.item_type)
    self.json_ld['uuid'] = self.uuid
    self.json_ld['slug'] = self.slug
    self.json_ld['label'] = self.label
    # add multilingual alternative labels
    if isinstance(self.manifest.localized_json, dict):
        if len(self.manifest.localized_json) > 0:
            # BUG FIX: original wrote to bare `json_ld`, an undefined
            # local name, raising NameError whenever localized labels
            # existed; it must target self.json_ld
            self.json_ld['skos:altLabel'] = self.manifest.localized_json
    if self.manifest.item_type in PartsJsonLD.ITEM_TYPE_CLASS_LIST \
            and len(self.manifest.class_uri) > 1:
        self.json_ld['category'] = [
            self.manifest.class_uri
        ]
    if self.manifest.item_type == 'projects':
        # now add the project specific data to the JSON-LD
        self.add_project_json_ld()
    elif self.manifest.item_type == 'documents':
        # now add document specific information to the JSON-LD
        self.add_document_json_ld()
    elif self.manifest.item_type == 'predicates':
        # now add the predicate specific data to the JSON-LD
        self.add_predicate_json_ld()
    elif self.manifest.item_type == 'types':
        self.add_type_json_ld()
def make_trinomial_from_site_labels(self, project_uuid, state_prefix=''):
    """Makes trinomial identifiers from the site labels of a project.

    Saves a Trinomial record for each site; sites whose labels do
    not make a valid trinomial are reported and skipped.

    :param project_uuid: uuid of the project with the sites.
    :param state_prefix: optional state code prefixed to each label.
    """
    ent = Entity()
    found = ent.dereference(project_uuid)
    if found:
        proj_label = ent.label
        sites = Manifest.objects\
            .filter(project_uuid=project_uuid,
                    class_uri='oc-gen:cat-site')
        for site in sites:
            trinomial = str(state_prefix) + site.label
            if '*' in trinomial:
                # for North Carolina, only the part before the '*' is a trinomial
                tr_ex = trinomial.split('*')
                trinomial = tr_ex[0]
            print('working on (' + site.uuid + '): ' + trinomial)
            parts = self.parse_trinomial(trinomial)
            if 'Tennessee' in proj_label:
                # Tennessee trinomials have no delimiting punctuation
                trinomial = parts['state'] + parts['county'] + str(parts['site'])
            dt = Trinomial()
            dt.uri = URImanagement.make_oc_uri(site.uuid, site.item_type)
            dt.uuid = site.uuid
            dt.label = site.label
            dt.project_label = proj_label
            dt.trinomial = trinomial
            dt.state = parts['state']
            dt.county = parts['county']
            dt.site = parts['site']
            try:
                dt.save()
                print('Trinomial: ' + trinomial + ', from: ' + site.label)
            except Exception:
                # narrowed from a bare `except:` so SystemExit and
                # KeyboardInterrupt are no longer swallowed
                print('Trinomial: ' + trinomial + ' not valid as a trinomial')
def get_item_media_files(self, man_obj):
    """Gets media file URIs for archiving, keyed by file URI.

    :param man_obj: Manifest object for the media item.
    :return: LastUpdatedOrderedDict of file_uri -> file metadata
        (filename, dc-terms:isPartOf, list of file types).
    """
    files_dict = LastUpdatedOrderedDict()
    if not isinstance(man_obj, Manifest):
        return files_dict
    med_files = Mediafile.objects\
        .filter(uuid=man_obj.uuid,
                file_type__in=self.ARCHIVE_FILE_TYPES)\
        .order_by('-filesize')
    # iterate type-by-type so output ordering follows ARCHIVE_FILE_TYPES
    for act_type in self.ARCHIVE_FILE_TYPES:
        for med_file in med_files:
            if med_file.file_type != act_type:
                continue
            file_uri = med_file.file_uri
            if '#' in file_uri:
                # keep only the part of the URI before the fragment
                file_uri = file_uri.split('#')[0]
            if file_uri not in files_dict:
                entry = LastUpdatedOrderedDict()
                entry['filename'] = self.make_archival_file_name(
                    med_file.file_type, man_obj.slug, file_uri)
                entry['dc-terms:isPartOf'] = URImanagement.make_oc_uri(
                    man_obj.uuid, man_obj.item_type)
                entry['type'] = []
                files_dict[file_uri] = entry
            files_dict[file_uri]['type'].append(med_file.file_type)
    return files_dict
def add_project_predicates_and_annotations_to_graph(self, graph):
    """Gets the project predicates and their annotations with
    database calls, appending one record per predicate to graph.

    :param graph: list of JSON-LD item dicts; mutated in place.
    :return: the graph list.
    """
    pred_sql_dict_list = self.get_working_project_predicates()
    la_preds = self.get_link_annotations_for_preds(pred_sql_dict_list)
    if not isinstance(pred_sql_dict_list, list):
        # No predicates in the project. Weird, but possible
        return graph
    # group the link annotations by their subject (predicate uuid)
    annotated_pred_uuids = {la.subject: [] for la in la_preds}
    for la in la_preds:
        annotated_pred_uuids[la.subject].append(la)
    for sql_dict in pred_sql_dict_list:
        act_pred = LastUpdatedOrderedDict()
        act_pred['@id'] = 'oc-pred:' + str(sql_dict['slug'])
        act_pred['owl:sameAs'] = URImanagement.make_oc_uri(
            sql_dict['predicate_uuid'], 'predicates')
        act_pred['label'] = sql_dict['label']
        act_pred['uuid'] = sql_dict['predicate_uuid']
        act_pred['slug'] = sql_dict['slug']
        if isinstance(sql_dict['class_uri'], str) and len(sql_dict['class_uri']) > 0:
            act_pred['oc-gen:predType'] = sql_dict['class_uri']
        # add any link annotations recorded for this predicate
        uuid_la_preds = annotated_pred_uuids.get(
            sql_dict['predicate_uuid'], [])
        for la_pred in uuid_la_preds:
            # prefix common URIs for the annotation's predicate
            la_pred_uri = URImanagement.prefix_common_uri(
                la_pred.predicate_uri)
            act_pred = self.add_unique_object_dict_to_pred(
                act_pred, la_pred_uri, la_pred.object_uri)
        graph.append(act_pred)
    return graph
def add_when_json(self, act_dict, uuid, item_type, event):
    """Adds when (time interval or instant) data to act_dict.

    :param act_dict: dict to receive the 'when' key; mutated in place.
    :param uuid: uuid of the item being described.
    :param item_type: item type of the item (unused here).
    :param event: event object supplying the chronology data.
    :return: act_dict with the 'when' dict added.
    """
    when = LastUpdatedOrderedDict()
    when['id'] = '#event-when-' + str(event.event_id)
    # BUG FIX (dead store): the original assigned event.when_type and
    # then immediately overwrote it with event.meta_type; only the
    # meta_type value was ever kept, so the dead assignment is removed
    when['type'] = event.meta_type
    # when['earliest'] = int(event.earliest)  # disabled when earliest != start
    when['start'] = ISOyears().make_iso_from_float(event.start)
    when['stop'] = ISOyears().make_iso_from_float(event.stop)
    # when['latest'] = int(event.latest)  # disabled when latest != stop
    if event.uuid != uuid:
        # we're inheriting / inferring event metadata from a parent context
        when['reference-type'] = 'inferred'
        when['reference-uri'] = URImanagement.make_oc_uri(
            event.uuid, 'subjects', self.cannonical_uris)
        rel_meta = self.item_gen_cache.get_entity(event.uuid)
        if rel_meta is not False:
            when['reference-label'] = rel_meta.label
    else:
        # metadata is specified for this specific item
        when['reference-type'] = 'specified'
        when['reference-label'] = self.manifest.label
    if self.assertion_hashes:
        when['hash_id'] = event.hash_id
    act_dict['when'] = when
    return act_dict
def make_save_ark_by_uuid(self, uuid, metadata=None):
    """Makes and saves an ARK identifier for a uuid.

    Mints an ARK via EZID only when the item does not already have
    one. Returns True when a new ARK id was saved, False otherwise.

    :param uuid: uuid of the item to get an ARK.
    :param metadata: optional dict of ARK metadata; built from the
        item when None.
    """
    ok = False
    oc_uri = None
    arks = StableIdentifer.objects.filter(uuid=uuid, stable_type='ark')[:1]
    if len(arks) < 1:
        # the item doesn't yet have an ARK id, so make one!
        oc_item = OCitem()
        oc_item.check_exists(uuid)  # sets oc_item.exists
        if oc_item.exists:
            if metadata is None:
                metadata = self.make_ark_metadata_by_uuid(uuid, oc_item)
            if isinstance(metadata, dict):
                if '_target' in metadata:
                    oc_uri = metadata['_target']
                else:
                    oc_uri = URImanagement.make_oc_uri(
                        oc_item.manifest.uuid, oc_item.item_type)
            if isinstance(oc_uri, str):
                print('Make ARK id for: ' + oc_uri)
                ark_id = self.ezid.mint_identifier(oc_uri, metadata, 'ark')
                if isinstance(ark_id, str):
                    # success! we have an ARK id!
                    stable_id = ark_id.replace('ark:/', '')
                    ok = self.save_oc_item_stable_id(
                        oc_item, stable_id, 'ark')
    return ok
def add_project_predicates_and_annotations_to_graph(self, graph):
    """Gets the project predicates and their annotations with
    database calls, appending one record per predicate to graph.

    :param graph: list of JSON-LD item dicts; mutated in place.
    :return: the graph list.
    """
    pred_sql_dict_list = self.get_working_project_predicates()
    la_preds = self.get_link_annotations_for_preds(pred_sql_dict_list)
    if isinstance(pred_sql_dict_list, list):
        for sql_dict in pred_sql_dict_list:
            act_pred = LastUpdatedOrderedDict()
            act_pred['@id'] = 'oc-pred:' + sql_dict['slug']
            act_pred['owl:sameAs'] = URImanagement.make_oc_uri(
                sql_dict['predicate_uuid'], 'predicates')
            act_pred['label'] = sql_dict['label']
            act_pred['uuid'] = sql_dict['predicate_uuid']
            act_pred['slug'] = sql_dict['slug']
            if isinstance(sql_dict['class_uri'], str):
                if len(sql_dict['class_uri']) > 0:
                    act_pred['oc-gen:predType'] = sql_dict['class_uri']
            pred_found = False
            for la_pred in la_preds:
                if la_pred.subject == sql_dict['predicate_uuid']:
                    pred_found = True
                    # prefix common URIs for the predicate of the link annotation
                    la_pred_uri = URImanagement.prefix_common_uri(la_pred.predicate_uri)
                    if la_pred_uri not in act_pred:
                        act_pred[la_pred_uri] = []
                    la_object_item = self.make_object_dict_item(la_pred.object_uri)
                    act_pred[la_pred_uri].append(la_object_item)
                else:
                    if pred_found:
                        # because this list is sorted by la_pred.subject, we're done
                        # finding any more annotations on act_pred
                        # NOTE(review): correctness depends on la_preds being
                        # sorted by subject -- confirm that
                        # get_link_annotations_for_preds guarantees the ordering
                        break
            graph.append(act_pred)
    return graph
def save_context(self, row_num, man, parent_list):
    """Saves context information for an item; will also add new
    context fields as needed.

    :param row_num: export table row number for the item.
    :param man: Manifest object for the item.
    :param parent_list: list of parent context uuids; presumably
        ordered most-specific first -- TODO confirm against caller.
    """
    use_parents = False
    context_uri = ''
    if isinstance(parent_list, list):
        if len(parent_list) > 0:
            # the first parent provides the context URI for the item
            context_uri = URImanagement.make_oc_uri(parent_list[0], 'subjects')
            # reverse the list before making per-parent label cells
            use_parents = parent_list[::-1]
    # save a record of the context URI
    cell = ExpCell()
    cell.table_id = self.table_id
    cell.uuid = man.uuid
    cell.project_uuid = man.project_uuid
    cell.row_num = row_num
    cell.field_num = 13  # NOTE(review): magic field number for the context URI column
    cell.record = context_uri
    cell.save()
    cell = None
    if use_parents is not False:
        # one cell per parent context, in its own (possibly new) field
        pindex = 0
        for parent_uuid in use_parents:
            pindex += 1
            context_label = self.deref_entity_label(parent_uuid)
            field_num = self.get_add_context_field_number(pindex)
            cell = ExpCell()
            cell.table_id = self.table_id
            cell.uuid = man.uuid
            cell.project_uuid = man.project_uuid
            cell.row_num = row_num
            cell.field_num = field_num
            cell.record = context_label
            cell.save()
            cell = None
def make_trinomial_from_site_labels(self, project_uuid, state_prefix=''):
    """Makes trinomial identifiers from the site labels of a project.

    Saves a Trinomial record for each site; consistent with the
    other trinomial makers in this file, sites whose labels do not
    make a valid trinomial are reported and skipped rather than
    aborting the whole run.

    :param project_uuid: uuid of the project with the sites.
    :param state_prefix: optional state code prefixed to each label.
    """
    ent = Entity()
    found = ent.dereference(project_uuid)
    if found:
        proj_label = ent.label
        sites = Manifest.objects\
            .filter(project_uuid=project_uuid,
                    class_uri='oc-gen:cat-site')
        for site in sites:
            trinomial = str(state_prefix) + site.label
            parts = self.parse_trinomial(trinomial)
            dt = Trinomial()
            dt.uri = URImanagement.make_oc_uri(site.uuid, site.item_type)
            dt.uuid = site.uuid
            dt.label = site.label
            dt.project_label = proj_label
            dt.trinomial = trinomial
            dt.state = parts['state']
            dt.county = parts['county']
            dt.site = parts['site']
            try:
                dt.save()
                print('Trinomial: ' + trinomial + ', from: ' + site.label)
            except Exception:
                # skip invalid trinomials, matching the sibling
                # implementations of this method
                print('Trinomial: ' + trinomial + ' not valid as a trinomial')
def make_trinomial_from_site_labels(self, project_uuid, state_prefix=''):
    """Makes trinomial identifiers from the site labels of a project.

    Saves a Trinomial record for each site; sites whose labels do
    not make a valid trinomial are reported and skipped.

    :param project_uuid: uuid of the project with the sites.
    :param state_prefix: optional state code prefixed to each label.
    """
    ent = Entity()
    found = ent.dereference(project_uuid)
    if found:
        proj_label = ent.label
        sites = Manifest.objects\
            .filter(project_uuid=project_uuid,
                    class_uri='oc-gen:cat-site')
        for site in sites:
            trinomial = str(state_prefix) + site.label
            if '*' in trinomial:
                # for North Carolina, only the part before the '*' is a trinomial
                tr_ex = trinomial.split('*')
                trinomial = tr_ex[0]
            print('working on (' + site.uuid + '): ' + trinomial)
            parts = self.parse_trinomial(trinomial)
            if 'Tennessee' in proj_label:
                # Tennessee trinomials have no delimiting punctuation
                trinomial = parts['state'] + parts['county'] + str(parts['site'])
            dt = Trinomial()
            dt.uri = URImanagement.make_oc_uri(site.uuid, site.item_type)
            dt.uuid = site.uuid
            dt.label = site.label
            dt.project_label = proj_label
            dt.trinomial = trinomial
            dt.state = parts['state']
            dt.county = parts['county']
            dt.site = parts['site']
            try:
                dt.save()
                print('Trinomial: ' + trinomial + ', from: ' + site.label)
            except Exception:
                # narrowed from a bare `except:` so SystemExit and
                # KeyboardInterrupt are no longer swallowed
                print('Trinomial: ' + trinomial + ' not valid as a trinomial')
def get_assertion_values(self, uuid, obs_num, predicate_uuid):
    """Gets the current assertion values in a format for easy use
    by the item-field.js object.

    :param uuid: uuid of the item with the assertions.
    :param obs_num: observation number of the assertions.
    :param predicate_uuid: uuid of the assertions' predicate.
    :return: list of dicts describing each assertion value.
    """
    output = []
    ass_list = Assertion.objects\
        .filter(uuid=uuid,
                obs_num=obs_num,
                predicate_uuid=predicate_uuid)
    i = 0
    for ass in ass_list:
        i += 1
        # key used to report problems with this specific value
        error_key = 'obs-' + str(
            obs_num) + '-pred-' + predicate_uuid + '-val-' + str(i)
        item = LastUpdatedOrderedDict()
        item['hash_id'] = ass.hash_id
        item['id'] = None
        item['uuid'] = None
        item['slug'] = None
        item['label'] = None
        item['literal'] = None
        item_ok = False
        # NOTE(review): this tests ass.object_type as a substring of each
        # configured item type, not simple membership in
        # settings.ITEM_TYPES -- confirm that is intended
        if any(ass.object_type in item_type
               for item_type in settings.ITEM_TYPES):
            # the object is another Open Context item
            id_man = self.get_manifest_item(ass.object_uuid)
            if id_man is not False:
                item_ok = True
                item['id'] = URImanagement.make_oc_uri(
                    id_man.uuid, id_man.item_type)
                item['uuid'] = id_man.uuid
                item['slug'] = id_man.slug
                item['label'] = id_man.label
            else:
                self.errors[
                    error_key] = 'Cannot find object_uuid: ' + ass.object_uuid
        elif ass.object_type == 'xsd:string':
            try:
                act_string = OCstring.objects.get(uuid=ass.object_uuid)
            except OCstring.DoesNotExist:
                act_string = False
            if act_string is not False:
                item_ok = True
                item['uuid'] = act_string.uuid
                item['literal'] = act_string.content
            else:
                self.errors[
                    error_key] = 'Cannot find string_uuid: ' + ass.object_uuid
        elif ass.object_type == 'xsd:date':
            item_ok = True
            item['literal'] = ass.data_date.date().isoformat()
        else:
            # remaining types carry their value in data_num
            item_ok = True
            item['literal'] = ass.data_num
        if item_ok:
            output.append(item)
    return output
def prep_item_dc_metadata(self):
    """Prepares Dublin Core metadata for an item (site record);
    this needs to happen before we prep dc metadata for
    associated items and sets of items.
    """
    if self.is_valid:
        # make some uris
        self.uri = URImanagement.make_oc_uri(self.manifest.uuid,
                                             self.manifest.item_type)
        self.project_uri = URImanagement.make_oc_uri(self.manifest.project_uuid,
                                                     'projects')
        project_ent = self.get_entity(self.manifest.project_uuid)
        if not isinstance(self.label, str):
            self.label = self.manifest.label
        self.title = self.make_dcterms_title(self.manifest.label,
                                             self.context)
        self.description = 'An archaeological site record'
        context = self.remove_label_from_context(self.manifest.label,
                                                 self.context)
        if isinstance(context, str):
            self.description += ' from: ' + context
        if project_ent is not False:
            # NOTE(review): this reads project_ent.parent_project_uuid,
            # though project_ent was dereferenced from the item's own
            # project -- confirm the parent project URI is intended
            self.parent_project_uri = URImanagement.make_oc_uri(project_ent.parent_project_uuid,
                                                                'projects')
            self.description += '; part of the "' + project_ent.label
            self.description += '" data publication.'
        if self.geo_meta is not None and self.geo_meta is not False:
            if len(self.geo_meta) > 0:
                geo = self.geo_meta[0]
                if isinstance(geo.note, str):
                    if len(geo.note) > 0:
                        # geo note deliberately left out of the description
                        # self.description += ' ' + geo.note
                        pass
                if geo.specificity < 0:
                    self.description += ' Location data approximated as a security precaution.'
                if self.manifest.uuid != geo.uuid:
                    # geospatial data inherited from a related place
                    rel_meta = self.get_entity(geo.uuid)
                    if rel_meta is not False:
                        self.description += ' Location data provided through relationship to the'
                        self.description += ' related place: ' + rel_meta.label
                        self.description += ' (' + rel_meta.uri + ')'
def make_all_identifiers(self, identifier):
    """Makes all identifiers used with an export table from a
    given identifier.

    If the given identifier has a '_' character it is internal to
    Open Context; if it has a '/' character it is from an external
    URI; otherwise the internal and public forms are the same.

    :param identifier: the table identifier, in either form.
    """
    if '/' in identifier:
        id_ex = identifier.split('/')
        self.table_id = id_ex[1] + '_' + id_ex[0]
        self.public_table_id = identifier
    elif '_' in identifier:
        id_ex = identifier.split('_')
        self.table_id = identifier
        self.public_table_id = id_ex[1] + '/' + id_ex[0]
    else:
        self.table_id = identifier
        self.public_table_id = identifier
    # the URI is always built from the public table id, so it is
    # set once here instead of duplicated in every branch
    self.uri = URImanagement.make_oc_uri(self.public_table_id, 'tables')
def make_type_relations(self,
                        sub_type_pred_uuid,
                        sub_type_f_num,
                        rel_pred,
                        obj_type_pred_uuid,
                        obj_type_f_num):
    """Makes semantic relationships between different types
    in an import.

    :param sub_type_pred_uuid: predicate uuid for the subject types.
    :param sub_type_f_num: import field number with subject type records.
    :param rel_pred: predicate URI used for the link annotations.
    :param obj_type_pred_uuid: predicate uuid for the object types.
    :param obj_type_f_num: import field number with object type records.
    """
    rels = {}
    sub_type_list = ImportCell.objects\
        .filter(source_id=self.source_id,
                field_num=sub_type_f_num)
    for sub_type_obj in sub_type_list:
        sub_type_text = sub_type_obj.record
        row = sub_type_obj.row_num
        if len(sub_type_text) > 0:
            tm = TypeManagement()
            tm.project_uuid = self.project_uuid
            tm.source_id = self.source_id
            sub_type = tm.get_make_type_within_pred_uuid(sub_type_pred_uuid,
                                                         sub_type_text)
            # get the object type cell on the same row, if any
            obj_type_list = ImportCell.objects\
                .filter(source_id=self.source_id,
                        field_num=obj_type_f_num,
                        row_num=row)[:1]
            if len(obj_type_list) > 0:
                obj_type_text = obj_type_list[0].record
                if len(obj_type_text) > 0 \
                   and sub_type_text != obj_type_text:
                    tmo = TypeManagement()
                    tmo.project_uuid = self.project_uuid
                    tmo.source_id = self.source_id
                    obj_type = tmo.get_make_type_within_pred_uuid(obj_type_pred_uuid,
                                                                  obj_type_text)
                    # make a uri for this, since we're making a link assertion
                    obj_uri = URImanagement.make_oc_uri(obj_type.uuid, 'types')
                    # the following bit is so we don't make the
                    # same link assertions over and over.
                    rel_id = str(sub_type.uuid) + ' ' + str(obj_type.uuid)
                    if rel_id not in rels:
                        rels[rel_id] = {'subject': sub_type.uuid,
                                        'object_uri': obj_uri}
    # now make the link data annotation relating these types.
    for rel_id, rel in rels.items():
        new_la = LinkAnnotation()
        new_la.subject = rel['subject']
        new_la.subject_type = 'types'
        new_la.project_uuid = self.project_uuid
        new_la.source_id = self.source_id
        new_la.predicate_uri = rel_pred
        new_la.object_uri = rel['object_uri']
        new_la.creator_uuid = ''
        new_la.save()
def prep_delete_uuid(self, delete_uuid):
    """Prepares information needed to delete a uuid.

    Sets self.delete_manifest_obj, self.delete_uri, and
    self.delete_prefix_uri when the uuid is found.

    :param delete_uuid: uuid of the item to be deleted.
    :return: True when the manifest object exists, False otherwise.
    """
    delete_obj = self.get_manifest(delete_uuid)
    if delete_obj is False:
        return False
    self.delete_manifest_obj = delete_obj
    self.delete_uri = URImanagement.make_oc_uri(delete_uuid,
                                                delete_obj.item_type)
    self.delete_prefix_uri = URImanagement.prefix_common_uri(self.delete_uri)
    return True
def prep_merge_uuid(self, merge_into_uuid):
    """Prepares information needed to merge into a uuid.

    Sets self.merge_manifest_obj, self.merge_uri, and
    self.merge_prefix_uri when the uuid is found.

    :param merge_into_uuid: uuid of the item merged into.
    :return: True when the manifest object exists, False otherwise.
    """
    merge_obj = self.get_manifest(merge_into_uuid)
    if merge_obj is False:
        return False
    self.merge_manifest_obj = merge_obj
    self.merge_uri = URImanagement.make_oc_uri(merge_into_uuid,
                                               merge_obj.item_type)
    self.merge_prefix_uri = URImanagement.prefix_common_uri(self.merge_uri)
    return True
def make_type_relations(self,
                        sub_type_pred_uuid,
                        sub_type_f_num,
                        rel_pred,
                        obj_type_pred_uuid,
                        obj_type_f_num):
    """Makes semantic relationships between different types
    in an import.

    :param sub_type_pred_uuid: predicate uuid for the subject types.
    :param sub_type_f_num: import field number with subject type records.
    :param rel_pred: predicate URI used for the link annotations.
    :param obj_type_pred_uuid: predicate uuid for the object types.
    :param obj_type_f_num: import field number with object type records.
    """
    rels = {}
    sub_type_list = ImportCell.objects\
        .filter(source_id=self.source_id,
                field_num=sub_type_f_num)
    for sub_type_obj in sub_type_list:
        sub_type_text = sub_type_obj.record
        row = sub_type_obj.row_num
        if len(sub_type_text) > 0:
            tm = TypeManagement()
            tm.project_uuid = self.project_uuid
            tm.source_id = self.source_id
            sub_type = tm.get_make_type_within_pred_uuid(
                sub_type_pred_uuid, sub_type_text)
            # get the object type cell on the same row, if any
            obj_type_list = ImportCell.objects\
                .filter(source_id=self.source_id,
                        field_num=obj_type_f_num,
                        row_num=row)[:1]
            if len(obj_type_list) > 0:
                obj_type_text = obj_type_list[0].record
                if len(obj_type_text) > 0 \
                   and sub_type_text != obj_type_text:
                    tmo = TypeManagement()
                    tmo.project_uuid = self.project_uuid
                    tmo.source_id = self.source_id
                    obj_type = tmo.get_make_type_within_pred_uuid(
                        obj_type_pred_uuid, obj_type_text)
                    # make a uri for this, since we're making a link assertion
                    obj_uri = URImanagement.make_oc_uri(
                        obj_type.uuid, 'types')
                    # the following bit is so we don't make the
                    # same link assertions over and over.
                    rel_id = str(sub_type.uuid) + ' ' + str(obj_type.uuid)
                    if rel_id not in rels:
                        rels[rel_id] = {
                            'subject': sub_type.uuid,
                            'object_uri': obj_uri
                        }
    # now make the link data annotation relating these types.
    for rel_id, rel in rels.items():
        new_la = LinkAnnotation()
        new_la.subject = rel['subject']
        new_la.subject_type = 'types'
        new_la.project_uuid = self.project_uuid
        new_la.source_id = self.source_id
        new_la.predicate_uri = rel_pred
        new_la.object_uri = rel['object_uri']
        new_la.creator_uuid = ''
        new_la.save()
def get_assertion_values(self, uuid, obs_num, predicate_uuid):
    """Gets the current assertion values in a format for easy use
    by the item-field.js object.

    :param uuid: uuid of the item with the assertions.
    :param obs_num: observation number of the assertions.
    :param predicate_uuid: uuid of the assertions' predicate.
    :return: list of dicts describing each assertion value.
    """
    output = []
    ass_list = Assertion.objects\
        .filter(uuid=uuid,
                obs_num=obs_num,
                predicate_uuid=predicate_uuid)
    i = 0
    for ass in ass_list:
        i += 1
        # key used to report problems with this specific value
        error_key = 'obs-' + str(obs_num) + '-pred-' + predicate_uuid + '-val-' + str(i)
        item = LastUpdatedOrderedDict()
        item['hash_id'] = ass.hash_id
        item['id'] = None
        item['uuid'] = None
        item['slug'] = None
        item['label'] = None
        item['literal'] = None
        item_ok = False
        # NOTE(review): this tests ass.object_type as a substring of each
        # configured item type, not simple membership in
        # settings.ITEM_TYPES -- confirm that is intended
        if any(ass.object_type in item_type for item_type in settings.ITEM_TYPES):
            # the object is another Open Context item
            id_man = self.get_manifest_item(ass.object_uuid)
            if id_man is not False:
                item_ok = True
                item['id'] = URImanagement.make_oc_uri(id_man.uuid, id_man.item_type)
                item['uuid'] = id_man.uuid
                item['slug'] = id_man.slug
                item['label'] = id_man.label
            else:
                self.errors[error_key] = 'Cannot find object_uuid: ' + ass.object_uuid
        elif ass.object_type == 'xsd:string':
            try:
                act_string = OCstring.objects.get(uuid=ass.object_uuid)
            except OCstring.DoesNotExist:
                act_string = False
            if act_string is not False:
                item_ok = True
                item['uuid'] = act_string.uuid
                item['literal'] = act_string.content
            else:
                self.errors[error_key] = 'Cannot find string_uuid: ' + ass.object_uuid
        elif ass.object_type == 'xsd:date':
            item_ok = True
            item['literal'] = ass.data_date.date().isoformat()
        else:
            # remaining types carry their value in data_num
            item_ok = True
            item['literal'] = ass.data_num
        if item_ok:
            output.append(item)
    return output
def prep_item_dc_metadata(self):
    """Prepares Dublin Core metadata for an item; this needs to
    happen before we prep dc metadata for associated items and
    sets of items.
    """
    if self.is_valid:
        # make some uris
        self.uri = URImanagement.make_oc_uri(self.manifest.uuid,
                                             self.manifest.item_type)
        self.project_uri = URImanagement.make_oc_uri(
            self.manifest.project_uuid, 'projects')
        # get data about entities describing the item
        category_ent = self.get_entity(self.manifest.class_uri)
        if category_ent is not False:
            self.class_label = category_ent.label
            self.class_slug = category_ent.slug
        project_ent = self.get_entity(self.manifest.project_uuid)
        self.title = self.make_dcterms_title(self.manifest.label,
                                             self.context)
        item_type = self.manifest.item_type
        if item_type == 'subjects':
            # describe subjects by their category and context
            if category_ent is not False:
                self.description = category_ent.label
            if item_type in PelagiosData.ITEM_TYPE_DESCRIPTIONS:
                self.description += ' ' + PelagiosData.ITEM_TYPE_DESCRIPTIONS[
                    item_type].lower()
            context = self.remove_label_from_context(
                self.manifest.label, self.context)
            if isinstance(context, str):
                self.description += ' from the context: ' + context
            if project_ent is not False:
                self.description += '; part of the "' + project_ent.label
                self.description += '" data publication.'
        else:
            # other item types get a generic type description
            self.description = 'A ' + PelagiosData.ITEM_TYPE_DESCRIPTIONS[
                item_type]
            if project_ent is not False and item_type != 'projects':
                self.description += '; part of the "' + project_ent.label
                self.description += '" data publication.'
def create_pred_parents(self, new_hierachic_list):
    """Creates new predicates for superior (more general)
    predicates from a list of predicates that have hierarchies
    implicit in their labels. Once the superior predicates are
    created, linked data annotations noting the hierarchy are
    stored.

    :param new_hierachic_list: list of Manifest objects whose
        labels contain HIERARCHY_DELIM delimited hierarchies.
    :return: list of {'parent': uuid, 'child': uuid} dicts.
    """
    parent_children_pairs = []
    for manifest in new_hierachic_list:
        try:
            oc_pred = Predicate.objects.get(uuid=manifest.uuid)
        except Predicate.DoesNotExist:
            oc_pred = False
        if (oc_pred is not False):
            child_parts = manifest.label.split(self.HIERARCHY_DELIM)
            act_delim = ''
            act_new_label = ''
            current_parent = False
            for label_part in child_parts:
                # build each successively more specific label
                act_new_label = act_new_label + act_delim + label_part
                act_delim = self.HIERARCHY_DELIM
                pred_manage = PredicateManagement()
                pred_manage.project_uuid = manifest.project_uuid
                pred_manage.source_id = self.source_id
                pred_manage.sort = oc_pred.sort
                pred_manage.data_type = oc_pred.data_type
                ppred = pred_manage.get_make_predicate(
                    act_new_label, manifest.class_uri)
                if (ppred is not False and current_parent is not False):
                    parent_child = {
                        'parent': current_parent,
                        'child': ppred.uuid
                    }
                    parent_children_pairs.append(parent_child)
                current_parent = ppred.uuid
    if (len(parent_children_pairs) > 0):
        # now make some linked data annotations
        for parent_child in parent_children_pairs:
            if (parent_child['parent'] is not False):
                new_la = LinkAnnotation()
                new_la.subject = parent_child['child']
                new_la.subject_type = 'predicates'
                # NOTE(review): `manifest` here is the last item of the
                # loop above -- confirm that project_uuid is intended
                new_la.project_uuid = manifest.project_uuid
                new_la.source_id = self.source_id
                new_la.predicate_uri = self.p_for_superobjs
                new_la.object_uri = URImanagement.make_oc_uri(
                    parent_child['parent'], 'predicates')
                new_la.creator_uuid = ''
                new_la.save()
    return parent_children_pairs
def prep_item_dc_metadata(self):
    """Prepares Dublin Core metadata for an item; this needs to
    happen before we prep dc metadata for associated items and
    sets of items.
    """
    if self.is_valid:
        # make some uris
        self.uri = URImanagement.make_oc_uri(self.manifest.uuid,
                                             self.manifest.item_type)
        self.project_uri = URImanagement.make_oc_uri(self.manifest.project_uuid,
                                                     'projects')
        # get data about entities describing the item
        category_ent = self.get_entity(self.manifest.class_uri)
        if category_ent is not False:
            self.class_label = category_ent.label
            self.class_slug = category_ent.slug
        project_ent = self.get_entity(self.manifest.project_uuid)
        self.title = self.make_dcterms_title(self.manifest.label,
                                             self.context)
        item_type = self.manifest.item_type
        if item_type == 'subjects':
            # describe subjects by their category and context
            if category_ent is not False:
                self.description = category_ent.label
            if item_type in PelagiosData.ITEM_TYPE_DESCRIPTIONS:
                self.description += ' ' + PelagiosData.ITEM_TYPE_DESCRIPTIONS[item_type].lower()
            context = self.remove_label_from_context(self.manifest.label,
                                                     self.context)
            if isinstance(context, str):
                self.description += ' from the context: ' + context
            if project_ent is not False:
                self.description += '; part of the "' + project_ent.label
                self.description += '" data publication.'
        else:
            # other item types get a generic type description
            self.description = 'A ' + PelagiosData.ITEM_TYPE_DESCRIPTIONS[item_type]
            if project_ent is not False and item_type != 'projects':
                self.description += '; part of the "' + project_ent.label
                self.description += '" data publication.'
def make_save_ark_by_uuid(self, uuid, metadata=None):
    """Makes and saves an ARK identifier for a uuid.

    Returns None when the item already has an ARK or does not
    exist; otherwise returns the result of saving the new stable
    id. Raises on metadata, URI, or EZID minting failures.

    :param uuid: uuid of the item to get an ARK.
    :param metadata: optional dict of ARK metadata; built from
        the item when None.
    """
    ok = False
    oc_uri = None
    arks = StableIdentifer.objects.filter(
        uuid=uuid,
        stable_type='ark'
    )[:1]
    if len(arks) > 0:
        # item already has an ARK; nothing to do
        print('uuid {} has an ARK'.format(uuid))
        return None
    # the item doesn't yet have an ARK id, so make one!
    oc_item = OCitem()
    exists = oc_item.check_exists(uuid)
    if not oc_item.exists:
        print('uuid {} does not exist'.format(uuid))
        return None
    if metadata is None:
        metadata = self.make_ark_metadata_by_uuid(uuid, oc_item)
    if not isinstance(metadata, dict):
        raise RuntimeError('Cannot make metadata for {}'.format(uuid))
    if '_target' in metadata:
        oc_uri = metadata['_target']
    else:
        oc_uri = URImanagement.make_oc_uri(
            oc_item.manifest.uuid,
            oc_item.item_type
        )
    if not isinstance(oc_uri, str):
        raise RuntimeError(
            'Invalid URI for {} item_type {}'.format(
                oc_item.manifest.uuid,
                oc_item.item_type
            )
        )
    print('Make ARK id for: ' + oc_uri)
    ark_id = self.ezid.mint_identifier(oc_uri, metadata, 'ark')
    if not isinstance(ark_id, str):
        # NOTE(review): raising RuntimeWarning (a warning class) as an
        # exception is unusual -- confirm callers expect this type
        raise RuntimeWarning('EZID failed minting an ARK for {}'.format(oc_uri))
    # success! we have an ARK id!
    stable_id = ark_id.replace('ark:/', '')
    ok = self.save_oc_item_stable_id(
        oc_item,
        stable_id,
        'ark'
    )
    return ok
def create_pred_parents(self, new_hierachic_list):
    """Create predicates for the superior (more general) levels implied
    by hierarchic predicate labels.

    Labels like "A<delim>B<delim>C" imply parents "A" and
    "A<delim>B". Each implied predicate is created (or fetched) and
    then linked to its parent with a superior-object link annotation.

    :param new_hierachic_list: manifest records whose labels contain
        self.HIERARCHY_DELIM delimited hierarchies
    :return: list of {"parent": uuid, "child": uuid} pairs created
    """
    parent_children_pairs = []
    for manifest in new_hierachic_list:
        try:
            oc_pred = Predicate.objects.get(uuid=manifest.uuid)
        except Predicate.DoesNotExist:
            oc_pred = False
        if oc_pred is not False:
            child_parts = manifest.label.split(self.HIERARCHY_DELIM)
            act_delim = ""
            act_new_label = ""  # grows by one label segment per pass
            current_parent = False  # uuid of the previous (more general) level
            for label_part in child_parts:
                act_new_label = act_new_label + act_delim + label_part
                act_delim = self.HIERARCHY_DELIM
                # make (or get) the predicate for this hierarchy level,
                # copying sort and data_type from the original predicate
                pred_manage = PredicateManagement()
                pred_manage.project_uuid = manifest.project_uuid
                pred_manage.source_id = self.source_id
                pred_manage.sort = oc_pred.sort
                pred_manage.data_type = oc_pred.data_type
                ppred = pred_manage.get_make_predicate(act_new_label, manifest.class_uri)
                if ppred is not False and current_parent is not False:
                    parent_child = {"parent": current_parent, "child": ppred.uuid}
                    parent_children_pairs.append(parent_child)
                # NOTE(review): if get_make_predicate ever returns False,
                # the attribute access below raises AttributeError --
                # confirm it always returns a predicate object here.
                current_parent = ppred.uuid
    if len(parent_children_pairs) > 0:
        # now make some linked data annotations
        for parent_child in parent_children_pairs:
            if parent_child["parent"] is not False:
                new_la = LinkAnnotation()
                new_la.subject = parent_child["child"]
                new_la.subject_type = "predicates"
                # NOTE(review): 'manifest' here is the last record from
                # the loop above -- presumably all items in the list
                # share one project_uuid; verify with callers.
                new_la.project_uuid = manifest.project_uuid
                new_la.source_id = self.source_id
                new_la.predicate_uri = self.p_for_superobjs
                new_la.object_uri = URImanagement.make_oc_uri(parent_child["parent"], "predicates")
                new_la.creator_uuid = ""
                new_la.save()
    return parent_children_pairs
def create_concept_parents(self, new_hierachic_list):
    """Create types for the superior (more general) levels implied by
    hierarchic type labels.

    Labels like "A<delim>B<delim>C" imply parents "A" and
    "A<delim>B". Each implied type is created (or fetched) and then
    linked to its parent with a superior-object link annotation.
    Mirrors create_pred_parents for predicates.

    :param new_hierachic_list: manifest records whose labels contain
        self.HIERARCHY_DELIM delimited hierarchies
    :return: list of {'parent': uuid, 'child': uuid} pairs created
    """
    parent_children_pairs = []
    for manifest in new_hierachic_list:
        try:
            oc_type = OCtype.objects.get(uuid=manifest.uuid)
        except OCtype.DoesNotExist:
            oc_type = False
        if oc_type is not False:
            child_parts = manifest.label.split(self.HIERARCHY_DELIM)
            act_delim = ''
            act_new_label = ''  # grows by one label segment per pass
            current_parent = False  # uuid of the previous (more general) level
            for label_part in child_parts:
                act_new_label = act_new_label + act_delim + label_part
                act_delim = self.HIERARCHY_DELIM
                # make (or get) the type for this hierarchy level
                type_manage = TypeManagement()
                type_manage.project_uuid = oc_type.project_uuid
                type_manage.source_id = self.source_id
                ptype = type_manage.get_make_type_within_pred_uuid(
                    oc_type.predicate_uuid,
                    act_new_label)
                # Guard against a False return (consistent with
                # create_pred_parents) so we never raise AttributeError
                # on ptype.uuid.
                if ptype is not False:
                    if current_parent is not False:
                        parent_child = {'parent': current_parent,
                                        'child': ptype.uuid}
                        parent_children_pairs.append(parent_child)
                    current_parent = ptype.uuid
    if len(parent_children_pairs) > 0:
        # now make some linked data annotations
        for parent_child in parent_children_pairs:
            if parent_child['parent'] is not False:
                new_la = LinkAnnotation()
                new_la.subject = parent_child['child']
                new_la.subject_type = 'types'
                # NOTE: 'oc_type' is the last record from the loop above;
                # assumes all items in the list share one project_uuid.
                new_la.project_uuid = oc_type.project_uuid
                new_la.source_id = self.source_id
                new_la.predicate_uri = self.p_for_superobjs
                new_la.object_uri = URImanagement.make_oc_uri(parent_child['parent'], 'types')
                new_la.creator_uuid = ''
                new_la.save()
    return parent_children_pairs
def skos_relate_old_new_predicates(self, project_uuid, source_id, predicate_uuid, new_pred_uuid):
    """Make a new link annotation relating a new predicate to an
    existing predicate via skos:related.

    :param project_uuid: project owning the annotation
    :param source_id: source identifier recorded on the annotation
    :param predicate_uuid: uuid of the existing predicate (the object)
    :param new_pred_uuid: uuid of the new predicate (the subject)
    :return: True if the annotation saved, False otherwise
    """
    la = LinkAnnotation()
    la.subject = new_pred_uuid
    la.subject_type = 'predicates'
    la.project_uuid = project_uuid
    la.source_id = source_id
    la.predicate_uri = 'skos:related'
    la.object_uri = URImanagement.make_oc_uri(predicate_uuid, 'predicates')
    try:
        la.save()
        output = True
    except Exception:
        # was a bare 'except:'; narrowed so SystemExit/KeyboardInterrupt
        # are no longer swallowed. Save failures (e.g. duplicates) still
        # report False, preserving the best-effort contract.
        output = False
    return output
def make_dir_dict(self, part_num, license_uri, project_uuid):
    """Compose the metadata dict for an archive files directory and
    persist it as serialized JSON so the directory exists on disk.

    :param part_num: partition number for the directory
    :param license_uri: license URI applying to the directory's files
    :param project_uuid: uuid of the project the directory belongs to
    :return: the newly composed directory metadata dict
    """
    act_dir = self.make_act_files_dir_name(part_num, license_uri, project_uuid)
    dir_dict = LastUpdatedOrderedDict()
    dir_dict['dc-terms:isPartOf'] = URImanagement.make_oc_uri(project_uuid, 'projects')
    dir_dict['dc-terms:license'] = license_uri
    # NOTE: 'partion-number' (sic) is an established key in saved
    # JSON; keep the spelling for compatibility.
    dir_dict['partion-number'] = part_num
    dir_dict['label'] = act_dir
    dir_dict['size'] = 0
    for empty_list_key in ['dc-terms:creator',
                           'dc-terms:contributor',
                           'category',
                           'files']:
        dir_dict[empty_list_key] = []
    # save it so we can have a directory ready
    self.arch_files_obj.save_serialized_json(act_dir, self.dir_content_file_json, dir_dict)
    return dir_dict
def add_author_list(self, sauthors, dc_type):
    """Make an author list from sorted (identifier, count) tuples.

    :param sauthors: sorted iterable of (uri_or_uuid, count) pairs
    :param dc_type: Dublin Core role label used in each item's local id
    :return: list of author dicts for JSON-LD output
    """
    author_list = []
    for seq, (uri_key, count) in enumerate(sauthors, 1):
        auth = LastUpdatedOrderedDict()
        auth['id'] = '#' + dc_type + '-' + str(seq)
        if ('http://' in uri_key) or ('https://' in uri_key):
            # already a full URI
            auth['rdfs:isDefinedBy'] = uri_key
        else:
            # a uuid; make a persons URI from it
            auth['rdfs:isDefinedBy'] = URImanagement.make_oc_uri(uri_key,
                                                                'persons')
        auth['label'] = self.deref_entity_label(uri_key)
        auth['count'] = count
        author_list.append(auth)
    return author_list
def add_author_list(self, sauthors, dc_type):
    """Build a list of author dicts from sorted (identifier, count)
    tuples, dereferencing each identifier's label.
    """
    authors = []
    position = 0
    for id_key, tally in sauthors:
        position += 1
        person = LastUpdatedOrderedDict()
        person['id'] = '#' + dc_type + '-' + str(position)
        # full URIs pass through; uuids get a persons URI
        is_full_uri = 'http://' in id_key or 'https://' in id_key
        person['rdfs:isDefinedBy'] = (
            id_key if is_full_uri
            else URImanagement.make_oc_uri(id_key, 'persons')
        )
        person['label'] = self.deref_entity_label(id_key)
        person['count'] = tally
        authors.append(person)
    return authors
def get_manifest_objects_from_uuids(self, query_uuids):
    """Fetch Manifest objects for uuids limited to the item types in
    self.ITEM_TYPE_MANIFEST_LIST, caching them in
    self.manifest_obj_dict keyed by uuid.

    :param query_uuids: a uuid or list of uuids to look up
    """
    if not isinstance(query_uuids, list):
        query_uuids = [query_uuids]
    if not query_uuids:
        return
    # retrieve all of these manifest objects in one query
    man_objs = Manifest.objects.filter(uuid__in=query_uuids,
                                       item_type__in=self.ITEM_TYPE_MANIFEST_LIST)
    for man_obj in man_objs:
        # add the attributes expected for entities
        man_obj.uri = URImanagement.make_oc_uri(man_obj.uuid, man_obj.item_type)
        for entity_attrib in ('slug_uri',
                              'thumbnail_uri',
                              'content',
                              'item_json_ld'):
            setattr(man_obj, entity_attrib, None)
        self.manifest_obj_dict[man_obj.uuid] = man_obj
def make_json_ld(self):
    """Make a JSON-LD object describing this export table's metadata.

    Returns an empty ordered dict when no ExpTable record is loaded.
    Namespaces of note: oc-table, cc-rel, and
    http://www.w3.org/2003/01/geo/ (geo:lat, geo:lon).
    """
    json_ld = LastUpdatedOrderedDict()
    if self.exp_tab is False:
        return json_ld
    json_ld['id'] = URImanagement.make_oc_uri(self.public_table_id, 'tables')
    json_ld['uuid'] = self.public_table_id
    json_ld['label'] = self.exp_tab.label
    json_ld['fields'] = self.exp_tab.field_count
    json_ld['rows'] = self.exp_tab.row_count
    json_ld['dc-terms:identifier'] = self.table_id
    json_ld['dc-terms:issued'] = self.exp_tab.created.date().isoformat()
    json_ld['dc-terms:modified'] = self.exp_tab.updated.date().isoformat()
    json_ld['dc-terms:abstract'] = self.exp_tab.abstract
    json_ld = self.get_link_annotations(json_ld)
    stable_ids = self.get_stable_ids()
    if len(stable_ids) > 0:
        json_ld['owl:sameAs'] = stable_ids
    json_ld['has-fields'] = self.get_field_list()
    if 'dc-terms:license' not in json_ld:
        # default to an attribution license when the link
        # annotations supplied none
        json_ld['dc-terms:license'] = [{
            'id': 'http://creativecommons.org/licenses/by/4.0/',
            'slug': 'cc-license-by-4-0',
            'label': 'Attribution 4.0'
        }]
    return json_ld
def make_save_doi_by_uuid(self, uuid, metadata=None):
    """Mint and save a DOI identifier for the item identified by uuid.

    Restructured with guard clauses for consistency with
    make_save_ark_by_uuid; behavior is unchanged.

    :param uuid: uuid of the item needing a DOI
    :param metadata: optional pre-built EZID metadata dict
    :return: result of saving the stable id, or False on any
        failure / when a DOI already exists
    """
    ok = False
    dois = StableIdentifer.objects.filter(uuid=uuid, stable_type='doi')[:1]
    if len(dois) > 0:
        # the item already has a DOI; nothing to mint
        return ok
    oc_item = OCitem()
    oc_item.check_exists(uuid)
    if not oc_item.exists:
        return ok
    if metadata is None:
        metadata = self.make_doi_metadata_by_uuid(uuid, oc_item)
    if not isinstance(metadata, dict):
        return ok
    # prefer the metadata's own target URI; fall back to the canonical URI
    if '_target' in metadata:
        oc_uri = metadata['_target']
    else:
        oc_uri = URImanagement.make_oc_uri(
            oc_item.manifest.uuid, oc_item.item_type)
    if not isinstance(oc_uri, str):
        return ok
    print('Make DOI id for: ' + oc_uri)
    ezid_response = self.ezid.mint_identifier(oc_uri, metadata, 'doi')
    if self.do_test:
        print('EZID response: ' + str(ezid_response))
    if not isinstance(ezid_response, str):
        return ok
    if '|' in ezid_response:
        # EZID returned multiple ids; save the one(s) with a doi: scheme
        # NOTE(review): resp_id may carry surrounding whitespace from the
        # split -- confirm save_oc_item_stable_id tolerates this.
        for resp_id in ezid_response.split('|'):
            if 'doi:' in resp_id:
                ok = self.save_oc_item_stable_id(oc_item, resp_id, 'doi')
    else:
        ok = self.save_oc_item_stable_id(oc_item, ezid_response, 'doi')
    return ok
def save_context(self, row_num, man, parent_list):
    """Save context information for a table row, adding new context
    fields as needed.

    Field 13 records the context URI; one additional field per
    parent (general to specific) records that parent's label.
    """
    def _save_cell(field_num, record):
        # persist one ExpCell row for this table / item / row
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = field_num
        cell.record = record
        cell.save()

    context_uri = ''
    use_parents = False
    if isinstance(parent_list, list) and len(parent_list) > 0:
        # parent_list is most-specific first; reverse it so the
        # per-field output runs general -> specific
        context_uri = URImanagement.make_oc_uri(parent_list[0], 'subjects')
        use_parents = parent_list[::-1]
    # save a record of the context URI
    _save_cell(13, context_uri)
    if use_parents is not False:
        for pindex, parent_uuid in enumerate(use_parents, 1):
            _save_cell(self.get_add_context_field_number(pindex),
                       self.deref_entity_label(parent_uuid))
def make_graph(self):
    """Make the graph of VoID assertions for the void file.

    Describes the Open Context projects collection as a void:Dataset,
    then adds each publicly available project as a void:subset with
    its own title, description, publisher, license, data-dump URI,
    and issued/modified dates.
    """
    # NOTE(review): lang_obj appears unused in this method -- confirm
    # Languages() has no needed side effects before removing.
    lang_obj = Languages()
    # get a list of project manifest + projects objects
    # these are filtered for publicly available projects only
    pprojs = PelagiosProjects()
    pprojs.request = self.request
    self.man_proj_objs = pprojs.get_projects()
    # first make assertions about Open Context
    oc_projs_uri = settings.CANONICAL_HOST + '/projects/'
    self.make_add_triple(oc_projs_uri,
                         RDF.type,
                         self.make_full_uri('void', 'Dataset'))
    self.make_add_triple(oc_projs_uri,
                         self.make_full_uri('dcterms', 'title'),
                         None,
                         settings.CANONICAL_SITENAME)
    self.make_add_triple(oc_projs_uri,
                         self.make_full_uri('dcterms', 'description'),
                         None,
                         settings.HOST_TAGLINE)
    self.make_add_triple(oc_projs_uri,
                         self.make_full_uri('foaf', 'homepage'),
                         settings.CANONICAL_HOST)
    # now add assertions about Web data and Open Context
    self.make_add_web_dataset_assertions()
    # now add the projects as subsets of data
    for proj_dict in self.man_proj_objs:
        man = proj_dict['man']
        uri = URImanagement.make_oc_uri(man.uuid, man.item_type)
        self.make_add_triple(oc_projs_uri,
                             self.make_full_uri('void', 'subset'),
                             uri)
    # now add assertions about each project, esp. datadump uri
    for proj_dict in self.man_proj_objs:
        man = proj_dict['man']
        proj = proj_dict['proj']
        uri = URImanagement.make_oc_uri(man.uuid, man.item_type)
        data_uri = self.base_uri + man.uuid + self.data_dump_extension
        self.make_add_triple(uri,
                             RDF.type,
                             self.make_full_uri('void', 'Dataset'))
        self.make_add_triple(uri,
                             self.make_full_uri('void', 'dataDump'),
                             data_uri)
        """
        self.make_add_triple(uri,
                             self.make_full_uri('foaf', 'homepage'),
                             uri)
        """
        self.make_add_triple(uri,
                             self.make_full_uri('dcterms', 'publisher'),
                             None,
                             settings.CANONICAL_SITENAME)
        # the license applies to the data dump, not the project URI
        self.make_add_triple(data_uri,
                             self.make_full_uri('dcterms', 'license'),
                             self.OA_LICENSE)
        self.make_add_triple(uri,
                             self.make_full_uri('dcterms', 'title'),
                             None,
                             man.label)
        self.make_add_triple(uri,
                             self.make_full_uri('dcterms', 'description'),
                             None,
                             proj.short_des)
        if man.published is not None:
            self.make_add_triple(uri,
                                 self.make_full_uri('dcterms', 'issued'),
                                 None,
                                 man.published.date().isoformat())
        if man.revised is not None:
            self.make_add_triple(uri,
                                 self.make_full_uri('dcterms', 'modified'),
                                 None,
                                 man.revised.date().isoformat())
def dereference_manifest_item(self, identifier):
    """Dereference a manifest item (something part of an OC project).

    Looks the identifier up by uuid or slug and, when found, copies
    the record's core attributes onto self, then loads extra data
    depending on item_type (thumbnail for media, stable ids for
    persons/projects/tables, content for types, predicate details,
    parent project, or context for subjects).

    :param identifier: uuid or slug of the item
    :return: True when found, None when no manifest record matches
    """
    manifest_item = Manifest.objects.filter(
        Q(uuid=identifier) | Q(slug=identifier)).first()
    if not manifest_item:
        return None
    # We found the item, now get the data out.
    self.uri = URImanagement.make_oc_uri(manifest_item.uuid,
                                         manifest_item.item_type)
    self.uuid = manifest_item.uuid
    self.slug = manifest_item.slug
    self.label = manifest_item.label
    self.item_type = manifest_item.item_type
    self.class_uri = manifest_item.class_uri
    self.project_uuid = manifest_item.project_uuid
    if manifest_item.item_type == 'media' and self.get_thumbnail:
        # a media item. get information about its thumbnail.
        thumb_obj = Mediafile.objects.filter(
            uuid=manifest_item.uuid,
            file_type='oc-gen:thumbnail').first()
        if thumb_obj:
            self.thumbnail_media = thumb_obj
            self.thumbnail_uri = thumb_obj.file_uri
    elif manifest_item.item_type in ['persons', 'projects', 'tables'] \
            or self.get_stable_ids:
        # get stable identifiers for persons or projects by default
        stable_ids = StableIdentifer.objects.filter(
            uuid=manifest_item.uuid)
        if len(stable_ids) > 0:
            self.stable_id_uris = []
            doi_uris = []
            orcid_uris = []
            other_uris = []
            for stable_id in stable_ids:
                # prepend the scheme prefix for known id types
                if stable_id.stable_type in StableIdentifer.ID_TYPE_PREFIXES:
                    prefix = StableIdentifer.ID_TYPE_PREFIXES[
                        stable_id.stable_type]
                else:
                    prefix = ''
                stable_uri = prefix + stable_id.stable_id
                if stable_id.stable_type == 'orcid':
                    orcid_uris.append(stable_uri)
                elif stable_id.stable_type == 'doi':
                    doi_uris.append(stable_uri)
                else:
                    other_uris.append(stable_uri)
            # now list URIs in order of importance, with ORCIDs and DOIs
            # first, followed by other stable URI types (Arks or something else)
            self.stable_id_uris = orcid_uris + doi_uris + other_uris
    elif manifest_item.item_type == 'types':
        tl = TypeLookup()
        tl.get_octype_without_manifest(identifier)
        self.content = tl.content
    elif manifest_item.item_type == 'predicates':
        oc_pred = Predicate.objects.filter(uuid=manifest_item.uuid).first()
        if oc_pred:
            self.data_type = oc_pred.data_type
            self.sort = oc_pred.sort
            self.slug_uri = 'oc-pred:' + str(self.slug)
    elif manifest_item.item_type == 'projects':
        # get a manifest object for the parent of a project, if it exists
        # NOTE(review): the raw SQL below interpolates self.uuid directly;
        # it comes from the manifest record (not user input), but consider
        # a parameterized query to remove any injection risk.
        ch_tab = '"oc_projects" AS "child"'
        filters = 'child.project_uuid=oc_manifest.uuid '\
                  ' AND child.uuid=\'' + self.uuid + '\' ' \
                  ' AND child.project_uuid != \'' + self.uuid + '\' '
        par_rows = Manifest.objects\
                           .filter(item_type='projects')\
                           .exclude(uuid=self.uuid)\
                           .extra(tables=[ch_tab], where=[filters])[:1]
        if len(par_rows) > 0:
            self.par_proj_man_obj = par_rows[0]
    elif (manifest_item.item_type == 'subjects'
            and self.get_context
            and not self.context):
        subj = Subject.objects.filter(uuid=manifest_item.uuid).first()
        if subj:
            self.context = subj.context
    return True
def prep_assocated_dc_metadata(self):
    """Prepare DC metadata for items associated to the main item
    (an item that actually has 1 or more gazetteer links).

    Builds two kinds of associated records: uuid-identified items
    (specific related resources) and query-based sets of related
    items whose URIs point at Open Context search pages. Also sets
    self.depiction from a project hero image or from the first
    associated item that carries a depiction.
    """
    if self.is_valid and len(self.raw_associated) > 0:
        project_ent = self.get_entity(self.manifest.project_uuid)
        ass_items = []  # list of associated items
        ass_sets = []  # list of associated sets
        for key, ass in self.raw_associated.items():
            if isinstance(ass['uuid'], str) and \
               isinstance(ass['label'], str):
                # we have a uuid identified item, meaning a specific
                # related resource
                ass['uri'] = URImanagement.make_oc_uri(ass['uuid'], ass['item_type'])
                ass['title'] = self.make_dcterms_title(ass['label'], self.context)
                # now prepare description information
                description = ''
                cat_ent = self.get_entity(ass['media_class_uri'])
                if cat_ent is not False:
                    ass['class_label'] = cat_ent.label
                    ass['class_slug'] = cat_ent.slug
                    description += cat_ent.label
                if ass['item_type'] in PelagiosData.ITEM_TYPE_DESCRIPTIONS:
                    if description == '':
                        description = 'A'
                    description += ' ' + PelagiosData.ITEM_TYPE_DESCRIPTIONS[ass['item_type']]
                ass['description'] = self.add_description_item_class_project(description, project_ent)
                if ass['temporal'] is None:
                    # fall back to the main item's temporal coverage
                    ass['temporal'] = self.temporal
                ass_items.append(ass)
            elif self.contents_cnt > 1 or self.manifest.item_type == 'projects':
                # the associated item is for a result set, not an individual item
                rel_media_cat_ent = False
                if isinstance(ass['media_class_uri'], str):
                    rel_media_cat_ent = self.get_entity(ass['media_class_uri'])
                cat_ent = self.get_entity(ass['class_uri'])
                description = 'A set of '
                if cat_ent is not False:
                    ass['class_label'] = cat_ent.label
                    ass['class_slug'] = cat_ent.slug
                    ass['title'] = cat_ent.label
                    description += cat_ent.label.lower()
                else:
                    ass['title'] = 'Related'
                if rel_media_cat_ent is not False:
                    ass['title'] += ' ' + rel_media_cat_ent.label
                    description += ' ' + rel_media_cat_ent.label.lower()
                if ass['item_type'] in PelagiosData.ITEM_TYPE_DESCRIPTIONS_PLR:
                    type_des = PelagiosData.ITEM_TYPE_DESCRIPTIONS_PLR[ass['item_type']]
                    ass['title'] += ' ' + type_des
                    description += ' ' + type_des.lower()
                ass['title'] += ' Related to: ' + self.manifest.label
                if isinstance(self.class_label, str) and \
                   self.manifest.item_type != 'projects':
                    ass['title'] += ' (' + self.class_label + ')'
                ass['description'] = self.add_description_item_class_project(description, project_ent)
                param_sep = '?'
                # payload is for querying for temporal data
                payload = {
                    'response': 'metadata',
                    'type': ass['item_type'],
                    'prop': []}
                # build the search URI and matching Solr query payload
                # for this set, by item_type
                if ass['item_type'] == 'media':
                    ass['uri'] = settings.CANONICAL_HOST + '/media-search/'
                    if isinstance(self.context, str):
                        ass['uri'] += self.encode_url_context_path(self.context)
                    if cat_ent is not False:
                        ass['uri'] += param_sep + 'prop=rel--' + cat_ent.slug
                        param_sep = '&'
                        payload['prop'].append('rel--' + cat_ent.slug)
                    if rel_media_cat_ent is not False:
                        ass['uri'] += param_sep + 'prop=' + rel_media_cat_ent.slug
                        param_sep = '&'
                        payload['prop'].append(rel_media_cat_ent.slug)
                    elif isinstance(ass['media_class_uri'], str):
                        ass['uri'] += param_sep + 'prop=' + quote_plus(ass['media_class_uri'])
                        payload['prop'].append(ass['media_class_uri'])
                elif ass['item_type'] == 'subjects':
                    ass['uri'] = settings.CANONICAL_HOST + '/subjects-search/'
                    if isinstance(self.context, str):
                        ass['uri'] += self.encode_url_context_path(self.context)
                    if cat_ent is not False:
                        ass['uri'] += param_sep + 'prop=' + cat_ent.slug
                        param_sep = '&'
                        payload['prop'].append(cat_ent.slug)
                else:
                    ass['uri'] = settings.CANONICAL_HOST + '/search/'
                    if isinstance(self.context, str):
                        ass['uri'] += self.encode_url_context_path(self.context)
                    if cat_ent is not False:
                        ass['uri'] += param_sep + 'prop=rel--' + cat_ent.slug
                        param_sep = '&'
                        payload['prop'].append('rel--' + cat_ent.slug)
                ass['uri'] += param_sep + 'type=' + ass['item_type']
                param_sep = '&'
                if project_ent is not False:
                    ass['uri'] += param_sep + 'proj=' + project_ent.slug
                    payload['proj'] = project_ent.slug
                # now query Solr for temporal data
                cq = CompleteQuery()
                spatial_context = None
                if isinstance(self.context, str):
                    spatial_context = self.context
                if len(payload['prop']) < 1:
                    # remove unused property key
                    payload.pop('prop', None)
                ass_metadata = cq.get_json_query(payload, spatial_context)
                if 'dc-terms:temporal' in ass_metadata:
                    ass['temporal'] = ass_metadata['dc-terms:temporal']
                ass_sets.append(ass)
            else:
                pass
        if self.manifest.item_type == 'projects':
            # we have a project so get the hero image (if exists) directly
            # for the depiction (note: returns None if not found)
            self.depiction = self.get_depiction_image_file(self.uuid)
        else:
            # we have another item_type, so the self.depiction comes
            # from the list of associated items
            for ass in ass_items:
                if isinstance(ass['depiction'], str):
                    # the item depiction file is the first one we find
                    # from the associated item list
                    self.depiction = ass['depiction']
                    break
        self.associated = ass_items + ass_sets
def prep_assocated_dc_metadata(self):
    """Prepare DC metadata for items associated to the main item
    (an item that actually has 1 or more gazetteer links).

    This variant prefers self.active_project_uuid (when set) over the
    manifest's project_uuid for project attribution. Builds two kinds
    of associated records: uuid-identified items (specific related
    resources) and query-based sets pointing at search pages. Also
    sets self.depiction from a project hero image or the first
    associated item with a depiction.
    """
    if self.is_valid and len(self.raw_associated) > 0:
        # attribute to the active project when one is set
        if isinstance(self.active_project_uuid, str):
            project_ent = self.get_entity(self.active_project_uuid)
        else:
            project_ent = self.get_entity(self.manifest.project_uuid)
        ass_items = []  # list of associated items
        ass_sets = []  # list of associated sets
        for key, ass in self.raw_associated.items():
            if isinstance(ass['uuid'], str) and \
               isinstance(ass['label'], str):
                # we have a uuid identified item, meaning a specific
                # related resource
                ass['uri'] = URImanagement.make_oc_uri(
                    ass['uuid'], ass['item_type'])
                ass['title'] = self.make_dcterms_title(
                    ass['label'], self.context)
                # now prepare description information
                description = ''
                cat_ent = self.get_entity(ass['media_class_uri'])
                if cat_ent is not False:
                    ass['class_label'] = cat_ent.label
                    ass['class_slug'] = cat_ent.slug
                    description += cat_ent.label
                if ass['item_type'] in PelagiosData.ITEM_TYPE_DESCRIPTIONS:
                    if description == '':
                        description = 'A'
                    description += ' ' + PelagiosData.ITEM_TYPE_DESCRIPTIONS[
                        ass['item_type']]
                ass['description'] = self.add_description_item_class_project(
                    description, project_ent)
                if ass['temporal'] is None:
                    # fall back to the main item's temporal coverage
                    ass['temporal'] = self.temporal
                ass_items.append(ass)
            elif self.contents_cnt > 1 or self.manifest.item_type == 'projects':
                # the associated item is for a result set, not an individual item
                rel_media_cat_ent = False
                if isinstance(ass['media_class_uri'], str):
                    rel_media_cat_ent = self.get_entity(
                        ass['media_class_uri'])
                cat_ent = self.get_entity(ass['class_uri'])
                description = 'A set of '
                if cat_ent is not False:
                    ass['class_label'] = cat_ent.label
                    ass['class_slug'] = cat_ent.slug
                    ass['title'] = cat_ent.label
                    description += cat_ent.label.lower()
                else:
                    ass['title'] = 'Related'
                if rel_media_cat_ent is not False:
                    ass['title'] += ' ' + rel_media_cat_ent.label
                    description += ' ' + rel_media_cat_ent.label.lower()
                if ass['item_type'] in PelagiosData.ITEM_TYPE_DESCRIPTIONS_PLR:
                    type_des = PelagiosData.ITEM_TYPE_DESCRIPTIONS_PLR[
                        ass['item_type']]
                    ass['title'] += ' ' + type_des
                    description += ' ' + type_des.lower()
                ass['title'] += ' Related to: ' + self.manifest.label
                if isinstance(self.class_label, str) and \
                   self.manifest.item_type != 'projects':
                    ass['title'] += ' (' + self.class_label + ')'
                ass['description'] = self.add_description_item_class_project(
                    description, project_ent)
                param_sep = '?'
                # payload is for querying for temporal data
                payload = {
                    'response': 'metadata',
                    'type': ass['item_type'],
                    'prop': []
                }
                # build the search URI and matching Solr query payload
                # for this set, by item_type
                if ass['item_type'] == 'media':
                    ass['uri'] = settings.CANONICAL_HOST + '/media-search/'
                    if isinstance(self.context, str):
                        ass['uri'] += self.encode_url_context_path(
                            self.context)
                    if cat_ent is not False:
                        ass['uri'] += param_sep + 'prop=rel--' + cat_ent.slug
                        param_sep = '&'
                        payload['prop'].append('rel--' + cat_ent.slug)
                    if rel_media_cat_ent is not False:
                        ass['uri'] += param_sep + 'prop=' + rel_media_cat_ent.slug
                        param_sep = '&'
                        payload['prop'].append(rel_media_cat_ent.slug)
                    elif isinstance(ass['media_class_uri'], str):
                        ass['uri'] += param_sep + 'prop=' + quote_plus(
                            ass['media_class_uri'])
                        payload['prop'].append(ass['media_class_uri'])
                elif ass['item_type'] == 'subjects':
                    ass['uri'] = settings.CANONICAL_HOST + '/subjects-search/'
                    if isinstance(self.context, str):
                        ass['uri'] += self.encode_url_context_path(
                            self.context)
                    if cat_ent is not False:
                        ass['uri'] += param_sep + 'prop=' + cat_ent.slug
                        param_sep = '&'
                        payload['prop'].append(cat_ent.slug)
                else:
                    ass['uri'] = settings.CANONICAL_HOST + '/search/'
                    if isinstance(self.context, str):
                        ass['uri'] += self.encode_url_context_path(
                            self.context)
                    if cat_ent is not False:
                        ass['uri'] += param_sep + 'prop=rel--' + cat_ent.slug
                        param_sep = '&'
                        payload['prop'].append('rel--' + cat_ent.slug)
                ass['uri'] += param_sep + 'type=' + ass['item_type']
                param_sep = '&'
                if project_ent is not False:
                    ass['uri'] += param_sep + 'proj=' + project_ent.slug
                    payload['proj'] = project_ent.slug
                # now query Solr for temporal data
                cq = CompleteQuery()
                spatial_context = None
                if isinstance(self.context, str):
                    spatial_context = self.context
                if len(payload['prop']) < 1:
                    # remove unused property key
                    payload.pop('prop', None)
                ass_metadata = cq.get_json_query(payload, spatial_context)
                if 'dc-terms:temporal' in ass_metadata:
                    ass['temporal'] = ass_metadata['dc-terms:temporal']
                ass_sets.append(ass)
            else:
                pass
        if self.manifest.item_type == 'projects':
            # we have a project so get the hero image (if exists) directly
            # for the depiction (note: returns None if not found)
            self.depiction = self.get_depiction_image_file(self.uuid)
        else:
            # we have another item_type, so the self.depiction comes
            # from the list of associated items
            for ass in ass_items:
                if isinstance(ass['depiction'], str):
                    # the item depiction file is the first one we find
                    # from the associated item list
                    self.depiction = ass['depiction']
                    break
        self.associated = ass_items + ass_sets
def generate_table_metadata(self, table_id, overwrite=False):
    """Make link-annotation metadata for a specific export table.

    For each predicate in self.metadata_predicates
    (dc-terms:contributor / creator / source / subject) this saves
    LinkAnnotation rows on the table's Manifest record. When
    overwrite is True existing annotations for a predicate are
    deleted first; otherwise predicates that already have
    annotations are skipped.

    :param table_id: identifier of the export table
    :param overwrite: if True, delete and regenerate annotations
    """
    ex_id = ExpTableIdentifiers()
    ex_id.make_all_identifiers(table_id)
    # annotations may be stored under either identifier form
    table_ids = [ex_id.table_id, ex_id.public_table_id]
    try:
        ex_tab = ExpTable.objects.get(table_id=table_id)
    except ExpTable.DoesNotExist:
        print('No ExpTable object for: ' + ex_id.public_table_id)
        ex_tab = None
    try:
        man_obj = Manifest.objects.get(uuid=ex_id.public_table_id)
    except Manifest.DoesNotExist:
        print('No manifest object for: ' + ex_id.public_table_id)
        man_obj = None
    if ex_tab is not None and man_obj is not None:
        proj_uuid_counts = None  # fetched lazily; shared by creator + source
        for meta_pred in self.metadata_predicates:
            if overwrite:
                num_old_delete = LinkAnnotation.objects\
                                               .filter(subject__in=table_ids,
                                                       predicate_uri=meta_pred)\
                                               .delete()
                print('Deleted annoations ' + str(num_old_delete) + ' for ' + meta_pred)
                add_meta_for_pred = True
            else:
                # only add annotations when none exist for this predicate
                num_exists = LinkAnnotation.objects\
                                           .filter(subject__in=table_ids,
                                                   predicate_uri=meta_pred)[:1]
                if len(num_exists) < 1:
                    add_meta_for_pred = True
                else:
                    add_meta_for_pred = False
            if add_meta_for_pred:
                if meta_pred == 'dc-terms:contributor':
                    # contributors: persons counted by authorship in the table
                    print('Getting contributors for ' + table_id)
                    sorted_author_list = self.get_table_author_counts(table_id)
                    contrib_sort = 0
                    for s_author in sorted_author_list:
                        contrib_sort += 1
                        obj_extra = LastUpdatedOrderedDict()
                        obj_extra['count'] = s_author['count']
                        la = LinkAnnotation()
                        la.subject = man_obj.uuid
                        la.subject_type = man_obj.item_type
                        la.project_uuid = man_obj.project_uuid
                        la.source_id = 'exp-table-manage'
                        la.predicate_uri = meta_pred
                        la.object_uri = URImanagement.make_oc_uri(s_author['uuid'], 'persons')
                        la.creator_uuid = '0'
                        la.sort = contrib_sort
                        la.obj_extra = obj_extra
                        la.save()
                if meta_pred in ['dc-terms:creator', 'dc-terms:source']:
                    # need to get projects for this
                    if proj_uuid_counts is None:
                        # only get this if not gotten yet
                        print('Getting projects for ' + table_id)
                        proj_uuid_counts = self.get_table_project_uuid_counts(table_id)
                    if meta_pred == 'dc-terms:creator':
                        # creators: derived from the table's projects
                        print('Getting creators for ' + table_id)
                        dc_creator_list = self.make_table_dc_creator_list(proj_uuid_counts)
                        create_sort = 0
                        for dc_creator in dc_creator_list:
                            create_sort += 1
                            obj_extra = LastUpdatedOrderedDict()
                            obj_extra['count'] = dc_creator['count']
                            la = LinkAnnotation()
                            la.subject = man_obj.uuid
                            la.subject_type = man_obj.item_type
                            la.project_uuid = man_obj.project_uuid
                            la.source_id = 'exp-table-manage'
                            la.predicate_uri = meta_pred
                            la.object_uri = dc_creator['id']
                            la.creator_uuid = '0'
                            la.sort = create_sort
                            la.obj_extra = obj_extra
                            la.save()
                    if meta_pred == 'dc-terms:source':
                        # sources: one annotation per contributing project
                        print('Getting sources for ' + table_id)
                        proj_sort = 0
                        for proj_uuid_count in proj_uuid_counts:
                            proj_sort += 1
                            obj_extra = LastUpdatedOrderedDict()
                            obj_extra['count'] = proj_uuid_count['num_uuids']
                            la = LinkAnnotation()
                            la.subject = man_obj.uuid
                            la.subject_type = man_obj.item_type
                            la.project_uuid = man_obj.project_uuid
                            la.source_id = 'exp-table-manage'
                            la.predicate_uri = meta_pred
                            la.object_uri = URImanagement.make_oc_uri(proj_uuid_count['project_uuid'], 'projects')
                            la.creator_uuid = '0'
                            la.sort = proj_sort
                            la.obj_extra = obj_extra
                            la.save()
                if meta_pred == 'dc-terms:subject':
                    # subjects: category/class annotations for the table
                    print('Getting subjects for ' + table_id)
                    dc_subject_list = self.make_table_dc_subject_category_list(table_id)
                    subj_sort = 0
                    for dc_subject in dc_subject_list:
                        subj_sort += 1
                        obj_extra = LastUpdatedOrderedDict()
                        obj_extra['count'] = dc_subject['count']
                        la = LinkAnnotation()
                        la.subject = man_obj.uuid
                        la.subject_type = man_obj.item_type
                        la.project_uuid = man_obj.project_uuid
                        la.source_id = 'exp-table-manage'
                        la.predicate_uri = meta_pred
                        la.object_uri = dc_subject['id']
                        la.creator_uuid = '0'
                        la.sort = subj_sort
                        la.obj_extra = obj_extra
                        la.save()
def addto_predicate_list(self,
                         act_dict,
                         act_pred_key,
                         object_id,
                         item_type,
                         do_slug_uri=False,
                         add_hash_id=False):
    """Add an object item to the list kept under act_pred_key in a
    JSON-LD dict, creating the list if needed and skipping duplicates.

    :param act_dict: JSON-LD dict being assembled
    :param act_pred_key: predicate key whose list receives the object
    :param object_id: identifier (uuid or uri) of the object item
    :param item_type: Open Context item type of the object
    :param do_slug_uri: if True, build the item 'id' from object_id +
        item_type instead of the dereferenced entity's uri
    :param add_hash_id: optional hash_id recorded on the new item
    :return: act_dict, with the object appended if not already present
    """
    # first get the identifiers for objects that
    # may already be listed with this predicate. This prevents duplication
    object_ids = []
    if act_pred_key in act_dict:
        for obj in act_dict[act_pred_key]:
            if 'id' in obj:
                object_ids.append(obj['id'])
            elif '@id' in obj:
                object_ids.append(obj['@id'])
    else:
        act_dict[act_pred_key] = []
    new_object_item = None
    ent = self.get_object_item_entity(object_id, item_type)
    if ent is not False:
        new_object_item = LastUpdatedOrderedDict()
        if add_hash_id is not False:
            new_object_item['hash_id'] = add_hash_id
        if do_slug_uri:
            # make the item's ID based on a slug for an item
            new_object_item['id'] = URImanagement.make_oc_uri(object_id, item_type)
        else:
            # normal URI
            new_object_item['id'] = ent.uri
        new_object_item['slug'] = ent.slug
        if isinstance(ent.label, str):
            new_object_item['label'] = ent.label
        else:
            new_object_item['label'] = 'No record of label'
        if isinstance(ent.thumbnail_uri, str):
            new_object_item['oc-gen:thumbnail-uri'] = ent.thumbnail_uri
        if isinstance(ent.content, str) and ent.content != ent.label:
            new_object_item['rdfs:comment'] = ent.content
        if isinstance(ent.class_uri, str) and item_type in self.ITEM_TYPE_CLASS_LIST:
            new_object_item['type'] = ent.class_uri
            if ent.class_uri not in self.class_uri_list:
                # list of unique open context item classes
                self.class_uri_list.append(ent.class_uri)
        if isinstance(self.predicate_uri_as_stable_id, str) and item_type == 'predicates':
            # we need to add a the full uri to make this predicate
            new_object_item[self.predicate_uri_as_stable_id] = URImanagement.make_oc_uri(ent.uuid, item_type)
        if hasattr(ent, 'stable_id_uris'):
            if ent.stable_id_uris is not False \
               and isinstance(self.stable_id_predicate, str):
                if len(ent.stable_id_uris) > 0:
                    # add a stable identifier URI using the appropriate predicate.
                    # just adds the first such identifier
                    if isinstance(self.stable_id_prefix_limit, str):
                        # restrict to ids matching the configured prefix
                        for stable_id_uri in ent.stable_id_uris:
                            if self.stable_id_prefix_limit in stable_id_uri:
                                # we have a stable ID of the correct prefix
                                new_object_item[self.stable_id_predicate] = stable_id_uri
                                break
                    else:
                        new_object_item[self.stable_id_predicate] = ent.stable_id_uris[0]
    elif act_pred_key == 'oc-gen:hasIcon':
        # icon objects don't dereference; use the raw id
        new_object_item = {'id': object_id}
    # OK now check to see if the new object is already listed with the predicate
    if new_object_item is not None:
        if new_object_item['id'] not in object_ids:
            # only add it if it does not exist yet
            act_dict[act_pred_key].append(new_object_item)
    return act_dict
def dereference(self, identifier, link_entity_slug=False):
    """ Dereferences an entity identified by an identifier, checks if
        a URI, if, not a URI, then looks in the OC manifest for the item

        :param identifier: URI, prefixed URI, UUID, or slug for the entity
        :param link_entity_slug: when True, allow linked-data slug lookups
            even for identifiers that do not look like URIs
        :return: True when the entity was found; instance attributes
            (uri, slug, label, item_type, ...) are populated as a side
            effect. False when nothing matched.
    """
    output = False
    try_manifest = True
    # normalize prefixed identifiers (e.g. 'dcterms:title') to full URIs
    identifier = URImanagement.convert_prefix_to_full_uri(identifier)
    if(link_entity_slug or (len(identifier) > 8)):
        if(link_entity_slug or (identifier[:7] == 'http://' or identifier[:8] == 'https://')):
            try:
                # looks like linked data; only fall back to the manifest
                # if this lookup fails
                try_manifest = False
                ld_entity = LinkEntity.objects.get(Q(uri=identifier) | Q(slug=identifier))
            except LinkEntity.DoesNotExist:
                ld_entity = False
            if(ld_entity is not False):
                output = True
                # copy the linked-data entity's fields onto this instance
                self.uri = ld_entity.uri
                self.slug = ld_entity.slug
                self.label = ld_entity.label
                self.item_type = 'uri'
                self.alt_label = ld_entity.alt_label
                self.entity_type = ld_entity.ent_type
                self.vocab_uri = ld_entity.vocab_uri
                self.ld_object_ok = True
                try:
                    # look up the label of the entity's parent vocabulary
                    vocab_entity = LinkEntity.objects.get(uri=self.vocab_uri)
                except LinkEntity.DoesNotExist:
                    vocab_entity = False
                if(vocab_entity is not False):
                    self.vocabulary = vocab_entity.label
                if self.get_icon:
                    # find an icon annotation, trying the stored URI, the
                    # raw identifier, and the prefixed form of the URI
                    prefix_uri = URImanagement.prefix_common_uri(ld_entity.uri)
                    icon_anno = LinkAnnotation.objects\
                                              .filter(Q(subject=ld_entity.uri)
                                                      | Q(subject=identifier)
                                                      | Q(subject=prefix_uri),
                                                      predicate_uri='oc-gen:hasIcon')[:1]
                    if len(icon_anno) > 0:
                        self.icon = icon_anno[0].object_uri
            else:
                try_manifest = True
                # couldn't find the item in the linked entities table;
                # extract a UUID (if any) and try the manifest instead
                identifier = URImanagement.get_uuid_from_oc_uri(identifier)
    if(try_manifest):
        try:
            manifest_item = Manifest.objects.get(Q(uuid=identifier) | Q(slug=identifier))
        except Manifest.DoesNotExist:
            manifest_item = False
        if(manifest_item is not False):
            output = True
            # copy the manifest record's fields onto this instance
            self.uri = URImanagement.make_oc_uri(manifest_item.uuid, manifest_item.item_type)
            self.uuid = manifest_item.uuid
            self.slug = manifest_item.slug
            self.label = manifest_item.label
            self.item_type = manifest_item.item_type
            self.class_uri = manifest_item.class_uri
            self.project_uuid = manifest_item.project_uuid
            if(manifest_item.item_type == 'media' and self.get_thumbnail):
                # a media item. get information about its thumbnail.
                try:
                    thumb_obj = Mediafile.objects.get(uuid=manifest_item.uuid, file_type='oc-gen:thumbnail')
                except Mediafile.DoesNotExist:
                    thumb_obj = False
                if thumb_obj is not False:
                    self.thumbnail_media = thumb_obj
                    self.thumbnail_uri = thumb_obj.file_uri
            elif(manifest_item.item_type == 'types'):
                # fetch descriptive content for a type item
                tl = TypeLookup()
                tl.get_octype_without_manifest(identifier)
                self.content = tl.content
            elif(manifest_item.item_type == 'predicates'):
                # fetch the data type for a predicate item
                try:
                    oc_pred = Predicate.objects.get(uuid=manifest_item.uuid)
                except Predicate.DoesNotExist:
                    oc_pred = False
                if(oc_pred is not False):
                    self.data_type = oc_pred.data_type
            elif(manifest_item.item_type == 'subjects' and self.get_context):
                # fetch the spatial context path for a subject item
                try:
                    subj = Subject.objects.get(uuid=manifest_item.uuid)
                except Subject.DoesNotExist:
                    subj = False
                if subj is not False:
                    self.context = subj.context
    return output
def make_graph(self):
    """ Builds the graph of VoID assertions: Open Context as a whole,
        then each publicly-available project as a void:Dataset subset
        with a data-dump URI and Dublin Core metadata. """
    lang_obj = Languages()  # kept for parity with original; not used below
    # get a list of project manifest + projects objects,
    # filtered for publicly available projects only
    project_gatherer = PelagiosProjects()
    project_gatherer.request = self.request
    self.man_proj_objs = project_gatherer.get_projects()
    # first, assertions about Open Context itself
    oc_projs_uri = settings.CANONICAL_HOST + '/projects/'
    self.make_add_triple(oc_projs_uri,
                         RDF.type,
                         self.make_full_uri('void', 'Dataset'))
    self.make_add_triple(oc_projs_uri,
                         self.make_full_uri('dcterms', 'title'),
                         None,
                         settings.CANONICAL_SITENAME)
    self.make_add_triple(oc_projs_uri,
                         self.make_full_uri('dcterms', 'description'),
                         None,
                         settings.HOST_TAGLINE)
    self.make_add_triple(oc_projs_uri,
                         self.make_full_uri('foaf', 'homepage'),
                         settings.CANONICAL_HOST)
    # assertions about Web data and Open Context
    self.make_add_web_dataset_assertions()
    # each project is a subset of the Open Context dataset
    for item in self.man_proj_objs:
        manifest = item['man']
        self.make_add_triple(oc_projs_uri,
                             self.make_full_uri('void', 'subset'),
                             URImanagement.make_oc_uri(manifest.uuid, manifest.item_type))
    # per-project assertions, especially the datadump URI
    for item in self.man_proj_objs:
        manifest = item['man']
        project = item['proj']
        proj_uri = URImanagement.make_oc_uri(manifest.uuid, manifest.item_type)
        dump_uri = self.base_uri + manifest.uuid
        self.make_add_triple(proj_uri,
                             RDF.type,
                             self.make_full_uri('void', 'Dataset'))
        self.make_add_triple(proj_uri,
                             self.make_full_uri('void', 'dataDump'),
                             dump_uri)
        # NOTE: a foaf:homepage triple for proj_uri was previously
        # disabled here in the original source.
        self.make_add_triple(proj_uri,
                             self.make_full_uri('dcterms', 'publisher'),
                             None,
                             settings.CANONICAL_SITENAME)
        # the license applies to the data dump resource
        self.make_add_triple(dump_uri,
                             self.make_full_uri('dcterms', 'license'),
                             self.OA_LICENSE)
        self.make_add_triple(proj_uri,
                             self.make_full_uri('dcterms', 'title'),
                             None,
                             manifest.label)
        self.make_add_triple(proj_uri,
                             self.make_full_uri('dcterms', 'description'),
                             None,
                             project.short_des)
        if manifest.published is not None:
            self.make_add_triple(proj_uri,
                                 self.make_full_uri('dcterms', 'issued'),
                                 None,
                                 manifest.published.date().isoformat())
        if manifest.revised is not None:
            self.make_add_triple(proj_uri,
                                 self.make_full_uri('dcterms', 'modified'),
                                 None,
                                 manifest.revised.date().isoformat())
def dereference(self, identifier, link_entity_slug=False):
    """ Dereferences an entity identified by an identifier, checks if
        a URI, if, not a URI, then looks in the OC manifest for the item

        :param identifier: URI, prefixed URI, UUID, or slug for the entity
        :param link_entity_slug: when True, allow linked-data slug lookups
            even for identifiers that do not look like URIs
        :return: True when the entity was found; instance attributes
            (uri, slug, label, item_type, ...) are populated as a side
            effect. False when nothing matched.

        NOTE(review): this appears to duplicate another dereference
        implementation in this codebase — consider consolidating.
    """
    output = False
    try_manifest = True
    # normalize prefixed identifiers (e.g. 'dcterms:title') to full URIs
    identifier = URImanagement.convert_prefix_to_full_uri(identifier)
    if (link_entity_slug or (len(identifier) > 8)):
        if (link_entity_slug or (identifier[:7] == 'http://' or identifier[:8] == 'https://')):
            try:
                # looks like linked data; only fall back to the manifest
                # if this lookup fails
                try_manifest = False
                ld_entity = LinkEntity.objects.get(
                    Q(uri=identifier) | Q(slug=identifier))
            except LinkEntity.DoesNotExist:
                ld_entity = False
            if (ld_entity is not False):
                output = True
                # copy the linked-data entity's fields onto this instance
                self.uri = ld_entity.uri
                self.slug = ld_entity.slug
                self.label = ld_entity.label
                self.item_type = 'uri'
                self.alt_label = ld_entity.alt_label
                self.entity_type = ld_entity.ent_type
                self.vocab_uri = ld_entity.vocab_uri
                self.ld_object_ok = True
                try:
                    # look up the label of the entity's parent vocabulary
                    vocab_entity = LinkEntity.objects.get(
                        uri=self.vocab_uri)
                except LinkEntity.DoesNotExist:
                    vocab_entity = False
                if (vocab_entity is not False):
                    self.vocabulary = vocab_entity.label
                if self.get_icon:
                    # find an icon annotation, trying the stored URI, the
                    # raw identifier, and the prefixed form of the URI
                    prefix_uri = URImanagement.prefix_common_uri(
                        ld_entity.uri)
                    icon_anno = LinkAnnotation.objects\
                        .filter(Q(subject=ld_entity.uri)
                                | Q(subject=identifier)
                                | Q(subject=prefix_uri),
                                predicate_uri='oc-gen:hasIcon')[:1]
                    if len(icon_anno) > 0:
                        self.icon = icon_anno[0].object_uri
            else:
                try_manifest = True
                # couldn't find the item in the linked entities table;
                # extract a UUID (if any) and try the manifest instead
                identifier = URImanagement.get_uuid_from_oc_uri(identifier)
    if (try_manifest):
        try:
            manifest_item = Manifest.objects.get(
                Q(uuid=identifier) | Q(slug=identifier))
        except Manifest.DoesNotExist:
            manifest_item = False
        if (manifest_item is not False):
            output = True
            # copy the manifest record's fields onto this instance
            self.uri = URImanagement.make_oc_uri(manifest_item.uuid,
                                                 manifest_item.item_type)
            self.uuid = manifest_item.uuid
            self.slug = manifest_item.slug
            self.label = manifest_item.label
            self.item_type = manifest_item.item_type
            self.class_uri = manifest_item.class_uri
            self.project_uuid = manifest_item.project_uuid
            if (manifest_item.item_type == 'media' and self.get_thumbnail):
                # a media item. get information about its thumbnail.
                try:
                    thumb_obj = Mediafile.objects.get(
                        uuid=manifest_item.uuid,
                        file_type='oc-gen:thumbnail')
                except Mediafile.DoesNotExist:
                    thumb_obj = False
                if thumb_obj is not False:
                    self.thumbnail_media = thumb_obj
                    self.thumbnail_uri = thumb_obj.file_uri
            elif (manifest_item.item_type == 'types'):
                # fetch descriptive content for a type item
                tl = TypeLookup()
                tl.get_octype_without_manifest(identifier)
                self.content = tl.content
            elif (manifest_item.item_type == 'predicates'):
                # fetch the data type for a predicate item
                try:
                    oc_pred = Predicate.objects.get(
                        uuid=manifest_item.uuid)
                except Predicate.DoesNotExist:
                    oc_pred = False
                if (oc_pred is not False):
                    self.data_type = oc_pred.data_type
            elif (manifest_item.item_type == 'subjects' and self.get_context):
                # fetch the spatial context path for a subject item
                try:
                    subj = Subject.objects.get(uuid=manifest_item.uuid)
                except Subject.DoesNotExist:
                    subj = False
                if subj is not False:
                    self.context = subj.context
    return output
def save_basic_default_field_cells(self, row_num, man): """ Saves the default fields that do not involve containment lookups """ # save URI cell = ExpCell() cell.table_id = self.table_id cell.uuid = man.uuid cell.project_uuid = man.project_uuid cell.row_num = row_num cell.field_num = 1 cell.record = URImanagement.make_oc_uri(man.uuid, man.item_type) cell.save() cell = None # save label cell = ExpCell() cell.table_id = self.table_id cell.uuid = man.uuid cell.project_uuid = man.project_uuid cell.row_num = row_num cell.field_num = 2 cell.record = man.label cell.save() cell = None # save project label cell = ExpCell() cell.table_id = self.table_id cell.uuid = man.uuid cell.project_uuid = man.project_uuid cell.row_num = row_num cell.field_num = 3 cell.record = self.deref_entity_label(man.project_uuid) cell.save() cell = None # save project URI cell = ExpCell() cell.table_id = self.table_id cell.uuid = man.uuid cell.project_uuid = man.project_uuid cell.row_num = row_num cell.field_num = 4 cell.record = URImanagement.make_oc_uri(man.project_uuid, 'projects') cell.save() cell = None # save item category / class cell = ExpCell() cell.table_id = self.table_id cell.uuid = man.uuid cell.project_uuid = man.project_uuid cell.row_num = row_num cell.field_num = 5 cell.record = self.deref_entity_label(man.class_uri) cell.save() cell = None # last updated if man.revised is datetime: last_update = man.revised else: last_update = man.record_updated cell = ExpCell() cell.table_id = self.table_id cell.uuid = man.uuid cell.project_uuid = man.project_uuid cell.row_num = row_num cell.field_num = 6 cell.record = last_update.strftime('%Y-%m-%d') cell.save() cell = None
def add_geojson(self, json_ld):
    """
    adds geospatial and event data that links time and space information

    Builds GeoJSON features from this item's geospatial records
    (geo_meta), attaches chronology ("when") data from event records
    (event_meta), and merges the result into json_ld: a single feature
    is merged directly, multiple features become a FeatureCollection.

    :param json_ld: dict (JSON-LD) the geospatial data is merged into
    :return: json_ld with geospatial / event data added
    """
    uuid = self.manifest.uuid
    item_type = self.manifest.item_type
    geo_meta = self.geo_meta
    event_meta = self.event_meta
    features_dict = False  # dict of all features to be added
    feature_events = False  # mappings between features and time periods
    if geo_meta is not False:
        features_dict = LastUpdatedOrderedDict()
        feature_events = LastUpdatedOrderedDict()
        for geo in geo_meta:
            geo_id = geo.feature_id
            # node ids for the feature from the database record and for
            # any feature derived from it (e.g. a center point)
            geo_node = '#geo-' + str(geo_id)
            geo_node_geom = '#geo-geom-' + str(geo_id)
            geo_node_props = '#geo-props-' + str(geo_id)
            geo_node_derived = '#geo-derived-' + str(geo_id)
            geo_node_derived_geom = '#geo-derived-geom-' + str(geo_id)
            geo_node_derived_props = '#geo-derived-props-' + str(geo_id)
            feature_events[geo_node] = []
            geo_props = LastUpdatedOrderedDict()
            geo_props['href'] = URImanagement.make_oc_uri(uuid,
                                                          item_type,
                                                          self.cannonical_uris)
            geo_props['type'] = geo.meta_type
            if len(geo.note) > 0:
                geo_props['note'] = geo.note
            if uuid != geo.uuid:
                # the geospatial data comes from another (containing) item
                geo_props['reference-type'] = 'inferred'
                geo_props['reference-uri'] = URImanagement.make_oc_uri(geo.uuid,
                                                                       'subjects',
                                                                       self.cannonical_uris)
                rel_meta = self.item_gen_cache.get_entity(geo.uuid)
                if rel_meta is not False:
                    geo_props['reference-label'] = rel_meta.label
                    geo_props['reference-slug'] = rel_meta.slug
            else:
                geo_props['reference-label'] = self.manifest.label
                geo_props['reference-type'] = 'specified'
                if self.assertion_hashes:
                    geo_props['hash_id'] = geo.hash_id
                    geo_props['feature_id'] = geo.feature_id
            if geo.specificity < 0 and self.manifest.item_type != 'projects':
                # case where we've got reduced precision geospatial data:
                # output a quadtree-tile region plus its center point
                geo_props['location-precision'] = abs(geo.specificity)
                geo_props['location-precision-note'] = 'Location data approximated as a security precaution.'
                gmt = GlobalMercator()
                geotile = gmt.lat_lon_to_quadtree(geo.latitude,
                                                  geo.longitude,
                                                  abs(geo.specificity))
                tile_bounds = gmt.quadtree_to_lat_lon(geotile)
                item_polygon = Polygon([[(tile_bounds[1], tile_bounds[0]),
                                         (tile_bounds[1], tile_bounds[2]),
                                         (tile_bounds[3], tile_bounds[2]),
                                         (tile_bounds[3], tile_bounds[0]),
                                         (tile_bounds[1], tile_bounds[0])]])
                item_f_poly = Feature(geometry=item_polygon)
                item_f_poly.id = geo_node_derived
                item_f_poly.geometry.id = geo_node_derived_geom
                item_f_poly.properties.update(geo_props)
                item_f_poly.properties['location-note'] = 'This region defines the '\
                                                          'approximate location for this item.'
                item_f_poly.properties['id'] = geo_node_derived_props
                features_dict[geo_node_derived] = item_f_poly
                item_point = Point((float(geo.longitude), float(geo.latitude)))
                item_f_point = Feature(geometry=item_point)
                item_f_point.id = geo_node
                item_f_point.geometry.id = geo_node_geom
                item_f_point.properties.update(geo_props)
                item_f_point.properties['location-note'] = 'This point defines the center of the '\
                                                           'region approximating the location for this item.'
                item_f_point.properties['id'] = geo_node_props
                features_dict[geo_node] = item_f_point
            elif len(geo.coordinates) > 1:
                # here we have geo_json expressed features and geometries to use
                if geo.specificity < 0:
                    geo_props['location-precision-note'] = 'Location data approximated as a security precaution.'
                elif geo.specificity > 0:
                    geo_props['location-precision-note'] = 'Location data has uncertainty.'
                else:
                    geo_props['location-precision-note'] = 'Location data available with no '\
                                                           'intentional reduction in precision.'
                item_point = Point((float(geo.longitude), float(geo.latitude)))
                item_f_point = Feature(geometry=item_point)
                item_f_point.properties.update(geo_props)
                if uuid == geo.uuid:
                    # the item itself has the polygon as it's feature
                    item_db = Point((float(geo.longitude), float(geo.latitude)))
                    if geo.ftype == 'Polygon':
                        coord_obj = json.loads(geo.coordinates)
                        item_db = Polygon(coord_obj)
                    elif geo.ftype == 'MultiPolygon':
                        coord_obj = json.loads(geo.coordinates)
                        item_db = MultiPolygon(coord_obj)
                    elif geo.ftype == 'MultiLineString':
                        coord_obj = json.loads(geo.coordinates)
                        item_db = MultiLineString(coord_obj)
                    item_f_db = Feature(geometry=item_db)
                    item_f_db.id = geo_node
                    item_f_db.geometry.id = geo_node_geom
                    item_f_db.properties.update(geo_props)
                    item_f_db.properties['id'] = geo_node_props
                    features_dict[geo_node] = item_f_db
                    # the center point becomes a derived feature
                    item_f_point.id = geo_node_derived
                    item_f_point.geometry.id = geo_node_derived_geom
                    item_f_point.properties['location-region-note'] = 'This point represents the center of the '\
                                                                      'region defining the location of this item.'
                    item_f_point.properties['id'] = geo_node_derived_props
                    features_dict[geo_node_derived] = item_f_point
                else:
                    # the item is contained within another item with a polygon or multipolygon feature
                    item_f_point.id = geo_node
                    item_f_point.geometry.id = geo_node_geom
                    item_f_point.properties['id'] = geo_node_props
                    item_f_point.properties['contained-in-region'] = True
                    item_f_point.properties['location-region-note'] = 'This point represents the center of the '\
                                                                      'region containing this item.'
                    features_dict[geo_node] = item_f_point
            else:
                # case where the item only has a point for geo-spatial reference
                geo_props['location-note'] = 'Location data available with no intentional reduction in precision.'
                item_point = Point((float(geo.longitude), float(geo.latitude)))
                item_f_point = Feature(geometry=item_point)
                item_f_point.id = geo_node
                item_f_point.geometry.id = geo_node_geom
                item_f_point.properties.update(geo_props)
                item_f_point.properties['id'] = geo_node_props
                features_dict[geo_node] = item_f_point
    if event_meta is not False:
        # events provide chrological information, tied to geo features
        # sometimes there are more than 1 time period for each geo feature
        # in such cases, we duplicate geo features and add the different time event
        # information to the new features
        for event in event_meta:
            rel_feature_num = 1  # default to the first geospatial feature for where the event happened
            rel_feature_node = False
            if event.feature_id > 0:
                rel_feature_num = event.feature_id
            if rel_feature_num >= 1:
                rel_feature_node = '#geo-' + str(rel_feature_num)
            act_event_obj = LastUpdatedOrderedDict()
            act_event_obj = self.add_when_json(act_event_obj, uuid, item_type, event)
            if rel_feature_node is not False and feature_events is not False:
                feature_events[rel_feature_node].append(act_event_obj)
    if features_dict is not False:
        if feature_events is not False:
            for node_key, event_list in feature_events.items():
                # update the feature with the first event "when" information
                if len(event_list) > 0:
                    features_dict[node_key].update(event_list[0])
                    event_i = 1
                    for event in event_list:
                        if event_i <= 1:
                            # add the time info to the feature
                            old_feature = features_dict[node_key]
                            old_geo_id = old_feature.geometry['id']
                            old_prop_id = old_feature.properties['id']
                            features_dict[node_key].update(event)
                        else:
                            # more than one event for this feature: clone the
                            # feature and give the clone new node ids
                            act_feature = copy.deepcopy(old_feature)
                            new_node = node_key + '-event-' + str(event_i)
                            act_feature.id = new_node
                            act_feature.geometry['id'] = old_geo_id + '-event-' + str(event_i)
                            act_feature.properties['id'] = old_prop_id + '-event-' + str(event_i)
                            act_feature.update(event)  # add the time info to the new feature
                            features_dict[new_node] = act_feature
                            del act_feature
                        event_i += 1
        feature_keys = list(features_dict.keys())
        # BUGFIX: this condition was `len(feature_keys) < 1`, which both
        # crashed on an empty dict (indexing feature_keys[0]) and sent
        # single-feature items down the FeatureCollection branch, contrary
        # to the comment below. A single feature merges directly.
        if len(feature_keys) == 1:
            del features_dict[feature_keys[0]]['id']  # remove the conflicting id
            # only 1 feature, so item is not a feature collection
            json_ld.update(features_dict[feature_keys[0]])
        else:
            feature_list = []  # multiple features, so item has a feature collection
            for node_key, feature in features_dict.items():
                feature_list.append(feature)
            item_fc = FeatureCollection(feature_list)
            json_ld.update(item_fc)
    return json_ld