def make_spatial_context_json_ld(self, raw_contexts):
    """Build the JSON-LD spatial context object from raw parent contexts.

    :param raw_contexts: mapping of a tree-node identifier to its list of
        parent uuids (reversed below so the top-most parent comes first).
    :return: the context dict built for the last tree node iterated, or an
        empty dict when ``raw_contexts`` has no entries.

    Side effects: extends ``self.class_uri_list`` for every tree node and,
    for the first tree node only, appends the labels of its path items to
    ``self.parent_context_list``.
    """
    path_key = ItemKeys.PREDICATES_OCGEN_HASPATHITEMS
    labels_pending = True
    context = LastUpdatedOrderedDict()
    for node_id, child_first_parents in raw_contexts.items():
        context = LastUpdatedOrderedDict()
        # the parent node is identified as a "context", not "contents"
        context['id'] = node_id.replace('contents', 'context')
        context['type'] = 'oc-gen:contexts'
        # flip the list so the top-most parent leads and children follow
        top_down_parents = child_first_parents[::-1]
        parts = PartsJsonLD()
        parts.class_uri_list += self.class_uri_list
        if len(top_down_parents) > 3:
            # many parents: fetch all their manifest objects up front
            # instead of looking each one up individually
            parts.get_manifest_objects_from_uuids(top_down_parents)
        for ancestor_uuid in top_down_parents:
            context = parts.addto_predicate_list(
                context,
                path_key,
                ancestor_uuid,
                'subjects')
        self.class_uri_list += parts.class_uri_list
        if not labels_pending:
            continue
        # only the first tree node contributes the parent labels that are
        # later used for making a dc-term:title
        labels_pending = False
        if path_key in context:
            for path_item in context[path_key]:
                self.parent_context_list.append(path_item['label'])
    return context
def web_dump(self, table_id):
    """Write an export table as a CSV attachment into an HTTP response.

    :param table_id: identifier of the export table; also used to build the
        download filename ``oc-table-<table_id>.csv``.
    :return: the ``HttpResponse`` carrying the CSV content.

    Cells are grouped into rows by ``row_num``: whenever a cell's row number
    exceeds the current one, the accumulated row is written out.
    NOTE(review): this relies on ``ExportTableDump(...).cells`` yielding
    cells ordered by row number -- confirm against that class.
    """
    self.table_id = table_id
    self.get_table_fields()
    self.get_max_row_number()
    filename = 'oc-table-' + table_id + '.csv'
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="' + filename + '"'
    writer = csv.writer(response, dialect='excel', quoting=csv.QUOTE_ALL)
    # the field labels go in the first row
    writer.writerow(self.field_name_row)
    current_row_num = 1
    row_cells = LastUpdatedOrderedDict()
    for cell in ExportTableDump(self.table_id).cells:
        if cell['row_num'] > current_row_num:
            # advanced to the next row, so flush the finished one
            self.compose_write_row(writer, row_cells)
            row_cells = LastUpdatedOrderedDict()
            current_row_num = cell['row_num']
        row_cells[cell['field_num']] = cell['record']
    # write the last accumulated row
    self.compose_write_row(writer, row_cells)
    return response
def classify_xml_attributes_to_objects(self, cont_vocabs):
    """Convert controlled-vocabulary XML nodes into a dict of object dicts.

    :param cont_vocabs: iterable of XML nodes supporting ``.xpath()``; each
        is read for ``VocabID``, ``VocabName``, ``AttributeID``,
        ``VocabCountOrder`` and optionally ``Arch16n`` / ``VocabDescription``.
    :return: ordered dict keyed by vocab id; each value holds ``id``,
        ``label``, ``faims_attrib_id``, ``faims_internal_str``, ``sort`` and
        ``note``.

    The label prefers the ``Arch16n`` text and falls back to ``VocabName``.
    ``sort`` is 0 when the count-order text is missing or not numeric.
    ``note`` stays ``None`` when there is no description text.
    """
    # local import: the file's import block is outside the visible source
    import html

    objects_dict = LastUpdatedOrderedDict()
    for vocab in cont_vocabs:
        vocab_id = vocab.xpath('VocabID')[0].text
        obj_dict = LastUpdatedOrderedDict()
        obj_dict['id'] = vocab_id
        arch16n_nodes = vocab.xpath('Arch16n')
        if len(arch16n_nodes) > 0:
            obj_dict['label'] = arch16n_nodes[0].text
        else:
            obj_dict['label'] = vocab.xpath('VocabName')[0].text
        obj_dict['faims_attrib_id'] = vocab.xpath('AttributeID')[0].text
        obj_dict['faims_internal_str'] = vocab.xpath('VocabName')[0].text
        sort_str = vocab.xpath('VocabCountOrder')[0].text
        try:
            obj_dict['sort'] = float(sort_str)
        except (TypeError, ValueError):
            # missing (None) or non-numeric sort text
            obj_dict['sort'] = 0
        obj_dict['note'] = None
        for note_node in vocab.xpath('VocabDescription'):
            note_text = note_node.text
            if note_text is None:
                # empty description element, nothing to record
                continue
            if '<' in note_text and '>' in note_text:
                # has HTML markup, so decode any character references.
                # The original called a non-existent str.unescape() here.
                obj_dict['note'] = html.unescape(note_text)
            else:
                obj_dict['note'] = note_text
        objects_dict[vocab_id] = obj_dict
    return objects_dict
def add_project_types_with_annotations_to_graph(self, graph):
    """Append one node per annotated project type to ``graph``.

    :param graph: list-like object with ``append``; it is mutated in place.
    :return: the same ``graph`` object.

    Rows from ``self.get_working_project_types()`` are consolidated so each
    type URI yields a single node that accumulates all its
    predicate / object annotations. Nothing is added when that call does
    not return a list.
    """
    sql_rows = self.get_working_project_types()
    if not isinstance(sql_rows, list):
        return graph
    # consolidate so that a given type appears only once in the graph
    types_by_uri = LastUpdatedOrderedDict()
    for row in sql_rows:
        uri = URImanagement.make_oc_uri(row['type_uuid'], 'types')
        if uri in types_by_uri:
            type_node = types_by_uri[uri]
        else:
            type_node = LastUpdatedOrderedDict()
            type_node['@id'] = uri
            type_node['label'] = row['type_label']
            type_node['owl:sameAs'] = URImanagement.make_oc_uri(
                row['type_slug'], 'types')
            type_node['uuid'] = row['type_uuid']
            type_node['slug'] = row['type_slug']
        pred_uri = URImanagement.prefix_common_uri(row['predicate_uri'])
        types_by_uri[uri] = self.add_unique_object_dict_to_pred(
            type_node, pred_uri, row['object_uri'])
    for type_node in types_by_uri.values():
        graph.append(type_node)
    return graph
def get_item_media_files(self, man_obj):
    """Collect the media file URIs of a manifest item for archiving.

    :param man_obj: a ``Manifest`` instance; any other value yields an empty
        result.
    :return: ordered dict keyed by file URI (any ``#fragment`` removed).
        Each value has ``filename``, ``dc-terms:isPartOf`` and ``type``, a
        list of the file types that share that URI.

    Files are visited in the order of ``self.ARCHIVE_FILE_TYPES`` and, within
    a type, largest first (the query orders by ``-filesize``).
    """
    files_dict = LastUpdatedOrderedDict()
    if not isinstance(man_obj, Manifest):
        return files_dict
    med_files = Mediafile.objects\
        .filter(uuid=man_obj.uuid,
                file_type__in=self.ARCHIVE_FILE_TYPES)\
        .order_by('-filesize')
    for act_type in self.ARCHIVE_FILE_TYPES:
        for med_file in med_files:
            if med_file.file_type != act_type:
                continue
            # drop any fragment identifier; files differing only by
            # fragment are treated as the same file
            file_uri = med_file.file_uri.split('#')[0]
            if file_uri not in files_dict:
                act_dict = LastUpdatedOrderedDict()
                act_dict['filename'] = self.make_archival_file_name(
                    med_file.file_type, man_obj.slug, file_uri)
                act_dict['dc-terms:isPartOf'] = URImanagement.make_oc_uri(
                    man_obj.uuid, man_obj.item_type)
                act_dict['type'] = []
                files_dict[file_uri] = act_dict
            files_dict[file_uri]['type'].append(med_file.file_type)
    return files_dict
def add_json_ld_descriptive_assertions(self, json_ld):
    """Add descriptive assertions to ``json_ld``, grouped into observations.

    Assertions whose predicate is in ``self.NO_OBS_ASSERTION_PREDS`` are
    added directly to the item; all others are collected into one
    observation dict per ``obs_num``. Observations are emitted in the order
    given by ``self.obs_list``, and the observation key is only set when at
    least one observation exists.

    :param json_ld: the item's JSON-LD dict.
    :return: the updated JSON-LD dict.
    """
    obs_by_num = LastUpdatedOrderedDict()
    for assertion in self.assertions:
        obs_num = assertion.obs_num
        if assertion.predicate_uuid in self.NO_OBS_ASSERTION_PREDS:
            # this predicate does not belong inside an observation
            json_ld = self.add_json_ld_direct_assertion(json_ld, assertion)
            continue
        if obs_num in obs_by_num:
            obs = obs_by_num[obs_num]
        else:
            # first assertion seen for this observation number
            obs = self.make_json_ld_obs_dict_w_metadata(assertion)
            obs_by_num[obs_num] = obs
        obs_by_num[obs_num] = self.add_json_ld_assertion_predicate_objects(
            obs, assertion)
    # emit the observations in the order dictated by self.obs_list
    observations = [
        obs_by_num[num] for num in self.obs_list if num in obs_by_num
    ]
    if observations:
        json_ld[ItemKeys.PREDICATES_OCGEN_HASOBS] = observations
    return json_ld
def db_save_reconcile_entity_predicates_types(self, act_dir):
    """Save or reconcile the entity-type predicate and its types.

    Loads the entity-types dict from the file named by
    ``self.oc_config_entity_types`` in ``act_dir``. If any entity type asks
    to be recorded as a descriptive attribute, one predicate is fetched or
    created for the project, then one type per such entity type. The
    resulting uuids are stored back on the entity dicts and the dict is
    re-serialized.

    :param act_dir: directory holding the serialized JSON configuration.
    :return: None. Prints a message and does nothing when the file is
        missing.

    NOTE(review): the source had lost its indentation; the final save is
    placed at the end of the "file found" branch, mirroring the sibling
    attribute method -- confirm against version control.
    """
    key = self.oc_config_entity_types
    json_obj = self.fm.get_dict_from_file(key, act_dir)
    if json_obj is None:
        print('Need to 1st generate an attributes file from the ArchEnts!')
        return
    # we have JSON with a dictionary for the entity_types
    self.entity_types = json_obj

    def needs_type_attribute(ent_dict):
        # entity types that should also be described by an attribute
        return (isinstance(ent_dict['item_type'], str)
                and ent_dict['add_type_as_attribute'])

    if any(needs_type_attribute(ent) for ent in json_obj.values()):
        # create (or reconcile) the predicate that describes entity types
        # for this project
        sup_dict = LastUpdatedOrderedDict()
        sup_dict[self.reconcile_key] = self.ent_type_pred_sup_id
        pm = PredicateManagement()
        pm.project_uuid = self.project_uuid
        pm.source_id = self.source_id
        pm.sup_dict = sup_dict
        pm.sup_reconcile_key = self.reconcile_key
        pm.sup_reconcile_value = self.ent_type_pred_sup_id
        pred_obj = pm.get_make_predicate(
            self.FAIMS_ENTITY_TYPE_PREDICATE_LABEL,
            'variable',
            'id')
        if pred_obj is not False:
            # predicate reconciled or created; now mint the oc_types
            predicate_uuid = str(pred_obj.uuid)
            for faims_ent_type_id, ent_dict in json_obj.items():
                if not needs_type_attribute(ent_dict):
                    continue
                sup_dict = LastUpdatedOrderedDict()
                sup_dict[self.reconcile_key] = faims_ent_type_id
                tm = TypeManagement()
                tm.project_uuid = self.project_uuid
                tm.source_id = self.source_id
                tm.sup_dict = sup_dict
                tm.sup_reconcile_key = self.reconcile_key
                tm.sup_reconcile_value = faims_ent_type_id
                type_obj = tm.get_make_type_within_pred_uuid(
                    predicate_uuid,
                    ent_dict['label'])
                if type_obj is not False:
                    # the type was reconciled
                    ent_dict['type_uuid'] = str(type_obj.uuid)
                    ent_dict['predicate_uuid'] = predicate_uuid
                    self.entity_types[faims_ent_type_id] = ent_dict
    # now save the results
    self.fm.save_serialized_json(key, act_dir, self.entity_types)
def get_arachne_comparanda(self):
    """Add Arachne comparanda to ``self.item_json`` when a search applies.

    When ``self.check_arachne_relation()`` returns a search URL and the
    Arachne API yields results, two predicate definitions are added to the
    third ``@context`` entry and an Arachne observation is appended.

    :return: ``self.item_json``, modified or not.
    """
    # not used below; the lookup is kept so that an item without a label
    # still raises KeyError here, as before
    label = self.item_json['label']
    search_url = self.check_arachne_relation()
    if not isinstance(search_url, str):
        return self.item_json
    api = ArachneAPI()
    api.get_results_from_search_url(search_url)
    if api.results is False:
        return self.item_json
    pred_specs = (
        ('skos:editorialNote', (
            ('owl:sameAs',
             'http://www.w3.org/2004/02/skos/core#editorialNote'),
            ('slug', 'skos-editorialnote'),
            ('label', 'Arachne comparative material'),
            ('oc-gen:predType', 'variable'),
            ('type', 'xsd:string'),
        )),
        ('skos:example', (
            ('owl:sameAs', 'http://www.w3.org/2004/02/skos/core#example'),
            ('slug', 'skos-example'),
            ('label', 'Comparanda in Arachne'),
            ('oc-gen:predType', 'link'),
            ('type', '@id'),
        )),
    )
    for context_key, fields in pred_specs:
        pred = LastUpdatedOrderedDict()
        for field_key, field_value in fields:
            pred[field_key] = field_value
        self.item_json['@context'][2][context_key] = pred
    self.add_arachne_observation(api)
    return self.item_json
def add_arachne_observation(self, a_api):
    """Append an observation describing Arachne comparanda to the item.

    :param a_api: Arachne API object; ``DEFAULT_API_BASE_URL``,
        ``result_count``, ``arachne_html_url`` and ``results`` are read.

    The observation holds an HTML editorial note (result count plus a link
    to the Arachne search) and the API results as ``skos:example``. The
    ``oc-gen:has-obs`` list is created on ``self.item_json`` if absent.
    """
    if 'oc-gen:has-obs' not in self.item_json:
        self.item_json['oc-gen:has-obs'] = []
    observations = self.item_json['oc-gen:has-obs']

    arachne_obs = LastUpdatedOrderedDict()
    # observation ids are 1-based positions in the observation list
    arachne_obs['id'] = '#obs-' + str(len(observations) + 1)
    arachne_obs['oc-gen:sourceID'] = a_api.DEFAULT_API_BASE_URL
    arachne_obs['oc-gen:obsStatus'] = 'active'
    arachne_obs['type'] = 'oc-gen:observations'

    note_parts = [
        '<p>Arachne has: <strong>',
        str(a_api.result_count),
        '</strong> related item(s) with images</p>',
        '<p>Browse these comparanda: ',
        '[<a href="',
        a_api.arachne_html_url,
        '" target="_blank">Link to Arachne search results</a>]</p>',
        '<p><small>Open Context editors identified materials in Arachne likley to be relevant for comparison to this type.',
        ' <a href="http://arachne.uni-koeln.de/" target="_blank">Arachne</a> is the central object database of the German Archaeological Institute (DAI)',
        ' and the Archaeological Institute of the University of Cologne.</small></p>',
    ]
    editorial = LastUpdatedOrderedDict()
    editorial['id'] = '#string-arachne-editorial'
    editorial['xsd:string'] = ''.join(note_parts)

    arachne_obs['skos:editorialNote'] = [editorial]
    arachne_obs['skos:example'] = a_api.results
    observations.append(arachne_obs)
def get_proj_types(self, project_uuid):
    """List the description groupings available for a project.

    :param project_uuid: uuid string of the project.
    :return: list of ordered dicts (``id``, ``label``, ``class_uri``,
        ``class_label``, ``children``): one per distinct manifest item type
        other than 'types' and 'predicates', plus a final entry for complex
        descriptions when the project has any.
    """
    excluded_types = ('types', 'predicates')

    def start_item(id_suffix, label):
        # fields shared by every entry; 'children' is added by the caller
        entry = LastUpdatedOrderedDict()
        entry['id'] = project_uuid + '/' + id_suffix
        entry['label'] = label
        entry['class_uri'] = ''
        entry['class_label'] = ''
        return entry

    output = []
    type_rows = Manifest.objects\
        .filter(project_uuid=project_uuid)\
        .values('item_type')\
        .distinct('item_type')
    for type_row in type_rows:
        item_type = type_row['item_type']
        if item_type in excluded_types:
            continue
        entry = start_item(item_type, 'Descriptions for ' + item_type)
        entry['children'] = self.get_proj_type_classes(
            project_uuid, item_type)
        output.append(entry)
    complex_preds = self.get_proj_complex_description_preds(project_uuid)
    if len(complex_preds) > 0:
        entry = start_item(
            'complex-descriptions',
            'Descriptions used in Complex Descriptions')
        entry['children'] = ['']
        output.append(entry)
    return output
def make_geojson(self, record_index, total_found):
    """Output this record as a GeoJSON feature dict.

    :param record_index: position of the record, used in the generated ids.
    :param total_found: total number of records, used in the generated ids.
    :return: ordered dict with ``id``, ``label``, ``rdfs:isDefinedBy``,
        ``type``, ``category``, ``geometry``, an optional ``when`` (only if
        both early and late dates are set) and ``properties``.
    """
    geometry = LastUpdatedOrderedDict()
    geometry['id'] = '#record-geom-{}-of-{}'.format(
        record_index, total_found)
    geometry['type'] = self.geo_feature_type
    geometry['coordinates'] = self.geometry_coords

    when = None
    if not (self.early_date is None or self.late_date is None):
        # both dates are known, so describe the time span
        when = LastUpdatedOrderedDict()
        when['id'] = '#record-event-{}-of-{}'.format(
            record_index, total_found)
        when['type'] = 'oc-gen:formation-use-life'
        # convert numeric years to GeoJSON-LD ISO 8601
        when['start'] = ISOyears().make_iso_from_float(self.early_date)
        when['stop'] = ISOyears().make_iso_from_float(self.late_date)

    feature = LastUpdatedOrderedDict()
    feature['id'] = '#record-{}-of-{}'.format(record_index, total_found)
    feature['label'] = self.label
    feature['rdfs:isDefinedBy'] = self.uri
    feature['type'] = 'Feature'
    feature['category'] = 'oc-api:geo-record'
    feature['geometry'] = geometry
    if when is not None:
        feature['when'] = when
    # the properties dict comes last in the GeoJSON
    feature['properties'] = self.make_client_properties_dict(
        id_value='#rec-{}-of-{}'.format(record_index, total_found),
        feature_type='item record')
    return feature
def __init__(self):
    """Initialise an empty facet template.

    Identity fields default to ``False`` and option lists to empty lists.
    Each of the four grouped-option dicts gets one empty list per heading in
    ``FacetSearchTemplate.SUB_HEADINGS``.
    """
    self.facet_field_index = 0
    # identity / descriptive fields, False until populated
    self.dom_id_prefix = False
    self.id = False
    self.defined_by = False
    self.label = False
    self.type = False
    # is the item_type_limit in effect?
    self.item_type_limited = False
    # options grouped by sub-heading, one dict per kind of option
    self.fg_id_options = LastUpdatedOrderedDict()
    self.fg_num_options = LastUpdatedOrderedDict()
    self.fg_date_options = LastUpdatedOrderedDict()
    self.fg_string_options = LastUpdatedOrderedDict()
    # flat option lists
    self.group_labels = []
    self.id_options = []
    self.numeric_options = []
    self.date_options = []
    self.string_options = []
    self.option_types = []
    self.show_group_labels = False
    # add the groups as keys, with list values, for each kind of option
    grouped_option_dicts = (
        self.fg_id_options,
        self.fg_num_options,
        self.fg_date_options,
        self.fg_string_options,
    )
    for group_label in FacetSearchTemplate.SUB_HEADINGS:
        for grouped_options in grouped_option_dicts:
            if group_label not in grouped_options:
                grouped_options[group_label] = []
def make_json_for_html(self):
    """Serialize the category / property tree to JSON for embedding in HTML.

    Up to three sections are built: root classes, root properties, and the
    children of ``self.entity``. ``self.json_tree`` is set to the JSON
    string only when at least one section exists; otherwise it is left
    untouched.
    """
    sections = []

    def add_section(title, members):
        section = LastUpdatedOrderedDict()
        section['root'] = title
        section['children'] = members
        section['more'] = True
        sections.append(section)

    if len(self.root_classes) > 0:
        # root level categories
        add_section('Top-Level Classes / Categories', self.root_classes)
    if len(self.root_properties) > 0:
        # root level properties
        add_section('Top-Level Properties / Relations', self.root_properties)
    if len(self.children) > 0:
        # concept children; wording depends on the entity's kind
        if self.entity.entity_type == 'class':
            title = 'Sub-categories for ' + self.entity.label
        else:
            title = 'Sub-properties for ' + self.entity.label
        add_section(title, self.children)
    if sections:
        # there are items to display in the json_tree
        self.json_tree = json.dumps(sections, ensure_ascii=False, indent=4)
def document_missing_old_oc_uuids(self):
    """Record uuids of old tables that have no export cells, as JSON.

    For every old table id, each record uuid is looked up in ``ExpCell``.
    Missing ones are counted, printed, and (when available in
    ``self.act_table_obj['records']``) copied into the report. The report
    is written to ``missing-uuids.json`` in the old-table directory.

    NOTE(review): the source had lost its indentation; the per-table report
    entry is stored for every table id, including non-string ids -- confirm
    against version control.
    """
    missing = LastUpdatedOrderedDict()
    missing['total-missing'] = 0
    self.get_migrate_old_oc_table_ids()
    for old_table_id in self.table_id_list:
        act_tab = LastUpdatedOrderedDict()
        act_tab['label'] = self.label
        act_tab['records'] = LastUpdatedOrderedDict()
        if isinstance(old_table_id, str):
            uuids = self.get_old_oc_record_uuids(old_table_id, True)
            for uuid in uuids:
                u_ok = ExpCell.objects\
                    .filter(table_id=old_table_id,
                            uuid=uuid)[:1]
                if len(u_ok) >= 1:
                    # at least one export cell exists for this uuid
                    continue
                missing['total-missing'] += 1
                print(str(missing['total-missing']) + ' uuid: ' + uuid)
                if (self.act_table_obj is not False
                        and 'records' in self.act_table_obj
                        and uuid in self.act_table_obj['records']):
                    act_tab['records'][uuid] = \
                        self.act_table_obj['records'][uuid]
        missing[old_table_id] = act_tab
    missing_json = json.dumps(missing, ensure_ascii=False, indent=4)
    dir_file = self.set_check_directory(self.old_oc_table_dir) \
        + 'missing-uuids.json'
    # context manager so the handle is closed even if the write fails
    with open(dir_file, 'w', encoding='utf-8') as f:
        f.write(missing_json)
def make_sort_links_list(self, request_dict):
    """Populate ``self.sort_links`` with the links for the sort options.

    :param request_dict: current request parameters. Any 'sort' key is
        removed from it in place before links are generated.

    For each enabled entry of ``self.SORT_OPTIONS`` with a value, an
    ascending and a descending link are made unless that type / order pair
    is already in ``self.current_sorting``. An enabled entry whose value is
    ``None`` produces a link to the default sorting, but only when default
    sorting is not currently in use.
    """
    request_dict.pop('sort', None)
    directions = (
        ('asc', 'ascending'),
        ('desc', 'descending'),
    )

    def start_filter_links():
        links_maker = FilterLinks()
        links_maker.base_search_link = self.base_search_link
        links_maker.base_request_json = json.dumps(
            request_dict, ensure_ascii=False, indent=4)
        links_maker.spatial_context = self.spatial_context
        return links_maker

    def build_sort_obj(links, sort_opt, order_label):
        sort_obj = LastUpdatedOrderedDict()
        sort_obj['id'] = links['html']
        sort_obj['json'] = links['json']
        sort_obj['type'] = sort_opt['type']
        sort_obj['label'] = sort_opt['label']
        sort_obj['oc-api:sort-order'] = order_label
        return sort_obj

    for sort_opt in self.SORT_OPTIONS:
        if not sort_opt['opt']:
            # only make sort options when the 'opt' key is true
            continue
        if sort_opt['value'] is None:
            if self.using_default_sorting is False:
                # only link to the default sorting when not already using it
                links_maker = start_filter_links()
                links = links_maker.make_request_urls(request_dict)
                self.sort_links.append(
                    build_sort_obj(links, sort_opt, 'descending'))
            continue
        for order_key, order_label in directions:
            sort_value = sort_opt['value'] + self.order_sep + order_key
            links_maker = start_filter_links()
            sort_rparams = links_maker.add_to_request('sort', sort_value)
            links = links_maker.make_request_urls(sort_rparams)
            sort_obj = build_sort_obj(links, sort_opt, order_label)
            already_active = any(
                sort_opt['type'] == active['type']
                and order_label == active['oc-api:sort-order']
                for active in self.current_sorting
            )
            if not already_active:
                # only add the sort option if it's not already in use
                self.sort_links.append(sort_obj)
def make_related_media_facets(self, solr_json):
    """Make the related-media facet object from a solr JSON response.

    :param solr_json: solr response handed to the facet-count utility.
    :return: ordered dict with ``id``, ``label`` and the list of options, or
        ``None`` when no media type yields an option.

    A media type is skipped when no items have related media of that type,
    or when its link equals the current filters URL.
    """
    media_facets_config = configs.FACETS_RELATED_MEDIA
    options = []
    for media_config in media_facets_config['oc-api:has-rel-media-options']:
        val_counts = utilities.get_path_facet_value_count_tuples(
            media_config['facet_path'],
            solr_json
        )
        # the "0" facet value counts items WITHOUT media of this type
        total_count = sum(
            count for value, count in val_counts if value != "0"
        )
        if total_count == 0:
            # nothing has related media of this type, so no option
            continue
        search_links = SearchLinks(
            request_dict=copy.deepcopy(self.request_dict),
            base_search_url=self.base_search_url
        )
        # remove non search related params
        search_links.remove_non_query_params()
        search_links.replace_param_value(
            media_config['param_key'],
            new_value=1,
        )
        urls = search_links.make_urls_from_request_dict()
        if urls['html'] == self.current_filters_url:
            # this option is the filter already in effect
            continue
        option = LastUpdatedOrderedDict()
        option['label'] = media_config['label']
        option['count'] = total_count
        option['id'] = urls['html']
        option['json'] = urls['json']
        options.append(option)
    if not options:
        # no related media options were produced
        return None
    rel_media_facets = LastUpdatedOrderedDict()
    rel_media_facets['id'] = media_facets_config['id']
    rel_media_facets['label'] = media_facets_config['label']
    rel_media_facets['oc-api:has-rel-media-options'] = options
    return rel_media_facets
def __init__(self):
    """Initialise empty import state and configuration keys."""
    # helper used for reading / writing the configuration files
    self.fm = FileManage()
    # names of the configuration entries and the reconciliation key
    self.oc_config_relation_types = 'oc-relation-types'
    self.oc_config_entities = 'oc-entities'
    self.reconcile_key = 'faims_id'
    # project / source identity, False until set by the caller
    self.project_uuid = False
    self.source_id = False
    # working data: no tree loaded yet, nothing classified yet
    self.tree = None
    self.relation_types = LastUpdatedOrderedDict()
    self.entities = LastUpdatedOrderedDict()
def process_solr_tiles(self, solr_tiles):
    """Turn solr discovery geo-tiles into GeoJSON region records.

    :param solr_tiles: tile facet data, passed to
        ``self.aggregate_spatial_tiles`` to be aggregated.

    One feature record is built per aggregated tile and appended to
    ``self.geojson_regions``, except tiles whose key starts with '211111'
    (the bad 0, 0 coordinates off the coast of Africa).
    """
    # first aggregate the counts for tiles that belong together
    aggregate_tiles = self.aggregate_spatial_tiles(solr_tiles)
    # now generate GeoJSON for each tile region
    for region_num, (tile_key, aggregate_count) in enumerate(
            aggregate_tiles.items(), 1):
        filter_links = FilterLinks()
        filter_links.base_request_json = self.filter_request_dict_json
        filter_links.spatial_context = self.spatial_context
        new_rparams = filter_links.add_to_request('disc-geotile', tile_key)

        record = LastUpdatedOrderedDict()
        record['id'] = filter_links.make_request_url(new_rparams)
        record['json'] = filter_links.make_request_url(new_rparams, '.json')
        record['count'] = aggregate_count
        record['type'] = 'Feature'
        record['category'] = 'oc-api:geo-facet'

        if not (self.min_date is False or self.max_date is False):
            when = LastUpdatedOrderedDict()
            when['id'] = '#event-' + tile_key
            when['type'] = 'oc-gen:formation-use-life'
            # convert numeric years to GeoJSON-LD ISO 8601
            when['start'] = ISOyears().make_iso_from_float(self.min_date)
            when['stop'] = ISOyears().make_iso_from_float(self.max_date)
            record['when'] = when

        mercator = GlobalMercator()
        geometry = LastUpdatedOrderedDict()
        geometry['id'] = '#geo-disc-tile-geom-' + tile_key
        geometry['type'] = 'Polygon'
        geometry['coordinates'] = mercator.quadtree_to_geojson_poly_coords(
            tile_key)
        record['geometry'] = geometry

        properties = LastUpdatedOrderedDict()
        properties['id'] = '#geo-disc-tile-' + tile_key
        properties['href'] = record['id']
        properties['label'] = 'Discovery region (' + str(region_num) + ')'
        properties['feature-type'] = 'discovery region (facet)'
        properties['count'] = aggregate_count
        properties['early bce/ce'] = self.min_date
        properties['late bce/ce'] = self.max_date
        record['properties'] = properties

        # a key starting with '211111' marks the bad coordinates at 0, 0;
        # don't display items without coordinates
        if tile_key[:6] != '211111':
            self.geojson_regions.append(record)
def json_geo_overlay(self):
    """Return a JSON string describing ``self.geo_overlays``.

    The result is an object with one key, ``overlays``: a list holding the
    file URI (``url``) and ``metadata`` of each overlay.
    """
    overlays = []
    for geo_media in self.geo_overlays:
        overlay = LastUpdatedOrderedDict()
        overlay['url'] = geo_media.full_file_obj.file_uri
        overlay['metadata'] = geo_media.metadata
        overlays.append(overlay)
    output = LastUpdatedOrderedDict()
    output['overlays'] = overlays
    return json.dumps(output, indent=4, ensure_ascii=False)
def db_save_reconcile_predicates_types(self, act_dir):
    """Save or reconcile attribute predicates and their types.

    Loads the attributes dict from the file named by
    ``self.oc_config_attributes`` in ``act_dir``. A predicate is fetched or
    created for every attribute, and a type for each of its ``objects``.
    The resulting uuids are written back into ``self.attributes``, which is
    then re-serialized.

    :param act_dir: directory holding the serialized JSON configuration.
    :return: None. Prints a message and does nothing when the file is
        missing.

    NOTE(review): the source had lost its indentation; the final save is
    placed after the attribute loop -- confirm against version control.
    """
    key = self.oc_config_attributes
    json_obj = self.fm.get_dict_from_file(key, act_dir)
    if json_obj is None:
        print('Need to 1st generate an attributes file from the ArchEnts!')
        return
    # we have JSON with a dictionary for the attributes
    self.attributes = json_obj
    for faims_id_pred, attrib_dict in json_obj.items():
        # always make a predicate (and types) for attributes
        sup_dict = LastUpdatedOrderedDict()
        sup_dict[self.reconcile_key] = faims_id_pred
        pm = PredicateManagement()
        pm.project_uuid = self.project_uuid
        pm.source_id = self.source_id
        pm.sup_dict = sup_dict
        pm.sup_reconcile_key = self.reconcile_key
        pm.sup_reconcile_value = faims_id_pred
        pred_obj = pm.get_make_predicate(attrib_dict['label'],
                                         attrib_dict['predicate_type'],
                                         attrib_dict['data_type'])
        if pred_obj is False:
            # no predicate, so nothing to attach types to
            continue
        # the predicate was reconciled
        self.attributes[faims_id_pred]['predicate_uuid'] = str(pred_obj.uuid)
        if 'objects' not in attrib_dict:
            continue
        for faims_id_type, type_dict in attrib_dict['objects'].items():
            sup_dict = LastUpdatedOrderedDict()
            sup_dict[self.reconcile_key] = faims_id_type
            tm = TypeManagement()
            tm.project_uuid = self.project_uuid
            tm.source_id = self.source_id
            tm.sup_dict = sup_dict
            tm.sup_reconcile_key = self.reconcile_key
            tm.sup_reconcile_value = faims_id_type
            type_obj = tm.get_make_type_within_pred_uuid(pred_obj.uuid,
                                                         type_dict['label'])
            if type_obj is not False:
                # the type was reconciled
                type_dict['type_uuid'] = str(type_obj.uuid)
                type_dict['predicate_uuid'] = str(pred_obj.uuid)
                self.attributes[faims_id_pred]['objects'][faims_id_type] = \
                    type_dict
    # now save the results
    self.fm.save_serialized_json(key, act_dir, self.attributes)
def make_dict_from_anno_obj(self, anno_obj):
    """Return an ordered dict for an import field annotation object.

    :param anno_obj: annotation whose ``id``, ``field_num``,
        ``predicate_field_num``, ``predicate``, ``object_field_num`` and
        ``object_uuid`` attributes are read.
    :return: ordered dict with keys ``id``, ``subject``, ``predicate`` and
        ``object``.

    ``subject`` is ``False`` when its field cannot be found. Predicate and
    object come from an import field when the matching field number is
    positive. Otherwise they are dereferenced as entities; an unresolved
    predicate falls back to a fixed label for the known import predicates.
    """
    anno_dict = LastUpdatedOrderedDict()
    anno_dict['id'] = anno_obj.id

    # --- subject ---
    subject_field = self.get_field_object(anno_obj.field_num)
    if subject_field is False:
        anno_dict['subject'] = False
    else:
        subject = self.make_dict_from_field_obj(subject_field)
        subject['id'] = anno_obj.field_num
        anno_dict['subject'] = subject

    # --- predicate ---
    if anno_obj.predicate_field_num > 0:
        predicate = self.make_dict_from_field_obj(
            self.get_field_object(anno_obj.predicate_field_num))
        predicate['id'] = anno_obj.predicate_field_num
        predicate['type'] = 'import-field'
    else:
        predicate = LastUpdatedOrderedDict()
        predicate['id'] = anno_obj.predicate
        pred_entity = Entity()
        if pred_entity.dereference(anno_obj.predicate):
            predicate['label'] = pred_entity.label
            predicate['type'] = pred_entity.item_type
        else:
            fixed_labels = (
                (ImportFieldAnnotation.PRED_CONTAINED_IN, 'Contained in'),
                (ImportFieldAnnotation.PRED_DESCRIBES, 'Describes'),
                (ImportFieldAnnotation.PRED_VALUE_OF, 'Value of'),
                (ImportFieldAnnotation.PRED_MEDIA_PART_OF, 'Media part of'),
            )
            for known_pred, known_label in fixed_labels:
                if anno_obj.predicate == known_pred:
                    # known import predicates get a label but no type
                    predicate['label'] = known_label
                    break
            else:
                predicate['label'] = False
                predicate['type'] = False
    anno_dict['predicate'] = predicate

    # --- object ---
    if anno_obj.object_field_num > 0:
        obj = self.make_dict_from_field_obj(
            self.get_field_object(anno_obj.object_field_num))
        obj['id'] = anno_obj.object_field_num
        obj['type'] = 'import-field'
    else:
        obj = LastUpdatedOrderedDict()
        obj['id'] = anno_obj.object_uuid
        obj_entity = Entity()
        if obj_entity.dereference(anno_obj.object_uuid):
            obj['label'] = obj_entity.label
            obj['type'] = obj_entity.item_type
        else:
            obj['label'] = False
            obj['type'] = False
    anno_dict['object'] = obj
    return anno_dict
def get_orphan_items(self, item_type, consider=('contain', 'link')):
    """Collect manifest items of a type that have no descriptions.

    An item qualifies when it has no assertions other than those whose
    predicate is excluded via ``consider``.

    :param item_type: manifest item type to check.
    :param consider: collection of flags; 'contain' excludes containment
        assertions and 'link' excludes linking assertions from counting as
        descriptions.
    :return: ``self.blank_items``, updated in place and nested as
        item_type -> class_uri -> {'label': ..., 'items': [...]}.

    All values are passed to the database as query parameters rather than
    being concatenated into the SQL text.
    """
    params = []
    assertion_limit = ''
    if 'contain' in consider:
        assertion_limit += ' AND oc_assertions.predicate_uuid != %s'
        params.append(Assertion.PREDICATES_CONTAINS)
    if 'link' in consider:
        assertion_limit += ' AND oc_assertions.predicate_uuid != %s'
        params.append(Assertion.PREDICATES_LINK)
    sql = (
        'SELECT oc_manifest.uuid AS uuid, '
        'oc_manifest.label AS label, '
        'oc_manifest.item_type AS item_type, '
        'oc_manifest.class_uri AS class_uri '
        'FROM oc_manifest '
        'LEFT JOIN oc_assertions ON '
        '(oc_manifest.uuid = oc_assertions.uuid'
        + assertion_limit + ') '
        'WHERE oc_manifest.project_uuid = %s '
        'AND oc_manifest.item_type = %s '
        'AND oc_assertions.uuid IS NULL '
        'ORDER BY oc_manifest.sort;'
    )
    # placeholder order: join-clause values first, then the WHERE values
    params.extend([self.project_uuid, item_type])
    non_descript = Manifest.objects.raw(sql, params)
    for dull_man in non_descript:
        row_item_type = dull_man.item_type
        class_uri = dull_man.class_uri
        if not class_uri:
            # no class (empty or NULL): group under the item type instead
            class_uri = row_item_type
        if row_item_type not in self.blank_items:
            self.blank_items[row_item_type] = LastUpdatedOrderedDict()
        if class_uri not in self.blank_items[row_item_type]:
            class_dict = LastUpdatedOrderedDict()
            ent = Entity()
            found = ent.dereference(class_uri)
            if found:
                class_dict['label'] = ent.label
            else:
                class_dict['label'] = row_item_type
            class_dict['items'] = []
            self.blank_items[row_item_type][class_uri] = class_dict
        item = LastUpdatedOrderedDict()
        item['uuid'] = dull_man.uuid
        item['label'] = dull_man.label
        self.blank_items[row_item_type][class_uri]['items'].append(item)
    return self.blank_items
def get_catal_related(self):
    """Add related Çatalhöyük Living Archive data to ``self.item_json``.

    The item's categories and projects are passed to the Living Archive API
    relevance check. If relevant and the API has data for the unit named by
    the item's label, predicate definitions are added to the third
    ``@context`` entry and a Living Archive observation is added.

    :return: ``self.item_json``, modified or not.
    """
    label = self.item_json['label']
    if 'category' in self.item_json:
        category_list = self.item_json['category']
    else:
        category_list = []
    if 'dc-terms:isPartOf' in self.item_json:
        project_list = self.item_json['dc-terms:isPartOf']
    else:
        project_list = []

    catal_api = CatalLivingArchiveAPI()
    catal_api.check_relevance(category_list, project_list)
    if not catal_api.relevant:
        return self.item_json
    catal_api.get_unit(label)
    if not catal_api.has_data:
        return self.item_json

    def string_pred(same_as, slug, pred_label):
        # all three predicates are string-valued 'variable' predicates
        pred = LastUpdatedOrderedDict()
        pred['owl:sameAs'] = same_as
        pred['slug'] = slug
        pred['label'] = pred_label
        pred['oc-gen:predType'] = 'variable'
        pred['type'] = 'xsd:string'
        return pred

    context = self.item_json['@context'][2]
    context['skos:editorialNote'] = string_pred(
        'http://www.w3.org/2004/02/skos/core#editorialNote',
        'skos-editorialnote',
        'About Çatalhöyük Living Archive Data')
    if catal_api.props_count > 0:
        context['skos:definition'] = string_pred(
            'http://www.w3.org/2004/02/skos/core#definition',
            'skos-definition',
            'Çatalhöyük Living Archive: Unit Properties')
    if catal_api.finds_count > 0:
        context['skos:note'] = string_pred(
            'http://www.w3.org/2004/02/skos/core#note',
            'skos-note',
            'Çatalhöyük Living Archive: Unit Finds')
    self.add_catal_observation(catal_api)
    return self.item_json
def get_all_projects(self):
    """Return summary dicts for all projects in the manifest.

    Projects are ordered most recently revised first, then by published and
    record-updated dates (descending).

    :return: list of ordered dicts with ``uuid``, ``label``, ``published``,
        ``revised``, ``edit_status``, ``short_des`` and ``count_imp`` (the
        number of import sources for the project). ``edit_status`` is
        ``False`` and ``short_des`` is '' when the manifest item has no
        ``Project`` record.
    """
    output = []
    man_projs = Manifest.objects\
        .filter(item_type='projects')\
        .order_by('-revised',
                  '-published',
                  '-record_updated')
    for man_proj in man_projs:
        act_item = LastUpdatedOrderedDict()
        act_item['uuid'] = man_proj.uuid
        act_item['label'] = man_proj.label
        act_item['published'] = man_proj.published
        act_item['revised'] = man_proj.revised
        try:
            pobj = Project.objects.get(uuid=man_proj.uuid)
        except Project.DoesNotExist:
            act_item['edit_status'] = False
            act_item['short_des'] = ''
        else:
            act_item['edit_status'] = pobj.edit_status
            act_item['short_des'] = pobj.short_des
        # count in the database instead of loading every row just to count
        act_item['count_imp'] = ImportSource.objects\
            .filter(project_uuid=man_proj.uuid)\
            .count()
        output.append(act_item)
    return output
def classify_xml_tree_relation_types(self):
    """Read the relation types from the XML tree into ``self.relation_types``.

    Each ``/relationships/relationshipType`` node becomes an ordered dict
    keyed by its ``relntypeid``, holding its label, the count of child
    ``relationship`` nodes and placeholder fields. After all types are read,
    ``order`` is assigned 1..n by ascending relationship count.

    Does nothing when ``self.tree`` is ``None`` or ``False``.
    """
    if self.tree is None or self.tree is False:
        # both None and False mark "no tree"; identity checks are used
        # because the truthiness of XML elements is not reliable
        return
    rel_counts = {}
    for rel_type in self.tree.xpath('/relationships/relationshipType'):
        faims_id = rel_type.get('relntypeid')
        relation_count = len(rel_type.xpath('relationship'))
        rel_counts[faims_id] = relation_count
        rel_type_obj = LastUpdatedOrderedDict()
        rel_type_obj['id'] = faims_id
        rel_type_obj['label'] = rel_type.get('relntypename')
        rel_type_obj['oc-equiv'] = None
        rel_type_obj['order'] = None
        rel_type_obj['predicate_uuid'] = None
        rel_type_obj['data_type'] = 'id'
        rel_type_obj['predicate_type'] = 'link'
        rel_type_obj['count'] = relation_count
        self.relation_types[faims_id] = rel_type_obj
    # rank the relation types by ascending count, starting at 1
    ranked_ids = sorted(rel_counts, key=rel_counts.get)
    for order, faims_id in enumerate(ranked_ids, 1):
        self.relation_types[faims_id]['order'] = order
def add_project_predicates_and_annotations_to_graph(self, graph):
    """ Appends the project's predicates, with their link annotations,
        to the graph list.

        graph: list that receives one ordered dict per predicate with
        '@id', 'owl:sameAs', 'label', 'uuid', 'slug', optionally
        'oc-gen:predType' (when the predicate has a non-empty string
        class_uri), plus any annotation objects added by
        self.add_unique_object_dict_to_pred().

        Returns the same graph list. It is returned untouched, and no
        annotation lookup is made, when the project has no predicate
        list.
    """
    pred_sql_dict_list = self.get_working_project_predicates()
    if not isinstance(pred_sql_dict_list, list):
        # No predicates in the project. Weird, but possible.
        # Checked before the annotation lookup so that lookup is never
        # made with a non-list argument.
        return graph
    la_preds = self.get_link_annotations_for_preds(pred_sql_dict_list)
    # group the annotations by the predicate uuid they describe; one pass
    # so that la_preds may be any iterable
    annotated_pred_uuids = {}
    for la in la_preds:
        annotated_pred_uuids.setdefault(la.subject, []).append(la)
    for sql_dict in pred_sql_dict_list:
        act_pred = LastUpdatedOrderedDict()
        act_pred['@id'] = 'oc-pred:' + str(sql_dict['slug'])
        act_pred['owl:sameAs'] = URImanagement.make_oc_uri(
            sql_dict['predicate_uuid'], 'predicates')
        act_pred['label'] = sql_dict['label']
        act_pred['uuid'] = sql_dict['predicate_uuid']
        act_pred['slug'] = sql_dict['slug']
        class_uri = sql_dict['class_uri']
        if isinstance(class_uri, str) and len(class_uri) > 0:
            act_pred['oc-gen:predType'] = class_uri
        uuid_la_preds = annotated_pred_uuids.get(
            sql_dict['predicate_uuid'], [])
        for la_pred in uuid_la_preds:
            la_pred_uri = URImanagement.prefix_common_uri(
                la_pred.predicate_uri)
            act_pred = self.add_unique_object_dict_to_pred(
                act_pred, la_pred_uri, la_pred.object_uri)
        graph.append(act_pred)
    return graph
def __init__(self, id_href=True):
    """ Builds the default JSON-LD context and stores it on the instance.

        Sets self.geo_json_context from the class's GEO_JSON_CONTEXT_URI
        and self.context to an ordered dict holding, in order: namespace
        prefixes, the default '@language', term definitions, and a
        language-container entry for every predicate in
        settings.TEXT_CONTENT_PREDICATES not already defined.

        id_href is accepted but not used by this method.
    """
    # for geo_json_context
    self.geo_json_context = self.GEO_JSON_CONTEXT_URI
    namespace_prefixes = (
        ('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
        ('rdfs', 'http://www.w3.org/2000/01/rdf-schema#'),
        ('xsd', 'http://www.w3.org/2001/XMLSchema#'),
        ('skos', 'http://www.w3.org/2004/02/skos/core#'),
        ('owl', 'http://www.w3.org/2002/07/owl#'),
        ('dc-terms', 'http://purl.org/dc/terms/'),
        ('dcmi', 'http://dublincore.org/documents/dcmi-terms/'),
        ('bibo', 'http://purl.org/ontology/bibo/'),
        ('foaf', 'http://xmlns.com/foaf/0.1/'),
        ('cidoc-crm', 'http://erlangen-crm.org/current/'),
        ('dcat', 'http://www.w3.org/ns/dcat#'),
        ('geojson', 'https://purl.org/geojson/vocab#'),
        ('cc', 'http://creativecommons.org/ns#'),
        ('nmo', 'http://nomisma.org/ontology#'),
        ('oc-gen', 'http://opencontext.org/vocabularies/oc-general/'),
        ('oc-pred', 'http://opencontext.org/predicates/'),
    )
    term_definitions = (
        ('id', '@id'),
        ('label', 'rdfs:label'),
        ('uuid', 'dc-terms:identifier'),
        ('slug', 'oc-gen:slug'),
        ('type', '@type'),
        ('category', {'@id': 'oc-gen:category', '@type': '@id'}),
        ('owl:sameAs', {'@type': '@id'}),
        ('skos:altLabel', {'@container': '@language'}),
        ('xsd:string', {'@container': '@language'}),
        ('description', {'@id': 'dc-terms:description',
                         '@container': '@language'}),
    )
    context = LastUpdatedOrderedDict()
    for prefix, uri in namespace_prefixes:
        context[prefix] = uri
    context['@language'] = Languages().DEFAULT_LANGUAGE
    for term, definition in term_definitions:
        context[term] = definition
    for pred in settings.TEXT_CONTENT_PREDICATES:
        if pred in context:
            # keep the explicit definition made above
            continue
        context[pred] = {'@container': '@language'}
    self.context = context
def check_make_valid_label(self, label, prefix='', num_id_len=False):
    """ Checks a label and reports whether it can be used, along with
        a suggested alternative.

        An empty string label is treated as no label (False). The result
        is an ordered dict with:
        'checked': the label that was checked (False for an empty one)
        'suggested': self.suggest_valid_label(prefix, num_id_len), or the
            label of the manifest item for self.uuid when the checked
            label is free and that item is found
        'exists' / 'valid': None / None when there is no label,
            False / True when the label is not found in scope,
            True / False when it is
        'exists_uuid': the id of the existing item (only when it exists)
        'uuid': uuid of the manifest item for self.uuid (only when the
            label is free and that item is found)
    """
    if isinstance(label, str) and len(label) < 1:
        label = False
    report = LastUpdatedOrderedDict()
    report['checked'] = label
    report['suggested'] = self.suggest_valid_label(prefix, num_id_len)
    if label is False:
        # nothing to validate
        report['exists'] = None
        report['valid'] = None
        return report
    exist_id = self.check_label_exists_in_scope(label)
    if exist_id is not False:
        # label already taken in this scope
        report['exists'] = True
        report['exists_uuid'] = exist_id
        report['valid'] = False
        return report
    report['exists'] = False
    report['valid'] = True
    if self.uuid is False:
        return report
    manifest_item = self.get_manifest_item(self.uuid)
    if manifest_item is not False:
        report['suggested'] = manifest_item.label
        report['uuid'] = manifest_item.uuid
    return report
def get_project(self, project_uuid):
    """ Gets summary metadata and import sources for one project.

        Returns False when no manifest record has the given uuid.
        Otherwise returns an ordered dict with the manifest fields
        ('uuid', 'label', 'published', 'revised'), the project record's
        'edit_status' and 'short_des' (False / '' when there is no
        Project record), the project's import sources ('sources'), the
        related Refine sources ('refines'), the Refine base URL
        ('ref_baseurl') and self.refine_ok ('refine_ok').
    """
    try:
        manifest = Manifest.objects.get(uuid=project_uuid)
    except Manifest.DoesNotExist:
        return False
    try:
        project = Project.objects.get(uuid=manifest.uuid)
        edit_status, short_des = project.edit_status, project.short_des
    except Project.DoesNotExist:
        edit_status, short_des = False, ''
    summary = LastUpdatedOrderedDict()
    summary['uuid'] = manifest.uuid
    summary['label'] = manifest.label
    summary['published'] = manifest.published
    summary['revised'] = manifest.revised
    summary['edit_status'] = edit_status
    summary['short_des'] = short_des
    # get sources from refine first, since it lets us know if updated
    refine_sources = self.relate_refine_local_sources()
    sources = ImportSource.objects\
        .filter(project_uuid=project_uuid)\
        .order_by('-updated')
    sources = self.note_unimport_ok(sources)
    summary['sources'] = self.note_reloadable_sources(sources)
    summary['refines'] = refine_sources
    summary['ref_baseurl'] = RefineAPI().get_project_base_url()
    summary['refine_ok'] = self.refine_ok
    return summary
def add_predicate_json_ld(self):
    """ Adds predicate specific information to the JSON-LD object.

        Looks up the Predicate record for self.manifest.uuid. When found,
        writes its data type to 'oc-gen:data-type', a one-item
        'rdfs:range' list describing that data type, and, if
        self.assertion_hashes is truthy, the predicate's sort value as
        'oc-gen:default-sort-order'. Does nothing when there is no
        Predicate record.
    """
    range_labels = {
        'id': 'URI identified items',
        'xsd:string': 'Alphanumeric text strings',
        'xsd:double': 'Decimal values',
        'xsd:integer': 'Integer values',
        'xsd:date': 'Calendar / date values',
    }
    try:
        predicate = Predicate.objects.get(uuid=self.manifest.uuid)
    except Predicate.DoesNotExist:
        predicate = None
    if not isinstance(predicate, Predicate):
        return
    data_type = predicate.data_type
    self.json_ld['oc-gen:data-type'] = data_type
    p_range = LastUpdatedOrderedDict()
    if data_type == 'id':
        # linked items are identified by the general items vocabulary URI
        p_range['id'] = 'http://opencontext.org/vocabularies/oc-general/items'
    else:
        p_range['id'] = data_type
    if data_type in range_labels:
        # other data types get a range with no label
        p_range['label'] = range_labels[data_type]
    self.json_ld['rdfs:range'] = [p_range]
    if self.assertion_hashes:
        # add a default sort order, for edit view JSON
        self.json_ld['oc-gen:default-sort-order'] = float(predicate.sort)