def _get_inline_preview(self, link, store=None):
    """Query the RDF store for a graph and convert selected fields to a JSON dictionary.

    :param link: URI of the resource to build the inline preview for.
    :param store: optional RDF store, only consulted when
        ``settings.RDF_USE_LOCAL_GRAPH`` is falsy; defaults to ``get_rdfstore()``.
    :return: dict keyed by the labels configured in
        ``settings.EDM_API_INLINE_PREVIEW`` with stringified object values, or
        ``None`` when no graph could be found for ``link``.
    """
    graph = None
    try:
        if settings.RDF_USE_LOCAL_GRAPH:
            record = RDFRecord(source_uri=link)
            if not record.exists():
                raise UnknownGraph("unable to find {}".format(link))
            graph = record.get_graph()
        else:
            if not store:
                store = get_rdfstore()
            graph_store = store.get_graph_store
            named_graph = "{}/graph".format(link.rstrip('/'))
            graph = graph_store.get(named_graph=named_graph, as_graph=True)
    except UnknownGraph:
        graph = None

    # Compare against None explicitly: an empty graph may be falsy but is
    # still a valid (if empty) result to iterate over.
    if graph is None:
        logger.warning("Unable to find Graph for: {}".format(link))
        return None

    preview_fields = settings.EDM_API_INLINE_PREVIEW
    # Map predicate URI -> output label once, so each triple needs one lookup.
    label_by_predicate = {URIRef(pred): label for pred, label in preview_fields.items()}

    inline_dict = {}
    for pred, obj in graph.predicate_objects():
        if pred in label_by_predicate:
            inline_dict[label_by_predicate[pred]] = str(obj)

    if 'delving_hubId' in preview_fields.values():
        # get_hub_id() returns (hub_id, spec); only the hub id is exposed here.
        hub_id, _spec = self.get_hub_id()
        inline_dict['delving_hubId'] = hub_id
    return inline_dict
def test__get_rdf_base_url__return_base_url_from_settings(settings):
    """The base URL comes from settings: bare by default, with a scheme on request."""
    settings.RDF_BASE_URL = "http://testserver"

    # Default call: the scheme is not part of the returned value.
    without_scheme = RDFRecord.get_rdf_base_url()
    assert without_scheme
    assert without_scheme == "testserver"

    # Explicit scheme: the requested scheme is prepended.
    with_scheme = RDFRecord.get_rdf_base_url(prepend_scheme=True, scheme="https")
    assert with_scheme
    assert with_scheme == "https://testserver"
def get_context_data(self, **kwargs):
    """Build the template context for an RDF document view.

    Resolves the requested URL to an internal RDF URI, loads the matching
    graph, wraps it in ``GraphBindings`` and stores the bindings, the external
    URL and the geo points in the context. ``self.template_name`` is replaced
    when one of the resource's RDF types has an entry in
    ``settings.RDF_CONTENT_FOLDOUTS``.

    :raises UnknownGraph: when no graph is found for the resolved URI.
    """
    context = super(NaveDocumentTemplateView, self).get_context_data(**kwargs)
    absolute_uri = self.request.build_absolute_uri()
    target_uri = RDFRecord.get_internal_rdf_base_uri(absolute_uri)
    if "detail/foldout/" in target_uri:
        # Foldout URLs carry a slug that is used as the record's hub id.
        slug = self.kwargs.get('slug')
        record = ElasticSearchRDFRecord(hub_id=slug)
        graph = record.get_graph_by_id(self.kwargs.get('slug'))
        if graph is not None:
            target_uri = record.source_uri
        else:
            logger.warn("Unable to find source_uri for slug: {}".format(slug))
    else:
        target_uri = RDFRecord.get_internal_rdf_base_uri(absolute_uri)
        record = ElasticSearchRDFRecord(hub_id=self.kwargs.get('slug'))
        graph = record.get_graph_by_source_uri(target_uri)
    # NOTE(review): the nesting of this check was reconstructed from collapsed
    # source; it is assumed to apply to both branches above -- confirm.
    if graph is None:
        raise UnknownGraph("URI {} is not known in our graph store".format(target_uri))
    if "/resource/cache/" in target_uri:
        # Cached remote resources: use the stored CacheResource graph if present.
        target_uri = target_uri.rstrip('/')
        cache_resource = CacheResource.objects.filter(document_uri=target_uri)
        if cache_resource.exists():
            graph = cache_resource.first().get_graph()
    elif settings.RDF_USE_LOCAL_GRAPH:
        # Local graph mode keeps the graph loaded above and only records the
        # acceptance flag.
        # NOTE(review): HttpRequest.REQUEST was removed in Django 1.9 -- confirm
        # the Django version this view targets.
        mode = self.request.REQUEST.get('mode', 'default')
        acceptance = True if mode == 'acceptance' else False
        context['acceptance'] = acceptance
    elif '/resource/aggregation' in target_uri:
        # Aggregations are fetched by their named graph ("<uri>/graph").
        target_named_graph = "{}/graph".format(target_uri.rstrip('/'))
        graph, nr_levels = RDFModel.get_context_graph(store=rdfstore.get_rdfstore(), named_graph=target_named_graph)
    else:
        graph, nr_levels = RDFModel.get_context_graph(
            store=rdfstore.get_rdfstore(), target_uri=target_uri
        )
    # todo: remove: should no longer be necessary with the addition of common.middleware.ForceLangMiddleware
    language = self.request.GET.get('lang', None)
    if language:
        activate(language)
    bindings = GraphBindings(
        about_uri=target_uri, graph=graph, excluded_properties=settings.RDF_EXCLUDED_PROPERTIES
    )
    context['resources'] = bindings
    context['absolute_uri'] = RDFRecord.get_external_rdf_url(target_uri, self.request)
    # The first RDF type with a configured foldout template wins.
    for rdf_type in bindings.get_about_resource().get_types():
        search_label = rdf_type.search_label.lower()
        content_template = settings.RDF_CONTENT_FOLDOUTS.get(search_label)
        if content_template:
            self.template_name = content_template
            break
    context['points'] = RDFModel.get_geo_points(graph)
    return context
def _process_action(self, action):
    """Process a single bulk-API action dictionary.

    Supported ``action["action"]`` verbs:

    * ``clear_orphans``: remove orphaned records of the dataset older than
      ``modification_date`` (no-op when that key is missing or empty);
    * ``disable_index``: delete the dataset from the index;
    * ``drop_dataset``: delete the dataset from the index and delete the
      ``DataSet`` rows with that spec;
    * anything else: parse ``action["graph"]`` into a record and queue an
      index action (and a SPARQL update when ``settings.RDF_STORE_TRIPLES``).

    :param action: dict with at least ``dataset`` and ``action`` keys; record
        actions also need ``graphUri`` and ``graph``.
    :return: the created record for record actions, otherwise ``None``.
        ``None`` is also returned when the RDF cannot be parsed (recorded in
        ``self.rdf_errors``) or a required key is missing (recorded in
        ``self.json_errors``).
    """
    try:
        self.spec = action["dataset"]
        process_verb = action["action"]
        record = None
        if process_verb in ["clear_orphans"]:
            purge_date = action.get("modification_date")
            if purge_date:
                orphans_removed = RDFRecord.remove_orphans(spec=self.spec, timestamp=purge_date)
                logger.info("Deleted {} orphans for {} before {}".format(orphans_removed, self.spec, purge_date))
        elif process_verb in ["disable_index"]:
            RDFRecord.delete_from_index(self.spec)
            logger.info("Deleted dataset {} from index. ".format(self.spec))
        elif process_verb in ["drop_dataset"]:
            RDFRecord.delete_from_index(self.spec)
            DataSet.objects.filter(spec=self.spec).delete()
            logger.info("Deleted dataset {} from index. ".format(self.spec))
        else:
            record_graph_uri = action["graphUri"]
            graph_ntriples = action["graph"]
            acceptance_mode = action.get("acceptanceMode", "false")
            # str() makes a JSON boolean (True/False) work as well as the
            # string form; None still means "not in acceptance mode".
            acceptance = acceptance_mode is not None and str(acceptance_mode).lower() == "true"
            content_hash = action.get("contentHash", None)
            from lod.utils.resolver import ElasticSearchRDFRecord
            record = ElasticSearchRDFRecord(spec=self.spec, rdf_string=graph_ntriples)
            try:
                # Anything containing an <rdf:RDF element is parsed as RDF/XML.
                rdf_format = record.DEFAULT_RDF_FORMAT if "<rdf:RDF" not in graph_ntriples else "xml"
                record.from_rdf_string(
                    rdf_string=graph_ntriples,
                    named_graph=record_graph_uri,
                    input_format=rdf_format
                )
            except ParseError as e:
                self.rdf_errors.append((e, action))
                # Pass values as lazy %-args with an explicit format string;
                # handing the exception over as the message with the action
                # dict as argument never rendered the action.
                logger.error("Unable to parse RDF for graph %s: %s", record_graph_uri, e)
                return None
            self.records_stored += 1
            self.es_actions[(record.hub_id, content_hash)] = record.create_es_action(
                action=process_verb,
                store=self.store,
                context=True,
                flat=True,
                exclude_fields=None,
                acceptance=acceptance,
                doc_type="void_edmrecord",
                record_type="mdr",
                content_hash=content_hash,
            )
            if settings.RDF_STORE_TRIPLES:
                self.sparql_update_queries[(record.hub_id, content_hash)] = record.create_sparql_update_query(
                    acceptance=acceptance
                )
        return record
    except KeyError as ke:
        # A required key is missing from the action payload.
        self.json_errors.append((ke, action))
        self.records_with_errors += 1
        return None
def identify(self):
    """Return the OAI-PMH Identify response.

    See http://www.openarchives.org/OAI/openarchivesprotocol.html#Identify

    Sets ``self.template_name`` to the identify template and renders it with
    the repository description built from ``settings``.
    """
    self.template_name = 'oaipmh/identify.xml'
    identify_data = {
        'name': 'OAI-PMH repository for {}'.format(settings.SITE_NAME),
        # perhaps an oai_admins method with default logic settings.admins?
        # A list (not a generator) so the template can test it for emptiness
        # and iterate it more than once.
        'admins': [email for name, email in settings.ADMINS],
        'earliest_date': '1990-02-01T12:00:00Z',  # placeholder
        # should probably be a class variable/configuration
        'deleted': 'no',  # no, transient, persistent (?)
        # class-level variable/configuration (may affect templates also)
        'granularity': 'YYYY-MM-DDThh:mm:ssZ',  # or YYYY-MM-DD
        # class-level config?
        'compression': 'deflate',  # gzip? - optional
        # description - optional
        # (place-holder values from OAI docs example)
        'identifier_scheme': 'oai',
        'repository_identifier': "{}".format(RDFRecord.get_rdf_base_url(prepend_scheme=True)),
        'identifier_delimiter': '_',
        'sample_identifier': '{}_spec_localId'.format(settings.SITE_NAME)
    }
    return self.render_to_response(identify_data)
def test_store_remote_cached_resource(self):
    """Storing a fetched remote resource makes it askable, without a cacheUrl triple."""
    test_uri = "http://nl.dbpedia.org/resource/Ton_Smits"
    any_triple_query = "where {{<{}> ?p ?o}}".format(test_uri)
    cache_url_query = "where {{<{}> <http://schemas.delving.org/nave/terms/cacheUrl> ?o}}".format(
        test_uri)

    resource = CacheResource.get_remote_lod_resource(test_uri)
    store = rdfstore._rdfstore_test
    assert len(resource) > 0

    # Start from an empty store so the subject is known to be absent.
    store._clear_all()
    graph_store = store.get_graph_store
    cache_graph = "http://{}/resource/cache#graph".format(RDFRecord.get_rdf_base_url())
    self.assertFalse(store.ask(query=any_triple_query))

    response = CacheResource.store_remote_cached_resource(resource, graph_store, cache_graph)
    assert response is not None
    assert response
    self.assertTrue(store.ask(query=any_triple_query))

    # cacheUrl is no longer being added
    self.assertFalse(store.ask(query=cache_url_query))
def generate_proxyfield_uri(self, label, language=None):
    """Return the dataset-scoped resource URI for a proxy field label.

    Spaces in ``label`` become underscores; when ``language`` is given it is
    inserted as a path segment in front of the label.
    """
    base_url = RDFRecord.get_rdf_base_url(prepend_scheme=True)
    path_segment = label.replace(' ', '_')
    if language:
        path_segment = "{}/{}".format(language, path_segment)
    return "{}/resource/dataset/{}/{}".format(base_url, self.spec, path_segment)
def get(self, request, *args, **kwargs):
    """Serve the RDF (or JSON index document) representation of a resource.

    The request URL is mapped from its ``/data/`` form to the ``/resource/``
    URI, then looked up in one of three places: the ``CacheResource`` table
    (for ``/resource/cache/`` URIs), the local graph index (when
    ``settings.RDF_USE_LOCAL_GRAPH``) or the triple store.

    :raises UnknownGraph: when the URI cannot be found in the selected source.
    :return: ``HttpResponse`` with the serialised content and matching mime type.
    """
    target_uri = os.path.splitext(request.build_absolute_uri())[0].replace('/data/', '/resource/')
    if not self.request.path.startswith("/data"):
        # Drop a two-letter language prefix in front of /resource/.
        target_uri = re.sub('/[a-z]{2}/resource/', '/resource/', target_uri, count=1)
    if target_uri.endswith('graph'):
        # Only strip the trailing "/graph"; an unanchored pattern would also
        # remove "/graph" from the middle of a path (e.g. "/graphics/").
        target_uri = re.sub("/graph$", "", target_uri)
    extension_ = self.kwargs.get('extension')
    rdf_format = mime_to_extension(get_lod_mime_type(extension_, self.request))
    if rdf_format == "rdf":
        rdf_format = "xml"
    resolved_uri = RDFRecord.get_internal_rdf_base_uri(target_uri)
    if "/resource/cache/" in target_uri:
        # old lookup rdfstore.get_rdfstore().get_cached_source_uri(target_uri)
        target_uri = target_uri.split('/resource/cache/')[-1]
        if 'geonames.org' in target_uri:
            target_uri = '{}/'.format(target_uri)
        # first() returns None for an empty queryset: one query instead of two.
        cache_object = CacheResource.objects.filter(document_uri=target_uri).first()
        if cache_object is None:
            raise UnknownGraph("URI {} is not known in our graph store".format(target_uri))
        content = cache_object.get_graph().serialize(format=rdf_format)
    elif settings.RDF_USE_LOCAL_GRAPH:
        # NOTE(review): HttpRequest.REQUEST was removed in Django 1.9, and
        # `acceptance` is only referenced by the commented-out call below --
        # confirm whether these two lines are still needed.
        mode = self.request.REQUEST.get('mode', 'default')
        acceptance = True if mode == 'acceptance' else False
        local_object = ElasticSearchRDFRecord(source_uri=resolved_uri)
        local_object.get_graph_by_source_uri(uri=resolved_uri)
        if not local_object.exists():
            # todo: temporary work around for EDMRecords not saved with subjects
            logger.warning("Unable to find graph for: {}".format(resolved_uri))
            raise UnknownGraph("URI {} is not known in our graph store".format(resolved_uri))
        mode = self.get_mode(request)
        if mode in ['context', 'api', 'api-flat']:
            # get_graph(with_mappings=True, include_mapping_target=True, acceptance=acceptance)
            content = local_object.get_context_graph(with_mappings=True, include_mapping_target=True)
            if mode in ['api', 'api-flat']:
                # API modes return the index document as JSON, not RDF.
                bindings = GraphBindings(about_uri=resolved_uri, graph=content)
                index_doc = bindings.to_index_doc() if mode == 'api' else bindings.to_flat_index_doc()
                content = json.dumps(index_doc)
                rdf_format = 'json-ld'
            else:
                content = content.serialize(format=rdf_format)
        else:
            content = local_object.get_graph()
            content = content.serialize(format=rdf_format)
    elif self.store.ask(uri=resolved_uri):
        target_uri = resolved_uri
        content = self.get_content(target_uri, rdf_format, request)
    else:
        # Without this branch `content` would be unbound below and the view
        # would fail with UnboundLocalError instead of signalling "unknown".
        raise UnknownGraph("URI {} is not known in our graph store".format(resolved_uri))
    return HttpResponse(
        content,
        content_type='{}; charset=utf8'.format(result_extension_to_mime(rdf_format))
    )
def get_resolved_uri(context, uri):
    """Return the resolved URI, or the cache URL for external URIs.

    * When the request host is a routed entry point and ``uri`` lives on the
      RDF base host, the host in ``uri`` is swapped for the request host.
    * When ``uri`` points at a host other than the request host, the cache
      URL from ``get_cache_url`` is returned.
    * Otherwise (including URIs without a host part) ``uri`` is returned as is.

    :param context: template context; must contain the current ``request``.
    :param uri: absolute URI to resolve.
    """
    request = context['request']
    request_base = urlparse(request.build_absolute_uri()).netloc
    rdf_base = urlparse(uri).netloc
    if not rdf_base:
        # No host part (relative or malformed URI): nothing to rewrite. An
        # empty netloc would pass the substring checks below and make
        # str.replace('') inject the host between every character.
        return uri
    if request_base in settings.RDF_ROUTED_ENTRY_POINTS and rdf_base in RDFRecord.get_rdf_base_url():
        return uri.replace(rdf_base, request_base)
    if rdf_base not in request_base:
        return get_cache_url(uri)
    return uri
def __init__(self, *args, **kwargs):
    """Set up the base URI, optional namespace bindings and an empty graph slot."""
    super().__init__(*args, **kwargs)
    rdf_base = RDFRecord.get_rdf_base_url(prepend_scheme=True)
    self.base_uri = '{}/resource'.format(rdf_base)
    if self.get_namespace_prefix():
        host = RDF_BASE_URL.replace("http://", "")
        ns_uri = 'http://{}/resource/ns/{}/'.format(host, self.get_namespace_prefix())
        self.ns = Namespace(ns_uri)
        type_base = "{}/{}/".format(self.base_uri, self.get_rdf_type().lower())
        self.rdf_type_base = Namespace(type_base)
        # Bind the prefix only for namespaces not listed as supported in settings.
        if ns_uri not in settings.RDF_SUPPORTED_NAMESPACES:
            namespace_manager.bind(self.get_namespace_prefix(), self.ns)
    self.ns_dict = dict(namespace_manager.namespaces())
    self.graph = None
def process_narthex_file(self, spec, store=None, acceptance=False, path=None, console=False):
    """Load a processed Narthex file into the index (and optionally the triple store).

    The file is read line by line. Lines are buffered until
    ``self.is_line_marker`` reports a marker line, at which point the buffered
    lines are parsed as one RDF/XML record. Index actions are flushed in
    batches through ``self.bulk_index``; SPARQL updates are flushed every 50
    queries when ``settings.RDF_STORE_TRIPLES`` is enabled. Records that fail
    to parse or index are logged (or printed when ``console``) and skipped.
    Finally orphans older than the start of this run are removed.

    :param spec: dataset spec the records belong to.
    :param store: RDF store for SPARQL updates; defaults to ``rdfstore.get_rdfstore()``.
    :param acceptance: passed on to ``create_sparql_update_query``.
    :param path: file to read; defaults to ``self.get_narthex_processed_fname()``.
    :param console: also print progress and errors to stdout.
    :return: tuple ``(lines, records)`` with the number of lines read and
        marker lines (records) encountered.
    """
    start = datetime.now()
    if not store:
        store = rdfstore.get_rdfstore()
    if not path:
        processed_fname = self.get_narthex_processed_fname()
    else:
        processed_fname = path
    print("started processing {} for dataset {}".format(processed_fname, spec))
    rdf_record = []
    lines = 0
    records = 0
    stored = 0  # never incremented here; kept because the summary log reports it
    new = 0
    sparql_update_queries = []
    es_actions = []
    with open(processed_fname, 'r') as f:
        for line in f:
            lines += 1
            exists, named_graph, content_hash = self.is_line_marker(line)
            if not exists:
                # Not a marker: keep buffering the lines of the current record.
                rdf_record.append(line)
                continue
            new += 1
            records += 1
            triples = " ".join(rdf_record)
            record = ElasticSearchRDFRecord(rdf_string=triples, spec=spec)
            parsed = False
            try:
                record.from_rdf_string(named_graph=named_graph, rdf_string=triples, input_format="xml")
                es_actions.append(
                    record.create_es_action(doc_type="void_edmrecord", record_type="mdr", context=True)
                )
                parsed = True
            except Exception as ex:
                # Best effort: one broken record must not abort the whole import.
                if console:
                    print("problem with {} for spec {} caused by {}".format(triples, spec, ex))
                else:
                    logger.error("problem with {} for spec {} caused by {}".format(triples, spec, ex))
            rdf_record[:] = []
            # Only queue a SPARQL update for records that were parsed and
            # indexed; a failed record has no usable graph to store.
            if parsed and settings.RDF_STORE_TRIPLES:
                sparql_update_queries.append(
                    record.create_sparql_update_query(acceptance=acceptance)
                )
            nr_sparql_updates = len(sparql_update_queries)
            if settings.RDF_STORE_TRIPLES and nr_sparql_updates > 0 and nr_sparql_updates % 50 == 0:
                store.update("\n".join(sparql_update_queries))
                sparql_update_queries[:] = []
            if records % 100 == 0 and records > 0:
                logger.info("processed {} records of {} at {}".format(records, spec, ctime()))
                if console:
                    print("processed {} records of {} at {}".format(records, spec, ctime()))
            if len(es_actions) > 100:
                self.bulk_index(es_actions, spec)
                es_actions[:] = []
    # store the remaining bulk items
    self.bulk_index(es_actions, spec)
    if settings.RDF_STORE_TRIPLES and len(sparql_update_queries) > 0:
        store.update("\n".join(sparql_update_queries))
    logger.info(
        "Dataset {}: records inserted {}, records same content hash {}, lines parsed {}, total records processed {}".format(
            spec, new, stored, lines, records)
    )
    print("Finished loading {spec} with {lines} and {records} in {seconds}\n".format(
        spec=spec,
        lines=lines,
        records=records,
        seconds=datetime.now() - start
    ))
    # Everything not touched since the start of this run is considered orphaned.
    RDFRecord.remove_orphans(spec, start.isoformat())
    return lines, records
def save(self, *args, **kwargs):
    """Normalise ``source_uri`` to the internal resource URI before saving."""
    # point to resource and not page or data
    resource_uri = self.source_uri
    for view_segment in ('/data/', '/page/'):
        resource_uri = resource_uri.replace(view_segment, '/resource/')
    # rewrite to base url
    self.source_uri = RDFRecord.get_internal_rdf_base_uri(resource_uri)
    super(UserGeneratedContent, self).save(*args, **kwargs)
def get_context_data(self, **kwargs):
    """Build the template context for the HTML (``/page/``) view of a resource.

    The request URL is mapped to its ``/resource/`` URI, the graph is loaded
    from the local graph index, the ``CacheResource`` table or the triple
    store, and the context is filled with the graph, its bindings, statistics,
    geo points, deep-zoom URLs and optional "more like this" data.

    When no graph can be found, ``context['unknown_graph']`` is set to ``True``
    and the partially filled context is returned early.
    """
    # todo later add acceptance mode
    target_uri = self.request.build_absolute_uri().replace('/page/', '/resource/')
    if "?" in target_uri:
        # Raw string: "\?" in a normal string literal is an invalid escape sequence.
        target_uri = re.sub(r"\?.*$", '', target_uri)
        # target_uri = target_uri.split('?')[:-1]
    if not self.request.path.startswith("/page"):
        # Drop a two-letter language prefix in front of /resource/.
        target_uri = re.sub('/[a-z]{2}/resource/', '/resource/', target_uri, count=1)
    if target_uri.endswith('graph'):
        target_uri = re.sub("/graph$", "", target_uri)
    context = super(LoDHTMLView, self).get_context_data(**kwargs)

    # default and test mode
    # NOTE(review): HttpRequest.REQUEST was removed in Django 1.9, and
    # `acceptance` is not used further down -- confirm before relying on it.
    mode = self.request.REQUEST.get('mode', 'default')
    acceptance = True if mode == 'acceptance' else False
    if not acceptance:
        acceptance = self.request.COOKIES.get('NAVE_ACCEPTANCE_MODE', False)

    object_local_cache = None
    cached = False
    context['about'] = target_uri
    context['ugc'] = None

    # NOTE(review): the if/else nesting below was reconstructed from collapsed
    # source; the `else` is assumed to belong to the cache check -- confirm.
    if "/resource/cache/" in target_uri:
        # lookup solution
        # rdfstore.get_rdfstore().get_cached_source_uri(target_uri)
        cached = True
        target_uri = target_uri.split('/resource/cache/')[-1]
        if target_uri.endswith("about.rdf"):
            target_uri = re.sub('about.rdf$', '', target_uri)
    else:
        target_uri = target_uri.rstrip('/')
        resolved_uri = RDFRecord.get_internal_rdf_base_uri(target_uri)
        if UserGeneratedContent.objects.filter(source_uri=resolved_uri).exists():
            context['ugc'] = UserGeneratedContent.objects.filter(source_uri=resolved_uri)
        if settings.RDF_USE_LOCAL_GRAPH:
            object_local_cache = ElasticSearchRDFRecord(source_uri=resolved_uri)
            object_local_cache.get_graph_by_source_uri(uri=resolved_uri)
            if not object_local_cache.exists():
                context['source_uri'] = target_uri
                context['unknown_graph'] = True
                return context
            target_uri = resolved_uri
        elif self.store.ask(uri=resolved_uri):
            target_uri = resolved_uri

    context['source_uri'] = target_uri
    context['about_label'] = target_uri.split('/')[-1]
    context['about_spec'] = target_uri.split('/')[-2]
    context['cached'] = cached

    # special query for skos
    def is_skos():
        return self.store.ask(
            query="where {{<{subject}> <{predicate}> <{object}>}}".format(
                subject=target_uri, predicate=RDF.type, object=SKOS.Concept))

    if object_local_cache:
        # todo: add code to retrieve proxyresources
        # (with_mappings=True, include_mapping_target=True, acceptance=acceptance)
        graph = object_local_cache.get_context_graph(with_mappings=True, include_mapping_target=True)
        nr_levels = 4
    elif cached:
        if CacheResource.objects.filter(document_uri=target_uri).exists():
            cache_object = CacheResource.objects.filter(document_uri=target_uri).first()
            graph = cache_object.get_graph()
            nr_levels = 3
        else:
            context['unknown_graph'] = True
            return context
    elif is_skos():
        graph, nr_levels = RDFModel.get_skos_context_graph(store=self.store, target_uri=target_uri)
        # nav_tree = RDFModel.get_nav_tree(target_uri=target_uri, store=self.store)
        # todo finish the nav tree implementation
        if 'skos_nav' in self.request.GET:
            return context
    elif '/resource/aggregation' in target_uri:
        # Aggregations are fetched by their named graph ("<uri>/graph").
        target_named_graph = "{}/graph".format(target_uri.rstrip('/'))
        graph, nr_levels = RDFModel.get_context_graph(store=self.store, named_graph=target_named_graph)
    else:
        graph, nr_levels = RDFModel.get_context_graph(target_uri=target_uri, store=self.store)

    # The graph must actually describe the requested subject.
    graph_contains_target = graph.query("""ASK {{ <{}> ?p ?o }} """.format(target_uri)).askAnswer
    if not graph_contains_target or len(graph) == 0:
        context['unknown_graph'] = True
        return context

    if context['about'].endswith('/'):
        context['about'] = context['about'].rstrip('/')
    context['graph'] = graph
    context['nr_levels'] = nr_levels
    context['namespaces'] = [(prefix, uri) for prefix, uri in graph.namespaces()]
    graph_bindings = GraphBindings(target_uri, graph, excluded_properties=settings.RDF_EXCLUDED_PROPERTIES)
    context['skos_links'], context['skos_filter'] = graph_bindings.get_all_skos_links()
    context['resources'] = graph_bindings
    resource = graph_bindings.get_about_resource()
    context['items'] = resource.get_items(as_tuples=True)
    rdf_type = graph_bindings.get_about_resource().get_type()
    context['rdf_type'] = rdf_type
    context['content_template'] = self.get_content_type_template(rdf_type.search_label)
    context['graph_stats'] = RDFModel.get_graph_statistics(graph)
    context['alt'] = ""
    context['points'] = RDFModel.get_geo_points(graph)

    # DEEPZOOM VALUE(S)
    zooms = graph_bindings.get_list('nave_deepZoomUrl')
    if zooms:
        context['deepzoom_count'] = len(zooms)
        context['deepzoom_urls'] = [zoom.value for zoom in zooms]

    # EXPERT MODE
    expert_mode = self.request.COOKIES.get('NAVE_DETAIL_EXPERT_MODE', False)
    if expert_mode:
        # do expert mode stuff like more like this
        context['expert_mode'] = True

    # NOTE(review): the two MLT blocks below are assumed to sit at function
    # level (not nested under expert mode) -- confirm against the original layout.
    if settings.MLT_DETAIL_ENABLE and object_local_cache:
        context['data'] = {'items': object_local_cache.get_more_like_this()}
    if settings.MLT_BANNERS and isinstance(settings.MLT_BANNERS, dict) and object_local_cache:
        from collections import OrderedDict
        context['data'] = {"mlt_banners": OrderedDict()}
        for name, config in settings.MLT_BANNERS.items():
            mlt_fields = config.get("fields", None)
            if mlt_fields and any(".raw" in field for field in mlt_fields):
                # .raw fields don't work with MORE LIKE THIS queries so are
                # queried directly.
                context['data']['mlt_banners'][name] = object_local_cache.get_raw_related(
                    query_fields=mlt_fields,
                    filter_query=config.get("filter_query", None),
                    graph_bindings=graph_bindings
                )
            else:
                context['data']['mlt_banners'][name] = object_local_cache.get_more_like_this(
                    mlt_count=10,
                    mlt_fields=mlt_fields,
                    filter_query=config.get("filter_query", None)
                )

    # An optional ?display=<mode> query parameter selects an alternative template.
    view_modes = {
        'properties': "rdf/_rdf_properties.html"
    }
    display_mode = self.request.GET.get('display')
    if display_mode:
        self.template_name = view_modes.get(display_mode, self.template_name)
    return context
def get_external_uri(context, absolute_uri):
    """Return the external RDF URL for ``absolute_uri``, using the request in ``context``."""
    current_request = context['request']
    return RDFRecord.get_external_rdf_url(absolute_uri, current_request)
def get_hub_id(self):
    """Return ``(hub_id, spec)`` derived from the last two segments of ``about_uri``.

    The hub id has the form ``<org_id>_<spec>_<local_id>``, with the local id
    passed through ``RDFRecord.clean_local_id``.
    """
    segments = self.about_uri.split('/')
    # The last two path segments are the dataset spec and the local id.
    spec, raw_local_id = segments[-2:]
    cleaned_local_id = RDFRecord.clean_local_id(raw_local_id)
    hub_id = "{}_{}_{}".format(self.org_id, spec, cleaned_local_id)
    return hub_id, spec