def resource_cache(self, root, resource_id, filename):
    """
    Serve an item from the resource cache.

    Called when a request is made for an item in the resource cache and
    is responsible for rendering the data. When the data to be rendered
    is HTML it will add a header to show that the content is cached, and
    set a <base> header if not present to make sure all relative links
    are resolved correctly.

    :param root: first path component below the archive directory
    :param resource_id: id of the resource; also used as the second path
        component below the archive directory
    :param filename: name of the cached file inside that directory
    :returns: for non-HTML files, the result of calling a paste
        ``FileApp`` for the cached file
    """
    from pylons import response
    from paste.fileapp import FileApp
    from ckanext.dgu.lib.helpers import tidy_url

    archive_root = pylons.config.get('ckanext-archiver.archive_dir')
    if not archive_root:
        # Bad configuration likely to cause this.
        abort(404, "Could not find archive folder")

    resource = model.Resource.get(resource_id)

    fmt = ""
    if resource:
        task_status = model.Session.query(model.TaskStatus).\
            filter(model.TaskStatus.task_type == 'qa').\
            filter(model.TaskStatus.key == 'status').\
            filter(model.TaskStatus.entity_id == resource.id).first()
        if task_status:
            status = json.loads(task_status.error)
            # A status without a format falls through to the
            # octet-stream default instead of raising KeyError.
            fmt = status.get('format', "")

    # Make an attempt at getting the correct content type but fail with
    # application/octet-stream in cases where we don't know.
    formats = {
        "CSV": "application/csv",
        "XLS": "application/vnd.ms-excel",
        "HTML": 'text/html; charset=utf-8',
    }
    content_type = formats.get(fmt, "application/octet-stream")
    is_html = fmt == "HTML"

    # The path components come from the request, so make sure the
    # resolved path cannot escape the archive directory (e.g. via "..").
    archive_dir = os.path.abspath(archive_root)
    filepath = os.path.abspath(
        os.path.join(archive_dir, root, resource_id, filename))
    if not filepath.startswith(os.path.join(archive_dir, '')):
        abort(404, "Resource is not cached")
    filepath = filepath.encode('utf-8')

    if not os.path.exists(filepath):
        abort(404, "Resource is not cached")

    if not is_html:
        file_size = os.path.getsize(filepath)
        headers = [('Content-Type', content_type),
                   ('Content-Length', str(file_size))]
        fapp = FileApp(filepath, headers=headers)
        return fapp(request.environ, self.start_response)

    # fmt can only be "HTML" when a resource was found, so resource is
    # not None from here on.
    origin = tidy_url(resource.url)
    parts = urlparse.urlparse(origin)
    url = "{0}://{1}".format(parts.scheme, parts.netloc)
    base_string = "<head><base href='{0}'>".format(url)

    response.headers['Content-Type'] = content_type

    try:
        # The context manager closes the handle even if read() fails.
        with open(filepath, "r") as cached_file:
            content = cached_file.read()
    except IOError:
        log.error('Error reading resource cache file: %s', filepath)
        abort(403, "The system was unable to read this resource from the cache. Admins have been notified")

    if not re.search("<base ", content, re.IGNORECASE):
        compiled_head = re.compile(re.escape("<head>"), re.IGNORECASE)
        # The flag belongs to compile(); as a third argument to sub() it
        # would be taken as ``count``. A callable replacement keeps any
        # backslashes in the URL from being read as regex escapes.
        content = compiled_head.sub(lambda match: base_string, content)

    if '__archiver__cache__header__' not in content:
        # We should insert our HTML block at the bottom of the page with
        # the appropriate CSS to render it at the top. Easier to insert
        # before </body>.
        c.url = resource.url
        replacement = render("data/cache_header.html")
        try:
            compiled_body = re.compile(re.escape("</body>"), re.IGNORECASE)
            footer = "{0}</body>".format(replacement)
            content = compiled_body.sub(lambda match: footer, content)
        except Exception:
            log.error("Failed to do the replacement in resource<{0}> and file: {1}".format(resource.id, filepath))
            return

    # NOTE(review): nothing visible here returns or writes ``content``;
    # the tail of this method may lie outside the reviewed excerpt -
    # confirm before relying on the HTML branch.
def resource_cache(self, root, resource_id, filename):
    """
    Serve an item from the resource cache (currently disabled).

    Called when a request is made for an item in the resource cache and
    is responsible for rendering the data. When the data to be rendered
    is HTML it will add a header to show that the content is cached, and
    set a <base> header if not present to make sure all relative links
    are resolved correctly.

    The very first statement aborts with a 403, so, assuming ``abort``
    raises as it does in Pylons, none of the code below runs until that
    line is removed.

    :param root: first path component below the archive directory
    :param resource_id: id of the resource; also used as the second path
        component below the archive directory
    :param filename: name of the cached file inside that directory
    :returns: for non-HTML files, the result of calling a paste
        ``FileApp`` for the cached file
    """
    abort(403, 'This feature is currently disabled')

    from pylons import response
    from paste.fileapp import FileApp
    from ckanext.dgu.lib.helpers import tidy_url
    from ckanext.qa.model import QA

    archive_root = pylons.config.get('ckanext-archiver.archive_dir')
    if not archive_root:
        # Bad configuration likely to cause this.
        abort(404, "Could not find archive folder")

    resource = model.Resource.get(resource_id)

    fmt = ""
    if resource:
        qa = QA.get_for_resource(resource.id)
        if qa:
            fmt = qa.format

    is_html = fmt == "HTML"

    # The path components come from the request, so make sure the
    # resolved path cannot escape the archive directory (e.g. via "..").
    archive_dir = os.path.abspath(archive_root)
    filepath = os.path.abspath(
        os.path.join(archive_dir, root, resource_id, filename))
    if not filepath.startswith(os.path.join(archive_dir, '')):
        abort(404, "Resource is not cached")
    filepath = filepath.encode('utf-8')
    filepath = urllib.quote(filepath)

    if not os.path.exists(filepath):
        abort(404, "Resource is not cached")

    if not is_html:
        # Content-Type is determined by FileApp based on the extension.
        # Using the format provided by QA isn't an option currently as
        # for zip files it gives the format of the content of the zip.
        file_size = os.path.getsize(filepath)
        headers = [('Content-Length', str(file_size))]
        fapp = FileApp(filepath, headers=headers)
        return fapp(request.environ, self.start_response)

    # fmt can only be "HTML" when a resource was found, so resource is
    # not None from here on.
    origin = tidy_url(resource.url)
    parts = urlparse.urlparse(origin)
    url = "{0}://{1}".format(parts.scheme, parts.netloc)
    base_string = "<head><base href='{0}'>".format(url)

    response.headers['Content-Type'] = 'text/html; charset=utf-8'

    try:
        # The context manager closes the handle even if read() fails.
        with open(filepath, "r") as cached_file:
            content = cached_file.read()
    except IOError:
        log.error('Error reading resource cache file: %s', filepath)
        abort(403, "The system was unable to read this resource from the cache. Admins have been notified")

    if not re.search("<base ", content, re.IGNORECASE):
        compiled_head = re.compile(re.escape("<head>"), re.IGNORECASE)
        # The flag belongs to compile(); as a third argument to sub() it
        # would be taken as ``count``. A callable replacement keeps any
        # backslashes in the URL from being read as regex escapes.
        content = compiled_head.sub(lambda match: base_string, content)

    if '__archiver__cache__header__' not in content:
        # We should insert our HTML block at the bottom of the page with
        # the appropriate CSS to render it at the top. Easier to insert
        # before </body>.
        c.url = resource.url
        replacement = render("data/cache_header.html")
        try:
            compiled_body = re.compile(re.escape("</body>"), re.IGNORECASE)
            footer = "{0}</body>".format(replacement)
            content = compiled_body.sub(lambda match: footer, content)
        except Exception:
            log.warn("Failed to do the replacement in resource<{0}> and file: {1}".format(resource.id, filepath))
            return

    # NOTE(review): nothing visible here returns or writes ``content``;
    # the tail of this method may lie outside the reviewed excerpt -
    # confirm before relying on the HTML branch.
def resource_cache(self, root, resource_id, filename):
    """
    Serve an item from the resource cache.

    Called when a request is made for an item in the resource cache and
    is responsible for rendering the data. When the data to be rendered
    is HTML it will add a header to show that the content is cached, and
    set a <base> header if not present to make sure all relative links
    are resolved correctly.

    :param root: first path component below the archive directory
    :param resource_id: id of the resource; also used as the second path
        component below the archive directory
    :param filename: name of the cached file inside that directory
    :returns: for non-HTML files, the result of calling a paste
        ``FileApp`` for the cached file
    """
    from pylons import response
    from paste.fileapp import FileApp
    from ckanext.dgu.lib.helpers import tidy_url
    from ckanext.qa.model import QA

    archive_root = pylons.config.get('ckanext-archiver.archive_dir')
    if not archive_root:
        # Bad configuration likely to cause this.
        abort(404, "Could not find archive folder")

    resource = model.Resource.get(resource_id)

    fmt = ""
    if resource:
        qa = QA.get_for_resource(resource.id)
        if qa:
            fmt = qa.format

    is_html = fmt == "HTML"

    # The path components come from the request, so make sure the
    # resolved path cannot escape the archive directory (e.g. via "..").
    archive_dir = os.path.abspath(archive_root)
    filepath = os.path.abspath(
        os.path.join(archive_dir, root, resource_id, filename))
    if not filepath.startswith(os.path.join(archive_dir, '')):
        abort(404, "Resource is not cached")
    filepath = filepath.encode('utf-8')
    filepath = urllib.quote(filepath)

    if not os.path.exists(filepath):
        abort(404, "Resource is not cached")

    if not is_html:
        # Content-Type is determined by FileApp based on the extension.
        # Using the format provided by QA isn't an option currently as
        # for zip files it gives the format of the content of the zip.
        file_size = os.path.getsize(filepath)
        headers = [('Content-Length', str(file_size))]
        fapp = FileApp(filepath, headers=headers)
        return fapp(request.environ, self.start_response)

    # fmt can only be "HTML" when a resource was found, so resource is
    # not None from here on.
    origin = tidy_url(resource.url)
    parts = urlparse.urlparse(origin)
    url = "{0}://{1}".format(parts.scheme, parts.netloc)
    base_string = "<head><base href='{0}'>".format(url)

    response.headers['Content-Type'] = 'text/html; charset=utf-8'

    try:
        # The context manager closes the handle even if read() fails.
        with open(filepath, "r") as cached_file:
            content = cached_file.read()
    except IOError:
        log.error('Error reading resource cache file: %s', filepath)
        abort(
            403,
            "The system was unable to read this resource from the cache. Admins have been notified"
        )

    if not re.search("<base ", content, re.IGNORECASE):
        compiled_head = re.compile(re.escape("<head>"), re.IGNORECASE)
        # The flag belongs to compile(); as a third argument to sub() it
        # would be taken as ``count``. A callable replacement keeps any
        # backslashes in the URL from being read as regex escapes.
        content = compiled_head.sub(lambda match: base_string, content)

    if '__archiver__cache__header__' not in content:
        # We should insert our HTML block at the bottom of the page with
        # the appropriate CSS to render it at the top. Easier to insert
        # before </body>.
        c.url = resource.url
        replacement = render("data/cache_header.html")
        try:
            compiled_body = re.compile(re.escape("</body>"), re.IGNORECASE)
            footer = "{0}</body>".format(replacement)
            content = compiled_body.sub(lambda match: footer, content)
        except Exception:
            log.warn(
                "Failed to do the replacement in resource<{0}> and file: {1}"
                .format(resource.id, filepath))
            return

    # NOTE(review): nothing visible here returns or writes ``content``;
    # the tail of this method may lie outside the reviewed excerpt -
    # confirm before relying on the HTML branch.
def resource_cache(self, root, resource_id, filename):
    """
    Serve an item from the resource cache.

    Called when a request is made for an item in the resource cache and
    is responsible for rendering the data. When the data to be rendered
    is HTML it will add a header to show that the content is cached, and
    set a <base> header if not present to make sure all relative links
    are resolved correctly.

    :param root: first path component below the archive directory
    :param resource_id: id of the resource; also used as the second path
        component below the archive directory
    :param filename: name of the cached file inside that directory
    :returns: for non-HTML files, the result of calling a paste
        ``FileApp`` for the cached file
    """
    from pylons import response
    from paste.fileapp import FileApp
    from ckanext.dgu.lib.helpers import tidy_url

    archive_root = pylons.config.get('ckanext-archiver.archive_dir')
    if not archive_root:
        # Bad configuration likely to cause this.
        abort(404, "Could not find archive folder")

    resource = model.Resource.get(resource_id)

    fmt = ""
    if resource:
        task_status = model.Session.query(model.TaskStatus).\
            filter(model.TaskStatus.task_type == 'qa').\
            filter(model.TaskStatus.key == 'status').\
            filter(model.TaskStatus.entity_id == resource.id).first()
        if task_status:
            status = json.loads(task_status.error)
            # A status without a format falls through to the
            # octet-stream default instead of raising KeyError.
            fmt = status.get('format', "")

    # Make an attempt at getting the correct content type but fail with
    # application/octet-stream in cases where we don't know.
    formats = {
        "CSV": "application/csv",
        "XLS": "application/vnd.ms-excel",
        "HTML": 'text/html; charset=utf-8',
    }
    content_type = formats.get(fmt, "application/octet-stream")
    is_html = fmt == "HTML"

    # The path components come from the request, so make sure the
    # resolved path cannot escape the archive directory (e.g. via "..").
    archive_dir = os.path.abspath(archive_root)
    filepath = os.path.abspath(
        os.path.join(archive_dir, root, resource_id, filename))
    if not filepath.startswith(os.path.join(archive_dir, '')):
        abort(404, "Resource is not cached")
    filepath = filepath.encode('utf-8')

    if not os.path.exists(filepath):
        abort(404, "Resource is not cached")

    if not is_html:
        file_size = os.path.getsize(filepath)
        headers = [('Content-Type', content_type),
                   ('Content-Length', str(file_size))]
        fapp = FileApp(filepath, headers=headers)
        return fapp(request.environ, self.start_response)

    # fmt can only be "HTML" when a resource was found, so resource is
    # not None from here on.
    origin = tidy_url(resource.url)
    parts = urlparse.urlparse(origin)
    url = "{0}://{1}".format(parts.scheme, parts.netloc)
    base_string = "<head><base href='{0}'>".format(url)

    response.headers['Content-Type'] = content_type

    try:
        # The context manager closes the handle even if read() fails.
        with open(filepath, "r") as cached_file:
            content = cached_file.read()
    except IOError:
        log.error('Error reading resource cache file: %s', filepath)
        abort(
            403,
            "The system was unable to read this resource from the cache. Admins have been notified"
        )

    if not re.search("<base ", content, re.IGNORECASE):
        compiled_head = re.compile(re.escape("<head>"), re.IGNORECASE)
        # The flag belongs to compile(); as a third argument to sub() it
        # would be taken as ``count``. A callable replacement keeps any
        # backslashes in the URL from being read as regex escapes.
        content = compiled_head.sub(lambda match: base_string, content)

    if '__archiver__cache__header__' not in content:
        # We should insert our HTML block at the bottom of the page with
        # the appropriate CSS to render it at the top. Easier to insert
        # before </body>.
        c.url = resource.url
        replacement = render("data/cache_header.html")
        try:
            compiled_body = re.compile(re.escape("</body>"), re.IGNORECASE)
            footer = "{0}</body>".format(replacement)
            content = compiled_body.sub(lambda match: footer, content)
        except Exception:
            log.warn(
                "Failed to do the replacement in resource<{0}> and file: {1}"
                .format(resource.id, filepath))
            return

    # NOTE(review): nothing visible here returns or writes ``content``;
    # the tail of this method may lie outside the reviewed excerpt -
    # confirm before relying on the HTML branch.