Esempio n. 1
0
    def resource_cache(self, root, resource_id, filename):
        """
        Called when a request is made for an item in the resource cache and
        is responsible for rendering the data.  When the data to be rendered
        is HTML it will add a header to show that the content is cached, and
        set a <base> header if not present to make sure all relative links are
        resolved correctly.
        """
        from pylons import response
        from paste.fileapp import FileApp
        from ckanext.dgu.lib.helpers import tidy_url

        archive_root = pylons.config.get('ckanext-archiver.archive_dir')
        if not archive_root:
            # Bad configuration likely to cause this.
            abort(404, "Could not find archive folder")

        resource = model.Resource.get(resource_id)
        is_html = False
        content_type = "application/octet-stream"

        fmt = ""
        if resource:
            task_status = model.Session.query(model.TaskStatus).\
                          filter(model.TaskStatus.task_type=='qa').\
                          filter(model.TaskStatus.key=='status').\
                          filter(model.TaskStatus.entity_id==resource.id).first()
            if task_status:
                status = json.loads(task_status.error)
                fmt = status['format']

        # Make an attempt at getting the correct content type but fail with
        # application/octet-stream in cases where we don't know.
        formats = {
            "CSV": "application/csv",
            "XLS": "application/vnd.ms-excel",
            "HTML": 'text/html; charset=utf-8' }
        content_type = formats.get(fmt, "application/octet-stream")

        is_html = fmt == "HTML"

        filepath = os.path.join(archive_root, root, resource_id, filename).encode('utf-8')
        if not os.path.exists(filepath):
            abort(404, "Resource is not cached")

        file_size = os.path.getsize(filepath)
        if not is_html:
            headers = [('Content-Type', content_type),
                       ('Content-Length', str(file_size))]
            fapp = FileApp(filepath, headers=headers)
            return fapp(request.environ, self.start_response)

        origin = tidy_url(resource.url)
        parts = urlparse.urlparse(origin)
        url = "{0}://{1}".format(parts.scheme, parts.netloc)
        base_string = "<head><base href='{0}'>".format(url)

        response.headers['Content-Type'] = content_type
        try:
            f = open(filepath, "r")
        except IOError:
            log.error('Error reading resource cache file: %s', filepath)
            abort(403, "The system was unable to read this resource from the cache. Admins have been notified")

        content = f.read()
        f.close()

        if not re.search("<base ", content, re.IGNORECASE):
            compiled_head = re.compile(re.escape("<head>"), re.IGNORECASE)
            content = compiled_head.sub( base_string, content, re.IGNORECASE)

        if not '__archiver__cache__header__' in content:
            # We should insert our HTML block at the bottom of the page with
            # the appropriate CSS to render it at the top.  Easier to insert
            # before </body>.
            c.url = resource.url
            replacement = render("data/cache_header.html")
            try:
                compiled_body = re.compile(re.escape("</body>"), re.IGNORECASE)
                content = compiled_body.sub( "{0}</body>".format(replacement), content, re.IGNORECASE)
            except Exception, e:
                log.error("Failed to do the replacement in resource<{0}> and file: {1}".format(resource.id, filepath))
                return
Esempio n. 2
0
    def resource_cache(self, root, resource_id, filename):
        """
        Called when a request is made for an item in the resource cache and
        is responsible for rendering the data.  When the data to be rendered
        is HTML it will add a header to show that the content is cached, and
        set a <base> header if not present to make sure all relative links are
        resolved correctly.
        """
        abort(403, 'This feature is currently disabled')
        from pylons import response
        from paste.fileapp import FileApp
        from ckanext.dgu.lib.helpers import tidy_url
        from ckanext.qa.model import QA

        archive_root = pylons.config.get('ckanext-archiver.archive_dir')
        if not archive_root:
            # Bad configuration likely to cause this.
            abort(404, "Could not find archive folder")

        resource = model.Resource.get(resource_id)

        fmt = ""
        if resource:
            qa = QA.get_for_resource(resource.id)
            if qa:
                fmt = qa.format

        is_html = fmt == "HTML"

        filepath = os.path.join(archive_root, root, resource_id, filename).encode('utf-8')
        filepath = urllib.quote(filepath)
        if not os.path.exists(filepath):
            abort(404, "Resource is not cached")

        file_size = os.path.getsize(filepath)
        if not is_html:
            # Content-Type is determined by FileApp based on the extension.
            # Using the format provided by QA isn't an option currently as
            # for zip files it gives the format of the content of the zip.
            headers = [('Content-Length', str(file_size))]
            fapp = FileApp(filepath, headers=headers)
            return fapp(request.environ, self.start_response)

        origin = tidy_url(resource.url)
        parts = urlparse.urlparse(origin)
        url = "{0}://{1}".format(parts.scheme, parts.netloc)
        base_string = "<head><base href='{0}'>".format(url)

        response.headers['Content-Type'] = 'text/html; charset=utf-8'
        try:
            f = open(filepath, "r")
        except IOError:
            log.error('Error reading resource cache file: %s', filepath)
            abort(403, "The system was unable to read this resource from the cache. Admins have been notified")

        content = f.read()
        f.close()

        if not re.search("<base ", content, re.IGNORECASE):
            compiled_head = re.compile(re.escape("<head>"), re.IGNORECASE)
            content = compiled_head.sub( base_string, content, re.IGNORECASE)

        if not '__archiver__cache__header__' in content:
            # We should insert our HTML block at the bottom of the page with
            # the appropriate CSS to render it at the top.  Easier to insert
            # before </body>.
            c.url = resource.url
            replacement = render("data/cache_header.html")
            try:
                compiled_body = re.compile(re.escape("</body>"), re.IGNORECASE)
                content = compiled_body.sub( "{0}</body>".format(replacement), content, re.IGNORECASE)
            except Exception, e:
                log.warn("Failed to do the replacement in resource<{0}> and file: {1}".format(resource.id, filepath))
                return
Esempio n. 3
0
    def resource_cache(self, root, resource_id, filename):
        """
        Called when a request is made for an item in the resource cache and
        is responsible for rendering the data.  When the data to be rendered
        is HTML it will add a header to show that the content is cached, and
        set a <base> header if not present to make sure all relative links are
        resolved correctly.
        """
        from pylons import response
        from paste.fileapp import FileApp
        from ckanext.dgu.lib.helpers import tidy_url
        from ckanext.qa.model import QA

        archive_root = pylons.config.get('ckanext-archiver.archive_dir')
        if not archive_root:
            # Bad configuration likely to cause this.
            abort(404, "Could not find archive folder")

        resource = model.Resource.get(resource_id)

        fmt = ""
        if resource:
            qa = QA.get_for_resource(resource.id)
            if qa:
                fmt = qa.format

        is_html = fmt == "HTML"

        filepath = os.path.join(archive_root, root, resource_id,
                                filename).encode('utf-8')
        filepath = urllib.quote(filepath)
        if not os.path.exists(filepath):
            abort(404, "Resource is not cached")

        file_size = os.path.getsize(filepath)
        if not is_html:
            # Content-Type is determined by FileApp based on the extension.
            # Using the format provided by QA isn't an option currently as
            # for zip files it gives the format of the content of the zip.
            headers = [('Content-Length', str(file_size))]
            fapp = FileApp(filepath, headers=headers)
            return fapp(request.environ, self.start_response)

        origin = tidy_url(resource.url)
        parts = urlparse.urlparse(origin)
        url = "{0}://{1}".format(parts.scheme, parts.netloc)
        base_string = "<head><base href='{0}'>".format(url)

        response.headers['Content-Type'] = 'text/html; charset=utf-8'
        try:
            f = open(filepath, "r")
        except IOError:
            log.error('Error reading resource cache file: %s', filepath)
            abort(
                403,
                "The system was unable to read this resource from the cache. Admins have been notified"
            )

        content = f.read()
        f.close()

        if not re.search("<base ", content, re.IGNORECASE):
            compiled_head = re.compile(re.escape("<head>"), re.IGNORECASE)
            content = compiled_head.sub(base_string, content, re.IGNORECASE)

        if not '__archiver__cache__header__' in content:
            # We should insert our HTML block at the bottom of the page with
            # the appropriate CSS to render it at the top.  Easier to insert
            # before </body>.
            c.url = resource.url
            replacement = render("data/cache_header.html")
            try:
                compiled_body = re.compile(re.escape("</body>"), re.IGNORECASE)
                content = compiled_body.sub("{0}</body>".format(replacement),
                                            content, re.IGNORECASE)
            except Exception, e:
                log.warn(
                    "Failed to do the replacement in resource<{0}> and file: {1}"
                    .format(resource.id, filepath))
                return
Esempio n. 4
0
    def resource_cache(self, root, resource_id, filename):
        """
        Called when a request is made for an item in the resource cache and
        is responsible for rendering the data.  When the data to be rendered
        is HTML it will add a header to show that the content is cached, and
        set a <base> header if not present to make sure all relative links are
        resolved correctly.
        """
        from pylons import response
        from paste.fileapp import FileApp
        from ckanext.dgu.lib.helpers import tidy_url

        archive_root = pylons.config.get('ckanext-archiver.archive_dir')
        if not archive_root:
            # Bad configuration likely to cause this.
            abort(404, "Could not find archive folder")

        resource = model.Resource.get(resource_id)
        is_html = False
        content_type = "application/octet-stream"

        fmt = ""
        if resource:
            task_status = model.Session.query(model.TaskStatus).\
                          filter(model.TaskStatus.task_type=='qa').\
                          filter(model.TaskStatus.key=='status').\
                          filter(model.TaskStatus.entity_id==resource.id).first()
            if task_status:
                status = json.loads(task_status.error)
                fmt = status['format']

        # Make an attempt at getting the correct content type but fail with
        # application/octet-stream in cases where we don't know.
        formats = {
            "CSV": "application/csv",
            "XLS": "application/vnd.ms-excel",
            "HTML": 'text/html; charset=utf-8'
        }
        content_type = formats.get(fmt, "application/octet-stream")

        is_html = fmt == "HTML"

        filepath = os.path.join(archive_root, root, resource_id,
                                filename).encode('utf-8')
        if not os.path.exists(filepath):
            abort(404, "Resource is not cached")

        file_size = os.path.getsize(filepath)
        if not is_html:
            headers = [('Content-Type', content_type),
                       ('Content-Length', str(file_size))]
            fapp = FileApp(filepath, headers=headers)
            return fapp(request.environ, self.start_response)

        origin = tidy_url(resource.url)
        parts = urlparse.urlparse(origin)
        url = "{0}://{1}".format(parts.scheme, parts.netloc)
        base_string = "<head><base href='{0}'>".format(url)

        response.headers['Content-Type'] = content_type
        try:
            f = open(filepath, "r")
        except IOError:
            log.error('Error reading resource cache file: %s', filepath)
            abort(
                403,
                "The system was unable to read this resource from the cache. Admins have been notified"
            )

        content = f.read()
        f.close()

        if not re.search("<base ", content, re.IGNORECASE):
            compiled_head = re.compile(re.escape("<head>"), re.IGNORECASE)
            content = compiled_head.sub(base_string, content, re.IGNORECASE)

        if not '__archiver__cache__header__' in content:
            # We should insert our HTML block at the bottom of the page with
            # the appropriate CSS to render it at the top.  Easier to insert
            # before </body>.
            c.url = resource.url
            replacement = render("data/cache_header.html")
            try:
                compiled_body = re.compile(re.escape("</body>"), re.IGNORECASE)
                content = compiled_body.sub("{0}</body>".format(replacement),
                                            content, re.IGNORECASE)
            except Exception, e:
                log.warn(
                    "Failed to do the replacement in resource<{0}> and file: {1}"
                    .format(resource.id, filepath))
                return