Example #1
0
def course_image_url(course, image_key='course_image'):
    """Try to look up the image url for the course.  If it's not found,
    log an error and return the dead link.
    image_key can be one of the three: 'course_image', 'hero_image', 'thumbnail_image' """
    if course.static_asset_path:
        # If we are a static course with the image_key attribute
        # set different than the default, return that path so that
        # courses can use custom course image paths, otherwise just
        # return the default static path.
        url = '/static/' + (course.static_asset_path or getattr(course, 'data_dir', ''))
        if hasattr(course, image_key) and getattr(course, image_key) != course.fields[image_key].default:
            url += '/' + getattr(course, image_key)
        else:
            url += '/images/' + image_key + '.jpg'
    elif not getattr(course, image_key):
        # if image_key is empty, use the default image url from settings
        url = settings.STATIC_URL + settings.DEFAULT_COURSE_ABOUT_IMAGE_URL
    else:
        loc = StaticContent.compute_location(course.id, getattr(course, image_key))
        if getattr(course, image_key).endswith('images_course_image.jpg'):
            try:
                AssetManager.find(loc)
            except NotFoundError:
                url = '/static/' + settings.DEFAULT_COURSE_ABOUT_IMAGE_URL
                return url

        url = StaticContent.serialize_asset_key_with_slash(loc)

    return url
Example #2
0
    def test_happy_path(self, modulestore_type, create_after_overview):
        """
        What happens when everything works like we expect it to.

        If `create_after_overview` is True, we will temporarily disable
        thumbnail creation so that the initial CourseOverview is created without
        an image_set, and the CourseOverviewImageSet is created afterwards. If
        `create_after_overview` is False, we'll create the CourseOverviewImageSet
        at the same time as the CourseOverview.
        """
        # Create a real (oversized) image...
        image = Image.new("RGB", (800, 400), "blue")
        image_buff = StringIO()
        image.save(image_buff, format="JPEG")
        image_buff.seek(0)
        image_name = "big_course_image.jpeg"

        with self.store.default_store(modulestore_type):
            course = CourseFactory.create(
                default_store=modulestore_type, course_image=image_name
            )

            # Save a real image here...
            course_image_asset_key = StaticContent.compute_location(course.id, course.course_image)
            course_image_content = StaticContent(course_image_asset_key, image_name, 'image/jpeg', image_buff)
            contentstore().save(course_image_content)

            # If create_after_overview is True, disable thumbnail generation so
            # that the CourseOverview object is created and saved without an
            # image_set at first (it will be lazily created later).
            if create_after_overview:
                self.set_config(enabled=False)

            # Now generate the CourseOverview...
            course_overview = CourseOverview.get_from_id(course.id)

            # If create_after_overview is True, no image_set exists yet. Verify
            # that, then switch config back over to True and it should lazily
            # create the image_set on the next get_from_id() call.
            if create_after_overview:
                self.assertFalse(hasattr(course_overview, 'image_set'))
                self.set_config(enabled=True)
                course_overview = CourseOverview.get_from_id(course.id)

            self.assertTrue(hasattr(course_overview, 'image_set'))
            image_urls = course_overview.image_urls
            config = CourseOverviewImageConfig.current()

            # Make sure the thumbnail names come out as expected...
            self.assertTrue(image_urls['raw'].endswith('big_course_image.jpeg'))
            self.assertTrue(image_urls['small'].endswith('big_course_image-jpeg-{}x{}.jpg'.format(*config.small)))
            self.assertTrue(image_urls['large'].endswith('big_course_image-jpeg-{}x{}.jpg'.format(*config.large)))

            # Now make sure our thumbnails are of the sizes we expect...
            for image_url, expected_size in [(image_urls['small'], config.small), (image_urls['large'], config.large)]:
                image_key = StaticContent.get_location_from_path(image_url)
                image_content = AssetManager.find(image_key)
                image = Image.open(StringIO(image_content.data))
                self.assertEqual(image.size, expected_size)
Example #3
0
    def test_different_resolutions(self, src_dimensions):
        """
        Test various resolutions of images to make thumbnails of.

        Note that our test sizes are small=(200, 100) and large=(400, 200).

        1. Images should won't be blown up if it's too small, so a (100, 50)
           resolution image will remain (100, 50).
        2. However, images *will* be converted using our format and quality
           settings (JPEG, 75% -- the PIL default). This is because images with
           relatively small dimensions not compressed properly.
        3. Image thumbnail naming will maintain the naming convention of the
           target resolution, even if the image was not actually scaled to that
           size (i.e. it was already smaller). This is mostly because it's
           simpler to be consistent, but it also lets us more easily tell which
           configuration a thumbnail was created under.
        """
        # Create a source image...
        image = Image.new("RGB", src_dimensions, "blue")
        image_buff = StringIO()
        image.save(image_buff, format="PNG")
        image_buff.seek(0)
        image_name = "src_course_image.png"

        course = CourseFactory.create(course_image=image_name)

        # Save the image to the contentstore...
        course_image_asset_key = StaticContent.compute_location(course.id, course.course_image)
        course_image_content = StaticContent(course_image_asset_key, image_name, 'image/png', image_buff)
        contentstore().save(course_image_content)

        # Now generate the CourseOverview...
        config = CourseOverviewImageConfig.current()
        course_overview = CourseOverview.get_from_id(course.id)
        image_urls = course_overview.image_urls

        for image_url, target in [(image_urls['small'], config.small), (image_urls['large'], config.large)]:
            image_key = StaticContent.get_location_from_path(image_url)
            image_content = AssetManager.find(image_key)
            image = Image.open(StringIO(image_content.data))

            # Naming convention for thumbnail
            self.assertTrue(image_url.endswith('src_course_image-png-{}x{}.jpg'.format(*target)))

            # Actual thumbnail data
            src_x, src_y = src_dimensions
            target_x, target_y = target
            image_x, image_y = image.size

            # I'm basically going to assume the image library knows how to do
            # the right thing in terms of handling aspect ratio. We're just
            # going to make sure that small images aren't blown up, and that
            # we never exceed our target sizes
            self.assertLessEqual(image_x, target_x)
            self.assertLessEqual(image_y, target_y)

            if src_x < target_x and src_y < target_y:
                self.assertEqual(src_x, image_x)
                self.assertEqual(src_y, image_y)
Example #4
0
    def get_canonicalized_asset_path(course_key, path, base_url,
                                     excluded_exts):
        """
        Returns a fully-qualified path to a piece of static content.

        If a static asset CDN is configured, this path will include it.
        Otherwise, the path will simply be relative.

        Args:
            course_key: key to the course which owns this asset
            path: the path to said content

        Returns:
            string: fully-qualified path to asset
        """

        # Break down the input path.
        _, _, relative_path, params, query_string, fragment = urlparse(path)

        # Convert our path to an asset key if it isn't one already.
        asset_key = StaticContent.get_asset_key_from_path(
            course_key, relative_path)

        # Check the status of the asset to see if this can be served via CDN aka publicly.
        serve_from_cdn = False
        try:
            content = AssetManager.find(asset_key, as_stream=True)
            is_locked = getattr(content, "locked", True)
            serve_from_cdn = not is_locked
        except (ItemNotFoundError, NotFoundError):
            # If we can't find the item, just treat it as if it's locked.
            serve_from_cdn = False

        # See if this is an allowed file extension to serve.  Some files aren't served through the
        # CDN in order to avoid same-origin policy/CORS-related issues.
        if any(relative_path.lower().endswith(excluded_ext.lower())
               for excluded_ext in excluded_exts):
            serve_from_cdn = False

        # Update any query parameter values that have asset paths in them. This is for assets that
        # require their own after-the-fact values, like a Flash file that needs the path of a config
        # file passed to it e.g. /static/visualization.swf?configFile=/static/visualization.xml
        query_params = parse_qsl(query_string)
        updated_query_params = []
        for query_name, query_val in query_params:
            if query_val.startswith("/static/"):
                new_val = StaticContent.get_canonicalized_asset_path(
                    course_key, query_val, base_url, excluded_exts)
                updated_query_params.append((query_name, new_val))
            else:
                updated_query_params.append((query_name, query_val))

        serialized_asset_key = StaticContent.serialize_asset_key_with_slash(
            asset_key)
        base_url = base_url if serve_from_cdn else ''

        return urlunparse((None, base_url, serialized_asset_key, params,
                           urlencode(updated_query_params), fragment))
Example #5
0
def create_course_image_thumbnail(course, dimensions):
    """Create a course image thumbnail and return the URL.

    - dimensions is a tuple of (width, height)
    """
    course_image_asset_key = StaticContent.compute_location(course.id, course.course_image)
    course_image = AssetManager.find(course_image_asset_key)  # a StaticContent obj

    _content, thumb_loc = contentstore().generate_thumbnail(course_image, dimensions=dimensions)

    return StaticContent.serialize_asset_key_with_slash(thumb_loc)
Example #6
0
def create_course_image_thumbnail(course, dimensions):
    """Create a course image thumbnail and return the URL.

    - dimensions is a tuple of (width, height)
    """
    course_image_asset_key = StaticContent.compute_location(course.id, course.course_image)
    course_image = AssetManager.find(course_image_asset_key)  # a StaticContent obj

    _content, thumb_loc = contentstore().generate_thumbnail(course_image, dimensions=dimensions)

    return StaticContent.serialize_asset_key_with_slash(thumb_loc)
Example #7
0
def get_asset_content_from_path(course_key, asset_path):
    """
    Locate the given asset content, load it into memory, and return it.
    Returns None if the asset is not found.
    """
    try:
        asset_key = StaticContent.get_asset_key_from_path(
            course_key, asset_path)
        return AssetManager.find(asset_key)
    except (ItemNotFoundError, NotFoundError):
        return None
Example #8
0
def get_versioned_asset_url(asset_path):
    """
    Creates a versioned asset URL.
    """
    try:
        locator = StaticContent.get_location_from_path(asset_path)
        content = AssetManager.find(locator, as_stream=True)
        return StaticContent.add_version_to_asset_path(asset_path, content.content_digest)
    except (InvalidKeyError, ItemNotFoundError):
        pass

    return asset_path
def get_old_style_versioned_asset_url(asset_path):
    """
    Creates an old-style versioned asset URL.
    """
    try:
        locator = StaticContent.get_location_from_path(asset_path)
        content = AssetManager.find(locator, as_stream=True)
        return f'{VERSIONED_ASSETS_PREFIX}/{content.content_digest}{asset_path}'
    except (InvalidKeyError, ItemNotFoundError):
        pass

    return asset_path
Example #10
0
def get_old_style_versioned_asset_url(asset_path):
    """
    Creates an old-style versioned asset URL.
    """
    try:
        locator = StaticContent.get_location_from_path(asset_path)
        content = AssetManager.find(locator, as_stream=True)
        return u'{}/{}{}'.format(VERSIONED_ASSETS_PREFIX, content.content_digest, asset_path)
    except (InvalidKeyError, ItemNotFoundError):
        pass

    return asset_path
def get_versioned_asset_url(asset_path):
    """
    Creates a versioned asset URL.
    """
    try:
        locator = StaticContent.get_location_from_path(asset_path)
        content = AssetManager.find(locator, as_stream=True)
        return StaticContent.add_version_to_asset_path(asset_path, content.content_digest)
    except (InvalidKeyError, ItemNotFoundError):
        pass

    return asset_path
Example #12
0
    def get_canonicalized_asset_path(course_key, path, base_url, excluded_exts):
        """
        Returns a fully-qualified path to a piece of static content.

        If a static asset CDN is configured, this path will include it.
        Otherwise, the path will simply be relative.

        Args:
            course_key: key to the course which owns this asset
            path: the path to said content

        Returns:
            string: fully-qualified path to asset
        """

        # Break down the input path.
        _, _, relative_path, params, query_string, fragment = urlparse(path)

        # Convert our path to an asset key if it isn't one already.
        asset_key = StaticContent.get_asset_key_from_path(course_key, relative_path)

        # Check the status of the asset to see if this can be served via CDN aka publicly.
        serve_from_cdn = False
        try:
            content = AssetManager.find(asset_key, as_stream=True)
            is_locked = getattr(content, "locked", True)
            serve_from_cdn = not is_locked
        except (ItemNotFoundError, NotFoundError):
            # If we can't find the item, just treat it as if it's locked.
            serve_from_cdn = False

        # See if this is an allowed file extension to serve.  Some files aren't served through the
        # CDN in order to avoid same-origin policy/CORS-related issues.
        if any(relative_path.lower().endswith(excluded_ext.lower()) for excluded_ext in excluded_exts):
            serve_from_cdn = False

        # Update any query parameter values that have asset paths in them. This is for assets that
        # require their own after-the-fact values, like a Flash file that needs the path of a config
        # file passed to it e.g. /static/visualization.swf?configFile=/static/visualization.xml
        query_params = parse_qsl(query_string)
        updated_query_params = []
        for query_name, query_val in query_params:
            if query_val.startswith("/static/"):
                new_val = StaticContent.get_canonicalized_asset_path(course_key, query_val, base_url, excluded_exts)
                updated_query_params.append((query_name, new_val))
            else:
                updated_query_params.append((query_name, query_val))

        serialized_asset_key = StaticContent.serialize_asset_key_with_slash(asset_key)
        base_url = base_url if serve_from_cdn else ''

        return urlunparse((None, base_url, serialized_asset_key, params, urlencode(updated_query_params), fragment))
    def get_content_digest_for_asset_path(cls, prefix, path):
        """
        Takes an unprocessed asset path, parses it just enough to try and find the
        asset it refers to, and returns the content digest of that asset if it exists.
        """

        # Parse the path as if it was potentially a relative URL with query parameters,
        # or an absolute URL, etc.  Only keep the path because that's all we need.
        _, _, relative_path, _, _, _ = urlparse(path)
        asset_key = StaticContent.get_asset_key_from_path(cls.courses[prefix].id, relative_path)

        try:
            content = AssetManager.find(asset_key, as_stream=True)
            return content.content_digest
        except (ItemNotFoundError, NotFoundError):
            return None
Example #14
0
    def get_content_digest_for_asset_path(cls, prefix, path):
        """
        Takes an unprocessed asset path, parses it just enough to try and find the
        asset it refers to, and returns the content digest of that asset if it exists.
        """

        # Parse the path as if it was potentially a relative URL with query parameters,
        # or an absolute URL, etc.  Only keep the path because that's all we need.
        _, _, relative_path, _, _, _ = urlparse(path)
        asset_key = StaticContent.get_asset_key_from_path(cls.courses[prefix].id, relative_path)

        try:
            content = AssetManager.find(asset_key, as_stream=True)
            return content.content_digest
        except (ItemNotFoundError, NotFoundError):
            return None
def get_asset_content_from_path(course_key, asset_path):
    """
    Locate the given asset content, load it into memory, and return it.

    Returns None if the asset is not found.
    """
    try:
        from xmodule.contentstore.content import StaticContent
        from xmodule.assetstore.assetmgr import AssetManager
        from xmodule.modulestore.exceptions import ItemNotFoundError
        from xmodule.exceptions import NotFoundError
    except ImportError as exc:
        raise EdXPlatformImportError(exc)

    try:
        asset_key = StaticContent.get_asset_key_from_path(
            course_key, asset_path)
        return AssetManager.find(asset_key)
    except (ItemNotFoundError, NotFoundError) as exc:
        return None
Example #16
0
    def load_asset_from_location(self, location):
        """
        Loads an asset based on its location, either retrieving it from a cache
        or loading it directly from the contentstore.
        """

        # See if we can load this item from cache.
        content = get_cached_content(location)
        if content is None:
            # Not in cache, so just try and load it from the asset manager.
            try:
                content = AssetManager.find(location, as_stream=True)
            except (ItemNotFoundError, NotFoundError):
                raise

            # Now that we fetched it, let's go ahead and try to cache it. We cap this at 1MB
            # because it's the default for memcached and also we don't want to do too much
            # buffering in memory when we're serving an actual request.
            if content.length is not None and content.length < 1048576:
                content = content.copy_to_in_mem()
                set_cached_content(content)

        return content
Example #17
0
    def load_asset_from_location(self, location):
        """
        Loads an asset based on its location, either retrieving it from a cache
        or loading it directly from the contentstore.
        """

        # See if we can load this item from cache.
        content = get_cached_content(location)
        if content is None:
            # Not in cache, so just try and load it from the asset manager.
            try:
                content = AssetManager.find(location, as_stream=True)
            except (ItemNotFoundError, NotFoundError):
                raise

            # Now that we fetched it, let's go ahead and try to cache it. We cap this at 1MB
            # because it's the default for memcached and also we don't want to do too much
            # buffering in memory when we're serving an actual request.
            if content.length is not None and content.length < 1048576:
                content = content.copy_to_in_mem()
                set_cached_content(content)

        return content
Example #18
0
    def process_request(self, request):
        """Process the given request"""
        asset_path = request.path

        if self.is_asset_request(request):  # lint-amnesty, pylint: disable=too-many-nested-blocks
            # Make sure we can convert this request into a location.
            if AssetLocator.CANONICAL_NAMESPACE in asset_path:
                asset_path = asset_path.replace('block/', 'block@', 1)

            # If this is a versioned request, pull out the digest and chop off the prefix.
            requested_digest = None
            if StaticContent.is_versioned_asset_path(asset_path):
                requested_digest, asset_path = StaticContent.parse_versioned_asset_path(
                    asset_path)

            # Make sure we have a valid location value for this asset.
            try:
                loc = StaticContent.get_location_from_path(asset_path)
            except (InvalidLocationError, InvalidKeyError):
                return HttpResponseBadRequest()

            # Attempt to load the asset to make sure it exists, and grab the asset digest
            # if we're able to load it.
            actual_digest = None
            try:
                content = self.load_asset_from_location(loc)
                actual_digest = getattr(content, "content_digest", None)
            except (ItemNotFoundError, NotFoundError):
                return HttpResponseNotFound()

            # If this was a versioned asset, and the digest doesn't match, redirect
            # them to the actual version.
            if requested_digest is not None and actual_digest is not None and (
                    actual_digest != requested_digest):
                actual_asset_path = StaticContent.add_version_to_asset_path(
                    asset_path, actual_digest)
                return HttpResponsePermanentRedirect(actual_asset_path)

            # Set the basics for this request. Make sure that the course key for this
            # asset has a run, which old-style courses do not.  Otherwise, this will
            # explode when the key is serialized to be sent to NR.
            safe_course_key = loc.course_key
            if safe_course_key.run is None:
                safe_course_key = safe_course_key.replace(run='only')

            if newrelic:
                newrelic.agent.add_custom_parameter('course_id',
                                                    safe_course_key)
                newrelic.agent.add_custom_parameter('org', loc.org)
                newrelic.agent.add_custom_parameter('contentserver.path',
                                                    loc.path)

                # Figure out if this is a CDN using us as the origin.
                is_from_cdn = StaticContentServer.is_cdn_request(request)
                newrelic.agent.add_custom_parameter('contentserver.from_cdn',
                                                    is_from_cdn)

                # Check if this content is locked or not.
                locked = self.is_content_locked(content)
                newrelic.agent.add_custom_parameter('contentserver.locked',
                                                    locked)

            # Check that user has access to the content.
            if not self.is_user_authorized(request, content, loc):
                return HttpResponseForbidden('Unauthorized')

            # Figure out if the client sent us a conditional request, and let them know
            # if this asset has changed since then.
            last_modified_at_str = content.last_modified_at.strftime(
                HTTP_DATE_FORMAT)
            if 'HTTP_IF_MODIFIED_SINCE' in request.META:
                if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
                if if_modified_since == last_modified_at_str:
                    return HttpResponseNotModified()

            # *** File streaming within a byte range ***
            # If a Range is provided, parse Range attribute of the request
            # Add Content-Range in the response if Range is structurally correct
            # Request -> Range attribute structure: "Range: bytes=first-[last]"
            # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
            # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
            response = None
            if request.META.get('HTTP_RANGE'):
                # If we have a StaticContent, get a StaticContentStream.  Can't manipulate the bytes otherwise.
                if isinstance(content, StaticContent):
                    content = AssetManager.find(loc, as_stream=True)

                header_value = request.META['HTTP_RANGE']
                try:
                    unit, ranges = parse_range_header(header_value,
                                                      content.length)
                except ValueError as exception:
                    # If the header field is syntactically invalid it should be ignored.
                    log.exception("%s in Range header: %s for content: %s",
                                  str(exception), header_value, str(loc))
                else:
                    if unit != 'bytes':
                        # Only accept ranges in bytes
                        log.warning(
                            "Unknown unit in Range header: %s for content: %s",
                            header_value, str(loc))
                    elif len(ranges) > 1:
                        # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
                        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                        # But we send back the full content.
                        log.warning(
                            "More than 1 ranges in Range header: %s for content: %s",
                            header_value, str(loc))
                    else:
                        first, last = ranges[0]

                        if 0 <= first <= last < content.length:
                            # If the byte range is satisfiable
                            response = HttpResponse(
                                content.stream_data_in_range(first, last))
                            response[
                                'Content-Range'] = 'bytes {first}-{last}/{length}'.format(
                                    first=first,
                                    last=last,
                                    length=content.length)
                            response['Content-Length'] = str(last - first + 1)
                            response.status_code = 206  # Partial Content

                            if newrelic:
                                newrelic.agent.add_custom_parameter(
                                    'contentserver.ranged', True)
                        else:
                            log.warning(
                                "Cannot satisfy ranges in Range header: %s for content: %s",
                                header_value, str(loc))
                            return HttpResponse(
                                status=416)  # Requested Range Not Satisfiable

            # If Range header is absent or syntactically invalid return a full content response.
            if response is None:
                response = HttpResponse(content.stream_data())
                response['Content-Length'] = content.length

            if newrelic:
                newrelic.agent.add_custom_parameter(
                    'contentserver.content_len', content.length)
                newrelic.agent.add_custom_parameter(
                    'contentserver.content_type', content.content_type)

            # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
            response['Accept-Ranges'] = 'bytes'
            response['Content-Type'] = content.content_type
            response['X-Frame-Options'] = 'ALLOW'

            # Set any caching headers, and do any response cleanup needed.  Based on how much
            # middleware we have in place, there's no easy way to use the built-in Django
            # utilities and properly sanitize and modify a response to ensure that it is as
            # cacheable as possible, which is why we do it ourselves.
            self.set_caching_headers(content, response)

            return response
Example #19
0
    def process_request(self, request):
        # look to see if the request is prefixed with an asset prefix tag
        if request.path.startswith("/" + XASSET_LOCATION_TAG + "/") or request.path.startswith(
            "/" + AssetLocator.CANONICAL_NAMESPACE
        ):
            if AssetLocator.CANONICAL_NAMESPACE in request.path:
                request.path = request.path.replace("block/", "block@", 1)
            try:
                loc = StaticContent.get_location_from_path(request.path)
            except (InvalidLocationError, InvalidKeyError):
                # return a 'Bad Request' to browser as we have a malformed Location
                response = HttpResponse()
                response.status_code = 400
                return response

            # first look in our cache so we don't have to round-trip to the DB
            content = get_cached_content(loc)
            if content is None:
                # nope, not in cache, let's fetch from DB
                try:
                    content = AssetManager.find(loc, as_stream=True)
                except (ItemNotFoundError, NotFoundError):
                    response = HttpResponse()
                    response.status_code = 404
                    return response

                # since we fetched it from DB, let's cache it going forward, but only if it's < 1MB
                # this is because I haven't been able to find a means to stream data out of memcached
                if content.length is not None:
                    if content.length < 1048576:
                        # since we've queried as a stream, let's read in the stream into memory to set in cache
                        content = content.copy_to_in_mem()
                        set_cached_content(content)
            else:
                # NOP here, but we may wish to add a "cache-hit" counter in the future
                pass

            # Check that user has access to content
            if getattr(content, "locked", False):
                if not hasattr(request, "user") or not request.user.is_authenticated():
                    return HttpResponseForbidden("Unauthorized")
                if not request.user.is_staff:
                    if getattr(loc, "deprecated", False) and not CourseEnrollment.is_enrolled_by_partial(
                        request.user, loc.course_key
                    ):
                        return HttpResponseForbidden("Unauthorized")
                    if not getattr(loc, "deprecated", False) and not CourseEnrollment.is_enrolled(
                        request.user, loc.course_key
                    ):
                        return HttpResponseForbidden("Unauthorized")

            # convert over the DB persistent last modified timestamp to a HTTP compatible
            # timestamp, so we can simply compare the strings
            last_modified_at_str = content.last_modified_at.strftime("%a, %d-%b-%Y %H:%M:%S GMT")

            # see if the client has cached this content, if so then compare the
            # timestamps, if they are the same then just return a 304 (Not Modified)
            if "HTTP_IF_MODIFIED_SINCE" in request.META:
                if_modified_since = request.META["HTTP_IF_MODIFIED_SINCE"]
                if if_modified_since == last_modified_at_str:
                    return HttpResponseNotModified()

            # *** File streaming within a byte range ***
            # If a Range is provided, parse Range attribute of the request
            # Add Content-Range in the response if Range is structurally correct
            # Request -> Range attribute structure: "Range: bytes=first-[last]"
            # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
            # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
            response = None
            if request.META.get("HTTP_RANGE"):
                # Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream)
                if type(content) == StaticContent:
                    content = AssetManager.find(loc, as_stream=True)

                header_value = request.META["HTTP_RANGE"]
                try:
                    unit, ranges = parse_range_header(header_value, content.length)
                except ValueError as exception:
                    # If the header field is syntactically invalid it should be ignored.
                    log.exception(
                        u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc)
                    )
                else:
                    if unit != "bytes":
                        # Only accept ranges in bytes
                        log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc))
                    elif len(ranges) > 1:
                        # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
                        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                        # But we send back the full content.
                        log.warning(
                            u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc)
                        )
                    else:
                        first, last = ranges[0]

                        if 0 <= first <= last < content.length:
                            # If the byte range is satisfiable
                            response = HttpResponse(content.stream_data_in_range(first, last))
                            response["Content-Range"] = "bytes {first}-{last}/{length}".format(
                                first=first, last=last, length=content.length
                            )
                            response["Content-Length"] = str(last - first + 1)
                            response.status_code = 206  # Partial Content
                        else:
                            log.warning(
                                u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc)
                            )
                            return HttpResponse(status=416)  # Requested Range Not Satisfiable

            # If Range header is absent or syntactically invalid return a full content response.
            if response is None:
                response = HttpResponse(content.stream_data())
                response["Content-Length"] = content.length

            # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
            response["Accept-Ranges"] = "bytes"
            response["Content-Type"] = content.content_type
            response["Last-Modified"] = last_modified_at_str

            return response
Example #20
0
    def get_canonicalized_asset_path(course_key, path, base_url, excluded_exts, encode=True):
        """
        Returns a fully-qualified path to a piece of static content.

        If a static asset CDN is configured, this path will include it.
        Otherwise, the path will simply be relative.

        Args:
            course_key: key to the course which owns this asset
            path: the path to said content

        Returns:
            string: fully-qualified path to asset
        """

        # Break down the input path.
        _, _, relative_path, params, query_string, _ = urlparse(path)

        # Convert our path to an asset key if it isn't one already.
        asset_key = StaticContent.get_asset_key_from_path(course_key, relative_path)

        # Check the status of the asset to see if this can be served via CDN aka publicly.
        serve_from_cdn = False
        content_digest = None
        try:
            content = AssetManager.find(asset_key, as_stream=True)
            serve_from_cdn = not getattr(content, "locked", True)
            content_digest = getattr(content, "content_digest", None)
        except (ItemNotFoundError, NotFoundError):
            # If we can't find the item, just treat it as if it's locked.
            serve_from_cdn = False

        # Do a generic check to see if anything about this asset disqualifies it from being CDN'd.
        is_excluded = False
        if StaticContent.is_excluded_asset_type(relative_path, excluded_exts):
            serve_from_cdn = False
            is_excluded = True

        # Update any query parameter values that have asset paths in them. This is for assets that
        # require their own after-the-fact values, like a Flash file that needs the path of a config
        # file passed to it e.g. /static/visualization.swf?configFile=/static/visualization.xml
        query_params = parse_qsl(query_string)
        updated_query_params = []
        for query_name, query_val in query_params:
            if query_val.startswith("/static/"):
                new_val = StaticContent.get_canonicalized_asset_path(
                    course_key, query_val, base_url, excluded_exts, encode=False)
                updated_query_params.append((query_name, new_val))
            else:
                # Make sure we're encoding Unicode strings down to their byte string
                # representation so that `urlencode` can handle it.
                updated_query_params.append((query_name, query_val.encode('utf-8')))

        serialized_asset_key = StaticContent.serialize_asset_key_with_slash(asset_key)
        base_url = base_url if serve_from_cdn else ''
        asset_path = serialized_asset_key

        # If the content has a digest (i.e. md5sum) value specified, create a versioned path to the asset using it.
        if not is_excluded and content_digest:
            asset_path = StaticContent.add_version_to_asset_path(serialized_asset_key, content_digest)

        # Only encode this if told to.  Important so that we don't double encode
        # when working with paths that are in query parameters.
        asset_path = asset_path.encode('utf-8')
        if encode:
            asset_path = quote_plus(asset_path, '/:+@')

        return urlunparse((None, base_url.encode('utf-8'), asset_path, params, urlencode(updated_query_params), None))
Example #21
0
    def process_request(self, request):
        """Process the given request"""
        if self.is_asset_request(request):
            # Make sure we can convert this request into a location.
            if AssetLocator.CANONICAL_NAMESPACE in request.path:
                request.path = request.path.replace('block/', 'block@', 1)
            try:
                loc = StaticContent.get_location_from_path(request.path)
            except (InvalidLocationError, InvalidKeyError):
                return HttpResponseBadRequest()

            # Try and load the asset.
            content = None
            try:
                content = self.load_asset_from_location(loc)
            except (ItemNotFoundError, NotFoundError):
                return HttpResponseNotFound()

            # Set the basics for this request.
            newrelic.agent.add_custom_parameter('course_id', loc.course_key)
            newrelic.agent.add_custom_parameter('org', loc.org)
            newrelic.agent.add_custom_parameter('contentserver.path', loc.path)

            # Figure out if this is a CDN using us as the origin.
            is_from_cdn = StaticContentServer.is_cdn_request(request)
            newrelic.agent.add_custom_parameter('contentserver.from_cdn',
                                                True if is_from_cdn else False)

            # Check if this content is locked or not.
            locked = self.is_content_locked(content)
            newrelic.agent.add_custom_parameter('contentserver.locked',
                                                True if locked else False)

            # Check that user has access to the content.
            if not self.is_user_authorized(request, content, loc):
                return HttpResponseForbidden('Unauthorized')

            # Figure out if the client sent us a conditional request, and let them know
            # if this asset has changed since then.
            last_modified_at_str = content.last_modified_at.strftime(
                HTTP_DATE_FORMAT)
            if 'HTTP_IF_MODIFIED_SINCE' in request.META:
                if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
                if if_modified_since == last_modified_at_str:
                    return HttpResponseNotModified()

            # *** File streaming within a byte range ***
            # If a Range is provided, parse Range attribute of the request
            # Add Content-Range in the response if Range is structurally correct
            # Request -> Range attribute structure: "Range: bytes=first-[last]"
            # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
            # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
            response = None
            if request.META.get('HTTP_RANGE'):
                # If we have a StaticContent, get a StaticContentStream.  Can't manipulate the bytes otherwise.
                if type(content) == StaticContent:
                    content = AssetManager.find(loc, as_stream=True)

                header_value = request.META['HTTP_RANGE']
                try:
                    unit, ranges = parse_range_header(header_value,
                                                      content.length)
                except ValueError as exception:
                    # If the header field is syntactically invalid it should be ignored.
                    log.exception(u"%s in Range header: %s for content: %s",
                                  exception.message, header_value,
                                  unicode(loc))
                else:
                    if unit != 'bytes':
                        # Only accept ranges in bytes
                        log.warning(
                            u"Unknown unit in Range header: %s for content: %s",
                            header_value, unicode(loc))
                    elif len(ranges) > 1:
                        # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
                        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                        # But we send back the full content.
                        log.warning(
                            u"More than 1 ranges in Range header: %s for content: %s",
                            header_value, unicode(loc))
                    else:
                        first, last = ranges[0]

                        if 0 <= first <= last < content.length:
                            # If the byte range is satisfiable
                            response = HttpResponse(
                                content.stream_data_in_range(first, last))
                            response[
                                'Content-Range'] = 'bytes {first}-{last}/{length}'.format(
                                    first=first,
                                    last=last,
                                    length=content.length)
                            range_len = last - first + 1
                            response['Content-Length'] = str(range_len)
                            response.status_code = 206  # Partial Content

                            newrelic.agent.add_custom_parameter(
                                'contentserver.range_len', range_len)
                        else:
                            log.warning(
                                u"Cannot satisfy ranges in Range header: %s for content: %s",
                                header_value, unicode(loc))
                            return HttpResponse(
                                status=416)  # Requested Range Not Satisfiable

            # If Range header is absent or syntactically invalid return a full content response.
            if response is None:
                response = HttpResponse(content.stream_data())
                response['Content-Length'] = content.length

                newrelic.agent.add_custom_parameter(
                    'contentserver.content_len', content.length)
                newrelic.agent.add_custom_parameter(
                    'contentserver.content_type', content.content_type)

            # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
            response['Accept-Ranges'] = 'bytes'
            response['Content-Type'] = content.content_type

            # Set any caching headers, and do any response cleanup needed.  Based on how much
            # middleware we have in place, there's no easy way to use the built-in Django
            # utilities and properly sanitize and modify a response to ensure that it is as
            # cacheable as possible, which is why we do it ourselves.
            self.set_caching_headers(content, response)

            return response
Example #22
0
    def process_request(self, request):
        """Process the given request"""
        asset_path = request.path

        if self.is_asset_request(request):
            # Make sure we can convert this request into a location.
            if AssetLocator.CANONICAL_NAMESPACE in asset_path:
                asset_path = asset_path.replace('block/', 'block@', 1)

            # If this is a versioned request, pull out the digest and chop off the prefix.
            requested_digest = None
            if StaticContent.is_versioned_asset_path(asset_path):
                requested_digest, asset_path = StaticContent.parse_versioned_asset_path(asset_path)

            # Make sure we have a valid location value for this asset.
            try:
                loc = StaticContent.get_location_from_path(asset_path)
            except (InvalidLocationError, InvalidKeyError):
                return HttpResponseBadRequest()

            # Attempt to load the asset to make sure it exists, and grab the asset digest
            # if we're able to load it.
            actual_digest = None
            try:
                content = self.load_asset_from_location(loc)
                actual_digest = content.content_digest
            except (ItemNotFoundError, NotFoundError):
                return HttpResponseNotFound()

            # If this was a versioned asset, and the digest doesn't match, redirect
            # them to the actual version.
            if requested_digest is not None and (actual_digest != requested_digest):
                actual_asset_path = StaticContent.add_version_to_asset_path(asset_path, actual_digest)
                return HttpResponsePermanentRedirect(actual_asset_path)

            # Set the basics for this request. Make sure that the course key for this
            # asset has a run, which old-style courses do not.  Otherwise, this will
            # explode when the key is serialized to be sent to NR.
            safe_course_key = loc.course_key
            if safe_course_key.run is None:
                safe_course_key = safe_course_key.replace(run='only')

            newrelic.agent.add_custom_parameter('course_id', safe_course_key)
            newrelic.agent.add_custom_parameter('org', loc.org)
            newrelic.agent.add_custom_parameter('contentserver.path', loc.path)

            # Figure out if this is a CDN using us as the origin.
            is_from_cdn = StaticContentServer.is_cdn_request(request)
            newrelic.agent.add_custom_parameter('contentserver.from_cdn', is_from_cdn)

            # Check if this content is locked or not.
            locked = self.is_content_locked(content)
            newrelic.agent.add_custom_parameter('contentserver.locked', locked)

            # Check that user has access to the content.
            if not self.is_user_authorized(request, content, loc):
                return HttpResponseForbidden('Unauthorized')

            # Figure out if the client sent us a conditional request, and let them know
            # if this asset has changed since then.
            last_modified_at_str = content.last_modified_at.strftime(HTTP_DATE_FORMAT)
            if 'HTTP_IF_MODIFIED_SINCE' in request.META:
                if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
                if if_modified_since == last_modified_at_str:
                    return HttpResponseNotModified()

            # *** File streaming within a byte range ***
            # If a Range is provided, parse Range attribute of the request
            # Add Content-Range in the response if Range is structurally correct
            # Request -> Range attribute structure: "Range: bytes=first-[last]"
            # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
            # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
            response = None
            if request.META.get('HTTP_RANGE'):
                # If we have a StaticContent, get a StaticContentStream.  Can't manipulate the bytes otherwise.
                if type(content) == StaticContent:
                    content = AssetManager.find(loc, as_stream=True)

                header_value = request.META['HTTP_RANGE']
                try:
                    unit, ranges = parse_range_header(header_value, content.length)
                except ValueError as exception:
                    # If the header field is syntactically invalid it should be ignored.
                    log.exception(
                        u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc)
                    )
                else:
                    if unit != 'bytes':
                        # Only accept ranges in bytes
                        log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc))
                    elif len(ranges) > 1:
                        # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
                        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                        # But we send back the full content.
                        log.warning(
                            u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc)
                        )
                    else:
                        first, last = ranges[0]

                        if 0 <= first <= last < content.length:
                            # If the byte range is satisfiable
                            response = HttpResponse(content.stream_data_in_range(first, last))
                            response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
                                first=first, last=last, length=content.length
                            )
                            response['Content-Length'] = str(last - first + 1)
                            response.status_code = 206  # Partial Content

                            newrelic.agent.add_custom_parameter('contentserver.ranged', True)
                        else:
                            log.warning(
                                u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc)
                            )
                            return HttpResponse(status=416)  # Requested Range Not Satisfiable

            # If Range header is absent or syntactically invalid return a full content response.
            if response is None:
                response = HttpResponse(content.stream_data())
                response['Content-Length'] = content.length

            newrelic.agent.add_custom_parameter('contentserver.content_len', content.length)
            newrelic.agent.add_custom_parameter('contentserver.content_type', content.content_type)

            # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
            response['Accept-Ranges'] = 'bytes'
            response['Content-Type'] = content.content_type

            # Set any caching headers, and do any response cleanup needed.  Based on how much
            # middleware we have in place, there's no easy way to use the built-in Django
            # utilities and properly sanitize and modify a response to ensure that it is as
            # cacheable as possible, which is why we do it ourselves.
            self.set_caching_headers(content, response)

            return response
Example #23
0
    def get_canonicalized_asset_path(course_key, path, base_url, excluded_exts, encode=True):
        """
        Returns a fully-qualified path to a piece of static content.

        If a static asset CDN is configured, this path will include it.
        Otherwise, the path will simply be relative.

        Args:
            course_key: key to the course which owns this asset
            path: the path to said content

        Returns:
            string: fully-qualified path to asset
        """

        # Break down the input path.
        _, _, relative_path, params, query_string, _ = urlparse(path)

        # Convert our path to an asset key if it isn't one already.
        asset_key = StaticContent.get_asset_key_from_path(course_key, relative_path)

        # Check the status of the asset to see if this can be served via CDN aka publicly.
        serve_from_cdn = False
        content_digest = None
        try:
            content = AssetManager.find(asset_key, as_stream=True)
            serve_from_cdn = not getattr(content, "locked", True)
            content_digest = getattr(content, "content_digest", None)
        except (ItemNotFoundError, NotFoundError):
            # If we can't find the item, just treat it as if it's locked.
            serve_from_cdn = False

        # Do a generic check to see if anything about this asset disqualifies it from being CDN'd.
        is_excluded = False
        if StaticContent.is_excluded_asset_type(relative_path, excluded_exts):
            serve_from_cdn = False
            is_excluded = True

        # Update any query parameter values that have asset paths in them. This is for assets that
        # require their own after-the-fact values, like a Flash file that needs the path of a config
        # file passed to it e.g. /static/visualization.swf?configFile=/static/visualization.xml
        query_params = parse_qsl(query_string)
        updated_query_params = []
        for query_name, query_val in query_params:
            if query_val.startswith("/static/"):
                new_val = StaticContent.get_canonicalized_asset_path(
                    course_key, query_val, base_url, excluded_exts, encode=False)
                updated_query_params.append((query_name, new_val.encode('utf-8')))
            else:
                # Make sure we're encoding Unicode strings down to their byte string
                # representation so that `urlencode` can handle it.
                updated_query_params.append((query_name, query_val.encode('utf-8')))

        serialized_asset_key = StaticContent.serialize_asset_key_with_slash(asset_key)
        base_url = base_url if serve_from_cdn else ''
        asset_path = serialized_asset_key

        # If the content has a digest (i.e. md5sum) value specified, create a versioned path to the asset using it.
        if not is_excluded and content_digest:
            asset_path = StaticContent.add_version_to_asset_path(serialized_asset_key, content_digest)

        # Only encode this if told to.  Important so that we don't double encode
        # when working with paths that are in query parameters.
        if encode:
            asset_path = asset_path.encode('utf-8')
            asset_path = quote_plus(asset_path, '/:+@')

        return urlunparse(('', base_url, asset_path, params, urlencode(updated_query_params), ''))
Example #24
0
    def process_request(self, request):
        """Process the given request"""
        if self.is_asset_request(request):
            # Make sure we can convert this request into a location.
            if AssetLocator.CANONICAL_NAMESPACE in request.path:
                request.path = request.path.replace('block/', 'block@', 1)
            try:
                loc = StaticContent.get_location_from_path(request.path)
            except (InvalidLocationError, InvalidKeyError):
                return HttpResponseBadRequest()

            # Try and load the asset.
            content = None
            try:
                content = self.load_asset_from_location(loc)
            except (ItemNotFoundError, NotFoundError):
                return HttpResponseNotFound()

            # Check that user has access to the content.
            if not self.is_user_authorized(request, content, loc):
                return HttpResponseForbidden('Unauthorized')

            # Figure out if the client sent us a conditional request, and let them know
            # if this asset has changed since then.
            last_modified_at_str = content.last_modified_at.strftime(HTTP_DATE_FORMAT)
            if 'HTTP_IF_MODIFIED_SINCE' in request.META:
                if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
                if if_modified_since == last_modified_at_str:
                    return HttpResponseNotModified()

            # *** File streaming within a byte range ***
            # If a Range is provided, parse Range attribute of the request
            # Add Content-Range in the response if Range is structurally correct
            # Request -> Range attribute structure: "Range: bytes=first-[last]"
            # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
            # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
            response = None
            if request.META.get('HTTP_RANGE'):
                # If we have a StaticContent, get a StaticContentStream.  Can't manipulate the bytes otherwise.
                if type(content) == StaticContent:
                    content = AssetManager.find(loc, as_stream=True)

                header_value = request.META['HTTP_RANGE']
                try:
                    unit, ranges = parse_range_header(header_value, content.length)
                except ValueError as exception:
                    # If the header field is syntactically invalid it should be ignored.
                    log.exception(
                        u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc)
                    )
                else:
                    if unit != 'bytes':
                        # Only accept ranges in bytes
                        log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc))
                    elif len(ranges) > 1:
                        # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
                        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                        # But we send back the full content.
                        log.warning(
                            u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc)
                        )
                    else:
                        first, last = ranges[0]

                        if 0 <= first <= last < content.length:
                            # If the byte range is satisfiable
                            response = HttpResponse(content.stream_data_in_range(first, last))
                            response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
                                first=first, last=last, length=content.length
                            )
                            response['Content-Length'] = str(last - first + 1)
                            response.status_code = 206  # Partial Content
                        else:
                            log.warning(
                                u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc)
                            )
                            return HttpResponse(status=416)  # Requested Range Not Satisfiable

            # If Range header is absent or syntactically invalid return a full content response.
            if response is None:
                response = HttpResponse(content.stream_data())
                response['Content-Length'] = content.length

            # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
            response['Accept-Ranges'] = 'bytes'
            response['Content-Type'] = content.content_type

            # Set any caching headers, and do any response cleanup needed.  Based on how much
            # middleware we have in place, there's no easy way to use the built-in Django
            # utilities and properly sanitize and modify a response to ensure that it is as
            # cacheable as possible, which is why we do it ourselves.
            self.set_caching_headers(content, response)

            return response
def _find_asset_urls_in_block(
    task_id,
    value,
    block_loc,
    block_assets,
    course_key,
    environment,
    staff_user_id,
    update,
    dictionary=None,
    value_key=None,
):

    if type(value) == dict:
        for key, val in value.items():
            _find_asset_urls_in_block(task_id,
                                      val,
                                      block_loc,
                                      block_assets,
                                      course_key,
                                      environment,
                                      staff_user_id,
                                      update,
                                      dictionary=value,
                                      value_key=key)
    elif type(value) == list:
        for item in value:
            _find_asset_urls_in_block(task_id,
                                      item,
                                      block_loc,
                                      block_assets,
                                      course_key,
                                      environment,
                                      staff_user_id,
                                      update,
                                      dictionary=dictionary,
                                      value_key=value_key)
    elif type(value) in (str, unicode):
        save_updated = False
        urls = re.findall(URL_RE, value)

        for url in urls:
            url = strip_tags(url)
            parsed_url = urlparse(url)
            asset_url = StaticContent.ASSET_URL_RE.match(parsed_url.path)

            if asset_url is not None:
                # check if asset URL belongs to some other server or course
                if parsed_url.hostname != environment or \
                                asset_url.groupdict().get('course') != course_key.course or \
                                asset_url.groupdict().get('org') != course_key.org:

                    asset_info = {
                        'name': asset_url.groupdict().get('name'),
                        'module': block_loc,
                        'available': False
                    }
                    asset_path = '{}{}'.format(
                        StaticContent.get_base_url_path_for_course_assets(
                            course_key),
                        asset_url.groupdict().get('name'))

                    # check if asset exists in this course
                    try:
                        loc = StaticContent.get_location_from_path(asset_path)
                    except (InvalidLocationError, InvalidKeyError):
                        pass
                    else:
                        try:
                            AssetManager.find(loc, as_stream=True)
                        except (ItemNotFoundError, NotFoundError):
                            pass
                        else:
                            asset_info['available'] = True

                            if update:
                                # replace url with the `asset_path`
                                full_asset_path = urljoin(
                                    'https://{}'.format(environment),
                                    asset_path)
                                value = value.replace(url, full_asset_path, 1)
                                save_updated = True
                                log.info(
                                    '[{}] Replacing `{}` with new path `{}` in module `{}`'
                                    .format(task_id, url, full_asset_path,
                                            block_loc))

                    block_assets.append(asset_info)

        if urls and save_updated and update:
            dictionary[value_key] = value
Example #26
0
    def process_request(self, request):
        # look to see if the request is prefixed with an asset prefix tag
        if (request.path.startswith('/' + XASSET_LOCATION_TAG + '/')
                or request.path.startswith('/' +
                                           AssetLocator.CANONICAL_NAMESPACE)):
            if AssetLocator.CANONICAL_NAMESPACE in request.path:
                request.path = request.path.replace('block/', 'block@', 1)
            try:
                loc = StaticContent.get_location_from_path(request.path)
            except (InvalidLocationError, InvalidKeyError):
                # return a 'Bad Request' to browser as we have a malformed Location
                response = HttpResponse()
                response.status_code = 400
                return response

            # first look in our cache so we don't have to round-trip to the DB
            content = get_cached_content(loc)
            if content is None:
                # nope, not in cache, let's fetch from DB
                try:
                    content = AssetManager.find(loc, as_stream=True)
                except (ItemNotFoundError, NotFoundError):
                    response = HttpResponse()
                    response.status_code = 404
                    return response

                # since we fetched it from DB, let's cache it going forward, but only if it's < 1MB
                # this is because I haven't been able to find a means to stream data out of memcached
                if content.length is not None:
                    if content.length < 1048576:
                        # since we've queried as a stream, let's read in the stream into memory to set in cache
                        content = content.copy_to_in_mem()
                        set_cached_content(content)
            else:
                # NOP here, but we may wish to add a "cache-hit" counter in the future
                pass

            # Check that user has access to content
            if getattr(content, "locked", False):
                if not hasattr(request,
                               "user") or not request.user.is_authenticated():
                    return HttpResponseForbidden('Unauthorized')
                if not request.user.is_staff:
                    if getattr(
                            loc, 'deprecated', False
                    ) and not CourseEnrollment.is_enrolled_by_partial(
                            request.user, loc.course_key):
                        return HttpResponseForbidden('Unauthorized')
                    if not getattr(loc, 'deprecated',
                                   False) and not CourseEnrollment.is_enrolled(
                                       request.user, loc.course_key):
                        return HttpResponseForbidden('Unauthorized')

            # convert over the DB persistent last modified timestamp to a HTTP compatible
            # timestamp, so we can simply compare the strings
            last_modified_at_str = content.last_modified_at.strftime(
                "%a, %d-%b-%Y %H:%M:%S GMT")

            # see if the client has cached this content, if so then compare the
            # timestamps, if they are the same then just return a 304 (Not Modified)
            if 'HTTP_IF_MODIFIED_SINCE' in request.META:
                if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
                if if_modified_since == last_modified_at_str:
                    return HttpResponseNotModified()

            # *** File streaming within a byte range ***
            # If a Range is provided, parse Range attribute of the request
            # Add Content-Range in the response if Range is structurally correct
            # Request -> Range attribute structure: "Range: bytes=first-[last]"
            # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
            # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
            response = None
            if request.META.get('HTTP_RANGE'):
                # Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream)
                if type(content) == StaticContent:
                    content = AssetManager.find(loc, as_stream=True)

                header_value = request.META['HTTP_RANGE']
                try:
                    unit, ranges = parse_range_header(header_value,
                                                      content.length)
                except ValueError as exception:
                    # If the header field is syntactically invalid it should be ignored.
                    log.exception(u"%s in Range header: %s for content: %s",
                                  exception.message, header_value,
                                  unicode(loc))
                else:
                    if unit != 'bytes':
                        # Only accept ranges in bytes
                        log.warning(
                            u"Unknown unit in Range header: %s for content: %s",
                            header_value, unicode(loc))
                    elif len(ranges) > 1:
                        # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
                        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                        # But we send back the full content.
                        log.warning(
                            u"More than 1 ranges in Range header: %s for content: %s",
                            header_value, unicode(loc))
                    else:
                        first, last = ranges[0]

                        if 0 <= first <= last < content.length:
                            # If the byte range is satisfiable
                            response = HttpResponse(
                                content.stream_data_in_range(first, last))
                            response[
                                'Content-Range'] = 'bytes {first}-{last}/{length}'.format(
                                    first=first,
                                    last=last,
                                    length=content.length)
                            response['Content-Length'] = str(last - first + 1)
                            response.status_code = 206  # Partial Content
                        else:
                            log.warning(
                                u"Cannot satisfy ranges in Range header: %s for content: %s",
                                header_value, unicode(loc))
                            return HttpResponse(
                                status=416)  # Requested Range Not Satisfiable

            # If Range header is absent or syntactically invalid return a full content response.
            if response is None:
                response = HttpResponse(content.stream_data())
                response['Content-Length'] = content.length

            # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
            response['Accept-Ranges'] = 'bytes'
            response['Content-Type'] = content.content_type
            response['Last-Modified'] = last_modified_at_str

            return response