Example #1
def get_from_archive(url: str, archive: ReleaseArchive) -> Tuple[bytes, dict]:
    candidates = ReleaseFile.normalize(url)
    for candidate in candidates:
        try:
            return archive.get_file_by_url(candidate)
        except KeyError:
            pass

    # None of the filenames matched
    raise KeyError(f"Not found in archive: '{url}'")
Example #2
def get_index_entry(release, dist, url) -> Optional[dict]:
    try:
        index = get_artifact_index(release, dist)
    except Exception as exc:
        logger.error("sourcemaps.index_read_failed", exc_info=exc)
        return None

    if index:
        for candidate in ReleaseFile.normalize(url):
            entry = index.get("files", {}).get(candidate)
            if entry:
                return entry

    return None
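
The lookup above implies the artifact index is a dict with a top-level "files" mapping from normalized URL to an entry dict. A small sketch of that shape and lookup, with illustrative entry fields (the real index may carry different keys):

from typing import List, Optional

index = {
    "files": {
        "~/static/app.js": {"sha1": "abc123", "archive_ident": "bundle-1"},
    }
}

def lookup(index: dict, candidates: List[str]) -> Optional[dict]:
    # Return the entry for the first candidate URL present in the index.
    for candidate in candidates:
        entry = index.get("files", {}).get(candidate)
        if entry:
            return entry
    return None

print(lookup(index, ["http://example.com/static/app.js", "~/static/app.js"]))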
Example #3
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (release.id, md5_text(filename).hexdigest())

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist.name if dist else None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)', filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)', filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next((
                rf
                for ident in filename_idents
                for rf in possible_files
                if rf.ident == ident
            ))

        logger.debug(
            'Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id
        )
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
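
The cache protocol in this version is worth spelling out: a database miss is cached as the sentinel -1 for 60 seconds, a hit as the 4-tuple (headers, z_body, 200, encoding) for an hour, and older 3-tuple entries without the encoding are still accepted on read. A quick round-trip of that value format:

import zlib

body = b"function add(a, b) { return a + b; }"
cached = ({"content-type": "application/javascript"}, zlib.compress(body), 200, "utf-8")

# Reading back mirrors the else-branch above, including the 3-tuple fallback.
try:
    encoding = cached[3]
except IndexError:
    encoding = None
headers, z_body, status = cached[0], cached[1], cached[2]
assert zlib.decompress(z_body) == body and status == 200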
Example #4
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (release.id, md5_text(filename).hexdigest())

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist.name if dist else None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)', filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)', filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next((
                rf
                for ident in filename_idents
                for rf in possible_files
                if rf.ident == ident
            ))

        logger.debug(
            'Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id
        )
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error('sourcemap.compress_read_failed', exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
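
Example #4 differs from #3 only in the error path: instead of logger.exception(six.text_type(e)), it logs a stable, greppable event name and attaches the traceback via exc_info. The pattern in isolation:

import logging
import sys

logger = logging.getLogger("sentry.sourcemaps")

try:
    raise OSError("disk read failed")
except Exception:
    # A fixed event name keeps log aggregation stable; exc_info carries
    # the actual exception and traceback alongside it.
    logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())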
Example #5
def fetch_release_file(filename, release, dist=None):
    """
    Attempt to retrieve a release artifact from the database.

    Caches the result of that attempt (whether successful or not).
    """

    dist_name = dist.name if dist else None
    releasefile_ident = ReleaseFile.get_ident(filename, dist_name)
    cache_key = get_release_file_cache_key(release_id=release.id,
                                           releasefile_ident=releasefile_ident)
    # Cache key to store file metadata, currently only the size of the
    # compressed version of the file. We cannot reuse cache_key because
    # large payloads (silently) fail to cache due to e.g. memcached's
    # payload size limit, and we use the metadata to avoid compressing
    # such files again.
    cache_key_meta = get_release_file_cache_key_meta(
        release_id=release.id, releasefile_ident=releasefile_ident)

    logger.debug("Checking cache for release artifact %r (release_id=%s)",
                 filename, release.id)
    result = cache.get(cache_key)

    # not in the cache (meaning we haven't checked the database recently), so check the database
    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [
            ReleaseFile.get_ident(f, dist_name) for f in filename_choices
        ]

        logger.debug(
            "Checking database for release artifact %r (release_id=%s)",
            filename, release.id)

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release, dist=dist,
                ident__in=filename_idents).select_related("file"))

        if len(possible_files) == 0:
            logger.debug(
                "Release artifact %r not found in database (release_id=%s)",
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None

        elif len(possible_files) == 1:
            releasefile = possible_files[0]

        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(rf for ident in filename_idents
                               for rf in possible_files if rf.ident == ident)

        logger.debug("Found release artifact %r (id=%s, release_id=%s)",
                     filename, releasefile.id, release.id)

        # If the release file is not in cache, check if we can retrieve at
        # least the size metadata from cache and prevent compression and
        # caching if payload exceeds the backend limit.
        z_body_size = None

        if CACHE_MAX_VALUE_SIZE:
            cache_meta = cache.get(cache_key_meta)
            if cache_meta:
                z_body_size = int(cache_meta.get("compressed_size"))

        def fetch_release_body():
            with ReleaseFile.cache.getfile(releasefile) as fp:
                if z_body_size and z_body_size > CACHE_MAX_VALUE_SIZE:
                    return None, fp.read()
                else:
                    return compress_file(fp)

        try:
            with metrics.timer("sourcemaps.release_file_read"):
                z_body, body = fetch_retry_policy(fetch_release_body)
        except Exception:
            logger.error("sourcemap.compress_read_failed",
                         exc_info=sys.exc_info())
            result = None
        else:
            headers = {
                k.lower(): v
                for k, v in releasefile.file.headers.items()
            }
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)

            # If we don't have a compressed body for caching because the
            # cached metadata said the payload is too large for the cache
            # backend, do not attempt to cache.
            if z_body:
                # This will implicitly skip too large payloads. Those will be cached
                # on the file system by `ReleaseFile.cache`, instead.
                cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

                # Even if the previous cache.set silently failed, the
                # metadata lets the next call skip the pointless
                # compression that is done only for caching.
                cache.set(cache_key_meta, {"compressed_size": len(z_body)},
                          3600)

    # in the cache as an unsuccessful attempt
    elif result == -1:
        result = None

    # in the cache as a successful attempt, including the zipped contents of the file
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(filename, result[0],
                                zlib.decompress(result[1]), result[2],
                                encoding)

    return result
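
The metadata key in Example #5 exists because cache backends like memcached silently drop values over their item-size limit, so compressing a huge artifact just to have the cache reject it is wasted work. A standalone sketch of that size-gating idea, with a dict standing in for the cache and illustrative names:

import zlib

CACHE_MAX_VALUE_SIZE = 1024 * 1024  # e.g. a memcached-style 1 MB item limit
fake_cache = {}

def cache_compressed(key: str, meta_key: str, body: bytes) -> None:
    meta = fake_cache.get(meta_key)
    if meta and meta["compressed_size"] > CACHE_MAX_VALUE_SIZE:
        return  # known too large: skip compression entirely next time around
    z_body = zlib.compress(body)
    if len(z_body) <= CACHE_MAX_VALUE_SIZE:
        fake_cache[key] = z_body
    # Record the size even when the value itself was too big to store.
    fake_cache[meta_key] = {"compressed_size": len(z_body)}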
Example #6
def fetch_release_file(filename, release, dist=None):
    dist_name = dist.name if dist else None
    cache_key = "releasefile:v1:%s:%s" % (release.id, ReleaseFile.get_ident(filename, dist_name))

    logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            "Checking database for release artifact %r (release_id=%s)", filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release, dist=dist, ident__in=filename_idents
            ).select_related("file")
        )

        if len(possible_files) == 0:
            logger.debug(
                "Release artifact %r not found in database (release_id=%s)", filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(
                (rf for ident in filename_idents for rf in possible_files if rf.ident == ident)
            )

        logger.debug(
            "Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id
        )
        try:
            with metrics.timer("sourcemaps.release_file_read"):
                with ReleaseFile.cache.getfile(releasefile) as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            # This will implicitly skip too large payloads. Those will be cached
            # on the file system by `ReleaseFile.cache`, instead.
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
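
Note the cache key change relative to Examples #3 and #4: it now embeds ReleaseFile.get_ident(filename, dist_name), so the same filename under different dists no longer shares a cache entry. A hypothetical ident just to illustrate the keying (the real get_ident's hashing scheme may differ):

import hashlib

def get_ident(filename: str, dist_name=None) -> str:
    # Hypothetical: hash filename and dist name together so each dist
    # gets a distinct cache key for the same filename.
    raw = filename if dist_name is None else filename + "\x00\x00" + dist_name
    return hashlib.sha1(raw.encode("utf-8")).hexdigest()

print("releasefile:v1:%s:%s" % (42, get_ident("~/app.js", "android")))
print("releasefile:v1:%s:%s" % (42, get_ident("~/app.js", "ios")))  # distinct key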
Example #7
def fetch_release_file(filename, release, dist=None):
    """
    Attempt to retrieve a release artifact from the database.

    Caches the result of that attempt (whether successful or not).
    """
    dist_name = dist.name if dist else None
    cache_key, cache_key_meta = get_cache_keys(filename, release, dist)

    logger.debug("Checking cache for release artifact %r (release_id=%s)",
                 filename, release.id)
    result = cache.get(cache_key)

    # not in the cache (meaning we haven't checked the database recently), so check the database
    if result is None:
        with metrics.timer("sourcemaps.release_artifact_from_file"):
            filename_choices = ReleaseFile.normalize(filename)
            filename_idents = [
                ReleaseFile.get_ident(f, dist_name) for f in filename_choices
            ]

            logger.debug(
                "Checking database for release artifact %r (release_id=%s)",
                filename, release.id)

            possible_files = list(
                ReleaseFile.objects.filter(
                    release_id=release.id,
                    dist_id=dist.id if dist else None,
                    ident__in=filename_idents,
                ).select_related("file"))

            if len(possible_files) == 0:
                logger.debug(
                    "Release artifact %r not found in database (release_id=%s)",
                    filename,
                    release.id,
                )
                cache.set(cache_key, -1, 60)
                return None

            elif len(possible_files) == 1:
                releasefile = possible_files[0]

            else:
                # Pick first one that matches in priority order.
                # This is O(N*M) but there are only ever at most 4 things here
                # so not really worth optimizing.
                releasefile = next(rf for ident in filename_idents
                                   for rf in possible_files
                                   if rf.ident == ident)

            logger.debug(
                "Found release artifact %r (id=%s, release_id=%s)",
                filename,
                releasefile.id,
                release.id,
            )

            result = fetch_and_cache_artifact(
                filename,
                lambda: ReleaseFile.cache.getfile(releasefile),
                cache_key,
                cache_key_meta,
                releasefile.file.headers,
                compress_file,
            )

    # in the cache as an unsuccessful attempt
    elif result == -1:
        result = None

    # in the cache as a successful attempt, including the zipped contents of the file
    else:
        result = result_from_cache(filename, result)

    return result
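
Example #7 finally folds the read/compress/cache logic into a fetch_and_cache_artifact helper that takes a callable to open the file. A hedged sketch of the helper's contract, inferred only from this call site (the real implementation may differ):

import io
import zlib
from typing import Callable, Optional

fake_cache = {}  # stand-in for the real cache backend

def compress_file(fp):
    # Stand-in for the real compress_file: returns (compressed, raw) bytes.
    body = fp.read()
    return zlib.compress(body), body

def fetch_and_cache_artifact(filename: str, open_file: Callable, cache_key: str,
                             cache_key_meta: Optional[str], headers: dict,
                             compress_fn: Callable):
    # Inferred contract: open the artifact via the callable, compress it,
    # cache (headers, z_body, status, encoding), and return the in-memory body.
    try:
        with open_file() as fp:
            z_body, body = compress_fn(fp)
    except Exception:
        return None
    lowered = {k.lower(): v for k, v in headers.items()}
    fake_cache[cache_key] = (lowered, z_body, 200, None)
    if cache_key_meta:
        fake_cache[cache_key_meta] = {"compressed_size": len(z_body)}
    return filename, lowered, body

result = fetch_and_cache_artifact(
    "~/app.js",
    lambda: io.BytesIO(b"console.log('hi');"),
    "releasefile:v1:42:abc",
    "releasefile-meta:v1:42:abc",
    {"Content-Type": "application/javascript"},
    compress_file,
)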