Example #1
0
def _download_rpm(rpm: Rpm, repo_url: str, rpm_table: RpmTable,
                  cfg: DownloadConfig) -> Tuple[Rpm, str]:
    """Fetch `rpm` from `repo_url` and persist its contents.

    Returns a copy of `rpm` carrying a canonical checksum, together with
    the storage ID under which the blob was committed.
    """
    log.info(f"Downloading {rpm}")
    storage = cfg.new_storage()
    with download_resource(repo_url, rpm.location) as src, \
            storage.writer() as dst:
        # Standardize on one hash algorithm before committing to the DB.
        # Otherwise two repos could store the same RPM hashed with
        # different algorithms, and thus trigger our "different hashes"
        # detector for a sane RPM.
        hasher = hashlib.new(CANONICAL_HASH)
        verified_chunks = verify_chunk_stream(  # May raise a ReportableError
            read_chunks(src, BUFFER_BYTES),
            [rpm.checksum],
            rpm.size,
            rpm.location,
        )
        for piece in verified_chunks:
            hasher.update(piece)
            dst.write(piece)
        # NB: We can also query the RPM as we download it above, via
        # something like P123285392.  However, at present, all necessary
        # metadata can be retrieved via `parse_metadata.py`.
        rpm = rpm._replace(canonical_checksum=Checksum(
            algorithm=CANONICAL_HASH,
            hexdigest=hasher.hexdigest(),
        ))
        sid = dst.commit()
    assert sid is not None
    return rpm, sid
Example #2
0
def _download_repomd(
        repo: YumDnfConfRepo,
        repo_universe: str) -> Tuple[YumDnfConfRepo, str, RepoMetadata]:
    """Fetch and parse `repodata/repomd.xml` for `repo`.

    Returns the repo, its universe, and the parsed metadata as a single
    tuple so callers can thread all three through downstream stages.
    """
    resource = download_resource(repo.base_url, "repodata/repomd.xml")
    with resource as stream:
        raw_xml = stream.read()
    return repo, repo_universe, RepoMetadata.new(xml=raw_xml)
Example #3
0
def _download_repodata(
    repodata: Repodata,
    *,
    repo_url: str,
    repodata_table: RepodataTable,
    cfg: DownloadConfig,
    is_primary: bool,
) -> DownloadRepodataReturnType:
    """This function behaves differently depending on two main characteristics:
      - Whether or not the provided repodata is primary, and
      - Whether or not it already exists in storage.
    Which actions are taken depends on which of the above true, and this
    branching is explained within the function.

    Returns a `DownloadRepodataReturnType` of: the repodata, whether this
    call newly stored it, its storage_id, and -- for primary repodata
    only -- the list of RPMs parsed out of it (`None` otherwise).

    May raise a `ReportableError` (checksum/size mismatch while streaming)
    or a `RepodataParseError` (primary parse failure).
    """
    storage = cfg.new_storage()
    # We only need to download the repodata if is not already in the DB,
    # or if it is primary (so we can parse it for RPMs).
    with cfg.new_db_ctx(readonly=True) as ro_repo_db:
        storage_id = ro_repo_db.get_storage_id(repodata_table, repodata)

    # Nothing to do -- only need to download repodata if it's the primary
    # (so we can parse it for RPMs), or if it's not already in the DB.
    if not is_primary and storage_id:
        return DownloadRepodataReturnType(repodata, False, storage_id, None)
    rpms = [] if is_primary else None

    # Remaining possibilities are that we've got a primary with or without
    # a storage_id, or a non-primary without a storage_id
    with ExitStack() as cm:
        rpm_parser = None
        if is_primary:
            # We'll parse the selected primary file to discover the RPMs.
            rpm_parser = cm.enter_context(get_rpm_parser(repodata))

        if storage_id:
            # Read the primary from storage as we already have an ID
            infile = cm.enter_context(storage.reader(storage_id))
            # No need to write as this repodata was already stored
            outfile = None
        else:
            # Nothing stored, must download - can fail due to repo updates
            infile = cm.enter_context(
                download_resource(repo_url, repodata.location))
            # Want to persist the downloaded repodata into storage so that
            # future runs don't need to redownload it
            outfile = cm.enter_context(storage.writer())

        log.info(f"Fetching {repodata}")
        for chunk in verify_chunk_stream(
                read_chunks(infile, BUFFER_BYTES),
                [repodata.checksum],
                repodata.size,
                repodata.location,
        ):  # May raise a ReportableError
            if outfile:
                outfile.write(chunk)
            if rpm_parser:
                try:
                    rpms.extend(rpm_parser.feed(chunk))
                except Exception as ex:
                    # Chain the cause (PEP 3134) so the original parser
                    # traceback is preserved for debugging, instead of the
                    # implicit "during handling of" context only.
                    raise RepodataParseError(
                        (repodata.location, ex)) from ex
        # Must commit the output context to get a storage_id.
        if outfile:
            return DownloadRepodataReturnType(repodata, True, outfile.commit(),
                                              rpms)
    # The primary repodata was already stored, and we just parsed it for RPMs.
    assert storage_id is not None
    return DownloadRepodataReturnType(repodata, False, storage_id, rpms)