Example #1
def test_patches_for_subdir(
    pkgstore,
    package_version,
    channel_name,
    package_file_name,
    package_repodata_patches,
    dao,
    db,
    package_subdir,
    patches_subdir,
):
    @contextmanager
    def get_db():
        yield db

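    # patch the plugin's get_db_manager so indexing runs against the test db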
    with mock.patch("quetz_repodata_patching.main.get_db_manager", get_db):
        indexing.update_indexes(dao, pkgstore, channel_name)

    index_path = os.path.join(
        pkgstore.channels_dir,
        channel_name,
        package_subdir,
        "index.html",
    )

    assert os.path.isfile(index_path)
    with open(index_path, 'r') as fid:
        content = fid.read()

    assert "repodata.json" in content
    assert "repodata.json.bz2" in content
    assert "repodata_from_packages.json" in content
    assert "repodata_from_packages.json.bz2" in content

    fname = "repodata.json"

    repodata_path = os.path.join(
        pkgstore.channels_dir, channel_name, package_subdir, fname
    )

    assert os.path.isfile(repodata_path)

    with open(repodata_path) as fid:
        data = json.load(fid)

    packages = data["packages"]

    pkg = packages[package_file_name]

    if patches_subdir == package_subdir:
        assert pkg['run_exports'] == {"weak": ["otherpackage > 0.2"]}
    else:
        assert pkg['run_exports'] == {"weak": ["otherpackage > 0.1"]}
Example #2
def test_no_repodata_patches_package(
    pkgstore,
    package_version,
    channel_name,
    package_file_name,
    dao,
    db,
):
    @contextmanager
    def get_db():
        yield db

    with mock.patch("quetz_repodata_patching.main.get_db_manager", get_db):
        indexing.update_indexes(dao, pkgstore, channel_name)

    index_path = os.path.join(
        pkgstore.channels_dir,
        channel_name,
        "noarch",
        "index.html",
    )

    assert os.path.isfile(index_path)
    with open(index_path, 'r') as fid:
        content = fid.read()

    assert "repodata.json" in content
    assert "repodata.json.bz2" in content
    assert "repodata_from_packages.json" not in content
    assert "repodata_from_packages.json.bz2" not in content

    fname = "repodata.json"

    repodata_path = os.path.join(pkgstore.channels_dir, channel_name, "noarch",
                                 fname)

    assert os.path.isfile(repodata_path)

    with open(repodata_path) as fid:
        data = json.load(fid)

    packages = data["packages"]

    pkg = packages[package_file_name]

    assert pkg['run_exports'] == {"weak": ["otherpackage > 0.1"]}

    assert not pkg.get("revoked", False)
    assert 'package_has_been_revoked' not in pkg["depends"]

    assert package_file_name not in data.get("removed", ())
Example #3
def test_synchronisation_no_checksums_in_db(
    repo_content,
    mirror_channel,
    dao,
    config,
    dummy_response,
    db,
    user,
    n_new_packages,
    arch,
    package_version,
    mocker,
):

    package_info = '{"size": 5000, "subdirs":["noarch"]}'
    package_version.info = package_info
    db.commit()

    pkgstore = config.get_package_store()
    rules = Rules("", {"user_id": str(uuid.UUID(bytes=user.id))}, db)

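    # minimal stand-in for requests.Session that always returns the dummy
    # response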
    class DummySession:
        def get(self, path, stream=False):
            return dummy_response()

        def close(self):
            pass

    # generate local repodata.json
    update_indexes(dao, pkgstore, mirror_channel.name)

    dummy_repo = RemoteRepository("", DummySession())

    initial_sync_mirror(
        mirror_channel.name,
        dummy_repo,
        arch,
        dao,
        pkgstore,
        rules,
        skip_errors=False,
    )

    versions = (
        db.query(PackageVersion)
        .filter(PackageVersion.channel_name == mirror_channel.name)
        .all()
    )

    assert len(versions) == n_new_packages + 1
Example #4
def test_update_indexes(config: Config, local_channel, dao):
    pkgstore = config.get_package_store()

    update_indexes(dao, pkgstore, local_channel.name)

    files = pkgstore.list_files(local_channel.name)

    base_files = [
        'channeldata.json',
        'index.html',
        'noarch/index.html',
        'noarch/repodata.json',
    ]

    expected_files = base_files.copy()

    for suffix in ['.bz2', '.gz']:
        expected_files.extend(s + suffix for s in base_files)

    assert sorted(files) == sorted(expected_files)
Example #5
def test_repodata_zchunk(
    pkgstore,
    package_version,
    channel_name,
    package_file_name,
    dao,
    db,
):
    indexing.update_indexes(dao, pkgstore, channel_name)

    index_path = os.path.join(
        pkgstore.channels_dir,
        channel_name,
        "noarch",
        "index.html",
    )

    assert os.path.isfile(index_path)
    with open(index_path, 'r') as fid:
        content = fid.read()

    assert "repodata.json" in content
    assert "repodata.json.bz2" in content
    assert "repodata.json.zck" in content

    for fname in ("repodata.json", "repodata.json.zck"):

        repodata_path = os.path.join(pkgstore.channels_dir, channel_name,
                                     "noarch", fname)

        assert os.path.isfile(repodata_path)

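        # the plain repodata.json is read on the first pass; the zchunk copy
        # is then decompressed with unzck and compared against it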
        if fname.endswith('.zck'):
            subprocess.check_call(['unzck', repodata_path])
            with open('repodata.json') as f:
                repodata_unzck = f.read()

            assert repodata == repodata_unzck  # NOQA
        else:
            with open(repodata_path) as f:
                repodata = f.read()  # NOQA
Example #6
def test_synchronisation_sha(
    repo_content,
    mirror_channel,
    dao,
    config,
    dummy_response,
    db,
    user,
    n_new_packages,
    arch,
    package_version,
):
    pkgstore = config.get_package_store()
    rules = Rules("", {"user_id": str(uuid.UUID(bytes=user.id))}, db)

    class DummySession:
        def get(self, path, stream=False):
            return dummy_response()

    # generate local repodata.json
    update_indexes(dao, pkgstore, mirror_channel.name)

    dummy_repo = RemoteRepository("", DummySession())

    initial_sync_mirror(
        mirror_channel.name,
        dummy_repo,
        arch,
        dao,
        pkgstore,
        rules,
        skip_errors=False,
    )

    versions = (db.query(PackageVersion).filter(
        PackageVersion.channel_name == mirror_channel.name).all())

    assert len(versions) == n_new_packages + 1
Example #7
def test_index_html(
    pkgstore,
    package_version,
    package_repodata_patches,
    channel_name,
    package_file_name,
    dao,
    db,
    remove_instructions,
):
    @contextmanager
    def get_db():
        yield db

    with mock.patch("quetz_repodata_patching.main.get_db_manager", get_db):
        indexing.update_indexes(dao, pkgstore, channel_name)

    index_path = os.path.join(
        pkgstore.channels_dir,
        channel_name,
        "noarch",
        "index.html",
    )

    assert os.path.isfile(index_path)
    with open(index_path, 'r') as fid:
        content = fid.read()

    assert "repodata.json" in content
    assert "repodata.json.bz2" in content
    assert "repodata_from_packages.json" in content
    assert "repodata_from_packages.json.bz2" in content
    if remove_instructions:
        assert package_file_name not in content
    else:
        assert package_file_name in content
Example #8
File: main.py Project: ahendriksen/quetz
def post_channel(
        request: Request,
        new_channel: rest_models.Channel,
        background_tasks: BackgroundTasks,
        mirror_api_key: Optional[str] = None,
        register_mirror: bool = False,
        dao: Dao = Depends(get_dao),
        auth: authorization.Rules = Depends(get_rules),
        task: Task = Depends(get_tasks_worker),
        config=Depends(get_config),
        session: requests.Session = Depends(get_remote_session),
):

    user_id = auth.assert_user()

    existing_channel = dao.get_channel(new_channel.name)

    if existing_channel:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Channel {new_channel.name} exists",
        )

    if not new_channel.mirror_channel_url:
        auth.assert_create_channel()

    is_mirror = new_channel.mirror_channel_url and new_channel.mirror_mode == "mirror"

    is_proxy = new_channel.mirror_channel_url and new_channel.mirror_mode == "proxy"

    if is_mirror:
        auth.assert_create_mirror_channel()

    if is_proxy:
        auth.assert_create_proxy_channel()

    if new_channel.actions is None:
        if is_mirror:
            actions = [ChannelActionEnum.synchronize_repodata]
        else:
            actions = []
    else:
        actions = new_channel.actions

    includelist = new_channel.metadata.includelist
    excludelist = new_channel.metadata.excludelist

    if includelist is not None and excludelist is not None:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="Cannot use both `includelist` and `excludelist` together.",
        )

    user_attrs = new_channel.dict(exclude_unset=True)

    if "size_limit" in user_attrs:
        auth.assert_set_channel_size_limit()
        size_limit = new_channel.size_limit
    else:
        if config.configured_section("quotas"):
            size_limit = config.quotas_channel_quota
        else:
            size_limit = None

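    # create the channel, its package store directory, and an initial index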
    channel = dao.create_channel(new_channel, user_id, authorization.OWNER,
                                 size_limit)
    pkgstore.create_channel(new_channel.name)
    indexing.update_indexes(dao, pkgstore, new_channel.name)

    # register mirror
    if is_mirror and register_mirror:
        mirror_url = str(new_channel.mirror_channel_url)
        mirror_url = mirror_url.replace("get", "api/channels")
        headers = {"x-api-key": mirror_api_key} if mirror_api_key else {}
        api_endpoint = str(
            request.url.replace(query=None)) + '/' + new_channel.name
        response = session.post(
            mirror_url + '/mirrors',
            json={
                "url": api_endpoint.replace("api/channels", "get"),
                "api_endpoint": api_endpoint,
                "metrics_endpoint": api_endpoint.replace("api", "metrics"),
            },
            headers=headers,
        )
        if response.status_code != 201:
            logger.warning(
                f"could not register mirror due to error {response.text}")

    for action in actions:
        task.execute_channel_action(
            action,
            channel,
        )
Example #9
File: mirror.py Project: stjordanis/quetz
def initial_sync_mirror(
    channel_name: str,
    remote_repository: RemoteRepository,
    arch: str,
    dao: Dao,
    pkgstore: PackageStore,
    auth: authorization.Rules,
    includelist: Optional[List[str]] = None,
    excludelist: Optional[List[str]] = None,
    skip_errors: bool = True,
    use_repodata: bool = False,
):

    force = True  # needed for updating packages

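    # prefer the unpatched repodata_from_packages.json and fall back to
    # repodata.json when the remote does not provide it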
    for repodata_fn in ["repodata_from_packages.json", "repodata.json"]:
        try:
            repo_file = remote_repository.open(os.path.join(arch, repodata_fn))
            repodata = json.load(repo_file.file)
            break
        except RemoteServerError:
            logger.error(
                f"can not get {repodata_fn} for channel {arch}/{channel_name}."
            )
            if repodata_fn == "repodata.json":
                logger.error(f"Giving up for {channel_name}/{arch}.")
                return
            else:
                logger.error("Trying next filename.")
                continue
        except json.JSONDecodeError:
            logger.error(f"repodata.json badly formatted for arch {arch}"
                         f"in channel {channel_name}")
            if repodata_fn == "repodata.json":
                return

    channel = dao.get_channel(channel_name)

    if not channel:
        logger.error(f"channel {channel_name} not found")
        return

    from quetz.main import handle_package_files

    packages = repodata.get("packages", {})

    version_methods = [
        _check_checksum(dao, channel_name, arch, "sha256"),
        _check_checksum(dao, channel_name, arch, "md5"),
    ]

    config = Config()
    max_batch_length = config.mirroring_batch_length
    max_batch_size = config.mirroring_batch_size
    # version_methods are context managers (for example, to update the db
    # after all packages have been checked), so we need to enter the context
    # for each
    any_updated = False
    with contextlib.ExitStack() as version_stack:

        version_checks = [
            version_stack.enter_context(method) for method in version_methods
        ]

        update_batch = []
        update_size = 0

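        # download the queued packages in parallel and hand them to the
        # package handlers; returns True only if the batch was processed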
        def handle_batch(update_batch):
            # i_batch += 1
            logger.debug(f"Handling batch: {update_batch}")
            if not update_batch:
                return False

            remote_packages = []
            remote_packages_with_metadata = []

            with ThreadPoolExecutor(
                    max_workers=config.mirroring_num_parallel_downloads
            ) as executor:
                for f in executor.map(
                        download_file,
                        (remote_repository,) * len(update_batch),
                        update_batch,
                ):
                    if f is not None:
                        remote_packages.append(f[0])
                        remote_packages_with_metadata.append(f)

            try:
                if use_repodata:
                    handle_repodata_package(
                        channel,
                        remote_packages_with_metadata,
                        dao,
                        auth,
                        force,
                        pkgstore,
                        config,
                    )

                else:
                    handle_package_files(
                        channel,
                        remote_packages,
                        dao,
                        auth,
                        force,
                        is_mirror_op=True,
                    )
                return True

            except Exception as exc:
                logger.error(
                    f"could not process package {update_batch} from channel"
                    f"{channel_name} due to error {exc} of "
                    f"type {exc.__class__.__name__}")
                if not skip_errors:
                    raise exc

            return False

        for package_name, metadata in packages.items():
            if check_package_membership(package_name, includelist,
                                        excludelist):
                path = os.path.join(arch, package_name)

                # try to find out whether it's a new package version

                is_uptodate = None
                for _check in version_checks:
                    is_uptodate = _check(package_name, metadata)
                    if is_uptodate is not None:
                        break

                # if package is up-to-date skip uploading file
                if is_uptodate:
                    continue
                else:
                    logger.debug(
                        f"updating package {package_name} from {arch}")

                update_batch.append((path, package_name, metadata))
                update_size += metadata.get('size', 100_000)

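            # flush the batch once it exceeds the configured length or size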
            if (len(update_batch) >= max_batch_length
                    or update_size >= max_batch_size):
                logger.debug(f"Executing batch with {update_size}")
                any_updated |= handle_batch(update_batch)
                update_batch.clear()
                update_size = 0

        # handle final batch
        any_updated |= handle_batch(update_batch)

    if any_updated:
        indexing.update_indexes(dao, pkgstore, channel_name, subdirs=[arch])
Example #10
def test_post_package_indexing(
    pkgstore,
    dao,
    package_version,
    channel_name,
    package_repodata_patches,
    db,
    package_file_name,
    repodata_stem,
    compressed_repodata,
    revoke_instructions,
    remove_instructions,
    package_format,
    patched_package_name,
):
    @contextmanager
    def get_db():
        yield db

    with mock.patch("quetz_repodata_patching.main.get_db_manager", get_db):
        indexing.update_indexes(dao, pkgstore, channel_name)

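    # the patched repodata may be stored bz2-compressed; pick the matching
    # extension and opener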
    ext = "json.bz2" if compressed_repodata else "json"
    open_ = bz2.open if compressed_repodata else open

    repodata_path = os.path.join(
        pkgstore.channels_dir, channel_name, "noarch", f"{repodata_stem}.{ext}"
    )

    assert os.path.isfile(repodata_path)

    with open_(repodata_path) as fid:
        data = json.load(fid)

    key = "packages" if package_format == 'tarbz2' else "packages.conda"

    packages = data[key]

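    # remove/revoke instructions may list either the .conda or the .tar.bz2
    # file name, so check both spellings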
    is_package_removed = (package_file_name in remove_instructions) or (
        package_file_name.replace(".conda", ".tar.bz2") in remove_instructions
    )
    is_package_revoked = (package_file_name in revoke_instructions) or (
        package_file_name.replace(".conda", ".tar.bz2") in revoke_instructions
    )

    if not is_package_removed:
        assert packages[package_file_name]['run_exports'] == {
            "weak": ["otherpackage > 0.2"]
        }

        if is_package_revoked:
            revoked_pkg = packages[package_file_name]
            assert revoked_pkg.get("revoked", False)
            assert 'package_has_been_revoked' in revoked_pkg["depends"]

    else:
        assert package_file_name not in packages
        assert package_file_name in data.get("removed", ())

    orig_repodata_path = os.path.join(
        pkgstore.channels_dir,
        channel_name,
        "noarch",
        f"{repodata_stem}_from_packages.{ext}",
    )

    assert os.path.isfile(orig_repodata_path)
    with open_(orig_repodata_path) as fid:
        data = json.load(fid)
    package_data = data[key][package_file_name]
    assert package_data['run_exports'] == {"weak": ["otherpackage > 0.1"]}
    assert not package_data.get("revoked", False)
    assert "package_has_been_revoked" not in package_data
    assert not data.get("removed")
Example #11
def initial_sync_mirror(
    channel_name: str,
    remote_repository: RemoteRepository,
    arch: str,
    dao: Dao,
    pkgstore: PackageStore,
    auth: authorization.Rules,
    skip_errors: bool = True,
):

    force = True  # needed for updating packages

    try:
        repo_file = remote_repository.open(os.path.join(arch, "repodata.json"))
        repodata = json.load(repo_file.file)
    except RemoteServerError:
        logger.error(f"can not get repodata.json for channel {channel_name}")
        return
    except json.JSONDecodeError:
        logger.error(
            f"repodata.json badly formatted for arch {arch} in channel {channel_name}"
        )
        return

    channel = dao.get_channel(channel_name)

    from quetz.main import handle_package_files

    packages = repodata.get("packages", {})

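    # compare remote packages against local timestamps and checksums to skip
    # packages that are already present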
    version_methods = [
        _check_timestamp(channel, dao),
        _check_checksum(pkgstore, channel_name, arch, "sha256"),
        _check_checksum(pkgstore, channel_name, arch, "md5"),
    ]

    # version_methods are context managers (for example, to update the db
    # after all packages have been checked), so we need to enter the context
    # for each
    any_updated = False
    with contextlib.ExitStack() as version_stack:

        version_checks = [
            version_stack.enter_context(method) for method in version_methods
        ]

        for package_name, metadata in packages.items():
            path = os.path.join(arch, package_name)

            # try to find out whether it's a new package version

            is_uptodate = None
            for _check in version_checks:
                is_uptodate = _check(package_name, metadata)
                if is_uptodate is not None:
                    break

            # if package is up-to-date skip uploading file
            if is_uptodate:
                logger.debug(
                    f"package {package_name} from {arch} up-to-date. Not updating"
                )
                continue
            else:
                logger.debug(f"updating package {package_name} form {arch}")

            try:
                remote_package = remote_repository.open(path)
            except RemoteServerError:
                logger.error(f"remote server error when getting a file {path}")
                continue

            files = [remote_package]
            try:
                handle_package_files(
                    channel_name,
                    files,
                    dao,
                    auth,
                    force,
                )
                any_updated = True
            except Exception as exc:
                logger.error(
                    f"could not process package {package_name} from channel"
                    f"{channel_name} due to error {exc}")
                if not skip_errors:
                    raise exc

    if any_updated:
        indexing.update_indexes(dao, pkgstore, channel_name, subdirs=[arch])