def test_patches_for_subdir(
    pkgstore,
    package_version,
    channel_name,
    package_file_name,
    package_repodata_patches,
    dao,
    db,
    package_subdir,
    patches_subdir,
):
    @contextmanager
    def get_db():
        yield db

    with mock.patch("quetz_repodata_patching.main.get_db_manager", get_db):
        indexing.update_indexes(dao, pkgstore, channel_name)

    index_path = os.path.join(
        pkgstore.channels_dir,
        channel_name,
        package_subdir,
        "index.html",
    )
    assert os.path.isfile(index_path)
    with open(index_path, 'r') as fid:
        content = fid.read()

    assert "repodata.json" in content
    assert "repodata.json.bz2" in content
    assert "repodata_from_packages.json" in content
    assert "repodata_from_packages.json.bz2" in content

    fname = "repodata.json"
    repodata_path = os.path.join(
        pkgstore.channels_dir, channel_name, package_subdir, fname
    )
    assert os.path.isfile(repodata_path)

    with open(repodata_path) as fid:
        data = json.load(fid)

    packages = data["packages"]
    pkg = packages[package_file_name]

    if patches_subdir == package_subdir:
        assert pkg['run_exports'] == {"weak": ["otherpackage > 0.2"]}
    else:
        assert pkg['run_exports'] == {"weak": ["otherpackage > 0.1"]}
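# The get_db/mock.patch boilerplate above recurs in several tests below. A
# minimal sketch of how it could be factored into a shared pytest fixture;
# the fixture name `patched_db_manager` is hypothetical and not part of the
# test suite, while the patch target is the one the tests already use.
from contextlib import contextmanager
from unittest import mock

import pytest


@pytest.fixture
def patched_db_manager(db):
    @contextmanager
    def get_db():
        yield db

    with mock.patch("quetz_repodata_patching.main.get_db_manager", get_db):
        yield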
def test_no_repodata_patches_package(
    pkgstore,
    package_version,
    channel_name,
    package_file_name,
    dao,
    db,
):
    @contextmanager
    def get_db():
        yield db

    with mock.patch("quetz_repodata_patching.main.get_db_manager", get_db):
        indexing.update_indexes(dao, pkgstore, channel_name)

    index_path = os.path.join(
        pkgstore.channels_dir,
        channel_name,
        "noarch",
        "index.html",
    )
    assert os.path.isfile(index_path)
    with open(index_path, 'r') as fid:
        content = fid.read()

    assert "repodata.json" in content
    assert "repodata.json.bz2" in content
    assert "repodata_from_packages.json" not in content
    assert "repodata_from_packages.json.bz2" not in content

    fname = "repodata.json"
    repodata_path = os.path.join(pkgstore.channels_dir, channel_name, "noarch", fname)
    assert os.path.isfile(repodata_path)

    with open(repodata_path) as fid:
        data = json.load(fid)

    packages = data["packages"]
    pkg = packages[package_file_name]

    assert pkg['run_exports'] == {"weak": ["otherpackage > 0.1"]}
    assert not pkg.get("revoked", False)
    assert 'package_has_been_revoked' not in pkg["depends"]
    assert package_file_name not in data.get("removed", ())
def test_synchronisation_no_checksums_in_db(
    repo_content,
    mirror_channel,
    dao,
    config,
    dummy_response,
    db,
    user,
    n_new_packages,
    arch,
    package_version,
    mocker,
):
    package_info = '{"size": 5000, "subdirs":["noarch"]}'
    package_version.info = package_info
    db.commit()

    pkgstore = config.get_package_store()
    rules = Rules("", {"user_id": str(uuid.UUID(bytes=user.id))}, db)

    class DummySession:
        def get(self, path, stream=False):
            return dummy_response()

        def close(self):
            pass

    # generate local repodata.json
    update_indexes(dao, pkgstore, mirror_channel.name)

    dummy_repo = RemoteRepository("", DummySession())

    initial_sync_mirror(
        mirror_channel.name,
        dummy_repo,
        arch,
        dao,
        pkgstore,
        rules,
        skip_errors=False,
    )

    versions = (
        db.query(PackageVersion)
        .filter(PackageVersion.channel_name == mirror_channel.name)
        .all()
    )

    assert len(versions) == n_new_packages + 1
def test_update_indexes(config: Config, local_channel, dao):
    pkgstore = config.get_package_store()

    update_indexes(dao, pkgstore, local_channel.name)

    files = pkgstore.list_files(local_channel.name)

    base_files = [
        'channeldata.json',
        'index.html',
        'noarch/index.html',
        'noarch/repodata.json',
    ]

    expected_files = base_files.copy()
    for suffix in ['.bz2', '.gz']:
        expected_files.extend(s + suffix for s in base_files)

    assert sorted(files) == sorted(expected_files)
def test_repodata_zchunk(
    pkgstore,
    package_version,
    channel_name,
    package_file_name,
    dao,
    db,
):
    indexing.update_indexes(dao, pkgstore, channel_name)

    index_path = os.path.join(
        pkgstore.channels_dir,
        channel_name,
        "noarch",
        "index.html",
    )
    assert os.path.isfile(index_path)
    with open(index_path, 'r') as fid:
        content = fid.read()

    assert "repodata.json" in content
    assert "repodata.json.bz2" in content
    assert "repodata.json.zck" in content

    # iteration order matters: the plain repodata.json is read first so that
    # `repodata` is defined when the .zck round-trip is compared against it
    for fname in ("repodata.json", "repodata.json.zck"):
        repodata_path = os.path.join(
            pkgstore.channels_dir, channel_name, "noarch", fname
        )

        assert os.path.isfile(repodata_path)

        if fname.endswith('.zck'):
            # decompress with the zchunk CLI; the decompressed repodata.json
            # is written to the cwd and read back below
            subprocess.check_call(['unzck', repodata_path])
            with open('repodata.json') as f:
                repodata_unzck = f.read()
            assert repodata == repodata_unzck  # NOQA
        else:
            with open(repodata_path) as f:
                repodata = f.read()  # NOQA
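# test_repodata_zchunk shells out to `unzck`, so the zchunk CLI must be on
# PATH. A minimal guard that could skip the test when the binary is missing;
# the marker name `requires_unzck` is hypothetical, not part of the suite.
import shutil

import pytest

requires_unzck = pytest.mark.skipif(
    shutil.which("unzck") is None, reason="unzck (zchunk) not installed"
)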
def test_synchronisation_sha(
    repo_content,
    mirror_channel,
    dao,
    config,
    dummy_response,
    db,
    user,
    n_new_packages,
    arch,
    package_version,
):
    pkgstore = config.get_package_store()
    rules = Rules("", {"user_id": str(uuid.UUID(bytes=user.id))}, db)

    class DummySession:
        def get(self, path, stream=False):
            return dummy_response()

    # generate local repodata.json
    update_indexes(dao, pkgstore, mirror_channel.name)

    dummy_repo = RemoteRepository("", DummySession())

    initial_sync_mirror(
        mirror_channel.name,
        dummy_repo,
        arch,
        dao,
        pkgstore,
        rules,
        skip_errors=False,
    )

    versions = (
        db.query(PackageVersion)
        .filter(PackageVersion.channel_name == mirror_channel.name)
        .all()
    )

    assert len(versions) == n_new_packages + 1
def test_index_html(
    pkgstore,
    package_version,
    package_repodata_patches,
    channel_name,
    package_file_name,
    dao,
    db,
    remove_instructions,
):
    @contextmanager
    def get_db():
        yield db

    with mock.patch("quetz_repodata_patching.main.get_db_manager", get_db):
        indexing.update_indexes(dao, pkgstore, channel_name)

    index_path = os.path.join(
        pkgstore.channels_dir,
        channel_name,
        "noarch",
        "index.html",
    )
    assert os.path.isfile(index_path)
    with open(index_path, 'r') as fid:
        content = fid.read()

    assert "repodata.json" in content
    assert "repodata.json.bz2" in content
    assert "repodata_from_packages.json" in content
    assert "repodata_from_packages.json.bz2" in content

    if remove_instructions:
        assert package_file_name not in content
    else:
        assert package_file_name in content
def post_channel(
    request: Request,
    new_channel: rest_models.Channel,
    background_tasks: BackgroundTasks,
    mirror_api_key: Optional[str] = None,
    register_mirror: bool = False,
    dao: Dao = Depends(get_dao),
    auth: authorization.Rules = Depends(get_rules),
    task: Task = Depends(get_tasks_worker),
    config=Depends(get_config),
    session: requests.Session = Depends(get_remote_session),
):
    user_id = auth.assert_user()

    existing_channel = dao.get_channel(new_channel.name)

    if existing_channel:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Channel {new_channel.name} exists",
        )

    if not new_channel.mirror_channel_url:
        auth.assert_create_channel()

    is_mirror = new_channel.mirror_channel_url and new_channel.mirror_mode == "mirror"
    is_proxy = new_channel.mirror_channel_url and new_channel.mirror_mode == "proxy"

    if is_mirror:
        auth.assert_create_mirror_channel()

    if is_proxy:
        auth.assert_create_proxy_channel()

    if new_channel.actions is None:
        if is_mirror:
            actions = [ChannelActionEnum.synchronize_repodata]
        else:
            actions = []
    else:
        actions = new_channel.actions

    includelist = new_channel.metadata.includelist
    excludelist = new_channel.metadata.excludelist

    if includelist is not None and excludelist is not None:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="Cannot use both `includelist` and `excludelist` together.",
        )

    user_attrs = new_channel.dict(exclude_unset=True)

    if "size_limit" in user_attrs:
        auth.assert_set_channel_size_limit()
        size_limit = new_channel.size_limit
    else:
        if config.configured_section("quotas"):
            size_limit = config.quotas_channel_quota
        else:
            size_limit = None

    channel = dao.create_channel(new_channel, user_id, authorization.OWNER, size_limit)
    pkgstore.create_channel(new_channel.name)
    indexing.update_indexes(dao, pkgstore, new_channel.name)

    # register mirror
    if is_mirror and register_mirror:
        mirror_url = str(new_channel.mirror_channel_url)
        mirror_url = mirror_url.replace("get", "api/channels")
        headers = {"x-api-key": mirror_api_key} if mirror_api_key else {}
        api_endpoint = str(request.url.replace(query=None)) + '/' + new_channel.name
        response = session.post(
            mirror_url + '/mirrors',
            json={
                "url": api_endpoint.replace("api/channels", "get"),
                "api_endpoint": api_endpoint,
                "metrics_endpoint": api_endpoint.replace("api", "metrics"),
            },
            headers=headers,
        )
        if response.status_code != 201:
            logger.warning(f"could not register mirror due to error {response.text}")

    for action in actions:
        task.execute_channel_action(
            action,
            channel,
        )
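# A worked example (hypothetical host) of the URL rewriting that mirror
# registration above performs. Note that str.replace substitutes *every*
# occurrence of "get", so a mirror URL whose host or channel name contains
# "get" would be rewritten incorrectly.
mirror_channel_url = "https://quetz.example.com/get/conda-forge"
assert mirror_channel_url.replace("get", "api/channels") == (
    "https://quetz.example.com/api/channels/conda-forge"
)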
def initial_sync_mirror(
    channel_name: str,
    remote_repository: RemoteRepository,
    arch: str,
    dao: Dao,
    pkgstore: PackageStore,
    auth: authorization.Rules,
    includelist: Optional[List[str]] = None,
    excludelist: Optional[List[str]] = None,
    skip_errors: bool = True,
    use_repodata: bool = False,
):
    force = True  # needed for updating packages

    for repodata_fn in ["repodata_from_packages.json", "repodata.json"]:
        try:
            repo_file = remote_repository.open(os.path.join(arch, repodata_fn))
            repodata = json.load(repo_file.file)
            break
        except RemoteServerError:
            logger.error(
                f"can not get {repodata_fn} for channel {arch}/{channel_name}."
            )
            if repodata_fn == "repodata.json":
                logger.error(f"Giving up for {channel_name}/{arch}.")
                return
            else:
                logger.error("Trying next filename.")
                continue
        except json.JSONDecodeError:
            logger.error(
                f"repodata.json badly formatted for arch {arch} "
                f"in channel {channel_name}"
            )
            if repodata_fn == "repodata.json":
                return

    channel = dao.get_channel(channel_name)

    if not channel:
        logger.error(f"channel {channel_name} not found")
        return

    from quetz.main import handle_package_files

    packages = repodata.get("packages", {})

    version_methods = [
        _check_checksum(dao, channel_name, arch, "sha256"),
        _check_checksum(dao, channel_name, arch, "md5"),
    ]

    config = Config()
    max_batch_length = config.mirroring_batch_length
    max_batch_size = config.mirroring_batch_size

    # version_methods are context managers (for example, to update the db
    # after all packages have been checked), so we need to enter the context
    # for each
    any_updated = False
    with contextlib.ExitStack() as version_stack:
        version_checks = [
            version_stack.enter_context(method) for method in version_methods
        ]

        update_batch = []
        update_size = 0

        def handle_batch(update_batch):
            logger.debug(f"Handling batch: {update_batch}")
            if not update_batch:
                return False

            remote_packages = []
            remote_packages_with_metadata = []

            with ThreadPoolExecutor(
                max_workers=config.mirroring_num_parallel_downloads
            ) as executor:
                for f in executor.map(
                    download_file,
                    (remote_repository,) * len(update_batch),
                    update_batch,
                ):
                    if f is not None:
                        remote_packages.append(f[0])
                        remote_packages_with_metadata.append(f)

            try:
                if use_repodata:
                    handle_repodata_package(
                        channel,
                        remote_packages_with_metadata,
                        dao,
                        auth,
                        force,
                        pkgstore,
                        config,
                    )
                else:
                    handle_package_files(
                        channel,
                        remote_packages,
                        dao,
                        auth,
                        force,
                        is_mirror_op=True,
                    )
                return True
            except Exception as exc:
                logger.error(
                    f"could not process package {update_batch} from channel "
                    f"{channel_name} due to error {exc} of "
                    f"type {exc.__class__.__name__}"
                )
                if not skip_errors:
                    raise exc

            return False

        for package_name, metadata in packages.items():
            if check_package_membership(package_name, includelist, excludelist):
                path = os.path.join(arch, package_name)

                # try to find out whether it's a new package version
                is_uptodate = None
                for _check in version_checks:
                    is_uptodate = _check(package_name, metadata)
                    if is_uptodate is not None:
                        break

                # if package is up-to-date skip uploading file
                if is_uptodate:
                    continue
                else:
                    logger.debug(f"updating package {package_name} from {arch}")

                update_batch.append((path, package_name, metadata))
                update_size += metadata.get('size', 100_000)

            if len(update_batch) >= max_batch_length or update_size >= max_batch_size:
                logger.debug(f"Executing batch with {update_size}")
                any_updated |= handle_batch(update_batch)
                update_batch.clear()
                update_size = 0

        # handle final batch
        any_updated |= handle_batch(update_batch)

    if any_updated:
        indexing.update_indexes(dao, pkgstore, channel_name, subdirs=[arch])
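# The version_methods entered above are context managers whose entered value
# is a callable check(package_name, metadata) returning True (up to date),
# False (outdated) or None (no verdict, fall through to the next check). A
# minimal sketch of that shape; the lookup against a plain dict is
# illustrative and not quetz's actual _check_checksum implementation.
from contextlib import contextmanager
from typing import Optional


@contextmanager
def sketch_check_checksum(known_checksums: dict, keyname: str = "sha256"):
    def check(package_name: str, metadata: dict) -> Optional[bool]:
        local = known_checksums.get(package_name)
        remote = metadata.get(keyname)
        if local is None or remote is None:
            return None  # cannot decide here; defer to the next check
        return local == remote

    # a real implementation could flush accumulated db updates on exit
    yield check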
def test_post_package_indexing(
    pkgstore,
    dao,
    package_version,
    channel_name,
    package_repodata_patches,
    db,
    package_file_name,
    repodata_stem,
    compressed_repodata,
    revoke_instructions,
    remove_instructions,
    package_format,
    patched_package_name,
):
    @contextmanager
    def get_db():
        yield db

    with mock.patch("quetz_repodata_patching.main.get_db_manager", get_db):
        indexing.update_indexes(dao, pkgstore, channel_name)

    ext = "json.bz2" if compressed_repodata else "json"
    open_ = bz2.open if compressed_repodata else open

    repodata_path = os.path.join(
        pkgstore.channels_dir, channel_name, "noarch", f"{repodata_stem}.{ext}"
    )

    assert os.path.isfile(repodata_path)

    with open_(repodata_path) as fid:
        data = json.load(fid)

    key = "packages" if package_format == 'tarbz2' else "packages.conda"
    packages = data[key]

    is_package_removed = (package_file_name in remove_instructions) or (
        package_file_name.replace(".conda", ".tar.bz2") in remove_instructions
    )
    is_package_revoked = (package_file_name in revoke_instructions) or (
        package_file_name.replace(".conda", ".tar.bz2") in revoke_instructions
    )

    if not is_package_removed:
        assert packages[package_file_name]['run_exports'] == {
            "weak": ["otherpackage > 0.2"]
        }
        if is_package_revoked:
            revoked_pkg = packages[package_file_name]
            assert revoked_pkg.get("revoked", False)
            assert 'package_has_been_revoked' in revoked_pkg["depends"]
    else:
        assert package_file_name not in packages
        assert package_file_name in data.get("removed", ())

    orig_repodata_path = os.path.join(
        pkgstore.channels_dir,
        channel_name,
        "noarch",
        f"{repodata_stem}_from_packages.{ext}",
    )

    assert os.path.isfile(orig_repodata_path)

    with open_(orig_repodata_path) as fid:
        data = json.load(fid)

    package_data = data[key][package_file_name]
    assert package_data['run_exports'] == {"weak": ["otherpackage > 0.1"]}
    assert not package_data.get("revoked", False)
    assert "package_has_been_revoked" not in package_data
    assert not data.get("removed")
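# In conda repodata, `.tar.bz2` entries are listed under the "packages" key
# and `.conda` entries under "packages.conda"; the key selection in the test
# above follows that convention. A hypothetical helper, not part of the
# suite, that makes the mapping explicit:
def repodata_key(package_format: str) -> str:
    return "packages" if package_format == "tarbz2" else "packages.conda"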
def initial_sync_mirror(
    channel_name: str,
    remote_repository: RemoteRepository,
    arch: str,
    dao: Dao,
    pkgstore: PackageStore,
    auth: authorization.Rules,
    skip_errors: bool = True,
):
    force = True  # needed for updating packages

    try:
        repo_file = remote_repository.open(os.path.join(arch, "repodata.json"))
        repodata = json.load(repo_file.file)
    except RemoteServerError:
        logger.error(f"can not get repodata.json for channel {channel_name}")
        return
    except json.JSONDecodeError:
        logger.error(
            f"repodata.json badly formatted for arch {arch} in channel {channel_name}"
        )
        return

    channel = dao.get_channel(channel_name)

    from quetz.main import handle_package_files

    packages = repodata.get("packages", {})

    version_methods = [
        _check_timestamp(channel, dao),
        _check_checksum(pkgstore, channel_name, arch, "sha256"),
        _check_checksum(pkgstore, channel_name, arch, "md5"),
    ]

    # version_methods are context managers (for example, to update the db
    # after all packages have been checked), so we need to enter the context
    # for each
    any_updated = False
    with contextlib.ExitStack() as version_stack:
        version_checks = [
            version_stack.enter_context(method) for method in version_methods
        ]

        for package_name, metadata in packages.items():
            path = os.path.join(arch, package_name)

            # try to find out whether it's a new package version
            is_uptodate = None
            for _check in version_checks:
                is_uptodate = _check(package_name, metadata)
                if is_uptodate is not None:
                    break

            # if package is up-to-date skip uploading file
            if is_uptodate:
                logger.debug(
                    f"package {package_name} from {arch} up-to-date. Not updating"
                )
                continue
            else:
                logger.debug(f"updating package {package_name} from {arch}")

            try:
                remote_package = remote_repository.open(path)
            except RemoteServerError:
                logger.error(f"remote server error when getting a file {path}")
                continue

            files = [remote_package]

            try:
                handle_package_files(
                    channel_name,
                    files,
                    dao,
                    auth,
                    force,
                )
                any_updated = True
            except Exception as exc:
                logger.error(
                    f"could not process package {package_name} from channel "
                    f"{channel_name} due to error {exc}"
                )
                if not skip_errors:
                    raise exc

    if any_updated:
        indexing.update_indexes(dao, pkgstore, channel_name, subdirs=[arch])