Example #1
def reindex_packages_from_store(
    dao: Dao,
    config: Config,
    channel_name: str,
    user_id: bytes,
):
    """Reindex packages from files in the package store"""

    db = dao.db

    pkgstore = config.get_package_store()

    all_files = pkgstore.list_files(channel_name)
    pkg_files = [f for f in all_files if f.endswith(".tar.bz2")]

    channel = dao.get_channel(channel_name)

    if channel:
        for package in channel.packages:
            db.delete(package)
        db.commit()
    else:
        data = rest_models.Channel(name=channel_name,
                                   description="re-indexed from files",
                                   private=True)
        channel = dao.create_channel(data, user_id, authorization.OWNER)

    for fname in pkg_files:
        fid = pkgstore.serve_path(channel_name, fname)
        handle_file(channel_name, fname, fid, dao, user_id)
    update_indexes(dao, pkgstore, channel_name)
Example #2
def get_channel_or_fail(channel_name: str, dao: Dao = Depends(get_dao)) -> db_models.Channel:
    channel = dao.get_channel(channel_name)

    if not channel:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f'Channel {channel_name} not found')

    return channel
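
A minimal usage sketch, assuming a FastAPI router and a route path of /api/channels/{channel_name} (both invented here, not taken from the example): the dependency resolves the channel or raises the 404 before the handler body runs.

from fastapi import APIRouter, Depends

router = APIRouter()

@router.get("/api/channels/{channel_name}")
def get_channel(channel=Depends(get_channel_or_fail)):
    # the dependency has already raised 404 if the channel does not exist
    return channel
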
Example #3
def synchronize_packages(
    channel_name: str,
    dao: Dao,
    pkgstore: PackageStore,
    auth: authorization.Rules,
    session: requests.Session,
    includelist: List[str] = None,
    excludelist: List[str] = None,
    use_repodata: bool = False,
):

    logger.debug(
        f"executing synchronize_packages task in a process {os.getpid()}")

    new_channel = dao.get_channel(channel_name)

    if not new_channel:
        logger.error(f"channel {channel_name} not found")
        return

    host = new_channel.mirror_channel_url

    remote_repo = RemoteRepository(new_channel.mirror_channel_url, session)

    user_id = auth.assert_user()

    try:
        channel_data = remote_repo.open("channeldata.json").json()
        if use_repodata:
            create_packages_from_channeldata(channel_name, user_id,
                                             channel_data, dao)
        subdirs = channel_data.get("subdirs", [])
    except (RemoteFileNotFound, json.JSONDecodeError):
        subdirs = None
    except RemoteServerError:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=f"Remote channel {host} unavailable",
        )
    # if no channel data use known architectures
    if subdirs is None:
        subdirs = KNOWN_SUBDIRS

    for arch in subdirs:
        initial_sync_mirror(
            new_channel.name,
            remote_repo,
            arch,
            dao,
            pkgstore,
            auth,
            includelist,
            excludelist,
            use_repodata=use_repodata,
        )
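
For context, KNOWN_SUBDIRS is the fallback list of conda platforms used when the remote channel does not expose a usable channeldata.json. A plausible definition (the exact quetz constant may list different platforms):

KNOWN_SUBDIRS = (
    "noarch",
    "linux-64",
    "linux-aarch64",
    "linux-ppc64le",
    "osx-64",
    "osx-arm64",
    "win-64",
)
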
Example #4
def post_channel(new_channel: rest_models.Channel,
                 dao: Dao = Depends(get_dao),
                 auth: authorization.Rules = Depends(get_rules)):

    user_id = auth.assert_user()

    channel = dao.get_channel(new_channel.name)

    if channel:
        raise HTTPException(status_code=status.HTTP_409_CONFLICT,
                            detail=f'Channel {new_channel.name} exists')

    dao.create_channel(new_channel, user_id, authorization.OWNER)
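
A hedged illustration of a request body this endpoint accepts; the field names come from rest_models.Channel as used in these examples, the values are invented:

new_channel_payload = {
    "name": "my-channel",
    "description": "a test channel",
    "private": True,
}
# POSTing this body a second time hits the 409 branch above, since the name exists.
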
Example #5
def post_channel(
        new_channel: rest_models.Channel,
        background_tasks: BackgroundTasks,
        dao: Dao = Depends(get_dao),
        auth: authorization.Rules = Depends(get_rules),
        task: Task = Depends(get_tasks_worker),
        remote_session: requests.Session = Depends(get_remote_session),
):

    user_id = auth.assert_user()

    channel = dao.get_channel(new_channel.name)

    if channel:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Channel {new_channel.name} exists",
        )

    if not new_channel.mirror_channel_url:
        auth.assert_create_channel()

    is_mirror = new_channel.mirror_channel_url and new_channel.mirror_mode == "mirror"

    is_proxy = new_channel.mirror_channel_url and new_channel.mirror_mode == "proxy"

    if is_mirror:
        auth.assert_create_mirror_channel()

    if is_proxy:
        auth.assert_create_proxy_channel()

    if new_channel.metadata.actions is None:
        if is_mirror:
            actions = [ChannelActionEnum.synchronize]
        else:
            actions = []
    else:
        actions = new_channel.metadata.actions

    channel = dao.create_channel(new_channel, user_id, authorization.OWNER)

    for action in actions:
        task.execute_channel_action(action, channel)
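
For reference, ChannelActionEnum enumerates the background actions a channel can trigger. A minimal sketch showing only the two members visible in these examples (the shape is assumed and the real quetz enum may define more members):

import enum

class ChannelActionEnum(str, enum.Enum):
    synchronize = "synchronize"                    # default for mirror channels here
    synchronize_repodata = "synchronize_repodata"  # default in Example #8
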
Example #6
def synchronize_packages(
    channel_name: str,
    dao: Dao,
    pkgstore: PackageStore,
    auth: authorization.Rules,
    session: requests.Session,
):

    logger.debug(
        f"executing synchronize_packages task in a process {os.getpid()}")

    new_channel = dao.get_channel(channel_name)

    host = new_channel.mirror_channel_url

    remote_repo = RemoteRepository(new_channel.mirror_channel_url, session)
    try:
        channel_data = remote_repo.open("channeldata.json").json()
        subdirs = channel_data.get("subdirs", [])
    except (RemoteFileNotFound, json.JSONDecodeError):
        subdirs = None
    except RemoteServerError:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=f"Remote channel {host} unavailable",
        )
    # if no channel data use known architectures
    if subdirs is None:
        subdirs = KNOWN_SUBDIRS

    for arch in subdirs:
        initial_sync_mirror(
            new_channel.name,
            remote_repo,
            arch,
            dao,
            pkgstore,
            auth,
        )
Example #7
    def __call__(
        self,
        channel_name: str,
        dao: Dao = Depends(get_dao),
        auth: authorization.Rules = Depends(get_rules),
    ) -> db_models.Channel:
        channel = dao.get_channel(channel_name.lower())

        if not channel:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Channel {channel_name} not found",
            )

        auth.assert_channel_read(channel)

        mirror_url = channel.mirror_channel_url

        is_proxy = mirror_url and channel.mirror_mode == "proxy"
        is_mirror = mirror_url and channel.mirror_mode == "mirror"
        is_local = not mirror_url
        if is_proxy and not self.allow_proxy:
            raise HTTPException(
                status_code=status.HTTP_405_METHOD_NOT_ALLOWED,
                detail="This method is not implemented for proxy channels",
            )
        if is_mirror and not self.allow_mirror:
            raise HTTPException(
                status_code=status.HTTP_405_METHOD_NOT_ALLOWED,
                detail="This method is not implemented for mirror channels",
            )
        if is_local and not self.allow_local:
            raise HTTPException(
                status_code=status.HTTP_405_METHOD_NOT_ALLOWED,
                detail="This method is not implemented for local channels",
            )
        return channel
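
The __call__ above belongs to a callable dependency class. A hedged sketch of how such a class is typically configured per endpoint; the class name ChannelChecker and its constructor keywords are assumptions, only the allow_proxy/allow_mirror/allow_local attributes are taken from the code:

from fastapi import APIRouter, Depends

router = APIRouter()

get_mirror_channel_only = ChannelChecker(
    allow_proxy=False, allow_mirror=True, allow_local=False
)

@router.put("/api/channels/{channel_name}/actions")
def put_mirror_action(channel=Depends(get_mirror_channel_only)):
    # proxy and local channels receive a 405 from the dependency before this runs
    ...
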
Example #8
def post_channel(
        request: Request,
        new_channel: rest_models.Channel,
        background_tasks: BackgroundTasks,
        mirror_api_key: Optional[str] = None,
        register_mirror: bool = False,
        dao: Dao = Depends(get_dao),
        auth: authorization.Rules = Depends(get_rules),
        task: Task = Depends(get_tasks_worker),
        config=Depends(get_config),
        session: requests.Session = Depends(get_remote_session),
):

    user_id = auth.assert_user()

    existing_channel = dao.get_channel(new_channel.name)

    if existing_channel:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Channel {new_channel.name} exists",
        )

    if not new_channel.mirror_channel_url:
        auth.assert_create_channel()

    is_mirror = new_channel.mirror_channel_url and new_channel.mirror_mode == "mirror"

    is_proxy = new_channel.mirror_channel_url and new_channel.mirror_mode == "proxy"

    if is_mirror:
        auth.assert_create_mirror_channel()

    if is_proxy:
        auth.assert_create_proxy_channel()

    if new_channel.actions is None:
        if is_mirror:
            actions = [ChannelActionEnum.synchronize_repodata]
        else:
            actions = []
    else:
        actions = new_channel.actions

    includelist = new_channel.metadata.includelist
    excludelist = new_channel.metadata.excludelist

    if includelist is not None and excludelist is not None:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="Cannot use both `includelist` and `excludelist` together.",
        )

    user_attrs = new_channel.dict(exclude_unset=True)

    if "size_limit" in user_attrs:
        auth.assert_set_channel_size_limit()
        size_limit = new_channel.size_limit
    else:
        if config.configured_section("quotas"):
            size_limit = config.quotas_channel_quota
        else:
            size_limit = None

    channel = dao.create_channel(new_channel, user_id, authorization.OWNER,
                                 size_limit)
    pkgstore = config.get_package_store()
    pkgstore.create_channel(new_channel.name)
    indexing.update_indexes(dao, pkgstore, new_channel.name)

    # register mirror
    if is_mirror and register_mirror:
        mirror_url = str(new_channel.mirror_channel_url)
        mirror_url = mirror_url.replace("get", "api/channels")
        headers = {"x-api-key": mirror_api_key} if mirror_api_key else {}
        api_endpoint = str(
            request.url.replace(query=None)) + '/' + new_channel.name
        response = session.post(
            mirror_url + '/mirrors',
            json={
                "url": api_endpoint.replace("api/channels", "get"),
                "api_endpoint": api_endpoint,
                "metrics_endpoint": api_endpoint.replace("api", "metrics"),
            },
            headers=headers,
        )
        if response.status_code != 201:
            logger.warning(
                f"could not register mirror due to error {response.text}")

    for action in actions:
        task.execute_channel_action(
            action,
            channel,
        )
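
A quick illustration of the URL rewriting done during mirror registration above, using an invented mirror URL:

mirror_channel_url = "https://remote.example.com/get/my-channel"   # hypothetical
mirror_api_url = mirror_channel_url.replace("get", "api/channels")
# -> "https://remote.example.com/api/channels/my-channel"; the POST then goes to
#    ".../api/channels/my-channel/mirrors" with a payload pointing back at this
#    server's get, api/channels and metrics/channels URLs for the new channel.
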
Example #9
def initial_sync_mirror(
    channel_name: str,
    remote_repository: RemoteRepository,
    arch: str,
    dao: Dao,
    pkgstore: PackageStore,
    auth: authorization.Rules,
    includelist: List[str] = None,
    excludelist: List[str] = None,
    skip_errors: bool = True,
    use_repodata: bool = False,
):

    force = True  # needed for updating packages

    for repodata_fn in ["repodata_from_packages.json", "repodata.json"]:
        try:
            repo_file = remote_repository.open(os.path.join(arch, repodata_fn))
            repodata = json.load(repo_file.file)
            break
        except RemoteServerError:
            logger.error(
                f"cannot get {repodata_fn} for channel {arch}/{channel_name}."
            )
            if repodata_fn == "repodata.json":
                logger.error(f"Giving up for {channel_name}/{arch}.")
                return
            else:
                logger.error("Trying next filename.")
                continue
        except json.JSONDecodeError:
            logger.error(f"repodata.json badly formatted for arch {arch}"
                         f"in channel {channel_name}")
            if repodata_fn == "repodata.json":
                return

    channel = dao.get_channel(channel_name)

    if not channel:
        logger.error(f"channel {channel_name} not found")
        return

    from quetz.main import handle_package_files

    packages = repodata.get("packages", {})

    version_methods = [
        _check_checksum(dao, channel_name, arch, "sha256"),
        _check_checksum(dao, channel_name, arch, "md5"),
    ]

    config = Config()
    max_batch_length = config.mirroring_batch_length
    max_batch_size = config.mirroring_batch_size
    # version_methods are context managers (for example, to update the db
    # after all packages have been checked), so we need to enter the context
    # for each
    any_updated = False
    with contextlib.ExitStack() as version_stack:

        version_checks = [
            version_stack.enter_context(method) for method in version_methods
        ]

        update_batch = []
        update_size = 0

        def handle_batch(update_batch):
            logger.debug(f"Handling batch: {update_batch}")
            if not update_batch:
                return False

            remote_packages = []
            remote_packages_with_metadata = []

            with ThreadPoolExecutor(
                max_workers=config.mirroring_num_parallel_downloads
            ) as executor:
                for f in executor.map(
                    download_file,
                    (remote_repository,) * len(update_batch),
                    update_batch,
                ):
                    if f is not None:
                        remote_packages.append(f[0])
                        remote_packages_with_metadata.append(f)

            try:
                if use_repodata:
                    handle_repodata_package(
                        channel,
                        remote_packages_with_metadata,
                        dao,
                        auth,
                        force,
                        pkgstore,
                        config,
                    )

                else:
                    handle_package_files(
                        channel,
                        remote_packages,
                        dao,
                        auth,
                        force,
                        is_mirror_op=True,
                    )
                return True

            except Exception as exc:
                logger.error(
                    f"could not process package {update_batch} from channel "
                    f"{channel_name} due to error {exc} of "
                    f"type {exc.__class__.__name__}")
                if not skip_errors:
                    raise exc

            return False

        for package_name, metadata in packages.items():
            if check_package_membership(package_name, includelist,
                                        excludelist):
                path = os.path.join(arch, package_name)

                # try to find out whether it's a new package version

                is_uptodate = None
                for _check in version_checks:
                    is_uptodate = _check(package_name, metadata)
                    if is_uptodate is not None:
                        break

                # if package is up-to-date skip uploading file
                if is_uptodate:
                    continue
                else:
                    logger.debug(
                        f"updating package {package_name} from {arch}")

                update_batch.append((path, package_name, metadata))
                update_size += metadata.get('size', 100_000)

            if (len(update_batch) >= max_batch_length
                    or update_size >= max_batch_size):
                logger.debug(f"Executing batch with {update_size}")
                any_updated |= handle_batch(update_batch)
                update_batch.clear()
                update_size = 0

        # handle final batch
        any_updated |= handle_batch(update_batch)

    if any_updated:
        indexing.update_indexes(dao, pkgstore, channel_name, subdirs=[arch])
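
A hedged sketch of the include/exclude filtering applied above: a package is mirrored when it matches the includelist (if one is given) and is not matched by the excludelist. The real quetz helper may match differently; this only illustrates the contract used by the loop.

from typing import List, Optional

def check_package_membership(
    package_name: str,
    includelist: Optional[List[str]] = None,
    excludelist: Optional[List[str]] = None,
) -> bool:
    # package_name is a filename such as "some-package-1.0-0.tar.bz2"
    if includelist is not None:
        return any(package_name.startswith(name) for name in includelist)
    if excludelist is not None:
        return not any(package_name.startswith(name) for name in excludelist)
    return True
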
Example #10
def synchronize_metrics_from_mirrors(
    channel_name: str,
    dao: Dao,
    session: requests.Session,
    now: Optional[datetime] = None,
):
    # a datetime.utcnow() default would be evaluated only once, at import time,
    # so resolve "now" at call time instead
    if now is None:
        now = datetime.utcnow()
    logger = logging.getLogger("quetz")
    channel = dao.get_channel(channel_name)
    if not channel:
        return
    for m in channel.mirrors:
        if not m.metrics_endpoint:
            logger.warning(
                f"metrics endpoint not configured for mirror {m.url}. "
                "Skipping metrics synchronisation"
            )
            continue
        query_str = ["period=H"]
        start_time: Optional[datetime]
        if m.last_synchronised:
            start_time = m.last_synchronised.replace(minute=0, second=0, microsecond=0)
            query_str.append(f"start={start_time.isoformat()}")
        else:
            start_time = None

        # exclude incomplete intervals (the current hour)
        end_time = now.replace(minute=0, second=0, microsecond=0)

        if start_time == end_time:
            logger.debug(f"metrics data for mirror {m.url} are up-to-date")
            continue

        query_str.append(f"end={end_time.isoformat()}")

        metrics_url = m.metrics_endpoint + "?" + "&".join(query_str)
        response = session.get(metrics_url)

        if response.status_code != 200:
            logger.error(
                f"mirror server {metrics_url} returned bad response with code "
                f"{response.status_code} and message {response.text}"
            )
            continue

        response_data = response.json()
        try:
            packages = response_data["packages"]
        except KeyError:
            logger.error(
                f"malformed response received from {metrics_url}: "
                "missing 'packages' key"
            )
            continue

        for platform_filename, data in packages.items():
            platform, filename = platform_filename.split('/')
            for s in data["series"]:
                timestamp = datetime.fromisoformat(s["timestamp"])
                count = s["count"]
                dao.incr_download_count(
                    channel_name, filename, platform, timestamp, count
                )
        logger.debug(f"synchronized metrics from {metrics_url}")
        m.last_synchronised = end_time
        dao.db.commit()
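
A hedged illustration of the metrics payload the parsing loop above expects from a mirror's metrics endpoint; the key names come from the code, the values are invented:

example_metrics_response = {
    "packages": {
        "linux-64/some-package-1.0-0.tar.bz2": {
            "series": [
                {"timestamp": "2021-05-01T10:00:00", "count": 3},
                {"timestamp": "2021-05-01T11:00:00", "count": 7},
            ]
        }
    }
}
# each "platform/filename" key is split on "/" and every hourly sample is fed to
# dao.incr_download_count(channel_name, filename, platform, timestamp, count)
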
Example #11
def reindex_packages_from_store(dao: Dao,
                                config: Config,
                                channel_name: str,
                                user_id,
                                sync: bool = True):
    """Reindex packages from files in the package store"""

    logger.debug(f"Re-indexing channel {channel_name}")

    channel = dao.get_channel(channel_name)
    pkg_db = []
    if channel:
        if not sync:
            for package in channel.packages:
                dao.db.delete(package)
            dao.db.commit()
        else:
            dao.cleanup_channel_db(channel_name)
            for package in channel.packages:
                for pv in package.package_versions:  # type: ignore
                    pkg_db.append(f"{pv.platform}/{pv.filename}")
            dao.db.commit()
    else:
        data = rest_models.Channel(name=channel_name,
                                   description="re-indexed from store",
                                   private=True)
        channel = dao.create_channel(data, user_id, authorization.OWNER)

    logger.debug(f"Reading package list for channel {channel_name}")
    user_id = uuid_to_bytes(user_id)
    pkgstore = config.get_package_store()
    all_files = pkgstore.list_files(channel_name)
    pkg_files = [f for f in all_files if f.endswith(".tar.bz2")]
    nthreads = config.general_package_unpack_threads

    logger.debug(
        f"Found {len(pkg_db)} packages for channel {channel_name} in database")
    logger.debug(
        f"Found {len(pkg_files)} packages for channel {channel_name} in pkgstore"
    )

    pkg_files = list(set(pkg_files) - set(pkg_db))
    logger.debug(
        f"Importing {len(pkg_files)} packages for channel {channel_name}" +
        " from pkgstore")

    for pkg_group in chunks(pkg_files, nthreads * 8):
        tic = time.perf_counter()
        with ThreadPoolExecutor(max_workers=nthreads) as executor:
            results = []
            for fname in pkg_group:
                results.append(
                    executor.submit(handle_condainfo, pkgstore, channel_name,
                                    fname))
            for future in as_completed(results):
                condainfo = future.result()
                if condainfo:
                    handle_file(channel_name, condainfo, dao, user_id)

        toc = time.perf_counter()
        logger.debug(
            f"Imported files {pkg_group[0]} to {pkg_group[-1]} " +
            f"for channel {channel_name} in {toc - tic:0.4f} seconds " +
            f"using {nthreads} threads")

        try:
            update_indexes(dao, pkgstore, channel_name)
            dao.db.commit()
        except IntegrityError:
            dao.rollback()
            logger.error(f"Update index {channel_name} failed")
    dao.cleanup_channel_db(channel_name)
    dao.db.commit()
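
For reference, a typical implementation of the chunks helper used above to split the file list into batches of nthreads * 8 (a sketch; the actual quetz utility may differ):

from typing import Iterator, List, Sequence, TypeVar

T = TypeVar("T")

def chunks(seq: Sequence[T], size: int) -> Iterator[List[T]]:
    """Yield successive slices of seq, each at most size items long."""
    for i in range(0, len(seq), size):
        yield list(seq[i:i + size])
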
Example #12
def initial_sync_mirror(
    channel_name: str,
    remote_repository: RemoteRepository,
    arch: str,
    dao: Dao,
    pkgstore: PackageStore,
    auth: authorization.Rules,
    skip_errors: bool = True,
):

    force = True  # needed for updating packages

    try:
        repo_file = remote_repository.open(os.path.join(arch, "repodata.json"))
        repodata = json.load(repo_file.file)
    except RemoteServerError:
        logger.error(f"can not get repodata.json for channel {channel_name}")
        return
    except json.JSONDecodeError:
        logger.error(
            f"repodata.json badly formatted for arch {arch} in channel {channel_name}"
        )
        return

    channel = dao.get_channel(channel_name)

    from quetz.main import handle_package_files

    packages = repodata.get("packages", {})

    version_methods = [
        _check_timestamp(channel, dao),
        _check_checksum(pkgstore, channel_name, arch, "sha256"),
        _check_checksum(pkgstore, channel_name, arch, "md5"),
    ]

    # version_methods are context managers (for example, to update the db
    # after all packages have been checked), so we need to enter the context
    # for each
    any_updated = False
    with contextlib.ExitStack() as version_stack:

        version_checks = [
            version_stack.enter_context(method) for method in version_methods
        ]

        for package_name, metadata in packages.items():
            path = os.path.join(arch, package_name)

            # try to find out whether it's a new package version

            is_uptodate = None
            for _check in version_checks:
                is_uptodate = _check(package_name, metadata)
                if is_uptodate is not None:
                    break

            # if package is up-to-date skip uploading file
            if is_uptodate:
                logger.debug(
                    f"package {package_name} from {arch} up-to-date. Not updating"
                )
                continue
            else:
                logger.debug(f"updating package {package_name} form {arch}")

            try:
                remote_package = remote_repository.open(path)
            except RemoteServerError:
                logger.error(f"remote server error when getting a file {path}")
                continue

            files = [remote_package]
            try:
                handle_package_files(
                    channel_name,
                    files,
                    dao,
                    auth,
                    force,
                )
                any_updated = True
            except Exception as exc:
                logger.error(
                    f"could not process package {package_name} from channel "
                    f"{channel_name} due to error {exc}")
                if not skip_errors:
                    raise exc

    if any_updated:
        indexing.update_indexes(dao, pkgstore, channel_name, subdirs=[arch])
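
A self-contained, hedged sketch of the "version check as context manager" pattern used by _check_timestamp and _check_checksum above: each context manager yields a check callable, and any batched bookkeeping runs when the ExitStack unwinds. The real checks consult the database or package store; this shows only the mechanism.

import contextlib

@contextlib.contextmanager
def make_checksum_check(known_checksums, keyname="sha256"):
    checked = 0

    def _check(package_name, metadata):
        nonlocal checked
        checked += 1
        remote = metadata.get(keyname)
        local = known_checksums.get(package_name)
        if remote is None or local is None:
            return None          # cannot decide, defer to the next check
        return remote == local   # True -> up to date, False -> needs update

    yield _check
    # teardown: flush any batched updates; here we just report
    print(f"checked {checked} package(s)")

with contextlib.ExitStack() as stack:
    check = stack.enter_context(make_checksum_check({"pkg-1.0-0.tar.bz2": "abc"}))
    print(check("pkg-1.0-0.tar.bz2", {"sha256": "abc"}))  # True
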