async def _check_hashes(
    package_version: Tuple[str, str, str],
    package_versions,
    source,
    removed_packages: set,
    missing_version: MissingVersionMessage,
    hash_mismatch: HashMismatchMessage,
    graph: GraphDatabase,
) -> bool:
    """Verify stored artifact hashes for one package release against its index.

    ``package_version`` is a ``(package_name, version, index_url)`` triple.

    Returns True when the SHA-256 hashes recorded in the graph database match
    the hashes currently published on the index; False when the version is no
    longer provided, the index could not be queried, or the hash sets differ.
    Publishing failures are logged best-effort and never raise to the caller.

    ``removed_packages`` is accepted for signature compatibility with the
    sibling check coroutines but is not consulted here.
    """
    name, version, index_url = package_version

    if version not in package_versions.versions:
        # The release vanished from the index; report it upstream.
        try:
            await missing_version.publish_to_topic(
                missing_version.MessageContents(
                    index_url=index_url,
                    package_name=name,
                    package_version=version,
                    component_name=COMPONENT_NAME,
                    service_version=__package_update_version__,
                ),
            )
            _LOGGER.info("%r no longer provides %r-%r", index_url, name, version)
        except Exception as identifier:
            _LOGGER.exception("Failed to publish with the following error message: %r", str(identifier))
        # The version is missing regardless of whether the message went out;
        # previously a failed publish fell through and pointlessly fetched
        # hashes for a release we already know is gone.
        return False

    try:
        source_hashes = {i["sha256"] for i in await source.get_package_hashes(name, version)}
    except ClientResponseError:
        _LOGGER.exception(
            "404 error retrieving hashes for: %r==%r on %r", name, version, index_url,
        )
        return False  # webpage might be down

    stored_hashes = set(
        graph.get_python_package_hashes_sha256(name, version, index_url),
    )

    if source_hashes != stored_hashes:
        try:
            await hash_mismatch.publish_to_topic(
                hash_mismatch.MessageContents(
                    index_url=index_url,
                    package_name=name,
                    package_version=version,
                    missing_from_source=list(stored_hashes - source_hashes),
                    missing_from_database=list(source_hashes - stored_hashes),
                    component_name=COMPONENT_NAME,
                    service_version=__package_update_version__,
                ),
            )
            _LOGGER.debug("Source hashes:\n%r\nStored hashes:\n%r\nDo not match!", source_hashes, stored_hashes)
        except Exception as identifier:
            _LOGGER.exception("Failed to publish with the following error message: %r", str(identifier))
        # A mismatch was detected either way; previously a failed publish fell
        # through to ``return True``, falsely reporting the hashes as valid.
        return False

    return True
from ..metrics import scheduled_workflows
from .metrics_hash_mismatch import hash_mismatch_exceptions
from .metrics_hash_mismatch import hash_mismatch_success
from .metrics_hash_mismatch import hash_mismatch_in_progress
from prometheus_async.aio import track_inprogress, count_exceptions
from thoth.messaging import HashMismatchMessage
from thoth.common import OpenShift
from thoth.storages import GraphDatabase

_LOGGER = logging.getLogger(__name__)


# Prometheus instrumentation: count raised exceptions and track in-flight
# handler invocations for this message type.
@count_exceptions(hash_mismatch_exceptions)
@track_inprogress(hash_mismatch_in_progress)
@register_handler(HashMismatchMessage().topic_name, ["v1"])
async def parse_hash_mismatch(mismatch: Dict[str, Any], openshift: OpenShift, graph: GraphDatabase, **kwargs):
    """Process a hash mismatch message from package-update producer."""
    # NOTE(review): scheduling is gated on a feature flag; presumably set via
    # environment configuration — confirm against the Configuration class.
    if Configuration.THOTH_INVESTIGATOR_SCHEDULE_SOLVER:
        # Solver logic
        solver_wf_scheduled = await learn_using_solver(
            openshift=openshift,
            graph=graph,
            is_present=False,
            package_name=mismatch["package_name"],
            index_url=mismatch["index_url"],
            package_version=mismatch["package_version"],
        )
        scheduled_workflows.labels(
import asyncio
import logging
import faust
import os
import ssl
from urllib.parse import urlparse
from aiohttp import web

# Logging must be configured before the module-level log call below.
init_logging()

_LOGGER = logging.getLogger(__name__)
_LOGGER.info("Thoth Package Update consumer v%s", __service_version__)

# Faust application and the Kafka topics this consumer works with.
app = MessageBase().app
hash_mismatch_topic = HashMismatchMessage().topic
missing_package_topic = MissingPackageMessage().topic
missing_version_topic = MissingVersionMessage().topic


@app.page("/metrics")
async def get_metrics(self, request):
    """Serve the metrics from the consumer registry."""
    # Prometheus exposition format, rendered as plain text.
    return web.Response(text=generate_latest().decode("utf-8"))


@app.page("/_health")
async def get_health(self, request):
    """Serve a readiness/liveness probe endpoint."""
    data = {"status": "ready", "version": __service_version__}
    return web.json_response(data)
async def main():
    """Run package-update."""
    database = GraphDatabase()
    database.connect()

    removed = set()
    mismatch_msg = HashMismatchMessage()
    missing_pkg_msg = MissingPackageMessage()
    missing_ver_msg = MissingVersionMessage()

    # Collect index metadata for every configured package index concurrently.
    index_urls = {entry["url"] for entry in database.get_python_package_index_all()}
    sources = {}
    await asyncio.gather(
        *(_gather_index_info(url, sources) for url in index_urls),
        return_exceptions=True,
    )

    # Pass 1: is each known package still available on its index?
    packages = database.get_python_packages_all(count=None, distinct=True)
    _LOGGER.info("Checking availability of %r package(s)", len(packages))
    await asyncio.gather(
        *(
            _check_package_availability(
                package=pkg,
                sources=sources,
                removed_packages=removed,
                missing_package=missing_pkg_msg,
            )
            for pkg in packages
        ),
        return_exceptions=True,
    )

    # Fetch the full version listing for every (name, index) pair; entries
    # left at None indicate the listing could not be retrieved.
    package_versions = database.get_python_package_versions_all(count=None, distinct=True)
    name_index_pairs = {(pv[0], pv[2]) for pv in package_versions}
    versions = dict.fromkeys(name_index_pairs)
    await asyncio.gather(
        *(
            _get_all_versions(
                package_name=pair[0],
                source=pair[1],
                sources=sources,
                accumulator=versions,
            )
            for pair in name_index_pairs
        ),
        return_exceptions=True,
    )

    # Pass 2: verify stored hashes for each release that is still reachable.
    _LOGGER.info("Checking integrity of %r package(s)", len(package_versions))
    hash_checks = []
    for pv in package_versions:
        # Skip because we have already marked the entire package as missing,
        # or its version listing 404'd.
        if (pv[2], pv[0]) in removed or versions[(pv[0], pv[2])] is None:
            continue
        hash_checks.append(
            _check_hashes(
                package_version=pv,
                package_versions=versions[(pv[0], pv[2])],
                source=sources[pv[2]]["source"],
                removed_packages=removed,
                missing_version=missing_ver_msg,
                hash_mismatch=mismatch_msg,
                graph=database,
            )
        )
    await asyncio.gather(*hash_checks, return_exceptions=True)