Ejemplo n.º 1
0
def _send_schedule_message(
    message_contents: dict,
    message_type: MessageBase,
    content: typing.Type[BaseMessageContents],
    with_authentication: bool = False,
    authenticated: bool = False,
):
    """Publish a schedule message and build the (payload, 202) response tuple.

    Raises ValueError when ``message_contents`` carries no ``job_id``.
    """
    # Stamp the component/version metadata expected by downstream consumers.
    message_contents["service_version"] = SERVICE_VERSION
    message_contents["component_name"] = COMPONENT_NAME
    producer.publish_to_topic(p, message_type, content(**message_contents))

    if "job_id" not in message_contents:
        raise ValueError(
            f"job_id was not set for message sent to {message_type().topic_name}")

    payload = {
        "analysis_id": message_contents["job_id"],
        "parameters": message_contents,
        "cached": False,
    }
    if with_authentication:
        payload["authenticated"] = authenticated
    return payload, 202
Ejemplo n.º 2
0
async def _check_hashes(
    package_version: Tuple[str, str, str], package_versions, source, removed_packages: set, graph: GraphDatabase,
) -> bool:
    """Verify that stored artifact hashes match the ones advertised by the index.

    Returns False when the release disappeared from the index, the index
    could not be reached, or the hash sets differ; True otherwise.
    """
    name, version, index_url = package_version

    if version not in package_versions.versions:
        # The release vanished from the index; announce it (best effort).
        try:
            producer.publish_to_topic(
                p,
                missing_version_message,
                MissingVersionContents(
                    index_url=index_url,
                    package_name=name,
                    package_version=version,
                    component_name=COMPONENT_NAME,
                    service_version=__package_update_version__,
                ),
            )
            _LOGGER.info("%r no longer provides %r-%r", index_url, name, version)
            return False
        except Exception as identifier:
            # Publishing failed: log it and fall through to the hash comparison.
            _LOGGER.exception("Failed to publish with the following error message: %r", str(identifier))

    try:
        hash_records = await source.get_package_hashes(name, version)
        source_hashes = {record["sha256"] for record in hash_records}
    except ClientResponseError:
        _LOGGER.warning(
            "404 error retrieving hashes for: %r==%r on %r", name, version, index_url,
        )
        return False  # webpage might be down

    stored_hashes = set(
        graph.get_python_package_hashes_sha256(name, version, index_url),
    )
    if source_hashes != stored_hashes:
        # Hash sets diverge between index and database; report it (best effort).
        try:
            producer.publish_to_topic(
                p,
                hash_mismatch_message,
                HashMismatchContents(
                    index_url=index_url,
                    package_name=name,
                    package_version=version,
                    missing_from_source=list(stored_hashes - source_hashes),
                    missing_from_database=list(source_hashes - stored_hashes),
                    component_name=COMPONENT_NAME,
                    service_version=__package_update_version__,
                ),
            )
            _LOGGER.debug("Source hashes:\n%r\nStored hashes:\n%r\nDo not match!", source_hashes, stored_hashes)
            return False
        except Exception as identifier:
            _LOGGER.exception("Failed to publish with the following error message: %r", str(identifier))

    return True
Ejemplo n.º 3
0
def _record_vulnerability(graph: GraphDatabase, vulnerability: Dict[str, Any],
                          cve_messages_sent: int) -> int:
    """Record the given vulnerability in the database.

    Only PyPI packages are processed; one CVE record is created per affected
    version, and a message is published for each record newly created.
    Returns the updated message counter.
    """
    _LOGGER.info("Creating CVE entries for %r...", vulnerability["id"])

    cve_id = vulnerability["id"]
    for affected in vulnerability.get("affected") or []:
        package = affected["package"]
        if package["ecosystem"] != "PyPI":
            _LOGGER.info(
                "Skipping affected package %r from ecosystem %r",
                package["name"],
                package["ecosystem"],
            )
            continue

        package_name = package["name"]

        for version in affected.get("versions") or []:
            # Returns a truthy value when the record was already present.
            already_present = graph.create_python_cve_record(
                package_name,
                version,
                "https://pypi.org/simple",
                cve_id=cve_id,
                details=vulnerability["details"],
                link=_get_vulnerability_link(vulnerability),
            )

            if not already_present:
                _LOGGER.info(
                    "Created new CVE %r entry for package %r in version %r",
                    cve_id,
                    package_name,
                    version,
                )

                publish_to_topic(
                    _PRODUCER,
                    cve_provided_message,
                    CVEProvidedMessageContent(
                        component_name=_COMPONENT_NAME,
                        service_version=__component_version__,
                        package_name=package_name,
                        package_version=version,
                        index_url="https://pypi.org/simple",
                    ),
                )

                cve_messages_sent += 1

    return cve_messages_sent
Ejemplo n.º 4
0
def _check_package_availability(package: Tuple[str, str, str], sources: Dict[str, Any], removed_packages: set) -> bool:
    src = sources[package[1]]
    if not package[0] in src["packages"]:
        removed_packages.add((package[1], package[0]))
        try:
            producer.publish_to_topic(
                p,
                missing_package_message,
                MissingPackageContents(
                    index_url=package[1],
                    package_name=package[0],
                    component_name=COMPONENT_NAME,
                    service_version=__package_update_version__,
                ),
            )
            _LOGGER.info("%r no longer provides %r", package[1], package[0])
            return False
        except Exception as e:
            _LOGGER.exception("Failed to publish with the following error message: %r", e)
    return True
Ejemplo n.º 5
0
def messaging(
    partitions: int,
    replication: int,
    topic_name: Optional[str],
    create_if_not_exist: bool,
    message_contents: Optional[str],
    message_file: Optional[str],
):
    """Run messaging cli with the given arguments.

    Messages come either from ``message_file`` (a JSON list of
    ``{"topic_name": ..., "message_contents": ...}`` entries) or from the
    ``topic_name``/``message_contents`` pair; when a file is given it takes
    precedence and the pair is ignored with a warning.

    Raises AttributeError when no file is given and either ``topic_name`` or
    ``message_contents`` is missing.
    """
    admin = admin_client.create_admin_client()
    prod = producer.create_producer()
    if message_file:
        if topic_name or message_contents:
            _LOGGER.warning(
                "Topic name and/or message contents are being ignored due to presence of message file."
            )
        with open(message_file, "r") as m_file:
            all_messages = json.load(m_file)

    else:
        if topic_name is None or message_contents is None:
            raise AttributeError(
                "Both topic_name and message_contents must be set when not reading from file."
            )

        # Wrap the single CLI-provided message in the same structure a message
        # file would contain, so the loop below handles both modes uniformly.
        temp_message = {}
        temp_message["message_contents"] = json.loads(message_contents)
        temp_message["topic_name"] = topic_name
        all_messages = [temp_message]

    # NOTE: we don't need to check based on deployment because it is only prepended after we call __init__
    for m in all_messages:
        m_contents = m["message_contents"]
        # Fill in defaults for the metadata fields every message carries.
        if "component_name" not in m_contents:
            m_contents["component_name"] = "messaging-cli"
        if "service_version" not in m_contents:
            m_contents["service_version"] = __version__
        m_base_name = m["topic_name"]

        validate: bool
        # get or create message type
        for message in ALL_MESSAGES:
            if m_base_name == message.base_name:
                _LOGGER.info(
                    f"Found message in registered list: {m_base_name}")
                topic = message
                # Known message types get their contents validated on publish.
                validate = True
                break
        else:
            # Unknown message type: skip validation, fall back to a generic
            # message model, and create the topic on Kafka if allowed.
            validate = False
            _LOGGER.info(
                "Message not in the registered list checking topics on Kafka..."
            )

            kafka_topic_list = admin.list_topics().topics
            topic = message_factory(b_name=m_base_name,
                                    message_model=BaseMessageContents)

            if topic.topic_name not in kafka_topic_list:
                if not create_if_not_exist:
                    raise Exception(
                        "Topic name does not match messages and message should not be created."
                    )
                _LOGGER.info("Creating new topic.")
                admin_client.create_topic(admin,
                                          topic,
                                          partitions=partitions,
                                          replication_factor=replication)

        producer.publish_to_topic(prod, topic, m_contents, validate=validate)

        _LOGGER.info(
            f"Sent message {topic.topic_name} with content: {m_contents}")
    # Block until all queued messages have actually been delivered.
    prod.flush()
Ejemplo n.º 6
0
def package_releases_update(
    monitored_packages: Optional[dict],
    *,
    graph: GraphDatabase,
    package_names: Optional[List[str]] = None,
    only_if_package_seen: bool = False,
) -> int:
    """Check for updates of packages, notify about updates if configured so.

    Walks every enabled Python package index known to the graph database,
    records each release, and publishes a ``package_released_message`` for
    releases not seen before.  When ``monitored_packages`` is provided, a
    release notification is attempted for every added release (failures are
    non-fatal).

    :param monitored_packages: release-notification configuration, or None.
    :param graph: graph database adapter to record releases in.
    :param package_names: restrict the check to these packages; when None,
        every package listed by each index is checked.
    :param only_if_package_seen: only record releases of packages already
        known to the database.
    :return: number of package-released messages sent.
    """
    # All enabled package indexes configured in the graph database.
    sources = [
        Source(**config) for config in graph.get_python_package_index_all(
            enabled=True)  # type: ignore
    ]

    package_releases_messages_sent = 0

    for package_index in sources:
        _LOGGER.info("Checking index %r for new package releases",
                     package_index.url)
        for package_name in package_names or package_index.get_packages():
            try:
                package_versions = package_index.get_package_versions(
                    package_name)
            except NotFound as exc:
                _LOGGER.debug(
                    "No versions found for package %r on %r: %s",
                    package_name,
                    package_index.url,
                    str(exc),
                )
                continue
            except Exception as exc:
                # Any other failure is logged and the package skipped so one
                # broken package does not abort the whole index scan.
                _LOGGER.exception(
                    "Failed to retrieve package versions for %r: %s",
                    package_name,
                    str(exc),
                )
                continue

            for package_version in package_versions:
                # None means the release was skipped (only_if_package_seen);
                # otherwise a pair whose second element flags prior existence.
                added = graph.create_python_package_version_entity(
                    package_name,
                    package_version,
                    package_index.url,
                    only_if_package_seen=only_if_package_seen,
                )

                if added is None:
                    _LOGGER.debug(
                        "Package %r in version %r hosted on %r was not added - it was not previously seen",
                        package_name,
                        package_version,
                        package_index.url,
                    )
                    continue

                existed = added[1]
                if not existed:
                    _LOGGER.info(
                        "New release of package %r in version %r hosted on %r added",
                        package_name,
                        package_version,
                        package_index.url,
                    )

                    producer.publish_to_topic(
                        p,
                        package_released_message,
                        PackageReleasedContent(
                            package_name=package_name,
                            package_version=package_version,
                            index_url=package_index.url,
                            component_name=COMPONENT_NAME,
                            service_version=__service_version__,
                        ),
                    )

                    _LOGGER.debug(
                        "Package %r in version %r hosted on %r added to list to be sent as Kafka message %r",
                        package_name,
                        package_version,
                        package_index.url,
                        package_released_message.topic_name,
                    )
                    package_releases_messages_sent += 1
                else:
                    _LOGGER.debug(
                        "Release of %r in version %r hosted on %r already present",
                        package_name,
                        package_version,
                        package_index.url,
                    )

                if added and monitored_packages:
                    entity = added[0]
                    # Notification failures are deliberately non-fatal.
                    try:
                        release_notification(
                            monitored_packages,
                            entity.package_name,
                            entity.package_version,
                            package_index.url,
                        )
                    except Exception as exc:
                        _LOGGER.exception(
                            f"Failed to do release notification for {package_name} ({package_version} "
                            f"from {package_index.url}), error is not fatal: {str(exc)}"
                        )

    return package_releases_messages_sent
Ejemplo n.º 7
0
def main():
    """Run thoth-reporter to provide information on status of services to Thoth contributors.

    Parses the configured start/end dates, clamps them to valid ranges,
    analyzes adviser data day by day, pushes metrics to the Prometheus
    pushgateway (when enabled), and publishes adviser-justification messages
    to Kafka (when enabled).
    """
    if not _SEND_MESSAGES:
        _LOGGER.info(
            "No messages are sent. THOTH_REPORTER_SEND_KAFKA_MESSAGES is set to 0"
        )

    # Validate the configured start date format (YYYY-MM-DD).
    try:
        datetime.datetime.strptime(START_DATE, "%Y-%m-%d")
    except ValueError as err:
        _LOGGER.error(
            f"THOTH_REPORTER_START_DATE uses incorrect format: {err}")

    s_date = START_DATE.split("-")
    start_date = datetime.date(year=int(s_date[0]),
                               month=int(s_date[1]),
                               day=int(s_date[2]))

    # Validate the configured end date format (YYYY-MM-DD).
    try:
        datetime.datetime.strptime(END_DATE, "%Y-%m-%d")
    except ValueError as err:
        _LOGGER.error(f"THOTH_REPORTER_END_DATE uses incorrect format: {err}")

    e_date = END_DATE.split("-")
    end_date = datetime.date(year=int(e_date[0]),
                             month=int(e_date[1]),
                             day=int(e_date[2]))

    _LOGGER.info(f"Start Date considered: {start_date}")
    _LOGGER.info(f"End Date considered (excluded): {end_date}")

    delta = datetime.timedelta(days=1)

    # BUGFIX: the check used to be ``start_date == TODAY + delta``, which only
    # clamped a start date exactly one day ahead and let later future dates
    # slip through; any start date after today is in the future.
    if start_date > TODAY:
        _LOGGER.info(
            f"start date ({start_date}) cannot be in the future. Today is: {TODAY}."
        )
        start_date = TODAY
        _LOGGER.info(f"new start date is: {start_date}.")

    # The end date is exclusive, so tomorrow is the latest acceptable value.
    if end_date > TODAY + delta:
        _LOGGER.info(
            f"end date ({end_date}) cannot be in the future. Today is: {TODAY}."
        )
        end_date = TODAY
        _LOGGER.info(f"new end date is: {end_date}.")

    if end_date < start_date:
        _LOGGER.error(
            f"Cannot analyze adviser data: end date ({end_date}) < start_date ({start_date})."
        )
        return

    # Widen a zero-length interval to a single day.
    if end_date == start_date:
        if start_date == TODAY:
            _LOGGER.info(
                f"end date ({end_date}) == start_date ({start_date}) == today ({TODAY})."
            )
            start_date = start_date - delta
            _LOGGER.info(f"new start date is: {start_date}.")
        else:
            _LOGGER.info(
                f"end date ({end_date}) == start_date ({start_date}).")
            end_date = end_date + datetime.timedelta(days=1)
            _LOGGER.info(f"new end date (excluded) is: {end_date}.")

    _LOGGER.info(f"Initial start date: {start_date}")
    _LOGGER.info(f"Initial end date (excluded): {end_date}")

    total_justifications: List[Dict[str, Any]] = []

    current_initial_date = start_date

    # Analyze one day per iteration over [start_date, end_date).
    while current_initial_date < end_date:

        current_end_date = current_initial_date + delta

        _LOGGER.info(f"Analyzing for start date: {current_initial_date}")
        _LOGGER.info(f"Analyzing for end date (excluded): {current_end_date}")

        stats = evaluate_requests_statistics(
            current_initial_date=current_initial_date,
            current_end_date=current_end_date,
            results_store=RESULTS_STORES,
            store_on_ceph=STORE_ON_CEPH,
            store_on_public_bucket=STORE_ON_PUBLIC_CEPH,
        )

        # Assign metrics for pushgateway
        for stats_analysis in stats:

            thoth_reporter_requests_gauge.labels(
                stats_analysis["component"],
                _THOTH_DEPLOYMENT_NAME).set(stats_analysis["requests"])

            thoth_reporter_reports_gauge.labels(
                stats_analysis["component"],
                _THOTH_DEPLOYMENT_NAME).set(stats_analysis["documents"])

        daily_processed_dataframes = explore_adviser_files(
            current_initial_date=current_initial_date,
            current_end_date=current_end_date,
            total_justifications=total_justifications,
            store_on_ceph=STORE_ON_CEPH,
            store_on_public_bucket=STORE_ON_PUBLIC_CEPH,
        )

        # NOTE(review): these lists are reset each day, so the metrics block
        # below only reflects the final analyzed day — confirm intended.
        justification_to_send = []
        statistics_to_send = []

        if not daily_processed_dataframes:
            current_initial_date += delta
            continue

        # Advise justifications: aggregate ERROR counts per adviser version.
        total_js: Dict[str, Any] = {}
        daily_justifications_df = daily_processed_dataframes[
            "adviser_justifications"]

        if not daily_justifications_df.empty:
            for message in daily_justifications_df["message"].unique():
                for adviser_version in daily_justifications_df[
                        "adviser_version"].unique():
                    subset_df = daily_justifications_df[
                        (daily_justifications_df["message"] == message)
                        & (daily_justifications_df["adviser_version"] ==
                           adviser_version)]

                    if subset_df.shape[0] < 1:
                        continue

                    counts = subset_df["count"].values[0]

                    message_type = subset_df["type"].values[0]

                    # Only error justifications are aggregated for metrics.
                    if message_type != "ERROR":
                        continue

                    if adviser_version not in total_js:
                        total_js[adviser_version] = {}

                        total_js[adviser_version][message] = counts
                    else:
                        if message not in total_js[adviser_version]:
                            total_js[adviser_version][message] = counts
                        else:
                            total_js[adviser_version][message] += counts

            for adviser_version, justifications_info in total_js.items():

                total_errors = 0
                for _, errors_counts in justifications_info.items():
                    total_errors += errors_counts

                for justification, counts in justifications_info.items():

                    if not counts:
                        total = "0"
                        percentage = 0
                    else:
                        total = "+" + "{}".format(int(counts))
                        percentage = counts / total_errors

                    justification_to_send.append(
                        {
                            "adviser_version": adviser_version,
                            "justification": justification,
                            "total": total,
                            "percentage": abs(round(percentage * 100, 3)),
                        }, )
        else:
            _LOGGER.warning("No adviser justification identified.")

        # Advise statistics: aggregate success/failure counts per version.
        total_statistics: Dict[str, Any] = {}

        adviser_statistics = daily_processed_dataframes["adviser_statistics"]

        if not adviser_statistics.empty:
            for adviser_version in adviser_statistics[
                    "adviser_version"].unique():
                subset_df = adviser_statistics[
                    adviser_statistics["adviser_version"] == adviser_version]

                s_counts = 0
                f_counts = 0

                if not subset_df.empty:
                    s_counts = subset_df[subset_df["adviser_version"] ==
                                         adviser_version]["success"].values[0]
                    f_counts = subset_df[subset_df["adviser_version"] ==
                                         adviser_version]["failure"].values[0]

                if adviser_version not in total_statistics:
                    total_statistics[adviser_version] = {}

                    total_statistics[adviser_version]["success"] = s_counts
                    total_statistics[adviser_version]["failure"] = f_counts
                else:
                    total_statistics[adviser_version]["success"] += s_counts
                    total_statistics[adviser_version]["failure"] += f_counts

            for adviser_version, statistics_info in total_statistics.items():

                total = statistics_info["success"] + statistics_info["failure"]

                success_p = statistics_info["success"] / total
                failure_p = statistics_info["failure"] / total

                statistics_to_send.append(
                    {
                        "adviser_version": adviser_version,
                        "success_p": abs(round(success_p * 100, 3)),
                        "failure_p": abs(round(failure_p * 100, 3)),
                    }, )
        else:
            _LOGGER.warning("No adviser statistics identified.")

        current_initial_date += delta

    if _SEND_METRICS:

        if justification_to_send:
            for js in justification_to_send:

                thoth_reporter_failed_adviser_justifications_gauge.labels(
                    adviser_version=js["adviser_version"],
                    justification=js["justification"],
                    env=_THOTH_DEPLOYMENT_NAME).set(js["percentage"])

        if statistics_to_send:
            for a_stats in statistics_to_send:

                thoth_reporter_failed_adviser_gauge.labels(
                    adviser_version=a_stats["adviser_version"],
                    env=_THOTH_DEPLOYMENT_NAME).set(a_stats["failure_p"])

                thoth_reporter_success_adviser_gauge.labels(
                    adviser_version=a_stats["adviser_version"],
                    env=_THOTH_DEPLOYMENT_NAME).set(a_stats["success_p"])

        try:
            _LOGGER.debug(
                "Submitting metrics to Prometheus pushgateway %r",
                _THOTH_METRICS_PUSHGATEWAY_URL,
            )
            push_to_gateway(
                _THOTH_METRICS_PUSHGATEWAY_URL,
                job="advise-reporter",
                registry=prometheus_registry,
            )
        except Exception as exc:
            _LOGGER.exception("An error occurred pushing the metrics: %s",
                              str(exc))

    if not _SEND_MESSAGES:
        return

    # Publish one Kafka message per accumulated adviser justification.
    for advise_justification in total_justifications:
        message = advise_justification["message"]
        count = advise_justification["count"]
        justification_type = advise_justification["type"]
        adviser_version = advise_justification["adviser_version"]

        try:
            producer.publish_to_topic(
                p,
                advise_justification_message,
                AdviseJustificationContents.MessageContents(
                    message=message,
                    count=int(count),
                    justification_type=justification_type,
                    adviser_version=adviser_version,
                    component_name=COMPONENT_NAME,
                    service_version=__service_version__,
                ),
            )
            _LOGGER.debug(
                "Adviser justification message:\n%r\nJustification type:\n%r\nCount:\n%r\n",
                message,
                justification_type,
                count,
            )
        except Exception as identifier:
            _LOGGER.exception(
                "Failed to publish with the following error message: %r",
                identifier)

    p.flush()
Ejemplo n.º 8
0
async def _package_releases_worker(graph: GraphDatabase,
                                   package_index: AIOSource,
                                   package_name: str) -> int:
    """Async handling of new package releases checks.

    Records every release of ``package_name`` from the given index and
    publishes a message for each release not previously known.  Returns
    the number of messages sent.
    """
    try:
        package_versions = await package_index.get_package_versions(
            package_name)
    except NotFoundError as exc:
        _LOGGER.debug(
            "No versions found for package %r on %r: %s",
            package_name,
            package_index.url,
            str(exc),
        )
        return 0
    except Exception as exc:
        # Non-fatal: skip this package and let the caller continue.
        _LOGGER.warning(
            "Failed to retrieve package versions for %r: %s",
            package_name,
            str(exc),
        )
        return 0

    messages_sent = 0
    async for version in package_versions:
        entity_info = graph.create_python_package_version_entity(
            package_name,
            version,
            package_index.url,
        )

        if entity_info is None:
            _LOGGER.debug(
                "Package %r in version %r hosted on %r was not added - it was not previously seen",
                package_name,
                version,
                package_index.url,
            )
            continue

        if entity_info[1]:
            # Release already recorded; nothing to publish.
            _LOGGER.debug(
                "Release of %r in version %r hosted on %r already present",
                package_name,
                version,
                package_index.url,
            )
            continue

        _LOGGER.info(
            "New release of package %r in version %r hosted on %r added",
            package_name,
            version,
            package_index.url,
        )

        producer.publish_to_topic(
            p,
            package_released_message,
            PackageReleasedContent(
                package_name=package_name,
                package_version=version,
                index_url=package_index.url,
                component_name=COMPONENT_NAME,
                service_version=__service_version__,
            ),
        )

        _LOGGER.debug(
            "Package %r in version %r hosted on %r added to list to be sent as Kafka message %r",
            package_name,
            version,
            package_index.url,
            package_released_message.topic_name,
        )
        messages_sent += 1

    return messages_sent
Ejemplo n.º 9
0
def update_cve_info() -> None:
    """Gather CVEs from pyup and assign them in the graph database to packages.

    For each package with known CVEs, the affected version range is resolved
    against PyPI and a CVE record is created (or found) per affected version;
    a ``cve_provided`` message is published for every version processed.
    """
    graph = GraphDatabase()
    graph.connect()
    pypi_solver = get_ecosystem_solver("pypi")
    cve_provided = CVEProvidedMessage()

    for package_name, cves in _list_cves().items():
        package_name = graph.normalize_python_package_name(package_name)

        _LOGGER.debug("Iterating over CVEs for package %r", package_name)
        for cve in cves:
            try:
                # Append the version range (e.g. "<1.2.0") to build a
                # requirement string the solver can resolve.
                package = package_name + cve["v"]
                solved = pypi_solver.solve([package])
            except NotFound as exc:
                _LOGGER.warning("Skipping not found package %s: %s",
                                package_name, str(exc))
                continue
            except Exception as exc:
                _LOGGER.error(
                    "Skipping exception that occurred while resolving %r: %s",
                    package_name,
                    str(exc),
                )
                continue

            _LOGGER.debug(
                "Affected versions for %r using version range %r for CVE %s: %s",
                package_name,
                cve["v"],
                cve["id"],
                str(solved),
            )

            # BUGFIX: this loop previously rebound ``package_name``, clobbering
            # the outer loop variable and corrupting the requirement string
            # built for subsequent CVEs of the same package; use a distinct
            # name for the solver result key instead.
            for solved_name, package_versions in solved.items():
                # Discard index information for now.
                for package_version, _ in package_versions:
                    _LOGGER.debug(
                        "Assigning database CVE %s to package %r in version %r",
                        cve["id"],
                        solved_name,
                        package_version,
                    )
                    existed = graph.create_python_cve_record(
                        solved_name,
                        package_version,
                        "https://pypi.org/simple",
                        cve=cve["cve"],
                        record_id=cve["id"],
                        version_range=cve["v"],
                        advisory=cve["advisory"],
                    )

                    publish_to_topic(
                        producer,
                        cve_provided,
                        cve_provided.MessageContents(
                            component_name=COMPONENT_NAME,
                            service_version=__service_version__,
                            package_name=solved_name,
                            package_version=package_version,
                            index_url="https://pypi.org/simple",
                        ),
                    )

                    if not existed:
                        _LOGGER.info(
                            "Created new CVE entry for package %r in version %r (pyup id: %s)",
                            solved_name,
                            package_version,
                            cve["id"],
                        )
                    else:
                        _LOGGER.info(
                            "CVE entry for package %r in version %r (pyup id: %s) already present",
                            solved_name,
                            package_version,
                            cve["id"],
                        )
Ejemplo n.º 10
0
def main() -> None:
    """Produce Kafka messages depending on the knowledge that needs to be acquired for a certain package.

    Sends up to ``_COUNT`` messages per enabled category (solver, reverse
    solver, security) and pushes counters to the Prometheus pushgateway when
    configured.
    """
    # Initialize before the _COUNT guard: the original left this unbound when
    # _COUNT was falsy, so the `if not max_messages_sent` check below raised
    # UnboundLocalError instead of logging and returning.
    max_messages_sent = 0

    if _COUNT:
        _LOGGER.info(
            "Graph refresh will produce at most %d messages per each category of messages.",
            _COUNT,
        )

        # Each enabled message category contributes up to _COUNT messages.
        factor = 0
        if THOTH_GRAPH_REFRESH_SOLVER:
            _LOGGER.info("unresolved_package_message messages will be sent!")
            factor += 1

        if THOTH_GRAPH_REFRESH_REVSOLVER:
            _LOGGER.info("unrevsolved_package_message messages will be sent!")
            factor += 1

        if THOTH_GRAPH_REFRESH_SECURITY:
            _LOGGER.info("si_unanalyzed_package messages will be sent!")
            factor += 1

        max_messages_sent = _COUNT * factor

    if not max_messages_sent:
        _LOGGER.info("All messages for Graph-refresh-job are disabled.")
        return

    _LOGGER.info("Graph refresh will produce at most %d messages ",
                 max_messages_sent)

    packages: list = []

    solver_messages_sent = 0
    revsolver_messages_sent = 0
    security_messages_sent = 0

    # We dont fetch unsolved packages if both solver and revsolver messages are disabled.
    if THOTH_GRAPH_REFRESH_SOLVER or THOTH_GRAPH_REFRESH_REVSOLVER:
        indexes = _GRAPH_DB.get_python_package_index_urls_all()
        packages = _unsolved_packages(packages=packages)

    if not packages:
        _LOGGER.info("No unsolved packages found")

    # Shuffle not to be dependent on solver message ordering.
    random.shuffle(packages)

    # Tracks (name, version) pairs already sent to the reverse solver so each
    # pair is published only once even if it appears for several indexes.
    revsolver_packages_seen = set()

    for package_name, package_version, index_url, solver_name in packages:
        if THOTH_GRAPH_REFRESH_SOLVER:
            # When the index is unknown, fan out the message to all known indexes.
            # Use a distinct loop variable so the outer `index_url` is not clobbered.
            for url in [index_url] if index_url is not None else indexes:
                try:
                    producer.publish_to_topic(
                        p,
                        unresolved_package_message,
                        UnresolvedPackageContents(
                            package_name=package_name,
                            package_version=package_version,
                            index_url=[url],
                            solver=solver_name,
                            component_name=COMPONENT_NAME,
                            service_version=__service_version__,
                        ),
                    )
                    _LOGGER.info(
                        "Published message for solver %r for package %r in version %r from index %r",
                        solver_name,
                        package_name,
                        package_version,
                        url,
                    )
                    solver_messages_sent += 1
                except Exception as identifier:
                    # Best-effort: a failed publish must not abort the whole refresh.
                    _LOGGER.exception(
                        "Failed to publish solver message with the following error message: %r",
                        identifier,
                    )

        # Send reverse solver message if not done for this package, package_version.
        if THOTH_GRAPH_REFRESH_REVSOLVER:
            if (package_name, package_version) not in revsolver_packages_seen:
                try:
                    producer.publish_to_topic(
                        p,
                        unrevsolved_package_message,
                        UnrevsolvedPackageContents(
                            package_name=package_name,
                            package_version=package_version,
                            component_name=COMPONENT_NAME,
                            service_version=__service_version__,
                        ),
                    )
                    _LOGGER.info(
                        "Published message for reverse solver message for package %r in version %r",
                        package_name,
                        package_version,
                    )
                    revsolver_messages_sent += 1
                    revsolver_packages_seen.add(
                        (package_name, package_version))
                except Exception as identifier:
                    _LOGGER.exception(
                        "Failed to publish reverse solver message with the following error message: %r",
                        identifier,
                    )

    # Lets find the packages solved by solver, but unsolved by SI.
    if THOTH_GRAPH_REFRESH_SECURITY:
        for (
                package_name,
                package_version,
                index_url,
        ) in _GRAPH_DB.get_si_unanalyzed_python_package_versions_all(
                count=_COUNT):
            try:
                producer.publish_to_topic(
                    p,
                    si_unanalyzed_package_message,
                    SIUnanalyzedPackageContents(
                        package_name=package_name,
                        package_version=package_version,
                        index_url=index_url,
                        component_name=COMPONENT_NAME,
                        service_version=__service_version__,
                    ),
                )
                _LOGGER.info(
                    "Published message for SI unanalyzed package message for package %r in version %r, index_url is %r",
                    package_name,
                    package_version,
                    index_url,
                )
                security_messages_sent += 1
            except Exception as identifier:
                _LOGGER.exception(
                    "Failed to publish SI unanalyzed package message with the following error message: %r",
                    identifier,
                )

    # Ensure all queued messages are delivered before reporting metrics.
    p.flush()

    _METRIC_MESSSAGES_SENT.labels(
        message_type=unresolved_package_message.topic_name,
        env=THOTH_DEPLOYMENT_NAME,
        version=__service_version__,
    ).inc(solver_messages_sent)

    _METRIC_MESSSAGES_SENT.labels(
        message_type=unrevsolved_package_message.topic_name,
        env=THOTH_DEPLOYMENT_NAME,
        version=__service_version__,
    ).inc(revsolver_messages_sent)

    _METRIC_MESSSAGES_SENT.labels(
        message_type=si_unanalyzed_package_message.topic_name,
        env=THOTH_DEPLOYMENT_NAME,
        version=__service_version__,
    ).inc(security_messages_sent)

    if _THOTH_METRICS_PUSHGATEWAY_URL:
        try:
            _LOGGER.debug(
                f"Submitting metrics to Prometheus pushgateway {_THOTH_METRICS_PUSHGATEWAY_URL}"
            )
            push_to_gateway(
                _THOTH_METRICS_PUSHGATEWAY_URL,
                job="graph-refresh",
                registry=prometheus_registry,
            )
        except Exception as e:
            # Metrics push failures are non-fatal; the refresh itself succeeded.
            _LOGGER.exception(
                f"An error occurred pushing the metrics: {str(e)}")
Ejemplo n.º 11
0
def main():
    """Run thoth-reporter to provide information on status of services to Thoth contributors.

    Iterates day by day over [start_date, end_date), computes request/report
    statistics and justification data, pushes gauges to the Prometheus
    pushgateway, and optionally publishes one Kafka message per justification.
    """
    if not _SEND_MESSAGES:
        _LOGGER.info("No messages are sent. THOTH_REPORTER_SEND_KAFKA_MESSAGES is set to 0")

    # Parse once with strptime and bail out on a malformed date: the original
    # only logged the error and then crashed later with an uncaught ValueError
    # when building datetime.date from the split string.
    try:
        start_date = datetime.datetime.strptime(START_DATE, "%Y-%m-%d").date()
    except ValueError as err:
        _LOGGER.error(f"THOTH_REPORTER_START_DATE uses incorrect format: {err}")
        return

    try:
        end_date = datetime.datetime.strptime(END_DATE, "%Y-%m-%d").date()
    except ValueError as err:
        _LOGGER.error(f"THOTH_REPORTER_END_DATE uses incorrect format: {err}")
        return

    _LOGGER.info(f"Start Date considered: {start_date}")
    _LOGGER.info(f"End Date considered (excluded): {end_date}")

    delta = datetime.timedelta(days=1)

    # Clamp any future start date to today. The original compared with
    # `== TODAY + delta`, which only caught the single date "tomorrow" and
    # let later dates through, unlike the symmetric end-date check below.
    if start_date > TODAY:
        _LOGGER.info(f"start date ({start_date}) cannot be in the future. Today is: {TODAY}.")
        start_date = TODAY
        _LOGGER.info(f"new start date is: {start_date}.")

    if end_date > TODAY + delta:
        _LOGGER.info(f"end date ({end_date}) cannot be in the future. Today is: {TODAY}.")
        end_date = TODAY
        _LOGGER.info(f"new end date is: {end_date}.")

    if end_date < start_date:
        _LOGGER.error(f"Cannot analyze adviser data: end date ({end_date}) < start_date ({start_date}).")
        return

    # An empty interval is widened to one day so there is something to analyze.
    if end_date == start_date:
        if start_date == TODAY:
            _LOGGER.info(f"end date ({end_date}) == start_date ({start_date}) == today ({TODAY}).")
            start_date = start_date - delta
            _LOGGER.info(f"new start date is: {start_date}.")
        else:
            _LOGGER.info(f"end date ({end_date}) == start_date ({start_date}).")
            end_date = end_date + delta
            _LOGGER.info(f"new end date (excluded) is: {end_date}.")

    _LOGGER.info(f"Initial start date: {start_date}")
    _LOGGER.info(f"Initial end date (excluded): {end_date}")

    total_justifications: List[Dict[str, Any]] = []

    current_initial_date = start_date

    # Process one day per iteration; end_date itself is excluded.
    while current_initial_date < end_date:

        current_end_date = current_initial_date + delta

        _LOGGER.info(f"Analyzing for start date: {current_initial_date}")
        _LOGGER.info(f"Analyzing for end date (excluded): {current_end_date}")

        stats = evaluate_requests_statistics(
            current_initial_date=current_initial_date,
            current_end_date=current_end_date,
            results_store=RESULTS_STORES,
            store_on_ceph=STORE_ON_CEPH,
            store_on_public_bucket=STORE_ON_PUBLIC_CEPH,
        )

        # Assign metrics for pushgateway.
        for stats_analysis in stats:

            thoth_reporter_requests_gauge.labels(stats_analysis["component"]).set(stats_analysis["requests"])

            thoth_reporter_reports_gauge.labels(stats_analysis["component"]).set(stats_analysis["documents"])

        # Accumulates justifications into total_justifications (mutated in place).
        explore_adviser_files(
            current_initial_date=current_initial_date,
            current_end_date=current_end_date,
            total_justifications=total_justifications,
            store_on_ceph=STORE_ON_CEPH,
            store_on_public_bucket=STORE_ON_PUBLIC_CEPH,
        )

        current_initial_date += delta

    if _SEND_METRICS:
        try:
            _LOGGER.debug(
                "Submitting metrics to Prometheus pushgateway %r",
                _THOTH_METRICS_PUSHGATEWAY_URL,
            )
            push_to_gateway(
                _THOTH_METRICS_PUSHGATEWAY_URL,
                job="advise-reporter",
                registry=prometheus_registry,
            )
        except Exception as exc:
            # Metrics push failures are non-fatal.
            _LOGGER.exception("An error occurred pushing the metrics: %s", str(exc))

    if not _SEND_MESSAGES:
        return

    for advise_justification in total_justifications:
        message = advise_justification["message"]
        count = advise_justification["count"]
        justification_type = advise_justification["type"]
        adviser_version = advise_justification["adviser_version"]

        try:
            producer.publish_to_topic(
                p,
                advise_justification_message,
                AdviseJustificationContents.MessageContents(
                    message=message,
                    count=int(count),
                    justification_type=justification_type,
                    adviser_version=adviser_version,
                    component_name=COMPONENT_NAME,
                    service_version=__service_version__,
                ),
            )
            _LOGGER.debug(
                "Adviser justification message:\n%r\nJustification type:\n%r\nCount:\n%r\n",
                message,
                justification_type,
                count,
            )
        except Exception as identifier:
            # Best-effort: keep publishing the remaining justifications.
            _LOGGER.exception("Failed to publish with the following error message: %r", identifier)