def test_security_indicator_scoring_missing_secure(
            self, recommendation_type) -> None:
        """Reject a package that has no security indicator data.

        The graph lookup is mocked to raise ``NotFoundError``. Running the step
        with the supplied ``recommendation_type`` must raise ``NotAcceptable``
        and leave exactly one justification entry in ``context.stack_info``.
        """
        flask = PackageVersion(
            name="flask",
            version="==0.12.0",
            index=Source("https://pypi.org/simple"),
            develop=False,
        )

        flexmock(GraphDatabase)
        (
            GraphDatabase.should_receive("get_si_aggregated_python_package_version")
            .with_args(
                package_name="flask",
                package_version="0.12.0",
                index_url="https://pypi.org/simple",
            )
            .and_raise(NotFoundError)
            .once()
        )

        context = flexmock(graph=GraphDatabase(), stack_info=[])
        context.recommendation_type = recommendation_type

        with pytest.raises(NotAcceptable), SecurityIndicatorStep.assigned_context(context):
            SecurityIndicatorStep().run(None, flask)

        assert len(context.stack_info) == 1
        justification_keys = set(context.stack_info[0].keys())
        assert justification_keys == {"message", "type", "link"}
Example #2
0
    def test_cve_not_acceptable(self) -> None:
        """Check that a package with a CVE is rejected for SECURITY recommendations.

        The graph is mocked to report one CVE for flask 0.12.0. The step must
        raise ``NotAcceptable``, remember the package in ``_messages_logged``
        and add one justification entry to ``context.stack_info``.
        """
        flask = PackageVersion(
            name="flask",
            version="==0.12.0",
            index=Source("https://pypi.org/simple"),
            develop=False,
        )

        flexmock(GraphDatabase)
        (
            GraphDatabase.should_receive("get_python_cve_records_all")
            .with_args(package_name="flask", package_version="0.12.0")
            .and_return([self._FLASK_CVE])
            .once()
        )

        context = flexmock(
            graph=GraphDatabase(),
            recommendation_type=RecommendationType.SECURITY,
            stack_info=[],
        )

        unit = CvePenalizationStep()
        with CvePenalizationStep.assigned_context(context):
            # Nothing may be logged before the unit runs.
            assert not unit._messages_logged
            with pytest.raises(NotAcceptable):
                unit.run(None, flask)

        logged = unit._messages_logged
        assert len(logged) == 1
        assert ("flask", "0.12.0", "https://pypi.org/simple") in logged

        assert len(context.stack_info) == 1
        assert set(context.stack_info[0].keys()) == {"message", "link", "type"}
        assert self.verify_justification_schema(context.stack_info)
Example #3
0
    def test_cve_penalization(self) -> None:
        """Check that a reported CVE is reflected in the step's score.

        The graph is mocked to report one CVE for flask 0.12.0 and the step is
        run with the TESTING recommendation type. The result must be a
        ``(score, justification)`` pair where the score equals one unit of the
        default ``cve_penalization`` and the justification lists the CVE.
        """
        flask = PackageVersion(
            name="flask",
            version="==0.12.0",
            index=Source("https://pypi.org/simple"),
            develop=False,
        )

        flexmock(GraphDatabase)
        (
            GraphDatabase.should_receive("get_python_cve_records_all")
            .with_args(package_name="flask", package_version="0.12.0")
            .and_return([self._FLASK_CVE])
            .once()
        )

        context = flexmock(
            graph=GraphDatabase(),
            recommendation_type=RecommendationType.TESTING,
        )
        with CvePenalizationStep.assigned_context(context):
            result = CvePenalizationStep().run(None, flask)

        assert result is not None
        assert isinstance(result, tuple)
        assert len(result) == 2

        score, justification = result
        assert isinstance(score, float)
        # A single CVE was reported, hence one penalization unit.
        expected_score = 1 * CvePenalizationStep.CONFIGURATION_DEFAULT["cve_penalization"]
        assert score == expected_score
        assert isinstance(justification, list)
        assert justification == [self._FLASK_CVE]
        assert self.verify_justification_schema(justification)
Example #4
0
    def test_security_indicator_with_high_confidence(self) -> None:
        """Reject a package whose security info is ``_HIGH_HIGH_SECURITY_INFO``.

        The graph is mocked to return ``self._HIGH_HIGH_SECURITY_INFO`` for
        flask 0.12.0. With the SECURITY recommendation type the step must raise
        ``NotAcceptable`` and add one justification to ``context.stack_info``.
        """
        flask = PackageVersion(
            name="flask",
            version="==0.12.0",
            index=Source("https://pypi.org/simple"),
            develop=False,
        )

        flexmock(GraphDatabase)
        (
            GraphDatabase.should_receive("get_si_aggregated_python_package_version")
            .with_args(
                package_name="flask",
                package_version="0.12.0",
                index_url="https://pypi.org/simple",
            )
            .and_return(self._HIGH_HIGH_SECURITY_INFO)
            .once()
        )

        context = flexmock(graph=GraphDatabase(), stack_info=[])
        context.recommendation_type = RecommendationType.SECURITY

        with pytest.raises(NotAcceptable), SecurityIndicatorStep.assigned_context(context):
            SecurityIndicatorStep().run(None, flask)

        assert len(context.stack_info) == 1
        assert self.verify_justification_schema(context.stack_info)
Example #5
0
    def test_security_indicator_scoring(self) -> None:
        """Check that security indicators are scored when the graph provides them.

        The graph is mocked to return ``self._SECURITY_INFO_EXISTS``; with the
        STABLE recommendation type the step must return a
        ``(float score, justification)`` pair.
        """
        flask = PackageVersion(
            name="flask",
            version="==0.12.0",
            index=Source("https://pypi.org/simple"),
            develop=False,
        )

        flexmock(GraphDatabase)
        (
            GraphDatabase.should_receive("get_si_aggregated_python_package_version")
            .with_args(
                package_name="flask",
                package_version="0.12.0",
                index_url="https://pypi.org/simple",
            )
            .and_return(self._SECURITY_INFO_EXISTS)
            .once()
        )

        context = flexmock(graph=GraphDatabase())
        context.recommendation_type = RecommendationType.STABLE
        with SecurityIndicatorStep.assigned_context(context):
            result = SecurityIndicatorStep().run(None, flask)

        assert result is not None
        assert isinstance(result, tuple)
        assert len(result) == 2

        score, justification = result
        assert isinstance(score, float)
        assert self.verify_justification_schema(justification)
Example #6
0
def main(
    kebechet_analysis: Optional[bool],
    kebechet_merge: Optional[bool],
    gh_repo_analysis: Optional[bool],
    subdir: Optional[str],
):
    """MI-Scheduler entrypoint.

    Each truthy flag triggers one independent scheduling run:

    :param kebechet_analysis: schedule Kebechet analysis for the repositories
        the graph database reports as active Kebechet GitHub installations
    :param kebechet_merge: schedule the Kebechet merge
    :param gh_repo_analysis: schedule the regular MI analysis for the
        repositories and organizations reported by OpenShift
    :param subdir: forwarded unchanged to every ``Schedule``
    """
    github_client = Github(login_or_token=GITHUB_ACCESS_TOKEN)
    openshift_client = OpenShift()

    if gh_repo_analysis:
        # Regular MI schedule.
        mi_repos, mi_orgs = openshift_client.get_mi_repositories_and_organizations()
        mi_schedule = Schedule(
            github=github_client,
            openshift=openshift_client,
            organizations=mi_orgs,
            repositories=mi_repos,
            subdir=subdir,
        )
        mi_schedule.schedule_for_mi_analysis()

    if kebechet_analysis:
        database = GraphDatabase()
        database.connect()
        kebechet_schedule = Schedule(
            github=github_client,
            openshift=openshift_client,
            repositories=database.get_active_kebechet_github_installations_repos(),
            subdir=subdir,
        )
        kebechet_schedule.schedule_for_kebechet_analysis()

    if kebechet_merge:
        merge_schedule = Schedule(openshift=openshift_client, subdir=subdir)
        merge_schedule.schedule_for_kebechet_merge()
Example #7
0
    def test_security_indicator_scoring_missing_stable(
            self, recommendation_type) -> None:
        """Keep the package and attach a warning when no security data exists.

        The graph lookup is mocked to raise ``NotFoundError``. For the supplied
        ``recommendation_type`` the step must still return a result: a zero
        score together with a single WARNING justification.
        """
        flask = PackageVersion(
            name="flask",
            version="==0.12.0",
            index=Source("https://pypi.org/simple"),
            develop=False,
        )

        flexmock(GraphDatabase)
        (
            GraphDatabase.should_receive("get_si_aggregated_python_package_version")
            .with_args(
                package_name="flask",
                package_version="0.12.0",
                index_url="https://pypi.org/simple",
            )
            .and_raise(NotFoundError)
            .once()
        )

        context = flexmock(graph=GraphDatabase())
        context.recommendation_type = recommendation_type
        with SecurityIndicatorStep.assigned_context(context):
            result = SecurityIndicatorStep().run(None, flask)

        assert result is not None
        assert isinstance(result, tuple)
        assert len(result) == 2
        assert result[0] == 0

        justification = result[1]
        assert len(justification) == 1
        assert self.verify_justification_schema(justification)

        warning = justification[0]
        assert warning["type"] == "WARNING"
        expected_message = (
            "flask===0.12.0 on https://pypi.org/simple has no "
            "gathered information regarding security."
        )
        assert warning["message"] == expected_message
Example #8
0
async def parse_missing_version(version: Dict[str, Any], openshift: OpenShift, graph: GraphDatabase, **kwargs):
    """Process a missing version message from package-update producer.

    The package version described by ``version`` is flagged as missing in the
    graph database. When ``THOTH_INVESTIGATOR_SCHEDULE_KEBECHET_ADMIN`` is set,
    a Kebechet administrator workflow is scheduled as well. The success
    counter is incremented at the end in either case.

    :param version: message content; the keys ``index_url``, ``package_name``
        and ``package_version`` are read
    :param openshift: adapter used to schedule the workflow
    :param graph: database adapter on which the missing flag is set
    """
    # Read the keys in the same order the database call consumes them.
    index_url = version["index_url"]
    package_name = version["package_name"]
    package_version = version["package_version"]

    graph.update_missing_flag_package_version(
        index_url=index_url,
        package_name=package_name,
        package_version=package_version,
        value=True,
    )

    if Configuration.THOTH_INVESTIGATOR_SCHEDULE_KEBECHET_ADMIN:
        # Schedule the Kebechet Administrator workflow for this package version.
        workflow_id = await schedule_kebechet_administrator(
            openshift=openshift,
            message_info={
                "PACKAGE_NAME": package_name,
                "THOTH_PACKAGE_VERSION": package_version,
                "THOTH_PACKAGE_INDEX": index_url,
            },
            message_name=MissingVersionMessage.__name__,
        )

        _LOGGER.info(f"Scheduled kebechet administrator workflow {workflow_id}")

        workflow_counter = scheduled_workflows.labels(
            message_type=MissingVersionMessage.base_name,
            workflow_type="kebechet-administrator",
        )
        workflow_counter.inc()

    missing_version_success.inc()
Example #9
0
def adviser(end: Optional[str], adviser_version: Optional[str]) -> None:
    """Purge adviser data.

    :param end: datetime string handed to ``parse``; an empty value or
        ``None`` means no end datetime is passed to the database call
    :param adviser_version: adviser version to delete runs for; an empty
        value is passed on as ``None``
    """
    database = GraphDatabase()
    database.connect()

    # Connect first, then interpret the arguments (same order as before).
    end_datetime = parse(end) if end else None
    version_filter = adviser_version or None

    removed = database.delete_adviser_run(
        end_datetime=end_datetime,
        adviser_version=version_filter,
    )
    _LOGGER.info("Removed %d adviser entries from the database", removed)
Example #10
0
    def test_not_solved_without_error(self, context: Context) -> None:
        """Check that the sieve drops a package reported to have a solver error.

        ``has_python_solver_error`` is mocked to return ``True``. The sieve must
        then yield nothing and leave schema-valid justification in
        ``context.stack_info``.
        """
        package_version, _project = self._get_case()
        (
            GraphDatabase.should_receive("has_python_solver_error")
            .with_args(
                package_version.name,
                package_version.locked_version,
                package_version.index.url,
                os_name=None,
                os_version=None,
                python_version=None,
            )
            .and_return(True)
            .once()
        )

        context.graph = GraphDatabase()
        runtime_environment = RuntimeEnvironment.from_dict({})
        context.project = flexmock(runtime_environment=runtime_environment)

        assert not context.stack_info, "No stack info should be provided before test run"

        sieve = SolvedSieve()
        sieve.pre_run()

        with SolvedSieve.assigned_context(context):
            accepted = list(sieve.run(pv for pv in [package_version]))
            assert accepted == []

        assert context.stack_info, "No stack info provided by the pipeline unit"
        assert self.verify_justification_schema(context.stack_info) is True
Example #11
0
    def test_acceptable_with_error(self) -> None:
        """Check that a package with a solver error passes when errors are allowed.

        ``has_python_solver_error`` is mocked to return ``True``, but the sieve
        is configured with ``without_error`` set to ``False`` and therefore must
        let the package through.
        """
        package_version, _project = self._get_case()
        (
            GraphDatabase.should_receive("has_python_solver_error")
            .with_args(
                package_version.name,
                package_version.locked_version,
                package_version.index.url,
                os_name=None,
                os_version=None,
                python_version=None,
            )
            .and_return(True)
            .once()
        )

        database = GraphDatabase()
        project = flexmock(runtime_environment=RuntimeEnvironment.from_dict({}))
        context = flexmock(graph=database, project=project)

        with SolvedSieve.assigned_context(context):
            sieve = SolvedSieve()
            sieve.update_configuration({"without_error": False})
            accepted = list(sieve.run(pv for pv in [package_version]))
            assert accepted == [package_version]
Example #12
0
    def __init__(self, graph: GraphDatabase = None):
        """Initialize graph digests fetcher.

        :param graph: database adapter to use; when it is falsy (the default
            ``None``), a new ``GraphDatabase`` is created and connected
        """
        if graph:
            self.graph = graph
            return

        connected = GraphDatabase()
        connected.connect()
        self.graph = connected
Example #13
0
def _register_indexes(graph: GraphDatabase,
                      index_base_url: str,
                      dry_run: bool = False) -> List[str]:
    """Register PyPI and the available AICoE indexes into Thoth's database.

    :param graph: database adapter the indexes are registered in
    :param index_base_url: base URL handed to ``_list_available_indexes``
    :param dry_run: when true, only log and collect URLs without touching
        the database
    :return: URLs of all handled indexes, PyPI first
    """
    persist = not dry_run

    _LOGGER.info("Registering PyPI index %r", _PYPI_SIMPLE_API_URL)
    registered = [_PYPI_SIMPLE_API_URL]

    if persist:
        _LOGGER.info("Registering index %r", _PYPI_SIMPLE_API_URL)
        graph.register_python_package_index(
            _PYPI_SIMPLE_API_URL,
            warehouse_api_url=_PYPI_WAREHOUSE_JSON_API_URL,
            verify_ssl=True,
            enabled=True,
        )

    aicoe_indexes = _list_available_indexes(index_base_url)
    if not aicoe_indexes:
        _LOGGER.error("No AICoE indexes to register")

    for aicoe_url in aicoe_indexes:
        _LOGGER.info("Registering index %r", aicoe_url)
        if persist:
            graph.register_python_package_index(aicoe_url, enabled=True)
        registered.append(aicoe_url)

    return registered
Example #14
0
    def test_no_cve_record(self,
                           recommendation_type: RecommendationType) -> None:
        """Check that a package without CVE records gets a neutral score.

        The graph is mocked to return no CVE for flask 0.12.0. The step must
        return a zero score and a single INFO justification stating so.
        """
        flask = PackageVersion(
            name="flask",
            version="==0.12.0",
            index=Source("https://pypi.org/simple"),
            develop=False,
        )

        flexmock(GraphDatabase)
        (
            GraphDatabase.should_receive("get_python_cve_records_all")
            .with_args(package_name="flask", package_version="0.12.0")
            .and_return([])
            .once()
        )

        context = flexmock(
            graph=GraphDatabase(),
            recommendation_type=recommendation_type,
        )
        with CvePenalizationStep.assigned_context(context):
            result = CvePenalizationStep().run(None, flask)

        assert isinstance(result, tuple)
        assert len(result) == 2

        score, justification = result
        assert score == 0.0
        expected_justification = [
            {
                "link": "https://thoth-station.ninja/j/no_cve",
                "message": "No known CVE known for 'flask' in version '0.12.0'",
                "package_name": "flask",
                "type": "INFO",
            }
        ]
        assert justification == expected_justification
Example #15
0
def graph() -> GraphDatabase:
    """Create a knowledge graph connector fixture.

    ``GraphDatabase`` is wrapped with ``flexmock`` first; the returned adapter
    has already had ``connect()`` called on it.
    """
    flexmock(GraphDatabase)

    connector = GraphDatabase()
    connector.connect()
    return connector
Example #16
0
    def test_abi_compat_symbols_not_present(self) -> None:
        """Check that a package is removed when the symbols it requires are missing.

        The graph is mocked to return ``_SYSTEM_SYMBOLS`` as the image symbols
        and ``_REQUIRED_SYMBOLS_A`` as the symbols required by the package; the
        sieve must then yield no package.
        """
        tensorflow = PackageVersion(
            name="tensorflow",
            version="==1.9.0",
            index=Source("https://pypi.org/simple"),
            develop=False,
        )

        flexmock(GraphDatabase)
        (
            GraphDatabase.should_receive("get_analyzed_image_symbols_all")
            .and_return(_SYSTEM_SYMBOLS)
            .once()
        )
        (
            GraphDatabase.should_receive("get_python_package_required_symbols")
            .and_return(_REQUIRED_SYMBOLS_A)
            .once()
        )

        operating_system = flexmock(name="rhel", version="8.0")
        runtime_environment = flexmock(
            operating_system=operating_system,
            cuda_version="4.6",
            python_version="3.6",
        )
        # NOTE(review): the class itself, not an instance, is passed as the
        # graph here - confirm this is intentional.
        context = flexmock(
            graph=GraphDatabase,
            project=flexmock(runtime_environment=runtime_environment),
        )

        with AbiCompatibilitySieve.assigned_context(context):
            sieve = AbiCompatibilitySieve()
            sieve.pre_run()
            remaining = list(sieve.run((pv for pv in [tensorflow])))
            assert remaining == []
Example #17
0
def post_provenance_python(application_stack: dict,
                           origin: str = None,
                           debug: bool = False,
                           force: bool = False):
    """Check provenance for the given application stack.

    :param application_stack: dict with ``requirements`` and
        ``requirements_lock`` entries
    :param origin: origin of the request, part of the cache digest
    :param debug: forwarded to the scheduler as one of the parameters
    :param force: when true, skip the cache lookup and always schedule
    :return: ``(body, status)``: status 400 for an unparsable stack, 202
        with ``cached: True`` on a fresh cache hit, otherwise whatever
        ``_do_schedule`` returns
    """
    # Snapshot of the call arguments. This has to stay the very first
    # statement so that no other local variable ends up in the snapshot.
    parameters = locals()

    try:
        project = Project.from_strings(
            application_stack["requirements"],
            application_stack["requirements_lock"],
        )
    except ThothPythonException as exc:
        error_body = {
            "parameters": parameters,
            "error": f"Invalid application stack supplied: {str(exc)}"
        }
        return error_body, 400
    except Exception:
        error_body = {
            "parameters": parameters,
            "error": "Invalid application stack supplied"
        }
        return error_body, 400

    graph = GraphDatabase()
    graph.connect()
    whitelisted_sources = list(graph.get_python_package_index_urls())
    parameters["whitelisted_sources"] = whitelisted_sources

    # "force" only steers caching, it is not handed over to the scheduler.
    force = parameters.pop("force", False)

    digest_input = dict(
        **project.to_dict(),
        origin=origin,
        whitelisted_sources=whitelisted_sources,
    )
    cached_document_id = _compute_digest_params(digest_input)

    timestamp_now = int(time.mktime(datetime.datetime.utcnow().timetuple()))
    cache = ProvenanceCacheStore()
    cache.connect()

    if not force:
        try:
            cache_record = cache.retrieve_document_record(cached_document_id)
            expires_at = cache_record["timestamp"] + Configuration.THOTH_CACHE_EXPIRATION
            if expires_at > timestamp_now:
                cached_body = {
                    "analysis_id": cache_record.pop("analysis_id"),
                    "cached": True,
                    "parameters": parameters
                }
                return cached_body, 202
        except CacheMiss:
            # Nothing cached for this digest, schedule a new run below.
            pass

    response, status = _do_schedule(
        parameters,
        _OPENSHIFT.schedule_provenance_checker,
        output=Configuration.THOTH_PROVENANCE_CHECKER_OUTPUT,
    )
    if status != 202:
        return response, status

    # Remember the scheduled analysis so identical requests can reuse it.
    cache.store_document_record(
        cached_document_id,
        {
            "analysis_id": response["analysis_id"],
            "timestamp": timestamp_now
        },
    )
    return response, status
Example #18
0
    def test_no_rule(self, context: Context) -> None:
        """Check that a package passes untouched when no solver rule matches it.

        Both rule lookups (with and without the index URL) are mocked to
        return an empty list; the unit must yield the package and must not add
        anything to ``context.stack_info``.
        """
        flask = PackageVersion(
            name="flask",
            version="==1.1.2",
            index=Source("https://pypi.org/simple"),
            develop=False,
        )

        # Lookup of rules specific to the index.
        (
            GraphDatabase.should_receive("get_python_package_version_solver_rules_all")
            .with_args("flask", "1.1.2", "https://pypi.org/simple")
            .and_return([])
        )
        # Lookup of rules without an index.
        (
            GraphDatabase.should_receive("get_python_package_version_solver_rules_all")
            .with_args("flask", "1.1.2")
            .and_return([])
        )

        context.graph = GraphDatabase()

        assert not context.stack_info, "No stack info should be provided before test run"

        unit = self.UNIT_TESTED()
        unit.pre_run()

        with self.UNIT_TESTED.assigned_context(context):
            accepted = list(unit.run(pv for pv in [flask]))
            assert accepted == [flask]

        assert not context.stack_info, "No stack info should be provided by the pipeline unit"
Example #19
0
def _fill_graph_score(graph: GraphDatabase, session: Session) -> None:
    """Compute and fill in graph score per each package.

    Works in two phases:

    1. Every package name known to ``graph`` is queued as a
       ``SubGraphEntity``; packages without dependencies additionally get
       ``subgraph_size`` set to their ``version_count`` right away.
    2. Queued subgraphs are processed until the queue is empty. A subgraph
       whose dependency has no ``subgraph_size`` yet is put back to the end
       of the queue; once all dependencies are scored, the accumulated
       size is stored on the package's ``Package`` row.

    :param graph: database adapter used to list packages and dependencies
    :param session: SQLAlchemy-style session used to query and update
        ``Package`` rows; committed after every update
    """
    _LOGGER.info("Computing graph score for each package")

    subgraphs = deque()

    # The very first walk will mark down libraries that do not have any dependencies.
    for package_name in graph.get_python_package_version_names_all(
            distinct=True):
        dependencies = graph.get_depends_on_package_names(package_name)
        # Queue each package exactly once. A second, identical entity used to
        # be queued for packages with dependencies, which only repeated the
        # same computation and commit.
        # NOTE(review): packages without dependencies are queued as well, so
        # the second phase rewrites their size with the SubGraphEntity's
        # initial subgraph_size - confirm this is intended.
        subgraphs.append(
            SubGraphEntity(subgraph_name=package_name,
                           to_visit=set(dependencies)))
        if dependencies:
            continue

        entry = session.query(Package).filter(
            Package.package_name == package_name).first()
        if not entry:
            # Might be ingesting in the mean time, do not mark down and continue.
            continue

        entry.subgraph_size = entry.version_count
        session.commit()

    # NOTE(review): a subgraph is re-queued for as long as one of its
    # dependencies has no size; presumably this never terminates for
    # dependency cycles - verify against the data.
    while subgraphs:
        subgraph = subgraphs.popleft()

        for package_name in subgraph.to_visit:
            entry = session.query(Package).filter(
                Package.package_name == package_name).first()
            if not entry:
                _LOGGER.warning(
                    "Cannot score subgraph %r as not all the dependencies were resolved",
                    package_name)
                break

            if entry.subgraph_size is None:
                # Scheduling for the next round.
                subgraphs.append(subgraph)
                break

            subgraph.subgraph_size *= entry.subgraph_size * entry.version_count
            subgraph.subgraphs_seen.add(package_name)
        else:
            # All dependencies were scored, store the result.
            entry = session.query(Package).filter(
                Package.package_name == subgraph.subgraph_name).first()
            if not entry:
                # The %r placeholder previously had no argument, so the name
                # of the offending subgraph was never logged.
                _LOGGER.error(
                    "No subgraph for %r found, this looks like a programming error",
                    subgraph.subgraph_name)
                continue

            entry.subgraph_size = subgraph.subgraph_size
            session.commit()

        # Dependencies already accounted for are not visited again should the
        # subgraph be processed in a later round.
        subgraph.to_visit -= subgraph.subgraphs_seen
Example #20
0
def api_readiness():
    """Report readiness for OpenShift readiness probe.

    :raises ValueError: when the database schema is not up to date
    :return: the response produced by ``_healthiness()``
    """
    database = GraphDatabase()
    database.connect()

    if database.is_schema_up2date():
        return _healthiness()

    raise ValueError("Database schema is not up to date")
Example #21
0
def solver(os_name: str, os_version: str, python_version: str) -> None:
    """Purge solver data.

    :param os_name: operating system name passed to ``delete_solved``
    :param os_version: operating system version passed to ``delete_solved``
    :param python_version: Python version passed to ``delete_solved``
    """
    database = GraphDatabase()
    database.connect()

    removed = database.delete_solved(
        os_name=os_name,
        os_version=os_version,
        python_version=python_version,
    )
    _LOGGER.info("Removed %d solver entries from the database", removed)
Example #22
0
def _healthiness():
    """Connect to the graph database and report the service as ready.

    :return: ``(JSON body, 200, headers)`` where the body carries the status
        and the service version
    """
    # Only the ability to connect matters here; the adapter is not used further.
    GraphDatabase().connect()

    body = jsonify({
        "status": "ready",
        "version": __version__
    })
    headers = {"ContentType": "application/json"}
    return body, 200, headers
Example #23
0
def get_graphdb_connection_error_status():
    """Raise a flag if there is an error connecting to database.

    The ``graphdb_connection_error_status`` metric is set to 1 when
    connecting fails (the exception is logged) and to 0 otherwise.
    """
    database = GraphDatabase()
    try:
        database.connect()
    except Exception as connect_error:
        metrics.graphdb_connection_error_status.set(1)
        _LOGGER.exception(connect_error)
        return

    metrics.graphdb_connection_error_status.set(0)
Example #24
0
def erase_graph(secret: str):
    """Clean content of the graph database.

    The caller has to present the secret configured in
    ``Configuration.THOTH_SECRET``; otherwise nothing is erased.

    :param secret: secret supplied by the caller
    :return: ``({}, 201)`` once all vertices were dropped, or an error body
        with status ``401`` when the secret does not match
    """
    import hmac

    expected = Configuration.THOTH_SECRET
    # Compare in constant time so that response timing does not reveal how
    # much of the secret was guessed. Non-string values (for instance an
    # unset secret) never authorize the wipe.
    authorized = (
        isinstance(secret, str)
        and isinstance(expected, str)
        and hmac.compare_digest(secret.encode("utf-8"), expected.encode("utf-8"))
    )
    if not authorized:
        return {'error': 'Wrong secret provided'}, 401

    adapter = GraphDatabase()
    adapter.connect()
    loop = asyncio.get_event_loop()
    # Drop every vertex in the graph.
    loop.run_until_complete(adapter.g.V().drop().next())
    return {}, 201
Example #25
0
def before_request_callback():
    """Callback registered, runs before each request to this service.

    For ``GET /metrics`` requests the schema up-to-date gauge is refreshed
    from the graph database; all other requests are left alone.
    """
    method, path = request.method, request.path

    if method != "GET" or path != "/metrics":
        return

    # Update up2date metric exposed.
    database = GraphDatabase()
    database.connect()
    schema_is_current = database.is_schema_up2date()
    _API_GAUGE_METRIC.set(int(schema_is_current))
Example #26
0
def package_extract(end: Optional[str],
                    package_extract_version: Optional[str]) -> None:
    """Purge package-extract data.

    :param end: datetime string handed to ``parse``; an empty value or
        ``None`` means no end datetime is passed to the database call
    :param package_extract_version: package-extract version to delete runs
        for; an empty value is passed on as ``None``
    """
    database = GraphDatabase()
    database.connect()

    # Connect first, then interpret the arguments (same order as before).
    end_datetime = parse(end) if end else None
    version_filter = package_extract_version or None

    removed = database.delete_package_extract_run(
        end_datetime=end_datetime,
        package_extract_version=version_filter,
    )
    _LOGGER.info("Removed %d package-extract entries from the database",
                 removed)
Example #27
0
    def test_transitive_query_cycles(self):
        """Check transitive dependency retrieval when a node links back to a seen one.

        ``_query_raw`` is mocked to return two sub-results in order, followed
        by ``get_python_package_tuples`` returning the final result. The query
        depth is lowered to 2 for the duration of the test and restored
        afterwards.
        """
        flexmock(GraphDatabase)
        (
            GraphDatabase.should_receive("_query_raw")
            .and_return(self._TRANSITIVE_SUBRESULT_1)
            .ordered()
        )
        (
            GraphDatabase.should_receive("_query_raw")
            .and_return(self._TRANSITIVE_SUBRESULT_2)
            .ordered()
        )
        (
            GraphDatabase.should_receive("get_python_package_tuples")
            .with_args({"0x1", "0x2", "0x3"})
            .and_return(self._TRANSITIVE_RESULT)
            .ordered()
        )

        original_depth = GraphDatabase._TRANSITIVE_QUERY_DEPTH
        try:
            # Explicitly lower the recursion limit so the test stays simple.
            GraphDatabase._TRANSITIVE_QUERY_DEPTH = 2

            database = GraphDatabase()
            result = database.retrieve_transitive_dependencies_python(
                package_name="flask",
                package_version="1.0.2",
                index_url="https://pypi.org/simple",
                os_name="fedora",
                os_version="29",
                python_version="3.6",
            )

            # 0x3 links back to 0x2, but that one was already checked.
            expected = [[('a', 'b', 'c'), ('A', 'B', 'C'), ('X', 'Y', 'Z'), ('A', 'B', 'C')]]
            assert result == expected
        finally:
            GraphDatabase._TRANSITIVE_QUERY_DEPTH = original_depth
Example #28
0
def get_unique_run_software_environment_count():
    """Get the total number of unique software environment for run in Thoth Knowledge Graph.

    The listing returned by the graph database is de-duplicated and its size
    is exposed through the ``graphdb_total_run_software_environment`` metric.
    """
    database = GraphDatabase()
    database.connect()

    unique_environments = set(database.run_software_environment_listing())
    unique_count = len(unique_environments)

    metrics.graphdb_total_run_software_environment.set(unique_count)
    _LOGGER.debug("graphdb_total_unique_run_software_environment=%r",
                  unique_count)
Example #29
0
def get_unsolved_python_packages_count():
    """Get number of unsolved Python packages per solver.

    For every solver name reported by ``_OPENSHIFT`` the count is read from
    the graph database and exposed through a metric labelled by solver name.
    """
    database = GraphDatabase()
    database.connect()

    gauge = metrics.graphdb_total_number_unsolved_python_packages
    for solver_name in _OPENSHIFT.get_solver_names():
        unsolved = database.retrieve_unsolved_python_packages_count(solver_name)
        gauge.labels(solver_name).set(unsolved)
        _LOGGER.debug("graphdb_total_number_unsolved_python_packages(%r)=%r",
                      solver_name, unsolved)
Example #30
0
    def test_sieve_index_disabled(self) -> None:
        """Check that a package is removed when its package index is disabled.

        ``is_python_package_index_enabled`` is mocked to return ``False`` for
        the package's index URL; the sieve must then yield nothing.
        """
        index = Source("https://tensorflow.pypi.thoth-station.ninja/index/manylinux2010/jemalloc/simple/")
        tensorflow = PackageVersion(
            name="tensorflow",
            version="==1.9.0",
            index=index,
            develop=False,
        )

        flexmock(GraphDatabase)
        (
            GraphDatabase.should_receive("is_python_package_index_enabled")
            .with_args(index.url)
            .and_return(False)
            .once()
        )

        context = flexmock(graph=GraphDatabase())
        with PackageIndexSieve.assigned_context(context):
            sieve = PackageIndexSieve()
            remaining = list(sieve.run(pv for pv in [tensorflow]))
            assert remaining == []