def test_security_indicator_scoring_missing_secure(self, recommendation_type) -> None:
    """Make sure we don't accept package if si info is missing when recommendation is secure."""
    flexmock(GraphDatabase)
    GraphDatabase.should_receive("get_si_aggregated_python_package_version").with_args(
        package_name="flask",
        package_version="0.12.0",
        index_url="https://pypi.org/simple",
    ).and_raise(NotFoundError).once()

    pv = PackageVersion(
        name="flask",
        version="==0.12.0",
        index=Source("https://pypi.org/simple"),
        develop=False,
    )
    ctx = flexmock(graph=GraphDatabase(), stack_info=[])
    ctx.recommendation_type = recommendation_type

    # Missing security-indicator data must cause an outright rejection.
    with pytest.raises(NotAcceptable):
        with SecurityIndicatorStep.assigned_context(ctx):
            SecurityIndicatorStep().run(None, pv)

    assert len(ctx.stack_info) == 1
    assert set(ctx.stack_info[0].keys()) == {"link", "message", "type"}
def test_cve_not_acceptable(self) -> None:
    """Test raising an exception if a secure software stack should be resolved."""
    flexmock(GraphDatabase)
    GraphDatabase.should_receive("get_python_cve_records_all").with_args(
        package_name="flask", package_version="0.12.0"
    ).and_return([self._FLASK_CVE]).once()

    pv = PackageVersion(
        name="flask",
        version="==0.12.0",
        index=Source("https://pypi.org/simple"),
        develop=False,
    )
    ctx = flexmock(
        graph=GraphDatabase(),
        recommendation_type=RecommendationType.SECURITY,
        stack_info=[],
    )

    unit = CvePenalizationStep()
    with CvePenalizationStep.assigned_context(ctx):
        assert not unit._messages_logged
        # A CVE under a SECURITY recommendation must reject the package.
        with pytest.raises(NotAcceptable):
            unit.run(None, pv)

    assert len(unit._messages_logged) == 1
    assert ("flask", "0.12.0", "https://pypi.org/simple") in unit._messages_logged
    assert len(ctx.stack_info) == 1
    assert set(ctx.stack_info[0].keys()) == {"link", "message", "type"}
    assert self.verify_justification_schema(ctx.stack_info)
def test_cve_penalization(self) -> None:
    """Make sure a CVE affects stack score."""
    flexmock(GraphDatabase)
    GraphDatabase.should_receive("get_python_cve_records_all").with_args(
        package_name="flask", package_version="0.12.0"
    ).and_return([self._FLASK_CVE]).once()

    pv = PackageVersion(
        name="flask",
        version="==0.12.0",
        index=Source("https://pypi.org/simple"),
        develop=False,
    )
    ctx = flexmock(graph=GraphDatabase(), recommendation_type=RecommendationType.TESTING)

    with CvePenalizationStep.assigned_context(ctx):
        result = CvePenalizationStep().run(None, pv)

    assert result is not None
    assert isinstance(result, tuple)
    assert len(result) == 2
    score, justification = result
    # A single CVE yields exactly one unit of the configured penalization.
    assert isinstance(score, float)
    assert score == CvePenalizationStep.CONFIGURATION_DEFAULT["cve_penalization"]
    assert isinstance(justification, list)
    assert justification == [self._FLASK_CVE]
    assert self.verify_justification_schema(justification)
def test_security_indicator_with_high_confidence(self) -> None:
    """Make sure a package with high severity SI findings is rejected for a secure recommendation."""
    flexmock(GraphDatabase)
    GraphDatabase.should_receive("get_si_aggregated_python_package_version").with_args(
        package_name="flask",
        package_version="0.12.0",
        index_url="https://pypi.org/simple",
    ).and_return(self._HIGH_HIGH_SECURITY_INFO).once()

    pv = PackageVersion(
        name="flask",
        version="==0.12.0",
        index=Source("https://pypi.org/simple"),
        develop=False,
    )
    ctx = flexmock(graph=GraphDatabase(), stack_info=[])
    ctx.recommendation_type = RecommendationType.SECURITY

    with pytest.raises(NotAcceptable):
        with SecurityIndicatorStep.assigned_context(ctx):
            SecurityIndicatorStep().run(None, pv)

    assert len(ctx.stack_info) == 1
    assert self.verify_justification_schema(ctx.stack_info)
def test_security_indicator_scoring(self) -> None:
    """Make sure we do score security indicators when the info is available."""
    flexmock(GraphDatabase)
    GraphDatabase.should_receive("get_si_aggregated_python_package_version").with_args(
        package_name="flask",
        package_version="0.12.0",
        index_url="https://pypi.org/simple",
    ).and_return(self._SECURITY_INFO_EXISTS).once()

    pv = PackageVersion(
        name="flask",
        version="==0.12.0",
        index=Source("https://pypi.org/simple"),
        develop=False,
    )
    ctx = flexmock(graph=GraphDatabase())
    ctx.recommendation_type = RecommendationType.STABLE

    with SecurityIndicatorStep.assigned_context(ctx):
        result = SecurityIndicatorStep().run(None, pv)

    assert result is not None
    assert isinstance(result, tuple)
    assert len(result) == 2
    score, justification = result
    assert isinstance(score, float)
    assert self.verify_justification_schema(justification)
def main(
    kebechet_analysis: Optional[bool],
    kebechet_merge: Optional[bool],
    gh_repo_analysis: Optional[bool],
    subdir: Optional[str],
):
    """MI-Scheduler entrypoint."""
    github_client = Github(login_or_token=GITHUB_ACCESS_TOKEN)
    openshift = OpenShift()

    if gh_repo_analysis:
        # Regular MI schedule over configured repositories and organizations.
        repositories, organizations = openshift.get_mi_repositories_and_organizations()
        scheduler = Schedule(
            github=github_client,
            openshift=openshift,
            organizations=organizations,
            repositories=repositories,
            subdir=subdir,
        )
        scheduler.schedule_for_mi_analysis()

    if kebechet_analysis:
        # Active Kebechet installations come from the knowledge graph.
        graph = GraphDatabase()
        graph.connect()
        Schedule(
            github=github_client,
            openshift=openshift,
            repositories=graph.get_active_kebechet_github_installations_repos(),
            subdir=subdir,
        ).schedule_for_kebechet_analysis()

    if kebechet_merge:
        Schedule(openshift=openshift, subdir=subdir).schedule_for_kebechet_merge()
def test_security_indicator_scoring_missing_stable(self, recommendation_type) -> None:
    """Make sure package is kept even if no score exists for security indicators and add justification."""
    flexmock(GraphDatabase)
    GraphDatabase.should_receive("get_si_aggregated_python_package_version").with_args(
        package_name="flask",
        package_version="0.12.0",
        index_url="https://pypi.org/simple",
    ).and_raise(NotFoundError).once()

    pv = PackageVersion(
        name="flask",
        version="==0.12.0",
        index=Source("https://pypi.org/simple"),
        develop=False,
    )
    ctx = flexmock(graph=GraphDatabase())
    ctx.recommendation_type = recommendation_type

    with SecurityIndicatorStep.assigned_context(ctx):
        result = SecurityIndicatorStep().run(None, pv)

    assert result is not None
    assert isinstance(result, tuple)
    assert len(result) == 2
    score, justification = result
    # No penalty, but a warning justification has to be attached.
    assert score == 0
    assert len(justification) == 1
    assert self.verify_justification_schema(justification)
    assert justification[0]["type"] == "WARNING"
    assert (justification[0]["message"] == "flask===0.12.0 on https://pypi.org/simple has no "
            "gathered information regarding security.")
async def parse_missing_version(version: Dict[str, Any], openshift: OpenShift, graph: GraphDatabase, **kwargs):
    """Process a missing version message from package-update producer."""
    graph.update_missing_flag_package_version(
        index_url=version["index_url"],
        package_name=version["package_name"],
        package_version=version["package_version"],
        value=True,
    )

    if Configuration.THOTH_INVESTIGATOR_SCHEDULE_KEBECHET_ADMIN:
        # We schedule Kebechet Administrator workflow here -
        info = {
            "PACKAGE_NAME": version["package_name"],
            "THOTH_PACKAGE_VERSION": version["package_version"],
            "THOTH_PACKAGE_INDEX": version["index_url"],
        }
        workflow_id = await schedule_kebechet_administrator(
            openshift=openshift,
            message_info=info,
            message_name=MissingVersionMessage.__name__,
        )
        _LOGGER.info(f"Scheduled kebechet administrator workflow {workflow_id}")
        scheduled_workflows.labels(
            message_type=MissingVersionMessage.base_name,
            workflow_type="kebechet-administrator",
        ).inc()

    missing_version_success.inc()
def adviser(end: Optional[str], adviser_version: Optional[str]) -> None:
    """Purge adviser data."""
    adapter = GraphDatabase()
    adapter.connect()
    end_datetime = parse(end) if end else None
    deleted = adapter.delete_adviser_run(
        end_datetime=end_datetime,
        adviser_version=adviser_version or None,
    )
    _LOGGER.info("Removed %d adviser entries from the database", deleted)
def test_not_solved_without_error(self, context: Context) -> None:
    """Test a not found package is not accepted by sieve."""
    pv, _ = self._get_case()
    GraphDatabase.should_receive("has_python_solver_error").with_args(
        pv.name,
        pv.locked_version,
        pv.index.url,
        os_name=None,
        os_version=None,
        python_version=None,
    ).and_return(True).once()
    context.graph = GraphDatabase()
    context.project = flexmock(runtime_environment=RuntimeEnvironment.from_dict({}))
    assert not context.stack_info, "No stack info should be provided before test run"

    sieve = SolvedSieve()
    sieve.pre_run()
    with SolvedSieve.assigned_context(context):
        # The package with a solver error must be filtered out entirely.
        assert list(sieve.run(p for p in [pv])) == []

    assert context.stack_info, "No stack info provided by the pipeline unit"
    assert self.verify_justification_schema(context.stack_info) is True
def test_acceptable_with_error(self) -> None:
    """Test accepted with an error."""
    pv, _ = self._get_case()
    GraphDatabase.should_receive("has_python_solver_error").with_args(
        pv.name,
        pv.locked_version,
        pv.index.url,
        os_name=None,
        os_version=None,
        python_version=None,
    ).and_return(True).once()
    ctx = flexmock(
        graph=GraphDatabase(),
        project=flexmock(runtime_environment=RuntimeEnvironment.from_dict({})),
    )
    with SolvedSieve.assigned_context(ctx):
        sieve = SolvedSieve()
        # With without_error disabled, errored packages must pass through.
        sieve.update_configuration({"without_error": False})
        assert list(sieve.run(p for p in [pv])) == [pv]
def __init__(self, graph: GraphDatabase = None):
    """Initialize graph digests fetcher."""
    if graph:
        self.graph = graph
    else:
        # No adapter supplied - create and connect a fresh one.
        adapter = GraphDatabase()
        adapter.connect()
        self.graph = adapter
def _register_indexes(graph: GraphDatabase, index_base_url: str, dry_run: bool = False) -> List[str]:
    """Register available AICoE indexes into Thoth's database."""
    _LOGGER.info("Registering PyPI index %r", _PYPI_SIMPLE_API_URL)
    registered = [_PYPI_SIMPLE_API_URL]
    if not dry_run:
        _LOGGER.info("Registering index %r", registered[0])
        graph.register_python_package_index(
            registered[0],
            warehouse_api_url=_PYPI_WAREHOUSE_JSON_API_URL,
            verify_ssl=True,
            enabled=True,
        )

    discovered = _list_available_indexes(index_base_url)
    if not discovered:
        _LOGGER.error("No AICoE indexes to register")

    for url in discovered:
        _LOGGER.info("Registering index %r", url)
        if not dry_run:
            graph.register_python_package_index(url, enabled=True)
        registered.append(url)

    return registered
def test_no_cve_record(self, recommendation_type: RecommendationType) -> None:
    """Make sure no CVEs do not affect CVE scoring."""
    flexmock(GraphDatabase)
    GraphDatabase.should_receive("get_python_cve_records_all").with_args(
        package_name="flask", package_version="0.12.0"
    ).and_return([]).once()

    pv = PackageVersion(
        name="flask",
        version="==0.12.0",
        index=Source("https://pypi.org/simple"),
        develop=False,
    )
    ctx = flexmock(graph=GraphDatabase(), recommendation_type=recommendation_type)

    with CvePenalizationStep.assigned_context(ctx):
        result = CvePenalizationStep().run(None, pv)

    assert isinstance(result, tuple)
    assert len(result) == 2
    score, justification = result
    # No CVE means no penalty and a single informational justification entry.
    assert score == 0.0
    expected_entry = {
        "link": "https://thoth-station.ninja/j/no_cve",
        "message": "No known CVE known for 'flask' in version '0.12.0'",
        "package_name": "flask",
        "type": "INFO",
    }
    assert justification == [expected_entry]
def graph() -> GraphDatabase:
    """Create a knowledge graph connector fixture."""
    flexmock(GraphDatabase)
    adapter = GraphDatabase()
    adapter.connect()
    return adapter
def test_abi_compat_symbols_not_present(self) -> None:
    """Test if required symbols being missing is correctly identified."""
    pv = PackageVersion(
        name="tensorflow",
        version="==1.9.0",
        index=Source("https://pypi.org/simple"),
        develop=False,
    )
    flexmock(GraphDatabase)
    GraphDatabase.should_receive("get_analyzed_image_symbols_all").and_return(_SYSTEM_SYMBOLS).once()
    GraphDatabase.should_receive("get_python_package_required_symbols").and_return(_REQUIRED_SYMBOLS_A).once()

    runtime_environment = flexmock(
        operating_system=flexmock(name="rhel", version="8.0"),
        cuda_version="4.6",
        python_version="3.6",
    )
    ctx = flexmock(graph=GraphDatabase, project=flexmock(runtime_environment=runtime_environment))

    with AbiCompatibilitySieve.assigned_context(ctx):
        sieve = AbiCompatibilitySieve()
        sieve.pre_run()
        # Required symbols are not in the image - the package must be removed.
        assert list(sieve.run((p for p in [pv]))) == []
def post_provenance_python(application_stack: dict, origin: str = None, debug: bool = False, force: bool = False):
    """Check provenance for the given application stack.

    Returns a tuple of (payload, HTTP status): 400 when the supplied stack cannot
    be parsed, otherwise the result of scheduling a provenance-checker workflow,
    possibly served from the cache unless ``force`` is set.
    """
    parameters = locals()

    try:
        project = Project.from_strings(application_stack["requirements"], application_stack["requirements_lock"])
    except ThothPythonException as exc:
        return {
            "parameters": parameters,
            "error": f"Invalid application stack supplied: {str(exc)}"
        }, 400
    except Exception:
        # Deliberately coarse: unexpected parsing failures must not leak internals
        # to the client.  (Original bound the exception to an unused name.)
        return {
            "parameters": parameters,
            "error": "Invalid application stack supplied"
        }, 400

    graph = GraphDatabase()
    graph.connect()
    parameters["whitelisted_sources"] = list(graph.get_python_package_index_urls())

    # "force" is not part of the cache key - drop it from the reported parameters.
    force = parameters.pop("force", False)
    cached_document_id = _compute_digest_params(
        dict(**project.to_dict(), origin=origin, whitelisted_sources=parameters["whitelisted_sources"]))

    timestamp_now = int(time.mktime(datetime.datetime.utcnow().timetuple()))
    cache = ProvenanceCacheStore()
    cache.connect()

    if not force:
        try:
            cache_record = cache.retrieve_document_record(cached_document_id)
            # Serve the cached analysis id while the record has not expired yet.
            if cache_record["timestamp"] + Configuration.THOTH_CACHE_EXPIRATION > timestamp_now:
                return {
                    "analysis_id": cache_record.pop("analysis_id"),
                    "cached": True,
                    "parameters": parameters
                }, 202
        except CacheMiss:
            pass

    response, status = _do_schedule(
        parameters,
        _OPENSHIFT.schedule_provenance_checker,
        output=Configuration.THOTH_PROVENANCE_CHECKER_OUTPUT)
    if status == 202:
        # Remember the scheduled analysis so repeated identical requests hit the cache.
        cache.store_document_record(cached_document_id, {
            "analysis_id": response["analysis_id"],
            "timestamp": timestamp_now
        })

    return response, status
def test_no_rule(self, context: Context) -> None:
    """Test if no rule is configured for the given package."""
    pv = PackageVersion(
        name="flask",
        version="==1.1.2",
        index=Source("https://pypi.org/simple"),
        develop=False,
    )
    GraphDatabase.should_receive("get_python_package_version_solver_rules_all").with_args(
        "flask",
        "1.1.2",
        "https://pypi.org/simple",
    ).and_return([])
    GraphDatabase.should_receive("get_python_package_version_solver_rules_all").with_args(
        "flask",
        "1.1.2",
    ).and_return([])
    context.graph = GraphDatabase()
    assert not context.stack_info, "No stack info should be provided before test run"

    sieve = self.UNIT_TESTED()
    sieve.pre_run()
    with self.UNIT_TESTED.assigned_context(context):
        # No rule configured - the package passes through untouched.
        assert list(sieve.run(p for p in [pv])) == [pv]

    assert not context.stack_info, "No stack info should be provided by the pipeline unit"
def _fill_graph_score(graph: GraphDatabase, session: Session) -> None:
    """Compute and fill in graph score per each package.

    First pass: packages without any dependencies get their subgraph size stored
    directly.  Packages with dependencies are queued and resolved iteratively,
    each one once all of its dependencies have a known subgraph size.
    """
    _LOGGER.info("Computing graph score for each package")
    subgraphs = deque()

    # The very first walk will mark down libraries that do not have any dependencies.
    for package_name in graph.get_python_package_version_names_all(distinct=True):
        dependencies = graph.get_depends_on_package_names(package_name)
        if not dependencies:
            entry = session.query(Package).filter(
                Package.package_name == package_name).first()
            if not entry:
                # Might be ingesting in the mean time, do not mark down and continue.
                continue
            entry.subgraph_size = entry.version_count
            session.commit()
        else:
            # Queue each dependent package exactly once; leaf packages handled
            # above must NOT be queued, otherwise the second pass would
            # overwrite their already-stored subgraph size.
            subgraphs.append(
                SubGraphEntity(subgraph_name=package_name,
                               to_visit=set(dependencies)))

    while subgraphs:
        subgraph = subgraphs.popleft()
        for package_name in subgraph.to_visit:
            entry = session.query(Package).filter(
                Package.package_name == package_name).first()
            if not entry:
                _LOGGER.warning(
                    "Cannot score subgraph %r as not all the dependencies were resolved",
                    package_name)
                break
            if entry.subgraph_size is None:
                # Scheduling for the next round.
                subgraphs.append(subgraph)
                break
            subgraph.subgraph_size *= entry.subgraph_size * entry.version_count
            subgraph.subgraphs_seen.add(package_name)
        else:
            entry = session.query(Package).filter(
                Package.package_name == subgraph.subgraph_name).first()
            if not entry:
                # Supply the argument for the %r placeholder (was missing).
                _LOGGER.error(
                    "No subgraph for %r found, this looks like a programming error",
                    subgraph.subgraph_name)
                continue
            entry.subgraph_size = subgraph.subgraph_size
            session.commit()

        # Do not re-check dependencies already folded into the score.
        subgraph.to_visit -= subgraph.subgraphs_seen
def api_readiness():
    """Report readiness for OpenShift readiness probe."""
    adapter = GraphDatabase()
    adapter.connect()
    if adapter.is_schema_up2date():
        return _healthiness()
    # An outdated schema means the service cannot serve correct answers yet.
    raise ValueError("Database schema is not up to date")
def solver(os_name: str, os_version: str, python_version: str) -> None:
    """Purge solver data."""
    adapter = GraphDatabase()
    adapter.connect()
    deleted = adapter.delete_solved(
        os_name=os_name,
        os_version=os_version,
        python_version=python_version,
    )
    _LOGGER.info("Removed %d solver entries from the database", deleted)
def _healthiness():
    """Build the shared liveness/readiness response after checking database connectivity."""
    adapter = GraphDatabase()
    adapter.connect()
    body = jsonify({"status": "ready", "version": __version__})
    return body, 200, {"ContentType": "application/json"}
def get_graphdb_connection_error_status():
    """Raise a flag if there is an error connecting to database."""
    adapter = GraphDatabase()
    try:
        adapter.connect()
    except Exception as exc:
        _LOGGER.exception(exc)
        metrics.graphdb_connection_error_status.set(1)
    else:
        # Connection succeeded - clear the error flag.
        metrics.graphdb_connection_error_status.set(0)
def erase_graph(secret: str):
    """Clean content of the graph database."""
    if secret != Configuration.THOTH_SECRET:
        return {'error': 'Wrong secret provided'}, 401

    adapter = GraphDatabase()
    adapter.connect()
    # Drop every vertex in the graph via the gremlin traversal source.
    asyncio.get_event_loop().run_until_complete(adapter.g.V().drop().next())
    return {}, 201
def before_request_callback():
    """Callback registered, runs before each request to this service."""
    if request.method != "GET" or request.path != "/metrics":
        return
    # Update up2date metric exposed.
    adapter = GraphDatabase()
    adapter.connect()
    _API_GAUGE_METRIC.set(int(adapter.is_schema_up2date()))
def package_extract(end: Optional[str], package_extract_version: Optional[str]) -> None:
    """Purge package-extract data."""
    adapter = GraphDatabase()
    adapter.connect()
    end_datetime = parse(end) if end else None
    deleted = adapter.delete_package_extract_run(
        end_datetime=end_datetime,
        package_extract_version=package_extract_version or None,
    )
    _LOGGER.info("Removed %d package-extract entries from the database", deleted)
def test_transitive_query_cycles(self):
    """Test that transitive dependency resolution handles cycles without revisiting checked packages."""
    flexmock(GraphDatabase)
    # Two raw sub-queries are expected in this exact order, then a tuple lookup -
    # the .ordered() calls make the sequence part of the contract under test.
    GraphDatabase.should_receive("_query_raw") \
        .and_return(self._TRANSITIVE_SUBRESULT_1)\
        .ordered()
    GraphDatabase.should_receive("_query_raw") \
        .and_return(self._TRANSITIVE_SUBRESULT_2)\
        .ordered()
    GraphDatabase.should_receive("get_python_package_tuples") \
        .with_args({"0x1", "0x2", "0x3"}) \
        .and_return(self._TRANSITIVE_RESULT) \
        .ordered()
    old_depth = GraphDatabase._TRANSITIVE_QUERY_DEPTH
    try:
        GraphDatabase._TRANSITIVE_QUERY_DEPTH = 2  # Explicitly set recursive limit to lower number not to create complex tests.
        graph = GraphDatabase()
        result = graph.retrieve_transitive_dependencies_python(
            package_name="flask",
            package_version="1.0.2",
            index_url="https://pypi.org/simple",
            os_name="fedora",
            os_version="29",
            python_version="3.6",
        )
        # 0x3 links back to 0x2, but that one was already checked.
        assert result == [[('a', 'b', 'c'), ('A', 'B', 'C'), ('X', 'Y', 'Z'), ('A', 'B', 'C')]]
    finally:
        # Restore the class-level depth so other tests are unaffected.
        GraphDatabase._TRANSITIVE_QUERY_DEPTH = old_depth
def get_unique_run_software_environment_count():
    """Get the total number of unique software environment for run in Thoth Knowledge Graph."""
    adapter = GraphDatabase()
    adapter.connect()
    # Deduplicate the listing before counting.
    unique_count = len(set(adapter.run_software_environment_listing()))
    metrics.graphdb_total_run_software_environment.set(unique_count)
    _LOGGER.debug("graphdb_total_unique_run_software_environment=%r", unique_count)
def get_unsolved_python_packages_count():
    """Get number of unsolved Python packages per solver."""
    adapter = GraphDatabase()
    adapter.connect()
    for solver_name in _OPENSHIFT.get_solver_names():
        unsolved = adapter.retrieve_unsolved_python_packages_count(solver_name)
        metrics.graphdb_total_number_unsolved_python_packages.labels(solver_name).set(unsolved)
        _LOGGER.debug("graphdb_total_number_unsolved_python_packages(%r)=%r", solver_name, unsolved)
def test_sieve_index_disabled(self) -> None:
    """Test removals of Python package if Python package index used is disabled."""
    index = Source("https://tensorflow.pypi.thoth-station.ninja/index/manylinux2010/jemalloc/simple/")
    pv = PackageVersion(name="tensorflow", version="==1.9.0", index=index, develop=False)
    flexmock(GraphDatabase)
    GraphDatabase.should_receive("is_python_package_index_enabled").with_args(index.url).and_return(False).once()
    ctx = flexmock(graph=GraphDatabase())
    with PackageIndexSieve.assigned_context(ctx):
        # A disabled index must cause the package to be filtered out.
        assert list(PackageIndexSieve().run(p for p in [pv])) == []