# Imports reconstructed from the names used in this module; the thoth-adviser
# and thoth-python import paths are assumed, not taken from the original file.
import linecache
import logging
import operator
import os
import random
import sys
import tracemalloc
from pathlib import Path
from pprint import pprint
from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Tuple

import attr
from voluptuous import All
from voluptuous import Any as SchemaAny
from voluptuous import Invalid
from voluptuous import Length
from voluptuous import Optional as SchemaOptional
from voluptuous import Required
from voluptuous import Required as SchemaRequired
from voluptuous import Schema

from thoth.adviser.boot import Boot
from thoth.adviser.sieve import Sieve
from thoth.adviser.state import State
from thoth.adviser.step import Step
from thoth.python import PackageVersion

if TYPE_CHECKING:
    from thoth.adviser.pipeline_builder import PipelineBuilderContext

_LOGGER = logging.getLogger(__name__)


@attr.s(slots=True)
class MockScoreStep(Step):
    """A step that mocks scoring of packages."""

    # "assign_probability" is used to assign a score to a package to simulate knowledge
    # coverage for the packages resolved - 0.75 means ~75% of packages will have a score.
    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            SchemaOptional("package_name"): SchemaAny(str, None),
            SchemaOptional("assign_probability"): float,
            SchemaRequired("multi_package_resolution"): bool,
        }
    )
    CONFIGURATION_DEFAULT: Dict[str, Any] = {
        "package_name": None,
        "assign_probability": 0.75,
        "multi_package_resolution": False,
    }

    _score_history = attr.ib(type=Dict[Tuple[str, str, str], float], factory=dict, init=False)

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Register self, never."""
        yield from ()
        return None

    def pre_run(self) -> None:
        """Initialize self before each run."""
        self._score_history.clear()
        super().pre_run()

    def post_run(self) -> None:
        """Print the generated score history after the run."""
        packages = {}  # type: Dict[Any, Any]
        for key, value in self._score_history.items():
            packages.setdefault(key[0], []).append((key, value))

        for key, value in packages.items():
            packages[key] = sorted(value, key=operator.itemgetter(1), reverse=True)  # type: ignore

        print("-" * 10, " Mock score report ", "-" * 10, file=sys.stderr)
        for key in sorted(packages):
            print(key, file=sys.stderr)
            for entry in packages[key]:
                print(f"{str((entry[0][1], entry[0][2])):>50} | {entry[1]:+f}", file=sys.stderr)

        print("-" * 40, file=sys.stderr)

    def run(
        self, _: State, package_version: PackageVersion
    ) -> Optional[Tuple[Optional[float], Optional[List[Dict[str, str]]]]]:
        """Score the given package regardless of the state."""
        # The random seed set for the adviser run affects this call - adviser runs
        # with the same seed share the scores generated across runs.
        score = self._score_history.setdefault(
            package_version.to_tuple(),
            random.uniform(self.SCORE_MIN, self.SCORE_MAX)
            if random.random() <= self.configuration["assign_probability"]
            else 0.0,
        )
        return score, None
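
# A standalone sketch of the caching pattern MockScoreStep.run relies on: draw a
# random score per package tuple (or 0.0 when the draw falls outside the assign
# probability) and cache it, so the same package scores identically within one
# run. The -1.0/1.0 bounds stand in for Step.SCORE_MIN/SCORE_MAX and are
# assumptions, as is the helper name itself.
def _mock_score(
    history: Dict[Tuple[str, str, str], float],
    package_tuple: Tuple[str, str, str],
    assign_probability: float = 0.75,
) -> float:
    """Return a cached mock score for the given package tuple."""
    return history.setdefault(
        package_tuple,
        random.uniform(-1.0, 1.0) if random.random() <= assign_probability else 0.0,
    )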
class AdviserJustificationSchemaError(Exception):
    """An exception raised when a justification does not conform to the expected schema."""


class AdviserTestCase:
    """A base class for implementing adviser's test cases."""

    data_dir = Path(os.path.dirname(os.path.realpath(__file__))) / "data"

    JUSTIFICATION_SAMPLE_1 = [
        {"message": "Justification sample 1", "type": "WARNING", "link": "https://thoth-station.ninja"},
        {"message": "Justification sample 1", "type": "INFO", "link": "https://thoth-station.ninja"},
        {"message": "Justification sample 1", "type": "ERROR", "link": "https://thoth-station.ninja"},
    ]

    JUSTIFICATION_SAMPLE_2 = [
        {
            "message": "Justification sample 2",
            "type": "INFO",
            "link": "https://thoth-station.ninja",
            "advisory": "Bark!",
        },
    ]

    JUSTIFICATION_SAMPLE_3 = [
        {
            "message": "Justification sample 3",
            "type": "INFO",
            "link": "https://thoth-station.ninja",
            "package_name": "tensorflow",
            "version_range": "<2.3>=",
        },
    ]

    _JUSTIFICATION_SCHEMA = Schema(
        [
            {
                "message": All(str, Length(min=1)),
                "type": SchemaAny("INFO", "WARNING", "ERROR", "LATEST", "CVE"),
                "link": All(str, Length(min=1)),
                SchemaOptional("advisory"): All(str, Length(min=1)),
                SchemaOptional("cve_id"): All(str, Length(min=1)),
                SchemaOptional("cve_name"): All(str, Length(min=1)),
                SchemaOptional("package_name"): All(str, Length(min=1)),
                SchemaOptional("version_range"): All(str, Length(min=1)),
            }
        ]
    )

    @classmethod
    def verify_justification_schema(cls, justification: Optional[List[Dict[str, Any]]]) -> bool:
        """Verify that the justification conforms to the expected schema."""
        if justification is None:
            return True

        try:
            cls._JUSTIFICATION_SCHEMA(justification)
        except Invalid as exc:
            raise AdviserJustificationSchemaError(exc.msg) from exc
        else:
            return True
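
# A minimal self-check of the justification schema above, exercising the
# class's own samples; None is treated as "no justification" and passes.
# The helper name is illustrative.
def _check_justification_samples() -> None:
    """Validate all justification samples against the schema."""
    for sample in (
        AdviserTestCase.JUSTIFICATION_SAMPLE_1,
        AdviserTestCase.JUSTIFICATION_SAMPLE_2,
        AdviserTestCase.JUSTIFICATION_SAMPLE_3,
        None,
    ):
        assert AdviserTestCase.verify_justification_schema(sample)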
class SetScoreStep(Step):
    """A step that sets a score for packages."""

    # If "score" is left as None, a random score is generated in pre_run so that
    # the matched package is scored consistently within one resolver run.
    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            Required("multi_package_resolution"): bool,
            Required("package_name"): SchemaAny(str, None),
            SchemaOptional("index_url"): SchemaAny(str, None),
            SchemaOptional("package_version"): SchemaAny(str, None),
            SchemaOptional("score"): SchemaAny(float, None),
        }
    )
    CONFIGURATION_DEFAULT: Dict[str, Any] = {
        "index_url": None,
        "multi_package_resolution": False,
        "package_name": None,
        "package_version": None,
        "score": None,
    }

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Register self, never."""
        yield from ()
        return None

    def pre_run(self) -> None:
        """Initialize this pipeline unit before each run."""
        if self.configuration["score"] is None:
            self.configuration["score"] = random.uniform(self.SCORE_MIN, self.SCORE_MAX)

        super().pre_run()

    def run(
        self, _: State, package_version: PackageVersion
    ) -> Optional[Tuple[Optional[float], Optional[List[Dict[str, str]]]]]:
        """Score the given package."""
        if (
            self.configuration["package_version"] is not None
            and package_version.locked_version != self.configuration["package_version"]
        ) or (
            self.configuration["index_url"] is not None
            and package_version.index.url != self.configuration["index_url"]
        ):
            return None

        return self.configuration["score"], None
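
# A quick illustration of validating a SetScoreStep configuration against the
# voluptuous schema above; the concrete values and helper name are illustrative.
def _demo_set_score_configuration() -> Dict[str, Any]:
    """Return a configuration for SetScoreStep validated by its schema."""
    return SetScoreStep.CONFIGURATION_SCHEMA(
        {
            "multi_package_resolution": False,
            "package_name": "tensorflow",
            "index_url": "https://pypi.org/simple",
            "package_version": "2.3.0",
            "score": 0.42,
        }
    )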
class Sieve2(Sieve):
    """A testing sieve implementation."""

    CONFIGURATION_DEFAULT = {"date": "2015-09-15", "package_name": "selinon"}
    CONFIGURATION_SCHEMA: Schema = Schema(
        {Required("package_name"): str, Required("date"): str, SchemaOptional("foo"): str}
    )

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Check if this pipeline unit should be included in the pipeline configuration."""
        yield from ()
        return None

    def run(self, package_versions: Generator[PackageVersion, None, None]) -> Generator[PackageVersion, None, None]:
        """Return the package versions unchanged (a no-op sieve)."""
        return package_versions
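
# For contrast with the pass-through above, a sieve's run would normally
# consume the generator and yield only the versions it keeps. A hedged sketch
# of such a filter; the package-name criterion and helper name are assumptions,
# not Sieve2's behavior - Sieve2 deliberately filters nothing.
def _filtering_sieve_run(
    package_versions: Generator[PackageVersion, None, None],
    keep_package: str = "selinon",
) -> Generator[PackageVersion, None, None]:
    """Yield only package versions whose name matches the configured package."""
    for package_version in package_versions:
        if package_version.name == keep_package:
            yield package_version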
class MemTraceBoot(Boot):
    """A boot that traces memory consumption of the adviser/dependency-monkey run."""

    CONFIGURATION_DEFAULT = {
        "frame_count": 100,  # Number of frames traced.
        "top_limit": 100,  # Number of top memory usage consumers printed.
    }
    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            SchemaOptional("frame_count"): int,
            SchemaOptional("top_limit"): int,
        }
    )

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Register self, never."""
        # Uncomment this to release the beast.
        # if not builder_context.is_included(cls):
        #     yield {}
        #     return None
        yield from ()
        return None

    def pre_run(self) -> None:
        """Initialize memory tracing."""
        _LOGGER.warning(
            "Enabling memory tracing, this has a negative impact on the overall pipeline performance"
        )
        tracemalloc.start(self.configuration["frame_count"])
        super().pre_run()

    @staticmethod
    def _display_top(snapshot: tracemalloc.Snapshot, *, key_type: str = "lineno", limit: int = 10) -> None:
        """Print the top memory consumers.

        Inspired by the Python docs:
        https://docs.python.org/3/library/tracemalloc.html#get-the-traceback-of-a-memory-block
        """
        snapshot = snapshot.filter_traces(
            (
                tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
                tracemalloc.Filter(False, "<unknown>"),
            )
        )
        top_stats = snapshot.statistics(key_type)

        print("Top %s lines" % limit)
        for index, stat in enumerate(top_stats[:limit], 1):
            frame = stat.traceback[0]
            print("#%s: %s:%s: %.1f KiB" % (index, frame.filename, frame.lineno, stat.size / 1024))
            line = linecache.getline(frame.filename, frame.lineno).strip()
            if line:
                print("    %s" % line)

        other = top_stats[limit:]
        if other:
            size = sum(stat.size for stat in other)
            print("%s other: %.1f KiB" % (len(other), size / 1024))

        total = sum(stat.size for stat in top_stats)
        print("Total allocated size: %.1f KiB" % (total / 1024))

    def post_run(self) -> None:
        """De-initialize memory tracing and print the statistics gathered."""
        _LOGGER.warning(
            "Turning memory consumption tracing off and aggregating statistics; this might take a while..."
        )
        snapshot = tracemalloc.take_snapshot()
        self._display_top(snapshot, limit=self.configuration["top_limit"])
        tracemalloc.stop()

    def run(self) -> None:
        """Do not perform anything valuable in the actual implementation."""
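
# A self-contained sketch of the tracemalloc pattern MemTraceBoot wraps:
# start tracing, perform some allocations, snapshot, and print the top
# consumers grouped by source line. The workload and helper name are
# stand-ins for illustration.
def _demo_tracemalloc(limit: int = 3) -> None:
    """Trace a small allocation and print its top memory consumers."""
    tracemalloc.start(25)  # keep up to 25 frames per allocation traceback
    workload = [bytes(1024) for _ in range(100)]  # ~100 KiB to make the stats visible
    snapshot = tracemalloc.take_snapshot()
    tracemalloc.stop()

    for stat in snapshot.statistics("lineno")[:limit]:
        print(stat)

    del workload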
@attr.s(slots=True)
class GenerateScoreStep(Step):
    """A step that assigns scores in a deterministic way.

    This unit can be used to assign scores deterministically across multiple
    runs without a need to store the score for each package.
    """

    # "assign_probability" is used to assign a score to a package to simulate knowledge
    # coverage for the packages resolved - 0.75 means ~75% of packages will have a score.
    CONFIGURATION_SCHEMA: Schema = Schema(
        {
            SchemaOptional("assign_probability"): float,
            SchemaOptional("buffer_size"): int,
            SchemaOptional("package_name"): SchemaAny(str, None),
            SchemaOptional("seed"): int,
            SchemaRequired("multi_package_resolution"): bool,
        }
    )
    CONFIGURATION_DEFAULT: Dict[str, Any] = {
        "assign_probability": 0.75,
        "buffer_size": 1024,
        "multi_package_resolution": False,
        "package_name": None,
        "seed": 42,
    }

    _history = attr.ib(type=Dict[Tuple[str, str, str], float], factory=dict, init=False)
    _buffer = attr.ib(type=List[float], factory=list, init=False)
    _idx = attr.ib(type=int, default=0, init=False)

    def pre_run(self) -> None:
        """Initialize this pipeline unit before each run."""
        self._history.clear()
        self._idx = 0

        if not self._buffer:
            # Fill the buffer from a seeded generator, saving and restoring the
            # global RNG state so the rest of the resolution is unaffected.
            state = random.getstate()
            random.seed(self.configuration["seed"])

            self._buffer = [0.0] * self.configuration["buffer_size"]
            for i in range(self.configuration["buffer_size"]):
                self._buffer[i] = (
                    random.uniform(self.SCORE_MIN, self.SCORE_MAX)
                    if random.random() <= self.configuration["assign_probability"]
                    else 0.0
                )

            random.setstate(state)

        super().pre_run()

    def post_run(self) -> None:
        """Print the generated scores to stdout on finish."""
        pprint(self._history)

    @classmethod
    def should_include(cls, builder_context: "PipelineBuilderContext") -> Generator[Dict[str, Any], None, None]:
        """Register self, never."""
        yield from ()
        return None

    def run(
        self, _: State, package_version: PackageVersion
    ) -> Optional[Tuple[Optional[float], Optional[List[Dict[str, str]]]]]:
        """Score the given package."""
        package_tuple = package_version.to_tuple()
        score = self._history.get(package_tuple)
        if score is not None:
            return score, None

        idx = self._idx
        self._idx = (self._idx + 1) % self.configuration["buffer_size"]
        self._history[package_tuple] = self._buffer[idx]
        return self._buffer[idx], None
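
# A standalone sketch of the deterministic-scoring technique above: pre-compute
# a fixed-size buffer of scores from a seeded RNG (restoring the global RNG
# state afterwards), then hand scores out round-robin. The helper name and the
# -1.0/1.0 score bounds are assumptions.
def _make_score_buffer(seed: int = 42, size: int = 1024, assign_probability: float = 0.75) -> List[float]:
    """Build a reproducible buffer of scores without disturbing the global RNG."""
    state = random.getstate()
    random.seed(seed)
    try:
        return [
            random.uniform(-1.0, 1.0) if random.random() <= assign_probability else 0.0
            for _ in range(size)
        ]
    finally:
        random.setstate(state)


# Two buffers built with the same seed are identical, which is what makes the
# scoring reproducible across runs:
assert _make_score_buffer(seed=42) == _make_score_buffer(seed=42)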