class ProjectDatasetBuilder(object):

    def __init__(self, project, commit):
        self.project = project
        self.commit = commit
        self.act = ActFactory().bake("go-code-inspection")

    def build(self):
        data = {
            "type": "upstream_source_code",
            "project": self.project,
            "commit": self.commit,
            "ipprefix": self.project,
            "directories_to_skip": [],
        }

        builder = DatasetBuilder()

        try:
            artefact = self.act.call(data)
        except FunctionFailedError as e:
            raise DatasetError("Unable to create dataset: %s" % e)

        builder.addArtefact(artefact)

        return builder.build().dataset()
class LocalProjectDatasetBuilder(object):

    def __init__(self, directory, ipprefix):
        self.directory = directory
        self.ipprefix = ipprefix
        self.act = ActFactory().bake("go-code-inspection")

    def build(self):
        data = {
            "type": "user_directory",
            "ipprefix": self.ipprefix,
            "directories_to_skip": [],
            "resource": self.directory
        }

        builder = DatasetBuilder()

        try:
            artefact = self.act.call(data)
        except FunctionFailedError as e:
            raise DatasetError("Unable to create dataset: %s" % e)

        builder.addArtefact(artefact)

        return builder.build().dataset()
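# Minimal usage sketch of the two dataset builders above. The project path,
# commit placeholder and local directory are illustrative values, not real
# data; both builders raise DatasetError when the underlying
# go-code-inspection act fails.
def _example_build_datasets():
    try:
        # dataset from an upstream commit
        upstream = ProjectDatasetBuilder("github.com/coreos/etcd", "<commit>").build()
        # dataset from a local checkout of the same project
        local = LocalProjectDatasetBuilder("/tmp/etcd", "github.com/coreos/etcd").build()
        return upstream, local
    except DatasetError as e:
        logging.error(e)
        return None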
class DistributionBuildsFetcher(object):

    def __init__(self, pkgdb_client):
        self.pkgdb_client = pkgdb_client
        self.artefactreaderact = ActFactory().bake("artefact-reader")
        self.artefactwriteract = ActFactory().bake("artefact-writer")
        self.scan_act = ActFactory().bake("scan-distribution-package")

    def fetch(self, distributions, since = 0, to = int(time.time() + 86400)):
        """Collect a list of builds since a given date for each package
        whose builds are younger than since.

        :param distributions: list of distributions to fetch builds from
        :type distributions: [distribution]
        :param since: timestamp from which to collect new builds
        :type since: int
        :param to: timestamp up to which to collect new builds
        :type to: int
        """
        collections = self.pkgdb_client.getCollections()

        for distribution in distributions:
            if distribution["product"] not in collections:
                logging.error("Product '%s' unknown" % distribution["product"])
                return
            if distribution["version"] not in collections[distribution["product"]]:
                logging.error("Version '%s' unknown" % distribution["version"])
                return

        # get list of packages to scan
        for distribution in distributions:
            dist_tag = collections[distribution["product"]][distribution["version"]]["dist_tag"]
            print "%sScanning %s %s ...%s" % (BLUE, distribution["product"], distribution["version"], ENDC)

            try:
                data = self.artefactreaderact.call({
                    "artefact": ARTEFACT_GOLANG_DISTRIBUTION_SNAPSHOT,
                    "distribution": distribution
                })
            except ActFailedError:
                continue

            builds = DistributionSnapshot().read(data).builds()
            for build in builds:
                if builds[build]["build_ts"] >= since:
                    print "%s Scanning %s ...%s" % (BLUE, build, ENDC)
                    # get package's items info artefact
                    try:
                        items_info = self.artefactreaderact.call({
                            "artefact": ARTEFACT_GOLANG_PROJECT_DISTRIBUTION_PACKAGE_BUILDS,
                            "product": distribution["product"],
                            "distribution": dist_tag,
                            "package": build
                        })
                    except ActFailedError:
                        items_info = None

                    # if the items_info artefact for the package is found, take the
                    # build timestamp of the youngest covered build
                    if items_info is None:
                        start_ts = since
                    else:
                        start_ts = 0
                        for coverage in items_info["coverage"]:
                            start_ts = max(coverage["end"], start_ts)
                        # end is always > 0
                        start_ts = start_ts - 1

                    try:
                        self.scan_act.call({
                            "package": build,
                            "product": distribution["product"],
                            "distribution": dist_tag,
                            "start_timestamp": start_ts,
                            "end_timestamp": to
                        })
                    except ActFailedError as e:
                        logging.error("%s: %s" % (build, e))
                        continue
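# Usage sketch for DistributionBuildsFetcher, assuming a PkgDBClient (or
# FakePkgDBClient) instance as elsewhere in this module; the distribution
# list and the one-week window are illustrative values.
def _example_fetch_builds(pkgdb_client):
    fetcher = DistributionBuildsFetcher(pkgdb_client)
    fetcher.fetch(
        [{"product": "Fedora", "version": "rawhide"}],
        since = int(time.time()) - 7*86400
    )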
class DistributionSnapshotChecker(object):
    """Check the ecosystem for new builds

    1. get the latest snapshot of each requested distribution
    2. get the current snapshot of each requested distribution
    3. compare both snapshots
    4. scan new rpms
    """

    def __init__(self, koji_client, pkgdb_client):
        """
        :param koji_client: Koji client
        :type koji_client: KojiClient or FakeKojiClient
        :param pkgdb_client: PkgDB client
        :type pkgdb_client: PkgDBClient or FakePkgDBClient
        """
        self.koji_client = koji_client
        self.pkgdb_client = pkgdb_client

        self.artefactreaderact = ActFactory().bake("artefact-reader")
        self.artefactwriteract = ActFactory().bake("artefact-writer")
        self.scanbuildact = ActFactory().bake("scan-distribution-build")

        self._failed = {}
        self._scanned = {}

    def _scanRpms(self, snapshot):
        """Scan rpms captured in snapshot

        :param snapshot: distribution snapshot or difference
        :type snapshot: DistributionSnapshot
        """
        distribution = snapshot.distribution()
        key = "%s:%s" % (distribution["product"], distribution["version"])
        self._failed[key] = []
        self._scanned[key] = 0

        print "%sScanning %s %s ...%s" % (BLUE, distribution["product"], distribution["version"], ENDC)

        total = len(snapshot.json()["builds"])
        index = 1
        for package in snapshot.json()["builds"]:
            # scan devel and unit-tests only
            rpms = filter(lambda l: GolangRpm(package["build"], l).provideSourceCode(), package["rpms"])
            if rpms == []:
                continue

            data = {
                "product": distribution["product"],
                "distribution": distribution["version"],
                "build": {
                    "name": package["build"],
                    "rpms": map(lambda l: {"name": l}, rpms)
                }
            }

            print "%sScanning %s ... [%s/%s]%s" % (WHITE, package["build"], index, total, ENDC)
            index = index + 1

            try:
                self.scanbuildact.call(data)
            except ActFailedError as e:
                logging.error(e)
                self._failed[key].append(package)
                continue
            except FunctionFailedError as e:
                logging.error(e)
                self._failed[key].append(package)
                continue

            self._scanned[key] = self._scanned[key] + 1

        print "%sscanned %s, failed %s%s" % (YELLOW, self._scanned[key], len(self._failed[key]), ENDC)
        print ""

    def _distroKey(self, distribution):
        return "%s:%s" % (distribution["product"], distribution["version"])

    def check(self, distributions, custom_packages, blacklist = [], skip_failed = True, full_check = False):
        """
        :param distributions: list of distributions, each item as {"product": ..., "version": ...}
        :type distributions: [{}]
        :param custom_packages: list of golang packages not prefixed with golang-*
        :type custom_packages: [string]
        :param skip_failed: even if any rpm scan fails, store the latest snapshot, default True
        :type skip_failed: boolean
        :param full_check: don't check the current snapshot and scan all rpms in the latest snapshot
        :type full_check: boolean
        """
        # read all latest snapshots and get a list of all packages across them
        distro_packages = []
        for distribution in distributions:
            print distribution
            try:
                data = self.artefactreaderact.call({
                    "artefact": ARTEFACT_GOLANG_DISTRIBUTION_SNAPSHOT,
                    "distribution": distribution
                })
            except ActFailedError:
                continue

            distro_packages = distro_packages + DistributionSnapshot().read(data).builds().keys()

        known_packages = list(set(distro_packages + custom_packages))

        # capture the current distribution snapshot
        capturer = EcoCapturer(self.koji_client, self.pkgdb_client)
        snapshots = capturer.captureLatest(distributions, known_packages, blacklist).snapshots()

        for snapshot in snapshots:
            new_snapshot = snapshots[snapshot]["snapshot"]
            distribution = snapshots[snapshot]["distribution"]

            # get the latest distribution snapshot
            latest_snapshot = {}
            if not full_check:
                try:
                    data = self.artefactreaderact.call({
                        "artefact": ARTEFACT_GOLANG_DISTRIBUTION_SNAPSHOT,
                        "distribution": distribution
                    })
                    latest_snapshot = DistributionSnapshot().read(data)
                except ActFailedError:
                    # TODO(jchaloup): catch additional exception once extended
                    pass

            # scan new rpms
            # the latest snapshot not found => no comparison
            if latest_snapshot == {}:
                diff_snapshot = new_snapshot
            else:
                diff_snapshot = new_snapshot.compare(latest_snapshot)

            self._scanRpms(diff_snapshot)

            # set the latest snapshot
            write = True
            if not skip_failed:
                key = "%s:%s" % (distribution["product"], distribution["version"])
                if len(self._failed[key]) > 0:
                    write = False

            if write:
                data = new_snapshot.json()
                data["artefact"] = ARTEFACT_GOLANG_DISTRIBUTION_SNAPSHOT
                try:
                    self.artefactwriteract.call(data)
                except ActFailedError:
                    logging.error("Unable to store snapshot for %s:%s" % (distribution["product"], distribution["version"]))
class SnapshotChecker(object):

    def __init__(self):
        self.ipparser = ImportPathParserBuilder().buildWithLocalMapping()
        self.artefactreaderact = ActFactory().bake("artefact-reader")
        self.commitreaderact = ActFactory().bake("scan-upstream-repository")

    def _getCommitDate(self, repository, commit):
        try:
            artefact = self.commitreaderact.call({
                "repository": repository,
                "commit": commit
            })
        except ValueError as e:
            logging.error(e)
            return {}

        return artefact["commits"][commit]

    def _comparePackages(self, package, upstream_commit, distro_commit):
        if upstream_commit["cdate"] == distro_commit["cdate"]:
            return "%s%s is up-to-date%s" % (GREEN, package, ENDC)
        elif upstream_commit["cdate"] < distro_commit["cdate"]:
            return "%s%s is newer in distribution%s" % (BLUE, package, ENDC)
        elif upstream_commit["cdate"] > distro_commit["cdate"]:
            return "%s%s is outdated in distribution%s" % (RED, package, ENDC)

    def _checkPackageCoverage(self, product, distribution, build, rpm, ipprefix, packages):
        data = {
            "artefact": ARTEFACT_GOLANG_PROJECT_DISTRIBUTION_PACKAGES,
            "product": product,
            "distribution": distribution,
            "build": build,
            "rpm": rpm
        }

        artefact = self.artefactreaderact.call(data)

        # get list of defined packages
        for ipprefix_class in artefact["data"]:
            if ipprefix_class["ipprefix"] == ipprefix:
                # all packages covered?
                return list(set(packages) - set(ipprefix_class["packages"]))

    def check(self, snapshot, product, distribution):
        """Check if a given snapshot is covered in a distribution

        :param snapshot: project snapshot
        :type snapshot: Snapshot
        :param distribution: OS distribution, e.g. f23, f25, rawhide, centos7, ...
        :type distribution: string
        """
        packages = snapshot.packages()

        ipprefixes = {}
        providers = {}
        rpms = {}
        upstream = {}
        not_recognized = []
        for package in packages:
            try:
                self.ipparser.parse(package)
            except ValueError:
                not_recognized.append(package)
                continue

            ipprefix = self.ipparser.getImportPathPrefix()
            try:
                ipprefixes[ipprefix].append(package)
            except KeyError:
                ipprefixes[ipprefix] = [package]

            # store ipprefix commit (assuming all packages with the same prefix have the same commit)
            upstream[ipprefix] = packages[package]

            # ipprefix -> provider prefix
            providers[ipprefix] = self.ipparser.getProviderSignature()

            # ipprefix -> rpm
            data = {
                "artefact": ARTEFACT_GOLANG_IPPREFIX_TO_RPM,
                "distribution": "rawhide",
                "product": "Fedora",
                "ipprefix": ipprefix
            }

            # if the ipprefix2rpm artefact does not exist => report it and continue, no fallback
            # TODO(jchaloup): FF: fallback to generic mapping of ipprefix to pkg name
            # and report that "maybe" the ipprefix is provided by this package
            try:
                rpms[ipprefix] = self.artefactreaderact.call(data)
            except ActFailedError:
                logging.error("Unable to get mapping for %s" % package)

        for ipprefix in ipprefixes:
            if ipprefix not in providers:
                print "%sUnable to find provider for '%s' ipprefix%s" % (WHITE, ipprefix, ENDC)
                continue

            if ipprefix not in rpms:
                print "%sUnable to find ipprefix2rpm mapping for '%s' ipprefix%s" % (WHITE, ipprefix, ENDC)
                continue

            upstream_commit = self._getCommitDate(providers[ipprefix], upstream[ipprefix])
            distro_commit = self._getCommitDate(providers[ipprefix], rpms[ipprefix]["commit"])

            if upstream_commit == {}:
                logging.error("Unable to retrieve commit info for %s %s" % (ipprefix, upstream[ipprefix]))
                continue

            if distro_commit == {}:
                logging.error("Unable to retrieve commit info for %s %s" % (ipprefix, rpms[ipprefix]["commit"]))
                continue

            # compare commits
            comparison = self._comparePackages(ipprefix, upstream_commit, distro_commit)

            # check if packages in the ipprefix class are covered in the distribution
            not_covered = self._checkPackageCoverage(product, distribution, rpms[ipprefix]["build"], rpms[ipprefix]["rpm"], ipprefix, ipprefixes[ipprefix])
            if not_covered != []:
                print "%s: %snot covered: %s%s" % (comparison, RED, not_covered, ENDC)
            else:
                print comparison
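# Usage sketch for SnapshotChecker: build a small snapshot by hand (the import
# path and commit are placeholders) and check its coverage in rawhide.
# Snapshot.addPackage is used the same way SnapshotReconstructor uses it below.
def _example_check_coverage():
    snapshot = Snapshot()
    snapshot.addPackage("github.com/coreos/etcd/client", "<commit>")
    SnapshotChecker().check(snapshot, "Fedora", "rawhide")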
class DistributionLatestBuildGraphDataset:

    def __init__(self, distribution, packages):
        """The graph can be built from all packages or from selected ones.
        The decision is up to the user of the class.

        :param distribution: OS distribution
        :type distribution: string
        :param packages: list of packages in question
        :type packages: [string]
        """
        # TODO(jchaloup):
        # - inject the product together with buildsystem client
        self.product = "Fedora"
        self.distribution = distribution
        self.packages = packages

        # TODO(jchaloup):
        # - inject the client so the class can be used with Brew and CentOS as well
        self.client = FakeKojiClient()

        # TODO(jchaloup):
        # - inject the act and replace it with datasource instead
        #   so the artefact/data can be picked from more sources
        self.act = ActFactory().bake("scan-distribution-build")

    def build(self):
        """Build dataset for a given list of builds
        """
        # TODO(jchaloup): specify json schema for a dataset

        # get a list of latest rpms for selected packages
        counter = 0
        builder = DatasetBuilder()
        for pkg in self.packages:
            if pkg in [
                "golang-github-aws-aws-sdk-go",
                "golang-googlecode-google-api-go-client",
                "golang-googlecode-google-api-client",
            ]:
                continue

            try:
                data = self.client.getLatestRPMS("rawhide", pkg)
            except ValueError as e:
                logging.error("ValueError: %s" % e)
                continue
            except KeyError as e:
                logging.error("KeyError: %s" % e)
                continue

            rpms = []
            for rpm in data["rpms"]:
                rpm_name = Rpm(data["name"], rpm["name"]).name()
                # if not rpm_name.endswith("devel"):
                # and not rpm["name"].endswith("unit-test"):
                #     continue
                rpms.append({"name": rpm["name"]})

            # get artefact
            data = {
                "product": self.product,
                "distribution": self.distribution,
                "build": {"name": data["name"], "rpms": rpms},
            }

            try:
                artefacts = self.act.call(data)
            except FunctionFailedError as e:
                logging.error(e)
                continue

            for rpm in artefacts["packages"]:
                builder.addArtefact(artefacts["packages"][rpm], rpm)

            # if counter == 40:
            #     break
            # counter = counter + 1

        return builder.build().dataset()
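# Usage sketch for DistributionLatestBuildGraphDataset; the package name is an
# illustrative example, any golang-* package known to the (fake) Koji client
# should work.
def _example_build_graph_dataset():
    return DistributionLatestBuildGraphDataset(
        "rawhide",
        ["golang-github-coreos-etcd"]
    ).build()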
class SnapshotReconstructor(object):

    def __init__(self):
        # parsers
        self.ipparser = ImportPathParserBuilder().buildWithLocalMapping()

        # acts
        self.go_code_inspection_act = ActFactory().bake("go-code-inspection")
        self.scan_upstream_repository_act = ActFactory().bake("scan-upstream-repository")

        # snapshot
        self._snapshot = Snapshot()

        # dependency space
        self.detected_projects = {}
        self.unscanned_projects = {}
        self.scanned_projects = {}

    def _getCommitTimestamp(self, repository, commit):
        """Retrieve a commit from a repository and return its commit date

        :param repository: repository
        :type repository: dict
        :param commit: commit
        :type commit: hex string
        """
        data = {
            "repository": repository,
            "commit": commit
        }

        # TODO(jchaloup): catch exception if the commit is not found
        commit_data = self.scan_upstream_repository_act.call(data)
        return commit_data["commits"][commit]["cdate"]

    def _findYoungestCommits(self, commits):
        # sort commits by commit date, return the youngest one
        commits = map(lambda l: {"c": l, "d": commits[l]["cdate"]}, commits)
        commits = sorted(commits, key = lambda commit: commit["d"])
        return commits[-1]

    def _findClosestCommit(self, repository, timestamp):
        """Get the youngest commit from the repository that is at most as old as timestamp.

        :param repository: repository
        :type repository: dict
        :param timestamp: commit timestamp
        :type timestamp: integer
        """
        # TODO(jchaloup): search for commits only on the master branch!!!
        # other branches can be in an inconsistent state with experimental features
        # and get picked unintentionally
        data = {
            "repository": repository,
            "end_timestamp": timestamp
        }

        DAY = 3600*24
        # try the last day, week, month, year
        for delta in [1, 7, 30, 365]:
            data["start_timestamp"] = timestamp - delta*DAY
            rdata = self.scan_upstream_repository_act.call(data)
            if rdata["commits"] != {}:
                return self._findYoungestCommits(rdata["commits"])

        # unbound start_timestamp
        del data["start_timestamp"]
        rdata = self.scan_upstream_repository_act.call(data)
        if rdata["commits"] != {}:
            return self._findYoungestCommits(rdata["commits"])

        # no commit found => raise exception
        raise KeyError("Commit not found")

    def _detectNextDependencies(self, dependencies, ipprefix, commit_timestamp):
        dependencies = list(set(dependencies))

        # normalize paths
        normalizer = ImportPathNormalizer()
        dependencies = map(lambda l: normalizer.normalize(l), dependencies)

        decomposer = ImportPathsDecomposerBuilder().buildLocalDecomposer()
        decomposer.decompose(dependencies)
        prefix_classes = decomposer.getClasses()

        next_projects = {}
        for prefix in prefix_classes:
            # filter out the Native prefix
            if prefix == "Native":
                continue

            # filter out the project's own import path prefix
            if prefix == ipprefix:
                continue

            logging.warning("Processing %s ..." % prefix)
            # for each imported path get a list of commits in a given interval
            try:
                self.ipparser.parse(prefix)
                # ipprefix already covered?
                if self.ipparser.getImportPathPrefix() in self.detected_projects:
                    # ip covered in the prefix class?
                    not_covered = []
                    for ip in prefix_classes[prefix]:
                        if ip not in self.detected_projects[prefix]:
                            not_covered.append(ip)

                    if not_covered == []:
                        logging.warning("Prefix %s already covered" % prefix)
                        continue

                    logging.warning("Some paths '%s' not yet covered in '%s' prefix" % (str(not_covered), prefix))
                    # scan only ips not yet covered
                    prefix_classes[prefix] = not_covered

                provider = self.ipparser.getProviderSignature()
                provider_prefix = self.ipparser.getProviderPrefix()
            except ValueError as e:
                raise ReconstructionError("Prefix provider error: %s" % e)

            try:
                closest_commit = self._findClosestCommit(provider, commit_timestamp)
            except KeyError:
                raise ReconstructionError("Closest commit to %s timestamp for %s not found" % (commit_timestamp, provider_prefix))

            # update packages to scan
            next_projects[prefix] = {
                "ipprefix": prefix,
                "paths": map(lambda l: str(l), prefix_classes[prefix]),
                "provider": provider,
                "commit": closest_commit["c"],
                #"timestamp": closest_commit["d"],
                "provider_prefix": provider_prefix
            }

        return next_projects

    def _detectDirectDependencies(self, repository, commit, ipprefix, commit_timestamp, mains, tests):
        data = {
            "type": "upstream_source_code",
            "project": ipprefix,
            "commit": commit,
            "ipprefix": ipprefix,
            "directories_to_skip": []
        }

        packages_artefact = self.go_code_inspection_act.call(data)

        # collect dependencies
        direct_dependencies = []
        for package in packages_artefact["data"]["dependencies"]:
            direct_dependencies = direct_dependencies + map(lambda l: l["name"], package["dependencies"])

        if mains != []:
            paths = {}
            for path in packages_artefact["data"]["main"]:
                paths[path["filename"]] = path["dependencies"]

            for main in mains:
                if main not in paths:
                    raise ReconstructionError("Main package file %s not found" % main)

                direct_dependencies = direct_dependencies + paths[main]

        if tests:
            for dependencies in map(lambda l: l["dependencies"], packages_artefact["data"]["tests"]):
                direct_dependencies = direct_dependencies + dependencies

        # remove duplicates
        direct_dependencies = list(set(direct_dependencies))

        next_projects = self._detectNextDependencies(direct_dependencies, ipprefix, commit_timestamp)

        # update detected projects
        for project in next_projects:
            self.detected_projects[project] = next_projects[project]["paths"]

        # update packages to scan
        for prefix in next_projects:
            if prefix in self.unscanned_projects:
                continue
            self.unscanned_projects[prefix] = copy.deepcopy(next_projects[prefix])
            self.scanned_projects[prefix] = copy.deepcopy(next_projects[prefix])

    def _detectIndirectDependencies(self, ipprefix, commit_timestamp):
        nodes = []
        next_projects = {}
        for prefix in self.unscanned_projects:
            # get dataset
            dataset = ProjectDatasetBuilder(
                self.unscanned_projects[prefix]["provider_prefix"],
                self.unscanned_projects[prefix]["commit"]
            ).build()

            # construct dependency graph from the dataset
            graph = DatasetDependencyGraphBuilder().build(dataset, LEVEL_GOLANG_PACKAGES)

            # get the subgraph of evolved dependency's packages
            subgraph = GraphUtils.truncateGraph(graph, self.unscanned_projects[prefix]["paths"])

            # get dependencies from the subgraph
            package_nodes = filter(lambda l: l.startswith(self.unscanned_projects[prefix]["ipprefix"]), subgraph.nodes())
            label_edges = dataset.getLabelEdges()
            for node in package_nodes:
                nodes = nodes + label_edges[node]

        nodes = list(set(nodes))
        next_projects = self._detectNextDependencies(nodes, ipprefix, commit_timestamp)

        if next_projects == {}:
            return False

        # update packages to scan
        one_at_least = False
        self.unscanned_projects = {}
        for prefix in next_projects:
            # prefix already covered? just extend the current coverage
            if prefix in self.detected_projects:
                for ip in next_projects[prefix]["paths"]:
                    if str(ip) not in self.detected_projects[prefix]:
                        self.detected_projects[prefix].append(ip)
                        self.scanned_projects[prefix]["paths"].append(ip)
                continue

            one_at_least = True
            self.unscanned_projects[prefix] = copy.deepcopy(next_projects[prefix])
            self.scanned_projects[prefix] = copy.deepcopy(next_projects[prefix])
            self.detected_projects[prefix] = copy.deepcopy(next_projects[prefix]["paths"])

        return one_at_least

    def reconstruct(self, repository, commit, ipprefix, mains = [], tests = False):
        """Reconstruct snapshot

        :param repository: project repository
        :type repository: dict
        :param commit: repository commit
        :type commit: string
        :param ipprefix: import path prefix
        :type ipprefix: string
        :param mains: list of main packages with root path to go file to cover,
                      implicitly no main package, just devel
        :type mains: [string]
        :param tests: cover unit tests as well, default is False
        :type tests: boolean
        """
        # clear snapshot
        self._snapshot.clear()

        # get commit date of project's commit
        commit_timestamp = self._getCommitTimestamp(repository, commit)

        # get direct dependencies
        logging.info("=============DIRECT==============")
        self._detectDirectDependencies(repository, commit, ipprefix, commit_timestamp, mains, tests)

        # scan detected dependencies
        logging.info("=============INDIRECT==============")
        while self._detectIndirectDependencies(ipprefix, commit_timestamp):
            logging.info("=============INDIRECT==============")

        # create snapshot
        for prefix in self.scanned_projects:
            for ip in sorted(self.scanned_projects[prefix]["paths"]):
                self._snapshot.addPackage(ip, self.scanned_projects[prefix]["commit"])

        return self

    def snapshot(self):
        return self._snapshot
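# End-to-end sketch for SnapshotReconstructor: reconstruct the dependency
# snapshot of an upstream commit and check it against a distribution. The
# repository dict keys, the commit placeholder and the import path prefix are
# illustrative assumptions, not values taken from real data.
def _example_reconstruct_snapshot():
    snapshot = SnapshotReconstructor().reconstruct(
        {"provider": "github", "username": "coreos", "project": "etcd"},
        "<commit>",
        "github.com/coreos/etcd",
        mains = [],
        tests = True
    ).snapshot()
    SnapshotChecker().check(snapshot, "Fedora", "rawhide")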