Example #1
0
	def __init__(self):
		"""Prepare the import path parser, both acts and an empty state."""
		# import path parser built with the local mapping
		parser_builder = ImportPathParserBuilder()
		self.ipparser = parser_builder.buildWithLocalMapping()

		# acts, each baked by its own factory instance
		inspection_factory = ActFactory()
		self.go_code_inspection_act = inspection_factory.bake("go-code-inspection")
		repository_factory = ActFactory()
		self.scan_upstream_repository_act = repository_factory.bake("scan-upstream-repository")

		# snapshot object handed out to callers
		self._snapshot = Snapshot()

		# dependency space: three independent, initially empty dictionaries
		self.detected_projects, self.unscanned_projects, self.scanned_projects = {}, {}, {}
Example #2
0
	def __init__(self):
		"""Prepare the import path parser and the two acts used by the instance."""
		# import path parser built with the local mapping
		parser_builder = ImportPathParserBuilder()
		self.ipparser = parser_builder.buildWithLocalMapping()

		# act baked under the "artefact-reader" name
		reader_factory = ActFactory()
		self.artefactreaderact = reader_factory.bake("artefact-reader")

		# act baked under the "scan-upstream-repository" name
		scanner_factory = ActFactory()
		self.commitreaderact = scanner_factory.bake("scan-upstream-repository")
Example #3
0
class SnapshotChecker(object):
	"""Report how the packages of a project snapshot are covered in a distribution.

	Packages of a snapshot are grouped by their import path prefix. For each
	prefix the commit stored in the snapshot is compared with the commit of
	the rpm providing the prefix, and the packages missing in the rpm are listed.
	"""

	def __init__(self):
		# parser giving the import path prefix and the provider of an import path
		self.ipparser = ImportPathParserBuilder().buildWithLocalMapping()
		# act reading stored artefacts (ipprefix -> rpm mapping, rpm packages)
		self.artefactreaderact = ActFactory().bake("artefact-reader")
		# act retrieving commit info from an upstream repository
		self.commitreaderact = ActFactory().bake("scan-upstream-repository")

	def _getCommitDate(self, repository, commit):
		"""Retrieve info about a commit of a repository.

		:param repository: repository, passed to the act as is
		:param commit: commit
		:type  commit: string
		:return: the commit's entry of the "commits" dictionary returned by the
		         act, {} if the act raised ValueError (the error is logged)
		"""
		try:
			artefact = self.commitreaderact.call({
				"repository": repository,
				"commit": commit
			})
		except ValueError as e:
			logging.error(e)
			return {}

		return artefact["commits"][commit]

	def _comparePackages(self, package, upstream_commit, distro_commit):
		"""Compare commit dates and return a coloured one line verdict.

		:param package: name used in the message
		:type  package: string
		:param upstream_commit: commit info with "cdate" key
		:type  upstream_commit: dict
		:param distro_commit: commit info with "cdate" key
		:type  distro_commit: dict
		"""
		upstream_date = upstream_commit["cdate"]
		distro_date = distro_commit["cdate"]

		if upstream_date == distro_date:
			return "%s%s is up-to-date%s" % (GREEN, package, ENDC)
		if upstream_date < distro_date:
			return "%s%s is newer in distribution%s" % (BLUE, package, ENDC)
		# the only remaining option, a message is always returned
		return "%s%s is outdated in distribution%s" % (RED, package, ENDC)

	def _checkPackageCoverage(self, product, distribution, build, rpm, ipprefix, packages):
		"""Get packages of an ipprefix class that are not provided by an rpm.

		:param product: product, e.g. Fedora
		:param distribution: distribution, e.g. f25
		:param build: build the rpm belongs to
		:param rpm: rpm name
		:param ipprefix: import path prefix of the packages
		:type  ipprefix: string
		:param packages: packages to check
		:type  packages: [string]
		:return: sorted list of packages not listed under the ipprefix in the
		         artefact; all the packages if the artefact does not list
		         the ipprefix at all
		"""
		data = {
			"artefact": ARTEFACT_GOLANG_PROJECT_DISTRIBUTION_PACKAGES,
			"product": product,
			"distribution": distribution,
			"build": build,
			"rpm": rpm
		}
		artefact = self.artefactreaderact.call(data)

		# get list of defined packages
		for ipprefix_class in artefact["data"]:
			if ipprefix_class["ipprefix"] == ipprefix:
				# All packages covered?
				return sorted(set(packages) - set(ipprefix_class["packages"]))

		# ipprefix not defined in the rpm => none of the packages is covered
		# (falling off the loop used to return None, reported as "not covered: None")
		return sorted(set(packages))

	def check(self, snapshot, product, distribution):
		"""Check if a given snapshot is covered in a distribution

		The result is printed, one line per import path prefix.

		:param snapshot: project snapshot
		:type  snapshot: Snapshot
		:param product: OS product, e.g. Fedora
		:type  product: string
		:param distribution: OS distribution, e.g. f23, f25, rawhide, centos7, ...
		:type  distribution: string
		"""

		packages = snapshot.packages()

		# ipprefix -> packages of the snapshot with the prefix
		ipprefixes = {}
		# ipprefix -> provider signature
		providers = {}
		# ipprefix -> ipprefix2rpm artefact
		rpms = {}
		# ipprefix -> commit stored in the snapshot
		upstream = {}
		for package in packages:
			try:
				self.ipparser.parse(package)
			except ValueError:
				# report the package instead of dropping it silently
				logging.warning("Unable to recognize import path %s" % package)
				continue

			ipprefix = self.ipparser.getImportPathPrefix()
			ipprefixes.setdefault(ipprefix, []).append(package)

			# store ipprefix commit (assuming all packages with the same prefix have the same commit)
			upstream[ipprefix] = packages[package]

			# iprefix -> provider prefix
			providers[ipprefix] = self.ipparser.getProviderSignature()

			# ipprefix -> rpm
			# NOTE(review): the mapping is always read for Fedora rawhide,
			# product and distribution arguments are not used here -- confirm it is intended
			data = {
				"artefact": ARTEFACT_GOLANG_IPPREFIX_TO_RPM,
				"distribution": "rawhide",
				"product": "Fedora",
				"ipprefix": ipprefix
			}
			# if ipprefix2rpm artefact does not exist => report it and continue, no fallback
			# TODO(jchaloup): FF: fallback to generic mapping if ipprefix to pkg name
			# and report that "maybe" the ipprefix is provided by this package
			try:
				rpms[ipprefix] = self.artefactreaderact.call(data)
			except ActFailedError:
				logging.error("Unable to get mapping for %s" % package)

		for ipprefix in ipprefixes:
			if ipprefix not in providers:
				print("%sUnable to find provider for '%s' ipprefix%s" % (WHITE, ipprefix, ENDC))
				continue

			if ipprefix not in rpms:
				print("%sUnable to find ipprefix2rpm mapping '%s' ipprefix%s" % (WHITE, ipprefix, ENDC))
				continue

			# Messages below are keyed by the ipprefix being processed. The
			# 'package' variable of the previous loop is stale here and is
			# not a key of rpms.
			upstream_commit = self._getCommitDate(providers[ipprefix], upstream[ipprefix])
			if upstream_commit == {}:
				logging.error("Unable to retrieve commit info for %s %s" % (ipprefix, upstream[ipprefix]))
				continue

			distro_commit = self._getCommitDate(providers[ipprefix], rpms[ipprefix]["commit"])
			if distro_commit == {}:
				logging.error("Unable to retrieve commit info for %s %s" % (ipprefix, rpms[ipprefix]["commit"]))
				continue

			# compare commits
			comparison = self._comparePackages(ipprefix, upstream_commit, distro_commit)

			# check if packages in ipprefix class are covered in distribution
			not_covered = self._checkPackageCoverage(product, distribution, rpms[ipprefix]["build"], rpms[ipprefix]["rpm"], ipprefix, ipprefixes[ipprefix])

			# single parenthesized argument prints the same under Python 2 and 3
			if not_covered:
				print("%s: %snot covered: %s%s" % (comparison, RED, not_covered, ENDC))
			else:
				print(comparison)
Example #4
0
class SnapshotReconstructor(object):
	"""Reconstruct a snapshot of dependencies of a project for a given commit.

	Direct dependencies of the project are detected first, dependencies of
	the dependencies are then detected in rounds until no new project shows up.
	Each detected project is pinned to its youngest commit that is not newer
	than the commit of the project.
	"""

	def __init__(self):
		# parsers
		self.ipparser = ImportPathParserBuilder().buildWithLocalMapping()

		# acts
		self.go_code_inspection_act = ActFactory().bake("go-code-inspection")
		self.scan_upstream_repository_act = ActFactory().bake("scan-upstream-repository")

		# snapshot
		self._snapshot = Snapshot()

		# dependency space
		self._resetDependencySpace()

	def _resetDependencySpace(self):
		"""Forget all projects detected so far."""
		# prefix -> list of import paths already detected under the prefix
		self.detected_projects = {}
		# prefix -> project (ipprefix, paths, provider, commit, provider_prefix)
		# to be scanned in the next round
		self.unscanned_projects = {}
		# prefix -> project, everything that ends up in the snapshot
		self.scanned_projects = {}

	def _getCommitTimestamp(self, repository, commit):
		"""Retrieve commit from a repository, returns its commits date

		:param repository: repository
		:type  repository: dict
		:param commit: commit
		:type  commit: hex string
		"""
		data = {
			"repository": repository,
			"commit": commit
		}
		# TODO(jchaloup): catch exception if the commit is not found
		commit_data = self.scan_upstream_repository_act.call(data)
		return commit_data["commits"][commit]["cdate"]

	def _findYoungestCommits(self, commits):
		"""Pick the commit with the highest commit date.

		Commits with the same date are ordered by the commit itself so the
		result does not depend on the order of the dictionary.

		:param commits: non-empty mapping of a commit to its info with "cdate" key
		:type  commits: dict
		:return: {"c": commit, "d": commit date}
		"""
		youngest = max(commits, key = lambda commit: (commits[commit]["cdate"], commit))
		return {"c": youngest, "d": commits[youngest]["cdate"]}

	def _findClosestCommit(self, repository, timestamp):
		"""Get the youngest commit from the repository that is at most as young as timestamp.

		The repository is searched in a growing interval ending at the timestamp
		(last day, week, month, year, no lower bound).

		:param repository: repository
		:type  repository: dict
		:param timestamp: commit timestamp
		:type  timestamp: integer
		:return: {"c": commit, "d": commit date}
		:raises KeyError: if the repository has no commit up to the timestamp
		"""
		# TODO(jchaloup): search for commits only on master branch!!!
		# other branches can be in inconsistent state with experimental features
		# and get picked unintentionally
		DAY = 3600*24
		# try the last day, week, last month, last year
		for delta in (1, 7, 30, 365):
			rdata = self.scan_upstream_repository_act.call({
				"repository": repository,
				"start_timestamp": timestamp - delta*DAY,
				"end_timestamp": timestamp
			})
			if rdata["commits"]:
				return self._findYoungestCommits(rdata["commits"])

		# unbound start_timestamp
		rdata = self.scan_upstream_repository_act.call({
			"repository": repository,
			"end_timestamp": timestamp
		})
		if rdata["commits"]:
			return self._findYoungestCommits(rdata["commits"])

		# no commit found => raise exception
		raise KeyError("Commit not found")

	def _detectNextDependencies(self, dependencies, ipprefix, commit_timestamp):
		"""Turn a list of imported paths into projects to be scanned.

		:param dependencies: imported paths
		:type  dependencies: [string]
		:param ipprefix: import path prefix of the reconstructed project, filtered out
		:type  ipprefix: string
		:param commit_timestamp: commit date of the reconstructed project's commit
		:type  commit_timestamp: integer
		:return: prefix -> project (ipprefix, paths, provider, commit, provider_prefix),
		         only for prefixes with paths not yet detected
		:raises ReconstructionError: if a prefix has no known provider or
		         no commit up to the timestamp
		"""
		# normalize paths
		normalizer = ImportPathNormalizer()
		dependencies = [normalizer.normalize(path) for path in set(dependencies)]

		decomposer = ImportPathsDecomposerBuilder().buildLocalDecomposer()
		decomposer.decompose(dependencies)
		prefix_classes = decomposer.getClasses()

		next_projects = {}

		for prefix in prefix_classes:
			# filter out Native prefix
			if prefix == "Native":
				continue

			# filter out project's import path prefix
			if prefix == ipprefix:
				continue

			logging.warning("Processing %s ..." % prefix)

			paths = prefix_classes[prefix]
			# for each imported path get a list of commits in a given interval
			try:
				self.ipparser.parse(prefix)
				# ipprefix already covered?
				# the same key is used for the test and for the lookup
				known_prefix = self.ipparser.getImportPathPrefix()
				if known_prefix in self.detected_projects:
					# ip covered in the prefix class?
					covered = self.detected_projects[known_prefix]
					not_covered = [ip for ip in paths if ip not in covered]

					if not not_covered:
						logging.warning("Prefix %s already covered" % prefix)
						continue

					# this warning used to sit after the continue and was never emitted
					logging.warning("Some paths '%s' not yet covered in '%s' prefix" % (str(not_covered), prefix))
					# scan only ips not yet covered
					paths = not_covered

				provider = self.ipparser.getProviderSignature()
				provider_prefix = self.ipparser.getProviderPrefix()
			except ValueError as e:
				raise ReconstructionError("Prefix provider error: %s" % e)

			try:
				closest_commit = self._findClosestCommit(provider, commit_timestamp)
			except KeyError:
				raise ReconstructionError("Closest commit to %s timestamp for %s not found" % (commit_timestamp, provider_prefix))

			# update packages to scan
			next_projects[prefix] = {
				"ipprefix": prefix,
				# a real list, paths get appended to later on
				"paths": [str(ip) for ip in paths],
				"provider": provider,
				"commit": closest_commit["c"],
				"provider_prefix": provider_prefix
			}

		return next_projects

	def _detectDirectDependencies(self, repository, commit, ipprefix, commit_timestamp, mains, tests):
		"""Detect projects imported directly by the reconstructed project.

		Detected projects are stored into the dependency space.

		:param repository: project repository (not used at the moment)
		:type  repository: dict
		:param commit: repository commit
		:type  commit: string
		:param ipprefix: import path prefix of the project
		:type  ipprefix: string
		:param commit_timestamp: commit date of the commit
		:type  commit_timestamp: integer
		:param mains: main package files whose dependencies are covered as well
		:type  mains: [string]
		:param tests: cover dependencies of unit tests as well
		:type  tests: boolean
		:raises ReconstructionError: if the provider of the ipprefix is not known
		         or a main package file is not found
		"""
		# The project to inspect used to be hard-coded to github.com/coreos/etcd.
		# NOTE(review): provider prefix of the ipprefix is assumed to be the
		# project id the act expects (it has the same form) -- confirm
		try:
			self.ipparser.parse(ipprefix)
			project = self.ipparser.getProviderPrefix()
		except ValueError as e:
			raise ReconstructionError("Prefix provider error: %s" % e)

		data = {
			"type": "upstream_source_code",
			"project": project,
			"commit": commit,
			"ipprefix": ipprefix,
			"directories_to_skip": []
		}

		packages_artefact = self.go_code_inspection_act.call(data)

		# collect dependencies
		direct_dependencies = []
		for package in packages_artefact["data"]["dependencies"]:
			direct_dependencies.extend([dependency["name"] for dependency in package["dependencies"]])

		if mains:
			paths = {}
			for path in packages_artefact["data"]["main"]:
				paths[path["filename"]] = path["dependencies"]

			for main in mains:
				if main not in paths:
					raise ReconstructionError("Main package file %s not found" % main)

				direct_dependencies.extend(paths[main])

		if tests:
			for test in packages_artefact["data"]["tests"]:
				direct_dependencies.extend(test["dependencies"])

		# remove duplicates
		direct_dependencies = list(set(direct_dependencies))

		next_projects = self._detectNextDependencies(direct_dependencies, ipprefix, commit_timestamp)

		# update detected projects
		for project in next_projects:
			self.detected_projects[project] = next_projects[project]["paths"]

		# update packages to scan
		for prefix in next_projects:
			if prefix in self.unscanned_projects:
				continue

			self.unscanned_projects[prefix] = copy.deepcopy(next_projects[prefix])
			self.scanned_projects[prefix] = copy.deepcopy(next_projects[prefix])

	def _detectIndirectDependencies(self, ipprefix, commit_timestamp):
		"""Scan projects detected in the previous round for their dependencies.

		:param ipprefix: import path prefix of the reconstructed project
		:type  ipprefix: string
		:param commit_timestamp: commit date of the reconstructed project's commit
		:type  commit_timestamp: integer
		:return: True if at least one new project was detected (another round
		         is needed), False otherwise
		"""
		nodes = set()
		for prefix in self.unscanned_projects:
			project = self.unscanned_projects[prefix]

			# get dataset
			dataset = ProjectDatasetBuilder(
				project["provider_prefix"],
				project["commit"]
			).build()

			# construct dependency graph from the dataset
			graph = DatasetDependencyGraphBuilder().build(dataset, LEVEL_GOLANG_PACKAGES)

			# get the subgraph of evolved dependency's packages
			subgraph = GraphUtils.truncateGraph(graph, project["paths"])

			# get dependencies from the subgraph
			package_nodes = [node for node in subgraph.nodes() if node.startswith(project["ipprefix"])]
			label_edges = dataset.getLabelEdges()
			for node in package_nodes:
				nodes.update(label_edges[node])

		next_projects = self._detectNextDependencies(list(nodes), ipprefix, commit_timestamp)
		if not next_projects:
			return False

		# update packages to scan
		one_at_least = False
		self.unscanned_projects = {}

		for prefix in next_projects:
			# prefix already covered? Just extend the current coverage
			if prefix in self.detected_projects:
				for ip in next_projects[prefix]["paths"]:
					if str(ip) not in self.detected_projects[prefix]:
						self.detected_projects[prefix].append(ip)
						self.scanned_projects[prefix]["paths"].append(ip)
				continue

			one_at_least = True
			self.unscanned_projects[prefix] = copy.deepcopy(next_projects[prefix])
			self.scanned_projects[prefix] = copy.deepcopy(next_projects[prefix])
			self.detected_projects[prefix] = copy.deepcopy(next_projects[prefix]["paths"])

		return one_at_least

	def reconstruct(self, repository, commit, ipprefix, mains = None, tests = False):
		"""Reconstruct snapshot

		:param repository: project repository
		:type  repository: dict
		:param commit: repository commit
		:type  commit: string
		:param ipprefix: import path prefix
		:type  ipprefix: string
		:param mains: list of main packages with root path to go file to cover, implicitly no main package, just devel
		:type  mains: [string]
		:param tests: cover unit tests as well, default is False
		:type  tests: boolean
		:return: the reconstructor itself, the result is available via snapshot()
		"""
		# None instead of a shared mutable [] default
		if mains is None:
			mains = []

		# clear snapshot and projects detected by a previous reconstruction,
		# otherwise they would end up in the new snapshot as well
		self._snapshot.clear()
		self._resetDependencySpace()

		# get commit date of project's commit
		commit_timestamp = self._getCommitTimestamp(repository, commit)
		# get direct dependencies
		logging.info("=============DIRECT==============")
		self._detectDirectDependencies(repository, commit, ipprefix, commit_timestamp, mains, tests)

		# scan detected dependencies
		logging.info("=============UNDIRECT==============")
		while self._detectIndirectDependencies(ipprefix, commit_timestamp):
			logging.info("=============UNDIRECT==============")

		# create snapshot
		for prefix in self.scanned_projects:
			for ip in sorted(self.scanned_projects[prefix]["paths"]):
				self._snapshot.addPackage(ip, self.scanned_projects[prefix]["commit"])

		return self

	def snapshot(self):
		"""Return the snapshot filled by the last reconstruct() call."""
		return self._snapshot