Example #1
    def __init__(self, parser_config, commit_date, verbose=False):
        Base.__init__(self)
        self.err = []
        self.warn = []

        self.parser_config = parser_config
        self.import_path_prefix = self.parser_config.getImportPathPrefix()
        self.verbose = verbose
        self.pull = False

        self.local_repos = {}
        self.upstream_repo = {}
        self.commit_date = commit_date
        self.detected_commits = {}
        self.deps_queue = []
        self.defined_packages = {}

        self.source_code_storage = SourceCodeStorage("/var/lib/gofed/storage")
Example #2
	def getAPIDiff(self, deps, commits):
		"""
		For each pair, run apidiff(upstream, rawhide).
		Requests a directory containing tarballs in the form:
			provider-project-repo-commit
		"""

		scs = SourceCodeStorage("/var/lib/gofed/storage", self.verbose)
		if self.verbose:
			print "Collecting tarballs..."

		noGodeps = Config().getSkippedDirectories()

		self.apidiff = {}

		print "Upstream\t\tRawhide"
		for ip in deps:
			if commits[ip] == "":
				continue

			self.apidiff[ip] = {'rawhide': commits[ip], 'upstream': deps[ip], 'diff': []}

			deps_dir = scs.getDirectory(ip, deps[ip])
			commits_dir = scs.getDirectory(ip, commits[ip])
			#print (deps_dir, commits_dir)
			config = ParserConfig()
			# 'options' presumably comes from the module-level command-line parser
			if options.skiperrors:
				config.setSkipErrors()
			config.setNoGodeps(noGodeps)

			cmp_src = CompareSourceCodes(config)
			#print "Comparing"
			print "Processing %s ..." % ip
			cmp_src.compareDirs(deps_dir, commits_dir)

			for e in cmp_src.getError():
				print "Error: %s" % e

			apichanges = cmp_src.getStatus()

			for pkg in apichanges:
				if pkg == "+":
					continue

				if pkg == "-":
					self.apidiff[ip]['diff'].append({'package': '', 'change': apichanges[pkg]})
					continue

				#print "%sPackage: %s%s" % (BLUE, pkg, ENDC)
				for change in apichanges[pkg]:
					if change[0] == '-':
						self.apidiff[ip]['diff'].append({'package': pkg, 'change': change})

		# report the collected differences
		for ip in deps:
			if ip not in self.apidiff:
				continue

			print "%s: |%s - %s|" % (ip, self.apidiff[ip]['upstream'], self.apidiff[ip]['rawhide'])
			for change in self.apidiff[ip]['diff']:
				print "\t%s%s%s" % (RED, change, ENDC)

			# TODO:
			# - output all deps without any rawhide commits as well (something like NOT FOUND)
			# - count the deviation

		return True
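
A minimal usage sketch of getAPIDiff above, with made-up import paths and commit hashes; 'checker' stands for a hypothetical instance of the class this method belongs to. deps maps import paths to upstream commits, commits maps the same import paths to Rawhide commits:

# Hedged sketch: 'checker', the import path and both commit hashes are illustrative only.
deps = {"github.com/boltdb/bolt": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
commits = {"github.com/boltdb/bolt": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"}
checker.getAPIDiff(deps, commits)
# checker.apidiff now maps each import path to
# {'rawhide': <commit>, 'upstream': <commit>, 'diff': [{'package': ..., 'change': ...}, ...]}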
Example #3
class DependencyApproximator(Base):
    """
	1) Provide a golang project and commit
	2) For the project, get a list of its dependencies and determine
	   the closest possible commit to the one provided (e.g. based on date)
	3) Decompose the imported packages into classes
	4) For each class, get a list of used packages (a subset of the project's packages)
	5) Mark the import path prefix of each processed class to avoid cycles
	6) For each new import path prefix found, put it into a queue of projects to process (with a subset of packages to read)

	At this point there is a list of direct dependencies. Now get the list of indirect dependencies:
	7) For each dependency and the subset of its packages (in the queue), construct a dependency graph of the project for the given commit
	8) From each node in the dependency graph, get a list of imported packages.
	9) For each list, repeat steps 2), 3), 4), 5) and 6)
	10) Repeat steps 7)-9) until the queue is empty

	There is a chance of getting two different commits for the same dependency (take the younger of them but report it to the user).

	As dependencies are parsed only partially, cyclic dependencies can bring in packages outside of the partially explored set.
	Thus dependencies are stored by package, not by prefix. This ensures all imported packages get processed.
	E.g. A(1,2)->B(1), B(1)->C(2), C(2)->A(3). Here, packages 1 and 2 from A were imported first (and processed). Later on,
	package 3 from A was imported. As A was only partially processed, package 3 would normally not get processed. However, as
	all imported packages are stored in the queue, package 3 gets processed eventually.

	"""

    def __init__(self, parser_config, commit_date, verbose=False):
        Base.__init__(self)
        self.err = []
        self.warn = []

        self.parser_config = parser_config
        self.import_path_prefix = self.parser_config.getImportPathPrefix()
        self.verbose = verbose
        self.pull = False

        self.local_repos = {}
        self.upstream_repo = {}
        self.commit_date = commit_date
        self.detected_commits = {}
        self.deps_queue = []
        self.defined_packages = {}

        self.source_code_storage = SourceCodeStorage("/var/lib/gofed/storage")

    def construct(self):
        self.getRepos()

        if self.verbose:
            sys.stderr.write("####Scanning direct dependencies####\n")

        self.getDirectDependencies()
        self.popDepsQueue()

        if self.verbose:
            sys.stderr.write("\n####Scanning indirect dependencies####\n")

        self.getIndirectDependencies()
        while self.deps_queue != []:
            self.getIndirectDependencies()

    def getDependencies(self):
        return self.detected_commits

    def getRepos(self):
        r_obj = Repos()
        repos = r_obj.parseReposInfo()
        self.local_repos = {}
        self.upstream_repo = {}

        # 'golang-github-boltdb-bolt': ('/var/lib/gofed/packages/golang-github-boltdb-bolt/upstream//bolt', 'https://github.com/boltdb/bolt.git')
        for name in repos:
            dir, repo = repos[name]

            m_repo = str.replace(repo, "https://", "")
            m_repo = str.replace(m_repo, "http://", "")

            if m_repo.endswith(".git"):
                m_repo = m_repo[:-4]

            if m_repo.endswith(".hg"):
                m_repo = m_repo[:-3]

            self.local_repos[m_repo] = dir
            self.upstream_repo[m_repo] = repo

    def popDepsQueue(self):
        # pop direct dependencies
        for dep in self.detected_commits:
            self.deps_queue.append(dep)

            # in case of cyclic deps let's pop project's packages as well
            # for pkg in self.defined_packages:
            # 	self.deps_queue.append(pkg)

        return

        # unreachable debug output left from development:
        # for ip in self.deps_queue:
        #     if ip in self.detected_commits:
        #         print "%s: %s" % (ip, self.detected_commits[ip]["Date"])
        #     else:
        #         print ip

    def detectProjectSubpackages(self, prefix, imported_packages):
        subpackages = []
        prefix_len = len(prefix)
        for ip in imported_packages:
            if ip.startswith(prefix):
                subpackage = ip[prefix_len:]
                if subpackage == "":
                    subpackage = "."
                else:
                    subpackage = subpackage[1:]
                subpackages.append(subpackage)
        return subpackages

    def processElement(self, element):
        # convert each import path prefix to provider prefix
        ip_obj = ImportPath(element)
        if not ip_obj.parse():
            self.err.append(ip_obj.getError())
            return {}

        provider_prefix = ip_obj.getProviderPrefix()
        if provider_prefix not in self.local_repos:
            self.err.append("Repository for %s not found" % provider_prefix)
            return {}

        # print self.local_repos[provider_prefix]
        path = self.local_repos[provider_prefix]
        upstream = self.upstream_repo[provider_prefix]

        # the list is not sorted by date
        commits = getRepoCommits(path, upstream, pull=self.pull)
        commit_dates = {}
        for commit in commits:
            commit_dates[commits[commit]] = commit

        last_commit_date = 1
        last_commit = -1
        for comm_date in sorted(commit_dates.keys()):
            # print (comm_date, self.commit_date)
            if int(comm_date) <= self.commit_date:
                last_commit_date = comm_date
                last_commit = commit_dates[comm_date]
            else:
                break

        str_date = datetime.datetime.fromtimestamp(int(last_commit_date)).strftime("%Y-%m-%d %H:%M:%S")

        info = {}
        info["Date"] = str_date
        info["Rev"] = last_commit
        info["ProviderPrefix"] = provider_prefix

        return info

    def getIndirectDependencies(self):
        """
		All new deps are put into a local queue.
		Once deps_queue is processed, it is replaced with the local one.
		"""
        queue = []

        for ip in self.deps_queue:
            # for a given import path, construct its partial graph
            parser_config = self.parser_config
            import_path_prefix = self.detected_commits[ip]["ImportPathPrefix"]
            parser_config.setImportPathPrefix(import_path_prefix)
            # set path to SourceCodeStorage
            path = self.source_code_storage.getDirectory(
                self.detected_commits[ip]["ProviderPrefix"], self.detected_commits[ip]["Rev"]
            )
            parser_config.setParsePath(path)
            subpackages = self.detectProjectSubpackages(self.detected_commits[ip]["ImportPathPrefix"], [ip])
            # TODO(jchaloup): Later, add all packages of the same prefix to speed it up
            parser_config.setPartial(subpackages)

            if self.verbose:
                sys.stderr.write(
                    "Scanning %s: %s\n" % (self.detected_commits[ip]["ImportPathPrefix"], ",".join(subpackages))
                )

            gb = ProjectDecompositionGraphBuilder(parser_config)
            gb.buildFromDirectory(path)

            partial_deps = gb.getPartial()
            for ip_used in partial_deps:

                ipd = ImportPathsDecomposer(partial_deps[ip_used])
                if not ipd.decompose():
                    self.err.append(ipd.getError())
                    return False

                self.warn.append(ipd.getWarning())

                classes = ipd.getClasses()
                sorted_classes = sorted(classes.keys())

                for element in sorted_classes:
                    if element == "Native":
                        continue

                    # class name starts with prefix => filter out
                    if element.startswith(import_path_prefix):
                        continue

                    element_info = self.processElement(element)
                    if element_info == {}:
                        continue

                    for ip in classes[element]:
                        # is import path already checked in?
                        if ip in self.detected_commits:
                            # print "^^^^%s" % ip
                            continue
                            # TODO(jchaloup): or is ip in defined packages?

                        info = copy.deepcopy(element_info)
                        info["ImportPath"] = str(ip)
                        info["ImportPathPrefix"] = element
                        self.detected_commits[ip] = info
                        if self.verbose:
                            sys.stderr.write("%s\n" % str(info))

                        queue.append(ip)

        self.deps_queue = queue
        return True

    def getDirectDependencies(self):

        gse_obj = GoSymbolsExtractor(self.parser_config)
        if not gse_obj.extract():
            self.err.append(gse_obj.getError())
            return False

        package_imports_occurence = gse_obj.getPackageImportsOccurences()

        ip_used = gse_obj.getImportedPackages()
        ipd = ImportPathsDecomposer(ip_used)
        if not ipd.decompose():
            self.err.append(ipd.getError())
            return False

        self.warn.append(ipd.getWarning())

        classes = ipd.getClasses()
        sorted_classes = sorted(classes.keys())

        for element in sorted_classes:
            if element == "Native":
                continue

            # class name starts with prefix => filter out
            if element.startswith(self.import_path_prefix):
                continue

            element_info = self.processElement(element)
            if element_info == {}:
                continue

            if self.verbose:
                sys.stderr.write(element + " (" + str(self.detectProjectSubpackages(element, classes[element])) + ")\n")

            for ip in classes[element]:
                info = copy.deepcopy(element_info)
                info["ImportPath"] = str(ip)
                info["ImportPathPrefix"] = element
                self.detected_commits[ip] = info
                if self.verbose:
                    sys.stderr.write("%s\n" % str(info))

        for pkg in gse_obj.getSymbols().keys():
            ip, _ = pkg.split(":")
            if ip == ".":
                ip = self.import_path_prefix
            else:
                ip = "%s/%s" % (self.import_path_prefix, ip)
            self.defined_packages[ip] = {}

        return True
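
The queue-driven expansion described in the class docstring (steps 7)-10)) can be isolated into a small worklist loop. Below is a minimal, self-contained sketch under assumed data; approximate() and get_imported_packages() are hypothetical stand-ins for the graph building, decomposition and commit detection that getIndirectDependencies() performs:

# Hedged sketch of the worklist pattern only; all names and data below are illustrative.
def approximate(direct_deps, get_imported_packages):
    detected = set(direct_deps)   # plays the role of the self.detected_commits keys
    queue = list(direct_deps)     # plays the role of self.deps_queue
    while queue:
        next_queue = []
        for ip in queue:
            for used_ip in get_imported_packages(ip):
                if used_ip in detected:   # already checked in => skip (this breaks cycles)
                    continue
                detected.add(used_ip)
                next_queue.append(used_ip)
        queue = next_queue            # replace the queue with the locally built one
    return detected

# the cyclic A(1,2)->B(1), B(1)->C(2), C(2)->A(3) case from the docstring
imports = {
    "A/1": ["B/1"], "A/2": ["B/1"], "A/3": [],
    "B/1": ["C/2"], "C/2": ["A/3"],
}
print(approximate(["A/1", "A/2"], lambda ip: imports.get(ip, [])))

Running the sketch yields a set that includes A/3, showing that a package imported late from the partially processed project A is still picked up via the queue.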
Example #4
	def getAPIDiff(self, deps, commits):
		"""
		For each pair, run apidiff(upstream, rawhide).
		Requests a directory containing tarballs in the form:
			provider-project-repo-commit
		"""

		scs = SourceCodeStorage("/tmp/test", self.verbose)
		if self.verbose:
			print "Collecting tarballs..."

		noGodeps = Config().getSkippedDirectories()

		apidiff = {}

		print "Upstream\t\tRawhide"
		for ip in deps:
			if commits[ip] == "":
				continue

			deps_dir = scs.getDirectory(ip, deps[ip])
			commits_dir = scs.getDirectory(ip, commits[ip])
			#print (deps_dir, commits_dir)

			cmp_src = CompareSourceCodes(skip_errors=True, noGodeps=noGodeps)
			#print "Comparing"
			print "Processing %s ..." % ip
			cmp_src.compareDirs(deps_dir, commits_dir)

			for e in cmp_src.getError():
				print "Error: %s" % e

			apichanges = cmp_src.getStatus()

			for pkg in apichanges:
				if pkg == "+":
					continue

				if pkg == "-":
					if ip not in apidiff:
						apidiff[ip] = [{'package': '', 'change': apichanges[pkg]}]
					else:
						apidiff[ip].append({'package': '', 'change': apichanges[pkg]})
					continue

				#print "%sPackage: %s%s" % (BLUE, pkg, ENDC)
				for change in apichanges[pkg]:
					if change[0] == '-':
						if ip not in apidiff:
							apidiff[ip] = [{'package': pkg, 'change': change}]
						else:
							apidiff[ip].append({'package': pkg, 'change': change})

		for ip in deps:
			if ip not in apidiff:
				continue

			print "%s: |%s - %s|" % (ip, deps[ip], commits[ip])
			for change in apidiff[ip]:
				print "\t%s%s%s" % (RED, change, ENDC)

			# TODO:
			# - skip all empty apidiffs
			# - show only apidiffs with '-'
			# - output all deps without any rawhide commits as well (something like NOT FOUND)
			# - count the deviation

		return {}
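
Unlike example #2, this variant collects the differences in a plain local dict keyed by import path and only prints them; the method itself returns an empty dict. A hedged sketch of the shape apidiff holds right before the reporting loop, with made-up values:

# Illustrative only; the import path, package name and change strings are made up.
apidiff = {
    "github.com/boltdb/bolt": [
        {'package': '', 'change': '-whole package removed'},       # the '-' key case
        {'package': 'bolt', 'change': '-func Open(...) removed'},  # a symbol-level removal
    ],
}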