class ReinitTypesTest(Experiment):
    """Experiment that checks the values its inputs carry when ``run`` executes.

    ``run`` asserts that ``string`` is ``"X"``, ``bool`` is ``False`` and
    ``string_optional`` is ``None``.

    NOTE(review): the declared defaults of ``string`` ("A") and ``bool``
    (True) differ from the asserted values, so the inputs are presumably
    overridden by whatever drives this experiment -- confirm against the
    caller.
    """

    inputs = {
        "string": String("A"),
        "bool": Bool(True),
        "string_optional": Optional(String()),
        "list": List(String, []),
    }

    def run(self):
        """Assert the expected value of each scalar input, one after another."""
        assert self.string.value == "X"
        assert self.bool.value == False
        assert self.string_optional.value == None
class SimpleExperiment(Experiment):
    """Minimal experiment: joins two string inputs into one output line."""

    inputs = {
        "input_key": String("default key"),
        "input_value": String("default value"),
    }
    outputs = {"output_file": File("output")}

    def run(self):
        """Store ``"<input_key>: <input_value>\\n"`` as the output file's value."""
        # Read both parameters first, key before value.
        key_text = self.inputs.input_key.value
        value_text = self.inputs.input_value.value

        # Build the line and hand it to the output file object.
        line = key_text + ": " + value_text
        self.outputs.output_file.value = line + "\n"
class BasicTypesTest(Experiment):
    """Experiment that exercises the basic behaviour of input parameter objects.

    ``run`` checks ``was_given()``, the ``str``/``repr``/``%s`` renderings of
    the ``string`` input and the value of the ``bool`` input.

    NOTE(review): ``bool`` defaults to True but is asserted to be False, so
    it is presumably overridden by the caller -- confirm against the driver.
    """

    inputs = {
        "string": String("ABC"),
        "bool": Bool(True),
        "string_optional": Optional(String()),
    }

    def run(self):
        """Run the assertions on the parameter objects in a fixed order."""
        # Which parameters report that they were given.
        assert not self.string_optional.was_given()
        assert self.string.was_given()

        # str() yields the plain value, repr() a distinct object description.
        assert str(self.string) == "ABC"
        assert str(self.string) != repr(self.string)
        assert "<versuchung.types.String" in repr(self.string)
        assert "%s" % self.string == "ABC"

        assert self.bool.value == False
class SimpleExperiment(Experiment):
    """Experiment that checks list-typed inputs and their metadata entries.

    NOTE(review): the asserted contents of ``strings`` (["x86", "sparc"]) and
    ``default2`` (["bar"]) differ from the declared defaults, so both are
    presumably supplied by the caller -- confirm against the driver.
    """

    inputs = {
        "strings": List(String(), default_value=[]),
        "default": List(String, default_value=[String("foo")]),
        "default2": List(String, default_value=[String("fox")]),
        "default3": List(String, default_value=[String("a"), String("b")]),
    }

    def run(self):
        """Compare the element values of three list inputs and two metadata keys."""
        given = [item.value for item in self.i.strings]
        assert given == ["x86", "sparc"]

        untouched = [item.value for item in self.i.default]
        assert untouched == ["foo"]

        replaced = [item.value for item in self.i.default2]
        assert replaced == ["bar"]

        # Each element of the "strings" list has its own indexed metadata key.
        assert self.metadata["strings-0"] == "x86"
        assert self.metadata["strings-1"] == "sparc"
class RandomConfig(AttributeExperiment):
    """Input declaration for an ``AttributeExperiment`` on a configured project.

    Only the input parameters are declared here; no method is overridden.
    """

    inputs = {
        # Identification parameters; "FIXME" is the placeholder default.
        "randconfig_seed": String("FIXME"),
        "config_hash": String("FIXME"),
        "kconfig_hash": String("FIXME"),
        # Project location and version.
        "project_root": Directory("/tmp"),
        "project_version": String("FIXME"),
        # Command strings, defaulting to make targets.
        "clean_command": String("make clean"),
        "build_command": String("make"),
        "attr_command": String("make attributes"),
    }
class SimpleExperiment(Experiment):
    """Experiment that checks attribute-style access to inputs and outputs."""

    inputs = {
        "abc": File("/dev/null"),
        "xxx": File("/dev/null"),
        "empty": String(None),
    }
    outputs = {
        "xyz": File("asd"),
        "zzz": File("asd"),
    }

    def run(self):
        """Assert that parameters are reachable directly on the experiment."""
        # The shortcut attribute and the explicit container entry must agree.
        assert self.abc == self.inputs.abc
        assert self.xyz == self.outputs.xyz

        # Touching self.xxx must not raise AttributeError; the comparison
        # result itself is irrelevant.
        attribute_error_seen = False
        try:
            self.xxx != None
        except AttributeError:
            attribute_error_seen = True
        assert attribute_error_seen == False

        assert self.empty.value is None
class TimingInternal(Experiment, ClangHashHelper):
    """Build a project through a timing compiler wrapper and average phase times.

    The build is run with ``CC`` pointing at ``wrappers/gcc-time`` inside the
    clang-hash checkout. The report file named by ``TIMING_REPORT`` is then
    read back; the number of compiler calls and the average of three
    selected phases are stored in the ``tex`` output via ``save``.
    """

    inputs = {
        "clang_hash": GitArchive("/home/stettberger/w/clang-hash/"),
        "project": GitArchive("/home/stettberger/w/clang-hash/hash-projects/lua", shallow=True),
        "cflags": String(""),
        "jobs": Integer(4),
        "mode": String("normal"), # Unchangeable
    }
    outputs = {
        "stats": File("summary.dict"),
        'tex': DatarefDict('data.dref'),
    }

    def save(self, path, value):
        """Store ``value`` in the ``tex`` output under the '/'-joined ``path``.

        ``path`` is a sequence of strings; the same key/value pair is also
        written to the log.
        """
        self.tex['/'.join(path)] = value
        logging.info("%s = %s", '/'.join(path), value)

    def run(self):
        """Build the project once and record compiler-call count and phase averages."""
        with self.clang_hash as cl_path:
            logging.info("Cloning clang hash...")

        logging.info("Cloning project... %s", self.project_name())

        # First, we redirect all calls to the compiler to our
        # gcc wrapper
        CC = os.path.join(cl_path, "wrappers/gcc-time")
        os.environ["CC"] = CC
        # The report location and the extra flags are handed over through
        # the process environment as well.
        os.environ["TIMING_REPORT"] = self.stats.path
        os.environ["CHASH_EXTRA_FLAGS"] = self.cflags.value
        with self.project as src_path:
            info = {}
            self.call_configure(src_path)
            self.rebuild(src_path, info, True)

        # Maps a record key to the list of all float values seen for it.
        collect = defaultdict(list)
        compiler_calls = 0
        with open(self.stats.path) as fd:
            for line in fd.readlines():
                # NOTE(review): eval() executes whatever the report file
                # contains; this is only acceptable while the file is
                # produced by our own wrapper. Consider ast.literal_eval
                # if every line is a plain literal -- confirm the format.
                data = eval(line)
                # Records that carry a "name" key are counted as compiler calls.
                if "name" in data:
                    compiler_calls += 1
                # Only values that are exactly of type float are collected.
                for key in data:
                    if type(data[key]) is float:
                        collect[key].append(data[key])

        self.save([self.project_name(), "phase", self.cflags.value, "count"], compiler_calls)
        for phase in collect:
            # Only these three phases are reported; everything else is dropped.
            if phase in ("preprocessing", "parser (global)", "phase opt and generate"):
                self.save(
                    [self.project_name(), "phase", phase, self.cflags.value],
                    np.average(collect[phase]))

    def symlink_name(self):
        """Return ``<title>-<project name><cflags>`` (no separator before cflags)."""
        return "%s-%s%s" % (self.title, self.project_name(), self.cflags.value)
class HistoricalCompilation(Experiment, ClangHashHelper):
    """Replay the last N non-merge commits of a project and measure each rebuild.

    For every commit the parent revision is brought into a built state
    first (reusing the previous build where possible). Then the commit
    itself is checked out, reconfigured and rebuilt. The per-commit ``info``
    dictionaries are collected in ``self.build_info["builds"]`` and written
    as a ``repr`` to the ``stats`` output.
    """

    inputs = {
        "clang_hash": GitArchive("/home/stettberger/w/clang-hash/"),
        "project": GitArchive("/home/stettberger/w/clang-hash/hash-projects/lua"),
        "mode": String("normal"),
        "commits": Integer(500),
        "jobs": Integer(4),
    }
    outputs = {
        "stats": File("summary.dict"),
        "ccache_stats": File("ccache.stats"),
        "clang_hash_log": File("clang-hash.log"),
    }

    def build_parent(self, commit, from_scratch=False):
        """Bring the working tree into a built state of ``commit``'s parent.

        :param commit: revision whose parent (``<commit>^``) is to be built.
        :param from_scratch: if true, clean the tree (keeping ``*.hash`` and
            ``*.hash.copy`` files), configure and build from nothing.
            Otherwise reuse the current tree when it already is the parent,
            or reset and rebuild incrementally.
        :returns: ``True`` if the parent is built, ``False`` if even the
            from-scratch build failed.
        """
        def eq_hash(a, b):
            # Two revisions match if one is a prefix of the other
            # (abbreviated vs. full hash); an empty hash never matches.
            if len(a) == 0 or len(b) == 0:
                return False
            if len(a) > len(b):
                return a.startswith(b)
            return b.startswith(a)

        src_path = self.project.path

        if from_scratch:
            shell("cd %s; git clean -dfx -e '*.hash' -e '*.hash.copy'", src_path)
            logging.info("Parent [%s^]: clean build", commit)
            shell("cd %s; git reset --hard %s^", src_path, commit)
            info = {"commit": commit + "^"}
            self.call_configure(src_path)
            self.rebuild(src_path, info, True)
            # A failing clean build cannot be retried any further.
            if info.get("failed"):
                logging.info("Parent[%s^]: failed", commit)
                return False
            return True

        (lines, _) = shell("cd %s; git rev-parse %s^", src_path, commit)
        parent_revision = lines[0].strip()
        if self.current_revision and eq_hash(self.current_revision, parent_revision):
            # The last good build already is the parent: nothing to do.
            logging.info("Parent[%s^]: resuse good parent", commit)
            return True

        logging.info("Parent[%s^]: resuse similar build directory", commit)
        shell("cd %s; git reset --hard %s^", src_path, commit)
        info = {"commit": commit + "^"}
        self.call_reconfigure(src_path)
        self.rebuild(src_path, info, True)
        # The incremental parent build failed: fall back to a clean build.
        if info.get("failed"):
            return self.build_parent(commit, from_scratch=True)
        return True

    def run(self):
        """Build the plugin, then rebuild the project commit by commit.

        :raises RuntimeError: if the ``mode`` input is not a known mode.
        """
        # Determine the mode
        modes = ('normal', 'ccache', 'clang-hash', 'ccache-clang-hash')
        if self.mode.value not in modes:
            # The tuple must be wrapped: "%s" % modes would unpack it into
            # four arguments and raise TypeError instead of this error.
            raise RuntimeError("Mode can only be one of: %s" % (modes,))

        logging.info("Build the Clang-Hash Plugin")
        with self.clang_hash as cl_path:
            shell("cd %s; mkdir build; cd build; cmake .. -DCMAKE_BUILD_TYPE=Release; make -j 4", cl_path)
            # The build directory is "clang-plugin" (the path was
            # previously misspelled).
            shell("strip %s/build/clang-plugin/*.so", cl_path)

        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {"project-name": self.project_name(),
                           "commit-hash": self.metadata["project-hash"],
                           'builds': []}

        with self.project as src_path:
            (commits, _) = shell("cd %s; git log --no-merges --oneline --topo-order --format='%%H %%P %%s'", src_path)
            # Oldest first; each entry: [0] is hash, [1] is parent, [2] rest
            commits = [x.split(" ", 2) for x in reversed(commits)]
            commits = commits[-self.commits.value:]

            self.current_revision = None

            # First, we redirect all calls to the compiler to our
            # clang hash wrapper
            self.setup_compiler_paths(cl_path)

            total_time = 0
            last_failed = True

            while commits:
                # Search for a child of the current revision
                commit = None
                if self.current_revision:
                    for idx, candidate in enumerate(commits):
                        if candidate[1] == self.current_revision:
                            commit = commits.pop(idx)
                            break
                # No child found -> take the first one.
                if not commit:
                    commit = commits.pop(0)

                # Bash initial commit
                if commit[0] == "726f63884db0132f01745f1fb4465e6621088ccf":
                    continue

                info = {"commit": commit[0],
                        "parent": commit[1],
                        "summary": commit[2]}

                # Somehow this commit in musl is weird. It behaves
                # totally different, if built with a fresh parent and
                # a non-fresh parent. With this we are on the safe side.
                if commit[0] == "416d1c7a711807384cc21a18163475cf757bbcb5":
                    last_failed = True

                # First, we build the parent. In a total linear
                # history, this is a NOP. Otherwise, we try to reset
                # to the actual parent, and rebuild the project. This
                # may fail, since the current commit might fix this.
                ret = self.build_parent(commit[0], from_scratch=last_failed)
                info['parent-ok'] = ret

                # Change to the ACTUAL commit. Call reconfigure, and
                # then go on building the commit.
                shell("cd %s; git reset --hard %s", src_path, commit[0])
                self.call_reconfigure(src_path)
                # Start from an empty log so that only this build shows up.
                if os.path.exists("/tmp/clang-hash.log"):
                    os.unlink("/tmp/clang-hash.log")

                # Rebuild and Measure
                self.rebuild(src_path, info, fail_ok=True)

                if os.path.exists("/tmp/clang-hash.log") and not info.get("failed"):
                    with open("/tmp/clang-hash.log") as fd:
                        self.clang_hash_log.value += fd.read()

                self.build_info["builds"].append(info)
                if not info.get("failed"):
                    total_time += info['build-time'] / 1e9
                    # Build was good. Remember that.
                    self.current_revision = commit[0]
                    last_failed = False
                else:
                    self.current_revision = None
                    last_failed = True

            logging.info("Rebuild for %d commits takes %f minutes",
                         self.commits.value, total_time / 60.)

        # Output the summary of this build into the statistics file.
        with open(self.stats.path, "w+") as fd:
            fd.write(repr(self.build_info))

    def variant_name(self):
        """Return ``<project name>-<mode>``, with the mode read from metadata."""
        return "%s-%s" % (self.project_name(), self.metadata['mode'])

    def symlink_name(self):
        """Return ``<title>-<variant name>``."""
        return "%s-%s" % (self.title, self.variant_name())
class HistoricalCompilation(Experiment, ClangHashHelper):
    """Replay commits of a project and compare symbol hashes parent vs. commit.

    For every commit the parent is built and its hashes gathered, then the
    commit itself is built and its hashes gathered. The differences are
    stored in the per-commit ``info`` dictionary, together with diffstat
    numbers for both revisions. All ``info`` dictionaries are written as a
    ``repr`` to the ``stats`` output.
    """

    inputs = {
        "clang_hash": GitArchive("/home/cip/2015/yb90ifym/clang-hash/"),
        "project": GitArchive("/home/cip/2015/yb90ifym/lua"),
        "mode": String("normal"),
        "commits": Integer(500),  # was 500
        "jobs": Integer(1),  # was 4
    }
    outputs = {
        "stats": File("summary.dict"),
        "ccache_stats": File("ccache.stats"),
        "clang_hash_log": File("clang-hash.log"),
    }

    def build_parent(self, commit, from_scratch=False):
        """Bring the working tree into a built state of ``commit``'s parent.

        :param commit: revision whose parent (``<commit>^``) is to be built.
        :param from_scratch: if true, clean the tree (keeping ``*.hash`` and
            ``*.hash.copy`` files), configure and build from nothing.
            Otherwise reuse the current tree when it already is the parent,
            or reset and rebuild incrementally.
        :returns: ``True`` if the parent is built, ``False`` if even the
            from-scratch build failed.
        """
        def eq_hash(a, b):
            # Two revisions match if one is a prefix of the other; an
            # empty hash never matches (falsy ``None`` is returned).
            if len(a) == 0 or len(b) == 0:
                return
            if len(a) > len(b):
                return a.startswith(b)
            else:
                return b.startswith(a)

        src_path = self.project.path

        if from_scratch:
            shell("cd %s; git clean -dfx -e '*.hash' -e '*.hash.copy'", src_path)
            logging.info("Parent [%s^]: clean build", commit)
            shell("cd %s; git reset --hard %s^", src_path, commit)
            info = {"commit": commit + "^"}
            self.call_configure(src_path)
            self.rebuild(src_path, info, True)
            # A failing clean build cannot be retried any further.
            if info.get("failed"):
                logging.info("Parent[%s^]: failed", commit)
                return False
            return True
        else:
            (lines, _) = shell("cd %s; git rev-parse %s^", src_path, commit)
            parent_revision = lines[0].strip()
            if self.current_revision and eq_hash(self.current_revision, parent_revision):
                logging.info("Parent[%s^]: resuse good parent", commit)
                return True
            else:
                logging.info("Parent[%s^]: resuse similar build directory", commit)
                shell("cd %s; git reset --hard %s^", src_path, commit)
                info = {"commit": commit + "^"}
                self.call_reconfigure(src_path)
                self.rebuild(src_path, info, True)
                # The incremental parent build failed: fall back to a
                # clean build.
                if info.get("failed"):
                    return self.build_parent(commit, from_scratch=True)
                return True

    def run(self):
        """Build the plugin, then rebuild and hash-compare commit by commit.

        :raises RuntimeError: if the ``mode`` input is not a known mode.
        """
        # Determine the mode
        modes = ('normal', 'ccache', 'clang-hash', 'ccache-clang-hash')
        if self.mode.value not in modes:
            # The tuple must be wrapped: "%s" % modes would unpack it into
            # four arguments and raise TypeError instead of this error.
            raise RuntimeError("Mode can only be one of: %s" % (modes,))

        logging.info("Build the Clang-Hash Plugin")
        with self.clang_hash as cl_path:
            shell(
                "cd %s; mkdir build; cd build; cmake .. -DCMAKE_BUILD_TYPE=Release; make -j 4",
                cl_path)
            shell("strip %s/build/clang-plugin/*.so", cl_path)

        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {
            "project-name": self.project_name(),
            "commit-hash": self.metadata["project-hash"],
            'builds': []
        }

        with self.project as src_path:
            (commits, _) = shell(
                "cd %s; git log --no-merges --oneline --topo-order --format='%%H %%P %%s'",
                src_path)
            # Oldest first; each entry: [0] is hash, [1] is parent, [2] rest
            commits = [x.split(" ", 2) for x in reversed(commits)]
            commits = commits[-self.commits.value:]

            self.current_revision = None

            # First, we redirect all calls to the compiler to our
            # clang hash wrapper
            self.setup_compiler_paths(cl_path)

            total_time = 0
            last_failed = True

            # Maps a commit hash to the last exception seen for it.
            occurred_errors = {}

            # Pre-bind the results so the summary printed after the loop
            # also works when not a single commit was processed.
            changed_symbols = {}
            local_changed_symbols = {}

            def gather_local_hashes(src_path):
                # Collect {element[0]: element[1]} over all 'element-hashes'
                # entries found in the hash files below src_path.
                remove_keys = [
                    'project', 'return-code', 'start-time', 'run_id',
                    'compile-duration', 'processed-bytes', 'hash-duration',
                    'hash-start-time', 'object-file-size'
                ]  # TODO: ofile-size useful?
                hashes = read_hash_directory(src_path, remove_keys)
                local_hashes = {}
                for entry in hashes:
                    for element in entry['element-hashes']:
                        local_hashes[element[0]] = element[1]
                return local_hashes

            def gather_global_hashes(local_hashes, occurred_errors):
                # NOTE(review): the output of clang-hash-global is not
                # captured, so the returned dict is always empty and only
                # failures are recorded -- looks unfinished, confirm.
                global_hashes = {}
                for symbol in local_hashes:
                    # Remove the prefix ('function:' etc.)
                    symbol = symbol.split(':')[1]
                    try:
                        shell("cd %s; %s/clang-hash-global --definition %s",
                              src_path, self.inputs.clang_hash.path, symbol)
                    except Exception as e:
                        # Best effort: remember the failure for the commit
                        # currently being processed, but keep going.
                        occurred_errors[commit[0]] = e
                return global_hashes

            def add_additional_commit_info_to(info):
                # Pipe `git show` (run in the current working directory)
                # through diffstat and store the numbers in info.
                gitshow = subprocess.Popen(["git", "show"],
                                           stdout=subprocess.PIPE)
                dstat_out = subprocess.check_output(('diffstat'),
                                                    stdin=gitshow.stdout)
                gitshow.wait()

                lines = dstat_out.split('\n')
                # The summary is the last non-empty line.
                index = -1
                while lines[index] == '':
                    index -= 1
                last_line = lines[index]
                # The integers of the summary line, in order of appearance;
                # [0] is compared against the per-file line count below.
                changedInsertionsDeletions = [
                    int(s) for s in last_line.split() if s.isdigit()
                ]

                if "insertion" in last_line:
                    info['insertions'] = changedInsertionsDeletions[1]
                    if "deletion" in last_line:
                        info['deletions'] = changedInsertionsDeletions[2]
                elif "deletion" in last_line:
                    info['deletions'] = changedInsertionsDeletions[1]

                # Get changed files
                changed_files = {}
                for line in lines:
                    if '|' in line:
                        elems = line.split()
                        assert elems[1] == '|'
                        filename = elems[0]
                        nr_of_changes = int(elems[2])
                        changed_files[filename] = nr_of_changes

                assert len(changed_files) == changedInsertionsDeletions[0]
                info['changes'] = changed_files

            def diff_hashes(before, after):
                # Return {symbol: [hash before, hash after]} for every
                # symbol whose hash differs; a symbol missing on one side
                # has None there. `before` is copied, not modified.
                remaining = deepcopy(before)
                changed = {}
                for symbol, new_hash in after.items():
                    old_hash = remaining.pop(symbol, None)
                    if new_hash != old_hash:
                        changed[symbol] = [old_hash, new_hash]
                # Whatever is left only existed before: removed symbols.
                for symbol, old_hash in remaining.items():
                    changed[symbol] = [old_hash, None]
                return changed

            while commits:
                # Search for a child of the current revision
                commit = None
                if self.current_revision:
                    for idx in range(0, len(commits)):
                        if commits[idx][1] == self.current_revision:
                            commit = commits[idx]
                            del commits[idx]
                            break
                # No child found -> take the first one.
                if not commit:
                    commit = commits.pop(0)

                info = {
                    "commit": commit[0],
                    "parent": commit[1],
                    "summary": commit[2]
                }

                # First, we build the parent. In a total linear
                # history, this is a NOP. Otherwise, we try to reset
                # to the actual parent, and rebuild the project. This
                # may fail, since the current commit might fix this.
                ret = self.build_parent(commit[0], from_scratch=last_failed)
                info['parent-ok'] = ret

                parent_info = {}
                add_additional_commit_info_to(parent_info)
                info['parent-info'] = parent_info

                # Gather hashes of parent
                parent_local_hashes = gather_local_hashes(src_path)
                parent_global_hashes = gather_global_hashes(
                    parent_local_hashes, occurred_errors)

                # Change to the ACTUAL commit.
                shell("cd %s; git reset --hard %s", src_path, commit[0])

                add_additional_commit_info_to(info)

                # Call reconfigure, and then go on building the commit.
                self.call_reconfigure(src_path)
                # Start from an empty log so that only this build shows up.
                if os.path.exists("/tmp/clang-hash.log"):
                    os.unlink("/tmp/clang-hash.log")

                # Rebuild and Measure
                self.rebuild(src_path, info, fail_ok=True)

                # Don't need those atm. pop() instead of del: the keys may
                # be absent (presumably after a failed rebuild -- confirm).
                info.pop('clang-hash-hits', None)
                info.pop('clang-hash-misses', None)

                # Gather hashes
                local_hashes = gather_local_hashes(src_path)
                global_hashes = gather_global_hashes(local_hashes,
                                                     occurred_errors)

                # Compare hashes/search for changed hashes. The raw hash
                # maps are not stored in info, only the differences.
                changed_symbols = diff_hashes(parent_global_hashes,
                                              global_hashes)
                local_changed_symbols = diff_hashes(parent_local_hashes,
                                                    local_hashes)

                info['changed-symbols'] = changed_symbols
                info['local-changed-sym-count'] = len(local_changed_symbols)

                # TODO: add more analysis
                # TODO: for each changed local hash, the symbol's global
                # hash should also change... check every symbol for a
                # changed global hash; also check the commits, if the
                # correct ones are used...

                if os.path.exists(
                        "/tmp/clang-hash.log") and not info.get("failed"):
                    with open("/tmp/clang-hash.log") as fd:
                        self.clang_hash_log.value += fd.read()

                self.build_info["builds"].append(info)
                if not info.get("failed"):
                    total_time += info['build-time'] / 1e9
                    # Build was good. Remember that.
                    self.current_revision = commit[0]
                    last_failed = False
                else:
                    self.current_revision = None
                    last_failed = True

            logging.info("Rebuild for %d commits takes %f minutes",
                         self.commits.value, total_time / 60.)

            # Debug summary. The two symbol maps are those of the LAST
            # processed commit (empty if there was none). Single-argument
            # print(...) behaves the same as the print statement.
            print("\n\noccurred errors:\n")
            print(occurred_errors)
            print("\n\nchanged symbols:\n")
            print(changed_symbols)
            print("\n\nlocal changed symbols:\n")
            print(local_changed_symbols)
            print("\n\n\n")
            if changed_symbols or local_changed_symbols:
                print("!!! success: found one !!!")

        # Output the summary of this build into the statistics file.
        with open(self.stats.path, "w+") as fd:
            fd.write(repr(self.build_info))

    def variant_name(self):
        """Return ``<project name>-<mode>``, with the mode read from metadata."""
        return "%s-%s" % (self.project_name(), self.metadata['mode'])

    def symlink_name(self):
        """Return ``<title>-<variant name>``."""
        return "%s-%s" % (self.title, self.variant_name())
class IncrementalCompilation(Experiment, ClangHashHelper):
    """Measure the rebuild after modifying each source file of a project.

    After one fresh build, every file returned by ``get_sources`` is
    touched in turn and the project is rebuilt. The ``info`` dictionary of
    each rebuild is collected in ``self.build_info["builds"]``, which is
    written as a ``repr`` to the ``stats`` output.
    """

    inputs = {
        "clang_hash": GitArchive("/home/stettberger/w/clang-hash/"),
        "project": GitArchive("/home/stettberger/w/clang-hash/hash-projects/musl", shallow=True),
        "touch-only": Bool(False),
        "mode": String("normal"),
        "jobs": Integer(4),
    }
    outputs = {
        "stats": File("summary.dict"),
    }

    def get_sources(self, path):
        """Return the sorted list of all ``.h`` and ``.c`` files below ``path``.

        For the project named "musl" only ``.c`` files and files whose path
        contains "internal" are kept.
        """
        ret = [os.path.join(root, filename)
               for root, _dirnames, filenames in os.walk(path)
               for filename in filenames
               if filename.endswith(('.h', '.c'))]
        if self.project_name() == "musl":
            # We do not touch headers that are external, since they
            # are untouchable.
            ret = [x for x in ret if x.endswith(".c") or "internal" in x]
        return sorted(ret)

    def touch(self, path):
        """Mark the file at ``path`` as modified.

        With the ``touch-only`` input set, only the file times are updated.
        Otherwise the file is rewritten with a ``#line 1`` line put in front
        of its previous content.
        """
        if self.touch_only.value:
            os.utime(path, None)
        else:
            with open(path) as fd:
                content = fd.read()
            content = "#line 1\n" + content
            with open(path, "w") as fd:
                fd.write(content)

    def run(self):
        """Build the plugin, build the project, then rebuild per touched file.

        :raises RuntimeError: if the ``mode`` input is not a known mode.
        """
        # Determine the mode
        modes = ('normal', 'ccache', 'clang-hash')
        if self.mode.value not in modes:
            # The tuple must be wrapped: "%s" % modes would unpack it into
            # three arguments and raise TypeError instead of this error.
            raise RuntimeError("Mode can only be one of: %s" % (modes,))

        logging.info("Build the Clang-Hash Plugin")
        with self.clang_hash as cl_path:
            shell("cd %s; mkdir build; cd build; cmake ..; make -j 4", cl_path)

        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {
            "project-name": self.project_name(),
            "commit-hash": self.metadata["project-hash"],
            'builds': []
        }

        with self.project as src_path:
            # First, we redirect all calls to the compiler to our
            # clang hash wrapper
            self.setup_compiler_paths(cl_path)

            # Count the number of files
            sources = list(self.get_sources(src_path))
            nr_files = len(sources)
            logging.info("#files: %d", nr_files)
            self.build_info['file-count'] = nr_files

            # Initial build of the given project
            self.call_configure(src_path)
            info = {"filename": "FRESH_BUILD"}
            self.rebuild(src_path, info)
            self.build_info["builds"].append(info)

            # Iterate over all files: modify one, rebuild, record.
            for fn in sources:
                self.touch(fn)
                info = {"filename": fn}
                self.rebuild(src_path, info)
                self.build_info["builds"].append(info)

        # Output the summary of this build into the statistics file.
        with open(self.stats.path, "w+") as fd:
            fd.write(repr(self.build_info))

    def method_name(self):
        """Return ``touch-<mode>`` or ``append-<mode>``, read from metadata."""
        mod = "touch" if self.metadata['touch-only'] else "append"
        return "%s-%s" % (mod, self.metadata['mode'])

    def variant_name(self):
        """Return ``<project name>-<method name>``."""
        return "%s-%s" % (self.project_name(), self.method_name())

    def symlink_name(self):
        """Return ``<title>-<variant name>``."""
        return "%s-%s" % (self.title, self.variant_name())