def __setup_value(self):
    """Extract a tar archive into the experiment's tmp directory.

    Returns a Directory pointing at the extracted content: if the
    archive contains a single top-level directory, that directory is
    returned, otherwise the extraction root itself.

    Raises RuntimeError if tar fails.
    """
    # Unwrap path-like filename objects (e.g. a File input).
    if "path" in dir(self.__filename):
        self.subobjects["filename"] = self.__filename
        self.__filename = self.__filename.path
    self.__filename = os.path.abspath(self.__filename)
    fn = self.__filename

    # Pick the tar compression flag from the filename.
    # BUGFIX: the compression flag was "x" for gzip and the command was
    # "tar %szvf", yielding "tar zvf" (no extract operation) for plain
    # tars and "tar jzvf" (conflicting -j and -z) for bzip2 archives.
    compression = ""
    if "tar.gz" in fn or "tgz" in fn:
        compression = "z"
    if "tar.bz2" in fn or "bzip2" in fn:
        compression = "j"

    with self.tmp_directory as d:
        try:
            os.mkdir(self.name)
        except OSError:
            # ignore errors if the directory should already exist for some reason
            pass
        with Directory(self.name) as d2:
            dirname = os.path.abspath(".")
            (out, ret) = shell("tar %sxvf %s", compression, fn)
            if ret != 0:
                raise RuntimeError("Extracting of %s failed" % fn)
            # Find the shortest directory entry in the listing; if every
            # entry lives below it, the archive has one top-level dir.
            cd = None
            for line in out:
                if (cd is None or len(line) < len(cd)) and line.endswith("/"):
                    cd = line
            if cd and all(x.startswith(cd) for x in out):
                dirname = cd
            return Directory(os.path.abspath(dirname))
class SimpleExperiment(Experiment):
    """Exercise Directory outputs: file creation, mirroring, and filename filters."""

    outputs = {
        "dir1": Directory("d1"),
        "dir2": Directory("d2"),
        "filtered": Directory(".", filename_filter="*.log*"),
    }

    def run(self):
        # A file created through the framework API, persisted explicitly.
        first = self.o.dir1.new_file("barfoo")
        first.value = "abc"
        first.flush()

        # A plain file created behind the framework's back.
        root = self.o.dir1.path
        os.mkdir(root + "/tmpdir")
        with open(root + "/tmpdir/foo", "w+") as handle:
            handle.write("Hallo")

        # Replicate the whole tree into the second output directory.
        self.o.dir2.mirror_directory(root, lambda x: True)

        # The filtered directory only accepts names matching *.log*.
        a = self.filtered.new_file("foo.log")
        a.value = "xx"
        try:
            a = self.filtered.new_file("bar.xxx")
            raise Exception("Filter does not work")
        except RuntimeError as e:
            pass  # Everything is good
        b = self.filtered.new_file("barfoo.log.gz", compressed=True)
        b.value = "xx"
        assert type(a) == File
        assert type(b) == GzipFile
class SimpleExperiment(Experiment):
    """Create files in the dir1 output and mirror the tree into dir2."""

    outputs = {"dir1": Directory("d1"), "dir2": Directory("d2")}

    def run(self):
        # File created via the framework API and persisted.
        barfoo = self.o.dir1.new_file("barfoo")
        barfoo.value = "abc"
        barfoo.flush()

        # Subdirectory and file created with plain os/io calls.
        src_root = self.o.dir1.path
        os.mkdir(src_root + "/tmpdir")
        with open(src_root + "/tmpdir/foo", "w+") as handle:
            handle.write("Hallo")

        # Copy everything (unfiltered) into the second directory.
        self.o.dir2.mirror_directory(src_root, lambda x: True)
def __setup_value(self):
    """Materialize the git checkout in the tmp directory and return it as a Directory.

    A shallow archive exports only the requested ref; otherwise the
    repository is cloned and the ref is fetched and checked out.
    Aborts the process if any git command fails.
    """
    # Unwrap path-like clone URLs (e.g. another Directory object).
    if "path" in dir(self.__clone_url):
        self.subobjects["clone-url"] = self.__clone_url
        self.__clone_url = self.__clone_url.path

    logging.info("copying git archive %s", self.__clone_url)

    def checked_shell(fmt, *fmt_args):
        # Run a shell command; dump its output and abort on failure.
        (lines, ret) = shell(fmt, *fmt_args)
        if ret != 0:
            print("\n".join(lines))
            sys.exit(-1)

    with self.tmp_directory as d:
        os.mkdir(self.name)
        if self.__shallow:
            # Export the requested ref without any history.
            checked_shell("cd '%s' && git archive --format=tar --remote=%s %s | tar x",
                          self.name, self.__clone_url, self.__ref)
        else:
            checked_shell("git clone %s %s", self.__clone_url, self.name)
            checked_shell("cd %s && git gc && git fetch %s %s && git checkout FETCH_HEAD",
                          self.name, self.__clone_url, self.__ref)
        return Directory(os.path.abspath(self.name))
class FailImport(Experiment):
    """Import a recorded FAIL* trace into the fault-injection database."""

    inputs = {
        "trace": FailTrace("FailTrace"),
        "fail-tool-dir": Directory("/proj/i4danceos/tools/fail"),
    }

    def run(self):
        variant = "erika/error-hook"
        # One import pass per aspect of the machine state.
        importers = [
            ("mem", "MemoryImporter", []),
            ("regs", "RegisterImporter", []),
            ("ip", "RegisterImporter", ["--no-gp", "--ip"]),
            ("flags", "RegisterImporter", ["--no-gp", "--flags"]),
        ]
        for (label, importer, importer_args) in importers:
            benchmark = label
            logging.info("Importing coredos/%s", benchmark)
            cmdline = "%(path)s/import-trace -v %(variant)s -b %(benchmark)s -i %(importer)s "\
                + "-t %(trace)s -e %(elf)s %(args)s"
            substitutions = {
                "path": self.fail_tool_dir.path,
                "variant": variant,
                "benchmark": benchmark,
                "importer": importer,
                "trace": os.path.join(self.trace.trace.path, "trace.pb"),
                "elf": self.trace.elf.path,
                "args": " ".join(importer_args),
            }
            shell(cmdline % substitutions)

        # Prune equivalent pilots for every benchmark of this variant.
        shell("%s/prune-trace -v %s -b %% -p basic --overwrite",
              self.fail_tool_dir.path, variant)
class FailTrace(Experiment):
    """Build the ERIKA coptermock example and record an instruction trace
    with the FAIL*/Bochs experiment runner."""
    inputs = {
        "erika": GitArchive("[email protected]:erika"),
        "bochs-runner": Executable("/proj/i4danceos/tools/fail/bochs-experiment-runner.py"),
        "erika-tracing": Executable("/proj/i4danceos/tools/fail/erika-tracing"),
    }
    outputs = {
        "trace": Directory("trace"),   # trace.pb ends up here
        "elf": File("erika.elf"),
        "iso": File("erika.iso"),
    }

    def run(self):
        logging.info("Cloning ERIKA...")
        with self.erika as erika_path:
            # Build the example image inside the checkout.
            shell("cd %s/examples/x86/coptermock-isorc; make", erika_path)
            # Preserve boot image and ELF as experiment outputs.
            self.iso.copy_contents(os.path.join(erika_path, "examples/x86/coptermock-isorc/Debug/erika.iso"))
            self.elf.copy_contents(os.path.join(erika_path, "examples/x86/coptermock-isorc/Debug/Debug/out.elf"))
            # Record the trace between EE_oo_StartOS and test_finish into
            # trace.pb inside the trace output directory.
            shell(("cd %(resultdir)s; python %(bochs)s -F 50 -i %(iso)s -e %(elf)s -f %(fail)s"
                   + " -m 8 -1 -- -Wf,--end-symbol=test_finish -Wf,--start-symbol=EE_oo_StartOS"
                   + " -Wf,--trace-file=trace.pb -Wf,--save-symbol=EE_oo_StartOS") % {
                       "resultdir": self.trace.path,
                       "bochs": self.bochs_runner.path,
                       "iso": self.iso.path,
                       "elf": self.elf.path,
                       "fail": self.erika_tracing.path
                   })
class ExploreConfig(AttributeExperiment):
    """Input declaration for building/measuring one project configuration.

    The "FIXME" defaults are placeholders — presumably overridden on the
    command line by the experiment driver (verify against the caller).
    """
    inputs = {
        "config_hash": String("FIXME"),        # identifies the explored configuration
        "kconfig_hash": String("FIXME"),       # identifies the kconfig model
        "project_root": Directory("/tmp"),     # checkout to build in
        "project_version": String("FIXME"),
        "clean_command": String("make clean"),
        "build_command": String("make"),
        "attr_command": String("make attributes"),
    }
class SimpleExperiment(Experiment):
    """Join two string inputs into "key: value" and write it to the outputs."""

    inputs = {
        "input_key": String("default key"),
        "input_value": String("default value"),
    }
    outputs = {
        "output_file": File("output"),
        "output_directory": Directory("output_directory"),
    }

    def run(self):
        key = self.inputs.input_key.value
        value = self.inputs.input_value.value
        # The output file holds "<key>: <value>" plus a trailing newline.
        self.outputs.output_file.value = "%s: %s\n" % (key, value)
        # Also create a nested directory and file inside the directory output.
        nested = self.output_directory.new_directory("foo")
        nested.new_file("lala")
def __setup_value(self):
    """Materialize the git checkout (including visible branches and tags)
    in the tmp directory and return it as a Directory.

    A shallow archive exports only the requested ref; otherwise the
    repository is cloned, the ref checked out, and every branch/tag
    listed in the metadata is fetched. Aborts the process if the
    clone/checkout fails.
    """
    # Unwrap path-like clone URLs (e.g. another Directory object).
    if "path" in dir(self.__clone_url):
        self.subobjects["clone-url"] = self.__clone_url
        self.__clone_url = self.__clone_url.path

    logging.info("copying git archive %s", self.__clone_url)

    def run_or_die(fmt, *fmt_args):
        # Execute a shell command, forwarding stderr; abort on failure.
        (lines, ret) = shell(fmt, *fmt_args, stderr=sys.stderr)
        if ret != 0:
            print("\n".join(lines))
            sys.exit(-1)

    with self.tmp_directory as d:
        os.mkdir(self.name)
        if self.__shallow:
            run_or_die("cd '%s' && git archive --format=tar --remote=%s %s | tar x",
                       self.name, self.__clone_url, self.__ref)
        else:
            run_or_die("git clone %s %s", self.__clone_url, self.name)
            run_or_die("cd %s && git gc && git fetch %s %s && git checkout FETCH_HEAD",
                       self.name, self.__clone_url, self.__ref)

        # Fetch all visible branches and tags (best effort, exit code ignored).
        for branch in self.__metadata.get("branches", {}):
            shell("cd %s && git fetch %s refs/heads/%s && git update-ref refs/heads/%s FETCH_HEAD",
                  self.name, self.__clone_url, branch, branch, stderr=sys.stderr)
        for tag in self.__metadata.get("tags", {}):
            shell("cd %s && git fetch %s refs/tags/%s && git update-ref refs/tags/%s FETCH_HEAD",
                  self.name, self.__clone_url, tag, tag, stderr=sys.stderr)

        return Directory(os.path.abspath(self.name))
def before_experiment_run(self, parameter_type):
    """Bootstrap the experiment before run(): parse command-line input
    parameters, expand lambda inputs, create the tmp directory, and
    notify all inputs/outputs.

    The statement order matters: plain inputs must be parsed before
    lambda inputs are evaluated, and metadata is calculated before the
    outputs are set up.
    """
    # When experiment run as input, just run the normal input handlers
    if parameter_type == "input":
        Type.before_experiment_run(self, "input")
        return

    # Let every non-lambda input consume its command-line options.
    for (name, inp) in self.inputs.items():
        if type(inp) == LambdaType:
            continue
        ret = inp.inp_extract_cmdline_parser(self.__opts, self.__args)
        if ret:
            (self.__opts, self.__args) = ret

    # After all input parameters are parsed. Execute the
    # calculated input parameters
    for (name, inp) in self.inputs.items():
        if type(inp) != LambdaType:
            continue
        # The lambda yields the real input object; register it in place.
        inp = inp(self)
        inp.name = name
        self.subobjects[name] = inp
        self.inputs[name] = inp

    self.subobjects.update()

    # Now set up the experiment tmp directory
    self.tmp_directory = Directory(tempfile.mkdtemp())
    self.subobjects["tmp_directory"] = self.tmp_directory

    for obj in self.inputs.values():
        obj.before_experiment_run("input")

    # Metadata depends on fully initialized inputs.
    self.__calculate_metadata()

    for obj in self.outputs.values():
        obj.before_experiment_run("output")
pass # Everything is good b = self.filtered.new_file("barfoo.log.gz", compressed=True) b.value = "xx" assert type(a) == File assert type(b) == GzipFile if __name__ == "__main__": import shutil, sys,os experiment = SimpleExperiment() dirname = experiment(sys.argv) assert os.path.isdir(experiment.o.dir2.path + "/tmpdir") assert os.path.exists(experiment.o.dir2.path + "/barfoo") assert os.path.exists(experiment.o.dir2.path + "/tmpdir/foo") N = Directory(experiment.path, "*.log*") assert experiment.filtered.value == N.value assert os.path.exists(experiment.path + "/foo.log") contents = [x.value for x in N] assert len(contents) == 2 assert contents[0] == contents[1], contents if dirname: shutil.rmtree(dirname) print("success")
class HistoricalCompilationGlobalEvaluation(Experiment):
    """Evaluate, per commit of a project's history, which elements changed
    according to their *global* (transitive) hashes.

    Reads the summary and per-commit element-hash dumps produced by a
    previous HistoricalCompilation data-collection run ("dataset" input)
    and writes aggregate statistics plus a histogram of hot commits.
    NOTE(review): Python 2 code (print statements, iteritems).
    """
    inputs = {
        "clang_hash": GitArchive("/home/cip/2015/yb90ifym/clang-hash/"),
        "project": GitArchive("/home/cip/2015/yb90ifym/clang-hash/hash-projects/lua"),
        "commits": Integer(4744),
        "jobs": Integer(1), # was 4
        "dataset": Directory("/home/cip/2015/yb90ifym/clang-hash/experiments/HistoricalCompilation-4e7c977077afea3d2ad77aeefe3b472c"), # full lua
        "hot_threshold_percentage": Integer(10), # minimal change percentage for commit to be classified as "hot"
    }
    outputs = {
        "stats": File("summary.dict"),
        "eval_data": File("eval.txt"),
        "hot_commits_histo": File("global_hot_commits.pdf"),
    }

    def project_name(self):
        # Basename of the clone URL doubles as the project name.
        return os.path.basename(self.metadata['project-clone-url'])

    def run(self):
        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {"project-name": self.project_name(),
                           "commit-hash": self.metadata["project-hash"],
                           'builds': []}
        with self.project as src_path:
            time = 0
            os.chdir(self.dataset.path)

            # Read summary file from data collection run
            # NOTE(review): eval() on dataset files — only safe for trusted data.
            commits = None
            with open("summary.dict") as sf:
                summary = eval(sf.read())
                commits = summary['builds']

            def read_chash_data(commit):
                # Per-commit element-hash dump; empty list if missing/corrupt.
                element_hashes = []
                try:
                    with open(commit, 'r') as cf:
                        commit_data = eval(cf.read())
                        for ofile_data in commit_data:
                            element_hashes.extend(ofile_data['element-hashes'])
                except:
                    pass
                return element_hashes

            stats = {
                'data-empty': set(), # commits with empty info files, e.g. failed to be collected, (first n commits -> missing makefile o.a.)
                'commits': {},
                'elements': {}, # symbol -> how often did this symbol change
            }

            total_changed_globals = 0 # How often was any global changed throughout the history?
            total_changed_records = 0 # How often was any record changed throughout the history?
            total_changed_static_funcs = 0 # How often was any static function changed throughout the history?
            total_changed_functions = 0 # without static functions
            total_insdel_globals = 0 # How often was any global introduced/removed throughout the history?
            total_insdel_records = 0 # How often was any record introduced/removed throughout the history?
            total_insdel_static_funcs = 0 # How often was any static function introduced/removed throughout the history?
            total_insdel_functions = 0 # without static functions

            # in-degree: how many SLOs depend on E?
            # out-degree: how many SLOs does E depend on?
            in_degrees = {} # indegree -> nr of elements with that indegree
            out_degrees = {} # outdegree -> nr of elements wirh that outdegree
            max_in_degree = (None, 0) # (element, degree)
            max_out_degree = (None, 0) # (element, degree)

            prev_commit = None
            prev_hashes = None
            prev_used_definitions = None
            prev_global_hashes = None
            counter = 1
            for info in commits:
                print "\n%d/%d" % (counter, len(commits))
                counter += 1
                commit = info['commit']
                parent = info['parent']
                if not parent: # first commit has no parent
                    print "No parent"
                    continue

                commit_data = read_chash_data(commit)
                if not commit_data:
                    # If the data does not exist, note and skip
                    #print "Data empty"
                    stats['data-empty'].add(commit)
                    continue

                # Collect local hashes and used definitions, restricted to
                # (static) functions with their prefix stripped.
                local_hashes = {}
                used_definitions = {}
                # just 4 testing:
                for element in commit_data:
                    name = element[0]
                    if name.startswith('static function:') or name.startswith('function:'):
                        name = element[0].split(':')[1]
                        local_hashes[name] = element[1]
                        try:
                            used_definitions[name] = set()
                            for used_def in element[2]:
                                if used_def.startswith('static function:') or used_def.startswith('function:'):
                                    used_definitions[name].add(used_def.split(':')[1])
                        except:
                            pass
                # prev:
                #for element in commit_data:
                #    local_hashes[element[0]] = element[1]
                #    try:
                #        used_definitions[element[0]] = element[2]
                #    except:
                #        pass

                # Same data for the parent commit; reuse the previous
                # iteration's result when walking a linear history.
                parent_hashes = {}
                parent_global_hashes = {}
                parent_used_definitions = {}
                if parent == prev_commit and prev_global_hashes and prev_used_definitions and prev_hashes:
                    #print "Reuse prev_commit"
                    parent_hashes = prev_hashes
                    parent_used_definitions = prev_used_definitions
                    parent_global_hashes = prev_global_hashes
                else:
                    #print "Cannot reuse prev_commit"
                    parent_data = read_chash_data(parent)
                    # just 4 testing:
                    for element in parent_data:
                        name = element[0]
                        if name.startswith('static function:') or name.startswith('function:'):
                            name = element[0].split(':')[1]
                            parent_hashes[name] = element[1]
                            try:
                                parent_used_definitions[name] = set()
                                for used_def in element[2]:
                                    if used_def.startswith('static function:') or used_def.startswith('function:'):
                                        parent_used_definitions[name].add(used_def.split(':')[1])
                            except:
                                pass
                    # prev:
                    #for element in parent_data:
                    #    parent_hashes[element[0]] = element[1]
                    #    try:
                    #        parent_used_definitions[element[0]] = element[2]
                    #    except:
                    #        pass

                if not parent_hashes:
                    # If the data does not exist, note and skip
                    stats['data-empty'].add(commit)
                    # Save data for reuse
                    prev_commit = commit
                    prev_hashes = local_hashes
                    prev_used_definitions = used_definitions
                    continue

                ##########################
                # GLOBAL HASH EVALUATION #
                ##########################

                commit_stats = {
                    'element-count' : len(local_hashes),
                    'changed-elements' : [],
                }

                elements = set(local_hashes.keys())
                parent_elements = set(parent_hashes.keys())

                # calculate in- and out-degree
                # reverse used_definitions
                out_use_defs = { s:0 for s in used_definitions.keys() } # element -> nr of depending elements
                for element in elements:
                    for el in used_definitions[element]:
                        try:
                            out_use_defs[el] += 1
                        except:
                            pass

                for element in elements:
                    out_degree = len(used_definitions[element])
                    in_degree = out_use_defs[element]
                    if in_degree > max_in_degree[1]:
                        max_in_degree = (element, in_degree)
                    if out_degree > max_out_degree[1]:
                        max_out_degree = (element, out_degree)
                    if in_degree not in in_degrees:
                        in_degrees[in_degree] = 0
                    in_degrees[in_degree] += 1
                    if out_degree not in out_degrees:
                        out_degrees[out_degree] = 0
                    out_degrees[out_degree] += 1

                commit_stats['changed-elements'] = elements ^ parent_elements # elements either added or removed
                for element in commit_stats['changed-elements']:
                    if element.startswith('record:'): # do this here to get only insertions and deletions
                        total_insdel_records += 1
                    elif element.startswith('variable:') or element.startswith('static variable:'):
                        total_insdel_globals += 1
                    elif element.startswith('static function:'):
                        total_insdel_static_funcs += 1
                    else:
                        total_insdel_functions += 1

                # Compare hashes
                common_elements = elements & parent_elements
                global_hashes = {}
                for element in common_elements:
                    global_hash = get_global_hash(element, global_hashes, local_hashes, used_definitions)
                    parent_global_hash = get_global_hash(element, parent_global_hashes, parent_hashes, parent_used_definitions)
                    if global_hash != parent_global_hash:
                        commit_stats['changed-elements'].add(element)
                        if element.startswith('record:'): # do this here to ignore insertions and deletions
                            total_changed_records += 1
                        elif element.startswith('variable:') or element.startswith('static variable:'):
                            total_changed_globals += 1
                        elif element.startswith('static function:'):
                            total_changed_static_funcs += 1
                        else:
                            total_changed_functions += 1

                commit_stats['changed-element-count'] = len(commit_stats['changed-elements']);
                stats['commits'][commit] = commit_stats

                # Count how often each element was changed over the whole history
                for element in commit_stats['changed-elements']:
                    if element not in stats['elements']:
                        stats['elements'][element] = 0;
                    stats['elements'][element] += 1

                # Save data for reuse
                prev_commit = commit
                prev_hashes = local_hashes
                prev_used_definitions = used_definitions
                prev_global_hashes = global_hashes

            self.build_info['stats'] = stats

            #in_degrees = {} # indegree -> nr of elements with that indegree
            #out_degrees = {} # outdegree -> nr of elements wirh that outdegree
            #max_in_degree = (None, 0) # (element, degree)
            #max_out_degree = (None, 0) # (element, degree)

            summed_in_degrees = sum([k*v for k,v in in_degrees.iteritems()])
            nr_of_elements = sum(in_degrees.values())
            avg_in_degree = summed_in_degrees/float(nr_of_elements)
            avg_out_degree = sum([k*v for k,v in out_degrees.iteritems()])/float(sum(out_degrees.values()))

            eval_info = {
                'nr-of-commits' : len(commits),
                'change-percentage' : {}, # change percentage -> nr of commits with change < percentage
                'hot-commits': {},
                'total-changed-globals': total_changed_globals,
                'total-changed-records': total_changed_records,
                'total-changed-static-funcs': total_changed_static_funcs,
                'total-changed-functions': total_changed_functions,
                'total-insdel-globals': total_insdel_globals,
                'total-insdel-records': total_insdel_records,
                'total-insdel-static-funcs': total_insdel_static_funcs,
                'total-insdel-functions': total_insdel_functions,
                'max_in_degree': max_in_degree,
                'max_out_degree': max_out_degree,
                'avg_in_degree': avg_in_degree,
                'avg_out_degree': avg_out_degree,
            }

            # Get most changed elements
            eval_info['most-changed-elements'] = {k:v for k,v in stats['elements'].iteritems() if v > 1000} # arbitrary value (about 20% of commits)

            # Calc average nr and percentage of (changed) symbols per commit
            summed_avg_change_percentage = 0
            summed_changed_elements = 0
            summed_total_elements = 0
            commits = self.build_info['stats']['commits']
            for commit in commits:
                commit_stat = commits[commit]
                change_percentage = len(commit_stat['changed-elements'])/float(commit_stat['element-count'])
                summed_avg_change_percentage += change_percentage
                summed_changed_elements += len(commit_stat['changed-elements'])
                summed_total_elements += commit_stat['element-count']
                percentage = int(round(change_percentage * 100))
                if percentage not in eval_info['change-percentage']:
                    eval_info['change-percentage'][percentage] = 0
                eval_info['change-percentage'][percentage] += 1

                # Identify hot commits
                #if percentage > self.hot_threshold_percentage.value:
                    #eval_info['hot-commits'][commit] = percentage

            eval_info['avg-change-percentage'] = summed_avg_change_percentage / float(len(stats['commits']))
            eval_info['avg-changed-elements'] = summed_changed_elements / eval_info['nr-of-commits']
            eval_info['avg-total-elements'] = summed_total_elements / eval_info['nr-of-commits']
            eval_info['nr-hot-commits'] = len(eval_info['hot-commits'])

            with open(self.eval_data.path, "w+") as fd:
                fd.write(repr(eval_info))

            # Output the summary of this build into the statistics file.
            with open(self.stats.path, "w+") as fd:
                fd.write(repr(self.build_info))

            def plot_hash_count_histogram(hash_values, filename):
                # Bar plot: percentage of changed elements -> number of commits.
                dictionary = plt.figure()
                fig, ax = plt.subplots()
                plt.xlabel('Prozentanteil geaenderter Elemente')
                plt.ylabel('Anzahl von Commits')
                axes = plt.gca()
                axes.set_xlim([-10,100])
                axes.set_ylim([0,1600])
                ax.bar(hash_values.keys(), hash_values.values(), align='center')
                fig.savefig(filename)

            # clean data for plotting
            data = {k:v for k,v in eval_info['change-percentage'].iteritems() if k <= 100}
            plot_hash_count_histogram(data, self.hot_commits_histo.path)

    def variant_name(self):
        return "%s-%s"%(self.project_name(), self.metadata['mode'])

    def symlink_name(self):
        return "%s-%s"%(self.title, self.variant_name())
class HistoricalCompilationCallGraphEvaluation(Experiment): inputs = { "clang_hash": GitArchive("/home/cip/2015/yb90ifym/clang-hash/"), "project": GitArchive("/home/cip/2015/yb90ifym/clang-hash/hash-projects/lua"), "commits": Integer(4744), "jobs": Integer(1), # was 4 "dataset": Directory( "/home/cip/2015/yb90ifym/clang-hash/experiments/HistoricalCompilation-4e7c977077afea3d2ad77aeefe3b472c" ), # full lua "hot_threshold_percentage": Integer( 10 ), # minimal change percentage for commit to be classified as "hot" } outputs = { "stats": File("summary.dict"), "eval_data": File("eval.txt"), "hot_commits_histo": File("cg_hot_commits.pdf"), } def project_name(self): return os.path.basename(self.metadata['project-clone-url']) def run(self): # Project name logging.info("Cloning project... %s", self.project_name()) self.build_info = { "project-name": self.project_name(), "commit-hash": self.metadata["project-hash"], 'builds': [] } with self.project as src_path: time = 0 os.chdir(self.dataset.path) # Read summary file from data collection run commits = None with open("summary.dict") as sf: summary = eval(sf.read()) commits = summary['builds'] def read_chash_data(commit): element_hashes = [] try: with open(commit, 'r') as cf: commit_data = eval(cf.read()) for ofile_data in commit_data: element_hashes.extend(ofile_data['element-hashes']) except: pass return element_hashes stats = { 'data-empty': set( ), # commits with empty info files, e.g. failed to be collected, (first n commits -> missing makefile o.a.) 'commits': {}, 'elements': {}, # symbol -> how often did this symbol change } total_changed_functions = 0 # How often was any function changed throughout the history? total_insdel_functions = 0 # How often was any function introduced/removed throughout the history? 
prev_commit = None prev_functions = None prev_used_definitions = None counter = 1 for info in commits: print "%d/%d" % (counter, len(commits)) counter += 1 commit = info['commit'] parent = info['parent'] if not parent: # first commit has no parent print "No parent" continue commit_data = read_chash_data(commit) if not commit_data: # If the data does not exist, note and skip #print "Data empty" stats['data-empty'].add(commit) continue functions = set() used_definitions = {} for element in commit_data: if element[0].startswith('static function:') or element[ 0].startswith('function:'): clean_name = element[0].split(':')[1] functions.add(clean_name) used_definitions[clean_name] = set() for used_def in element[2]: if used_def.startswith( 'static function:') or used_def.startswith( 'function:'): used_definitions[clean_name].add( used_def.split(':')[1]) parent_functions = {} parent_used_definitions = {} if parent == prev_commit and prev_functions and prev_used_definitions: #print "Reuse prev_commit" parent_functions = prev_functions parent_used_definitions = prev_used_definitions else: #print "Cannot reuse prev_commit" parent_data = read_chash_data(parent) for element in parent_data: if element[0].startswith( 'static function:') or element[0].startswith( 'function:'): clean_name = element[0].split(':')[1] parent_functions.insert(clean_name) parent_used_definitions[clean_name] = set() for used_def in element[2]: if used_def.startswith( 'static function:' ) or used_def.startswith('function:'): parent_used_definitions[clean_name].add( used_def.split(':')[1]) if not parent_functions: # If the data does not exist, note and skip stats['data-empty'].add(commit) # Save data for reuse prev_commit = commit prev_functions = functions prev_used_definitions = used_definitions continue ######################### # CALL GRAPH EVALUATION # ######################### commit_stats = { 'element-count': len(functions), 'changed-elements': [], # contains changed + impacted functions 
#'changed-not-impacted': set(), # contains directly changed functions only } elements = functions parent_elements = parent_functions commit_stats['changed-elements'] = set( ) #elements ^ parent_elements # elements either added or removed total_insdel_functions += len(commit_stats['changed-elements']) cwd = os.getcwd() os.chdir(src_path) changed_functions = get_changed_functions_from_commit( src_path, commit) os.chdir(cwd) commit_stats['changed-not-impacted'] = changed_functions.copy() # Get impacted functions changed_functions |= get_impacted_funcs_fake_hash( changed_functions, used_definitions) commit_stats['changed-elements'] |= changed_functions total_changed_functions += len(changed_functions) commit_stats['changed-element-count'] = len( commit_stats['changed-elements']) stats['commits'][commit] = commit_stats # Count how often each element was changed over the whole history for element in commit_stats['changed-elements']: if element not in stats['elements']: stats['elements'][element] = 0 stats['elements'][element] += 1 # Save data for reuse prev_commit = commit prev_functions = functions prev_used_definitions = used_definitions self.build_info['stats'] = stats eval_info = { 'nr-of-commits': len(commits), 'change-percentage': {}, # change percentage -> nr of commits with change < percentage 'hot-commits': {}, 'total-changed-functions': total_changed_functions, 'total-insdel-functions': total_insdel_functions, } # Get most changed elements eval_info['most-changed-elements'] = { k: v for k, v in stats['elements'].iteritems() if v > 400 } # arbitrary value (about 10% of commits) # Calc average nr and percentage of (changed) symbols per commit summed_avg_change_percentage = 0 summed_changed_elements = 0 summed_total_elements = 0 commits = self.build_info['stats']['commits'] for commit in commits: commit_stat = commits[commit] change_percentage = len(commit_stat['changed-elements']) / float( commit_stat['element-count']) summed_avg_change_percentage += 
change_percentage summed_changed_elements += len(commit_stat['changed-elements']) summed_total_elements += commit_stat['element-count'] percentage = int(round(change_percentage * 100)) if percentage not in eval_info['change-percentage']: eval_info['change-percentage'][percentage] = 0 eval_info['change-percentage'][percentage] += 1 # Identify hot commits #if percentage > self.hot_threshold_percentage.value: #eval_info['hot-commits'][commit] = percentage eval_info[ 'avg-change-percentage'] = summed_avg_change_percentage / float( len(stats['commits'])) eval_info[ 'avg-changed-elements'] = summed_changed_elements / eval_info[ 'nr-of-commits'] eval_info['avg-total-elements'] = summed_total_elements / eval_info[ 'nr-of-commits'] eval_info['nr-hot-commits'] = len(eval_info['hot-commits']) with open(self.eval_data.path, "w+") as fd: fd.write(repr(eval_info)) # Output the summary of this build into the statistics file. with open(self.stats.path, "w+") as fd: fd.write(repr(self.build_info)) def plot_hash_count_histogram(hash_values, filename): dictionary = plt.figure() fig, ax = plt.subplots() plt.xlabel('Prozentanteil geaenderter Elemente') plt.ylabel('Anzahl von Commits') axes = plt.gca() axes.set_xlim([-10, 100]) axes.set_ylim([0, 1600]) ax.bar(hash_values.keys(), hash_values.values(), align='center') fig.savefig(filename) # clean data for plotting data = { k: v for k, v in eval_info['change-percentage'].iteritems() if k <= 100 } plot_hash_count_histogram(data, self.hot_commits_histo.path) def variant_name(self): return "%s-%s" % (self.project_name(), self.metadata['mode']) def symlink_name(self): return "%s-%s" % (self.title, self.variant_name())
def __setup_tmp_directory(self):
    """Create the temporary working directory and register it as a subobject,
    so every input/output directory can pick it up via its tmp_directory slot."""
    tmp = Directory(tempfile.mkdtemp())
    self.tmp_directory = tmp
    self.subobjects["tmp_directory"] = tmp
class HistoricalCompilationEvaluation(Experiment):
    """Evaluate, per commit of a project's history, which elements changed
    according to their *local* hashes, compared against a git-diff-based
    approximation of changed functions.

    Reads the summary and per-commit element-hash dumps produced by a
    previous HistoricalCompilation data-collection run ("dataset" input).
    NOTE(review): Python 2 code (print statements, iteritems).
    """
    inputs = {
        "clang_hash": GitArchive("/home/cip/2015/yb90ifym/clang-hash/"),
        "project": GitArchive("/home/cip/2015/yb90ifym/clang-hash/hash-projects/lua"),
        "commits": Integer(4744),
        "jobs": Integer(1),  # was 4
        "dataset": Directory(
            "/home/cip/2015/yb90ifym/clang-hash/experiments/HistoricalCompilation-4e7c977077afea3d2ad77aeefe3b472c"
        ),  # full lua
        "hot_threshold_percentage": Integer(
            50
        ),  # minimal change percentage for commit to be classified as "hot"
    }
    outputs = {
        "stats": File("summary.dict"),
        "eval_data": File("eval.txt"),
        "hot_commits_histo": File("local_hot_commits.pdf"),
        "compare_approx_elem": File("local_compare_approx_elem.pdf"),
    }

    def project_name(self):
        # Basename of the clone URL doubles as the project name.
        return os.path.basename(self.metadata['project-clone-url'])

    def run(self):
        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {
            "project-name": self.project_name(),
            "commit-hash": self.metadata["project-hash"],
            'builds': []
        }
        with self.project as src_path:
            time = 0
            os.chdir(self.dataset.path)

            # Read summary file from data collection run
            # NOTE(review): eval() on dataset files — only safe for trusted data.
            commits = None
            with open("summary.dict") as sf:
                summary = eval(sf.read())
                commits = summary['builds']

            def read_chash_data(commit):
                # Per-commit element-hash dump; empty list if missing/corrupt.
                element_hashes = []
                try:
                    with open(commit, 'r') as cf:
                        commit_data = eval(cf.read())
                        for ofile_data in commit_data:
                            element_hashes.extend(ofile_data['element-hashes'])
                except:
                    pass
                return element_hashes

            stats = {
                'data-empty': set(
                ),  # commits with empty info files, e.g. failed to be collected, (first n commits -> missing makefile o.a.)
                'commits': {},
                'elements': {},  # symbol -> how often did this symbol change
            }

            total_changed_globals = 0  # How often was any global changed/introduced throughout the history?
            total_changed_records = 0  # How often was any record changed/introduced throughout the history?
            total_changed_static_funcs = 0  # How often was any static function changed/introduced throughout the history?
            total_changed_functions = 0  # How often was any function changed/introduced throughout the history? (incl. static)

            prev_commit = None
            prev_hashes = None
            counter = 1
            for info in commits:
                print "%d/%d" % (counter, len(commits))
                counter += 1
                commit = info['commit']
                parent = info['parent']
                if not parent:  # first commit has no parent
                    print "No parent"
                    continue

                commit_data = read_chash_data(commit)
                if not commit_data:
                    # If the data does not exist, note and skip
                    #print "Data empty"
                    stats['data-empty'].add(commit)
                    continue

                # Local hash of every element of this commit.
                local_hashes = {}
                for element in commit_data:
                    local_hashes[element[0]] = element[1]

                # Same for the parent commit; reuse the previous iteration's
                # result when walking a linear history.
                parent_hashes = {}
                if parent == prev_commit:
                    #print "Reuse prev_commit"
                    parent_hashes = prev_hashes
                else:
                    #print "Cannot reuse prev_commit"
                    parent_data = read_chash_data(parent)
                    for element in parent_data:
                        parent_hashes[element[0]] = element[1]

                if not parent_hashes:
                    # If the data does not exist, note and skip
                    stats['data-empty'].add(commit)
                    # Save data for reuse
                    prev_commit = commit
                    prev_hashes = local_hashes
                    continue

                #########################
                # LOCAL HASH EVALUATION #
                #########################

                commit_stats = {
                    'element-count': len(local_hashes),
                    'changed-elements': [],
                    'changed-functions-approx': [],
                }

                # Get data from approximation
                cwd = os.getcwd()
                os.chdir(src_path)
                commit_stats[
                    'changed-functions-approx'] = get_changed_functions_from_commit(
                        src_path, commit)
                os.chdir(cwd)

                elements = set(local_hashes.keys())
                parent_elements = set(parent_hashes.keys())

                commit_stats['changed-elements'] = set(
                )  #TODO here elements ^ parent_elements # elements either added or removed: if this is initialized with the insdel items, causes weird data to show um in result. should perhaps include it and add explanation

                # Compare hashes
                common_elements = elements & parent_elements
                for element in common_elements:
                    if local_hashes[element] != parent_hashes[element]:
                        commit_stats['changed-elements'].add(element)
                        if element.startswith(
                                'record:'
                        ):  # do this here to ignore insertions and deletions
                            total_changed_records += 1
                        elif element.startswith(
                                'variable:') or element.startswith(
                                    'static variable:'):
                            total_changed_globals += 1
                        elif element.startswith('static function:'):
                            # static functions count both individually and
                            # in the overall function counter
                            total_changed_static_funcs += 1
                            total_changed_functions += 1
                        else:
                            total_changed_functions += 1

                commit_stats['changed-element-count'] = len(
                    commit_stats['changed-elements'])
                stats['commits'][commit] = commit_stats

                # Count how often each element was changed over the whole history
                for element in commit_stats['changed-elements']:
                    if element not in stats['elements']:
                        stats['elements'][element] = 0
                    stats['elements'][element] += 1

                # Save data for reuse
                prev_commit = commit
                prev_hashes = local_hashes

            self.build_info['stats'] = stats

            eval_info = {
                'nr-of-commits': len(commits),
                'change-percentage': {},  # change percentage -> nr of commits with change < percentage
                'hot-commits': {},
                'total-changed-globals': total_changed_globals,
                'total-changed-records': total_changed_records,
                'total-changed-static-funcs': total_changed_static_funcs,
                'total-changed-functions': total_changed_functions,
                'total-changed-elements': total_changed_functions + total_changed_records + total_changed_globals,
            }

            # Get most changed elements
            eval_info['most-changed-elements'] = {
                k: v
                for k, v in stats['elements'].iteritems()
                if v > self.commits.value / 10
            }  # arbitrary value (about 10% of commits)

            # Calc average nr and percentage of (changed) symbols per commit
            summed_avg_change_percentage = 0
            summed_changed_elements = 0
            summed_total_elements = 0
            commits = self.build_info['stats']['commits']
            for commit in commits:
                commit_stat = commits[commit]
                change_percentage = len(commit_stat['changed-elements']) / float(
                    commit_stat['element-count'])
                summed_avg_change_percentage += change_percentage
                summed_changed_elements += len(commit_stat['changed-elements'])
                summed_total_elements += commit_stat['element-count']
                percentage = int(round(change_percentage * 100))
                if percentage not in eval_info['change-percentage']:
                    eval_info['change-percentage'][percentage] = 0
                eval_info['change-percentage'][percentage] += 1

                # Identify hot commits
                if percentage > self.hot_threshold_percentage.value:
                    eval_info['hot-commits'][commit] = (
                        percentage, len(commit_stat['changed-elements']),
                        commit_stat['element-count'])

            eval_info[
                'avg-change-percentage'] = summed_avg_change_percentage / float(
                    len(stats['commits']))
            eval_info[
                'avg-changed-elements'] = summed_changed_elements / eval_info[
                    'nr-of-commits']
            eval_info['avg-total-elements'] = summed_total_elements / eval_info[
                'nr-of-commits']
            eval_info['nr-hot-commits'] = len(eval_info['hot-commits'])

            with open(self.eval_data.path, "w+") as fd:
                fd.write(repr(eval_info))

            # Output the summary of this build into the statistics file.
            with open(self.stats.path, "w+") as fd:
                fd.write(repr(self.build_info))
            # Disabled plotting code kept for reference.
            '''
            def plot_hash_count_histogram(hash_values, filename):
                dictionary = plt.figure()
                fig, ax = plt.subplots()
                plt.xlabel('Prozentanteil geaenderter Elemente')
                plt.ylabel('Anzahl von Commits')
                ax.bar(hash_values.keys(), hash_values.values(), align='center')
                fig.savefig(filename)

            # clean data for plotting
            data = {k:v for k,v in eval_info['change-percentage'].iteritems() if k <= 100}
            plot_hash_count_histogram(data, self.hot_commits_histo.path)

            changed_funcs_approx_list = []
            changed_elements_list = []
            for commit in commits:
                commit_stat = commits[commit]
                changed_functions_approx = commit_stat['changed-functions-approx']
                changed_elements = commit_stat['changed-elements']
                changed_funcs_approx_list.append(len(changed_functions_approx))
                changed_elements_list.append(len(changed_elements))

            #TODO plot changed elements vs approx. changed functions
            # and also changed functions vs approx changed functions
            fig, ax = plt.subplots()
            ax.plot(changed_elements_list, label='Geaenderte Elemente (lokal)')
            ax.plot(changed_funcs_approx_list, 'm', label='Geaenderte Funktionen (Approx)')
            lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) # legend on the right
            plt.xlabel('Commits')
            plt.ylabel('Anzahl')
            fig.savefig(self.compare_approx_elem.path, bbox_extra_artists=(lgd,), bbox_inches='tight')
            '''

    def variant_name(self):
        return "%s-%s" % (self.project_name(), self.metadata['mode'])

    def symlink_name(self):
        return "%s-%s" % (self.title, self.variant_name())