Ejemplo n.º 1
0
class FailTrace(Experiment):
    """Build the ERIKA coptermock example and record an instruction trace.

    Builds the ISO/ELF artifacts from the ERIKA checkout, then drives the
    FAIL* bochs experiment runner to produce a protobuf trace in the
    ``trace`` output directory.
    """

    inputs = {
        "erika": GitArchive("[email protected]:erika"),
        "bochs-runner": Executable("/proj/i4danceos/tools/fail/bochs-experiment-runner.py"),
        "erika-tracing": Executable("/proj/i4danceos/tools/fail/erika-tracing"),
    }
    outputs = {
        "trace": Directory("trace"),
        "elf": File("erika.elf"),
        "iso": File("erika.iso"),
    }

    def run(self):
        logging.info("Cloning ERIKA...")

        with self.erika as erika_path:
            # Build the coptermock example inside the checkout.
            shell("cd %s/examples/x86/coptermock-isorc; make", erika_path)

            # Keep the build artifacts as experiment outputs.
            build_dir = os.path.join(erika_path, "examples/x86/coptermock-isorc/Debug")
            self.iso.copy_contents(os.path.join(build_dir, "erika.iso"))
            self.elf.copy_contents(os.path.join(build_dir, "Debug/out.elf"))

        # Run the traced experiment inside the result directory.
        command = ("cd %(resultdir)s;  python %(bochs)s -F 50 -i %(iso)s -e %(elf)s -f %(fail)s"
                   " -m 8 -1 --  -Wf,--end-symbol=test_finish -Wf,--start-symbol=EE_oo_StartOS"
                   " -Wf,--trace-file=trace.pb -Wf,--save-symbol=EE_oo_StartOS")
        shell(command % {
            "resultdir": self.trace.path,
            "bochs": self.bochs_runner.path,
            "iso": self.iso.path,
            "elf": self.elf.path,
            "fail": self.erika_tracing.path
        })
Ejemplo n.º 2
0
class AttributeExperiment(Experiment):
    """Snapshot the build configuration, run a clean build, then collect attributes.

    Captures ``.config``, runs ``make clean`` followed by the configured build
    command (stdout/stderr captured into the build_out/build_err outputs), and
    finally runs the attribute-extraction command into ``attributes.json``.
    On a failed build, only logs and returns without collecting attributes.
    """

    outputs = {
        "config": File(".config"),
        "attributes": File("attributes.json"),
        "build_out": File("build.out"),
        "build_err": File("build.err"),
    }

    def run(self):
        build_command = self.build_command.value.split()
        attr_command = self.attr_command.value.split()

        # Preserve the configuration this run was built with.
        shutil.copyfile(f"{self.project_root.path}/.config", self.config.path)

        # Start from a pristine tree; make's own chatter is discarded.
        subprocess.check_call(
            ["make", "clean"],
            cwd=self.project_root.path,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        try:
            with open(self.build_out.path, "w") as stdout_file:
                with open(self.build_err.path, "w") as stderr_file:
                    subprocess.check_call(
                        build_command,
                        cwd=self.project_root.path,
                        stdout=stdout_file,
                        stderr=stderr_file,
                    )
        except subprocess.CalledProcessError:
            # A broken build is an expected outcome: record it and stop here.
            logger.info("build error")
            return

        with open(self.attributes.path, "w") as attr_file:
            subprocess.check_call(attr_command,
                                  cwd=self.project_root.path,
                                  stdout=attr_file)
Ejemplo n.º 3
0
    def before_experiment_run(self, parameter_type):
        """Redirect an input file to a working copy in the tmp directory.

        For input parameters, remembers the original path, registers it as a
        subobject, and re-points this object at ``<name>_<basename>`` (with a
        trailing ``.gz`` suffix removed) inside the tmp directory, before
        delegating to File.before_experiment_run.
        """
        self.parameter_type = parameter_type
        if parameter_type == "input":
            self.__original_filename = File.path.fget(self)
            self.subobjects["filename"] = File(self.__original_filename)
            # BUG FIX: the original used self.path.rstrip(".gz"), but rstrip()
            # strips any run of trailing '.', 'g' or 'z' CHARACTERS (e.g.
            # "config.gz" -> "confi"); strip the ".gz" suffix explicitly.
            base = os.path.basename(self.path)
            if base.endswith(".gz"):
                base = base[:-len(".gz")]
            filename = self.name + "_" + base
            self.set_path(self.tmp_directory.path, filename)

        File.before_experiment_run(self, parameter_type)
Ejemplo n.º 4
0
    def __init__(self, filename = "data.tex", pgfkey = "/versuchung", setmacro="pgfkeyssetvalue"):
        """Create a pgfkeys-backed data file.

        Values are later serialized as ``\\<setmacro>{<pgfkey>/<key>}{<value>}``
        lines into *filename*.
        """
        # Initialize both bases: the file backing and the dict storage.
        File.__init__(self, filename)
        dict.__init__(self)

        self.__pgfkey = pgfkey
        self.format_string = "\\" + setmacro + "{%s/%s}{%s}"

        # Touch the value once if the file already exists, so it is
        # read in and will be written back out afterwards.
        if os.path.exists(self.path):
            _ = self.value
Ejemplo n.º 5
0
    def before_experiment_run(self, parameter_type):
        """Redirect an input file to a working copy in the tmp directory.

        For input parameters, remembers the original path, registers it as a
        subobject, and re-points this object at ``<name>_<basename>`` (with a
        trailing ``.gz`` suffix removed) inside the tmp directory, before
        delegating to File.before_experiment_run.
        """
        self.parameter_type = parameter_type
        if parameter_type == "input":
            self.__original_filename = File.path.fget(self)
            self.subobjects["filename"] = File(self.__original_filename)
            # BUG FIX: the original used self.path.rstrip(".gz"), but rstrip()
            # strips any run of trailing '.', 'g' or 'z' CHARACTERS (e.g.
            # "config.gz" -> "confi"); strip the ".gz" suffix explicitly.
            base = os.path.basename(self.path)
            if base.endswith(".gz"):
                base = base[:-len(".gz")]
            filename = self.name + "_" + base
            self.set_path(self.tmp_directory.path, filename)

        File.before_experiment_run(self, parameter_type)
Ejemplo n.º 6
0
class TestExperiment(Experiment):
    """Concatenate every string of the input list into the result file."""

    inputs = {'stringlist': List(String)}
    outputs = {'result': File("result")}

    def run(self):
        # Append each input string's value to the output file, in order.
        for entry in self.i.stringlist:
            self.o.result.write(entry.value)
Ejemplo n.º 7
0
class SimpleExperiment2(Experiment):
    """Check that a dependent experiment's metadata matches its output file."""

    inputs = {"se": SimpleExperiment()}
    outputs = {"key": File("key")}

    def run(self):
        # Rebuild the expected "key: value" line from the recorded metadata
        # of the upstream experiment and compare against its actual output.
        meta = self.i.se.metadata
        content = "%s: %s\n" % (meta["input_key"], meta["input_value"])
        assert content == self.i.se.o.output_file.value
        assert self.metadata["experiment-name"] == self.title
Ejemplo n.º 8
0
class SimpleExperiment(Experiment):
    """Exercise attribute access on declared inputs and outputs."""

    inputs = {
        'abc': File("/dev/null"),
        "xxx": File("/dev/null"),
        "empty": String(None)
    }
    outputs = {'xyz': File("asd"), "zzz": File("asd")}

    def run(self):
        # Shorthand attributes must resolve to the declared parameter objects.
        assert self.abc == self.inputs.abc
        assert self.xyz == self.outputs.xyz

        # Comparing an input object against None must not raise.
        raised = False
        try:
            self.xxx != None
        except AttributeError:
            raised = True
        assert raised == False

        # A String(None) input keeps None as its value.
        assert self.empty.value is None
Ejemplo n.º 9
0
class SimpleExperiment(Experiment):
    """Write "<key>: <value>" built from the two string inputs to a file."""

    inputs = {
        "input_key": String("default key"),
        "input_value": String("default value")
    }
    outputs = {"output_file": File("output")}

    def run(self):
        # Join the two input parameters into one "key: value" line.
        pieces = [self.inputs.input_key.value, ": ", self.inputs.input_value.value]
        content = "".join(pieces)

        # Persist the newline-terminated line as the experiment result.
        self.outputs.output_file.value = content + "\n"
Ejemplo n.º 10
0
class HistoricalCompilationGlobalEvaluation(Experiment):
    """Evaluate global-hash changes across a project's commit history.

    Consumes the per-commit element-hash dumps of a previous
    HistoricalCompilation data-collection run (the ``dataset`` input) and
    computes, per commit, which elements' global hashes changed; aggregates
    change totals per element category, dependency in-/out-degrees, and a
    histogram of per-commit change percentages.

    NOTE(review): Python 2 code (print statements, dict.iteritems).
    """
    inputs = {
        "clang_hash": GitArchive("/home/cip/2015/yb90ifym/clang-hash/"),
        "project": GitArchive("/home/cip/2015/yb90ifym/clang-hash/hash-projects/lua"),
        "commits": Integer(4744),
        "jobs": Integer(1), # was 4
        "dataset": Directory("/home/cip/2015/yb90ifym/clang-hash/experiments/HistoricalCompilation-4e7c977077afea3d2ad77aeefe3b472c"), # full lua
        "hot_threshold_percentage": Integer(10), # minimal change percentage for commit to be classified as "hot"
    }
    outputs = {
        "stats": File("summary.dict"),
        "eval_data": File("eval.txt"),
        "hot_commits_histo": File("global_hot_commits.pdf"),
    }


    def project_name(self):
        """Return the basename of the project's clone URL (e.g. "lua")."""
        return os.path.basename(self.metadata['project-clone-url'])


    def run(self):
        """Compute per-commit global-hash statistics, write eval/stats files
        and plot the change-percentage histogram."""
        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {"project-name": self.project_name(),
                           "commit-hash": self.metadata["project-hash"],
                           'builds': []}

        with self.project as src_path:
            time = 0  # NOTE(review): never used afterwards

            os.chdir(self.dataset.path)

            # Read summary file from data collection run
            commits = None
            with open("summary.dict") as sf:
                # NOTE(review): eval() of a repr()-written file; acceptable
                # only because the dataset is produced by our own runs.
                summary = eval(sf.read())
                commits = summary['builds']


            def read_chash_data(commit):
                """Return all element-hash entries recorded for *commit*
                ([] if the data file is missing or unreadable)."""
                element_hashes = []
                try:
                    with open(commit, 'r') as cf:
                        commit_data = eval(cf.read())
                        for ofile_data in commit_data:
                            element_hashes.extend(ofile_data['element-hashes'])
                except:
                    # Missing/corrupt per-commit file -> treat as empty.
                    pass

                return element_hashes



            stats = {
                'data-empty': set(), # commits with empty info files, e.g. failed to be collected, (first n commits -> missing makefile o.a.)
                'commits': {},
                'elements': {}, # symbol -> how often did this symbol change
            }




            total_changed_globals = 0 # How often was any global changed throughout the history?
            total_changed_records = 0 # How often was any record changed throughout the history?
            total_changed_static_funcs = 0 # How often was any static function changed throughout the history?
            total_changed_functions = 0 # without static functions

            total_insdel_globals = 0 # How often was any global introduced/removed throughout the history?
            total_insdel_records = 0 # How often was any record introduced/removed throughout the history?
            total_insdel_static_funcs = 0 # How often was any static function introduced/removed throughout the history?
            total_insdel_functions = 0 # without static functions

            # in-degree: how many SLOs depend on E?
            # out-degree: how many SLOs does E depend on?
            in_degrees = {} # indegree -> nr of elements with that indegree
            out_degrees = {} # outdegree -> nr of elements with that outdegree
            max_in_degree = (None, 0) # (element, degree)
            max_out_degree = (None, 0) # (element, degree)

            prev_commit = None
            prev_hashes = None
            prev_used_definitions = None
            prev_global_hashes = None
            counter = 1
            for info in commits:
                print "\n%d/%d" % (counter, len(commits))
                counter += 1
                commit = info['commit']
                parent = info['parent']

                if not parent: # first commit has no parent
                    print "No parent"
                    continue

                commit_data = read_chash_data(commit)
                if not commit_data:
                    # If the data does not exist, note and skip
                    #print "Data empty"
                    stats['data-empty'].add(commit)
                    continue

                local_hashes = {}
                used_definitions = {}

                # just 4 testing:
                # Strip the "function:"/"static function:" prefix so functions
                # are keyed by bare name; record which definitions they use.
                for element in commit_data:
                    name = element[0]
                    if name.startswith('static function:') or name.startswith('function:'):
                        name = element[0].split(':')[1]
                    local_hashes[name] = element[1]
                    try:
                        used_definitions[name] = set()
                        for used_def in element[2]:
                            if used_def.startswith('static function:') or used_def.startswith('function:'):
                                used_definitions[name].add(used_def.split(':')[1])
                    except:
                        # element[2] may be missing -> no used definitions.
                        pass

                # prev:
                #for element in commit_data:
                #    local_hashes[element[0]] = element[1]
                #    try:
                #        used_definitions[element[0]] = element[2]
                #    except:
                #        pass


                parent_hashes = {}
                parent_global_hashes = {}
                parent_used_definitions = {}
                # Reuse the previous iteration's data when walking a linear
                # history (the parent is the commit we just processed).
                if parent == prev_commit and prev_global_hashes and prev_used_definitions and prev_hashes:
                    #print "Reuse prev_commit"
                    parent_hashes = prev_hashes
                    parent_used_definitions = prev_used_definitions
                    parent_global_hashes = prev_global_hashes
                else:
                    #print "Cannot reuse prev_commit"
                    parent_data = read_chash_data(parent)

                    # just 4 testing:
                    for element in parent_data:
                        name = element[0]
                        if name.startswith('static function:') or name.startswith('function:'):
                            name = element[0].split(':')[1]
                        parent_hashes[name] = element[1]
                        try:
                            parent_used_definitions[name] = set()
                            for used_def in element[2]:
                                if used_def.startswith('static function:') or used_def.startswith('function:'):
                                    parent_used_definitions[name].add(used_def.split(':')[1])
                        except:
                            pass



                    # prev:
                    #for element in parent_data:
                    #    parent_hashes[element[0]] = element[1]
                    #    try:
                    #        parent_used_definitions[element[0]] = element[2]
                    #    except:
                    #        pass


                if not parent_hashes:
                    # If the data does not exist, note and skip
                    stats['data-empty'].add(commit)

                    # Save data for reuse
                    prev_commit = commit
                    prev_hashes = local_hashes
                    prev_used_definitions = used_definitions
                    continue

                ##########################
                # GLOBAL HASH EVALUATION #
                ##########################

                commit_stats = {
                    'element-count' : len(local_hashes),
                    'changed-elements' : [],
                }


                elements = set(local_hashes.keys())
                parent_elements = set(parent_hashes.keys())



                # calculate in- and out-degree
                # reverse used_definitions
                out_use_defs = { s:0 for s in used_definitions.keys() } # element -> nr of depending elements
                for element in elements:
                   for el in used_definitions[element]:
                        try:
                            out_use_defs[el] += 1
                        except:
                            # el is not an element of this commit -> ignore.
                            pass


                for element in elements:
                    out_degree = len(used_definitions[element])
                    in_degree = out_use_defs[element]

                    if in_degree > max_in_degree[1]:
                        max_in_degree = (element, in_degree)
                    if out_degree > max_out_degree[1]:
                        max_out_degree = (element, out_degree)

                    if in_degree not in in_degrees:
                        in_degrees[in_degree] = 0
                    in_degrees[in_degree] += 1

                    if out_degree not in out_degrees:
                        out_degrees[out_degree] = 0
                    out_degrees[out_degree] += 1


                commit_stats['changed-elements'] = elements ^ parent_elements # elements either added or removed

                for element in commit_stats['changed-elements']:
                     if element.startswith('record:'): # do this here to get only insertions and deletions
                         total_insdel_records += 1
                     elif element.startswith('variable:') or element.startswith('static variable:'):
                         total_insdel_globals += 1
                     elif element.startswith('static function:'):
                         total_insdel_static_funcs += 1
                     else:
                         total_insdel_functions += 1


                # Compare hashes
                common_elements = elements & parent_elements

                global_hashes = {}
                for element in common_elements:
                    global_hash = get_global_hash(element, global_hashes, local_hashes, used_definitions)
                    parent_global_hash = get_global_hash(element, parent_global_hashes, parent_hashes, parent_used_definitions)
                    if global_hash != parent_global_hash:
                        commit_stats['changed-elements'].add(element)
                        if element.startswith('record:'): # do this here to ignore insertions and deletions
                            total_changed_records += 1
                        elif element.startswith('variable:') or element.startswith('static variable:'):
                            total_changed_globals += 1
                        elif element.startswith('static function:'):
                            total_changed_static_funcs += 1
                        else:
                            total_changed_functions += 1

                commit_stats['changed-element-count'] = len(commit_stats['changed-elements']);
                stats['commits'][commit] = commit_stats


                # Count how often each element was changed over the whole history
                for element in commit_stats['changed-elements']:
                    if element not in stats['elements']:
                        stats['elements'][element] = 0;
                    stats['elements'][element] += 1


                # Save data for reuse
                prev_commit = commit
                prev_hashes = local_hashes
                prev_used_definitions = used_definitions
                prev_global_hashes = global_hashes

            self.build_info['stats'] = stats

        #in_degrees = {} # indegree -> nr of elements with that indegree
        #out_degrees = {} # outdegree -> nr of elements with that outdegree
        #max_in_degree = (None, 0) # (element, degree)
        #max_out_degree = (None, 0) # (element, degree)
        summed_in_degrees = sum([k*v for k,v in in_degrees.iteritems()])
        nr_of_elements = sum(in_degrees.values())
        avg_in_degree = summed_in_degrees/float(nr_of_elements)
        avg_out_degree = sum([k*v for k,v in out_degrees.iteritems()])/float(sum(out_degrees.values()))


        eval_info = {
            'nr-of-commits' : len(commits),
            'change-percentage' : {}, # change percentage -> nr of commits with change < percentage
            'hot-commits': {},
            'total-changed-globals': total_changed_globals,
            'total-changed-records': total_changed_records,
            'total-changed-static-funcs': total_changed_static_funcs,
            'total-changed-functions': total_changed_functions,
            'total-insdel-globals': total_insdel_globals,
            'total-insdel-records': total_insdel_records,
            'total-insdel-static-funcs': total_insdel_static_funcs,
            'total-insdel-functions': total_insdel_functions,
            'max_in_degree': max_in_degree,
            'max_out_degree': max_out_degree,
            'avg_in_degree': avg_in_degree,
            'avg_out_degree': avg_out_degree,
        }

        # Get most changed elements
        eval_info['most-changed-elements'] = {k:v for k,v in stats['elements'].iteritems() if v > 1000} # arbitrary value (about 20% of commits)

        # Calc average nr and percentage of (changed) symbols per commit
        summed_avg_change_percentage = 0
        summed_changed_elements = 0
        summed_total_elements = 0
        commits = self.build_info['stats']['commits']
        for commit in commits:
            commit_stat = commits[commit]
            change_percentage = len(commit_stat['changed-elements'])/float(commit_stat['element-count'])
            summed_avg_change_percentage += change_percentage

            summed_changed_elements += len(commit_stat['changed-elements'])
            summed_total_elements += commit_stat['element-count']

            percentage = int(round(change_percentage * 100))
            if percentage not in eval_info['change-percentage']:
                eval_info['change-percentage'][percentage] = 0
            eval_info['change-percentage'][percentage] += 1


            # Identify hot commits
            #if percentage > self.hot_threshold_percentage.value:
                #eval_info['hot-commits'][commit] = percentage


        eval_info['avg-change-percentage'] = summed_avg_change_percentage / float(len(stats['commits']))
        eval_info['avg-changed-elements'] = summed_changed_elements / eval_info['nr-of-commits']
        eval_info['avg-total-elements'] = summed_total_elements / eval_info['nr-of-commits']




        eval_info['nr-hot-commits'] = len(eval_info['hot-commits'])

        with open(self.eval_data.path, "w+") as fd:
            fd.write(repr(eval_info))



        # Output the summary of this build into the statistics file.
        with open(self.stats.path, "w+") as fd:
            fd.write(repr(self.build_info))










        def plot_hash_count_histogram(hash_values, filename):
            """Plot percentage -> commit-count as a bar chart into *filename*."""
            dictionary = plt.figure()
            fig, ax = plt.subplots()
            plt.xlabel('Prozentanteil geaenderter Elemente')
            plt.ylabel('Anzahl von Commits')
            axes = plt.gca()
            axes.set_xlim([-10,100])
            axes.set_ylim([0,1600])

            ax.bar(hash_values.keys(), hash_values.values(), align='center')
            fig.savefig(filename)

        # clean data for plotting
        data = {k:v for k,v in eval_info['change-percentage'].iteritems() if k <= 100}

        plot_hash_count_histogram(data, self.hot_commits_histo.path)





    def variant_name(self):
        """Return "<project>-<mode>" for this experiment variant."""
        return "%s-%s"%(self.project_name(), self.metadata['mode'])

    def symlink_name(self):
        """Return "<title>-<variant>" used for the result symlink."""
        return "%s-%s"%(self.title, self.variant_name())
Ejemplo n.º 11
0
 def __init__(self, default_filename=""):
     """Create a binary-mode File parameter with an optional default path."""
     File.__init__(self, default_filename, binary=True)
class HistoricalCompilationCallGraphEvaluation(Experiment):
    inputs = {
        "clang_hash":
        GitArchive("/home/cip/2015/yb90ifym/clang-hash/"),
        "project":
        GitArchive("/home/cip/2015/yb90ifym/clang-hash/hash-projects/lua"),
        "commits":
        Integer(4744),
        "jobs":
        Integer(1),  # was 4
        "dataset":
        Directory(
            "/home/cip/2015/yb90ifym/clang-hash/experiments/HistoricalCompilation-4e7c977077afea3d2ad77aeefe3b472c"
        ),  # full lua
        "hot_threshold_percentage":
        Integer(
            10
        ),  # minimal change percentage for commit to be classified as "hot"
    }
    outputs = {
        "stats": File("summary.dict"),
        "eval_data": File("eval.txt"),
        "hot_commits_histo": File("cg_hot_commits.pdf"),
    }

    def project_name(self):
        return os.path.basename(self.metadata['project-clone-url'])

    def run(self):
        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {
            "project-name": self.project_name(),
            "commit-hash": self.metadata["project-hash"],
            'builds': []
        }

        with self.project as src_path:
            time = 0

            os.chdir(self.dataset.path)

            # Read summary file from data collection run
            commits = None
            with open("summary.dict") as sf:
                summary = eval(sf.read())
                commits = summary['builds']

            def read_chash_data(commit):
                element_hashes = []
                try:
                    with open(commit, 'r') as cf:
                        commit_data = eval(cf.read())
                        for ofile_data in commit_data:
                            element_hashes.extend(ofile_data['element-hashes'])
                except:
                    pass

                return element_hashes

            stats = {
                'data-empty': set(
                ),  # commits with empty info files, e.g. failed to be collected, (first n commits -> missing makefile o.a.)
                'commits': {},
                'elements': {},  # symbol -> how often did this symbol change
            }

            total_changed_functions = 0  # How often was any function changed throughout the history?

            total_insdel_functions = 0  # How often was any function introduced/removed throughout the history?

            prev_commit = None
            prev_functions = None
            prev_used_definitions = None
            counter = 1
            for info in commits:
                print "%d/%d" % (counter, len(commits))
                counter += 1
                commit = info['commit']
                parent = info['parent']

                if not parent:  # first commit has no parent
                    print "No parent"
                    continue

                commit_data = read_chash_data(commit)
                if not commit_data:
                    # If the data does not exist, note and skip
                    #print "Data empty"
                    stats['data-empty'].add(commit)
                    continue

                functions = set()
                used_definitions = {}
                for element in commit_data:
                    if element[0].startswith('static function:') or element[
                            0].startswith('function:'):
                        clean_name = element[0].split(':')[1]
                        functions.add(clean_name)
                        used_definitions[clean_name] = set()
                        for used_def in element[2]:
                            if used_def.startswith(
                                    'static function:') or used_def.startswith(
                                        'function:'):
                                used_definitions[clean_name].add(
                                    used_def.split(':')[1])

                parent_functions = {}
                parent_used_definitions = {}
                if parent == prev_commit and prev_functions and prev_used_definitions:
                    #print "Reuse prev_commit"
                    parent_functions = prev_functions
                    parent_used_definitions = prev_used_definitions
                else:
                    #print "Cannot reuse prev_commit"
                    parent_data = read_chash_data(parent)
                    for element in parent_data:
                        if element[0].startswith(
                                'static function:') or element[0].startswith(
                                    'function:'):
                            clean_name = element[0].split(':')[1]
                            parent_functions.insert(clean_name)
                            parent_used_definitions[clean_name] = set()
                            for used_def in element[2]:
                                if used_def.startswith(
                                        'static function:'
                                ) or used_def.startswith('function:'):
                                    parent_used_definitions[clean_name].add(
                                        used_def.split(':')[1])

                if not parent_functions:
                    # If the data does not exist, note and skip
                    stats['data-empty'].add(commit)

                    # Save data for reuse
                    prev_commit = commit
                    prev_functions = functions
                    prev_used_definitions = used_definitions
                    continue

                #########################
                # CALL GRAPH EVALUATION #
                #########################

                commit_stats = {
                    'element-count': len(functions),
                    'changed-elements':
                    [],  # contains changed + impacted functions
                    #'changed-not-impacted': set(), # contains directly changed functions only
                }

                elements = functions
                parent_elements = parent_functions

                commit_stats['changed-elements'] = set(
                )  #elements ^ parent_elements # elements either added or removed

                total_insdel_functions += len(commit_stats['changed-elements'])

                cwd = os.getcwd()
                os.chdir(src_path)
                changed_functions = get_changed_functions_from_commit(
                    src_path, commit)
                os.chdir(cwd)

                commit_stats['changed-not-impacted'] = changed_functions.copy()

                # Get impacted functions
                changed_functions |= get_impacted_funcs_fake_hash(
                    changed_functions, used_definitions)

                commit_stats['changed-elements'] |= changed_functions

                total_changed_functions += len(changed_functions)

                commit_stats['changed-element-count'] = len(
                    commit_stats['changed-elements'])
                stats['commits'][commit] = commit_stats

                # Count how often each element was changed over the whole history
                for element in commit_stats['changed-elements']:
                    if element not in stats['elements']:
                        stats['elements'][element] = 0
                    stats['elements'][element] += 1

                # Save data for reuse
                prev_commit = commit
                prev_functions = functions
                prev_used_definitions = used_definitions

            self.build_info['stats'] = stats

        eval_info = {
            'nr-of-commits': len(commits),
            'change-percentage':
            {},  # change percentage -> nr of commits with change < percentage
            'hot-commits': {},
            'total-changed-functions': total_changed_functions,
            'total-insdel-functions': total_insdel_functions,
        }

        # Get most changed elements
        eval_info['most-changed-elements'] = {
            k: v
            for k, v in stats['elements'].iteritems() if v > 400
        }  # arbitrary value (about 10% of commits)

        # Calc average nr and percentage of (changed) symbols per commit
        summed_avg_change_percentage = 0
        summed_changed_elements = 0
        summed_total_elements = 0
        commits = self.build_info['stats']['commits']
        for commit in commits:
            commit_stat = commits[commit]
            change_percentage = len(commit_stat['changed-elements']) / float(
                commit_stat['element-count'])
            summed_avg_change_percentage += change_percentage

            summed_changed_elements += len(commit_stat['changed-elements'])
            summed_total_elements += commit_stat['element-count']

            percentage = int(round(change_percentage * 100))
            if percentage not in eval_info['change-percentage']:
                eval_info['change-percentage'][percentage] = 0
            eval_info['change-percentage'][percentage] += 1

            # Identify hot commits
            #if percentage > self.hot_threshold_percentage.value:
            #eval_info['hot-commits'][commit] = percentage

        eval_info[
            'avg-change-percentage'] = summed_avg_change_percentage / float(
                len(stats['commits']))
        eval_info[
            'avg-changed-elements'] = summed_changed_elements / eval_info[
                'nr-of-commits']
        eval_info['avg-total-elements'] = summed_total_elements / eval_info[
            'nr-of-commits']

        eval_info['nr-hot-commits'] = len(eval_info['hot-commits'])

        with open(self.eval_data.path, "w+") as fd:
            fd.write(repr(eval_info))

        # Output the summary of this build into the statistics file.
        with open(self.stats.path, "w+") as fd:
            fd.write(repr(self.build_info))

        def plot_hash_count_histogram(hash_values, filename):
            dictionary = plt.figure()
            fig, ax = plt.subplots()
            plt.xlabel('Prozentanteil geaenderter Elemente')
            plt.ylabel('Anzahl von Commits')
            axes = plt.gca()
            axes.set_xlim([-10, 100])
            axes.set_ylim([0, 1600])

            ax.bar(hash_values.keys(), hash_values.values(), align='center')
            fig.savefig(filename)

        # clean data for plotting
        data = {
            k: v
            for k, v in eval_info['change-percentage'].iteritems() if k <= 100
        }

        plot_hash_count_histogram(data, self.hot_commits_histo.path)

    def variant_name(self):
        return "%s-%s" % (self.project_name(), self.metadata['mode'])

    def symlink_name(self):
        return "%s-%s" % (self.title, self.variant_name())
Ejemplo n.º 13
0
 def flush(self):
     """Apply the before_write hook to this object, store its result as the
     current value, then delegate to File.flush to write it to disk."""
     self.value = self.before_write(self)
     File.flush(self)
Ejemplo n.º 14
0
class IncrementalCompilation(Experiment, ClangHashHelper):
    """Measure incremental rebuild behavior.

    After one full build of the project, every source file is marked as
    modified (either mtime-touched or prepended with a '#line' directive)
    and the project is rebuilt once per file. Per-build timings collected
    by rebuild() are written to the 'stats' output.
    """

    inputs = {
        "clang_hash":
        GitArchive("/home/stettberger/w/clang-hash/"),
        "project":
        GitArchive("/home/stettberger/w/clang-hash/hash-projects/musl",
                   shallow=True),
        # If True, only bump the file's mtime; otherwise prepend a line.
        "touch-only":
        Bool(False),
        # One of: normal, ccache, clang-hash (validated in run()).
        "mode":
        String("normal"),
        "jobs":
        Integer(4),
    }
    outputs = {
        "stats": File("summary.dict"),
    }

    def get_sources(self, path):
        """Return a sorted list of all .c/.h files below *path*.

        For musl, external headers are excluded: only .c files and
        headers living under an 'internal' directory are kept.
        """
        sources = []
        for root, _dirnames, filenames in os.walk(path):
            for filename in filenames:
                if filename.endswith(('.h', '.c')):
                    sources.append(os.path.join(root, filename))
        if self.project_name() == "musl":
            # We do not touch headers that are external, since they
            # are untouchable.
            sources = [x for x in sources
                       if x.endswith(".c") or "internal" in x]
        return sorted(sources)

    def touch(self, path):
        """Mark *path* as modified.

        In touch-only mode just update the mtime; otherwise prepend a
        '#line 1' directive so the file content (and thus any content
        hash) actually changes without altering program semantics.
        """
        if self.touch_only.value:
            os.utime(path, None)
        else:
            with open(path) as fd:
                content = fd.read()
            content = "#line 1\n" + content
            with open(path, "w") as fd:
                fd.write(content)

    def run(self):
        """Build the plugin and project, then rebuild per touched file."""
        # Determine the mode
        modes = ('normal', 'ccache', 'clang-hash')
        if self.mode.value not in modes:
            raise RuntimeError("Mode can only be one of: %s" % modes)

        logging.info("Build the Clang-Hash Plugin")
        with self.clang_hash as cl_path:
            shell("cd %s; mkdir build; cd build; cmake ..; make -j 4", cl_path)

        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {
            "project-name": self.project_name(),
            "commit-hash": self.metadata["project-hash"],
            'builds': []
        }
        with self.project as src_path:
            # First, we redirect all calls to the compiler to our
            # clang hash wrapper.
            # NOTE(review): cl_path is referenced outside its 'with'
            # block above — this assumes the checkout persists after
            # __exit__; confirm GitArchive's context-manager semantics.
            self.setup_compiler_paths(cl_path)

            # Count the number of files
            sources = list(self.get_sources(src_path))
            nr_files = len(sources)
            logging.info("#files: %d", nr_files)
            self.build_info['file-count'] = nr_files

            # Initial build of the given project
            self.call_configure(src_path)
            info = {"filename": "FRESH_BUILD"}
            self.rebuild(src_path, info)
            self.build_info["builds"].append(info)

            # Iterate over all files: touch each one and rebuild once.
            for fn in sources:
                self.touch(fn)
                info = {"filename": fn}
                self.rebuild(src_path, info)
                self.build_info["builds"].append(info)

        # Output the summary of this build into the statistics file.
        with open(self.stats.path, "w+") as fd:
            fd.write(repr(self.build_info))

    def method_name(self):
        """Return '<touch|append>-<mode>' describing the modification mode."""
        mod = "append"
        if self.metadata['touch-only']:
            mod = "touch"
        return "%s-%s" % (mod, self.metadata['mode'])

    def variant_name(self):
        """Return '<project>-<method>' identifying this variant."""
        return "%s-%s" % (self.project_name(), self.method_name())

    def symlink_name(self):
        """Name of the result symlink: '<title>-<variant>'."""
        return "%s-%s" % (self.title, self.variant_name())
class HistoricalCompilationEvaluation(Experiment):
    """Post-process per-commit hash data from a HistoricalCompilation run.

    Reads the dataset's summary and per-commit element-hash files,
    compares each commit's local hashes against its parent's, counts
    changed elements per category (functions, records, globals),
    identifies "hot" commits, and writes aggregated statistics,
    evaluation data, and (optionally) histogram plots.
    """

    inputs = {
        "clang_hash":
        GitArchive("/home/cip/2015/yb90ifym/clang-hash/"),
        "project":
        GitArchive("/home/cip/2015/yb90ifym/clang-hash/hash-projects/lua"),
        "commits":
        Integer(4744),
        "jobs":
        Integer(1),  # was 4
        "dataset":
        Directory(
            "/home/cip/2015/yb90ifym/clang-hash/experiments/HistoricalCompilation-4e7c977077afea3d2ad77aeefe3b472c"
        ),  # full lua
        "hot_threshold_percentage":
        Integer(
            50
        ),  # minimal change percentage for commit to be classified as "hot"
    }
    outputs = {
        "stats": File("summary.dict"),
        "eval_data": File("eval.txt"),
        "hot_commits_histo": File("local_hot_commits.pdf"),
        "compare_approx_elem": File("local_compare_approx_elem.pdf"),
    }

    def project_name(self):
        """Derive the project name from the clone URL's basename."""
        return os.path.basename(self.metadata['project-clone-url'])

    def run(self):
        """Evaluate per-commit local-hash changes and write the results.

        Walks the commit list recorded in the dataset's summary.dict,
        compares each commit's element hashes with its parent's (reusing
        the previous commit's hashes when the history is linear), and
        accumulates change statistics into 'stats' and 'eval_data'.
        """
        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {
            "project-name": self.project_name(),
            "commit-hash": self.metadata["project-hash"],
            'builds': []
        }

        with self.project as src_path:
            time = 0

            os.chdir(self.dataset.path)

            # Read summary file from data collection run
            commits = None
            with open("summary.dict") as sf:
                # NOTE(review): eval() is only acceptable here because the
                # data files are produced by our own collection run.
                summary = eval(sf.read())
                commits = summary['builds']

            def read_chash_data(commit):
                """Return all element hashes recorded for *commit*.

                Returns an empty list if the commit's data file is
                missing or unreadable.
                """
                element_hashes = []
                try:
                    with open(commit, 'r') as cf:
                        commit_data = eval(cf.read())
                        for ofile_data in commit_data:
                            element_hashes.extend(ofile_data['element-hashes'])
                # NOTE(review): bare except silently treats unreadable
                # data as empty — consider narrowing to IOError/SyntaxError.
                except:
                    pass

                return element_hashes

            stats = {
                'data-empty': set(
                ),  # commits with empty info files, e.g. failed to be collected, (first n commits -> missing makefile o.a.)
                'commits': {},
                'elements': {},  # symbol -> how often did this symbol change
            }

            total_changed_globals = 0  # How often was any global changed/introduced throughout the history?
            total_changed_records = 0  # How often was any record changed/introduced throughout the history?
            total_changed_static_funcs = 0  # How often was any static function changed/introduced throughout the history?
            total_changed_functions = 0  # How often was any function changed/introduced throughout the history? (incl. static)

            prev_commit = None
            prev_hashes = None
            counter = 1
            for info in commits:
                print "%d/%d" % (counter, len(commits))
                counter += 1
                commit = info['commit']
                parent = info['parent']

                if not parent:  # first commit has no parent
                    print "No parent"
                    continue

                commit_data = read_chash_data(commit)
                if not commit_data:
                    # If the data does not exist, note and skip
                    #print "Data empty"
                    stats['data-empty'].add(commit)
                    continue

                # Map: element name -> element hash for this commit.
                local_hashes = {}
                for element in commit_data:
                    local_hashes[element[0]] = element[1]

                parent_hashes = {}
                if parent == prev_commit:
                    # Linear history: reuse the hashes computed last round.
                    #print "Reuse prev_commit"
                    parent_hashes = prev_hashes
                else:
                    #print "Cannot reuse prev_commit"
                    parent_data = read_chash_data(parent)
                    for element in parent_data:
                        parent_hashes[element[0]] = element[1]

                if not parent_hashes:
                    # If the data does not exist, note and skip
                    stats['data-empty'].add(commit)

                    # Save data for reuse
                    prev_commit = commit
                    prev_hashes = local_hashes

                    continue

                #########################
                # LOCAL HASH EVALUATION #
                #########################

                commit_stats = {
                    'element-count': len(local_hashes),
                    'changed-elements': [],
                    'changed-functions-approx': [],
                }

                # Get data from approximation
                cwd = os.getcwd()
                os.chdir(src_path)
                commit_stats[
                    'changed-functions-approx'] = get_changed_functions_from_commit(
                        src_path, commit)
                os.chdir(cwd)

                elements = set(local_hashes.keys())
                parent_elements = set(parent_hashes.keys())

                commit_stats['changed-elements'] = set(
                )  #TODO here elements ^ parent_elements # elements either added or removed: if this is initialized with the insdel items, causes weird data to show um in result. should perhaps include it and add explanation

                # Compare hashes
                common_elements = elements & parent_elements
                for element in common_elements:
                    if local_hashes[element] != parent_hashes[element]:
                        commit_stats['changed-elements'].add(element)
                        if element.startswith(
                                'record:'
                        ):  # do this here to ignore insertions and deletions
                            total_changed_records += 1
                        elif element.startswith(
                                'variable:') or element.startswith(
                                    'static variable:'):
                            total_changed_globals += 1
                        elif element.startswith('static function:'):
                            total_changed_static_funcs += 1
                            total_changed_functions += 1
                        else:
                            total_changed_functions += 1

                commit_stats['changed-element-count'] = len(
                    commit_stats['changed-elements'])
                stats['commits'][commit] = commit_stats

                # Count how often each element was changed over the whole history
                for element in commit_stats['changed-elements']:
                    if element not in stats['elements']:
                        stats['elements'][element] = 0
                    stats['elements'][element] += 1

                # Save data for reuse
                prev_commit = commit
                prev_hashes = local_hashes

            self.build_info['stats'] = stats

        eval_info = {
            'nr-of-commits':
            len(commits),
            'change-percentage':
            {},  # change percentage -> nr of commits with change < percentage
            'hot-commits': {},
            'total-changed-globals':
            total_changed_globals,
            'total-changed-records':
            total_changed_records,
            'total-changed-static-funcs':
            total_changed_static_funcs,
            'total-changed-functions':
            total_changed_functions,
            'total-changed-elements':
            total_changed_functions + total_changed_records +
            total_changed_globals,
        }

        # Get most changed elements
        eval_info['most-changed-elements'] = {
            k: v
            for k, v in stats['elements'].iteritems()
            if v > self.commits.value / 10
        }  # arbitrary value (about 10% of commits)

        # Calc average nr and percentage of (changed) symbols per commit
        summed_avg_change_percentage = 0
        summed_changed_elements = 0
        summed_total_elements = 0
        commits = self.build_info['stats']['commits']
        for commit in commits:
            commit_stat = commits[commit]
            change_percentage = len(commit_stat['changed-elements']) / float(
                commit_stat['element-count'])
            summed_avg_change_percentage += change_percentage

            summed_changed_elements += len(commit_stat['changed-elements'])
            summed_total_elements += commit_stat['element-count']

            percentage = int(round(change_percentage * 100))
            if percentage not in eval_info['change-percentage']:
                eval_info['change-percentage'][percentage] = 0
            eval_info['change-percentage'][percentage] += 1

            # Identify hot commits
            if percentage > self.hot_threshold_percentage.value:
                eval_info['hot-commits'][commit] = (
                    percentage, len(commit_stat['changed-elements']),
                    commit_stat['element-count'])

        eval_info[
            'avg-change-percentage'] = summed_avg_change_percentage / float(
                len(stats['commits']))
        eval_info[
            'avg-changed-elements'] = summed_changed_elements / eval_info[
                'nr-of-commits']
        eval_info['avg-total-elements'] = summed_total_elements / eval_info[
            'nr-of-commits']

        eval_info['nr-hot-commits'] = len(eval_info['hot-commits'])

        with open(self.eval_data.path, "w+") as fd:
            fd.write(repr(eval_info))

        # Output the summary of this build into the statistics file.
        with open(self.stats.path, "w+") as fd:
            fd.write(repr(self.build_info))
        '''
        def plot_hash_count_histogram(hash_values, filename):
            dictionary = plt.figure()
            fig, ax = plt.subplots()
            plt.xlabel('Prozentanteil geaenderter Elemente')
            plt.ylabel('Anzahl von Commits')
            ax.bar(hash_values.keys(), hash_values.values(), align='center')
            fig.savefig(filename)

        # clean data for plotting
        data = {k:v for k,v in eval_info['change-percentage'].iteritems() if k <= 100}
 
        plot_hash_count_histogram(data, self.hot_commits_histo.path)




        changed_funcs_approx_list = []
        changed_elements_list = []
        for commit in commits:
            commit_stat = commits[commit]
            changed_functions_approx = commit_stat['changed-functions-approx']
            changed_elements = commit_stat['changed-elements']
            
            changed_funcs_approx_list.append(len(changed_functions_approx))
            changed_elements_list.append(len(changed_elements))

        
        #TODO plot changed elements vs approx. changed functions
        # and also changed functions vs approx changed functions
        fig, ax = plt.subplots()
        ax.plot(changed_elements_list, label='Geaenderte Elemente (lokal)')
        ax.plot(changed_funcs_approx_list, 'm', label='Geaenderte Funktionen (Approx)')

        lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) # legend on the right
        plt.xlabel('Commits')
        plt.ylabel('Anzahl')
        fig.savefig(self.compare_approx_elem.path, bbox_extra_artists=(lgd,), bbox_inches='tight')
        '''

    def variant_name(self):
        """Return '<project>-<mode>' identifying this variant."""
        return "%s-%s" % (self.project_name(), self.metadata['mode'])

    def symlink_name(self):
        """Name of the result symlink: '<title>-<variant>'."""
        return "%s-%s" % (self.title, self.variant_name())
Ejemplo n.º 16
0
 def after_experiment_run(self, parameter_type):
     """Gzip-compress this file in place after the experiment has run.

     Only output files are compressed; the compressed data replaces the
     original file under the same name (no .gz suffix is kept).
     """
     File.after_experiment_run(self, parameter_type)
     if parameter_type == "output":
         shell("gzip -c %s > %s.1", self.path, self.path)
         shell("mv %s.1 %s", self.path, self.path)
Ejemplo n.º 17
0
 def after_experiment_run(self, parameter_type):
     """Gzip-compress this file in place after the experiment has run.

     Only output files are compressed; the compressed data replaces the
     original file under the same name (no .gz suffix is kept).
     """
     File.after_experiment_run(self, parameter_type)
     if parameter_type == "output":
         shell("gzip -c %s > %s.1", self.path, self.path)
         shell("mv %s.1 %s", self.path, self.path)
Ejemplo n.º 18
0
class TimingInternal(Experiment, ClangHashHelper):
    """Measure time spent in individual compiler phases.

    Builds the project once through the gcc-time wrapper (which appends
    one timing record per compiler invocation to TIMING_REPORT), then
    averages the per-phase timings and stores them in the dataref dict.
    """

    inputs = {
        "clang_hash":
        GitArchive("/home/stettberger/w/clang-hash/"),
        "project":
        GitArchive("/home/stettberger/w/clang-hash/hash-projects/lua",
                   shallow=True),
        # Extra CFLAGS handed to the wrapper via CHASH_EXTRA_FLAGS.
        "cflags":
        String(""),
        "jobs":
        Integer(4),
        "mode":
        String("normal"),  # Unchangable
    }
    outputs = {
        "stats": File("summary.dict"),
        'tex': DatarefDict('data.dref'),
    }

    def save(self, path, value):
        """Record *value* under the '/'-joined *path* in the dataref dict."""
        self.tex['/'.join(path)] = value
        logging.info("%s = %s", '/'.join(path), value)

    def run(self):
        """Build once with the timing wrapper, then aggregate the report."""

        with self.clang_hash as cl_path:
            logging.info("Cloning clang hash...")

        logging.info("Cloning project... %s", self.project_name())

        # First, we redirect all calls to the compiler to our
        # gcc wrapper.
        # NOTE(review): cl_path is used after its 'with' block above —
        # assumes the checkout persists; confirm GitArchive semantics.
        CC = os.path.join(cl_path, "wrappers/gcc-time")
        os.environ["CC"] = CC
        os.environ["TIMING_REPORT"] = self.stats.path
        os.environ["CHASH_EXTRA_FLAGS"] = self.cflags.value
        with self.project as src_path:
            info = {}
            self.call_configure(src_path)
            self.rebuild(src_path, info, True)

        # Aggregate: collect every float-valued field of every record
        # and count compiler invocations (records carrying a "name").
        collect = defaultdict(list)
        compiler_calls = 0
        with open(self.stats.path) as fd:
            # Iterate the file lazily instead of materializing readlines().
            for line in fd:
                # The report is produced by our own wrapper; eval() on
                # untrusted input would be unsafe.
                data = eval(line)
                if "name" in data:
                    compiler_calls += 1
                for key in data:
                    # isinstance is the idiomatic type check here.
                    if isinstance(data[key], float):
                        collect[key].append(data[key])
        self.save([self.project_name(), "phase", self.cflags.value, "count"],
                  compiler_calls)
        for phase in collect:
            if phase in ("preprocessing", "parser (global)",
                         "phase opt and generate"):
                self.save(
                    [self.project_name(), "phase", phase, self.cflags.value],
                    np.average(collect[phase]))

    def symlink_name(self):
        """Name of the result symlink: '<title>-<project><cflags>'."""
        return "%s-%s%s" % (self.title, self.project_name(), self.cflags.value)
Ejemplo n.º 19
0
class HistoricalCompilation(Experiment, ClangHashHelper):
    """Replay the last N commits of a project's history.

    For each commit, the parent is built first (a no-op for linear
    history), then the commit itself is rebuilt and its build time and
    clang-hash log are recorded into the 'stats' output.
    """

    inputs = {
        "clang_hash": GitArchive("/home/stettberger/w/clang-hash/"),
        "project": GitArchive("/home/stettberger/w/clang-hash/hash-projects/lua"),
        "mode": String("normal"),
        "commits": Integer(500),
        "jobs": Integer(4),
    }
    outputs = {
        "stats": File("summary.dict"),
        "ccache_stats": File("ccache.stats"),
        "clang_hash_log": File("clang-hash.log"),
    }

    def build_parent(self, commit, from_scratch = False):
        """Bring the working tree to *commit*^ and build it.

        Returns True on success. If an incremental build of the parent
        fails, retry once with a pristine tree (from_scratch=True).
        """
        def eq_hash(a, b):
            # Compare possibly-abbreviated revision hashes; empty
            # strings never compare equal (returns None == falsy).
            if len(a) == 0 or len(b) == 0:
                return
            if len(a) > len(b):
                return a.startswith(b)
            else:
                return b.startswith(a)

        src_path = self.project.path

        if from_scratch:
            # Keep *.hash files: they carry clang-hash state across builds.
            shell("cd %s; git clean -dfx -e '*.hash' -e '*.hash.copy'", src_path)
            logging.info("Parent [%s^]: clean build", commit)
            shell("cd %s; git reset --hard %s^", src_path, commit)
            info = {"commit": commit + "^"}
            self.call_configure(src_path)
            self.rebuild(src_path, info, True)
            # Did initial commit fail? Try again
            if info.get("failed"):
                logging.info("Parent[%s^]: failed", commit)
                return False
            return True
        else:
            (lines, _) = shell("cd %s; git rev-parse %s^",
                               src_path, commit)
            parent_revision = lines[0].strip()
            if self.current_revision and eq_hash(self.current_revision, parent_revision):
                # The build tree already is at the parent revision.
                logging.info("Parent[%s^]: resuse good parent", commit)
                return True
            else:
                logging.info("Parent[%s^]: resuse similar build directory", commit)
                shell("cd %s; git reset --hard %s^", src_path, commit)
                info = {"commit": commit +"^"}
                self.call_reconfigure(src_path)
                self.rebuild(src_path, info, True)
                # Did initial commit fail? Try again
                if info.get("failed"):
                    return self.build_parent(commit, from_scratch=True)
                return True


    def run(self):
        """Walk the selected commit range and rebuild each commit."""
        # Determine the mode
        modes = ('normal', 'ccache', 'clang-hash', 'ccache-clang-hash')
        if self.mode.value not in modes:
            raise RuntimeError("Mode can only be one of: %s" % modes)

        logging.info("Build the Clang-Hash Plugin")
        with self.clang_hash as cl_path:
            shell("cd %s; mkdir build; cd build; cmake .. -DCMAKE_BUILD_TYPE=Release; make -j 4", cl_path)
            # FIX: directory was misspelled as 'clang-plguin', so the
            # built plugin was never actually stripped.
            shell("strip %s/build/clang-plugin/*.so", cl_path)

        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {"project-name": self.project_name(),
                           "commit-hash": self.metadata["project-hash"],
                           'builds': []}

        with self.project as src_path:
            (commits, _) = shell("cd %s; git log --no-merges --oneline --topo-order --format='%%H %%P %%s'", src_path)
            # [0] is hash. [1] is parent, [2] rest
            commits = [x.split(" ", 2) for x in reversed(commits)]
            commits = commits[-self.commits.value:]

            self.current_revision = None

            # First, we redirect all calls to the compiler to our
            # clang hash wrapper
            self.setup_compiler_paths(cl_path)

            time = 0
            last_failed = True

            while commits:
                # Search for a child of the current revision
                commit = None
                if self.current_revision:
                    for idx in range(0, len(commits)):
                        if commits[idx][1] == self.current_revision:
                            commit = commits[idx]
                            del commits[idx]
                            break
                # No Child found -> Take the first one.
                if not commit:
                    commit = commits.pop(0)

                # Bash initial commit
                if commit[0] == "726f63884db0132f01745f1fb4465e6621088ccf":
                    continue


                info = {"commit": commit[0],
                        "parent": commit[1],
                        "summary": commit[2]}

                # Somehow this commit in musl is weird. It behaves
                # totally different, if build with a fresh parent and
                # a non-fresh parent. With this we are one the save side
                if commit[0] == "416d1c7a711807384cc21a18163475cf757bbcb5":
                    last_failed = True
                # First, we build the parent. In a total linear
                # history, this is a NOP. Otherwise, we try to reset
                # to the actual parent, and rebuild the project. This
                # may fail, since the current commit might fix this.
                ret = self.build_parent(commit[0], from_scratch = last_failed)
                info['parent-ok'] = ret

                # Change to the ACTUAL commit. Call reconfigure, and
                # then go on building the commit.
                shell("cd %s; git reset --hard %s", src_path, commit[0])
                self.call_reconfigure(src_path)
                if os.path.exists("/tmp/clang-hash.log"):
                    os.unlink("/tmp/clang-hash.log")

                # Rebuild and Measure
                self.rebuild(src_path, info, fail_ok=True)


                if os.path.exists("/tmp/clang-hash.log") and not info.get("failed"):
                    with open("/tmp/clang-hash.log") as fd:
                        self.clang_hash_log.value += fd.read()

                self.build_info["builds"].append(info)
                if not info.get("failed"):
                    time += info['build-time'] / 1e9
                    # Build was good. Remember that.
                    self.current_revision = commit[0]
                    last_failed = False
                else:
                    self.current_revision = None
                    last_failed = True

            logging.info("Rebuild for %d commits takes %f minutes",
                         self.commits.value, time/60.)

        # Output the summary of this build into the statistics file.
        with open(self.stats.path, "w+") as fd:
            fd.write(repr(self.build_info))

    def variant_name(self):
        """Return '<project>-<mode>' identifying this variant."""
        return "%s-%s"%(self.project_name(), self.metadata['mode'])

    def symlink_name(self):
        """Name of the result symlink: '<title>-<variant>'."""
        return "%s-%s"%(self.title, self.variant_name())
Ejemplo n.º 20
0
class HistoricalCompilation(Experiment, ClangHashHelper):
    inputs = {
        "clang_hash": GitArchive("/home/cip/2015/yb90ifym/clang-hash/"),
        "project": GitArchive("/home/cip/2015/yb90ifym/lua"),
        "mode": String("normal"),
        "commits": Integer(500),  # was 500
        "jobs": Integer(1),  # was 4
    }
    outputs = {
        "stats": File("summary.dict"),
        "ccache_stats": File("ccache.stats"),
        "clang_hash_log": File("clang-hash.log"),
    }

    def build_parent(self, commit, from_scratch=False):
        """Bring the working tree to *commit*^ and build it.

        Returns True if the parent built successfully, False if even a
        clean build failed. An incremental build that fails triggers one
        retry with a pristine tree (from_scratch=True).
        """
        def eq_hash(a, b):
            # Compare possibly-abbreviated revision hashes; empty
            # strings never compare equal (returns None == falsy).
            if len(a) == 0 or len(b) == 0:
                return
            if len(a) > len(b):
                return a.startswith(b)
            else:
                return b.startswith(a)

        src_path = self.project.path

        if from_scratch:
            # Keep *.hash files: they carry clang-hash state across builds.
            shell("cd %s; git clean -dfx -e '*.hash' -e '*.hash.copy'",
                  src_path)
            logging.info("Parent [%s^]: clean build", commit)
            shell("cd %s; git reset --hard %s^", src_path, commit)
            info = {"commit": commit + "^"}
            self.call_configure(src_path)
            self.rebuild(src_path, info, True)
            # Did initial commit fail? Try again
            if info.get("failed"):
                logging.info("Parent[%s^]: failed", commit)
                return False
            return True
        else:
            (lines, _) = shell("cd %s; git rev-parse %s^", src_path, commit)
            parent_revision = lines[0].strip()
            if self.current_revision and eq_hash(self.current_revision,
                                                 parent_revision):
                # The build tree is already at the parent revision.
                logging.info("Parent[%s^]: resuse good parent", commit)
                return True
            else:
                logging.info("Parent[%s^]: resuse similar build directory",
                             commit)
                shell("cd %s; git reset --hard %s^", src_path, commit)
                info = {"commit": commit + "^"}
                self.call_reconfigure(src_path)
                self.rebuild(src_path, info, True)
                # Did initial commit fail? Try again
                if info.get("failed"):
                    return self.build_parent(commit, from_scratch=True)
                return True

    def run(self):
        # Determine the mode
        modes = ('normal', 'ccache', 'clang-hash', 'ccache-clang-hash')
        if not self.mode.value in modes:
            raise RuntimeError("Mode can only be one of: %s" % modes)

        logging.info("Build the Clang-Hash Plugin")
        with self.clang_hash as cl_path:
            shell(
                "cd %s; mkdir build; cd build; cmake .. -DCMAKE_BUILD_TYPE=Release; make -j 4",
                cl_path)
            shell("strip %s/build/clang-plugin/*.so", cl_path)

        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {
            "project-name": self.project_name(),
            "commit-hash": self.metadata["project-hash"],
            'builds': []
        }

        with self.project as src_path:
            (commits, _) = shell(
                "cd %s; git log --no-merges --oneline --topo-order --format='%%H %%P %%s'",
                src_path)
            # [0] is hash. [1] is parent, [2] rest
            commits = [x.split(" ", 2) for x in reversed(commits)]
            commits = commits[-self.commits.value:]

            self.current_revision = None

            # First, we redirect all calls to the compiler to our
            # clang hash wrapper
            self.setup_compiler_paths(cl_path)

            time = 0
            last_failed = True

            nr_of_commits = len(commits)
            original_commits = commits[:]
            occurred_errors = {}  # map commit -> [error strings]

            def gather_local_hashes(src_path):
                remove_keys = [
                    'project', 'return-code', 'start-time', 'run_id',
                    'compile-duration', 'processed-bytes', 'hash-duration',
                    'hash-start-time', 'object-file-size'
                ]  # TODO: ofile-size useful?
                hashes = read_hash_directory(src_path, remove_keys)
                local_hashes = {}
                for entry in hashes:
                    element_hashes = entry['element-hashes']
                    for element in element_hashes:
                        local_hashes[element[0]] = element[1]

                return local_hashes

            def gather_global_hashes(local_hashes, occurred_errors):
                global_hashes = {}
                for symbol in local_hashes:
                    symbol = symbol.split(':')[
                        1]  # Remove the prefix ('function:' etc.)
                    try:
                        shell("cd %s; %s/clang-hash-global --definition %s",
                              src_path, self.inputs.clang_hash.path, symbol)
                    except Exception as e:
                        occurred_errors[commit[0]] = e
                        # don't raise exception

                return global_hashes

            def add_additional_commit_info_to(info):
                gitshow = subprocess.Popen(["git", "show"],
                                           stdout=subprocess.PIPE)
                dstat_out = subprocess.check_output(('diffstat'),
                                                    stdin=gitshow.stdout)
                gitshow.wait()

                lines = dstat_out.split('\n')
                index = -1
                while lines[index] == '':
                    index -= 1
                last_line = lines[index]
                changedInsertionsDeletions = [
                    int(s) for s in last_line.split() if s.isdigit()
                ]

                if "insertion" in last_line:
                    info['insertions'] = changedInsertionsDeletions[1]
                    if "deletion" in last_line:
                        info['deletions'] = changedInsertionsDeletions[2]
                elif "deletion" in last_line:
                    info['deletions'] = changedInsertionsDeletions[1]

                # Get changed files
                changed_files = {}
                for line in lines:
                    if '|' in line:
                        elems = line.split()

                        assert elems[1] == '|'

                        filename = elems[0]
                        nr_of_changes = int(elems[2])
                        changed_files[filename] = nr_of_changes

                assert len(changed_files) == changedInsertionsDeletions[0]
                info['changes'] = changed_files

            while commits:
                # Search for a child of the current revision
                commit = None
                if self.current_revision:
                    for idx in range(0, len(commits)):
                        if commits[idx][1] == self.current_revision:
                            commit = commits[idx]
                            del commits[idx]
                            break
                # No Child found -> Take the first one.
                if not commit:
                    commit = commits.pop(0)

                info = {
                    "commit": commit[0],
                    "parent": commit[1],
                    "summary": commit[2]
                }

                # First, we build the parent. In a total linear
                # history, this is a NOP. Otherwise, we try to reset
                # to the actual parent, and rebuild the project. This
                # may fail, since the current commit might fix this.
                ret = self.build_parent(commit[0], from_scratch=last_failed)
                info['parent-ok'] = ret

                parent_info = {}
                add_additional_commit_info_to(parent_info)
                info['parent-info'] = parent_info

                # Gather hashes of parent
                parent_local_hashes = gather_local_hashes(src_path)
                parent_global_hashes = gather_global_hashes(
                    parent_local_hashes, occurred_errors)

                #info['parent-local-hashes'] = parent_local_hashes
                #info['parent-global-hashes'] = parent_global_hashes

                # Change to the ACTUAL commit.
                shell("cd %s; git reset --hard %s", src_path, commit[0])

                add_additional_commit_info_to(info)

                # Call reconfigure, and then go on building the commit.
                self.call_reconfigure(src_path)
                if os.path.exists("/tmp/clang-hash.log"):
                    os.unlink("/tmp/clang-hash.log")

                # Rebuild and Measure
                self.rebuild(src_path, info, fail_ok=True)

                # Don't need those atm
                del info['clang-hash-hits']
                del info['clang-hash-misses']

                # Gather hashes
                local_hashes = gather_local_hashes(src_path)
                global_hashes = gather_global_hashes(local_hashes,
                                                     occurred_errors)

                #info['local-hashes'] = local_hashes
                #info['global-hashes'] = global_hashes

                # Compare hashes/search for changed hashes
                # The parent's global hashes are copied to find removed symbols
                changed_symbols = {}
                parent_hashes = deepcopy(parent_global_hashes)
                for symbol, global_hash in global_hashes.iteritems():
                    parent_global_hash = parent_hashes.pop(symbol, None)

                    if global_hash != parent_global_hash:
                        # Store it as [before, after]
                        changed_symbols[symbol] = [
                            parent_global_hash, global_hash
                        ]

                # Add removed symbols
                for symbol, parent_global_hash in parent_hashes.iteritems():
                    changed_symbols[symbol] = [parent_global_hash, None]

                # Compare hashes/search for changed hashes
                # The parent's global hashes are copied to find removed symbols
                local_changed_symbols = {}
                parent_hashes = deepcopy(parent_local_hashes)
                for symbol, local_hash in local_hashes.iteritems():
                    parent_local_hash = parent_hashes.pop(symbol, None)

                    if local_hash != parent_local_hash:
                        # Store it as [before, after]
                        local_changed_symbols[symbol] = [
                            parent_local_hash, local_hash
                        ]

                # Add removed symbols
                for symbol, parent_local_hash in parent_hashes.iteritems():
                    local_changed_symbols[symbol] = [parent_local_hash, None]

                info['changed-symbols'] = changed_symbols
                #info['local-changed-symbols'] = local_changed_symbols
                info['local-changed-sym-count'] = len(local_changed_symbols)

                # TODO: add more analysis
                # TODO: for each changed local hash, the symbol's global hash should also change...
                # check every symbol for changed global hash\
                # also check the commits, if the correct ones are used...

                if os.path.exists(
                        "/tmp/clang-hash.log") and not info.get("failed"):
                    with open("/tmp/clang-hash.log") as fd:
                        self.clang_hash_log.value += fd.read()

                self.build_info["builds"].append(info)
                if not info.get("failed"):
                    time += info['build-time'] / 1e9
                    # Build was good. Remember that.
                    self.current_revision = commit[0]
                    last_failed = False
                else:
                    self.current_revision = None
                    last_failed = True

            logging.info("Rebuild for %d commits takes %f minutes",
                         self.commits.value, time / 60.)

            print "\n\noccurred errors:\n"
            print occurred_errors
            print "\n\nchanged symbols:\n"
            print changed_symbols
            print "\n\nlocal changed symbols:\n"
            print local_changed_symbols
            print "\n\n\n"

            if len(changed_symbols) or len(local_changed_symbols):
                print "!!! success: found one !!!"

        # Output the summary of this build into the statistics file.
        with open(self.stats.path, "w+") as fd:
            fd.write(repr(self.build_info))

    def variant_name(self):
        """Return "<project>-<mode>", identifying this build variant."""
        mode = self.metadata['mode']
        return "%s-%s" % (self.project_name(), mode)

    def symlink_name(self):
        """Return the symlink label: "<title>-<variant_name>"."""
        variant = self.variant_name()
        return "%s-%s" % (self.title, variant)
Ejemplo n.º 21
0
    def __init__(self, filename = "data.tex"):
        """Declare a .tex file of macros as the output of an experiment.

        Use this only as an output parameter, never as an input.
        Delegates to the project's File output type, storing the
        macros under *filename* (default: "data.tex").
        """
        File.__init__(self, filename)
Ejemplo n.º 22
0
 def run_nfpkeys(self):
     """Dump NFP key metadata into nfpkeys.json via the project Makefile.

     Runs `make nfpkeys` in self.cwd, redirecting its stdout into a
     freshly declared File("nfpkeys.json") output.  Raises
     subprocess.CalledProcessError if make exits non-zero.
     """
     target = File("nfpkeys.json")
     with open(target.path, "w") as out_fd:
         subprocess.check_call(
             ["make", "nfpkeys"], cwd=self.cwd, stdout=out_fd)
Ejemplo n.º 23
0
 def __init__(self, default_filename=""):
     """Declare a file whose contents are handled in binary mode.

     Delegates to File.__init__ with binary=True, so the file is
     treated as raw bytes rather than text.
     """
     File.__init__(self, default_filename, binary=True)