Example #1
    def __setup_value(self):
        if "path" in dir(self.__filename):
            self.subobjects["filename"] = self.__filename
            self.__filename = self.__filename.path

        self.__filename = os.path.abspath(self.__filename)
        fn = self.__filename

        extract_mode = ""
        if "tar.gz" in fn or "tgz" in fn:
            extract_mode = "x"
        if "tar.bz2" in fn or "bzip2" in fn:
            extract_mode = "j"

        with self.tmp_directory as d:
            try:
                os.mkdir(self.name)
            except OSError:
                # ignore the error if the directory already exists for some reason
                pass
            with Directory(self.name) as d2:
                dirname = os.path.abspath(".")
                (out, ret) = shell("tar x%svf %s", compress_flag, fn)
                if ret != 0:
                    raise RuntimeError("Extraction of %s failed" % fn)

                # Find the shortest directory entry in the tar listing; if all
                # entries live below it, use it as the result directory.
                cd = None
                for line in out:
                    if (cd is None
                            or len(line) < len(cd)) and line.endswith("/"):
                        cd = line
                if cd and all([x.startswith(cd) for x in out]):
                    dirname = cd
                return Directory(os.path.abspath(dirname))
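
For orientation, here is a minimal usage sketch of how the archive type whose __setup_value() appears above might be consumed from an experiment. It only mirrors the patterns of the later examples: the names TarArchive and UnpackExperiment, and the assumption that .value yields the extracted Directory, are illustrative rather than taken from this snippet, and the framework imports are omitted as in the surrounding examples.

# Hypothetical sketch; TarArchive, UnpackExperiment and the .value semantics
# are assumptions based on the surrounding examples, not confirmed here.
class UnpackExperiment(Experiment):
    inputs = {"tarball": TarArchive("/path/to/sources.tar.gz")}
    outputs = {"listing": File("listing.txt")}

    def run(self):
        # __setup_value() above unpacks the archive and returns the
        # extracted Directory, whose contents are listed here.
        src = self.tarball.value
        self.listing.value = "\n".join(sorted(os.listdir(src.path))) + "\n"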
Example #2
class SimpleExperiment(Experiment):
    outputs = {"dir1": Directory("d1"),
               "dir2": Directory("d2"),
               "filtered": Directory(".", filename_filter="*.log*"),
              }

    def run(self):
        a = self.o.dir1.new_file("barfoo")
        a.value="abc"
        a.flush()
        os.mkdir(self.o.dir1.path + "/tmpdir")
        with open(self.o.dir1.path + "/tmpdir/foo", "w+") as fd:
            fd.write("Hallo")

        self.o.dir2.mirror_directory(self.o.dir1.path,
                                     lambda x: True)

        a = self.filtered.new_file("foo.log")
        a.value = "xx"
        try:
            a = self.filtered.new_file("bar.xxx")
            raise Exception("Filter does not work")
        except RuntimeError as e:
            pass # Everything is good

        b = self.filtered.new_file("barfoo.log.gz", compressed=True)
        b.value = "xx"

        assert type(a) == File
        assert type(b) == GzipFile
Example #3
class SimpleExperiment(Experiment):
    outputs = {"dir1": Directory("d1"),
               "dir2": Directory("d2")}

    def run(self):
        a = self.o.dir1.new_file("barfoo")
        a.value="abc"
        a.flush()
        os.mkdir(self.o.dir1.path + "/tmpdir")
        with open(self.o.dir1.path + "/tmpdir/foo", "w+") as fd:
            fd.write("Hallo")

        self.o.dir2.mirror_directory(self.o.dir1.path,
                                     lambda x: True)
Example #4
    def __setup_value(self):
        if "path" in dir(self.__clone_url):
            self.subobjects["clone-url"] = self.__clone_url
            self.__clone_url = self.__clone_url.path

        logging.info("copying git archive %s", self.__clone_url)
        with self.tmp_directory as d:
            os.mkdir(self.name)
            if self.__shallow:
                cmd = "cd '%s' && git archive --format=tar --remote=%s %s | tar x"
                args = (self.name, self.__clone_url, self.__ref)
            else:
                cmd = "git clone %s %s"
                args = (self.__clone_url, self.name)

            (lines, ret) = shell(cmd, *args)

            if ret != 0:
                print("\n".join(lines))
                sys.exit(-1)

            if not self.__shallow:
                cmd = "cd %s && git gc && git fetch %s %s && git checkout FETCH_HEAD"
                args = (self.name, self.__clone_url, self.__ref)
                (lines, ret) = shell(cmd, *args)

                if ret != 0:
                    print("\n".join(lines))
                    sys.exit(-1)

            return Directory(os.path.abspath(self.name))
Example #5
class FailImport(Experiment):
    inputs = {
        "trace": FailTrace("FailTrace"),
        "fail-tool-dir": Directory("/proj/i4danceos/tools/fail"),
    }

    def run(self):
        variant = "erika/error-hook"
        for (label, importer, importer_args) in [\
                                ("mem",    "MemoryImporter", []),
                                ("regs",   "RegisterImporter", []),
                                ("ip",     "RegisterImporter", ["--no-gp", "--ip"]),
                                ("flags",  "RegisterImporter", ["--no-gp", "--flags"]),
                                            ]:
            benchmark = label
            logging.info("Importing coredos/%s", benchmark)
            cmdline = "%(path)s/import-trace -v %(variant)s -b %(benchmark)s -i %(importer)s "\
                      + "-t %(trace)s -e %(elf)s %(args)s"
            shell(cmdline %\
                  {"path": self.fail_tool_dir.path,
                   "variant": variant,
                   "benchmark": benchmark,
                   "importer": importer,
                   "trace":  os.path.join(self.trace.trace.path, "trace.pb"),
                   "elf":  self.trace.elf.path,
                   "args": " ".join(importer_args)})
        shell("%s/prune-trace -v %s -b %% -p basic --overwrite",
              self.fail_tool_dir.path, variant)
Example #6
class FailTrace(Experiment):
    inputs = {
        "erika": GitArchive("[email protected]:erika"),
        "bochs-runner": Executable("/proj/i4danceos/tools/fail/bochs-experiment-runner.py"),
        "erika-tracing": Executable("/proj/i4danceos/tools/fail/erika-tracing"),
    }
    outputs = {
        "trace": Directory("trace"),
        "elf": File("erika.elf"),
        "iso": File("erika.iso"),
    }

    def run(self):
        logging.info("Cloning ERIKA...")

        with self.erika as erika_path:
            shell("cd %s/examples/x86/coptermock-isorc; make", erika_path)

            self.iso.copy_contents(os.path.join(erika_path, "examples/x86/coptermock-isorc/Debug/erika.iso"))
            self.elf.copy_contents(os.path.join(erika_path, "examples/x86/coptermock-isorc/Debug/Debug/out.elf"))

        shell(("cd %(resultdir)s;  python %(bochs)s -F 50 -i %(iso)s -e %(elf)s -f %(fail)s"
              + " -m 8 -1 --  -Wf,--end-symbol=test_finish -Wf,--start-symbol=EE_oo_StartOS"
              + " -Wf,--trace-file=trace.pb -Wf,--save-symbol=EE_oo_StartOS") % {
              "resultdir": self.trace.path,
              "bochs": self.bochs_runner.path,
              "iso": self.iso.path,
              "elf": self.elf.path,
              "fail": self.erika_tracing.path
              }
        )
Example #7
class ExploreConfig(AttributeExperiment):
    inputs = {
        "config_hash": String("FIXME"),
        "kconfig_hash": String("FIXME"),
        "project_root": Directory("/tmp"),
        "project_version": String("FIXME"),
        "clean_command": String("make clean"),
        "build_command": String("make"),
        "attr_command": String("make attributes"),
    }
Example #8
class SimpleExperiment(Experiment):
    inputs = {
        "input_key": String("default key"),
        "input_value": String("default value")
    }
    outputs = {
        "output_file": File("output"),
        "output_directory": Directory("output_directory")
    }

    def run(self):
        # Combine the input parameters
        content = self.inputs.input_key.value \
            + ": " + self.inputs.input_value.value

        # write the result to the output file
        self.outputs.output_file.value = content + "\n"
        # New output directory
        x = self.output_directory.new_directory("foo").new_file("lala")
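
Example #8 never shows how such an experiment is launched; the __main__ block in Example #11 below does. As a small sketch following that same pattern (nothing beyond what Example #11 already uses), SimpleExperiment could be driven like this:

if __name__ == "__main__":
    import sys
    # Calling the experiment with the command-line arguments parses the
    # declared inputs, executes run(), and returns the result directory
    # (the same pattern as in Example #11).
    experiment = SimpleExperiment()
    result_dir = experiment(sys.argv)
    print(result_dir)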
Example #9
    def __setup_value(self):
        if "path" in dir(self.__clone_url):
            self.subobjects["clone-url"] = self.__clone_url
            self.__clone_url = self.__clone_url.path

        logging.info("copying git archive %s", self.__clone_url)
        with self.tmp_directory as d:
            os.mkdir(self.name)
            if self.__shallow:
                cmd = "cd '%s' && git archive --format=tar --remote=%s %s | tar x"
                args = (self.name,
                        self.__clone_url,
                        self.__ref)
            else:
                cmd = "git clone %s %s"
                args = (self.__clone_url, self.name)

            (lines, ret) = shell(cmd, *args, stderr=sys.stderr)

            if ret != 0:
                print("\n".join(lines))
                sys.exit(-1)

            if not self.__shallow:
                cmd = "cd %s && git gc && git fetch %s %s && git checkout FETCH_HEAD"
                args = (self.name, self.__clone_url, self.__ref)
                (lines, ret) = shell(cmd, *args, stderr=sys.stderr)

                if ret != 0:
                    print("\n".join(lines))
                    sys.exit(-1)

                # Fetch all visible branches and tags
                for branch in self.__metadata.get("branches", {}):
                    cmd = "cd %s && git fetch %s refs/heads/%s && git update-ref refs/heads/%s FETCH_HEAD"
                    shell(cmd, self.name, self.__clone_url, branch, branch, stderr=sys.stderr)
                for tag in self.__metadata.get("tags", {}):
                    cmd = "cd %s && git fetch %s refs/tags/%s && git update-ref refs/tags/%s FETCH_HEAD"
                    shell(cmd, self.name, self.__clone_url, tag, tag, stderr=sys.stderr)

            return Directory(os.path.abspath(self.name))
Example #10
    def before_experiment_run(self, parameter_type):
        # When the experiment is run as an input, just run the normal input handlers
        if parameter_type == "input":
            Type.before_experiment_run(self, "input")
            return

        for (name, inp) in self.inputs.items():
            if type(inp) == LambdaType:
                continue
            ret = inp.inp_extract_cmdline_parser(self.__opts, self.__args)
            if ret:
                (self.__opts, self.__args) = ret

        # After all input parameters are parsed, execute the
        # calculated input parameters.
        for (name, inp) in self.inputs.items():
            if type(inp) != LambdaType:
                continue
            inp = inp(self)
            inp.name = name
            self.subobjects[name] = inp
            self.inputs[name] = inp

        self.subobjects.update()

        # Now set up the experiment tmp directory
        self.tmp_directory = Directory(tempfile.mkdtemp())
        self.subobjects["tmp_directory"] = self.tmp_directory

        for obj in self.inputs.values():
            obj.before_experiment_run("input")

        self.__calculate_metadata()

        for obj in self.outputs.values():
            obj.before_experiment_run("output")
Example #11
            pass # Everything is good

        b = self.filtered.new_file("barfoo.log.gz", compressed=True)
        b.value = "xx"

        assert type(a) == File
        assert type(b) == GzipFile



if __name__ == "__main__":
    import shutil, sys, os
    experiment = SimpleExperiment()
    dirname = experiment(sys.argv)

    assert os.path.isdir(experiment.o.dir2.path + "/tmpdir")
    assert os.path.exists(experiment.o.dir2.path + "/barfoo")
    assert os.path.exists(experiment.o.dir2.path + "/tmpdir/foo")

    N = Directory(experiment.path, "*.log*")
    assert experiment.filtered.value == N.value
    assert os.path.exists(experiment.path + "/foo.log")

    contents = [x.value for x in N]
    assert len(contents) == 2
    assert contents[0] == contents[1], contents

    if dirname:
        shutil.rmtree(dirname)
    print("success")
Example #12
class HistoricalCompilationGlobalEvaluation(Experiment):
    inputs = {
        "clang_hash": GitArchive("/home/cip/2015/yb90ifym/clang-hash/"),
        "project": GitArchive("/home/cip/2015/yb90ifym/clang-hash/hash-projects/lua"),
        "commits": Integer(4744),
        "jobs": Integer(1), # was 4
        "dataset": Directory("/home/cip/2015/yb90ifym/clang-hash/experiments/HistoricalCompilation-4e7c977077afea3d2ad77aeefe3b472c"), # full lua
        "hot_threshold_percentage": Integer(10), # minimal change percentage for commit to be classified as "hot"
    }
    outputs = {
        "stats": File("summary.dict"),
        "eval_data": File("eval.txt"),
        "hot_commits_histo": File("global_hot_commits.pdf"),
    }


    def project_name(self):
        return os.path.basename(self.metadata['project-clone-url'])


    def run(self):
        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {"project-name": self.project_name(),
                           "commit-hash": self.metadata["project-hash"],
                           'builds': []}

        with self.project as src_path:
            time = 0
            
            os.chdir(self.dataset.path)
            
            # Read summary file from data collection run
            commits = None
            with open("summary.dict") as sf:
                summary = eval(sf.read())
                commits = summary['builds']


            def read_chash_data(commit):
                element_hashes = []
                try:
                    with open(commit, 'r') as cf:
                        commit_data = eval(cf.read())
                        for ofile_data in commit_data:
                            element_hashes.extend(ofile_data['element-hashes'])
                except:
                    pass

                return element_hashes



            stats = {
                'data-empty': set(), # commits with empty info files, e.g. failed to be collected, (first n commits -> missing makefile o.a.)
                'commits': {},
                'elements': {}, # symbol -> how often did this symbol change
            }




            total_changed_globals = 0 # How often was any global changed throughout the history?
            total_changed_records = 0 # How often was any record changed throughout the history?
            total_changed_static_funcs = 0 # How often was any static function changed throughout the history?
            total_changed_functions = 0 # without static functions

            total_insdel_globals = 0 # How often was any global introduced/removed throughout the history?
            total_insdel_records = 0 # How often was any record introduced/removed throughout the history?
            total_insdel_static_funcs = 0 # How often was any static function introduced/removed throughout the history?
            total_insdel_functions = 0 # without static functions

            # in-degree: how many SLOs depend on E?
            # out-degree: how many SLOs does E depend on?
            in_degrees = {} # indegree -> nr of elements with that indegree
            out_degrees = {} # outdegree -> nr of elements with that outdegree
            max_in_degree = (None, 0) # (element, degree)
            max_out_degree = (None, 0) # (element, degree)

            prev_commit = None
            prev_hashes = None
            prev_used_definitions = None
            prev_global_hashes = None
            counter = 1
            for info in commits:
                print "\n%d/%d" % (counter, len(commits))
                counter += 1
                commit = info['commit']
                parent = info['parent']
 
                if not parent: # first commit has no parent
                    print "No parent"
                    continue

                commit_data = read_chash_data(commit)
                if not commit_data:
                    # If the data does not exist, note and skip
                    #print "Data empty"
                    stats['data-empty'].add(commit)
                    continue

                local_hashes = {}
                used_definitions = {}

                # just for testing:
                for element in commit_data:
                    name = element[0]
                    if name.startswith('static function:') or name.startswith('function:'):
                        name = element[0].split(':')[1]
                    local_hashes[name] = element[1]
                    try:
                        used_definitions[name] = set()
                        for used_def in element[2]:
                            if used_def.startswith('static function:') or used_def.startswith('function:'):
                                used_definitions[name].add(used_def.split(':')[1])
                    except:
                        pass

                # prev:
                #for element in commit_data:
                #    local_hashes[element[0]] = element[1]
                #    try:
                #        used_definitions[element[0]] = element[2]
                #    except:
                #        pass


                parent_hashes = {}
                parent_global_hashes = {}
                parent_used_definitions = {}
                if parent == prev_commit and prev_global_hashes and prev_used_definitions and prev_hashes:
                    #print "Reuse prev_commit"
                    parent_hashes = prev_hashes
                    parent_used_definitions = prev_used_definitions
                    parent_global_hashes = prev_global_hashes
                else:
                    #print "Cannot reuse prev_commit"
                    parent_data = read_chash_data(parent)

                    # just for testing:
                    for element in parent_data:
                        name = element[0]
                        if name.startswith('static function:') or name.startswith('function:'):
                            name = element[0].split(':')[1]
                        parent_hashes[name] = element[1]
                        try:
                            parent_used_definitions[name] = set()
                            for used_def in element[2]:
                                if used_def.startswith('static function:') or used_def.startswith('function:'):
                                    parent_used_definitions[name].add(used_def.split(':')[1])
                        except:
                            pass



                    # prev:
                    #for element in parent_data:
                    #    parent_hashes[element[0]] = element[1]
                    #    try:
                    #        parent_used_definitions[element[0]] = element[2]
                    #    except:
                    #        pass

                                
                if not parent_hashes:
                    # If the data does not exist, note and skip
                    stats['data-empty'].add(commit)
                    
                    # Save data for reuse
                    prev_commit = commit
                    prev_hashes = local_hashes
                    prev_used_definitions = used_definitions
                    continue

                ##########################
                # GLOBAL HASH EVALUATION #
                ##########################
                
                commit_stats = {
                    'element-count' : len(local_hashes),
                    'changed-elements' : [],
                }


                elements = set(local_hashes.keys())
                parent_elements = set(parent_hashes.keys())
              


                # calculate in- and out-degree
                # reverse used_definitions
                out_use_defs = {s: 0 for s in used_definitions.keys()}  # element -> nr of depending elements
                for element in elements:
                    for el in used_definitions[element]:
                        try:
                            out_use_defs[el] += 1
                        except:
                            pass

                for element in elements:
                    out_degree = len(used_definitions[element])
                    in_degree = out_use_defs[element] 
                    
                    if in_degree > max_in_degree[1]:
                        max_in_degree = (element, in_degree)
                    if out_degree > max_out_degree[1]:
                        max_out_degree = (element, out_degree)

                    if in_degree not in in_degrees:
                        in_degrees[in_degree] = 0
                    in_degrees[in_degree] += 1

                    if out_degree not in out_degrees:
                        out_degrees[out_degree] = 0
                    out_degrees[out_degree] += 1

 
                commit_stats['changed-elements'] = elements ^ parent_elements # elements either added or removed

                for element in commit_stats['changed-elements']:
                    if element.startswith('record:'):  # do this here to get only insertions and deletions
                        total_insdel_records += 1
                    elif element.startswith('variable:') or element.startswith('static variable:'):
                        total_insdel_globals += 1
                    elif element.startswith('static function:'):
                        total_insdel_static_funcs += 1
                    else:
                        total_insdel_functions += 1


                # Compare hashes
                common_elements = elements & parent_elements
                
                global_hashes = {}
                for element in common_elements:
                    global_hash = get_global_hash(element, global_hashes, local_hashes, used_definitions)
                    parent_global_hash = get_global_hash(element, parent_global_hashes, parent_hashes, parent_used_definitions)
                    if global_hash != parent_global_hash:
                        commit_stats['changed-elements'].add(element)
                        if element.startswith('record:'): # do this here to ignore insertions and deletions
                            total_changed_records += 1
                        elif element.startswith('variable:') or element.startswith('static variable:'):
                            total_changed_globals += 1
                        elif element.startswith('static function:'):
                            total_changed_static_funcs += 1
                        else:
                            total_changed_functions += 1

                commit_stats['changed-element-count'] = len(commit_stats['changed-elements'])
                stats['commits'][commit] = commit_stats


                # Count how often each element was changed over the whole history
                for element in commit_stats['changed-elements']:
                    if element not in stats['elements']:
                        stats['elements'][element] = 0
                    stats['elements'][element] += 1


                # Save data for reuse
                prev_commit = commit
                prev_hashes = local_hashes
                prev_used_definitions = used_definitions
                prev_global_hashes = global_hashes

            self.build_info['stats'] = stats

        #in_degrees = {} # indegree -> nr of elements with that indegree
        #out_degrees = {} # outdegree -> nr of elements with that outdegree
        #max_in_degree = (None, 0) # (element, degree)
        #max_out_degree = (None, 0) # (element, degree)
        summed_in_degrees = sum([k*v for k,v in in_degrees.iteritems()])
        nr_of_elements = sum(in_degrees.values())
        avg_in_degree = summed_in_degrees/float(nr_of_elements)
        avg_out_degree = sum([k*v for k,v in out_degrees.iteritems()])/float(sum(out_degrees.values()))


        eval_info = {
            'nr-of-commits' : len(commits),
            'change-percentage' : {}, # change percentage -> nr of commits with change < percentage
            'hot-commits': {},
            'total-changed-globals': total_changed_globals,
            'total-changed-records': total_changed_records,
            'total-changed-static-funcs': total_changed_static_funcs,
            'total-changed-functions': total_changed_functions,
            'total-insdel-globals': total_insdel_globals,
            'total-insdel-records': total_insdel_records,
            'total-insdel-static-funcs': total_insdel_static_funcs,
            'total-insdel-functions': total_insdel_functions,
            'max_in_degree': max_in_degree,
            'max_out_degree': max_out_degree,
            'avg_in_degree': avg_in_degree, 
            'avg_out_degree': avg_out_degree,
        }

        # Get most changed elements
        eval_info['most-changed-elements'] = {k:v for k,v in stats['elements'].iteritems() if v > 1000} # arbitrary value (about 20% of commits)
        
        # Calc average nr and percentage of (changed) symbols per commit
        summed_avg_change_percentage = 0
        summed_changed_elements = 0
        summed_total_elements = 0
        commits = self.build_info['stats']['commits']
        for commit in commits:
            commit_stat = commits[commit]
            change_percentage = len(commit_stat['changed-elements'])/float(commit_stat['element-count'])
            summed_avg_change_percentage += change_percentage

            summed_changed_elements += len(commit_stat['changed-elements'])
            summed_total_elements += commit_stat['element-count']

            percentage = int(round(change_percentage * 100))
            if percentage not in eval_info['change-percentage']:
                eval_info['change-percentage'][percentage] = 0
            eval_info['change-percentage'][percentage] += 1
 
            
            # Identify hot commits
            #if percentage > self.hot_threshold_percentage.value:
                #eval_info['hot-commits'][commit] = percentage

                
        eval_info['avg-change-percentage'] = summed_avg_change_percentage / float(len(stats['commits']))
        eval_info['avg-changed-elements'] = summed_changed_elements / eval_info['nr-of-commits']
        eval_info['avg-total-elements'] = summed_total_elements / eval_info['nr-of-commits']
        



        eval_info['nr-hot-commits'] = len(eval_info['hot-commits'])

        with open(self.eval_data.path, "w+") as fd:
            fd.write(repr(eval_info))



        # Output the summary of this build into the statistics file.
        with open(self.stats.path, "w+") as fd:
            fd.write(repr(self.build_info))










        def plot_hash_count_histogram(hash_values, filename):
            dictionary = plt.figure()
            fig, ax = plt.subplots()
            plt.xlabel('Prozentanteil geaenderter Elemente')
            plt.ylabel('Anzahl von Commits')
            axes = plt.gca()
            axes.set_xlim([-10,100])
            axes.set_ylim([0,1600])

            ax.bar(hash_values.keys(), hash_values.values(), align='center')
            fig.savefig(filename)

        # clean data for plotting
        data = {k:v for k,v in eval_info['change-percentage'].iteritems() if k <= 100}
 
        plot_hash_count_histogram(data, self.hot_commits_histo.path)






    def variant_name(self):
        return "%s-%s"%(self.project_name(), self.metadata['mode'])

    def symlink_name(self):
        return "%s-%s"%(self.title, self.variant_name())
Example #13
class HistoricalCompilationCallGraphEvaluation(Experiment):
    inputs = {
        "clang_hash": GitArchive("/home/cip/2015/yb90ifym/clang-hash/"),
        "project": GitArchive("/home/cip/2015/yb90ifym/clang-hash/hash-projects/lua"),
        "commits": Integer(4744),
        "jobs": Integer(1),  # was 4
        "dataset": Directory("/home/cip/2015/yb90ifym/clang-hash/experiments/HistoricalCompilation-4e7c977077afea3d2ad77aeefe3b472c"),  # full lua
        "hot_threshold_percentage": Integer(10),  # minimal change percentage for commit to be classified as "hot"
    }
    outputs = {
        "stats": File("summary.dict"),
        "eval_data": File("eval.txt"),
        "hot_commits_histo": File("cg_hot_commits.pdf"),
    }

    def project_name(self):
        return os.path.basename(self.metadata['project-clone-url'])

    def run(self):
        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {
            "project-name": self.project_name(),
            "commit-hash": self.metadata["project-hash"],
            'builds': []
        }

        with self.project as src_path:
            time = 0

            os.chdir(self.dataset.path)

            # Read summary file from data collection run
            commits = None
            with open("summary.dict") as sf:
                summary = eval(sf.read())
                commits = summary['builds']

            def read_chash_data(commit):
                element_hashes = []
                try:
                    with open(commit, 'r') as cf:
                        commit_data = eval(cf.read())
                        for ofile_data in commit_data:
                            element_hashes.extend(ofile_data['element-hashes'])
                except:
                    pass

                return element_hashes

            stats = {
                'data-empty': set(),  # commits with empty info files, e.g. failed to be collected (first n commits -> missing makefile o.a.)
                'commits': {},
                'elements': {},  # symbol -> how often did this symbol change
            }

            total_changed_functions = 0  # How often was any function changed throughout the history?

            total_insdel_functions = 0  # How often was any function introduced/removed throughout the history?

            prev_commit = None
            prev_functions = None
            prev_used_definitions = None
            counter = 1
            for info in commits:
                print "%d/%d" % (counter, len(commits))
                counter += 1
                commit = info['commit']
                parent = info['parent']

                if not parent:  # first commit has no parent
                    print "No parent"
                    continue

                commit_data = read_chash_data(commit)
                if not commit_data:
                    # If the data does not exist, note and skip
                    #print "Data empty"
                    stats['data-empty'].add(commit)
                    continue

                functions = set()
                used_definitions = {}
                for element in commit_data:
                    if element[0].startswith('static function:') or element[0].startswith('function:'):
                        clean_name = element[0].split(':')[1]
                        functions.add(clean_name)
                        used_definitions[clean_name] = set()
                        for used_def in element[2]:
                            if used_def.startswith('static function:') or used_def.startswith('function:'):
                                used_definitions[clean_name].add(used_def.split(':')[1])

                parent_functions = set()
                parent_used_definitions = {}
                if parent == prev_commit and prev_functions and prev_used_definitions:
                    #print "Reuse prev_commit"
                    parent_functions = prev_functions
                    parent_used_definitions = prev_used_definitions
                else:
                    #print "Cannot reuse prev_commit"
                    parent_data = read_chash_data(parent)
                    for element in parent_data:
                        if element[0].startswith('static function:') or element[0].startswith('function:'):
                            clean_name = element[0].split(':')[1]
                            parent_functions.add(clean_name)
                            parent_used_definitions[clean_name] = set()
                            for used_def in element[2]:
                                if used_def.startswith('static function:') or used_def.startswith('function:'):
                                    parent_used_definitions[clean_name].add(used_def.split(':')[1])

                if not parent_functions:
                    # If the data does not exist, note and skip
                    stats['data-empty'].add(commit)

                    # Save data for reuse
                    prev_commit = commit
                    prev_functions = functions
                    prev_used_definitions = used_definitions
                    continue

                #########################
                # CALL GRAPH EVALUATION #
                #########################

                commit_stats = {
                    'element-count': len(functions),
                    'changed-elements':
                    [],  # contains changed + impacted functions
                    #'changed-not-impacted': set(), # contains directly changed functions only
                }

                elements = functions
                parent_elements = parent_functions

                commit_stats['changed-elements'] = set()  # elements ^ parent_elements (elements either added or removed)

                total_insdel_functions += len(commit_stats['changed-elements'])

                cwd = os.getcwd()
                os.chdir(src_path)
                changed_functions = get_changed_functions_from_commit(
                    src_path, commit)
                os.chdir(cwd)

                commit_stats['changed-not-impacted'] = changed_functions.copy()

                # Get impacted functions
                changed_functions |= get_impacted_funcs_fake_hash(
                    changed_functions, used_definitions)

                commit_stats['changed-elements'] |= changed_functions

                total_changed_functions += len(changed_functions)

                commit_stats['changed-element-count'] = len(commit_stats['changed-elements'])
                stats['commits'][commit] = commit_stats

                # Count how often each element was changed over the whole history
                for element in commit_stats['changed-elements']:
                    if element not in stats['elements']:
                        stats['elements'][element] = 0
                    stats['elements'][element] += 1

                # Save data for reuse
                prev_commit = commit
                prev_functions = functions
                prev_used_definitions = used_definitions

            self.build_info['stats'] = stats

        eval_info = {
            'nr-of-commits': len(commits),
            'change-percentage': {},  # change percentage -> nr of commits with change < percentage
            'hot-commits': {},
            'total-changed-functions': total_changed_functions,
            'total-insdel-functions': total_insdel_functions,
        }

        # Get most changed elements
        eval_info['most-changed-elements'] = {
            k: v
            for k, v in stats['elements'].iteritems() if v > 400
        }  # arbitrary value (about 10% of commits)

        # Calc average nr and percentage of (changed) symbols per commit
        summed_avg_change_percentage = 0
        summed_changed_elements = 0
        summed_total_elements = 0
        commits = self.build_info['stats']['commits']
        for commit in commits:
            commit_stat = commits[commit]
            change_percentage = len(commit_stat['changed-elements']) / float(
                commit_stat['element-count'])
            summed_avg_change_percentage += change_percentage

            summed_changed_elements += len(commit_stat['changed-elements'])
            summed_total_elements += commit_stat['element-count']

            percentage = int(round(change_percentage * 100))
            if percentage not in eval_info['change-percentage']:
                eval_info['change-percentage'][percentage] = 0
            eval_info['change-percentage'][percentage] += 1

            # Identify hot commits
            #if percentage > self.hot_threshold_percentage.value:
            #eval_info['hot-commits'][commit] = percentage

        eval_info['avg-change-percentage'] = summed_avg_change_percentage / float(len(stats['commits']))
        eval_info['avg-changed-elements'] = summed_changed_elements / eval_info['nr-of-commits']
        eval_info['avg-total-elements'] = summed_total_elements / eval_info['nr-of-commits']

        eval_info['nr-hot-commits'] = len(eval_info['hot-commits'])

        with open(self.eval_data.path, "w+") as fd:
            fd.write(repr(eval_info))

        # Output the summary of this build into the statistics file.
        with open(self.stats.path, "w+") as fd:
            fd.write(repr(self.build_info))

        def plot_hash_count_histogram(hash_values, filename):
            dictionary = plt.figure()
            fig, ax = plt.subplots()
            plt.xlabel('Prozentanteil geaenderter Elemente')
            plt.ylabel('Anzahl von Commits')
            axes = plt.gca()
            axes.set_xlim([-10, 100])
            axes.set_ylim([0, 1600])

            ax.bar(hash_values.keys(), hash_values.values(), align='center')
            fig.savefig(filename)

        # clean data for plotting
        data = {
            k: v
            for k, v in eval_info['change-percentage'].iteritems() if k <= 100
        }

        plot_hash_count_histogram(data, self.hot_commits_histo.path)

    def variant_name(self):
        return "%s-%s" % (self.project_name(), self.metadata['mode'])

    def symlink_name(self):
        return "%s-%s" % (self.title, self.variant_name())
Example #14
    def __setup_tmp_directory(self):
        """Create a temporary directory and assign it to the tmp_directory
        slot of every input and output directory"""
        # Create temp directory
        self.tmp_directory = Directory(tempfile.mkdtemp())
        self.subobjects["tmp_directory"] = self.tmp_directory

class HistoricalCompilationEvaluation(Experiment):
    inputs = {
        "clang_hash": GitArchive("/home/cip/2015/yb90ifym/clang-hash/"),
        "project": GitArchive("/home/cip/2015/yb90ifym/clang-hash/hash-projects/lua"),
        "commits": Integer(4744),
        "jobs": Integer(1),  # was 4
        "dataset": Directory("/home/cip/2015/yb90ifym/clang-hash/experiments/HistoricalCompilation-4e7c977077afea3d2ad77aeefe3b472c"),  # full lua
        "hot_threshold_percentage": Integer(50),  # minimal change percentage for commit to be classified as "hot"
    }
    outputs = {
        "stats": File("summary.dict"),
        "eval_data": File("eval.txt"),
        "hot_commits_histo": File("local_hot_commits.pdf"),
        "compare_approx_elem": File("local_compare_approx_elem.pdf"),
    }

    def project_name(self):
        return os.path.basename(self.metadata['project-clone-url'])

    def run(self):
        # Project name
        logging.info("Cloning project... %s", self.project_name())
        self.build_info = {
            "project-name": self.project_name(),
            "commit-hash": self.metadata["project-hash"],
            'builds': []
        }

        with self.project as src_path:
            time = 0

            os.chdir(self.dataset.path)

            # Read summary file from data collection run
            commits = None
            with open("summary.dict") as sf:
                summary = eval(sf.read())
                commits = summary['builds']

            def read_chash_data(commit):
                element_hashes = []
                try:
                    with open(commit, 'r') as cf:
                        commit_data = eval(cf.read())
                        for ofile_data in commit_data:
                            element_hashes.extend(ofile_data['element-hashes'])
                except:
                    pass

                return element_hashes

            stats = {
                'data-empty': set(),  # commits with empty info files, e.g. failed to be collected (first n commits -> missing makefile o.a.)
                'commits': {},
                'elements': {},  # symbol -> how often did this symbol change
            }

            total_changed_globals = 0  # How often was any global changed/introduced throughout the history?
            total_changed_records = 0  # How often was any record changed/introduced throughout the history?
            total_changed_static_funcs = 0  # How often was any static function changed/introduced throughout the history?
            total_changed_functions = 0  # How often was any function changed/introduced throughout the history? (incl. static)

            prev_commit = None
            prev_hashes = None
            counter = 1
            for info in commits:
                print "%d/%d" % (counter, len(commits))
                counter += 1
                commit = info['commit']
                parent = info['parent']

                if not parent:  # first commit has no parent
                    print "No parent"
                    continue

                commit_data = read_chash_data(commit)
                if not commit_data:
                    # If the data does not exist, note and skip
                    #print "Data empty"
                    stats['data-empty'].add(commit)
                    continue

                local_hashes = {}
                for element in commit_data:
                    local_hashes[element[0]] = element[1]

                parent_hashes = {}
                if parent == prev_commit:
                    #print "Reuse prev_commit"
                    parent_hashes = prev_hashes
                else:
                    #print "Cannot reuse prev_commit"
                    parent_data = read_chash_data(parent)
                    for element in parent_data:
                        parent_hashes[element[0]] = element[1]

                if not parent_hashes:
                    # If the data does not exist, note and skip
                    stats['data-empty'].add(commit)

                    # Save data for reuse
                    prev_commit = commit
                    prev_hashes = local_hashes

                    continue

                #########################
                # LOCAL HASH EVALUATION #
                #########################

                commit_stats = {
                    'element-count': len(local_hashes),
                    'changed-elements': [],
                    'changed-functions-approx': [],
                }

                # Get data from approximation
                cwd = os.getcwd()
                os.chdir(src_path)
                commit_stats['changed-functions-approx'] = get_changed_functions_from_commit(src_path, commit)
                os.chdir(cwd)

                elements = set(local_hashes.keys())
                parent_elements = set(parent_hashes.keys())

                # TODO: elements ^ parent_elements (elements either added or removed).
                # If this is initialized with the insdel items, it causes weird data to
                # show up in the result; should perhaps include it and add an explanation.
                commit_stats['changed-elements'] = set()

                # Compare hashes
                common_elements = elements & parent_elements
                for element in common_elements:
                    if local_hashes[element] != parent_hashes[element]:
                        commit_stats['changed-elements'].add(element)
                        # do this here to ignore insertions and deletions
                        if element.startswith('record:'):
                            total_changed_records += 1
                        elif element.startswith('variable:') or element.startswith('static variable:'):
                            total_changed_globals += 1
                        elif element.startswith('static function:'):
                            total_changed_static_funcs += 1
                            total_changed_functions += 1
                        else:
                            total_changed_functions += 1

                commit_stats['changed-element-count'] = len(commit_stats['changed-elements'])
                stats['commits'][commit] = commit_stats

                # Count how often each element was changed over the whole history
                for element in commit_stats['changed-elements']:
                    if element not in stats['elements']:
                        stats['elements'][element] = 0
                    stats['elements'][element] += 1

                # Save data for reuse
                prev_commit = commit
                prev_hashes = local_hashes

            self.build_info['stats'] = stats

        eval_info = {
            'nr-of-commits': len(commits),
            'change-percentage': {},  # change percentage -> nr of commits with change < percentage
            'hot-commits': {},
            'total-changed-globals': total_changed_globals,
            'total-changed-records': total_changed_records,
            'total-changed-static-funcs': total_changed_static_funcs,
            'total-changed-functions': total_changed_functions,
            'total-changed-elements': total_changed_functions + total_changed_records + total_changed_globals,
        }

        # Get most changed elements
        eval_info['most-changed-elements'] = {
            k: v
            for k, v in stats['elements'].iteritems()
            if v > self.commits.value / 10
        }  # arbitrary value (about 10% of commits)

        # Calc average nr and percentage of (changed) symbols per commit
        summed_avg_change_percentage = 0
        summed_changed_elements = 0
        summed_total_elements = 0
        commits = self.build_info['stats']['commits']
        for commit in commits:
            commit_stat = commits[commit]
            change_percentage = len(commit_stat['changed-elements']) / float(
                commit_stat['element-count'])
            summed_avg_change_percentage += change_percentage

            summed_changed_elements += len(commit_stat['changed-elements'])
            summed_total_elements += commit_stat['element-count']

            percentage = int(round(change_percentage * 100))
            if percentage not in eval_info['change-percentage']:
                eval_info['change-percentage'][percentage] = 0
            eval_info['change-percentage'][percentage] += 1

            # Identify hot commits
            if percentage > self.hot_threshold_percentage.value:
                eval_info['hot-commits'][commit] = (
                    percentage, len(commit_stat['changed-elements']),
                    commit_stat['element-count'])

        eval_info['avg-change-percentage'] = summed_avg_change_percentage / float(len(stats['commits']))
        eval_info['avg-changed-elements'] = summed_changed_elements / eval_info['nr-of-commits']
        eval_info['avg-total-elements'] = summed_total_elements / eval_info['nr-of-commits']

        eval_info['nr-hot-commits'] = len(eval_info['hot-commits'])

        with open(self.eval_data.path, "w+") as fd:
            fd.write(repr(eval_info))

        # Output the summary of this build into the statistics file.
        with open(self.stats.path, "w+") as fd:
            fd.write(repr(self.build_info))
        '''
        def plot_hash_count_histogram(hash_values, filename):
            dictionary = plt.figure()
            fig, ax = plt.subplots()
            plt.xlabel('Prozentanteil geaenderter Elemente')
            plt.ylabel('Anzahl von Commits')
            ax.bar(hash_values.keys(), hash_values.values(), align='center')
            fig.savefig(filename)

        # clean data for plotting
        data = {k:v for k,v in eval_info['change-percentage'].iteritems() if k <= 100}
 
        plot_hash_count_histogram(data, self.hot_commits_histo.path)




        changed_funcs_approx_list = []
        changed_elements_list = []
        for commit in commits:
            commit_stat = commits[commit]
            changed_functions_approx = commit_stat['changed-functions-approx']
            changed_elements = commit_stat['changed-elements']
            
            changed_funcs_approx_list.append(len(changed_functions_approx))
            changed_elements_list.append(len(changed_elements))

        
        #TODO plot changed elements vs approx. changed functions
        # and also changed functions vs approx changed functions
        fig, ax = plt.subplots()
        ax.plot(changed_elements_list, label='Geaenderte Elemente (lokal)')
        ax.plot(changed_funcs_approx_list, 'm', label='Geaenderte Funktionen (Approx)')

        lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) # legend on the right
        plt.xlabel('Commits')
        plt.ylabel('Anzahl')
        fig.savefig(self.compare_approx_elem.path, bbox_extra_artists=(lgd,), bbox_inches='tight')
        '''

    def variant_name(self):
        return "%s-%s" % (self.project_name(), self.metadata['mode'])

    def symlink_name(self):
        return "%s-%s" % (self.title, self.variant_name())