Exemple #1
0
    def test_create_html(self):
        """Run the HTML generation step and verify the expected files exist."""
        # Locate the test-data directory relative to the test package.
        base_dir = os.path.abspath(os.path.dirname(fixrgraph.test.__file__))
        data_dir = os.path.join(base_dir, "test_data")

        # Inputs for the HTML creator.
        results_script = TestPipeline.get_gather_results_path()
        clusters_dir = os.path.join(data_dir, "clusters_data_html")
        html_dir = os.path.join(clusters_dir, "html_files")

        Pipeline.computeHtml(
            Pipeline.ComputeHtmlConfig(clusters_dir, "1", results_script))

        # Files (relative to html_dir) the step must have produced.
        expected_rel = [
            "cluster_1.html", "cluster_1_anom_1.dot", "cluster_1_pop_1.dot",
            "cluster_1_pop_2.dot", "index.html",
        ]
        expected_rel += [os.path.join("cluster_1", dot_name)
                         for dot_name in ["0.dot", "1.dot", "2.dot",
                                          "3.dot", "4.dot", "out.dot"]]

        for rel in expected_rel:
            full = os.path.join(html_dir, rel)
            self.assertTrue(os.path.exists(full))
            if DELETE_FILES:
                os.remove(full)
        if DELETE_FILES:
            shutil.rmtree(html_dir)
Exemple #2
0
    def test_compute_duplicates(self):
        """Run the duplicate-pattern step and validate its two output files.

        Checks that the lattice list has exactly two entries and that the
        pattern-duplicate file matches the known expected lines.
        """
        # Set the paths
        test_path = os.path.abspath(os.path.dirname(fixrgraph.test.__file__))
        test_data_path = os.path.join(test_path, "test_data")
        cluster_path = os.path.join(test_data_path, "clusters_data_duplicates")
        duplicates_path = TestPipeline.get_findduplicates_path()
        self.assertTrue(os.path.exists(duplicates_path))

        config = Pipeline.ComputeDuplicatesConfig(cluster_path, "2",
                                                  duplicates_path)
        Pipeline.computeDuplicates(config)

        lattice_list_file = os.path.join(config.cluster_path,
                                         Pipeline.LATTICE_LIST)
        pattern_duplicate_file = os.path.join(config.cluster_path,
                                              Pipeline.PATTERN_DUPLICATES)
        created = [lattice_list_file, pattern_duplicate_file]

        self.assertTrue(os.path.exists(lattice_list_file))
        # Use context managers so the descriptors are closed (the original
        # open(...).readlines() calls leaked the file handles).
        with open(lattice_list_file) as lattice_fd:
            self.assertEqual(len(lattice_fd.readlines()), 2)

        results = ["1,2,2,2", "1,3,2,3"]
        with open(pattern_duplicate_file) as dup_fd:
            for (expected, line) in zip(results, dup_fd.readlines()):
                self.assertEqual(expected.strip(), line.strip())

        for c in created:
            self.assertTrue(os.path.exists(c))
            if DELETE_FILES:
                os.remove(c)
Exemple #3
0
    def test_itemset_computation(self):
        """Run the frequent-itemset step and check its outputs.

        Verifies the structure of clusters.txt (an "I:" line, six "F:"
        lines, an "E" terminator) and that the cluster_1 folder was
        populated with the acdfg link and the methods file.
        """
        # Set the paths
        test_path = os.path.abspath(os.path.dirname(fixrgraph.test.__file__))
        test_data_path = os.path.join(test_path, "test_data")
        groums_path = os.path.join(test_data_path, "groums")

        # Set the path of the itemset computator
        fixrgraphiso_path = TestPipeline.get_fixrgraphiso_path()

        cluster_path = os.path.join(test_data_path, "clusters")
        os.mkdir(cluster_path)
        cluster_file_path = os.path.join(cluster_path, "clusters.txt")

        # get list of groums
        groum_files_path = os.path.join(cluster_path, "groums_list.txt")
        TestPipeline.create_groums_file(groums_path, groum_files_path)

        config = Pipeline.ItemsetCompConfig(fixrgraphiso_path, 2, 1,
                                            groum_files_path, cluster_path,
                                            cluster_file_path)
        Pipeline.computeItemsets(config)

        # Check the itemset computation
        self.assertTrue(config.cluster_file)
        # Context manager closes the handle (the original leaked it).
        with open(cluster_file_path, 'r') as cf:
            cf_lines = cf.readlines()
        self.assertTrue(cf_lines[0].startswith("I:"))
        for i in range(6):
            self.assertTrue(cf_lines[i + 1].startswith("F:"))
        self.assertTrue(cf_lines[7].startswith("E"))

        # Check the creation of the cluster folders
        acdfg_link = os.path.join(
            config.cluster_path, "all_clusters", "cluster_1",
            "tv.acfun.a63.DonateActivity_showErrorDialog.acdfg.bin")
        self.assertTrue(os.path.exists(acdfg_link))

        methods_file = os.path.join(config.cluster_path, "all_clusters",
                                    "cluster_1", "methods_1.txt")
        self.assertTrue(os.path.exists(methods_file))

        # cleanup
        if DELETE_FILES and os.path.exists(cluster_path):
            shutil.rmtree(cluster_path)
Exemple #4
0
    def test_pattern_computation(self):
        """Run the pattern-mining step and check the cluster_1 artifacts
        (logs, info file, lattice, pop/anom dot files) were produced."""
        # Set the paths
        test_path = os.path.abspath(os.path.dirname(fixrgraph.test.__file__))
        test_data_path = os.path.join(test_path, "test_data")
        groums_path = os.path.join(test_data_path, "groums")

        # Set the path of the pattern-mining binary
        frequentsubgraphs_path = TestPipeline.get_frequentsubgraphs_path()
        cluster_path = os.path.join(test_data_path, "clusters_data")
        cluster_file_path = os.path.join(cluster_path, "clusters.txt")

        config = Pipeline.ComputePatternsConfig(groums_path, cluster_path,
                                                cluster_file_path, 10, 2,
                                                frequentsubgraphs_path)

        Pipeline.computePatterns(config)

        cluster_1_path = os.path.join(cluster_path, "all_clusters",
                                      "cluster_1")
        created = [
            os.path.join(cluster_path, "makefile"),
            os.path.join(cluster_1_path, "run1.err.out"),
            os.path.join(cluster_1_path, "run1.out"),
            os.path.join(cluster_1_path, "cluster_1_info.txt"),
            os.path.join(cluster_1_path, "cluster_1_lattice.bin"),
            os.path.join(cluster_1_path, "pop_1.dot"),
            os.path.join(cluster_1_path, "pop_2.dot"),
            os.path.join(cluster_1_path, "anom_1.dot")
        ]

        for c in created:
            logging.debug("Checking creation of %s..." % c)
            self.assertTrue(os.path.exists(c))
            # cleanup
            if DELETE_FILES:
                # Fixed: the original used a Python-2-only print statement
                # ('print "Removing..."'); use the function form and include
                # the file name, consistent with the sibling tests.
                print("Removing... %s" % c)
                os.remove(c)
Exemple #5
0
    def test_graph_extraction(self):
        """Run the groum extraction step and verify the produced artifacts
        (acdfg binary plus provenance dot/html/jimple files)."""
        test_path = os.path.abspath(os.path.dirname(fixrgraph.test.__file__))
        test_data_path = os.path.join(test_path, "test_data")

        extractor_path = TestPipeline.get_extractor_path()

        repo_list = os.path.join(test_data_path, "repo_list.json")
        buildable_list = os.path.join(test_data_path, "buildable_small.json")
        build_data = os.path.join(test_data_path, "build-data")
        out_path = os.path.join(test_data_path, "output")

        # NOTE: the original assigned an unused local (fixrgraph_jar) that
        # misleadingly pointed at repo_list.json; it has been removed.

        config = Pipeline.ExtractConfig(extractor_path, repo_list,
                                        buildable_list, build_data, out_path,
                                        1, False)
        Pipeline.extractGraphs(config)

        # some files that must have been created by running the test
        created_files = [
            "graphs/learning-android/Yamba/46795d3c4a1f56416f88a18b708d9db36a429025/com.marakana.android.yamba.YambaWidget_onReceive.acdfg.bin",
            "provenance/learning-android/Yamba/46795d3c4a1f56416f88a18b708d9db36a429025/com.marakana.android.yamba.YambaWidget_onReceive.acdfg.dot",
            "provenance/learning-android/Yamba/46795d3c4a1f56416f88a18b708d9db36a429025/com.marakana.android.yamba.YambaWidget_onReceive.cdfg.dot",
            "provenance/learning-android/Yamba/46795d3c4a1f56416f88a18b708d9db36a429025/com.marakana.android.yamba.YambaWidget_onReceive.cfg.dot",
            "provenance/learning-android/Yamba/46795d3c4a1f56416f88a18b708d9db36a429025/com.marakana.android.yamba.YambaWidget_onReceive.html",
            "provenance/learning-android/Yamba/46795d3c4a1f56416f88a18b708d9db36a429025/com.marakana.android.yamba.YambaWidget_onReceive.jimple",
            "provenance/learning-android/Yamba/46795d3c4a1f56416f88a18b708d9db36a429025/com.marakana.android.yamba.YambaWidget_onReceive.sliced.jimple"
        ]

        for f in created_files:
            # out_path is already absolute (built from an abspath), so a
            # single join suffices; the original double-join was redundant.
            f_path = os.path.join(out_path, f)
            self.assertTrue(os.path.exists(f_path))

        # cleanup
        if DELETE_FILES and os.path.exists(out_path):
            shutil.rmtree(out_path)
Exemple #6
0
def run_extraction(config):
    """Drive the full pipeline from an INI-style `config` object.

    Stages: graph extraction, itemset computation, pattern mining,
    duplicate detection, and HTML rendering. Each stage can be switched
    off with a boolean "disabled" option in its config section. Tool
    paths fall back to the bundled test defaults when not configured.

    Returns 1 when the extraction thread count cannot be parsed;
    otherwise returns None.
    """
    # Resolve tool/script paths, defaulting to the test fixtures.
    extractor_path = get_default(config, "extraction","extractor_jar",
                                 TestPipeline.get_extractor_path())
    fixrgraphiso_path = get_default(config, "itemset", "binary",
                                    TestPipeline.get_fixrgraphiso_path())
    frequentsubgraphs_path = get_default(config, "pattern", "binary",
                                         TestPipeline.get_frequentsubgraphs_path())
    duplicates_path = get_default(config, "duplicates", "binary",
                                  TestPipeline.get_findduplicates_path())
    gather_results_path = get_default(config, "html", "result_script",
                                      TestPipeline.get_gather_results_path())

    # Each stage runs unless its section explicitly disables it; a missing
    # option (raised by getboolean) is treated as "enabled" on purpose.
    disable_extraction = False
    disable_itemset = False
    disable_pattern = False
    disable_duplicates = False
    disable_html = False

    try:
        disable_extraction = config.getboolean("extraction","disabled")
    except:
        pass
    try:
        disable_itemset = config.getboolean("itemset","disabled")
    except:
        pass
    try:
        disable_pattern = config.getboolean("pattern","disabled")
    except:
        pass
    try:
        disable_duplicates = config.getboolean("duplicates","disabled")
    except:
        pass
    try:
        disable_html = config.getboolean("html","disabled")
    except:
        pass

    # Stage configs are built up-front (below) and executed afterwards.
    extract_config = None
    pattern_config = None
    itemset_config = None
    cluster_path = None
    cluster_file_path = None

    # Single-argument join is a no-op; value comes straight from the config.
    out_path = os.path.join(config.get("extraction", "out_path"))
    groums_path = os.path.join(out_path, "graphs")

    if (not disable_extraction):
        try:
            tot_thread = int(config.getint("extraction","processes"))
        except:
            # A missing/unparsable thread count aborts the whole run.
            return 1

        extract_config = Pipeline.ExtractConfig(extractor_path,
                                                config.get("extraction", "repo_list"),
                                                config.get("extraction", "buildable_list"),
                                                config.get("extraction", "build_data"),
                                                out_path,
                                                tot_thread,
                                                config.getboolean("extraction", "use_apk"))

    # All downstream stages share the clusters directory; create it once.
    if ((not disable_itemset) or (not disable_pattern) or (not disable_html) or
        (not disable_duplicates)):
        cluster_path = os.path.join(out_path,"clusters")
        if (not os.path.isdir(cluster_path)):
            os.makedirs(cluster_path)
        cluster_file_path = os.path.join(cluster_path, "clusters.txt")


    if (not disable_itemset):
        print("Generating graphs list...")

        # List every extracted groum as input for the itemset miner.
        groum_files_path = os.path.join(cluster_path, "groums_list.txt")
        TestPipeline.create_groums_file(groums_path, groum_files_path, None)

        itemset_config = Pipeline.ItemsetCompConfig(fixrgraphiso_path,
                                                    config.get("itemset", "frequency_cutoff"),
                                                    config.get("itemset", "min_methods_in_itemset"),
                                                    groum_files_path,
                                                    cluster_path,
                                                    cluster_file_path)

    if (not disable_pattern):
        # Optional pattern-mining knobs, with best-effort defaults when
        # the option is missing or malformed.
        try:
            use_relative_frequency = config.getboolean("pattern", "use_relative_frequency")
        except:
            use_relative_frequency = False

        try:
            relative_frequency = config.getfloat("pattern", "relative_frequency")
        except:
            relative_frequency = 0.1

        try:
            anytime = config.getboolean("pattern", "anytime")
        except:
            anytime = False


        pattern_config = Pipeline.ComputePatternsConfig(groums_path,
                                                        cluster_path,
                                                        cluster_file_path,
                                                        config.get("pattern", "timeout"),
                                                        config.get("pattern", "frequency_cutoff"),
                                                        frequentsubgraphs_path,
                                                        use_relative_frequency,
                                                        relative_frequency,
                                                        anytime)

    # Extract the graphs
    if (not disable_extraction):
        print("Extract groums...")
        assert not extract_config is None
        Pipeline.extractGraphs(extract_config)

    # Run the itemset computation
    if (not disable_itemset):
        print("Extract itemsets...")
        assert not itemset_config is None
        Pipeline.computeItemsets(itemset_config)

    # Compute the patterns
    if (not disable_pattern):
        print("Compute patterns...")
        assert not pattern_config is None
        Pipeline.computePatterns(pattern_config)

    # Compute duplicate patterns
    if (not disable_duplicates):
        print("Compute duplicates...")
        cluster_folders = os.path.join(cluster_path, "all_clusters")
        if (os.path.isdir(cluster_folders)):
            # Highest cluster index produced by the pattern stage.
            max_cluster = get_max_clusters(cluster_folders)

            config_duplicates  = Pipeline.ComputeDuplicatesConfig(cluster_path,
                                                                  max_cluster,
                                                                  duplicates_path)
            Pipeline.computeDuplicates(config_duplicates)

    # Render the HTML results
    if (not disable_html):
        print("Render HTML pages...")

        cluster_folders = os.path.join(cluster_path, "all_clusters")
        if (os.path.isdir(cluster_folders)):
            max_cluster = get_max_clusters(cluster_folders)

            # PNG generation is optional; default off.
            try:
                genpng = config.getboolean("html", "genpng")
            except:
                genpng = False

            provenance_path = os.path.join(out_path,"provenance")
            html_config = Pipeline.ComputeHtmlConfig(cluster_path,
                                                     max_cluster,
                                                     gather_results_path,
                                                     genpng,
                                                     provenance_path)
            Pipeline.computeHtml(html_config)

        else:
            print("The extraction did not find any pattern.")
Exemple #7
0
def run_extraction(config):
    """Run the whole pipeline (extraction, itemsets, patterns, HTML).

    Tool paths come from the bundled test defaults; all other inputs come
    from the INI-style `config`. Returns 1 when the thread count cannot
    be parsed, otherwise None.
    """
    extractor_path = TestPipeline.get_extractor_path()
    fixrgraphiso_path = TestPipeline.get_fixrgraphiso_path()
    frequentsubgraphs_path = TestPipeline.get_frequentsubgraphs_path()
    gather_results_path = TestPipeline.get_gather_results_path()

    out_path = os.path.join(config.get("extraction", "out_path"))

    try:
        tot_thread = int(config.get("extraction", "processes"))
    except:
        # A missing/unparsable thread count aborts the whole run.
        return 1

    extract_config = Pipeline.ExtractConfig(
        extractor_path, config.get("extraction", "repo_list"),
        config.get("extraction", "buildable_list"),
        config.get("extraction", "build_data"), out_path, tot_thread)

    groums_path = os.path.join(out_path, "graphs")
    cluster_path = os.path.join(out_path, "clusters")
    os.makedirs(cluster_path)
    cluster_file_path = os.path.join(cluster_path, "clusters.txt")
    groum_files_path = os.path.join(cluster_path, "groums_list.txt")

    print("Generating graphs list...")
    TestPipeline.create_groums_file(groums_path, groum_files_path, None)

    itemset_config = Pipeline.ItemsetCompConfig(
        fixrgraphiso_path, config.get("itemset", "frequency_cutoff"),
        config.get("itemset", "min_methods_in_itemset"), groum_files_path,
        cluster_path, cluster_file_path)

    pattern_config = Pipeline.ComputePatternsConfig(
        groums_path, cluster_path, cluster_file_path,
        config.get("pattern", "timeout"),
        config.get("pattern", "frequency_cutoff"), frequentsubgraphs_path)

    # Extract the graphs
    print("Extract groums...")
    Pipeline.extractGraphs(extract_config)

    # Run the itemset computation
    print("Extract itemsets...")
    Pipeline.computeItemsets(itemset_config)

    # Compute the patterns
    print("Compute patterns...")
    Pipeline.computePatterns(pattern_config)

    # Render the HTML results
    print("Render HTML pages...")
    cluster_folders = os.path.join(cluster_path, "all_clusters")

    if (os.path.isdir(cluster_folders)):
        # Find the highest cluster_<n> index among the cluster folders;
        # non-numeric suffixes are ignored.
        max_cluster = -1
        for path in os.listdir(cluster_folders):
            if path.startswith("cluster_"):
                n_str = path[8:]
                try:
                    n = int(n_str)
                    if n > max_cluster:
                        max_cluster = n
                except:
                    pass
        # Fixed: the original iterated range(n) — the *last* parsed cluster
        # number, which may be unbound (NameError) or smaller than the
        # maximum — instead of rendering every cluster 1..max_cluster.
        for i in range(max_cluster):
            html_config = Pipeline.ComputeHtmlConfig(cluster_path, i + 1,
                                                     gather_results_path)
            Pipeline.computeHtml(html_config)
    else:
        print("The extraction did not find any pattern.")
Exemple #8
0
    def test_pattern_computation(self):
        """Exercise pattern mining under several flag combinations and
        check the artifacts each run produces."""
        # Resolve the test-data locations.
        base_dir = os.path.abspath(os.path.dirname(fixrgraph.test.__file__))
        data_dir = os.path.join(base_dir, "test_data")
        groums_dir = os.path.join(data_dir, "groums")

        # Binary used by the pattern-mining step.
        miner_bin = TestPipeline.get_frequentsubgraphs_path()
        clusters_dir = os.path.join(data_dir, "clusters_data")
        clusters_file = os.path.join(clusters_dir, "clusters.txt")

        # One (use_relative_frequency, flag_a, flag_b) tuple per run.
        flag_combos = [
            (False, False, False),
            (False, True, False),
            (False, True, True),
            (True, False, False),
            (True, False, False),
            (True, False, True),
        ]
        configs = [
            Pipeline.ComputePatternsConfig(groums_dir, clusters_dir,
                                           clusters_file, 10, 2,
                                           miner_bin, use_rel, 0.1,
                                           flag_a, flag_b)
            for (use_rel, flag_a, flag_b) in flag_combos
        ]

        cluster_1_dir = os.path.join(clusters_dir, "all_clusters",
                                     "cluster_1")
        # Outputs expected from every run.
        base_outputs = [
            os.path.join(clusters_dir, "makefile"),
            os.path.join(cluster_1_dir, "run1.err.out"),
            os.path.join(cluster_1_dir, "run1.out"),
            os.path.join(cluster_1_dir, "cluster_1_info.txt"),
            os.path.join(cluster_1_dir, "cluster_1_lattice.bin"),
            os.path.join(cluster_1_dir, "pop_1.dot"),
            os.path.join(cluster_1_dir, "pop_2.dot"),
            os.path.join(cluster_1_dir, "all_acdfg_bin.txt")
        ]

        # Runs with use_relative_frequency=False also produce anom_1.dot;
        # the relative-frequency runs produce three extra popular patterns.
        abs_outputs = ([os.path.join(cluster_1_dir, "anom_1.dot")]
                       + base_outputs)
        rel_outputs = ([os.path.join(cluster_1_dir, "pop_%d.dot" % k)
                        for k in (3, 4, 5)]
                       + base_outputs)
        expected_per_run = [abs_outputs] * 3 + [rel_outputs] * 3

        for run_config, expected in zip(configs, expected_per_run):
            Pipeline.computePatterns(run_config)

            for path in expected:
                logging.debug("Checking creation of %s..." % path)
                self.assertTrue(os.path.exists(path))
                # cleanup
                if DELETE_FILES:
                    print("Removing... %s" % path)
                    os.remove(path)