Ejemplo n.º 1
0
    def test_itemset_computation(self):
        """Run the itemset computation on the test groums and check its output.

        The test creates a ``clusters`` directory under the package's
        ``test_data`` directory, writes the list of groum files into it, runs
        ``Pipeline.computeItemsets`` and then checks both the layout of the
        generated cluster file and the presence of the files expected for
        ``cluster_1``.

        The ``clusters`` directory is removed at the end when ``DELETE_FILES``
        is set, even if an assertion or the computation fails, so that a
        failed run does not make the next one fail on ``os.mkdir``.
        """
        # Set the paths
        test_path = os.path.abspath(os.path.dirname(fixrgraph.test.__file__))
        test_data_path = os.path.join(test_path, "test_data")
        groums_path = os.path.join(test_data_path, "groums")

        # Set the path of the itemset computator
        fixrgraphiso_path = TestPipeline.get_fixrgraphiso_path()

        cluster_path = os.path.join(test_data_path, "clusters")
        # Created outside the try block: if the directory already exists we
        # fail here and must not delete data this test did not create.
        os.mkdir(cluster_path)
        try:
            cluster_file_path = os.path.join(cluster_path, "clusters.txt")

            # get list of groums
            groum_files_path = os.path.join(cluster_path, "groums_list.txt")
            TestPipeline.create_groums_file(groums_path, groum_files_path)

            config = Pipeline.ItemsetCompConfig(fixrgraphiso_path, 2, 1,
                                                groum_files_path, cluster_path,
                                                cluster_file_path)
            Pipeline.computeItemsets(config)

            # Check the itemset computation
            self.assertTrue(config.cluster_file)
            # Close the file deterministically instead of leaking the handle.
            with open(cluster_file_path, 'r') as cf:
                cf_lines = cf.readlines()

            # Expected layout: one "I:" line, six "F:" lines, one "E" line.
            # Checking the length first turns a short file into a test
            # failure rather than an IndexError.
            self.assertTrue(len(cf_lines) >= 8)
            self.assertTrue(cf_lines[0].startswith("I:"))
            for line in cf_lines[1:7]:
                self.assertTrue(line.startswith("F:"))
            self.assertTrue(cf_lines[7].startswith("E"))

            # Check the creation of the cluster folders
            acdfg_link = os.path.join(
                config.cluster_path, "all_clusters", "cluster_1",
                "tv.acfun.a63.DonateActivity_showErrorDialog.acdfg.bin")
            self.assertTrue(os.path.exists(acdfg_link))

            methods_file = os.path.join(config.cluster_path, "all_clusters",
                                        "cluster_1", "methods_1.txt")
            self.assertTrue(os.path.exists(methods_file))
        finally:
            # cleanup
            if DELETE_FILES and os.path.exists(cluster_path):
                shutil.rmtree(cluster_path)
Ejemplo n.º 2
0
def _read_option(getter, section, option, default):
    """Return ``getter(section, option)``, or ``default`` if the call raises.

    ``getter`` is a bound accessor of the configuration object (for example
    ``config.getboolean``). Any ``Exception`` is treated as "use the default";
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not swallowed.
    """
    try:
        return getter(section, option)
    except Exception:
        return default


def run_extraction(config):
    """Run the enabled stages of the extraction pipeline described by ``config``.

    ``config`` must provide ``get``, ``getint``, ``getfloat`` and
    ``getboolean`` accessors taking a section and an option name. The stages
    are: graph extraction, itemset computation, pattern computation,
    duplicate detection and HTML rendering. Each stage is skipped when the
    ``disabled`` option of its section ("extraction", "itemset", "pattern",
    "duplicates", "html") is true; a missing or unreadable ``disabled``
    option means the stage is enabled.

    All stage configurations are built before any stage is executed.

    Returns:
        ``1`` when extraction is enabled and the "processes" option of the
        "extraction" section cannot be read as an integer; ``None`` otherwise.
    """
    extractor_path = get_default(config, "extraction", "extractor_jar",
                                 TestPipeline.get_extractor_path())
    fixrgraphiso_path = get_default(config, "itemset", "binary",
                                    TestPipeline.get_fixrgraphiso_path())
    frequentsubgraphs_path = get_default(config, "pattern", "binary",
                                         TestPipeline.get_frequentsubgraphs_path())
    duplicates_path = get_default(config, "duplicates", "binary",
                                  TestPipeline.get_findduplicates_path())
    gather_results_path = get_default(config, "html", "result_script",
                                      TestPipeline.get_gather_results_path())

    # A stage is enabled unless its section explicitly disables it.
    disable_extraction = _read_option(config.getboolean,
                                      "extraction", "disabled", False)
    disable_itemset = _read_option(config.getboolean,
                                   "itemset", "disabled", False)
    disable_pattern = _read_option(config.getboolean,
                                   "pattern", "disabled", False)
    disable_duplicates = _read_option(config.getboolean,
                                      "duplicates", "disabled", False)
    disable_html = _read_option(config.getboolean,
                                "html", "disabled", False)

    extract_config = None
    pattern_config = None
    itemset_config = None
    cluster_path = None
    cluster_file_path = None

    out_path = config.get("extraction", "out_path")
    groums_path = os.path.join(out_path, "graphs")

    if not disable_extraction:
        try:
            tot_thread = config.getint("extraction", "processes")
        except Exception:
            # Keep the historical error code, but say why we are giving up.
            print("Cannot read the integer option 'processes' of the "
                  "'extraction' section.")
            return 1

        extract_config = Pipeline.ExtractConfig(extractor_path,
                                                config.get("extraction", "repo_list"),
                                                config.get("extraction", "buildable_list"),
                                                config.get("extraction", "build_data"),
                                                out_path,
                                                tot_thread,
                                                config.getboolean("extraction", "use_apk"))

    # Every stage after the extraction works inside the clusters directory.
    if not (disable_itemset and disable_pattern and disable_html and
            disable_duplicates):
        cluster_path = os.path.join(out_path, "clusters")
        if not os.path.isdir(cluster_path):
            os.makedirs(cluster_path)
        cluster_file_path = os.path.join(cluster_path, "clusters.txt")

    if not disable_itemset:
        print("Generating graphs list...")

        groum_files_path = os.path.join(cluster_path, "groums_list.txt")
        TestPipeline.create_groums_file(groums_path, groum_files_path, None)

        itemset_config = Pipeline.ItemsetCompConfig(fixrgraphiso_path,
                                                    config.get("itemset", "frequency_cutoff"),
                                                    config.get("itemset", "min_methods_in_itemset"),
                                                    groum_files_path,
                                                    cluster_path,
                                                    cluster_file_path)

    if not disable_pattern:
        # Optional tuning knobs of the pattern computation, with defaults.
        use_relative_frequency = _read_option(config.getboolean, "pattern",
                                              "use_relative_frequency", False)
        relative_frequency = _read_option(config.getfloat, "pattern",
                                          "relative_frequency", 0.1)
        anytime = _read_option(config.getboolean, "pattern", "anytime", False)

        pattern_config = Pipeline.ComputePatternsConfig(groums_path,
                                                        cluster_path,
                                                        cluster_file_path,
                                                        config.get("pattern", "timeout"),
                                                        config.get("pattern", "frequency_cutoff"),
                                                        frequentsubgraphs_path,
                                                        use_relative_frequency,
                                                        relative_frequency,
                                                        anytime)

    # Extract the graphs
    if not disable_extraction:
        print("Extract groums...")
        assert extract_config is not None
        Pipeline.extractGraphs(extract_config)

    # Run the itemset computation
    if not disable_itemset:
        print("Extract itemsets...")
        assert itemset_config is not None
        Pipeline.computeItemsets(itemset_config)

    # Compute the patterns
    if not disable_pattern:
        print("Compute patterns...")
        assert pattern_config is not None
        Pipeline.computePatterns(pattern_config)

    # Compute duplicate patterns
    if not disable_duplicates:
        print("Compute duplicates...")
        cluster_folders = os.path.join(cluster_path, "all_clusters")
        # Nothing to compare when no cluster folder was produced.
        if os.path.isdir(cluster_folders):
            max_cluster = get_max_clusters(cluster_folders)

            config_duplicates = Pipeline.ComputeDuplicatesConfig(cluster_path,
                                                                 max_cluster,
                                                                 duplicates_path)
            Pipeline.computeDuplicates(config_duplicates)

    # Render the HTML results
    if not disable_html:
        print("Render HTML pages...")

        cluster_folders = os.path.join(cluster_path, "all_clusters")
        if os.path.isdir(cluster_folders):
            max_cluster = get_max_clusters(cluster_folders)

            genpng = _read_option(config.getboolean, "html", "genpng", False)

            provenance_path = os.path.join(out_path, "provenance")
            html_config = Pipeline.ComputeHtmlConfig(cluster_path,
                                                     max_cluster,
                                                     gather_results_path,
                                                     genpng,
                                                     provenance_path)
            Pipeline.computeHtml(html_config)

        else:
            print("The extraction did not find any pattern.")
Ejemplo n.º 3
0
def run_extraction(config):
    """Run the whole extraction pipeline described by ``config``.

    ``config`` must provide a ``get(section, option)`` accessor. The stages
    run in this order: graph extraction, itemset computation, pattern
    computation and HTML rendering. All stage configurations are built before
    any stage is executed.

    HTML pages are rendered for clusters ``1`` up to the highest ``<n>`` found
    among the ``cluster_<n>`` entries of the ``all_clusters`` directory.

    Returns:
        ``1`` when the "processes" option of the "extraction" section cannot
        be read as an integer; ``None`` otherwise.
    """
    extractor_path = TestPipeline.get_extractor_path()
    fixrgraphiso_path = TestPipeline.get_fixrgraphiso_path()
    frequentsubgraphs_path = TestPipeline.get_frequentsubgraphs_path()
    gather_results_path = TestPipeline.get_gather_results_path()

    out_path = config.get("extraction", "out_path")

    try:
        tot_thread = int(config.get("extraction", "processes"))
    except Exception:
        # Keep the historical error code, but say why we are giving up.
        print("Cannot read the integer option 'processes' of the "
              "'extraction' section.")
        return 1

    extract_config = Pipeline.ExtractConfig(
        extractor_path, config.get("extraction", "repo_list"),
        config.get("extraction", "buildable_list"),
        config.get("extraction", "build_data"), out_path, tot_thread)

    groums_path = os.path.join(out_path, "graphs")
    cluster_path = os.path.join(out_path, "clusters")
    # Tolerate an output directory left over from a previous run.
    if not os.path.isdir(cluster_path):
        os.makedirs(cluster_path)
    cluster_file_path = os.path.join(cluster_path, "clusters.txt")
    groum_files_path = os.path.join(cluster_path, "groums_list.txt")

    print("Generating graphs list...")
    TestPipeline.create_groums_file(groums_path, groum_files_path, None)

    itemset_config = Pipeline.ItemsetCompConfig(
        fixrgraphiso_path, config.get("itemset", "frequency_cutoff"),
        config.get("itemset", "min_methods_in_itemset"), groum_files_path,
        cluster_path, cluster_file_path)

    pattern_config = Pipeline.ComputePatternsConfig(
        groums_path, cluster_path, cluster_file_path,
        config.get("pattern", "timeout"),
        config.get("pattern", "frequency_cutoff"), frequentsubgraphs_path)

    # Extract the graphs
    print("Extract groums...")
    Pipeline.extractGraphs(extract_config)

    # Run the itemset computation
    print("Extract itemsets...")
    Pipeline.computeItemsets(itemset_config)

    # Compute the patterns
    print("Compute patterns...")
    Pipeline.computePatterns(pattern_config)

    # Render the HTML results
    print("Render HTML pages...")
    cluster_folders = os.path.join(cluster_path, "all_clusters")

    if os.path.isdir(cluster_folders):
        prefix = "cluster_"
        max_cluster = -1
        for path in os.listdir(cluster_folders):
            if not path.startswith(prefix):
                continue
            try:
                n = int(path[len(prefix):])
            except ValueError:
                # Not a "cluster_<number>" entry: ignore it.
                continue
            max_cluster = max(max_cluster, n)

        # Iterate up to the highest cluster number found. Using the last
        # parsed number instead would depend on the os.listdir order and
        # would be undefined when no cluster directory exists.
        for i in range(max_cluster):
            html_config = Pipeline.ComputeHtmlConfig(cluster_path, i + 1,
                                                     gather_results_path)
            Pipeline.computeHtml(html_config)
    else:
        print("The extraction did not find any pattern.")