コード例 #1
0
def start_bayspec():
    """Time the comparison-based miner on 20 freshly generated models.

    Each round generates a model (4 objects, 5 temporal nodes per object,
    2 states), computes its paths with a minimum probability threshold of
    0.85 and, if any paths were returned, times ``CBMiner.start()`` against a
    validation model obtained via ``get_validation_model_rel(model, 0.25)``.
    The per-round durations and finally their mean are printed.
    """
    print("Hello! Here is BaySpec!")
    print("")
    durations = []
    for _ in range(20):
        generator = ModelGenerator()

        # Node configuration: fixed sizes (min == max).
        generator.set_node_range(min_objects=4, max_objects=4,
                                 min_temp_nodes=5, max_temp_nodes=5,
                                 min_states=2, max_states=2)
        # Edge configuration.
        generator.set_connection_ranges(min_edges_per_object=2,
                                        max_edges_per_object=2,
                                        min_percent_inter=0.8,
                                        max_percent_inter=0.8)

        reference_model = generator.new_tscbn()
        mining_graph = MaxAverageMiningGraph(reference_model)

        # Rounds without any path are skipped and do not count for the mean.
        if not mining_graph.path_computation(min_prob_threshold=0.85):
            continue

        validation_model = generator.get_validation_model_rel(
            reference_model, 0.25)
        validation_graph = MaxAverageMiningGraph(validation_model)

        # Only miner construction and mining itself are timed.
        began = time.time()
        CBMiner(mining_graph, validation_graph).start()
        elapsed = time.time() - began

        print("Time %s" % str(elapsed))
        durations.append(elapsed)
    print(np.mean(durations))
コード例 #2
0
def start():
    """Compare false-positive rates of Synoptic and BaySpec on interleaved logs.

    A pool of 50 models is generated; for each one ``CBMiner.start`` is run
    in evaluation mode and its ``(fps, number of specs)`` pair is stored.
    Then, for 2..5 models per log and 10 repetitions each, a random subset of
    the pool is drawn, the stored BaySpec figures of the drawn models are
    accumulated, and Synoptic is run on interleaved logs for every
    traces/samples combination. The resulting percentages are drawn as bars
    (Synoptic at even, BaySpec at odd x positions).
    """
    model_generator = ModelGenerator()

    # Nodes
    model_generator.set_node_range(min_objects=4, max_objects=5,
                                   min_temp_nodes=4, max_temp_nodes=5,
                                   min_states=2, max_states=2)
    # Edges
    model_generator.set_connection_ranges(min_edges_per_object=3,
                                          max_edges_per_object=3,
                                          min_percent_inter=0.8,
                                          max_percent_inter=0.8)

    threshold = 0.8
    edge_remove_ratio = 0.3

    # --- Phase 1: build the model pool and mine every model once ----------
    model_pool_size = 50
    model_pool = []
    bayspec_fps_per_model = []
    for position in range(model_pool_size):
        print("generate network {} of {}".format(position + 1, model_pool_size))
        reference = model_generator.new_tscbn()
        model_pool.append(reference)

        validation = model_generator.get_validation_model_rel(
            reference, edge_remove_ratio)

        reference_graph = MaxAverageMiningGraph(reference)
        validation_graph = MaxAverageMiningGraph(validation)

        reference_graph.path_computation(threshold)

        fps, specs = CBMiner(reference_graph, validation_graph).start(
            evaluation=True)
        bayspec_fps_per_model.append([fps, len(specs)])

    # --- Phase 2: evaluate random model subsets ----------------------------
    min_models = 2
    max_models = 5
    n_models_list = list(range(min_models, max_models + 1))
    n_loops = 10

    traces_per_log = list(range(2, 5 + 1))
    samples_per_trace = list(range(2, 5 + 1))
    log_combinations = len(traces_per_log) * len(samples_per_trace)
    synoptic_runs = n_loops * log_combinations

    # Each entry holds two running averages, stored as [index 0, index 1].
    synoptic_fps = {n: [0, 0] for n in n_models_list}
    bayspec_fps = {n: [0, 0] for n in n_models_list}

    for n_models in n_models_list:
        print("n_models: {}".format(n_models))
        bayspec_acc = bayspec_fps[n_models]
        synoptic_acc = synoptic_fps[n_models]

        for loop in range(n_loops):
            print("  loop: {} / {}".format(loop + 1, n_loops))
            drawn = random.sample(list(enumerate(model_pool)), n_models)
            tscbn_subset = [model for _, model in drawn]

            for pool_index, _ in drawn:
                model_fps, model_specs = bayspec_fps_per_model[pool_index]
                bayspec_acc[0] += (model_fps / n_loops)
                bayspec_acc[1] += (model_specs / n_loops)

            for traces in traces_per_log:
                for samples in samples_per_trace:
                    log = tg.multi_BN_log_interleaving(
                        tscbn_subset, traces, samples)

                    # Synoptic
                    invariants = synoptic.execute(log, threshold)
                    syn_fp = synoptic.false_positive_invariants(invariants)
                    if syn_fp[1] > 0:
                        synoptic_acc[0] += (syn_fp[0] / synoptic_runs)
                        synoptic_acc[1] += (syn_fp[1] / synoptic_runs)

    # --- Phase 3: bar chart -------------------------------------------------
    bar_width = 0.9
    for slot, n_nets in enumerate(n_models_list):
        series = (
            (0, synoptic_fps[n_nets], '#96b7c1'),
            (1, bayspec_fps[n_nets], '#335151'),
        )
        for offset, totals, colour in series:
            # Skip bars whose denominator is zero.
            if totals[1] > 0:
                plt.bar([2 * slot + offset],
                        [100 * totals[0] / totals[1]],
                        color=colour,
                        width=bar_width)

    plt.show()
コード例 #3
0
ファイル: ev_runtime.py プロジェクト: arturmrowca/tscbn
def start():
    """Plot the per-path mining runtime over the probability threshold.

    For each of 50 generated models the construction of the mining graph is
    timed once (``setup_1``). For every relative edge-removal ratio
    (0.1 .. 0.5) a validation graph is built, and for every threshold
    (0.80 .. 1.00) the path computation plus, if paths exist, one
    ``CBMiner.start()`` run is timed. ``(setup + run) / (n_paths * n_models)``
    is accumulated per (ratio, threshold); the sums are finally scaled by the
    average ``n_paths`` over all models and plotted as one line per ratio.

    NOTE(review): a model whose ``bn1.n_paths`` is 0 would raise
    ``ZeroDivisionError`` below -- confirm whether that can occur.
    """
    model_generator = ModelGenerator()

    # Nodes
    model_generator.set_node_range(min_objects=4,
                                   max_objects=5,
                                   min_temp_nodes=4,
                                   max_temp_nodes=5,
                                   min_states=2,
                                   max_states=2)
    # Edges
    model_generator.set_connection_ranges(min_edges_per_object=3,
                                          max_edges_per_object=3,
                                          min_percent_inter=0.8,
                                          max_percent_inter=0.8)

    rel_remove_list = [r / 10 for r in range(1, 5 + 1, 1)]
    threshold_list = [t / 100 for t in range(80, 100 + 1, 1)]

    n_models = 50

    bn1_paths_list = []

    # runtimes[removal ratio][threshold] -> accumulated normalised runtime
    runtimes = {
        removed: {th: 0 for th in threshold_list}
        for removed in rel_remove_list
    }
    for j in range(n_models):
        print("Model {} of {}".format(j + 1, n_models))

        tscbn1 = model_generator.new_tscbn()
        _start = timer()
        bn1 = MaxAverageMiningGraph(tscbn1)
        setup_1 = timer() - _start

        bn1_paths_list.append(bn1.n_paths)

        for rel_remove in rel_remove_list:
            print("  remove {}".format(rel_remove))
            tscbn2 = model_generator.get_validation_model_rel(
                tscbn1, rel_remove)
            bn2 = MaxAverageMiningGraph(tscbn2)
            for th in threshold_list:
                # The path computation is deliberately inside the timed
                # section: it is part of the measured runtime.
                _start = timer()
                bn1.path_computation(min_prob_threshold=th)
                if bn1.paths:
                    miner = CBMiner(bn1, bn2)
                    miner.start()
                elapsed = timer() - _start

                runtimes[rel_remove][th] += (setup_1 + elapsed) / (
                    bn1.n_paths * n_models)

    normalization_factor = np.average(bn1_paths_list)

    for rel_remove, data in runtimes.items():
        # Index by threshold explicitly so x and y are guaranteed to line up.
        y = [data[th] * normalization_factor for th in threshold_list]
        plt.plot(threshold_list, y, label=rel_remove)

    # A single legend call: a second bare plt.legend() would replace this
    # legend and drop the requested location.
    plt.legend(loc="upper right")
    plt.show()
コード例 #4
0
def start():
    """Plot the average number of mined specifications per removed-edge share.

    For each of 50 generated models, paths are computed once for the lowest
    threshold and then filtered per threshold (0.84, 0.86, 0.88, 0.90) by
    ``p["metric"] <= 1 - th``. Starting from a deep copy of the model,
    cross edges are removed cumulatively for 0 .. 75 percent; whenever the
    rounded absolute number of edges to remove increases (and once at 0 %),
    the validation graph is rebuilt and ``CBMiner`` is run again. Otherwise
    the previous specification count is carried forward. The averages over
    all models are plotted as one line per threshold.
    """
    max_remove = 75
    rel_remove = list(range(max_remove + 1))
    thresholds = [th / 100 for th in [84, 86, 88, 90]]
    n_models = 50

    model_generator = ModelGenerator()

    # Nodes
    model_generator.set_node_range(min_objects=4, max_objects=5,
                                   min_temp_nodes=4, max_temp_nodes=5,
                                   min_states=2, max_states=2)
    # Edges
    model_generator.set_connection_ranges(min_edges_per_object=3, max_edges_per_object=3,
                                          min_percent_inter=0.8, max_percent_inter=0.8)

    cummulative_paths = {th: [0 for _ in rel_remove] for th in thresholds}
    cummulative_specs = {th: [0 for _ in rel_remove] for th in thresholds}

    for i in range(n_models):
        print("Model {} of {}".format(i + 1, n_models))
        tscbn1 = model_generator.new_tscbn()

        mining_graph = MaxAverageMiningGraph(tscbn1)
        n_cross_edges = model_utility.get_n_cross_edges(tscbn1)

        # thresholds[0] is the smallest threshold; stricter thresholds are
        # obtained below by filtering this list.
        all_paths = mining_graph.path_computation(thresholds[0])

        for th in thresholds:
            mining_graph.paths = [p for p in all_paths if p["metric"] <= (1 - th)]
            n_paths = len(mining_graph.paths)

            print("  p_min={} ({} paths)".format(th, n_paths))

            if not mining_graph.paths:
                continue

            tscbn2 = copy.deepcopy(tscbn1)
            last_specs = 0
            # Absolute number of cross edges removed from tscbn2 so far.
            # Tracking it explicitly avoids evaluating round() for r - 1 at
            # r == 0, which is negative and rounds to -1 for more than 50
            # cross edges, i.e. would remove an edge at "0 %".
            removed_so_far = 0

            for r in rel_remove:
                target = round(n_cross_edges * r / 100)
                if r == 0 or target > removed_so_far:
                    tscbn2 = model_generator.get_validation_model_abs(
                        tscbn2, abs_remove=target - removed_so_far)
                    removed_so_far = target

                    validation_graph = MaxAverageMiningGraph(tscbn2)
                    miner = CBMiner(mining_graph, validation_graph)
                    last_specs = len(miner.start())

                # Either freshly mined or carried over from the last
                # percentage that actually changed the validation model.
                cummulative_paths[th][r] += n_paths
                cummulative_specs[th][r] += last_specs

    data = {th: [cummulative_specs[th][r] / n_models for r in rel_remove] for th in thresholds}

    for threshold, ys in data.items():
        plt.plot(rel_remove, ys, label=threshold)

    # One legend call only: a second bare plt.legend() would replace this
    # legend and discard the requested location.
    plt.legend(loc="lower left")
    plt.grid()
    plt.show()
コード例 #5
0
ファイル: case_study.py プロジェクト: arturmrowca/tscbn
def _banner(text, width=20):
    """Return *text* framed by the dashed rule used for console headers."""
    rule = "-".join(width * [""])
    return "\n\n" + rule + text + rule


def _count_inter_edges(edges):
    """Count edges whose two endpoints have different name prefixes.

    Edges whose target name starts with ``dL_`` are ignored. The prefix is
    the part of a node name before the first underscore (presumably the
    object/signal the node belongs to -- TODO confirm).
    """
    return sum(1 for e in edges
               if not e[1].startswith("dL_")
               and e[0].split("_")[0] != e[1].split("_")[0])


def _count_regular_nodes(nodes):
    """Count the node names that do not start with ``dL_``."""
    return sum(1 for name in nodes if not name.startswith("dL_"))


def _chi_sweep_values():
    """Return the chi-square thresholds scanned by the hyperparameter sweeps."""
    return (list(np.arange(0.05, 1.05, 0.1)) +
            list(np.arange(1.0, 5.0, 0.5)) +
            list(np.arange(5.0, 100.1, 5.0)))


def _discover_inter_edge_count(sequences, max_time_difference, k, chi, alpha,
                               description):
    """Run one structure discovery and return its number of inter-edges.

    Prints a header containing *description*, then the node and inter-edge
    counts. An ``AssertionError`` raised by the discovery is counted as zero
    inter-edges (nothing besides the header is printed in that case).
    """
    sd = PCTreeDiscoverer(min_out_degree=0.15,
                          k_infrequent=k,
                          parallel=False,
                          alpha=alpha,
                          chi_square_thresh=chi,
                          optimization_chi_square=True,
                          max_time_difference=max_time_difference)

    print(_banner("Starting Structure Discovery " + description))
    try:
        structure = sd.discover_structure(sequences)
    except AssertionError:
        return 0

    inter_edges = _count_inter_edges(structure[1])
    print("Nodes: %s" % str(_count_regular_nodes(structure[0])))
    print("Inter-edges %d" % inter_edges)
    return inter_edges


def _load_stored_tscbn(model_file):
    """Load the stored model, or print a hint and return None if it is missing."""
    if not os.path.exists(model_file):
        print(
            "Please run SD and PE first before extraction of specifications. \nNo file found at %s"
            % str(model_file))
        return None
    # NOTE(review): dill executes arbitrary code on load; only open model
    # files this pipeline has written itself.
    with open(model_file, 'rb') as in_strm:
        return dill.load(in_strm)


def run_bayspec_approach(sequences,
                         sb_max_time_difference,
                         k,
                         chi,
                         output_folder,
                         data,
                         sd_hyperparameter_estimation_chi=False,
                         sd_hyperparameter_estimation_t_th=False,
                         sd_hyperparameter_estimation_k=False,
                         learn_model=False,
                         bayspec=False,
                         map=False):
    """Run one or more stages of the BaySpec case-study pipeline.

    The stages are checked in a fixed order, and every stage except
    ``learn_model`` returns when it is done:

    1. ``sd_hyperparameter_estimation_t_th`` -- histogram of the time gaps.
    2. ``sd_hyperparameter_estimation_k`` -- inter-edge count over k.
    3. ``sd_hyperparameter_estimation_chi`` -- inter-edge count over chi.
    4. ``learn_model`` -- structure discovery + EM parameter estimation; the
       model is stored as ``<output_folder>/<data class name>.tscbn``.
    5. ``bayspec`` -- load the stored model and print paths/specifications.
    6. ``map`` -- load the stored model and call ``map_func`` on it.

    Args:
        sequences: input passed to ``to_dataframe``; the sequences it returns
            are used for all later stages.
        sb_max_time_difference: forwarded as ``max_time_difference`` to the
            structure discoverer.
        k: forwarded as ``k_infrequent`` (except in the k sweep).
        chi: forwarded as ``chi_square_thresh`` (except in the sweeps); also
            part of the MAP output file name.
        output_folder: directory of the stored model and the MAP output.
        data: only its class name is used, to name the files.
        sd_hyperparameter_estimation_chi: enable stage 3.
        sd_hyperparameter_estimation_t_th: enable stage 1.
        sd_hyperparameter_estimation_k: enable stage 2.
        learn_model: enable stage 4.
        bayspec: enable stage 5.
        map: enable stage 6 (name kept for backward compatibility although
            it shadows the builtin inside this function).

    Returns:
        None. Results are printed, plotted or written to disk.
    """
    # Developer switches, flipped manually in the source:
    evaluate_model = False  # loads the model and computes the log-likelihoods of sequences on this model
    combined = False  # scans chi vs k (data for a heat map)
    df_sequences, sequences, states_dict = to_dataframe(sequences)

    # 1. Structure Discovery hyperparameter estimation for t_th
    if sd_hyperparameter_estimation_t_th:
        print(_banner(
            "Run HP estimation and discover a good maximal t_th of %s" %
            str(2.0 * 10**13)))

        # find max time difference by plotting gaps distribution
        df_sequences = df_sequences.sort_values("start_time")
        df_sequences["gap"] = (df_sequences["end_time"] -
                               df_sequences["start_time"])
        # NOTE(review): distplot is deprecated since seaborn 0.11; kept to
        # avoid changing the required seaborn version.
        sns.distplot(np.nan_to_num(df_sequences["gap"]), kde=False)

        plt.xlabel("Time Gap between entries")
        plt.ylabel("Frequency")
        plt.show()
        return

    if combined:
        params = []
        chi_range = _chi_sweep_values()
        # Dedicated loop names keep the k / chi parameters intact.
        for k_value in list(np.arange(0.05, 1.05, 0.05)):
            for chi_value in chi_range:
                ie = _discover_inter_edge_count(
                    sequences, sb_max_time_difference, k_value, chi_value,
                    alpha=0.5,
                    description="k=%s chi=%s" % (str(k_value),
                                                 str(chi_value)))
                params += [[k_value, chi_value, ie]]
        print(str(params))
        return

    # 2. Structure Discovery hyperparameter estimation for k
    if sd_hyperparameter_estimation_k:
        k_range = list(np.arange(0.05, 1.05, 0.05))
        inter_edge_numbers = [
            _discover_inter_edge_count(sequences, sb_max_time_difference,
                                       k_value, 1, alpha=0.1,
                                       description="k=%s" % str(k_value))
            for k_value in k_range
        ]
        plt.plot(k_range, inter_edge_numbers)
        plt.xlabel("k")
        plt.ylabel("# Inter-Edges")
        plt.show()
        return

    # Structure Discovery hyperparameter estimation for chi. The sweep is
    # run exactly once; an earlier duplicate of this stage repeated all
    # discoveries without using the result.
    if sd_hyperparameter_estimation_chi:
        chi_range = _chi_sweep_values()
        inter_edge_numbers = [
            _discover_inter_edge_count(sequences, sb_max_time_difference, k,
                                       chi_value, alpha=0.5,
                                       description="chi=%s" % str(chi_value))
            for chi_value in chi_range
        ]
        plt.plot(chi_range, inter_edge_numbers)
        plt.xlabel("chi")
        plt.ylabel("# Inter-Edges")
        plt.show()
        return

    # All remaining stages work on the same stored model file.
    model_file = os.path.join(output_folder,
                              data.__class__.__name__ + ".tscbn")

    # 3. Run SD and Parameter Estimation, with parameters found from evaluation
    if learn_model:
        print(_banner("Starting Structure Discovery k=%s, t_th=%s" %
                      (str(k), str(sb_max_time_difference))))
        sd = PCTreeDiscoverer(min_out_degree=0.15,
                              k_infrequent=k,
                              parallel=False,
                              alpha=0.5,
                              chi_square_thresh=chi,
                              optimization_chi_square=True,
                              max_time_difference=sb_max_time_difference)
        nodes, edges = sd.discover_structure(sequences)
        edges, nodes = structure_consistency(edges, nodes)
        tscbn = create_tscbn(states_dict, nodes, edges)
        print("Nodes: %s Inter-edges: %d" %
              (str(len(nodes)), _count_inter_edges(edges)))

        # run parameter estimation
        print(_banner("Starting Parameter Estimation EM"))
        pe = EMAlgorithmParameterEstimator()
        pe.tbn = tscbn
        pe.iteration_frequency = 4
        tscbn = pe.estimateParameter(sequences, "TSCBNStructureModel")

        # Store result to file
        print("Found model - storing to %s" % str(model_file))
        with open(model_file, "wb") as dill_file:
            dill.dump(tscbn, dill_file, recurse=True)

    if evaluate_model:
        # evaluate results
        with open(model_file, 'rb') as in_strm:
            tscbn = dill.load(in_strm)
        ev = CSParameterEvaluator(False)
        ev.add_metric("jpd")
        ev.add_metric("log-likelihood")
        ev.add_metric("temp-log-likelihood")
        ev.add_metric("temp-jpd")
        eval_result = ev.evaluate_direct(tscbn, None, sequences, None)

        # The values are read as strings that may contain tabs.
        log_likelihood = float(
            eval_result["TSCBN"]["log-likelihood"].replace("\t", ""))
        temp_log_likelihood = float(
            eval_result["TSCBN"]["temp-log-likelihood"].replace("\t", ""))
        print("LL %s, TLL %s" %
              (str(log_likelihood), str(temp_log_likelihood)))

        return

    if bayspec:
        min_prob_threshold = 0.6
        print(_banner("Running Bayspec with p_min = %s" %
                      str(min_prob_threshold)))

        # Load file
        tscbn = _load_stored_tscbn(model_file)
        if tscbn is None:
            return

        # Run Bayspec
        tscbn = sanitize_commas(tscbn)
        bn1 = MaxAverageMiningGraph(tscbn)
        all_paths = bn1.path_computation(min_prob_threshold)
        # Original author's note (translated): the metric states at which
        # Levenshtein distance merging stops, i.e. a stricter bound means
        # less blurred results.
        paths = [p for p in all_paths if p["metric"] <= (1 - 0.7)]
        bn1.paths = paths
        metric_based_miner = MBMiner(bn1)
        prespecs = metric_based_miner.start()

        # Print Results
        print(_banner("\n\tPaths\n", width=40))
        for p in paths:
            print(p)

        print(_banner("\n\tFound Specifications\n", width=40))
        # Printed in reverse mining order; entries whose second element is
        # None are skipped.
        for spec in prespecs[::-1]:
            if spec[1] is not None:
                print(str(spec))

        return

    if map:
        min_prob_threshold = 0.6
        print(_banner("Running Bayspec with p_min = %s" %
                      str(min_prob_threshold)))

        # Load file
        tscbn = _load_stored_tscbn(model_file)
        if tscbn is None:
            return

        # Run MAP
        destination_file = os.path.join(
            output_folder,
            data.__class__.__name__ + str(chi).replace(".", "_") + ".txt")
        map_func(tscbn, destination_file)
コード例 #6
0
def start():
    """Compare metric-based and comparison-based mining over thresholds.

    Two generator configurations, labelled "(4,4)" and "(5,5)", are
    evaluated on 50 freshly generated models each. For every model the paths
    are computed once for the lowest threshold (0.80) and filtered per
    threshold up to 1.00; ``MBMiner`` and ``CBMiner`` are run on every
    filtered path set and the number of specifications is accumulated. The
    averages are plotted with dashed (mb) and solid (cb) lines.
    """
    n_models = 2
    objects = [4, 5]
    nodes_per_object = [4, 5]
    states_per_node = 2
    edges_per_object = 3
    percentage_inter = 0.8

    edge_remove_ratio = 0.20
    min_th = 80
    thresholds = [t / 100 for t in range(min_th, 100 + 1)]

    n_eval_models = 50
    sizes = ["(4,4)", "(5,5)"]

    # One preconfigured generator per model size.
    MG_list = []
    for idx in range(n_models):
        generator = ModelGenerator()

        # Nodes
        generator.set_node_range(min_objects=objects[idx],
                                 max_objects=objects[idx],
                                 min_temp_nodes=nodes_per_object[idx],
                                 max_temp_nodes=nodes_per_object[idx],
                                 min_states=states_per_node,
                                 max_states=states_per_node)
        # Edges
        generator.set_connection_ranges(min_edges_per_object=edges_per_object,
                                        max_edges_per_object=edges_per_object,
                                        min_percent_inter=percentage_inter,
                                        max_percent_inter=percentage_inter)

        MG_list.append(generator)

    # Accumulated specification counts: [size index][threshold].
    mb_cummulative = {idx: dict.fromkeys(thresholds, 0) for idx in range(n_models)}
    cb_cummulative = {idx: dict.fromkeys(thresholds, 0) for idx in range(n_models)}

    for round_no in range(n_eval_models):
        print("Model {} of {}".format(round_no + 1, n_eval_models))
        for idx, generator in enumerate(MG_list):
            print("  size:{}".format(sizes[idx]))
            reference = generator.new_tscbn()
            reference_graph = MaxAverageMiningGraph(reference)
            validation = generator.get_validation_model_rel(
                reference, edge_remove_ratio)
            validation_graph = MaxAverageMiningGraph(validation)

            # all paths for the minimal threshold
            all_paths = reference_graph.path_computation(min_th / 100)

            mb_totals = mb_cummulative[idx]
            cb_totals = cb_cummulative[idx]
            for th in thresholds:
                print("    threshold:{}".format(th))
                reference_graph.paths = [
                    p for p in all_paths if p["metric"] <= (1 - th)
                ]

                # Both miners are created before either of them is started.
                metric_based_miner = MBMiner(reference_graph)
                comparison_based_miner = CBMiner(reference_graph,
                                                 validation_graph)

                mb_totals[th] += len(metric_based_miner.start())
                cb_totals[th] += len(comparison_based_miner.start())

    colors = ["#0065BD", "#E37222"]
    for idx, (size_label, colour) in enumerate(zip(sizes, colors)):
        mb_data = [mb_cummulative[idx][th] / n_eval_models for th in thresholds]
        plt.plot(thresholds, mb_data, label="mb {}".format(size_label),
                 linestyle="dashed", color=colour)

        cb_data = [cb_cummulative[idx][th] / n_eval_models for th in thresholds]
        plt.plot(thresholds, cb_data, label="cb {}".format(size_label),
                 linestyle="solid", color=colour)

    plt.legend()
    plt.show()
コード例 #7
0
def start():
    """Compare height / unique-event metrics of BaySpec specs and Synoptic invariants.

    For each of 50 generated models, ``CBMiner`` is run and the metrics of
    its specifications (``utility.metrics_from_LTL_specs``) are averaged into
    ``bayspec_total[height][unique]``. In the same iteration Synoptic is run
    on logs for every traces/samples combination and its metrics
    (``synoptic.metrics_from_invariants``) are averaged into
    ``synoptic_total``. Both are shown as a scatter plot (height vs. unique
    events) and as a bar plot of the summed sizes per height.

    If neither approach produced any data point, a message is printed and
    nothing is plotted.
    """
    model_generator = ModelGenerator()

    # Nodes
    model_generator.set_node_range(min_objects=4,
                                   max_objects=5,
                                   min_temp_nodes=4,
                                   max_temp_nodes=5,
                                   min_states=2,
                                   max_states=2)
    # Edges
    model_generator.set_connection_ranges(min_edges_per_object=3,
                                          max_edges_per_object=3,
                                          min_percent_inter=0.8,
                                          max_percent_inter=0.8)

    edge_remove_ratio = 0.2
    threshold = 0.8
    n_models = 50

    # NOTE(review): Synoptic logs below are always sampled from this single
    # model, while BaySpec mines a fresh model (tscbn1) per iteration.
    # Confirm whether the logs should come from tscbn1 instead.
    tscbn = model_generator.new_tscbn()

    # total[height][unique events] -> averaged size
    bayspec_total = defaultdict(lambda: defaultdict(int))
    synoptic_total = defaultdict(lambda: defaultdict(int))

    traces_per_log = list(range(2, 5 + 1))
    samples_per_trace = list(range(2, 5 + 1))
    log_combinations = len(traces_per_log) * len(samples_per_trace)

    for i in range(n_models):
        print("Model {} / {}".format(i + 1, n_models))
        tscbn1 = model_generator.new_tscbn()
        bn1 = MaxAverageMiningGraph(tscbn1)

        tscbn2 = model_generator.get_validation_model_rel(
            tscbn1, edge_remove_ratio)
        bn2 = MaxAverageMiningGraph(tscbn2)

        bn1.path_computation(threshold)

        if bn1.paths:
            specs = CBMiner(bn1, bn2).start()

            if specs:
                bayspec = utility.metrics_from_LTL_specs(specs)
                for height, by_unique in bayspec.items():
                    for unique, size in by_unique.items():
                        bayspec_total[height][unique] += (size / n_models)

        for traces in traces_per_log:
            for samples in samples_per_trace:
                # create Log
                log = tg.single_BN_log(tscbn,
                                       traces_per_log=traces,
                                       samples_per_trace=samples)

                # Synoptic
                invariants = synoptic.execute(log, threshold)
                syn = synoptic.metrics_from_invariants(invariants)

                for height, by_unique in syn.items():
                    for unique, size in by_unique.items():
                        synoptic_total[height][unique] += (
                            size / (n_models * log_combinations))

    def process_results(total_dict, color):
        """Scatter one approach's totals; return its points and histogram.

        The points are ``(height, unique, size)`` triples; the histogram maps
        a height to the sum of all sizes recorded for it.
        """
        points = [(height, unique, size)
                  for height, by_unique in total_dict.items()
                  for unique, size in by_unique.items()]

        # A mapping instead of a fixed-length list: no upper bound on height.
        histogram = defaultdict(int)
        for height, _, size in points:
            histogram[height] += size

        if points:
            plt.scatter(x=[p[0] for p in points],
                        y=[p[1] for p in points],
                        s=[utility.scatter_size(p[2]) for p in points],
                        alpha=0.7,
                        c=color)
        return points, histogram

    # BaySpec
    bayspec_points, bayspec_histogram = process_results(bayspec_total,
                                                        color='#335151')

    # Synoptic
    synoptic_points, synoptic_histogram = process_results(synoptic_total,
                                                          color='#96b7c1')

    all_points = bayspec_points + synoptic_points
    if not all_points:
        # max() of an empty sequence would raise ValueError below.
        print("No specifications or invariants found - nothing to plot.")
        return

    max_height = max(p[0] for p in all_points)
    max_y = max(p[1] for p in all_points)
    plt.xlabel("Spec Height")
    plt.ylabel("Unique Events")
    plt.xlim(0, max_height + 1)
    plt.ylim(0, max_y + 1)
    plt.show()

    # Bar plot: summed sizes per spec height, both approaches overlaid.
    bar_width = 0.9
    bins = list(range(max_height + 1))

    plt.bar(bins,
            [bayspec_histogram.get(height, 0) for height in bins],
            color='#335151',
            width=bar_width,
            alpha=0.6)
    plt.bar(bins,
            [synoptic_histogram.get(height, 0) for height in bins],
            color='#96b7c1',
            width=bar_width,
            alpha=0.6)

    plt.xlim(0, max_height + 1)
    plt.xlabel("Spec Height")
    plt.show()