def stats_plot(learner: RefaelLearner, database):
    """Plot per-time-step statistics of the valid graphs and save them as HTML.

    For every index ``t`` of ``database.multi_graphs_by_time`` six values are
    collected over the graphs flagged valid in ``mg._graph_valid``: number of
    graphs, total nodes, total edges, average node degree, number of "blacks"
    and number of "whites".  Each series is drawn as a blue line in its own
    figure and saved to ``<learner.base_dir()>/fig/stats_graphs/<title>.html``.

    :param learner: object whose ``base_dir()`` is the root of the output tree.
    :param database: object exposing ``multi_graphs_by_time`` and
        ``_database._labels`` (a mapping from graph id to label).
    """
    out_dir = os.path.join(learner.base_dir(), 'fig', 'stats_graphs')
    # makedirs also creates a missing 'fig' parent, which os.mkdir cannot do,
    # and exist_ok avoids the check-then-create race.
    os.makedirs(out_dir, exist_ok=True)

    titles = [
        "Number of graphs over time", "Total number of nodes over time",
        "Total number of edges over time", "Average node degree over time",
        "Number of blacks over time", "Number of whites over time"
    ]
    y_labels = [
        "number of graphs", "number of nodes", "number of edges",
        "average node degree", "number of blacks", "number of whites"
    ]
    figures = [
        figure(plot_width=600,
               plot_height=250,
               title=title,
               x_axis_label='time',
               y_axis_label=y_label)
        for title, y_label in zip(titles, y_labels)
    ]
    y_vals = [[] for _ in titles]

    labels = database._database._labels
    n_times = len(database.multi_graphs_by_time)

    for t in range(n_times):
        mg = database.multi_graphs_by_time[t]
        valids = [gid for gid in mg._list_id if mg._graph_valid[gid]]

        total_nodes = sum(mg.node_count(gid) for gid in valids)
        y_vals[0].append(len(valids))
        y_vals[1].append(total_nodes)
        y_vals[2].append(sum(mg.edge_count(gid) for gid in valids))

        # Accumulate the degree total directly instead of concatenating
        # per-graph lists (which was quadratic in the number of nodes).
        total_degree = 0
        for gid in valids:
            gnx = mg.get_gnx(gid)
            total_degree += sum(
                deg for _, deg in gnx.degree(list(gnx.nodes())))
        # A time step without any valid node has no meaningful average;
        # record 0 rather than crashing with ZeroDivisionError.
        y_vals[3].append(total_degree / total_nodes if total_nodes else 0.0)

        valid_labels = [labels[gid] for gid in valids]
        # NOTE(review): the arithmetic below assumes labels are 0/1 -- confirm.
        positives = sum(valid_labels)
        negatives = len(valid_labels) - positives
        if REFAEL_PARAM['white_label']:
            # A truthy 'white_label' means the summed labels count the whites.
            y_vals[5].append(positives)
            y_vals[4].append(negatives)
        else:
            y_vals[4].append(positives)
            y_vals[5].append(negatives)

    for fig, title, series in zip(figures, titles, y_vals):
        fig.line(list(range(n_times)), series, line_color='blue')
        save(fig, os.path.join(out_dir, title + ".html"))
def statgraphs(learner: RefaelLearner):
    """Prepare the learner's data loader and draw the multi-colour stats plots.

    If ``REFAEL_PARAM['min_nodes']`` is truthy the loader is filtered through
    ``filter_by_nodes``; otherwise its ``features_by_time`` attribute is read
    and the value discarded.  The loader's database is then loaded and handed
    to ``multicolor_stats_plot``.

    :param learner: object providing ``data_loader()``.
    """
    if REFAEL_PARAM['min_nodes']:
        learner.data_loader().filter_by_nodes(
            min_nodes=REFAEL_PARAM['min_nodes'])
    else:
        # Value intentionally unused; presumably the attribute access
        # triggers lazy work inside the loader -- TODO confirm.
        learner.data_loader().features_by_time

    loader = learner.data_loader()
    loader._load_database()
    multicolor_stats_plot(learner, loader)
예제 #3
0
def grid_eps(base_name, eps_list: list):
    """Run the bi-average active-learning experiment once per ``eps`` value.

    Results are pickled under ``res/<base_name>/<EPS>``.  The first value in
    ``eps_list`` is run with ``rand=True`` and additionally stores the
    ``best_eps`` and ``rand_eps`` files; every value stores
    ``res_XGBoost_eps_<eps>`` and ``perf_XGBoost_eps_<eps>``.

    ``REFAEL_PARAM['eps']`` is restored to its previous value when the
    function exits, including when a run raises.

    :param base_name: name of the sub-directory of ``res`` to write into.
    :param eps_list: ``eps`` values to evaluate, in order.
    """
    base = os.path.join("res", base_name, EPS)
    # Create the result directory up front so a long run is not lost to a
    # FileNotFoundError at save time.
    os.makedirs(base, exist_ok=True)

    def _save(file_name, obj):
        # The context manager closes the file even if pickling fails.
        with open(os.path.join(base, file_name), "wb") as out:
            pickle.dump(obj, out)

    default_eps = REFAEL_PARAM['eps']
    try:
        # Detect the first iteration by position, not by value, so a repeated
        # first value does not re-run the random baseline.
        for idx, eps in enumerate(eps_list):
            REFAEL_PARAM['eps'] = eps
            if idx == 0:
                res, best, rand, perf = RefaelLearner().run_al_bi_avg(
                    rand=True)
                _save("best_eps", best)
                _save("rand_eps", rand)
            else:
                res, _, _, perf = RefaelLearner().run_al_bi_avg()
            _save("res_XGBoost_eps_" + str(eps), res)
            _save("perf_XGBoost_eps_" + str(eps), perf)
    finally:
        REFAEL_PARAM['eps'] = default_eps
예제 #4
0
def multicolor_stats_plot(learner: RefaelLearner, database):
    """Plot per-time-step graph statistics, with one extra plot per label value.

    For every index ``t`` of ``database.multi_graphs_by_time`` the following
    are collected over the graphs flagged valid in ``mg._graph_valid``:
    number of graphs, total nodes, total edges, average node degree, and the
    number of graphs carrying each distinct label ("color") found in
    ``database._database._labels``.  Each series is drawn as a blue line and
    exported as PNG to
    ``<learner.base_dir()>/fig/stats_graphs_<number of colors>/<title>_c2.png``.

    :param learner: object whose ``base_dir()`` is the root of the output tree.
    :param database: object exposing ``multi_graphs_by_time`` and
        ``_database._labels`` (a mapping from graph id to label).
    """
    labels = database._database._labels
    # Distinct label values in first-seen order.
    colors = list(Counter(labels.values()).keys())

    titles = ["Number of graphs over time", "Total number of nodes over time", "Total number of edges over time",
              "Average node degree over time"] + \
             ["Number of graphs of color " + str(color) + " over time" for color in colors]
    y_labels = ["number of graphs", "number of nodes", "number of edges", "average node degree"] + \
               ["number of color " + str(color) for color in colors]

    figures = [
        figure(plot_width=600,
               plot_height=250,
               title=title,
               x_axis_label='time',
               y_axis_label=y_label)
        for title, y_label in zip(titles, y_labels)
    ]
    y_vals = [[] for _ in titles]

    n_times = len(database.multi_graphs_by_time)
    for t in range(n_times):
        mg = database.multi_graphs_by_time[t]
        valids = [gid for gid in mg._list_id if mg._graph_valid[gid]]

        total_nodes = sum(mg.node_count(gid) for gid in valids)
        y_vals[0].append(len(valids))
        y_vals[1].append(total_nodes)
        y_vals[2].append(sum(mg.edge_count(gid) for gid in valids))

        # Accumulate the degree total directly instead of concatenating
        # per-graph lists (which was quadratic in the number of nodes).
        total_degree = 0
        for gid in valids:
            gnx = mg.get_gnx(gid)
            total_degree += sum(
                deg for _, deg in gnx.degree(list(gnx.nodes())))
        # A time step without any valid node has no meaningful average;
        # record 0 rather than crashing with ZeroDivisionError.
        y_vals[3].append(total_degree / total_nodes if total_nodes else 0.0)

        # Counter yields 0 for colors that do not occur at this time step.
        per_color = Counter(labels[gid] for gid in valids)
        for offset, color in enumerate(colors):
            y_vals[4 + offset].append(per_color[color])

    out_dir = os.path.join(learner.base_dir(), 'fig',
                           'stats_graphs_' + str(len(colors)))
    # makedirs also creates a missing 'fig' parent, which os.mkdir cannot do.
    os.makedirs(out_dir, exist_ok=True)

    for fig, title, series in zip(figures, titles, y_vals):
        fig.line(list(range(n_times)), series, line_color='blue')
        export_png(fig, os.path.join(out_dir, title + "_c2.png"))
예제 #5
0
def grid_learn_method(base_name):
    """Run the active-learning experiment with XGBoost and then with the NN.

    Results are pickled under ``res/<base_name>/<LEARN_METHOD>`` as
    ``best_learn_method`` / ``res_XG_Boost_learn_method`` for XGBoost and
    ``res_NN_learn_method`` / ``best_NN_learn_method`` for the network.

    ``REFAEL_PARAM['learn_method']`` is always left at ``"XG_Boost"`` on exit
    (the value the original code reset to), including when a run raises.

    :param base_name: name of the sub-directory of ``res`` to write into.
    """
    base = os.path.join("res", base_name, LEARN_METHOD)
    # Create the result directory up front so a long run is not lost to a
    # FileNotFoundError at save time.
    os.makedirs(base, exist_ok=True)

    def _save(file_name, obj):
        # The context manager closes the file even if pickling fails.
        with open(os.path.join(base, file_name), "wb") as out:
            pickle.dump(obj, out)

    try:
        print("XG_boost")
        REFAEL_PARAM['learn_method'] = "XG_Boost"
        res, best = RefaelLearner().run_al()
        _save("best_learn_method", best)
        _save("res_XG_Boost_learn_method", res)

        print("NN")
        REFAEL_PARAM['learn_method'] = "nn"
        res, best = RefaelLearner().run_al()
        _save("res_NN_learn_method", res)
        _save("best_NN_learn_method", best)
    finally:
        REFAEL_PARAM['learn_method'] = "XG_Boost"
예제 #6
0
def grid_min_nodes(base_name, min_nodes_list: list):
    """Run the bi-average experiment once per minimum-node threshold.

    For each threshold ``REFAEL_PARAM['min_nodes']`` is set and
    ``REFAEL_PARAM['batch_size']`` is taken from a fixed table
    (``{0: 20, 3: 15, 5: 8, 10: 4}``).  Four files are pickled under
    ``res/<base_name>/<MIN_NODES>`` per threshold: ``best_min_nodes_<n>``,
    ``res_XGBoost_min_nodes_<n>``, ``rand_min_nodes_<n>`` and
    ``perf_XGBoost_min_nodes_<n>``.

    Both ``min_nodes`` and ``batch_size`` are restored to their previous
    values on exit, including when a run raises.

    :param base_name: name of the sub-directory of ``res`` to write into.
    :param min_nodes_list: thresholds to evaluate; each must be a key of the
        batch-size table.
    :raises KeyError: if a threshold has no entry in the batch-size table.
    """
    min_nodes_to_batch_needed = {0: 20, 3: 15, 5: 8, 10: 4}
    base = os.path.join("res", base_name, MIN_NODES)
    # Create the result directory up front so a long run is not lost to a
    # FileNotFoundError at save time.
    os.makedirs(base, exist_ok=True)

    def _save(file_name, obj):
        # The context manager closes the file even if pickling fails.
        with open(os.path.join(base, file_name), "wb") as out:
            pickle.dump(obj, out)

    default_min_nodes = REFAEL_PARAM['min_nodes']
    # batch_size is overwritten below, so it has to be restored as well.
    default_batch = REFAEL_PARAM['batch_size']
    try:
        for num_nodes in min_nodes_list:
            REFAEL_PARAM['min_nodes'] = num_nodes
            REFAEL_PARAM['batch_size'] = min_nodes_to_batch_needed[
                num_nodes]  # fewer graphs ==> fewer guesses
            res, best, rand, perf = RefaelLearner().run_al_bi_avg(rand=True)
            suffix = str(num_nodes)
            _save("best_min_nodes_" + suffix, best)
            _save("res_XGBoost_min_nodes_" + suffix, res)
            _save("rand_min_nodes_" + suffix, rand)
            _save("perf_XGBoost_min_nodes_" + suffix, perf)
    finally:
        REFAEL_PARAM['min_nodes'] = default_min_nodes
        REFAEL_PARAM['batch_size'] = default_batch
예제 #7
0
def grid_learn_method(base_name):
    """Run the bi-average experiment with XGBoost and then with the NN.

    The XGBoost run uses ``rand=True`` and pickles ``best_learn_method``,
    ``res_XGBoost_learn_method``, ``rand_learn_method`` and
    ``perf_XGBoost_learn_method``; the NN run pickles ``res_NN_learn_method``
    and ``perf_NN_learn_method``.  All files go to
    ``res/<base_name>/<LEARN_METHOD>``.

    ``REFAEL_PARAM['learn_method']`` is always left at ``"XG_Boost"`` on exit
    (the value the original code reset to), including when a run raises.

    :param base_name: name of the sub-directory of ``res`` to write into.
    """
    base = os.path.join("res", base_name, LEARN_METHOD)
    # Create the result directory up front so a long run is not lost to a
    # FileNotFoundError at save time.
    os.makedirs(base, exist_ok=True)

    def _save(file_name, obj):
        # The context manager closes the file even if pickling fails.
        with open(os.path.join(base, file_name), "wb") as out:
            pickle.dump(obj, out)

    try:
        print("XGBoost")
        REFAEL_PARAM['learn_method'] = "XG_Boost"
        res, best, rand, perf = RefaelLearner().run_al_bi_avg(rand=True)
        _save("best_learn_method", best)
        _save("res_XGBoost_learn_method", res)
        _save("rand_learn_method", rand)
        _save("perf_XGBoost_learn_method", perf)

        print("NN")
        REFAEL_PARAM['learn_method'] = "nn"
        res, _, _, perf = RefaelLearner().run_al_bi_avg()
        _save("res_NN_learn_method", res)
        _save("perf_NN_learn_method", perf)
    finally:
        REFAEL_PARAM['learn_method'] = "XG_Boost"
예제 #8
0
def grid_batch_size(base_name, query_batch_list: list):
    """Run the bi-average experiment for each (queries_per_time, batch_size).

    The first pair is run with ``rand=True`` and additionally stores
    ``best_query_batch`` and ``rand_query_batch``; every pair stores
    ``res_XGBoost_query<q>batch<b>`` and ``perf_XGBoost_query<q>batch<b>``.
    All files go to ``res/<base_name>/<QUE_BATCH>``.

    ``REFAEL_PARAM['queries_per_time']`` and ``REFAEL_PARAM['batch_size']``
    are restored to their previous values on exit, including when a run
    raises.

    :param base_name: name of the sub-directory of ``res`` to write into.
    :param query_batch_list: iterable of two-item pairs
        ``(queries_per_time, batch_size)``.
    """
    base = os.path.join("res", base_name, QUE_BATCH)
    # Create the result directory up front so a long run is not lost to a
    # FileNotFoundError at save time.
    os.makedirs(base, exist_ok=True)

    def _save(file_name, obj):
        # The context manager closes the file even if pickling fails.
        with open(os.path.join(base, file_name), "wb") as out:
            pickle.dump(obj, out)

    default_que = REFAEL_PARAM['queries_per_time']
    default_batch = REFAEL_PARAM['batch_size']
    try:
        # Detect the first iteration by position: comparing the unpacked
        # tuple with query_batch_list[0] is never true when the pairs are
        # lists, and is true more than once when a pair is repeated.
        for idx, (queries_per_time, batch_size) in enumerate(
                query_batch_list):
            REFAEL_PARAM['queries_per_time'] = queries_per_time
            REFAEL_PARAM['batch_size'] = batch_size
            if idx == 0:
                res, best, rand, perf = RefaelLearner().run_al_bi_avg(
                    rand=True)
                _save("best_query_batch", best)
                _save("rand_query_batch", rand)
            else:
                res, _, _, perf = RefaelLearner().run_al_bi_avg()
            suffix = "query" + str(queries_per_time) + "batch" + str(
                batch_size)
            _save("res_XGBoost_" + suffix, res)
            _save("perf_XGBoost_" + suffix, perf)
    finally:
        REFAEL_PARAM['queries_per_time'] = default_que
        REFAEL_PARAM['batch_size'] = default_batch
예제 #9
0
def grid_eps(base_name, eps_list: list):
    """Run the active-learning experiment once per ``eps`` value.

    For each value ``best_eps_<eps>`` and ``res_XG_Boost_eps_<eps>`` are
    pickled under ``res/<base_name>/<EPS>``.

    ``REFAEL_PARAM['eps']`` is restored to its previous value on exit,
    including when a run raises.

    :param base_name: name of the sub-directory of ``res`` to write into.
    :param eps_list: ``eps`` values to evaluate, in order.
    """
    base = os.path.join("res", base_name, EPS)
    # Create the result directory up front so a long run is not lost to a
    # FileNotFoundError at save time.
    os.makedirs(base, exist_ok=True)

    def _save(file_name, obj):
        # The context manager closes the file even if pickling fails.
        with open(os.path.join(base, file_name), "wb") as out:
            pickle.dump(obj, out)

    default_eps = REFAEL_PARAM['eps']
    try:
        for eps in eps_list:
            REFAEL_PARAM['eps'] = eps
            res, best = RefaelLearner().run_al()
            _save("best_eps_" + str(eps), best)
            _save("res_XG_Boost_eps_" + str(eps), res)
    finally:
        REFAEL_PARAM['eps'] = default_eps
예제 #10
0
def grid_min_nodes(base_name, min_nodes_list: list):
    """Run the active-learning experiment once per minimum-node threshold.

    For each threshold ``best_min_nodes_<n>`` and
    ``res_XG_Boost_min_nodes_<n>`` are pickled under
    ``res/<base_name>/<MIN_NODES>``.

    ``REFAEL_PARAM['min_nodes']`` is restored to its previous value on exit,
    including when a run raises.

    :param base_name: name of the sub-directory of ``res`` to write into.
    :param min_nodes_list: thresholds to evaluate, in order.
    """
    base = os.path.join("res", base_name, MIN_NODES)
    # Create the result directory up front so a long run is not lost to a
    # FileNotFoundError at save time.
    os.makedirs(base, exist_ok=True)

    def _save(file_name, obj):
        # The context manager closes the file even if pickling fails.
        with open(os.path.join(base, file_name), "wb") as out:
            pickle.dump(obj, out)

    default_min_nodes = REFAEL_PARAM['min_nodes']
    try:
        for num_nodes in min_nodes_list:
            REFAEL_PARAM['min_nodes'] = num_nodes
            res, best = RefaelLearner().run_al()
            _save("best_min_nodes_" + str(num_nodes), best)
            _save("res_XG_Boost_min_nodes_" + str(num_nodes), res)
    finally:
        REFAEL_PARAM['min_nodes'] = default_min_nodes
예제 #11
0
def grid_slide_window(base_name, win_size_list: list):
    """Run the active-learning experiment once per sliding-window size.

    For each size both ``REFAEL_PARAM['start_time']`` and
    ``REFAEL_PARAM['window_size']`` are set to that size, and
    ``best_win_<size>`` / ``res_XG_Boost_win_<size>`` are pickled under
    ``res/<base_name>/<WIN>``.

    Both parameters are restored to their previous values on exit, including
    when a run raises.

    :param base_name: name of the sub-directory of ``res`` to write into.
    :param win_size_list: window sizes to evaluate, in order.
    """
    base = os.path.join("res", base_name, WIN)
    # Create the result directory up front so a long run is not lost to a
    # FileNotFoundError at save time.
    os.makedirs(base, exist_ok=True)
    print("XG_boost")

    def _save(file_name, obj):
        # The context manager closes the file even if pickling fails.
        with open(os.path.join(base, file_name), "wb") as out:
            pickle.dump(obj, out)

    default_start = REFAEL_PARAM['start_time']
    default_win_size = REFAEL_PARAM['window_size']
    try:
        for size in win_size_list:
            REFAEL_PARAM['start_time'] = size
            REFAEL_PARAM['window_size'] = size
            res, best = RefaelLearner().run_al()
            _save("best_win_" + str(size), best)
            _save("res_XG_Boost_win_" + str(size), res)
    finally:
        REFAEL_PARAM['start_time'] = default_start
        # Restore the saved value; the previous code reset this to None and
        # left default_win_size unused.
        REFAEL_PARAM['window_size'] = default_win_size
예제 #12
0
def grid_slide_window(base_name, win_size_list: list):
    """Run the bi-average experiment once per sliding-window size.

    For each size ``REFAEL_PARAM['window_size']`` is set and the run is made
    with ``rand=True``; ``best_win_<size>``, ``res_XGBoost_win_<size>``,
    ``rand_win_<size>`` and ``perf_win_<size>`` are pickled under
    ``res/<base_name>/<WIN>``.

    ``REFAEL_PARAM['start_time']`` and ``REFAEL_PARAM['window_size']`` are
    restored to their previous values on exit, including when a run raises.

    :param base_name: name of the sub-directory of ``res`` to write into.
    :param win_size_list: window sizes to evaluate, in order.
    """
    base = os.path.join("res", base_name, WIN)
    # Create the result directory up front so a long run is not lost to a
    # FileNotFoundError at save time.
    os.makedirs(base, exist_ok=True)
    print("XGBoost")

    def _save(file_name, obj):
        # The context manager closes the file even if pickling fails.
        with open(os.path.join(base, file_name), "wb") as out:
            pickle.dump(obj, out)

    default_start = REFAEL_PARAM['start_time']
    default_win_size = REFAEL_PARAM['window_size']
    try:
        for size in win_size_list:
            REFAEL_PARAM['window_size'] = size
            res, best, rand, perf = RefaelLearner().run_al_bi_avg(rand=True)
            _save("best_win_" + str(size), best)
            _save("res_XGBoost_win_" + str(size), res)
            _save("rand_win_" + str(size), rand)
            _save("perf_win_" + str(size), perf)
    finally:
        # start_time is not changed here, but it is still written back as the
        # original code did.
        REFAEL_PARAM['start_time'] = default_start
        REFAEL_PARAM['window_size'] = default_win_size
예제 #13
0
def grid_bach_size(base_name, query_batch_list: list):
    """Run the active-learning experiment per (queries_per_time, batch_size).

    For each pair ``best_queries_<q>_batch_<b>`` and
    ``res_XG_Boost_queries_<q>_batch_<b>`` are pickled under
    ``res/<base_name>/<QUE_BATCH>``.

    ``REFAEL_PARAM['queries_per_time']`` and ``REFAEL_PARAM['batch_size']``
    are restored to their previous values on exit, including when a run
    raises.

    :param base_name: name of the sub-directory of ``res`` to write into.
    :param query_batch_list: iterable of two-item pairs
        ``(queries_per_time, batch_size)``.
    """
    base = os.path.join("res", base_name, QUE_BATCH)
    # Create the result directory up front so a long run is not lost to a
    # FileNotFoundError at save time.
    os.makedirs(base, exist_ok=True)

    def _save(file_name, obj):
        # The context manager closes the file even if pickling fails.
        with open(os.path.join(base, file_name), "wb") as out:
            pickle.dump(obj, out)

    default_que = REFAEL_PARAM['queries_per_time']
    default_batch = REFAEL_PARAM['batch_size']
    try:
        for queries_per_time, batch_size in query_batch_list:
            REFAEL_PARAM['queries_per_time'] = queries_per_time
            REFAEL_PARAM['batch_size'] = batch_size
            res, best = RefaelLearner().run_al()
            suffix = "queries_" + str(queries_per_time) + "_batch_" + str(
                batch_size)
            _save("best_" + suffix, best)
            _save("res_XG_Boost_" + suffix, res)
    finally:
        REFAEL_PARAM['queries_per_time'] = default_que
        REFAEL_PARAM['batch_size'] = default_batch