def test_graph_statistics_forced_block_model():
    print("\n--- test_graph_statistics_forced_block_model() ---")
    H0 = np.array([[0.1, 0.8, 0.1],
                   [0.8, 0.1, 0.1],
                   [0.1, 0.1, 0.8]])
    alpha0 = np.array([0.4, 0.3, 0.3])
    print("alpha0: ", alpha0)
    print("H0:\n", H0)
    print("\n")

    n = 40
    b = 2
    start = time.time()
    Ws, X = graphGenerator(n, b, H=H0, alpha=alpha0, model='CBM', seed=None, directed=True)
    time_est = time.time()-start
    print("Time for graph generation: ", time_est)
    print("\n")

    Xd = to_dictionary_beliefs(X)
    n_vec = calculate_nVec_from_Xd(Xd)
    P_tot = calculate_Ptot_from_graph(Ws, Xd)
    H = row_normalize_matrix(P_tot)
    print("n_vec: ", n_vec)
    print("alpha: ", 1.*n_vec / sum(n_vec))
    print("P_tot:\n", P_tot)
    print("P:\n", 1. * P_tot / sum(P_tot.flatten()))           # Potential: normalized sum = 1
    print("H:\n", H)

    d_vec = calculate_outdegree_distribution_from_graph(Ws, Xd=None)
    print("Outdegree distribution:\n", d_vec)
    d_vec_list = calculate_outdegree_distribution_from_graph(Ws, Xd)
    print("List of outdegree distributions (one per class):")
    for d_dict in d_vec_list:   # avoid shadowing the built-in 'dict'
        print("  ", d_dict)
Example 2
def run(choice,
        create_data=False,
        add_data=False,
        show_plot=False,
        create_pdf=False,
        show_pdf=False):
    global n
    global d
    global rep_SameGraph
    global FILENAMEZ
    global csv_filename
    global initial_h0
    global exponent
    global length
    global variant

    global alpha_vec
    global beta_vec
    global gamma_vec
    global s_vec
    global clip_on_vec
    global numMaxIt_vec

    # Plotting Parameters
    global xtick_lab
    global xtick_labels
    global ytick_lab
    global xmax
    global xmin
    global ymin
    global ymax
    global labels
    global facecolor_vec
    global draw_std_vec
    global linestyle_vec
    global linewidth_vec
    global marker_vec
    global markersize_vec
    global legend_location

    global option_vec
    global learning_method_vec

    global Macro_Accuracy
    global EC
    global constraints
    global weight_vec
    global randomize_vec
    global k
    global err
    global avoidNeighbors
    global convergencePercentage_W
    global stratified
    global gradient
    global doubly_stochastic
    global num_restarts
    global numberOfSplits
    global H_heuristic
    global fig_label

    global select_lambda_vec
    global lambda_vec
    global f_vec
    global H0c

    # -- Setup
    CHOICE = choice
    # 300 Prop37, 400 MovieLens, 500 Yelp, 600 Flickr, 700 DBLP, 800 Enron, 900 Cora, 1000 Citeseer, 1100 Hep-th, 1200 Pokec
    experiments = [CHOICE]
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PDF = show_pdf
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf

    SHOW_FIG = SHOW_PLOT or SHOW_PDF or CREATE_PDF
    STD_FILL = True
    TIMING = False
    CALCULATE_DATA_STATISTICS = False

    # -- Default Graph parameters
    rep_SameGraph = 10  # iterations on same graph

    initial_h0 = None  # initial vector to start finding optimal H
    exponent = -0.3
    length = 5
    variant = 1

    alpha_vec = [0] * 10
    beta_vec = [0] * 10
    gamma_vec = [0] * 10
    s_vec = [0.5] * 10
    clip_on_vec = [True] * 10
    numMaxIt_vec = [10] * 10

    # Plotting Parameters
    xtick_lab = [0.001, 0.01, 0.1, 1]
    xtick_labels = ['0.1\%', '1\%', '10\%', '100\%']
    ytick_lab = np.arange(0, 1.1, 0.1)
    xmax = 1
    xmin = 0.0001
    ymin = 0.3
    ymax = 0.7
    labels = ['GS', 'LCE', 'MCE', 'DCE', 'DCEr']
    facecolor_vec = [
        'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
        "#64B5CD"
    ]
    draw_std_vec = [False] * 4 + [True]
    linestyle_vec = ['dashed'] + ['solid'] * 10
    linewidth_vec = [4, 4, 2, 1, 2, 2]
    marker_vec = [None, 'o', 'x', '^', 'v', '+']
    markersize_vec = [0, 8, 8, 8, 8, 8, 8]

    option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
    learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
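    # one learning method per plotted variant; the two trailing 'DHE' entries
    # are distinguished by randomize_vec below (the randomized one is 'DCEr')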

    Macro_Accuracy = False
    EC = True  # Non-backtracking for learning
    constraints = True  # True
    weight_vec = [None] * 3 + [10, 10] * 2
    randomize_vec = [False] * 4 + [True] * 2
    k = 3
    err = 0
    avoidNeighbors = False
    convergencePercentage_W = None
    stratified = True
    gradient = True
    doubly_stochastic = True
    num_restarts = None

    draw_std_vec = range(10)
    numberOfSplits = 1

    select_lambda_vec = [False] * 20
    lambda_vec = None

    f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
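    # (21 label fractions spaced geometrically, five per decade, from 0.9 down to 0.9 * 0.1**4 = 9e-05)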
    FILENAMEZ = ""
    legend_location = ""
    fig_label = ""
    H_heuristic = ""

    def choose(choice):
        global n
        global d
        global rep_SameGraph
        global FILENAMEZ
        global initial_h0
        global exponent
        global length
        global variant

        global alpha_vec
        global beta_vec
        global gamma_vec
        global s_vec
        global clip_on_vec
        global numMaxIt_vec

        # Plotting Parameters
        global xtick_lab
        global xtick_labels
        global ytick_lab
        global xmax
        global xmin
        global ymin
        global ymax
        global labels
        global facecolor_vec
        global draw_std_vec
        global linestyle_vec
        global linewidth_vec
        global marker_vec
        global markersize_vec
        global legend_location

        global option_vec
        global learning_method_vec

        global Macro_Accuracy
        global EC
        global constraints
        global weight_vec
        global randomize_vec
        global k
        global err
        global avoidNeighbors
        global convergencePercentage_W
        global stratified
        global gradient
        global doubly_stochastic
        global num_restarts
        global numberOfSplits
        global H_heuristic
        global fig_label

        global select_lambda_vec
        global lambda_vec
        global f_vec

        # -- Default Graph parameters

        if choice == 0:
            pass

        elif choice == 304:  ## with varying weights
            FILENAMEZ = 'prop37'
            Macro_Accuracy = True
            gradient = True
            fig_label = 'Prop37'
            legend_location = 'lower right'
            n = 62000
            d = 34.8
            select_lambda_vec = [False] * 5
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]

        elif choice == 305:  # DCEr Only experiment
            choose(605)
            choose(304)

            select_lambda_vec = [False] * 6

        elif choice == 306:
            choose(304)
            select_lambda_vec = [False] * 3 + [True] * 3
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

            learning_method_vec.append('Holdout')
            labels.append('Holdout')

        elif choice == 307:  # heuristic comparison
            choose(304)
            select_lambda_vec = [False] * 3 + [True] * 3
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec
            learning_method_vec.append('Heuristic')
            labels.append('Heuristic')
            H_heuristic = np.array([[.476, .0476, .476], [.476, .0476, .476],
                                    [.476, .476, .0476]])

        # -- MovieLens dataset
        elif choice == 401:
            FILENAMEZ = 'movielens'
            Macro_Accuracy = True
            gradient = True
            fig_label = 'MovieLens'
            legend_location = 'upper left'

            n = 26850
            d = 25.0832029795

        elif choice == 402:
            choose(401)
            select_lambda_vec = [False] * 3 + [True] * 3  # allow to choose lambda for different f in f_vec

            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

        elif choice == 403:
            choose(402)
            ymin = 0.3
            ymax = 1.0
            learning_method_vec.append('Holdout')
            labels.append('Holdout')

        elif choice == 404:
            choose(401)

            select_lambda_vec = [True] * 3  # allow to choose lambda for different f in f_vec
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

            labels = ['GS', 'DCEr', 'Homophily']
            facecolor_vec = ['black', "#C44E52", "#64B5CD"]
            draw_std_vec = [False, True, False]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [4, 2, 2, 2, 2]
            marker_vec = [None, '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8]

            weight_vec = [None, 10, None]
            option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
            randomize_vec = [False, True, False]
            learning_method_vec = ['GT', 'DHE']  # TODO: only two methods for three labels

        elif choice == 405:  # DCEr ONLY experiment
            choose(605)
            choose(401)
            learning_method_vec += ['Holdout']
            labels += ['Holdout']

        elif choice == 406:  # comparison with a static heuristic matrix
            choose(402)
            learning_method_vec += ['Heuristic']
            labels += ['Heuristic']
            H_heuristic = np.array([[.0476, .476, .476], [.476, .0476, .476],
                                    [.476, .476, .0476]])

        elif choice == 407:
            choose(402)
            ymin = 0.3
            ymax = 1.0
            lambda_vec = [1] * 21  # same length as f_vec

        elif choice == 408:
            choose(402)
            ymin = 0.3
            ymax = 1.0
            lambda_vec = [10] * 21  # same length as f_vec

        # DO NOT RUN WITH CREATE_DATA=True, if you do please restore the data from
        # data/sigmod-movielens-fig.csv
        elif choice == 409:
            choose(402)
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#8172B2", "#C44E52",
                "#C44E52", "#CCB974", "#64B5CD"
            ]
            labels = [
                'GS', 'LCE', 'MCE', 'DCE1', 'DCE10', 'DCEr1', 'DCEr10',
                'Holdout'
            ]
            draw_std_vec = [False] * 5 + [True] * 2 + [False]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [2, 2, 2, 2, 2, 2, 2, 2]
            marker_vec = [None, 'o', 'x', 's', 'p', '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8, 8]
            option_vec = [
                'opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6', 'opt7', 'opt8'
            ]
            legend_location = 'upper left'
            ymin = 0.3
            ymax = 1.0
            lambda_vec = [10] * 21  # same length as f_vec

        # -- Yelp dataset
        elif choice == 501:
            FILENAMEZ = 'yelp'
            Macro_Accuracy = True
            weight_vec = [None] * 3 + [10, 10]
            gradient = True
            ymin = 0.1
            ymax = 0.75
            fig_label = 'Yelp'
            legend_location = 'upper left'

            n = 4301900  # for figure
            d = 6.56  # for figure

        # -- Flickr dataset
        elif choice == 601:
            FILENAMEZ = 'flickr'
            Macro_Accuracy = True
            fig_label = 'Flickr'
            legend_location = 'lower right'
            n = 2007369
            d = 18.1

        elif choice == 602:  ## with varying weights
            choose(601)

            select_lambda_vec = [False] * 4 + [True]  # allow to choose lambda for different f in f_vec
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

        elif choice == 603:  ## with varying weights
            choose(602)

            select_lambda_vec = [False] * 3 + [True] * 2  # allow to choose lambda for different f in f_vec
            # lambda_vec = [1] * 5 + [5] * 5 + [10] * 5 + [1] * 6  # same length as f_vec

        elif choice == 604:  ## with weight = 1
            draw_std_vec = [4]
            choose(603)

            lambda_vec = [0.5] * 21  # same length as f_vec

        elif choice == 605:
            choose(601)
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
                "#64B5CD", 'orange'
            ]
            draw_std_vec = [False] + [True] * 10
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [3] * 10
            marker_vec = [None, 'o', 'x', '^', 'v', '+', 'o', 'x']
            markersize_vec = [0] + [8] * 10

            randomize_vec = [True] * 8
            option_vec = [
                'opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6', 'opt7', 'opt8'
            ]

            learning_method_vec = [
                'GT', 'DHE', 'DHE', 'DHE', 'DHE', 'DHE', 'DHE'
            ]
            select_lambda_vec = [False] * 8
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec
            weight_vec = [0, 0, 1, 2, 5, 10, 15]

            labels = ['GT'] + ['DCEr {}'.format(w) for w in weight_vec[:6]]

        elif choice == 606:  # heuristic experiment
            choose(602)
            labels.append('Heuristic')
            learning_method_vec.append('Heuristic')
            H_heuristic = np.array([[.0476, .476, .476], [.476, .0476, .476],
                                    [.476, .476, .0476]])

        # -- DBLP dataset
        elif choice == 701:
            FILENAMEZ = 'dblp'
            Macro_Accuracy = True
            ymin = 0.2
            ymax = 0.5
            fig_label = 'DBLP'
            legend_location = 'lower right'
            n = 2241258  # for figure
            d = 26.11  # for figure

        # -- ENRON dataset
        elif choice == 801:
            FILENAMEZ = 'enron'
            Macro_Accuracy = True
            ymin = 0.3
            ymax = 0.75
            fig_label = 'Enron'
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            legend_location = 'upper left'
            n = 46463  # for figures
            d = 23.4  # for figures

        elif choice == 802:  ### WITH ADAPTIVE WEIGHTS
            choose(801)

            select_lambda_vec = [False] * 4 + [True] * 2  # allow to choose lambda for different f in f_vec
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

        elif choice == 803:  ### WITH ADAPTIVE WEIGHTS
            choose(802)

            lambda_vec = [1] * 5 + [5] * 5 + [10] * 5 + [1] * 6  # same length as f_vec

        elif choice == 804:
            choose(803)

        elif choice == 805:
            choose(605)
            choose(801)
            #learning_method_vec += ['Holdout']
            #labels += ['Holdout']
        elif choice == 806:  # Heuristic experiment
            choose(802)
            learning_method_vec += ['Heuristic']
            labels += ['Heuristic']
            H_heuristic = np.array([[0.76, 0.08, 0.08, 0.08],
                                    [0.08, 0.08, 0.76, 0.08],
                                    [0.08, 0.76, 0.08, 0.08],
                                    [0.08, 0.08, 0.76, 0.08]])

        elif choice == 821:
            FILENAMEZ = 'enron'
            Macro_Accuracy = True
            constraints = True  # True
            gradient = True
            option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
            learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
            weight_vec = [None] * 3 + [0.2, 0.2]

            randomize_vec = [False] * 4 + [True]
            xmin = 0.0001
            ymin = 0.0
            ymax = 0.7
            labels = ['GS', 'LCE', 'MCE', 'DCE', 'DCE r']
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
                "#64B5CD"
            ]
            draw_std_vec = [4]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [4, 4, 2, 1, 2]
            marker_vec = [None, 'o', 'x', '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8]
            fig_label = 'Enron'
            legend_location = 'lower right'
            n = 46463  # for figures
            d = 23.4  # for figures

            alpha = 0.0
            beta = 0.0
            gamma = 0.0
            s = 0.5
            numMaxIt = 10

            select_lambda_vec = [False] * 3 + [True] * 2
            lambda_vec = [0.2] * 13 + [10] * 8  # same length as f_vec

        # -- Cora dataset
        elif choice == 901:
            FILENAMEZ = 'cora'
            Macro_Accuracy = True
            constraints = True  # True
            option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
            learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
            weight_vec = [None] * 3 + [10, 10]

            numMaxIt_vec = [10] * 10
            randomize_vec = [False] * 4 + [True]
            gradient = True
            xmin = 0.001
            ymin = 0.0
            ymax = 0.9
            labels = ['GT', 'LCE', 'MCE', 'DCE', 'DCE r']
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
                "#64B5CD"
            ]
            draw_std_vec = [4]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [4, 4, 2, 1, 2]
            marker_vec = [None, 'o', 'x', '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8]
            fig_label = 'Cora'
            legend_location = 'lower right'
            n = 2708
            d = 7.8

        # -- Citeseer dataset
        elif choice == 1001:
            FILENAMEZ = 'citeseer'
            Macro_Accuracy = True
            constraints = True  # True
            option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
            learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
            weight_vec = [None] * 3 + [10, 10]

            numMaxIt_vec = [10] * 10
            randomize_vec = [False] * 4 + [True]
            gradient = True
            xmin = 0.001
            ymin = 0.0
            ymax = 0.75
            labels = ['GT', 'LCE', 'MCE', 'DCE', 'DCE r']
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
                "#64B5CD"
            ]
            draw_std_vec = [4]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [4, 4, 2, 1, 2]
            marker_vec = [None, 'o', 'x', '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8]
            fig_label = 'Citeseer'
            legend_location = 'lower right'
            n = 3312
            d = 5.6

        elif choice == 1101:
            FILENAMEZ = 'hep-th'
            Macro_Accuracy = True
            constraints = True  # True
            option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
            learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
            weight_vec = [None] * 3 + [10, 10]

            numMaxIt_vec = [10] * 10
            randomize_vec = [False] * 4 + [True]
            gradient = True
            xmin = 0.0001
            ymin = 0.0
            ymax = 0.1
            labels = ['GT', 'LCE', 'MCE', 'DCE', 'DCE r']
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
                "#64B5CD"
            ]
            draw_std_vec = [4]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [4, 4, 2, 1, 2]
            marker_vec = [None, 'o', 'x', '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8]
            fig_label = 'Hep-th'
            legend_location = 'lower right'
            n = 27770
            d = 5.6

        elif choice == 1102:
            choose(1101)
            Macro_Accuracy = True

        elif choice == 1204:
            FILENAMEZ = 'pokec-gender'
            Macro_Accuracy = True
            constraints = True  # True
            option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
            learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
            weight_vec = [None] * 3 + [10, 10]

            numMaxIt_vec = [10] * 10
            randomize_vec = [False] * 4 + [True]
            gradient = True
            xmin = 0.000015
            ymin = 0.0
            ymax = 0.75
            labels = ['GT', 'LCE', 'MCE', 'DCE', 'DCE r']
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
                "#64B5CD"
            ]
            draw_std_vec = [0, 3, 4, 4, 4, 4]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [4, 4, 2, 1, 2]
            marker_vec = [None, 'o', 'x', '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8]
            fig_label = 'Pokec-Gender'
            legend_location = 'lower right'
            n = 1632803
            d = 54.6

        else:
            raise ValueError("Incorrect choice: {}".format(choice))

    for choice in experiments:

        choose(choice)
        filename = 'Fig_End-to-End_accuracy_realData_{}_{}'.format(
            choice, FILENAMEZ)
        csv_filename = '{}.csv'.format(filename)

        header = [
            'currenttime', 'method', 'f', 'accuracy', 'precision', 'recall',
            'learntime', 'proptime'
        ]
        if CREATE_DATA:
            save_csv_record(join(data_directory, csv_filename),
                            header,
                            append=False)

        # print("choice: {}".format(choice))

        # --- print data statistics
        if CALCULATE_DATA_STATISTICS:

            Xd, W = load_Xd_W_from_csv(
                join(realDataDir, FILENAMEZ) + '-classes.csv',
                join(realDataDir, FILENAMEZ) + '-neighbors.csv')

            X0 = from_dictionary_beliefs(Xd)
            n = len(Xd.keys())
            d = (len(W.nonzero()[0]) * 2) / n

            k = len(X0[0])

            print("FILENAMEZ:", FILENAMEZ)
            print("k:", k)
            print("n:", n)
            print("d:", d)

            # -- Graph statistics
            n_vec = calculate_nVec_from_Xd(Xd)
            print("n_vec:\n", n_vec)
            d_vec = calculate_average_outdegree_from_graph(W, Xd=Xd)
            print("d_vec:\n", d_vec)
            P = calculate_Ptot_from_graph(W, Xd)
            print("P:\n", P)
            for i in range(k):
                Phi = calculate_degree_correlation(W, X0, i, NB=True)
                print("Degree Correlation, Class {}:\n{}".format(i, Phi))

            # -- Various compatibilities
            H0 = estimateH(X0,
                           W,
                           method='MHE',
                           variant=1,
                           distance=1,
                           EC=EC,
                           weights=1,
                           randomize=False,
                           constraints=True,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            print("H0 w/  constraints:\n", np.round(H0, 2))
            #raw_input() # Why?

            H2 = estimateH(X0,
                           W,
                           method='MHE',
                           variant=1,
                           distance=1,
                           EC=EC,
                           weights=1,
                           randomize=False,
                           constraints=True,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            H4 = estimateH(X0,
                           W,
                           method='DHE',
                           variant=1,
                           distance=1,
                           EC=EC,
                           weights=2,
                           randomize=False,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            H5 = estimateH(X0,
                           W,
                           method='DHE',
                           variant=1,
                           distance=1,
                           EC=EC,
                           weights=2,
                           randomize=False,
                           constraints=True,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            H6 = estimateH(X0,
                           W,
                           method='DHE',
                           variant=1,
                           distance=2,
                           EC=EC,
                           weights=10,
                           randomize=False,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            H7 = estimateH(X0,
                           W,
                           method='DHE',
                           variant=1,
                           distance=2,
                           EC=EC,
                           weights=10,
                           randomize=False,
                           constraints=True,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)

            print()
            # print("H MCE w/o constraints:\n", np.round(H0, 3))
            print("H MCE w/  constraints:\n", np.round(H2, 3))
            # print("H DCE 2 w/o constraints:\n", np.round(H4, 3))
            print("H DCE 2 w/  constraints:\n", np.round(H5, 3))
            # print("H DCE 10 w/o constraints:\n", np.round(H6, 3))
            print("H DCE 20 w/  constraints:\n", np.round(H7, 3))

            print()
            H_row_vec = H_observed(W, X0, 3, NB=True, variant=1)
            print("H_est_1:\n", np.round(H_row_vec[0], 3))
            print("H_est_2:\n", np.round(H_row_vec[1], 3))
            print("H_est_3:\n", np.round(H_row_vec[2], 3))

        # --- Create data
        if CREATE_DATA or ADD_DATA:

            Xd, W = load_Xd_W_from_csv(
                join(realDataDir, FILENAMEZ) + '-classes.csv',
                join(realDataDir, FILENAMEZ) + '-neighbors.csv')

            X0 = from_dictionary_beliefs(Xd)
            n = len(Xd.keys())  ## number of nodes in graph
            k = len(X0[0])
            d = (len(W.nonzero()[0]) * 2) / n
            #print(n)
            #print(d)
            #print("contraint = {}".format(constraints))
            #print('select lambda: {}'.format(len(select_lambda_vec)))
            #print('learning method: {}'.format(len(learning_method_vec)))
            #print('alpha: {}'.format(len(alpha_vec)))
            #print('beta: {}'.format(len(beta_vec)))
            #print('gamma: {}'.format(len(gamma_vec)))
            #print('s: {}'.format(len(s_vec)))
            #print('maxit: {}'.format(len(numMaxIt_vec)))
            #print('weight: {}'.format(len(weight_vec)))
            #print('randomize: {}'.format(len(randomize_vec)))
            # ---  Calculating True Compatibility matrix
            H0 = estimateH(X0,
                           W,
                           method='MHE',
                           variant=1,
                           distance=1,
                           EC=EC,
                           weights=1,
                           randomize=False,
                           constraints=constraints,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            # print(H0)
            H0c = to_centering_beliefs(H0)
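            # H0c: centered version of H0 (presumably the beliefs shifted
            # around the uninformative 1/k prior)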

            num_results = len(f_vec) * len(learning_method_vec) * rep_SameGraph

            # Start a process pool with at least 2 workers, and many more on
            # machines with many cores (4 are held back for other work)
            pool = multiprocessing.Pool(max(2,
                                            multiprocessing.cpu_count() - 4))

            f_processes = f_vec * rep_SameGraph
            workers = []
            results = [(X0, W, f, ix)
                       for ix, f in enumerate(f_vec)] * rep_SameGraph
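            # one task tuple per (f value, repetition); consumed by multi_run_wrapper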
            # print('Expected results: {}'.format(num_results))
            try:
                # Distribute the accuracy evaluations over the process pool;
                # map_async().get(timeout) is used instead of a plain map()
                # because Python 2.7 multiprocessing is not fully featured
                pool.map_async(multi_run_wrapper, results).get(num_results * 2)
            except multiprocessing.TimeoutError:
                continue
            finally:
                pool.close()
                pool.join()

        # -- Read data for all options and plot
        df1 = pd.read_csv(join(data_directory, csv_filename))
        acc_filename = '{}_accuracy_plot.pdf'.format(filename)
        pr_filename = '{}_PR_plot.pdf'.format(filename)
        if TIMING:
            print('=== {} Timing Results ==='.format(FILENAMEZ))
            print('Prop Time:\navg: {}\nstddev: {}'.format(
                np.average(df1['proptime'].values),
                np.std(df1['proptime'].values)))
            for learning_method in labels:
                rs = df1.loc[df1["method"] == learning_method]
                avg = np.average(rs['learntime'])
                std = np.std(rs['learntime'])
                print('{} Learn Time:\navg: {}\nstd: {}'.format(
                    learning_method, avg, std))

        sslhv.plot(df1,
                   join(figure_directory, acc_filename),
                   n=n,
                   d=d,
                   k=k,
                   labels=labels,
                   dataset=FILENAMEZ,
                   line_styles=linestyle_vec,
                   xmin=xmin,
                   ymin=ymin,
                   xmax=xmax,
                   ymax=ymax,
                   marker_sizes=markersize_vec,
                   draw_stds=draw_std_vec,
                   markers=marker_vec,
                   line_colors=facecolor_vec,
                   line_widths=linewidth_vec,
                   legend_location=legend_location,
                   show=SHOW_PDF,
                   save=CREATE_PDF,
                   show_plot=SHOW_PLOT)
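
multi_run_wrapper above is defined elsewhere in this repository. Because Pool.map_async passes exactly one argument per task, the parameters are bundled into (X0, W, f, ix) tuples; a minimal sketch of the unpacking wrapper this implies (multi_run_wrapper_sketch and run_single_f are hypothetical names):

def multi_run_wrapper_sketch(args):
    # Unpack one task tuple from the results list built above and forward
    # it to the per-fraction evaluation routine.
    X0, W, f, ix = args
    return run_single_f(X0, W, f, ix)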
Example 3
def run(choice,
        create_data=False,
        add_data=False,
        show_plot=False,
        create_pdf=False,
        show_pdf=False):
    global n
    global d
    global rep_SameGraph
    global FILENAMEZ
    global initial_h0
    global H0c
    global exponent
    global length
    global variant

    global alpha_vec
    global beta_vec
    global gamma_vec
    global s_vec
    global clip_on_vec
    global numMaxIt_vec

    # Plotting Parameters
    global xtick_lab
    global xtick_labels
    global ytick_lab
    global xmax
    global xmin
    global ymin
    global ymax
    global labels
    global facecolor_vec
    global draw_std_vec
    global linestyle_vec
    global linewidth_vec
    global marker_vec
    global markersize_vec
    global legend_location

    global option_vec
    global learning_method_vec

    global Macro_Accuracy
    global EC
    global constraints
    global weight_vec
    global randomize_vec
    global k
    global fig_label
    global err
    global avoidNeighbors
    global convergencePercentage_W
    global stratified
    global gradient
    global doubly_stochastic
    global numberOfSplits

    global select_lambda_vec
    global lambda_vec
    global f_vec
    # -- Setup
    CHOICE = choice
    # 300 Prop37, 500 Yelp, 600 Flickr, 700 DBLP, 800 Enron, 900 Cora, 1000 Citeseer, 1100 Hep-th, 1200 Pokec
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PDF = show_pdf
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf
    STD_FILL = True

    CALCULATE_DATA_STATISTICS = False

    # -- Default Graph parameters
    rep_SameGraph = 3  # iterations on same graph

    initial_h0 = None  # initial vector to start finding optimal H
    exponent = -0.3
    length = 5
    variant = 1

    alpha_vec = [0] * 10
    beta_vec = [0] * 10
    gamma_vec = [0] * 10
    s_vec = [0.5] * 10
    clip_on_vec = [True] * 10
    numMaxIt_vec = [10] * 10

    # Plotting Parameters
    xtick_lab = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]
    xtick_labels = ['0.001\%', '0.01\%', '0.1\%', '1\%', '10\%', '100\%']
    ytick_lab = np.arange(0, 1.1, 0.1)
    xmax = 1
    xmin = 0.0001
    ymin = 0.3
    ymax = 0.7
    labels = ['GT', 'LCE', 'MCE', 'DCE', 'DCE r']
    facecolor_vec = [
        'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
        "#64B5CD"
    ]
    draw_std_vec = [0, 3, 4, 4, 4, 4]
    linestyle_vec = ['dashed'] + ['solid'] * 10
    linewidth_vec = [4, 4, 2, 1, 2]
    marker_vec = [None, 'o', 'x', '^', 'v', '+']
    markersize_vec = [0, 8, 8, 8, 8, 8, 8]

    option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
    learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']

    Macro_Accuracy = False
    EC = True  # Non-backtracking for learning
    constraints = True  # True
    weight_vec = [None] * 3 + [10, 10]
    randomize_vec = [False] * 4 + [True]
    k = 3
    err = 0
    avoidNeighbors = False
    convergencePercentage_W = None
    stratified = True
    gradient = True
    doubly_stochastic = True

    draw_std_vec = range(10)
    numberOfSplits = 1

    select_lambda_vec = [False] * 20
    lambda_vec = None

    f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
    FILENAMEZ = ""
    legend_location = ""
    fig_label = ""
    global exp_backoff
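    # exponential backoff for worker joins: timeouts of 64, 128, ..., 2048 seconds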
    exp_backoff = [2**i for i in range(6, 12)]  # 'i', not 'n': Py2 list comprehensions leak the loop variable into the enclosing (here global) scope

    def choose(choice):
        # -- Default Graph parameters
        global n
        global d
        global rep_SameGraph
        global FILENAMEZ
        global initial_h0
        global exponent
        global length
        global variant

        global alpha_vec
        global beta_vec
        global gamma_vec
        global s_vec
        global clip_on_vec
        global numMaxIt_vec

        # Plotting Parameters
        global xtick_lab
        global xtick_labels
        global ytick_lab
        global xmax
        global xmin
        global ymin
        global ymax
        global labels
        global facecolor_vec
        global draw_std_vec
        global linestyle_vec
        global linewidth_vec
        global marker_vec
        global markersize_vec
        global legend_location

        global option_vec
        global learning_method_vec

        global Macro_Accuracy
        global EC
        global constraints
        global weight_vec
        global randomize_vec
        global k
        global fig_label
        global err
        global avoidNeighbors
        global convergencePercentage_W
        global stratified
        global gradient
        global doubly_stochastic
        global numberOfSplits

        global select_lambda_vec
        global lambda_vec
        global f_vec
        if choice == 0:
            pass

        elif choice == 304:  ## with varying weights
            FILENAMEZ = 'prop37'
            Macro_Accuracy = True
            fig_label = 'Prop37'
            legend_location = 'lower right'
            n = 62000
            d = 34.8
            select_lambda_vec = [False] * 5
            # select_lambda_vec = [False] * 3 + [True] * 2  # allow to choose lambda for different f in f_vec
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            # lambda_vec = [0.5] * 21  # same length as f_vec

        elif choice == 305:  # Test row stochastic cases
            choose(304)
            doubly_stochastic = False

        # -- Yelp dataset
        elif choice == 501:
            FILENAMEZ = 'yelp'
            Macro_Accuracy = True
            weight_vec = [None] * 3 + [10, 10]
            gradient = True
            ymin = 0.1
            ymax = 0.75
            fig_label = 'Yelp'
            legend_location = 'upper left'

            n = 4301900  # for figure
            d = 6.56  # for figure

        # -- Flickr dataset
        elif choice == 601:
            FILENAMEZ = 'flickr'
            Macro_Accuracy = True
            fig_label = 'Flickr'
            legend_location = 'lower right'
            n = 2007369
            d = 18.1

        elif choice == 602:  ## with varying weights
            choose(601)

            select_lambda_vec = [False] * 4 + [True]  # allow to choose lambda for different f in f_vec
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

        elif choice == 603:  ## with varying weights
            choose(602)

            select_lambda_vec = [False] * 3 + [True] * 2  # allow to choose lambda for different f in f_vec
            # lambda_vec = [1] * 5 + [5] * 5 + [10] * 5 + [1] * 6  # same length as f_vec

        elif choice == 604:  ## with weight = 1
            draw_std_vec = [4]
            choose(603)

            lambda_vec = [0.5] * 21  # same length as f_vec

        # -- DBLP dataset
        elif choice == 701:
            FILENAMEZ = 'dblp.txt'
            Macro_Accuracy = True
            ymin = 0.2
            ymax = 0.5
            fig_label = 'DBLP'
            legend_location = 'lower right'
            n = 2241258  # for figure
            d = 26.11  # for figure

        # -- ENRON dataset
        elif choice == 801:
            FILENAMEZ = 'enron'
            Macro_Accuracy = True
            ymin = 0.3
            ymax = 0.75
            fig_label = 'Enron'
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            legend_location = 'upper left'
            n = 46463  # for figures
            d = 23.4  # for figures

        elif choice == 802:  ### WITH ADAPTIVE WEIGHTS
            choose(801)

            select_lambda_vec = [False] * 4 + [True]  # allow to choose lambda for different f in f_vec
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

        elif choice == 803:  ### WITH ADAPTIVE WEIGHTS
            choose(802)

            lambda_vec = [1] * 5 + [5] * 5 + [10] * 5 + [1] * 6  # same length as f_vec

        elif choice == 804:
            choose(803)

        elif choice == 805:
            choose(801)
            doubly_stochastic = False

        elif choice == 821:
            FILENAMEZ = 'enron'
            Macro_Accuracy = True
            constraints = True  # True
            gradient = True
            option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
            learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
            weight_vec = [None] * 3 + [0.2, 0.2]

            randomize_vec = [False] * 4 + [True]
            xmin = 0.0001
            ymin = 0.0
            ymax = 0.7
            labels = ['GS', 'LCE', 'MCE', 'DCE', 'DCE r']
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
                "#64B5CD"
            ]
            draw_std_vec = [4]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [4, 4, 2, 1, 2]
            marker_vec = [None, 'o', 'x', '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8]
            fig_label = 'Enron'
            legend_location = 'lower right'
            n = 46463  # for figures
            d = 23.4  # for figures

            alpha = 0.0
            beta = 0.0
            gamma = 0.0
            s = 0.5
            numMaxIt = 10

            select_lambda_vec = [False] * 3 + [True] * 2
            lambda_vec = [0.2] * 13 + [10] * 8  # same length as f_vec
            captionText = "DCE weight=[0.2*13] [10*8], s={}, numMaxIt={}".format(
                s, numMaxIt)

        # -- Cora dataset
        elif choice == 901:
            FILENAMEZ = 'cora'
            Macro_Accuracy = True
            constraints = True  # True
            option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
            learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
            weight_vec = [None] * 3 + [10, 10]

            numMaxIt_vec = [10] * 10
            randomize_vec = [False] * 4 + [True]
            gradient = True
            xmin = 0.001
            ymin = 0.0
            ymax = 0.9
            labels = ['GT', 'LCE', 'MCE', 'DCE', 'DCE r']
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
                "#64B5CD"
            ]
            draw_std_vec = [4]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [4, 4, 2, 1, 2]
            marker_vec = [None, 'o', 'x', '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8]
            fig_label = 'Cora'
            legend_location = 'lower right'
            n = 2708
            d = 7.8

        # -- Citeseer dataset
        elif choice == 1001:
            FILENAMEZ = 'citeseer'
            Macro_Accuracy = True
            constraints = True  # True
            option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
            learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
            weight_vec = [None] * 3 + [10, 10]

            numMaxIt_vec = [10] * 10
            randomize_vec = [False] * 4 + [True]
            gradient = True
            xmin = 0.001
            ymin = 0.0
            ymax = 0.75
            labels = ['GT', 'LCE', 'MCE', 'DCE', 'DCE r']
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
                "#64B5CD"
            ]
            draw_std_vec = [4]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [4, 4, 2, 1, 2]
            marker_vec = [None, 'o', 'x', '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8]
            fig_label = 'Citeseer'
            legend_location = 'lower right'
            n = 3312
            d = 5.6

        elif choice == 1101:
            FILENAMEZ = 'hep-th'
            Macro_Accuracy = True
            constraints = True  # True
            option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
            learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
            weight_vec = [None] * 3 + [10, 10]

            numMaxIt_vec = [10] * 10
            randomize_vec = [False] * 4 + [True]
            gradient = True
            xmin = 0.0001
            ymin = 0.0
            ymax = 0.1
            labels = ['GT', 'LCE', 'MCE', 'DCE', 'DCE r']
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
                "#64B5CD"
            ]
            draw_std_vec = [4]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [4, 4, 2, 1, 2]
            marker_vec = [None, 'o', 'x', '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8]
            fig_label = 'Hep-th'
            legend_location = 'lower right'
            n = 27770
            d = 5.6

        elif choice == 1204:
            FILENAMEZ = 'pokec-gender'
            Macro_Accuracy = True
            constraints = True  # True
            option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
            learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
            weight_vec = [None] * 3 + [10, 10]

            numMaxIt_vec = [10] * 10
            randomize_vec = [False] * 4 + [True]
            gradient = True
            xmin = 0.000015
            ymin = 0.0
            ymax = 0.75
            labels = ['GT', 'LCE', 'MCE', 'DCE', 'DCE r']
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
                "#64B5CD"
            ]
            draw_std_vec = [0, 3, 4, 4, 4, 4]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [4, 4, 2, 1, 2]
            marker_vec = [None, 'o', 'x', '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8]
            fig_label = 'Pokec-Gender'
            legend_location = 'lower right'
            n = 1632803
            d = 54.6

        else:
            raise ValueError("Incorrect choice: {}".format(choice))

    choose(CHOICE)

    csv_filename = 'Fig_End-to-End_accuracy_{}_{}.csv'.format(
        CHOICE, FILENAMEZ)
    header = ['currenttime', 'method', 'f', 'precision', 'recall', 'accuracy']
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename),
                        header,
                        append=False)

    # print("choice: {}".format(CHOICE))

    # --- print data statistics
    if CALCULATE_DATA_STATISTICS:

        Xd, W = load_Xd_W_from_csv(
            join(realDataDir, FILENAMEZ) + '-classes.csv',
            join(realDataDir, FILENAMEZ) + '-neighbors.csv')

        X0 = from_dictionary_beliefs(Xd)
        n = len(Xd.keys())
        d = (len(W.nonzero()[0]) * 2) / n

        print("FILENAMEZ:", FILENAMEZ)
        print("n:", n)
        print("d:", d)

        # -- Graph statistics
        n_vec = calculate_nVec_from_Xd(Xd)
        print("n_vec:\n", n_vec)
        d_vec = calculate_average_outdegree_from_graph(W, Xd=Xd)
        print("d_vec:\n", d_vec)
        P = calculate_Ptot_from_graph(W, Xd)
        print("P:\n", P)

        # -- Various compatibilities
        H0 = estimateH(X0,
                       W,
                       method='MHE',
                       variant=1,
                       distance=1,
                       EC=EC,
                       weights=1,
                       randomize=False,
                       constraints=True,
                       gradient=gradient,
                       doubly_stochastic=doubly_stochastic)
        print("H0 w/  constraints:\n", np.round(H0, 2))
        # raw_input()  # optional pause for inspection (Python 2 only)

        H2 = estimateH(X0,
                       W,
                       method='MHE',
                       variant=1,
                       distance=1,
                       EC=EC,
                       weights=1,
                       randomize=False,
                       constraints=True,
                       gradient=gradient,
                       doubly_stochastic=doubly_stochastic)
        H4 = estimateH(X0,
                       W,
                       method='DHE',
                       variant=1,
                       distance=1,
                       EC=EC,
                       weights=2,
                       randomize=False,
                       gradient=gradient,
                       doubly_stochastic=doubly_stochastic)
        H5 = estimateH(X0,
                       W,
                       method='DHE',
                       variant=1,
                       distance=1,
                       EC=EC,
                       weights=2,
                       randomize=False,
                       constraints=True,
                       gradient=gradient,
                       doubly_stochastic=doubly_stochastic)
        H6 = estimateH(X0,
                       W,
                       method='DHE',
                       variant=1,
                       distance=2,
                       EC=EC,
                       weights=10,
                       randomize=False,
                       gradient=gradient,
                       doubly_stochastic=doubly_stochastic)
        H7 = estimateH(X0,
                       W,
                       method='DHE',
                       variant=1,
                       distance=2,
                       EC=EC,
                       weights=10,
                       randomize=False,
                       constraints=True,
                       gradient=gradient,
                       doubly_stochastic=doubly_stochastic)

        # print("H MCE w/o constraints:\n", np.round(H0, 3))
        print("H MCE w/  constraints:\n", np.round(H2, 3))
        # print("H DCE 2 w/o constraints:\n", np.round(H4, 3))
        print("H DCE 2 w/  constraints:\n", np.round(H5, 3))
        # print("H DCE 10 w/o constraints:\n", np.round(H6, 3))
        print("H DCE 20 w/  constraints:\n", np.round(H7, 3))

        H_row_vec = H_observed(W, X0, 3, NB=True, variant=1)
        print("H_est_1:\n", np.round(H_row_vec[0], 3))
        print("H_est_2:\n", np.round(H_row_vec[1], 3))
        print("H_est_3:\n", np.round(H_row_vec[2], 3))

    # --- Create data
    if CREATE_DATA or ADD_DATA:

        Xd, W = load_Xd_W_from_csv(
            join(realDataDir, FILENAMEZ) + '-classes.csv',
            join(realDataDir, FILENAMEZ) + '-neighbors.csv')

        X0 = from_dictionary_beliefs(Xd)
        n = len(Xd.keys())  ## number of nodes in graph

        d = (len(W.nonzero()[0]) * 2) / n
        # print(n)
        # print(d)
        # print("contraint = {}".format(constraints))

        # ---  Calculating True Compatibility matrix
        H0 = estimateH(X0,
                       W,
                       method='MHE',
                       variant=1,
                       distance=1,
                       EC=EC,
                       weights=1,
                       randomize=False,
                       constraints=constraints,
                       gradient=gradient,
                       doubly_stochastic=doubly_stochastic)
        # print(H0)
        H0c = to_centering_beliefs(H0)

        graph_workers = []
        gq = multiprocessing.Queue()
        for j in range(rep_SameGraph):  # repeat several times for same graph

            # print("Graph: {}".format(j))
            graph_workers.append(
                multiprocessing.Process(target=graph_worker, args=(X0, W, gq)))

        for gw in graph_workers:
            gw.start()

        for gw in graph_workers:
            for t in exp_backoff:
                gw.join(t)
                if gw.exitcode is None:
                    print(
                        "failed to join graph worker {} after {} seconds, retrying"
                        .format(gw, t))
                else:
                    break  # worker finished; stop waiting on it
            else:
                # runs only if every backoff timeout elapsed without a join
                print("Failed to join graph worker {}.".format(gw))

        gq.put('STOP')
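        # drain the results queue up to the 'STOP' sentinel; each item is one CSV record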
        for i in iter(gq.get, 'STOP'):
            save_csv_record(join(data_directory, csv_filename), i)

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))
    acc_filename = 'Fig_End-to-End_accuracy_realData{}_{}.pdf'.format(
        CHOICE, FILENAMEZ)
    pr_filename = 'Fig_End-to-End_PR_realData{}_{}.pdf'.format(
        CHOICE, FILENAMEZ)
    # generate_figure(data_directory, acc_filename, df1)
    # generate_figure(data_directory, pr_filename, df1, metric='pr')

    # print("\n-- df1: (length {}):\n{}".format(len(df1.index), df1.head(5)))

    # Aggregate repetitions
    if "option" in df1.columns.values:
        pivot_col = "option"
        pivot_vec = option_vec
    else:
        pivot_col = "method"
        pivot_vec = learning_method_vec

    df2 = df1.groupby([pivot_col, 'f']).agg(
        {'accuracy': [np.mean, np.std, np.size]})  # multiple aggregates
    df2.columns = ['_'.join(col).strip()
                   for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'accuracy_size': 'count'}, inplace=True)
    # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(500)))

    # Pivot table
    df3 = pd.pivot_table(df2,
                         index='f',
                         columns=pivot_col,
                         values=['accuracy_mean', 'accuracy_std'])  # Pivot
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
    df3.columns = ['_'.join(col).strip()
                   for col in df3.columns.values]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy
    # df2.rename(columns={'time_size': 'count'}, inplace=True)
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(5)))
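    # after flattening, df3 has one row per f and columns named like
    # 'accuracy_mean_<pivot value>' / 'accuracy_std_<pivot value>'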

    # Extract values
    X_f = df3['f'].values  # plot x values
    Y = []
    Y_std = []
    for val in pivot_vec:
        Y.append(df3['accuracy_mean_{}'.format(val)].values)
        if STD_FILL:
            Y_std.append(df3['accuracy_std_{}'.format(val)].values)

    if CREATE_PDF or SHOW_PDF or SHOW_PLOT:
        print("Setting up figure...")

        # -- Setup figure
        fig_filename = 'Fig_End-to-End_accuracy_realData{}_{}.pdf'.format(
            CHOICE, FILENAMEZ)
        mpl.rc(
            'font', **{
                'family': 'sans-serif',
                'sans-serif': [u'Arial', u'Liberation Sans']
            })
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 14  # 6
        mpl.rcParams['grid.color'] = '#777777'  # grid color
        mpl.rcParams['xtick.major.pad'] = 2  # padding of tick labels: default = 4
        mpl.rcParams['ytick.major.pad'] = 1  # padding of tick labels: default = 4
        mpl.rcParams['xtick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['ytick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['figure.figsize'] = [4, 4]
        fig = figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        #  -- Drawing
        if STD_FILL:
            for choice, (option,
                         facecolor) in enumerate(zip(option_vec,
                                                     facecolor_vec)):
                if choice in draw_std_vec:
                    ax.fill_between(X_f,
                                    Y[choice] + Y_std[choice],
                                    Y[choice] - Y_std[choice],
                                    facecolor=facecolor,
                                    alpha=0.2,
                                    edgecolor=None,
                                    linewidth=0)
                    ax.plot(X_f,
                            Y[choice] + Y_std[choice],
                            linewidth=0.5,
                            color='0.8',
                            linestyle='solid')
                    ax.plot(X_f,
                            Y[choice] - Y_std[choice],
                            linewidth=0.5,
                            color='0.8',
                            linestyle='solid')

        for choice, (option, label, color, linewidth, clip_on, linestyle, marker, markersize) in \
                enumerate(zip(option_vec, labels, facecolor_vec, linewidth_vec, clip_on_vec, linestyle_vec, marker_vec, markersize_vec)):
            ax.plot(X_f,
                    Y[choice],
                    linewidth=linewidth,
                    color=color,
                    linestyle=linestyle,
                    label=label,
                    zorder=4,
                    marker=marker,
                    markersize=markersize,
                    markeredgewidth=1,
                    clip_on=clip_on)

        # -- Title and legend
        if n < 1000:
            n_label = '{}'.format(n)
        else:
            n_label = '{}k'.format(int(n / 1000))

        title(r'$\!\!\!\!\!\!\!${}: $n={}, d={}$'.format(
            fig_label, n_label, np.round(d, 1)))
        handles, labels = ax.get_legend_handles_labels()
        legend = plt.legend(
            handles,
            labels,
            loc=legend_location,  # 'upper right'
            handlelength=2,
            labelspacing=0,  # distance between label entries
            handletextpad=0.3,  # distance between label and the line representation
            # title='Variants',
            borderaxespad=0.2,  # distance between legend and the outer axes
            borderpad=0.3,  # padding inside legend box
            numpoints=1,  # put the marker only once
        )
        # # legend.set_zorder(1)
        frame = legend.get_frame()
        frame.set_linewidth(0.0)
        frame.set_alpha(0.9)  # 0.8
        plt.xscale('log')

        # -- Figure settings and save
        plt.xticks(xtick_lab, xtick_labels)
        plt.yticks(ytick_lab, ytick_lab)

        # Only show ticks on the left and bottom spines
        ax.yaxis.set_ticks_position('left')
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))

        grid(b=True,
             which='major',
             axis='both',
             alpha=0.2,
             linestyle='solid',
             linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
        grid(b=True,
             which='minor',
             axis='both',
             alpha=0.2,
             linestyle='solid',
             linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
        xlabel(r'Label Sparsity $(f)$', labelpad=0)  # labelpad=0
        ylabel(r'Accuracy', labelpad=0)

        xlim(xmin, xmax)
        ylim(ymin, ymax)

        if CREATE_PDF:
            print("saving PDF of figure...")
            savefig(join(figure_directory, fig_filename),
                    format='pdf',
                    dpi=None,
                    edgecolor='w',
                    orientation='portrait',
                    transparent=False,
                    bbox_inches='tight',
                    pad_inches=0.05,
                    # frameon=None,             # TODO: frameon deprecated
                    )

        if SHOW_PLOT:
            print("Showing plot...")
            plt.show()

        if SHOW_PDF:
            print("Showing pdf...")
            showfig(join(figure_directory,
                         fig_filename))  # shows actually created PDF
def test_smallMotivatingGraph_statistics():
    # Exercises: create_blocked_matrix_from_graph, test_calculate_Ptot_from_graph,
    # calculate_outdegree_distribution_from_graph, calculate_average_outdegree_from_graph.
    # Uses the motivating example with 15 nodes from the VLDB introduction.
    # Weights the edges so the affinities are easier to see in the blocked matrix.

    # VERSION = 'directed'        # 1: directed
    VERSION = 'undirected'      # 2: undirected

    print("\n--- Example graph from VLDB slides ---")
    Xd = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0,
          5: 1, 6: 1, 7: 1, 8: 1, 9: 1,
          10: 2, 11: 2, 12: 2, 13: 2, 14: 2,
          # 15: 2                                   # length of dictionary needs to equal the number of nodes in the edge list
          }
    # # Original VLDB drawing
    # row = [0, 2,  0, 1, 1, 2, 2, 2, 3, 4,   3,  4,  6, 7,   8,  9,  10, 10, 10, 11, 11, 11, 12, 13,]
    # col = [1, 3,  5, 8, 9, 6, 7, 8, 9, 5,  11, 12,  8, 9,  10, 14,  11, 12, 14, 12, 13, 14, 14, 14,]
    # weight = [1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, ]
    # # Corrected undirected graph with correct P_tot [2, 8, 2]
    # row = [0, 0, 1, 1, 2, 2, 2, 3, 4,   3,  4,  6,   8,  9,  10, 11, 12, 13,]
    # col = [1, 5, 8, 9, 6, 7, 8, 9, 5,  11, 12,  8,  10, 14,  11, 13, 14, 14,]
    # weight = [1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 5, 6, 6, 6, 6, ]
    # # Corrected undirected graph with correct P_tot [2, 6, 2]
    row =    [0, 0, 1, 1, 2, 2, 3,  3,  4, 6,  8,  9, 10, 12, 13,]
    col =    [1, 5, 8, 9, 6, 7, 9, 11, 12, 8, 10, 14, 11, 14, 14,]
    weight = [1, 2, 2, 2, 2, 2, 2,  3,  3, 4,  5,  5,  6,  6,  6,]

    print("Xd:", Xd)
    if VERSION == 'undirected':
        weight = weight + weight
        row, col = row + col, col + row             # !!! assignment at same time: same line
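        # e.g. row=[0], col=[5] -> row=[0, 5], col=[5, 0]: each undirected edge
        # is stored once per direction (the weights were duplicated above to match)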
    print("row:", row)
    print("col:", col)
    print("weight:", weight, "\n")

    print("- Random permutation of node ids:")
    ranks = np.random.permutation(len(Xd))          # ranks is the new mapping vector
    print("ranks:", ranks)
    row2 = ranks[row]                               # !!! mapping
    col2 = ranks[col]
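    # fancy indexing relabels all endpoints at once: with ranks=[2, 0, 1, ...],
    # node 0 becomes node 2, node 1 becomes node 0, etc., in row and col alike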
    print("row2:", row2.tolist())          # list plots nicer than np.array
    print("col2:", col2.tolist())
    print("weight:", weight)
    W_rand = csr_matrix((weight, (row2, col2)), shape=(15, 15))
    nodes = np.array(list(Xd.keys()))
    nodes2 = ranks[nodes]
    print("nodes:  ", nodes)
    print("nodes2: ", nodes2)
    classes = np.array(list(Xd.values()))   # Python 3 requires wrapping dict.keys() / dict.values() in list()
    print("classes:  ", classes)
    Xd_rand = dict(zip(ranks[nodes], classes))
    print("Xd_rand: {}".format(Xd_rand))
    print("W_rand:\n{}".format(W_rand.todense()))

    print("\n- 'create_blocked_matrix_from_graph():' ")
    W_block, Xd_new = create_blocked_matrix_from_graph(W_rand, Xd_rand)
    W = W_block
    Xd = Xd_new

    print("W:\n{}".format(W.todense()))

    print("\n- 'test_calculate_Ptot_from_graph():' ")
    W2 = csr_matrix(W, copy=True)
    W2.data[:] = np.sign(W2.data)                   # W contains weighted edges -> unweighted before counting edges with Ptot
    Ptot = calculate_Ptot_from_graph(W2, Xd)
    print("Ptot:\n{}".format(Ptot))

    print("\n- 'test_calculate_nVec_from_Xd():' ")
    n_vec = calculate_nVec_from_Xd(Xd)
    print("n_vec: {}".format(n_vec))

    print("\n- 'calculate_outdegree_distribution_from_graph():' ")
    print("Outdegree distribution: {}".format( calculate_outdegree_distribution_from_graph(W, Xd=None) ))
    # print ("Outdegree distribution: {}".format( sorted(calculate_outdegree_distribution_from_graph(W, Xd=None).items()) ))
    print("Outdegree distribution per class: {}".format( calculate_outdegree_distribution_from_graph(W, Xd) ))
    print("Indegree distribution: {}".format( calculate_outdegree_distribution_from_graph(W.transpose(), Xd=None) ))
    print("Indegree distribution per class: {}".format(calculate_outdegree_distribution_from_graph(W.transpose(), Xd)))

    print("\n- 'calculate_average_outdegree_from_graph():' ")
    print("Average outdegree: {}".format(calculate_average_outdegree_from_graph(W, Xd=None)))
    print("Average outdegree per class: {}".format(calculate_average_outdegree_from_graph(W, Xd)))
    print("Average indegree: {}".format(calculate_average_outdegree_from_graph(W.transpose(), Xd=None)))
    print("Average indegree per class: {}".format(calculate_average_outdegree_from_graph(W.transpose(), Xd)))

    print("\n- Visualize adjacency matrix")
    plt.matshow(W.todense(), fignum=100, cmap=plt.cm.Greys)  # cmap=plt.cm.gray / Blues
    plt.xticks([4.5, 9.5])
    plt.yticks([4.5, 9.5])
    plt.grid(which='major')
    frame = plt.gca()
    frame.axes.xaxis.set_ticklabels([])
    frame.axes.yaxis.set_ticklabels([])
    plt.savefig('figs/Fig_test_calculate_Ptot_from_graph.png')
    os.system('open "figs/Fig_test_calculate_Ptot_from_graph.png"')
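# The tests above repeatedly read outdegrees from W and indegrees from W.transpose().
# A minimal self-contained sketch of that pattern (plain numpy/scipy only; the
# project helpers such as calculate_outdegree_distribution_from_graph are not used):
def _sketch_degrees_from_sparse_W():
    import numpy as np
    from scipy.sparse import csr_matrix
    row, col, weight = [0, 0, 1], [1, 2, 2], [1, 1, 1]
    W = csr_matrix((weight, (row, col)), shape=(3, 3))
    out_deg = np.asarray(W.sum(axis=1)).flatten()             # row sums = outdegrees
    in_deg = np.asarray(W.transpose().sum(axis=1)).flatten()  # column sums = indegrees
    print("out:", out_deg, "in:", in_deg)                     # out: [2 1 0] in: [0 1 2]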
def test_calculate_nVec_from_Xd():
    print("\n--- 'calculate_nVec_from_Xd(Xd):' ---")
    # Xd = {'n1': 1, 'n2' : 2, 3: 3, 4: 1, 5: 0, 6: 0, 7:0}     # Python 2 allowed comparing str and int, not anymore in Python 3
    Xd = {1: 1, 2: 2, 3: 3, 4: 1, 5: 0, 6: 0, 7: 0}
    print("Xd: {}".format(Xd))
    print("Result: {}".format(calculate_nVec_from_Xd(Xd)))
def test_planted_distribution_model():
    """ Tests the main graph generator with statistics and visualized degree distribution and edge adjacency matrix
    """
    print("\n--- 'planted_distribution_model_H', 'planted_distribution_model_P', 'number_of_connectedComponents', 'create_blocked_matrix_from_graph' --")
    CHOICE = 21
    print("CHOICE:", CHOICE)
    debug = 0

    directed = True                     # default, overridden by the undirected CHOICEs below; !!! TODO: not yet clear what undirected means here, only P accepts directed
    backEdgesAllowed = True             # ??? should be enforced in code
    sameInAsOutDegreeRanking = False
    distribution = 'powerlaw'
    exponent = -0.3
    VERSION_P = True


    # --- AAAI figures ---
    if CHOICE in [1, 2, 3, 4, 5, 6]:
        n = 120
        alpha0 = [1/6, 1/3, 1/2]
        h = 8
        P = np.array([[1, h, 1],
                      [1, 1, h],
                      [h, 1, 1]])

    if CHOICE == 1:                     # P (equivalent to 2), AAAI 2
        m = 1080

    elif CHOICE == 2:                   # H (equivalent to 1)
        H0 = row_normalize_matrix(P)
        d_vec = [18, 9, 6]
        VERSION_P = False

    elif CHOICE == 3:                   # H (equivalent to 4), AAAI 3
        H0 = row_normalize_matrix(P)
        d_vec = 9
        VERSION_P = False

    elif CHOICE == 4:                   # P (equivalent to 3)
        P = np.array([[1, h, 1],
                      [2, 2, 2*h],
                      [3*h, 3, 3]])
        m = 1080

    elif CHOICE == 5:                   # H (equivalent to 2), but backedges=False
        H0 = row_normalize_matrix(P)
        d_vec = [18, 9, 6]
        VERSION_P = False
        backEdgesAllowed = False

    elif CHOICE == 6:                   # P undirected, AAAI 4
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        directed = False
        backEdgesAllowed = False
        m = 540

    # --- AGAIN DIRECTED ---
    if CHOICE == 12:
        n = 1001
        alpha0 = [0.6, 0.2, 0.2]
        P = np.array([[0.1, 0.8, 0.1],
                      [0.8, 0.1, 0.1],
                      [0.1, 0.1, 0.8]])
        m = 3000
        distribution = 'uniform'    # uniform powerlaw
        exponent = None
        backEdgesAllowed = False    # ??? should be enforced in code

    if CHOICE == 13:
        # Nice for block matrix visualization
        n = 1000
        alpha0 = [0.334, 0.333, 0.333]
        h = 2
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        m = 2000
        distribution = 'uniform'    # uniform powerlaw
        exponent = None
        backEdgesAllowed = False    # ??? should be enforced in code

    if CHOICE == 14:
        n = 1000
        alpha0 = [0.3334, 0.3333, 0.3333]
        h = 10
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        m = 10000
        exponent = -0.55


    # --- UNDIRECTED ---
    if CHOICE == 20:
        n = 100
        alpha0 = [0.6, 0.2, 0.2]
        h = 1.4
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        H0 = row_normalize_matrix(P)
        d_vec = 5
        directed = False
        exponent = -0.3
        VERSION_P = False

    elif CHOICE == 21:
        n = 1001
        alpha0 = [0.6, 0.2, 0.2]
        h = 4
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        H0 = row_normalize_matrix(P)
        d_vec = 3.4                 # use a scalar (not a per-class vector) for undirected
        distribution = 'uniform'    # uniform powerlaw
        exponent = -0.5
        directed = False
        backEdgesAllowed = True             # ignored in code for undirected
        VERSION_P = False
        sameInAsOutDegreeRanking = True     # ignored in code for undirected

    elif CHOICE == 22:
        n = 1000
        m = 3000
        alpha0 = [0.6, 0.2, 0.2]
        h = 4
        P = np.array([[1, 3*h, 1],
                      [2*h, 1, 1],
                      [1, 1, h]])
        distribution = 'uniform'    # uniform powerlaw
        exponent = -0.5
        directed = False
        backEdgesAllowed = False             # ignored in code for undirected
        sameInAsOutDegreeRanking = True     # ignored in code for undirected
        debug=0

        VERSION_P = True
        H0 = row_normalize_matrix(P)


    # --- Create the graph
    start = time.time()
    if VERSION_P:
        W, Xd = planted_distribution_model(n, alpha=alpha0, P=P, m=m,
                                           distribution=distribution, exponent=exponent,
                                           directed=directed,
                                           backEdgesAllowed=backEdgesAllowed, sameInAsOutDegreeRanking=sameInAsOutDegreeRanking,
                                           debug=debug)
    else:
        W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d_vec,
                                                  distribution=distribution, exponent=exponent,
                                                  directed=directed, backEdgesAllowed=backEdgesAllowed, sameInAsOutDegreeRanking=sameInAsOutDegreeRanking,
                                                  debug=debug)

    time_est = time.time()-start
    print("Time for graph generation: {}".format(time_est))

    # - Reciprocal edges (relevant if backEdgesAllowed: edges can then exist in both directions)
    W_und = W.multiply(W.transpose())
    # W_und.data[:] = np.sign(W_und.data)  # W contains weighted edges -> unweighted before counting edges with Ptot
    print("Fraction of edges that go in both directions: {}".format(np.sum(W_und.data) / np.sum(W.data)))

    # --- Statistics on created graph
    print("\n- 'calculate_Ptot_from_graph':")
    P_tot = calculate_Ptot_from_graph(W, Xd)
    print("P_tot:\n{}".format(P_tot))
    print("sum(P_tot): {}".format(np.sum(P_tot)))
    print("P (normalized to sum=1):\n{}".format(1. * P_tot / np.sum(P_tot)))           # Potential: normalized sum = 1
    H = row_normalize_matrix(P_tot)
    print("H (row-normalized):\n{}".format(H))

    print("\n- 'calculate_nVec_from_Xd':")
    n_vec = calculate_nVec_from_Xd(Xd)
    print("n_vec: {}".format(n_vec))
    print("alpha: {}".format(1.*n_vec / sum(n_vec)))

    print("\n- Average Out/Indegree 'calculate_average_outdegree_from_graph' (assumes directed for total; for undirected the totals are incorrect):")
    print("Average outdegree: {}".format(calculate_average_outdegree_from_graph(W)))
    print("Average indegree: {}".format(calculate_average_outdegree_from_graph(W.transpose())))
    print("Average total degree: {}".format(calculate_average_outdegree_from_graph(W + W.transpose())))
    print("Average outdegree per class: {}".format(calculate_average_outdegree_from_graph(W, Xd)))
    print("Average indegree per class: {}".format(calculate_average_outdegree_from_graph(W.transpose(), Xd)))
    print("Average total degree per class: {}".format(calculate_average_outdegree_from_graph(W + W.transpose(), Xd)))

    # - Overall degree distribution: In / out
    print("\n- Overall Out/In/Total degree distribution 'calculate_outdegree_distribution_from_graph':")
    print("Overall Out and Indegree distribution:")
    d_out_vec_tot = calculate_outdegree_distribution_from_graph(W, Xd=None)
    d_in_vec_tot = calculate_outdegree_distribution_from_graph(W.transpose(), Xd=None)
    print("Outdegree distribution (degree / number):\n{}".format(np.array([d_out_vec_tot.keys(), d_out_vec_tot.values()])))
    print("Indegree distribution (degree / number):\n{}".format(np.array([d_in_vec_tot.keys(), d_in_vec_tot.values()])))

    # - Overall degree distribution: In + Out
    d_tot_vec_tot = calculate_outdegree_distribution_from_graph(W + W.transpose(), Xd=None)
    print("Total degree distribution (degree / number):\n{}".format(np.array([d_tot_vec_tot.keys(), d_tot_vec_tot.values()])))

    # - Per-class degree distribution: In / out
    print("\n- Per-class Out/In/Total degree distribution 'calculate_outdegree_distribution_from_graph':")
    print("\nOutdegree distribution per class:")
    d_out_vec = calculate_outdegree_distribution_from_graph(W, Xd)
    for i in range(len(d_out_vec)):
        print("Class {}:".format(i))
        print(np.array([list(d_out_vec[i].keys()), list(d_out_vec[i].values())]))
    print("Indegree distribution per class:")
    d_in_vec = calculate_outdegree_distribution_from_graph(W.transpose(), Xd)
    for i in range(len(d_in_vec)):
        print("Class {}:".format(i))
        print(np.array([list(d_in_vec[i].keys()), list(d_in_vec[i].values())]))

    # - per-class degree distribution: In + out
    print("\nTotal degree distribution per class:")
    d_vec_und = calculate_outdegree_distribution_from_graph(W + W.transpose(), Xd)
    for i in range(len(d_vec_und)):
        print("Class {}:".format(i))
        print(np.array([list(d_vec_und[i].keys()), list(d_vec_und[i].values())]))

    print("\n- number of weakly connected components':")
    print("Number of weakly connected components: {}".format(connected_components(W, directed=True, connection='weak', return_labels=False)))


    # --- convergence boundary
    # print("\n- '_out_eps_convergence_directed_linbp', 'eps_convergence_linbp'")
    # if directed:
    #     eps_noEcho = _out_eps_convergence_directed_linbp(P, W, echo=False)
    #     eps_Echo = _out_eps_convergence_directed_linbp(P, W, echo=True)
    # else:
    Hc = to_centering_beliefs(H)
    eps_noEcho = eps_convergence_linbp(Hc, W, echo=False)
    eps_Echo = eps_convergence_linbp(Hc, W, echo=True)
    print("Eps (w/ echo): {}".format(eps_Echo))
    print("Eps (no echo): {}".format(eps_noEcho))


    # --- Fig1: Draw edge distributions
    print("\n- Fig1: Draw degree distributions")
    params = {'backend': 'pdf',
              'lines.linewidth': 4,
              'font.size': 10,
              'axes.labelsize': 24,  # fontsize for x and y labels (was 10)
              'axes.titlesize': 22,
              'xtick.labelsize': 20,
              'ytick.labelsize': 20,
              'legend.fontsize': 8,
              'figure.figsize': [5, 4],
              'font.family': 'sans-serif'
    }
    mpl.rcdefaults()
    mpl.rcParams.update(params)
    fig = plt.figure(1)
    ax = fig.add_axes([0.15, 0.15, 0.8, 0.8])  # main axes
    ax.xaxis.labelpad = -12
    ax.yaxis.labelpad = -12

    # A: Draw directed degree distribution
    y_vec = []
    for i in range(len(d_out_vec)):
        y = np.repeat(list(d_out_vec[i].keys()), list(d_out_vec[i].values()) )    # !!! np.repeat
        y = -np.sort(-y)
        y_vec.append(y)
        # print ("Class {}:\n{}".format(i,y))
    y_tot = np.repeat(list(d_out_vec_tot.keys()), list(d_out_vec_tot.values()))             # total outdegree
    y_tot = -np.sort(-y_tot)
    plt.loglog(range(1, len(y_vec[0])+1), y_vec[0], lw=4, color='orange', label=r"A out", linestyle='-')        # !!! rank axis starts at 1; plot's default index would start at 0
    plt.loglog(range(1, len(y_vec[1])+1), y_vec[1], lw=4, color='blue', label=r"B out", linestyle='--')
    plt.loglog(range(1, len(y_vec[2])+1), y_vec[2], lw=4, color='green', label=r"C out", linestyle=':')
    plt.loglog(range(1, len(y_tot)+1), y_tot, lw=1, color='black', label=r"tot out", linestyle='-')

    # B: Draw second edge distribution of undirected degree distribution
    y_vec = []
    for i in range(len(d_vec_und)):
        y = np.repeat(list(d_vec_und[i].keys()), list(d_vec_und[i].values()) )    # !!! np.repeat
        y = -np.sort(-y)
        y_vec.append(y)
        # print ("Class {}:\n{}".format(i,y))
    y_tot = np.repeat(list(d_tot_vec_tot.keys()), list(d_tot_vec_tot.values()))             # total outdegree
    y_tot = -np.sort(-y_tot)
    plt.loglog(range(1, len(y_vec[0])+1), y_vec[0], lw=4, color='orange', label=r"A", linestyle='-')
    plt.loglog(range(1, len(y_vec[1])+1), y_vec[1], lw=4, color='blue', label=r"B", linestyle='--')
    plt.loglog(range(1, len(y_vec[2])+1), y_vec[2], lw=4, color='green', label=r"C", linestyle=':')
    plt.loglog(range(1, len(y_tot)+1), y_tot, lw=1, color='black', label=r"tot", linestyle='-')

    plt.legend(loc='upper right', labelspacing=0)
    filename = 'figs/Fig_test_planted_distribution_model1_{}.pdf'.format(CHOICE)
    plt.savefig(filename, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', format='pdf',
                transparent=True, bbox_inches='tight', pad_inches=0.1,
                # papertype='letter',           # TODO: papertype deprecated
                # frameon=None,                 # TODO: frameon deprecated
                )
    os.system("open " + filename)


    # --- Fig2: Draw block matrix
    print("\n- Fig2: 'create_blocked_matrix_from_graph'")
    W_new, Xd_new = create_blocked_matrix_from_graph(W, Xd)

    fig = plt.figure(2)
    row, col = W_new.nonzero()                      # transform the sparse W back to row col format
    plt.plot(col, row, 'o', color='r', markersize=2, markeredgewidth=2, lw=0, zorder=3)    # Notice (col, row) because first axis is vertical in matrices
    # plt.matshow(W_new.todense(), cmap=plt.cm.Greys)  # cmap=plt.cm.gray / Blues   # alternative that does not work as well
    plt.gca().invert_yaxis()    # invert the y-axis to start on top and go down

    # Show quadrants
    d1 = alpha0[0] * n
    d2 = (alpha0[0] + alpha0[1]) * n
    plt.grid(which='major', color='0.7', linestyle='-', linewidth=1)
    plt.xticks([0, d1, d2, n])
    plt.yticks([0, d1, d2, n])
    plt.xlabel('to', labelpad=-1)
    plt.ylabel('from', rotation=90, labelpad=0)

    frame = plt.gca()
    # frame.axes.xaxis.set_ticklabels([])       # would hide the labels
    # frame.axes.yaxis.set_ticklabels([])
    frame.tick_params(direction='inout', width=1, length=10)

    filename = 'figs/Fig_test_planted_distribution_model2_{}.pdf'.format(CHOICE)
    plt.savefig(filename, dpi=None, facecolor='w', edgecolor='w',
            orientation='portrait', format='pdf',
            transparent=True, bbox_inches='tight', pad_inches=0.1,
            # papertype='letter',               # TODO: papertype deprecated
            )
    os.system("open " + filename)
Example 7
def run(choice,
        create_data=False,
        add_data=False,
        show_plot=False,
        create_pdf=False,
        show_pdf=False):
    # -- Setup
    CHOICE = choice
    #300 Prop37, 400 MovieLens, 500 Yelp, 600 Flickr, 700 DBLP, 800 Enron
    experiments = [CHOICE]
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PDF = show_pdf
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf

    SHOW_FIG = SHOW_PLOT or SHOW_PDF or CREATE_PDF
    STD_FILL = True
    TIMING = False
    CALCULATE_DATA_STATISTICS = False

    # -- Default Graph parameters
    rep_SameGraph = 10  # iterations on same graph

    initial_h0 = None  # initial vector to start finding optimal H
    exponent = -0.3
    length = 5
    variant = 1

    alpha_vec = [0] * 10
    beta_vec = [0] * 10
    gamma_vec = [0] * 10
    s_vec = [0.5] * 10
    clip_on_vec = [True] * 10
    numMaxIt_vec = [10] * 10

    # Plotting Parameters
    xtick_lab = [0.001, 0.01, 0.1, 1]
    xtick_labels = ['0.1\%', '1\%', '10\%', '100\%']
    ytick_lab = np.arange(0, 1.1, 0.1)
    xmax = 1
    xmin = 0.0001
    ymin = 0.3
    ymax = 0.7
    labels = ['GS', 'LCE', 'MCE', 'DCE', 'DCEr']
    facecolor_vec = [
        'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
        "#64B5CD"
    ]
    draw_std_vec = [False] * 4 + [True]
    linestyle_vec = ['dashed'] + ['solid'] * 10
    linewidth_vec = [4, 4, 2, 1, 2, 2]
    marker_vec = [None, 'o', 'x', '^', 'v', '+']
    markersize_vec = [0, 8, 8, 8, 8, 8, 8]

    option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
    learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']

    Macro_Accuracy = False
    EC = True  # Non-backtracking for learning
    constraints = True  # True
    weight_vec = [None] * 3 + [10, 10] * 2
    randomize_vec = [False] * 4 + [True] * 2
    k = 3
    err = 0
    avoidNeighbors = False
    convergencePercentage_W = None
    stratified = True
    gradient = True
    doubly_stochastic = True
    num_restarts = None

    raw_std_vec = range(10)
    numberOfSplits = 1

    select_lambda_vec = [False] * 20
    lambda_vec = None

    f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
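    # 21 log-spaced label fractions: 0.9 * 10**(-x/5), from 0.9 down to ~9e-5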
    FILENAMEZ = ""
    legend_location = ""
    fig_label = ""
    H_heuristic = ""

    def choose(choice):
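        # choose() rebinds run()'s local configuration via nonlocal; compound
        # cases below call choose(<base case>) first and then override single
        # fields, giving a simple form of configuration inheritance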
        # -- Default Graph parameters
        nonlocal n
        nonlocal d
        nonlocal rep_SameGraph
        nonlocal FILENAMEZ
        nonlocal initial_h0
        nonlocal exponent
        nonlocal length
        nonlocal variant

        nonlocal alpha_vec
        nonlocal beta_vec
        nonlocal gamma_vec
        nonlocal s_vec
        nonlocal clip_on_vec
        nonlocal numMaxIt_vec

        # Plotting Parameters
        nonlocal xtick_lab
        nonlocal xtick_labels
        nonlocal ytick_lab
        nonlocal xmax
        nonlocal xmin
        nonlocal ymin
        nonlocal ymax
        nonlocal labels
        nonlocal facecolor_vec
        nonlocal draw_std_vec
        nonlocal linestyle_vec
        nonlocal linewidth_vec
        nonlocal marker_vec
        nonlocal markersize_vec
        nonlocal legend_location

        nonlocal option_vec
        nonlocal learning_method_vec

        nonlocal Macro_Accuracy
        nonlocal EC
        nonlocal constraints
        nonlocal weight_vec
        nonlocal randomize_vec
        nonlocal k
        nonlocal err
        nonlocal avoidNeighbors
        nonlocal convergencePercentage_W
        nonlocal stratified
        nonlocal gradient
        nonlocal doubly_stochastic
        nonlocal num_restarts
        nonlocal numberOfSplits
        nonlocal H_heuristic

        nonlocal select_lambda_vec
        nonlocal lambda_vec
        nonlocal f_vec

        if choice == 0:
            pass

        elif choice == 304:  ## with varying weights
            FILENAMEZ = 'prop37'
            Macro_Accuracy = True
            gradient = True
            fig_label = 'Prop37'
            legend_location = 'lower right'
            n = 62000
            d = 34.8
            select_lambda_vec = [False] * 5
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]

        elif choice == 305:  # DCEr Only experiment
            choose(605)
            choose(304)

            select_lambda_vec = [False] * 6

        elif choice == 306:
            choose(304)
            select_lambda_vec = [False] * 3 + [True] * 3
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

            learning_method_vec.append('Holdout')
            labels.append('Holdout')

        elif choice == 307:  # heuristic comparison
            choose(304)
            select_lambda_vec = [False] * 3 + [True] * 3
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec
            learning_method_vec.append('Heuristic')
            labels.append('Heuristic')
            H_heuristic = np.array([[.476, .0476, .476], [.476, .0476, .476],
                                    [.476, .476, .0476]])

        # -- MovieLens dataset
        elif choice == 401:
            FILENAMEZ = 'movielens'
            Macro_Accuracy = True
            gradient = True
            fig_label = 'MovieLens'
            legend_location = 'upper left'

            n = 26850
            d = 25.0832029795

        elif choice == 402:
            choose(401)
            select_lambda_vec = [False] * 3 + [True] * 3  # allow choosing a different lambda for each f in f_vec

            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

        elif choice == 403:
            choose(402)
            ymin = 0.3
            ymax = 1.0
            learning_method_vec.append('Holdout')
            labels.append('Holdout')

        elif choice == 404:
            choose(401)

            select_lambda_vec = [True] * 3  # allow choosing a different lambda for each f in f_vec
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

            labels = ['GS', 'DCEr', 'Homophily']
            facecolor_vec = ['black', "#C44E52", "#64B5CD"]
            draw_std_vec = [False, True, False]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [4, 2, 2, 2, 2]
            marker_vec = [None, '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8]

            weight_vec = [None, 10, None]
            option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
            randomize_vec = [False, True, False]
            learning_method_vec = ['GT', 'DHE']  #TODO

        elif choice == 405:  # DCEr ONLY experiment
            choose(605)
            choose(401)
            learning_method_vec += ['Holdout']
            labels += ['Holdout']

        elif choice == 406:  # comparison with a static heuristic matrix
            choose(402)
            learning_method_vec += ['Heuristic']
            labels += ['Heuristic']
            H_heuristic = np.array([[.0476, .476, .476], [.476, .0476, .476],
                                    [.476, .476, .0476]])

        elif choice == 407:
            choose(402)
            ymin = 0.3
            ymax = 1.0
            lambda_vec = [1] * 21  # same length as f_vec

        elif choice == 408:
            choose(402)
            ymin = 0.3
            ymax = 1.0
            lambda_vec = [10] * 21  # same length as f_vec

        # DO NOT RUN WITH CREATE_DATA=True; if you do, restore the data from
        # data/sigmod-movielens-fig.csv
        elif choice == 409:
            choose(402)
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#8172B2", "#C44E52",
                "#C44E52", "#CCB974", "#64B5CD"
            ]
            labels = [
                'GS', 'LCE', 'MCE', 'DCE1', 'DCE10', 'DCEr1', 'DCEr10',
                'Holdout'
            ]
            draw_std_vec = [False] * 5 + [True] * 2 + [False]
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [2, 2, 2, 2, 2, 2, 2, 2]
            marker_vec = [None, 'o', 'x', 's', 'p', '^', 'v', '+']
            markersize_vec = [0, 8, 8, 8, 8, 8, 8, 8]
            option_vec = [
                'opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6', 'opt7', 'opt8'
            ]
            legend_location = 'upper left'
            ymin = 0.3
            ymax = 1.0
            lambda_vec = [10] * 21  # same length as f_vec

        # -- Yelp dataset
        elif choice == 501:
            FILENAMEZ = 'yelp'
            Macro_Accuracy = True
            weight_vec = [None] * 3 + [10, 10]
            gradient = True
            ymin = 0.1
            ymax = 0.75
            fig_label = 'Yelp'
            legend_location = 'upper left'

            n = 4301900  # for figure
            d = 6.56  # for figure

        # -- Flickr dataset
        elif choice == 601:
            FILENAMEZ = 'flickr'
            Macro_Accuracy = True
            fig_label = 'Flickr'
            legend_location = 'lower right'
            ymin = 0.3
            ymax = 0.7
            n = 2007369
            d = 18.1

        elif choice == 602:  ## with varying weights
            choose(601)

            select_lambda_vec = [False] * 4 + [True] * 2  # allow choosing a different lambda for each f in f_vec
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

        elif choice == 603:  ## with varying weights
            choose(602)

            select_lambda_vec = [False] * 3 + [True] * 2  # allow choosing a different lambda for each f in f_vec
            # lambda_vec = [1] * 5 + [5] * 5 + [10] * 5 + [1] * 6  # same length as f_vec

        elif choice == 604:  ## with weight = 1
            choose(603)

            lambda_vec = [0.5] * 21  # same length as f_vec

        elif choice == 605:
            choose(601)
            facecolor_vec = [
                'black', "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974",
                "#64B5CD", 'orange'
            ]
            draw_std_vec = [False] + [True] * 10
            linestyle_vec = ['dashed'] + ['solid'] * 10
            linewidth_vec = [3] * 10
            marker_vec = [None, 'o', 'x', '^', 'v', '+', 'o', 'x']
            markersize_vec = [0] + [8] * 10

            randomize_vec = [True] * 8
            option_vec = [
                'opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6', 'opt7', 'opt8'
            ]

            learning_method_vec = [
                'GT', 'DHE', 'DHE', 'DHE', 'DHE', 'DHE', 'DHE'
            ]
            select_lambda_vec = [False] * 8
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec
            weight_vec = [0, 0, 1, 2, 5, 10, 15]

            labels = ['GT'] + [
                i + ' {}'.format(weight_vec[ix])
                for ix, i in enumerate(['DCEr'] * 6)
            ]

        elif choice == 606:  # heuristic experiment
            choose(602)
            labels.append('Heuristic')
            learning_method_vec.append('Heuristic')
            H_heuristic = np.array([[.0476, .476, .476], [.476, .0476, .476],
                                    [.476, .476, .0476]])

        # -- DBLP dataset
        elif choice == 701:
            FILENAMEZ = 'dblp'
            Macro_Accuracy = True
            ymin = 0.2
            ymax = 0.5
            fig_label = 'DBLP'
            legend_location = 'lower right'
            n = 2241258  # for figure
            d = 26.11  # for figure

        # -- ENRON dataset
        elif choice == 801:
            FILENAMEZ = 'enron'
            Macro_Accuracy = True
            ymin = 0.3
            ymax = 0.75
            fig_label = 'Enron'
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            legend_location = 'upper left'
            n = 46463  # for figures
            d = 23.4  # for figures

        elif choice == 802:  ### WITH ADAPTIVE WEIGHTS
            choose(801)

            select_lambda_vec = [False] * 4 + [True] * 2  # allow choosing a different lambda for each f in f_vec
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

        elif choice == 803:  ### WITH ADAPTIVE WEIGHTS
            choose(802)

            lambda_vec = [1] * 5 + [5] * 5 + [10] * 5 + [1] * 6  # same length as f_vec

        elif choice == 804:
            choose(803)

        elif choice == 805:
            choose(605)
            choose(801)
            #learning_method_vec += ['Holdout']
            #labels += ['Holdout']
        elif choice == 806:  # Heuristic experiment
            choose(802)
            learning_method_vec += ['Heuristic']
            labels += ['Heuristic']
            H_heuristic = np.array([[0.76, 0.08, 0.08, 0.08],
                                    [0.08, 0.08, 0.76, 0.08],
                                    [0.08, 0.76, 0.08, 0.76],
                                    [0.08, 0.08, 0.76, 0.08]])

        # MASC Dataset
        elif choice == 901:
            FILENAMEZ = 'masc'
            Macro_Accuracy = False
            fig_label = 'MASC'
            legend_location = 'lower right'
            n = 0
            d = 0
            ymin = 0
            num_restarts = 100

            select_lambda_vec = [False] * 4 + [True]  # allow choosing a different lambda for each f in f_vec
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

        # MASC collapsed Dataset
        elif choice == 1001:
            FILENAMEZ = 'masc-collapsed'
            fig_label = 'MASC Collapsed'
            legend_location = 'lower right'
            n = 43724
            d = 7.2
            ymin = 0
            num_restarts = 20
            select_lambda_vec = [False] * 4 + [True]  # allow choosing a different lambda for each f in f_vec
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

        elif choice == 1002:
            choose(1001)
            Macro_Accuracy = True

        # MASC Reduced dataset
        elif choice == 1101:
            FILENAMEZ = 'masc-reduced'
            fig_label = 'MASC Reduced'
            legend_location = 'lower right'
            n = 31000
            d = 8.3
            ymin = 0
            select_lambda_vec = [False] * 4 + [True]  # allow choosing a different lambda for each f in f_vec
            f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
            lambda_vec = [1] * 11 + [10] * 10  # same length as f_vec

        elif choice == 1102:
            choose(1101)
            Macro_Accuracy = True

        else:
            raise ValueError("Incorrect choice: {}".format(choice))

    def _f_worker_(X0, W, f, f_index):
        RANDOMSEED = None  # For repeatability
        random.seed(RANDOMSEED)  # seeds some other python random generator
        np.random.seed(
            seed=RANDOMSEED
        )  # seeds the actually used numpy random generator; both are used and thus needed

        X1, ind = replace_fraction_of_rows(X0,
                                           1 - f,
                                           avoidNeighbors=avoidNeighbors,
                                           W=W,
                                           stratified=stratified)
        X2 = introduce_errors(X1, ind, err)
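        # X0 holds the ground-truth labels; X1 keeps only a fraction f of
        # labeled rows (ind indexes the removed ones); X2 adds label noise
        # at rate err (err = 0 by default here)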


        for option_index, (label, select_lambda, learning_method, alpha, beta, gamma, s, numMaxIt, weights, randomize) in \
                enumerate(zip(labels, select_lambda_vec, learning_method_vec, alpha_vec, beta_vec, gamma_vec, s_vec, numMaxIt_vec, weight_vec, randomize_vec)):
            learn_time = -1
            # -- Learning
            if learning_method == 'GT':
                H2c = H0c
            elif learning_method == 'Heuristic':
                # print('Heuristic')
                H2c = H_heuristic

            elif learning_method == 'Holdout':
                # print('Holdout')
                H2 = estimateH_baseline_serial(
                    X2,
                    ind,
                    W,
                    numMax=numMaxIt,
                    # ignore_rows=ind,
                    numberOfSplits=numberOfSplits,
                    # method=learning_method, variant=1,
                    # distance=length,
                    EC=EC,
                    alpha=alpha,
                    beta=beta,
                    gamma=gamma,
                    doubly_stochastic=doubly_stochastic)
                H2c = to_centering_beliefs(H2)

            else:
                if "DCEr" in learning_method:
                    learning_method = "DCEr"
                elif "DCE" in learning_method:
                    learning_method = "DCE"

                # -- choose optimal lambda: allows to specify different lambda for different f
                # print("option: ", option_index)
                if select_lambda == True:
                    weight = lambda_vec[f_index]
                    # print("weight : ", weight)
                else:
                    weight = weights

                # -- learn H
                learn_start = time.time()
                H2 = estimateH(X2,
                               W,
                               method=learning_method,
                               variant=1,
                               distance=length,
                               EC=EC,
                               weights=weight,
                               randomrestarts=num_restarts,
                               randomize=randomize,
                               constraints=constraints,
                               gradient=gradient,
                               doubly_stochastic=doubly_stochastic)
                learn_time = time.time() - learn_start
                H2c = to_centering_beliefs(H2)

            # if learning_method not in ['GT', 'GS']:

            # print(FILENAMEZ, f, learning_method)
            # print(H2c)

            # -- Propagation
            prop_start = time.time()
            # X2c = to_centering_beliefs(X2, ignoreZeroRows=True)       # try without
            eps_max = eps_convergence_linbp_parameterized(H2c,
                                                          W,
                                                          method='noecho',
                                                          alpha=alpha,
                                                          beta=beta,
                                                          gamma=gamma,
                                                          X=X2)
            eps = s * eps_max
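            # scale the propagation strength to a fraction s of the largest
            # eps that still guarantees convergence of linearized BP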
            # print("Max eps: {}, eps: {}".format(eps_max, eps))
            # eps = 1

            try:
                F, actualIt, actualPercentageConverged = \
                    linBP_symmetric_parameterized(X2, W, H2c * eps,
                                                  method='noecho',
                                                  alpha=alpha, beta=beta, gamma=gamma,
                                                  numMaxIt=numMaxIt,
                                                  convergencePercentage=convergencePercentage_W,
                                                  debug=2)
                prop_time = time.time() - prop_start
                if Macro_Accuracy:
                    accuracy_X = matrix_difference_classwise(X0,
                                                             F,
                                                             ignore_rows=ind)
                    precision = matrix_difference_classwise(
                        X0, F, similarity='precision', ignore_rows=ind)
                    recall = matrix_difference_classwise(X0,
                                                         F,
                                                         similarity='recall',
                                                         ignore_rows=ind)
                else:
                    accuracy_X = matrix_difference(X0, F, ignore_rows=ind)
                    precision = matrix_difference(X0,
                                                  F,
                                                  similarity='precision',
                                                  ignore_rows=ind)
                    recall = matrix_difference(X0,
                                               F,
                                               similarity='recall',
                                               ignore_rows=ind)

                result = [str(datetime.datetime.now())]
                text = [
                    label, f, accuracy_X, precision, recall, learn_time,
                    prop_time
                ]
                result.extend(text)
                # print("method: {}, f: {}, actualIt: {}, accuracy: {}, precision:{}, recall: {}, learning time: {}, propagation time: {}".format(label, f, actualIt, accuracy_X, precision, recall, learn_time, prop_time))
                save_csv_record(join(data_directory, csv_filename), result)

            except ValueError as e:

                print("ERROR: {} with {}: d={}, h={}".format(
                    e, learning_method, d, h))
                raise e

        return 'success'

    def multi_run_wrapper(args):
        """Wrapper to unpack the argument tuple passed to the pool worker.

        NOTE: This wrapper could be removed on Python >= 3.3, where
        multiprocessing.Pool.starmap_async() passes multiple arguments
        to the worker directly.
        """

        return _f_worker_(*args)

    for choice in experiments:

        choose(choice)
        filename = 'Fig_End-to-End_accuracy_realData_{}_{}'.format(
            choice, FILENAMEZ)
        csv_filename = '{}.csv'.format(filename)

        header = [
            'currenttime', 'method', 'f', 'accuracy', 'precision', 'recall',
            'learntime', 'proptime'
        ]
        if CREATE_DATA:
            save_csv_record(join(data_directory, csv_filename),
                            header,
                            append=False)

        # print("choice: {}".format(choice))

        # --- print data statistics
        if CALCULATE_DATA_STATISTICS:

            Xd, W = load_Xd_W_from_csv(
                join(realDataDir, FILENAMEZ) + '-classes.csv',
                join(realDataDir, FILENAMEZ) + '-neighbors.csv')

            X0 = from_dictionary_beliefs(Xd)
            n = len(Xd.keys())
            d = (len(W.nonzero()[0]) * 2) / n

            k = len(X0[0])

            print("FILENAMEZ:", FILENAMEZ)
            print("k:", k)
            print("n:", n)
            print("d:", d)

            # -- Graph statistics
            n_vec = calculate_nVec_from_Xd(Xd)
            print("n_vec:\n", n_vec)
            d_vec = calculate_average_outdegree_from_graph(W, Xd=Xd)
            print("d_vec:\n", d_vec)
            P = calculate_Ptot_from_graph(W, Xd)
            print("P:\n", P)
            for i in range(k):
                Phi = calculate_degree_correlation(W, X0, i, NB=True)
                print("Degree Correlation, Class {}:\n{}".format(i, Phi))

            # -- Various compatibilities
            H0 = estimateH(X0,
                           W,
                           method='MHE',
                           variant=1,
                           distance=1,
                           EC=EC,
                           weights=1,
                           randomize=False,
                           constraints=True,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            print("H0 w/  constraints:\n", np.round(H0, 2))
            #raw_input() # Why?

            H2 = estimateH(X0,
                           W,
                           method='MHE',
                           variant=1,
                           distance=1,
                           EC=EC,
                           weights=1,
                           randomize=False,
                           constraints=True,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            H4 = estimateH(X0,
                           W,
                           method='DHE',
                           variant=1,
                           distance=1,
                           EC=EC,
                           weights=2,
                           randomize=False,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            H5 = estimateH(X0,
                           W,
                           method='DHE',
                           variant=1,
                           distance=1,
                           EC=EC,
                           weights=2,
                           randomize=False,
                           constraints=True,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            H6 = estimateH(X0,
                           W,
                           method='DHE',
                           variant=1,
                           distance=2,
                           EC=EC,
                           weights=10,
                           randomize=False,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            H7 = estimateH(X0,
                           W,
                           method='DHE',
                           variant=1,
                           distance=2,
                           EC=EC,
                           weights=10,
                           randomize=False,
                           constraints=True,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)

            print()
            # print("H MCE w/o constraints:\n", np.round(H0, 3))
            print("H MCE w/  constraints:\n", np.round(H2, 3))
            # print("H DCE 2 w/o constraints:\n", np.round(H4, 3))
            print("H DCE 2 w/  constraints:\n", np.round(H5, 3))
            # print("H DCE 10 w/o constraints:\n", np.round(H6, 3))
            print("H DCE 20 w/  constraints:\n", np.round(H7, 3))

            print()
            H_row_vec = H_observed(W, X0, 3, NB=True, variant=1)
            print("H_est_1:\n", np.round(H_row_vec[0], 3))
            print("H_est_2:\n", np.round(H_row_vec[1], 3))
            print("H_est_3:\n", np.round(H_row_vec[2], 3))

        # --- Create data
        if CREATE_DATA or ADD_DATA:

            Xd, W = load_Xd_W_from_csv(
                join(realDataDir, FILENAMEZ) + '-classes.csv',
                join(realDataDir, FILENAMEZ) + '-neighbors.csv')

            X0 = from_dictionary_beliefs(Xd)
            n = len(Xd.keys())  ## number of nodes in graph
            k = len(X0[0])
            d = (len(W.nonzero()[0]) * 2) / n
            #print(n)
            #print(d)
            #print("contraint = {}".format(constraints))
            #print('select lambda: {}'.format(len(select_lambda_vec)))
            #print('learning method: {}'.format(len(learning_method_vec)))
            #print('alpha: {}'.format(len(alpha_vec)))
            #print('beta: {}'.format(len(beta_vec)))
            #print('gamma: {}'.format(len(gamma_vec)))
            #print('s: {}'.format(len(s_vec)))
            #print('maxit: {}'.format(len(numMaxIt_vec)))
            #print('weight: {}'.format(len(weight_vec)))
            #print('randomize: {}'.format(len(randomize_vec)))
            # ---  Calculating True Compatibility matrix
            H0 = estimateH(X0,
                           W,
                           method='MHE',
                           variant=1,
                           distance=1,
                           EC=EC,
                           weights=1,
                           randomize=False,
                           constraints=constraints,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            # print(H0)
            H0c = to_centering_beliefs(H0)

            num_results = len(f_vec) * len(learning_method_vec) * rep_SameGraph

            # Start a worker pool. With 10 fewer processes than available CPUs
            # (one process per CPU), intended for a supercomputer:
            #pool = multiprocessing.Pool(int(multiprocessing.cpu_count()-10))
            # For a reasonably powerful home computer:
            #pool = multiprocessing.Pool(int(multiprocessing.cpu_count()/2))
            # For anything else:
            pool = multiprocessing.Pool(2)

            f_processes = f_vec * rep_SameGraph
            workers = []
            results = [(X0, W, f, ix)
                       for ix, f in enumerate(f_vec)] * rep_SameGraph
            # print('Expected results: {}'.format(num_results))
            try:
                # Distribute the accuracy evaluations over the worker pool;
                # .get() with a timeout is a hacky workaround for Python 2.7
                # multiprocessing not being fully featured
                pool.map_async(multi_run_wrapper, results).get(num_results * 2)
            except multiprocessing.TimeoutError as e:
                continue
            finally:
                pool.close()
                pool.join()

        # -- Read data for all options and plot
        df1 = pd.read_csv(join(data_directory, csv_filename))
        acc_filename = '{}_accuracy_plot.pdf'.format(filename)
        pr_filename = '{}_PR_plot.pdf'.format(filename)
        if TIMING:
            print('=== {} Timing Results ==='.format(FILENAMEZ))
            print('Prop Time:\navg: {}\nstddev: {}'.format(
                np.average(df1['proptime'].values),
                np.std(df1['proptime'].values)))
            for learning_method in labels:
                rs = df1.loc[df1["method"] == learning_method]
                avg = np.average(rs['learntime'])
                std = np.std(rs['learntime'])
                print('{} Learn Time:\navg: {}\nstd: {}'.format(
                    learning_method, avg, std))

        sslhv.plot(df1,
                   join(figure_directory, acc_filename),
                   n=n,
                   d=d,
                   k=k,
                   labels=labels,
                   dataset=FILENAMEZ,
                   line_styles=linestyle_vec,
                   xmin=xmin,
                   ymin=ymin,
                   xmax=xmax,
                   ymax=ymax,
                   marker_sizes=markersize_vec,
                   draw_stds=draw_std_vec,
                   markers=marker_vec,
                   line_colors=facecolor_vec,
                   line_widths=linewidth_vec,
                   legend_location=legend_location,
                   show=SHOW_PDF,
                   save=CREATE_PDF,
                   show_plot=SHOW_PLOT)