Example #1
0
def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False, shorten_length=False):
    """Run the end-to-end accuracy experiment for a varying number of classes k.

    Depending on the flags, the function (1) generates synthetic graphs,
    obtains a compatibility matrix with each configured method, propagates
    labels and stores the resulting accuracy as CSV records, and (2) reads
    the CSV file back, aggregates the repetitions and plots accuracy
    against k.

    The CSV file is read unconditionally, so it has to exist even when
    neither ``create_data`` nor ``add_data`` is set.

    Args:
        choice: Integer id (500-508) selecting one of the hard-coded
            experiment configurations below.
        create_data: If true, write the header row with ``append=False``
            (presumably restarting the CSV file) and generate records.
        add_data: If true, generate records without rewriting the header.
        show_plot: If true, display the figure with ``plt.show()``.
        create_pdf: If true, save the figure as PDF in ``figure_directory``.
        show_pdf: If true, call ``showfig`` on the PDF path.
        shorten_length: Unused; kept so existing callers keep working.

    Raises:
        Warning: If ``choice`` is not a known configuration. (Kept as
            ``Warning`` because callers may already catch that type.)
    """

    # -- Setup
    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PDF = show_pdf
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf
    STD_FILL = True

    csv_filename = 'Fig_End-to-End_accuracy_VaryK_{}.csv'.format(CHOICE)
    header = ['currenttime',
              'option',
              'k',
              'f',
              'accuracy']
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=False)

    # -- Default graph parameters (individual choices override some of them)
    rep_SameGraph = 10          # iterations on same graph
    rep_DifferentGraphs = 10    # iterations on different graphs
    distribution = 'powerlaw'
    exponent = -0.3
    length = 5
    EC = True                   # Non-backtracking for learning
    err = 0
    avoidNeighbors = False
    gradient = False
    pruneRandom = False
    convergencePercentage_W = None
    stratified = True
    numberOfSplits = 1
    d = 25

    # -- Per-option parameters that are identical for every choice.
    # They are zipped with the (shorter) option lists, so extra entries are ignored.
    weight_vec = [10] * 10
    alpha_vec = [0] * 10
    beta_vec = [0] * 10
    gamma_vec = [0] * 10
    s_vec = [0.5] * 10
    numMaxIt_vec = [10] * 10

    # -- Default plot parameters
    xmax = 8
    xtick_lab = [2, 3, 4, 5, 6, 7, 8]
    xtick_labels = ['2', '3', '4', '5', '6', '7', '8']
    ytick_lab = np.arange(0, 1.1, 0.1)
    clip_on_vec = [False] * 10
    linestyle_vec = ['dashed'] + ['solid'] * 10
    linewidth_vec = [5, 4, 3, 3] + [3] * 10
    marker_vec = [None, None, 'o', 'x', 'o', '^', 'o', 'x', 'o', '^', 'o', 'x', 'o', '^']
    markersize_vec = [0, 0, 4, 8] + [6] * 10
    facecolor_vec = ['black'] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 3

    # -- Options with propagation variants.
    # Every branch must define: n, h, option_vec, learning_method_vec,
    # randomize_vec, label_vec, f_vec, k_vec, xmin, ymin, ymax.
    if CHOICE == 500:          ## 1k nodes
        n = 1000
        h = 8
        option_vec = ['opt1', 'opt2', 'opt3']
        learning_method_vec = ['GS', 'MHE', 'DHE']
        randomize_vec = [False] * 2 + [True]
        xmin = 3.
        ymin = 0.
        ymax = 1.
        label_vec = ['GS', 'MCE', 'DCEr']
        f_vec = [0.03, 0.01, 0.001]
        k_vec = [3, 4, 5, 6]

    elif CHOICE == 501:        ## 10k nodes
        n = 10000
        h = 8
        option_vec = ['opt1', 'opt2', 'opt3']
        learning_method_vec = ['GT', 'MHE', 'DHE']
        randomize_vec = [False] * 2 + [True]
        xmin = 2.
        ymin = 0.
        ymax = 1.
        label_vec = ['GT', 'MCE', 'DCEr']
        f_vec = [0.03, 0.01, 0.001]
        k_vec = [2, 3, 4, 5]

    elif CHOICE == 502:        ## 10k nodes
        n = 10000
        h = 8
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        ymin = 0.6
        ymax = 1.
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]

    elif CHOICE == 503:        ## 10k nodes
        n = 10000
        h = 3
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        ymin = 0.3
        ymax = 0.9
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        clip_on_vec = [True] * 10

    elif CHOICE == 504:        ## 10k nodes, single k
        n = 10000
        h = 3
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        xmax = 7
        ymin = 0.2
        ymax = 0.9
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        f_vec = [0.01]
        k_vec = [7]
        clip_on_vec = [True] * 10

    elif CHOICE == 505:        ## 10k nodes    with f = 0.005
        n = 10000
        h = 3
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        xmax = 7
        ymin = 0.2
        ymax = 0.9
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        f_vec = [0.005]
        k_vec = [2, 3, 4, 5, 6, 7]
        clip_on_vec = [True] * 10

    elif CHOICE == 506:        ## 10k nodes, without Holdout, single repetition
        n = 10000
        h = 8
        # Only five options are run; the longer randomize/label lists are
        # harmless because they are zipped with / indexed by the options.
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        xmax = 7
        ymin = 0.2
        ymax = 0.9
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        f_vec = [0.005]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        clip_on_vec = [True] * 10
        rep_SameGraph = 1
        rep_DifferentGraphs = 1

    elif CHOICE == 507:        ## 10k nodes   with gradient and PruneRandom
        n = 10000
        h = 3
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GS', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        ymin = 0.1
        ymax = 0.9
        label_vec = ['GS', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        clip_on_vec = [True] * 10
        gradient = True
        pruneRandom = True

    elif CHOICE == 508:        ## 1k nodes (d = 10)   with gradient and PruneRandom, single repetition
        n = 1000
        h = 3
        d = 10
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GS', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        ymin = 0.1
        ymax = 0.9
        label_vec = ['GS', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        clip_on_vec = [True] * 10
        gradient = True
        pruneRandom = True
        rep_DifferentGraphs = 1
        rep_SameGraph = 1

    else:
        raise Warning("Incorrect choice!")

    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for i in range(rep_DifferentGraphs):  # create several graphs with same parameters

            for k in k_vec:
                H0 = create_parameterized_H(k, h, symmetric=True)
                H0c = to_centering_beliefs(H0)

                # uniform class prior over the k classes
                alpha0 = np.array([1.] * k)
                alpha0 = alpha0 / np.sum(alpha0)

                W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d,
                                                     distribution=distribution,
                                                     exponent=exponent,
                                                     directed=False,
                                                     debug=False)
                X0 = from_dictionary_beliefs(Xd)

                for j in range(rep_SameGraph):  # repeat several times for same graph

                    # `ind` is fed back as ind_prior, so the rows chosen for one f
                    # are passed on to the call for the next f
                    ind = None
                    for f in f_vec:             # Remove fraction (1-f) of rows from X0 (notice that different from first implementation)
                        X1, ind = replace_fraction_of_rows(X0, 1-f, avoidNeighbors=avoidNeighbors, W=W, ind_prior=ind, stratified=stratified)
                        X2 = introduce_errors(X1, ind, err)

                        for option_index, (learning_method, alpha, beta, gamma, s, numMaxIt, weights, randomize) in \
                                enumerate(zip(learning_method_vec, alpha_vec, beta_vec, gamma_vec, s_vec, numMaxIt_vec, weight_vec, randomize_vec)):

                            # -- Learning
                            if learning_method == 'GT':
                                # use the planted compatibility matrix directly
                                H2c = H0c

                            elif learning_method == 'Holdout':
                                H2 = estimateH_baseline_serial(X2, ind, W, numMax=numMaxIt,
                                                               numberOfSplits=numberOfSplits,
                                                               EC=EC,
                                                               alpha=alpha, beta=beta, gamma=gamma)
                                H2c = to_centering_beliefs(H2)

                            elif learning_method != 'DHE':
                                H2 = estimateH(X2, W, method=learning_method, variant=1, distance=length, EC=EC, weights=weights, randomize=randomize)
                                H2c = to_centering_beliefs(H2)

                            else:
                                # only 'DHE' receives the gradient / random-restart settings
                                H2 = estimateH(X2, W, method=learning_method, variant=1, distance=length, EC=EC, weights=weights, randomize=randomize, gradient=gradient, randomrestarts=pruneRandom)
                                H2c = to_centering_beliefs(H2)

                            # -- Propagation
                            eps_max = eps_convergence_linbp_parameterized(H2c, W,
                                                                          method='noecho',
                                                                          alpha=alpha, beta=beta, gamma=gamma,
                                                                          X=X2)
                            eps = s * eps_max
                            try:
                                F, actualIt, actualPercentageConverged = \
                                    linBP_symmetric_parameterized(X2, W, H2c * eps,
                                                                  method='noecho',
                                                                  alpha=alpha, beta=beta, gamma=gamma,
                                                                  numMaxIt=numMaxIt,
                                                                  convergencePercentage=convergencePercentage_W,
                                                                  debug=2)
                            except ValueError as e:
                                # best effort: report the failure and continue with the next option
                                print("ERROR: {} with {}: d={}, h={}".format(e, learning_method, d, h))

                            else:
                                accuracy_X = matrix_difference(X0, F, ignore_rows=ind)

                                record = [str(datetime.datetime.now()),
                                          option_vec[option_index],
                                          k,
                                          f,
                                          accuracy_X]
                                save_csv_record(join(data_directory, csv_filename), record)

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))

    # -- Aggregate repetitions
    df2 = df1.groupby(['option', 'k', 'f']).agg \
        ({'accuracy': [np.mean, np.std, np.size, np.median],  # Multiple Aggregates
          })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'accuracy_size': 'count'}, inplace=True)

    # -- Pivot table
    df3 = pd.pivot_table(df2, index=['f', 'k'], columns=['option'], values=['accuracy_mean', 'accuracy_std'])  # Pivot
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy

    # -- Collect one x/y series per (f, option).
    # The x-values are taken per f: df3 has one row per (f, k), so using the
    # whole 'k' column would not match the length of the per-f y-series as
    # soon as the file contains more than one value of f.
    X_hash = dict()
    Y_hash = defaultdict(dict)
    Y_hash_std = defaultdict(dict)
    for f in f_vec:
        rows_f = df3.loc[df3['f'] == f]
        X_hash[f] = rows_f['k'].values            # read k from values instead of k_vec
        for option in option_vec:
            Y_hash[f][option] = rows_f['accuracy_mean_{}'.format(option)].values
            Y_hash_std[f][option] = rows_f['accuracy_std_{}'.format(option)].values

    if CREATE_PDF or SHOW_PLOT or SHOW_PDF:

        # -- Setup figure
        fig_filename = 'Fig_End-to-End_accuracy_varyK_{}.pdf'.format(CHOICE)
        mpl.rc('font', **{'family': 'sans-serif', 'sans-serif': [u'Arial', u'Liberation Sans']})
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 14
        mpl.rcParams['grid.color'] = '777777'  # grid color
        mpl.rcParams['xtick.major.pad'] = 2  # padding of tick labels: default = 4
        mpl.rcParams['ytick.major.pad'] = 1  # padding of tick labels: default = 4
        mpl.rcParams['xtick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['ytick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['figure.figsize'] = [4, 4]
        fig = figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        opt_f_vecs = [(option, f) for option in option_vec for f in f_vec]

        # zip() stops at the shortest style vector, which bounds the number of plotted series
        for ((option, f), color, linewidth, clip_on, linestyle, marker, markersize) in \
            zip(opt_f_vecs, facecolor_vec, linewidth_vec, clip_on_vec, linestyle_vec, marker_vec, markersize_vec):

            label = label_vec[option_vec.index(option)]
            X_f = X_hash[f]
            Y_mean = Y_hash[f][option]
            Y_std = Y_hash_std[f][option]

            if STD_FILL:
                ax.fill_between(X_f, Y_mean + Y_std, Y_mean - Y_std,
                                facecolor=color, alpha=0.2, edgecolor=None, linewidth=0)
                ax.plot(X_f, Y_mean + Y_std, linewidth=0.5, color='0.8', linestyle='solid')
                ax.plot(X_f, Y_mean - Y_std, linewidth=0.5, color='0.8', linestyle='solid')

            ax.plot(X_f, Y_mean, linewidth=linewidth, color=color, linestyle=linestyle, label=label, zorder=4, marker=marker,
                    markersize=markersize, markeredgewidth=1, markeredgecolor='black', clip_on=clip_on)

        # NOTE: from here on `f` and `clip_on` still hold the values of the
        # last plotted series; both the baseline below and the title use them.
        if CHOICE == 507:
            # accuracy of uniformly random guessing among k classes
            X_f = X_hash[f]
            Y_f = [1/float(i) for i in X_f]

            ax.plot(X_f, Y_f, linewidth=2, color='black', linestyle='dashed',
                    label='Random', zorder=4, marker='x',
                    markersize=8, markeredgewidth=1, markeredgecolor='black', clip_on=clip_on)

        # -- Title and legend
        if distribution == 'uniform':
            distribution_label = ',$uniform'
        else:
            distribution_label = '$'
        if n < 1000:
            n_label = '{}'.format(n)
        else:
            n_label = '{}k'.format(int(n / 1000))

        title(r'$\!\!\!n\!=\!{}, d\!=\!{}, h\!=\!{}, f\!=\!{}{}'.format(n_label, d, h, f, distribution_label))
        handles, legend_labels = ax.get_legend_handles_labels()
        legend = plt.legend(handles, legend_labels,
                            loc='upper right',
                            handlelength=2,
                            labelspacing=0,  # distance between label entries
                            handletextpad=0.3,  # distance between label and the line representation
                            borderaxespad=0.2,  # distance between legend and the outer axes
                            borderpad=0.3,  # padding inside legend box
                            numpoints=1,  # put the marker only once
                            )
        frame = legend.get_frame()
        frame.set_linewidth(0.0)
        frame.set_alpha(0.9)  # 0.8

        # -- Figure settings and save
        plt.xticks(xtick_lab, xtick_labels)
        plt.yticks(ytick_lab, ytick_lab)
        ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))

        # Only show ticks on the left and bottom spines
        ax.yaxis.set_ticks_position('left')
        ax.xaxis.set_ticks_position('bottom')

        # The on/off flag is passed positionally: its keyword was renamed
        # from `b` to `visible` in newer matplotlib releases.
        grid(True, which='major', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)
        grid(True, which='minor', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)
        xlabel(r'Number of Classes $(k)$', labelpad=0)
        ylabel(r'Accuracy', labelpad=0)

        xlim(xmin, xmax)
        ylim(ymin, ymax)

        if CREATE_PDF:
            # `frameon=None` (the old default) is no longer passed: newer
            # matplotlib releases do not accept that keyword in savefig.
            savefig(join(figure_directory, fig_filename), format='pdf',
                    dpi=None,
                    edgecolor='w',
                    orientation='portrait',
                    transparent=False,
                    bbox_inches='tight',
                    pad_inches=0.05)

        if SHOW_PLOT:
            plt.show()

        if SHOW_PDF:
            showfig(join(figure_directory, fig_filename))
Example #2
0
def _f_worker_(X0, W, f, f_index):
    """Evaluate every configured method for a single label fraction ``f``.

    The rows of ``X0`` are thinned out via ``replace_fraction_of_rows`` (with
    fraction ``1 - f``), then for each configured method a centered
    compatibility matrix is obtained, labels are propagated with
    ``linBP_symmetric_parameterized`` and one CSV record (timestamp, label,
    f, accuracy, precision, recall, learning time, propagation time) is
    written.

    All experiment settings (``labels``, ``learning_method_vec``, ``H0c``,
    ``csv_filename``, ...) are read from names defined outside this function.

    Args:
        X0: Belief matrix that is thinned out and later used as the
            reference for the accuracy metrics.
        W: Passed as ``W`` to the sampling, estimation and propagation
            helpers (presumably the graph adjacency matrix -- confirm).
        f: Fraction used for ``replace_fraction_of_rows`` and stored in the
            CSV record.
        f_index: Index into ``lambda_vec`` for methods whose
            ``select_lambda`` entry is set.

    Returns:
        The string ``'success'``.

    Raises:
        ValueError: Re-raised after being reported if propagation, metric
            computation or saving raises it.
    """
    seed = None  # For repeatability
    random.seed(seed)  # seeds some other python random generator
    np.random.seed(seed=seed)  # numpy generator; both are used and thus needed

    X1, ind = replace_fraction_of_rows(X0, 1 - f, avoidNeighbors=avoidNeighbors, W=W, stratified=stratified)
    X2 = introduce_errors(X1, ind, err)

    per_method_settings = zip(labels, select_lambda_vec, learning_method_vec,
                              alpha_vec, beta_vec, gamma_vec, s_vec,
                              numMaxIt_vec, weight_vec, randomize_vec)

    for (label, select_lambda, learning_method, alpha, beta, gamma, s,
         numMaxIt, weights, randomize) in per_method_settings:

        # stays at -1 for every method that does not go through estimateH
        learn_time = -1

        # -- Learning
        if learning_method == 'GT':
            H2c = H0c

        elif learning_method == 'Heuristic':
            H2c = H_heuristic

        elif learning_method == 'Holdout':
            H2 = estimateH_baseline_serial(X2, ind, W,
                                           numMax=numMaxIt,
                                           numberOfSplits=numberOfSplits,
                                           EC=EC,
                                           alpha=alpha, beta=beta, gamma=gamma,
                                           doubly_stochastic=doubly_stochastic)
            H2c = to_centering_beliefs(H2)

        else:
            # map decorated method names onto the two names estimateH is
            # called with; "DCEr" is tested first because it contains "DCE"
            for canonical in ("DCEr", "DCE"):
                if canonical in learning_method:
                    learning_method = canonical
                    break

            # -- choose lambda: allows a different lambda for different f
            weight = lambda_vec[f_index] if select_lambda == True else weights

            # -- learn H (only the estimateH call itself is timed)
            learn_start = time.time()
            H2 = estimateH(X2, W,
                           method=learning_method,
                           variant=1,
                           distance=length,
                           EC=EC,
                           weights=weight,
                           randomrestarts=num_restarts,
                           randomize=randomize,
                           constraints=constraints,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            learn_time = time.time() - learn_start
            H2c = to_centering_beliefs(H2)

        # -- Propagation (timing includes the computation of eps_max)
        prop_start = time.time()
        eps_max = eps_convergence_linbp_parameterized(H2c, W,
                                                      method='noecho',
                                                      alpha=alpha, beta=beta, gamma=gamma,
                                                      X=X2)
        eps = s * eps_max

        try:
            F, actualIt, actualPercentageConverged = linBP_symmetric_parameterized(
                X2, W, H2c * eps,
                method='noecho',
                alpha=alpha, beta=beta, gamma=gamma,
                numMaxIt=numMaxIt,
                convergencePercentage=convergencePercentage_W,
                debug=2)
            prop_time = time.time() - prop_start

            # same three metrics either way; only the scoring function differs
            score = matrix_difference_classwise if Macro_Accuracy else matrix_difference
            accuracy_X = score(X0, F, ignore_rows=ind)
            precision = score(X0, F, similarity='precision', ignore_rows=ind)
            recall = score(X0, F, similarity='recall', ignore_rows=ind)

            result = [str(datetime.datetime.now()),
                      label, f, accuracy_X, precision, recall, learn_time, prop_time]
            save_csv_record(join(data_directory, csv_filename), result)

        except ValueError as e:
            # report which method failed, then let the error propagate
            print("ERROR: {} with {}: d={}, h={}".format(e, learning_method, d, h))
            raise e

    return 'success'
def run(choice,
        create_data=False,
        add_data=False,
        show_plot=False,
        create_pdf=False,
        show_pdf=False,
        shorten_length=False):
    """Time the estimation methods for a varying number of classes k and plot it.

    When data is (re)generated, synthetic graphs are created with
    ``planted_distribution_model_H`` and the wall-clock time of each learning
    method is appended as a row ``[currenttime, option, k, f, time]`` to
    ``Fig_timing_VaryK_<choice>.csv`` in ``data_directory``.  The CSV is then
    always read back, aggregated per (option, k, f), and, if requested,
    plotted as mean time over k on a logarithmic y axis
    (``Fig_Time_varyK_<choice>.pdf`` in ``figure_directory``).

    Args:
        choice: Experiment configuration, one of 600-608.
        create_data: Overwrite the CSV with a fresh header, then generate data.
        add_data: Generate data and append it to the existing CSV.
        show_plot: Show the figure interactively.
        create_pdf: Save the figure as PDF.
        show_pdf: Open the saved PDF with ``showfig``.
        shorten_length: Unused; kept so existing callers keep working.

    Raises:
        Warning: If ``choice`` is not a known configuration.
    """
    # -- Setup
    CHOICE = choice
    SHOW_ARROWS = False
    STD_FILL = False

    csv_filename = 'Fig_timing_VaryK_{}.csv'.format(CHOICE)
    header = ['currenttime', 'option', 'k', 'f', 'time']
    if create_data:
        save_csv_record(join(data_directory, csv_filename),
                        header,
                        append=False)

    # -- Default Graph parameters
    rep_SameGraph = 2  # iterations on same graph
    distribution = 'powerlaw'
    exponent = -0.3
    length = 5
    EC = True  # Non-backtracking for learning
    ymin = 0.0
    ymax = 1
    xmin = 2
    xmax = 7.5
    xtick_lab = [2, 3, 4, 5, 6, 7, 8]
    xtick_labels = ['2', '3', '4', '5', '6', '7', '8']
    ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 50]
    ytick_labels = [
        r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$50$'
    ]
    f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
    k_vec = [3, 4, 5]
    rep_DifferentGraphs = 1000  # iterations on different graphs
    err = 0
    avoidNeighbors = False
    gradient = False
    # Must have a default: it is read whenever `gradient` is True, and not
    # every configuration that enables `gradient` sets it (604, 605).
    pruneRandom = False
    stratified = True
    label_vec = ['*'] * 10
    clip_on_vec = [True] * 15
    numberOfSplits = 1
    linestyle_vec = ['solid'] * 15
    linewidth_vec = [3, 2, 4, 2, 3, 2] + [3] * 15
    marker_vec = ['^', 's', 'o', 'x', 'o', '+', 's'] * 3
    markersize_vec = [8, 7, 8, 10, 7, 6] + [10] * 10
    facecolor_vec = [
        "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#64B5CD"
    ]
    legend_location = 'upper right'

    # -- Options with propagation variants
    # NOTE: the per-method vectors are consumed with zip(), so the shortest
    # one determines how many methods / curves are actually processed.
    if CHOICE == 600:  ## 1k nodes
        n = 1000
        h = 8
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4']
        learning_method_vec = ['GT', 'MHE', 'DHE', 'Holdout']
        weight_vec = [10] * 4
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 4 + [True]
        xmin = 3.
        xmax = 10.
        ymin = 0.
        ymax = 50.
        label_vec = ['GT', 'MCE', 'DCE', 'Holdout']
        facecolor_vec = [
            'black'
        ] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 4
        f_vec = [0.03, 0.01, 0.001]
        k_vec = [3, 4, 5, 6]
        ytick_lab = [0, 1e-3, 1e-2, 1e-1, 1, 10, 50]
        ytick_labels = [
            r'$0$', r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$50$'
        ]

    elif CHOICE == 601:  ## 10k nodes
        n = 10000
        h = 8
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4']
        learning_method_vec = ['GT', 'MHE', 'DHE', 'Holdout']
        weight_vec = [10] * 4
        alpha_vec = [0] * 20
        beta_vec = [0] * 20
        gamma_vec = [0] * 20
        s_vec = [0.5] * 20
        numMaxIt_vec = [10] * 20
        randomize_vec = [False] * 15 + [True]
        xmin = 3.
        xmax = 8.
        ymin = 0.
        ymax = 500.
        label_vec = ['GT', 'MCE', 'DCE', 'Holdout']
        facecolor_vec = [
            'black'
        ] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 4
        f_vec = [0.03, 0.01, 0.001]
        k_vec = [3, 4, 5]
        ytick_lab = [0, 1e-3, 1e-2, 1e-1, 1, 10, 100, 300]
        ytick_labels = [
            r'$0$', r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$100$', r'$300$'
        ]

    elif CHOICE == 602:  ## 10k nodes
        n = 10000
        h = 8
        d = 25
        weight_vec = [10] * 20
        alpha_vec = [0] * 20
        beta_vec = [0] * 20
        gamma_vec = [0] * 20
        s_vec = [0.5] * 20
        numMaxIt_vec = [10] * 20
        randomize_vec = [False] * 3 + [True] + [False]
        ymin = 0.01
        ymax = 500
        label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DHEr']
        facecolor_vec = [
            "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"
        ] * 4
        f_vec = [0.01]
        ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500]
        ytick_labels = [
            r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$100$', r'$500$'
        ]
        option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4']
        learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE']
        k_vec = [2, 3, 4, 5, 6, 7, 8]

    elif CHOICE == 603:  ## 10k nodes
        n = 10000
        h = 3
        d = 25
        weight_vec = [10] * 20
        alpha_vec = [0] * 20
        beta_vec = [0] * 20
        gamma_vec = [0] * 20
        s_vec = [0.5] * 20
        numMaxIt_vec = [10] * 20
        randomize_vec = [False] * 4 + [True]
        xmin = 1.8
        xmax = 8.2
        ymin = 0.01
        ymax = 500
        label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DCEr']
        facecolor_vec = [
            "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52"
        ] * 4
        f_vec = [0.01]
        ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500]
        ytick_labels = [
            r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$100$', r'$500$'
        ]
        option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4']
        learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE']
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        legend_location = 'upper right'

    elif CHOICE in (604, 605):  ## 10k nodes with Gradient (605: f = 0.005)
        # NOTE(review): these two enable `gradient` but not `pruneRandom`, so
        # with the gate used in the learning step below, `gradient` is not
        # forwarded to estimateH for them. Confirm that this is intended.
        n = 10000
        h = 3
        d = 25
        weight_vec = [10] * 20
        alpha_vec = [0] * 20
        beta_vec = [0] * 20
        gamma_vec = [0] * 20
        s_vec = [0.5] * 20
        numMaxIt_vec = [10] * 20
        randomize_vec = [False] * 4 + [True]
        ymin = 0.00
        ymax = 800
        label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DCEr']
        facecolor_vec = [
            "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52"
        ] * 4
        ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500]
        ytick_labels = [
            r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$100$', r'$500$'
        ]
        option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4']
        learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE']
        if CHOICE == 604:
            f_vec = [0.01]
            k_vec = [2, 3, 4, 5, 6, 7, 8]
        else:
            f_vec = [0.005]
            k_vec = [2, 3, 4, 5, 6, 7]
        gradient = True
        legend_location = 'center right'

    elif CHOICE == 606:  ## 10k nodes with f = 0.005, Gradient and PruneRandom
        n = 10000
        h = 3
        d = 25
        weight_vec = [10] * 20
        alpha_vec = [0] * 20
        beta_vec = [0] * 20
        gamma_vec = [0] * 20
        s_vec = [0.5] * 20
        numMaxIt_vec = [10] * 20
        randomize_vec = [False] * 4 + [True]
        xmin = 1.8
        xmax = 7.2
        ymin = 0.01
        ymax = 800
        label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DCEr']
        facecolor_vec = [
            "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52"
        ] * 4
        f_vec = [0.005]
        ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500]
        ytick_labels = [
            r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$100$', r'$500$'
        ]
        option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4']
        learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE']
        k_vec = [2, 3, 4, 5, 6, 7]
        gradient = True
        pruneRandom = True
        legend_location = 'upper right'

    elif CHOICE in (607, 608):  ## 10k nodes with gradient and PruneRandom
        n = 10000
        h = 3
        d = 25
        option_vec = ['opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        weight_vec = [10] * 10
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 3 + [True] + [False]
        xmin = 1.8
        ymin = 0.01
        ymax = 800
        label_vec = ['LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        facecolor_vec = [
            "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"
        ] * 4
        legend_location = 'upper left'
        marker_vec = [None, 's', 'x', 'o', '^', '+'] * 3
        markersize_vec = [8, 7, 10, 8, 7, 6] + [10] * 10
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        clip_on_vec = [True] * 10
        gradient = True
        pruneRandom = True
        ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500]
        ytick_labels = [
            r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$100$', r'$500$'
        ]
        if CHOICE == 607:
            xmax = 7.
        else:
            # 608 only differs in the x range and in using fewer graphs
            xmax = 7.2
            rep_DifferentGraphs = 10

    else:
        raise Warning("Incorrect choice!")

    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(
        seed=RANDOMSEED
    )  # seeds the actually used numpy random generator; both are used and thus needed

    # -- Create data
    if create_data or add_data:
        csv_path = join(data_directory, csv_filename)

        for _graph_rep in range(
                rep_DifferentGraphs
        ):  # create several graphs with same parameters
            for k in k_vec:
                H0 = create_parameterized_H(k, h, symmetric=True)
                # NOTE(review): H0c is never used below; the call is kept
                # unchanged in case to_centering_beliefs validates H0.
                H0c = to_centering_beliefs(H0)

                # uniform class prior over the k classes
                alpha0 = np.array([1.] * k)
                alpha0 = alpha0 / np.sum(alpha0)

                W, Xd = planted_distribution_model_H(n,
                                                     alpha=alpha0,
                                                     H=H0,
                                                     d_out=d,
                                                     distribution=distribution,
                                                     exponent=exponent,
                                                     directed=False,
                                                     debug=False)
                X0 = from_dictionary_beliefs(Xd)

                for _same_rep in range(
                        rep_SameGraph):  # repeat several times for same graph
                    ind = None
                    for f in f_vec:  # Remove fraction (1-f) of rows from X0
                        # `ind` from the previous f is passed on as ind_prior
                        X1, ind = replace_fraction_of_rows(
                            X0,
                            1 - f,
                            avoidNeighbors=avoidNeighbors,
                            W=W,
                            ind_prior=ind,
                            stratified=stratified)
                        X2 = introduce_errors(X1, ind, err)

                        method_settings = zip(learning_method_vec, alpha_vec,
                                              beta_vec, gamma_vec, s_vec,
                                              numMaxIt_vec, weight_vec,
                                              randomize_vec)
                        for option_index, (learning_method, alpha, beta, gamma,
                                           _s, numMaxIt, weights,
                                           randomize) in enumerate(
                                               method_settings):

                            # -- Learning (only the elapsed time is recorded)
                            if learning_method == 'GT':
                                # nothing is estimated for 'GT'
                                timeTaken = 0.0

                            elif learning_method == 'Holdout':
                                prev_time = time.time()
                                estimateH_baseline_serial(
                                    X2,
                                    ind,
                                    W,
                                    numMax=numMaxIt,
                                    numberOfSplits=numberOfSplits,
                                    EC=EC,
                                    alpha=alpha,
                                    beta=beta,
                                    gamma=gamma)
                                timeTaken = time.time() - prev_time

                            else:
                                estimate_kwargs = dict(method=learning_method,
                                                       variant=1,
                                                       distance=length,
                                                       EC=EC,
                                                       weights=weights,
                                                       randomize=randomize)
                                # `gradient` is only forwarded when both
                                # switches are on
                                if gradient and pruneRandom:
                                    estimate_kwargs['gradient'] = gradient
                                prev_time = time.time()
                                estimateH(X2, W, **estimate_kwargs)
                                timeTaken = time.time() - prev_time

                            record = [
                                str(datetime.datetime.now()),
                                option_vec[option_index], k, f, timeTaken
                            ]
                            save_csv_record(csv_path, record)

    # -- Read, aggregate, and pivot data for all options
    # The CSV is read unconditionally, so it has to exist already when no
    # data is generated in this call.
    df1 = pd.read_csv(join(data_directory, csv_filename))

    # -- Aggregate repetitions
    df2 = df1.groupby(['option', 'k', 'f']).agg(
        {'time': [np.mean, np.std, np.size, np.median]})  # Multiple Aggregates
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values
                   ]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'time_size': 'count'}, inplace=True)

    # -- Pivot table
    df3 = pd.pivot_table(df2,
                         index=['f', 'k'],
                         columns=['option'],
                         values=['time_mean', 'time_std',
                                 'time_median'])  # Pivot
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values
                   ]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy

    # x values (k) and y values (mean / std of time) are both selected per f,
    # so that they always have matching lengths even if the CSV holds
    # several values of f.
    X_hash = {}
    Y_hash = defaultdict(dict)
    Y_hash_std = defaultdict(dict)
    for f in f_vec:
        rows_f = df3.loc[df3['f'] == f]
        X_hash[f] = rows_f['k'].values  # read k from values
        for option in option_vec:
            Y_hash[f][option] = rows_f['time_mean_{}'.format(
                option)].values  # mean
            Y_hash_std[f][option] = rows_f['time_std_{}'.format(
                option)].values

    if show_plot or show_pdf or create_pdf:

        # -- Setup figure
        fig_filename = 'Fig_Time_varyK_{}.pdf'.format(CHOICE)
        mpl.rc(
            'font', **{
                'family': 'sans-serif',
                'sans-serif': [u'Arial', u'Liberation Sans']
            })
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 14
        mpl.rcParams['grid.color'] = '777777'  # grid color
        mpl.rcParams[
            'xtick.major.pad'] = 2  # padding of tick labels: default = 4
        mpl.rcParams[
            'ytick.major.pad'] = 1  # padding of tick labels: default = 4
        mpl.rcParams['xtick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['ytick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['figure.figsize'] = [4, 4]
        fig = figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        # one curve per (option, f) pair, styled by position in the vectors
        opt_f_vecs = [(option, f) for option in option_vec for f in f_vec]

        for ((option, f), color, linewidth, clip_on, linestyle, marker,
             markersize) in zip(opt_f_vecs, facecolor_vec, linewidth_vec,
                                clip_on_vec, linestyle_vec, marker_vec,
                                markersize_vec):

            label = label_vec[option_vec.index(option)]
            x_vals = X_hash[f]
            y_mean = Y_hash[f][option]
            y_std = Y_hash_std[f][option]

            if STD_FILL:
                ax.fill_between(x_vals,
                                y_mean + y_std,
                                y_mean - y_std,
                                facecolor=color,
                                alpha=0.2,
                                edgecolor=None,
                                linewidth=0)
                ax.plot(x_vals,
                        y_mean + y_std,
                        linewidth=0.5,
                        color='0.8',
                        linestyle='solid')
                ax.plot(x_vals,
                        y_mean - y_std,
                        linewidth=0.5,
                        color='0.8',
                        linestyle='solid')

            ax.plot(x_vals,
                    y_mean,
                    linewidth=linewidth,
                    color=color,
                    linestyle=linestyle,
                    label=label,
                    zorder=4,
                    marker=marker,
                    markersize=markersize,
                    markeredgecolor='black',
                    markeredgewidth=1,
                    clip_on=clip_on)

        # From here on `f` still holds the value of the last plotted curve;
        # it is used for the arrows and for the title.
        if SHOW_ARROWS:
            for indx in [2, 3]:
                # text passed positionally: the keyword name differs between
                # matplotlib releases
                ax.annotate('',
                            xy=(X_hash[f][indx] - 0.05,
                                Y_hash[f]['opt4'][indx]),
                            xytext=(X_hash[f][indx] - 0.05,
                                    Y_hash[f]['opt5'][indx]),
                            arrowprops=dict(facecolor='blue',
                                            arrowstyle='<->'))
                ax.annotate(
                    str(
                        int(
                            np.round(Y_hash[f]['opt5'][indx] /
                                     Y_hash[f]['opt4'][indx]))) + 'x',
                    xy=(X_hash[f][indx] - 0.4,
                        (Y_hash[f]['opt5'][indx] + Y_hash[f]['opt4'][indx]) /
                        10),
                    color='black',
                    va='center',
                    annotation_clip=False,
                    zorder=5)

        # -- Title and legend
        if distribution == 'uniform':
            distribution_label = ',$uniform'
        else:
            distribution_label = '$'
        if n < 1000:
            n_label = '{}'.format(n)
        else:
            n_label = '{}k'.format(int(n / 1000))

        title(r'$\!\!\!n\!=\!{}, d\!=\!{}, h\!=\!{}, f\!=\!{}{}'.format(
            n_label, d, h, f, distribution_label))
        handles, legend_labels = ax.get_legend_handles_labels()
        legend = plt.legend(
            handles,
            legend_labels,
            loc=legend_location,  # 'upper right'
            handlelength=2,
            labelspacing=0,  # distance between label entries
            handletextpad=
            0.3,  # distance between label and the line representation
            borderaxespad=0.2,  # distance between legend and the outer axes
            borderpad=0.3,  # padding inside legend box
            numpoints=1,  # put the marker only once
        )
        frame = legend.get_frame()
        frame.set_linewidth(0.0)
        frame.set_alpha(0.9)  # 0.8

        # -- Figure settings and save
        plt.yscale('log')
        plt.xticks(xtick_lab, xtick_labels)
        # NOTE(review): the numeric positions are also passed as the labels,
        # so `ytick_labels` is never used. Possibly `ytick_labels` was meant
        # here; left unchanged because it would alter the figure. Confirm.
        plt.yticks(ytick_lab, ytick_lab)

        # Only show ticks on the left and bottom spines
        ax.yaxis.set_ticks_position('left')
        ax.xaxis.set_ticks_position('bottom')
        plt.xlim(xmin, xmax)
        plt.ylim(ymin, ymax)

        # The on/off flag is passed positionally because its keyword name
        # differs between matplotlib releases.
        grid(True,
             which='major',
             axis='both',
             alpha=0.2,
             linestyle='solid',
             linewidth=0.5)
        grid(True,
             which='minor',
             axis='both',
             alpha=0.2,
             linestyle='solid',
             linewidth=0.5)
        xlabel(r'Number of Classes $(k)$', labelpad=0)  # labelpad=0
        ylabel(r'Time [sec]', labelpad=0)

        if create_pdf:
            # `frameon` is no longer passed: it was only ever None (= default)
            # and newer matplotlib releases reject the keyword.
            savefig(join(figure_directory, fig_filename),
                    format='pdf',
                    dpi=None,
                    edgecolor='w',
                    orientation='portrait',
                    transparent=False,
                    bbox_inches='tight',
                    pad_inches=0.05)

        if show_plot:
            plt.show()

        if show_pdf:
            showfig(join(figure_directory,
                         fig_filename))  # shows actually created PDF
Example #4
0
def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False, shorten_length=False,
        show_arrows=False):
    # -- Setup
    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PLOT = show_plot
    SHOW_PDF = show_pdf
    CREATE_PDF = create_pdf

    SHOW_STD = True         ## FALSE for just scatter plot points
    SHOW_ARROWS = show_arrows


    # -- Default Graph parameters
    rep_SameGraph = 1       # iterations on same graph
    distribution = 'powerlaw'
    exponent = -0.3
    length = 5
    variant = 1
    EC = False
    numberOfSplits = 1
    scaling_vec = [None]*10
    ymin = 0.3
    ymax = 1
    xmin = 1e-3
    xmax = 1e3
    xtick_lab = [1e-3, 0.01, 0.1, 1, 10, 100, 1000]
    xtick_labels = [r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$10^{2}$', r'$10^{3}$']
    ytick_lab = np.arange(0, 1.1, 0.1)
    k = 3
    a = 1
    rep_DifferentGraphs = 1   # iterations on different graphs
    err = 0
    avoidNeighbors = False
    convergencePercentage_W = 0.99
    facecolor_vec = ["#4C72B0", "#55A868", "#8172B2", "#C44E52", "#CCB974", "#64B5CD"]
    label_vec = ['MCE', 'LCE', 'DCE', 'Holdout']
    linewidth_vec = [4, 3, 1, 2, 2, 1]
    # clip_ons = [True, True, True, True, True, True]
    FILEZNAME = 'Fig_timing_accuracy_learning'
    marker_vec = ['s', '^', 'v', 'o', 'x', '+', 'None']   #'^'
    length_vec = [5]
    stratified = True
    f = 0.01
    numMaxIt_vec = [10]*7
    alpha_vec = [0] * 7
    beta_vec = [0] * 7  # TODO: LinBP does not use beta. Also SSLH uses alpha, but not beta for W^row! Now fixed
    gamma_vec = [0] * 7
    s_vec = [0.5] * 7


    # -- Main Options
    if CHOICE == 1:         # Main graph
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['MHE']  + ['LHE'] + ['DHE'] + ['DHE'] + ['Holdout'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'Holdout', 'GS']
        randomize_vec = [False]*3 + [True] + [None]*2
        scaling_vec = [None]*2 + [10, 100] + [None]*2
        splits_vec = [1, 2, 4, 8]


    elif CHOICE == 2:
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
        learning_method_vec = ['MHE']  + ['LHE'] + ['DHE'] + ['DHE'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'GS']
        randomize_vec = [False]*3 + [True] + [None]
        scaling_vec = [None]*2 + [10, 100] + [None]


    elif CHOICE == 3:
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
        learning_method_vec = ['MHE']  + ['LHE'] + ['DHE'] + ['DHE'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'GS']
        randomize_vec = [False]*3 + [True] + [None]
        scaling_vec = [None]*2 + [10, 100] + [None]
        f = 0.02


    elif CHOICE == 4:         # TODO: Overnight Wolfgang
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['MHE']  + ['LHE'] + ['DHE'] + ['DHE'] + ['Holdout'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'Holdout', 'GS']
        randomize_vec = [False]*3 + [True] + [None]*2
        scaling_vec = [None]*2 + [10, 100] + [None]*2
        splits_vec = [1, 2, 4, 8, 16]


    elif CHOICE == 5:         # Toy graph with 100 nodes
        n = 100
        h = 3
        d = 8
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['MHE']  + ['LHE'] + ['DHE'] + ['DHE'] + ['Holdout'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'Holdout', 'GS']
        randomize_vec = [False]*3 + [True] + [None]*2
        scaling_vec = [None]*2 + [10, 100] + [None]*2
        splits_vec = [1, 2, 4, 8]
        f=0.05


    elif CHOICE == 6:         # To be run by Prakhar on Cluster
        n = 10000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['MHE']  + ['LHE'] + ['DHE'] + ['DHE'] + ['Holdout'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCEr', 'Holdout', 'GS']
        randomize_vec = [False]*3 + [True] + [None]*2
        scaling_vec = [None]*2 + [10, 100] + [None]*2
        splits_vec = [1, 2, 4, 8]
        f=0.003
        xmin = 1e-2
        # ymax = 0.9
        ymin = 0.2
        ymax = 0.9
        xmin = 1e-2
        xmax = 1e3



    elif CHOICE == 7:
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['MHE']  + ['LHE'] + ['DHE'] + ['DHE'] + ['Holdout'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'Holdout', 'GS']
        randomize_vec = [False]*3 + [True] + [None]*2
        scaling_vec = [None]*2 + [10, 100] + [None]*2
        splits_vec = [1, 2, 4, 8, 16]
        f=0.009

    # elif CHOICE == 8:       # not working well
    #     n = 1000
    #     h = 3
    #     d = 25
    #     option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
    #     learning_method_vec = ['MHE']  + ['LHE'] + ['DHE'] + ['DHE'] + ['Holdout'] + ['GS']
    #     label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'Holdout', 'GS']
    #     randomize_vec = [False]*3 + [True] + [None]*2
    #     scaling_vec = [None]*2 + [10, 100] + [None]*2
    #     splits_vec = [1, 2, 4, 8, 16]
    #     f=0.005



    else:
        raise Warning("Incorrect choice!")



    csv_filename = '{}_{}.csv'.format(FILEZNAME, CHOICE)
    header = ['currenttime',
              'option',
              'lensplit',
              'f',
              'accuracy',
              'timetaken']
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=False)

    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)
    H0c = to_centering_beliefs(H0)


    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed
    # print("CHOICE: {}".format(CHOICE))


    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for i in range(rep_DifferentGraphs):  # create several graphs with same parameters
            # print("\ni: {}".format(i))

            W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d,
                                                      distribution=distribution,
                                                      exponent=exponent,
                                                      directed=False,
                                                      debug=False)
            X0 = from_dictionary_beliefs(Xd)

            for j in range(rep_SameGraph):  # repeat several times for same graph
                # print("j: {}".format(j))

                ind = None
                X1, ind = replace_fraction_of_rows(X0, 1-f, avoidNeighbors=avoidNeighbors, W=W, ind_prior=ind, stratified = stratified)     # TODO: stratified sampling option = True
                X2 = introduce_errors(X1, ind, err)

                for option_index, (learning_method, alpha, beta, gamma, s, numMaxIt, weight, randomize, option) in \
                        enumerate(zip(learning_method_vec, alpha_vec, beta_vec, gamma_vec, s_vec, numMaxIt_vec, scaling_vec, randomize_vec, option_vec)):

                    # weight = np.array([np.power(scaling, i) for i in range(5)])       # TODO: now enough to specify weight as a scalar!
                    H_est_dict = {}
                    timeTaken_dict = {}

                    # -- Learning
                    if learning_method == 'Holdout' :
                        for numberOfSplits in splits_vec:
                            prev_time = time.time()
                            H_est_dict[numberOfSplits] = estimateH_baseline_serial(X2, ind, W, numMax=numMaxIt,
                                                                                   # ignore_rows=ind,
                                                                                   numberOfSplits=numberOfSplits,
                                                                                   # method=learning_method, variant=1, distance=length,
                                                                                   EC=EC,
                                                                                   weights=weight, alpha=alpha, beta=beta, gamma=gamma)
                            timeTaken = time.time() - prev_time
                            timeTaken_dict[numberOfSplits] = timeTaken

                    elif learning_method in ['LHE', 'MHE', 'DHE']:      # TODO: no smartInit, just randomization as option
                        for length in length_vec:
                            prev_time = time.time()
                            H_est_dict[length] = estimateH(X2, W, method=learning_method, variant=1, randomize=randomize, distance=length, EC=EC, weights=weight)
                            timeTaken = time.time() - prev_time
                            timeTaken_dict[length] = timeTaken

                    elif learning_method == 'GS':
                        H_est_dict['GS'] = H0

                    for key in H_est_dict:
                        H_est = H_est_dict[key]
                        H2c = to_centering_beliefs(H_est)
                        # print("H_estimated by {} is \n".format(learning_method), H_est)
                        # print("H0 is \n", H0)
                        # print("randomize was: ", randomize)

                        # Propagation
                        X2c = to_centering_beliefs(X2, ignoreZeroRows=True)  # try without
                        eps_max = eps_convergence_linbp_parameterized(H2c, W,
                                                                      method='noecho',
                                                                      alpha=alpha, beta=beta, gamma=gamma,
                                                                      X=X2)

                        eps = s * eps_max

                        # print("Max Eps ", eps_max)

                        try:
                            F, actualIt, actualPercentageConverged = \
                                linBP_symmetric_parameterized(X2, W, H2c * eps,
                                                              method='noecho',
                                                              alpha=alpha, beta=beta, gamma=gamma,
                                                              numMaxIt=numMaxIt,
                                                              convergencePercentage=convergencePercentage_W,
                                                              convergenceThreshold=0.99,
                                                              debug=2)

                        except ValueError as e:
                            print(
                                "ERROR: {} with {}: d={}, h={}".format(e, learning_method, d, h))

                        else:
                            accuracy_X = matrix_difference(X0, F, ignore_rows=ind)

                            tuple = [str(datetime.datetime.now())]
                            if learning_method == 'Holdout':
                                text = [option,"split{}".format(key), f, accuracy_X, timeTaken_dict[key]]
                            elif learning_method in ['MHE', 'DHE', 'LHE']:
                                text = [option, "len{}".format(key), f, accuracy_X, timeTaken_dict[key]]
                            elif learning_method == 'GS':
                                text = [option, 0, f, accuracy_X, 0]

                            tuple.extend(text)
                            # print("option: {}, f: {}, actualIt: {}, accuracy: {}".format(option, f, actualIt, accuracy_X))
                            save_csv_record(join(data_directory, csv_filename), tuple)





    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))
    # print("\n-- df1: (length {}):\n{}".format(len(df1.index), df1.head(15)))

    # Aggregate repetitions
    df2 = df1.groupby(['option', 'lensplit', 'f']).agg \
        ({'accuracy': [np.mean, np.std, np.size],  # Multiple Aggregates
          })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'accuracy_size': 'count'}, inplace=True)
    # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(15)))

    df3 = df1.groupby(['option', 'lensplit', 'f']).agg({'timetaken': [np.median] })
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy
    # resultdf3 = df3.sort(['timetaken'], ascending=1)
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(15)))

    X_time_median_dict = {}
    Y_acc_dict = {}
    Y_std_dict = {}

    for option in option_vec:
        Y_acc_dict[option] = df2.loc[(df2['option'] == option), "accuracy_mean"].values
        Y_std_dict[option] = df2.loc[(df2['option'] == option), "accuracy_std"].values
        X_time_median_dict[option] = df3.loc[(df3['option'] == option), "timetaken_median"].values

        # print("option: ", option)
        # print("Y_acc_dict[option]: ", Y_acc_dict[option])
        # print("Y_std_dict[option]: ", Y_std_dict[option])
        # print("X_time_median_dict[option]: ", X_time_median_dict[option])



    # -- Setup figure
    fig_filename = '{}_{}.pdf'.format(FILEZNAME, CHOICE)
    mpl.rc('font', **{'family': 'sans-serif', 'sans-serif': [u'Arial', u'Liberation Sans']})
    mpl.rcParams['axes.labelsize'] = 18
    mpl.rcParams['xtick.labelsize'] = 16
    mpl.rcParams['ytick.labelsize'] = 16
    mpl.rcParams['axes.titlesize'] = 16
    mpl.rcParams['legend.fontsize'] = 14
    mpl.rcParams['grid.color'] = '777777'  # grid color
    mpl.rcParams['xtick.major.pad'] = 2  # padding of tick labels: default = 4
    mpl.rcParams['ytick.major.pad'] = 1  # padding of tick labels: default = 4
    mpl.rcParams['xtick.direction'] = 'out'  # default: 'in'
    mpl.rcParams['ytick.direction'] = 'out'  # default: 'in'
    mpl.rcParams['figure.figsize'] = [4, 4]
    fig = figure()
    ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])


    SHOW_ARROWS = True

    for choice, color, learning_method, label, linewidth, marker in \
            zip(option_vec, facecolor_vec, learning_method_vec, label_vec, linewidth_vec, marker_vec):

        if learning_method == 'Holdout':
            # Draw std
            X1 = X_time_median_dict[choice]
            s = X1.argsort()
            X1 = X1[s]
            Y1 = Y_acc_dict[choice][s]
            Y2 = Y_std_dict[choice][s]

            if SHOW_STD:
                ax.fill_between(X1, Y1 + Y2, Y1 - Y2, facecolor=color, alpha=0.2, edgecolor=None, linewidth=0)
                ax.plot(X1, Y1 + Y2, linewidth=0.5, color='0.8', linestyle='solid')
                ax.plot(X1, Y1 - Y2, linewidth=0.5, color='0.8', linestyle='solid')
                ax.set_ylim(bottom=ymin)

                ax.plot(X1, Y1, linewidth=linewidth, color=color, linestyle='solid', label=label, zorder=20, marker='x', markersize=linewidth + 5, markeredgewidth=1)
                ax.annotate(np.round(X1[1], decimals=1), xy=(X1[1], Y1[1] - 0.05), color=color, va='center', annotation_clip=False, zorder=5)

            else:
                ax.scatter(list(X1), list(Y1),
                           color=color, label=label, marker='x', s=42)


        elif learning_method == 'GS':
            ax.plot([1e-4, 1e4], [Y_acc_dict[choice], Y_acc_dict[choice]],
                    linewidth=1, color='black',
                    linestyle='dashed', zorder=0,
                    marker=None,
                    label=label,
                    )

        else:       # For all other
            if SHOW_STD:
                ax.errorbar(list(X_time_median_dict[choice]), list(Y_acc_dict[choice]), yerr=Y_std_dict[choice],
                            fmt='-o', linewidth=2, color=color,
                            label=label, marker=marker, markersize=8)
                ax.annotate(np.round(X_time_median_dict[choice], decimals=2), xy=(X_time_median_dict[choice], Y_acc_dict[choice]-0.05), color=color, va='center',
                            annotation_clip=False, zorder=5)

            else:
                ax.scatter(list(X_time_median_dict[choice]), list(Y_acc_dict[choice]),
                           color=color, label=label, marker=marker, s=42)

        if SHOW_ARROWS:
            dce_opt = 'opt4'
            holdout_opt = 'opt5'

            ax.annotate(s='', xy=(X_time_median_dict[dce_opt], Y_acc_dict[dce_opt]-0.3), xytext=(X_time_median_dict[holdout_opt][2]+0.02, Y_acc_dict[dce_opt]-0.3), arrowprops=dict(arrowstyle='<->'))
            ax.annotate(str(int(np.round(X_time_median_dict[holdout_opt][2] / X_time_median_dict[dce_opt]))) + 'x', xy=((X_time_median_dict[dce_opt] + X_time_median_dict[holdout_opt][2])/100, Y_acc_dict[dce_opt]-0.28),
                        color='black', va='center',
                        # bbox = dict(boxstyle="round,pad=0.3", fc="w"),
                        annotation_clip=False, zorder=5)






    # -- Title and legend
    title(r'$\!\!\!n\!=\!{}\mathrm{{k}}, d\!=\!{}, h\!=\!{}, f\!=\!{}$'.format(int(n / 1000), d, h, f))
    handles, label_vec = ax.get_legend_handles_labels()
    for i, (h, learning_method) in enumerate(zip(handles, learning_method_vec)):        # remove error bars in legend
        if isinstance(handles[i], collections.Container):
            handles[i] = handles[i][0]

    # plt.legend(loc='upper left', numpoints=1, ncol=3, fontsize=8, bbox_to_anchor=(0, 0))

    SHOW_STD = False


    legend = plt.legend(handles, label_vec,
                        loc='upper right',  # 'upper right'
                        handlelength=2,
                        fontsize=12,
                        labelspacing=0.2,  # distance between label entries
                        handletextpad=0.3,  # distance between label and the line representation
                        borderaxespad=0.2,  # distance between legend and the outer axes
                        borderpad=0.3,  # padding inside legend box
                        numpoints=1,  # put the marker only once
                        )
    if not(SHOW_STD):
        legend = plt.legend(handles, label_vec,
                        loc='upper right',  # 'upper right'
                        handlelength=2,
                        fontsize=10,
                        labelspacing=0.2,  # distance between label entries
                        handletextpad=0.3,  # distance between label and the line representation
                        borderaxespad=0.2,  # distance between legend and the outer axes
                        borderpad=0.3,  # padding inside legend box
                        numpoints=1,  # put the marker only once
                        scatterpoints=1  # display only one-scatter point in legend
                        )

    # # legend.set_zorder(1)
    frame = legend.get_frame()
    frame.set_linewidth(0.0)
    frame.set_alpha(0.9)  # 0.8


    # -- Figure settings and save
    plt.xscale('log')
    plt.xticks(xtick_lab, xtick_labels)
    plt.yticks(ytick_lab, ytick_lab)
    ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_ylim(bottom=ymin)

    grid(b=True, which='major', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
    grid(b=True, which='minor', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',

    xlim(xmin, xmax)
    ylim(ymin, ymax)


    xlabel(r'Time Median (sec)', labelpad=0)      # labelpad=0
    ylabel(r'Accuracy', labelpad=0)
    if CREATE_PDF:
        savefig(join(figure_directory, fig_filename), format='pdf',
                dpi=None,
                edgecolor='w',
                orientation='portrait',
                transparent=False,
                bbox_inches='tight',
                pad_inches=0.05,
                frameon=None)

    if SHOW_PDF:
        showfig(join(figure_directory, fig_filename))

    if SHOW_PLOT:
        plt.show()
def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False, shorten_length=False):
    """Compare propagation accuracy of ground-truth, estimated (DHE) and identity ("Homophily") H.

    For every label fraction ``f`` in ``f_vec`` a synthetic graph is labeled,
    each configured option supplies a compatibility matrix, beliefs are
    propagated with ``linBP_symmetric_parameterized`` and the class-wise
    accuracy is written as one CSV record.  The CSV is then aggregated
    (mean / std per option and ``f``) and plotted on a log-scaled x axis.

    Parameters
    ----------
    choice : int
        Experiment configuration id. Only ``101`` is defined here.
    create_data : bool
        Rewrite the CSV with a fresh header row (``append=False``) and
        generate new records.
    add_data : bool
        Generate new records without rewriting the header first
        (presumably appended by ``save_csv_record`` -- confirm in that helper).
    show_plot : bool
        Call ``plt.show()`` on the finished figure.
    create_pdf : bool
        Save the figure as a PDF inside ``figure_directory``.
    show_pdf : bool
        Open the PDF path via ``showfig``.
    shorten_length : bool
        Plot only every second data point.

    Raises
    ------
    Warning
        If ``choice`` is not a known configuration (kept as ``Warning`` for
        backward compatibility with existing callers).
    ValueError
        If an option names a learning method this function does not handle.

    Notes
    -----
    ``f_vec``, ``labels`` and ``facecolor_vec`` are declared ``global`` and are
    therefore rebound at module level; ``labels`` is additionally overwritten
    with the legend labels while plotting.  The CSV file must already exist
    when neither ``create_data`` nor ``add_data`` is set, because it is always
    read back for aggregation.
    """
    global f_vec, labels, facecolor_vec

    # -- Setup
    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PLOT = show_plot
    SHOW_PDF = show_pdf
    CREATE_PDF = create_pdf
    STD_FILL = True
    SHORTEN_LENGTH = shorten_length     # previously hard-coded to False, which ignored the argument

    fig_filename = 'Fig_homophily_{}.pdf'.format(CHOICE)
    csv_filename = 'Fig_homophily_{}.csv'.format(CHOICE)
    csv_path = join(data_directory, csv_filename)
    fig_path = join(figure_directory, fig_filename)
    header = ['currenttime',
              'option',
              'f',
              'accuracy']
    if CREATE_DATA:
        save_csv_record(csv_path, header, append=False)

    # -- Default Graph parameters
    k = 3
    rep_DifferentGraphs = 1     # iterations on different graphs
    rep_SameGraph = 2           # iterations on same graph
    distribution = 'powerlaw'
    exponent = -0.3
    length = 5
    constraint = True
    EC = True                   # Non-backtracking for learning

    s = 0.5                     # fraction of the convergence threshold eps_max used for propagation
    err = 0
    numMaxIt = 10
    avoidNeighbors = False
    convergencePercentage_W = None
    stratified = True

    # -- Default plot parameters
    clip_on_vec = [True] * 10
    ymin = 0.3
    ymax = 1
    xmin = 0.001
    xmax = 1
    xtick_lab = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]
    # Raw strings: '\%' is not a valid Python escape sequence (same runtime text as before).
    xtick_labels = ['1e-5', r'0.01\%', r'0.1\%', r'1\%', r'10\%', r'100\%']
    ytick_lab = np.arange(0, 1.1, 0.1)
    linestyle_vec = ['dashed'] + ['solid'] * 10
    linewidth_vec = [5, 2, 3, 3, 3, 3] + [3] * 10
    marker_vec = [None, '^', 'v', 'o', '^'] + [None] * 10
    markersize_vec = [0, 8, 8, 8, 6, 6] + [6] * 10
    facecolor_vec = ['black', "#C44E52", "#64B5CD"]

    # -- Options with propagation variants
    if CHOICE == 101:
        n = 10000
        h = 3
        d = 15
        f_vec = [0.9 * pow(0.1, 1 / 5) ** x for x in range(21)]
        option_vec = ['opt1', 'opt2', 'opt3']
        learning_method_vec = ['GT', 'DHE', 'Homophily']
        weight_vec = [None, 10, None]
        randomize_vec = [None, True, None]
        xmin = 0.001
        ymin = 0.3
        ymax = 1
        labels = ['GS', 'DCEr', 'Homophily']

    else:
        raise Warning("Incorrect choice!")

    a = 1
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)

    H0 = create_parameterized_H(k, h, symmetric=True)
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for _ in range(rep_DifferentGraphs):  # create several graphs with same parameters

            W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d, distribution=distribution,
                                                 exponent=exponent, directed=False, debug=False)
            X0 = from_dictionary_beliefs(Xd)

            for _ in range(rep_SameGraph):  # repeat several times for same graph

                # `ind` is fed back as ind_prior, so each f builds on the indices returned for the previous f
                ind = None
                for f in f_vec:
                    X1, ind = replace_fraction_of_rows(X0, 1 - f, avoidNeighbors=avoidNeighbors, W=W,
                                                       ind_prior=ind, stratified=stratified)
                    X2 = introduce_errors(X1, ind, err)

                    for option, learning_method, weights, randomize in \
                            zip(option_vec, learning_method_vec, weight_vec, randomize_vec):

                        # -- Learning
                        if learning_method == 'GT':
                            H2 = H0
                        elif learning_method == 'Homophily':
                            H2 = np.identity(k)
                        elif learning_method == 'DHE':
                            H2 = estimateH(X2, W, method=learning_method, variant=1, distance=length, EC=EC,
                                           weights=weights, randomize=randomize, constraints=constraint)
                        else:
                            # Fail loudly instead of silently reusing H2 from the previous option
                            raise ValueError("Unknown learning method: {}".format(learning_method))

                        # -- Propagation
                        H2c = to_centering_beliefs(H2)

                        try:
                            eps_max = eps_convergence_linbp_parameterized(H2c, W,
                                                                          method='noecho',
                                                                          X=X2)
                            eps = s * eps_max

                            F, _, _ = \
                                linBP_symmetric_parameterized(X2, W, H2c * eps,
                                                              method='noecho',
                                                              numMaxIt=numMaxIt,
                                                              convergencePercentage=convergencePercentage_W,
                                                              debug=2)
                        except ValueError as e:
                            # Best effort: report and skip this record, keep the experiment running
                            print("ERROR: {} with {}: d={}, h={}".format(e, learning_method, d, h))

                        else:
                            accuracy_X = matrix_difference_classwise(X0, F, ignore_rows=ind)

                            record = [str(datetime.datetime.now()),
                                      option,
                                      f,
                                      accuracy_X]
                            save_csv_record(csv_path, record)

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(csv_path)
    desired_decimals = 7
    df1['f'] = df1['f'].apply(lambda x: round(x, desired_decimals))  # rounding due to different starting points

    # Aggregate repetitions (string names keep the pandas implementations and the same column names)
    df2 = df1.groupby(['option', 'f']).agg({'accuracy': ['mean', 'std', 'size']})
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'accuracy_size': 'count'}, inplace=True)

    # Pivot table
    df3 = pd.pivot_table(df2, index=['f'], columns=['option'], values=['accuracy_mean', 'accuracy_std'])
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy

    # Extract values
    X_f = df3['f'].values  # plot x values
    Y = [df3['accuracy_mean_{}'.format(option)].values for option in option_vec]
    Y_std = []
    if STD_FILL:
        Y_std = [df3['accuracy_std_{}'.format(option)].values for option in option_vec]

    if SHORTEN_LENGTH:
        SHORT_FACTOR = 2  # keep every Nth element
        X_f = X_f[::SHORT_FACTOR].copy()
        Y = [y[::SHORT_FACTOR].copy() for y in Y]
        Y_std = [y_std[::SHORT_FACTOR].copy() for y_std in Y_std]

    if CREATE_PDF or SHOW_PLOT or SHOW_PDF:

        # -- Setup figure
        mpl.rc('font', **{'family': 'sans-serif', 'sans-serif': [u'Arial', u'Liberation Sans']})
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 14
        mpl.rcParams['grid.color'] = '777777'  # grid color
        mpl.rcParams['xtick.major.pad'] = 2  # padding of tick labels: default = 4
        mpl.rcParams['ytick.major.pad'] = 1  # padding of tick labels: default = 4
        mpl.rcParams['xtick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['ytick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['font.size'] = 16
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['figure.figsize'] = [4, 4]
        fig = figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        #  -- Drawing
        if STD_FILL:
            # Shaded +/- one std band with thin grey outlines, one per option
            for idx, (option, facecolor) in enumerate(zip(option_vec, facecolor_vec)):
                upper = Y[idx] + Y_std[idx]
                lower = Y[idx] - Y_std[idx]
                ax.fill_between(X_f, upper, lower,
                                facecolor=facecolor, alpha=0.2, edgecolor=None, linewidth=0)
                ax.plot(X_f, upper, linewidth=0.5, color='0.8', linestyle='solid')
                ax.plot(X_f, lower, linewidth=0.5, color='0.8', linestyle='solid')

        for idx, (option, label, color, linewidth, clip_on, linestyle, marker, markersize) in \
                enumerate(zip(option_vec, labels, facecolor_vec, linewidth_vec, clip_on_vec, linestyle_vec, marker_vec, markersize_vec)):
            ax.plot(X_f, Y[idx], linewidth=linewidth, color=color, linestyle=linestyle, label=label, zorder=4,
                    marker=marker, markersize=markersize, markeredgewidth=1, clip_on=clip_on,
                    markeredgecolor='black')

        plt.xscale('log')

        # -- Title and legend
        distribution_label = '$'
        if distribution == 'uniform':
            distribution_label = ',$uniform'
        n_label = '{}k'.format(int(n / 1000))
        if n < 1000:
            n_label = '{}'.format(n)
        a_label = ''
        if a != 1:
            a_label = r', a\!=\!{}'.format(a)

        titleString = r'$\!\!\!n\!=\!{}, d\!=\!{}, h\!=\!{}{}{}'.format(n_label, d, h, a_label, distribution_label)
        plt.title(titleString)

        handles, labels = ax.get_legend_handles_labels()
        legend = plt.legend(handles, labels,
                            loc='upper left',     # 'upper right'
                            handlelength=2,
                            labelspacing=0,  # distance between label entries
                            handletextpad=0.3,  # distance between label and the line representation
                            borderaxespad=0.2,  # distance between legend and the outer axes
                            borderpad=0.3,  # padding inside legend box
                            numpoints=1,  # put the marker only once
                            )
        frame = legend.get_frame()
        frame.set_linewidth(0.0)
        frame.set_alpha(0.9)  # 0.8

        plt.xticks(xtick_lab, xtick_labels)
        plt.yticks(ytick_lab, ytick_lab)

        ax.yaxis.set_ticks_position('left')
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))

        # The on/off flag is passed positionally: its keyword was renamed from `b` to `visible`
        # across matplotlib releases, while the positional form works on both.
        grid(True, which='major', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)
        grid(True, which='minor', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)
        xlabel(r'Label Sparsity $(f)$', labelpad=0)      # labelpad=0
        ylabel(r'Accuracy', labelpad=0)

        xlim(xmin, xmax)
        ylim(ymin, ymax)

        if CREATE_PDF:
            # `frameon=None` (i.e. "use the default") is omitted: newer matplotlib no longer accepts it.
            savefig(fig_path, format='pdf',
                    dpi=None,
                    edgecolor='w',
                    orientation='portrait',
                    transparent=False,
                    bbox_inches='tight',
                    pad_inches=0.05)

        if SHOW_PLOT:
            plt.show()

        if SHOW_PDF:
            showfig(fig_path)  # shows actually created PDF