Code Example #1
    def show_test_statistics(self, n_train=None, intercept=0, Ak_test=None, y_test=None):
        comm.barrier()
        if Ak_test is None:
            Ak_test = self.Ak_test
        if y_test is None:
            y_test = self.y_test
        if Ak_test is None or y_test is None:
            raise TypeError('Ak_test and y_test must not be None')
        
        if n_train is None:
            n_train = self.Ak.shape[0]
        n_test = len(y_test)
        
        if self.mode in ['global', 'all']:
            # Use the resolved test matrix (argument or stored default), not self.Ak_test directly.
            y_predict = self.model.predict(Ak_test)
            y_test_avg = np.average(y_test)
            rmse = np.sqrt(np.average((y_predict - y_test)**2))
            r2 = 1.0 - np.sum((y_predict - y_test)**2)/np.sum((y_test - y_test_avg)**2)
            max_rel = np.amax(np.abs(y_predict - y_test)/np.abs(y_test))
            l1_rel = np.linalg.norm(y_test-y_predict, 1)/np.linalg.norm(y_test, 1)
            l2_rel = np.linalg.norm(y_test-y_predict, 2)/np.linalg.norm(y_test, 2)

        # The metrics above are only computed in 'global'/'all' mode, so guard the report on the mode as well.
        if self.mode in ['global', 'all'] and self.verbose >= 1 and comm.get_rank() == 0:
            print(f'|-> Test Statistics ({n_train}/{n_test}/{n_train + n_test}): ')
            print(f'|---> max. rel. error = {max_rel}')
            print(f'|--->   rel. L1 error = {l1_rel}')
            print(f'|--->   rel. L2 error = {l2_rel}')
            print(f'|--->            RMSE = {rmse}')
            print(f'|--->             R^2 = {r2}')
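The statistics reported above are plain NumPy expressions. For reference, a minimal standalone sketch of the same formulas on toy arrays (the values below are made up purely for illustration):

import numpy as np

# Toy predictions and targets, for illustration only.
y_test = np.array([1.0, 2.0, 4.0, 8.0])
y_predict = np.array([1.1, 1.9, 4.2, 7.5])

y_test_avg = np.average(y_test)
rmse = np.sqrt(np.average((y_predict - y_test)**2))
r2 = 1.0 - np.sum((y_predict - y_test)**2) / np.sum((y_test - y_test_avg)**2)
max_rel = np.amax(np.abs(y_predict - y_test) / np.abs(y_test))
l1_rel = np.linalg.norm(y_test - y_predict, 1) / np.linalg.norm(y_test, 1)
l2_rel = np.linalg.norm(y_test - y_predict, 2) / np.linalg.norm(y_test, 2)

print(f'RMSE = {rmse:.4f}, R^2 = {r2:.4f}')
print(f'max rel = {max_rel:.4f}, rel L1 = {l1_rel:.4f}, rel L2 = {l2_rel:.4f}')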
Code Example #2
def make_thm1_plot(expname, default, center, savedir, no_reg=False):
    rank = comm.get_rank()
    if rank == 0:
        fig, ax = plt.subplots(1, 2)
        ax[0].set_xlabel('Iterations')
        ax[1].set_xlabel('Iterations')
    for i, mon in enumerate([default, center]):
        comm.barrier()
        if mon is None:
            continue

        size = mon.world_size
        comm.resize(size)

        if rank == 0:
            global_data = get_dataframe(mon)

        local_data = get_dataframe(mon, local=True)
        # DataFrame.replace returns a copy, so assign the result back.
        local_data = local_data.replace(np.nan, 0)
        if no_reg:
            local_subproblem = np.array(local_data['subproblem'] -
                                        local_data['gk'])
        else:
            local_subproblem = np.array(local_data['subproblem'])

        local_subproblem = comm.reduce(local_subproblem, op='SUM', root=0)

        if rank == 0:
            ax[i].set_xlabel("Iterations")
            iters = np.asarray(local_data['i_iter'])
            label = r"$\sum_k \Gamma_{k}^{\sigma'}" + (r'-g_{[k]}$'
                                                       if no_reg else "$")
            ax[i].semilogy(iters,
                           local_subproblem,
                           color='tab:cyan',
                           linestyle='--',
                           label=label)

            y_axis = 'f' if no_reg else 'P'
            label = r"$f(Ax)$" if no_reg else r"$\mathcal{O}_A(x)$"
            ax[i].semilogy('i_iter',
                           y_axis,
                           '',
                           data=global_data,
                           color='tab:orange',
                           label=label)

    if rank == 0:
        ax[1].legend(loc='best')
        fig.tight_layout()

        suf = '_no_reg' if no_reg else ''
        fig.savefig(os.path.join(savedir, f'{expname}thm1{suf}.png'), dpi=300)
        plt.close(fig)
    comm.reset()
Code Example #3
def make_stop_plot(expname, default, center, savedir):
    rank = comm.get_rank()
    for mon in [default, center]:
        comm.barrier()
        if mon is None:
            continue

        size = mon.world_size
        comm.resize(size)

        global_data = get_dataframe(mon)

        local_data = get_dataframe(mon, local=True)
        sendbuf = np.array(local_data['delta_xk'])
        local_updates = None
        if rank == 0:
            local_updates = np.empty([size, len(sendbuf)])
        comm.comm.Gather(sendbuf, local_updates, root=0)

        if rank == 0:
            fig, ax_l = plt.subplots(1, 1)
            ax_r = plt.twinx(ax=ax_l)
            ax_l.set_xlabel('Iterations')
            ax_l.set_ylabel(r'$\|\|\Delta x_{k}\|\|$')
            ax_r.set_ylabel(r'$f(Ax)$')

            iters = np.asarray(local_data['i_iter'])
            for k in range(size):
                ax_l.semilogy(iters,
                              local_updates[k, :],
                              linestyle='--',
                              label=r'$\|\|\Delta x_{k}\|\|$')
            ax_r.plot('i_iter',
                      'f',
                      '',
                      data=global_data,
                      color='black',
                      label='$f(Ax)$')

            ymin, ymax = ax_r.get_ylim()
            dist = ymax - ymin
            if dist < 0.01:
                ymax += (0.01 - dist) / 2
                ymin -= (0.01 - dist) / 2
                ax_r.set_ylim(ymin, ymax)

            fig.tight_layout()
            fig.savefig(os.path.join(savedir, f'{expname}{mon.name}_stop.png'),
                        dpi=300)
            plt.close(fig)
        comm.reset()
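The block near the end of make_stop_plot widens the right-hand y-axis to a minimum range of 0.01 so that a nearly flat objective curve does not collapse into a single line. A self-contained sketch of just that step, assuming only NumPy and matplotlib and writing to a hypothetical file name:

import numpy as np
import matplotlib
matplotlib.use('Agg')  # headless backend so the sketch runs without a display
import matplotlib.pyplot as plt

fig, ax_l = plt.subplots(1, 1)
ax_r = plt.twinx(ax=ax_l)

x = np.arange(1, 11)
ax_l.semilogy(x, np.exp(-x), linestyle='--')   # decaying quantity on the left axis
ax_r.plot(x, 1.0 + 1e-3 * x, color='black')    # nearly flat curve on the right axis

# Enforce a minimum visible range of 0.01 on the right axis, as in make_stop_plot.
ymin, ymax = ax_r.get_ylim()
dist = ymax - ymin
if dist < 0.01:
    ymax += (0.01 - dist) / 2
    ymin -= (0.01 - dist) / 2
    ax_r.set_ylim(ymin, ymax)

fig.tight_layout()
fig.savefig('stop_plot_sketch.png', dpi=300)  # hypothetical output path
plt.close(fig)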
Code Example #4
def make_intercept_local_cert_plot(expname, default, center, savedir, type='gap'):
    rank = comm.get_rank()
    for mon in [default, center]:
        comm.barrier()
        if mon is None:
            continue

        size = mon.world_size
        comm.resize(size)

        global_data = get_dataframe(mon)

        local_data = get_dataframe(mon, local=True)
        sendbuf = np.abs(np.array(local_data[f'cert_{type}']))
        local_updates = None
        if rank == 0:
            local_updates = np.empty([size, len(sendbuf)])
        comm.comm.Gather(sendbuf, local_updates, root=0)

        if rank == 0:
            fig, ax_l = plt.subplots(1, 1)
            fig.set_size_inches(6.5, 3.5)
            ax_r = plt.twinx(ax=ax_l)
            ax_l.set_xlabel('Iterations')
            label = 'Local Gap' if type=='gap' else 'Local CV'
            ax_l.set_ylabel(label)
            ax_r.set_ylabel(r'$f(Ax)$')
            iters = np.asarray(local_data['i_iter'])
            for k in range(size):
                ax_l.semilogy(iters, local_updates[k,:], linestyle='--', label=label)
            gap_vals = np.abs(np.asarray(global_data['gap']))
            ax_r.semilogy(np.asarray(global_data['i_iter']), gap_vals,
                          color='black', label='$f(Ax)$')
            
            fig.tight_layout()
            fig.savefig(os.path.join(savedir, f'{expname}{mon.name}_cert_{type}.png'), dpi=300)
            plt.close(fig)
        comm.reset()
Code Example #5
def make_error_plot(expname, default, center, savedir, err='l2_rel'):
    rank = comm.get_rank()
    if rank != 0:
        comm.barrier()
        return
    if err == 'l2_rel':
        label = 'Relative L2 Error'
    elif err == 'l1_rel':
        label = 'Relative L1 Error'
    elif err == 'max_rel':
        label = 'Max. Relative Error'
    elif err == 'rmse':
        label = 'Root Mean Squared Error'
    else:
        comm.barrier()
        return
    fig, ax = plt.subplots(1, 1)
    ax.set_xlabel('Iterations')
    ax.set_ylabel(label)
    global_data = get_dataframe(default)
    ax.semilogy('i_iter',
                err,
                '',
                data=global_data,
                color='tab:purple',
                label='Default')
    global_data = get_dataframe(center)
    ax.semilogy('i_iter',
                err,
                '',
                data=global_data,
                color='tab:green',
                label='Center')

    ax.legend()
    fig.tight_layout()
    fig.savefig(os.path.join(savedir, f'{expname}{err}-error.png'), dpi=300)
    plt.close(fig)

    comm.barrier()
    return
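Several of these plots rely on matplotlib's data keyword, where string arguments are resolved as column names of the passed DataFrame. A minimal sketch with a synthetic log; the column names 'i_iter' and 'l2_rel' mirror the ones used above, and the output file name is illustrative only:

import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')  # headless backend
import matplotlib.pyplot as plt

# Synthetic run log; in the functions above this comes from get_dataframe(...).
df = pd.DataFrame({'i_iter': np.arange(1, 51),
                   'l2_rel': np.exp(-0.1 * np.arange(1, 51))})

fig, ax = plt.subplots(1, 1)
# With data=df, the strings 'i_iter' and 'l2_rel' are looked up as columns;
# the empty string is the (unused) format argument.
ax.semilogy('i_iter', 'l2_rel', '', data=df, color='tab:purple', label='Default')
ax.set_xlabel('Iterations')
ax.set_ylabel('Relative L2 Error')
ax.legend()
fig.tight_layout()
fig.savefig('error_plot_sketch.png', dpi=300)  # illustrative file name
plt.close(fig)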
Code Example #6
    dataset_path = os.path.join('data', dataset, 'features', f'{world_size}')
    X, y, X_test, y_test = load_dataset_by_rank(dataset, rank, world_size, random_state=random_state, verbose=1)
    index = np.asarray(np.load(os.path.join(dataset_path, 'index.npy'), allow_pickle=True), dtype=int)
    index_test = np.asarray(np.load(os.path.join(dataset_path, 'index_test.npy'), allow_pickle=True), dtype=int)

    # Define subproblem
    solver = configure_solver(name='ElasticNet', l1_ratio=0.8, lambda_=lam/len(y), random_state=random_state)
    if dataset == 'inv':
        solver_stop = configure_solver(name='ElasticNet', l1_ratio=0.8, lambda_=lam_stop/len(y), random_state=random_state)

    # Add hooks to log and save metrics.
    output_dir = os.path.join('out', 'report', dataset)
    clean_plots()
    # Run CoLA
    for topo in graphs_center:
        comm.barrier()
        if not graphs_center[topo]:
            continue
        suf = f'{world_size}-{topo}'

        mon_default = Monitor(output_dir, mode='all', verbose=1, Ak=X, Ak_test=X_test, y_test=y_test, name='Default')
        model_default = Cola(gamma, solver, theta, fit_intercept=False, normalize=True)
        mon_default.init(model_default, graphs_center[topo])
        model_default = model_default.fit(X, y, graphs_center[topo], mon_default, global_iters, local_iters)

        # Show test stats
        if rank == 0:
            print(f'Default - {topo}')
        mon_default.show_test_statistics()
        # Save final model
        mon_default.save(modelname=f'model-default-{suf}.pickle', logname=f'result-default-{suf}.csv')
Code Example #7
File: run_cola_experiments.py  Project: amiedlar/cola
def main(dataset):
    random_state = 42

    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0
    theta = 1e-3
    global_iters = 500
    local_iters = 20
    # Initialize process group
    comm.init_process_group('mpi')

    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Create graph with specified topology
    graphs_center = getGraphs(world_size)

    dataset_path = os.path.join('data', dataset, 'features', f'{world_size}')
    X, y, X_test, y_test = load_dataset_by_rank(dataset,
                                                rank,
                                                world_size,
                                                random_state=random_state,
                                                verbose=1)
    index = np.asarray(np.load(os.path.join(dataset_path, 'index.npy'),
                               allow_pickle=True),
                       dtype=int)
    index_test = np.asarray(np.load(os.path.join(dataset_path,
                                                 'index_test.npy'),
                                    allow_pickle=True),
                            dtype=int)

    # Define subproblem
    # lasso_solvers = getSolversByLambda(1, n_lambdas=10, size=len(y), random_state=random_state)
    # elasticnet_solvers = getSolversByLambda(0.5, n_lambdas=10, size=len(y), random_state=random_state)
    # l2_solvers = getSolversByLambda(0, n_lambdas=10, size=len(y), random_state=random_state)
    solver = configure_solver(name='ElasticNet',
                              l1_ratio=0.8,
                              lambda_=1e-3 / len(y),
                              random_state=random_state)

    # Add hooks to log and save metrics.
    output_dir = os.path.join('out', 'report', dataset)
    clean_plots()
    # Run CoLA
    for topo in graphs_center:
        comm.barrier()
        if not graphs_center[topo]:
            continue
        suf = f'{world_size}-{topo}'

        mon_default = Monitor(output_dir,
                              mode='all',
                              verbose=1,
                              Ak=X,
                              Ak_test=X_test,
                              y_test=y_test,
                              name='Default')
        model_default = Cola(gamma,
                             solver,
                             theta,
                             fit_intercept=False,
                             normalize=True)
        mon_default.init(model_default, graphs_center[topo])
        model_default = model_default.fit(X, y, graphs_center[topo],
                                          mon_default, global_iters,
                                          local_iters)

        # Show test stats
        if rank == 0:
            print(f'Default - {topo}')
        mon_default.show_test_statistics()
        # Save final model
        mon_default.save(modelname=f'model-default-{suf}.pickle',
                         logname=f'result-default-{suf}.csv')

        mon_center = Monitor(output_dir,
                             mode='all',
                             verbose=1,
                             Ak=X,
                             Ak_test=X_test,
                             y_test=y_test,
                             name='Center')
        model_center = Cola(gamma,
                            solver,
                            theta,
                            fit_intercept=True,
                            normalize=True)
        mon_center.init(model_center, graphs_center[topo])
        model_center = model_center.fit(X, y, graphs_center[topo], mon_center,
                                        global_iters, local_iters)

        # Show test stats
        if rank == 0:
            print(f'Center - {topo}')
        mon_center.show_test_statistics()

        # Save final model
        mon_center.save(modelname=f'model-center-{suf}.pickle',
                        logname=f'result-center-{suf}.csv')

        # Run CoLA
        make_intercept_plots(f'{dataset}_{topo}_', mon_default, mon_center,
                             None, index, index_test)
Code Example #8
def main(dataset, dataset_path, dataset_size, datapoints, use_split_dataset,
         split_by, random_state, algoritmname, max_global_steps, local_iters,
         solvername, output_dir, exit_time, lambda_, l1_ratio, theta,
         graph_topology, c, logmode, ckpt_freq, n_connectivity, fit_intercept,
         normalize, verbose):

    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0

    # Initialize process group
    comm.init_process_group('mpi')

    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Create graph with specified topology
    graph = define_graph_topology(world_size,
                                  graph_topology,
                                  n_connectivity=n_connectivity,
                                  verbose=verbose)

    if use_split_dataset:
        if not dataset_path:
            dataset_path = os.path.join('data', dataset, split_by,
                                        f'{world_size}')
        X, y, X_test, y_test = load_dataset_by_rank(dataset,
                                                    rank,
                                                    world_size,
                                                    dataset_size,
                                                    datapoints,
                                                    split_by,
                                                    dataset_path=dataset_path,
                                                    random_state=random_state,
                                                    verbose=verbose)
    else:
        X, y = load_dataset(dataset,
                            rank,
                            world_size,
                            dataset_size,
                            datapoints,
                            split_by,
                            dataset_path=dataset_path,
                            random_state=random_state,
                            verbose=verbose)
        # No held-out split in this branch; keep the test set empty so the
        # Monitor construction and the test-statistics check below stay valid.
        X_test, y_test = None, None

    # Define subproblem
    solver = configure_solver(name=solvername,
                              split_by=split_by,
                              l1_ratio=l1_ratio,
                              lambda_=lambda_,
                              C=c,
                              random_state=random_state)

    # Add hooks to log and save metrics.
    if algoritmname != 'cola':
        output_dir = os.path.join(output_dir, algoritmname)
    if dataset:
        output_dir = os.path.join(output_dir, dataset, f'{world_size:0>2}',
                                  graph_topology)
    monitor = Monitor(output_dir,
                      ckpt_freq=ckpt_freq,
                      exit_time=exit_time,
                      split_by=split_by,
                      mode=logmode,
                      verbose=verbose,
                      Ak=X,
                      Ak_test=X_test,
                      y_test=y_test)

    # Run CoLA
    comm.barrier()
    if algoritmname == 'cola':
        model = Cola(gamma, solver, theta, fit_intercept, normalize)
        monitor.init(model, graph)
        model = model.fit(X, y, graph, monitor, max_global_steps, local_iters)
    else:
        raise NotImplementedError()

    # Show test stats
    if X_test is not None:
        monitor.show_test_statistics()

    # Save final model
    monitor.save(modelname='model.pickle', logname='result.csv')