Beispiel #1
0
def make_intercept_plots(expname, default, center, affine, index, index_test, ):
    """Generate every intercept-experiment figure under out/report/img.

    Rank 0 first installs the shared matplotlib rc defaults; the individual
    plotting helpers below each contain their own rank handling, so they are
    called on every rank.
    """
    savedir = os.path.join('out', 'report', 'img')
    os.makedirs(savedir, exist_ok=True)

    if comm.get_rank() == 0:
        # Shared rc styling for all figures produced by the helpers.
        rc_settings = {
            'font': dict(size=11),
            'xtick': dict(labelsize=9),
            'ytick': dict(labelsize=9),
            'lines': dict(lw=1.4),
            'figure': dict(figsize=(6.5, 3.5)),
            'legend': dict(fancybox=False, loc='upper right',
                           fontsize='small', borderaxespad=0),
        }
        for group, params in rc_settings.items():
            plt.rc(group, **params)
        plt.tick_params(which='major', labelsize='small')
        from matplotlib import rcsetup
        # Reversed nipy_spectral sampling as the default color cycle.
        cmap = plt.cm.get_cmap(name='nipy_spectral')
        positions = 1 - np.linspace(0, 1, 20)
        plt.rc('axes', prop_cycle=rcsetup.cycler('color', cmap(positions)))

    make_intercept_regression_plot(expname, default, center, affine, index, index_test, savedir)
    make_stop_plot(expname, default, center, affine, savedir)
    # make_intercept_local_cert_plot(expname, default, center, savedir, type='gap')
    # make_intercept_local_cert_plot(expname, default, center, savedir, type='cv')
    make_intercept_global_local_plot(expname, default, center, savedir)
    make_intercept_global_local_plot(expname, default, center, savedir, no_reg=True)
Beispiel #2
0
def make_intercept_regression_plot(expname, default, center, affine, index, index_test, savedir):
    """Plot the regression curves of the default/center/affine runs.

    Parameters
    ----------
    expname : str
        Prefix for the saved image file name.
    default, center, affine : Monitor or None
        Monitors whose regressions are plotted; ``None`` entries are skipped.
    index, index_test : array-like
        Train/test sample indices passed through to ``get_regression``.
    savedir : str
        Directory the figure is written to.
    """
    rank = comm.get_rank()

    # Initialize so the rank-0 plotting below can safely test availability.
    # The original unconditionally plotted default_reg/center_reg, raising a
    # NameError whenever `center` was None (the `affine` path already had
    # exactly this guard pattern).
    default_reg = center_reg = affine_reg = None
    if center is not None:
        comm.resize(center.world_size)
        default_reg = get_regression(default, index, index_test)
        center_reg = get_regression(center, index, index_test)
        comm.reset()
    if affine is not None:
        affine_reg = get_regression(affine, index, index_test)

    if rank == 0:
        # Regression Comp Plot
        fig, ax = plt.subplots(1, 1)
        ax.set_ylabel('Meter Voltage (V)')
        plot_train_test(ax, center, index, index_test)
        t = np.linspace(0, 24, len(index) + len(index_test))
        if default_reg is not None:
            ax.plot(t, default_reg.T, label='Regression - Default', linestyle='--', color='tab:purple')
        if center_reg is not None:
            ax.plot(t, center_reg.T, label='Regression - Center', linestyle='-', color='black')
        if affine_reg is not None:
            ax.plot(t, affine_reg.T, label='Regression - Affine', linestyle=':', color='tab:green')
        ax.legend()
        ax.set_xlabel('Time (h)')
        fig.tight_layout()
        fig.savefig(os.path.join(savedir, f'{expname}regression.png'), dpi=300)
        plt.close(fig)
Beispiel #3
0
    def show_test_statistics(self, n_train=None, intercept=0, Ak_test=None, y_test=None):
        """Compute test-set error statistics and print them on rank 0.

        Parameters
        ----------
        n_train : int, optional
            Number of training samples; defaults to ``self.Ak.shape[0]``.
        intercept : float, optional
            Unused in this method; kept for interface compatibility.
        Ak_test : array-like, optional
            Test data matrix; defaults to ``self.Ak_test``.
        y_test : array-like, optional
            Test targets; defaults to ``self.y_test``.

        Raises
        ------
        TypeError
            If no test data is available from arguments or the instance.
        """
        comm.barrier()
        if Ak_test is None:
            Ak_test = self.Ak_test
        if y_test is None:
            y_test = self.y_test
        if Ak_test is None or y_test is None:
            raise TypeError('Ak_test and y_test must not be None')

        if n_train is None:
            n_train = self.Ak.shape[0]
        n_test = len(y_test)

        # The metrics are only defined when a global model is tracked.
        # Returning early avoids the NameError the original raised in
        # 'local' mode (the print block referenced undefined variables).
        if self.mode not in ['global', 'all']:
            return

        # Use the (possibly caller-supplied) Ak_test that was validated
        # above; the original ignored the argument and always predicted
        # on self.Ak_test.
        y_predict = self.model.predict(Ak_test)
        y_test_avg = np.average(y_test)
        rmse = np.sqrt(np.average((y_predict - y_test)**2))
        r2 = 1.0 - np.sum((y_predict - y_test)**2)/np.sum((y_test - y_test_avg)**2)
        max_rel = np.amax(np.abs(y_predict - y_test)/y_test)
        l1_rel = np.linalg.norm(y_test-y_predict, 1)/np.linalg.norm(y_test, 1)
        l2_rel = np.linalg.norm(y_test-y_predict, 2)/np.linalg.norm(y_test, 2)

        if self.verbose >= 1 and comm.get_rank() == 0:
            print(f'|-> Test Statistics ({n_train}/{n_test}/{n_train + n_test}): ')
            print(f'|---> max. rel. error = {max_rel}')
            print(f'|--->   rel. L1 error = {l1_rel}')
            print(f'|--->   rel. L2 error = {l2_rel}')
            print(f'|--->            RMSE = {rmse}')
            print(f'|--->             R^2 = {r2}')
    def __init__(self,
                 output_dir,
                 ckpt_freq=-1,
                 exit_time=None,
                 split_by='features',
                 mode='local',
                 Ak=None,
                 Ak_test=None,
                 y_test=None,
                 verbose=1,
                 name=''):
        """Monitor that logs, checkpoints, and evaluates a solver run.

        Parameters
        ----------
        output_dir : str
            Directory for output; created if it does not exist.
        ckpt_freq : int, optional
            Frequency of the checkpoint (default -1; presumably a negative
            value disables checkpointing — confirm against the checkpoint
            logic, which is not visible here).
        exit_time : float, optional
            Exit if the program has been running for `exit_time`. (the
            default is None, which disables this criterion.)
        split_by : str, optional
            Whether the data matrix is split by 'samples' or 'features'
            (the default is 'features').
        mode : str, optional
             * `local` mode only logs duality gap of the local solver.
             * `global` mode logs duality gap of the whole program. It takes
               more time to compute.
             * `all` is also accepted by consumers of ``self.mode``.
        Ak : array-like, optional
            Local block of the training data matrix.
        Ak_test : array-like, optional
            Test data matrix; together with `y_test` enables prediction
            tests.
        y_test : array-like, optional
            Test targets.
        verbose : int, optional
            Verbosity level (the default is 1).
        name : str, optional
            Label used to tag this monitor's output.
        """
        self.name = name
        self.Ak = Ak
        self.Ak_test = Ak_test
        self.y_test = y_test
        # Prediction tests need both the test matrix and the test targets.
        self.do_prediction_tests = self.Ak_test is not None and self.y_test is not None

        self.rank = comm.get_rank()
        self.world_size = comm.get_world_size()

        # Wall-clock bookkeeping used by the exit_time criterion.
        self.running_time = 0
        self.previous_time = time.time()
        self.exit_time = exit_time or np.inf

        self.verbose = verbose

        # Separate record lists; records_l/records_g presumably hold local
        # and global metrics respectively — confirm in the logging code.
        self.records = []
        self.records_l = []
        self.records_g = []
        self.mode = mode
        self.ckpt_freq = ckpt_freq
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)
        self.model = None

        # If a problem is split by samples, then the total number of data points is unknown
        # in a local node. As a result, we will defer the division to the logging time.
        self.split_by_samples = split_by == 'samples'

        self._sigma_sum = None
Beispiel #5
0
def main(dataset, dataset_path, dataset_size, use_split_dataset, split_by,
         random_state, algoritmname, max_global_steps, local_iters, solvername,
         output_dir, exit_time, lambda_, l1_ratio, theta, graph_topology, c,
         logmode, ckpt_freq, n_connectivity):
    """Run a distributed CoLA experiment end to end.

    Initializes MPI, loads this rank's shard of the dataset, configures the
    subproblem solver, runs the requested algorithm, and saves weights and
    the result log. All parameters arrive from the CLI layer (not visible
    here).
    """

    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0

    # Initialize process group (must precede any comm.* call below)
    comm.init_process_group('mpi')

    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Create graph with specified topology
    graph = define_graph_topology(world_size,
                                  graph_topology,
                                  n_connectivity=n_connectivity)

    # Either load a pre-split per-rank shard, or split a full dataset here.
    if use_split_dataset:
        X, y = load_dataset_by_rank(dataset,
                                    rank,
                                    world_size,
                                    dataset_size,
                                    split_by,
                                    dataset_path=dataset_path,
                                    random_state=random_state)
    else:
        X, y = load_dataset(dataset,
                            rank,
                            world_size,
                            dataset_size,
                            split_by,
                            dataset_path=dataset_path,
                            random_state=random_state)

    # Define subproblem
    solver = configure_solver(name=solvername,
                              split_by=split_by,
                              l1_ratio=l1_ratio,
                              lambda_=lambda_,
                              C=c,
                              random_state=random_state)

    # Add hooks to log and save metrics.
    # NOTE(review): Monitor is called here with `solver` as the first
    # positional argument — confirm this matches the Monitor constructor
    # in use (other snippets show a Monitor taking output_dir first).
    monitor = Monitor(solver, output_dir, ckpt_freq, exit_time, split_by,
                      logmode)

    # Always use this value throughout this project
    Akxk, xk = run_algorithm(algoritmname, X, y, solver, gamma, theta,
                             max_global_steps, local_iters, world_size, graph,
                             monitor)

    monitor.save(Akxk, xk, weightname='weight.npy', logname='result.csv')
def make_thm1_plot(expname, default, center, savedir, no_reg=False):
    """Plot summed local subproblem values against the global objective.

    One subplot per monitor (default, center). Collective calls (barrier,
    resize, reduce) run on every rank; plotting happens on rank 0 only.

    Parameters
    ----------
    expname : str
        Prefix for the output file name.
    default, center : Monitor or None
        Monitors to plot; ``None`` entries are skipped.
    savedir : str
        Directory the figure is written to.
    no_reg : bool, optional
        If True, subtract the local regularizer ``gk`` from the subproblem
        values and plot ``f(Ax)`` instead of the full objective.
    """
    rank = comm.get_rank()
    if rank == 0:
        fig, ax = plt.subplots(1, 2)
        ax[0].set_xlabel('Iterations')
        ax[1].set_xlabel('Iterations')
    for i, mon in enumerate([default, center]):
        comm.barrier()
        if mon is None:
            continue

        size = mon.world_size
        comm.resize(size)

        if rank == 0:
            global_data = get_dataframe(mon)

        local_data = get_dataframe(mon, local=True)
        # BUG FIX: DataFrame.replace returns a new frame — the original
        # discarded the result, so NaNs were never actually replaced.
        local_data = local_data.replace(np.nan, 0)
        if no_reg:
            local_subproblem = np.array(local_data['subproblem'] -
                                        local_data['gk'])
        else:
            local_subproblem = np.array(local_data['subproblem'])

        # Sum the per-node subproblem trajectories onto rank 0.
        local_subproblem = comm.reduce(local_subproblem, op='SUM', root=0)

        if rank == 0:
            ax[i].set_xlabel("Iterations")
            iters = np.asarray(local_data['i_iter'])
            label = r"$\sum_k \Gamma_{k}^{\sigma'}" + (r'-g_{[k]}$'
                                                       if no_reg else "$")
            ax[i].semilogy(iters,
                           local_subproblem,
                           color='tab:cyan',
                           linestyle='--',
                           label=label)

            y_axis = 'f' if no_reg else 'P'
            label = r"$f(Ax)$" if no_reg else r"$\mathcal{O}_A(x)$"
            ax[i].semilogy('i_iter',
                           y_axis,
                           '',
                           data=global_data,
                           color='tab:orange',
                           label=label)

    if rank == 0:
        ax[1].legend(loc='best')
        fig.tight_layout()

        suf = '_no_reg' if no_reg else ''
        fig.savefig(os.path.join(savedir, f'{expname}thm1{suf}.png'), dpi=300)
        plt.close(fig)
    comm.reset()
def make_stop_plot(expname, default, center, savedir):
    """Plot per-node update norms ||Delta x_k|| with f(Ax) on a twin axis.

    For each monitor, gathers every rank's 'delta_xk' series onto rank 0
    and plots them on a log scale; the global objective 'f' is overlaid on
    a right-hand axis. Collective calls run on all ranks; plotting and
    saving happen on rank 0 only.
    """
    rank = comm.get_rank()
    for mon in [default, center]:
        comm.barrier()
        if mon is None:
            continue

        size = mon.world_size
        comm.resize(size)

        global_data = get_dataframe(mon)

        local_data = get_dataframe(mon, local=True)
        # Gather each rank's delta_xk trajectory into rows of local_updates.
        # assumes every rank has the same number of iterations logged —
        # TODO confirm (Gather requires equal-size send buffers).
        sendbuf = np.array(local_data['delta_xk'])
        local_updates = None
        if rank == 0:
            local_updates = np.empty([size, len(sendbuf)])
        comm.comm.Gather(sendbuf, local_updates, root=0)

        if rank == 0:
            fig, ax_l = plt.subplots(1, 1)
            ax_r = plt.twinx(ax=ax_l)
            ax_l.set_xlabel('Iterations')
            ax_l.set_ylabel(r'$\|\|\Delta x_{k}\|\|$')
            ax_r.set_ylabel(r'$f(Ax)$')

            iters = np.asarray(local_data['i_iter'])
            # One dashed curve per rank (all share the same legend label).
            for k in range(size):
                ax_l.semilogy(iters,
                              local_updates[k, :],
                              linestyle='--',
                              label=r'$\|\|\Delta x_{k}\|\|$')
            ax_r.plot('i_iter',
                      'f',
                      '',
                      data=global_data,
                      color='black',
                      label='$f(Ax)$')

            # Widen a nearly-flat right axis so the objective curve is
            # visible rather than a straight line on a zero-height range.
            ymin, ymax = ax_r.get_ylim()
            dist = ymax - ymin
            if dist < 0.01:
                ymax += (0.01 - dist) / 2
                ymin -= (0.01 - dist) / 2
                ax_r.set_ylim(ymin, ymax)

            fig.tight_layout()
            fig.savefig(os.path.join(savedir, f'{expname}{mon.name}_stop.png'),
                        dpi=300)
            plt.close(fig)
        # NOTE(review): reset() runs once per monitor here (inside the
        # loop), unlike sibling plot helpers that reset once at the end —
        # confirm this is intentional.
        comm.reset()
def make_error_plot(expname, default, center, savedir, err='l2_rel'):
    """Plot one test-error metric for the default and center runs.

    Only rank 0 draws and saves the figure; every other rank (and any
    unknown `err` value) just joins the barrier and returns.
    """
    if comm.get_rank() != 0:
        comm.barrier()
        return

    axis_labels = {
        'l2_rel': 'Relative L2 Error',
        'l1_rel': 'Relative L1 Error',
        'max_rel': 'Max. Relative Error',
        'rmse': 'Root Mean Squared Error',
    }
    if err not in axis_labels:
        comm.barrier()
        return

    fig, ax = plt.subplots(1, 1)
    ax.set_xlabel('Iterations')
    ax.set_ylabel(axis_labels[err])

    # Same metric, one curve per monitor.
    for mon, color, curve_label in ((default, 'tab:purple', 'Default'),
                                    (center, 'tab:green', 'Center')):
        ax.semilogy('i_iter',
                    err,
                    '',
                    data=get_dataframe(mon),
                    color=color,
                    label=curve_label)

    ax.legend()
    fig.tight_layout()
    fig.savefig(os.path.join(savedir, f'{expname}{err}-error.png'), dpi=300)
    plt.close(fig)

    comm.barrier()
    return
def make_report_plots(expname,
                      default,
                      center,
                      index,
                      index_test,
                      reg=False,
                      stop=False):
    """Generate the report figures for a pair of monitors.

    Rank 0 installs shared rc styling first; the plotting helpers handle
    their own rank logic. `stop` selects the stopping-criterion plot only;
    otherwise either a regression or error plot plus the thm1 plot is made.
    """
    savedir = os.path.join('out', 'report', 'img')
    os.makedirs(savedir, exist_ok=True)

    if comm.get_rank() == 0:
        # Shared rc styling for all report figures.
        styling = {
            'font': dict(size=10),
            'xtick': dict(labelsize=9),
            'ytick': dict(labelsize=9),
            'lines': dict(lw=1.0),
            'figure': dict(figsize=(4.5, 2)),
            'legend': dict(fancybox=False,
                           loc='upper right',
                           fontsize='small',
                           borderaxespad=0),
        }
        for group, params in styling.items():
            plt.rc(group, **params)
        plt.tick_params(which='major', labelsize='small')
        from matplotlib import rcsetup
        # Reversed nipy_spectral sampling as the default color cycle.
        cmap = plt.cm.get_cmap(name='nipy_spectral')
        positions = 1 - np.linspace(0, 1, 20)
        plt.rc('axes', prop_cycle=rcsetup.cycler('color', cmap(positions)))

    if stop:
        make_stop_plot(expname, default, center, savedir)
        return

    if reg:
        make_regression_plot(expname, default, center, index, index_test,
                             savedir)
    else:
        make_error_plot(expname, default, center, savedir)
    make_thm1_plot(expname, default, center, savedir)
Beispiel #10
0
def main(dataset):
    """Set up and run the intercept experiment for a named dataset.

    Note: this snippet is truncated by the example extractor; the tail of
    the original function is not visible here.
    """
    if dataset == 'inv':
        lam_stop = 3.15
        lam = 0.01467
        reg = True
    elif dataset == 'mg':
        lam = 1e-3
        reg = True
    else:
        print('dataset not supported')
        return
    random_state = 42

    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0
    theta = 1e-1
    global_iters = 500
    local_iters = 5
    # Initialize process group
    comm.init_process_group('mpi')

    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Create graph with specified topology
    graphs_center = getGraphs(world_size)

    dataset_path = os.path.join('data', dataset, 'features', f'{world_size}')
    X, y, X_test, y_test = load_dataset_by_rank(dataset, rank, world_size, random_state=random_state, verbose=1)
    # Use builtin int: np.int was deprecated in NumPy 1.20 and removed in 1.24.
    index = np.asarray(np.load(os.path.join(dataset_path, 'index.npy'), allow_pickle=True), dtype=int)
    index_test = np.asarray(np.load(os.path.join(dataset_path, 'index_test.npy'), allow_pickle=True), dtype=int)

    # Define subproblem
    solver = configure_solver(name='ElasticNet', l1_ratio=0.8, lambda_=lam/len(y), random_state=random_state)
    # BUG FIX: original read `if dataset='inv':` — assignment in an if
    # condition is a SyntaxError in Python; equality comparison intended.
    if dataset == 'inv':
        solver_stop = configure_solver(name='ElasticNet', l1_ratio=0.8, lambda_=lam_stop/len(y), random_state=random_state)
Beispiel #11
0
def make_intercept_local_cert_plot(expname, default, center, savedir, type='gap'):
    """Plot per-node local certificates (gap or CV) with |global gap| overlaid.

    For each monitor, gathers every rank's |cert_<type>| series onto rank 0
    and plots them on a log scale; a right-hand axis shows the absolute
    global gap. Collective calls run on all ranks; plotting is rank 0 only.

    Note: the parameter name `type` shadows the builtin; kept as-is since
    callers pass it by keyword.
    """
    rank = comm.get_rank()
    for mon in [default, center]:
        comm.barrier()
        if mon is None:
            continue

        size = mon.world_size
        comm.resize(size)

        global_data = get_dataframe(mon)

        local_data = get_dataframe(mon, local=True)
        # assumes every rank logged the same number of iterations — TODO
        # confirm (Gather requires equal-size send buffers).
        sendbuf = np.abs(np.array(local_data[f'cert_{type}']))
        local_updates = None
        if rank == 0:
            local_updates = np.empty([size, len(sendbuf)])
        comm.comm.Gather(sendbuf, local_updates, root=0)

        if rank == 0:
            fig, ax_l = plt.subplots(1, 1)
            fig.set_size_inches(6.5, 3.5)
            ax_r = plt.twinx(ax=ax_l)
            ax_l.set_xlabel('Iterations')
            label = 'Local Gap' if type=='gap' else 'Local CV'
            ax_l.set_ylabel(label)
            ax_r.set_ylabel(r'$f(Ax)$')
            iters = np.asarray(local_data['i_iter'])
            # One dashed curve per rank (all share the same legend label).
            for k in range(size):
                ax_l.semilogy(iters, local_updates[k,:], linestyle='--', label=label)
            data = np.abs(global_data['gap'])
            # NOTE(review): this mixes a positional y-array (`data`, the
            # absolute gap) with the `data=global_data` keyword, and labels
            # the curve '$f(Ax)$' while plotting |gap| — confirm which
            # quantity is actually intended here.
            ax_r.semilogy('i_iter', data, '', data=global_data, color='black', label='$f(Ax)$')
            
            fig.tight_layout()
            fig.savefig(os.path.join(savedir, f'{expname}{mon.name}_cert_{type}.png'), dpi=300)
            plt.close(fig)
        comm.reset()
Beispiel #12
0
def clean_plots():
    """Delete previously generated report images (rank 0 only)."""
    if comm.get_rank() != 0:
        return
    savedir = os.path.join('out', 'report', 'img')
    if not os.path.exists(savedir):
        return
    for image_path in glob.glob(os.path.join(savedir, '*.png')):
        os.remove(image_path)
Beispiel #13
0
def main(dataset):
    """Run the intercept comparison experiment for `dataset`.

    For every available graph topology, trains two CoLA models on this
    rank's feature shard — one without an intercept ('Default') and one
    with ('Center') — prints test statistics, saves models/logs, and
    generates the comparison plots.
    """
    random_state = 42

    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0
    theta = 1e-3
    global_iters = 500
    local_iters = 20
    # Initialize process group
    comm.init_process_group('mpi')

    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Create graph with specified topology
    graphs_center = getGraphs(world_size)

    dataset_path = os.path.join('data', dataset, 'features', f'{world_size}')
    X, y, X_test, y_test = load_dataset_by_rank(dataset,
                                                rank,
                                                world_size,
                                                random_state=random_state,
                                                verbose=1)
    # NOTE(review): np.int was deprecated in NumPy 1.20 and removed in
    # 1.24 — consider plain `int` when touching this code.
    index = np.asarray(np.load(os.path.join(dataset_path, 'index.npy'),
                               allow_pickle=True),
                       dtype=np.int)
    index_test = np.asarray(np.load(os.path.join(dataset_path,
                                                 'index_test.npy'),
                                    allow_pickle=True),
                            dtype=np.int)

    # Define subproblem
    # lasso_solvers = getSolversByLambda(1, n_lambdas=10, size=len(y), random_state=random_state)
    # elasticnet_solvers = getSolversByLambda(0.5, n_lambdas=10, size=len(y), random_state=random_state)
    # l2_solvers = getSolversByLambda(0, n_lambdas=10, size=len(y), random_state=random_state)
    solver = configure_solver(name='ElasticNet',
                              l1_ratio=0.8,
                              lambda_=1e-3 / len(y),
                              random_state=random_state)

    # Add hooks to log and save metrics.
    output_dir = os.path.join('out', 'report', dataset)
    clean_plots()
    # Run CoLA once per topology; skip topologies with no graph defined.
    for topo in graphs_center:
        comm.barrier()
        if not graphs_center[topo]:
            continue
        suf = f'{world_size}-{topo}'

        # --- Run without intercept ('Default') ---
        mon_default = Monitor(output_dir,
                              mode='all',
                              verbose=1,
                              Ak=X,
                              Ak_test=X_test,
                              y_test=y_test,
                              name='Default')
        model_default = Cola(gamma,
                             solver,
                             theta,
                             fit_intercept=False,
                             normalize=True)
        mon_default.init(model_default, graphs_center[topo])
        model_default = model_default.fit(X, y, graphs_center[topo],
                                          mon_default, global_iters,
                                          local_iters)

        # Show test stats
        if rank == 0:
            print(f'Default - {topo}')
        mon_default.show_test_statistics()
        # Save final model
        mon_default.save(modelname=f'model-default-{suf}.pickle',
                         logname=f'result-default-{suf}.csv')

        # --- Run with intercept ('Center') ---
        mon_center = Monitor(output_dir,
                             mode='all',
                             verbose=1,
                             Ak=X,
                             Ak_test=X_test,
                             y_test=y_test,
                             name='Center')
        model_center = Cola(gamma,
                            solver,
                            theta,
                            fit_intercept=True,
                            normalize=True)
        mon_center.init(model_center, graphs_center[topo])
        model_center = model_center.fit(X, y, graphs_center[topo], mon_center,
                                        global_iters, local_iters)

        # Show test stats
        if rank == 0:
            print(f'Center - {topo}')
        mon_center.show_test_statistics()

        # Save final model
        mon_center.save(modelname=f'model-center-{suf}.pickle',
                        logname=f'result-center-{suf}.csv')

        # Generate the comparison plots for this topology (affine=None).
        make_intercept_plots(f'{dataset}_{topo}_', mon_default, mon_center,
                             None, index, index_test)
Beispiel #14
0
def main(dataset, dataset_path, dataset_size, datapoints, use_split_dataset,
         split_by, random_state, algoritmname, max_global_steps, local_iters,
         solvername, output_dir, exit_time, lambda_, l1_ratio, theta,
         graph_topology, c, logmode, ckpt_freq, n_connectivity, fit_intercept,
         normalize, verbose):
    """Run a distributed CoLA experiment end to end.

    Initializes MPI, loads this rank's data shard (optionally with test
    data), configures the subproblem solver and monitor, fits the model,
    and saves the result. All parameters arrive from the CLI layer.
    """

    # Fix gamma = 1.0 according to:
    #   Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0

    # Initialize process group (must precede any comm.* call below)
    comm.init_process_group('mpi')

    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Create graph with specified topology
    graph = define_graph_topology(world_size,
                                  graph_topology,
                                  n_connectivity=n_connectivity,
                                  verbose=verbose)

    # Test data is only provided by the pre-split loader. Initialize to
    # None so the `X_test is not None` check below does not raise a
    # NameError on the non-split path (it was previously unbound there).
    X_test = y_test = None
    if use_split_dataset:
        if not dataset_path:
            dataset_path = os.path.join('data', dataset, split_by,
                                        f'{world_size}')
        X, y, X_test, y_test = load_dataset_by_rank(dataset,
                                                    rank,
                                                    world_size,
                                                    dataset_size,
                                                    datapoints,
                                                    split_by,
                                                    dataset_path=dataset_path,
                                                    random_state=random_state,
                                                    verbose=verbose)
    else:
        X, y = load_dataset(dataset,
                            rank,
                            world_size,
                            dataset_size,
                            datapoints,
                            split_by,
                            dataset_path=dataset_path,
                            random_state=random_state,
                            verbose=verbose)

    # Define subproblem
    solver = configure_solver(name=solvername,
                              split_by=split_by,
                              l1_ratio=l1_ratio,
                              lambda_=lambda_,
                              C=c,
                              random_state=random_state)

    # Add hooks to log and save metrics; nest the output dir by algorithm,
    # dataset, world size, and topology so runs don't overwrite each other.
    if algoritmname != 'cola':
        output_dir = os.path.join(output_dir, algoritmname)
    if dataset:
        output_dir = os.path.join(output_dir, dataset, f'{world_size:0>2}',
                                  graph_topology)
    monitor = Monitor(output_dir,
                      ckpt_freq=ckpt_freq,
                      exit_time=exit_time,
                      split_by=split_by,
                      mode=logmode,
                      verbose=verbose,
                      Ak=X,
                      Ak_test=X_test,
                      y_test=y_test)

    # Run CoLA
    comm.barrier()
    if algoritmname == 'cola':
        model = Cola(gamma, solver, theta, fit_intercept, normalize)
        monitor.init(model, graph)
        model = model.fit(X, y, graph, monitor, max_global_steps, local_iters)
    else:
        raise NotImplementedError()

    # Show test stats (only meaningful when test data was loaded)
    if X_test is not None:
        monitor.show_test_statistics()

    # Save final model
    monitor.save(modelname='model.pickle', logname='result.csv')