def show_test_statistics(self, n_train=None, intercept=0, Ak_test=None, y_test=None):
    """Compute and print test-set error metrics on rank 0.

    Metrics: max relative error, relative L1/L2 error, RMSE and R^2.

    Args:
        n_train: number of training samples; defaults to ``self.Ak.shape[0]``.
        intercept: unused here; kept for interface compatibility with callers.
        Ak_test: test design matrix; defaults to ``self.Ak_test``.
        y_test: test targets; defaults to ``self.y_test``.

    Raises:
        TypeError: if no test data is available after applying defaults.
    """
    comm.barrier()  # collective entry point: every rank must call this method
    if Ak_test is None:
        Ak_test = self.Ak_test
    if y_test is None:
        y_test = self.y_test
    if Ak_test is None or y_test is None:
        raise TypeError('Ak_test and y_test must not be None')
    if n_train is None:
        n_train = self.Ak.shape[0]
    n_test = len(y_test)
    if self.mode in ['global', 'all']:
        # BUG FIX: predict on the (possibly caller-supplied) Ak_test, not
        # unconditionally on self.Ak_test, which ignored the parameter.
        y_predict = self.model.predict(Ak_test)
        y_test_avg = np.average(y_test)
        rmse = np.sqrt(np.average((y_predict - y_test) ** 2))
        r2 = 1.0 - np.sum((y_predict - y_test) ** 2) / np.sum((y_test - y_test_avg) ** 2)
        # BUG FIX: take the absolute value of the denominator so negative
        # targets cannot flip the sign of the relative error.
        max_rel = np.amax(np.abs(y_predict - y_test) / np.abs(y_test))
        l1_rel = np.linalg.norm(y_test - y_predict, 1) / np.linalg.norm(y_test, 1)
        l2_rel = np.linalg.norm(y_test - y_predict, 2) / np.linalg.norm(y_test, 2)
        if self.verbose >= 1 and comm.get_rank() == 0:
            print(f'|-> Test Statistics ({n_train}/{n_test}/{n_train + n_test}): ')
            print(f'|---> max. rel. error = {max_rel}')
            print(f'|---> rel. L1 error = {l1_rel}')
            print(f'|---> rel. L2 error = {l2_rel}')
            print(f'|---> RMSE = {rmse}')
            print(f'|---> R^2 = {r2}')
def make_thm1_plot(expname, default, center, savedir, no_reg=False):
    """Plot the summed local subproblem values against the global objective
    for the 'default' and 'center' monitors (Theorem-1 figure).

    All ranks participate in the reduction; only rank 0 draws and saves
    ``{expname}thm1[{_no_reg}].png`` in ``savedir``.

    Args:
        expname: prefix for the output file name.
        default: monitor for the no-intercept run (may be None to skip).
        center: monitor for the intercept run (may be None to skip).
        savedir: destination directory for the figure.
        no_reg: if True, subtract the regularizer g_[k] and plot f(Ax)
            instead of the full objective.
    """
    rank = comm.get_rank()
    if rank == 0:
        fig, ax = plt.subplots(1, 2)
        ax[0].set_xlabel('Iterations')
        ax[1].set_xlabel('Iterations')
    for i, mon in enumerate([default, center]):
        comm.barrier()
        if mon is None:
            continue
        size = mon.world_size
        comm.resize(size)
        if rank == 0:
            global_data = get_dataframe(mon)
        # Every rank needs its own local log: it feeds the reduction below.
        local_data = get_dataframe(mon, local=True)
        # BUG FIX: DataFrame.replace returns a new frame; the original call
        # discarded the result, leaving NaNs in the data.
        local_data = local_data.replace(np.nan, 0)
        if no_reg:
            local_subproblem = np.array(local_data['subproblem'] - local_data['gk'])
        else:
            local_subproblem = np.array(local_data['subproblem'])
        # Sum the per-rank subproblem trajectories onto rank 0.
        local_subproblem = comm.reduce(local_subproblem, op='SUM', root=0)
        if rank == 0:
            ax[i].set_xlabel("Iterations")
            iters = np.asarray(local_data['i_iter'])
            label = r"$\sum_k \Gamma_{k}^{\sigma'}" + (r'-g_{[k]}$' if no_reg else "$")
            ax[i].semilogy(iters, local_subproblem, color='tab:cyan', linestyle='--', label=label)
            y_axis = 'f' if no_reg else 'P'
            label = r"$f(Ax)$" if no_reg else r"$\mathcal{O}_A(x)$"
            ax[i].semilogy('i_iter', y_axis, '', data=global_data, color='tab:orange', label=label)
    if rank == 0:
        ax[1].legend(loc='best')
        fig.tight_layout()
        suf = '_no_reg' if no_reg else ''
        fig.savefig(os.path.join(savedir, f'{expname}thm1{suf}.png'), dpi=300)
        plt.close(fig)
    comm.reset()
def make_stop_plot(expname, default, center, savedir):
    """Plot per-rank update norms ||Delta x_k|| (left axis, log scale)
    against the global objective f(Ax) (right axis) for each monitor.

    All ranks participate in the Gather; only rank 0 draws and saves
    ``{expname}{mon.name}_stop.png`` in ``savedir``.
    """
    rank = comm.get_rank()
    for mon in [default, center]:
        comm.barrier()
        if mon is None:
            continue
        size = mon.world_size
        comm.resize(size)
        global_data = get_dataframe(mon)
        local_data = get_dataframe(mon, local=True)
        # Each rank contributes its own delta_xk trajectory.
        sendbuf = np.array(local_data['delta_xk'])
        local_updates = None
        if rank == 0:
            # Receive buffer: one row per rank, one column per iteration.
            local_updates = np.empty([size, len(sendbuf)])
        # Collective: every rank must reach this call or the Gather deadlocks.
        comm.comm.Gather(sendbuf, local_updates, root=0)
        if rank == 0:
            fig, ax_l = plt.subplots(1, 1)
            ax_r = plt.twinx(ax=ax_l)  # secondary y-axis sharing the x-axis
            ax_l.set_xlabel('Iterations')
            ax_l.set_ylabel(r'$\|\|\Delta x_{k}\|\|$')
            ax_r.set_ylabel(r'$f(Ax)$')
            iters = np.asarray(local_data['i_iter'])
            # One dashed curve per rank (all share the same legend label).
            for k in range(size):
                ax_l.semilogy(iters, local_updates[k, :], linestyle='--', label=r'$\|\|\Delta x_{k}\|\|$')
            ax_r.plot('i_iter', 'f', '', data=global_data, color='black', label='$f(Ax)$')
            # Widen a nearly-flat right axis so the objective curve is visible.
            ymin, ymax = ax_r.get_ylim()
            dist = ymax - ymin
            if dist < 0.01:
                ymax += (0.01 - dist) / 2
                ymin -= (0.01 - dist) / 2
            ax_r.set_ylim(ymin, ymax)
            fig.tight_layout()
            fig.savefig(os.path.join(savedir, f'{expname}{mon.name}_stop.png'), dpi=300)
            plt.close(fig)
    comm.reset()
def make_intercept_local_cert_plot(expname, default, center, savedir, type='gap'):
    """Plot per-rank local certificates (gap or constraint violation)
    against the global duality gap for each monitor.

    All ranks participate in the Gather; only rank 0 draws and saves
    ``{expname}{mon.name}_cert_{type}.png`` in ``savedir``.

    NOTE(review): the parameter name ``type`` shadows the builtin, but
    renaming it would break callers passing it by keyword.
    """
    rank = comm.get_rank()
    for mon in [default, center]:
        comm.barrier()
        if mon is None:
            continue
        size = mon.world_size
        comm.resize(size)
        global_data = get_dataframe(mon)
        local_data = get_dataframe(mon, local=True)
        # Each rank contributes the magnitude of its local certificate.
        sendbuf = np.abs(np.array(local_data[f'cert_{type}']))
        local_updates = None
        if rank == 0:
            # Receive buffer: one row per rank, one column per iteration.
            local_updates = np.empty([size, len(sendbuf)])
        # Collective: every rank must reach this call or the Gather deadlocks.
        comm.comm.Gather(sendbuf, local_updates, root=0)
        if rank == 0:
            fig, ax_l = plt.subplots(1, 1)
            fig.set_size_inches(6.5, 3.5)
            ax_r = plt.twinx(ax=ax_l)  # secondary y-axis sharing the x-axis
            ax_l.set_xlabel('Iterations')
            label = 'Local Gap' if type=='gap' else 'Local CV'
            ax_l.set_ylabel(label)
            ax_r.set_ylabel(r'$f(Ax)$')
            iters = np.asarray(local_data['i_iter'])
            # One dashed curve per rank (all share the same legend label).
            for k in range(size):
                ax_l.semilogy(iters, local_updates[k,:], linestyle='--', label=label)
            data = np.abs(global_data['gap'])
            # NOTE(review): mixes a positional y-array (`data` local) with the
            # `data=global_data` keyword; confirm this draws the intended series.
            ax_r.semilogy('i_iter', data, '', data=global_data, color='black', label='$f(Ax)$')
            fig.tight_layout()
            fig.savefig(os.path.join(savedir, f'{expname}{mon.name}_cert_{type}.png'), dpi=300)
            plt.close(fig)
    comm.reset()
def make_error_plot(expname, default, center, savedir, err='l2_rel'):
    """Plot the chosen test-error metric over iterations for both monitors.

    Only rank 0 draws; every rank reaches the final barrier before
    returning. Unrecognized ``err`` values produce no figure.

    Args:
        expname: prefix for the output file name.
        default: monitor for the no-intercept run.
        center: monitor for the intercept run.
        savedir: destination directory for the figure.
        err: metric column name ('l2_rel', 'l1_rel', 'max_rel' or 'rmse').
    """
    rank = comm.get_rank()
    if rank != 0:
        comm.barrier()
        return
    axis_labels = {
        'l2_rel': 'Relative L2 Error',
        'l1_rel': 'Relative L1 Error',
        'max_rel': 'Max. Relative Error',
        'rmse': 'Root Mean Squared Error',
    }
    if err not in axis_labels:
        comm.barrier()
        return
    fig, ax = plt.subplots(1, 1)
    ax.set_xlabel('Iterations')
    ax.set_ylabel(axis_labels[err])
    # Draw one curve per monitor on a shared log-scale axis.
    for mon, color, curve_label in ((default, 'tab:purple', 'Default'),
                                    (center, 'tab:green', 'Center')):
        global_data = get_dataframe(mon)
        ax.semilogy('i_iter', err, '', data=global_data, color=color, label=curve_label)
    ax.legend()
    fig.tight_layout()
    fig.savefig(os.path.join(savedir, f'{expname}{err}-error.png'), dpi=300)
    plt.close(fig)
    comm.barrier()
    return
# Load the per-rank feature split and the train/test index arrays.
dataset_path = os.path.join('data', dataset, 'features', f'{world_size}')
X, y, X_test, y_test = load_dataset_by_rank(dataset, rank, world_size, random_state=random_state, verbose=1)
# BUG FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24; the
# builtin int is the documented replacement.
index = np.asarray(np.load(os.path.join(dataset_path, 'index.npy'), allow_pickle=True), dtype=int)
index_test = np.asarray(np.load(os.path.join(dataset_path, 'index_test.npy'), allow_pickle=True), dtype=int)

# Define subproblem
solver = configure_solver(name='ElasticNet', l1_ratio=0.8, lambda_=lam / len(y), random_state=random_state)
# BUG FIX: the original `if dataset='inv':` used assignment in the condition,
# which is a SyntaxError; equality comparison is intended.
if dataset == 'inv':
    solver_stop = configure_solver(name='ElasticNet', l1_ratio=0.8, lambda_=lam_stop / len(y), random_state=random_state)

# Add hooks to log and save metrics.
output_dir = os.path.join('out', 'report', dataset)
clean_plots()

# Run CoLA once per non-empty center topology.
for topo in graphs_center:
    comm.barrier()
    if not graphs_center[topo]:
        continue
    suf = f'{world_size}-{topo}'
    mon_default = Monitor(output_dir, mode='all', verbose=1, Ak=X, Ak_test=X_test, y_test=y_test, name='Default')
    model_default = Cola(gamma, solver, theta, fit_intercept=False, normalize=True)
    mon_default.init(model_default, graphs_center[topo])
    model_default = model_default.fit(X, y, graphs_center[topo], mon_default, global_iters, local_iters)
    # Show test stats (collective call: every rank must participate).
    if rank == 0:
        print(f'Default - {topo}')
    mon_default.show_test_statistics()
    # Save final model
    mon_default.save(modelname=f'model-default-{suf}.pickle', logname=f'result-default-{suf}.csv')
def main(dataset):
    """Run CoLA on ``dataset`` over every center topology, once without and
    once with intercept fitting, then produce the comparison plots.

    Args:
        dataset: dataset name; selects the data directory and output folder.
    """
    random_state = 42
    # Fix gamma = 1.0 according to:
    # Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0
    theta = 1e-3
    global_iters = 500
    local_iters = 20

    # Initialize process group
    comm.init_process_group('mpi')
    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Create graph with specified topology
    graphs_center = getGraphs(world_size)

    dataset_path = os.path.join('data', dataset, 'features', f'{world_size}')
    X, y, X_test, y_test = load_dataset_by_rank(dataset, rank, world_size, random_state=random_state, verbose=1)
    # BUG FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin int is the documented replacement.
    index = np.asarray(np.load(os.path.join(dataset_path, 'index.npy'), allow_pickle=True), dtype=int)
    index_test = np.asarray(np.load(os.path.join(dataset_path, 'index_test.npy'), allow_pickle=True), dtype=int)

    # Define subproblem
    solver = configure_solver(name='ElasticNet', l1_ratio=0.8, lambda_=1e-3 / len(y), random_state=random_state)

    # Add hooks to log and save metrics.
    output_dir = os.path.join('out', 'report', dataset)
    clean_plots()

    # Run CoLA once per non-empty center topology.
    for topo in graphs_center:
        comm.barrier()
        if not graphs_center[topo]:
            continue
        suf = f'{world_size}-{topo}'

        # --- Default run: no intercept ---
        mon_default = Monitor(output_dir, mode='all', verbose=1, Ak=X, Ak_test=X_test, y_test=y_test, name='Default')
        model_default = Cola(gamma, solver, theta, fit_intercept=False, normalize=True)
        mon_default.init(model_default, graphs_center[topo])
        model_default = model_default.fit(X, y, graphs_center[topo], mon_default, global_iters, local_iters)
        # Show test stats (collective call: every rank must participate).
        if rank == 0:
            print(f'Default - {topo}')
        mon_default.show_test_statistics()
        # Save final model
        mon_default.save(modelname=f'model-default-{suf}.pickle', logname=f'result-default-{suf}.csv')

        # --- Center run: with intercept ---
        mon_center = Monitor(output_dir, mode='all', verbose=1, Ak=X, Ak_test=X_test, y_test=y_test, name='Center')
        model_center = Cola(gamma, solver, theta, fit_intercept=True, normalize=True)
        mon_center.init(model_center, graphs_center[topo])
        model_center = model_center.fit(X, y, graphs_center[topo], mon_center, global_iters, local_iters)
        # Show test stats (collective call: every rank must participate).
        if rank == 0:
            print(f'Center - {topo}')
        mon_center.show_test_statistics()
        # Save final model
        mon_center.save(modelname=f'model-center-{suf}.pickle', logname=f'result-center-{suf}.csv')

        # Plot the default-vs-center comparison for this topology.
        make_intercept_plots(f'{dataset}_{topo}_', mon_default, mon_center, None, index, index_test)
def main(dataset, dataset_path, dataset_size, datapoints, use_split_dataset, split_by, random_state, algoritmname, max_global_steps, local_iters, solvername, output_dir, exit_time, lambda_, l1_ratio, theta, graph_topology, c, logmode, ckpt_freq, n_connectivity, fit_intercept, normalize, verbose):
    """Run the requested algorithm (currently only 'cola') on the dataset,
    log metrics through a Monitor, and save the final model.

    NOTE(review): the parameter name ``algoritmname`` is misspelled, but it
    is part of the public interface (likely wired to a CLI flag), so it is
    kept unchanged.

    Raises:
        NotImplementedError: if ``algoritmname`` is not 'cola'.
    """
    # Fix gamma = 1.0 according to:
    # Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0

    # Initialize process group
    comm.init_process_group('mpi')
    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()

    # Create graph with specified topology
    graph = define_graph_topology(world_size, graph_topology, n_connectivity=n_connectivity, verbose=verbose)

    if use_split_dataset:
        if not dataset_path:
            dataset_path = os.path.join('data', dataset, split_by, f'{world_size}')
        X, y, X_test, y_test = load_dataset_by_rank(dataset, rank, world_size, dataset_size, datapoints, split_by, dataset_path=dataset_path, random_state=random_state, verbose=verbose)
    else:
        X, y = load_dataset(dataset, rank, world_size, dataset_size, datapoints, split_by, dataset_path=dataset_path, random_state=random_state, verbose=verbose)
        # BUG FIX: without a split dataset there is no held-out test set;
        # bind the names so the Monitor construction and the
        # `X_test is not None` check below do not raise NameError.
        X_test = y_test = None

    # Define subproblem
    solver = configure_solver(name=solvername, split_by=split_by, l1_ratio=l1_ratio, lambda_=lambda_, C=c, random_state=random_state)

    # Add hooks to log and save metrics.
    if algoritmname != 'cola':
        output_dir = os.path.join(output_dir, algoritmname)
    if dataset:
        output_dir = os.path.join(output_dir, dataset, f'{world_size:0>2}', graph_topology)
    monitor = Monitor(output_dir, ckpt_freq=ckpt_freq, exit_time=exit_time, split_by=split_by, mode=logmode, verbose=verbose, Ak=X, Ak_test=X_test, y_test=y_test)

    # Run CoLA
    comm.barrier()
    if algoritmname == 'cola':
        model = Cola(gamma, solver, theta, fit_intercept, normalize)
        monitor.init(model, graph)
        model = model.fit(X, y, graph, monitor, max_global_steps, local_iters)
    else:
        raise NotImplementedError()

    # Show test stats only when test data exists.
    if X_test is not None:
        monitor.show_test_statistics()

    # Save final model
    monitor.save(modelname='model.pickle', logname='result.csv')