def make_intercept_plots(expname, default, center, affine, index, index_test, ):
    """Generate every intercept-related report figure under out/report/img.

    Rank 0 configures the matplotlib rc styling; the plot helpers are then
    invoked on all ranks because they contain collective communication.
    """
    savedir = os.path.join('out', 'report', 'img')
    os.makedirs(savedir, exist_ok=True)
    if comm.get_rank() == 0:
        # Report-wide rc styling: fonts, figure size, legend and colour cycle.
        plt.rc('font', size=11)
        plt.rc('xtick', labelsize=9)
        plt.rc('ytick', labelsize=9)
        plt.rc('lines', lw=1.4)
        plt.rc('figure', figsize=(6.5, 3.5))
        plt.rc('legend', fancybox=False, loc='upper right', fontsize='small', borderaxespad=0)
        plt.tick_params(which='major', labelsize='small')
        from matplotlib import rcsetup
        spectral = plt.cm.get_cmap(name='nipy_spectral')
        positions = 1 - np.linspace(0, 1, 20)
        plt.rc('axes', prop_cycle=rcsetup.cycler('color', spectral(positions)))
    make_intercept_regression_plot(expname, default, center, affine, index, index_test, savedir)
    make_stop_plot(expname, default, center, affine, savedir)
    # make_intercept_local_cert_plot(expname, default, center, savedir, type='gap')
    # make_intercept_local_cert_plot(expname, default, center, savedir, type='cv')
    make_intercept_global_local_plot(expname, default, center, savedir)
    make_intercept_global_local_plot(expname, default, center, savedir, no_reg=True)
def make_intercept_regression_plot(expname, default, center, affine, index, index_test, savedir):
    """Plot regression curves of the default/center (and optional affine) runs."""
    rank = comm.get_rank()
    if center is not None:
        # Evaluate default/center regressions at the center run's world size.
        comm.resize(center.world_size)
        default_reg = get_regression(default, index, index_test)
        center_reg = get_regression(center, index, index_test)
        comm.reset()
    if affine is not None:
        affine_reg = get_regression(affine, index, index_test)
    if rank != 0:
        return
    # Regression Comp Plot
    fig, ax = plt.subplots(1, 1)
    ax.set_ylabel('Meter Voltage (V)')
    plot_train_test(ax, center, index, index_test)
    t = np.linspace(0, 24, len(index) + len(index_test))
    ax.plot(t, default_reg.T, label=f'Regression - Default', linestyle='--', color='tab:purple')
    ax.plot(t, center_reg.T, label=f'Regression - Center', linestyle='-', color='black')
    if affine is not None:
        ax.plot(t, affine_reg.T, label=f'Regression - Affine', linestyle=':', color='tab:green')
    ax.legend()
    ax.set_xlabel('Time (h)')
    fig.tight_layout()
    fig.savefig(os.path.join(savedir, f'{expname}regression.png'), dpi=300)
    plt.close(fig)
def show_test_statistics(self, n_train=None, intercept=0, Ak_test=None, y_test=None):
    """Compute and print test-set regression metrics (max/L1/L2 relative error, RMSE, R^2).

    Parameters
    ----------
    n_train : int, optional
        Number of training samples; defaults to ``self.Ak.shape[0]``.
    intercept : float, optional
        Unused in this method; kept for interface compatibility.
    Ak_test : array, optional
        Test design matrix; defaults to ``self.Ak_test``.
    y_test : array, optional
        Test targets; defaults to ``self.y_test``.

    Raises
    ------
    TypeError
        If no test data is available from arguments or the instance.
    """
    comm.barrier()
    if Ak_test is None:
        Ak_test = self.Ak_test
    if y_test is None:
        y_test = self.y_test
    if Ak_test is None or y_test is None:
        raise TypeError('Ak_test and y_test must not be None')
    if n_train is None:
        n_train = self.Ak.shape[0]
    n_test = len(y_test)
    # Only global/all modes hold a fitted global model to evaluate.
    if self.mode in ['global', 'all']:
        # BUG FIX: predict on the (possibly caller-supplied) Ak_test; the
        # original always used self.Ak_test and silently ignored the argument.
        y_predict = self.model.predict(Ak_test)
        y_test_avg = np.average(y_test)
        rmse = np.sqrt(np.average((y_predict - y_test)**2))
        r2 = 1.0 - np.sum((y_predict - y_test)**2) / np.sum((y_test - y_test_avg)**2)
        # BUG FIX: use |y_test| in the denominator; with negative targets the
        # signed ratio could be negative and hide the true maximum error.
        max_rel = np.amax(np.abs(y_predict - y_test) / np.abs(y_test))
        l1_rel = np.linalg.norm(y_test - y_predict, 1) / np.linalg.norm(y_test, 1)
        l2_rel = np.linalg.norm(y_test - y_predict, 2) / np.linalg.norm(y_test, 2)
        if self.verbose >= 1 and comm.get_rank() == 0:
            print(f'|-> Test Statistics ({n_train}/{n_test}/{n_train + n_test}): ')
            print(f'|---> max. rel. error = {max_rel}')
            print(f'|---> rel. L1 error = {l1_rel}')
            print(f'|---> rel. L2 error = {l2_rel}')
            print(f'|---> RMSE = {rmse}')
            print(f'|---> R^2 = {r2}')
def __init__(self, output_dir, ckpt_freq=-1, exit_time=None, split_by='features', mode='local',
             Ak=None, Ak_test=None, y_test=None, verbose=1, name=''):
    """Set up a training monitor that records metrics and checkpoints.

    Parameters
    ----------
    output_dir : str
        Directory for output; created if it does not exist.
    ckpt_freq : int, optional
        Frequency of the checkpoint (default -1).
        NOTE(review): -1 presumably disables checkpointing — confirm against
        the code that consumes `self.ckpt_freq`.
    exit_time : float, optional
        Exit if the program has been running for `exit_time`.
        (the default is None, which disables this criterion.)
    split_by : str, optional
        Whether the data matrix is split by 'samples' or 'features'
        (the default is 'features').
    mode : ['local', 'global', 'all', None], optional
        * `local` mode only logs duality gap of local solver.
        * `global` mode logs duality gap of the whole program. It takes more
          time to compute.
    Ak : array, optional
        Local training data block; used for the training sample count.
    Ak_test : array, optional
        Test design matrix; enables prediction tests when given with `y_test`.
    y_test : array, optional
        Test targets; enables prediction tests when given with `Ak_test`.
    verbose : int, optional
        Verbosity level (default 1).
    name : str, optional
        Label for this monitor, used in output naming.
    """
    self.name = name
    self.Ak = Ak
    self.Ak_test = Ak_test
    self.y_test = y_test
    # Prediction tests require both a test matrix and test targets.
    self.do_prediction_tests = self.Ak_test is not None and self.y_test is not None
    self.rank = comm.get_rank()
    self.world_size = comm.get_world_size()
    # Wall-clock accounting for the exit_time criterion.
    self.running_time = 0
    self.previous_time = time.time()
    self.exit_time = exit_time or np.inf
    self.verbose = verbose
    # Separate record lists: combined, local-only, global-only.
    self.records = []
    self.records_l = []
    self.records_g = []
    self.mode = mode
    self.ckpt_freq = ckpt_freq
    self.output_dir = output_dir
    os.makedirs(self.output_dir, exist_ok=True)
    self.model = None
    # If a problem is split by samples, then the total number of data points is unknown
    # in a local node. As a result, we will defer the division to the logging time.
    self.split_by_samples = split_by == 'samples'
    self._sigma_sum = None
def main(dataset, dataset_path, dataset_size, use_split_dataset, split_by, random_state, algoritmname, max_global_steps, local_iters, solvername, output_dir, exit_time, lambda_, l1_ratio, theta, graph_topology, c, logmode, ckpt_freq, n_connectivity):
    """Driver: load the local data shard, build solver and monitor, run the algorithm."""
    # gamma is fixed to 1.0 following:
    # Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0
    # Set up the MPI process group and query this process' position in it.
    comm.init_process_group('mpi')
    rank = comm.get_rank()
    world_size = comm.get_world_size()
    # Communication graph with the requested topology.
    graph = define_graph_topology(world_size, graph_topology, n_connectivity=n_connectivity)
    # Both loaders share the same call signature; pick one up front.
    loader = load_dataset_by_rank if use_split_dataset else load_dataset
    X, y = loader(dataset, rank, world_size, dataset_size, split_by,
                  dataset_path=dataset_path, random_state=random_state)
    # Local subproblem solver.
    solver = configure_solver(name=solvername, split_by=split_by, l1_ratio=l1_ratio,
                              lambda_=lambda_, C=c, random_state=random_state)
    # Monitor logs and saves metrics during the run.
    monitor = Monitor(solver, output_dir, ckpt_freq, exit_time, split_by, logmode)
    Akxk, xk = run_algorithm(algoritmname, X, y, solver, gamma, theta,
                             max_global_steps, local_iters, world_size, graph, monitor)
    monitor.save(Akxk, xk, weightname='weight.npy', logname='result.csv')
def make_thm1_plot(expname, default, center, savedir, no_reg=False):
    """Plot the summed local subproblem values against the global objective.

    Parameters
    ----------
    expname : str
        Prefix for the saved figure filename.
    default, center : Monitor or None
        Monitors of the two runs; a None entry is skipped.
    savedir : str
        Directory where the figure is written.
    no_reg : bool, optional
        If True, subtract g_[k] from each local subproblem and plot f(Ax)
        instead of the regularized objective.
    """
    rank = comm.get_rank()
    if rank == 0:
        fig, ax = plt.subplots(1, 2)
        ax[0].set_xlabel('Iterations')
        ax[1].set_xlabel('Iterations')
    for i, mon in enumerate([default, center]):
        comm.barrier()
        if mon is None:
            continue
        size = mon.world_size
        comm.resize(size)
        if rank == 0:
            global_data = get_dataframe(mon)
        local_data = get_dataframe(mon, local=True)
        # BUG FIX: DataFrame.replace returns a new frame; the original call
        # discarded the result, leaving NaNs in the data.
        local_data = local_data.replace(np.nan, 0)
        if no_reg:
            local_subproblem = np.array(local_data['subproblem'] - local_data['gk'])
        else:
            local_subproblem = np.array(local_data['subproblem'])
        # Sum local subproblem curves across all nodes onto the root.
        local_subproblem = comm.reduce(local_subproblem, op='SUM', root=0)
        if rank == 0:
            ax[i].set_xlabel("Iterations")
            iters = np.asarray(local_data['i_iter'])
            label = r"$\sum_k \Gamma_{k}^{\sigma'}" + (r'-g_{[k]}$' if no_reg else "$")
            ax[i].semilogy(iters, local_subproblem, color='tab:cyan', linestyle='--', label=label)
            y_axis = 'f' if no_reg else 'P'
            label = r"$f(Ax)$" if no_reg else r"$\mathcal{O}_A(x)$"
            ax[i].semilogy('i_iter', y_axis, '', data=global_data, color='tab:orange', label=label)
    if rank == 0:
        ax[1].legend(loc='best')
        fig.tight_layout()
        suf = '_no_reg' if no_reg else ''
        fig.savefig(os.path.join(savedir, f'{expname}thm1{suf}.png'), dpi=300)
        plt.close(fig)
    comm.reset()
def make_stop_plot(expname, default, center, savedir):
    """Plot per-node update magnitudes ||Δx_k|| next to the global objective f(Ax)."""
    rank = comm.get_rank()
    for mon in (default, center):
        comm.barrier()
        if mon is None:
            continue
        size = mon.world_size
        comm.resize(size)
        global_data = get_dataframe(mon)
        local_data = get_dataframe(mon, local=True)
        delta_buf = np.array(local_data['delta_xk'])
        # Root gathers one row of update norms per node.
        gathered = np.empty([size, len(delta_buf)]) if rank == 0 else None
        comm.comm.Gather(delta_buf, gathered, root=0)
        if rank == 0:
            fig, ax_l = plt.subplots(1, 1)
            ax_r = plt.twinx(ax=ax_l)
            ax_l.set_xlabel('Iterations')
            ax_l.set_ylabel(r'$\|\|\Delta x_{k}\|\|$')
            ax_r.set_ylabel(r'$f(Ax)$')
            iters = np.asarray(local_data['i_iter'])
            for k in range(size):
                ax_l.semilogy(iters, gathered[k, :], linestyle='--', label=r'$\|\|\Delta x_{k}\|\|$')
            ax_r.plot('i_iter', 'f', '', data=global_data, color='black', label='$f(Ax)$')
            # Keep a minimal vertical span on the objective axis so a flat
            # curve remains readable.
            ymin, ymax = ax_r.get_ylim()
            span = ymax - ymin
            if span < 0.01:
                pad = (0.01 - span) / 2
                ymax += pad
                ymin -= pad
            ax_r.set_ylim(ymin, ymax)
            fig.tight_layout()
            fig.savefig(os.path.join(savedir, f'{expname}{mon.name}_stop.png'), dpi=300)
            plt.close(fig)
        comm.reset()
def make_error_plot(expname, default, center, savedir, err='l2_rel'):
    """Compare one error metric of the default and center runs on a log scale."""
    rank = comm.get_rank()
    if rank != 0:
        comm.barrier()
        return
    # Supported metrics and their axis labels; unknown metrics are a no-op.
    labels = {
        'l2_rel': 'Relative L2 Error',
        'l1_rel': 'Relative L1 Error',
        'max_rel': 'Max. Relative Error',
        'rmse': 'Root Mean Squared Error',
    }
    if err not in labels:
        comm.barrier()
        return
    label = labels[err]
    fig, ax = plt.subplots(1, 1)
    ax.set_xlabel('Iterations')
    ax.set_ylabel(label)
    global_data = get_dataframe(default)
    ax.semilogy('i_iter', err, '', data=global_data, color='tab:purple', label='Default')
    global_data = get_dataframe(center)
    ax.semilogy('i_iter', err, '', data=global_data, color='tab:green', label='Center')
    ax.legend()
    fig.tight_layout()
    fig.savefig(os.path.join(savedir, f'{expname}{err}-error.png'), dpi=300)
    plt.close(fig)
    comm.barrier()
    return
def make_report_plots(expname, default, center, index, index_test, reg=False, stop=False):
    """Create the report figures: either the stop plot, or regression/error plus thm1."""
    savedir = os.path.join('out', 'report', 'img')
    os.makedirs(savedir, exist_ok=True)
    if comm.get_rank() == 0:
        # rc styling for the smaller report figures.
        plt.rc('font', size=10)
        plt.rc('xtick', labelsize=9)
        plt.rc('ytick', labelsize=9)
        plt.rc('lines', lw=1.0)
        plt.rc('figure', figsize=(4.5, 2))
        plt.rc('legend', fancybox=False, loc='upper right', fontsize='small', borderaxespad=0)
        plt.tick_params(which='major', labelsize='small')
        from matplotlib import rcsetup
        spectral = plt.cm.get_cmap(name='nipy_spectral')
        positions = 1 - np.linspace(0, 1, 20)
        plt.rc('axes', prop_cycle=rcsetup.cycler('color', spectral(positions)))
    if stop:
        make_stop_plot(expname, default, center, savedir)
        return
    if reg:
        make_regression_plot(expname, default, center, index, index_test, savedir)
    else:
        make_error_plot(expname, default, center, savedir)
    make_thm1_plot(expname, default, center, savedir)
def main(dataset):
    """Set up solvers for the 'inv' or 'mg' dataset experiment.

    Parameters
    ----------
    dataset : str
        One of 'inv' or 'mg'; anything else prints a message and returns.
    """
    if dataset == 'inv':
        lam_stop = 3.15
        lam = 0.01467
        reg = True
    elif dataset == 'mg':
        lam = 1e-3
        reg = True
    else:
        print('dataset not supported')
        return
    random_state = 42
    # Fix gamma = 1.0 according to:
    # Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0
    theta = 1e-1
    global_iters = 500
    local_iters = 5
    # Initialize process group
    comm.init_process_group('mpi')
    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()
    # Create graph with specified topology
    graphs_center = getGraphs(world_size)
    dataset_path = os.path.join('data', dataset, 'features', f'{world_size}')
    X, y, X_test, y_test = load_dataset_by_rank(dataset, rank, world_size, random_state=random_state, verbose=1)
    # BUG FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin int is the exact equivalent dtype.
    index = np.asarray(np.load(os.path.join(dataset_path, 'index.npy'), allow_pickle=True), dtype=int)
    index_test = np.asarray(np.load(os.path.join(dataset_path, 'index_test.npy'), allow_pickle=True), dtype=int)
    # Define subproblem
    solver = configure_solver(name='ElasticNet', l1_ratio=0.8, lambda_=lam/len(y), random_state=random_state)
    # BUG FIX: the original read `if dataset='inv':` (assignment — a
    # SyntaxError); equality comparison was clearly intended.
    if dataset == 'inv':
        solver_stop = configure_solver(name='ElasticNet', l1_ratio=0.8, lambda_=lam_stop/len(y), random_state=random_state)
def make_intercept_local_cert_plot(expname, default, center, savedir, type='gap'):
    """Plot each node's local certificate (gap or CV) next to the global gap."""
    rank = comm.get_rank()
    for mon in (default, center):
        comm.barrier()
        if mon is None:
            continue
        size = mon.world_size
        comm.resize(size)
        global_data = get_dataframe(mon)
        local_data = get_dataframe(mon, local=True)
        cert_buf = np.abs(np.array(local_data[f'cert_{type}']))
        # Root gathers one certificate curve per node.
        gathered = np.empty([size, len(cert_buf)]) if rank == 0 else None
        comm.comm.Gather(cert_buf, gathered, root=0)
        if rank == 0:
            fig, ax_l = plt.subplots(1, 1)
            fig.set_size_inches(6.5, 3.5)
            ax_r = plt.twinx(ax=ax_l)
            ax_l.set_xlabel('Iterations')
            label = 'Local Gap' if type == 'gap' else 'Local CV'
            ax_l.set_ylabel(label)
            ax_r.set_ylabel(r'$f(Ax)$')
            iters = np.asarray(local_data['i_iter'])
            for k in range(size):
                ax_l.semilogy(iters, gathered[k, :], linestyle='--', label=label)
            abs_gap = np.abs(global_data['gap'])
            ax_r.semilogy('i_iter', abs_gap, '', data=global_data, color='black', label='$f(Ax)$')
            fig.tight_layout()
            fig.savefig(os.path.join(savedir, f'{expname}{mon.name}_cert_{type}.png'), dpi=300)
            plt.close(fig)
        comm.reset()
def clean_plots():
    """Delete every previously generated PNG in the report image folder (rank 0 only)."""
    if comm.get_rank() != 0:
        return
    savedir = os.path.join('out', 'report', 'img')
    if not os.path.exists(savedir):
        return
    for img in glob.glob(os.path.join(savedir, '*.png')):
        os.remove(img)
def main(dataset):
    """Run default (no intercept) and centered CoLA over every topology and
    produce the intercept report plots.

    Parameters
    ----------
    dataset : str
        Dataset name; data is read from data/<dataset>/features/<world_size>.
    """
    random_state = 42
    # Fix gamma = 1.0 according to:
    # Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0
    theta = 1e-3
    global_iters = 500
    local_iters = 20
    # Initialize process group
    comm.init_process_group('mpi')
    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()
    # Create graph with specified topology
    graphs_center = getGraphs(world_size)
    dataset_path = os.path.join('data', dataset, 'features', f'{world_size}')
    X, y, X_test, y_test = load_dataset_by_rank(dataset, rank, world_size, random_state=random_state, verbose=1)
    # BUG FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin int is the exact equivalent dtype.
    index = np.asarray(np.load(os.path.join(dataset_path, 'index.npy'), allow_pickle=True), dtype=int)
    index_test = np.asarray(np.load(os.path.join(dataset_path, 'index_test.npy'), allow_pickle=True), dtype=int)
    # Define subproblem
    # lasso_solvers = getSolversByLambda(1, n_lambdas=10, size=len(y), random_state=random_state)
    # elasticnet_solvers = getSolversByLambda(0.5, n_lambdas=10, size=len(y), random_state=random_state)
    # l2_solvers = getSolversByLambda(0, n_lambdas=10, size=len(y), random_state=random_state)
    solver = configure_solver(name='ElasticNet', l1_ratio=0.8, lambda_=1e-3 / len(y), random_state=random_state)
    output_dir = os.path.join('out', 'report', dataset)
    clean_plots()
    # Run CoLA once per connectivity topology.
    for topo in graphs_center:
        comm.barrier()
        if not graphs_center[topo]:
            continue
        suf = f'{world_size}-{topo}'
        mon_default = Monitor(output_dir, mode='all', verbose=1, Ak=X, Ak_test=X_test, y_test=y_test, name='Default')
        model_default = Cola(gamma, solver, theta, fit_intercept=False, normalize=True)
        mon_default.init(model_default, graphs_center[topo])
        model_default = model_default.fit(X, y, graphs_center[topo], mon_default, global_iters, local_iters)
        # Show test stats (collective: show_test_statistics barriers internally,
        # so it must run on every rank; only the heading print is rank-0 only).
        if rank == 0:
            print(f'Default - {topo}')
        mon_default.show_test_statistics()
        # Save final model
        mon_default.save(modelname=f'model-default-{suf}.pickle', logname=f'result-default-{suf}.csv')
        mon_center = Monitor(output_dir, mode='all', verbose=1, Ak=X, Ak_test=X_test, y_test=y_test, name='Center')
        model_center = Cola(gamma, solver, theta, fit_intercept=True, normalize=True)
        mon_center.init(model_center, graphs_center[topo])
        model_center = model_center.fit(X, y, graphs_center[topo], mon_center, global_iters, local_iters)
        # Show test stats
        if rank == 0:
            print(f'Center - {topo}')
        mon_center.show_test_statistics()
        # Save final model
        mon_center.save(modelname=f'model-center-{suf}.pickle', logname=f'result-center-{suf}.csv')
        make_intercept_plots(f'{dataset}_{topo}_', mon_default, mon_center, None, index, index_test)
def main(dataset, dataset_path, dataset_size, datapoints, use_split_dataset, split_by, random_state, algoritmname, max_global_steps, local_iters, solvername, output_dir, exit_time, lambda_, l1_ratio, theta, graph_topology, c, logmode, ckpt_freq, n_connectivity, fit_intercept, normalize, verbose):
    """CLI driver: load data, run CoLA, report test statistics, save the model."""
    # Fix gamma = 1.0 according to:
    # Adding vs. Averaging in Distributed Primal-Dual Optimization
    gamma = 1.0
    # Initialize process group
    comm.init_process_group('mpi')
    # Get rank of current process
    rank = comm.get_rank()
    world_size = comm.get_world_size()
    # Create graph with specified topology
    graph = define_graph_topology(world_size, graph_topology, n_connectivity=n_connectivity, verbose=verbose)
    if use_split_dataset:
        if not dataset_path:
            dataset_path = os.path.join('data', dataset, split_by, f'{world_size}')
        X, y, X_test, y_test = load_dataset_by_rank(dataset, rank, world_size, dataset_size, datapoints, split_by, dataset_path=dataset_path, random_state=random_state, verbose=verbose)
    else:
        X, y = load_dataset(dataset, rank, world_size, dataset_size, datapoints, split_by, dataset_path=dataset_path, random_state=random_state, verbose=verbose)
        # BUG FIX: X_test/y_test were undefined on this branch, causing a
        # NameError at the Monitor construction below.
        X_test, y_test = None, None
    # Define subproblem
    solver = configure_solver(name=solvername, split_by=split_by, l1_ratio=l1_ratio, lambda_=lambda_, C=c, random_state=random_state)
    # Non-cola algorithms get their own output subdirectory.
    if algoritmname != 'cola':
        output_dir = os.path.join(output_dir, algoritmname)
    if dataset:
        output_dir = os.path.join(output_dir, dataset, f'{world_size:0>2}', graph_topology)
    # Add hooks to log and save metrics.
    monitor = Monitor(output_dir, ckpt_freq=ckpt_freq, exit_time=exit_time, split_by=split_by, mode=logmode, verbose=verbose, Ak=X, Ak_test=X_test, y_test=y_test)
    # Run CoLA
    comm.barrier()
    if algoritmname == 'cola':
        model = Cola(gamma, solver, theta, fit_intercept, normalize)
        monitor.init(model, graph)
        model = model.fit(X, y, graph, monitor, max_global_steps, local_iters)
    else:
        raise NotImplementedError()
    # Show test stats
    if X_test is not None:
        monitor.show_test_statistics()
    # Save final model
    monitor.save(modelname='model.pickle', logname=f'result.csv')