def test_Gatherv_rows():
    """Check that Gatherv_rows reassembles row-split arrays on the root rank.

    For both ``int`` and ``float`` dtypes, a reference array is split along
    its first axis with ``np.array_split``; every rank hands its own piece to
    ``Gatherv_rows`` and the root rank asserts that the result equals the
    reference array and has the same dtype.
    """
    comm = MPI.COMM_WORLD
    root = 0
    rank = comm.rank
    size = comm.size

    # 151 leading rows -> several rows per rank; 2 leading rows -> fewer rows
    # than ranks once more than two ranks are in use. Each case is exercised
    # for 2-D and 3-D inputs, in this order.
    shapes = [(151, 3), (2, 3), (151, 2, 3), (2, 3, 5)]

    for dtype in [int, float]:
        for shape in shapes:
            expected = np.arange(np.prod(shape), dtype=dtype).reshape(shape)
            local_rows = np.array_split(expected, size)[rank]
            gathered = Gatherv_rows(local_rows, comm, root)
            if rank == root:
                assert_array_equal(expected, gathered)
                assert gathered.dtype == dtype
def selector(self, X, y):
    """Apply the parent class's selector to each bootstrap and union the results.

    The bootstraps held in ``self.uoi`` are divided across ranks with
    ``np.array_split``. For every bootstrap assigned to this rank, the
    estimates for that bootstrap are used to predict on the bootstrap's
    ``boots[0]`` samples (labelled "train data" in the original code) and the
    parent ``selector`` picks one coefficient vector. The per-bootstrap
    coefficients are gathered with ``Gatherv_rows`` and combined with
    ``self.union`` on rank 0.

    Parameters
    ----------
    X : array indexed as ``X[idxs, :]``
    y : array indexed as ``y[idxs]``

    Returns
    -------
    dict or None
        ``{'coefs': ...}`` on rank 0, ``None`` on every other rank.
    """
    solutions = self.uoi.estimates_
    boots = self.uoi.boots

    # Need to distribute information across ranks
    if self.comm is not None:
        boots = self.comm.bcast(boots)
        solutions = self.comm.bcast(solutions)

    # Unpack the shape only after the broadcast: before it, the local value is
    # not guaranteed to be a populated array on every rank, and touching
    # ``.shape`` early would fail there before the collective is reached.
    n_boots, n_supports, n_coefs = solutions.shape

    # Distribute bootstraps across ranks
    task_list = np.array_split(np.arange(n_boots), self.size)[self.rank]

    selected_coefs = np.zeros((len(task_list), n_coefs))
    for i, boot in enumerate(task_list):
        # Train data
        train_idxs = boots[0][boot]
        xx = X[train_idxs, :]
        yy = y[train_idxs]

        # One prediction row per candidate support
        y_pred = solutions[boot, ...] @ xx.T

        sdict_ = super(UoISelector, self).selector(
            xx, yy, y_pred, solutions[boot, ...], np.arange(n_supports))
        selected_coefs[i, :] = sdict_['coefs']

    # Gather selected_coefs
    if self.comm is not None:
        selected_coefs = Gatherv_rows(selected_coefs, self.comm)

    if self.rank == 0:
        sdict = {'coefs': self.union(selected_coefs)}
    else:
        sdict = None

    return sdict
def test_Gatherv_random_rows():
    """Check Gatherv_rows when each rank contributes a random number of rows.

    Every rank draws between 1 and 9 rows of 1000 normally distributed values.
    The root rank verifies that the gathered array has as many rows as the
    sum of the per-rank row counts collected with ``comm.gather``.
    """
    comm = MPI.COMM_WORLD
    root = 0

    n_local_rows = np.random.randint(1, 10)
    local = np.random.normal(size=(n_local_rows, 1000))

    row_counts = comm.gather(local.shape[0], root=root)
    gathered = Gatherv_rows(local, comm, root)

    if comm.rank == root:
        assert gathered.shape[0] == np.sum(row_counts)
def gather_results(results, comm):
    """Gather every entry of a two-level results dict with ``Gatherv_rows``.

    Parameters
    ----------
    results : dict of dict
        Outer keys are selection methods, inner keys are field names; each
        inner value is passed to ``Gatherv_rows`` as this rank's contribution.
    comm : communicator forwarded to ``Gatherv_rows``

    Returns
    -------
    dict of dict
        Same key structure as ``results``, with each value replaced by
        whatever ``Gatherv_rows(value, comm, root=0)`` returns on this rank.
    """
    # Collective calls are issued in dict iteration order, outer then inner.
    return {
        method: {
            field: Gatherv_rows(local_value, comm, root=0)
            for field, local_value in fields.items()
        }
        for method, fields in results.items()
    }
def oracle_selector(self, true_model):
    """Run the parent class's oracle selector per bootstrap and union the results.

    The bootstraps in ``self.uoi.estimates_`` are split across ranks; for each
    bootstrap owned by this rank the parent ``oracle_selector`` is given that
    bootstrap's estimates, the support indices and ``true_model``, and its
    ``'coefs'`` entry is kept. Rows are gathered with ``Gatherv_rows`` and
    combined with ``self.union`` on rank 0.

    Returns
    -------
    dict or None
        ``{'coefs': ...}`` on rank 0, ``None`` on every other rank.
    """
    estimates = self.uoi.estimates_
    bootstraps = self.uoi.boots

    if self.comm is not None:
        # Both broadcasts are kept, in this order, so every rank issues the
        # same sequence of collectives.
        bootstraps = self.comm.bcast(bootstraps)
        estimates = self.comm.bcast(estimates)

    n_boots, n_supports, n_coefs = estimates.shape

    # Bootstraps handled by this rank
    my_boots = np.array_split(np.arange(n_boots), self.size)[self.rank]

    selected_coefs = np.zeros((len(my_boots), n_coefs))
    for row, boot in enumerate(my_boots):
        picked = super(UoISelector, self).oracle_selector(
            estimates[boot, ...], np.arange(n_supports), true_model)
        selected_coefs[row, :] = picked['coefs']

    if self.comm is not None:
        selected_coefs = Gatherv_rows(selected_coefs, self.comm)

    # Only rank 0 reports a result
    if self.rank != 0:
        return None

    return {'coefs': self.union(selected_coefs)}
def r2_selector(self, X, y):
    """Pick, per bootstrap, the support with the highest R^2 and union them.

    ``self.uoi.estimates_`` is unpacked as (n_boots, n_supports, n_coefs).
    Bootstraps are split across ranks; for each one, every support's estimate
    is scored with ``r2_score`` on the bootstrap's ``boots[1]`` samples
    (labelled "test data" in the original code). Scores are gathered with
    ``Gatherv_rows``; rank 0 takes the arg-max support per bootstrap and
    passes the chosen coefficient vectors to ``self.union``.

    Returns
    -------
    dict or None
        ``{'scores': ..., 'coefs': ...}`` on rank 0, ``None`` elsewhere.
    """
    estimates = self.uoi.estimates_
    bootstraps = self.uoi.boots

    if self.comm is not None:
        bootstraps = self.comm.bcast(bootstraps)
        estimates = self.comm.bcast(estimates)

    n_boots, n_supports, _ = estimates.shape

    # Bootstraps handled by this rank
    my_boots = np.array_split(np.arange(n_boots), self.size)[self.rank]

    scores = np.zeros((len(my_boots), n_supports))
    for row, boot in enumerate(my_boots):
        # Test data
        test_idxs = bootstraps[1][boot]
        x_test = X[test_idxs, :]
        y_test = y[test_idxs]

        # One prediction row per support
        predictions = estimates[boot, ...] @ x_test.T
        scores[row, :] = np.array(
            [r2_score(y_test, predictions[j, :]) for j in range(n_supports)])

    if self.comm is not None:
        scores = Gatherv_rows(scores, self.comm)

    # Only rank 0 reports a result
    if self.rank != 0:
        return None

    best_support = np.argmax(scores, axis=1)
    coefs = self.union(estimates[np.arange(n_boots), best_support])

    return {'scores': scores, 'coefs': coefs}
for rep2 in range(nreps2): X, _, _, _, _ = gen_data(2000, p, covariance=sigma_rep, beta=subset.ravel()) # Normalize X X = StandardScaler().fit_transform(X) C = 1 / n_ * X.T @ X eta[i1, nidx, rep, i3, rep2] = calc_irrep_const(C, np.nonzero(subset)[0]) if comm.rank == 0: print(time.time() - t0) print('%d/%d' % (i1 + 1, len(task_chunk[comm.rank]))) # Gather and save results rho = Gatherv_rows(rho, comm, root=0) eta = Gatherv_rows(eta, comm, root=0) eta2 = Gatherv_rows(eta2, comm, root=0) norm_diff = Gatherv_rows(norm_diff, comm, root=0) if comm.rank == 0: with open('cov_ensemble.dat', 'wb') as f: f.write(pickle.dumps(rho)) f.write(pickle.dumps(eta)) f.write(pickle.dumps(eta2)) f.write(pickle.dumps(norm_diff))
# Split the F values across ranks; each rank evaluates its own chunk.
F_chunk = np.array_split(F, numproc)

# Grid of T values, built once from p. The CDF result is no longer stored in
# ``p``: doing so replaced the grid's upper bound after the first evaluation
# and caused the final dump below to save a CDF value instead of p.
T_vals = np.linspace(1, p / 2, 50, dtype=int)

# Storage
# NOTE(review): the column count comes from np.arange(1, p / 2) while the loop
# fills len(T_vals) == 50 columns; the width is kept as-is so the saved array
# shape does not change, but the two only agree for particular p — confirm
# which grid is intended.
cdf_vals = np.zeros((len(F_chunk[rank]), np.arange(1, p / 2).size))

for i, F_ in enumerate(F_chunk[rank]):
    for i3, T in enumerate(T_vals):
        t0 = time.time()
        dx2 = DChiSq(gamma_sq, sigma_sq, n - T, T)
        DeltaF = F_ * (S_ - T)

        # Calculate the CDF
        cdf_vals[i, i3] = dx2.nCDF(DeltaF)

        print('Rank %d: %d/%d %d/%d, %f s' % (rank, i + 1, len(F_chunk[rank]), i3, len(T_vals), time.time() - t0))

# Gather
cdf_vals = Gatherv_rows(cdf_vals, comm, root=0)

# Save
if rank == 0:
    with open(savepath, 'wb') as f:
        f.write(pickle.dumps(cdf_vals))
        f.write(pickle.dumps(p))
        f.write(pickle.dumps(F))