Ejemplo n.º 1
0
def test_Gatherv_rows():
    """Check that Gatherv_rows reassembles row-split arrays on the root.

    For every dtype / shape combination a reference array is built, split
    along its first axis with ``np.array_split`` (one piece per rank),
    gathered back with ``Gatherv_rows`` and, on the root rank only,
    compared against the reference for both contents and dtype.
    """
    comm = MPI.COMM_WORLD
    root = 0
    rank, size = comm.rank, comm.size

    # The gathers are collective, so every rank walks this table in the
    # same order.
    shapes = (
        (151, 3),     # multiple rows per rank
        (2, 3),       # fewer rows than ranks (when run on more than 2 ranks)
        (151, 2, 3),  # multiple rows per rank, 3d
        (2, 3, 5),    # fewer rows than ranks, 3d
    )

    for dtype in (int, float):
        for shape in shapes:
            n_items = int(np.prod(shape))
            expected = np.arange(n_items, dtype=dtype).reshape(shape)
            local_rows = np.array_split(expected, size)[rank]
            gathered = Gatherv_rows(local_rows, comm, root)

            # Only the root is checked for the assembled result.
            if rank != root:
                continue
            assert_array_equal(expected, gathered)
            assert gathered.dtype == dtype
Ejemplo n.º 2
0
    def selector(self, X, y):
        """Run the parent selector on every bootstrap and union the results.

        The UoI estimates and bootstrap indices are broadcast when a
        communicator is set, the bootstraps are split across ranks with
        ``np.array_split``, and for each local bootstrap the parent class's
        ``selector`` is called on the rows ``boots[0][boot]`` of ``X`` and
        ``y``. The per-bootstrap ``'coefs'`` are gathered with
        ``Gatherv_rows`` and passed to ``self.union`` on rank 0.

        Parameters
        ----------
        X : array indexable as ``X[rows, :]``
            Design matrix.
        y : array indexable as ``y[rows]``
            Response.

        Returns
        -------
        dict or None
            ``{'coefs': ...}`` on rank 0, ``None`` on every other rank.
        """
        solutions = self.uoi.estimates_
        boots = self.uoi.boots

        # Need to distribute information across ranks. The shape is read only
        # after the broadcast so that ranks which do not hold the estimates
        # beforehand do not fail on ``.shape``.
        if self.comm is not None:
            boots = self.comm.bcast(boots)
            solutions = self.comm.bcast(solutions)

        n_boots, n_supports, n_coefs = solutions.shape

        # Distribute bootstraps across ranks
        tasks = np.arange(n_boots)
        chunked_tasks = np.array_split(tasks, self.size)
        task_list = chunked_tasks[self.rank]

        # One row of selected coefficients per locally handled bootstrap.
        selected_coefs = np.zeros((len(task_list), n_coefs))

        for i, boot in enumerate(task_list):
            # Train data
            train_idxs = boots[0][boot]
            xx = X[train_idxs, :]
            yy = y[train_idxs]

            # Predictions of every support's model on the training rows:
            # (n_supports, n_coefs) @ (n_coefs, n_rows).
            y_pred = solutions[boot, ...] @ xx.T

            sdict_ = super(UoISelector, self).selector(xx, yy, y_pred,
                                                       solutions[boot, ...],
                                                       np.arange(n_supports))

            selected_coefs[i, :] = sdict_['coefs']

        # Gather selected_coefs
        if self.comm is not None:
            selected_coefs = Gatherv_rows(selected_coefs, self.comm)

        if self.rank == 0:
            coefs = self.union(selected_coefs)
            sdict = {'coefs': coefs}
        else:
            sdict = None

        return sdict
Ejemplo n.º 3
0
def test_Gatherv_random_rows():
    """Check Gatherv_rows when each rank contributes a random row count.

    Every rank draws between 1 and 9 rows of 1000 normal samples. The
    per-rank row counts are collected on the root with ``comm.gather`` and
    the root verifies that the gathered array has that many rows in total.
    """
    comm = MPI.COMM_WORLD
    root = 0

    n_local = np.random.randint(1, 10)
    local_block = np.random.normal(size=(n_local, 1000))

    row_counts = comm.gather(local_block.shape[0], root=root)
    gathered = Gatherv_rows(local_block, comm, root)

    if comm.rank == root:
        assert gathered.shape[0] == np.sum(row_counts)
Ejemplo n.º 4
0
def gather_results(results, comm):
    """Gather every field of a two-level results dict onto rank 0.

    ``results`` maps selection method -> field name -> array. Each array
    is passed through ``Gatherv_rows(..., comm, root=0)`` and the return
    values are placed in a new dict with the same two-level key layout.
    The input dict is not modified.
    """
    # Iteration follows the dicts' own key order, so all ranks issue the
    # gathers in the same sequence provided their dicts are built alike.
    return {
        method: {
            field: Gatherv_rows(local_value, comm, root=0)
            for field, local_value in fields.items()
        }
        for method, fields in results.items()
    }
Ejemplo n.º 5
0
    def oracle_selector(self, true_model):
        """Run the parent oracle selector on every bootstrap and union them.

        The UoI estimates are broadcast when a communicator is set, the
        bootstraps are split across ranks with ``np.array_split``, and the
        parent class's ``oracle_selector`` is called once per local
        bootstrap. The per-bootstrap ``'coefs'`` are gathered with
        ``Gatherv_rows`` and passed to ``self.union`` on rank 0.

        Parameters
        ----------
        true_model
            Forwarded unchanged to the parent ``oracle_selector``.

        Returns
        -------
        dict or None
            ``{'coefs': ...}`` on rank 0, ``None`` on every other rank.
        """
        # Simply return the maximum selection accuracy available

        solutions = self.uoi.estimates_

        # Only the estimates are needed here; the bootstrap indices are not
        # used by the oracle, so they are not broadcast.
        if self.comm is not None:
            solutions = self.comm.bcast(solutions)

        n_boots, n_supports, n_coefs = solutions.shape

        # Distribute bootstraps across ranks
        tasks = np.arange(n_boots)
        chunked_tasks = np.array_split(tasks, self.size)
        task_list = chunked_tasks[self.rank]

        # One row of selected coefficients per locally handled bootstrap.
        selected_coefs = np.zeros((len(task_list), n_coefs))

        for i, boot in enumerate(task_list):

            sdict_ = super(UoISelector,
                           self).oracle_selector(solutions[boot, ...],
                                                 np.arange(n_supports),
                                                 true_model)

            selected_coefs[i, :] = sdict_['coefs']

        # Gather
        if self.comm is not None:
            selected_coefs = Gatherv_rows(selected_coefs, self.comm)

        if self.rank == 0:
            # Return just the coefficients that result
            sdict = {'coefs': self.union(selected_coefs)}
        else:
            sdict = None

        return sdict
Ejemplo n.º 6
0
    def r2_selector(self, X, y):
        """Select, per bootstrap, the support with the highest R^2 score.

        For each bootstrap handled by this rank, every support's estimate
        is scored with ``r2_score`` on the rows ``boots[1][boot]`` of ``X``
        and ``y``. Scores are gathered with ``Gatherv_rows``; rank 0 takes
        the arg-max support of each bootstrap and passes those estimates
        to ``self.union``.

        Returns
        -------
        dict or None
            ``{'scores': ..., 'coefs': ...}`` on rank 0, ``None`` elsewhere.
        """
        # UoI estimates have shape (n_boots_est, n_supports, n_coef)
        estimates = self.uoi.estimates_
        boots = self.uoi.boots

        comm = self.comm
        distributed = comm is not None
        if distributed:
            boots = comm.bcast(boots)
            estimates = comm.bcast(estimates)

        n_boots, n_supports, _ = estimates.shape

        # Split the bootstrap indices across ranks and keep this rank's share
        my_boots = np.array_split(np.arange(n_boots), self.size)[self.rank]
        scores = np.zeros((len(my_boots), n_supports))

        for row, boot in enumerate(my_boots):
            # Test data
            held_out = boots[1][boot]
            x_test = X[held_out, :]
            y_test = y[held_out]

            # One prediction row per support
            predictions = estimates[boot, ...] @ x_test.T
            scores[row, :] = np.array([
                r2_score(y_test, predictions[j, :])
                for j in range(n_supports)
            ])

        # Gather
        if distributed:
            scores = Gatherv_rows(scores, comm)

        if self.rank != 0:
            return None

        # Best-scoring support for each bootstrap, then union of those fits
        best_support = np.argmax(scores, axis=1)
        coefs = self.union(estimates[np.arange(n_boots), best_support])

        # Return just the scores and the coefficients that result
        return {'scores': scores, 'coefs': coefs}
Ejemplo n.º 7
0
                # Innermost repetition loop: each pass fills one
                # eta[i1, nidx, rep, i3, rep2] entry.
                for rep2 in range(nreps2):

                    # Only the first of gen_data's five return values is used.
                    X, _, _, _, _ = gen_data(2000,
                                             p,
                                             covariance=sigma_rep,
                                             beta=subset.ravel())
                    # Normalize X
                    X = StandardScaler().fit_transform(X)
                    # Gram matrix of the standardized data scaled by 1 / n_.
                    # NOTE(review): gen_data is called with 2000 as its first
                    # argument; confirm n_ matches the number of rows of X.
                    C = 1 / n_ * X.T @ X
                    # The second argument is the index array of the nonzero
                    # entries of subset.
                    eta[i1, nidx, rep, i3,
                        rep2] = calc_irrep_const(C,
                                                 np.nonzero(subset)[0])

        # Elapsed time since t0 is reported by rank 0 only.
        if comm.rank == 0:
            print(time.time() - t0)

        # Progress line, printed by every rank.
        print('%d/%d' % (i1 + 1, len(task_chunk[comm.rank])))

    # Gather and save results
    rho = Gatherv_rows(rho, comm, root=0)
    eta = Gatherv_rows(eta, comm, root=0)
    eta2 = Gatherv_rows(eta2, comm, root=0)
    norm_diff = Gatherv_rows(norm_diff, comm, root=0)

    # Only rank 0 writes the file. The four arrays are pickled back-to-back
    # into one file, so a reader must call pickle.load four times in this
    # order: rho, eta, eta2, norm_diff.
    if comm.rank == 0:
        with open('cov_ensemble.dat', 'wb') as f:
            f.write(pickle.dumps(rho))
            f.write(pickle.dumps(eta))
            f.write(pickle.dumps(eta2))
            f.write(pickle.dumps(norm_diff))
Ejemplo n.º 8
0
# Split the F values across processes; this rank works on F_chunk[rank].
F_chunk = np.array_split(F, numproc)

# Grid of T values swept for every F. It is built once, up front, so that it
# always derives from the problem dimension p.
T_vals = np.linspace(1, p / 2, 50, dtype=int)

# Storage
# NOTE(review): the column count follows np.arange(1, p / 2) while the loop
# below fills T_vals.size (= 50) columns. Left unchanged to keep the saved
# array's shape; confirm the intended width (this overflows if p / 2 - 1 < 50).
cdf_vals = np.zeros((len(F_chunk[rank]), np.arange(1, p / 2).size))

for i, F_ in enumerate(F_chunk[rank]):

    for i3, T in enumerate(T_vals):

        t0 = time.time()
        dx2 = DChiSq(gamma_sq, sigma_sq, n - T, T)

        DeltaF = F_ * (S_ - T)

        # Calculate the CDF. The result gets its own name: it was previously
        # assigned to p, which clobbered the dimension used to build the T
        # grid and the value saved at the end of the script.
        cdf_val = dx2.nCDF(DeltaF)
        cdf_vals[i, i3] = cdf_val
        print('Rank %d: %d/%d %d/%d, %f s' %
              (rank, i + 1, len(F_chunk[rank]), i3,
               T_vals.size, time.time() - t0))

# Gather
cdf_vals = Gatherv_rows(cdf_vals, comm, root=0)

# Save
# Three pickles are written back-to-back: cdf_vals, p, F. Read them back with
# three successive pickle.load calls in that order.
if rank == 0:
    with open(savepath, 'wb') as f:
        f.write(pickle.dumps(cdf_vals))
        f.write(pickle.dumps(p))
        f.write(pickle.dumps(F))