Example #1
    def normalize(self, groupname1, groupname2):
        # ## normalize y ## #
        with h5py.File(self.OUTPATH, mode='r+') as f:
            for atom in self.MAINCHAIN:
                # load
                train_y = da.from_array(
                    f[f'/{atom}/{groupname1}/{self.RESPONSE_NAME}'],
                    chunks=("auto", 3))
                val_y = da.from_array(
                    f[f'/{atom}/{groupname2}/{self.RESPONSE_NAME}'],
                    chunks=("auto", 3))

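                # pool train and validation targets so both splits share one set of normalization constants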
                total_y = da.concatenate([train_y, val_y], axis=0)
                y_mean = da.mean(total_y.reshape(-1), axis=0).compute()
                y_std = da.std(total_y.reshape(-1), axis=0).compute()

                # normalize
                train_y = da.divide(da.subtract(train_y, y_mean), y_std)
                val_y = da.divide(da.subtract(val_y, y_mean), y_std)

                # save
                da.to_hdf5(self.OUTPATH,
                           f'/{atom}/{groupname1}/{self.RESPONSE_NAME}',
                           train_y)
                da.to_hdf5(self.OUTPATH,
                           f'/{atom}/{groupname2}/{self.RESPONSE_NAME}', val_y)

                f.create_dataset(name=f'/{atom}/normalization',
                                 data=np.array([y_mean, y_std]))

                print(f'[{atom}]\tmean: {y_mean:.3f}\tstd: {y_std:.3f}')
Example #2
def score_gene_sets(ds, gs, z_score_ds=True, use_dask=False):
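    # NOTE: aliasing dask.array as np lets the same array code run lazily on dask or eagerly on numpy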
    if use_dask:
        import dask.array as np
    else:
        import numpy as np
    # gene sets has genes on rows, sets on columns
    # ds has cells on rows, genes on columns
    gs_x = gs.x
    ds_x = ds.x
    if z_score_ds:
        ds_x = ds_x.toarray() if scipy.sparse.isspmatrix(ds_x) else ds_x
    gene_indices = (gs_x.sum(axis=1) > 0) & (
        ds_x.std(axis=0) > 0
    )  # keep genes that are in gene sets and have standard deviation > 0

    gs_x = gs_x[gene_indices]
    ds_x = ds_x[:, gene_indices]
    if z_score_ds:
        ds_x = ds_x.toarray() if scipy.sparse.isspmatrix(ds_x) else ds_x
        std = np.std(ds_x, axis=0)
        mean = np.mean(ds_x, axis=0)
        ds_x = (ds_x - mean) / std
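        # clamp extreme z-scores to [-5, 5] to limit the influence of outliers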
        ds_x[ds_x < -5] = -5
        ds_x[ds_x > 5] = 5
        ds_x[np.isnan(ds_x)] = 0  # 'ds_x == np.nan' is always False; isnan() actually catches NaNs

    scores = ds_x.dot(gs_x)
    ngenes_in_set = gs_x.sum(axis=0)
    ngenes_in_set[ngenes_in_set == 0] = 1  # avoid divide by zero
    scores = scores / ngenes_in_set  # scores contains cells on rows, gene sets on columns
    return wot.Dataset(x=scores, row_meta=ds.row_meta, col_meta=gs.col_meta)
Example #3
def test_PowerMethod_project():
    N, P = 1000, 1000
    k = 10
    svd_array = da.random.random(size=(N, P)).persist()
    proj_array = da.random.random(size=(k, P)).persist()
    mu = da.mean(svd_array, axis=0).persist()
    std = da.diag(1 / da.std(svd_array, axis=0)).persist()
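    # 'std' holds inverse column standard deviations on its diagonal, so X.dot(std) scales columns to unit variance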

    for scale in [True, False]:
        for center in [True, False]:
            svd_array1 = svd_array
            proj_array1 = proj_array
            if center:
                svd_array1 = svd_array1 - mu
                proj_array1 = proj_array1 - mu
            if scale:
                svd_array1 = svd_array1.dot(std)
                proj_array1 = proj_array1.dot(std)

            U, S, V = da.linalg.svd(svd_array1)
            U_k, S_k, V_k = svd_to_trunc_svd(U, S, V, k=k)

            PM = PowerMethod(k=k,
                             scale=scale,
                             center=center,
                             factor=None,
                             tol=1e-12)
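            # run the full power-method SVD so PM is fitted before projecting new samples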
            U_PM, S_PM, V_PM = PM.svd(array=svd_array)

            np.testing.assert_array_almost_equal(
                PM.project(proj_array, onto=V_k.T), proj_array1.dot(V_k.T))
Example #4
def test_make_regression(n_samples, n_features, n_informative, n_targets, bias,
                         effective_rank, tail_strength, noise, shuffle, coef,
                         random_state, n_parts, cluster):
    c = Client(cluster)
    try:
        from cuml.dask.datasets import make_regression

        result = make_regression(n_samples=n_samples,
                                 n_features=n_features,
                                 n_informative=n_informative,
                                 n_targets=n_targets,
                                 bias=bias,
                                 effective_rank=effective_rank,
                                 noise=noise,
                                 shuffle=shuffle,
                                 coef=coef,
                                 random_state=random_state,
                                 n_parts=n_parts)

        if coef:
            out, values, coefs = result
        else:
            out, values = result

        assert out.shape == (n_samples, n_features), "out shape mismatch"

        if n_targets > 1:
            assert values.shape == (n_samples, n_targets), \
                   "values shape mismatch"
        else:
            assert values.shape == (n_samples, ), "values shape mismatch"

        assert len(out.chunks[0]) == n_parts
        assert len(out.chunks[1]) == 1

        if coef:
            if n_targets > 1:
                assert coefs.shape == (n_features, n_targets), \
                       "coefs shape mismatch"
                assert len(coefs.chunks[1]) == 1
            else:
                assert coefs.shape == (n_features, ), "coefs shape mismatch"
                assert len(coefs.chunks[0]) == 1

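            # every target column should have exactly n_informative non-zero coefficients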
            test1 = da.all(da.sum(coefs != 0.0, axis=0) == n_informative)

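            # residuals of the noiseless prediction should have std close to 1 (tolerance 0.15 below)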
            std_test2 = da.std(values - (da.dot(out, coefs) + bias), axis=0)

            test1, std_test2 = da.compute(test1, std_test2)

            diff = cp.abs(1.0 - std_test2)
            test2 = cp.all(diff < 1.5 * 10**(-1.))

            assert test1, \
                "Unexpected number of informative features"

            assert test2, "Unexpectedly incongruent outputs"

    finally:
        c.close()
Example #5
    def statistics(self, data, pca_stats=None):
        # set headers
        if pca_stats:  # for pca
            if pca_stats["eigenvals"] is not None:
                self.stats_header.setText("Eigenvalue: {} ({}%)".format(
                    round(pca_stats["eigenvals"][self.pc_id - 1], 2),
                    round(pca_stats["eigenvals_%"][self.pc_id - 1], 2)))
                self.stats_header.setToolTip(
                    "Shows the dispersion of the data with respect to its component")
            else:
                self.stats_header.setText("Eigenvalue: --")
                self.stats_header.setToolTip(
                    "Only available when the components are computed with the plugin")
        else:  # for aoi
            self.stats_header.setText("Pixels in AOI: {}".format(
                data.size if data.size > 1 else 0))
            self.stats_header.setToolTip("")
        # restore or compute the statistics
        if (self.QCBox_StatsLayer.currentText() == self.pc_name
                and self.stats_pc is not None):
            min_val, max_val, std, p25, p50, p75 = self.stats_pc
        else:
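            # compute the statistics out of core with dask, in chunks of 8 million elements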
            da_data = da.from_array(data, chunks=(8000000, ))
            min_val = da.min(da_data).compute()
            max_val = da.max(da_data).compute()
            std = da.std(da_data).compute()
            p25 = da.percentile(da_data, 25).compute()[0]
            p50 = da.percentile(da_data, 50).compute()[0]
            p75 = da.percentile(da_data, 75).compute()[0]
            if self.QCBox_StatsLayer.currentText() == self.pc_name:
                self.stats_pc = (min_val, max_val, std, p25, p50, p75)
        # set in dialog
        self.stats_min.setText(str(round(min_val, 2)))
        self.stats_max.setText(str(round(max_val, 2)))
        self.stats_std.setText(str(round(std, 2)))
        self.stats_p25.setText(str(round(p25, 2)))
        self.stats_p50.setText(str(round(p50, 2)))
        self.stats_p75.setText(str(round(p75, 2)))
Example #6
def test_ScaledArray_fromArrayMoment_array():
    N1, P = 7, 10
    N2 = 5
    array1 = da.random.random(size=(N1, P)).persist()
    mu = da.mean(array1, axis=0)
    std = da.diag(1/da.std(array1, axis=0))
    array2 = da.random.random(size=(N2, P)).persist()
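    # fromScaledArray should carry over the centering/scaling fitted on array1 and apply it to array2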
    for scale in [True, False]:
        for center in [True, False]:
            for factor1 in [None, 'n', 'p']:
                sa1 = ScaledCenterArray(scale=scale, center=center, factor=factor1)
                sa1.fit(array1)

                for factor2, factor_value in zip([None, 'n', 'p'], [1, N2, P]):
                    sa2 = ScaledCenterArray.fromScaledArray(array=array2, scaled_array=sa1, factor=factor2)
                    sa2_array = array2

                    if center:
                        sa2_array = sa2_array - mu
                    if scale:
                        sa2_array = sa2_array.dot(std)

                    np.testing.assert_array_almost_equal(sa2.array, sa2_array)
Example #7
def test_make_regression(n_samples, n_features, n_informative,
                         n_targets, bias, effective_rank,
                         tail_strength, noise, shuffle,
                         coef, n_parts, order,
                         use_full_low_rank, client):

    c = client
    from cuml.dask.datasets import make_regression

    result = make_regression(n_samples=n_samples, n_features=n_features,
                             n_informative=n_informative,
                             n_targets=n_targets, bias=bias,
                             effective_rank=effective_rank, noise=noise,
                             shuffle=shuffle, coef=coef,
                             n_parts=n_parts,
                             use_full_low_rank=use_full_low_rank,
                             order=order)

    if coef:
        out, values, coefs = result
    else:
        out, values = result

    assert out.shape == (n_samples, n_features), "out shape mismatch"

    if n_targets > 1:
        assert values.shape == (n_samples, n_targets), \
               "values shape mismatch"
    else:
        assert values.shape == (n_samples,), "values shape mismatch"

    assert len(out.chunks[0]) == n_parts
    assert len(out.chunks[1]) == 1

    if coef:
        if n_targets > 1:
            assert coefs.shape == (n_features, n_targets), \
                   "coefs shape mismatch"
            assert len(coefs.chunks[1]) == 1
        else:
            assert coefs.shape == (n_features,), "coefs shape mismatch"
            assert len(coefs.chunks[0]) == 1

        test1 = da.all(da.sum(coefs != 0.0, axis=0) == n_informative)

        std_test2 = da.std(values - (da.dot(out, coefs) + bias), axis=0)

        test1, std_test2 = da.compute(test1, std_test2)

        diff = cp.abs(1.0 - std_test2)
        test2 = cp.all(diff < 1.5 * 10**(-1.))

        assert test1, \
            "Unexpected number of informative features"

        assert test2, "Unexpectedly incongruent outputs"

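    # pull one GPU partition back to the client to inspect its memory layout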
    data_ddh = DistributedDataHandler.create(data=(out, values),
                                             client=c)
    out_part, value_part = data_ddh.gpu_futures[0][1].result()

    if coef:
        coefs_ddh = DistributedDataHandler.create(data=coefs,
                                                  client=c)
        coefs_part = coefs_ddh.gpu_futures[0][1].result()
    if order == 'F':
        assert out_part.flags['F_CONTIGUOUS']
        if n_targets > 1:
            assert value_part.flags['F_CONTIGUOUS']
            if coef:
                assert coefs_part.flags['F_CONTIGUOUS']
    elif order == 'C':
        assert out_part.flags['C_CONTIGUOUS']
        if n_targets > 1:
            assert value_part.flags['C_CONTIGUOUS']
            if coef:
                assert coefs_part.flags['C_CONTIGUOUS']
Example #8
File: result.py Project: IOMRC/piv
    def __call__(self, tag='', reprocess=False):
        super().__call__(tag='tensor', reprocess=reprocess)

        if self.cache is None:
            ds = self.parent
            ds_new = xr.Dataset(coords=ds.coords, attrs=ds.attrs)

            print('Calculating tensor')
            # j = jacobianConv(ds.U, ds.V, ds.W, dx, dy, dz, sigma=1.5)
            j = jacobian(ds.U.data, ds.V.data, ds.W.data,
                         ds.attrs['piv_step_frame'], ds.attrs['dx'],
                         ds.attrs['dy'], ds.attrs['dz'])
            j = j.compute()
            ds_new['jacobian'] = (['time', 'x', 'y', 'z', 'comp', 'dims'], j)
            ds_new['jacobianNorm'] = da.sqrt(
                (ds_new['jacobian']**2.).sum(dim=['comp', 'dims']))
            jT = ds_new.jacobian.transpose('time', 'x', 'y', 'z', 'dims',
                                           'comp')  #.values
            ds_new['strainTensor'] = (ds_new.jacobian + jT) / 2.
            ds_new['vorticityTensor'] = (ds_new.jacobian - jT) / 2.
            ds_new['strainTensorNorm'] = da.sqrt(
                (ds_new.strainTensor**2.).sum(dim=['comp', 'dims']))
            ds_new['vorticityTensorNorm'] = da.sqrt(
                (ds_new.vorticityTensor**2.).sum(dim=['comp', 'dims']))

            ds_new['dudx'] = (['time', 'x', 'y', 'z'], j[:, :, :, :, 0, 0])
            ds_new['dvdy'] = (['time', 'x', 'y', 'z'], j[:, :, :, :, 1, 1])
            ds_new['dwdz'] = (['time', 'x', 'y', 'z'], j[:, :, :, :, 2, 2])
            ds_new['divergence'] = (ds_new['dudx'] + ds_new['dvdy']
                                    + ds_new['dwdz'])

            print(ds_new['divergence'])
            #
            ds_new['vorticity'] = (
                ['time', 'x', 'y', 'z', 'comp'],
                da.stack((ds_new['vorticityTensor'][:, :, :, :, 2, 1],
                          ds_new['vorticityTensor'][:, :, :, :, 0, 2],
                          ds_new['vorticityTensor'][:, :, :, :, 1, 0]),
                         axis=-1))

            ds_new['divNorm'] = ds_new['divergence'] / ds_new['jacobianNorm']
            ds_new['divNorm_mean'] = da.mean(ds_new['divNorm'])
            ds_new['divNorm_std'] = da.std(ds_new['divNorm'])
            delta = (ds.attrs['dx'] * ds.attrs['dy'] * ds.attrs['dz'])**(1. / 3.)
            dpx = (ds.attrs['pdx'] * ds.attrs['pdy'] * ds.attrs['pdz'])**(1. / 3.)
            delta_px = delta / dpx
            dt = ds.attrs['piv_step_ensemble']

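            # RMS of the velocity-field divergence over one ensemble step; ideally near zero for incompressible flow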
            ds_new['divRMS'] = da.mean((ds_new['divergence'] * dt)**2.)**0.5
            ds_new['velocityError'] = ds_new['divRMS'] / (
                (3. / (2. * delta_px**2.))**0.5)
            print(
                da.percentile(ds_new['vorticityTensorNorm'].data.ravel(), 99.))
            print(ds_new['divRMS'])
            print(ds_new['divNorm_mean'])
            ds_new['vorticityError'] = ds_new['divRMS'] / dt / da.percentile(
                ds_new['vorticityTensorNorm'].data.ravel(), 99.)

            print('saving')
            self.cache = ds_new
            self.to_file()

        return self.cache