Example #1
def test_ScaledArray_array():
    for N in range(3, 5):
        for P in range(3, 5):
            array = np.random.rand(N, P) + 1
            std = np.diag(1 / np.std(array, axis=0))
            mu = np.mean(array, axis=0)

            # With No Scale or Center
            # B = A
            sarray = ScaledCenterArray(scale=False, center=False)
            sarray.fit(da.array(array))
            np.testing.assert_array_almost_equal(array, sarray.array)
            np.testing.assert_array_almost_equal(array.T, sarray.T.array)

            # With Scale but No Center
            # B = AD
            b_array = array.dot(std)
            sarray = ScaledCenterArray(scale=True, center=False)
            sarray.fit(da.array(array))
            np.testing.assert_array_almost_equal(b_array, sarray.array)
            np.testing.assert_array_almost_equal(b_array.T, sarray.T.array)

            # With Center but No Scale:
            # B = (A - U)
            b_array = array - mu
            sarray = ScaledCenterArray(scale=False, center=True)
            sarray.fit(da.array(array))
            np.testing.assert_array_almost_equal(b_array, sarray.array)
            np.testing.assert_array_almost_equal(b_array.T, sarray.T.array)

            # With Center and Scale:
            # B = (A - U)D
            b_array = (array - mu).dot(std)
            sarray = ScaledCenterArray(scale=True, center=True)
            sarray.fit(da.array(array))
            np.testing.assert_array_almost_equal(b_array, sarray.array)
            np.testing.assert_array_almost_equal(b_array.T, sarray.T.array)
Example #2
def test_PowerMethod_reset():
    for start in [True, False]:

        # Fitting twice, on arrays of different shapes, checks that state
        # from the first factorization does not leak into the second.
        PM = PowerMethod(sub_svd_start=start)

        _, _, _ = PM.svd(da.array(np.random.randn(100, 50)))
        _, _, _ = PM.svd(da.array(np.random.randn(110, 60)))
Example #3
def prepare_dataset(X):
    
    len_ = X.shape[0]
    shape_ = X.shape

    # Side length of the target square image. Rounding up is an assumption
    # about intent: it keeps the padding width r below non-negative when the
    # feature count is not a perfect square.
    d = int(np.ceil(np.sqrt(X.flatten().reshape(X.shape[0], -1).shape[1])))

    if len(shape_) == 4:
        X = da.reshape(X, [-1, d, d, 3])

    elif d == shape_[1] and len(shape_) == 3:
        X = da.reshape(X, [-1, d, d])
        X = da.array(list(map(grey2rgb, X)), dtype=np.float32)

    else:
        r = d**2 - X.shape[1]
        train_padding = da.zeros((shape_[0], r))
        # hstack, not vstack: the padding appends r zero *columns* so every
        # row has d*d features before the reshape below.
        X = da.hstack([X, train_padding])

        X = da.reshape(X, [-1, d, d])
        X = da.array(list(map(grey2rgb, X)), dtype=np.float32)
    
    print('Scaling dataset')
    # 'scaler' is assumed to be module-level state; the original tested a
    # misspelled, undefined name ('scalar').
    global scaler
    if scaler is not None:
        X = scaler.transform(X.flatten().reshape(-1, 1).astype(np.float32)).reshape(X.shape)
    else:
        scaler = MinMaxScaler()
        X = scaler.fit_transform(X.flatten().reshape(-1, 1).astype(np.float32)).reshape(X.shape)
        
    return X
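
A minimal standalone sketch of the flatten/pad/reshape pattern used above, with hypothetical shapes, assuming the intent is to pad each row up to the next perfect square:

import numpy as np
import dask.array as da

X = da.ones((8, 10))                          # 8 samples, 10 features each
d = int(np.ceil(np.sqrt(X.shape[1])))         # next square side: 4
pad = da.zeros((X.shape[0], d * d - X.shape[1]))
X_sq = da.hstack([X, pad]).reshape(-1, d, d)  # zero-padded square images
print(X_sq.shape)                             # (8, 4, 4)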
Example #4
def main():

    dataset_path = "cifar-10-batches-py"
    # time for loading
    st = time.time()
    train_images, train_labels = load_training_data(dataset_path)
    test_images, test_labels = load_test_data(dataset_path)
    et = time.time()
    print("Time taken for loading images = {}".format(et - st))

    random_prediction = random_classifier(10000)
    random_accuracy = classification_accuracy(random_prediction, test_labels)
    print("Random classifier accuracy = {}".format(random_accuracy))

    #  ################ Naive implementation for 1NN classifier ########################
    st = time.time()
    k = 1
    test_images, train_images, train_labels = da.array(test_images), da.array(
        train_images), da.array(train_labels)
    prediction = nearest_neighbour(test_images,
                                   train_images,
                                   train_labels,
                                   k=k)
    accuracy = classification_accuracy(prediction, test_labels)
    et = time.time()
    print("{} nearest neighbor classifier accuracy = {}".format(k, accuracy))
    print("Time taken for classifying test images = {}".format(et - st))
Example #5
def workMethod():
    matrix1 = dar.array([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12],
                         [13, 14, 15, 16, 17, 18]])
    matrix2 = dar.array([[5, 10, 15], [20, 25, 30], [35, 40, 45], [50, 55, 60],
                         [65, 70, 75], [80, 85, 90]])

    # Expected Results:
    # [1155, 1260, 1365]
    # [2685, 2970, 3255]
    # [4215, 4680, 5145]

    print('Matrix 1:')
    print(matrix1.compute())
    print('\n')

    print('Matrix 2:')
    print(matrix2.compute())
    print('\n')

    result = dar.dot(matrix1, matrix2)

    # result.visualize(filename='./Results/DaskSyncMatrixMultFiles/DaskSyncMatrixMultGraph')

    print('Final Result')
    print(result.compute())
    print('\n')
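
The same product can be written with the matrix-multiply operator; a minimal sketch (assuming the `dar` alias for `dask.array` used above). Either spelling only builds a task graph, and no work happens until `.compute()`:

result = matrix1 @ matrix2  # equivalent lazy graph to dar.dot(matrix1, matrix2)
print(result.compute())     # evaluation is deferred until here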
Example #6
    def test_add_bands(self):
        from satpy.composites import add_bands
        import dask.array as da
        import numpy as np
        import xarray as xr

        # L + RGB -> RGB
        data = xr.DataArray(da.ones((1, 3, 3)),
                            dims=('bands', 'y', 'x'),
                            coords={'bands': ['L']})
        new_bands = xr.DataArray(da.array(['R', 'G', 'B']),
                                 dims=('bands'),
                                 coords={'bands': ['R', 'G', 'B']})
        res = add_bands(data, new_bands)
        res_bands = ['R', 'G', 'B']
        self.assertEqual(res.mode, ''.join(res_bands))
        np.testing.assert_array_equal(res.bands, res_bands)
        np.testing.assert_array_equal(res.coords['bands'], res_bands)

        # L + RGBA -> RGBA
        data = xr.DataArray(da.ones((1, 3, 3)),
                            dims=('bands', 'y', 'x'),
                            coords={'bands': ['L']},
                            attrs={'mode': 'L'})
        new_bands = xr.DataArray(da.array(['R', 'G', 'B', 'A']),
                                 dims=('bands'),
                                 coords={'bands': ['R', 'G', 'B', 'A']})
        res = add_bands(data, new_bands)
        res_bands = ['R', 'G', 'B', 'A']
        self.assertEqual(res.mode, ''.join(res_bands))
        np.testing.assert_array_equal(res.bands, res_bands)
        np.testing.assert_array_equal(res.coords['bands'], res_bands)

        # LA + RGB -> RGBA
        data = xr.DataArray(da.ones((2, 3, 3)),
                            dims=('bands', 'y', 'x'),
                            coords={'bands': ['L', 'A']},
                            attrs={'mode': 'LA'})
        new_bands = xr.DataArray(da.array(['R', 'G', 'B']),
                                 dims=('bands'),
                                 coords={'bands': ['R', 'G', 'B']})
        res = add_bands(data, new_bands)
        res_bands = ['R', 'G', 'B', 'A']
        self.assertEqual(res.mode, ''.join(res_bands))
        np.testing.assert_array_equal(res.bands, res_bands)
        np.testing.assert_array_equal(res.coords['bands'], res_bands)

        # RGB + RGBA -> RGBA
        data = xr.DataArray(da.ones((3, 3, 3)),
                            dims=('bands', 'y', 'x'),
                            coords={'bands': ['R', 'G', 'B']},
                            attrs={'mode': 'RGB'})
        new_bands = xr.DataArray(da.array(['R', 'G', 'B', 'A']),
                                 dims=('bands'),
                                 coords={'bands': ['R', 'G', 'B', 'A']})
        res = add_bands(data, new_bands)
        res_bands = ['R', 'G', 'B', 'A']
        self.assertEqual(res.mode, ''.join(res_bands))
        np.testing.assert_array_equal(res.bands, res_bands)
        np.testing.assert_array_equal(res.coords['bands'], res_bands)
Example #7
            def get_groups(model: "sbmtm",
                           l: int = 0) -> Tuple[da.Array, da.Array]:
                # rewrite from _sbmtm to use dask
                V = model.get_V()
                D = model.get_D()

                g = model.g
                state = model.state
                state_l = state.project_level(l).copy(overlap=True)
                state_l_edges = state_l.get_edge_blocks()  # labeled half-edges

                # count labeled half-edges, group-memberships
                B = state_l.get_B()

                id_dbw = np.zeros(g.edge_index_range, dtype=np.dtype(int))
                id_wb = np.zeros(g.edge_index_range, dtype=np.dtype(int))
                id_b = np.zeros(g.edge_index_range, dtype=np.dtype(int))
                weig = np.zeros(g.edge_index_range, dtype=np.dtype(int))

                for i, e in enumerate(g.edges()):
                    _, id_b[i] = state_l_edges[e]
                    id_dbw[i] = int(e.source())
                    id_wb[i] = int(e.target()) - D
                    weig[i] = g.ep["count"][e]

                n_bw = sparse.COO(
                    [id_b, id_wb], weig, shape=(B, V), fill_value=0
                )  # number of half-edges incident on word-node w and labeled as word-group tw

                del id_wb

                n_dbw = sparse.COO(
                    [id_dbw, id_b], weig, shape=(D, B), fill_value=0
                )  # number of half-edges incident on document-node d and labeled as word-group td

                del weig
                del id_b
                del id_dbw

                ind_w = np.where(np.sum(n_bw, axis=1) > 0)[0]
                n_bw = n_bw[ind_w, :]
                del ind_w

                ind_w2 = np.where(np.sum(n_dbw, axis=0) > 0)[0]
                n_dbw = n_dbw[:, ind_w2]
                del ind_w2

                # topic-distribution for words P(t_w | w)
                p_w_tw = n_bw / np.sum(n_bw, axis=1).todense()[:, np.newaxis]

                # Mixture of word-groups into documents P(d | t_w)
                p_tw_d = n_dbw / np.sum(n_dbw, axis=0).todense()[np.newaxis, :]

                return (
                    da.array(p_w_tw).map_blocks(lambda b: b.todense(),
                                                dtype=np.dtype(float)),
                    da.array(p_tw_d).map_blocks(lambda b: b.todense(),
                                                dtype=np.dtype(float)),
                )
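
The return step above wraps a `sparse.COO` in a dask array and densifies it block by block; a minimal standalone sketch of that hand-off (assumes the `sparse` package is installed):

import numpy as np
import sparse
import dask.array as da

coo = sparse.COO(np.eye(4))                            # any 2-D sparse array
lazy = da.from_array(coo, chunks=2)                    # blocks stay sparse
dense = lazy.map_blocks(lambda b: b.todense(), dtype=float)
print(dense.compute())                                 # dense numpy result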
Example #8
def test_array():
    x = np.ones(5, dtype="i4")
    d = da.ones(5, chunks=3, dtype="i4")
    assert_eq(da.array(d, ndmin=3, dtype="i8"), np.array(x, ndmin=3, dtype="i8"))

    # regression test for #1847: this should not raise an exception.
    x = da.ones((100, 3), chunks=10)
    y = da.array(x)
    assert isinstance(y, da.Array)
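
`da.array` follows `np.array` semantics (it can change dtype and ndim, as the test shows), while `da.from_array` wraps an existing array with explicit chunks; a minimal sketch of the difference:

import numpy as np
import dask.array as da

x = np.arange(6)
a = da.from_array(x, chunks=3)        # wrapped as two chunks of 3
b = da.array(x, dtype="f8", ndmin=2)  # np.array-style conversion, shape (1, 6)
print(a.chunks, b.shape, b.dtype)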
Example #9
def test_array_id():
    array = da.array(np.random.rand(10, 7))
    x = da.array(np.random.rand(10, 5))
    sarray = ScaledCenterArray(scale=True, center=True)
    sarray.fit(da.array(array), x=x)
    sarray_T = sarray.T
    assert id(sarray._array) == id(sarray_T._array)
    assert id(sarray.center_vector) == id(sarray_T.center_vector)
    assert id(sarray._array_moment.scale_matrix) == id(sarray_T._array_moment.scale_matrix)
    assert id(sarray._array_moment.sym_scale_matrix) == id(sarray_T._array_moment.sym_scale_matrix)
Example #10
def test_array():
    x = np.ones(5, dtype='i4')
    d = da.ones(5, chunks=3, dtype='i4')
    assert_eq(da.array(d, ndmin=3, dtype='i8'),
              np.array(x, ndmin=3, dtype='i8'))

    # regression test for #1847: this should not raise an exception.
    x = da.ones((100, 3), chunks=10)
    y = da.array(x)
    assert isinstance(y, da.Array)
Example #11
def test_array_transpose_transpose():
    array = da.array(np.random.rand(7, 10))
    x = da.array(np.random.rand(10, 5))
    sarray = ScaledCenterArray(scale=True, center=True)
    sarray.fit(da.array(array))

    s_array_T_T = sarray.T.T
    assert id(sarray._array) == id(s_array_T_T._array)

    assert id(sarray) == id(s_array_T_T)

    np.testing.assert_array_equal(sarray.dot(x), s_array_T_T.dot(x))
Example #12
    def test_lazy_nd_points_and_bounds(self):

        self.setupTestArrays((3, 4))
        coord = AuxCoord(self.pts_lazy, bounds=self.bds_lazy)

        collapsed_coord = coord.collapsed()

        self.assertTrue(collapsed_coord.has_lazy_points())
        self.assertTrue(collapsed_coord.has_lazy_bounds())

        self.assertArrayEqual(collapsed_coord.points, da.array([55]))
        self.assertArrayEqual(collapsed_coord.bounds, da.array([[-2, 112]]))
Example #13
def run_whitening(with_dask):

    # CHECKING THE TYPES
    if with_dask:
        import dask.array as numerical_module
    else:
        import numpy as numerical_module

    # Tests our Whitening extractor.
    data = numerical_module.array([
        [1.2622, -1.6443, 0.1889],
        [0.4286, -0.8922, 1.3020],
        [-0.6613, 0.0430, 0.6377],
        [-0.8718, -0.4788, 0.3988],
        [-0.0098, -0.3121, -0.1807],
        [0.4301, 0.4886, -0.1456],
    ])
    sample = numerical_module.array([1, 2, 3.0])

    # Expected results (from matlab)
    mean_ref = numerical_module.array(
        [0.096324163333333, -0.465965438333333, 0.366839091666667])
    whit_ref = numerical_module.array([
        [1.608410253685985, 0, 0],
        [1.079813355720326, 1.411083365535711, 0],
        [0.693459921529905, 0.571417184139332, 1.800117179839927],
    ])
    sample_whitened_ref = numerical_module.array(
        [5.942255453628436, 4.984316201643742, 4.739998188373740])

    # Runs whitening (first method)

    t = Whitening()
    t.fit(data)

    s = t.transform(sample)

    # Makes sure results are good
    eps = 1e-4
    assert np.allclose(t.input_subtract, mean_ref, eps, eps)
    assert np.allclose(t.weights, whit_ref, eps, eps)
    assert np.allclose(s, sample_whitened_ref, eps, eps)

    # Runs whitening (second method)
    m2 = t.fit(data)
    s2 = t.transform(sample)

    # Makes sure results are good
    eps = 1e-4
    assert np.allclose(m2.input_subtract, mean_ref, eps, eps)
    assert np.allclose(m2.weights, whit_ref, eps, eps)
    assert np.allclose(s2, sample_whitened_ref, eps, eps)
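
For reference, the reference numbers above are consistent with the standard whitening construction: with sample covariance S and lower-Cholesky factor L of S^-1, the map x -> (x - mu)L has identity covariance. A minimal numpy sketch (an assumption about this transformer's convention, not code taken from its source):

import numpy as np

def whitening_params(data):
    mu = data.mean(axis=0)
    cov = np.cov(data, rowvar=False)             # sample covariance (ddof=1)
    L = np.linalg.cholesky(np.linalg.inv(cov))   # lower-triangular weights
    return mu, L

# transformed sample: (sample - mu) @ L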
Example #14
def test_ScaledArray_sym_mat_mult():
    for N in range(2, 5):
        for P in range(2, 5):
            array = np.random.rand(N, P) + 1
            std = np.diag(1/np.std(array, axis=0))
            mu = np.mean(array, axis=0)
            for factor in [None, 'n', 'p']:
                if factor is None:
                    f = 1
                elif factor == 'n':
                    f = N
                else:
                    f = P
                for K in range(1, 5):
                    for squeeze in [True, False]:
                        x = np.random.rand(N, K)
                        if squeeze:
                            x = np.squeeze(x)

                        for fit_x in [x, None]:

                            # With No Scale or Center
                            # AA'x / f
                            result = array.dot(array.T.dot(x))/f
                            assert result.shape == x.shape
                            sarray = ScaledCenterArray(scale=False, center=False, factor=factor)
                            sarray.fit(da.array(array), x=fit_x)
                            np.testing.assert_array_equal(result, sarray.sym_mat_mult(x))

                            # With Scale but No Center
                            # B = AD
                            b_array = array.dot(std)
                            result = b_array.dot(b_array.T.dot(x))/f
                            assert result.shape == x.shape
                            sarray = ScaledCenterArray(scale=True, center=False, factor=factor)
                            sarray.fit(da.array(array), x=fit_x)
                            np.testing.assert_array_almost_equal(result, sarray.sym_mat_mult(x))

                            # With Center but No Scale:
                            # B = (A - U)
                            b_array = array - mu
                            result = b_array.dot(b_array.T.dot(x))/f
                            sarray = ScaledCenterArray(scale=False, center=True, factor=factor)
                            sarray.fit(da.array(array), x=fit_x)
                            np.testing.assert_array_almost_equal(result, sarray.sym_mat_mult(x))

                            # With Center and Scale:
                            # (A - U)DD(A - U)'x / f
                            result = (array - mu).dot(std).dot(std).dot((array - mu).T.dot(x))/f
                            sarray = ScaledCenterArray(scale=True, center=True, factor=factor)
                            sarray.fit(da.array(array), x=fit_x)
                            np.testing.assert_array_almost_equal(result, sarray.sym_mat_mult(x))
Example #15
    def test_lazy_nd_points_and_bounds(self):
        import dask.array as da

        self.setupTestArrays((3, 4))
        coord = AuxCoord(self.pts_lazy, bounds=self.bds_lazy)

        collapsed_coord = coord.collapsed()

        self.assertTrue(collapsed_coord.has_lazy_points())
        self.assertTrue(collapsed_coord.has_lazy_bounds())

        self.assertArrayEqual(collapsed_coord.points, da.array([55]))
        self.assertArrayEqual(collapsed_coord.bounds, da.array([[-2, 112]]))
Example #16
def run_wccn(with_dask):

    # CHECKING THE TYPES
    if with_dask:
        import dask.array as numerical_module
    else:
        import numpy as numerical_module

    # Tests our WCCN extractor.
    X = numerical_module.array([
        [1.2622, -1.6443, 0.1889],
        [0.4286, -0.8922, 1.3020],
        [-0.6613, 0.0430, 0.6377],
        [-0.8718, -0.4788, 0.3988],
        [-0.0098, -0.3121, -0.1807],
        [0.4301, 0.4886, -0.1456],
    ])
    y = [0, 0, 1, 1, 2, 2]

    sample = numerical_module.array([1, 2, 3.0])

    # Expected results
    mean_ref = numerical_module.array([0.0, 0.0, 0.0])
    weight_ref = numerical_module.array([
        [15.8455444, 0.0, 0.0],
        [-10.7946764, 2.87942129, 0.0],
        [18.76762201, -2.19719292, 2.1505817],
    ])
    sample_wccn_ref = numerical_module.array(
        [50.55905765, -0.83273618, 6.45174511])

    # Runs WCCN (first method)
    t = WCCN()
    t.fit(X, y=y)
    s = t.transform(sample)

    # Makes sure results are good
    eps = 1e-4
    assert np.allclose(t.input_subtract, mean_ref, eps, eps)
    assert np.allclose(t.weights, weight_ref, eps, eps)
    assert np.allclose(s, sample_wccn_ref, eps, eps)

    # Runs WCCN (second method)
    t.fit(X, y)
    s2 = t.transform(sample)

    # Makes sure results are good
    eps = 1e-4
    assert np.allclose(t.input_subtract, mean_ref, eps, eps)
    assert np.allclose(t.weights, weight_ref, eps, eps)
    assert np.allclose(s2, sample_wccn_ref, eps, eps)
Example #17
def test_PowerMethod_nan_arrays():
    array = np.random.randn(100, 100)
    for bad_type in [float('nan')]:
        array[0, 0] = bad_type
        for start in [True, False]:
            PM = PowerMethod(sub_svd_start=start, max_iter=2)
            with pytest.raises(np.linalg.LinAlgError):
                _, _, _ = PM.svd(da.array(array))

            clean_array = make_snp_array(da.array(array),
                                         mask_nan=True,
                                         std_method='norm',
                                         dtype='float64')
            _, _, _ = PM.svd(clean_array)
Example #18
    def fit(self, X, y):

        # CHECKING THE TYPES
        if isinstance(X, dask.array.Array):
            import dask.array as numerical_module

            from dask.array.linalg import cholesky, inv
        else:
            import numpy as numerical_module

            from scipy.linalg import cholesky, inv

        possible_labels = set(y)
        y_ = numerical_module.array(y)

        n_classes = len(possible_labels)

        # 1. compute the means for each label
        mu_l = numerical_module.array(
            [
                numerical_module.mean(
                    X[numerical_module.where(y_ == label)[0]], axis=0
                )
                for label in possible_labels
            ]
        )

        # 2. Compute Sw
        Sw = numerical_module.zeros((X.shape[1], X.shape[1]), dtype=float)

        for label in possible_labels:
            indexes = numerical_module.where(y_ == label)[0]
            X_l_mu_l = X[indexes] - mu_l[label]

            Sw += X_l_mu_l.T @ X_l_mu_l

        # 3. Compute inv
        scaled_Sw = (1 / n_classes) * Sw
        inv_scaled_Sw = pinv(scaled_Sw) if self.pinv else inv(scaled_Sw)

        # 4. Compute the Cholesky decomposition
        self.weights = cholesky(
            inv_scaled_Sw, lower=True
        )  # Setting lower true to have the same implementation as in the previous code
        self.input_subtract = 0
        self.input_divide = 1.0

        return self
Example #19
def test_call_allele_frequencies__tetraploid(chunks):
    ds = call_allele_frequencies(
        get_dataset(
            [
                [[0, 1, 2, 2], [0, 0, 0, 0], [0, 0, 1, 2]],
                [[0, 0, 1, 0], [0, 2, 2, 2], [2, 1, 2, 1]],
                [[1, 1, -1, 2], [1, 1, 1, 1], [-1, -1, -1, -1]],
            ],
            n_ploidy=4,
            n_allele=3,
        )
    )
    if chunks is not None:
        ds["call_genotype"] = (
            ds["call_genotype"].dims,
            da.array(ds["call_genotype"]).rechunk(chunks),
        )
    af = ds["call_allele_frequency"]
    np.testing.assert_equal(
        af,
        np.array(
            [
                [[0.25, 0.25, 0.5], [1.0, 0.0, 0.0], [0.5, 0.25, 0.25]],
                [[0.75, 0.25, 0.0], [0.25, 0.0, 0.75], [0.0, 0.5, 0.5]],
                [[0.0, 2 / 3, 1 / 3], [0.0, 1.0, 0.0], [np.nan, np.nan, np.nan]],
            ]
        ),
    )
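
A hedged numpy sketch of the quantity this test checks: per call, count each allele, treat -1 as missing, and divide by the number of called alleles (NaN where everything is missing). This mirrors the expected values above, not sgkit's internal implementation:

import numpy as np

def allele_frequencies(gt, n_allele):
    counts = np.stack([(gt == a).sum(axis=-1) for a in range(n_allele)], axis=-1)
    called = (gt >= 0).sum(axis=-1, keepdims=True)
    with np.errstate(invalid='ignore', divide='ignore'):
        return counts / called   # 0/0 -> NaN for all-missing calls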
Example #20
    def _initialization(self, data, **kwargs):
        vec_t = self.k + self.buffer

        if vec_t > min(data.shape):
            raise ValueError(
                'Cannot find more than min(n,p) singular values of array function. '
                'Currently k = {}, buffer = {}. k + b > min(n,p)'.format(
                    self.k, self.buffer))

        self.array = da.array(data)

        if self.factor == 'n':
            self.factor = self.array.shape[0]
        elif self.factor == 'p':
            self.factor = self.array.shape[1]
        elif self.factor is None:
            self.factor = False

        if self.sub_svd_start:
            x = sub_svd_init(
                self.array,
                k=vec_t,
                warm_start_row_factor=self.init_row_sampling_factor,
                log=0)

            if self.lmbd:
                c_norms = np.linalg.norm(x, 2, axis=0)
                x *= (1 - self.lmbd)
                x += (self.lmbd * c_norms /
                      np.sqrt(x.shape[0])) * da.random.normal(size=x.shape)
        else:
            x = rnormal_start(self.array, vec_t, log=0)

        return x.persist()
Example #21
def test_call_allele_frequencies__diploid(chunks):
    ds = call_allele_frequencies(
        get_dataset(
            [
                [[0, 0], [0, 0], [0, 0]],
                [[0, 0], [0, 0], [0, 1]],
                [[1, 1], [0, 1], [1, 0]],
                [[1, -1], [1, 1], [-1, -1]],
            ]
        )
    )
    if chunks is not None:
        ds["call_genotype"] = (
            ds["call_genotype"].dims,
            da.array(ds["call_genotype"]).rechunk(chunks),
        )
    af = ds["call_allele_frequency"]
    np.testing.assert_equal(
        af,
        np.array(
            [
                [[1.0, 0.0], [1.0, 0.0], [1.0, 0.0]],
                [[1.0, 0.0], [1.0, 0.0], [0.5, 0.5]],
                [[0.0, 1.0], [0.5, 0.5], [0.5, 0.5]],
                [[0.0, 1.0], [0.0, 1.0], [np.nan, np.nan]],
            ]
        ),
    )
Example #22
    def setUp(self):
        """
        Retrieves test data filepaths and auxiliary data and creates temporary reference data as NumPy arrays,
        xarray arrays and Pandas data frames.
        """

        self.gt_filepaths, self.timestamps = setup_gt_test_data()
        self.nc_filepaths, _ = setup_nc_multi_test_data()
        self.nc_filepath, _ = setup_nc_single_test_data()

        self.lon = 5.
        self.lat = 44.
        sref = osr.SpatialReference()
        sref.ImportFromEPSG(4326)
        self.sref = sref
        row = 970
        col = 246
        self.x = 4323250.
        self.y = 1314750.

        self.ref_np_ar = (np.array([[[row + col] * 4]]).T +
                          np.arange(0, 4)[:, None, None]).astype(float)
        xr_ar = xr.DataArray(
            data=da.array(self.ref_np_ar.astype(float)).rechunk((1, 1, 1)),
            coords={'time': self.timestamps,
                    'y': [self.y],
                    'x': [self.x]},
            dims=['time', 'y', 'x'])
        self.ref_xr_ds = xr.Dataset(data_vars={'1': xr_ar})
        self.ref_pd_df = self.ref_xr_ds.to_dataframe()
Example #23
    def _get_test_calib_for_channel_vis(self, chroot, meas):
        xrda = xr.DataArray
        data = {}
        data["state/celestial/earth_sun_distance"] = xrda(
            da.repeat(da.array([149597870.7]), 6000))
        data[meas + "/channel_effective_solar_irradiance"] = xrda(50)
        return data
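
For a constant vector like the earth-sun distance above, `da.full` is an equivalent spelling to repeating a 1-element array; a small alternative sketch (not what the test itself uses):

import dask.array as da
esd = da.full(6000, 149597870.7)  # same values as da.repeat(da.array([149597870.7]), 6000)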
Example #24
    def test_calc_obs_het(self):
        variations = Variations(samples=da.array(['a', 'b', 'c', 'd']))
        gts = np.array([[[0, 0], [0, 1], [0, -1], [-1, -1]],
                        [[0, 0], [0, 0], [0, -1], [-1, -1]]])

        dps = np.array([[5, 12, 10, 10], [10, 10, 10, 10]])
        variations[GT_FIELD] = da.from_array(gts)
        variations[DP_FIELD] = da.from_array(dps)
        # with this step we create a variations object with dask arrays of unknown shapes
        variations = remove_low_call_rate_vars(variations, 0)[FLT_VARS]

        het = calc_obs_het(variations, min_num_genotypes=0)
        self.assertTrue(np.allclose(het.compute(), [0.5, 0]))

        #         het = calc_obs_het(variations, min_num_genotypes=10)
        #         assert np.allclose(het, [np.NaN, np.NaN], equal_nan=True)

        het = calc_obs_het(variations,
                           min_num_genotypes=0,
                           min_call_dp_for_het_call=10)
        self.assertTrue(np.allclose(het.compute(), [1, 0]))
        het = calc_obs_het(variations,
                           min_num_genotypes=0,
                           max_call_dp_for_het_call=11)
        self.assertTrue(np.allclose(het.compute(), [0, 0]))

        het = calc_obs_het(variations,
                           min_num_genotypes=0,
                           min_call_dp_for_het_call=5)
        self.assertTrue(np.allclose(het.compute(), [0.5, 0]))
Example #25
    def test_calc_obs_het2(self):

        gts = np.array([[[0, 0], [0, 1], [0, -1], [-1, -1]],
                        [[0, 0], [0, 0], [0, -1], [-1, -1]]])

        dps = np.array([[5, 12, 10, 10], [10, 10, 10, 10]])
        samples = np.array([str(i) for i in range(gts.shape[1])])
        variations = Variations(samples=da.array(samples))
        variations[GT_FIELD] = da.from_array(gts)
        variations[DP_FIELD] = da.from_array(dps)

        het = calc_obs_het(variations, min_num_genotypes=0)
        het = compute(het)
        assert np.allclose(het, [0.5, 0])
        het = calc_obs_het(variations, min_num_genotypes=10)
        het = compute(het)
        assert np.allclose(het, [np.NaN, np.NaN], equal_nan=True)

        het = calc_obs_het(variations,
                           min_num_genotypes=0,
                           min_call_dp_for_het_call=10)
        het = compute(het)
        assert np.allclose(het, [1, 0])

        het = calc_obs_het(variations,
                           min_num_genotypes=0,
                           max_call_dp_for_het_call=11)
        het = compute(het)
        assert np.allclose(het, [0, 0])

        het = calc_obs_het(variations,
                           min_num_genotypes=0,
                           min_call_dp_for_het_call=5)
        het = compute(het)
        assert np.allclose(het, [0.5, 0])
Example #26
def test_PowerMethod_case1():
    n = 100
    p = 80
    array = np.random.rand(n, p)
    mu = array.mean(axis=0)
    std = np.diag(1 / array.std(axis=0))
    scaled_centered_array = (array - mu).dot(std)
    U, S, V = np.linalg.svd(scaled_centered_array,
                            full_matrices=False)  # Ground Truth
    array = make_snp_array(da.array(array),
                           mean=True,
                           std=True,
                           std_method='norm',
                           mask_nan=False,
                           dtype='float64')
    for k in range(1, 10):
        U_k, S_k, V_k = U[:, :k], S[:k], V[:k, :]

        PM = PowerMethod(k=k,
                         tol=1e-9,
                         scoring_method='rmse',
                         max_iter=100,
                         sub_svd_start=False,
                         init_row_sampling_factor=1,
                         factor=None,
                         lmbd=0)
        U_k_PM, S_k_PM, V_k_PM = PM.svd(array)

        np.testing.assert_array_almost_equal(S_k, S_k_PM)
        assert V_k.shape == V_k_PM.shape == (k, p)
        assert U_k.shape == U_k_PM.shape == (n, k)
        np.testing.assert_almost_equal(subspace_dist(V_k, V_k_PM, S_k_PM), 0)
        np.testing.assert_almost_equal(subspace_dist(U_k, U_k_PM, S_k_PM), 0)
Example #27
        def _compute_gt_graph(self) -> None:
            path = self.path / (self.name + ".gt.gz")
            g = gt.Graph(directed=False)
            name = g.vp["name"] = g.new_vp("int")
            kind = g.vp["kind"] = g.new_vp("int")
            ecount = g.ep["count"] = g.new_ep("int")

            docs_add: defaultdict = defaultdict(lambda: g.add_vertex())
            words_add: defaultdict = defaultdict(lambda: g.add_vertex())

            count_matrix = (da.array(self.get_count_matrix()).map_blocks(
                lambda b: sparse.COO(b), dtype=np.dtype(int)).compute())

            n_doc, n_word = self.get_shape()

            for i_d in range(n_doc):
                d = docs_add[i_d]
                name[d] = i_d
                kind[d] = 0

            for i_w in range(n_word):
                w = words_add[i_w]
                name[w] = i_w
                kind[w] = 1

            for i in range(count_matrix.nnz):
                i_d, i_w = count_matrix.coords[:, i]
                e = g.add_edge(i_d, n_doc + i_w)
                ecount[e] = count_matrix.data[i]

            g.save(str(path))
            self.data["gt"] = File(path)
Example #28
    def build(self, input_shape):

        if self.kernel_size == 3:
            bk = np.array([[1, 2, 1],
                           [2, 4, 2],
                           [1, 2, 1]])
            bk = bk / np.sum(bk)
        elif self.kernel_size == 5:
            bk = np.array([[1, 4, 6, 4, 1],
                           [4, 16, 24, 16, 4],
                           [6, 24, 36, 24, 6],
                           [4, 16, 24, 16, 4],
                           [1, 4, 6, 4, 1]])
            bk = bk / np.sum(bk)
        else:
            raise ValueError('kernel_size must be 3 or 5')

        bk = np.repeat(bk, input_shape[3])
        bk = da.array(bk)

        bk = da.reshape(bk, (self.kernel_size, self.kernel_size, input_shape[3], 1))
        blur_init = tf.keras.initializers.constant(bk)

        self.blur_kernel = self.add_weight(name='blur_kernel',
                                           shape=(self.kernel_size, self.kernel_size, input_shape[3], 1),
                                           initializer=blur_init,
                                           trainable=False)

        super(MaxBlurPooling2D, self).build(input_shape)
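
The hard-coded kernels above are outer products of binomial-coefficient rows, a cheap Gaussian approximation. A small sketch that generates them for any odd size; `binomial_kernel` is a hypothetical helper, not part of the layer:

import numpy as np
from scipy.special import comb

def binomial_kernel(k):
    row = np.array([comb(k - 1, i) for i in range(k)])  # k=5 -> [1, 4, 6, 4, 1]
    kern = np.outer(row, row)                           # reproduces bk above
    return kern / kern.sum()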
Example #29
def process_data(X, y=None, test_size=0.20, dummies=False):
    if y is None:
        y = da.ones(X.shape[0])
    
    len_ = X.shape[0]    
    X = prepare_dataset(X)
        
    if dummies:
        y = dd.get_dummies(y)
        
    shape_ = list(X.shape[1:])

    X_train, X_test, y_train, y_test = train_test_split(
        X.flatten().reshape(len_, -1), y, test_size=test_size, random_state=4891)
    
    X_train = X_train.reshape([X_train.shape[0]]+shape_)
    X_test = X_test.reshape([X_test.shape[0]]+shape_)
     
    print('Training dataset shape: ', X_train.shape)
    print('Validation dataset shape: ', X_test.shape)

    train_dataset = Dataset(X_train, y_train)
    test_dataset = Dataset(X_test, y_test)
    
    # draw 10 random examples from each class
    samples = list()
    for _ in range(10):
        for y_uniq in da.unique(train_dataset.labels):
            class_x = train_dataset.x[train_dataset.labels == y_uniq]
            samples.append(class_x[random.randint(0, len(class_x) - 1)])

    train_dataset.samples = da.array(samples)
    return train_dataset, test_dataset
Example #30
    def setUp(self, *mocks):
        """Create fake data for testing."""
        self.def_cali = [-0.0037, 15.20]
        self.upd_cali = [-0.0074, 30.40]
        self.bad_cali = [0.0, 0.0]
        fh = AHIHSDFileHandler(filetype_info={'file_type': 'hsd_b01'})
        fh.calib_mode = 'NOMINAL'
        fh.user_calibration = None
        fh.is_zipped = False
        fh._header = {
            'block5': {
                'band_number': [5],
                'gain_count2rad_conversion': [self.def_cali[0]],
                'offset_count2rad_conversion': [self.def_cali[1]],
                'central_wave_length': [10.4073],
            },
            'calibration': {
                'coeff_rad2albedo_conversion': [0.0019255],
                'speed_of_light': [299792458.0],
                'planck_constant': [6.62606957e-34],
                'boltzmann_constant': [1.3806488e-23],
                'c0_rad2tb_conversion': [-0.116127314574],
                'c1_rad2tb_conversion': [1.00099153832],
                'c2_rad2tb_conversion': [-1.76961091571e-06],
                'cali_gain_count2rad_conversion': [self.upd_cali[0]],
                'cali_offset_count2rad_conversion': [self.upd_cali[1]]
            },
        }

        self.counts = da.array(np.array([[0., 1000.], [2000., 5000.]]))
        self.fh = fh
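
The gain/offset pairs in this header drive the usual linear count-to-radiance step; a hedged sketch of that relation (an assumption consistent with the coefficient names, not code from the satpy handler):

import numpy as np
gain, offset = -0.0037, 15.20                    # self.def_cali above
counts = np.array([[0., 1000.], [2000., 5000.]])
radiance = gain * counts + offset                # rad = gain * count + offset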
Example #31
def test_PowerMethod_nan_arrays_fills():
    array = np.random.randint(0, 3, size=(100, 100)).astype(float)
    array[0, 0] = 10000
    median = round(np.median(array))
    mean = round(np.mean(array))
    k = 10

    for method in ['mean', 'median', 10]:

        PM = PowerMethod(factor=None,
                         scale=False,
                         center=False,
                         tol=1e-16,
                         lmbd=0)
        if method == 'mean':
            filled_value = mean
        elif method == 'median':
            filled_value = median
        else:
            filled_value = method
        array[0, 1] = filled_value

        U, S, V = np.linalg.svd(array, full_matrices=False)
        U_k, S_k, V_k = svd_to_trunc_svd(U, S, V, k=k)

        array[0, 1] = float('nan')
        U, S, V = PM.svd(da.array(array), mask_fill=method, mask_nan=True)
        assert PM.array.array[0, 1] == filled_value
        np.testing.assert_array_almost_equal(S, S_k)
Example #32
    def _std_inverter(self, std):
        """
        Parameters
        ----------
        std : array_like, shape (P,)
            vector of standard deviations of the P columns of self._array

        Returns
        -------
        inv_std : array_like, shape (P,)
            vector of 1/std
        """

        try:
            std = std.compute()
        except AttributeError:
            pass

        degenerate_snp_columns = np.where(std <= self._std_tol)
        if len(degenerate_snp_columns[0]) > 0:
            if self._warn:
                warnings.warn('SNP Columns {} have low standard deviation.'
                              ' Setting STD of columns to 1'.format(
                                  degenerate_snp_columns))
            std[degenerate_snp_columns[0]] = 1

        return da.array(1 / std)
Example #33
    def test_lazy_nd_bounds(self):
        import dask.array as da

        self.setupTestArrays((3, 4))
        coord = AuxCoord(self.pts_real, bounds=self.bds_lazy)

        collapsed_coord = coord.collapsed()

        # Note that the new points get recalculated from the lazy bounds
        #  and so end up as lazy
        self.assertTrue(collapsed_coord.has_lazy_points())
        self.assertTrue(collapsed_coord.has_lazy_bounds())

        self.assertArrayEqual(collapsed_coord.points, np.array([55]))
        self.assertArrayEqual(collapsed_coord.bounds, da.array([[-2, 112]]))
Example #34
def _addtarr(t, dt):
    return darr.array([tn + dt for tn in t])
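
Since dask arrays broadcast, the list comprehension can be avoided when `t` is already array-like; a minimal sketch of an equivalent vectorized version (same `darr` alias assumed):

def _addtarr_vectorized(t, dt):
    # elementwise add via broadcasting; no intermediate Python list
    return darr.asarray(t) + dt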
Example #35
def test_array():
    x = np.ones(5, dtype='i4')
    d = da.ones(5, chunks=3, dtype='i4')
    assert eq(da.array(d, ndmin=3, dtype='i8'),
              np.array(x, ndmin=3, dtype='i8'))
Example #36
def _alloc_hpr(ensblk, group, varname):
    phisc = 0.01 # Scale heading, pitch and roll by 0.01. Sentinel V manual, p. 259.
    return darr.array([ensarr[group][varname]*phisc for ensarr in ensblk
                       if isinstance(ensarr, dict)])