Ejemplo n.º 1
0
def test_cumulative_partition():
    """Check cumulative partitions against direct slicing on both axes.

    For each fraction ``f``, the i-th cumulative partition must select the
    same data as (a) the leading ``int((i + 1) * size * f)`` rows/columns of
    the array and (b) the first ``i + 1`` plain partitions stacked together.
    """
    n_rows, n_cols = 1000, 1000
    data = np.random.randn(n_rows, n_cols)

    for frac in [.001, .1, .25, .5]:
        # Partition with the default axis; the results index rows below.
        row_parts = array_constant_partition(data.shape, f=frac, min_size=1)
        row_cum_parts = cumulative_partition(row_parts)

        for idx, cum_rows in enumerate(row_cum_parts):
            stop = int((idx + 1) * n_rows * frac)
            expected = data[:stop, :]
            stacked = np.vstack(
                [data[rows, :] for rows in row_parts[:idx + 1]])
            selected = data[cum_rows, :]

            np.testing.assert_array_equal(expected, stacked)
            np.testing.assert_array_equal(expected, selected)

        # Same checks with axis=1; the results index columns below.
        col_parts = array_constant_partition(data.shape,
                                             f=frac,
                                             min_size=1,
                                             axis=1)
        col_cum_parts = cumulative_partition(col_parts)

        for idx, cum_cols in enumerate(col_cum_parts):
            stop = int((idx + 1) * n_cols * frac)
            expected = data[:, 0:stop]
            stacked = np.hstack(
                [data[:, cols] for cols in col_parts[:idx + 1]])
            selected = data[:, cum_cols]

            np.testing.assert_array_equal(expected, stacked)
            np.testing.assert_array_equal(expected, selected)
Ejemplo n.º 2
0
def test_array_constant_partition_sizes():
    """Check the number of partitions returned for a (10, 20) shape.

    Each case pairs the extra keyword arguments passed alongside ``f=.1``
    with the number of partitions the call is expected to return.
    """
    a = np.random.rand(10, 20)

    cases = [
        # Default axis.
        ({'min_size': 1}, 10),
        ({'min_size': 5}, 2),
        # Explicit axis=1.
        ({'axis': 1, 'min_size': 1}, 10),
        ({'axis': 1, 'min_size': 5}, 4),
    ]

    for extra_kwargs, expected_count in cases:
        parts = array_constant_partition(a.shape, f=.1, **extra_kwargs)
        assert len(parts) == expected_count
Ejemplo n.º 3
0
def test_array_constant_partition_array():
    """Check that each partition selects the expected contiguous slice.

    The i-th partition must pick out the same rows (default axis) or
    columns (``axis=1``) as the slice running from ``int(i * f * size)`` up
    to ``int((i + 1) * (f * size))``.
    """
    n, p = 1000, 2000
    a = np.random.rand(n, p)

    for f in [0.01, .1, .25, .5]:
        # Default axis: partitions are compared against row slices.
        row_parts = array_constant_partition(a.shape, f=f)
        for i, rows in enumerate(row_parts):
            start = int(i * f * n)
            stop = int((i + 1) * (f * n))
            np.testing.assert_array_equal(a[start:stop], a[rows, :])

        # axis=1: partitions are compared against column slices.
        col_parts = array_constant_partition(a.shape, f=f, axis=1)
        for i, cols in enumerate(col_parts):
            start = int(i * f * p)
            stop = int((i + 1) * (f * p))
            np.testing.assert_array_equal(a[:, start:stop], a[:, cols])
Ejemplo n.º 4
0
def test_array_constant_partition_bad_size():
    """Check which shape/fraction combinations raise ``ValueError``.

    ``f=.5`` on a (10, 10) shape must be accepted, while ``f=.51`` must
    raise ``ValueError`` for both the (10, 10) and (10, 10, 10) shapes.
    """
    # Must complete without raising.
    array_constant_partition((10, 10), f=.5)

    for rejected_shape in [(10, 10), (10, 10, 10)]:
        with pytest.raises(ValueError):
            array_constant_partition(rejected_shape, f=.51)
Ejemplo n.º 5
0
    def svd(self,
            array: LargeArrayType,
            verbose: bool = True,
            return_history: bool = False,
            **kwargs):
        """Run the power method on successive row subsets, then on the full array.

        The array is split into row partitions with
        ``array_constant_partition`` and ``cumulative_partition``. A start
        matrix is built from the first partition, refined by running a
        ``_vPowerMethod`` on every cumulative partition except the last, and
        finally handed to a ``_vPowerMethod`` created with ``full_svd=True``
        that runs on the whole array.

        Parameters
        ----------
        array
            Input data; converted with ``da.array`` and stored on
            ``self.array``.
        verbose
            When False, the ``tqdm`` progress bar over partitions is disabled.
        return_history
            When True, return ``self.history``; otherwise return
            ``self.history.iter['last_value']``.
        **kwargs
            Accepted but not used by this method.

        Returns
        -------
        ``self.history`` or the last value recorded by the final
        ``_vPowerMethod`` run, depending on ``return_history``.

        Notes
        -----
        ``self.factor`` is overwritten in place: ``'n'`` becomes the row
        count, ``'p'`` the column count, and ``None`` becomes ``False``.
        """
        self._reset_history()
        self.history.time['start'] = time.time()

        self.array = da.array(array)

        # Resolve the symbolic factor settings against the full array shape
        # before any sub-array solver is created.
        if self.factor == 'n':
            self.factor = self.array.shape[0]
        elif self.factor == 'p':
            self.factor = self.array.shape[1]
        elif self.factor is None:
            self.factor = False

        # Number of vectors carried through the iterations.
        n_vectors = self.k + self.buffer

        cum_parts = cumulative_partition(
            array_constant_partition(self.array.shape,
                                     f=self.f,
                                     min_size=n_vectors))

        first_block = self.array[cum_parts[0], :]

        if self.sub_svd_start == 'warm':
            start = sub_svd_init(
                first_block,
                k=n_vectors,
                warm_start_row_factor=self.init_row_sampling_factor,
                log=0)
        else:
            start = rnormal_start(first_block, k=n_vectors, log=0)

        x = first_block.T.dot(start)

        # Settings shared by every _vPowerMethod built below.
        solver_options = dict(k=self.k,
                              buffer=self.buffer,
                              max_iter=self.max_iter,
                              factor=self.factor,
                              scoring_method=self.scoring_method,
                              tol=self.tol,
                              warn=self.warn)
        svd_options = dict(mask_nan=False, transpose=False)

        for rows in tqdm(cum_parts[:-1], disable=not verbose):
            solver = _vPowerMethod(v_start=x, **solver_options)

            x = solver.svd(self.array[rows, :], verbose=False, **svd_options)

            last_value = solver.history.iter['last_value']
            self.history.iter['last_value'] = last_value
            self.history.iter['sub_svd'].append(last_value)

            if self.lmbd:
                # Shrink x and add Gaussian noise scaled by the per-column
                # 2-norms taken before shrinking.
                c_norms = np.linalg.norm(x, 2, axis=0)
                x *= (1 - self.lmbd)
                x += (self.lmbd * c_norms /
                      np.sqrt(x.shape[0])) * da.random.normal(size=x.shape)

            if 'v-subspace' in self.scoring_method:
                self.history.iter['V'].append(solver.history.iter['V'][-1])

            self.history.iter['S'].append(solver.history.iter['S'][-1])
            self.history.acc['sub_svd_acc'].append(solver.history.acc)
            self.history.time['iter'].append(solver.history.time)

        # Final pass over the complete array, started from the refined x.
        final_solver = _vPowerMethod(v_start=x,
                                     full_svd=True,
                                     **solver_options)
        final_solver.svd(self.array, verbose=False, **svd_options)

        last_value = final_solver.history.iter['last_value']
        self.history.iter['last_value'] = last_value
        self.history.iter['sub_svd'].append(last_value)

        if return_history:
            return self.history
        return self.history.iter['last_value']