Example #1
def main(readcsv=None, method='defaultDense'):
    infile = os.path.join('..', 'data', 'batch', 'covcormoments_dense.csv')

    # First use the classic way (computations on CPU)
    # configure a covariance object
    algo = d4p.covariance(streaming=True)
    # get the generator (defined in stream.py)...
    rn = read_next(infile, 112, readcsv)
    # ... and iterate through chunks/stream
    for chunk in rn:
        algo.compute(chunk)
    # finalize computation
    result_classic = algo.finalize()

    try:
        from dpctx import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
        cpu_context = lambda: device_context(device_type.cpu, 0)
    except ImportError:
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')
        cpu_context = lambda: sycl_context('cpu')

    # It is possible to run the computations on GPU
    if gpu_available:
        with gpu_context():
            # configure a covariance object
            algo = d4p.covariance(streaming=True)
            # get the generator (defined in stream.py)...
            rn = read_next(infile, 112, readcsv)
            # ... and iterate through chunks/stream
            for chunk in rn:
                sycl_chunk = sycl_buffer(to_numpy(chunk))
                algo.compute(sycl_chunk)
            # finalize computation
            result_gpu = algo.finalize()
        assert np.allclose(result_classic.covariance, result_gpu.covariance)
        assert np.allclose(result_classic.mean, result_gpu.mean)
        assert np.allclose(result_classic.correlation, result_gpu.correlation)

    # It is also possible to run the computations on CPU via a SYCL context
    with cpu_context():
        # configure a covariance object
        algo = d4p.covariance(streaming=True)
        # get the generator (defined in stream.py)...
        rn = read_next(infile, 112, readcsv)
        # ... and iterate through chunks/stream
        for chunk in rn:
            sycl_chunk = sycl_buffer(to_numpy(chunk))
            algo.compute(sycl_chunk)
        # finalize computation
        result_cpu = algo.finalize()

    # covariance result objects provide correlation, covariance and mean

    assert np.allclose(result_classic.covariance, result_cpu.covariance)
    assert np.allclose(result_classic.mean, result_cpu.mean)
    assert np.allclose(result_classic.correlation, result_cpu.correlation)

    return result_classic
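
The read_next generator used above is defined in the examples' stream.py helper. A minimal sketch of such a chunked reader, assuming a pandas-based implementation and ignoring the optional readcsv override, could look like this:

import numpy as np
import pandas as pd

def read_next(infile, chunksize, readcsv=None):
    # Yield the CSV file in blocks of `chunksize` rows as contiguous
    # float64 arrays, so the streaming algorithm can consume them one by one.
    for block in pd.read_csv(infile, header=None, dtype=np.float64,
                             chunksize=chunksize):
        yield np.ascontiguousarray(block.values)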
Example #2
def main(readcsv=read_csv, method='defaultDense'):
    infile = "./data/batch/covcormoments_dense.csv"

    # configure a covariance object
    algo = d4p.covariance()

    # let's provide a file directly, not a table/array
    result1 = algo.compute(infile)

    # We can also load the data ourselves and provide the numpy array
    algo = d4p.covariance(method=method)
    data = readcsv(infile)
    result2 = algo.compute(data)

    # covariance result objects provide correlation, covariance and mean
    assert np.allclose(result1.covariance, result2.covariance)
    assert np.allclose(result1.mean, result2.mean)
    assert np.allclose(result1.correlation, result2.correlation)

    return result1
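
The read_csv default in the signature is one of the small reader helpers that ship with the daal4py examples; a pandas-based stand-in and a typical invocation (names here are illustrative) might be:

import numpy as np
import pandas as pd

def read_csv(f, c=None, t=np.float64):
    # Read selected (or all) columns of a headerless CSV into a float64 array.
    return pd.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t).values

if __name__ == "__main__":
    res = main()
    print("Covariance:\n", res.covariance)
    print("Mean:\n", res.mean)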
Example #3

def main(readcsv=None, method='defaultDense'):
    infile = "./data/batch/covcormoments_dense.csv"

    # configure a covariance object
    algo = d4p.covariance(streaming=True)

    # get the generator (defined in stream.py)...
    rn = read_next(infile, 112, readcsv)
    # ... and iterate through chunks/stream
    for chunk in rn:
        algo.compute(chunk)

    # finalize computation
    result = algo.finalize()

    return result
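
Streaming mode only accumulates partial results in each compute() call; finalize() merges them into the final tables. A quick way to sanity-check that against a one-shot batch run (a sketch, paths as in the example, otherwise illustrative):

import daal4py as d4p
import numpy as np
import pandas as pd

streamed = main()
full_data = pd.read_csv("./data/batch/covcormoments_dense.csv",
                        header=None, dtype=np.float64)
batch = d4p.covariance().compute(full_data)
assert np.allclose(streamed.covariance, batch.covariance)
assert np.allclose(streamed.mean, batch.mean)
assert np.allclose(streamed.correlation, batch.correlation)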
Example #4
def main():
    infile = "./data/batch/covcormoments_dense.csv"

    # We know the number of lines in the file and use this to split the data between processes
    skiprows, nrows = get_chunk_params(lines_count=200,
                                       chunks_count=d4p.num_procs(),
                                       chunk_number=d4p.my_procid())

    # Each process reads its chunk of the file
    data = read_csv(infile, sr=skiprows, nr=nrows)

    # Create algorithm with distributed mode
    alg = d4p.covariance(method="defaultDense", distributed=True)

    # Perform computation
    res = alg.compute(data)

    # covariance result objects provide correlation, covariance and mean
    assert res.covariance.shape == (data.shape[1], data.shape[1])
    assert res.mean.shape == (1, data.shape[1])
    assert res.correlation.shape == (data.shape[1], data.shape[1])

    return res
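
This SPMD example is meant to be launched under MPI with the daal4py engine initialized first. Below is a sketch of the launch wrapper, together with illustrative stand-ins for the get_chunk_params and read_csv helpers the snippet relies on:

import daal4py as d4p
import numpy as np
import pandas as pd

def get_chunk_params(lines_count, chunks_count, chunk_number):
    # Split lines_count rows into chunks_count nearly equal blocks and return
    # (rows to skip, rows to read) for the block owned by this process.
    base, rest = divmod(lines_count, chunks_count)
    nrows = base + (1 if chunk_number < rest else 0)
    skiprows = chunk_number * base + min(chunk_number, rest)
    return skiprows, nrows

def read_csv(f, sr=0, nr=None, t=np.float64):
    # Read only this process' slice of the headerless CSV file.
    return pd.read_csv(f, skiprows=sr, nrows=nr, header=None, dtype=t).values

if __name__ == "__main__":
    d4p.daalinit()                 # initialize the distributed engine
    result = main()
    if d4p.my_procid() == 0:       # print on the root process only
        print(result.covariance)
    d4p.daalfini()

# launched e.g. as:  mpirun -n 4 python covariance_spmd.py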
Example #5
def compute(data, method):
    # configure a covariance object
    algo = d4p.covariance(method=method)
    return algo.compute(data)
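
A small usage sketch for this helper on random data (the array sizes below are arbitrary):

import numpy as np

data = np.random.RandomState(0).rand(1000, 10)
res = compute(data, "defaultDense")
# The result holds the 10x10 covariance and correlation matrices plus the column means.
print(res.covariance.shape, res.correlation.shape, res.mean.shape)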
Example #6
    def _fit_full_daal4py(self, X, n_components):
        n_samples, n_features = X.shape
        n_sf_min = min(n_samples, n_features)

        if n_components == 'mle':
            daal_n_components = n_features
        elif n_components < 1:
            daal_n_components = n_sf_min
        else:
            daal_n_components = n_components

        fpType = getFPType(X)

        covariance_algo = daal4py.covariance(
            fptype=fpType, outputMatrixType='covarianceMatrix')
        covariance_res = covariance_algo.compute(X)

        self.mean_ = covariance_res.mean.ravel()
        covariance = covariance_res.covariance
        variances_ = np.array([covariance[i, i] for i in range(n_features)])

        pca_alg = daal4py.pca(fptype=fpType,
                              method='correlationDense',
                              resultsToCompute='eigenvalue',
                              isDeterministic=True,
                              nComponents=daal_n_components)
        pca_res = pca_alg.compute(X, covariance)

        components_ = pca_res.eigenvectors
        explained_variance_ = np.maximum(pca_res.eigenvalues.ravel(), 0)
        tot_var = explained_variance_.sum()
        explained_variance_ratio_ = explained_variance_ / tot_var

        if n_components == 'mle':
            if sklearn_check_version('0.23'):
                n_components = _infer_dimension(explained_variance_, n_samples)
            else:
                n_components = \
                    _infer_dimension_(explained_variance_, n_samples, n_features)
        elif 0 < n_components < 1.0:
            ratio_cumsum = stable_cumsum(explained_variance_ratio_)
            n_components = np.searchsorted(
                ratio_cumsum, n_components, side='right') + 1

        if n_components < n_sf_min:
            if explained_variance_.shape[0] == n_sf_min:
                self.noise_variance_ = explained_variance_[n_components:].mean()
            else:
                resid_var_ = variances_.sum()
                resid_var_ -= explained_variance_[:n_components].sum()
                self.noise_variance_ = resid_var_ / (n_sf_min - n_components)
        else:
            self.noise_variance_ = 0.

        self.n_samples_, self.n_features_ = n_samples, n_features
        self.components_ = components_[:n_components]
        self.n_components_ = n_components
        self.explained_variance_ = explained_variance_[:n_components]
        self.explained_variance_ratio_ = explained_variance_ratio_[:n_components]
        self.singular_values_ = np.sqrt(
            (n_samples - 1) * self.explained_variance_)
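
_fit_full_daal4py above is an internal path of daal4py's accelerated scikit-learn PCA and is normally reached simply by fitting the patched estimator; a hedged usage sketch (module path as in daal4py's sklearn integration, data is random):

import numpy as np
from daal4py.sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(500, 8)
pca = PCA(n_components=3, svd_solver='full').fit(X)
print(pca.explained_variance_ratio_, pca.noise_variance_)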