import os

import daal4py as d4p
import numpy as np


def main(readcsv=None, method='defaultDense'):
    infile = os.path.join('..', 'data', 'batch', 'covcormoments_dense.csv')

    # First use the classic way (computations on CPU)
    # configure a covariance object
    algo = d4p.covariance(streaming=True)
    # get the generator (defined in stream.py)...
    rn = read_next(infile, 112, readcsv)
    # ... and iterate through chunks/stream
    for chunk in rn:
        algo.compute(chunk)
    # finalize computation
    result_classic = algo.finalize()

    try:
        from dpctx import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
        cpu_context = lambda: device_context(device_type.cpu, 0)
    except ImportError:
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')
        cpu_context = lambda: sycl_context('cpu')

    # The computations can also be run on GPU
    if gpu_available:
        with gpu_context():
            # configure a covariance object
            algo = d4p.covariance(streaming=True)
            # get the generator (defined in stream.py)...
            rn = read_next(infile, 112, readcsv)
            # ... and iterate through chunks/stream
            for chunk in rn:
                sycl_chunk = sycl_buffer(to_numpy(chunk))
                algo.compute(sycl_chunk)
            # finalize computation
            result_gpu = algo.finalize()

        assert np.allclose(result_classic.covariance, result_gpu.covariance)
        assert np.allclose(result_classic.mean, result_gpu.mean)
        assert np.allclose(result_classic.correlation, result_gpu.correlation)

    # ... or explicitly on CPU through a SYCL context
    with cpu_context():
        # configure a covariance object
        algo = d4p.covariance(streaming=True)
        # get the generator (defined in stream.py)...
        rn = read_next(infile, 112, readcsv)
        # ... and iterate through chunks/stream
        for chunk in rn:
            sycl_chunk = sycl_buffer(to_numpy(chunk))
            algo.compute(sycl_chunk)
        # finalize computation
        result_cpu = algo.finalize()

    # covariance result objects provide correlation, covariance and mean
    assert np.allclose(result_classic.covariance, result_cpu.covariance)
    assert np.allclose(result_classic.mean, result_cpu.mean)
    assert np.allclose(result_classic.correlation, result_cpu.correlation)

    return result_classic
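
# The streaming SYCL example above assumes several helpers from the
# surrounding example package: `read_next` (stated to be defined in
# stream.py), `to_numpy`, `sycl_buffer` and a `gpu_available` flag.
# A minimal sketch of the latter three follows; the conversion logic and
# the GPU-availability heuristic are assumptions, not the shipped code.

def to_numpy(data):
    # Hypothetical helper: pass numpy arrays through, convert pandas
    # objects via .to_numpy(), and fall back to np.asarray otherwise
    if isinstance(data, np.ndarray):
        return data
    if hasattr(data, 'to_numpy'):
        return data.to_numpy()
    return np.asarray(data)

try:
    # sycl_buffer wraps a numpy array for use inside a SYCL context
    from daal4py.oneapi import sycl_buffer
    gpu_available = True  # crude assumption; a real check would query devices
except ImportError:
    gpu_available = False
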
def main(readcsv=read_csv, method='defaultDense'):
    infile = "./data/batch/covcormoments_dense.csv"

    # configure a covariance object
    algo = d4p.covariance()
    # let's provide a file directly, not a table/array
    result1 = algo.compute(infile)

    # We can also load the data ourselves and provide the numpy array
    algo = d4p.covariance(method=method)
    data = readcsv(infile)
    result2 = algo.compute(data)

    # covariance result objects provide correlation, covariance and mean;
    # both ways of providing the input must give the same results
    assert np.allclose(result1.covariance, result2.covariance)
    assert np.allclose(result1.mean, result2.mean)
    assert np.allclose(result1.correlation, result2.correlation)

    return result1
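
# The batch example defaults to a `read_csv` helper that is not shown in
# this section; a plausible pandas-based sketch follows. The `sr`/`nr`
# keyword names match their use in the distributed example below; the
# header and dtype choices are assumptions.

import pandas as pd

def read_csv(f, sr=0, nr=None):
    # read (a slice of) a headerless CSV file into a float64 numpy array
    return pd.read_csv(f, header=None, skiprows=sr, nrows=nr,
                       dtype=np.float64).to_numpy()
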
def main(readcsv=None, method='defaultDense'):
    infile = "./data/batch/covcormoments_dense.csv"

    # configure a covariance object
    algo = d4p.covariance(streaming=True)
    # get the generator (defined in stream.py)...
    rn = read_next(infile, 112, readcsv)
    # ... and iterate through chunks/stream
    for chunk in rn:
        algo.compute(chunk)

    # finalize computation
    result = algo.finalize()
    return result
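
# `read_next` is defined in stream.py according to the comments above.
# A plausible reconstruction is sketched here: it yields successive
# chunks of `chunksize` rows until the file is exhausted. The default
# reader and the end-of-file handling are assumptions.

def read_next(infile, chunksize, readcsv=None):
    reader = readcsv if readcsv is not None else read_csv
    offset = 0
    while True:
        try:
            chunk = reader(infile, sr=offset, nr=chunksize)
        except Exception:
            return  # reader signals that no rows are left
        if chunk.shape[0] == 0:
            return
        yield chunk
        if chunk.shape[0] < chunksize:
            return  # short chunk: end of file reached
        offset += chunksize
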
def main():
    infile = "./data/batch/covcormoments_dense.csv"

    # We know the number of lines in the file and use this to
    # separate the data between processes
    skiprows, nrows = get_chunk_params(lines_count=200,
                                       chunks_count=d4p.num_procs(),
                                       chunk_number=d4p.my_procid())

    # Each process reads its chunk of the file
    data = read_csv(infile, sr=skiprows, nr=nrows)

    # Create algorithm with distributed mode
    alg = d4p.covariance(method="defaultDense", distributed=True)

    # Perform computation
    res = alg.compute(data)

    # covariance result objects provide correlation, covariance and mean
    assert res.covariance.shape == (data.shape[1], data.shape[1])
    assert res.mean.shape == (1, data.shape[1])
    assert res.correlation.shape == (data.shape[1], data.shape[1])

    return res
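
# The distributed example relies on `get_chunk_params` and on daal4py's
# SPMD runtime. The helper below is a reconstruction (the even-split
# policy is an assumption); daalinit/daalfini are the actual daal4py
# SPMD entry points, and such a script would be launched with MPI,
# e.g. `mpirun -n 4 python covariance_spmd.py`.

def get_chunk_params(lines_count, chunks_count, chunk_number):
    # split rows as evenly as possible; the last process takes the rest
    base = lines_count // chunks_count
    skiprows = base * chunk_number
    nrows = lines_count - skiprows if chunk_number == chunks_count - 1 else base
    return skiprows, nrows

if __name__ == "__main__":
    d4p.daalinit()   # initialize the distributed engine
    result = main()
    d4p.daalfini()   # tear it down again
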
def compute(data, method):
    # configure a covariance object
    algo = d4p.covariance(method=method)
    return algo.compute(data)
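
# Example use of the `compute` helper above with in-memory data; the
# random input is purely illustrative.
if __name__ == "__main__":
    data = np.random.rand(1000, 10)
    res = compute(data, 'defaultDense')
    # result shapes follow the assertions in the distributed example
    assert res.covariance.shape == (10, 10)
    assert res.mean.shape == (1, 10)
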
def _fit_full_daal4py(self, X, n_components):
    n_samples, n_features = X.shape
    n_sf_min = min(n_samples, n_features)

    # Resolve the requested number of components; 'mle' and fractional
    # values are resolved below, once the eigenvalues are known
    if n_components == 'mle':
        daal_n_components = n_features
    elif n_components < 1:
        daal_n_components = n_sf_min
    else:
        daal_n_components = n_components

    fpType = getFPType(X)

    # Compute the covariance matrix and the feature means with daal4py
    covariance_algo = daal4py.covariance(
        fptype=fpType, outputMatrixType='covarianceMatrix')
    covariance_res = covariance_algo.compute(X)

    self.mean_ = covariance_res.mean.ravel()
    covariance = covariance_res.covariance
    variances_ = np.array([covariance[i, i] for i in range(n_features)])

    # Eigendecomposition of the covariance matrix via daal4py PCA
    pca_alg = daal4py.pca(fptype=fpType, method='correlationDense',
                          resultsToCompute='eigenvalue',
                          isDeterministic=True,
                          nComponents=daal_n_components)
    pca_res = pca_alg.compute(X, covariance)

    components_ = pca_res.eigenvectors
    explained_variance_ = np.maximum(pca_res.eigenvalues.ravel(), 0)
    tot_var = explained_variance_.sum()
    explained_variance_ratio_ = explained_variance_ / tot_var

    # Resolve 'mle' and fractional n_components against the spectrum
    if n_components == 'mle':
        if sklearn_check_version('0.23'):
            n_components = _infer_dimension(explained_variance_, n_samples)
        else:
            n_components = \
                _infer_dimension_(explained_variance_, n_samples, n_features)
    elif 0 < n_components < 1.0:
        ratio_cumsum = stable_cumsum(explained_variance_ratio_)
        n_components = np.searchsorted(
            ratio_cumsum, n_components, side='right') + 1

    # Estimate the variance of the discarded components
    if n_components < n_sf_min:
        if explained_variance_.shape[0] == n_sf_min:
            self.noise_variance_ = explained_variance_[n_components:].mean()
        else:
            resid_var_ = variances_.sum()
            resid_var_ -= explained_variance_[:n_components].sum()
            self.noise_variance_ = resid_var_ / (n_sf_min - n_components)
    else:
        self.noise_variance_ = 0.

    # Store the fitted attributes in scikit-learn's PCA conventions
    self.n_samples_, self.n_features_ = n_samples, n_features
    self.components_ = components_[:n_components]
    self.n_components_ = n_components
    self.explained_variance_ = explained_variance_[:n_components]
    self.explained_variance_ratio_ = \
        explained_variance_ratio_[:n_components]
    self.singular_values_ = np.sqrt(
        (n_samples - 1) * self.explained_variance_)
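
# A standalone sketch of the covariance-then-PCA pipeline that
# _fit_full_daal4py implements inside scikit-learn's PCA. The random
# input and the fixed 'double' precision are illustrative assumptions;
# the daal4py calls mirror those used in the method above.

import numpy as np
import daal4py

X = np.random.rand(200, 5)
n_components = 3

# covariance matrix and feature means
cov_res = daal4py.covariance(
    fptype='double', outputMatrixType='covarianceMatrix').compute(X)

# eigendecomposition of that covariance matrix
pca_res = daal4py.pca(
    fptype='double', method='correlationDense',
    resultsToCompute='eigenvalue', isDeterministic=True,
    nComponents=n_components).compute(X, cov_res.covariance)

components = pca_res.eigenvectors[:n_components]
explained_variance = np.maximum(pca_res.eigenvalues.ravel(), 0)[:n_components]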