def main(readcsv=None, method='defaultDense'):
    """Run streaming covariance on the host CPU (classic path) and under
    SYCL GPU/CPU contexts, asserting that all backends agree.

    :param readcsv: optional CSV-reading callable forwarded to read_next
    :param method: computation method (kept for a uniform example interface)
    :return: the classic (host CPU) covariance result
    """
    infile = os.path.join('..', 'data', 'batch', 'covcormoments_dense.csv')

    # Using of the classic way (computations on CPU)
    # configure a covariance object
    algo = d4p.covariance(streaming=True)
    # get the generator (defined in stream.py)...
    rn = read_next(infile, 112, readcsv)
    # ... and iterate through chunks/stream
    for chunk in rn:
        algo.compute(chunk)
    # finalize computation
    result_classic = algo.finalize()

    # Prefer dppl device contexts; fall back to daal4py's sycl_context.
    try:
        from dppl import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
        cpu_context = lambda: device_context(device_type.cpu, 0)
    except ImportError:  # only the missing-module case is expected here
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')
        cpu_context = lambda: sycl_context('cpu')

    # It is possible to specify to make the computations on GPU
    if gpu_available:
        with gpu_context():
            # configure a covariance object
            algo = d4p.covariance(streaming=True)
            # get the generator (defined in stream.py)...
            rn = read_next(infile, 112, readcsv)
            # ... and iterate through chunks/stream
            for chunk in rn:
                sycl_chunk = sycl_buffer(to_numpy(chunk))
                algo.compute(sycl_chunk)
            # finalize computation
            result_gpu = algo.finalize()
        assert np.allclose(result_classic.covariance, result_gpu.covariance)
        assert np.allclose(result_classic.mean, result_gpu.mean)
        assert np.allclose(result_classic.correlation, result_gpu.correlation)

    # It is possible to specify to make the computations on CPU
    with cpu_context():
        # configure a covariance object
        algo = d4p.covariance(streaming=True)
        # get the generator (defined in stream.py)...
        rn = read_next(infile, 112, readcsv)
        # ... and iterate through chunks/stream
        for chunk in rn:
            sycl_chunk = sycl_buffer(to_numpy(chunk))
            algo.compute(sycl_chunk)
        # finalize computation
        result_cpu = algo.finalize()

    # covariance result objects provide correlation, covariance and mean
    assert np.allclose(result_classic.covariance, result_cpu.covariance)
    assert np.allclose(result_classic.mean, result_cpu.mean)
    assert np.allclose(result_classic.correlation, result_cpu.correlation)

    return result_classic
def main(readcsv=read_csv, method='defaultDense'):
    """Train/predict a binary classifier on the host CPU (classic path) and
    under SYCL GPU/CPU contexts, asserting that predictions agree.

    :param readcsv: CSV-reading callable (file, column-range) -> table
    :param method: computation method (kept for a uniform example interface)
    :return: (train_result, classic prediction result, expected labels)
    """
    nClasses = 2
    nFeatures = 20

    # read training data from file
    # with 20 features per observation and 1 class label
    trainfile = os.path.join('..', 'data', 'batch', 'binary_cls_train.csv')
    train_data = readcsv(trainfile, range(nFeatures))
    train_labels = readcsv(trainfile, range(nFeatures, nFeatures + 1))

    # read testing data from file with 20 features per observation
    testfile = os.path.join('..', 'data', 'batch', 'binary_cls_test.csv')
    predict_data = readcsv(testfile, range(nFeatures))
    predict_labels = readcsv(testfile, range(nFeatures, nFeatures + 1))

    # Using of the classic way (computations on CPU)
    result_classic, train_result = compute(train_data, train_labels,
                                           predict_data, nClasses)

    train_data = to_numpy(train_data)
    train_labels = to_numpy(train_labels)
    predict_data = to_numpy(predict_data)

    # Prefer dppl device contexts; fall back to daal4py's sycl_context.
    try:
        from dppl import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
        cpu_context = lambda: device_context(device_type.cpu, 0)
    except ImportError:  # only the missing-module case is expected here
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')
        cpu_context = lambda: sycl_context('cpu')

    # It is possible to specify to make the computations on GPU
    if gpu_available:
        with gpu_context():
            sycl_train_data = sycl_buffer(train_data)
            sycl_train_labels = sycl_buffer(train_labels)
            sycl_predict_data = sycl_buffer(predict_data)
            result_gpu, _ = compute(sycl_train_data, sycl_train_labels,
                                    sycl_predict_data, nClasses)
        assert np.allclose(result_classic.prediction, result_gpu.prediction)

    # It is possible to specify to make the computations on CPU
    with cpu_context():
        sycl_train_data = sycl_buffer(train_data)
        sycl_train_labels = sycl_buffer(train_labels)
        sycl_predict_data = sycl_buffer(predict_data)
        result_cpu, _ = compute(sycl_train_data, sycl_train_labels,
                                sycl_predict_data, nClasses)

    # the prediction result provides prediction
    assert result_classic.prediction.shape == (predict_data.shape[0],
                                               train_labels.shape[1])
    assert np.allclose(result_classic.prediction, result_cpu.prediction)

    return (train_result, result_classic, predict_labels)
def main(readcsv=read_csv, method="defaultDense"):
    """Compute low-order moments on the host CPU (classic path) and under
    SYCL GPU/CPU contexts, asserting that all backends agree.

    :param readcsv: CSV-reading callable (file, column-range) -> table
    :param method: computation method used for the classic path
    :return: the classic (host CPU) low-order-moments result
    """
    # all statistics exposed by a low-order-moments result object
    stat_names = [
        'minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered',
        'mean', 'secondOrderRawMoment', 'variance', 'standardDeviation',
        'variation'
    ]

    # read data from file
    file = os.path.join('..', 'data', 'batch', 'covcormoments_dense.csv')
    data = readcsv(file, range(10))

    # Using of the classic way (computations on CPU)
    result_classic = compute(data, method)

    data = to_numpy(data)

    # Prefer dppl device contexts; fall back to daal4py's sycl_context.
    try:
        from dppl import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
        cpu_context = lambda: device_context(device_type.cpu, 0)
    except ImportError:  # only the missing-module case is expected here
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')
        cpu_context = lambda: sycl_context('cpu')

    # It is possible to specify to make the computations on GPU
    if gpu_available:
        with gpu_context():
            sycl_data = sycl_buffer(data)
            result_gpu = compute(sycl_data, "defaultDense")
        for name in stat_names:
            assert np.allclose(getattr(result_classic, name),
                               getattr(result_gpu, name))

    # It is possible to specify to make the computations on CPU
    with cpu_context():
        sycl_data = sycl_buffer(data)
        result_cpu = compute(sycl_data, "defaultDense")

    # result provides minimum, maximum, sum, sumSquares, sumSquaresCentered,
    # mean, secondOrderRawMoment, variance, standardDeviation, variation
    assert all(getattr(result_classic, name).shape == (1, data.shape[1])
               for name in stat_names)
    for name in stat_names:
        assert np.allclose(getattr(result_classic, name),
                           getattr(result_cpu, name))

    return result_classic
def main(readcsv=read_csv, method='defaultDense'):
    """Train/predict linear regression on the host CPU (classic path) and
    under SYCL GPU/CPU contexts, asserting that predictions agree.

    :param readcsv: CSV-reading callable (file, column-range) -> table
    :param method: computation method (kept for a uniform example interface)
    :return: (train_result, classic prediction result, expected responses)
    """
    # read training data. Let's have 10 independent,
    # and 2 dependent variables (for each observation)
    trainfile = os.path.join('..', 'data', 'batch', 'linear_regression_train.csv')
    train_indep_data = readcsv(trainfile, range(10))
    train_dep_data = readcsv(trainfile, range(10, 12))

    # read testing data
    testfile = os.path.join('..', 'data', 'batch', 'linear_regression_test.csv')
    test_indep_data = readcsv(testfile, range(10))
    test_dep_data = readcsv(testfile, range(10, 12))

    # Using of the classic way (computations on CPU)
    result_classic, train_result = compute(train_indep_data, train_dep_data,
                                           test_indep_data)

    train_indep_data = to_numpy(train_indep_data)
    train_dep_data = to_numpy(train_dep_data)
    test_indep_data = to_numpy(test_indep_data)

    # Prefer dppl device contexts; fall back to daal4py's sycl_context.
    try:
        from dppl import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
        cpu_context = lambda: device_context(device_type.cpu, 0)
    except ImportError:  # only the missing-module case is expected here
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')
        cpu_context = lambda: sycl_context('cpu')

    # It is possible to specify to make the computations on GPU
    if gpu_available:
        with gpu_context():
            sycl_train_indep_data = sycl_buffer(train_indep_data)
            sycl_train_dep_data = sycl_buffer(train_dep_data)
            sycl_test_indep_data = sycl_buffer(test_indep_data)
            result_gpu, _ = compute(sycl_train_indep_data, sycl_train_dep_data,
                                    sycl_test_indep_data)
        assert np.allclose(result_classic.prediction, result_gpu.prediction)

    # It is possible to specify to make the computations on CPU
    with cpu_context():
        sycl_train_indep_data = sycl_buffer(train_indep_data)
        sycl_train_dep_data = sycl_buffer(train_dep_data)
        sycl_test_indep_data = sycl_buffer(test_indep_data)
        result_cpu, _ = compute(sycl_train_indep_data, sycl_train_dep_data,
                                sycl_test_indep_data)

    # The prediction result provides prediction
    assert result_classic.prediction.shape == (test_dep_data.shape[0],
                                               test_dep_data.shape[1])
    assert np.allclose(result_classic.prediction, result_cpu.prediction)

    return (train_result, result_classic, test_dep_data)
def main(readcsv=read_csv, method='randomDense'):
    """Run K-Means on the host CPU (classic path) and under SYCL GPU/CPU
    contexts, asserting that all backends agree.

    :param readcsv: CSV-reading callable (file, column-range) -> table
    :param method: initialization method forwarded to compute
    :return: the classic (host CPU) K-Means result
    """
    infile = os.path.join('..', 'data', 'batch', 'kmeans_dense.csv')
    nClusters = 20
    maxIter = 5

    # Load the data
    data = readcsv(infile, range(20))

    # Using of the classic way (computations on CPU)
    result_classic = compute(data, nClusters, maxIter, method)

    data = to_numpy(data)

    # Prefer dppl device contexts; fall back to daal4py's sycl_context.
    try:
        from dppl import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
        cpu_context = lambda: device_context(device_type.cpu, 0)
    except ImportError:  # only the missing-module case is expected here
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')
        cpu_context = lambda: sycl_context('cpu')

    # It is possible to specify to make the computations on GPU
    if gpu_available:
        with gpu_context():
            sycl_data = sycl_buffer(data)
            result_gpu = compute(sycl_data, nClusters, maxIter, method)
        assert np.allclose(result_classic.centroids, result_gpu.centroids)
        assert np.allclose(result_classic.assignments, result_gpu.assignments)
        assert np.isclose(result_classic.objectiveFunction,
                          result_gpu.objectiveFunction)
        assert result_classic.nIterations == result_gpu.nIterations

    # It is possible to specify to make the computations on CPU
    with cpu_context():
        sycl_data = sycl_buffer(data)
        result_cpu = compute(sycl_data, nClusters, maxIter, method)

    # Kmeans result objects provide assignments (if requested), centroids,
    # goalFunction, nIterations and objectiveFunction
    assert result_classic.centroids.shape[0] == nClusters
    assert result_classic.assignments.shape == (data.shape[0], 1)
    assert result_classic.nIterations <= maxIter

    assert np.allclose(result_classic.centroids, result_cpu.centroids)
    assert np.allclose(result_classic.assignments, result_cpu.assignments)
    assert np.isclose(result_classic.objectiveFunction,
                      result_cpu.objectiveFunction)
    assert result_classic.nIterations == result_cpu.nIterations

    return result_classic
def get_context(device):
    """Return a device context for *device*, or None when neither the dppl
    nor the daal4py SYCL backend is available (per the module-level flags).
    """
    if dppl_available:
        return device_context(device, 0)
    if sycl_extention_available:
        return sycl_context(device)
    return None
def main(readcsv=read_csv, method='defaultDense'):
    """Run DBSCAN on the host CPU (classic path) and under SYCL GPU/CPU
    contexts, asserting that all backends agree.

    :param readcsv: CSV-reading callable (file, column-range) -> table
    :param method: computation method (kept for a uniform example interface)
    :return: the classic (host CPU) DBSCAN result
    """
    infile = os.path.join('..', 'data', 'batch', 'dbscan_dense.csv')
    epsilon = 0.04
    minObservations = 45

    # Load the data
    data = readcsv(infile, range(2))

    result_classic = compute(data, minObservations, epsilon)

    data = to_numpy(data)

    # Prefer dppl device contexts; fall back to daal4py's sycl_context.
    try:
        from dppl import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
        cpu_context = lambda: device_context(device_type.cpu, 0)
    except ImportError:  # only the missing-module case is expected here
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')
        cpu_context = lambda: sycl_context('cpu')

    # It is possible to specify to make the computations on GPU
    # (leftover debug print of gpu_available removed)
    if gpu_available:
        with gpu_context():
            sycl_data = sycl_buffer(data)
            result_gpu = compute(sycl_data, minObservations, epsilon)
        assert np.allclose(result_classic.nClusters, result_gpu.nClusters)
        assert np.allclose(result_classic.assignments, result_gpu.assignments)
        assert np.allclose(result_classic.coreIndices, result_gpu.coreIndices)
        assert np.allclose(result_classic.coreObservations,
                           result_gpu.coreObservations)

    with cpu_context():
        sycl_data = sycl_buffer(data)
        result_cpu = compute(sycl_data, minObservations, epsilon)

    assert np.allclose(result_classic.nClusters, result_cpu.nClusters)
    assert np.allclose(result_classic.assignments, result_cpu.assignments)
    assert np.allclose(result_classic.coreIndices, result_cpu.coreIndices)
    assert np.allclose(result_classic.coreObservations,
                       result_cpu.coreObservations)

    return result_classic
def main(readcsv=read_csv, method='defaultDense'):
    """Compute covariance on the host CPU (classic path) and under SYCL
    GPU/CPU contexts, asserting that all backends agree.

    :param readcsv: CSV-reading callable (file, column-range) -> table
    :param method: computation method used for the classic path
    :return: the classic (host CPU) covariance result
    """
    infile = os.path.join('..', 'data', 'batch', 'covcormoments_dense.csv')

    # Load the data
    data = readcsv(infile, range(10))

    # Using of the classic way (computations on CPU)
    result_classic = compute(data, method)

    data = to_numpy(data)

    # Prefer dppl device contexts; fall back to daal4py's sycl_context.
    try:
        from dppl import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
        cpu_context = lambda: device_context(device_type.cpu, 0)
    except ImportError:  # only the missing-module case is expected here
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')
        cpu_context = lambda: sycl_context('cpu')

    # It is possible to specify to make the computations on GPU
    if gpu_available:
        with gpu_context():
            sycl_data = sycl_buffer(data)
            result_gpu = compute(sycl_data, 'defaultDense')
        assert np.allclose(result_classic.covariance, result_gpu.covariance)
        assert np.allclose(result_classic.mean, result_gpu.mean)
        assert np.allclose(result_classic.correlation, result_gpu.correlation)

    # It is possible to specify to make the computations on CPU
    with cpu_context():
        sycl_data = sycl_buffer(data)
        result_cpu = compute(sycl_data, 'defaultDense')

    # covariance result objects provide correlation, covariance and mean
    assert np.allclose(result_classic.covariance, result_cpu.covariance)
    assert np.allclose(result_classic.mean, result_cpu.mean)
    assert np.allclose(result_classic.correlation, result_cpu.correlation)

    return result_classic
def main(readcsv=read_csv, method='svdDense'):
    """Run a PCA transform on the host CPU (classic path) and under SYCL
    GPU/CPU contexts, asserting that all backends agree.

    :param readcsv: CSV-reading callable (file, column-range) -> table
    :param method: computation method (kept for a uniform example interface)
    :return: the classic (host CPU) transform result
    """
    dataFileName = os.path.join('..', 'data', 'batch', 'pca_transform.csv')
    nComponents = 2

    # read data
    data = readcsv(dataFileName, range(3))

    # Using of the classic way (computations on CPU)
    result_classic = compute(data, nComponents)

    data = to_numpy(data)

    # Prefer dppl device contexts; fall back to daal4py's sycl_context.
    try:
        from dppl import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
        cpu_context = lambda: device_context(device_type.cpu, 0)
    except ImportError:  # only the missing-module case is expected here
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')
        cpu_context = lambda: sycl_context('cpu')

    # It is possible to specify to make the computations on GPU
    if gpu_available:
        with gpu_context():
            sycl_data = sycl_buffer(data)
            result_gpu = compute(sycl_data, nComponents)
        assert np.allclose(result_classic.transformedData,
                           result_gpu.transformedData)

    # It is possible to specify to make the computations on CPU
    with cpu_context():
        sycl_data = sycl_buffer(data)
        result_cpu = compute(sycl_data, nComponents)

    # pca_transform_result objects provides transformedData
    assert np.allclose(result_classic.transformedData,
                       result_cpu.transformedData)

    return (result_classic)
def main(readcsv=read_csv, method='defaultDense'):
    """Train and run brute-force kNN classification inside a SYCL GPU
    context, asserting the misprediction count stays below 170.

    :param readcsv: CSV-reading callable (file, column-range) -> table
    :param method: computation method (kept for a uniform example interface)
    :return: (prediction result, expected labels)
    """
    # Input data set parameters
    train_file = os.path.join('..', 'data', 'batch',
                              'k_nearest_neighbors_train.csv')
    predict_file = os.path.join('..', 'data', 'batch',
                                'k_nearest_neighbors_test.csv')

    # Read data. Let's use 5 features per observation
    nFeatures = 5
    nClasses = 5
    train_data = readcsv(train_file, range(nFeatures))
    train_labels = readcsv(train_file, range(nFeatures, nFeatures + 1))
    predict_data = readcsv(predict_file, range(nFeatures))
    predict_labels = readcsv(predict_file, range(nFeatures, nFeatures + 1))

    train_data = to_numpy(train_data)
    train_labels = to_numpy(train_labels)
    predict_data = to_numpy(predict_data)

    # Prefer the dppl GPU context; fall back to daal4py's sycl_context.
    try:
        from dppl import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
    except ImportError:  # only the missing-module case is expected here
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')

    # It is possible to specify to make the computations on GPU
    with gpu_context():
        sycl_train_data = sycl_buffer(train_data)
        sycl_train_labels = sycl_buffer(train_labels)
        sycl_predict_data = sycl_buffer(predict_data)

        # Create an algorithm object and call compute
        train_algo = d4p.bf_knn_classification_training(nClasses=nClasses)
        train_result = train_algo.compute(sycl_train_data, sycl_train_labels)

        # Create an algorithm object and call compute
        predict_algo = d4p.bf_knn_classification_prediction()
        predict_result = predict_algo.compute(sycl_predict_data,
                                              train_result.model)

    # We expect less than 170 mispredicted values
    assert np.count_nonzero(predict_labels != predict_result.prediction) < 170

    return (predict_result, predict_labels)
def main(readcsv=read_csv, method='defaultDense'):
    """Train/predict decision-forest regression on the host CPU and inside
    a SYCL GPU context.

    :param readcsv: CSV-reading callable (file, column-range, t=dtype) -> table
    :param method: computation method (kept for a uniform example interface)
    :return: (classic result, expected test responses)
    """
    maxIterations = 200

    # input data file
    infile = os.path.join('..', 'data', 'batch', 'df_regression_train.csv')
    testfile = os.path.join('..', 'data', 'batch', 'df_regression_test.csv')

    # Read data. Let's use 13 features per observation
    train_indep_data = readcsv(infile, range(13), t=np.float32)
    train_dep_data = readcsv(infile, range(13, 14), t=np.float32)
    # read test data (with same #features)
    test_indep_data = readcsv(testfile, range(13), t=np.float32)

    # Using of the classic way (computations on CPU)
    result_classic = compute(train_indep_data, train_dep_data,
                             test_indep_data, maxIterations)

    train_indep_data = to_numpy(train_indep_data)
    train_dep_data = to_numpy(train_dep_data)
    test_indep_data = to_numpy(test_indep_data)

    # Prefer the dppl GPU context; fall back to daal4py's sycl_context.
    try:
        from dppl import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
    except ImportError:  # only the missing-module case is expected here
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')

    # It is possible to specify to make the computations on GPU
    with gpu_context():
        sycl_train_indep_data = sycl_buffer(train_indep_data)
        sycl_train_dep_data = sycl_buffer(train_dep_data)
        sycl_test_indep_data = sycl_buffer(test_indep_data)
        result_gpu = compute(sycl_train_indep_data, sycl_train_dep_data,
                             sycl_test_indep_data, maxIterations)

    # expected responses for the test set, for comparison by the caller
    test_dep_data = np.loadtxt(testfile, usecols=range(13, 14),
                               delimiter=',', ndmin=2, dtype=np.float32)

    return (result_classic, test_dep_data)
def main(readcsv=read_csv):
    """Train/predict a two-class SVM on the host CPU (classic path, 'boser'
    kernel method) and inside a SYCL GPU context ('thunder' method),
    asserting that predictions agree.

    :param readcsv: CSV-reading callable (file, column-range) -> table
    :return: (expected labels, classic prediction, classic decision function)
    """
    # input data file
    train_file = os.path.join('..', 'data', 'batch',
                              'svm_two_class_train_dense.csv')
    predict_file = os.path.join('..', 'data', 'batch',
                                'svm_two_class_test_dense.csv')

    nFeatures = 20
    train_data = readcsv(train_file, range(nFeatures))
    train_labels = readcsv(train_file, range(nFeatures, nFeatures + 1))
    predict_data = readcsv(predict_file, range(nFeatures))
    predict_labels = readcsv(predict_file, range(nFeatures, nFeatures + 1))

    predict_result_classic, decision_function_classic = compute(
        train_data, train_labels, predict_data, 'boser')

    train_data = to_numpy(train_data)
    train_labels = to_numpy(train_labels)
    predict_data = to_numpy(predict_data)

    # Prefer the dppl GPU context; fall back to daal4py's sycl_context.
    try:
        from dppl import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
    except ImportError:  # only the missing-module case is expected here
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')

    # It is possible to specify to make the computations on GPU
    if gpu_available:
        with gpu_context():
            sycl_train_data = sycl_buffer(train_data)
            sycl_train_labels = sycl_buffer(train_labels)
            sycl_predict_data = sycl_buffer(predict_data)
            predict_result_gpu, decision_function_gpu = compute(
                sycl_train_data, sycl_train_labels, sycl_predict_data,
                'thunder')
        assert np.allclose(predict_result_gpu, predict_result_classic)

    return predict_labels, predict_result_classic, decision_function_classic
import os from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader try: import pandas read_csv = lambda f, c, t=np.float64: pandas.read_csv( f, usecols=c, delimiter=',', header=None, dtype=t) except: # fall back to numpy loadtxt read_csv = lambda f, c, t=np.float64: np.loadtxt( f, usecols=c, delimiter=',', ndmin=2) try: from dppl import device_context, device_type with device_context(device_type.gpu, 0): gpu_available = True except: try: from daal4py.oneapi import sycl_context with sycl_context('gpu'): gpu_available = True except: gpu_available = False # Commone code for both CPU and GPU computations def compute(data, nClusters, maxIter, method): # configure kmeans init object initrain_algo = d4p.kmeans_init(nClusters, method=method) # compute initial centroids
def main(readcsv=read_csv, method='defaultDense'):
    """Train/predict multi-class logistic regression on the host CPU
    (classic path) and under SYCL GPU/CPU contexts, asserting that
    predictions and (log-)probabilities agree.

    :param readcsv: CSV-reading callable (file, column-range) -> table
    :param method: computation method (kept for a uniform example interface)
    :return: (train_result, classic prediction result, expected labels)
    """
    nClasses = 5
    nFeatures = 6

    # read training data from file
    # with 6 features per observation and 1 class label
    trainfile = os.path.join('..', 'data', 'batch', 'logreg_train.csv')
    train_data = readcsv(trainfile, range(nFeatures))
    train_labels = readcsv(trainfile, range(nFeatures, nFeatures + 1))

    # read testing data from file with 6 features per observation
    testfile = os.path.join('..', 'data', 'batch', 'logreg_test.csv')
    predict_data = readcsv(testfile, range(nFeatures))

    # Using of the classic way (computations on CPU)
    result_classic, train_result = compute(train_data, train_labels,
                                           predict_data, nClasses)

    train_data = to_numpy(train_data)
    train_labels = to_numpy(train_labels)
    predict_data = to_numpy(predict_data)

    # Prefer dppl device contexts; fall back to daal4py's sycl_context.
    try:
        from dppl import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
        cpu_context = lambda: device_context(device_type.cpu, 0)
    except ImportError:  # only the missing-module case is expected here
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')
        cpu_context = lambda: sycl_context('cpu')

    # It is possible to specify to make the computations on GPU
    if gpu_available:
        with gpu_context():
            sycl_train_data = sycl_buffer(train_data)
            sycl_train_labels = sycl_buffer(train_labels)
            sycl_predict_data = sycl_buffer(predict_data)
            result_gpu, _ = compute(sycl_train_data, sycl_train_labels,
                                    sycl_predict_data, nClasses)
        assert np.allclose(result_classic.prediction, result_gpu.prediction)
        # looser tolerances: GPU probabilities differ slightly from CPU
        assert np.allclose(result_classic.probabilities,
                           result_gpu.probabilities, atol=1e-3)
        assert np.allclose(result_classic.logProbabilities,
                           result_gpu.logProbabilities, atol=1e-2)

    # It is possible to specify to make the computations on CPU
    with cpu_context():
        sycl_train_data = sycl_buffer(train_data)
        sycl_train_labels = sycl_buffer(train_labels)
        sycl_predict_data = sycl_buffer(predict_data)
        result_cpu, _ = compute(sycl_train_data, sycl_train_labels,
                                sycl_predict_data, nClasses)

    # the prediction result provides prediction, probabilities
    # and logProbabilities
    assert result_classic.probabilities.shape == (predict_data.shape[0],
                                                  nClasses)
    assert result_classic.logProbabilities.shape == (predict_data.shape[0],
                                                     nClasses)
    predict_labels = np.loadtxt(testfile,
                                usecols=range(nFeatures, nFeatures + 1),
                                delimiter=',', ndmin=2)
    # fewer than 2.5% of the test observations may be mispredicted
    assert np.count_nonzero(result_classic.prediction - predict_labels) \
        / predict_labels.shape[0] < 0.025

    assert np.allclose(result_classic.prediction, result_cpu.prediction)
    assert np.allclose(result_classic.probabilities, result_cpu.probabilities)
    assert np.allclose(result_classic.logProbabilities,
                       result_cpu.logProbabilities)

    return (train_result, result_classic, predict_labels)
def main(readcsv=None, method='defaultDense'):
    """Run streaming low-order moments on the host CPU (classic path),
    best-effort on GPU, and under a SYCL CPU context, asserting that the
    backends agree.

    :param readcsv: optional CSV-reading callable forwarded to read_next
    :param method: computation method (kept for a uniform example interface)
    :return: the classic (host CPU) low-order-moments result
    """
    # all statistics exposed by a low-order-moments result object
    stat_names = [
        'minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered',
        'mean', 'secondOrderRawMoment', 'variance', 'standardDeviation',
        'variation'
    ]

    # read data from file
    infile = os.path.join('..', 'data', 'batch', 'covcormoments_dense.csv')

    # Using of the classic way (computations on CPU)
    # Configure a low order moments object for streaming
    algo = d4p.low_order_moments(streaming=True)
    # get the generator (defined in stream.py)...
    rn = read_next(infile, 55, readcsv)
    # ... and iterate through chunks/stream
    for chunk in rn:
        algo.compute(chunk)
    # finalize computation
    result_classic = algo.finalize()

    # Prefer dppl device contexts; fall back to daal4py's sycl_context.
    try:
        from dppl import device_context, device_type
        gpu_context = lambda: device_context(device_type.gpu, 0)
        cpu_context = lambda: device_context(device_type.cpu, 0)
    except ImportError:  # only the missing-module case is expected here
        from daal4py.oneapi import sycl_context
        gpu_context = lambda: sycl_context('gpu')
        cpu_context = lambda: sycl_context('cpu')

    # It is possible to specify to make the computations on GPU.
    # Best-effort: a GPU may not be present, so any failure is skipped.
    try:
        with gpu_context():
            # Configure a low order moments object for streaming
            algo = d4p.low_order_moments(streaming=True)
            # get the generator (defined in stream.py)...
            rn = read_next(infile, 55, readcsv)
            # ... and iterate through chunks/stream
            for chunk in rn:
                sycl_chunk = sycl_buffer(to_numpy(chunk))
                algo.compute(sycl_chunk)
            # finalize computation
            result_gpu = algo.finalize()
        for name in stat_names:
            assert np.allclose(getattr(result_classic, name),
                               getattr(result_gpu, name))
    except Exception:  # narrowed from bare 'except:'; keeps Ctrl-C working
        pass

    # It is possible to specify to make the computations on CPU
    with cpu_context():
        # Configure a low order moments object for streaming
        algo = d4p.low_order_moments(streaming=True)
        # get the generator (defined in stream.py)...
        rn = read_next(infile, 55, readcsv)
        # ... and iterate through chunks/stream
        for chunk in rn:
            sycl_chunk = sycl_buffer(to_numpy(chunk))
            algo.compute(sycl_chunk)
        # finalize computation
        result_cpu = algo.finalize()

    # result provides minimum, maximum, sum, sumSquares, sumSquaresCentered,
    # mean, secondOrderRawMoment, variance, standardDeviation, variation
    for name in stat_names:
        assert np.allclose(getattr(result_classic, name),
                           getattr(result_cpu, name))

    return result_classic