def test_dbscan_spmd(self):
    """Compare distributed (SPMD) DBSCAN against the batch run on core points.

    Cluster labels may be numbered differently between the batch and SPMD
    runs, so a label-translation dict is built on the fly; border-point
    assignments may legitimately differ across node counts, so only points
    that the batch run marks as cores are compared.
    """
    epsilon = 0.04
    minObservations = 45
    data = np_read_csv(
        os.path.join(".", 'data', 'batch', 'dbscan_dense.csv'))

    # Reference: full-data batch run, also requesting the core-point indices.
    batch_algo = d4p.dbscan(minObservations=minObservations,
                            epsilon=epsilon,
                            resultsToCompute='computeCoreIndices')
    batch_result = batch_algo.compute(data)

    # This process's slice of the data: an equal share of rows per process.
    rpp = int(data.shape[0] / d4p.num_procs())
    node_stride = rpp * d4p.my_procid()
    node_range = range(node_stride, node_stride + rpp)
    node_data = data[node_range, :]

    # Distributed run on the local slice only.
    spmd_algo = d4p.dbscan(minObservations=minObservations,
                           epsilon=epsilon,
                           distributed=True)
    spmd_result = spmd_algo.compute(node_data)

    # Map each batch cluster label to the SPMD label first seen for it;
    # every later core point with the same batch label must agree.
    cluster_index_dict = {}
    for row in node_range:
        # Border points can be assigned differently depending on the node
        # count, but core points must match — so skip non-cores.
        if row not in batch_result.coreIndices:
            continue
        spmd_label = spmd_result.assignments[row - node_stride][0]
        batch_label = batch_result.assignments[row][0]
        expected = cluster_index_dict.setdefault(batch_label, spmd_label)
        self.assertTrue(expected == spmd_label)
def verify_on_dbscan(self, X):
    """Check that DBSCAN results do not depend on array memory layout.

    Runs DBSCAN on `X` as given and on a C-contiguous copy of it, and
    asserts identical assignments; also sanity-checks that more than two
    distinct labels were produced (i.e. real clustering happened).
    """
    result_original = d4p.dbscan(
        epsilon=2.0, minObservations=5, fptype='double').compute(X)
    contiguous = np.ascontiguousarray(X)
    result_contiguous = d4p.dbscan(
        epsilon=2.0, minObservations=5, fptype='double').compute(contiguous)
    self.assertTrue(np.array_equal(result_original.assignments,
                                   result_contiguous.assignments))
    # More than just {noise, one cluster} must have been found.
    self.assertTrue(len(np.unique(result_original.assignments)) > 2)
def _daal_dbscan(X, eps=0.5, min_samples=5, sample_weight=None):
    """Run oneDAL DBSCAN and return ``(core_indices, assignments)``.

    Parameters
    ----------
    X : array-like
        Input samples; validated and coerced to float64/float32.
    eps : float, default=0.5
        Neighborhood radius; must be strictly positive.
    min_samples : int, default=5
        Minimum number of observations for a core point.
    sample_weight : array-like or None, default=None
        Optional per-sample weights, validated against X.

    Returns
    -------
    tuple of (ndarray, ndarray)
        Flattened core-point indices (empty intc array if none were
        returned) and flattened cluster assignments.

    Raises
    ------
    ValueError
        If ``eps`` is not positive.
    """
    if eps <= 0.0:
        raise ValueError("eps must be positive.")
    X = check_array(X, dtype=[np.float64, np.float32])
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)
        ww = make2d(sample_weight)
    else:
        ww = None
    XX = make2d(X)
    fpt = getFPType(XX)
    alg = daal4py.dbscan(
        method='defaultDense',
        fptype=fpt,
        epsilon=float(eps),
        minObservations=int(min_samples),
        memorySavingMode=False,
        resultsToCompute="computeCoreIndices")
    daal_res = alg.compute(XX, ww)
    # NOTE: the original also read daal_res.nClusters into a local that was
    # never used; that dead code has been removed.
    assignments = daal_res.assignments.ravel()
    if daal_res.coreIndices is not None:
        core_ind = daal_res.coreIndices.ravel()
    else:
        core_ind = np.array([], dtype=np.intc)
    return (core_ind, assignments)
def main(readcsv=read_csv, method='defaultDense'):
    """Batch DBSCAN example: cluster the dense demo data set.

    Requests core indices and core observations in addition to the
    assignments, sanity-checks the result shapes, and returns the
    daal4py result object.
    """
    infile = "./data/batch/dbscan_dense.csv"
    epsilon = 0.02
    minObservations = 180

    # Load the data (first two columns only).
    input_data = readcsv(infile, range(2))

    # Configure the DBSCAN object; also ask for the indices and
    # observations of the cluster cores.
    dbscan_algo = d4p.dbscan(
        minObservations=minObservations,
        epsilon=epsilon,
        resultsToCompute='computeCoreIndices|computeCoreObservations')

    # Run the clustering. (Equivalently, construction and compute could be
    # chained into a single expression.)
    clustering = dbscan_algo.compute(input_data)

    # Result objects provide assignments, nClusters and, when requested,
    # coreIndices/coreObservations — verify the expected shapes.
    assert clustering.assignments.shape == (input_data.shape[0], 1)
    assert clustering.coreObservations.shape == \
        (clustering.coreIndices.shape[0], input_data.shape[1])

    return clustering
def compute(data, minObservations, epsilon):
    """Cluster `data` with DBSCAN in memory-saving mode.

    Requests core indices and core observations alongside the assignments
    and returns the daal4py result object.
    """
    dbscan_algo = d4p.dbscan(
        minObservations=minObservations,
        epsilon=epsilon,
        resultsToCompute='computeCoreIndices|computeCoreObservations',
        memorySavingMode=True)
    return dbscan_algo.compute(data)
def main(method='defaultDense'):
    """Distributed (SPMD) DBSCAN example.

    Each process loads the full demo file, keeps only its own contiguous
    slice of rows, and runs DBSCAN with distributed=True so the processes
    cooperate on one clustering. Returns the daal4py result object.
    """
    infile = "./data/batch/dbscan_dense.csv"
    epsilon = 0.04
    minObservations = 45

    # Load everything, then keep this process's equal share of the rows.
    full_data = np.loadtxt(infile, delimiter=',')
    rows_per_proc = int(full_data.shape[0] / d4p.num_procs())
    start = rows_per_proc * d4p.my_procid()
    local_chunk = full_data[start:start + rows_per_proc, :]

    # Configure and run the distributed DBSCAN.
    dbscan_algo = d4p.dbscan(minObservations=minObservations,
                             epsilon=epsilon,
                             distributed=True)
    return dbscan_algo.compute(local_chunk)
def _daal_dbscan(X, eps=0.5, min_samples=5, sample_weight=None):
    """Run oneDAL DBSCAN and return ``(core_indices, assignments)``.

    Parameters
    ----------
    X : array-like
        Input samples (made 2-D via ``make2d`` before compute).
    eps : float, default=0.5
        Neighborhood radius; must be strictly positive.
    min_samples : int, default=5
        Minimum number of observations for a core point.
    sample_weight : array-like or None, default=None
        Optional per-sample weights; passed through unchanged when given.

    Returns
    -------
    tuple of (ndarray, ndarray)
        Flattened core-point indices (empty intc array if none were
        returned) and flattened cluster assignments.

    Raises
    ------
    ValueError
        If ``eps`` is not positive.
    """
    # Validate eps here, consistently with the validating variant of this
    # wrapper, instead of silently passing a bad value to the native library.
    if eps <= 0.0:
        raise ValueError("eps must be positive.")
    ww = make2d(sample_weight) if sample_weight is not None else None
    XX = make2d(X)
    fpt = getFPType(XX)
    alg = daal4py.dbscan(
        method='defaultDense',
        fptype=fpt,
        epsilon=float(eps),
        minObservations=int(min_samples),
        memorySavingMode=False,
        resultsToCompute="computeCoreIndices")
    daal_res = alg.compute(XX, ww)
    assignments = daal_res.assignments.ravel()
    if daal_res.coreIndices is not None:
        core_ind = daal_res.coreIndices.ravel()
    else:
        core_ind = np.array([], dtype=np.intc)
    return (core_ind, assignments)
def test_dbscan(X):
    """Run DBSCAN on X with the globally configured eps/min_samples.

    Builds the algorithm from the module-level ``params`` (benchmark
    configuration) and returns the daal4py result, including core indices.
    """
    dbscan_kwargs = {
        'fptype': getFPType(X),
        'epsilon': params.eps,
        'minObservations': params.min_samples,
        'resultsToCompute': 'computeCoreIndices',
    }
    return dbscan(**dbscan_kwargs).compute(X)