def test_make_blobs_ary_parameters(dtype, n_samples, n_features,
                                   centers, cluster_std, center_box,
                                   shuffle, random_state):
    """Exercise ``cuml.make_blobs`` with array-valued centers and cluster_std.

    Both arrays are cast to ``dtype`` before the call. When
    ``centers.shape[1]`` or ``cluster_std.shape[0]`` differs from
    ``n_features`` the call must raise ``ValueError``; otherwise the output
    shapes and the number of distinct labels are verified.
    """
    target_dtype = np.dtype(dtype)
    centers = centers.astype(target_dtype)
    cluster_std = cluster_std.astype(target_dtype)

    # Every call below uses exactly the same keyword arguments.
    blob_kwargs = dict(
        dtype=dtype,
        n_samples=n_samples,
        n_features=n_features,
        centers=centers,
        cluster_std=cluster_std,
        center_box=center_box,
        shuffle=shuffle,
        random_state=random_state,
    )

    shapes_agree = (centers.shape[1] == n_features
                    and cluster_std.shape[0] == n_features)

    if not shapes_agree:
        # Inconsistent shapes: the only expectation is a ValueError.
        with pytest.raises(ValueError):
            _out, _labels = cuml.make_blobs(**blob_kwargs)
        return

    out, labels = cuml.make_blobs(**blob_kwargs)

    assert out.shape == (n_samples, n_features), "out shape mismatch"
    assert labels.shape == (n_samples, ), "labels shape mismatch"

    # Copy the labels to the host so NumPy can count the distinct values.
    host_labels = labels.copy_to_host()
    n_found = np.unique(host_labels).shape
    assert n_found == (len(centers),), \
        "unexpected number of clusters"
def generate_synthetic_dataset(config):
    """Build a labelled two-coil 3-D point cloud and time its generation.

    Parameters
    ----------
    config : mapping
        Must provide the keys ``'coilType'``, ``'nSamples'``,
        ``'coilDensity'``, ``'coil1StDev'``, ``'coil2StDev'``,
        ``'nGuidePointsPerCoil'``, ``'randomSeed'`` and ``'shuffleFlag'``.

    Returns
    -------
    tuple
        ``(data, labels, elapsedTime)``: a cudf DataFrame with columns
        ``x``, ``y``, ``z``; a cudf DataFrame with a single ``labels``
        column (1 for coil 1, 0 for coil 2); and the wall-clock seconds
        spent after the configuration was read.

    Notes
    -----
    Each coil receives ``nSamples // 2`` points, so an odd ``nSamples``
    yields one row fewer than requested. Both ``make_blobs`` calls use the
    same ``randomSeed``.
    """
    coil_type = config['coilType']
    n_samples = config['nSamples']
    coil_density = config['coilDensity']
    coil1_std = config['coil1StDev']
    coil2_std = config['coil2StDev']
    n_guide_points = config['nGuidePointsPerCoil']
    seed = config['randomSeed']
    shuffle = config['shuffleFlag']

    t_start = time.time()

    # Guide points for each coil; these serve as blob centers below.
    coil1_centers, coil2_centers = gen_two_coils(nPoints=n_guide_points,
                                                 coilType=coil_type,
                                                 coilDensity=coil_density)

    per_coil = n_samples // 2
    n_dims = 3
    n_rows = per_coil * 2

    def _sample_around(guide_points, std_dev):
        # Draw per_coil points around the given guide points.
        points, _ = cuml.make_blobs(n_samples=per_coil,
                                    n_features=n_dims,
                                    centers=guide_points,
                                    cluster_std=std_dev,
                                    random_state=seed,
                                    dtype='float')
        return points

    coil1_points = _sample_around(coil1_centers, coil1_std)
    coil2_points = _sample_around(coil2_centers, coil2_std)

    # Interleave the coils: even rows hold coil 1, odd rows hold coil 2.
    points = cupy.empty(shape=(n_rows, n_dims), dtype='float32', order='F')
    points[0::2] = coil1_points
    points[1::2] = coil2_points

    label_col = cupy.empty(shape=(n_rows, 1), dtype='int', order='F')
    label_col[0::2] = cupy.ones(shape=(per_coil, 1), dtype='int')
    label_col[1::2] = cupy.zeros(shape=(per_coil, 1), dtype='int')

    if shuffle:
        # One permutation is applied to both arrays so rows stay aligned.
        cupy.random.seed(seed)
        row_order = cupy.random.permutation(points.shape[0])
        points = cupy.asfortranarray(points[row_order, :])
        label_col = cupy.asfortranarray(label_col[row_order])

    data = cudf.DataFrame.from_gpu_matrix(points, columns=['x', 'y', 'z'])
    labels = cudf.DataFrame.from_gpu_matrix(label_col, columns=['labels'])

    elapsed = time.time() - t_start
    return data, labels, elapsed
def test_make_blobs_scalar_parameters(dtype, n_samples, n_features,
                                      centers, cluster_std, center_box,
                                      shuffle, random_state):
    """Check ``cuml.make_blobs`` when ``centers`` is ``None`` or a scalar.

    The call always uses a fixed ``cluster_std`` of 0.001; the
    ``cluster_std`` argument of this test is not forwarded. Output shapes
    are verified, and the number of distinct labels is verified when
    ``centers`` is ``None`` (3 expected) or does not exceed ``n_samples``.
    """
    blob_kwargs = dict(
        dtype=dtype,
        n_samples=n_samples,
        n_features=n_features,
        centers=centers,
        cluster_std=0.001,
        center_box=center_box,
        shuffle=shuffle,
        random_state=random_state,
    )
    out, labels = cuml.make_blobs(**blob_kwargs)

    # we can use cupy in the future
    host_labels = labels.copy_to_host()

    assert out.shape == (n_samples, n_features), "out shape mismatch"
    assert labels.shape == (n_samples, ), "labels shape mismatch"

    # Work out how many clusters to expect; None means "nothing to check".
    if centers is None:
        expected_clusters = 3
    elif centers <= n_samples:
        expected_clusters = centers
    else:
        expected_clusters = None

    if expected_clusters is not None:
        assert np.unique(host_labels).shape == (expected_clusters,), \
            "unexpected number of clusters"
def test_make_blobs_scalar_parameters(dtype, n_samples, n_features,
                                      centers, cluster_std, center_box,
                                      shuffle, random_state, order):
    """Check ``cuml.make_blobs`` with scalar centers and a memory ``order``.

    The call always uses a fixed ``cluster_std`` of 0.001; the
    ``cluster_std`` argument of this test is not forwarded. Verifies the
    output shapes, the contiguity flag matching ``order`` (``'F'`` or
    ``'C'``), and the number of distinct labels when ``centers`` is
    ``None`` (3 expected) or does not exceed ``n_samples``.
    """
    blob_kwargs = dict(
        dtype=dtype,
        n_samples=n_samples,
        n_features=n_features,
        centers=centers,
        cluster_std=0.001,
        center_box=center_box,
        shuffle=shuffle,
        random_state=random_state,
        order=order,
    )
    out, labels = cuml.make_blobs(**blob_kwargs)

    assert out.shape == (n_samples, n_features), "out shape mismatch"
    assert labels.shape == (n_samples, ), "labels shape mismatch"

    # Only the two known layouts are checked; any other value is skipped.
    layout_flag = {'F': 'F_CONTIGUOUS', 'C': 'C_CONTIGUOUS'}.get(order)
    if layout_flag is not None:
        assert out.flags[layout_flag]

    # Work out how many clusters to expect; None means "nothing to check".
    if centers is None:
        expected_clusters = 3
    elif centers <= n_samples:
        expected_clusters = centers
    else:
        expected_clusters = None

    if expected_clusters is not None:
        assert cp.unique(labels).shape == (expected_clusters,), \
            "unexpected number of clusters"
def test_make_blobs_ary_parameters(dtype, n_samples, n_features,
                                   centers, cluster_std, center_box,
                                   shuffle, random_state):
    """Exercise ``cuml.make_blobs`` with array centers and a (1, 10) std array.

    ``centers`` is cast to ``dtype`` and the scalar ``cluster_std`` is
    expanded to a ``(1, 10)`` array. When ``centers.shape[1]`` differs from
    ``n_features``, or the std array's second dimension differs from the
    number of centers, the call must raise ``ValueError``. Otherwise the
    output shapes and label count are verified and the labels are compared
    with a scikit-learn KMeans fit.
    """
    centers = centers.astype(np.dtype(dtype))
    cluster_std = np.full(shape=(1, 10), fill_value=cluster_std, dtype=dtype)

    # Every call below uses exactly the same keyword arguments.
    blob_kwargs = dict(
        dtype=dtype,
        n_samples=n_samples,
        n_features=n_features,
        centers=centers,
        cluster_std=cluster_std,
        center_box=center_box,
        shuffle=shuffle,
        random_state=random_state,
    )

    shapes_agree = (centers.shape[1] == n_features
                    and cluster_std.shape[1] == centers.shape[0])

    if not shapes_agree:
        # Inconsistent shapes: the only expectation is a ValueError.
        with pytest.raises(ValueError):
            _out, _labels = cuml.make_blobs(**blob_kwargs)
        return

    out, labels = cuml.make_blobs(**blob_kwargs)

    assert out.shape == (n_samples, n_features), "out shape mismatch"
    assert labels.shape == (n_samples, ), "labels shape mismatch"

    # Copy results to the host for the NumPy / scikit-learn checks.
    host_labels = labels.copy_to_host()
    host_points = out.copy_to_host()
    n_centers = centers.shape[0]

    assert np.unique(host_labels).shape == (n_centers,), \
        "unexpected number of clusters"

    # Use kmeans to verify k cluster centers
    from sklearn.cluster import KMeans
    kmeans = KMeans(n_clusters=n_centers)
    kmeans.fit(np.array(host_points))

    # NOTE(review): this only requires the score to be truthy (non-zero),
    # not to exceed any threshold -- confirm whether a minimum is intended.
    assert adjusted_rand_score(kmeans.labels_, host_labels)
def test_output_type(input_type: str):
    """Verify ``cuml.make_blobs`` honours the active cuml output type.

    ``test_output_types[input_type]`` is either a single type expected for
    both ``X`` and ``y``, or a tuple whose first entry is the expected type
    of ``X`` and whose second entry is the expected type of ``y``.
    """
    # Set the output type and ensure its respected by the function
    with cuml.using_output_type(input_type):
        X, y = cuml.make_blobs(n_samples=10,
                               centers=3,
                               n_features=2,
                               random_state=0)

    expected = test_output_types[input_type]

    if isinstance(expected, tuple):
        # Separate expectations for the features and the labels.
        assert isinstance(X, expected[0])
        assert isinstance(y, expected[1])
    else:
        assert isinstance(X, expected)
        assert isinstance(y, expected)