Example #1
0
def test_make_blobs_ary_parameters(dtype, n_samples, n_features, centers,
                                   cluster_std, center_box, shuffle,
                                   random_state):
    """Exercise ``cuml.make_blobs`` with array-valued centers and cluster_std.

    ``centers`` and ``cluster_std`` are cast to ``dtype`` before the call.
    When the second dimension of ``centers`` or the first dimension of
    ``cluster_std`` differs from ``n_features``, the call is expected to
    raise ``ValueError``.  Otherwise the shapes of the returned samples and
    labels are checked, along with the number of distinct labels.
    """
    target_dtype = np.dtype(dtype)
    centers = centers.astype(target_dtype)
    cluster_std = cluster_std.astype(target_dtype)

    # Both branches issue exactly the same call, so spell it out once.
    blob_kwargs = dict(dtype=dtype, n_samples=n_samples,
                       n_features=n_features, centers=centers,
                       cluster_std=cluster_std, center_box=center_box,
                       shuffle=shuffle, random_state=random_state)

    shapes_agree = (centers.shape[1] == n_features
                    and cluster_std.shape[0] == n_features)

    if not shapes_agree:
        # Inconsistent inputs must be rejected.
        with pytest.raises(ValueError):
            _out, _labels = cuml.make_blobs(**blob_kwargs)
        return

    out, labels = cuml.make_blobs(**blob_kwargs)

    assert out.shape == (n_samples, n_features), "out shape mismatch"
    assert labels.shape == (n_samples, ), "labels shape mismatch"

    # np.unique needs the labels as a host-side array.
    host_labels = labels.copy_to_host()
    n_found = np.unique(host_labels).shape
    assert n_found == (len(centers),), \
        "unexpected number of clusters"
Example #2
0
def generate_synthetic_dataset(config):
    """Generate a labelled 3-D dataset made of two interleaved coils.

    Parameters
    ----------
    config : mapping
        Must provide the keys ``'coilType'``, ``'nSamples'``,
        ``'coilDensity'``, ``'coil1StDev'``, ``'coil2StDev'``,
        ``'nGuidePointsPerCoil'``, ``'randomSeed'`` and ``'shuffleFlag'``.

    Returns
    -------
    data : cudf.DataFrame
        Columns ``x``, ``y``, ``z``; ``2 * (nSamples // 2)`` rows, so an odd
        ``nSamples`` yields one row fewer than requested.
    labels : cudf.DataFrame
        Single ``labels`` column; 1 for coil-1 samples, 0 for coil-2 samples.
    elapsedTime : float
        Wall-clock seconds (``time.time()`` difference) spent generating.

    Notes
    -----
    Guide points come from ``gen_two_coils`` and are used as blob centers.
    Both coils are sampled with the same ``randomSeed``.  Before any
    shuffling, coil-1 samples sit on even rows and coil-2 samples on odd
    rows.  The blobs are requested with ``dtype='float'`` while the combined
    buffer is ``float32``.
    """
    coil_type = config['coilType']
    n_samples = config['nSamples']
    coil_density = config['coilDensity']
    coil1_std = config['coil1StDev']
    coil2_std = config['coil2StDev']
    n_guide_points = config['nGuidePointsPerCoil']
    seed = config['randomSeed']
    do_shuffle = config['shuffleFlag']

    t_start = time.time()

    coil1_centers, coil2_centers = gen_two_coils(nPoints=n_guide_points,
                                                 coilType=coil_type,
                                                 coilDensity=coil_density)
    per_coil = n_samples // 2
    n_dims = 3
    total_rows = per_coil * 2

    # Settings common to both coils; only centers and spread differ.
    shared_blob_args = dict(n_samples=per_coil,
                            n_features=n_dims,
                            random_state=seed,
                            dtype='float')

    coil1_points, _ = cuml.make_blobs(centers=coil1_centers,
                                      cluster_std=coil1_std,
                                      **shared_blob_args)
    coil2_points, _ = cuml.make_blobs(centers=coil2_centers,
                                      cluster_std=coil2_std,
                                      **shared_blob_args)

    # Interleave the coils: even rows <- coil 1, odd rows <- coil 2.
    points = cupy.empty(shape=(total_rows, n_dims),
                        dtype='float32',
                        order='F')
    points[0::2] = coil1_points
    points[1::2] = coil2_points

    # Labels follow the same interleaving: 1 marks coil 1, 0 marks coil 2.
    point_labels = cupy.empty(shape=(total_rows, 1),
                              dtype='int',
                              order='F')
    point_labels[0::2] = cupy.ones(shape=(per_coil, 1), dtype='int')
    point_labels[1::2] = cupy.zeros(shape=(per_coil, 1), dtype='int')

    if do_shuffle:
        # One permutation is applied to both arrays so rows stay paired.
        cupy.random.seed(seed)
        row_order = cupy.random.permutation(points.shape[0])
        points = cupy.asfortranarray(points[row_order, :])
        point_labels = cupy.asfortranarray(point_labels[row_order])

    data = cudf.DataFrame.from_gpu_matrix(points,
                                          columns=['x', 'y', 'z'])
    labels = cudf.DataFrame.from_gpu_matrix(point_labels,
                                            columns=['labels'])

    elapsed = time.time() - t_start
    return data, labels, elapsed
Example #3
0
def test_make_blobs_scalar_parameters(dtype, n_samples, n_features, centers,
                                      cluster_std, center_box, shuffle,
                                      random_state):
    """Exercise ``cuml.make_blobs`` with a scalar (or ``None``) ``centers``.

    The ``cluster_std`` argument is not forwarded; a fixed spread of
    ``0.001`` is used instead.  The test checks the shapes of the returned
    samples and labels, then the number of distinct labels: 3 when
    ``centers`` is ``None``, or ``centers`` when it does not exceed
    ``n_samples``.  No cluster-count check is made otherwise.
    """
    blob_kwargs = dict(dtype=dtype,
                       n_samples=n_samples,
                       n_features=n_features,
                       centers=centers,
                       cluster_std=0.001,
                       center_box=center_box,
                       shuffle=shuffle,
                       random_state=random_state)
    out, labels = cuml.make_blobs(**blob_kwargs)

    # Host copy for np.unique; cupy could replace this in the future.
    host_labels = labels.copy_to_host()

    assert out.shape == (n_samples, n_features), "out shape mismatch"
    assert labels.shape == (n_samples, ), "labels shape mismatch"

    # Work out how many clusters to expect, if the count can be predicted.
    if centers is None:
        expected_clusters = 3
    elif centers <= n_samples:
        expected_clusters = centers
    else:
        return

    assert np.unique(host_labels).shape == (expected_clusters,), \
        "unexpected number of clusters"
Example #4
0
def test_make_blobs_scalar_parameters(dtype, n_samples, n_features, centers,
                                      cluster_std, center_box, shuffle,
                                      random_state, order):
    """Exercise ``cuml.make_blobs`` with scalar ``centers`` and an ``order``.

    The ``cluster_std`` argument is not forwarded; a fixed spread of
    ``0.001`` is used instead.  The test checks the output shapes, the
    contiguity flag matching ``order`` (``'F'`` or ``'C'``), and the number
    of distinct labels: 3 when ``centers`` is ``None``, or ``centers`` when
    it does not exceed ``n_samples``.
    """
    blob_kwargs = dict(dtype=dtype,
                       n_samples=n_samples,
                       n_features=n_features,
                       centers=centers,
                       cluster_std=0.001,
                       center_box=center_box,
                       shuffle=shuffle,
                       random_state=random_state,
                       order=order)
    out, labels = cuml.make_blobs(**blob_kwargs)

    assert out.shape == (n_samples, n_features), "out shape mismatch"
    assert labels.shape == (n_samples, ), "labels shape mismatch"

    # Only the first matching layout is checked; other values are ignored.
    layout_flags = (('F', 'F_CONTIGUOUS'), ('C', 'C_CONTIGUOUS'))
    for layout, flag_name in layout_flags:
        if order == layout:
            assert out.flags[flag_name]
            break

    # Work out how many clusters to expect, if the count can be predicted.
    if centers is None:
        expected_clusters = 3
    elif centers <= n_samples:
        expected_clusters = centers
    else:
        return

    assert cp.unique(labels).shape == (expected_clusters,), \
        "unexpected number of clusters"
Example #5
0
def test_make_blobs_ary_parameters(dtype, n_samples, n_features, centers,
                                   cluster_std, center_box, shuffle,
                                   random_state):
    """Exercise ``cuml.make_blobs`` with array centers and per-cluster std.

    ``centers`` is cast to ``dtype`` and ``cluster_std`` is expanded into a
    ``(1, 10)`` array filled with the given value.  When the second
    dimension of ``centers`` differs from ``n_features``, or the number of
    std entries differs from the number of centers, the call is expected to
    raise ``ValueError``.  Otherwise the output shapes and the number of
    distinct labels are checked, and a KMeans fit on the samples is compared
    with the generated labels.
    """
    centers = centers.astype(np.dtype(dtype))
    cluster_std = np.full(shape=(1, 10), fill_value=cluster_std, dtype=dtype)

    # Both branches issue exactly the same call, so spell it out once.
    blob_kwargs = dict(dtype=dtype, n_samples=n_samples,
                       n_features=n_features, centers=centers,
                       cluster_std=cluster_std, center_box=center_box,
                       shuffle=shuffle, random_state=random_state)

    shapes_agree = (centers.shape[1] == n_features
                    and cluster_std.shape[1] == centers.shape[0])

    if not shapes_agree:
        # Inconsistent inputs must be rejected.
        with pytest.raises(ValueError):
            _out, _labels = cuml.make_blobs(**blob_kwargs)
        return

    out, labels = cuml.make_blobs(**blob_kwargs)

    assert out.shape == (n_samples, n_features), "out shape mismatch"
    assert labels.shape == (n_samples, ), "labels shape mismatch"

    # NumPy and scikit-learn need host-side copies of the results.
    host_labels = labels.copy_to_host()
    host_samples = out.copy_to_host()

    n_clusters = centers.shape[0]
    assert np.unique(host_labels).shape == (n_clusters,), \
        "unexpected number of clusters"

    # Use kmeans to verify k cluster centers
    from sklearn.cluster import KMeans
    kmeans = KMeans(n_clusters=n_clusters)
    kmeans.fit(np.array(host_samples))

    # NOTE(review): this only requires a non-zero score; consider asserting
    # a minimum agreement threshold once the parametrization is confirmed.
    agreement = adjusted_rand_score(kmeans.labels_, host_labels)
    assert agreement
Example #6
0
def test_output_type(input_type: str):
    """Check that ``cuml.make_blobs`` honours the global output type.

    ``test_output_types[input_type]`` gives the expected result type: either
    a single type applied to both ``X`` and ``y``, or a tuple whose first
    entry applies to ``X`` and second to ``y``.
    """
    # Set the output type and ensure it is respected by the function
    with cuml.using_output_type(input_type):
        X, y = cuml.make_blobs(n_samples=10,
                               centers=3,
                               n_features=2,
                               random_state=0)

        expected = test_output_types[input_type]

        if not isinstance(expected, tuple):
            # A single type covers both the samples and the labels.
            assert isinstance(X, expected)
            assert isinstance(y, expected)
            return

        # Separate expectations: (type of X, type of y).
        assert isinstance(X, expected[0])
        assert isinstance(y, expected[1])