Ejemplo n.º 1
0
 def test_assymetric_U(self):
     """Expect ``ValueError`` when the two matching matrices are asymmetric.

     ``U_1`` and ``U_2`` are built with different third arguments to
     ``gaussian_similarity_kernel`` (1.0 and 0.5), so they cannot mirror
     each other.
     """
     n_points = 100
     X, X_m = datasets.make_s_curve(n_points, random_state=0)
     Y, Y_m = datasets.make_s_curve(n_points, random_state=1)
     x_column = X_m.reshape(n_points, 1)
     y_column = Y_m.reshape(n_points, 1)
     # Deliberately mismatched matching matrices (1.0 versus 0.5).
     U_1 = utils.gaussian_similarity_kernel(x_column, y_column, 1.0)
     U_2 = utils.gaussian_similarity_kernel(y_column, x_column, 0.5)
     matchings = [[None, U_1], [U_2, None]]
     assert_raises(ValueError, laplacian_manifold_align, [X, Y],
                   matchings, 5, 0.1, 0.1)
def test_make_s_curve():
    """Noise-free S-curve: check the shapes and the first/third columns."""
    points, t = make_s_curve(n_samples=5, noise=0.0, random_state=0)

    assert_equal(points.shape, (5, 3), "X shape mismatch")
    assert_equal(t.shape, (5,), "t shape mismatch")

    # Columns 0 and 2 are compared against closed-form functions of ``t``.
    expected_first = np.sin(t)
    expected_third = np.sign(t) * (np.cos(t) - 1)
    assert_array_equal(points[:, 0], expected_first)
    assert_array_equal(points[:, 2], expected_third)
Ejemplo n.º 3
0
def create_true_data(type_of_data, number_of_modes, std, size, vocabulary_size):
    """Generate 2-D sample points laid out inside a ``vocabulary_size`` square.

    Args:
        type_of_data: One of ``"mixture_of_gaussians"``, ``"blobs"``,
            ``"moons"``, ``"circles"``, ``"swiss_roll"`` or ``"s_curve"``.
        number_of_modes: Number of Gaussian modes / blob centers. Only read
            by the ``"mixture_of_gaussians"`` and ``"blobs"`` layouts.
        std: Passed as ``cluster_std`` for ``"blobs"`` and as ``noise`` for
            ``"swiss_roll"``; the other layouts do not read it (the Gaussian
            mixture uses a fixed ``scale=500``).
        size: Number of samples requested from each generator (per mode for
            the Gaussian mixture).
        vocabulary_size: Side length of the target square; points are scaled
            and shifted relative to it.

    Returns:
        A two-column ``numpy`` array of points.

    Raises:
        ValueError: If ``type_of_data`` is not one of the supported layouts.
            (Previously this case failed with ``UnboundLocalError`` at the
            final ``return``.)
    """
    supported = ("mixture_of_gaussians", "blobs", "moons", "circles",
                 "swiss_roll", "s_curve")
    if type_of_data not in supported:
        raise ValueError(
            "Unsupported type_of_data %r; expected one of %s"
            % (type_of_data, ", ".join(supported)))

    if type_of_data == "mixture_of_gaussians":
        x_parts, y_parts = [], []
        for _ in range(number_of_modes):
            # Each mode gets its own random center per axis; draws are clipped
            # to stay inside [0, vocabulary_size].
            x_parts.append(np.clip(
                np.random.normal(loc=np.random.randint(vocabulary_size - 1),
                                 scale=500, size=size),
                0, vocabulary_size))
            y_parts.append(np.clip(
                np.random.normal(loc=np.random.randint(vocabulary_size - 1),
                                 scale=500, size=size),
                0, vocabulary_size))
        # np.append flattens the per-mode arrays (and tolerates zero modes).
        x = np.column_stack((np.append([], x_parts), np.append([], y_parts)))

    # Random rotation angle. Drawn for every layout, even those that do not
    # rotate, so the sequence of random draws is the same as before.
    cos_theta = np.random.uniform()
    sin_theta = math.sqrt(1 - cos_theta * cos_theta)
    half = vocabulary_size / 2

    if type_of_data == "blobs":
        blobs = make_blobs(n_samples=size, centers=number_of_modes,
                           cluster_std=std)[0]
        x = np.clip((vocabulary_size / 20) * blobs + half,
                    [0, 0], [vocabulary_size, vocabulary_size]).astype(int)
    if type_of_data == "moons":
        # Note: this layout uses the transposed rotation matrix compared to
        # the swiss-roll and s-curve layouts below.
        rotation = np.array([[cos_theta, sin_theta], [-sin_theta, cos_theta]])
        x = (np.dot(make_moons(n_samples=size)[0] * (1 / 2), rotation)
             * half + half).astype(int)
    if type_of_data == "circles":
        x = ((make_circles(n_samples=size)[0] * half) + half).astype(int)
    if type_of_data == "swiss_roll":
        x = make_swiss_roll(n_samples=size, random_state=2, noise=std)[0]
        # Keep columns 0 and 2 of the 3-D sample.
        x = np.column_stack((x[:, 0], x[:, 2]))
        x = np.dot((1 / 25) * x,
                   np.array([[cos_theta, -sin_theta], [sin_theta, cos_theta]]))
        x = (x * half + half).astype(int)
    if type_of_data == "s_curve":
        x = make_s_curve(n_samples=size)[0] / 2
        # Keep columns 0 and 2 of the 3-D sample.
        x = np.column_stack((x[:, 0], x[:, 2]))
        x = (np.dot(x, np.array([[cos_theta, -sin_theta],
                                 [sin_theta, cos_theta]]))
             * half + half).astype(int)
    return x
def s_curves(request):
    """
    Builds a seeded 1000-sample S-curve and attaches it, wrapped in a
    ``Dataset``, to the requesting test class as ``s_curves``.
    """
    features, target = make_s_curve(1000, random_state=888)
    # Stored on the class so the test methods can reach it via ``self``
    request.cls.s_curves = Dataset(features, target)
Ejemplo n.º 5
0
 def test_shape(self):
     """Check the result has shape ``(2 * N, d)`` for d = 2, 3 and 4."""
     n_points = 100
     X, X_m = datasets.make_s_curve(n_points, random_state=0)
     Y, Y_m = datasets.make_s_curve(n_points, random_state=1)
     x_column = X_m.reshape(n_points, 1)
     y_column = Y_m.reshape(n_points, 1)
     U_1 = utils.gaussian_similarity_kernel(x_column, y_column, 1.0)
     U_2 = utils.gaussian_similarity_kernel(y_column, x_column, 1.0)
     for d in (2, 3, 4):
         matchings = [[None, U_1], [U_2, None]]
         L = laplacian_manifold_align([X, Y], matchings, 5, 0.1, 0.1, d=d)
         expected_shape = (n_points * 2, d)
         assert_true(L.shape == expected_shape)
Ejemplo n.º 6
0
 def __init__(self, train=True, n_samples=6000, noise=0.05,
              test_fraction=0.1, seed=42):
     """Build the dataset from a seeded, noisy S-curve sample.

     Args:
         train: Forwarded unchanged to the parent constructor.
         n_samples: Number of points requested from ``make_s_curve``.
         noise: Noise level handed to ``make_s_curve``.
         test_fraction: Forwarded unchanged to the parent constructor.
         seed: Seed of the ``RandomState`` shared by the sampler and the
             parent constructor.
     """
     _rnd = np.random.RandomState(seed)
     # ``noise`` and ``random_state`` are keyword-only in current
     # scikit-learn releases; passing them by name works on old and new.
     data, pos = make_s_curve(n_samples, noise=noise, random_state=_rnd)
     data = data.astype(np.float32)
     pos = pos.astype(np.float32)
     super().__init__(data, pos, train, test_fraction, _rnd)
Ejemplo n.º 7
0
def load_s_curve_hole(n_points=2000, deviation=0.1):
    """
    Sample an S-curve with ``n_points`` points and noise ``deviation``.

    NOTE(review): the name suggests a hole in the middle of the curve, but
    no points are removed here -- confirm whether that step is missing.
    """
    points, color = make_s_curve(n_samples=n_points, noise=deviation)
    return points, color
Ejemplo n.º 8
0
def test_make_s_curve():
    """Noise-free S-curve: check the shapes and the first/third columns."""
    points, t = make_s_curve(n_samples=5, noise=0.0, random_state=0)

    assert points.shape == (5, 3), "X shape mismatch"
    assert t.shape == (5, ), "t shape mismatch"

    # Columns 0 and 2 are compared against closed-form functions of ``t``.
    expected_first = np.sin(t)
    expected_third = np.sign(t) * (np.cos(t) - 1)
    assert_array_almost_equal(points[:, 0], expected_first)
    assert_array_almost_equal(points[:, 2], expected_third)
Ejemplo n.º 9
0
 def test_diff_size_graphs(self):
     """Align two S-curves of different sizes (100 and 200 points).

     For d = 2, 3 and 4 the result must have shape ``(N_X + N_Y, d)``.
     """
     n_x, n_y = 100, 200
     X, X_m = datasets.make_s_curve(n_x, random_state=0)
     Y, Y_m = datasets.make_s_curve(n_y, random_state=1)
     x_column = X_m.reshape(n_x, 1)
     y_column = Y_m.reshape(n_y, 1)
     U_1 = utils.gaussian_similarity_kernel(x_column, y_column, 1.0)
     U_2 = utils.gaussian_similarity_kernel(y_column, x_column, 1.0)
     for d in (2, 3, 4):
         matchings = [[None, U_1], [U_2, None]]
         L = laplacian_manifold_align([X, Y], matchings, 5, 0.1, 0.1, d=d)
         expected_shape = (n_x + n_y, d)
         assert_true(L.shape == expected_shape)
Ejemplo n.º 10
0
def test_curve():
    """Run the PTSNE modes and comparisons on a noisy 256-point S-curve."""
    X, y = datasets.make_s_curve(n_samples=128 * 2, noise=.05)
    run_ptsne_modes(X)
    # One comparison per perplexity, each labelled with its value.
    for perplexity in (5, 30, 50, 100):
        label = "S_Curve_{}".format(perplexity)
        run_comparison(label, X, color=y, perplexity=perplexity)
Ejemplo n.º 11
0
def generate_scurve(samples, noise, num_classes):
    """Sample an S-curve and label points by equal-width bins of ``t``.

    Args:
        samples: Number of points to draw.
        noise: Noise level handed to ``make_s_curve``.
        num_classes: Number of equal-width bins between ``t.min()`` and
            ``t.max()``; bin ``i`` becomes class ``i``.

    Returns:
        ``(X, y)`` with ``X`` cast to ``float32`` and ``y`` cast to ``int32``.
    """
    X, t = make_s_curve(samples, noise=noise)
    edges = np.linspace(t.min(), t.max(), num_classes + 1)
    y = np.empty(samples)
    for i in range(edges.size - 1):
        # The original mask repeated the lower bound twice and only produced
        # the right labels because later iterations overwrote earlier ones.
        # Testing both edges states the bin explicitly; a point sitting on a
        # shared edge still ends up in the higher bin, as before.
        in_bin = (edges[i] <= t) & (t <= edges[i + 1])
        y[in_bin] = i
    return X.astype(np.float32), y.astype(np.int32)
Ejemplo n.º 12
0
    def test_manifold_algorithm_transform_fit(self, algorithm):
        """
        Fitting a Manifold built from ``algorithm`` must return the same object
        """
        features, target = make_s_curve(1000, random_state=94)
        # Constructing the visualizer is expected to emit a YellowbrickWarning
        with pytest.warns(YellowbrickWarning):
            visualizer = Manifold(manifold=algorithm, target="auto")

        fitted = visualizer.fit(features, target)
        assert fitted is visualizer, "fit did not return self"
Ejemplo n.º 13
0
def main(args):
    """Fit a randomized PCA on a sparse S-curve training split.

    Reads ``n_samples`` from ``args`` (default 2000). Returns a dict with a
    fixed ``'token'`` string and ``'startTime'``, the start time in whole
    milliseconds.

    NOTE(review): ``n_components=150`` looks large for S-curve samples --
    confirm it is valid for the feature count produced here.
    """
    sample_count = args.get("n_samples", 2000)
    started_at = time.time()
    random_gen = np.random.RandomState(0)
    features, target = make_s_curve(n_samples=sample_count,
                                    random_state=random_gen)
    sparse_features = scipy.sparse.csr_matrix(features)
    # Only the training features are used; the other split parts are dropped.
    train_features, _, _, _ = train_test_split(sparse_features, target,
                                               random_state=random_gen)
    reducer = PCA(n_components=150, svd_solver='randomized', whiten=True)
    reducer.fit(train_features)
    start_ms = int(round(started_at * 1000))
    return {'token': 'pca finished', 'startTime': start_ms}
Ejemplo n.º 14
0
def make_toy_Story(n_samples_per_class = 100, ood=False):
  """Build a shuffled 2-D toy set: an S-curve in three bands plus two blobs.

  The S-curve (``3 * n_samples_per_class`` points, columns 0 and 2 kept) is
  labelled 0/1/2 by thresholding its third coordinate at -0.5 and 1. Two
  Gaussian blobs centred at (0, 1) and (0, -1) get labels 3 and 4.
  ``ood`` is accepted but not read by this function.

  Returns the points and labels under one shared random permutation.
  """
  upper_mean = [.0, 1]
  lower_mean = [.0, -1]
  blob_cov = [[.05, 0], [0, .05]]
  curve, _ = make_s_curve(n_samples_per_class*3, random_state=0)
  height = curve[:, 2]
  # Integer band labels from boolean masks: <-0.5 -> 0, [-0.5, 1) -> 1, >=1 -> 2
  s_labels = 0*(height<-.5) + 1*(-.5<=height)*(height<1) + 2*(height>=1)
  upper_blob = np.random.multivariate_normal(upper_mean, blob_cov, n_samples_per_class)
  lower_blob = np.random.multivariate_normal(lower_mean, blob_cov, n_samples_per_class)
  points = np.concatenate([curve[:, [0, 2]], upper_blob, lower_blob])
  labels = np.concatenate([s_labels, [3]*n_samples_per_class, [4]*n_samples_per_class])
  order = np.random.permutation(len(points))
  return points[order], labels[order]
Ejemplo n.º 15
0
def make_s_curve(n_samples, seed):
    """Sample an S-curve, order it along the curve, and colour it in halves.

    Args:
        n_samples: Number of points to draw.
        seed: Passed as ``random_state`` to ``datasets.make_s_curve``.

    Returns:
        ``(x, y)`` where ``x`` holds the points sorted by the second value
        returned by ``datasets.make_s_curve`` and ``y`` is a list of colour
        names: ``'purple'`` for indices below ``n_samples / 2`` and
        ``'blue'`` for the rest.
    """
    x, position = datasets.make_s_curve(n_samples=n_samples,
                                        random_state=seed)
    # Only the ordering of ``position`` is needed; the original also sorted
    # the array in place and then threw it away.
    x = x[position.argsort()]
    y = ['purple' if i < n_samples / 2 else 'blue' for i in range(n_samples)]
    return (x, y)
Ejemplo n.º 16
0
def main(args):
    """Cluster a sparse S-curve split with KMeans and report completeness.

    Reads ``n_samples`` from ``args`` (default 4000). The score is printed
    and returned as ``'token'`` together with ``'startTime'``, the start time
    in whole milliseconds.
    """
    sample_count = args.get("n_samples", 4000)
    started_at = time.time()
    random_gen = np.random.RandomState(0)
    features, target = make_s_curve(n_samples=sample_count,
                                    random_state=random_gen)
    sparse_features = scipy.sparse.csr_matrix(features)
    train_features, test_features, _, test_target = train_test_split(
        sparse_features, target, random_state=random_gen)
    model = KMeans(algorithm='elkan').fit(train_features)
    predicted = model.predict(test_features)
    token = completeness_score(predicted, test_target)
    print(token)
    start_ms = int(round(started_at * 1000))
    return {'token': token, 'startTime': start_ms}
Ejemplo n.º 17
0
def test_serialize():
    """A PTSNE model saved and reloaded must transform data the same way."""
    X, _ = datasets.make_s_curve(n_samples=64, noise=.05)
    original = PTSNE(X, n_iter=10, batch_size=64)

    # Round-trip through an in-memory buffer.
    buffer = io.BytesIO()
    original.save(buffer)
    buffer.seek(0)
    restored = PTSNE.load(buffer)

    embedded_before = original.transform(X)
    embedded_after = restored.transform(X)
    assert np.allclose(embedded_before, embedded_after)
Ejemplo n.º 18
0
def make_toy_Story_with_ood_class(n_samples_per_class = 100):
  """Build the shuffled 2-D toy set plus a surrounding ring as class 5.

  The S-curve (``3 * n_samples_per_class`` points, columns 0 and 2 kept) is
  labelled 0/1/2 by thresholding its third coordinate at -0.5 and 1. Two
  Gaussian blobs centred at (0, 1) and (0, -1) get labels 3 and 4. A circle
  of radius 2.2 with ``5 * n_samples_per_class`` points gets label 5.

  Returns the points and labels under one shared random permutation.
  """
  upper_mean = [.0, 1]
  lower_mean = [.0, -1]
  blob_cov = [[.05, 0], [0, .05]]
  ring_count = int(n_samples_per_class*5)
  curve, _ = make_s_curve(n_samples_per_class*3, random_state=0)
  height = curve[:, 2]
  # Integer band labels from boolean masks: <-0.5 -> 0, [-0.5, 1) -> 1, >=1 -> 2
  s_labels = 0*(height<-.5) + 1*(-.5<=height)*(height<1) + 2*(height>=1)
  upper_blob = np.random.multivariate_normal(upper_mean, blob_cov, n_samples_per_class)
  lower_blob = np.random.multivariate_normal(lower_mean, blob_cov, n_samples_per_class)
  # Uniform angles on [0, 2*pi) placed on a circle of radius 2.2
  angles = np.random.uniform(0, 2*np.pi, [ring_count, 1])
  ring = np.concatenate([2.2*np.cos(angles), 2.2*np.sin(angles)], axis=1)
  points = np.concatenate([curve[:, [0, 2]], upper_blob, lower_blob, ring])
  labels = np.concatenate([s_labels, [3]*n_samples_per_class, [4]*n_samples_per_class, [5]*ring_count])
  order = np.random.permutation(len(points))
  return points[order], labels[order]
Ejemplo n.º 19
0
def test():
    """Plot an S-curve and its 2-D embedding under several manifold methods.

    Draws the 3-D sample in the first cell of a 2x5 grid, then fits each
    method, prints its wall-clock time, and plots the embedding in the next
    free cell. Ends by showing the figure.
    """
    n_points = 1000
    X, color = datasets.make_s_curve(n_points, random_state=0)
    n_neighbors = 10
    n_components = 2

    # Create figure (the title now reports n_points instead of a literal 1000)
    fig = plt.figure(figsize=(15, 8))
    fig.suptitle("Manifold Learning with %i points, %i neighbors" %
                 (n_points, n_neighbors),
                 fontsize=14)

    # Add 3d scatter plot
    ax = fig.add_subplot(251, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
    ax.view_init(4, -72)

    # Set-up manifold methods. Constructor arguments are passed by keyword:
    # current scikit-learn rejects them positionally, and keywords also work
    # on older releases.
    LLE = partial(manifold.LocallyLinearEmbedding,
                  n_neighbors=n_neighbors,
                  n_components=n_components,
                  eigen_solver='auto')

    methods = OrderedDict()
    methods['LLE'] = LLE(method='standard')
    methods['LTSA'] = LLE(method='ltsa')
    methods['Hessian LLE'] = LLE(method='hessian')
    methods['Modified LLE'] = LLE(method='modified')
    methods['Isomap'] = manifold.Isomap(n_neighbors=n_neighbors,
                                        n_components=n_components)
    methods['MDS'] = manifold.MDS(n_components=n_components,
                                  max_iter=100,
                                  n_init=1)
    methods['SE'] = manifold.SpectralEmbedding(n_components=n_components,
                                               n_neighbors=n_neighbors)
    methods['t-SNE'] = manifold.TSNE(n_components=n_components,
                                     init='pca',
                                     random_state=0)

    # Plot results
    for i, (label, method) in enumerate(methods.items()):
        t0 = time()
        Y = method.fit_transform(X)
        t1 = time()
        print("%s: %.2g sec" % (label, t1 - t0))
        # Cells 2-5 hold the first four methods; (i > 3) skips cell 6, which
        # sits directly below the 3-D plot, so the rest start at cell 7.
        ax = fig.add_subplot(2, 5, 2 + i + (i > 3))
        ax.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
        ax.set_title("%s (%.2g sec)" % (label, t1 - t0))
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        ax.axis('tight')

    plt.show()
Ejemplo n.º 20
0
    def _generate(self, random_state):
        """Sample an S-curve from the request payload, optionally binned.

        Reads ``rows`` and ``noise`` from ``api.payload``. When the payload
        has a ``classes`` entry that is not ``None`` and is greater than 0,
        the target is cut into that many bins and replaced by the bin codes.
        """
        payload = api.payload
        rows = payload['rows']
        noise = payload['noise']

        x, y = make_s_curve(n_samples=rows,
                            noise=noise,
                            random_state=random_state)

        classes = payload['classes'] if 'classes' in payload else None
        if classes is not None and classes > 0:
            y = pd.cut(y, classes).codes

        return x, y
Ejemplo n.º 21
0
def gen_s_curve(rng, emissions):
    """Generate synthetic observations from an S-curve latent manifold.

    Args:
        rng: Random generator used for the S-curve sample, the latent maps
            ``F`` and every emission draw (it must provide
            ``multivariate_normal``, ``normal``, ``binomial``,
            ``multinomial``, ``negative_binomial`` and ``poisson``).
        emissions: One of ``'bernoulli'``, ``'gaussian'``, ``'multinomial'``,
            ``'negbinom'``; any other value is asserted to be ``'poisson'``.

    Returns:
        A ``Dataset`` built from the observations ``Y``, the latent points
        ``X``, the maps ``F``, the kernel matrix ``K``, the ``R`` vector
        (``None`` except for ``'negbinom'``) and the sorted curve
        positions ``t``.
    """
    N = 500
    J = 100
    D = 2

    # Generate latent manifold.
    # -------------------------
    X, t = make_s_curve(N, random_state=rng)
    # Drop the middle column, scale each remaining column to unit standard
    # deviation, then order the points by ``t``.
    X = np.delete(X, obj=1, axis=1)
    X = X / np.std(X, axis=0)
    inds = t.argsort()
    X = X[inds]
    t = t[inds]

    # Generate kernel `K` and latent GP-distributed maps `F`.
    # -------------------------------------------------------
    K = kern.RBF(input_dim=D, lengthscale=1).K(X)
    F = rng.multivariate_normal(np.zeros(N), K, size=J).T

    # Generate emissions using `F` and/or `K`.
    # ----------------------------------------
    R = None
    if emissions == 'bernoulli':
        P = logistic(F)
        Y = rng.binomial(1, P).astype(np.double)
    elif emissions == 'gaussian':
        # Draw the noise from ``rng`` rather than the global ``np.random``
        # state, so a seeded ``rng`` reproduces this branch as well.
        Y = F + rng.normal(0, scale=0.5, size=F.shape)
    elif emissions == 'multinomial':
        C = 100
        pi = np.exp(F - logsumexp(F, axis=1)[:, None])
        Y = np.zeros(pi.shape)
        for n in range(N):
            Y[n] = rng.multinomial(C, pi[n])
    elif emissions == 'negbinom':
        P = logistic(F)
        R = np.arange(1, J + 1, dtype=float)
        Y = rng.negative_binomial(R, 1 - P)
    else:
        assert (emissions == 'poisson')
        theta = np.exp(F)
        Y = rng.poisson(theta)
    return Dataset('s-curve', False, Y, X, F, K, R, t)
Ejemplo n.º 22
0
    def __init__(self, n_samples=SAMPLE, split='none', split_ratio=FIT_DEFAULT,
                 random_state=SEED, data_path=DEFAULT_PATH):
        """Sample an S-curve and hand it to the base dataset.

        Args:
            n_samples(int, optional): Number of points to sample from the manifold.
            split(str, optional): Name of split. See BaseDataset.
            split_ratio(float, optional): Ratio of train split. See BaseDataset.
            random_state(int, optional): Random seed. See BaseDataset.
            data_path(str, optional): Unused. Only to share same signature with other datasets.
        """
        points, curve_position = datasets.make_s_curve(
            n_samples=n_samples, random_state=random_state)

        super().__init__(points, curve_position, split, split_ratio,
                         random_state)

        # Keep a numpy copy of the targets under the ``latents`` name.
        self.latents = self.targets.numpy()
Ejemplo n.º 23
0
def load_dataset(dataset, n_samples, random_state=1, n_features=3):
    """Dispatch to one of the dataset generators by name.

    ``n_features`` is only used by ``'blobs'``. For an unrecognised name the
    function prints ``"unknown dataset"`` and returns ``None``.
    """
    # Builders are wrapped in lambdas so a generator is only looked up and
    # called when its name is actually requested.
    builders = (
        ('s_curve',
         lambda: make_s_curve(n_samples, random_state=random_state)),
        ('swiss_roll',
         lambda: make_swiss_roll(n_samples, random_state=random_state)),
        ('broken_swiss_roll',
         lambda: make_broken_swiss_roll(n_samples, random_state=random_state)),
        ('sphere',
         lambda: make_sphere(n_samples, random_state=random_state)),
        ('3_circles',
         lambda: make_3_circles(n_samples, random_state=random_state)),
        ('peaks',
         lambda: make_peaks(n_samples, random_state=random_state)),
        ('blobs',
         lambda: make_blobs(n_samples, n_features=n_features, centers=3,
                            random_state=random_state)),
    )
    for name, build in builders:
        if dataset == name:
            return build()
    print("unknown dataset")
Ejemplo n.º 24
0
def test_isomap_fit_precomputed_radius_graph():
    """Isomap on a precomputed radius graph must match Isomap on raw points."""
    X, _ = datasets.make_s_curve(200, random_state=0)
    radius = 10

    # Embedding obtained from a precomputed radius-neighbors distance graph.
    distance_graph = neighbors.radius_neighbors_graph(X,
                                                      radius=radius,
                                                      mode="distance")
    graph_isomap = manifold.Isomap(n_neighbors=None,
                                   radius=radius,
                                   metric="precomputed")
    graph_isomap.fit(distance_graph)
    precomputed_result = graph_isomap.embedding_

    # Embedding obtained directly from the points with the minkowski metric.
    point_isomap = manifold.Isomap(n_neighbors=None,
                                   radius=radius,
                                   metric="minkowski")
    direct_result = point_isomap.fit_transform(X)

    assert_allclose(precomputed_result, direct_result)
def genPoints(n_points=1000, func_name='swiss-roll'):
    """Return ``(points, colors)`` for the named 3-D test surface.

    Supported names are ``'swiss-roll'``, ``'s-curve'`` and
    ``'severed-sphere'``; anything else raises ``ValueError``.
    """
    if func_name not in ('swiss-roll', 's-curve', 'severed-sphere'):
        raise ValueError('Unsupported function [%s]' % func_name)

    if func_name == 'swiss-roll':
        points, colors = datasets.make_swiss_roll(n_points, random_state=0)
        return points, colors

    if func_name == 's-curve':
        points, colors = datasets.make_s_curve(n_points, random_state=0)
        return points, colors

    # Severed sphere: two random angles per candidate point, with ``p`` kept
    # short of a full turn.
    rng = check_random_state(0)
    p = rng.rand(n_points) * (2 * np.pi - 0.55)
    t = rng.rand(n_points) * np.pi

    # Keep only candidates whose ``t`` is more than pi/8 away from both ends.
    keep = ((t < (np.pi - (np.pi / 8))) & (t > ((np.pi / 8))))
    t_kept = t[keep]
    p_kept = p[keep]
    colors = p_kept
    points = np.c_[np.sin(t_kept) * np.cos(p_kept),
                   np.sin(t_kept) * np.sin(p_kept),
                   np.cos(t_kept)]
    return points, colors
Ejemplo n.º 26
0
def test_transform():
    """Re-embedding slightly noisy points must stay close to the embedding."""
    n_samples = 200
    n_components = 10
    noise_scale = 0.01

    # S-curve sample and its Isomap embedding
    X, _ = datasets.make_s_curve(n_samples, random_state=0)
    iso = manifold.Isomap(n_components=n_components, n_neighbors=2)
    embedded = iso.fit_transform(X)

    # Embed a perturbed copy of the same points with the fitted model
    rng = np.random.RandomState(0)
    perturbation = noise_scale * rng.randn(*X.shape)
    re_embedded = iso.transform(X + perturbation)

    # The RMS difference must stay below twice the noise scale
    rms_error = np.sqrt(np.mean((embedded - re_embedded)**2))
    assert rms_error < 2 * noise_scale
Ejemplo n.º 27
0
def test_parsimonious():
    """Check the first two parsimonious indices on a noise-free S-curve."""
    from UQpy.utilities.kernels.GaussianKernel import GaussianKernel
    from UQpy.dimension_reduction.diffusion_maps.DiffusionMaps import DiffusionMaps
    from sklearn.datasets import make_s_curve

    sample_count = 4000
    points, _ = make_s_curve(sample_count, random_state=3, noise=0)
    gaussian = GaussianKernel()

    dmaps = DiffusionMaps(data=points,
                          alpha=1.0,
                          n_eigenvectors=9,
                          is_sparse=True,
                          n_neighbors=100,
                          kernel=gaussian)
    dmaps.parsimonious(dim=2)

    # Expected values for this seed and configuration.
    assert dmaps.parsimonious_indices[0] == 1
    assert dmaps.parsimonious_indices[1] == 5
Ejemplo n.º 28
0
def makeSCurve():
    """Return a 2-D noisy S-curve with uniform background points appended.

    Columns 0 and 2 of a 1000-point S-curve (noise 0.2) are doubled in
    scale. One tenth as many points are then drawn uniformly inside the
    curve's bounding box and appended.
    """
    n_points = 1000
    noise = 0.2
    X, _ = datasets.make_s_curve(n_points, noise=noise, random_state=0)

    # Project onto columns 0 and 2 and stretch in all directions
    Y = np.array([X[:, 0], X[:, 2]]).T * 2

    # Bounding box of the stretched curve
    x_low, x_high = np.min(Y[:, 0]), np.max(Y[:, 0])
    y_low, y_high = np.min(Y[:, 1]), np.max(Y[:, 1])

    # Background noise: x coordinates are drawn first, then y coordinates
    n_bg = n_points // 10
    bg_x = np.random.uniform(low=x_low, high=x_high, size=n_bg)
    bg_y = np.random.uniform(low=y_low, high=y_high, size=n_bg)
    background = np.column_stack((bg_x, bg_y))

    return np.concatenate((Y, background))
      wcss.append(kmeans.inertia_)
  plt.scatter(range(1, 20), wcss)
  plt.title('The Elbow Method')
  plt.xlabel('Number of clusters')
  plt.ylabel('WCSS')
  plt.show()
  plt.clf()

"""## Get the dataset"""

# Build several toy datasets of n samples each, then plot the moons and
# blobs sets and run the elbow/cluster visualisations on them.
n = 1000
from sklearn.datasets import make_moons, make_blobs, make_circles, make_s_curve
X_moons, y_moons = make_moons(n_samples = n, noise=0.1)
X_blobs, y_blobs = make_blobs(n_samples = n, n_features = 2)
X_circles, y_circles = make_circles(n_samples=n, noise=0.1, factor = 0.5)
X_scurve, y_scurve = make_s_curve(n_samples=n, noise = 0.1)
# Uniform random points with two columns
X_random = np.random.random([n, 2])
# Fixed 2x2 linear map applied to the blobs to get an anisotropic variant
transformation = [[0.80834549, -0.83667341], [-0.20887718, 0.85253229]]
X_aniso = np.dot(X_blobs, transformation)

plot_dataset(X_moons)

visual_elbow(X_moons)

# First argument is presumably the number of clusters -- TODO confirm
visual(10, X_moons)

plot_dataset(X_blobs)

visual_elbow(X_blobs)
visual(3, X_blobs)
Ejemplo n.º 30
0
    '--random_state',
    type=int,
    help=
    ' RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random.'
)

# Command-line script: generate an S-curve from the parsed arguments and
# write it out in svmlight format.
import scrape as sc

# Register further options on the parser (defined in the `scrape` helper)
sc.all_options(parser)
sc.output_options(parser)

from scrape import write_dict

args = sc.parse_args(parser)

# Seeds the global numpy generator; make_s_curve additionally receives
# args.random_state below.
np.random.seed(0)

X, y = make_s_curve(n_samples=args.n_samples,
                    noise=args.noise,
                    random_state=args.random_state)

datasets.dump_svmlight_file(X,
                            y,
                            args.output_file,
                            zero_based=args.zero_based,
                            query_id=args.query_id,
                            multilabel=args.multilabel,
                            comment=args.comment)

# Report the path of the written feature file
write_dict({'feature_file': args.output_file})
# The target here is just the digit represented by the data.
print(digits.target)
# There are two versions of the data array: `data` and `images`.
print(digits.data.shape)
print(digits.images.shape)
# Print the "data" entry of each array interface to compare the two versions;
# per the original note, they differ only in shape.
print(digits.data.__array_interface__["data"])
print(digits.images.__array_interface__["data"])

# Visualise the digits.
fig = plt_2.figure(figsize=(6, 6))  # figure size in inches
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)

# Plot the first 64 digits on an 8x8 grid; each image is 8x8 pixels.
for i in range(64):
    ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[])
    ax.imshow(digits.images[i], cmap=plt_2.cm.binary, interpolation="nearest")
    # label the image with the target value
    ax.text(0, 7, str(digits.target[i]))
# Each feature is a real-valued quantity indicating the darkness of a
# particular pixel in an 8x8 image.

print("Non linear dataset - The S-Curve: ")
data, colors = make_s_curve(n_samples=1000)
print(data.shape)
print(colors.shape)
# Visualise the S-curve as a 3-D scatter coloured by the second return value.
ax = plt_2.axes(projection="3d")
ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=colors)
ax.view_init(10, -60)
Ejemplo n.º 32
0
# Project X onto its first two principal components and plot the embedding.
pca = decomposition.PCA(n_components=2).fit(X)
X_pca = pca.transform(X)
plot_embedding(X_pca, "Principal Components projection of the digits")
# Show each of the two components reshaped to an 8x8 grayscale image.
plt.matshow(pca.components_[0, :].reshape(8, 8), cmap="gray")
plt.axis('off')
plt.matshow(pca.components_[1, :].reshape(8, 8), cmap="gray")
plt.axis('off')
plt.show()
"""
MANIFOLD LEARNING 
PCA has one weakness which is it cannot detect non-linear features. Then 
the manifold learning algorithms have been developed to bypass this deficiency.
In manifold learning, we use a canonical dataset called the S-curve.
"""
# Sample a 1000-point S-curve and show it as a 3-D scatter.
from sklearn.datasets import make_s_curve
X,y=make_s_curve(n_samples=1000)
from mpl_toolkits.mplot3d import Axes3D
ax=plt.axes(projection='3d')
ax.scatter3D(X[:,0],X[:,1],X[:,2],c=y)
ax.view_init(10,-60)
# This is a 2D dataset embedded in 3D, but it is embedded in such a way that
# PCA can't discover the underlying data orientation.
from sklearn import decomposition
X_pca=decomposition.PCA(n_components=2).fit_transform(X)
plt.scatter(X_pca[:,0],X_pca[:,1],c=y)
# Manifold learning algorithms, available in the sklearn.manifold
# submodule, are able to recover the underlying 2-dimensional manifold:
from sklearn.manifold import Isomap
iso = Isomap(n_neighbors=15, n_components=2)
X_iso = iso.fit_transform(X)
plt.scatter(X_iso[:, 0], X_iso[:, 1], c=y)
Ejemplo n.º 33
-1
def make_sklearn_dataset(dataset_name, n_samples):
    """Create one of several 2-D toy datasets by name.

    Args:
        dataset_name: ``'circles_distant'``, ``'moons'``, ``'blobs'``,
            ``'circles_near'``, ``'s_curve'`` or ``'swiss_roll'``.
        n_samples: Total number of samples. The ``'s_curve'`` and
            ``'swiss_roll'`` sets are built from two halves of
            ``n_samples // 2`` points each.

    Returns:
        For the circle, moon and blob sets, whatever the scikit-learn
        generator returns. For ``'s_curve'`` and ``'swiss_roll'``, a
        ``(points, labels)`` tuple where ``points`` stacks columns 0 and 2 of
        two independent samples and ``labels`` is 0 for the first and 1 for
        the second.

    Raises:
        ValueError: If ``dataset_name`` is not recognised. (Previously this
            failed with ``UnboundLocalError`` at the final ``return``.)
    """
    # create dataset
    if 'circles_distant' == dataset_name:  # labels=3, seed=1, n-samples=1000, max-depth=4 OR labels=4, seed=1, n-samples=1000, max-depth=4
        dataset = datasets.make_circles(n_samples=n_samples,
                                        factor=.5,
                                        noise=.05)
    elif 'moons' == dataset_name:  # labels=2, seed=13, n-samples=500, max-depth=4 OR labels=1, seed=27, n-samples=500, max-depth=4
        dataset = datasets.make_moons(n_samples=n_samples, noise=.05)
    elif 'blobs' == dataset_name:  # labels=1, seed=0, n-samples=100, max-depth=3
        dataset = datasets.make_blobs(n_samples=n_samples, random_state=8)
    elif 'circles_near' == dataset_name:  # labels = 20, seed=0, n-samples=2000, max-depth=5
        dataset = datasets.make_circles(n_samples=n_samples, noise=.05)
    elif 's_curve' == dataset_name:  # labels=10, seed=35, n-samples=2500, max-depth=7
        scurve1 = datasets.make_s_curve(n_samples=n_samples // 2, noise=.05)
        scurve1 = np.vstack((scurve1[0][:, 0], scurve1[0][:, 2])).T
        scurve2 = datasets.make_s_curve(n_samples=n_samples // 2, noise=.05)
        scurve2 = np.vstack(
            (scurve2[0][:, 0], scurve2[0][:, 2])).T + [.5, .5]  # offset
        dataset = np.concatenate((scurve1, scurve2), 0), \
                  np.concatenate((np.asarray([0] * scurve1.shape[0]),
                                  np.asarray([1] * scurve2.shape[0])), 0)
    elif 'swiss_roll' == dataset_name:  # labels = 10, seed = 35, n-samples=2500, max-depth=5
        sroll1 = datasets.make_swiss_roll(n_samples=n_samples // 2, noise=.05)
        sroll1 = np.vstack((sroll1[0][:, 0], sroll1[0][:, 2])).T
        sroll2 = datasets.make_swiss_roll(n_samples=n_samples // 2, noise=.05)
        sroll2 = np.vstack(
            (sroll2[0][:, 0], sroll2[0][:, 2])).T * 0.75  # shrink
        dataset = np.concatenate((sroll1, sroll2), 0), \
                  np.concatenate((np.asarray([0] * sroll1.shape[0]),
                                  np.asarray([1] * sroll2.shape[0])), 0)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError('Unknown dataset name [%s]' % dataset_name)

    return dataset