Example #1
0
def data_loader(dataset, num_classes, one_hot=True, reshape=False):
    """Load a dataset and split it into train / validation / test parts.

    Synthetic datasets ("circles", "moons", "swiss_roll") are sampled with
    scikit-learn (70000 training and 10000 test points) and min-max scaled
    with the global minimum and maximum of each array.  Image datasets
    ("mnist", "fashion_mnist", "cifar10", "cifar100") are fetched through
    ``tf.keras.datasets`` and divided by 255.  The first 10000 training
    samples are always held out as the validation set.

    Args:
        dataset: Name of the dataset to load (one of the names above).
        num_classes: Number of classes passed to ``to_categorical``.
        one_hot: If true, one-hot encode all label arrays.
        reshape: If true, flatten every sample into a 1-D vector.

    Returns:
        A ``Split(train, valid, test)`` namedtuple whose fields are
        ``Dataset(images, labels, len)`` namedtuples.

    Raises:
        ValueError: If ``dataset`` is not a supported name.  (Previously an
            unknown name surfaced as an ``UnboundLocalError`` further down.)
    """
    synthetic_sets = ("circles", "moons", "swiss_roll")
    keras_sets = ("mnist", "fashion_mnist", "cifar10", "cifar100")
    if dataset not in synthetic_sets + keras_sets:
        raise ValueError(
            "Unknown dataset %r; expected one of %s"
            % (dataset, ", ".join(synthetic_sets + keras_sets)))

    def _min_max(values):
        # Scale with the global min/max of the array (not per feature).
        return (values - values.min()) / (values.max() - values.min())

    if dataset == "circles":
        X, y = datasets.make_circles(n_samples=70000, factor=.5, noise=.05)
        X = _min_max(X)
        Xte, yte = datasets.make_circles(n_samples=10000, factor=.5, noise=.05)
        Xte = _min_max(Xte)
    elif dataset == "moons":
        X, y = datasets.make_moons(n_samples=70000, noise=.05)
        X = _min_max(X)
        Xte, yte = datasets.make_moons(n_samples=10000, noise=.05)
        Xte = _min_max(Xte)
    elif dataset == "swiss_roll":
        X, y = datasets.make_swiss_roll(n_samples=70000, noise=.05)
        X = _min_max(X)
        Xte, yte = datasets.make_swiss_roll(n_samples=10000, noise=.05)
        Xte = _min_max(Xte)
        # The swiss roll target is continuous; binarize it around its mean.
        y = np.where(y > y.mean(), 1, 0)
        yte = np.where(yte > yte.mean(), 1, 0)
    else:
        # One of the tf.keras image datasets.
        loader = getattr(tf.keras.datasets, dataset).load_data
        (X, y), (Xte, yte) = loader()
        X = X / 255.0
        Xte = Xte / 255.0
        if dataset in ("mnist", "fashion_mnist"):
            # Add a trailing axis to the MNIST-style images.
            X = np.expand_dims(X, axis=3)
            Xte = np.expand_dims(Xte, axis=3)

    # Hold out the first 10000 training samples for validation.
    Xval, yval = X[:10000], y[:10000]
    X, y = X[10000:], y[10000:]

    if one_hot:
        y = tf.keras.utils.to_categorical(y, num_classes)
        yval = tf.keras.utils.to_categorical(yval, num_classes)
        yte = tf.keras.utils.to_categorical(yte, num_classes)

    if reshape:
        X = X.reshape(-1, np.prod(X.shape[1:]))
        Xte = Xte.reshape(-1, np.prod(Xte.shape[1:]))
        Xval = Xval.reshape(-1, np.prod(Xval.shape[1:]))

    Dataset = namedtuple('Dataset', 'images labels len')
    Split = namedtuple('Split', ['train', 'valid', 'test'])
    data = Split(Dataset(X, y, len(X)), Dataset(Xval, yval, len(Xval)),
                 Dataset(Xte, yte, len(Xte)))

    return data
Example #2
0
 def __init__(self, train=True, n_samples=6000, noise=0.05,
              test_fraction=0.1, seed=42):
     """Generate a swiss roll and pass it to the parent constructor.

     Args:
         train: Forwarded unchanged to the parent constructor.
         n_samples: Number of points to sample.
         noise: Noise level passed to ``make_swiss_roll``.
         test_fraction: Forwarded unchanged to the parent constructor.
         seed: Seed for both the swiss roll sampling and the
             ``RandomState`` given to the parent constructor.
     """
     _rnd = np.random.RandomState(seed)
     # noise / random_state are keyword-only in current scikit-learn, so
     # the former positional call raises TypeError there.
     # NOTE(review): assumes make_swiss_roll is sklearn's -- confirm import.
     data, pos = make_swiss_roll(n_samples, noise=noise, random_state=seed)
     data = data.astype(np.float32)
     pos = pos.astype(np.float32)
     super().__init__(data, pos, train, test_fraction, _rnd)
    def test_swiss_roll(self):
        """Compare scikit-learn's Isomap with the local ``Isomap`` visually.

        Both reducers embed the same 1000-point swiss roll into two
        dimensions; each result is loaded into a ``Displayer`` together with
        the wall-clock time it took, and the figure is shown at the end.
        """
        samples = 1000
        neighbors = 10
        n_components = 2

        data, c = datasets.make_swiss_roll(n_samples=samples, random_state=0)
        displayer = Displayer(title="Isomap algorithms comparison") \
            .load(title="Swiss roll from %i samples." % (samples,), data=data, color=c)

        start = time()
        # Isomap's constructor arguments are keyword-only in current
        # scikit-learn; the positional form raises TypeError there.
        result = manifold.Isomap(
            n_neighbors=neighbors,
            n_components=n_components).fit_transform(data)
        elapsed = time() - start

        displayer \
            .load(
                title="SKLearn's Isomap with %i neighbors, taking %.1fs." % (neighbors, elapsed),
                data=result,
                color=c)

        start = time()
        # Project-local Isomap implementation under comparison.
        result = Isomap(k=neighbors, n_components=n_components).transform(data)
        elapsed = time() - start

        displayer \
            .load(
                title="Isomap with %i neighbors, taking %.1fs" % (neighbors, elapsed),
                data=result,
                color=c)
        displayer.show()
Example #4
0
def make_roll(n_classes=3, samples=256, seed=None, noise=0.0, *args, **kwargs):
    """Generate a swiss roll whose target is binned into ordinal classes.

    The continuous target returned by ``make_swiss_roll`` is discretized
    into ``n_classes`` bins with ``KBinsDiscretizer``.  Extra positional and
    keyword arguments are accepted and ignored.

    Returns:
        ``(x, y)`` where ``x`` holds the float32 points and ``y`` the
        flattened integer bin index, both converted with
        ``tensor.to_backend``.
    """
    points, position = make_swiss_roll(
        n_samples=samples, random_state=seed, noise=noise)
    discretizer = KBinsDiscretizer(n_bins=n_classes, encode="ordinal")
    # The discretizer expects a 2-D column, hence the reshape.
    labels = discretizer.fit_transform(position.reshape(-1, 1)).astype(int)
    features = tensor.to_backend(points.astype(numpy.float32))
    return features, tensor.to_backend(labels).flatten()
Example #5
0
def other_dimensional_reduction():
    """Plot 2-D embeddings of a swiss roll from MDS, Isomap and t-SNE.

    The three embeddings are drawn side by side, coloured by the swiss roll
    target, saved through ``save_fig`` and then shown.
    """
    from sklearn.datasets import make_swiss_roll
    from sklearn.manifold import MDS, Isomap, TSNE

    X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=41)

    # Reducers are fitted in plotting order before any figure is created.
    reducers = (
        ("MDS", MDS(n_components=2)),
        ("Isomap", Isomap(n_components=2)),
        ("t-SNE", TSNE(n_components=2, random_state=42)),
    )
    embeddings = [(title, model.fit_transform(X)) for title, model in reducers]

    plt.figure(figsize=(11, 4))

    # Subplot codes 131, 132, 133: one row, three columns.
    for position, (title, embedded) in enumerate(embeddings, start=131):
        plt.subplot(position)
        plt.title(title, fontsize=14)
        plt.scatter(embedded[:, 0], embedded[:, 1], c=t, cmap=plt.cm.hot)
        plt.xlabel("$z_1$", fontsize=18)

        # Only the left-most panel carries the y-axis label.
        if position == 131:
            plt.ylabel("$z_2$", fontsize=18, rotation=0)
        plt.grid(True)

    save_fig("other_dimensional_reduction")
    plt.show()
Example #6
0
    def __init__(self,
                 location=None,
                 setype='moons',
                 train=False,
                 n_samples=2000,
                 noise=0.05):
        """Load a cached 2-D/3-D toy dataset or generate and cache it.

        Args:
            location: Directory holding the ``.npy`` cache.  When ``None``
                the directory ``gen_data/`` is used and the data is always
                regenerated.
            setype: ``'moons'`` or ``'swiss_roll'``.
            train: Selects ``<setype>.npy`` (true) or ``<setype>_val.npy``.
            n_samples: Number of points to generate.
            noise: Noise level passed to the scikit-learn generator.
        """
        super(GeneratedSet, self).__init__()

        # No explicit location means: use the default folder and regenerate.
        regenerate = location is None
        if regenerate:
            location = 'gen_data/'
        os.makedirs(location, exist_ok=True)

        filename = setype + ('.npy' if train else '_val.npy')
        dfile = osp.join(location, filename)
        if osp.exists(dfile):
            self.data = np.load(dfile)
        else:
            regenerate = True

        if not regenerate:
            return

        # Only the point coordinates are kept; the generator's second
        # return value is discarded.
        if setype == 'moons':
            samples, _ = sklsets.make_moons(n_samples=n_samples, noise=noise)
            self.data = samples.astype(np.float32)
        elif setype == 'swiss_roll':
            samples, _ = sklsets.make_swiss_roll(n_samples=n_samples,
                                                 noise=noise)
            self.data = samples.astype(np.float32)
        np.save(dfile, self.data)
Example #7
0
def bonus():
    """
    Plot the first diffusion-map eigenvector against the others (datafold).

    A noise-free swiss roll of 5000 points is embedded with
    ``DiffusionMaps``; a random subset of 1000 points is plotted.
    """
    n_total = 5000
    # Only a random subset of the points is drawn to keep the plot light.
    n_shown = 1000
    shown_idx = np.random.permutation(n_total)[:n_shown]

    # Point cloud and the colour value of every point.
    points, colors = make_swiss_roll(n_total, noise=0.0, random_state=None)

    cloud = pfold.PCManifold(points)
    cloud.optimize_parameters(result_scaling=0.5)
    print(f'epsilon={cloud.kernel.epsilon}, cut-off={cloud.cut_off}')

    # Re-use the optimized kernel scale and cut-off for the diffusion map.
    kernel = pfold.GaussianKernel(epsilon=cloud.kernel.epsilon)
    dmap = dfold.DiffusionMaps(kernel=kernel, n_eigenpairs=9,
                               dist_kwargs=dict(cut_off=cloud.cut_off))
    dmap = dmap.fit(cloud)
    evecs, evals = dmap.eigenvectors_, dmap.eigenvalues_
    print(evecs.shape)
    print(evals.shape)

    plot_pairwise_eigenvector(
        eigenvectors=dmap.eigenvectors_[shown_idx, :],
        n=1,
        fig_params=dict(figsize=[15, 15]),
        scatter_params=dict(cmap=plt.cm.Spectral, c=colors[shown_idx]))
    plt.show()
def test_make_swiss_roll():
    """Check shapes and the x/z coordinates of a noise-free swiss roll."""
    points, position = make_swiss_roll(n_samples=5, noise=0.0, random_state=0)

    assert_equal(points.shape, (5, 3), "X shape mismatch")
    assert_equal(position.shape, (5,), "t shape mismatch")

    # Without noise the first and third coordinates are t*cos(t), t*sin(t).
    first_axis, third_axis = points[:, 0], points[:, 2]
    assert_array_equal(first_axis, position * np.cos(position))
    assert_array_equal(third_axis, position * np.sin(position))
Example #9
0
def test_make_swiss_roll(hole):
    """Check shapes and x/z coordinates of a noise-free swiss roll.

    ``hole`` is forwarded to ``make_swiss_roll`` unchanged.
    """
    points, position = make_swiss_roll(
        n_samples=5, noise=0.0, random_state=0, hole=hole)

    assert points.shape == (5, 3)
    assert position.shape == (5, )

    # Without noise the first and third coordinates are t*cos(t), t*sin(t).
    first_axis, third_axis = points[:, 0], points[:, 2]
    assert_array_almost_equal(first_axis, position * np.cos(position))
    assert_array_almost_equal(third_axis, position * np.sin(position))
def test_make_swiss_roll():
    """Check shapes and the x/z coordinates of a noise-free swiss roll."""
    points, position = make_swiss_roll(n_samples=5, noise=0.0, random_state=0)

    assert_equal(points.shape, (5, 3), "X shape mismatch")
    assert_equal(position.shape, (5,), "t shape mismatch")

    # Without noise the first and third coordinates are t*cos(t), t*sin(t).
    first_axis, third_axis = points[:, 0], points[:, 2]
    assert_array_almost_equal(first_axis, position * np.cos(position))
    assert_array_almost_equal(third_axis, position * np.sin(position))
    def _run(self):
        """Show a swiss roll of ``self.samples`` points and print its covariance."""
        points, colors = datasets.make_swiss_roll(n_samples=self.samples,
                                                  random_state=0)

        plot = self.displayer.load(points, colors)
        plot.show()

        # NOTE(review): the label says "Correlation" but np.cov computes the
        # covariance matrix -- confirm which one is intended.
        print('Correlation matrix:')
        # rowvar=False: every column of `points` is one variable.
        print(np.cov(points, rowvar=False))
Example #12
0
def swiss_roll():
    """Embed a 2000-point swiss roll with MDS, diffusion maps and LLE.

    Each embedding is plotted to ``./plots``; scree plots are produced for
    MDS and for diffusion maps.
    """
    from sklearn import manifold
    points, color = datasets.make_swiss_roll(n_samples=2000)

    # --- MDS on the pairwise distance matrix ---
    distances = calculate_distances(points)
    embedded_mds = MDS(distances, 2)
    mds_eigvals, _ = get_mds_eig_entities(distances)
    scree_plot(mds_eigvals, "MDS", './plots/swiss_roll_scree_plot_mds.png')
    plot_data(points, embedded_mds, color, './plots/plot_data_mds.png')

    # For checking my implementation - compared with sklearn results
    # swiss_roll_dataset_mds_sklearn = manifold.MDS(n_components=2).fit(swiss_roll_dataset_distances)
    # plot_data(swiss_roll_dataset, swiss_roll_dataset_mds_sklearn, color, './plots/plot_data_mds_sklearn.png')

    # --- Diffusion maps ---
    embedded_dm = DiffusionMap(points, 2, 100, 1000)
    dm_eigvals, _ = get_diffusion_maps_eig_entites(points, 100)
    scree_plot(dm_eigvals, "Diffusion Maps",
               './plots/scree_plot_diffusion_maps.png')
    plot_data(points, embedded_dm, color,
              './plots/plot_data_diffusion_map.png')

    # For checking my implementation - compared with pydiffmap results
    # swiss_roll_dataset_diffusion_map_pydiffmap = pydiffmap.diffusion_map.DiffusionMap(pydiffmap.kernel.Kernel(), n_evecs=2).fit_transform(swiss_roll_dataset)
    # plot_data(swiss_roll_dataset, swiss_roll_dataset_diffusion_map_pydiffmap, color, './plots/plot_data_diffusion_map_pydiffmap.png')

    # --- Locally linear embedding ---
    embedded_lle = LLE(points, 2, 12)
    plot_data(points, embedded_lle, color, './plots/plot_data_lle.png')
    def test_similar_graphics(self):
        """Check by eye that ``Displayer`` matches a hand-written figure.

        An "expected" figure is built with plain matplotlib calls and the
        same data is then rendered through ``Displayer``; the comparison is
        manual, nothing is asserted.
        """
        points = 1000
        data, color = datasets.make_swiss_roll(points, random_state=0)
        neighbors = 10
        to_dimension = 2

        # Isomap's constructor arguments are keyword-only in current
        # scikit-learn; the positional form raises TypeError there.
        result = manifold.Isomap(
            n_neighbors=neighbors,
            n_components=to_dimension).fit_transform(data)

        # Expected printing...
        # Bare reference to Axes3D: keeps the name (and its import) in use.
        Axes3D
        fig = plt.figure(figsize=(15, 8))
        plt.suptitle("Expected image", fontsize=14)
        ax = fig.add_subplot(121, projection='3d')
        ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=color, cmap=plt.cm.Spectral)
        ax.view_init(4, -72)
        ax = fig.add_subplot(122)
        plt.scatter(result[:, 0], result[:, 1], c=color, cmap=plt.cm.Spectral)
        plt.title("SKLearn's Isomap")
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

        # Actual printing...
        Displayer(title="Actual image", points=points, neighbors=neighbors) \
            .load(data, color, title='Graphic I') \
            .load(result, color, title='SKLearn\'s Isomap') \
            .show()
Example #14
0
def demo(k):
    """Show a noisy swiss roll next to its 2-D LER embedding.

    Args:
        k: Number of neighbours passed to ``LER``.
    """
    X, t = make_swiss_roll(noise=1)

    ler = LER(n_components=2, n_neighbors=k, affinity='rbf')
    ler_X = ler.fit_transform(X, t)

    _, axes = plt.subplots(nrows=1, ncols=2, figsize=plt.figaspect(0.33))
    # Replace the left 2-D axes with a 3-D one.  The grid has two columns,
    # so the replacement is slot 121 (131 was a leftover from the
    # three-panel variant and did not line up with this layout).
    axes[0].set_axis_off()
    axes[0] = plt.subplot(121, projection='3d')
    axes[0].scatter(*X.T, c=t, s=50)
    axes[0].set_title('Swiss Roll')
    axes[1].scatter(*ler_X.T, c=t, s=50)
    axes[1].set_title('LER Embedding')
    plt.show()
Example #15
0
def create_true_data(type_of_data, number_of_modes, std, size, vocabulary_size):
    """Sample 2-D points of a given shape, scaled to ``[0, vocabulary_size]``.

    Args:
        type_of_data: One of "mixture_of_gaussians", "blobs", "moons",
            "circles", "swiss_roll" or "s_curve".
        number_of_modes: Number of Gaussians / blob centres.  Only used by
            "mixture_of_gaussians" and "blobs".
        std: Cluster standard deviation for "blobs" and noise level for
            "swiss_roll"; ignored by the other types (the Gaussian mixture
            uses a fixed scale of 500).
        size: Number of samples (per mode for "mixture_of_gaussians").
        vocabulary_size: Upper bound of the coordinate range.

    Returns:
        An ``(n, 2)`` array.  "mixture_of_gaussians" yields floats; all
        other types are cast to ``int``.

    Raises:
        ValueError: If ``type_of_data`` is not supported.  (Previously an
            unknown type surfaced as an ``UnboundLocalError`` on return.)
    """
    supported = ("mixture_of_gaussians", "blobs", "moons", "circles",
                 "swiss_roll", "s_curve")
    if type_of_data not in supported:
        raise ValueError(
            "Unknown type_of_data %r; expected one of %s"
            % (type_of_data, ", ".join(supported)))

    if type_of_data == "mixture_of_gaussians":
        list_of_x_values, list_of_y_values = [], []
        for _ in range(number_of_modes):
            # Each coordinate gets its own random centre; values are clipped
            # to the valid range.
            list_of_x_values.append(np.clip(
                np.random.normal(loc=np.random.randint(vocabulary_size - 1),
                                 scale=500, size=size),
                0, vocabulary_size))
            list_of_y_values.append(np.clip(
                np.random.normal(loc=np.random.randint(vocabulary_size - 1),
                                 scale=500, size=size),
                0, vocabulary_size))
        # np.append flattens the per-mode arrays into one long vector.
        x = np.column_stack((np.append([], list_of_x_values),
                             np.append([], list_of_y_values)))

    # Random rotation; drawn for every type so the global RNG stream is
    # consumed exactly as before.
    cos_theta = np.random.uniform()
    sin_theta = math.sqrt(1 - cos_theta * cos_theta)
    rotation = np.array([[cos_theta, -sin_theta], [sin_theta, cos_theta]])

    if type_of_data == "blobs":
        blobs = make_blobs(n_samples=size, centers=number_of_modes,
                           cluster_std=std)[0]
        x = np.clip((vocabulary_size / 20) * blobs + (vocabulary_size / 2),
                    [0, 0], [vocabulary_size, vocabulary_size]).astype(int)
    if type_of_data == "moons":
        # The moons use the transposed rotation matrix.
        moons_rotation = np.array([[cos_theta, sin_theta],
                                   [-sin_theta, cos_theta]])
        x = ((np.dot(make_moons(n_samples=size)[0] * (1 / 2), moons_rotation))
             * (vocabulary_size / 2) + (vocabulary_size / 2)).astype(int)
    if type_of_data == "circles":
        x = ((make_circles(n_samples=size)[0] * (vocabulary_size / 2))
             + (vocabulary_size / 2)).astype(int)
    if type_of_data == "swiss_roll":
        x = make_swiss_roll(n_samples=size, random_state=2, noise=std)[0]
        # Keep the first and third coordinates of the 3-D roll.
        x = np.column_stack((x[:, 0], x[:, 2]))
        x = np.dot((1 / 25) * x, rotation)
        x = (x * (vocabulary_size / 2) + (vocabulary_size / 2)).astype(int)
    if type_of_data == "s_curve":
        x = make_s_curve(n_samples=size)[0] / 2
        x = np.column_stack((x[:, 0], x[:, 2]))
        x = ((np.dot(x, rotation)) * (vocabulary_size / 2)
             + (vocabulary_size / 2)).astype(int)
    return x
Example #16
0
def get_swiss_roll():
    """Return a 1500-point swiss roll as torch tensors on ``device``.

    Returns:
        ``(inputs, targets)``: the points cast to float, and the target as
        converted by ``torch.from_numpy`` (no explicit cast).
    """
    points, position = make_swiss_roll(n_samples=1500, random_state=123)

    inputs = torch.from_numpy(points).to(device).float()
    targets = torch.from_numpy(position).to(device)
    return (inputs, targets)
Example #17
0
 def make_data(self):
     """
     Build the swiss roll data.

     Stores 1000 noise-free points in ``self.X_data`` and the labels of a
     6-cluster Ward agglomerative clustering of them in ``self.Y_data``.
     """
     points, _ = make_swiss_roll(1000, noise=0, random_state=0)
     self.X_data = points
     clustering = AgglomerativeClustering(n_clusters=6, linkage='ward')
     clustering = clustering.fit(self.X_data)
     self.Y_data = clustering.labels_
def generate_swiss(num_points, seed):
    """Generate a noisy swiss roll with binary labels in {-1, +1}.

    Args:
        num_points: Number of points to sample.
        seed: Random state passed to ``make_swiss_roll``.

    Returns:
        ``(A, y_binary)``: the points and a list holding ``1`` where the
        target exceeds its mean and ``-1`` otherwise.
    """
    # noise / random_state are keyword-only in current scikit-learn; the
    # former positional call raises TypeError there.
    # NOTE(review): assumes `ds` is sklearn.datasets -- confirm import.
    A, y = ds.make_swiss_roll(num_points, noise=2, random_state=seed)
    threshold = np.mean(y)
    y_binary = [1 if value > threshold else -1 for value in y]
    return A, y_binary
Example #19
0
def simulate(num_samples, noise=0.5):
    """Sample a 2-D swiss roll slice and advance the global ``seed``.

    The second coordinate of the 3-D roll is dropped and the remaining two
    are divided by 5.  Every call uses the current value of the
    module-level ``seed`` and then increments it.
    """
    global seed
    roll, _ = make_swiss_roll(n_samples=num_samples,
                              noise=noise,
                              random_state=seed)
    seed += 1
    # Remove column 1, keeping columns 0 and 2.
    planar = np.delete(roll, 1, axis=1)
    return planar / 5.
Example #20
0
def make_broken_swiss_roll(n_samples, random_state=1):
    """Return a swiss roll with the part at ``x <= -5`` cut away.

    Twice as many points as requested are sampled, the cut is applied and
    the first ``n_samples`` survivors are returned (fewer if the cut leaves
    less than that).
    """
    # Oversample so enough points remain after the cut.
    roll, position = make_swiss_roll(2 * n_samples, random_state=random_state)
    # One mask, built from the uncut roll, filters both arrays.
    keep = roll[:, 0] > -5
    roll, position = roll[keep], position[keep]
    return roll[:n_samples], position[:n_samples]
def plot_kpca():
    """Plot kernel PCA projections of a swiss roll and an RBF pre-image.

    The first figure shows the 2-D projections for a linear, an RBF and a
    sigmoid kernel side by side.  The second figure shows the 3-D
    reconstruction obtained by inverting the RBF projection.  Both figures
    are saved through ``save_fig`` and shown.
    """
    X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)

    lin_pca = KernelPCA(n_components=2,
                        kernel="linear",
                        fit_inverse_transform=True)
    rbf_pca = KernelPCA(n_components=2,
                        kernel="rbf",
                        gamma=0.0433,
                        fit_inverse_transform=True)
    sig_pca = KernelPCA(n_components=2,
                        kernel="sigmoid",
                        gamma=0.001,
                        coef0=1,
                        fit_inverse_transform=True)
    plt.figure(figsize=(11, 4))
    # Raw strings: "\g" in a normal string literal is an invalid escape
    # sequence; the resulting text is unchanged.
    for subplot, pca, title in ((131, lin_pca, "Linear kernel"),
                                (132, rbf_pca, r"RBF kernel, $\gamma=0.04$"),
                                (133, sig_pca,
                                 r"Sigmoid kernel, $\gamma=10^{-3}, r=1$")):
        X_reduced = pca.fit_transform(X)
        if subplot == 132:
            # Kept for the pre-image reconstruction below.
            X_reduced_rbf = X_reduced

        plt.subplot(subplot)
        plt.title(title, fontsize=14)
        plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=t, cmap=plt.cm.hot)
        plt.xlabel("$z_1$", fontsize=18)
        if subplot == 131:
            plt.ylabel("$z_2$", fontsize=18, rotation=0)
        plt.grid(True)

    save_fig("kernel_pca_plot")
    plt.show()

    # Map the RBF projection back to the original 3-D space (pre-image).
    plt.figure(figsize=(6, 5))
    X_inverse = rbf_pca.inverse_transform(X_reduced_rbf)
    ax = plt.subplot(121, projection='3d')
    ax.view_init(10, -70)
    ax.scatter(X_inverse[:, 0],
               X_inverse[:, 1],
               X_inverse[:, 2],
               c=t,
               cmap=plt.cm.hot,
               marker="x")
    # Strip all axis labels and tick labels from the 3-D plot.
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_zlabel("")
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.set_zticklabels([])
    save_fig("preimage_plot", tight_layout=False)
    plt.show()
def generate_data():
    '''
    Generate a 200-point swiss roll after seeding NumPy's global RNG with 0.

    :return: X: input data, y: the target returned by make_swiss_roll
    '''
    np.random.seed(0)
    # Alternative dataset: datasets.make_moons(200, noise=0.20)
    samples, target = datasets.make_swiss_roll(200, noise=0.20)
    return (samples, target)
Example #23
0
def swiss(batch_size, size=1., std=0.01):
    """Sample from a mixture of small Gaussians centred on a 2-D swiss roll.

    A 1000-point swiss roll is projected onto its first and third
    coordinates and rescaled so the largest of them equals ``size``; every
    point becomes the mean of a diagonal Gaussian with scale ``std``.

    Returns:
        ``batch_size`` samples drawn from the mixture.
    """
    points, _ = datasets.make_swiss_roll(1000)
    # Largest value over coordinates 0 and 2 (every second column).
    largest = points[:, ::2].max()
    xs = points[:, 0] * size / largest
    ys = points[:, 2] * size / largest

    # One mixture component per point; the selector gets all-zero inputs.
    selector = ds.Categorical(tf.zeros(len(points)))
    components = []
    for xi, yi in zip(xs.ravel(), ys.ravel()):
        components.append(ds.MultivariateNormalDiag([xi, yi], [std, std]))
    mixture = ds.Mixture(selector, components)
    return mixture.sample(batch_size)
def load_swissroll(n_datapoints=1000,  noice=0.0):
    ''' Generates a Swiss roll dataset (1000 noise-free datapoints by default). Returns a tuple (data, target) containing the dataset and the labels.

        n_datapoints: Number of points to sample.
        noice:        Noise level forwarded to make_swiss_roll.

        data:   The n_datapoints x 3 data matrix containing the points.
        target: The univariate position of the sample according to the main dimension of the points in the manifold. Can be used as the color.

        http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_swiss_roll.html
    '''
    data, target = datasets.make_swiss_roll(
        n_samples=n_datapoints, noise=noice, random_state=None)
    return data, target
 def generate_data(self):
     """Generate the swiss roll, store it and save a plot of it.

     Fills ``self.data``, ``self.target`` and ``self.original_data``, then
     loads the roll into ``self.displayer``, saves it under
     ``datasets/swiss`` and disposes of the displayer.
     """
     points, colors = datasets.make_swiss_roll(n_samples=self.samples,
                                               random_state=0)
     self.data = points
     self.target = colors
     self.original_data = self.data

     plot = self.displayer.load(points, colors)
     plot = plot.save('datasets/swiss')
     plot.dispose()
Example #26
0
def swiss_roll_3d(n_samples=800):
    """Return swiss roll points ordered by their position along the roll.

    Args:
        n_samples: Number of points to sample (fixed random state 0).

    Returns:
        The ``(n_samples, 3)`` points sorted by ascending target value.
    """
    points, position = sk_datasets.make_swiss_roll(n_samples=n_samples,
                                                   random_state=0)
    order = np.argsort(position)
    return points[order, :]
Example #27
0
def generate_swiss_roll_data(n_samples):
    """Generate a thinned swiss roll together with per-point colours.

    Args:
        n_samples: Number of points to sample.

    Returns:
        ``(X, X_color, "Swiss roll")``: the points (second coordinate
        halved), colours from the ``jet`` colormap based on the squared
        distance from the y axis, and a title string.
    """
    noise = 0.05
    # noise is keyword-only in current scikit-learn; passing it
    # positionally raises TypeError there.
    X, _ = make_swiss_roll(n_samples, noise=noise)
    # Make it thinner
    X[:, 1] *= .5
    distance_from_y_axis = X[:, 0]**2 + X[:, 2]**2
    # The +1 in the denominator keeps the normalized values below 1.
    X_color = plt.cm.jet(distance_from_y_axis /
                         np.max(distance_from_y_axis + 1))
    return X, X_color, "Swiss roll"
    def generate_data(self, samples):
        """Generate a swiss roll, store it and report its size.

        Fills ``self.data``, ``self.target`` and ``self.original_data``;
        loads the data into ``self.displayer`` when ``self.plotting`` is
        set, then prints the memory footprint and the shape.
        """
        points, target = datasets.make_swiss_roll(n_samples=samples,
                                                  random_state=0)
        self.data = points
        self.target = target
        self.original_data = self.data

        if self.plotting:
            self.displayer.load(self.data, self.target)

        size_kb = self.data.nbytes / 1024
        print('Data set size: %.2fKB' % size_kb)
        print('Shape: %s' % str(self.data.shape))
    def make_classification(self, name="circles", n_classes=2):
        """
        Creates a toy data set for classification experiments.

        :param name:            name of the data set to be generated
                                    - simple_linear (linearly separable)
                                    - linear (linearly separable, sklearn)
                                    - non_linear (non-linear)
                                    - spiral (non-linear)
                                    - spiral_complex (non-linear)
                                    - circles (non-linear, sklearn)
                                    - moons (non-linear, sklearn)
                                    - any other value: swiss roll (sklearn)
        :param n_classes:       number of classes (only needed for name="linear")
        :return:                X, y (data features and labels)
        """
        # custom data set
        if name == "simple_linear":
            X, y = self.__make_simple_linear()

        # random data generated by sklearn
        elif name == "linear":
            X, y = make_classification(n_samples=200,
                                       n_features=2,
                                       n_redundant=0,
                                       n_informative=2,
                                       n_clusters_per_class=1,
                                       n_classes=n_classes,
                                       class_sep=3.25,
                                       random_state=42)

        # non linear data set
        elif name == "non_linear":
            X, y = self.__make_non_linear()

        # spiral data set
        elif name == "spiral":
            X, y = self.__make_spiral(n_samples=200)

        elif name == "spiral_complex":
            X, y = self.__make_spiral_complex(n_samples=200, noise=0.0)

        # circular data
        elif name == "circles":
            X, y = make_circles(n_samples=400, factor=0.3, noise=0.2)

        # moon data set
        elif name == "moons":
            X, y = make_moons(n_samples=150, noise=0.07, random_state=21)

        # swiss roll data set (fallback for every other name)
        else:
            # noise is keyword-only in current scikit-learn; passing it
            # positionally raises TypeError there.
            X, y = make_swiss_roll(2000, noise=0.00)

        return X, y
Example #30
0
    def __init__(self, dataset_size=25000, **kwargs):
        """Build a regression dataset from a noisy swiss roll.

        The last two coordinates of each 3-D point become the input
        ``self.x``; the first coordinate becomes the label ``self.y``,
        stored as a column.  Extra keyword arguments are ignored.
        """
        roll, _ = make_swiss_roll(n_samples=dataset_size, noise=0.05)

        self.x = torch.Tensor(roll[:, 1:])
        first_coordinate = torch.Tensor(roll[:, 0])
        # Reshape the label vector into one column per sample.
        self.y = first_coordinate.view(first_coordinate.shape[0], -1)
        self.input_size = 2
        self.label_size = 1
        self.dataset_size = dataset_size
Example #31
0
def demo(k):
    """Show a noisy swiss roll next to its LE and LER embeddings.

    Args:
        k: Number of neighbours used by both embeddings.
    """
    X, t = make_swiss_roll(noise=1)

    spectral = SpectralEmbedding(n_components=2, n_neighbors=k)
    spectral_X = spectral.fit_transform(X)

    regularized = LER(n_components=2, n_neighbors=k, affinity='rbf')
    regularized_X = regularized.fit_transform(X, t)

    _, axes = plt.subplots(nrows=1, ncols=3, figsize=plt.figaspect(0.33))
    # Swap the first 2-D axes for a 3-D one to draw the raw roll.
    axes[0].set_axis_off()
    axes[0] = plt.subplot(131, projection='3d')

    panels = (
        (X, 'Swiss Roll'),
        (spectral_X, 'LE Embedding'),
        (regularized_X, 'LER Embedding'),
    )
    for axis, (coords, title) in zip(axes, panels):
        axis.scatter(*coords.T, c=t, s=50)
        axis.set_title(title)
    plt.show()
Example #32
0
        for i in range(numNodes):
            for j in range(numNodes):
                if k_neighbors_array[j, i] <= k_neighbors_array[i, j]:
                    k_neighbors_array[i, j] = k_neighbors_array[j, i]
                else:
                    k_neighbors_array[j, i] = k_neighbors_array[i, j]

        #   Compute the all pair shortest path distance.
        dist_matrix = floyd_warshall(k_neighbors_array, directed=False)

        dist_matrix[np.isinf(dist_matrix)] = 0
        # Do MDS or learn embedding
        # MDS can also be seen as a case of Kernel PCA
        # using data dependent kernel
        # So using K = 1/2 D^2,
        # we generate projections along principal components

        kernel = dist_matrix ** 2

        kernel *= -0.5

        kernelPCA = KernelPCA(n_components=self.n_components, kernel='precomputed')

        return kernelPCA.fit_transform(kernel)

# Script entry point: embed a 3000-point swiss roll with the Isomap class
# from this module and plot the original data next to the embedding.
if __name__ == "__main__":
    # NOTE(review): presumably (n_neighbors, n_components) -- confirm
    # against the Isomap constructor, which is not visible here.
    isomap = Isomap(10, 3)
    X, color = datasets.make_swiss_roll(n_samples=3000)
    X_r = isomap.run(X)
    plot_artificial_dataset(X, X_r, color, "Swiss Roll")
Example #33
0
from sklearn import manifold
from sklearn import datasets
from plot import *
class StochasticNeighborEmbedding:
    """Small wrapper around ``sklearn.manifold.TSNE`` with a fixed seed."""

    def __init__(self, n_components=2, n_neighbors=30, init='pca'):
        """Create the underlying TSNE estimator (random_state fixed to 0).

        ``n_neighbors`` is accepted but not used.
        """
        self.tsne = manifold.TSNE(n_components, init=init, random_state=0)

    def run(self, X):
        """Fit t-SNE on ``X`` and return the embedded points."""
        embedded = self.tsne.fit_transform(X)
        return embedded


# Script entry point: embed a 2000-point swiss roll with t-SNE and plot it.
if __name__ == "__main__":

    tsne = manifold.TSNE(n_components=2, init='pca')
    points, color = datasets.make_swiss_roll(n_samples=2000)
    # Convert explicitly instead of assigning to ``.dtype`` in place, which
    # reinterprets the buffer rather than converting the values.
    points = points.astype('float64')
    # The leftover ``pdb.set_trace()`` breakpoint was removed so the script
    # runs unattended.
    X_tsne = tsne.fit_transform(points)
    plot_artificial_dataset(points, X_tsne, color=color, title='title')
# Figure 1: the 2-D dataset X2D (defined outside this excerpt) with the two
# unit axes drawn as arrows from the origin.
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')

# The points are drawn twice, once with "+" and once with "." markers.
ax.plot(X2D[:, 0], X2D[:, 1], "k+")
ax.plot(X2D[:, 0], X2D[:, 1], "k.")
# Mark the origin.
ax.plot([0], [0], "ko")
# Unit arrows along the vertical and horizontal directions.
ax.arrow(0, 0, 0, 1, head_width=0.05, length_includes_head=True, head_length=0.1, fc='k', ec='k')
ax.arrow(0, 0, 1, 0, head_width=0.05, length_includes_head=True, head_length=0.1, fc='k', ec='k')
ax.set_xlabel("$z_1$", fontsize=18)
ax.set_ylabel("$z_2$", fontsize=18, rotation=0)
ax.axis([-1.5, 1.3, -1.2, 1.2])
ax.grid(True)
save_fig("dataset_2d_plot")
plt.show()

# Figure 2: a noisy 1000-point swiss roll in 3-D, coloured by its target t.
X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)

# Axis limits as [xmin, xmax, ymin, ymax, zmin, zmax].
axes = [11.5, 14, -2, 23, -12, 15]

fig = plt.figure(figsize=(6, 5))
ax = fig.add_subplot(111, projection='3d')

ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=t, cmap=plt.cm.hot)
ax.view_init(10, -70)
ax.set_xlabel("$x_1$", fontsize=18)
ax.set_ylabel("$x_2$", fontsize=18)
ax.set_zlabel("$x_3$", fontsize=18)
ax.set_xlim(axes[0:2])
ax.set_ylim(axes[2:4])
ax.set_zlim(axes[4:6])
def load_swiss_roll():
    """Return ``(ARTIFICIAL, (points, target))`` for a 1500-point swiss roll."""
    roll = datasets.make_swiss_roll(n_samples=1500)
    return ARTIFICIAL, roll
Example #36
-1
def make_sklearn_dataset(dataset_name, n_samples):
    """Create one of several 2-D toy datasets with scikit-learn.

    Args:
        dataset_name: One of 'circles_distant', 'moons', 'blobs',
            'circles_near', 's_curve' or 'swiss_roll'.
        n_samples: Number of samples.  's_curve' and 'swiss_roll' are built
            from two halves of ``n_samples // 2`` points each.

    Returns:
        A ``(points, labels)`` tuple.  For 's_curve' and 'swiss_roll' the
        label is 0 for the first half and 1 for the second.

    Raises:
        ValueError: If ``dataset_name`` is not supported.  (Previously an
            unknown name surfaced as an ``UnboundLocalError`` on return.)
    """
    # create dataset
    if 'circles_distant' == dataset_name:  # labels=3, seed=1, n-samples=1000, max-depth=4 OR labels=4, seed=1, n-samples=1000, max-depth=4
        dataset = datasets.make_circles(n_samples=n_samples,
                                        factor=.5,
                                        noise=.05)
    elif 'moons' == dataset_name:  # labels=2, seed=13, n-samples=500, max-depth=4 OR labels=1, seed=27, n-samples=500, max-depth=4
        dataset = datasets.make_moons(n_samples=n_samples, noise=.05)
    elif 'blobs' == dataset_name:  # labels=1, seed=0, n-samples=100, max-depth=3
        dataset = datasets.make_blobs(n_samples=n_samples, random_state=8)
    elif 'circles_near' == dataset_name:  # labels = 20, seed=0, n-samples=2000, max-depth=5
        dataset = datasets.make_circles(n_samples=n_samples, noise=.05)
    elif 's_curve' == dataset_name:  # labels=10, seed=35, n-samples=2500, max-depth=7
        # Two S-curves projected onto coordinates 0 and 2; the second one
        # is shifted by (0.5, 0.5).
        scurve1 = datasets.make_s_curve(n_samples=n_samples // 2, noise=.05)
        scurve1 = np.vstack((scurve1[0][:, 0], scurve1[0][:, 2])).T
        scurve2 = datasets.make_s_curve(n_samples=n_samples // 2, noise=.05)
        scurve2 = np.vstack(
            (scurve2[0][:, 0], scurve2[0][:, 2])).T + [.5, .5]  # offset
        dataset = np.concatenate((scurve1, scurve2), 0), \
                  np.concatenate((np.asarray([0] * scurve1.shape[0]),
                                  np.asarray([1] * scurve2.shape[0])), 0)
    elif 'swiss_roll' == dataset_name:  # labels = 10, seed = 35, n-samples=2500, max-depth=5
        # Two swiss rolls projected onto coordinates 0 and 2; the second
        # one is shrunk to 75% so it nests inside the first.
        sroll1 = datasets.make_swiss_roll(n_samples=n_samples // 2, noise=.05)
        sroll1 = np.vstack((sroll1[0][:, 0], sroll1[0][:, 2])).T
        sroll2 = datasets.make_swiss_roll(n_samples=n_samples // 2, noise=.05)
        sroll2 = np.vstack(
            (sroll2[0][:, 0], sroll2[0][:, 2])).T * 0.75  # shrink
        dataset = np.concatenate((sroll1, sroll2), 0), \
                  np.concatenate((np.asarray([0] * sroll1.shape[0]),
                                  np.asarray([1] * sroll2.shape[0])), 0)
    else:
        raise ValueError("Unknown dataset_name %r" % (dataset_name,))

    return dataset