def data_loader(dataset, num_classes, one_hot=True, reshape=False):
    """Load a dataset and split it into train / validation / test parts.

    Supported names are the scikit-learn generators ``"circles"``,
    ``"moons"`` and ``"swiss_roll"`` and the Keras image sets ``"mnist"``,
    ``"fashion_mnist"``, ``"cifar10"`` and ``"cifar100"``.  The first 10000
    training samples are always held out as the validation split.

    Args:
        dataset: Name of the dataset to load (see above).
        num_classes: Number of classes passed to ``to_categorical``.
        one_hot: If true, labels of all three splits are one-hot encoded.
        reshape: If true, every sample is flattened to a 1-D vector.

    Returns:
        A ``Split(train, valid, test)`` namedtuple whose fields are
        ``Dataset(images, labels, len)`` namedtuples.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    synthetic_names = ("circles", "moons", "swiss_roll")
    image_names = ("mnist", "fashion_mnist", "cifar10", "cifar100")
    if dataset not in synthetic_names and dataset not in image_names:
        # Previously this fell through and failed later with an unrelated
        # UnboundLocalError on X.
        raise ValueError(
            "unknown dataset %r; expected one of %s"
            % (dataset, ", ".join(synthetic_names + image_names)))

    def _min_max(values):
        # Rescale with the global min / max of the whole array.
        return (values - values.min()) / (values.max() - values.min())

    if dataset == "circles":
        X, y = datasets.make_circles(n_samples=70000, factor=.5, noise=.05)
        Xte, yte = datasets.make_circles(n_samples=10000, factor=.5, noise=.05)
        X, Xte = _min_max(X), _min_max(Xte)
    elif dataset == "moons":
        X, y = datasets.make_moons(n_samples=70000, noise=.05)
        Xte, yte = datasets.make_moons(n_samples=10000, noise=.05)
        X, Xte = _min_max(X), _min_max(Xte)
    elif dataset == "swiss_roll":
        X, y = datasets.make_swiss_roll(n_samples=70000, noise=.05)
        Xte, yte = datasets.make_swiss_roll(n_samples=10000, noise=.05)
        X, Xte = _min_max(X), _min_max(Xte)
        # The generator returns a continuous value; threshold it at the mean
        # to obtain binary labels.
        y = np.where(y > y.mean(), 1, 0)
        yte = np.where(yte > yte.mean(), 1, 0)
    else:
        loader = getattr(getattr(tf.keras.datasets, dataset), 'load_data')
        (X, y), (Xte, yte) = loader()
        X = X / 255.0
        Xte = Xte / 255.0
        if dataset in ("mnist", "fashion_mnist"):
            # Add a trailing axis to the MNIST-style arrays.
            X = np.expand_dims(X, axis=3)
            Xte = np.expand_dims(Xte, axis=3)

    # Hold out the first 10000 training samples for validation.
    Xval, yval = X[:10000], y[:10000]
    X, y = X[10000:], y[10000:]

    if one_hot:
        y = tf.keras.utils.to_categorical(y, num_classes)
        yval = tf.keras.utils.to_categorical(yval, num_classes)
        yte = tf.keras.utils.to_categorical(yte, num_classes)

    if reshape:
        X = X.reshape(-1, np.prod(X.shape[1:]))
        Xte = Xte.reshape(-1, np.prod(Xte.shape[1:]))
        Xval = Xval.reshape(-1, np.prod(Xval.shape[1:]))

    Dataset = namedtuple('Dataset', 'images labels len')
    Split = namedtuple('Split', ['train', 'valid', 'test'])
    data = Split(Dataset(X, y, len(X)),
                 Dataset(Xval, yval, len(Xval)),
                 Dataset(Xte, yte, len(Xte)))
    return data
def __init__(self, train=True, n_samples=6000, noise=0.05, test_fraction=0.1, seed=42):
    """Generate a swiss roll and hand it to the parent constructor.

    Args:
        train: Forwarded unchanged to the parent constructor.
        n_samples: Number of points to generate.
        noise: Noise level passed to ``make_swiss_roll``.
        test_fraction: Forwarded unchanged to the parent constructor.
        seed: Seeds both the generator call and the ``RandomState`` that is
            forwarded to the parent constructor.
    """
    _rnd = np.random.RandomState(seed)
    # NOTE(review): assumes make_swiss_roll is sklearn.datasets.make_swiss_roll,
    # whose `noise` and `random_state` parameters are keyword-only in current
    # releases -- confirm against the file's imports.
    data, pos = make_swiss_roll(n_samples, noise=noise, random_state=seed)
    data = data.astype(np.float32)
    pos = pos.astype(np.float32)
    super().__init__(data, pos, train, test_fraction, _rnd)
def test_swiss_roll(self):
    """Run scikit-learn's Isomap and the local ``Isomap`` on one swiss roll.

    Nothing is asserted: the input data and both embeddings (with their
    wall-clock timings in the titles) are loaded into a ``Displayer`` and
    shown for visual comparison.
    """
    samples = 1000
    neighbors = 10
    n_components = 2

    data, c = datasets.make_swiss_roll(n_samples=samples, random_state=0)
    displayer = Displayer(title="Isomap algorithms comparison") \
        .load(title="Swiss roll from %i samples." % (samples,), data=data, color=c)

    start = time()
    # Keyword arguments: scikit-learn's Isomap constructor parameters are
    # keyword-only, so the positional form raises on current releases.
    result = manifold.Isomap(n_neighbors=neighbors,
                             n_components=n_components).fit_transform(data)
    elapsed = time() - start
    displayer \
        .load(
            title="SKLearn's Isomap with %i neighbors, taking %.1fs." % (neighbors, elapsed),
            data=result, color=c)

    start = time()
    result = Isomap(k=neighbors, n_components=n_components).transform(data)
    elapsed = time() - start
    displayer \
        .load(
            title="Isomap with %i neighbors, taking %.1fs" % (neighbors, elapsed),
            data=result, color=c)

    displayer.show()
def make_roll(n_classes=3, samples=256, seed=None, noise=0.0, *args, **kwargs):
    """Generate a swiss-roll classification problem for program synthesis.

    The second array returned by ``make_swiss_roll`` is discretised into
    ``n_classes`` ordinal bins with ``KBinsDiscretizer`` and used as labels.
    Extra positional and keyword arguments are accepted and ignored.

    Returns:
        ``(x, y)``: the points cast to float32 and the flattened integer
        labels, both converted with ``tensor.to_backend``.
    """
    points, position = make_swiss_roll(
        n_samples=samples, random_state=seed, noise=noise)

    discretizer = KBinsDiscretizer(n_bins=n_classes, encode="ordinal")
    # The discretizer expects a 2-D column, hence the reshape.
    labels = discretizer.fit_transform(position.reshape(-1, 1)).astype(int)

    features = tensor.to_backend(points.astype(numpy.float32))
    return features, tensor.to_backend(labels).flatten()
def other_dimensional_reduction():
    """Plot 2-D MDS, Isomap and t-SNE embeddings of a noisy swiss roll.

    The three scatter plots are drawn side by side, coloured by the second
    array returned by ``make_swiss_roll``, saved through ``save_fig`` and
    then shown.
    """
    from sklearn.datasets import make_swiss_roll
    from sklearn.manifold import MDS, Isomap, TSNE

    X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=41)

    # Fit in the same order as the panels are drawn.
    embeddings = []
    for title, reducer in (("MDS", MDS(n_components=2)),
                           ("Isomap", Isomap(n_components=2)),
                           ("t-SNE", TSNE(n_components=2, random_state=42))):
        embeddings.append((title, reducer.fit_transform(X)))

    plt.figure(figsize=(11, 4))
    for subplot, (title, reduced) in enumerate(embeddings, start=131):
        plt.subplot(subplot)
        plt.title(title, fontsize=14)
        plt.scatter(reduced[:, 0], reduced[:, 1], c=t, cmap=plt.cm.hot)
        plt.xlabel("$z_1$", fontsize=18)
        # Only the left-most panel gets a y-axis label.
        if subplot == 131:
            plt.ylabel("$z_2$", fontsize=18, rotation=0)
        plt.grid(True)

    save_fig("other_dimensional_reduction")
    plt.show()
def __init__(self, location=None, setype='moons', train=False, n_samples=2000, noise=0.05):
    """Load a cached point set from disk, or generate and cache a new one.

    Args:
        location: Directory holding the ``.npy`` cache.  When ``None`` the
            directory ``gen_data/`` is used and the data is always
            regenerated.
        setype: ``'moons'`` or ``'swiss_roll'``.
        train: Selects the file name: ``<setype>.npy`` when true,
            ``<setype>_val.npy`` otherwise.
        n_samples: Number of points to generate.
        noise: Noise level passed to the scikit-learn generator.

    Raises:
        ValueError: If data has to be generated and ``setype`` is not one of
            the supported names.
    """
    super(GeneratedSet, self).__init__()

    generate = location is None
    if generate:
        location = 'gen_data/'
    os.makedirs(location, exist_ok=True)

    filename = (setype + '.npy') if train else (setype + '_val.npy')
    dfile = osp.join(location, filename)

    if not generate:
        # Only read the cache when it will actually be used; with no explicit
        # location the data is regenerated and the file overwritten anyway.
        if osp.exists(dfile):
            self.data = np.load(dfile)
        else:
            generate = True

    if generate:
        if setype == 'moons':
            points = sklsets.make_moons(n_samples=n_samples, noise=noise)[0]
        elif setype == 'swiss_roll':
            points = sklsets.make_swiss_roll(n_samples=n_samples, noise=noise)[0]
        else:
            # Previously this left self.data unset and failed inside np.save.
            raise ValueError(
                "unknown setype %r; expected 'moons' or 'swiss_roll'" % (setype,))
        self.data = points.astype(np.float32)
        np.save(dfile, self.data)
def bonus():
    """Plot the first diffusion-map eigenvector against the others.

    A 5000-point noise-free swiss roll is embedded with datafold's
    ``DiffusionMaps`` (9 eigenpairs); a random subset of 1000 points is used
    for the pairwise eigenvector plot.
    """
    nr_samples = 5000
    nr_samples_plot = 1000

    # Random subset of point indices, used only to keep the plot light.
    idx_plot = np.random.permutation(nr_samples)[0:nr_samples_plot]

    # Point cloud and the per-point value used for colouring.
    X, X_color = make_swiss_roll(nr_samples, noise=0.0, random_state=None)

    # Let datafold pick the kernel scale and the cut-off distance.
    X_pcm = pfold.PCManifold(X)
    X_pcm.optimize_parameters(result_scaling=0.5)
    print(f'epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}')

    kernel = pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon)
    dmap = dfold.DiffusionMaps(kernel=kernel,
                               n_eigenpairs=9,
                               dist_kwargs=dict(cut_off=X_pcm.cut_off))
    dmap = dmap.fit(X_pcm)

    evecs, evals = dmap.eigenvectors_, dmap.eigenvalues_
    print(evecs.shape)
    print(evals.shape)

    plot_pairwise_eigenvector(
        eigenvectors=dmap.eigenvectors_[idx_plot, :],
        n=1,
        fig_params=dict(figsize=[15, 15]),
        scatter_params=dict(cmap=plt.cm.Spectral, c=X_color[idx_plot]))
    plt.show()
def test_make_swiss_roll():
    """Check the output shapes of ``make_swiss_roll`` and its x / z columns.

    With zero noise, column 0 must equal ``t * cos(t)`` and column 2 must
    equal ``t * sin(t)``.
    """
    X, t = make_swiss_roll(n_samples=5, noise=0.0, random_state=0)

    assert_equal(X.shape, (5, 3), "X shape mismatch")
    assert_equal(t.shape, (5,), "t shape mismatch")

    # Compare floating-point results with a tolerance rather than bit-exact
    # equality, so the test does not depend on how the generator happens to
    # order its arithmetic.
    np.testing.assert_array_almost_equal(X[:, 0], t * np.cos(t))
    np.testing.assert_array_almost_equal(X[:, 2], t * np.sin(t))
def test_make_swiss_roll(hole):
    """Check shapes and the x / z columns of ``make_swiss_roll``.

    ``hole`` is forwarded to the generator unchanged.
    """
    points, angle = make_swiss_roll(
        n_samples=5, noise=0.0, random_state=0, hole=hole)

    assert points.shape == (5, 3)
    assert angle.shape == (5, )

    # Column 0 is t*cos(t), column 2 is t*sin(t) when there is no noise.
    expected_by_column = ((0, angle * np.cos(angle)),
                          (2, angle * np.sin(angle)))
    for column, expected in expected_by_column:
        assert_array_almost_equal(points[:, column], expected)
def test_make_swiss_roll():
    """Verify output shapes and the noise-free x / z columns of the roll."""
    roll, t = make_swiss_roll(n_samples=5, noise=0.0, random_state=0)

    # Shapes first: five 3-D points and five scalar values.
    assert_equal(roll.shape, (5, 3), "X shape mismatch")
    assert_equal(t.shape, (5,), "t shape mismatch")

    # Then the columns that are direct functions of t.
    expected_x = t * np.cos(t)
    expected_z = t * np.sin(t)
    assert_array_almost_equal(roll[:, 0], expected_x)
    assert_array_almost_equal(roll[:, 2], expected_z)
def _run(self):
    """Generate a swiss roll, display it and print ``np.cov`` of its columns.

    Uses ``self.samples`` as the number of points and ``self.displayer`` for
    the plot.
    """
    data, target = datasets.make_swiss_roll(
        n_samples=self.samples, random_state=0)

    loaded = self.displayer.load(data, target)
    loaded.show()

    # NOTE(review): the heading says "Correlation" but np.cov returns the
    # covariance matrix -- confirm which one is intended.
    print('Correlation matrix:')
    # rowvar=0: columns are treated as the variables.
    column_covariance = np.cov(data, rowvar=0)
    print(column_covariance)
def swiss_roll():
    """Embed a 2000-point swiss roll with the local MDS, diffusion-map and
    LLE implementations and write the resulting plots under ``./plots``.
    """
    # Only referenced by the (disabled) cross-checks against library
    # implementations: sklearn's manifold.MDS and pydiffmap's DiffusionMap.
    from sklearn import manifold

    points, color = datasets.make_swiss_roll(n_samples=2000)

    # --- MDS -------------------------------------------------------------
    distances = calculate_distances(points)
    mds_embedding = MDS(distances, 2)

    # Scree plot
    mds_eigvals, _ = get_mds_eig_entities(distances)
    scree_plot(mds_eigvals, "MDS", './plots/swiss_roll_scree_plot_mds.png')
    plot_data(points, mds_embedding, color, './plots/plot_data_mds.png')

    # --- Diffusion maps --------------------------------------------------
    diffusion_embedding = DiffusionMap(points, 2, 100, 1000)

    # Scree plot
    diffusion_eigvals, _ = get_diffusion_maps_eig_entites(points, 100)
    scree_plot(diffusion_eigvals, "Diffusion Maps",
               './plots/scree_plot_diffusion_maps.png')
    plot_data(points, diffusion_embedding, color,
              './plots/plot_data_diffusion_map.png')

    # --- LLE -------------------------------------------------------------
    lle_embedding = LLE(points, 2, 12)
    plot_data(points, lle_embedding, color, './plots/plot_data_lle.png')
def test_similar_graphics(self):
    """Tests if the Displayer class presents a graphic similar to the one
    drawn by the hard-coded matplotlib lines below (manual checking).
    """
    points = 1000
    data, color = datasets.make_swiss_roll(points, random_state=0)

    neighbors = 10
    to_dimension = 2
    # Keyword arguments: scikit-learn's Isomap constructor parameters are
    # keyword-only, so the positional form raises on current releases.
    result = manifold.Isomap(n_neighbors=neighbors,
                             n_components=to_dimension).fit_transform(data)

    # Expected printing... Axes3D
    fig = plt.figure(figsize=(15, 8))
    plt.suptitle("Expected image", fontsize=14)

    ax = fig.add_subplot(121, projection='3d')
    ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=color, cmap=plt.cm.Spectral)
    ax.view_init(4, -72)

    ax = fig.add_subplot(122)
    plt.scatter(result[:, 0], result[:, 1], c=color, cmap=plt.cm.Spectral)
    plt.title("SKLearn's Isomap")
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')

    # Actual printing...
    Displayer(title="Actual image", points=points, neighbors=neighbors) \
        .load(data, color, title='Graphic I') \
        .load(result, color, title='SKLearn\'s Isomap') \
        .show()
def demo(k):
    """Embed a noisy swiss roll with LER and plot it next to the 3-D roll.

    Args:
        k: Passed to ``LER`` as ``n_neighbors``.
    """
    X, t = make_swiss_roll(noise=1)

    ler = LER(n_components=2, n_neighbors=k, affinity='rbf')
    ler_X = ler.fit_transform(X, t)

    _, axes = plt.subplots(nrows=1, ncols=2, figsize=plt.figaspect(0.33))

    # Hide the flat left-hand axes and replace it with a 3-D one.
    axes[0].set_axis_off()
    # 121 rather than 131: the figure is a 1x2 grid, so the 3-D axes has to
    # use the same grid to line up with the left-hand cell (131 was left over
    # from the three-panel variant of this demo).
    axes[0] = plt.subplot(121, projection='3d')
    axes[0].scatter(*X.T, c=t, s=50)
    axes[0].set_title('Swiss Roll')

    axes[1].scatter(*ler_X.T, c=t, s=50)
    axes[1].set_title('LER Embedding')

    plt.show()
def create_true_data(type_of_data, number_of_modes, std, size, vocabulary_size):
    """Sample 2-D points from one of several synthetic distributions.

    Args:
        type_of_data: One of ``"mixture_of_gaussians"``, ``"blobs"``,
            ``"moons"``, ``"circles"``, ``"swiss_roll"`` or ``"s_curve"``.
        number_of_modes: Number of Gaussian components / blob centres
            (only used by the first two types).
        std: Cluster standard deviation for ``"blobs"`` and noise level for
            ``"swiss_roll"``; ignored by the other types.
        size: Number of samples requested from each generator call (for the
            mixture, per mode).
        vocabulary_size: Upper end of the coordinate range the points are
            scaled towards.

    Returns:
        A 2-column array of points.  The mixture branch returns floats
        clipped to ``[0, vocabulary_size]``; every other branch returns
        integers.

    Raises:
        ValueError: If ``type_of_data`` is not one of the supported names.
    """
    known_types = ("mixture_of_gaussians", "blobs", "moons", "circles",
                   "swiss_roll", "s_curve")
    if type_of_data not in known_types:
        # Previously this fell through to an UnboundLocalError at `return x`.
        raise ValueError("unknown type_of_data %r; expected one of %s"
                         % (type_of_data, ", ".join(known_types)))

    if type_of_data == "mixture_of_gaussians":
        list_of_x_values, list_of_y_values = [], []
        for _ in range(number_of_modes):
            # One random centre per mode and axis; the spread is fixed at 500.
            list_of_x_values.append(np.clip(
                np.random.normal(loc=np.random.randint(vocabulary_size - 1),
                                 scale=500, size=size),
                0, vocabulary_size))
            list_of_y_values.append(np.clip(
                np.random.normal(loc=np.random.randint(vocabulary_size - 1),
                                 scale=500, size=size),
                0, vocabulary_size))
        # np.append flattens the per-mode arrays into one long vector.
        x = np.column_stack((np.append([], list_of_x_values),
                             np.append([], list_of_y_values)))

    # Random rotation; drawn for every type so the global RNG stream is
    # consumed exactly as before.
    cos_theta = np.random.uniform()
    sin_theta = math.sqrt(1 - cos_theta * cos_theta)
    rotation = np.array([[cos_theta, -sin_theta], [sin_theta, cos_theta]])

    half = vocabulary_size / 2

    if type_of_data == "blobs":
        blobs = make_blobs(n_samples=size, centers=number_of_modes,
                           cluster_std=std)[0]
        x = np.clip((vocabulary_size / 20) * blobs + half,
                    [0, 0], [vocabulary_size, vocabulary_size]).astype(int)
    elif type_of_data == "moons":
        # This branch uses the opposite rotation direction from the others.
        moons_rotation = np.array([[cos_theta, sin_theta],
                                   [-sin_theta, cos_theta]])
        x = ((np.dot(make_moons(n_samples=size)[0] * (1 / 2), moons_rotation))
             * half + half).astype(int)
    elif type_of_data == "circles":
        x = ((make_circles(n_samples=size)[0] * half) + half).astype(int)
    elif type_of_data == "swiss_roll":
        x = make_swiss_roll(n_samples=size, random_state=2, noise=std)[0]
        # Keep the first and third coordinate only.
        x = np.column_stack((x[:, 0], x[:, 2]))
        x = np.dot((1 / 25) * x, rotation)
        x = (x * half + half).astype(int)
    elif type_of_data == "s_curve":
        x = make_s_curve(n_samples=size)[0] / 2
        x = np.column_stack((x[:, 0], x[:, 2]))
        x = ((np.dot(x, rotation)) * half + half).astype(int)

    return x
def get_swiss_roll():
    """Return a 1500-point swiss roll as torch tensors on ``device``.

    Returns:
        ``(inputs, targets)``: the points converted to float, and the second
        array returned by ``make_swiss_roll`` with its dtype left unchanged.
    """
    points, position = make_swiss_roll(n_samples=1500, random_state=123)

    inputs = torch.from_numpy(points)
    inputs = inputs.to(device).float()

    targets = torch.from_numpy(position)
    targets = targets.to(device)

    return inputs, targets
def make_data(self):
    """Build the swiss roll data.

    Stores 1000 noise-free roll points in ``self.X_data`` and the labels of
    a 6-cluster Ward agglomerative clustering of them in ``self.Y_data``.
    """
    self.X_data, _ = make_swiss_roll(1000, noise=0, random_state=0)

    clustering = AgglomerativeClustering(n_clusters=6, linkage='ward')
    clustering = clustering.fit(self.X_data)
    self.Y_data = clustering.labels_
def generate_swiss(num_points, seed):
    """Generate a noisy swiss roll with binary +1 / -1 labels.

    Args:
        num_points: Number of points to generate.
        seed: Random state passed to the generator.

    Returns:
        ``(A, y_binary)``: the points, and a list holding ``1`` where the
        second array returned by the generator is above its mean and ``-1``
        elsewhere.
    """
    # NOTE(review): assumes `ds` is sklearn.datasets; its make_swiss_roll
    # takes `noise` and `random_state` as keyword-only arguments, so the
    # former positional call raises on current releases.
    A, y = ds.make_swiss_roll(num_points, noise=2, random_state=seed)
    my = np.mean(y)
    y_binary = [1 if value > my else -1 for value in y]
    return A, y_binary
def simulate(num_samples, noise=0.5):
    """Draw a 2-D swiss roll and advance the module-level ``seed``.

    The generator is seeded with the current global ``seed``, which is then
    incremented so the next call produces a different sample.

    Returns:
        The roll with its second column removed, divided by 5.
    """
    global seed
    roll, _ = make_swiss_roll(n_samples=num_samples, noise=noise,
                              random_state=seed)
    seed += 1

    # Drop column 1, keeping columns 0 and 2.
    flat_roll = np.delete(roll, 1, axis=1)
    return flat_roll / 5.
def make_broken_swiss_roll(n_samples, random_state=1):
    """Return a swiss roll with the part where ``x <= -5`` cut away.

    Twice as many points as requested are generated, the cut is applied and
    at most the first ``n_samples`` surviving points are returned together
    with their matching values from the generator's second output.
    """
    # Oversample so that enough points are left after the cut.
    roll, position = make_swiss_roll(2 * n_samples, random_state=random_state)

    # Keep only points whose first coordinate is greater than -5.
    keep = roll[:, 0] > -5
    roll = roll[keep, :]
    position = position[keep]

    return roll[:n_samples, :], position[:n_samples]
def plot_kpca():
    """Plot three Kernel PCA projections of a swiss roll and the RBF pre-image.

    The first figure shows the 2-D projections obtained with a linear, an
    RBF and a sigmoid kernel; the second shows the points reconstructed by
    ``inverse_transform`` from the RBF projection.  Both figures are saved
    through ``save_fig`` and shown.
    """
    X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)

    lin_pca = KernelPCA(n_components=2, kernel="linear",
                        fit_inverse_transform=True)
    rbf_pca = KernelPCA(n_components=2, kernel="rbf", gamma=0.0433,
                        fit_inverse_transform=True)
    sig_pca = KernelPCA(n_components=2, kernel="sigmoid", gamma=0.001,
                        coef0=1, fit_inverse_transform=True)

    # Raw strings: "\g" is not a valid escape sequence, so the non-raw form
    # triggers an invalid-escape warning.  The runtime text is unchanged.
    panels = (
        (131, lin_pca, "Linear kernel"),
        (132, rbf_pca, r"RBF kernel, $\gamma=0.04$"),
        (133, sig_pca, r"Sigmoid kernel, $\gamma=10^{-3}, r=1$"),
    )

    plt.figure(figsize=(11, 4))
    for subplot, pca, title in panels:
        X_reduced = pca.fit_transform(X)
        if subplot == 132:
            # Kept for the pre-image reconstruction below.
            X_reduced_rbf = X_reduced

        plt.subplot(subplot)
        plt.title(title, fontsize=14)
        plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=t, cmap=plt.cm.hot)
        plt.xlabel("$z_1$", fontsize=18)
        if subplot == 131:
            plt.ylabel("$z_2$", fontsize=18, rotation=0)
        plt.grid(True)

    save_fig("kernel_pca_plot")
    plt.show()

    # Inverse step: map the RBF projection back to the input space.
    plt.figure(figsize=(6, 5))
    X_inverse = rbf_pca.inverse_transform(X_reduced_rbf)

    ax = plt.subplot(121, projection='3d')
    ax.view_init(10, -70)
    ax.scatter(X_inverse[:, 0], X_inverse[:, 1], X_inverse[:, 2],
               c=t, cmap=plt.cm.hot, marker="x")
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_zlabel("")
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.set_zticklabels([])

    save_fig("preimage_plot", tight_layout=False)
    plt.show()
def generate_data():
    '''
    Generate a 200-point swiss roll with noise 0.20.

    The global NumPy random generator is seeded with 0 first.
    :return: X: input data, y: given labels
    '''
    np.random.seed(0)
    swiss_roll = datasets.make_swiss_roll(200, noise=0.20)
    X, y = swiss_roll
    return X, y
def swiss(batch_size, size=1., std=0.01):
    """Sample from a mixture of Gaussians centred on swiss-roll points.

    A 1000-point roll is generated; its first and third coordinates are
    scaled by ``size`` over their joint maximum and each resulting 2-D point
    becomes the mean of one diagonal Gaussian with scale ``std``.

    Returns:
        ``batch_size`` samples drawn from the mixture.
    """
    x, _ = datasets.make_swiss_roll(1000)

    # Columns 0 and 2, normalised by their joint maximum.
    plane = x[:, ::2]
    centres = plane * size / plane.max()

    # Zero logits for every component.
    cat = ds.Categorical(tf.zeros(len(x)))
    comps = []
    for xi, yi in centres:
        comps.append(ds.MultivariateNormalDiag([xi, yi], [std, std]))

    data = ds.Mixture(cat, comps)
    return data.sample(batch_size)
def load_swissroll(n_datapoints=1000, noice=0.0):
    '''
    Generates a Swiss roll dataset (by default 1000 noise-free datapoints).

    n_datapoints: Number of points to generate.
    noice: Noise level, passed to the generator as ``noise``.

    Returns the generator's output unchanged, a tuple (data, target):

    data: The data matrix containing the points.
    target: The univariate position of the sample according to the main
            dimension of the points in the manifold. Can be used as the color.

    http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_swiss_roll.html
    '''
    dataset = datasets.make_swiss_roll(
        n_samples=n_datapoints,
        noise=noice,
        random_state=None,
    )
    return dataset
def generate_data(self):
    """Generate the swiss roll, store it on the instance and save a plot.

    Sets ``self.data``, ``self.target`` and ``self.original_data`` (the same
    array object as ``self.data``), then loads the data into
    ``self.displayer``, saves it under ``'datasets/swiss'`` and disposes it.
    """
    generated = datasets.make_swiss_roll(n_samples=self.samples, random_state=0)
    swiss_roll, swiss_roll_colors = generated

    self.data = swiss_roll
    self.target = swiss_roll_colors
    self.original_data = self.data

    loaded = self.displayer.load(swiss_roll, swiss_roll_colors)
    loaded.save('datasets/swiss').dispose()
def swiss_roll_3d(n_samples=800):
    """Return swiss-roll points ordered by the generator's second output.

    The generator is called with a fixed ``random_state`` of 0.
    """
    points, position = sk_datasets.make_swiss_roll(
        n_samples=n_samples, random_state=0)

    # Sort the rows by ascending position value.
    order = np.argsort(position)
    return points[order, :]
def generate_swiss_roll_data(n_samples):
    """Generate a thinned swiss roll together with per-point colours.

    Args:
        n_samples: Number of points to generate.

    Returns:
        ``(X, X_color, "Swiss roll")``: the points with their second
        coordinate halved, colours from ``plt.cm.jet`` based on the squared
        distance of each point from the y axis, and a display name.
    """
    noise = 0.05
    # NOTE(review): assumes make_swiss_roll is sklearn's, where `noise` is a
    # keyword-only argument; the former positional call raises on current
    # releases.
    X, _ = make_swiss_roll(n_samples, noise=noise)

    # Make it thinner
    X[:, 1] *= .5

    # Squared distance in the x-z plane, scaled into [0, 1) for the colormap.
    distance_from_y_axis = X[:, 0]**2 + X[:, 2]**2
    X_color = plt.cm.jet(distance_from_y_axis / np.max(distance_from_y_axis + 1))
    return X, X_color, "Swiss roll"
# generate_data(self, samples): generates a swiss roll with `samples` points
# (random_state fixed to 0), stores the two returned arrays in self.data and
# self.target, and aliases self.original_data to the same array as self.data.
# When self.plotting is truthy the data is also loaded into self.displayer.
# The size of self.data in KB and its shape are printed.
# NOTE(review): the line structure of this block has been lost, so it cannot
# be told from here whether the two print calls belong inside the
# `if self.plotting:` branch or run unconditionally -- confirm when
# restoring the formatting.
def generate_data(self, samples): self.data, self.target = datasets.make_swiss_roll(n_samples=samples, random_state=0) self.original_data = self.data if self.plotting: self.displayer.load(self.data, self.target) print('Data set size: %.2fKB' % (self.data.nbytes / 1024)) print('Shape: %s' % str(self.data.shape))
def make_classification(self, name="circles", n_classes=2):
    """
    Creates a classification data set.

    :param name: name of the data set to be generated
        - simple_linear (linearly separable)
        - linear (linearly separable, sklearn)
        - non_linear (non-linear)
        - spiral (non-linear)
        - spiral_complex (non-linear)
        - circles (non-linear, sklearn)
        - moons (non-linear, sklearn)
        - swiss (swiss roll, non-linear, sklearn); any name not listed
          above also falls back to this data set
    :param n_classes: number of classes (only needed for name="linear")
    :return: X, y (data features and labels)
    """
    # custom data set
    if name == "simple_linear":
        X, y = self.__make_simple_linear()

    # random data generated by sklearn
    elif name == "linear":
        X, y = make_classification(n_samples=200, n_features=2, n_redundant=0,
                                   n_informative=2, n_clusters_per_class=1,
                                   n_classes=n_classes, class_sep=3.25,
                                   random_state=42)

    # non linear data set
    elif name == "non_linear":
        X, y = self.__make_non_linear()

    # spiral data set
    elif name == "spiral":
        X, y = self.__make_spiral(n_samples=200)

    elif name == "spiral_complex":
        X, y = self.__make_spiral_complex(n_samples=200, noise=0.0)

    # circular data
    elif name == "circles":
        X, y = make_circles(n_samples=400, factor=0.3, noise=0.2)

    # moon data set
    elif name == "moons":
        X, y = make_moons(n_samples=150, noise=0.07, random_state=21)

    # swiss roll data set
    else:
        # `noise` is keyword-only in scikit-learn's make_swiss_roll; the
        # former positional form raises on current releases.
        X, y = make_swiss_roll(2000, noise=0.00)

    return X, y
def __init__(self, dataset_size=25000, **kwargs):
    """Build a regression data set from a noisy swiss roll.

    The second and third roll coordinates become the 2-D input ``self.x``;
    the first coordinate becomes the target ``self.y``, reshaped into a
    single column.  Extra keyword arguments are accepted and ignored.
    """
    XY, _ = make_swiss_roll(n_samples=dataset_size, noise=0.05)

    inputs = XY[:, 1:]
    targets = XY[:, 0]

    self.x = torch.Tensor(inputs)
    self.y = torch.Tensor(targets)
    # One row per sample, one target column.
    self.y = self.y.view(self.y.shape[0], -1)

    self.input_size = 2
    self.label_size = 1
    self.dataset_size = dataset_size
def demo(k):
    """Plot a noisy swiss roll next to its LE and LER embeddings.

    Args:
        k: Passed as ``n_neighbors`` to both ``SpectralEmbedding`` and
            ``LER``.
    """
    X, t = make_swiss_roll(noise=1)

    le_X = SpectralEmbedding(n_components=2, n_neighbors=k).fit_transform(X)
    ler_X = LER(n_components=2, n_neighbors=k, affinity='rbf').fit_transform(X, t)

    _, axes = plt.subplots(nrows=1, ncols=3, figsize=plt.figaspect(0.33))

    # Left panel: hide the flat axes and draw the roll on a 3-D one instead.
    axes[0].set_axis_off()
    axes[0] = plt.subplot(131, projection='3d')
    axes[0].scatter(*X.T, c=t, s=50)
    axes[0].set_title('Swiss Roll')

    # Remaining panels: one 2-D embedding each.
    panels = ((axes[1], le_X, 'LE Embedding'),
              (axes[2], ler_X, 'LER Embedding'))
    for axis, embedding, title in panels:
        axis.scatter(*embedding.T, c=t, s=50)
        axis.set_title(title)

    plt.show()
# Tail of a method whose `def` line is not part of this fragment, followed by
# the module's script entry point.  Formatting has been lost; the statements
# are documented here and left untouched.
#
# 1. The double loop makes k_neighbors_array symmetric: for every pair (i, j)
#    both [i, j] and [j, i] are set to the smaller of the two entries.
# 2. floyd_warshall(..., directed=False) computes all-pairs shortest-path
#    distances; entries that come back infinite are replaced by 0.
# 3. The squared distance matrix multiplied by -0.5 is handed to KernelPCA
#    as a precomputed kernel (MDS viewed as Kernel PCA) and the fitted
#    projection onto self.n_components components is returned.
# 4. When run as a script: Isomap(10, 3) is applied through .run() to a
#    3000-point swiss roll and the result is plotted with
#    plot_artificial_dataset.
for i in range(numNodes): for j in range(numNodes): if k_neighbors_array[j, i] <= k_neighbors_array[i, j]: k_neighbors_array[i, j] = k_neighbors_array[j, i] else: k_neighbors_array[j, i] = k_neighbors_array[i, j] # Compute the all pair shortest path distance. dist_matrix = floyd_warshall(k_neighbors_array, directed=False) dist_matrix[np.isinf(dist_matrix)] = 0 # Do MDS or learn embedding # MDS can also be seen as a case of Kernel PCA # using data dependent kernel # So using K = 1/2 D^2, # we generate projections along principal components kernel = dist_matrix ** 2 kernel *= -0.5 kernelPCA = KernelPCA(n_components=self.n_components, kernel='precomputed') return kernelPCA.fit_transform(kernel) if __name__ == "__main__": isomap = Isomap(10, 3) X, color = datasets.make_swiss_roll(n_samples=3000) X_r = isomap.run(X) plot_artificial_dataset(X, X_r, color, "Swiss Roll")
from sklearn import manifold
from sklearn import datasets
from plot import *


class StochasticNeighborEmbedding():
    """Thin wrapper around scikit-learn's ``TSNE`` with a fixed random state."""

    def __init__(self, n_components=2, n_neighbors=30, init='pca'):
        """Create the underlying ``TSNE`` estimator.

        Args:
            n_components: Dimension of the embedding.
            n_neighbors: Accepted for interface compatibility; not used.
            init: Initialisation method passed to ``TSNE``.
        """
        self.tsne = manifold.TSNE(n_components=n_components, init=init,
                                  random_state=0)

    def run(self, X):
        """Fit t-SNE on ``X`` and return the embedded points."""
        return self.tsne.fit_transform(X)


if __name__ == "__main__":
    tsne = manifold.TSNE(n_components=2, init='pca')
    X, color = datasets.make_swiss_roll(n_samples=2000)
    # Convert explicitly instead of assigning to the array's `dtype`
    # attribute, which reinterprets the buffer in place.
    X = X.astype('float64', copy=False)
    # The interactive pdb breakpoint that used to sit here was removed so the
    # script runs unattended.
    X_tsne = tsne.fit_transform(X)
    plot_artificial_dataset(X, X_tsne, color=color, title='title')
# --- 2-D projection plot -----------------------------------------------------
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')

# The projected points are drawn twice, once with "+" and once with "." markers.
ax.plot(X2D[:, 0], X2D[:, 1], "k+")
ax.plot(X2D[:, 0], X2D[:, 1], "k.")

# Origin marker and the two unit axis arrows.
ax.plot([0], [0], "ko")
_arrow_kw = dict(head_width=0.05, length_includes_head=True, head_length=0.1,
                 fc='k', ec='k')
ax.arrow(0, 0, 0, 1, **_arrow_kw)
ax.arrow(0, 0, 1, 0, **_arrow_kw)

ax.set_xlabel("$z_1$", fontsize=18)
ax.set_ylabel("$z_2$", fontsize=18, rotation=0)
ax.axis([-1.5, 1.3, -1.2, 1.2])
ax.grid(True)
save_fig("dataset_2d_plot")
plt.show()

# --- Swiss roll in 3-D -------------------------------------------------------
X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)

# Axis limits as [x_min, x_max, y_min, y_max, z_min, z_max].
axes = [11.5, 14, -2, 23, -12, 15]

fig = plt.figure(figsize=(6, 5))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=t, cmap=plt.cm.hot)
ax.view_init(10, -70)

ax.set_xlabel("$x_1$", fontsize=18)
ax.set_ylabel("$x_2$", fontsize=18)
ax.set_zlabel("$x_3$", fontsize=18)

ax.set_xlim(axes[0:2])
ax.set_ylim(axes[2:4])
ax.set_zlim(axes[4:6])
def load_swiss_roll():
    """Return ``(ARTIFICIAL, dataset)`` for a 1500-point swiss roll.

    ``dataset`` is the unmodified return value of ``make_swiss_roll``.
    """
    swiss_roll = datasets.make_swiss_roll(n_samples=1500)
    return ARTIFICIAL, swiss_roll
def make_sklearn_dataset(dataset_name, n_samples):
    """Create one of the named scikit-learn toy data sets.

    Args:
        dataset_name: One of ``'circles_distant'``, ``'moons'``, ``'blobs'``,
            ``'circles_near'``, ``'s_curve'`` or ``'swiss_roll'``.
        n_samples: Number of points.  The two-part sets (``'s_curve'``,
            ``'swiss_roll'``) draw ``n_samples // 2`` points per part.

    Returns:
        A ``(points, labels)`` tuple.  For ``'s_curve'`` and ``'swiss_roll'``
        the points are 2-D (first and third generator coordinate) and the
        labels are 0 for the first copy and 1 for the second.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
    """

    def _two_copies(make, adjust):
        # Draw two independent samples, keep coordinates 0 and 2, apply
        # `adjust` to the second sample and label the copies 0 and 1.
        first = make(n_samples=n_samples // 2, noise=.05)[0][:, [0, 2]]
        second = adjust(make(n_samples=n_samples // 2, noise=.05)[0][:, [0, 2]])
        points = np.concatenate((first, second), 0)
        labels = np.concatenate((np.asarray([0] * first.shape[0]),
                                 np.asarray([1] * second.shape[0])), 0)
        return points, labels

    # The trailing comments on each branch are the original author's notes on
    # settings used with that data set.
    if 'circles_distant' == dataset_name:
        # labels=3, seed=1, n-samples=1000, max-depth=4
        # OR labels=4, seed=1, n-samples=1000, max-depth=4
        dataset = datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05)
    elif 'moons' == dataset_name:
        # labels=2, seed=13, n-samples=500, max-depth=4
        # OR labels=1, seed=27, n-samples=500, max-depth=4
        dataset = datasets.make_moons(n_samples=n_samples, noise=.05)
    elif 'blobs' == dataset_name:
        # labels=1, seed=0, n-samples=100, max-depth=3
        dataset = datasets.make_blobs(n_samples=n_samples, random_state=8)
    elif 'circles_near' == dataset_name:
        # labels=20, seed=0, n-samples=2000, max-depth=5
        dataset = datasets.make_circles(n_samples=n_samples, noise=.05)
    elif 's_curve' == dataset_name:
        # labels=10, seed=35, n-samples=2500, max-depth=7
        # Second copy is shifted by (.5, .5).
        dataset = _two_copies(datasets.make_s_curve,
                              lambda pts: pts + [.5, .5])
    elif 'swiss_roll' == dataset_name:
        # labels=10, seed=35, n-samples=2500, max-depth=5
        # Second copy is shrunk to 75 %.
        dataset = _two_copies(datasets.make_swiss_roll,
                              lambda pts: pts * 0.75)
    else:
        # Previously this fell through to an UnboundLocalError on `dataset`.
        raise ValueError("unknown dataset_name %r" % (dataset_name,))

    return dataset