import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_swiss_roll


def generate_data_for_swiss_roll(N, noise):
    """
    Generate the data set for Part 2 of Task 2: the Swiss Roll data set.
    Uses the built-in make_swiss_roll() from the sklearn library.

    :param N: number of data points to generate.
    :param noise: standard deviation of the Gaussian noise (if any) added to the data.
    :return: a data frame containing the Swiss Roll data.
    """
    # Generate the Swiss Roll points and put them in a data frame.
    data, data_color = make_swiss_roll(N, noise=noise)
    idx_plot = np.random.permutation(N)  # random plotting order over all N points
    df = pd.DataFrame(data, columns=['xcord', 'ycord', 'zcord'])

    # Plot the Swiss Roll data set on a 3-d plot, with the data frame columns
    # xcord, ycord, zcord on the x, y and z axes respectively.
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(data[idx_plot, 0], data[idx_plot, 1], data[idx_plot, 2],
               c=data_color[idx_plot], cmap=plt.cm.Spectral)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("z")
    ax.set_title("Swiss-Roll data manifold")
    ax.view_init(10, 70)
    plt.show()
    return df
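# Minimal usage sketch for generate_data_for_swiss_roll() above; the sample
# count and noise level are illustrative, not values from the original.
df_swiss = generate_data_for_swiss_roll(1500, 0.05)
print(df_swiss.head())   # first few (xcord, ycord, zcord) rows
print(df_swiss.shape)    # expected: (1500, 3)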
def get_dict_swiss_roll(name, n_samples, noise, **kwargs):
    from sklearn.datasets import make_swiss_roll
    data, t = make_swiss_roll(n_samples, noise=noise)
    # Make it thinner
    data[:, 1] *= 0.5
    # Normalize to the range [-1, 1] (project-specific helper)
    data = _normalize_data(data)
    data = data.astype(np.float32)
    pclouds = torch.from_numpy(data)
    pclouds = pclouds.contiguous()

    meta_dict = {}
    meta_dict['num_images'] = 1
    MetadataCatalog.get(name).set(**meta_dict)

    dataset_dicts = []
    record = {}
    record["id"] = 0
    record["num_samples"] = n_samples
    record["dim"] = 3
    record["points"] = pclouds
    dataset_dicts.append(record)
    return dataset_dicts
def compute_bench_dense(n_samples, n_features, rad0, dim, quiet=False):
    dense_d_results = []
    dense_a_results = []
    dense_l_results = []
    dense_e_results = []
    it = 0
    rng = np.random.RandomState(123)
    for ns in n_samples:
        # make a dataset
        X, t = make_swiss_roll(ns, noise=0.0, random_state=rng)
        X = np.asarray(X, order="C")
        for nf in n_features:
            it += 1
            rad = rad0 / ns**(1. / (dim + 6))  # check the scaling
            if not quiet:
                print('==================')
                print('Iteration %s of %s' % (it, max(len(n_samples), len(n_features))))
                print('==================')
            if nf < 3:
                raise ValueError('n_features must be at least 3 for swiss roll')
            else:
                # add noise dimensions up to n_features
                n_noisef = nf - 3
                noise_rad_frac = 0.1
                noiserad = rad / np.sqrt(n_noisef) * noise_rad_frac
                Xnoise = rng.rand(ns, n_noisef) * noiserad
                # Xnoise = np.random.random((ns, n_noisef)) * noiserad
                X = np.hstack((X, Xnoise))
                rad = rad * (1 + noise_rad_frac)  # add a fraction for the noisy dimensions
            gc.collect()
            if not quiet:
                print("- benchmarking dense")
                print('rad=', rad, 'ns=', ns, 'nf=', nf)
            Geom = Geometry(X, neighborhood_radius=1.5 * rad,
                            affinity_radius=1.5 * rad,
                            distance_method='brute',
                            input_type='data',
                            laplacian_type='symmetricnormalized')
            tstart = time()
            dists = Geom.get_distance_matrix(copy=False)
            dense_d_results.append(time() - tstart)
            A = Geom.get_affinity_matrix(copy=False, symmetrize=True)
            dense_a_results.append(time() - tstart)
            if sparse.isspmatrix(A):
                A = A.todense()  # keep the densified matrix; the result was previously discarded
            Geom.assign_affinity_matrix(A, affinity_radius=rad * 1.5)
            lap = Geom.get_laplacian_matrix(scaling_epps=rad * 1.5,
                                            return_lapsym=True, copy=False)
            dense_l_results.append(time() - tstart)
            gc.collect()
            embed = spectral_embedding(Geom, n_components=2, eigen_solver='dense')
            dense_e_results.append(time() - tstart)
            gc.collect()
    return dense_d_results, dense_a_results, dense_l_results, dense_e_results
def swissroll(path):
    X, color = make_swiss_roll(n_samples=2000, random_state=123)
    df_data = pd.DataFrame(X)
    df_labels = pd.DataFrame(color)
    df_data.to_csv(path + "swissroll.csv", header=False, index=False)
    df_labels.to_csv(path + "groundtruth.csv", header=False, index=False)
    del df_data
    del df_labels
    gc.collect()
    return X, color
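# Hedged usage sketch for swissroll(): writes the two CSV files into an
# existing directory. The trailing separator matters because the function
# concatenates strings rather than using os.path.join; the path is illustrative.
X, color = swissroll("./data/")
print(X.shape, color.shape)   # expected: (2000, 3) (2000,)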
def swiss_roll_dataset(number_of_samples=1000, plot=True):
    X, color = make_swiss_roll(n_samples=number_of_samples, random_state=123)
    fig = plt.figure(figsize=(7, 7))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.rainbow)
    plt.title('Swiss Roll in 3D')
    if plot:
        plt.show()
    plt.clf()
    return X, color
def compute_bench_sklearn(n_samples, n_features, rad0, dim, quiet=False):
    sklearn_d_results = []
    sklearn_a_results = []
    sklearn_e_results = []
    it = 0
    rng = np.random.RandomState(123)
    for ns in n_samples:
        # make a dataset
        X, t = make_swiss_roll(ns, noise=0.0, random_state=rng)
        X = np.asarray(X, order="C")
        for nf in n_features:
            it += 1
            rad = rad0 / ns**(1. / (dim + 6))  # check the scaling
            if not quiet:
                print('==================')
                print('Iteration %s of %s' % (it, max(len(n_samples), len(n_features))))
                print('==================')
                print('rad=', rad, 'ns=', ns, 'nf=', nf)
            if nf < 3:
                raise ValueError('n_features must be at least 3 for swiss roll')
            else:
                # add noise dimensions up to n_features
                n_noisef = nf - 3
                noise_rad_frac = 0.1
                noiserad = rad / np.sqrt(n_noisef) * noise_rad_frac
                # Xnoise = np.random.random((ns, n_noisef)) * noiserad
                Xnoise = rng.rand(ns, n_noisef) * noiserad
                X = np.hstack((X, Xnoise))
                rad = rad * (1 + noise_rad_frac)  # add a fraction for the noisy dimensions
            gc.collect()
            if not quiet:
                print("- benchmarking sklearn")
            tstart = time()
            dists = radius_neighbors_graph(X, radius=rad * 1.5, mode='distance')
            dists = 0.5 * (dists + dists.T)
            sklearn_d_results.append(time() - tstart)
            # taken from sklearn.metrics.pairwise.rbf_kernel()
            gamma = -1.0 / (rad * 1.5)
            A = dists.copy()
            A.data = A.data**2
            A.data = A.data / (-(rad * 1.5)**2)
            np.exp(A.data, A.data)
            sklearn_a_results.append(time() - tstart)
            embed = se(A, n_components=2)
            sklearn_e_results.append(time() - tstart)
            gc.collect()
    return sklearn_d_results, sklearn_a_results, sklearn_e_results
def plot_swiss_roll():
    # 1 generate data (swiss roll dataset)
    X, _ = make_swiss_roll(n_samples=1500, noise=0.05)
    X[:, 1] *= .5  # make it thinner

    # 2 apply hierarchical clustering without connectivity
    st = time.time()
    ward = AgglomerativeClustering(n_clusters=6, linkage='ward').fit(X)
    elapsed_time = time.time() - st
    label = ward.labels_

    # 3 plot results (no connectivity)
    fig = plt.figure()
    ax = p3.Axes3D(fig)
    ax.view_init(7, -80)
    for l in np.unique(label):
        ax.scatter(X[label == l, 0], X[label == l, 1], X[label == l, 2],
                   color=plt.cm.jet(float(l) / np.max(label + 1)),
                   s=20, edgecolor='k')
    plt.title('Without connectivity constraints (time %.2fs)' % elapsed_time)

    # 4 apply hierarchical clustering with connectivity
    from sklearn.neighbors import kneighbors_graph
    connectivity = kneighbors_graph(X, n_neighbors=10, include_self=False)
    st = time.time()
    ward = AgglomerativeClustering(n_clusters=6, connectivity=connectivity,
                                   linkage='ward').fit(X)
    elapsed_time = time.time() - st
    label = ward.labels_

    # 5 plot results (with connectivity)
    fig = plt.figure()
    ax = p3.Axes3D(fig)
    ax.view_init(7, -80)
    for l in np.unique(label):
        ax.scatter(X[label == l, 0], X[label == l, 1], X[label == l, 2],
                   color=plt.cm.jet(float(l) / np.max(label + 1)),
                   s=20, edgecolor='k')
    plt.title('With connectivity constraints (time %.2fs)' % elapsed_time)
    plt.show()
def compute_bench(n_samples, n_features, rad0, dim, quiet=False):
    dense_d_results = []
    dense_a_results = []
    dense_l_results = []
    it = 0
    for ns in n_samples:
        # make a dataset
        X, t = make_swiss_roll(ns, noise=0.0)
        X = np.asarray(X, order="C")
        for nf in n_features:
            it += 1
            rad = rad0 / ns**(1. / (dim + 6))  # check the scaling
            if not quiet:
                print('==================')
                print('Iteration %s of %s' % (it, max(len(n_samples), len(n_features))))
                print('==================')
                print('rad=', rad, 'ns=', ns)
            if nf < 3:
                raise ValueError('n_features must be at least 3 for swiss roll')
            else:
                # add noise dimensions up to n_features
                n_noisef = nf - 3
                noise_rad_frac = 0.1
                noiserad = rad / np.sqrt(n_noisef) * noise_rad_frac
                Xnoise = np.random.random((ns, n_noisef)) * noiserad
                X = np.hstack((X, Xnoise))
                rad = rad * (1 + noise_rad_frac)  # add a fraction for the noisy dimensions
            gc.collect()
            if not quiet:
                print("- benchmarking dense")
            tstart = time()
            dists = distance_matrix(X, flindex=None, mode='radius_neighbors',
                                    neighbors_radius=rad * 1.5)
            dense_d_results.append(time() - tstart)
            A = affinity_matrix(dists, rad)
            dense_a_results.append(time() - tstart)
            lap = graph_laplacian(A, normed='geometric', symmetrize=False,
                                  scaling_epps=rad, return_lapsym=False)
            dense_l_results.append(time() - tstart)
            gc.collect()
    return dense_d_results, dense_a_results, dense_l_results
def main():
    # load dataset
    X, color = make_swiss_roll(n_samples=1200, random_state=123)
    Xstd = StandardScaler().fit_transform(X)

    print('Fitting ISOMAP model...')
    model = ISOMAP(neighbors=9, dim=2)
    X_hat = model.fit(Xstd)
    print('done.')

    # show ISOMAP results
    Y = X_hat.T
    plt.figure(figsize=(8, 6))
    plt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.rainbow)
    plt.title('Manifold learning with ISOMAP\nNearest neighbors: {}'.format(model.neighbors))
    plt.xlabel('Dimension 1')
    plt.ylabel('Dimension 2')
    plt.show()
def load_swiss_roll(self):
    n_samples = 1500
    noise = 0.05
    X, _ = make_swiss_roll(n_samples, noise=noise)
    # Make it thinner
    X[:, 1] *= .5
    dim = X.shape[1]
    if dim > 3:
        _min = min(self.dimensions, dim)
        self.dataset = [[sample[d] for d in range(_min)] for sample in X]
    else:
        self.dataset = X
    ward = AgglomerativeClustering(n_clusters=6, linkage='ward').fit(X)
    self.tags = ward.labels_
    self.tags_set = set(self.tags)
def demoSwiss(k=6, parallel_client=None):
    '''
    Demonstrate the performance of LCC on the swiss roll data set.
    Some of the code is from the scikits.learn example for applying Ward's
    clustering to the swiss roll data, appropriately modified to use LCC instead.
    Original authors of the non-LCC version:
    # Authors : Vincent Michel, 2010
    #           Alexandre Gramfort, 2010
    #           Gael Varoquaux, 2010
    # License: BSD
    '''
    import numpy as np
    import pylab as pl
    import mpl_toolkits.mplot3d.axes3d as p3
    from sklearn.datasets import make_swiss_roll

    # Generate data (swiss roll dataset)
    n_samples = 1000
    noise = 0.05
    X, _ = make_swiss_roll(n_samples, noise=noise)
    # Make it thinner
    X[:, 1] *= .5

    # Convert data matrix X to a list of samples
    N = X.shape[0]
    dat = [X[i, :] for i in range(N)]

    # Generate the LCC clustering
    print("Generating LCC Clustering")
    (label, _, _, _) = pf.LatentConfigurationClustering(dat, pt_dist, k,
                                                        numtrees=27,
                                                        parallel_client=parallel_client)

    # Plot result
    fig = pl.figure()
    ax = p3.Axes3D(fig)
    ax.view_init(7, -80)
    for l in np.unique(label):
        ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2], 'o',
                  color=pl.cm.jet(float(l) / np.max(label + 1)))
    pl.title('Latent Configuration Clustering')
    pl.show()
def compute_silhouette_simulated_data(num_cluster, metric_measure, sub_data_sizes,
                                      num_neighbors, iterations, verbose=False):
    print("Computing silhouette score :")
    sil = np.zeros((len(sub_data_sizes), len(num_neighbors), iterations))
    time = np.zeros((len(sub_data_sizes), len(num_neighbors), iterations))
    for m, sub_data_size in enumerate(sub_data_sizes):
        if verbose:
            print('Sub data size: ', sub_data_size)
        ###############################################################################
        # Generate data (swiss roll dataset)
        noise = 0.05
        X, _ = make_swiss_roll(sub_data_size, noise=noise)
        # Make it thinner
        X[:, 1] *= .5
        for j, num_neigh in enumerate(num_neighbors):
            print("number of neighbors:", num_neigh, " - ", end='')
            for k in range(iterations):
                print('\n', k, end='')
                # stdout.flush()
                if verbose:
                    print("Generating at %s neighbor" % num_neigh, end='')
                connectivity = kneighbors_graph(X, n_neighbors=num_neigh)
                st = cpu_time()  # time.clock()
                # Ward is the old sklearn name for ward-linkage agglomerative clustering
                ward = Ward(n_clusters=num_cluster, connectivity=connectivity).fit(X)
                # t = time.clock() - st
                t = cpu_time() - st
                label = ward.labels_
                score = metrics.silhouette_score(X, label, metric=metric_measure)
                sil[m, j, k] = score
                time[m, j, k] = t
                # print(score, '\t', time)
        print()
    return sil, time
shape_of_map = [30, 30]
shape_of_rbf_centers = [4, 4]
variance_of_rbfs = 0.5
lambda_in_em_algorithm = 0.001
number_of_iterations = 300
display_flag = True
noise_ratio_of_y = 0.1
random_state_number = 30000
number_of_samples = 1000
numbers_of_x = [0, 1, 2]
numbers_of_y = [3]

# load a swiss roll dataset and make a y-variable
x, color = make_swiss_roll(number_of_samples, 0, random_state=10)
raw_y = 0.3 * x[:, 0] - 0.1 * x[:, 1] + 0.2 * x[:, 2]
y = raw_y + noise_ratio_of_y * raw_y.std(ddof=1) * np.random.randn(len(raw_y))

# plot
plt.rcParams['font.size'] = 18
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
p = ax.scatter(x[:, 0], x[:, 1], x[:, 2], c=y)
fig.colorbar(p)
plt.show()

variables = np.c_[x, y]
# standardize x and y
autoscaled_variables = (variables - variables.mean(axis=0)) / variables.std(axis=0, ddof=1)
autoscaled_target_y_value = (target_y_value - variables.mean(
def toy_swiss_bag_gen(self, num_bags, scaler=None, scaler_bag=None, indiv=None,
                      bw=None, bw_bag=None, data_noise=1.0, train=False,
                      sigma=1.0, y_type='normal', kernel='rbf', seed=23):
    rs = check_random_state(seed)
    if y_type == 'normal':
        scale = 1.0
        y_gen = partial(rs.normal, scale=sigma)
    elif y_type == 'poisson':
        scale = 0.5
        y_gen = lambda rate: rs.poisson(np.abs(rate))
    else:
        raise TypeError('y_gen type {} not understood'.format(y_type))

    sizes = self.bag_size_gen(num_bags, rs)
    # print('sizes:', sizes)
    total_pts = np.sum(sizes)
    X, label = make_swiss_roll(total_pts, noise=data_noise, random_state=rs)
    label = scale * label
    sort_index = X[:, 2].argsort()
    X = X[sort_index[::-1]]
    label = label[sort_index[::-1]]
    X_dim = increase_dim(X[:, :2], self.dim)
    data = rotate_orth(X_dim, seed=23)  # Rotate into a dim-dimensional object.
    indexes = [0] + np.cumsum(sizes).tolist()

    # Generate the bag variable on the manifold
    indiv_var = standardise(X[:, 2])
    bag_var = []
    bags = []
    indiv_labels = []
    indiv_true_labels = []
    bag_true_labels = []
    bag_labels = np.zeros(num_bags)
    for i in range(num_bags):
        lower = indexes[i]
        upper = indexes[i + 1]
        # print(indiv_var[lower:upper])
        indiv_label_bag = [y_gen(phi) for phi in label[lower:upper]]
        bag_value = self.s_manifold(indiv_var[lower:upper], random_state=rs)
        bag_value_rep = np.tile(bag_value, (upper - lower, 1))
        bag_var.append(bag_value)
        bag_labels[i] = np.sum(indiv_label_bag)
        bags.append(np.hstack((data[lower:upper], bag_value_rep)))
        indiv_labels.append(indiv_label_bag)
        indiv_true_labels.append(label[lower:upper])
        bag_true_labels.append(np.sum(label[lower:upper]))

    bag_var = np.vstack(bag_var)
    bags, scaler, bw = self.preprocessing(bags, scaler, train, bw, kernel, seed=seed)
    bag_var, scaler_bag, bw_bag = self.preprocessing(bag_var, scaler_bag, train,
                                                     bw_bag, kernel, seed=seed,
                                                     bag_var=True)
    bags = [
        np.column_stack((bags[index], np.ones(len(bags[index])),
                         indiv_true_labels[index], indiv_labels[index]))
        for index in range(num_bags)
    ]
    return Mal_features(bags, pop=True, indiv=True, y=bag_labels, true_indiv=True,
                        true_y=bag_true_labels, bag_var=bag_var,
                        bag_pop=sizes), scaler, bw, scaler_bag, bw_bag
def get_swiss_roll_data(n_samples=1000):
    noise = 0.2
    X, t = make_swiss_roll(n_samples, noise=noise)
    X = X.astype('float32')[:, [0, 2]]  # keep only the (x, z) plane of the roll
    return X, t
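# Hedged usage sketch for get_swiss_roll_data(): the 2-D slice above is the
# classic spiral used for toy density modelling. Sample count and plot styling
# are illustrative, not values from the original.
import matplotlib.pyplot as plt

X2d, t = get_swiss_roll_data(2000)
plt.scatter(X2d[:, 0], X2d[:, 1], c=t, s=5, cmap=plt.cm.Spectral)
plt.title('2-D Swiss Roll slice (x, z)')
plt.show()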
# %% {"slideshow": {"slide_type": "slide"}} # %%output filename='../media/03-scurve-latent' fig='png' ( s_curve_components.hvplot.scatter( x="Component 1", y="Component 2", color="y", groupby="tag", cmap="spectral" ) .layout() .opts(title="S-Curve Manifold", shared_axes=False) .cols(2) ) # %% {"slideshow": {"slide_type": "skip"}} swissroll_models = get_models() swissroll_X, swissroll_color = samples_generator.make_swiss_roll( n_points, random_state=0 ) swissroll_components = pd.concat( [ get_components(m, swissroll_X, swissroll_color, t) for t, m in swissroll_models.items() ] ) # %% {"slideshow": {"slide_type": "slide"}} # %%output filename='../media/03-swissroll-latent' fig='png' ( swissroll_components.hvplot.scatter( x="Component 1", y="Component 2", color="y", groupby="tag", cmap="spectral" )
from sklearn.datasets import make_swiss_roll

# Same as first_example.py but using embed_with_rmetric()
rad = 0.05
n_samples = 1000

if True:
    # Parametrize a swiss roll by hand
    X = np.random.random((n_samples, 2))
    thet = X[:, 0]
    X1 = np.array(3 * thet * np.sin(2 * thet))
    X2 = np.array(3 * thet * np.cos(2 * thet))
    X = np.array((X1, X2, X[:, 1]))
    X = X.T
else:
    X, thet = make_swiss_roll(n_samples, noise=0.03)
    X /= 10.

thet -= thet.min()
thet /= thet.max()  # normalize thet between [0, 1]
# print("max,min(thet)", max(thet), min(thet))
# print(X.max(0), X.min(0))

X = np.asarray(X, order="C")
# print(X.flags)
# print(X.shape, type(X))

distance_matrix, similarity_matrix, laplacian, Y, H = embed_with_rmetric(X, 2, rad)

# Plot the results
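# Hedged plotting sketch: the original snippet ends at "# Plot the results".
# One plausible continuation colours the 2-D embedding Y returned by
# embed_with_rmetric() by the roll parameter thet; the assumption that Y has
# two columns, and all styling choices, are illustrative rather than from the source.
import matplotlib.pyplot as plt

plt.scatter(Y[:, 0], Y[:, 1], c=thet, s=10, cmap=plt.cm.Spectral)
plt.title('Embedding from embed_with_rmetric()')
plt.show()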
display_flag = 1
k = 10
numofsamples = 1000
noisey = 0.1
random_state_number = 30000

import numpy as np
# import pandas as pd
from sklearn.datasets import make_swiss_roll
from gtm import gtm
import matplotlib.pyplot as plt
import matplotlib.figure as figure
from mpl_toolkits.mplot3d import Axes3D

# load a swiss roll dataset and make a y-variable
OriginalX, color = make_swiss_roll(numofsamples, 0, random_state=10)
X = OriginalX
rawy = 0.3 * OriginalX[:, 0] - 0.1 * OriginalX[:, 1] + 0.2 * OriginalX[:, 2]
originaly = rawy + noisey * rawy.std(ddof=1) * np.random.randn(len(rawy))

# plot
plt.rcParams["font.size"] = 18
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
p = ax.scatter(OriginalX[:, 0], OriginalX[:, 1], OriginalX[:, 2], c=originaly)
fig.colorbar(p)
plt.show()

# autoscaling
autoscaledX = (OriginalX - OriginalX.mean(axis=0)) / OriginalX.std(axis=0, ddof=1)
def __init__(self, batch_size, time_steps, data_type='circular_8_gaussians',
             n_dims=2, cuda=False):
    self.batch_size = batch_size
    self.time_steps = time_steps
    self.data_type = data_type
    self.n_dims = n_dims
    self.cuda = cuda
    self.mode = 'Train'
    self.iter = 0

    if self.data_type == 'swiss_roll':
        self.dataset, _ = make_swiss_roll(15000 + 15000, noise=0.2)
        if self.n_dims == 3:
            self.dataset = self.dataset.astype(np.float32) / 7.
        if self.n_dims == 2:
            self.dataset = self.dataset[:, [0, 2]].astype(np.float32) / 7.
    elif self.data_type == 'grid_25_gaussians':
        sigma = 0.05
        self.dataset = None
        for i in range(-2, 3):
            for j in range(-2, 3):
                for k in range(-2, 3):
                    mean = np.asarray([i, j, k]).astype(np.float32)[np.newaxis, :]
                    curr_samples = mean + np.random.randn(120 + 120, 3).astype(np.float32) * sigma
                    if self.dataset is None:
                        self.dataset = curr_samples
                    else:
                        self.dataset = np.concatenate([self.dataset, curr_samples], axis=0)
        if self.n_dims == 2:
            self.dataset = self.dataset[:, :2]
        rand_index = np.arange(self.dataset.shape[0])
        rand_index = np.random.permutation(rand_index)
        self.dataset = self.dataset[rand_index, ...]
    elif self.data_type == 'grid_9_gaussians':
        sigma = 0.05
        self.dataset = None
        for i in [-1, 0, 1]:
            for j in [-1, 0, 1]:
                for k in [-1, 0, 1]:
                    mean = np.asarray([i, j, k]).astype(np.float32)[np.newaxis, :]
                    curr_samples = mean + np.random.randn(550 + 550, 3).astype(np.float32) * sigma
                    if self.dataset is None:
                        self.dataset = curr_samples
                    else:
                        self.dataset = np.concatenate([self.dataset, curr_samples], axis=0)
        curr_samples = np.random.randn(150 + 150, 3).astype(np.float32)
        self.dataset = np.concatenate([self.dataset, curr_samples], axis=0)
        if self.n_dims == 2:
            self.dataset = self.dataset[:, :2]
        rand_index = np.arange(self.dataset.shape[0])
        rand_index = np.random.permutation(rand_index)
        self.dataset = self.dataset[rand_index, ...]
    elif self.data_type == 'circular_8_gaussians':
        sigma = 0.05
        self.dataset = None
        for (i, j) in [[-1.6, 0], [-1, 1], [-1, -1], [0, 1.4], [0, -1.4],
                       [1, 1], [1, -1], [1.6, 0]]:
            for k in [-1, 1]:
                mean = np.asarray([i, j, k]).astype(np.float32)[np.newaxis, :]
                curr_samples = mean + np.random.randn(1875, 3).astype(np.float32) * sigma
                if self.dataset is None:
                    self.dataset = curr_samples
                else:
                    self.dataset = np.concatenate([self.dataset, curr_samples], axis=0)
        if self.n_dims == 2:
            self.dataset = self.dataset[:, :2]
        rand_index = np.arange(self.dataset.shape[0])
        rand_index = np.random.permutation(rand_index)
        self.dataset = self.dataset[rand_index, ...]
    elif self.data_type == 'star_8_gaussians':
        sigma = 0.25
        self.dataset = None
        for it, (i, j) in enumerate([[1.6, 0], [1, 1], [0, 1.4], [-1, 1],
                                     [-1.6, 0], [-1, -1], [0, -1.4], [1, -1]]):
            for k in [-1, 1]:
                mean = np.asarray([i, j, k]).astype(np.float32)[np.newaxis, :]
                rand_part = np.random.randn(1875, 3).astype(np.float32)
                rand_part[:, 1] = rand_part[:, 1] * sigma
                rand_part[:, 0] = rand_part[:, 0] * 0.5
                rotation_mat = np.asarray([np.cos(it * np.pi / 4), np.sin(it * np.pi / 4),
                                           -np.sin(it * np.pi / 4), np.cos(it * np.pi / 4)]).reshape(2, 2)
                rand_part[:, :2] = np.matmul(rand_part[:, :2], rotation_mat)
                curr_samples = mean + rand_part
                if self.dataset is None:
                    self.dataset = curr_samples
                else:
                    self.dataset = np.concatenate([self.dataset, curr_samples], axis=0)
        if self.n_dims == 2:
            self.dataset = self.dataset[:, :2]
        rand_index = np.arange(self.dataset.shape[0])
        rand_index = np.random.permutation(rand_index)
        self.dataset = self.dataset[rand_index, ...]
        # self.dataset = self.dataset*2
    elif self.data_type == 'circular_3_gaussians':
        sigma = 0.3
        self.dataset = None
        # for (i, j) in [[-1.6, 0], [-1, 1], [-1, -1], [0, 1.4], [0, -1.4], [1, 1], [1, -1], [1.6, 0]]:
        for (i, j) in [[-1.6, 0], [1, 1], [-1, -1]]:
            for k in [-1, 1]:
                mean = np.asarray([i, j, k]).astype(np.float32)[np.newaxis, :]
                curr_samples = mean + np.random.randn(5000, 3).astype(np.float32) * sigma
                if self.dataset is None:
                    self.dataset = curr_samples
                else:
                    self.dataset = np.concatenate([self.dataset, curr_samples], axis=0)
        if self.n_dims == 2:
            self.dataset = self.dataset[:, :2]
        rand_index = np.arange(self.dataset.shape[0])
        rand_index = np.random.permutation(rand_index)
        self.dataset = self.dataset[rand_index, ...]
    elif self.data_type == 'linear_degenerate_circular_3_gaussians' or \
            self.data_type == 'nonlinear_degenerate_circular_3_gaussians':
        sigma = 1
        degree1 = 30
        degree2 = 45
        self.dataset = None
        # for (i, j) in [[-1.6, 0], [-1, 1], [-1, -1], [0, 1.4], [0, -1.4], [1, 1], [1, -1], [1.6, 0]]:
        for (i, j) in [[-1.6, 0], [2, 2], [-2, -2]]:
            for k in [-1, 1]:
                mean = np.asarray([i, j, k]).astype(np.float32)[np.newaxis, :]
                curr_samples = mean + np.random.randn(5000, 3).astype(np.float32) * sigma
                if self.dataset is None:
                    self.dataset = curr_samples
                else:
                    self.dataset = np.concatenate([self.dataset, curr_samples], axis=0)
        if self.n_dims == 2:
            self.dataset = self.dataset[:, :2]
        if self.data_type == 'linear_degenerate_circular_3_gaussians':
            self.dataset = np.concatenate([self.dataset,
                                           np.zeros((self.dataset.shape[0], 1))], axis=-1)
        elif self.data_type == 'nonlinear_degenerate_circular_3_gaussians':
            self.dataset = np.concatenate([self.dataset,
                                           8 * np.tanh(self.dataset[:, 0, np.newaxis])], axis=-1)
        # -60, 30 to -13 18 45 12
        degree1 = 15
        degree2 = 0
        degree3 = -30
        degreeRad1 = float(degree1) * np.pi / 180.
        degreeRad2 = float(degree2) * np.pi / 180.
        degreeRad3 = float(degree3) * np.pi / 180.
        rotation_mat1 = np.asarray([np.cos(degreeRad1), np.sin(degreeRad1), 0,
                                    -np.sin(degreeRad1), np.cos(degreeRad1), 0,
                                    0, 0, 1]).reshape(3, 3)
        rotation_mat2 = np.asarray([1, 0, 0,
                                    0, np.cos(degreeRad2), np.sin(degreeRad2),
                                    0, -np.sin(degreeRad2), np.cos(degreeRad2)]).reshape(3, 3)
        rotation_mat3 = np.asarray([np.cos(degreeRad3), 0, np.sin(degreeRad3),
                                    0, 1, 0,
                                    -np.sin(degreeRad3), 0, np.cos(degreeRad3)]).reshape(3, 3)
        self.dataset = np.matmul(self.dataset, rotation_mat1)
        self.dataset = np.matmul(self.dataset, rotation_mat2)
        self.dataset = np.matmul(self.dataset, rotation_mat3)
        rand_index = np.arange(self.dataset.shape[0])
        rand_index = np.random.permutation(rand_index)
        self.dataset = self.dataset[rand_index, ...]
        self.n_dims = 3
    else:
        pdb.set_trace()

    self.dataset = self.dataset * 0.5
    # helper.dataset_plotter([self.dataset,], show_also=True)
    # pdb.set_trace()
    self.train()
    self.reset()
    self.batch = {}
    self.batch['context'] = {'properties': {'flat': [], 'image': []},
                             'data': {'flat': None, 'image': None}}
    self.batch['observed'] = {'properties': {'flat': [{'dist': 'cont',
                                                       'name': 'Toy Data',
                                                       'size': tuple([self.batch_size, self.time_steps, self.n_dims])}],
                                             'image': []},
                              'data': {'flat': None, 'image': None}}
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
from scipy.linalg import eigh
from sklearn.datasets import make_swiss_roll
from sklearn.decomposition import PCA


def rbf_kernel_pca(X, gamma, n_components):
    """RBF kernel PCA implementation."""
    # Pairwise squared Euclidean distances, converted to a square matrix,
    # then mapped through the RBF to build the kernel matrix.
    sq_dists = pdist(X, 'sqeuclidean')
    mat_sq_dists = squareform(sq_dists)
    K = np.exp(-gamma * mat_sq_dists)

    # Center the kernel matrix.
    N = K.shape[0]
    one_n = np.ones((N, N)) / N
    K = K - one_n.dot(K) - K.dot(one_n) + one_n.dot(K).dot(one_n)

    # Obtaining eigenvalues in descending order with corresponding
    # eigenvectors from the symmetric matrix.
    eigvals, eigvecs = eigh(K)

    # Obtaining the i eigenvectors that correspond to the i highest eigenvalues.
    X_pc = np.column_stack(
        [eigvecs[:, -i] for i in range(1, n_components + 1)])
    return X_pc


X, color = make_swiss_roll(n_samples=800, random_state=123)

# plot initial data
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.rainbow)
plt.title('Swiss Roll in 3D')
# plt.show()
plt.savefig('../figs/tutorial/sebraex4_1.png')
plt.close()

# Linear PCA
scikit_pca = PCA(n_components=2)
X_spca = scikit_pca.fit_transform(X)
plt.figure(figsize=(8, 6))
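# Hedged continuation sketch: the snippet above breaks off right after
# plt.figure(figsize=(8, 6)). A plausible next step plots the linear PCA
# projection and then applies rbf_kernel_pca() for comparison; the gamma
# value and the output file name are illustrative, not from the source.
plt.scatter(X_spca[:, 0], X_spca[:, 1], c=color, cmap=plt.cm.rainbow)
plt.title('Swiss Roll projected by linear PCA')
plt.savefig('../figs/tutorial/sebraex4_2.png')  # hypothetical file name
plt.close()

X_kpca = rbf_kernel_pca(X, gamma=0.1, n_components=2)  # illustrative gamma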
# -*- coding: utf-8 -*-
__author__ = 'gongwenqiang'

import numpy as np
from sklearn.cluster import AgglomerativeClustering
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as p3
from time import time
from sklearn.neighbors import kneighbors_graph
from sklearn.datasets import make_swiss_roll

n_samples = 1500
noise = 0.05
X, _ = make_swiss_roll(n_samples, noise=noise)
X[:, 1] *= .5

print("Compute unstructured hierarchical clustering...")
t0 = time()
cluster0 = AgglomerativeClustering(n_clusters=6, linkage='ward')
cluster0.fit(X)
elapsed_time = time() - t0
labels = cluster0.labels_
print("Elapsed time: %.2fs" % elapsed_time)
print("Number of points: %i" % labels.size)

fig = plt.figure()
ax = p3.Axes3D(fig)
ax.view_init(7, -80)
for l in np.unique(labels):
    ax.plot3D(X[labels == l, 0], X[labels == l, 1], X[labels == l, 2], 'o',
              color=plt.cm.jet(float(l) / np.max(labels + 1)))
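# Hedged completion sketch: the snippet above ends inside the plotting loop;
# a plausible finish, mirroring the other clustering demos in this collection,
# is a title and a call to plt.show(). The title text is illustrative.
plt.title('Unstructured hierarchical clustering')
plt.show()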
import matplotlib.pyplot as plt
from sklearn.datasets import make_swiss_roll, make_s_curve
from sklearn.manifold import TSNE
import mpl_toolkits.mplot3d

data_flag = 1  # 1: s-curve dataset, 2: swiss-roll dataset
perplexity = 85  # 85 if data_flag == 1, 50 if data_flag == 2
number_of_samples = 1000
noise = 0
random_state_number = 100

if data_flag == 1:
    original_X, color = make_s_curve(number_of_samples, noise=noise, random_state=0)
elif data_flag == 2:
    original_X, color = make_swiss_roll(number_of_samples, noise=noise, random_state=0)

# plot
plt.rcParams["font.size"] = 18
fig = plt.figure(figsize=(7, 6))
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel("x1")
ax.set_ylabel("x2")
ax.set_zlabel("x3")
p = ax.scatter(original_X[:, 0], original_X[:, 1], original_X[:, 2], c=color)
# fig.colorbar(p)
plt.tight_layout()
plt.show()

autoscaled_X = (original_X - original_X.mean(axis=0)) / original_X.std(axis=0, ddof=1)
# License: BSD

print(__doc__)

import time as time
import numpy as np
import pylab as pl
import mpl_toolkits.mplot3d.axes3d as p3
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_swiss_roll

###############################################################################
# Generate data (swiss roll dataset)
n_samples = 1000
noise = 0.05
X, _ = make_swiss_roll(n_samples, noise=noise)
# Make it thinner
X[:, 1] *= .5

###############################################################################
# Compute clustering
print("Compute unstructured hierarchical clustering...")
st = time.time()
ward = AgglomerativeClustering(n_clusters=6, linkage='ward').fit(X)
label = ward.labels_
print("Elapsed time: ", time.time() - st)
print("Number of points: ", label.size)

###############################################################################
# Plot result
fig = pl.figure()
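# Hedged completion sketch: the original example breaks off at pl.figure();
# the natural continuation, as in the other Ward demos in this collection,
# draws the labelled points on a 3-D axis. Colours and view angles are illustrative.
ax = p3.Axes3D(fig)
ax.view_init(7, -80)
for l in np.unique(label):
    ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2], 'o',
              color=pl.cm.jet(float(l) / np.max(label + 1)))
pl.title('Unstructured hierarchical clustering on the swiss roll')
pl.show()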
import numpy as np
from scipy import sparse
from sklearn.datasets import make_swiss_roll

rng = np.random.RandomState(123)
ns = 3000
X, t = make_swiss_roll(ns, noise=0.0, random_state=rng)
X = np.asarray(X, order="C")
nf = 750
rad0 = 2.5
dim = 2
rad = rad0 / ns**(1. / (dim + 6))  # check the scaling

# add noise dimensions up to n_features
n_noisef = nf - 3
noise_rad_frac = 0.1
noiserad = rad / np.sqrt(n_noisef) * noise_rad_frac
Xnoise = rng.rand(ns, n_noisef) * noiserad
X = np.hstack((X, Xnoise))
rad = rad * (1 + noise_rad_frac)  # add a fraction for the noisy dimensions

from Mmani.geometry.distance import distance_matrix
dmat = distance_matrix(X, method='cython', radius=rad)