コード例 #1
0
def generate_data_for_swiss_roll(N, noise):
    """
    Generate the Swiss Roll data set for Part two of Task 2 and show it in 3-D.

    The points come from sklearn's built-in make_swiss_roll(). They are drawn
    on a 3-d scatter plot (in a random order) and returned as a data frame.

    :param N: It is number of data points to be generated.
    :param noise: It is the noise (if any) for the data; forwarded to
        make_swiss_roll().
    :return: It returns a data frame with columns xcord, ycord, zcord
        containing the data for swiss roll data set.
    """
    # ``noise`` is keyword-only in current scikit-learn releases, so it must
    # be passed by name.
    data, data_color = make_swiss_roll(n_samples=N, noise=noise)
    # Random drawing order for the scatter plot (all N points are plotted).
    idx_plot = np.random.permutation(N)
    df = pd.DataFrame(data, columns=['xcord', 'ycord', 'zcord'])

    # Plot the swiss roll data set on a 3-d plot with the data columns
    # (xcord, ycord, zcord) on the x, y and z axes respectively.
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(data[idx_plot, 0],
               data[idx_plot, 1],
               data[idx_plot, 2],
               c=data_color[idx_plot],
               cmap=plt.cm.Spectral)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("z")
    ax.set_title("Swiss-Roll data manifold")
    ax.view_init(10, 70)
    plt.show()
    return df
コード例 #2
0
def get_dict_swiss_roll(name, n_samples, noise, **kwargs):
    """
    Build a one-record dataset list holding a swiss-roll point cloud.

    The roll is generated with ``make_swiss_roll``, its second coordinate is
    halved, the result is passed through ``_normalize_data`` and converted to
    a contiguous float32 torch tensor.  ``num_images=1`` is registered on
    ``MetadataCatalog.get(name)``.

    :param name: key under which the metadata is registered.
    :param n_samples: number of points to generate.
    :param noise: noise level forwarded to ``make_swiss_roll``.
    :param kwargs: accepted for interface compatibility; not used here.
    :return: list with a single dict (keys ``id``, ``num_samples``, ``dim``,
        ``points``).
    """
    # ``sklearn.datasets.samples_generator`` no longer exists in current
    # scikit-learn; the public ``sklearn.datasets`` path works everywhere.
    from sklearn.datasets import make_swiss_roll

    # ``noise`` is keyword-only in current scikit-learn releases.
    data, _ = make_swiss_roll(n_samples, noise=noise)
    # Make it thinner
    data[:, 1] *= 0.5
    # Normalize to range of [-1, 1]
    data = _normalize_data(data)

    pclouds = torch.from_numpy(data.astype(np.float32)).contiguous()

    MetadataCatalog.get(name).set(num_images=1)

    record = {
        "id": 0,
        "num_samples": n_samples,
        "dim": 3,
        "points": pclouds,
    }
    return [record]
コード例 #3
0
def compute_bench_dense(n_samples, n_features, rad0, dim, quiet = False):
    """
    Time the dense ``Geometry`` pipeline on swiss rolls padded with noise.

    For every sample count a swiss roll is generated; for every feature count
    it is padded with uniform noise columns up to that many features.  The
    distance matrix, affinity matrix, Laplacian and a 2-component spectral
    embedding are then computed in sequence.  Each recorded time is measured
    from the start of the distance computation, so the timings are cumulative.

    :param n_samples: sequence of sample counts.
    :param n_features: sequence of total feature counts (each must be >= 3).
    :param rad0: base radius, scaled by ``ns**(-1/(dim+6))`` per sample count.
    :param dim: dimension used in the radius scaling exponent.
    :param quiet: when True, suppress progress output.
    :return: four lists of timings: distance, affinity, laplacian, embedding.
    :raises ValueError: if a requested feature count is below 3.
    """
    dense_d_results = []
    dense_a_results = []
    dense_l_results = []
    dense_e_results = []
    it = 0
    n_iterations = len(n_samples) * len(n_features)
    rng = np.random.RandomState(123)
    for ns in n_samples:
        # make a dataset
        X_roll, t = make_swiss_roll( ns, noise = 0.0, random_state = rng )
        X_roll = np.asarray( X_roll, order="C" )

        for nf in n_features:
            it += 1
            rad = rad0/ns**(1./(dim+6))  #check the scaling
            if not quiet:
                print('==================')
                print('Iteration %s of %s' % (it, n_iterations))
                print('==================')

            if nf < 3:
                raise ValueError('n_features must be at least 3 for swiss roll')

            # add noise dimensions up to n_features, always starting from the
            # clean 3-D roll so every benchmark has exactly nf columns
            n_noisef = nf - 3
            noise_rad_frac = 0.1
            if n_noisef > 0:
                noiserad = rad/np.sqrt(n_noisef) * noise_rad_frac
                Xnoise = rng.rand(ns, n_noisef) * noiserad
                X = np.hstack((X_roll, Xnoise))
            else:
                # nothing to add; also avoids dividing by sqrt(0)
                X = X_roll
            rad = rad*(1+noise_rad_frac) # add a fraction for noisy dimensions
            gc.collect()
            if not quiet:
                print("- benchmarking dense")
                print( 'rad=', rad, 'ns=', ns, 'nf=', nf )
            Geom = Geometry(X, neighborhood_radius = 1.5*rad, affinity_radius = 1.5*rad, 
                            distance_method = 'brute', input_type = 'data', 
                            laplacian_type = 'symmetricnormalized')
            tstart = time()
            dists = Geom.get_distance_matrix(copy=False)
            dense_d_results.append(time() - tstart)
            A = Geom.get_affinity_matrix(copy = False, symmetrize = True)
            dense_a_results.append(time() - tstart)
            if sparse.isspmatrix(A):
                # NOTE(review): the dense copy is discarded and the sparse A is
                # re-assigned; kept as in the original, confirm the intent.
                A.todense()
                Geom.assign_affinity_matrix(A, affinity_radius = rad*1.5)
            lap = Geom.get_laplacian_matrix(scaling_epps=rad*1.5, return_lapsym=True,
                                            copy = False)
            dense_l_results.append(time() - tstart)
            gc.collect()
            embed = spectral_embedding(Geom, n_components = 2, eigen_solver = 'dense')
            dense_e_results.append(time() - tstart)
            gc.collect()
    return dense_d_results, dense_a_results, dense_l_results, dense_e_results
コード例 #4
0
def swissroll(path):
    """
    Generate a 2000-point swiss roll (random_state=123) and dump it to CSV.

    The points go to ``path + "swissroll.csv"`` and the second array returned
    by ``make_swiss_roll`` to ``path + "groundtruth.csv"``, both without header
    or index.

    :param path: string prefix prepended to both file names.
    :return: the ``(X, color)`` pair from ``make_swiss_roll``.
    """
    X, color = make_swiss_roll(n_samples=2000, random_state=123)
    outputs = ((X, "swissroll.csv"), (color, "groundtruth.csv"))
    for values, file_name in outputs:
        pd.DataFrame(values).to_csv(path + file_name, header=None, index=False)
    # the temporary frames are already unreferenced here
    gc.collect()
    return X, color
def swiss_roll_dataset(number_of_samples=1000,plot=True):
    """
    Generate a swiss roll (random_state=123) and optionally show it in 3-D.

    :param number_of_samples: number of points to generate.
    :param plot: when True, display a 3-D scatter of the data.
    :return: the ``(X, color)`` pair from ``make_swiss_roll``.
    """
    X, color = make_swiss_roll(n_samples=number_of_samples, random_state=123)
    if plot:
        # Only build the figure when it is shown, and close it afterwards so
        # repeated calls do not accumulate open pyplot figures.
        fig = plt.figure(figsize=(7, 7))
        ax = fig.add_subplot(111,projection='3d')
        ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.rainbow)
        plt.title('Swiss Roll in 3D')
        plt.show()
        plt.close(fig)
    return X, color
コード例 #6
0
def compute_bench_sklearn(n_samples, n_features, rad0, dim, quiet = False):
    """
    Time a scikit-learn based pipeline on swiss rolls padded with noise.

    For every sample count a swiss roll is generated; for every feature count
    it is padded with uniform noise columns up to that many features.  A
    radius-neighbors distance graph, a Gaussian affinity on its stored entries
    and a 2-component embedding (via ``se``) are computed in sequence.  Each
    recorded time is measured from the start of the distance computation, so
    the timings are cumulative.

    :param n_samples: sequence of sample counts.
    :param n_features: sequence of total feature counts (each must be >= 3).
    :param rad0: base radius, scaled by ``ns**(-1/(dim+6))`` per sample count.
    :param dim: dimension used in the radius scaling exponent.
    :param quiet: when True, suppress progress output.
    :return: three lists of timings: distance, affinity, embedding.
    :raises ValueError: if a requested feature count is below 3.
    """
    sklearn_d_results = []
    sklearn_a_results = []
    sklearn_e_results = []
    it = 0
    n_iterations = len(n_samples) * len(n_features)
    rng = np.random.RandomState(123)
    for ns in n_samples:
        # make a dataset
        X_roll, t = make_swiss_roll( ns, noise = 0.0, random_state = rng )
        X_roll = np.asarray( X_roll, order="C" )

        for nf in n_features:
            it += 1
            rad = rad0/ns**(1./(dim+6))  #check the scaling
            if not quiet:
                print('==================')
                print('Iteration %s of %s' % (it, n_iterations))
                print('==================')
                print( 'rad=', rad, 'ns=', ns, 'nf=', nf )

            if nf < 3:
                raise ValueError('n_features must be at least 3 for swiss roll')

            # add noise dimensions up to n_features, always starting from the
            # clean 3-D roll so every benchmark has exactly nf columns
            n_noisef = nf - 3
            noise_rad_frac = 0.1
            if n_noisef > 0:
                noiserad = rad/np.sqrt(n_noisef) * noise_rad_frac
                Xnoise = rng.rand(ns, n_noisef) * noiserad
                X = np.hstack((X_roll, Xnoise))
            else:
                # nothing to add; also avoids dividing by sqrt(0)
                X = X_roll
            rad = rad*(1+noise_rad_frac) # add a fraction for noisy dimensions
            gc.collect()
            if not quiet:
                print("- benchmarking sklearn")
            tstart = time()           
            dists = radius_neighbors_graph(X, radius = rad*1.5, mode = 'distance')
            dists = 0.5 * (dists + dists.T)
            sklearn_d_results.append(time() - tstart)
            # taken from sklearn.metrics.pairwise.rbf_kernel():
            # affinity = exp(-d**2 / (1.5*rad)**2) on the stored distances
            A = dists.copy()
            A.data = A.data**2
            A.data = A.data/(-(rad*1.5)**2)
            np.exp(A.data, out=A.data)
            sklearn_a_results.append(time() - tstart)
            embed = se(A, n_components = 2)
            sklearn_e_results.append(time() - tstart)
            gc.collect()
    return sklearn_d_results, sklearn_a_results, sklearn_e_results
コード例 #7
0
def _scatter_clusters_3d(X, label, title):
    """Draw one 3-D scatter of X on a new figure, coloured per cluster label."""
    fig = plt.figure()
    # Axes3D(fig) no longer attaches itself to the figure in current
    # Matplotlib; add_subplot(projection='3d') works on old and new releases.
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(7, -80)
    for l in np.unique(label):
        members = label == l
        ax.scatter(X[members, 0],
                   X[members, 1],
                   X[members, 2],
                   # np.float was removed from NumPy; the builtin is equivalent
                   color=plt.cm.jet(float(l) / np.max(label + 1)),
                   s=20,
                   edgecolor='k')
    ax.set_title(title)


def plot_swiss_roll():
    """
    Compare ward clustering of a swiss roll with and without connectivity.

    A 1500-point swiss roll (noise 0.05, second coordinate halved) is
    clustered into 6 groups twice: first unconstrained, then with a
    10-nearest-neighbour connectivity graph.  Each result is drawn on its own
    3-D figure with the fit time in the title, then both figures are shown.
    """
    # 1 generate data (swiss roll dataset)
    X, _ = make_swiss_roll(n_samples=1500, noise=0.05)
    X[:, 1] *= .5  # make it thinner

    # 2 apply hierarchical clustering without connectivity
    st = time.time()
    ward = AgglomerativeClustering(n_clusters=6, linkage='ward').fit(X)
    elapsed_time = time.time() - st

    # 3 plot results (no connectivity)
    _scatter_clusters_3d(
        X, ward.labels_,
        'Without connectivity constraints (time %.2fs)' % elapsed_time)

    # 4 apply hierarchical clustering with connectivity
    from sklearn.neighbors import kneighbors_graph
    connectivity = kneighbors_graph(X, n_neighbors=10, include_self=False)
    st = time.time()
    ward = AgglomerativeClustering(n_clusters=6,
                                   connectivity=connectivity,
                                   linkage='ward').fit(X)
    elapsed_time = time.time() - st

    # 5 plot results (with connectivity)
    _scatter_clusters_3d(
        X, ward.labels_,
        'With connectivity constraints (time %.2fs)' % elapsed_time)
    plt.show()
コード例 #8
0
def compute_bench(n_samples, n_features, rad0, dim, quiet = False):
    """
    Time distance, affinity and Laplacian construction on noisy swiss rolls.

    For every sample count a swiss roll is generated; for every feature count
    it is padded with uniform noise columns up to that many features.
    ``distance_matrix``, ``affinity_matrix`` and ``graph_laplacian`` are then
    called in sequence.  Each recorded time is measured from the start of the
    distance computation, so the timings are cumulative.  The random data is
    not seeded.

    :param n_samples: sequence of sample counts.
    :param n_features: sequence of total feature counts (each must be >= 3).
    :param rad0: base radius, scaled by ``ns**(-1/(dim+6))`` per sample count.
    :param dim: dimension used in the radius scaling exponent.
    :param quiet: when True, suppress progress output.
    :return: three lists of timings: distance, affinity, laplacian.
    :raises ValueError: if a requested feature count is below 3.
    """
    dense_d_results = []
    dense_a_results = []
    dense_l_results = []
    it = 0
    n_iterations = len(n_samples) * len(n_features)

    for ns in n_samples:
        # make a dataset
        X_roll, t = make_swiss_roll( ns, noise = 0.0 )
        X_roll = np.asarray( X_roll, order="C" )

        for nf in n_features:
            it += 1
            rad = rad0/ns**(1./(dim+6))  #check the scaling
            if not quiet:
                print('==================')
                print('Iteration %s of %s' % (it, n_iterations))
                print('==================')
                print( 'rad=', rad, 'ns=', ns )

            if nf < 3:
                raise ValueError('n_features must be at least 3 for swiss roll')

            # add noise dimensions up to n_features, always starting from the
            # clean 3-D roll so every benchmark has exactly nf columns
            n_noisef = nf - 3
            noise_rad_frac = 0.1
            if n_noisef > 0:
                noiserad = rad/np.sqrt(n_noisef) * noise_rad_frac
                Xnoise = np.random.random((ns, n_noisef)) * noiserad
                X = np.hstack((X_roll, Xnoise))
            else:
                # nothing to add; also avoids dividing by sqrt(0)
                X = X_roll
            rad = rad*(1+noise_rad_frac) # add a fraction for noisy dimensions

            gc.collect()
            if not quiet:
                print("- benchmarking dense")
            tstart = time()
            dists = distance_matrix(X, flindex = None, mode='radius_neighbors',
                                    neighbors_radius=rad*1.5 )
            dense_d_results.append(time() - tstart)
            A = affinity_matrix( dists, rad )
            dense_a_results.append(time() - tstart)
            lap = graph_laplacian(A, normed='geometric', symmetrize=False, scaling_epps=rad, return_lapsym=False)
            dense_l_results.append(time() - tstart)
            gc.collect()
    return  dense_d_results, dense_a_results, dense_l_results
コード例 #9
0
def main():
    """
    Embed a standardized swiss roll with the ISOMAP class and plot the result.

    Generates 1200 points (random_state=123), standardizes them, fits
    ``ISOMAP(neighbors=9, dim=2)`` and scatters the transposed output of
    ``fit``, coloured by the second array from ``make_swiss_roll``.
    """
    # load and standardize the data
    points, color = make_swiss_roll(n_samples=1200, random_state=123)
    standardized = StandardScaler().fit_transform(points)

    print('Fitting ISOMAP model...')
    model = ISOMAP(neighbors=9, dim=2)
    fitted = model.fit(standardized)
    print('done.')

    # fit() output is transposed before its first two columns are plotted
    embedding = fitted.T
    first_axis, second_axis = embedding[:, 0], embedding[:, 1]
    plt.figure(figsize=(8,6))
    plt.scatter(first_axis, second_axis, c=color, cmap=plt.cm.rainbow)
    plt.title('Aprendizado de variedades com ISOMAP\nVizinhos-mais-próximos: {}'.format(model.neighbors))
    plt.xlabel('Dimensão 1')
    plt.ylabel('Dimensão 2')
    plt.show()
コード例 #10
0
    def load_swiss_roll(self):
        """
        Load a 1500-point swiss roll and tag it with 6 ward clusters.

        Sets ``self.dataset`` to the generated points (second coordinate
        halved), ``self.tags`` to the cluster labels from
        ``AgglomerativeClustering`` and ``self.tags_set`` to the set of
        distinct labels.
        """
        n_samples = 1500
        noise = 0.05
        # ``noise`` is keyword-only in current scikit-learn releases.
        X, _ = make_swiss_roll(n_samples, noise=noise)
        # Make it thinner
        X[:, 1] *= .5

        dim = X.shape[1]
        # NOTE(review): make_swiss_roll is documented to return 3 columns, so
        # this branch looks unreachable; kept unchanged pending confirmation
        # of the intended use of self.dimensions.
        if dim > 3:
            _min = min(self.dimensions, dim)

            self.dataset = [[sample[d] for d in range(_min)] for sample in X]
        else:
            self.dataset = X

        ward = AgglomerativeClustering(n_clusters=6, linkage='ward').fit(X)
        self.tags = ward.labels_
        self.tags_set = set(self.tags)
コード例 #11
0
def demoSwiss(k=6, parallel_client=None):
    '''
    Demonstrate the performance of LCC
    on the swiss roll data set.
    Some of the code is from the scikits.learn example for applying
    ward's clustering to the swiss roll data, but appropriately modified
    to use LCC instead.

    :param k: passed to pf.LatentConfigurationClustering as its third argument.
    :param parallel_client: forwarded to pf.LatentConfigurationClustering.

    Original authors of the non-LCC version:
    # Authors : Vincent Michel, 2010
    #           Alexandre Gramfort, 2010
    #           Gael Varoquaux, 2010
    # License: BSD
    '''
    import numpy as np
    import pylab as pl
    # Kept so the '3d' projection is registered on older Matplotlib releases.
    import mpl_toolkits.mplot3d.axes3d as p3
    # sklearn.datasets.samples_generator no longer exists in current
    # scikit-learn; the public path works on every release.
    from sklearn.datasets import make_swiss_roll

    # Generate data (swiss roll dataset)
    n_samples = 1000
    noise = 0.05
    # ``noise`` is keyword-only in current scikit-learn releases.
    X, _ = make_swiss_roll(n_samples, noise=noise)
    # Make it thinner
    X[:, 1] *= .5

    #Convert data matrix X to a list of samples (one row per sample)
    dat = list(X)

    #generate LCC clustering
    print("Generating LCC Clustering")
    (label, _, _, _) = pf.LatentConfigurationClustering(dat, pt_dist, k, numtrees=27, parallel_client=parallel_client)

    # Plot result
    fig = pl.figure()
    # Axes3D(fig) no longer attaches itself to the figure in current
    # Matplotlib; add_subplot(projection='3d') works on old and new releases.
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(7, -80)
    for l in np.unique(label):
        # np.float was removed from NumPy; the builtin float is equivalent
        ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2],
                  'o', color=pl.cm.jet(float(l) / np.max(label + 1)))
    pl.title('Latent Configuration Clustering')

    pl.show()
コード例 #12
0
def compute_silhouette_simulated_data(num_cluster, metric_measure, sub_data_sizes, num_neighbors, iterations, verbose=False):
    """
    Measure silhouette score and fit time of ward clustering on swiss rolls.

    For each data size a swiss roll (noise 0.05, second coordinate halved) is
    generated; for each neighbour count a k-neighbours connectivity graph is
    built and the clustering is fitted ``iterations`` times, recording the
    silhouette score and the ``cpu_time()`` spent in ``fit``.

    NOTE(review): ``Ward`` is no longer provided by current scikit-learn
    (``AgglomerativeClustering(linkage='ward')`` is the replacement); the name
    is resolved from the enclosing module and is left untouched here.

    :param num_cluster: number of clusters requested from ``Ward``.
    :param metric_measure: ``metric`` argument of ``metrics.silhouette_score``.
    :param sub_data_sizes: sequence of sample counts.
    :param num_neighbors: sequence of neighbour counts for the graph.
    :param iterations: number of repeated fits per configuration.
    :param verbose: when True, print extra progress information.
    :return: ``(sil, time)`` arrays of shape
        ``(len(sub_data_sizes), len(num_neighbors), iterations)``.
    """
    print("Computing silhouette score :")
    result_shape = (len(sub_data_sizes), len(num_neighbors), iterations)
    sil = np.zeros(result_shape)
    # named fit_times so the local does not shadow the ``time`` module name
    fit_times = np.zeros(result_shape)

    for m, sub_data_size in enumerate(sub_data_sizes):
        if verbose:
            print('Sub data size: ', sub_data_size)
        # Generate data (swiss roll dataset); ``noise`` is keyword-only in
        # current scikit-learn releases.
        noise = 0.05
        X, _ = make_swiss_roll(sub_data_size, noise=noise)
        # Make it thinner
        X[:, 1] *= .5

        for j, num_neigh in enumerate(num_neighbors):
            print("number of neighbors:", num_neigh, " - ", end=' ')
            # The graph depends only on X and num_neigh, so build it once per
            # neighbour count rather than once per repetition.
            connectivity = kneighbors_graph(X, n_neighbors=num_neigh)
            for k in range(iterations):
                print('\n%s' % k, end=' ')
                if verbose:
                    print("Generating at %s neighbor" % num_neigh, end=' ')

                # only the fit itself is timed
                st = cpu_time()
                ward = Ward(n_clusters=num_cluster, connectivity=connectivity).fit(X)
                t = cpu_time() - st

                label = ward.labels_
                score = metrics.silhouette_score(X, label, metric = metric_measure)

                sil[m,j,k] = score
                fit_times[m,j,k] = t
            print()
    return sil, fit_times
コード例 #13
0
ファイル: demo_inverse_gtmr.py プロジェクト: ustamatsu/dcekit
# Settings of the demo
shape_of_map = [30, 30]
shape_of_rbf_centers = [4, 4]
variance_of_rbfs = 0.5
lambda_in_em_algorithm = 0.001
number_of_iterations = 300
display_flag = True
noise_ratio_of_y = 0.1
random_state_number = 30000

number_of_samples = 1000

# column indices of the x- and y-variables in the combined variable matrix
numbers_of_x = [0, 1, 2]
numbers_of_y = [3]

# load a swiss roll dataset and make a y-variable
# (``noise`` is keyword-only in current scikit-learn releases)
x, color = make_swiss_roll(number_of_samples, noise=0, random_state=10)
raw_y = 0.3 * x[:, 0] - 0.1 * x[:, 1] + 0.2 * x[:, 2]
# add Gaussian noise scaled to noise_ratio_of_y times the std of raw_y
y = raw_y + noise_ratio_of_y * raw_y.std(ddof=1) * np.random.randn(len(raw_y))
# plot
plt.rcParams['font.size'] = 18
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
p = ax.scatter(x[:, 0], x[:, 1], x[:, 2], c=y)
fig.colorbar(p)
plt.show()

variables = np.c_[x, y]
# standardize x and y
autoscaled_variables = (variables - variables.mean(axis=0)) / variables.std(
    axis=0, ddof=1)
autoscaled_target_y_value = (target_y_value - variables.mean(
コード例 #14
0
 def toy_swiss_bag_gen(self,
                       num_bags,
                       scaler=None,
                       scaler_bag=None,
                       indiv=None,
                       bw=None,
                       bw_bag=None,
                       data_noise=1.0,
                       train=False,
                       sigma=1.0,
                       y_type='normal',
                       kernel='rbf',
                       seed=23):
     """
     Generate ``num_bags`` bags of swiss-roll points with individual and bag labels.

     Bag sizes come from ``self.bag_size_gen``; one swiss roll with the total
     number of points is generated, sorted by its third coordinate in
     descending order and cut into consecutive bags.  Individual labels are
     drawn by ``y_gen`` from the scaled second output of ``make_swiss_roll``;
     a bag's label is the sum of its individual labels.

     The statements below share one random state ``rs``; their order
     determines the generated values.

     :param num_bags: number of bags to generate.
     :param scaler: forwarded to ``self.preprocessing`` for the bags.
     :param scaler_bag: forwarded to ``self.preprocessing`` for the bag variables.
     :param indiv: not used in this method.
     :param bw: forwarded to ``self.preprocessing`` for the bags.
     :param bw_bag: forwarded to ``self.preprocessing`` for the bag variables.
     :param data_noise: ``noise`` argument of ``make_swiss_roll``.
     :param train: forwarded to ``self.preprocessing``.
     :param sigma: ``scale`` of the normal draw when ``y_type == 'normal'``.
     :param y_type: ``'normal'`` or ``'poisson'``.
     :param kernel: forwarded to ``self.preprocessing``.
     :param seed: seed for ``check_random_state`` and ``self.preprocessing``.
     :return: ``(Mal_features(...), scaler, bw, scaler_bag, bw_bag)``.
     :raises TypeError: if ``y_type`` is neither ``'normal'`` nor ``'poisson'``.
     """
     rs = check_random_state(seed)
     # Pick the label multiplier and the sampler for individual labels.
     if y_type == 'normal':
         scale = 1.0
         y_gen = partial(rs.normal, scale=sigma)
     elif y_type == 'poisson':
         scale = 0.5
         # abs() keeps the Poisson rate non-negative
         y_gen = lambda rate: rs.poisson(np.abs(rate))
     else:
         raise TypeError('y_gen type {} not understood'.format(y_type))
     sizes = self.bag_size_gen(num_bags, rs)
     #print('sizes:', sizes)
     total_pts = np.sum(sizes)
     X, label = make_swiss_roll(total_pts,
                                noise=data_noise,
                                random_state=rs)
     label = scale * label
     # Order points (and their labels) by descending third coordinate.
     sort_index = X[:, 2].argsort()
     X = X[sort_index[::-1]]
     label = label[sort_index[::-1]]
     # Only the first two coordinates are lifted to self.dim dimensions.
     X_dim = increase_dim(X[:, :2], self.dim)
     # NOTE(review): the rotation seed is the literal 23, not the ``seed``
     # argument -- confirm this is intended.
     data = rotate_orth(X_dim,
                        seed=23)  # Rotate into dim-dimensional object.
     # Cumulative bag boundaries: bag i covers indexes[i]:indexes[i + 1].
     indexes = [0] + np.cumsum(sizes).tolist()
     # Bag Variable on Manifold generate
     indiv_var = standardise(X[:, 2])
     bag_var = []
     bags = []
     indiv_labels = []
     indiv_true_labels = []
     bag_true_labels = []
     bag_labels = np.zeros(num_bags)
     for i in range(num_bags):
         lower = indexes[i]
         upper = indexes[i + 1]
         #print(indiv_var[lower:upper])
         # One sampled label per point in the bag.
         indiv_label_bag = [y_gen(phi) for phi in label[lower:upper]]
         bag_value = self.s_manifold(indiv_var[lower:upper],
                                     random_state=rs)
         # Repeat the bag-level value once per point so it can be appended as columns.
         bag_value_rep = np.tile(bag_value, (upper - lower, 1))
         bag_var.append(bag_value)
         bag_labels[i] = np.sum(indiv_label_bag)
         bags.append(np.hstack((data[lower:upper], bag_value_rep)))
         indiv_labels.append(indiv_label_bag)
         indiv_true_labels.append(label[lower:upper])
         bag_true_labels.append(np.sum(label[lower:upper]))
     bag_var = np.vstack(bag_var)
     bags, scaler, bw = self.preprocessing(bags,
                                           scaler,
                                           train,
                                           bw,
                                           kernel,
                                           seed=seed)
     bag_var, scaler_bag, bw_bag = self.preprocessing(bag_var,
                                                      scaler_bag,
                                                      train,
                                                      bw_bag,
                                                      kernel,
                                                      seed=seed,
                                                      bag_var=True)
     # Append three columns per point: a constant 1, the unsampled label and
     # the sampled label.
     bags = [
         np.column_stack((bags[index], np.ones(len(bags[index])),
                          indiv_true_labels[index], indiv_labels[index]))
         for index in range(num_bags)
     ]
     return Mal_features(bags,
                         pop=True,
                         indiv=True,
                         y=bag_labels,
                         true_indiv=True,
                         true_y=bag_true_labels,
                         bag_var=bag_var,
                         bag_pop=sizes), scaler, bw, scaler_bag, bw_bag
コード例 #15
0
ファイル: datasets.py プロジェクト: sumesh1/computer-vision
def get_swiss_roll_data(n_samples=1000):
    """
    Return a 2-D swiss roll (columns 0 and 2 of the 3-D roll) as float32.

    :param n_samples: number of points to generate.
    :return: ``(X, t)`` where ``X`` has two columns and ``t`` is the second
        array returned by ``make_swiss_roll``.
    """
    noise = 0.2
    # ``noise`` is keyword-only in current scikit-learn releases.
    X, t = make_swiss_roll(n_samples, noise=noise)
    X = X.astype('float32')[:, [0, 2]]
    return X, t
コード例 #16
0
# %% {"slideshow": {"slide_type": "slide"}}
# %%output filename='../media/03-scurve-latent' fig='png'
# Scatter "Component 1" against "Component 2" of the S-curve components,
# coloured by "y" and grouped by "tag"; the groups are laid out in two
# columns without shared axes.  The bare expression is the cell's output.
(
    s_curve_components.hvplot.scatter(
        x="Component 1", y="Component 2", color="y", groupby="tag", cmap="spectral"
    )
    .layout()
    .opts(title="S-Curve Manifold", shared_axes=False)
    .cols(2)
)

# %% {"slideshow": {"slide_type": "skip"}}
# Build the swiss roll data and run get_components for every (tag, model)
# pair from get_models(), stacking the results into one frame.
swissroll_models = get_models()
swissroll_X, swissroll_color = samples_generator.make_swiss_roll(
    n_samples=n_points, random_state=0
)

swissroll_components = pd.concat(
    [
        get_components(model, swissroll_X, swissroll_color, tag)
        for tag, model in swissroll_models.items()
    ]
)

# %% {"slideshow": {"slide_type": "slide"}}
# %%output filename='../media/03-swissroll-latent' fig='png'
(
    swissroll_components.hvplot.scatter(
        x="Component 1", y="Component 2", color="y", groupby="tag", cmap="spectral"
    )
コード例 #17
0
ファイル: example.py プロジェクト: bernease/Mmani
# sklearn.datasets.samples_generator no longer exists in current scikit-learn;
# the public sklearn.datasets path works on every release.
from sklearn.datasets import make_swiss_roll


# Same as first_example.py but using embed_with_rmetric()

rad = 0.05
n_samples = 1000
# Toggle between a hand-made spiral sheet (True) and sklearn's swiss roll.
if True:
    X = np.random.random((n_samples, 2))
    thet = X[:,0]
    # spiral in the first two coordinates, uniform third coordinate
    X1 = np.array( 3*thet*np.sin(2*thet ))
    X2 = np.array( 3*thet*np.cos(2*thet ))
    X = np.column_stack( (X1, X2, X[:,1]) )
else:
    X, thet = make_swiss_roll( n_samples, noise = 0.03 )
    X /= 10.
    thet -= thet.min()
    thet /= thet.max()  # normalize thet between [0,1] 

# make sure the data is C-contiguous before handing it over
X = np.asarray( X, order="C" )


distance_matrix, similarity_matrix, laplacian, Y, H = embed_with_rmetric( X,2, rad ) 

# Plot the results
コード例 #18
0
# Settings of the demo
desplayflag = 1
k = 10
numofsamples = 1000
noisey = 0.1
random_state_number = 30000

import numpy as np
#import pandas as pd
# sklearn.datasets.samples_generator no longer exists in current scikit-learn;
# the public sklearn.datasets path works on every release.
from sklearn.datasets import make_swiss_roll
from gtm import gtm
import matplotlib.pyplot as plt
import matplotlib.figure as figure
from mpl_toolkits.mplot3d import Axes3D

# load a swiss roll dataset and make a y-variable
# (``noise`` is keyword-only in current scikit-learn releases)
OriginalX, color = make_swiss_roll(numofsamples, noise=0, random_state=10)
X = OriginalX
rawy = 0.3 * OriginalX[:, 0] - 0.1 * OriginalX[:, 1] + 0.2 * OriginalX[:, 2]
# add Gaussian noise scaled to noisey times the std of rawy
originaly = rawy + noisey * rawy.std(ddof=1) * np.random.randn(len(rawy))
# plot
plt.rcParams["font.size"] = 18
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
p = ax.scatter(OriginalX[:, 0], OriginalX[:, 1], OriginalX[:, 2], c=originaly)
fig.colorbar(p)
plt.show()

# autoscaling
autoscaledX = (OriginalX - OriginalX.mean(axis=0)) / OriginalX.std(axis=0,
                                                                   ddof=1)
コード例 #19
0
	def __init__(self, batch_size, time_steps, data_type='circular_8_gaussians', n_dims = 2,  cuda=False):
		"""
		Build the toy point-cloud dataset selected by ``data_type``.

		Supported values: 'swiss_roll', 'grid_25_gaussians', 'grid_9_gaussians',
		'circular_8_gaussians', 'star_8_gaussians', 'circular_3_gaussians',
		'linear_degenerate_circular_3_gaussians' and
		'nonlinear_degenerate_circular_3_gaussians'.  The Gaussian variants are
		sampled in 3-D and reduced to their first two columns when
		``n_dims == 2``; the two degenerate variants then append a third column,
		are rotated and force ``self.n_dims`` to 3.  The final dataset is
		multiplied by 0.5.  After the data is built, ``self.train()`` and
		``self.reset()`` are called and the batch description is initialised.

		:param batch_size: batch size stored on the instance and used in the
			'observed' batch description.
		:param time_steps: stored on the instance and used in the batch description.
		:param data_type: name of the dataset to generate (see above).
		:param n_dims: 2 or 3; dimensionality of the generated points.
		:param cuda: stored on the instance; not otherwise used here.
		:raises ValueError: if ``data_type`` is not one of the supported names.
		"""
		self.batch_size = batch_size
		self.time_steps = time_steps
		self.data_type = data_type 
		self.n_dims = n_dims
		self.cuda = cuda
		self.mode = 'Train'
		self.iter = 0

		def gaussian_blobs(centers, samples_per_center, sigma):
			# One 3-D Gaussian blob per center, drawn in the given order so the
			# sequence of np.random calls is the same as with explicit loops.
			blobs = []
			for center in centers:
				mean = np.asarray(center).astype(np.float32)[np.newaxis, :]
				blobs.append(mean+np.random.randn(samples_per_center, 3).astype(np.float32)*sigma)
			return np.concatenate(blobs, axis=0)

		def with_both_z(planar_centers):
			# Each (i, j) center is used at z = -1 and at z = 1.
			return [(i, j, k) for (i, j) in planar_centers for k in [-1, 1]]

		def drop_z_if_2d(points):
			# Keep only the first two columns for 2-D datasets.
			if self.n_dims == 2:
				return points[:, :2]
			return points

		def shuffle_rows(points):
			# Random row order.
			rand_index = np.arange(points.shape[0])
			rand_index = np.random.permutation(rand_index)
			return points[rand_index, ...]

		if self.data_type == 'swiss_roll':
			# ``noise`` is keyword-only in current scikit-learn releases.
			dataset, _ = make_swiss_roll(15000+15000, noise=0.2)
			if self.n_dims == 3:
				dataset = dataset.astype(np.float32)/7.
			if self.n_dims == 2:
				dataset = dataset[:, [0, 2]].astype(np.float32)/7.
		elif self.data_type == 'grid_25_gaussians':
			centers = [(i, j, k) for i in range(-2, 3) for j in range(-2, 3) for k in range(-2, 3)]
			dataset = gaussian_blobs(centers, 120+120, 0.05)
			dataset = shuffle_rows(drop_z_if_2d(dataset))
		elif self.data_type == 'grid_9_gaussians':
			centers = [(i, j, k) for i in [-1, 0, 1] for j in [-1, 0, 1] for k in [-1, 0, 1]]
			dataset = gaussian_blobs(centers, 550+550, 0.05)
			# extra unit-variance samples around the origin
			curr_samples = np.random.randn(150+150, 3).astype(np.float32)
			dataset = np.concatenate([dataset, curr_samples], axis=0)
			dataset = shuffle_rows(drop_z_if_2d(dataset))
		elif self.data_type == 'circular_8_gaussians':
			centers = with_both_z([[-1.6, 0], [-1, 1], [-1, -1], [0, 1.4], [0, -1.4], [1, 1], [1, -1], [1.6, 0]])
			dataset = gaussian_blobs(centers, 1875, 0.05)
			dataset = shuffle_rows(drop_z_if_2d(dataset))
		elif self.data_type == 'star_8_gaussians':
			sigma = 0.25
			blobs = []
			for it, (i, j) in enumerate([[1.6, 0], [1, 1], [0, 1.4], [-1, 1], [-1.6, 0], [-1, -1], [0, -1.4], [1, -1]]):
				# each blob is stretched along one axis and rotated by it*45 degrees
				rotation_mat = np.asarray([np.cos(it*np.pi/4), np.sin(it*np.pi/4), -np.sin(it*np.pi/4), np.cos(it*np.pi/4)]).reshape(2, 2)
				for k in [-1, 1]:
					mean = np.asarray([i, j, k]).astype(np.float32)[np.newaxis, :]
					rand_part = np.random.randn(1875, 3).astype(np.float32)
					rand_part[:, 1] = rand_part[:, 1]*sigma
					rand_part[:, 0] = rand_part[:, 0]*0.5
					rand_part[:,:2] = np.matmul(rand_part[:,:2], rotation_mat)
					blobs.append(mean+rand_part)
			dataset = np.concatenate(blobs, axis=0)
			dataset = shuffle_rows(drop_z_if_2d(dataset))
		elif self.data_type == 'circular_3_gaussians':
			centers = with_both_z([[-1.6, 0], [1, 1], [-1, -1]])
			dataset = gaussian_blobs(centers, 5000, 0.3)
			dataset = shuffle_rows(drop_z_if_2d(dataset))

		elif self.data_type == 'linear_degenerate_circular_3_gaussians' or self.data_type == 'nonlinear_degenerate_circular_3_gaussians':
			centers = with_both_z([[-1.6, 0], [2, 2], [-2, -2]])
			dataset = drop_z_if_2d(gaussian_blobs(centers, 5000, 1))

			# append the degenerate coordinate: constant zero, or a function of column 0
			if self.data_type == 'linear_degenerate_circular_3_gaussians':
				dataset = np.concatenate([dataset, np.zeros((dataset.shape[0],1))], axis=-1)
			elif self.data_type == 'nonlinear_degenerate_circular_3_gaussians':
				dataset = np.concatenate([dataset, 8*np.tanh(dataset[:,0,np.newaxis])], axis=-1)

			# -60, 30 to -13 18  45 12
			degree1 = 15
			degree2 = 0		
			degree3 = -30			
			degreeRad1 = float(degree1)*np.pi/180.
			degreeRad2 = float(degree2)*np.pi/180.
			degreeRad3 = float(degree3)*np.pi/180.
			rotation_mat1 = np.asarray([np.cos(degreeRad1), np.sin(degreeRad1), 0, -np.sin(degreeRad1), np.cos(degreeRad1), 0, 0, 0, 1]).reshape(3, 3)
			rotation_mat2 = np.asarray([1, 0, 0, 0, np.cos(degreeRad2), np.sin(degreeRad2), 0, -np.sin(degreeRad2), np.cos(degreeRad2)]).reshape(3, 3)
			rotation_mat3 = np.asarray([np.cos(degreeRad3), 0, np.sin(degreeRad3), 0, 1, 0, -np.sin(degreeRad3), 0, np.cos(degreeRad3)]).reshape(3, 3)
			
			dataset = np.matmul(dataset, rotation_mat1) 
			dataset = np.matmul(dataset, rotation_mat2) 
			dataset = np.matmul(dataset, rotation_mat3) 

			dataset = shuffle_rows(dataset)

			self.n_dims = 3
		else:
			# Fail with a clear error instead of dropping into the debugger.
			raise ValueError('Unknown data_type: {!r}'.format(self.data_type))
		self.dataset = dataset*0.5
		
		# helper.dataset_plotter([self.dataset,], show_also=True)

		self.train()
		self.reset()
		self.batch = {}
		self.batch['context'] = {'properties': {'flat': [], 'image': []},
									 'data':       {'flat': None, 'image': None}}
		self.batch['observed'] = {'properties': {'flat': [{'dist': 'cont', 'name': 'Toy Data', 'size': tuple([self.batch_size, self.time_steps, self.n_dims])}], 
												 'image': []},
								  'data':       {'flat': None, 'image': None}}
コード例 #20
0
    N = K.shape[0]
    one_n = np.ones((N, N)) / N
    K = K - one_n.dot(K) - K.dot(one_n) + one_n.dot(K).dot(one_n)

    # Obtaining eigenvalues in descending order with corresponding
    # eigenvectors from the symmetric matrix.
    eigvals, eigvecs = eigh(K)

    # Obtaining the i eigenvectors that corresponds to the i highest eigenvalues.
    X_pc = np.column_stack(
        (eigvecs[:, -i] for i in range(1, n_components + 1)))

    return X_pc


# Generate the swiss roll data (800 points, fixed random state).
X, color = make_swiss_roll(n_samples=800, random_state=123)

# Save a 3-D view of the initial data to disk instead of showing it.
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(*X.T, c=color, cmap=plt.cm.rainbow)
ax.set_title('Swiss Roll in 3D')
fig.savefig('../figs/tutorial/sebraex4_1.png')
plt.close(fig)

# Linear PCA down to two components
scikit_pca = PCA(n_components=2)
X_spca = scikit_pca.fit_transform(X)

plt.figure(figsize=(8, 6))
コード例 #21
0
#-*- coding: utf-8 -*-
__author__ = 'gongwenqiang'


import numpy as np
from sklearn.cluster import AgglomerativeClustering
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as p3
from time import time
from sklearn.neighbors import kneighbors_graph
# sklearn.datasets.samples_generator no longer exists in current scikit-learn;
# the public sklearn.datasets path works on every release.
from sklearn.datasets import make_swiss_roll

# Generate the data; ``noise`` is keyword-only in current scikit-learn releases.
n_samples=1500
noise=0.05
X,_=make_swiss_roll(n_samples,noise=noise)
# make the roll thinner
X[:,1] *=.5

print("Compute unstructured hierarchical clustering...")
t0=time()
cluster0=AgglomerativeClustering(n_clusters=6,linkage='ward')
cluster0.fit(X)
elapsed_time=time()-t0
labels=cluster0.labels_
print("Elapsed time: %.2fs" % elapsed_time)
print("Number of points: %i" % labels.size)

fig=plt.figure()
# Axes3D(fig) no longer attaches itself to the figure in current Matplotlib;
# add_subplot(projection='3d') works on old and new releases.
ax=fig.add_subplot(111,projection='3d')
ax.view_init(7,-80)
for l in np.unique(labels):
    # np.float was removed from NumPy; the builtin float is equivalent
    ax.plot3D(X[labels==l,0],X[labels==l,1],X[labels==l,2],'o',color=plt.cm.jet(float(l)/ np.max(labels + 1)))
コード例 #22
0
# sklearn.datasets.samples_generator no longer exists in current scikit-learn;
# the public sklearn.datasets path works on every release.
from sklearn.datasets import make_swiss_roll, make_s_curve
from sklearn.manifold import TSNE
import mpl_toolkits.mplot3d

data_flag = 1  # 1: s-curve dataset, 2: swiss-roll dataset
perplexity = 85  # 85 in data_flag = 1, 50 in data_flag = 2

number_of_samples = 1000
noise = 0
random_state_number = 100

# ``noise`` is keyword-only for both generators in current scikit-learn.
if data_flag == 1:
    original_X, color = make_s_curve(number_of_samples,
                                     noise=noise,
                                     random_state=0)
elif data_flag == 2:
    original_X, color = make_swiss_roll(number_of_samples,
                                        noise=noise,
                                        random_state=0)

# plot
plt.rcParams["font.size"] = 18
fig = plt.figure(figsize=(7, 6))
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel("x1")
ax.set_ylabel("x2")
ax.set_zlabel("x3")
p = ax.scatter(original_X[:, 0], original_X[:, 1], original_X[:, 2], c=color)
#fig.colorbar(p)
plt.tight_layout()
plt.show()

autoscaled_X = (original_X - original_X.mean(axis=0)) / original_X.std(axis=0,
コード例 #23
0
# License: BSD

print(__doc__)

import time as time
import numpy as np
import pylab as pl
import mpl_toolkits.mplot3d.axes3d as p3
try:
    from sklearn.cluster import Ward
except ImportError:
    # Ward is not available in current scikit-learn; AgglomerativeClustering
    # uses ward linkage by default and accepts the same arguments used here.
    from sklearn.cluster import AgglomerativeClustering as Ward
# sklearn.datasets.samples_generator no longer exists in current scikit-learn;
# the public sklearn.datasets path works on every release.
from sklearn.datasets import make_swiss_roll

###############################################################################
# Generate data (swiss roll dataset)
n_samples = 1000
noise = 0.05
# ``noise`` is keyword-only in current scikit-learn releases.
X, _ = make_swiss_roll(n_samples, noise=noise)
# Make it thinner
X[:, 1] *= .5

###############################################################################
# Compute clustering
print("Compute unstructured hierarchical clustering...")
st = time.time()
ward = Ward(n_clusters=6).fit(X)
label = ward.labels_
print("Elapsed time: ", time.time() - st)
print("Number of points: ", label.size)

###############################################################################
# Plot result
fig = pl.figure()
コード例 #24
0
ファイル: segementation_fault.py プロジェクト: Jerryzcn/Mmani
import numpy as np
from scipy import sparse
# sklearn.datasets.samples_generator no longer exists in current scikit-learn;
# the public sklearn.datasets path works on every release.
from sklearn.datasets import make_swiss_roll

# Build a 3000-point swiss roll padded with uniform noise up to 750 features.
rng = np.random.RandomState(123)
ns = 3000
X, t = make_swiss_roll( ns, noise = 0.0, random_state = rng)
X = np.asarray( X, order="C" )
nf = 750 
rad0 = 2.5
dim = 2

rad = rad0/ns**(1./(dim+6))  #check the scaling
n_noisef = nf - 3
noise_rad_frac = 0.1
noiserad = rad/np.sqrt(n_noisef) * noise_rad_frac
Xnoise = rng.rand(ns, n_noisef) * noiserad
X = np.hstack((X, Xnoise))
rad = rad*(1+noise_rad_frac) # add a fraction for noisy dimensions

# Compute the radius-limited distance matrix with the cython backend.
from Mmani.geometry.distance import distance_matrix
dmat = distance_matrix(X, method = 'cython', radius = rad)