def test_manifold_fit(self, mock_fit_transform):
    """
    Check that Manifold.fit returns the visualizer itself and that the
    supplied fit_transform mock is invoked exactly once.
    """
    # mock_fit_transform is injected by the caller (presumably a patch
    # decorator that is not visible in this snippet).
    features, target = make_s_curve(1000, random_state=888)
    visualizer = Manifold(target="auto")

    fitted = visualizer.fit(features, target)

    assert fitted is visualizer, "fit did not return self"
    mock_fit_transform.assert_called_once()
def test_manifold_no_transform(self):
    """
    Check that calling transform raises AttributeError when the wrapped
    manifold estimator has no transform method.
    """
    features, _ = make_s_curve(1000, random_state=888)
    visualizer = Manifold(manifold='mds', target="auto")

    # Precondition: the underlying 'mds' estimator exposes no transform.
    has_transform = hasattr(visualizer._manifold, 'transform')
    assert not has_transform

    expected_message = "try using fit_transform instead"
    with pytest.raises(AttributeError, match=expected_message):
        visualizer.transform(features)
def test_manifold_fit(self, mock_draw):
    """
    Check that Manifold.fit returns self, triggers a single draw call,
    records fit_time_ and detects a continuous target.
    """
    # mock_draw is injected by the caller (presumably a patch decorator
    # that is not visible in this snippet).
    features, target = make_s_curve(1000, random_state=888)
    visualizer = Manifold(target="auto")

    # fit_time_ must only exist once the visualizer has been fitted.
    assert not hasattr(visualizer, 'fit_time_')

    fitted = visualizer.fit(features, target)
    assert fitted is visualizer, "fit did not return self"

    mock_draw.assert_called_once()
    assert hasattr(visualizer, 'fit_time_')
    assert visualizer._target_color_type == CONTINUOUS
def s_manifold(self, values, dim=5, n_points=10000, noise=0.0, random_state=None):
    """
    Pick the single S-curve sample whose standardised label is closest to
    the mean of ``values``.

    Parameters
    ----------
    values : array-like
        Values whose mean is used as the lookup target.
    dim : int
        Currently unused by this method (kept for interface compatibility).
    n_points : int
        Number of S-curve samples to generate.
    noise : float
        Noise level forwarded to ``make_s_curve``.
    random_state : optional
        Random state forwarded to ``make_s_curve``.

    Returns
    -------
    The selected sample with a leading axis of length one added.
    """
    target = np.mean(values)
    points, labels = make_s_curve(n_points, noise=noise, random_state=random_state)

    # Distance of every standardised label from the requested target value.
    distances = np.abs(standardise(labels) - target)
    nearest = np.argmin(distances)

    # Keep the result two-dimensional: one row holding the chosen sample.
    return np.expand_dims(points[nearest, :], 0)
def test_manifold_fit_transform(self, mock_draw):
    """
    Check that Manifold.fit_transform yields a two-column embedding,
    triggers a single draw call, records fit_time_ and detects a
    continuous target.
    """
    # mock_draw is injected by the caller (presumably a patch decorator
    # that is not visible in this snippet).
    features, target = make_s_curve(1000, random_state=888)
    visualizer = Manifold(target="auto")

    # fit_time_ must only exist once the visualizer has been fitted.
    assert not hasattr(visualizer, 'fit_time_')

    embedded = visualizer.fit_transform(features, target)

    # One embedded row per input row, projected onto two components.
    expected_shape = (features.shape[0], 2)
    assert embedded.shape == expected_shape

    mock_draw.assert_called_once()
    assert hasattr(visualizer, 'fit_time_')
    assert visualizer._target_color_type == CONTINUOUS
def test_manifold_algorithm_fit(self):
    """
    Check that each listed manifold algorithm can be fitted without raising
    """
    # TODO: parametrize this once unittest.TestCase dependency removed.
    features, target = make_s_curve(200, random_state=888)

    for name in (
        "lle",
        "ltsa",
        "hessian",
        "modified",
        "isomap",
        "mds",
        "spectral",
        "tsne",
    ):
        # A fresh visualizer per algorithm; the test passes if no fit raises.
        Manifold(manifold=name, random_state=223).fit(features, target)
def test_manifold_pandas(self):
    """
    Check the rendered manifold when the inputs are a pandas DataFrame
    and Series rather than numpy arrays.
    """
    raw_X, raw_y = make_s_curve(200, random_state=888)
    frame = pd.DataFrame(raw_X)
    series = pd.Series(raw_y)

    visualizer = Manifold(
        manifold='ltsa',
        colors='nipy_spectral',
        target='continuous',
        random_state=223,
    )
    oz = visualizer.fit(frame, series)

    # TODO: find a way to decrease this tolerance
    self.assert_images_similar(oz, tol=35)
def test_manifold_pandas(self):
    """
    Image-comparison test for Manifold fitted on a pandas DataFrame of
    features and a pandas Series target.
    """
    data, labels = make_s_curve(200, random_state=888)
    data, labels = pd.DataFrame(data), pd.Series(labels)

    settings = {
        'manifold': 'ltsa',
        'colors': 'nipy_spectral',
        'target': 'continuous',
        'random_state': 223,
    }
    oz = Manifold(**settings).fit(data, labels)

    # TODO: find a way to decrease this tolerance
    self.assert_images_similar(oz, tol=35)
horizontalalignment='right') if plot_num % 2 == 1: ax.set(ylabel='%s' % data_name) plot_num += 1 plt.savefig('compare1.png',dpi=1200) plt.show() ################################################ # compare2 ################################################ # Next line to silence pyflakes. This import is needed. Axes3D n_points = 1000 n_components = 2 X, color = samples_generator.make_s_curve(n_points,random_state=0) fig = plt.figure(figsize=(10,4)) fig.subplots_adjust(wspace=.5) # 3D origin ax = fig.add_subplot(131,projection='3d') ax.view_init(5,-55) ax.scatter(X[:, 0],X[:, 1],X[:, 2],c=color,cmap='Spectral') # PCA pca = PCA(n_components,random_state=1) t0 = time.time() X_pca = pca.fit_transform(X) t1 = time.time() var = 100*pca.explained_variance_ratio_ ax = fig.add_subplot(132)
# Prepend 0 to the lambda candidates for the EM algorithm.
candidates_of_lambda_in_em_algorithm = np.append(
    0, candidates_of_lambda_in_em_algorithm
)

# Run settings.
number_of_iterations = 200
display_flag = 1
noise_ratio_of_y = 0.1
random_state_number = 30000
number_of_samples = 300
number_of_test_samples = 100

# Column indices of the x variables and the y variables.
numbers_of_x = [0, 1, 2]
numbers_of_y = [3, 4]


def _add_relative_noise(signal):
    """Return ``signal`` plus Gaussian noise scaled by its sample std."""
    return signal + noise_ratio_of_y * signal.std(ddof=1) * np.random.randn(
        len(signal))


# Generate samples for demonstration
np.random.seed(seed=100)
x, color = make_s_curve(number_of_samples, random_state=10)

# Noise is drawn for y1 first and y2 second so the global random stream is
# consumed in a fixed order.
raw_y1 = 0.3 * x[:, 0] - 0.1 * x[:, 1] + 0.2 * x[:, 2]
y1 = _add_relative_noise(raw_y1)
raw_y2 = np.arcsin(x[:, 0]) + np.log(x[:, 1]) - 0.5 * x[:, 2]**4 + 5
y2 = _add_relative_noise(raw_y2)

# plot y1 vs. y2
plt.rcParams['font.size'] = 18
square_figsize = figure.figaspect(1)
plt.figure(figsize=square_figsize)
plt.scatter(y1, y2)
plt.xlabel('y1')
plt.ylabel('y2')
plt.show()
def toy_s_gen(self, num_bags, scaler=None, indiv=None, bw=None,
              data_noise=0.2, train=False, y_type='normal',
              kernel='rbf', sigma=1.0, seed=23):
    """
    Generate a bagged toy dataset from a noisy S-curve.

    Points are sampled with ``make_s_curve``, sorted by their third
    coordinate in descending order, lifted to ``self.dim`` dimensions and
    rotated, then cut into consecutive bags whose sizes come from
    ``self.bag_size_gen``. Each point receives a "true" label (the shifted
    and scaled S-curve label) and an observed label drawn around it.

    Parameters
    ----------
    num_bags : int
        Number of bags to produce.
    scaler, bw, train, kernel
        Forwarded to ``self.preprocessing``.
    indiv
        Not read by this method.
    data_noise : float
        Noise level passed to ``make_s_curve``.
    y_type : {'normal', 'poisson'}
        Distribution used to draw the observed per-point labels.
    sigma : float
        Scale of the normal distribution (only used for 'normal').
    seed
        Seed for the random state; also forwarded to ``self.preprocessing``.

    Returns
    -------
    tuple
        ``(Mal_features(...), scaler, bw)`` where ``scaler`` and ``bw`` are
        the values returned by ``self.preprocessing``.

    Raises
    ------
    TypeError
        If ``y_type`` is neither 'normal' nor 'poisson'.
    """
    rs = check_random_state(seed)

    if y_type not in ('normal', 'poisson'):
        raise TypeError('y_gen type {} not understood'.format(y_type))
    if y_type == 'normal':
        scale, y_gen = 1.0, partial(rs.normal, scale=sigma)
    else:
        scale, y_gen = 0.5, rs.poisson

    sizes = self.bag_size_gen(num_bags, rs)
    total_pts = np.sum(sizes)
    X, label = make_s_curve(total_pts, noise=data_noise, random_state=rs)

    # Shift to ensure everything is positive, then apply the scale.
    label = scale * (label + 6.0)

    # Last dimension is vertical axis: order points from top to bottom.
    descending = X[:, 2].argsort()[::-1]
    X, label = X[descending], label[descending]

    # Rotate into dim-dimensional object.
    data = rotate_orth(increase_dim(X, self.dim), seed=23)

    # Bag i covers rows offsets[i]:offsets[i + 1] of the sorted data.
    offsets = [0] + np.cumsum(sizes).tolist()
    bags = []
    indiv_labels = []
    indiv_true_labels = []
    bag_true_labels = []
    bag_labels = np.zeros(num_bags)
    for i in range(num_bags):
        window = slice(offsets[i], offsets[i + 1])
        true_in_bag = label[window]
        observed_in_bag = [y_gen(phi) for phi in true_in_bag]

        bag_labels[i] = np.sum(observed_in_bag)
        bags.append(data[window])
        indiv_labels.append(observed_in_bag)
        indiv_true_labels.append(true_in_bag)
        bag_true_labels.append(np.sum(true_in_bag))

    bags, scaler, bw = self.preprocessing(bags, scaler, train, bw, kernel,
                                          seed=seed)

    # Append three columns to every bag: a column of ones, the true
    # per-point labels and the observed per-point labels.
    augmented = []
    for index in range(num_bags):
        bag = bags[index]
        augmented.append(np.column_stack((
            bag,
            np.ones(len(bag)),
            indiv_true_labels[index],
            indiv_labels[index],
        )))

    features = Mal_features(augmented, pop=True, indiv=True, true_indiv=True,
                            y=bag_labels, true_y=bag_true_labels,
                            bag_pop=sizes)
    return features, scaler, bw
# Demonstration of t-SNE (t-distributed Stochastic Neighbor Embedding) using scikit-learn

import matplotlib.pyplot as plt
# Imported for its side effect: makes the '3d' projection available on
# matplotlib versions that do not register it automatically.
import mpl_toolkits.mplot3d
# The generators are imported from the public ``sklearn.datasets`` package;
# the old ``sklearn.datasets.samples_generator`` module path is deprecated
# and no longer importable on current scikit-learn releases.
from sklearn.datasets import make_s_curve, make_swiss_roll
from sklearn.manifold import TSNE

data_flag = 1  # 1: s-curve dataset, 2: swiss-roll dataset
perplexity = 85  # 85 in data_flag = 1, 50 in data_flag = 2
number_of_samples = 1000
noise = 0
random_state_number = 100

# ``noise`` is passed by keyword because current scikit-learn accepts only
# ``n_samples`` positionally for these generators.
if data_flag == 1:
    original_X, color = make_s_curve(number_of_samples, noise=noise, random_state=0)
elif data_flag == 2:
    original_X, color = make_swiss_roll(number_of_samples, noise=noise, random_state=0)
else:
    # Fail here with a clear message instead of a NameError further down.
    raise ValueError(
        'data_flag must be 1 (s-curve) or 2 (swiss-roll), got {!r}'.format(data_flag))

# plot
plt.rcParams["font.size"] = 18
fig = plt.figure(figsize=(7, 6))
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel("x1")
ax.set_ylabel("x2")
ax.set_zlabel("x3")
# Colour each point by the position value returned with the dataset.
p = ax.scatter(original_X[:, 0], original_X[:, 1], original_X[:, 2], c=color)
#fig.colorbar(p)
plt.tight_layout()