def ISOMAP_transform(train_feature, test_feature, n_components, n_neighbors=5):
    """Project train and test features with an Isomap fitted on the training set.

    Parameters
    ----------
    train_feature : array-like
        Training samples; the Isomap model is fitted on these only.
    test_feature : array-like
        Samples projected with the already fitted model.
    n_components : int
        Number of embedding dimensions.
    n_neighbors : int, default 5
        Number of neighbours handed to Isomap.

    Returns
    -------
    tuple
        ``(train_feature_transformed, test_feature_transformed)``.
    """
    from sklearn.manifold import Isomap

    # Keyword arguments on purpose: scikit-learn releases with keyword-only
    # constructors reject ``Isomap(n_neighbors, n_components)``.
    isomap = Isomap(n_neighbors=n_neighbors, n_components=n_components)
    isomap.fit(train_feature)
    train_feature_transformed = isomap.transform(train_feature)
    test_feature_transformed = isomap.transform(test_feature)
    return train_feature_transformed, test_feature_transformed
def dimension_reduce():
    """Compare PCA, spectral embedding and Isomap on the current dataset.

    Draws the three 2-D projections of ``digits.data`` side by side, coloured
    by ``digits.target``, shows the figure, and then displays the PCA mean and
    the first two principal components as 8x8 images.
    """
    def _scatter_panel(position, embedded):
        # One panel of the 1x3 comparison figure.
        plt.subplot(1, 3, position)
        plt.scatter(embedded[:, 0], embedded[:, 1], c=digits.target)

    # Linear baseline: PCA down to two components.
    pca = PCA(n_components=2)
    pca.fit(digits.data)
    _scatter_panel(1, pca.transform(digits.data))

    # Spectral embedding with its default settings.
    spectral = SpectralEmbedding()
    _scatter_panel(2, spectral.fit_transform(digits.data))

    # Isomap with a 20-neighbour graph.
    isomap = Isomap(n_components=2, n_neighbors=20)
    isomap.fit(digits.data)
    _scatter_panel(3, isomap.transform(digits.data))
    plt.show()

    # PCA mean, then first and second principal component, as images.
    for flat_image in (pca.mean_, pca.components_[0], pca.components_[1]):
        plt.matshow(flat_image.reshape(8, 8))
    plt.show()
def test_isomap():
    """Check that KNeighborsTransformer + precomputed Isomap equals plain Isomap.

    The chained pipeline and the compact estimator must produce (almost) the
    same embedding both for the fitted data and for unseen data.
    """
    neighbors = 10
    search = 'auto'
    X_fit, _ = make_blobs(random_state=0)
    X_new, _ = make_blobs(random_state=1)

    # Chained version: explicit neighbour graph, then Isomap on that graph.
    chained = make_pipeline(
        KNeighborsTransformer(n_neighbors=neighbors, algorithm=search,
                              mode='distance'),
        Isomap(n_neighbors=neighbors, metric='precomputed'),
    )
    # Compact version: Isomap builds the neighbour graph itself.
    compact = Isomap(n_neighbors=neighbors, neighbors_algorithm=search)

    embedded_chained = chained.fit_transform(X_fit)
    embedded_compact = compact.fit_transform(X_fit)
    assert_array_almost_equal(embedded_chained, embedded_compact)

    projected_chained = chained.transform(X_new)
    projected_compact = compact.transform(X_new)
    assert_array_almost_equal(projected_chained, projected_compact)
def process_sylvine(Xtrain, ytrain, Xval, Xtest):
    """Train an ExtraTrees model on selected columns plus Isomap features.

    Keeps the columns flagged in ``goods``, appends a 3-component Isomap
    embedding (fitted on the training rows) to each split, fits an
    ``ExtraTreesClassifier`` and returns rounded class-1 probabilities.

    :param Xtrain: training feature matrix (indexed as ``X[:, mask]``)
    :param ytrain: training labels
    :param Xval: validation feature matrix
    :param Xtest: test feature matrix
    :return: ``(yvalfinal, ytestfinal)`` rounded class-1 probabilities
    """
    # print() calls: the former print statements are a syntax error on
    # Python 3, and ``print (x) / 60.`` would divide print's return value.
    print('ITS A SYLVINE TIME')
    print()

    goods = np.array([False, False, False, False, False, False, True, False,
                      True, True, False, False, False, False, True, True,
                      False, False, False, True])
    Xnewtrain = np.array(Xtrain[:, goods])
    Xnewtest = np.array(Xtest[:, goods])
    Xnewval = np.array(Xval[:, goods])

    t0 = time.time()
    iso = Isomap(n_neighbors=20, n_components=3).fit(Xnewtrain[:, :6])
    print('ISOSTAS !!!')
    print((time.time() - t0) / 60.)

    t0 = time.time()
    Xisotrain = iso.transform(Xnewtrain[:, :6])
    Xisotest = iso.transform(Xnewtest[:, :6])
    Xisoval = iso.transform(Xnewval[:, :6])
    print('ISOSTAS RETURNED !!!')
    print((time.time() - t0) / 60.)

    # Append the embedding columns to the selected raw columns.
    Xnewtrain = np.hstack((Xnewtrain, Xisotrain))
    Xnewtest = np.hstack((Xnewtest, Xisotest))
    Xnewval = np.hstack((Xnewval, Xisoval))

    modelrf = ExtraTreesClassifier(n_estimators=10000, n_jobs=-1)
    modelrf.fit(Xnewtrain, ytrain)
    print('STASON ET DONE')
    # Still measured from the start of the transform step, as before.
    print((time.time() - t0) / 60.)

    ytestrf = modelrf.predict_proba(Xnewtest)[:, 1]
    yvalrf = modelrf.predict_proba(Xnewval)[:, 1]
    ytestfinal = np.round(ytestrf)
    yvalfinal = np.round(yvalrf)
    return yvalfinal, ytestfinal
class FloorplanEstimator:
    """Rough floorplan estimation from labeled fingerprints via 2-D Isomap."""

    def __init__(self):
        """Set up an unfitted Isomap reducer and empty training state."""
        self.dimred = Isomap(n_neighbors=25, n_components=2)
        self._fingerprints = None
        self._label = None

    def fit(self, fingerprints, label):
        """
        Fit the reducer and remember the training data for drawing.

        :param fingerprints: list of fingerprints
        :param label: list of corresponding labels
        """
        self.dimred.fit(fingerprints)
        # Kept so that draw() can re-project and colour the training points.
        self._fingerprints, self._label = fingerprints, label

    def transform(self, fingerprints):
        """
        Map fingerprints to x,y coordinates on the floorplan.

        :param fingerprints: list of fingerprints
        :return: list of [x,y] coordinates
        """
        coordinates = self.dimred.transform(fingerprints)
        return coordinates

    def draw(self):
        """
        Draw the estimated floorplan in the current figure.
        """
        coords = self.dimred.transform(self._fingerprints)
        xs, ys = coords[:, 0], coords[:, 1]

        # Unit-spaced grid over the bounding box of the embedded points.
        grid_x, grid_y = np.meshgrid(np.arange(xs.min(), xs.max(), 1.0),
                                     np.arange(ys.min(), ys.max(), 1.0))

        # Label every grid node from training points within radius 3.0;
        # nodes without neighbours get label 0.
        classifier = RadiusNeighborsClassifier(radius=3.0, outlier_label=0)
        classifier.fit(coords, self._label)
        grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
        regions = classifier.predict(grid_points).reshape(grid_x.shape)

        plt.pcolormesh(grid_x, grid_y, regions)
        plt.scatter(xs, ys, c=self._label, vmin=0)
class FloorplanEstimator:
    """Simple estimator that lays fingerprints out on a rough 2-D floorplan."""

    def __init__(self):
        """Create the Isomap reducer; no fingerprints are stored yet."""
        self.dimred = Isomap(n_neighbors=25, n_components=2)
        self._fingerprints = None
        self._label = None

    def fit(self, fingerprints, label):
        """
        Estimate the floorplan from labeled fingerprints.

        :param fingerprints: list of fingerprints
        :param label: list of corresponding labels
        """
        reducer = self.dimred
        reducer.fit(fingerprints)
        # Stored only after a successful fit; draw() reads both.
        self._fingerprints = fingerprints
        self._label = label

    def transform(self, fingerprints):
        """
        Return the floorplan coordinates of the given fingerprints.

        :param fingerprints: list of fingerprints
        :return: list of [x,y] coordinates
        """
        return self.dimred.transform(fingerprints)

    def draw(self):
        """
        Draw the estimated floorplan in the current figure.
        """
        embedded = self.dimred.transform(self._fingerprints)
        col_x = embedded[:, 0]
        col_y = embedded[:, 1]

        # Grid axes cover the embedded points in steps of 1.0.
        axis_x = np.arange(col_x.min(), col_x.max(), 1.0)
        axis_y = np.arange(col_y.min(), col_y.max(), 1.0)
        mesh_x, mesh_y = np.meshgrid(axis_x, axis_y)

        # Radius-based vote per grid node; label 0 when nothing is in range.
        voter = RadiusNeighborsClassifier(radius=3.0, outlier_label=0)
        voter.fit(embedded, self._label)
        predicted = voter.predict(np.c_[mesh_x.ravel(), mesh_y.ravel()])

        plt.pcolormesh(mesh_x, mesh_y, predicted.reshape(mesh_x.shape))
        plt.scatter(col_x, col_y, c=self._label, vmin=0)
def df_isomap(df, n_comp=2, n_jobs=1, n_neighbors=5, max_iter=1000):
    """Return the Isomap embedding of a normalized dataframe.

    :param df: input dataframe, normalized with ``normalize_dataframe``
    :param n_comp: number of embedding dimensions
    :param n_jobs: number of parallel jobs handed to Isomap
    :param n_neighbors: neighbours used to build the Isomap graph
    :param max_iter: iteration cap handed to Isomap's eigen solver
    :return: ``pandas.DataFrame`` holding the embedded coordinates
    """
    rd_df = normalize_dataframe(df)
    # n_jobs was accepted but silently ignored before; forward it.
    rd = Isomap(n_components=n_comp, n_neighbors=n_neighbors,
                max_iter=max_iter, n_jobs=n_jobs)
    # Fit on the same normalized frame that is transformed below; the former
    # ``caracteristicas_df`` is not defined anywhere in this function.
    rd.fit(rd_df)
    caracteristicas_rd = rd.transform(rd_df)
    caracteristicas_rd_df = pd.DataFrame(caracteristicas_rd)
    return caracteristicas_rd_df
def iso_map(d: pd.DataFrame):
    """Embed ``d`` in two Isomap components, save them to CSV and return them.

    The result keeps the index of ``d``, names its columns ``comp1`` and
    ``comp2``, and is written to ``ISOMAP_FILE`` with the index included.
    """
    embedder = Isomap(n_components=2, n_jobs=-1)
    embedder.fit(d)
    embedded = pd.DataFrame(
        embedder.transform(d),
        columns=['comp1', 'comp2'],
        index=d.index,
    )
    embedded.to_csv(ISOMAP_FILE, index=True)
    return embedded
def isomap10FoldClf(X, y, nclf):
    """Estimate the 10-fold accuracy of ``nclf`` on a 2-D Isomap embedding.

    For each shuffled fold an Isomap (30 neighbours, 2 components) is fitted
    on the training rows only, both splits are projected, ``nclf`` is refitted
    on the projected training rows and scored on the projected test rows.

    :param X: feature array supporting integer-array row indexing
    :param y: label array aligned with ``X``
    :param nclf: classifier exposing ``fit`` and ``predict``
    :return: ``(mean accuracy, standard deviation)`` over the folds
    """
    # Imported locally: the old ``KFold(n, n_folds=...)`` iterable came from
    # ``sklearn.cross_validation``, which no longer exists.
    from sklearn.model_selection import KFold

    n_neighbors = 30
    acc = []
    folds = KFold(n_splits=10, shuffle=True)
    for train_index, test_index in folds.split(X):
        yTest = y[test_index]
        yTrain = y[train_index]

        # Keyword argument: positional constructor arguments are rejected by
        # scikit-learn releases with keyword-only estimators.
        clf = Isomap(n_neighbors=n_neighbors, n_components=2)
        clf.fit(X[train_index])
        newRepTrain = clf.transform(X[train_index])
        newRepTest = clf.transform(X[test_index])

        nclf.fit(newRepTrain, yTrain)
        XPred = nclf.predict(newRepTest)
        # Fraction of correctly predicted test labels in this fold.
        acc.append(np.mean(XPred == yTest))
    return np.mean(acc), np.std(acc)
def _exec_pca(self):
    """Replace training and prediction data with standardized Isomap embeddings.

    Despite the name, the reducer is an Isomap. It is fitted on the training
    data with one component per ten training rows; the scaler is fitted on
    the embedded training data and reused for the prediction data. Both
    results keep the index of the frame they replace.
    """
    component_count = int(self._training_data.shape[0] / 10)
    reducer = Isomap(n_components=component_count)
    # Alternative kept for reference:
    # KernelPCA(n_components=int(self._training_data.shape[1]), kernel='rbf', gamma=20.0)
    standardizer = StandardScaler()

    embedded_train = reducer.fit_transform(self._training_data)
    scaled_train = standardizer.fit_transform(embedded_train)
    self._training_data = pd.DataFrame(scaled_train,
                                       index=self._training_data.index)

    embedded_pred = reducer.transform(self._pred_data)
    scaled_pred = standardizer.transform(embedded_pred)
    self._pred_data = pd.DataFrame(scaled_pred, index=self._pred_data.index)
class IsomapClassifier(BaseEstimator):
    """Classifier that clusters an Isomap embedding and maps clusters to classes.

    ``fit`` embeds the training data, runs KMeans on the embedding and pairs
    clusters with target classes via the Munkres assignment computed from the
    confusion matrix. ``predict`` embeds new samples, assigns each to its
    nearest centroid and translates the cluster id through that pairing.
    """

    def __init__(self, n_neighbors=5, n_components=2, n_clusters=2,
                 eigen_solver='auto', random_state=3319):
        self.n_neighbors = n_neighbors
        self.n_components = n_components
        self.n_clusters = n_clusters
        # Previously dropped, so fit() failed with AttributeError when it
        # read self.eigen_solver.
        self.eigen_solver = eigen_solver
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the manifold, the centroids and the cluster-to-class mapping.

        :param X: training samples
        :param y: training targets
        :return: ``self``
        """
        # creating a manifold on training data
        self.model = Isomap(n_neighbors=self.n_neighbors,
                            n_components=self.n_components,
                            eigen_solver=self.eigen_solver).fit(X, y)
        # The embedding is needed twice below; compute it once.
        embedded = self.model.transform(X)

        # determining centroids for given classes
        self.centroids = KMeans(n_clusters=self.n_clusters,
                                random_state=self.random_state).fit(embedded)
        # Every point is assigned to a certain cluster.
        labels = self.centroids.predict(embedded)

        # assigning each centroid to the correct cluster
        confusion_m = confusion_matrix(y, labels)
        m = Munkres()
        cost_m = make_cost_matrix(confusion_m)
        # (target, cluster) assignment pairs.
        target_cluster = m.compute(cost_m)

        # saving mapping for predictions
        self.mapping = {
            cluster: target
            for target, cluster in dict(target_cluster).items()
        }
        return self

    def predict(self, X_test):
        """Return the predicted class for every sample in ``X_test`` as a list."""
        # transforming test set using manifold learning method
        X_trans = self.model.transform(X_test)
        # assigning each of the points to the closest centroid
        labels = self.centroids.predict(X_trans)
        y_pred = list(map(self.mapping.get, labels))
        return y_pred
def reduce_features_to_two_dimensions(features):
    '''
    Project ``features.data`` onto two Isomap components.

    The original note describes the input as 784-dimensional; the 2-D result
    is what the visualize_features function plots.
    '''
    reducer = Isomap(n_components=2)
    reducer.fit(features.data)
    return reducer.transform(features.data)
def runIsomap(X_train, X_test, y_train, y_test, comp_range, n_neigh):
    """Score rbf and linear SVMs on Isomap projections of several sizes.

    For every value in ``comp_range`` an Isomap is fitted on ``X_train``, both
    splits are projected (the 2-D projections are also saved with ``np.save``)
    and the mean score of each kernel is recorded. One result file per kernel
    lists all scores together with the best component count.

    :return: ``(rbf_scores, linear_scores)``, one entry per ``comp_range`` item
    """
    rbf_scores = []
    linear_scores = []
    per_kernel = (('rbf', rbf_scores), ('linear', linear_scores))

    for n_comp in comp_range:
        print("\nn_comp=%d\n" % (n_comp))
        transformer = Isomap(n_neighbors=n_neigh, n_components=n_comp,
                             n_jobs=8)
        transformer.fit(X_train)
        X_train_proj = transformer.transform(X_train)
        X_test_proj = transformer.transform(X_test)

        # Keep the 2-D projections on disk.
        if n_comp == 2:
            np.save('X_train_proj_2d_Isomap_' + str(n_neigh), X_train_proj)
            np.save('X_test_proj_2d_Isomap_' + str(n_neigh), X_test_proj)

        for kernel, collected in per_kernel:
            score = SVMmodel.runSVM(X_train_proj, X_test_proj, y_train,
                                    y_test, SVMmodel.getBestParam(kernel),
                                    kernel)
            collected.append(score.mean())

    for kernel, scores in per_kernel:
        bestIdx = np.argmax(scores)
        bestNComp = comp_range[bestIdx]
        bestAcc = scores[bestIdx]
        report_path = 'res_Isomap_' + kernel + '_' + str(n_neigh) + '.txt'
        with open(report_path, 'w') as f:
            for n_comp, acc in zip(comp_range, scores):
                f.write(kernel + ": n_comp = %f, acc = %f\n" % (n_comp, acc))
            f.write(kernel + ": Best n_comp = %f\n" % (bestNComp))
            f.write(kernel + ": acc = %f\n" % (bestAcc))
    return rbf_scores, linear_scores
def PlotAllMethods(X, k, reducedFeatures):
    """Plot k-means elbow curves for the raw, PCA-reduced and Isomap-reduced data.

    :param X: input samples
    :param k: value forwarded to ``Kmeans`` and ``PlotElbow``
    :param reducedFeatures: number of components for PCA and Isomap
    """
    pca = PCA(reducedFeatures).fit(X)
    isomap = Isomap(n_components=reducedFeatures, n_neighbors=1).fit(X)
    X_PCA = pca.transform(X)
    X_ISO = isomap.transform(X)

    # Cluster each representation first, then draw the curves in that order.
    inertias = []
    for representation in (X, X_PCA, X_ISO):
        _labels, inertia = Kmeans(representation, k)
        inertias.append(inertia)

    for inertia in inertias:
        PlotElbow(inertia, k)
def plot(self, n_components=2, n_neighbors=5, transform="log", switch_x=False,
         switch_y=False, switch_z=False, colors=None, max_features=500,
         show_plot=True):
    """Embed the scaled data with Isomap and optionally plot component pairs.

    :param n_components: number of embedding dimensions handed to Isomap;
        the extra figures 2 and 3 are drawn only when it is at least 3.
        NOTE(review): the previous text advertised fractional values
        (e.g. 0.95 for 95% of the variance); nothing in this method handles
        that case, so confirm before relying on it.
    :param n_neighbors: number of neighbours handed to Isomap
    :param transform: forwarded to ``self.scale_data`` as ``transform_method``;
        documented values are 'log' (log10, zero counts set to 1) and
        'anscombe'
    :param switch_x: flip the sign of the first component
    :param switch_y: flip the sign of the second component
    :param switch_z: flip the sign of the third component
    :param colors: forwarded to ``self._plot``
    :param max_features: forwarded to ``self.scale_data``
    :param show_plot: draw the figures when true
    :return: the fitted Isomap instance
    """
    from sklearn.manifold import Isomap
    import numpy as np

    pylab.clf()
    data, kept = self.scale_data(transform_method=transform,
                                 max_features=max_features)

    iso = Isomap(n_neighbors=n_neighbors, n_components=n_components)
    iso.fit(data.T)
    Xr = iso.transform(data.T)
    # The stored array is the same object that is flipped in place below.
    self.Xr = Xr

    for flip, axis in ((switch_x, 0), (switch_y, 1), (switch_z, 2)):
        if flip:
            Xr[:, axis] *= -1

    # First component against the second.
    if show_plot:
        pylab.figure(1)
        self._plot(Xr, pca=None, pc1=0, pc2=1, colors=colors)

    # Remaining pairs exist only for three or more components.
    if n_components >= 3 and show_plot:
        for figure_id, (first, second) in ((2, (0, 2)), (3, (1, 2))):
            pylab.figure(figure_id)
            self._plot(Xr, pca=None, pc1=first, pc2=second, colors=colors)
    return iso
class IsomapImpl:
    """Thin wrapper that forwards ``fit`` and ``transform`` to an ``Op`` instance."""

    def __init__(self, **hyperparams):
        """Store the hyperparameters and build the wrapped ``Op`` from them."""
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, passing ``y`` only when it is given; return ``self``."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's transform of ``X``."""
        transformed = self._wrapped_model.transform(X)
        return transformed
def spectral_embedding(train, val, method, plot_folder, neighbors, classes, dimensions=2):
    """Project the ``'data'`` entry of train and val with the chosen method.

    ``Isomap`` and the three LLE variants are fitted on the training data and
    then applied to both splits. ``TSNE`` and ``laplacian_eigenmaps`` are
    fit-transformed on each split separately. Any other ``method`` leaves the
    copies unprojected. Both results are handed to ``visualize_groundTruth``.

    :return: ``(projected_train, projected_val)`` copies of the inputs
    """
    projected_train = train.copy()
    projected_val = val.copy()

    # Extra constructor arguments for each LocallyLinearEmbedding flavour.
    lle_variants = {
        "LLE": {},
        "modified_LLE": {"method": "modified"},
        "hessian_LLE": {"method": "hessian"},
    }

    fitted_on_train = None   # fitted once on train, applied to both splits
    refit_per_split = None   # fit_transform is run on each split separately
    if method == "Isomap":
        fitted_on_train = Isomap(n_neighbors=neighbors,
                                 n_components=dimensions).fit(train['data'])
    elif method == "TSNE":
        refit_per_split = TSNE(n_components=dimensions)
    elif method in lle_variants:
        fitted_on_train = LocallyLinearEmbedding(
            n_neighbors=neighbors, n_components=dimensions,
            **lle_variants[method]).fit(train['data'])
    elif method == "laplacian_eigenmaps":
        refit_per_split = SpectralEmbedding(n_components=dimensions)

    if fitted_on_train is not None:
        projected_train['data'] = fitted_on_train.transform(train['data'])
        projected_val['data'] = fitted_on_train.transform(val['data'])
    elif refit_per_split is not None:
        projected_train['data'] = refit_per_split.fit_transform(train['data'])
        projected_val['data'] = refit_per_split.fit_transform(val['data'])

    visualize_groundTruth(projected_train, method + "_training", plot_folder, classes)
    visualize_groundTruth(projected_val, method + "_validation", plot_folder, classes)
    return projected_train, projected_val
def _exec_pca(self):
    """Replace training and prediction data with scaled Isomap embeddings.

    Despite the name, the reducer is an Isomap with one component per ten
    training rows. ``self._scaler_type`` selects the scaler: 1 gives
    StandardScaler, 2 gives MinMaxScaler, anything else gives RobustScaler
    with a 25-75 quantile range. The scaler is fitted on the embedded
    training data and reused for the prediction data.
    """
    component_count = int(self._training_data.shape[0] / 10)
    reducer = Isomap(n_components=component_count)
    # Alternative kept for reference:
    # KernelPCA(n_components=int(self._training_data.shape[1]), kernel='rbf', gamma=20.0)

    if self._scaler_type == 1:
        scaler = StandardScaler()
    else:
        scaler = (MinMaxScaler() if self._scaler_type == 2
                  else RobustScaler(quantile_range=(25., 75.)))

    embedded_train = reducer.fit_transform(self._training_data)
    scaled_train = scaler.fit_transform(embedded_train)
    self._training_data = pd.DataFrame(scaled_train,
                                       index=self._training_data.index)

    embedded_pred = reducer.transform(self._pred_data)
    scaled_pred = scaler.transform(embedded_pred)
    self._pred_data = pd.DataFrame(scaled_pred, index=self._pred_data.index)
def example_04():
    """Digits walkthrough: Isomap projection, Gaussian naive Bayes, confusion matrix.

    Prints the test accuracy and shows the confusion matrix as a heatmap. The
    Isomap projection is computed, but its scatter plot is commented out.
    """
    from sklearn.manifold import Isomap
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import GaussianNB
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import confusion_matrix

    digits = load_digits()
    # fig, axes = plt.subplots(10, 10, figsize=(8, 8), subplot_kw={'xticks': [], 'yticks': []},
    #                          gridspec_kw=dict(hspace=0.1, wspace=0.1))
    #
    # # axes.flat is a one-dimensional iterator
    # for i, ax in enumerate(axes.flat):
    #     ax.imshow(digits.images[i], cmap='binary')
    #     ax.text(0.05, 0.05, str(digits.target[i]), transform=ax.transAxes, color='green')
    # plt.show()
    X = digits.data
    y = digits.target

    iso = Isomap(n_components=2)
    iso.fit(X)
    data_projected = iso.transform(X)
    # plt.scatter(data_projected[:, 0], data_projected[:, 1], c=y, edgecolors='none', alpha=0.5,
    #             cmap=plt.cm.get_cmap('Spectral', 10))
    # plt.colorbar(label='digit label', ticks=range(10))
    # plt.clim(-0.5, 9.5)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    model = GaussianNB()
    model.fit(X_train, y_train)
    y_model = model.predict(X_test)

    # Quantitative score
    print(accuracy_score(y_model, y_test))

    # The result is about 85% accuracy, but that does not tell us where the
    # errors are; printing the confusion matrix does.
    mat = confusion_matrix(y_test, y_model)
    sns.heatmap(mat, square=True, annot=True, cbar=True)
    plt.xlabel('predict value')
    plt.ylabel('True value')
    plt.show()
def main():
    """Digits demo: Isomap scatter, Gaussian naive Bayes, confusion matrix, sample grid.

    Shows three figures in turn: the 2-D Isomap projection of all digits, the
    confusion-matrix heatmap of the classifier on the test split, and a 10x10
    grid of test images annotated with the predicted label (green when the
    prediction is correct, red otherwise).
    """
    digits = load_digits()
    print(digits.images.shape)

    # get the 2D representation of the images [n_samples, n_features]
    X = digits.data
    y = digits.target

    # reduce dimensionality
    iso = Isomap(n_components=2)
    iso.fit(digits.data)
    data_prj = iso.transform(digits.data)
    plt.scatter(data_prj[:, 0], data_prj[:, 1], c=digits.target,
                edgecolor='none', alpha=0.5,
                cmap=plt.cm.get_cmap('Accent', 10))
    plt.colorbar(label='digit label', ticks=range(10))
    plt.clim(-0.5, 9.5)
    plt.show()

    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0)

    # create the model
    model = GaussianNB()
    model.fit(Xtrain, ytrain)
    y_model = model.predict(Xtest)
    # The score used to be computed and thrown away; report it.
    print(accuracy_score(ytest, y_model))

    mat = confusion_matrix(ytest, y_model)
    sns.heatmap(mat, square=True, annot=True, cbar=False)
    plt.xlabel('predicted value')
    plt.ylabel('true value')
    plt.show()

    fig, axes = plt.subplots(10, 10, figsize=(8, 8),
                             subplot_kw={'xticks': [], 'yticks': []},
                             gridspec_kw=dict(hspace=0.1, wspace=0.1))
    # Show the TEST images: y_model and ytest are indexed by test-split
    # position, so pairing them with digits.images[i] (full-dataset order)
    # displayed the wrong picture next to each prediction.
    test_images = Xtest.reshape(-1, 8, 8)
    for i, ax in enumerate(axes.flat):
        ax.imshow(test_images[i], cmap='binary', interpolation='nearest')
        ax.text(0.05, 0.05, str(y_model[i]), transform=ax.transAxes,
                color='green' if (ytest[i] == y_model[i]) else 'red')
    plt.show()
def compute_iso_map(self, original_features):
    """Return min-max normalized Isomap features for every row.

    The ``file`` column is dropped, NaNs are replaced via ``np.nan_to_num``,
    an Isomap with ``self.n_components`` dimensions is fitted on a random
    sample of ``int(self.ratio * n_rows)`` rows, and all rows are projected.
    Each output column is shifted to start at 0 and scaled by its maximum.

    :param original_features: dataframe with a ``file`` column plus features
    :return: array of normalized embedded coordinates, one row per input row
    """
    # ``as_matrix()`` and the positional axis of ``drop`` are gone from
    # current pandas; ``.values`` and ``axis=1`` work on old and new releases.
    feature_matrix = original_features.drop('file', axis=1).values
    feature_matrix = np.nan_to_num(feature_matrix)

    dimen_reductor = Isomap(n_components=self.n_components)
    full_size = feature_matrix.shape[0]
    train_size = int(self.ratio * full_size)
    row_indices = list(range(full_size))
    # NOTE(review): rows are drawn WITH replacement, so the training sample
    # can contain duplicates -- confirm that this is intended.
    feature_training_indices = np.random.choice(row_indices, size=train_size)
    training_feature_matrix = feature_matrix[feature_training_indices, :]
    dimen_reductor.fit(training_feature_matrix)

    reduced_features = dimen_reductor.transform(feature_matrix)
    reduced_normalized_features = reduced_features - reduced_features.min(axis=0)
    span = reduced_normalized_features.max(axis=0)
    # A constant component has zero span; dividing by it gave NaN. Dividing
    # by 1 keeps such a column at 0 instead.
    span = np.where(span == 0, 1.0, span)
    reduced_normalized_features /= span
    return reduced_normalized_features
def _OnClick3(self, event):
    """Toggle the Isomap switch, embed the selected data in 2-D, plot and export it.

    Reads the labels from ``self.labelVar`` and the data matrix from
    ``self.dfLabel`` (both header-less CSV), computes a two-component Isomap
    embedding, saves the scatter plot to ``ISOMAP.png``, shows it, and writes
    the components to ``ISOMAP_COMPONENTS.xlsx``.

    :param event: callback argument; not used here
    """
    if self.var3.get() == "Off":
        self.var3.set("On")
    elif self.var3.get() == "On":
        self.var3.set("Off")
    print("Isomap is running...")

    label = pd.read_csv(self.labelVar, header=None)[0].tolist()
    df = pd.read_csv(self.dfLabel, header=None)

    iso = Isomap(n_components=2)
    iso.fit(df)
    manifold_2Da = iso.transform(df)
    principalDf = pd.DataFrame(data=manifold_2Da,
                               columns=['Component 1', 'Component 2'])
    X1 = principalDf['Component 1']
    X2 = principalDf['Component 2']
    unique = np.unique(label)

    try:
        plt.scatter(X1, X2, c=label)
    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt or
        # SystemExit the way the bare ``except:`` did.
        print(
            "data matrix does not match label matrix (Select input file and label, remove headers)"
        )
    #plt.legend(unique, loc=8, ncol=5,fontsize='x-small')

    name = 'ISOMAP'  # change the output file name here
    plt.title(name + " Clusters: " + str(len(unique)))
    plt.savefig(name + ".png")
    plt.show()
    plt.clf()
    # The first and second components are exported to Excel here.
    principalDf.to_excel("ISOMAP_COMPONENTS.xlsx")
class KDEIsomapGen(GenBase):
    """KDE generator on an Isomap manifold (disabled: construction always raises).

    ``sample`` and ``sample_radius`` call ``inverse_transform`` on the
    transformer, which the TODO below says Isomap does not offer; this is why
    ``__init__`` ends with ``NotImplementedError``.
    """
    # TODO: Isomap has no inverse transformation, maybe we can solve that in the future.
    # TODO: Look at the work: Inverse Methods for Manifold Learning from Kathleen Kay

    def __init__(self, kernel="gaussian", bandwidth=0.1, n_components=None, n_neighbors=20):
        super().__init__()
        # Keyword argument: a positional n_neighbors is rejected by
        # scikit-learn releases with keyword-only constructors, which would
        # hide the intended NotImplementedError behind a TypeError.
        self.transformer = Isomap(n_neighbors=n_neighbors, n_components=n_components)
        self.bandwidth = bandwidth
        self.kernel = kernel
        self.manifold = None
        raise NotImplementedError

    def fit(self, x):
        """Embed ``x`` and fit a ``KDEGen`` on the embedding; return ``self``."""
        x_pca = self.transformer.fit_transform(x)
        self.manifold = KDEGen(kernel=self.kernel, bandwidth=self.bandwidth).fit(x_pca)
        return self

    def sample_radius(self, x_exp, n_min_kernels=20, r=None, n_samples=1, random_state=None):
        """Sample around ``x_exp`` on the manifold and map the samples back."""
        x_exp_pca = self.transformer.transform(x_exp)
        x_sample_pca = self.manifold.sample_radius(x_exp_pca, n_min_kernels=n_min_kernels, r=r,
                                                   n_samples=n_samples, random_state=random_state)
        x_sample = self.transformer.inverse_transform(x_sample_pca)
        return x_sample

    def sample(self, n_samples=1, random_state=None):
        """Draw samples from the manifold density and map them back."""
        x_sample_pca = self.manifold.sample(n_samples=n_samples, random_state=random_state)
        x_sample = self.transformer.inverse_transform(x_sample_pca)
        return x_sample
# In[25]: bears = pd.DataFrame(bears) # In[26]: bears.shape num_neighbors = 6 # In[29]: iso = Isomap(n_components=3, n_neighbors=num_neighbors) iso.fit(bears) T = iso.transform(bears) T.shape isodf = pd.DataFrame(T, columns=['a', 'b', 'c']) isodf.head() fig1 = plt.figure(figsize=(12, 10)) ax1 = fig1.add_subplot(111) ax1.set_title("2D projection with {} neighbors".format(num_neighbors)) ax1.scatter(isodf.a, isodf.b, c=colors) fig2 = plt.figure(figsize=(12, 10)) ax2 = fig2.add_subplot(111, projection='3d') ax2.set_title("3D projection with {} neighbors".format(num_neighbors))
df = pd.DataFrame(samples) # # Optional: Resample the image down by a factor of two if you # have a slower computer. You can also convert the image from # 0-255 to 0.0-1.0 if you'd like, but that will have no # effect on the algorithm's results. # # .. your code here .. #%% from sklearn.manifold import Isomap iso = Isomap(n_neighbors=6, n_components=3) iso.fit(samples) T = iso.transform(samples) def Plot2D(T, title, x, y): fig = plt.figure() ax = fig.add_subplot(111) ax.set_title(title) ax.set_xlabel('Component: {0}'.format(x)) ax.set_ylabel('Component: {0}'.format(y)) ax.scatter(T[:, x], T[:, y], marker='.', alpha=0.7) def Plot3D(T, title, x, y, z): fig = plt.figure() ax = fig.add_subplot(111, projection='3d') ax.set_title(title)
# title is your chart title # x is the principal component you want displayed on the x-axis, Can be 0 or 1 # y is the principal component you want displayed on the y-axis, Can be 1 or 2 # # .. your code here .. from sklearn.decomposition import PCA pca = PCA(n_components=3) pca.fit(df) T = pca.transform(df) Plot2D(T, "PCA 1 2", 1, 2) # # TODO: Implement Isomap here. Reduce the dataframe df down # to THREE components. Once you've done that, call Plot2D using # the first two components. # # .. your code here .. from sklearn.manifold import Isomap imap = Isomap(n_neighbors=8, n_components=3) imap.fit(df) T2 = imap.transform(df) Plot2D(T2, "Isomap", 1, 2) # # TODO: If you're up for a challenge, draw your dataframes in 3D # Even if you're not, just do it anyway. # # .. your code here .. plt.show()
# Sweep the Isomap output dimension and collect per-classifier results.
nISOMAP = np.arange(20, 200, 20)
# Maps classifier name -> list of results, one entry per sweep step.
data = {}
for k in nISOMAP:
    # Data is reloaded and re-split on every step because the split
    # variables are overwritten with their projections below.
    features, labels, vectorizer, selector, le, features_data = preprocess("pkl/article_2_people.pkl", "pkl/lable_2_people.pkl")
    features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(features, labels, test_size=0.1, random_state=42)

    t0 = time()
    iso = Isomap(n_neighbors=15, n_components=k, eigen_solver='auto')
    iso.fit(features_train)
    print("Dimension Reduction time:", round(time()-t0, 3), "s")
    features_train = iso.transform(features_train)
    features_test = iso.transform(features_test)

    for name, clf in [
            ('AdaBoostClassifier', AdaBoostClassifier(algorithm='SAMME.R')),
            ('BernoulliNB', BernoulliNB(alpha=1)),
            ('GaussianNB', GaussianNB()),
            ('DecisionTreeClassifier', DecisionTreeClassifier(min_samples_split=100)),
            ('KNeighborsClassifier', KNeighborsClassifier(n_neighbors=50, algorithm='ball_tree')),
            ('RandomForestClassifier', RandomForestClassifier(min_samples_split=100)),
            ('SVC', SVC(kernel='linear', C=1))
    ]:
        # ``dict.has_key`` does not exist on Python 3; use a membership test.
        if name not in data:
            data[name] = []
def main():
    """Grid-search Isomap + polynomial SVC on ``baseline2.mat`` and save predictions.

    For every (n_components, n_neighbors) pair an Isomap is fitted on the
    training data and every (C, gamma, coef0) combination is scored with
    5-fold cross-validation. The best SVC of each pair is remembered, and the
    pair with the highest score is refitted on the full training set and
    evaluated on the test set. Predictions go to ``predicted_iso.mat``.
    """
    # Load the dataset from Matlab
    data = sio.loadmat('baseline2.mat')
    n_train = int(data['n_train'])
    n_test = int(data['n_test'])
    train_x = np.array(data['train_x'])
    train_t = np.array(data['train_t']).reshape(n_train)
    test_x = np.array(data['test_x'])
    # Use the stored test-set size instead of a hard-coded 800.
    test_t = np.array(data['test_t']).reshape(n_test)

    # SVM grid: these are exponents, the SVC receives 2**value.
    C = [-10, 5, 10]
    G = [-10, 5, 10]
    CF = [-10, 5, 10]

    # Isomap grid: neighbour counts and output dimensions.
    NG = [10, 20, 50, 100, 200]
    components = (10, 20, 50, 100, 200)

    scores = []
    svcs = []
    isos = []
    for cc in components:
        for nn in NG:
            # Reset per grid cell so a cell without a usable score cannot
            # inherit the winner of the previous cell (or hit a NameError).
            best_svm = None
            best_iso = None
            max_score = -np.inf

            iso = Isomap(n_components=cc, n_neighbors=nn)
            iso.fit(train_x)
            train = iso.transform(train_x)
            for c in C:
                for g in G:
                    for cf in CF:
                        # Find best C, gamma
                        svc = svm.SVC(C=2**c, gamma=2**g, coef0=2**cf, degree=3,
                                      kernel='poly', max_iter=1000000)
                        this_scores = cross_validation.cross_val_score(
                            svc, train, train_t, n_jobs=-1, cv=5,
                            scoring='accuracy')
                        mean_score = sum(this_scores)/len(this_scores)
                        print("C: "+str(c)+" G: "+str(g)+" CMPS: "+str(cc)+" A: "+str(mean_score) + " CF: " +str(cf) + "N: "+str(nn))
                        if mean_score > max_score:
                            max_score = mean_score
                            best_svm = svc
                            best_iso = iso
            svcs.append(best_svm)
            isos.append(best_iso)
            scores.append(max_score)

    m_ind = scores.index(max(scores))
    best_s = svcs[m_ind]
    iso = isos[m_ind]

    # Test final model
    test = iso.transform(test_x)
    train = iso.transform(train_x)
    best_s.fit(train, train_t)
    pred = best_s.predict(test)
    sio.savemat('predicted_iso.mat', dict(x=np.arange(n_test), pred_t=pred))
    final_score = best_s.score(test, test_t)
    print(best_s)
    print("Final Accuracy: "+str(final_score))
    print(scores)
# Load the .mat file: mat = scipy.io.loadmat('datasets/face_data.mat') # Get the img data: pics = mat['images'].transpose() num_images = pics.shape[0] num_pixels = int(np.sqrt(pics.shape[1])) # Transpose the pictures: for i in range(num_images): pics[i, :] = pics[i, :].reshape(num_pixels, num_pixels).transpose().flatten() # Load up your face_labels dataset as a series: labels = pd.read_csv('datasets/face_labels.csv', header=None)[0] # Do train_test_split: X_train, X_test, Y_train, Y_test = train_test_split(pics, labels, test_size=.15, random_state=7) # Implement Isomap: iso = Isomap(n_components=2, n_neighbors=5) iso.fit(X_train) X_train = iso.transform(X_train) X_test = iso.transform(X_test) # Implement KNeighborsClassifier: knn = KNeighborsClassifier(n_neighbors=5) knn.fit(X_train, Y_train) # Print the accuracy of the testing set: print(f"Accuracy: {knn.score(X_test, Y_test)}") # Plot the decision boundary, the training data and testing images: plot_2d_boundary(knn, X_train, Y_train, X_test, Y_test) # Show graph: plt.show()
# Read every image in ``folder`` as a flattened sample scaled by 1/255,
# coloured blue. ``folder``, ``samples`` and ``colors`` are defined outside
# this fragment.
for imgname in os.listdir(folder):
    img = misc.imread(os.path.join(folder, imgname))
    samples.append((img/255.0).reshape(-1))
    colors.append('b')

# Second folder: same path with an 'i' appended; its samples are coloured red.
folder += 'i'
for imgname in os.listdir(folder):
    img = misc.imread(os.path.join(folder, imgname))
    samples.append((img/255.0).reshape(-1))
    colors.append('r')

df = pd.DataFrame(samples)

# Three-component Isomap embedding of all samples.
iso = Isomap(n_components=3, n_neighbors=6)
iso.fit(df)
T = iso.transform(df)

import matplotlib.pyplot as plt

# 2-D view: first two components.
plt.figure()
plt.scatter(T[:, 0], T[:, 1], c=colors)
plt.show()

# 3-D view: all three components.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_title('...')
ax.set_xlabel('component 0')
ax.set_ylabel('component 1')
ax.set_zlabel('component 2')
ax.scatter(T[:, 0], T[:, 1], T[:, 2], c=colors, marker='.', alpha=0.75)
plt.show()
# Optional exploratory plots; ``train``, ``target`` and the three flags are
# defined outside this fragment.
if basic_plots:
    ax = pp.subplot(2, 1, 1)
    train.describe()[1:].plot(legend=False, ax=ax)
    pp.title("Description of training data.")
    ax = pp.subplot(2, 1, 2)
    train.loc[:,:5].plot(legend=False, ax=ax)
    pp.title("First 5 series plotted.")
    pp.show()

if do_pca:
    x = train.values
    pca = PCA(n_components=3)
    pca.fit(x)
    y = pca.transform(x)
    # print() call: the former print statement is a syntax error on Python 3.
    print('Orig shape: ', x.shape, 'New shape: ', y.shape)
    pp.scatter(y[:,0], y[:,1], c=target.values)
    pp.show()

if do_isomap:
    x = train.values
    from sklearn.manifold import Isomap
    isomap = Isomap(n_components=2, n_neighbors=20)
    isomap.fit(x)
    y = isomap.transform(x)
    pp.scatter(y[:,0], y[:,1], c=target.values)
    pp.show()
class CardiotocographyMainFrame(Tk.Frame):
    """Tk frame with a 2-D scatter of the training data, probability boxes and sliders.

    Moving any slider re-runs ``evaluator.predict`` on the slider values,
    updates the three probability fields and redraws the current case as a
    red point on the scatter plot. The optimize button searches for a model
    with better cross-validated accuracy than the current pipeline.
    """

    def __init__(self, master, x_train, y_train, x_test, y_test, evaluator, console):
        """Load and train the evaluator, then build the plot, fields and sliders."""
        Tk.Frame.__init__(self, master)
        self.evaluator = evaluator
        self.x_train = x_train
        self.y_train = y_train
        self.x_test = x_test
        self.y_test = y_test
        self.new_estimator = None
        self.console = console
        self.evaluator.load_data(x_train, y_train, x_test, y_test)
        self.evaluator.train()
        self.x_train_r = self.evaluator.reduce(x_train)  # feature dimensionality reduction
        # 0. Optimize button
        self.button_opt = Tk.Button(self, text="优化", command=self.optimize_parameter)
        self.button_opt.pack(side=Tk.TOP, anchor=Tk.E)
        self.label_tips = Tk.Label(self)
        self.label_tips.pack(side=Tk.TOP, anchor=Tk.E)
        # 1. Scatter plot
        frame_train = Tk.Frame(self)
        frame_train.pack(fill=Tk.BOTH, expand=1, padx=15, pady=15)
        self.figure_train = Figure(figsize=(5, 4), dpi=100)
        self.subplot_train = self.figure_train.add_subplot(111)
        self.subplot_train.set_title('Cardiotocography High-Dimension Data Visualization (21-dim)')
        self.figure_train.tight_layout()  # must be called after add_subplot, otherwise it crashes
        self.last_line = None
        # Named ``tsne`` but holds an Isomap; predict() reuses it to project
        # the slider values.
        self.tsne = Isomap(n_components=2, n_neighbors=10)
        np.set_printoptions(suppress=True)
        x_train_r = self.tsne.fit_transform(x_train)
        self.subplot_train.scatter(x_train_r[:, 0], x_train_r[:, 1], c=y_train, cmap=plt.cm.get_cmap("Paired"))
        self.attach_figure(self.figure_train, frame_train)
        y_pred = self.evaluator.pipeline.predict(x_train)
        accuracy = accuracy_score(y_true=y_train, y_pred=y_pred)
        self.console.output("[CTG] INIT MODEL: ", str(self.evaluator.pipeline.named_steps['clf']) + "\n")
        self.console.output("[CTG] INIT ACCURACY: ", str(accuracy) + "\n")
        # 2. Probability output boxes
        frame_prob = Tk.Frame(self)
        frame_prob.pack(fill=Tk.BOTH, expand=1, padx=5, pady=5)
        Tk.Label(frame_prob, text="prob").pack(side=Tk.LEFT)
        self.strvar_prob1 = Tk.StringVar()
        Tk.Label(frame_prob, text="1.").pack(side=Tk.LEFT)
        Tk.Entry(frame_prob, textvariable=self.strvar_prob1, bd=5).pack(side=Tk.LEFT, padx=5, pady=5)
        self.strvar_prob2 = Tk.StringVar()
        Tk.Label(frame_prob, text="2.").pack(side=Tk.LEFT)
        Tk.Entry(frame_prob, textvariable=self.strvar_prob2, bd=5).pack(side=Tk.LEFT, padx=5, pady=5)
        self.strvar_prob3 = Tk.StringVar()
        Tk.Label(frame_prob, text="3.").pack(side=Tk.LEFT)
        Tk.Entry(frame_prob, textvariable=self.strvar_prob3, bd=5).pack(side=Tk.LEFT, padx=5, pady=5)
        # 3. Sliders
        frame_slides = Tk.Frame(self)
        frame_slides.pack(fill=Tk.BOTH, expand=1, padx=5, pady=5)
        canv = Tk.Canvas(frame_slides, relief=Tk.SUNKEN)
        vbar = Tk.Scrollbar(frame_slides, command=canv.yview)
        canv.config(scrollregion=(0, 0, 300, 1500))
        canv.config(yscrollcommand=vbar.set)
        vbar.pack(side=Tk.RIGHT, fill=Tk.Y)
        canv.pack(side=Tk.LEFT, expand=Tk.YES, fill=Tk.BOTH)
        feature_num = x_train.shape[1]
        self.slides = [None] * feature_num  # one slider per feature
        for i in range(feature_num):
            # NOTE(review): this literal was split across a line break in the
            # reviewed text -- confirm it is exactly ". ".
            canv.create_window(60, (i + 1) * 40, window=Tk.Label(canv, text=str(i + 1) + ". "))
            # Slider range spans the observed values of feature i in 100 steps.
            min_x = np.min(x_train[:, i])
            max_x = np.max(x_train[:, i])
            self.slides[i] = Tk.Scale(canv, from_=min_x, to=max_x, resolution=(max_x - min_x) / 100.0,
                                      orient=Tk.HORIZONTAL, command=self.predict)
            canv.create_window(200, (i + 1) * 40, window=self.slides[i])

    # Compute the class-membership probabilities from the current feature values
    def predict(self, trivial):
        """Slider callback: predict for the current slider values and update the UI.

        :param trivial: value passed by the Tk Scale callback; not used
        """
        feature_num = self.x_train.shape[1]
        # Single-row float array that is filled from the sliders below.
        x = np.arange(feature_num, dtype='f').reshape((1, feature_num))
        for i in range(feature_num):
            x[0, i] = float(self.slides[i].get())
        result = self.evaluator.predict(x)
        self.strvar_prob1.set("%.2f%%" % (result[0, 0] * 100))  # probability of no disease
        self.strvar_prob2.set("%.2f%%" % (result[0, 1] * 100))  # probability of a suspect case
        self.strvar_prob3.set("%.2f%%" % (result[0, 2] * 100))  # probability of a confirmed case
        self.plot_point(self.subplot_train, self.tsne.transform(x))
        self.figure_train.canvas.draw()

    # Redraw the point
    def plot_point(self, subplot, x):
        """Replace the previously drawn case marker with one at ``x``."""
        if self.last_line is not None:
            self.last_line.remove()
            del self.last_line
        lines = subplot.plot(x[:, 0], x[:, 1], "ro", label="case")
        self.last_line = lines.pop(0)
        subplot.legend(loc='lower right')

    # Put the figure onto the frame
    @staticmethod
    def attach_figure(figure, frame):
        """Embed ``figure`` and a navigation toolbar into ``frame``."""
        canvas = FigureCanvasTkAgg(figure, master=frame)  # embed the scatter plot into the UI
        canvas.show()
        canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
        toolbar = NavigationToolbar2TkAgg(canvas, frame)  # embed the scatter plot toolbar into the UI
        toolbar.update()
        canvas.tkcanvas.pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)

    # Search for the best parameters
    def optimize_parameter(self):
        """Search the candidate models and offer the best one if it beats the current pipeline."""
        self.console.output("[CTG] OPTIMIZATION START...", "\n")
        # Cross-validation accuracy of the old (i.e. initial) model
        old_scores = cross_validation.cross_val_score(estimator=self.evaluator.pipeline, X=self.x_train,
                                                      y=self.y_train, scoring='accuracy', cv=10, n_jobs=-1)
        old_score = np.mean(old_scores)
        # Best cross-validation accuracy among the new models
        new_score = -1.0
        self.new_estimator = None
        for clf, param_grid in RandomParameterSettings.possible_models:
            self.console.output("[CTG] SEARCH MODEL:", str(clf) + "\n")
            estimator = Pipeline([('scl', StandardScaler()), ('pca', PCA()), ('clf', clf)])
            gs = RandomizedSearchCV(estimator=estimator, param_distributions=param_grid, scoring='accuracy',
                                    cv=10, n_jobs=-1)
            gs = gs.fit(self.x_train, self.y_train)
            if new_score < gs.best_score_:
                new_score = gs.best_score_
                self.new_estimator = gs.best_estimator_
        if new_score > old_score:
            self.label_tips.config(
                text='Found a new model with improvement: %.2f%%' % (100.0 * (new_score - old_score) / old_score))
            # The button now applies the new model instead of searching again.
            self.button_opt.config(text='应用', command=self.apply_new_estimator)
        else:
            self.label_tips.config(text="No better model founded.")
        self.console.output("[CTG] OPTIMIZATION COMPLETE !", "\n")
        self.console.output("[CTG] RESULT: ",
                            "old_model_accuracy=%f, new_model_accuracy=%f, improvement=%.2f%%\n" % (
                                old_score, new_score, (100.0 * (new_score - old_score) / old_score)) + "\n")

    def apply_new_estimator(self):
        """Swap the evaluator's pipeline for the estimator found by the search."""
        self.console.output("[CTG] APPLY NEW MODEL:",
                            "old_model=%s \n new_model=%s\n" % (self.evaluator.pipeline, self.new_estimator))
        self.evaluator.pipeline = self.new_estimator
        self.label_tips.config(text="New model has been applied.")
from sklearn.datasets import load_digits
from sklearn.manifold import Isomap
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # Load the digits dataset and separate features from labels.
    br = '\n'
    digits = load_digits()
    X, y = digits.data, digits.target
    print('feature data shape:', X.shape)

    # Fit Isomap on the features, then project the same samples to 2-D.
    iso = Isomap(n_components=2)
    iso_name = type(iso).__name__
    iso.fit(X)
    data_projected = iso.transform(X)
    print('project data to 2D:', data_projected.shape)

    # Scatter the two embedding columns coloured by label; the figure is
    # named after the estimator class.
    project_1 = data_projected[:, 0]
    project_2 = data_projected[:, 1]
    plt.figure(iso_name)
    plt.scatter(project_1, project_2, c=y, cmap='jet', alpha=0.5,
                edgecolor='none')
    plt.colorbar(ticks=range(10), label='digit label')
    plt.clim(-0.5, 9.5)
    plt.show()
def isoMap(X, y):
    """Project ``X`` onto a single Isomap component.

    An Isomap estimator (1 component, 20 neighbours, dense eigen-solver)
    is fitted on ``X`` and then used to transform that same ``X``.

    Args:
        X: Samples to embed; passed unchanged to ``fit`` and ``transform``.
        y: Accepted but never read by this function.

    Returns:
        The result of ``transform(X)`` on the fitted estimator.
    """
    embedder = Isomap(n_neighbors=20, n_components=1, eigen_solver="dense").fit(X)
    return embedder.transform(X)
from sklearn.manifold import Isomap
from sklearn.decomposition import PCA
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import random
from colorsys import hsv_to_rgb

# Embed the samples read from data012.txt with a default-configured Isomap.
data = np.genfromtxt('data012.txt', delimiter=',')
isomap = Isomap()
data_xformed = isomap.fit_transform(data)
# pca = PCA(n_components=2)
# data_xformed = pca.fit_transform(data)

# print() with a single parenthesised argument produces the same output on
# Python 2 and Python 3; the bare print statement does not parse on Python 3.
print(data.shape)
print(data_xformed.shape)

# One RGB colour per sample: 1000 red, then 1000 green, then 1000 yellow.
# NOTE(review): assumes data012.txt holds exactly 3000 rows in that order - confirm.
c = [(1,0,0)]*1000+[(0,1,0)]*1000+[(1,1,0)]*1000
plt.figure()
plt.scatter(data_xformed[:,0], data_xformed[:,1], c=c)
plt.show()

# NOTE(review): the script exits here, so the train/test reduction below is
# never executed - confirm whether this early exit is still wanted.
quit()

# Fit a 4-component Isomap on the training set only, apply it to both sets,
# and save the reduced features as comma-separated text.
train_data = np.genfromtxt('training.txt', delimiter=',')
isomap = Isomap(n_components=4)
train_xformed = isomap.fit_transform(train_data)
test_data = np.genfromtxt('testing.txt', delimiter=',')
test_xformed = isomap.transform(test_data)
np.savetxt("isomap_training_reduced4.txt", train_xformed, delimiter=',')
np.savetxt("isomap_testing_reduced4.txt", test_xformed, delimiter=',')
str(digits.target[i]), transform=ax.transAxes, color='green') #Treat each pixel as a feature - flatten out the array so we have length-64 array of pixel values representing each digit X = digits.data X.shape y = digits.target y.shape #Unsupervised learning: Dimensionality reduction - Isomap from sklearn.manifold import Isomap iso = Isomap(n_components=2) iso.fit(digits.data) data_projected = iso.transform(digits.data) data_projected.shape plt.scatter(data_projected[:, 0], data_projected[:, 1], c=digits.target, edgecolor='none', alpha=0.5, cmap=plt.cm.get_cmap('Spectral', 10)) plt.colorbar(label='digit label', ticks=range(10)) plt.clim(-0.5, 9.5) #generally good separation in parameter space #classification Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0) #Gaussian naive Bayes
color_sample.append('r') # # TODO: Convert the list to a dataframe # # .. your code here .. df_images = pd.DataFrame(samples) #df_images_t = df_images.transpose() # # TODO: Implement Isomap here. Reduce the dataframe df down # to three components, using K=6 for your neighborhood size # # .. your code here .. iso_bear=Isomap(n_components=3,n_neighbors=6) iso_bear.fit(df_images) T_iso_bear = iso_bear.transform(df_images) # # TODO: Create a 2D Scatter plot to graph your manifold. You # can use either 'o' or '.' as your marker. Graph the first two # isomap components # # .. your code here .. fig = plt.figure() ax = fig.add_subplot(111) ax.set_title('Manifold Scatterplot') ax.set_xlabel('Component: {0}'.format(0)) ax.set_ylabel('Component: {0}'.format(1)) ax.scatter(T_iso_bear[:,0],T_iso_bear[:,1], marker='.',alpha=0.7, c=color_sample)
plt.show()
# -

# We can see that the reduction now differs from the PCA one. Although it still looks like a roll, this time we can appreciate its "width".
#
# Let us now see what happens with ISOMAP
#
# ## ISOMAP
#
# ISOMAP requires setting the <i>n_neighbors</i> hyper-parameter, which indicates how many neighbours to look at when building the graph. The resulting projection will depend largely on this value.

# +
# Fit on X and project the same samples onto two components.
iso = Isomap(n_neighbors=15, n_components=2).fit(X)
manifold_2Da = iso.transform(X)

# +
fig1 = plt.figure(figsize=(10, 10), facecolor='white')
ax = fig1.add_subplot(1, 1, 1)
ax.set_facecolor('white')
plt.scatter(manifold_2Da[:, 0], manifold_2Da[:, 1],
            c=color, marker='o', cmap=plt.cm.Spectral)
# plt.scatter(principalComponents[df_train['Survived']==0,0], principalComponents[df_train['Survived']==0,1], color='r', s=10)
plt.show()
# -

# Let us now see what happens with a smaller number of neighbours to look at
# the results would suffice.
#
# Your model should only be trained (fit) against the training data (data_train)
# Once you've done this, you need to use the model to transform both data_train
# and data_test from their original high-D image feature space, down to 2D
#
# Implement Isomap here. ONLY train against your training data, but
# transform both your training + test data, storing the results back into
# data_train, and data_test.
#
iso = Isomap(n_neighbors=6, n_components=2)
print("iso map fit start ")
iso.fit(data_train)
print("iso map fit end ")
# Both right-hand sides are evaluated before either name is rebound, so each
# transform still receives the original (un-reduced) data.
data_train, data_test = iso.transform(data_train), iso.transform(data_test)

#
# Implement KNeighborsClassifier here. You can use any K value from 1
# through 20, so play around with it and attempt to get good accuracy.
# This is the heart of this assignment: Looking at the 2D points that
# represent your images, along with a list of "answers" or correct class
# labels that those 2d representations should be.
#
# One classifier is fitted per K in 1..20; `knn` is rebound on every pass.
for i in range(1, 21):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(data_train, label_train)
# Fit the previously created PCA model on df and plot its projection.
pca.fit(df)
T = pca.transform(df)
Plot2D(T, 'chart title', 1, 2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
# .. your code here ..
from sklearn.manifold import Isomap

im = Isomap(n_components=3).fit(df)
T = im.transform(df)
# NOTE(review): the TODO above asks for the first two components, but this
# call passes 1 and 2. Plot2D's index convention is not visible here; if it
# is 0-based this plots the second and third components - confirm.
Plot2D(T, 'chart title', 1, 2)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D
# Even if you're not, just do it anyway.
#
# .. your code here ..
# Only the 3-D axes and their labels are set up here; nothing is drawn yet.
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
ax.set_xlabel('0')
ax.set_ylabel('1')
ax.set_zlabel('2')
# Plot the existing projection T: component pairs (0, 1) and (1, 2) in 2-D,
# then components 0, 1, 2 together in 3-D.
Plot2D(T, title='PCA 2D', x=0, y=1, num_to_plot=40)
Plot2D(T, title='PCA 2D', x=1, y=2, num_to_plot=40)
Plot3D(T, title='PCA 3D', x=0, y=1, z=2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
# .. your code here ..
from sklearn.manifold import Isomap

iso = Isomap(n_neighbors=8, n_components=3).fit(df)
T_iso = iso.transform(df)
# NOTE(review): these 2-D plots are titled 'ISO 3D' while the PCA ones above
# use 'PCA 2D' - possibly a copy/paste slip; confirm the intended title.
Plot2D(T_iso, title='ISO 3D', x=0, y=1, num_to_plot=40)
Plot2D(T_iso, title='ISO 3D', x=1, y=2, num_to_plot=40)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D
# Even if you're not, just do it anyway.
#
# .. your code here ..
from mpl_toolkits.mplot3d import Axes3D

Plot3D(T_iso, title='ISO 3D', x=0, y=1, z=2)
plt.show()
# In[ ]:


# Isomap
from sklearn.manifold import Isomap

n_neighbors = 5
n_components = 10
n_jobs = 4

isomap = Isomap(n_neighbors=n_neighbors, n_components=n_components,
                n_jobs=n_jobs)

# The model is fitted on a slice of the training frame only.
# NOTE(review): .loc slices by label and includes both endpoints, so this
# picks the rows labelled 0 through 5000 - confirm X_train's index makes
# that the intended subset.
isomap.fit(X_train.loc[0:5000, :])

# Project the full training and validation sets and wrap each result in a
# DataFrame that carries the matching index.
X_train_isomap = pd.DataFrame(data=isomap.transform(X_train),
                              index=train_index)
X_validation_isomap = pd.DataFrame(data=isomap.transform(X_validation),
                                   index=validation_index)

scatterPlot(X_train_isomap, y_train, "Isomap")


# In[ ]:


# Multidimensional Scaling
from sklearn.manifold import MDS

n_components = 2
n_init = 12
# The print statements of the original are Python 2 only. Each is rewritten
# as a single-argument print(...) call whose output is the same on Python 2
# and Python 3 (the doubled space reproduces the separator that the old
# two-item print statement emitted).
print('offset2:  %s' % (offset2,))
#HERE structures must have only atoms of selected chain
TM_align = rcu.TM_aligned_residues(pdb1, pdb2, offset1, offset2)
individualjammings1 = np.asarray(
    get_permutations(nj1['individual'], TM_align['alignedList1']))
individualjammings2 = np.asarray(
    get_permutations(nj2['individual'], TM_align['alignedList2']))
PValsScore = scoreFromPvalues(individualjammings1, individualjammings2)
print('PValsScore:  %s' % (PValsScore,))

# Fit the 2-component embedding on the first set only, then project both
# sets with that same fitted model.
clf = Isomap(n_components=2)#Isomap(n_components=2)
clf.fit(individualjammings1)
ij1 = clf.transform(individualjammings1)
ij2 = clf.transform(individualjammings2)
print(ij1)

# Three panels sharing both axes: set 1, set 2, and both overlaid.
f, (ax1, ax2, ax3) = pl.subplots(1, 3, sharex=True, sharey=True)
pl.ioff()
pl.title('ensemble correlation: %.4f'%PValsScore)
#pl.subplot(1,2,1)
ax1.scatter(ij1[:,0], ij1[:,1], marker='o', s=45, facecolor='0.6', edgecolor='r')
#pl.subplot(1,2,2)
ax2.scatter(ij2[:,0], ij2[:,1], marker='o', s=45, facecolor='0.6', edgecolor='r')
ax3.scatter(ij2[:,0], ij2[:,1], marker='o', s=25, facecolor='y', edgecolor='0.05', alpha=0.6)
ax3.scatter(ij1[:,0], ij1[:,1], marker='o', s=25, facecolor='b', edgecolor='0.05', alpha=0.5)

# Hide the x axis on every panel.
ax1.axes.get_xaxis().set_visible(False)
ax2.axes.get_xaxis().set_visible(False)
ax3.axes.get_xaxis().set_visible(False)
scaler.fit(X_train) X_train = scaler.transform(X_train) X_test = scaler.transform(X_test) #pcaComponent = 4 #pca = PCA(n_components=pcaComponent) #pca.fit(X_train) #X_train = pca.transform(X_train) #X_test = pca.transform(X_test) neighbors = 2 components = 4 isomap = Isomap(n_neighbors=neighbors, n_components=components) isomap.fit(X_train) X_train = isomap.transform(X_train) X_test = isomap.transform(X_test) #svc = SVC() #svc.fit(X_train, y_train) #print svc.score(X_test, y_test) best_score = 0 best_C = 0 best_gamma = 0 for C in np.arange(0.05, 2.05, 0.05): for gamma in np.arange(0.001, 1.001, 0.001): svc = SVC(C = C, gamma = gamma) svc.fit(X_train, y_train) score = svc.score(X_test, y_test) if score > best_score:
#maxabsscaler = pp.MaxAbsScaler()
#maxabsscaler.fit(X)
#X = maxabsscaler.transform(X)
#print('MaxAbsScaler\n========')

#X = pp.normalize(X)
#print('normalizer\n========')

# TODO: Use PCA to reduce noise, n_components 4-14
nc = 5
#pca = PCA(n_components=nc)
#pca.fit(X)
#X = pca.transform(X)
#print('PCA: ', nc)

# Use Isomap to reduce noise, n_neighbors 2-5
nn = 4
# The embedding is fitted on all of X and X is then replaced by its
# projection, so this happens before the train/test split below.
im = Isomap(n_neighbors=nn, n_components=nc).fit(X)
X = im.transform(X)
print('Isomap: ',nn, ' comp: ', nc)

# TODO: train_test_split 30% and random_state=7
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=7)

# TODO: Create an SVC, train and score against defaults
result = findMaxSVC()
print(result['score'])
def getIso(X, neighbs):
    """Return the 2-component Isomap projection of ``X``.

    Args:
        X: Samples used both to fit the estimator and as transform input.
        neighbs: Value passed to Isomap as ``n_neighbors``.

    Returns:
        The result of ``transform(X)`` on the estimator fitted to ``X``.
    """
    model = Isomap(n_neighbors=neighbs, n_components=2)
    model.fit(X)
    return model.transform(X)
#
# Reduce df to three principal components and plot the first two.
pca_data = PCA(n_components=3).fit(df)
T_pca = pca_data.transform(df)
Plot2D(T_pca, 'PCA Transformed Data PC0VsPC1', 0, 1)
#Plot2D(T_pca,'PCA Transformed Data PC0VsPC2',0,2)
#Plot2D(T_pca,'PCA Transformed Data PC1VsPC2',1,2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
iso_data = Isomap(n_neighbors=3, n_components=3).fit(df)
T_iso = iso_data.transform(df)
Plot2D(T_iso, 'Isomap Transformed Data Ax0VsAx1', 0, 1)
#Plot2D(T_iso,'Isomap Transformed Data Ax0VsAx2',0,2)
#Plot2D(T_iso,'Isomap Transformed Data Ax1VsAx2',1,2)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D
# Even if you're not, just do it anyway.
#
#fig = plt.figure()
#ax = fig.add_subplot(111, projection='3d')
#ax.set_xlabel('Principal Component 0')
#ax.set_ylabel('Principal Component 1')
#ax.set_zlabel('Principal Component 2')
#ax.scatter(T_pca[:,0], T_pca[:,1], T_pca[:,2], c='r', marker='.')