def dimension_reduce():
    '''Compare a few dimensionality-reduction methods on the digits dataset.'''
    pca = PCA(n_components=2)                     # initialize a dimensionality reducer
    pca.fit(digits.data)                          # fit it to our data
    X_pca = pca.transform(digits.data)            # apply the transformation to our data
    plt.subplot(1, 3, 1)
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=digits.target)  # plot the manifold

    se = SpectralEmbedding()
    X_se = se.fit_transform(digits.data)
    plt.subplot(1, 3, 2)
    plt.scatter(X_se[:, 0], X_se[:, 1], c=digits.target)

    isomap = Isomap(n_components=2, n_neighbors=20)
    isomap.fit(digits.data)
    X_iso = isomap.transform(digits.data)
    plt.subplot(1, 3, 3)
    plt.scatter(X_iso[:, 0], X_iso[:, 1], c=digits.target)
    plt.show()

    plt.matshow(pca.mean_.reshape(8, 8))           # plot the mean image
    plt.matshow(pca.components_[0].reshape(8, 8))  # plot the first principal component
    plt.matshow(pca.components_[1].reshape(8, 8))  # plot the second principal component
    plt.show()
def calculate_geodesic_distance(df_for_Box_Plot_features, points):
    """Compute pairwise geodesic distances.

    Parameters
    ----------
    df_for_Box_Plot_features : list
        Original features.
    points : nD array
        Embedding.

    Returns
    -------
    geo_distance_original : nD array
        Geodesic distances in the original dataset.
    geo_distance_embeddings : nD array
        Geodesic distances in the embedding.
    """
    embedding = Isomap(n_components=2)
    embedding.fit(df_for_Box_Plot_features)
    # Condense a square distance matrix to its strict upper triangle.
    unsquareform = lambda a: a[np.nonzero(np.triu(a, 1))]
    # Pairwise geodesic distances among points in the original space.
    geo_distance_original = unsquareform(embedding.dist_matrix_)

    embedding1 = Isomap(n_components=2)
    embedding1.fit(points)
    # Temporarily mark true zero distances as -9999 so they survive the
    # nonzero-based upper-triangle extraction, then restore them afterwards.
    embedding1.dist_matrix_[embedding1.dist_matrix_ == 0] = -9999
    geo_distance_embeddings = unsquareform(embedding1.dist_matrix_)
    geo_distance_embeddings[geo_distance_embeddings == -9999] = 0

    return geo_distance_original, geo_distance_embeddings
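# Illustrative usage of calculate_geodesic_distance (added sketch, not from the
# original source): toy points along a 3-D curve and their 2-D Isomap embedding
# are compared via the Pearson correlation of the two geodesic-distance vectors.
import numpy as np
from scipy.stats import pearsonr
from sklearn.manifold import Isomap

t = np.linspace(0, 3, 150)
features = np.c_[np.cos(t), np.sin(t), t]     # points on a curve, so the k-NN graph stays connected
embedding_2d = Isomap(n_components=2).fit_transform(features)

d_orig, d_emb = calculate_geodesic_distance(features, embedding_2d)
r, _ = pearsonr(d_orig, d_emb)                # how well geodesic distances are preserved
print(d_orig.shape, d_emb.shape, round(r, 3))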
def isomap():
    '''Embed corr_dataframe in 2D with Isomap; n_neighbors sets the size of the k-nearest-neighbor graph.'''
    iso = Isomap(n_neighbors=7, n_components=2)
    iso.fit(corr_dataframe)
    manifold_2D = iso.transform(corr_dataframe)
    return manifold_2D
def isomap(X, n_neighbors=5, n_components=2):
    iso = Isomap(n_components=n_components, n_neighbors=n_neighbors)
    X = np.asarray(X)
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    iso.fit(X)
    return iso
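# Quick sketch of the helper above (toy data, illustrative only): a 1-D input
# exercises the reshape branch, and the fitted Isomap is returned so that
# .embedding_ or .transform can be used afterwards.
import numpy as np

X_1d = np.linspace(0.0, 1.0, 50)                 # 1-D input triggers the reshape to (-1, 1)
iso_model = isomap(X_1d, n_neighbors=5, n_components=1)
print(iso_model.embedding_.shape)                # (50, 1)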
def df_isomap(df, n_comp=2, n_jobs=1, n_neighbors=5, max_iter=1000):
    rd_df = normalize_dataframe(df)
    rd = Isomap(n_components=n_comp, n_jobs=n_jobs, n_neighbors=n_neighbors, max_iter=max_iter)
    rd.fit(rd_df)  # fit on the normalized dataframe
    caracteristicas_rd = rd.transform(rd_df)
    caracteristicas_rd_df = pd.DataFrame(caracteristicas_rd)
    return caracteristicas_rd_df
def iso_map(d: pd.DataFrame):
    iso = Isomap(n_components=2, n_jobs=-1)
    iso.fit(d)
    app = iso.transform(d)
    df = pd.DataFrame(app, columns=['comp1', 'comp2'], index=d.index)
    df.to_csv(ISOMAP_FILE, index=True)
    return df
def ML(self):
    data = self.data.values[:, :-3]
    scaler = MinMaxScaler()
    # scaler = StandardScaler()
    X = scaler.fit_transform(data)
    # X = data
    isomap = Isomap(n_components=2)
    isomap.fit(X)
    # print(pca.explained_variance_ratio_)
    import pdb; pdb.set_trace()
def reduce_features_to_two_dimensions(features):
    '''
    Isomap reduces the dimensionality of the features from 784 to 2,
    which allows the visualize_features function to plot the data in
    two dimensions.
    '''
    isomap = Isomap(n_components=2)
    isomap.fit(features.data)
    transformed_features = isomap.transform(features.data)
    return transformed_features
class IsomapImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)
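# Minimal usage sketch for IsomapImpl, assuming Op is an alias for
# sklearn.manifold.Isomap (which the fit/transform signature suggests);
# the curve data below is only a toy example.
import numpy as np
from sklearn.manifold import Isomap as Op        # stand-in for the Op the wrapper expects

t = np.linspace(0, 3, 150)
X_toy = np.c_[np.cos(t), np.sin(t), t]
impl = IsomapImpl(n_neighbors=10, n_components=2)
X_2d = impl.fit(X_toy).transform(X_toy)
print(X_2d.shape)                                # (150, 2)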
def plot(self, n_components=2, n_neighbors=5, transform="log", switch_x=False,
         switch_y=False, switch_z=False, colors=None, max_features=500,
         show_plot=True):
    """
    :param n_components: a number starting at 2, or a value below 1
        (e.g. 0.95 means select automatically the number of components that
        captures 95% of the variance)
    :param transform: can be 'log' or 'anscombe'; log is just log10.
        Counts equal to zero are set to 1.
    """
    from sklearn.manifold import Isomap
    import numpy as np

    pylab.clf()
    data, kept = self.scale_data(transform_method=transform,
                                 max_features=max_features)

    iso = Isomap(n_neighbors=n_neighbors, n_components=n_components)
    iso.fit(data.T)
    Xr = iso.transform(data.T)
    self.Xr = Xr

    if switch_x:
        Xr[:, 0] *= -1
    if switch_y:
        Xr[:, 1] *= -1
    if switch_z:
        Xr[:, 2] *= -1

    # PC1 vs PC2
    if show_plot:
        pylab.figure(1)
        self._plot(Xr, pca=None, pc1=0, pc2=1, colors=colors)

    if n_components >= 3:
        if show_plot:
            pylab.figure(2)
            self._plot(Xr, pca=None, pc1=0, pc2=2, colors=colors)
            pylab.figure(3)
            self._plot(Xr, pca=None, pc1=1, pc2=2, colors=colors)

    return iso
def classify_concat_isomap_data(self, vis_data, sem_data, labels):
    fold = 0
    accuracies = []
    iso = Isomap(n_components=sem_data.shape[1], n_neighbors=20, eigen_solver='auto')
    skf = StratifiedKFold(n_splits=self.n_folds, random_state=None, shuffle=True)

    for train_index, test_index in skf.split(vis_data, labels):
        logging.info('Running ISO classification for fold %d' % fold)

        tr_vis = normalize(vis_data[train_index], norm='l2', axis=1, copy=True)
        te_vis = normalize(vis_data[test_index], norm='l2', axis=1, copy=True)
        tr_sem = normalize(sem_data[train_index], norm='l2', axis=1, copy=True)
        te_sem = normalize(sem_data[test_index], norm='l2', axis=1, copy=True)

        te_sem = SemanticDegradation.kill_semantic_attributes(te_sem, self.degradation_rate)
        te_sem = normalize(te_sem, norm='l2', axis=1, copy=True)

        tr_data, te_data = np.hstack((tr_vis, tr_sem)), np.hstack((te_vis, te_sem))
        tr_labels, te_labels = labels[train_index][:, 0], labels[test_index][:, 0]

        clf = make_pipeline(StandardScaler(), SVC(gamma='auto', C=1.0, kernel='linear'))
        iso.fit(tr_data)
        clf.fit(iso.transform(tr_data), tr_labels)
        prediction = clf.predict(iso.transform(te_data))

        fold += 1
        accuracies.append(balanced_accuracy_score(te_labels, prediction))

    return accuracies
def create_isomap(dissim_mat, embed_dimensions, neighbor_factor=2, **kwargs):
    # https://scikit-learn.org/stable/modules/manifold.html#multidimensional-scaling says Isomap is
    # better suited than MDS, but DESC15 say they compared the two and Isomap was worse ([15] of [DESC15])!
    n_neighbors = min(max(5, dissim_mat.shape[0] // neighbor_factor), dissim_mat.shape[0] - 1)
    print(f"Running Isomap with {get_ncpu(ignore_debug=True)} jobs for max {n_neighbors} neighbors.")
    embedding = Isomap(n_jobs=get_ncpu(ignore_debug=True), n_neighbors=n_neighbors,
                       n_components=embed_dimensions, metric="precomputed", **kwargs)
    isomap = embedding.fit(dissim_mat)
    return isomap
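# Illustrative call of create_isomap on a small precomputed dissimilarity matrix.
# get_ncpu is a helper from the surrounding project; the trivial stand-in below
# exists only so this sketch is self-contained.
import numpy as np
from sklearn.metrics import pairwise_distances


def get_ncpu(ignore_debug=True):                 # hypothetical stand-in for the project helper
    return 1


points = np.random.RandomState(1).rand(40, 5)
dissim_mat = pairwise_distances(points)          # (40, 40) symmetric distance matrix
iso_fitted = create_isomap(dissim_mat, embed_dimensions=2)
print(iso_fitted.embedding_.shape)               # (40, 2)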
class FloorplanEstimator:
    """Simple estimator for rough floorplans."""

    def __init__(self):
        """Instantiate floorplan estimator."""
        self.dimred = Isomap(n_neighbors=25, n_components=2)
        self._fingerprints = None
        self._label = None

    def fit(self, fingerprints, label):
        """Estimate floorplan from labeled fingerprints.

        :param fingerprints: list of fingerprints
        :param label: list of corresponding labels
        """
        self.dimred.fit(fingerprints)
        self._fingerprints = fingerprints
        self._label = label

    def transform(self, fingerprints):
        """Get x,y coordinates of fingerprints on the floorplan.

        :param fingerprints: list of fingerprints
        :return: list of [x, y] coordinates
        """
        return self.dimred.transform(fingerprints)

    def draw(self):
        """Draw the estimated floorplan in the current figure."""
        xy = self.dimred.transform(self._fingerprints)
        x_min, x_max = xy[:, 0].min(), xy[:, 0].max()
        y_min, y_max = xy[:, 1].min(), xy[:, 1].max()
        xx, yy = np.meshgrid(np.arange(x_min, x_max, 1.0),
                             np.arange(y_min, y_max, 1.0))
        clf = RadiusNeighborsClassifier(radius=3.0, outlier_label=0)
        clf.fit(xy, self._label)
        label = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
        plt.pcolormesh(xx, yy, label)
        plt.scatter(xy[:, 0], xy[:, 1], c=self._label, vmin=0)
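# Hypothetical smoke test for FloorplanEstimator: random "fingerprints" split into
# two pseudo-rooms stand in for real Wi-Fi data, purely to show the fit/transform
# flow (draw() would additionally render the estimated floorplan in the current figure).
import numpy as np

rng = np.random.RandomState(0)
fingerprints = rng.normal(0, 1, (120, 8))
room_labels = (fingerprints[:, 0] > 0).astype(int)   # two pseudo-rooms

est = FloorplanEstimator()
est.fit(fingerprints, room_labels)
print(est.transform(fingerprints[:3]))               # x,y floorplan coordinates of the first fingerprints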
def example_04():
    digits = load_digits()

    # fig, axes = plt.subplots(10, 10, figsize=(8, 8), subplot_kw={'xticks': [], 'yticks': []},
    #                          gridspec_kw=dict(hspace=0.1, wspace=0.1))
    #
    # # axes.flat is a one-dimensional iterator over the subplot grid
    # for i, ax in enumerate(axes.flat):
    #     ax.imshow(digits.images[i], cmap='binary')
    #     ax.text(0.05, 0.05, str(digits.target[i]), transform=ax.transAxes, color='green')
    # plt.show()

    X = digits.data
    y = digits.target

    from sklearn.manifold import Isomap
    iso = Isomap(n_components=2)
    iso.fit(X)
    data_projected = iso.transform(X)
    # plt.scatter(data_projected[:, 0], data_projected[:, 1], c=y, edgecolors='none', alpha=0.5,
    #             cmap=plt.cm.get_cmap('Spectral', 10))
    # plt.colorbar(label='digit label', ticks=range(10))
    # plt.clim(-0.5, 9.5)

    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

    from sklearn.naive_bayes import GaussianNB
    model = GaussianNB()
    model.fit(X_train, y_train)
    y_model = model.predict(X_test)

    # Quantitative score
    from sklearn.metrics import accuracy_score
    print(accuracy_score(y_model, y_test))

    # The accuracy is about 85%, but that alone does not show where the model
    # goes wrong; printing the confusion matrix shows which digits are confused.
    from sklearn.metrics import confusion_matrix
    mat = confusion_matrix(y_test, y_model)
    sns.heatmap(mat, square=True, annot=True, cbar=True)
    plt.xlabel('predicted value')
    plt.ylabel('true value')
    plt.show()
def perform_isomap(M):
    """Utility function to perform Isomap based on a metric.

    Input:
    ======
    M : Metric as a matrix of shape (n_samples, n_samples)

    Output:
    =======
    emb_isomap : Embedding of the resulting Isomap algorithm as an array of
        shape (n_samples, 3)
    explained_variance : Output from the function `_get_explained_variance` above
    """
    embedding = Isomap(n_components=3, n_neighbors=5, metric='precomputed')
    embedding.fit(M)

    Dm = embedding.dist_matrix_
    emb_isomap = embedding.embedding_
    explained_variance = _get_explained_variance(Dm, emb_isomap)

    return emb_isomap, explained_variance
def isomap10FoldClf(X, y, nclf):
    acc = []
    # Pre-0.18 scikit-learn KFold API (sklearn.cross_validation).
    kf = KFold(X.shape[0], n_folds=10, shuffle=True)
    i = 0
    for train_index, test_index in kf:
        yTest = y[test_index]
        yTrain = y[train_index]
        n_neighbors = 30
        clf = Isomap(n_neighbors, n_components=2)
        clf.fit(X[train_index])
        newRepTrain = clf.transform(X[train_index])
        newRepTest = clf.transform(X[test_index])
        # NN = neighbors.KNeighborsClassifier(n_neighbors=2)
        nclf.fit(newRepTrain, yTrain)
        XPred = nclf.predict(newRepTest)
        acc.append(np.sum(XPred == yTest) * 1.0 / yTest.shape[0])
        # print(i, ":", acc[i])
        i += 1
    return np.mean(acc), np.std(acc)
def main():
    digits = load_digits()
    print(digits.images.shape)

    # get the 2D representation of the images [n_samples, n_features]
    X = digits.data
    y = digits.target

    # reduce dimensionality
    iso = Isomap(n_components=2)
    iso.fit(digits.data)
    data_prj = iso.transform(digits.data)

    plt.scatter(data_prj[:, 0], data_prj[:, 1], c=digits.target,
                edgecolor='none', alpha=0.5,
                cmap=plt.cm.get_cmap('Accent', 10))
    plt.colorbar(label='digit label', ticks=range(10))
    plt.clim(-0.5, 9.5)
    plt.show()

    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0)

    # create the model
    model = GaussianNB()
    model.fit(Xtrain, ytrain)
    y_model = model.predict(Xtest)
    print(accuracy_score(ytest, y_model))

    mat = confusion_matrix(ytest, y_model)
    sns.heatmap(mat, square=True, annot=True, cbar=False)
    plt.xlabel('predicted value')
    plt.ylabel('true value')
    plt.show()

    fig, axes = plt.subplots(10, 10, figsize=(8, 8),
                             subplot_kw={'xticks': [], 'yticks': []},
                             gridspec_kw=dict(hspace=0.1, wspace=0.1))
    for i, ax in enumerate(axes.flat):
        ax.imshow(digits.images[i], cmap='binary', interpolation='nearest')
        ax.text(0.05, 0.05, str(y_model[i]), transform=ax.transAxes,
                color='green' if (ytest[i] == y_model[i]) else 'red')
    plt.show()
def compute_iso_map(self, original_features):
    feature_matrix = original_features.drop('file', axis=1).to_numpy()
    feature_matrix = np.nan_to_num(feature_matrix)

    dimen_reductor = Isomap(n_components=self.n_components)

    full_size = feature_matrix.shape[0]
    train_size = int(self.ratio * full_size)
    row_indices = list(range(full_size))
    feature_training_indices = np.random.choice(row_indices, size=train_size)
    training_feature_matrix = feature_matrix[feature_training_indices, :]

    # Fit on a random subset, then project the full matrix.
    dimen_reductor.fit(training_feature_matrix)
    reduced_features = dimen_reductor.transform(feature_matrix)

    # Rescale each component to [0, 1].
    reduced_normalized_features = reduced_features - reduced_features.min(axis=0)
    reduced_normalized_features /= reduced_normalized_features.max(axis=0)

    return reduced_normalized_features
def _OnClick3(self, event):
    if self.var3.get() == "Off":
        self.var3.set("On")
    elif self.var3.get() == "On":
        self.var3.set("Off")

    print("Isomap is running...")
    label = pd.read_csv(self.labelVar, header=None)[0].tolist()
    df = pd.read_csv(self.dfLabel, header=None)
    array = df.copy()

    iso = Isomap(n_components=2)
    iso.fit(array)
    manifold_2Da = iso.transform(df)
    manifold_2D = pd.DataFrame(manifold_2Da, columns=['Component 1', 'Component 2'])
    principalDf = pd.DataFrame(data=manifold_2Da, columns=['Component 1', 'Component 2'])

    X1 = manifold_2D['Component 1']
    X2 = manifold_2D['Component 2']
    unique = np.unique(label)
    try:
        plt.scatter(X1, X2, c=label)
    except Exception:
        print("data matrix does not match label matrix (select input file and label, remove headers)")
    # plt.legend(unique, loc=8, ncol=5, fontsize='x-small')

    name = 'ISOMAP'  # CHANGE FILENAME HERE
    plt.title(name + " Clusters: " + str(len(unique)))
    plt.savefig(name + ".png")
    plt.show()
    plt.clf()
    principalDf.to_excel("ISOMAP_COMPONENTS.xlsx")  # write the 1st and 2nd components to Excel
def runIsomap(X_train, X_test, y_train, y_test, comp_range, n_neigh):
    rbf_scores = []
    linear_scores = []

    for n_comp in comp_range:
        print("\nn_comp=%d\n" % (n_comp))
        transformer = Isomap(n_neighbors=n_neigh, n_components=n_comp, n_jobs=8)
        transformer.fit(X_train)
        X_train_proj = transformer.transform(X_train)
        X_test_proj = transformer.transform(X_test)

        if n_comp == 2:
            np.save('X_train_proj_2d_Isomap_' + str(n_neigh), X_train_proj)
            np.save('X_test_proj_2d_Isomap_' + str(n_neigh), X_test_proj)

        score_rbf = SVMmodel.runSVM(X_train_proj, X_test_proj, y_train, y_test,
                                    SVMmodel.getBestParam('rbf'), 'rbf')
        rbf_scores.append(score_rbf.mean())
        score_linear = SVMmodel.runSVM(X_train_proj, X_test_proj, y_train, y_test,
                                       SVMmodel.getBestParam('linear'), 'linear')
        linear_scores.append(score_linear.mean())

    for i, scores in enumerate([rbf_scores, linear_scores]):
        if i == 0:
            kernel = 'rbf'
        elif i == 1:
            kernel = 'linear'
        else:
            kernel = ''
        bestIdx = np.argmax(scores)
        bestNComp = comp_range[bestIdx]
        bestAcc = scores[bestIdx]
        with open('res_Isomap_' + kernel + '_' + str(n_neigh) + '.txt', 'w') as f:
            for j in range(len(comp_range)):
                f.write(kernel + ": n_comp = %f, acc = %f\n" % (comp_range[j], scores[j]))
            f.write(kernel + ": Best n_comp = %f\n" % (bestNComp))
            f.write(kernel + ": acc = %f\n" % (bestAcc))

    return rbf_scores, linear_scores
class ISO_Reducer(Reducer):
    '''Isomap reduction method.'''

    def __init__(self, dimensionality=2500):
        self.iso = Isomap(n_neighbors=5, n_components=dimensionality,
                          eigen_solver='auto', tol=0, max_iter=None,
                          path_method='auto', neighbors_algorithm='auto',
                          n_jobs=-1)

    def reduced(self, A):
        embd = self.iso.fit(A).embedding_
        return np.transpose(embd)
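# Usage sketch for ISO_Reducer (assumes the Reducer base class requires no
# constructor arguments); a small dimensionality and a toy curve are used so the
# example runs quickly, and the result comes back as components x samples.
import numpy as np

t = np.linspace(0, 3, 200)
A = np.c_[np.cos(t), np.sin(t), t]
reducer = ISO_Reducer(dimensionality=2)
print(reducer.reduced(A).shape)                  # (2, 200)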
class Isomap(MapAlgorithm):
    name = "Isomap"
    parameters = {
        "n_neighbors": {
            "type": ModelParameter.INTEGER,
            "defaultValue": 5
        }
    }

    def __init__(self, builder, callback=None):
        super().__init__(builder, callback)
        if "n_neighbors" not in self.params:
            self.params["n_neighbors"] = 5
        from sklearn.manifold import Isomap
        self._model = Isomap(n_neighbors=self.params['n_neighbors'])

    def getPoints(self, mols, X: DataFrame) -> [Point]:
        transformed_data = self.predict(X)
        points = []
        for idx, mol in enumerate(mols):
            x = transformed_data[idx, 0]
            y = transformed_data[idx, 1]
            point = Point.objects.create(
                map=self.builder.instance,
                molecule=mol,
                x=x,
                y=y,
            )
            points.append(point)
        return points

    def fit(self, X: DataFrame, y=None):
        self._model = self._model.fit(X)

    def predict(self, X: DataFrame) -> DataFrame:
        return self.model.transform(X)

    @property
    def model(self):
        return self._model
def isomap(self, n_components=2, n_neighbors=3, show=False):
    """
    Calculates lower-dimensional coordinates using the Isomap algorithm.

    :param n_components: dimensionality of the reduced space
    :type n_components: int, optional
    :param n_neighbors: used by Isomap to determine the number of neighbors
        for each point; a large neighbor size tends to produce a denser map
    :type n_neighbors: int, optional
    :param show: returns the calculated coordinates if True
    :type show: boolean, optional
    """
    model = Isomap(n_components=n_components, n_neighbors=n_neighbors)
    self.pos = model.fit(self.dismat).embedding_
    if show:
        return self.pos
def mult_scl(X, labels):
    print('labels:')
    for i, label in zip(range(1, len(labels) + 1), labels):
        print('{}: {}'.format(i, label))

    isomap = Isomap()
    points = isomap.fit(np.nan_to_num(X)).embedding_

    f, (ax1, ax2, ax3) = plt.subplots(1, 3)
    plot_location(labels, ax3)

    ax1.scatter(points[:, 0], points[:, 1], s=20, c='r')
    ax1.set_title('Isomap')
    add_labels(labels, points, ax1)

    mds = MDS()
    points = mds.fit(np.nan_to_num(X)).embedding_
    ax2.scatter(points[:, 0], points[:, 1], s=20, c='g')
    ax2.set_title('MDS')
    add_labels(labels, points, ax2)

    plt.show()
def exec_isomap(X, Y, mmpno):
    # n_neighbors=20
    isomap = Isomap(n_neighbors=10, n_components=2, eigen_solver='dense')
    X_iso = isomap.fit(X).transform(X)

    Ymax = np.max(Y)
    Ymin = np.min(Y)
    Y0to1 = (Y - Ymin) / (Ymax - Ymin)

    plt.figure(figsize=(1, 8))
    # plt.scatter(, X_iso[:, 1], c=cm.RdYlGn(1 - y), s=30)
    plt.show()

    plt.figure(figsize=(8, 8))
    plt.rcParams["font.size"] = 18
    plt.rcParams["font.family"] = "Serif"
    plt.scatter(X_iso[:, 0], X_iso[:, 1], c=cm.RdYlGn(1 - Y0to1), s=30)
    plt.ylim(-20, 20)
    plt.xlim(-20, 20)
    plt.xlabel("z1")
    plt.ylabel("z2")
    plt.show()

    # plt.figure(figsize=figure.figaspect(1))
    plt.figure(figsize=(8, 8))
    plt.scatter(X_iso[:, 0], X_iso[:, 1], c=cm.RdYlGn(1 - Y0to1), s=30)
    for i, no_a in enumerate(mmpno[:, 0]):
        no_b = mmpno[i, 1]
        if no_b >= 3025:
            print(i)
        plt.plot([X_iso[no_a, 0], X_iso[no_b, 0]],
                 [X_iso[no_a, 1], X_iso[no_b, 1]], color='blue')
    plt.ylim(-20, 20)
    plt.xlim(-20, 20)
    plt.xlabel("z1")
    plt.ylabel("z2")
    plt.show()
    ax.text(0.05, 0.05, str(digits.target[i]),
            transform=ax.transAxes, color='green')

# Treat each pixel as a feature: flatten out the array so we have a length-64
# array of pixel values representing each digit
X = digits.data
X.shape
y = digits.target
y.shape

# Unsupervised learning: dimensionality reduction with Isomap
from sklearn.manifold import Isomap
iso = Isomap(n_components=2)
iso.fit(digits.data)
data_projected = iso.transform(digits.data)
data_projected.shape

plt.scatter(data_projected[:, 0], data_projected[:, 1], c=digits.target,
            edgecolor='none', alpha=0.5,
            cmap=plt.cm.get_cmap('Spectral', 10))
plt.colorbar(label='digit label', ticks=range(10))
plt.clim(-0.5, 9.5)
# generally good separation in parameter space

# classification
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0)
bears[:2]

# In[25]:
bears = pd.DataFrame(bears)

# In[26]:
bears.shape

num_neighbors = 6

# In[29]:
iso = Isomap(n_components=3, n_neighbors=num_neighbors)
iso.fit(bears)
T = iso.transform(bears)
T.shape

isodf = pd.DataFrame(T, columns=['a', 'b', 'c'])
isodf.head()

fig1 = plt.figure(figsize=(12, 10))
ax1 = fig1.add_subplot(111)
ax1.set_title("2D projection with {} neighbors".format(num_neighbors))
ax1.scatter(isodf.a, isodf.b, c=colors)

fig2 = plt.figure(figsize=(12, 10))
ax2 = fig2.add_subplot(111, projection='3d')
# title is your chart title
# x is the principal component you want displayed on the x-axis; can be 0 or 1
# y is the principal component you want displayed on the y-axis; can be 1 or 2
#
# .. your code here ..
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
pca.fit(df)
T = pca.transform(df)
Plot2D(T, "PCA 1 2", 1, 2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
# .. your code here ..
from sklearn.manifold import Isomap
imap = Isomap(n_neighbors=8, n_components=3)
imap.fit(df)
T2 = imap.transform(df)
Plot2D(T2, "Isomap", 1, 2)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D.
# Even if you're not, just do it anyway.
#
# .. your code here ..
plt.show()
samples.append(img.reshape(-1))

df = pd.DataFrame(samples)

#
# Optional: Resample the image down by a factor of two if you
# have a slower computer. You can also convert the image from
# 0-255 to 0.0-1.0 if you'd like, but that will have no
# effect on the algorithm's results.
#
# .. your code here ..

#%%
from sklearn.manifold import Isomap
iso = Isomap(n_neighbors=6, n_components=3)
iso.fit(samples)
T = iso.transform(samples)


def Plot2D(T, title, x, y):
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_title(title)
    ax.set_xlabel('Component: {0}'.format(x))
    ax.set_ylabel('Component: {0}'.format(y))
    ax.scatter(T[:, x], T[:, y], marker='.', alpha=0.7)


def Plot3D(T, title, x, y, z):
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
if basic_plots:
    ax = pp.subplot(2, 1, 1)
    train.describe()[1:].plot(legend=False, ax=ax)
    pp.title("Description of training data.")
    ax = pp.subplot(2, 1, 2)
    train.loc[:, :5].plot(legend=False, ax=ax)
    pp.title("First 5 series plotted.")
    pp.show()

if do_pca:
    x = train.values
    pca = PCA(n_components=3)
    pca.fit(x)
    y = pca.transform(x)
    print('Orig shape: ', x.shape, 'New shape: ', y.shape)
    pp.scatter(y[:, 0], y[:, 1], c=target.values)
    pp.show()

if do_isomap:
    x = train.values
    from sklearn.manifold import Isomap
    isomap = Isomap(n_components=2, n_neighbors=20)
    isomap.fit(x)
    y = isomap.transform(x)
    pp.scatter(y[:, 0], y[:, 1], c=target.values)
    pp.show()
pca = PCA(n_components=3)
pca.fit(df)
T = pca.transform(df)
Plot2D(T, 'chart title', 1, 2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
# .. your code here ..
from sklearn.manifold import Isomap
im = Isomap(n_components=3)
im.fit(df)
T = im.transform(df)
Plot2D(T, 'chart title', 1, 2)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D.
# Even if you're not, just do it anyway.
#
# .. your code here ..
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
ax.set_xlabel('0')
ax.set_ylabel('1')
ax.set_zlabel('2')
samples.append(img.reshape(-1))
color_sample.append('r')

#
# TODO: Convert the list to a dataframe
#
# .. your code here ..
df_images = pd.DataFrame(samples)
# df_images_t = df_images.transpose()

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to three components, using K=6 for your neighborhood size
#
# .. your code here ..
iso_bear = Isomap(n_components=3, n_neighbors=6)
iso_bear.fit(df_images)
T_iso_bear = iso_bear.transform(df_images)

#
# TODO: Create a 2D scatter plot to graph your manifold. You
# can use either 'o' or '.' as your marker. Graph the first two
# isomap components
#
# .. your code here ..
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title('Manifold Scatterplot')
ax.set_xlabel('Component: {0}'.format(0))
ax.set_ylabel('Component: {0}'.format(1))
ax.scatter(T_iso_bear[:, 0], T_iso_bear[:, 1], marker='.', alpha=0.7, c=color_sample)
def main():
    # Load the dataset from Matlab
    data = sio.loadmat('baseline2.mat')
    n_train = int(data['n_train'])
    n_test = int(data['n_test'])
    train_x = np.array(data['train_x'])
    train_t = np.array(data['train_t']).reshape(n_train)
    test_x = np.array(data['test_x'])
    test_t = np.array(data['test_t']).reshape(800)
    X_indices = np.arange(train_x.shape[-1])

    # SVM fitting
    C = [-10, 5, 10]
    G = [-10, 5, 10]
    CF = [-10, 5, 10]

    # Plot the cross-validation score as a function of percentile of features
    NG = [10, 20, 50, 100, 200]
    components = (10, 20, 50, 100, 200)
    scores = list()
    svcs = list()
    isos = list()

    for cc in components:
        for nn in NG:
            best_c = 0
            best_g = 0
            best_cf = 0
            best_iso = None
            max_score = -np.inf
            iso = Isomap(n_components=cc, n_neighbors=nn)
            iso.fit(train_x)
            train = iso.transform(train_x)
            for c in C:
                for g in G:
                    for cf in CF:
                        # Find best C, gamma
                        svc = svm.SVC(C=2**c, gamma=2**g, coef0=2**cf, degree=3,
                                      kernel='poly', max_iter=1000000)
                        this_scores = cross_validation.cross_val_score(
                            svc, train, train_t, n_jobs=-1, cv=5, scoring='accuracy')
                        mean_score = sum(this_scores) / len(this_scores)
                        print("C: " + str(c) + " G: " + str(g) + " CMPS: " + str(cc) +
                              " A: " + str(mean_score) + " CF: " + str(cf) + " N: " + str(nn))
                        if mean_score > max_score:
                            max_score = mean_score
                            best_svm = svc
                            best_iso = iso
            svcs.append(best_svm)
            isos.append(best_iso)
            scores.append(max_score)

    m_ind = scores.index(max(scores))
    best_s = svcs[m_ind]
    iso = isos[m_ind]

    # Test final model
    test = iso.transform(test_x)
    train = iso.transform(train_x)
    best_s.fit(train, train_t)
    pred = best_s.predict(test)
    sio.savemat('predicted_iso.mat', dict(x=range(800), pred_t=pred))
    final_score = best_s.score(test, test_t)
    print(best_s)
    print("Final Accuracy: " + str(final_score))
    print(scores)
# maxabsscaler = pp.MaxAbsScaler()
# maxabsscaler.fit(X)
# X = maxabsscaler.transform(X)
# print('MaxAbsScaler\n========')

# X = pp.normalize(X)
# print('normalizer\n========')

# TODO: Use PCA to reduce noise, n_components 4-14
nc = 5
# pca = PCA(n_components=nc)
# pca.fit(X)
# X = pca.transform(X)
# print('PCA: ', nc)

# Use Isomap to reduce noise, n_neighbors 2-5
nn = 4
im = Isomap(n_neighbors=nn, n_components=nc)
im.fit(X)
X = im.transform(X)
print('Isomap: ', nn, ' comp: ', nc)

# TODO: train_test_split 30% and random_state=7
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=7)

# TODO: Create an SVC, train and score against defaults
result = findMaxSVC()
print(result['score'])
scaler = preprocessing.StandardScaler()  # 0.966101694915
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# pcaComponent = 4
# pca = PCA(n_components=pcaComponent)
# pca.fit(X_train)
# X_train = pca.transform(X_train)
# X_test = pca.transform(X_test)

neighbors = 2
components = 4
isomap = Isomap(n_neighbors=neighbors, n_components=components)
isomap.fit(X_train)
X_train = isomap.transform(X_train)
X_test = isomap.transform(X_test)

# svc = SVC()
# svc.fit(X_train, y_train)
# print(svc.score(X_test, y_test))

best_score = 0
best_C = 0
best_gamma = 0
for C in np.arange(0.05, 2.05, 0.05):
    for gamma in np.arange(0.001, 1.001, 0.001):
        svc = SVC(C=C, gamma=gamma)
        svc.fit(X_train, y_train)
        score = svc.score(X_test, y_test)
    return features_train_transformed, lables, vectorizer, selector, le, features


# nFeatures = np.arange(50, 1000, 50)
nISOMAP = np.arange(20, 200, 20)
data = {}
for k in nISOMAP:
    features, labels, vectorizer, selector, le, features_data = preprocess(
        "pkl/article_2_people.pkl", "pkl/lable_2_people.pkl")
    features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(
        features, labels, test_size=0.1, random_state=42)

    t0 = time()
    iso = Isomap(n_neighbors=15, n_components=k, eigen_solver='auto')
    iso.fit(features_train)
    print("Dimension Reduction time:", round(time() - t0, 3), "s")
    features_train = iso.transform(features_train)
    features_test = iso.transform(features_test)

    for name, clf in [
            ('AdaBoostClassifier', AdaBoostClassifier(algorithm='SAMME.R')),
            ('BernoulliNB', BernoulliNB(alpha=1)),
            ('GaussianNB', GaussianNB()),
            ('DecisionTreeClassifier', DecisionTreeClassifier(min_samples_split=100)),
            ('KNeighborsClassifier', KNeighborsClassifier(n_neighbors=50, algorithm='ball_tree')),
            ('RandomForestClassifier', RandomForestClassifier(min_samples_split=100)),
            ('SVC', SVC(kernel='linear', C=1))
    ]:
plt.show()
# -

# We can see that this reduction is now different from the PCA one. While it still
# looks like a roll, this time we can appreciate the "width" of the roll.
#
# Let's now see what happens with ISOMAP.
#
# ## ISOMAP
#
# For ISOMAP we need to define the hyper-parameter <i>n_neighbors</i>, which indicates
# how many neighbors are considered when building the graph. The resulting projection
# depends largely on this value.

# +
iso = Isomap(n_neighbors=15, n_components=2)
iso.fit(X)
manifold_2Da = iso.transform(X)

# +
fig1 = plt.figure(figsize=(10, 10), facecolor='white')
ax = fig1.add_subplot(1, 1, 1)
ax.set_facecolor('white')

plt.scatter(
    manifold_2Da[:, 0],
    manifold_2Da[:, 1],
    c=color,
    marker='o',
    cmap=plt.cm.Spectral
)
# plt.scatter(principalComponents[df_train['Survived']==0,0], principalComponents[df_train['Survived']==0,1], color='r', s=10)

plt.show()
# -

# Let's now see what happens with a smaller number of neighbors.
class ClusterPrinter:
    def __init__(self, num_images=20):
        # self.reducer = SpectralEmbedding()
        self.reducer = Isomap()
        self.sink_features = ports.StateSink()
        self.sink_filename = ports.StateSink()
        self.sink_image = ports.StateSink()
        self.num_images = num_images

    def __call__(self, clusters):
        features = self.sink_features.get()
        if clusters is None or features is None:
            return None

        valid = clusters.labels_ != -1
        view_data = features[valid]
        labels = clusters.labels_
        valid_labels = labels[valid]
        if len(valid_labels) == 0:
            return None

        choice = np.random.choice(range(len(valid_labels)),
                                  size=min(2000, len(valid_labels)),
                                  replace=False)
        view_data = self.reducer.fit(view_data[choice, :]).transform(features)
        print(view_data.shape)

        fig, ax = plt.subplots(figsize=(15, 15), dpi=300)
        num_clusters = len(set(valid_labels))
        patches = []
        for l in range(num_clusters):
            cluster = view_data[labels == l, :]
            try:
                hull = ConvexHull(cluster)
                patches.append(Polygon(cluster[hull.vertices, :]))
            except:
                pass
        p = PatchCollection(patches, cmap=matplotlib.cm.rainbow, alpha=0.4)
        ax.add_collection(p)

        invalid = np.invert(valid)
        plt.scatter(view_data[invalid, 0], view_data[invalid, 1], c='w', s=0.1)
        ax.set_facecolor('black')
        plt.scatter(view_data[valid, 0], view_data[valid, 1], c=valid_labels,
                    s=0.1, cmap='rainbow')

        # Add a few images to the figure
        choices = []
        imgs_per_label = max(1, int(self.num_images / num_clusters))
        for l in range(num_clusters):
            cluster_ind = np.where(labels == l)[0]
            choices += np.random.choice(cluster_ind,
                                        size=min(imgs_per_label, len(cluster_ind)),
                                        replace=False).tolist()
        plt.scatter(view_data[choices, 0], view_data[choices, 1], c=labels[choices],
                    s=180, marker='s', cmap='rainbow')

        # Get the x and y data and transform it into pixel coordinates
        xy_pixels = ax.transData.transform(
            np.vstack([view_data[choices, 0], view_data[choices, 1]]).T)
        xpix, ypix = xy_pixels.T

        for i, c in enumerate(choices):
            img = self.sink_image.get(c)
            if img is None:
                continue
            scale = 50.0 / np.max(img.shape)
            img = cv2.cvtColor(cv2.resize(img, dsize=(0, 0), fx=scale, fy=scale),
                               code=cv2.COLOR_BGR2RGB).astype(np.float32) / 255
            plt.figimage(img, xo=int(xpix[i]) - 25, yo=int(ypix[i]) - 25, zorder=10)

        pylab.savefig(self.sink_filename.get(), dpi=fig.dpi)
        plt.close('all')
colors = []
for imgname in os.listdir(folder):
    img = misc.imread(os.path.join(folder, imgname))
    samples.append((img / 255.0).reshape(-1))
    colors.append('b')

folder += 'i'
for imgname in os.listdir(folder):
    img = misc.imread(os.path.join(folder, imgname))
    samples.append((img / 255.0).reshape(-1))
    colors.append('r')

df = pd.DataFrame(samples)

iso = Isomap(n_components=3, n_neighbors=6)
iso.fit(df)
T = iso.transform(df)

import matplotlib.pyplot as plt

plt.figure()
plt.scatter(T[:, 0], T[:, 1], c=colors)
plt.show()

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_title('...')
ax.set_xlabel('component 0')
ax.set_ylabel('component 1')
ax.set_zlabel('component 2')
ax.scatter(T[:, 0], T[:, 1], T[:, 2], c=colors, marker='.', alpha=0.75)
# decision surface / boundary. In the wild, you'd probably leave in a lot
# more dimensions, but wouldn't need to plot the boundary; simply checking
# the results would suffice.
#
# Your model should only be trained (fit) against the training data (data_train).
# Once you've done this, you need to use the model to transform both data_train
# and data_test from their original high-D image feature space down to 2D.
#
# Implement Isomap here. ONLY train against your training data, but
# transform both your training + test data, storing the results back into
# data_train and data_test.
#
iso = Isomap(n_neighbors=6, n_components=2)
print("isomap fit start")
iso.fit(data_train)
print("isomap fit end")
data_train = iso.transform(data_train)
data_test = iso.transform(data_test)

#
# Implement KNeighborsClassifier here. You can use any K value from 1
# through 20, so play around with it and attempt to get good accuracy.
# This is the heart of this assignment: looking at the 2D points that
# represent your images, along with a list of "answers" or correct class
# labels that those 2D representations should be.
#
for i in range(1, 21):
def isoMap(X, y):
    im = Isomap(n_components=1, eigen_solver="dense", n_neighbors=20)
    im.fit(X)
    transformX = im.transform(X)
    return transformX
print('offset1: ', offset1)
print('offset2: ', offset2)

# HERE structures must have only atoms of the selected chain
TM_align = rcu.TM_aligned_residues(pdb1, pdb2, offset1, offset2)
individualjammings1 = np.asarray(get_permutations(nj1['individual'], TM_align['alignedList1']))
individualjammings2 = np.asarray(get_permutations(nj2['individual'], TM_align['alignedList2']))

PValsScore = scoreFromPvalues(individualjammings1, individualjammings2)
print('PValsScore: ', PValsScore)

clf = Isomap(n_components=2)
clf.fit(individualjammings1)
ij1 = clf.transform(individualjammings1)
ij2 = clf.transform(individualjammings2)
print(ij1)

f, (ax1, ax2, ax3) = pl.subplots(1, 3, sharex=True, sharey=True)
pl.ioff()
pl.title('ensemble correlation: %.4f' % PValsScore)
# pl.subplot(1, 2, 1)
ax1.scatter(ij1[:, 0], ij1[:, 1], marker='o', s=45, facecolor='0.6', edgecolor='r')
# pl.subplot(1, 2, 2)
ax2.scatter(ij2[:, 0], ij2[:, 1], marker='o', s=45, facecolor='0.6', edgecolor='r')
ax3.scatter(ij2[:, 0], ij2[:, 1], marker='o', s=25, facecolor='y', edgecolor='0.05', alpha=0.6)
ax3.scatter(ij1[:, 0], ij1[:, 1], marker='o', s=25, facecolor='b', edgecolor='0.05', alpha=0.5)
ax1.axes.get_xaxis().set_visible(False)
ax2.axes.get_xaxis().set_visible(False)
# y is the principal component you want displayed on the y-axis; can be 1 or 2
#
pca_data = PCA(n_components=3)
pca_data.fit(df)
T_pca = pca_data.transform(df)
Plot2D(T_pca, 'PCA Transformed Data PC0VsPC1', 0, 1)
# Plot2D(T_pca, 'PCA Transformed Data PC0VsPC2', 0, 2)
# Plot2D(T_pca, 'PCA Transformed Data PC1VsPC2', 1, 2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
iso_data = Isomap(n_neighbors=3, n_components=3)
iso_data.fit(df)
T_iso = iso_data.transform(df)
Plot2D(T_iso, 'Isomap Transformed Data Ax0VsAx1', 0, 1)
# Plot2D(T_iso, 'Isomap Transformed Data Ax0VsAx2', 0, 2)
# Plot2D(T_iso, 'Isomap Transformed Data Ax1VsAx2', 1, 2)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D.
# Even if you're not, just do it anyway.
#
# fig = plt.figure()
# ax = fig.add_subplot(111, projection='3d')
# ax.set_xlabel('Principal Component 0')
# ax.set_ylabel('Principal Component 1')
# ax.set_zlabel('Principal Component 2')
# Load the .mat file:
mat = scipy.io.loadmat('datasets/face_data.mat')

# Get the img data:
pics = mat['images'].transpose()
num_images = pics.shape[0]
num_pixels = int(np.sqrt(pics.shape[1]))

# Transpose the pictures:
for i in range(num_images):
    pics[i, :] = pics[i, :].reshape(num_pixels, num_pixels).transpose().flatten()

# Load up your face_labels dataset as a series:
labels = pd.read_csv('datasets/face_labels.csv', header=None)[0]

# Do train_test_split:
X_train, X_test, Y_train, Y_test = train_test_split(pics, labels, test_size=.15, random_state=7)

# Implement Isomap:
iso = Isomap(n_components=2, n_neighbors=5)
iso.fit(X_train)
X_train = iso.transform(X_train)
X_test = iso.transform(X_test)

# Implement KNeighborsClassifier:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, Y_train)

# Print the accuracy of the testing set:
print(f"Accuracy: {knn.score(X_test, Y_test)}")

# Plot the decision boundary, the training data and testing images:
plot_2d_boundary(knn, X_train, Y_train, X_test, Y_test)

# Show graph:
plt.show()