def get_candidate(X, dim, k_min, k_max, verbose=0):
    """Return candidate Isomap neighborhood sizes for a ``dim``-dimensional embedding.

    An Isomap model is fitted for every ``k`` in ``[k_min, k_max]`` and its
    reconstruction error is recorded.  Every ``k`` whose error is strictly
    lower than the errors of both ``k - 1`` and ``k + 1`` (a strict interior
    local minimum of the error curve) is a candidate.  When the curve has no
    such minimum, ``k_max`` is returned as the only candidate.

    :param X: data to embed, passed unchanged to ``Isomap.fit``
    :param dim: number of embedding dimensions (``n_components``)
    :param k_min: smallest neighborhood size to try (inclusive)
    :param k_max: largest neighborhood size to try (inclusive)
    :param verbose: when equal to 2, print the candidates and plot the
        reconstruction error against the neighborhood size
    :return: list of candidate neighborhood sizes in increasing order
    :raises ValueError: if ``k_min`` is greater than ``k_max``
    """
    # An empty range used to surface as an UnboundLocalError in the fallback
    # below; reject it up front with a meaningful message instead.
    if k_min > k_max:
        raise ValueError(
            'k_min (%s) must not be greater than k_max (%s)' % (k_min, k_max))

    errs = [
        Isomap(n_neighbors=k, n_components=dim).fit(X).reconstruction_error()
        for k in range(k_min, k_max + 1)
    ]

    # Only interior points can be local minima: each one needs a neighbour
    # on both sides to compare against.
    k_candidates = [
        k_min + i
        for i in range(1, len(errs) - 1)
        if errs[i] < errs[i - 1] and errs[i] < errs[i + 1]
    ]
    if not k_candidates:
        # No interior minimum: fall back to the largest size tried.
        k_candidates.append(k_max)

    if verbose == 2:
        # Single pre-formatted argument so the output is the same text on
        # Python 2 and Python 3 (the two spaces match the old print statement).
        print('k_candidates:  %s' % (k_candidates,))
        plt.figure()
        plt.rc("font", size=12)
        plt.plot(range(k_min, k_max + 1), errs, '-o')
        plt.xlabel('Neighborhood size')
        plt.ylabel('Reconstruction error')
        plt.title('Select candidates of neighborhood size')
        plt.show()

    return k_candidates
def isomap_embedded_data_frame(bearing_df: pd.DataFrame, verbose: bool = False, n_components: int = 5):
    """
    Embed every observation of a bearing time series with Isomap.

    :param bearing_df: Data frame which contains computed features or raw features.
    :param verbose: If true, print the reconstruction error of the fitted embedding.
    :param n_components: Number of coordinates of the embedding.
    :return: Tuple of the embedded observations as a data frame and the fitted Isomap estimator.
    """
    embedder = Isomap(n_components=n_components)
    embedded = embedder.fit_transform(X=bearing_df)

    if verbose:
        print("Reconstruction error: ", embedder.reconstruction_error())

    # The embedding comes back as a plain array; wrap it for the caller.
    embedded_df = pd.DataFrame(embedded)
    return embedded_df, embedder
def do_isomap(X, n_comp, n_neighbors=5, n_jobs=2):
    '''
    Reduces the dimensions of data with Isomap and reports the reconstruction error.

    :param X: data
    :param n_comp: number of dimensions to reduce to
    :param n_neighbors: number of neighbors considered for each point
        (defaults to the previously hard-coded 5)
    :param n_jobs: number of parallel jobs handed to Isomap
        (defaults to the previously hard-coded 2)
    :return: Dimension reduced data
    '''
    print("Performing isomap")
    imap = Isomap(
        n_components=n_comp,
        n_neighbors=n_neighbors,
        n_jobs=n_jobs,
        neighbors_algorithm='auto',
    )
    X_n = imap.fit_transform(X)
    print("Reconstruction Error:", imap.reconstruction_error())
    return X_n
def isomap(self, data):
    """Embed ``data`` with Isomap, then cluster the embedding with mini-batch k-means.

    ``self.parameters`` supplies the ``Isomap`` keyword arguments plus one
    extra entry, ``"k_means_n_clusters"``, which is taken out before the
    estimator is built and forwarded to ``self.batch_kmeans`` instead.

    :param data: samples to embed, passed unchanged to ``Isomap.fit_transform``
    :return: the result of ``self.batch_kmeans`` on the embedded samples
    :raises KeyError: if ``self.parameters`` lacks ``"n_neighbors"`` or
        ``"k_means_n_clusters"``
    """
    # Single pre-formatted argument per print so the output is the same text
    # on Python 2 and Python 3.
    print('Isomap neighbours : %s' % (self.parameters["n_neighbors"],))
    print('Isomap components, ie final number of coordinates : %s' % (self.k,))

    # Work on a copy so self.parameters keeps its k-means entry.
    isomap_params = dict(self.parameters)
    k_means_n_clusters = isomap_params.pop("k_means_n_clusters")

    m = Isomap(neighbors_algorithm='kd_tree', **isomap_params)
    new_data = m.fit_transform(data)

    print(self.parameters)

    # Values are paired positionally with the names listed in the module-level
    # params["mini-batchk-means"]; their meaning depends on that list, which
    # is defined outside this method -- TODO confirm against its definition.
    kmeans_values = [k_means_n_clusters, 1000, 500, 1000, 'k-means++', 5]
    return self.batch_kmeans(
        new_data,
        parameters=dict(zip(params["mini-batchk-means"], kmeans_values)))
def estimate_dim(data, verbose=0):
    '''
    Estimate the intrinsic dimensionality of data.

    For dim = 1, 2, ... up to the number of columns of data, an Isomap model
    is fitted (with the neighborhood size chosen by pick_k) and its
    reconstruction error recorded.  The search stops at the first dim > 2
    whose error improvement over dim - 1 is less than half the improvement
    that dim - 1 achieved over dim - 2; dim - 1 is then returned.  If no such
    dim exists, 1 is returned.

    data: input data
    verbose: forwarded to get_k_range and pick_k; when equal to 2 the
        reconstruction errors are also plotted against the dimensionality

    Reference:
    "Samko, O., Marshall, A. D., & Rosin, P. L. (2006). Selection of the optimal parameter
    value for the Isomap algorithm. Pattern Recognition Letters, 27(9), 968-979."
    '''
    # Standardize: center to the mean and scale each component to unit variance
    data = scale(data)
    k_min, k_max = get_k_range(data, verbose=verbose)

    # The reconstruction error is expected to decrease as n_components grows
    # until n_components reaches the intrinsic dimension.
    errs = []
    for dim in range(1, data.shape[1] + 1):
        k_opt = pick_k(data, dim, k_min, k_max, verbose=verbose)
        model = Isomap(n_neighbors=k_opt, n_components=dim).fit(data)
        errs.append(model.reconstruction_error())

        if dim <= 2:
            # Two consecutive improvements are needed before comparing them.
            continue

        latest_drop = errs[-2] - errs[-1]
        previous_drop = errs[-3] - errs[-2]
        if latest_drop < .5 * previous_drop:
            intr_dim = dim - 1
            break
    else:
        # The loop ran to completion without meeting the stopping rule.
        intr_dim = 1

    # NOTE: find_gap-based selection (method='difference' or
    # method='percentage') was tried as an alternative and is disabled.

    if verbose == 2:
        plt.figure()
        plt.rc("font", size=12)
        plt.plot(range(1, dim + 1), errs, '-o')
        plt.xlabel('Dimensionality')
        plt.ylabel('Reconstruction error')
        plt.title('Select intrinsic dimension')
        plt.show()

    return intr_dim
def run_isomap(self, n_neighbors, low_dim_size):
    """
    Embed the values returned by ``self.get_x()`` with Isomap.

    The embedding is stored on the object as ``self.X_low`` and the
    reconstruction error of the fitted model is printed.

    Parameters
    ----------
    self : object
        EC_SCOP_Evaluate object setup for this analysis
    n_neighbors : int
        neighborhood size handed to Isomap
    low_dim_size : int
        number of components of the resulting embedding

    Returns
    -------
    None
    """
    print("Run isomap")

    model = Isomap(n_neighbors=n_neighbors, n_components=low_dim_size)
    features = self.get_x().values
    self.X_low = model.fit_transform(features)

    error = model.reconstruction_error()
    print("Done. Reconstruction error: {:.3f}".format(error))
def compute_isomap_explain_power(self, model, num_dims=2):
    '''
    Given a computed course vectors embedding model,
    extract the vectors, standardize them, fit an
    isomap embedding with num_dims components, and
    return the reconstruction error of that embedding.

    @param model: course context model as trained by neural net
    @type model: gensim.model.word_vectors
    @param num_dims: number of isomap components
    @type num_dims: int
    @return: reconstruction error of the fitted isomap embedding
    @rtype: float
    '''
    standardized = preprocessing.scale(model.wv.vectors)

    embedder = Isomap(n_components=num_dims)
    # Only the fitted state is needed; the embedded coordinates are discarded.
    embedder.fit_transform(standardized)

    return embedder.reconstruction_error()
# Finish and display the 2-D scatter plot started above this fragment:
# column 0 of iso_dim on the x axis, column 1 on the y axis, points coloured
# by y (iso_dim and y are defined earlier, outside this fragment).
plt.ylabel('Latent Variable 2 (explains second most variance)')
plt.title('Isomap 2-Dimension Plot with Observation Class')
plt.scatter(iso_dim[:, 0], iso_dim[:, 1], c=y)
plt.colorbar()
plt.show()

# Apply isomap for several choices of the number of dimensions.
# range(3) yields 0, 1, 2; nd + 1 turns that into 1, 2 and 3 components.
# Limitation is that nbr dimensions must be < the number of original features
nbr_dim = range(3)
iso_dim_nbr = []
iso_reconstruction_errors = []
for nd in nbr_dim:
    iso_model = Isomap(n_neighbors=5, n_components=nd + 1)
    # Fitted for its reconstruction error only; the embedding is discarded.
    iso_model.fit_transform(x_std)
    iso_dim_nbr.append(nd + 1)
    iso_reconstruction_errors.append(iso_model.reconstruction_error())

# One row per number of dimensions tried, with its reconstruction error.
iso_results = {'nbr_dim': iso_dim_nbr, 'error': iso_reconstruction_errors}
iso_results = pd.DataFrame.from_dict(iso_results)

# See which number of dimensions has the lowest reconstruction error
plt.plot(iso_results['nbr_dim'], iso_results['error'])
plt.xlabel('Number of Latent Dimensions')
plt.ylabel('Reconstruction Error')
plt.title('Plot of Error by Number of Latent Variables')
plt.show()

# Use iso_model.transform(x_test) to apply the isomap fitted on the training
# set to the test set. Note: after the loop above, iso_model is the last
# model fitted, i.e. the one with 3 components.

# Section banner for the code that follows (a bare string expression).
''' ------------------------------------------------------------------------------- -------------------------------Modified LLE------------------------------------ ------------------------------------------------------------------------------- '''