def plot_embedding(X, Y, X_old, images, title=None):
    """Scatter a min-max scaled embedding and overlay image thumbnails.

    ``X`` is rescaled column-wise to the unit range, every point is drawn as
    a circle coloured by ``ord(label) / 12`` and, when the installed
    matplotlib exposes ``offsetbox.AnnotationBbox``, ``images[i]`` is drawn
    at point ``i`` unless an already drawn thumbnail is too close.

    Args:
        X: array indexed as ``X[:, 0]`` / ``X[:, 1]`` holding the embedding.
        Y: sequence of single-character labels (each one is fed to ``ord``).
        X_old: array whose first dimension gives the number of points that
            are considered for a thumbnail.
        images: per-point images, indexed like ``X``.
        title: optional plot title.
    """
    lower = np.min(X, 0)
    upper = np.max(X, 0)
    X = (X - lower) / (upper - lower)

    plt.figure()
    ax = plt.subplot(111)

    point_colors = []
    for label in Y[:]:
        point_colors.append(ord(label) * 1.0 / 12.0)
    plt.scatter(X[:, 0], X[:, 1], c=point_colors, marker='o')

    if hasattr(offsetbox, 'AnnotationBbox'):
        # Thumbnails are only available with matplotlib > 1.0.
        # The list of drawn positions is seeded with the corner (1, 1) so
        # that the distance test always has something to compare against.
        placed = np.array([[1., 1.]])
        for idx in range(X_old.shape[0]):
            point = X[idx]
            sq_dist = np.sum((point - placed)**2, 1)
            # Draw only when no earlier thumbnail is closer than the cutoff.
            if not np.min(sq_dist) < 4e-3:
                placed = np.r_[placed, [point]]
                thumbnail = offsetbox.OffsetImage(images[idx],
                                                  cmap=plt.cm.bone)
                ax.add_artist(offsetbox.AnnotationBbox(thumbnail, X[idx]))

    plt.xticks([])
    plt.yticks([])
    if title is not None:
        plt.title(title)
def plot_embedding(X, y, imgs=None, title=None):
    """Draw each embedded point as its coloured label, optionally with images.

    Args:
        X: array of embedding coordinates; columns 0 and 1 are plotted after
            column-wise min-max scaling.
        y: labels indexed like ``X``; ``y[i] / 10.`` selects the ``Set1``
            colour and ``str(y[i])`` is the text drawn.
        imgs: optional per-point images shown as thumbnails.
        title: optional plot title.
    """
    # Adapted from http://scikit-learn.org/stable/auto_examples/manifold/plot_lle_digits.html
    col_min = np.min(X, 0)
    col_max = np.max(X, 0)
    X = (X - col_min) / (col_max - col_min)

    plt.figure(figsize=(10, 10))
    ax = plt.subplot(111)

    # One coloured text label per sample.
    n_points = X.shape[0]
    for idx in range(n_points):
        plt.text(X[idx, 0],
                 X[idx, 1],
                 str(y[idx]),
                 color=plt.cm.Set1(y[idx] / 10.),
                 fontdict={'weight': 'bold', 'size': 9})

    # Image overlays need matplotlib > 1.0 (offsetbox.AnnotationBbox).
    can_overlay = hasattr(offsetbox, 'AnnotationBbox')
    if imgs is not None and can_overlay:
        # Positions already carrying a thumbnail, seeded with (1, 1).
        placed = np.array([[1., 1.]])
        for idx in range(X.shape[0]):
            point = X[idx]
            nearest = np.min(np.sum((point - placed)**2, 1))
            if nearest < 4e-3:
                # Too close to a thumbnail that is already shown.
                continue
            placed = np.r_[placed, [point]]
            picture = offsetbox.OffsetImage(imgs[idx], cmap=plt.cm.gray_r)
            ax.add_artist(offsetbox.AnnotationBbox(picture, X[idx]))

    plt.xticks([])
    plt.yticks([])
    if title is not None:
        plt.title(title)
def plot_digits(X, digits, title=None, plot_box=True):
    """Visualise an embedding of a digits dataset as coloured digit labels.

    Args:
        X: embedding coordinates; min-max scaled per column before plotting.
        digits: dataset object providing ``target``, ``data`` and ``images``.
        title: optional plot title.
        plot_box: when true (and matplotlib supports it) overlay thumbnails
            of ``digits.images`` on sufficiently isolated points.
    """
    colorlist = get_colors(10)

    # Scale the embedding vectors to the unit range.
    lower = np.min(X, 0)
    upper = np.max(X, 0)
    X = (X - lower) / (upper - lower)

    plt.figure()
    ax = plt.subplot(111)
    for row in range(X.shape[0]):
        plt.text(X[row, 0],
                 X[row, 1],
                 str(digits.target[row]),
                 color=colorlist[digits.target[row]],
                 fontdict={'weight': 'medium', 'size': 'smaller'})

    if plot_box:
        if hasattr(offsetbox, 'AnnotationBbox'):
            # Thumbnails are only available with matplotlib > 1.0.
            placed = np.array([[1., 1.]])  # seed entry for the distance test
            for row in range(digits.data.shape[0]):
                point = X[row]
                sq_dist = np.sum((point - placed)**2, 1)
                if np.min(sq_dist) < 4e-2:
                    # Skip points crowded by an existing thumbnail.
                    continue
                placed = np.r_[placed, [point]]
                picture = offsetbox.OffsetImage(digits.images[row],
                                                cmap=plt.cm.gray_r)
                ax.add_artist(offsetbox.AnnotationBbox(picture, X[row]))

    plt.xticks([])
    plt.yticks([])
    plt.xlim(-0.05, 1.05)
    plt.ylim(-0.05, 1.05)
    if title is not None:
        plt.title(title)
def visualize(embed, x_test, y_test):
    """Show test images at their embedding coordinates, tinted by class.

    The embedding is plotted in its original scale. An image is drawn only
    if no previously drawn image lies within a squared distance of
    ``3e-4`` times the squared diagonal of the embedding's bounding box.

    Args:
        embed: 2-D array of embedding coordinates, one row per sample; the
            first two columns define the plotted axes.
        x_test: per-sample image patches; each patch is expanded to three
            channels and blended between white and the class colour.
            (assumes 2-D patches with values in [0, 1] -- TODO confirm)
        y_test: per-sample class labels; ``y_test[i] / 10.`` selects the
            ``tab10`` colour.
    """
    feat = embed
    ax_min = np.min(embed, 0)
    ax_max = np.max(embed, 0)
    # The overlap threshold is relative to the size of the data, because the
    # embedding is not rescaled.
    min_sq_dist = 3e-4 * np.sum((ax_max - ax_min)**2)

    plt.figure()
    ax = plt.subplot(111)
    colormap = plt.get_cmap('tab10')

    # Start with no drawn images. A fixed sentinel such as (1, 1) can fall
    # inside the range of an unscaled embedding and would wrongly suppress
    # every image close to it.
    shown_images = np.empty((0, feat.shape[1]))
    for i in range(feat.shape[0]):
        if shown_images.shape[0]:
            dist = np.sum((feat[i] - shown_images)**2, 1)
            if np.min(dist) < min_sq_dist:
                # don't show points that are too close
                continue
        shown_images = np.vstack([shown_images, feat[i]])

        # Blend white (patch == 0) with the class colour (patch == 1).
        patch_to_color = np.expand_dims(x_test[i], -1)
        patch_to_color = np.tile(patch_to_color, (1, 1, 3))
        patch_to_color = (1 - patch_to_color) * (1, 1, 1) \
            + patch_to_color * colormap(y_test[i] / 10.)[:3]

        imagebox = offsetbox.AnnotationBbox(
            offsetbox.OffsetImage(patch_to_color, zoom=0.5,
                                  cmap=plt.cm.gray_r),
            xy=feat[i],
            frameon=False)
        ax.add_artist(imagebox)

    plt.axis([ax_min[0], ax_max[0], ax_min[1], ax_max[1]])
    plt.title('Embedding from the last layer of the network')
    plt.show()
def plot_tsne_3D(X_tsne, merged, azim=120, distance=70000):
    """Render one frame of a 3-D t-SNE scatter with image thumbnails.

    The figure is written to ``/tmp/movie<azim>.png``.

    Args:
        X_tsne: array with one row per sample and three coordinate columns.
        merged: pandas DataFrame aligned with ``X_tsne``; column 0 holds the
            image file stem and column 1 the class value used for colouring
            (divided by 11 before being passed to the ``magma`` colormap).
        azim: azimuth of the 3-D view in degrees; also used in the file name.
        distance: minimum squared distance (in t-SNE units) between two
            points that both get a thumbnail.
    """
    fig = plt.figure(figsize=(20, 20))
    ax = fig.add_subplot(111, projection=Axes3D.name)
    # A frameless 2-D axes on top of the 3-D one: the thumbnails are added to
    # it at the positions returned by proj().
    ax2d = fig.add_subplot(111, frame_on=False)
    ax2d.axis("off")
    ax.view_init(elev=30., azim=azim)

    for i in range(X_tsne.shape[0]):
        # iloc[i, 1] is purely positional; iloc[i][1] would fall back to a
        # label lookup on the row Series.
        ax.scatter(X_tsne[i, 0], X_tsne[i, 1], X_tsne[i, 2],
                   c=plt.cm.magma(merged.iloc[i, 1] / 11.), s=100)

    if hasattr(offsetbox, 'AnnotationBbox'):
        # NOTE(review): the seed point (1, 1, 1) lies inside typical t-SNE
        # ranges, so points near it never get a thumbnail -- confirm that
        # this is intended.
        shown_images = np.array([[1., 1., 1.]])
        for i in range(merged.shape[0]):
            dist = np.sum((X_tsne[i] - shown_images)**2, 1)
            if np.min(dist) < distance:
                # don't show points that are too close
                continue
            shown_images = np.r_[shown_images, [X_tsne[i]]]

            path = ('data/perfiles_CATA/png_full/' + merged.iloc[i, 0] +
                    '.png')
            # Close the file as soon as the inverted copy exists.
            with Image.open(path) as image:
                inverted_image = PIL.ImageOps.invert(image)
            # Image.ANTIALIAS was an alias of LANCZOS and no longer exists
            # in current Pillow releases.
            inverted_image.thumbnail((40, 40), Image.LANCZOS)

            imagebox = offsetbox.AnnotationBbox(
                offsetbox.OffsetImage(inverted_image),
                proj(X_tsne[i], ax, ax2d))
            ax2d.add_artist(imagebox)

    ax.set_xlabel('X Label')
    ax.set_ylabel('Y Label')
    ax.set_zlabel('Z Label')
    plt.title('t-SNE over the 11 classes of vessels')
    plt.savefig("/tmp/movie%d.png" % azim)
def plot_embeddings(X, y, X_origin=None, show_as_imgs=False, title=None, savefig=False):
    """Project ``X`` to 2-D with t-SNE and plot the labelled points.

    Args:
        X: samples to embed; passed to ``TSNE.fit_transform``.
        y: labels indexed like ``X``; drawn as text and used for colouring.
        X_origin: original images indexed like ``X``; required when
            ``show_as_imgs`` is true.
        show_as_imgs: overlay thumbnails of ``X_origin`` on points that are
            far enough from already drawn thumbnails.
        title: optional plot title.
        savefig: when true, write the figure to ``tsne.png`` (300 dpi).
    """
    # extract components
    tsne = TSNE(n_components=2, perplexity=5)
    tsne_embeddings = tsne.fit_transform(X)

    # scale them to the unit range, column by column
    x_min = np.min(tsne_embeddings, 0)
    x_max = np.max(tsne_embeddings, 0)
    tsne_embeddings = (tsne_embeddings - x_min) / (x_max - x_min)

    # create figure
    plt.figure(figsize=(10, 10))
    ax = plt.subplot(111)

    # The number of distinct labels is loop invariant; compute it once
    # instead of once per sample.
    n_classes = np.unique(y).size
    for i in range(len(X)):
        # print the label of the sample
        plt.text(x=tsne_embeddings[i, 0],
                 y=tsne_embeddings[i, 1],
                 s=str(y[i]),
                 color=plt.cm.Set1(y[i] / n_classes),
                 fontdict={'weight': 'bold', 'size': 12})

    # overlay the original images on top of the text labels
    if show_as_imgs:
        if X_origin is None:
            # Report the problem only when thumbnails were actually requested
            # and the images are missing.
            print("To annotate embeddings with the original images, X_origin is required!")
        elif hasattr(offsetbox, 'AnnotationBbox'):
            # only print thumbnails with matplotlib > 1.0
            shown_images = np.array([[1., 1.]])  # seed for the distance test
            for i in range(len(X)):
                # squared distance to every thumbnail drawn so far
                dist = np.sum((tsne_embeddings[i] - shown_images)**2, 1)
                # don't show points that are too close
                if np.min(dist) < 4e-4:
                    continue
                # remember the position of this thumbnail
                shown_images = np.r_[shown_images, [tsne_embeddings[i]]]
                # plot the original image at the embedding coordinates
                img = offsetbox.OffsetImage(X_origin[i],
                                            cmap=plt.cm.gray_r,
                                            zoom=0.25,
                                            filterrad=0.1)
                imagebox = offsetbox.AnnotationBbox(img, tsne_embeddings[i])
                ax.add_artist(imagebox)

    # disable grid
    plt.grid(False)
    # disable ticks
    plt.xticks([])
    plt.yticks([])
    # print title
    if title is not None:
        plt.title(title)
    # save figure
    if savefig:
        plt.savefig('tsne.png', dpi=300)
    plt.show()
def plotLLEscatter(C, ypos, St, modes, bound=None, thumb_frac=None, VecDist=None, saveFolder=None):
    '''
    Plot pairwise scatter plots of mode coefficients with thumbnail overlays.

    The upper triangle of a (len(modes) - 1) x (len(modes) - 1) grid is
    filled: panel (i, j) shows C[i] against C[j + 1], coloured by ypos, with
    selected thumbnails attached by arrows. Panels below the diagonal are
    switched off.

    Inputs:
    C - matrix of coefficients (mode number, coefficient for each frame)
    ypos - distance from the wall for each thumbnail
    St - thumbnail data indexed as St[:, :, frame] (not necessarily velocity,
         should use swirl)
    modes - indices of modes to be plotted (only its length is used here)
    bound - the axis bound. If None, taken to be the rounded max |coefficient|
    thumb_frac - minimum spacing between thumbnails as a fraction of the
                 largest coefficient range (default 0.5)
    VecDist - length of each thumbnail vector pointing to coefficient
              location (default 0.05)
    saveFolder - if given, passed to fig.savefig as the output path

    Output:
    draws the grid of scatter plots; nothing is returned
    '''
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib import offsetbox, colors

    if bound is None:
        bound = round(np.max(np.absolute(C)))
    if thumb_frac is None:
        thumb_frac = 0.5
    if VecDist is None:
        VecDist = 0.05

    n_panels = len(modes) - 1
    # squeeze=False keeps axs two-dimensional even for a 1x1 grid, so that
    # axs[i, j] below is always valid.
    fig, axs = plt.subplots(ncols=n_panels,
                            nrows=n_panels,
                            figsize=(9, 12),
                            squeeze=False)
    fig.subplots_adjust(hspace=0.01, left=0.01, right=1)

    # plt.get_cmap is used rather than plt.cm.get_cmap, which newer
    # matplotlib releases no longer provide.
    colorize = dict(c=ypos, cmap=plt.get_cmap('rainbow', 100))
    cmap = 'RdBu_r'
    C2 = C.copy().T  # one row per frame

    # Minimum squared distance between two frames that both get a thumbnail;
    # it does not depend on the panel, so compute it once.
    min_dist_2 = (thumb_frac * max(C2.max(0) - C2.min(0)))**2

    for i in range(n_panels):
        for j in range(n_panels):
            ax = axs[i, j]
            if j < i:
                ax.axis('off')
                continue

            ax.scatter(C[i], C[j + 1], s=2, facecolor='0.5', lw=0, **colorize)
            ax.plot([-1 * bound, bound], [0, 0], '--k')
            ax.plot([0, 0], [-1 * bound, bound], '--k')

            if i == 0:
                ax.set_xlabel('C{0}'.format(j + 2))
                ax.xaxis.tick_top()
                ax.xaxis.set_label_position("top")
                ax.tick_params(axis='x', labelsize=7)
            else:
                ax.set_xticklabels([])

            if j == n_panels - 1:
                ax.yaxis.tick_right()
                ax.set_ylabel('C{0}'.format(i + 1))
                ax.yaxis.set_label_position("right")
                ax.tick_params(axis='y', labelsize=7)
            else:
                ax.set_yticklabels([])

            ax.set_xlim(bound, -1 * bound)
            ax.set_ylim(-1 * bound, bound)
            ax.set_aspect("equal")
            # "box-forced" is no longer accepted by matplotlib; "box" is the
            # supported spelling.
            ax.set_adjustable("box")

            shown_images = np.array([2 * C2.max(0)])
            for k in range(C2.shape[0]):
                dist = np.sum((C2[k] - shown_images)**2, 1)
                if np.min(dist) < min_dist_2:
                    # don't show points that are too close
                    continue
                shown_images = np.vstack([shown_images, C2[k]])

                anchor = C2[k, [i, j + 1]]
                vecNorm = (anchor[0]**2 + anchor[1]**2)**0.5
                # Push the thumbnail radially away from the origin; a point
                # exactly at the origin has no direction, so leave it in
                # place instead of dividing by zero.
                if vecNorm > 0:
                    box_xy = VecDist * anchor / vecNorm + anchor
                else:
                    box_xy = anchor
                imagebox = offsetbox.AnnotationBbox(
                    offsetbox.OffsetImage(St[:, :, k],
                                          cmap=cmap,
                                          norm=colors.Normalize(-50, 50),
                                          zoom=1.5),
                    xybox=box_xy,
                    xy=anchor,
                    arrowprops=dict(arrowstyle="->"))
                ax.add_artist(imagebox)

    if saveFolder is not None:
        fig.savefig(saveFolder,
                    transparent=True,
                    bbox_inches='tight',
                    pad_inches=0)
def plot_embedding(self, target_cls=None, output=None, show_thumbnail=True, title=None, thumbnail_size=(32, 32), close_thres=4e-3):
    """
    Scatter-plot the per-class embedding and optionally add thumbnails.

    The embedding is read from ``self.comp_features``, a dict keyed by class
    name (computed on demand through ``self.calc_embedding()``). All classes
    are normalised with a common min/max and plotted; ``target_cls`` is drawn
    opaque and every other class with ``alpha=0.3``.

    Args:
        target_cls: class to highlight. Thumbnails are only drawn for this
            class; with ``None`` no thumbnails are drawn.
        output: path of the image file to write. Its parent directory is
            created when missing. With ``None`` nothing is saved.
        show_thumbnail: whether to draw thumbnails for ``target_cls``.
        title: optional plot title.
        thumbnail_size: size passed to ``load_img`` as ``target_size``.
        close_thres: factor of the smallest axis range below which a point is
            considered too close to an already drawn thumbnail.

    Returns:
        The matplotlib figure (always, whether or not it was saved).
    """
    if not self.comp:
        self.calc_embedding()
    X = self.comp_features

    X_allcls = np.concatenate([X[c] for c in X], axis=0)
    x_min, x_max = np.min(X_allcls, axis=0), np.max(X_allcls, axis=0)

    def normalize(p):
        return (p - x_min) / (x_max - x_min)

    fig, ax = plt.subplots(1, 1, figsize=(10, 10), dpi=100)
    for idx, cls in enumerate(X):
        x = normalize(X[cls])
        ax.scatter(x[:, 0],
                   x[:, 1],
                   color=self.cmap(idx),
                   label=cls,
                   alpha=1.0 if cls == target_cls else 0.3)

    if show_thumbnail and (target_cls is not None):
        shown_images = np.array([[1., 1.]])  # just something big
        x = normalize(X[target_cls])
        # Loop invariants: frame colour of the target class and the
        # minimum squared distance between two thumbnails.
        edge_color = self.cmap(list(X.keys()).index(target_cls))
        min_sq_dist = close_thres * np.min(x_max - x_min)
        for i in range(x.shape[0]):
            dist = np.sum((x[i] - shown_images)**2, 1)
            if np.min(dist) < min_sq_dist:
                continue
            shown_images = np.r_[shown_images, [x[i]]]
            img = keras_image.img_to_array(
                keras_image.load_img(self.fpath_allcls[target_cls][i],
                                     target_size=thumbnail_size,
                                     interpolation="bicubic")) / 255.
            imagebox = offsetbox.AnnotationBbox(
                offsetbox.OffsetImage(img, cmap=plt.cm.gray_r),
                x[i],
                bboxprops=dict(edgecolor=edge_color))
            ax.add_artist(imagebox)

    plt.xticks([])
    plt.yticks([])
    plt.legend()
    if title is not None:
        plt.title(title)

    if output:
        # A bare file name has an empty dirname; os.makedirs('') would raise,
        # so only create the directory when there is one to create.
        out_dir = os.path.dirname(output)
        if out_dir and not os.path.exists(out_dir):
            os.makedirs(out_dir)
        plt.savefig(output)
    return fig
def plot_embedding(X, labels_str, title, imgs=None, save_dir=None, frame_lable=None, max_frame=None, vid_lable=None):
    """Plot a 2-D embedding with up to four differently labelled views.

    Args:
        X: embedding coordinates; min-max scaled per column before plotting.
        labels_str: labels handed to ``plt_labeled_data`` for the main plot.
        title: title of the main plot and base name of the saved file.
        imgs: optional per-point images. When given, a 2x2 figure is created
            and the second panel shows the points with image overlays.
        save_dir: optional directory (``~`` is expanded); the figure is saved
            there as ``<title>.pdf``.
        frame_lable: optional per-point frame labels, plotted in panel 223.
        max_frame: forwarded to ``plt_labeled_data`` as
            ``index_color_factor`` for the frame-label panel.
        vid_lable: optional per-point view-pair labels, plotted in panel 224.

    All figures are closed before returning.
    """
    # http://scikit-learn.org/stable/auto_examples/manifold/plot_lle_digits.html
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    X = (X - x_min) / (x_max - x_min)

    if imgs is not None:
        fig = plt.figure(figsize=(20, 20))
        ax = plt.subplot(221)
    else:
        fig = plt.figure()
        ax = fig.gca()

    # main plot, with the labels shown below it; only the colours and the
    # legend entries of the returned 4-tuple are needed afterwards
    _, _, colors, legend_elements = plt_labeled_data(ax, X, labels_str)
    plt.title(title)

    if imgs is not None:
        # plot again, but with image overlay
        ax = plt.subplot(222)
        ax.set_title("image overlay")
        ax.scatter(X[:, 0], X[:, 1], color=colors)
        if hasattr(offsetbox, "AnnotationBbox"):
            # only print thumbnails with matplotlib > 1.0
            shown_images = np.array([[1.0, 1.0]])  # just something big
            for i in range(X.shape[0]):
                dist = np.sum((X[i] - shown_images)**2, 1)
                if np.min(dist) < 5e-3:
                    # don't show points that are too close
                    continue
                shown_images = np.r_[shown_images, [X[i]]]
                imagebox = offsetbox.AnnotationBbox(
                    offsetbox.OffsetImage(imgs[i],
                                          cmap=plt.cm.gray_r,
                                          zoom=0.75),
                    X[i],
                    pad=0.0)
                ax.add_artist(imagebox)
        # same legend as in the main plot
        plt_labels_blow(ax, list(legend_elements.values()))

    if frame_lable is not None:
        # plot the frames as classes; the legend shows a colour for every
        # 50th frame only
        ax = plt.subplot(223)
        plt_labeled_data(
            ax,
            X,
            frame_lable,
            label_filter_legend=lambda l: l % 50 == 0,
            plt_cm=plt.cm.Spectral,
            index_color_factor=max_frame,
        )
        ax.set_title("frames as label (color range normalized for every vid)")

    if vid_lable is not None:
        # plot the view pair as classes
        ax = plt.subplot(224)
        plt_labeled_data(ax, X, vid_lable, label_filter_legend=lambda x: False)
        ax.set_title("view pair as label")

    if save_dir is not None:
        # Expand "~" before creating the directory so that the directory that
        # is created is the one the file is written to.
        save_dir = os.path.expanduser(save_dir)
        create_dir_if_not_exists(save_dir)
        title = os.path.join(save_dir, title)
        fig.savefig(title + ".pdf", bbox_inches="tight")
        log.info("save TSNE plt to: {}".format(title))
    plt.close("all")
def plot_dataset(X, y, images=None, labels=None, gray=False, save=None, y_original=None):
    """Plot a 2-D dataset as coloured labels with optional image thumbnails.

    Args:
        X: coordinates, min-max scaled per column before plotting.
        y: per-point labels; drawn as text and used for the ``Set1`` colour.
        images: optional per-point images shown as thumbnails.
        labels: optional lookup from label value to the caption written next
            to each thumbnail.
        gray: when true, thumbnails are built from ``util.invert(images[i])``
            with a leading axis added; otherwise ``images[i]`` is shown with
            the ``gray_r`` colormap.
        save: output file name. When given, the figure is titled, saved and
            closed; otherwise it is shown interactively.
        y_original: optional alternative label values used for the caption
            lookup instead of ``y``.
    """
    plt.cla()
    print('data size {}'.format(X.shape))
    uni_y = len(np.unique(y))

    lower = np.min(X, 0)
    upper = np.max(X, 0)
    X = (X - lower) / (upper - lower)

    fig = plt.figure(figsize=(27, 18), dpi=100)
    ax = plt.subplot(111)
    for idx in tqdm(range(X.shape[0])):
        plt.text(X[idx, 0],
                 X[idx, 1],
                 str(y[idx]),
                 color=plt.cm.Set1(y[idx] / uni_y),
                 fontdict={'weight': 'bold', 'size': 9})

    # Thumbnails are only available with matplotlib > 1.0.
    if images is not None and hasattr(offsetbox, 'AnnotationBbox'):
        placed = np.array([[1., 1.]])  # seed entry for the distance test
        for idx in range(X.shape[0]):
            point = X[idx]
            if np.min(np.sum((point - placed)**2, 1)) < 4e-3:
                # Too close to a thumbnail that is already drawn.
                continue

            if labels is not None:
                # Caption key comes from y_original when it is supplied.
                caption_keys = y if y_original is None else y_original
                plt.text(X[idx, 0] - 0.01,
                         X[idx, 1] - 0.033,
                         labels[caption_keys[idx]],
                         fontdict={'weight': 'bold', 'size': 15})

            placed = np.r_[placed, [point]]
            if gray:
                inverted = np.expand_dims(util.invert(images[idx]), axis=0)
                image_ = offsetbox.OffsetImage(inverted)
            else:
                image_ = offsetbox.OffsetImage(images[idx],
                                               cmap=plt.cm.gray_r)
            ax.add_artist(offsetbox.AnnotationBbox(image_, X[idx]))

    plt.xticks([])
    plt.yticks([])
    # Hide the figure and axes backgrounds.
    fig.patch.set_visible(False)
    ax.patch.set_visible(False)
    ax.axis('off')

    if save is None:
        plt.show()
    else:
        print('Saving Image {} ...'.format(save))
        epoch_tag = save.split('.')[0].split()[-1]
        plt.title('epoch ' + epoch_tag,
                  fontdict={'fontsize': 20},
                  loc='left')
        plt.savefig(save)
        plt.close()

    # Drop the local references and force a collection.
    del X, y, fig, ax
    gc.collect()
def load_image(path: str):
    """Load a packaged graphic and wrap it in an ``OffsetImage``.

    ``path`` is relative to the ``graphics/`` directory of the
    ``doomguy_status`` package; the image is scaled by the module-level
    ``ZOOM`` factor.
    """
    resource_path = resource_filename("doomguy_status", f"graphics/{path}")
    pixels = image.imread(resource_path)
    return offsetbox.OffsetImage(pixels, zoom=ZOOM)
def plot_embedding(data, labels, title=None, plot_type='scatter', images=None):
    """Visualize data as a plot in a reduced dimension (either 2-d or 3-d).

    Points are drawn either as scatter markers or as their class label.
    For 2-d data, sample images can additionally be overlaid to give an idea
    of the data subspace.

    Args:
        data: array of shape (m, dim) with dim 2 or 3; passed through
            ``normalize`` before plotting.
        labels: class labels corresponding to ``data``.
        title: optional title of the plot.
        plot_type: ``'scatter'`` to draw markers, ``'digit'`` to draw each
            point as ``str(labels[i])``. Any other value draws no points.
        images: optional original images corresponding to ``data``. They are
            overlaid for 2-d data only; the overlay is skipped for 3-d data.

    Returns:
        fig, ax: the finished plot (Figure and Axes objects).

    Raises:
        ValueError: if ``data`` does not have 2 or 3 columns.
    """
    # Get colormap (depending on version of Matplotlib it will be 'tab10' or 'Vega10')
    try:
        cm = plt.cm.tab10
    except AttributeError:
        cm = plt.cm.Vega10
    # NOTE(review): the colormap chosen above is overridden here, so viridis
    # is what is actually used -- confirm this is intended.
    cm = plt.cm.viridis

    # Get the dimensions of the data
    m, dim = data.shape
    if dim not in (2, 3):
        raise ValueError(
            'plot_embedding expects 2-d or 3-d data, got {} columns'.format(dim))

    # Normalize the data
    data = normalize(data)
    label_font = {'weight': 'bold', 'size': 9}

    if dim == 3:
        fig = plt.figure(dpi=60)
        ax = axes3d.Axes3D(fig)
        # Attach the axes explicitly in case this matplotlib version did not
        # do so on construction.
        if ax not in fig.axes:
            fig.add_axes(ax)
        if plot_type == 'digit':
            for i in range(m):
                # Plot digit as a string, at the location determined by the
                # data point; color is determined from the colormap
                ax.text(data[i, 0], data[i, 1], data[i, 2], str(labels[i]),
                        color=cm(labels[i]), fontdict=label_font)
        elif plot_type == 'scatter':
            ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=labels, cmap=cm)
    else:
        fig, ax = plt.subplots()
        ax.scatter(0, 0, s=0)
        if plot_type == 'digit':
            for i in range(m):
                # 2-d text takes (x, y, string); there is no third coordinate
                ax.text(data[i, 0], data[i, 1], str(labels[i]),
                        color=cm(labels[i]), fontdict=label_font)
        elif plot_type == 'scatter':
            ax.scatter(data[:, 0], data[:, 1], c=labels, cmap=cm)

    # Show sample images if desired (will only work with matplotlib v1.0+).
    # `is not None` is required: comparing an array with `!= None` is
    # element-wise and cannot be used as a condition.
    if images is not None and dim == 2 and hasattr(offsetbox, 'AnnotationBbox'):
        # Initialize shown images locations array, starting with upper right
        # corner of plot
        shown_images = np.array([[1., 1.]])
        for i in range(m):
            # Squared distance between this point and all images already
            # displayed
            dist = np.sum((data[i] - shown_images)**2, 1)
            # Below the threshold: skip, so the plot isn't overcrowded
            if np.min(dist) < 4e-3:
                continue
            shown_images = np.r_[shown_images, [data[i]]]
            imagebox = offsetbox.AnnotationBbox(
                offsetbox.OffsetImage(images[i], cmap=plt.cm.gray_r),
                data[i])
            ax.add_artist(imagebox)

    # Set axes limits, title, etc.
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    if dim == 3:
        ax.set_zlim(0, 1)
    if title is not None:
        ax.set_title(title)
    return fig, ax
def render_gm_state(cstate):
    """Render a GoldMiner state---as represented by a CState---to an RGB
    array.

    The grid returned by ``GMState.make_rgb_grid()`` is drawn first, then a
    sprite for every object type that has a location, then half-size sprites
    for whatever the robot is holding. The figure is saved to an in-memory
    buffer, read back with ``plt.imread`` and returned; the figure is closed
    even if rendering fails.
    """
    import matplotlib.pyplot as plt
    import matplotlib.offsetbox as obox
    import matplotlib.ticker as tick

    # some housekeeping
    gm_state = GMState(cstate)
    fig = plt.figure()
    try:
        ax = plt.gca()

        # now make underlying grid showing whether each location is soft
        # rock/hard rock/clear space
        grid_rgb = gm_state.make_rgb_grid()
        plt.imshow(grid_rgb)

        asset_dir = osp.join(osp.dirname(osp.abspath(__file__)), 'assets')

        for obj_type in ['bomb', 'gold', 'laser', 'robot']:
            location = getattr(gm_state, obj_type + '_loc')
            if location is None:
                # nothing to draw, so don't read the sprite either
                continue
            sprite_path = osp.join(asset_dir, obj_type + '.png')
            image = obox.OffsetImage(plt.imread(sprite_path), zoom=1)
            x, y = location
            ax.add_artist(obox.AnnotationBbox(image, (x, y), frameon=False))

        # finally, draw whatever the robot has in its little robot hands
        # (bomb, gold, laser, etc.)
        for obj_type in ['bomb', 'gold', 'laser']:
            if not getattr(gm_state, 'has_' + obj_type):
                continue
            sprite_path = osp.join(asset_dir, obj_type + '.png')
            image = obox.OffsetImage(plt.imread(sprite_path), zoom=0.5)
            xoff, yoff = gm_state.robot_loc
            # nudge the held item away from the robot sprite's centre
            xoff += 0.3
            yoff += 0.3
            ax.add_artist(
                obox.AnnotationBbox(image, (xoff, yoff), frameon=False))

        # the nuclear option for getting rid of blank space, thanks to
        # https://stackoverflow.com/a/27227718
        ax.set_axis_off()
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0,
                            wspace=0)
        plt.margins(0, 0)
        ax.xaxis.set_major_locator(tick.NullLocator())
        ax.yaxis.set_major_locator(tick.NullLocator())

        # render to array
        out_buffer = io.BytesIO()
        fig.savefig(out_buffer, bbox_inches='tight', pad_inches=0)
        out_buffer.seek(0)
        return plt.imread(out_buffer)
    finally:
        # always release the figure, even when a sprite is missing or
        # rendering raises
        plt.close(fig)
def visualize(dataSet, primaryClass, numOfInstances, helperClassFlag, dimension):
    '''
    Plot a t-SNE embedding of the primary class and the generated images.

    Real images of `primaryClass` and GAN-generated images (relabelled -1)
    are stacked, embedded with t-SNE and drawn as a scatter plot, which is
    saved under 'scatter/images/...' and then shown. For `dimension == 2`,
    image thumbnails are overlaid on the scatter as well.

    If `helperClassFlag` is 1, real images of the helper class are plotted
    too, along with the primary and generated class. If no helper class is
    defined for the primary class, a message is printed and the function
    returns without plotting.

    `dimension` selects a 2-D (2) or 3-D (3) embedding.

    NOTE: this function uses Python 2 print statements.
    '''
    # Dataset images
    realImages, realLabels = loadDataset(dataSet, primaryClass, 1000, 64, False)
    # GAN generated images
    fakeImages, fakeLabels = getFakeData(dataSet, [primaryClass], numOfInstances)
    # mark every generated image with the label -1, then concatenate image
    # pixels and labels
    fakeLabels.fill(-1)
    images = np.vstack([realImages, fakeImages])
    labels = np.hstack([realLabels, fakeLabels])
    # 1000 from each class [ Primary Real, Primary Fake ]
    noSNE = 2000
    # matplotlib figure and title
    fig = plt.figure(figsize=(18, 15))
    className = getClasses(dataSet)
    plotTitle = dataSet + ' ' + str(
        className[primaryClass]) + ' ' + str(numOfInstances)
    fig.suptitle(plotTitle, fontsize=20)
    # if you want to plot the helper class too
    if helperClassFlag == 1:
        helperClass = getHelperClass(dataSet, primaryClass)
        if helperClass == -1:
            print "No Helper Class defined for primary class {} of {} dataset".format(
                className[primaryClass], dataSet)
            return
        # take some real images from helper class here
        helperImages, helperLabels = loadDataset(dataSet, helperClass, 1000, 64, False)
        # append them to the real and generated images of primary class
        images = np.vstack([images, helperImages])
        labels = np.hstack([labels, helperLabels])
        # 1000 from each class [ Primary Real, Primary Fake, Helper Real ]
        noSNE = 3000
        # matplotlib figure title (replaces the one set above)
        plotTitle = dataSet + ' ' + 'Primary: ' + str(
            className[primaryClass]) + ' Helper: ' + str(
                className[helperClass]) + ' Instances: ' + str(numOfInstances)
        fig.suptitle(plotTitle, fontsize=10)
    # Insert in pandas dataframe: one 'pixel<k>' column per image column
    featCols = ['pixel' + str(i) for i in range(images.shape[1])]
    df = pd.DataFrame(images, columns=featCols)
    df['label'] = labels
    # applying function on one of the column of dataframe
    df['label'] = df['label'].apply(lambda i: str(i))
    # integer class label per row; used for all class tests below
    y = df['label'].values.astype('int')
    # size should be number of columns+1[for index]
    print 'Size of the dataframe: {}'.format(df.shape)
    # graph details
    # NOTE(review): `colors` and this re-bound `labels` list are not read
    # again in this function.
    colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k', 'crimson', 'purple', 'olive']
    labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    time_start = time.time()
    # 2D or 3D
    if dimension == 2:
        ax = fig.add_subplot(111)
        tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
        # NOTE(review): .loc slices by label and includes the end label, so
        # up to noSNE + 1 rows are embedded -- confirm this matches the
        # number of rows in `images`, which the loops below iterate over.
        tsneResults = tsne.fit_transform(df.loc[:noSNE, featCols].values)
        print 't-SNE done! Time elapsed: {} seconds'.format(time.time() - time_start)
        dfTSNE = df.loc[:noSNE, :].copy()
        dfTSNE['x-tsne'] = tsneResults[:, 0]
        dfTSNE['y-tsne'] = tsneResults[:, 1]
        # distance based plotting of AnnotationBoxes
        # downsample the image for better spacing of AnnotationBoxes
        images = images.reshape(images.shape[0], 64, -1)
        images = images[:, ::2, ::2]
        # seed entry for the list of thumbnail positions already drawn
        shown_images = np.array([[1., 1.]])
        if helperClassFlag == 0:
            # number of thumbnails drawn so far for each class
            countReal, countFake = 0, 0
            for i in range(images.shape[0]):
                dist = np.sum((tsneResults[i] - shown_images)**2, 1)
                if np.min(dist) < 4:
                    # don't show points that are too close
                    continue
                if (countReal > 45 and y[i] == primaryClass) or (countFake > 45 and y[i] == -1):
                    # don't show points for a single class beyond a threshold
                    continue
                shown_images = np.r_[shown_images, [tsneResults[i]]]
                # red thumbnails for real images, green for generated ones
                # NOTE(review): a label that is neither primaryClass nor -1
                # leaves colorMap at its previous value (or undefined).
                if y[i] == primaryClass:
                    colorMap = plt.get_cmap('Reds')
                    countReal = countReal + 1
                elif y[i] == -1:
                    colorMap = plt.get_cmap('Greens')
                    countFake = countFake + 1
                imagebox = offsetbox.AnnotationBbox(
                    offsetbox.OffsetImage(images[i], cmap=colorMap),
                    tsneResults[i])
                # add the offsets to the visualisation
                ax.add_artist(imagebox)
            # one scatter call per class so each gets its own legend entry
            for i in [primaryClass, -1]:
                tsneResultsSelect = tsneResults[np.where(y == i)]
                if i == primaryClass:
                    label = 'Real'
                    color = 'r'
                else:
                    label = 'Fake'
                    color = 'g'
                ax.scatter(tsneResultsSelect[:, 0],
                           tsneResultsSelect[:, 1],
                           c=color,
                           alpha=0.3,
                           label=label)
            saveFile = 'scatter/images/2D/' + dataSet + '/' + str(
                className[primaryClass]) + '_' + str(numOfInstances) + '.jpg'
        elif helperClassFlag == 1:
            # number of thumbnails drawn so far for each class
            countReal, countFake, countHelper = 0, 0, 0
            for i in range(images.shape[0]):
                dist = np.sum((tsneResults[i] - shown_images)**2, 1)
                if np.min(dist) < 4:
                    # don't show points that are too close
                    continue
                if (countReal > 30 and y[i] == primaryClass) or (
                        countFake > 30 and y[i] == -1) or (countHelper > 30 and y[i] == helperClass):
                    # don't show points for a single class beyond a threshold
                    continue
                shown_images = np.r_[shown_images, [tsneResults[i]]]
                # red = primary, green = generated, blue = helper
                if y[i] == primaryClass:
                    colorMap = plt.get_cmap('Reds')
                    countReal = countReal + 1
                elif y[i] == -1:
                    colorMap = plt.get_cmap('Greens')
                    countFake = countFake + 1
                elif y[i] == helperClass:
                    colorMap = plt.get_cmap('Blues')
                    countHelper = countHelper + 1
                imagebox = offsetbox.AnnotationBbox(
                    offsetbox.OffsetImage(images[i], cmap=colorMap),
                    tsneResults[i])
                # add the offsets to the visualisation
                ax.add_artist(imagebox)
            print countReal, countFake, countHelper
            # one scatter call per class so each gets its own legend entry
            for i in [primaryClass, helperClass, -1]:
                tsneResultsSelect = tsneResults[np.where(y == i)]
                if i == primaryClass:
                    label = 'Primary'
                    color = 'r'
                elif i == -1:
                    label = 'Generated'
                    color = 'g'
                elif i == helperClass:
                    label = 'Helper'
                    color = 'b'
                ax.scatter(tsneResultsSelect[:, 0],
                           tsneResultsSelect[:, 1],
                           c=color,
                           alpha=0.3,
                           label=label)
            saveFile = 'scatter/images/2D/' + dataSet + '/' + str(
                className[primaryClass]) + '_' + str(
                    className[helperClass]) + '_' + str(
                        numOfInstances) + '.jpg'
    elif dimension == 3:
        ax = fig.add_subplot(111, projection='3d')
        tsne = TSNE(n_components=3, verbose=1, perplexity=40, n_iter=300)
        tsneResults = tsne.fit_transform(df.loc[:noSNE, featCols].values)
        print 't-SNE done! Time elapsed: {} seconds'.format(time.time() - time_start)
        dfTSNE = df.loc[:noSNE, :].copy()
        dfTSNE['x-tsne'] = tsneResults[:, 0]
        dfTSNE['y-tsne'] = tsneResults[:, 1]
        dfTSNE['z-tsne'] = tsneResults[:, 2]
        if helperClassFlag == 0:
            for i in [primaryClass, -1]:
                tsneResultsSelect = tsneResults[np.where(y == i)]
                if i == primaryClass:
                    label = 'Real'
                    color = 'r'
                else:
                    label = 'Fake'
                    color = 'g'
                ax.scatter(tsneResultsSelect[:, 0],
                           tsneResultsSelect[:, 1],
                           tsneResultsSelect[:, 2],
                           c=color,
                           alpha=0.3,
                           label=label)
            # NOTE(review): this 3-D plot is saved under the '2D' directory,
            # unlike the helper-class branch below -- confirm intent.
            saveFile = 'scatter/images/2D/' + dataSet + '/' + str(
                className[primaryClass]) + '_' + str(numOfInstances) + '.jpg'
        elif helperClassFlag == 1:
            for i in [primaryClass, helperClass, -1]:
                tsneResultsSelect = tsneResults[np.where(y == i)]
                if i == primaryClass:
                    label = 'Primary'
                    color = 'r'
                elif i == -1:
                    label = 'Generated'
                    color = 'g'
                elif i == helperClass:
                    label = 'Helper'
                    color = 'b'
                ax.scatter(tsneResultsSelect[:, 0],
                           tsneResultsSelect[:, 1],
                           tsneResultsSelect[:, 2],
                           c=color,
                           alpha=0.3,
                           label=label)
            ax.set_position([0.0, 0.0, 0.8, 0.8])
            saveFile = 'scatter/images/3D/' + dataSet + '/' + str(
                className[primaryClass]) + '_' + str(
                    className[helperClass]) + '_' + str(
                        numOfInstances) + '.jpg'
            # NOTE(review): nesting level inferred -- this call needs
            # `helperClass`, which is only assigned when helperClassFlag == 1;
            # confirm it belongs in this branch.
            drawInteractivePlot(tsneResults, y, dataSet, primaryClass, numOfInstances, helperClass)
    # NOTE(review): `ax` and `saveFile` are only assigned when dimension is
    # 2 or 3 and helperClassFlag is 0 or 1.
    ax.legend(bbox_to_anchor=(1.10, 1), loc=2, borderaxespad=0)
    plt.savefig(saveFile, bbox_inches='tight')
    plt.show()
# Embed the PCA scores with t-SNE and scatter the result.
T = tsne.fit_transform(pca_score)
fig, ax = plt.subplots()
ax.scatter(T.T[0], T.T[1])
plt.grid(False)

# Overlay 16x16 thumbnails for 200 randomly drawn samples (drawn with
# replacement). The lower bound is 0 so that the first sample can be picked
# too; a lower bound of 1 would exclude it.
shown_images = np.array([[1.0, 1.0]])
choose_200 = np.random.randint(0, T.shape[0], 200)
for i in choose_200:
    # resize() returns a new image, so the source file can be closed as soon
    # as the thumbnail exists instead of being left open.
    with Image.open(files[i]) as source_img:
        img = source_img.resize((16, 16))
    shown_images = np.r_[shown_images, [T[i]]]
    imagebox = offsetbox.AnnotationBbox(
        offsetbox.OffsetImage(img, cmap=plt.cm.gray_r), T[i])
    ax.add_artist(imagebox)
plt.show()

#%% Put TSNE data into frames and format
dfmeta = ut.readMeta()
# One row per shape id ("mid") with its t-SNE coordinates.
df_subset = pd.DataFrame(list(shape2vec.keys()), columns=["mid"])
df_subset["tsne1"] = lvects[:, 0]
df_subset["tsne2"] = lvects[:, 1]
if (lvects.shape[1]) == 3:
    df_subset["tsne3"] = lvects[:, 2]
# Attach the metadata columns to every shape id.
df_subset = pd.merge(df_subset, dfmeta, how="left", on=["mid", "mid"])
# Per-shape loss and its natural logarithm.
dfloss = pd.DataFrame.from_dict(shape2loss, orient="index", columns=["loss"])
dfloss["logloss"] = np.log(dfloss.loss)
def visualize_embedding(embeddings, ims, labels, title, ax=None, x_lims=None, y_lims=None):
    """Plot embedded points as text labels with an image at every point.

    Args:
        embeddings: array with one row per point; columns 0 and 1 are used.
        ims: optional array of images, one per point. A last dimension of 3
            is treated as BGR and reversed; otherwise channel 0 is shown.
            Images larger than 128 pixels on a side are resized to 80x80.
        labels: optional per-point labels. Numeric labels are rounded to two
            decimals and coloured by their position in the label range; any
            other labels are printed as-is and coloured by point index.
        title: title of the axes.
        ax: axes to draw into; a new 10x10 figure is created when omitted.
        x_lims, y_lims: optional axis limits; default to the data range.

    Returns:
        ``(ax, (x_min, x_max), (y_min, y_max))`` where the ranges are the
        data extents of the first two embedding columns.
    """
    # make a new figure if none specified
    if ax is None:
        plt.figure(figsize=(10, 10))
        ax = plt.subplot(111)

    x_min, x_max = np.min(embeddings, 0), np.max(embeddings, 0)
    print('Min: {}, max: {}'.format(x_min, x_max))
    n_points = embeddings.shape[0]
    font = {'weight': 'bold', 'size': 9}

    # The label range is the same for every point: compute it once. It stays
    # None when the labels are missing or not numeric.
    label_lo = label_span = None
    if labels is not None:
        try:
            label_lo = np.min(labels)
            label_span = np.max(labels) - label_lo
        except (TypeError, ValueError):
            label_lo = label_span = None

    for i in range(n_points):
        text = color = None
        if label_span is not None:
            try:
                # if labels are numbers
                text = '{}'.format(round(labels[i], 2))
                # All-equal labels have a zero span; use the first colour
                # instead of dividing by zero.
                frac = (labels[i] - label_lo) / label_span if label_span else 0.
                # NOTE(review): the factor 10 pushes most values past the end
                # of the colormap; kept as in the original -- confirm intent.
                color = plt.cm.Set1(frac * 10.)
            except (TypeError, ValueError):
                text = None
        if text is None:
            # non-numeric (or missing) labels: colour by point index
            text = '{}'.format(labels[i]) if labels is not None else ''
            color = plt.cm.Set1(i / 10.)
        ax.text(embeddings[i, 0], embeddings[i, 1], text, color=color,
                fontdict=font)

    # set lims if given as inputs
    if x_lims is not None:
        ax.set_xlim(tuple(x_lims))
    else:
        ax.set_xlim((x_min[0], x_max[0]))
    if y_lims is not None:
        ax.set_ylim(tuple(y_lims))
    else:
        ax.set_ylim((x_min[1], x_max[1]))

    if hasattr(offsetbox, 'AnnotationBbox') and ims is not None:
        # only print thumbnails with matplotlib > 1.0
        is_color = ims.shape[-1] == 3
        for i in range(n_points):
            if is_color:
                im = ims[i][:, :, [2, 1, 0]]  # reverse bgr from opencv
            else:
                im = ims[i][:, :, 0]  # get rid of last channel for grayscale
            if im.shape[0] > 128 or im.shape[1] > 128:
                im = cv2.resize(im, (80, 80))
            imagebox = offsetbox.AnnotationBbox(offsetbox.OffsetImage(im),
                                                embeddings[i, :2])
            ax.add_artist(imagebox)

    ax.set_title(title)
    return ax, (x_min[0], x_max[0]), (x_min[1], x_max[1])
def plot_embedding(X, y, imgs=None, title=None, name=None, save_embed=False, filename=None, batch_builder=None):
    """Plot PCA and three t-SNE projections of ``X`` side by side and save them.

    Adapted from
    http://scikit-learn.org/stable/auto_examples/manifold/plot_lle_digits.html

    Args:
        X: Feature matrix, one row per sample.
        y: Per-sample labels, indexed as ``y[i]``; each is printed at the
            sample position and coloured with ``plt.cm.Set1(y[i] / 10.)``.
        imgs: Optional per-sample thumbnails overlaid on the PCA panel.
        title: Optional title for the whole figure.
        name: Stem used for the output file names.
        save_embed: When true, also write a compressed ``.npz`` archive.
        filename: Stored verbatim in the archive under ``filename``.
        batch_builder: Optional object whose ``centroids`` and ``assignments``
            attributes are stored in the archive as well.

    Side effects:
        Writes ``results/<name>_pca_tsne.png`` and, when ``save_embed`` is set,
        ``embed/<name>_tsne_embed.npz``.
    """

    def _unit_scale(points):
        # Min-max scale every column to [0, 1].
        lo, hi = np.min(points, 0), np.max(points, 0)
        return (points - lo) / (hi - lo)

    X_tsne_30 = _unit_scale(
        TSNE(n_components=2, random_state=1337, perplexity=30).fit_transform(X))
    X_tsne_10 = _unit_scale(
        TSNE(n_components=2, random_state=1337, perplexity=10).fit_transform(X))
    X_tsne_50 = _unit_scale(
        TSNE(n_components=2, random_state=1337, perplexity=50).fit_transform(X))
    X_pca = _unit_scale(PCA(n_components=2).fit_transform(X))
    # The scaled raw features are only used for the optional archive below.
    X = _unit_scale(X)

    # Plot colors numbers
    plt.figure(figsize=(30, 10))
    panels = [
        (plt.subplot(141), X_pca, 'PCA w/ 2 components'),
        (plt.subplot(142), X_tsne_10, 't-SNE Perplexity=10'),
        (plt.subplot(143), X_tsne_30, 't-SNE Perplexity=30'),
        (plt.subplot(144), X_tsne_50, 't-SNE Perplexity=50'),
    ]
    font = {'weight': 'bold', 'size': 9}
    for axis, coords, panel_title in panels:
        for i in range(coords.shape[0]):
            # plot colored number
            axis.text(coords[i, 0], coords[i, 1], str(y[i]),
                      color=plt.cm.Set1(y[i] / 10.), fontdict=font)
        axis.set_title(panel_title)
        # Coordinates are rescaled to [0, 1], so tick values carry no meaning.
        axis.set_xticks([])
        axis.set_yticks([])

    # Add image overlays
    if imgs is not None and hasattr(offsetbox, 'AnnotationBbox'):
        # only print thumbnails with matplotlib > 1.0
        # Thumbnails go on the PCA panel, so they must be placed with the 2-D
        # PCA coordinates; the raw feature rows are not plot positions.
        pca_axis = panels[0][0]
        shown_images = np.array([[1., 1.]])  # just something big
        for i in range(X_pca.shape[0]):
            dist = np.sum((X_pca[i] - shown_images)**2, 1)
            if np.min(dist) < 4e-3:
                # don't show points that are too close
                continue
            shown_images = np.r_[shown_images, [X_pca[i]]]
            imagebox = offsetbox.AnnotationBbox(
                offsetbox.OffsetImage(imgs[i], cmap=plt.cm.gray_r), X_pca[i])
            pca_axis.add_artist(imagebox)

    if title is not None:
        # A figure-level title leaves the four panel titles intact.
        plt.suptitle(title)
    plt.savefig("results/" + str(name) + '_pca_tsne.png')

    if save_embed:
        if batch_builder is not None:
            np.savez_compressed("embed/" + str(name) + '_tsne_embed',
                                embed=X,
                                label=y,
                                filename=filename,
                                centroids=batch_builder.centroids,
                                assignments=batch_builder.assignments)
        else:
            np.savez_compressed("embed/" + str(name) + '_tsne_embed',
                                embed=X,
                                label=y,
                                filename=filename)
# Embed the PCA scores in two dimensions with a seeded t-SNE
# (`pca_score` and `files` are defined outside this snippet).
tsne = TSNE(n_components=2, random_state=33, n_iter=300, perplexity=5)
T = tsne.fit_transform(pca_score)

# Scatter every embedded point, then overlay thumbnails on a random subset.
fig, ax = plt.subplots()
ax.scatter(T.T[0], T.T[1])
plt.grid(False)
shown_images = np.array([[1.0, 1.0]])
# NOTE(review): randint draws independently (duplicates are possible) and the
# lower bound of 1 means index 0 is never selected -- confirm this is intended.
choose_200 = np.random.randint(1, T.shape[0], 200)
for i in choose_200:
    img = Image.open(files[i])
    img = img.resize((16, 16))
    # Keep a running list of the positions at which a thumbnail was drawn.
    # NOTE(review): nothing in this snippet reads `shown_images` afterwards.
    shown_images = np.r_[shown_images, [T[i]]]
    imagebox = offsetbox.AnnotationBbox(offsetbox.OffsetImage(img, cmap=plt.cm.gray_r), T[i])
    ax.add_artist(imagebox)
plt.show()

#%% Put TSNE data into frames and format
dfmeta = ut.readMeta()
# One row per key of `shape2vec`; the columns of `lvects` become tsne1..tsne3.
# NOTE(review): assumes the rows of `lvects` are in the same order as
# `shape2vec.keys()` -- verify where `lvects` is built.
df_subset = pd.DataFrame(list(shape2vec.keys()), columns=["mid"])
df_subset["tsne1"] = lvects[:, 0]
df_subset["tsne2"] = lvects[:, 1]
if (lvects.shape[1]) == 3:
    df_subset["tsne3"] = lvects[:, 2]
# Left join, so every embedded item is kept even without metadata.
# NOTE(review): the join key "mid" is listed twice in `on`.
df_subset = pd.merge(df_subset, dfmeta, how="left", on=["mid", "mid"])
dfloss = pd.DataFrame.from_dict(shape2loss, orient="index", columns=["loss"])
dfloss["logloss"] = np.log(dfloss.loss)
cmap=plt.cm.get_cmap('jet', 10)) plt.colorbar(ticks=range(10)) plt.clim(-0.5, 9.5) plt.savefig('Mnist_lowEmding.png', format='png', dpi=300) data = X[y == '5'][0:2000] isoMap = Isomap() isoMap.fit(data) embed = isoMap.embedding_ plt.figure(3, figsize=(10, 10)) ax = plt.gca() ax.plot(embed[:, 0], embed[:, 1], '.k') from matplotlib import offsetbox min_dist_2 = (0.05 * max(embed.max(0) - embed.min(0)))**2 shown_images = np.array([2 * embed.max(0)]) for i in range(data.shape[0]): dist = np.sum((embed[i] - shown_images)**2, 1) if np.min(dist) < min_dist_2: # don't show points that are too close continue shown_images = np.vstack([shown_images, embed[i]]) imagebox = offsetbox.AnnotationBbox( offsetbox.OffsetImage(data[i].reshape((28, 28)), cmap=plt.cm.gray), embed[i]) ax.add_artist(imagebox) plt.savefig('Mnist_lowEmdingAnalysisDigit_5.png', format='png', dpi=300)
# Rescale the point coordinates with the embedding's per-axis min/max
# (`x`, `y`, `x_min`, `x_max`, `color`, `embedding`, `prefix`, `ax` and
# `title` are defined before this fragment).
x = (x - x_min[0]) / (x_max[0] - x_min[0])
y = (y - x_min[1]) / (x_max[1] - x_min[1])
plt.plot(x, y, '.', color=color, markersize=1)

# only print thumbnails with matplotlib > 1.0
shown_images = np.array([[1., 1.]])  # just something big
for i in range(embedding.shape[0]):
    # NOTE(review): only indices below 1837/3 get a thumbnail; the constant
    # presumably encodes a dataset size -- confirm and name it.
    if i >= 1837/3:
        continue
    dist = np.sum((embedding[i] - shown_images) ** 2, 1)
    if np.min(dist) < 4e-3:
        # don't show points that are too close
        continue
    shown_images = np.r_[shown_images, [embedding[i]]]
    print(i)
    print(embedding[i])
    imagebox = offsetbox.AnnotationBbox(
        offsetbox.OffsetImage(load_image(prefix, i), zoom=0.02, cmap=plt.cm.gray_r),
        embedding[i])
    ax.add_artist(imagebox)

plt.xticks([]), plt.yticks([])
if title is not None:
    plt.title(title)
# Save before showing: once a blocking show() returns, the figure may already
# have been torn down and savefig would write an empty canvas.
plt.savefig('plot.png', dpi=500)
plt.show()
def plot_tSNE(testloader, num_samples, fit=False, colored=True):
    """Render the first ``num_samples`` test digits at their t-SNE coordinates.

    Args:
        testloader: Loader whose ``dataset`` exposes ``test_data`` and
            ``test_labels`` tensors (both are read through ``.numpy()``).
        num_samples: Number of leading test samples to plot.
        fit: When true, run PCA (30 components) followed by t-SNE on the
            flattened images and pickle the min-max scaled coordinates before
            plotting. When false, previously pickled coordinates are loaded.
        colored: When true each digit is drawn with a class-specific colormap,
            otherwise every digit uses a grey colormap.

    Side effects:
        Reads (and with ``fit`` writes) ``../data/tSNE/X_tSNE_<n>.p`` and
        writes the figure to ``../plots/MNIST_tSNE_<n>[_colored].png``.
    """
    coords_path = "../data/tSNE/X_tSNE_{0}.p".format(num_samples)

    X_img = testloader.dataset.test_data.numpy()[:num_samples]
    Y = testloader.dataset.test_labels.numpy()[:num_samples]

    if fit:
        # The estimator is only needed when coordinates are (re)computed.
        tsne = TSNE(n_components=2,
                    perplexity=40,
                    n_iter=200000,
                    n_iter_without_progress=250,
                    init='random',
                    random_state=1337,
                    verbose=4,
                    n_jobs=12)
        # flattening out squared images for tSNE
        X = X_img.reshape(-1, X_img.shape[1] * X_img.shape[2])

        print("fitting PCA...")
        t0 = time.time()
        pca = PCA(n_components=30)
        X = pca.fit_transform(X)
        t1 = time.time()
        print("done! {0:.2f} seconds".format(t1 - t0))

        print("fitting tSNE...")
        t0 = time.time()
        X_tsne = tsne.fit_transform(X)
        t1 = time.time()
        print("done! {0:.2f} seconds".format(t1 - t0))

        # scaling
        x_min, x_max = np.min(X_tsne, 0), np.max(X_tsne, 0)
        X_tsne = (X_tsne - x_min) / (x_max - x_min)
        with open(coords_path, "wb") as handle:
            pickle.dump(X_tsne, handle)

    print("loading fitted tSNE coordinates...")
    # NOTE: pickle must only be used on files produced by this function.
    with open(coords_path, "rb") as handle:
        X_tsne = pickle.load(handle)

    print("plotting tSNE...")
    t0 = time.time()
    fig = plt.figure(figsize=(10, 10))
    fig.subplots_adjust(left=0.01, right=0.99, top=0.95, bottom=0.01)
    ax = fig.add_subplot(111)

    # define class colors
    cmaps = [
        plt.cm.bwr, plt.cm.bwr, plt.cm.Wistia, plt.cm.Greys, plt.cm.cool,
        plt.cm.Purples, plt.cm.coolwarm, plt.cm.bwr, plt.cm.PiYG, plt.cm.cool
    ]

    def _with_alpha_ramp(base, reverse=False):
        # Copy of `base` whose opacity rises linearly from 0 to 1, so the
        # image background stays transparent.
        rgba = base(np.arange(base.N))
        if reverse:
            rgba = rgba[::-1]
        rgba[:, -1] = np.linspace(0, 1, base.N)
        return ListedColormap(rgba)

    # correct color for plotting: binarise into a *new* array. Tensor.numpy()
    # shares memory with the dataset tensor, so thresholding in place would
    # silently overwrite the caller's test data.
    X_bin = np.where(X_img > 10, 255, 0).astype(X_img.dtype)

    if hasattr(offsetbox, 'AnnotationBbox'):
        grey_cmap = None if colored else _with_alpha_ramp(plt.cm.Greys)
        class_cmaps = {}  # one colormap per digit class, built on first use
        for image, digit, position in zip(X_bin, Y, X_tsne):
            if colored:
                key = int(digit)
                if key not in class_cmaps:
                    class_cmaps[key] = _with_alpha_ramp(
                        cmaps[key], reverse=key in (7, 6, 9))
                custom_cmap = class_cmaps[key]
            else:
                custom_cmap = grey_cmap
            imagebox = offsetbox.AnnotationBbox(offsetbox.OffsetImage(
                image, cmap=custom_cmap, zoom=0.25),
                                                position,
                                                frameon=False,
                                                pad=0)
            ax.add_artist(imagebox)
    ax.axis("off")

    fig_path = "../plots/MNIST_tSNE_{0}_colored.png".format(num_samples)
    if not colored:
        fig_path = "../plots/MNIST_tSNE_{0}.png".format(num_samples)
    plt.savefig(fig_path, dpi=1200)
    # plt.show()
    t1 = time.time()
    print("done! {0:.2f} seconds".format(t1 - t0))
def plot_tSNE(testloader, labels, num_samples, name=None, title=None):
    """Render test digits at stored t-SNE coordinates, coloured by ``labels``.

    Args:
        testloader: Loader whose ``dataset.test_data`` tensor holds the images.
        labels: Per-sample integer indices into the five colormaps
            ``[red, black, green, orange, transparent]``.
        num_samples: Number of leading test samples to plot.
        name: Suffix used in the output file name.
        title: Title set on the axes.

    Side effects:
        Reads ``../data/tSNE/X_tSNE_10000.p`` (always the 10000-sample fit,
        regardless of ``num_samples``) and writes
        ``../plots/MNIST_tSNE_<num_samples>_<name>.png``.
    """
    X_img = testloader.dataset.test_data.numpy()[:num_samples]

    print("loading fitted tSNE coordinates...")
    # NOTE: pickle must only be used on trusted, locally produced files.
    with open("../data/tSNE/X_tSNE_10000.p", "rb") as handle:
        X_tsne = pickle.load(handle)

    print("plotting tSNE...")
    # scaling
    x_min, x_max = np.min(X_tsne, 0), np.max(X_tsne, 0)
    X_tsne = (X_tsne - x_min) / (x_max - x_min)

    t0 = time.time()
    fig = plt.figure(figsize=(10, 10))
    fig.subplots_adjust(left=0.01, right=0.99, top=0.95, bottom=0.01)
    ax = fig.add_subplot(111)

    def _with_alpha(base, alpha):
        # Copy of `base` with its alpha channel replaced by `alpha`
        # (a scalar or one value per colormap entry).
        rgba = base(np.arange(base.N))
        rgba[:, -1] = alpha
        return ListedColormap(rgba)

    def _alpha_ramp(base):
        # Opacity rising linearly from 0 to 1 keeps the background transparent.
        return _with_alpha(base, np.linspace(0, 1, base.N))

    # Define custom color maps
    custom_cmap_red = _alpha_ramp(plt.cm.bwr)
    custom_cmap_black = _alpha_ramp(plt.cm.Greys)
    custom_cmap_green = _alpha_ramp(plt.cm.brg)
    custom_cmap_orange = _alpha_ramp(plt.cm.bwr_r)
    custom_cmap_white = _with_alpha(plt.cm.Greys, 0)  # fully transparent
    color_maps = [
        custom_cmap_red, custom_cmap_black, custom_cmap_green,
        custom_cmap_orange, custom_cmap_white
    ]

    # correct color for plotting: binarise into a *new* array. Tensor.numpy()
    # shares memory with the dataset tensor, so thresholding in place would
    # silently overwrite the caller's test data.
    X_bin = np.where(X_img > 10, 255, 0).astype(X_img.dtype)

    if hasattr(offsetbox, 'AnnotationBbox'):
        for i_digit in range(X_bin.shape[0]):
            imagebox = offsetbox.AnnotationBbox(offsetbox.OffsetImage(
                X_bin[i_digit], cmap=color_maps[labels[i_digit]], zoom=0.25),
                                                X_tsne[i_digit],
                                                frameon=False,
                                                pad=0)
            ax.add_artist(imagebox)
    ax.set_title(title)
    ax.axis("off")

    # save figure
    plt.savefig("../plots/MNIST_tSNE_{0}_{1}.png".format(num_samples, name),
                dpi=1200)
    t1 = time.time()
    print("done! {0:.2f} seconds".format(t1 - t0))
# Min-max scale each column of the projection `X_r` to [0, 1]
# (`X_r` and `digits` are defined outside this snippet).
x_min, x_max = np.min(X_r, 0), np.max(X_r, 0)
X_r = (X_r - x_min) / (x_max - x_min)

ax = pl.subplot(111)
# Print every sample as its target value, coloured by that value.
for i in range(digits.data.shape[0]):
    pl.text(X_r[i, 0], X_r[i, 1], str(digits.target[i]),
            color=pl.cm.Set1(digits.target[i] / 10.),
            fontdict={ 'weight': 'bold', 'size': 9 })

if hasattr(offsetbox, 'AnnotationBbox'):
    # only print thumbnails with matplotlib > 1.0
    # `shown_images` holds the positions that already carry a thumbnail; it is
    # seeded with the corner (1, 1) so the distance test below has a row to
    # compare against on the first iteration.
    shown_images = np.array([[1., 1.]])  # just something big
    for i in range(digits.data.shape[0]):
        # Squared distance from this point to every thumbnail placed so far.
        dist = np.sum((X_r[i] - shown_images)**2, 1)
        if np.min(dist) < 4e-3:
            # don't show points that are too close
            continue
        shown_images = np.r_[shown_images, [X_r[i]]]
        imagebox = offsetbox.AnnotationBbox(
            offsetbox.OffsetImage(digits.images[i], cmap=pl.cm.gray_r), X_r[i])
        ax.add_artist(imagebox)
pl.xticks([]), pl.yticks([])
pl.show()
def plot_embedding_scatter(self, coordinates, images, figsize=(4, 3), frameon=False, title=None, xticks=[], yticks=[], min_dist=4e-4):
    """Show thumbnails of ``images`` at their min-max scaled ``coordinates``.

    A thumbnail is skipped when its squared distance to any thumbnail that
    was already placed is below ``min_dist``. ``figsize``, ``xticks`` and
    ``yticks`` are accepted but not used by the current implementation.
    The plot is drawn into ``plt.subplot(111, frameon=frameon)`` and shown
    with ``plt.show()``.
    """
    import matplotlib.pyplot as plt
    from matplotlib import offsetbox
    import numpy as np

    # Normalise the coordinates to [0, 1] per column so they are easy to display.
    lower = np.min(coordinates, 0)
    upper = np.max(coordinates, 0)
    scaled = (coordinates - lower) / (upper - lower)

    # The frame around the axes is drawn only when `frameon` is true.
    axes = plt.subplot(111, frameon=frameon)

    # only print thumbnails with matplotlib > 1.0
    if hasattr(offsetbox, 'AnnotationBbox'):
        # Positions that already carry a thumbnail; seeded with (1, 1) so the
        # first distance test has something to compare against.
        placed = np.array([[1., 1.]])  # just something big
        for index in range(scaled.shape[0]):
            point = scaled[index]
            squared_gaps = np.sum((point - placed)**2, 1)
            if not np.min(squared_gaps) < min_dist:
                # Far enough from every placed thumbnail: remember the
                # position and draw the image there.
                placed = np.r_[placed, [point]]
                thumbnail = offsetbox.OffsetImage(images[index], cmap=plt.cm.gray_r)
                axes.add_artist(offsetbox.AnnotationBbox(thumbnail, point))

    if title is not None:
        plt.title(title)
    plt.show()
# Fit the autoencoder on the training split and score the test split
# (`x_train`, `x_test` and `y_test` are defined outside this snippet).
model = AutoEncoder(sequence_length=1, num_epochs=40, hidden_size=10, lr=1e-4)
model.fit(x_train)
error = model.predict(x_test)
print(roc_auc_score(y_test, error))  # e.g. 0.8614

# """Borrowed from https://github.com/scikit-learn/scikit-learn/blob/master/examples/manifold/plot_lle_digits.py#L44"""
error = (error - error.min()) / (error.max() - error.min())  # Normalize error
x_test = x_test.values
# Uniform random vertical position in [-1, 1) for every test sample; the
# horizontal position below is the normalised score.
y_random = np.random.rand(len(x_test)) * 2 - 1
plt.figure(figsize=(20, 10))
ax = plt.subplot(111)
if hasattr(offsetbox, 'AnnotationBbox'):
    # Positions that already carry a thumbnail, seeded with (1, 1).
    shown_images = np.array([[1., 1.]])
    for i in range(len(x_test)):
        X_instance = [error[i], y_random[i]]
        # Squared distance from this point to every thumbnail placed so far.
        dist = np.sum((X_instance - shown_images) ** 2, 1)
        if np.min(dist) < 4e-5:
            # don't show points that are too close
            continue
        shown_images = np.r_[shown_images, [X_instance]]
        # Each test row is reshaped to a 28x28 image for display.
        imagebox = offsetbox.AnnotationBbox(offsetbox.OffsetImage(x_test[i].reshape(28, 28), cmap=plt.cm.gray_r), X_instance)
        ax.add_artist(imagebox)
plt.xlim((0, 1.1))
plt.ylim((-1.2, 1.2))
plt.xlabel("Anomaly Score")
plt.title("Predicted Anomaly Score for the Test Set")
plt.show()
def compare_datasets_tsne(smiles_paths: List[str],
                          smiles_col_name: str,
                          colors: List[str],
                          plot_molecules: bool,
                          max_num_per_dataset: int,
                          save_path: str):
    """Plot a joint t-SNE of the Morgan fingerprints of several SMILES files.

    Args:
        smiles_paths: CSV files to compare; each file name (without ``.csv``)
            becomes the dataset label.
        smiles_col_name: Name of the CSV column holding the SMILES strings.
        colors: One colour per dataset, at least as many as ``smiles_paths``.
        plot_molecules: Draw molecule images instead of scatter points.
        max_num_per_dataset: Larger datasets are randomly subsampled to this
            many molecules (seeded, without replacement).
        save_path: Output image path. When a dataset is labelled ``sars_pos``,
            one extra ``<save_path minus .png>_<label>.png`` figure is written
            for every other dataset, showing it together with ``sars_pos``.
    """
    import time

    assert len(smiles_paths) <= len(colors)

    # Random seed for random subsampling
    np.random.seed(1)

    # Generate labels based on file name
    labels = [os.path.basename(path).replace('.csv', '') for path in smiles_paths]

    # Load the smiles datasets
    print('Loading data')
    smiles, slices = [], []
    for smiles_path in smiles_paths:
        # Get SMILES
        new_smiles = pd.read_csv(smiles_path)[smiles_col_name]
        new_smiles = list(new_smiles[new_smiles.notna()])  # Exclude empty strings
        print(f'{os.path.basename(smiles_path)}: {len(new_smiles):,}')

        # Subsample if dataset is too large
        if len(new_smiles) > max_num_per_dataset:
            print(f'Subsampling to {max_num_per_dataset:,} molecules')
            new_smiles = np.random.choice(new_smiles, size=max_num_per_dataset, replace=False).tolist()

        # Remember which rows of the joint embedding belong to this dataset.
        slices.append(slice(len(smiles), len(smiles) + len(new_smiles)))
        smiles += new_smiles

    # Compute Morgan fingerprints
    print('Computing Morgan fingerprints')
    morgan_generator = get_features_generator('morgan')
    morgans = [morgan_generator(smile) for smile in tqdm(smiles, total=len(smiles))]

    print('Running t-SNE')
    start = time.time()
    tsne = TSNE(n_components=2, init='pca', random_state=0, metric='jaccard')
    X = tsne.fit_transform(morgans)
    print(f'time = {time.time() - start}')

    print('Plotting t-SNE')
    x_min, x_max = np.min(X, axis=0), np.max(X, axis=0)
    X = (X - x_min) / (x_max - x_min)

    makedirs(save_path, isfile=True)

    scale = 10
    fontsize = 5 * scale
    fig = plt.figure(figsize=(6.4 * scale, 4.8 * scale))
    plt.title('t-SNE using Morgan fingerprint with Jaccard similarity', fontsize=2 * fontsize)
    ax = fig.gca()
    handles = []
    legend_kwargs = dict(loc='upper right', fontsize=fontsize)

    for slc, color, label in zip(slices, colors, labels):
        if plot_molecules:
            # Plots molecules
            handles.append(mpatches.Patch(color=color, label=label))
            for smile, (x, y) in zip(smiles[slc], X[slc]):
                img = Draw.MolsToGridImage([Chem.MolFromSmiles(smile)], molsPerRow=1, subImgSize=(200, 200))
                imagebox = offsetbox.AnnotationBbox(offsetbox.OffsetImage(img), (x, y), bboxprops=dict(color=color))
                ax.add_artist(imagebox)
        else:
            # Plots points
            s = 450 if label == 'sars_pos' else 150
            plt.scatter(X[slc, 0], X[slc, 1], s=s, color=color, label=label)

    if plot_molecules:
        # Image boxes carry no legend entry, so use the colour patches.
        legend_kwargs['handles'] = handles

    plt.legend(**legend_kwargs)
    plt.xticks([]), plt.yticks([])
    plt.savefig(save_path)
    # These figures are very large; release each one as soon as it is saved.
    plt.close(fig)

    # Plot pairs of sars_pos and other dataset
    if 'sars_pos' in labels:
        pos_index = labels.index('sars_pos')
        # Skip sars_pos by index rather than assuming it is the last dataset.
        for index, label in enumerate(labels):
            if index == pos_index:
                continue
            fontsize = 50
            pair_fig = plt.figure(figsize=(6.4 * 10, 4.8 * 10))
            plt.title('t-SNE using Morgan fingerprint with Jaccard similarity', fontsize=2 * fontsize)
            plt.scatter(X[slices[index], 0], X[slices[index], 1], s=150, color=colors[index], label=label)
            plt.scatter(X[slices[pos_index], 0], X[slices[pos_index], 1], s=450, color=colors[pos_index], label=labels[pos_index])
            plt.xticks([]), plt.yticks([])
            plt.legend(loc='upper right', fontsize=fontsize)
            plt.savefig(save_path.replace('.png', f'_{label}.png'))
            plt.close(pair_fig)
def compare_datasets_tsne(args: Args):
    """Plot a joint t-SNE of the Morgan fingerprints of several SMILES datasets.

    Reads every path in ``args.smiles_paths`` (subsampling each to at most
    ``args.max_per_dataset`` molecules with a fixed seed), embeds the
    fingerprints with a Jaccard-metric t-SNE and saves the figure to
    ``args.save_path``. With ``args.cluster`` the points are coloured by
    HDBSCAN cluster; otherwise each dataset uses its entry of ``args.colors``
    and ``args.sizes``, drawn as molecule images when ``args.plot_molecules``
    is set.

    Raises:
        ValueError: If there are fewer colors or sizes than datasets.
    """
    n_datasets = len(args.smiles_paths)
    if n_datasets > len(args.colors) or n_datasets > len(args.sizes):
        raise ValueError(
            'Must have at least as many colors and sizes as datasets')

    # Random seed for random subsampling
    np.random.seed(0)

    # Load the smiles datasets
    print('Loading data')
    smiles, slices, labels = [], [], []
    for smiles_path in args.smiles_paths:
        # The file name without its extension is the dataset label.
        label = os.path.basename(smiles_path).replace('.csv', '')

        dataset_smiles = get_smiles(path=smiles_path,
                                    smiles_columns=args.smiles_column,
                                    flatten=True)
        print(f'{label}: {len(dataset_smiles):,}')

        # Subsample if dataset is too large
        if len(dataset_smiles) > args.max_per_dataset:
            print(f'Subsampling to {args.max_per_dataset:,} molecules')
            dataset_smiles = np.random.choice(dataset_smiles,
                                              size=args.max_per_dataset,
                                              replace=False).tolist()

        # Rows of the joint embedding that belong to this dataset.
        first_row = len(smiles)
        slices.append(slice(first_row, first_row + len(dataset_smiles)))
        labels.append(label)
        smiles += dataset_smiles

    # Compute Morgan fingerprints
    print('Computing Morgan fingerprints')
    morgan_generator = get_features_generator('morgan')
    morgans = []
    for smile in tqdm(smiles, total=len(smiles)):
        morgans.append(morgan_generator(smile))

    print('Running t-SNE')
    start = time.time()
    X = TSNE(n_components=2, init='pca', random_state=0,
             metric='jaccard').fit_transform(morgans)
    print(f'time = {time.time() - start:.2f} seconds')

    if args.cluster:
        import hdbscan  # pip install hdbscan

        print('Running HDBSCAN')
        start = time.time()
        clusterer = hdbscan.HDBSCAN(min_cluster_size=5, gen_min_span_tree=True)
        cluster_ids = clusterer.fit_predict(X)
        print(f'time = {time.time() - start:.2f} seconds')

    print('Plotting t-SNE')
    lower, upper = np.min(X, axis=0), np.max(X, axis=0)
    X = (X - lower) / (upper - lower)

    makedirs(args.save_path, isfile=True)

    plt.clf()
    fontsize = 50 * args.scale
    fig = plt.figure(figsize=(64 * args.scale, 48 * args.scale))
    plt.title('t-SNE using Morgan fingerprint with Jaccard similarity',
              fontsize=2 * fontsize)
    ax = fig.gca()
    handles = []
    legend_kwargs = dict(loc='upper right', fontsize=fontsize)

    if args.cluster:
        plt.scatter(X[:, 0],
                    X[:, 1],
                    s=150 * np.mean(args.sizes),
                    c=cluster_ids,
                    cmap='nipy_spectral')
    else:
        per_dataset = zip(slices, args.colors, labels, args.sizes)
        for rows, color, label, size in per_dataset:
            if not args.plot_molecules:
                # Plots points
                plt.scatter(X[rows, 0],
                            X[rows, 1],
                            s=150 * size,
                            color=color,
                            label=label)
                continue

            # Plots molecules
            handles.append(mpatches.Patch(color=color, label=label))
            for smile, (x, y) in zip(smiles[rows], X[rows]):
                img = Draw.MolsToGridImage([Chem.MolFromSmiles(smile)],
                                           molsPerRow=1,
                                           subImgSize=(200, 200))
                box = offsetbox.AnnotationBbox(offsetbox.OffsetImage(img),
                                               (x, y),
                                               bboxprops=dict(color=color))
                ax.add_artist(box)

        if args.plot_molecules:
            legend_kwargs['handles'] = handles

    plt.legend(**legend_kwargs)
    plt.xticks([]), plt.yticks([])

    print('Saving t-SNE')
    plt.savefig(args.save_path)
def plot_embedding(X_embedded, X_original=None, color=None, minDist=4e-3, adjMatrix=None, title=None, cmap=cm.cubehelix, ax=None, markersize=20, noColorbar_FLAG=False, zoom=0.5):
    """Scatter a 2-D embedding and overlay image patches at their positions.

    Only a subset of patches is drawn so that they cover the plot without
    piling up: a patch is skipped when its squared distance to an already
    drawn patch is below ``minDist``.

    :param X_embedded: the embedding in 2D (samples * 2); it is min-max scaled
        to [0, 1] per column before plotting
    :param X_original: the original image data, samples * X * Y, or
        samples * X * Y * 3 for RGB; no patches are drawn when ``None``
    :param color: per-point values used to colour the scatter
    :param minDist: controls the density of the patches: smaller -> more dense
    :param adjMatrix: optional (samples * samples) matrix providing
        ``.nonzero()``; a blue line is drawn for every nonzero entry
    :param title: optional plot title
    :param cmap: colormap for the scatter
    :param ax: optional axes to draw into (useful for subplots); a new figure
        is created when ``None``
    :param markersize: marker size of the scatter points
    :param noColorbar_FLAG: suppress the colorbar even if ``color`` is given
    :param zoom: zoom factor of the image patches
    :return: None
    """
    # Validate before doing any work; asarray keeps plain sequences usable.
    X_embedded = np.asarray(X_embedded)
    assert X_embedded.shape[1] == 2, "X_embed must only have two columns"
    if X_original is not None:
        assert X_embedded.shape[0] == X_original.shape[
            0], "X_embedded and X_original have different number of samples"
        assert len(X_original.shape) == 3 or (len(X_original.shape) ==4 and X_original.shape[-1] == 3), \
            "X_original must be 3D: samples * x * y, or 3D with RGB channel"
    if adjMatrix is not None:
        assert adjMatrix.shape == (
            X_embedded.shape[0], X_embedded.shape[0]
        ), 'dim of X_embedded and ajaceny matrix dont mathc'

    x_min, x_max = np.min(X_embedded, 0), np.max(X_embedded, 0)
    X_embedded = (X_embedded - x_min) / (x_max - x_min)

    if ax is None:
        plt.figure()
        ax = plt.subplot(111)
    # Make `ax` current so the pyplot calls below (scatter, plot, colorbar,
    # ticks, title) all target it.
    plt.sca(ax)
    plt.scatter(X_embedded[:, 0],
                X_embedded[:, 1],
                c=color,
                cmap=cmap,
                s=markersize,
                linewidths=0)

    # only print thumbnails with matplotlib > 1.0
    if X_original is not None and hasattr(offsetbox, 'AnnotationBbox'):
        shown_images = np.array([[1., 1.]])  # just something big
        for i in range(X_original.shape[0]):
            dist = np.sum((X_embedded[i, :] - shown_images)**2, 1)
            if np.min(dist) < minDist:
                # don't show points that are too close
                continue
            shown_images = np.r_[shown_images, [X_embedded[i, :]]]
            imagebox = offsetbox.AnnotationBbox(
                offsetbox.OffsetImage(X_original[i, :, :], cmap=cm.gray, zoom=zoom),
                X_embedded[i, :2],
                pad=0)
            ax.add_artist(imagebox)

    if adjMatrix is not None:
        # get all nonzero entries and draw one edge per entry
        rows, cols = adjMatrix.nonzero()
        for ix1, ix2 in zip(rows, cols):
            plt.plot([X_embedded[ix1, 0], X_embedded[ix2, 0]],
                     [X_embedded[ix1, 1], X_embedded[ix2, 1]],
                     c='b')

    if color is not None and not noColorbar_FLAG:
        plt.colorbar()
    plt.xticks([]), plt.yticks([])
    if title is not None:
        plt.title(title)
# Train network learn_rate = 0.01 / batch_size learn_rule = dp.RMSProp(learn_rate) trainer = dp.GradientDescent(net, train_feed, learn_rule) trainer.train_epochs(n_epochs=15) # Plot 2D embedding test_feed = dp.Feed(x_test) x_test = np.reshape(x_test, (-1, ) + dataset.img_shape) embedding = net.embed(test_feed) embedding -= np.min(embedding, 0) embedding /= np.max(embedding, 0) plt.figure() ax = plt.subplot(111) shown_images = np.array([[1., 1.]]) for i in range(embedding.shape[0]): dist = np.sum((embedding[i] - shown_images)**2, 1) if np.min(dist) < 6e-4: # don't show points that are too close continue shown_images = np.r_[shown_images, [embedding[i]]] imagebox = offsetbox.AnnotationBbox(offsetbox.OffsetImage( x_test[i], zoom=0.6, cmap=plt.cm.gray_r), xy=embedding[i], frameon=False) ax.add_artist(imagebox) plt.xticks([]), plt.yticks([]) plt.title('Embedding from the last layer of the network')
def plot_embedding(X_orig, X_trans, y, title=None, fig=None, subplot_pos=111, images=False, im_thres=3e-3):
    """Plot a manifold embedding, optionally with some of the original images.

    Strongly inspired by and based on the scikit-learn example
    http://scikit-learn.org/stable/auto_examples/manifold/plot_lle_digits.html
    (authors: Fabian Pedregosa, Olivier Grisel, Mathieu Blondel,
    Gael Varoquaux; License: BSD 3 clause (C) INRIA 2011).

    Parameters
    -------------
    X_orig: original data used to draw the pictures (nb_samples, pixels);
        each row is reshaped to a square image
    X_trans: transformed data (nb_samples, nb_components=2)
    y: labels of the input data, printed as coloured numbers
    title: title of the plot
    fig: matplotlib figure object; a new 10x10 inch figure is created if None
    subplot_pos: (three-digit integer) position of the subplot inside ``fig``
    images: (boolean) overlay images sporadically over the plot
    im_thres: (float) if images are enabled, minimum squared distance between
        two overlaid images

    Output
    -------------
    ax of the plot with coloured classes and possibly pictures

    Notes
    -------------
    The font sizes are set through ``plt.rc``, i.e. they change matplotlib's
    global defaults and stay in effect after this call.
    """
    SMALL_SIZE = 15
    MEDIUM_SIZE = 25
    BIGGER_SIZE = 30
    plt.rc('font', size=SMALL_SIZE)  # controls default text sizes
    plt.rc('axes', titlesize=SMALL_SIZE)  # fontsize of the axes title
    plt.rc('axes', labelsize=MEDIUM_SIZE)  # fontsize of the x and y labels
    plt.rc('xtick', labelsize=SMALL_SIZE)  # fontsize of the tick labels
    plt.rc('ytick', labelsize=SMALL_SIZE)  # fontsize of the tick labels
    plt.rc('legend', fontsize=SMALL_SIZE)  # legend fontsize
    plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

    x_min, x_max = np.min(X_trans, 0), np.max(X_trans, 0)
    # Scale into a slightly shrunken unit range and shift by 0.05 so that no
    # point sits directly on the border of the plot.
    X_trans = ((X_trans - x_min) / ((x_max - x_min) * 1.1)) + 0.05

    if fig is None:
        fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(subplot_pos)

    # Draw through `ax` rather than pyplot: a caller-supplied `fig` is not
    # necessarily pyplot's current figure.
    font = {'weight': 'bold', 'size': 9}
    for i in range(X_trans.shape[0]):
        ax.text(X_trans[i, 0], X_trans[i, 1], str(int(y[i])),
                color=plt.cm.Set1(y[i] / 10.), fontdict=font)

    if images:
        # only print thumbnails with matplotlib > 1.0
        # Every row of X_orig is shown as a square image of this side length.
        side = int(np.sqrt(X_orig.shape[1]))
        shown_images = np.array([[1, 1]])  # just something big
        for i in range(X_trans.shape[0]):
            dist = np.sum((X_trans[i] - shown_images)**2, 1)
            if np.min(dist) < im_thres:
                # don't show points that are too close
                continue
            shown_images = np.r_[shown_images, [X_trans[i]]]
            imagebox = offsetbox.AnnotationBbox(
                offsetbox.OffsetImage(X_orig[i].reshape((side, side)),
                                      cmap=plt.cm.gray_r,
                                      zoom=0.6), X_trans[i])
            ax.add_artist(imagebox)

    if title is not None:
        ax.set_title(title)
    return ax