import math
import os

import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

from tsne import tsne  # assumed: local module providing the t-SNE implementation


def scatterPlotSingleUser(model, embedding_dim, userIndex, numMovies, tsneIter, perplexity):
    '''
    Creates a visualisation of a single user along with all the items of the
    dataset (using latent factors). This shows the items that are most similar
    and least similar to that user's tastes.

    model: the prediction model built by spotlight (from which we get the item
        and user latent factors).
    embedding_dim: dimensionality of the latent factors.
    userIndex: the user whose taste we wish to visualise.
    numMovies: number of movies in the dataset.
    tsneIter: number of iterations used for tSNE's visualisation.
    perplexity: setting for the tSNE visualisation (see sources for more info).
    '''
    # Item latent factors, with the chosen user's factor as the last row
    # (assumed to be read from the Spotlight embeddings, mirroring
    # scatterPlotAllUsers below).
    allLatentFactors = np.empty((numMovies + 1, embedding_dim))
    for i in range(numMovies):
        allLatentFactors[i, :] = model._net.item_embeddings.weight[i].detach()
    allLatentFactors[numMovies, :] = model._net.user_embeddings.weight[userIndex].detach()

    # PCA reduces the factors to 10 dimensions before running t-SNE
    pca = PCA(n_components=10)
    allLatentFactors = pca.fit_transform(allLatentFactors)
    dimReduc = tsne(tsneIter, allLatentFactors, 2, 10, perplexity)

    plot1 = plt.scatter(dimReduc[:numMovies, 0], dimReduc[:numMovies, 1], 10, 'black')
    plot2 = plt.scatter(dimReduc[numMovies, 0], dimReduc[numMovies, 1], 20, 'red', '*')
    plt.legend([plot1, plot2], ['items', 'user ' + str(userIndex)],
               bbox_to_anchor=(1.1, 1.05))
    return (dimReduc, plot1)
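
# Example usage, as a minimal sketch: this assumes a Spotlight explicit
# factorisation model trained on MovieLens-100K. The dataset variant and all
# hyperparameter values below are illustrative, not taken from the code above.
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.factorization.explicit import ExplicitFactorizationModel

dataset = get_movielens_dataset(variant='100K')
model = ExplicitFactorizationModel(embedding_dim=32, n_iter=10)
model.fit(dataset)
dimReduc, itemPlot = scatterPlotSingleUser(model, embedding_dim=32, userIndex=5,
                                           numMovies=dataset.num_items,
                                           tsneIter=500, perplexity=30.0)
plt.show()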
def main():
    '''Runs t-SNE on a matrix of text images; plots the layout when it is 2-D.'''
    data_dir = '../../../shared-data/'
    img_dir = '../../data/text-imgs/'
    dimensionality = 10
    img_res = 48
    data_dump_path = data_dir + 'tsne_dump_{0}d_{1}px.json'.format(
        dimensionality, img_res)
    grid_plot_count = 30  # No. of images per axis in plot of the images in the 2-D space
    img_count_limit = 0
    use_stored_data = False

    img_matrix, img_names = read_img_matrix(img_dir, img_res, img_count_limit)

    if use_stored_data:
        data_dict = load_data(data_dump_path)
        Z = np.asarray(data_dict['Z'])
    else:
        max_iter = 500
        num_pcs = 300
        perplexity = 20.0
        Z = tsne(img_matrix, dimensionality, num_pcs, perplexity, max_iter)
        dump_data(data_dump_path, Z, img_res, img_count_limit, img_names)

    if dimensionality == 2:
        plot_tsne_grid(img_matrix, img_res, Z, grid_plot_count, data_dir)
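
# Entry-point sketch: run the pipeline above when the script is executed
# directly. read_img_matrix, load_data, dump_data and plot_tsne_grid are
# helpers assumed to be defined elsewhere in this script's repository.
if __name__ == '__main__':
    main()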
def visualization(feature, label, save_dir, nameStr):
    '''t-SNE visualization for visual features.'''
    assert feature.shape[0] == label.shape[0]
    X = feature
    labels = label
    Y = tsne(X, 2, 50, 20.0)
    plt.scatter(Y[:, 0], Y[:, 1], 20, labels)
    save_path = os.path.join(save_dir, nameStr + '.png')
    plt.savefig(save_path)
    print('visualization results saved in %s!' % save_dir)
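
# Example usage (a sketch with made-up data): visualise 100 random 64-D
# feature vectors spread over 5 classes; the directory and file names are
# illustrative only.
feats = np.random.randn(100, 64)
labs = np.random.randint(0, 5, size=100)
visualization(feats, labs, save_dir='.', nameStr='demo_tsne')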
def scatterPlotEntireModel(modelPredict, tsneIter, perplexity, labels):
    '''
    Creates a scatter plot of the list of movies along with a legend (single
    labels, prioritising alphabetical order). Each movie is plotted
    separately, and is given a legend entry only if the specific plot for its
    label has already been created. Movies with IDs not found in the
    MovieLens dataset will be assigned a None label.

    modelPredict: matrix containing predicted ratings for all user/item
        combinations. Shape is items x users.
    tsneIter: number of iterations used for tSNE's visualisation.
    perplexity: setting for the tSNE visualisation (see sources for more info).
    labels: array of colour values for each different genre (contains as many
        elements as there are items).
    '''
    # PCA reduces the predictions to 10 dimensions before t-SNE
    pca = PCA(n_components=10)
    modelPredict = pca.fit_transform(modelPredict)
    # predictions.shape = (1683, 2)
    predictions = tsne(tsneIter, modelPredict, 2, 10, perplexity)
    assignSingleLabels(predictions, labels)
    return False
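
# Example usage sketch: modelPredict would be the full items x users matrix
# of predicted ratings (e.g. 1683 x 943 for MovieLens-100K). The random data
# and colour labels here are placeholders, and assignSingleLabels is assumed
# to be defined elsewhere in this module.
ratings = np.random.rand(1683, 943)
genreColours = np.random.choice(['red', 'green', 'blue'], size=1683)
scatterPlotEntireModel(ratings, tsneIter=500, perplexity=30.0, labels=genreColours)
plt.show()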
def tsne_viz(X, vocab, output_filename, colors=None, no_dims=2, initial_dims=50, perplexity=30.0):
    """Plot a 2-dimensional graph by applying t-SNE on the embedding matrix X
    to represent the vocabulary. It also saves the figure.

    Parameters
    ----------
    X : array-like
        The embedding matrix.
    vocab : list
        The list of all tokens in the data. It is alphabetically sorted.
    output_filename : str
        The name of the figure file.
    colors : list
        A list with the same first dimension as X indicating the annotation
        color of each token. If it is None, the default color black is used
        for every token.
    no_dims : int
        The output dimension of t-SNE.
    initial_dims : int
        The output dimension of the PCA that is applied on X before going
        through t-SNE.
    perplexity : float
        The perplexity.
    """
    assert X.shape[0] == len(vocab), "Error: X and vocab must have same dimensions."
    if colors is None:
        colors = ['black' for _ in range(len(X))]

    # Run t-SNE on the word representation matrix
    Y = tsne(X, no_dims, initial_dims, perplexity)

    # Plotting: invisible points, annotated with the tokens themselves
    xvals, yvals = Y[:, 0], Y[:, 1]
    plt.figure(figsize=(100, 100))
    plt.plot(xvals, yvals, marker='', linestyle='')

    # Text labels:
    for word, x, y, color in zip(vocab, xvals, yvals, colors):
        plt.annotate(word, (x, y), fontsize=0.1, color=color)
    plt.savefig(output_filename, bbox_inches='tight', format="svg", dpi=1200)
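
# Example usage (illustrative): plot a toy embedding matrix for a small,
# alphabetically sorted vocabulary. The tokens and embeddings are made up.
vocab = sorted('token%03d' % i for i in range(200))
X = np.random.randn(len(vocab), 50)
tsne_viz(X, vocab, 'embeddings.svg')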
def scatterPlotAllUsers(model, embedding_dim, userIndex, numUsers, pointNum, tsneIter, perplexity, previousClosest=["0"]):
    '''
    Creates a visualisation of all users from the dataset. This is useful to
    find neighbouring users of a specific user.

    model: the prediction model built by spotlight (from which we get the item
        and user latent factors).
    embedding_dim: dimensionality of the latent factors.
    userIndex: the user for which we will plot the closest points.
    numUsers: number of total users in the model.
    pointNum: number of closest points (or users) we want to represent.
    tsneIter: number of iterations used for tSNE's visualisation.
    perplexity: setting for the tSNE visualisation (see sources for more info).
    previousClosest: indexes of the neighbours found by a previous call, which
        are highlighted separately; the default ["0"] is a sentinel meaning
        "no previous neighbours".
    '''
    allUserFactors = np.empty((numUsers, embedding_dim))
    for i in range(numUsers):
        allUserFactors[i, :] = model._net.user_embeddings.weight[i].detach()

    # PCA used to reduce from 32 to 10 dimensions
    if embedding_dim > 10:
        pca = PCA(n_components=10)
        allUserFactors = pca.fit_transform(allUserFactors)
    allUsersReduction = tsne(tsneIter, allUserFactors, 2, 10, perplexity)

    # Euclidean distance from every user to the chosen user in the 2-D space
    userX = allUsersReduction[userIndex, 0]
    userY = allUsersReduction[userIndex, 1]
    distances = []
    for index in range(numUsers):
        pointX = allUsersReduction[index, 0]
        pointY = allUsersReduction[index, 1]
        dist = math.sqrt((pointX - userX) ** 2 + (pointY - userY) ** 2)
        distances += [dist]
    distIndexes = np.argsort(distances)

    # The first index will be the index of the chosen user (distance to itself is 0)
    distSmallestIndexes = distIndexes[1:pointNum + 1]
    closestPoints = np.empty((pointNum, 2))
    counter = 0
    for index in distSmallestIndexes:
        closestPoints[counter] = allUsersReduction[index, :]
        counter += 1

    plot1 = plt.scatter(allUsersReduction[:, 0], allUsersReduction[:, 1], 10, 'black')
    plot2 = plt.scatter(closestPoints[:, 0], closestPoints[:, 1], 10, 'lime')
    plot3 = plt.scatter(allUsersReduction[userIndex, 0], allUsersReduction[userIndex, 1], 20, 'red', '*')
    if "0" in previousClosest:
        plt.legend([plot1, plot2, plot3],
                   ['Other Users', 'Closest ' + str(pointNum) + ' Users', 'user ' + str(userIndex)],
                   bbox_to_anchor=(1.1, 1.05))
    else:
        previousClosestPoints = np.empty((pointNum, 2))
        counter = 0
        for index in previousClosest:
            previousClosestPoints[counter] = allUsersReduction[index, :]
            counter += 1
        plot4 = plt.scatter(previousClosestPoints[:, 0], previousClosestPoints[:, 1], 10, 'deeppink')
        plt.legend([plot1, plot2, plot3, plot4],
                   ['Other Users', 'Current neighbours', 'user ' + str(userIndex), 'Previous neighbours'],
                   bbox_to_anchor=(1.1, 1.05))
    print("The users most similar to user", userIndex, "are:", distSmallestIndexes)
    return (distSmallestIndexes, False)
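
# Example usage sketch: find and highlight a user's nearest neighbours, then
# call again with those neighbours passed in so they show up as 'Previous
# neighbours'. This assumes the trained Spotlight model and dataset from the
# example under scatterPlotSingleUser; all values are illustrative.
neighbours, _ = scatterPlotAllUsers(model, embedding_dim=32, userIndex=5,
                                    numUsers=dataset.num_users, pointNum=10,
                                    tsneIter=500, perplexity=30.0)
plt.figure()
scatterPlotAllUsers(model, 32, 5, dataset.num_users, 10, 500, 30.0,
                    previousClosest=list(neighbours))
plt.show()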