def retrieveRecommendedGenres(self, user_id):
    """Return the classics that match the user's three best-rated genres.

    Reads ``movies.csv`` and ``ratings.csv`` (paths relative to the current
    working directory), asks ``helper.get_genre_ratings`` for a table of
    genre ratings, takes the three largest values in the row at position
    ``user_id`` and collects every classic from ``self.getClassics()`` whose
    ``getClassicByGenre`` call reports a match for one of those labels.

    Args:
        user_id: Positional row index into the genre-ratings table.

    Returns:
        list: Matching classics in ``self.getClassics()`` order. A classic is
        appended once per matching label, so it may appear more than once.
    """
    movies = pd.read_csv('movies.csv')
    ratings = pd.read_csv('ratings.csv')

    # Compute the average rating of each genre.
    genre_ratings = helper.get_genre_ratings(
        ratings,
        movies,
        [
            'Romance', 'Thriller', 'Horror', 'Sci-Fi', 'Action',
            'Adventure', 'Comedy', 'Fantasy', 'Drama', 'Animation',
        ],
        [
            'avg_romance_rating', 'avg_thriller_rating', 'avg_horror_rating',
            'avg_scifi_rating', 'avg_action_rating', 'avg_adv_rating',
            'avg_comedy_rating', 'avg_fantasy_rating', 'avg_drama_rating',
            'avg_animation_rating',
        ],
    )

    # NOTE(review): iloc is positional. If the helper indexes rows by the
    # userId column, .loc[user_id] may be what is intended -- confirm.
    best_genres = genre_ratings.iloc[user_id].nlargest(3)

    # The row's index holds the column labels passed above, so these are
    # labels such as 'avg_drama_rating', not raw genre names.
    recommended_genres = list(best_genres.index)

    classics_by_genre = []
    for classic in self.getClassics():
        for genre in recommended_genres:
            # Explicit comparison kept on purpose: the return type of
            # getClassicByGenre is not visible from this block.
            # NOTE(review): a classic matching several labels is added
            # several times -- confirm whether duplicates are wanted.
            if classic.getClassicByGenre(genre) == True:
                classics_by_genre.append(classic)
    return classics_by_genre
# Explore Romance vs. Sci-Fi genre ratings from the ml-latest-small dataset
# and split the filtered users into two groups with k-means.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.cluster import KMeans

import helper

# Import the movies and ratings datasets.
movies = pd.read_csv('ml-latest-small/movies.csv')
ratings = pd.read_csv('ml-latest-small/ratings.csv')

# Build the genre-rating table for the two genres of interest.
genre_ratings = helper.get_genre_ratings(
    ratings,
    movies,
    ['Romance', 'Sci-Fi'],
    ['avg_romance_rating', 'avg_scifi_rating'],
)
genre_ratings.head()  # only displays output when run as a notebook cell

# Filter the table through the helper using the 3.2 / 2.5 score limits.
biased_dataset = helper.bias_genre_rating_dataset(genre_ratings, 3.2, 2.5)
print("Number of records: ", len(biased_dataset))
biased_dataset.head()  # only displays output when run as a notebook cell

# get_ipython() exists only under IPython/Jupyter; this line comes from a
# notebook export and fails with NameError in a plain Python interpreter.
get_ipython().run_line_magic('matplotlib', 'inline')

helper.draw_scatterplot(
    biased_dataset['avg_scifi_rating'], 'Avg scifi rating',
    biased_dataset['avg_romance_rating'], 'Avg romance rating',
)

# Feature matrix for clustering: the two columns plotted above.
# NOTE(review): assumes biased_dataset holds no NaN values in these columns,
# which KMeans would reject -- confirm against the helper.
X = biased_dataset[['avg_scifi_rating', 'avg_romance_rating']].values

# Use k-means to assign every row to one of two clusters.
kmeans_1 = KMeans(n_clusters=2)
predictions = kmeans_1.fit_predict(X)
# Scatter-plot the two genre-rating columns produced by
# helper.get_genre_ratings for the ml-latest-small dataset.
from csv import reader

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist
from sklearn import metrics
from sklearn.cluster import KMeans

import helper

GENRES = ['Romance', 'Sci-Fi']
RATING_COLUMNS = ['avg_romance_rating', 'avg_scifi_rating']
AXIS_RANGE = (0, 10)

# Import the movies and ratings datasets.
movies = pd.read_csv('ml-latest-small/movies.csv')
ratings = pd.read_csv('ml-latest-small/ratings.csv')

genre_ratings = helper.get_genre_ratings(
    ratings, movies, GENRES, RATING_COLUMNS)

# Replace NaN entries with 0. nan_to_num returns a plain array, so it is
# wrapped in a DataFrame again; columns are then numbered 0 and 1 rather
# than carrying the names passed above.
genre_ratings = pd.DataFrame(np.nan_to_num(genre_ratings))

# Column 0 (requested as avg_romance_rating) goes on the X-axis,
# column 1 (requested as avg_scifi_rating) on the Y-axis.
x1 = genre_ratings.iloc[:, 0]
x2 = genre_ratings.iloc[:, 1]

plt.plot()
plt.title('Dataset')
plt.xlim(*AXIS_RANGE)
plt.ylim(*AXIS_RANGE)
plt.scatter(x1, x2)
plt.show()