Ejemplo n.º 1
0
 def retrieveRecommendedGenres(self, user_id):
     """Return the classics that match the user's three best-rated genres.

     Reads ``movies.csv`` and ``ratings.csv`` from the current working
     directory, asks ``helper.get_genre_ratings`` for a table of average
     ratings per genre, takes the row at position ``user_id`` and keeps
     the labels of its three largest values.  Every classic returned by
     ``self.getClassics()`` is then tested against each of those labels.

     Args:
         user_id: Positional row index into the genre-ratings table
             (``iloc`` is used, so this is a 0-based position, not a
             label).  NOTE(review): presumably rows are ordered by user
             id -- confirm against ``helper.get_genre_ratings``.

     Returns:
         A list of classics for which ``getClassicByGenre(label)`` is
         ``True``.  A classic is appended once per matching label, so it
         can appear more than once.
     """
     movies = pd.read_csv('movies.csv')
     ratings = pd.read_csv('ratings.csv')
     # Compute the average rating for each genre.
     # NOTE(review): 'avg_comedy_raring' looks like a typo for
     # 'avg_comedy_rating'; it is left unchanged here because code outside
     # this method may match on the label -- confirm and fix together.
     genre_ratings = helper.get_genre_ratings(ratings, movies, [
         'Romance', 'Thriller', 'Horror', 'Sci-Fi', 'Action', 'Adventure',
         'Comedy', 'Fantasy', 'Drama', 'Animation'
     ], [
         'avg_romance_rating', 'avg_thriller_rating', 'avg_horror_rating',
         'avg_scifi_rating', 'avg_action_rating', 'avg_adv_rating',
         'avg_comedy_raring', 'avg_fantasy_rating', 'avg_drama_rating',
         'avg_animation_rating'
     ])
     # Fix: the pandas method is ``nlargest``; the previous ``nlargests``
     # raised AttributeError on every call.
     best_genres = genre_ratings.iloc[user_id].nlargest(3)
     # The index of the selected row holds the labels that are passed on to
     # getClassicByGenre (presumably the 'avg_*' column labels above).
     recommended_genres = list(best_genres.index)
     classics_by_genre = []
     for classic in self.getClassics():
         for genre in recommended_genres:
             # Explicit comparison kept so only a value equal to True matches.
             if classic.getClassicByGenre(genre) == True:
                 classics_by_genre.append(classic)
     return classics_by_genre
Ejemplo n.º 2
0
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Fix: scipy.sparse exports ``csr_matrix``; importing ``scr_matrix`` raised
# ImportError before any of the script could run.
from scipy.sparse import csr_matrix
import helper
from sklearn.cluster import KMeans

# Load the movies and ratings datasets.
movies = pd.read_csv('ml-latest-small/movies.csv')
ratings = pd.read_csv('ml-latest-small/ratings.csv')

# Average rating per user for the Romance and Sci-Fi genres.
genre_ratings = helper.get_genre_ratings(
    ratings, movies, ['Romance', 'Sci-Fi'],
    ['avg_romance_rating', 'avg_scifi_rating'])
# Bare expression: only displays output when run as a notebook cell.
genre_ratings.head()

# Filter the table through the helper using the thresholds 3.2 and 2.5.
biased_dataset = helper.bias_genre_rating_dataset(genre_ratings, 3.2, 2.5)

print("Number of records: ", len(biased_dataset))
biased_dataset.head()

# NOTE: get_ipython() is only defined when running under IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')

helper.draw_scatterplot(biased_dataset['avg_scifi_rating'], 'Avg scifi rating',
                        biased_dataset['avg_romance_rating'],
                        'Avg romance rating')

# Cluster the users with k-means.

# Fix: ``X`` was never defined, so fit_predict raised NameError.  Use the
# same two rating columns that are plotted above as the feature matrix.
X = biased_dataset[['avg_scifi_rating', 'avg_romance_rating']].values

# Fix: the KMeans keyword is ``n_clusters``; ``n_cluster`` raised TypeError.
kmeans_1 = KMeans(n_clusters=2)
predictions = kmeans_1.fit_predict(X)
from csv import reader
from sklearn.cluster import KMeans
from sklearn import metrics
from scipy.spatial.distance import cdist
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import helper

# Load the movies and ratings datasets.
movies = pd.read_csv('ml-latest-small/movies.csv')
ratings = pd.read_csv('ml-latest-small/ratings.csv')

# Average Romance / Sci-Fi ratings from the helper.
genre_ratings = helper.get_genre_ratings(
    ratings, movies,
    ['Romance', 'Sci-Fi'],
    ['avg_romance_rating', 'avg_scifi_rating'],
)
# Replace NaN values with 0 and re-wrap the resulting array in a DataFrame,
# so the columns are addressed by position from here on.
genre_ratings = pd.DataFrame(np.nan_to_num(genre_ratings))

# Column 0 goes on the X-axis and column 1 on the Y-axis (presumably
# avg_romance_rating and avg_scifi_rating, following the label order above).
x1, x2 = genre_ratings.iloc[:, 0], genre_ratings.iloc[:, 1]

# Scatter plot of the two columns on fixed 0-10 axes.
plt.plot()
plt.xlim(0, 10)
plt.ylim(0, 10)
plt.title('Dataset')
plt.scatter(x1, x2)
plt.show()