def test_random_train_test_split(test_percentage):
    """The split must honour the requested test fraction and keep halves disjoint."""
    interactions = fetch_movielens()["train"]

    train_part, test_part = random_train_test_split(
        interactions, test_percentage=test_percentage
    )

    observed_fraction = test_part.nnz / float(interactions.nnz)
    assert observed_fraction == test_percentage

    _assert_disjoint(train_part, test_part)
def test_basic_fetching():
    """Sanity-check shapes, types and label aliasing returned by fetch_movielens."""
    data = fetch_movielens()

    # Train/test are COO matrices over the same 943 x 1682 user/item grid.
    for split in ('train', 'test'):
        assert isinstance(data[split], sp.coo_matrix)
    assert data['train'].shape == data['test'].shape
    assert data['train'].shape == (943, 1682)

    # Together the two splits carry the full 100k interactions.
    assert (data['train'].getnnz() + data['test'].getnnz()) == 100000

    # Indicator-only item features: square matrix, labels alias item labels.
    assert data['item_features'].shape == (1682, 1682)
    assert len(data['item_feature_labels']) == 1682
    assert data['item_feature_labels'] is data['item_labels']

    # Adding genre features widens the matrix and de-aliases the labels.
    data = fetch_movielens(genre_features=True)
    assert data['item_features'].shape == (1682, len(data['item_feature_labels']))
    assert data['item_feature_labels'] is not data['item_labels']

    # Asking for no features at all must be rejected.
    with pytest.raises(ValueError):
        data = fetch_movielens(indicator_features=False, genre_features=False)
def test_movielens_genre_accuracy():
    """Genre-only item features should still reach reasonable train/test AUC."""
    item_features = fetch_movielens(indicator_features=False, genre_features=True)[
        "item_features"
    ]

    # There are far fewer genres than items, so the matrix must be "narrow".
    assert item_features.shape[1] < item_features.shape[0]

    model = LightFM(random_state=SEED)
    model.fit_partial(train, item_features=item_features, epochs=10)

    predictions_on_train = model.predict(
        train.row, train.col, item_features=item_features
    )
    predictions_on_test = model.predict(
        test.row, test.col, item_features=item_features
    )

    assert roc_auc_score(train.data, predictions_on_train) > 0.75
    assert roc_auc_score(test.data, predictions_on_test) > 0.69
def test_movielens_both_accuracy():
    """
    Accuracy with both genre metadata and item-specific features should be
    no worse than with just item-specific features (though more training
    may be necessary).
    """
    item_features = fetch_movielens(indicator_features=True, genre_features=True)[
        "item_features"
    ]

    model = LightFM(random_state=SEED)
    model.fit_partial(train, item_features=item_features, epochs=15)

    predictions_on_train = model.predict(
        train.row, train.col, item_features=item_features
    )
    predictions_on_test = model.predict(
        test.row, test.col, item_features=item_features
    )

    assert roc_auc_score(train.data, predictions_on_train) > 0.84
    assert roc_auc_score(test.data, predictions_on_test) > 0.75
def get_movielens_100k(min_positive_score=4, negative_value=0):
    """Fetch MovieLens 100k and binarize its ratings.

    Ratings >= ``min_positive_score`` become 1; all others become
    ``negative_value``.

    Returns a tuple of (train interactions, test interactions, identity user
    features, item feature matrix, movie titles).
    """
    movielens_100k_dict = datasets.fetch_movielens(indicator_features=True,
                                                   genre_features=True)

    def flip_ratings(ratings_matrix):
        # Vectorized thresholding instead of a Python-level list comprehension
        # (same values; dtype follows numpy promotion rules rather than being
        # a Python-int array, which downstream consumers convert anyway).
        ratings_matrix.data = np.where(
            ratings_matrix.data >= min_positive_score, 1, negative_value
        )
        return ratings_matrix

    # Note: binarization mutates the matrices inside movielens_100k_dict in place.
    test_interactions = flip_ratings(movielens_100k_dict['test'])
    train_interactions = flip_ratings(movielens_100k_dict['train'])

    # One indicator feature per user.
    num_users = train_interactions.shape[0]
    user_features = sp.identity(num_users)

    # Movie titles.
    titles = movielens_100k_dict['item_labels']

    return (train_interactions, test_interactions, user_features,
            movielens_100k_dict['item_features'], titles)
# NOTE(review): this chunk begins mid-way through a helper (apparently
# _get_feature_matrices) whose definition starts before the visible text.
                     dtype=np.int32).tocsr()

    return (user_features.tocsr(), item_features.tocsr())


def _binarize(dataset):
    # Map ratings >= 4 to +1 and everything else to -1, mutating in place.
    positives = dataset.data >= 4.0
    dataset.data[positives] = 1.0
    dataset.data[np.logical_not(positives)] = -1.0

    return dataset


# Module-level fixtures shared by the tests below.
movielens = fetch_movielens()

train, test = _binarize(movielens['train']), _binarize(movielens['test'])

(train_user_features, train_item_features) = _get_feature_matrices(train)
(test_user_features, test_item_features) = _get_feature_matrices(test)


def test_movielens_accuracy():
    # NOTE(review): chunk is truncated here — the rest of this test is not visible.
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)
# NOTE(review): Python 2 script (bare `print data`); the triple-quoted string
# opened after that print is never closed in the visible text, so the chunk is
# truncated and not valid as-is.
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM
import scipy.sparse as sp

i = 0
data = fetch_movielens(indicator_features=True, genre_features=True)
print data
'''
model = LightFM(loss='warp')

#train model
model.fit(data['train'], epochs=30, num_threads=2)

pos_values_for_a_user = data["item_labels"][data["train"].tocsr()[942].indices]
#print data["train"].tocsr()[24].shape
scores = model.predict(942,np.arange(data["train"].shape[0]))
top_items = data["item_labels"][np.argsort(-scores)]
top_N = [str(x) for x in top_items[:5]]
print top_N
for x in top_items[:5] :
    print x
for i in range(len(top_items)) :
""" DOCSTRING """ import lightfm import lightfm.datasets as datasets import numpy data = datasets.fetch_movielens(min_rating=4.0) print (repr(data['train'])) print (repr(data['test'])) model = lightfm.LightFM(loss='warp') model.fit(data['train'], epochs=30, num_threads=2) def sample_recommendation(model, data, user_ids): n_users, n_items = data['train'].shape for user_id in user_ids: known_positives = data['item_labels'][data['train'].tocsr()[user_id].indices] scores = model.predict(user_id, numpy.arange(n_items)) top_items = data['item_labels'][numpy.argsort(-scores)] print ("User %s" % user_id) print (" Known positives:") for x in known_positives[:3]: print (" %s" % x) print (" Recommended:") for x in top_items[:3]: print (" %s" % x) sample_recommendation(model, data, [3, 25, 450])
# -*- coding: utf-8 -*- """Untitled1.ipynb Automatically generated by Colaboratory. Original file is located at https://colab.research.google.com/drive/17_ZWfnM3s4qxUmCUEKNWKjZMaZlAuybl """ # importing the dependencies import numpy as np from lightfm.datasets import fetch_movielens # movie dataset from lightfm import LightFM # library for various recommondation models data = fetch_movielens( min_rating=4) # using only movies with 4 or higher rating model = LightFM(loss='warp') # defining the loss function model.fit(data['train'], epochs=50, num_threads=4) # training the model # created a function to recommend movies to the user def my_rec(model, data, user_ids): # trained model, our dataset, and the userid n_users, n_item = data[ 'train'].shape # calculating the number of user and movies for id in user_ids: # extrating the known positive movie means movies already liked by user known_positive = data['item_labels'][data['train'].tocsr() [user_ids].indices] score = model.predict(
#!/usr/bin/env python # coding: utf-8 # In[ ]: # Import libaries import numpy as np from lightfm.datasets import fetch_movielens # movielens is big csv file containing 100K movies ratings from 1K users on 1700 movies from lightfm import lightFM # Fetch and format data data = fetch_movielens( min_rating=4.0 ) # creat a variable called "data" to store data set in; min_rating equals 4.0 means we only include the movies with a rating of 4 or higher # print training and testing data print(repr(data['train'])) print(repr(data['test'])) # creat and initialize model model = LightFM( loss='warp' ) # choosing a Weighted Approximate-Rank Pairwise loss function called warp, which will help us creat recommendations for each users by looking at the existing users rating pairs and predicting rankings for each. It uses the gradient descent algorithm to iteratively find the weights that improve our prediction over time. This model takes into account book the user's past rating history content and similar users ratings collaborative # train the model model.fit(data['train'], epochs=30, num_threads=2) # define a sample recommendation function def sampel_recommendation(model, data, user_ids): n_users, n_items = data[
from personal.MaurizioFramework.MatrixFactorization.Cython.MF_BPR_Cython import MF_BPR_Cython
# from utils.datareader import Datareader
from recommenders.similarity.similarity import *
import numpy as np
import scipy.sparse as sps
from tqdm import tqdm
import gc
from sklearn.model_selection import train_test_split
import lightfm.datasets as ld
import sys

# sys.stdout = open("mf_prova.txt", "w")

# Load MovieLens, keeping every rating (min_rating=1.0).
data = ld.fetch_movielens(min_rating=1.0)

# BUG FIX: scipy.sparse is imported as `sps`, but the original code used the
# undefined name `sp` here, which raises NameError at runtime.
train = sps.csr_matrix(data["train"])
test = sps.csr_matrix(data["test"])

# Binarize the interactions: every stored rating becomes 1.
train.data = np.ones(len(train.data))
test.data = np.ones(len(test.data))

# Quick sanity prints of shapes and interaction counts.
print(train.shape, train.nnz)
print(test.shape, test.nnz)
print(len(train.data), np.sum(train.data))
print(len(test.data), np.sum(test.data))
print(train[100])
print(test[100])

# Hyper-parameters for the MF-BPR run below.
epochs = 500
factors = 42
learning = 0.001
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

data = fetch_movielens(
    min_rating=4.0
)  #data holds all the fetched and formated data with a minimum rating of 4.0

print(repr(data['train']))  #print out the size of training and test data
print(repr(data['test']))

#create model: loss = weighted approximate-rank pairwise (WARP)
model = LightFM(
    loss='warp')

#train the model
model.fit(data['train'], epochs=30, num_threads=2)


def sample_recommendation(model, data, user_ids):
    #number of users and items
    n_users, n_items = data['train'].shape

    #go through every user and assign a recommendation
    for user_id in user_ids:
        #store the movies they already like
        liked_movies = data['item_labels'][data['train'].tocsr()
                                           [user_id].indices]

        #our prediction
        # NOTE(review): chunk is truncated below this line.
        score = model.predict(user_id, np.arange(n_items))
import numpy as np
from flask import Flask, request, render_template
from models.simple_recommender_model import lfm_model
from lightfm.datasets import fetch_movielens

# Model and data are built once, at import time.
model = lfm_model()  # import model
data = fetch_movielens(min_rating=4.0)  # import data

app = Flask(__name__)


@app.route('/')
def my_form():
    # Serve the form used to collect user ids.
    return render_template('my-form.html')


@app.route("/predict", methods=['POST'])
def sample_recommendation(model=model, data=data):
    # Parse whitespace-separated user ids from the submitted form field.
    result = request.form['number']
    input_user_ids = np.fromstring(result, dtype=int, sep=' ')  # User input data

    n_users, n_items = data['train'].shape
    # Accumulators — presumably filled further down; the rest of this handler
    # is not visible (chunk truncated).
    x_list = []
    i_list = []

    for user_id in input_user_ids:
        # Items this user has already interacted with.
        known_positives = data['item_labels'][data['train'].tocsr()
                                              [user_id].indices]
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k

# Only five-star ratings are treated as positive interactions.
data = fetch_movielens(data_home='../../_dataset/recommend', min_rating=5.0)

# Train a WARP-loss model.
model = LightFM(loss='warp')
model.fit(data['train'], epochs=30, num_threads=2)

# Report mean precision@5 on the held-out split, then dump the train matrix.
test_precision = precision_at_k(model, data['test'], k=5).mean()
print(test_precision)

print(data['train'])
def main():
    """Entry point: parse CLI args, train on MovieLens, print recommendations."""
    args = get_arguments()

    data = fetch_movielens(min_rating=4.0)
    model = train_model(data['train'])

    requested_users = args.users.split(',')
    sample_recommendations(model, data['train'], data['item_labels'],
                           requested_users)
def fetch_data():
    """Return the MovieLens dataset restricted to ratings of 4.0 and above."""
    dataset = fetch_movielens(min_rating=4.0)
    return dataset
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM
from lightfm.evaluation import precision_at_k

# Fetch MovieLens, keeping only ratings of 4.0 or higher. The loader builds
# ready-made train/test interaction matrices from the raw ratings data.
data = fetch_movielens(min_rating=4.0)

# WARP (Weighted Approximate-Rank Pairwise) optimizes the top of the ranking
# by comparing each positive item against sampled negatives and adjusting
# weights via gradient descent — a hybrid content + collaborative objective.
model_1 = LightFM(loss='warp')
model_1.fit(data['train'], epochs=30, num_threads=2)
test_precision_1 = precision_at_k(model_1, data['test'], k=3).mean()

# k-OS WARP variant: ranks relative to the k-th positive example, with n
# positives sampled per update.
model_2 = LightFM(loss='warp-kos', n=10, k=5)
model_2.fit(data['train'], epochs=30, num_threads=2)
#importing all libraries import numpy as np import pandas as pd from lightfm.datasets import fetch_movielens from lightfm import LightFM #fetching of movies having rating 5 m_u_data = fetch_movielens(min_rating=5.0) print(m_u_data) print(repr(m_u_data['train'])) print(repr(m_u_data['test'])) print(m_u_data['train']) print(m_u_data['item_labels']) #model creation model = LightFM(loss='warp') #fiting of a model with epoch 40 model.fit(m_u_data['train'], epochs=40, num_threads=2) def recommendation(model, m_u_data, uids): n_users = m_u_data['train'].shape[0] n_items = m_u_data['train'].shape[1] for uid in uids: known_positive = m_u_data['item_labels'][m_u_data['train'].tocsr() [uid].indices] scores = model.predict(uid, np.arange(n_items)) top_items = m_u_data['item_labels'][np.argsort(-scores)] print("User :"******"known movies :")
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

# Fetch MovieLens (cached in the current directory), keeping ratings >= 4.
data = fetch_movielens(data_home='.', min_rating=4.0)

#print training and testing data
print(repr(data['train']))
print(repr(data['test']))

model = LightFM(loss='warp')
model.fit(data['train'], epochs=30, num_threads=2)


def sample_recommendation(model, data, user_ids):
    # Matrix dimensions: users x items.
    n_users, n_items = data['train'].shape

    for user_id in user_ids:
        # Items the user has already rated positively.
        known_positives = data['item_labels'][data['train'].tocsr()
                                              [user_id].indices]

        # Score every item for this user and rank descending.
        scores = model.predict(user_id, np.arange(n_items))
        top_items = data['item_labels'][np.argsort(-scores)]

        #print out the results
        print("User %s" % user_id)
        print(" --Known Movies:")
        for x in known_positives[:3]:
            print(" %s" % x)

        # NOTE(review): chunk truncated — the loop printing the recommended
        # titles is not visible below this line.
        print(" --Recommended Movies:")
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM  # used to create the model below

# fetch data and format it: only movies with a rating of 4.0 or higher are
# kept. fetch_movielens creates an interaction matrix from the CSV file and
# stores it in the "data" dictionary.
data = fetch_movielens(
    min_rating=4.0
)

print(repr(data['train']))  # 90%
print(repr(data['test']))  # 10%

'''
# Store our model in a variable named "model"
# Initialize a Lightfm class using a single parameter called "lose"
# Lose means a Loss function and it measures the difference between the model prediction and desired output
# We want to minimize it during training so that our model gets more accurate over time in its prediction.
# Here we are using a Loss called WARP = Weighted Approximate-Rate PairWise.
'''
model = LightFM(loss='warp')
'''
# Wrap help us create recommendation for each users by looking at the existing user rating pairs,
# and predicting ratings for each.
# It uses the gradiebt descent algorithm to iteratively find the weights
# that imporve our prediction overtime. Using users past rating history and similar users rating.
# Content + Collaborative = Hybrid System
'''

# Use fit method to train our model
# fit takes 3 parameters
# NOTE(review): chunk is truncated here — the fit call itself is not visible.
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

# fetch data and format it
data = fetch_movielens(
    min_rating=4.0)  # only collect the movies with a rating of 4 or higher

# the structure of data
#first: train
# first: test
# first: item_features
# first: item_feature_labels
# first: item_labels

# print training and testing data
# print(data['train'])
# for single_data in data['train']:
#     print("first:",single_data[0],"second",data[1])
#     print("first:",)
# print(repr(data['train']))
# print(repr(data['test']))
'''repr()函数将对象转化为供解释器读取的形式'''
# Example below — repr() effectively adds quoting around the value:
# >>> dict = {'runoob': 'runoob.com', 'google': 'google.com'};
# >>> repr(dict)
# "{'google': 'google.com', 'runoob': 'runoob.com'}"

# m1=data['train'].tocsr()
# [rows,cols]=m1.shape
# for i in range(5):
#     for j in range(5):
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

#fetch data and format it
data = fetch_movielens(min_rating=4.0)  #Data is a CSV file that contens movies

#print training and testing data
print(repr(data['train']))
print(repr(data['test']))

#Create model
model = LightFM(loss='warp')

#Train model
model.fit(data['train'], epochs=30, num_threads=2)


def sample_recommendation(model, data, user_ids):
    #Number of users and movies in traing data
    n_users, n_items = data['train'].shape

    #Generate recommendations for each user we input
    for user_id in user_ids:
        #Movies they already like
        known_positives = data['item_labels'][data['train'].tocsr()[user_id].indices]

        #Movies our model predicts they will like
        scores = model.predict(user_id, np.arange(n_items))

        #Rank them in order of most liked to least
        top_items = data['item_labels'][np.argsort(-scores)]

        #Print Out the results
        # NOTE(review): chunk is truncated after this line.
        print("Users {}".format(user_id))
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

# fetch data and format it
# NOTE(review): min_rating=0.4 keeps essentially all ratings (MovieLens
# ratings are 1-5); this looks like a typo for 4.0 — confirm intent.
data = fetch_movielens(min_rating=0.4)

#print training and testing data
print(repr(data['train']))
print(repr(data['test']))

# create model
model = LightFM(loss='warp')
model.fit(data['train'], epochs=30, num_threads=2)


def sample_recommendation(model, data, user_ids):
    # n. of users and movies in training data
    n_users, n_items = data['train'].shape

    #generate recommendations for each user we input
    for user_id in user_ids:
        # movies they already liked
        known_positives = data['item_labels'][data['train'].tocsr()
                                              [user_id].indices]

        # movies our model predict they will like
        # NOTE(review): chunk is truncated after this line.
        scores = model.predict(user_id, np.arange(n_items))
# 案例 from lightfm import LightFM from lightfm.datasets import fetch_movielens from lightfm.evaluation import precision_at_k import numpy as np # Load the MovieLens 100k dataset. Only five # star ratings are treated as positive. data = fetch_movielens(data_home='./data', min_rating=5.0) print(data['train']) # Instantiate and train the model model = LightFM(loss='warp') model.fit(data['train'], epochs=30, num_threads=2) # Evaluate the trained model test_precision = precision_at_k(model, data['test'], k=5).mean() print("Train precision: %.2f" % precision_at_k(model, data['train'], k=5).mean()) print("Test precision: %.2f" % precision_at_k(model, data['test'], k=5).mean()) def sample_recommendation(model, data, user_ids): n_users, n_items = data['train'].shape for user_id in user_ids: known_positives = data['item_labels'][data['train'].tocsr()[user_id].indices] print(data['train'].tocsr()) print(data['train'].tocsr()[user_id]) print(data['train'].tocsr()[user_id].indices)
# Uses a predefined data set of users and movies to generate movie recommendations for a particular user. # Uses the 100k MovieLens database and the LightFM package import numpy as np from lightfm.datasets import fetch_movielens from lightfm import LightFM #fetches and formats the data from dataset data = fetch_movielens() #training data - data['train'] #testing data - data['test'] #create a model for recommendations model = LightFM(loss='warp') #train the model with the training data model.fit(data['train'], epochs=30, num_threads=2) def recommendation(model, data, user_ids): #no. of users and movies in the training data num_users, num_movies = data['train'].shape #generate recommendation for each user in the user_ids array for user in user_ids: #movies they like liked_movies = data['item_labels'][data['train'].tocsr()[user].indices] #predicted movies predicted = model.predict(user, np.arange(num_movies)) #rank them from most liked to least
from lightfm.datasets import fetch_movielens
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import recall_at_k
from lightfm.evaluation import auc_score

# Training hyper-parameters.
EPOCHS = 10
ALPHA = 1e-3
NUM_THREADS = 4

# fetch data
movielens = fetch_movielens(data_home='.')

# Dump the keys/shapes of what the loader returned.
for key, value in movielens.items():
    print(key, type(value), value.shape)

train = movielens['train']
test = movielens['test']

# BPR model
model = LightFM(learning_rate=0.05, loss='bpr')
model = model.fit(train, epochs=EPOCHS, num_threads=NUM_THREADS)

# Mean precision@10 and AUC on both splits.
train_precision = precision_at_k(model, train, k=10).mean()
test_precision = precision_at_k(model, test, k=10).mean()

train_auc = auc_score(model, train).mean()
test_auc = auc_score(model, test).mean()

# NOTE(review): Python 2 print statement below — inconsistent with the
# print() calls above; the chunk is also truncated after it.
print 'BPR model'
from flask import Flask, jsonify
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM
import json

app = Flask(__name__)

# Model and data are built once, at import time: WARP model on ratings >= 3.5.
data = fetch_movielens(min_rating=3.5)

model = LightFM(loss='warp')
model.fit(data['train'], epochs=30, num_threads=2)


def recommendation(model, data, user_ids):
    # Users x items dimensions of the training matrix.
    n_users, n_items = data['train'].shape

    for user_id in user_ids:
        # Items the user already interacted with (computed but unused here).
        known_positives = data['item_labels'][data['train'].tocsr()[user_id].indices]

        # Score all items for this user and rank descending.
        scores = model.predict(user_id, np.arange(n_items))
        top_items = data['item_labels'][np.argsort(-scores)]

        # Collect the top-10 titles.
        movies_to_send = []
        for x in top_items[:10]:
            movies_to_send.append(x)

    # NOTE(review): with multiple user_ids only the last user's list is
    # returned (the list is rebuilt each iteration); the only caller passes a
    # single id, so behavior is unaffected — confirm placement of this return
    # relative to the loop against the original formatting.
    return movies_to_send


@app.route('/')
def index():
    # Recommend for user 1 and return the titles as JSON.
    to_display = recommendation(model, data, [1])
    return jsonify(to_display)
def fetch_data(rating=4.0):
    """Fetch MovieLens interactions, keeping only ratings at or above *rating*."""
    print("Fetching Movie data with ratings: {0} and above.".format(rating))
    return fetch_movielens(min_rating=rating)
import numpy as np
from lightfm.datasets import fetch_movielens
#A large csv dataset that has 100K movie ratings from 1K users on 1700 movies
from lightfm import LightFM

#fetch and format data
data = fetch_movielens(
    min_rating=4.0)  #Only fetches movies rated 4.0 stars or higher

#print training and testing data; repr shows just the shape of the data
#rather than every single piece
print(repr(data['train']))
print(repr(data['test']))

#create model
model = LightFM(loss='warp')

#train model
model.fit(data['train'], epochs=30, num_threads=2)


def sample_recommendation(model, data, user_ids):
    #number of users and movies in training data
    n_users, n_items = data['train'].shape

    #generate recommendations for each inputted user
    for user_id in user_ids:
        #movies they already like
        known_positives = data['item_labels'][data['train'].tocsr()
                                              [user_id].indices]

        #movies our model predicts they will like
        # NOTE(review): chunk is truncated after this line.
        scores = model.predict(user_id, np.arange(n_items))
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

# Keep only five-star ratings as positives.
dt = fetch_movielens(min_rating=5.0)

print("train - " + repr(dt['train']))
print("test - " + repr(dt['test']))

model = LightFM(loss='warp')

#train model
model.fit(dt['train'], epochs=30, num_threads=2)


#Modify this function so that it parses your dataset correctly to retrieve
#the necessary variables (products, songs, tv shows, etc.)
#then print out the recommended results
def sample_recommendation(model, data, user_ids):
    #number of users and movies in training data
    n_users, n_items = data['train'].shape

    #generate recommendations for each user we input
    for user_id in user_ids:
        #movies they already like
        known_positives = data['item_labels'][data['train'].tocsr()
                                              [user_id].indices]

        #movies our model predicts they will like
        # NOTE(review): chunk is truncated here — the prediction code is not visible.
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

# fetch data and format it
data = fetch_movielens(min_rating=4.0)

# print training and testing data
print(repr(data['train']))
print(repr(data['test']))

# create model
model = LightFM(loss='warp')

#train model
model.fit(data['train'], epochs=30, num_threads=2)


def sample_recommendation(model, data, user_ids):
    # number of users and movies in training datasets
    n_users, n_items = data['train'].shape

    # generate recommendations for each user we input
    for user_id in user_ids:
        # movies they already like
        known_positives = data['item_labels'][data['train'].tocsr()[user_id].indices]

        # movies our model predicts they will like
        # NOTE(review): chunk is truncated after this line.
        scores = model.predict(user_id, np.arange(n_items))
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

#fetch data and format it
#movies with rating 4.0 or higher
data = fetch_movielens(
    min_rating=4.0
)  # 100k movie rating from 1k users on 1700 movies - each user rated at least 20 movies 1-5

#print training and testing data
print(repr(data['train']))
print(repr(data['test']))

#create model
model = LightFM(
    loss='warp'
)  # warp = Weighted Approximate-Rank Pairwise - https://medium.com/@gabrieltseng/intro-to-warp-loss-automatic-differentiation-and-pytorch-b6aa5083187a

#train model
model.fit(data['train'], epochs=30)


def sample_recommendation(model, data, user_ids):
    #number of users and movies in training data
    n_users, n_items = data['train'].shape

    #generate recommendations for each user we input
    for user_id in user_ids:
        #movies they already like
        # NOTE(review): chunk is truncated mid-expression below — the
        # statement is incomplete in the visible text.
        known_positives = data['item_labels'][data['train'].tocsr()
#collaborative systems and content based systems import numpy as np from lightfm.datasets import fetch_movielens from lightfm import LightFM #fetch data and format it data = fetch_movielens(min_rating=4.0) #print training and testing data print(repr(data['train'])) print(repr(data['test'])) #create model model = LightFM(loss='warp') #train model model.fit(data['train'], epochs=30, num_threads=2) def sample_recommendation(model, data, user_ids): #number of users and movies in training data n_users, n_items = data['train'].shape #generate recommendations for each user we input for user_id in user_ids: #movies they already like known_positives = data['item_labels'][data['train'].tocsr() [user_id].indices]
# instalar numpy, scipy e lightfm import numpy as np from lightfm.datasets import fetch_movielens from lightfm import LightFM dados = fetch_movielens(min_rating=4.0) #Listando teste e treino dos dados print(repr(dados['train'])) print(repr(dados['test'])) #Criando o model # weighted approximate rank pairwise, ajuda a criar recomendaçõeos model = LightFM(loss='warp') # Treinando os modelos model.fit(dados['train'], epochs=30, num_threads=2) #Função de recomendação def recomendacao(model, dados, user_ids): n_users, n_items = dados['train'].shape # Gerar recomendação de acordo com cada usuário inserido