Beispiel #1
0
    def GradePerformance(self, modelType='factorization'):
        """Evaluate one trained recommender against the held-out test data.

        Looks up ``self.trained_model[modelType]``, runs ``graphlab.compare``
        on ``self.test_data`` with that single model, and stores the result
        in ``self.model_performance[modelType]``.

        Args:
            modelType: Either ``'factorization'`` or ``'ranking'``.

        Raises:
            ValueError: If ``modelType`` is neither of the two accepted names.
        """
        accepted = ('factorization', 'ranking')
        if not (modelType in accepted):
            raise ValueError("Not a valid modelType")

        # graphlab.compare takes a list of models, even for a single one.
        candidates = [self.trained_model[modelType]]
        evaluation = graphlab.compare(self.test_data, candidates)
        self.model_performance[modelType] = evaluation
	def collaborative_filtering_algo():
		"""Train and evaluate a Pearson item-similarity recommender on job mappings.

		Loads ``job_mapping_train.csv`` and ``job_mapping_test.csv`` (``|``
		separated, latin-1 encoded, columns named ``JobId`` and
		``CandidateID``), trains a GraphLab item-similarity model with
		``JobId`` as the user column and ``CandidateID`` as the item column,
		prints the top five recommendations for ids 1 to 5, and evaluates the
		model on the test mapping with ``graphlab.compare``.

		Returns:
			The value returned by ``graphlab.compare`` for the trained model,
			so callers can inspect or plot it. Callers that ignore the return
			value keep working.
		"""
		import os

		# Both mapping files share the same two-column layout.
		r_cols = ['JobId', 'CandidateID']
		job_mapping_train = pd.read_csv('job_mapping_train.csv', sep='|', names=r_cols, encoding='latin-1')
		job_mapping_test = pd.read_csv('job_mapping_test.csv', sep='|', names=r_cols, encoding='latin-1')

		# NOTE(review): a product key is committed to source control here. The
		# environment variable takes precedence so the literal can be rotated
		# and removed without touching this function again.
		product_key = os.environ.get('GRAPHLAB_PRODUCT_KEY', 'EBFD-E604-0274-7909-2769-C6C5-0D2F-7516')
		graphlab.product_key.set_product_key(product_key)

		train_data = graphlab.SFrame(job_mapping_train)
		test_data = graphlab.SFrame(job_mapping_test)

		# Train the model.
		item_sim_model = graphlab.item_similarity_recommender.create(
			train_data, user_id='JobId', item_id='CandidateID', similarity_type='pearson')

		# Recommend for ids 1..5; an explicit list keeps this working where
		# range() is lazy (Python 3).
		item_sim_recomm = item_sim_model.recommend(users=list(range(1, 6)), k=5)
		item_sim_recomm.print_rows(num_rows=25)

		# Measure the performance of the trained model on the test mapping.
		model_performance = graphlab.compare(test_data, [item_sim_model])
		return model_performance
# Train the cosine and Jaccard variants of the item-similarity recommender.
# NOTE(review): train_data, test_data, popularity_model and item_sim_pearson
# are not defined in this part of the script; presumably they are created
# earlier -- confirm.
item_sim_cosine= graphlab.item_similarity_recommender.create(train_data, user_id='user_id', item_id='movie_id', target='rating', similarity_type='cosine')
item_sim_jaccard = graphlab.item_similarity_recommender.create(train_data, user_id='user_id', item_id='movie_id', target='rating', similarity_type='jaccard')

# Make recommendations: top 5 items (k=5) for user ids 1 through 5 with each
# model, printing up to 25 rows per model.
print("\n Collaborative Filtering Model(pearson)")
item_sim_recomm = item_sim_pearson.recommend(users=range(1,6),k=5)
item_sim_recomm.print_rows(num_rows=25)
print("\n Collaborative Filtering Model(cosine)")
item_sim_recomm1 = item_sim_cosine.recommend(users=range(1,6),k=5)
item_sim_recomm1.print_rows(num_rows=25)
print("\n Collaborative Filtering Model(jaccard)")
item_sim_recomm2 = item_sim_jaccard.recommend(users=range(1,6),k=5)
item_sim_recomm2.print_rows(num_rows=25)

# Disabled alternative comparison call, kept for reference:
#graphlab.item_similarity_recommender.compare_models(train_data, [popularity_model, item_sim_model,item_sim_model1,item_sim_model2],metric='precision_recall')



# Evaluating the recommendation engines.
# Compare the popularity model and the three item-similarity models on the
# test data, then display the comparison.
model_performance = graphlab.compare(test_data, [popularity_model, item_sim_pearson,item_sim_cosine,item_sim_jaccard])
graphlab.show_comparison(model_performance,[popularity_model, item_sim_pearson,item_sim_cosine,item_sim_jaccard])

# Factorization method (disabled):
#fact_rec_model=graphlab.recommender.factorization_recommender.create(train_data, user_id='user_id', item_id='movie_id', target='rating', user_data=None, item_data=None, num_factors=8, regularization=1e-08, linear_regularization=1e-10, side_data_factorization=True, nmf=False, binary_target=False, max_iterations=50, sgd_step_size=0, random_seed=0, solver='auto', verbose=True)
# Make recommendations (disabled):
#fact_sim_recomm = fact_rec_model.recommend(users=range(1,6),k=5)
#fact_sim_recomm.print_rows(num_rows=25)
# NOTE(review): show() runs before the canvas target is switched to 'ipynb';
# confirm this order is intended.
graphlab.canvas.show()
graphlab.canvas.set_target('ipynb')
Beispiel #4
0
# Settings shared by every recommend/print step below: the top 5 items
# (k=5) for user ids 1..5, printing up to 25 rows.
first_users = range(1, 6)
top_k = 5
rows_to_print = 25
# Column mapping passed to every item-similarity model trained here.
rating_columns = dict(user_id='user_id', item_id='anime_id', target='rating')

popularity_recomm = popularity_model.recommend(users=first_users, k=top_k)
popularity_recomm.print_rows(num_rows=rows_to_print)

# Item-similarity model using Pearson similarity.
item_sim_model = graphlab.item_similarity_recommender.create(
    train_data, similarity_type='pearson', **rating_columns)
item_sim_recomm = item_sim_model.recommend(users=first_users, k=top_k)
item_sim_recomm.print_rows(num_rows=rows_to_print)

# Compare the popularity model against the Pearson model and show the result.
popularity_vs_pearson = [popularity_model, item_sim_model]
model_performance = graphlab.compare(test_data, popularity_vs_pearson)
graphlab.show_comparison(model_performance, list(popularity_vs_pearson))

# Item-similarity model using Jaccard similarity.
item_sim_model2 = graphlab.item_similarity_recommender.create(
    train_data, similarity_type='jaccard', **rating_columns)
item_sim_recomm2 = item_sim_model2.recommend(users=first_users, k=top_k)
item_sim_recomm2.print_rows(num_rows=rows_to_print)

# Compare all three models on the test data.
all_three_models = [popularity_model, item_sim_model, item_sim_model2]
model_performance3 = graphlab.compare(test_data, all_three_models)
Beispiel #5
0
# Songs the personalized model considers most similar to the given tracks.
personalized_model.get_similar_items(['With Or Without You - U2'])

# In[19]:

personalized_model.get_similar_items(
    ['Chan Chan (Live) - Buena Vista Social Club'])

# # Quantitative comparison between the models
#
# We now formally compare the popularity and the personalized models using precision-recall curves.

# In[20]:

# Compare the version numerically. Comparing a text prefix instead would
# rank "1.10" (prefix "1.1") below "1.6" and take the legacy branch on a
# newer release.
graphlab_major_minor = tuple(
    int(part) for part in graphlab.version.split('.')[:2])
if graphlab_major_minor >= (1, 6):
    model_performance = graphlab.compare(
        test_data, [popularity_model, personalized_model], user_sample=0.05)
    graphlab.show_comparison(model_performance,
                             [popularity_model, personalized_model])
else:
    # Fallback for releases older than 1.6: use the helper under
    # graphlab.recommender.util with inline matplotlib plotting.
    get_ipython().magic(u'matplotlib inline')
    model_performance = graphlab.recommender.util.compare_models(
        test_data, [popularity_model, personalized_model], user_sample=.05)

# The curve shows that the personalized model provides much better performance.

# In[41]:

# Number of distinct user ids among the rows whose artist is 'Kanye West'.
west = song_data[song_data['artist'] == 'Kanye West']
count_west = len(west['user_id'].unique())
count_west
Beispiel #6
0
# Rows that have an in-flight entertainment rating.
rating_ent = airline_us.dropna(subset=['inflight_entertainment_rating'])

# The first 4000 rows of rating_all are for training, the remainder for testing.
split_at = 4000
train, test = rating_all[:split_at], rating_all[split_at:]

# Convert both tables into SFrames for graphlab.
train_data, test_data = graphlab.SFrame(train), graphlab.SFrame(test)

# Column mapping shared by both recommenders.
review_columns = dict(
    user_id='author', item_id='airline_name', target='overall_rating')

# A simple popularity model.
popularity_model = graphlab.popularity_recommender.create(
    train_data, **review_columns)

# Top 5 recommendations (k=5); no users are named in the call, and only
# the first 25 rows of the result are printed.
popularity_recomm = popularity_model.recommend(k=5)
popularity_recomm.print_rows(num_rows=25)

# Cross-check: airlines ordered by mean overall rating, highest first.
mean_rating_by_airline = train.groupby(by='airline_name')['overall_rating'].mean()
mean_rating_by_airline.sort_values(ascending=False)

# A collaborative filtering model using Pearson similarity.
item_sim_model = graphlab.item_similarity_recommender.create(
    train_data, similarity_type='pearson', **review_columns)

item_sim_recomm = item_sim_model.recommend(k=5)
item_sim_recomm.print_rows(num_rows=25)

# Evaluate both models on the test split and show the comparison.
both_models = [popularity_model, item_sim_model]
model_performance = graphlab.compare(test_data, both_models)
graphlab.show_comparison(model_performance, list(both_models))
                           encoding='latin-1')

# Load the tab-separated test ratings using the same column names (r_cols)
# as the training ratings.
test_csv_options = dict(sep='\t', names=r_cols, encoding='latin-1')
ratings_test = pd.read_csv('ml-100k/ub.test', **test_csv_options)

# Wrap both pandas frames as SFrames for graphlab.
train_data, test_data = graphlab.SFrame(ratings_base), graphlab.SFrame(ratings_test)

# Item-similarity recommender using Pearson similarity on the ratings.
item_sim_model = graphlab.item_similarity_recommender.create(
    train_data, user_id='user_id', item_id='movie_id', target='rating',
    similarity_type='pearson')

# Top 20 items for user ids 1..99; print up to 2000 rows of the result.
requested_users = range(1, 100)
item_sim_recomm = item_sim_model.recommend(users=requested_users, k=20)
item_sim_recomm.print_rows(num_rows=2000)

# Evaluate the single model on the test data and display the result.
evaluated_models = [item_sim_model]
model_performance = graphlab.compare(test_data, evaluated_models)
graphlab.show_comparison(model_performance, list(evaluated_models))
Beispiel #8
0
def compare_models(test_data, m1, m2):
    """Evaluate two recommenders on ``test_data`` and display the comparison.

    Runs ``graphlab.compare`` on the pair ``[m1, m2]`` and hands the result,
    with the same pair of models, to ``graphlab.show_comparison``.
    """
    pair = (m1, m2)
    graphlab.show_comparison(
        graphlab.compare(test_data, list(pair)),
        list(pair),
    )
Beispiel #9
0
# Item-similarity recommender on the movie ratings, using cosine similarity.
movie_columns = dict(user_id='user_id', item_id='movie_id', target='rating')
recommender_movie_model = graphlab.item_similarity_recommender.create(
    training_data, similarity_type='cosine', **movie_columns)

# Top 6 movies for user ids 1..4; print up to 24 rows.
recommend_movie = recommender_movie_model.recommend(users=range(1, 5), k=6)
recommend_movie.print_rows(num_rows=24)

# Recommendations for a single user; replace 946 with the user_id of interest.
single_user = [946]
recommend_movie = recommender_movie_model.recommend(users=single_user, k=6)
recommend_movie.print_rows(num_rows=6)

# Evaluation: load the tab-separated test ratings and wrap them as an SFrame.
rating_test_data = pd.read_csv(
    'ml-100k/ua.test', sep='\t', names=r_cols, encoding='latin-1')
test_data = graphlab.SFrame(rating_test_data)

# Compare the popularity model with the cosine model and show the result.
compared_models = [popularity_movie_model, recommender_movie_model]
evaluation = graphlab.compare(test_data, compared_models)
graphlab.show_comparison(evaluation, list(compared_models))
"""
Movie recommendation system
# Reference: https://www.analyticsvidhya.com/blog/2016/06/quick-guide-build-recommendation-engine-python/
"""
Beispiel #10
0
# Songs the personalized model considers most similar to the given tracks.
personalized_model.get_similar_items(["With Or Without You - U2"])


# In[ ]:

personalized_model.get_similar_items(["Chan Chan (Live) - Buena Vista Social Club"])


# #Quantitative comparison between the models
#
# We now formally compare the popularity and the personalized models using precision-recall curves.

# In[ ]:

# Compare the version numerically. Comparing a text prefix instead would
# rank "1.10" (prefix "1.1") below "1.6" and take the legacy branch on a
# newer release.
graphlab_major_minor = tuple(int(part) for part in graphlab.version.split(".")[:2])
if graphlab_major_minor >= (1, 6):
    model_performance = graphlab.compare(test_data, [popularity_model, personalized_model], user_sample=0.05)
    graphlab.show_comparison(model_performance, [popularity_model, personalized_model])
else:
    # Fallback for releases older than 1.6: use the helper under
    # graphlab.recommender.util with inline matplotlib plotting.
    get_ipython().magic(u"matplotlib inline")
    model_performance = graphlab.recommender.util.compare_models(
        test_data, [popularity_model, personalized_model], user_sample=0.05
    )


# The curve shows that the personalized model provides much better performance.

# In[21]:

# Item-similarity recommender trained on user_id / song_id pairs, with no
# target column passed.
item_similarity_model = graphlab.item_similarity_recommender.create(train_data, user_id="user_id", item_id="song_id")

Beispiel #11
0
                           encoding='latin-1')
# Load the tab-separated test ratings with the shared column names.
testCsvOptions = dict(sep='\t', names=ratingsCol, encoding='latin-1')
TestRatings = pd.read_csv('Data/ml-100k/ua.test', **testCsvOptions)

import graphlab as gl

# Wrap the pandas frames as SFrames.
trainData, testData = gl.SFrame(TrainRatings), gl.SFrame(TestRatings)

# Column mapping shared by both recommenders.
ratingColumns = dict(user_id='user_id', item_id='movie_id', target='rating')

# Popularity baseline: top 5 movies for user ids 1..5, up to 25 rows printed.
popModel = gl.popularity_recommender.create(trainData, **ratingColumns)
recommendedMovies = popModel.recommend(users=range(1, 6), k=5)
recommendedMovies.print_rows(num_rows=25)

# Item-item collaborative filtering; no similarity_type is passed, so the
# library's own default is used.
itemItemCF = gl.item_similarity_recommender.create(trainData, **ratingColumns)
recommendedMovies2 = itemItemCF.recommend(users=range(1, 6), k=5)
recommendedMovies2.print_rows(num_rows=25)

# Evaluate both models on the test ratings and display the comparison.
comparedModels = [popModel, itemItemCF]
performanceComparasion = gl.compare(testData, comparedModels)
gl.show_comparison(performanceComparasion, list(comparedModels))
import graphlab

# Download the movie ratings, reading the "rating" column as integers.
ratings_url = "https://static.turi.com/datasets/movie_ratings/training_data.csv"
data = graphlab.SFrame.read_csv(ratings_url, column_type_hints={"rating": int})

# random_split is called with fraction 0.8 and a fixed seed; the two
# results are used as the training and test sets.
data_train, data_test = data.random_split(0.8, seed=5)

# Column mapping shared by both models.
rating_columns = dict(user_id="user", item_id="movie", target="rating")

# model1: created through the generic graphlab.recommender.create entry point.
model1 = graphlab.recommender.create(data_train, **rating_columns)

# model2: popularity baseline.
model2 = graphlab.popularity_recommender.create(data_train, **rating_columns)

# Evaluate both models on the test set and display the comparison.
both_models = [model1, model2]
model_performance = graphlab.compare(data_test, both_models)
graphlab.show_comparison(model_performance, list(both_models))