def recommendPosts(dataModel): model = MatrixPreferenceDataModel(dataModel) print 'User ID`s: ' print model.user_ids() ## print 'Item ID`s: ' ## print model.item_ids() userID= input('Please enter a userID: ') print 'Loading recommended posts...' similarity = UserSimilarity(model, pearson_correlation) recommender = UserBasedRecommender(model, similarity, with_preference=True) return recommender.recommend(userID)
def recom(i):
    """Leave-some-out evaluation of the user-based recommender for user *i*.

    Loads the pickled rating matrix, hides user i's ratings except for a
    random sample of up to ``numFeedbacks`` of them, runs a cosine-distance
    user-based recommender, and pickles the resulting (AP, MRR) pair to
    ``outpath``.  Best-effort: failures are logged, not raised.
    """
    try:
        M = pd.read_pickle(path + 'DA_Rating.df')
        user = M.ix[:, i]
        user.name
        usernnz = user > 0
        usernnzIDX = np.where(usernnz)
        # Ground-truth items: everything the user actually rated.
        targets = user[usernnz].index.values
        N = M.copy()
        # Hide all of user i's ratings ...
        N.ix[:, i] = 0
        idx = np.random.random_integers(
            0, len(usernnzIDX[0]),
            min(len(usernnzIDX[0]) - 3, numFeedbacks))
        # ... then reveal a random subset as simulated feedback.
        N.ix[idx, i] = user.iloc[idx]
        model = MatrixPreferenceDataModel(getRecommenderData(N))
        similarity = UserSimilarity(model, cosine_distances, 3)
        recommender = UserBasedRecommender(model, similarity,
                                           with_preference=True)
        rec = recommender.recommend(user.name, how_many=100)
        ranking = map(lambda x: x[0], rec)
        ap, mrr = AP(ranking, targets), MRR(ranking, targets)
        pd.to_pickle((ap, mrr),
                     outpath + 'numFB{}i{}.pkl'.format(numFeedbacks, i))
    except Exception:
        # Still best-effort, but record which user failed instead of the
        # old bare `except: pass`, which silently hid every error
        # (including typos / NameErrors).
        import traceback
        traceback.print_exc()
def build_and_save_recommendations(self, user_items_preference_map):
    """Compute and persist both item-based and user-based recommendations.

    Item-item recommendations are saved synchronously; user-based ones are
    fanned out to UserRecommenderWorker processes through a work queue.
    """
    model = MatrixPreferenceDataModel(user_items_preference_map)
    #item_similarity = ItemSimilarity(model, loglikehood_coefficient)
    similarity_between_users = UserSimilarity(model, loglikehood_coefficient)
    per_item_recommendations = ItemBasedColaborativeFiltering(
        user_items_preference_map).build_recommendations()
    recommender_for_users = UserBasedRecommender(
        model, similarity_between_users, with_preference=True)
    # Persist the item-item recommendations first.
    for item_id, weight_by_item in per_item_recommendations.iteritems():
        ItemRecommendations.save_recommendations_for_item(
            item_id, weight_by_item.iteritems())
    # Queue every known user for the parallel user-based pass.
    pending_users = multiprocessing.Queue()
    for uid in model.user_ids().tolist():
        pending_users.put(uid)
    ParallelWork(UserRecommenderWorker,
                 (pending_users, recommender_for_users)).begin()
def get_recommend(request):
    """Return JSON course recommendations for the logged-in user.

    Responses:
      - [{'result': 'notlogin'}]  when the user is not authenticated
      - [{'result': 'no jimmy'}]  for user id 1 (explicitly excluded)
      - otherwise a list of recommended courses with their metadata.

    The dead ``if 1 == 1:`` wrapper (whose ``else`` branch was unreachable)
    has been removed.
    """
    # Check login status; return "notlogin" if not authenticated.
    if not request.user.is_authenticated():
        return HttpResponse(json.dumps([{'result': 'notlogin'}]))
    user_id = request.user.id
    if user_id == 1:
        return HttpResponse(json.dumps([{'result': 'no jimmy'}]))

    # Fetch all rating rows.
    result_list = [{
        'user_id': row.u_id,
        'course_id': row.c_id.id,
        'score': row.cs_score,
        'comment': row.cs_comment
    } for row in course_score.objects.all().order_by('u_id')]

    # Convert the ratings into the crab input format.
    crabdata = getCrabFormat(result_list)

    # Build the user-based recommender and recommend for this user.
    model = MatrixPreferenceDataModel(crabdata)
    similarity = UserSimilarity(model, pearson_correlation)
    recommender = UserBasedRecommender(model, similarity, with_preference=True)
    recommend_list = [list(pair) for pair in recommender.recommend(user_id)]

    # Decorate each recommended course id with course name/teacher/etc.
    result_list = list()
    for course_id, score in recommend_list:
        for course_row in course.objects.filter(id=course_id):
            # Hoist the repeated raw_course_set.all()[0] lookups (the
            # original re-ran this related-set query four times per row).
            raw = course_row.raw_course_set.all()[0]
            result_list.append({
                'name': course_row.c_name,
                'teacher': course_row.c_teacher,
                'credit': raw.rc_credit,
                'depart': raw.rc_department,
                'semester': raw.rc_semester,
                'c_id': course_id,
                'raw_cid': raw.id,
                'score': score
            })
    return HttpResponse(json.dumps(result_list))
def data_points_to_crab_model(data_points):
    """Build a crab MatrixPreferenceDataModel from (user, item, rating) triples."""
    from scikits.crab.models import MatrixPreferenceDataModel
    from collections import defaultdict

    prefs = defaultdict(dict)
    for user, item, rating in data_points:
        prefs[user][item] = rating
    return MatrixPreferenceDataModel(prefs)
def custom_recommend(dataset, recommend_to, num_of_recommendations = 10):
    """User-based collaborative filtering.

    Builds a preference model over *dataset*, measures user similarity with
    Euclidean distance, and returns up to *num_of_recommendations*
    (item, preference) pairs for user *recommend_to*.
    """
    preference_model = MatrixPreferenceDataModel(dataset)
    user_similarity = UserSimilarity(preference_model, euclidean_distances)
    engine = UserBasedRecommender(preference_model, user_similarity,
                                  with_preference = True)
    return engine.recommend(recommend_to, num_of_recommendations)
def api_import1():
    """Demo endpoint: build a user-based recommender over the sample movie
    data, pretty-print recommendations for user 5 (Toby), and return a link
    to the articles listing.
    """
    movies = datasets.load_sample_movies()
    import pprint  ## to make printed items clearer
    pprint.pprint(movies.data)
    pprint.pprint(movies.user_ids)
    pprint.pprint(movies.item_ids)
    from scikits.crab.models import MatrixPreferenceDataModel
    #Build the model
    model = MatrixPreferenceDataModel(movies.data)
    from scikits.crab.metrics import pearson_correlation
    from scikits.crab.similarities import UserSimilarity
    #Build the similarity
    similarity = UserSimilarity(model, pearson_correlation)
    # NOTE: removed unused `from sklearn.base import BaseEstimator` import.
    from scikits.crab.recommenders.knn import UserBasedRecommender
    #build the user Based recommender
    recommender = UserBasedRecommender(model, similarity, with_preference=True)
    #recommend item for the user 5 (Toby)
    recomendations = recommender.recommend(5)
    pprint.pprint(recomendations)
    return 'List of ' + url_for('api_articles')
def user_base(input_file, output_file, data_file):
    """User-based CF: recommend products for the user named in *input_file*
    and write ``product_name,score`` lines to *output_file*.

    Parameters
    ----------
    input_file : str   -- file containing the target user's name
    output_file : str  -- destination for the recommendation results
    data_file : str    -- shopping-history source used by the helpers
    """
    from scikits.crab import datasets
    # movies = datasets.load_sample_movies()
    user_name = read_user_name(input_file)
    shopping_history = load_test_data(data_file)
    user_order_num = userName_to_userOrderNum(data_file, user_name)
    #Build the model
    from scikits.crab.models import MatrixPreferenceDataModel
    model = MatrixPreferenceDataModel(shopping_history.data)
    #Build the similarity
    # cosine_distances does not need explicit scores
    # (pearson_correlation, by contrast, does).
    from scikits.crab.metrics import cosine_distances
    from scikits.crab.similarities import UserSimilarity
    similarity = UserSimilarity(model, cosine_distances)
    # User-based recommendation.
    from scikits.crab.recommenders.knn import UserBasedRecommender
    recommender = UserBasedRecommender(model, similarity, with_preference=True)
    ret = recommender.recommend(user_order_num)
    if ret:
        # `with` guarantees the file is flushed and closed even on error;
        # the original opened the handle and never closed it.
        with open(output_file, 'w') as f_w:
            for product_order_num, product_score in ret:
                product_name = productOrderNum_to_productName(
                    data_file, product_order_num)
                f_w.write(str(product_name) + ',' + str(product_score) + '\n')
from scikits.crab import datasets movies = datasets.load_sample_movies() songs = datasets.load_sample_songs() print movies.data print movies.user_ids print movies.item_ids from scikits.crab.models import MatrixPreferenceDataModel #Build the model model = MatrixPreferenceDataModel(movies.data) from scikits.crab.metrics import pearson_correlation from scikits.crab.similarities import UserSimilarity #Build the similarity similarity = UserSimilarity(model, pearson_correlation) print similarity[1] from scikits.crab.metrics.pairwise import euclidean_distances similarity = UserSimilarity(model, euclidean_distances) print similarity[5] from scikits.crab.recommenders.knn import UserBasedRecommender #Build the User based recommender recommender = UserBasedRecommender(model, similarity, with_preference=True) #Recommend items for the user 5 (Toby) recommender.recommend(5)
from scikits.crab.models import MatrixPreferenceDataModel
from scikits.crab.metrics import pearson_correlation
from scikits.crab.similarities import UserSimilarity
from pymongo import MongoClient
import os

# Rebuild the collaborative-filtering model from MongoDB and persist it,
# keeping the previous model file as a backup.

# get data from the database
client = MongoClient('mongodb://localhost:27017/')
db = client['if']
# TODO generate a table of user bubble ratings
# can be based on number of visits or something
data = db['collaborativeFilterData'].find()
# NOTE(review): find() returns a pymongo Cursor, not the nested
# {user: {item: pref}} dict crab models normally take -- verify the model
# accepts it or materialize the cursor first.
model = MatrixPreferenceDataModel(data)
# backup the old model and save the new one
# NOTE(review): os.rename raises if the current model file does not exist
# (e.g. first run) -- confirm that case cannot occur here.
os.rename("/data/models/collaborativeFilter",
          "/data/models/collaborativeFilter_bk")
model.save("/data/models/collaborativeFilter")
def collaborativeFilter(userId, searchTerm):
    """Recommend items for *userId* from the persisted CF model.

    NOTE(review): *searchTerm* is currently unused; kept for interface
    compatibility with callers.
    """
    # os.read() takes a file descriptor plus a byte count, not a path --
    # the original raised TypeError.  Open (and close) the file properly.
    with open("/data/models/collaborativeFilter") as f:
        data = f.read()
    # NOTE(review): MatrixPreferenceDataModel expects preference data, not
    # raw file text -- verify the on-disk format matches what it accepts.
    model = MatrixPreferenceDataModel(data)
    similarity = UserSimilarity(model, pearson_correlation)
    recommender = UserBasedRecommender(model, similarity, with_preference=True)
    # Recommend for the requested user instead of the hard-coded id 50,
    # which ignored the userId parameter entirely.
    return recommender.recommend(userId)
# NOTE(review): `pre` is bound in earlier context not visible in this chunk;
# the loop both iterates it and rebinds the name per element.
for (item, pre) in pre:
    # print 'item=', item, ',pre=', pre
    if np.isnan(pre):
        print 'item=', item


def testParam(how_many=None, **param):
    # Debug helper: shows how keyword arguments arrive at a function.
    print 'how_many:', how_many
    print 'param:', param


if __name__ == '__main__':
    # testParam(how_many=12, tt=10, dd=40)
    start_time = time.time()
    file_name = 'score_data.txt'
    with open(file_name) as f:
        d = f.read()
    # print 'd=', d
    # NOTE(review): eval() on file contents executes arbitrary code; only
    # safe if score_data.txt is fully trusted -- consider ast.literal_eval.
    d = eval(d)
    model = MatrixPreferenceDataModel(d)  #build the model
    similarity = UserSimilarity(model, pearson_correlation,
                                num_best=50)  #build the similarity
    recommender = UserBasedRecommender(
        model, similarity, with_preference=True)  #build the recommender
    print recommender.recommend(user_id=31071, how_many=30,
                                minimal_similarity=0.8)
    # print recommender.recommend(20832)
    # print recommender.most_similar_users(user_id=31071)
    # print d[20832]
    print '====recommand time:', time.time() - start_time
    '''
    for key in d:
        print '===key===', key
        print recommender.recommend(key)
    '''
# search the keyword in recent and popular tweets twitter = Twython(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET) tweets = twitter.search(q=keyword, count=100, result_type='mixed') # count the text's words from collections import Counter user_pref_words_list = {} count = 0 for u_num in range(0, len(tweets['statuses'])): text = tweets['statuses'][u_num]['text'] user_pref_words_list[u_num] = dict( Counter(text.replace('\n', ' ').split(' '))) # calculate user's preference based on frequency of the number of words by using crab from scikits.crab.models import MatrixPreferenceDataModel model = MatrixPreferenceDataModel(user_pref_words_list) from scikits.crab.metrics import pearson_correlation from scikits.crab.similarities import UserSimilarity similarity = UserSimilarity(model, pearson_correlation) from scikits.crab.recommenders.knn import UserBasedRecommender recommender = UserBasedRecommender(model, similarity, with_preference=True) # calculate each word preference average from collections import defaultdict words_pref = defaultdict(float) for u_num, pref_words_list in user_pref_words_list.items(): for word, count in pref_words_list.items(): if (words_pref[word]): words_pref[word] = (words_pref[word] + float(count)) / 2.0
# NOTE(review): this chunk opens mid-literal -- the start of the
# movies['data'] dict (users 1..6) lies before the visible range; entry 7
# below is its final user before the closing brace.
7: {
    1: 2.5,
    4: 3.5,
    5: 4.0
}
}
# User id -> display name.
movies['user_ids'] = {
    1: 'Jack Matthews',
    2: 'Mick LaSalle',
    3: 'Claudia Puig',
    4: 'Lisa Rose',
    5: 'Toby',
    6: 'Gene Seymour',
    7: 'Michael Phillips'
}
# Item id -> movie title.
movies['item_ids'] = {
    1: 'Lady in the Water',
    2: 'Snakes on a Planet',
    3: 'You, Me and Dupree',
    4: 'Superman Returns',
    5: 'The Night Listener',
    6: 'Just My Luck'
}
# Build a Pearson user-based recommender over the ratings and print
# recommendations for user 5 (Toby).
model = MatrixPreferenceDataModel(movies['data'])
similarity = UserSimilarity(model, pearson_correlation)
recommender = UserBasedRecommender(model, similarity, with_preference=True)
print recommender.recommend(5)
def recommender(request): freelancer = {"user_ids": {}, 'data': {}, 'item_ids': {}} for user in User.objects.all(): freelancer['user_ids'][user.id] = user.username for user in User.objects.all(): freelancer['item_ids'][user.id] = user.username test_item = {"ios": 1, "ui": 2, "android": 3, "website": 4} for user in User.objects.all(): freelancer['data'][user.id] = {} for project in Project.objects.all(): if project.user_id.username == project.user_id.id: freelancer['data'][user.id][test_item[proj_type]] = 4 break for id, item in test_item.items(): freelancer['data'][user.id][item] = random.randint(1, 4) print freelancer['data'] freelancer['data'] = { 1: { 1: 3.0, 2: 4.0, 3: 3.5, 4: 5.0, 5: 3.0 }, 2: { 1: 3.0, 2: 4.0, 3: 2.0, 5: 3.0, 6: 2.0 }, 3: { 2: 3.5, 3: 2.5, 4: 4.0, 5: 4.5, 6: 3.0 }, 4: { 1: 2.5, 2: 3.5, 3: 2.5, 4: 3.5, 5: 3.0, 6: 3.0 }, 5: { 2: 4.5, 3: 1.0, 4: 4.0 }, 6: { 1: 3.0, 2: 3.5, 3: 3.5, 4: 5.0, 5: 3.0, 6: 1.5 }, 7: { 1: 2.5, 4: 3.5, 5: 4.0 } } model = MatrixPreferenceDataModel(freelancer['data']) similarity = UserSimilarity(model, pearson_correlation) recommender = UserBasedRecommender(model, similarity, with_preference=True) print freelancer['data'] print recommender.recommend(request.user.id) return JsonResponse({ "status": True, "data": recommender.recommend(request.user.id) })
def create_ml_model_for_recomendations(data):
    """Wrap raw preference data in a crab MatrixPreferenceDataModel."""
    from scikits.crab.models import MatrixPreferenceDataModel

    return MatrixPreferenceDataModel(data)
from scikits.crab.similarities import UserSimilarity
from scikits.crab.metrics import euclidean_distances
from scikits.crab.recommenders.knn import UserBasedRecommender

# For reproducibility
np.random.seed(1000)

if __name__ == '__main__':
    # Sparse user -> {item: rating} preference matrix.
    ratings = {
        1: {1: 2, 2: 5, 3: 3},
        2: {1: 5, 4: 2},
        3: {2: 3, 4: 5, 3: 2},
        4: {3: 5, 5: 1},
        5: {1: 3, 2: 3, 4: 1, 5: 3}
    }

    # Preference model over the ratings.
    prefs = MatrixPreferenceDataModel(ratings)

    # User-user similarity under the Euclidean metric.
    user_sim = UserSimilarity(prefs, euclidean_distances)

    # kNN user-based recommender that also reports predicted preferences.
    engine = UserBasedRecommender(prefs, user_sim, with_preference=True)

    # Exercise the recommender for user 2, silencing library warnings.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        print(engine.recommend(2))
# NOTE(review): the ArgumentParser(...) call opens before this chunk; the
# first line below is its trailing keyword argument.
description='Welcome to Recommender system')
parser.add_argument('Active_user', type=int,
                    help='Insert id of user to recommend :')
parser.add_argument('student_ID', type=int,
                    help='Insert student_id of user to recommend :')
args = parser.parse_args()
active_user = int(args.Active_user)
student_ID = int(args.student_ID)
#start_time = timeit.default_timer()
# Build the course dataset for this student and remap their id into the
# dataset's internal user-id space.
myDataset_course = create_finally_dataset(50, active_user, student_ID)
#pprint(myDataset_course)
new_active_user_id = find_new_user_id(myDataset_course, str(student_ID))
# Item-based kNN recommender with Pearson item similarity.
model = MatrixPreferenceDataModel(myDataset_course['data'])
similarity_item = ItemSimilarity(model, pearson_correlation)
neighborhood_item = ItemsNeighborhoodStrategy()
recsys_item = ItemBasedRecommender(model, similarity_item, neighborhood_item,
                                   with_preference=True)
#recommend_top_5_item = recsys_item.recommended_because(new_active_user_id,42,how_many=5)
# Top-5 recommendations for the remapped active user.
recommend_list_item = recsys_item.recommend(new_active_user_id, how_many=5)
#print("Item : " +recommend_list_to_json(recommend_list_item))
#evaluator = CfEvaluator()
#test_item_a = evaluator.evaluate_on_split(recsys_item,at=4, sampling_ratings=0.5,permutation=False,cv=5)
#pprint(test_item_a)
#elapsed = timeit.default_timer() - start_time
#print elapsed
# Load a user -> {item: rating} nested dict from the ratings CSV, then
# build an item-based kNN recommender over it.
dataset = {}
with open('dataset-recsys.csv') as myfile:
    reader = csv.DictReader(myfile, delimiter=',')
    # DictReader already consumes the header row to build its field names,
    # so every yielded row is data: the old `i == 1: continue` guard
    # silently dropped the first real rating record.
    for line in reader:
        user_id = int(line['user_id'])
        if user_id not in dataset:
            dataset[user_id] = {}
        dataset[user_id][int(line['item_id'])] = float(line['star_rating'])

model = MatrixPreferenceDataModel(dataset)

# User-based Similarity
#similarity = UserSimilarity(model, cosine_distances)
#neighborhood = NearestNeighborsStrategy()
#recsys = UserBasedRecommender(model, similarity, neighborhood)

# Item-based Similarity
similarity = ItemSimilarity(model, cosine_distances)
nhood_strategy = ItemsNeighborhoodStrategy()
recsys = ItemBasedRecommender(model, similarity, nhood_strategy,
                              with_preference=False)
#recsys = MatrixFactorBasedRecommender(model=model, items_selection_strategy=nhood_strategy, n_features=10, n_interations=1)
# Build a (movie x user) ratings table from the raw tuples and feed it to a
# crab recommender.
ratings_pd = pd.DataFrame(
    ratings, columns=["user_id", "movie_id", "rating", "timestamp"])
ratings_pd = ratings_pd.drop(["timestamp"], 1)
movies = ratings_pd.pivot(index="movie_id", columns="user_id",
                          values="rating")
# Restrict to a fixed subset of movie ids ...
i = [
    '55', '56', '64', '71', '94', '102', '118', '133', '141', '168', '173',
    '196', '1278', '928', '780', '651', '514', '483', '432', '404'
]
movies = movies.loc[i, :]
# ... and a fixed subset of user ids.
l = [
    '13', '128', '201', '207', '222', '23', '234', '246', '267', '268',
    '269', '276', '493', '642', '655', '846', '95', '262', '194', '130', '1'
]
movies = movies.loc[:, l]
# Missing ratings become 0.0.
movies = movies.astype(float).fillna(0.0)
movie_dict = movies.to_dict()
from scikits.crab.models import MatrixPreferenceDataModel
from scikits.crab.metrics import euclidean_distances
from scikits.crab.similarities import ItemSimilarity
from scikits.crab.recommenders.knn import UserBasedRecommender
model = MatrixPreferenceDataModel(movie_dict)
# NOTE(review): an ItemSimilarity is handed to a *User*BasedRecommender;
# crab's user-based kNN normally pairs with UserSimilarity -- confirm this
# combination is intentional.
similarity = ItemSimilarity(model, euclidean_distances)
recommender = UserBasedRecommender(model, similarity, with_preference=True)
""" print '==========movies data============' print 'user_ids :', movies.user_ids print 'user_data:', movies.data print 'item_ids :', movies.item_ids print '==========movies data============' """ """ #data format of movies user_ids : {1: 'Jack Matthews', 2: 'Mick LaSalle', 3: 'Claudia Puig', 4: 'Lisa Rose', 5: 'Toby', 6: 'Gene Seymour', 7: 'Michael Phillips'} user_data: { 1: {1: 3.0, 2: 4.0, 3: 3.5, 4: 5.0, 5: 3.0 }, 2: {1: 3.0, 2: 4.0, 3: 2.0, 4: 3.0, 5: 3.0, 6: 2.0}, 3: { 2: 3.5, 3: 2.5, 4: 4.0, 5: 4.5, 6: 3.0}, 4: {1: 2.5, 2: 3.5, 3: 2.5, 4: 3.5, 5: 3.0, 6: 3.0}, 5: { 2: 4.5, 3: 1.0, 4: 4.0 }, 6: {1: 3.0, 2: 3.5, 3: 3.5, 4: 5.0, 5: 3.0, 6: 1.5}, 7: {1: 2.5, 2: 3.0, 4: 3.5, 5: 4.0 }} item_ids : {1: 'Lady in the Water', 2: 'Snakes on a Planet', 3: 'You, Me and Dupree', 4: 'Superman Returns', 5: 'The Night Listener', 6: 'Just My Luck'} """ model = MatrixPreferenceDataModel(movies.data) #build the model similarity = UserSimilarity(model, pearson_correlation) #build the similarity recommender = UserBasedRecommender( model, similarity, with_preference=True) #build the recommender # print recommender.recommend(3) # print recommender.recommend(1, how_many=None, minimal_similarity=5.0) print recommender.recommend(1, how_many=100, model__minimal_similarity=0.5) # print recommender.most_similar_users(1)
print data.user_ids # {1: 'Jack Matthews', # 2: 'Mick LaSalle', # 3: 'Claudia Puig', # 4: 'Lisa Rose', # 5: 'Toby', # 6: 'Gene Seymour', # 7: 'Michael Phillips'} print data.item_ids # {1: 'Lady in the Water', # 2: 'Snakes on a Planet', # 3: 'You, Me and Dupree', # 4: 'Superman Returns', # 5: 'The Night Listener', # 6: 'Just My Luck'} # Build the data model model = MatrixPreferenceDataModel(data.data) # Build the similarity similarity = UserSimilarity(model, pearson_correlation) # Build the User based recommender recommender = UserBasedRecommender(model, similarity, with_preference=True) # Recommend items for the user 5 (Toby) print recommender.recommend(5, how_many=3) # [(5, 3.3477895267131013), (1, 2.8572508984333034), (6, 2.4473604699719846)]