コード例 #1
0
ファイル: recosystem.py プロジェクト: ethaniz/RecoTour
def compute_mapk(interactions_dict, recomendations_dict):
	"""Score a set of recommendations against observed interactions.

	For every key of ``recomendations_dict`` (in that mapping's iteration
	order) one "actual" list and one "predicted" list are built, and the two
	parallel lists are handed to ``mapk``.

	Args:
		interactions_dict: mapping from key to an iterable of the items that
			were actually interacted with. It is indexed with every key of
			``recomendations_dict``; a plain dict raises ``KeyError`` for a
			missing key.
		recomendations_dict: mapping from key to an iterable of recommended
			items.

	Returns:
		Whatever ``mapk(actual, pred)`` returns. ``mapk`` is defined outside
		this block -- presumably mean average precision at k; confirm
		against the file's imports.
	"""
	# Build both lists in one pass so they stay aligned entry by entry.
	paired = [
		(list(interactions_dict[key]), list(recommended))
		for key, recommended in recomendations_dict.items()
	]
	actual = [observed for observed, _ in paired]
	pred = [suggested for _, suggested in paired]
	return mapk(actual, pred)
コード例 #2
0
ファイル: recosystem.py プロジェクト: ethaniz/RecoTour
		def objective(params):
			"""
			Objective function for the LightGBM hyper-parameter search.

			Runs a 3-fold ``lgb.cv`` with early stopping to pick the number of
			boosting rounds, fits an ``LGBMRegressor`` with that many rounds,
			scores ``X_valid`` and ranks the scores into recommendations that
			are evaluated with ``mapk``.

			``params`` is modified in place. ``objective.i`` is a call counter
			read and incremented here, so the enclosing code must set it
			before the first call. The round count chosen by CV is recorded
			in ``early_stop_dict`` under the current counter value.

			Returns ``1 - result`` where ``result`` is the ``mapk`` score
			(presumably so that a minimising optimiser maximises MAP).
			"""

			# hyperopt casts as float, so restore the integer-valued parameters
			for int_key in ('num_boost_round', 'num_leaves'):
				params[int_key] = int(params[int_key])

			# need to be passed as parameter
			params.update({'verbose': -1, 'seed': 1})

			cv_options = dict(
				nfold=3,
				metrics='rmse',
				num_boost_round=params['num_boost_round'],
				early_stopping_rounds=20,
				stratified=False,
			)
			cv_result = lgb.cv(params, train, **cv_options)

			# The length of the CV history is the number of rounds actually run.
			rounds_used = len(cv_result['rmse-mean'])
			early_stop_dict[objective.i] = rounds_used
			params['num_boost_round'] = rounds_used

			model = lgb.LGBMRegressor(**params)
			model.fit(
				train.data,
				train.label,
				feature_name=all_cols,
				categorical_feature=cat_cols,
			)

			df_eval['interest'] = model.predict(X_valid)
			recomendations_dict = self.recomendations_dictionary(
				df_eval, ranking_metric='interest')

			# One (actual, predicted) pair per key, kept aligned by construction.
			paired = [
				(list(interactions_valid_dict[key]), list(recommended))
				for key, recommended in recomendations_dict.items()
			]
			actual = [observed for observed, _ in paired]
			pred = [suggested for _, suggested in paired]
			result = mapk(actual, pred)

			if verbose:
				print("INFO: iteration {} MAP {:.3f}".format(objective.i, result))

			objective.i += 1

			return 1 - result
コード例 #3
0
# Evaluate `predictions` with accuracy.rmse. The return value is discarded, so
# this line only matters for whatever rmse reports when verbose=True
# (presumably it prints the RMSE, as surprise.accuracy does -- confirm against
# the file's imports).
accuracy.rmse(predictions, verbose=True)


def get_top_n(predictions, n=10):
    """Return the ``n`` highest-estimated items for every user.

    Args:
        predictions: iterable of 5-element records unpacked as
            ``(uid, iid, true_r, est, extra)``. Only ``uid``, ``iid`` and
            ``est`` are used.
        n: maximum number of ``(iid, est)`` pairs kept per user.

    Returns:
        A ``defaultdict(list)`` mapping each ``uid`` to its ``(iid, est)``
        pairs ordered by descending ``est`` and cut to the first ``n``.
        Pairs with equal estimates keep their input order. Because the
        result is a defaultdict, looking up an unknown ``uid`` yields (and
        stores) an empty list.
    """
    def _estimate(pair):
        return pair[1]

    # Group every (item, estimate) pair under its user.
    per_user = defaultdict(list)
    for uid, iid, _true_r, est, _extra in predictions:
        per_user[uid].append((iid, est))

    # Rank each user's pairs by estimate and keep only the best n.
    # Only existing keys are reassigned, so iterating the dict here is safe.
    for uid in per_user:
        ranked = sorted(per_user[uid], key=_estimate, reverse=True)
        per_user[uid] = ranked[:n]

    return per_user


# Keep at most 7 (item, estimate) pairs per user.
top_n = get_top_n(predictions, n=7)

# Evaluate: every row of `test` contributes one single-item "actual" list (its
# project_id) and the recommended ids for that row's userCode.
# get_top_n returns a defaultdict(list), so a userCode that has no predictions
# yields an empty recommendation list instead of raising KeyError.
actual_list = [[pid] for pid in test['project_id'].values]
predicted_list = [[project_id for (project_id, _) in top_n[uid]]
                  for uid in test['userCode'].values]

# NOTE(review): no `k` is passed, so mapk falls back to its own default cutoff
# while the lists above hold at most 7 items -- confirm that is intended.
print(average_precision.mapk(actual_list, predicted_list))

# Print the recommended items for each user
# for uid, ratings in top_n.items():
#     print(uid, [(project_id, rating) for (project_id, rating) in ratings])
コード例 #4
0
    # recommend_items(sample_userCode, 7)
    # Load the users to score: the first 50 rows of the tiny file in test mode,
    # otherwise the full ';'-delimited user file.
    if is_test:
        test = pd.read_csv('./input/test_tiny.csv', nrows=50)
    else:
        test = pd.read_csv('./input/testing_users.csv', delimiter=';')

    # One recommendation list per row of `test`, in row order.
    predicted_list = []

    with tqdm.tqdm(total=len(test)) as progress:
        for uid in test['userCode']:
            # print(visited_dict[uid])
            # Request 7 items, excluding visited_dict[uid] concatenated with
            # ignore_project (assumes both support `+`, e.g. lists -- confirm).
            recom = recommend_items(uid,
                                    7,
                                    items_to_ignore=visited_dict[uid] +
                                    ignore_project)  # todo item ignore
            predicted_list.append(recom)
            progress.update(1)

    # Hard-coded switch: set to 0 to skip the MAP@7 report.
    # This must run before the CSV step below, which overwrites
    # test['project_id'] with the predictions.
    # NOTE(review): assumes the loaded file has a 'project_id' column holding
    # the true item for each row -- confirm for testing_users.csv.
    evaluate = 1
    if evaluate:  #evaluate
        actual_list = [[pid] for pid in test['project_id'].values]
        print('{:.10f}'.format(
            average_precision.mapk(actual_list, predicted_list, k=7)))

    # Hard-coded switch: write the submission file, replacing 'project_id' with
    # each row's recommendations joined by single spaces.
    to_csv = 1
    if to_csv:
        test['project_id'] = [
            ' '.join(map(str, pre)) for pre in predicted_list
        ]
        test.to_csv('submission.csv', index=False)
コード例 #5
0
    # One recommendation list per *unique* userCode, in order of first appearance.
    predicted_list = []

    # NOTE(review): the bar's total is len(test) but it advances once per unique
    # userCode, so it stops short of 100% whenever userCode values repeat.
    with tqdm.tqdm(total=len(test)) as progress:
        for uid in test['userCode'].unique():
            # Score every project index 0..num_project-1 for this user; the
            # user is addressed by its position in unique_user_list.
            predictions = model.predict(unique_user_list.index(uid),
                                    np.arange(num_project),
                                    user_features=user_feature_matrix,
                                    item_features=item_feature_matrix
                                    )
            # Negating the scores makes argsort return highest score first.
            top_items = unique_project.iloc[np.argsort(-predictions)]
            top_list = []
            top_n = 0
            # Walk the ranking, skipping projects already in visited_dict[uid],
            # until 7 have been collected.
            for project_id in top_items.values:
                if project_id not in visited_dict[uid]: # todo add ignore project
                    top_list.append(project_id)
                    top_n+=1
                if top_n >= 7:
                    break
            predicted_list.append(top_list)
            progress.update(1)

    # Hard-coded switch: submission writing is currently disabled.
    # NOTE(review): predicted_list has one entry per unique userCode while the
    # column assignment below needs one per row of `test` -- confirm userCode
    # is unique in `test`.
    # NOTE(review): if this is enabled, test['project_id'] is overwritten with
    # the predictions before the evaluation below reads it as ground truth.
    to_csv = 0
    if to_csv:
        test['project_id'] = [' '.join(map(str, pre)) for pre in predicted_list]
        test[['userCode','project_id']].to_csv('submission_{}.csv'.format('lightfm'), index=False)
    # print(top_items.values)
    # MAP@7 with each row's project_id as the single relevant item.
    # NOTE(review): actual_list is per row, predicted_list per unique user; the
    # two only line up if userCode is unique in `test` -- confirm.
    if is_evaluate:
        actual_list = [[pid] for pid in test['project_id'].values]
        print('%.10f'%average_precision.mapk(actual_list, predicted_list, k=7))