def test_with_ratio_split():
    """End-to-end Experiment run with RatioSplit: checks result shapes and
    that invalid Experiment constructions raise ValueError."""
    data_file = './tests/data.txt'
    data = Reader.read_uir_triplets(data_file)
    exp = Experiment(eval_method=RatioSplit(data, verbose=True),
                     models=[PMF(1, 0)],
                     metrics=[MAE(), RMSE(), Recall(1), FMeasure(1)],
                     verbose=True)
    exp.run()

    # One model x four metrics.
    assert (1, 4) == exp.avg_results.shape

    assert 1 == len(exp.user_results)
    assert 4 == len(exp.user_results['PMF'])
    assert 2 == len(exp.user_results['PMF']['MAE'])
    assert 2 == len(exp.user_results['PMF']['RMSE'])
    assert 2 == len(exp.user_results['PMF']['Recall@1'])
    assert 2 == len(exp.user_results['PMF']['F1@1'])

    # Invalid constructions must raise ValueError. The previous
    # try/except ... assert True pattern could never fail; fail loudly
    # if no exception is raised.
    try:
        Experiment(None, None, None)
        assert False, "ValueError expected"
    except ValueError:
        pass

    try:
        Experiment(None, [PMF(1, 0)], None)
        assert False, "ValueError expected"
    except ValueError:
        pass
def generateLatentVectors(mongohost, mongoport, cerebro_url, dim):
    """Train a PMF model on ratings stored in MongoDB and persist the
    learned latent vectors back into the database.

    For every user/item known to the trained model, its latent vector is
    written to the ``users`` / ``items`` collections, then the Cerebro
    service is notified to rebuild its index.

    :param mongohost: MongoDB host name.
    :param mongoport: MongoDB port (string; embedded into the URL).
    :param cerebro_url: base URL of the Cerebro service.
    :param dim: dimensionality ``k`` of the latent factor space.
    """
    client = MongoClient(f'mongodb://{mongohost}:{mongoport}')
    cerebro = client.cerebro
    ratings = cerebro.ratings
    users = cerebro.users

    # Collect (user, item, rating) triplets from MongoDB.
    data = []
    for user in users.find():
        userid = user.pop("_id")
        last_itemid = ""
        for record in ratings.find({"userID": userid}):
            # Skips only *consecutive* duplicate items; non-adjacent
            # duplicates are kept. NOTE(review): presumably the ratings
            # cursor is ordered by item — confirm against the schema.
            if record['itemID'] == last_itemid:
                continue
            last_itemid = record['itemID']
            data.append((userid, last_itemid, record['rating']))

    ratio_split = RatioSplit(data=data, test_size=0.01,
                             rating_threshold=4.0, seed=5654)
    pmf = PMF(k=dim, max_iter=50, learning_rate=0.001)

    exp = Experiment(eval_method=ratio_split,
                     models=[pmf],
                     metrics=[cornac.metrics.MAE(),
                              cornac.metrics.RMSE(),
                              cornac.metrics.Recall(k=10),
                              cornac.metrics.NDCG(k=10),
                              cornac.metrics.AUC()],
                     user_based=True)
    exp.run()

    user_ids = list(pmf.train_set.user_ids)
    item_ids = list(pmf.train_set.item_ids)
    user_vecs = list(pmf.U)
    item_vecs = list(pmf.V)
    print("userid len:" + str(len(user_ids)))
    print("uservec len:" + str(len(user_vecs)))
    print("itemid len:" + str(len(item_ids)))
    print("itemVec len:" + str(len(item_vecs)))

    # Persist the latent vectors back to MongoDB.
    for uid, vec in zip(user_ids, user_vecs):
        users.update_one({"_id": uid}, {"$set": {"vec": list(vec)}})
    for iid, vec in zip(item_ids, item_vecs):
        cerebro.items.update_one({"_id": iid}, {"$set": {"vec": list(vec)}})

    # Ask Cerebro to rebuild its index with the fresh vectors.
    json_msg = {"msg": "update"}
    r = requests.post(url=cerebro_url + '/update/buildIdx', json=json_msg)
    print(r.text)
def test_splits(self):
    """Split once, verify the split-ran flag, then call again to
    exercise the already-split code path."""
    splitter = RatioSplit(
        self.data, test_size=0.1, val_size=0.1, seed=123, verbose=True
    )
    splitter._split()
    self.assertTrue(splitter._split_ran)
    # Second call: data has already been split.
    splitter._split()
def test_with_ratio_split(self):
    """Run a full Experiment with RatioSplit and verify that invalid
    Experiment constructions raise ValueError."""
    exp = Experiment(eval_method=RatioSplit(self.data, verbose=True),
                     models=[PMF(1, 0)],
                     metrics=[MAE(), RMSE(), Recall(1), FMeasure(1)],
                     verbose=True)
    exp.run()

    # The previous try/except ... assert True pattern could never fail;
    # fail loudly when no exception is raised.
    try:
        Experiment(None, None, None)
        assert False, "ValueError expected"
    except ValueError:
        pass

    try:
        Experiment(None, [PMF(1, 0)], None)
        assert False, "ValueError expected"
    except ValueError:
        pass
def test_splits():
    """Split once, check the internal flag, then split again to cover
    the already-split code path."""
    splitter = RatioSplit(
        reader.read_uir('./tests/data.txt'),
        test_size=0.1,
        val_size=0.1,
        seed=123,
        verbose=True,
    )
    splitter.split()
    assert splitter._split_ran
    # Second call: data has already been split.
    splitter.split()
def test_with_ratio_split(self):
    """Run an Experiment on data augmented with one extra rating while
    excluding unknowns, and verify invalid constructions raise ValueError."""
    Experiment(eval_method=RatioSplit(
        self.data + [(self.data[0][0], self.data[1][1], 5.0)],
        exclude_unknowns=True, seed=123, verbose=True),
        models=[PMF(1, 0)],
        metrics=[MAE(), RMSE()],
        verbose=True).run()

    # The previous try/except ... assert True pattern could never fail;
    # fail loudly when no exception is raised.
    try:
        Experiment(None, None, None)
        assert False, "ValueError expected"
    except ValueError:
        pass

    try:
        Experiment(None, [PMF(1, 0)], None)
        assert False, "ValueError expected"
    except ValueError:
        pass
def setUp(self):
    """Build a RatioSplit fixture over the test data file."""
    feedback = Reader().read("./tests/data.txt")
    self.eval_method = RatioSplit(
        feedback, test_size=0.2, val_size=0.2, exclude_unknowns=False
    )
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ """Your very first example with Cornac""" import cornac from cornac.eval_methods import RatioSplit from cornac.models import MF, PMF, BPR from cornac.metrics import MAE, RMSE, Precision, Recall, NDCG, AUC, MAP # load the built-in MovieLens 100K and split the data based on ratio ml_100k = cornac.datasets.movielens.load_feedback() rs = RatioSplit(data=ml_100k, test_size=0.2, rating_threshold=4.0, seed=123) # initialize models, here we are comparing: Biased MF, PMF, and BPR models = [ MF(k=10, max_iter=25, learning_rate=0.01, lambda_reg=0.02, use_bias=True, seed=123), PMF(k=10, max_iter=100, learning_rate=0.001, lambda_reg=0.001, seed=123), BPR(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, seed=123), ] # define metrics to evaluate the models metrics = [
# Load CiteULike article texts plus user-article feedback, keeping only
# interactions whose items have text available.
docs, item_ids = citeulike.load_text()
data = citeulike.load_data(reader=Reader(item_set=item_ids))

# build text module
item_text_modality = TextModality(
    corpus=docs, ids=item_ids,
    tokenizer=BaseTokenizer(stop_words='english'),
    max_vocab=8000, max_doc_freq=0.5)

# Train/test split; exclude_unknowns drops test entries unseen in training.
ratio_split = RatioSplit(data=data, test_size=0.2, exclude_unknowns=True,
                         item_text=item_text_modality, verbose=True,
                         seed=123, rating_threshold=0.5)

# CDR model; vocab_size matches max_vocab of the text modality above.
cdr = cornac.models.CDR(k=50, autoencoder_structure=[200], max_iter=100,
                        batch_size=128,
                        lambda_u=0.01, lambda_v=0.1, lambda_w=0.0001,
                        lambda_n=5,
                        learning_rate=0.001, vocab_size=8000)
""" Example for Visual Bayesian Personalized Ranking Original data: http://jmcauley.ucsd.edu/data/tradesy/ @author: Quoc-Tuan Truong <*****@*****.**> """ import cornac from cornac.datasets import tradesy from cornac.data import ImageModule from cornac.eval_methods import RatioSplit item_feature = tradesy.load_feature() # BIG file item_image_module = ImageModule(id_feature=item_feature, normalized=True) ratio_split = RatioSplit(data=tradesy.load_data(), test_size=0.1, rating_threshold=0.5, exclude_unknowns=True, verbose=True, item_image=item_image_module) vbpr = cornac.models.VBPR(k=10, k2=20, n_epochs=50, batch_size=100, learning_rate=0.005, lambda_w=1, lambda_b=0.01, lambda_e=0.0, use_gpu=True) auc = cornac.metrics.AUC() rec_50 = cornac.metrics.Recall(k=50) exp = cornac.Experiment(eval_method=ratio_split, models=[vbpr], metrics=[auc, rec_50]) exp.run()
def test_evaluate():
    """RatioSplit.evaluate() should work with and without unknowns excluded,
    for both rating-based and user-based evaluation."""
    data_file = './tests/data.txt'
    data = reader.read_uir(data_file)

    ratio_split = RatioSplit(data, exclude_unknowns=True, verbose=True)
    ratio_split.evaluate(MF(), [MAE(), Recall()], user_based=False)

    ratio_split = RatioSplit(data, exclude_unknowns=False, verbose=True)
    ratio_split.evaluate(MF(), [MAE(), Recall()], user_based=False)

    # Densify the data: one rating per (user, item) pair. Sets avoid the
    # quadratic duplicate blow-up of collecting one entry per original
    # rating (the old lists held duplicates, inflating the cross product).
    users = {u for u, _, _ in data}
    items = {i for _, i, _ in data}
    data.extend((u, i, 5) for u in users for i in items)

    ratio_split = RatioSplit(data, exclude_unknowns=True, verbose=True)
    ratio_split.evaluate(MF(), [MAE(), Recall()], user_based=True)

    ratio_split = RatioSplit(data, exclude_unknowns=False, verbose=True)
    ratio_split.evaluate(MF(), [MAE(), Recall()], user_based=True)
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Example for Social Bayesian Personalized Ranking with Epinions dataset"""

import cornac
from cornac.data import Reader, GraphModule
from cornac.datasets import epinions
from cornac.eval_methods import RatioSplit

# Feedback binarised via Reader, plus the user trust network as a graph.
feedback = epinions.load_data(Reader(bin_threshold=4.0))
trust_graph = GraphModule(data=epinions.load_trust())

# Hold out 10% of the feedback for testing.
eval_method = RatioSplit(
    data=feedback,
    test_size=0.1,
    rating_threshold=0.5,
    exclude_unknowns=True,
    verbose=True,
    user_graph=trust_graph,
)

model = cornac.models.SBPR(
    k=10,
    max_iter=50,
    learning_rate=0.001,
    lambda_u=0.015,
    lambda_v=0.025,
    lambda_b=0.01,
    verbose=True,
)

# Evaluate with Recall@10.
cornac.Experiment(
    eval_method=eval_method,
    models=[model],
    metrics=[cornac.metrics.Recall(k=10)],
).run()
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import cornac
from cornac.datasets import movielens
from cornac.eval_methods import RatioSplit
from cornac.models import IBPR

# Load the MovieLens 1M dataset.
ml_1m = movielens.load_1m()

# Evaluation protocol: 80/20 split, unknown users/items excluded.
eval_method = RatioSplit(
    data=ml_1m,
    test_size=0.2,
    rating_threshold=1.0,
    exclude_unknowns=True,
    verbose=True,
)

# IBPR recommender; U/V left as None so the model initialises them itself.
model = IBPR(k=10, init_params={'U': None, 'V': None}, verbose=True)

# Ranking metrics at cut-off 20.
ranking_metrics = [cornac.metrics.Recall(k=20), cornac.metrics.Precision(k=20)]

# Run the experiment.
cornac.Experiment(
    eval_method=eval_method,
    models=[model],
    metrics=ranking_metrics,
    user_based=True,
).run()
# -*- coding: utf-8 -*-
"""
Example for Matrix Factorization with biases

@author: Quoc-Tuan Truong <*****@*****.**>
"""

import cornac
from cornac.datasets import movielens
from cornac.eval_methods import RatioSplit

# 80/20 split on MovieLens 1M; keep unknown users/items in the test set.
eval_method = RatioSplit(
    data=movielens.load_1m(),
    test_size=0.2,
    exclude_unknowns=False,
    verbose=True,
)

model = cornac.models.MF(
    k=10,
    max_iter=25,
    learning_rate=0.01,
    lambda_reg=0.02,
    use_bias=True,
    early_stop=True,
    verbose=True,
)

# Rating-prediction metrics, evaluated per user.
cornac.Experiment(
    eval_method=eval_method,
    models=[model],
    metrics=[cornac.metrics.MAE(), cornac.metrics.RMSE()],
    user_based=True,
).run()
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ """Example for Bayesian Personalized Ranking with Netflix dataset""" import cornac from cornac.data import Reader from cornac.datasets import netflix from cornac.eval_methods import RatioSplit ratio_split = RatioSplit( data=netflix.load_data_small(reader=Reader(bin_threshold=1.0)), test_size=0.1, rating_threshold=1.0, exclude_unknowns=True, verbose=True) most_pop = cornac.models.MostPop() bpr = cornac.models.BPR(k=10, max_iter=100, learning_rate=0.001, lambda_reg=0.01, verbose=True) auc = cornac.metrics.AUC() rec_20 = cornac.metrics.Recall(k=20) cornac.Experiment(eval_method=ratio_split, models=[most_pop, bpr],
# -*- coding: utf-8 -*- """ Example for Matrix Factorization with biases @author: Quoc-Tuan Truong <*****@*****.**> """ import cornac from cornac.datasets import MovieLens100K from cornac.eval_methods import RatioSplit data = MovieLens100K.load_data() ratio_split = RatioSplit(data=data, test_size=0.2, exclude_unknowns=False, verbose=True) mf = cornac.models.MF(k=10, max_iter=25, learning_rate=0.01, lambda_reg=0.02, use_bias=True, early_stop=True, verbose=True) mae = cornac.metrics.MAE() rmse = cornac.metrics.RMSE() exp = cornac.Experiment(eval_method=ratio_split, models=[mf],
"""Example for Multi-Task Explainable Recommendation""" from cornac.datasets import amazon_toy from cornac.data import SentimentModality from cornac.eval_methods import RatioSplit from cornac.metrics import NDCG, RMSE from cornac.models import MTER from cornac import Experiment data = amazon_toy.load_rating() sentiment = amazon_toy.load_sentiment() md = SentimentModality(data=sentiment) eval_method = RatioSplit(data, test_size=0.2, rating_threshold=1.0, sentiment=md, exclude_unknowns=True, verbose=True, seed=123) mter = MTER(n_user_factors=15, n_item_factors=15, n_aspect_factors=12, n_opinion_factors=12, n_bpr_samples=1000, n_element_samples=200, lambda_reg=0.1, lambda_bpr=5, n_epochs=2000, lr=0.1, verbose=True,
# See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ """Example to run Probabilistic Matrix Factorization (PMF) model with Ratio Split evaluation strategy""" import cornac from cornac.datasets import movielens from cornac.eval_methods import RatioSplit from cornac.models import PMF # Load the MovieLens 100K dataset ml_100k = movielens.load_feedback() # Instantiate an evaluation method. ratio_split = RatioSplit(data=ml_100k, test_size=0.2, rating_threshold=4.0, exclude_unknowns=False) # Instantiate a PMF recommender model. pmf = PMF(k=10, max_iter=100, learning_rate=0.001, lambda_reg=0.001) # Instantiate evaluation metrics. mae = cornac.metrics.MAE() rmse = cornac.metrics.RMSE() rec_20 = cornac.metrics.Recall(k=20) pre_20 = cornac.metrics.Precision(k=20) # Instantiate and then run an experiment. cornac.Experiment( eval_method=ratio_split, models=[pmf],
def test_evaluate(self):
    """RatioSplit.evaluate() should work with and without unknowns excluded,
    for both rating-based and user-based evaluation."""
    ratio_split = RatioSplit(self.data, exclude_unknowns=True, verbose=True)
    ratio_split.evaluate(MF(), [MAE(), Recall()], user_based=False)

    ratio_split = RatioSplit(self.data, exclude_unknowns=False, verbose=True)
    ratio_split.evaluate(MF(), [MAE(), Recall()], user_based=False)

    # Densify the data: one rating per (user, item) pair. Sets avoid the
    # quadratic duplicate blow-up of collecting one entry per original
    # rating (the old lists held duplicates, inflating the cross product).
    users = {u for u, _, _ in self.data}
    items = {i for _, i, _ in self.data}
    self.data.extend((u, i, 5) for u in users for i in items)

    ratio_split = RatioSplit(self.data, exclude_unknowns=True, verbose=True)
    ratio_split.evaluate(MF(), [MAE(), Recall()], user_based=True)

    ratio_split = RatioSplit(self.data, exclude_unknowns=False, verbose=True)
    ratio_split.evaluate(MF(), [MAE(), Recall()], user_based=True)
def test_validate_size(self):
    """validate_size(val_size, test_size, total) resolves ratios/counts into
    absolute train/val/test sizes and rejects inconsistent inputs."""
    train_size, val_size, test_size = RatioSplit.validate_size(0.1, 0.2, 10)
    self.assertEqual(train_size, 7)
    self.assertEqual(val_size, 1)
    self.assertEqual(test_size, 2)

    # val_size=None defaults to no validation set.
    train_size, val_size, test_size = RatioSplit.validate_size(None, 0.5, 10)
    self.assertEqual(train_size, 5)
    self.assertEqual(val_size, 0)
    self.assertEqual(test_size, 5)

    # Both None: everything goes to training.
    train_size, val_size, test_size = RatioSplit.validate_size(None, None, 10)
    self.assertEqual(train_size, 10)
    self.assertEqual(val_size, 0)
    self.assertEqual(test_size, 0)

    # Values > 1 are absolute counts.
    train_size, val_size, test_size = RatioSplit.validate_size(2, 2, 10)
    self.assertEqual(train_size, 6)
    self.assertEqual(val_size, 2)
    self.assertEqual(test_size, 2)

    # Invalid sizes must raise ValueError. The previous
    # try/except ... assert True pattern could never fail;
    # assertRaises makes a missing exception a test failure.
    for bad_val, bad_test in [(-1, 0.2), (1, -0.2), (11, 0.2), (0, 11), (3, 8)]:
        with self.assertRaises(ValueError):
            RatioSplit.validate_size(bad_val, bad_test, 10)
from cornac.data.text import BaseTokenizer plots, movie_ids = movielens.load_plot() ml_1m = movielens.load_1m(reader=Reader(item_set=movie_ids)) # build text module item_text_modality = TextModality(corpus=plots, ids=movie_ids, tokenizer=BaseTokenizer( sep='\t', stop_words='english'), max_vocab=5000, max_doc_freq=0.5) ratio_split = RatioSplit(data=ml_1m, test_size=0.2, exclude_unknowns=True, item_text=item_text_modality, verbose=True, seed=123) hft = cornac.models.HFT(k=10, max_iter=40, grad_iter=5, l2_reg=0.001, lambda_text=0.01, vocab_size=5000, seed=123) mse = cornac.metrics.MSE() exp = cornac.Experiment(eval_method=ratio_split, models=[hft],
# limitations under the License. # ============================================================================ """Example to run Non-negative Matrix Factorization (NMF) model with Ratio Split evaluation strategy""" import cornac from cornac.datasets import movielens from cornac.eval_methods import RatioSplit # Load the MovieLens 100K dataset ml_100k = movielens.load_feedback() # Instantiate an evaluation method. eval_method = RatioSplit( data=ml_100k, test_size=0.2, rating_threshold=4.0, exclude_unknowns=True, verbose=True, seed=123, ) # Instantiate a NMF recommender model. nmf = cornac.models.NMF( k=15, max_iter=50, learning_rate=0.005, lambda_u=0.06, lambda_v=0.06, lambda_bu=0.02, lambda_bi=0.02, use_bias=False, verbose=True,
# In addition to learning from preference data, CVAECF further leverages users' auxiliary data (social network in this example). # The necessary data can be loaded as follows ratings = filmtrust.load_feedback() trust = filmtrust.load_trust() # Instantiate a GraphModality, it makes it convenient to work with graph (network) auxiliary information # For more details, please refer to the tutorial on how to work with auxiliary data user_graph_modality = GraphModality(data=trust) # Define an evaluation method to split feedback into train and test sets ratio_split = RatioSplit( data=ratings, test_size=0.2, rating_threshold=2.5, exclude_unknowns=True, verbose=True, user_graph=user_graph_modality, seed=123, ) # Instantiate CVAECF model cvaecf = CVAECF(z_dim=20, h_dim=20, autoencoder_structure=[40], learning_rate=0.001, n_epochs=70, batch_size=128, verbose=True, seed=123)
def test_validate_size():
    """validate_size(val_size, test_size, total) resolves ratios/counts into
    absolute train/val/test sizes and rejects inconsistent inputs."""
    train_size, val_size, test_size = RatioSplit.validate_size(0.1, 0.2, 10)
    assert 7 == train_size
    assert 1 == val_size
    assert 2 == test_size

    # val_size=None defaults to no validation set.
    train_size, val_size, test_size = RatioSplit.validate_size(None, 0.5, 10)
    assert 5 == train_size
    assert 0 == val_size
    assert 5 == test_size

    # Both None: everything goes to training.
    train_size, val_size, test_size = RatioSplit.validate_size(None, None, 10)
    assert 10 == train_size
    assert 0 == val_size
    assert 0 == test_size

    # Values > 1 are absolute counts.
    train_size, val_size, test_size = RatioSplit.validate_size(2, 2, 10)
    assert 6 == train_size
    assert 2 == val_size
    assert 2 == test_size

    # Invalid sizes must raise ValueError. The previous
    # try/except ... assert True pattern could never fail; fail loudly
    # when no exception is raised.
    for bad_val, bad_test in [(-1, 0.2), (1, -0.2), (11, 0.2), (0, 11), (3, 8)]:
        try:
            RatioSplit.validate_size(bad_val, bad_test, 10)
            assert False, "ValueError expected for (%s, %s)" % (bad_val, bad_test)
        except ValueError:
            pass
from cornac.eval_methods import RatioSplit
from cornac.experiment import Experiment
from cornac import metrics
from cornac.models import PCRL
from cornac.datasets import amazon_office as office

# Load office ratings and item contexts
ratings = office.load_rating()
contexts = office.load_context()

# Item-to-item context relations consumed through a graph module.
# NOTE(review): GraphModule is not imported in this snippet — presumably
# imported elsewhere in the file; verify.
item_graph_module = GraphModule(data=contexts)

# 80/20 split; shuffle before splitting and drop unknown users/items.
ratio_split = RatioSplit(data=ratings,
                         test_size=0.2,
                         rating_threshold=3.5,
                         shuffle=True,
                         exclude_unknowns=True,
                         verbose=True,
                         item_graph=item_graph_module)

pcrl = PCRL(k=100, z_dims=[300], max_iter=300, learning_rate=0.001)

# Evaluation metrics
nDgc = metrics.NDCG(k=-1)  # k=-1: evaluate over the full ranking
rec = metrics.Recall(k=20)
pre = metrics.Precision(k=20)

# Instantiate and run your experiment
exp = Experiment(eval_method=ratio_split,
                 models=[pcrl],
                 metrics=[nDgc, rec, pre])
# ============================================================================ """Example for hyper-parameter searching with Matrix Factorization""" import numpy as np import cornac from cornac.datasets import movielens from cornac.eval_methods import RatioSplit from cornac.hyperopt import Discrete, Continuous from cornac.hyperopt import GridSearch, RandomSearch # Load MovieLens 100K ratings ml_100k = movielens.load_feedback(variant="100K") # Define an evaluation method to split feedback into train, validation and test sets ratio_split = RatioSplit(data=ml_100k, test_size=0.1, val_size=0.1, verbose=True) # Instantiate MAE and RMSE for evaluation mae = cornac.metrics.MAE() rmse = cornac.metrics.RMSE() # Define a base MF model with fixed hyper-parameters mf = cornac.models.MF(max_iter=20, learning_rate=0.01, early_stop=True, verbose=True) # Wrap MF model inside GridSearch along with the searching space gs_mf = GridSearch( model=mf,
# See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ import cornac from cornac.eval_methods import RatioSplit from cornac.datasets import amazon_clothing from cornac.data import Reader # Load the Amazon Clothing dataset, and binarise ratings using cornac.data.Reader feedback = amazon_clothing.load_feedback(reader=Reader(bin_threshold=1.0)) # Define an evaluation method to split feedback into train and test sets ratio_split = RatioSplit(data=feedback, test_size=0.2, rating_threshold=1.0, seed=123, exclude_unknowns=True, verbose=True) # Instantiate the recommender models to be compared gmf = cornac.models.GMF(num_factors=8, num_epochs=10, learner='adam', batch_size=256, lr=0.001, num_neg=50, seed=123) mlp = cornac.models.MLP(layers=[64, 32, 16, 8], act_fn='tanh', learner='adam', num_epochs=10,
# SBPR integrates user social network into Bayesian Personalized Ranking. # The necessary data can be loaded as follows feedback = epinions.load_feedback( Reader(bin_threshold=4.0 )) # feedback is binarised (turned into implicit) using Reader. trust = epinions.load_trust() # Instantiate a GraphModality, it make it convenient to work with graph (network) auxiliary information # For more details, please refer to the tutorial on how to work with auxiliary data user_graph_modality = GraphModality(data=trust) # Define an evaluation method to split feedback into train and test sets ratio_split = RatioSplit(data=feedback, test_size=0.1, rating_threshold=0.5, exclude_unknowns=True, verbose=True, user_graph=user_graph_modality) # Instantiate SBPR sbpr = cornac.models.SBPR(k=10, max_iter=50, learning_rate=0.001, lambda_u=0.015, lambda_v=0.025, lambda_b=0.01, verbose=True) # Use Recall@10 for evaluation rec_10 = cornac.metrics.Recall(k=10)
# limitations under the License. # ============================================================================ """Example for Variational Autoencoder for Collaborative Filtering, using the CiteULike dataset""" import cornac from cornac.datasets import citeulike from cornac.eval_methods import RatioSplit # Load user-item feedback data = citeulike.load_feedback() # Instantiate an evaluation method to split data into train and test sets. ratio_split = RatioSplit( data=data, test_size=0.2, exclude_unknowns=True, verbose=True, seed=123, rating_threshold=0.5, ) # Instantiate the VAECF model vaecf = cornac.models.VAECF( k=10, autoencoder_structure=[20], act_fn="tanh", likelihood="mult", n_epochs=100, batch_size=100, learning_rate=0.001, beta=1.0, seed=123,
# VBPR extends Bayesian Personalized Randing to leverage item visual features (extracted from product images using CNN) # The necessary data can be loaded as follows feedback = tradesy.load_feedback() features, item_ids = tradesy.load_feature() # BIG file # Instantiate a ImageModality, it makes it convenient to work with visual auxiliary information # For more details, please refer to the tutorial on how to work with auxiliary data item_image_modality = ImageModality(features=features, ids=item_ids, normalized=True) # Define an evaluation method to split feedback into train and test sets ratio_split = RatioSplit( data=feedback, test_size=0.1, rating_threshold=0.5, exclude_unknowns=True, verbose=True, item_image=item_image_modality, ) # Instantiate CVAE vbpr = cornac.models.VBPR( k=10, k2=20, n_epochs=50, batch_size=100, learning_rate=0.005, lambda_w=1, lambda_b=0.01, lambda_e=0.0, use_gpu=True,