def test_load_feedback(self):
    """Check the record counts of the downloaded MovieLens feedback data.

    Each download check sits behind its own random draw, so it runs only
    about 20% of the time; this keeps frequent test runs fast. The default
    variant is expected to hold 100000 records and the '1M' variant
    1000209 records.
    """
    random.seed(time.time())

    # First gate: default (100K) variant.
    run_100k_check = random.random() > 0.8
    if run_100k_check:
        feedback_100k = movielens.load_feedback()
        self.assertEqual(len(feedback_100k), 100000)

    # Second gate: drawn independently of the first, for the 1M variant.
    run_1m_check = random.random() > 0.8
    if run_1m_check:
        feedback_1m = movielens.load_feedback(variant='1M')
        self.assertEqual(len(feedback_1m), 1000209)
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Example to run Probabilistic Matrix Factorization (PMF) model with Ratio Split evaluation strategy"""

import cornac
from cornac.datasets import movielens
from cornac.eval_methods import RatioSplit
from cornac.models import PMF

# Fetch the MovieLens 100K feedback data.
ml_100k = movielens.load_feedback()

# Build the ratio-split evaluation method (test_size=0.2).
ratio_split = RatioSplit(
    data=ml_100k,
    test_size=0.2,
    rating_threshold=4.0,
    exclude_unknowns=False,
)

# Configure the PMF recommender.
pmf = PMF(
    k=10,
    max_iter=100,
    learning_rate=0.001,
    lambda_reg=0.001,
)

# Metrics to report: MAE and RMSE, plus Recall and Precision with k=20.
mae = cornac.metrics.MAE()
rmse = cornac.metrics.RMSE()
rec_20 = cornac.metrics.Recall(k=20)
pre_20 = cornac.metrics.Precision(k=20)
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ """Example for Matrix Factorization with biases""" import cornac from cornac.datasets import movielens from cornac.eval_methods import RatioSplit # Load MovieLens 1M ratings ml_1m = movielens.load_feedback(variant="1M") # Define an evaluation method to split feedback into train and test sets ratio_split = RatioSplit( data=ml_1m, test_size=0.2, exclude_unknowns=False, verbose=True ) # Instantiate the global average baseline and MF model global_avg = cornac.models.GlobalAvg() mf = cornac.models.MF( k=10, max_iter=25, learning_rate=0.01, lambda_reg=0.02, use_bias=True, early_stop=True,
# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ """Example SKMeans vs BPR on MovieLens data""" import cornac from cornac.datasets import movielens from cornac.eval_methods import RatioSplit # Load user-item feedback data = movielens.load_feedback(variant="100K") # Instantiate an evaluation method to split data into train and test sets. ratio_split = RatioSplit( data=data, test_size=0.2, exclude_unknowns=True, verbose=True, seed=123, rating_threshold=0.5, ) # Instantiate models skm = cornac.models.SKMeans(k=5, max_iter=100, tol=1e-10, seed=123) bpr = cornac.models.BPR(k=5,
# See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ """Example for Hidden Factors as Topics (HFT) with MovieLens 1M dataset """ import cornac from cornac.data import Reader from cornac.datasets import movielens from cornac.eval_methods import RatioSplit from cornac.data import TextModality from cornac.data.text import BaseTokenizer # HFT jointly models the user-item preferences and item texts (e.g., product reviews) with shared item factors # Below we fit HFT to the MovieLens 1M dataset. We need both the ratings and movie plots information plots, movie_ids = movielens.load_plot() ml_1m = movielens.load_feedback(variant="1M", reader=Reader(item_set=movie_ids)) # Instantiate a TextModality, it makes it convenient to work with text auxiliary information # For more details, please refer to the tutorial on how to work with auxiliary data item_text_modality = TextModality( corpus=plots, ids=movie_ids, tokenizer=BaseTokenizer(sep="\t", stop_words="english"), max_vocab=5000, max_doc_freq=0.5, ) # Define an evaluation method to split feedback into train and test sets ratio_split = RatioSplit( data=ml_1m, test_size=0.2,
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ """Example for hyper-parameter searching with Matrix Factorization""" import numpy as np import cornac from cornac.datasets import movielens from cornac.eval_methods import RatioSplit from cornac.hyperopt import Discrete, Continuous from cornac.hyperopt import GridSearch, RandomSearch # Load MovieLens 100K ratings ml_100k = movielens.load_feedback(variant="100K") # Define an evaluation method to split feedback into train, validation and test sets ratio_split = RatioSplit(data=ml_100k, test_size=0.1, val_size=0.1, verbose=True) # Instantiate MAE and RMSE for evaluation mae = cornac.metrics.MAE() rmse = cornac.metrics.RMSE() # Define a base MF model with fixed hyper-parameters mf = cornac.models.MF(max_iter=20, learning_rate=0.01, early_stop=True,
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import cornac
from cornac.datasets import movielens
from cornac.eval_methods import RatioSplit
from cornac.models import IBPR

# Fetch the MovieLens 1M feedback data.
ml_1m = movielens.load_feedback(variant='1M')

# Build the ratio-split evaluation method (test_size=0.2).
ratio_split = RatioSplit(
    data=ml_1m,
    test_size=0.2,
    rating_threshold=1.0,
    exclude_unknowns=True,
    verbose=True,
)

# Configure the IBPR recommender; 'U' and 'V' are passed as None in
# init_params.
ibpr = IBPR(
    k=10,
    init_params={'U': None, 'V': None},
    verbose=True,
)

# Metrics to report: Recall and Precision with k=20.
rec_20 = cornac.metrics.Recall(k=20)
pre_20 = cornac.metrics.Precision(k=20)