def setUpClass(cls):
    """Fit one shared model on dummy indicator data and precompute ranks."""
    cls.interactions, cls.user_features, cls.item_features = \
        generate_dummy_data_with_indicator(num_users=10,
                                           num_items=12,
                                           interaction_density=.5)
    cls.model = TensorRec(n_components=10)
    cls.model.fit(cls.interactions, cls.user_features, cls.item_features,
                  epochs=10)
    cls.ranks = cls.model.predict_rank(user_features=cls.user_features,
                                       item_features=cls.item_features)
def test_fit_from_tfrecords(self):
    """The model should fit directly from TFRecord file paths."""
    # Start from a clean session before building a new graph
    set_session(None)
    tfrecord_model = TensorRec(n_components=10)
    tfrecord_model.fit(self.interactions_path,
                       self.user_features_path,
                       self.item_features_path,
                       epochs=10)
def setUpClass(cls):
    """Fit biased and unbiased n_tastes=3 attention models on dummy data."""
    cls.n_users = 15
    cls.n_items = 30
    dummy_data = generate_dummy_data(num_users=cls.n_users,
                                     num_items=cls.n_items,
                                     interaction_density=.5,
                                     num_user_features=200,
                                     num_item_features=200,
                                     n_features_per_user=20,
                                     n_features_per_item=20,
                                     pos_int_ratio=.5)
    cls.interactions, cls.user_features, cls.item_features = dummy_data

    cls.standard_model = TensorRec(
        n_components=10,
        n_tastes=3,
        user_repr_graph=NormalizedLinearRepresentationGraph(),
        attention_graph=LinearRepresentationGraph())
    cls.standard_model.fit(cls.interactions, cls.user_features,
                           cls.item_features, epochs=10)

    cls.unbiased_model = TensorRec(
        n_components=10,
        n_tastes=3,
        biased=False,
        user_repr_graph=NormalizedLinearRepresentationGraph(),
        attention_graph=LinearRepresentationGraph())
    cls.unbiased_model.fit(cls.interactions, cls.user_features,
                           cls.item_features, epochs=10)
def setUpClass(cls):
    """Fit biased and unbiased models on dummy data returned as datasets."""
    cls.n_users = 15
    cls.n_items = 30
    cls.interactions, cls.user_features, cls.item_features = \
        generate_dummy_data(num_users=cls.n_users,
                            num_items=cls.n_items,
                            interaction_density=.5,
                            num_user_features=200,
                            num_item_features=200,
                            n_features_per_user=20,
                            n_features_per_item=20,
                            pos_int_ratio=.5,
                            return_datasets=True)
    cls.standard_model = TensorRec(n_components=10)
    cls.standard_model.fit(cls.interactions, cls.user_features,
                           cls.item_features, epochs=10)
    cls.unbiased_model = TensorRec(n_components=10, biased=False)
    cls.unbiased_model.fit(cls.interactions, cls.user_features,
                           cls.item_features, epochs=10)
def test_balanced_wmrb_loss_biased(self):
    """A biased model should fit with the balanced WMRB loss graph."""
    biased_model = TensorRec(loss_graph=BalancedWMRBLossGraph(), biased=True)
    biased_model.fit(self.interactions,
                     self.user_features,
                     self.item_features,
                     epochs=5,
                     n_sampled_items=10)
def test_fit(self):
    """Fitting on fresh dummy data should build the dense prediction node."""
    interactions, user_features, item_features = generate_dummy_data(
        num_users=10, num_items=10, interaction_density=.5)
    fitted_model = TensorRec(n_components=10)
    fitted_model.fit(interactions, user_features, item_features, epochs=10)
    # The dense prediction node is only created during fitting
    self.assertIsNotNone(fitted_model.tf_prediction_dense)
def setUpClass(cls):
    """Write dummy data to TFRecord files and fit two models from them."""
    # Blow away an existing session to avoid 'tf_map_func not found' error
    set_session(None)
    cls.n_users = 15
    cls.n_items = 30
    matrices = generate_dummy_data(num_users=cls.n_users,
                                   num_items=cls.n_items,
                                   interaction_density=.5,
                                   num_user_features=200,
                                   num_item_features=200,
                                   n_features_per_user=20,
                                   n_features_per_item=20,
                                   pos_int_ratio=.5)
    cls.temp_dir = tempfile.mkdtemp()
    # Serialize each sparse matrix and keep its file path on the class
    for attr, matrix in zip(('interactions', 'user_features',
                             'item_features'), matrices):
        path = os.path.join(cls.temp_dir, attr + '.tfrecord')
        write_tfrecord_from_sparse_matrix(path, matrix)
        setattr(cls, attr, path)
    cls.standard_model = TensorRec(n_components=10)
    cls.standard_model.fit(cls.interactions, cls.user_features,
                           cls.item_features, epochs=10)
    cls.unbiased_model = TensorRec(n_components=10, biased=False)
    cls.unbiased_model.fit(cls.interactions, cls.user_features,
                           cls.item_features, epochs=10)
def test_save_and_load_model(self):
    """Predictions and ranks must survive a save, session reset, and load."""
    model = TensorRec(n_components=10)
    model.fit(self.interactions, self.user_features, self.item_features,
              epochs=10)
    predictions = model.predict(user_features=self.user_features,
                                item_features=self.item_features)
    ranks = model.predict_rank(user_features=self.user_features,
                               item_features=self.item_features)

    model.save_model(directory_path=self.test_dir)

    # Saving must not perturb the in-memory model's outputs
    predictions_after_save = model.predict(user_features=self.user_features,
                                           item_features=self.item_features)
    ranks_after_save = model.predict_rank(user_features=self.user_features,
                                          item_features=self.item_features)
    self.assertTrue((predictions == predictions_after_save).all())
    self.assertTrue((ranks == ranks_after_save).all())

    # Blow away the session and graph entirely
    set_session(None)
    tf.reset_default_graph()

    # A freshly-loaded model must reproduce the original outputs
    loaded_model = TensorRec.load_model(directory_path=self.test_dir)
    new_predictions = loaded_model.predict(user_features=self.user_features,
                                           item_features=self.item_features)
    new_ranks = loaded_model.predict_rank(user_features=self.user_features,
                                          item_features=self.item_features)
    self.assertTrue((predictions == new_predictions).all())
    self.assertTrue((ranks == new_ranks).all())
def test_wmrb_loss(self):
    """The model should fit with the WMRB loss graph and item sampling."""
    wmrb_model = TensorRec(loss_graph=WMRBLossGraph())
    wmrb_model.fit(self.interactions,
                   self.user_features,
                   self.item_features,
                   epochs=5,
                   n_sampled_items=10)
def test_basic_usage(self):
    """End-to-end smoke test: fit, predict, and score recall@10."""
    # Build the model with default parameters
    model = TensorRec()

    # Generate some dummy data
    interactions, user_features, item_features = generate_dummy_data(
        num_users=100, num_items=150, interaction_density=.05)

    # Fit the model
    model.fit(interactions, user_features, item_features,
              epochs=5, verbose=True)

    # Predict scores for user 75 on items 100, 101, and 102
    predictions = model.predict(user_ids=[75, 75, 75],
                                item_ids=[100, 101, 102],
                                user_features=user_features,
                                item_features=item_features)

    # Calculate and print the recall at 10
    r_at_k = recall_at_k(model, interactions, k=10,
                         user_features=user_features,
                         item_features=item_features)
    print(np.mean(r_at_k))

    self.assertIsNotNone(predictions)
def test_init_fail_none_factory(self):
    """Constructing a TensorRec with any None graph factory must fail."""
    for graph_kwarg in ('user_repr_graph', 'item_repr_graph', 'loss_graph'):
        with self.assertRaises(ValueError):
            TensorRec(**{graph_kwarg: None})
def test_fit_predict_unbiased(self):
    """An unbiased model should fit and predict a full user-by-item matrix."""
    unbiased_model = TensorRec(n_components=10, biased=False)
    unbiased_model.fit(self.interactions, self.user_features,
                       self.item_features, epochs=10)
    predictions = unbiased_model.predict(user_features=self.user_features,
                                         item_features=self.item_features)
    expected_shape = (self.user_features.shape[0],
                      self.item_features.shape[0])
    self.assertEqual(predictions.shape, expected_shape)
def test_fit(self):
    """Fitting should build the prediction node on the model."""
    fitted = TensorRec(n_components=10)
    fitted.fit(self.interactions, self.user_features, self.item_features,
               epochs=10)
    # The prediction node is only constructed during fitting
    self.assertIsNotNone(fitted.tf_prediction)
def test_fit_interactions_as_dataset(self):
    """Fitting should accept interactions given as a TensorRec dataset."""
    interactions_dataset = create_tensorrec_dataset_from_sparse_matrix(
        self.interactions)
    dataset_model = TensorRec(n_components=10)
    dataset_model.fit(interactions_dataset,
                      self.user_features,
                      self.item_features,
                      epochs=10)
def test_predict_item_repr(self):
    """Item representations should have shape (n_items, n_components)."""
    unbiased = TensorRec(n_components=10, biased=False)
    unbiased.fit(self.interactions, self.user_features, self.item_features,
                 epochs=10)
    item_repr = unbiased.predict_item_representation(self.item_features)
    self.assertEqual(item_repr.shape, (self.item_features.shape[0], 10))
def setUpClass(cls):
    """Generate dummy data and fit one shared model for the class."""
    cls.interactions, cls.user_features, cls.item_features = \
        generate_dummy_data(num_users=10,
                            num_items=12,
                            interaction_density=.5)
    cls.model = TensorRec(n_components=10)
    cls.model.fit(cls.interactions, cls.user_features, cls.item_features,
                  epochs=10)
def test_fit_from_datasets(self):
    """Fitting should work when all three inputs are TensorRec datasets."""
    user_ds = create_tensorrec_dataset_from_sparse_matrix(
        self.user_features, False)
    item_ds = create_tensorrec_dataset_from_sparse_matrix(
        self.item_features, True)
    interaction_ds = create_tensorrec_dataset_from_sparse_matrix(
        self.interactions, False)
    ds_model = TensorRec(n_components=10)
    ds_model.fit(interaction_ds, user_ds, item_ds, epochs=10)
def metric_test(self):
    """Benchmark local metrics against TensorRec's eval implementations.

    Fits a small model, ranks all items for all users, then checks that
    recall@k and precision@k from this package agree exactly with
    TensorRec's eval module on the same ranks and interactions.
    """
    k = 10
    latent_factor = 10
    n_users = 10
    n_items = 12
    interactions, user_features, item_features = \
        util.generate_dummy_data_with_indicator(num_users=n_users,
                                                num_items=n_items,
                                                interaction_density=.5)
    # Fixed typo in this debug message ('interactiosn' -> 'interactions')
    print("interactions shape={}".format(np.shape(interactions)))
    print("user features shape={}".format(np.shape(user_features.toarray())))
    print("item features shape={}".format(np.shape(item_features.toarray())))

    model = TensorRec(n_components=latent_factor)
    model.fit(interactions, user_features, item_features, epochs=19)

    ranks = model.predict_rank(user_features=user_features,
                               item_features=item_features)
    print("Ranks shape={}".format(np.shape(ranks)))
    self.assertTrue(np.shape(interactions) == np.shape(ranks))

    tr_recall_result = eval.recall_at_k(predicted_ranks=ranks,
                                        test_interactions=interactions,
                                        k=k,
                                        preserve_rows=False)
    tr_precision_result = eval.precision_at_k(predicted_ranks=ranks,
                                              test_interactions=interactions,
                                              k=k,
                                              preserve_rows=False)

    # The local metric implementations need CSR-format interactions data
    interactions_ = interactions.tocsr()
    recall_result = metrics.recall_at_k(ranks, interactions_,
                                        k=k, preserve_rows=False)
    precision_result = metrics.precision_at_k(ranks, interactions_,
                                              k=k, preserve_rows=False)

    # Both implementations compute on the same integer ranks, so exact
    # equality of the means is expected
    self.assertTrue(tr_recall_result.mean() == recall_result.mean())
    self.assertTrue(tr_precision_result.mean() == precision_result.mean())
def test_fit_fail_batching_dataset(self):
    """Batched fitting must reject dataset-typed interactions input."""
    batching_model = TensorRec(n_components=10)
    interactions_ds = create_tensorrec_dataset_from_sparse_matrix(
        self.interactions)
    with self.assertRaises(BatchNonSparseInputException):
        batching_model.fit(interactions_ds,
                           self.user_features,
                           self.item_features,
                           epochs=10,
                           user_batch_size=2)
def test_predict(self):
    """The prediction matrix shape should be (n_users, n_items)."""
    interactions, user_features, item_features = generate_dummy_data(
        num_users=10, num_items=20, interaction_density=.5)
    predict_model = TensorRec(n_components=10)
    predict_model.fit(interactions, user_features, item_features, epochs=10)
    predictions = predict_model.predict(user_features=user_features,
                                        item_features=item_features)
    self.assertEqual(predictions.shape,
                     (user_features.shape[0], item_features.shape[0]))
def test_wmrb_loss(self):
    """Fit a WMRB-loss model with stratified sampling and TF logging.

    The log directory is created per-run with tempfile instead of the
    previous hard-coded, machine-specific path ('/Users/jasonchen/...'),
    so the test can run on any machine.
    """
    import tempfile
    logdir = tempfile.mkdtemp()
    model = TensorRec(loss_graph=WMRBLossGraph(),
                      stratified_sample=True,
                      logdir=logdir,
                      log_interval=100)
    model.fit(self.interactions.tocsr(),
              self.user_features.tocsr(),
              self.item_features.tocsr()[:self.n_test_item],
              epochs=10,
              verbose=True,
              train_threads=5,
              use_reg=True)
def test_predict(self):
    """Predicting three (user, item) pairs should yield three scores."""
    interactions, user_features, item_features = generate_dummy_data(
        num_users=10, num_items=10, interaction_density=.5)
    pair_model = TensorRec(n_components=10)
    pair_model.fit(interactions, user_features, item_features, epochs=10)
    predictions = pair_model.predict(user_ids=[1, 2, 3],
                                     item_ids=[4, 5, 6],
                                     user_features=user_features,
                                     item_features=item_features)
    self.assertEqual(len(predictions), 3)
def test_movie_lens_fit_wmrb(self):
    """
    This test checks whether the movielens getter works and that the
    resulting data is viable for fitting/testing a TensorRec model.
    """
    train_interactions, test_interactions, user_features, item_features = \
        self.movielens_100k
    wmrb_model = TensorRec(loss_graph=wmrb_loss)
    wmrb_model.fit(interactions=train_interactions,
                   user_features=user_features,
                   item_features=item_features)
    predictions = wmrb_model.predict(user_features=user_features,
                                     item_features=item_features)
    self.assertIsNotNone(predictions)
def test_fit_fail_bad_input(self):
    """fit() must raise ValueError when any input is a dense ndarray."""
    model = TensorRec(n_components=10)
    bad_input = np.array([1, 2, 3, 4])
    good_inputs = (self.interactions, self.user_features, self.item_features)
    # Substitute the bad array into each argument position in turn
    for position in range(3):
        fit_args = list(good_inputs)
        fit_args[position] = bad_input
        with self.assertRaises(ValueError):
            model.fit(fit_args[0], fit_args[1], fit_args[2], epochs=10)
def test_movie_lens_fit(self):
    """
    This test checks whether the movielens getter works and that the
    resulting data is viable for fitting/testing a TensorRec model.
    """
    train_interactions, test_interactions, user_features, item_features, _ = \
        self.movielens_100k
    default_model = TensorRec()
    default_model.fit(interactions=train_interactions,
                      user_features=user_features,
                      item_features=item_features,
                      epochs=5)
    predictions = default_model.predict(user_features=user_features,
                                        item_features=item_features)
    self.assertIsNotNone(predictions)
def setUpClass(cls):
    """Generate dummy data and fit one standard model for the class."""
    cls.interactions, cls.user_features, cls.item_features = \
        generate_dummy_data(num_users=15,
                            num_items=30,
                            interaction_density=.5,
                            num_user_features=200,
                            num_item_features=200,
                            n_features_per_user=20,
                            n_features_per_item=20,
                            pos_int_ratio=.5)
    cls.standard_model = TensorRec(n_components=10)
    cls.standard_model.fit(cls.interactions, cls.user_features,
                           cls.item_features, epochs=10)
def test_fit(self, name, user_repr, item_repr, n_user_features,
             n_item_features, n_components):
    """Parameterized fit test over representation graph combinations."""
    interactions, user_features, item_features = generate_dummy_data(
        num_users=15,
        num_items=30,
        interaction_density=.5,
        num_user_features=n_user_features,
        num_item_features=n_item_features,
        n_features_per_user=20,
        n_features_per_item=20,
        pos_int_ratio=.5)
    repr_model = TensorRec(n_components=n_components,
                           user_repr_graph=user_repr(),
                           item_repr_graph=item_repr())
    repr_model.fit(interactions, user_features, item_features, epochs=10)
    # The prediction node is only constructed during fitting
    self.assertIsNotNone(repr_model.tf_prediction)
def test_custom_loss_graph(self):
    """A plain function can serve as a custom loss graph."""
    # Mean absolute error between predictions and labels
    def simple_error_graph(tf_prediction, tf_y, **kwargs):
        return tf.reduce_mean(tf.abs(tf_y - tf_prediction))

    custom_model = TensorRec(loss_graph=simple_error_graph)
    self.assertIsNotNone(custom_model)
def test_save_and_load_model_same_session(self):
    """A model reloaded in the same session must reproduce its outputs.

    The previous assertions compared `predictions.all()` against
    `new_predictions.all()` -- two scalar booleans -- which passes even
    when the arrays differ. They now assert element-wise equality,
    matching the other save/load tests in this suite.
    """
    model = TensorRec(n_components=10)
    model.fit(self.interactions, self.user_features, self.item_features,
              epochs=10)
    predictions = model.predict(user_features=self.user_features,
                                item_features=self.item_features)
    ranks = model.predict_rank(user_features=self.user_features,
                               item_features=self.item_features)
    model.save_model(directory_path=self.test_dir)

    # Reload the model, predict, and check for equal predictions
    new_model = TensorRec.load_model(directory_path=self.test_dir)
    new_predictions = new_model.predict(user_features=self.user_features,
                                        item_features=self.item_features)
    new_ranks = new_model.predict_rank(user_features=self.user_features,
                                       item_features=self.item_features)
    self.assertTrue((predictions == new_predictions).all())
    self.assertTrue((ranks == new_ranks).all())
def setUpClass(cls):
    """Fit biased and unbiased models with user/item normalization."""
    cls.interactions, cls.user_features, cls.item_features = \
        generate_dummy_data_with_indicator(num_users=10,
                                           num_items=20,
                                           interaction_density=.5)
    cls.standard_model = TensorRec(n_components=10,
                                   normalize_items=True,
                                   normalize_users=True)
    cls.standard_model.fit(cls.interactions, cls.user_features,
                           cls.item_features, epochs=10)
    cls.unbiased_model = TensorRec(n_components=10,
                                   normalize_items=True,
                                   normalize_users=True,
                                   biased=False)
    cls.unbiased_model.fit(cls.interactions, cls.user_features,
                           cls.item_features, epochs=10)
def test_save_and_load_model_same_session(self):
    """A reloaded model must return identical predictions and ranks."""
    original = TensorRec(n_components=10)
    original.fit(self.interactions, self.user_features, self.item_features,
                 epochs=10)
    predictions = original.predict(user_features=self.user_features,
                                   item_features=self.item_features)
    ranks = original.predict_rank(user_features=self.user_features,
                                  item_features=self.item_features)
    original.save_model(directory_path=self.test_dir)

    # Reload the model, predict, and check for equal predictions
    reloaded = TensorRec.load_model(directory_path=self.test_dir)
    new_predictions = reloaded.predict(user_features=self.user_features,
                                       item_features=self.item_features)
    new_ranks = reloaded.predict_rank(user_features=self.user_features,
                                      item_features=self.item_features)
    self.assertTrue((predictions == new_predictions).all())
    self.assertTrue((ranks == new_ranks).all())
def test_fit_fail_mismatched_batches(self):
    """Mismatched batch list lengths must fail; matched ones must fit."""
    batch_model = TensorRec(n_components=10)
    # Two user batches vs three item batches: invalid
    with self.assertRaises(ValueError):
        batch_model.fit(
            self.interactions,
            [self.user_features, self.user_features],
            [self.item_features, self.item_features, self.item_features],
            epochs=10)
    # Unbatched interactions with batched features: invalid
    with self.assertRaises(ValueError):
        batch_model.fit(self.interactions,
                        [self.user_features, self.user_features],
                        [self.item_features, self.item_features],
                        epochs=10)
    # Batched interactions and users with a single item set: valid
    batch_model.fit([self.interactions, self.interactions],
                    [self.user_features, self.user_features],
                    self.item_features,
                    epochs=10)
    # Fully batched inputs: valid
    batch_model.fit([self.interactions, self.interactions],
                    [self.user_features, self.user_features],
                    [self.item_features, self.item_features],
                    epochs=10)
def test_predict_user_repr_biased_fails(self):
    """User representations are unavailable on a biased model."""
    biased_model = TensorRec(n_components=10)
    biased_model.fit(self.interactions, self.user_features,
                     self.item_features, epochs=10)
    with self.assertRaises(NotImplementedError):
        biased_model.predict_user_representation(self.user_features)
def test_fit_fail_mismatched_batches(self):
    """Batch counts across inputs must agree for fit() to succeed."""
    model = TensorRec(n_components=10)
    two_users = [self.user_features, self.user_features]
    two_items = [self.item_features, self.item_features]
    three_items = [self.item_features, self.item_features,
                   self.item_features]
    two_interactions = [self.interactions, self.interactions]

    # 2 user batches vs 3 item batches must be rejected
    with self.assertRaises(ValueError):
        model.fit(self.interactions, two_users, three_items, epochs=10)
    # Unbatched interactions alongside batched features must be rejected
    with self.assertRaises(ValueError):
        model.fit(self.interactions, two_users, two_items, epochs=10)
    # Matched batch counts (single item set allowed) must fit cleanly
    model.fit(two_interactions, two_users, self.item_features, epochs=10)
    model.fit(two_interactions, two_users, two_items, epochs=10)
def test_fit_from_datasets(self):
    """Fitting should accept all three inputs as TensorRec datasets."""
    user_ds = create_tensorrec_dataset_from_sparse_matrix(self.user_features)
    item_ds = create_tensorrec_dataset_from_sparse_matrix(self.item_features)
    interaction_ds = create_tensorrec_dataset_from_sparse_matrix(
        self.interactions)
    ds_model = TensorRec(n_components=10)
    ds_model.fit(interaction_ds, user_ds, item_ds, epochs=10)
def test_fit_verbose(self):
    """Verbose fitting should still build the prediction node."""
    verbose_model = TensorRec(n_components=10)
    verbose_model.fit(self.interactions, self.user_features,
                      self.item_features, epochs=10, verbose=True)
    # The prediction node is only constructed during fitting
    self.assertIsNotNone(verbose_model.tf_prediction)
logging.getLogger().setLevel(logging.INFO) # Load the movielens dataset train_interactions, test_interactions, user_features, item_features, item_titles = \ get_movielens_100k(negative_value=-1.0) # Assemble parameters for fitting. 'epochs' is 1 in the fit_kwargs because we will be calling fit_partial 1000 times to # run 1000 epochs. epochs = 1000 fit_kwargs = {'epochs': 1, 'alpha': 0.0001, 'verbose': True, 'learning_rate': .01, 'n_sampled_items': int(item_features.shape[0] * .1)} # Build the TensorRec model model = TensorRec(n_components=2, biased=False, loss_graph=BalancedWMRBLossGraph(), item_repr_graph=ReLURepresentationGraph(), n_tastes=3) # Make some random selections of movies and users we want to plot movies_to_plot = np.random.choice(a=item_features.shape[0], size=50, replace=False) user_to_plot = np.random.choice(a=user_features.shape[0], size=100, replace=False) # Coerce data to datasets for faster fitting train_interactions_ds = create_tensorrec_dataset_from_sparse_matrix(train_interactions) user_features_ds = create_tensorrec_dataset_from_sparse_matrix(user_features) item_features_ds = create_tensorrec_dataset_from_sparse_matrix(item_features) # Iterate through 1000 epochs, outputting a JPG plot each epoch for epoch in range(epochs): model.fit_partial(interactions=train_interactions_ds,
def test_fit_batched(self):
    """Fitting with user batches should build the prediction node."""
    batched_model = TensorRec(n_components=10)
    batched_model.fit(self.interactions, self.user_features,
                      self.item_features, epochs=10, user_batch_size=2)
    # The prediction node is only constructed during fitting
    self.assertIsNotNone(batched_model.tf_prediction)
def test_fit_fail_batching_dataset(self):
    """Batched fitting must reject interactions given as a dataset."""
    model = TensorRec(n_components=10)
    dataset_interactions = create_tensorrec_dataset_from_sparse_matrix(
        self.interactions)
    with self.assertRaises(ValueError):
        model.fit(dataset_interactions,
                  self.user_features,
                  self.item_features,
                  epochs=10,
                  user_batch_size=2)
def test_cos_distance(self):
    """The model should fit with the cosine similarity prediction graph."""
    cosine_model = TensorRec(
        prediction_graph=CosineSimilarityPredictionGraph())
    cosine_model.fit(self.interactions, self.user_features,
                     self.item_features, epochs=5)
def test_rmse_loss(self):
    """The model should fit with the RMSE loss graph."""
    rmse_model = TensorRec(loss_graph=RMSELossGraph())
    rmse_model.fit(self.interactions, self.user_features,
                   self.item_features, epochs=5)
def test_dot_product(self):
    """The model should fit with the dot product prediction graph."""
    dot_model = TensorRec(prediction_graph=DotProductPredictionGraph())
    dot_model.fit(self.interactions, self.user_features,
                  self.item_features, epochs=5)
def test_rmse_dense_loss_biased(self):
    """A biased model should fit with the dense RMSE loss graph."""
    dense_rmse_model = TensorRec(loss_graph=RMSEDenseLossGraph(),
                                 biased=True)
    dense_rmse_model.fit(self.interactions, self.user_features,
                         self.item_features, epochs=5)