def test_bloom(compression_ratio, expected_rmse):
    """Fit an explicit model on Bloom embeddings and check its test RMSE.

    Both the user and the item embedding layers are ``BloomEmbedding``
    instances of dimension 32 built with ``compression_ratio`` and two hash
    functions. After ten epochs, the RMSE on the held-out split must be
    below ``expected_rmse`` (with an ``EPSILON`` tolerance).
    """
    dataset = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(dataset,
                                          random_state=RANDOM_STATE)

    embedding_dim = 32
    bloom_options = dict(compression_ratio=compression_ratio,
                         num_hash_functions=2)

    user_layer = BloomEmbedding(dataset.num_users,
                                embedding_dim,
                                **bloom_options)
    item_layer = BloomEmbedding(dataset.num_items,
                                embedding_dim,
                                **bloom_options)

    representation = BilinearNet(dataset.num_users,
                                 dataset.num_items,
                                 user_embedding_layer=user_layer,
                                 item_embedding_layer=item_layer)

    model = ExplicitFactorizationModel(
        loss='regression',
        n_iter=10,
        batch_size=1024,
        learning_rate=1e-2,
        l2=1e-5,
        representation=representation,
        use_cuda=CUDA,
    )
    model.fit(train)
    print(model)

    rmse = rmse_score(model, test)
    print(rmse)

    assert rmse - EPSILON < expected_rmse
def data():
    """Return a ``(train, test)`` split of the MovieLens 100K interactions.

    The split is produced by ``random_train_test_split`` seeded with
    ``RANDOM_STATE``.
    """
    dataset = movielens.get_movielens_dataset('100K')
    train_part, test_part = random_train_test_split(
        dataset, random_state=RANDOM_STATE)

    return train_part, test_part
def test_precision_recall(data_implicit_factorization, k):
    """Check the output shapes of ``precision_recall_score`` for a given ``k``.

    Parameters
    ----------
    data_implicit_factorization : tuple
        ``(train, test, model)`` triple: the train and test interaction
        splits and a model fitted on the train split.
    k : int, list or array-like
        Cut-off(s) passed through to ``precision_recall_score``.

    The precision and recall arrays must always share a shape. When ``k``
    is a list there must be one column per cut-off; otherwise the result
    must be one-dimensional.
    """
    # Use the splits supplied alongside the model instead of reloading the
    # dataset and splitting it again: that only repeated work and shadowed
    # the values unpacked here.
    train, test, model = data_implicit_factorization

    precision, recall = precision_recall_score(model, test, train, k=k)

    assert precision.shape == recall.shape

    if isinstance(k, list):
        assert precision.shape[1] == len(k)
    else:
        assert len(precision.shape) == 1
def data_implicit_factorization():
    """Return ``(train, test, model)`` for implicit-feedback evaluation tests.

    The MovieLens 100K interactions are split with ``RANDOM_STATE`` and an
    ``ImplicitFactorizationModel`` using the BPR loss is fitted on the train
    split for a single iteration.
    """
    dataset = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(dataset,
                                          random_state=RANDOM_STATE)

    hyperparameters = dict(
        loss='bpr',
        n_iter=1,
        batch_size=1024,
        learning_rate=1e-2,
        l2=1e-6,
        random_state=RANDOM_STATE,
        use_cuda=CUDA,
    )
    bpr_model = ImplicitFactorizationModel(**hyperparameters)
    bpr_model.fit(train)

    return train, test, bpr_model
def test_poisson():
    """An explicit model with the Poisson loss must reach a test RMSE below 1.0.

    The bound is checked with an ``EPSILON`` tolerance after ten epochs on
    the MovieLens 100K train split.
    """
    dataset = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(dataset,
                                          random_state=RANDOM_STATE)

    hyperparameters = dict(
        loss='poisson',
        n_iter=10,
        batch_size=1024,
        learning_rate=1e-3,
        l2=1e-6,
        use_cuda=CUDA,
    )
    poisson_model = ExplicitFactorizationModel(**hyperparameters)
    poisson_model.fit(train)

    rmse = rmse_score(poisson_model, test)

    assert rmse - EPSILON < 1.0
def test_hinge():
    """An implicit model with the hinge loss must exceed a mean MRR of 0.07.

    The bound is checked with an ``EPSILON`` tolerance after ten epochs on
    the MovieLens 100K train split; train interactions are passed to
    ``mrr_score`` alongside the test split.
    """
    dataset = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(dataset,
                                          random_state=RANDOM_STATE)

    hyperparameters = dict(
        loss='hinge',
        n_iter=10,
        batch_size=1024,
        learning_rate=1e-2,
        l2=1e-6,
        use_cuda=CUDA,
    )
    hinge_model = ImplicitFactorizationModel(**hyperparameters)
    hinge_model.fit(train)

    mrr_values = mrr_score(hinge_model, test, train=train)
    mrr = mrr_values.mean()

    assert mrr + EPSILON > 0.07
def test_to_sequence(max_sequence_length, step_size):
    """Validate the sequences produced by ``Interactions.to_sequence``.

    Checks the shape of the sequence matrix for the given
    ``max_sequence_length`` and ``step_size``, then delegates to the
    ``_test_*`` helpers for padding, shifting and temporal-order checks.
    """
    dataset = movielens.get_movielens_dataset('100K')
    # Only the second part of the split is used; no random state is passed.
    _, held_out = random_train_test_split(dataset)

    sequences = held_out.to_sequence(max_sequence_length=max_sequence_length,
                                     step_size=step_size)
    sequence_matrix = sequences.sequences

    if step_size != 1:
        assert sequence_matrix.shape[1] == max_sequence_length
    else:
        expected_shape = (len(held_out), max_sequence_length)
        assert sequence_matrix.shape == expected_shape

    _test_just_padding(sequence_matrix)
    _test_final_column_no_padding(sequence_matrix)
    _test_shifted(sequences.user_ids, sequence_matrix, step_size)
    _test_temporal_order(sequences.user_ids, sequence_matrix, held_out)
def test_check_input():
    """Refitting on data with a new, larger user id must raise ``ValueError``."""
    dataset = movielens.get_movielens_dataset('100K')
    train, _ = random_train_test_split(dataset, random_state=RANDOM_STATE)

    hyperparameters = dict(
        loss='regression',
        n_iter=1,
        batch_size=1024,
        learning_rate=1e-3,
        l2=1e-6,
        use_cuda=CUDA,
    )
    model = ExplicitFactorizationModel(**hyperparameters)

    # Train for a single iteration.
    model.fit(train)

    # Make the data incompatible with the fitted model by overwriting the
    # first user id with one larger than any id in the train split.
    out_of_range_id = train.user_ids.max() + 1
    train.user_ids[0] = out_of_range_id

    with pytest.raises(ValueError):
        model.fit(train)
def test_logistic():
    """An explicit model with the logistic loss must reach an RMSE below 1.05.

    Ratings are first binarised (rating > 3) and mapped onto -1/+1 before
    the split; the bound is checked with an ``EPSILON`` tolerance.
    """
    dataset = movielens.get_movielens_dataset('100K')

    # Binarise: ratings above 3 become 1.0, everything else 0.0.
    above_threshold = dataset.ratings > 3
    dataset.ratings = above_threshold.astype(np.float32)

    # Rescale the 0/1 labels to -1/+1.
    dataset.ratings = dataset.ratings * 2 - 1

    train, test = random_train_test_split(dataset,
                                          random_state=RANDOM_STATE)

    hyperparameters = dict(
        loss='logistic',
        n_iter=10,
        batch_size=1024,
        learning_rate=1e-3,
        l2=1e-6,
        use_cuda=CUDA,
    )
    logistic_model = ExplicitFactorizationModel(**hyperparameters)
    logistic_model.fit(train)

    rmse = rmse_score(logistic_model, test)

    assert rmse - EPSILON < 1.05
def test_bpr_custom_optimizer():
    """A BPR model trained with a user-supplied Adagrad optimizer must learn.

    The optimizer factory is handed to the model via ``optimizer_func``;
    after ten epochs the mean MRR on the test split must exceed 0.05
    (with an ``EPSILON`` tolerance).
    """

    def adagrad_optimizer(model_params, lr=1e-2, weight_decay=1e-6):
        """Build an Adagrad optimizer over ``model_params``."""
        settings = {'lr': lr, 'weight_decay': weight_decay}
        return torch.optim.Adagrad(model_params, **settings)

    dataset = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(dataset,
                                          random_state=RANDOM_STATE)

    bpr_model = ImplicitFactorizationModel(
        loss='bpr',
        n_iter=10,
        batch_size=1024,
        optimizer_func=adagrad_optimizer,
        use_cuda=CUDA,
    )
    bpr_model.fit(train)

    mrr_values = mrr_score(bpr_model, test, train=train)
    mrr = mrr_values.mean()

    assert mrr + EPSILON > 0.05
step_size = max_sequence_length train, rest = user_based_train_test_split(dataset, test_percentage=0.05, random_state=random_state) test, validation = user_based_train_test_split( rest, test_percentage=0.5, random_state=random_state) train = train.to_sequence(max_sequence_length=max_sequence_length, min_sequence_length=min_sequence_length, step_size=step_size) test = test.to_sequence(max_sequence_length=max_sequence_length, min_sequence_length=min_sequence_length, step_size=step_size) validation = validation.to_sequence( max_sequence_length=max_sequence_length, min_sequence_length=min_sequence_length, step_size=step_size) print('In test {}, in validation {}'.format(len(test.sequences), len(validation.sequences))) elif args.model == 'factorization': train, rest = random_train_test_split(dataset, test_percentage=test_percentage, random_state=random_state) test, validation = random_train_test_split(rest, test_percentage=0.5, random_state=random_state) experiment_name = '{}_{}'.format(args.dataset, args.model) run(experiment_name, train, test, validation, random_state)