コード例 #1
0
def test_bloom(compression_ratio, expected_rmse):
    """Fit an explicit model on Bloom embeddings and bound its test RMSE.

    User and item embedding layers are both ``BloomEmbedding`` instances
    created with the supplied ``compression_ratio`` and two hash functions.
    They are handed to a ``BilinearNet``, which serves as the representation
    of an ``ExplicitFactorizationModel`` fit on a MovieLens 100K train split.

    Parameters
    ----------
    compression_ratio:
        Forwarded as ``compression_ratio`` to both embedding layers.
    expected_rmse:
        Bound that ``rmse - EPSILON`` must stay strictly below.
    """
    interactions = movielens.get_movielens_dataset('100K')
    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    # Settings shared by the user-side and item-side embedding layers.
    bloom_options = {
        'compression_ratio': compression_ratio,
        'num_hash_functions': 2,
    }

    # The user layer is created before the item layer, as in the network
    # constructor's argument order.
    user_layer = BloomEmbedding(interactions.num_users, 32, **bloom_options)
    item_layer = BloomEmbedding(interactions.num_items, 32, **bloom_options)

    representation = BilinearNet(
        interactions.num_users,
        interactions.num_items,
        user_embedding_layer=user_layer,
        item_embedding_layer=item_layer,
    )

    model = ExplicitFactorizationModel(
        loss='regression',
        n_iter=10,
        batch_size=1024,
        learning_rate=1e-2,
        l2=1e-5,
        representation=representation,
        use_cuda=CUDA,
    )
    model.fit(train)
    print(model)

    rmse = rmse_score(model, test)
    print(rmse)

    assert rmse - EPSILON < expected_rmse
コード例 #2
0
def data():
    """Return a ``(train, test)`` split of the MovieLens 100K interactions.

    The split comes from ``random_train_test_split`` called with the
    module-level ``RANDOM_STATE``.
    """
    train_part, test_part = random_train_test_split(
        movielens.get_movielens_dataset('100K'),
        random_state=RANDOM_STATE,
    )
    return train_part, test_part
コード例 #3
0
def test_precision_recall(data_implicit_factorization, k):
    """Check the shapes of the arrays returned by ``precision_recall_score``.

    Parameters
    ----------
    data_implicit_factorization:
        A ``(train, test, model)`` triple; ``model`` is scored on ``test``
        with ``train`` passed as the third positional argument.
    k:
        Either a single cut-off or a list of cut-offs, forwarded as ``k``.

    The precision and recall arrays must always share a shape. For a list
    ``k`` the second axis must have one entry per cut-off; otherwise the
    result must be one-dimensional.
    """
    # Evaluate on the very split that came with the model. Reloading the
    # dataset and splitting it again here would discard the unpacked values
    # and could yield a different split than the one the model was fit on.
    train, test, model = data_implicit_factorization

    precision, recall = precision_recall_score(model, test, train, k=k)

    assert precision.shape == recall.shape

    if isinstance(k, list):
        assert precision.shape[1] == len(k)
    else:
        assert len(precision.shape) == 1
コード例 #4
0
def data_implicit_factorization():
    """Return ``(train, test, model)`` for implicit-feedback tests.

    The MovieLens 100K interactions are split with ``RANDOM_STATE`` and an
    ``ImplicitFactorizationModel`` using the BPR loss is fit on the train
    part with ``n_iter=1``.
    """
    train, test = random_train_test_split(
        movielens.get_movielens_dataset('100K'),
        random_state=RANDOM_STATE,
    )

    hyperparameters = {
        'loss': 'bpr',
        'n_iter': 1,
        'batch_size': 1024,
        'learning_rate': 1e-2,
        'l2': 1e-6,
        'random_state': RANDOM_STATE,
        'use_cuda': CUDA,
    }
    model = ImplicitFactorizationModel(**hyperparameters)
    model.fit(train)

    return train, test, model
コード例 #5
0
def test_poisson():
    """Fit an explicit model with the Poisson loss and bound its test RMSE.

    The model is trained on a MovieLens 100K train split and
    ``rmse - EPSILON`` on the held-out part must stay strictly below 1.0.
    """
    train, test = random_train_test_split(
        movielens.get_movielens_dataset('100K'),
        random_state=RANDOM_STATE,
    )

    hyperparameters = {
        'loss': 'poisson',
        'n_iter': 10,
        'batch_size': 1024,
        'learning_rate': 1e-3,
        'l2': 1e-6,
        'use_cuda': CUDA,
    }
    model = ExplicitFactorizationModel(**hyperparameters)
    model.fit(train)

    rmse = rmse_score(model, test)

    assert rmse - EPSILON < 1.0
コード例 #6
0
def test_hinge():
    """Fit an implicit model with the hinge loss and bound its mean MRR.

    The model is trained on a MovieLens 100K train split; the mean of
    ``mrr_score`` on the held-out part (with ``train`` passed as ``train``)
    plus ``EPSILON`` must exceed 0.07.
    """
    train, test = random_train_test_split(
        movielens.get_movielens_dataset('100K'),
        random_state=RANDOM_STATE,
    )

    hyperparameters = {
        'loss': 'hinge',
        'n_iter': 10,
        'batch_size': 1024,
        'learning_rate': 1e-2,
        'l2': 1e-6,
        'use_cuda': CUDA,
    }
    model = ImplicitFactorizationModel(**hyperparameters)
    model.fit(train)

    scores = mrr_score(model, test, train=train)
    mrr = scores.mean()

    assert mrr + EPSILON > 0.07
コード例 #7
0
def test_to_sequence(max_sequence_length, step_size):
    """Validate the sequence representation built by ``to_sequence``.

    Parameters
    ----------
    max_sequence_length:
        Forwarded to ``to_sequence``; also the expected second dimension of
        the resulting sequence matrix.
    step_size:
        Forwarded to ``to_sequence``. With a step of 1 the matrix must have
        exactly one row per interaction.
    """
    full_dataset = movielens.get_movielens_dataset('100K')
    # Only the second part of the split is converted and checked.
    _, interactions = random_train_test_split(full_dataset)

    sequences = interactions.to_sequence(
        max_sequence_length=max_sequence_length,
        step_size=step_size,
    )
    matrix = sequences.sequences
    user_ids = sequences.user_ids

    if step_size != 1:
        assert matrix.shape[1] == max_sequence_length
    else:
        assert matrix.shape == (len(interactions), max_sequence_length)

    _test_just_padding(matrix)
    _test_final_column_no_padding(matrix)
    _test_shifted(user_ids, matrix, step_size)
    _test_temporal_order(user_ids, matrix, interactions)
コード例 #8
0
def test_check_input():
    """Refitting with an unseen, larger user id must raise ``ValueError``.

    A model is first fit for a single iteration on a MovieLens 100K train
    split. The first user id of that split is then set to one more than the
    current maximum and a second ``fit`` call is expected to fail.
    """
    train, test = random_train_test_split(
        movielens.get_movielens_dataset('100K'),
        random_state=RANDOM_STATE,
    )

    # A single training iteration is all this check needs.
    hyperparameters = {
        'loss': 'regression',
        'n_iter': 1,
        'batch_size': 1024,
        'learning_rate': 1e-3,
        'l2': 1e-6,
        'use_cuda': CUDA,
    }
    model = ExplicitFactorizationModel(**hyperparameters)
    model.fit(train)

    # Make the data incompatible with the already-fitted model by
    # introducing a user id beyond the largest one present so far.
    train.user_ids[0] = train.user_ids.max() + 1

    with pytest.raises(ValueError):
        model.fit(train)
コード例 #9
0
def test_logistic():
    """Fit an explicit model with the logistic loss on binarised ratings.

    Ratings above 3 become ``1`` and all others ``-1`` before the split.
    After fitting on the train part, ``rmse - EPSILON`` on the held-out part
    must stay strictly below 1.05.
    """
    interactions = movielens.get_movielens_dataset('100K')

    # Binarise (rating > 3 -> 1.0, else 0.0), then map {0, 1} to {-1, 1}.
    above_three = (interactions.ratings > 3).astype(np.float32)
    interactions.ratings = above_three * 2 - 1

    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    hyperparameters = {
        'loss': 'logistic',
        'n_iter': 10,
        'batch_size': 1024,
        'learning_rate': 1e-3,
        'l2': 1e-6,
        'use_cuda': CUDA,
    }
    model = ExplicitFactorizationModel(**hyperparameters)
    model.fit(train)

    rmse = rmse_score(model, test)

    assert rmse - EPSILON < 1.05
コード例 #10
0
def test_bpr_custom_optimizer():
    """Fit a BPR model with a user-supplied optimizer factory.

    The factory builds a ``torch.optim.Adagrad`` optimizer and is passed to
    the model as ``optimizer_func``. After fitting on a MovieLens 100K train
    split, the mean ``mrr_score`` on the held-out part plus ``EPSILON`` must
    exceed 0.05.
    """
    train, test = random_train_test_split(
        movielens.get_movielens_dataset('100K'),
        random_state=RANDOM_STATE,
    )

    def adagrad_optimizer(model_params, lr=1e-2, weight_decay=1e-6):
        """Return an Adagrad optimizer over ``model_params``."""
        optimizer = torch.optim.Adagrad(
            model_params,
            lr=lr,
            weight_decay=weight_decay,
        )
        return optimizer

    hyperparameters = {
        'loss': 'bpr',
        'n_iter': 10,
        'batch_size': 1024,
        'optimizer_func': adagrad_optimizer,
        'use_cuda': CUDA,
    }
    model = ImplicitFactorizationModel(**hyperparameters)
    model.fit(train)

    scores = mrr_score(model, test, train=train)
    mrr = scores.mean()

    assert mrr + EPSILON > 0.05
コード例 #11
0
        step_size = max_sequence_length

        train, rest = user_based_train_test_split(dataset,
                                                  test_percentage=0.05,
                                                  random_state=random_state)
        test, validation = user_based_train_test_split(
            rest, test_percentage=0.5, random_state=random_state)
        train = train.to_sequence(max_sequence_length=max_sequence_length,
                                  min_sequence_length=min_sequence_length,
                                  step_size=step_size)
        test = test.to_sequence(max_sequence_length=max_sequence_length,
                                min_sequence_length=min_sequence_length,
                                step_size=step_size)
        validation = validation.to_sequence(
            max_sequence_length=max_sequence_length,
            min_sequence_length=min_sequence_length,
            step_size=step_size)
        print('In test {}, in validation {}'.format(len(test.sequences),
                                                    len(validation.sequences)))
    elif args.model == 'factorization':
        train, rest = random_train_test_split(dataset,
                                              test_percentage=test_percentage,
                                              random_state=random_state)
        test, validation = random_train_test_split(rest,
                                                   test_percentage=0.5,
                                                   random_state=random_state)

    experiment_name = '{}_{}'.format(args.dataset, args.model)

    run(experiment_name, train, test, validation, random_state)