import numpy as np

from spotlight.datasets import synthetic
from spotlight.cross_validation import user_based_train_test_split


def _get_synthetic_data(num_users=100,
                        num_items=100,
                        num_interactions=10000,
                        randomness=0.01,
                        order=2,
                        random_state=None):
    # Draw synthetic sequential interactions from an order-`order` Markov
    # chain; lower concentration_parameter values yield more predictable
    # transitions.
    interactions = synthetic.generate_sequential(
        num_users=num_users,
        num_items=num_items,
        num_interactions=num_interactions,
        concentration_parameter=randomness,
        order=order,
        random_state=random_state)

    # Report the empirical probability of the most frequent item.
    print('Max prob {}'.format(
        (np.unique(interactions.item_ids, return_counts=True)[1] /
         num_interactions).max()))

    train, test = user_based_train_test_split(interactions,
                                              random_state=random_state)

    train = train.to_sequence(max_sequence_length=10)
    test = test.to_sequence(max_sequence_length=10)

    return train, test
Example #2
import numpy as np

from spotlight.datasets import synthetic
from spotlight.cross_validation import user_based_train_test_split


def _get_synthetic_data(num_users=100,
                        num_items=100,
                        num_interactions=10000,
                        randomness=0.01,
                        order=2,
                        max_sequence_length=10,
                        random_state=None):
    # Same generator as above, but with a configurable sequence length.
    interactions = synthetic.generate_sequential(num_users=num_users,
                                                 num_items=num_items,
                                                 num_interactions=num_interactions,
                                                 concentration_parameter=randomness,
                                                 order=order,
                                                 random_state=random_state)

    # Report the empirical probability of the most frequent item.
    print('Max prob {}'.format((np.unique(interactions.item_ids,
                                          return_counts=True)[1] /
                                num_interactions).max()))

    train, test = user_based_train_test_split(interactions,
                                              random_state=random_state)

    # step_size=None leaves the sliding-window step at the library default.
    train = train.to_sequence(max_sequence_length=max_sequence_length,
                              step_size=None)
    test = test.to_sequence(max_sequence_length=max_sequence_length,
                            step_size=None)

    return train, test
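
A minimal usage sketch, added for illustration and not part of the original example: it wires _get_synthetic_data into Spotlight's ImplicitSequenceModel and sequence_mrr_score; the hyperparameter values are arbitrary.

import numpy as np

from spotlight.sequence.implicit import ImplicitSequenceModel
from spotlight.evaluation import sequence_mrr_score

# Build a reproducible synthetic split and fit a small sequence model.
train, test = _get_synthetic_data(randomness=0.01,
                                  order=2,
                                  random_state=np.random.RandomState(42))

model = ImplicitSequenceModel(n_iter=3, loss='bpr')
model.fit(train)

# sequence_mrr_score returns one MRR value per test sequence.
print('Mean MRR: {:.4f}'.format(sequence_mrr_score(model, test).mean()))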
Example #3
def preprocess_generated(num_users=100, num_items=1000, num_interactions=10000):
    from spotlight.datasets.synthetic import generate_sequential
    from spotlight.cross_validation import user_based_train_test_split

    # Highly deterministic order-3 sequences (very low concentration).
    dataset = generate_sequential(num_users=num_users,
                                  num_items=num_items,
                                  num_interactions=num_interactions,
                                  concentration_parameter=0.0001,
                                  order=3)

    # Split by user, then convert both halves to sequence form.
    splits = user_based_train_test_split(dataset)
    dat = {key: split for key, split in zip(["train", "test"], splits)}
    dat_seq = {key: val.to_sequence() for key, val in dat.items()}

    # Identity index-to-item mapping covering every item id that can occur
    # (max() + 1 so the largest id is included).
    ind2val = {}
    ind2val['itemId'] = {idx: item for idx, item
                         in enumerate(range(dataset.item_ids.max() + 1))}

    return dat, dat_seq, ind2val
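
A hypothetical call, not in the original example, showing the shape of what preprocess_generated returns:

# Illustrative only: inspect the structures returned above.
dat, dat_seq, ind2val = preprocess_generated(num_users=100,
                                             num_items=1000,
                                             num_interactions=10000)

print(type(dat['train']))                 # spotlight Interactions object
print(dat_seq['train'].sequences.shape)   # (n_sequences, max_sequence_length)
print(len(ind2val['itemId']))             # number of indexed item ids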
Example #4
import numpy as np
from spotlight.cross_validation import user_based_train_test_split
from spotlight.evaluation import sequence_mrr_score
from spotlight.sequence.implicit import ImplicitSequenceModel
from spotlight.datasets.synthetic import generate_sequential

dataset = generate_sequential(num_users=100,
                              num_items=1000,
                              num_interactions=10000,
                              concentration_parameter=0.01,
                              order=3)
train, test = user_based_train_test_split(dataset)

train = train.to_sequence()
test = test.to_sequence()

# CNN-based sequence representation trained with a BPR loss.
model = ImplicitSequenceModel(n_iter=3, representation='cnn', loss='bpr')
model.fit(train)

# One MRR score per test sequence.
mrr = sequence_mrr_score(model, test)
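
A short follow-up, added for illustration and not part of the original snippet: it reduces the per-sequence scores to a single number and scores candidate items for one observed history.

# Summarize evaluation and make a prediction for one test sequence.
print('Mean MRR: {:.4f}'.format(mrr.mean()))

history = test.sequences[0]    # one padded sequence of item ids
scores = model.predict(history)
print('Highest-scoring item id:', np.argmax(scores))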