def _get_synthetic_data(num_users=100,
                        num_items=100,
                        num_interactions=10000,
                        randomness=0.01,
                        order=2,
                        random_state=None,
                        max_sequence_length=10):
    """Build train/test sequence datasets from synthetic interactions.

    Generates interactions with ``synthetic.generate_sequential``, prints the
    share of all interactions taken by the most frequent item id, splits the
    interactions with ``user_based_train_test_split`` and converts both parts
    to sequences.

    Parameters
    ----------
    num_users, num_items, num_interactions : int
        Forwarded unchanged to ``synthetic.generate_sequential``.
    randomness : float
        Forwarded as ``concentration_parameter``.
    order : int
        Forwarded as ``order``.
    random_state : optional
        Forwarded to both the generator and the train/test split.
    max_sequence_length : int
        Forwarded to ``to_sequence`` for both parts. Defaults to 10, the
        value that used to be hard-coded here. It is placed last so that
        existing positional calls keep their meaning.

    Returns
    -------
    tuple
        ``(train, test)``, each the result of ``to_sequence``.
    """
    interactions = synthetic.generate_sequential(
        num_users=num_users,
        num_items=num_items,
        num_interactions=num_interactions,
        concentration_parameter=randomness,
        order=order,
        random_state=random_state)

    # Occurrence count per distinct item id; dividing by the requested number
    # of interactions gives each item's share of the data.
    _, item_counts = np.unique(interactions.item_ids, return_counts=True)
    print('Max prob {}'.format((item_counts / num_interactions).max()))

    train, test = user_based_train_test_split(interactions,
                                              random_state=random_state)

    train = train.to_sequence(max_sequence_length=max_sequence_length)
    test = test.to_sequence(max_sequence_length=max_sequence_length)

    return train, test
def _get_synthetic_data(num_users=100,
                        num_items=100,
                        num_interactions=10000,
                        randomness=0.01,
                        order=2,
                        max_sequence_length=10,
                        random_state=None):
    """Create synthetic sequential interactions and return train/test sequences.

    The interactions come from ``synthetic.generate_sequential`` (with
    ``randomness`` passed as its ``concentration_parameter``). Before
    splitting, the largest per-item share of interactions is printed. The
    data is then split with ``user_based_train_test_split`` and each part is
    converted with ``to_sequence(max_sequence_length=..., step_size=None)``.

    ``random_state`` is handed to both the generator and the split.

    Returns the pair ``(train, test)`` of converted parts.
    """
    interactions = synthetic.generate_sequential(
        num_users=num_users,
        num_items=num_items,
        num_interactions=num_interactions,
        concentration_parameter=randomness,
        order=order,
        random_state=random_state,
    )

    # Count how often each distinct item id occurs, then report the biggest
    # count relative to the requested number of interactions.
    _, occurrences = np.unique(interactions.item_ids, return_counts=True)
    top_share = (occurrences / num_interactions).max()
    print('Max prob {}'.format(top_share))

    train_part, test_part = user_based_train_test_split(
        interactions, random_state=random_state)

    # Both parts are converted with exactly the same settings.
    sequence_options = {
        'max_sequence_length': max_sequence_length,
        'step_size': None,
    }
    train = train_part.to_sequence(**sequence_options)
    test = test_part.to_sequence(**sequence_options)
    return train, test
def preprocess_generated(num_users=100, num_items=1000, num_interactions=10000):
    """Generate a synthetic dataset and package it for downstream use.

    Interactions are produced by Spotlight's ``generate_sequential`` (with
    ``concentration_parameter=0.0001`` and ``order=3``), split with
    ``user_based_train_test_split`` and converted to sequences with the
    default ``to_sequence()`` settings.

    Parameters
    ----------
    num_users, num_items, num_interactions : int
        Forwarded unchanged to ``generate_sequential``.

    Returns
    -------
    tuple
        ``(dat, dat_seq, ind2val)`` where

        * ``dat`` maps ``"train"`` / ``"test"`` to the two split parts,
        * ``dat_seq`` maps the same keys to ``part.to_sequence()``,
        * ``ind2val["itemId"]`` is an identity mapping ``{i: i}`` for every
          integer from 0 up to and including the largest generated item id.
    """
    # Imported lazily so Spotlight is only required when this helper is used.
    from spotlight.datasets.synthetic import generate_sequential
    from spotlight.cross_validation import user_based_train_test_split

    dataset = generate_sequential(num_users=num_users,
                                  num_items=num_items,
                                  num_interactions=num_interactions,
                                  concentration_parameter=0.0001,
                                  order=3)

    dat = dict(zip(["train", "test"], user_based_train_test_split(dataset)))
    dat_seq = {key: val.to_sequence() for key, val in dat.items()}

    # Synthetic item ids carry no external value, so each index maps to
    # itself. range() excludes its end point, hence the +1: without it the
    # largest item id present in the data would have no entry.
    largest_item_id = int(dataset.item_ids.max())
    ind2val = {'itemId': {idx: idx for idx in range(largest_item_id + 1)}}

    return dat, dat_seq, ind2val
import numpy as np

from spotlight.cross_validation import user_based_train_test_split
from spotlight.datasets.synthetic import generate_sequential
from spotlight.evaluation import sequence_mrr_score
from spotlight.sequence.implicit import ImplicitSequenceModel

# Generate synthetic sequential interactions to train and evaluate on.
dataset = generate_sequential(
    num_users=100,
    num_items=1000,
    num_interactions=10000,
    concentration_parameter=0.01,
    order=3,
)

# Split the interactions, then turn each part into sequences using the
# default to_sequence() settings.
train, test = user_based_train_test_split(dataset)
train, test = train.to_sequence(), test.to_sequence()

# Fit a CNN-representation implicit sequence model with BPR loss for three
# iterations on the training sequences.
model = ImplicitSequenceModel(n_iter=3, representation='cnn', loss='bpr')
model.fit(train)

# Score the fitted model on the held-out sequences.
mrr = sequence_mrr_score(model, test)