コード例 #1
0
ファイル: test_explicit.py プロジェクト: delldu/Spotlight
def test_bloom(compression_ratio, expected_rmse):
    """Fit a bloom-compressed BilinearNet on MovieLens 100K and check RMSE."""
    dataset = movielens.get_movielens_dataset('100K')

    train_set, test_set = random_train_test_split(dataset,
                                                  random_state=RANDOM_STATE)

    # Both embedding layers share the same compression settings.
    bloom_kwargs = dict(compression_ratio=compression_ratio,
                        num_hash_functions=2)
    network = BilinearNet(
        dataset.num_users,
        dataset.num_items,
        user_embedding_layer=BloomEmbedding(dataset.num_users, 32,
                                            **bloom_kwargs),
        item_embedding_layer=BloomEmbedding(dataset.num_items, 32,
                                            **bloom_kwargs))

    model = ExplicitFactorizationModel(loss='regression',
                                       n_iter=10,
                                       batch_size=1024,
                                       learning_rate=1e-2,
                                       l2=1e-5,
                                       representation=network,
                                       use_cuda=CUDA)

    model.fit(train_set)
    print(model)

    rmse = rmse_score(model, test_set)
    print(rmse)

    # Compression should not cost more than EPSILON of accuracy.
    assert rmse - EPSILON < expected_rmse
コード例 #2
0
def best_params_spotlight(losses,
                          n_iters,
                          batch_sizes,
                          l2s,
                          learning_rates,
                          embedding_dims,
                          train_data,
                          t=None):
    """Grid-search ExplicitFactorizationModel hyper-parameters.

    Trains one model per combination of the supplied hyper-parameter
    iterables on a fixed random split of ``train_data``, printing the
    elapsed time and test RMSE for every combination.

    Parameters
    ----------
    losses, n_iters, batch_sizes, l2s, learning_rates, embedding_dims : iterables
        Candidate values for each hyper-parameter.
    train_data : spotlight Interactions
        Data to split into an inner train/test pair.
    t : Timer, optional
        Timer used for per-combination runtimes.  A fresh Timer is created
        when omitted; the original used the mutable default ``t=Timer()``,
        which silently shared one Timer instance across all calls.
    """
    from itertools import product

    if t is None:
        t = Timer()

    rmses = dict()
    t.start()
    # itertools.product replaces six levels of nested for-loops.
    for (loss, n_iter, batch_size, l2,
         learning_rate, embedding_dim) in product(losses, n_iters,
                                                  batch_sizes, l2s,
                                                  learning_rates,
                                                  embedding_dims):
        model = ExplicitFactorizationModel(
            loss=loss,  # bug fix: the original hard-coded 'regression'
            embedding_dim=embedding_dim,  # latent dimensionality
            n_iter=n_iter,  # number of epochs of training
            batch_size=batch_size,  # minibatch size
            l2=l2,  # strength of L2 regularization
            learning_rate=learning_rate,
            use_cuda=torch.cuda.is_available())

        # Build a fresh dict per combination: the original mutated a single
        # shared dict, so every rmses[...] entry aliased the final
        # parameter set instead of the one actually used.
        params = {'loss': loss,
                  'n_iter': n_iter,
                  'batch_size': batch_size,
                  'l2': l2,
                  'learning_rate': learning_rate,
                  'embedding_dim': embedding_dim,
                  'model': model}

        train_tr_data, test_tr_data = random_train_test_split(
            train_data,
            random_state=np.random.RandomState(42))

        model.fit(train_tr_data, verbose=True)

        rmse = rmse_score(model, test_tr_data)
        rmses[rmse] = params

        print(
            "-----------Time: {}, Loss: {}, n_iter: {}, l2: {}, batch_size: {}, learning_rate: {}, embedding_dim: {}, rmse: {}-------------\n\n"
            .format(t.stop(), loss, n_iter, l2, batch_size,
                    learning_rate, embedding_dim, rmse))
        # restart timer for the next combination
        t.start()
コード例 #3
0
def train(user_ids, item_ids, ratings, num_dimensions, verbose):
    """Fit a logistic explicit-factorization model and return user embeddings.

    Parameters
    ----------
    user_ids, item_ids : sequences of int
        Interaction pairs; cast to int32 arrays as spotlight expects.
    ratings : sequence of float
        Rating per interaction; cast to float32.
    num_dimensions : int
        Latent embedding dimensionality.
    verbose : bool
        Forwarded to ``model.fit``.

    Returns
    -------
    numpy.ndarray
        The trained user-embedding weight matrix, detached and on the CPU.
    """
    dataset = Interactions(np.array(user_ids, dtype=np.int32),
                           np.array(item_ids, dtype=np.int32),
                           ratings=np.array(ratings, dtype=np.float32))

    # Idiom fix: direct comparison instead of `False if ... else True`.
    # `device` is presumably a module-level torch.device -- defined elsewhere
    # in the original file.
    is_cuda_available = device.type != 'cpu'

    m = ExplicitFactorizationModel(loss='logistic',
                                   use_cuda=is_cuda_available,
                                   embedding_dim=num_dimensions)
    m.fit(dataset, verbose=verbose)

    # Pull the raw embedding matrix out of the underlying torch network.
    user_embeddings = m._net.user_embeddings.weight.detach().cpu().numpy()

    return user_embeddings
コード例 #4
0
    def obtener_modelo_gui(self, lista_param):
        """Build the model selected by the user from the given parameters.

        Only used by the web interface.  The result is stored in the
        module-level ``modelo`` variable.

        Parameters
        ----------
        lista_param: list
            User-chosen parameters, in order: loss, embedding_dim, n_iter,
            batch_size, l2, learning_rate, representation.
        """

        global modelo

        # Unpack the parameters into named variables for readability.
        (loss, embedding_dim, n_iter, batch_size,
         l2, learning_rate, representation) = lista_param[:7]

        # Keyword arguments shared by all three model families.
        comunes = dict(loss=loss,
                       embedding_dim=embedding_dim,
                       n_iter=n_iter,
                       batch_size=batch_size,
                       l2=l2,
                       learning_rate=learning_rate,
                       use_cuda=torch.cuda.is_available())

        # Instantiate the model matching the chosen option.
        if self.opcion_modelo == 1:
            modelo = ExplicitFactorizationModel(**comunes)
        elif self.opcion_modelo == 2:
            modelo = ImplicitFactorizationModel(**comunes)
        else:
            modelo = ImplicitSequenceModel(representation=representation,
                                           **comunes)
コード例 #5
0
def build_model(data, loss, embedding_dim, n_iter, batch_size, l2,
                learning_rate, **kwargs):
    """Train an ExplicitFactorizationModel on a fixed split; return test RMSE."""
    factorizer = ExplicitFactorizationModel(
        loss=loss,
        embedding_dim=embedding_dim,  # latent dimensionality
        n_iter=n_iter,  # number of epochs of training
        batch_size=batch_size,  # minibatch size
        l2=l2,  # strength of L2 regularization
        learning_rate=learning_rate,
        use_cuda=torch.cuda.is_available())

    # Fixed seed so every hyper-parameter combination sees the same split.
    train_split, test_split = random_train_test_split(
        data, random_state=np.random.RandomState(42))
    factorizer.fit(train_split, verbose=True)
    return rmse_score(factorizer, test_split)
コード例 #6
0
def test_explicit_serialization(data):
    """Round-tripping a fitted model through serialization must not change RMSE."""
    train, test = data

    model = ExplicitFactorizationModel(loss='regression',
                                       n_iter=3,
                                       batch_size=1024,
                                       learning_rate=1e-3,
                                       l2=1e-5,
                                       use_cuda=CUDA)
    model.fit(train)

    # Score the original first, then the reloaded copy.
    rmse_before = rmse_score(model, test)
    rmse_after = rmse_score(_reload(model), test)
    assert rmse_before == rmse_after
コード例 #7
0
def test_poisson():
    """Poisson loss on MovieLens 100K should reach an RMSE below 1.0."""
    train, test = random_train_test_split(
        movielens.get_movielens_dataset('100K'),
        random_state=RANDOM_STATE)

    model = ExplicitFactorizationModel(loss='poisson',
                                       n_iter=10,
                                       batch_size=1024,
                                       learning_rate=1e-3,
                                       l2=1e-6)
    model.fit(train)

    assert rmse_score(model, test) < 1.0
コード例 #8
0
def test_poisson():
    """Sanity-check the 'poisson' loss: test RMSE must stay under 1.0."""
    interactions = movielens.get_movielens_dataset('100K')

    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    # Same hyper-parameters as the regression smoke tests.
    hyper = dict(loss='poisson',
                 n_iter=10,
                 batch_size=1024,
                 learning_rate=1e-3,
                 l2=1e-6)
    model = ExplicitFactorizationModel(**hyper)
    model.fit(train)

    rmse = rmse_score(model, test)
    assert rmse < 1.0
コード例 #9
0
ファイル: spotlight_helpers.py プロジェクト: S-Rey/ML-Project
def train_spotlight_models(train, test, dataset_testing, embedding_dims, n_iters, batch_sizes, l2s, learning_rates, is_save=False):
    """
    takes train, test, dataset_testing datasets as spotlight.interactions.
    train multiple spotlight models using ExplicitFactorizationModel, with given parameters.
    parameters are given in embedding_dims, n_iters, batch_sizes, l2s, learning_rates.
    return predictions of train, test, dataset_testing datasets as well as rmse on train and test.
    When is_save is True each fitted model is also written to the models/ directory.
    """
    from itertools import product

    # rmse on train and test sets, one entry per parameter combination
    train_rmses = np.array([])
    test_rmses = np.array([])
    # predictions of each model on train, test and dataset_testing
    preds_train_trains = []
    preds_train_tests = []
    preds_tests = []

    # itertools.product flattens the original five nested loops over
    # embedding_dim, n_iter, batch_size, l2 and learning_rate.
    for embedding_dim, n_iter, batch_size, l2, learning_rate in product(
            embedding_dims, n_iters, batch_sizes, l2s, learning_rates):
        # initialize model; use the GPU when torch.cuda.is_available() returns
        # True, otherwise use the CPU
        model = ExplicitFactorizationModel(loss='regression',
                                           embedding_dim=embedding_dim,  # latent dimensionality
                                           n_iter=n_iter,  # number of epochs of training
                                           batch_size=batch_size,  # minibatch size
                                           l2=l2,  # strength of L2 regularization
                                           learning_rate=learning_rate,
                                           use_cuda=torch.cuda.is_available())

        # report which model is being trained
        print("embedding_dim={}, n_iter={}, batch_size={}, l2={}, learning_rate={}".format(embedding_dim, n_iter, batch_size, l2, learning_rate))
        model.fit(train, verbose=True)
        # evaluate on both splits and record the scores
        train_rmse = rmse_score(model, train)
        test_rmse = rmse_score(model, test)
        train_rmses = np.append(train_rmses, train_rmse)
        test_rmses = np.append(test_rmses, test_rmse)
        print('Train RMSE {:.3f}, test RMSE {:.3f}'.format(train_rmse, test_rmse))
        # optionally persist the fitted model to disk
        if is_save:
            torch.save(model, "models/embedding_dim={}, n_iter={}, batch_size={}, l2={}, learning_rate={}".format(embedding_dim, n_iter, batch_size, l2, learning_rate))
        # predictions on train, test and dataset_testing
        preds_train_trains.append(model.predict(train.user_ids, train.item_ids))
        preds_train_tests.append(model.predict(test.user_ids, test.item_ids))
        preds_tests.append(model.predict(dataset_testing.user_ids, dataset_testing.item_ids))

    # stored predictions on train, test, dataset_testing; rmses on train and test
    return preds_train_trains, preds_train_tests, preds_tests, train_rmses, test_rmses
コード例 #10
0
    def obtener_modelos(self):
        """Fetch, train (with default parameters) and persist the chosen model.

        Only used by the text interface.  Reads the module-level ``train``
        dataset and stores the fitted model in the module-level ``modelo``.
        """

        global train, modelo

        cuda = torch.cuda.is_available()

        # Pick the model family for the chosen option, then fit and save it.
        if self.opcion_modelo == 1:
            modelo = ExplicitFactorizationModel(loss='logistic', use_cuda=cuda)
            descripcion = 'el modelo de factorización explícito'
        elif self.opcion_modelo == 2:
            modelo = ImplicitFactorizationModel(loss='bpr', use_cuda=cuda)
            descripcion = 'el modelo de factorización implícito'
        else:
            modelo = ImplicitSequenceModel(loss='bpr', representation='pooling', use_cuda=cuda)
            descripcion = 'el modelo de secuencia explícito'

        modelo.fit(train, verbose=True)
        guardar_modelos_dl(modelo, descripcion)
コード例 #11
0
ファイル: spotlight_helpers.py プロジェクト: S-Rey/ML-Project
def train_spotlight_models_using_all_data(train, dataset_testing, embedding_dims, n_iters, batch_sizes, l2s, learning_rates, verbose=True):
    """
    takes train dataset as spotlight.interactions.
    train multiple spotlight models using ExplicitFactorizationModel, with given parameters.
    parameters are given in embedding_dims, n_iters, batch_sizes, l2s, learning_rates.
    saves trained models into disk and returns predictions on dataset_testing.
    """
    from itertools import product

    # predictions on dataset_testing, one entry per parameter combination
    preds_tests = []

    # itertools.product flattens the original five nested parameter loops.
    for embedding_dim, n_iter, batch_size, l2, learning_rate in product(
            embedding_dims, n_iters, batch_sizes, l2s, learning_rates):
        # initialize model
        model = ExplicitFactorizationModel(loss='regression',
                                           embedding_dim=embedding_dim,  # latent dimensionality
                                           n_iter=n_iter,  # number of epochs of training
                                           batch_size=batch_size,  # minibatch size
                                           l2=l2,  # strength of L2 regularization
                                           learning_rate=learning_rate,
                                           use_cuda=torch.cuda.is_available())

        # report progress if requested
        if verbose:
            print("embedding_dim={}, n_iter={}, batch_size={}, l2={}, learning_rate={}".format(embedding_dim, n_iter, batch_size, l2, learning_rate))
        # fit on the full training dataset
        model.fit(train, verbose=verbose)
        preds_tests.append(model.predict(dataset_testing.user_ids, dataset_testing.item_ids))
        # persist every fitted model to disk
        torch.save(model, "models_all_data/embedding_dim={}, n_iter={}, batch_size={}, l2={}, learning_rate={}".format(embedding_dim, n_iter, batch_size, l2, learning_rate))

    # stored predictions on dataset_testing
    return preds_tests
コード例 #12
0
ファイル: test_explicit.py プロジェクト: delldu/Spotlight
def test_logistic():
    """Logistic loss on binarized MovieLens ratings should keep RMSE near 1."""
    interactions = movielens.get_movielens_dataset('100K')

    # Binarize ratings at >3, then map (0, 1) onto (-1, 1) for logistic loss.
    binary = (interactions.ratings > 3).astype(np.float32)
    interactions.ratings = binary * 2 - 1

    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    model = ExplicitFactorizationModel(loss='logistic',
                                       n_iter=10,
                                       batch_size=1024,
                                       learning_rate=1e-3,
                                       l2=1e-6,
                                       use_cuda=CUDA)
    model.fit(train)

    assert rmse_score(model, test) - EPSILON < 1.05
コード例 #13
0
def create_model(loss, k, number_epochs, batch_size, l2_penal, gamma):
    """Build an explicit matrix-factorization model ready to fit input data.

    :param loss: the loss used for the optimization process
    :param k: the latent dimension of the matrix factorization
    :param number_epochs: passes over the training set during fitting
    :param batch_size: minibatch size for the optimization algorithm
    :param l2_penal: ridge (L2) penalization strength
    :param gamma: the optimization learning rate
    :return: a factorization model ready to fit our input data
    """
    return ExplicitFactorizationModel(loss=loss,
                                      embedding_dim=k,
                                      n_iter=number_epochs,
                                      batch_size=batch_size,
                                      l2=l2_penal,
                                      learning_rate=gamma)
コード例 #14
0
def test_check_input():
    """Fitting data containing an unseen user id must raise ValueError."""
    interactions = movielens.get_movielens_dataset('100K')

    train, test = random_train_test_split(interactions,
                                          random_state=RANDOM_STATE)

    # One iteration is enough: only the input validation matters here.
    model = ExplicitFactorizationModel(loss='regression',
                                       n_iter=1,
                                       batch_size=1024,
                                       learning_rate=1e-3,
                                       l2=1e-6)
    model.fit(train)

    # Introduce a user id outside the range the model was built with.
    train.user_ids[0] = train.user_ids.max() + 1
    with pytest.raises(ValueError):
        model.fit(train)
コード例 #15
0
        lite = pickle.load(f)

from spotlight.interactions import Interactions

# Assemble a spotlight Interactions dataset from the columns loaded above.
# Dtypes follow spotlight's expectations: int32 ids/timestamps, float32 ratings.
user_ids = np.array(lite['user']).astype(np.int32)
item_ids = np.array(lite['item']).astype(np.int32)
ratings = np.array(lite['rating']).astype(np.float32)
times = np.array(lite['time']).astype(np.int32)
dataset = Interactions(user_ids, item_ids, ratings, times)

# Prepare train test (user-based split; random split kept as an alternative)
train, test = user_based_train_test_split(dataset)
# train, test = random_train_test_split(dataset)

# Test baseline: plain explicit factorization scored by RMSE
model = ExplicitFactorizationModel(n_iter=20)
model.fit(train, verbose=True)
print('RMSE', rmse_score(model, test))

from scipy.sparse import coo_matrix

# Sparse (num_users x num_items) ratings matrix in CSR form,
# needed by the explicit sequence model below.
ratings = coo_matrix((dataset.ratings, (dataset.user_ids, dataset.item_ids)),
                     shape=(dataset.num_users, dataset.num_items)).tocsr()

# Convert splits to fixed-length sequences for the sequence model.
train_seq = train.to_sequence(SEQ_LEN)
test_seq = test.to_sequence(SEQ_LEN)

model = ExplicitSequenceModel(n_iter=30, representation='lstm', batch_size=1)
model.fit(train_seq, ratings, verbose=True)

SEQ_ID = 0
コード例 #16
0
  fra_rating_5.append((book_groups['ratings_5'].mean()/book_groups['ratings_count'].mean()).as_matrix())
# Mean reciprocal rank of the fraction-of-5-star-ratings baseline.
RR_fra_rating_5 = calc_reciprank(test_dataset, fra_rating_5, train=exp_dataset)
MRR_fra_rating_5 = RR_fra_rating_5.mean()
print("MRR of fraction of 5* ratings: ", MRR_fra_rating_5)

"""####  So, the best model of Question 1 is Fraction of 5* ratings(fra_rating_5)

## Question 2
"""

from spotlight.factorization.explicit import ExplicitFactorizationModel
from spotlight.factorization.implicit import ImplicitFactorizationModel

#ExplicitFactorizationModel
emodel = ExplicitFactorizationModel(n_iter=10,
                                    embedding_dim=32, 
                                    use_cuda=False)
emodel.fit(exp_train, verbose=True)
# Score every item for every user, then evaluate MRR on the validation
# split (training interactions excluded from the ranking).
score_emodel = scoreAll(emodel)
print(calc_reciprank(exp_validation, score_emodel, train=exp_train).mean())


#ImplicitFactorizationModel (BPR loss), same evaluation protocol
imodel = ImplicitFactorizationModel(n_iter=10,
                                    loss='bpr',
                                    embedding_dim=32, 
                                    use_cuda=False)
imodel.fit(exp_train, verbose=True)
score_imodel_32_on_exp = scoreAll(imodel)
print(calc_reciprank(exp_validation, score_imodel_32_on_exp, train=exp_train).mean())
コード例 #17
0
# Map raw user/beer names to contiguous integer ids via pre-fit encoders.
df['user'] = user_encoder.transform(df['user'])
df['beer'] = beer_encoder.transform(df['beer'])

user_ids = np.array(df['user'])
item_ids = np.array(df['beer'])
ratings = np.array(df['rating']).astype('float32')


#Explicit Factorization Model
explicit_interactions = Interactions(user_ids, item_ids, ratings)
# NOTE(review): this line only computes the dense matrix shape and discards
# it -- presumably a leftover notebook cell; it has no effect here.
explicit_interactions.tocoo().todense().shape


explicit_model = ExplicitFactorizationModel(loss='regression',
                                   embedding_dim=32,
                                   n_iter=10,
                                   batch_size=250,
                                   learning_rate=0.01)

explicit_model.fit(explicit_interactions)


# Dense user x item rating matrix used later for per-user lookups.
user_df = pd.DataFrame(explicit_interactions.tocoo().todense())



#Spotlight Model

#This function uses the Spotlight model to make recommendations for a given user
def spotlight_predictions(user_id):
    app_user = pd.DataFrame(user_df.iloc[user_id]).T
コード例 #18
0
# Rename receiver-side columns so sender/receiver attributes don't collide.
finder_decisions.rename(columns={
    'age': 'Receiver_age',
    'gender': 'Receiver_gender',
    'index': 'Receiver_index'
},
                        inplace=True)

# Binary ratings: +1 for any decision, overwritten with -1 for 'skip'.
ratings = np.ones(len(finder_decisions))
ratings[finder_decisions['Decision'] == 'skip'] = -1
ratings = ratings.astype(np.float32)
dataset = Interactions(finder_decisions['Sender_index'].values,
                       finder_decisions['Receiver_index'].values, ratings)
# Logistic loss matches the binary (-1, 1) ratings built above.
model = ExplicitFactorizationModel(
    loss='logistic',
    embedding_dim=5,  # latent dimensionality
    n_iter=10,  # number of epochs of training
    batch_size=256,  # minibatch size
    l2=1e-9,  # strength of L2 regularization
    learning_rate=2e-2,
    use_cuda=torch.cuda.is_available())
# Implicit alternative kept for reference:
# model = ImplicitFactorizationModel(loss='bpr',
#                                    embedding_dim=128,  # latent dimensionality
#                                    n_iter=10,  # number of epochs of training
#                                    batch_size=256,  # minibatch size
#                                    l2=1e-9,  # strength of L2 regularization
#                                    learning_rate=1e-2,
#                                    use_cuda=torch.cuda.is_available())
from spotlight.cross_validation import random_train_test_split

# Fixed seed so the split is reproducible across runs.
train, test = random_train_test_split(dataset,
                                      random_state=np.random.RandomState(42))
コード例 #19
0
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.evaluation import rmse_score
from spotlight.factorization.explicit import ExplicitFactorizationModel

# Minimal end-to-end example: one training epoch on MovieLens 100K,
# then report RMSE on a random held-out split.
dataset = get_movielens_dataset(variant='100K')

train, test = random_train_test_split(dataset)

model = ExplicitFactorizationModel(n_iter=1)
model.fit(train)

rmse = rmse_score(model, test)

print(rmse)
コード例 #20
0
ファイル: run_spotlight.py プロジェクト: zion-dup/pytorch-rec
import numpy as np

from spotlight.interactions import Interactions
from spotlight.factorization.explicit import ExplicitFactorizationModel

# Leave-one-out train/test splits previously saved as .npz archives.
# Feature columns are (user, item) int pairs; scores become a column vector.
train = np.load('data/loocv_train.npz')
test = np.load('data/loocv_test.npz')
train_feat = train['train_feat'].astype('int64')
train_scor = train['train_scor'][:, None].astype('float32')
test_feat = test['test_feat'].astype('int64')
test_scor = test['test_scor'][:, None].astype('float32')

# NOTE(review): `torch` is used here but not imported in this snippet --
# presumably imported elsewhere in the original file; verify before running.
model = ExplicitFactorizationModel(
    loss='regression',
    embedding_dim=64,  # latent dimensionality
    n_iter=20,  # number of epochs of training
    batch_size=1024 * 4,  # minibatch size
    l2=1e-9,  # strength of L2 regularization
    learning_rate=1e-3,
    use_cuda=torch.cuda.is_available())

def features(feat, scor):
    """Split a feature matrix and score column into (user, item, rating) arrays."""
    ids = feat.astype('int64')
    return ids[:, 0], ids[:, 1], scor.astype('float32')[:, 0]


# Wrap the training triplets in an Interactions dataset and fit;
# the test triplets stay as raw arrays for later evaluation.
train = Interactions(*features(train_feat, train_scor))
test_user, test_item, test_y = features(test_feat, test_scor)
model.fit(train, verbose=True)
コード例 #21
0
from spotlight.cross_validation import random_train_test_split
from spotlight.evaluation import rmse_score

# testing with 100k user interactions
dataset = get_movielens_dataset(variant='100K')

# base, low rank factorization adopted in netflix
# part of collaborative filtering algorithm that uses implicit and explicit interaction matrices.
# spotlight gives out the interactions class for each dataset curated.
# Hyperparameters: loss function, embedding size, epochs, minibatch size,
# L2 regularization strength, learning rate.
model = ExplicitFactorizationModel(loss='regression',
                                   embedding_dim=256,
                                   n_iter=50,
                                   batch_size=1024,
                                   l2=1e-9,
                                   learning_rate=1e-3,
                                   use_cuda=torch.cuda.is_available())

# split training and testing data (fixed seed for a reproducible split)
train, test = random_train_test_split(dataset,
                                      random_state=np.random.RandomState(42))
# print('Split into \n {} and \n {}.'.format(train, test))

# fit the model - likely to overfit (large embedding, 50 epochs, tiny l2)
model.fit(train, verbose=True)

# using root mean squared error measure on both splits
train_rmse = rmse_score(model, train)
test_rmse = rmse_score(model, test)
コード例 #22
0
## Build interactions object (building torch tensors underneath)
log.info("Building interactions object")
interactions = Interactions(
    item_ids=ratings_df.movie_int.astype(np.int32).values,
    user_ids=ratings_df.user_int.astype(np.int32).values,
    num_items=len(ratings_df.movie_int.unique()),
    num_users=len(ratings_df.user_int.unique()),
    ratings=ratings_df.vote.astype(np.float32).values)

## Build Explicit Matrix Factorization Model
# We use logistic loss since the interaction rating is binary (-1, 1)
log.info(
    "Training the recommendation engine model using Explicit Matrix Factorization"
)
model = ExplicitFactorizationModel(loss='logistic', n_iter=10)
model.fit(interactions)

# Prepare to get predictions out for each user
full_movies = movie_ind.movie_int.unique()
recommendations = []
# Convert datetime to string to ensure serialization success
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
batch_count = 0

for device, user_row in user_ind.iterrows():
    # Get list of all movies this user voted on
    log.info("Generating recommendations for user {}".format(device))
    user = user_row.user_int
    user_votes = ratings_df[ratings_df.user_int == user].movie_int.unique()
    # Calculate difference in the two lists - rate those movies only
コード例 #23
0
def trainModelUntilOverfit(dataset, modelSteps, modelIterations,
                           numberDataSplits, embedding_dim, learning_rate):
    """Repeatedly fit an ExplicitFactorizationModel, tracking train/test RMSE.

    The dataset is split 80/20; the training portion is optionally divided
    into ``numberDataSplits`` chunks visited round-robin.  After each fit,
    predictions for every (user, item) pair are computed and the RMSE on both
    splits is recorded; training stops early when ``stopTraining`` detects
    overfitting.

    Returns
    -------
    tuple
        ``(model, rmseResults)``: the fitted model and an array with one
        ``[train RMSE, test RMSE]`` row per completed step.
    """
    numUsers = dataset.num_users
    numMovies = dataset.num_items
    train, test = random_train_test_split(dataset, 0.2)

    print('Split into \n {} and \n {}.'.format(train, test))

    # Fixed seed so repeated runs are comparable.
    seed = np.random.RandomState(seed=55555)
    model = ExplicitFactorizationModel(n_iter=modelIterations,
                                       embedding_dim=embedding_dim,
                                       learning_rate=learning_rate,
                                       random_state=seed)

    rmseResults = np.empty((modelSteps * numberDataSplits, 2))

    if (numberDataSplits > 1):
        arraySplits = dataSplit(train, numberDataSplits)
        print("Data set split into", len(arraySplits), "*", (arraySplits[1]))

    # Each model step fits the entire dataset.
    arrayOfSteps = []
    splitCounter = 0  # index of the chunk fitted in the current step
    fullStepCounter = 0  # increases each time the entire data set has been visited
    for i in range(modelSteps * numberDataSplits):
        print("\nStarting step", fullStepCounter)
        print("Data split", splitCounter)
        if (numberDataSplits == 1):
            model.fit(train, verbose=True)
        elif (numberDataSplits > 1):
            print(arraySplits[splitCounter])
            model.fit(arraySplits[splitCounter], verbose=True)
        else:
            print("Invalid number of data splits")
            break

        # Predictions for every user over all items, shape (numUsers, numMovies).
        # NOTE(review): modelPredict is computed but never used or returned --
        # kept to preserve behavior, but it looks like dead work; confirm
        # whether a t-SNE consumer was dropped before removing it.
        modelPredict = np.empty((numUsers, numMovies))
        for userIndex in range(numUsers):
            modelPredict[userIndex, :] = model.predict(userIndex)

        # Transpose for t-SNE formatting (should be more rows than columns).
        modelPredict = modelPredict.T

        # Measure prediction quality on both splits.  (The original also
        # computed an unused duplicate `rmse = rmse_score(model, test)` and
        # carried dead locals `indexPreviousClosest` / `currentStep`; removed.)
        rmseTrain = rmse_score(model, train)
        rmseTest = rmse_score(model, test)
        print("RMSE TEST:", rmseTest, "\n")
        rmseResults[i, :] = [rmseTrain, rmseTest]
        arrayOfSteps += [i]

        # Stop early once the RMSE trend indicates overfitting.
        if (stopTraining(rmseResults, arrayOfSteps)):
            rmseResults = rmseResults[:len(arrayOfSteps)]
            break

        if (numberDataSplits > 1):
            splitCounter += 1
            if (splitCounter >= len(arraySplits)):
                splitCounter = 0
                fullStepCounter += 1

    return (model, rmseResults)
コード例 #24
0
    mrrs = []
    rs = np.random.RandomState(100)
    pdb.set_trace()
    for i in range(5):
        print('Split - {} , Run {}'.format(split, i))
        train, test = random_train_test_split(dataset,
                                              random_state=rs,
                                              test_percentage=split)
        if args.model == 'implicit':
            model = ImplicitFactorizationModel(n_iter=args.n_epoch,
                                               loss=args.loss,
                                               use_cuda=True,
                                               learning_rate=args.lr,
                                               representation=args.net)
        elif args.model == 'explicit':
            model = ExplicitFactorizationModel(n_iter=args.n_epoch,
                                               loss=args.loss,
                                               use_cuda=True,
                                               learning_rate=args.lr)
        model.fit(train, verbose=0)

        rmse = rmse_score(model, test)
        rmses.append(rmse)
        mrr = mrr_score(model, test)
        mrrs.append(np.mean(mrr))

# Aggregate the per-run scores collected in the loop above.
rmses = np.array(rmses)
mrrs = np.array(mrrs)
print('RMSE: {} +- {}'.format(np.mean(rmses), np.var(rmses)))
print('MRR: {} +- {}'.format(np.mean(mrrs), np.var(mrrs)))
# Build an Interactions dataset from dataframe columns.
users = df.UserKey
prods = df.ProdKey
ratings = df.overall

users1 = users.to_numpy(dtype=int)
prods1 = prods.to_numpy(dtype=int)
ratings1 = ratings.to_numpy(dtype=float)

interaction = Interactions(users1,prods1,ratings1)

# Fixed seed for a reproducible split.
train, test = random_train_test_split(interaction, random_state=np.random.RandomState(42))

print('Split into \n {} and \n {}.'.format(train, test))

# Time a single-epoch fit and report RMSE on both splits.
starttime = datetime.now()
model = ExplicitFactorizationModel(n_iter=1)
model.fit(train, verbose=True)

train_rmse = rmse_score(model, train)
test_rmse = rmse_score(model, test)

stoptime = datetime.now()
runtime = stoptime - starttime
print('Runtime:{}'.format(runtime))
print('Split into \n training dataset size: {} \n testing dataset size: {}.'.format(train, test))
print('Train RMSE {:.3f}, test RMSE {:.3f}'.format(train_rmse, test_rmse))



"""
コード例 #26
0
from spotlight.evaluation import sequence_mrr_score
from spotlight.factorization.explicit import ExplicitFactorizationModel
from spotlight.factorization.implicit import ImplicitFactorizationModel
from spotlight.sequence.implicit import ImplicitSequenceModel

# MovieLens 100K with one random split shared by all models below.
dataset = get_movielens_dataset(variant='100K')
train, test = random_train_test_split(dataset)


def train_and_test(model, train, test, score):
    """Fit *model* on *train*, then print its *score* on *test*."""
    print('Train and test {}'.format(model))
    model.fit(train, verbose=True)
    print('score({}): {}'.format(score, score(model, test)))


# Explicit factorization, scored with RMSE.
explicit_model = ExplicitFactorizationModel(n_iter=1)
train_and_test(explicit_model, train, test, rmse_score)

# Implicit factorization (BPR loss), also scored with RMSE here.
implicit_model = ImplicitFactorizationModel(n_iter=3, loss='bpr')
train_and_test(implicit_model, train, test, rmse_score)

# Sequence models require sequence-formatted interactions.
train = train.to_sequence()
test = test.to_sequence()

# CNN sequence model, scored with sequence MRR instead of RMSE.
implicit_cnn_model = ImplicitSequenceModel(n_iter=3,
                                           representation='cnn',
                                           loss='bpr')
train_and_test(implicit_cnn_model, train, test, sequence_mrr_score)