예제 #1
0
def evaluate_cnn_model(hyperparameters, train, test, validation, random_state):
    """Train a CNN-based implicit sequence model and report mean MRR.

    Returns a pair of one-entry dicts: ({'mrr': test_mrr}, {'mrr': val_mrr}).
    """
    params = hyperparameters

    representation = CNNNet(
        train.num_items,
        kernel_width=params['kernel_width'],
        dilation=params['dilation'],
        num_layers=params['num_layers'],
        nonlinearity=params['nonlinearity'],
        residual_connections=params['residual'],
    )

    model = ImplicitSequenceModel(
        loss=params['loss'],
        representation=representation,
        n_iter=params['n_iter'],
        use_cuda=CUDA,
        random_state=random_state,
    )
    model.fit(train, verbose=True)

    test_eval = {'mrr': sequence_mrr_score(model, test).mean()}
    val_eval = {'mrr': sequence_mrr_score(model, validation).mean()}

    return test_eval, val_eval
예제 #2
0
def evaluate_cnn_model(hyperparameters, train, test, validation, random_state):
    """Fit a CNN sequence model from a hyperparameter dict.

    Returns the per-sequence MRR arrays for the test and validation sets.
    """
    params = hyperparameters

    representation = CNNNet(
        train.num_items,
        embedding_dim=params['embedding_dim'],
        kernel_width=params['kernel_width'],
        dilation=params['dilation'],
        num_layers=params['num_layers'],
        nonlinearity=params['nonlinearity'],
        residual_connections=params['residual'],
    )

    model = ImplicitSequenceModel(
        loss=params['loss'],
        representation=representation,
        batch_size=params['batch_size'],
        learning_rate=params['learning_rate'],
        l2=params['l2'],
        n_iter=params['n_iter'],
        use_cuda=CUDA,
        random_state=random_state,
    )
    model.fit(train, verbose=True)

    return (sequence_mrr_score(model, test),
            sequence_mrr_score(model, validation))
예제 #3
0
def evaluate_cnn_model(hyperparameters, train, test, validation, random_state):
    """Train a CNN-backed ImplicitSequenceModel and score it.

    Returns (test_mrr, val_mrr) as per-sequence MRR arrays.
    """
    h = hyperparameters

    net = CNNNet(train.num_items,
                 embedding_dim=h['embedding_dim'],
                 kernel_width=h['kernel_width'],
                 dilation=h['dilation'],
                 num_layers=h['num_layers'],
                 nonlinearity=h['nonlinearity'],
                 residual_connections=h['residual'])

    # Remaining optimisation hyperparameters go straight to the model.
    model = ImplicitSequenceModel(
        loss=h['loss'],
        representation=net,
        batch_size=h['batch_size'],
        learning_rate=h['learning_rate'],
        l2=h['l2'],
        n_iter=h['n_iter'],
        use_cuda=CUDA,
        random_state=random_state,
    )

    model.fit(train, verbose=True)

    mrr_test = sequence_mrr_score(model, test)
    mrr_val = sequence_mrr_score(model, validation)

    return mrr_test, mrr_val
예제 #4
0
    def objective(hyper):
        """Hyperopt objective: train a sequence model, return -validation MRR.

        ``hyper['model']`` chooses between an LSH-based item embedding
        (pre-fitted on item co-occurrences) and a plain scaled embedding.
        Returns a hyperopt result dict with STATUS_OK.
        """
        print(hyper)

        # Bug fix: time.clock() was removed in Python 3.8;
        # time.perf_counter() is the documented replacement for
        # measuring elapsed wall time.
        start = time.perf_counter()

        if hyper['model']['type'] == 'lsh':
            num_hashes = int(hyper['model']['num_hash_functions'])
            num_layers = int(hyper['model']['num_layers'])
            nonlinearity = hyper['model']['nonlinearity']
            residual = hyper['model']['residual']
            embed = hyper['model']['embed']

            # LSH embeddings are fitted on the transposed item matrix
            # before being handed to the network.
            item_embeddings = LSHEmbedding(train.num_items,
                                           int(hyper['embedding_dim']),
                                           embed=embed,
                                           residual_connections=residual,
                                           nonlinearity=nonlinearity,
                                           num_layers=num_layers,
                                           num_hash_functions=num_hashes)
            item_embeddings.fit(train_nonsequence.tocsr().T)
        else:
            item_embeddings = ScaledEmbedding(train.num_items,
                                              int(hyper['embedding_dim']),
                                              padding_idx=0)

        network = LSTMNet(train.num_items,
                          int(hyper['embedding_dim']),
                          item_embedding_layer=item_embeddings)

        model = ImplicitSequenceModel(loss=hyper['loss'],
                                      n_iter=int(hyper['n_iter']),
                                      batch_size=int(hyper['batch_size']),
                                      learning_rate=hyper['learning_rate'],
                                      embedding_dim=int(
                                          hyper['embedding_dim']),
                                      l2=hyper['l2'],
                                      representation=network,
                                      use_cuda=CUDA,
                                      random_state=random_state)

        model.fit(train, verbose=True)

        elapsed = time.perf_counter() - start

        print(model)

        validation_mrr = sequence_mrr_score(model, validation).mean()
        test_mrr = sequence_mrr_score(model, test).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        # Hyperopt minimises 'loss', so negate the validation MRR.
        return {
            'loss': -validation_mrr,
            'status': STATUS_OK,
            'validation_mrr': validation_mrr,
            'test_mrr': test_mrr,
            'elapsed': elapsed,
            'hyper': hyper
        }
    def objective(space):
        """Objective function for Spotlight ImplicitFactorizationModel

        Trains an ImplicitSequenceModel with the sampled hyperparameters
        and returns a hyperopt result dict; training failures and NaN
        validation scores are reported with STATUS_FAIL.
        """

        batch_size = int(space['batch_size'])
        embedding_dim = int(space['embedding_dim'])
        l2 = space['l2']
        learn_rate = space['learn_rate']
        loss = space['loss']
        n_iter = int(space['n_iter'])
        representation = space['representation']

        model = ImplicitSequenceModel(
            loss=loss,
            embedding_dim=embedding_dim,
            batch_size=batch_size,
            representation=representation,
            learning_rate=learn_rate,
            n_iter=n_iter,
            l2=l2,
            use_cuda=CUDA)

        # Bug fix: time.clock() was removed in Python 3.8;
        # use time.perf_counter() for elapsed-time measurement.
        start = time.perf_counter()

        try:
            model.fit(train, verbose=True)
        except ValueError:
            # Report a failed trial rather than crashing the search.
            elapsed = time.perf_counter() - start
            return {'loss': 0.0,
                    'status': STATUS_FAIL,
                    'validation_mrr': 0.0,
                    'test_mrr': 0.0,
                    'elapsed': elapsed,
                    'hyper': space}
        elapsed = time.perf_counter() - start
        print(model)

        validation_mrr = sequence_mrr_score(model, valid).mean()
        test_mrr = sequence_mrr_score(model, test).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        # A NaN score means the model diverged; flag the trial as failed.
        if np.isnan(validation_mrr):
            status = STATUS_FAIL
        else:
            status = STATUS_OK

        return {'loss': -validation_mrr,
                'status': status,
                'validation_mrr': validation_mrr,
                'test_mrr': test_mrr,
                'elapsed': elapsed,
                'hyper': space}
예제 #6
0
def _evaluate(model, test):
    """Compute per-sequence MRR on ``test`` and print the mean."""
    mrr = sequence_mrr_score(model, test)
    print('Test MRR {}'.format(mrr.mean()))
    return mrr
예제 #7
0
def evaluate_model(model, train, test, validation):
    """Fit ``model`` on ``train``, then score it on test/validation.

    Datasets exposing ``.sequences`` are scored with sequence MRR;
    interaction matrices fall back to plain MRR (validation MRR excludes
    interactions present in the combined test+validation matrix).
    Returns (test_mrr, val_mrr, elapsed_seconds).
    """
    started = time.time()
    model.fit(train, verbose=True)
    elapsed = time.time() - started

    print('Elapsed {}'.format(elapsed))
    print(model)

    is_sequence_data = hasattr(test, 'sequences')
    if is_sequence_data:
        test_mrr = sequence_mrr_score(model, test)
        val_mrr = sequence_mrr_score(model, validation)
    else:
        test_mrr = mrr_score(model, test)
        val_mrr = mrr_score(model, test.tocsr() + validation.tocsr())

    return test_mrr, val_mrr, elapsed
def _evaluate(model, test):
    """Score ``model`` on ``test`` with sequence MRR, printing the mean."""
    scores = sequence_mrr_score(model, test)
    mean_mrr = scores.mean()
    print('Test MRR {}'.format(mean_mrr))
    return scores
예제 #9
0
def evaluate_lstm_model(hyperparameters, train, test, validation,
                        random_state):
    """Train an LSTM implicit sequence model; return mean-MRR dicts.

    Returns ({'mrr': test_mrr}, {'mrr': val_mrr}).
    """
    params = hyperparameters

    model = ImplicitSequenceModel(
        loss=params['loss'],
        representation='lstm',
        n_iter=params['n_iter'],
        use_cuda=CUDA,
        random_state=random_state,
    )
    model.fit(train, verbose=True)

    return ({'mrr': sequence_mrr_score(model, test).mean()},
            {'mrr': sequence_mrr_score(model, validation).mean()})
예제 #10
0
def evaluate_pooling_model(hyperparameters, train, test, validation, random_state):
    """Train a pooling-representation sequence model and score it.

    Returns (test_mrr, val_mrr) as per-sequence MRR arrays.
    """
    params = hyperparameters

    model = ImplicitSequenceModel(
        loss=params['loss'],
        representation='pooling',
        batch_size=params['batch_size'],
        learning_rate=params['learning_rate'],
        l2=params['l2'],
        n_iter=params['n_iter'],
        use_cuda=CUDA,
        random_state=random_state,
    )
    model.fit(train, verbose=True)

    return (sequence_mrr_score(model, test),
            sequence_mrr_score(model, validation))
예제 #11
0
def evaluate_pooling_model(hyperparameters, train, test, validation,
                           random_state):
    """Fit a pooling-based ImplicitSequenceModel from a hyperparameter dict.

    Returns the per-sequence MRR arrays for the test and validation sets.
    """
    h = hyperparameters

    model = ImplicitSequenceModel(loss=h['loss'],
                                  representation='pooling',
                                  batch_size=h['batch_size'],
                                  learning_rate=h['learning_rate'],
                                  l2=h['l2'],
                                  n_iter=h['n_iter'],
                                  use_cuda=CUDA,
                                  random_state=random_state)

    model.fit(train, verbose=True)

    mrr_test = sequence_mrr_score(model, test)
    mrr_val = sequence_mrr_score(model, validation)

    return mrr_test, mrr_val
def evaluate_lstm_model(hyperparameters, train, test, validation,
                        random_state):
    """Train an LSTM sequence model with explicit negative sampling.

    Returns (test_mrr, val_mrr) as per-sequence MRR arrays.
    """
    params = hyperparameters

    model = ImplicitSequenceModel(
        loss=params['loss'],
        representation='lstm',
        batch_size=params['batch_size'],
        learning_rate=params['learning_rate'],
        l2=params['l2'],
        n_iter=params['n_iter'],
        use_cuda=CUDA,
        random_state=random_state,
        # Number of negatives per positive during loss computation.
        num_negative_samples=params["num_negative_samples"],
    )

    model.fit(train, verbose=False)

    mrr_test = sequence_mrr_score(model, test)
    mrr_val = sequence_mrr_score(model, validation)

    return mrr_test, mrr_val
    def evaluation(self, model, interactions: tuple):
        """Evaluates models on a number of metrics

        Takes model and evaluates it by Precision@K/Recall@K, Mean Reciprocal Rank metrics.

        Args:
            model (Arbitrary): A Spotlight model, can be of different types.
            sets (tuple): (spotlight.interactions.Interactions, spotlight.interactions.Interactions), A tuple of (train data, test data).

        Returns:
            dict: A dictionary with all the evaluation metrics.

        """

        logger = logging.getLogger()
        train, test = interactions

        logger.info("Beginning model evaluation...")

        # Sequence models need the sequence-aware MRR scorer.
        if self._models in ('S_POOL', 'S_CNN', 'S_LSTM'):
            mrr = sequence_mrr_score(model, test).mean()
        else:
            mrr = mrr_score(model, test).mean()
        logger.info('MRR {:.8f}'.format(
            mrr
        ))

        k = 3

        # NOTE(review): sequence_precision_recall_score is applied even for
        # the non-sequence branch above — confirm this is intended.
        prec, rec = sequence_precision_recall_score(
            model=model,
            test=test,
            k=k,
        )
        mean_prec = prec.mean()
        mean_rec = rec.mean()
        logger.info('Precision@{k} {:.8f}'.format(
            mean_prec,
            k=k
        ))
        logger.info('Recall@{k} {:.8f}'.format(
            mean_rec,
            k=k
        ))

        # Bug fix: guard against ZeroDivisionError when both precision
        # and recall are zero (previously f1 divided by their sum blindly).
        denominator = mean_prec + mean_rec
        f1 = 2 * (mean_prec * mean_rec) / denominator if denominator else 0.0

        return {
            'test': {
                'precision': mean_prec,
                'recall': mean_rec,
                'f1': f1,
                'mrr': mrr,
            },
        }
    def resultados_secuencia(self):
        """Compute and print metrics for the implicit sequence model.

        Only used by the text-based interface.
        """
        global train, test, modelo

        # Mean reciprocal rank over the test sequences.
        mrr = sequence_mrr_score(modelo, test).mean()

        # Display the metrics.
        imprimir_resultados_dl(mrr)
예제 #15
0
def test_implicit_sequence_serialization(data):
    """Round-trip an ImplicitSequenceModel and check its MRR is unchanged."""
    train, test = data
    train = train.to_sequence(max_sequence_length=128)
    test = test.to_sequence(max_sequence_length=128)

    representation = CNNNet(train.num_items,
                            embedding_dim=32,
                            kernel_width=3,
                            dilation=(1, ),
                            num_layers=1)
    model = ImplicitSequenceModel(loss='bpr',
                                  representation=representation,
                                  batch_size=128,
                                  learning_rate=1e-1,
                                  l2=0.0,
                                  n_iter=5,
                                  random_state=RANDOM_STATE,
                                  use_cuda=CUDA)
    model.fit(train)

    # Serialisation must not change predictions, hence identical MRR.
    mrr_before = sequence_mrr_score(model, test).mean()
    mrr_after = sequence_mrr_score(_reload(model), test).mean()

    assert mrr_before == mrr_after
def train_model(df, hyperparams):
    """Fit an ImplicitSequenceModel on the interactions in ``df``.

    ``df`` must provide 'user_id', 'item_id' and 'entry_at' columns;
    ``hyperparams`` is expanded into the model constructor. Prints the
    test-set MRR and returns the fitted model.
    """
    # Fix the random state for reproducibility.
    seed = 42
    set_seed(seed)
    random_state = np.random.RandomState(seed)

    sequence_kwargs = dict(max_sequence_length=15,
                           min_sequence_length=2,
                           step_size=1)

    # Build the dataset from the interactions dataframe and timestamps.
    dataset = Interactions(user_ids=np.array(df['user_id'], dtype='int32'),
                           item_ids=np.array(df['item_id'], dtype='int32'),
                           timestamps=df['entry_at'])

    # 80/20 user-based split, then expand into fixed-length sequences.
    train, test = user_based_train_test_split(dataset,
                                              test_percentage=0.2,
                                              random_state=random_state)
    train = train.to_sequence(**sequence_kwargs)
    test = test.to_sequence(**sequence_kwargs)

    print('data: {}'.format(train))

    # Initialise and train the model.
    model = ImplicitSequenceModel(**hyperparams,
                                  use_cuda=CUDA,
                                  random_state=random_state)
    model.fit(train, verbose=True)

    # Report the MRR score on the held-out test set.
    test_mrr = sequence_mrr_score(model, test).mean()
    print('MRR score on test set: {}'.format(test_mrr))

    return model
    def obtener_metricas_gui(self):
        """Collect the metrics of the chosen model (web interface only).

        Returns
        -------

        metricas_devueltas: dict
            Dictionary with the model's metrics, formatted for display.
        """

        global train, test, modelo

        def fmt(value):
            # Every metric is shown with four decimal places.
            return format(value, '.4f')

        # Compute the metrics for the selected model type.
        if self.opcion_modelo == 1:
            rmse = rmse_score(modelo, test)
            mrr = mrr_score(modelo, test, train=train).mean()
            precision, recall = precision_recall_score(modelo, test, train=train, k=10)
            metricas_devueltas = {"RMSE": fmt(rmse),
                                  "MRR": fmt(mrr),
                                  "Precisión k": fmt(precision.mean()),
                                  "Recall k": fmt(recall.mean())}
        elif self.opcion_modelo == 2:
            mrr = mrr_score(modelo, test, train=train).mean()
            precision, recall = precision_recall_score(modelo, test, train=train, k=10)
            metricas_devueltas = {"MRR": fmt(mrr),
                                  "Precisión k": fmt(precision.mean()),
                                  "Recall k": fmt(recall.mean())}
        else:
            mrr = sequence_mrr_score(modelo, test).mean()
            metricas_devueltas = {"MRR": fmt(mrr)}

        # The persisted form wraps every value in a single-element list.
        metricas_a_guardar = {clave: [valor]
                              for clave, valor in metricas_devueltas.items()}

        # Save the metrics to a .csv file.
        guardar_resultados(metricas_a_guardar)

        return metricas_devueltas
예제 #18
0
df_train = pd.read_csv(train_csv)

# NOTE(review): user_based_train_test_split normally expects an Interactions
# dataset, not a CSV path, and df_train is never used below — confirm how the
# dataset is meant to be constructed here.
train, test = user_based_train_test_split(train_csv)
train = train.to_sequence()
test = test.to_sequence()

print("Build and Fit Implicit Sequence Model")
model = ImplicitSequenceModel(n_iter=3, representation='cnn', loss='bpr')

model.fit(train)

print("Calculate MRR Score")
# Bug fix: score against the sequence test set, not the raw CSV path string.
mrr = sequence_mrr_score(model, test)
print("MRR Result: ", mrr)

print("Calculate Recommendations")
# Extract user ids from the raw test data for the recommendation step.
df_test = pd.read_csv(test_csv)
user_ids = df_test[['user_id']]
# Call the recommendation algorithm.
df_out = pd.DataFrame()
df_out = recommendation(model, df_out, df_test, user_ids)

# Write the result to a csv file.
print(f"Writing {subm_csv}...")
df_out.to_csv(subm_csv, index=False)
예제 #19
0
    def objective(hyper):
        """Hyperopt objective over several sequence representations.

        ``hyper['model']['type']`` selects the representation (pooling,
        LSTM, mixture variants); the diversified mixture also swaps the
        model class. Returns a hyperopt result dict whose ``loss`` is the
        negated validation MRR; failures are flagged with STATUS_FAIL.
        """
        print(hyper)

        # Bug fix: time.clock() was removed in Python 3.8;
        # time.perf_counter() is the documented replacement.
        start = time.perf_counter()

        h = hyper['model']

        cls = ImplicitSequenceModel

        if h['type'] == 'pooling':
            representation = PoolNet(train.num_items,
                                     embedding_dim=int(h['embedding_dim']))
        elif h['type'] == 'lstm':
            representation = LSTMNet(train.num_items,
                                     embedding_dim=int(h['embedding_dim']))
        elif h['type'] == 'mixture':
            num_components = int(h['num_components'])
            embedding_dim = int(h['embedding_dim'])
            representation = MixtureLSTMNet(train.num_items,
                                            num_components=num_components,
                                            embedding_dim=embedding_dim)
        elif h['type'] == 'mixture2':
            num_components = int(h['num_components'])
            embedding_dim = int(h['embedding_dim'])
            representation = Mixture2LSTMNet(train.num_items,
                                             num_components=num_components,
                                             embedding_dim=embedding_dim)
        elif h['type'] == 'linear_mixture':
            num_components = int(h['num_components'])
            embedding_dim = int(h['embedding_dim'])
            representation = LinearMixtureLSTMNet(train.num_items,
                                                  num_components=num_components,
                                                  embedding_dim=embedding_dim)
        elif h['type'] == 'diversified_mixture_fixed':
            num_components = int(h['num_components'])
            embedding_dim = int(h['embedding_dim'])
            representation = DiversifiedMixtureLSTMNet(train.num_items,
                                                       num_components=num_components,
                                                       diversity_penalty=h['diversity_penalty'],
                                                       embedding_dim=embedding_dim)
            # The diversified representation needs its own model class.
            cls = DiversifiedImplicitSequenceModel
        else:
            raise ValueError('Unknown model type')

        model = cls(
            batch_size=int(h['batch_size']),
            loss=h['loss'],
            learning_rate=h['learning_rate'],
            l2=h['l2'],
            n_iter=int(h['n_iter']),
            representation=representation,
            use_cuda=CUDA,
            random_state=np.random.RandomState(42)
        )

        try:
            model.fit(train, verbose=True)
        except ValueError:
            # Report a failed trial rather than crashing the search.
            elapsed = time.perf_counter() - start
            return {'loss': 0.0,
                    'status': STATUS_FAIL,
                    'validation_mrr': 0.0,
                    'test_mrr': 0.0,
                    'elapsed': elapsed,
                    'hyper': h}

        elapsed = time.perf_counter() - start

        print(model)

        validation_mrr = sequence_mrr_score(
            model,
            validation,
            exclude_preceding=True
        ).mean()
        test_mrr = sequence_mrr_score(
            model,
            test,
            exclude_preceding=True
        ).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        # A NaN score means the model diverged; flag the trial as failed.
        if np.isnan(validation_mrr):
            status = STATUS_FAIL
        else:
            status = STATUS_OK

        return {'loss': -validation_mrr,
                'status': status,
                'validation_mrr': validation_mrr,
                'test_mrr': test_mrr,
                'elapsed': elapsed,
                'hyper': h}
예제 #20
0
                           timestamps=timeStamps)

    if name == "test":
        dataset_test = dataset
    elif name == "train":
        dataset_train = dataset

# Build the model requested by ``model_mode`` (lower-cased once up front).
mode = model_mode.lower()
if mode == "ifm":
    model = ImplicitFactorizationModel(n_iter=n_iter)
if mode == "efm":
    model = ExplicitFactorizationModel(n_iter=n_iter)
if mode == "cnn":
    net = CNNNet(num_items=int(foods_items))
    model = ImplicitSequenceModel(n_iter=n_iter,
                                  use_cuda=torch.cuda.is_available(),
                                  representation=net)

model.fit(dataset_train)

# Persist the fitted model.
with open(save_file, 'wb') as f:
    pickle.dump(model, f, pickle.HIGHEST_PROTOCOL)

# Sequence models need the sequence-aware MRR scorer.
if mode == "cnn":
    mrr = sequence_mrr_score(model, dataset_test)
else:
    mrr = mrr_score(model, dataset_test)

print("mrr = ", len(mrr))
print("mean mrr = ", sum(mrr) / len(mrr))
rank = 1 / (sum(mrr) / len(mrr))
print("average rank = ", rank)
예제 #21
0
File: run.py  Project: qianrenjian/mixture
def _evaluate(model, test):
    """Return mean sequence MRR on ``test``, excluding preceding items."""
    return sequence_mrr_score(model, test, exclude_preceding=True).mean()
예제 #22
0
                                    min_sequence_length=min_sequence_length,
                                    step_size=step_size)

# LSTM representation sized to the number of distinct items.
net = LSTMNet(len(set(item2idx)),
              embedding_dim=32,
              item_embedding_layer=None,
              sparse=False)
model = ImplicitSequenceModel(loss='adaptive_hinge',
                              representation=net,
                              batch_size=32,
                              learning_rate=0.01,
                              l2=10e-6,
                              n_iter=10,
                              use_cuda=False,
                              random_state=random_state)
model.fit(train, verbose=True)

# Sequence MRR for each split.
mrr_test = sequence_mrr_score(model, test)
mrr_val = sequence_mrr_score(model, validation)
mrr_train = sequence_mrr_score(model, train)

print(mrr_test.mean(), mrr_val.mean(), mrr_train.mean())

# Precision/recall at several cut-offs per split.
for split, split_name in ((train, "train"), (validation, "validation"),
                          (test, "test")):
    for k in (5, 10, 50, 100):
        precision, recall = sequence_precision_recall_score(
            model, split, k=k, exclude_preceding=False)
        print(split_name, "precision at", k, precision.mean())
        print(split_name, "recall at", k, recall.mean())
예제 #23
0
    def objective(space):
        """Hyperopt objective for mLSTM/LSTM sequence models.

        Builds the requested representation, trains an
        ImplicitSequenceModel, and returns a hyperopt result dict whose
        ``loss`` is the negated validation MRR; failures and NaN scores
        are flagged with STATUS_FAIL.
        """
        batch_size = int(space['batch_size'])
        learn_rate = space['learn_rate']
        loss = space['loss']
        n_iter = int(space['n_iter'])
        embedding_dim = int(space['embedding_dim'])
        l2 = space['l2']

        if space['type'] == 'mlstm':
            representation = mLSTMNet(
                train.num_items,
                embedding_dim=embedding_dim)
            model = ImplicitSequenceModel(
                loss=loss,
                batch_size=batch_size,
                representation=representation,
                learning_rate=learn_rate,
                n_iter=n_iter,
                l2=l2,
                use_cuda=CUDA,
                random_state=random_state)
        elif space['type'] == 'lstm':
            representation = space['representation']
            model = ImplicitSequenceModel(
                loss=loss,
                embedding_dim=embedding_dim,
                batch_size=batch_size,
                representation=representation,
                learning_rate=learn_rate,
                n_iter=n_iter,
                l2=l2,
                use_cuda=CUDA,
                random_state=random_state)
        else:
            raise ValueError('Unknown model type {}'.format(space.get('type', 'NA')))

        # Bug fix: time.clock() was removed in Python 3.8;
        # time.perf_counter() is the documented replacement.
        start = time.perf_counter()
        try:
            model.fit(train, verbose=True)
        except ValueError:
            # Report a failed trial rather than crashing the search.
            elapsed = time.perf_counter() - start
            return {'loss': 0.0,
                    'status': STATUS_FAIL,
                    'validation_mrr': 0.0,
                    'test_mrr': 0.0,
                    'elapsed': elapsed,
                    'hyper': space}
        elapsed = time.perf_counter() - start
        print(model)

        validation_mrr = sequence_mrr_score(
            model,
            valid,
            exclude_preceding=True
        ).mean()
        test_mrr = sequence_mrr_score(
            model,
            test,
            exclude_preceding=True
        ).mean()

        print('MRR {} {}'.format(validation_mrr, test_mrr))

        # A NaN score means the model diverged; flag the trial as failed.
        if np.isnan(validation_mrr):
            status = STATUS_FAIL
        else:
            status = STATUS_OK

        return {'loss': -validation_mrr,
                'status': status,
                'validation_mrr': validation_mrr,
                'test_mrr': test_mrr,
                'elapsed': elapsed,
                'hyper': space}