def evaluate_cnn_model(hyperparameters, train, test, validation, random_state):
    """Train a CNN sequence model and return mean MRR dicts for test/validation."""
    params = hyperparameters
    representation = CNNNet(
        train.num_items,
        kernel_width=params['kernel_width'],
        dilation=params['dilation'],
        num_layers=params['num_layers'],
        nonlinearity=params['nonlinearity'],
        residual_connections=params['residual'],
    )
    model = ImplicitSequenceModel(
        loss=params['loss'],
        representation=representation,
        n_iter=params['n_iter'],
        use_cuda=CUDA,
        random_state=random_state,
    )
    model.fit(train, verbose=True)

    # One-key dicts keep the return shape extensible for extra metrics.
    test_eval = {'mrr': sequence_mrr_score(model, test).mean()}
    val_eval = {'mrr': sequence_mrr_score(model, validation).mean()}
    return test_eval, val_eval
def evaluate_cnn_model(hyperparameters, train, test, validation, random_state):
    """Train a fully-parameterized CNN sequence model; return raw MRR arrays."""
    params = hyperparameters
    representation = CNNNet(
        train.num_items,
        embedding_dim=params['embedding_dim'],
        kernel_width=params['kernel_width'],
        dilation=params['dilation'],
        num_layers=params['num_layers'],
        nonlinearity=params['nonlinearity'],
        residual_connections=params['residual'],
    )
    model = ImplicitSequenceModel(
        loss=params['loss'],
        representation=representation,
        batch_size=params['batch_size'],
        learning_rate=params['learning_rate'],
        l2=params['l2'],
        n_iter=params['n_iter'],
        use_cuda=CUDA,
        random_state=random_state,
    )
    model.fit(train, verbose=True)

    # Per-sequence scores (not means) so callers can aggregate as they wish.
    return (sequence_mrr_score(model, test),
            sequence_mrr_score(model, validation))
def objective(hyper):
    """Hyperopt objective: train an LSTM sequence model (optionally with LSH
    item embeddings) and return the negated validation MRR as the loss.

    Parameters
    ----------
    hyper : dict
        Sampled hyperparameter assignment; ``hyper['model']`` selects the
        embedding variant ('lsh' vs. a plain scaled embedding).

    Returns
    -------
    dict
        Hyperopt result record with loss, status, metrics and timing.
    """
    print(hyper)
    # Bug fix: time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended monotonic replacement for measuring elapsed wall time.
    start = time.perf_counter()

    if hyper['model']['type'] == 'lsh':
        num_hashes = int(hyper['model']['num_hash_functions'])
        num_layers = int(hyper['model']['num_layers'])
        nonlinearity = hyper['model']['nonlinearity']
        residual = hyper['model']['residual']
        embed = hyper['model']['embed']

        item_embeddings = LSHEmbedding(train.num_items,
                                       int(hyper['embedding_dim']),
                                       embed=embed,
                                       residual_connections=residual,
                                       nonlinearity=nonlinearity,
                                       num_layers=num_layers,
                                       num_hash_functions=num_hashes)
        # LSH embeddings are fit on the item-user co-occurrence matrix.
        item_embeddings.fit(train_nonsequence.tocsr().T)
    else:
        item_embeddings = ScaledEmbedding(train.num_items,
                                          int(hyper['embedding_dim']),
                                          padding_idx=0)

    network = LSTMNet(train.num_items,
                      int(hyper['embedding_dim']),
                      item_embedding_layer=item_embeddings)

    model = ImplicitSequenceModel(loss=hyper['loss'],
                                  n_iter=int(hyper['n_iter']),
                                  batch_size=int(hyper['batch_size']),
                                  learning_rate=hyper['learning_rate'],
                                  embedding_dim=int(hyper['embedding_dim']),
                                  l2=hyper['l2'],
                                  representation=network,
                                  use_cuda=CUDA,
                                  random_state=random_state)
    model.fit(train, verbose=True)
    elapsed = time.perf_counter() - start
    print(model)

    validation_mrr = sequence_mrr_score(model, validation).mean()
    test_mrr = sequence_mrr_score(model, test).mean()
    print('MRR {} {}'.format(validation_mrr, test_mrr))

    # Hyperopt minimizes, so the loss is the negated validation MRR.
    return {
        'loss': -validation_mrr,
        'status': STATUS_OK,
        'validation_mrr': validation_mrr,
        'test_mrr': test_mrr,
        'elapsed': elapsed,
        'hyper': hyper
    }
def objective(space):
    """Objective function for Spotlight ImplicitFactorizationModel.

    Trains an ImplicitSequenceModel from a hyperopt sample and returns the
    negated validation MRR as the loss; a ValueError during fitting (or a
    NaN MRR) is reported as STATUS_FAIL rather than crashing the search.
    """
    batch_size = int(space['batch_size'])
    embedding_dim = int(space['embedding_dim'])
    l2 = space['l2']
    learn_rate = space['learn_rate']
    loss = space['loss']
    n_iter = int(space['n_iter'])
    representation = space['representation']

    model = ImplicitSequenceModel(
        loss=loss,
        embedding_dim=embedding_dim,
        batch_size=batch_size,
        representation=representation,
        learning_rate=learn_rate,
        n_iter=n_iter,
        l2=l2,
        use_cuda=CUDA)

    # Bug fix: time.clock() was removed in Python 3.8; use perf_counter().
    start = time.perf_counter()
    try:
        model.fit(train, verbose=True)
    except ValueError:
        # Degenerate hyperparameters can make training blow up; report a
        # failed trial instead of aborting the whole search.
        elapsed = time.perf_counter() - start
        return {'loss': 0.0,
                'status': STATUS_FAIL,
                'validation_mrr': 0.0,
                'test_mrr': 0.0,
                'elapsed': elapsed,
                'hyper': space}
    elapsed = time.perf_counter() - start
    print(model)

    validation_mrr = sequence_mrr_score(model, valid).mean()
    test_mrr = sequence_mrr_score(model, test).mean()
    print('MRR {} {}'.format(validation_mrr, test_mrr))

    if np.isnan(validation_mrr):
        status = STATUS_FAIL
    else:
        status = STATUS_OK

    return {'loss': -validation_mrr,
            'status': status,
            'validation_mrr': validation_mrr,
            'test_mrr': test_mrr,
            'elapsed': elapsed,
            'hyper': space}
def _evaluate(model, test):
    """Score the model on the test sequences, print the mean MRR, return the scores."""
    scores = sequence_mrr_score(model, test)
    print('Test MRR {}'.format(scores.mean()))
    return scores
def evaluate_model(model, train, test, validation):
    """Fit the model on `train` and return (test MRR, validation MRR, fit time).

    Sequence datasets (those exposing `.sequences`) are scored with the
    sequence metric; plain interaction datasets fall back to `mrr_score`.
    """
    started = time.time()
    model.fit(train, verbose=True)
    elapsed = time.time() - started

    print('Elapsed {}'.format(elapsed))
    print(model)

    is_sequence_data = hasattr(test, 'sequences')
    if is_sequence_data:
        test_mrr = sequence_mrr_score(model, test)
        val_mrr = sequence_mrr_score(model, validation)
    else:
        test_mrr = mrr_score(model, test)
        # Validation is scored against the union of test + validation interactions.
        val_mrr = mrr_score(model, test.tocsr() + validation.tocsr())

    return test_mrr, val_mrr, elapsed
def _evaluate(model, test):
    """Compute per-sequence MRR on `test`, log the mean, and return the array."""
    mrr_scores = sequence_mrr_score(model, test)
    mean_mrr = mrr_scores.mean()
    print('Test MRR {}'.format(mean_mrr))
    return mrr_scores
def evaluate_lstm_model(hyperparameters, train, test, validation, random_state):
    """Train an LSTM sequence model and return mean-MRR dicts for test/validation."""
    params = hyperparameters
    model = ImplicitSequenceModel(
        loss=params['loss'],
        representation='lstm',
        n_iter=params['n_iter'],
        use_cuda=CUDA,
        random_state=random_state,
    )
    model.fit(train, verbose=True)

    test_eval = {'mrr': sequence_mrr_score(model, test).mean()}
    val_eval = {'mrr': sequence_mrr_score(model, validation).mean()}
    return test_eval, val_eval
def evaluate_pooling_model(hyperparameters, train, test, validation, random_state):
    """Train a pooling sequence model; return per-sequence MRR arrays."""
    params = hyperparameters
    model = ImplicitSequenceModel(
        loss=params['loss'],
        representation='pooling',
        batch_size=params['batch_size'],
        learning_rate=params['learning_rate'],
        l2=params['l2'],
        n_iter=params['n_iter'],
        use_cuda=CUDA,
        random_state=random_state,
    )
    model.fit(train, verbose=True)

    return (sequence_mrr_score(model, test),
            sequence_mrr_score(model, validation))
def evaluate_lstm_model(hyperparameters, train, test, validation, random_state):
    """Train an LSTM sequence model (with explicit negative sampling count)
    and return per-sequence MRR arrays for the test and validation sets."""
    params = hyperparameters
    model = ImplicitSequenceModel(
        loss=params['loss'],
        representation='lstm',
        batch_size=params['batch_size'],
        learning_rate=params['learning_rate'],
        l2=params['l2'],
        n_iter=params['n_iter'],
        use_cuda=CUDA,
        random_state=random_state,
        num_negative_samples=params["num_negative_samples"],
    )
    # Silent fit: this variant is typically run inside a sweep.
    model.fit(train, verbose=False)

    mrr_test = sequence_mrr_score(model, test)
    mrr_val = sequence_mrr_score(model, validation)
    return mrr_test, mrr_val
def evaluation(self, model, interactions: tuple):
    """Evaluates models on a number of metrics

    Takes model and evaluates it by Precision@K/Recall@K,
    Mean Reciprocal Rank metrics.

    Args:
        model (Arbitrary): A Spotlight model, can be of different types.
        interactions (tuple): (spotlight.interactions.Interactions,
            spotlight.interactions.Interactions),
            A tuple of (train data, test data).

    Returns:
        dict: A dictionary with all the evaluation metrics.
    """
    logger = logging.getLogger()
    train, test = interactions
    logger.info("Beginning model evaluation...")

    # Sequence models need the sequence-aware MRR metric.
    if self._models in ('S_POOL', 'S_CNN', 'S_LSTM'):
        mrr = sequence_mrr_score(model, test).mean()
    else:
        mrr = mrr_score(model, test).mean()
    logger.info('MRR {:.8f}'.format(mrr))

    k = 3
    prec, rec = sequence_precision_recall_score(
        model=model,
        test=test,
        k=k,
    )
    logger.info('Precision@{k} {:.8f}'.format(prec.mean(), k=k))
    logger.info('Recall@{k} {:.8f}'.format(rec.mean(), k=k))

    mean_prec = prec.mean()
    mean_rec = rec.mean()
    # Bug fix: guard the harmonic mean against division by zero when both
    # precision and recall are 0 (previously raised ZeroDivisionError).
    denom = mean_prec + mean_rec
    f1 = 2 * (mean_prec * mean_rec) / denom if denom else 0.0

    return {
        'test': {
            'precision': mean_prec,
            'recall': mean_rec,
            'f1': f1,
            'mrr': mrr,
        },
    }
def resultados_secuencia(self):
    """Compute and print the metrics of the implicit sequence model.

    Only used by the text interface.
    """
    global train, test, modelo
    # Mean reciprocal rank over the test sequences.
    mrr = sequence_mrr_score(modelo, test).mean()
    # Hand the metric off to the shared reporting helper.
    imprimir_resultados_dl(mrr)
def test_implicit_sequence_serialization(data):
    """A serialized-then-reloaded sequence model must score identically."""
    train, test = data
    train = train.to_sequence(max_sequence_length=128)
    test = test.to_sequence(max_sequence_length=128)

    representation = CNNNet(train.num_items,
                            embedding_dim=32,
                            kernel_width=3,
                            dilation=(1, ),
                            num_layers=1)
    model = ImplicitSequenceModel(loss='bpr',
                                  representation=representation,
                                  batch_size=128,
                                  learning_rate=1e-1,
                                  l2=0.0,
                                  n_iter=5,
                                  random_state=RANDOM_STATE,
                                  use_cuda=CUDA)
    model.fit(train)

    mrr_before = sequence_mrr_score(model, test).mean()
    mrr_after = sequence_mrr_score(_reload(model), test).mean()

    assert mrr_before == mrr_after
def train_model(df, hyperparams):
    """Fit an ImplicitSequenceModel on an interactions dataframe.

    Splits users 80/20, converts both halves to fixed-length sequences,
    trains with `hyperparams`, prints the test MRR, and returns the model.
    """
    # Pin every source of randomness for reproducibility.
    seed = 42
    set_seed(seed)
    rng = np.random.RandomState(seed)

    # Build the interactions dataset from the dataframe columns.
    interactions = Interactions(
        user_ids=np.array(df['user_id'], dtype='int32'),
        item_ids=np.array(df['item_id'], dtype='int32'),
        timestamps=df['entry_at'])

    # User-based 80/20 split, then sequence conversion for both halves.
    train, test = user_based_train_test_split(interactions,
                                              test_percentage=0.2,
                                              random_state=rng)
    sequence_kwargs = dict(max_sequence_length=15,
                           min_sequence_length=2,
                           step_size=1)
    train = train.to_sequence(**sequence_kwargs)
    test = test.to_sequence(**sequence_kwargs)
    print('data: {}'.format(train))

    # Train with the supplied hyperparameters.
    model = ImplicitSequenceModel(**hyperparams,
                                  use_cuda=CUDA,
                                  random_state=rng)
    model.fit(train, verbose=True)

    # Report held-out performance.
    test_mrr = sequence_mrr_score(model, test).mean()
    print('MRR score on test set: {}'.format(test_mrr))
    return model
def obtener_metricas_gui(self):
    """Collect the metrics for the chosen model.

    Only used by the web interface.

    Returns
    -------
    metricas_devueltas: dict
        dictionary with the model's metrics
    """
    global train, test, modelo

    # All metrics are displayed with four decimal places.
    def fmt(valor):
        return format(valor, '.4f')

    if self.opcion_modelo == 1:
        # Explicit model: RMSE is meaningful in addition to ranking metrics.
        rmse = rmse_score(modelo, test)
        mrr = mrr_score(modelo, test, train=train).mean()
        precision, recall = precision_recall_score(modelo, test,
                                                   train=train, k=10)
        metricas_devueltas = {"RMSE": fmt(rmse),
                              "MRR": fmt(mrr),
                              "Precisión k": fmt(precision.mean()),
                              "Recall k": fmt(recall.mean())}
    elif self.opcion_modelo == 2:
        mrr = mrr_score(modelo, test, train=train).mean()
        precision, recall = precision_recall_score(modelo, test,
                                                   train=train, k=10)
        metricas_devueltas = {"MRR": fmt(mrr),
                              "Precisión k": fmt(precision.mean()),
                              "Recall k": fmt(recall.mean())}
    else:
        # Sequence model: only MRR is computed.
        mrr = sequence_mrr_score(modelo, test).mean()
        metricas_devueltas = {"MRR": fmt(mrr)}

    # The CSV writer expects each value wrapped in a single-element list.
    metricas_a_guardar = {clave: [valor]
                          for clave, valor in metricas_devueltas.items()}

    # Persist the metrics to a .csv file.
    guardar_resultados(metricas_a_guardar)

    return metricas_devueltas
# Load the training interactions and build train/test sequence datasets.
df_train = pd.read_csv(train_csv)
# NOTE(review): user_based_train_test_split expects an Interactions dataset,
# not a csv path — this likely needs an Interactions object built from
# df_train; confirm against the data-loading code.
train, test = user_based_train_test_split(train_csv)
train = train.to_sequence()
test = test.to_sequence()

print("Build and Fit Implicit Sequence Model")
model = ImplicitSequenceModel(n_iter=3, representation='cnn', loss='bpr')
model.fit(train)

print("Calculate MRR Score")
# Bug fix: score against the sequence test set, not the csv *path*.
mrr = sequence_mrr_score(model, test)
print("MRR Result: ", mrr)

print("Calculate Recommendations")
# get data into dataframe for extracting user ids (I think we need the testset here?)
df_test = pd.read_csv(test_csv)
user_ids = df_test[['user_id']]

# call recommendation algorithm
df_out = pd.DataFrame()
df_out = recommendation(model, df_out, df_test, user_ids)

# write result to csv file
print(f"Writing {subm_csv}...")
df_out.to_csv(subm_csv, index=False)
def objective(hyper):
    """Hyperopt objective over several sequence-representation families.

    Builds the representation selected by ``hyper['model']['type']``
    (pooling / lstm / mixture variants / diversified mixture), trains it,
    and returns the negated validation MRR as the loss. Fit failures and
    NaN MRRs are reported as STATUS_FAIL so the search can continue.
    """
    print(hyper)
    # Bug fix: time.clock() was removed in Python 3.8; perf_counter() is
    # the recommended monotonic replacement for elapsed-time measurement.
    start = time.perf_counter()

    h = hyper['model']
    cls = ImplicitSequenceModel

    if h['type'] == 'pooling':
        representation = PoolNet(train.num_items,
                                 embedding_dim=int(h['embedding_dim']))
    elif h['type'] == 'lstm':
        representation = LSTMNet(train.num_items,
                                 embedding_dim=int(h['embedding_dim']))
    elif h['type'] == 'mixture':
        num_components = int(h['num_components'])
        embedding_dim = int(h['embedding_dim'])
        representation = MixtureLSTMNet(train.num_items,
                                        num_components=num_components,
                                        embedding_dim=embedding_dim)
    elif h['type'] == 'mixture2':
        num_components = int(h['num_components'])
        embedding_dim = int(h['embedding_dim'])
        representation = Mixture2LSTMNet(train.num_items,
                                         num_components=num_components,
                                         embedding_dim=embedding_dim)
    elif h['type'] == 'linear_mixture':
        num_components = int(h['num_components'])
        embedding_dim = int(h['embedding_dim'])
        representation = LinearMixtureLSTMNet(train.num_items,
                                              num_components=num_components,
                                              embedding_dim=embedding_dim)
    elif h['type'] == 'diversified_mixture_fixed':
        num_components = int(h['num_components'])
        embedding_dim = int(h['embedding_dim'])
        representation = DiversifiedMixtureLSTMNet(
            train.num_items,
            num_components=num_components,
            diversity_penalty=h['diversity_penalty'],
            embedding_dim=embedding_dim)
        # This representation needs the diversified model wrapper.
        cls = DiversifiedImplicitSequenceModel
    else:
        raise ValueError('Unknown model type')

    model = cls(
        batch_size=int(h['batch_size']),
        loss=h['loss'],
        learning_rate=h['learning_rate'],
        l2=h['l2'],
        n_iter=int(h['n_iter']),
        representation=representation,
        use_cuda=CUDA,
        random_state=np.random.RandomState(42)
    )

    try:
        model.fit(train, verbose=True)
    except ValueError:
        # Degenerate hyperparameters can abort training; report a failed
        # trial instead of killing the whole search.
        elapsed = time.perf_counter() - start
        return {'loss': 0.0,
                'status': STATUS_FAIL,
                'validation_mrr': 0.0,
                'test_mrr': 0.0,
                'elapsed': elapsed,
                'hyper': h}
    elapsed = time.perf_counter() - start
    print(model)

    validation_mrr = sequence_mrr_score(
        model,
        validation,
        exclude_preceding=True
    ).mean()
    test_mrr = sequence_mrr_score(
        model,
        test,
        exclude_preceding=True
    ).mean()
    print('MRR {} {}'.format(validation_mrr, test_mrr))

    if np.isnan(validation_mrr):
        status = STATUS_FAIL
    else:
        status = STATUS_OK

    return {'loss': -validation_mrr,
            'status': status,
            'validation_mrr': validation_mrr,
            'test_mrr': test_mrr,
            'elapsed': elapsed,
            'hyper': h}
timestamps=timeStamps) if name == "test": dataset_test = dataset elif name == "train": dataset_train = dataset if model_mode.lower() == "ifm": model = ImplicitFactorizationModel(n_iter=n_iter) if model_mode.lower() == "efm": model = ExplicitFactorizationModel(n_iter=n_iter) if model_mode.lower() == "cnn": net = CNNNet(num_items=int(foods_items)) model = ImplicitSequenceModel(n_iter=n_iter, use_cuda=torch.cuda.is_available(), representation=net) model.fit(dataset_train) with open(save_file, 'wb') as f: pickle.dump(model, f, pickle.HIGHEST_PROTOCOL) if model_mode.lower() == "cnn": mrr = sequence_mrr_score(model, dataset_test) else: mrr = mrr_score(model, dataset_test) print("mrr = ", len(mrr)) print("mean mrr = ", sum(mrr) / len(mrr)) rank = 1 / (sum(mrr) / len(mrr)) print("average rank = ", rank)
def _evaluate(model, test):
    """Mean sequence MRR on `test`, excluding items already seen in each sequence."""
    return sequence_mrr_score(model, test, exclude_preceding=True).mean()
min_sequence_length=min_sequence_length, step_size=step_size) net = LSTMNet(len(set(item2idx)), embedding_dim=32, item_embedding_layer=None, sparse=False) model = ImplicitSequenceModel(loss='adaptive_hinge', representation=net, batch_size=32, learning_rate=0.01, l2=10e-6, n_iter=10, use_cuda=False, random_state=random_state) model.fit(train, verbose=True) test_mrr = sequence_mrr_score(model, test) val_mrr = sequence_mrr_score(model, validation) train_mrr = sequence_mrr_score(model, train) print(test_mrr.mean(), val_mrr.mean(), train_mrr.mean()) for (split, split_name) in ((train, "train"), (validation, "validation"), (test, "test")): for k in (5, 10, 50, 100): precision, recall = sequence_precision_recall_score( model, split, k=k, exclude_preceding=False) print(split_name, "precision at", k, precision.mean()) print(split_name, "recall at", k, recall.mean())
def objective(space):
    """Hyperopt objective for mLSTM vs. plain LSTM sequence models.

    Trains the model selected by ``space['type']`` and returns the negated
    validation MRR (excluding preceding items) as the loss; fit failures
    and NaN MRRs become STATUS_FAIL trials.
    """
    batch_size = int(space['batch_size'])
    learn_rate = space['learn_rate']
    loss = space['loss']
    n_iter = int(space['n_iter'])
    embedding_dim = int(space['embedding_dim'])
    l2 = space['l2']

    if space['type'] == 'mlstm':
        representation = mLSTMNet(
            train.num_items,
            embedding_dim=embedding_dim)
        model = ImplicitSequenceModel(
            loss=loss,
            batch_size=batch_size,
            representation=representation,
            learning_rate=learn_rate,
            n_iter=n_iter,
            l2=l2,
            use_cuda=CUDA,
            random_state=random_state)
    elif space['type'] == 'lstm':
        representation = space['representation']
        model = ImplicitSequenceModel(
            loss=loss,
            embedding_dim=embedding_dim,
            batch_size=batch_size,
            representation=representation,
            learning_rate=learn_rate,
            n_iter=n_iter,
            l2=l2,
            use_cuda=CUDA,
            random_state=random_state)
    else:
        raise ValueError('Unknown model type {}'.format(space.get('type', 'NA')))

    # Bug fix: time.clock() was removed in Python 3.8; use perf_counter().
    start = time.perf_counter()
    try:
        model.fit(train, verbose=True)
    except ValueError:
        # Report a failed trial rather than aborting the search.
        elapsed = time.perf_counter() - start
        return {'loss': 0.0,
                'status': STATUS_FAIL,
                'validation_mrr': 0.0,
                'test_mrr': 0.0,
                'elapsed': elapsed,
                'hyper': space}
    elapsed = time.perf_counter() - start
    print(model)

    validation_mrr = sequence_mrr_score(
        model,
        valid,
        exclude_preceding=True
    ).mean()
    test_mrr = sequence_mrr_score(
        model,
        test,
        exclude_preceding=True
    ).mean()
    print('MRR {} {}'.format(validation_mrr, test_mrr))

    if np.isnan(validation_mrr):
        status = STATUS_FAIL
    else:
        status = STATUS_OK

    return {'loss': -validation_mrr,
            'status': status,
            'validation_mrr': validation_mrr,
            'test_mrr': test_mrr,
            'elapsed': elapsed,
            'hyper': space}