def rs_recommend(proc_dir, model_path, item_list):
    """Run a saved model on ``item_list`` and pickle the prediction.

    The function reads ``x2i.pickle`` from ``proc_dir``, restores a
    ``Recoder`` wrapping a ``DynamicAutoencoder`` from ``model_path``,
    builds the interactions with ``load_item_list`` and calls
    ``recoder.predict`` on them. The first element of the prediction is
    detached, squeezed along dimension 0, converted to a NumPy array and
    written to ``recommendations.pickle`` inside ``proc_dir``.

    Args:
        proc_dir: Directory holding ``x2i.pickle``; the output file is
            written to the same directory.
        model_path: Model file handed to ``Recoder.init_from_model_file``.
        item_list: Items forwarded to ``load_item_list``.

    Returns:
        None. The result is only written to disk.
    """
    mapping_file = path.join(proc_dir, 'x2i.pickle')
    output_file = path.join(proc_dir, 'recommendations.pickle')

    # NOTE(review): pickle.load executes arbitrary code from the file;
    # proc_dir is presumably a trusted location - confirm with callers.
    with open(mapping_file, 'rb') as mapping_handle:
        x2i = pickle.load(mapping_handle)

    recoder = Recoder(DynamicAutoencoder())
    recoder.init_from_model_file(model_path)

    interactions = load_item_list(x2i, recoder.num_items, item_list)
    predictions = recoder.predict(interactions)

    with open(output_file, 'wb') as output_handle:
        scores = predictions[0].detach().squeeze(0).numpy()
        pickle.dump(scores, output_handle)
def load_models() -> Dict[str, Recoder]:
    """Restore one ``Recoder`` for every ``*.model`` file in ``MODELS_DIR``.

    Returns:
        A dict keyed by the file name with its ``.model`` suffix removed;
        each value is a ``Recoder`` wrapping a fresh ``DynamicAutoencoder``
        and initialised from the matching file.
    """
    name_pattern = re.compile(r'^(?P<ds>.*)\.model$')

    # Pair every directory entry with its (possibly None) regex match so
    # that non-model files can be filtered out in a single pass.
    candidates = (
        (entry, name_pattern.match(entry)) for entry in os.listdir(MODELS_DIR)
    )
    model_files = {
        hit.group('ds'): os.path.join(MODELS_DIR, entry)
        for entry, hit in candidates
        if hit
    }

    recoders = {}
    for dataset, model_file in model_files.items():
        recoder = Recoder(DynamicAutoencoder())
        recoder.init_from_model_file(model_file)
        recoders[dataset] = recoder
    return recoders
def test_model(sparse, exp_recall_20, exp_recall_50, exp_ndcg_100):
    """Train a small autoencoder and check its metrics before and after reload.

    The model is trained on ``tests/data/train.csv`` and evaluated on
    ``tests/data/val.csv``. Its state is then saved, loaded into a fresh
    ``Recoder`` and evaluated again; both evaluations must match the
    expected values within an absolute tolerance of 0.01.

    Args:
        sparse: Forwarded to ``DynamicAutoencoder(sparse=...)``.
        exp_recall_20: Expected mean of the normalized Recall@20 values.
        exp_recall_50: Expected mean of the normalized Recall@50 values.
        exp_ndcg_100: Expected mean of the NDCG@100 values.
    """
    data_dir = 'tests/data/'
    model_dir = '/tmp/'

    train_df = pd.read_csv(data_dir + 'train.csv')
    val_df = pd.read_csv(data_dir + 'val.csv')

    # keep the items that exist in the training dataset
    val_df = val_df[val_df.sid.isin(train_df.sid.unique())]

    train_matrix, item_id_map, user_id_map = dataframe_to_csr_matrix(
        train_df, user_col='uid', item_col='sid', inter_col='watched')

    # Reuse the training id maps so both matrices share the same indexing.
    val_matrix, _, _ = dataframe_to_csr_matrix(
        val_df, user_col='uid', item_col='sid', inter_col='watched',
        item_id_map=item_id_map, user_id_map=user_id_map)

    train_dataset = RecommendationDataset(train_matrix)
    val_dataset = RecommendationDataset(val_matrix, train_matrix)

    use_cuda = False
    model = DynamicAutoencoder(hidden_layers=[200], activation_type='tanh',
                               noise_prob=0.5, sparse=sparse)
    trainer = Recoder(model=model, use_cuda=use_cuda,
                      optimizer_type='adam', loss='logloss')
    trainer.train(train_dataset=train_dataset, val_dataset=val_dataset,
                  batch_size=500, lr=1e-3, weight_decay=2e-5,
                  num_epochs=30, negative_sampling=True)

    recall_20 = Recall(k=20, normalize=True)
    recall_50 = Recall(k=50, normalize=True)
    ndcg_100 = NDCG(k=100)
    expectations = (
        (recall_20, exp_recall_20),
        (recall_50, exp_recall_50),
        (ndcg_100, exp_ndcg_100),
    )

    def assert_metrics(recoder):
        """Evaluate ``recoder`` on the validation set and check each metric."""
        results = recoder._evaluate(eval_dataset=val_dataset,
                                    num_recommendations=100,
                                    metrics=[recall_20, recall_50, ndcg_100],
                                    batch_size=500)
        for metric, expected in expectations:
            assert np.isclose(np.mean(results[metric]), expected,
                              atol=0.01, rtol=0)

    # assert model metrics
    assert_metrics(trainer)

    # Save the model and evaluate again
    model_checkpoint = model_dir + 'test_model.model'
    state_file = trainer.save_state(model_checkpoint)
    try:
        model = DynamicAutoencoder(sparse=sparse)
        trainer = Recoder(model=model, use_cuda=use_cuda,
                          optimizer_type='adam', loss='logloss')
        trainer.init_from_model_file(state_file)
        assert_metrics(trainer)
    finally:
        # Remove the checkpoint even when the reload or an assertion fails,
        # so a failing run does not leave files behind in model_dir.
        os.remove(state_file)
common_params = { 'user_col': 'uid', 'item_col': 'sid', 'inter_col': 'watched', } method = 'inference' model_file = model_dir + 'bce_ns_d_0.0_n_0.5_200_epoch_100.model' index_file = model_dir + 'bce_ns_d_0.0_n_0.5_200_epoch_100.model.index' num_recommendations = 100 if method == 'inference': model = DynamicAutoencoder() recoder = Recoder(model) recoder.init_from_model_file(model_file) recommender = InferenceRecommender(recoder, num_recommendations) elif method == 'similarity': embeddings_index = AnnoyEmbeddingsIndex() embeddings_index.load(index_file=index_file) cache_embeddings_index = MemCacheEmbeddingsIndex(embeddings_index) recommender = SimilarityRecommender(cache_embeddings_index, num_recommendations, scale=1, n=50) train_df = pd.read_csv(data_dir + 'train.csv') val_te_df = pd.read_csv(data_dir + 'test_te.csv') val_tr_df = pd.read_csv(data_dir + 'test_tr.csv') train_matrix, item_id_map, _ = dataframe_to_csr_matrix(train_df,