Example #1
import pickle
from os import path

from recoder.model import Recoder
from recoder.nn import DynamicAutoencoder


def rs_recommend(proc_dir, model_path, item_list):
    # Map raw item ids to the internal indices the model was trained with.
    with open(path.join(proc_dir, 'x2i.pickle'), 'rb') as handle:
        x2i = pickle.load(handle)

    # Restore a trained Recoder model from its checkpoint file.
    model = DynamicAutoencoder()
    recoder = Recoder(model)
    recoder.init_from_model_file(model_path)

    # Build the user's interaction vector from the given items.
    interactions = load_item_list(x2i, recoder.num_items, item_list)

    # Score all items and persist the raw predictions.
    out = recoder.predict(interactions)
    with open(path.join(proc_dir, 'recommendations.pickle'), 'wb') as handle:
        pickle.dump(out[0].detach().squeeze(0).numpy(), handle)
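
The snippet relies on a load_item_list helper that is not shown here. A minimal sketch of what it might do, assuming x2i maps raw item ids to the model's internal indices and that predict accepts a dense (1, num_items) interaction tensor, as the out[0].squeeze(0) call above suggests:

import torch

def load_item_list(x2i, num_items, item_list):
    # Hypothetical helper: encode the given items as a single-user
    # interaction vector of shape (1, num_items).
    interactions = torch.zeros(1, num_items)
    for item in item_list:
        if item in x2i:  # skip items unseen during training
            interactions[0, x2i[item]] = 1.0
    return interactions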
Example #2
import os
import re
from typing import Dict

from recoder.model import Recoder
from recoder.nn import DynamicAutoencoder


def load_models() -> Dict[str, Recoder]:
    # Discover '<dataset>.model' checkpoint files under MODELS_DIR.
    model_paths = {}
    model_re = re.compile(r'^(?P<ds>.*)\.model$')
    for f in os.listdir(MODELS_DIR):
        match = model_re.match(f)
        if match:
            model_paths[match.group('ds')] = os.path.join(MODELS_DIR, f)

    # Restore one Recoder instance per discovered checkpoint.
    recoders = {}
    for ds, model_path in model_paths.items():
        model = DynamicAutoencoder()
        recoder = Recoder(model)
        recoder.init_from_model_file(model_path)
        recoders[ds] = recoder
    return recoders
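
A hypothetical call site; MODELS_DIR is assumed to be a module-level constant pointing at the directory that holds the '<dataset>.model' checkpoints:

MODELS_DIR = 'models/'  # assumed checkpoint directory, defined at module level

recoders = load_models()
print('loaded models for datasets:', sorted(recoders))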
Example #3
import os

import numpy as np
import pandas as pd

from recoder.data import RecommendationDataset
from recoder.metrics import NDCG, Recall
from recoder.model import Recoder
from recoder.nn import DynamicAutoencoder
from recoder.utils import dataframe_to_csr_matrix


def test_model(sparse, exp_recall_20, exp_recall_50, exp_ndcg_100):
  data_dir = 'tests/data/'
  model_dir = '/tmp/'

  train_df = pd.read_csv(data_dir + 'train.csv')
  val_df = pd.read_csv(data_dir + 'val.csv')

  # keep the items that exist in the training dataset
  val_df = val_df[val_df.sid.isin(train_df.sid.unique())]

  train_matrix, item_id_map, user_id_map = dataframe_to_csr_matrix(train_df, user_col='uid',
                                                                   item_col='sid', inter_col='watched')

  val_matrix, _, _ = dataframe_to_csr_matrix(val_df, user_col='uid',
                                             item_col='sid', inter_col='watched',
                                             item_id_map=item_id_map, user_id_map=user_id_map)

  train_dataset = RecommendationDataset(train_matrix)
  val_dataset = RecommendationDataset(val_matrix, train_matrix)

  # Train a denoising autoencoder (noise_prob=0.5) with negative sampling.
  use_cuda = False
  model = DynamicAutoencoder(hidden_layers=[200], activation_type='tanh',
                             noise_prob=0.5, sparse=sparse)
  trainer = Recoder(model=model, use_cuda=use_cuda, optimizer_type='adam',
                    loss='logloss')

  trainer.train(train_dataset=train_dataset, val_dataset=val_dataset,
                batch_size=500, lr=1e-3, weight_decay=2e-5,
                num_epochs=30, negative_sampling=True)

  # assert model metrics
  recall_20 = Recall(k=20, normalize=True)
  recall_50 = Recall(k=50, normalize=True)
  ndcg_100 = NDCG(k=100)

  results = trainer._evaluate(eval_dataset=val_dataset, num_recommendations=100,
                              metrics=[recall_20, recall_50, ndcg_100], batch_size=500)

  # Average each metric over all evaluated users.
  for metric, values in results.items():
    results[metric] = np.mean(values)

  assert np.isclose(results[recall_20], exp_recall_20, atol=0.01, rtol=0)
  assert np.isclose(results[recall_50], exp_recall_50, atol=0.01, rtol=0)
  assert np.isclose(results[ndcg_100], exp_ndcg_100, atol=0.01, rtol=0)

  # Save the model and evaluate again
  model_checkpoint = model_dir + 'test_model.model'
  state_file = trainer.save_state(model_checkpoint)

  model = DynamicAutoencoder(sparse=sparse)
  trainer = Recoder(model=model, use_cuda=use_cuda,
                    optimizer_type='adam', loss='logloss')

  trainer.init_from_model_file(state_file)

  results = trainer._evaluate(eval_dataset=val_dataset, num_recommendations=100,
                              metrics=[recall_20, recall_50, ndcg_100], batch_size=500)

  for metric, values in results.items():
    results[metric] = np.mean(values)

  assert np.isclose(results[recall_20], exp_recall_20, atol=0.01, rtol=0)
  assert np.isclose(results[recall_50], exp_recall_50, atol=0.01, rtol=0)
  assert np.isclose(results[ndcg_100], exp_ndcg_100, atol=0.01, rtol=0)

  os.remove(state_file)
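
The test's signature suggests it is driven by pytest parametrization. A hypothetical setup; the expected metric values below are illustrative placeholders, not measured results:

import pytest

@pytest.mark.parametrize('sparse, exp_recall_20, exp_recall_50, exp_ndcg_100', [
    (False, 0.2, 0.3, 0.25),  # placeholder expectations, not real benchmarks
    (True, 0.2, 0.3, 0.25),
])
def test_model(sparse, exp_recall_20, exp_recall_50, exp_ndcg_100):
    ...  # body as above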
Example #4
import pandas as pd

from recoder.embedding import AnnoyEmbeddingsIndex, MemCacheEmbeddingsIndex
from recoder.model import Recoder
from recoder.nn import DynamicAutoencoder
from recoder.recommender import InferenceRecommender, SimilarityRecommender
from recoder.utils import dataframe_to_csr_matrix

data_dir = 'data/'     # assumed dataset location; adjust as needed
model_dir = 'models/'  # assumed checkpoint/index location; adjust as needed

# Column names shared by the dataframe_to_csr_matrix calls below.
common_params = {
    'user_col': 'uid',
    'item_col': 'sid',
    'inter_col': 'watched',
}

method = 'inference'
model_file = model_dir + 'bce_ns_d_0.0_n_0.5_200_epoch_100.model'
index_file = model_dir + 'bce_ns_d_0.0_n_0.5_200_epoch_100.model.index'

num_recommendations = 100
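
# Two interchangeable ways to produce recommendations:
# - 'inference': run the trained autoencoder forward to score items directly.
# - 'similarity': nearest-neighbor lookup in a prebuilt Annoy embeddings
#   index, cached in memory.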

if method == 'inference':
    model = DynamicAutoencoder()
    recoder = Recoder(model)
    recoder.init_from_model_file(model_file)
    recommender = InferenceRecommender(recoder, num_recommendations)
elif method == 'similarity':
    embeddings_index = AnnoyEmbeddingsIndex()
    embeddings_index.load(index_file=index_file)
    cache_embeddings_index = MemCacheEmbeddingsIndex(embeddings_index)
    recommender = SimilarityRecommender(cache_embeddings_index,
                                        num_recommendations,
                                        scale=1,
                                        n=50)

train_df = pd.read_csv(data_dir + 'train.csv')
val_te_df = pd.read_csv(data_dir + 'test_te.csv')
val_tr_df = pd.read_csv(data_dir + 'test_tr.csv')

train_matrix, item_id_map, _ = dataframe_to_csr_matrix(train_df,