Code example #1
0
class EmbeddingFactorsRecommender(BaseFactorizationRecommender):
    """Factorization recommender backed by Spotlight's
    ImplicitFactorizationModel (learned embeddings + bias terms).
    """

    # Default hyper-parameters forwarded to ImplicitFactorizationModel.
    default_model_params = {
        'loss': 'adaptive_hinge',  # alternatives: 'bpr', 'hinge', 'adaptive hinge'
        'embedding_dim': 32,
        'n_iter': 15,
        'batch_size': 1024,
        'l2': 0.0,
        'learning_rate': 1e-2,
        'num_negative_samples': 10,
    }

    default_fit_params = {'verbose': True}

    def _prep_for_fit(self, train_obs, **fit_params):
        """Store data/params and construct a fresh Spotlight model."""
        # self.toggle_mkl_blas_1_thread(False)
        self._set_data(train_obs)
        self.set_params(**fit_params)
        self.model = ImplicitFactorizationModel(**self.model_params)
        self._set_spotlight_train_data(self.train_mat)

    def _set_spotlight_train_data(self, train_mat):
        """Convert the sparse training matrix into Spotlight Interactions."""
        self.spotlight_dataset = spotlight_interactions_from_sparse(train_mat)

    def fit(self, train_obs, **fit_params):
        """Train a brand-new model on the given observations."""
        self._prep_for_fit(train_obs, **fit_params)
        verbose = self.fit_params.get('verbose', False)
        self.model.fit(self.spotlight_dataset, verbose=verbose)

    def fit_partial(self, train_obs, epochs=1):
        """Continue training for `epochs` iterations (full fit if no model yet)."""
        self._set_epochs(epochs)
        if self.model is None:
            self.fit(train_obs)
            return self
        self.model.fit(self.spotlight_dataset)
        return self

    def _set_epochs(self, epochs):
        # Epoch count maps onto Spotlight's `n_iter` parameter.
        self.set_params(n_iter=epochs)

    def _predict_on_inds(self, user_inds, item_inds):
        """Raw model scores for aligned arrays of user/item indices."""
        return self.model.predict(user_inds, item_inds)

    def _get_item_factors(self, mode=None):
        """Return (item biases, item embedding matrix) as numpy arrays."""
        net = self.model._net
        biases = net.item_biases.weight.data.numpy().ravel()
        embeddings = net.item_embeddings.weight.data.numpy()
        return biases, embeddings

    def _get_user_factors(self, mode=None):
        """Return (user biases, user embedding matrix) as numpy arrays."""
        net = self.model._net
        biases = net.user_biases.weight.data.numpy().ravel()
        embeddings = net.user_embeddings.weight.data.numpy()
        return biases, embeddings

    def _predict_rank(self, test_mat, train_mat=None):
        # Rank-based prediction is not supported by this backend.
        raise NotImplementedError()
Code example #2
0
                            num_items=num_items)
# NOTE(review): the call closed above starts before this chunk; `num_items`
# and the hyper-parameter variables below are defined earlier in the script.

# Spotlight implicit-feedback factorization model with BPR loss; uses the
# GPU when CUDA is available.
model = ImplicitFactorizationModel(embedding_dim=n_dimensions,
                                   n_iter=num_minor_iterations,
                                   loss='bpr',
                                   use_cuda=torch.cuda.is_available(),
                                   batch_size=batch_size,
                                   learning_rate=1e-3,
                                   l2=1e-5)

test_user_ids = data.userId.unique()  # keeps order of appearance

# Train in major rounds: each round continues fitting the same model and
# checkpoints weights plus per-user score matrices into its own directory.
for i in tqdm(range(num_major_iterations)):
    print("doing it number {}".format(i))
    save_dir = sim_dir / str(i)
    if not save_dir.exists():
        save_dir.mkdir()
    model.fit(interactions, verbose=True)
    # Checkpoint the underlying torch network's weights for this round.
    torch.save(model._net.state_dict(), save_dir / "model.pkl")

    with torch.no_grad():
        # One row of item scores per test user; assumes model.predict(user)
        # with a single user id returns a score for every item — TODO confirm.
        scores = np.empty((len(test_user_ids), num_items), dtype=np.float32)
        for e, user in enumerate(test_user_ids):
            rating = model.predict(user)
            scores[e] = rating
        scores = torch.as_tensor(scores)
        torch.save(scores, save_dir / "raw_rating_scores.pkl")
        # Standardize per user: zero mean / unit std across the item axis.
        # NOTE(review): divides by the per-row std — would produce inf/nan if
        # a user's scores were all equal; confirm that cannot happen.
        mean = scores.mean(dim=1, keepdim=True)
        std = scores.std(dim=1, keepdim=True)
        centered_scores = (scores - mean) / std
        torch.save(centered_scores, save_dir / "centered_scores.pkl")
Code example #3
0
    # NOTE(review): this chunk is the interior of a larger function/script;
    # `model`, `training_interactions`, `training_set`, `user_avg`,
    # `user_id_map`, `business_id_map`, `testing_set_file` and `start_time`
    # are defined above this view.
    model.fit(training_interactions, verbose=True)
    print('[ %04ds ] Model fitted' % (time.time() - start_time))

    testing_set: List[Review] = Review.load_from_file(testing_set_file)
    # Split test reviews by whether their user/business pair was seen during
    # training — presumably only "seen" pairs are scorable; verify in Review.
    seen_testing_set, unseen_testing_set = Review.extract_seen_reviews(
        testing_set, training_set)
    print(len(seen_testing_set), len(unseen_testing_set))
    # Normalize each review's rating by the user's average (user_avg).
    normalized_seen_testing_set = Review.normalize_by_user(
        seen_testing_set, user_avg)
    seen_pairs, ground_truth = Review.extract_sparse_testing_matrix_and_ground_truth(
        normalized_seen_testing_set)
    # Map raw (user, business) ids onto the integer indices the model was
    # trained with.
    testing_user_ids = []
    testing_business_ids = []
    for user, business in seen_pairs:
        testing_user_ids.append(user_id_map[user])
        testing_business_ids.append(business_id_map[business])

    predictions = model.predict(np.array(testing_user_ids),
                                np.array(testing_business_ids))
    # min_pred = np.min(predictions)
    # max_pred = np.max(predictions)
    # normalized_predictions = (np.array(predictions) - min_pred) / (max_pred - min_pred) * 4 + 1

    # AUC binarizes the (user-normalized) ground truth at 0: above-average
    # ratings count as positives.
    # NOTE(review): RMSE compares unbounded model scores against normalized
    # ratings — scales may differ; confirm this metric is intended as-is.
    auc = roc_auc_score(np.array(ground_truth) >= 0, predictions)
    rmse = math.sqrt(mean_squared_error(ground_truth, predictions))

    print('[ %04ds ] Finished' % (time.time() - start_time))

    print("AUC = %.4f" % auc)
    print("RMSE = %.4f" % rmse)