class EmbeddingFactorsRecommender(BaseFactorizationRecommender):
    """Factorization recommender backed by Spotlight's ImplicitFactorizationModel.

    Wraps model construction, (partial) fitting, prediction, and access to the
    learned user/item embedding factors and biases.
    """

    # Defaults forwarded verbatim to spotlight's ImplicitFactorizationModel.
    default_model_params = dict(
        loss='adaptive_hinge',  # spotlight also supports 'pointwise', 'bpr', 'hinge'
        embedding_dim=32,
        n_iter=15,
        batch_size=1024,
        l2=0.0,
        learning_rate=1e-2,
        num_negative_samples=10)

    default_fit_params = dict(verbose=True)

    def _prep_for_fit(self, train_obs, **fit_params):
        """Store data/params and build a FRESH spotlight model (discards any old one).

        :param train_obs: training observations container (project type).
        :param fit_params: overrides merged in via ``set_params`` before the
            model is constructed, so e.g. ``n_iter`` takes effect here.
        """
        # self.toggle_mkl_blas_1_thread(False)
        self._set_data(train_obs)
        self.set_params(**fit_params)
        self.model = ImplicitFactorizationModel(**self.model_params)
        self._set_spotlight_train_data(self.train_mat)

    def _set_spotlight_train_data(self, train_mat):
        """Convert the sparse train matrix into a spotlight Interactions dataset."""
        self.spotlight_dataset = spotlight_interactions_from_sparse(train_mat)

    def fit(self, train_obs, **fit_params):
        """Fit a fresh model on ``train_obs`` for ``n_iter`` epochs."""
        self._prep_for_fit(train_obs, **fit_params)
        self.model.fit(self.spotlight_dataset,
                       verbose=self.fit_params.get('verbose', False))

    def fit_partial(self, train_obs, epochs=1):
        """Continue training for ``epochs`` more epochs (or fit from scratch).

        :returns: self (fluent API).
        """
        self._set_epochs(epochs)
        if self.model is None:
            # _set_epochs already put n_iter=epochs into model_params,
            # so the freshly built model trains for exactly ``epochs``.
            self.fit(train_obs)
        else:
            # BUG FIX: the existing model was constructed with the OLD n_iter,
            # so ``epochs`` was silently ignored here. Spotlight keeps the epoch
            # count in the private ``_n_iter`` attribute (same private surface
            # this class already reads via ``model._net``), so update it before
            # continuing training. Also pass ``verbose`` for consistency with
            # ``fit``.
            self.model._n_iter = epochs
            self.model.fit(self.spotlight_dataset,
                           verbose=self.fit_params.get('verbose', False))
        return self

    def _set_epochs(self, epochs):
        """Record the epoch count in the stored model params (used at build time)."""
        self.set_params(n_iter=epochs)

    def _predict_on_inds(self, user_inds, item_inds):
        """Raw model scores for aligned arrays of user and item indices."""
        return self.model.predict(user_inds, item_inds)

    def _get_item_factors(self, mode=None):
        """:returns: (item biases 1-d array, item embeddings 2-d array)."""
        net = self.model._net
        return net.item_biases.weight.data.numpy().ravel(), \
               net.item_embeddings.weight.data.numpy()

    def _get_user_factors(self, mode=None):
        """:returns: (user biases 1-d array, user embeddings 2-d array)."""
        net = self.model._net
        return net.user_biases.weight.data.numpy().ravel(), \
               net.user_embeddings.weight.data.numpy()

    def _predict_rank(self, test_mat, train_mat=None):
        # Rank-based prediction is not supported by this backend.
        raise NotImplementedError()
num_items=num_items)  # closes a call opened before this chunk (presumably an Interactions(...) constructor — TODO confirm)

# Build the implicit-feedback factorization model; GPU is used when available.
model = ImplicitFactorizationModel(
    embedding_dim=n_dimensions,
    n_iter=num_minor_iterations,  # epochs per major iteration (model.fit below)
    loss='bpr',
    use_cuda=torch.cuda.is_available(),
    batch_size=batch_size,
    learning_rate=1e-3,
    l2=1e-5)

test_user_ids = data.userId.unique()  # keeps order of appearance

# Each "major iteration" continues training and checkpoints the model plus
# per-user score tensors into its own subdirectory of sim_dir.
for i in tqdm(range(num_major_iterations)):
    print("doing it number {}".format(i))
    save_dir = sim_dir / str(i)
    if not save_dir.exists():
        save_dir.mkdir()
    model.fit(interactions, verbose=True)
    torch.save(model._net.state_dict(), save_dir / "model.pkl")
    with torch.no_grad():
        # One row of item scores per test user.
        scores = np.empty((len(test_user_ids), num_items), dtype=np.float32)
        for e, user in enumerate(test_user_ids):
            # presumably predict(user) with no item ids scores ALL items — verify against spotlight docs
            rating = model.predict(user)
            scores[e] = rating
    scores = torch.as_tensor(scores)
    torch.save(scores, save_dir / "raw_rating_scores.pkl")
    # Standardize each user's row (zero mean, unit std).
    # NOTE(review): divides by per-row std — a constant row would yield inf/nan.
    mean = scores.mean(dim=1, keepdim=True)
    std = scores.std(dim=1, keepdim=True)
    centered_scores = (scores - mean) / std
    torch.save(centered_scores, save_dir / "centered_scores.pkl")
# Fit the model on the training interactions, then evaluate on the test set.
model.fit(training_interactions, verbose=True)
print('[ %04ds ] Model fitted' % (time.time() - start_time))

testing_set: List[Review] = Review.load_from_file(testing_set_file)
# Split test reviews by whether user/business were seen during training.
seen_testing_set, unseen_testing_set = Review.extract_seen_reviews(
    testing_set, training_set)
print(len(seen_testing_set), len(unseen_testing_set))

# Evaluation is restricted to the "seen" subset, normalized per user.
normalized_seen_testing_set = Review.normalize_by_user(
    seen_testing_set, user_avg)
seen_pairs, ground_truth = Review.extract_sparse_testing_matrix_and_ground_truth(
    normalized_seen_testing_set)

# Map raw (user, business) ids to the dense indices the model was trained on.
testing_user_ids = []
testing_business_ids = []
for user, business in seen_pairs:
    testing_user_ids.append(user_id_map[user])
    testing_business_ids.append(business_id_map[business])

predictions = model.predict(np.array(testing_user_ids),
                            np.array(testing_business_ids))
# min_pred = np.min(predictions)
# max_pred = np.max(predictions)
# normalized_predictions = (np.array(predictions) - min_pred) / (max_pred - min_pred) * 4 + 1

# AUC treats the (user-normalized) ground truth as binary: >= 0 is a positive.
auc = roc_auc_score(np.array(ground_truth) >= 0, predictions)
# NOTE(review): RMSE compares raw model scores to normalized ratings — scales
# may differ (the commented-out rescaling above suggests this was considered).
rmse = math.sqrt(mean_squared_error(ground_truth, predictions))
print('[ %04ds ] Finished' % (time.time() - start_time))
print("AUC = %.4f" % auc)
print("RMSE = %.4f" % rmse)