Exemple #1
0
    def build(self,
              task: Task,
              base_dataset: Dataset,
              epochs=50,
              components=200,
              gmm_clusters=10,
              logloss_weight=2.0,
              lr=5e-3,
              batch_size=5000):
        """Log the call parameters on ``task`` and run the build pipeline.

        Steps, in order: record the parameters, cache basic dataset
        statistics on ``self``, build the MF part when ``self.user_vectors``
        is still ``None``, build and train the torch model, build the GMM.

        Args:
            task: Receives the parameters through ``set_user_properties``
                and is forwarded to every helper step.
            base_dataset: Source of ``id_to_item``, ``n_ratings`` and
                ``n_users``; also forwarded to the helper steps.
            epochs: Forwarded positionally to ``_train_torch_model``.
            components: Forwarded to ``_build_mf`` and
                ``_build_torch_model``.
            gmm_clusters: Forwarded to ``_build_gmm``.
            logloss_weight: Forwarded to ``_train_torch_model``.
            lr: Forwarded to ``_train_torch_model``.
            batch_size: Forwarded to ``_train_torch_model``.
        """
        # locals() has to be evaluated before any other local name is bound,
        # otherwise extra entries would leak into the logged parameters.
        task.set_user_properties(
            **to_clear_ml_params(locals(), ["task", "self"])
        )

        item_count = len(base_dataset.id_to_item)
        self.item_ids = [*range(item_count)]
        self.avg_ratings_per_user = (
            base_dataset.n_ratings / base_dataset.n_users
        )

        # Existing user vectors are kept; MF is only built when none are set.
        if self.user_vectors is None:
            self._build_mf(base_dataset, task, components)

        self._build_torch_model(base_dataset, task, components)

        training_options = {
            "logloss_weight": logloss_weight,
            "lr": lr,
            "batch_size": batch_size,
        }
        self._train_torch_model(base_dataset, task, epochs,
                                **training_options)

        self._build_gmm(task, gmm_clusters)
    def build(self,
              task: Task,
              base_dataset: Dataset,
              epochs=50,
              components=200,
              gmm_clusters=10,
              logloss_weight=2.0,
              lr=5e-3,
              epochs_logloss_only=25):
        """Log the call parameters on ``task`` and run the build pipeline.

        Besides running the MF / torch / GMM steps, this indexes the stored
        ratings per user:

        * ``self.avg_rating``: mean over all stored rating values.
        * ``self.user_map``: ``{user: {item: rating - avg_rating}}``.
        * ``self.user_map_list``: ``{user: [items rated by that user]}``.
        * ``self.user_ids``: distinct users that have a stored rating.

        Args:
            task: Receives the parameters through ``set_user_properties``
                and is forwarded to every helper step.
            base_dataset: Source of ``rating_matrix`` (must provide
                ``tocoo()``; presumably a SciPy sparse matrix, confirm
                against the ``Dataset`` definition), ``id_to_item``,
                ``n_ratings`` and ``n_users``.
            epochs: Forwarded positionally to ``_train_torch_model``.
            components: Forwarded to ``_build_mf`` and
                ``_build_torch_model``.
            gmm_clusters: Forwarded to ``_build_gmm``.
            logloss_weight: Forwarded to ``_train_torch_model``.
            lr: Forwarded to ``_train_torch_model``.
            epochs_logloss_only: Forwarded to ``_train_torch_model``.
        """
        # locals() has to be evaluated before any other local name is bound,
        # otherwise extra entries would leak into the logged parameters.
        task.set_user_properties(
            **to_clear_ml_params(locals(), ["task", "self"])
        )

        triplets = base_dataset.rating_matrix.tocoo()
        self._implicit_coef = 1.0

        # The local aliases point at the very objects stored on self.
        ratings_by_user = self.user_map = {}
        seen_users = self.user_ids = []
        self.avg_rating = np.mean(triplets.data)
        for user, item, rating in zip(triplets.row, triplets.col,
                                      triplets.data):
            seen_users.append(user)
            # Ratings are stored as their offset from the global mean.
            ratings_by_user.setdefault(user, {})[item] = (
                rating - self.avg_rating
            )
        self.user_map_list = {
            user: list(rated) for user, rated in ratings_by_user.items()
        }

        # De-duplicate; the resulting order is that of set iteration.
        self.user_ids = [*set(seen_users)]
        item_count = len(base_dataset.id_to_item)
        self.item_ids = [*range(item_count)]
        self.avg_ratings_per_user = (
            base_dataset.n_ratings / base_dataset.n_users
        )

        # Existing user vectors are kept; MF is only built when none are set.
        if self.user_vectors is None:
            self._build_mf(base_dataset, task, components)

        self._build_torch_model(base_dataset, task, components)

        training_options = {
            "logloss_weight": logloss_weight,
            "lr": lr,
            "epochs_logloss_only": epochs_logloss_only,
        }
        self._train_torch_model(base_dataset, task, epochs,
                                **training_options)

        self._build_gmm(task, gmm_clusters)