Example 1
0
    def __init__(self, data, config, params, *args, **kwargs):
        """
        Create a CML instance.
        (see https://vision.cornell.edu/se3/wp-content/uploads/2017/03/WWW-fp0554-hsiehA.pdf for details about the algorithm design choices).

        Args:
            data: data loader object
            params: model parameters {embed_k: embedding size,
                                      [l_w, l_b]: regularization,
                                      lr: learning rate}
        """
        self._random = np.random

        # (attribute, config key, short name, default, reader, writer)
        # tuples consumed by autoset_params() below.
        self._params_list = [
            ("_user_factors", "factors", "factors", 100, None, None),
            ("_learning_rate", "lr", "lr", 0.001, None, None),
            ("_l_w", "l_w", "l_w", 0.001, None, None),
            ("_l_b", "l_b", "l_b", 0.001, None, None),
            ("_margin", "margin", "margin", 0.5, None, None),
        ]

        self.autoset_params()

        # NOTE(review): despite the name, _user_factors is the embedding SIZE
        # read from the "factors" parameter (default 100); users and items
        # share that same size — confirm against CML_model's signature.
        self._item_factors = self._user_factors

        # A non-positive batch size means "one batch over all transactions".
        if self._batch_size < 1:
            self._batch_size = self._data.transactions

        self._ratings = self._data.train_dict

        self._sampler = cs.Sampler(self._data.i_train_dict)

        self._model = CML_model(self._user_factors, self._item_factors,
                                self._learning_rate, self._l_w, self._l_b,
                                self._margin, self._num_users, self._num_items)
Example 2
0
    def __init__(self, data, config, params, *args, **kwargs):
        """
        Build a BPR-Slim recommender: register the hyper-parameters, resolve
        them from the configuration, then wire up sampler and model.
        """
        self._random = np.random

        # (attribute, config key, short name, default, reader, writer)
        self._params_list = [
            ("_lr", "lr", "lr", 0.001, None, None),
            ("_lj_reg", "lj_reg", "ljreg", 0.001, None, None),
            ("_li_reg", "li_reg", "lireg", 0.1, None, None),
        ]
        self.autoset_params()

        # A non-positive batch size means "one batch over all transactions".
        if self._batch_size < 1:
            self._batch_size = self._data.transactions

        self._ratings = self._data.train_dict
        self._sp_i_train = self._data.sp_i_train
        self._i_items_set = list(range(self._num_items))

        self._sampler = cs.Sampler(self._data.i_train_dict)

        self._model = BPRSlimModel(
            self._data, self._num_users, self._num_items,
            self._lr, self._lj_reg, self._li_reg,
            self._sampler, random_seed=42)
Example 3
0
    def __init__(self, data, config, params, *args, **kwargs):
        """
        Build a batched BPR-MF recommender.

        Reference: https://arxiv.org/pdf/1205.2618

        Args:
            data: data loader object
            params: model parameters {embed_k: embedding size,
                                      [l_w, l_b]: regularization,
                                      lr: learning rate}
        """
        self._random = np.random

        # (attribute, config key, short name, default, reader, writer)
        self._params_list = [
            ("_factors", "factors", "factors", 10, None, None),
            ("_learning_rate", "lr", "lr", 0.001, None, None),
            ("_l_w", "l_w", "l_w", 0.1, None, None),
            ("_l_b", "l_b", "l_b", 0.001, None, None),
        ]
        self.autoset_params()

        # A non-positive batch size means "one batch over all transactions".
        if self._batch_size < 1:
            self._batch_size = self._data.transactions

        self._ratings = self._data.train_dict
        self._sampler = cs.Sampler(self._data.i_train_dict)

        self._model = BPRMF_batch_model(
            self._factors, self._learning_rate, self._l_w, self._l_b,
            self._num_users, self._num_items)
Example 4
0
    def __init__(self, data, config, params, *args, **kwargs):
        """
        Create a VNPR instance: resolve the hyper-parameters and build the
        underlying model on top of the items' visual features.

        Args:
            data: data loader object
            config: configuration object
            params: model parameters (lr, l_w, mf_factors, mlp_hidden_size, dropout)
        """
        self._random = np.random

        self._sampler = cs.Sampler(self._data.i_train_dict)

        # (attribute, config key, short name, default, reader, writer);
        # mlp_hidden_size is parsed from a string such as "(64,32)" into a list.
        self._params_list = [
            ("_learning_rate", "lr", "lr", 0.001, None, None),
            ("_l_w", "l_w", "l_w", 0.001, None, None),
            ("_mf_factors", "mf_factors", "mffactors", 10, None, None),
            ("_mlp_hidden_size", "mlp_hidden_size", "mlpunits", "(64,32)",
             lambda x: list(make_tuple(str(x))),
             lambda x: self._batch_remove(str(x), " []").replace(",", "-")),
            ("_dropout", "dropout", "drop", 0.45, None, None)
        ]
        self.autoset_params()

        # Rows of the visual-feature matrix, reordered to follow internal item ids.
        # NOTE(review): assumes item_mapping/private_items cover every id in
        # [0, num_items) — confirm with the data loader.
        item_indices = [
            self._data.item_mapping[self._data.private_items[item]]
            for item in range(self._num_items)
        ]

        # A non-positive batch size means "one batch over all transactions".
        if self._batch_size < 1:
            self._batch_size = self._data.transactions

        self._ratings = self._data.train_dict
        self._sp_i_train = self._data.sp_i_train
        self._i_items_set = list(range(self._num_items))

        self._model = VNPRModel(self._num_users, self._num_items,
                                self._mf_factors, self._l_w,
                                self._mlp_hidden_size, self._dropout,
                                self._learning_rate,
                                self._data.visual_features[item_indices])
Example 5
0
    def __init__(self, data, config, params, *args, **kwargs):
        """
        Create a VBPR instance: resolve the hyper-parameters and build the
        model on top of the items' visual features.

        Args:
            data: data loader object
            config: configuration object
            params: model parameters (factors, factors_d, lr, l_w, l_b, l_e)
        """
        super().__init__(data, config, params, *args, **kwargs)

        self._num_items = self._data.num_items
        self._num_users = self._data.num_users
        self._random = np.random

        # (attribute, config key, short name, default, reader, writer)
        self._params_list = [
            ("_factors", "factors", "factors", 100, None, None),
            ("_factors_d", "factors_d", "factors_d", 20, None, None),
            ("_learning_rate", "lr", "lr", 0.0005, None, None),
            ("_l_w", "l_w", "l_w", 0.000025, None, None),
            ("_l_b", "l_b", "l_b", 0, None, None),
            ("_l_e", "l_e", "l_e", 0.002, None, None)
        ]
        self.autoset_params()

        # A non-positive batch size means "one batch over all transactions".
        if self._batch_size < 1:
            self._batch_size = self._data.transactions

        self._ratings = self._data.train_dict

        self._sampler = cs.Sampler(self._data.i_train_dict)

        # Rows of the visual-feature matrix, reordered to follow internal item ids.
        item_indices = [
            self._data.item_mapping[self._data.private_items[item]]
            for item in range(self._num_items)
        ]

        self._model = VBPR_model(self._factors, self._factors_d,
                                 self._learning_rate, self._l_w, self._l_b,
                                 self._l_e,
                                 self._data.visual_features[item_indices],
                                 self._num_users, self._num_items)
Example 6
0
    def __init__(self, data, config, params, *args, **kwargs):
        """
        Build a pointwise MF recommender trained one interaction at a time.
        """
        self._random = np.random

        # (attribute, config key, short name, default, reader, writer)
        self._params_list = [
            ("_factors", "factors", "f", 10, int, None),
            ("_learning_rate", "lr", "lr", 0.05, None, None),
            ("_bias_regularization", "bias_regularization", "bias_reg", 0, None, None),
            ("_user_regularization", "user_regularization", "u_reg", 0.0025, None, None),
            ("_positive_item_regularization", "positive_item_regularization", "pos_i_reg", 0.0025, None, None),
            ("_negative_item_regularization", "negative_item_regularization", "neg_i_reg", 0.00025, None, None),
            ("_update_negative_item_factors", "update_negative_item_factors", "up_neg_i_f", True, None, None),
            ("_update_users", "update_users", "up_u", True, None, None),
            ("_update_items", "update_items", "up_i", True, None, None),
            ("_update_bias", "update_bias", "up_b", True, None, None),
        ]
        self.autoset_params()

        # This trainer works strictly online: one sample per step.
        self._batch_size = 1
        self._ratings = self._data.train_dict

        self._model = MFModel(self._factors, self._data, self._learning_rate,
                              self._user_regularization,
                              self._bias_regularization,
                              self._positive_item_regularization,
                              self._negative_item_regularization)
        self._sampler = cs.Sampler(self._data.i_train_dict)
Example 7
0
    def __init__(self, data, config, params, *args, **kwargs):
        """
        Create a LightGCN instance: resolve the hyper-parameters, build the
        graph matrices, and instantiate the underlying model.
        """
        self._random = np.random
        self._random_p = random

        self._ratings = self._data.train_dict
        self._sampler = cs.Sampler(self._data.i_train_dict)

        # A non-positive batch size defaults to one batch per user set.
        if self._batch_size < 1:
            self._batch_size = self._num_users

        ######################################

        # (attribute, config key, short name, default, reader, writer)
        self._params_list = [
            ("_learning_rate", "lr", "lr", 0.0005, None, None),
            ("_factors", "latent_dim", "factors", 64, None, None),
            ("_n_layers", "n_layers", "n_layers", 1, None, None),
            ("_l_w", "l_w", "l_w", 0.1, None, None),
            ("_n_fold", "n_fold", "n_fold", 1, None, None),
        ]
        self.autoset_params()

        # NOTE(review): _create_adj_mat is defined elsewhere on this class;
        # it is expected to return the (adjacency, laplacian) pair — confirm.
        self._adjacency, self._laplacian = self._create_adj_mat()

        self._model = LightGCNModel(num_users=self._num_users,
                                    num_items=self._num_items,
                                    learning_rate=self._learning_rate,
                                    embed_k=self._factors,
                                    n_layers=self._n_layers,
                                    l_w=self._l_w,
                                    n_fold=self._n_fold,
                                    adjacency=self._adjacency,
                                    laplacian=self._laplacian)
Example 8
0
    def __init__(self, data, config, params, *args, **kwargs):
        """
        Create a Convolutional Neural Matrix Factorization instance: resolve
        the hyper-parameters and build the underlying CNN model.

        Args:
            data: data loader object
            config: configuration object
            params: model parameters (lr, embedding_size, cnn_channels,
                cnn_kernels, cnn_strides, dropout_prob, l_w, l_b)
        """
        self._random = np.random

        self._sampler = cs.Sampler(self._data.i_train_dict)

        # (attribute, config key, short name, default, reader, writer);
        # the cnn_* entries are parsed from tuple-like strings into lists.
        self._params_list = [
            ("_lr", "lr", "lr", 0.001, None, None),
            ("_embedding_size", "embedding_size", "embedding_size", 100, None,
             None),
            ("_cnn_channels", "cnn_channels", "cnn_channels", "(1, 32, 32)",
             lambda x: list(make_tuple(str(x))),
             lambda x: self._batch_remove(str(x), " []").replace(",", "-")),
            ("_cnn_kernels", "cnn_kernels", "cnn_kernels", "(2,2)",
             lambda x: list(make_tuple(str(x))),
             lambda x: self._batch_remove(str(x), " []").replace(",", "-")),
            ("_cnn_strides", "cnn_strides", "cnn_strides", "(2,2)",
             lambda x: list(make_tuple(str(x))),
             lambda x: self._batch_remove(str(x), " []").replace(",", "-")),
            ("_dropout_prob", "dropout_prob", "dropout_prob", 0, None, None),
            ("_l_w", "l_w", "l_w", 0.005, None, None),
            ("_l_b", "l_b", "l_b", 0.0005, None, None),
        ]
        self.autoset_params()

        # A non-positive batch size means "one batch over all transactions".
        if self._batch_size < 1:
            self._batch_size = self._data.transactions

        self._ratings = self._data.train_dict
        self._sp_i_train = self._data.sp_i_train
        self._i_items_set = list(range(self._num_items))

        self._model = ConvNeuralMatrixFactorizationModel(
            self._num_users, self._num_items, self._embedding_size, self._lr,
            self._cnn_channels, self._cnn_kernels, self._cnn_strides,
            self._dropout_prob, self._l_w, self._l_b)
Example 9
0
    def __init__(self, data, config, params, *args, **kwargs):
        """
        Create a KaHFM-embeddings instance: resolve the hyper-parameters,
        compute TF-IDF-based user/item factors from the knowledge-aware
        feature map, and build the underlying model.

        Args:
            data: data loader object
            params: model parameters {lr: learning rate,
                                      [l_w, l_b]: regularization,
                                      seed: random seed}
        """


        ######################################

        # (attribute, config key, short name, default, reader, writer)
        self._params_list = [
            ("_learning_rate", "lr", "lr", 0.0001, None, None),
            ("_l_w", "l_w", "l_w", 0.005, None, None),
            ("_l_b", "l_b", "l_b", 0, None, None),
            ("_seed", "seed", "seed", 42, None, None)
        ]
        self.autoset_params()
        np.random.seed(self._seed)
        self._random = np.random

        self._ratings = self._data.train_dict
        self._sampler = cs.Sampler(self._data.i_train_dict)
        feature_map = self._data.side_information_data.kahfm_feature_map
        self._tfidf_obj = TFIDF(feature_map)
        self._tfidf = self._tfidf_obj.tfidf()
        self._user_profiles = self._tfidf_obj.get_profiles(self._ratings)

        # Dense factor matrices indexed by (public id, public feature id).
        self._user_factors = \
            np.zeros(shape=(len(self._data.users), len(self._data.features)))
        self._item_factors = \
            np.zeros(shape=(len(self._data.items), len(self._data.features)))

        # Item factors: TF-IDF weight of each feature attached to the item.
        for i, f_dict in self._tfidf.items():
            if i in self._data.items:
                for f, v in f_dict.items():
                    self._item_factors[self._data.public_items[i]][self._data.public_features[f]] = v

        # User factors: aggregated TF-IDF profile built from the training ratings.
        for u, f_dict in self._user_profiles.items():
            for f, v in f_dict.items():
                self._user_factors[self._data.public_users[u]][self._data.public_features[f]] = v

        # A non-positive batch size defaults to one batch per user set.
        if self._batch_size < 1:
            self._batch_size = self._num_users

        self._factors = self._data.features

        self._transactions_per_epoch = self._data.transactions

        # FIX: read the hyper-parameters resolved by autoset_params()
        # (_learning_rate, _l_w, _l_b) instead of the raw self._params
        # attributes — the raw access bypassed the defaults declared in
        # _params_list and raised AttributeError when a key was absent.
        self._model = KaHFMEmbeddingsModel(self._user_factors,
                                           self._item_factors,
                                           self._learning_rate,
                                           self._l_w,
                                           self._l_b)
Example 10
0
    def __init__(self, data, config, params, *args, **kwargs):
        """
        Create a AMR instance.
        (see https://arxiv.org/pdf/1809.07062.pdf for details about the algorithm design choices).

        Args:
            data: data loader object
            params: model parameters {embed_k: embedding size,
                                      [l_w, l_b]: regularization,
                                      [eps, l_adv]: adversarial budget perturbation and adversarial regularization parameter,
                                      lr: learning rate}
        """
        super().__init__(data, config, params, *args, **kwargs)

        self._num_items = self._data.num_items
        self._num_users = self._data.num_users
        self._random = np.random

        # (attribute, config key, short name, default, reader, writer);
        # adversarial training defaults to the second half of the run.
        self._params_list = [
            ("_factors", "factors", "factors", 200, None, None),
            ("_factors_d", "factors_d", "factors_d", 20, None, None),
            ("_learning_rate", "lr", "lr", 0.001, None, None),
            ("_l_w", "l_w", "l_w", 0.1, None, None),
            ("_l_b", "l_b", "l_b", 0.001, None, None),
            ("_l_e", "l_e", "l_e", 0.1, None, None),
            ("_eps", "eps", "eps", 0.1, None, None),
            ("_l_adv", "l_adv", "l_adv", 0.001, None, None),
            ("_adversarial_epochs", "adversarial_epochs", "adv_epochs",
             self._epochs // 2, None, None)
        ]
        self.autoset_params()

        # Adversarial epochs are a suffix of the training run, so they can
        # never exceed the total number of epochs.
        if self._adversarial_epochs > self._epochs:
            raise Exception(
                f"The total epoch ({self._epochs}) "
                f"is smaller than the adversarial epochs ({self._adversarial_epochs})."
            )

        # A non-positive batch size means "one batch over all transactions".
        if self._batch_size < 1:
            self._batch_size = self._data.transactions

        self._ratings = self._data.train_dict

        self._sampler = cs.Sampler(self._data.i_train_dict)

        # Rows of the visual-feature matrix, reordered to follow internal item ids.
        item_indices = [
            self._data.item_mapping[self._data.private_items[item]]
            for item in range(self._num_items)
        ]

        self._model = AMR_model(self._factors, self._factors_d,
                                self._learning_rate, self._l_w, self._l_b,
                                self._l_e, self._eps, self._l_adv,
                                self._data.visual_features[item_indices],
                                self._num_users, self._num_items)
Example 11
0
    def __init__(self, data, config, params, *args, **kwargs):
        """
        Create a NGCF instance: resolve the hyper-parameters, build the
        graph matrices, and instantiate the underlying model.
        """
        self._random = np.random
        self._random_p = random

        self._ratings = self._data.train_dict
        self._sampler = cs.Sampler(self._data.i_train_dict)
        # A non-positive batch size defaults to one batch per user set.
        if self._batch_size < 1:
            self._batch_size = self._num_users

        ######################################

        # (attribute, config key, short name, default, reader, writer).
        # NOTE(review): the "(64,32)" defaults for node_dropout and
        # message_dropout mirror weight_size and look copy-pasted — dropout
        # rates are usually probabilities per layer; confirm intended defaults.
        self._params_list = [
            ("_learning_rate", "lr", "lr", 0.1, None, None),
            ("_factors", "latent_dim", "factors", 64, None, None),
            ("_l_w", "l_w", "l_w", 0.01, None, None),
            ("_weight_size", "weight_size", "weight_size", "(64,32)", lambda x: list(make_tuple(x)),
             lambda x: self._batch_remove(str(x), " []").replace(",", "-")),
            ("_node_dropout", "node_dropout", "node_dropout", "(64,32)", lambda x: list(make_tuple(x)),
             lambda x: self._batch_remove(str(x), " []").replace(",", "-")),
            ("_message_dropout", "message_dropout", "message_dropout", "(64,32)", lambda x: list(make_tuple(x)),
             lambda x: self._batch_remove(str(x), " []").replace(",", "-")),
            ("_n_fold", "n_fold", "n_fold", 1, None, None),
        ]
        self.autoset_params()

        # One propagation layer per entry of weight_size.
        self._n_layers = len(self._weight_size)

        # NOTE(review): _create_adj_mat is defined elsewhere on this class;
        # it is expected to return the (adjacency, laplacian) pair — confirm.
        self._adjacency, self._laplacian = self._create_adj_mat()

        self._model = NGCFModel(
            num_users=self._num_users,
            num_items=self._num_items,
            learning_rate=self._learning_rate,
            embed_k=self._factors,
            l_w=self._l_w,
            weight_size=self._weight_size,
            n_layers=self._n_layers,
            node_dropout=self._node_dropout,
            message_dropout=self._message_dropout,
            n_fold=self._n_fold,
            adjacency=self._adjacency,
            laplacian=self._laplacian
        )
    def __init__(self, data, config, params, *args, **kwargs):
        """
        Build an APR-MF (AMF) recommender.

        Reference: https://arxiv.org/abs/1808.03908

        Args:
            data: data loader object
            params: model parameters {embed_k: embedding size,
                                      [l_w, l_b]: regularization,
                                      [eps, l_adv]: adversarial budget perturbation and adversarial regularization parameter,
                                      lr: learning rate}
        """
        self._random = np.random

        # (attribute, config key, short name, default, reader, writer);
        # adversarial training defaults to the second half of the run.
        self._params_list = [
            ("_factors", "factors", "factors", 200, int, None),
            ("_learning_rate", "lr", "lr", 0.001, None, None),
            ("_l_w", "l_w", "l_w", 0.1, None, None),
            ("_l_b", "l_b", "l_b", 0.001, None, None),
            ("_eps", "eps", "eps", 0.1, None, None),
            ("_l_adv", "l_adv", "l_adv", 0.001, None, None),
            ("_adversarial_epochs", "adversarial_epochs", "adv_epochs",
             self._epochs // 2, int, None),
        ]
        self.autoset_params()

        # Adversarial epochs are a suffix of the run; reject inconsistent configs.
        if self._adversarial_epochs > self._epochs:
            raise Exception(f"The total epoch ({self._epochs}) "
                            f"is smaller than the adversarial epochs ({self._adversarial_epochs}).")

        # A non-positive batch size means "one batch over all transactions".
        if self._batch_size < 1:
            self._batch_size = self._data.transactions

        self._ratings = self._data.train_dict
        self._sampler = cs.Sampler(self._data.i_train_dict)

        self._model = AMF_model(self._factors, self._learning_rate,
                                self._l_w, self._l_b, self._eps, self._l_adv,
                                self._num_users, self._num_items)