예제 #1
0
class PNN(ContextRecommender):
    """PNN feeds field embeddings together with their pairwise products to an MLP.

    The flattened field embeddings are concatenated with the output of an
    inner-product layer and/or an outer-product layer (enabled through the
    ``use_inner`` and ``use_outer`` config entries) before entering the MLP.

    """
    def __init__(self, config, dataset):
        """Read the hyper-parameters from ``config`` and build all layers.

        Args:
            config: configuration object indexed by key; the keys read here are
                ``mlp_hidden_size``, ``dropout_prob``, ``use_inner``,
                ``use_outer`` and ``reg_weight``.
            dataset: forwarded unchanged to the parent constructor.
        """
        super(PNN, self).__init__(config, dataset)

        # load parameters info
        self.mlp_hidden_size = config['mlp_hidden_size']
        self.dropout_prob = config['dropout_prob']
        self.use_inner = config['use_inner']
        self.use_outer = config['use_outer']
        self.reg_weight = config['reg_weight']

        # number of unordered field pairs; n * (n - 1) is always even, so the
        # integer division is exact
        self.num_pair = self.num_feature_field * (self.num_feature_field -
                                                  1) // 2

        # define layers and loss
        # the MLP input always contains the flattened embeddings and grows by
        # ``num_pair`` for every enabled product layer
        product_out_dim = self.num_feature_field * self.embedding_size
        if self.use_inner:
            product_out_dim += self.num_pair
            self.inner_product = InnerProductLayer(self.num_feature_field,
                                                   device=self.device)

        if self.use_outer:
            product_out_dim += self.num_pair
            self.outer_product = OuterProductLayer(self.num_feature_field,
                                                   self.embedding_size,
                                                   device=self.device)
        size_list = [product_out_dim] + self.mlp_hidden_size
        self.mlp_layers = MLPLayers(size_list, self.dropout_prob, bn=False)
        self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1)
        # NOTE(review): ``self.relu`` is not used inside this class; kept in
        # case external code accesses the attribute.
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.loss = nn.BCELoss()

        # parameters initialization
        self.apply(self._init_weights)

    def reg_loss(self):
        """Calculate the weighted L2-norm penalty of the MLP weight matrices.

        Only parameters of ``self.mlp_layers`` whose name ends with ``weight``
        contribute; each norm is scaled by ``self.reg_weight``.

        Returns:
            The summed penalty (the integer ``0`` if no parameter matched).
        """
        reg_loss = 0
        for name, parm in self.mlp_layers.named_parameters():
            if name.endswith('weight'):
                reg_loss = reg_loss + self.reg_weight * parm.norm(2)
        return reg_loss

    def _init_weights(self, module):
        """Xavier-normal init for embeddings and linear weights, zero biases.

        Args:
            module (nn.Module): sub-module visited by ``self.apply``.
        """
        if isinstance(module, nn.Embedding):
            xavier_normal_(module.weight.data)
        elif isinstance(module, nn.Linear):
            xavier_normal_(module.weight.data)
            if module.bias is not None:
                constant_(module.bias.data, 0)

    def forward(self, interaction):
        """Compute the predicted probability for every row of ``interaction``.

        Args:
            interaction: batch object passed on to ``embed_input_fields``.

        Returns:
            torch.Tensor: sigmoid scores of shape ``[batch_size]``.
        """
        # sparse_embedding shape: [batch_size, num_token_seq_field+num_token_field, embed_dim] or None
        # dense_embedding shape: [batch_size, num_float_field] or [batch_size, num_float_field, embed_dim] or None
        sparse_embedding, dense_embedding = self.embed_input_fields(
            interaction)
        all_embeddings = []
        if sparse_embedding is not None:
            all_embeddings.append(sparse_embedding)
        # only a 3-D dense embedding can be concatenated along the field axis
        if dense_embedding is not None and len(dense_embedding.shape) == 3:
            all_embeddings.append(dense_embedding)
        pnn_all_embeddings = torch.cat(
            all_embeddings, dim=1)  # [batch_size, num_field, embed_dim]
        batch_size = pnn_all_embeddings.shape[0]
        # linear part
        linear_part = pnn_all_embeddings.view(
            batch_size, -1)  # [batch_size,num_field*embed_dim]
        output = [linear_part]
        # second order part
        if self.use_inner:
            inner_product = self.inner_product(pnn_all_embeddings).view(
                batch_size, -1)  # [batch_size,num_pairs]
            output.append(inner_product)
        if self.use_outer:
            outer_product = self.outer_product(pnn_all_embeddings).view(
                batch_size, -1)  # [batch_size,num_pairs]
            output.append(outer_product)
        output = torch.cat(output, dim=1)  # [batch_size,d]

        output = self.predict_layer(self.mlp_layers(output))  # [batch_size,1]
        output = self.sigmoid(output)
        # Drop only the trailing unit dimension. A bare ``squeeze()`` would
        # also remove the batch dimension for a batch of one, producing a 0-d
        # tensor whose shape no longer matches the label in ``BCELoss``.
        return output.squeeze(-1)

    def calculate_loss(self, interaction):
        """Return the binary cross-entropy loss plus the L2 penalty.

        Args:
            interaction: batch object; ``interaction[self.LABEL]`` is the target.
        """
        label = interaction[self.LABEL]
        output = self.forward(interaction)

        return self.loss(output, label) + self.reg_loss()

    def predict(self, interaction):
        """Return the scores produced by :meth:`forward` for ``interaction``."""
        return self.forward(interaction)
예제 #2
0
class xDeepFM(ContextRecommender):
    """xDeepFM combines a CIN (Compressed Interaction Network) with a classical DNN.
    The model is able to learn certain bounded-degree feature interactions explicitly;
    Besides, it can also learn arbitrary low- and high-order feature interactions implicitly.
    """
    def __init__(self, config, dataset):
        """Read the hyper-parameters from ``config`` and build all layers.

        Args:
            config: configuration object indexed by key; the keys read here are
                ``mlp_hidden_size``, ``reg_weight``, ``dropout_prob``,
                ``direct`` and ``cin_layer_size``.
            dataset: forwarded unchanged to the parent constructor.
        """
        super(xDeepFM, self).__init__(config, dataset)

        # load parameters info
        self.mlp_hidden_size = config['mlp_hidden_size']
        self.reg_weight = config['reg_weight']
        self.dropout_prob = config['dropout_prob']
        self.direct = config['direct']
        self.cin_layer_size = temp_cin_size = list(config['cin_layer_size'])

        # Check whether the size of the CIN layer is legal.
        # Without direct connections every non-final layer output is split
        # into two equal halves, so its size has to be even.
        # NOTE(review): the rounding below is applied to the last layer as
        # well, although only the other layers are compared for the warning.
        if not self.direct:
            self.cin_layer_size = list(
                map(lambda x: int(x // 2 * 2), temp_cin_size))
            if self.cin_layer_size[:-1] != temp_cin_size[:-1]:
                self.logger.warning(
                    'Layer size of CIN should be even except for the last layer when direct is False. '
                    'It is changed to {}'.format(self.cin_layer_size))

        # Create a convolutional layer for each CIN layer.
        # nn.ModuleList (instead of a plain list) registers the convolutions as
        # sub-modules, so their weights show up in ``parameters()`` and
        # ``state_dict()`` and are therefore optimised and saved.
        self.conv1d_list = nn.ModuleList()
        self.field_nums = [self.num_feature_field]
        for i, layer_size in enumerate(self.cin_layer_size):
            conv1d = nn.Conv1d(self.field_nums[-1] * self.field_nums[0],
                               layer_size, 1).to(self.device)
            self.conv1d_list.append(conv1d)
            if self.direct:
                self.field_nums.append(layer_size)
            else:
                # only half of the feature maps are passed to the next layer
                self.field_nums.append(layer_size // 2)

        # Create MLP layer
        size_list = [self.embedding_size * self.num_feature_field
                     ] + self.mlp_hidden_size + [1]
        self.mlp_layers = MLPLayers(size_list, dropout=self.dropout_prob)

        # Get the output size of CIN
        if self.direct:
            self.final_len = sum(self.cin_layer_size)
        else:
            self.final_len = sum(
                self.cin_layer_size[:-1]) // 2 + self.cin_layer_size[-1]

        self.cin_linear = nn.Linear(self.final_len, 1, bias=False)
        self.sigmoid = nn.Sigmoid()
        self.loss = nn.BCELoss()
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Xavier-normal init for embeddings and linear weights, zero biases.

        Other module types (e.g. the CIN convolutions) keep their default
        initialisation.

        Args:
            module (nn.Module): sub-module visited by ``self.apply``.
        """
        if isinstance(module, nn.Embedding):
            xavier_normal_(module.weight.data)
        elif isinstance(module, nn.Linear):
            xavier_normal_(module.weight.data)
            if module.bias is not None:
                constant_(module.bias.data, 0)

    def reg_loss(self, parameters):
        """Calculate the L2 normalization loss of parameters in a certain layer.

        Args:
            parameters: iterable of ``(name, parameter)`` pairs, e.g. the result
                of ``named_parameters()``; only names ending in ``weight`` count.

        Returns:
            The sum of the L2 norms (the integer ``0`` if nothing matched).
        """
        reg_loss = 0
        for name, parm in parameters:
            if name.endswith('weight'):
                reg_loss = reg_loss + parm.norm(2)
        return reg_loss

    def calculate_reg_loss(self):
        """Calculate the final L2 normalization loss of model parameters.
        Including weight matrixes of mlp layers, linear layer and convolutional layers.

        Returns:
            The unweighted sum of the L2 norms; ``calculate_loss`` scales it by
            ``self.reg_weight``.
        """
        l2_reg = 0
        l2_reg = l2_reg + self.reg_loss(self.mlp_layers.named_parameters())
        l2_reg = l2_reg + self.reg_loss(
            self.first_order_linear.named_parameters())
        for conv1d in self.conv1d_list:
            l2_reg += self.reg_loss(conv1d.named_parameters())
        return l2_reg

    def compressed_interaction_network(self,
                                       input_features,
                                       activation='identity'):
        r"""For k-th CIN layer, the output :math:`X_k` is calculated via

        .. math::
            x_{h,*}^{k} = \sum_{i=1}^{H_{k-1}} \sum_{j=1}^{m}W_{i,j}^{k,h}(X_{i,*}^{k-1} \circ x_{j,*}^0)

        :math:`H_k` denotes the number of feature vectors in the k-th layer,
        :math:`1 \le h \le H_k`.
        :math:`\circ` denotes the Hadamard product.

        Then sum pooling is applied on each feature map of the hidden layers,
        and all pooling vectors from the hidden layers are concatenated.

        Args:
            input_features(torch.Tensor): [batch_size, field_num, embed_dim]. Embedding vectors of all features.
            activation(str): name of activation function.

        Returns:
            torch.Tensor: [batch_size, final_len]. Sum-pooled feature maps of
            all CIN layers, where ``final_len`` is the input size of
            ``self.cin_linear``.
        """
        batch_size, _, embedding_size = input_features.shape
        hidden_nn_layers = [input_features]
        final_result = []
        last_layer = len(self.cin_layer_size) - 1
        for i, layer_size in enumerate(self.cin_layer_size):
            # pairwise Hadamard products between the input fields and the
            # feature vectors of the previous hidden layer: [b, h, m, d]
            z_i = torch.einsum('bmd,bhd->bhmd', hidden_nn_layers[0],
                               hidden_nn_layers[-1])
            z_i = z_i.view(batch_size, self.field_nums[0] * self.field_nums[i],
                           embedding_size)
            z_i = self.conv1d_list[i](z_i)

            # Pass the CIN intermediate result through the activation function.
            if activation.lower() == 'identity':
                activate_func = None
            else:
                activate_func = activation_layer(activation)
            output = z_i if activate_func is None else activate_func(z_i)

            # Get the output of the hidden layer.
            if self.direct:
                direct_connect = output
                next_hidden = output
            elif i != last_layer:
                # one half feeds the next layer, the other half goes to the output
                next_hidden, direct_connect = torch.split(
                    output, 2 * [layer_size // 2], 1)
            else:
                direct_connect = output
                next_hidden = 0  # placeholder, never read after the last layer

            final_result.append(direct_connect)
            hidden_nn_layers.append(next_hidden)
        result = torch.cat(final_result, dim=1)
        result = torch.sum(result, -1)  # sum pooling over the embedding axis
        return result

    def forward(self, interaction):
        """Compute the predicted probability for every row of ``interaction``.

        The score is the sigmoid of the sum of the first-order linear term, the
        CIN term and the DNN term.

        Args:
            interaction: batch object passed on to ``embed_input_fields`` and
                ``first_order_linear``.

        Returns:
            torch.Tensor: the sigmoid output with dimension 1 squeezed out.
        """
        sparse_embedding, dense_embedding = self.embed_input_fields(
            interaction)
        all_embeddings = []
        if sparse_embedding is not None:
            all_embeddings.append(sparse_embedding)
        # only a 3-D dense embedding can be concatenated along the field axis
        if dense_embedding is not None and len(dense_embedding.shape) == 3:
            all_embeddings.append(dense_embedding)

        # Get the output of CIN.
        xdeepfm_input = torch.cat(all_embeddings,
                                  dim=1)  # [batch_size, num_field, embed_dim]
        cin_output = self.compressed_interaction_network(xdeepfm_input)
        cin_output = self.cin_linear(cin_output)

        # Get the output of MLP layer.
        batch_size = xdeepfm_input.shape[0]
        dnn_output = self.mlp_layers(xdeepfm_input.view(batch_size, -1))

        # Get predicted score.
        y_p = self.first_order_linear(interaction) + cin_output + dnn_output
        y = self.sigmoid(y_p)

        return y.squeeze(1)

    def calculate_loss(self, interaction):
        """Return the binary cross-entropy loss plus ``reg_weight`` times the L2 penalty.

        Args:
            interaction: batch object; ``interaction[self.LABEL]`` is the target.
        """
        label = interaction[self.LABEL]
        output = self.forward(interaction)
        l2_reg = self.calculate_reg_loss()
        return self.loss(output, label) + self.reg_weight * l2_reg

    def predict(self, interaction):
        """Return the scores produced by :meth:`forward` for ``interaction``."""
        return self.forward(interaction)
예제 #3
0
class NAIS(GeneralRecommender):
    """NAIS is an attention network, which is capable of distinguishing which historical items
    in a user profile are more important for a prediction. We just implement the model following
    the original author with a pointwise training mode.

    Note:
        instead of forming a minibatch as all training instances of a randomly sampled user which is
        mentioned in the original paper, we still train the model by a randomly sampled interactions.

    """
    input_type = InputType.POINTWISE

    def __init__(self, config, dataset):
        """Read the hyper-parameters from ``config`` and build all layers.

        Args:
            config: configuration object indexed by key; the keys read here are
                ``LABEL_FIELD``, ``embedding_size``, ``weight_size``,
                ``algorithm``, ``reg_weights``, ``alpha``, ``beta``,
                ``split_to`` and ``pretrain_path``.
            dataset: training dataset; must provide ``history_item_matrix()``.

        Raises:
            ValueError: if ``algorithm`` is neither ``'concat'`` nor ``'prod'``.
        """
        super(NAIS, self).__init__(config, dataset)

        # load dataset info
        self.LABEL = config['LABEL_FIELD']

        # get all users' history interaction information.the history item
        # matrix is padding by the maximum number of a user's interactions
        self.history_item_matrix, self.history_lens, self.mask_mat = self.get_history_info(
            dataset)

        # load parameters info
        self.embedding_size = config['embedding_size']
        self.weight_size = config['weight_size']
        self.algorithm = config['algorithm']
        self.reg_weights = config['reg_weights']
        self.alpha = config['alpha']
        self.beta = config['beta']
        self.split_to = config['split_to']
        self.pretrain_path = config['pretrain_path']

        # split the too large dataset into the specified pieces
        # (``self.group`` only exists when ``split_to`` is positive)
        if self.split_to > 0:
            self.logger.info('split the n_items to {} pieces'.format(
                self.split_to))
            self.group = torch.chunk(
                torch.arange(self.n_items).to(self.device), self.split_to)
        else:
            self.logger.warning(
                'Pay Attetion!! the `split_to` is set to 0. If you catch a OOM error in this case, '
                'you need to increase it \n\t\t\tuntil the error disappears. For example, '
                'you can append it in the command line such as `--split_to=5`')

        # define layers and loss
        # construct source and destination item embedding matrix
        self.item_src_embedding = nn.Embedding(self.n_items,
                                               self.embedding_size,
                                               padding_idx=0)
        self.item_dst_embedding = nn.Embedding(self.n_items,
                                               self.embedding_size,
                                               padding_idx=0)
        self.bias = nn.Parameter(torch.zeros(self.n_items))
        if self.algorithm == 'concat':
            self.mlp_layers = MLPLayers(
                [self.embedding_size * 2, self.weight_size])
        elif self.algorithm == 'prod':
            self.mlp_layers = MLPLayers(
                [self.embedding_size, self.weight_size])
        else:
            raise ValueError(
                "NAIS just support attention type in ['concat', 'prod'] but get {}"
                .format(self.algorithm))
        self.weight_layer = nn.Parameter(torch.ones(self.weight_size, 1))
        self.bceloss = nn.BCELoss()

        # parameters initialization
        if self.pretrain_path is not None:
            self.logger.info('use pretrain from [{}]...'.format(
                self.pretrain_path))
            self._load_pretrain()
        else:
            self.logger.info('unused pretrain...')
            self.apply(self._init_weights)

    def _init_weights(self, module):
        """Initialize the module's parameters

        Embeddings are drawn from a normal distribution (mean 0, std 0.01);
        linear weights use xavier normal and linear biases are set to zero.

        Note:
            It's a little different from the source code, because pytorch has no function to initialize
            the parameters by truncated normal distribution, so we replace it with xavier normal distribution

        """
        if isinstance(module, nn.Embedding):
            normal_(module.weight.data, 0, 0.01)
        elif isinstance(module, nn.Linear):
            xavier_normal_(module.weight.data)
            if module.bias is not None:
                constant_(module.bias.data, 0)

    def _load_pretrain(self):
        """A simple implementation of loading pretrained parameters.

        The two item embedding tables are copied from the checkpoint at
        ``self.pretrain_path``; the attention MLP is freshly initialised.

        """
        # SECURITY: ``torch.load`` unpickles the file, only load checkpoints
        # from a trusted source.
        # Loading onto the CPU first keeps a checkpoint that was saved on a GPU
        # usable on hosts without that device; ``copy_`` moves the values to
        # wherever the embeddings live.
        fism = torch.load(self.pretrain_path,
                          map_location='cpu')['state_dict']
        self.item_src_embedding.weight.data.copy_(
            fism['item_src_embedding.weight'])
        self.item_dst_embedding.weight.data.copy_(
            fism['item_dst_embedding.weight'])
        for name, parm in self.mlp_layers.named_parameters():
            if name.endswith('weight'):
                xavier_normal_(parm.data)
            elif name.endswith('bias'):
                constant_(parm.data, 0)

    def get_history_info(self, dataset):
        """get the user history interaction information

        Args:
            dataset (DataSet): train dataset

        Returns:
            tuple: (history_item_matrix, history_lens, mask_mat), all moved to
            ``self.device``. ``mask_mat`` is a float matrix with the shape of
            ``history_item_matrix`` holding 1 at positions below the user's
            history length and 0 elsewhere.

        """
        history_item_matrix, _, history_lens = dataset.history_item_matrix()
        history_item_matrix = history_item_matrix.to(self.device)
        history_lens = history_lens.to(self.device)
        arange_tensor = torch.arange(history_item_matrix.shape[1]).to(
            self.device)
        mask_mat = (arange_tensor < history_lens.unsqueeze(1)).float()
        return history_item_matrix, history_lens, mask_mat

    def reg_loss(self):
        """calculate the reg loss for embedding layers and mlp layers

        ``self.reg_weights`` must unpack into three factors: for the source
        embedding, the destination embedding and the MLP weights.

        Returns:
            torch.Tensor: reg loss

        """
        reg_1, reg_2, reg_3 = self.reg_weights
        loss_1 = reg_1 * self.item_src_embedding.weight.norm(2)
        loss_2 = reg_2 * self.item_dst_embedding.weight.norm(2)
        loss_3 = 0
        for name, parm in self.mlp_layers.named_parameters():
            if name.endswith('weight'):
                loss_3 = loss_3 + reg_3 * parm.norm(2)
        return loss_1 + loss_2 + loss_3

    def attention_mlp(self, inter, target):
        """layers of attention which support `prod` and `concat`

        Args:
            inter (torch.Tensor): the embedding of history items
            target (torch.Tensor): the embedding of target items

        Returns:
            torch.Tensor: the unnormalised attention logits, batch_size x max_len

        """
        if self.algorithm == 'prod':
            mlp_input = inter * target.unsqueeze(
                1)  # batch_size x max_len x embedding_size
        else:
            mlp_input = torch.cat(
                [inter, target.unsqueeze(1).expand_as(inter)],
                dim=2)  # batch_size x max_len x embedding_size*2
        mlp_output = self.mlp_layers(
            mlp_input)  # batch_size x max_len x weight_size

        logits = torch.matmul(mlp_output, self.weight_layer).squeeze(
            2)  # batch_size x max_len
        return logits

    def _attention_pooling(self, exp_logits, similarity, item_num, bias):
        """Turn exponentiated logits into the final sigmoid score.

        Shared by :meth:`softmax` and :meth:`mask_softmax`. The normaliser is
        the sum of ``exp_logits`` raised to ``self.beta``, and the pooled
        similarity is scaled by ``item_num ** -self.alpha``.

        Args:
            exp_logits (torch.Tensor): exponentiated (and possibly masked) logits
            similarity (torch.Tensor): similarity between history and target items
            item_num (torch.Tensor): history lengths with a trailing unit dimension
            bias (torch.Tensor): bias

        Returns:
            torch.Tensor: final output
        """
        exp_sum = torch.sum(exp_logits, dim=1, keepdim=True)
        exp_sum = torch.pow(exp_sum, self.beta)
        weights = torch.div(exp_logits, exp_sum)

        coeff = torch.pow(item_num.squeeze(1), -self.alpha)
        output = torch.sigmoid(coeff.float() *
                               torch.sum(weights * similarity, dim=1) + bias)
        return output

    def mask_softmax(self, similarity, logits, bias, item_num, batch_mask_mat):
        """softmax the unmasked user history items and get the final output

        Args:
            similarity (torch.Tensor): the similarity between the history items and target items
            logits (torch.Tensor): the initial weights of the history items
            bias (torch.Tensor): bias
            item_num (torch.Tensor): user history interaction lengths
            batch_mask_mat (torch.Tensor): the mask of user history interactions

        Returns:
            torch.Tensor: final output

        """
        exp_logits = torch.exp(logits)  # batch_size x max_len

        # zero out the padded history positions before normalising
        exp_logits = batch_mask_mat * exp_logits  # batch_size x max_len
        return self._attention_pooling(exp_logits, similarity, item_num, bias)

    def softmax(self, similarity, logits, item_num, bias):
        """softmax the user history features and get the final output

        Args:
            similarity (torch.Tensor): the similarity between the history items and target items
            logits (torch.Tensor): the initial weights of the history items
            item_num (torch.Tensor): user history interaction lengths
            bias (torch.Tensor): bias

        Returns:
            torch.Tensor: final output

        """
        exp_logits = torch.exp(logits)  # batch_size x max_len
        return self._attention_pooling(exp_logits, similarity, item_num, bias)

    def inter_forward(self, user, item):
        """forward the model by interaction

        Args:
            user (torch.Tensor): user ids of the batch
            item (torch.Tensor): target item ids of the batch

        Returns:
            torch.Tensor: one score per (user, item) pair

        """
        user_inter = self.history_item_matrix[user]
        item_num = self.history_lens[user].unsqueeze(1)
        batch_mask_mat = self.mask_mat[user]
        user_history = self.item_src_embedding(
            user_inter)  # batch_size x max_len x embedding_size
        target = self.item_dst_embedding(item)  # batch_size x embedding_size
        bias = self.bias[item]  # batch_size
        similarity = torch.bmm(user_history, target.unsqueeze(2)).squeeze(
            2)  # batch_size x max_len
        logits = self.attention_mlp(user_history, target)
        scores = self.mask_softmax(similarity, logits, bias, item_num,
                                   batch_mask_mat)
        return scores

    def user_forward(self, user_input, item_num, repeats=None, pred_slc=None):
        """forward the model by user

        Args:
            user_input (torch.Tensor): user input tensor
            item_num (torch.Tensor): user history interaction lens
            repeats (int, optional): the number of items to be evaluated; when
                omitted it is taken from the number of target items
            pred_slc (torch.Tensor, optional): continuous index which controls the current evaluation items,
                                              if pred_slc is None, it will evaluate all items

        Returns:
            torch.Tensor: result

        """
        if pred_slc is None:
            targets = self.item_dst_embedding.weight  # target_items x embedding_size
            bias = self.bias
        else:
            targets = self.item_dst_embedding(pred_slc)
            bias = self.bias[pred_slc]
        # The history has to be replicated once per target item, so the target
        # count is the only meaningful default (the former ``None`` default
        # crashed inside ``Tensor.repeat``).
        if repeats is None:
            repeats = targets.shape[0]
        item_num = item_num.repeat(repeats, 1)
        user_history = self.item_src_embedding(
            user_input)  # inter_num x embedding_size
        user_history = user_history.repeat(
            repeats, 1, 1)  # target_items x inter_num x embedding_size
        similarity = torch.bmm(user_history, targets.unsqueeze(2)).squeeze(
            2)  # target_items x inter_num
        logits = self.attention_mlp(user_history, targets)
        scores = self.softmax(similarity, logits, item_num, bias)
        return scores

    def forward(self, user, item):
        """Score the given (user, item) pairs, see :meth:`inter_forward`."""
        return self.inter_forward(user, item)

    def calculate_loss(self, interaction):
        """Return the binary cross-entropy loss plus the regularisation loss.

        Args:
            interaction: batch object indexed by ``self.USER_ID``,
                ``self.ITEM_ID`` and ``self.LABEL``.
        """
        user = interaction[self.USER_ID]
        item = interaction[self.ITEM_ID]
        label = interaction[self.LABEL]
        output = self.forward(user, item)
        loss = self.bceloss(output, label) + self.reg_loss()
        return loss

    def full_sort_predict(self, interaction):
        """Score every item for each user of ``interaction``.

        Args:
            interaction: batch object indexed by ``self.USER_ID``.

        Returns:
            torch.Tensor: the per-user score vectors concatenated along
            dimension 0, in the order of the users.
        """
        user = interaction[self.USER_ID]
        user_inters = self.history_item_matrix[user]
        item_nums = self.history_lens[user]
        scores = []

        # test users one by one, if the number of items is too large, we will split it to some pieces
        for user_input, item_num in zip(user_inters, item_nums.unsqueeze(1)):
            # ``user_input[:item_num]`` drops the padding of the history row
            if self.split_to <= 0:
                output = self.user_forward(user_input[:item_num],
                                           item_num,
                                           repeats=self.n_items)
            else:
                output = []
                for mask in self.group:
                    tmp_output = self.user_forward(user_input[:item_num],
                                                   item_num,
                                                   repeats=len(mask),
                                                   pred_slc=mask)
                    output.append(tmp_output)
                output = torch.cat(output, dim=0)
            scores.append(output)
        result = torch.cat(scores, dim=0)
        return result

    def predict(self, interaction):
        """Score the (user, item) pairs contained in ``interaction``."""
        user = interaction[self.USER_ID]
        item = interaction[self.ITEM_ID]
        output = self.forward(user, item)
        return output
예제 #4
0
class ConvNCF(GeneralRecommender):
    r"""ConvNCF is a neural collaborative filtering model built on NCF.

    The outer product of the user and item embeddings forms a 2-D interaction
    map that encodes pairwise correlations between embedding dimensions; the
    map is processed by CNN layers and a final prediction layer.
    The model is trained in pairwise mode, following the original author.
    """
    input_type = InputType.PAIRWISE

    def __init__(self, config, dataset):
        """Read the hyper-parameters from ``config`` and build all layers.

        Args:
            config: configuration object indexed by key; the keys read here are
                ``LABEL_FIELD``, ``embedding_size``, ``cnn_channels``,
                ``cnn_kernels``, ``cnn_strides``, ``dropout_prob`` and
                ``reg_weights``.
            dataset: forwarded unchanged to the parent constructor.
        """
        super(ConvNCF, self).__init__(config, dataset)

        # dataset related field name
        self.LABEL = config['LABEL_FIELD']

        # hyper-parameters
        self.embedding_size = config['embedding_size']
        self.cnn_channels = config['cnn_channels']
        self.cnn_kernels = config['cnn_kernels']
        self.cnn_strides = config['cnn_strides']
        self.dropout_prob = config['dropout_prob']
        self.regs = config['reg_weights']

        # embedding tables
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(self.n_items, self.embedding_size)

        # convolution tower over the interaction map
        self.cnn_layers = CNNLayers(
            self.cnn_channels,
            self.cnn_kernels,
            self.cnn_strides,
            activation='relu',
        )

        # maps the pooled channels of the last CNN layer to a single score
        self.predict_layers = MLPLayers(
            [self.cnn_channels[-1], 1],
            self.dropout_prob,
            activation='none',
        )
        self.loss = ConvNCFBPRLoss()

    def forward(self, user, item):
        """Score the given (user, item) pairs.

        Args:
            user (torch.Tensor): user ids.
            item (torch.Tensor): item ids, aligned with ``user``.

        Returns:
            torch.Tensor: the prediction with its last dimension squeezed out.
        """
        user_vec = self.user_embedding(user)
        item_vec = self.item_embedding(item)

        # batched outer product, then add a channel axis for the CNN
        outer = torch.bmm(user_vec.unsqueeze(2), item_vec.unsqueeze(1))
        feature_map = self.cnn_layers(outer.unsqueeze(1))

        # sum over the two spatial axes
        pooled = feature_map.sum(dim=(2, 3))

        return self.predict_layers(pooled).squeeze(-1)

    def reg_loss(self):
        r"""Calculate the weighted L2-norm penalty of the model parameters.

        The first entry of ``self.regs`` scales both embedding tables, the
        second one scales every parameter of the CNN and prediction layers
        whose name ends with ``weight``.

        Returns:
            The summed penalty.
        """
        embed_reg, layer_reg = self.regs[:2]
        user_penalty = embed_reg * self.user_embedding.weight.norm(2)
        item_penalty = embed_reg * self.item_embedding.weight.norm(2)

        layer_penalty = 0
        for layers in (self.cnn_layers, self.predict_layers):
            for param_name, param in layers.named_parameters():
                if not param_name.endswith('weight'):
                    continue
                layer_penalty = layer_penalty + layer_reg * param.norm(2)

        return user_penalty + item_penalty + layer_penalty

    def calculate_loss(self, interaction):
        """Return the pairwise loss of positive vs. negative items plus the L2 penalty.

        Args:
            interaction: batch object indexed by ``self.USER_ID``,
                ``self.ITEM_ID`` and ``self.NEG_ITEM_ID``.
        """
        user = interaction[self.USER_ID]
        pos_item = interaction[self.ITEM_ID]
        neg_item = interaction[self.NEG_ITEM_ID]

        pos_score = self.forward(user, pos_item)
        neg_score = self.forward(user, neg_item)

        return self.loss(pos_score, neg_score) + self.reg_loss()

    def predict(self, interaction):
        """Score the (user, item) pairs contained in ``interaction``."""
        return self.forward(interaction[self.USER_ID],
                            interaction[self.ITEM_ID])