class PNN(ContextRecommender):
    """PNN calculate inner and outer product of feature embedding.
    You can choose the product option with the parameter of use_inner and use_outer

    """

    def __init__(self, config, dataset):
        """Read the hyper-parameters from ``config`` and build the layers.

        Args:
            config: configuration object indexed by the keys ``mlp_hidden_size``,
                ``dropout_prob``, ``use_inner``, ``use_outer`` and ``reg_weight``.
            dataset: dataset object forwarded to the parent constructor.
        """
        super(PNN, self).__init__(config, dataset)

        # load parameters info
        self.mlp_hidden_size = config['mlp_hidden_size']
        self.dropout_prob = config['dropout_prob']
        self.use_inner = config['use_inner']
        self.use_outer = config['use_outer']
        self.reg_weight = config['reg_weight']

        # number of unordered field pairs
        self.num_pair = int(self.num_feature_field * (self.num_feature_field - 1) / 2)

        # define layers and loss
        # The MLP input is the flattened embeddings plus ``num_pair`` values
        # for every enabled product layer.
        product_out_dim = self.num_feature_field * self.embedding_size
        if self.use_inner:
            product_out_dim += self.num_pair
            self.inner_product = InnerProductLayer(self.num_feature_field, device=self.device)

        if self.use_outer:
            product_out_dim += self.num_pair
            self.outer_product = OuterProductLayer(
                self.num_feature_field, self.embedding_size, device=self.device
            )

        size_list = [product_out_dim] + self.mlp_hidden_size
        self.mlp_layers = MLPLayers(size_list, self.dropout_prob, bn=False)
        self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.loss = nn.BCELoss()

        # parameters initialization
        self.apply(self._init_weights)

    def reg_loss(self):
        """Calculate the L2 normalization loss of model parameters.
        Including weight matrixes of mlp layers.

        Returns:
            loss(torch.FloatTensor): sum over the MLP parameters whose name ends
            with ``weight`` of ``reg_weight * ||w||_2``; the int ``0`` if there
            is no such parameter.
        """
        reg_loss = 0
        for name, parm in self.mlp_layers.named_parameters():
            if name.endswith('weight'):
                reg_loss = reg_loss + self.reg_weight * parm.norm(2)
        return reg_loss

    def _init_weights(self, module):
        """Xavier-normal init for embedding/linear weights, zeros for linear biases."""
        if isinstance(module, nn.Embedding):
            xavier_normal_(module.weight.data)
        elif isinstance(module, nn.Linear):
            xavier_normal_(module.weight.data)
            if module.bias is not None:
                constant_(module.bias.data, 0)

    def forward(self, interaction):
        """Compute the predicted probability for every sample in ``interaction``.

        Args:
            interaction: batch of input fields, passed to ``embed_input_fields``.

        Returns:
            torch.Tensor: sigmoid scores with the trailing singleton dimension
            removed (one score per sample).
        """
        # sparse_embedding shape: [batch_size, num_token_seq_field+num_token_field, embed_dim] or None
        # dense_embedding shape: [batch_size, num_float_field] or [batch_size, num_float_field, embed_dim] or None
        sparse_embedding, dense_embedding = self.embed_input_fields(interaction)
        all_embeddings = []
        if sparse_embedding is not None:
            all_embeddings.append(sparse_embedding)
        # only 3-d dense embeddings can be concatenated along the field axis
        if dense_embedding is not None and len(dense_embedding.shape) == 3:
            all_embeddings.append(dense_embedding)
        pnn_all_embeddings = torch.cat(all_embeddings, dim=1)  # [batch_size, num_field, embed_dim]
        batch_size = pnn_all_embeddings.shape[0]

        # linear part
        linear_part = pnn_all_embeddings.view(batch_size, -1)  # [batch_size, num_field*embed_dim]
        output = [linear_part]

        # second order part
        if self.use_inner:
            inner_product = self.inner_product(pnn_all_embeddings).view(batch_size, -1)
            output.append(inner_product)
        if self.use_outer:
            outer_product = self.outer_product(pnn_all_embeddings).view(batch_size, -1)
            output.append(outer_product)
        output = torch.cat(output, dim=1)  # [batch_size, d]

        output = self.predict_layer(self.mlp_layers(output))  # [batch_size, 1]
        output = self.sigmoid(output)
        # Squeeze only the last dimension: a bare ``squeeze()`` would also drop
        # the batch dimension for a single-sample batch and yield a 0-d tensor
        # whose shape no longer matches the label tensor in ``calculate_loss``.
        return output.squeeze(-1)

    def calculate_loss(self, interaction):
        """Return the BCE loss against ``interaction[self.LABEL]`` plus ``reg_loss()``."""
        label = interaction[self.LABEL]
        output = self.forward(interaction)
        return self.loss(output, label) + self.reg_loss()

    def predict(self, interaction):
        """Return the scores produced by ``forward`` for ``interaction``."""
        return self.forward(interaction)
class xDeepFM(ContextRecommender):
    """xDeepFM combines a CIN (Compressed Interaction Network) with a classical DNN.
    The model is able to learn certain bounded-degree feature interactions explicitly;
    Besides, it can also learn arbitrary low- and high-order feature interactions implicitly.
    """

    def __init__(self, config, dataset):
        """Read the hyper-parameters from ``config`` and build the CIN, MLP and output layers.

        Args:
            config: configuration object indexed by the keys ``mlp_hidden_size``,
                ``reg_weight``, ``dropout_prob``, ``direct`` and ``cin_layer_size``.
            dataset: dataset object forwarded to the parent constructor.
        """
        super(xDeepFM, self).__init__(config, dataset)

        # load parameters info
        self.mlp_hidden_size = config['mlp_hidden_size']
        self.reg_weight = config['reg_weight']
        self.dropout_prob = config['dropout_prob']
        self.direct = config['direct']
        self.cin_layer_size = temp_cin_size = list(config['cin_layer_size'])

        # Check whether the size of the CIN layer is legal.
        # Without direct connections each hidden output is split into two equal
        # halves, so the layer sizes are rounded down to even numbers.
        if not self.direct:
            self.cin_layer_size = list(map(lambda x: int(x // 2 * 2), temp_cin_size))
            if self.cin_layer_size[:-1] != temp_cin_size[:-1]:
                # The message previously said "direct is True" although this
                # branch only runs when ``direct`` is False.
                self.logger.warning(
                    'Layer size of CIN should be even except for the last layer when direct is False. '
                    'It is changed to {}'.format(self.cin_layer_size)
                )

        # Create a convolutional layer for each CIN layer.
        # nn.ModuleList (instead of a plain list) registers the convolutions as
        # sub-modules so that their weights show up in ``parameters()`` and
        # ``state_dict()`` and are therefore optimized and saved.
        self.conv1d_list = nn.ModuleList()
        self.field_nums = [self.num_feature_field]
        for i, layer_size in enumerate(self.cin_layer_size):
            conv1d = nn.Conv1d(self.field_nums[-1] * self.field_nums[0], layer_size, 1).to(self.device)
            self.conv1d_list.append(conv1d)
            if self.direct:
                self.field_nums.append(layer_size)
            else:
                # only half of the channels are fed to the next CIN layer
                self.field_nums.append(layer_size // 2)

        # Create MLP layer
        size_list = [self.embedding_size * self.num_feature_field] + self.mlp_hidden_size + [1]
        self.mlp_layers = MLPLayers(size_list, dropout=self.dropout_prob)

        # Get the output size of CIN
        if self.direct:
            self.final_len = sum(self.cin_layer_size)
        else:
            self.final_len = sum(self.cin_layer_size[:-1]) // 2 + self.cin_layer_size[-1]

        self.cin_linear = nn.Linear(self.final_len, 1, bias=False)
        self.sigmoid = nn.Sigmoid()
        self.loss = nn.BCELoss()
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Xavier-normal init for embedding/linear weights, zeros for linear biases."""
        if isinstance(module, nn.Embedding):
            xavier_normal_(module.weight.data)
        elif isinstance(module, nn.Linear):
            xavier_normal_(module.weight.data)
            if module.bias is not None:
                constant_(module.bias.data, 0)

    def reg_loss(self, parameters):
        """Calculate the L2 normalization loss of parameters in a certain layer.

        Args:
            parameters: iterable of ``(name, parameter)`` pairs, e.g. the result
                of ``named_parameters()``.

        Returns:
            loss(torch.FloatTensor): sum of the L2 norms of the parameters whose
            name ends with ``weight``; the int ``0`` if there is none.
        """
        reg_loss = 0
        for name, parm in parameters:
            if name.endswith('weight'):
                reg_loss = reg_loss + parm.norm(2)
        return reg_loss

    def calculate_reg_loss(self):
        """Calculate the final L2 normalization loss of model parameters.
        Including weight matrixes of mlp layers, linear layer and convolutional layers.

        Returns:
            loss(torch.FloatTensor): The L2 Loss tensor.
        """
        l2_reg = 0
        l2_reg = l2_reg + self.reg_loss(self.mlp_layers.named_parameters())
        l2_reg = l2_reg + self.reg_loss(self.first_order_linear.named_parameters())
        for conv1d in self.conv1d_list:
            l2_reg += self.reg_loss(conv1d.named_parameters())
        return l2_reg

    def compressed_interaction_network(self, input_features, activation='identity'):
        r"""For k-th CIN layer, the output :math:`X_k` is calculated via

        .. math::
            x_{h,*}^{k} = \sum_{i=1}^{H_k-1} \sum_{j=1}^{m}W_{i,j}^{k,h}(X_{i,*}^{k-1} \circ x_{j,*}^0)

        :math:`H_k` donates the number of feature vectors in the k-th layer,
        :math:`1 \le h \le H_k`.
        :math:`\circ` donates the Hadamard product.

        And Then, We apply sum pooling on each feature map of the hidden layer.
        Finally, All pooling vectors from hidden layers are concatenated.

        Args:
            input_features(torch.Tensor): [batch_size, field_num, embed_dim]. Embedding vectors of all features.
            activation(str): name of activation function.

        Returns:
            torch.Tensor: [batch_size, final_len]. The direct-connect channels of
            all CIN layers concatenated and sum-pooled over the embedding dimension.
        """
        batch_size, _, embedding_size = input_features.shape

        # Resolve the activation once; it is the same for every CIN layer.
        # ``None`` means the intermediate result is passed through unchanged.
        activate_func = None
        if activation.lower() != 'identity':
            activate_func = activation_layer(activation)

        hidden_nn_layers = [input_features]
        final_result = []
        last_layer = len(self.cin_layer_size) - 1
        for i, layer_size in enumerate(self.cin_layer_size):
            # pairwise Hadamard products between the input fields and the
            # previous hidden layer
            z_i = torch.einsum('bmd,bhd->bhmd', hidden_nn_layers[0], hidden_nn_layers[-1])
            z_i = z_i.view(batch_size, self.field_nums[0] * self.field_nums[i], embedding_size)
            z_i = self.conv1d_list[i](z_i)

            # Pass the CIN intermediate result through the activation function.
            output = z_i if activate_func is None else activate_func(z_i)

            # Get the output of the hidden layer.
            if self.direct:
                direct_connect = output
                next_hidden = output
            elif i != last_layer:
                # first half feeds the next layer, second half goes to the output
                next_hidden, direct_connect = torch.split(output, 2 * [layer_size // 2], 1)
            else:
                direct_connect = output
                next_hidden = 0  # placeholder, never read after the last layer

            final_result.append(direct_connect)
            hidden_nn_layers.append(next_hidden)

        result = torch.cat(final_result, dim=1)
        result = torch.sum(result, -1)  # sum pooling over the embedding dimension
        return result

    def forward(self, interaction):
        """Compute the predicted probability for every sample in ``interaction``.

        Args:
            interaction: batch of input fields, passed to ``embed_input_fields``
                and ``first_order_linear``.

        Returns:
            torch.Tensor: sigmoid of the summed first-order, CIN and DNN outputs
            with dimension 1 squeezed out.
        """
        sparse_embedding, dense_embedding = self.embed_input_fields(interaction)
        all_embeddings = []
        if sparse_embedding is not None:
            all_embeddings.append(sparse_embedding)
        # only 3-d dense embeddings can be concatenated along the field axis
        if dense_embedding is not None and len(dense_embedding.shape) == 3:
            all_embeddings.append(dense_embedding)

        # Get the output of CIN.
        xdeepfm_input = torch.cat(all_embeddings, dim=1)  # [batch_size, num_field, embed_dim]
        cin_output = self.compressed_interaction_network(xdeepfm_input)
        cin_output = self.cin_linear(cin_output)

        # Get the output of MLP layer.
        batch_size = xdeepfm_input.shape[0]
        dnn_output = self.mlp_layers(xdeepfm_input.view(batch_size, -1))

        # Get predicted score.
        y_p = self.first_order_linear(interaction) + cin_output + dnn_output
        y = self.sigmoid(y_p)

        return y.squeeze(1)

    def calculate_loss(self, interaction):
        """Return the BCE loss against ``interaction[self.LABEL]`` plus the weighted L2 term."""
        label = interaction[self.LABEL]
        output = self.forward(interaction)
        l2_reg = self.calculate_reg_loss()
        return self.loss(output, label) + self.reg_weight * l2_reg

    def predict(self, interaction):
        """Return the scores produced by ``forward`` for ``interaction``."""
        return self.forward(interaction)
class NAIS(GeneralRecommender):
    """NAIS is an attention network, which is capable of distinguishing which historical items
    in a user profile are more important for a prediction. We just implement the model following
    the original author with a pointwise training mode.

    Note:
        instead of forming a minibatch as all training instances of a randomly sampled user which is
        mentioned in the original paper, we still train the model by a randomly sampled interactions.

    """
    input_type = InputType.POINTWISE

    def __init__(self, config, dataset):
        """Read the hyper-parameters from ``config`` and build the layers.

        Args:
            config: configuration object indexed by the keys ``LABEL_FIELD``,
                ``embedding_size``, ``weight_size``, ``algorithm``, ``reg_weights``,
                ``alpha``, ``beta``, ``split_to`` and ``pretrain_path``.
            dataset: training dataset; must provide ``history_item_matrix()``.

        Raises:
            ValueError: if ``algorithm`` is neither ``'concat'`` nor ``'prod'``.
        """
        super(NAIS, self).__init__(config, dataset)

        # load dataset info
        self.LABEL = config['LABEL_FIELD']

        # get all users' history interaction information. the history item
        # matrix is padded to the maximum number of a user's interactions
        self.history_item_matrix, self.history_lens, self.mask_mat = self.get_history_info(dataset)

        # load parameters info
        self.embedding_size = config['embedding_size']
        self.weight_size = config['weight_size']
        self.algorithm = config['algorithm']
        self.reg_weights = config['reg_weights']
        self.alpha = config['alpha']
        self.beta = config['beta']
        self.split_to = config['split_to']
        self.pretrain_path = config['pretrain_path']

        # split the too large dataset into the specified pieces
        if self.split_to > 0:
            self.logger.info('split the n_items to {} pieces'.format(self.split_to))
            self.group = torch.chunk(torch.arange(self.n_items).to(self.device), self.split_to)
        else:
            self.logger.warning(
                'Pay Attetion!! the `split_to` is set to 0. If you catch a OMM error in this case, '
                'you need to increase it \n\t\t\tuntil the error disappears. For example, '
                'you can append it in the command line such as `--split_to=5`'
            )

        # define layers and loss
        # construct source and destination item embedding matrix
        self.item_src_embedding = nn.Embedding(self.n_items, self.embedding_size, padding_idx=0)
        self.item_dst_embedding = nn.Embedding(self.n_items, self.embedding_size, padding_idx=0)
        self.bias = nn.Parameter(torch.zeros(self.n_items))
        if self.algorithm == 'concat':
            self.mlp_layers = MLPLayers([self.embedding_size * 2, self.weight_size])
        elif self.algorithm == 'prod':
            self.mlp_layers = MLPLayers([self.embedding_size, self.weight_size])
        else:
            raise ValueError(
                "NAIS just support attention type in ['concat', 'prod'] but get {}".format(self.algorithm)
            )
        self.weight_layer = nn.Parameter(torch.ones(self.weight_size, 1))
        self.bceloss = nn.BCELoss()

        # parameters initialization
        if self.pretrain_path is not None:
            self.logger.info('use pretrain from [{}]...'.format(self.pretrain_path))
            self._load_pretrain()
        else:
            self.logger.info('unused pretrain...')
            self.apply(self._init_weights)

    def _init_weights(self, module):
        """Initialize the module's parameters

        Embedding weights are drawn from a normal distribution (mean 0, std 0.01);
        linear weights use xavier normal initialization and linear biases are
        set to zero.

        """
        if isinstance(module, nn.Embedding):
            normal_(module.weight.data, 0, 0.01)
        elif isinstance(module, nn.Linear):
            xavier_normal_(module.weight.data)
            if module.bias is not None:
                constant_(module.bias.data, 0)

    def _load_pretrain(self):
        """A simple implementation of loading pretrained parameters.

        Copies ``item_src_embedding.weight`` and ``item_dst_embedding.weight`` from
        the ``state_dict`` stored at ``pretrain_path`` and freshly initializes the
        attention MLP.

        """
        # ``map_location`` lets a checkpoint saved on a different device be loaded.
        # NOTE: torch.load unpickles the file; only load checkpoints from trusted sources.
        fism = torch.load(self.pretrain_path, map_location=self.device)['state_dict']
        self.item_src_embedding.weight.data.copy_(fism['item_src_embedding.weight'])
        self.item_dst_embedding.weight.data.copy_(fism['item_dst_embedding.weight'])
        for name, parm in self.mlp_layers.named_parameters():
            if name.endswith('weight'):
                xavier_normal_(parm.data)
            elif name.endswith('bias'):
                constant_(parm.data, 0)

    def get_history_info(self, dataset):
        """get the user history interaction information

        Args:
            dataset (DataSet): train dataset

        Returns:
            tuple: (history_item_matrix, history_lens, mask_mat)

        """
        history_item_matrix, _, history_lens = dataset.history_item_matrix()
        history_item_matrix = history_item_matrix.to(self.device)
        history_lens = history_lens.to(self.device)
        arange_tensor = torch.arange(history_item_matrix.shape[1]).to(self.device)
        # 1.0 for positions inside a user's real history, 0.0 for padding
        mask_mat = (arange_tensor < history_lens.unsqueeze(1)).float()
        return history_item_matrix, history_lens, mask_mat

    def reg_loss(self):
        """calculate the reg loss for embedding layers and mlp layers

        Returns:
            torch.Tensor: reg loss

        """
        reg_1, reg_2, reg_3 = self.reg_weights
        loss_1 = reg_1 * self.item_src_embedding.weight.norm(2)
        loss_2 = reg_2 * self.item_dst_embedding.weight.norm(2)
        loss_3 = 0
        for name, parm in self.mlp_layers.named_parameters():
            if name.endswith('weight'):
                loss_3 = loss_3 + reg_3 * parm.norm(2)
        return loss_1 + loss_2 + loss_3

    def attention_mlp(self, inter, target):
        """layers of attention which support `prod` and `concat`

        Args:
            inter (torch.Tensor): the embedding of history items
            target (torch.Tensor): the embedding of target items

        Returns:
            torch.Tensor: the result of attention

        """
        if self.algorithm == 'prod':
            mlp_input = inter * target.unsqueeze(1)  # batch_size x max_len x embedding_size
        else:
            mlp_input = torch.cat(
                [inter, target.unsqueeze(1).expand_as(inter)], dim=2
            )  # batch_size x max_len x embedding_size*2
        mlp_output = self.mlp_layers(mlp_input)  # batch_size x max_len x weight_size

        logits = torch.matmul(mlp_output, self.weight_layer).squeeze(2)  # batch_size x max_len
        return logits

    def mask_softmax(self, similarity, logits, bias, item_num, batch_mask_mat):
        """softmax the unmasked user history items and get the final output

        Args:
            similarity (torch.Tensor): the similarity between the history items and target items
            logits (torch.Tensor): the initial weights of the history items
            bias (torch.Tensor): bias
            item_num (torch.Tensor): user history interaction lengths
            batch_mask_mat (torch.Tensor): the mask of user history interactions

        Returns:
            torch.Tensor: final output

        """
        exp_logits = torch.exp(logits)  # batch_size x max_len

        # padded positions must not contribute to the attention weights
        exp_logits = batch_mask_mat * exp_logits  # batch_size x max_len
        exp_sum = torch.sum(exp_logits, dim=1, keepdim=True)
        # the normalizer is raised to the power ``beta``
        exp_sum = torch.pow(exp_sum, self.beta)
        weights = torch.div(exp_logits, exp_sum)

        coeff = torch.pow(item_num.squeeze(1), -self.alpha)
        output = torch.sigmoid(coeff.float() * torch.sum(weights * similarity, dim=1) + bias)
        return output

    def softmax(self, similarity, logits, item_num, bias):
        """softmax the user history features and get the final output

        Args:
            similarity (torch.Tensor): the similarity between the history items and target items
            logits (torch.Tensor): the initial weights of the history items
            item_num (torch.Tensor): user history interaction lengths
            bias (torch.Tensor): bias

        Returns:
            torch.Tensor: final output

        """
        exp_logits = torch.exp(logits)  # batch_size x max_len
        exp_sum = torch.sum(exp_logits, dim=1, keepdim=True)
        # the normalizer is raised to the power ``beta``
        exp_sum = torch.pow(exp_sum, self.beta)
        weights = torch.div(exp_logits, exp_sum)
        coeff = torch.pow(item_num.squeeze(1), -self.alpha)
        output = torch.sigmoid(coeff.float() * torch.sum(weights * similarity, dim=1) + bias)
        return output

    def inter_forward(self, user, item):
        """forward the model by interaction

        Args:
            user (torch.Tensor): user ids, used to index the stored history tensors
            item (torch.Tensor): target item ids

        Returns:
            torch.Tensor: predicted score of every (user, item) pair

        """
        user_inter = self.history_item_matrix[user]
        item_num = self.history_lens[user].unsqueeze(1)
        batch_mask_mat = self.mask_mat[user]
        user_history = self.item_src_embedding(user_inter)  # batch_size x max_len x embedding_size
        target = self.item_dst_embedding(item)  # batch_size x embedding_size
        bias = self.bias[item]  # one bias per target item
        similarity = torch.bmm(user_history, target.unsqueeze(2)).squeeze(2)  # batch_size x max_len
        logits = self.attention_mlp(user_history, target)
        scores = self.mask_softmax(similarity, logits, bias, item_num, batch_mask_mat)
        return scores

    def user_forward(self, user_input, item_num, repeats=None, pred_slc=None):
        """forward the model by user

        Args:
            user_input (torch.Tensor): user input tensor
            item_num (torch.Tensor): user history interaction lens
            repeats (int, optional): the number of items to be evaluated. If it is
                None, it is derived from the number of target items
                (``n_items`` or ``len(pred_slc)``).
            pred_slc (torch.Tensor, optional): continuous index which controls the current evaluation items,
                                              if pred_slc is None, it will evaluate all items

        Returns:
            torch.Tensor: result

        """
        if repeats is None:
            # The history has to be repeated once per target item; previously the
            # default ``None`` was passed straight to ``Tensor.repeat`` and failed.
            repeats = self.n_items if pred_slc is None else len(pred_slc)

        item_num = item_num.repeat(repeats, 1)
        user_history = self.item_src_embedding(user_input)  # inter_num x embedding_size
        user_history = user_history.repeat(repeats, 1, 1)  # target_items x inter_num x embedding_size
        if pred_slc is None:
            targets = self.item_dst_embedding.weight  # target_items x embedding_size
            bias = self.bias
        else:
            targets = self.item_dst_embedding(pred_slc)
            bias = self.bias[pred_slc]
        similarity = torch.bmm(user_history, targets.unsqueeze(2)).squeeze(2)  # target_items x inter_num
        logits = self.attention_mlp(user_history, targets)
        scores = self.softmax(similarity, logits, item_num, bias)
        return scores

    def forward(self, user, item):
        """Score the given (user, item) pairs; see ``inter_forward``."""
        return self.inter_forward(user, item)

    def calculate_loss(self, interaction):
        """Return the BCE loss against ``interaction[self.LABEL]`` plus ``reg_loss()``."""
        user = interaction[self.USER_ID]
        item = interaction[self.ITEM_ID]
        label = interaction[self.LABEL]
        output = self.forward(user, item)
        loss = self.bceloss(output, label) + self.reg_loss()
        return loss

    def full_sort_predict(self, interaction):
        """Score every item for every user in ``interaction``.

        Returns:
            torch.Tensor: the per-user score vectors concatenated along dim 0.
        """
        user = interaction[self.USER_ID]
        user_inters = self.history_item_matrix[user]
        item_nums = self.history_lens[user]
        scores = []

        # test users one by one, if the number of items is too large, we will split it to some pieces
        for user_input, item_num in zip(user_inters, item_nums.unsqueeze(1)):
            # ``user_input[:item_num]`` drops the padding of the history row
            if self.split_to <= 0:
                output = self.user_forward(user_input[:item_num], item_num, repeats=self.n_items)
            else:
                output = []
                for mask in self.group:
                    tmp_output = self.user_forward(
                        user_input[:item_num], item_num, repeats=len(mask), pred_slc=mask
                    )
                    output.append(tmp_output)
                output = torch.cat(output, dim=0)
            scores.append(output)
        result = torch.cat(scores, dim=0)
        return result

    def predict(self, interaction):
        """Return the scores of the (user, item) pairs in ``interaction``."""
        user = interaction[self.USER_ID]
        item = interaction[self.ITEM_ID]
        output = self.forward(user, item)
        return output
class ConvNCF(GeneralRecommender):
    r"""ConvNCF is a neural collaborative filtering model that takes the outer product of the
    user and item embeddings, which gives an interaction map over pairs of embedding dimensions,
    and feeds that map through CNN layers followed by a prediction layer.

    The model is trained in pairwise mode.
    """
    input_type = InputType.PAIRWISE

    def __init__(self, config, dataset):
        """Read the hyper-parameters from ``config`` and build the layers."""
        super(ConvNCF, self).__init__(config, dataset)

        # dataset field name
        self.LABEL = config['LABEL_FIELD']

        # hyper-parameters
        self.embedding_size = config['embedding_size']
        self.cnn_channels = config['cnn_channels']
        self.cnn_kernels = config['cnn_kernels']
        self.cnn_strides = config['cnn_strides']
        self.dropout_prob = config['dropout_prob']
        self.regs = config['reg_weights']

        # layers and loss
        embed_dim = self.embedding_size
        self.user_embedding = nn.Embedding(self.n_users, embed_dim)
        self.item_embedding = nn.Embedding(self.n_items, embed_dim)
        self.cnn_layers = CNNLayers(
            self.cnn_channels,
            self.cnn_kernels,
            self.cnn_strides,
            activation='relu',
        )
        predict_sizes = [self.cnn_channels[-1], 1]
        self.predict_layers = MLPLayers(predict_sizes, self.dropout_prob, activation='none')
        self.loss = ConvNCFBPRLoss()

    def forward(self, user, item):
        """Return one score per (user, item) pair."""
        user_column = self.user_embedding(user).unsqueeze(2)
        item_row = self.item_embedding(item).unsqueeze(1)

        # batched outer product, then a channel axis for the CNN
        feature_map = torch.bmm(user_column, item_row).unsqueeze(1)

        # sum the CNN output over its last two axes
        pooled = self.cnn_layers(feature_map).sum(axis=(2, 3))

        return self.predict_layers(pooled).squeeze(-1)

    def reg_loss(self):
        r"""Compute the weighted L2-norm penalty of the model parameters.

        The first entry of ``reg_weights`` scales the two embedding tables, the second
        one scales every ``weight`` parameter of the CNN and prediction layers.

        Returns:
            loss(torch.FloatTensor): the summed penalty.
        """
        embed_reg, layer_reg = self.regs[:2]
        user_term = embed_reg * self.user_embedding.weight.norm(2)
        item_term = embed_reg * self.item_embedding.weight.norm(2)

        layer_term = 0
        for block in (self.cnn_layers, self.predict_layers):
            for param_name, param in block.named_parameters():
                if not param_name.endswith('weight'):
                    continue
                layer_term = layer_term + layer_reg * param.norm(2)

        return user_term + item_term + layer_term

    def calculate_loss(self, interaction):
        """Return the pairwise loss of positive vs. negative items plus ``reg_loss()``."""
        user = interaction[self.USER_ID]
        positive = interaction[self.ITEM_ID]
        negative = interaction[self.NEG_ITEM_ID]

        positive_score = self.forward(user, positive)
        negative_score = self.forward(user, negative)

        pairwise_loss = self.loss(positive_score, negative_score)
        return pairwise_loss + self.reg_loss()

    def predict(self, interaction):
        """Return the scores of the (user, item) pairs in ``interaction``."""
        return self.forward(interaction[self.USER_ID], interaction[self.ITEM_ID])