def test_merge_docstrings_extra_parameter_included_with_no_documentation():
    """
    Compare the merged docstring built for ``ChildClassExtraParamatersNoDoc`` to a fixed string.

    The expected text lists ``arg3: np.array`` with no description following it.

    """
    merged = merge_docstrings(
        BaseClass,
        ChildClassExtraParamatersNoDoc.__doc__,
        ChildClassExtraParamatersNoDoc.__init__,
    )

    # adjacent literals are concatenated by the parser into a single expected string
    assert merged == (
        " Note that nothing is after the ``Parameters`` section here. "
        "Parameters ---------- "
        "arg1: str The first argument "
        "arg2: int This argument's description is longer. "
        "See how it is on a new line: * Even with a bullet list now! "
        "arg3: np.array "
        "arg4: int An important argument! "
        "*args: arguments "
        "**kwargs: keyword argument "
        "Additional keyword arguments to pass into ``BaseClass`` "
    )
def test_merge_docstrings_no_paramaters_section():
    """
    Compare the merged docstring built for ``ChildClassNoParamaters`` to a fixed string.

    The expected text contains a ``References`` section and no ``Parameters`` section.

    """
    merged = merge_docstrings(
        BaseClass,
        ChildClassNoParamaters.__doc__,
        ChildClassNoParamaters.__init__,
    )

    # adjacent literals are concatenated by the parser into a single expected string
    assert merged == (
        " No ``Parameters`` section at all here! "
        "References ---------- "
        "arg8 "
    )
def test_merge_docstrings_with_two_extra_sections():
    """
    Compare the merged docstring built for ``ChildClassWithTwoExtraSections`` to a fixed string.

    The expected text has ``References`` and ``Notes`` sections after ``Parameters``.

    """
    merged = merge_docstrings(
        BaseClass,
        ChildClassWithTwoExtraSections.__doc__,
        ChildClassWithTwoExtraSections.__init__,
    )

    # adjacent literals are concatenated by the parser into a single expected string
    assert merged == (
        " This is the short description for the child. "
        "This is a longer description for the child. It also contains many lines. "
        "With line breaks, like this. "
        "You can also have new paragraphs! "
        "NOTE: This is an important note! "
        "Look, a new line of documentation after the note! "
        "Parameters ---------- "
        "arg1: str The first argument "
        "arg2: int This argument's description is longer. "
        "See how it is on a new line: * Even with a bullet list now! "
        "arg3: np.array "
        "arg4: int An important argument! "
        "*args: arguments "
        "**kwargs: keyword argument "
        "Additional keyword arguments to pass into ``BaseClass`` "
        "References ---------- "
        "arg8: list "
        "arg9: int No description above, and that is okay! "
        "arg10: str This one is new. "
        "Notes ----- "
        "This is a note. The above ``References`` section used to say ``Returns``, but classes do "
        "not return anything and I did not feel inclined to change the description. "
    )
def test_merge_docstrings_with_args_and_kwargs():
    """
    Compare the merged docstring built for ``ChildClassWithArgsAndKwargs`` to a fixed string.

    The expected text includes ``*args`` and ``**kwargs`` entries in ``Parameters``.

    """
    merged = merge_docstrings(
        BaseClass,
        ChildClassWithArgsAndKwargs.__doc__,
        ChildClassWithArgsAndKwargs.__init__,
    )

    # adjacent literals are concatenated by the parser into a single expected string
    assert merged == (
        " This is the short description for the child. "
        "This is a longer description for the child. It also contains many lines. "
        "With line breaks, like this. "
        "You can also have new paragraphs! "
        "NOTE: This is an important note! "
        "Look, a new line of documentation after the note! "
        "Parameters ---------- "
        "arg1: str The first argument "
        "arg2: int This argument's description is longer. "
        "See how it is on a new line: * Even with a bullet list now! "
        "arg3: np.array "
        "arg4: int An important argument! "
        "*args: arguments "
        "**kwargs: keyword argument "
        "Additional keyword arguments to pass into ``BaseClass`` "
        "References ---------- "
        "arg8: list "
        "arg9: int No description above, and that is okay! "
        "arg10: str This one is new. "
    )
class MatrixFactorizationModel(BasePipeline):
    # NOTE: the full docstring is merged in with ``BasePipeline``'s using ``merge_docstrings``.
    # Only the description of new or changed parameters are included in this docstring
    """
    Training pipeline for the matrix factorization model.

    ``MatrixFactorizationModel`` models have an embedding layer for both users and items which are
    dot-producted together to output a single float ranking value.

    Collie adds a twist on to this incredibly popular framework by allowing separate optimizers
    for embeddings and bias terms. With larger datasets and multiple epochs of training, a model
    might incorrectly learn to only optimize the bias terms for a quicker path towards a local
    loss minimum, essentially memorizing how popular each item is. By using a separate, slower
    optimizer for the bias terms (like Stochastic Gradient Descent), the model must prioritize
    optimizing the embeddings for meaningful, more varied recommendations, leading to a model
    that is able to achieve a much lower loss. See the documentation below for ``bias_lr`` and
    ``bias_optimizer`` input arguments for implementation details.

    All ``MatrixFactorizationModel`` instances are subclasses of the ``LightningModule`` class
    provided by PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollieTrainer, MatrixFactorizationModel


        model = MatrixFactorizationModel(train=train)
        trainer = CollieTrainer(model)
        trainer.fit(model)
        model.eval()

        # do evaluation as normal with ``model``

        model.save_model(filename='model.pth')
        new_model = MatrixFactorizationModel(load_model_path='model.pth')

        # do evaluation as normal with ``new_model``

    Parameters
    ----------
    embedding_dim: int
        Number of latent factors to use for user and item embeddings
    dropout_p: float
        Probability of dropout
    sparse: bool
        Whether or not to treat embeddings as sparse tensors. If ``True``, cannot use weight decay
        on the optimizer
    bias_lr: float
        Bias terms learning rate. If 'infer', will set equal to ``lr``
    bias_optimizer: torch.optim or str
        Optimizer for the bias terms. This supports the same string options as ``optimizer``, with
        the addition of ``infer``, which will set the optimizer equal to ``optimizer``. If
        ``bias_optimizer`` is ``None``, only a single optimizer will be created for all model
        parameters
    y_range: tuple
        Specify as ``(min, max)`` to apply a sigmoid layer to the output score of the model to get
        predicted ratings within the range of ``min`` and ``max``

    """
    def __init__(
        self,
        train: INTERACTIONS_LIKE_INPUT = None,
        val: INTERACTIONS_LIKE_INPUT = None,
        embedding_dim: int = 30,
        dropout_p: float = 0.0,
        sparse: bool = False,
        lr: float = 1e-3,
        bias_lr: Optional[Union[float, str]] = 1e-2,
        lr_scheduler_func: Optional[Callable] = partial(ReduceLROnPlateau,
                                                        patience=1,
                                                        verbose=True),
        weight_decay: float = 0.0,
        optimizer: Union[str, Callable] = 'adam',
        bias_optimizer: Optional[Union[str, Callable]] = 'sgd',
        loss: Union[str, Callable] = 'hinge',
        metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
        metadata_for_loss_weights: Optional[Dict[str, float]] = None,
        y_range: Optional[Tuple[float, float]] = None,
        load_model_path: Optional[str] = None,
        map_location: Optional[str] = None,
    ):
        # every constructor argument is forwarded to the parent pipeline
        super().__init__(**get_init_arguments())

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _setup_model(self, **kwargs) -> None:
        """
        Create the bias tables, embedding tables, and dropout layer from ``self.hparams``.

        This method will be called after ``prepare_data``.

        """
        hparams = self.hparams
        use_sparse = hparams.sparse

        # creation order is kept as biases first, then embeddings, then dropout
        self.user_biases = ZeroEmbedding(num_embeddings=hparams.num_users,
                                         embedding_dim=1,
                                         sparse=use_sparse)
        self.item_biases = ZeroEmbedding(num_embeddings=hparams.num_items,
                                         embedding_dim=1,
                                         sparse=use_sparse)
        self.user_embeddings = ScaledEmbedding(num_embeddings=hparams.num_users,
                                               embedding_dim=hparams.embedding_dim,
                                               sparse=use_sparse)
        self.item_embeddings = ScaledEmbedding(num_embeddings=hparams.num_items,
                                               embedding_dim=hparams.embedding_dim,
                                               sparse=use_sparse)
        self.dropout = nn.Dropout(p=hparams.dropout_p)

    def forward(self, users: torch.tensor, items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model.

        Simple matrix factorization for a single user and item looks like:

        ``prediction = sum(user_embedding * item_embedding) + user_bias + item_bias``

        Dropout is applied to the two embeddings and not the biases. When ``y_range`` is set, the
        score is passed through a sigmoid and rescaled to lie between ``y_range[0]`` and
        ``y_range[1]``.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        user_vectors = self.dropout(self.user_embeddings(users))
        item_vectors = self.dropout(self.item_embeddings(items))

        interaction = torch.mul(user_vectors, item_vectors).sum(axis=1)
        preds = interaction + self.user_biases(users).squeeze(1) + self.item_biases(items).squeeze(1)

        y_range = self.hparams.y_range
        if y_range is None:
            return preds

        lower, upper = y_range[0], y_range[1]

        return torch.sigmoid(preds) * (upper - lower) + lower

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings on device."""
        item_table = self.item_embeddings

        return item_table.weight.data
class CollaborativeMetricLearningModel(BasePipeline):
    # NOTE: the full docstring is merged in with ``BasePipeline``'s using ``merge_docstrings``.
    # Only the description of new or changed parameters are included in this docstring
    """
    Training pipeline for the collaborative metric learning model.

    ``CollaborativeMetricLearningModel`` models have an embedding layer for both users and items.
    A single float prediction is retrieved by taking the pairwise distance between the two
    embeddings.

    The implementation here is meant to mimic its original implementation as specified here:
    https://arxiv.org/pdf/1803.00202.pdf [1]_

    All ``CollaborativeMetricLearningModel`` instances are subclasses of the ``LightningModule``
    class provided by PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollaborativeMetricLearningModel, CollieTrainer


        model = CollaborativeMetricLearningModel(train=train)
        trainer = CollieTrainer(model)
        trainer.fit(model)
        model.eval()

        # do evaluation as normal with ``model``

        model.save_model(filename='model.pth')
        new_model = CollaborativeMetricLearningModel(load_model_path='model.pth')

        # do evaluation as normal with ``new_model``

    Parameters
    ----------
    embedding_dim: int
        Number of latent factors to use for user and item embeddings
    sparse: bool
        Whether or not to treat embeddings as sparse tensors. If ``True``, cannot use weight decay
        on the optimizer
    y_range: tuple
        Specify as ``(min, max)`` to apply a sigmoid layer to the output score of the model to get
        predicted ratings within the range of ``min`` and ``max``

    References
    ----------
    .. [1] Campo, Miguel, et al. "Collaborative Metric Learning Recommendation System: Application
        to Theatrical Movie Releases." ArXiv.org, 1 Mar. 2018, arxiv.org/abs/1803.00202.

    """
    def __init__(
        self,
        train: INTERACTIONS_LIKE_INPUT = None,
        val: INTERACTIONS_LIKE_INPUT = None,
        embedding_dim: int = 30,
        sparse: bool = False,
        lr: float = 1e-3,
        lr_scheduler_func: Optional[Callable] = partial(ReduceLROnPlateau,
                                                        patience=1,
                                                        verbose=True),
        weight_decay: float = 0.0,
        optimizer: Union[str, Callable] = 'adam',
        loss: Union[str, Callable] = 'hinge',
        metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
        metadata_for_loss_weights: Optional[Dict[str, float]] = None,
        y_range: Optional[Tuple[float, float]] = None,
        load_model_path: Optional[str] = None,
        map_location: Optional[str] = None,
    ):
        # every constructor argument is forwarded to the parent pipeline
        super().__init__(**get_init_arguments())

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _setup_model(self, **kwargs) -> None:
        """
        Create the user and item embedding tables from ``self.hparams``.

        This method will be called after ``prepare_data``.

        """
        hparams = self.hparams

        self.user_embeddings = ScaledEmbedding(num_embeddings=hparams.num_users,
                                               embedding_dim=hparams.embedding_dim,
                                               sparse=hparams.sparse)
        self.item_embeddings = ScaledEmbedding(num_embeddings=hparams.num_items,
                                               embedding_dim=hparams.embedding_dim,
                                               sparse=hparams.sparse)

    def forward(self, users: torch.tensor, items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model, equivalent to:

        ``prediction = pairwise_distance(user_embedding, item_embedding)``

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        user_vectors = self.user_embeddings(users)
        item_vectors = self.item_embeddings(items)

        return F.pairwise_distance(user_vectors, item_vectors)

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings on device."""
        item_table = self.item_embeddings

        return item_table.weight.data
class HybridPretrainedModel(BasePipeline):
    # NOTE: the full docstring is merged in with ``BasePipeline``'s using ``merge_docstrings``.
    # Only the description of new or changed parameters are included in this docstring
    """
    Training pipeline for a hybrid recommendation model using a pre-trained matrix factorization
    model as its base.

    ``HybridPretrainedModel`` models contain dense layers that process item metadata, concatenate
    this embedding with the user and item embeddings copied from a trained
    ``MatrixFactorizationModel``, and send this concatenated embedding through more dense layers to
    output a single float ranking / rating. We add both user and item biases to this score before
    returning. This is the same architecture as the ``HybridModel``, but we are using the
    embeddings from a pre-trained model rather than training them up ourselves.

    All ``HybridPretrainedModel`` instances are subclasses of the ``LightningModule`` class
    provided by PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollieTrainer, HybridPretrainedModel, MatrixFactorizationModel


        # instantiate and fit a ``MatrixFactorizationModel`` as expected
        mf_model = MatrixFactorizationModel(train=train)
        mf_trainer = CollieTrainer(mf_model)
        mf_trainer.fit(mf_model)

        hybrid_model = HybridPretrainedModel(train=train,
                                             item_metadata=item_metadata,
                                             trained_model=mf_model)
        hybrid_trainer = CollieTrainer(hybrid_model)
        hybrid_trainer.fit(hybrid_model)
        hybrid_model.eval()

        # do evaluation as normal with ``hybrid_model``

        hybrid_model.save_model(path='model')
        new_hybrid_model = HybridPretrainedModel(load_model_path='model')

        # do evaluation as normal with ``new_hybrid_model``

    Parameters
    ----------
    item_metadata: torch.tensor, pd.DataFrame, or np.array, 2-dimensional
        The shape of the item metadata should be (num_items x metadata_features), and each item's
        metadata should be available when indexing a row by an item ID
    trained_model: ``collie.model.MatrixFactorizationModel``
        Previously trained ``MatrixFactorizationModel`` model to extract embeddings from
    metadata_layers_dims: list
        List of linear layer dimensions to apply to the metadata only, starting with the dimension
        directly following ``metadata_features`` and ending with the dimension to concatenate with
        the item embeddings
    combined_layers_dims: list
        List of linear layer dimensions to apply to the concatenated item embeddings and item
        metadata, starting with the dimension directly following the shape of
        ``item_embeddings + metadata_features`` and ending with the dimension before the final
        linear layer to dimension 1
    freeze_embeddings: bool
        When initializing the model, whether or not to freeze ``trained_model``'s embeddings
    dropout_p: float
        Probability of dropout
    optimizer: torch.optim or str
        If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    """
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 item_metadata: Union[torch.tensor, pd.DataFrame, np.array] = None,
                 trained_model: MatrixFactorizationModel = None,
                 metadata_layers_dims: Optional[List[int]] = None,
                 # NOTE: mutable default kept for interface compatibility; this class only reads
                 # it (``_setup_model`` concatenates it into a new list)
                 combined_layers_dims: List[int] = [128, 64, 32],
                 freeze_embeddings: bool = True,
                 dropout_p: float = 0.0,
                 lr: float = 1e-3,
                 lr_scheduler_func: Optional[Callable] = partial(ReduceLROnPlateau,
                                                                 patience=1,
                                                                 verbose=True),
                 weight_decay: float = 0.0,
                 optimizer: Union[str, Callable] = 'adam',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 # y_range: Optional[Tuple[float, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        item_metadata_num_cols = None
        if load_model_path is None:
            # building a fresh model: both the trained model and the metadata are required
            if trained_model is None:
                raise ValueError('Must provide ``trained_model`` for ``HybridPretrainedModel``.')

            if item_metadata is None:
                raise ValueError('Must provide item metadata for ``HybridPretrainedModel``.')
            elif isinstance(item_metadata, pd.DataFrame):
                item_metadata = torch.from_numpy(item_metadata.to_numpy())
            elif isinstance(item_metadata, np.ndarray):
                item_metadata = torch.from_numpy(item_metadata)

            # ``item_metadata`` is rebound before ``get_init_arguments()`` is called below
            item_metadata = item_metadata.float()

            item_metadata_num_cols = item_metadata.shape[1]

        super().__init__(**get_init_arguments(),
                         item_metadata_num_cols=item_metadata_num_cols)

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _move_any_external_data_to_device(self):
        """Move item metadata to the device before training."""
        self.item_metadata = self.item_metadata.to(self.device)

    def _load_model_init_helper(self, load_model_path: str, map_location: str, **kwargs) -> None:
        """
        Load item metadata and model weights from the directory at ``load_model_path``.

        Reads ``metadata.pkl`` into ``self.item_metadata`` and passes ``model.pth`` to the parent
        class's loader.

        Parameters
        ----------
        load_model_path: str
            Directory containing ``metadata.pkl`` and ``model.pth``
        map_location: str
            Forwarded unchanged to the parent class's loader

        """
        # NOTE(security): ``joblib.load`` unpickles the file, so only load model directories that
        # come from a trusted source
        self.item_metadata = joblib.load(os.path.join(load_model_path, 'metadata.pkl'))
        super()._load_model_init_helper(load_model_path=os.path.join(load_model_path, 'model.pth'),
                                        map_location=map_location)

    def _setup_model(self, **kwargs) -> None:
        """
        Method for building model internals that rely on the data passed in.

        This method will be called after ``prepare_data``.

        When no ``load_model_path`` is set, ``trained_model`` and ``item_metadata`` are popped from
        ``kwargs`` unless the instance already holds them.

        """
        if self.hparams.load_model_path is None:
            if not hasattr(self, '_trained_model'):
                self._trained_model = kwargs.pop('trained_model')
            if not hasattr(self, 'item_metadata'):
                self.item_metadata = kwargs.pop('item_metadata')

            # we are not loading in a model, so we will create a new model from scratch

            # we don't want to modify the ``trained_model``'s weights, so we deep copy
            self.embeddings = nn.Sequential(
                copy.deepcopy(self._trained_model.user_embeddings),
                copy.deepcopy(self._trained_model.item_embeddings)
            )
            self.biases = nn.Sequential(
                copy.deepcopy(self._trained_model.user_biases),
                copy.deepcopy(self._trained_model.item_biases)
            )

            if self.hparams.freeze_embeddings:
                self.freeze_embeddings()
            else:
                self.unfreeze_embeddings()

            # save hyperparameters that we need to be able to rebuild the embedding layers on load
            self.hparams.user_num_embeddings = self.embeddings[0].num_embeddings
            self.hparams.user_embeddings_dim = self.embeddings[0].embedding_dim
            self.hparams.item_num_embeddings = self.embeddings[1].num_embeddings
            self.hparams.item_embeddings_dim = self.embeddings[1].embedding_dim
        else:
            # assume we are loading in a previously-saved model

            # set up dummy embeddings with the correct dimensions so we can load weights in
            self.embeddings = nn.Sequential(
                ScaledEmbedding(self.hparams.user_num_embeddings,
                                self.hparams.user_embeddings_dim),
                ScaledEmbedding(self.hparams.item_num_embeddings,
                                self.hparams.item_embeddings_dim)
            )
            self.biases = nn.Sequential(
                ZeroEmbedding(self.hparams.user_num_embeddings, 1),
                ZeroEmbedding(self.hparams.item_num_embeddings, 1)
            )

        self.dropout = nn.Dropout(p=self.hparams.dropout_p)

        # set up metadata-only layers
        metadata_output_dim = self.hparams.item_metadata_num_cols
        self.metadata_layers = None
        if self.hparams.metadata_layers_dims is not None:
            metadata_layers_dims = (
                [self.hparams.item_metadata_num_cols] + self.hparams.metadata_layers_dims
            )
            # all layers are constructed first and initialized afterwards
            self.metadata_layers = [
                nn.Linear(metadata_layers_dims[idx - 1], metadata_layers_dims[idx])
                for idx in range(1, len(metadata_layers_dims))
            ]
            for i, layer in enumerate(self.metadata_layers):
                nn.init.xavier_normal_(layer.weight)
                # layers live in a plain list, so each one is registered by name
                self.add_module('metadata_layer_{}'.format(i), layer)

            metadata_output_dim = metadata_layers_dims[-1]

        # set up combined layers
        combined_dimension_input = (
            self.hparams.user_embeddings_dim
            + self.hparams.item_embeddings_dim
            + metadata_output_dim
        )
        combined_layers_dims = (
            [combined_dimension_input] + self.hparams.combined_layers_dims + [1]
        )
        self.combined_layers = [
            nn.Linear(combined_layers_dims[idx - 1], combined_layers_dims[idx])
            for idx in range(1, len(combined_layers_dims))
        ]
        for i, layer in enumerate(self.combined_layers):
            nn.init.xavier_normal_(layer.weight)
            self.add_module('combined_layer_{}'.format(i), layer)

    def forward(self, users: torch.tensor, items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model.

        Item metadata rows are optionally passed through the metadata layers, concatenated with
        the user and item embeddings, and sent through the combined layers. User and item biases
        are added to the output of the final combined layer.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        if str(self.device) != str(self.item_metadata.device):
            self._move_any_external_data_to_device()

        metadata_output = self.item_metadata[items, :]
        if self.metadata_layers is not None:
            for metadata_nn_layer in self.metadata_layers:
                metadata_output = self.dropout(
                    F.leaky_relu(metadata_nn_layer(metadata_output))
                )

        combined_output = torch.cat((self.embeddings[0](users),
                                     self.embeddings[1](items),
                                     metadata_output), 1)
        # the final combined layer is applied without activation or dropout
        for combined_nn_layer in self.combined_layers[:-1]:
            combined_output = self.dropout(
                F.leaky_relu(combined_nn_layer(combined_output))
            )

        pred_scores = (
            self.combined_layers[-1](combined_output)
            + self.biases[0](users)
            + self.biases[1](items)
        )

        # squeeze only the trailing size-1 dimension: a bare ``squeeze()`` would also drop the
        # batch dimension for a batch of one and return a 0-d tensor
        return pred_scores.squeeze(1)

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings on device."""
        # TODO: update this to get the embeddings post-MLP
        return self.embeddings[1].weight.data

    def freeze_embeddings(self) -> None:
        """Remove gradient requirement from the embeddings."""
        self.embeddings[0].weight.requires_grad = False
        self.embeddings[1].weight.requires_grad = False

    def unfreeze_embeddings(self) -> None:
        """Require gradients for the embeddings."""
        self.embeddings[0].weight.requires_grad = True
        self.embeddings[1].weight.requires_grad = True

    def save_model(self,
                   path: Union[str, Path] = os.path.join(DATA_PATH / 'model'),
                   overwrite: bool = False) -> None:
        """
        Save the model's state dictionary, hyperparameters, and item metadata.

        While PyTorch Lightning offers a way to save and load models, there are two main reasons
        for overriding these:

        1) To properly save and load a model requires the ``Trainer`` object, meaning that all
           deployed models will require Lightning to run the model, which is not actually needed
           for inference.

        2) In the v0.8.4 release, loading a model back in leads to a ``RuntimeError`` unable to
           load in weights.

        Parameters
        ----------
        path: str or Path
            Directory path to save model and data files
        overwrite: bool
            Whether or not to overwrite existing data

        Raises
        ------
        ValueError
            If ``path`` exists, is not empty, and ``overwrite`` is ``False``

        """
        path = str(path)

        if os.path.exists(path) and os.listdir(path) and overwrite is False:
            raise ValueError(f'Data exists in ``path`` at {path} and ``overwrite`` is False.')

        Path(path).mkdir(parents=True, exist_ok=True)

        joblib.dump(self.item_metadata, os.path.join(path, 'metadata.pkl'))

        # preserve ordering while extracting the state dictionary without the ``_trained_model``
        # component; a single pass avoids building the state dictionary twice
        state_dict_to_save = OrderedDict(
            (key, value)
            for key, value in self.state_dict().items()
            if '_trained_model' not in key
        )

        dict_to_save = {'state_dict': state_dict_to_save, 'hparams': self.hparams}
        torch.save(dict_to_save, os.path.join(path, 'model.pth'))

    def load_from_hybrid_model(self, hybrid_model) -> None:
        """
        Copy hyperparameters and state dictionary from an existing ``HybridPretrainedModel``
        instance.

        This is particularly useful for creating another PyTorch Lightning trainer object to
        fine-tune copied-over embeddings from a ``MatrixFactorizationModel`` instance.

        After copying, the layers are rebuilt with ``_setup_model`` and the model is put into
        evaluation mode.

        Parameters
        ----------
        hybrid_model: ``collie.model.HybridPretrainedModel``
            HybridPretrainedModel containing hyperparameters and state dictionary to copy over

        """
        for key, value in hybrid_model.hparams.items():
            self.hparams[key] = value

        self._setup_model()
        self.load_state_dict(state_dict=hybrid_model.state_dict())
        self.eval()
class NeuralCollaborativeFiltering(BasePipeline):
    # NOTE: the full docstring is merged in with ``BasePipeline``'s using ``merge_docstrings``.
    # Only the description of new or changed parameters are included in this docstring
    """
    Training pipeline for a neural matrix factorization model.

    ``NeuralCollaborativeFiltering`` models combine a collaborative filtering and multilayer
    perceptron network in a single, unified model. The model consists of two sections: the first
    is a simple matrix factorization that calculates a score by multiplying together user and item
    embeddings (lookups through an embedding table); the second is a MLP network that feeds
    embeddings from a second set of embedding tables (one for user, one for item). Both output
    vectors are combined and sent through a final MLP layer before returning a single
    recommendation score.

    The implementation here is meant to mimic its original implementation as specified here:
    https://arxiv.org/pdf/1708.05031.pdf [2]_

    All ``NeuralCollaborativeFiltering`` instances are subclasses of the ``LightningModule`` class
    provided by PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollieTrainer, NeuralCollaborativeFiltering


        model = NeuralCollaborativeFiltering(train=train)
        trainer = CollieTrainer(model)
        trainer.fit(model)
        model.eval()

        # do evaluation as normal with ``model``

        model.save_model(filename='model.pth')
        new_model = NeuralCollaborativeFiltering(load_model_path='model.pth')

        # do evaluation as normal with ``new_model``

    Parameters
    ----------
    embedding_dim: int
        Number of latent factors to use for the matrix factorization embedding table. For the MLP
        embedding table, the dimensionality will be calculated with the formula
        ``embedding_dim * (2 ** (num_layers - 1))``
    num_layers: int
        Number of MLP layers to apply. Each MLP layer will have its input dimension calculated with
        the formula ``embedding_dim * (2 ** (num_layers - current_layer_number))``
    final_layer: str or function
        Final layer activation function. Available string options include:

        * 'sigmoid'

        * 'relu'

        * 'leaky_relu'

    dropout_p: float
        Probability of dropout on the MLP layers
    optimizer: torch.optim or str
        If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    References
    ----------
    .. [2] Xiangnan et al. "Neural Collaborative Filtering." Neural Collaborative Filtering |
        Proceedings of the 26th International Conference on World Wide Web, 1 Apr. 2017,
        dl.acm.org/doi/10.1145/3038912.3052569.

    """
    def __init__(
        self,
        train: INTERACTIONS_LIKE_INPUT = None,
        val: INTERACTIONS_LIKE_INPUT = None,
        embedding_dim: int = 8,
        num_layers: int = 3,
        final_layer: Optional[Union[str, Callable]] = None,
        dropout_p: float = 0.0,
        lr: float = 1e-3,
        lr_scheduler_func: Optional[Callable] = partial(ReduceLROnPlateau,
                                                        patience=1,
                                                        verbose=True),
        weight_decay: float = 0.0,
        optimizer: Union[str, Callable] = 'adam',
        loss: Union[str, Callable] = 'hinge',
        metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
        metadata_for_loss_weights: Optional[Dict[str, float]] = None,
        # y_range: Optional[Tuple[float, float]] = None,
        load_model_path: Optional[str] = None,
        map_location: Optional[str] = None,
    ):
        # every constructor argument is forwarded to the parent pipeline
        super().__init__(**get_init_arguments())

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _setup_model(self, **kwargs) -> None:
        """
        Create the embedding tables, MLP stack, and prediction layer from ``self.hparams``.

        This method will be called after ``prepare_data``.

        """
        hparams = self.hparams
        depth = hparams.num_layers
        cf_dim = hparams.embedding_dim

        self.user_embeddings_cf = ScaledEmbedding(num_embeddings=hparams.num_users,
                                                  embedding_dim=cf_dim)
        self.item_embeddings_cf = ScaledEmbedding(num_embeddings=hparams.num_items,
                                                  embedding_dim=cf_dim)

        mlp_dim = cf_dim * (2 ** (depth - 1))
        self.user_embeddings_mlp = ScaledEmbedding(num_embeddings=hparams.num_users,
                                                   embedding_dim=mlp_dim)
        self.item_embeddings_mlp = ScaledEmbedding(num_embeddings=hparams.num_items,
                                                   embedding_dim=mlp_dim)

        # each stage is dropout -> linear (halving the width) -> ReLU
        stack = []
        for level in range(depth):
            width = cf_dim * (2 ** (depth - level))
            stack.extend([
                nn.Dropout(p=hparams.dropout_p),
                nn.Linear(width, width // 2),
                nn.ReLU(),
            ])
        self.mlp_layers = nn.Sequential(*stack)

        self.predict_layer = nn.Linear(cf_dim * 2, 1)

        # initialization taken from the official repo:
        # https://github.com/hexiangnan/neural_collaborative_filtering/blob/master/NeuMF.py#L63  # noqa: E501
        for module in self.mlp_layers:
            if isinstance(module, nn.Linear):
                trunc_normal(module.weight.data, std=0.01)

        nn.init.kaiming_uniform_(self.predict_layer.weight, nonlinearity='relu')

        for module in self.modules():
            if isinstance(module, nn.Linear) and module.bias is not None:
                module.bias.data.zero_()

    def forward(self, users: torch.tensor, items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model.

        The elementwise product of the CF embeddings is concatenated with the output of the MLP
        stack and passed through the prediction layer, followed by the optional ``final_layer``
        activation.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        Raises
        ------
        ValueError
            If ``final_layer`` is neither callable, ``None``, nor one of the supported strings

        """
        cf_output = self.user_embeddings_cf(users) * self.item_embeddings_cf(items)

        mlp_input = torch.cat(
            (self.user_embeddings_mlp(users), self.item_embeddings_mlp(items)), -1
        )
        mlp_output = self.mlp_layers(mlp_input)

        prediction = self.predict_layer(torch.cat((cf_output, mlp_output), -1))

        # pick the activation first, then apply it once
        final_layer = self.hparams.final_layer
        if callable(final_layer):
            activation = final_layer
        elif final_layer == 'sigmoid':
            activation = torch.sigmoid
        elif final_layer == 'relu':
            activation = F.relu
        elif final_layer == 'leaky_relu':
            activation = F.leaky_relu
        elif final_layer is None:
            activation = None
        else:
            raise ValueError(f'{final_layer} not valid final layer value!')

        if activation is not None:
            prediction = activation(prediction)

        return prediction.view(-1)

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings, which are the concatenated CF and MLP item embeddings, on device."""
        item_ids = torch.arange(self.hparams.num_items, device=self.device)
        stacked = torch.cat(
            (self.item_embeddings_cf(item_ids), self.item_embeddings_mlp(item_ids)),
            axis=1,
        )

        return stacked.detach()
class HybridModel(MultiStagePipeline):
    # NOTE: the full docstring is merged in with ``MultiStagePipeline``'s using
    # ``merge_docstrings``. Only the descriptions of new or changed parameters are included in
    # this docstring
    """
    Training pipeline for a multi-stage hybrid recommendation model.

    ``HybridModel`` models contain dense layers that process item metadata, concatenate this
    embedding with user and item embeddings, sending this concatenated embedding through more dense
    layers to output a single float ranking / rating. We add both user and item biases to this score
    before returning. This is the same architecture as the ``HybridPretrainedModel``, but we are
    training the embeddings ourselves rather than relying on pulling this from a pre-trained model.

    The stages in a ``HybridModel`` are, in order:

    1. ``matrix_factorization``
        Matrix factorization exactly as we do in ``MatrixFactorizationModel``. In this stage,
        metadata is NOT incorporated into the model.
    2. ``metadata_only``
        User and item embedding terms are frozen, and the MLP layers for the metadata (if
        specified) and combined embedding-metadata data are optimized, together with the user and
        item bias terms.
    3. ``all``
        Embedding and MLP layers are all optimized together, including those for metadata.

    All ``HybridModel`` instances are subclasses of the ``LightningModule`` class provided by
    PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollieTrainer, HybridModel


        # instantiate and fit a ``HybridModel`` as expected
        model = HybridModel(train=train, item_metadata=item_metadata)
        trainer = CollieTrainer(model)
        trainer.fit(model)

        # train for X more epochs on the next stage, ``metadata_only``
        trainer.max_epochs += X
        model.advance_stage()
        trainer.fit(model)

        # train for Y more epochs on the next stage, ``all``
        trainer.max_epochs += Y
        model.advance_stage()
        trainer.fit(model)

        model.eval()

        # do evaluation as normal with ``model``

        model.save_model(path='model')
        new_model = HybridModel(load_model_path='model')

        # do evaluation as normal with ``new_model``

    Note
    ----
    The ``forward`` calculation will be different depending on the stage that is set. Note this
    when evaluating / saving and loading models in.

    Parameters
    ----------
    item_metadata: torch.tensor, pd.DataFrame, or np.array, 2-dimensional
        The shape of the item metadata should be (num_items x metadata_features), and each item's
        metadata should be available when indexing a row by an item ID
    embedding_dim: int
        Number of latent factors to use for user and item embeddings
    metadata_layers_dims: list
        List of linear layer dimensions to apply to the metadata only, starting with the dimension
        directly following ``metadata_features`` and ending with the dimension to concatenate with
        the item embeddings
    combined_layers_dims: list
        List of linear layer dimensions to apply to the concatenated item embeddings and item
        metadata, starting with the dimension directly following the shape of
        ``item_embeddings + metadata_features`` and ending with the dimension before the final
        linear layer to dimension 1
    dropout_p: float
        Probability of dropout
    metadata_only_stage_lr: float
        Learning rate for metadata and combined layers optimized during the ``metadata_only`` stage
    all_stage_lr: float
        Learning rate for all model parameters optimized during the ``all`` stage
    optimizer: torch.optim or str
        Optimizer used for embeddings and bias terms (if ``bias_optimizer`` is ``None``) during the
        ``matrix_factorization`` stage. If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    metadata_only_stage_optimizer: torch.optim or str
        Optimizer used for metadata and combined layers during the ``metadata_only`` stage. If a
        string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    all_stage_optimizer: torch.optim or str
        Optimizer used for all model parameters during the ``all`` stage. If a string, one of the
        following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    """
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 item_metadata: Union[torch.tensor, pd.DataFrame, np.array] = None,
                 embedding_dim: int = 30,
                 metadata_layers_dims: Optional[List[int]] = None,
                 combined_layers_dims: List[int] = [128, 64, 32],
                 dropout_p: float = 0.0,
                 lr: float = 1e-3,
                 bias_lr: Optional[Union[float, str]] = 1e-2,
                 metadata_only_stage_lr: float = 1e-3,
                 all_stage_lr: float = 1e-4,
                 lr_scheduler_func: Optional[Callable] = partial(
                     ReduceLROnPlateau,
                     patience=1,
                     verbose=False,
                 ),
                 weight_decay: float = 0.0,
                 optimizer: Union[str, Callable] = 'adam',
                 bias_optimizer: Optional[Union[str, Callable]] = 'sgd',
                 metadata_only_stage_optimizer: Union[str, Callable] = 'adam',
                 all_stage_optimizer: Union[str, Callable] = 'adam',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        # both stay ``None`` when a saved model is being loaded from ``load_model_path``
        item_metadata_num_cols = None
        optimizer_config_list = None

        if load_model_path is None:
            if item_metadata is None:
                raise ValueError('Must provide item metadata for ``HybridModel``.')
            elif isinstance(item_metadata, pd.DataFrame):
                item_metadata = torch.from_numpy(item_metadata.to_numpy())
            elif isinstance(item_metadata, np.ndarray):
                item_metadata = torch.from_numpy(item_metadata)

            # ``item_metadata`` is rebound before ``get_init_arguments()`` is called below
            item_metadata = item_metadata.float()

            item_metadata_num_cols = item_metadata.shape[1]

            if bias_optimizer is not None:
                initial_optimizer_block = [
                    {
                        'lr': lr,
                        'optimizer': optimizer,
                        # optimize embeddings...
                        'parameter_prefix_list': ['user_embedding', 'item_embedding'],
                        'stage': 'matrix_factorization',
                    },
                    {
                        'lr': lr if bias_lr == 'infer' else bias_lr,
                        'optimizer': optimizer if bias_optimizer == 'infer' else bias_optimizer,
                        # ... and optimize bias terms too
                        'parameter_prefix_list': ['user_bias', 'item_bias'],
                        'stage': 'matrix_factorization',
                    },
                ]
            else:
                initial_optimizer_block = [
                    {
                        'lr': lr,
                        'optimizer': optimizer,
                        # optimize embeddings and bias terms all together
                        'parameter_prefix_list': [
                            'user_embedding',
                            'item_embedding',
                            'user_bias',
                            'item_bias',
                        ],
                        'stage': 'matrix_factorization',
                    },
                ]

            optimizer_config_list = initial_optimizer_block + [
                {
                    'lr': metadata_only_stage_lr,
                    'optimizer': metadata_only_stage_optimizer,
                    # optimize the metadata and combined layers plus the bias terms; the user
                    # and item embeddings are not listed, so they are left out of this stage
                    'parameter_prefix_list': ['metadata', 'combined', 'user_bias', 'item_bias'],
                    'stage': 'metadata_only',
                },
                {
                    'lr': all_stage_lr,
                    'optimizer': all_stage_optimizer,
                    # optimize everything
                    'parameter_prefix_list': ['user', 'item', 'metadata', 'combined'],
                    'stage': 'all',
                },
            ]

        super().__init__(optimizer_config_list=optimizer_config_list,
                         item_metadata_num_cols=item_metadata_num_cols,
                         **get_init_arguments())

    __doc__ = merge_docstrings(MultiStagePipeline, __doc__, __init__)

    def _move_any_external_data_to_device(self):
        """Move item metadata to the device before training."""
        self.item_metadata = self.item_metadata.to(self.device)

    def _load_model_init_helper(self, load_model_path: str, map_location: str, **kwargs) -> None:
        """
        Load item metadata and model weights from the directory ``load_model_path``.

        The directory is expected to contain the two files written by ``save_model``:
        ``metadata.pkl`` and ``model.pth``.

        """
        # the metadata is attached first; the parent helper is then pointed at ``model.pth``
        self.item_metadata = joblib.load(os.path.join(load_model_path, 'metadata.pkl'))
        super()._load_model_init_helper(load_model_path=os.path.join(load_model_path, 'model.pth'),
                                        map_location=map_location,
                                        **kwargs)

    def _setup_model(self, **kwargs) -> None:
        """
        Method for building model internals that rely on the data passed in.

        This method will be called after `prepare_data`.

        """
        if self.hparams.load_model_path is None:
            if not hasattr(self, 'item_metadata'):
                self.item_metadata = kwargs.pop('item_metadata')

        self.user_biases = ZeroEmbedding(num_embeddings=self.hparams.num_users,
                                         embedding_dim=1)
        self.item_biases = ZeroEmbedding(num_embeddings=self.hparams.num_items,
                                         embedding_dim=1)
        self.user_embeddings = ScaledEmbedding(num_embeddings=self.hparams.num_users,
                                               embedding_dim=self.hparams.embedding_dim)
        self.item_embeddings = ScaledEmbedding(num_embeddings=self.hparams.num_items,
                                               embedding_dim=self.hparams.embedding_dim)
        self.dropout = nn.Dropout(p=self.hparams.dropout_p)

        # set up metadata-only layers
        metadata_output_dim = self.hparams.item_metadata_num_cols
        self.metadata_layers = None
        if self.hparams.metadata_layers_dims is not None:
            metadata_layers_dims = (
                [self.hparams.item_metadata_num_cols] + self.hparams.metadata_layers_dims
            )
            self.metadata_layers = [
                nn.Linear(metadata_layers_dims[idx - 1], metadata_layers_dims[idx])
                for idx in range(1, len(metadata_layers_dims))
            ]
            for i, layer in enumerate(self.metadata_layers):
                nn.init.xavier_normal_(layer.weight)
                # the layers live in a plain list, so each one is registered explicitly; the
                # ``metadata`` prefix is what the optimizer configuration matches on
                self.add_module('metadata_layer_{}'.format(i), layer)

            metadata_output_dim = metadata_layers_dims[-1]

        # set up combined layers
        combined_dimension_input = (
            self.user_embeddings.embedding_dim
            + self.item_embeddings.embedding_dim
            + metadata_output_dim
        )
        combined_layers_dims = [combined_dimension_input] + self.hparams.combined_layers_dims + [1]
        self.combined_layers = [
            nn.Linear(combined_layers_dims[idx - 1], combined_layers_dims[idx])
            for idx in range(1, len(combined_layers_dims))
        ]
        for i, layer in enumerate(self.combined_layers):
            nn.init.xavier_normal_(layer.weight)
            # registered under the ``combined`` prefix used by the optimizer configuration
            self.add_module('combined_layer_{}'.format(i), layer)

    def forward(self, users: torch.tensor, items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model.

        In the ``matrix_factorization`` stage, the score is the dot product of the user and item
        embeddings plus both bias terms. In every other stage, the item metadata is sent through
        the metadata layers (if any), concatenated with the user and item embeddings, sent
        through the combined layers, and both bias terms are added to the result.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        if self.hparams.stage == 'matrix_factorization':
            pred_scores = (
                torch.mul(
                    self.dropout(self.user_embeddings(users)),
                    self.dropout(self.item_embeddings(items))
                ).sum(axis=1)
                + self.user_biases(users).squeeze(1)
                + self.item_biases(items).squeeze(1)
            )
        else:
            # TODO: remove self.device and let lightning do it
            metadata_output = self.item_metadata[items, :].to(self.device)
            if self.metadata_layers is not None:
                for metadata_nn_layer in self.metadata_layers:
                    metadata_output = self.dropout(
                        F.leaky_relu(
                            metadata_nn_layer(metadata_output)
                        )
                    )

            # TODO: make this matrix factorization instead of only a MLP
            combined_output = torch.cat((self.user_embeddings(users),
                                         self.item_embeddings(items),
                                         metadata_output), 1)
            for combined_nn_layer in self.combined_layers[:-1]:
                combined_output = self.dropout(
                    F.leaky_relu(
                        combined_nn_layer(combined_output)
                    )
                )

            # the last combined layer is applied without activation or dropout
            pred_scores = (
                self.combined_layers[-1](combined_output)
                + self.user_biases(users)
                + self.item_biases(items)
            )

        # NOTE(review): ``squeeze()`` drops every size-1 dimension, so a batch holding a single
        # example presumably comes back 0-d rather than 1-d - confirm callers tolerate this
        return pred_scores.squeeze()

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings on device."""
        # TODO: update this to get the embeddings post-MLP
        return self.item_embeddings.weight.data

    def save_model(self,
                   path: Union[str, Path] = os.path.join(DATA_PATH / 'model'),
                   overwrite: bool = False) -> None:
        """
        Save the model's state dictionary, hyperparameters, and item metadata.

        While PyTorch Lightning offers a way to save and load models, there are two main reasons
        for overriding these:

        1) To properly save and load a model requires the ``Trainer`` object, meaning that all
           deployed models will require Lightning to run the model, which is not actually needed
           for inference.

        2) In the v0.8.4 release, loading a model back in leads to a ``RuntimeError`` unable to load
           in weights.

        Parameters
        ----------
        path: str or Path
            Directory path to save model and data files
        overwrite: bool
            Whether or not to overwrite existing data

        Raises
        ------
        ValueError
            If ``path`` already exists and is non-empty while ``overwrite`` is ``False``

        """
        path = str(path)

        if os.path.exists(path):
            if os.listdir(path) and overwrite is False:
                raise ValueError(f'Data exists in ``path`` at {path} and ``overwrite`` is False.')

        Path(path).mkdir(parents=True, exist_ok=True)

        # two files are written: the item metadata and the model itself
        joblib.dump(self.item_metadata, os.path.join(path, 'metadata.pkl'))
        super().save_model(filename=os.path.join(path, 'model.pth'))
class ColdStartModel(MultiStagePipeline):
    # NOTE: the full docstring is merged in with ``MultiStagePipeline``'s using
    # ``merge_docstrings``. Only the descriptions of new or changed parameters are included in
    # this docstring
    """
    Training pipeline for a matrix factorization model optimized for the cold-start problem.

    Many recommendation models suffer from the cold start problem, when a model is unable to provide
    adequate recommendations for a new item until enough users have interacted with it. But, if
    users only interact with recommended items, the item will never be recommended, and thus the
    model will never improve recommendations for this item.

    The ``ColdStartModel`` attempts to bypass this by limiting the item space down to "item
    buckets", training a model on this as the item space, then expanding out to all items. During
    this expansion, the learned-embeddings of each bucket is copied over to each corresponding item,
    providing a smarter initialization than a random one for both existing and new items. Now, when
    we have a new item, we can use its bucket embedding as an initialization into a model.

    The stages in a ``ColdStartModel`` are, in order:

    1. ``item_buckets``
        Matrix factorization with item embeddings and bias terms bucketed by ``item_buckets``
        argument. Unlike in the next stage, many items may map on to a single bucket, and this will
        share the same embedding and bias representation. The model should learn user preference for
        buckets in this stage.
    2. ``no_buckets``
        Standard matrix factorization as we do in ``MatrixFactorizationModel``. However, upon
        advancing to this stage, the item embeddings are initialized with their bucketed embedding
        value (and same for biases). Not only does this provide better initialization than random,
        but allows new items to be incorporated into the model without training by using their item
        bucket embedding and bias terms at prediction time.

    Note that the cold start problem exists for new users as well, but this functionality will be
    added to this model in a future version.

    All ``ColdStartModel`` instances are subclasses of the ``LightningModule`` class provided by
    PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import ColdStartModel, CollieTrainer


        # instantiate and fit a ``ColdStartModel`` as expected
        model = ColdStartModel(train=train, item_buckets=item_buckets)
        trainer = CollieTrainer(model)
        trainer.fit(model)

        # train for X more epochs on the next stage, ``no_buckets``
        trainer.max_epochs += X
        model.advance_stage()
        trainer.fit(model)

        model.eval()

        # do evaluation as normal with ``model``

        # get item-item recommendations for a new item by using the bucket ID, Z
        similar_items = model.item_bucket_item_similarity(item_bucket_id=Z)

        model.save_model(filename='model.pth')
        new_model = ColdStartModel(load_model_path='model.pth')

        # do evaluation as normal with ``new_model``

    Note
    ----
    The ``forward`` calculation will be different depending on the stage that is set. Note this
    when evaluating / saving and loading models in.

    Parameters
    ----------
    item_buckets: torch.tensor, 1-d
        An ordered iterable containing the bucket ID for each item ID. For example, if you have
        five films and are going to bucket by primary genre, and your data looks like:

        * Item ID: 0, Genre ID: 1

        * Item ID: 1, Genre ID: 0

        * Item ID: 2, Genre ID: 2

        * Item ID: 3, Genre ID: 2

        * Item ID: 4, Genre ID: 1

        Then ``item_buckets`` would be: ``[1, 0, 2, 2, 1]``
    embedding_dim: int
        Number of latent factors to use for user and item embeddings
    dropout_p: float
        Probability of dropout
    sparse: bool
        Passed as the ``sparse`` argument to every embedding layer created by the model
    item_buckets_stage_lr: float
        Learning rate for user parameters and item bucket parameters optimized during the
        ``item_buckets`` stage
    no_buckets_stage_lr: float
        Learning rate for user parameters and item parameters optimized during the ``no_buckets``
        stage
    item_buckets_stage_optimizer: torch.optim or str
        Optimizer used for user parameters and item bucket parameters optimized during the
        ``item_buckets`` stage. If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    no_buckets_stage_optimizer: torch.optim or str
        Optimizer used for user parameters and item parameters optimized during the ``no_buckets``
        stage. If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    """
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 item_buckets: Iterable[int] = None,
                 embedding_dim: int = 30,
                 dropout_p: float = 0.0,
                 sparse: bool = False,
                 item_buckets_stage_lr: float = 1e-3,
                 no_buckets_stage_lr: float = 1e-3,
                 lr_scheduler_func: Optional[Callable] = partial(
                     ReduceLROnPlateau,
                     patience=1,
                     verbose=False,
                 ),
                 weight_decay: float = 0.0,
                 item_buckets_stage_optimizer: Union[str, Callable] = 'adam',
                 no_buckets_stage_optimizer: Union[str, Callable] = 'adam',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        # both stay ``None`` when a saved model is being loaded from ``load_model_path``
        optimizer_config_list = None
        num_item_buckets = None

        if load_model_path is None:
            # TODO: separate out optimizer and bias optimizer somehow
            optimizer_config_list = [
                {
                    'lr': item_buckets_stage_lr,
                    'optimizer': item_buckets_stage_optimizer,
                    'parameter_prefix_list': [
                        'user_embed',
                        'user_bias',
                        'item_bucket_embed',
                        'item_bucket_bias',
                    ],
                    'stage': 'item_buckets',
                },
                {
                    'lr': no_buckets_stage_lr,
                    'optimizer': no_buckets_stage_optimizer,
                    'parameter_prefix_list': [
                        'user_embed',
                        'user_bias',
                        'item_embed',
                        'item_bias',
                    ],
                    'stage': 'no_buckets',
                },
            ]

            # fail with a clear message instead of the opaque error ``torch.tensor(None)`` gives
            if item_buckets is None:
                raise ValueError('Must provide ``item_buckets`` for ``ColdStartModel``.')

            if not isinstance(item_buckets, torch.Tensor):
                item_buckets = torch.tensor(item_buckets)

            # data quality checks for ``item_buckets``
            assert item_buckets.dim() == 1, (
                f'``item_buckets`` must be 1-dimensional, not {item_buckets.dim()}-dimensional!'
            )
            if len(item_buckets) != train.num_items:
                raise ValueError(
                    'Length of ``item_buckets`` must be equal to the number of items in the '
                    f'dataset: {len(item_buckets)} != {train.num_items}.'
                )
            # ``Tensor.min`` reduces in one call rather than iterating the tensor in Python
            if item_buckets.min().item() != 0:
                raise ValueError(
                    f'``item_buckets`` IDs must start at 0, not {item_buckets.min().item()}!'
                )

            # bucket IDs start at 0, so the largest ID plus one is the number of buckets
            num_item_buckets = item_buckets.max().item() + 1

        super().__init__(optimizer_config_list=optimizer_config_list,
                         num_item_buckets=num_item_buckets,
                         **get_init_arguments())

    __doc__ = merge_docstrings(MultiStagePipeline, __doc__, __init__)

    def _move_any_external_data_to_device(self):
        """Move the item buckets to the device before training."""
        self.hparams.item_buckets = self.hparams.item_buckets.to(self.device)

    def _copy_weights(self, old: nn.Embedding, new: nn.Embedding, buckets: torch.tensor) -> None:
        """Copy, in place, row ``buckets[i]`` of ``old``'s weights into row ``i`` of ``new``'s."""
        new.weight.data.copy_(old.weight.data[buckets])

    def set_stage(self, stage: str) -> None:
        """
        Set the stage for the model.

        When moving from ``item_buckets`` to ``no_buckets``, the item bucket biases and embeddings
        are first copied over to the item biases and embeddings.

        Parameters
        ----------
        stage: str
            Name of the stage to set, which must be in ``self.hparams.stage_list``

        Raises
        ------
        ValueError
            If ``stage`` is not in ``self.hparams.stage_list``

        """
        current_stage = self.hparams.stage

        if stage in self.hparams.stage_list:
            if current_stage == 'item_buckets' and stage == 'no_buckets':
                print('Copying over item embeddings...')
                self._copy_weights(self.item_bucket_biases,
                                   self.item_biases,
                                   self.hparams.item_buckets)
                self._copy_weights(self.item_bucket_embeddings,
                                   self.item_embeddings,
                                   self.hparams.item_buckets)
        else:
            raise ValueError(
                f'"{stage}" is not a valid stage, please choose one of {self.hparams.stage_list}'
            )

        self.hparams.stage = stage
        print(f'Set ``self.hparams.stage`` to "{stage}"')

    def _setup_model(self, **kwargs) -> None:
        """
        Method for building model internals that rely on the data passed in.

        This method will be called after `prepare_data`.

        """
        # define initial embedding groups
        self.item_bucket_biases = ZeroEmbedding(
            num_embeddings=self.hparams.num_item_buckets,
            embedding_dim=1,
            sparse=self.hparams.sparse,
        )
        self.item_bucket_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_item_buckets,
            embedding_dim=self.hparams.embedding_dim,
            sparse=self.hparams.sparse,
        )

        # define fine-tuned embedding groups
        self.user_biases = ZeroEmbedding(
            num_embeddings=self.hparams.num_users,
            embedding_dim=1,
            sparse=self.hparams.sparse
        )
        self.item_biases = ZeroEmbedding(
            num_embeddings=self.hparams.num_items,
            embedding_dim=1,
            sparse=self.hparams.sparse,
        )
        self.user_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_users,
            embedding_dim=self.hparams.embedding_dim,
            sparse=self.hparams.sparse
        )
        self.item_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_items,
            embedding_dim=self.hparams.embedding_dim,
            sparse=self.hparams.sparse,
        )

        self.dropout = nn.Dropout(p=self.hparams.dropout_p)

    def forward(self, users: torch.tensor, items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model.

        In the ``item_buckets`` stage, item IDs are first mapped to their bucket IDs and the
        bucket embeddings and biases are used; in the ``no_buckets`` stage, the per-item
        embeddings and biases are used directly.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        Raises
        ------
        ValueError
            If ``self.hparams.stage`` is neither ``item_buckets`` nor ``no_buckets``

        """
        user_embeddings = self.user_embeddings(users)
        user_biases = self.user_biases(users)

        if self.hparams.stage == 'item_buckets':
            # transform item IDs to item bucket IDs
            items = self.hparams.item_buckets[items]

            item_embeddings = self.item_bucket_embeddings(items)
            item_biases = self.item_bucket_biases(items)
        elif self.hparams.stage == 'no_buckets':
            item_embeddings = self.item_embeddings(items)
            item_biases = self.item_biases(items)
        else:
            # without this branch, an unknown stage fails later on an unbound local variable
            raise ValueError(
                f'"{self.hparams.stage}" is not a valid stage, please choose one of '
                f'{self.hparams.stage_list}'
            )

        pred_scores = (
            torch.mul(self.dropout(user_embeddings), self.dropout(item_embeddings)).sum(axis=1)
            + user_biases.squeeze(1)
            + item_biases.squeeze(1)
        )

        return pred_scores.squeeze()

    def item_bucket_item_similarity(self, item_bucket_id: int) -> pd.Series:
        """
        Get most similar item indices to a item bucket by cosine similarity.

        Cosine similarity is computed with item and item bucket embeddings from a trained model.

        Parameters
        ----------
        item_bucket_id: int
            ID of the item bucket to compare every item against

        Returns
        -------
        sim_score_idxs: pd.Series
            Sorted values as cosine similarity for each item in the dataset with the index being
            the item ID

        """
        # rows are scaled to unit length so the matrix product below is a cosine similarity
        item_bucket_embeddings = self.item_bucket_embeddings.weight.data
        item_bucket_embeddings = (
            item_bucket_embeddings / item_bucket_embeddings.norm(dim=1)[:, None]
        )

        item_embeddings = self._get_item_embeddings()
        item_embeddings = item_embeddings / item_embeddings.norm(dim=1)[:, None]

        # indexing with ``[item_bucket_id]`` keeps the bucket embedding 2-d for ``matmul``
        sim_score_idxs = (
            torch.matmul(item_bucket_embeddings[[item_bucket_id], :],
                         item_embeddings.transpose(1, 0))
            .detach()
            .cpu()
            .numpy()
            .squeeze()
        )

        sim_score_idxs_series = pd.Series(sim_score_idxs)
        sim_score_idxs_series = sim_score_idxs_series.sort_values(ascending=False)

        return sim_score_idxs_series

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings on device."""
        return self.item_embeddings.weight.data
class NonlinearMatrixFactorizationModel(BasePipeline):
    # NOTE: the full docstring is merged in with ``BasePipeline``'s using ``merge_docstrings``.
    # Only the descriptions of new or changed parameters are included in this docstring
    """
    Training pipeline for a nonlinear matrix factorization model.

    ``NonlinearMatrixFactorizationModel`` models have an embedding layer for users and items. These
    are sent through separate dense networks, which output more refined embeddings, which are then
    dot producted for a single float ranking / rating.

    Collie adds a twist on to this novel framework by allowing separate optimizers for embeddings
    and bias terms. With larger datasets and multiple epochs of training, a model might incorrectly
    learn to only optimize the bias terms for a quicker path towards a local loss minimum,
    essentially memorizing how popular each item is. By using a separate, slower optimizer for the
    bias terms (like Stochastic Gradient Descent), the model must prioritize optimizing the
    embeddings for meaningful, more varied recommendations, leading to a model that is able to
    achieve a much lower loss. See the documentation below for ``bias_lr`` and ``bias_optimizer``
    input arguments for implementation details.

    All ``NonlinearMatrixFactorizationModel`` instances are subclasses of the ``LightningModule``
    class provided by PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollieTrainer, NonlinearMatrixFactorizationModel


        model = NonlinearMatrixFactorizationModel(train=train)
        trainer = CollieTrainer(model)
        trainer.fit(model)
        model.eval()

        # do evaluation as normal with ``model``

        model.save_model(filename='model.pth')
        new_model = NonlinearMatrixFactorizationModel(load_model_path='model.pth')

        # do evaluation as normal with ``new_model``

    Parameters
    ----------
    user_embedding_dim: int
        Number of latent factors to use for user embeddings
    item_embedding_dim: int
        Number of latent factors to use for item embeddings
    user_dense_layers_dims: list
        List of linear layer dimensions to apply to the user embedding, starting with the dimension
        directly following ``user_embedding_dim``
    item_dense_layers_dims: list
        List of linear layer dimensions to apply to the item embedding, starting with the dimension
        directly following ``item_embedding_dim``
    embedding_dropout_p: float
        Probability of dropout on the embedding layers
    dense_dropout_p: float
        Probability of dropout on the dense layers
    bias_lr: float or str
        Bias terms learning rate. If 'infer', will set equal to ``lr``
    optimizer: torch.optim or str
        If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    bias_optimizer: torch.optim or str
        Optimizer for the bias terms. This supports the same string options as ``optimizer``, with
        the addition of ``infer``, which will set the optimizer equal to ``optimizer``. If
        ``bias_optimizer`` is ``None``, only a single optimizer will be created for all model
        parameters
    y_range: tuple
        Specify as ``(min, max)`` to apply a sigmoid layer to the output score of the model to get
        predicted ratings within the range of ``min`` and ``max``

    """
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 user_embedding_dim: int = 60,
                 item_embedding_dim: int = 60,
                 user_dense_layers_dims: List[int] = [48, 32],
                 item_dense_layers_dims: List[int] = [48, 32],
                 embedding_dropout_p: float = 0.0,
                 dense_dropout_p: float = 0.0,
                 lr: float = 1e-3,
                 bias_lr: Optional[Union[float, str]] = 1e-2,
                 lr_scheduler_func: Optional[Callable] = partial(
                     ReduceLROnPlateau,
                     patience=1,
                     verbose=True),
                 weight_decay: float = 0.0,
                 optimizer: Union[str, Callable] = 'adam',
                 bias_optimizer: Optional[Union[str, Callable]] = 'sgd',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 y_range: Optional[Tuple[float, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        super().__init__(**get_init_arguments())

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _setup_model(self, **kwargs) -> None:
        """
        Method for building model internals that rely on the data passed in.

        This method will be called after ``prepare_data``.

        """
        self.user_biases = ZeroEmbedding(num_embeddings=self.hparams.num_users,
                                         embedding_dim=1)
        self.item_biases = ZeroEmbedding(num_embeddings=self.hparams.num_items,
                                         embedding_dim=1)
        self.user_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_users,
            embedding_dim=self.hparams.user_embedding_dim)
        self.item_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_items,
            embedding_dim=self.hparams.item_embedding_dim)
        self.embedding_dropout = nn.Dropout(p=self.hparams.embedding_dropout_p)
        self.dense_dropout = nn.Dropout(p=self.hparams.dense_dropout_p)

        # set up user dense layers
        user_dense_layers_dims = ([self.hparams.user_embedding_dim]
                                  + self.hparams.user_dense_layers_dims)
        self.user_dense_layers = [
            nn.Linear(user_dense_layers_dims[idx - 1], user_dense_layers_dims[idx])
            for idx in range(1, len(user_dense_layers_dims))
        ]
        for i, layer in enumerate(self.user_dense_layers):
            nn.init.xavier_normal_(layer.weight)
            # the layers live in a plain list, so each one is registered explicitly
            self.add_module('user_dense_layer_{}'.format(i), layer)

        # set up item dense layers
        item_dense_layers_dims = ([self.hparams.item_embedding_dim]
                                  + self.hparams.item_dense_layers_dims)
        self.item_dense_layers = [
            nn.Linear(item_dense_layers_dims[idx - 1], item_dense_layers_dims[idx])
            for idx in range(1, len(item_dense_layers_dims))
        ]
        for i, layer in enumerate(self.item_dense_layers):
            nn.init.xavier_normal_(layer.weight)
            self.add_module('item_dense_layer_{}'.format(i), layer)

    def forward(self, users: torch.tensor, items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        user_embeddings = self.user_embeddings(users)
        item_embeddings = self.item_embeddings(items)

        # dense dropout is applied after every dense layer except the last one
        for idx, user_dense_layer in enumerate(self.user_dense_layers):
            user_embeddings = F.leaky_relu(user_dense_layer(user_embeddings))

            if idx < (len(self.user_dense_layers) - 1):
                user_embeddings = self.dense_dropout(user_embeddings)

        for idx, item_dense_layer in enumerate(self.item_dense_layers):
            item_embeddings = F.leaky_relu(item_dense_layer(item_embeddings))

            if idx < (len(self.item_dense_layers) - 1):
                item_embeddings = self.dense_dropout(item_embeddings)

        preds = (
            (
                self.embedding_dropout(user_embeddings) * self.embedding_dropout(item_embeddings)
            ).sum(1)
            + self.user_biases(users).squeeze(1)
            + self.item_biases(items).squeeze(1)
        )

        if self.hparams.y_range is not None:
            # rescale the sigmoid output from (0, 1) to (``y_range[0]``, ``y_range[1]``)
            preds = (
                torch.sigmoid(preds) * (self.hparams.y_range[1] - self.hparams.y_range[0])
                + self.hparams.y_range[0]
            )

        return preds

    def _get_item_embeddings(self) -> torch.tensor:
        """
        Get item embeddings on device.

        The embeddings returned are the item embeddings after they have been sent through every
        item dense layer. They are computed on the first call and stored on the model as
        ``item_embeddings_``, which every later call returns as is.

        """
        # NOTE(review): the cache is never invalidated, so embeddings requested after further
        # training (or after the model has moved device) look like they will be stale - confirm
        # whether this method is only ever called on a fully trained model
        if not hasattr(self, 'item_embeddings_'):
            items = torch.arange(self.hparams.num_items, device=self.device)

            item_embeddings = self.item_embeddings(items)

            # no dropout here, unlike in ``forward``
            for item_dense_layer in self.item_dense_layers:
                item_embeddings = F.leaky_relu(
                    item_dense_layer(item_embeddings))

            self.item_embeddings_ = item_embeddings.detach()

        return self.item_embeddings_
class MultiStagePipeline(BasePipeline, metaclass=ABCMeta):
    """
    Multi-stage pipeline model architectures to inherit from.

    This model template is intended for models that train in distinct stages, with a different
    optimizer optimizing each step. This allows model components to be optimized with a set order
    in mind, rather than all at once, such as with the ``BasePipeline``.

    Generally, multi-stage models will have a training protocol like:

    .. code-block:: python

        from collie.model import CollieTrainer, SomeMultiStageModel


        model = SomeMultiStageModel(train=train)
        trainer = CollieTrainer(model)

        # fit stage 1
        trainer.fit(model)

        # fit stage 2
        trainer.max_epochs += 10
        model.advance_stage()
        trainer.fit(model)

        # fit stage 3
        trainer.max_epochs += 10
        model.advance_stage()
        trainer.fit(model)

        # ... and so on, until...

        model.eval()

    Just like with ``BasePipeline``, all subclasses MUST at least override the following methods:

    * ``_setup_model`` - Set up the model architecture

    * ``forward`` - Forward pass through a model

    For ``item_item_similarity`` to work properly, all subclasses should also implement:

    * ``_get_item_embeddings`` - Returns item embeddings from the model

    Notes
    -----
    * With each call of ``trainer.fit``, the optimizer and learning rate scheduler state will reset.
    * When loading a multi-stage model in, the stage will be set to the last possible stage. This
      stage may have a different ``forward`` calculation than other stages.

    Parameters
    ----------
    optimizer_config_list: list of dict
        List of dictionaries containing the optimizer configurations for each stage's
        optimizer(s). Each dictionary must contain the following keys:

        * ``lr``: float
            Learning rate for the optimizer

        * ``optimizer``: ``torch.optim`` or ``str``

        * ``parameter_prefix_list``: List[str]
            List of string prefixes corresponding to the model components that should be
            optimized with this optimizer

        * ``stage``: str
            Name of stage

        This must be ordered with the intended progression of stages.

    """
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 lr_scheduler_func: Optional[Callable] = None,
                 weight_decay: float = 0.0,
                 optimizer_config_list: Optional[
                     List[Dict[str, Union[float, List[str], str]]]
                 ] = None,
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None,
                 **kwargs):
        # ``stage_list`` stays ``None`` when a saved model is being loaded; it is only derived
        # from ``optimizer_config_list`` when a brand new model is being created
        stage_list = None

        if load_model_path is None:
            if optimizer_config_list is None:
                raise ValueError(
                    'Must provide ``optimizer_config_list`` when initializing a new multi-stage '
                    'model!'
                )
            else:
                # ``OrderedDict.fromkeys`` de-duplicates the stage names while preserving the
                # order in which they first appear in ``optimizer_config_list``
                stage_list = list(
                    OrderedDict.fromkeys([
                        optimizer_config['stage'] for optimizer_config in optimizer_config_list
                    ])
                )

        super().__init__(stage_list=stage_list, **get_init_arguments())

        if load_model_path is None:
            # set stage if we have not already loaded it in and set it there
            self.hparams.stage = self.hparams.stage_list[0]
            self.set_stage(self.hparams.stage)

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _load_model_init_helper(self, *args, **kwargs) -> None:
        """Run the parent's load helper, then set the stage to the final entry of the stage list."""
        super()._load_model_init_helper(*args, **kwargs)

        # set the stage to the last stage
        self.hparams.stage = self.hparams.stage_list[-1]
        print(f'Set ``self.hparams.stage`` to "{self.hparams.stage}"')

    def advance_stage(self) -> None:
        """
        Advance the stage to the next one in ``self.hparams.stage_list``.

        If the current stage is not found in ``self.hparams.stage_list``, nothing happens.

        Raises
        ------
        ValueError
            If the model is already in the final stage

        """
        stage_list = self.hparams.stage_list
        stage = self.hparams.stage

        if stage not in stage_list:
            return

        next_stage_idx = stage_list.index(stage) + 1
        if next_stage_idx >= len(stage_list):
            raise ValueError(
                f'Cannot advance stage past {stage} - it is the final stage!'
            )

        self.set_stage(stage=stage_list[next_stage_idx])

    def set_stage(self, stage: str) -> None:
        """
        Set the model to the desired stage.

        Parameters
        ----------
        stage: str
            Name of the stage to switch to, which must be in ``self.hparams.stage_list``

        Raises
        ------
        ValueError
            If ``stage`` is not in ``self.hparams.stage_list``

        """
        if stage in self.hparams.stage_list:
            self.hparams.stage = stage
            print(f'Set ``self.hparams.stage`` to "{self.hparams.stage}"')
        else:
            raise ValueError(
                f'{stage} is not a valid stage, please choose one of {self.hparams.stage_list}'
            )

    def _get_optimizer_parameters(
        self,
        optimizer_config: Dict[str, Union[float, List[str], str]],
        include_weight_decay: bool = True,
        **kwargs
    ) -> List[Dict[str, Union[torch.tensor, float]]]:
        """
        Build the parameter group for a single optimizer configuration.

        Parameters
        ----------
        optimizer_config: dict
            A single optimizer configuration. The ``parameter_prefix_list`` and ``lr`` keys are
            read here
        include_weight_decay: bool
            If ``True``, add ``self.hparams.weight_decay`` to the parameter group under the
            ``weight_decay`` key
        **kwargs: keyword arguments
            Accepted but not used in this method

        Returns
        -------
        optimizer_parameters: list of dict
            A single-element list whose dictionary holds the model parameters whose names start
            with any of the configured prefixes, along with the learning rate

        """
        parameter_prefix_list = optimizer_config['parameter_prefix_list']

        # materialize the matching parameters into a list so the parameter group can be iterated
        # over more than once
        matching_parameters = [
            param
            for (name, param) in self.named_parameters()
            if any(name.startswith(prefix) for prefix in parameter_prefix_list)
        ]

        optimizer_parameters = [{
            'params': matching_parameters,
            'lr': optimizer_config['lr'],
        }]

        if include_weight_decay:
            for parameter_group in optimizer_parameters:
                parameter_group['weight_decay'] = self.hparams.weight_decay

        return optimizer_parameters

    def configure_optimizers(
        self
    ) -> (Union[Tuple[List[Callable], List[Callable]], Tuple[Callable, Callable], Callable]):
        """
        Configure optimizers and learning rate schedulers to use in optimization.

        This method will be called after `setup`.

        Creates an optimizer and learning rate scheduler for each configuration dictionary in
        ``self.hparams.optimizer_config_list``.

        Returns
        -------
        The list of optimizers alone when ``self.lr_scheduler_func`` is ``None``. Otherwise, a
        tuple of the list of optimizers and a list of scheduler dictionaries, one per optimizer,
        each with a ``scheduler`` and a ``monitor`` key.

        """
        # since this is the only function that is called before each ``trainer.fit`` call, we will
        # also take this time to ensure any external data a model might rely on has been properly
        # moved to the device before training
        self._move_any_external_data_to_device()

        optimizers = [
            self._get_optimizer(self.optimizer, optimizer_config=optimizer_config)
            for optimizer_config in self.hparams.optimizer_config_list
        ]

        if self.lr_scheduler_func is None:
            return optimizers

        # monitor the validation loss when there is validation data, else the training loss
        monitor = 'train_loss_epoch' if self.val_loader is None else 'val_loss_epoch'

        # add in optimizer to scheduler function
        schedulers = [{
            'scheduler': self.lr_scheduler_func(optimizer),
            'monitor': monitor,
        } for optimizer in optimizers]

        return optimizers, schedulers

    def optimizer_step(self,
                       epoch: int = None,
                       batch_idx: int = None,
                       optimizer: torch.optim.Optimizer = None,
                       optimizer_idx: int = None,
                       optimizer_closure: Optional[Callable] = None,
                       **kwargs) -> None:
        """
        Overriding Lightning's optimizer step function to only step the optimizer associated with
        the relevant stage.

        See here for more details:
        https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#optimizer-step

        Parameters
        ----------
        epoch: int
            Current epoch
        batch_idx: int
            Index of current batch
        optimizer: torch.optim.Optimizer
            A PyTorch optimizer
        optimizer_idx: int
            If you used multiple optimizers, this indexes into that list
        optimizer_closure: Callable
            Closure for all optimizers

        """
        # ``optimizer_idx`` indexes ``optimizer_config_list`` to look up this optimizer's stage
        optimizer_stage = self.hparams.optimizer_config_list[optimizer_idx]['stage']

        if optimizer_stage == self.hparams.stage:
            optimizer.step(closure=optimizer_closure)
class MLPMatrixFactorizationModel(BasePipeline):
    # NOTE: the full docstring is merged in with ``BasePipeline``'s using ``merge_docstrings``.
    # Only the description of new or changed parameters are included in this docstring
    """
    Training pipeline for the matrix factorization model with MLP layers instead of a final dot
    product (like in ``MatrixFactorizationModel``).

    ``MLPMatrixFactorizationModel`` models have an embedding layer for both users and items, which
    are concatenated and sent through a MLP to output a single float ranking value.

    All ``MLPMatrixFactorizationModel`` instances are subclasses of the ``LightningModule`` class
    provided by PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollieTrainer, MLPMatrixFactorizationModel


        model = MLPMatrixFactorizationModel(train=train)
        trainer = CollieTrainer(model)
        trainer.fit(model)
        model.eval()

        # do evaluation as normal with ``model``

        model.save_model(filename='model.pth')
        new_model = MLPMatrixFactorizationModel(load_model_path='model.pth')

        # do evaluation as normal with ``new_model``

    Parameters
    ----------
    embedding_dim: int
        Number of latent factors to use for user and item embeddings
    num_layers: int
        Number of MLP layers to apply. The first layer takes the concatenated user and item
        embeddings (``2 * embedding_dim`` features) as input, and layer ``i`` (0-indexed) outputs
        ``int(2 * embedding_dim * (num_layers - i) / (num_layers + 1))`` features
    dropout_p: float
        Probability of dropout on the linear layers
    bias_lr: float
        Bias terms learning rate. If 'infer', will set equal to ``lr``
    optimizer: torch.optim or str
        If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    bias_optimizer: torch.optim or str
        Optimizer for the bias terms. This supports the same string options as ``optimizer``, with
        the addition of ``infer``, which will set the optimizer equal to ``optimizer``. If
        ``bias_optimizer`` is ``None``, only a single optimizer will be created for all model
        parameters
    y_range: tuple
        Specify as ``(min, max)`` to apply a sigmoid layer to the output score of the model to get
        predicted ratings within the range of ``min`` and ``max``

    """
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 embedding_dim: int = 30,
                 num_layers: int = 3,
                 dropout_p: float = 0.0,
                 lr: float = 1e-3,
                 bias_lr: Optional[Union[float, str]] = 1e-2,
                 lr_scheduler_func: Optional[Callable] = partial(ReduceLROnPlateau,
                                                                 patience=1,
                                                                 verbose=True),
                 weight_decay: float = 0.0,
                 optimizer: Union[str, Callable] = 'adam',
                 bias_optimizer: Optional[Union[str, Callable]] = 'sgd',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 y_range: Optional[Tuple[float, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        super().__init__(**get_init_arguments())

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _setup_model(self, **kwargs) -> None:
        """
        Method for building model internals that rely on the data passed in.

        This method will be called after `prepare_data`.

        """
        self.user_biases = ZeroEmbedding(num_embeddings=self.hparams.num_users, embedding_dim=1)
        self.item_biases = ZeroEmbedding(num_embeddings=self.hparams.num_items, embedding_dim=1)
        self.user_embeddings = ScaledEmbedding(num_embeddings=self.hparams.num_users,
                                               embedding_dim=self.hparams.embedding_dim)
        self.item_embeddings = ScaledEmbedding(num_embeddings=self.hparams.num_items,
                                               embedding_dim=self.hparams.embedding_dim)

        mlp_modules = []
        # the MLP consumes the concatenated user and item embeddings
        input_size = self.hparams.embedding_dim * 2

        for i in range(self.hparams.num_layers):
            # each layer's output width shrinks linearly with the layer index
            next_input_size = (
                int(self.hparams.embedding_dim
                    * 2
                    * ((self.hparams.num_layers - i) / (self.hparams.num_layers + 1)))
            )
            mlp_modules.append(nn.Linear(input_size, next_input_size))
            mlp_modules.append(nn.ReLU())
            mlp_modules.append(nn.Dropout(p=self.hparams.dropout_p))
            input_size = next_input_size

        self.mlp_layers = nn.Sequential(*mlp_modules)

        # ``input_size`` is the width of the last MLP layer's output, or the width of the
        # concatenated embeddings when ``num_layers`` is 0 and the loop above never runs
        self.predict_layer = nn.Linear(input_size, 1)

    def forward(self, users: torch.tensor, items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model, roughly:

        ```prediction = sigmoid(MLP(concatenate(user_embedding, item_embedding)))
        + user_bias + item_bias```

        Dropout, if any, is applied after each layer of the MLP and not on the biases.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        user_embeddings = self.user_embeddings(users)
        item_embeddings = self.item_embeddings(items)

        concatenated_embeddings = torch.cat((user_embeddings, item_embeddings), -1)

        mlp_output = torch.sigmoid(
            self.predict_layer(self.mlp_layers(concatenated_embeddings))
        ).squeeze()

        preds = (
            mlp_output
            + self.user_biases(users).squeeze(1)
            + self.item_biases(items).squeeze(1)
        )

        if self.hparams.y_range is not None:
            # rescale the score into the ``(min, max)`` interval given by ``y_range``
            preds = (
                torch.sigmoid(preds)
                * (self.hparams.y_range[1] - self.hparams.y_range[0])
                + self.hparams.y_range[0]
            )

        return preds

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings on device."""
        return self.item_embeddings.weight.data