Example #1
def test_merge_docstrings_extra_parameter_included_with_no_documentation():
    expected = """
    Note that nothing is after the ``Parameters`` section here.

    Parameters
    ----------
    arg1: str
        The first argument
    arg2: int
        This argument's description is longer.
        See how it is on a new line:
            * Even with a bullet list now!
    arg3: np.array
    arg4: int
        An important argument!
    *args: arguments
    **kwargs: keyword argument
        Additional keyword arguments to pass into ``BaseClass``

    """

    actual = merge_docstrings(BaseClass,
                              ChildClassExtraParamatersNoDoc.__doc__,
                              ChildClassExtraParamatersNoDoc.__init__)

    assert actual == expected
Example #2
def test_merge_docstrings_no_paramaters_section():
    expected = """
    No ``Parameters`` section at all here!

    References
    ----------
    arg8

    """

    actual = merge_docstrings(BaseClass, ChildClassNoParamaters.__doc__,
                              ChildClassNoParamaters.__init__)

    assert actual == expected
Example #3
def test_merge_docstrings_with_two_extra_sections():
    expected = """
    This is the short description for the child.

    This is a longer description for the child. It also contains many lines.
    With line breaks, like this.

    You can also have new paragraphs!

    NOTE: This is an important note!

    Look, a new line of documentation after the note!

    Parameters
    ----------
    arg1: str
        The first argument
    arg2: int
        This argument's description is longer.
        See how it is on a new line:
            * Even with a bullet list now!
    arg3: np.array
    arg4: int
        An important argument!
    *args: arguments
    **kwargs: keyword argument
        Additional keyword arguments to pass into ``BaseClass``

    References
    ----------
    arg8: list
    arg9: int
        No description above, and that is okay!
    arg10: str
        This one is new.

    Notes
    -----
    This is a note. The above ``References`` section used to say ``Returns``, but classes do not
    return anything and I did not feel inclined to change the description.

    """

    actual = merge_docstrings(BaseClass,
                              ChildClassWithTwoExtraSections.__doc__,
                              ChildClassWithTwoExtraSections.__init__)

    assert actual == expected
Example #4
def test_merge_docstrings_with_args_and_kwargs():
    expected = """
    This is the short description for the child.

    This is a longer description for the child. It also contains many lines.
    With line breaks, like this.

    You can also have new paragraphs!

    NOTE: This is an important note!

    Look, a new line of documentation after the note!

    Parameters
    ----------
    arg1: str
        The first argument
    arg2: int
        This argument's description is longer.
        See how it is on a new line:
            * Even with a bullet list now!
    arg3: np.array
    arg4: int
        An important argument!
    *args: arguments
    **kwargs: keyword argument
        Additional keyword arguments to pass into ``BaseClass``

    References
    ----------
    arg8: list
    arg9: int
        No description above, and that is okay!
    arg10: str
        This one is new.

    """

    actual = merge_docstrings(BaseClass, ChildClassWithArgsAndKwargs.__doc__,
                              ChildClassWithArgsAndKwargs.__init__)

    assert actual == expected
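The tests above call ``merge_docstrings`` directly; the model examples that follow wire it into each subclass. A minimal sketch of that pattern with a hypothetical ``ChildClass`` (the name, ``arg4``, and docstring text below are illustrative, not fixtures from the test module):

class ChildClass(BaseClass):
    """
    Short description for the child.

    Parameters
    ----------
    arg4: int
        An important argument!

    """
    def __init__(self, arg4: int = 0, **kwargs):
        super().__init__(**kwargs)

    # merge with ``BaseClass``'s docstring so undocumented parameters inherit their descriptions
    __doc__ = merge_docstrings(BaseClass, __doc__, __init__)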
Example #5
class MatrixFactorizationModel(BasePipeline):
    # NOTE: the full docstring is merged in with ``BasePipeline``'s using ``merge_docstrings``.
    # Only the descriptions of new or changed parameters are included in this docstring
    """
    Training pipeline for the matrix factorization model.

    ``MatrixFactorizationModel`` models have an embedding layer for both users and items; the two
    embeddings are dot-producted together to output a single float ranking value.

    Collie adds a twist to this incredibly popular framework by allowing separate optimizers
    for embeddings and bias terms. With larger datasets and multiple epochs of training, a model
    might incorrectly learn to only optimize the bias terms for a quicker path towards a local
    loss minimum, essentially memorizing how popular each item is. By using a separate, slower
    optimizer for the bias terms (like Stochastic Gradient Descent), the model must prioritize
    optimizing the embeddings for meaningful, more varied recommendations, leading to a model
    that is able to achieve a much lower loss. See the documentation below for ``bias_lr`` and
    ``bias_optimizer`` input arguments for implementation details.

    All ``MatrixFactorizationModel`` instances are subclasses of the ``LightningModule`` class
    provided by PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollieTrainer, MatrixFactorizationModel


        model = MatrixFactorizationModel(train=train)
        trainer = CollieTrainer(model)
        trainer.fit(model)
        model.eval()

        # do evaluation as normal with ``model``

        model.save_model(filename='model.pth')
        new_model = MatrixFactorizationModel(load_model_path='model.pth')

        # do evaluation as normal with ``new_model``

    Parameters
    ----------
    embedding_dim: int
        Number of latent factors to use for user and item embeddings
    dropout_p: float
        Probability of dropout
    sparse: bool
        Whether or not to treat embeddings as sparse tensors. If ``True``, cannot use weight decay
        on the optimizer
    bias_lr: float
        Bias terms learning rate. If ``'infer'``, this will be set equal to ``lr``
    bias_optimizer: torch.optim or str
        Optimizer for the bias terms. This supports the same string options as ``optimizer``, with
        the addition of ``infer``, which will set the optimizer equal to ``optimizer``. If
        ``bias_optimizer`` is ``None``, only a single optimizer will be created for all model
        parameters
    y_range: tuple
        Specify as ``(min, max)`` to apply a sigmoid layer to the output score of the model to get
        predicted ratings within the range of ``min`` and ``max``

    """
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 embedding_dim: int = 30,
                 dropout_p: float = 0.0,
                 sparse: bool = False,
                 lr: float = 1e-3,
                 bias_lr: Optional[Union[float, str]] = 1e-2,
                 lr_scheduler_func: Optional[Callable] = partial(ReduceLROnPlateau,
                                                                 patience=1,
                                                                 verbose=True),
                 weight_decay: float = 0.0,
                 optimizer: Union[str, Callable] = 'adam',
                 bias_optimizer: Optional[Union[str, Callable]] = 'sgd',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 y_range: Optional[Tuple[float, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        super().__init__(**get_init_arguments())

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _setup_model(self, **kwargs) -> None:
        """
        Method for building model internals that rely on the data passed in.

        This method will be called after ``prepare_data``.

        """
        self.user_biases = ZeroEmbedding(num_embeddings=self.hparams.num_users,
                                         embedding_dim=1,
                                         sparse=self.hparams.sparse)
        self.item_biases = ZeroEmbedding(num_embeddings=self.hparams.num_items,
                                         embedding_dim=1,
                                         sparse=self.hparams.sparse)
        self.user_embeddings = ScaledEmbedding(num_embeddings=self.hparams.num_users,
                                               embedding_dim=self.hparams.embedding_dim,
                                               sparse=self.hparams.sparse)
        self.item_embeddings = ScaledEmbedding(num_embeddings=self.hparams.num_items,
                                               embedding_dim=self.hparams.embedding_dim,
                                               sparse=self.hparams.sparse)
        self.dropout = nn.Dropout(p=self.hparams.dropout_p)

    def forward(self, users: torch.tensor, items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model.

        Simple matrix factorization for a single user and item looks like:

        ``prediction = (user_embedding * item_embedding) + user_bias + item_bias``

        If dropout is added, it is applied to the two embeddings and not the biases.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        user_embeddings = self.user_embeddings(users)
        item_embeddings = self.item_embeddings(items)

        preds = (
            torch.mul(self.dropout(user_embeddings), self.dropout(item_embeddings)).sum(axis=1)
            + self.user_biases(users).squeeze(1)
            + self.item_biases(items).squeeze(1)
        )

        if self.hparams.y_range is not None:
            preds = (
                torch.sigmoid(preds)
                * (self.hparams.y_range[1] - self.hparams.y_range[0])
                + self.hparams.y_range[0]
            )

        return preds

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings on device."""
        return self.item_embeddings.weight.data
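A hedged usage sketch of the separate bias optimizer and ``y_range`` options described in the docstring above (it assumes ``train`` is an interactions dataset that already exists; values are illustrative):

model = MatrixFactorizationModel(
    train=train,
    embedding_dim=30,
    bias_lr=1e-2,          # slower learning rate for the bias terms
    bias_optimizer='sgd',  # separate SGD optimizer for biases; the default 'adam' handles embeddings
    y_range=(1.0, 5.0),    # sigmoid-scale the output into an explicit 1-5 rating range
)
trainer = CollieTrainer(model)
trainer.fit(model)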
class CollaborativeMetricLearningModel(BasePipeline):
    # NOTE: the full docstring is merged in with ``BasePipeline``'s using ``merge_docstrings``.
    # Only the descriptions of new or changed parameters are included in this docstring
    """
    Training pipeline for the collaborative metric learning model.

    ``CollaborativeMetricLearningModel`` models have an embedding layer for both users and items. A
    single float prediction is retrieved by taking the pairwise distance between the two
    embeddings.

    The implementation here is meant to mimic its original implementation as specified here:
    https://arxiv.org/pdf/1803.00202.pdf [1]_

    All ``CollaborativeMetricLearningModel`` instances are subclasses of the ``LightningModule``
    class provided by PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollaborativeMetricLearningModel, CollieTrainer

        model = CollaborativeMetricLearningModel(train=train)
        trainer = CollieTrainer(model)
        trainer.fit(model)
        model.eval()

        # do evaluation as normal with ``model``

        model.save_model(filename='model.pth')
        new_model = CollaborativeMetricLearningModel(load_model_path='model.pth')

        # do evaluation as normal with ``new_model``

    Parameters
    ----------
    embedding_dim: int
        Number of latent factors to use for user and item embeddings
    sparse: bool
        Whether or not to treat embeddings as sparse tensors. If ``True``, cannot use weight decay
        on the optimizer
    y_range: tuple
        Specify as ``(min, max)`` to apply a sigmoid layer to the output score of the model to get
        predicted ratings within the range of ``min`` and ``max``

    References
    ----------
    .. [1] Campo, Miguel, et al. "Collaborative Metric Learning Recommendation System: Application
        to Theatrical Movie Releases." ArXiv.org, 1 Mar. 2018, arxiv.org/abs/1803.00202.

    """
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 embedding_dim: int = 30,
                 sparse: bool = False,
                 lr: float = 1e-3,
                 lr_scheduler_func: Optional[Callable] = partial(
                     ReduceLROnPlateau, patience=1, verbose=True),
                 weight_decay: float = 0.0,
                 optimizer: Union[str, Callable] = 'adam',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 y_range: Optional[Tuple[float, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        super().__init__(**get_init_arguments())

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _setup_model(self, **kwargs) -> None:
        """
        Method for building model internals that rely on the data passed in.

        This method will be called after ``prepare_data``.

        """
        self.user_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_users,
            embedding_dim=self.hparams.embedding_dim,
            sparse=self.hparams.sparse)
        self.item_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_items,
            embedding_dim=self.hparams.embedding_dim,
            sparse=self.hparams.sparse)

    def forward(self, users: torch.tensor,
                items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model, equivalent to:

        ``prediction = pairwise_distance(user_embedding, item_embedding)``

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        user_embeddings = self.user_embeddings(users)
        item_embeddings = self.item_embeddings(items)

        preds = F.pairwise_distance(user_embeddings, item_embeddings)

        return preds

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings on device."""
        return self.item_embeddings.weight.data
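The ``forward`` above reduces to a row-wise Euclidean distance between the two embedding lookups; a small illustrative check with stand-in tensors (not part of the library code):

import torch
import torch.nn.functional as F

user_embeddings = torch.randn(4, 30)  # stand-in for self.user_embeddings(users)
item_embeddings = torch.randn(4, 30)  # stand-in for self.item_embeddings(items)

preds = F.pairwise_distance(user_embeddings, item_embeddings)  # shape: (4,)
manual = (user_embeddings - item_embeddings).norm(dim=1)       # same values, up to the tiny ``eps``
                                                               # term pairwise_distance adds internally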
class HybridPretrainedModel(BasePipeline):
    # NOTE: the full docstring is merged in with ``BasePipeline``'s using ``merge_docstrings``.
    # Only the descriptions of new or changed parameters are included in this docstring
    """
    Training pipeline for a hybrid recommendation model using a pre-trained matrix factorization
    model as its base.

    ``HybridPretrainedModel`` models contain dense layers that process item metadata, concatenate
    this embedding with the user and item embeddings copied from a trained
    ``MatrixFactorizationModel``, and send this concatenated embedding through more dense layers to
    output a single float ranking / rating. We add both user and item biases to this score before
    returning. This is the same architecture as the ``HybridModel``, but we are using the embeddings
    from a pre-trained model rather than training them up ourselves.

    All ``HybridPretrainedModel`` instances are subclasses of the ``LightningModule`` class
    provided by PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollieTrainer, HybridPretrainedModel, MatrixFactorizationModel


        # instantiate and fit a ``MatrixFactorizationModel`` as expected
        mf_model = MatrixFactorizationModel(train=train)
        mf_trainer = CollieTrainer(mf_model)
        mf_trainer.fit(mf_model)

        hybrid_model = HybridPretrainedModel(train=train,
                                             item_metadata=item_metadata,
                                             trained_model=mf_model)
        hybrid_trainer = CollieTrainer(hybrid_model)
        hybrid_trainer.fit(hybrid_model)
        hybrid_model.eval()

        # do evaluation as normal with ``hybrid_model``

        hybrid_model.save_model(path='model')
        new_hybrid_model = HybridPretrainedModel(load_model_path='model')

        # do evaluation as normal with ``new_hybrid_model``

    Parameters
    ----------
    item_metadata: torch.tensor, pd.DataFrame, or np.array, 2-dimensional
        The shape of the item metadata should be (num_items x metadata_features), and each item's
        metadata should be available when indexing a row by an item ID
    trained_model: ``collie.model.MatrixFactorizationModel``
        Previously trained ``MatrixFactorizationModel`` model to extract embeddings from
    metadata_layers_dims: list
        List of linear layer dimensions to apply to the metadata only, starting with the dimension
        directly following ``metadata_features`` and ending with the dimension to concatenate with
        the item embeddings
    combined_layers_dims: list
        List of linear layer dimensions to apply to the concatenated item embeddings and item
        metadata, starting with the dimension directly following the shape of
        ``item_embeddings + metadata_features`` and ending with the dimension before the final
        linear layer to dimension 1
    freeze_embeddings: bool
        When initializing the model, whether or not to freeze ``trained_model``'s embeddings
    dropout_p: float
        Probability of dropout
    optimizer: torch.optim or str
        If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    """
    def __init__(
            self,
            train: INTERACTIONS_LIKE_INPUT = None,
            val: INTERACTIONS_LIKE_INPUT = None,
            item_metadata: Union[torch.tensor, pd.DataFrame, np.array] = None,
            trained_model: MatrixFactorizationModel = None,
            metadata_layers_dims: Optional[List[int]] = None,
            combined_layers_dims: List[int] = [128, 64, 32],
            freeze_embeddings: bool = True,
            dropout_p: float = 0.0,
            lr: float = 1e-3,
            lr_scheduler_func: Optional[Callable] = partial(ReduceLROnPlateau,
                                                            patience=1,
                                                            verbose=True),
            weight_decay: float = 0.0,
            optimizer: Union[str, Callable] = 'adam',
            loss: Union[str, Callable] = 'hinge',
            metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
            metadata_for_loss_weights: Optional[Dict[str, float]] = None,
            # y_range: Optional[Tuple[float, float]] = None,
            load_model_path: Optional[str] = None,
            map_location: Optional[str] = None):
        item_metadata_num_cols = None
        if load_model_path is None:
            if trained_model is None:
                raise ValueError(
                    'Must provide ``trained_model`` for ``HybridPretrainedModel``.'
                )

            if item_metadata is None:
                raise ValueError(
                    'Must provide item metadata for ``HybridPretrainedModel``.'
                )
            elif isinstance(item_metadata, pd.DataFrame):
                item_metadata = torch.from_numpy(item_metadata.to_numpy())
            elif isinstance(item_metadata, np.ndarray):
                item_metadata = torch.from_numpy(item_metadata)

            item_metadata = item_metadata.float()

            item_metadata_num_cols = item_metadata.shape[1]

        super().__init__(**get_init_arguments(),
                         item_metadata_num_cols=item_metadata_num_cols)

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _move_any_external_data_to_device(self):
        """Move item metadata to the device before training."""
        self.item_metadata = self.item_metadata.to(self.device)

    def _load_model_init_helper(self, load_model_path: str, map_location: str,
                                **kwargs) -> None:
        self.item_metadata = (joblib.load(
            os.path.join(load_model_path, 'metadata.pkl')))
        super()._load_model_init_helper(load_model_path=os.path.join(
            load_model_path, 'model.pth'),
                                        map_location=map_location)

    def _setup_model(self, **kwargs) -> None:
        """
        Method for building model internals that rely on the data passed in.

        This method will be called after ``prepare_data``.

        """
        if self.hparams.load_model_path is None:
            if not hasattr(self, '_trained_model'):
                self._trained_model = kwargs.pop('trained_model')
            if not hasattr(self, 'item_metadata'):
                self.item_metadata = kwargs.pop('item_metadata')

            # we are not loading in a model, so we will create a new model from scratch
            # we don't want to modify the ``trained_model``'s weights, so we deep copy
            self.embeddings = nn.Sequential(
                copy.deepcopy(self._trained_model.user_embeddings),
                copy.deepcopy(self._trained_model.item_embeddings))
            self.biases = nn.Sequential(
                copy.deepcopy(self._trained_model.user_biases),
                copy.deepcopy(self._trained_model.item_biases))

            if self.hparams.freeze_embeddings:
                self.freeze_embeddings()
            else:
                self.unfreeze_embeddings()

            # save hyperparameters that we need to be able to rebuild the embedding layers on load
            self.hparams.user_num_embeddings = self.embeddings[
                0].num_embeddings
            self.hparams.user_embeddings_dim = self.embeddings[0].embedding_dim
            self.hparams.item_num_embeddings = self.embeddings[
                1].num_embeddings
            self.hparams.item_embeddings_dim = self.embeddings[1].embedding_dim
        else:
            # assume we are loading in a previously-saved model
            # set up dummy embeddings with the correct dimensions so we can load weights in
            self.embeddings = nn.Sequential(
                ScaledEmbedding(self.hparams.user_num_embeddings,
                                self.hparams.user_embeddings_dim),
                ScaledEmbedding(self.hparams.item_num_embeddings,
                                self.hparams.item_embeddings_dim))
            self.biases = nn.Sequential(
                ZeroEmbedding(self.hparams.user_num_embeddings, 1),
                ZeroEmbedding(self.hparams.item_num_embeddings, 1))

        self.dropout = nn.Dropout(p=self.hparams.dropout_p)

        # set up metadata-only layers
        metadata_output_dim = self.hparams.item_metadata_num_cols
        self.metadata_layers = None
        if self.hparams.metadata_layers_dims is not None:
            metadata_layers_dims = ([self.hparams.item_metadata_num_cols] +
                                    self.hparams.metadata_layers_dims)
            self.metadata_layers = [
                nn.Linear(metadata_layers_dims[idx - 1],
                          metadata_layers_dims[idx])
                for idx in range(1, len(metadata_layers_dims))
            ]
            for i, layer in enumerate(self.metadata_layers):
                nn.init.xavier_normal_(self.metadata_layers[i].weight)
                self.add_module('metadata_layer_{}'.format(i), layer)

            metadata_output_dim = metadata_layers_dims[-1]

        # set up combined layers
        combined_dimension_input = (self.hparams.user_embeddings_dim +
                                    self.hparams.item_embeddings_dim +
                                    metadata_output_dim)
        combined_layers_dims = [combined_dimension_input
                                ] + self.hparams.combined_layers_dims + [1]
        self.combined_layers = [
            nn.Linear(combined_layers_dims[idx - 1], combined_layers_dims[idx])
            for idx in range(1, len(combined_layers_dims))
        ]
        for i, layer in enumerate(self.combined_layers):
            nn.init.xavier_normal_(self.combined_layers[i].weight)
            self.add_module('combined_layer_{}'.format(i), layer)

    def forward(self, users: torch.tensor,
                items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        if str(self.device) != str(self.item_metadata.device):
            self._move_any_external_data_to_device()

        metadata_output = self.item_metadata[items, :]
        if self.metadata_layers is not None:
            for metadata_nn_layer in self.metadata_layers:
                metadata_output = self.dropout(
                    F.leaky_relu(metadata_nn_layer(metadata_output)))

        combined_output = torch.cat(
            (self.embeddings[0](users), self.embeddings[1](items),
             metadata_output), 1)
        for combined_nn_layer in self.combined_layers[:-1]:
            combined_output = self.dropout(
                F.leaky_relu(combined_nn_layer(combined_output)))

        pred_scores = (self.combined_layers[-1](combined_output) +
                       self.biases[0](users) + self.biases[1](items))

        return pred_scores.squeeze()

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings on device."""
        # TODO: update this to get the embeddings post-MLP
        return self.embeddings[1].weight.data

    def freeze_embeddings(self) -> None:
        """Remove gradient requirement from the embeddings."""
        self.embeddings[0].weight.requires_grad = False
        self.embeddings[1].weight.requires_grad = False

    def unfreeze_embeddings(self) -> None:
        """Require gradients for the embeddings."""
        self.embeddings[0].weight.requires_grad = True
        self.embeddings[1].weight.requires_grad = True

    def save_model(self,
                   path: Union[str, Path] = os.path.join(DATA_PATH / 'model'),
                   overwrite: bool = False) -> None:
        """
        Save the model's state dictionary, hyperparameters, and item metadata.

        While PyTorch Lightning offers a way to save and load models, there are two main reasons
        for overriding these:

        1) To properly save and load a model requires the ``Trainer`` object, meaning that all
           deployed models will require Lightning to run the model, which is not actually needed
           for inference.

        2) In the v0.8.4 release, loading a model back in leads to a ``RuntimeError`` about being
           unable to load in the weights.

        Parameters
        ----------
        path: str or Path
            Directory path to save model and data files
        overwrite: bool
            Whether or not to overwrite existing data

        """
        path = str(path)

        if os.path.exists(path):
            if os.listdir(path) and overwrite is False:
                raise ValueError(
                    f'Data exists in ``path`` at {path} and ``overwrite`` is False.'
                )

        Path(path).mkdir(parents=True, exist_ok=True)
        joblib.dump(self.item_metadata, os.path.join(path, 'metadata.pkl'))

        # preserve ordering while extracting the state dictionary without the ``_trained_model``
        # component
        state_dict_keys_to_save = [
            k for k, _ in self.state_dict().items()
            if '_trained_model' not in k
        ]
        state_dict_vals_to_save = [
            v for k, v in self.state_dict().items()
            if '_trained_model' not in k
        ]
        state_dict_to_save = OrderedDict(
            zip(state_dict_keys_to_save, state_dict_vals_to_save))

        dict_to_save = {
            'state_dict': state_dict_to_save,
            'hparams': self.hparams
        }
        torch.save(dict_to_save, os.path.join(path, 'model.pth'))

    def load_from_hybrid_model(self, hybrid_model) -> None:
        """
        Copy hyperparameters and state dictionary from an existing ``HybridPretrainedModel``
        instance.

        This is particularly useful for creating another PyTorch Lightning trainer object to
        fine-tune copied-over embeddings from a ``MatrixFactorizationModel`` instance.

        Parameters
        ----------
        hybrid_model: ``collie.model.HybridPretrainedModel``
            HybridPretrainedModel containing hyperparameters and state dictionary to copy over

        """
        for key, value in hybrid_model.hparams.items():
            self.hparams[key] = value

        self._setup_model()
        self.load_state_dict(state_dict=hybrid_model.state_dict())
        self.eval()
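``load_from_hybrid_model`` above is intended for fine-tuning; a hedged sketch of that workflow, reusing the names from the class docstring example (``train``, ``item_metadata``, ``mf_model``, and a trained ``hybrid_model`` are assumed to already exist):

fine_tune_model = HybridPretrainedModel(train=train,
                                        item_metadata=item_metadata,
                                        trained_model=mf_model,
                                        freeze_embeddings=False)  # let the copied embeddings update
fine_tune_model.load_from_hybrid_model(hybrid_model)              # copy hyperparameters + state dict
fine_tune_trainer = CollieTrainer(fine_tune_model)
fine_tune_trainer.fit(fine_tune_model)
fine_tune_model.eval()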
Example #8
class NeuralCollaborativeFiltering(BasePipeline):
    # NOTE: the full docstring is merged in with ``BasePipeline``'s using ``merge_docstrings``.
    # Only the descriptions of new or changed parameters are included in this docstring
    """
    Training pipeline for a neural matrix factorization model.

    ``NeuralCollaborativeFiltering`` models combine a collaborative filtering and multilayer
    perceptron network in a single, unified model. The model consists of two sections: the first
    is a simple matrix factorization that calculates a score by multiplying together user and item
    embeddings (lookups through an embedding table); the second is an MLP network that feeds
    embeddings from a second set of embedding tables (one for user, one for item). Both output
    vectors are combined and sent through a final MLP layer before returning a single recommendation
    score.

    The implementation here is meant to mimic its original implementation as specified here:
    https://arxiv.org/pdf/1708.05031.pdf [2]_

    All ``NeuralCollaborativeFiltering`` instances are subclasses of the ``LightningModule`` class
    provided by PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollieTrainer, NeuralCollaborativeFiltering


        model = NeuralCollaborativeFiltering(train=train)
        trainer = CollieTrainer(model)
        trainer.fit(model)
        model.eval()

        # do evaluation as normal with ``model``

        model.save_model(filename='model.pth')
        new_model = NeuralCollaborativeFiltering(load_model_path='model.pth')

        # do evaluation as normal with ``new_model``

    Parameters
    ----------
    embedding_dim: int
        Number of latent factors to use for the matrix factorization embedding table. For the MLP
        embedding table, the dimensionality will be calculated with the formula
        ``embedding_dim * (2 ** (num_layers - 1))``
    num_layers: int
        Number of MLP layers to apply. Each MLP layer will have its input dimension calculated with
        the formula ``embedding_dim * (2 ** (num_layers - current_layer_number))``
    final_layer: str or function
        Final layer activation function. Available string options include:

        * 'sigmoid'

        * 'relu'

        * 'leaky_relu'

    dropout_p: float
        Probability of dropout on the MLP layers
    optimizer: torch.optim or str
        If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    References
    ----------
    .. [2] He, Xiangnan, et al. "Neural Collaborative Filtering." Proceedings of the 26th
        International Conference on World Wide Web, 1 Apr. 2017,
        dl.acm.org/doi/10.1145/3038912.3052569.

    """
    def __init__(
            self,
            train: INTERACTIONS_LIKE_INPUT = None,
            val: INTERACTIONS_LIKE_INPUT = None,
            embedding_dim: int = 8,
            num_layers: int = 3,
            final_layer: Optional[Union[str, Callable]] = None,
            dropout_p: float = 0.0,
            lr: float = 1e-3,
            lr_scheduler_func: Optional[Callable] = partial(ReduceLROnPlateau,
                                                            patience=1,
                                                            verbose=True),
            weight_decay: float = 0.0,
            optimizer: Union[str, Callable] = 'adam',
            loss: Union[str, Callable] = 'hinge',
            metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
            metadata_for_loss_weights: Optional[Dict[str, float]] = None,
            # y_range: Optional[Tuple[float, float]] = None,
            load_model_path: Optional[str] = None,
            map_location: Optional[str] = None):
        super().__init__(**get_init_arguments())

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _setup_model(self, **kwargs) -> None:
        """
        Method for building model internals that rely on the data passed in.

        This method will be called after ``prepare_data``.

        """
        self.user_embeddings_cf = ScaledEmbedding(
            num_embeddings=self.hparams.num_users,
            embedding_dim=self.hparams.embedding_dim)
        self.item_embeddings_cf = ScaledEmbedding(
            num_embeddings=self.hparams.num_items,
            embedding_dim=self.hparams.embedding_dim)

        mlp_embedding_dim = self.hparams.embedding_dim * (2**(
            self.hparams.num_layers - 1))
        self.user_embeddings_mlp = ScaledEmbedding(
            num_embeddings=self.hparams.num_users,
            embedding_dim=mlp_embedding_dim,
        )
        self.item_embeddings_mlp = ScaledEmbedding(
            num_embeddings=self.hparams.num_items,
            embedding_dim=mlp_embedding_dim,
        )

        mlp_modules = []
        for i in range(self.hparams.num_layers):
            input_size = self.hparams.embedding_dim * (2**(
                self.hparams.num_layers - i))
            mlp_modules.append(nn.Dropout(p=self.hparams.dropout_p))
            mlp_modules.append(nn.Linear(input_size, input_size // 2))
            mlp_modules.append(nn.ReLU())
        self.mlp_layers = nn.Sequential(*mlp_modules)

        self.predict_layer = nn.Linear(self.hparams.embedding_dim * 2, 1)

        for m in self.mlp_layers:
            if isinstance(m, nn.Linear):
                # initialization taken from the official repo:
                # https://github.com/hexiangnan/neural_collaborative_filtering/blob/master/NeuMF.py#L63  # noqa: E501
                trunc_normal(m.weight.data, std=0.01)

        nn.init.kaiming_uniform_(self.predict_layer.weight,
                                 nonlinearity='relu')

        for m in self.modules():
            if isinstance(m, nn.Linear) and m.bias is not None:
                m.bias.data.zero_()

    def forward(self, users: torch.tensor,
                items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        user_embedding_cf = self.user_embeddings_cf(users)
        item_embedding_cf = self.item_embeddings_cf(items)
        output_cf = user_embedding_cf * item_embedding_cf

        user_embedding_mlp = self.user_embeddings_mlp(users)
        item_embedding_mlp = self.item_embeddings_mlp(items)
        interaction = torch.cat((user_embedding_mlp, item_embedding_mlp), -1)
        output_mlp = self.mlp_layers(interaction)

        concat = torch.cat((output_cf, output_mlp), -1)

        prediction = self.predict_layer(concat)

        if callable(self.hparams.final_layer):
            prediction = self.hparams.final_layer(prediction)
        elif self.hparams.final_layer == 'sigmoid':
            prediction = torch.sigmoid(prediction)
        elif self.hparams.final_layer == 'relu':
            prediction = F.relu(prediction)
        elif self.hparams.final_layer == 'leaky_relu':
            prediction = F.leaky_relu(prediction)
        elif self.hparams.final_layer is not None:
            raise ValueError(
                f'{self.hparams.final_layer} not valid final layer value!')

        return prediction.view(-1)

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings, which are the concatenated CF and MLP item embeddings, on device."""
        items = torch.arange(self.hparams.num_items, device=self.device)

        return torch.cat((
            self.item_embeddings_cf(items),
            self.item_embeddings_mlp(items),
        ),
                         axis=1).detach()
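A quick arithmetic check of the layer-size formulas in the docstring above, using the class defaults of ``embedding_dim=8`` and ``num_layers=3`` (purely illustrative):

embedding_dim, num_layers = 8, 3
mlp_embedding_dim = embedding_dim * (2 ** (num_layers - 1))  # 32 dims per MLP embedding table
mlp_input_dims = [embedding_dim * (2 ** (num_layers - i)) for i in range(num_layers)]
# [64, 32, 16]; each MLP ``nn.Linear`` halves its input, so the MLP output is 8-dimensional and is
# concatenated with the 8-dimensional CF output before the final ``nn.Linear(embedding_dim * 2, 1)``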
Example #9
class HybridModel(MultiStagePipeline):
    # NOTE: the full docstring is merged in with ``MultiStagePipeline``'s using
    # ``merge_docstrings``. Only the descriptions of new or changed parameters are included in this
    # docstring
    """
    Training pipeline for a multi-stage hybrid recommendation model.

    ``HybridModel`` models contain dense layers that process item metadata, concatenate this
    embedding with user and item embeddings, and send this concatenated embedding through more dense
    layers to output a single float ranking / rating. We add both user and item biases to this score
    before returning. This is the same architecture as the ``HybridPretrainedModel``, but we are
    training the embeddings ourselves rather than relying on pulling them from a pre-trained model.

    The stages in a ``HybridModel`` are, in order:

    1. ``matrix_factorization``
        Matrix factorization exactly as we do in ``MatrixFactorizationModel``. In this stage,
        metadata is NOT incorporated into the model.
    2. ``metadata_only``
        User and item embedding terms are frozen, and the MLP layers for the metadata (if
        specified) and combined embedding-metadata data are optimized.
    3. ``all``
        Embedding and MLP layers are all optimized together, including those for metadata.

    All ``HybridModel`` instances are subclasses of the ``LightningModule`` class provided by
    PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollieTrainer, HybridModel


        # instantiate and fit a ``HybridModel`` as expected
        model = HybridModel(train=train, item_metadata=item_metadata)
        trainer = CollieTrainer(model)
        trainer.fit(model)

        # train for X more epochs on the next stage, ``metadata_only``
        trainer.max_epochs += X
        model.advance_stage()
        trainer.fit(model)

        # train for Y more epochs on the next stage, ``all``
        trainer.max_epochs += Y
        model.advance_stage()
        trainer.fit(model)

        model.eval()

        # do evaluation as normal with ``model``

        model.save_model(path='model')
        new_model = HybridModel(load_model_path='model')

        # do evaluation as normal with ``new_model``

    Note
    ----
    The ``forward`` calculation will be different depending on the stage that is set. Keep this in
    mind when evaluating, saving, and loading models.

    Parameters
    ----------
    item_metadata: torch.tensor, pd.DataFrame, or np.array, 2-dimensional
        The shape of the item metadata should be (num_items x metadata_features), and each item's
        metadata should be available when indexing a row by an item ID
    embedding_dim: int
        Number of latent factors to use for user and item embeddings
    metadata_layers_dims: list
        List of linear layer dimensions to apply to the metadata only, starting with the dimension
        directly following ``metadata_features`` and ending with the dimension to concatenate with
        the item embeddings
    combined_layers_dims: list
        List of linear layer dimensions to apply to the concatenated item embeddings and item
        metadata, starting with the dimension directly following the shape of
        ``item_embeddings + metadata_features`` and ending with the dimension before the final
        linear layer to dimension 1
    dropout_p: float
        Probability of dropout
    metadata_only_stage_lr: float
        Learning rate for metadata and combined layers optimized during the ``metadata_only`` stage
    all_stage_lr: float
        Learning rate for all model parameters optimized during the ``all`` stage
    optimizer: torch.optim or str
        Optimizer used for embeddings and bias terms (if ``bias_optimizer`` is ``None``) during the
        ``matrix_factorization`` stage. If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    metadata_only_stage_optimizer: torch.optim or str
        Optimizer used for metadata and combined layers during the ``metadata_only`` stage. If a
        string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    all_stage_optimizer: torch.optim or str
        Optimizer used for all model parameters during the ``all`` stage. If a string, one of the
        following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    """
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 item_metadata: Union[torch.tensor, pd.DataFrame, np.array] = None,
                 embedding_dim: int = 30,
                 metadata_layers_dims: Optional[List[int]] = None,
                 combined_layers_dims: List[int] = [128, 64, 32],
                 dropout_p: float = 0.0,
                 lr: float = 1e-3,
                 bias_lr: Optional[Union[float, str]] = 1e-2,
                 metadata_only_stage_lr: float = 1e-3,
                 all_stage_lr: float = 1e-4,
                 lr_scheduler_func: Optional[Callable] = partial(
                     ReduceLROnPlateau,
                     patience=1,
                     verbose=False,
                 ),
                 weight_decay: float = 0.0,
                 optimizer: Union[str, Callable] = 'adam',
                 bias_optimizer: Optional[Union[str, Callable]] = 'sgd',
                 metadata_only_stage_optimizer: Union[str, Callable] = 'adam',
                 all_stage_optimizer: Union[str, Callable] = 'adam',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        item_metadata_num_cols = None
        optimizer_config_list = None

        if load_model_path is None:
            if item_metadata is None:
                raise ValueError('Must provide item metadata for ``HybridModel``.')
            elif isinstance(item_metadata, pd.DataFrame):
                item_metadata = torch.from_numpy(item_metadata.to_numpy())
            elif isinstance(item_metadata, np.ndarray):
                item_metadata = torch.from_numpy(item_metadata)

            item_metadata = item_metadata.float()

            item_metadata_num_cols = item_metadata.shape[1]

            if bias_optimizer is not None:
                initial_optimizer_block = [
                    {
                        'lr': lr,
                        'optimizer': optimizer,
                        # optimize embeddings...
                        'parameter_prefix_list': ['user_embedding', 'item_embedding'],
                        'stage': 'matrix_factorization',
                    },
                    {
                        'lr': lr if bias_lr == 'infer' else bias_lr,
                        'optimizer': optimizer if bias_optimizer == 'infer' else bias_optimizer,
                        # ... and optimize bias terms too
                        'parameter_prefix_list': ['user_bias', 'item_bias'],
                        'stage': 'matrix_factorization',
                    },
                ]
            else:
                initial_optimizer_block = [
                    {
                        'lr': lr,
                        'optimizer': optimizer,
                        # optimize embeddings and bias terms all together
                        'parameter_prefix_list': [
                            'user_embedding',
                            'item_embedding',
                            'user_bias',
                            'item_bias'],
                        'stage': 'matrix_factorization',
                    },
                ]

            optimizer_config_list = initial_optimizer_block + [
                {
                    'lr': metadata_only_stage_lr,
                    'optimizer': metadata_only_stage_optimizer,
                    # optimize metadata layers only
                    'parameter_prefix_list': ['metadata', 'combined', 'user_bias', 'item_bias'],
                    'stage': 'metadata_only',
                },
                {
                    'lr': all_stage_lr,
                    'optimizer': all_stage_optimizer,
                    # optimize everything
                    'parameter_prefix_list': ['user', 'item', 'metadata', 'combined'],
                    'stage': 'all',
                },
            ]

        super().__init__(optimizer_config_list=optimizer_config_list,
                         item_metadata_num_cols=item_metadata_num_cols,
                         **get_init_arguments())

    __doc__ = merge_docstrings(MultiStagePipeline, __doc__, __init__)

    def _move_any_external_data_to_device(self):
        """Move item metadata to the device before training."""
        self.item_metadata = self.item_metadata.to(self.device)

    def _load_model_init_helper(self, load_model_path: str, map_location: str, **kwargs) -> None:
        self.item_metadata = (
            joblib.load(os.path.join(load_model_path, 'metadata.pkl'))
        )
        super()._load_model_init_helper(load_model_path=os.path.join(load_model_path, 'model.pth'),
                                        map_location=map_location,
                                        **kwargs)

    def _setup_model(self, **kwargs) -> None:
        """
        Method for building model internals that rely on the data passed in.

        This method will be called after ``prepare_data``.

        """
        if self.hparams.load_model_path is None:
            if not hasattr(self, 'item_metadata'):
                self.item_metadata = kwargs.pop('item_metadata')

        self.user_biases = ZeroEmbedding(num_embeddings=self.hparams.num_users,
                                         embedding_dim=1)
        self.item_biases = ZeroEmbedding(num_embeddings=self.hparams.num_items,
                                         embedding_dim=1)
        self.user_embeddings = ScaledEmbedding(num_embeddings=self.hparams.num_users,
                                               embedding_dim=self.hparams.embedding_dim)
        self.item_embeddings = ScaledEmbedding(num_embeddings=self.hparams.num_items,
                                               embedding_dim=self.hparams.embedding_dim)
        self.dropout = nn.Dropout(p=self.hparams.dropout_p)

        # set up metadata-only layers
        metadata_output_dim = self.hparams.item_metadata_num_cols
        self.metadata_layers = None
        if self.hparams.metadata_layers_dims is not None:
            metadata_layers_dims = (
                [self.hparams.item_metadata_num_cols] + self.hparams.metadata_layers_dims
            )
            self.metadata_layers = [
                nn.Linear(metadata_layers_dims[idx - 1], metadata_layers_dims[idx])
                for idx in range(1, len(metadata_layers_dims))
            ]
            for i, layer in enumerate(self.metadata_layers):
                nn.init.xavier_normal_(self.metadata_layers[i].weight)
                self.add_module('metadata_layer_{}'.format(i), layer)

            metadata_output_dim = metadata_layers_dims[-1]

        # set up combined layers
        combined_dimension_input = (
            self.user_embeddings.embedding_dim
            + self.item_embeddings.embedding_dim
            + metadata_output_dim
        )
        combined_layers_dims = [combined_dimension_input] + self.hparams.combined_layers_dims + [1]
        self.combined_layers = [
            nn.Linear(combined_layers_dims[idx - 1], combined_layers_dims[idx])
            for idx in range(1, len(combined_layers_dims))
        ]
        for i, layer in enumerate(self.combined_layers):
            nn.init.xavier_normal_(self.combined_layers[i].weight)
            self.add_module('combined_layer_{}'.format(i), layer)

    def forward(self, users: torch.tensor, items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        if self.hparams.stage == 'matrix_factorization':
            pred_scores = (
                torch.mul(
                    self.dropout(self.user_embeddings(users)),
                    self.dropout(self.item_embeddings(items))
                ).sum(axis=1)
                + self.user_biases(users).squeeze(1)
                + self.item_biases(items).squeeze(1)
            )
        else:
            # TODO: remove self.device and let lightning do it
            metadata_output = self.item_metadata[items, :].to(self.device)
            if self.metadata_layers is not None:
                for metadata_nn_layer in self.metadata_layers:
                    metadata_output = self.dropout(
                        F.leaky_relu(
                            metadata_nn_layer(metadata_output)
                        )
                    )

            # TODO: make this matrix factorization instead of only a MLP
            combined_output = torch.cat((self.user_embeddings(users),
                                         self.item_embeddings(items),
                                         metadata_output), 1)
            for combined_nn_layer in self.combined_layers[:-1]:
                combined_output = self.dropout(
                    F.leaky_relu(
                        combined_nn_layer(combined_output)
                    )
                )

            pred_scores = (
                self.combined_layers[-1](combined_output)
                + self.user_biases(users)
                + self.item_biases(items)
            )

        return pred_scores.squeeze()

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings on device."""
        # TODO: update this to get the embeddings post-MLP
        return self.item_embeddings.weight.data

    def save_model(self,
                   path: Union[str, Path] = os.path.join(DATA_PATH / 'model'),
                   overwrite: bool = False) -> None:
        """
        Save the model's state dictionary, hyperparameters, and item metadata.

        While PyTorch Lightning offers a way to save and load models, there are two main reasons
        for overriding these:

        1) To properly save and load a model requires the ``Trainer`` object, meaning that all
           deployed models will require Lightning to run the model, which is not actually needed
           for inference.

        2) In the v0.8.4 release, loading a model back in leads to a ``RuntimeError`` about being
           unable to load in the weights.

        Parameters
        ----------
        path: str or Path
            Directory path to save model and data files
        overwrite: bool
            Whether or not to overwrite existing data

        """
        path = str(path)

        if os.path.exists(path):
            if os.listdir(path) and overwrite is False:
                raise ValueError(f'Data exists in ``path`` at {path} and ``overwrite`` is False.')

        Path(path).mkdir(parents=True, exist_ok=True)
        joblib.dump(self.item_metadata, os.path.join(path, 'metadata.pkl'))

        super().save_model(filename=os.path.join(path, 'model.pth'))
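A minimal sketch of the ``item_metadata`` shape expected by the hybrid models above (values are illustrative only):

import numpy as np

num_items, metadata_features = 5, 3
item_metadata = np.random.rand(num_items, metadata_features).astype('float32')
# row ``i`` holds the metadata for item ID ``i``; a pd.DataFrame or torch.tensor of the same shape
# is also accepted, and ``__init__`` converts it to a float tensor via ``torch.from_numpy(...)``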
Example #10
class ColdStartModel(MultiStagePipeline):
    # NOTE: the full docstring is merged in with ``MultiStagePipeline``'s using
    # ``merge_docstrings``. Only the descriptions of new or changed parameters are included in this
    # docstring
    """
    Training pipeline for a matrix factorization model optimized for the cold-start problem.

    Many recommendation models suffer from the cold start problem, in which a model is unable to
    provide adequate recommendations for a new item until enough users have interacted with it. But
    if users only interact with recommended items, the item will never be recommended, and thus the
    model will never improve recommendations for this item.

    The ``ColdStartModel`` attempts to bypass this by limiting the item space down to "item
    buckets", training a model on this as the item space, then expanding out to all items. During
    this expansion, the learned embedding of each bucket is copied over to each corresponding
    item, providing a smarter initialization than a random one for both existing and new items.
    Now, when we have a new item, we can use its bucket embedding as its initialization in the model.

    The stages in a ``ColdStartModel`` are, in order:

    1. ``item_buckets``
        Matrix factorization with item embeddings and bias terms bucketed by
        the ``item_buckets`` argument. Unlike in the next stage, many items may map onto a single
        bucket, and those items will share the same embedding and bias representation. The model
        should learn user preferences for buckets in this stage.
    2. ``no_buckets``
        Standard matrix factorization as we do in ``MatrixFactorizationModel``. However, upon
        advancing to this stage, the item embeddings are initialized with their bucketed embedding
        value (and same for biases). Not only does this provide better initialization than random,
        but it also allows new items to be incorporated into the model without training by using their
        item bucket embedding and bias terms at prediction time.

    Note that the cold start problem exists for new users as well, but this functionality will be
    added to this model in a future version.

    All ``ColdStartModel`` instances are subclasses of the ``LightningModule`` class provided by
    PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import ColdStartModel, CollieTrainer


        # instantiate and fit a ``ColdStartModel`` as expected
        model = ColdStartModel(train=train, item_buckets=item_buckets)
        trainer = CollieTrainer(model)
        trainer.fit(model)

        # train for X more epochs on the next stage, ``no_buckets``
        trainer.max_epochs += X
        model.advance_stage()
        trainer.fit(model)

        model.eval()

        # do evaluation as normal with ``model``

        # get item-item recommendations for a new item by using the bucket ID, Z
        similar_items = model.item_bucket_item_similarity(item_bucket_id=Z)

        model.save_model(filename='model.pth')
        new_model = ColdStartModel(load_model_path='model.pth')

        # do evaluation as normal with ``new_model``

    Note
    ----
    The ``forward`` calculation differs depending on the stage that is set. Keep this in mind when
    evaluating, saving, and loading models.

    Parameters
    ----------
    item_buckets: torch.tensor, 1-d
        An ordered iterable containing the bucket ID for each item ID. For example, if you have
        five films and are going to bucket by primary genre, and your data looks like:

        * Item ID: 0, Genre ID: 1

        * Item ID: 1, Genre ID: 0

        * Item ID: 2, Genre ID: 2

        * Item ID: 3, Genre ID: 2

        * Item ID: 4, Genre ID: 1

        Then ``item_buckets`` would be: ``[1, 0, 2, 2, 1]``
    embedding_dim: int
        Number of latent factors to use for user and item embeddings
    dropout_p: float
        Probability of dropout
    item_buckets_stage_lr: float
        Learning rate for user parameters and item bucket parameters optimized during the
        ``item_buckets`` stage
    no_buckets_stage_lr: float
        Learning rate for user parameters and item parameters optimized during the ``no_buckets``
        stage
    item_buckets_stage_optimizer: torch.optim or str
        Optimizer used for user parameters and item bucket parameters optimized during the
        ``item_buckets`` stage. If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    no_buckets_stage_optimizer: torch.optim or str
        Optimizer used for user parameters and item parameters optimized during the ``no_buckets``
        stage. If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    """
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 item_buckets: Iterable[int] = None,
                 embedding_dim: int = 30,
                 dropout_p: float = 0.0,
                 sparse: bool = False,
                 item_buckets_stage_lr: float = 1e-3,
                 no_buckets_stage_lr: float = 1e-3,
                 lr_scheduler_func: Optional[Callable] = partial(
                     ReduceLROnPlateau,
                     patience=1,
                     verbose=False,
                 ),
                 weight_decay: float = 0.0,
                 item_buckets_stage_optimizer: Union[str, Callable] = 'adam',
                 no_buckets_stage_optimizer: Union[str, Callable] = 'adam',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        optimizer_config_list = None
        num_item_buckets = None

        if load_model_path is None:
            # TODO: separate out optimizer and bias optimizer somehow
            optimizer_config_list = [
                {
                    'lr': item_buckets_stage_lr,
                    'optimizer': item_buckets_stage_optimizer,
                    'parameter_prefix_list': [
                        'user_embed',
                        'user_bias',
                        'item_bucket_embed',
                        'item_bucket_bias',
                    ],
                    'stage': 'item_buckets',
                },
                {
                    'lr': no_buckets_stage_lr,
                    'optimizer': no_buckets_stage_optimizer,
                    'parameter_prefix_list': [
                        'user_embed',
                        'user_bias',
                        'item_embed',
                        'item_bias',
                    ],
                    'stage': 'no_buckets',
                },
            ]

            if not isinstance(item_buckets, torch.Tensor):
                item_buckets = torch.tensor(item_buckets)

            # data quality checks for ``item_buckets``
            assert item_buckets.dim() == 1, (
                f'``item_buckets`` must be 1-dimensional, not {item_buckets.dim()}-dimensional!'
            )
            if len(item_buckets) != train.num_items:
                raise ValueError(
                    'Length of ``item_buckets`` must be equal to the number of items in the '
                    f'dataset: {len(item_buckets)} != {train.num_items}.'
                )
            if min(item_buckets) != 0:
                raise ValueError(f'``item_buckets`` IDs must start at 0, not {min(item_buckets)}!')

            num_item_buckets = item_buckets.max().item() + 1

        super().__init__(optimizer_config_list=optimizer_config_list,
                         num_item_buckets=num_item_buckets,
                         **get_init_arguments())

    __doc__ = merge_docstrings(MultiStagePipeline, __doc__, __init__)

    def _move_any_external_data_to_device(self):
        """Move the item buckets to the device before training."""
        self.hparams.item_buckets = self.hparams.item_buckets.to(self.device)

    def _copy_weights(self, old: nn.Embedding, new: nn.Embedding, buckets: torch.tensor) -> None:
        """Copy each item's bucketed weights from ``old`` into the corresponding row of ``new``."""
        new.weight.data.copy_(old.weight.data[buckets])

    def set_stage(self, stage: str) -> None:
        """Set the stage for the model."""
        current_stage = self.hparams.stage

        if stage in self.hparams.stage_list:
            if current_stage == 'item_buckets' and stage == 'no_buckets':
                print('Copying over item embeddings...')
                self._copy_weights(self.item_bucket_biases,
                                   self.item_biases,
                                   self.hparams.item_buckets)
                self._copy_weights(self.item_bucket_embeddings,
                                   self.item_embeddings,
                                   self.hparams.item_buckets)
        else:
            raise ValueError(
                f'"{stage}" is not a valid stage, please choose one of {self.hparams.stage_list}'
            )

        self.hparams.stage = stage
        print(f'Set ``self.hparams.stage`` to "{stage}"')

    def _setup_model(self, **kwargs) -> None:
        """
        Method for building model internals that rely on the data passed in.

        This method will be called after ``prepare_data``.

        """
        # define initial embedding groups
        self.item_bucket_biases = ZeroEmbedding(
            num_embeddings=self.hparams.num_item_buckets,
            embedding_dim=1,
            sparse=self.hparams.sparse,
        )
        self.item_bucket_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_item_buckets,
            embedding_dim=self.hparams.embedding_dim,
            sparse=self.hparams.sparse,
        )

        # define fine-tuned embedding groups
        self.user_biases = ZeroEmbedding(
            num_embeddings=self.hparams.num_users,
            embedding_dim=1,
            sparse=self.hparams.sparse
        )
        self.item_biases = ZeroEmbedding(
            num_embeddings=self.hparams.num_items,
            embedding_dim=1,
            sparse=self.hparams.sparse,
        )
        self.user_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_users,
            embedding_dim=self.hparams.embedding_dim,
            sparse=self.hparams.sparse
        )
        self.item_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_items,
            embedding_dim=self.hparams.embedding_dim,
            sparse=self.hparams.sparse,
        )

        self.dropout = nn.Dropout(p=self.hparams.dropout_p)

    def forward(self, users: torch.tensor, items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        user_embeddings = self.user_embeddings(users)
        user_biases = self.user_biases(users)

        if self.hparams.stage == 'item_buckets':
            # transform item IDs to item bucket IDs
            items = self.hparams.item_buckets[items]

            item_embeddings = self.item_bucket_embeddings(items)
            item_biases = self.item_bucket_biases(items)
        elif self.hparams.stage == 'no_buckets':
            item_embeddings = self.item_embeddings(items)
            item_biases = self.item_biases(items)

        pred_scores = (
            torch.mul(self.dropout(user_embeddings), self.dropout(item_embeddings)).sum(axis=1)
            + user_biases.squeeze(1)
            + item_biases.squeeze(1)
        )

        return pred_scores.squeeze()

    def item_bucket_item_similarity(self, item_bucket_id: int) -> pd.Series:
        """
        Get the most similar item indices to an item bucket by cosine similarity.

        Cosine similarity is computed with item and item bucket embeddings from a trained model.

        Parameters
        ----------
        item_bucket_id: int
            ID of the item bucket to find the most similar items to

        Returns
        -------
        sim_score_idxs: pd.Series
            Cosine similarity between the item bucket and each item in the dataset, sorted in
            descending order, with the item ID as the index
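
        Example usage may look like (a minimal sketch, assuming a trained model named ``model``):

        .. code-block:: python

            # ten most similar items to the item bucket with ID 0
            model.item_bucket_item_similarity(item_bucket_id=0).head(10)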

        """
        item_bucket_embeddings = self.item_bucket_embeddings.weight.data
        item_bucket_embeddings = (
            item_bucket_embeddings / item_bucket_embeddings.norm(dim=1)[:, None]
        )

        item_embeddings = self._get_item_embeddings()
        item_embeddings = item_embeddings / item_embeddings.norm(dim=1)[:, None]

        sim_score_idxs = (
            torch.matmul(item_bucket_embeddings[[item_bucket_id], :],
                         item_embeddings.transpose(1, 0))
            .detach()
            .cpu()
            .numpy()
            .squeeze()
        )

        sim_score_idxs_series = pd.Series(sim_score_idxs)
        sim_score_idxs_series = sim_score_idxs_series.sort_values(ascending=False)

        return sim_score_idxs_series

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings on device."""
        return self.item_embeddings.weight.data


class NonlinearMatrixFactorizationModel(BasePipeline):
    # NOTE: the full docstring is merged in with ``BasePipeline``'s using ``merge_docstrings``.
    # Only the descriptions of new or changed parameters are included in this docstring
    """
    Training pipeline for a nonlinear matrix factorization model.

    ``NonlinearMatrixFactorizationModel`` models have an embedding layer for users and items. These
    embeddings are sent through separate dense networks that output more refined embeddings, which
    are then combined with a dot product to produce a single float ranking / rating.

    Collie adds a twist to this framework by allowing separate optimizers for the embeddings and
    the bias terms. With larger datasets and multiple epochs of training, a model might learn to
    optimize only the bias terms as a quicker path towards a local loss minimum, essentially
    memorizing how popular each item is. By using a separate, slower optimizer for the bias terms
    (such as stochastic gradient descent), the model must prioritize optimizing the embeddings,
    yielding more meaningful and varied recommendations and, ultimately, a much lower loss. See
    the documentation for the ``bias_lr`` and ``bias_optimizer`` arguments below for
    implementation details.
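
    For example, the configuration below (a minimal sketch; the argument values shown are simply
    the defaults) pairs Adam for the embeddings with a slower SGD optimizer for the bias terms:

    .. code-block:: python

        model = NonlinearMatrixFactorizationModel(
            train=train,
            lr=1e-3,                # learning rate for the embedding parameters
            optimizer='adam',       # optimizer for the embedding parameters
            bias_lr=1e-2,           # separate learning rate for the bias terms
            bias_optimizer='sgd',   # separate, slower optimizer for the bias terms
        )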

    All ``NonlinearMatrixFactorizationModel`` instances are subclasses of the ``LightningModule``
    class provided by PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollieTrainer, NonlinearMatrixFactorizationModel


        model = NonlinearMatrixFactorizationModel(train=train)
        trainer = CollieTrainer(model)
        trainer.fit(model)
        model.eval()

        # do evaluation as normal with ``model``

        model.save_model(filename='model.pth')
        new_model = NonlinearMatrixFactorizationModel(load_model_path='model.pth')

        # do evaluation as normal with ``new_model``

    Parameters
    ----------
    user_embedding_dim: int
        Number of latent factors to use for user embeddings
    item_embedding_dim: int
        Number of latent factors to use for item embeddings
    user_dense_layers_dims: list
        List of linear layer dimensions to apply to the user embedding, starting with the dimension
        directly following ``user_embedding_dim``
    item_dense_layers_dims: list
        List of linear layer dimensions to apply to the item embedding, starting with the dimension
        directly following ``item_embedding_dim``
    embedding_dropout_p: float
        Probability of dropout on the embedding layers
    dense_dropout_p: float
        Probability of dropout on the dense layers
    bias_lr: float
        Learning rate for the bias terms. If ``'infer'``, this will be set equal to ``lr``
    optimizer: torch.optim or str
        If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    bias_optimizer: torch.optim or str
        Optimizer for the bias terms. This supports the same string options as ``optimizer``, with
        the addition of ``infer``, which will set the optimizer equal to ``optimizer``. If
        ``bias_optimizer`` is ``None``, only a single optimizer will be created for all model
        parameters
    y_range: tuple
        Specify as ``(min, max)`` to apply a sigmoid layer to the output score of the model to get
        predicted ratings within the range of ``min`` and ``max``
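
        For example, ``y_range=(1, 5)`` squashes raw scores into the 1-5 rating range, roughly
        ``preds = sigmoid(raw_score) * (5 - 1) + 1`` (see ``forward`` below)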

    """
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 user_embedding_dim: int = 60,
                 item_embedding_dim: int = 60,
                 user_dense_layers_dims: List[float] = [48, 32],
                 item_dense_layers_dims: List[float] = [48, 32],
                 embedding_dropout_p: float = 0.0,
                 dense_dropout_p: float = 0.0,
                 lr: float = 1e-3,
                 bias_lr: Optional[Union[float, str]] = 1e-2,
                 lr_scheduler_func: Optional[Callable] = partial(
                     ReduceLROnPlateau, patience=1, verbose=True),
                 weight_decay: float = 0.0,
                 optimizer: Union[str, Callable] = 'adam',
                 bias_optimizer: Optional[Union[str, Callable]] = 'sgd',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 y_range: Optional[Tuple[float, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        super().__init__(**get_init_arguments())

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _setup_model(self, **kwargs) -> None:
        """
        Method for building model internals that rely on the data passed in.

        This method will be called after ``prepare_data``.

        """
        self.user_biases = ZeroEmbedding(num_embeddings=self.hparams.num_users,
                                         embedding_dim=1)
        self.item_biases = ZeroEmbedding(num_embeddings=self.hparams.num_items,
                                         embedding_dim=1)
        self.user_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_users,
            embedding_dim=self.hparams.user_embedding_dim)
        self.item_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_items,
            embedding_dim=self.hparams.item_embedding_dim)

        self.embedding_dropout = nn.Dropout(p=self.hparams.embedding_dropout_p)
        self.dense_dropout = nn.Dropout(p=self.hparams.dense_dropout_p)

        # set up user dense layers
        user_dense_layers_dims = ([self.hparams.user_embedding_dim] +
                                  self.hparams.user_dense_layers_dims)
        self.user_dense_layers = [
            nn.Linear(user_dense_layers_dims[idx - 1],
                      user_dense_layers_dims[idx])
            for idx in range(1, len(user_dense_layers_dims))
        ]
        for i, layer in enumerate(self.user_dense_layers):
            nn.init.xavier_normal_(self.user_dense_layers[i].weight)
            self.add_module('user_dense_layer_{}'.format(i), layer)

        # set up item dense layers
        item_dense_layers_dims = ([self.hparams.item_embedding_dim] +
                                  self.hparams.item_dense_layers_dims)
        self.item_dense_layers = [
            nn.Linear(item_dense_layers_dims[idx - 1],
                      item_dense_layers_dims[idx])
            for idx in range(1, len(item_dense_layers_dims))
        ]
        for i, layer in enumerate(self.item_dense_layers):
            nn.init.xavier_normal_(self.item_dense_layers[i].weight)
            self.add_module('item_dense_layer_{}'.format(i), layer)

    def forward(self, users: torch.tensor,
                items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        user_embeddings = self.user_embeddings(users)
        item_embeddings = self.item_embeddings(items)

        for idx, user_dense_layer in enumerate(self.user_dense_layers):
            user_embeddings = F.leaky_relu(user_dense_layer(user_embeddings))

            if idx < (len(self.user_dense_layers) - 1):
                user_embeddings = self.dense_dropout(user_embeddings)

        for idx, item_dense_layer in enumerate(self.item_dense_layers):
            item_embeddings = F.leaky_relu(item_dense_layer(item_embeddings))

            if idx < (len(self.item_dense_layers) - 1):
                item_embeddings = self.dense_dropout(item_embeddings)

        preds = ((self.embedding_dropout(user_embeddings) *
                  self.embedding_dropout(item_embeddings)).sum(1) +
                 self.user_biases(users).squeeze(1) +
                 self.item_biases(items).squeeze(1))

        if self.hparams.y_range is not None:
            preds = (torch.sigmoid(preds) *
                     (self.hparams.y_range[1] - self.hparams.y_range[0]) +
                     self.hparams.y_range[0])

        return preds

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings on device."""
        if not hasattr(self, 'item_embeddings_'):
            items = torch.arange(self.hparams.num_items, device=self.device)

            item_embeddings = self.item_embeddings(items)

            for item_dense_layer in self.item_dense_layers:
                item_embeddings = F.leaky_relu(
                    item_dense_layer(item_embeddings))

            self.item_embeddings_ = item_embeddings.detach()

        return self.item_embeddings_


class MultiStagePipeline(BasePipeline, metaclass=ABCMeta):
    """
    Multi-stage pipeline model architectures to inherit from.

    This model template is intended for models that train in distinct stages, with a different
    optimizer optimizing each step. This allows model components to be optimized with a set
    order in mind, rather than all at once, such as with the ``BasePipeline``.

    Generally, multi-stage models will have a training protocol like:

    .. code-block:: python

        from collie.model import CollieTrainer, SomeMultiStageModel


        model = SomeMultiStageModel(train=train)
        trainer = CollieTrainer(model)

        # fit stage 1
        trainer.fit(model)

        # fit stage 2
        trainer.max_epochs += 10
        model.advance_stage()
        trainer.fit(model)

        # fit stage 3
        trainer.max_epochs += 10
        model.advance_stage()
        trainer.fit(model)

        # ... and so on, until...

        model.eval()

    Just like with ``BasePipeline``, all subclasses MUST at least override the following methods:

    * ``_setup_model`` - Set up the model architecture

    * ``forward`` - Forward pass through a model

    For ``item_item_similarity`` to work properly, all subclasses should also implement:

    * ``_get_item_embeddings`` - Returns item embeddings from the model

    Notes
    -----
    * With each call of ``trainer.fit``, the optimizer and learning rate scheduler state will reset.
    * When loading a multi-stage model in, the state will be set to the last possible state. This
      state may have a different ``forward`` calculation than other states.

    Parameters
    ----------
    optimizer_config_list: list of dict
        List of dictionaries containing the optimizer configurations for each stage's
        optimizer(s). Each dictionary must contain the following keys:

        * ``lr``: float
            Learning rate for the optimizer

        * ``optimizer``: ``torch.optim`` or ``str``

        * ``parameter_prefix_list``: List[str]
            List of string prefixes corresponding to the model components that should be
            optimized with this optimizer

        * ``stage``: str
            Name of stage

        This list must be ordered according to the intended progression of stages.
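
        A two-stage configuration might look like the following (a minimal sketch mirroring the
        configuration that ``ColdStartModel`` builds):

        .. code-block:: python

            optimizer_config_list = [
                {
                    'lr': 1e-3,
                    'optimizer': 'adam',
                    'parameter_prefix_list': ['user_embed', 'user_bias',
                                              'item_bucket_embed', 'item_bucket_bias'],
                    'stage': 'item_buckets',
                },
                {
                    'lr': 1e-3,
                    'optimizer': 'adam',
                    'parameter_prefix_list': ['user_embed', 'user_bias',
                                              'item_embed', 'item_bias'],
                    'stage': 'no_buckets',
                },
            ]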

    """
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 lr_scheduler_func: Optional[Callable] = None,
                 weight_decay: float = 0.0,
                 optimizer_config_list: List[Dict[str, Union[float, List[str],
                                                             str]]] = None,
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None,
                 **kwargs):
        stage_list = None

        if load_model_path is None:
            if optimizer_config_list is None:
                raise ValueError(
                    'Must provide ``optimizer_config_list`` when initializing a new multi-stage '
                    'model!')
            else:
                stage_list = list(
                    OrderedDict.fromkeys([
                        optimizer_config['stage']
                        for optimizer_config in optimizer_config_list
                    ]))

        super().__init__(stage_list=stage_list, **get_init_arguments())

        if load_model_path is None:
            # set stage if we have not already loaded it in and set it there
            self.hparams.stage = self.hparams.stage_list[0]
            self.set_stage(self.hparams.stage)

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _load_model_init_helper(self, *args, **kwargs) -> None:
        super()._load_model_init_helper(*args, **kwargs)

        # set the stage to the last stage
        self.hparams.stage = self.hparams.stage_list[-1]
        print(f'Set ``self.hparams.stage`` to "{self.hparams.stage}"')

    def advance_stage(self) -> None:
        """Advance the stage to the next one in ``self.hparams.stage_list``."""
        stage = self.hparams.stage

        if stage in self.hparams.stage_list:
            stage_idx = self.hparams.stage_list.index(stage)
            if (stage_idx + 1) >= len(self.hparams.stage_list):
                raise ValueError(
                    f'Cannot advance stage past {stage} - it is the final stage!'
                )

            self.set_stage(stage=self.hparams.stage_list[stage_idx + 1])

    def set_stage(self, stage: str) -> None:
        """Set the model to the desired stage."""
        if stage in self.hparams.stage_list:
            self.hparams.stage = stage
            print(f'Set ``self.hparams.stage`` to "{self.hparams.stage}"')
        else:
            raise ValueError(
                f'{stage} is not a valid stage, please choose one of {self.hparams.stage_list}'
            )

    def _get_optimizer_parameters(
            self,
            optimizer_config: Dict[str, Union[float, List[str], str]],
            include_weight_decay: bool = True,
            **kwargs) -> List[Dict[str, Union[torch.tensor, float]]]:
        """Build the parameter group for a single stage's optimizer configuration."""
        # select only the parameters whose names start with one of this stage's prefixes
        optimizer_parameters = [{
            'params': (
                param for (name, param) in self.named_parameters()
                if any(
                    name.startswith(prefix)
                    for prefix in optimizer_config['parameter_prefix_list']
                )
            ),
            'lr': optimizer_config['lr'],
        }]

        if include_weight_decay:
            for parameter_group in optimizer_parameters:
                parameter_group['weight_decay'] = self.hparams.weight_decay

        return optimizer_parameters

    def configure_optimizers(
        self,
    ) -> Union[Tuple[List[Callable], List[Callable]], Tuple[Callable, Callable], Callable]:
        """
        Configure optimizers and learning rate schedulers to use in optimization.

        This method will be called after ``setup``.

        Creates an optimizer and learning rate scheduler for each configuration dictionary in
        ``self.hparams.optimizer_config_list``.

        """
        # since this is the only function that is called before each ``trainer.fit`` call, we will
        # also take this time to ensure any external data a model might rely on has been properly
        # moved to the device before training
        self._move_any_external_data_to_device()

        optimizer_list = [
            self._get_optimizer(self.optimizer, optimizer_config=optimizer_config)
            for optimizer_config in self.hparams.optimizer_config_list
        ]

        if self.lr_scheduler_func is not None:
            monitor = 'val_loss_epoch'
            if self.val_loader is None:
                monitor = 'train_loss_epoch'

            # wrap each optimizer in its learning rate scheduler
            scheduler_list = [
                {
                    'scheduler': self.lr_scheduler_func(optimizer),
                    'monitor': monitor,
                }
                for optimizer in optimizer_list
            ]

            return optimizer_list, scheduler_list

        return optimizer_list

    def optimizer_step(self,
                       epoch: int = None,
                       batch_idx: int = None,
                       optimizer: torch.optim.Optimizer = None,
                       optimizer_idx: int = None,
                       optimizer_closure: Optional[Callable] = None,
                       **kwargs) -> None:
        """
        Overriding Lightning's optimizer step function to only step the optimizer associated with
        the relevant stage.

        See here for more details:
        https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html#optimizer-step

        Parameters
        ----------
        epoch: int
            Current epoch
        batch_idx: int
            Index of current batch
        optimizer: torch.optim.Optimizer
            A PyTorch optimizer
        optimizer_idx: int
            If you used multiple optimizers, this indexes into that list
        optimizer_closure: Callable
            Closure for all optimizers

        """
        if self.hparams.optimizer_config_list[optimizer_idx]['stage'] == self.hparams.stage:
            optimizer.step(closure=optimizer_closure)


class MLPMatrixFactorizationModel(BasePipeline):
    # NOTE: the full docstring is merged in with ``BasePipeline``'s using ``merge_docstrings``.
    # Only the descriptions of new or changed parameters are included in this docstring
    """
    Training pipeline for the matrix factorization model with MLP layers instead of a final dot
    product (like in ``MatrixFactorizationModel``).

    ``MLPMatrixFactorizationModel`` models have an embedding layer for both users and items. These
    embeddings are concatenated and sent through an MLP to output a single float ranking value.

    All ``MLPMatrixFactorizationModel`` instances are subclasses of the ``LightningModule`` class
    provided by PyTorch Lightning. This means to train a model, you will need a
    ``collie.model.CollieTrainer`` object, but the model can be saved and loaded without this
    ``Trainer`` instance. Example usage may look like:

    .. code-block:: python

        from collie.model import CollieTrainer, MLPMatrixFactorizationModel


        model = MLPMatrixFactorizationModel(train=train)
        trainer = CollieTrainer(model)
        trainer.fit(model)
        model.eval()

        # do evaluation as normal with ``model``

        model.save_model(filename='model.pth')
        new_model = MLPMatrixFactorizationModel(load_model_path='model.pth')

        # do evaluation as normal with ``new_model``

    Parameters
    ----------
    embedding_dim: int
        Number of latent factors to use for user and item embeddings
    num_layers: int
        Number of MLP layers to apply. The first layer takes the concatenated user and item
        embeddings (dimension ``embedding_dim * 2``) as input, and layer ``i`` (zero-indexed)
        outputs a dimension of ``int(embedding_dim * 2 * (num_layers - i) / (num_layers + 1))``
    dropout_p: float
        Probability of dropout on the linear layers
    bias_lr: float
        Learning rate for the bias terms. If ``'infer'``, this will be set equal to ``lr``
    optimizer: torch.optim or str
        If a string, one of the following supported optimizers:

        * ``'sgd'`` (for ``torch.optim.SGD``)

        * ``'adam'`` (for ``torch.optim.Adam``)

    bias_optimizer: torch.optim or str
        Optimizer for the bias terms. This supports the same string options as ``optimizer``, with
        the addition of ``infer``, which will set the optimizer equal to ``optimizer``. If
        ``bias_optimizer`` is ``None``, only a single optimizer will be created for all model
        parameters
    y_range: tuple
        Specify as ``(min, max)`` to apply a sigmoid layer to the output score of the model to get
        predicted ratings within the range of ``min`` and ``max``

    """
    def __init__(self,
                 train: INTERACTIONS_LIKE_INPUT = None,
                 val: INTERACTIONS_LIKE_INPUT = None,
                 embedding_dim: int = 30,
                 num_layers: int = 3,
                 dropout_p: float = 0.0,
                 lr: float = 1e-3,
                 bias_lr: Optional[Union[float, str]] = 1e-2,
                 lr_scheduler_func: Optional[Callable] = partial(
                     ReduceLROnPlateau, patience=1, verbose=True),
                 weight_decay: float = 0.0,
                 optimizer: Union[str, Callable] = 'adam',
                 bias_optimizer: Optional[Union[str, Callable]] = 'sgd',
                 loss: Union[str, Callable] = 'hinge',
                 metadata_for_loss: Optional[Dict[str, torch.tensor]] = None,
                 metadata_for_loss_weights: Optional[Dict[str, float]] = None,
                 y_range: Optional[Tuple[float, float]] = None,
                 load_model_path: Optional[str] = None,
                 map_location: Optional[str] = None):
        super().__init__(**get_init_arguments())

    __doc__ = merge_docstrings(BasePipeline, __doc__, __init__)

    def _setup_model(self, **kwargs) -> None:
        """
        Method for building model internals that rely on the data passed in.

        This method will be called after ``prepare_data``.

        """
        self.user_biases = ZeroEmbedding(num_embeddings=self.hparams.num_users,
                                         embedding_dim=1)
        self.item_biases = ZeroEmbedding(num_embeddings=self.hparams.num_items,
                                         embedding_dim=1)
        self.user_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_users,
            embedding_dim=self.hparams.embedding_dim)
        self.item_embeddings = ScaledEmbedding(
            num_embeddings=self.hparams.num_items,
            embedding_dim=self.hparams.embedding_dim)

        mlp_modules = []
        input_size = self.hparams.embedding_dim * 2
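        # note: with the defaults embedding_dim=30 and num_layers=3, the layer input sizes work
        # out to 60 -> 45 -> 30 -> 15, and predict_layer below maps the final 15 -> 1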
        for i in range(self.hparams.num_layers):
            next_input_size = (int(self.hparams.embedding_dim * 2 *
                                   ((self.hparams.num_layers - i) /
                                    (self.hparams.num_layers + 1))))
            mlp_modules.append(nn.Linear(input_size, next_input_size))
            mlp_modules.append(nn.ReLU())
            mlp_modules.append(nn.Dropout(p=self.hparams.dropout_p))
            input_size = next_input_size
        self.mlp_layers = nn.Sequential(*mlp_modules)

        self.predict_layer = nn.Linear(next_input_size, 1)

    def forward(self, users: torch.tensor,
                items: torch.tensor) -> torch.tensor:
        """
        Forward pass through the model. Roughly,

        ``prediction = sigmoid(MLP(concat(user_embed, item_embed))) + user_bias + item_bias``

        Dropout, if used, is applied within the MLP layers rather than to the bias terms.

        Parameters
        ----------
        users: tensor, 1-d
            Array of user indices
        items: tensor, 1-d
            Array of item indices

        Returns
        -------
        preds: tensor, 1-d
            Predicted ratings or rankings

        """
        user_embeddings = self.user_embeddings(users)
        item_embeddings = self.item_embeddings(items)

        concatenated_embeddings = torch.cat((user_embeddings, item_embeddings),
                                            -1)
        mlp_output = torch.sigmoid(
            self.predict_layer(
                self.mlp_layers(concatenated_embeddings))).squeeze()

        preds = (mlp_output + self.user_biases(users).squeeze(1) +
                 self.item_biases(items).squeeze(1))

        if self.hparams.y_range is not None:
            preds = (torch.sigmoid(preds) *
                     (self.hparams.y_range[1] - self.hparams.y_range[0]) +
                     self.hparams.y_range[0])

        return preds

    def _get_item_embeddings(self) -> torch.tensor:
        """Get item embeddings on device."""
        return self.item_embeddings.weight.data