def save(self, path):
        """
        Save a model.

        The model can be saved, then reloaded later to provide recommendations.

        Parameters
        ----------
        path : str
            The path where the model will be saved.
            Whatever is at this path is first passed to ``delete_file_or_dir``,
            then a new directory is created there with ``os.makedirs``.
            Three items are stored at the locations returned by
            ``self._file_paths(path)``: the underlying model parameters, the
            original ratings, and the column names (documented as using the
            suffixes '.model', '.ratings', and '.metadata').
        """
        sc = CommonSparkContext.Instance().sc()
        # Start from a clean directory so stale artifacts from an earlier save
        # cannot be mixed with the new ones.
        delete_file_or_dir(path)
        os.makedirs(path)
        model_path, ratings_path, metadata_path = self._file_paths(path)
        # save model
        self.model.save(sc, model_path)
        # save ratings
        self.ratings.save(ratings_path)
        # save metadata
        metadata = [self.user_col, self.item_col, self.rating_col]
        # pickle writes bytes, so the target must be opened in binary mode.
        # NOTE(review): assumes fileio.open_file accepts the same mode strings
        # as the builtin open -- confirm.
        with fileio.open_file(metadata_path, 'wb') as f:
            # TODO detect filesystem errors
            pickle.dump(metadata, f)
    @classmethod
    def load(cls, path):
        """
        Load a model that was saved previously.

        Parameters
        ----------
        path : str
            The path where the model files are stored.
            This is the same path that was passed to ``save``.
            The model, ratings and metadata locations are derived from it with
            ``cls._file_paths(path)`` (documented as using the extensions
            '.model', '.ratings', and '.metadata').

        Returns
        -------
        out : instance of ``cls``
            A model that can be used to predict ratings.

        Notes
        -----
        The metadata is read with ``pickle.load``, which can execute arbitrary
        code when given a crafted file.  Only load from trusted paths.
        """
        sc = CommonSparkContext.Instance().sc()
        model_path, ratings_path, metadata_path = cls._file_paths(path)
        # load model
        model = recommendation.MatrixFactorizationModel.load(sc, model_path)
        # load ratings
        ratings = XFrame.load(ratings_path)
        # load metadata
        # Read through the same fileio helper that ``save`` writes with, and in
        # binary mode because pickle data is bytes.
        # NOTE(review): assumes fileio.open_file accepts the same mode strings
        # as the builtin open -- confirm.
        with fileio.open_file(metadata_path, 'rb') as f:
            user_col, item_col, rating_col = pickle.load(f)

        return cls(model, ratings, user_col, item_col, rating_col)
# Example no. 3
# 0
 def train(self):
     """
     Fit a Word2Vec model to ``self.corpus``.

     The estimator is configured with ``self.vector_size`` and ``self.seed``
     before fitting.

     Returns
     -------
     out : TextModel
         The fitted Word2Vec model wrapped in a ``TextModel``.
     """
     # The Spark context is requested before the corpus is converted; its
     # value is not used afterwards.
     CommonSparkContext.Instance().sc()
     corpus_rdd = self.corpus.to_spark_rdd()

     # Configure the estimator step by step, using the object each setter
     # returns, exactly as a fluent chain would.
     estimator = Word2Vec()
     estimator = estimator.setVectorSize(self.vector_size)
     estimator = estimator.setSeed(self.seed)

     fitted = estimator.fit(corpus_rdd)
     return TextModel(fitted)