コード例 #1
0
    def save_as_csv(self, path, **params):
        """
        Saves the elements to a local file in CSV format, one element per record.

        Parameters
        ----------
        path : str
            The file to write.  The path is passed to delete_file_or_dir
            before the file is opened for writing.
        **params
            Formatting options (e.g. delimiter, quoting), forwarded to csv.writer.
        """
        self._entry(path)

        def to_csv(row):
            # Format one element as a CSV record containing a single field.
            sio = StringIO.StringIO()
            writer = csv.writer(sio, **params)
            try:
                # writerow takes only the row: the formatting options belong to
                # csv.writer, and passing them here raises TypeError.
                writer.writerow([row])
                return sio.getvalue()
            except IOError:
                return ''

        delete_file_or_dir(path)
        elems_at_a_time = 10000
        with open(path, 'w') as f:
            self.begin_iterator()
            while True:
                batch = self.iterator_get_next(elems_at_a_time)
                for row in batch:
                    f.write(to_csv(row))
                # Anything other than a full batch means there is nothing more to fetch.
                if len(batch) != elems_at_a_time:
                    break
        self._exit()
コード例 #2
0
    def save(self, path):
        """
        Save a model.

        The model can be saved, then reloaded later to provide recommendations.

        Parameters
        ----------
        path : str
            The location where the model will be saved.
            The path is first passed to delete_file_or_dir, then a new directory
            is created there.
            Three items are stored, at the locations returned by
            self._file_paths(path): the underlying model parameters, the original
            ratings, and the column names (pickled as a list of user, item and
            rating column names).
        """
        sc = CommonSparkContext.Instance().sc()
        delete_file_or_dir(path)
        os.makedirs(path)
        model_path, ratings_path, metadata_path = self._file_paths(path)
        # save model
        self.model.save(sc, model_path)
        # save ratings
        self.ratings.save(ratings_path)
        # save metadata
        metadata = [self.user_col, self.item_col, self.rating_col]
        # Pickle streams are bytes: binary mode avoids newline translation on
        # Windows and is required on Python 3.
        with open(metadata_path, 'wb') as f:
            pickle.dump(metadata, f)
コード例 #3
0
 def save_as_text(self, path):
     """
     Saves the RDD to file as text.

     The RDD is written with saveAsTextFile, then the element type is pickled
     into a file named 'metadata' inside the output path.

     Parameters
     ----------
     path : str
         The location to write to.  It is passed to delete_file_or_dir first.

     Raises
     ------
     TypeError
         If saving the RDD fails for any reason.
     """
     self._entry(path)
     # this only works for local files
     delete_file_or_dir(path)
     try:
         self._rdd.saveAsTextFile(path)           # action ?
     except Exception:
         # Catch Exception rather than everything, so that KeyboardInterrupt and
         # SystemExit are not turned into a TypeError.
         # TODO distinguish between filesystem errors and pickle errors
         raise TypeError('The XArray save failed.')
     metadata = self.elem_type
     metadata_path = os.path.join(path, 'metadata')
     # Pickle streams are bytes: binary mode avoids newline translation on
     # Windows and is required on Python 3.
     with open(metadata_path, 'wb') as md:
         # TODO detect filesystem errors
         pickle.dump(metadata, md)
     self._exit()