Code example #1
File: checkpoint.py  Project: shliujing/tfsnippet
    def save(self, global_step=None, session=None):
        """
        Save the session to a checkpoint file.

        Args:
            global_step (int or tf.Tensor): The global step counter.
            session (tf.Session): The session to save.
                If not specified, select the default session.

        Returns:
            str: The path of the saved checkpoint file.
        """
        session = session or get_default_session_or_error()

        # save the states of savable objects into serial var
        if self._objects:
            object_states = {}
            for key, obj in six.iteritems(self._objects):
                object_states[key] = obj.get_state()

            serialized_states = pkl.dumps(
                object_states, protocol=pkl.HIGHEST_PROTOCOL)
            self._serial_var.set(serialized_states)

        # now save the variables to checkpoint file
        if not os.path.isdir(self.save_dir):
            makedirs(self.save_dir, exist_ok=True)
        return self._saver.save(
            session,
            os.path.join(self.save_dir, self.filename),
            global_step=global_step,
            write_meta_graph=self.save_meta
        )
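A minimal usage sketch for the method above. The CheckpointSaver class name and its constructor arguments below are assumptions for illustration, not taken from the example itself:

# Hypothetical usage sketch; CheckpointSaver and its constructor arguments
# are assumed here, not shown in the example above.
import tensorflow as tf
from tfsnippet import CheckpointSaver

saver = CheckpointSaver(tf.global_variables(), save_dir='./checkpoints')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ckpt_path = saver.save(global_step=100, session=sess)
    print('checkpoint written to', ckpt_path)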
Code example #2
    def commit(self, result_dict):
        """
        Write the `result_dict` to screen, and save to a JSON file.

        If ``env["MLTOOLKIT_EXPERIMENT_RESULT_FILE"]`` is present, the results
        will be saved to the file specified by this environment variable.
        Otherwise the results will be saved to ``result_dir + "/result.json"``.

        Args:
            result_dict (dict):  JSON serializable result dict.
                It will be merged with ``self.result_dict``.
        """
        self.result_dict.update(result_dict)

        print('')
        print('Result Updated')
        print('--------------')
        for k in sorted(six.iterkeys(self.result_dict)):
            print('{}: {}'.format(k, self.result_dict[k]))

        parent_dir = os.path.split(self.result_json_file)[0]
        makedirs(parent_dir, exist_ok=True)
        json_result = json.dumps(self.result_dict,
                                 sort_keys=True,
                                 cls=JsonEncoder)
        with codecs.open(self.result_json_file, 'wb', 'utf-8') as f:
            f.write(json_result)
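The merge-then-dump behaviour of commit() can be reproduced in a few lines. The Results class below is a simplified stand-in for illustration, not the original class:

# Standalone sketch of the merging behaviour shown above; the Results class
# here is a hypothetical stand-in, not the original implementation.
import codecs
import json
import os

class Results(object):
    def __init__(self, result_dir):
        self.result_dict = {}
        self.result_json_file = os.path.join(result_dir, 'result.json')

    def commit(self, result_dict):
        # merge the new results into the accumulated dict
        self.result_dict.update(result_dict)
        os.makedirs(os.path.dirname(self.result_json_file), exist_ok=True)
        with codecs.open(self.result_json_file, 'w', 'utf-8') as f:
            f.write(json.dumps(self.result_dict, sort_keys=True))

r = Results('./results')
r.commit({'loss': 0.12, 'acc': 0.97})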
Code example #3
def load_data(dataset):
    if dataset == 'SMD':
        dataset_folder = 'ServerMachineDataset'
        file_list = os.listdir(os.path.join(dataset_folder, "train"))
        for filename in file_list:
            if filename.endswith('.txt'):
                load_and_save('train', filename, filename.strip('.txt'),
                              dataset_folder)
                load_and_save('test', filename, filename.strip('.txt'),
                              dataset_folder)
                load_and_save('test_label', filename, filename.strip('.txt'),
                              dataset_folder)
    elif dataset == 'SMAP' or dataset == 'MSL':
        dataset_folder = 'data'
        with open(os.path.join(dataset_folder, 'labeled_anomalies.csv'),
                  'r') as file:
            csv_reader = csv.reader(file, delimiter=',')
            res = [row for row in csv_reader][1:]
        res = sorted(res, key=lambda k: k[0])
        label_folder = os.path.join(dataset_folder, 'test_label')
        makedirs(label_folder, exist_ok=True)
        data_info = [
            row for row in res if row[1] == dataset and row[0] != 'P-2'
        ]
        labels = []
        for row in data_info:
            anomalies = ast.literal_eval(row[2])
            length = int(row[-1])
            label = np.zeros([length], dtype=bool)
            for anomaly in anomalies:
                label[anomaly[0]:anomaly[1] + 1] = True
            labels.extend(label)
        labels = np.asarray(labels)
        print(dataset, 'test_label', labels.shape)
        with open(
                os.path.join(output_folder,
                             dataset + "_" + 'test_label' + ".pkl"),
                "wb") as file:
            dump(labels, file)

        def concatenate_and_save(category):
            data = []
            for row in data_info:
                filename = row[0]
                temp = np.load(
                    os.path.join(dataset_folder, category, filename + '.npy'))
                data.extend(temp)
            data = np.asarray(data)
            print(dataset, category, data.shape)
            with open(
                    os.path.join(output_folder,
                                 dataset + "_" + category + ".pkl"),
                    "wb") as file:
                dump(data, file)

        for c in ['train', 'test']:
            concatenate_and_save(c)
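The label construction in the SMAP/MSL branch above can be illustrated in isolation. The interval values below are made up for demonstration:

import numpy as np

# anomaly intervals as (start, end) pairs, end inclusive, as parsed from the CSV
anomalies = [[3, 5], [8, 8]]
length = 10

label = np.zeros([length], dtype=bool)
for anomaly in anomalies:
    label[anomaly[0]:anomaly[1] + 1] = True   # mark every step inside the interval

print(label)
# [False False False  True  True  True False False  True False]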
Code example #4
    def make_dir(self, sub_path):
        """
        Ensure the `sub_path` directory exists.

        Args:
            sub_path (str): The sub path.

        Returns:
            str: The full path of the directory.
        """
        path = self.resolve_path(sub_path)
        makedirs(path, exist_ok=True)
        return path
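The same ensure-directory pattern, as a standalone sketch with a hypothetical helper function instead of the original class:

import os

def make_dir(base_dir, sub_path):
    # resolve the sub path against the base directory and make sure it exists
    path = os.path.join(base_dir, sub_path)
    os.makedirs(path, exist_ok=True)
    return path

print(make_dir('./results', 'plots/epoch-1'))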
Code example #5
    def _enter(self):
        # open a temporary directory if the checkpoint dir is not specified
        if self._checkpoint_dir is None:
            self._temp_dir_ctx = TemporaryDirectory()
            self._checkpoint_dir = self._temp_dir_ctx.__enter__()
        else:
            makedirs(self._checkpoint_dir, exist_ok=True)

        # create the variable saver
        self._saver = VariableSaver(self._param_vars, self._checkpoint_dir)

        # return self as the context object
        return self
Code example #6
    def prepare_parent(self, sub_path):
        """
        Ensure the parent directory of `sub_path` exists.

        Args:
            sub_path (str): The sub path.

        Returns:
            str: The full path of `sub_path`.
        """
        path = self.resolve_path(sub_path)
        parent_dir = os.path.split(path)[0]
        makedirs(parent_dir, exist_ok=True)
        return path
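The complementary pattern, ensuring only the parent directory of a file path, again as a standalone sketch rather than the original class:

import os

def prepare_parent(base_dir, sub_path):
    # make sure the parent directory exists before the file is written
    path = os.path.join(base_dir, sub_path)
    os.makedirs(os.path.dirname(path), exist_ok=True)
    return path

csv_path = prepare_parent('./results', 'metrics/train.csv')
with open(csv_path, 'w') as f:
    f.write('epoch,loss\n')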
Code example #7
    def save(self, global_step=None):
        """
        Save the checkpoint to file.

        Args:
            global_step (int or tf.Tensor): The global step counter.
        """
        sess = get_default_session_or_error()
        makedirs(self.save_dir, exist_ok=True)
        self._saver.save(sess,
                         os.path.join(self.save_dir, self.filename),
                         global_step=global_step,
                         latest_filename=self.latest_file,
                         write_meta_graph=self.save_meta)
Code example #8
    def __init__(self, result_dir=None, script_name=None):
        """
        Construct a new :class:`MLResults` instance.

        Args:
            result_dir (str or fs.base.FS): A local directory path, a URI
                recognizable by `PyFilesystem <https://www.pyfilesystem.org/>`_,
                or an instance of :class:`fs.base.FS`.  It will be used as
                the result directory, while all the result files will be
                stored within it.  If not specified, will create a local
                directory "./results/<script_name>/".
            script_name (str): The name of the main script.
                If not specified, will use the file name (excluding
                the extension ".py") of the main module.
        """
        if result_dir is None:
            if script_name is None:
                script_name = os.path.splitext(
                    os.path.split(
                        os.path.abspath(sys.modules['__main__'].__file__)
                    )[1]
                )[0]

            # The ``env["MLSTORAGE_EXPERIMENT_ID"]`` would be set if the
            # program is run via `mlrun` from MLStorage.  See
            # `MLStorage Server <https://github.com/haowen-xu/mlstorage-server>`_
            # and
            # `MLStorage Client <https://github.com/haowen-xu/mlstorage-client>`_
            # for details.
            if os.environ.get('MLSTORAGE_EXPERIMENT_ID'):
                # use the current working directory as the result directory
                # if run via `mlrun` from MLStorage.
                result_dir = os.getcwd()
            else:
                result_dir = os.path.join('./results', script_name)
                if not os.path.isdir(result_dir):
                    makedirs(result_dir, exist_ok=True)

        if not isinstance(result_dir, FS):
            try:
                # attempt to create the result directory automatically
                self._fs = open_fs(result_dir, create=True)
            except TypeError:
                self._fs = open_fs(result_dir)
        else:
            self._fs = result_dir

        # the dict to collect metrics
        self._metrics_dict = {}
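A usage sketch for the constructor above. The import path is an assumption; in the original project MLResults may live in a different module:

# Hypothetical import path, for illustration only.
from tfsnippet.examples.utils import MLResults

results = MLResults()                      # defaults to ./results/<script_name>/
results = MLResults('./results/exp-01')    # or an explicit local directory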
Code example #9
    def commit(self, result_dict):
        """
        Update the results with `result_dict`, and save the merged results
        to "result.json".

        Args:
            result_dict (dict):  JSON serializable result dict.
                It will be merged with ``self.result_dict``.
        """
        self.result_dict.update(result_dict)
        parent_dir = os.path.split(self.result_json_file)[0]
        makedirs(parent_dir, exist_ok=True)
        json_result = json.dumps(self.result_dict,
                                 sort_keys=True,
                                 cls=JsonEncoder)
        with codecs.open(self.result_json_file, 'wb', 'utf-8') as f:
            f.write(json_result)
Code example #10
import ast
import csv
import os
import sys
from pickle import dump

import numpy as np
from tfsnippet.utils import makedirs

output_folder = 'processed'
makedirs(output_folder, exist_ok=True)

from IPython import embed


def load_and_save(category, filename, dataset, dataset_folder):
    temp = np.genfromtxt(os.path.join(dataset_folder, category, filename),
                         dtype=np.float32,
                         delimiter=',')
    print(dataset, category, filename, temp.shape)
    with open(os.path.join(output_folder, dataset + "_" + category + ".pkl"),
              "wb") as file:
        dump(temp, file)


def load_data(dataset):
    if dataset == 'SMD':
        dataset_folder = 'ServerMachineDataset'
        file_list = os.listdir(os.path.join(dataset_folder, "train"))
        for filename in file_list:
            if filename.endswith('.txt'):
                load_and_save('train', filename, filename.strip('.txt'),
                              dataset_folder)
                load_and_save('test', filename, filename.strip('.txt'),
                              dataset_folder)
                load_and_save('test_label', filename, filename.strip('.txt'),
                              dataset_folder)
Code example #11
def early_stopping(param_vars, initial_metric=None, save_dir=None,
                   smaller_is_better=True, restore_on_error=False,
                   cleanup=True, name=None):
    """Open a context to memorize the values of parameters at best metric.

    This method will open a context with an object to memorize the best
    metric for early-stopping.  An example of using this early-stopping
    context is:

        with early_stopping(param_vars) as es:
            ...
            es.update(loss, global_step)
            ...

    Where ``es.update(loss, global_step)`` should cause the parameters to
    be saved on disk if `loss` is better than the current best metric.
    One may also get the best metric via ``es.best_metric``.

    Note that if no loss is given via ``es.update``, then the variables
    would keep their latest values when exiting the early-stopping context.

    Parameters
    ----------
    param_vars : list[tf.Variable] | dict[str, tf.Variable]
        List or dict of variables to be memorized.

        If a dict is specified, the keys of the dict would be used as the
        serializations keys via `VariableSaver`.

    initial_metric : float | tf.Tensor | tf.Variable
        The initial best loss (usually for recovering from previous session).

    save_dir : str
        The directory where to save the variable values.
        If not specified, will use a temporary directory.

    smaller_is_better : bool
        Whether a smaller loss value is considered better. (default True)

    restore_on_error : bool
        Whether to restore the memorized parameters when an error occurs.
        (default False)

    cleanup : bool
        Whether or not to clean up the saving directory on exit.

        This argument is ignored if `save_dir` is None, in which case
        the temporary directory is always deleted on exit.

    name : str
        Optional name of this scope.

    Yields
    ------
    _EarlyStopping
        The object to receive loss during early-stopping context.
    """
    if not param_vars:
        raise ValueError('`param_vars` must not be empty.')

    if save_dir is None:
        with TemporaryDirectory() as tempdir:
            with early_stopping(param_vars, initial_metric=initial_metric,
                                save_dir=tempdir, cleanup=False,
                                smaller_is_better=smaller_is_better,
                                restore_on_error=restore_on_error,
                                name=name) as es:
                yield es

    else:
        if isinstance(initial_metric, (tf.Tensor, tf.Variable)):
            initial_metric = initial_metric.eval()

        with tf.name_scope(name):
            saver = VariableSaver(param_vars, save_dir)
            save_dir = os.path.abspath(save_dir)
            makedirs(save_dir, exist_ok=True)

            es = _EarlyStopping(saver,
                                best_metric=initial_metric,
                                smaller_is_better=smaller_is_better)

            try:
                yield es
            except Exception as ex:
                if isinstance(ex, KeyboardInterrupt) or restore_on_error:
                    saver.restore()
                raise
            else:
                saver.restore()
            finally:
                if cleanup:
                    try:
                        if os.path.exists(save_dir):
                            shutil.rmtree(save_dir)
                    except Exception:
                        getLogger(__name__).error(
                            'Failed to cleanup validation save dir %r.',
                            save_dir, exc_info=True
                        )
                if not es.ever_updated:
                    warnings.warn(
                        'Early-stopping metric has never been updated. '
                        'The variables will keep their latest values. '
                        'Did you forget to add corresponding metric?'
                    )
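Expanding the docstring's snippet into a slightly fuller, still schematic loop. compute_validation_loss, n_steps and eval_freq are placeholders for the training script's own logic, not part of the original API:

# Schematic usage based on the docstring above; the loss computation is a
# placeholder for whatever validation routine the training script uses.
params = tf.trainable_variables()

with early_stopping(params, save_dir='./es-checkpoints') as es:
    for step in range(1, n_steps + 1):
        # ... run one training step ...
        if step % eval_freq == 0:
            loss = compute_validation_loss()
            es.update(loss, step)   # saves params to disk if loss improved

# on exiting the context, the best parameter values are restored from disk
print('best validation loss:', es.best_metric)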