def save(self, global_step=None, session=None):
    """
    Save the session to a checkpoint file.

    Args:
        global_step (int or tf.Tensor): The global step counter.
        session (tf.Session): The session to save.  If not specified,
            select the default session.

    Returns:
        str: The path of the saved checkpoint file.
    """
    session = session or get_default_session_or_error()

    # serialize the states of all savable objects into the serial variable
    if self._objects:
        states = {key: obj.get_state()
                  for key, obj in six.iteritems(self._objects)}
        self._serial_var.set(
            pkl.dumps(states, protocol=pkl.HIGHEST_PROTOCOL))

    # make sure the target directory exists before writing the checkpoint
    if not os.path.isdir(self.save_dir):
        makedirs(self.save_dir, exist_ok=True)

    checkpoint_path = os.path.join(self.save_dir, self.filename)
    return self._saver.save(
        session,
        checkpoint_path,
        global_step=global_step,
        write_meta_graph=self.save_meta
    )
def commit(self, result_dict):
    """
    Write the `result_dict` to screen, and save to a JSON file.

    If ``env["MLTOOLKIT_EXPERIMENT_RESULT_FILE"]`` presents, the results
    will be saved in the file specified by this environmental variable.
    Otherwise the results will be saved in ``result_dir + "/result.json"``.

    Args:
        result_dict (dict): JSON serializable result dict.  It will be
            merged with ``self.result_dict``.
    """
    self.result_dict.update(result_dict)

    # echo the merged results to screen, ordered by key
    print('')
    print('Result Updated')
    print('--------------')
    for key in sorted(six.iterkeys(self.result_dict)):
        print('{}: {}'.format(key, self.result_dict[key]))

    # persist the merged results as a JSON file
    target_dir = os.path.split(self.result_json_file)[0]
    makedirs(target_dir, exist_ok=True)
    serialized = json.dumps(self.result_dict, sort_keys=True,
                            cls=JsonEncoder)
    with codecs.open(self.result_json_file, 'wb', 'utf-8') as f:
        f.write(serialized)
def load_data(dataset):
    """
    Preprocess the raw files of `dataset` and dump the train / test /
    test_label arrays as pickle files under `output_folder`.

    Args:
        dataset (str): One of 'SMD', 'SMAP' or 'MSL'.
    """
    if dataset == 'SMD':
        dataset_folder = 'ServerMachineDataset'
        file_list = os.listdir(os.path.join(dataset_folder, "train"))
        for filename in file_list:
            if filename.endswith('.txt'):
                # BUG FIX: the original used `filename.strip('.txt')`,
                # which removes any of the characters '.', 't', 'x' from
                # BOTH ends of the name and can corrupt machine names;
                # slice off the suffix instead.
                machine = filename[:-len('.txt')]
                load_and_save('train', filename, machine, dataset_folder)
                load_and_save('test', filename, machine, dataset_folder)
                load_and_save('test_label', filename, machine,
                              dataset_folder)
    elif dataset in ('SMAP', 'MSL'):
        dataset_folder = 'data'
        with open(os.path.join(dataset_folder, 'labeled_anomalies.csv'),
                  'r') as file:
            csv_reader = csv.reader(file, delimiter=',')
            res = [row for row in csv_reader][1:]  # skip the header row
            res = sorted(res, key=lambda k: k[0])
        label_folder = os.path.join(dataset_folder, 'test_label')
        makedirs(label_folder, exist_ok=True)
        # keep only the channels of this spacecraft; 'P-2' is excluded
        # (presumably a known-bad channel — TODO confirm)
        data_info = [
            row for row in res if row[1] == dataset and row[0] != 'P-2'
        ]
        labels = []
        for row in data_info:
            anomalies = ast.literal_eval(row[2])
            length = int(row[-1])
            # BUG FIX: `np.bool` was deprecated in NumPy 1.20 and removed
            # in 1.24; the builtin `bool` is the documented replacement.
            label = np.zeros([length], dtype=bool)
            for anomaly in anomalies:
                label[anomaly[0]:anomaly[1] + 1] = True
            labels.extend(label)
        labels = np.asarray(labels)
        print(dataset, 'test_label', labels.shape)
        with open(
                os.path.join(output_folder,
                             dataset + "_" + 'test_label' + ".pkl"),
                "wb") as file:
            dump(labels, file)

        def concatenate_and_save(category):
            # concatenate every channel's array of `category` in the
            # (sorted) channel order and dump it as one pickle file
            data = []
            for row in data_info:
                filename = row[0]
                temp = np.load(
                    os.path.join(dataset_folder, category,
                                 filename + '.npy'))
                data.extend(temp)
            data = np.asarray(data)
            print(dataset, category, data.shape)
            with open(
                    os.path.join(output_folder,
                                 dataset + "_" + category + ".pkl"),
                    "wb") as file:
                dump(data, file)

        for c in ['train', 'test']:
            concatenate_and_save(c)
def make_dir(self, sub_path):
    """
    Ensure the `sub_path` directory exists.

    Args:
        sub_path (str): The sub path.

    Returns:
        str: The full path of the directory.
    """
    # resolve first, then create the directory (no-op if it exists)
    full_path = self.resolve_path(sub_path)
    makedirs(full_path, exist_ok=True)
    return full_path
def _enter(self):
    # fall back to a temporary directory when no checkpoint dir is given;
    # the temporary directory context is kept so it can be closed on exit
    if self._checkpoint_dir is not None:
        makedirs(self._checkpoint_dir, exist_ok=True)
    else:
        self._temp_dir_ctx = TemporaryDirectory()
        self._checkpoint_dir = self._temp_dir_ctx.__enter__()

    # the saver that persists `param_vars` into the checkpoint dir
    self._saver = VariableSaver(self._param_vars, self._checkpoint_dir)

    # return self as the context object
    return self
def prepare_parent(self, sub_path):
    """
    Ensure the parent directory of `sub_path` exists.

    Args:
        sub_path (str): The sub path.

    Returns:
        str: The full path of `sub_path`.
    """
    full_path = self.resolve_path(sub_path)
    # only the parent needs to exist; `full_path` itself may be a file
    makedirs(os.path.split(full_path)[0], exist_ok=True)
    return full_path
def save(self, global_step=None):
    """
    Save the checkpoint to file.

    Args:
        global_step (int or tf.Tensor): The global step counter.
    """
    session = get_default_session_or_error()
    makedirs(self.save_dir, exist_ok=True)
    checkpoint_path = os.path.join(self.save_dir, self.filename)
    self._saver.save(
        session,
        checkpoint_path,
        global_step=global_step,
        latest_filename=self.latest_file,
        write_meta_graph=self.save_meta
    )
def __init__(self, result_dir=None, script_name=None):
    """
    Construct a new :class:`MLResults` instance.

    Args:
        result_dir (str or fs.base.FS): A local directory path, a URI
            recognizable by `PyFilesystem <https://www.pyfilesystem.org/>`_,
            or an instance of :class:`fs.base.FS`.  It will be used as
            the result directory, while all the result files will be
            stored within it.  If not specified, will create a local
            directory "./results/<script_name>/".
        script_name (str): The name of the main script.  If not
            specified, will use the file name (excluding the extension
            ".py") of the main module.
    """
    if result_dir is None:
        if script_name is None:
            main_file = os.path.abspath(sys.modules['__main__'].__file__)
            script_name = os.path.splitext(os.path.split(main_file)[1])[0]
        # The ``env["MLSTORAGE_EXPERIMENT_ID"]`` would be set if the
        # program is run via `mlrun` from MLStorage.  See
        # `MLStorage Server <https://github.com/haowen-xu/mlstorage-server>`_
        # and
        # `MLStorage Client <https://github.com/haowen-xu/mlstorage-client>`_
        # for details.
        if os.environ.get('MLSTORAGE_EXPERIMENT_ID'):
            # use the current working directory as the result directory
            # if run via `mlrun` from MLStorage.
            result_dir = os.getcwd()
        else:
            result_dir = os.path.join('./results', script_name)
        if not os.path.isdir(result_dir):
            makedirs(result_dir, exist_ok=True)

    if isinstance(result_dir, FS):
        self._fs = result_dir
    else:
        try:
            # attempt to create the result directory automatically
            self._fs = open_fs(result_dir, create=True)
        except TypeError:
            self._fs = open_fs(result_dir)

    # the dict to collect metrics
    self._metrics_dict = {}
def commit(self, result_dict):
    """
    Update the results with `result_dict`, and save the merged results
    to "result.json".

    Args:
        result_dict (dict): JSON serializable result dict.  It will be
            merged with ``self.result_dict``.
    """
    self.result_dict.update(result_dict)
    # make sure the parent directory of the JSON file exists
    makedirs(os.path.split(self.result_json_file)[0], exist_ok=True)
    payload = json.dumps(self.result_dict, sort_keys=True, cls=JsonEncoder)
    with codecs.open(self.result_json_file, 'wb', 'utf-8') as f:
        f.write(payload)
import ast
import csv
import os
import sys
from pickle import dump

import numpy as np

from tfsnippet.utils import makedirs

# directory where all preprocessed pickle files are dumped;
# created eagerly at import time
output_folder = 'processed'
makedirs(output_folder, exist_ok=True)

from IPython import embed


def load_and_save(category, filename, dataset, dataset_folder):
    """Load one CSV-formatted text file and dump it as a pickle.

    Reads ``<dataset_folder>/<category>/<filename>`` as a float32 array and
    writes it to ``<output_folder>/<dataset>_<category>.pkl``.

    Args:
        category (str): Data split, e.g. 'train', 'test' or 'test_label'.
        filename (str): Name of the raw text file to read.
        dataset (str): Name used as the prefix of the output pickle file.
        dataset_folder (str): Root folder of the raw dataset.
    """
    temp = np.genfromtxt(os.path.join(dataset_folder, category, filename),
                         dtype=np.float32,
                         delimiter=',')
    print(dataset, category, filename, temp.shape)
    with open(os.path.join(output_folder, dataset + "_" + category + ".pkl"),
              "wb") as file:
        dump(temp, file)


# NOTE(review): this definition is truncated in this chunk; its full body
# continues beyond the visible source and is left as-is here.
def load_data(dataset):
    if dataset == 'SMD':
        dataset_folder = 'ServerMachineDataset'
        file_list = os.listdir(os.path.join(dataset_folder, "train"))
        for filename in file_list:
            if filename.endswith('.txt'):
def early_stopping(param_vars, initial_metric=None, save_dir=None,
                   smaller_is_better=True, restore_on_error=False,
                   cleanup=True, name=None):
    """Open a context to memorize the values of parameters at best metric.

    This method will open a context with an object to memorize the best
    metric for early-stopping.  An example of using this early-stopping
    context is:

        with early_stopping(param_vars) as es:
            ...
            es.update(loss, global_step)
            ...

    Where ``es.update(loss, global_step)`` should cause the parameters to
    be saved on disk if `loss` is better than the current best metric.
    One may also get the best metric via ``es.best_metric``.

    Note that if no loss is given via ``es.update``, then the variables
    would keep their latest values when exiting the early-stopping context.

    Parameters
    ----------
    param_vars : list[tf.Variable] | dict[str, tf.Variable]
        List or dict of variables to be memorized.

        If a dict is specified, the keys of the dict would be used as the
        serializations keys via `VariableSaver`.

    initial_metric : float | tf.Tensor | tf.Variable
        The initial best loss (usually for recovering from previous session).

    save_dir : str
        The directory where to save the variable values.
        If not specified, will use a temporary directory.

    smaller_is_better : bool
        Whether or not the less, the better loss? (default True)

    restore_on_error : bool
        Whether or not to restore the memorized parameters even on error?
        (default False)

    cleanup : bool
        Whether or not to cleanup the saving directory on exit?

        This argument will be ignored if `save_dir` is None, while
        the temporary directory will always be deleted on exit.

    name : str
        Optional name of this scope.

    Yields
    ------
    _EarlyStopping
        The object to receive loss during early-stopping context.
    """
    if not param_vars:
        raise ValueError('`param_vars` must not be empty.')

    if save_dir is None:
        # no save dir given: recurse with a temporary directory, which is
        # always removed by TemporaryDirectory itself (hence cleanup=False)
        with TemporaryDirectory() as tempdir:
            with early_stopping(param_vars, initial_metric=initial_metric,
                                save_dir=tempdir, cleanup=False,
                                smaller_is_better=smaller_is_better,
                                restore_on_error=restore_on_error,
                                name=name) as es:
                yield es
    else:
        # materialize a tensor-valued initial metric before entering
        # the context (requires an active session)
        if isinstance(initial_metric, (tf.Tensor, tf.Variable)):
            initial_metric = initial_metric.eval()

        with tf.name_scope(name):
            saver = VariableSaver(param_vars, save_dir)
            save_dir = os.path.abspath(save_dir)
            makedirs(save_dir, exist_ok=True)

            es = _EarlyStopping(saver,
                                best_metric=initial_metric,
                                smaller_is_better=smaller_is_better)
            try:
                yield es
            except Exception as ex:
                # NOTE(review): KeyboardInterrupt derives from
                # BaseException, not Exception, so this isinstance check
                # looks unreachable here — confirm intended behavior.
                if isinstance(ex, KeyboardInterrupt) or restore_on_error:
                    saver.restore()
                raise
            else:
                # normal exit: restore the parameters saved at best metric
                saver.restore()
            finally:
                # best-effort cleanup of the save dir; failures are only
                # logged so they never mask the in-flight exception
                if cleanup:
                    try:
                        if os.path.exists(save_dir):
                            shutil.rmtree(save_dir)
                    except Exception:
                        getLogger(__name__).error(
                            'Failed to cleanup validation save dir %r.',
                            save_dir, exc_info=True
                        )
                if not es.ever_updated:
                    warnings.warn(
                        'Early-stopping metric has never been updated. '
                        'The variables will keep their latest values. '
                        'Did you forget to add corresponding metric?'
                    )