def save(filepath, obj, on_overwrite='ignore'):
    """
    Serialize `obj` to a file denoted by `filepath`.

    Parameters
    ----------
    filepath : str
        A filename. If the suffix is `.joblib` and joblib can be imported,
        `joblib.dump` is used in place of the regular pickling mechanisms;
        this results in much faster saves by saving arrays as separate .npy
        files on disk. If the file suffix is `.npy` then `numpy.save` is
        attempted on `obj`. Otherwise, (c)pickle is used.
    obj : object
        A Python object to be serialized.
    on_overwrite : str, optional
        A string specifying what to do if the file already exists.
        Possible values include:

        - "ignore" : Just overwrite the existing file.
        - "backup" : Make a backup copy of the file (<filepath>.bak), save
          the new copy, then delete the backup copy. This allows recovery
          of the old version of the file if saving the new one fails.
    """
    filepath = preprocess(filepath)

    if os.path.exists(filepath):
        if on_overwrite == 'backup':
            backup = filepath + '.bak'
            shutil.move(filepath, backup)
            save(filepath, obj)
            try:
                os.remove(backup)
            except Exception as e:
                warnings.warn("Got an error while trying to remove " +
                              backup + ": " + str(e))
            return
        else:
            assert on_overwrite == 'ignore'

    try:
        _save(filepath, obj)
    except RuntimeError as e:
        # Sometimes for large theano graphs, pickle/cPickle exceeds the
        # maximum recursion depth. This seems like a fundamental design flaw
        # in pickle/cPickle. The workaround employed here is the one
        # recommended to someone with a similar problem on Stack Overflow:
        #
        # http://stackoverflow.com/questions/2134706/hitting-maximum-recursion-depth-using-pythons-pickle-cpickle
        #
        # Obviously this does not scale and could cause a crash, but there
        # is no other solution short of writing our own implementation of
        # pickle.
        if str(e).find('recursion') != -1:
            logger.warning('pylearn2.utils.save encountered the following '
                           'error: ' + str(e) +
                           '\nAttempting to resolve this error by calling '
                           'sys.setrecursionlimit and retrying')
            old_limit = sys.getrecursionlimit()
            try:
                sys.setrecursionlimit(50000)
                _save(filepath, obj)
            finally:
                sys.setrecursionlimit(old_limit)
        else:
            # Re-raise RuntimeErrors we don't know how to work around,
            # rather than silently swallowing them.
            raise
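
# A minimal usage sketch (not part of the original module): it shows how the
# file suffix selects the serialization backend described in the docstring
# above. The object and file names below are hypothetical.
def _example_save_usage():
    model = {'weights': [0.1, 0.2, 0.3]}      # any picklable object
    # Unrecognized suffixes such as .pkl fall back to (c)pickle.
    save('model.pkl', model)
    # A .npy suffix makes numpy.save handle the object.
    save('weights.npy', np.asarray(model['weights']))
    # 'backup' keeps a model.pkl.bak copy until the new save succeeds,
    # so a failed save does not destroy the previous file.
    save('model.pkl', model, on_overwrite='backup')
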
def _load(filepath, recurse_depth=0, retry=True):
    """
    Recursively tries to load a file until success or the maximum number
    of attempts is reached.

    Parameters
    ----------
    filepath : str
        A path to a file to load. Should be a pickle, Matlab, or NumPy
        file; or a .txt or .amat file that numpy.loadtxt can load.
    recurse_depth : int, optional
        End users should not use this argument. It is used by the function
        itself to implement the `retry` option recursively.
    retry : bool, optional
        If True, will make a handful of attempts to load the file before
        giving up. This can be useful if you are, for example, calling
        show_weights.py on a file that is actively being written to by a
        training script: the load attempt might fail if the training script
        writes at the same time show_weights tries to read, but if you try
        again after a few seconds you should be able to open the file.

    Returns
    -------
    loaded_object : object
        The object that was stored in the file.
    """
    try:
        import joblib
        joblib_available = True
    except ImportError:
        joblib_available = False

    if recurse_depth == 0:
        filepath = preprocess(filepath)

    if filepath.endswith('.npy') or filepath.endswith('.npz'):
        return np.load(filepath)

    if filepath.endswith('.amat') or filepath.endswith('.txt'):
        try:
            return np.loadtxt(filepath)
        except Exception:
            raise Exception("{0} cannot be loaded by serial.load (trying "
                            "to use np.loadtxt)".format(filepath))

    if filepath.endswith('.mat'):
        global io
        if io is None:
            import scipy.io
            io = scipy.io
        try:
            return io.loadmat(filepath)
        except NotImplementedError as nei:
            if str(nei).find('HDF reader') != -1:
                global hdf_reader
                if hdf_reader is None:
                    import h5py
                    hdf_reader = h5py
                return hdf_reader.File(filepath, 'r')
            else:
                raise
        # this code should never be reached
        assert False

    # for loading PY2 pickle in PY3
    encoding = {'encoding': 'latin-1'} if six.PY3 else {}

    def exponential_backoff():
        if recurse_depth > 9:
            logger.info('Max number of tries exceeded while trying to open '
                        '{0}'.format(filepath))
            logger.info('attempting to open via reading string')
            with open(filepath, 'rb') as f:
                content = f.read()
            return cPickle.loads(content, **encoding)
        else:
            nsec = 0.5 * (2.0 ** float(recurse_depth))
            logger.info("Waiting {0} seconds and trying again".format(nsec))
            time.sleep(nsec)
            return _load(filepath, recurse_depth + 1, retry)

    try:
        if not joblib_available:
            with open(filepath, 'rb') as f:
                obj = cPickle.load(f, **encoding)
        else:
            try:
                obj = joblib.load(filepath)
            except Exception as e:
                if os.path.exists(filepath) and not os.path.isdir(filepath):
                    raise
                raise_cannot_open(filepath)
    except MemoryError as e:
        # We want to explicitly catch this exception because for MemoryError
        # __str__ returns the empty string, so some of our default printouts
        # below don't make a lot of sense.
        # Also, a lot of users assume any exception is a bug in the library,
        # so we can cut down on mail to pylearn-users by adding a message
        # that makes it clear this exception is caused by their machine not
        # meeting requirements.
        if os.path.splitext(filepath)[1] == ".pkl":
            improve_memory_error_message(
                e,
                ("You do not have enough memory to open %s\n"
                 " + Try using numpy.{save,load} (file with extension "
                 "'.npy') to save your file. It uses less memory when "
                 "reading and writing files than pickled files.") % filepath)
        else:
            improve_memory_error_message(
                e, "You do not have enough memory to open %s" % filepath)
    except (BadPickleGet, EOFError, KeyError) as e:
        if not retry:
            raise Exception(e.__class__('Failed to open {0}'.format(filepath)))
        obj = exponential_backoff()
    except ValueError:
        logger.exception('Failed to open {0}'.format(filepath))
        if not retry:
            raise Exception(ValueError('Failed to open {0}'.format(filepath)))
        obj = exponential_backoff()
    except Exception:
        raise Exception("Couldn't open {0}".format(filepath))

    # If the object has no yaml_src, we give it one that just says it came
    # from this file. Could cause trouble if you save obj again to a
    # different location.
    if not hasattr(obj, 'yaml_src'):
        try:
            obj.yaml_src = '!pkl: "' + os.path.abspath(filepath) + '"'
        except Exception:
            pass

    return obj
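
# A minimal usage sketch (not part of the original module): `_load` is
# normally reached through the module's public `load` wrapper (the docstring
# above refers to serial.load), but the retry behaviour is easiest to see by
# calling it directly. The file names below are hypothetical.
def _example_load_usage():
    # With retry=True (the default), failures such as EOFError on a pickle
    # that another process is still writing are retried with exponential
    # backoff (0.5 s, 1 s, 2 s, ...); after about ten attempts the file is
    # read as raw bytes and unpickled with cPickle.loads.
    model = _load('model_in_progress.pkl')
    # With retry=False, the first BadPickleGet / EOFError / KeyError /
    # ValueError fails immediately instead of backing off.
    other = _load('final_model.pkl', retry=False)
    return model, other
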