def load_sparse(fname):
    """
    Load a pickled (optionally gzip-compressed) object from disk.

    .. todo::

        WRITEME

    Parameters
    ----------
    fname : str
        Path to a pickle file. If `fname` does not exist, ``fname + '.gz'``
        is tried instead. Paths ending in '.gz' are opened with gzip.

    Returns
    -------
    object
        The unpickled object.
    """
    if not os.path.exists(fname):
        # Fall back to a gzip-compressed sibling of the requested path;
        # gzip.open raises if that does not exist either.
        fname = fname + '.gz'
        f = gzip.open(fname)
    elif fname.endswith('.gz'):
        f = gzip.open(fname)
    else:
        # BUGFIX: was bare open(fname) (text mode), which breaks pickle
        # loading under Python 3 -- pickle data is binary.
        f = open(fname, 'rb')
    try:
        d = cPickle.load(f)
    finally:
        f.close()
    return d
def _unpickle(cls, file):
    """
    Load one CIFAR-10 batch file from the standard pylearn2 data path.

    .. todo::

        What is this? why not just use serial.load like the CIFAR-100
        class? Whoever wrote it shows up as "unknown" in git blame.

    Parameters
    ----------
    file : str
        Name of a batch file inside the 'cifar-10-batches-py' directory.

    Returns
    -------
    dict
        The unpickled batch contents.

    Raises
    ------
    IOError
        If the batch file is not present under ``${PYLEARN2_DATA_PATH}``.
    """
    from pylearn2.utils import string_utils
    data_root = string_utils.preprocess('${PYLEARN2_DATA_PATH}')
    fname = os.path.join(data_root, 'cifar10', 'cifar-10-batches-py', file)
    if not os.path.exists(fname):
        raise IOError(fname + " was not found. You probably need to "
                      "download the CIFAR-10 dataset by using the "
                      "download script in "
                      "pylearn2/scripts/datasets/download_cifar10.sh "
                      "or manually from "
                      "http://www.cs.utoronto.ca/~kriz/cifar.html")
    # Route the read through the dataset cache (may copy to local disk).
    fname = cache.datasetCache.cache_file(fname)
    _logger.info('loading file %s' % fname)
    fo = open(fname, 'rb')
    batch = cPickle.load(fo)
    fo.close()
    return batch
def _load(filepath, recurse_depth=0, retry=True):
    """
    Recursively tries to load a file until success or maximum number of
    attempts.

    Parameters
    ----------
    filepath : str
        A path to a file to load. Should be a pickle, Matlab, or NumPy
        file; or a .txt or .amat file that numpy.loadtxt can load.
    recurse_depth : int, optional
        End users should not use this argument. It is used by the function
        itself to implement the `retry` option recursively.
    retry : bool, optional
        If True, will make a handful of attempts to load the file before
        giving up. This can be useful if you are for example calling
        show_weights.py on a file that is actively being written to by a
        training script--sometimes the load attempt might fail if the
        training script writes at the same time show_weights tries to read,
        but if you try again after a few seconds you should be able to open
        the file.

    Returns
    -------
    loaded_object : object
        The object that was stored in the file.
    """
    try:
        import joblib
        joblib_available = True
    except ImportError:
        joblib_available = False
    if recurse_depth == 0:
        # Only expand ${VARS} etc. on the first attempt; retries reuse the
        # already-preprocessed path.
        filepath = preprocess(filepath)

    if filepath.endswith('.npy') or filepath.endswith('.npz'):
        return np.load(filepath)

    # BUGFIX: was endswith('txt'), which also matched paths merely ending
    # in the letters "txt" (e.g. 'foo_txt'); the docstring promises '.txt'.
    if filepath.endswith('.amat') or filepath.endswith('.txt'):
        try:
            return np.loadtxt(filepath)
        except Exception:
            reraise_as("{0} cannot be loaded by serial.load (trying "
                       "to use np.loadtxt)".format(filepath))

    if filepath.endswith('.mat'):
        # scipy.io and h5py are imported lazily and cached in module-level
        # globals so repeated loads don't pay the import cost.
        global io
        if io is None:
            import scipy.io
            io = scipy.io
        try:
            return io.loadmat(filepath)
        except NotImplementedError as nei:
            if str(nei).find('HDF reader') != -1:
                # Matlab v7.3+ saves as HDF5, which scipy.io cannot read.
                global hdf_reader
                if hdf_reader is None:
                    import h5py
                    hdf_reader = h5py
                return hdf_reader.File(filepath, 'r')
            else:
                raise
        # this code should never be reached: the try block above either
        # returns or raises
        assert False

    # for loading PY2 pickle in PY3
    encoding = {'encoding': 'latin-1'} if six.PY3 else {}

    def exponential_backoff():
        # Retry with exponentially increasing delays; after the limit is
        # exceeded, fall back to reading the raw bytes and unpickling the
        # in-memory string.
        if recurse_depth > 9:
            logger.info('Max number of tries exceeded while trying to open '
                        '{0}'.format(filepath))
            logger.info('attempting to open via reading string')
            with open(filepath, 'rb') as f:
                content = f.read()
            return cPickle.loads(content, **encoding)
        else:
            nsec = 0.5 * (2.0 ** float(recurse_depth))
            logger.info("Waiting {0} seconds and trying again".format(nsec))
            time.sleep(nsec)
            return _load(filepath, recurse_depth + 1, retry)

    try:
        if not joblib_available:
            with open(filepath, 'rb') as f:
                obj = cPickle.load(f, **encoding)
        else:
            try:
                obj = joblib.load(filepath)
            except Exception:
                # Re-raise only if the file actually exists as a regular
                # file; otherwise report a clearer "cannot open" error.
                if os.path.exists(filepath) and not os.path.isdir(filepath):
                    raise
                raise_cannot_open(filepath)
    except MemoryError as e:
        # We want to explicitly catch this exception because for MemoryError
        # __str__ returns the empty string, so some of our default printouts
        # below don't make a lot of sense.
        # Also, a lot of users assume any exception is a bug in the library,
        # so we can cut down on mail to pylearn-users by adding a message
        # that makes it clear this exception is caused by their machine not
        # meeting requirements.
        if os.path.splitext(filepath)[1] == ".pkl":
            improve_memory_error_message(e,
                                         ("You do not have enough memory to "
                                          "open %s \n"
                                          " + Try using numpy.{save,load} "
                                          "(file with extension '.npy') "
                                          "to save your file. It uses less "
                                          "memory when reading and "
                                          "writing files than pickled files.")
                                         % filepath)
        else:
            improve_memory_error_message(e,
                                         "You do not have enough memory to "
                                         "open %s" % filepath)
    except (BadPickleGet, EOFError, KeyError) as e:
        if not retry:
            reraise_as(e.__class__('Failed to open {0}'.format(filepath)))
        obj = exponential_backoff()
    except ValueError:
        # BUGFIX: this was a bare `logger.exception` attribute access,
        # which logged nothing; call it so the traceback is recorded.
        logger.exception('Failed to open {0}'.format(filepath))
        if not retry:
            reraise_as(ValueError('Failed to open {0}'.format(filepath)))
        obj = exponential_backoff()
    except Exception:
        # assert False
        reraise_as("Couldn't open {0}".format(filepath))

    # if the object has no yaml_src, we give it one that just says it
    # came from this file. could cause trouble if you save obj again
    # to a different location
    if not hasattr(obj, 'yaml_src'):
        try:
            obj.yaml_src = '!pkl: "' + os.path.abspath(filepath) + '"'
        except Exception:
            # Best-effort only: some objects reject attribute assignment.
            pass

    return obj