Example #1
File: image.py Project: Jokeren/neon
    def load_data(self):
        """
        Fetch the CIFAR-10 dataset and load it into memory.

        Arguments:
            path (str, optional): Local directory in which to cache the raw
                                  dataset.  Defaults to current directory.
            normalize (bool, optional): Whether to scale values between 0 and 1.
                                        Defaults to True.

        Returns:
            tuple: Both training and test sets are returned, along with the
                   number of classes.
        """
        workdir, filepath = self._valid_path_append(self.path, '',
                                                    self.filename)
        batchdir = os.path.join(workdir, 'cifar-10-batches-py')
        if not os.path.exists(os.path.join(batchdir, 'data_batch_1')):
            if not os.path.exists(filepath):
                self.fetch_dataset(self.url, self.filename, filepath,
                                   self.size)
            with tarfile.open(filepath, 'r:gz') as f:
                f.extractall(workdir)

        train_batches = [
            os.path.join(batchdir, 'data_batch_' + str(i))
            for i in range(1, 6)
        ]
        Xlist, ylist = [], []
        for batch in train_batches:
            with open(batch, 'rb') as f:
                d = pickle_load(f)
                Xlist.append(d['data'])
                ylist.append(d['labels'])

        X_train = np.vstack(Xlist)
        y_train = np.vstack(ylist)

        with open(os.path.join(batchdir, 'test_batch'), 'rb') as f:
            d = pickle_load(f)
            X_test, y_test = d['data'], d['labels']

        y_train = y_train.reshape(-1, 1)
        y_test = np.array(y_test).reshape(-1, 1)

        if self.contrast_normalize:
            norm_scale = 55.0  # Goodfellow
            X_train = self.global_contrast_normalize(X_train, scale=norm_scale)
            X_test = self.global_contrast_normalize(X_test, scale=norm_scale)

        if self.normalize:
            X_train = X_train / 255.
            X_test = X_test / 255.

        if self.whiten:
            zca_cache = os.path.join(workdir, 'cifar-10-zca-cache.pkl')
            X_train, X_test = self.zca_whiten(X_train, X_test, cache=zca_cache)

        return (X_train, y_train), (X_test, y_test), 10
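
A minimal usage sketch for the loader above. It assumes neon's CIFAR10
dataset class; the constructor keywords shown are inferred from the
attributes the method reads (path, normalize, contrast_normalize, whiten)
and may differ in your version.

from neon.data import CIFAR10

dataset = CIFAR10(path='data/', normalize=True)
(X_train, y_train), (X_test, y_test), nclass = dataset.load_data()
# X_train: (50000, 3072) flattened 32x32x3 images; y_train: (50000, 1) labels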
Example #2
File: persist.py Project: leo-lp/neon-1
def load_obj(load_path):
    """
    Loads a saved on-disk representation to a python data structure. We
    currently support the following file formats:

        * python pickle (.pkl)

    Arguments:
        load_path (str): where to load the serialized object from (full path
                            and file name)

    """
    if isinstance(load_path, str):
        load_path = os.path.expandvars(os.path.expanduser(load_path))
        if load_path.endswith('.gz'):
            import gzip
            load_path = gzip.open(load_path, 'rb')
        else:
            load_path = open(load_path, 'rb')
    fname = load_path.name

    logger.debug("deserializing object from:  %s", fname)
    try:
        return pickle_load(load_path)
    except AttributeError:
        msg = ("Problems deserializing: %s.  Its possible the interface "
               "for this object has changed since being serialized.  You "
               "may need to remove and recreate it." % load_path)
        logger.error(msg)
        raise AttributeError(msg)
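
A short usage sketch for load_obj; the model file names are hypothetical,
and the import path assumes the persist.py module location shown above.

from neon.util.persist import load_obj

state = load_obj('~/models/model.pkl')     # plain pickle
state = load_obj('~/models/model.pkl.gz')  # gzipped pickles are detected
                                           # by the .gz suffix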
Example #3
File: image.py Project: Jokeren/neon
    def load_data(self):
        """
        Fetch the MNIST dataset and load it into memory.

        Arguments:
            path (str, optional): Local directory in which to cache the raw
                                  dataset.  Defaults to current directory.
            normalize (bool, optional): Whether to scale values between 0 and 1.
                                        Defaults to True.

        Returns:
            tuple: Both training and test sets are returned, along with the
                   number of classes.
        """
        filepath = self._valid_path_append(self.path, self.filename)
        if not os.path.exists(filepath):
            self.fetch_dataset(self.url, self.filename, filepath, self.size)

        with gzip.open(filepath, 'rb') as mnist:
            (X_train, y_train), (X_test, y_test) = pickle_load(mnist)
            X_train = X_train.reshape(-1, 784)
            X_test = X_test.reshape(-1, 784)

            if self.normalize:
                X_train = X_train / 255.
                X_test = X_test / 255.

        return (X_train, y_train), (X_test, y_test), 10
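
For reference, the same archive can be read without the class wrapper. A
minimal sketch, assuming mnist.pkl.gz holds a Python 2 pickle of the
((X_train, y_train), (X_test, y_test)) pair unpacked above; encoding='latin1'
is what a pickle_load compatibility helper typically supplies on Python 3.

import gzip
import pickle

with gzip.open('mnist.pkl.gz', 'rb') as f:
    (X_train, y_train), (X_test, y_test) = pickle.load(f, encoding='latin1')
print(X_train.shape, X_test.shape)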
Example #4
    def load_data(self):
        """
        Fetch the MNIST dataset and load it into memory.

        Arguments:
            path (str, optional): Local directory in which to cache the raw
                                  dataset.  Defaults to current directory.
            normalize (bool, optional): Whether to scale values between 0 and 1.
                                        Defaults to True.

        Returns:
            tuple: Both training and test sets are returned, along with the
                   number of classes.
        """
        filepath = self._valid_path_append(self.path, self.filename)
        if not os.path.exists(filepath):
            self.fetch_dataset(self.url, self.filename, filepath, self.size)

        with gzip.open(filepath, 'rb') as mnist:
            (X_train, y_train), (X_test, y_test) = pickle_load(mnist)

            if self.subset_pct < 100:
                X_train = X_train[:int(X_train.shape[0] * self.subset_pct /
                                       100.)]
                y_train = y_train[:int(y_train.shape[0] * self.subset_pct /
                                       100.)]
                X_test = X_test[:int(X_test.shape[0] * self.subset_pct / 100.)]
                y_test = y_test[:int(y_test.shape[0] * self.subset_pct / 100.)]
                # subset_pct is already a percentage; log it as-is
                logger.debug("subset %d%% of data", self.subset_pct)

            if self.size > 28:
                n_train, n_test = X_train.shape[0], X_test.shape[0]
                X_train_ = np.zeros(shape=(n_train, self.size, self.size))
                X_test_ = np.zeros(shape=(n_test, self.size, self.size))
                X_train_[:, :28, :28] = X_train
                X_test_[:, :28, :28] = X_test
            else:
                X_train_ = X_train[:, :self.size, :self.size]
                X_test_ = X_test[:, :self.size, :self.size]
            X_train = X_train_.reshape(-1, self.size * self.size)
            X_test = X_test_.reshape(-1, self.size * self.size)

            if self.normalize:
                X_train = X_train / 255.
                X_test = X_test / 255.
                if self.sym_range:
                    X_train = X_train * 2. - 1.
                    X_test = X_test * 2. - 1.

            if self.shuffle:
                # shuffle images and labels with the same permutation so the
                # (image, label) pairing is preserved
                np.random.seed(0)
                perm = np.random.permutation(X_train.shape[0])
                X_train, y_train = X_train[perm], y_train[perm]

        return (X_train, y_train), (X_test, y_test), 10
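
The resize branch above zero-pads 28x28 digits into the top-left corner of a
larger canvas, or takes a top-left crop for smaller sizes. A self-contained
sketch of the same idea (the helper name is ours):

import numpy as np

def pad_or_crop(images, size, orig=28):
    """Zero-pad (size > orig) or top-left crop (size <= orig) a batch of
    square images, then flatten, mirroring the branch above."""
    if size > orig:
        out = np.zeros((images.shape[0], size, size), dtype=images.dtype)
        out[:, :orig, :orig] = images
    else:
        out = images[:, :size, :size]
    return out.reshape(-1, size * size)

print(pad_or_crop(np.ones((2, 28, 28)), 32).shape)  # (2, 1024)
print(pad_or_crop(np.ones((2, 28, 28)), 24).shape)  # (2, 576)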
Example #5
File: image.py Project: Jokeren/neon
    @staticmethod
    def zca_whiten(train, test, cache=None):
        """
        Use train set statistics to apply the ZCA whitening transform to
        both train and test sets.
        """
        if cache and os.path.isfile(cache):
            with open(cache, 'rb') as f:
                (meanX, W) = pickle_load(f)
        else:
            meanX, W = CIFAR10._compute_zca_transform(train)
            if cache:
                logger.info("Caching ZCA transform matrix")
                with open(cache, 'wb') as f:
                    pickle.dump((meanX, W), f, 2)

        logger.info("Applying ZCA whitening transform")
        train_w = np.dot(train - meanX, W)
        test_w = np.dot(test - meanX, W)

        return train_w, test_w
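
_compute_zca_transform is not shown in these examples. A minimal sketch of
the standard ZCA computation it presumably performs (the eps regularizer and
its default are assumptions):

import numpy as np

def compute_zca_transform(X, eps=0.1):
    """Return (meanX, W) such that np.dot(X - meanX, W) is ZCA-whitened.

    X is (n_samples, n_features); eps keeps small eigenvalues from blowing
    up the inverse square root.
    """
    meanX = X.mean(axis=0)
    cov = np.cov(X - meanX, rowvar=False)   # (n_features, n_features)
    eigval, eigvec = np.linalg.eigh(cov)    # covariance is symmetric
    # W = U diag(1 / sqrt(lambda + eps)) U^T
    W = eigvec @ np.diag(1.0 / np.sqrt(eigval + eps)) @ eigvec.T
    return meanX, W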