def fetch(self, download_if_missing: bool) -> None:
     """Ensure the cached ``<NAME>_inputs.npy`` / ``<NAME>_labels.npy`` files exist.

     If both cache files open successfully this is a no-op.  Otherwise, when
     ``download_if_missing`` is true, the raw .amat files are parsed
     (downloading and extracting the archive from BASE_URL/REMOTE first if
     the test .amat is unreadable), converted to numpy arrays, saved as
     .npy, and the .amat sources are deleted.  When ``download_if_missing``
     is false, the original ``IOError`` propagates to the caller.

     All work happens under a file lock on ``self.home()`` so that
     concurrent fetches do not interleave their filesystem writes.
     """
     with lockfile.FileLock(self.home()) as lock:
         try:
             # Cheap existence probe: both cache files must be openable.
             open(self.home(self.NAME + '_inputs.npy')).close()
             open(self.home(self.NAME + '_labels.npy')).close()
         except IOError:
             if download_if_missing:
                 try:
                     # First assume the archive was already extracted.
                     amat_test = AMat(self.test_amat())
                 except IOError:
                     logger.info('Failed to read %s, downloading %s' % (
                         self.test_amat(),
                         os.path.join(self.BASE_URL, self.REMOTE)))
                     if not os.path.exists(self.home()):
                         os.makedirs(self.home())
                     utils.download_and_extract(
                         os.path.join(self.BASE_URL, self.REMOTE),
                         self.home(),
                         verbose=False,
                         sha1=self.SHA1)
                     # Retry now that the archive has been extracted.
                     amat_test = AMat(self.test_amat())
                 amat_train = AMat(self.train_amat())
                 # Each row: a flattened 28x28 image plus one trailing label.
                 n_inputs = 28**2
                 n_train = self.descr['n_train']
                 n_valid = self.descr['n_valid']
                 n_test = self.descr['n_test']
                 # NOTE(review): assert-based validation is stripped under
                 # `python -O`; confirm that is acceptable here.
                 assert amat_train.all.shape[0] == n_train + n_valid
                 assert amat_test.all.shape[0] == n_test
                 assert amat_train.all.shape[1] == amat_test.all.shape[1]
                 assert amat_train.all.shape[1] == n_inputs + 1
                 # Stack train+valid rows on top of test rows.
                 allmat = np.vstack((amat_train.all, amat_test.all))
                 inputs = np.reshape(
                         allmat[:, :n_inputs].astype('float32'),
                         (-1, 28, 28))
                 labels = allmat[:, n_inputs].astype('int32')
                 # The cast to int32 must not have altered any label value.
                 assert np.all(labels == allmat[:, n_inputs])
                 assert np.all(labels < self.descr['n_classes'])
                 np.save(self.home(self.NAME + '_inputs.npy'), inputs)
                 np.save(self.home(self.NAME + '_labels.npy'), labels)
                 # clean up the .amat files we downloaded
                 os.remove(self.test_amat())
                 os.remove(self.train_amat())
             else:
                 raise
# Example #2 (scraped page separator; original vote count "0" — not code)
 def fetch(self, download_if_missing: bool) -> None:
     """Ensure the cached ``<NAME>_inputs.npy`` / ``<NAME>_labels.npy`` files exist.

     If both cache files open successfully this is a no-op.  Otherwise, when
     ``download_if_missing`` is true, the raw .amat files are parsed
     (downloading and extracting the archive from BASE_URL/REMOTE first if
     the test .amat is unreadable), converted to numpy arrays, saved as
     .npy, and the .amat sources are deleted.  When ``download_if_missing``
     is false, the original ``IOError`` propagates to the caller.

     All work happens under a file lock on ``self.home()`` so that
     concurrent fetches do not interleave their filesystem writes.
     """
     with lockfile.FileLock(self.home()) as lock:
         try:
             # Cheap existence probe: both cache files must be openable.
             open(self.home(self.NAME + '_inputs.npy')).close()
             open(self.home(self.NAME + '_labels.npy')).close()
         except IOError:
             if download_if_missing:
                 try:
                     # First assume the archive was already extracted.
                     amat_test = AMat(self.test_amat())
                 except IOError:
                     logger.info('Failed to read %s, downloading %s' %
                                 (self.test_amat(),
                                  os.path.join(self.BASE_URL, self.REMOTE)))
                     if not os.path.exists(self.home()):
                         os.makedirs(self.home())
                     utils.download_and_extract(os.path.join(
                         self.BASE_URL, self.REMOTE),
                                                self.home(),
                                                verbose=False,
                                                sha1=self.SHA1)
                     # Retry now that the archive has been extracted.
                     amat_test = AMat(self.test_amat())
                 amat_train = AMat(self.train_amat())
                 # Each row: a flattened 28x28 image plus one trailing label.
                 n_inputs = 28**2
                 n_train = self.descr['n_train']
                 n_valid = self.descr['n_valid']
                 n_test = self.descr['n_test']
                 # NOTE(review): assert-based validation is stripped under
                 # `python -O`; confirm that is acceptable here.
                 assert amat_train.all.shape[0] == n_train + n_valid
                 assert amat_test.all.shape[0] == n_test
                 assert amat_train.all.shape[1] == amat_test.all.shape[1]
                 assert amat_train.all.shape[1] == n_inputs + 1
                 # Stack train+valid rows on top of test rows.
                 allmat = np.vstack((amat_train.all, amat_test.all))
                 inputs = np.reshape(allmat[:, :n_inputs].astype('float32'),
                                     (-1, 28, 28))
                 labels = allmat[:, n_inputs].astype('int32')
                 # The cast to int32 must not have altered any label value.
                 assert np.all(labels == allmat[:, n_inputs])
                 assert np.all(labels < self.descr['n_classes'])
                 np.save(self.home(self.NAME + '_inputs.npy'), inputs)
                 np.save(self.home(self.NAME + '_labels.npy'), labels)
                 # clean up the .amat files we downloaded
                 os.remove(self.test_amat())
                 os.remove(self.train_amat())
             else:
                 raise
# Example #3 (scraped page separator; original vote count "0" — not code)
    def fetch(self, download_if_missing: bool) -> None:
        """Ensure the cached ``<NAME>_inputs.npy`` / ``<NAME>_labels.npy`` files exist.

        If both cache files open successfully this is a no-op.  Otherwise,
        when ``download_if_missing`` is true, the level's .amat file is parsed
        (downloading and extracting the archive from BASE_URL/REMOTE first if
        unreadable), the sibling levels' .amat files that arrived in the same
        archive are handed off to their own datasets, and finally the data is
        converted to numpy arrays, saved as .npy, and the source .amat is
        deleted.  When ``download_if_missing`` is false, the original
        ``IOError`` propagates to the caller.

        NOTE(review): unlike other fetch implementations, no file lock is
        taken here, so concurrent fetches could race — confirm intended.
        """
        try:
            # Cheap existence probe: both cache files must be openable.
            open(self.home(self.NAME + '_inputs.npy')).close()
            open(self.home(self.NAME + '_labels.npy')).close()
        except IOError:
            if download_if_missing:
                all_amat_filename = self.level_amat(self.LEVEL)
                try:
                    # First assume the archive was already extracted.
                    amat_all = AMat(all_amat_filename)
                except IOError:
                    logger.info('Failed to read %s, downloading %s' %
                                (all_amat_filename,
                                 os.path.join(self.BASE_URL, self.REMOTE)))
                    if not os.path.exists(self.home()):
                        os.makedirs(self.home())
                    utils.download_and_extract(os.path.join(
                        self.BASE_URL, self.REMOTE),
                                               self.home(),
                                               verbose=False,
                                               sha1=self.SHA1)
                    # Retry now that the archive has been extracted.
                    amat_all = AMat(all_amat_filename)
                # at this point self.home() contains not only the
                # all_amat_filename, but it also contains the amats for all
                # the other levels too.
                #
                # This for loop transfers each amat to the dataset folder where
                # it belongs.
                for level in range(1, 7):
                    if level == self.LEVEL:
                        continue
                    if not os.path.exists(self.level_amat(level)):
                        continue
                    other = BaseNoise(level)
                    try:
                        # try loading the other one
                        other.fetch(download_if_missing=False)

                        # if that worked, then delete just-downloaded amat
                        # required for the other's build_meta
                        os.remove(self.level_amat(level))
                    except IOError:
                        # assuming this was because fetch failed,
                        # move the amat for the other level into the
                        # home folder of the other dataset.
                        # next time the other dataset is fetched,
                        # it will load the amat, save a npy, and delete the
                        # amat.
                        if not os.path.exists(other.home()):
                            os.makedirs(other.home())
                        os.rename(self.level_amat(level),
                                  other.level_amat(level))

                # now carry on loading as usual
                # Each row: a flattened 28x28 image plus one trailing label.
                n_inputs = 28**2
                n_train = self.descr['n_train']
                n_valid = self.descr['n_valid']
                n_test = self.descr['n_test']
                # NOTE(review): assert-based validation is stripped under
                # `python -O`; confirm that is acceptable here.
                assert amat_all.all.shape[0] == n_train + n_valid + n_test
                assert amat_all.all.shape[1] == n_inputs + 1
                inputs = np.reshape(
                    amat_all.all[:, :n_inputs].astype('float32'), (-1, 28, 28))
                labels = amat_all.all[:, n_inputs].astype('int32')
                # The cast to int32 must not have altered any label value.
                assert np.all(labels == amat_all.all[:, n_inputs])
                assert np.all(labels < self.descr['n_classes'])
                np.save(self.home(self.NAME + '_inputs.npy'), inputs)
                np.save(self.home(self.NAME + '_labels.npy'), labels)
                # clean up the .amat files we downloaded
                os.remove(all_amat_filename)
            else:
                raise
    def fetch(self, download_if_missing: bool) -> None:
        """Ensure the cached ``<NAME>_inputs.npy`` / ``<NAME>_labels.npy`` files exist.

        If both cache files open successfully this is a no-op.  Otherwise,
        when ``download_if_missing`` is true, the level's .amat file is parsed
        (downloading and extracting the archive from BASE_URL/REMOTE first if
        unreadable), the sibling levels' .amat files that arrived in the same
        archive are handed off to their own datasets, and finally the data is
        converted to numpy arrays, saved as .npy, and the source .amat is
        deleted.  When ``download_if_missing`` is false, the original
        ``IOError`` propagates to the caller.

        NOTE(review): unlike other fetch implementations, no file lock is
        taken here, so concurrent fetches could race — confirm intended.
        """
        try:
            # Cheap existence probe: both cache files must be openable.
            open(self.home(self.NAME + '_inputs.npy')).close()
            open(self.home(self.NAME + '_labels.npy')).close()
        except IOError:
            if download_if_missing:
                all_amat_filename = self.level_amat(self.LEVEL)
                try:
                    # First assume the archive was already extracted.
                    amat_all = AMat(all_amat_filename)
                except IOError:
                    logger.info('Failed to read %s, downloading %s' % (
                        all_amat_filename,
                        os.path.join(self.BASE_URL, self.REMOTE)))
                    if not os.path.exists(self.home()):
                        os.makedirs(self.home())
                    utils.download_and_extract(
                        os.path.join(self.BASE_URL, self.REMOTE),
                        self.home(),
                        verbose=False,
                        sha1=self.SHA1)
                    # Retry now that the archive has been extracted.
                    amat_all = AMat(all_amat_filename)
                # at this point self.home() contains not only the
                # all_amat_filename, but it also contains the amats for all
                # the other levels too.
                #
                # This for loop transfers each amat to the dataset folder where
                # it belongs.
                for level in range(1, 7):
                    if level == self.LEVEL:
                        continue
                    if not os.path.exists(self.level_amat(level)):
                        continue
                    other = BaseNoise(level)
                    try:
                        # try loading the other one
                        other.fetch(download_if_missing=False)

                        # if that worked, then delete just-downloaded amat
                        # required for the other's build_meta
                        os.remove(self.level_amat(level))
                    except IOError:
                        # assuming this was because fetch failed,
                        # move the amat for the other level into the
                        # home folder of the other dataset.
                        # next time the other dataset is fetched,
                        # it will load the amat, save a npy, and delete the
                        # amat.
                        if not os.path.exists(other.home()):
                            os.makedirs(other.home())
                        os.rename(
                                self.level_amat(level),
                                other.level_amat(level))

                # now carry on loading as usual
                # Each row: a flattened 28x28 image plus one trailing label.
                n_inputs = 28**2
                n_train = self.descr['n_train']
                n_valid = self.descr['n_valid']
                n_test = self.descr['n_test']
                # NOTE(review): assert-based validation is stripped under
                # `python -O`; confirm that is acceptable here.
                assert amat_all.all.shape[0] == n_train + n_valid + n_test
                assert amat_all.all.shape[1] == n_inputs + 1
                inputs = np.reshape(
                        amat_all.all[:, :n_inputs].astype('float32'),
                        (-1, 28, 28))
                labels = amat_all.all[:, n_inputs].astype('int32')
                # The cast to int32 must not have altered any label value.
                assert np.all(labels == amat_all.all[:, n_inputs])
                assert np.all(labels < self.descr['n_classes'])
                np.save(self.home(self.NAME + '_inputs.npy'), inputs)
                np.save(self.home(self.NAME + '_labels.npy'), labels)
                # clean up the .amat files we downloaded
                os.remove(all_amat_filename)
            else:
                raise
# Example #5 (scraped page separator; original vote count "0" — not code)
 def prepare_data(self):
     """Download and extract the dataset archive unless ``data_dir`` exists."""
     already_present = os.path.exists(self.data_dir)
     if already_present:
         return
     download_and_extract(self.url, self.data_dir)