def fetch(self, download_if_missing):
    # Guard against concurrent fetches of the same dataset.
    with lockfile.FileLock(self.home()):
        # If the cached .npy files already exist, there is nothing to do.
        try:
            open(self.home(self.NAME + '_inputs.npy')).close()
            open(self.home(self.NAME + '_labels.npy')).close()
        except IOError:
            if download_if_missing:
                try:
                    amat_test = AMat(self.test_amat())
                except IOError:
                    logger.info('Failed to read %s, downloading %s' % (
                        self.test_amat(),
                        os.path.join(self.BASE_URL, self.REMOTE)))
                    if not os.path.exists(self.home()):
                        os.makedirs(self.home())
                    utils.download_and_extract(
                        os.path.join(self.BASE_URL, self.REMOTE),
                        self.home(),
                        verbose=False,
                        sha1=self.SHA1)
                    amat_test = AMat(self.test_amat())
                amat_train = AMat(self.train_amat())

                # Sanity-check the .amat shapes against the descriptor:
                # each row is a flattened 28x28 image plus one label column.
                n_inputs = 28 ** 2
                n_train = self.descr['n_train']
                n_valid = self.descr['n_valid']
                n_test = self.descr['n_test']
                assert amat_train.all.shape[0] == n_train + n_valid
                assert amat_test.all.shape[0] == n_test
                assert amat_train.all.shape[1] == amat_test.all.shape[1]
                assert amat_train.all.shape[1] == n_inputs + 1

                # Stack train and test rows, then split into image and
                # label arrays and cache them as .npy files.
                allmat = np.vstack((amat_train.all, amat_test.all))
                inputs = np.reshape(
                    allmat[:, :n_inputs].astype('float32'), (-1, 28, 28))
                labels = allmat[:, n_inputs].astype('int32')
                assert np.all(labels == allmat[:, n_inputs])
                assert np.all(labels < self.descr['n_classes'])
                np.save(self.home(self.NAME + '_inputs.npy'), inputs)
                np.save(self.home(self.NAME + '_labels.npy'), labels)

                # Clean up the .amat files we downloaded.
                os.remove(self.test_amat())
                os.remove(self.train_amat())
            else:
                raise
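# Hedged usage sketch of the fetch method above. MNISTBasic is a
# hypothetical concrete subclass name (any subclass defining NAME, home(),
# train_amat()/test_amat(), BASE_URL, REMOTE, SHA1 and descr would do);
# only the NAME + '_inputs.npy' / '_labels.npy' caching convention is taken
# from the code itself.
dataset = MNISTBasic()
dataset.fetch(download_if_missing=True)  # downloads and caches on first call
inputs = np.load(dataset.home(dataset.NAME + '_inputs.npy'))  # (N, 28, 28) float32
labels = np.load(dataset.home(dataset.NAME + '_labels.npy'))  # (N,) int32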
def fetch(self, download_if_missing):
    # If the cached .npy files already exist, there is nothing to do.
    try:
        open(self.home(self.NAME + '_inputs.npy')).close()
        open(self.home(self.NAME + '_labels.npy')).close()
    except IOError:
        if download_if_missing:
            all_amat_filename = self.level_amat(self.LEVEL)
            try:
                amat_all = AMat(all_amat_filename)
            except IOError:
                logger.info('Failed to read %s, downloading %s' % (
                    all_amat_filename,
                    os.path.join(self.BASE_URL, self.REMOTE)))
                if not os.path.exists(self.home()):
                    os.makedirs(self.home())
                utils.download_and_extract(
                    os.path.join(self.BASE_URL, self.REMOTE),
                    self.home(),
                    verbose=False,
                    sha1=self.SHA1)
                amat_all = AMat(all_amat_filename)

                # At this point self.home() contains not only
                # all_amat_filename, but also the amats for all the other
                # noise levels. This loop transfers each amat to the
                # dataset folder where it belongs.
                for level in range(1, 7):
                    if level == self.LEVEL:
                        continue
                    if not os.path.exists(self.level_amat(level)):
                        continue
                    other = BaseNoise(level)
                    try:
                        # Try loading the other level's dataset; if that
                        # worked, delete the just-downloaded amat that
                        # would otherwise be required for the other's
                        # build_meta.
                        other.fetch(download_if_missing=False)
                        os.remove(self.level_amat(level))
                    except IOError:
                        # Assuming the fetch failed because the other
                        # dataset has not been built yet, move its amat
                        # into its home folder. Next time that dataset is
                        # fetched, it will load the amat, save an .npy,
                        # and delete the amat.
                        if not os.path.exists(other.home()):
                            os.makedirs(other.home())
                        os.rename(
                            self.level_amat(level),
                            other.level_amat(level))

            # Now carry on loading as usual.
            n_inputs = 28 ** 2
            n_train = self.descr['n_train']
            n_valid = self.descr['n_valid']
            n_test = self.descr['n_test']
            assert amat_all.all.shape[0] == n_train + n_valid + n_test
            assert amat_all.all.shape[1] == n_inputs + 1
            inputs = np.reshape(
                amat_all.all[:, :n_inputs].astype('float32'), (-1, 28, 28))
            labels = amat_all.all[:, n_inputs].astype('int32')
            assert np.all(labels == amat_all.all[:, n_inputs])
            assert np.all(labels < self.descr['n_classes'])
            np.save(self.home(self.NAME + '_inputs.npy'), inputs)
            np.save(self.home(self.NAME + '_labels.npy'), labels)

            # Clean up the .amat file we downloaded.
            os.remove(all_amat_filename)
        else:
            raise
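# Hedged usage sketch of the redistribution behaviour above: the remote
# archive is assumed to bundle one .amat per noise level (levels 1..6, per
# the loop), so fetching one level pre-seeds the others.
noise1 = BaseNoise(1)
noise1.fetch(download_if_missing=True)
# fetch() has now moved the amats for levels 2..6 into their own datasets'
# home folders, so this second call needs no network access:
# AMat(all_amat_filename) succeeds immediately, and fetch() just converts
# the amat to .npy files and deletes it.
noise2 = BaseNoise(2)
noise2.fetch(download_if_missing=True)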
def prepare_data(self):
    # Download and unpack the dataset only if it is not already on disk.
    if not os.path.exists(self.data_dir):
        download_and_extract(self.url, self.data_dir)
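# prepare_data and the fetch methods above both lean on a download-and-
# extract helper. The real utils.download_and_extract is not shown in this
# section; below is a minimal Python 3 sketch of what such a helper might
# look like, with optional verbose/sha1 parameters matching the call sites
# above. The body is an assumption, not the library's actual implementation.
import hashlib
import os
import tarfile
import urllib.request


def download_and_extract(url, dest, verbose=True, sha1=None):
    # Download the archive at `url` into `dest`, optionally verify its
    # SHA1 checksum, unpack it in place, and remove the archive.
    if not os.path.exists(dest):
        os.makedirs(dest)
    archive = os.path.join(dest, os.path.basename(url))
    if verbose:
        print('downloading %s -> %s' % (url, archive))
    urllib.request.urlretrieve(url, archive)
    if sha1 is not None:
        with open(archive, 'rb') as f:
            digest = hashlib.sha1(f.read()).hexdigest()
        if digest != sha1:
            raise IOError('SHA1 mismatch for %s' % archive)
    with tarfile.open(archive) as tf:
        tf.extractall(dest)
    os.remove(archive)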