import gzip
import os
import pickle
import shutil
import tarfile
from urllib.request import urlretrieve

import numpy as np
import tensorflow as tf

# `md5_checksum`, `md5_folder` and `one_hot` are helper functions defined
# elsewhere in this module.


def get_weight_md5(model):
  """Join the MD5 checksums of every weight tensor in `model` into a single
  string, a cheap fingerprint for detecting weight changes; scalar weights
  are stringified directly."""
  return '.'.join(
      [md5_checksum(w) if w.ndim > 0 else str(w) for w in model.get_weights()])
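
# Minimal usage sketch (hypothetical helper, not part of the original API):
# the fingerprint changes whenever any weight tensor changes, so it is handy
# in tests asserting that a training step actually updated the model.
def _example_weight_fingerprint(model, x, y):
  before = get_weight_md5(model)
  model.fit(x, y, epochs=1, verbose=0)
  return before != get_weight_md5(model)  # True when any weight changed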
class MNIST:
  # The URL and MD5 class constants are defined in the full module and
  # omitted from this excerpt.

  def __init__(self, path='~/tensorflow_datasets/mnist'):
    path = os.path.abspath(os.path.expanduser(path))
    save_path = os.path.join(path, 'mnist.npz')
    if not os.path.exists(path):
      os.makedirs(path)
    assert os.path.isdir(path)
    ## check for an existing processed file
    all_data = None
    if os.path.exists(save_path):
      if not os.path.isfile(save_path):
        raise ValueError("path to %s must be a file" % save_path)
      if md5_checksum(save_path) != MNIST.MD5:
        print("MD5 mismatch, removing file at:", save_path)
        os.remove(save_path)
      else:
        all_data = np.load(save_path)
    ## download and extract
    if all_data is None:
      from tqdm import tqdm

      def dl_progress(count, block_size, total_size):
        kB = block_size * count / 1024.
        prog.update(kB - prog.n)

      # MNIST files store integers as big-endian uint32
      read32 = lambda b: np.frombuffer(
          b, dtype=np.dtype(np.uint32).newbyteorder('>'))[0]
      all_data = {}
      for name, url in MNIST.URL.items():
        basename = os.path.basename(url)
        zip_path = os.path.join(path, basename)
        prog = tqdm(desc="Downloading %s" % basename, unit='kB')
        urlretrieve(url, zip_path, dl_progress)
        prog.clear()
        prog.close()
        with gzip.open(zip_path, "rb") as f:
          # magic number 2051 marks an image file, 2049 a label file
          magic = read32(f.read(4))
          if magic not in (2051, 2049):
            raise ValueError('Invalid magic number %d in MNIST file: %s' %
                             (magic, zip_path))
          n = read32(f.read(4))
          # images
          if 'X_' in name:
            rows = read32(f.read(4))
            cols = read32(f.read(4))
            buf = f.read(rows * cols * n)
            data = np.frombuffer(buf, dtype=np.uint8)
            data = data.reshape(n, rows, cols, 1)
          # labels
          else:
            buf = f.read(n)
            data = np.frombuffer(buf, dtype=np.uint8)
            data = one_hot(data, 10)
          all_data[name] = data
      np.savez_compressed(save_path, **all_data)
    ## split into train, valid, test
    rand = np.random.RandomState(seed=1)
    ids = rand.permutation(all_data['X_train'].shape[0])
    X_train = all_data['X_train'][ids]
    y_train = all_data['y_train'][ids]
    X_valid = X_train[:5000]
    y_valid = y_train[:5000]
    X_train = X_train[5000:]
    y_train = y_train[5000:]
    X_test = all_data['X_test']
    y_test = all_data['y_test']
    to_ds = lambda images, labels: tf.data.Dataset.zip(
        (tf.data.Dataset.from_tensor_slices(images),
         tf.data.Dataset.from_tensor_slices(labels)))
    self.train = to_ds(X_train, y_train)
    self.valid = to_ds(X_valid, y_valid)
    self.test = to_ds(X_test, y_test)
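
# Usage sketch (the helper name below is hypothetical, not part of the
# original API): the loader exposes `train`, `valid` and `test` as
# tf.data.Dataset objects ready for the usual shuffle/batch/prefetch chain.
def _example_mnist_pipeline():
  mnist = MNIST()
  # tf.data.AUTOTUNE requires TF >= 2.4; older releases use
  # tf.data.experimental.AUTOTUNE instead.
  train = mnist.train.shuffle(1024).batch(128).prefetch(tf.data.AUTOTUNE)
  for images, labels in train.take(1):
    print(images.shape, labels.shape)  # (128, 28, 28, 1) and (128, 10)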
class CIFAR:
  # The URL, MD5, MD5_EXTRACT and DIR_NAME class constants are defined in the
  # full module and omitted from this excerpt.

  def __init__(self, version, path="~/tensorflow_datasets/cifar"):
    path = os.path.abspath(os.path.expanduser(path))
    if not os.path.exists(path):
      os.makedirs(path)
    version = int(version)
    assert version in (10, 100), "Only support CIFAR-10 and CIFAR-100"
    ## download and extract
    url = CIFAR.URL[version]
    basename = os.path.basename(url)
    zip_path = os.path.join(path, basename)
    if (os.path.exists(zip_path) and
        md5_checksum(zip_path) != CIFAR.MD5[version]):
      os.remove(zip_path)
    if not os.path.exists(zip_path):
      from tqdm import tqdm
      prog = tqdm(desc=f"Downloading file '{basename}'", unit="kB")

      def dl_progress(count, block_size, total_size):
        kB = count * block_size / 1024.
        prog.update(kB - prog.n)

      urlretrieve(url, zip_path, reporthook=dl_progress)
      prog.clear()
      prog.close()
    # extract
    data_dir = os.path.join(path, CIFAR.DIR_NAME[version])
    if (os.path.exists(data_dir) and
        md5_folder(data_dir) != CIFAR.MD5_EXTRACT[version]):
      shutil.rmtree(data_dir)
    if not os.path.exists(data_dir):
      with tarfile.open(zip_path, "r:gz") as f:
        print("Extracting the archive to:", path)
        f.extractall(path)
    ## load data
    X_train = []
    y_train = []
    y_train_coarse = []
    X_test = []
    y_test = []
    y_test_coarse = []
    for i in os.listdir(data_dir):
      # batch files have no extension (e.g. 'data_batch_1', 'test_batch')
      if '.' not in i:
        with open(os.path.join(data_dir, i), 'rb') as f:
          data = pickle.load(f, encoding='bytes')
          if b'batch_label' not in data:  # metadata
            continue
          # labels: CIFAR-10 uses 'labels', CIFAR-100 'fine_labels'
          if b"labels" in data:
            lab = data[b'labels']
          elif b"fine_labels" in data:
            lab = data[b'fine_labels']
          lab_coarse = (data[b'coarse_labels']
                        if b'coarse_labels' in data else [])
          # store the data (images stay flat, shape (n, 3072))
          if b'test' in data[b'batch_label'] or 'test' in i:
            X_test.append(data[b'data'])
            y_test += lab
            y_test_coarse += lab_coarse
          else:
            X_train.append(data[b'data'])
            y_train += lab
            y_train_coarse += lab_coarse
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.array(y_train)
    self.X_test = np.concatenate(X_test, axis=0)
    self.y_test = np.array(y_test)
    self.X_valid = X_train[:5000]
    self.y_valid = y_train[:5000]
    self.X_train = X_train[5000:]
    self.y_train = y_train[5000:]
    if len(y_train_coarse) > 0:
      y_train_coarse = np.array(y_train_coarse)
      self.y_valid_coarse = y_train_coarse[:5000]
      self.y_train_coarse = y_train_coarse[5000:]
      self.y_test_coarse = np.array(y_test_coarse)
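
# Usage sketch (hypothetical helper): CIFAR batches store each image as a
# flat row of 3072 bytes (3 channels x 32 x 32, channel-first), so reshape
# and transpose before feeding the arrays to an image model.
def _example_cifar_images():
  cifar = CIFAR(version=10)
  images = cifar.X_train.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
  print(images.shape, cifar.y_train.shape)  # (45000, 32, 32, 3) (45000,)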