Example #1
def get_weight_md5(model):
    # Fingerprint a Keras model: MD5-checksum every weight array and join the
    # digests (0-d weights are kept as their string value instead).
    return '.'.join([
        md5_checksum(w) if w.ndim > 0 else str(w) for w in model.get_weights()
    ])
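None of these examples show the md5_checksum helper they call. The sketch below is an assumed implementation, not the project's actual code: it returns the hex MD5 digest of either a file on disk (how the dataset loaders below use it) or an in-memory NumPy array (how get_weight_md5 uses it).

import hashlib
import os

import numpy as np


def md5_checksum(obj, chunk_size=8192):
    # Assumed helper: hex MD5 digest of a file path or of a NumPy array's bytes.
    md5 = hashlib.md5()
    if isinstance(obj, str) and os.path.isfile(obj):
        with open(obj, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b''):
                md5.update(chunk)
    else:
        md5.update(np.ascontiguousarray(obj).tobytes())
    return md5.hexdigest()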
Example #2
  def __init__(self, path='~/tensorflow_datasets/mnist'):
    path = os.path.abspath(os.path.expanduser(path))
    save_path = os.path.join(path, 'mnist.npz')
    if not os.path.exists(path):
      os.makedirs(path)
    assert os.path.isdir(path)

    ## check for an existing processed file
    all_data = None
    if os.path.exists(save_path):
      if not os.path.isfile(save_path):
        raise ValueError("path to %s must be a file" % save_path)
      if md5_checksum(save_path) != MNIST.MD5:
        print("Miss match MD5 remove file at: ", save_path)
        os.remove(save_path)
      else:
        all_data = np.load(save_path)
    ## download and extract
    if all_data is None:
      from tqdm import tqdm

      def dl_progress(count, block_size, total_size):
        # reporthook for urlretrieve: advance the tqdm bar in kilobytes
        kB = block_size * count / 1024.
        prog.update(kB - prog.n)

      # read a big-endian uint32 from the IDX file header
      read32 = lambda b: np.frombuffer(
          b, dtype=np.dtype(np.uint32).newbyteorder('>'))[0]

      all_data = {}
      for name, url in MNIST.URL.items():
        basename = os.path.basename(url)
        zip_path = os.path.join(path, basename)
        prog = tqdm(desc="Downloading %s" % basename, unit='kB')
        urlretrieve(url, zip_path, dl_progress)
        prog.clear()
        prog.close()
        with gzip.open(zip_path, "rb") as f:
          magic = read32(f.read(4))
          if magic not in (2051, 2049):
            raise ValueError('Invalid magic number %d in MNIST file: %s' %
                             (magic, zip_path))
          n = read32(f.read(4))
          # images
          if 'X_' in name:
            rows = read32(f.read(4))
            cols = read32(f.read(4))
            buf = f.read(rows * cols * n)
            data = np.frombuffer(buf, dtype=np.uint8)
            data = data.reshape(n, rows, cols, 1)
          # labels
          else:
            buf = f.read(n)
            data = np.frombuffer(buf, dtype=np.uint8)
            data = one_hot(data, 10)
          all_data[name] = data
      np.savez_compressed(save_path, **all_data)
    ## split train, valid, test
    rand = np.random.RandomState(seed=1)
    ids = rand.permutation(all_data['X_train'].shape[0])
    X_train = all_data['X_train'][ids]
    y_train = all_data['y_train'][ids]
    X_valid = X_train[:5000]
    y_valid = y_train[:5000]
    X_train = X_train[5000:]
    y_train = y_train[5000:]
    X_test = all_data['X_test']
    y_test = all_data['y_test']
    to_ds = lambda images, labels: tf.data.Dataset.zip(
        (tf.data.Dataset.from_tensor_slices(images),
         tf.data.Dataset.from_tensor_slices(labels)))
    self.train = to_ds(X_train, y_train)
    self.valid = to_ds(X_valid, y_valid)
    self.test = to_ds(X_test, y_test)
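A short usage sketch for the loader above, assuming the __init__ belongs to a class named MNIST and TensorFlow 2.4+ is in use; train, valid and test are plain tf.data.Dataset pipelines, so they batch and prefetch as usual.

mnist = MNIST()  # downloads and caches under ~/tensorflow_datasets/mnist
train_ds = mnist.train.shuffle(10000).batch(128).prefetch(tf.data.AUTOTUNE)
for images, labels in train_ds.take(1):
  print(images.shape, labels.shape)  # (128, 28, 28, 1) (128, 10)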
Example #3
    def __init__(self, version, path="~/tensorflow_datasets/cifar"):
        path = os.path.abspath(os.path.expanduser(path))
        if not os.path.exists(path):
            os.makedirs(path)
        version = int(version)
        assert version in (10, 100), "Only support CIFAR-10 and CIFAR-100"
        ## download and extract
        url = CIFAR.URL[version]
        basename = os.path.basename(url)
        zip_path = os.path.join(path, basename)
        if os.path.exists(
                zip_path) and md5_checksum(zip_path) != CIFAR.MD5[version]:
            os.remove(zip_path)
        if not os.path.exists(zip_path):
            from tqdm import tqdm
            prog = tqdm(desc=f"Downloading file '{basename}'", unit="kB")

            def dl_progress(count, block_size, total_size):
                kB = count * block_size / 1024.
                prog.update(kB - prog.n)

            urlretrieve(url, zip_path, reporthook=dl_progress)
            prog.clear()
            prog.close()
        # extract
        data_dir = os.path.join(path, CIFAR.DIR_NAME[version])
        if os.path.exists(data_dir) and md5_folder(
                data_dir) != CIFAR.MD5_EXTRACT[version]:
            shutil.rmtree(data_dir)
        if not os.path.exists(data_dir):
            with tarfile.open(zip_path, "r:gz") as f:
                print("Extract zip file to ")
                f.extractall(path)
        ## load data
        X_train = []
        y_train = []
        y_train_coarse = []
        X_test = []
        y_test = []
        y_test_coarse = []
        for i in os.listdir(data_dir):
            # batch files have no extension; skip e.g. 'readme.html'
            if '.' not in i:
                with open(os.path.join(data_dir, i), 'rb') as f:
                    data = pickle.load(f, encoding='bytes')
                    if b'batch_label' not in data:  # metadata
                        continue
                    # labels
                    if b"labels" in data:
                        lab = data[b'labels']
                    elif b"fine_labels" in data:
                        lab = data[b'fine_labels']
                    lab_coarse = data[
                        b'coarse_labels'] if b'coarse_labels' in data else []
                    # store the data
                    if b'test' in data[b'batch_label'] or 'test' in i:
                        X_test.append(data[b'data'])
                        y_test += lab
                        y_test_coarse += lab_coarse
                    else:
                        X_train.append(data[b'data'])
                        y_train += lab
                        y_train_coarse += lab_coarse

        X_train = np.concatenate(X_train, axis=0)
        y_train = np.array(y_train)
        self.X_test = np.concatenate(X_test, axis=0)
        self.y_test = np.array(y_test)
        self.X_valid = X_train[:5000]
        self.y_valid = y_train[:5000]
        self.X_train = X_train[5000:]
        self.y_train = y_train[5000:]
        if len(y_train_coarse) > 0:
            y_train_coarse = np.array(y_train_coarse)
            self.y_valid_coarse = y_train_coarse[:5000]
            self.y_train_coarse = y_train_coarse[5000:]
            self.y_test_coarse = np.array(y_test_coarse)
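A usage sketch for the loader above, assuming the __init__ belongs to a class named CIFAR; note that the pickled CIFAR batches store each image as a flat 3072-byte row in channel-first order, so reshaping to images is left to the caller.

cifar10 = CIFAR(version=10)
print(cifar10.X_train.shape)  # (45000, 3072) raw uint8 rows
# the flat rows are laid out channel-first (3, 32, 32); convert to NHWC
images = cifar10.X_train.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
print(images.shape)  # (45000, 32, 32, 3)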