def prepare_dataset(data_dir):
    """Download the UCI Skin Segmentation dataset and split it 70/15/15.

    Parameters
    ----------
    data_dir : str
        Directory where the raw ``Skin_NonSkin.txt`` file is saved.

    Returns
    -------
    list
        ``[train_set, valid_set, test_set]`` where each set is ``[x, y]``
        with ``x`` of shape (n, 3) — the three pixel-value columns — and
        ``y`` of shape (n, 1) with labels remapped from {1, 2} to {0, 1}.
    """
    # download dataset
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00229/Skin_NonSkin.txt"
    save_path = os.path.join(data_dir, url.split("/")[-1])
    try:
        download_url(url, save_path)
    except Exception as e:
        print('Error downloading dataset: %s' % str(e))
        sys.exit(1)

    # Read the tab-separated dataset.  Use a context manager so the file
    # handle is closed (the original leaked it via open(...).readlines()).
    with open(save_path, "r") as f:
        data = [line.strip("\n").split("\t") for line in f]
    data = np.asarray(data).astype(float)

    # Shuffle samples before splitting so the splits are drawn uniformly.
    n_samples = len(data)
    random_idx = np.arange(0, n_samples)
    np.random.shuffle(random_idx)
    data = data[random_idx]

    # First 3 columns are features, last column is the class label;
    # labels in the file are {1, 2} -> remap to {0, 1}.
    x, y = data[:, :3], data[:, 3:]
    y = (y - 1).astype(int)

    # 70% train / 15% validation / 15% test.
    train_split = int(n_samples * 0.7)
    valid_split = int(n_samples * 0.85)
    train_set = [x[:train_split, :], y[:train_split]]
    valid_set = [x[train_split:valid_split, :], y[train_split:valid_split]]
    test_set = [x[valid_split:, :], y[valid_split:]]
    return [train_set, valid_set, test_set]
def prepare_dataset(data_dir):
    """Download the pickled MNIST dataset and return its contents.

    Parameters
    ----------
    data_dir : str
        Directory where ``mnist.pkl.gz`` is saved.

    Returns
    -------
    tuple
        ``(train_set, valid_set, test_set)`` exactly as unpickled from
        the gzip archive; each element is an ``(images, labels)`` pair.
    """
    # deeplearning.net has been offline for years; use the well-known
    # GitHub mirror of the identical mnist.pkl.gz file (the same URL is
    # already used elsewhere in this project).
    url = "https://raw.githubusercontent.com/mnielsen/neural-networks-and-deep-learning/master/data/mnist.pkl.gz"
    save_path = os.path.join(data_dir, url.split("/")[-1])
    print("Preparing MNIST dataset ...")
    try:
        download_url(url, save_path)
    except Exception as e:
        print('Error downloading dataset: %s' % str(e))
        sys.exit(1)
    # The pickle was produced under Python 2, hence encoding="latin1"
    # so the numpy arrays deserialize correctly under Python 3.
    with gzip.open(save_path, "rb") as f:
        return pickle.load(f, encoding="latin1")
def prepare_dataset(data_dir):
    """Fetch the gzipped MNIST pickle and return its unpickled contents.

    Downloads ``mnist.pkl.gz`` into *data_dir* (exiting on download
    failure) and returns the deserialized object from the archive.
    """
    dataset_url = "https://raw.githubusercontent.com/mnielsen/neural-networks-and-deep-learning/master/data/mnist.pkl.gz"
    archive_name = dataset_url.split("/")[-1]
    archive_path = os.path.join(data_dir, archive_name)
    print("Preparing MNIST dataset ...")
    try:
        download_url(dataset_url, archive_path)
    except Exception as err:
        print('Error downloading dataset: %s' % str(err))
        sys.exit(1)
    # Python-2-era pickle: latin1 decoding keeps the arrays intact.
    with gzip.open(archive_path, "rb") as fh:
        dataset = pickle.load(fh, encoding="latin1")
    return dataset
def prepare_dataset(data_dir):
    """Download pickled MNIST and return all samples as one (X, y) pair.

    Parameters
    ----------
    data_dir : str
        Directory where ``mnist.pkl.gz`` is saved.

    Returns
    -------
    tuple
        ``(X, y)`` — the train/valid/test splits from the archive
        concatenated in that order along axis 0.
    """
    # deeplearning.net has been offline for years; use the well-known
    # GitHub mirror of the identical mnist.pkl.gz file (the same URL is
    # already used elsewhere in this project).
    url = "https://raw.githubusercontent.com/mnielsen/neural-networks-and-deep-learning/master/data/mnist.pkl.gz"
    save_path = os.path.join(data_dir, url.split("/")[-1])
    print("Preparing MNIST dataset ...")
    try:
        download_url(url, save_path)
    except Exception as e:
        print("Error downloading dataset: %s" % str(e))
        sys.exit(1)
    # The pickle was produced under Python 2, hence encoding="latin1".
    with gzip.open(save_path, "rb") as f:
        train, valid, test = pickle.load(f, encoding="latin1")
    # Merge the three canonical splits into a single dataset.
    X = np.concatenate([train[0], valid[0], test[0]])
    y = np.concatenate([train[1], valid[1], test[1]])
    return X, y