def prep_mnist():
    """Export the MNIST digits as the ARFF stream 'MNIST.arff'.

    The train and test splits returned by ``kd.mnist.load_data()`` are
    merged, every image is flattened to one row with pixel values divided
    by 255, and the label is appended as the last column.  The resulting
    table is handed to ``arff_utils.image_stream_to_arff`` together with
    the image shape ``(28, 28)``.
    """
    (x_train, y_train), (x_test, y_test) = kd.mnist.load_data()
    images = np.concatenate((x_train, x_test))
    # One reshape flattens every image at once; no per-image Python loop.
    x = images.reshape(len(images), -1) / 255
    y = np.concatenate((y_train, y_test))
    # column_stack appends y as a trailing column and promotes it to the
    # dtype of x, which is what the per-row np.append did as well.
    xy = np.column_stack((x, y))
    print(x.shape, y.shape, xy.shape)
    arff_utils.image_stream_to_arff(xy, (28, 28), 'MNIST', 'MNIST.arff')
def prep_mnist_f():
    """Export Fashion-MNIST as the ARFF stream 'MNIST_F.arff'.

    The train and test splits returned by ``kd.fashion_mnist.load_data()``
    are merged, every image is flattened to one row with pixel values
    divided by 255, and the class *name* (looked up from the numeric label)
    is appended as the last column.  The table is handed to
    ``arff_utils.image_stream_to_arff`` with the image shape ``(28, 28)``.
    """
    (x_train, y_train), (x_test, y_test) = kd.fashion_mnist.load_data()
    images = np.concatenate((x_train, x_test))
    # One reshape flattens every image at once; no per-image Python loop.
    x = images.reshape(len(images), -1) / 255
    y = np.concatenate((y_train, y_test))
    labels = np.array([
        'T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt',
        'Sneaker', 'Bag', 'Ankle_boots'
    ])
    # labels[y] maps every numeric label to its name in one indexing step.
    # Mixing floats with strings makes NumPy promote the whole table to a
    # string dtype, which is also what the per-row np.append produced.
    xy = np.column_stack((x, labels[y]))
    print(x.shape, y.shape, xy.shape)
    arff_utils.image_stream_to_arff(xy, (28, 28), 'MNIST_F', 'MNIST_F.arff')
def prep_cmater(root):
    """Export the CMATER Bangla numerals as 'CMATER-BANGLA.arff'.

    Reads the ``images`` and ``labels`` arrays from the ``.npz`` archive
    under *root*, averages every pixel's trailing values into one number
    divided by 255, flattens each image to a row and appends its label.
    Every sample is included twice and the rows are shuffled in place with
    ``np.random.shuffle`` before being handed to
    ``arff_utils.image_stream_to_arff`` with the image shape ``(32, 32)``.

    Args:
        root: Directory that contains the ``raw-batch`` tree.
    """
    path = f'{root}/raw-batch/vis/cmater/datasets/bangla-numerals/training-images.npz'
    # np.load keeps the archive open for lazy access; the context manager
    # closes it as soon as both arrays have been read into memory.
    with np.load(path) as data:
        images = data['images']
        y = data['labels']
    # Fold everything after the (image, row, column) axes into one axis and
    # average it: a single vectorized pass instead of one np.average call
    # per pixel.  Images without a channel axis average to themselves.
    gray = images.reshape(*images.shape[:3], -1).mean(axis=-1) / 255
    x = gray.reshape(len(gray), -1)
    # Each label is flattened before being appended, as np.append did.
    # NOTE: unlike zip(), this raises if images and labels differ in length.
    rows = np.column_stack((x, y.reshape(len(y), -1)))
    # The stream deliberately contains every sample twice.
    xy = np.concatenate((rows, rows))
    np.random.shuffle(xy)
    print(x.shape, y.shape, xy.shape)
    arff_utils.image_stream_to_arff(xy, (32, 32), 'CMATER-BANGLA',
                                    'CMATER-BANGLA.arff')
def prep_cifar():
    """Export CIFAR-10, converted to grayscale, as 'CIFAR10.arff'.

    The train and test splits returned by ``kd.cifar10.load_data()`` are
    merged, the values of each pixel are averaged into a single intensity
    divided by 255, every image is flattened to one row, and the class
    name is appended as the last column.  The table is handed to
    ``arff_utils.image_stream_to_arff`` with the image shape ``(32, 32)``.
    """
    (x_train, y_train), (x_test, y_test) = kd.cifar10.load_data()
    x_rgb = np.concatenate((x_train, x_test))
    # Averaging over the last (channel) axis converts the whole dataset in
    # one vectorized call instead of one np.average call per pixel.
    gray = x_rgb.mean(axis=-1) / 255
    x = gray.reshape(len(gray), -1)
    # The label arrays are flattened to 1-D so they can index `labels`.
    y = np.concatenate((y_train, y_test)).flatten()
    labels = np.array([
        'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
        'horse', 'ship', 'truck'
    ])
    # Mixing floats with class-name strings makes NumPy promote the table
    # to a string dtype, which is what the per-row np.append produced too.
    xy = np.column_stack((x, labels[y]))
    print(x.shape, y.shape, xy.shape)
    arff_utils.image_stream_to_arff(xy, (32, 32), 'CIFAR10', 'CIFAR10.arff')
def prep_imagenette(root):
    """Export the Imagenette training images as 'IMAGENETTE.arff'.

    Every entry of the ``train_64`` directory under *root* is used as a
    class label and every file inside it is read with ``imread``.  Each
    image is reduced to one intensity per pixel divided by 255, flattened,
    and stored with its label appended.  Files for which a ``ValueError``
    is raised are reported and skipped.  The rows are shuffled in place
    with ``np.random.shuffle`` and handed to
    ``arff_utils.image_stream_to_arff`` with the image shape ``(64, 64)``.

    Args:
        root: Directory that contains the ``raw-batch`` tree.
    """
    path = f'{root}/raw-batch/vis/imagenette/train_64'
    xy = []
    for label in os.listdir(path):
        for image_file_path in glob.glob(f'{path}/{label}/*'):
            try:
                img = np.asarray(imread(image_file_path))
                # Fold everything after the (row, column) axes into one
                # axis and average it in a single vectorized call.  A 2-D
                # grayscale image gets a length-1 axis and so averages to
                # itself, matching the former per-pixel np.average.
                gray = img.reshape(*img.shape[:2], -1).mean(axis=-1) / 255
                x = gray.flatten()
                xy.append(np.append(x, label))
                print(image_file_path, x.shape, label)
            except ValueError as e:
                print(f'Could not read {image_file_path}: {e}')
    xy = np.array(xy)
    np.random.shuffle(xy)
    print(xy.shape)
    arff_utils.image_stream_to_arff(xy, (64, 64), 'IMAGENETTE',
                                    'IMAGENETTE.arff')
def prep_malaria(root):
    """Export the malaria cell images as 'MALARIA.arff'.

    Files from the ``Parasitized`` directory are labelled ``'pos'`` and
    files from the ``Uninfected`` directory ``'neg'``.  Each image is read
    with ``imread``, reduced to one intensity per pixel divided by 255,
    flattened, and stored with its label appended.  Files for which a
    ``ValueError`` is raised are reported and skipped.  The rows are
    shuffled in place with ``np.random.shuffle`` and handed to
    ``arff_utils.image_stream_to_arff`` with the image shape ``(32, 32)``.

    Args:
        root: Directory that contains the ``raw-batch`` tree.
    """
    pos_path = f'{root}/raw-batch/vis/malaria/32/Parasitized'
    neg_path = f'{root}/raw-batch/vis/malaria/32/Uninfected'
    # Tie the label to the directory a file was found in.  Searching the
    # full path for 'Parasitized' would mislabel every sample whenever
    # `root` (or a file name) happens to contain that word.
    train_files = [(name, 'pos') for name in glob.glob(pos_path + "/*")]
    train_files += [(name, 'neg') for name in glob.glob(neg_path + "/*")]
    xy = []
    for i, (name, y) in enumerate(train_files):
        try:
            img = np.asarray(imread(name))
            # Fold everything after the (row, column) axes into one axis
            # and average it in a single vectorized call.  A 2-D grayscale
            # image gets a length-1 axis and so averages to itself.
            gray = img.reshape(*img.shape[:2], -1).mean(axis=-1) / 255
            x = gray.flatten()
            xy.append(np.append(x, y))
            print(i, name, x.shape, y)
        except ValueError as e:
            print(f'Could not read {name}: {e}')
    xy = np.array(xy)
    np.random.shuffle(xy)
    print(xy.shape)
    arff_utils.image_stream_to_arff(xy, (32, 32), 'MALARIA', 'MALARIA.arff')
def prep_intel(root):
    """Export the Intel scene images as 'INTEL-IMGS.arff'.

    Every entry of the ``seg_train_32`` directory under *root* is used as
    a class label and every file inside it is read with ``imread``.  Each
    image is reduced to one intensity per pixel divided by 255, flattened,
    and stored with its label appended; every sample is added twice.
    Files for which a ``ValueError`` is raised are reported and skipped.
    The rows are shuffled in place with ``np.random.shuffle`` and handed to
    ``arff_utils.image_stream_to_arff`` with the image shape ``(32, 32)``.

    Args:
        root: Directory that contains the ``raw-batch`` tree.
    """
    path = f'{root}/raw-batch/vis/intel_imgs/seg_train_32'
    xy = []
    for label in os.listdir(path):
        for image_file_path in glob.glob(f'{path}/{label}/*'):
            try:
                img = np.asarray(imread(image_file_path))
                # Fold everything after the (row, column) axes into one
                # axis and average it in a single vectorized call.  A 2-D
                # grayscale image gets a length-1 axis and so averages to
                # itself.
                gray = img.reshape(*img.shape[:2], -1).mean(axis=-1) / 255
                x = gray.flatten()
                # The stream deliberately contains every sample twice, so
                # build the row once and add it two times.
                sample = np.append(x, label)
                xy.extend((sample, sample))
                print(image_file_path, x.shape, label)
            except ValueError as e:
                print(f'Could not read {image_file_path}: {e}')
    xy = np.array(xy)
    np.random.shuffle(xy)
    print(xy.shape)
    arff_utils.image_stream_to_arff(xy, (32, 32), 'INTEL-IMGS',
                                    'INTEL-IMGS.arff')
def prep_dogs_vs_cats(root):
    """Export the dogs-vs-cats training images as 'DOGS-VS-CATS.arff'.

    Every file in the ``train_32`` directory under *root* is read with
    ``imread``.  A file is labelled ``'cat'`` when its file name contains
    ``'cat'`` and ``'dog'`` otherwise.  Each image is reduced to one
    intensity per pixel divided by 255, flattened, and stored with its
    label appended.  Files for which a ``ValueError`` is raised are
    reported and skipped.  The rows are shuffled in place with
    ``np.random.shuffle`` and handed to
    ``arff_utils.image_stream_to_arff`` with the image shape ``(32, 32)``.

    Args:
        root: Directory that contains the ``raw-batch`` tree.
    """
    import os  # local import: this function did not previously rely on os

    path = f'{root}/raw-batch/vis/dogs-vs-cats/train_32'
    train_files = glob.glob(path + "/*")
    xy = []
    for i, name in enumerate(train_files):
        try:
            img = np.asarray(imread(name))
            # Only the file name decides the label; basename honours the
            # platform's path separator instead of assuming '/'.
            y = 'cat' if 'cat' in os.path.basename(name) else 'dog'
            # Fold everything after the (row, column) axes into one axis
            # and average it in a single vectorized call.  A 2-D grayscale
            # image gets a length-1 axis and so averages to itself.
            gray = img.reshape(*img.shape[:2], -1).mean(axis=-1) / 255
            x = gray.flatten()
            xy.append(np.append(x, y))
            print(i, name, x.shape, y)
        except ValueError as e:
            print(f'Could not read {name}: {e}')
    xy = np.array(xy)
    np.random.shuffle(xy)
    print(xy.shape)
    arff_utils.image_stream_to_arff(xy, (32, 32), 'DOGS-VS-CATS',
                                    'DOGS-VS-CATS.arff')