def main():
    args = parse_args()
    path_to_x_train = args.x_train_dir
    path_to_y_train = args.y_train_dir
    path_to_model = args.model_output_dir
    verbosity = args.verbosity
    iter_num = args.iter_num
    batch_size = args.batch_size

    X_original = read_mnist(path_to_x_train)
    y_original = read_mnist(path_to_y_train)
    X, image_shape = preprocessing_data(X_original)
    y = y_original

    print(f'\nbatch_size: {batch_size}, iter_num: {iter_num}, kernel: {args.kernel}\n')

    X_train, X_val, y_train, y_val = X[:50000], X[50000:], y[:50000], y[50000:]

    clf = MySvm(args.kernel, image_shape=image_shape)
    clf.fit(X_train, y_train, iter_num=iter_num, batch_size=batch_size, verbosity=verbosity)

    prediction_labels = clf.predict(X_val)
    print(classification_report(y_val, prediction_labels, digits=4))

    optimal_weights = clf.get_weights()
    print(f'Saving model to {path_to_model}')
    save_weights(path_to_model, optimal_weights)
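# The main() above assumes an argparse-based parse_args() whose flags match
# the attribute names it reads. A minimal sketch of what such a parser might
# look like (flag names inferred from usage above, defaults are assumptions):
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Train an SVM on MNIST.')
    parser.add_argument('--x_train_dir', required=True, help='path to training images')
    parser.add_argument('--y_train_dir', required=True, help='path to training labels')
    parser.add_argument('--model_output_dir', required=True, help='where to save weights')
    parser.add_argument('--kernel', default='linear', help='kernel type for MySvm')
    parser.add_argument('--iter_num', type=int, default=1000, help='training iterations')
    parser.add_argument('--batch_size', type=int, default=64, help='mini-batch size')
    parser.add_argument('--verbosity', type=int, default=1, help='logging verbosity')
    return parser.parse_args()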
def logistic_reg_mnist_download():
    mnist_folder = 'data/mnist'
    utils.download_mnist(mnist_folder)
    # We ignore the validation data in this problem.
    train, val, test = utils.read_mnist(mnist_folder, flatten=True)
    return train, test
def main():
    args = parse_args()
    path_to_x_test = args.x_test_dir
    path_to_y_test = args.y_test_dir
    path_to_model = args.model_input_dir
    kernel = args.kernel

    X_original = read_mnist(path_to_x_test)
    X_test, image_shape = preprocessing_data(X_original)
    y_test = read_mnist(path_to_y_test)

    weights = load_weights(path_to_model)
    clf = MySvm(kernel_type=kernel, image_shape=image_shape)
    clf.load_weights(weights)

    predict_labels = clf.predict(X_test)
    print('Metrics on the test data:\n')
    print(classification_report(y_test, predict_labels, digits=4))
def main(z_dim, mi, e1, e2, test):
    test_bool = test

    from utils import find_avaiable_gpu
    import os
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    device_id = find_avaiable_gpu()
    print('Using device {}'.format(device_id))
    os.environ["CUDA_VISIBLE_DEVICES"] = str(device_id)

    train, test = tu.read_mnist(batch_sizes=(250, 250))
    datasets = tu.Datasets(train=train, test=test)

    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.999)
    encoder = VariationalEncoder(z_dim=z_dim)
    decoder = VariationalDecoder(z_dim=z_dim)

    logdir = tu.obtain_log_path('lvae/v1/{}/{}-{}-{}/'.format(z_dim, mi, e1, e2))
    vae = LagrangianVAE(encoder, decoder, datasets, optimizer, logdir, mi, e1, e2)

    if not test_bool:
        vae.train(num_epochs=5000)
    vae.test()
def fetch_data(class_1, class_2):
    '''Reduces MNIST to a two-class classification problem.'''
    mnist_folder = 'data/mnist'
    if not os.path.isdir(mnist_folder):
        os.mkdir('data')
        os.mkdir(mnist_folder)
        utils.download_mnist(mnist_folder)
    train, _, test = utils.read_mnist(mnist_folder, flatten=True)

    train_images = preprocessing.scale(np.array(train[0]), axis=1)
    train_labels = np.array([np.where(x == 1)[0][0] for x in train[1]])
    test_images = preprocessing.scale(np.array(test[0]), axis=1)
    test_labels = np.array([np.where(x == 1)[0][0] for x in test[1]])

    train_data = (train_labels == class_1) | (train_labels == class_2)
    x_train = train_images[train_data]
    y_train = train_labels[train_data]
    test_data = (test_labels == class_1) | (test_labels == class_2)
    x_test = test_images[test_data]
    y_test = test_labels[test_data]
    return x_train, y_train, x_test, y_test
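# Example usage with a hypothetical class pair: reduce MNIST to 3-vs-8.
x_train, y_train, x_test, y_test = fetch_data(3, 8)
print('binary train set:', x_train.shape, 'classes:', np.unique(y_train))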
# Define parameters for the model
learning_rate = 0.01
batch_size = 128
n_epochs = 30
n_train = 60000
n_test = 10000

# Step 1: Read in data
# Downloading is blocked by the firewall, so the MNIST files must already be
# present in the data folder.
mnist_folder = os.path.join('data', 'mnist')
# utils.download_mnist(mnist_folder)
my_path = os.path.abspath(os.path.dirname(__file__))
path = os.path.join(my_path, mnist_folder)
train, val, test = utils.read_mnist(path, flatten=True)

# Step 2: Create datasets and iterator
# create training Dataset and batch it
train_data = tf.data.Dataset.from_tensor_slices(train)
train_data = train_data.shuffle(10000)  # if you want to shuffle your data
train_data = train_data.batch(batch_size)

# create testing Dataset and batch it
test_data = tf.data.Dataset.from_tensor_slices(test)
test_data = test_data.batch(batch_size)

# create one iterator and initialize it with different datasets
iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                           train_data.output_shapes)
img, label = iterator.get_next()
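# A minimal sketch of how this reinitializable iterator is typically driven in
# TF1. The model (a plain logistic regression on the flattened images) and the
# train_op are assumptions, not part of the original snippet:
w = tf.get_variable('w', shape=(784, 10),
                    initializer=tf.random_normal_initializer(0, 0.01))
b = tf.get_variable('b', shape=(1, 10), initializer=tf.zeros_initializer())
logits = tf.matmul(img, w) + b
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=label, logits=logits))
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

train_init = iterator.make_initializer(train_data)  # point iterator at train set
test_init = iterator.make_initializer(test_data)    # point iterator at test set

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(n_epochs):
        sess.run(train_init)
        try:
            while True:  # run until the training set is exhausted
                sess.run(train_op)
        except tf.errors.OutOfRangeError:
            pass  # one full epoch finished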
import tensorflow as tf
import time
import utils

# Define parameters for the model
learning_rate = 0.003
batch_size = 512
n_epochs = 30
n_train = 185
n_test = 111

# Step 1: Read in data
mnist_folder = 'convert_MNIST'
# utils.download_mnist(mnist_folder)
train, val, test = utils.read_mnist(mnist_folder, flatten=True)
# print(type(train))
feature, label = train
# print(len(label))

# Step 2: Create datasets and iterator
# create training Dataset and batch it
train_data = tf.data.Dataset.from_tensor_slices(train)
train_data = train_data.shuffle(10000)  # if you want to shuffle your data
train_data = train_data.batch(batch_size)

# create testing Dataset and batch it
test_data = tf.data.Dataset.from_tensor_slices(test)
test_data = test_data.shuffle(10000)
test_data = test_data.batch(batch_size)
#############################
print("Predicting using KNN with k =", k) test_features = np.array([utils.hog_features(img) for img in test_data[:, :-1]]) test_labels = test_data[:, -1] tik = time.clock() score = knn_clf.score(test_features, test_labels) print("Time taken to predict = {:.4} sec".format(float(time.clock() - tik))) return score def get_svm_score(train_data, test_data): features = np.array([utils.hog_features(img) for img in train_data[:, :-1]]) labels = train_data[:, -1] c = 15000 print("Fitting SVM model with C =", c) tik = time.clock() svm_clf = SVC(C=c).fit(features, labels) print("Time taken to fit = {:.4} sec".format(float(time.clock() - tik))) print("Predicting using SVM with C =", c) test_features = np.array([utils.hog_features(img) for img in test_data[:, :-1]]) tik = time.clock() test_labels = test_data[:, -1] score = svm_clf.score(test_features, test_labels) print("Time taken to predict = {:.4} sec".format(float(time.clock() - tik))) return score train_data, test_data = utils.read_mnist() # print("KNN score =", get_knn_score(train_data, test_data)) print("SVM score =", get_svm_score(train_data, test_data))
import os  # needed for the CUDA_VISIBLE_DEVICES line below
from utils import read_mnist, load_model_from_json, display_image
import random
import matplotlib.pyplot as plt
import numpy as np
import keras
from keras.utils.np_utils import to_categorical

# Comment this line to enable training using your GPU
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

NUM_IMAGES_RANDOM = 5
NUM_IMAGES_MISCLASSIFICATION = 5

# Loading the test dataset
test_features, test_labels = read_mnist('t10k-images-idx3-ubyte.gz',
                                        't10k-labels-idx1-ubyte.gz')

# Loading the model from files
model = load_model_from_json('lenet5')
model.summary()

# We need to compile the model to keep Keras happy before evaluating
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

# Predicting labels and evaluating the model on the test set
predicted_labels = model.predict(test_features)
score = model.evaluate(test_features, to_categorical(test_labels))
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Showing some random images
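# The snippet ends before the display code. One way to pick the misclassified
# examples promised by NUM_IMAGES_MISCLASSIFICATION (a sketch; display_image
# comes from the project's utils, everything else here is assumed):
predicted_classes = np.argmax(predicted_labels, axis=1)
wrong = np.where(predicted_classes != test_labels)[0]
for idx in wrong[:NUM_IMAGES_MISCLASSIFICATION]:
    print(f'index {idx}: predicted {predicted_classes[idx]}, true {test_labels[idx]}')
    display_image(test_features[idx])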
import numpy as np
import tensorflow as tf
import time
import utils

# Define parameters for the model
learning_rate = 0.01
batch_size = 128
n_epochs = 30
n_train = 50000
n_test = 1000

notmnist_folder = 'data/'
train, val, test = utils.read_mnist(notmnist_folder, flatten=True, num_train=n_train)

train_data = tf.data.Dataset.from_tensor_slices(train)
train_data = train_data.batch(batch_size)

test_data = tf.data.Dataset.from_tensor_slices(test)
test_data = test_data.shuffle(n_test)
test_data = test_data.batch(batch_size)

iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                           train_data.output_shapes)
img, label = iterator.get_next()

# init ops for both the train and test iterators
train_init = iterator.make_initializer(train_data)
test_init = iterator.make_initializer(test_data)
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import sys
sys.path.append('/Users/mhz/tensorflow-practice')
import time

import tensorflow as tf
import tensorflow.contrib.layers as layers
from tensorflow.examples.tutorials.mnist import input_data

from utils import read_mnist

train_data, test_data, val_data = read_mnist('data/mnist')

LEARNING_RATE = 0.001
BATCH_SIZE = 128
SKIP_STEP = 10
DROPOUT = 0.75
N_EPOCHS = 1

# with tf.name_scope('data'):
#     X = tf.placeholder(tf.float32, [None, 784], name="X_placeholder")
#     Y = tf.placeholder(tf.float32, [None, 10], name="Y_placeholder")

# Define a function that combines the convolution layer with the non-linearity
def conv_relu(inputs, filters, k_size, stride, padding, scope_name):
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE) as scope:
        in_channels = inputs.shape[-1]
        kernel = tf.get_variable('kernel',
                                 [k_size, k_size, in_channels, filters],
                                 initializer=tf.truncated_normal_initializer())
        # The original snippet was cut off here; the body is completed with the
        # usual bias / conv2d / ReLU steps of this pattern.
        biases = tf.get_variable('biases', [filters],
                                 initializer=tf.random_normal_initializer())
        conv = tf.nn.conv2d(inputs, kernel,
                            strides=[1, stride, stride, 1], padding=padding)
    return tf.nn.relu(conv + biases, name=scope.name)
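# A pooling helper in the same style usually accompanies conv_relu (a sketch
# following the common TF1 layout; the name and defaults are assumptions):
def maxpool(inputs, k_size, stride, padding='VALID', scope_name='pool'):
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
        return tf.nn.max_pool(inputs,
                              ksize=[1, k_size, k_size, 1],
                              strides=[1, stride, stride, 1],
                              padding=padding)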
import numpy as np
import utils

mnist_dir = 'MNIST-data'
X_train, y_train, X_test, y_test = utils.read_mnist(mnist_dir, flatten=True)
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)


class KNearestNeighbor(object):
    def __init__(self):  # was misspelled _init_, which Python would never call
        pass

    def train(self, X, y):
        """
        :param X: A numpy array of shape (num_train, D) containing the training
            data: num_train samples of dimension D.
        :param y: A numpy array of shape (num_train,) containing the training
            labels, where y[i] is the label for X[i].
        """
        self.X_train = X
        self.y_train = y

    def compute_distance(self, X):
        """
        :return: dists: A numpy array of shape (num_test, num_train) where
            dists[i, j] is the distance between the i-th test point and the
            j-th training point.
        """
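# compute_distance is usually implemented fully vectorized via the expansion
# ||x - y||^2 = ||x||^2 - 2 x.y + ||y||^2. A sketch of that computation as a
# standalone helper (not the original method body):
def compute_distances_vectorized(X_test, X_train):
    test_sq = np.sum(X_test ** 2, axis=1, keepdims=True)   # (num_test, 1)
    train_sq = np.sum(X_train ** 2, axis=1)                # (num_train,)
    cross = X_test.dot(X_train.T)                          # (num_test, num_train)
    # Clamp tiny negative values caused by floating-point error before sqrt.
    return np.sqrt(np.maximum(test_sq - 2.0 * cross + train_sq, 0.0))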
def download_data(mnist_folder):
    # utils.download_mnist(mnist_folder)
    train, val, test = utils.read_mnist(mnist_folder, flatten=True)
    return train, val, test
filenames = [
    'train-images-idx3-ubyte.gz',
    'train-labels-idx1-ubyte.gz',
    't10k-images-idx3-ubyte.gz',
    't10k-labels-idx1-ubyte.gz'
]

# for filename in filenames:
#     download_url = os.path.join(url, filename)
#     local_dest = os.path.join(path, filename)
#     local_file, _ = urllib.request.urlretrieve(download_url, local_dest)
#     with gzip.open(local_dest, 'rb') as f_in, open(local_dest[:-3], 'wb') as f_out:
#         shutil.copyfileobj(f_in, f_out)
#     os.remove(local_dest)

batch_size = 128
train, val, test = read_mnist(path)

train_data = tf.data.Dataset.from_tensor_slices(train)
train_data = train_data.shuffle(10000)  # shuffles the data
train_data = train_data.batch(batch_size)

test_data = tf.data.Dataset.from_tensor_slices(test)
test_data = test_data.batch(batch_size)

iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                           train_data.output_shapes)
img, label = iterator.get_next()
label = tf.cast(label, tf.float32)

train_init = iterator.make_initializer(train_data)
test_init = iterator.make_initializer(test_data)

# The original snippet was cut off after the first argument below; the shape
# and initializer are filled in following the usual flattened-MNIST setup.
w = tf.get_variable('weights', shape=(784, 10),
                    initializer=tf.random_normal_initializer(0, 0.01))
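# The graph typically continues with bias, logits, loss, and accuracy ops (a
# sketch of the standard TF1 logistic-regression pattern, assuming flattened
# 784-dim images and one-hot labels; nothing below is from the original):
b = tf.get_variable('bias', shape=(1, 10), initializer=tf.zeros_initializer())
logits = tf.matmul(img, w) + b
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=label, logits=logits))
preds = tf.nn.softmax(logits)
correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(label, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))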
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import tensorflow as tf
import time
import utils

# Define parameters for the model
learning_rate = 0.01
batch_size = 128
n_epochs = 30

# Step 1: Read notMNIST data
X_train, X_validation, X_test = utils.read_mnist('notMnist')
X_batch, Y_batch = utils.next_batch(batch_size, X_train)

# Step 2: Create placeholders for features and labels.
# Each image in notMNIST has shape 28*28 = 784, so each image is represented
# as a 1x784 tensor. There are 10 classes per image, corresponding to the
# characters A - J, and each label is a one-hot vector.
X = tf.placeholder(tf.float32, [batch_size, 784], name='image')
Y = tf.placeholder(tf.int32, [batch_size, 10], name='label')

# Step 3: Create weights and bias.
# w is initialized to random values with mean 0 and stddev 0.01; b is
# initialized to 0. The shape of w depends on the dimensions of X and Y so
# that Y = tf.matmul(X, w); the shape of b depends on Y.
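# Following the Step 3 comments, w and b might be created like this (a sketch;
# the original code after this point is not shown):
w = tf.get_variable('weights', shape=(784, 10),
                    initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01))
b = tf.get_variable('bias', shape=(1, 10), initializer=tf.zeros_initializer())
logits = tf.matmul(X, w) + b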
# learning_rate = 0.01
batch_size = 128
n_epochs = 50
n_train = 60000
n_test = 10000

# Use a regularized loss, an exponentially decaying learning rate, and a
# moving average to improve accuracy.
REGULARATION_RATE = 0.001
LEARNING_RATE_BASE = 0.01
LEARNING_RATE_DECAY = 0.8

# Step 1: Read in data
mnist_folder = 'data/mnist'
# utils.download_mnist(mnist_folder)
# Each of the three results is a tuple of two arrays (images, labels).
train, val, test = utils.read_mnist(mnist_folder, flatten=True)

# Step 2: Create datasets and iterator
# create training Dataset and batch it
train_data = tf.data.Dataset.from_tensor_slices(train)
train_data = train_data.shuffle(10000)  # if you want to shuffle your data
train_data = train_data.batch(batch_size)

# create testing Dataset and batch it
test_data = tf.data.Dataset.from_tensor_slices(test)
test_data = test_data.batch(batch_size)

# create one iterator and initialize it with different datasets
iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                           train_data.output_shapes)
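# LEARNING_RATE_BASE and LEARNING_RATE_DECAY are normally wired up with
# tf.train.exponential_decay (a sketch; global_step and the choice of
# decay_steps are assumptions, not from the original source):
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                           global_step,
                                           n_train // batch_size,
                                           LEARNING_RATE_DECAY,
                                           staircase=True)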
import numpy as np
import tensorflow as tf
import time
import utils

# Define parameters
LEARNING_RATE = 0.01
BATCH_SIZE = 128
N_EPOCHS = 30
N_TRAIN = 6000
N_TEST = 10000

# Read in data
MNIST_FOLDER = '../data/mnist'
utils.download_mnist(MNIST_FOLDER)
train, val, test = utils.read_mnist(MNIST_FOLDER, flatten=True)

# Create train and test datasets
train_data = tf.data.Dataset.from_tensor_slices(train)
train_data = train_data.shuffle(10000)
test_data = tf.data.Dataset.from_tensor_slices(test)

# Process the data in batches
train_data = train_data.batch(BATCH_SIZE)
test_data = test_data.batch(BATCH_SIZE)

# Create an Iterator to get samples from the two datasets
iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                           train_data.output_shapes)
img, label = iterator.get_next()
import os  # needed for the CUDA_VISIBLE_DEVICES line below
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import TensorBoard

from lenet5 import make_lenet5
from utils import read_mnist, save_model_to_json

# Comment this line to enable training using your GPU
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

EPOCHS = 10
BATCH_SIZE = 128

train_features, train_labels = read_mnist('train-images-idx3-ubyte.gz',
                                          'train-labels-idx1-ubyte.gz')
train_features, validation_features, train_labels, validation_labels = \
    train_test_split(train_features, train_labels, test_size=0.2, random_state=0)
print('# of training images:', train_features.shape[0])
print('# of cross-validation images:', validation_features.shape[0])

model = make_lenet5()
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

X_train, y_train = train_features, to_categorical(train_labels)
X_validation, y_validation = validation_features, to_categorical(validation_labels)
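# Training then typically proceeds with the constants defined above. A sketch,
# assuming save_model_to_json(model, name) mirrors load_model_from_json and
# that TensorBoard logs go to a hypothetical ./logs directory:
model.fit(X_train, y_train,
          batch_size=BATCH_SIZE,
          epochs=EPOCHS,
          validation_data=(X_validation, y_validation),
          callbacks=[TensorBoard(log_dir='./logs')])
save_model_to_json(model, 'lenet5')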
import tensorflow as tf
import time
import utils

# Define parameters for the model
learning_rate = 0.01
batch_size = 128
n_epochs = 30
n_train = 60000
n_test = 10000

# Step 1: Read in data
mnist_folder = '.\\data\\mnist'
# utils.download_mnist(mnist_folder)
train, val, test = utils.read_mnist(mnist_folder, flatten=True)

# Step 2: Create datasets and iterator
train_data = tf.data.Dataset.from_tensor_slices(train)
train_data = train_data.shuffle(10000)  # if you want to shuffle your data
# With batch_size = 128, the tf.data.Dataset yields the data one chunk at a
# time; train_data becomes a BatchDataset.
train_data = train_data.batch(batch_size)

# Prediction over test_data is also done batch by batch, because that is how
# a batched Dataset behaves.
test_data = tf.data.Dataset.from_tensor_slices(test)
test_data = test_data.batch(batch_size)

# Use an Iterator to fetch the data
fail_name = 'ISTR_MR'
te_par = TE_PAR.TE_PAR(mr_name=['color'])
k_sec = 1000
top_k = 1


class Dummy:  # empty placeholder class
    pass


env = Dummy()          # model parameters
LayerOutput = Dummy()  # layer output values

if data_set == 'MNIST':
    input_par = INPUT_PAR.INPUT_PAR(28, 1, 10, 'MNIST')
    # read the data
    x_train, y_train, x_valid, y_valid, x_test, y_test = utils.read_mnist(
        "./MNIST_data/")
elif data_set == 'CIFAR10':
    input_par = INPUT_PAR.INPUT_PAR(32, 3, 10, 'CIFAR10')
    x_train, y_train, x_test, y_test = utils.read_cifar10()

x_train_order, y_train_order, list_order_train = dp.read_order_tr_data(
    'order_data/' + data_set + '/', x_train, y_train)
input_par.input_n_samples(x_train.shape[0])
print('X_train shape = {}'.format(x_train.shape))
print('X_test shape = {}'.format(x_test.shape))

x_train_order, y_train_order = dp.fail_tr_set(fail_name, x_train_order,
                                              y_train_order, list_order_train)
x_train, y_train = utils.shuffle_data(x_train_order, y_train_order)