def main(_):
    """High level pipeline.

    This script performs the training, evaluation and testing stages of
    the model.
    """
    learning_rate = FLAGS.learning_rate
    feature_type = FLAGS.feature_type
    model_type = FLAGS.model_type

    # Load dataset.
    data = read_dataset('data/train_lab.txt', 'data/image_data')

    # Data processing.
    data = preprocess_data(data, feature_type)

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegressionTf(ndim, 'ones')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'zeros')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'zeros')

    # Train model.
    model = train_model(data, model, learning_rate, num_steps=20000)

    # Eval model.
    data_val = read_dataset('data/val_lab.txt', 'data/image_data')
    data_val = preprocess_data(data_val, feature_type)
    acc, loss = eval_model(data_val, model)

    # Test model.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
def main(_):
    """High level pipeline.

    This script performs the training, evaluation and testing stages for
    semi-supervised learning using a Gaussian mixture model.
    """
    # Load dataset.
    # _, unlabeled_data = io_tools.read_dataset('data/simple_test.csv')
    _, unlabeled_data = io_tools.read_dataset('data/mnist_train.csv')
    n_dims = unlabeled_data.shape[1]

    # Initialize model.
    model = GaussianMixtureModel(n_dims, n_components=FLAGS.n_components,
                                 max_iter=FLAGS.max_iter)

    # Unsupervised training.
    model.fit(unlabeled_data)

    # Supervised training.
    # train_label, train_data = io_tools.read_dataset('data/simple_test.csv')
    train_label, train_data = io_tools.read_dataset('data/mnist_train.csv')
    model.supervised_fit(train_data, train_label)

    # Eval model.
    # eval_label, eval_data = io_tools.read_dataset('data/simple_test.csv')
    eval_label, eval_data = io_tools.read_dataset('data/mnist_test.csv')
    y_hat_eval = model.supervised_predict(eval_data)
    acc = np.sum(y_hat_eval == eval_label) / (1. * eval_data.shape[0])
    print("Accuracy: %s" % acc)
def main(_):
    """High level pipeline.

    This script performs the training, evaluation and testing stages for
    semi-supervised learning using the k-means or Gaussian mixture model
    algorithm.
    """
    # Load dataset.
    unlabeled_data, _ = io_tools.read_dataset('data/train_no_label.csv')
    n_dims = unlabeled_data.shape[1]

    # Initialize model.
    if FLAGS.model_type == 'kmeans':
        model = KMeans(n_dims, n_components=FLAGS.n_components,
                       max_iter=FLAGS.max_iter)
    else:
        model = GaussianMixtureModel(n_dims, n_components=FLAGS.n_components,
                                     max_iter=FLAGS.max_iter)

    # Unsupervised training.
    model.fit(unlabeled_data)

    # Supervised training.
    train_data, train_label = io_tools.read_dataset(
        'data/train_with_label.csv')
    model.supervised_fit(train_data, train_label)

    # Eval model.
    eval_data, eval_label = io_tools.read_dataset('data/val.csv')
    y_hat_eval = model.supervised_predict(eval_data)
    acc = np.sum(y_hat_eval == eval_label) / (1. * eval_data.shape[0])
    print("Accuracy: %s" % acc)
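# The supervised_fit / supervised_predict pair used above is what turns the
# unsupervised clustering into a classifier. The sketch below shows one common
# way that mapping is done: each cluster is labeled by majority vote over the
# labeled examples assigned to it. It assumes hard cluster assignments (for a
# GMM, the argmax of the component posteriors); the class and method names
# here are illustrative, not the repo's actual API.
import numpy as np


class ClusterLabelMapper(object):
    """Maps cluster indices to class labels by majority vote (sketch)."""

    def __init__(self, n_components):
        self.n_components = n_components
        self.cluster_label_map = np.zeros(n_components)

    def supervised_fit(self, assignments, y):
        # assignments: hard cluster index per labeled example, shape (N,).
        # y: ground-truth class label per example, shape (N,).
        for k in range(self.n_components):
            members = y[assignments == k]
            if members.size > 0:
                # Most frequent class among the points assigned to cluster k.
                values, counts = np.unique(members, return_counts=True)
                self.cluster_label_map[k] = values[np.argmax(counts)]

    def supervised_predict(self, assignments):
        # Look up the class label associated with each predicted cluster.
        return self.cluster_label_map[assignments]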
def main(_):
    """High level pipeline.

    This script performs the training, evaluation and testing stages of
    the model.
    """
    learning_rate = FLAGS.learning_rate
    w_decay_factor = FLAGS.w_decay_factor
    num_steps = FLAGS.num_steps
    opt_method = FLAGS.opt_method
    feature_columns = FLAGS.feature_columns.split(',')

    # Load dataset.
    dataset = read_dataset("data/train.csv")

    # Data processing.
    train_set = preprocess_data(dataset, feature_columns=feature_columns,
                                squared_features=True)

    # Initialize model.
    ndim = train_set[0].shape[1]
    model = LinearRegression(ndim, 'zeros')

    # Train model.
    if opt_method == 'iter':
        # Perform gradient descent.
        train_model(train_set, model, learning_rate, num_steps=num_steps,
                    shuffle=True)
        print('Performed gradient descent.')
    else:
        # Compute closed form solution.
        train_model_analytic(train_set, model)
        print('Closed form solution.')

    train_loss = eval_model(train_set, model)
    print("Train loss: %s" % train_loss)

    # Plot x vs. y if the data is one-dimensional.
    if train_set[0].shape[1] == 1:
        plot_x_vs_y(train_set, model)

    # Eval model.
    raw_eval = read_dataset("data/val.csv")
    eval_set = preprocess_data(raw_eval, feature_columns=feature_columns,
                               squared_features=True)
    eval_loss = eval_model(eval_set, model)
    print("Eval loss: %s" % eval_loss)

    # Test model.
    raw_test = read_dataset("data/test.csv")
    test_set = preprocess_data(raw_test, feature_columns=feature_columns,
                               squared_features=True)
    test_loss = eval_model(test_set, model)
    print("Test loss: %s" % test_loss)
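# For reference, the closed-form branch above typically solves the regularized
# least-squares (normal equation) problem. The sketch below is one way that
# computation often looks; it assumes processed_dataset is an [x, y] pair and
# that the model stores its weights in model.w and its decay factor in
# model.w_decay_factor, which may not match this repo's exact attribute names.
import numpy as np


def train_model_analytic_sketch(processed_dataset, model):
    """Sets model.w to the regularized least-squares optimum (sketch)."""
    x, y = processed_dataset[0], processed_dataset[1]
    ndims = x.shape[1]
    # w* = (X^T X + lambda * I)^{-1} X^T y
    reg = model.w_decay_factor * np.eye(ndims)
    model.w = np.linalg.solve(x.T.dot(x) + reg, x.T.dot(y))
    return model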
def main(_):
    """High level pipeline.

    This script performs the training, evaluation and testing stages of
    the model.
    """
    learning_rate = FLAGS.learning_rate
    w_decay_factor = FLAGS.w_decay_factor
    num_steps = FLAGS.num_steps
    opt_method = FLAGS.opt_method
    feature_type = FLAGS.feature_type

    # Load dataset and perform data processing.
    train_set = read_dataset("data/train.txt", "data/image_data/")
    train_set = preprocess_data(train_set, feature_type)

    # Initialize model.
    ndim = train_set['image'][0].shape[0]
    model = SupportVectorMachine(ndim, 'zeros',
                                 w_decay_factor=w_decay_factor)

    # Train model.
    if opt_method == 'iter':
        # Perform gradient descent.
        train_model(train_set, model, learning_rate, num_steps=num_steps,
                    batch_size=100)
        print('Performed gradient descent.')
    else:
        # Compute closed form solution.
        train_model_qp(train_set, model)
        print('Finished QP solver.')

    train_loss, train_acc = eval_model(train_set, model)
    print("Train loss: %s" % train_loss)
    print("Train acc: %s" % train_acc)

    # Eval model.
    eval_set = read_dataset("data/val.txt", "data/image_data/")
    eval_set = preprocess_data(eval_set, feature_type)
    eval_loss, eval_acc = eval_model(eval_set, model)
    print("Eval loss: %s" % eval_loss)
    print("Eval acc: %s" % eval_acc)

    # Test model.
    test_set = read_dataset("data/test.txt", "data/image_data/")
    test_set = preprocess_data(test_set, feature_type)
    test_loss, test_acc = eval_model(test_set, model)
    print("Test loss: %s" % test_loss)
    print("Test acc: %s" % test_acc)
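# The 'iter' branch above trains the SVM with (sub)gradient steps on the
# regularized hinge loss. The sketch below shows that loss and its subgradient
# in isolation, assuming labels in {-1, +1} and a weight vector that already
# includes any bias column; the function and variable names are illustrative
# only, not the repo's SupportVectorMachine implementation.
import numpy as np


def hinge_loss_and_grad(w, x, y, w_decay_factor):
    """L(w) = 0.5 * lambda * ||w||^2 + sum_i max(0, 1 - y_i * x_i . w)."""
    margins = y * x.dot(w)                      # shape (N, 1)
    loss = (0.5 * w_decay_factor * np.sum(w ** 2)
            + np.sum(np.maximum(0.0, 1.0 - margins)))
    # Subgradient: points with margin < 1 contribute -y_i * x_i.
    active = (margins < 1.0).astype(float)
    grad = w_decay_factor * w - x.T.dot(active * y)
    return loss, grad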
def main(_):
    """High level pipeline.

    This script performs the training, evaluation and testing stages of
    the model.
    """
    # learning_rate = FLAGS.learning_rate
    # feature_type = FLAGS.feature_type
    # model_type = FLAGS.model_type
    # num_steps = FLAGS.num_steps
    feature_type = 'default'
    model_type = 'svm'

    # Load dataset.
    data = read_dataset('data/train_lab.txt', 'data/image_data')

    # Data processing.
    data = preprocess_data(data, feature_type)
    print("Finished preprocessing...")

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegression(ndim, 'uniform')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'uniform')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'uniform')

    # Train model.
    print("Starting to train the model...")
    model = train_model(data, model)

    # Eval model.
    print("Starting to evaluate the model...")
    data_val = read_dataset('data/val_lab.txt', 'data/image_data')
    data_val = preprocess_data(data_val, feature_type)
    loss, acc = eval_model(data_val, model)
    print(loss, acc)

    # Test model and generate the Kaggle output.
    print("Starting the test...")
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    print("Preprocessing the test data...")
    data_test = preprocess_data(data_test, feature_type)
    print("Making predictions...")
    data_test['label'] = model.predict(model.forward(data_test['image']))
    print("Writing the results to a csv file...")
    write_dataset('data/test_lab.txt', data_test)
    print("Finished!")
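# write_dataset above produces the submission file from the predicted labels.
# A minimal sketch of such a writer is shown below; it assumes the data dict
# carries a 'label' array and that the output is simple "Id,Prediction" rows.
# The column names and file layout here are assumptions for illustration, not
# the repo's actual submission format.
import csv


def write_dataset_sketch(path, data):
    """Writes predicted labels to a csv file, one row per example."""
    with open(path, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(['Id', 'Prediction'])
        for idx, label in enumerate(data['label']):
            writer.writerow([idx, int(label)])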
def setUp(self):
    cols = ['GarageArea', 'OverallQual', 'BldgType']
    self.dataset = io_tools.read_dataset("data/train.csv")
    self.processed_data = data_tools.preprocess_data(self.dataset,
                                                     feature_columns=cols)
    self.N = self.processed_data[0].shape[0]
    self.ndims = self.processed_data[0].shape[1]
    self.model = linear_regression.LinearRegression(self.ndims, "zeros")
def main(_):
    """High level pipeline.

    This script performs the training, evaluation and testing stages of
    the model.
    """
    # learning_rate = FLAGS.learning_rate
    # feature_type = FLAGS.feature_type
    # model_type = FLAGS.model_type
    feature_type = 'default'
    model_type = 'linear'

    # Load dataset.
    data = read_dataset('data/train_lab.txt', 'data/image_data')

    # Data processing.
    data = preprocess_data(data, feature_type)
    print("Finished preprocessing...")

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegressionTf(ndim, 'gaussian')
    elif model_type == 'logistic':
        model = LogisticRegressionTf(ndim, 'uniform')
    elif model_type == 'svm':
        model = SupportVectorMachineTf(ndim, 'uniform')

    # Train model.
    print("Starting to train the model...")
    model = train_model(data, model)

    # Eval model.
    print("Starting to evaluate the model...")
    data_val = read_dataset('data/val_lab.txt', 'data/image_data')
    data_val = preprocess_data(data_val, feature_type)
    loss, acc = eval_model(data_val, model)
    print(loss, acc)
def setUp(self):
    self.dataset = io_tools.read_dataset("data/train.txt",
                                         "data/image_data/")
    self.dataset = data_tools.preprocess_data(self.dataset, 'raw')
    self.model = support_vector_machine.SupportVectorMachine(
        8 * 8 * 3, 'zeros')
def setUp(self):
    self.dataset = io_tools.read_dataset("data/train.txt",
                                         "data/image_data/")
def setUp(self):
    self.dataset = io_tools.read_dataset("data/train.csv")
def update_step(x_batch, y_batch, model, learning_rate):
    """Performs one single update step (i.e. forward then backward).

    Args:
        x_batch(numpy.ndarray): input data of dimension (N, ndims).
        y_batch(numpy.ndarray): label data of dimension (N, 1).
        model(LinearModel): Initialized linear model.
        learning_rate(float): Step size for the gradient update.
    """
    f = LinearRegression.forward(model, x_batch)
    grad = LinearRegression.backward(model, f, y_batch)
    model.w = model.w - learning_rate * grad


# Scratch check that runs the pipeline end to end (executes at import time).
dataset = io_tools.read_dataset('train.csv')
# print(dataset)
data = data_tools.preprocess_data(dataset)
ndim = data[0].shape[1]
print('data[0]', data[0])
print('ndim', ndim)
# print(data)
train_model(data, LinearRegression(ndim))


def train_model_analytic(processed_dataset, model):
    """Computes and sets the optimal model weights (model.w).

    Args:
        processed_dataset(list): List of [x, y] processed from
            utils.data_tools.preprocess_data.
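# For context, update_step above is usually driven by a mini-batch loop like
# the sketch below. It assumes processed_dataset is an [x, y] pair and a
# batch_size/num_steps interface similar to the train_model calls elsewhere in
# this repo; this is an illustrative sketch, not the repo's actual train_model
# implementation.
import numpy as np


def train_model_sketch(processed_dataset, model, learning_rate=0.001,
                       batch_size=16, num_steps=1000, shuffle=True):
    """Runs num_steps mini-batch gradient updates and returns the model."""
    x, y = processed_dataset[0], processed_dataset[1]
    n_samples = x.shape[0]
    indices = np.arange(n_samples)
    cursor = 0
    for _ in range(num_steps):
        # Reshuffle and wrap around once the epoch is exhausted.
        if cursor + batch_size > n_samples:
            if shuffle:
                np.random.shuffle(indices)
            cursor = 0
        batch_idx = indices[cursor:cursor + batch_size]
        update_step(x[batch_idx], y[batch_idx], model, learning_rate)
        cursor += batch_size
    return model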
def test_io(self):
    train_label, train_data = io_tools.read_dataset('data/simple_test.csv')
    np.testing.assert_array_equal(train_data.shape, np.asarray([200, 2]))
def main(_):
    """High level pipeline."""
    # pp.pprint(flags.FLAGS.__flags)

    # Preprocess method supports ['default', 'rgb', 'hsv'].
    preprocess_method = FLAGS.preprocess_method
    feature_type = FLAGS.feature_type

    # Training/validation/testing image txt dirs.
    traintxtdir = FLAGS.traintxtdir
    # valtxtdir = FLAGS.valtxtdir
    testtxtdir = FLAGS.testtxtdir
    # All the images together.
    totaltxtdir = FLAGS.totaltxtdir

    # Training image dataset dirs.
    imgdir = FLAGS.imgdir
    preprocesed_imgdir = FLAGS.preprocesed_imgdir
    rescaled_imgdir = FLAGS.rescaled_imgdir

    # Read all the images.
    # -------------------------------------------------------------- #
    # print("[*] Reading and preprocessing dataset...")
    # raw_dataset, filename = read_dataset(totaltxtdir, imgdir)
    # Resize all the images -> save to a new folder, all images the same size.
    # rescale_data(raw_dataset, filename, feature_type)
    # Preprocess images.
    # preprocess_data(rescaled_imgdir, preprocesed_imgdir, preprocess_method)

    # Load train/val/test set.
    # -------------------------------------------------------------- #
    print("[*] Loading training set...")
    try:
        train_set, _ = read_dataset(traintxtdir, preprocesed_imgdir)
        train_image = train_set['image']
        train_label = train_set['label']
        positive_train_img_num = np.sum(train_label == 1)
        negative_train_img_num = np.sum(train_label == 0)
    except Exception:
        print("[*] Oops! Please try loading the training set again...")
    print("[*] Training set loaded successfully!")
    print("[*] " + str(positive_train_img_num) + " faces loaded! " +
          str(negative_train_img_num) + " non-faces loaded!")

    # Compute integral images of the training set.
    for idx in range(train_image.shape[0]):
        train_image[idx] = integral_image(train_image[idx])

    # AdaBoost and cascade classifiers.
    classifiers = AdaBoost(train_image, train_label,
                           positive_train_img_num, negative_train_img_num,
                           feature_size=0)

    # -------------------------------------------------------------- #
    print("[*] Loading test set...")
    try:
        test_set, _ = read_dataset(testtxtdir, preprocesed_imgdir)
        test_image = test_set['image']
        test_label = test_set['label']
        positive_test_img_num = np.sum(test_label == 1)
        negative_test_img_num = np.sum(test_label == 0)
    except Exception:
        print("[*] Oops! Please try loading the test set again...")
    print("[*] Test set loaded successfully!")
    print("[*] " + str(positive_test_img_num) + " faces loaded! " +
          str(negative_test_img_num) + " non-faces loaded!")

    # Compute integral images of the test set.
    for idx in range(test_image.shape[0]):
        test_image[idx] = integral_image(test_image[idx])

    # Start testing.
    # -------------------------------------------------------------- #
    print("[*] Start testing...")
    positive_test_images = []
    negative_test_images = []
    for idx in range(positive_test_img_num + negative_test_img_num):
        if test_label[idx]:
            positive_test_images.append(test_image[idx])
        else:
            negative_test_images.append(test_image[idx])

    pred_pos = np.sum(
        ensemble_vote_all(np.array(positive_test_images), classifiers))
    acc_pos = float(pred_pos) / positive_test_img_num
    pred_neg = np.sum(
        ensemble_vote_all(np.array(negative_test_images), classifiers))
    acc_neg = float(pred_neg) / negative_test_img_num

    print("[*] Test done!")
    print("Faces [" + str(pred_pos) + " / " + str(positive_test_img_num) +
          "] accuracy: " + str(acc_pos))
    print("Objects [" + str(pred_neg) + " / " + str(negative_test_img_num) +
          "] accuracy: " + str(acc_neg))
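# integral_image above refers to the standard summed-area table used by
# Viola-Jones style detectors: entry (i, j) holds the sum of all pixels at or
# above and to the left of (i, j), so any rectangular sum of Haar features can
# be read off in four lookups. A minimal NumPy sketch of that computation is
# shown below; the repo's own implementation may differ in padding or edge
# handling.
import numpy as np


def integral_image_sketch(img):
    """Returns the summed-area table of a 2-D grayscale image."""
    # Cumulative sum down the rows, then across the columns.
    return np.cumsum(np.cumsum(img, axis=0), axis=1)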