def main():
    """Fit logistic regression to a two-circles dataset and plot the result.

    The two raw coordinates are augmented with their squares, standardized,
    and handed to ``LogisticRegression`` with fixed hyperparameters; only the
    sample count and noise level come from the command-line arguments.
    """
    args = getArguments()
    print('[DEBUG]', args)

    x, y = make_circles(n_samples=args.n_samples,
                        noise=args.noise,
                        factor=0.3,
                        random_state=42)

    # Append x1^2 and x2^2 as two extra feature columns (in that order).
    first_squared = (x[:, 0]**2).reshape(-1, 1)
    second_squared = (x[:, 1]**2).reshape(-1, 1)
    x = np.concatenate((x, first_squared, second_squared), axis=1)
    y = y.reshape(-1, 1)

    # The fitted scaler is kept because the plotting helper receives it.
    scaler = StandardScaler()
    x = scaler.fit_transform(x)

    lr = LogisticRegression(x=x,
                            y=y,
                            alpha=3e-2,
                            max_epochs=1000,
                            epsilon=1e-3,
                            batch_size=100)
    lr.runGradientDescent()

    print(f'[DEBUG] Optimized Cost: {lr.history["cost"][-1]}')
    print(f'[DEBUG] Optimized Theta: {lr.history["theta"][-1]}')
    plotAndSaveGraphs(lr, args, scaler)
def main():
    """Fit logistic regression to a two-blob dataset and plot the result.

    Dataset size, cluster spread, and all optimizer hyperparameters are read
    from the parsed command-line arguments.
    """
    args = getArguments()
    print('[DEBUG]', args)

    x, y = make_blobs(n_samples=args.n_samples,
                      centers=2,
                      n_features=2,
                      cluster_std=args.noise,
                      random_state=42)

    # The fitted scaler is kept because the plotting helper receives it.
    scaler = StandardScaler()
    x = scaler.fit_transform(x)

    # Labels are passed as a single column.
    targets = y.reshape(-1, 1)
    lr = LogisticRegression(x=x,
                            y=targets,
                            alpha=args.lr,
                            max_epochs=args.max_epochs,
                            epsilon=args.epsilon,
                            batch_size=args.batch_size)
    lr.runGradientDescent()

    print(f'[DEBUG] Optimized Theta: {lr.history["theta"][-1]}')
    print(f'[DEBUG] Optimized Cost: {lr.history["cost"][-1]}')
    plotAndSaveGraphs(lr, args, scaler)
def main():
    """Train the logistic regression model on the breast-cancer dataset.

    Features are standardized, split 70/30 with stratification, and both
    splits are trimmed so their lengths are exact multiples of ``BATCH_SIZE``
    before being passed to ``model.train``.
    """
    dataset = datasets.load_breast_cancer()
    features = StandardScaler().fit_transform(dataset.data)
    num_features = features.shape[1]
    labels = dataset.target

    (train_features, test_features,
     train_labels, test_labels) = train_test_split(features,
                                                   labels,
                                                   test_size=0.3,
                                                   stratify=labels)

    # Drop the trailing partial batch from each split, e.g. with
    # 1898322 rows and a batch size of 256 only the first 1898240 are kept.
    train_keep = train_features.shape[0] - (train_features.shape[0] % BATCH_SIZE)
    train_features = train_features[:train_keep]
    train_labels = train_labels[:train_keep]
    # Sizes handed to model.train() are those of the trimmed arrays.
    train_size = train_features.shape[0]

    test_keep = test_features.shape[0] - (test_features.shape[0] % BATCH_SIZE)
    test_features = test_features[:test_keep]
    test_labels = test_labels[:test_keep]
    test_size = test_features.shape[0]

    model = LogisticRegression(
        alpha=LEARNING_RATE,
        batch_size=BATCH_SIZE,
        num_classes=NUM_CLASSES,
        sequence_length=num_features,
    )

    model.train(
        checkpoint_path="./checkpoint_path/logistic_regression/",
        log_path="./log_path/logistic_regression/",
        model_name="logistic_regression",
        epochs=3000,
        train_data=[train_features, train_labels],
        train_size=train_size,
        validation_data=[test_features, test_labels],
        validation_size=test_size,
        result_path="./results/logistic_regression/",
    )
def logistic_regression(train_data, train_labels, test_data, test_labels):
    """Train a one-versus-rest logistic regression and report its metrics.

    Metrics are computed on the leading ``len(test_labels)`` rows of
    ``test_data``; afterwards predictions for all of ``test_data`` are
    saved under this function's name.
    """
    print(f'{LogisticRegression.__name__}:')

    # Build the base learner and wrap it for multi-class use.
    base_learner = LogisticRegression(train_data.shape[1], eta=0.001, epochs=50)
    model = OneVersusRest(base_learner)
    model.train(train_data, train_labels)

    # Only the rows that have labels can be scored.
    labelled_rows = test_data[:len(test_labels)]
    test_pred = model.predict(labelled_rows)

    # Metrics
    accuracy_pct = 100 * accuracy(test_pred, test_labels)
    print(f'\nTest Accuracy: {accuracy_pct:.02f}%\n')

    mat, _ = confusion_matrix(test_pred, test_labels)
    precision_values = np.round(precision(test_pred, test_labels), 2)
    print(f'Precision:\n{precision_values}\n')
    recall_values = np.round(recall(test_pred, test_labels), 2)
    print(f'Recall:\n{recall_values}\n')
    f1_values = np.round(f1_score(test_pred, test_labels), 2)
    print(f'F1:\n{f1_values}\n')
    print('Confusion Matrix:')
    print(mat)

    # Predict the full test set and persist the predictions.
    print('Predicting 10k samples...')
    test_pred = model.predict(test_data)
    save_predictions(logistic_regression.__name__, test_pred)
    print('Saved 10k predictions.\n')
def task_3_logistic(x, y, x_test, y_test, args):
    """Measure test accuracy as a function of training-subset size.

    For each of 20 evenly spaced sizes between 10 and 200, a model is fitted
    50 times on rows drawn at random (with replacement, via
    ``np.random.randint``) and the resulting test accuracies are averaged.

    ``args`` is indexed positionally: ``args[0]`` is passed as ``lr``,
    ``args[1]`` as ``eps`` and ``args[2]`` as ``regularization``.

    Returns:
        ``(accuracies, sizes)`` - the list of mean accuracies and the array
        of subset sizes they correspond to.
    """
    sizes = np.linspace(10, 200, num=20)
    n_rows = int(y.shape[0])

    def _single_run_accuracy(n_points):
        # Draw a random subset, fit on it, and score on the held-out data.
        chosen = np.random.randint(n_rows, size=n_points)
        learner = LogisticRegression(x[chosen], y[chosen])
        learner.fit(lr=args[0], eps=args[1], regularization=args[2])
        predicted = learner.predict(x_test)
        matrix = evaluation.confusion_matrix(y_test, predicted)
        return evaluation.accuracy(matrix)

    accuracies = []
    for size in sizes:
        n_points = int(size)
        total = sum(_single_run_accuracy(n_points) for _ in range(50))
        accuracies.append(total / 50)
    return accuracies, sizes
def generate_classification_predictions():
    """Fit each classifier on the training data and predict the test data.

    Returns:
        A list with one prediction result per model, in the order
        logistic regression, naive Bayes.
    """
    X, Y = get_classification_training_data()
    test_X = get_classification_testing_data()

    def _fit_then_predict(classifier):
        classifier.fit(X, Y)
        return classifier.predict(test_X)

    class_models = [LogisticRegression(), NaiveBayes()]
    return [_fit_then_predict(classifier) for classifier in class_models]
def test_logistic_regression():
    """Train logistic regression on two shifted Gaussian clouds and report accuracy.

    Plots the generated data, trains for 100 epochs with batch size 32,
    plots the predicted partition together with the learned weights, and
    prints the training-set accuracy.
    """
    from models.logistic_regression import LogisticRegression

    # Two groups of 500 standard-normal 2-D points; group 1 gets label 1.
    points = np.random.randn(2, 500, 2)
    targets = np.zeros([2, 500])
    points[0] += np.array([1, -1])   # shift group 0 by (+1, -1)
    points[1] += np.array([-1, 1])   # shift group 1 by (-1, +1)
    targets[1] = 1
    plot_scatter(points[0], points[1], 'Real')

    # Flatten the groups into a single (N, 2) matrix and a label vector.
    x = points.reshape(-1, 2)
    y = targets.flatten()

    logistic = LogisticRegression(2, lr=1e-3)
    train_logistic_regression(logistic, x, y, batch_size=32, epochs=100)

    pred = logistic.predict(x)
    predicted_zero = x[pred == 0]
    predicted_one = x[pred == 1]
    plot_scatter_with_line(predicted_zero, predicted_one, logistic.weights, 'Pred')

    acc = np.sum(pred == y) / len(pred)
    print(f'Acc = {100 * acc:.2f}%')
def fit(self, model, X, label_vector, weights, smoothing=False):
    """Fit a Platt-scaling model on the scores of an already-trained model.

    Args:
        model: base model; must provide ``get_n_classes()`` and ``score(X)``.
        X: features that are scored by ``model``.
        label_vector: integer class label per item.
        weights: per-item weights forwarded to the calibration model.
        smoothing: when true, ``self.reweight_data`` is applied to
            ``(X, label_vector, weights)`` before scoring.

    Exits the process (``sys.exit``) when the base model has more than two
    classes. When every label is identical, no calibration model is fitted
    and ``self._platt_model`` is left untouched.
    """
    n_classes = model.get_n_classes()
    if n_classes > 2:
        sys.exit("Platt scaling not yet implemented for more than 2 classes.")

    self._base_model = model
    if smoothing:
        X, label_vector, weights = self.reweight_data(X, label_vector, weights)

    n_items = len(label_vector)
    # The base model's scores become a single-column feature matrix.
    scores = np.reshape(model.score(X), (n_items, 1))

    label_counts = np.bincount(label_vector, minlength=n_classes)
    most_common = np.argmax(label_counts)

    # A calibration model cannot be learned from a single observed label.
    if label_counts[most_common] == n_items:
        print("Only label %d found in dev data; skipping Platt" % most_common)
        return

    self._platt_model = LogisticRegression(n_classes,
                                           alpha=self._alpha,
                                           penalty=self._penalty,
                                           objective='acc')
    self._platt_model.fit(scores, label_vector, weights)
def update_plot(self, plot_state):
    """Refit the classifier on the plotted points and redraw the three lines.

    Does nothing when the plot state contains no points. Otherwise a fresh
    ``LogisticRegression`` is fitted and the decision boundary plus two
    lines offset by ``self.ALPHA / w2`` along the second axis are pushed
    into the corresponding data sources.
    """
    X, y = plot_state_to_model_data(plot_state)
    if X.shape[0] == 0:
        return

    classifier = LogisticRegression()
    classifier.fit(X, y, 1000)

    # presumably (intercept, coef_1, coef_2) - verify against the model class
    b, w1, w2 = classifier.weights

    # Point where the boundary b + w1*x + w2*y = 0 crosses x = 0.
    origin = np.array([0, -b / w2])
    angle = angle_from_tangent(-w2, w1)
    delta = np.array([0, self.ALPHA / w2])

    red_x, red_y = calculate_line_endpoints(angle, origin - delta)
    boundary_x, boundary_y = calculate_line_endpoints(angle, origin)
    blue_x, blue_y = calculate_line_endpoints(angle, origin + delta)

    self.decision_boundary.data_source.data = dict(x=boundary_x, y=boundary_y)
    self.red_side.data_source.data = dict(x=red_x, y=red_y)
    self.blue_side.data_source.data = dict(x=blue_x, y=blue_y)
def train_lr(x, y):
    """Fit a logistic regression model on a stratified training subset.

    The training subset holds 80% of the samples, capped at 400; the
    remaining samples from the split are discarded.

    Returns:
        The fitted model.
    """
    n_train = min(400, int(len(x) * 0.8))
    X_train, _, y_train, _ = train_test_split(x,
                                              y,
                                              train_size=n_train,
                                              random_state=2333,
                                              stratify=y)

    model = LogisticRegression()
    model.init_model(None)
    prepared = model.preprocess_data(X_train)
    model.fit(prepared, y_train)
    return model
def main(_):
    """High-level pipeline: train, evaluate, and test the model.

    The feature type and model type are fixed in the body ('default' and
    'svm'); command-line FLAGS are not consulted here. Predictions for the
    test split are written back through ``write_dataset``.
    """
    feature_type = 'default'
    model_type = 'svm'

    # Load and preprocess the training split.
    train_raw = read_dataset('data/train_lab.txt', 'data/image_data')
    data = preprocess_data(train_raw, 'default')
    print("Finish preprocessing...")

    # Build the requested model; the input width comes from the image matrix.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegression(ndim, 'uniform')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'uniform')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'uniform')

    # Train.
    print("Start to train the model...")
    model = train_model(data, model)

    # Evaluate on the validation split.
    print("Start to evaluate the model...")
    val_raw = read_dataset('data/val_lab.txt', 'data/image_data')
    data_val = preprocess_data(val_raw, feature_type)
    loss, acc = eval_model(data_val, model)
    print(loss, acc)

    # Predict labels for the test split.
    print("Start doing the test")
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    print("Start preprocess testing data")
    data_test = preprocess_data(data_test, feature_type)
    print("Making predictions")
    forward_output = model.forward(data_test['image'])
    data_test['label'] = model.predict(forward_output)
    print("Output the results to csv file")
    # NOTE(review): this writes to the same path the test split was read from.
    write_dataset('data/test_lab.txt', data_test)
    print("Finished!")
def main():
    """Command-line demo: fit logistic regression on generated clusters.

    Options:
        -n / --n_iter      number of gradient-descent iterations (default 50)
        -f / --n_features  number of generated features (default 2)

    The decision boundary is only plotted for two features; the cost curve
    is always plotted.
    """
    # The description previously said "Linear Regression test" although the
    # script trains LogisticRegression on classification data.
    parser = argparse.ArgumentParser(description='Logistic Regression test')
    parser.add_argument('-n', '--n_iter', type=int, default=50,
                        help='number of iterations for grad_descent')
    parser.add_argument('-f', '--n_features', type=int, default=2,
                        help='number of features')
    args = parser.parse_args()
    n_iter = args.n_iter
    n_features = args.n_features

    X, y, centers = generate_classification_data(n_features=n_features)
    X_train, X_test, y_train, y_test = split_dataset(X, y)
    print("Training size: %s, Test size %s" % (len(X_train), len(X_test)))
    print("-" * 20)

    # Plotting dataset
    plot_points_and_cluster(X, centers)

    # Fit and predict
    model = LogisticRegression(n_iter=n_iter)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("-" * 20)

    # Scoring (return value is not used here; presumably score() reports
    # the result itself - verify against the model class)
    model.score(y_test, y_pred)
    print("-" * 20)

    # Plot decision boundary (only meaningful in two dimensions)
    if n_features == 2:
        plot_logistic_regression_decision_boundary(X, y, model)

    # Plot iteration vs cost
    plot_iteration_vs_cost(n_iter, model.cost_h)
def main(_):
    """High-level pipeline: train, evaluate, and test the model.

    Learning rate, feature type, model type, and step count are read from
    ``FLAGS``.

    Raises:
        ValueError: if ``FLAGS.model_type`` is not one of 'linear',
            'logistic' or 'svm'. Previously an unsupported value left
            ``model`` unbound and failed later with an unrelated error.
    """
    learning_rate = FLAGS.learning_rate
    feature_type = FLAGS.feature_type
    model_type = FLAGS.model_type
    num_steps = FLAGS.num_steps

    # Fail fast, before any data is loaded.
    if model_type not in ('linear', 'logistic', 'svm'):
        raise ValueError(
            "Unsupported model_type %r; expected 'linear', 'logistic' or 'svm'"
            % (model_type,))

    # Load dataset.
    # NOTE(review): the model is trained on the validation label file -
    # confirm this is intended.
    data = read_dataset('data/val_lab.txt', 'data/image_data')

    # Data Processing.
    data = preprocess_data(data, feature_type)

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegression(ndim, 'ones')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'zeros')
    else:  # 'svm' (validated above)
        model = SupportVectorMachine(ndim, 'zeros')

    # Train Model.
    model = train_model(data, model, learning_rate, num_steps=num_steps)

    # Eval Model.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
    acc, loss = eval_model(data_test, model)

    # Test Model.
    # NOTE(review): the test split is reloaded here but nothing is done with
    # it afterwards; kept so the sequence of helper calls is unchanged.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
def sgd_optimization(learning_rate=0.13, n_epochs=1000,
                     dataset='../../data/mnist.pkl.gz', batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear model.

    The model is validated every ``validation_frequency`` minibatches; each
    time the validation error improves, the test error is measured and the
    classifier is pickled to ``outputs/best_model.pkl``. Training stops after
    ``n_epochs`` epochs or when the patience budget is exhausted.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: str
    :param dataset: the path of MNIST dataset file from
        http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """
    datasets = load_data(dataset)

    train_x, train_y = datasets[0]
    valid_x, valid_y = datasets[1]
    test_x, test_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    # (a trailing partial minibatch is dropped by the floor division)
    n_train_batches = train_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_x.get_value(borrow=True).shape[0] // batch_size

    # build the model
    print("... building the model")

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a minibatch

    # generate symbolic variables for input (x and y represent a minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # construct the logistic regression class
    # each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28*28, n_out=10)

    # the cost we minimize during training is the negative log likelihood
    # of the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_givens = {
        x: test_x[index * batch_size : (index+1) * batch_size],
        y: test_y[index * batch_size : (index+1) * batch_size]
    }
    test_model = theano.function(inputs=[index],
                                 outputs=classifier.errors(y),
                                 givens=test_givens)

    valid_givens = {
        x: valid_x[index * batch_size : (index+1) * batch_size],
        y: valid_y[index * batch_size : (index+1) * batch_size]
    }
    valid_model = theano.function(inputs=[index],
                                  outputs=classifier.errors(y),
                                  givens=valid_givens)

    # compute the gradient of cost with respect to theta = (W, b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [
        (classifier.W, classifier.W - learning_rate * g_W),
        (classifier.b, classifier.b - learning_rate * g_b)
    ]

    # compiling a Theano function train_model that returns the cost,
    # but in the same time updates the parameter of the model based on
    # the rules defined in `updates`.
    train_givens = {
        x: train_x[index * batch_size : (index+1) * batch_size],
        y: train_y[index * batch_size : (index+1) * batch_size]
    }
    train_model = theano.function(inputs=[index],
                                  outputs=cost,
                                  updates=updates,
                                  givens=train_givens)

    # train model
    print('... training the model')

    # early-stopping parameters
    patience = 5000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    # a relative improvement of this much is considered significant
    improvement_threshold = 0.995
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)

            # global iteration number (named to avoid shadowing builtin iter)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [valid_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)

                print(
                    "epoch %i, minibatch %i/%i, validatioin error %f %%" %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = np.mean(test_losses)

                    print(
                        ('epoch %i, minibatch %i/%i, test error of best model %f %%') %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

                    # save the best model
                    with open('outputs/best_model.pkl', 'wb') as f:
                        pickle.dump(classifier, f)

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    # Both figures are fractions scaled to percent; the validation score was
    # previously taken modulo 100 instead of multiplied by 100.
    print(
        ('Optimization complete with best validation score of %f %%, '
         'with best performance %f %%')
        % (best_validation_loss * 100., test_score * 100.)
    )
    print('The code run for %d epochs, with %f epochs/sec' %
          (epoch, 1. * epoch / (end_time - start_time)))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.1fs' % ((end_time - start_time))), file=sys.stderr)
eps = [0.01, 0.05, 0.1, 0.5] # Task 3. Experiments # 1. Compare accuracy of naive bayes and logistic regression # Get cross validation accuracy for 5-fold cv print("Ionosphere validation accuracy (default parameters):") evaluation.cross_validation(5, ionosphere_train_features, ionosphere_train_labels, model=LogisticRegression) # Grid search for optimal hyperparameters print("Ionosphere grid search hyperparameters:") ionosphere_max_val_acc, ionosphere_arg_max = evaluation.grid_search(learning_rates=lrs, epsilons=eps, lambdas=lamdas, x=ionosphere_train_features, y=ionosphere_train_labels, model=LogisticRegression) # Accuracy on test split - train with best hyperparameters print("Ionosphere test accuracy:") logistic_ionosphere = LogisticRegression(ionosphere_train_features, ionosphere_train_labels) logistic_ionosphere.fit(lr=ionosphere_arg_max[0], eps=ionosphere_arg_max[1], regularization=ionosphere_arg_max[2]) ionosphere_prediction = logistic_ionosphere.predict(ionosphere_test_features) cm_ionosphere = evaluation.confusion_matrix(ionosphere_test_labels, ionosphere_prediction) print("Accuracy:", evaluation.accuracy(cm_ionosphere), "Precision:", evaluation.precision(cm_ionosphere), "Recall:", evaluation.true_positive(cm_ionosphere), "F1:", evaluation.f_score(cm_ionosphere)) # 5-fold CV for naive bayes print("Ionosphere validation accuracy (naive bayes):") evaluation.cross_validation_naive(5, ionosphere_dataset.train_data, NaiveBayes, ionosphere_dataset.label_column, ionosphere_dataset.feature_columns) naive_ionosphere = NaiveBayes(ionosphere_dataset.train_data, ionosphere_dataset.label_column, continuous=ionosphere_dataset.feature_columns) print("Ionosphere test accuracy (naive bayes):") ionosphere_pred_naive = ionosphere_dataset.test_data.apply(naive_ionosphere.predict, axis=1) cm_ionosphere_naive = evaluation.confusion_matrix(ionosphere_test_labels, ionosphere_pred_naive.to_numpy())
def main(model, dataset, learning_rate, stopping_condition, threshold=None):
    """
    Main program: run 5-fold stratified cross-validation for one model.

    For each fold a model is trained on the training part, its training
    error recorded, and then the same model is evaluated on the held-out
    part. Average training and testing errors are printed at the end.

    :param model: Str, 'adaline' or 'logistic_regression'
    :param dataset: Str, one of 'breast', 'glass', 'iris', 'soybean', 'voter'
    :param learning_rate: Float
    :param stopping_condition: Float
    :param threshold: Float
    :return: None
    :raises ValueError: if ``model`` or ``dataset`` is not a supported name
        (previously this surfaced later as an unbound-variable error)
    """
    binary_datasets = ('breast', 'voter')
    multiclass_datasets = ('glass', 'iris', 'soybean')

    # Validate both selectors before doing any work.
    if dataset not in binary_datasets + multiclass_datasets:
        raise ValueError(f"Unknown dataset: {dataset!r}")
    if model not in ('adaline', 'logistic_regression'):
        raise ValueError(f"Unknown model: {model!r}")
    is_binary = dataset in binary_datasets

    # Import, process, and normalize data
    if dataset == 'breast':
        df = preprocessing.process_breast_cancer_data()
    elif dataset == 'glass':
        df = preprocessing.process_glass_data()
    elif dataset == 'iris':
        df = preprocessing.process_iris_data()
    elif dataset == 'soybean':
        df = preprocessing.process_soybean_data()
    else:  # 'voter'
        df = preprocessing.process_voter_data()

    # Set up stratified 5-fold cross-validation; only necessary for
    # classification. The split is stratified on the last column.
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=5)
    training_sets, test_sets = [], []
    for train, test in skf.split(X=np.zeros(len(df)), y=df.iloc[:, -1:]):
        training_sets.append(df.iloc[train])
        test_sets.append(df.iloc[test])

    # The class list does not depend on the fold, so compute it once.
    classes = df['class'].unique()

    # Train; run 5 experiments in total
    training_errors, trained_models = [], []
    for training_set in training_sets:
        print("\nTraining:")
        # Features are every column except the first and the last; the last
        # column holds the labels. Both are transposed before use.
        training_data = training_set.iloc[:, 1:-1].to_numpy().T
        training_labels = training_set.iloc[:, -1:].to_numpy().T

        if model == 'adaline':
            my_model = \
                Adaline(training_data, training_labels, classes, learning_rate,
                        threshold, stopping_condition, raw_data=training_set)
        else:  # 'logistic_regression'
            my_model = \
                LogisticRegression(training_data, training_labels, classes,
                                   learning_rate, threshold,
                                   stopping_condition, raw_data=training_set)

        if is_binary:
            my_model.train()
        else:
            my_model.multi_train()

        trained_models.append(my_model)
        training_errors.append(my_model.get_training_error())
        my_model.plot_error()
        my_model.report_classifications()

    # Test; run 5 experiments in total
    testing_errors = []
    # The loop variable is deliberately not called `model`, so the `model`
    # parameter is no longer shadowed.
    for trained_model, test_set in zip(trained_models, test_sets):
        print("\nTesting: ")
        testing_data = test_set.iloc[:, 1:-1].to_numpy().T
        testing_labels = test_set.iloc[:, -1:].to_numpy().T

        if is_binary:
            trained_model.test(testing_data, testing_labels)
        else:
            trained_model.multi_test(testing_data, testing_labels)

        testing_errors.append(trained_model.get_testing_error())
        trained_model.report_classifications()

    # Report average results
    average_training_error = sum(training_errors) / len(training_errors)
    average_testing_error = sum(testing_errors) / len(testing_errors)
    print("\nSummary:")
    print(f"Average training error: {average_training_error}")
    print(f"Average testing error: {average_testing_error}")
def run(config):
    """Train a logistic regression model and estimate label proportions.

    Steps, in order:
      1. create the output directory and save the config;
      2. load features and split the items into test / non-test, either at
         random (``random_test_prop``), by a metadata field range
         (``metadata_name``), or not at all;
      3. optionally (``cshift``) learn covariate-shift weights for the
         non-test items from two test-vs-nontest classifiers, each trained on
         one half of the data and applied to the other half;
      4. load labels, keep labeled items, optionally subsample, and split
         non-test into train / dev;
      5. train the model, evaluate on train / dev / test, and write the
         proportion estimates (CC, PCC, ACC, Platt) and their absolute
         errors to text files in the output directory;
      6. save the model and vocabulary.

    Fixes relative to the previous version:
      * the second covariate-shift loop now assigns weights to the second
        half of the non-test items (it re-iterated the first half);
      * the test-vs-nontest labels are built in the same order as the rows
        they describe (non-test rows first, labeled 0);
      * subsampling draws indices from the labeled non-test list it indexes,
        instead of from the larger unfiltered count.

    Args:
        config: dict of settings; the keys read are listed at the top of the
            body, plus 'n_alphas', 'alpha_min', 'alpha_max', 'seed',
            'dev_folds' and 'dev_fold'.
    """
    input_dir = config['input_dir']
    prefix = config['prefix']
    field = config['field']
    label_name = config['label_name']
    random_test_prop = config['random_test_prop']
    metadata_name = config['metadata_name']
    train_start = config['train_start']
    train_end = config['train_end']
    test_start = config['test_start']
    test_end = config['test_end']
    max_n_train = config['max_n_train']
    sample_labels = config['sample_labels']
    penalty = config['penalty']
    objective = config['objective']
    average = config['average']
    cshift = config['cshift']

    # make the output directory and save the config file
    output_dir = make_output_dir(config)
    fh.makedirs(output_dir)
    fh.write_to_json(config, os.path.join(output_dir, 'config.json'))

    def write_value(filename, value):
        # Write a single value, as text, to a file in the output directory.
        fh.write_list_to_text([str(value)], os.path.join(output_dir, filename))

    # load features
    print(input_dir, label_name, train_end, test_start, penalty, objective, cshift)
    print("Loading features")
    all_X, all_ids, all_vocab = load_all_features(input_dir, prefix, config)
    # map each item id to its row in the feature matrix
    all_ids_index = dict(zip(all_ids, range(len(all_ids))))
    n_items = all_X.shape[0]
    print("Full feature matrix shape = ", all_X.shape)

    # if desired, do a random split into test and nontest data
    if random_test_prop is not None:
        print("Doing random train/test split")
        test_prop = float(random_test_prop)
        n_test_all = int(n_items * test_prop)
        test_indices = np.random.choice(np.arange(n_items), size=n_test_all, replace=False)
        test_items_all = [all_ids[i] for i in test_indices]
        nontest_items_all = list(set(all_ids) - set(test_items_all))
        n_nontest_all = len(nontest_items_all)

    # alternatively, if metadata exists, use it to split into test and nontest
    elif metadata_name is not None:
        metadata_file = os.path.join(input_dir, prefix + '.' + metadata_name + '.csv')
        metadata_df = pd.read_csv(metadata_file, header=0, index_col=0)
        metadata_df.index = [str(i) for i in metadata_df.index]
        field_vals = sorted(set(metadata_df[field].values))
        print("Splitting data according to %s" % field)
        print("Values:", field_vals)
        print("Testing on %s to %s" % (test_start, test_end))

        # first, split into training and non-train data based on the field of interest
        test_selector_all = (metadata_df[field] >= test_start) & (metadata_df[field] <= test_end)
        metadata_test_df = metadata_df[test_selector_all]
        test_items_all = list(metadata_test_df.index)
        n_test_all = len(test_items_all)

        nontest_selector_all = (metadata_df[field] >= train_start) & (metadata_df[field] <= train_end)
        metadata_nontest_df = metadata_df[nontest_selector_all]
        nontest_items_all = list(metadata_nontest_df.index)
        n_nontest_all = len(nontest_items_all)

    # otherwise, there is not test data; just train a model
    else:
        nontest_items_all = list(all_ids)
        n_nontest_all = len(nontest_items_all)
        test_items_all = []
        n_test_all = 0

    # every item starts with weight 1; covariate shift may overwrite the
    # weights of non-test items below
    weights_df = pd.DataFrame(np.ones(len(all_ids)), index=all_ids, columns=['weight'])

    # if there is test data, learn a model to distinguish train from test (if desired):
    if n_test_all > 0 and cshift:
        print("Training models for covariates shift")
        # split test and nontest to get balanced subsets
        test_items_1 = list(np.random.choice(test_items_all, size=int(n_test_all / 2), replace=False))
        test_items_2 = list(set(test_items_all) - set(test_items_1))
        nontest_items_1 = list(np.random.choice(nontest_items_all, size=int(n_nontest_all / 2), replace=False))
        nontest_items_2 = list(set(nontest_items_all) - set(nontest_items_1))

        # combine the test and nontest data into two balanced sets;
        # rows are ordered non-test first, then test
        cset1_items = nontest_items_1 + test_items_1
        cset2_items = nontest_items_2 + test_items_2
        # labels follow the same row order: 0 = non-test, 1 = test. The
        # weight formula below reads column 0 as P(non-test | x).
        y1 = [0] * len(nontest_items_1) + [1] * len(test_items_1)
        y2 = [0] * len(nontest_items_2) + [1] * len(test_items_2)
        cset1_indices = [all_ids_index[i] for i in cset1_items]
        cset2_indices = [all_ids_index[i] for i in cset2_items]
        X1 = all_X[cset1_indices, :]
        X2 = all_X[cset2_indices, :]

        # train two models, one on each half of the data, using the other as a dev set
        cshift_model1 = LogisticRegression(n_classes=2, penalty='l2', objective='acc')
        cshift_model1.create_alpha_grid(config['n_alphas'], config['alpha_min'], config['alpha_max'])
        cshift_model1.fit(X1, y1, None, X2, y2, None, 1)

        cshift_model2 = LogisticRegression(n_classes=2, penalty='l2', objective='acc')
        cshift_model2.create_alpha_grid(config['n_alphas'], config['alpha_min'], config['alpha_max'])
        cshift_model2.fit(X2, y2, None, X1, y1, None, 1)

        # now get the models' predictions on the dev data, which will inform
        # future weighting: weight = (n_nontest / n_test) * p(test) / p(nontest).
        # Non-test items occupy the leading rows of X1 / X2, so row i of the
        # predictions belongs to the i-th non-test item of that half.
        size_ratio = n_nontest_all / float(n_test_all)
        y1_pred_probs = cshift_model2.predict_proba(X1)
        for i, item in enumerate(nontest_items_1):
            weights_df.loc[item] = size_ratio * (1.0 / y1_pred_probs[i, 0] - 1)
        y2_pred_probs = cshift_model1.predict_proba(X2)
        for i, item in enumerate(nontest_items_2):
            weights_df.loc[item] = size_ratio * (1.0 / y2_pred_probs[i, 0] - 1)

    # reset random seed for consistency with/without using cshift
    if config['seed'] is not None:
        np.random.seed(int(config['seed']))

    print("Weights mean/min/max:", np.mean(weights_df.values), np.min(weights_df.values), np.max(weights_df.values))

    # only keep the items in the train and test sets
    all_items = nontest_items_all + test_items_all
    print("Train: %d, Test: %d (labeled and unlabeled)" % (n_nontest_all, n_test_all))

    # load labels
    label_file = os.path.join(input_dir, prefix + '.' + label_name + '.csv')
    labels_df = pd.read_csv(label_file, index_col=0, header=0)
    labels_df.index = [str(i) for i in labels_df.index]
    labels_df = labels_df.loc[all_items]
    class_names = labels_df.columns

    # find the labeled items (rows whose label counts sum to more than zero)
    print("Subsetting items with labels")
    label_sums_df = labels_df.sum(axis=1)
    labeled_item_selector = label_sums_df > 0
    labels_df = labels_df[labeled_item_selector]
    n_labeled_items, n_classes = labels_df.shape
    print("%d labeled items and %d classes" % (n_labeled_items, n_classes))
    labeled_items = set(labels_df.index)

    if n_classes > 2 and config['objective'] == 'calibration':
        sys.exit("*ERROR*: Calibration objective has not been implemented for more than 2 classes")

    nontest_items = [i for i in nontest_items_all if i in labeled_items]
    test_items = [i for i in test_items_all if i in labeled_items]
    n_nontest = len(nontest_items)
    n_test = len(test_items)

    # take a subset of the nontest items up to a max size, if desired.
    # Indices are drawn over the labeled list that is actually indexed.
    if max_n_train is not None and n_nontest > max_n_train:
        print("Sampling a set of %d labels" % max_n_train)
        nontest_indices = np.random.choice(np.arange(n_nontest), size=max_n_train, replace=False)
        nontest_items = [nontest_items[i] for i in nontest_indices]
        n_nontest = len(nontest_items)

    # split the training set into train and dev
    print("Splitting nontest into train and dev")
    np.random.shuffle(nontest_items)
    n_dev = int(n_nontest / config['dev_folds'])
    dev_fold = int(config['dev_fold'])
    dev_items = nontest_items[n_dev * dev_fold:n_dev * (dev_fold + 1)]
    train_items = list(set(nontest_items) - set(dev_items))
    train_items.sort()
    dev_items.sort()
    n_train = len(train_items)
    n_dev = len(dev_items)
    print("Train: %d, dev: %d, test: %d" % (n_train, n_dev, n_test))
    write_value('train.n.txt', n_train)
    write_value('test.n.txt', n_test)
    write_value('dev.n.txt', n_dev)

    test_labels_df = labels_df.loc[test_items]
    nontest_labels_df = labels_df.loc[nontest_items]
    train_labels_df = labels_df.loc[train_items]
    dev_labels_df = labels_df.loc[dev_items]

    test_weights_df = weights_df.loc[test_items]
    nontest_weights_df = weights_df.loc[nontest_items]
    train_weights_df = weights_df.loc[train_items]
    dev_weights_df = weights_df.loc[dev_items]

    # Convert (possibly multiply-annotated) labels to one label per instance,
    # either by duplicating or sampling
    test_labels_df, test_weights_df = prepare_labels(test_labels_df, sample=False, weights_df=test_weights_df)
    nontest_labels_df, nontest_weights_df = prepare_labels(nontest_labels_df, sample=sample_labels, weights_df=nontest_weights_df)
    train_labels_df, train_weights_df = prepare_labels(train_labels_df, sample=sample_labels, weights_df=train_weights_df)
    dev_labels_df, dev_weights_df = prepare_labels(dev_labels_df, sample=sample_labels, weights_df=dev_weights_df)

    test_labels_df.to_csv(os.path.join(output_dir, 'test_labels.csv'))
    nontest_labels_df.to_csv(os.path.join(output_dir, 'nontest_labels.csv'))
    train_labels_df.to_csv(os.path.join(output_dir, 'train_labels.csv'))
    dev_labels_df.to_csv(os.path.join(output_dir, 'dev_labels.csv'))

    test_weights_df.to_csv(os.path.join(output_dir, 'test_weights.csv'))
    nontest_weights_df.to_csv(os.path.join(output_dir, 'nontest_weights.csv'))
    train_weights_df.to_csv(os.path.join(output_dir, 'train_weights.csv'))
    dev_weights_df.to_csv(os.path.join(output_dir, 'dev_weights.csv'))

    # get one-row-hot label matrices for each subset
    train_labels = train_labels_df.values
    dev_labels = dev_labels_df.values
    test_labels = test_labels_df.values
    nontest_labels = nontest_labels_df.values

    # get weight vectors for each subset
    train_weights = train_weights_df.values[:, 0]
    dev_weights = dev_weights_df.values[:, 0]
    test_weights = test_weights_df.values[:, 0]
    nontest_weights = nontest_weights_df.values[:, 0]

    # get new item lists which correspond to the label data frames
    test_items = list(test_labels_df.index)
    dev_items = list(dev_labels_df.index)
    train_items = list(train_labels_df.index)
    n_test = len(test_items)

    # gather training features (reusing the id -> row map built above)
    train_indices = [all_ids_index[i] for i in train_items]
    dev_indices = [all_ids_index[i] for i in dev_items]
    test_indices = [all_ids_index[i] for i in test_items]

    train_X = all_X[train_indices, :]
    dev_X = all_X[dev_indices, :]
    test_X = all_X[test_indices, :]
    print(train_X.shape, dev_X.shape, test_X.shape)

    # weighted label proportions of the non-test data
    nontest_prop = np.dot(nontest_weights, nontest_labels) / nontest_weights.sum()
    print("Non-test label proportions:", nontest_prop)
    write_value('nontest.prop.txt', nontest_prop[1])

    if n_test > 0:
        test_prop = np.dot(test_weights, test_labels) / test_weights.sum()
        print("Test label proportions:", test_prop)
        write_value('test.prop.true.txt', test_prop[1])
        write_value('test.prop.ae.nontest.txt', np.abs(test_prop[1] - nontest_prop[1]))
    else:
        test_prop = None

    pos_label = 1
    # use zero as the positive label if it the minority class
    if n_classes == 2:
        if nontest_prop[1] > 0.5:
            pos_label = 0
        print("Using %d as the positive label" % pos_label)

    # convert the label matrices into a categorical label vector
    train_label_vector = np.argmax(train_labels, axis=1)
    test_label_vector = np.argmax(test_labels, axis=1)
    dev_label_vector = np.argmax(dev_labels, axis=1)

    # train a model
    model = LogisticRegression(n_classes=n_classes, penalty=penalty, objective=objective)
    model.create_alpha_grid(config['n_alphas'], config['alpha_min'], config['alpha_max'])
    model.fit(train_X, train_label_vector, train_weights, dev_X, dev_label_vector, dev_weights, pos_label, average)
    print("Number of non-zero weights = %d" % model.get_model_size())

    # predict on train, dev, and test data
    train_f1, train_acc, train_cal = predict_evaluate_and_save(
        model, train_X, train_items, class_names, train_label_vector,
        pos_label=pos_label, average=average, weights=train_weights,
        output_dir=output_dir, output_prefix='train')
    dev_f1, dev_acc, dev_cal = predict_evaluate_and_save(
        model, dev_X, dev_items, class_names, dev_label_vector,
        pos_label=pos_label, average=average, weights=dev_weights,
        output_dir=output_dir, output_prefix='dev')

    if n_test > 0:
        test_f1, test_acc, test_cal = predict_evaluate_and_save(
            model, test_X, test_items, class_names, test_label_vector,
            pos_label=pos_label, average=average, weights=test_weights,
            output_dir=output_dir, output_prefix='test')
    else:
        test_f1 = np.nan
        test_acc = np.nan
        test_cal = np.nan

    print("Accuracy values: train %0.4f; dev %0.4f; test %0.4f" % (train_acc, dev_acc, test_acc))
    print("F1 values: train %0.4f; dev %0.4f; test %0.4f" % (train_f1, dev_f1, test_f1))

    if n_test > 0:
        # proportions from hard predictions (CC) and from predicted
        # probabilities (PCC)
        test_pred = model.predict(test_X)
        # NOTE(review): n_classes is hard-coded to 2 here rather than using
        # the n_classes found in the labels - confirm this is intended.
        cc_prop = compute_proportions_from_predicted_labels(test_pred, test_weights, n_classes=2)
        print("Predicted proportions on test:")
        print("CC :", cc_prop)
        write_value('test.prop.cc.txt', cc_prop[1])
        write_value('test.prop.ae.cc.txt', np.abs(test_prop[1] - cc_prop[1]))

        test_pred_probs = model.predict_proba(test_X)
        pcc_prop = np.dot(test_weights, test_pred_probs) / np.sum(test_weights)
        print("PCC:", pcc_prop)
        write_value('test.prop.pcc.txt', pcc_prop[1])
        write_value('test.prop.ae.pcc.txt', np.abs(test_prop[1] - pcc_prop[1]))

    if n_test > 0:
        # create a secondary ACC model
        print("Fitting ACC")
        acc_model = ACC()
        acc_model.fit(model, dev_X, dev_label_vector, dev_weights)
        acc_proportions = acc_model.predict_proportions(test_X, test_weights)
        print("ACC proportions:", acc_proportions)
        write_value('test.prop.acc.txt', acc_proportions[1])
        write_value('test.prop.ae.acc.txt', np.abs(test_prop[1] - acc_proportions[1]))

        # create a secondary calibration model
        print("Fitting Platt")
        platt_model = Platt()
        platt_model.fit(model, dev_X, dev_label_vector, dev_weights, smoothing=True)
        platt_proportions = platt_model.predict_proportions(test_X, test_weights)
        print("Platt proportions:", platt_proportions)
        write_value('test.prop.platt.txt', platt_proportions[1])
        write_value('test.prop.ae.platt.txt', np.abs(test_prop[1] - platt_proportions[1]))

    print_top_words(model, dev_X, all_vocab, n_classes=n_classes, n_words=40, output_dir=output_dir)

    # persist the model and the vocabulary
    joblib.dump(model, os.path.join(output_dir, 'model.pkl'))
    fh.write_to_json(all_vocab, os.path.join(output_dir, 'model.vocab.json.test.gz'), sort_keys=False, do_gzip=True)
    print("")
def main(args):
    """Train and evaluate a logistic-regression classifier on SimCLR features.

    Reads the YAML config named by ``args.config``, applies the command-line
    overrides, loads a SimCLR checkpoint, extracts features for the train and
    test splits, trains ``LogisticRegression`` on them with SGD, restores the
    weights of the epoch with the best training accuracy, and logs the test
    performance of that model.

    Args:
        args: parsed command-line namespace. This function reads
            ``args.config``, ``args.data_dir_path``, ``args.model`` and
            ``args.epoch_num``.

    Raises:
        FileNotFoundError: if the config file, the model directory, or the
            selected checkpoint file does not exist.
    """
    # Check the path *before* opening it: otherwise open() raises first and
    # the descriptive message below can never be produced.
    if not os.path.exists(args.config):
        raise FileNotFoundError('provided config file does not exist: %s' % args.config)
    # Context manager so the config file handle is closed deterministically.
    with open(args.config, "r") as config_file:
        config_yaml = yaml.load(config_file, Loader=yaml.FullLoader)

    train_section = config_yaml['simclr']['train']
    if 'restart_log_dir_path' not in train_section:
        train_section['restart_log_dir_path'] = None

    if args.data_dir_path is not None:
        train_section['data_dir_path'] = args.data_dir_path
        print('yo!: ', args.data_dir_path)

    config_yaml['logger_name'] = 'logreg'
    config = SimCLRConfig(config_yaml)

    # makedirs (not mkdir) so a missing parent directory is created as well.
    if not os.path.exists(config.base.output_dir_path):
        os.makedirs(config.base.output_dir_path, exist_ok=True)

    if not os.path.exists(config.base.log_dir_path):
        os.makedirs(config.base.log_dir_path, exist_ok=True)

    logger = setup_logger(config.base.logger_name, config.base.log_file_path)
    logger.info('using config: %s', config)

    # Keep a copy of the config next to the logs of this run.
    config_copy_file_path = os.path.join(config.base.log_dir_path, 'config.yaml')
    shutil.copy(args.config, config_copy_file_path)

    if not os.path.exists(args.model):
        raise FileNotFoundError('provided model directory does not exist: %s' % args.model)
    logger.info('using model directory: %s', args.model)

    config.logistic_regression.model_path = args.model
    logger.info('using model_path: {}'.format(config.logistic_regression.model_path))

    config.logistic_regression.epoch_num = args.epoch_num
    logger.info('using epoch_num: {}'.format(config.logistic_regression.epoch_num))

    # format() instead of '+' so an integer epoch_num does not raise TypeError.
    model_file_path = Path(config.logistic_regression.model_path).joinpath(
        'checkpoint_{}.pth'.format(config.logistic_regression.epoch_num))
    if not os.path.exists(model_file_path):
        raise FileNotFoundError('model file does not exist: %s' % model_file_path)
    logger.info('using model file: %s', model_file_path)

    writer = SummaryWriter(log_dir=config.base.log_dir_path)
    try:
        train_dataset, val_dataset, test_dataset, classes = Datasets.get_datasets(
            config, img_size=config.logistic_regression.img_size)
        num_classes = len(classes)

        train_loader, val_loader, test_loader = Datasets.get_loaders(
            config, train_dataset, val_dataset, test_dataset)

        # The SimCLR model is only used in eval mode; its parameters are not
        # handed to the optimizer below.
        simclr_model = load_simclr_model(config)
        simclr_model = simclr_model.to(config.base.device)
        simclr_model.eval()

        model = LogisticRegression(simclr_model.num_features, num_classes)
        model = model.to(config.base.device)

        learning_rate = config.logistic_regression.learning_rate
        momentum = config.logistic_regression.momentum
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate,
                                    momentum=momentum, nesterov=True)
        criterion = torch.nn.CrossEntropyLoss()

        logger.info("creating features from pre-trained context model")
        (train_x, train_y, test_x, test_y) = get_features(
            config, simclr_model, train_loader, test_loader
        )

        feature_train_loader, feature_test_loader = get_data_loaders(
            config, train_x, train_y, test_x, test_y
        )

        best_model_wts = copy.deepcopy(model.state_dict())
        best_acc = 0.0
        best_epoch = 0
        best_loss = 0

        for epoch in range(config.logistic_regression.epochs):
            loss_epoch, accuracy_epoch = train(
                config, feature_train_loader, model, criterion, optimizer
            )

            # NOTE(review): the sums come from feature_train_loader but are
            # averaged over len(train_loader); this is only an exact mean if
            # both loaders yield the same number of batches - confirm.
            loss = loss_epoch / len(train_loader)
            accuracy = accuracy_epoch / len(train_loader)

            writer.add_scalar("Loss/train_epoch", loss, epoch)
            writer.add_scalar("Accuracy/train_epoch", accuracy, epoch)
            logger.info(
                "epoch [%3.i|%i] -> train loss: %f, accuracy: %f",
                epoch + 1, config.logistic_regression.epochs, loss, accuracy,
            )

            # Model selection is done on *training* accuracy.
            if accuracy > best_acc:
                best_loss = loss
                best_epoch = epoch + 1
                best_acc = accuracy
                best_model_wts = copy.deepcopy(model.state_dict())

        model.load_state_dict(best_model_wts)
        logger.info(
            "train dataset performance -> best epoch: {}, loss: {}, accuracy: {}".format(
                best_epoch, best_loss, best_acc)
        )

        loss_epoch, accuracy_epoch = test(
            config, feature_test_loader, model, criterion
        )

        # NOTE(review): same normalisation caveat as above, with test_loader
        # versus feature_test_loader.
        loss = loss_epoch / len(test_loader)
        accuracy = accuracy_epoch / len(test_loader)
        logger.info(
            "test dataset performance -> best epoch: {}, loss: {}, accuracy: {}".format(
                best_epoch, loss, accuracy)
        )
    finally:
        # Flush and release the event file even if training fails part-way.
        writer.close()
# Retrieve args
args_parser = Parser()
args, data_args, algo_args, model_args, solvers = args_parser.get_args()

# Identify node
comm = mpi4py.MPI.COMM_WORLD
rank = comm.Get_rank()

# Set up loggers / plotters
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("Main {}".format(rank))
plotter = Plotter(filename=args.plotter_path)

# Load model and dataset
model = LogisticRegression(**model_args)
error_model = model.get_global(comm.size)
_loader = LIBSVM_Loader(**data_args, seed=args.seed, rank=rank)
global_dataset = _loader.load(**data_args, comm_size=comm.size)
local_dataset = global_dataset.get_truncated(rank, comm.size)
# Only rank 0 keeps a reference to the full dataset; every other rank
# drops it once its local part has been taken.
global_dataset = None if rank > 0 else global_dataset

# Build graph
_graph_cls = get_graph_class(args.graph)
graph = _graph_cls(comm.size, seed=args.seed, logger=log)
log.info(graph)

# Name the run after the integer part of the current Unix timestamp
filename = str(time.time()).partition(".")[0]
ap = AveragedPerceptron()
ap.train(learning_rates)
ap.report()
ap.evaluate()

############################################
######           Part II         ###########
############################################


def _train_report_evaluate(classifier, epochs):
    """Train ``classifier`` for ``epochs`` epochs, then report and evaluate it via ``hm``."""
    classifier.train(epochs=epochs)
    hm.report(classifier)
    hm.evaluate(classifier)


svm = SVM(verbose=True)
_train_report_evaluate(svm, 20)

lr = LogisticRegression(verbose=True)
_train_report_evaluate(lr, 20)

nb = NaiveBayes()
_train_report_evaluate(nb, 1)

# Logistic regression using sklearn
# These imports stay here on purpose: the sklearn import rebinds the name
# ``LogisticRegression``, which was called with ``verbose=True`` just above.
import data as dt
from sklearn.linear_model import LogisticRegression

train_data = dt.load_data(dt.TRAIN, matrix=True)
test_data = dt.load_data(dt.TEST, matrix=True)
# Data from:
# https://en.wikipedia.org/wiki/Logistic_regression#Probability_of_passing_an_exam_versus_hours_of_study
# X holds the 20 feature values and y the matching 0/1 labels, both as
# float32 column vectors.
X = np.array([
    0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
    2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50
], dtype='float32').reshape(20, 1)
y = np.array(
    [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1],
    dtype='float32',
).reshape(20, 1)

# Prepend a column of ones so the first weight acts as the intercept.
X = np.hstack((np.ones((20, 1), dtype='float32'), X))

# Fit model to data
model = LogisticRegression(data=X, labels=y)
weights = model.fit(alpha=0.1, verbose=True)

# Generate line of best fit
x_bf = np.linspace(0, 6, dtype='float32')
y_bf = np.array(
    [sigmoid(weights[0][0] + x_val * weights[1][0]) for x_val in x_bf],
    dtype='float32',
)

# Plot the raw samples against the fitted sigmoid curve.
plt.scatter(X[:, 1], y, color='b', s=75, label='Samples')
plt.plot(x_bf, y_bf, color='r', label='Fitted Model')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.title('Logistic Regression')
plt.legend()
plt.show()
class Platt:
    """
    Apply Platt scaling to a score classifier.

    A one-feature logistic regression is fitted on the scores of an already
    trained base model; its probabilities replace those of the base model.
    If the calibration data contains a single label, no calibration model is
    fitted and the base model's probabilities are returned unchanged.
    """

    def __init__(self, penalty='l2', alpha=100000.0):
        """Store the settings passed to the calibration ``LogisticRegression``.

        :param penalty: forwarded as ``penalty`` to ``LogisticRegression``
        :param alpha: forwarded as ``alpha`` to ``LogisticRegression``
        """
        self._penalty = penalty
        self._alpha = alpha
        # Not used by any method of this class.
        self._p_pred_given_true = None
        self._base_model = None
        self._platt_model = None

    def fit(self, model, X, label_vector, weights, smoothing=False):
        """Fit the calibration model on the scores ``model`` assigns to ``X``.

        :param model: trained base model; must provide ``get_n_classes()``,
            ``score(X)`` and ``predict_proba(X)``
        :param X: calibration instances (a list or a scipy sparse matrix when
            ``smoothing`` is used)
        :param label_vector: array of integer 0/1 labels, one per instance
        :param weights: array of instance weights, one per instance
        :param smoothing: if True, first expand the data with
            :meth:`reweight_data` so that targets are smoothed
        """
        n_classes = model.get_n_classes()
        if n_classes > 2:
            # NOTE(review): this terminates the whole process (SystemExit);
            # kept as-is because callers may rely on it.
            sys.exit("Platt scaling not yet implemented for more than 2 classes.")

        self._base_model = model
        if smoothing:
            X, label_vector, weights = self.reweight_data(X, label_vector, weights)

        # The calibration model sees a single feature: the base model's score.
        scores = np.reshape(model.score(X), (len(label_vector), 1))

        bincount = np.bincount(label_vector, minlength=n_classes)
        most_common = np.argmax(bincount)

        # check to see if there is only one label in the training data:
        if bincount[most_common] == len(label_vector):
            print("Only label %d found in dev data; skipping Platt" % most_common)
        else:
            self._platt_model = LogisticRegression(n_classes, alpha=self._alpha,
                                                   penalty=self._penalty, objective='acc')
            self._platt_model.fit(scores, label_vector, weights)

    def predict_proba(self, X):
        """Return calibrated probabilities, or the base model's if uncalibrated."""
        if self._platt_model is None:
            return self._base_model.predict_proba(X)
        scores = self._base_model.score(X)
        scores = scores.reshape((len(scores), 1))
        return self._platt_model.predict_proba(scores)

    def predict(self, X):
        """Return, per instance, the index of the most probable class."""
        pred_probs = self.predict_proba(X)
        return np.argmax(pred_probs, axis=1)

    def predict_proportions(self, X, weights):
        """Return the weighted mean of the predicted probabilities over ``X``."""
        pred_probs = self.predict_proba(X)
        return np.dot(weights, pred_probs) / np.sum(weights)

    def reweight_data(self, X, label_vector, instance_weights):
        """Duplicate the data with flipped labels to obtain smoothed targets.

        With ``S_c`` the total instance weight of class ``c``, every instance
        keeps a fraction ``(S_c + 1) / (S_c + 2)`` of its weight on its
        observed label; the remainder goes to a copy carrying the opposite
        label. The two weights of an instance therefore always sum to its
        original weight and are never negative.

        :param X: list of instances or scipy sparse matrix
        :param label_vector: array of 0/1 labels
        :param instance_weights: array of per-instance weights
        :return: ``(X, y, w)``, each twice as long as the inputs
        """
        n_classes = self._base_model.get_n_classes()
        cl_sums = np.zeros(n_classes)
        for cl in range(n_classes):
            sel = np.array(label_vector == cl, dtype=bool)
            cl_sums[cl] = np.sum(instance_weights[sel])

        pos_weight = (cl_sums[1] + 1) / (cl_sums[1] + 2)
        neg_weight = (cl_sums[0] + 1) / (cl_sums[0] + 2)

        keep_fraction = label_vector * pos_weight + (1 - label_vector) * neg_weight
        weight_vector = keep_fraction * instance_weights
        # The flipped copy receives what is left of each instance's weight.
        # The previous ``1 - weight_vector`` was only right for unit weights
        # and went negative for any instance weight above 1 / keep_fraction.
        flipped_weight_vector = instance_weights - weight_vector

        if isinstance(X, list):
            X = X + X
        else:
            X = sparse.vstack([X, X])

        y = np.r_[label_vector, 1 - label_vector]
        w = np.r_[weight_vector, flipped_weight_vector]
        return X, y, w
def classifier_lenet5(learning_rate=0.1, n_epochs=200,
                      dataset='../../data/mnist.pkl.gz',
                      nkerns=(20, 50), batch_size=500):
    """ Demonstrates lenet on MNIST dataset.

    Builds two convolution/pooling layers, one fully-connected tanh layer
    and a logistic-regression output layer, then trains them with minibatch
    SGD and patience-based early stopping, printing validation and test
    errors as it goes. Nothing is returned.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training/testing (MNIST)

    :type nkerns: sequence of two ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples per minibatch; examples beyond
                       the last full minibatch of each split are not used
    """
    rng = np.random.RandomState(23455)

    datasets = load_data(dataset)

    train_x, train_y = datasets[0]
    valid_x, valid_y = datasets[1]
    test_x, test_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    # index to a minibatch
    index = T.lscalar()
    # the data is presented as rasterized images
    x = T.matrix('x')
    # the labels are presented as 1D vector of int labels
    y = T.ivector('y')

    # build the model
    print('... building the model')

    # reshape matrix of rasterized images of shape (batch_size, 28*28)
    # to a 4D tensor, compatible with our ConvLayer (28, 28)
    # is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = Conv(rng,
                  input=layer0_input,
                  image_shape=(batch_size, 1, 28, 28),
                  filter_shape=(nkerns[0], 1, 5, 5),
                  poolsize=(2, 2))

    # construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = Conv(rng,
                  input=layer0.output,
                  image_shape=(batch_size, nkerns[0], 12, 12),
                  filter_shape=(nkerns[1], nkerns[0], 5, 5),
                  poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices
    # of shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer with tanh activation
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create functions to compute the mistakes that are made by the model
    givens = {
        x: test_x[index * batch_size:(index + 1) * batch_size],
        y: test_y[index * batch_size:(index + 1) * batch_size]
    }
    test_model = theano.function([index], layer3.errors(y), givens=givens)

    givens = {
        x: valid_x[index * batch_size:(index + 1) * batch_size],
        y: valid_y[index * batch_size:(index + 1) * batch_size]
    }
    valid_model = theano.function([index], layer3.errors(y), givens=givens)

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter.
    # We thus create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    givens = {
        x: train_x[index * batch_size:(index + 1) * batch_size],
        y: train_y[index * batch_size:(index + 1) * batch_size]
    }
    train_model = theano.function([index], cost, updates=updates,
                                  givens=givens)

    # train the model
    print('... training')

    # early-stopping parameters
    # look at this many examples regardless
    patience = 10000
    # wait this much longer when a new best is found
    patience_increase = 2
    # a relative improvement of this much is considered significant
    improvement_threshold = 0.995
    # go through this many minibatches before checking the network
    # on the validation set, in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for minibatch_index in range(n_train_batches):

            # global count of minibatches processed before this one
            # (named ``iteration`` so the builtin ``iter`` is not shadowed)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print("training @ iter = ", iteration)

            # called for its side effect: it applies the SGD updates
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    valid_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print((
                        "epoch %i, minibatch %i/%i, test error of best model %f %%"
                    ) % (epoch, minibatch_index + 1, n_train_batches,
                         test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    print("Optimization complete.")
    print("Best validation score of %f %% obtained at iteration %i, "
          "with test performance %f %%" %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))