def build_model(args):
    """Construct the model, optimizer, and loss for an experiment.

    Parameters
    ----------
    args : argparse.Namespace
        Experiment arguments.

    Returns
    -------
    model : torch.nn.Module
        The model to be trained.
    optimizer : torch.optim.Optimizer
        The optimizer.
    loss : torch.nn.Module
        The loss function.
    """
    # Logistic regression parameterized by the experiment arguments.
    model = models.LogisticRegression(init_m=args.init_m, init_b=args.init_b)
    # Adam is a good default optimizer choice.
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # Binary classification on raw logits -> BCE-with-logits loss.
    loss = nn.BCEWithLogitsLoss()
    # Move model and loss to the GPU when requested.
    if args.cuda:
        model, loss = model.cuda(), loss.cuda()
    return model, optimizer, loss
def main():
    """CLI entry point: train a model or write test-set predictions.

    In "train" mode the selected algorithm is fit on the data and pickled
    to ``args.model_file``; in "test" mode the pickled model is loaded,
    predictions are validated to be 0/1, and written to
    ``args.predictions_file``.
    """
    args = get_args()
    check_args(args)

    mode = args.mode.lower()
    if mode == "train":
        # Load the training data.
        X, y = load_data(args.data)

        # Create the model.
        # TODO: Add other algorithms as necessary.
        algorithm = args.algorithm.lower()
        if algorithm == 'useless':
            model = models.Useless()
        elif algorithm == 'sumoffeatures':
            model = models.SumOfFeatures()
        elif algorithm == 'perceptron':
            model = models.Perceptron(args.online_learning_rate,
                                      args.online_training_iterations)
        elif algorithm == 'logisticregression':
            model = models.LogisticRegression(args.online_learning_rate,
                                              args.gd_iterations,
                                              args.num_features_to_select)
        else:
            # BUG FIX: the flag that selects the algorithm is --algorithm,
            # not --model; the old message pointed users at the wrong flag.
            raise Exception(
                'The algorithm given by --algorithm is not yet supported.')

        # Train the model.
        model.fit(X, y)

        # Save the model.
        try:
            with open(args.model_file, 'wb') as f:
                pickle.dump(model, f)
        except IOError:
            raise Exception("Exception while writing to the model file.")
        except pickle.PickleError:
            raise Exception("Exception while dumping model pickle.")

    elif mode == "test":
        # Load the test data.
        X, y = load_data(args.data)

        # Load the model.
        try:
            with open(args.model_file, 'rb') as f:
                model = pickle.load(f)
        except IOError:
            raise Exception("Exception while reading the model file.")
        except pickle.PickleError:
            raise Exception("Exception while loading model pickle.")

        # Compute and save the predictions; only hard 0/1 labels are valid.
        y_hat = model.predict(X)
        invalid_label_mask = (y_hat != 0) & (y_hat != 1)
        if invalid_label_mask.any():
            raise Exception(
                'All predictions must be 0 or 1, but found other predictions.')
        np.savetxt(args.predictions_file, y_hat, fmt='%d')

    else:
        raise Exception("Mode given by --mode is unrecognized.")
async def get(self, filename):
    """Handle GET for the training page of an uploaded CSV.

    Loads the uploaded file, splits it into features/target around the
    user-selected column, trains a logistic regression, and renders the
    result template with the achieved accuracy.
    """
    # BUG FIX: the path must interpolate the uploaded file's name; the
    # previous f-string contained no placeholder and ignored `filename`.
    df = pd.read_csv(f'uploads/{filename}')
    # Index of the column the user wants to predict (query argument "col").
    col_predict = int(self.get_argument("col"))
    df, x, y = models.getSets(df, col_predict)
    # HTML preview of the data shown on the page (capped at 15 rows).
    dfHTML = df.to_html(max_rows=15, justify='center', col_space=50)
    reg = models.LogisticRegression()
    # NOTE(review): 0.5 is presumably a train/test split ratio -- confirm
    # against models.LogisticRegression.trainAndPredict.
    acc = await reg.trainAndPredict(x, y, 0.5)
    self.render("train.html", filename=filename, data=dfHTML, trained=True,
                acc=acc, col=col_predict)
def evaluate(self):
    """Train a logistic-regression probe on the frozen embeddings.

    Used for producing the results of Experiment 3 in the paper. Runs 50
    independent probe trainings (100 steps each, Adam lr=0.01) on the train
    split, then reports mean/std validation and test accuracy in percent.
    """
    print("Evaluating ...")
    emb_dim, num_class = self._embeddings.shape[1], self._labels.unique(
    ).shape[0]
    dev_accs, test_accs = [], []
    for i in range(50):
        classifier = models.LogisticRegression(emb_dim,
                                               num_class).to(self._device)
        optimizer = torch.optim.Adam(classifier.parameters(), lr=0.01,
                                     weight_decay=0.0)
        # 100 full-batch gradient steps on the training split.
        for _ in range(100):
            classifier.train()
            logits, loss = classifier(self._embeddings[self._train_mask],
                                      self._labels[self._train_mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        dev_logits, _ = classifier(self._embeddings[self._dev_mask],
                                   self._labels[self._dev_mask])
        test_logits, _ = classifier(self._embeddings[self._test_mask],
                                    self._labels[self._test_mask])
        dev_preds = torch.argmax(dev_logits, dim=1)
        test_preds = torch.argmax(test_logits, dim=1)
        # Fraction correct on each split, pulled back to numpy scalars.
        dev_acc = (
            torch.sum(dev_preds == self._labels[self._dev_mask]).float() /
            self._labels[self._dev_mask].shape[0]).detach().cpu().numpy()
        test_acc = (
            torch.sum(test_preds == self._labels[self._test_mask]).float() /
            self._labels[self._test_mask].shape[0]).detach().cpu().numpy()
        # Stored as percentages; the per-iteration print shows raw fractions.
        dev_accs.append(dev_acc * 100)
        test_accs.append(test_acc * 100)
        print(
            "Finished iteration {:02} of the logistic regression classifier. Validation accuracy {:.2f} test accuracy {:.2f}"
            .format(i + 1, dev_acc, test_acc))
    dev_accs = np.stack(dev_accs)
    test_accs = np.stack(test_accs)
    # CONSISTENCY FIX: format the validation std with {:.2f} like the test
    # std, instead of printing the raw numpy repr via bare {}.
    print('Average validation accuracy: {:.2f} with std: {:.2f}'.format(
        dev_accs.mean(), dev_accs.std()))
    print('Average test accuracy: {:.2f} with std: {:.2f}'.format(
        test_accs.mean(), test_accs.std()))
def train(args):
    """Fit a model's parameters given the parameters specified in args.

    Builds the model selected by ``args.algorithm``, runs the online
    training loop, and pickles the fitted model to ``args.model_file``.
    """
    X, y = load_data(args.data)

    # Build the appropriate model.
    if args.algorithm == "perceptron":
        model = models.Perceptron(nfeatures=X.shape[1])
    elif args.algorithm == "logistic":
        model = models.LogisticRegression(nfeatures=X.shape[1])
    else:
        raise Exception("Algorithm argument not recognized")

    # Run the training loop (the epoch counter itself is unused).
    for _ in range(args.online_training_iterations):
        model.fit(X=X, y=y, lr=args.online_learning_rate)

    # BUG FIX: use a context manager so the file handle is closed even if
    # pickling raises (the original leaked the handle from a bare open()).
    with open(args.model_file, 'wb') as f:
        pickle.dump(model, f)
def main():
    """Parse --model and run the selected demo: linear regression on a tiny
    hard-coded dataset, logistic regression on iris, or a CNN on MNIST."""
    parser = argparse.ArgumentParser()
    # Valid choices come from the project's model registry.
    parser.add_argument('-m', '--model', type=str, choices=models.available_models, dest='model_name')
    args = parser.parse_args()
    if args.model_name == "LinearRegression":
        # 1-in/1-out linear regression on a small hard-coded dataset.
        model = models.LinearRegression(1, 1, learning_rate=0.005)
        x_train = np.array([[2.3], [4.4], [3.7], [6.1], [7.3], [2.1], [5.6], [7.7], [8.7], [4.1], [6.7], [6.1], [7.5], [2.1], [7.2], [5.6], [5.7], [7.7], [3.1]], dtype=np.float32)
        y_train = np.array([[3.7], [4.76], [4.], [7.1], [8.6], [3.5], [5.4], [7.6], [7.9], [5.3], [7.3], [7.5], [8.5], [3.2], [8.7], [6.4], [6.6], [7.9], [5.3]], dtype=np.float32)
        t_x = Tensor(x_train)
        t_y = Tensor(y_train)
        tensor_dataset = TensorDataset(t_x, t_y)
        data_loader = DataLoader(tensor_dataset, batch_size=32)
        model.run(data_loader, model)
    elif args.model_name == "LogisticRegression":
        # Multiclass logistic regression on the iris dataset.
        X_train, y_train = load_iris(return_X_y=True)
        t_x, t_y = torch.tensor(X_train, dtype=torch.float), torch.tensor(y_train, dtype=torch.long)
        tensor_dataset = TensorDataset(t_x, t_y)
        # Class count and feature dimension are derived from the data itself.
        num_classes = len(set(y_train))
        input_dim = X_train.shape[1]
        data_loader = DataLoader(tensor_dataset, batch_size=32, shuffle=True)
        model = models.LogisticRegression(input_dim, num_classes, learning_rate=0.01, epoch=1000)
        model.run(data_loader, model)
    elif args.model_name == "Convolution2D":
        # Small CNN on MNIST; download=False, so the dataset must already
        # exist under ../../dataset/mnist.
        n_epoch = 5
        tr_batch_size, ts_batch_size = 32, 1024
        # NOTE(review): (0.1307,), (0.3081,) look like the usual MNIST
        # mean/std normalization constants -- confirm.
        data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])
        tr_mnist = MNIST(root=os.path.realpath('../../dataset/mnist'), train=True, transform=data_transform, download=False)
        ts_mnist = MNIST(root=os.path.realpath('../../dataset/mnist'), train=False, transform=data_transform, download=False)
        train_loader = DataLoader(tr_mnist, batch_size=tr_batch_size, shuffle=True)
        test_loader = DataLoader(ts_mnist, batch_size=ts_batch_size, shuffle=True)
        conv_net = models.Convolution2D(n_epoch=n_epoch, log_per_batch=1000, train_batch_size=tr_batch_size)
        conv_net.run(train_loader, test_loader)
def main():
    """Train and evaluate four classifiers (logistic regression, neural
    network, SVM, random forest) on the MNIST, USPS, and combined test sets,
    reporting accuracy and confusion matrices for each."""
    data = Data()
    logistic_regression = models.LogisticRegression()
    neural_network = models.NeuralNet()
    svm = models.SupportVectorMachine(C=1.0, kernel='rbf', gamma='scale')
    random_forest = models.RandomForest(n_estimators=100, max_depth=None,
                                        random_state=None)

    # Process dataset.
    training_data_features, training_data_labels, mnist_test_data_features, mnist_test_data_labels, \
        usps_test_data_features, usps_test_data_labels, combined_test_data_features, combined_test_data_labels = \
        data.pre_process()

    def _evaluate_and_report(title, model):
        # Shared evaluation: predict on each test split, then report the
        # accuracies and confusion matrices (removes 4x duplicated code).
        accuracy_mnist, confusion_mnist = model.predict(
            mnist_test_data_features, mnist_test_data_labels)
        accuracy_usps, confusion_usps = model.predict(
            usps_test_data_features, usps_test_data_labels)
        accuracy_combined, confusion_combined = model.predict(
            combined_test_data_features, combined_test_data_labels)
        print_and_plot(title, accuracy_mnist, accuracy_usps,
                       accuracy_combined, confusion_mnist, confusion_usps,
                       confusion_combined)

    # Logistic Regression
    logistic_regression.fit(training_data_features, training_data_labels,
                            learning_rate=0.01, epochs=500)
    _evaluate_and_report('Logistic Regression', logistic_regression)

    # Neural Network
    neural_network.fit(training_data_features, training_data_labels, epochs=10)
    _evaluate_and_report('Neural Network', neural_network)

    # Support Vector Machine
    svm.fit(training_data_features, training_data_labels)
    _evaluate_and_report('SVM', svm)

    # Random Forest
    random_forest.fit(training_data_features, training_data_labels)
    _evaluate_and_report('Random Forest', random_forest)
train=True, download=True, transform=transform_train, target_transform=target_transform) trainset_wo_aug = torchvision.datasets.CIFAR10( root='./data', train=True, download=False, transform=transform_test, target_transform=target_transform) bc = bilevel_coreset.BilevelCoreset(loss_fn, loss_fn, max_inner_it=7500, max_conj_grad_it=100) model = models.LogisticRegression(nystrom_features_dim, num_classes) # choose base inds based_inds = np.random.choice(len(trainset.targets), base_inds_size, replace=False) inds = bc.build_with_nystrom_proxy( trainset, trainset_wo_aug, based_inds, coreset_size, kernel_fn_ntk, loader_creator_fn, model, nystrom_features_dim=nystrom_features_dim,
    # Tail of the enclosing k-fold routine (its `def` line is above this
    # chunk): accumulate one confusion matrix per fold, then average.
    for train_index, test_index in group_kfold.split(Xdata, Ydata, groups):
        model.train(Xdata[train_index], Ydata[train_index])
        Ypred = model.test(Xdata[test_index])
        confusion = sklearn.metrics.confusion_matrix(Ydata[test_index], Ypred, labels=features.labels)
        # Lazily allocate the accumulator with the first fold's shape.
        if sum_confusion is None:
            sum_confusion = np.zeros(confusion.shape)
        sum_confusion += confusion
    # Average confusion matrix over the k folds.
    return sum_confusion / k


def select_best_model(Xdata, Ydata, models):
    """Return the (index, accuracy) pair of the model with the best
    4-fold cross-validated accuracy among `models`."""
    avg_accuracies = [(i, k_fold_cross_validate(Xdata, Ydata, 4, model)) for i, model in enumerate(models)]
    print(avg_accuracies)
    # Highest accuracy wins; the accuracy sits at position 1 of each pair.
    return max(avg_accuracies, key=operator.itemgetter(1))


# Script entry: compute features once, then evaluate every candidate model.
allfeatures = features.compute_or_read_features()
Xdata, Ydata = to_numpy_arrays(allfeatures)
# NOTE(review): this rebinding shadows the imported `models` module -- any
# later use of the module under this name would break; consider renaming
# the list (e.g. candidate_models).
models = [models.RandomForest(200, 'gini'), models.LogisticRegression(), models.SVMNonLinear('rbf'), models.SVMNonLinear('sigmoid'), models.NeuralNet(), models.KNN()]
#best = select_best_model(Xdata, Ydata, models)
#print(best)
for model in models:
    cm = k_fold_confusion_matrix(Xdata, Ydata, 4, model)
    save_confusion_matrix(cm, model._name)
    print(f"Confusion matrix for {model._name} saved")
def evaluate(self):
    """Evaluates SelfGNN on the train, validation, and test splits in a
    semi-supervised fashion.

    Note: Used for producing the results of Experiment 1, 3 in the paper.
    Trains a logistic-regression probe per split (or 20 repeats for 1-D
    masks), writes the aggregate result to a file, and returns
    (dev_acc, dev_std, test_acc, test_std) in percent.
    """
    print("Evaluating ...")
    emb_dim, num_class = self._embeddings.shape[1], self._labels.unique(
    ).shape[0]
    dev_accs, test_accs = [], []
    args = self._args
    # 2-D masks carry one column per split; 1-D masks get 20 repeats.
    iters = 20 if len(
        self._train_mask.shape) == 1 else self._train_mask.shape[1]
    for i in range(iters):
        classifier = models.LogisticRegression(emb_dim,
                                               num_class).to(self._device)
        optimizer = torch.optim.Adam(classifier.parameters(), lr=0.01,
                                     weight_decay=0.0)
        mask_index = None if len(self._train_mask.shape) == 1 else i
        # BUG FIX: pass mask_index (None for 1-D masks) instead of the raw
        # loop counter -- mask_index was previously computed and then ignored.
        train_mask, dev_mask, test_mask = index_mask(self._train_mask,
                                                     self._dev_mask,
                                                     self._test_mask,
                                                     index=mask_index)
        # 100 full-batch gradient steps on the training split.
        for _ in range(100):
            classifier.train()
            logits, loss = classifier(self._embeddings[train_mask],
                                      self._labels[train_mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        dev_logits, _ = classifier(self._embeddings[dev_mask],
                                   self._labels[dev_mask])
        test_logits, _ = classifier(self._embeddings[test_mask],
                                    self._labels[test_mask])
        dev_preds = torch.argmax(dev_logits, dim=1)
        test_preds = torch.argmax(test_logits, dim=1)
        # Fraction correct on each split, pulled back to numpy scalars.
        dev_acc = (torch.sum(dev_preds == self._labels[dev_mask]).float() /
                   self._labels[dev_mask].shape[0]).detach().cpu().numpy()
        test_acc = (
            torch.sum(test_preds == self._labels[test_mask]).float() /
            self._labels[test_mask].shape[0]).detach().cpu().numpy()
        # Stored as percentages; the per-iteration print shows raw fractions.
        dev_accs.append(dev_acc * 100)
        test_accs.append(test_acc * 100)
        print(
            "Finished iteration {:02} of the logistic regression classifier. Validation accuracy {:.2f} test accuracy {:.2f}"
            .format(i + 1, dev_acc, test_acc))
    dev_accs = np.stack(dev_accs)
    test_accs = np.stack(test_accs)
    dev_acc, dev_std = dev_accs.mean(), dev_accs.std()
    test_acc, test_std = test_accs.mean(), test_accs.std()
    # Persist the aggregate result, keyed by the normalization configuration.
    nc = self._norm_config
    path = osp.join(
        self._dataset.result_dir,
        f"results-norm.encoder.{nc['encoder_norm']}.projection.{nc['prj_head_norm']}.prediction.{nc['prd_head_norm']}.txt"
    )
    with open(path, 'w') as f:
        f.write(
            f"{args.name},{args.model},{dev_acc:.4f},{dev_std:.2f},{test_acc:.4f},{test_std:.2f}"
        )
    # CONSISTENCY FIX: format the validation std with {:.2f} like the test
    # std, instead of printing the raw numpy repr via bare {}.
    print('Average validation accuracy: {:.2f} with std: {:.2f}'.format(
        dev_acc, dev_std))
    print('Average test accuracy: {:.2f} with std: {:.2f}'.format(
        test_acc, test_std))
    return dev_acc, dev_std, test_acc, test_std
def main():
    """Train and evaluate classic classifiers (discriminant analysis,
    logistic regression, neural network, SVM, random forest) on the
    MNIST/USPS/combined test sets, then run the generative deep models
    (RBM, VAE, convolutional VAE)."""
    data = Data()
    logistic_regression = models.LogisticRegression()
    neural_network = models.NeuralNet()
    svm = models.SupportVectorMachine(C=1.0, kernel='rbf', gamma='scale')
    random_forest = models.RandomForest(n_estimators=100, max_depth=None, random_state=None)
    discriminant_analysis = DiscriminantAnalysis()
    # Convolutional VAE built directly on the MNIST feed; (28, 28) are the
    # input/output image dimensions.
    vaecnn = deep_learning_models.VAEConvolutionNeuralNet(
        input_data.read_data_sets("data", one_hot=True), (28, 28), (28, 28))

    # Process dataset: one training split plus three test splits
    # (MNIST, USPS, and both combined).
    training_data_features, training_data_labels, mnist_test_data_features, mnist_test_data_labels, \
        usps_test_data_features, usps_test_data_labels, combined_test_data_features, combined_test_data_labels = \
        data.pre_process()

    # Discriminant Analysis -- works on square images, so the flat feature
    # vectors are reshaped to IMAGE_SIZE x IMAGE_SIZE first.
    IMAGE_SIZE = int(training_data_features.shape[-1]**0.5)
    discriminant_analysis.fit(
        training_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        training_data_labels)
    accuracy_mnist, confusion_mnist = discriminant_analysis.predict(
        'MNIST dataset',
        mnist_test_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        mnist_test_data_labels)
    accuracy_usps, confusion_usps = discriminant_analysis.predict(
        'USPS dataset',
        usps_test_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        usps_test_data_labels)
    accuracy_combined, confusion_combined = discriminant_analysis.predict(
        'Combined dataset',
        combined_test_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        combined_test_data_labels)
    print_and_plot('Bayesian Discriminant Analysis', accuracy_mnist,
                   accuracy_usps, accuracy_combined, confusion_mnist,
                   confusion_usps, confusion_combined)

    # Logistic Regression
    logistic_regression.fit(training_data_features, training_data_labels,
                            learning_rate=0.01, epochs=500)
    accuracy_mnist, confusion_mnist = logistic_regression.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = logistic_regression.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = logistic_regression.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Logistic Regression', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)

    # Neural Network
    neural_network.fit(training_data_features, training_data_labels, epochs=10)
    accuracy_mnist, confusion_mnist = neural_network.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = neural_network.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = neural_network.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Neural Network', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)

    # Support Vector Machine
    svm.fit(training_data_features, training_data_labels)
    accuracy_mnist, confusion_mnist = svm.predict(mnist_test_data_features,
                                                  mnist_test_data_labels)
    accuracy_usps, confusion_usps = svm.predict(usps_test_data_features,
                                                usps_test_data_labels)
    accuracy_combined, confusion_combined = svm.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('SVM', accuracy_mnist, accuracy_usps, accuracy_combined,
                   confusion_mnist, confusion_usps, confusion_combined)

    # Random Forest
    random_forest.fit(training_data_features, training_data_labels)
    accuracy_mnist, confusion_mnist = random_forest.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = random_forest.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = random_forest.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Random Forest', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)

    # Restricted Boltzmann Machine: one RBM per hidden-layer size, each
    # sampled and used to generate images.
    num_hidden_nodes_list = [20, 100, 500]
    for num_hidden_nodes in num_hidden_nodes_list:
        rbm = deep_learning_models.RBM(images=input_data.read_data_sets(
            "data", one_hot=True),
            n_components=num_hidden_nodes,
            learning_rate=0.02,
            batch_size=100,
            n_iter=1000,
            random_state=0)
        rbm.fit()
        rbm.gibbs_sampling(1000)
        rbm.generate_images(num_hidden_nodes)

    # Variational Auto Encoders: one VAE per latent-code size.
    code_unit_list = [2, 8, 16]
    for code_unit in code_unit_list:
        vae = deep_learning_models.VAE(
            input_data.read_data_sets("data", one_hot=True), code_unit)
        vae.generate_images(epochs=20)

    # Variational Auto Encoders with Convolutional Neural Networks
    vaecnn.encode()
    vaecnn.decode()
    vaecnn.compile_()
    vaecnn.train(epochs=10, batch_size=100)
def main():
    """Train and evaluate five classifiers (Bayesian discriminant analysis,
    logistic regression, neural network, SVM, random forest) on the MNIST,
    USPS, and combined test sets, reporting accuracy and confusion matrices
    for each."""
    data = Data()
    logistic_regression = models.LogisticRegression()
    neural_network = models.NeuralNet()
    svm = models.SupportVectorMachine(C=1.0, kernel='rbf', gamma='scale')
    random_forest = models.RandomForest(n_estimators=100, max_depth=None,
                                        random_state=None)
    discriminant_analysis = DiscriminantAnalysis()

    # Process dataset: one training split plus three test splits.
    training_data_features, training_data_labels, mnist_test_data_features, mnist_test_data_labels, \
        usps_test_data_features, usps_test_data_labels, combined_test_data_features, combined_test_data_labels = \
        data.pre_process()

    def _evaluate_and_report(title, model):
        # Shared evaluation for models with a predict(features, labels)
        # signature: predict on each test split, then report the accuracies
        # and confusion matrices (removes 4x duplicated code).
        accuracy_mnist, confusion_mnist = model.predict(
            mnist_test_data_features, mnist_test_data_labels)
        accuracy_usps, confusion_usps = model.predict(
            usps_test_data_features, usps_test_data_labels)
        accuracy_combined, confusion_combined = model.predict(
            combined_test_data_features, combined_test_data_labels)
        print_and_plot(title, accuracy_mnist, accuracy_usps,
                       accuracy_combined, confusion_mnist, confusion_usps,
                       confusion_combined)

    # Discriminant Analysis -- kept inline: its predict() takes a dataset
    # name and square images, unlike the other models.
    IMAGE_SIZE = int(training_data_features.shape[-1]**0.5)
    discriminant_analysis.fit(
        training_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        training_data_labels)
    accuracy_mnist, confusion_mnist = discriminant_analysis.predict(
        'MNIST dataset',
        mnist_test_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        mnist_test_data_labels)
    accuracy_usps, confusion_usps = discriminant_analysis.predict(
        'USPS dataset',
        usps_test_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        usps_test_data_labels)
    accuracy_combined, confusion_combined = discriminant_analysis.predict(
        'Combined dataset',
        combined_test_data_features.reshape((-1, IMAGE_SIZE, IMAGE_SIZE)),
        combined_test_data_labels)
    print_and_plot('Bayesian Discriminant Analysis', accuracy_mnist,
                   accuracy_usps, accuracy_combined, confusion_mnist,
                   confusion_usps, confusion_combined)

    # Logistic Regression
    logistic_regression.fit(training_data_features, training_data_labels,
                            learning_rate=0.01, epochs=500)
    _evaluate_and_report('Logistic Regression', logistic_regression)

    # Neural Network
    neural_network.fit(training_data_features, training_data_labels, epochs=10)
    _evaluate_and_report('Neural Network', neural_network)

    # Support Vector Machine
    svm.fit(training_data_features, training_data_labels)
    _evaluate_and_report('SVM', svm)

    # Random Forest
    random_forest.fit(training_data_features, training_data_labels)
    _evaluate_and_report('Random Forest', random_forest)