def main():
    """Train a model or write test predictions, depending on ``args.mode``.

    In "train" mode the data at ``args.data`` is loaded, the model named by
    ``args.algorithm`` is constructed and fitted, and the fitted model is
    pickled to ``args.model_file``.

    In "test" mode the pickled model is loaded from ``args.model_file``, used
    to predict labels for the data at ``args.data``, and the predictions are
    written to ``args.predictions_file`` (one integer per line).

    Raises:
        Exception: if the mode or algorithm is not recognized, if the model
            file cannot be read/written or (un)pickled, or if any prediction
            is something other than 0 or 1.
    """
    args = get_args()
    check_args(args)
    mode = args.mode.lower()

    if mode == "train":
        # Load the training data.
        X, y = load_data(args.data)

        # Create the model.
        # TODO: Add other algorithms as necessary.
        algorithm = args.algorithm.lower()
        if algorithm == 'useless':
            model = models.Useless()
        elif algorithm == 'sumoffeatures':
            model = models.SumOfFeatures()
        elif algorithm == 'perceptron':
            # Only override the Perceptron defaults when both
            # hyper-parameters are set to truthy (non-zero) values.
            if args.online_training_iterations and args.online_learning_rate:
                model = models.Perceptron(args.online_learning_rate,
                                          args.online_training_iterations)
            else:
                model = models.Perceptron()
        else:
            raise Exception('The model given by --model is not yet supported.')

        # Train the model.
        model.fit(X, y)

        # Save the model. The original error is chained so the root cause
        # stays visible in the traceback.
        try:
            with open(args.model_file, 'wb') as f:
                pickle.dump(model, f)
        except IOError as err:
            raise Exception("Exception while writing to the model file.") from err
        except pickle.PickleError as err:
            raise Exception("Exception while dumping model pickle.") from err

    elif mode == "test":
        # Load the test data.
        X, y = load_data(args.data)

        # Load the model.
        # NOTE: unpickling can execute arbitrary code; only load model files
        # that come from a trusted source.
        try:
            with open(args.model_file, 'rb') as f:
                model = pickle.load(f)
        except IOError as err:
            raise Exception("Exception while reading the model file.") from err
        except pickle.PickleError as err:
            raise Exception("Exception while loading model pickle.") from err

        # Compute and save the predictions.
        y_hat = model.predict(X)
        invalid_label_mask = (y_hat != 0) & (y_hat != 1)
        if any(invalid_label_mask):
            raise Exception(
                'All predictions must be 0 or 1, but found other predictions.')
        np.savetxt(args.predictions_file, y_hat, fmt='%d')

    else:
        raise Exception("Mode given by --mode is unrecognized.")
def test():
    """
    Compare Perceptron, SVM and LDA accuracy over the training sizes in MS.

    For every size ``m`` in ``MS`` the experiment is repeated 500 times: a
    training sample of size ``m`` and a test sample of size 10000 are drawn
    with ``sample_d``, each model is fitted on the training sample and scored
    on the test sample, and the reported accuracy is recorded through
    ``add_accuracy``. Afterwards the per-size accuracies are plotted and
    printed for every model.
    """
    candidates = (
        ('Perceptron', models.Perceptron),
        ('SVM', models.SVM),
        ('LDA', models.LDA),
    )
    tested_models = [TestedModel(name, factory()) for name, factory in candidates]

    test_size = 10000
    repeats = 500

    for m in MS:
        for _ in range(repeats):
            # Fresh train and test samples for every repetition.
            X, y = sample_d(m)
            X_t, y_t = sample_d(test_size)
            for tested in tested_models:
                tested.model.fit(X, y)
                result = tested.model.score(X_t, y_t)
                tested.add_accuracy(m, result['accuracy'])

    plt.figure()
    for tested in tested_models:
        accuracies = [tested.accuracy[m] for m in MS]
        plt.plot(MS, accuracies, marker='.', label=tested.name)
    plt.legend()
    plt.title('Training batch size vs. accuracy')
    plt.xlabel('m')
    plt.ylabel('accuracy')
    plt.show()

    for tested in tested_models:
        print(tested.name, tested.accuracy)
def train(args):
    """ Fit a model's parameters given the parameters specified in args.

    Reads the data at ``args.data``, builds the model named by
    ``args.algorithm`` ("perceptron" or "logistic") sized to the number of
    columns in ``X``, calls ``fit`` once per training iteration, and pickles
    the fitted model to ``args.model_file``.

    Raises:
        Exception: if ``args.algorithm`` is not a recognized name.
    """
    X, y = load_data(args.data)

    # build the appropriate model
    if args.algorithm == "perceptron":
        model = models.Perceptron(nfeatures=X.shape[1])
    elif args.algorithm == "logistic":
        model = models.LogisticRegression(nfeatures=X.shape[1])
    else:
        raise Exception("Algorithm argument not recognized")

    # Run the training loop
    for _ in range(args.online_training_iterations):
        model.fit(X=X, y=y, lr=args.online_learning_rate)

    # Save the model. The context manager guarantees the file is flushed and
    # closed even if pickling fails.
    with open(args.model_file, 'wb') as f:
        pickle.dump(model, f)
def train(train_data, val_data, lex, max_epochs, patience):
    """Train a Perceptron with early stopping on validation accuracy.

    Each epoch shuffles the training data, runs ``train_one_epoch`` on the
    live model, then validates a weight-averaged copy of it. The copy with the
    highest validation accuracy seen so far is kept. Training stops after
    ``max_epochs`` epochs, or earlier once more than ``patience`` consecutive
    epochs have failed to improve on the best accuracy.

    Returns:
        The best averaged copy, or the live (un-averaged) model if no epoch
        ever scored above 0.
    """
    # Work on a private list so shuffling does not reorder the caller's data.
    train_data = list(train_data)
    model = models.Perceptron()

    best_model = model
    best_accuracy = 0
    stale_epochs = 0

    for epoch in range(max_epochs):
        random.shuffle(train_data)
        train_one_epoch(train_data, lex, model)

        # Validate an averaged snapshot; the live model keeps training.
        snapshot = model.copy()
        snapshot.average_weights()
        accuracy = validate(epoch, val_data, lex, snapshot)

        if accuracy > best_accuracy:
            best_model = snapshot
            best_accuracy = accuracy
            stale_epochs = 0
        else:
            stale_epochs += 1

        if stale_epochs > patience:
            break

    return best_model
def plot():
    """
    Plot the separating lines found by each model for every sample size in MS.

    For each ``m`` in ``MS`` a panel of a fixed 2x3 grid shows the points
    returned by ``draw_points(m)`` coloured by label, the line derived from
    the module-level ``W`` and ``BIAS`` (labelled 'f'), and the lines derived
    from a fitted Perceptron and a fitted SVM. The bottom-right panel has its
    axis switched off, and a Negative/Positive legend built from the last
    scatter plot is added to the current axes.
    """
    n_rows, n_cols = 2, 3
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 10))

    last_scatter = None
    for idx, m in enumerate(MS):
        X, y = draw_points(m)
        x_lim = np.array([min(X[:, 0]), max(X[:, 0])])

        # Fill the grid row by row.
        row, col = divmod(idx, n_cols)
        ax = axs[row, col]

        perceptron = models.Perceptron()
        perceptron.fit(X, y)
        svm = models.SVM()
        svm.fit(X, y)

        last_scatter = ax.scatter(X[:, 0], X[:, 1], c=y)

        lines = (
            ('f', W, BIAS),
            ('Perceptron', perceptron.get_w(), perceptron.get_b()),
            ('SVM', svm.get_w(), svm.get_b()),
        )
        for label, w, b in lines:
            ax.plot(x_lim, hyperplane_line(x_lim, w, b), label=label)

        ax.set(xlabel='x', ylabel='y')
        ax.set_title(f"Data for m={m}")
        ax.legend()

    # Class legend, taken from the colour mapping of the last scatter plot.
    class_handles = last_scatter.legend_elements()[0]
    plt.legend(handles=class_handles, labels=('Negative', 'Positive'), loc=4)
    axs[-1, -1].axis('off')
    fig.tight_layout()
    plt.show()
def main():
    """Train a model (with optional feature selection) or write predictions.

    In "train" mode the data at ``args.data`` is loaded, the model named by
    ``args.algorithm`` is constructed, the columns of ``X`` are optionally
    reduced to those chosen by ``select_features``, the model is fitted, and
    the result is pickled to ``args.model_file``.

    In "test" mode the pickled model is loaded, used to predict labels for
    the data at ``args.data``, and the predictions are written to
    ``args.predictions_file`` (one integer per line).

    Raises:
        Exception: if the mode or algorithm is not recognized, if the model
            file cannot be read/written or (un)pickled, or if any prediction
            is something other than 0 or 1.
    """
    args = get_args()
    check_args(args)
    mode = args.mode.lower()

    if mode == "train":
        # Load the training data.
        X, y = load_data(args.data)

        # Create the model.
        # TODO: Add other algorithms as necessary.
        algorithm = args.algorithm.lower()
        if algorithm == 'sumoffeatures':
            model = models.SumOfFeatures()
        elif algorithm == 'perceptron':
            model = models.Perceptron()
        elif algorithm == 'useless':
            model = models.Useless()
        elif algorithm == 'logisticregression':
            model = models.LogisticRegression()
        else:
            raise Exception('The model given by --model is not yet supported.')

        # Select features.
        num_orig_features = X.shape[1]
        # Placeholder that is handed to LogisticRegression.fit unchanged when
        # no feature selection is requested.
        index_array = np.empty(1)
        if args.num_features_to_select > 0:
            index_array = select_features(X, y, args.num_features_to_select)
            index_array = np.sort(index_array)
            # Rebuild X column by column, in ascending column-index order.
            X_selected = X[:, index_array[0]]
            for column in index_array[1:]:
                X_selected = hstack([X_selected, X[:, column]])
            X = X_selected

        # Train the model.
        if algorithm == 'perceptron':
            model.fit(X, y, args.online_learning_rate,
                      args.online_training_iterations)
        elif algorithm == 'logisticregression':
            model.fit(X, y, args.online_learning_rate, args.gd_iterations,
                      num_orig_features, index_array)
        else:
            model.fit(X, y)

        # Save the model. The original error is chained so the root cause
        # stays visible in the traceback.
        try:
            with open(args.model_file, 'wb') as f:
                pickle.dump(model, f)
        except IOError as err:
            raise Exception("Exception while writing to the model file.") from err
        except pickle.PickleError as err:
            raise Exception("Exception while dumping model pickle.") from err

    elif mode == "test":
        # Load the test data.
        X, y = load_data(args.data)

        # Load the model.
        # NOTE: unpickling can execute arbitrary code; only load model files
        # that come from a trusted source.
        try:
            with open(args.model_file, 'rb') as f:
                model = pickle.load(f)
        except IOError as err:
            raise Exception("Exception while reading the model file.") from err
        except pickle.PickleError as err:
            raise Exception("Exception while loading model pickle.") from err

        # Compute and save the predictions.
        y_hat = model.predict(X)
        invalid_label_mask = (y_hat != 0) & (y_hat != 1)
        if any(invalid_label_mask):
            raise Exception('All predictions must be 0 or 1, but found other predictions.')
        np.savetxt(args.predictions_file, y_hat, fmt='%d')

    else:
        raise Exception("Mode given by --mode is unrecognized.")
def main():
    """Train a model or write test predictions, depending on ``args.mode``.

    In "train" mode the data at ``args.data`` is loaded, the model named by
    ``args.algorithm`` is constructed with its hyper-parameters from ``args``,
    fitted, and pickled to ``args.model_file``.

    In "test" mode the pickled model is loaded, used to predict labels for
    the data at ``args.data``, and the predictions are written to
    ``args.predictions_file``. Predictions are not restricted to 0/1 here;
    the output format is chosen from the type of the first loaded label.

    Raises:
        Exception: if the mode or algorithm is not recognized, or if the
            model file cannot be read/written or (un)pickled.
    """
    args = get_args()
    check_args(args)
    mode = args.mode.lower()

    if mode == "train":
        # Load the training data.
        X, y = load_data(args.data)

        # Create the model for the requested algorithm. Only the selected
        # model is constructed; nothing is built and thrown away.
        # TODO: Add other algorithms as necessary.
        algorithm = args.algorithm.lower()
        if algorithm == 'useless':
            model = models.Useless()
        elif algorithm == 'perceptron':
            model = models.Perceptron(args.online_learning_rate,
                                      args.online_training_iterations)
        elif algorithm == 'logistic':
            model = models.Logistic(args.online_learning_rate,
                                    args.online_training_iterations)
        elif algorithm == 'pegasos':
            model = models.Pegasos(args.online_learning_rate,
                                   args.online_training_iterations,
                                   args.pegasos_lambda)
        elif algorithm == 'nb':
            model = models.nb(args.independent_mode,
                              args.training_iterations,
                              args.latent_states)
        else:
            raise Exception('The model given by --model is not yet supported.')

        # Train the model.
        model.fit(X, y)

        # Save the model. The original error is chained so the root cause
        # stays visible in the traceback.
        try:
            with open(args.model_file, 'wb') as f:
                pickle.dump(model, f)
        except IOError as err:
            raise Exception("Exception while writing to the model file.") from err
        except pickle.PickleError as err:
            raise Exception("Exception while dumping model pickle.") from err

    elif mode == "test":
        # Load the test data.
        X, y = load_data(args.data)

        # Load the model.
        # NOTE: unpickling can execute arbitrary code; only load model files
        # that come from a trusted source.
        try:
            with open(args.model_file, 'rb') as f:
                model = pickle.load(f)
        except IOError as err:
            raise Exception("Exception while reading the model file.") from err
        except pickle.PickleError as err:
            raise Exception("Exception while loading model pickle.") from err

        # Compute and save the predictions.
        y_hat = model.predict(X)
        # Integer labels of any width are written as integers; anything else
        # is written via its string form. np.integer is used instead of
        # np.dtype(int) so the check does not depend on the platform's
        # default integer size.
        if np.issubdtype(type(y[0]), np.integer):
            np.savetxt(args.predictions_file, y_hat, fmt='%d')
        else:
            np.savetxt(args.predictions_file, y_hat, fmt='%s')

    else:
        raise Exception("Mode given by --mode is unrecognized.")
def main():
    """Train a model or write test predictions, depending on ``args.mode``.

    In "train" mode the data at ``args.data`` is loaded, the model named by
    ``args.algorithm`` is constructed and fitted (the clustering models get
    their hyper-parameters through ``fit`` rather than the constructor), and
    the fitted model is pickled to ``args.model_file``.

    In "test" mode the pickled model is loaded, used to predict labels for
    the data at ``args.data``, and the predictions are written to
    ``args.predictions_file`` (one integer per line).

    Raises:
        Exception: if the mode or algorithm is not recognized, if the model
            file cannot be read/written or (un)pickled, or if any prediction
            is something other than 0 or 1.
    """
    args = get_args()
    check_args(args)
    mode = args.mode.lower()

    if mode == "train":
        # Load the training data.
        X, y = load_data(args.data)

        # Create and train the model.
        # TODO: Add other algorithms as necessary.
        algorithm = args.algorithm.lower()
        if algorithm == 'adaboost':
            model = models.Adaboost(args.num_boosting_iterations)
            model.fit(X, y)
        elif algorithm == 'logisticregression':
            model = models.LogisticRegression(args.online_learning_rate,
                                              args.num_features_to_select,
                                              args.gd_iterations)
            model.fit(X, y)
        elif algorithm == 'sumoffeatures':
            model = models.SumOfFeatures()
            model.fit(X, y)
        elif algorithm == 'perceptron':
            model = models.Perceptron(args.online_learning_rate,
                                      args.online_training_iterations)
            model.fit(X, y)
        elif algorithm == 'lambda_means':
            model = models.LambdaMeans()
            model.fit(X, y, lambda0=args.cluster_lambda,
                      iterations=args.clustering_training_iterations)
        elif algorithm == 'stochastic_k_means':
            model = models.StochasticKMeans()
            model.fit(X, y, num_clusters=args.number_of_clusters,
                      iterations=args.clustering_training_iterations)
        elif algorithm == 'useless':
            model = models.Useless()
            model.fit(X, y)
        else:
            raise Exception('The model given by --model is not yet supported.')

        # Save the model. The original error is chained so the root cause
        # stays visible in the traceback.
        try:
            with open(args.model_file, 'wb') as f:
                pickle.dump(model, f)
        except IOError as err:
            raise Exception("Exception while writing to the model file.") from err
        except pickle.PickleError as err:
            raise Exception("Exception while dumping model pickle.") from err

    elif mode == "test":
        # Load the test data.
        X, y = load_data(args.data)

        # Load the model.
        # NOTE: unpickling can execute arbitrary code; only load model files
        # that come from a trusted source.
        try:
            with open(args.model_file, 'rb') as f:
                model = pickle.load(f)
        except IOError as err:
            raise Exception("Exception while reading the model file.") from err
        except pickle.PickleError as err:
            raise Exception("Exception while loading model pickle.") from err

        # Compute and save the predictions.
        y_hat = model.predict(X)
        invalid_label_mask = (y_hat != 0) & (y_hat != 1)
        if any(invalid_label_mask):
            raise Exception(
                'All predictions must be 0 or 1, but found other predictions.')
        np.savetxt(args.predictions_file, y_hat, fmt='%d')

    else:
        raise Exception("Mode given by --mode is unrecognized.")