def train_keras_classifier(batch_size=128, epochs=50, data_augmentation=True):
    """Train the classifier from ``cifar10_classifier`` and save it to disk.

    The model is compiled with Nesterov-momentum SGD and categorical
    cross-entropy, trained, saved both as a Keras model (``CLASSIFIER_PATH``)
    and as a TensorFlow checkpoint (``PRETRAINED_PATH``), and finally
    evaluated on the test split; only the test loss is printed.

    Args:
        batch_size: Mini-batch size used for training.
        epochs: Number of training epochs.
        data_augmentation: When true, train from the generator returned by
            ``data_loader.load_augmented_data()``; otherwise train directly
            on the arrays from ``data_loader.load_original_data()``.
    """
    from cifar10_classifier import Classifier
    from keras.backend import get_session
    from keras.optimizers import SGD

    opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

    # NOTE(review): assumes only model construction belongs inside the
    # 'conv' variable scope -- confirm against the checkpoint's variable names.
    with tf.variable_scope('conv'):
        model = Classifier().model

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    # Load data and start training.
    if data_augmentation:
        print('Using real-time data augmentation.')
        datagen, (x_train, y_train), (x_test, y_test) = \
            data_loader.load_augmented_data()
        model.fit_generator(
            datagen.flow(x_train, y_train, batch_size=batch_size),
            epochs=epochs,
            validation_data=(x_test, y_test),
            workers=4,
            callbacks=[SGDLearningRateTracker()])
    else:
        print('Not using data augmentation.')
        (x_train, y_train), (x_test, y_test) = \
            data_loader.load_original_data()
        model.fit(x_train, y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(x_test, y_test),
                  shuffle=True)

    # Save as a Keras model and as a TensorFlow checkpoint.
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(SAVE_DIR, exist_ok=True)
    model.save(CLASSIFIER_PATH)

    # The checkpoint is written from the session Keras is currently using.
    sess = get_session()
    saver = tf.train.Saver()
    saver.save(sess, PRETRAINED_PATH)
    print('Saved trained model at %s ' % (PRETRAINED_PATH))

    # Evaluate on the test set; scores[0] is the loss.
    scores = model.evaluate(x_test, y_test, verbose=1)
    print('Test loss:', scores[0])
class StateWikiClassifier():
    """Text classifier trained on the Wikipedia training set of ``DATABASE``.

    Construction loads the training texts and labels, fits a TF-IDF
    vectorizer on the texts and fits an ``"nb"`` classifier on the resulting
    features. ``predict`` then maps a single text to a label and its entry
    in ``FIPS_DEFINITIONS``.
    """

    # Database file handed to DataManager when loading the training data.
    DATABASE = "us_twitter.db"

    def __init__(self):
        """Load the training data and fit the vectorizer and classifier."""
        manager = DataManager(self.DATABASE)
        tweets, labels = manager.select_wikipedia_train()
        self.train_tweets = tweets
        self.train_labels = labels

        self.vectorizer = get_vectorizer("tfidf", min_df=1)
        self.nb = Classifier(classifier="nb")

        # Fit the vectorizer and keep the transformed training matrix.
        self.train_data = self.vectorizer.fit_transform(self.train_tweets)
        self.nb.fit(self.train_data, self.train_labels)

    def predict(self, text):
        """Classify one text.

        The text is lower-cased before being vectorized.

        Returns:
            A ``(label, definition)`` tuple where ``label`` is the first
            prediction of the classifier and ``definition`` is
            ``FIPS_DEFINITIONS[label]``.
        """
        features = self.vectorizer.transform([text.lower()])
        label = self.nb.predict(features)[0]
        return label, FIPS_DEFINITIONS[label]
def kfold(orig_data, orig_labels, test_data, clf: classifiers.Classifier):
    """Run one validation round per fold and keep the most accurate round.

    In round ``k`` fold ``k`` is held out for validation and ``clf`` is
    fitted on the concatenation of all remaining folds. The fitted
    classifier is then evaluated on the held-out fold and used to predict
    ``test_data``. Progress, each evaluation result and each prediction are
    printed.

    Args:
        orig_data: Indexable sequence of folds (list, tuple, ...), each fold
            being something ``np.concatenate`` accepts. Not modified.
        orig_labels: Sequence of label folds aligned with ``orig_data``.
            Not modified.
        test_data: Data passed unchanged to ``clf.predict`` in every round.
        clf: Object providing ``fit(data, labels)``,
            ``evaluate(data, labels)`` returning a mapping with an
            ``"Accuracy"`` entry, and ``predict(data)``.

    Returns:
        ``(result, prediction)`` of the round with the highest
        ``"Accuracy"``; on ties the earliest round wins.

    Raises:
        IndexError: If ``orig_data`` contains no folds.
        ValueError: From ``np.concatenate`` when there is only one fold,
            because nothing is left to train on.
    """
    n_folds = len(orig_data)
    if n_folds == 0:
        # Same exception type the unguarded code raised, with a clear message.
        raise IndexError("kfold() needs at least one fold")

    best_result = None
    best_prediction = None
    best_accuracy = None

    for k in range(n_folds):
        print("Iteration", k + 1, "over", n_folds)

        val_data, val_labels = orig_data[k], orig_labels[k]
        # Build the training split without copying or mutating the inputs,
        # so any sequence type works, not only lists.
        train_data = np.concatenate(
            [fold for i, fold in enumerate(orig_data) if i != k])
        train_labels = np.concatenate(
            [fold for i, fold in enumerate(orig_labels) if i != k])

        print("Fitting...")
        clf.fit(train_data, train_labels)

        print("Evaluating...")
        result = clf.evaluate(val_data, val_labels)
        print(result)

        print("Predicting...")
        prediction = clf.predict(test_data)
        print(prediction)

        # Only the best round is returned, so only the best round is kept.
        accuracy = result["Accuracy"]
        if best_result is None or accuracy > best_accuracy:
            best_result = result
            best_prediction = prediction
            best_accuracy = accuracy

    return best_result, best_prediction
def results(X_train, y_train, X_test, y_test, features="binary", D_in=200):
    """Train and score a set of classifiers chosen by feature type.

    Logistic regression and a linear SVM are always run. Depending on
    ``features`` further models are added:

    * ``"binary"``: a Bernoulli NB-SVM is run as well.
    * ``"sentence_embed"``: a feed-forward network (no grid search) and
      Gaussian naive Bayes are run.
    * anything other than ``"sentence_embed"`` (including ``"binary"``):
      multinomial naive Bayes is run.

    Except for the feed-forward network, every model is tuned with
    ``grid_search`` before ``fit``. Each model is then scored on the test
    split; the value returned by ``score`` is discarded here.

    Args:
        X_train, y_train: Training data handed to every ``Classifier``.
        X_test, y_test: Test data handed to every ``score`` call.
        features: Feature type selecting the extra models (see above).
        D_in: Fourth positional argument of the feed-forward ``Classifier``
            (presumably its input dimension -- confirm).

    Returns:
        ``(log_reg, Linear_SVM, nb)`` where ``nb`` is the Gaussian model for
        ``"sentence_embed"`` features and the multinomial model otherwise.
        The Bernoulli NB-SVM and the feed-forward network are trained and
        scored but not returned.
    """

    def _fit_and_score(banner, param_grid, *ctor_args, **ctor_kwargs):
        """Announce, build, optionally tune, fit and score one classifier."""
        print(banner)
        clf = Classifier(X_train, y_train, *ctor_args, **ctor_kwargs)
        if param_grid is not None:
            # Pick the best hyper-parameters before the final fit.
            clf.grid_search(param_grid)
        clf.fit()
        clf.score(X_test, y_test)
        return clf

    log_reg = _fit_and_score(
        "\n > Logistic Regression: ",
        {'C': [0.01, 1, 100], 'penalty': ['l1', 'l2']},
        model="log_reg",
    )

    linear_svm = _fit_and_score(
        "\n > Linear SVM: ",
        {'C': [0.01, 1, 100]},
        model="Linear_SVM",
    )

    if features == "binary":
        _fit_and_score(
            "\n > Bernoulli Naive Bayes SVM: ",
            {'C': [0.01, 1, 100], 'beta': [0.25, 0.5, 0.75]},
            model="Bernoulli_NBSVM",
        )

    if features == "sentence_embed":
        # The network is fitted with its defaults; no grid search is run.
        _fit_and_score("\n > Feedforward NN:", None, "feedforward_NN", D_in)

        naive_bayes = _fit_and_score(
            "\n > Gaussian Naive Bayes: ",
            {'priors': [None, (0.25, 0.75), (0.5, 0.5), (0.75, 0.25)]},
            model="Gaussian_NB",
        )
    else:
        naive_bayes = _fit_and_score(
            "\n > Multinomial Naive Bayes: ",
            {'class_prior': [None, (0.25, 0.75), (0.5, 0.5), (0.75, 0.25)]},
            model="Multinomial_NB",
        )

    return (log_reg, linear_svm, naive_bayes)