Example #1
def train_keras_classifier():
    # os and tensorflow are needed below for saving the checkpoint;
    # data_loader, SGDLearningRateTracker, SAVE_DIR, CLASSIFIER_PATH and
    # PRETRAINED_PATH are assumed to be defined in the enclosing module.
    import os
    import tensorflow as tf
    from cifar10_classifier import Classifier
    from keras.optimizers import SGD

    batch_size = 128
    epochs = 50
    data_augmentation = True

    # SGD with Nesterov momentum and time-based learning-rate decay
    opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

    # build and compile the model inside a named variable scope
    with tf.variable_scope('conv'):
        model = Classifier().model
        model.compile(loss='categorical_crossentropy',
                      optimizer=opt,
                      metrics=['accuracy'])

    # load data and start training
    if data_augmentation:
        print('Using real-time data augmentation.')
        datagen, (x_train,
                  y_train), (x_test,
                             y_test) = data_loader.load_augmented_data()
        model.fit_generator(datagen.flow(x_train,
                                         y_train,
                                         batch_size=batch_size),
                            epochs=epochs,
                            validation_data=(x_test, y_test),
                            workers=4,
                            callbacks=[SGDLearningRateTracker()])
    else:
        print('Not using data augmentation.')
        (x_train, y_train), (x_test, y_test) = data_loader.load_original_data()
        model.fit(x_train,
                  y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(x_test, y_test),
                  shuffle=True)

    # save the Keras model, then export a TensorFlow checkpoint
    if not os.path.isdir(SAVE_DIR):
        os.makedirs(SAVE_DIR)
    model.save(CLASSIFIER_PATH)

    # grab the session Keras is using and write a TF checkpoint from it
    from keras.backend import get_session
    sess = get_session()
    saver = tf.train.Saver()
    saver.save(sess, PRETRAINED_PATH)
    print('Saved trained model at %s' % PRETRAINED_PATH)

    # evaluate on the test set
    scores = model.evaluate(x_test, y_test, verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
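
The function above leans on several names its module must supply. A minimal sketch of what they might look like; the concrete paths and the callback body are assumptions, not the original project's code:

import os
from keras.callbacks import Callback
from keras import backend as K

# Hypothetical module-level constants assumed by train_keras_classifier()
SAVE_DIR = os.path.join(os.getcwd(), 'saved_models')
CLASSIFIER_PATH = os.path.join(SAVE_DIR, 'keras_cifar10.h5')
PRETRAINED_PATH = os.path.join(SAVE_DIR, 'tf_cifar10.ckpt')

class SGDLearningRateTracker(Callback):
    """Hypothetical callback: print the decayed SGD learning rate each epoch."""
    def on_epoch_end(self, epoch, logs=None):
        opt = self.model.optimizer
        lr = K.eval(opt.lr)
        decay = K.eval(opt.decay)
        iterations = K.eval(opt.iterations)
        # Keras applies time-based decay: lr / (1 + decay * iterations)
        print('Epoch %d: effective lr = %.6f'
              % (epoch + 1, lr / (1. + decay * iterations)))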
Example #2
class StateWikiClassifier:
    """Predict a US state (FIPS code) for a tweet with TF-IDF + naive Bayes.

    DataManager, get_vectorizer, Classifier and FIPS_DEFINITIONS are assumed
    to be imported from the enclosing project.
    """
    DATABASE = "us_twitter.db"

    def __init__(self):
        # load the Wikipedia-derived training tweets from the local database
        db_mgr = DataManager(self.DATABASE)
        self.train_tweets, self.train_labels = db_mgr.select_wikipedia_train()
        # fit a TF-IDF vectorizer and a naive Bayes classifier on them
        self.vectorizer = get_vectorizer("tfidf", min_df=1)
        self.nb = Classifier(classifier="nb")
        self.train_data = self.vectorizer.fit_transform(self.train_tweets)
        self.nb.fit(self.train_data, self.train_labels)

    def predict(self, text):
        # lowercase to match the training preprocessing, then classify
        text = text.lower()
        results = self.nb.predict(self.vectorizer.transform([text]))
        # return the predicted FIPS code and its human-readable definition
        return results[0], FIPS_DEFINITIONS[results[0]]
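
A minimal usage sketch, assuming the enclosing project provides the names above and that us_twitter.db is present; the tweet text is purely illustrative:

clf = StateWikiClassifier()
fips_code, state = clf.predict("Nothing beats breakfast tacos in Austin")
print(fips_code, state)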
Example #3
def kfold(orig_data, orig_labels, test_data, clf: classifiers.Classifier):
    """Leave-one-fold-out cross-validation over pre-split folds.

    orig_data and orig_labels are lists with one array per fold; numpy (np)
    and the classifiers module are assumed to be imported at module level.
    """
    results = []
    predictions = []
    best_k = 0
    for k in range(len(orig_data)):
        print("Iteration", k + 1, "of", len(orig_data))
        # hold out fold k for validation, train on the remaining folds
        data, labels = orig_data[:], orig_labels[:]
        val_data = data.pop(k)
        val_labels = labels.pop(k)
        train_data = np.concatenate(data)
        train_labels = np.concatenate(labels)
        print("Fitting...")
        clf.fit(train_data, train_labels)
        print("Evaluating...")
        results.append(clf.evaluate(val_data, val_labels))
        print(results[k])
        print("Predicting...")
        predictions.append(clf.predict(test_data))
        print(predictions[k])
        # keep track of the fold with the best validation accuracy
        if results[k]["Accuracy"] > results[best_k]["Accuracy"]:
            best_k = k
    return results[best_k], predictions[best_k]
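
A minimal invocation sketch. It assumes clf is any classifiers.Classifier whose evaluate() returns a dict containing an "Accuracy" key, as the loop above requires; the fold shapes below are made up for illustration:

import numpy as np

folds = 5
data = [np.random.rand(20, 8) for _ in range(folds)]    # one array per fold
labels = [np.random.randint(0, 2, size=20) for _ in range(folds)]
test_data = np.random.rand(30, 8)

best_result, best_prediction = kfold(data, labels, test_data, clf)
print(best_result["Accuracy"])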
Example #4
def results(X_train, y_train, X_test, y_test, features="binary", D_in=200):
    """Grid-search, train and score a suite of classifiers.

    Classifier is assumed to be a wrapper from the enclosing module that
    exposes grid_search(), fit() and score().
    """
    print("\n  > Logistic Regression: ")
    # performs logistic regression
    log_reg = Classifier(X_train, y_train, model="log_reg")
    # determines the parameters used in the grid search
    hyperparams = {'C': [0.01, 1, 100], 'penalty': ['l1', 'l2']}
    # picks the best possible model using grid search
    log_reg.grid_search(hyperparams)
    # fully train the best model
    log_reg.fit()
    # tests the accuracy of the model
    log_reg.score(X_test, y_test)

    print("\n  > Linear SVM: ")
    # performs SVM
    Linear_SVM = Classifier(X_train, y_train, model="Linear_SVM")
    # determines the parameters used in the grid search
    hyperparams = {'C': [0.01, 1, 100]}
    # picks the best possible model using grid search
    Linear_SVM.grid_search(hyperparams)
    # fully train the best model
    Linear_SVM.fit()
    # tests the accuracy of the model
    Linear_SVM.score(X_test, y_test)

    if features == "binary":
        print("\n  > Bernoulli Naive Bayes SVM: ")
        # performs Bernoulli naive Bayes SVM (NBSVM)
        Bernoulli_NBSVM = Classifier(X_train, y_train, model="Bernoulli_NBSVM")
        # determines the parameters used in the grid search
        hyperparams = {'C': [0.01, 1, 100], 'beta': [0.25, 0.5, 0.75]}
        # picks the best possible model using grid search
        Bernoulli_NBSVM.grid_search(hyperparams)
        # fully train the best model
        Bernoulli_NBSVM.fit()
        # tests the accuracy of the model
        Bernoulli_NBSVM.score(X_test, y_test)

    if features == "sentence_embed":
        print("\n  > Feedforward NN:")
        # performs feedforward NN
        feedforward_NN = Classifier(X_train, y_train, "feedforward_NN", D_in)
        # grid search over batch size and epochs is left commented out here
        #hyperparams = {'batch_size' : [128, 256, 512], 'epochs' : [10, 20, 50]}
        #feedforward_NN.grid_search(hyperparams)
        # fully train the best model
        feedforward_NN.fit()
        # tests the accuracy of the model
        feedforward_NN.score(X_test, y_test)

        print("\n  > Gaussian Naive Bayes: ")
        # performs Gaussian Naive Bayes
        Gaussian_NB = Classifier(X_train, y_train, model="Gaussian_NB")
        # determines the parameters used in the grid search
        hyperparams = {
            'priors': [None, (0.25, 0.75), (0.5, 0.5), (0.75, 0.25)]
        }
        # picks the best possible model using grid search
        Gaussian_NB.grid_search(hyperparams)
        # fully train the best model
        Gaussian_NB.fit()
        # tests the accuracy of the model
        Gaussian_NB.score(X_test, y_test)

        return (log_reg, Linear_SVM, Gaussian_NB)

    # any other feature type (including "binary") falls through here, so the
    # NBSVM trained above is not part of the returned tuple
    else:
        print("\n  > Multinomial Naive Bayes: ")
        # performs multinomial naive Bayes
        Multinomial_NB = Classifier(X_train, y_train, model="Multinomial_NB")
        # determines the parameters used in the grid search
        hyperparams = {
            'class_prior': [None, (0.25, 0.75), (0.5, 0.5), (0.75, 0.25)]
        }
        # picks the best possible model using grid search
        Multinomial_NB.grid_search(hyperparams)
        # fully train the best model
        Multinomial_NB.fit()
        # tests the accuracy of the model
        Multinomial_NB.score(X_test, y_test)

        return (log_reg, Linear_SVM, Multinomial_NB)
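
A minimal call sketch with binary bag-of-words features; the arrays are random placeholders sized to match D_in=200, and the unpacked return follows the else branch above:

import numpy as np

X_train = np.random.randint(0, 2, size=(100, 200))
y_train = np.random.randint(0, 2, size=100)
X_test = np.random.randint(0, 2, size=(25, 200))
y_test = np.random.randint(0, 2, size=25)

log_reg, linear_svm, multinomial_nb = results(X_train, y_train,
                                              X_test, y_test,
                                              features="binary")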