from sklearn.naive_bayes import BernoulliNB
from run_binary_classifier import run

# Estimator handed to `run` for this experiment.
clf = BernoulliNB()

# Search grid with exactly one candidate per option. Each key is prefixed
# with a step name (bag_of_words, dim_reduct, normalizer, classifier);
# presumably these match pipeline steps built inside run_binary_classifier
# -- confirm against that module.
param_grid = dict(
    bag_of_words__stop_words=['english'],
    bag_of_words__ngram_range=[(1, 2)],
    bag_of_words__max_features=[500],
    dim_reduct__n_components=[300],
    normalizer__norm=['l2'],
    classifier__alpha=[1.0],
    classifier__binarize=[0.0],
)

run(param_grid, clf)
def keras_logreg_model():
    """Build and compile a one-unit sigmoid Keras model.

    The network consists of a single ``Dense`` layer with one output unit,
    a sigmoid activation and an L2 kernel penalty of 1.0. It is compiled
    with binary cross-entropy loss, the Adam optimizer and accuracy as the
    reported metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    # NOTE(review): the input is fixed at 2 features -- confirm this matches
    # what the calling pipeline actually feeds into the classifier.
    network = Sequential()
    output_layer = Dense(
        units=1,
        input_shape=(2, ),
        kernel_initializer='normal',
        kernel_regularizer=regularizers.l2(1.),
        activation='sigmoid',
    )
    network.add(output_layer)

    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network


# Search grid with one candidate per option. Two options are intentionally
# left out of this run:
#   'dim_reduct__n_components': [300]
#   'classifier__C': [5., 10.]
param_grid = dict(
    bag_of_words__stop_words=['english'],
    bag_of_words__ngram_range=[(1, 2)],
    bag_of_words__max_features=[500],
    normalizer__norm=['l2'],
)

# Wrap the Keras model builder so it can be passed to `run` as the estimator;
# it is configured for a single epoch with batches of 5 and verbose output.
estimator = KerasClassifier(
    build_fn=keras_logreg_model,
    epochs=1,
    batch_size=5,
    verbose=1,
)

run(param_grid, estimator)
# Example #3
# 0
# Grid for the multilabel run: a list holding a single grid dictionary.
# Every key carries an extra leading `estimator__` segment in front of the
# step name; only the classifier's C has more than one candidate value.
multilabel_param_grid = [
    dict(
        estimator__bag_of_words__stop_words=['english'],
        estimator__bag_of_words__ngram_range=[(1, 2)],
        estimator__bag_of_words__max_features=[500],
        estimator__dim_reduct__n_components=[300],
        estimator__normalizer__norm=['l2'],
        estimator__classifier__C=[5., 10.],
    ),
]


# =========================== #
# TRAIN
# BINARY CLASSIFIER
# =========================== #
# Run the binary experiment over its grid and keep whatever `run` returns.
binary_clf = run_binary_classifier.run(
    binary_param_grid, LogisticRegression(), comments_file=train_binary)

# Persist the binary result so it can be reloaded later without retraining.
with open('./saved_models/log_reg_joint_binary.pkl', 'wb') as saved_model:
    pickle.dump(binary_clf, file=saved_model)

# =========================== #
# TRAIN
# MULTILABEL CLASSIFIER
# =========================== #
# Run the multilabel experiment over its own grid and training file.
multilabel_clf = run_multilabel_classifier.run(
    multilabel_param_grid, LogisticRegression(), comments_file=train_multilabel)

# Persist the multilabel result. This must dump `multilabel_clf`: writing
# `binary_clf` here would store the binary model under the multilabel name
# and the multilabel result would never reach disk.
with open('./saved_models/log_reg_joint_multilabel.pkl', 'wb') as saved_model:
    pickle.dump(multilabel_clf, file=saved_model)


# =========================== #
# PREDICT
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

from run_binary_classifier import _load_comments, run

# Locations of the training and held-out comment files, relative to the
# directory the script is started from.
test_comments_path = os.path.join('../../../', 'data/test_clean_binary.csv')
train_comments_path = os.path.join('../../../', 'data/train_binary.csv')

# Estimator handed to `run`.
clf = LogisticRegression()

# Search grid: one candidate per option except the classifier's C,
# which is tried at 5 and 10.
param_grid = dict(
    bag_of_words__stop_words=['english'],
    bag_of_words__ngram_range=[(1, 2)],
    bag_of_words__max_features=[500],
    dim_reduct__n_components=[300],
    normalizer__norm=['l2'],
    classifier__C=[5., 10.],
)

trained_clf = run(param_grid, clf, comments_file=train_comments_path)

# Write the object returned by `run` to disk ...
with open('./saved_models/log_reg_trained_binary.pkl', 'wb') as saved_model:
    pickle.dump(trained_clf, saved_model)

# ... then read it back and score the reloaded copy on the test comments,
# so the printed report reflects the persisted model rather than the
# in-memory one.
with open('./saved_models/log_reg_trained_binary.pkl', 'rb') as saved_model:
    loaded_clf = pickle.load(saved_model)

    X_test, y_test = _load_comments(test_comments_path)
    y_test_predict = loaded_clf.predict(X_test)

    print(classification_report(y_test, y_test_predict))