Esempio n. 1
0
from keras.models import Model
from keras.wrappers.scikit_learn import KerasClassifier
from run_multilabel_classifier import run


def create_model():
    """Build and compile the LSTM classifier used as the Keras ``build_fn``.

    Architecture, in order: integer token-id input -> 128-dimensional
    embedding over a 2000-entry vocabulary -> 60-unit LSTM returning the
    whole sequence -> global max pooling over the time axis -> dropout
    (0.1) -> 50-unit ReLU dense layer -> dropout (0.1) -> one sigmoid
    output unit.

    Returns:
        A ``Model`` compiled with binary cross-entropy loss, the Adam
        optimizer and the accuracy metric.
    """
    # Imported here so the function is self-contained: the module header
    # visible in this file only brings ``Model`` into scope.
    from keras.layers import (Dense, Dropout, Embedding, GlobalMaxPool1D,
                              Input, LSTM)

    embed_size = 128

    # One token id per time step; ``None`` leaves the sequence length open.
    # The original ``shape=()`` described a single scalar per sample, which
    # cannot produce the 3-D tensor the LSTM needs.
    # NOTE(review): assumes callers feed 2-D arrays of token ids < 2000 --
    # confirm against the preprocessing in ``run``.
    inp = Input(shape=(None,))
    x = Embedding(2000, embed_size)(inp)
    # Fix: the LSTM must consume the embedded sequence ``x``; it was wired to
    # ``inp``, which silently dropped the embedding layer from the graph.
    x = LSTM(60, return_sequences=True, name='lstm_layer')(x)
    # Collapse the time axis by keeping the maximum activation per feature.
    x = GlobalMaxPool1D()(x)
    x = Dropout(0.1)(x)
    x = Dense(50, activation="relu")(x)
    x = Dropout(0.1)(x)
    x = Dense(1, activation="sigmoid")(x)

    model = Model(inputs=inp, outputs=x)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model


# Hyper-parameter grid forwarded to ``run``.
# NOTE(review): ``C`` is normally a regularisation parameter of scikit-learn
# linear/SVM estimators; confirm the wrapped Keras estimator accepts it.
param_grid = {
    'estimator__classifier__C': [100., 0.1],
}

# scikit-learn style wrapper that builds the network through ``create_model``.
estimator = KerasClassifier(
    build_fn=create_model,
    epochs=1,
    batch_size=16,
    verbose=1,
)

run(param_grid, estimator)
Esempio n. 2
0

# =========================== #
# Train the binary classifier
# and persist the result.
# =========================== #
binary_clf = run_binary_classifier.run(
    binary_param_grid,
    LogisticRegression(),
    comments_file=train_binary,
)

# Pickle whatever ``run`` returned so it can be reloaded later.
with open('./saved_models/log_reg_joint_binary.pkl', 'wb') as saved_model:
    pickle.dump(binary_clf, saved_model)

# =========================== #
# TRAIN
# MULTILABEL CLASSIFIER
# =========================== #
multilabel_clf = run_multilabel_classifier.run(multilabel_param_grid, LogisticRegression(), comments_file=train_multilabel)
# Fix: persist the multilabel model itself. The previous code dumped
# ``binary_clf`` into this file, so the multilabel model was never saved and
# the pickle on disk held a second copy of the binary classifier.
with open('./saved_models/log_reg_joint_multilabel.pkl', 'wb') as saved_model:
    pickle.dump(multilabel_clf, file=saved_model)


# =========================== #
# Evaluate the binary
# classifier on the test set.
# =========================== #
print('Binary prediction')

# ``load_comments`` yields the inputs and their reference labels as a pair.
X_binary_test, y_binary_test = run_binary_classifier.load_comments(test_binary)
y_binary_test_predict = binary_clf.predict(X_binary_test)

# Compare reference labels against predictions and print the summary.
binary_report = classification_report(y_binary_test, y_binary_test_predict)
print(binary_report)
Esempio n. 3
0
from sklearn.svm import SVC
from run_multilabel_classifier import run

# Search space handed to ``run``. Each key is ``<step>__<parameter>``; the
# step names are presumably those of the pipeline built inside ``run``
# (not visible here -- verify against that module).
param_grid = {
    'bag_of_words__stop_words': ['english'],
    'bag_of_words__ngram_range': [(1, 2)],
    'bag_of_words__max_features': [500],
    'dim_reduct__n_components': [300],
    'normalizer__norm': ['l2'],
    'classifier__C': [
        100.,
        0.1,
        0.0001,
    ],
}

# Support-vector classifier with library-default settings.
clf = SVC()

run(param_grid, clf)