from IPython.display import display
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC

from NewsGroup_common import newsgroup_data
from NewsGroup_common import basic_model_test
from NewsGroup_common import saveDataframe

############### Data Processing #####################

# Train/test split of the newsgroup corpus from the project-local helper.
X_train, X_test, y_train, y_test = newsgroup_data.getData()

############### Support Vector Classifier Model #####################

# Baseline linear SVM; C=1.0 and the L2 penalty are the sklearn defaults,
# spelled out explicitly.
support_vector_classifier = LinearSVC(C=1.0, penalty='l2')
# basic_model_test presumably fits the model and produces the evaluation
# artefacts under the given run label — verify against NewsGroup_common.
predictions, accuracy, report, confusion_matrix = basic_model_test(
    support_vector_classifier, X_train, X_test, y_train, y_test,
    "NewsGroup Support Vector Classifier")

################ Cross Validation Hyperparametre Tuning ###############################

print("\nHYPERPARAMETRE TUNING")
# LinearSVC settings to sweep; the penalty is kept fixed at 'l2', which is
# the only penalty compatible with both loss functions in the grid.
hyperparams = {
    'C': [0.1, 1, 100, 1000],
    'penalty': ['l2'],
    'loss': ['hinge', 'squared_hinge'],
    'max_iter': [1000, 2000]
}

# Exhaustive 3-fold cross-validated search over the grid above.
# NOTE(review): the original statement was truncated after n_jobs=1; the
# remaining arguments and the fit call are restored to match the sibling
# scripts in this file (cv=3, verbose=1, error_score=1).
optimized_model = GridSearchCV(estimator=support_vector_classifier,
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=1)

optimized_model.fit(X_train, y_train)
from NewsGroup_common import newsgroup_data
from NewsGroup_common import basic_model_test
from NewsGroup_common import saveDataframe
from sklearn.ensemble import RandomForestClassifier

############### Data Processing #####################

# Train/test split of the newsgroup corpus from the project-local helper.
X_train, X_test, y_train, y_test = newsgroup_data.getData()

############### Random Forest Model #####################

# Baseline forest: 100 gini-split trees with bootstrap sampling (these are
# the sklearn defaults, spelled out explicitly).
random_forest = RandomForestClassifier(n_estimators=100,
                                       criterion='gini',
                                       bootstrap=True)
# Shared helper: presumably fits the model and produces the evaluation
# artefacts under the given run label — verify against NewsGroup_common.
predictions, accuracy, report, confusion_matrix = basic_model_test(
    random_forest, X_train, X_test, y_train, y_test, "NewsGroup Random Forest")

################ Cross Validation Hyperparametre Tuning ###############################

print("\nHYPERPARAMETRE TUNING")

# Forest size and split criterion are the two knobs swept here.
hyperparams = {'n_estimators': [10, 100, 1000], 'criterion': ['gini', 'entropy']}

# Exhaustive 3-fold cross-validated search over the grid above.
optimized_model = GridSearchCV(
    estimator=random_forest, param_grid=hyperparams,
    n_jobs=1, cv=3, verbose=1, error_score=1)
# Beispiel #3
# 0
from IPython.display import display

from NewsGroup_common import newsgroup_data
from NewsGroup_common import basic_model_test
from NewsGroup_common import saveDataframe
from sklearn import tree

############### Data Processing #####################

# Train/test split of the newsgroup corpus from the project-local helper.
X_train, X_test, y_train, y_test = newsgroup_data.getData()

############### Decision Tree Model #####################

# Baseline tree grown to full depth (sklearn defaults, spelled out).
decision_tree = tree.DecisionTreeClassifier(max_depth=None,
                                            min_samples_split=2)
# Shared helper: presumably fits the model and produces the evaluation
# artefacts under the given run label — verify against NewsGroup_common.
predictions, accuracy, report, confusion_matrix = basic_model_test(
    decision_tree, X_train, X_test, y_train, y_test, "NewsGroup Decision Tree")

################ Cross Validation Hyperparametre Tuning ###############################

print("\nHYPERPARAMETRE TUNING")

# Regularization knobs for the tree: minimum split size and depth cap.
hyperparams = {
    'min_samples_split': [10, 100, 500],
    'max_depth': [2, 20, None],
}

# Exhaustive 3-fold cross-validated search, then a refit on the full
# training split.
optimized_model = GridSearchCV(
    estimator=decision_tree,
    param_grid=hyperparams,
    n_jobs=1, cv=3, verbose=1, error_score=1)

optimized_model.fit(X_train, y_train)
# Beispiel #4
# 0
from NewsGroup_common import basic_model_test
from NewsGroup_common import saveDataframe
from sklearn.linear_model import LogisticRegression

############### Data Processing #####################

# NOTE(review): this snippet's own import block omits `newsgroup_data`;
# it relies on the name being imported earlier in the file.
X_train, X_test, y_train, y_test = newsgroup_data.getData()

############### Logistic Regression Model #####################

# Baseline L2-regularized logistic regression; random_state is pinned
# for reproducibility across runs.
logistic_regression = LogisticRegression(penalty='l2',
                                         max_iter=300,
                                         C=1,
                                         random_state=42)
# Shared helper: presumably fits the model and produces the evaluation
# artefacts under the given run label — verify against NewsGroup_common.
predictions, accuracy, report, confusion_matrix = basic_model_test(
    logistic_regression, X_train, X_test, y_train, y_test,
    "NewsGroup Logistic Regression")

################ Cross Validation Hyperparametre Tuning ###############################

print("\nHYPERPARAMETRE TUNING")
# Solvers, regularization strength and iteration budget to sweep; both
# 'sag' and 'lbfgs' support the fixed L2 penalty.
hyperparams = {
    'penalty': ['l2'],
    'solver': ['sag', 'lbfgs'],
    'C': [0.1, 1, 100],
    'max_iter': [100, 300]
}

# Exhaustive 3-fold cross-validated search over the grid above.
# NOTE(review): the original statement was truncated after n_jobs=1; the
# remaining arguments and the fit call are restored to match the sibling
# scripts in this file (cv=3, verbose=1, error_score=1).
optimized_model = GridSearchCV(estimator=logistic_regression,
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=1)

optimized_model.fit(X_train, y_train)
# Beispiel #5
# 0
import pandas as pd
from IPython.display import display

from NewsGroup_common import newsgroup_data
from NewsGroup_common import basic_model_test
from NewsGroup_common import saveDataframe
from sklearn.ensemble import AdaBoostClassifier

############### Data Processing #####################

# Train/test split of the newsgroup corpus from the project-local helper.
X_train, X_test, y_train, y_test = newsgroup_data.getData()

############### AdaBoost Model #####################

# Baseline AdaBoost: 50 weak learners, unit learning rate (sklearn defaults).
adaboost = AdaBoostClassifier(n_estimators=50,learning_rate=1.0)
# Shared helper: presumably fits the model and produces the evaluation
# artefacts under the given run label — verify against NewsGroup_common.
predictions, accuracy, report, confusion_matrix = basic_model_test(adaboost,X_train, X_test, y_train, y_test,"NewsGroup AdaBoost")

################ Cross Validation Hyperparametre Tuning ###############################

print("\nHYPERPARAMETRE TUNING")

# Ensemble size and shrinkage are the two AdaBoost knobs swept here.
hyperparams = {
    'n_estimators': [50, 100],
    'learning_rate': [0.1, 1.0],
}

# Exhaustive 3-fold cross-validated search, refit on the training split.
optimized_model = GridSearchCV(estimator=adaboost,
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=1)
optimized_model.fit(X_train, y_train)

# Report the winning configuration and keep the per-candidate CV results.
print(">>>>> Optimized params")
print(optimized_model.best_params_)
cv_results = optimized_model.cv_results_
# Beispiel #6
# 0
from IPython.display import display

from NewsGroup_common import newsgroup_data
from NewsGroup_common import basic_model_test
from NewsGroup_common import saveDataframe
from sklearn.naive_bayes import MultinomialNB

############### Data Processing #####################

# Train/test split of the newsgroup corpus from the project-local helper.
X_train, X_test, y_train, y_test = newsgroup_data.getData()

############### Multinomial Naive Bayes Model #####################

# Baseline multinomial NB with full Laplace smoothing (alpha=1.0, default).
naive_bayes = MultinomialNB(alpha=1.0)
# Shared helper: presumably fits the model and produces the evaluation
# artefacts under the given run label — verify against NewsGroup_common.
predictions, accuracy, report, confusion_matrix = basic_model_test(
    naive_bayes, X_train, X_test, y_train, y_test,
    "NewsGroup Multinomial Naive Bayes")

################ Cross Validation Hyperparametre Tuning ###############################

print("\nHYPERPARAMETRE TUNING")

# Sweep the smoothing strength from effectively none up to full Laplace.
hyperparams = {'alpha': (1e0, 1e-2, 1e-4, 1e-10)}

# Exhaustive 3-fold cross-validated search over alpha, then a refit on
# the full training split.
optimized_model = GridSearchCV(
    estimator=naive_bayes,
    param_grid=hyperparams,
    n_jobs=1, cv=3, verbose=1, error_score=1)

optimized_model.fit(X_train, y_train)