# --- IMDB sentiment classification with a linear Support Vector Classifier ---
# Trains a baseline LinearSVC, then grid-searches C / loss / max_iter with
# 3-fold cross-validation.

import warnings

import seaborn as sns
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC

import imdb_data  # project-local: provides getData() -> train/test split
from imdb_common import basic_model_test
from imdb_common import saveDataframe

# Silence sklearn convergence / deprecation chatter during the grid search.
warnings.simplefilter('ignore')
sns.set(rc={'figure.figsize': (12, 6)})
sns.set_style("darkgrid", {'axes.grid': True})

############### Data Processing #####################

X_train, X_test, y_train, y_test = imdb_data.getData()

############### Support Vector Machine Model #####################

support_vector_classifier = LinearSVC(C=1.0, penalty='l2', dual=True)
# NOTE(review): the name `confusion_matrix` shadows
# sklearn.metrics.confusion_matrix if that function is imported elsewhere.
predictions, accuracy, report, confusion_matrix = basic_model_test(
    support_vector_classifier, X_train, X_test, y_train, y_test,
    "IMDB Support Vector Classifier")

################ Cross Validation Hyperparametre Tuning ###############################

print("\nHYPERPARAMETRE TUNING")
hyperparams = {
    'C': [0.1, 1, 100, 1000],
    'penalty': ['l2'],
    'loss': ['hinge', 'squared_hinge'],
    'max_iter': [1000, 2000],
}

# NOTE(review): error_score=1 scores a *failed* fit as 1.0 (a perfect-looking
# result), silently masking broken parameter combos (e.g. loss='hinge' with an
# incompatible dual setting). Prefer the sklearn default error_score=np.nan.
optimized_model = GridSearchCV(estimator=support_vector_classifier,
                               param_grid=hyperparams,
                               n_jobs=1, cv=3, verbose=1, error_score=1)

optimized_model.fit(X_train, y_train)

print(">>>>> Optimized params")
print(optimized_model.best_params_)

cv_results = optimized_model.cv_results_
# ===================== Example #2 =====================
# --- IMDB sentiment classification with AdaBoost ---
# Trains a baseline AdaBoostClassifier, then grid-searches n_estimators and
# learning_rate with 3-fold cross-validation.

import warnings

import seaborn as sns
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV

import imdb_data  # project-local: provides getData() -> train/test split
from imdb_common import basic_model_test
from imdb_common import saveDataframe

# Silence sklearn convergence / deprecation chatter during the grid search.
warnings.simplefilter('ignore')
sns.set(rc={'figure.figsize': (12, 6)})
sns.set_style("darkgrid", {'axes.grid': True})

############### Data Processing #####################

X_train, X_test, y_train, y_test = imdb_data.getData()

############### Ada Boost Model #####################

ada_boost = AdaBoostClassifier(n_estimators=50, learning_rate=1.0)
predictions, accuracy, report, confusion_matrix = basic_model_test(
    ada_boost, X_train, X_test, y_train, y_test, "IMDB AdaBoost")

################ Cross Validation Hyperparametre Tuning ###############################

print("\nHYPERPARAMETRE TUNING")
hyperparams = {'n_estimators': [50, 100], 'learning_rate': [0.1, 1.0]}

# NOTE(review): error_score=1 makes a failed fit look like a perfect score;
# prefer the sklearn default error_score=np.nan so failures stand out.
optimized_model = GridSearchCV(estimator=ada_boost,
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=1)

optimized_model.fit(X_train, y_train)
# ===================== Example #3 =====================
# --- IMDB sentiment classification with Logistic Regression ---
# Trains a baseline LogisticRegression, then grid-searches solver / C /
# max_iter with 3-fold cross-validation.
# NOTE(review): the original chunk was truncated mid GridSearchCV(...) call;
# the call is completed here to match the parallel scripts in this file
# (cv=3, verbose=1, error_score=1, then fit) — confirm against the source.

import warnings

import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

import imdb_data  # project-local: provides getData() -> train/test split
from imdb_common import basic_model_test

# Silence sklearn convergence / deprecation chatter during the grid search.
warnings.simplefilter('ignore')
sns.set(rc={'figure.figsize': (12, 6)})
sns.set_style("darkgrid", {'axes.grid': True})

############### Data Processing #####################

X_train, X_test, y_train, y_test = imdb_data.getData()

############### Logistic Regression Model #####################

logistic_regression = LogisticRegression(penalty='l2',
                                         max_iter=300,
                                         C=1,
                                         random_state=42)
predictions, accuracy, report, confusion_matrix = basic_model_test(
    logistic_regression, X_train, X_test, y_train, y_test,
    "IMDB Logistic Regression")

################ Cross Validation Hyperparametre Tuning ###############################

print("\nHYPERPARAMETRE TUNING")
hyperparams = {
    'penalty': ['l2'],
    'solver': ['sag', 'lbfgs'],
    'C': [0.1, 1, 100],
    'max_iter': [100, 300]
}

# NOTE(review): error_score=1 makes a failed fit look like a perfect score;
# prefer the sklearn default error_score=np.nan so failures stand out.
optimized_model = GridSearchCV(estimator=logistic_regression,
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=1)

optimized_model.fit(X_train, y_train)
# ===================== Example #4 =====================
# --- IMDB sentiment classification with Multinomial Naive Bayes ---
# Trains a baseline MultinomialNB, then grid-searches the smoothing
# parameter alpha with 3-fold cross-validation.

import warnings

import seaborn as sns
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB

import imdb_data  # project-local: provides getData() -> train/test split
from imdb_common import basic_model_test
from imdb_common import saveDataframe

# Silence sklearn convergence / deprecation chatter during the grid search.
warnings.simplefilter('ignore')
sns.set(rc={'figure.figsize': (12, 6)})
sns.set_style("darkgrid", {'axes.grid': True})

############### Data Processing #####################

X_train, X_test, y_train, y_test = imdb_data.getData()

############### Multinomial Naive Bayes Model #####################

naive_bayes = MultinomialNB(alpha=1.0)
predictions, accuracy, report, confusion_matrix = basic_model_test(
    naive_bayes, X_train, X_test, y_train, y_test,
    "IMDB Multinomial Naive Bayes")

################ Cross Validation Hyperparametre Tuning ###############################

print("\nHYPERPARAMETRE TUNING")
hyperparams = {'alpha': (1e0, 1e-2, 1e-4, 1e-10)}

# NOTE(review): error_score=1 makes a failed fit look like a perfect score;
# prefer the sklearn default error_score=np.nan so failures stand out.
optimized_model = GridSearchCV(estimator=naive_bayes,
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=1)

optimized_model.fit(X_train, y_train)
# --- IMDB sentiment classification with a Decision Tree ---
# Trains a baseline DecisionTreeClassifier, then grid-searches
# min_samples_split and max_depth with 3-fold cross-validation.

import warnings

import seaborn as sns
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

import imdb_data  # project-local: provides getData() -> train/test split
from imdb_common import basic_model_test

# Silence sklearn convergence / deprecation chatter during the grid search.
warnings.simplefilter('ignore')
sns.set(rc={'figure.figsize': (12, 6)})
sns.set_style("darkgrid", {'axes.grid': True})

############### Data Processing #####################

X_train, X_test, y_train, y_test = imdb_data.getData()

############### Decision Tree Model #####################

decision_tree = DecisionTreeClassifier(max_depth=None, min_samples_split=2)
predictions, accuracy, report, confusion_matrix = basic_model_test(
    decision_tree, X_train, X_test, y_train, y_test, "IMDB Decision Tree")

################ Cross Validation Hyperparametre Tuning ###############################

print("\nHYPERPARAMETRE TUNING")
hyperparams = {'min_samples_split': [10, 100, 500], 'max_depth': [2, 20, None]}

# NOTE(review): error_score=1 makes a failed fit look like a perfect score;
# prefer the sklearn default error_score=np.nan so failures stand out.
optimized_model = GridSearchCV(estimator=decision_tree,
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=1)

optimized_model.fit(X_train, y_train)
# --- IMDB sentiment classification with a Random Forest ---
# Trains a baseline RandomForestClassifier, then builds a 3-fold
# cross-validated grid search over n_estimators and criterion.
# (The .fit(...) call for the grid search is outside this view.)

import warnings

import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

import imdb_data  # project-local: provides getData() -> train/test split
from imdb_common import basic_model_test

# Silence sklearn convergence / deprecation chatter during the grid search.
warnings.simplefilter('ignore')
sns.set(rc={'figure.figsize': (12, 6)})
sns.set_style("darkgrid", {'axes.grid': True})

############### Data Processing #####################

X_train, X_test, y_train, y_test = imdb_data.getData()

############### Random Forest Model #####################

random_forest = RandomForestClassifier(n_estimators=100,
                                       criterion='gini',
                                       bootstrap=True)
predictions, accuracy, report, confusion_matrix = basic_model_test(
    random_forest, X_train, X_test, y_train, y_test, "IMDB Random Forest")

################ Cross Validation Hyperparametre Tuning ###############################

print("\nHYPERPARAMETRE TUNING")
hyperparams = {
    'n_estimators': [100, 316, 1000],
    'criterion': ['gini', 'entropy']
}

# NOTE(review): error_score=1 makes a failed fit look like a perfect score;
# prefer the sklearn default error_score=np.nan so failures stand out.
optimized_model = GridSearchCV(estimator=random_forest,
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=1)