Example 1
"""
Implementation and general notes:
  • The parameters min_samples_split and min_impurity_decrease were difficult to analyse. Seems like smaller values are preferred
  • The model often chose different hyperparemeters between runs. It seems inconsistent
"""

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from utils import split_feats_targs, capture_features, capture_targets, export_results

(train_features, train_targets) = split_feats_targs('train_2.csv')  # pass training set with targets
(val_features, val_targets) = split_feats_targs('val_2.csv')  # pass validation set
test_features = capture_features('test_no_label_2.csv', False)  # pass test set without targets
actual_targets = capture_targets('test_with_label_2.csv')  # pass test set with targets

"""
Parameter options to tune:
  • splitting criterion: gini and entropy
  • maximum depth of the tree: 10 and no maximum
  • minimum number of samples to split an internal node: experiment with values of your choice
  • minimum impurity decrease: experiment with values of your choice
  • class weight: None and balanced
"""

print("Finding best hyperparameters for DT....")
best_dt = GridSearchCV(DecisionTreeClassifier(), {
  'criterion': ['gini', 'entropy'],
  'max_depth': [10, None],
  'min_samples_split': [2, 3, 5],
  'min_impurity_decrease': [0.0, 1e-250, 1e-900],  # note: 1e-900 underflows to 0.0 as a Python float, so it duplicates the 0.0 option
  'class_weight': [None, 'balanced']
}).fit(train_features, train_targets)  # exhaustive search over the grid; refits the best combination on the full training set
print(best_dt.best_params_)  # report which hyperparameters were selected
predicted_targets = list(best_dt.predict(test_features))  # get predictions from the best estimator and record them
export_results(actual_targets, predicted_targets, 'Best-DT-DS2.csv')  # assumed output file name, following the naming pattern of the other scripts
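
"""
The run-to-run inconsistency noted at the top of this file most likely comes from
DecisionTreeClassifier itself: when several candidate splits improve the criterion by
exactly the same amount, the tie is broken using the estimator's random_state. The
sketch below is illustrative rather than part of the original scripts (the name
seeded_dt and the seed values are assumptions); it reuses the grid and variables
defined above and fixes the seeds so repeated runs select the same hyperparameters.
"""
from sklearn.model_selection import StratifiedKFold

seeded_dt = GridSearchCV(
  DecisionTreeClassifier(random_state=0),  # fixed seed so ties between equally good splits are broken the same way every run
  {
    'criterion': ['gini', 'entropy'],
    'max_depth': [10, None],
    'min_samples_split': [2, 3, 5],
    'min_impurity_decrease': [0.0, 1e-250, 1e-900],
    'class_weight': [None, 'balanced']
  },
  cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=0)  # explicit, seeded CV splits (GridSearchCV's default splitter does not shuffle, so this part is optional)
).fit(train_features, train_targets)
print(seeded_dt.best_params_)  # with the seeds fixed, repeated runs report the same selection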
Example 2
from sklearn.neural_network import MLPClassifier
from utils import split_feats_targs, capture_features, capture_targets, export_results

(train_features, train_targets) = split_feats_targs(
    'train_1.csv')  # pass training set with targets
test_features = capture_features('test_no_label_1.csv',
                                 False)  # pass test set without targets
actual_targets = capture_targets(
    'test_with_label_1.csv')  # pass test set with targets

fitted_mlp = MLPClassifier(activation='logistic', solver='sgd').fit(
    train_features, train_targets)  # fit model with training set values
predicted_targets = list(fitted_mlp.predict(
    test_features))  # get predictions from the model and record them
export_results(actual_targets, predicted_targets, 'Base-MLP-DS1.csv')
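
"""
export_results comes from the project's own utils module, so its exact output is not
shown in this file. As a quick, optional sanity check on the Base-MLP predictions, the
lines below compute standard metrics from the variables already defined above; the
metric choice is illustrative and not part of the original script.
"""
from sklearn.metrics import accuracy_score, confusion_matrix

print(accuracy_score(actual_targets, predicted_targets))  # overall fraction of correct predictions
print(confusion_matrix(actual_targets, predicted_targets))  # per-class breakdown of correct and incorrect predictions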
Example 3
"""
INTENDED FOR DEMO
Running this file will run every estimator on the given training and test set

File inputs are required wherever a <file_type> placeholder is seen
"""
from utils import split_feats_targs, capture_features, capture_targets, export_results
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
"""
Store the necessary training/test set values into variables
"""
(train_features,
 train_targets) = split_feats_targs('<demo_training_set_file_name>')
test_features = capture_features(
    '<demo_test_set_file_name>',
    False)  # pass False if test set has no targets, otherwise pass True
actual_targets = capture_targets('<demo_test_set_w_targets_file_name>')
"""
Run GNB model
"""
fitted_gnb = GaussianNB().fit(
    train_features, train_targets)  # fit model with training set values
predicted_targets = list(fitted_gnb.predict(
    test_features))  # get predictions from model and record them
export_results(actual_targets, predicted_targets, '<demo_output_file_name>')
"""
Run PER model
"""