import matplotlib.pyplot as plt
import numpy as np
import sklearn.tree as tree
from sklearn.cross_validation import StratifiedKFold
from sklearn.cross_validation import cross_val_score
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

from data_utils import DataUtils
from plot_utils import PlotUtils

# Decision tree with a fixed random_state; only referenced by the disabled
# baseline run further down in this section.
decision_tree_classifier = DecisionTreeClassifier(random_state=0)

# Comma-separated feature names handed to DataUtils together with the data
# directory and the training/testing CSV file names.
parkinson_features = (
    "MDVP_Fo.Hz.,MDVP_Fhi.Hz.,MDVP_Flo.Hz.,MDVP_Jitter...,MDVP_Jitter.Abs.,"
    "MDVP_RAP,MDVP_PPQ,Jitter_DDP,MDVP_Shimmer,MDVP_Shimmer.dB.,"
    "Shimmer_APQ3,Shimmer_APQ5,MDVP_APQ,Shimmer_DDA,NHR,HNR,RPDE,DFA,"
    "spread1,spread2,D2,PPE"
)
parkinson_data_util = DataUtils(
    parkinson_features,
    "data_sets",
    "parkinson_clean_normal_training.csv",
    "parkinson_clean_normal_testing.csv",
)
parkinson_training_inputs = parkinson_data_util.training_inputs()
parkinson_training_classes = parkinson_data_util.training_classes()
parkinson_testing_inputs = parkinson_data_util.testing_inputs()
parkinson_testing_classes = parkinson_data_util.testing_classes()

## Data distribution check: prints the mean of the training inputs
## (no plot is produced here). Requires numpy to be imported as np.
print("mean: ", np.mean(parkinson_training_inputs))

# Disabled baseline run: fit on the training split, score on the testing split.
# decision_tree_classifier.fit(parkinson_training_inputs, parkinson_training_classes)
# score1 = decision_tree_classifier.score(parkinson_testing_inputs, parkinson_testing_classes)
# print("score1: ", score1)


# NOTE(review): StratifiedKFold(labels, n_folds=...) is the signature of the
# legacy sklearn.cross_validation module (removed in scikit-learn 0.20). The
# replacement is sklearn.model_selection.StratifiedKFold(n_splits=5), which
# takes the labels in .split(X, y) instead; migrating also requires changing
# the imports at the top of the file.
cross_validation = StratifiedKFold(parkinson_training_classes, n_folds=5)
# PlotUtils must be imported from plot_utils for this line to run.
plot_utils = PlotUtils()
"""
plot_tree = DecisionTreeClassifier(random_state=0, max_depth=3, max_features=7)
import seaborn as sb
from plot_utils import PlotUtils
from data_utils import DataUtils
import matplotlib.pyplot as plt
from sklearn.cross_validation import StratifiedKFold
from sklearn.cross_validation import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.grid_search import GridSearchCV

from sklearn.tree import DecisionTreeClassifier
import sklearn.tree as tree

# Feature names for the Wisconsin data, joined into the comma-separated
# string that DataUtils receives.
wisconsin_features = ",".join(
    [
        "Clump.Thickness",
        "Uniformity.of.Cell.Size",
        "Uniformity.of.Cell.Shape",
        "Marginal.Adhesion",
        "Single.Epithelial.Cell.Size",
        "Bare.Nuceoli",
        "Bland.Chromatin",
        "Normal.Nucleoli",
        "Mitoses",
    ]
)
wisconsin_data_util = DataUtils(
    wisconsin_features,
    "data_sets",
    "wisconsin_training.csv",
    "wisconsin_testing.csv",
)

# Training split first, then the testing split.
wisconsin_training_inputs = wisconsin_data_util.training_inputs()
wisconsin_training_classes = wisconsin_data_util.training_classes()
wisconsin_testing_inputs = wisconsin_data_util.testing_inputs()
wisconsin_testing_classes = wisconsin_data_util.testing_classes()

# Five stratified folds built from the training labels, plus the plotting helper.
cross_validation = StratifiedKFold(wisconsin_training_classes, n_folds=5)
plot_utils = PlotUtils()


## Decision Tree below
decision_tree_classifier = DecisionTreeClassifier(random_state=0)

# Depth-limited tree (max_depth=6, max_features=1) scored with 5-fold
# cross-validation on the training split.
plot_tree = DecisionTreeClassifier(random_state=0, max_depth=6, max_features=1)
cv_scores = cross_val_score(
    plot_tree,
    wisconsin_training_inputs,
    wisconsin_training_classes,
    cv=5,
)
# Disabled plot of the cross-validation score distribution:
# sb.distplot(cv_scores)
# plt.title('Average accuracy score of the Breast Cancer Data: {}'.format(np.mean(cv_scores)))
# plt.show()
# Example no. 3
# 0
from sklearn.metrics import accuracy_score

from sklearn.learning_curve import validation_curve
from sklearn.learning_curve import learning_curve
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier

from plot_utils import PlotUtils
from data_utils import DataUtils
from sklearn.metrics import mean_squared_error

from sklearn.tree import DecisionTreeClassifier
# --- Wisconsin data set ---
# Feature names are joined into the comma-separated string DataUtils receives.
wisconsin_features = ",".join(
    [
        "Clump.Thickness",
        "Uniformity.of.Cell.Size",
        "Uniformity.of.Cell.Shape",
        "Marginal.Adhesion",
        "Single.Epithelial.Cell.Size",
        "Bare.Nuceoli",
        "Bland.Chromatin",
        "Normal.Nucleoli",
        "Mitoses",
    ]
)
wisconsin_data_util = DataUtils(
    wisconsin_features,
    "data_sets",
    "wisconsin_training.csv",
    "wisconsin_testing.csv",
)
wisconsin_training_inputs = wisconsin_data_util.training_inputs()
wisconsin_training_classes = wisconsin_data_util.training_classes()
wisconsin_testing_inputs = wisconsin_data_util.testing_inputs()
wisconsin_testing_classes = wisconsin_data_util.testing_classes()

# --- Parkinson data set ---
parkinson_features = ",".join(
    [
        "MDVP_Fo.Hz.",
        "MDVP_Fhi.Hz.",
        "MDVP_Flo.Hz.",
        "MDVP_Jitter...",
        "MDVP_Jitter.Abs.",
        "MDVP_RAP",
        "MDVP_PPQ",
        "Jitter_DDP",
        "MDVP_Shimmer",
        "MDVP_Shimmer.dB.",
        "Shimmer_APQ3",
        "Shimmer_APQ5",
        "MDVP_APQ",
        "Shimmer_DDA",
        "NHR",
        "HNR",
        "RPDE",
        "DFA",
        "spread1",
        "spread2",
        "D2",
        "PPE",
    ]
)
parkinson_data_util = DataUtils(
    parkinson_features,
    "data_sets",
    "parkinson_clean_normal_training.csv",
    "parkinson_clean_normal_testing.csv",
)
parkinson_training_inputs = parkinson_data_util.training_inputs()
parkinson_training_classes = parkinson_data_util.training_classes()
parkinson_testing_inputs = parkinson_data_util.testing_inputs()
parkinson_testing_classes = parkinson_data_util.testing_classes()


"""
SKNN: neural network

"""