# Decision-tree experiment set-up for the Parkinson data set.
#
# NOTE(review): `sklearn.cross_validation` and `sklearn.grid_search` are the
# legacy scikit-learn module paths (superseded by `sklearn.model_selection`).
# They are kept on purpose because `StratifiedKFold(y, n_folds=...)` below uses
# the legacy call signature; migrate both together if the environment is
# upgraded.
from plot_utils import PlotUtils  # fix: PlotUtils() is used below but was never imported
from data_utils import DataUtils
import matplotlib.pyplot as plt
import numpy as np  # fix: np.mean() is used below but numpy was never imported
from sklearn.cross_validation import StratifiedKFold
from sklearn.cross_validation import cross_val_score
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
import sklearn.tree as tree

# Baseline classifier with a fixed seed so repeated runs build the same tree.
decision_tree_classifier = DecisionTreeClassifier(random_state=0)

# Comma-separated feature list handed to DataUtils.
parkinson_features = "MDVP_Fo.Hz.,MDVP_Fhi.Hz.,MDVP_Flo.Hz.,MDVP_Jitter...,MDVP_Jitter.Abs.,MDVP_RAP,MDVP_PPQ,Jitter_DDP,MDVP_Shimmer,MDVP_Shimmer.dB.,Shimmer_APQ3,Shimmer_APQ5,MDVP_APQ,Shimmer_DDA,NHR,HNR,RPDE,DFA,spread1,spread2,D2,PPE"

# presumably (features, directory, training file, testing file) -- verify
# against the DataUtils constructor.
parkinson_data_util = DataUtils(
    parkinson_features,
    "data_sets",
    "parkinson_clean_normal_training.csv",
    "parkinson_clean_normal_testing.csv",
)

parkinson_training_inputs = parkinson_data_util.training_inputs()
parkinson_training_classes = parkinson_data_util.training_classes()
parkinson_testing_inputs = parkinson_data_util.testing_inputs()
parkinson_testing_classes = parkinson_data_util.testing_classes()

## plot for data distribution
print("mean: ", np.mean(parkinson_training_inputs))

# Disabled baseline fit/score on the held-out test split.
# decision_tree_classifier.fit(parkinson_training_inputs, parkinson_training_classes)
# score1 = decision_tree_classifier.score(parkinson_testing_inputs, parkinson_testing_classes)
# print("score1: ", score1)

# 5-fold stratified splitter built from the training labels (legacy API).
cross_validation = StratifiedKFold(parkinson_training_classes, n_folds=5)
plot_utils = PlotUtils()

# NOTE(review): in the visible source the statement below sat inside a
# triple-quoted block whose closing quotes are not visible; it is kept
# disabled here as a plain comment so the script parses on its own.
# plot_tree = DecisionTreeClassifier(random_state=0, max_depth=3, max_features=7)
# Decision-tree experiment set-up for the Wisconsin breast-cancer data set.
#
# NOTE(review): `sklearn.cross_validation` / `sklearn.grid_search` are the
# legacy scikit-learn module paths; `StratifiedKFold(y, n_folds=...)` below
# depends on that legacy signature.
import seaborn as sb
from plot_utils import PlotUtils
from data_utils import DataUtils
import matplotlib.pyplot as plt
from sklearn.cross_validation import StratifiedKFold, cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.grid_search import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
import sklearn.tree as tree

# Comma-separated feature list handed to DataUtils.
wisconsin_features = "Clump.Thickness,Uniformity.of.Cell.Size,Uniformity.of.Cell.Shape,Marginal.Adhesion,Single.Epithelial.Cell.Size,Bare.Nuceoli,Bland.Chromatin,Normal.Nucleoli,Mitoses"

# presumably (features, directory, training file, testing file) -- verify
# against the DataUtils constructor.
wisconsin_data_util = DataUtils(
    wisconsin_features,
    "data_sets",
    "wisconsin_training.csv",
    "wisconsin_testing.csv",
)

# Training split first, then testing split; inputs are fetched before classes.
wisconsin_training_inputs, wisconsin_training_classes = (
    wisconsin_data_util.training_inputs(),
    wisconsin_data_util.training_classes(),
)
wisconsin_testing_inputs, wisconsin_testing_classes = (
    wisconsin_data_util.testing_inputs(),
    wisconsin_data_util.testing_classes(),
)

# 5-fold stratified splitter built from the training labels (legacy API).
cross_validation = StratifiedKFold(wisconsin_training_classes, n_folds=5)
plot_utils = PlotUtils()

## Decision Tree below
# Unconstrained baseline tree with a fixed seed.
decision_tree_classifier = DecisionTreeClassifier(random_state=0)

# Constrained tree (depth 6, one candidate feature per split), scored with
# 5-fold cross-validation on the training split.
plot_tree = DecisionTreeClassifier(
    random_state=0,
    max_depth=6,
    max_features=1,
)
cv_scores = cross_val_score(
    plot_tree,
    wisconsin_training_inputs,
    wisconsin_training_classes,
    cv=5,
)

# Disabled plot of the cross-validation score distribution.
# sb.distplot(cv_scores)
# plt.title('Average accuracy score of the Breast Cancer Data: {}'.format(np.mean(cv_scores)))
# plt.show()
# Shared set-up: loads the Wisconsin and Parkinson training/testing splits.
#
# NOTE(review): `sklearn.learning_curve` is a legacy scikit-learn module path
# (superseded by `sklearn.model_selection`); kept as in the original.
from sklearn.metrics import accuracy_score
from sklearn.learning_curve import validation_curve, learning_curve
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from plot_utils import PlotUtils
from data_utils import DataUtils
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeClassifier

# --- Wisconsin breast-cancer data -------------------------------------------
# Comma-separated feature list handed to DataUtils.
wisconsin_features = "Clump.Thickness,Uniformity.of.Cell.Size,Uniformity.of.Cell.Shape,Marginal.Adhesion,Single.Epithelial.Cell.Size,Bare.Nuceoli,Bland.Chromatin,Normal.Nucleoli,Mitoses"

# presumably (features, directory, training file, testing file) -- verify
# against the DataUtils constructor.
wisconsin_data_util = DataUtils(
    wisconsin_features,
    "data_sets",
    "wisconsin_training.csv",
    "wisconsin_testing.csv",
)

# Training split first, then testing split; inputs are fetched before classes.
wisconsin_training_inputs, wisconsin_training_classes = (
    wisconsin_data_util.training_inputs(),
    wisconsin_data_util.training_classes(),
)
wisconsin_testing_inputs, wisconsin_testing_classes = (
    wisconsin_data_util.testing_inputs(),
    wisconsin_data_util.testing_classes(),
)

# --- Parkinson data ----------------------------------------------------------
parkinson_features = "MDVP_Fo.Hz.,MDVP_Fhi.Hz.,MDVP_Flo.Hz.,MDVP_Jitter...,MDVP_Jitter.Abs.,MDVP_RAP,MDVP_PPQ,Jitter_DDP,MDVP_Shimmer,MDVP_Shimmer.dB.,Shimmer_APQ3,Shimmer_APQ5,MDVP_APQ,Shimmer_DDA,NHR,HNR,RPDE,DFA,spread1,spread2,D2,PPE"

parkinson_data_util = DataUtils(
    parkinson_features,
    "data_sets",
    "parkinson_clean_normal_training.csv",
    "parkinson_clean_normal_testing.csv",
)

parkinson_training_inputs, parkinson_training_classes = (
    parkinson_data_util.training_inputs(),
    parkinson_data_util.training_classes(),
)
parkinson_testing_inputs, parkinson_testing_classes = (
    parkinson_data_util.testing_inputs(),
    parkinson_data_util.testing_classes(),
)

# Section marker kept as the original bare string statement.
""" SKNN: neural network """