def get_split_training_dataset(train_part=0.75):
    """Load the phy_train dataset and return it shuffled and partitioned.

    Named Arguments
    train_part -- fraction of the rows kept for training (default 0.75)

    Returns the (X_train, X_test, Y_train, Y_test) tuple produced by
    sklearn's train_test_split (which shuffles before splitting).
    """
    features, targets, _n_rows, _n_feats = load_data("../data/raw/phy_train.dat")
    return train_test_split(features, targets, train_size=train_part)
def load_validation_data():
    """Load the imputed training and validation feature matrices.

    Returns
    Xt -- imputed training features
    Yt -- training labels
    Xv -- imputed validation features (the validation file carries no labels)
    """
    # Training set: features plus labels.
    train_X, train_Y, _n_train, _f_train = load_data("../data/raw/phy_train.dat")
    train_X = remove_features_missing_data(train_X)

    # Validation set: load_y=False because phy_test.dat has no target column,
    # so the returned Y is a placeholder and is discarded.
    val_X, _placeholder_Y, _n_val, _f_val = load_data(
        "../data/raw/phy_test.dat", load_y=False)
    val_X = remove_features_missing_data(val_X)

    return train_X, train_Y, val_X
import SVM as training
import imputation as imp
import K_means_imp as kimp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer, MinMaxScaler, StandardScaler
from sklearn import preprocessing
from sklearn.preprocessing import scale
import opti_svm as ops
import opti_forest as opf

if __name__ == '__main__':
    # Load the raw CSV and separate complete rows from rows with missing values.
    raw_data = imp.load_data('cleaned_1-OCT-modelling.csv')
    no_missing, missing_set, index_no_missing, index_missing, labels, names = imp.deal_data(
        raw_data)

    # Impute the missing entries to obtain the full design matrix and targets.
    X_set, y = imp.impute(no_missing, missing_set, index_missing, labels,
                          names, raw_data)

    # 80/20 train/test split; fixed random_state for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(
        X_set, y, train_size=0.8, random_state=1)

    # Train the SVM and report per-label precision / recall / F1.
    svc_precision, svc_recall, svc_f1 = training.svm(X_train, y_train,
                                                     X_test, y_test)
    print('svc_precision for each labels:', svc_precision, '\n')
    print('svc_recall for each labels:', svc_recall, '\n')
    print('svc_f1 for each labels:', svc_f1, '\n')
""" Use entirety of provided X, Y to predict Default Arguments Xtrain -- Training data Ytrain -- Training prediction Named Arguments -- Returns classifier -- a tree fitted to Xtrain and Ytrain """ classifier = KNeighborsClassifier(125) classifier.fit(Xtrain, Ytrain) return classifier if __name__ == "__main__": # Let's take our training data and train a decision tree # on a subset. Scikit-learn provides a good module for cross- # validation. if len(sys.argv) < 2: print "Usage: $ python decision-tree.py /path/to/data/file/" else: training = sys.argv[1] X,Y,n,f = load_data(training) Xt, Xv, Yt, Yv = shuffle_split(X,Y) Classifier = train(Xt, Yt) print "KNN Accuracy" suite(Yv, Classifier.predict(Xv))
def classify(Xtrain, Ytrain):
    """Fit a decision tree on the entirety of the provided training data.

    Arguments
    Xtrain -- training features
    Ytrain -- training labels

    Returns
    ready_tree -- a DecisionTreeClassifier fitted to Xtrain and Ytrain
    """
    ready_tree = tree.DecisionTreeClassifier()
    ready_tree.fit(Xtrain, Ytrain)
    return ready_tree


if __name__ == "__main__":
    # Let's take our training data and train a decision tree
    # on a subset. Scikit-learn provides a good module for cross-
    # validation.
    if len(sys.argv) < 2:
        # Converted Python 2 print statements to print() calls so the script
        # runs under Python 3 like the rest of the codebase.
        print("Usage: $ python decision-tree.py /path/to/data/file/")
    else:
        training = sys.argv[1]
        X, Y, n, f = load_data(training)
        Xt, Xv, Yt, Yv = shuffle_split(X, Y)
        # Renamed from 'tree' to avoid shadowing the sklearn 'tree' module
        # that classify() depends on.
        fitted_tree = classify(Xt, Yt)
        print("Decision Tree Accuracy:", acc(Yv, fitted_tree.predict(Xv)), "%")