def get_split_training_dataset(train_part=0.75):
    """ Get the phy_train dataset shuffled and split """
    # Impute dataset
    X, Y, n, f = load_data("../data/raw/phy_train.dat")

    # Split and shuffle
    return train_test_split(X, Y, train_size=train_part)
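
# Usage note (not part of the original snippet): a minimal, self-contained
# sketch of the split this wrapper performs. train_test_split shuffles by
# default and returns (X_train, X_test, y_train, y_test); the toy arrays
# below stand in for the phy_train data.
import numpy as np
from sklearn.model_selection import train_test_split

X_toy = np.arange(40).reshape(20, 2)   # 20 samples, 2 features
Y_toy = np.arange(20) % 2              # toy binary labels
Xt, Xv, Yt, Yv = train_test_split(X_toy, Y_toy, train_size=0.75)
print(Xt.shape, Xv.shape)              # (15, 2) (5, 2)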
def load_validation_data():
    """ Load training and testing data

    Returns
    Xt -- Imputed training data
    Yt -- training prediction
    Xv -- Imputed validation data
    """
    # Load and impute validation data
    Xv, Yzero, nv, fv = load_data("../data/raw/phy_test.dat", load_y=False)
    Xv = remove_features_missing_data(Xv)

    # Load and impute training data
    Xt, Yt, nt, ft = load_data("../data/raw/phy_train.dat")
    Xt = remove_features_missing_data(Xt)

    return Xt, Yt, Xv
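
# Usage note (not part of the original snippet): a hedged sketch of how the
# three returned arrays are typically consumed, assuming the local load_data /
# remove_features_missing_data helpers and the phy_*.dat files are available.
# The RandomForestClassifier here is an illustrative choice, not the project's.
from sklearn.ensemble import RandomForestClassifier

Xt, Yt, Xv = load_validation_data()
clf = RandomForestClassifier(n_estimators=100)
clf.fit(Xt, Yt)                         # fit on the imputed training data
validation_preds = clf.predict(Xv)      # Xv was loaded with load_y=False, so
                                        # there are only predictions here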
Example n. 5
import SVM as training
import imputation as imp
import K_means_imp as kimp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer, MinMaxScaler, StandardScaler
from sklearn import preprocessing
from sklearn.preprocessing import scale
import opti_svm as ops
import opti_forest as opf

if __name__ == '__main__':
    raw_data = imp.load_data('cleaned_1-OCT-modelling.csv')
    #nom  = Normalizer(norm='l2')

    #m = MinMaxScaler()
    #s = StandardScaler()
    no_missing, missing_set, index_no_missing, index_missing, labels, names = imp.deal_data(
        raw_data)
    X_set, y = imp.impute(no_missing, missing_set, index_missing, labels,
                          names, raw_data)
    X_train, X_test, y_train, y_test = train_test_split(X_set,
                                                        y,
                                                        train_size=0.8,
                                                        random_state=1)
    #X_strain,X_stest,y_strain,y_stest = train_test_split(X_norm, y, train_size=0.8, random_state=1)
    svc_precision, svc_recall, svc_f1 = training.svm(X_train, y_train, X_test,
                                                     y_test)
    #training.svm(X_strain, y_strain, X_stest, y_stest)
    print('svc_precision for each label:', svc_precision, '\n')
    print('svc_recall for each label:', svc_recall, '\n')
    print('svc_f1 for each label:', svc_f1, '\n')
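
# Reference note (not part of the original snippet): the per-label scores above
# come from the local SVM module. As a point of comparison, this sketch computes
# per-label precision/recall/F1 with scikit-learn's own metrics on toy data;
# the plain SVC below is an assumed stand-in, not the project's model.
# (train_test_split is already imported at the top of this example.)
from sklearn.datasets import make_classification
from sklearn.metrics import precision_recall_fscore_support
from sklearn.svm import SVC

X_toy, y_toy = make_classification(n_samples=200, n_classes=3, n_informative=4,
                                   random_state=1)
X_tr, X_te, y_tr, y_te = train_test_split(X_toy, y_toy, train_size=0.8,
                                          random_state=1)
svc = SVC(kernel='rbf').fit(X_tr, y_tr)
prec, rec, f1, _ = precision_recall_fscore_support(y_te, svc.predict(X_te))
print('sklearn precision per label:', prec)
print('sklearn recall per label:', rec)
print('sklearn f1 per label:', f1)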
    """ Use entirety of provided X, Y to predict

    Default Arguments
    Xtrain -- Training data
    Ytrain -- Training prediction

    Named Arguments
    --

    Returns
    classifier -- a tree fitted to Xtrain and Ytrain
    """
    classifier = KNeighborsClassifier(125)
    classifier.fit(Xtrain, Ytrain)
    return classifier

if __name__ == "__main__":
    # Let's take our training data and train a decision tree
    # on a subset. Scikit-learn provides a good module for cross-
    # validation.

    if len(sys.argv) < 2:
        print "Usage: $ python decision-tree.py /path/to/data/file/"
    else:
        training = sys.argv[1]
        X,Y,n,f = load_data(training)
        Xt, Xv, Yt, Yv = shuffle_split(X,Y)
        Classifier = train(Xt, Yt)
        print "KNN Accuracy"
        suite(Yv, Classifier.predict(Xv))
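
# Reference note (not part of the original snippet): suite above is a local
# reporting helper. This self-contained sketch shows an equivalent check with
# scikit-learn's own metrics on toy data, mirroring the 125-neighbour setting.
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

X_toy, Y_toy = make_classification(n_samples=1000, random_state=0)
Xt, Xv, Yt, Yv = train_test_split(X_toy, Y_toy, train_size=0.75, random_state=0)
knn = KNeighborsClassifier(n_neighbors=125).fit(Xt, Yt)
preds = knn.predict(Xv)
print("KNN Accuracy:", accuracy_score(Yv, preds))
print(classification_report(Yv, preds))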
Example n. 7

import sys

from sklearn import tree

# load_data, shuffle_split and acc are assumed to come from a local helper
# module, as in the other examples; they are not defined in this snippet.


def classify(Xtrain, Ytrain):
    """ Use entirety of provided X, Y to predict

    Arguments
    Xtrain -- Training data
    Ytrain -- Training prediction

    Returns
    ready_tree -- a tree fitted to Xtrain and Ytrain
    """
    ready_tree = tree.DecisionTreeClassifier()
    ready_tree.fit(Xtrain, Ytrain)
    return ready_tree


if __name__ == "__main__":
    # Let's take our training data and train a decision tree
    # on a subset. Scikit-learn provides a good module for cross-
    # validation.

    if len(sys.argv) < 2:
        print "Usage: $ python decision-tree.py /path/to/data/file/"
    else:
        training = sys.argv[1]
        X, Y, n, f = load_data(training)
        Xt, Xv, Yt, Yv = shuffle_split(X, Y)
        tree = classify(Xt, Yt)
        print "Decision Tree Accuracy:", acc(Yv, tree.predict(Xv)), "%"