Example no. 1
    def check_model(self, X_train, X_val, y_train, y_val, X_test, y_test,
                    raw_seq):
        """ Funtion used to navigate to the specific model. The is defined when initialising the class.
            Reads the self.model_type 
            Each statement does the following:
                - Calls function to format data for the model
                - Calls funtion to train the model
                - Calls funtion to plot the MSE graph
                - Calls funtion to test the model
                - Returns the accuarcy as R2 score"""

        if self.model_type == 'CNN':

            X_train, X_val, y_train, n_input, n_output, ytrain1, ytrain2, ytrain3, ytrain4 = CNN.data_format(
                X_train, X_val, y_train)
            history = CNN.CNN_train_model(self, X_train, X_val, y_train, y_val,
                                          self.verbose, n_input, n_output,
                                          ytrain1, ytrain2, ytrain3, ytrain4)
            Models.plotting(history)
            yhat = CNN.CNN_test_model(self, X_test, self.verbose, y_test)
            Models.accuracy(self, yhat, y_test, X_test, self.model_type)

        elif self.model_type == 'MLP':

            X_train, X_val, y_train, n_input, n_output, ytrain1, ytrain2, ytrain3, ytrain4 = MLP.data_format(
                X_train, X_val, y_train)
            history = MLP.MLP_train_model(self, X_train, X_val, y_train, y_val,
                                          self.verbose, n_input, n_output,
                                          ytrain1, ytrain2, ytrain3, ytrain4)
            # Models.plotting(history)
            yhat, final_cols = MLP.MLP_test_model(X_test, self.verbose, y_test)
            Models.accuracy(self, yhat, y_test, final_cols, self.model_type)

        elif self.model_type == 'KNN':

            X_train, X_val, y_train, X_test = KNN.data_format(
                X_train, X_val, y_train, X_test)
            yhat, final_cols = KNN.KNN_train_model(self, X_train, X_val,
                                                   y_train, y_val, X_test,
                                                   y_test, raw_seq)
            Models.accuracy(self, yhat, y_test, final_cols, self.model_type)

        elif self.model_type == 'LSTM':

            history, model = LSTMs.LSTM_train_model(self, X_train, X_val,
                                                    y_train, y_val,
                                                    self.verbose)
            Models.plotting(history)
            yhat = LSTMs.LSTM_test_model(X_test, model, self.verbose, y_test)
            Models.accuracy(self, yhat, y_test, X_test, self.model_type)

        elif self.model_type == 'BASELINE':
            n_input, X_train, n_output = BaseLine.data_format(X_train, y_train)
            model = BaseLine.baseline_train(self, X_train, y_train, n_input,
                                            n_output)
            yhat, final_cols = BaseLine.baseline_test(X_test, n_input, model)
            Models.accuracy(self, yhat, y_test, final_cols, self.model_type)
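
A minimal usage sketch (assumed, not from the original source: only check_model is shown, so the constructor of the enclosing Models class is a guess):

    model = Models(model_type='CNN', verbose=0)  # hypothetical constructor
    model.check_model(X_train, X_val, y_train, y_val, X_test, y_test, raw_seq)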
Example no. 2

def character_classification():
    print('Loading data...')
    x, y = load_data_chars()
    print('Processing data...')
    print('Features shape: ', x.shape)
    print('Labels shape: ', y.shape)
    plots.plot_filters(x[0])
    SVM.svm(x, y)
    Naive_Bayes.naive_bayes(x, y)
    KNN.knn(x, y)
    CNN.fit_cnn(x, y, trials=1, network_type='simple')
Example no. 3

def knn_classifier(X, y, examples):
    """
    K Nearest Neighbours classifier.
    Train and test on the entire data set,
    then predict classes for the provided examples.
    """
    knn = KNN(X, y)
    knn.train()

    print(knn.evaluate())

    knn.predict_for_examples(examples)
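
A hedged usage sketch (the loader below is hypothetical; examples is passed explicitly now that it is a parameter):

    X, y = load_data()  # hypothetical loader returning features and labels
    knn_classifier(X, y, examples=X[:5])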
Example no. 4
def parallel_run(method, X_train_train, X_train_val, y_train_train,
                 y_train_val):
    y_prediction = None
    if method == 'GBM':
        y_prediction = GBM(X_train_train, X_train_val, y_train_train)
    elif method == 'GLM':
        y_prediction = GLM(X_train_train, X_train_val, y_train_train)
    elif method == 'KNN':
        y_prediction = KNN(X_train_train, X_train_val, y_train_train)
    elif method == 'NN':
        y_prediction = NN(X_train_train, X_train_val, y_train_train,
                          y_train_val)

    return y_prediction
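
parallel_run itself trains one method at a time; the parallelism presumably comes from the caller. A minimal fan-out sketch with the standard library (the data variables are the same arrays passed above; nothing here is from the original source):

    from multiprocessing import Pool

    methods = ['GBM', 'GLM', 'KNN', 'NN']
    jobs = [(m, X_train_train, X_train_val, y_train_train, y_train_val)
            for m in methods]
    with Pool(len(methods)) as pool:
        predictions = dict(zip(methods, pool.starmap(parallel_run, jobs)))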
Example no. 5
def prediksi():
    label = ["Terindikasi Sara", "Bukan Sara"]
    if request.is_json:
        content = request.get_json()
        text = content['text']
    else:
        text = request.form['text']

    prediksi = model.predict(text)
    index = int(np.argmax(prediksi[0]))  # highest-probability class
    return_json = {
        'status': 200,
        'message': 'success',
        'klasifikasi': label[index],
        'detail': {
            label[0]: str(prediksi[0][0]),
            label[1]: str(prediksi[0][1])
        }
    }
    return jsonify(return_json)
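
A hedged client-side example (the route path is an assumption; the handler above accepts either JSON or form data):

    import requests

    resp = requests.post('http://localhost:5000/prediksi',
                         json={'text': 'contoh teks'})
    print(resp.json()['klasifikasi'])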
Example no. 6

def run_algorithms(X_train, X_val, y_train, y_val, best_loss, algorithm, mode):

    # from models import GBM, GLM, KNN, NN

    if mode == 'test':
        # The target has no values in test mode; set them to 1 to prevent errors.
        y_val.loc[pd.isnull(y_val['Target']), 'Target'] = 1

    y_prediction = {method: None for method in none_mixed_methods + mixed_methods}
    y_prediction_train = {method: None for method in none_mixed_methods + mixed_methods}
    Xtrain = {method: None for method in none_mixed_methods + mixed_methods}
    Xval = {method: None for method in none_mixed_methods + mixed_methods}

    X_train = X_train.drop(['county_fips', 'date of day t'], axis=1)
    X_val = X_val.drop(['county_fips', 'date of day t'], axis=1)
    y_train = np.array(y_train['Target']).reshape(-1)
    y_val = np.array(y_val['Target']).reshape(-1)
    
    for method in none_mixed_methods:
        Xtrain[method] = X_train
        Xval[method] = X_val
        if method in models_to_log:
            Xtrain[method] = logarithm_covariates(Xtrain[method])
            Xval[method] = logarithm_covariates(Xval[method])
        
    if algorithm == 'GBM' or algorithm in mixed_methods:
        y_prediction['GBM'], y_prediction_train['GBM'] = GBM(Xtrain['GBM'], Xval['GBM'], y_train, best_loss['GBM'])
        
    if algorithm == 'GLM' or algorithm in mixed_methods:
        y_prediction['GLM'], y_prediction_train['GLM'] = GLM(Xtrain['GLM'], Xval['GLM'], y_train)
        
    if algorithm == 'KNN' or algorithm in mixed_methods:
        y_prediction['KNN'], y_prediction_train['KNN'] = KNN(Xtrain['KNN'], Xval['KNN'], y_train)
        
    if algorithm == 'NN' or algorithm in mixed_methods:
        y_prediction['NN'], y_prediction_train['NN'] = NN(Xtrain['NN'], Xval['NN'], y_train, y_val, best_loss['NN'])
        
    if algorithm == 'LSTM' or algorithm == 'LSTM_MIXED':
        y_prediction['LSTM'], y_prediction_train['LSTM'] = LSTMM(Xtrain['LSTM'], Xval['LSTM'], y_train, y_val)

    print('y_prediction[NN]', y_prediction['NN'])
    print('y_prediction[LSTM]', y_prediction['LSTM'])
    
    if algorithm in mixed_methods:
        
        y_predictions_test, y_predictions_train = [], []
        # Construct the outputs for the testing dataset of the 'MM' methods
        y_predictions_test.extend([y_prediction['GBM'], y_prediction['GLM'], y_prediction['KNN'],
                                   y_prediction['NN']])
        y_prediction_test_np = np.array(y_predictions_test).reshape(len(y_predictions_test), -1)
        X_test_mixedModel = pd.DataFrame(y_prediction_test_np.transpose())
        # Construct the outputs for the training dataset of the 'MM' methods
        y_predictions_train.extend(
            [y_prediction_train['GBM'], y_prediction_train['GLM'], y_prediction_train['KNN'],
             y_prediction_train['NN']])
        y_prediction_train_np = np.array(y_predictions_train).reshape(len(y_predictions_train), -1)
        X_train_mixedModel = pd.DataFrame(y_prediction_train_np.transpose())
        
        if algorithm == 'MM_GLM':
            y_prediction['MM_GLM'], y_prediction_train['MM_GLM'] = GLM(X_train_mixedModel, X_test_mixedModel, y_train)
        elif algorithm == 'MM_NN':
            y_prediction['MM_NN'], y_prediction_train['MM_NN'] = NN(X_train_mixedModel, X_test_mixedModel, y_train, y_val, best_loss['NN'])
    
    
    if algorithm == 'LSTM_MIXED':
        
        y_predictions_test, y_predictions_train = [], []
        # Construct the outputs for the testing dataset of the 'MM' methods
        y_predictions_test.extend([y_prediction['GBM'], y_prediction['GLM'], y_prediction['KNN'],
                                   y_prediction['NN'],y_prediction['LSTM']])
        y_prediction_test_np = np.array(y_predictions_test).reshape(len(y_predictions_test), -1)
        X_test_mixedModel = pd.DataFrame(y_prediction_test_np.transpose())
        # Construct the outputs for the training dataset of the 'MM' methods
        y_predictions_train.extend(
            [y_prediction_train['GBM'], y_prediction_train['GLM'], y_prediction_train['KNN'],
             y_prediction_train['NN'], y_prediction_train['LSTM']])
        y_prediction_train_np = np.array(y_predictions_train).reshape(len(y_predictions_train), -1)
        X_train_mixedModel = pd.DataFrame(y_prediction_train_np.transpose())
        print(X_train_mixedModel)
        y_prediction['LSTM_MIXED'], y_prediction_train['LSTM_MIXED'] = GLM(X_train_mixedModel, X_test_mixedModel, y_train)
        
    return y_prediction[algorithm], y_prediction_train[algorithm]
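
A hedged call sketch (none_mixed_methods, mixed_methods, models_to_log, and the model functions are module-level globals in the original; the best_loss values below are placeholders):

    best_loss = {'GBM': 'poisson', 'NN': 'MSE'}  # hypothetical loss choices
    y_val_pred, y_train_pred = run_algorithms(X_train, X_val, y_train, y_val,
                                              best_loss, algorithm='MM_GLM',
                                              mode='train')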
Example no. 7
##############
# Parameters #
##############

# Paths to training and testing set
TRAINING_SET = '../resources/csv/training_set.csv'
TEST_SET = '../resources/csv/test_set.csv'

# Path to export predictions
DESTINATION = '../products/'

# Fingerprint transformation
FINGERPRINT = fingerprints.morgan()

# Model to train
MODEL = ConsensusClassifier([
    KNN(n_neighbors=17),
    MLP(random_state=0),
    SVM(gamma='auto', random_state=0, probability=True),
    RFC(500, random_state=0)
])
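
The snippet does not show ConsensusClassifier itself. A minimal sketch of what a soft-voting consensus over scikit-learn-style estimators could look like (an assumption about its behaviour, not the project's actual implementation):

    import numpy as np

    class SoftVotingSketch:
        def __init__(self, estimators):
            self.estimators = estimators

        def fit(self, X, y):
            for est in self.estimators:  # train every member on the same data
                est.fit(X, y)
            return self

        def predict_proba(self, X):
            # Consensus = the average of the members' class probabilities.
            return np.mean([est.predict_proba(X) for est in self.estimators],
                           axis=0)

        def predict(self, X):
            # Index of the highest averaged probability.
            return np.argmax(self.predict_proba(X), axis=1)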

########
# Main #
########

if __name__ == '__main__':
    # Load training and test set
    LS = utils.load_from_csv(TRAINING_SET)
    TS = utils.load_from_csv(TEST_SET)

    # Create fingerprint features and output of learning set
Example no. 8
    def __init__(self):
        self.resource_folder = get_resource_path()
        # for dataset_name in sorted(os.listdir(folder)):
        #     if dataset_name.endswith('.csv'):
        #         print(dataset_name[:-4])
        self.pipelines = {
            'credit-g': (
                'credit-g/dataset_31_credit-g.csv', 'class',
                CreditGPipeline()),
            'wine-quality': (
                'wine-quality/wine-quality-red.csv', 'class',
                WineQualityPipeline()),
            'wq-missing': (
                'wine-quality/wine-quality-red.csv', 'class',
                WineQualityMissingPipeline()),
            'abalone': (
                'abalone/abalone.csv', 'Rings',
                AbalonePipeline()),
            'adult': (
                'adult/adult.csv', 'class',
                AdultPipeline()),
            'adult-missing': (
                'adult/adult.csv', 'class',
                AdultMissingPipeline()),
            'heart': (
                'heart/heart.csv', 'class',
                HeartPipeline())}

        self.classifiers = {
            'dtc': DecisionTree(),
            'rfc40': RandomForest(size=40),
            'ertc40': ExtremelyRandomizedTrees(size=40),
            'xgb': XGB(),
            'svm': SVM(),
            'lsvm': LinearSVM(),
            'knn': KNN(n_neighbors=7),
            'logreg': LogRegression(),
            'gaus': GausNB(),
            'brfc40': BaggingRandomForest(size=40),
            'mlpc': MLPC(input_size=[16, 32, 16, 8])
        }

        self.error_gens = {
            'numeric anomalies': (
                Anomalies(), lambda x: x.dtype in [DataType.INTEGER,
                                                   DataType.FLOAT]),
            'typos': (
                Typos(), lambda x: x.dtype == DataType.STRING),
            'explicit misvals': (
                ExplicitMissingValues(), lambda x: True),
            'implicit misvals': (
                ImplicitMissingValues(), lambda x: True),
            'swap fields': (
                SwapFields(), lambda x: True)}

        self.params = [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 0.8]

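        # NOTE: 'num disc' and 'num cont' below use identical predicates; the
        # continuous test presumably intends a non-nominal DataScale (kept as
        # in the original).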
        self.tests = {'num disc': lambda x: (x.scale == DataScale.NOMINAL
                                             and x.dtype in [DataType.INTEGER,
                                                             DataType.FLOAT]),
                      'num cont': lambda x: (x.scale == DataScale.NOMINAL
                                             and x.dtype in [DataType.INTEGER,
                                                             DataType.FLOAT]),
                      'string': lambda x: x.dtype == DataType.STRING}

        self.results = Table(rows=sorted(self.pipelines.keys()),
                             columns=sorted(self.classifiers.keys()),
                             subrows=self.tests.keys(),
                             subcolumns=self.error_gens.keys())
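
A hedged sketch of how such a grid is typically swept (only __init__ appears in the snippet, so the method below and everything it calls are assumptions):

    def run_grid(self):
        for data_name, (path, target, pipeline) in self.pipelines.items():
            for clf_name, clf in self.classifiers.items():
                for gen_name, (generator, applies) in self.error_gens.items():
                    for fraction in self.params:
                        # Hypothetical: corrupt `fraction` of the applicable
                        # columns with `generator`, refit `pipeline` with
                        # `clf`, and record the score in self.results.
                        pass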
Example no. 9
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from data.cleaner import CleanAuto
from data.loader import loader as load
from models import KNN
from trainers import KNNTrainer
from settings import settings

cfg = settings()
clean_d = [0.01, 0.05, 0.1, 0.15, 0.3, 0.6]
# clean_d = [0.01, 0.05]
model = KNN(**cfg.model["knn"].structure)
trainer = KNNTrainer(model)
df = pd.DataFrame()
for d in clean_d:
    print("Cleaning d: {}".format(d))
    c = CleanAuto(d)
    trainer.train(load.training_data, load.training_labels)
    model_trained = trainer.get_model()
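    # NOTE: kept as in the original, but predicting on the test labels looks
    # like a bug; the test features were presumably intended here.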
    costates_hat = model_trained.predict(load.test_labels)
    p = np.linalg.norm(costates_hat[:, :2], axis=1)
    df = pd.concat([df, pd.DataFrame({d: p})], axis=1, ignore_index=True)

# Rename columns
df.columns = clean_d

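# One box of predicted costate norms per cleaning tolerance d.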
df.plot(kind="box")
Example no. 10
    print "LASAGNE"
    yprob = lasange.setTrainDataAndMakeModel(X_train, Y_train, X_test)
    print "%.4f" % log_loss(Y_test, yprob, eps=1e-15, normalize=True)
    outFrame = makeOutPutFrame(yprob, X_test_index, "LASAGNE")
    lasagneStorer.append(outFrame)

    print "RandomForest"
    yprob = randomforestclassifier.setTrainDataAndMakeModel(
        X_train, Y_train, X_test)
    print "%.4f" % log_loss(Y_test, yprob, eps=1e-15, normalize=True)
    outFrame = makeOutPutFrame(yprob, X_test_index, "RANDOM_FOREST")
    randomForestStorer.append(outFrame)

    print "KNN"
    yprob = KNN.setTrainDataAndMakeModel(X_train, Y_train, X_test)
    print "%.4f" % log_loss(Y_test, yprob, eps=1e-15, normalize=True)
    outFrame = makeOutPutFrame(yprob, X_test_index, "KNN")
    KNNStorer.append(outFrame)

    print "MultiNomialNB"
    yprob = MultinomialNB.setTrainDataAndMakeModel(X_train, Y_train, X_test)
    print "%.4f" % log_loss(Y_test, yprob, eps=1e-15, normalize=True)
    outFrame = makeOutPutFrame(yprob, X_test_index, "MultiNomialNB")
    MultinomialNBStorer.append(outFrame)

#====================== Between 0 and 1 Models (join all X frames) =======================
print "#====================== Between 0 and 1 Models (join all X frames) ======================="
#all chounks of frames concat for respective.
xgboostMetaFeaturesOfX = pd.concat(xgboostStorer)
lasangeMetaFeaturesOfX = pd.concat(lasagneStorer)
Example no. 11
from models import KNN, NaiveBayes, DecisionTree
from data import Data

training_data = Data('skinTraining')
testing_data = Data('skinTesting')

knn = KNN(training_data, testing_data)
bayes = NaiveBayes(training_data, testing_data)
tree = DecisionTree(training_data, testing_data)

knnResult = knn.evaluate()
bayesResult = bayes.evaluate()
treeResult = tree.evaluate()

print(knnResult, treeResult, bayesResult)
Example no. 12
def test_everything(args):
    ## Get features, labels, training and testing set, adjacency
    args, file_names, stat_dirname, features, gt_labels, genres, adjacency, indx_train, indx_test, pygsp_graph, release_dates = load_parameters_and_data(
        args)

    if args.graph_statistics:
        if not os.path.exists(stat_dirname):
            os.makedirs(stat_dirname)

        if args.graph_statistics == 'all':
            ## Prints out all statistics about graph
            gstats.allstats(adjacency, stat_dirname, active_plots=False)
        elif args.graph_statistics == 'advanced':
            ## Prints out all advanced statistics
            gstats.advanced(adjacency,
                            stat_dirname,
                            active_plots=args.plot_graph)
        else:  # basic setting
            ## Prints out basic statistics
            gstats.basic(adjacency)
        gstats.growth_analysis(adjacency, release_dates, gt_labels,
                               stat_dirname)

    if args.inductive_learning:
        print('#### Testing Inductive Learning ####')
        if args.additional_models:
            ## Initialize models with correct parameters
            svm_clf = SVM(features,
                          gt_labels,
                          kernel='linear',
                          seed=SEED,
                          save_path=file_names)
            random_forest_clf = Random_Forest(features,
                                              gt_labels,
                                              n_estimators=100,
                                              max_depth=20,
                                              seed=SEED,
                                              save_path=file_names)
            knn_clf = KNN(features, gt_labels, save_path=file_names)

            error_svm = simple_test(svm_clf,
                                    indx_test,
                                    classes=genres,
                                    name=file_names + "svm_")
            print('* SVM simple test error: {:.2f}'.format(error_svm))

            error_rf = simple_test(random_forest_clf,
                                   indx_test,
                                   classes=genres,
                                   name=file_names + "rf_")
            print('* Random Forest simple test error: {:.2f}'.format(error_rf))

            error_knn = simple_test(knn_clf,
                                    indx_test,
                                    classes=genres,
                                    name=file_names + "knn_")
            print('* KNN simple test error: {:.2f}'.format(error_knn))

        if args.gcn:
            ## Initialize GCN with correct parameters
            gnn_clf = GCN(nhid=[1200, 100],
                          dropout=0.1,
                          adjacency=adjacency,
                          features=features,
                          labels=gt_labels,
                          n_class=len(genres),
                          cuda=args.use_cpu,
                          regularization=None,
                          lr=0.01,
                          weight_decay=5e-4,
                          epochs=300,
                          batch_size=10000,
                          save_path=file_names)
            error_gnn = simple_test(gnn_clf,
                                    indx_test,
                                    classes=genres,
                                    name=file_names + "gnn_")
            print('* GCN simple test error: {:.2f}'.format(error_gnn))
        if args.gcn_khop:
            ## Initialize GCN K-Hop with correct parameters
            gnn_clf = GCN_KHop(nhid=[1200, 100],
                               dropout=0.1,
                               adjacency=adjacency,
                               features=features,
                               labels=gt_labels,
                               n_class=len(genres),
                               khop=2,
                               cuda=args.use_cpu,
                               regularization=None,
                               lr=0.01,
                               weight_decay=5e-4,
                               epochs=300,
                               batch_size=10000,
                               save_path=file_names)
            error_gnn = simple_test(gnn_clf,
                                    indx_test,
                                    classes=genres,
                                    name=file_names + "gnn_khop_")
            print('* GCN KHop simple test error: {:.2f}'.format(error_gnn))
        if args.mlp_nn:
            ## Initialize MLP with correct parameters
            mlp_nn = MLP_NN(hidden_size=100,
                            features=features,
                            labels=gt_labels,
                            num_epoch=10,
                            batch_size=100,
                            num_classes=len(genres),
                            save_path=file_names,
                            cuda=args.use_cpu)
            error_mlpNN = simple_test(mlp_nn,
                                      indx_test,
                                      classes=genres,
                                      name=file_names + "mlpNN_")
            print('* MLP NN simple test error: {:.2f}'.format(error_mlpNN))
Example no. 13
def train_everything(args):
    ## Get features, labels, training and testing set, adjacency
    args, file_names, stat_dirname, features, gt_labels, genres, adjacency, indx_train, indx_test, pygsp_graph, release_dates = load_parameters_and_data(
        args)

    if args.inductive_learning:
        print('#### Applying Inductive Learning ####')

        if args.additional_models:
            ## Initialize model with correct parameters
            svm_clf = SVM(features,
                          gt_labels,
                          kernel='linear',
                          seed=SEED,
                          save_path=file_names)
            random_forest_clf = Random_Forest(features,
                                              gt_labels,
                                              n_estimators=100,
                                              max_depth=20,
                                              seed=SEED,
                                              save_path=file_names)
            knn_clf = KNN(features, gt_labels, save_path=file_names)

            start = time.time()
            mean_error_svm, std_error_svm = cross_validation(svm_clf,
                                                             indx_train,
                                                             K=5,
                                                             classes=genres,
                                                             name=file_names +
                                                             "svm_")
            print('* SVM cross validation error mean: {:.2f}, std: {:.2f}'.
                  format(mean_error_svm, std_error_svm))
            print("SVM time", time.time() - start)

            start = time.time()
            mean_error_rf, std_error_rf = cross_validation(random_forest_clf,
                                                           indx_train,
                                                           K=5,
                                                           classes=genres,
                                                           name=file_names +
                                                           "rf_")
            print(
                '* Random Forest cross validation error mean: {:.2f}, std: {:.2f}'
                .format(mean_error_rf, std_error_rf))
            print("RF time", time.time() - start)

            start = time.time()
            mean_error_knn, std_error_knn = cross_validation(knn_clf,
                                                             indx_train,
                                                             K=5,
                                                             classes=genres,
                                                             name=file_names +
                                                             "knn_")
            print('* KNN cross validation error mean: {:.2f}, std: {:.2f}'.
                  format(mean_error_knn, std_error_knn))
            print("KNN time", time.time() - start)

        if args.gcn:
            print("Training GCN")
            start = time.time()
            ## Initialize GCN with correct parameters
            gnn_clf = GCN(nhid=[1200, 100],
                          dropout=0.1,
                          adjacency=adjacency,
                          features=features,
                          labels=gt_labels,
                          n_class=len(genres),
                          cuda=args.use_cpu,
                          regularization=None,
                          lr=0.01,
                          weight_decay=5e-4,
                          epochs=300,
                          batch_size=10000,
                          save_path=file_names)
            train_gcn(gnn_clf, indx_train, name=file_names + "gnn_")
            print("GCN time", time.time() - start)

        if args.gcn_khop:
            print("Training GCN K-Hop")
            start = time.time()
            ## Initialize GCN K-Hop with correct parameters
            gnn_clf = GCN_KHop(nhid=[1200, 100],
                               dropout=0.1,
                               adjacency=adjacency,
                               features=features,
                               labels=gt_labels,
                               n_class=len(genres),
                               khop=2,
                               cuda=args.use_cpu,
                               regularization=None,
                               lr=0.01,
                               weight_decay=5e-4,
                               epochs=300,
                               batch_size=10000,
                               save_path=file_names)
            train_gcn(gnn_clf, indx_train, name=file_names + "gnn_khop_")
            print("GCN K-Hop time", time.time() - start)

        if args.mlp_nn:
            start = time.time()
            ## Initialize MLP with correct parameters
            mlp_nn = MLP_NN(hidden_size=100,
                            features=features,
                            labels=gt_labels,
                            num_epoch=100,
                            batch_size=100,
                            num_classes=len(genres),
                            save_path=file_names,
                            cuda=args.use_cpu)
            mean_error_mlpNN, std_error_mlpNN = cross_validation(
                mlp_nn,
                indx_train,
                K=5,
                classes=genres,
                name=file_names + "mlpNN_")
            print('* MLP NN cross validation error mean: {:.2f}, std: {:.2f}'.
                  format(mean_error_mlpNN, std_error_mlpNN))
            print("MLP time", time.time() - start)
Example no. 14
    xgboostStorer.append(outFrame)

    print("LASAGNE")
    yprob = lasange.setTrainDataAndMakeModel(X_train, Y_train, X_test)
    print("%.4f" % log_loss(Y_test, yprob, eps=1e-15, normalize=True))
    outFrame = makeOutPutFrame(yprob, X_test_index, "LASAGNE")
    lasagneStorer.append(outFrame)

    print("RandomForest")
    yprob = randomforestclassifier.setTrainDataAndMakeModel(X_train, Y_train, X_test)
    print("%.4f" % log_loss(Y_test, yprob, eps=1e-15, normalize=True))
    outFrame = makeOutPutFrame(yprob, X_test_index, "RANDOM_FOREST")
    randomForestStorer.append(outFrame)

    print("KNN")
    yprob = KNN.setTrainDataAndMakeModel(X_train, Y_train, X_test)
    print("%.4f" % log_loss(Y_test, yprob, eps=1e-15, normalize=True))
    outFrame = makeOutPutFrame(yprob, X_test_index, "KNN")
    KNNStorer.append(outFrame)

    print("MultiNomialNB")
    yprob = MultinomialNB.setTrainDataAndMakeModel(X_train, Y_train, X_test)
    print("%.4f" % log_loss(Y_test, yprob, eps=1e-15, normalize=True))
    outFrame = makeOutPutFrame(yprob, X_test_index, "MultiNomialNB")
    MultinomialNBStorer.append(outFrame)

#====================== Between 0 and 1 Models (join all X frames) =======================
print("#====================== Between 0 and 1 Models (join all X frames) =======================")
# Concatenate all chunks of frames for each respective model.
xgboostMetaFeaturesOfX = pd.concat(xgboostStorer)
Example no. 15

from dataHeartDisaese import x2, y2
from models import KNN, LogisticRegressionModel, BaseClassification
from sklearn import model_selection

from CrossValidation import CrossValidation

test_proportion = 0.8
x, x_outer, y, y_outer = model_selection.train_test_split(
    x2, y2, test_size=test_proportion)
N, M = x.shape

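# Factory that builds a fresh, untrained set of models on each call.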
models = lambda: [BaseClassification(), KNN(), LogisticRegressionModel()]

outer_cv = CrossValidation(models(), x_outer, y_outer, K=10)
outer_cv.applyInnerValidation(x, y)

outer_cv.test()

outer_cv.print_result_with_k()

outer_cv.show_errors()

outer_cv.compare_all_classifiers()
Example no. 16
X_train, y_train = data['X_train'], data['y_train']
X_val, y_val = data['X_val'], data['y_val']
X_test, y_test = data['X_test'], data['y_test']

X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_val = np.reshape(X_val, (X_val.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))


def get_acc(pred, y_test):
    return np.sum(y_test == pred) / len(y_test) * 100


print("finished reading data")

knn = KNN(5)
knn.train(X_train, y_train)
pred_knn = knn.predict(X_test)
print('The testing accuracy is given by : %f' % (get_acc(pred_knn, y_test)))

percept_ = Perceptron()
percept_.train(X_train, y_train)
pred_percept = percept_.predict(X_test)
print('The testing accuracy is given by : %f' % (get_acc(pred_percept, y_test)))