def check_model(self, X_train, X_val, y_train, y_val, X_test, y_test, raw_seq):
    """Dispatch to the specific model selected when the class was initialised.

    Reads self.model_type. Each branch does the following:
    - calls a function to format the data for the model
    - calls a function to train the model
    - calls a function to plot the MSE graph
    - calls a function to test the model
    - reports the accuracy as the R2 score
    """
    if self.model_type == 'CNN':
        X_train, X_val, y_train, n_input, n_output, ytrain1, ytrain2, ytrain3, ytrain4 = CNN.data_format(
            X_train, X_val, y_train)
        history = CNN.CNN_train_model(self, X_train, X_val, y_train, y_val, self.verbose,
                                      n_input, n_output, ytrain1, ytrain2, ytrain3, ytrain4)
        Models.plotting(history)
        yhat = CNN.CNN_test_model(self, X_test, self.verbose, y_test)
        Models.accuracy(self, yhat, y_test, X_test, self.model_type)
    elif self.model_type == 'MLP':
        X_train, X_val, y_train, n_input, n_output, ytrain1, ytrain2, ytrain3, ytrain4 = MLP.data_format(
            X_train, X_val, y_train)
        history = MLP.MLP_train_model(self, X_train, X_val, y_train, y_val, self.verbose,
                                      n_input, n_output, ytrain1, ytrain2, ytrain3, ytrain4)
        # Models.plotting(history)
        yhat, final_cols = MLP.MLP_test_model(X_test, self.verbose, y_test)
        Models.accuracy(self, yhat, y_test, final_cols, self.model_type)
    elif self.model_type == 'KNN':
        X_train, X_val, y_train, X_test = KNN.data_format(X_train, X_val, y_train, X_test)
        yhat, final_cols = KNN.KNN_train_model(self, X_train, X_val, y_train, y_val,
                                               X_test, y_test, raw_seq)
        Models.accuracy(self, yhat, y_test, final_cols, self.model_type)
    elif self.model_type == 'LSTM':
        history, model = LSTMs.LSTM_train_model(self, X_train, X_val, y_train, y_val, self.verbose)
        Models.plotting(history)
        yhat = LSTMs.LSTM_test_model(X_test, model, self.verbose, y_test)
        Models.accuracy(self, yhat, y_test, X_test, self.model_type)
    elif self.model_type == 'BASELINE':
        n_input, X_train, n_output = BaseLine.data_format(X_train, y_train)
        model = BaseLine.baseline_train(self, X_train, y_train, n_input, n_output)
        yhat, final_cols = BaseLine.baseline_test(X_test, n_input, model)
        Models.accuracy(self, yhat, y_test, final_cols, self.model_type)
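# Hypothetical usage sketch for Models.check_model. The constructor arguments
# (model_type, verbose) are assumptions for illustration; only check_model
# itself appears in this snippet, and the data variables are assumed to be
# prepared elsewhere.
model = Models(model_type='LSTM', verbose=1)
model.check_model(X_train, X_val, y_train, y_val, X_test, y_test, raw_seq)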
def character_classification():
    print('Loading data...')
    x, y = load_data_chars()
    print('Processing data...')
    # x holds the character images, y holds the class labels.
    print('Data shape: ', x.shape)
    print('Labels shape: ', y.shape)
    plots.plot_filters(x[0])
    SVM.svm(x, y)
    Naive_Bayes.naive_bayes(x, y)
    KNN.knn(x, y)
    CNN.fit_cnn(x, y, trials=1, network_type='simple')
def knn_classifier(X, y, examples):
    """K-nearest-neighbours classifier.

    Trains and tests on the full data set, then predicts classes for the
    provided examples (passed in explicitly rather than read from a global).
    """
    knn = KNN(X, y)
    knn.train()
    print(knn.evaluate())
    knn.predict_for_examples(examples)
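# The KNN class used above is not defined in this snippet. Below is a minimal
# sketch of the assumed interface, backed by scikit-learn; the project's real
# implementation may differ (the internal 80/20 hold-out split is an assumption).
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier


class KNN:
    def __init__(self, X, y, n_neighbors=5):
        # Hold out a test split internally so evaluate() needs no arguments.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=0.2, random_state=0)
        self.clf = KNeighborsClassifier(n_neighbors=n_neighbors)

    def train(self):
        self.clf.fit(self.X_train, self.y_train)

    def evaluate(self):
        # Mean accuracy on the held-out split.
        return self.clf.score(self.X_test, self.y_test)

    def predict_for_examples(self, examples):
        return self.clf.predict(examples)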
def parallel_run(method, X_train_train, X_train_val, y_train_train, y_train_val):
    y_prediction = None
    if method == 'GBM':
        y_prediction = GBM(X_train_train, X_train_val, y_train_train)
    elif method == 'GLM':
        y_prediction = GLM(X_train_train, X_train_val, y_train_train)
    elif method == 'KNN':
        y_prediction = KNN(X_train_train, X_train_val, y_train_train)
    elif method == 'NN':
        y_prediction = NN(X_train_train, X_train_val, y_train_train, y_train_val)
    return y_prediction
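# The if/elif chain above can also be written as a dict dispatch, which keeps
# the method table in one place. A sketch under the same assumptions about the
# GBM/GLM/KNN/NN callables; not the project's actual code.
def parallel_run_dispatch(method, X_train_train, X_train_val, y_train_train, y_train_val):
    dispatch = {
        'GBM': lambda: GBM(X_train_train, X_train_val, y_train_train),
        'GLM': lambda: GLM(X_train_train, X_train_val, y_train_train),
        'KNN': lambda: KNN(X_train_train, X_train_val, y_train_train),
        'NN': lambda: NN(X_train_train, X_train_val, y_train_train, y_train_val),
    }
    runner = dispatch.get(method)
    return runner() if runner is not None else None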
def prediksi():
    # Labels: "Terindikasi Sara" = "flagged as SARA (hate-speech) content",
    # "Bukan Sara" = "not SARA content". Kept verbatim: they are API output values.
    label = ["Terindikasi Sara", "Bukan Sara"]
    if request.is_json:
        content = request.get_json()
        text = content['text']
    else:
        text = request.form['text']
    # Renamed from 'prediksi' so the result does not shadow this view function.
    pred = model.predict(text)
    index = int(np.argmax(pred[0]))
    return_json = {
        'status': 200,
        'message': 'success',
        'klasifikasi': label[index],
        'detail': {
            label[0]: str(pred[0][0]),
            label[1]: str(pred[0][1])
        }
    }
    return jsonify(return_json)
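# Example client call against the endpoint above, assuming it is routed at
# /prediksi on a local Flask server (the route decorator and host are not
# shown in this snippet, so both are assumptions).
import requests

resp = requests.post('http://localhost:5000/prediksi',
                     json={'text': 'contoh kalimat untuk diuji'})
print(resp.json())  # {'status': 200, 'message': 'success', 'klasifikasi': ..., 'detail': {...}}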
def run_algorithms(X_train, X_val, y_train, y_val, best_loss, algorithm, mode):
    # from models import GBM, GLM, KNN, NN
    if mode == 'test':
        # The target has no values in test mode; fill with 1 to prevent errors.
        # (.loc avoids the chained-assignment bug in the original.)
        y_val.loc[pd.isnull(y_val['Target']), 'Target'] = 1
    y_prediction = {method: None for method in none_mixed_methods + mixed_methods}
    y_prediction_train = {method: None for method in none_mixed_methods + mixed_methods}
    Xtrain = {method: None for method in none_mixed_methods + mixed_methods}
    Xval = {method: None for method in none_mixed_methods + mixed_methods}
    X_train = X_train.drop(['county_fips', 'date of day t'], axis=1)
    X_val = X_val.drop(['county_fips', 'date of day t'], axis=1)
    y_train = np.array(y_train['Target']).reshape(-1)
    y_val = np.array(y_val['Target']).reshape(-1)
    for method in none_mixed_methods:
        Xtrain[method] = X_train
        Xval[method] = X_val
        if method in models_to_log:
            Xtrain[method] = logarithm_covariates(Xtrain[method])
            Xval[method] = logarithm_covariates(Xval[method])
    if algorithm == 'GBM' or algorithm in mixed_methods:
        y_prediction['GBM'], y_prediction_train['GBM'] = GBM(Xtrain['GBM'], Xval['GBM'],
                                                             y_train, best_loss['GBM'])
    if algorithm == 'GLM' or algorithm in mixed_methods:
        y_prediction['GLM'], y_prediction_train['GLM'] = GLM(Xtrain['GLM'], Xval['GLM'], y_train)
    if algorithm == 'KNN' or algorithm in mixed_methods:
        y_prediction['KNN'], y_prediction_train['KNN'] = KNN(Xtrain['KNN'], Xval['KNN'], y_train)
    if algorithm == 'NN' or algorithm in mixed_methods:
        y_prediction['NN'], y_prediction_train['NN'] = NN(Xtrain['NN'], Xval['NN'],
                                                          y_train, y_val, best_loss['NN'])
    if algorithm == 'LSTM' or algorithm == 'LSTM_MIXED':
        y_prediction['LSTM'], y_prediction_train['LSTM'] = LSTMM(Xtrain['LSTM'], Xval['LSTM'],
                                                                 y_train, y_val)
    print('y_prediction[NN]', y_prediction['NN'])
    print('y_prediction[LSTM]', y_prediction['LSTM'])
    if algorithm in mixed_methods:
        y_predictions_test, y_predictions_train = [], []
        # Construct the outputs for the testing dataset of the 'MM' methods
        y_predictions_test.extend([y_prediction['GBM'], y_prediction['GLM'],
                                   y_prediction['KNN'], y_prediction['NN']])
        y_prediction_test_np = np.array(y_predictions_test).reshape(len(y_predictions_test), -1)
        X_test_mixedModel = pd.DataFrame(y_prediction_test_np.transpose())
        # Construct the outputs for the training dataset of the 'MM' methods
        y_predictions_train.extend([y_prediction_train['GBM'], y_prediction_train['GLM'],
                                    y_prediction_train['KNN'], y_prediction_train['NN']])
        y_prediction_train_np = np.array(y_predictions_train).reshape(len(y_predictions_train), -1)
        X_train_mixedModel = pd.DataFrame(y_prediction_train_np.transpose())
        if algorithm == 'MM_GLM':
            y_prediction['MM_GLM'], y_prediction_train['MM_GLM'] = GLM(X_train_mixedModel,
                                                                       X_test_mixedModel, y_train)
        elif algorithm == 'MM_NN':
            y_prediction['MM_NN'], y_prediction_train['MM_NN'] = NN(X_train_mixedModel,
                                                                    X_test_mixedModel,
                                                                    y_train, y_val, best_loss['NN'])
    if algorithm == 'LSTM_MIXED':
        y_predictions_test, y_predictions_train = [], []
        # Construct the outputs for the testing dataset of the 'MM' methods
        y_predictions_test.extend([y_prediction['GBM'], y_prediction['GLM'], y_prediction['KNN'],
                                   y_prediction['NN'], y_prediction['LSTM']])
        y_prediction_test_np = np.array(y_predictions_test).reshape(len(y_predictions_test), -1)
        X_test_mixedModel = pd.DataFrame(y_prediction_test_np.transpose())
        # Construct the outputs for the training dataset of the 'MM' methods
        y_predictions_train.extend([y_prediction_train['GBM'], y_prediction_train['GLM'],
                                    y_prediction_train['KNN'], y_prediction_train['NN'],
                                    y_prediction_train['LSTM']])
        y_prediction_train_np = np.array(y_predictions_train).reshape(len(y_predictions_train), -1)
        X_train_mixedModel = pd.DataFrame(y_prediction_train_np.transpose())
        print(X_train_mixedModel)
        y_prediction['LSTM_MIXED'], y_prediction_train['LSTM_MIXED'] = GLM(X_train_mixedModel,
                                                                           X_test_mixedModel, y_train)
    return y_prediction[algorithm], y_prediction_train[algorithm]
##############
# Parameters #
##############

# Paths to training and testing set
TRAINING_SET = '../resources/csv/training_set.csv'
TEST_SET = '../resources/csv/test_set.csv'

# Path to export predictions
DESTINATION = '../products/'

# Fingerprint transformation
FINGERPRINT = fingerprints.morgan()

# Model to train
MODEL = ConsensusClassifier([
    KNN(n_neighbors=17),
    MLP(random_state=0),
    SVM(gamma='auto', random_state=0, probability=True),
    RFC(500, random_state=0)
])

########
# Main #
########

if __name__ == '__main__':
    # Load training and test set
    LS = utils.load_from_csv(TRAINING_SET)
    TS = utils.load_from_csv(TEST_SET)

    # Create fingerprint features and output of learning set
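# ConsensusClassifier is imported from elsewhere in this project. Below is a
# minimal soft-voting sketch of what such an ensemble wrapper might look like
# (hypothetical; the project's actual implementation may differ).
import numpy as np


class SoftVotingConsensus:
    """Average the class probabilities of several estimators."""

    def __init__(self, estimators):
        self.estimators = estimators

    def fit(self, X, y):
        for est in self.estimators:
            est.fit(X, y)
        return self

    def predict_proba(self, X):
        # Element-wise mean of each estimator's probability matrix.
        return np.mean([est.predict_proba(X) for est in self.estimators], axis=0)

    def predict(self, X):
        return np.argmax(self.predict_proba(X), axis=1)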
def __init__(self):
    self.resource_folder = get_resource_path()
    # for dataset_name in sorted(os.listdir(folder)):
    #     if dataset_name.endswith('.csv'):
    #         print(dataset_name[:-4])
    self.pipelines = {
        'credit-g': (
            'credit-g/dataset_31_credit-g.csv', 'class', CreditGPipeline()),
        'wine-quality': (
            'wine-quality/wine-quality-red.csv', 'class', WineQualityPipeline()),
        'wq-missing': (
            'wine-quality/wine-quality-red.csv', 'class', WineQualityMissingPipeline()),
        'abalone': (
            'abalone/abalone.csv', 'Rings', AbalonePipeline()),
        'adult': (
            'adult/adult.csv', 'class', AdultPipeline()),
        'adult-missing': (
            'adult/adult.csv', 'class', AdultMissingPipeline()),
        'heart': (
            'heart/heart.csv', 'class', HeartPipeline())}
    self.classifiers = {
        'dtc': DecisionTree(),
        'rfc40': RandomForest(size=40),
        'ertc40': ExtremelyRandomizedTrees(size=40),
        'xgb': XGB(),
        'svm': SVM(),
        'lsvm': LinearSVM(),
        'knn': KNN(n_neighbors=7),
        'logreg': LogRegression(),
        'gaus': GausNB(),
        'brfc40': BaggingRandomForest(size=40),
        'mlpc': MLPC(input_size=[16, 32, 16, 8])
    }
    self.error_gens = {
        'numeric anomalies': (
            Anomalies(), lambda x: x.dtype in [DataType.INTEGER, DataType.FLOAT]),
        'typos': (
            Typos(), lambda x: x.dtype == DataType.STRING),
        'explicit misvals': (
            ExplicitMissingValues(), lambda x: True),
        'implicit misvals': (
            ImplicitMissingValues(), lambda x: True),
        'swap fields': (
            SwapFields(), lambda x: True)}
    self.params = [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 0.8]
    # NOTE: 'num disc' and 'num cont' currently use identical predicates;
    # both match nominal-scale numeric columns.
    self.tests = {
        'num disc': lambda x: (x.scale == DataScale.NOMINAL
                               and x.dtype in [DataType.INTEGER, DataType.FLOAT]),
        'num cont': lambda x: (x.scale == DataScale.NOMINAL
                               and x.dtype in [DataType.INTEGER, DataType.FLOAT]),
        'string': lambda x: x.dtype == DataType.STRING}
    self.results = Table(rows=sorted(self.pipelines.keys()),
                         columns=sorted(self.classifiers.keys()),
                         subrows=self.tests.keys(),
                         subcolumns=self.error_gens.keys())
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from data.cleaner import CleanAuto
from data.loader import loader as load
from models import KNN
from settings import settings
from trainers import KNNTrainer

cfg = settings()

clean_d = [0.01, 0.05, 0.1, 0.15, 0.3, 0.6]
# clean_d = [0.01, 0.05]

model = KNN(**cfg.model["knn"].structure)
trainer = KNNTrainer(model)

df = pd.DataFrame()
for d in clean_d:
    print("Cleaning d: {}".format(d))
    c = CleanAuto(d)
    trainer.train(load.training_data, load.training_labels)
    model_trained = trainer.get_model()
    costates_hat = model_trained.predict(load.test_labels)
    # Norm of the first two costate components per sample.
    p = np.linalg.norm(costates_hat[:, :2], axis=1)
    df = pd.concat([df, pd.DataFrame({d: p})], axis=1, ignore_index=True)

# Rename columns
df.columns = clean_d
df.plot(kind="box")
plt.show()
print "LASAGNE" yprob = lasange.setTrainDataAndMakeModel(X_train, Y_train, X_test) print "%.4f" % log_loss(Y_test, yprob, eps=1e-15, normalize=True) outFrame = makeOutPutFrame(yprob, X_test_index, "LASAGNE") lasagneStorer.append(outFrame) print "RandomForest" yprob = randomforestclassifier.setTrainDataAndMakeModel( X_train, Y_train, X_test) print "%.4f" % log_loss(Y_test, yprob, eps=1e-15, normalize=True) outFrame = makeOutPutFrame(yprob, X_test_index, "RANDOM_FOREST") randomForestStorer.append(outFrame) print "KNN" yprob = KNN.setTrainDataAndMakeModel(X_train, Y_train, X_test) print "%.4f" % log_loss(Y_test, yprob, eps=1e-15, normalize=True) outFrame = makeOutPutFrame(yprob, X_test_index, "KNN") KNNStorer.append(outFrame) print "MultiNomialNB" yprob = MultinomialNB.setTrainDataAndMakeModel(X_train, Y_train, X_test) print "%.4f" % log_loss(Y_test, yprob, eps=1e-15, normalize=True) outFrame = makeOutPutFrame(yprob, X_test_index, "MultiNomialNB") MultinomialNBStorer.append(outFrame) #====================== Between 0 and 1 Models (join all X frames) ======================= print "#====================== Between 0 and 1 Models (join all X frames) =======================" #all chounks of frames concat for respective. xgboostMetaFeaturesOfX = pd.concat(xgboostStorer) lasangeMetaFeaturesOfX = pd.concat(lasagneStorer)
from models import KNN, NaiveBayes, DecisionTree
from data import Data

training_data = Data('skinTraining')
testing_data = Data('skinTesting')

knn = KNN(training_data, testing_data)
bayes = NaiveBayes(training_data, testing_data)
tree = DecisionTree(training_data, testing_data)

knnResult = knn.evaluate()
bayesResult = bayes.evaluate()
treeResult = tree.evaluate()

print(knnResult, treeResult, bayesResult)
def test_everything(args):
    ## Get features, labels, training and testing sets, and adjacency
    args, file_names, stat_dirname, features, gt_labels, genres, adjacency, \
        indx_train, indx_test, pygsp_graph, release_dates = load_parameters_and_data(args)

    if args.graph_statistics:
        if not os.path.exists(stat_dirname):
            os.makedirs(stat_dirname)
        if args.graph_statistics == 'all':
            ## Prints out all statistics about the graph
            gstats.allstats(adjacency, stat_dirname, active_plots=False)
        elif args.graph_statistics == 'advanced':
            ## Prints out all advanced statistics
            gstats.advanced(adjacency, stat_dirname, active_plots=args.plot_graph)
        else:  # basic setting
            ## Prints out basic statistics
            gstats.basic(adjacency)
        gstats.growth_analysis(adjacency, release_dates, gt_labels, stat_dirname)

    if args.inductive_learning:
        print('#### Testing Inductive Learning ####')
        if args.additional_models:
            ## Initialize models with correct parameters
            svm_clf = SVM(features, gt_labels, kernel='linear', seed=SEED, save_path=file_names)
            random_forest_clf = Random_Forest(features, gt_labels, n_estimators=100,
                                              max_depth=20, seed=SEED, save_path=file_names)
            knn_clf = KNN(features, gt_labels, save_path=file_names)

            error_svm = simple_test(svm_clf, indx_test, classes=genres, name=file_names + "svm_")
            print('* SVM simple test error: {:.2f}'.format(error_svm))
            error_rf = simple_test(random_forest_clf, indx_test, classes=genres,
                                   name=file_names + "rf_")
            print('* Random Forest simple test error: {:.2f}'.format(error_rf))
            error_knn = simple_test(knn_clf, indx_test, classes=genres, name=file_names + "knn_")
            print('* KNN simple test error: {:.2f}'.format(error_knn))

    if args.gcn:
        ## Initialize GCN with correct parameters
        gnn_clf = GCN(nhid=[1200, 100], dropout=0.1, adjacency=adjacency, features=features,
                      labels=gt_labels, n_class=len(genres), cuda=args.use_cpu,
                      regularization=None, lr=0.01, weight_decay=5e-4, epochs=300,
                      batch_size=10000, save_path=file_names)
        error_gnn = simple_test(gnn_clf, indx_test, classes=genres, name=file_names + "gnn_")
        print('* GCN simple test error: {:.2f}'.format(error_gnn))

    if args.gcn_khop:
        ## Initialize GCN K-Hop with correct parameters
        gnn_clf = GCN_KHop(nhid=[1200, 100], dropout=0.1, adjacency=adjacency, features=features,
                           labels=gt_labels, n_class=len(genres), khop=2, cuda=args.use_cpu,
                           regularization=None, lr=0.01, weight_decay=5e-4, epochs=300,
                           batch_size=10000, save_path=file_names)
        error_gnn = simple_test(gnn_clf, indx_test, classes=genres, name=file_names + "gnn_khop_")
        print('* GCN KHop simple test error: {:.2f}'.format(error_gnn))

    if args.mlp_nn:
        ## Initialize MLP with correct parameters
        mlp_nn = MLP_NN(hidden_size=100, features=features, labels=gt_labels, num_epoch=10,
                        batch_size=100, num_classes=len(genres), save_path=file_names,
                        cuda=args.use_cpu)
        error_mlpNN = simple_test(mlp_nn, indx_test, classes=genres, name=file_names + "mlpNN_")
        print('* MLP NN simple test error: {:.2f}'.format(error_mlpNN))
def train_everything(args):
    ## Get features, labels, training and testing sets, and adjacency
    args, file_names, stat_dirname, features, gt_labels, genres, adjacency, \
        indx_train, indx_test, pygsp_graph, release_dates = load_parameters_and_data(args)

    if args.inductive_learning:
        print('#### Applying Inductive Learning ####')
        if args.additional_models:
            ## Initialize models with correct parameters
            svm_clf = SVM(features, gt_labels, kernel='linear', seed=SEED, save_path=file_names)
            random_forest_clf = Random_Forest(features, gt_labels, n_estimators=100,
                                              max_depth=20, seed=SEED, save_path=file_names)
            knn_clf = KNN(features, gt_labels, save_path=file_names)

            start = time.time()
            mean_error_svm, std_error_svm = cross_validation(svm_clf, indx_train, K=5,
                                                             classes=genres,
                                                             name=file_names + "svm_")
            print('* SVM cross validation error mean: {:.2f}, std: {:.2f}'.format(
                mean_error_svm, std_error_svm))
            print("SVM time", time.time() - start)

            start = time.time()
            mean_error_rf, std_error_rf = cross_validation(random_forest_clf, indx_train, K=5,
                                                           classes=genres,
                                                           name=file_names + "rf_")
            print('* Random Forest cross validation error mean: {:.2f}, std: {:.2f}'.format(
                mean_error_rf, std_error_rf))
            print("RF time", time.time() - start)

            start = time.time()
            mean_error_knn, std_error_knn = cross_validation(knn_clf, indx_train, K=5,
                                                             classes=genres,
                                                             name=file_names + "knn_")
            print('* KNN cross validation error mean: {:.2f}, std: {:.2f}'.format(
                mean_error_knn, std_error_knn))
            print("KNN time", time.time() - start)

    if args.gcn:
        print("Training GCN")
        start = time.time()
        ## Initialize GCN with correct parameters
        gnn_clf = GCN(nhid=[1200, 100], dropout=0.1, adjacency=adjacency, features=features,
                      labels=gt_labels, n_class=len(genres), cuda=args.use_cpu,
                      regularization=None, lr=0.01, weight_decay=5e-4, epochs=300,
                      batch_size=10000, save_path=file_names)
        train_gcn(gnn_clf, indx_train, name=file_names + "gnn_")
        print("GCN time", time.time() - start)

    if args.gcn_khop:
        print("Training GCN K-Hop")
        start = time.time()
        ## Initialize GCN K-Hop with correct parameters
        gnn_clf = GCN_KHop(nhid=[1200, 100], dropout=0.1, adjacency=adjacency, features=features,
                           labels=gt_labels, n_class=len(genres), khop=2, cuda=args.use_cpu,
                           regularization=None, lr=0.01, weight_decay=5e-4, epochs=300,
                           batch_size=10000, save_path=file_names)
        train_gcn(gnn_clf, indx_train, name=file_names + "gnn_khop_")
        print("GCN K-Hop time", time.time() - start)

    if args.mlp_nn:
        start = time.time()
        ## Initialize MLP with correct parameters
        mlp_nn = MLP_NN(hidden_size=100, features=features, labels=gt_labels, num_epoch=100,
                        batch_size=100, num_classes=len(genres), save_path=file_names,
                        cuda=args.use_cpu)
        mean_error_mlpNN, std_error_mlpNN = cross_validation(mlp_nn, indx_train, K=5,
                                                             classes=genres,
                                                             name=file_names + "mlpNN_")
        print('* MLP NN cross validation error mean: {:.2f}, std: {:.2f}'.format(
            mean_error_mlpNN, std_error_mlpNN))
        print("MLP time", time.time() - start)
from dataHeartDisaese import x2, y2
from models import KNN, LogisticRegressionModel, BaseClassification
from sklearn import model_selection
from CrossValidation import CrossValidation

test_proportion = 0.8
x, x_outer, y, y_outer = model_selection.train_test_split(x2, y2, test_size=test_proportion)
N, M = x.shape

models = lambda: [BaseClassification(), KNN(), LogisticRegressionModel()]

outer_cv = CrossValidation(models(), x_outer, y_outer, K=10)
outer_cv.applyInnerValidation(x, y)
outer_cv.test()
outer_cv.print_result_with_k()
outer_cv.show_errors()
outer_cv.compare_all_classifiers()
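# The CrossValidation class above is project-specific. For reference, the same
# outer/inner (nested) cross-validation scheme can be sketched with scikit-learn
# primitives roughly as follows; this is an illustrative analogue, not the
# project's implementation, and the parameter grid is an assumption.
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
from sklearn.neighbors import KNeighborsClassifier

inner_cv = KFold(n_splits=5, shuffle=True, random_state=0)
outer_cv_skl = KFold(n_splits=10, shuffle=True, random_state=0)

# Inner loop picks k for KNN; outer loop estimates generalization error.
search = GridSearchCV(KNeighborsClassifier(),
                      param_grid={'n_neighbors': [1, 3, 5, 7, 9]},
                      cv=inner_cv)
nested_scores = cross_val_score(search, x2, y2, cv=outer_cv_skl)
print('Nested CV accuracy: {:.3f} +/- {:.3f}'.format(nested_scores.mean(), nested_scores.std()))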
X_train, y_train = data['X_train'], data['y_train']
X_val, y_val = data['X_val'], data['y_val']
X_test, y_test = data['X_test'], data['y_test']

# Flatten each sample into a single feature vector.
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_val = np.reshape(X_val, (X_val.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))


def get_acc(pred, y_test):
    # Percentage of predictions matching the ground-truth labels.
    return np.sum(y_test == pred) / len(y_test) * 100


print("finished reading data")

knn = KNN(5)
knn.train(X_train, y_train)
pred_knn = knn.predict(X_test)
print('The testing accuracy is given by: %f' % (get_acc(pred_knn, y_test)))

'''
knn = KNN(5)
knn.train(X_train, y_train)
pred_knn = knn.predict(X_test)
print('The testing accuracy is given by: %f' % (get_acc(pred_knn, y_test)))

percept_ = Perceptron()
percept_.train(X_train, y_train)
pred_percept = percept_.predict(X_test)
print('The testing accuracy is given by: %f' % (get_acc(pred_percept, y_test)))
'''