def main(RUNS = 10, numH = 2):
    """
    Cancer

    Run the coevolutionary optimiser on the dataset returned by
    proben1().breast_cancer(), once per cross-validation fold.

    RUNS -- number of folds requested from kfold.kfold; one optimiser run is
            started for each fold.
    numH -- value stored under 'numH' in the network configuration.
    """
    # Each print receives one pre-formatted string, so the output is the same
    # for the Python 2 print statement and the Python 3 print function.
    print(">>STARTING...")

    loader = proben1()
    data = loader.breast_cancer()
    folds = kfold.kfold(D = data['train'], numFolds = RUNS)

    # Input/output sizes come from the metadata shipped with the training set.
    train_info = data['train']['INFO']
    net_config = {
        'numI': train_info['num_inputs'],
        'numO': train_info['num_outputs'],
        'numH': numH,
    }

    for run_no in range(RUNS):
        print(">>>> RUN {0} of {1}".format(run_no, RUNS))
        print("ON : %s" % (data['name'],))

        # Element 0 of a fold is used for training, element 1 for validation.
        fold = folds[run_no]
        optimiser = ndmCoevoOptim.ndmCoevoOptim(
            dataset_name = data['name'],
            train_set = fold[0],
            valid_set = fold[1],
            test_set = data['test'],
            netConfig = net_config,
        )
        optimiser.init_populations()
        optimiser.coevolve()
def main(RUNS = 10, numH = 5):
    """
    Parkinsons

    Run the coevolutionary optimiser on the dataset returned by
    lab_bencmark().parkinsons(), once per cross-validation fold.

    RUNS -- number of folds requested from kfold.kfold; one optimiser run is
            started for each fold.
    numH -- value stored under 'numH' in the network configuration
            (the input and output sizes are fixed at 22 and 1).
    """
    # Each print receives one pre-formatted string, so the output is the same
    # for the Python 2 print statement and the Python 3 print function.
    print(">>STARTING...")

    lb_bench = lab_bencmark()
    D = lb_bench.parkinsons()
    D['name'] = 'Parkinsons'

    # The name used to be read back as D['NAME'], a key this function never
    # sets, which raises KeyError unless the loader happens to supply it.
    # Prefer 'NAME' when it exists (previous behaviour), otherwise fall back
    # to the name assigned just above.
    dataset_name = D.get('NAME', D['name'])

    DCrossVal = kfold.kfold(D = D['train'], numFolds = RUNS)
    netConfig = {'numI': 22, 'numO': 1, 'numH': numH}

    for ri in range(RUNS):
        print(">>>> RUN {0} of {1}".format(ri, RUNS))
        print("ON : %s" % (dataset_name,))

        # Element 0 of a fold is used for training, element 1 for validation.
        coevo = ndmCoevoOptim.ndmCoevoOptim(
            dataset_name = dataset_name,
            train_set = DCrossVal[ri][0],
            valid_set = DCrossVal[ri][1],
            test_set = D['test'],
            netConfig = netConfig,
        )
        coevo.init_populations()
        coevo.coevolve()
def main(RUNS=10, numH=2):
    """
    Lung Cancer

    Repeat a 2-fold cross-validated coevolution RUNS times on the dataset
    returned by lab_bencmark().lung_cancer().

    RUNS -- number of repetitions; the data are reloaded and re-split at the
            start of every repetition.
    numH -- value stored under "numH" in the network configuration
            (the input and output sizes are fixed at 56 and 1).
    """
    FOLDS = 2

    # Each print receives one pre-formatted string, so the output is the same
    # for the Python 2 print statement and the Python 3 print function.
    print(">>STARTING...")

    for run in range(RUNS):
        lb_bench = lab_bencmark()
        D = lb_bench.lung_cancer()
        D["name"] = "Lung_cancer"

        # The name used to be read back as D["NAME"], a key this function
        # never sets, which raises KeyError unless the loader supplies it.
        # Prefer "NAME" when it exists, otherwise use the name set above.
        dataset_name = D.get("NAME", D["name"])

        DCrossVal = kfold.kfold(D=D["train"], numFolds=FOLDS)
        netConfig = {"numI": 56, "numO": 1, "numH": numH}

        for fold in range(FOLDS):
            # The progress line used to print the fold index as if it were
            # the run index ("RUN <fold> of <RUNS>"); report both counters.
            print(">>>> RUN {0} of {1}, FOLD {2} of {3}".format(run, RUNS, fold, FOLDS))
            print("ON : %s" % (dataset_name,))

            # Element 0 of a fold is used for training, element 1 for validation.
            coevo = ndmCoevoOptim.ndmCoevoOptim(
                dataset_name=dataset_name,
                train_set=DCrossVal[fold][0],
                valid_set=DCrossVal[fold][1],
                test_set=D["test"],
                netConfig=netConfig,
            )
            coevo.init_populations()
            coevo.coevolve()
def test_iris(RUNS = 10):
    """
    Cross-validated coevolution on the dataset returned by
    lab_bencmark().iris().

    RUNS -- number of folds requested from kfold.kfold; one optimiser run is
            started for each fold.
    """
    loader = lab_bencmark()
    print(">>>IRIS")

    data = loader.iris()
    folds = kfold.kfold(D = data['train'], numFolds = RUNS)
    net_config = {
        'numI': 4,
        'numO': 1,
        'numH': 2,
    }

    for fold_no in range(RUNS):
        # Element 0 of a fold is used for training, element 1 for validation.
        fold = folds[fold_no]
        optimiser = ndmCoevoOptim.ndmCoevoOptim(
            dataset_name = 'IRIS',
            train_set = fold[0],
            valid_set = fold[1],
            test_set = data['test'],
            netConfig = net_config,
        )
        # Switch off random node injection before the populations are built.
        optimiser.params['randomNodesInject'] = False
        optimiser.init_populations()
        optimiser.coevolve()

    # Drop the reference to the loader once every fold has been processed.
    del loader
def test_glass(RUNS = 10):
    """
    GLASS

    Cross-validated coevolution on the dataset returned by proben1().glass().

    RUNS -- number of folds requested from kfold.kfold; one optimiser run is
            started for each fold.
    """
    loader = proben1()
    data = loader.glass()
    folds = kfold.kfold(D = data['train'], numFolds = RUNS)

    # Network sizes are read from the metadata of the *test* split here.
    test_info = data['test']['INFO']
    net_config = {
        'numI': test_info['num_inputs'],
        'numO': test_info['num_outputs'],
        'numH': 2,
    }

    for fold_no in range(RUNS):
        # Element 0 of a fold is used for training, element 1 for validation.
        fold = folds[fold_no]
        optimiser = ndmCoevoOptim.ndmCoevoOptim(
            dataset_name = data['name'],
            train_set = fold[0],
            valid_set = fold[1],
            test_set = data['test'],
            netConfig = net_config,
        )
        # Switch off random node injection before the populations are built.
        optimiser.params['randomNodesInject'] = False
        optimiser.init_populations()
        # The value returned by coevolve() is bound but not used afterwards.
        _ = optimiser.coevolve()

    # Drop the reference to the loader once every fold has been processed.
    del loader
def main(RUNS = 10, numH = 2):
    """
    Card

    Run the coevolutionary optimiser on the dataset returned by
    proben1().australian_cc(), once per cross-validation fold.

    The whole experiment is best-effort: any ordinary exception is reported
    on the console ("ERROR" plus the traceback) and the function returns
    normally instead of propagating it.

    RUNS -- number of folds requested from kfold.kfold; one optimiser run is
            started for each fold.
    numH -- value stored under 'numH' in the network configuration.
    """
    import traceback

    try:
        # Each print receives one pre-formatted string, so the output is the
        # same for the Python 2 print statement and the Python 3 print function.
        print(">>STARTING...")

        proben = proben1()
        D = proben.australian_cc()
        DCrossVal = kfold.kfold(D = D['train'], numFolds = RUNS)
        netConfig = {
            'numI': D['train']['INFO']['num_inputs'],
            'numO': D['train']['INFO']['num_outputs'],
            'numH': numH,
        }

        for ri in range(RUNS):
            print(">>>> RUN {0} of {1}".format(ri, RUNS))
            print("ON : %s" % (D['name'],))

            # Element 0 of a fold is used for training, element 1 for validation.
            coevo = ndmCoevoOptim.ndmCoevoOptim(
                dataset_name = D['name'],
                train_set = DCrossVal[ri][0],
                valid_set = DCrossVal[ri][1],
                test_set = D['test'],
                netConfig = netConfig,
            )
            coevo.init_populations()
            coevo.coevolve()
        # (An e-mail notification on completion was sketched here but is disabled.)
    except Exception:
        # A bare "except:" used to hide KeyboardInterrupt/SystemExit as well
        # and gave no hint about what failed. Keep the best-effort behaviour
        # for ordinary errors, but show where they came from.
        print("ERROR")
        traceback.print_exc()
def decision_tree(frame):
    """
    Fit a depth-limited decision tree on `frame` and report its accuracy.

    The function works on a copy of `frame`, so the caller's object is not
    modified. It evaluates the model twice and prints both results:

    1. on a 70/30 split made with train_test_split (random_state=1), also
       showing the fitted tree and a confusion matrix;
    2. after refitting on the sets returned by kfold.kfold.

    frame -- tabular data containing the column 'Action' and the six feature
             columns listed in `feature` below (assumed to be a pandas
             DataFrame, since .copy(), .replace() and column selection are
             used -- confirm against the caller).

    Returns None; all results are printed or displayed.
    """
    # Work on a copy: the replacement below is done in place.
    data = frame.copy()

    # Encode the categorical 'Action' values as integers.
    action_codes = {'Action': {'allow': 0, 'deny': 1, 'drop': 2, 'reset-both': 3}}
    data.replace(action_codes, inplace=True)

    # Independent features and target.
    feature = ['Source Port', 'Destination Port', 'Packets', 'pkts_received', 'Bytes', 'Bytes Received']
    X = data[feature]
    Y = data['Action']

    # Hold out 30% of the rows for testing.
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

    tree = DecisionTreeClassifier(criterion="entropy", max_depth=5)
    tree = tree.fit(X_train, Y_train)
    predict = tree.predict(X_test)
    print("Accuracy of Split Test Model: ", metrics.accuracy_score(Y_test, predict))

    print_tree(tree, feature)
    confusion.confusionMatrix(Y_test, predict)

    # Re-evaluate: refit the same classifier on the sets produced by kfold.
    x_train, x_test, y_train, y_test = kfold.kfold(data)
    tree.fit(x_train, y_train)
    pred = tree.predict(x_test)
    print_tree(tree, feature)
    print("Accuracy of KFold Test Model: ", metrics.accuracy_score(y_test, pred))
def grid_search(model, params, folds):
    '''
    Evaluate every (gamma, C) combination with k-fold cross-validation.

    For each gamma a "C vs Accuracy" curve is plotted; per-fold and mean
    accuracies are printed along the way, and the best combination found is
    printed at the end. Nothing is returned.

    model  -- passed to MSVM as its first argument ('ovr' or 'ovo').
    params -- dictionary with the keys 'gamma' and 'C' (the values to try)
              and 'kernel' (passed to MSVM unchanged).
    folds  -- number of folds. None falls back to 5, the value that used to
              be hard-coded.

    The data are read from the module-level DataFrame `df`; its 'label'
    column is the target and all remaining columns are the features.
    '''
    gammas = params['gamma']
    C = params['C']
    kernel = params['kernel']

    # The argument used to be overwritten by a hard-coded kfold(df, 5, True),
    # so the caller's fold count was ignored. The second positional argument
    # of kfold is presumably the fold count -- confirm against its definition.
    n_folds = 5 if folds is None else folds
    fold_rows = kfold(df, n_folds, True)

    max_acc = 0
    best_C = None
    best_gamma = None

    for gama in gammas:
        C_accuracies = []
        for cs in C:
            accuracies = []
            for fold_no, fold in enumerate(fold_rows, start=1):
                # `fold` holds row positions (it is used with iloc). Translate
                # them to index labels before dropping, so the training rows
                # are exactly the complement of the test rows even when df
                # does not have the default 0..n-1 index.
                test_fold_df = df.iloc[fold, :]
                train_fold_df = df.drop(df.index[np.asarray(fold)], axis=0)

                X_train = train_fold_df.drop(['label'], axis=1)
                y_train = train_fold_df.filter(['label']).to_numpy()
                X_test = test_fold_df.drop(['label'], axis=1)
                y_test = test_fold_df.filter(['label']).to_numpy()

                sv = MSVM(model, cs, gama, kernel)
                sv.fit(X_train, y_train)
                pred = sv.predict(X_test)
                acc = measure_accuracy(y_test, pred)
                accuracies.append(acc)
                print("Accuracy for Gamma:",gama," and C:",cs," and Fold: ",fold_no," is:",acc)

            # Mean accuracy over all folds for this (gamma, C) pair.
            mean_acc = np.mean(np.array(accuracies))
            print("MEAN ACCURACY FOR GAMMA:",gama," and C:",cs," is ",mean_acc)
            if max_acc < mean_acc:
                max_acc = mean_acc
                best_C = cs
                best_gamma = gama
            C_accuracies.append(mean_acc)

        # One accuracy curve per gamma value.
        plt.plot(C, C_accuracies)
        plt.title("C vs Accuracy for gamma:"+str(gama))
        plt.xlabel("C")
        plt.ylabel("Accuracy")
        plt.show()

    print("BEST ACCURACY: ",max_acc," FOR C:",best_C," AND GAMMA:",best_gamma)
def main(RUNS=10, numH=2):
    """
    Horse

    Run the coevolutionary optimiser on the dataset returned by
    proben1().horse(), once per cross-validation fold.

    RUNS -- number of folds requested from kfold.kfold; one optimiser run is
            started for each fold.
    numH -- value stored under "numH" in the network configuration.
    """
    # Each print receives one pre-formatted string, so the output is the same
    # for the Python 2 print statement and the Python 3 print function.
    print(">>STARTING...")

    source = proben1()
    dataset = source.horse()
    splits = kfold.kfold(D=dataset["train"], numFolds=RUNS)

    # Input/output sizes come from the metadata shipped with the training set.
    meta = dataset["train"]["INFO"]
    config = {"numI": meta["num_inputs"], "numO": meta["num_outputs"], "numH": numH}

    for run_no in range(RUNS):
        print(">>>> RUN {0} of {1}".format(run_no, RUNS))
        print("ON : %s" % (dataset["name"],))

        # Element 0 of a split is used for training, element 1 for validation.
        split = splits[run_no]
        optimiser = ndmCoevoOptim.ndmCoevoOptim(
            dataset_name=dataset["name"],
            train_set=split[0],
            valid_set=split[1],
            test_set=dataset["test"],
            netConfig=config,
        )
        optimiser.init_populations()
        optimiser.coevolve()
import kfold
import profile
import visualisation.visualiseOutputs2D as vis2d
from PyQt4 import QtCore, QtGui
from visualiseNDMNet import *

# Script: profile one coevolution run of an optimiser built with its
# default arguments.
coevo = ndmCoevoOptim.ndmCoevoOptim()
errors_train, errors_test = [], []

benchmark = proben1()
# NOTE: this rebinds the name lab_bencmark from the callable to the instance
# it returns, so the callable is no longer reachable under that name.
lab_bencmark = lab_bencmark()

K = 10
# D = kfold.kfold(D = benchmark.mushroom()['train'],numFolds = K);
# D2 is only consumed by the per-fold assignments that are disabled below.
D2 = kfold.kfold(D = lab_bencmark.iris()['train'], numFolds = K)

for i in range(1):
    # One pre-formatted string keeps the output identical for the Python 2
    # print statement and the Python 3 print function.
    print(">>> %s" % (i,))
    coevo.init_populations()
    # coevo.train_set = D2[i][0];
    # coevo.validation_set = D2[i][1];
    profile.run("coevo.coevolve()")
# coding: utf-8
# Cleaned-up IPython session: build a label vector and a stacked feature
# matrix from two saved arrays, then hand both to kfold.kfold.
#
# The recorded session also contained abandoned attempts that fail or do
# nothing when replayed (np.zeros on "int + array", np.hstack called with two
# positional arrays, a loop over an empty range, an np.hstack result that was
# immediately overwritten). Only the statements that produce the final state
# are kept.

# The relative paths and the kfold import below depend on this directory.
get_ipython().magic(u'cd rrna/src')

import numpy as np
import kfold

mean_pair_probs = np.load(
    "../data/rnafold_results/rnafold_mean_pair_probs.npy")
rrna_pair_probs = np.load(
    "../data/rnafold_results/rnafold_rrna_pair_probs.npy")

n_mean = mean_pair_probs.shape[0]
n_rrna = rrna_pair_probs.shape[0]

# One label per row of the stacked matrix: 0 for the rows that come from
# mean_pair_probs, 1 for the rows that come from rrna_pair_probs.
# The session filled labels[n_mean - 1 : total - 1], which is shifted by one
# relative to the row order of np.vstack below (the last mean row was marked
# 1 and the last rRNA row stayed 0).
labels = np.zeros(n_mean + n_rrna)
labels[n_mean:] = 1

# Rows of mean_pair_probs first, rows of rrna_pair_probs after them.
pair_probs = np.vstack([mean_pair_probs, rrna_pair_probs])

kfold.kfold(labels, pair_probs)

# NOTE(review): %save is invoked without arguments, exactly as recorded in
# the session; confirm whether a file name was intended.
get_ipython().magic(u'save')
# coding: utf-8
# IPython session: partition the rRNA / non-rRNA pair-probability arrays
# with kfold.kfold, once with under-sampling and once with over-sampling.

# The kfold import below is resolved after changing into this directory.
get_ipython().magic(u'cd rrna/src')

import kfold
import numpy as np

# these will change for mouse
rrna_pair_probs = np.load(
    "/projects/bio/rrna/data/rnafold_results/rrna_by1_pair_probs.npy")
not_rrna_pair_probs = np.load(
    "/projects/bio/rrna/data/rnafold_results/not_rrna_mean_pair_probs.npy")

# Shape look-ups kept from the interactive session (they display the shapes
# in IPython and have no other effect).
not_rrna_pair_probs.shape
rrna_pair_probs.shape

# Same inputs and output folder for both runs; only the sampling mode differs.
for _sampling in ("under", "over"):
    kfold.kfold(
        rrna_pair_probs,
        not_rrna_pair_probs,
        save_folder="/projects/bio/rrna/data/rnafold_results/",
        n_partitions=10,
        sampling=_sampling,
    )
def do_training_testing(clf, X, y, filename, show=False):
    """
    Train and test every classifier on every feature set, with or without
    resampling, and collect the best model for each combination.

    A fresh CSV file is started at `filename` with the header row
    'Clf-Fitur', 'Fold 1' .. 'Fold 10', 'Avg'; `filename` is also handed to
    get_best_model for every combination.

    parameters:
        clf      = classifiers, indexed by name (iterated and read as clf[name])
        X        = data per feature type, indexed by feature name
        y        = labels of the data
        filename = path of the CSV results file (any existing file is removed)
        show     = boolean, print the progress of the best-model search

    return:
        per_clf = dict mapping (classifier name, feature name) to the value
                  returned by get_best_model for that pair
    """
    n_splits = 10  # used for both the CSV header and the k-fold split

    # Start from a clean results file; a missing file is not an error.
    try:
        os.remove(filename)
    except OSError:
        pass

    first_row = ['Clf-Fitur']
    first_row.extend('Fold ' + str(i + 1) for i in range(n_splits))
    first_row.append('Avg')
    with open(filename, 'a', newline='') as file:
        csv.writer(file).writerow(first_row)

    per_clf = {}
    train_indices_all, test_indices_all = kfold(y, n_splits=n_splits)
    # Each row is one (train indices, test indices) pair. The two members
    # have different lengths, and NumPy >= 1.24 refuses to build such a
    # ragged array unless dtype=object is requested explicitly.
    kf = np.array(list(zip(train_indices_all, test_indices_all)), dtype=object)

    # Forward show=True only when asked for; otherwise get_best_model keeps
    # its own default, as before.
    extra = {'show': True} if show else {}

    for c in clf:  # for each classifier type
        # 'gauss_nb' is never run on its own; it only stands in for
        # 'multi_nb' on the 'tfidf' features (see below).
        if c == 'gauss_nb':
            continue
        for fitur in X:  # for each feature type
            # Use a separate name instead of overwriting and restoring the
            # loop variable.
            name = 'gauss_nb' if (c == 'multi_nb' and fitur == 'tfidf') else c
            if show:
                print('\t', name, fitur)
            per_clf[(name, fitur)] = get_best_model(
                X[fitur], y, clf[name], kf, name, fitur, filename, **extra)
    return per_clf