def GetModelObject(self, Features):
    if self.ModelType == "lda":
        return Learning.LDAModel(Features)
    elif self.ModelType == "svm":
        return Learning.SVMModel(Features)
    elif self.ModelType == "logit":
        return Learning.LogitModel(Features)
    else:
        print "** Model type NOT KNOWN!", self.ModelType
        return
def testLearning2():
    # Iris data
    # from sklearn.datasets import load_iris
    # data = load_iris()
    d = pd.read_csv(
        r'D:\AviaNZ\Sound_Files\Denoising_paper_data\Primary_dataset\kiwi\mfcc.tsv',
        sep="\t", header=None)
    data = d.values
    target = data[:, -1]
    data = data[:, 0:-1]
    learners = Learning.Learning(data, target)
    model = learners.trainMLP()
    learners.performTest(model)
    model = learners.trainKNN()
    learners.performTest(model)
    model = learners.trainSVM()
    learners.performTest(model)
    model = learners.trainGP()
    learners.performTest(model)
    model = learners.trainDecisionTree()
    learners.performTest(model)
    model = learners.trainRandomForest()
    learners.performTest(model)
    model = learners.trainBoosting()
    learners.performTest(model)
    model = learners.trainXGBoost()
    learners.performTest(model)
    model = learners.trainGMM()
    learners.performTest(model)
def testLearning1():
    # Very simple test
    from sklearn.datasets import make_classification
    features, labels = make_classification(n_features=2, n_redundant=0, n_informative=2,
                                           random_state=1, n_clusters_per_class=1)
    learners = Learning.Learning(features, labels)
    model = learners.trainMLP()
    learners.performTest(model)
    model = learners.trainKNN()
    learners.performTest(model)
    model = learners.trainSVM()
    learners.performTest(model)
    model = learners.trainGP()
    learners.performTest(model)
    model = learners.trainDecisionTree()
    learners.performTest(model)
    model = learners.trainRandomForest()
    learners.performTest(model)
    model = learners.trainBoosting()
    learners.performTest(model)
    model = learners.trainXGBoost()
    learners.performTest(model)
    model = learners.trainGMM()
    learners.performTest(model)
def finishBtnSlot(self):
    print "finish triggered"
    if self.counter >= 7:
        return
    try:
        # scale = ["C", "D", "E", "F", "G", "A", "B"]#listen()
        # scale = ["C", "E", "G", "B", "D", "F", "A"]
        #scale = ["E", "F#", "G#", "A", "B", "C#", "D#"]  # E Major
        if self.counter == 0:
            scale = self.recordGui.scale
            self.xavier = Learning.Learning(scale)
        # learning iterations (250 per button press)
        for x in range(250):
            if x > 0 and x < 30 and self.counter == 0:
                self.xavier.qLearn(True, 0.05)
            elif x % 250 == 0 and self.counter != 0:
                if random.random() > 0.5:
                    self.xavier.qLearn(True, 0.07)
                else:
                    self.xavier.qLearn(True, 0.1)
            else:
                self.xavier.qLearn(False, None)
            #print "x=", x
        # agent is done learning
        self.playGui.aiNotesLe.setText(
            self.playGui.aiNotesLe.text() + self.xavier.goalScale[self.counter] + " ")
        self.counter += 1
    except KeyboardInterrupt:
        Learning.World.global_player.destroy()
    if self.counter == 7:
        self.playGui.trainingLbl.setStyleSheet("QLabel{ background-color: green; }")
        self.playGui.trainingLbl.setText("Ready")
        self.playGui.playBtn.setVisible(True)
        self.playGui.playBtn.setEnabled(True)
        self.xavier.createExplorationQvalues()
        self.playGui.xavier = self.xavier
def __init__(self, n_pc, input_node, n_neuron_out, lr):
    '''
    Initialize actor net as a nengo network object.
    PARAMS:
        n_pc         - number of place cells
        input_node   - nengo Node/Ensemble providing the input signal
        n_neuron_out - number of neurons in the Ensemble encoding the output
        lr           - learning rate for the TDL learning rule
    '''
    with nengo.Network() as net:
        net.output = nengo.Ensemble(n_neurons=n_neuron_out, dimensions=8,
                                    radius=np.sqrt(8))
        net.conn = nengo.Connection(
            input_node, net.output, synapse=0.01,
            function=lambda x: [0] * 8,
            solver=nengo.solvers.LstsqL2(weights=True),
            learning_rule_type=Learning.TDL(learning_rate=lr))
    self.net = net
from ase import atoms
from ase.io import read, write, iread
import matplotlib.pyplot as plt
import numpy as np
import Learning
import pickle
from sklearn.cluster import KMeans
from ase.visualize.plot import plot_atoms
from ase.data.colors import jmol_colors

atomsite = iread("runs0/run0/structures.traj")
energies = []
dataSet = []
clusterCounts = []

energyClassifier = Learning.EnergyClassifier()
ksis = [1, 2, 4]
lambs = [1, -1]
etas = [0.05, 2, 4, 8, 20, 40, 80]
angularEtas = [0.005]
rss = [0]
rc = 3
atomicLambda = 1
energyClassifier.setHyperParameters(ksis, lambs, etas, angularEtas, rss, atomicLambda, rc)

# for name in range(10):
#     for i, atom in enumerate(iread("BestProb/" + "run" + str(name) + "/structures.traj"), 0):
#         if i % 10 == 0:
#             energies.append(atom.get_total_energy())
#             point, _ = energyClassifier.features(atom)
import Learning
import PitchDetection
import random


def listen():
    detector = PitchDetection.PitchDetection()
    scale = detector.detect()
    return scale


try:
    #scale = ["C", "D", "E", "F", "G", "A", "B"]#listen()
    #scale = ["C", "E", "G", "B", "D", "F", "A"]
    scale = ["E", "F#", "G#", "A", "B", "C#", "D#"]  #E Major
    xavier = Learning.Learning(scale)
    # learning for 2k iterations
    for x in range(2000):
        if x > 0 and x < 30:
            xavier.qLearn(True, 0.05)
        elif x % 250 == 0:
            if random.random() > 0.5:
                xavier.qLearn(True, 0.07)
            else:
                xavier.qLearn(True, 0.1)
        else:
            xavier.qLearn(False, None)
        print "x=", x
    # agent is done learning
import sys
sys.path.append('./Learning')

import Learning
Learning.foo()
Learning.do_something()

a = Learning.Computer('myPC')
a.say_hello()

import Learning.Algebra as algebra
b = algebra.add(10, 5)
print(b)

import Learning.Arithmetic as arith
c = arith.calculate(5, 8)
print(c)
def ParseFeatureFile(self, FilePath, FeatureSet2, FeatureSet3, DBRatio):
    """
    Initialize the FeatureSet objects by parsing features from the specified FilePath.
    Vectors from charge 1-2 spectra go to FeatureSet2, charge 3+ vectors go to FeatureSet3.
    """
    File = open(FilePath, "rb")
    # Parse the header line specially:
    HeaderLine = File.readline()
    self.HeaderLines.append(HeaderLine)
    Bits = HeaderLine.strip().split("\t")
    for BitIndex in range(len(Bits)):
        if BitIndex >= FormatBits.FirstFeature:
            self.FeatureNames[BitIndex - FormatBits.FirstFeature] = Bits[BitIndex]
        #if BitIndex <= FormatBits.LastFeature:
        #    print "Feature %s: %s"%(BitIndex - FormatBits.FirstFeature, Bits[BitIndex])
    # Iterate over non-header lines:
    LineNumber = 0
    for FileLine in File.xreadlines():
        LineNumber += 1
        if FileLine[0] == "#":
            self.HeaderLines.append(FileLine)
            continue  # skip comment line
        if not FileLine.strip():
            continue  # skip blank line
        Bits = FileLine.replace("\r", "").replace("\n", "").split("\t")
        # If there are TOO MANY bits, then discard the extras:
        Bits = Bits[:FormatBits.LastFeature + 1]
        try:
            TrueFlag = int(Bits[FormatBits.TrueProteinFlag])
        except:
            continue  # skip; not a valid instance line
        Charge = int(Bits[FormatBits.Charge])
        SisterAnnotation = Bits[FormatBits.SisterAnnotationFlag]
        Vector = Learning.FeatureVector()
        if Charge > 2:
            FeatureSet = FeatureSet3
        else:
            FeatureSet = FeatureSet2
        try:
            for FeatureBitIndex in range(FormatBits.FirstFeature, FormatBits.LastFeature + 1):
                FeatureIndex = FeatureBitIndex - FormatBits.FirstFeature
                #if FeatureIndex not in self.FeaturesAll:
                #    continue
                if FeatureBitIndex < len(Bits) and Bits[FeatureBitIndex].strip() and Bits[FeatureBitIndex] != "None":
                    Vector.Features.append(float(Bits[FeatureBitIndex]))
                else:
                    Vector.Features.append(0)
            Vector.FileBits = Bits
            Vector.TrueFlag = TrueFlag
            if TrueFlag:
                FeatureSet.TrueVectors.append(Vector)
            else:
                FeatureSet.FalseVectors.append(Vector)
            FeatureSet.AllVectors.append(Vector)
        except:
            traceback.print_exc()
            print "** Error on line %s column %s of feature file"%(LineNumber, FeatureIndex)
            print Bits
    File.close()
    # Initialize counts:
    for FeatureSet in (FeatureSet2, FeatureSet3):
        FeatureSet.SetCounts()
        FeatureSet.GetPriorProbabilityFalse(DBRatio)
    print "CHARGE 1,2: Read in %s true and %s false vectors"%(FeatureSet2.TrueCount, FeatureSet2.FalseCount)
    print "CHARGE 3+: Read in %s true and %s false vectors"%(FeatureSet3.TrueCount, FeatureSet3.FalseCount)
def validationCurve(dataFile, clf, nClasses=2, score=None):
    '''
    Choose a classifier and plot the validation curve.
    Score against different values of a selected hyperparameter to see the
    influence of that single hyperparameter.
    dataFile: dataset including features and targets
    clf: classifier to consider
    score: customise the scoring (default in sklearn is 'accuracy')
    '''
    # Let's use the fB (F2) score
    if score is None:
        from sklearn.metrics import fbeta_score, make_scorer
        score = make_scorer(fbeta_score, beta=2)
    d = pd.read_csv(dataFile, sep="\t", header=None)
    data = d.values
    # Balance the data set
    targets = data[:, -1]
    data = data[:, 0:-1]
    if nClasses == 2:
        posTargetInd = np.where(targets == 1)
        negTargetInd = np.where(targets == 0)
        # randomly select n rows from each class
        n = min(np.shape(posTargetInd)[1], np.shape(negTargetInd)[1])
        posTargetInd = posTargetInd[0].tolist()
        posTargetInd = random.sample(posTargetInd, n)
        negTargetInd = negTargetInd[0].tolist()
        negTargetInd = random.sample(negTargetInd, n)
        inds = posTargetInd + negTargetInd
    elif nClasses == 3:
        c1TargetInd = np.where(targets == 0)  # c1=noise
        c2TargetInd = np.where(targets == 1)  # c2=male
        c3TargetInd = np.where(targets == 2)  # c3=female
        # randomly select n rows from each class
        n = min(np.shape(c1TargetInd)[1], np.shape(c2TargetInd)[1], np.shape(c3TargetInd)[1])
        c1TargetInd = c1TargetInd[0].tolist()
        c1TargetInd = random.sample(c1TargetInd, n)
        c2TargetInd = c2TargetInd[0].tolist()
        c2TargetInd = random.sample(c2TargetInd, n)
        c3TargetInd = c3TargetInd[0].tolist()
        c3TargetInd = random.sample(c3TargetInd, n)
        inds = c1TargetInd + c2TargetInd + c3TargetInd
    data = data[inds, :]
    targets = targets[inds]
    indices = np.arange(targets.shape[0])
    np.random.shuffle(indices)
    data, targets = data[indices], targets[indices]

    # Note: only the SVM, MLP and kNN branches set param_name/param_range; the other
    # estimators still need a hyperparameter range before Learning.Validate is called.
    if clf == 'GaussianNB':
        from sklearn.naive_bayes import GaussianNB
        estimator = GaussianNB()
    elif clf == 'SVM':
        estimator = SVC(C=1)
        param_name = "gamma"
        param_range = np.logspace(-6, 1, 10)
        # param_name = "C"
        # param_range = np.linspace(0.01, 1, 5)
    elif clf == 'MLP':
        estimator = MLPClassifier()
        param_name = "alpha"
        param_range = 10.0 ** -np.arange(1, 7)
        # param_name = "max_iter"
        # param_range = [100, 200, 300, 400, 500]
    elif clf == 'kNN':
        estimator = KNeighborsClassifier()
        param_name = "n_neighbors"
        param_range = [1, 2, 3, 4, 5, 6]
    elif clf == 'GP':
        estimator = GaussianProcessClassifier(1.0 * RBF(1.0))
    elif clf == 'DT':
        estimator = DecisionTreeClassifier(max_depth=5)
    elif clf == 'RF':
        estimator = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=2)
    elif clf == 'Boost':
        estimator = AdaBoostClassifier()
    elif clf == 'XGB':
        estimator = xgb.XGBClassifier()
    elif clf == 'GMM':
        estimator = GaussianMixture(n_components=2, covariance_type='spherical', max_iter=20)

    title = "Validation Curves - " + clf
    v = Learning.Validate(estimator, title, data, targets,
                          param_name=param_name, param_range=param_range, scoring=score)
    plt = v.plot_validation_curve()
    plt.show()
hparam.batch_fraction_of_replay_memory = 1
hparam.max_replay_memory_size = 1000
hparam.episode_length = 1000
hparam.num_epochs = 50
hparam.network_depth = 3
hparam.network_width = 20
hparam.max_num_services = 3
hparam.initial_exploration_boost = 0
hparam.target_model_update_frequency = 10

if algorithm_choice == 8:
    # disable exploration
    hparam.initial_exploration_boost = 0
    hparam.epsilon = 0
    # disable learning
    hparam.do_training = False
    migration_algorithm = Learning.DQNMigrationAlgorithm(hparam, rng, True)
else:
    print("invalid option:'", algorithm_choice_str, "'")
    exit(-1)

simscale = 2
simulation_config = simulator.Simulation.Configuration()
simulation_config.numClouds = 15 * simscale
simulation_config.numUsers = 20 * simscale
simulation_config.numServices = 20 * simscale  # 25*simscale
simulation_config.numInternalNodes = 15 * simscale
simulation_config.numBaseStations = 10 * simscale
simulation_config.migration_algorithm = migration_algorithm
simulation_config.service_cost_function = basicCostFunctions.ComplexCostFunctionNoActivation()  # ComplexCostFunction()
def learninigCurve(dataFile, clf, score=None):
    '''
    Choose a classifier and plot the learning curve.
    dataFile: dataset including features and targets
    clf: classifier to consider
    score: customise the scoring (default in sklearn is 'accuracy')
    '''
    # Let's use the fB (F2) score
    if score is None:
        from sklearn.metrics import fbeta_score, make_scorer
        score = make_scorer(fbeta_score, beta=2)
    d = pd.read_csv(dataFile, sep="\t", header=None)
    data = d.values
    # Balance the data set
    targets = data[:, -1]
    data = data[:, 0:-1]
    posTargetInd = np.where(targets == 1)
    negTargetInd = np.where(targets == 0)
    # randomly select n negative rows
    n = min(np.shape(posTargetInd)[1], np.shape(negTargetInd)[1])
    posTargetInd = posTargetInd[0].tolist()
    posTargetInd = random.sample(posTargetInd, n)
    negTargetInd = negTargetInd[0].tolist()
    negTargetInd = random.sample(negTargetInd, n)
    inds = posTargetInd + negTargetInd
    data = data[inds, :]
    targets = targets[inds]
    indices = np.arange(targets.shape[0])
    np.random.shuffle(indices)
    data, targets = data[indices], targets[indices]

    if clf == 'GaussianNB':
        from sklearn.naive_bayes import GaussianNB
        estimator = GaussianNB()
    elif clf == 'SVM':
        from sklearn.svm import SVC
        estimator = SVC(gamma=0.0077)
    elif clf == 'MLP':
        estimator = MLPClassifier(hidden_layer_sizes=(250, ), max_iter=100, early_stopping=True)
    elif clf == 'kNN':
        estimator = KNeighborsClassifier(3)
    elif clf == 'GP':
        estimator = GaussianProcessClassifier(1.0 * RBF(1.0))
    elif clf == 'DT':
        estimator = DecisionTreeClassifier(max_depth=5)
    elif clf == 'RF':
        estimator = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=2)
    elif clf == 'Boost':
        estimator = AdaBoostClassifier()
    elif clf == 'XGB':
        estimator = xgb.XGBClassifier()
    elif clf == 'GMM':
        estimator = GaussianMixture(n_components=2, covariance_type='spherical', max_iter=20)

    title = "Learning Curves - " + clf
    v = Learning.Validate(estimator, title, data, targets, scoring=score)
    plt = v.plot_learning_curve()
    plt.show()
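# A minimal usage sketch for the two curve helpers above. The file path and classifier
# label are hypothetical placeholders, not values from the original project; the .tsv is
# assumed to hold one feature vector per row with the class target in the last column,
# as both functions expect.
if __name__ == '__main__':
    featureFile = r'D:\AviaNZ\features\kiwi_mfcc.tsv'  # hypothetical path
    learninigCurve(featureFile, clf='SVM')
    validationCurve(featureFile, clf='SVM', nClasses=2)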
for i in xrange(len(lts) / 2):
    print 'Training RBM %i' % i
    top = len(net.Layers) - 1
    learner = rbm.RBM(arch[i+1], arch[i])
    rbm.learn(learner, rbmtrain, rbmvalid, tp)
    net.Layers[top - i].W = learner.W.transpose()
    net.Layers[top - i].h = learner.v.copy()
    net.Layers[i].W = learner.W.copy()
    net.Layers[i].h = learner.h.copy()
    rbmtrain = learner.up(rbmtrain)
    rbmvalid = learner.up(rbmvalid)
print 'Finished'

print 'Training Autoencoder'
Learning.train_sgd(net, train, train, ntp)
print 'Finished'

#################################################
# Use this to replace parts of the signal with background
##############################
signalStrength = 0.2
test_rm = data[:, :1000]
flips = np.random.rand(test_rm.shape[0], test_rm.shape[1])
keeps = flips < signalStrength
unkeeps = flips > signalStrength
noise = np.ones_like(test_rm) * test_rm.mean()
noise *= unkeeps
test = test_rm * keeps
def _process(self):
    Learning.puntaje(dicc)
"""
            print('Select Area for CamShift and Enter a key')
            inputmode = True
            frame2 = frame.copy()

            while inputmode:
                cv2.imshow('frame', frame)
                cv2.waitKey(0)

        elif k == ord('q'):
            inputmode = False
            trackWindow = None

    pcap.release()
    cv2.destroyAllWindows()
"""

if __name__ == '__main__':
    Video.CaptureVideo('output.avi')
    _ = input("Press Enter to Read Video :")
    (frames, labels) = Video.camShift('output.avi')
    print(str(len(labels)) + " objects detected : ", labels)
    # for i in range(len(frames)):
    #     print(frames[i].shape)
    model = Learning.train(frames, labels, epoches=5, lr=0.0001)
    # print(model)
    # predictVideo(model, labels)
    Video.predictVideo(model, labels)
def TrainClassifier(dir, species, feature, clf=None, pca=False):
    '''
    Use wavelet energy/MFCC as features, train, and save the classifiers for later use.
    Recommended to use fit_GridSearchCV and plot validation/learning curves to determine
    hyper-parameter values and to see how learning improves with more data and at what
    point it gets stable.

    Choose what features to show to the classifier. Currently lots of variations of WE and MFCC:
    (1) Wavelet Energies - all 62 nodes, extracted from raw recordings (feature = 'weraw_all')
    (2) Wavelet Energies - nodes limited to the frequency range of the species, extracted from raw recordings
    (3) Wavelet Energies - limited to the optimum nodes for the species, extracted from raw recordings
    (4) Wavelet Energies - all 62 nodes, extracted with bandpass filter
    (5) Wavelet Energies - nodes limited to the frequency range of the species, extracted with bandpass filter
    (6) Wavelet Energies - limited to the optimum nodes for the species, extracted with bandpass filter
    (7) Wavelet Energies - all 62 nodes, extracted from denoised
    (8) Wavelet Energies - nodes limited to the frequency range of the species, extracted from denoised
    (9) Wavelet Energies - limited to the optimum nodes for the species, extracted from denoised
    (10) Wavelet Energies - all 62 nodes, extracted from denoised + bandpassed
    (11) Wavelet Energies - nodes limited to the frequency range of the species, extracted from denoised + bandpassed
    (12) Wavelet Energies - limited to the optimum nodes for the species, extracted from denoised + bandpassed
    (13) MFCC - full range, extracted from raw ('mfccraw_all')
    (14) MFCC - limited to the frequency range of the species, extracted from raw ('mfccraw_band')
    (15) MFCC - full range, extracted from bandpassed ('mfccbp_all')
    (16) MFCC - limited to the frequency range of the species, extracted from bandpassed
    (17) MFCC - full range, extracted from denoised
    (18) MFCC - limited to the frequency range of the species, extracted from denoised
    (19) MFCC - full range, extracted from bandpassed + denoised
    (20) MFCC - limited to the frequency range of the species, extracted from bandpassed + denoised

    :param dir: path to the dataset
    :param species: species name so that the classifier can be saved accordingly
    :param feature: 'WEraw_all', 'WEraw_band', 'WEraw_spnodes', 'WEbp_all', 'WEbp_band', 'WEbp_spnodes',
                    'WEd_all', 'WEd_band', 'WEd_spnodes', 'WEbpd_all', 'WEbpd_band', 'WEbpd_spnodes',
                    'MFCCraw_all', 'mfccraw_band', 'MFCCbp_all', 'mfccbp_band', 'MFCCd_all', 'MFCCd_band',
                    'MFCCbpd_all', 'MFCCbpd_band'
    :param clf: name of the classifier to train
    :return: saves the trained classifier in dir, e.g. kiwi_SVM.joblib
    '''
    # Read previously stored data as required
    # d = pd.read_csv(os.path.join(dir, 'Kiwi (Tokoeka Fiordland)_WE_spnodes_seg_train.tsv'), sep=",", header=None)
    d = pd.read_csv(os.path.join(dir, species + '_' + feature + '.tsv'), sep="\t", header=None)
    data = d.values

    # Balance the data set
    targets = data[:, -1]
    data = data[:, 0:-1]
    posTargetInd = np.where(targets == 1)
    negTargetInd = np.where(targets == 0)
    # randomly select n negative rows
    n = min(np.shape(posTargetInd)[1], np.shape(negTargetInd)[1])
    posTargetInd = posTargetInd[0].tolist()
    posTargetInd = random.sample(posTargetInd, n)
    negTargetInd = negTargetInd[0].tolist()
    negTargetInd = random.sample(negTargetInd, n)
    inds = posTargetInd + negTargetInd
    data = data[inds, :]

    # use PCA if selected
    if pca:
        pca1 = PCA(n_components=0.8)  # will retain 80% of the variance
        data = pca1.fit_transform(data)
    targets = targets[inds]
    learners = Learning(data, targets, testFraction=0.5)  # use whole data set for training
    # OR learn with optimum nodes, for kiwi it is [35, 43, 36, 45]
    # kiwiNodes = [35, 43, 36, 45]
    # kiwiNodes = [34, 35, 36, 37, 38, 41, 42, 43, 44, 45, 46, 55]
    # kiwiNodes = [n - 1 for n in kiwiNodes]
    # nodes = list(range(63))
    # nonKiwiNodes = list(set(nodes) - set(kiwiNodes))
    # print(nonKiwiNodes)
    # learners = Learning(data[:, kiwiNodes], targets)
    # learners = Learning(data[:, nonKiwiNodes], data[:, -1])
    # learners = Learning(data[:, 33:61], data[:, -1])

    if clf is None:  # then train all the classifiers (expensive option)
        print("MLP--------------------------------")
        # model = learners.trainMLP(structure=(100,), learningrate=0.001, solver='adam', epochs=200, alpha=1,
        #                           shuffle=True, early_stopping=False)
        model = learners.trainMLP(structure=(25, ), learningrate=0.001, solver='adam', epochs=200,
                                  alpha=1, shuffle=True, early_stopping=False)
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_MLP.joblib'))
        learners.performTest(model)
        print("kNN--------------------------------")
        model = learners.trainKNN(K=3)
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_kNN.joblib'))
        learners.performTest(model)
        print("SVM--------------------------------")
        # model = learners.trainSVM(kernel="rbf", C=1, gamma=0.0077)
        model = learners.trainSVM(kernel="rbf", C=1, gamma=0.03)
        learners.performTest(model)
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_SVM.joblib'))
        learners.performTest(model)
        print("GP--------------------------------")
        model = learners.trainGP()
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_GP.joblib'))
        learners.performTest(model)
        print("DT--------------------------------")
        model = learners.trainDecisionTree()
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_DT.joblib'))
        learners.performTest(model)
        print("RF--------------------------------")
        model = learners.trainRandomForest()
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_RF.joblib'))
        learners.performTest(model)
        print("Boosting--------------------------------")
        model = learners.trainBoosting()
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_Boost.joblib'))
        learners.performTest(model)
        print("XGB--------------------------------")
        model = learners.trainXGBoost()
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_XGB.joblib'))
        learners.performTest(model)
        # print("GMM--------------------------------")
        # model = learners.trainGMM(covType='full', maxIts=200, nClasses=4)
        # # Save the model
        # dump(model, os.path.join(dir, species + '_' + feature + '_GMM.joblib'))
        print("######################################################")
    elif clf == 'MLP':
        print("MLP--------------------------------")
        model = learners.trainMLP(structure=(250, ), learningrate=0.001, solver='adam', epochs=200,
                                  alpha=1, shuffle=True, early_stopping=True)
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_MLP.joblib'))
    elif clf == 'kNN':
        print("kNN--------------------------------")
        model = learners.trainKNN(K=3)
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_kNN.joblib'))
    elif clf == 'SVM':
        print("SVM--------------------------------")
        model = learners.trainSVM(kernel="rbf", C=1, gamma=0.00018)
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_SVM.joblib'))
    elif clf == 'GP':
        print("GP--------------------------------")
        model = learners.trainGP()
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_GP.joblib'))
    elif clf == 'DT':
        print("DT--------------------------------")
        model = learners.trainDecisionTree()
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_DT.joblib'))
    elif clf == 'RF':
        print("RF--------------------------------")
        model = learners.trainRandomForest()
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_RF.joblib'))
    elif clf == 'Boost':
        print("Boosting--------------------------------")
        model = learners.trainBoosting()
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_Boost.joblib'))
    elif clf == 'XGB':
        print("XGB--------------------------------")
        model = learners.trainXGBoost()
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_XGB.joblib'))
    elif clf == 'GMM':
        print("GMM--------------------------------")
        model = learners.trainGMM(covType='full', maxIts=200, nClasses=4)
        # Save the model
        dump(model, os.path.join(dir, species + '_' + feature + '_GMM.joblib'))
def testClassifiers(dir_clf, dir_test, species, feature, clf=None, pca=False):
    '''
    Load previously trained classifiers and test on a completely new data set.
    :param dir_clf: path to the saved classifiers
    :param dir_test: path to the test dataset
    :param species: species name
    :param feature: 'WEraw_all', 'WEraw_band', 'WEraw_spnodes' ...
    :param clf: classifier name e.g. 'SVM'
    :return: prints out the confusion matrix
    '''
    # read test dataset
    d = pd.read_csv(os.path.join(dir_test, species + '_' + feature + '.tsv'), sep="\t", header=None)
    # d = pd.read_csv(os.path.join(dir_test, 'Kiwi (Tokoeka Fiordland)_WE_spnodes_seg_test.tsv'), sep=",", header=None)
    data = d.values
    targets = data[:, -1]
    data = data[:, 0:-1]

    # use PCA if selected
    if pca:
        pca1 = PCA(n_components=0.8)  # will retain 80% of the variance
        data = pca1.fit_transform(data)

    # Test with all 62 nodes
    learners = Learning(data, targets, testFraction=1)  # use all data for testing
    # OR test with optimum nodes, for kiwi it is [35, 43, 36, 45]
    # kiwiNodes = [35, 43, 36, 45]
    # kiwiNodes = [34, 35, 36, 37, 38, 41, 42, 43, 44, 45, 46, 55]
    # kiwiNodes = [n - 1 for n in kiwiNodes]
    # nodes = list(range(63))
    # nonKiwiNodes = list(set(nodes) - set(kiwiNodes))
    # learners = Learning(data[:, kiwiNodes], data[:, -1], testFraction=1)
    # learners = Learning.Learning(data[:, nonKiwiNodes], data[:, -1])
    # learners = Learning.Learning(data[:, 33:61], data[:, -1])

    if clf is None:
        print("MLP--------------------------------")
        # Load the model
        model = load(os.path.join(dir_clf, species + '_' + feature + '_MLP.joblib'))
        learners.performTest(model)
        print("kNN--------------------------------")
        model = load(os.path.join(dir_clf, species + '_' + feature + '_kNN.joblib'))
        learners.performTest(model)
        print("SVM--------------------------------")
        model = load(os.path.join(dir_clf, species + '_' + feature + '_SVM.joblib'))
        learners.performTest(model)
        print("GP--------------------------------")
        model = load(os.path.join(dir_clf, species + '_' + feature + '_GP.joblib'))
        learners.performTest(model)
        print("DT--------------------------------")
        model = load(os.path.join(dir_clf, species + '_' + feature + '_DT.joblib'))
        learners.performTest(model)
        print("RF--------------------------------")
        model = load(os.path.join(dir_clf, species + '_' + feature + '_RF.joblib'))
        learners.performTest(model)
        print("Boosting--------------------------------")
        model = load(os.path.join(dir_clf, species + '_' + feature + '_Boost.joblib'))
        learners.performTest(model)
        print("XGB--------------------------------")
        model = load(os.path.join(dir_clf, species + '_' + feature + '_XGB.joblib'))
        learners.performTest(model)
        # print("GMM--------------------------------")
        # model = load(os.path.join(dir_clf, species + '_' + feature + '_GMM.joblib'))
        # learners.performTest(model)
        print("######################################################")
    else:
        model = load(os.path.join(dir_clf, species + '_' + feature + '_' + clf + '.joblib'))
        learners.performTest(model)
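# Hypothetical end-to-end usage of the two helpers above: train classifiers on one
# directory of feature files and evaluate them on a held-out directory. The paths,
# species and feature names below are placeholders, not values from the original
# project; each directory is assumed to contain '<species>_<feature>.tsv' files.
if __name__ == '__main__':
    TrainClassifier(r'D:\AviaNZ\train', species='Kiwi', feature='WEraw_all', clf='SVM')
    testClassifiers(r'D:\AviaNZ\train', r'D:\AviaNZ\test', species='Kiwi',
                    feature='WEraw_all', clf='SVM')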
import numpy as np
import Perception
import Decision
import Memory
import random
import gym
import Learning
import math
import naoenvSimulation
import matplotlib.pyplot as plt

camera = Perception.Camera()
memory = Memory.Memory(camera)
Perc = Perception.Ball(memory)
learning = Learning.Learning(memory)
Dec = Decision.Decision(memory, learning)


def createEnvironment(numberOfPossibleBalls, environment, center):
    #create some balls
    ball = [0, 0]
    gaze = [0, 0]
    while gaze[0] == 0 or gaze[1] == 0:
        ball[0] = random.randrange(environment[0], environment[1], 1)
        ball[1] = random.randrange(environment[2], environment[3], 1)
        #create the gaze
        gaze = [ball[0] - center[0], ball[1] - center[1]]
    #find the direction of the gaze
    directionx = 1
    directiony = 1
def TrainFacultative(self):
    """
    Train paired models for CONSTITUTIVE ("always") and FACULTATIVE ("sometimes") PTMs.
    """
    # Train a model on all PTMs, to get initial scores for all PTMs.
    # The initial model uses only CONSTITUTIVE features, and its output
    # is used only to provide an ORACLE for the facultative model:
    print "TRAIN model on all features:"
    self.Model.Train(self.TrainingSetAll)
    print "SCORE all features:"
    self.Model.Test(self.TrainingSetAll)
    ##############################################################
    print "Generate SUB-MODEL of only facultative features:"
    # Sort facultative instances by score:
    SortedList = []
    for Vector in self.TrainingSetAll.AllVectors:
        if not Vector.FileBits[FormatBits.SisterAnnotationFlag]:
            continue
        SortedList.append((Vector.Score, Vector))
    SortedList.sort()
    FacFeatureSet = Learning.FeatureSetClass()
    ChunkSize = min(len(SortedList) / 4, 1000)
    print "Sorted list of %s facultative features, chunk size is %s"%(len(SortedList), ChunkSize)
    for (Score, Vector) in SortedList[:ChunkSize]:
        NewVector = Learning.FeatureVector()
        NewVector.FileBits = Vector.FileBits[:]
        NewVector.Features = Vector.Features[:]
        NewVector.TrueFlag = 0
        FacFeatureSet.AllVectors.append(NewVector)
        FacFeatureSet.FalseVectors.append(NewVector)
    for (Score, Vector) in SortedList[-ChunkSize:]:
        NewVector = Learning.FeatureVector()
        NewVector.FileBits = Vector.FileBits[:]
        NewVector.Features = Vector.Features[:]
        NewVector.TrueFlag = 1
        FacFeatureSet.AllVectors.append(NewVector)
        FacFeatureSet.TrueVectors.append(NewVector)
    FacFeatureSet.SetCounts()
    FacFeatureSet.GetPriorProbabilityFalse(self.TrainingSetDBRatio)
    ##############################################################
    # Write out the FACULTATIVE feature set:
    FacTrainingFile = open("FacultativeTrainingSet.txt", "wb")
    for HeaderLine in self.HeaderLines:
        FacTrainingFile.write(HeaderLine)
    for Vector in FacFeatureSet.AllVectors:
        Bits = Vector.FileBits[:]
        if Vector.TrueFlag:
            Bits[FormatBits.TrueProteinFlag] = "1"
        else:
            Bits[FormatBits.TrueProteinFlag] = "0"
        Str = string.join(Bits, "\t")
        FacTrainingFile.write(Str + "\n")
    FacTrainingFile.close()
    ##############################################################
    # Train the sub-model:
    self.FacModel = self.GetModelObject(self.FeaturesF)
    self.FacModel.Train(FacFeatureSet)
    self.FacModel.Test(FacFeatureSet)
    self.FacModel.ReportAccuracy(FacFeatureSet)  # invokes ComputeOddsTrue
    ##############################################################
    # Apply the trained fac-model to *all* facultative features, and
    # train an overall model on all *constitutive* features:
    self.FeatureSetC = Learning.FeatureSetClass()
    self.FeatureSetF = Learning.FeatureSetClass()
    for Vector in self.TrainingSetAll.AllVectors:
        if Vector.FileBits[FormatBits.SisterAnnotationFlag]:
            FeatureSet = self.FeatureSetF
        else:
            FeatureSet = self.FeatureSetC
        FeatureSet.AllVectors.append(Vector)
        if Vector.TrueFlag:
            FeatureSet.TrueVectors.append(Vector)
        else:
            FeatureSet.FalseVectors.append(Vector)
    self.FeatureSetC.SetCounts()
    self.FeatureSetF.SetCounts()
    self.FeatureSetC.GetPriorProbabilityFalse(self.TrainingSetDBRatio)
    self.FeatureSetF.GetPriorProbabilityFalse(self.TrainingSetDBRatio)
    # Score facultative features, using the facultative model:
    self.FacModel.Test(self.FeatureSetF)
    # Train constitutive-ONLY model, and score constitutive features:
    self.ConModel = self.GetModelObject(self.FeaturesC)
    self.ConModel.Train(self.FeatureSetC)
    self.ConModel.Test(self.FeatureSetC)
    self.ConModel.ReportAccuracy(self.FeatureSetC)  # to invoke ComputeOddsTrue
    ##############################################################
    # Save our models:
    if self.WriteModelFilePath:
        (Stub, Extension) = os.path.splitext(self.WriteModelFilePath)
        ConModelPath = "%s.con"%Stub
        FacModelPath = "%s.fac"%Stub
        self.ConModel.SaveModel(ConModelPath)
        self.FacModel.SaveModel(FacModelPath)
    ##############################################################
    # Write out the scored features:
    OutputFile = open(self.OutputFeaturePath, "wb")
    for Line in self.HeaderLines:
        OutputFile.write(Line)
    for Vector in self.TrainingSetAll.AllVectors:
        if Vector.FileBits[FormatBits.SisterAnnotationFlag]:
            PValue = self.FacModel.GetPValue(Vector.Score)
        else:
            PValue = self.ConModel.GetPValue(Vector.Score)
        while len(Vector.FileBits) <= FormatBits.ModelPValue:
            Vector.FileBits.append("")
        Vector.FileBits[FormatBits.ModelScore] = str(Vector.Score)
        Vector.FileBits[FormatBits.ModelPValue] = str(PValue)
        Str = string.join(Vector.FileBits, "\t")
        OutputFile.write(Str + "\n")
def TrainModel(self):
    """
    Our training data-set is in self.InputFeaturePath. Let's train a model
    to predict which entries come from the true database.
    """
    if not self.InputFeaturePath:
        print "* Please specify an input feature-file."
        print UsageInfo
        sys.exit(-1)
    # Load in features for a collection of TRUE and FALSE instances.
    File = open(self.InputFeaturePath, "rb")
    self.FeatureNames = {}
    FeatureCount = FormatBits.LastFeature - FormatBits.FirstFeature + 1
    # We have one set of features for facultative sites, and one for constitutive.
    # Note that some features (modification rate, correlation with unmodified peptide)
    # are applicable to F but not C.
    #self.FeaturesF = range(FeatureCount)
    # For constitutive modifications: Modification rate, protein coverage,
    # and number of unmodified peptides are all off-limits. (Those features
    # are "dead giveaways" that we have a non-shuffled protein!)
    #self.FeaturesC = [2, 3, 5, 22, 24, 25, 26]
    self.FeaturesC = ValidFeatureIndices[:]
    #self.FeaturesC = range(FeatureCount)
    self.FeaturesF = self.FeaturesC
    self.FeaturesAll = []
    for FeatureIndex in self.FeaturesF:
        if FeatureIndex in self.FeaturesC:
            self.FeaturesAll.append(FeatureIndex)
    # We can OVERRIDE the list of features here, to forbid the use of some:
    print "Permitted features all:", self.FeaturesAll
    # Parse the features from the TRAINING and TESTING files. We generate
    # training sets for the FACULTATIVE (F) and for CONSTITUTIVE (C) sites.
    self.TrainingSet2 = Learning.FeatureSetClass()
    self.TrainingSet2.Type = "Charge-2"
    self.TrainingSet3 = Learning.FeatureSetClass()
    self.TrainingSet3.Type = "Charge-3"
    #self.TrainingSetAll = Learning.FeatureSetClass()
    #self.TrainingSetAll.Type = "All"
    self.ParseFeatureFile(self.InputFeaturePath, self.TrainingSet2,
                          self.TrainingSet3, self.TrainingSetDBRatio)
    if self.ModelTestFilePath:
        self.TestingSet2 = Learning.FeatureSetClass()
        self.TestingSet3 = Learning.FeatureSetClass()
        self.ParseFeatureFile(self.ModelTestFilePath, self.TestingSet2,
                              self.TestingSet3, self.TestingSetDBRatio)
    # SPECIAL values for model, which don't actually cause training:
    if self.ModelType == "feature":
        print "\n\nSINGLE feature:"
        self.TrainOneFeature(self.TrainingSet2)
        self.TrainOneFeature(self.TrainingSet3)
        return
    if self.ModelType == "featurescatter":
        print "\n\nFeature+feature scatter-plots:"
        self.ProduceFeatureScatterPlots(self.TrainingSetAll)
        return
    if self.ModelType == "summary":
        self.PerformFeatureSummary()
        return
    # Instantiate our model:
    self.Model2 = self.GetModelObject(self.FeaturesAll)
    self.Model3 = self.GetModelObject(self.FeaturesAll)
    # Load a pre-trained model, if we received a path:
    if self.ReadModelFilePath2:
        self.Model2.LoadModel(self.ReadModelFilePath2)
        self.Model3.LoadModel(self.ReadModelFilePath3)
    #######################################################################
    # Special value for feature selection (3) means that we train a model on
    # all data, then use it to generate a sub-feature-set for a facultative model!
    if self.FeatureSelectionFlag == 3:
        self.TrainFacultative()
        return
    #######################################################################
    # If we're not doing feature selection: Train on the training set,
    # and then (if we have a testing set) test on the testing set.
    if not self.FeatureSelectionFlag:
        # Train the model (unless we just loaded it in):
        if not self.ReadModelFilePath2:
            self.Model2.Train(self.TrainingSet2)
            self.Model3.Train(self.TrainingSet3)
        # Compute the score of each vector:
        if self.ModelTestFilePath:
            self.Model2.Test(self.TestingSet2)
            self.Model2.ReportAccuracy(self.TestingSet2)
            self.Model3.Test(self.TestingSet3)
            self.Model3.ReportAccuracy(self.TestingSet3)
            self.WriteScoredFeatureSet(self.TestingSet2, self.TestingSet3)
        else:
            self.Model2.Test(self.TrainingSet2)
            self.Model2.ReportAccuracy(self.TrainingSet2)
            shutil.copyfile("PValues.txt", "PValues.chg2.txt")
            self.Model3.Test(self.TrainingSet3)
            self.Model3.ReportAccuracy(self.TrainingSet3)
            shutil.copyfile("PValues.txt", "PValues.chg3.txt")
            #if self.ReportROCPath:
            #    self.Model.ReportROC(self.TrainingSetAll, self.ReportROCPath)
            self.WriteScoredFeatureSet(self.TrainingSet2, self.TrainingSet3)
        if self.WriteModelFilePath2:
            self.Model2.SaveModel(self.WriteModelFilePath2)
            self.Model3.SaveModel(self.WriteModelFilePath3)
        return
    #######################################################################
    # We're doing feature selection. We'll need to write out feature files,
    # then call TrainMachineLearner.
    print "Feature names:", self.FeatureNames
    print "AllFeatures:", self.FeaturesAll
    self.WriteFeaturesToFile(self.TrainingSet2, "PTMFeatures.2.txt")
    self.WriteFeaturesToFile(self.TrainingSet3, "PTMFeatures.3.txt")
def __init__(self):
    self.spezies = lng.Spezies()
BACKEND = 'CPU'
dt = 0.001
duration = 10
discount = 0.9995

env = TestEnvActor(dt=dt, trial_length=40, reset=1000)

with nengo.Network() as net:
    envnode = nengo.Node(lambda t, v: env.step(v), size_in=1, size_out=3)
    in_ens = nengo.Ensemble(n_neurons=1000, radius=2, dimensions=1)  # encodes position
    actor = nengo.Ensemble(n_neurons=1000, radius=1, dimensions=1)
    critic = CriticNet(in_ens, n_neuron_out=1000, lr=1e-5)
    error = ErrorNode(discount=discount)  # seems like a reasonable value to have a reward gradient over the entire episode
    switch = Switch(state=1, switch_off=False, switchtime=duration/2)  # needed for compatibility with error implementation

    nengo.Connection(envnode[0], in_ens)
    conn = nengo.Connection(in_ens, actor, function=lambda x: [0],
                            solver=nengo.solvers.LstsqL2(weights=True),
                            learning_rule_type=Learning.TDL(learning_rate=1e-8))
    nengo.Connection(actor, envnode)

    # error node connections
    # reward = input[0], value = input[1], switch = input[2], state = input[3], reset = input[4].astype(int)
    nengo.Connection(envnode[1], error.net.errornode[0], synapse=0.01)  # reward connection
    nengo.Connection(critic.net.output, error.net.errornode[1], synapse=0.01)  # value prediction
    nengo.Connection(switch.net.switch, error.net.errornode[2], synapse=0.01)  # learning switch
    nengo.Connection(error.net.errornode[1], error.net.errornode[3], synapse=0.01)  # feed value into next step
    nengo.Connection(envnode[2], error.net.errornode[4], synapse=0.01)  # propagate reset signal

    # error to critic
    nengo.Connection(error.net.errornode[0], critic.net.conn.learning_rule, transform=-1)
    nengo.Connection(error.net.errornode[0], conn.learning_rule)

    # Probes
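    # Hedged continuation sketch (assumption, not from the original source): add a couple
    # of probes so the run can be inspected; the probe targets chosen here are illustrative.
    p_actor = nengo.Probe(actor, synapse=0.1)  # filtered actor output over time
    p_error = nengo.Probe(error.net.errornode[0], synapse=0.1)  # TD error signal

# One way to build and run the model above with nengo's reference simulator and
# look at the actor trace; runtime and plotting are illustrative assumptions.
import matplotlib.pyplot as plt

with nengo.Simulator(net, dt=dt) as sim:
    sim.run(duration)
plt.plot(sim.trange(), sim.data[p_actor])
plt.xlabel("time (s)")
plt.show()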