def nnTest(tx, ty, rx, ry, iterations): print "NN start" print strftime("%a, %d %b %Y %H:%M:%S", localtime()) resultst = [] resultsr = [] positions = range(iterations) network = buildNetwork(16, 16, 1, bias=True) ds = ClassificationDataSet(16, 1, class_labels=["1", "0"]) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.05) validator = CrossValidator(trainer, ds, n_folds=10) print validator.validate() for i in positions: print trainer.train() resultst.append(sum((np.array([round(network.activate(test)) for test in tx]) - ty)**2)/float(len(ty))) resultsr.append(sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry))) print i, resultst[i], resultsr[i] plt.plot(positions, resultst, 'g-', positions, resultsr, 'r-') plt.axis([0, iterations, 0, 1]) plt.ylabel("Percent Error") plt.xlabel("Network Epoch") plt.title("Neural Network Error") plt.savefig('nn.png', dpi=500) print "NN end" print strftime("%a, %d %b %Y %H:%M:%S", localtime())
def nntester(tx, ty, rx, ry, iterations): """ builds, tests, and graphs a neural network over a series of trials as it is constructed """ resultst = [] resultsr = [] positions = range(iterations) network = buildNetwork(100, 50, 1, bias=True) ds = ClassificationDataSet(100, 1, class_labels=["valley", "hill"]) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.01) for i in positions: print trainer.train() resultst.append( sum((np.array([round(network.activate(test)) for test in tx]) - ty)**2) / float(len(ty))) resultsr.append( sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2) / float(len(ry))) print i, resultst[i], resultsr[i] NetworkWriter.writeToFile(network, "network.xml") plt.plot(positions, resultst, 'ro', positions, resultsr, 'bo') plt.axis([0, iterations, 0, 1]) plt.ylabel("Percent Error") plt.xlabel("Network Epoch") plt.title("Neural Network Error") plt.savefig('3Lnn.png', dpi=300)
def nn(tx, ty, rx, ry, add="", iterations=250): """ trains and plots a neural network on the data we have """ resultst = [] resultsr = [] positions = range(iterations) network = buildNetwork(tx[1].size, 5, 1, bias=True) ds = ClassificationDataSet(tx[1].size, 1) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.01) train = zip(tx, ty) test = zip(rx, ry) for i in positions: trainer.train() resultst.append( sum( np.array([(round(network.activate(t_x)) - t_y)**2 for t_x, t_y in train]) / float(len(train)))) resultsr.append( sum( np.array([(round(network.activate(t_x)) - t_y)**2 for t_x, t_y in test]) / float(len(test)))) # resultsr.append(sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry))) print i, resultst[-1], resultsr[-1] plot([0, iterations, 0, 1], (positions, resultst, "ro", positions, resultsr, "bo"), "Network Epoch", "Percent Error", "Neural Network Error", "NN" + add)
def createTrainingSupervisedDataSet(self, msrcImages, scale, keepClassDistTrain):
    """Split the MSRC images into train/test/valid sets, extract features for
    the training split, and pack them into a one-hot ClassificationDataSet.

    Note: ``numClasses`` and ``voidClass`` are not defined in this function --
    presumably module-level constants; TODO confirm.
    """
    print "\tSplitting MSRC data into train, test, valid data sets."
    splitData = pomio.splitInputDataset_msrcData(msrcImages, scale, keepClassDistTrain)
    print "\tNow generating features for each training image."
    trainData = FeatureGenerator.processLabeledImageData(splitData[0], ignoreVoid=True)
    features = trainData[0]
    numDataPoints = np.shape(features)[0]
    numFeatures = np.shape(features)[1]
    labels = trainData[1]
    # !!error! (original author note) this is the label COUNT, not the number
    # of unique labels or the max label
    numLabels = np.size(labels)
    assert numDataPoints == numLabels, "Number of feature data points and number of labels not equal!"
    dataSetTrain = ClassificationDataSet(numFeatures, numClasses)
    print "\tNow adding all data points to the ClassificationDataSet..."
    for idx in range(0, numDataPoints):
        feature = trainData[0][idx]
        label = trainData[1][idx]
        binaryLabels = np.zeros(numClasses)
        # to cope with the removal of void class (idx 13): labels above the
        # void index are shifted down by one before one-hot encoding
        if label < voidClass:
            binaryLabels[label] = 1
        else:
            binaryLabels[label - 1] = 1
        dataSetTrain.addSample(feature, binaryLabels)
    print "\tAdded", np.size(trainData), " labeled data points to DataSet."
    return dataSetTrain
def nntester(tx, ty, rx, ry, iterations): """ builds, tests, and graphs a neural network over a series of trials as it is constructed """ resultst = [] resultsr = [] positions = range(iterations) network = buildNetwork(100, 50, 1, bias=True) ds = ClassificationDataSet(100,1, class_labels=["valley", "hill"]) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.01) for i in positions: print trainer.train() resultst.append(sum((np.array([round(network.activate(test)) for test in tx]) - ty)**2)/float(len(ty))) resultsr.append(sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry))) print i, resultst[i], resultsr[i] NetworkWriter.writeToFile(network, "network.xml") plt.plot(positions, resultst, 'ro', positions, resultsr, 'bo') plt.axis([0, iterations, 0, 1]) plt.ylabel("Percent Error") plt.xlabel("Network Epoch") plt.title("Neural Network Error") plt.savefig('3Lnn.png', dpi=300)
def createTrainingSupervisedDataSet(self, msrcImages, scale, keepClassDistTrain):
    """Split the MSRC images into train/test/valid sets, extract features for
    the training split, and pack them into a one-hot ClassificationDataSet.

    Note: ``numClasses`` and ``voidClass`` are not defined here -- presumably
    module-level constants; TODO confirm.
    """
    print "\tSplitting MSRC data into train, test, valid data sets."
    splitData = pomio.splitInputDataset_msrcData(msrcImages, scale, keepClassDistTrain)
    print "\tNow generating features for each training image."
    trainData = FeatureGenerator.processLabeledImageData(splitData[0], ignoreVoid=True)
    features = trainData[0]
    numDataPoints = np.shape(features)[0]
    numFeatures = np.shape(features)[1]
    labels = trainData[1]
    # !!error! (original author note) this is the label COUNT, not the number
    # of unique labels or the max label
    numLabels = np.size(labels)
    assert numDataPoints == numLabels, "Number of feature data points and number of labels not equal!"
    dataSetTrain = ClassificationDataSet(numFeatures, numClasses)
    print "\tNow adding all data points to the ClassificationDataSet..."
    for idx in range(0, numDataPoints):
        feature = trainData[0][idx]
        label = trainData[1][idx]
        binaryLabels = np.zeros(numClasses)
        # to cope with the removal of void class (idx 13): labels above the
        # void index are shifted down by one before one-hot encoding
        if label < voidClass:
            binaryLabels[label] = 1
        else:
            binaryLabels[label - 1] = 1
        dataSetTrain.addSample(feature, binaryLabels)
    print "\tAdded", np.size(trainData), " labeled data points to DataSet."
    return dataSetTrain
def cvnntester(tx, ty, rx, ry, iterations, folds): network = buildNetwork(100, 50, 1, bias=True) ds = ClassificationDataSet(100,1, class_labels=["valley", "hill"]) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.005) cv = CrossValidator(trainer, ds, n_folds=folds, max_epochs=iterations, verbosity=True) print cv.validate() print sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry))
def train_network(X, y, hidden_units=3, learningrate=0.04, max_epochs=8, continue_epochs=2):
    """Fit a single-hidden-layer sigmoid-output classifier to (X, y) with
    backprop until convergence, and return the trained network."""
    n_features = X.shape[1]
    net = buildNetwork(n_features, hidden_units, 1, outclass=SigmoidLayer)
    ds = ClassificationDataSet(n_features, 1)
    for idx, row in enumerate(X):
        ds.addSample(row, y[idx])
    trainer = BackpropTrainer(net, ds, learningrate=learningrate)
    trainer.trainUntilConvergence(maxEpochs=max_epochs,
                                  continueEpochs=continue_epochs)
    return net
def initializeNetwork(self):
    """Build a 26-15-5 tanh/softmax network and train it on self.train
    (language classification from letter frequencies)."""
    # 15 hidden units is just a mean of input/output layer sizes
    self.net = buildNetwork(26, 15, 5, hiddenclass=TanhLayer, outclass=SoftmaxLayer)
    ds = ClassificationDataSet(26, nb_classes=5)
    for sample in self.train:
        ds.addSample(sample.frequency, self.encodingDict[sample.lang])
    ds._convertToOneOfMany()
    trainer = BackpropTrainer(self.net, dataset=ds, weightdecay=0.01,
                              momentum=0.1, verbose=True)
    trainer.trainUntilConvergence(maxEpochs=100)
def nn(tx, ty, rx, ry, iterations):
    """Train a 14-5-5-1 network for `iterations` epochs, persist it to
    network.xml, and return the test-set mean squared error."""
    network = buildNetwork(14, 5, 5, 1)
    ds = ClassificationDataSet(14, 1, class_labels=["<50K", ">=50K"])
    for i in xrange(len(tx)):
        ds.addSample(tx[i], [ty[i]])
    trainer = BackpropTrainer(network, ds)
    trainer.trainOnDataset(ds, iterations)
    NetworkWriter.writeToFile(network, "network.xml")
    predictions = np.array([round(network.activate(s)) for s in rx])
    return sum((predictions - ry) ** 2) / float(len(ry))
def initialize(trainingfeatures,traininglabels,p=0.7): alldata = ClassificationDataSet(trainingfeatures.shape[1], 1, nb_classes=len(set(traininglabels))) for i in xrange(traininglabels[0]): alldata.appendLinked(trainingfeatures[i] , traininglabels[i]) trndata, tstdata = alldata.splitWithProportion( p ) trndata._convertToOneOfMany(bounds=[0, 1]) tstdata._convertToOneOfMany(bounds=[0, 1]) model, accuracy, params = buildANN(trndata, tstdata) print '\nThe best model had '+str(accuracy)+'% accuracy and used the parameters:\n'+params+'\n' return model
def nn(tx, ty, rx, ry, iterations):
    """Train a 14-5-5-1 income classifier, save it to network.xml, and
    return the mean squared error over the test set (rx, ry)."""
    network = buildNetwork(14, 5, 5, 1)
    ds = ClassificationDataSet(14, 1, class_labels=["<50K", ">=50K"])
    for i in xrange(len(tx)):
        ds.addSample(tx[i], [ty[i]])
    trainer = BackpropTrainer(network, ds)
    trainer.trainOnDataset(ds, iterations)
    NetworkWriter.writeToFile(network, "network.xml")
    rounded = np.array([round(network.activate(s)) for s in rx])
    return sum((rounded - ry) ** 2) / float(len(ry))
def _createL1Dataset(self, classifiers, l1PreDataset):
    """Build the level-1 stacking dataset: each instance's input is replaced
    by the concatenated output distributions of all base classifiers."""
    dimensions = classifiers[0].distributionLength() * len(classifiers)
    l1Dataset = ClassificationDataSet(dimensions, nb_classes=2)
    for instance in l1PreDataset:
        level1Input = _getLevel1Input(classifiers, instance[0], self.useDistributions)
        l1Dataset.appendLinked(level1Input, instance[1])
    return l1Dataset
def cifar_nn(offset=None):
    """Load CIFAR data and attach a pybrain ClassificationDataSet (built from
    at most `offset` training samples, or all of them) under 'train_nn'."""
    data_ = cifar(one_hot=True, ten_percent=False)
    x_dim = len(data_['train']['data'][0])
    ds = ClassificationDataSet(x_dim, 10)
    sample_count = offset if offset else len(data_['train']['data'])
    for i in xrange(sample_count):
        ds.addSample(data_['train']['data'][i], data_['train']['labels'][i])
    data_['train_nn'] = ds
    return data_
def sentiment_nn(bag_size=100, offset=None):
    """Load bag-of-words sentiment data and attach a pybrain
    ClassificationDataSet (at most `offset` samples) under 'train_nn'."""
    data_ = sentiment(bag_size)
    x_dim = len(data_['train']['data'][0])
    ds = ClassificationDataSet(x_dim, 1)
    sample_count = offset if offset else len(data_['train']['data'])
    for i in xrange(sample_count):
        ds.addSample(data_['train']['data'][i], [data_['train']['labels'][i]])
    data_['train_nn'] = ds
    return data_
def _createDatasetForClass(self, dataset, classValue):
    """Produce a binary one-vs-rest dataset: instances whose target equals
    `classValue` get label 1, every other instance gets label 0."""
    binaryDataset = ClassificationDataSet(dataset.getDimension('input'), nb_classes=2)
    for instance in dataset:
        label = [1] if instance[1][0] == classValue else [0]
        binaryDataset.appendLinked(instance[0], label)
    return binaryDataset
def createTrainingSetFromMatrix( self, dataMat, labelsVec=None ): assert labelsVec==None or dataMat.shape[0] == len(labelsVec) #nbFtrs = dataMat.shape[1] #nbClasses = np.max(labelsVec) + 1 if labelsVec != None and np.unique(labelsVec) != range(self.nbClasses): print 'WARNING: class labels only contain these values %s ' % (str( np.unique(labelsVec) )) dataSetTrain = ClassificationDataSet(self.nbFeatures, numClasses) for i in range(dataMat.shape[0]): binaryLabels = np.zeros(numClasses) if labelsVec != None: binaryLabels[labelsVec[i]] = 1 dataSetTrain.addSample( dataMat[i,:], binaryLabels ) return dataSetTrain
def splitWithProportion(self, proportion=0.7):
    """Produce two new datasets, the first one containing the fraction given
    by `proportion` of the samples (chosen by random permutation)."""
    shuffled = random.permutation(len(self))
    cut = int(len(self) * proportion)
    leftIdx, rightIdx = shuffled[:cut], shuffled[cut:]
    leftDs = ClassificationDataSet(inp=self['input'][leftIdx].copy(),
                                   target=self['target'][leftIdx].copy())
    rightDs = ClassificationDataSet(inp=self['input'][rightIdx].copy(),
                                    target=self['target'][rightIdx].copy())
    return leftDs, rightDs
def cvnntester(tx, ty, rx, ry, iterations, folds): network = buildNetwork(100, 50, 1, bias=True) ds = ClassificationDataSet(100, 1, class_labels=["valley", "hill"]) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.005) cv = CrossValidator(trainer, ds, n_folds=folds, max_epochs=iterations, verbosity=True) print cv.validate() print sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2) / float(len(ry))
def buildXor(self):
    """Construct the XOR dataset and a 2-3-1 tanh/threshold network, storing
    both on the instance (and mirroring the training set into the module
    global `trn_data`)."""
    self.params['dataset'] = 'XOR'
    d = ClassificationDataSet(2)
    for inp, out in [([0., 0.], [0.]), ([0., 1.], [1.]),
                     ([1., 0.], [1.]), ([1., 1.], [0.])]:
        d.addSample(inp, out)
    d.setField('class', [[0.], [1.], [1.], [0.]])
    self.trn_data = d
    self.tst_data = d
    global trn_data
    trn_data = self.trn_data
    net = FeedForwardNetwork()
    inLayer = TanhLayer(2, name='in')
    hiddenLayer = TanhLayer(3, name='hidden0')
    outLayer = ThresholdLayer(1, name='out')
    net.addInputModule(inLayer)
    net.addModule(hiddenLayer)
    net.addOutputModule(outLayer)
    net.addConnection(FullConnection(inLayer, hiddenLayer))
    net.addConnection(FullConnection(hiddenLayer, outLayer))
    net.sortModules()
    net.randomize()
    self.net_settings = str(net.connections)
    self.nn = net
def xorDataSet():
    """Return the 2-input XOR truth table as a ClassificationDataSet."""
    ds = ClassificationDataSet(2)
    truth_table = [([0., 0.], [0.]), ([0., 1.], [1.]),
                   ([1., 0.], [1.]), ([1., 1.], [0.])]
    for inp, out in truth_table:
        ds.addSample(inp, out)
    ds.setField('class', [[0.], [1.], [1.], [0.]])
    return ds
def pybrainData(split, data=None):
    """Build train/test ClassificationDataSets from iris-style patterns.

    Falls back to the module-level cat1+cat2+cat3 patterns (taken from the
    iris data set at the machine learning repository) when `data` is falsy.
    Returns (trndata, tstdata) one-hot encoded.
    """
    pat = data if data else cat1 + cat2 + cat3
    alldata = ClassificationDataSet(4, 1, nb_classes=3,
                                    class_labels=['set', 'vers', 'virg'])
    for p in pat:
        # p[0] is the feature vector, p[2] the class target
        alldata.addSample(p[0], p[2])
    tstdata, trndata = alldata.splitWithProportion(split)
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    return trndata, tstdata
def testNetwork(self): correctAnswers = [] for testItem in self.test: correctAnswers.append(self.encodingDict[testItem.lang]) ds_test = ClassificationDataSet(26, nb_classes=5) for x in self.test: ds_test.addSample(x.frequency, self.encodingDict[x.lang]) ds_test._convertToOneOfMany() sumCorrect = sum(self.net.activateOnDataset(ds_test).argmax(axis=1) == correctAnswers) print "\nNeural network: " + str(sumCorrect*100/float(len(self.test))) + "% efficiency"
def _getFilteredDataset(self, dataset, pair):
    """Keep only instances whose class is in `pair`, relabelling the first
    class of the pair as 0 (negative) and the second as 1 (positive)."""
    pairDataset = ClassificationDataSet(dataset.getDimension('input'), nb_classes=2)
    for instance in dataset:
        classValue = instance[1][0]
        if classValue == pair[0]:
            pairDataset.appendLinked(instance[0], [0])
        elif classValue == pair[1]:
            pairDataset.appendLinked(instance[0], [1])
        # instances of any other class are dropped
    return pairDataset
def _testTrainingOnClassificationDataset(self):
    """Train a softmax network on AND-gate data through NeuralNetworkFactory
    and verify the resulting classifier reproduces the truth table."""
    DS = ClassificationDataSet(2, class_labels=['Zero', 'One'])
    for inp, target in [([0, 0], [0]), ([0, 1], [0]),
                        ([1, 0], [0]), ([1, 1], [1])]:
        DS.appendLinked(inp, target)
    network = buildNetwork(DS.indim, 5, 2, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(network, momentum=0.1, verbose=True, weightdecay=0.01)
    nnf = NeuralNetworkFactory(network, trainer, seed=2, iterationsNum=20)
    classifier = nnf.buildClassifier(DS)
    for inp, expected in [([0, 0], 0), ([0, 1], 0), ([1, 0], 0), ([1, 1], 1)]:
        self.assertEqual(classifier.getPrediction(inp), expected)
def _createGradingDataset(self, baseClassifier, gradingSet, numOfAttirubes):
    """Label each grading-set instance by whether `baseClassifier` predicts
    its class correctly (grading stage of a grading ensemble)."""
    gradingDataset = ClassificationDataSet(numOfAttirubes, nb_classes=2,
                                           class_labels=["Incorrect", "Correct"])
    for instance in gradingSet:
        attributes = instance[0]
        # NOTE(review): the "true class" is read from attributes[0], not from
        # the target vector instance[1][0] -- looks suspicious; confirm with
        # the caller's instance layout.
        cls = instance[0][0]
        prediction = baseClassifier.getPrediction(attributes)
        label = [CorrectPrediction] if prediction == cls else [IncorrectPrediction]
        gradingDataset.appendLinked(attributes, label)
    return gradingDataset
def main():
    """Frame-level happiness/sadness classifier over MFCC features.

    Extracts MFCCs from 200 happiness and 200 sadness wav files, then either
    loads a previously trained network from disk or trains a new one on the
    first 50 files of each class and saves it.
    """
    print "Calculating mfcc...."
    # map "1".."400" -> (mfcc matrix, number of frames); 1-200 happiness,
    # 201-400 sadness
    mfcc_coeff_vectors_dict = {}
    for i in range(1, 201):
        extractor = FeatureExtractor(
            '/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Happiness/HappinessAudios/' + str(i) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})
    for i in range(201, 401):
        extractor = FeatureExtractor(
            '/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Sadness/SadnessAudios/' + str(i - 200) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})
    audio_with_min_frames, min_frames = get_min_frames_audio(
        mfcc_coeff_vectors_dict)
    processed_mfcc_coeff = preprocess_input_vectors(
        mfcc_coeff_vectors_dict, min_frames)
    # frames = min_frames
    # print frames
    # print len(processed_mfcc_coeff['1'])
    # for each_vector in processed_mfcc_coeff['1']:
    #     print len(each_vector)
    print "mffcc found..."
    classes = ["happiness", "sadness"]
    # one sample per MFCC frame (26 coefficients), binary target
    training_data = ClassificationDataSet(
        26, target=1, nb_classes=2, class_labels=classes)
    # training_data = SupervisedDataSet(13, 1)
    try:
        network = NetworkReader.readFrom(
            'network_state_frame_level_new2_no_pp1.xml')
    # NOTE(review): bare except also swallows SystemExit/KeyboardInterrupt;
    # intended only as "file missing -> train from scratch"
    except:
        # happiness files 1-50 labelled 1, frame by frame
        for i in range(1, 51):
            mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
            for each_vector in mfcc_coeff_vectors:
                training_data.appendLinked(each_vector, [1])
        # sadness files 201-250 labelled 0
        for i in range(201, 251):
            mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
            for each_vector in mfcc_coeff_vectors:
                training_data.appendLinked(each_vector, [0])
        training_data._convertToOneOfMany()
        print "prepared training data.."
        print training_data.indim, training_data.outdim
        network = buildNetwork(
            training_data.indim, 5, training_data.outdim, fast=True)
        trainer = BackpropTrainer(network, learningrate=0.01, momentum=0.99)
        print "Before training...", trainer.testOnData(training_data)
        trainer.trainOnDataset(training_data, 1000)
        print "After training...", trainer.testOnData(training_data)
        # NOTE(review): reads from ...no_pp1.xml above but writes ...no_pp.xml
        # -- confirm the file names are intentional
        NetworkWriter.writeToFile(
            network, "network_state_frame_level_new2_no_pp.xml")
def main():
    """Train a 784-20-10 digit classifier on train.csv, pickle the network,
    then activate it on test.csv and write raw activations to nn.output."""
    logger.debug('starting')
    print 'starting'
    # create the training & test sets, skipping the header row with [1:]
    dataset = genfromtxt(open(basepath + '/train.csv','r'), delimiter=',', dtype='f8')[1:]
    logger.debug('opened dataset')
    # first column is the label, the remaining 784 are pixel features
    target = [x[0] for x in dataset]
    train = [x[1:] for x in dataset]
    print target
    logger.debug('about to build data set')
    print 'building dataset'
    cds = ClassificationDataSet(784, target=10, nb_classes=10)
    for i in range(len(target)):
        # one-hot encode the digit label
        targetvec = [0 for j in range(10)]
        targetnum = float(target[i])
        targetvec[int(float(target[i]))] = 1
        cds.addSample(train[i], targetvec)
        print i
        print 'adding sample: ' + str(targetnum)
        print targetvec
    logger.debug('about to build network')
    net = buildNetwork(784, 20, 10)
    logger.debug('about to build trainer')
    trainer = BackpropTrainer(net, dataset=cds, momentum=0.1, verbose=True, weightdecay=0.01)
    logger.debug('about to start training')
    print 'training'
    trainer.trainUntilConvergence()
    # save the net
    nfile = open(basepath + '/nn.pickle', 'w')
    pickle.dump(net, nfile)
    nfile.close()
    # run the real test
    logger.debug('opening test set')
    tests = genfromtxt(open(basepath + '/test.csv','r'), delimiter=',', dtype='f8')[1:]
    results = []
    print 'testing'
    for test in tests:
        logger.debug('activating net!')
        res = net.activate(test)
        logger.debug('result: ' + str(res))
        results.append(res)
    resultfile = open(basepath + '/nn.output', 'w')
    resultfile.write(str(results))
    print 'done'
def nn(tx, ty, rx, ry, add="", iterations=250): """ trains and plots a neural network on the data we have """ resultst = [] resultsr = [] positions = range(iterations) network = buildNetwork(tx[1].size, 5, 1, bias=True) ds = ClassificationDataSet(tx[1].size, 1) for i in xrange(len(tx)): ds.addSample(tx[i], [ty[i]]) trainer = BackpropTrainer(network, ds, learningrate=0.01) for i in positions: trainer.train() resultst.append(sum((np.array([round(network.activate(test)) for test in tx]) - ty)**2)/float(len(ty))) resultsr.append(sum((np.array([round(network.activate(test)) for test in rx]) - ry)**2)/float(len(ry))) print i plot([0, iterations, 0, 1], (positions, resultst, "ro", positions, resultsr, "bo"), "Network Epoch", "Percent Error", "Neural Network Error", "NN"+add)
def start():
    """Load labelled features from output.txt, train an FNN on the full
    feature set and on a tree-selected reduced set, and compare the two."""
    featuresList = []
    labelsList = []
    featuresList, labelsList = loadFile("output.txt")
    print 'Normalizing array...'
    normalizearray(featuresList)
    # 8-way tissue classification dataset over all extracted features
    alldata = ClassificationDataSet(
        len(featuresList[0]), 1, nb_classes=8,
        class_labels=['ffi_brainmatter','ffi_neuron','ffi_vacuole','ffi_astrocyte', 'wt_brainmatter', 'wt_neuron', 'wt_vacuole', 'wt_astrocyte'])
    for i in range(len(labelsList)):
        alldata.appendLinked(featuresList[i], labelsList[i])
    #print 'All data: ', alldata
    #print 'Statisticcs: ', alldata.calculateStatistics()
    # pick K via tree-based feature importance, then keep the best K features
    newK = fSel.getTreeFeatures(featuresList, labelsList);
    newK = newK.shape[1]
    print "K= ", newK
    reducedFeatures = fSel.getBestK(featuresList, labelsList, 'f_classif', newK)
    reducedData = ClassificationDataSet(
        len(reducedFeatures[0]), 1, nb_classes=8,
        class_labels=['ffi_brainmatter','ffi_neuron','ffi_vacuole','ffi_astrocyte', 'wt_brainmatter', 'wt_neuron', 'wt_vacuole', 'wt_astrocyte'])
    # prep reducedData object with reduced feature list
    for i in range(len(labelsList)):
        reducedData.appendLinked(reducedFeatures[i], labelsList[i])
    print 'Splitting test and training data...'
    tstdata, trndata = alldata.splitWithProportion( 0.30 )
    reducedTestData, reducedTrainData = reducedData.splitWithProportion(0.3)
    print 'Number of training and test patterns: ', len(trndata), len(tstdata)
    trndata._convertToOneOfMany(bounds=[0,1])
    tstdata._convertToOneOfMany(bounds=[0,1])
    reducedTestData._convertToOneOfMany(bounds=[0,1])
    reducedTrainData._convertToOneOfMany(bounds=[0,1])
    #print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    #print "Sample (input, target, class):"
    #print trndata['input'][0], trndata['target'][0], trndata['class'][0]
    #print trndata['input'][1], trndata['target'][1], trndata['class'][1]
    buildFNN(tstdata, trndata)
    print "___________________________________________FEATURE REDUCTION________________________________________________"
    buildFNN(reducedTestData, reducedTrainData)
def neutral_net(train_data, test_data, n_est, maxd):
    """Train a survival ("Lived"/"Died") classifier and return
    [train accuracy, test accuracy].

    `train_data`/`test_data`: matrices whose column 0 is the 0/1 label and the
    remaining columns are integer features -- TODO confirm. `n_est` caps the
    training epochs; `maxd` is the hidden-layer width.
    """
    #ds = SupervisedDataSet(len(train_data[0,:])-1,1)
    ds = ClassificationDataSet(len(train_data[0,:])-1, 1, nb_classes=2, class_labels=['Lived','Died'])
    X = []; y = []; X1 = []; y1 = []
    for row in range(0, len(train_data[:,0])):
        X.append(train_data[row,1:].astype(int))
        y.append([train_data[row,0].astype(int)])
        #ds.addSample(train_data[row,1:].astype(int),train_data[row,0].astype(int))
    #for row in range(0,len(test_data[:,0])):
    #    X.append(test_data[row,1:].astype(int))
    #    y.append([test_data[row,0].astype(int)])
    X = np.array(X); y = np.array(y)
    ds.setField('input', X)
    ds.setField('target', y)
    ds._convertToOneOfMany(bounds=[0,1]) # only for classification
    #net = buildNetwork(len(train_data[0,:])-1,100, 1)
    read = False
    if read:
        #net = NetworkReader.readFrom('10_200.xml') # hiddenclass=SigmoidLayer
        pass
    else:
        net = buildNetwork(ds.indim, maxd, ds.outdim, bias=True, hiddenclass=SigmoidLayer, outclass=SoftmaxLayer)#SoftmaxLayer)
    trainer = BackpropTrainer(net, dataset=ds, verbose=False, learningrate=0.01, momentum=0.1, weightdecay=0.01)
    trainer.trainUntilConvergence(maxEpochs=n_est, continueEpochs=10, validationProportion=0.3)
    #NetworkWriter.writeToFile(net, '10_200.xml')
    # training accuracy: count samples where the larger of the two softmax
    # outputs matches the 0/1 label
    tot = 0.
    for a, b in zip(X, y):
        val = net.activate(a)
        tot += int((val[0] > val[1] and b==0) or (val[0]<val[1] and b==1))
        '''num = int((net.activate(a)<0.5 and b<0.5) or (net.activate(a)>0.5 and b>0.5)) tot+=num'''
    for row in range(0, len(test_data[:,0])):
        X1.append(test_data[row,1:].astype(int))
        y1.append([test_data[row,0].astype(int)])
    X1 = np.array(X1); y1 = np.array(y1)
    # test accuracy, and the 0/1 predictions passed to the result printer
    tot1 = 0.
    output = []
    for a, b in zip(X1, y1):
        val = net.activate(a)
        tot1 += int((val[0] > val[1] and b==0) or (val[0]<val[1] and b==1))
        output.append(int(val[0]<val[1]))
        '''num = int((net.activate(a)<0.5 and b<0.5) or (net.activate(a)>0.5 and b>0.5)) tot1+=num output.append(int(net.activate(a)>0.5))'''
    pr.print_results(output)
    return [tot/len(y), tot1/len(y1)]
def main():
    """Utterance-level happiness/sadness classifier over flattened MFCCs.

    Extracts MFCCs for 200 happiness and 200 sadness wav files, loads (or
    trains and saves) a network on files 1-150 / 201-350, then prints the
    predicted class for the held-out happiness files 151-200.
    """
    print "Calculating mfcc...."
    # map "1".."400" -> (mfcc matrix, number of frames); 1-200 happiness,
    # 201-400 sadness
    mfcc_coeff_vectors_dict = {}
    for i in range(1, 201):
        extractor = FeatureExtractor('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Happiness/HappinessAudios/' + str(i) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})
    for i in range(201, 401):
        extractor = FeatureExtractor('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Sadness/SadnessAudios/' + str(i - 200) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})
    audio_with_min_frames, min_frames = get_min_frames_audio(mfcc_coeff_vectors_dict)
    processed_mfcc_coeff = preprocess_input_vectors(mfcc_coeff_vectors_dict, min_frames)
    frames = min_frames
    print "mfcc found...."
    classes = ["happiness", "sadness"]
    try:
        network = NetworkReader.readFrom('network_state_new_.xml')
    # NOTE(review): bare except also swallows SystemExit/KeyboardInterrupt;
    # intended only as "file missing -> train from scratch"
    except:
        # Create new network and start Training
        # each sample is a whole utterance: frames * 26 coefficients flattened
        training_data = ClassificationDataSet(frames * 26, target=1, nb_classes=2, class_labels=classes)
        # training_data = SupervisedDataSet(frames * 39, 1)
        for i in range(1, 151):
            mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
            training_data.appendLinked(mfcc_coeff_vectors.ravel(), [1])
            # training_data.addSample(mfcc_coeff_vectors.ravel(), [1])
        for i in range(201, 351):
            mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
            training_data.appendLinked(mfcc_coeff_vectors.ravel(), [0])
            # training_data.addSample(mfcc_coeff_vectors.ravel(), [0])
        training_data._convertToOneOfMany()
        network = buildNetwork(training_data.indim, 5, training_data.outdim)
        trainer = BackpropTrainer(network, learningrate=0.01, momentum=0.99)
        print "Before training...", trainer.testOnData(training_data)
        trainer.trainOnDataset(training_data, 1000)
        print "After training...", trainer.testOnData(training_data)
        NetworkWriter.writeToFile(network, "network_state_new_.xml")
    print "*" * 30 , "Happiness Detection", "*" * 30
    for i in range(151, 201):
        output = network.activate(processed_mfcc_coeff[str(i)].ravel())
        # print output,
        # if output > 0.7:
        #     print "happiness"
        # argmax over the two output activations picks the class
        class_index = max(xrange(len(output)), key=output.__getitem__)
        class_name = classes[class_index]
        print class_name
def montaDatasetConvertido(dadosTemporario):
    """Copy every sample of a pybrain dataset into a fresh 4-input, 1-output
    ClassificationDataSet.

    (Original Portuguese docstring: converts a ClassificationDataSet into a
    SupervisedDataSet-compatible object; used for the train, test and
    validation datasets alike.)
    """
    dataset = ClassificationDataSet(4, 1)
    for idx in range(dadosTemporario.getLength()):
        amostra = dadosTemporario.getSample(idx)
        dataset.addSample(amostra[0], amostra[1])
    return dataset
def montaDataset():
    """Load the sklearn iris data and return it as a ClassificationDataSet
    with 4 input features, a 1-value target and 3 classes.

    (Original comments were in Portuguese; translated here.)
    """
    # load the iris dataset via scikit-learn
    iris = datasets.load_iris()
    entradas, saidas = iris.data, iris.target
    # 4-dim input vector, 1-dim target, 3 classes to classify
    dataset = ClassificationDataSet(4, 1, nb_classes=3)
    for i in range(len(entradas)):
        dataset.addSample(entradas[i], saidas[i])
    return dataset
def predict(self, x_test):
    """Run the trained network over `x_test` and return the raw activation
    matrix (one row of class activations per sample)."""
    DS = ClassificationDataSet(x_test.shape[1], nb_classes=self.__class_num)
    DS.setField('input', x_test)
    # dummy targets -- only the inputs matter for activation
    DS.setField('target', np.zeros((x_test.shape[0], 1)))
    DS._convertToOneOfMany()
    activations = self.__pybrain_bpnn.activateOnDataset(DS)
    # this part converts an activation vector to a class number;
    # kept (commented) for a future purpose:
    # activations = activations.argmax(axis=1)  # highest activation gives the class
    # if not self.__class_zero_indexing:  # indexing from 1 -> add one to result
    #     activations += 1
    return activations
def mlp(): mlp = buildNetwork(26, 500, 3456, bias=True, outclass=SoftmaxLayer) #print net['in'], net['hidden0'], net['out'] ds = import_data() #http://stackoverflow.com/questions/27887936/attributeerror-using-pybrain-splitwithportion-object-type-changed tstdata_temp, trndata_temp = ds.splitWithProportion(0.25) tstdata = ClassificationDataSet(26, 1, nb_classes=3456) for n in xrange(0, tstdata_temp.getLength()): tstdata.addSample(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1]) trndata = ClassificationDataSet(26, 1, nb_classes=3456) for n in xrange(0, trndata_temp.getLength()): trndata.addSample(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1]) trndata._convertToOneOfMany() tstdata._convertToOneOfMany() print type(trndata['class']) print "Number of training patterns: ", len(trndata) print "Input and output dimensions: ", trndata.indim, trndata.outdim print "First sample (input, target, class):" print trndata['input'][0], trndata['target'][0], trndata['class'][0] trainer = BackpropTrainer(mlp, trndata, verbose = True, learningrate=0.01) trainer.trainUntilConvergence(maxEpochs=1000) trnresult = percentError( trainer.testOnClassData(), trndata['class'] ) tstresult = percentError( trainer.testOnClassData( dataset=tstdata ), tstdata['class'] ) print "epoch: %4d" % trainer.totalepochs, \ " train error: %5.2f%%" % trnresult, \ " test error: %5.2f%%" % tstresult
def import_data(train_file_path='../data/train_trip.csv'): dataset = ClassificationDataSet(26, 1, nb_classes=3456) train_file = open(train_file_path, "r") for line in train_file: try: datas = json.loads(line) data = [] #CALL_TYPE: 1 data.append(datas[2]) #TAXI_ID: 1 data.append(ord(datas[1].lower()) - ord('a')) #time embedding: 4 for i in datas[3]: data.append(int(i)) #trip: 10*2 = 20 for i in datas[4]: data.append(i[0]) data.append(i[1]) dataset.addSample(data, [int(datas[5])]) except: print 'error line:', line return dataset
def train(self, x, y, class_number=-1):
    """Fit the internal pybrain network to (x, y).

    `class_number` can force a minimum class count; when labels run 1..K
    they are shifted down to 0-based indexing first.
    """
    self.__class_num = max(np.unique(y).size, class_number)
    if max(y) == self.__class_num:
        # labels are 1-based -- remember that and shift to 0-based
        self.__class_zero_indexing = False
        y = np.array([label - 1 for label in y])
    DS = ClassificationDataSet(x.shape[1], nb_classes=self.__class_num)
    DS.setField('input', x)
    DS.setField('target', y.reshape(y.size, 1))
    DS._convertToOneOfMany()
    # hidden width = mean of input and output dims (integer division)
    hidden_num = (DS.indim + DS.outdim) / 2
    self.__pybrain_bpnn = buildNetwork(DS.indim, hidden_num, DS.outdim,
                                       bias=True, hiddenclass=SigmoidLayer,
                                       outclass=SoftmaxLayer)
    trainer = BackpropTrainer(self.__pybrain_bpnn, dataset=DS,
                              learningrate=0.07, lrdecay=1.0, momentum=0.6)
    trainer.trainUntilConvergence(DS, maxEpochs=30)
def test_ann(self): from pybrain.datasets.classification import ClassificationDataSet # below line can be replaced with the algorithm of choice e.g. # from pybrain.optimization.hillclimber import HillClimber from pybrain.optimization.populationbased.ga import GA from pybrain.tools.shortcuts import buildNetwork # create XOR dataset d = ClassificationDataSet(2) d.addSample([181, 80], [1]) d.addSample([177, 70], [1]) d.addSample([160, 60], [0]) d.addSample([154, 54], [0]) d.setField('class', [[0.], [1.], [1.], [0.]]) nn = buildNetwork(2, 3, 1) # d.evaluateModuleMSE takes nn as its first and only argument ga = GA(d.evaluateModuleMSE, nn, minimize=True) for i in range(100): nn = ga.learn(0)[0] print nn.activate([181, 80])
def train(network_file, input_length, output_length, training_data_file, learning_rate, momentum, stop_on_convergence, epochs, classify):
    """Load a stored network, build a dataset from a CSV training file, train
    with backprop, and persist the network if the resulting error is a number.

    Returns the trainer's test error.
    """
    n = get_network(network_file)
    if classify:
        ds = ClassificationDataSet(int(input_length), int(output_length) * 2)
        # NOTE(review): _convertToOneOfMany on an *empty* dataset, before any
        # samples are added -- confirm this has the intended effect
        ds._convertToOneOfMany()
    else:
        ds = SupervisedDataSet(int(input_length), int(output_length))
    training_data = get_training_data(training_data_file)
    NetworkManager.last_training_set_length = 0
    for line in training_data:
        # each CSV row: input_length inputs followed by the outputs
        data = [float(x) for x in line.strip().split(',') if x != '']
        input_data = tuple(data[:(int(input_length))])
        output_data = tuple(data[(int(input_length)):])
        ds.addSample(input_data, output_data)
        NetworkManager.last_training_set_length += 1
    t = BackpropTrainer(n, learningrate=learning_rate, momentum=momentum, verbose=True)
    print "training network " + network_storage_path + network_file
    if stop_on_convergence:
        t.trainUntilConvergence(ds, epochs)
    else:
        if classify:
            # NOTE(review): this trains on ds['class'] (a field array), not on
            # the dataset itself -- looks suspicious, confirm intent
            t.trainOnDataset(ds['class'], epochs)
        else:
            t.trainOnDataset(ds, epochs)
    error = t.testOnData()
    print "training done"
    if not math.isnan(error):
        save_network(n, network_file)
    else:
        print "error occured, network not saved"
    # NOTE(review): printed even on the NaN path above
    print "network saved"
    return error
def _get_classification_dataset():
    """Return a fresh, empty ClassificationDataSet sized by the module constants."""
    dataset = ClassificationDataSet(INPUT, OUTPUT, nb_classes=CLASSES)
    return dataset
# NOTE(review): the three assignments below reference ``i`` and are presumably
# the tail of an enclosing loop that begins before this chunk. They map
# categorical string columns to numeric codes via lookup dicts.
numdata[i][10] = qualidict[numdata[i][10].strip()]
numdata[i][11] = modedict[numdata[i][11].strip()]
numdata[i][12] = unidict[numdata[i][12].strip()]
# Dump the numeric table to CSV; commas inside values are replaced by spaces.
fobj = open('02 select_data_num.csv', 'wb')
[(fobj.write(item), fobj.write(',')) for item in header]  # header row
fobj.write('\n')
[([(fobj.write(str(it).replace(',', ' ')), fobj.write(',')) for it in item],
  fobj.write('\n')) for item in numdata]  # one CSV row per record
fobj.close()
# Standardize every column except the first two (presumably id/label — TODO confirm).
npdata = np.array(numdata, dtype=np.float)
npdata[:, 2:] = preprocessing.scale(npdata[:, 2:])
numdata = copy.deepcopy(npdata)
# 14 inputs -> 14 hidden -> 1 softmax output; binary classification dataset.
net = buildNetwork(14, 14, 1, bias=True, outclass=SoftmaxLayer)
ds = ClassificationDataSet(14, 1, nb_classes=2)
for item in numdata:
    # Column 1 is the target, columns 2.. are the 14 features.
    ds.addSample(tuple(item[2:]), (item[1]))
dsTrain, dsTest = ds.splitWithProportion(0.8)
print('Trainging')
# NOTE(review): the trainer is fit on the full ``ds``, not ``dsTrain`` —
# possibly unintended; verify against the evaluation code.
trainer = BackpropTrainer(net, ds, momentum=0.1, verbose=True,
                          weightdecay=0.01)
# trainer.train()
trainer.trainUntilConvergence(maxEpochs=20)
print('Finish training')
Traininp = dsTrain['input']
@author: Leonardo
"""
# NOTE(review): the two lines above close a module docstring opened before
# this chunk.
# Load the Iris data set via scikit-learn.
from sklearn import datasets
iris = datasets.load_iris()
# Separate the inputs and the targets.
X, y = iris.data, iris.target
print(len(X))
print(len(y))
from pybrain.datasets.classification import ClassificationDataSet
# NOTE(review): this rebinding shadows the ``datasets`` module imported above.
datasets = ClassificationDataSet(4, 1, nb_classes=3)  # nb_classes = number of output classes
# Add every sample to the dataset.
for i in range(len(X)):
    datasets.addSample(X[i], y[i])
len(datasets)  # no-op expression; presumably left over from interactive use
'''
print(datasets['input'])
print(datasets['target'])
'''
# Partition the data for training: 60% goes to the training split.
train_data, part_data = datasets.splitWithProportion(
    0.6)
print('Quantidade para treino: %d' % len(train_data))
# NOTE(review): the three statements below are the tail of a dataset-splitting
# method whose definition begins before this chunk; ``self`` here appears to
# be a dataset object indexed by field name — TODO confirm.
leftDs = ClassificationDataSet(inp=self['input'][leftIndicies].copy(),
                               target=self['target'][leftIndicies].copy())
rightDs = ClassificationDataSet(inp=self['input'][rightIndicies].copy(),
                                target=self['target'][rightIndicies].copy())
return leftDs, rightDs
# --- module-level script: Iris classification demo ---
irisData = datasets.load_iris()
dataFeatures = irisData.data
dataTargets = irisData.target
#plt.matshow(irisData.images[11], cmap=cm.Greys_r)
#plt.show()
#print dataTargets[11]
#print dataFeatures.shape
# 4 features, 1 target column, 3 classes.
dataSet = ClassificationDataSet(4, 1, nb_classes=3)
for i in range(len(dataFeatures)):
    dataSet.addSample(np.ravel(dataFeatures[i]), dataTargets[i])
# 70/30 train/test split; ``splitWithProportion`` is called as a free
# function here — presumably a local helper defined elsewhere, TODO confirm.
trainingData, testData = splitWithProportion(dataSet, 0.7)
# One output neuron per class.
trainingData._convertToOneOfMany()
testData._convertToOneOfMany()
neuralNetwork = buildNetwork(trainingData.indim, 7, trainingData.outdim,
                             outclass=SoftmaxLayer)
trainer = BackpropTrainer(neuralNetwork, dataset=trainingData, momentum=0.01,
                          learningrate=0.05, verbose=True)
trainer.trainEpochs(10000)
print('Error (test dataset): ',
      percentError(trainer.testOnClassData(dataset=testData),
                   testData['class']))
__author__ = 'QSG' from pybrain.datasets.classification import ClassificationDataSet from pybrain.optimization.populationbased.ga import GA from pybrain.tools.shortcuts import buildNetwork d = ClassificationDataSet(3) d.addSample([0, 0, 0], [0.]) d.addSample([0, 1, 0], [1.]) d.addSample([1, 0, 0], [1.]) d.addSample([1, 1, 0], [0.]) d.setField('class', [[0.], [1.], [1.], [0.]]) nn = buildNetwork(3, 3, 1) print nn.activate([0, 1, 1]) ga = GA(d.evaluateModuleMSE, nn, minimize=True) for i in range(100): nn = ga.learn(0)[0] print nn.activate([0, 1, 1])[0] # print nn
@author: Anusha
"""
# NOTE(review): the two lines above close a module docstring opened before
# this chunk.
# Load the Iris data set (150 samples, 4 features, 3 classes).
from sklearn import datasets
iris = datasets.load_iris()
X, y = iris.data, iris.target
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
#import numpy as np
import matplotlib.pyplot as pl
# 4 inputs, 1 target column, 3 classes.
ds = ClassificationDataSet(4, 1, nb_classes=3)
for i in range(len(X)):
    ds.addSample(X[i], y[i])
# Split the data into train, test and validation sets, 60/20/20.
trndata, partdata = ds.splitWithProportion(0.60)
tstdata, validdata = partdata.splitWithProportion(0.50)
# Encode classes with one output neuron per class (one-of-many encoding).
trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
validdata._convertToOneOfMany()
# The original target values are preserved in the 'class' field created by
# _convertToOneOfMany.
print trndata['class']
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.optimization.populationbased.ga import GA
from pybrain.tools.shortcuts import buildNetwork

# create XOR dataset
d = ClassificationDataSet(2)
d.addSample([0., 0.], [0.])
d.addSample([0., 1.], [1.])
d.addSample([1., 0.], [1.])
d.addSample([1., 1.], [0.])
d.setField('class', [[0.], [1.], [1.], [0.]])

nn = buildNetwork(2, 3, 1)
# Evolve the network weights toward minimal MSE on the XOR data.
ga = GA(d.evaluateModuleMSE, nn, minimize=True)
for i in range(100):
    nn = ga.learn(0)[0]

# BUGFIX: the interactive-session transcript below was pasted in as bare
# statements ("In [68]: ..."), which is a SyntaxError at module level; it is
# preserved here as comments instead of code.
# test results after the above script
# In [68]: nn.activate([0,0])
# Out[68]: array([-0.07944574])
# In [69]: nn.activate([1,0])
# Out[69]: array([ 0.97635635])
# In [70]: nn.activate([0,1])
# Out[70]: array([ 1.0216745])
# In [71]: nn.activate([1,1])
# Out[71]: array([ 0.03604205])
from sklearn import datasets
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer

# Iris: 150 samples, 4 features, 3 classes.
iris = datasets.load_iris()
x, y = iris.data, iris.target
dataset = ClassificationDataSet(4, 1, nb_classes=3)
for i in range(len(x)):
    dataset.addSample(x[i], y[i])
# 60/20/20 split into train / test / validation portions.
train_data_temp, part_data_temp = dataset.splitWithProportion(0.6)
test_data_temp, val_data_temp = part_data_temp.splitWithProportion(0.5)
# Rebuild each split as a fresh ClassificationDataSet.
train_data = ClassificationDataSet(4, 1, nb_classes=3)
for n in range(train_data_temp.getLength()):
    train_data.addSample(
        train_data_temp.getSample(n)[0], train_data_temp.getSample(n)[1])
test_data = ClassificationDataSet(4, 1, nb_classes=3)
for n in range(test_data_temp.getLength()):
    # NOTE(review): likely copy-paste bug — the test samples are appended to
    # ``train_data`` here, leaving ``test_data`` empty.
    train_data.addSample(
        test_data_temp.getSample(n)[0], test_data_temp.getSample(n)[1])
val_data = ClassificationDataSet(4, 1, nb_classes=3)
for n in range(val_data_temp.getLength()):
    # NOTE(review): the statement below is truncated at this chunk boundary.
    val_data.addSample(
        val_data_temp.getSample(n)[0],
from sklearn import datasets
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer

# Load the Iris data set and report the sample count.
iris = datasets.load_iris()
x, y = iris.data, iris.target
print(len(x))

# Wrap the samples in a classification dataset: 4 features, 3 classes.
dataset = ClassificationDataSet(4, 1, nb_classes=3)
for features, target in zip(x, y):
    dataset.addSample(features, target)

# 60% training; the remainder split evenly into test and validation halves.
train_data, part_data = dataset.splitWithProportion(0.6)
test_data, val_data = part_data.splitWithProportion(0.5)

# Single hidden layer of 3 units.
net = buildNetwork(dataset.indim, 3, dataset.outdim)
trainer = BackpropTrainer(net, dataset=train_data, learningrate=0.01,
                          momentum=0.1, verbose=True)
train_errors, val_errors = trainer.trainUntilConvergence(dataset=train_data,
                                                         maxEpochs=100)
trainer.totalepochs  # no-op expression; presumably from interactive use
#! /usr/bin/env python3
import matplotlib.pyplot as plt
from sklearn import datasets
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised import BackpropTrainer

# Iris: 150 samples, 4 features, 3 classes.
iris = datasets.load_iris()
X, y = iris.data, iris.target

dataset = ClassificationDataSet(4, 1, nb_classes=3)
for features, label in zip(X, y):
    dataset.addSample(features, label)

# 60/20/20 split: training vs. (testing, validation).
training_data, partitioned_data = dataset.splitWithProportion(0.6)
testing_data, validation_data = partitioned_data.splitWithProportion(0.5)

# Two hidden layers of 2 units each.
network = buildNetwork(dataset.indim, 2, 2, dataset.outdim)
trainer = BackpropTrainer(network, dataset=training_data,
                          learningrate=0.01, momentum=0.1, verbose=True)
training_errors, validation_errors = trainer.trainUntilConvergence(
    dataset=training_data, maxEpochs=200)
# To do the following you need to run command: pip install pybrain
from pybrain.datasets.classification import ClassificationDataSet
# below line can be replaced with the algorithm of choice e.g.
# from pybrain.optimization.hillclimber import HillClimber
from pybrain.optimization.populationbased.ga import GA
from pybrain.tools.shortcuts import buildNetwork

# Four labelled two-feature samples.
dataset = ClassificationDataSet(2)
for features, label in [([181, 80], [1]), ([177, 70], [1]),
                        ([160, 60], [0]), ([154, 54], [0])]:
    dataset.addSample(features, label)
dataset.setField('class', [[0.], [1.], [1.], [0.]])

net = buildNetwork(2, 3, 1)
# dataset.evaluateModuleMSE takes the network as its first and only argument.
optimizer = GA(dataset.evaluateModuleMSE, net, minimize=True)
for _ in range(100):
    net = optimizer.learn(0)[0]
print(net.activate([181, 80]))
import numpy as np
from sklearn import datasets
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
import matplotlib.pyplot as plt

# Load Iris: inputs (entrada) and targets (saida).
iris = datasets.load_iris()
entrada, saida = iris.data, iris.target
# 4 features, 1 target column, 3 classes.
dataset = ClassificationDataSet(4, 1, nb_classes=3)
# Add the samples to the dataset.
for i in range(len(entrada)):
    dataset.addSample(entrada[i], saida[i])
# Take 60% of the data for training the network.
parteTreino, parteDados = dataset.splitWithProportion(0.6)
print("Quantidade para treinamento da rede : " + str(len(parteTreino)))
# Split the remaining data evenly into test and validation parts.
teste, validacao = parteDados.splitWithProportion(0.5)
print("Quantidade para teste da rede : " + str(len(teste)))
print("Quantidade para validação da rede : " + str(len(validacao)))
# Build the network: 4 inputs -> 3 hidden -> output layer.
rede = buildNetwork(dataset.indim, 3, dataset.outdim)
# Run the training and collect the errors.
# NOTE(review): the call below is truncated at this chunk boundary.
treinamento = BackpropTrainer(rede,