from numpy import ravel
from pybrain.datasets import ClassificationDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import SoftmaxLayer
from pybrain.supervised.trainers import BackpropTrainer

# 64 inputs, 10 target dimensions, 10 classes: y[k] must already be
# one-of-many encoded to match the 10-dimensional target.
ds = ClassificationDataSet(64, 10, nb_classes=10)
test = ClassificationDataSet(64, 10, nb_classes=10)
training = ClassificationDataSet(64, 10, nb_classes=10)

for k in xrange(len(X)):
    ds.addSample(ravel(X[k]), y[k])

# splitWithProportion() returns plain SupervisedDataSets, so copy the
# samples back into ClassificationDataSets to keep the class metadata.
test_t, training_t = ds.splitWithProportion(0.25)
for k in xrange(0, test_t.getLength()):
    test.addSample(test_t.getSample(k)[0], test_t.getSample(k)[1])
for k in xrange(0, training_t.getLength()):
    training.addSample(training_t.getSample(k)[0], training_t.getSample(k)[1])

print(training.getLength())
print(test.getLength())
print(test.indim)
print(test.outdim)
print(training.indim)
print(training.outdim)

fnn = buildNetwork(training.indim, 64, training.outdim, outclass=SoftmaxLayer)
trainer = BackpropTrainer(fnn, dataset=training, momentum=0.1,
                          learningrate=0.01, verbose=True, weightdecay=0.01)
trainer.trainEpochs(10)
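The snippet above trains for ten epochs but never measures accuracy. A minimal evaluation sketch, assuming the `trainer` and `test` objects built above and the 10-dimensional one-of-many targets used there:

from pybrain.utilities import percentError

# Winner class per test sample, compared against the index of the 1
# in each one-of-many target row.
predicted = trainer.testOnClassData(dataset=test)
true_classes = test['target'].argmax(axis=1)
print("Test error: %5.2f%%" % percentError(predicted, true_classes))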
print(len(collection.getUniqueCategories()))

# Create training sets and test sets
trainingset = collection.createAnnTrainingsets()

# Map training sets and test sets to PyBrain
DS = ClassificationDataSet(trainingset['input_dimension'],
                           trainingset['output_dimension'])
for i in range(len(trainingset['input_arrays'])):
    DS.appendLinked(trainingset['input_arrays'][i],
                    trainingset['output_arrays'][i])

# Split the dataset into five parts (1/5 test and 4/5 training)
num_splits = 5
fold_size = DS.getLength() / num_splits
indices = np.random.permutation(DS.getLength())
sliced_indices = slice_list(indices, num_splits)

trainingsets = []
testsets = []
target_actual = []
target_pred = []

# Build the five folds: each slice becomes one test set, the remaining
# indices form the matching training set (see the sketch after this block).
for part in sliced_indices:
    testset = SupervisedDataSet(inp=DS['input'][part].copy(),
                                target=DS['target'][part].copy())
    testsets.append(testset)
    training_indices = sorted(set(indices) - set(part))
    trainingset = SupervisedDataSet(inp=DS['input'][training_indices].copy(),
                                    target=DS['target'][training_indices].copy())
    trainingsets.append(trainingset)
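A hedged sketch of consuming these folds for cross-validation, assuming PyBrain's `buildNetwork`/`BackpropTrainer`; the hidden-layer size and epoch count are placeholders, not values from the source:

from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer

for trn, tst in zip(trainingsets, testsets):
    net = buildNetwork(trn.indim, 10, trn.outdim)  # 10 hidden units: placeholder
    BackpropTrainer(net, dataset=trn).trainEpochs(20)  # 20 epochs: placeholder
    for idx in xrange(tst.getLength()):
        inp, target = tst.getSample(idx)
        target_pred.append(net.activate(inp))
        target_actual.append(target)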
validata = ClassificationDataSet(input, target, nb_classes=classes)
for j in xrange(0, validata_temp.getLength()):
    validata.addSample(validata_temp.getSample(j)[0], validata_temp.getSample(j)[1])

# ------ PREPARE TEST DATA
test_temp = ClassificationDataSet(input, target, nb_classes=classes)
for i in range(len(x2)):
    # addSample() requires a target as well; the test labels are unknown
    # here, so a dummy class 0 is used as a placeholder.
    test_temp.addSample(x2[i], [0])
tstdata = ClassificationDataSet(input, target, nb_classes=classes)
for m in xrange(0, test_temp.getLength()):
    tstdata.addSample(test_temp.getSample(m)[0], test_temp.getSample(m)[1])

trndata._convertToOneOfMany()
tstdata._convertToOneOfMany()
validata._convertToOneOfMany()

# Create a feed-forward network with 4 inputs, 70 hidden neurons and 2 outputs
FNN_INPUT = 4
FNN_HIDDEN = 70
FNN_OUTPUT = 2
fnn = buildNetwork(FNN_INPUT, FNN_HIDDEN, FNN_OUTPUT, outclass=SoftmaxLayer, bias=True)
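A hedged training sketch for the network built above; the trainer settings mirror the other snippets in this collection rather than anything prescribed here:

from pybrain.supervised.trainers import BackpropTrainer

# Train the softmax classifier on the one-of-many-encoded training set.
trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1,
                          verbose=True, weightdecay=0.01)
trainer.trainEpochs(10)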
import pickle
import time
from os import makedirs, sep

from pybrain.datasets import ClassificationDataSet
from pybrain.structure import (RecurrentNetwork, LinearLayer, SigmoidLayer,
                               SoftmaxLayer, FullConnection)
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.utilities import percentError


def exec_algo(xml_file, output_location):
    # `ml` is the XML binding module from the surrounding project; the
    # parsed fields are assumed to be typed (ints/floats) by its schema.
    rootObj = ml.parse(xml_file)
    file_name = rootObj.MachineLearning.prediction.datafile
    data_file = open(file_name)
    var_input = rootObj.MachineLearning.prediction.input
    var_output = rootObj.MachineLearning.prediction.output
    var_classes = rootObj.MachineLearning.prediction.classes

    DS = ClassificationDataSet(var_input, var_output, nb_classes=var_classes)
    for line in data_file.readlines():
        data = [float(x) for x in line.strip().split(',') if x != '']
        inp = tuple(data[:var_input])
        output = tuple(data[var_input:])
        DS.addSample(inp, output)

    # Proportion 0 keeps every sample in the training half.
    tstdata, trndata = DS.splitWithProportion(0)

    trdata = ClassificationDataSet(trndata.indim, 1, nb_classes=10)
    for i in xrange(trndata.getLength()):
        if trndata.getSample(i)[1][0] != 100:
            trdata.addSample(trndata.getSample(i)[0], trndata.getSample(i)[1])
    trdata._convertToOneOfMany()
    print('%d' % trdata.getLength())

    rnn = RecurrentNetwork()
    inputLayer = LinearLayer(trdata.indim)

    hiddenLayer = rootObj.MachineLearning.prediction.algorithm.RecurrentNeuralNetwork.hiddenLayerActivation
    hiddenNeurons = rootObj.MachineLearning.prediction.algorithm.RecurrentNeuralNetwork.hiddenNeurons
    if hiddenLayer == 'Sigmoid':
        hiddenLayer = SigmoidLayer(hiddenNeurons)
    elif hiddenLayer == 'Softmax':
        hiddenLayer = SoftmaxLayer(hiddenNeurons)
    else:
        hiddenLayer = LinearLayer(hiddenNeurons)

    outputLayer = rootObj.MachineLearning.prediction.algorithm.RecurrentNeuralNetwork.outputLayerActivation
    if outputLayer == 'Sigmoid':
        outputLayer = SigmoidLayer(trdata.outdim)
    elif outputLayer == 'Softmax':
        outputLayer = SoftmaxLayer(trdata.outdim)
    else:
        outputLayer = LinearLayer(trdata.outdim)

    rnn.addInputModule(inputLayer)
    rnn.addModule(hiddenLayer)
    rnn.addOutputModule(outputLayer)

    rnn_type = rootObj.MachineLearning.prediction.algorithm.RecurrentNeuralNetwork.RNN_Type
    in_to_hidden = FullConnection(inputLayer, hiddenLayer)
    hidden_to_outputLayer = FullConnection(hiddenLayer, outputLayer)
    rnn.addConnection(in_to_hidden)
    rnn.addConnection(hidden_to_outputLayer)

    # Elman nets feed the hidden layer back to itself; Jordan nets feed
    # the output layer back into the hidden layer.
    if rnn_type == 'Elman':
        hidden_to_hidden = FullConnection(hiddenLayer, hiddenLayer, name='c3')
        rnn.addRecurrentConnection(hidden_to_hidden)
    if rnn_type == 'Jordan':
        output_to_hidden = FullConnection(outputLayer, hiddenLayer, name='c3')
        rnn.addRecurrentConnection(output_to_hidden)

    momentum = rootObj.MachineLearning.prediction.algorithm.RecurrentNeuralNetwork.momentum
    learning_rate = rootObj.MachineLearning.prediction.algorithm.RecurrentNeuralNetwork.learningRate

    rnn.sortModules()
    # Use the parsed XML values instead of hard-coded constants (the original
    # parsed momentum/learningRate but then ignored them).
    trainer = BackpropTrainer(rnn, dataset=trdata, momentum=momentum,
                              learningrate=learning_rate, verbose=True,
                              weightdecay=0.01)
    trainer.train()

    result = percentError(trainer.testOnClassData(dataset=trdata), trdata['class'])
    print('%f \n' % (100 - result))

    ts = time.time()
    directory = output_location + sep + str(int(ts))
    makedirs(directory)
    fileObject = open(directory + sep + 'pybrain_RNN', 'wb')
    pickle.dump(trainer, fileObject)
    pickle.dump(rnn, fileObject)
    fileObject.close()
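A hedged sketch of reading the pickled objects back, where `output_dir` and `timestamp` are hypothetical placeholders for the path written by `exec_algo()`; the two `pickle.load()` calls must mirror the dump order:

import pickle
from os import sep

with open(output_dir + sep + timestamp + sep + 'pybrain_RNN', 'rb') as f:
    trainer = pickle.load(f)  # dumped first
    rnn = pickle.load(f)      # dumped second
print(rnn.activate([0.0] * rnn.indim))  # sanity check on a dummy input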
def classifySegments(trainingCSVs, testingCSVs):
    global NUMBER_OF_GENRES
    global INPUT_DIMS
    global GENRE_DICT
    global DIR

    SEGMENT_LENGTH = 1000  # milliseconds per segment
    PROCESSING_FILENAME = DIR + '/processing.wav'
    TRAINING_EPOCHS = 80

    print('Reading training data...')
    trndata_temp = ClassificationDataSet(INPUT_DIMS, 1, nb_classes=NUMBER_OF_GENRES)
    for filename in trainingCSVs:
        basename = os.path.splitext(filename)[0]
        with open(filename, 'rb') as fhandle:
            data = list(csv.reader(fhandle))[0]
            data = map(float, data)
        with open(basename + '.genre', 'r') as fhandle:
            genre = fhandle.readline().strip()  # drop any trailing newline
        trndata_temp.addSample(data, [GENRE_DICT[genre]])
    print('Reading data done')

    trndata = ClassificationDataSet(INPUT_DIMS, 1, nb_classes=NUMBER_OF_GENRES)
    for n in xrange(0, trndata_temp.getLength()):
        trndata.addSample(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])
    trndata._convertToOneOfMany()

    fnn = buildNetwork(trndata.indim, 60, trndata.outdim, outclass=SoftmaxLayer)

    # Per-genre record of how segments were (mis)classified.
    mistakenDict = dict()
    for x in GENRE_DICT.keys():
        mistakenDict[x] = [0] * NUMBER_OF_GENRES

    print('Training...')
    trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1,
                              verbose=True, weightdecay=0.01)
    trainer.trainEpochs(TRAINING_EPOCHS)
    print('Training done')

    print('Classifying test data segments...')
    genreSongCount = [0] * NUMBER_OF_GENRES
    correctlyClassifiedSongCount = [0] * NUMBER_OF_GENRES
    averageSegmentAccuracies = [0] * NUMBER_OF_GENRES

    for filename in testingCSVs:
        basename = os.path.splitext(os.path.basename(filename))[0]
        print('Processing ' + basename + '...')
        song = AudioSegment.from_wav(SONG_FILE_DIR + '/' + basename + '.wav')
        segment = song
        i = 0
        genreCounts = [0] * NUMBER_OF_GENRES
        try:
            # Classify the song one SEGMENT_LENGTH slice at a time and
            # vote: the genre with the most segments wins.
            while segment.duration_seconds:
                segment = song[i:i + SEGMENT_LENGTH]
                i += SEGMENT_LENGTH
                segment.export(PROCESSING_FILENAME, format='wav')
                inputs = getData(PROCESSING_FILENAME).tolist()
                genreConfidences = list(fnn.activate(inputs))
                segmentGenreIndex = genreConfidences.index(max(genreConfidences))
                genreCounts[segmentGenreIndex] += 1
                os.remove(PROCESSING_FILENAME)
        except:
            print('Except at: ' + str(genreCounts))
            os.remove(PROCESSING_FILENAME)

        thisSongGenre = genreCounts.index(max(genreCounts))
        with open(DIR + '/' + basename + '.genre', 'r') as f:
            trueGenre = f.readline().strip()
        genreIndex = GENRE_DICT[trueGenre]
        accuracy = genreCounts[genreIndex] / float(sum(genreCounts))
        genreSongCount[genreIndex] += 1
        averageSegmentAccuracies[genreIndex] += accuracy
        if thisSongGenre == genreIndex:
            correctlyClassifiedSongCount[genreIndex] += 1
        print("%5.2f%% accurate for '%s'" % (100 * accuracy, basename))

        mistakenList = mistakenDict[trueGenre]
        total = float(sum(genreCounts))
        for j in xrange(len(genreCounts)):
            mistakenList[j] += genreCounts[j] / total
    print('Done classifying segments')

    # Normalise the mistake tallies to per-genre percentages.
    for k in mistakenDict:
        for v in xrange(len(mistakenDict[k])):
            if genreSongCount[v] > 0:
                mistakenDict[k][v] /= float(genreSongCount[v])
                mistakenDict[k][v] *= 100

    for k in GENRE_DICT:
        i = GENRE_DICT[k]
        print('-' * 75)
        print('Total songs classified in %s genre: %d' % (k, genreSongCount[i]))
        if genreSongCount[i]:
            print('Total song classification accuracy for %s: %5.2f%%'
                  % (k, 100.0 * correctlyClassifiedSongCount[i] / genreSongCount[i]))
            print('Average segment classification accuracy for %s: %5.2f%%'
                  % (k, 100.0 * averageSegmentAccuracies[i] / genreSongCount[i]))
        print('Mistakes: ' + str(mistakenDict[k]))

    totalSongCount = sum(genreSongCount)
    totalAccuracy = sum(averageSegmentAccuracies)
    correctlyClassifiedSongs = sum(correctlyClassifiedSongCount)
    print('=' * 75)
    print('Total songs tested: %d' % totalSongCount)
    print('Average segment classification accuracy per song: %5.2f%%'
          % (100.0 * totalAccuracy / totalSongCount))
    print('Total accuracy for properly identified songs: %5.2f%%'
          % (100.0 * correctlyClassifiedSongs / totalSongCount))
    print('=' * 75)

    # Avoid division by zero for genres that had no test songs.
    genreSongCount = [1 if i == 0 else i for i in genreSongCount]
    return ([correctlyClassifiedSongCount[i] / float(genreSongCount[i])
             for i in xrange(NUMBER_OF_GENRES)],
            [averageSegmentAccuracies[i] / genreSongCount[i]
             for i in xrange(NUMBER_OF_GENRES)])
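A hedged usage sketch, where `trainingCSVs` and `testingCSVs` are hypothetical placeholder lists of feature-CSV paths; the function returns per-genre song and segment accuracy rates indexed by `GENRE_DICT`:

song_rates, segment_rates = classifySegments(trainingCSVs, testingCSVs)
for genre, idx in GENRE_DICT.items():
    print('%s: song accuracy %.2f, segment accuracy %.2f'
          % (genre, song_rates[idx], segment_rates[idx]))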
# trainer.trainUntilConvergence(continueEpochs=5, validationProportion=0.25)  # Can take a long time
# trainer.train()  # Train on one epoch only

# Training error and testing error
trnresult = percentError(trainer.testOnClassData(), trndata['class'])
tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
print("The hidden layers are %s %s %s" % (hiddenlayers[0], hiddenlayers[1], hiddenlayers[2]))
print("Percentage training error: %s" % trnresult)
print("Percentage testing error: %s" % tstresult)

# Test on a couple of pictures; index the test set, not the training set.
testout = []
for i in xrange(tstdata.getLength()):
    out = fnn.activate(tstdata['input'][i])
    testout.append(out.argmax() == tstdata['class'][i][0])
print("Correctly classified %d / %d" % (sum(testout), len(testout)))

# Take a new picture
# with picamera.PiCamera() as camera:
#     camera.resolution = (res, res)
#     newpicture = '/home/pi/camera/newpicture.jpg'
#     camera.capture(newpicture)
# im = Image.open(newpicture)
# bw_im = im.convert('1')  # Convert to black and white
# pixels = [bw_im.getpixel((i, j)) for i in range(res) for j in range(res)]  # list of black and white pixels
#
# # Get results
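A hedged sketch of the "Get results" step, assuming the commented picamera block above has been enabled so that `pixels` exists and `res * res` matches the network's input dimension:

# Forward pass on the newly captured picture; argmax picks the winner class.
out = fnn.activate(pixels)
print("Predicted class: %d" % out.argmax())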