def initialize(trainingfeatures, traininglabels, p=0.7):
    alldata = ClassificationDataSet(trainingfeatures.shape[1], 1,
                                    nb_classes=len(set(traininglabels)))
    # Link every feature vector to its label
    for i in xrange(len(traininglabels)):
        alldata.appendLinked(trainingfeatures[i], traininglabels[i])
    trndata, tstdata = alldata.splitWithProportion(p)
    trndata._convertToOneOfMany(bounds=[0, 1])
    tstdata._convertToOneOfMany(bounds=[0, 1])
    model, accuracy, params = buildANN(trndata, tstdata)
    print '\nThe best model had ' + str(accuracy) + '% accuracy and used the parameters:\n' + str(params) + '\n'
    return model
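# buildANN() is not defined in this section. A minimal sketch of what such a
# helper might look like, assuming it trains a single feed-forward network and
# reports test-set accuracy; the hidden-layer size and epoch count below are
# illustrative, not taken from the original:
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.utilities import percentError

def buildANN(trndata, tstdata, hidden=5, epochs=50):
    net = buildNetwork(trndata.indim, hidden, trndata.outdim)
    trainer = BackpropTrainer(net, dataset=trndata, momentum=0.1, weightdecay=0.01)
    trainer.trainEpochs(epochs)
    # percentError compares predicted class indices against the 'class' column
    error = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
    params = 'hidden=%d, epochs=%d' % (hidden, epochs)
    return net, 100.0 - error, params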
def main(): print "Calculating mfcc...." mfcc_coeff_vectors_dict = {} for i in range(1, 201): extractor = FeatureExtractor( '/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Happiness/HappinessAudios/' + str(i) + '.wav') mfcc_coeff_vectors = extractor.calculate_mfcc() mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])}) for i in range(201, 401): extractor = FeatureExtractor( '/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Sadness/SadnessAudios/' + str(i - 200) + '.wav') mfcc_coeff_vectors = extractor.calculate_mfcc() mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])}) audio_with_min_frames, min_frames = get_min_frames_audio( mfcc_coeff_vectors_dict) processed_mfcc_coeff = preprocess_input_vectors( mfcc_coeff_vectors_dict, min_frames) # frames = min_frames # print frames # print len(processed_mfcc_coeff['1']) # for each_vector in processed_mfcc_coeff['1']: # print len(each_vector) print "mffcc found..." classes = ["happiness", "sadness"] training_data = ClassificationDataSet( 26, target=1, nb_classes=2, class_labels=classes) # training_data = SupervisedDataSet(13, 1) try: network = NetworkReader.readFrom( 'network_state_frame_level_new2_no_pp1.xml') except: for i in range(1, 51): mfcc_coeff_vectors = processed_mfcc_coeff[str(i)] for each_vector in mfcc_coeff_vectors: training_data.appendLinked(each_vector, [1]) for i in range(201, 251): mfcc_coeff_vectors = processed_mfcc_coeff[str(i)] for each_vector in mfcc_coeff_vectors: training_data.appendLinked(each_vector, [0]) training_data._convertToOneOfMany() print "prepared training data.." print training_data.indim, training_data.outdim network = buildNetwork( training_data.indim, 5, training_data.outdim, fast=True) trainer = BackpropTrainer(network, learningrate=0.01, momentum=0.99) print "Before training...", trainer.testOnData(training_data) trainer.trainOnDataset(training_data, 1000) print "After training...", trainer.testOnData(training_data) NetworkWriter.writeToFile( network, "network_state_frame_level_new2_no_pp.xml")
def start():
    featuresList = []
    labelsList = []
    featuresList, labelsList = loadFile("output.txt")
    print 'Normalizing array...'
    normalizearray(featuresList)

    alldata = ClassificationDataSet(
        len(featuresList[0]), 1, nb_classes=8,
        class_labels=['ffi_brainmatter', 'ffi_neuron', 'ffi_vacuole', 'ffi_astrocyte',
                      'wt_brainmatter', 'wt_neuron', 'wt_vacuole', 'wt_astrocyte'])
    for i in range(len(labelsList)):
        alldata.appendLinked(featuresList[i], labelsList[i])
    # print 'All data: ', alldata
    # print 'Statistics: ', alldata.calculateStatistics()

    newK = fSel.getTreeFeatures(featuresList, labelsList)
    newK = newK.shape[1]
    print "K= ", newK
    reducedFeatures = fSel.getBestK(featuresList, labelsList, 'f_classif', newK)
    reducedData = ClassificationDataSet(
        len(reducedFeatures[0]), 1, nb_classes=8,
        class_labels=['ffi_brainmatter', 'ffi_neuron', 'ffi_vacuole', 'ffi_astrocyte',
                      'wt_brainmatter', 'wt_neuron', 'wt_vacuole', 'wt_astrocyte'])
    # prep reducedData object with reduced feature list
    for i in range(len(labelsList)):
        reducedData.appendLinked(reducedFeatures[i], labelsList[i])

    print 'Splitting test and training data...'
    tstdata, trndata = alldata.splitWithProportion(0.30)
    reducedTestData, reducedTrainData = reducedData.splitWithProportion(0.3)
    print 'Number of training and test patterns: ', len(trndata), len(tstdata)
    trndata._convertToOneOfMany(bounds=[0, 1])
    tstdata._convertToOneOfMany(bounds=[0, 1])
    reducedTestData._convertToOneOfMany(bounds=[0, 1])
    reducedTrainData._convertToOneOfMany(bounds=[0, 1])
    # print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    # print "Sample (input, target, class):"
    # print trndata['input'][0], trndata['target'][0], trndata['class'][0]
    # print trndata['input'][1], trndata['target'][1], trndata['class'][1]
    buildFNN(tstdata, trndata)
    print "___________________________________________FEATURE REDUCTION________________________________________________"
    buildFNN(reducedTestData, reducedTrainData)
def _createL1Dataset(self, classifiers, l1PreDataset):
    l1DatasetDimensions = classifiers[0].distributionLength() * len(classifiers)
    l1Dataset = ClassificationDataSet(l1DatasetDimensions, nb_classes=2)
    for instance in l1PreDataset:
        input = instance[0]
        target = instance[1]
        l1Input = _getLevel1Input(classifiers, input, self.useDistributions)
        l1Dataset.appendLinked(l1Input, target)
    return l1Dataset
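# _getLevel1Input() is not defined in this section. A sketch of the usual
# stacking construction, consistent with the distributionLength() sizing
# above; getDistribution() is an assumed method on the base classifiers,
# while getPrediction() and distributionLength() do appear elsewhere here:
def _getLevel1Input(classifiers, input, useDistributions):
    l1Input = []
    for classifier in classifiers:
        if useDistributions:
            # concatenate each base classifier's class-probability vector
            l1Input.extend(classifier.getDistribution(input))  # assumed API
        else:
            # or a one-hot encoding of each base classifier's prediction
            onehot = [0] * classifier.distributionLength()
            onehot[classifier.getPrediction(input)] = 1
            l1Input.extend(onehot)
    return l1Input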
def _createDatasetForClass(self, dataset, classValue):
    datasetForClass = ClassificationDataSet(dataset.getDimension('input'), nb_classes=2)
    for instance in dataset:
        input = instance[0]
        target = instance[1]
        if target[0] == classValue:
            datasetForClass.appendLinked(input, [1])
        else:
            datasetForClass.appendLinked(input, [0])
    return datasetForClass
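# A minimal sketch of how _createDatasetForClass could drive one-vs-rest
# training, relabeling the dataset once per class; trainBinaryClassifier()
# is a hypothetical helper, not part of the code above:
def trainOneVsRest(self, dataset):
    classifiers = {}
    for classValue in range(dataset.nClasses):
        binaryDataset = self._createDatasetForClass(dataset, classValue)
        classifiers[classValue] = trainBinaryClassifier(binaryDataset)  # hypothetical
    return classifiers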
def main(): print "Calculating mfcc...." mfcc_coeff_vectors_dict = {} for i in range(1, 201): extractor = FeatureExtractor('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Happiness/HappinessAudios/' + str(i) + '.wav') mfcc_coeff_vectors = extractor.calculate_mfcc() mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])}) for i in range(201, 401): extractor = FeatureExtractor('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Sadness/SadnessAudios/' + str(i - 200) + '.wav') mfcc_coeff_vectors = extractor.calculate_mfcc() mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])}) audio_with_min_frames, min_frames = get_min_frames_audio(mfcc_coeff_vectors_dict) processed_mfcc_coeff = preprocess_input_vectors(mfcc_coeff_vectors_dict, min_frames) frames = min_frames print "mfcc found...." classes = ["happiness", "sadness"] try: network = NetworkReader.readFrom('network_state_new_.xml') except: # Create new network and start Training training_data = ClassificationDataSet(frames * 26, target=1, nb_classes=2, class_labels=classes) # training_data = SupervisedDataSet(frames * 39, 1) for i in range(1, 151): mfcc_coeff_vectors = processed_mfcc_coeff[str(i)] training_data.appendLinked(mfcc_coeff_vectors.ravel(), [1]) # training_data.addSample(mfcc_coeff_vectors.ravel(), [1]) for i in range(201, 351): mfcc_coeff_vectors = processed_mfcc_coeff[str(i)] training_data.appendLinked(mfcc_coeff_vectors.ravel(), [0]) # training_data.addSample(mfcc_coeff_vectors.ravel(), [0]) training_data._convertToOneOfMany() network = buildNetwork(training_data.indim, 5, training_data.outdim) trainer = BackpropTrainer(network, learningrate=0.01, momentum=0.99) print "Before training...", trainer.testOnData(training_data) trainer.trainOnDataset(training_data, 1000) print "After training...", trainer.testOnData(training_data) NetworkWriter.writeToFile(network, "network_state_new_.xml") print "*" * 30 , "Happiness Detection", "*" * 30 for i in range(151, 201): output = network.activate(processed_mfcc_coeff[str(i)].ravel()) # print output, # if output > 0.7: # print "happiness" class_index = max(xrange(len(output)), key=output.__getitem__) class_name = classes[class_index] print class_name
def _getFilteredDataset(self, dataset, pair):
    datasetForPair = ClassificationDataSet(dataset.getDimension('input'), nb_classes=2)
    for instance in dataset:
        input = instance[0]
        target = instance[1]
        classValue = target[0]
        # First class in pair is the negative class and the second one is the positive class
        if classValue == pair[0]:
            datasetForPair.appendLinked(input, [0])
        elif classValue == pair[1]:
            datasetForPair.appendLinked(input, [1])
    return datasetForPair
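# A minimal sketch of the pair enumeration that would feed _getFilteredDataset
# in a one-vs-one scheme; trainBinaryClassifier() is a hypothetical helper,
# not part of the code above:
from itertools import combinations

def trainOneVsOne(self, dataset):
    pairwiseClassifiers = {}
    for pair in combinations(range(dataset.nClasses), 2):
        pairDataset = self._getFilteredDataset(dataset, pair)
        pairwiseClassifiers[pair] = trainBinaryClassifier(pairDataset)  # hypothetical
    return pairwiseClassifiers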
def _createGradingDataset(self, baseClassifier, gradingSet, numOfAttributes):
    gradingDataset = ClassificationDataSet(numOfAttributes, nb_classes=2,
                                           class_labels=["Incorrect", "Correct"])
    for instance in gradingSet:
        # Get the attributes from the instance
        attributes = instance[0]
        # Get the true class from the instance target
        cls = instance[1][0]
        prediction = baseClassifier.getPrediction(attributes)
        # Grade the base classifier: did it predict this instance correctly?
        if prediction == cls:
            gradingDataset.appendLinked(attributes, [CorrectPrediction])
        else:
            gradingDataset.appendLinked(attributes, [IncorrectPrediction])
    return gradingDataset
def _testTrainingOnClassificationDataset(self):
    DS = ClassificationDataSet(2, class_labels=['Zero', 'One'])
    DS.appendLinked([0, 0], [0])
    DS.appendLinked([0, 1], [0])
    DS.appendLinked([1, 0], [0])
    DS.appendLinked([1, 1], [1])

    network = buildNetwork(DS.indim, 5, 2, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(network, momentum=0.1, verbose=True, weightdecay=0.01)
    nnf = NeuralNetworkFactory(network, trainer, seed=2, iterationsNum=20)
    nnClassifier = nnf.buildClassifier(DS)

    # The dataset encodes logical AND, so only (1, 1) maps to class 1
    self.assertEqual(nnClassifier.getPrediction([0, 0]), 0)
    self.assertEqual(nnClassifier.getPrediction([0, 1]), 0)
    self.assertEqual(nnClassifier.getPrediction([1, 0]), 0)
    self.assertEqual(nnClassifier.getPrediction([1, 1]), 1)
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets.classification import ClassificationDataSet, SequenceClassificationDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.connections import FullConnection
from pybrain.structure.modules import TanhLayer, LSTMLayer

# Build a plain classification dataset.
ds = ClassificationDataSet(2, class_labels=['zero', 'one'])
ds.appendLinked((0, 0), 0)
ds.appendLinked((0, 1), 0)
ds.appendLinked((1, 0), 1)
ds.appendLinked((1, 1), 0)

# Build a sequence classification dataset, one sequence per pair of samples.
sds = SequenceClassificationDataSet(1, 1)
sds.appendLinked(0, 0)
sds.appendLinked(0, 0)
sds.newSequence()
sds.appendLinked(0, 0)
sds.appendLinked(1, 0)
sds.newSequence()
sds.appendLinked(1, 0)
sds.appendLinked(0, 1)
sds.newSequence()
sds.appendLinked(1, 0)
sds.appendLinked(1, 0)
sds.newSequence()

print ds['input']
print ds['target']
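# The snippets in this section repeatedly call splitWithProportion() and
# _convertToOneOfMany(). A minimal sketch of that pattern on the ds object
# above (the 0.75 split is illustrative):
trndata, tstdata = ds.splitWithProportion(0.75)
trndata._convertToOneOfMany(bounds=[0, 1])
tstdata._convertToOneOfMany(bounds=[0, 1])
# After conversion the scalar label moves to the 'class' field and 'target'
# becomes a one-hot vector, so outdim equals the number of classes.
print trndata.indim, trndata.outdim
print trndata['class'][0], trndata['target'][0]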
def testTwoDimensionalLinearClassificationTest(self):
    # Class 0 - points from quadrant I of the Cartesian plane
    # Class 1 - points from quadrant III of the Cartesian plane
    testDataset = ClassificationDataSet(2, nb_classes=2, class_labels=["I-Quadrant", "III-Quadrant"])
    testDataset.appendLinked([2, 2], [0])
    testDataset.appendLinked([4, 2], [0])
    testDataset.appendLinked([5, 0], [0])
    testDataset.appendLinked([0, 5], [0])
    testDataset.appendLinked([3, 2], [0])
    testDataset.appendLinked([8, 1], [0])
    testDataset.appendLinked([1, 8], [0])
    testDataset.appendLinked([-4, -2], [1])
    testDataset.appendLinked([-3, -2], [1])
    testDataset.appendLinked([-8, -1], [1])
    testDataset.appendLinked([-1, -5], [1])
    testDataset.appendLinked([-2, -2], [1])
    testDataset.appendLinked([-5, -5], [1])

    optimizer = GradientOptimizer(minChange=1e-6)
    optimizer.maxLearningSteps = 1000
    optimizer.verbose = False
    lrf = LogisticRegressionFactory(optimizer)
    classifier = lrf.buildClassifier(testDataset)

    self.assertEqual(classifier.getPrediction([3, 3]), 0)
    self.assertEqual(classifier.getPrediction([2, 4]), 0)
    self.assertEqual(classifier.getPrediction([10, 10]), 0)
    self.assertEqual(classifier.getPrediction([9, 5]), 0)
    self.assertEqual(classifier.getPrediction([-4, -4]), 1)
    self.assertEqual(classifier.getPrediction([-20, -20]), 1)
    self.assertEqual(classifier.getPrediction([-8, -3]), 1)
    self.assertEqual(classifier.getPrediction([-9, -9]), 1)
def testOneDimensionalClassificationTest(self):
    # Imaginary medical dataset: if the tumor is small it is benign, otherwise it is malignant.
    tumorDataset = ClassificationDataSet(1, nb_classes=2, class_labels=["Benign", "Malignant"])
    tumorDataset.appendLinked([0.1], [0])
    tumorDataset.appendLinked([0.15], [0])
    tumorDataset.appendLinked([0.2], [0])
    tumorDataset.appendLinked([0.33], [0])
    tumorDataset.appendLinked([0.23], [0])
    tumorDataset.appendLinked([0.4], [0])
    tumorDataset.appendLinked([0.8], [1])
    tumorDataset.appendLinked([1.4], [1])
    tumorDataset.appendLinked([2.3], [1])
    tumorDataset.appendLinked([0.9], [1])
    tumorDataset.appendLinked([1.9], [1])
    tumorDataset.appendLinked([2.9], [1])

    optimizer = GradientOptimizer(minChange=1e-6)
    optimizer.maxLearningSteps = 1000
    optimizer.verbose = False
    lrf = LogisticRegressionFactory(optimizer)
    classifier = lrf.buildClassifier(tumorDataset)

    self.assertEqual(classifier.getPrediction([0.2]), 0)
    self.assertEqual(classifier.getPrediction([0.1]), 0)
    self.assertEqual(classifier.getPrediction([0.3]), 0)
    self.assertEqual(classifier.getPrediction([0.001]), 0)
    self.assertEqual(classifier.getPrediction([1.2]), 1)
    self.assertEqual(classifier.getPrediction([2.2]), 1)
    self.assertEqual(classifier.getPrediction([3.2]), 1)
    self.assertEqual(classifier.getPrediction([1.9]), 1)
def testNonLinearClassification(self):
    # This test checks whether the logistic regression classifier can handle
    # data that is not linearly separable in its raw coordinates. The dataset
    # has two classes: class 0 points lie in a small circle around the origin,
    # class 1 points lie well outside it.
    dataset = ClassificationDataSet(4, nb_classes=2, class_labels=["0", "1"])
    dataset.appendLinked(self._getSquaredTerms([0.5, 0.5]), [0])
    dataset.appendLinked(self._getSquaredTerms([0, 0.5]), [0])
    dataset.appendLinked(self._getSquaredTerms([0.5, 0]), [0])
    dataset.appendLinked(self._getSquaredTerms([0, 0]), [0])
    dataset.appendLinked(self._getSquaredTerms([0.3, 0.2]), [0])
    dataset.appendLinked(self._getSquaredTerms([0.3, -0.5]), [0])
    dataset.appendLinked(self._getSquaredTerms([-0.3, 0.5]), [0])
    dataset.appendLinked(self._getSquaredTerms([-0.4, 0]), [0])
    dataset.appendLinked(self._getSquaredTerms([0.6, -0.3]), [0])
    dataset.appendLinked(self._getSquaredTerms([-0.3, -0.5]), [0])
    dataset.appendLinked(self._getSquaredTerms([2, 4]), [1])
    dataset.appendLinked(self._getSquaredTerms([4, -5]), [1])
    dataset.appendLinked(self._getSquaredTerms([-3, 2]), [1])
    dataset.appendLinked(self._getSquaredTerms([4, 4]), [1])
    dataset.appendLinked(self._getSquaredTerms([-3, 5]), [1])
    dataset.appendLinked(self._getSquaredTerms([-2, -4]), [1])
    dataset.appendLinked(self._getSquaredTerms([-5, 0]), [1])
    dataset.appendLinked(self._getSquaredTerms([5, 0]), [1])
    dataset.appendLinked(self._getSquaredTerms([4, 3]), [1])
    dataset.appendLinked(self._getSquaredTerms([-5, 1]), [1])

    optimizer = GradientOptimizer(minChange=1e-6)
    optimizer.maxLearningSteps = 1000
    optimizer.verbose = False
    lrf = LogisticRegressionFactory(optimizer)
    classifier = lrf.buildClassifier(dataset)

    self.assertEqual(classifier.getPrediction(self._getSquaredTerms([0.1, 0.1])), 0)
    self.assertEqual(classifier.getPrediction(self._getSquaredTerms([0.1, -0.1])), 0)
    self.assertEqual(classifier.getPrediction(self._getSquaredTerms([-0.1, 0.1])), 0)
    self.assertEqual(classifier.getPrediction(self._getSquaredTerms([-0.1, -0.1])), 0)
    self.assertEqual(classifier.getPrediction(self._getSquaredTerms([4, 4])), 1)
    self.assertEqual(classifier.getPrediction(self._getSquaredTerms([4, -4])), 1)
    self.assertEqual(classifier.getPrediction(self._getSquaredTerms([-4, 4])), 1)
    self.assertEqual(classifier.getPrediction(self._getSquaredTerms([-4, -4])), 1)
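# _getSquaredTerms() is not defined in this section. Since the dataset above
# has 4 input dimensions built from 2-D points, a plausible sketch is a
# feature map that appends the squared coordinates, which makes the circular
# class boundary linearly separable (this reconstruction is an assumption):
def _getSquaredTerms(self, point):
    x, y = point
    return [x, y, x ** 2, y ** 2]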