Example #1
def initialize(trainingfeatures, traininglabels, p=0.7):
    alldata = ClassificationDataSet(trainingfeatures.shape[1], 1, nb_classes=len(set(traininglabels)))
    for i in xrange(len(traininglabels)):   # iterate over every sample, not the value of the first label
        alldata.appendLinked(trainingfeatures[i], traininglabels[i])
    trndata, tstdata = alldata.splitWithProportion(p)
    trndata._convertToOneOfMany(bounds=[0, 1])
    tstdata._convertToOneOfMany(bounds=[0, 1])
    model, accuracy, params = buildANN(trndata, tstdata)
    print '\nThe best model had ' + str(accuracy) + '% accuracy and used the parameters:\n' + str(params) + '\n'
    return model
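Example #1's helper follows PyBrain's standard classification recipe: populate a ClassificationDataSet, split it, then one-hot encode the targets. A minimal, self-contained sketch of the same flow on made-up toy data (buildANN is external to the snippet above, so it is omitted here):

from pybrain.datasets import ClassificationDataSet

alldata = ClassificationDataSet(2, 1, nb_classes=2)
for features, label in [([0.1, 0.9], 0), ([0.8, 0.2], 1), ([0.2, 0.7], 0), ([0.9, 0.1], 1)]:
    alldata.appendLinked(features, [label])

# splitWithProportion(p) returns (first part holding fraction p, remainder)
trndata, tstdata = alldata.splitWithProportion(0.7)

# One-of-many encoding turns the single class index into a one-hot target
# vector, which is what a softmax output layer expects
trndata._convertToOneOfMany(bounds=[0, 1])
tstdata._convertToOneOfMany(bounds=[0, 1])
print trndata.indim, trndata.outdim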
Example #2
def main():
    print "Calculating mfcc...."
    mfcc_coeff_vectors_dict = {}
    for i in range(1, 201):
        extractor = FeatureExtractor(
            '/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Happiness/HappinessAudios/' + str(i) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})

    for i in range(201, 401):
        extractor = FeatureExtractor(
            '/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Sadness/SadnessAudios/' + str(i - 200) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})

    audio_with_min_frames, min_frames = get_min_frames_audio(
        mfcc_coeff_vectors_dict)
    processed_mfcc_coeff = preprocess_input_vectors(
        mfcc_coeff_vectors_dict, min_frames)
    # frames = min_frames
    # print frames
    # print len(processed_mfcc_coeff['1'])
    # for each_vector in processed_mfcc_coeff['1']:
    #     print len(each_vector)
    print "mffcc found..."
    classes = ["happiness", "sadness"]

    training_data = ClassificationDataSet(
        26, target=1, nb_classes=2, class_labels=classes)
    # training_data = SupervisedDataSet(13, 1)
    try:
        network = NetworkReader.readFrom(
            'network_state_frame_level_new2_no_pp1.xml')
    except Exception:  # no saved network state; train from scratch
        for i in range(1, 51):
            mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
            for each_vector in mfcc_coeff_vectors:
                training_data.appendLinked(each_vector, [1])

        for i in range(201, 251):
            mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
            for each_vector in mfcc_coeff_vectors:
                training_data.appendLinked(each_vector, [0])

        training_data._convertToOneOfMany()
        print "prepared training data.."
        print training_data.indim, training_data.outdim
        network = buildNetwork(
            training_data.indim, 5, training_data.outdim, fast=True)
        trainer = BackpropTrainer(network, learningrate=0.01, momentum=0.99)
        print "Before training...", trainer.testOnData(training_data)
        trainer.trainOnDataset(training_data, 1000)
        print "After training...", trainer.testOnData(training_data)
        NetworkWriter.writeToFile(
            network, "network_state_frame_level_new2_no_pp.xml")
Example #3
def start():
    featuresList, labelsList = loadFile("output.txt")

    print 'Normalizing array...'
    normalizearray(featuresList)

    labels = ['ffi_brainmatter', 'ffi_neuron', 'ffi_vacuole', 'ffi_astrocyte',
              'wt_brainmatter', 'wt_neuron', 'wt_vacuole', 'wt_astrocyte']
    alldata = ClassificationDataSet(len(featuresList[0]), 1, nb_classes=8, class_labels=labels)
    for i in range(len(labelsList)):
        alldata.appendLinked(featuresList[i], labelsList[i])

    #print 'All data: ', alldata
    #print 'Statistics: ', alldata.calculateStatistics()

    newK = fSel.getTreeFeatures(featuresList, labelsList)
    newK = newK.shape[1]
    print "K= ", newK
    reducedFeatures = fSel.getBestK(featuresList, labelsList, 'f_classif', newK)
    reducedData = ClassificationDataSet(len(reducedFeatures[0]), 1, nb_classes=8, class_labels=labels)

    # prep reducedData object with reduced feature list
    for i in range(len(labelsList)):
        reducedData.appendLinked(reducedFeatures[i], labelsList[i])

    print 'Splitting test and training data...'
    tstdata, trndata = alldata.splitWithProportion(0.30)
    reducedTestData, reducedTrainData = reducedData.splitWithProportion(0.30)

    print 'Number of training and test patterns: ', len(trndata), len(tstdata)

    trndata._convertToOneOfMany(bounds=[0, 1])
    tstdata._convertToOneOfMany(bounds=[0, 1])
    reducedTestData._convertToOneOfMany(bounds=[0, 1])
    reducedTrainData._convertToOneOfMany(bounds=[0, 1])

    #print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    #print "Sample (input, target, class):"
    #print trndata['input'][0], trndata['target'][0], trndata['class'][0]
    #print trndata['input'][1], trndata['target'][1], trndata['class'][1]

    buildFNN(tstdata, trndata)

    print "___________________________________________FEATURE REDUCTION________________________________________________"
    buildFNN(reducedTestData, reducedTrainData)
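Note that Example #1 unpacks splitWithProportion as (trndata, tstdata) with p=0.7 while this example unpacks (tstdata, trndata) with p=0.30; both give a 70/30 train/test split, because the first returned set always holds the given proportion. A toy demonstration:

from pybrain.datasets import ClassificationDataSet

alldata = ClassificationDataSet(2, 1, nb_classes=2)
for i in range(10):
    alldata.appendLinked([i, i], [i % 2])

trndata, tstdata = alldata.splitWithProportion(0.70)   # Example #1 style
tstdata, trndata = alldata.splitWithProportion(0.30)   # Example #3 style
print len(trndata), len(tstdata)                       # 7 3 either way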
Example #4
def _createL1Dataset(self, classifiers, l1PreDataset):
    l1DatasetDimensions = classifiers[0].distributionLength() * len(classifiers)
    l1Dataset = ClassificationDataSet(l1DatasetDimensions, nb_classes=2)

    for instance in l1PreDataset:
        input = instance[0]
        target = instance[1]

        l1Input = _getLevel1Input(classifiers, input, self.useDistributions)
        l1Dataset.appendLinked(l1Input, target)

    return l1Dataset
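_getLevel1Input is not shown in this snippet. For a stacking ensemble, the natural level-1 input is the concatenation of each base classifier's predicted class distribution; the sketch below guesses a getDistribution accessor from the distributionLength() call above, so treat the method name as hypothetical:

def _getLevel1Input(classifiers, input, useDistributions):
    # Concatenate each base classifier's class-probability distribution
    # (ignores the useDistributions flag for brevity; hypothetical API)
    l1Input = []
    for classifier in classifiers:
        l1Input.extend(classifier.getDistribution(input))  # assumed accessor
    return l1Input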
Example #5
def _createDatasetForClass(self, dataset, classValue):
    datasetForClass = ClassificationDataSet(dataset.getDimension('input'), nb_classes=2)

    for instance in dataset:
        input = instance[0]
        target = instance[1]

        if target[0] == classValue:
            datasetForClass.appendLinked(input, [1])
        else:
            datasetForClass.appendLinked(input, [0])

    return datasetForClass
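Example #5 is a one-vs-rest relabeling: samples of the chosen class map to 1 and everything else to 0. A self-contained sketch of the same idea on toy data:

from pybrain.datasets import ClassificationDataSet

full = ClassificationDataSet(2, 1, nb_classes=3)
for features, label in [([0, 1], 0), ([1, 0], 1), ([1, 1], 2)]:
    full.appendLinked(features, [label])

classValue = 0                                 # the "one" class
binary = ClassificationDataSet(2, 1, nb_classes=2)
for instance in full:
    input, target = instance[0], instance[1]
    binary.appendLinked(input, [1] if target[0] == classValue else [0])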
Example #6
def main():
    print "Calculating mfcc...."
    mfcc_coeff_vectors_dict = {}
    for i in range(1, 201):
        extractor = FeatureExtractor('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Happiness/HappinessAudios/' + str(i) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})

    for i in range(201, 401):
        extractor = FeatureExtractor('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Sadness/SadnessAudios/' + str(i - 200) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})

    audio_with_min_frames, min_frames = get_min_frames_audio(mfcc_coeff_vectors_dict)
    processed_mfcc_coeff = preprocess_input_vectors(mfcc_coeff_vectors_dict, min_frames)
    frames = min_frames
    print "mfcc found...."
    classes = ["happiness", "sadness"]
    try:
        network = NetworkReader.readFrom('network_state_new_.xml')
    except Exception:  # no saved network state; train from scratch
        # Create new network and start Training
        training_data = ClassificationDataSet(frames * 26, target=1, nb_classes=2, class_labels=classes)
        # training_data = SupervisedDataSet(frames * 39, 1)
        for i in range(1, 151):
            mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
            training_data.appendLinked(mfcc_coeff_vectors.ravel(), [1])
            # training_data.addSample(mfcc_coeff_vectors.ravel(), [1])

        for i in range(201, 351):
            mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
            training_data.appendLinked(mfcc_coeff_vectors.ravel(), [0])
            # training_data.addSample(mfcc_coeff_vectors.ravel(), [0])

        training_data._convertToOneOfMany()
        network = buildNetwork(training_data.indim, 5, training_data.outdim)
        trainer = BackpropTrainer(network, learningrate=0.01, momentum=0.99)
        print "Before training...", trainer.testOnData(training_data)
        trainer.trainOnDataset(training_data, 1000)
        print "After training...", trainer.testOnData(training_data)
        NetworkWriter.writeToFile(network, "network_state_new_.xml")

    print "*" * 30 , "Happiness Detection", "*" * 30
    for i in range(151, 201):
        output = network.activate(processed_mfcc_coeff[str(i)].ravel())
        # print output,
        # if output > 0.7:
        #     print "happiness"
        class_index = max(xrange(len(output)), key=output.__getitem__)
        class_name = classes[class_index]
        print class_name
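The max(xrange(...), key=output.__getitem__) line above is an argmax over the output activations. Since network.activate() returns a numpy array, the same lookup can be written more directly; a small stand-alone illustration:

import numpy

classes = ["happiness", "sadness"]
output = numpy.array([0.2, 0.8])   # stand-in for network.activate(...)
print classes[output.argmax()]     # prints "sadness"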
Example #7
def _getFilteredDataset(self, dataset, pair):
    datasetForPair = ClassificationDataSet(dataset.getDimension('input'), nb_classes=2)

    for instance in dataset:
        input = instance[0]
        target = instance[1]

        classValue = target[0]

        # The first class in the pair is the negative class and the
        # second is the positive class
        if classValue == pair[0]:
            datasetForPair.appendLinked(input, [0])
        elif classValue == pair[1]:
            datasetForPair.appendLinked(input, [1])

    return datasetForPair
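Example #7 filters a multiclass dataset down to a single class pair, the building block of one-vs-one classification. The pairs themselves are easy to enumerate; a short sketch (numClasses is an assumed example value):

from itertools import combinations

numClasses = 3
pairs = list(combinations(range(numClasses), 2))
print pairs   # [(0, 1), (0, 2), (1, 2)]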
Example #8
def _createGradingDataset(self, baseClassifier, gradingSet, numOfAttributes):
    gradingDataset = ClassificationDataSet(numOfAttributes, nb_classes=2, class_labels=["Incorrect", "Correct"])

    for instance in gradingSet:
        # Get attributes from the instance
        attributes = instance[0]
        # Get the class from the instance
        cls = instance[1][0]

        prediction = baseClassifier.getPrediction(attributes)

        if prediction == cls:
            gradingDataset.appendLinked(attributes, [CorrectPrediction])
        else:
            gradingDataset.appendLinked(attributes, [IncorrectPrediction])

    return gradingDataset
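CorrectPrediction and IncorrectPrediction are not defined in this snippet. Given the class_labels order ["Incorrect", "Correct"] above, they are presumably module-level constants for the two class indices, e.g.:

IncorrectPrediction = 0   # matches class_labels[0] == "Incorrect"
CorrectPrediction = 1     # matches class_labels[1] == "Correct"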
Example #9
def _testTrainingOnClassificationDataset(self):
    DS = ClassificationDataSet(2, class_labels=['Zero', 'One'])
    DS.appendLinked([0, 0], [0])
    DS.appendLinked([0, 1], [0])
    DS.appendLinked([1, 0], [0])
    DS.appendLinked([1, 1], [1])

    network = buildNetwork(DS.indim, 5, 2, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(network, momentum=0.1, verbose=True, weightdecay=0.01)

    nnf = NeuralNetworkFactory(network, trainer, seed=2, iterationsNum=20)
    nnClassifier = nnf.buildClassifier(DS)

    self.assertEqual(nnClassifier.getPrediction([0, 0]), 0)
    self.assertEqual(nnClassifier.getPrediction([0, 1]), 0)
    self.assertEqual(nnClassifier.getPrediction([1, 0]), 0)
    self.assertEqual(nnClassifier.getPrediction([1, 1]), 1)
Example #10
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets.classification import ClassificationDataSet, SequenceClassificationDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.connections import FullConnection
from pybrain.structure.modules import TanhLayer, LSTMLayer

# Load Dataset.
ds = ClassificationDataSet(2, class_labels=['zero', 'one'])
ds.appendLinked((0, 0), 0)
ds.appendLinked((0, 1), 0)
ds.appendLinked((1, 0), 1)
ds.appendLinked((1, 1), 0)

# Build a sequence dataset.
sds = SequenceClassificationDataSet(1, 1)
sds.appendLinked(0, 0)
sds.appendLinked(0, 0)
sds.newSequence()
sds.appendLinked(0, 0)
sds.appendLinked(1, 0)
sds.newSequence()
sds.appendLinked(1, 0)
sds.appendLinked(0, 1)
sds.newSequence()
sds.appendLinked(1, 0)
sds.appendLinked(1, 0)
sds.newSequence()

print ds['input']
print ds['target']
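The SequenceClassificationDataSet above groups linked (input, target) rows into sequences with newSequence(). A sketch of reading them back with the standard SequentialDataSet accessors, assuming the linked fields are input and target:

for i in range(sds.getNumSequences()):
    inputs, targets = sds.getSequence(i)     # arrays for one sequence
    print i, inputs.ravel(), targets.ravel()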
    def testTwoDimensionalLinearClassificationTest(self):
        # Class 0 - points from quadrant I of the Cartesian plane
        # Class 1 - points from quadrant III of the Cartesian plane
        testDataset = ClassificationDataSet(2, nb_classes=2, class_labels=["I-Quadrant", "III-Quadrant"])
        testDataset.appendLinked([2, 2], [0])
        testDataset.appendLinked([4, 2], [0])
        testDataset.appendLinked([5, 0], [0])
        testDataset.appendLinked([0, 5], [0])
        testDataset.appendLinked([3, 2], [0])
        testDataset.appendLinked([8, 1], [0])
        testDataset.appendLinked([1, 8], [0])
        testDataset.appendLinked([-4, -2], [1])
        testDataset.appendLinked([-3, -2], [1])
        testDataset.appendLinked([-8, -1], [1])
        testDataset.appendLinked([-1, -5], [1])
        testDataset.appendLinked([-2, -2], [1])
        testDataset.appendLinked([-5, -5], [1])

        optimizer = GradientOptimizer(minChange=1e-6)
        optimizer.maxLearningSteps = 1000
        optimizer.verbose = False
        lrf = LogisticRegressionFactory(optimizer)
        classifier = lrf.buildClassifier(testDataset)

        self.assertEqual(classifier.getPrediction([3, 3]), 0)
        self.assertEqual(classifier.getPrediction([2, 4]), 0)
        self.assertEqual(classifier.getPrediction([10, 10]), 0)
        self.assertEqual(classifier.getPrediction([9, 5]), 0)

        self.assertEqual(classifier.getPrediction([-4, -4]), 1)
        self.assertEqual(classifier.getPrediction([-20, -20]), 1)
        self.assertEqual(classifier.getPrediction([-8, -3]), 1)
        self.assertEqual(classifier.getPrediction([-9, -9]), 1)
    def testOneDimensionalClassificationTest(self):
        # A hypothetical medical dataset: small tumors are benign, large ones are malignant.
        tumorDataset = ClassificationDataSet(1, nb_classes=2, class_labels=["Benign", "Malignant"])
        tumorDataset.appendLinked([0.1], [0])
        tumorDataset.appendLinked([0.15], [0])
        tumorDataset.appendLinked([0.2], [0])
        tumorDataset.appendLinked([0.33], [0])
        tumorDataset.appendLinked([0.23], [0])
        tumorDataset.appendLinked([0.4], [0])
        tumorDataset.appendLinked([0.8], [1])
        tumorDataset.appendLinked([1.4], [1])
        tumorDataset.appendLinked([2.3], [1])
        tumorDataset.appendLinked([0.9], [1])
        tumorDataset.appendLinked([1.9], [1])
        tumorDataset.appendLinked([2.9], [1])

        optimizer = GradientOptimizer(minChange=1e-6)
        optimizer.maxLearningSteps = 1000
        optimizer.verbose = False
        lrf = LogisticRegressionFactory(optimizer)
        classifier = lrf.buildClassifier(tumorDataset)

        self.assertEqual(classifier.getPrediction([0.2]), 0)
        self.assertEqual(classifier.getPrediction([0.1]), 0)
        self.assertEqual(classifier.getPrediction([0.3]), 0)
        self.assertEqual(classifier.getPrediction([0.001]), 0)

        self.assertEqual(classifier.getPrediction([1.2]), 1)
        self.assertEqual(classifier.getPrediction([2.2]), 1)
        self.assertEqual(classifier.getPrediction([3.2]), 1)
        self.assertEqual(classifier.getPrediction([1.9]), 1)
    def testNonLinearClassification(self):
        # This test checks whether the logistic regression classifier can
        # classify data that is not linearly separable in its raw form.
        # The dataset consists of two classes: elements of class 0 lie
        # near the origin (roughly inside a circle of radius 0.5) and
        # elements of class 1 lie well outside it.
        dataset = ClassificationDataSet(4, nb_classes=2, class_labels=["0", "1"])

        dataset.appendLinked(self._getSquaredTerms([0.5, 0.5]), [0])
        dataset.appendLinked(self._getSquaredTerms([0, 0.5]), [0])
        dataset.appendLinked(self._getSquaredTerms([0.5, 0]), [0])
        dataset.appendLinked(self._getSquaredTerms([0, 0]), [0])
        dataset.appendLinked(self._getSquaredTerms([0.3, 0.2]), [0])
        dataset.appendLinked(self._getSquaredTerms([0.3, -0.5]), [0])
        dataset.appendLinked(self._getSquaredTerms([-0.3, 0.5]), [0])
        dataset.appendLinked(self._getSquaredTerms([-0.4, 0]), [0])
        dataset.appendLinked(self._getSquaredTerms([0.6, -0.3]), [0])
        dataset.appendLinked(self._getSquaredTerms([-0.3, -0.5]), [0])

        dataset.appendLinked(self._getSquaredTerms([2, 4]), [1])
        dataset.appendLinked(self._getSquaredTerms([4, -5]), [1])
        dataset.appendLinked(self._getSquaredTerms([-3, 2]), [1])
        dataset.appendLinked(self._getSquaredTerms([4, 4]), [1])
        dataset.appendLinked(self._getSquaredTerms([-3, 5]), [1])
        dataset.appendLinked(self._getSquaredTerms([-2, -4]), [1])
        dataset.appendLinked(self._getSquaredTerms([-5, 0]), [1])
        dataset.appendLinked(self._getSquaredTerms([5, 0]), [1])
        dataset.appendLinked(self._getSquaredTerms([4, 3]), [1])
        dataset.appendLinked(self._getSquaredTerms([-5, 1]), [1])

        optimizer = GradientOptimizer(minChange=1e-6)
        optimizer.maxLearningSteps = 1000
        optimizer.verbose = False
        lrf = LogisticRegressionFactory(optimizer)
        classifier = lrf.buildClassifier(dataset)

        self.assertEqual(classifier.getPrediction(self._getSquaredTerms([0.1, 0.1])), 0)
        self.assertEqual(classifier.getPrediction(self._getSquaredTerms([0.1, -0.1])), 0)
        self.assertEqual(classifier.getPrediction(self._getSquaredTerms([-0.1, 0.1])), 0)
        self.assertEqual(classifier.getPrediction(self._getSquaredTerms([-0.1, -0.1])), 0)

        self.assertEqual(classifier.getPrediction(self._getSquaredTerms([4, 4])), 1)
        self.assertEqual(classifier.getPrediction(self._getSquaredTerms([4, -4])), 1)
        self.assertEqual(classifier.getPrediction(self._getSquaredTerms([-4, 4])), 1)
        self.assertEqual(classifier.getPrediction(self._getSquaredTerms([-4, -4])), 1)
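_getSquaredTerms is not shown here. Since the dataset is declared with 4 input dimensions for 2-D points, a plausible (hypothetical) implementation appends the squared coordinates; the circle x**2 + y**2 < r**2 then becomes a linear boundary in the expanded feature space:

def _getSquaredTerms(self, point):
    # Map (x, y) -> (x, y, x^2, y^2); hypothetical reconstruction
    x, y = point
    return [x, y, x * x, y * y]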