Esempio n. 1
0
    def get_audio_frames(self, file_path):
        """
            Collect the MFCC frames of every sample wav under *file_path*
            and stack them into one 2-D array.

            Samples are read as "<file_path>/<i>.wav" for i starting at 1;
            the upper bound self.number_of_samples is exclusive.
        """
        collected = [
            FeatureExtractor(file_path + "/" + str(sample) + ".wav").calculate_mfcc()
            for sample in xrange(1, self.number_of_samples)
        ]
        return np.vstack(tuple(collected))
Esempio n. 2
0
    def get_audio_frames(self, file_path):
        """
            Stack the MFCC frames of every sample wav under *file_path*
            into a single 2-D array.

            Reads "<file_path>/<i>.wav" for i = 1 .. number_of_samples - 1.
            NOTE(review): xrange excludes its upper bound, so the sample
            numbered number_of_samples itself is never read -- possibly an
            off-by-one; confirm against how number_of_samples is set.
        """
        all_frames = []
        for i in xrange(1, self.number_of_samples):
            extractor = FeatureExtractor(file_path + "/" + str(i) + ".wav")
            all_frames.append(extractor.calculate_mfcc())

        return np.vstack(tuple(all_frames))
Esempio n. 3
0
 def get_emotion(self, file_path):
     """
         Return "happiness" or "sadness" for the given audio file.

         A previously trained FANN network is loaded from disk and run on
         every MFCC frame of the file; the per-frame labels are then
         aggregated by majority vote.
         (The old docstring mentioned a "new_network" flag, but no such
         parameter exists in this method.)
     """
     self._ann.create_from_file("network_states/fann/fann.net")
     extractor = FeatureExtractor(file_path)
     mfcc_vectors = extractor.calculate_mfcc()
     frame_level_values = []
     for each_vector in mfcc_vectors:
         output = self._ann.run(each_vector)
         # FIX: the original compared two numpy arrays with ">" inside an
         # "if", which relies on the deprecated truth value of a one-element
         # array; compare the scalar network output directly instead.
         if output[0] > 0.5:
             frame_level_values.append("happiness")
         else:
             frame_level_values.append("sadness")
     # FIX: Counter.most_common replaces max over iteritems (Python 2 only)
     # and states the intent -- take the majority frame label.
     label = Counter(frame_level_values).most_common(1)[0][0]
     return label
Esempio n. 4
0
    def _get_mfcc_of_training_set(self):
        """
            Reads the mffc feature vectors of the audio
        """
        print "   -> Calculating mfcc feature vectors..."
        mfcc_coeff_vectors_dict = {}
        for i in range(1, 151):
            extractor = FeatureExtractor(
                DATASET_PATH + 'Happiness/HappinessAudios/' + str(i) + '.wav')
            mfcc_coeff_vectors = extractor.calculate_mfcc()
            mfcc_coeff_vectors_dict.update({str(i): mfcc_coeff_vectors})

        for i in range(201, 351):
            extractor = FeatureExtractor(
                DATASET_PATH + 'Sadness/SadnessAudios/' + str(i - 200) + '.wav')
            mfcc_coeff_vectors = extractor.calculate_mfcc()
            mfcc_coeff_vectors_dict.update({str(i): mfcc_coeff_vectors})
        print "   -> mfcc feature vectors found...."
        return mfcc_coeff_vectors_dict
Esempio n. 5
0
 def get_emotion(self, file_path):
     """
         Return "happiness" or "sadness" for the given audio file.

         Loads a previously trained FANN network from disk, classifies
         every MFCC frame, and returns the majority frame label.
         NOTE(review): the original docstring mentioned a "new_network"
         flag, but no such parameter exists in this method.
     """
     self._ann.create_from_file("network_states/fann/fann.net")
     extractor = FeatureExtractor(file_path)
     mfcc_vectors = extractor.calculate_mfcc()
     frame_level_values = []
     # Classify each frame independently; output above 0.5 means happiness.
     for each_vector in mfcc_vectors:
         output = self._ann.run(each_vector)
         # NOTE(review): truth-testing a 1-element array comparison is
         # deprecated in numpy; "output[0] > 0.5" would be equivalent.
         if np.array(output) > np.array([0.5]):
             frame_level_values.append("happiness")
         else:
             frame_level_values.append("sadness")
     # Majority vote over the frame labels (iteritems is Python 2 only).
     labels_count = Counter(frame_level_values)
     label = max(labels_count.iteritems(), key=operator.itemgetter(1))[0]
     return label
Esempio n. 6
0
    def test(self):
        sadness_correctCount = 0
        for i in xrange(150, 200):
            extractor = FeatureExtractor(
                "../../../Dataset/Sadness/SadnessAudios/" + str(i) + ".wav")
            input_frames = extractor.calculate_mfcc()
            if self.gmm_object.predict(input_frames) == 1:
                sadness_correctCount = sadness_correctCount + 1

        print "sadness correct count: ", sadness_correctCount

        happiness_correctCount = 0
        for i in xrange(150, 200):
            extractor = FeatureExtractor(
                "../../../Dataset/Happiness/HappinessAudios/" + str(i) + ".wav")
            input_frames = extractor.calculate_mfcc()
            if self.gmm_object.predict(input_frames) == 0:
                happiness_correctCount = happiness_correctCount + 1

        print "happiness correct count: ", happiness_correctCount
Esempio n. 7
0
def main():
    print "Calculating mfcc...."
    mfcc_coeff_vectors_dict = {}
    for i in range(1, 201):
        extractor = FeatureExtractor(
            '/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Happiness/HappinessAudios/' + str(i) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})

    for i in range(201, 401):
        extractor = FeatureExtractor(
            '/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Sadness/SadnessAudios/' + str(i - 200) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})

    audio_with_min_frames, min_frames = get_min_frames_audio(
        mfcc_coeff_vectors_dict)
    processed_mfcc_coeff = preprocess_input_vectors(
        mfcc_coeff_vectors_dict, min_frames)
    # frames = min_frames
    # print frames
    # print len(processed_mfcc_coeff['1'])
    # for each_vector in processed_mfcc_coeff['1']:
    #     print len(each_vector)
    print "mffcc found..."
    classes = ["happiness", "sadness"]

    training_data = ClassificationDataSet(
        26, target=1, nb_classes=2, class_labels=classes)
    # training_data = SupervisedDataSet(13, 1)
    try:
        network = NetworkReader.readFrom(
            'network_state_frame_level_new2_no_pp1.xml')
    except:
        for i in range(1, 51):
            mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
            for each_vector in mfcc_coeff_vectors:
                training_data.appendLinked(each_vector, [1])

        for i in range(201, 251):
            mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
            for each_vector in mfcc_coeff_vectors:
                training_data.appendLinked(each_vector, [0])

        training_data._convertToOneOfMany()
        print "prepared training data.."
        print training_data.indim, training_data.outdim
        network = buildNetwork(
            training_data.indim, 5, training_data.outdim, fast=True)
        trainer = BackpropTrainer(network, learningrate=0.01, momentum=0.99)
        print "Before training...", trainer.testOnData(training_data)
        trainer.trainOnDataset(training_data, 1000)
        print "After training...", trainer.testOnData(training_data)
        NetworkWriter.writeToFile(
            network, "network_state_frame_level_new2_no_pp.xml")
def main():
    print "Calculating mfcc...."
    mfcc_coeff_vectors_dict = {}
    for i in range(1, 201):
        extractor = FeatureExtractor('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Happiness/HappinessAudios/' + str(i) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})

    for i in range(201, 401):
        extractor = FeatureExtractor('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Sadness/SadnessAudios/' + str(i - 200) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i): (mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})

    audio_with_min_frames, min_frames = get_min_frames_audio(mfcc_coeff_vectors_dict)
    processed_mfcc_coeff = preprocess_input_vectors(mfcc_coeff_vectors_dict, min_frames)
    frames = min_frames
    print "mfcc found...."
    classes = ["happiness", "sadness"]
    try:
        network = NetworkReader.readFrom('network_state_new_.xml')
    except:
        # Create new network and start Training
        training_data = ClassificationDataSet(frames * 26, target=1, nb_classes=2, class_labels=classes)
        # training_data = SupervisedDataSet(frames * 39, 1)
        for i in range(1, 151):
            mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
            training_data.appendLinked(mfcc_coeff_vectors.ravel(), [1])
            # training_data.addSample(mfcc_coeff_vectors.ravel(), [1])

        for i in range(201, 351):
            mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
            training_data.appendLinked(mfcc_coeff_vectors.ravel(), [0])
            # training_data.addSample(mfcc_coeff_vectors.ravel(), [0])

        training_data._convertToOneOfMany()
        network = buildNetwork(training_data.indim, 5, training_data.outdim)
        trainer = BackpropTrainer(network, learningrate=0.01, momentum=0.99)
        print "Before training...", trainer.testOnData(training_data)
        trainer.trainOnDataset(training_data, 1000)
        print "After training...", trainer.testOnData(training_data)
        NetworkWriter.writeToFile(network, "network_state_new_.xml")

    print "*" * 30 , "Happiness Detection", "*" * 30
    for i in range(151, 201):
        output = network.activate(processed_mfcc_coeff[str(i)].ravel())
        # print output,
        # if output > 0.7:
        #     print "happiness"
        class_index = max(xrange(len(output)), key=output.__getitem__)
        class_name = classes[class_index]
        print class_name
Esempio n. 9
0
    def _get_mfcc_of_training_set(self):
        """
            Read the MFCC feature vectors of every training audio file.

            Returns a dict keyed by sample id (as a string): happiness
            files are "1".."150", sadness files are "201".."350" (the
            sadness files themselves are numbered 1..150 on disk).
        """
        print "   -> Calculating mfcc feature vectors..."
        mfcc_coeff_vectors_dict = {}
        for i in range(1, 151):
            extractor = FeatureExtractor(DATASET_PATH +
                                         'Happiness/HappinessAudios/' +
                                         str(i) + '.wav')
            mfcc_coeff_vectors = extractor.calculate_mfcc()
            mfcc_coeff_vectors_dict.update({str(i): mfcc_coeff_vectors})

        # Sadness ids are offset by 200 so keys never collide with happiness.
        for i in range(201, 351):
            extractor = FeatureExtractor(DATASET_PATH +
                                         'Sadness/SadnessAudios/' +
                                         str(i - 200) + '.wav')
            mfcc_coeff_vectors = extractor.calculate_mfcc()
            mfcc_coeff_vectors_dict.update({str(i): mfcc_coeff_vectors})
        print "   -> mfcc feature vectors found...."
        return mfcc_coeff_vectors_dict
Esempio n. 10
0
    def test(self):
        """
            Evaluate the trained GMM on held-out files 150..199 of each
            class and print the per-class correct-prediction counts.
        """
        # Sadness clips are expected to land in mixture component 1.
        sadness_correctCount = 0
        for i in xrange(150, 200):
            extractor = FeatureExtractor(
                "../../../Dataset/Sadness/SadnessAudios/" + str(i) + ".wav")
            input_frames = extractor.calculate_mfcc()
            if self.gmm_object.predict(input_frames) == 1:
                sadness_correctCount = sadness_correctCount + 1

        print "sadness correct count: ", sadness_correctCount

        # Happiness clips are expected to land in mixture component 0.
        happiness_correctCount = 0
        for i in xrange(150, 200):
            extractor = FeatureExtractor(
                "../../../Dataset/Happiness/HappinessAudios/" + str(i) +
                ".wav")
            input_frames = extractor.calculate_mfcc()
            if self.gmm_object.predict(input_frames) == 0:
                happiness_correctCount = happiness_correctCount + 1

        print "happiness correct count: ", happiness_correctCount
Esempio n. 11
0
def main():
    """
        Load a previously trained FANN network and evaluate it on the full
        training set, printing a per-file happiness/sadness label plus a
        summary count for each class.
    """
    mfcc_coeff_vectors_dict = {}
    # Happiness audios are keyed "1".."200"; each value is (vectors, n_frames).
    for i in range(1, 201):
        extractor = FeatureExtractor('../../../Dataset/Happiness/HappinessAudios/' + str(i) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i) :(mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})

    # Sadness audios are keyed "201".."400" (files on disk are 1..200).
    for i in range(201, 401):
        extractor = FeatureExtractor('../../../Dataset/Sadness/SadnessAudios/' + str(i - 200) + '.wav')
        mfcc_coeff_vectors = extractor.calculate_mfcc()
        mfcc_coeff_vectors_dict.update({str(i) :(mfcc_coeff_vectors, mfcc_coeff_vectors.shape[0])})

    # Second argument 0 presumably disables frame clipping -- TODO confirm
    # against preprocess_input_vectors.
    processed_mfcc_coeff = preprocess_input_vectors(mfcc_coeff_vectors_dict, 0)

    # Prepare training dataset
    # train_data_file = open("training_data/fann/training_data.data", "w")
    # train_data_file.writelines("194226 " + str(36) + " 1\n")
    # for i in range(1, 151):
    #     for each_vector in processed_mfcc_coeff[str(i)]:
    #         train_data_file.writelines((" ").join(map(str, each_vector)) + "\n")
    #         train_data_file.writelines("1\n")
    #
    # for i in range(201, 350):
    #     for each_vector in processed_mfcc_coeff[str(i)]:
    #         train_data_file.writelines((" ").join(map(str, each_vector)) + "\n")
    #         train_data_file.writelines("0\n")
    #
    # for each_vector in processed_mfcc_coeff[str(350)]:
    #     train_data_file.writelines((" ").join(map(str, each_vector)) + "\n")
    #     train_data_file.writelines("0")
    #
    # train_data_file.close()
    #
    # print "Data prepared...."
    #
    # connection_rate = 1
    # learning_rate = 0.01
    # num_input = 36
    # num_hidden = 6
    # num_output = 1
    #
    # desired_error = 0.0001
    # max_iterations = 500
    # iterations_between_reports = 100
    # #
    # ann = libfann.neural_net()
    # ann.create_sparse_array(connection_rate, (num_input, num_hidden, num_output))
    # ann.set_learning_rate(learning_rate)
    # ann.set_activation_function_output(libfann.SIGMOID_SYMMETRIC_STEPWISE)
    #
    # ann.train_on_file("training_data/fann/training_data.data", max_iterations, iterations_between_reports, desired_error)
    #
    # ann.save("network_states/fann/fann.net")
    # print "done!"
    # Create neural network from file
    ann = libfann.neural_net()
    ann.create_from_file("network_states/fann/fann.net")  # A trained network ll be loaded

    # Test for happiness detection
    print "*" * 30, "Happiness Detection", "*" * 30
    counter = {
        'happiness': 0,
        'sadness': 0
    }
    for i in range(1, 151):
        mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
        frame_level_values = []
        # Per-frame classification; output above 0.5 means happiness.
        for each_vector in mfcc_coeff_vectors:
            output = ann.run(each_vector)
            # NOTE(review): truth-testing a 1-element array comparison is
            # deprecated in numpy; "output[0] > 0.5" would be equivalent.
            if np.array(output) > np.array([0.5]):
                frame_level_values.append("happiness")
            else:
                frame_level_values.append("sadness")
        # Majority vote over frame labels (iteritems is Python 2 only).
        labels_count = Counter(frame_level_values)
        label = max(labels_count.iteritems(), key=operator.itemgetter(1))[0]
        print str(i) + ".wav: " + label
        counter[label] = counter[label] + 1
    print
    print counter
    print
    #
    # # This is test for sadness detection
    print "*" * 30, "Sadness Detection", "*" * 30
    counter = {
        'happiness': 0,
        'sadness': 0
    }

    for i in range(201, 351):
        mfcc_coeff_vectors = processed_mfcc_coeff[str(i)]
        frame_level_values = []
        for each_vector in mfcc_coeff_vectors:
            output = ann.run(each_vector)
            if np.array(output) > np.array([0.5]):
                frame_level_values.append("happiness")
            else:
                frame_level_values.append("sadness")
        labels_count = Counter(frame_level_values)
        label = max(labels_count.iteritems(), key=operator.itemgetter(1))[0]
        print str(i - 200) + ".wav: " + label
        counter[label] = counter[label] + 1
    print
    print counter