def main():
    """Extract MFCC features for all clips and train a per-vector classifier.

    Clips 1..200 are read from the Happiness directory and stored under keys
    '1'..'200'; clips 1..200 from the Sadness directory are stored under keys
    '201'..'400'.  Each dictionary value is a ``(mfcc_matrix, row_count)``
    pair.  The matrices are passed through ``preprocess_input_vectors`` and,
    unless a saved network can be loaded, a 26-5-N PyBrain network is trained
    on every individual vector of the first 50 clips of each emotion and then
    written to disk.

    Targets: 1 = happiness, 0 = sadness.
    """
    print("Calculating mfcc....")

    # (directory, key offset) -- the offset keeps sadness keys from colliding
    # with happiness keys in the shared dictionary.
    sources = (
        ('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Happiness/HappinessAudios/', 0),
        ('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Sadness/SadnessAudios/', 200),
    )
    mfcc_coeff_vectors_dict = {}
    for audio_dir, key_offset in sources:
        for clip_no in range(1, 201):
            extractor = FeatureExtractor(audio_dir + str(clip_no) + '.wav')
            coeffs = extractor.calculate_mfcc()
            mfcc_coeff_vectors_dict[str(clip_no + key_offset)] = (
                coeffs, coeffs.shape[0])

    # Only the second element of the returned pair is needed here.
    _, min_frames = get_min_frames_audio(mfcc_coeff_vectors_dict)
    processed_mfcc_coeff = preprocess_input_vectors(
        mfcc_coeff_vectors_dict, min_frames)
    print("mffcc found...")

    # Index in this list must equal the numeric target used below
    # (0 = sadness, 1 = happiness); the previous order was inverted.
    classes = ["sadness", "happiness"]
    training_data = ClassificationDataSet(
        26, target=1, nb_classes=2, class_labels=classes)

    try:
        # NOTE(review): this file name ('..._no_pp1.xml') differs from the one
        # written after training ('..._no_pp.xml'), so a freshly trained
        # network is never picked up on the next run -- confirm whether that
        # is intentional before unifying the names.
        network = NetworkReader.readFrom(
            'network_state_frame_level_new2_no_pp1.xml')
    except Exception:
        # Any load failure (missing or unreadable file) falls back to
        # training.  KeyboardInterrupt/SystemExit are deliberately not caught.
        for clip_key in range(1, 51):
            for each_vector in processed_mfcc_coeff[str(clip_key)]:
                training_data.appendLinked(each_vector, [1])
        for clip_key in range(201, 251):
            for each_vector in processed_mfcc_coeff[str(clip_key)]:
                training_data.appendLinked(each_vector, [0])
        training_data._convertToOneOfMany()
        print("prepared training data..")
        print(str(training_data.indim) + " " + str(training_data.outdim))

        network = buildNetwork(
            training_data.indim, 5, training_data.outdim, fast=True)
        trainer = BackpropTrainer(network, learningrate=0.01, momentum=0.99)
        print("Before training... " + str(trainer.testOnData(training_data)))
        trainer.trainOnDataset(training_data, 1000)
        print("After training... " + str(trainer.testOnData(training_data)))
        NetworkWriter.writeToFile(
            network, "network_state_frame_level_new2_no_pp.xml")
def main():
    """Train (or load) a clip-level emotion classifier and test it.

    Clips 1..200 are read from the Happiness directory and stored under keys
    '1'..'200'; clips 1..200 from the Sadness directory are stored under keys
    '201'..'400'.  After ``preprocess_input_vectors`` each clip's matrix is
    flattened with ``ravel()`` into a single input of ``min_frames * 26``
    values.  Keys 1..150 and 201..350 are used for training; the happiness
    clips 151..200 are then classified and the predicted label is printed for
    each.

    Targets: 1 = happiness, 0 = sadness.
    """
    print("Calculating mfcc....")

    # (directory, key offset) -- the offset keeps sadness keys from colliding
    # with happiness keys in the shared dictionary.
    sources = (
        ('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Happiness/HappinessAudios/', 0),
        ('/home/venkatesh/Venki/FINAL_SEM/Project/Datasets/Sadness/SadnessAudios/', 200),
    )
    mfcc_coeff_vectors_dict = {}
    for audio_dir, key_offset in sources:
        for clip_no in range(1, 201):
            extractor = FeatureExtractor(audio_dir + str(clip_no) + '.wav')
            coeffs = extractor.calculate_mfcc()
            mfcc_coeff_vectors_dict[str(clip_no + key_offset)] = (
                coeffs, coeffs.shape[0])

    # Only the second element of the returned pair is needed here.
    _, min_frames = get_min_frames_audio(mfcc_coeff_vectors_dict)
    processed_mfcc_coeff = preprocess_input_vectors(
        mfcc_coeff_vectors_dict, min_frames)
    frames = min_frames
    print("mfcc found....")

    # Index in this list must equal the numeric target used for training
    # (0 = sadness, 1 = happiness).  After _convertToOneOfMany() output unit 1
    # is the happiness unit, so the previous ["happiness", "sadness"] order
    # printed the opposite label for every prediction.
    classes = ["sadness", "happiness"]

    try:
        network = NetworkReader.readFrom('network_state_new_.xml')
    except Exception:
        # Any load failure (missing or unreadable file) falls back to
        # training.  KeyboardInterrupt/SystemExit are deliberately not caught.
        training_data = ClassificationDataSet(
            frames * 26, target=1, nb_classes=2, class_labels=classes)
        for clip_key in range(1, 151):
            training_data.appendLinked(
                processed_mfcc_coeff[str(clip_key)].ravel(), [1])
        for clip_key in range(201, 351):
            training_data.appendLinked(
                processed_mfcc_coeff[str(clip_key)].ravel(), [0])
        training_data._convertToOneOfMany()

        network = buildNetwork(training_data.indim, 5, training_data.outdim)
        trainer = BackpropTrainer(network, learningrate=0.01, momentum=0.99)
        print("Before training... " + str(trainer.testOnData(training_data)))
        trainer.trainOnDataset(training_data, 1000)
        print("After training... " + str(trainer.testOnData(training_data)))
        NetworkWriter.writeToFile(network, "network_state_new_.xml")

    print(" ".join(["*" * 30, "Happiness Detection", "*" * 30]))
    for clip_key in range(151, 201):
        output = network.activate(processed_mfcc_coeff[str(clip_key)].ravel())
        # Index of the strongest output unit (first one wins on a tie).
        class_index = max(range(len(output)), key=output.__getitem__)
        print(classes[class_index])
def main():
    """Evaluate a saved FANN network on the happiness and sadness clips.

    Clips 1..200 are read from the Happiness directory and stored under keys
    '1'..'200'; clips 1..200 from the Sadness directory are stored under keys
    '201'..'400'.  Every vector of a clip is run through the network
    separately; a vector votes "happiness" when the network output exceeds
    0.5 and "sadness" otherwise, and the clip gets the label with the most
    votes.  Per-clip labels and the label totals are printed for keys 1..150
    (happiness) and 201..350 (sadness).

    Raises:
        IOError: if the saved network file cannot be loaded.
    """
    # (directory, key offset) -- the offset keeps sadness keys from colliding
    # with happiness keys in the shared dictionary.
    sources = (
        ('../../../Dataset/Happiness/HappinessAudios/', 0),
        ('../../../Dataset/Sadness/SadnessAudios/', 200),
    )
    mfcc_coeff_vectors_dict = {}
    for audio_dir, key_offset in sources:
        for clip_no in range(1, 201):
            extractor = FeatureExtractor(audio_dir + str(clip_no) + '.wav')
            coeffs = extractor.calculate_mfcc()
            mfcc_coeff_vectors_dict[str(clip_no + key_offset)] = (
                coeffs, coeffs.shape[0])

    processed_mfcc_coeff = preprocess_input_vectors(mfcc_coeff_vectors_dict, 0)

    # NOTE: no training happens here; the network file must already exist.
    # The previously disabled training code targeted a 36-6-1 network
    # (learning rate 0.01, SIGMOID_SYMMETRIC_STEPWISE output, max 500
    # iterations, desired error 0.0001) -- presumably the saved network has
    # that shape; confirm before changing the feature size.
    network_path = "network_states/fann/fann.net"
    ann = libfann.neural_net()
    # create_from_file reports failure through its return value; running an
    # unloaded network would otherwise fail later in a far less obvious way.
    if not ann.create_from_file(network_path):
        raise IOError("could not load FANN network from " + network_path)

    def classify_clips(title, first_key, end_key, key_offset):
        """Print the majority label of each clip and return the label totals."""
        print(" ".join(["*" * 30, title, "*" * 30]))
        totals = {'happiness': 0, 'sadness': 0}
        for clip_key in range(first_key, end_key):
            frame_level_values = []
            for each_vector in processed_mfcc_coeff[str(clip_key)]:
                output = ann.run(each_vector)
                # Single-output network: only the first value is meaningful.
                if output[0] > 0.5:
                    frame_level_values.append("happiness")
                else:
                    frame_level_values.append("sadness")
            labels_count = Counter(frame_level_values)
            label = max(labels_count.items(), key=operator.itemgetter(1))[0]
            # Subtract the offset so the printed name matches the file on disk.
            print(str(clip_key - key_offset) + ".wav: " + label)
            totals[label] = totals[label] + 1
        return totals

    counter = classify_clips("Happiness Detection", 1, 151, 0)
    print("")
    print(counter)
    print("")

    counter = classify_clips("Sadness Detection", 201, 351, 200)
    print("")
    print(counter)