def transcriptProcessedFiles(folderName):
    """
    Transcribe every audio chunk in a processed-audio folder and join the results.

    Meant to be combined with the audio splitter: each chunk in
    ``tweaks.outputDirectory + folderName`` is transcribed individually and the
    pieces are concatenated into one transcript.

    :param folderName: sub-folder name inside the processed-audio directory
    :return: full transcript string (leading separator stripped); the cached
             transcript when a serialized copy exists and is enabled
    """
    pathToFolder = tweaks.outputDirectory + folderName
    language = common_functions.getLanguage(folderName)
    print("speech_to_text: " + folderName + " in " + language)

    # Serialized-cache lookup hoisted out of the per-file loop: it does not
    # depend on the file being processed (the original re-checked it on every
    # iteration and was unreachable for empty folders).
    pathToFolder_serialized = tweaks.serialized_folder + tweaks.serialized_audio_folder
    if tweaks.use_serialized_audio:
        check_serialized_files = common_functions.getListFolders(pathToFolder_serialized)
        if folderName in check_serialized_files:
            with open(pathToFolder_serialized + folderName, 'rb') as load_file:
                transcriptText = pickle.load(load_file)
            print("Load Serialized.100%.Done")
            return transcriptText

    files = common_functions.getListFolders(pathToFolder)
    if not files:
        # Guard: the original divided by zero on an empty folder.
        return ""
    progressbarUnit = int(100 / len(files))
    totalProgressbar = 0
    transcriptText = ""
    sys.stdout.write("Text-to-Speech in progress: " + str(totalProgressbar) + "%.")
    sys.stdout.flush()
    for file in files:
        try:
            text = transcriptAudioFile(pathToFolder + "/" + file, language)
            if text is not None:
                transcriptText = transcriptText + " " + str(text)
        except Exception:
            # The original bare `except:` re-appended the PREVIOUS chunk's
            # `text` (stale local), duplicating content — and raised NameError
            # when the very first chunk failed. Skip the failed chunk instead.
            pass
        totalProgressbar += progressbarUnit
        sys.stdout.write(str(totalProgressbar) + "%.")
        sys.stdout.flush()
    print("..Done")
    if tweaks.serialize_audio:
        with open(tweaks.serialized_folder + tweaks.serialized_audio_folder + folderName, 'wb') as output:
            pickle.dump(transcriptText[1:], output, -1)  # use of highest protocol available
        print(folderName + " is now serialized.")
    # [1:] drops the leading " " separator added before the first chunk.
    return transcriptText[1:]
def section_voice_recognition(toggle, max_files=None, filename=None):
    """
    Run the voice-recognition section over the processed-audio folders.

    :param toggle: when falsy, the section is skipped entirely
    :param max_files: maximum number of folders to process; defaults to the
                      number of base audio files available
    :param filename: when given, only that folder is transcribed
    :return: True
    """
    if toggle:
        if max_files is None:
            max_files = len(common_functions.getListFolders(tweaks.baseAudioFile))
        if tweaks.manual_split_control:
            audio_spliter.splitAudioFiles(max_files)

        processed = 0
        for fileFolder in common_functions.getListFolders(tweaks.outputDirectory):
            if filename is None:
                # No target given: transcribe every folder up to max_files.
                transcript = speech_to_text.transcriptProcessedFiles(fileFolder)
                section_part_of_speech(tweaks.run_part_of_speech, True, transcript, None, fileFolder)
                processed += 1
                if processed >= max_files:
                    return True
            elif filename == fileFolder:
                # Targeted run: transcribe only the requested folder and stop.
                transcript = speech_to_text.transcriptProcessedFiles(fileFolder)
                section_part_of_speech(tweaks.run_part_of_speech, True, transcript, None, fileFolder)
                return True
    return True
def splitAudioFiles(number_of_iterations=None):
    """
    Split every base audio file at silences into a per-file chunk folder.

    Used mainly to distribute the load for the Google Speech Recognition
    (15-second limit) and to simulate real-time communication by cutting the
    buffer at the speaker's respirations.

    :param number_of_iterations: maximum number of files to process;
                                 None processes everything
    """
    # Start from a clean output directory.
    # NOTE(review): commands are built from file names via the shell —
    # acceptable for local trusted files, but worth hardening.
    os.system("rm -r " + tweaks.outputDirectory + "*")

    processed = 0
    for audio_file in common_functions.getListFolders(tweaks.baseAudioFile):
        print("audio splitting: " + audio_file + " and the language is: " + common_functions.getLanguage(audio_file))
        target_folder = audio_file.split(".", 1)[0]
        target_dir = tweaks.outputDirectory + target_folder
        os.system("mkdir " + target_dir + " &> /dev/null")
        sox_command = (
            "./sox/sox " + tweaks.baseAudioFile + audio_file + " "
            + target_dir + "/" + audio_file + " "
            + tweaks.customParameters + " &> /dev/null"
        )
        os.system(sox_command)
        if number_of_iterations is not None:
            processed += 1
            if processed >= number_of_iterations:
                break
def demo_of_the_presentation(filename, threshold_distance_of_meanings, playing_audio):
    """
    Live demo: split an audio file, transcribe each chunk, and classify the
    running transcript against the serialized meaning space in (simulated)
    real time.

    It loads the serialized data, splits the audio file, does a speech-to-text
    of each part, compares the resulting vector with the meaning space, and
    keeps track of the real talk time during the call.

    :param filename: audio file name inside tweaks.baseAudioFile
    :param threshold_distance_of_meanings: max distance from the closest
        cluster for an extra context to still be reported at the end
    :param playing_audio: when True, also plays the file through VLC (macOS path)
    """
    # NOTE(review): calls `splitAudioFile` (singular) — the splitter module
    # visible in this codebase defines `splitAudioFiles`; confirm this helper exists.
    audio_spliter.splitAudioFile(filename)
    load_serialized_matrix_of_meanings()
    load_serialized_clusters()
    if tweaks.print_clusters_names:
        print_clusters_names()
    if tweaks.print_clusters_top_meanings:
        print_clusters_top_meanings()
    final_time = 0                    # cumulative processing time (seconds)
    track_total_audio_parts = 0       # cumulative audio length processed so far
    latest_top_context_detected = ""  # last [distance, cluster-name] list built in the loop
    text = ""                         # running transcript across all chunks
    # `soxi -D` prints the duration of the file in seconds (bytes string).
    total_lenght_of_audio_file = subprocess.check_output("./sox/soxi -D " + tweaks.baseAudioFile + filename,
                                                         shell=True)
    folder_name = filename.split(".", 1)[0]
    language = common_functions.getLanguage(folder_name)
    print()
    print("Demo starts here")
    if playing_audio:
        print("Launching the audio file with VLC")
        # Fire-and-forget playback; shell discards VLC's output.
        subprocess.Popen("/Applications/VLC.app/Contents/MacOS/VLC " + tweaks.baseAudioFile + filename
                         + " &> /dev/null",
                         shell=True, stdin=None, stdout=None, stderr=None, close_fds=False)
        time.sleep(1)  # give VLC a head start before processing begins
    print()
    for audio_part in common_functions.getListFolders(tweaks.outputDirectory + folder_name):
        start_time = time.time()
        lenght_of_audio_file = subprocess.check_output(
            "./sox/soxi -D " + tweaks.outputDirectory + folder_name + "/" + audio_part, shell=True)
        track_total_audio_parts += float(lenght_of_audio_file)
        try:
            tmp_text = speech_to_text.transcriptAudioFile(tweaks.outputDirectory + folder_name + "/" + audio_part,
                                                          language)
            if tmp_text is not None:
                text += str(tmp_text) + " "
                print("Speech to text finished: \"" + str(tmp_text) + "\"")
            else:
                print("/!\\ Sorry, Google didn't understand this part.")
        except:  # NOTE(review): bare except also hides non-network errors
            print("An error occured during the speech to text process, do you have internet?")
        # Strip non-ASCII characters before the frequency analysis.
        cleaned_string = "".join(i for i in text if ord(i) < 128)
        frequencies = meaning_space.normalize_frequencies(part_of_speech.get_words_frequency(cleaned_string, 0))
        # Project the transcript-so-far onto the meaning space: one coordinate
        # per known meaning, 0 when that meaning does not occur.
        file_to_classify = []
        for element in tweaks.matrix_of_meanings[0]:
            count = 0
            for frequency in frequencies:
                if frequency[0] == element:
                    file_to_classify.append(frequency[1])
                    count += 1
                    break
            if count == 0:
                file_to_classify.append(0)
        file_clusters_distance = list(kmeans_nD.cos_cdist_clusters(tweaks.clusters_list, file_to_classify))
        distance_order = []
        for i in range(0, len(tweaks.clusters_list)):
            distance_order.append([file_clusters_distance[i], tweaks.cluster_name[i]])
        # Keep the unsorted copy for the end-of-demo summary (same list object,
        # so the sort below reorders it too).
        latest_top_context_detected = distance_order
        distance_order.sort(key=lambda x: x[0], reverse=False)  # closest cluster first
        distance_order = distance_order[:tweaks.top_values]
        for distance in distance_order:
            print(distance)
        elapsed_time = time.time() - start_time
        final_time += elapsed_time
        print("Process time for this part: " + str(format(elapsed_time, '.2f')))
        print("Original file length for this part: " + str(format(float(lenght_of_audio_file), '.2f')))
        different_of_time_process_real = float(lenght_of_audio_file) - elapsed_time
        if different_of_time_process_real < 0:
            if float(lenght_of_audio_file) <= 0.01:
                # Near-zero-length part: a percentage would blow up, so report seconds.
                print("Process time is late on audio track of: " + str(format(different_of_time_process_real*(-1), '.2f')) + " seconds")
            else:
                # NOTE(review): unlike the branch below, this ratio is not
                # multiplied by 100 yet is printed with "%" — confirm intent.
                percent_time_value = different_of_time_process_real/float(lenght_of_audio_file)
                print("Process time is late on audio track of: " + str(format(abs(percent_time_value), '.2f')) + "%")
        else:
            percent_time_value = different_of_time_process_real/float(lenght_of_audio_file)*100
            print("Process time is in advance on audio track of: " + str(format(percent_time_value, '.2f')) + "%")
        print("Total process time until now: " + str(format(final_time, '.2f')))
        print("Total calling time until now: " + str(format(track_total_audio_parts, '.2f')))
        print()
        # Busy-wait until the wall clock catches up with this part's audio
        # length, simulating a real-time call.
        while float(lenght_of_audio_file) > elapsed_time:
            elapsed_time = time.time() - start_time
    # End-of-demo summary: closest contexts and overall timing.
    latest_top_context_detected.sort(key=lambda x: x[0], reverse=False)
    # Each entry is [distance, cluster_name_list]; [1][1] is the first corpus
    # name stored in that cluster's name list.
    context_output = [latest_top_context_detected[0][1][1]]
    for context in latest_top_context_detected[1:]:
        if (float(context[0]) - float(latest_top_context_detected[0][0])) <= threshold_distance_of_meanings:
            context_output.append(str(context[1][1]))
    print()
    print("Audio file length: " + str(format(float(total_lenght_of_audio_file), '.2f')) + " seconds")
    print("Process time of demo: " + str(format(float(final_time), '.2f')) + " seconds")
    different_of_time_process_real = float(total_lenght_of_audio_file) - final_time
    if different_of_time_process_real <= 0:
        # NOTE(review): ratio not scaled by 100 here either — verify.
        percent_time_value = different_of_time_process_real/float(total_lenght_of_audio_file)
        print("Total Process time is late on audio track of: " + str(format(abs(percent_time_value), '.2f')) + "%")
    else:
        percent_time_value = different_of_time_process_real/float(total_lenght_of_audio_file)*100
        print("Process time is ahead on audio track of: " + str(format(percent_time_value, '.2f')) + "%")
    if len(context_output) > 1:
        print("Contexts extracted within " + str(threshold_distance_of_meanings) + " of meaning threshold: " + str(context_output))
    elif len(context_output) == 1:
        print("Context extracted within " + str(threshold_distance_of_meanings) + " of meaning threshold: " + str(context_output))
    else:
        print("Could not extract any context of the audio file. Sorry?")
def section_part_of_speech_training(number_of_clusters, files=None):
    """
    Train the part-of-speech section by clustering corpora into meaning clusters.

    Builds the meaning space from all corpora, projects each corpus onto it,
    runs k-means, and stores per-cluster meanings and corpus names in `tweaks`.

    :param number_of_clusters: number of clusters targeted
    :param files: list of corpus files to classify; if not specified, all
                  corpora in tweaks.textFilesDirectory are used
    """
    meaning_space.init_meaning_space()
    if files is None:
        files = common_functions.getListFolders(tweaks.textFilesDirectory)

    # First pass: grow the meaning space over every corpus.
    for file in files:
        with open(tweaks.textFilesDirectory + file, encoding="utf-8") as f:
            text = f.read()
        tweaks.matrix_of_names.append(file[:-4])  # strip the 4-char extension
        # remove non-ascii
        processed_string = "".join(i for i in text if ord(i) < 128)
        frequency = part_of_speech.get_words_frequency(processed_string, 0)
        meaning_space.build_meaning_space(frequency)

    # Second pass: project each corpus onto the completed meaning space.
    # enumerate replaces files.index(file): O(n) per corpus and correct even
    # with duplicate file names.
    for index, file in enumerate(files):
        with open(tweaks.textFilesDirectory + file, encoding="utf-8") as f:
            text = f.read()
        # remove non-ascii
        processed_string = "".join(i for i in text if ord(i) < 128)
        frequency = meaning_space.normalize_frequencies(part_of_speech.get_words_frequency(processed_string, 0))
        # NOTE(review): `top_values` is unqualified here while other code uses
        # `tweaks.top_values` — confirm a module-level global exists.
        tweaks.top_matrix_of_meanings.append(part_of_speech.get_words_frequency(processed_string, top_values))
        meaning_space.populate_meaning_matrix(frequency, index, file)

    # Cluster the meaning matrix (row 0 is the header of meanings, skipped).
    tweaks.clusters_list = kmeans_nD.get_clusters_centroids(tweaks.matrix_of_meanings[1:], number_of_clusters)
    tweaks.index_of_data_custering = kmeans_nD.get_index_assign_data_to_cluster(
        tweaks.matrix_of_meanings[1:], tweaks.clusters_list
    )

    # Seed each per-cluster list with its own index, then append the top
    # meanings / corpus names of every member corpus.
    cluster_meaning_tmp = [[i] for i in range(number_of_clusters)]
    cluster_names_tmp = [[i] for i in range(number_of_clusters)]
    for n in range(len(tweaks.index_of_data_custering)):
        for meaning in tweaks.top_matrix_of_meanings[n]:
            cluster_meaning_tmp[tweaks.index_of_data_custering[n]].append(meaning[0])
    for n in range(len(tweaks.index_of_data_custering)):
        cluster_names_tmp[tweaks.index_of_data_custering[n]].append(tweaks.matrix_of_names[n])

    # De-duplicate the meanings of each cluster. The original removed items
    # from the list WHILE iterating it, which skips elements and can leave
    # duplicates behind; dict.fromkeys() is an order-preserving dedup.
    tweaks.cluster_meaning = [list(dict.fromkeys(cluster)) for cluster in cluster_meaning_tmp]
    tweaks.cluster_name = list(cluster_names_tmp)
result.sort(key=lambda x: x[1], reverse=True) result = result[:top_values] return result else: return result if __name__ == "__main__": """ This function is run if this file is run directly. It will: Open the first corpus in alphabetical order Get the frequency Display the synset and its frequency """ files = common_functions.getListFolders(tweaks.textFilesDirectory) f = open(tweaks.textFilesDirectory + files[0], encoding="utf-8") text = f.read() # remove non-ascii processed_string = "".join(i for i in text if ord(i) < 128) frequency = get_words_frequency(processed_string, 0) for word in frequency: print(word)
if text is not None: transcriptText = transcriptText + " " + str(text) except: transcriptText = transcriptText + " " + str(text) totalProgressbar += progressbarUnit sys.stdout.write(str(totalProgressbar) + "%.") sys.stdout.flush() print("..Done") if tweaks.serialize_audio: output = open(tweaks.serialized_folder + tweaks.serialized_audio_folder + folderName, 'wb') pickle.dump(transcriptText[1:], output, -1) # use of highest protocol available output.close() print(folderName + " is now serialized.") return transcriptText[1:] if __name__ == "__main__": """ This function is run if this file is run directly. It will: Transcribe the first sub-folder in the audioProcessing folder. Print the transcription """ folders = common_functions.getListFolders(tweaks.outputDirectory) text = transcriptProcessedFiles(folders[0]) print(text)