def _demo_audio_length(audio_path):
    """Return the duration of ``audio_path`` in seconds, as printed by ``soxi -D``."""
    # An argument list (no shell) passes paths containing spaces or shell
    # metacharacters to soxi verbatim instead of letting a shell re-parse them.
    return float(subprocess.check_output(["./sox/soxi", "-D", audio_path]))


def _demo_vectorize(frequencies):
    """Return one frequency per entry of ``tweaks.matrix_of_meanings[0]`` (0 when absent)."""
    # Keep the FIRST frequency seen for each word, matching a linear search
    # that stops at the first hit, but with a single pass over `frequencies`.
    first_frequency = {}
    for frequency in frequencies:
        first_frequency.setdefault(frequency[0], frequency[1])
    return [first_frequency.get(element, 0) for element in tweaks.matrix_of_meanings[0]]


def demo_of_the_presentation(filename, threshold_distance_of_meanings, playing_audio):
    """
    Run the demo: transcribe an audio file part by part and classify it.

    Steps performed:
      1. Split the audio file and load the serialized matrix of meanings and
         the serialized clusters.
      2. For every audio part, run speech-to-text, append the transcript to the
         text gathered so far, and print the closest clusters (cosine distance)
         for that accumulated text.
      3. Keep track of processing time versus audio time, and wait until the
         end of each part's duration so the run is paced like a live call.
      4. Print the overall timing and the contexts whose distance lies within
         ``threshold_distance_of_meanings`` of the best one.

    :param filename: name of the audio file, appended to ``tweaks.baseAudioFile``
    :param threshold_distance_of_meanings: maximum distance gap to the best
        cluster for another cluster to be reported as a context
    :param playing_audio: when true, also launch the audio player command
    """
    audio_spliter.splitAudioFile(filename)
    load_serialized_matrix_of_meanings()
    load_serialized_clusters()
    if tweaks.print_clusters_names:
        print_clusters_names()
    if tweaks.print_clusters_top_meanings:
        print_clusters_top_meanings()

    final_time = 0
    track_total_audio_parts = 0
    # Stays empty when there is no audio part (or no cluster), so the summary
    # below can report "no context" instead of crashing.
    latest_top_context_detected = []
    text = ""
    total_length_of_audio_file = _demo_audio_length(tweaks.baseAudioFile + filename)
    folder_name = filename.split(".", 1)[0]
    language = common_functions.getLanguage(folder_name)
    parts_directory = tweaks.outputDirectory + folder_name

    print()
    print("Demo starts here")
    if playing_audio:
        print("Launching the audio file with VLC")
        # NOTE(review): the command starts with "/Applications/ " followed by a
        # space, which looks like a truncated player path, and "&>" is a bash
        # redirection. Left untouched; confirm the intended executable.
        subprocess.Popen("/Applications/ " + tweaks.baseAudioFile + filename + " &> /dev/null", shell=True,
                         stdin=None, stdout=None, stderr=None, close_fds=False)
        time.sleep(1)
    print()

    for audio_part in common_functions.getListFolders(parts_directory):
        start_time = time.time()
        part_path = parts_directory + "/" + audio_part
        length_of_audio_file = _demo_audio_length(part_path)
        track_total_audio_parts += length_of_audio_file

        try:
            tmp_text = speech_to_text.transcriptAudioFile(part_path, language)
        except Exception:
            # Best effort: a failed transcription must not stop the demo, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print("An error occured during the speech to text process, do you have internet?")
        else:
            if tmp_text is not None:
                text += str(tmp_text) + " "
                print("Speech to text finished: \"" + str(tmp_text) + "\"")
            else:
                print("/!\\ Sorry, Google didn't understand this part.")

        # Classify everything transcribed so far (non-ASCII characters removed).
        cleaned_string = "".join(i for i in text if ord(i) < 128)
        frequencies = meaning_space.normalize_frequencies(part_of_speech.get_words_frequency(cleaned_string, 0))
        file_to_classify = _demo_vectorize(frequencies)

        file_clusters_distance = list(kmeans_nD.cos_cdist_clusters(tweaks.clusters_list, file_to_classify))
        distance_order = [
            [file_clusters_distance[i], tweaks.cluster_name[i]]
            for i in range(len(tweaks.clusters_list))
        ]
        distance_order.sort(key=lambda x: x[0])
        # Keep the full sorted ranking for the final summary; only the top
        # values are printed for this part.
        latest_top_context_detected = distance_order
        for distance in distance_order[:tweaks.top_values]:
            print(distance)

        elapsed_time = time.time() - start_time
        final_time += elapsed_time
        print("Process time for this part: " + format(elapsed_time, '.2f'))
        print("Original file length for this part: " + format(length_of_audio_file, '.2f'))

        different_of_time_process_real = length_of_audio_file - elapsed_time
        if different_of_time_process_real < 0:
            if length_of_audio_file <= 0.01:
                # Too short for a meaningful ratio: report the delay in seconds.
                print("Process time is late on audio track of: "
                      + format(-different_of_time_process_real, '.2f') + " seconds")
            else:
                # Scaled by 100 so the value matches its "%" suffix.
                percent_time_value = different_of_time_process_real / length_of_audio_file * 100
                print("Process time is late on audio track of: "
                      + format(abs(percent_time_value), '.2f') + "%")
        else:
            percent_time_value = different_of_time_process_real / length_of_audio_file * 100
            print("Process time is in advance on audio track of: " + format(percent_time_value, '.2f') + "%")

        print("Total process time until now: " + format(final_time, '.2f'))
        print("Total calling time until now: " + format(track_total_audio_parts, '.2f'))
        print()

        # Wait until the end of this part's audio length to simulate a live
        # call; sleeping avoids spinning the CPU for the remaining time.
        remaining_time = length_of_audio_file - (time.time() - start_time)
        if remaining_time > 0:
            time.sleep(remaining_time)

    # The ranking is already sorted by ascending distance; keep the best entry
    # plus every entry whose distance is within the threshold of it.
    # NOTE(review): index [1][1] assumes each tweaks.cluster_name entry is a
    # sequence whose second item is the label to display - confirm.
    context_output = []
    if latest_top_context_detected:
        best_distance = float(latest_top_context_detected[0][0])
        context_output.append(latest_top_context_detected[0][1][1])
        for context in latest_top_context_detected[1:]:
            if (float(context[0]) - best_distance) <= threshold_distance_of_meanings:
                context_output.append(str(context[1][1]))

    print()
    print("Audio file length: " + format(total_length_of_audio_file, '.2f') + " seconds")
    print("Process time of demo: " + format(float(final_time), '.2f') + " seconds")

    different_of_time_process_real = total_length_of_audio_file - final_time
    if different_of_time_process_real <= 0:
        if total_length_of_audio_file <= 0.01:
            # Avoid dividing by a (near) zero total length.
            print("Total Process time is late on audio track of: "
                  + format(-different_of_time_process_real, '.2f') + " seconds")
        else:
            percent_time_value = different_of_time_process_real / total_length_of_audio_file * 100
            print("Total Process time is late on audio track of: "
                  + format(abs(percent_time_value), '.2f') + "%")
    else:
        percent_time_value = different_of_time_process_real / total_length_of_audio_file * 100
        print("Process time is ahead on audio track of: " + format(percent_time_value, '.2f') + "%")

    if len(context_output) > 1:
        print("Contexts extracted within " + str(threshold_distance_of_meanings)
              + " of meaning threshold: " + str(context_output))
    elif len(context_output) == 1:
        print("Context extracted within " + str(threshold_distance_of_meanings)
              + " of meaning threshold: " + str(context_output))
    else:
        print("Could not extract any context of the audio file. Sorry?")
def section_part_of_speech(toggle, voice=False, audio_to_compare=None, file_to_compare=None, fileFolder=None):
    """
    Classify a text (read from a file or produced from audio) against the clusters.

    The text is stripped of non-ASCII characters, turned into normalized word
    frequencies, projected onto ``tweaks.matrix_of_meanings[0]`` and compared
    with ``tweaks.clusters_list``; the three closest clusters are printed.

    :param toggle: when false the function does nothing
    :param voice: when false the text is read from ``file_to_compare``;
        otherwise ``audio_to_compare`` is used as the text
    :param audio_to_compare: text to classify when ``voice`` is true
    :param file_to_compare: name of the text file inside
        ``tweaks.textFilesDirectory``; defaults to "wikipedia-intelligence.txt"
    :param fileFolder: name of the processed directory, used in the printed
        messages in place of "the audio file" when ``voice`` is true
    """
    if not toggle:
        return

    if not voice:
        if file_to_compare is None:
            file_to_compare = "wikipedia-intelligence.txt"
        # Read the content before the file is closed; the context manager also
        # closes the handle if reading fails.
        with open(tweaks.textFilesDirectory + file_to_compare, encoding="utf-8") as f:
            text = f.read()
    else:
        file_to_compare = "the audio file"
        # A missing transcript is handled as an empty one so the failure
        # message below is printed instead of raising on None.
        text = audio_to_compare if audio_to_compare is not None else ""
        if fileFolder is not None:
            file_to_compare = fileFolder

    # remove non-ascii
    processed_string = "".join(i for i in text if ord(i) < 128)
    frequencies = meaning_space.normalize_frequencies(part_of_speech.get_words_frequency(processed_string, 0))

    # Keep the first frequency seen for each word, then look every meaning up
    # once instead of rescanning `frequencies` for each of them.
    first_frequency = {}
    for frequency in frequencies:
        first_frequency.setdefault(frequency[0], frequency[1])
    file_to_classify = [first_frequency.get(element, 0) for element in tweaks.matrix_of_meanings[0]]

    if not frequencies:
        print(
            "The speech_to_text process of " + str(file_to_compare) +
            " has failed. Try again, or delete the audio source because it is too noisy."
        )
    else:
        print("The cos distance of " + str(file_to_compare) + " with the clusters is: [distance], [cluster]")
        file_clusters_distance = list(kmeans_nD.cos_cdist_clusters(tweaks.clusters_list, file_to_classify))

        # Label each distance with the cluster's top meanings or its name,
        # depending on the configured switch.
        if tweaks.use_clusters_top_meanings_or_names == 0:
            labels = tweaks.cluster_meaning
        else:
            labels = tweaks.cluster_name
        distance_order = [
            [file_clusters_distance[i], labels[i]]
            for i in range(len(tweaks.clusters_list))
        ]
        distance_order.sort(key=lambda x: x[0])

        for distance in distance_order[:3]:
            print(distance)
        print()