def transcriptProcessedFiles(folderName):
    """
    Transcribe every audio file present in a folder and fuse the results.
    To be combined with the audio_splitter.

    When ``tweaks.use_serialized_audio`` is set and a pickled transcript named
    ``folderName`` exists in the serialized audio folder, that transcript is
    returned and nothing is transcribed. When ``tweaks.serialize_audio`` is
    set, the freshly built transcript is pickled under that same name.

    :param folderName: folder name in the audioProcessed folder
    :return: transcript of the split audio files, parts separated by one space
    """
    pathToFolder = tweaks.outputDirectory + folderName
    pathToFolder_serialized = tweaks.serialized_folder + tweaks.serialized_audio_folder

    files = common_functions.getListFolders(pathToFolder)

    language = common_functions.getLanguage(folderName)
    print("speech_to_text: " + folderName + " in " + language)

    sys.stdout.write("Text-to-Speech in progress: 0%.")

    # The cache lookup does not depend on the current file, so it is done once
    # up front instead of on every loop iteration.
    if tweaks.use_serialized_audio:
        if folderName in common_functions.getListFolders(pathToFolder_serialized):
            # NOTE(review): pickle.load can execute arbitrary code; the
            # serialized folder must only ever contain files written by this
            # program.
            with open(pathToFolder_serialized + folderName, 'rb') as load_file:
                transcriptText = pickle.load(load_file)
            print("Load Serialized.100%.Done")
            return transcriptText

    parts = []
    total = len(files)
    for done, file in enumerate(files, 1):
        text = transcriptAudioFile(pathToFolder + "/" + file, language)
        # A part that could not be transcribed is skipped rather than being
        # added to the transcript as the literal string "None".
        if text is not None:
            parts.append(str(text))

        # Progress is derived from the number of files done so that it reaches
        # 100% and cannot divide by zero on an empty folder.
        sys.stdout.write(str(100 * done // total) + "%.")

    transcriptText = " ".join(parts)

    if tweaks.serialize_audio:
        with open(pathToFolder_serialized + folderName, 'wb') as output:
            pickle.dump(transcriptText, output, -1)  # use of highest protocol available
        print(folderName + " is now serialized.")

    return transcriptText
def splitAudioFile(file):
    """
    Function that splits an audio file at silences. And makes a folder containing the parts of the file.
    Used mainly to distribute the load for the Google Speech Recognition, 15 seconds.
    It also simulates the real time communication by cutting the buffer at respiration of the speaker.

    The output folder is ``tweaks.outputDirectory`` followed by the file name
    up to its first dot; any previous content of that folder is removed first.

    :param file: Name of the file
    """
    # Local imports: only ``os`` is known to be imported by this module.
    import shlex
    import shutil
    import subprocess

    foldername = file.split(".", 1)[0]
    target_folder = tweaks.outputDirectory + foldername

    # Start from a clean folder; a folder that does not exist yet is fine.
    shutil.rmtree(target_folder, ignore_errors=True)

    print("audio splitting: " + file + " & language is: " + common_functions.getLanguage(file))
    if not os.path.isdir(target_folder):
        os.makedirs(target_folder)

    # An argument list (no shell) keeps file names containing spaces or shell
    # metacharacters intact.
    # NOTE(review): assumes tweaks.customParameters is a plain space-separated
    # list of sox arguments with no shell-only syntax -- confirm.
    command = ["./sox/sox", tweaks.baseAudioFile + file, target_folder + "/" + file]
    command += shlex.split(tweaks.customParameters)

    # sox output is discarded, as the "&> /dev/null" redirection intended.
    with open(os.devnull, "w") as devnull:
        try:
            subprocess.call(command, stdout=devnull, stderr=devnull)
        except OSError as error:
            # e.g. the sox binary is missing or not executable
            print("audio splitting: could not run sox: " + str(error))
def splitAudioFiles(number_of_iterations=None):
    """
    Function that splits all audio files available at silences. And makes a folder containing the parts of each file.
    Used mainly to distribute the load for the Google Speech Recognition, 15 seconds.
    It also simulates the real time communication by cutting the buffer at respiration of the speaker.

    Everything matching ``tweaks.outputDirectory + "*"`` is removed before
    splitting starts.

    :param number_of_iterations: maximum amount of files to process
                                 (None processes every file; at least one file
                                 is always processed)
    """
    # Local imports: only ``os`` is known to be imported by this module.
    import glob
    import shlex
    import shutil
    import subprocess

    # Wipe the previous output without going through the shell.
    for leftover in glob.glob(tweaks.outputDirectory + "*"):
        if os.path.isdir(leftover) and not os.path.islink(leftover):
            shutil.rmtree(leftover, ignore_errors=True)
        else:
            try:
                os.remove(leftover)
            except OSError as error:
                print("audio splitting: could not remove " + leftover + ": " + str(error))

    # NOTE(review): assumes tweaks.customParameters is a plain space-separated
    # list of sox arguments with no shell-only syntax -- confirm.
    sox_parameters = shlex.split(tweaks.customParameters)

    count = 0

    # sox output is discarded, as the "&> /dev/null" redirection intended.
    with open(os.devnull, "w") as devnull:
        for file in common_functions.getListFolders(tweaks.baseAudioFile):
            print("audio splitting: " + file + " and the language is: " + common_functions.getLanguage(file))
            foldername = file.split(".", 1)[0]
            target_folder = tweaks.outputDirectory + foldername
            if not os.path.isdir(target_folder):
                os.makedirs(target_folder)

            # An argument list (no shell) keeps file names containing spaces
            # or shell metacharacters intact.
            command = ["./sox/sox", tweaks.baseAudioFile + file, target_folder + "/" + file]
            try:
                subprocess.call(command + sox_parameters, stdout=devnull, stderr=devnull)
            except OSError as error:
                # e.g. the sox binary is missing or not executable
                print("audio splitting: could not run sox: " + str(error))

            if number_of_iterations is not None:
                count += 1
                if count >= number_of_iterations:
                    break
# Example #4
# NOTE(review): this function is not valid Python as it stands in this copy.
# The docstring delimiters and several statements appear to be missing; each
# gap is flagged below and should be restored from the original source.
def demo_of_the_presentation(filename, threshold_distance_of_meanings, playing_audio):
    # NOTE(review): the next five lines read as the docstring text, but they
    # are not enclosed in triple quotes here.
    This is the demo function, what does it?
    It loads the serialized data
    Split the audio file
    Do a speech to text of each file and compare the vector with the meaning space
    Keep track of the real talk time during call

    # Parameters, as used by the visible code:
    #   filename: audio file name, appended to tweaks.baseAudioFile; the part
    #       before its first dot names the folder of split parts.
    #   threshold_distance_of_meanings: largest allowed gap between a context's
    #       distance and the closest context's distance (see the final loop).
    #   playing_audio: when truthy, an external player process is started.

    # NOTE(review): the bodies of the two `if` statements below are missing.
    if tweaks.print_clusters_names:

    if tweaks.print_clusters_top_meanings:

    final_time = 0  # accumulated processing time over all parts
    track_total_audio_parts = 0  # accumulated length of the parts processed so far
    latest_top_context_detected = ""
    text = ""  # transcript accumulated over all parts
    # Output of `soxi -D` for the whole file; converted with float() and
    # printed as seconds at the end of the function.
    total_lenght_of_audio_file = subprocess.check_output("./sox/soxi -D " + tweaks.baseAudioFile + filename, shell=True)
    folder_name = filename.split(".", 1)[0]
    language = common_functions.getLanguage(folder_name)

    print("Demo starts here")
    if playing_audio:
        print("Launching the audio file with VLC")
        # NOTE(review): the command starts with "/Applications/ " followed by
        # the audio path; the player's path looks truncated -- confirm.
        subprocess.Popen("/Applications/ " + tweaks.baseAudioFile + filename + " &> /dev/null", shell=True,
             stdin=None, stdout=None, stderr=None, close_fds=False)

    # One iteration per entry listed for the folder of split parts.
    for audio_part in common_functions.getListFolders(tweaks.outputDirectory + folder_name):
        start_time = time.time()
        lenght_of_audio_file = subprocess.check_output(
            "./sox/soxi -D " + tweaks.outputDirectory + folder_name + "/" + audio_part, shell=True)
        track_total_audio_parts += float(lenght_of_audio_file)
        # NOTE(review): the lines below are indented one level deeper than the
        # loop body and the call is left unclosed; presumably a `try:` line,
        # the end of the call, an `else:` and an `except` line are missing.
            tmp_text = speech_to_text.transcriptAudioFile(tweaks.outputDirectory + folder_name + "/" + audio_part,
            if tmp_text is not None:
                text += str(tmp_text) + " "
                print("Speech to text finished: \"" + str(tmp_text) + "\"")
                print("/!\\ Sorry, Google didn't understand this part.")
            print("An error occured during the speech to text process, do you have internet?")

        # Drop every character whose code point is 128 or above.
        cleaned_string = "".join(i for i in text if ord(i) < 128)
        frequencies = meaning_space.normalize_frequencies(part_of_speech.get_words_frequency(cleaned_string, 0))
        file_to_classify = []

        # For each element of tweaks.matrix_of_meanings[0], count the entries
        # of `frequencies` whose first item equals that element.
        # NOTE(review): the body of `if count == 0:` is missing, and nothing
        # visible ever adds to file_to_classify.
        for element in tweaks.matrix_of_meanings[0]:
            count = 0
            for frequency in frequencies:
                if frequency[0] == element:
                    count += 1
            if count == 0:

        # Pair each cluster distance with the cluster name at the same index.
        file_clusters_distance = list(kmeans_nD.cos_cdist_clusters(tweaks.clusters_list, file_to_classify))
        distance_order = []
        for i in range(0, len(tweaks.clusters_list)):
            distance_order.append([file_clusters_distance[i], tweaks.cluster_name[i]])

        # This binds the same list object as distance_order, so the in-place
        # sort below also orders it; the slice afterwards rebinds
        # distance_order only.
        latest_top_context_detected = distance_order

        distance_order.sort(key=lambda x: x[0], reverse=False)
        distance_order = distance_order[:tweaks.top_values]

        # NOTE(review): the body of this loop is missing.
        for distance in distance_order:

        elapsed_time = time.time() - start_time

        final_time += elapsed_time

        print("Process time for this part: " + str(format(elapsed_time, '.2f')))
        print("Original file length for this part: " + str(format(float(lenght_of_audio_file), '.2f')))
        # Negative when processing took longer than the part lasts.
        different_of_time_process_real = float(lenght_of_audio_file) - elapsed_time
        # NOTE(review): `else:` lines appear to be missing in the block below.
        # As shown, the division by the part length sits under the
        # `<= 0.01` branch, and the "late" percentage is not multiplied by 100
        # while the "in advance" one is.
        if different_of_time_process_real < 0:
            if float(lenght_of_audio_file) <= 0.01:
                print("Process time is late on audio track of: " + str(format(different_of_time_process_real*(-1), '.2f')) + " seconds")
                percent_time_value = different_of_time_process_real/float(lenght_of_audio_file)
                print("Process time is late on audio track of: " + str(format(abs(percent_time_value), '.2f')) + "%")
            percent_time_value = different_of_time_process_real/float(lenght_of_audio_file)*100
            print("Process time is in advance on audio track of: " + str(format(percent_time_value, '.2f')) + "%")
        print("Total process time until now: " + str(format(final_time, '.2f')))
        print("Total calling time until now: " + str(format(track_total_audio_parts, '.2f')))

        # Waiting until the end of the audio length part to simulate the
        # Busy loop: keeps re-reading the clock until the time spent on this
        # part reaches the part's length.
        while float(lenght_of_audio_file) > elapsed_time:
            elapsed_time = time.time() - start_time

    # NOTE(review): if the loop above never ran, latest_top_context_detected
    # is still the initial "" and a str has no .sort().
    latest_top_context_detected.sort(key=lambda x: x[0], reverse=False)
    # Start with item 1 of the name stored for the closest entry.
    context_output = [latest_top_context_detected[0][1][1]]
    # NOTE(review): the body of the `if` below is missing; presumably it adds
    # the context to context_output -- confirm.
    for context in latest_top_context_detected[1:]:
        if (float(context[0]) - float(latest_top_context_detected[0][0])) <= threshold_distance_of_meanings:

    print("Audio file length: " + str(format(float(total_lenght_of_audio_file), '.2f')) + " seconds")
    print("Process time of demo: " + str(format(float(final_time), '.2f')) + " seconds")

    # Negative or zero when the whole demo took at least as long as the audio.
    different_of_time_process_real = float(total_lenght_of_audio_file) - final_time
    # NOTE(review): an `else:` line appears to be missing before the "ahead"
    # computation, and the "late" percentage is not multiplied by 100.
    if different_of_time_process_real <= 0:
        percent_time_value = different_of_time_process_real/float(total_lenght_of_audio_file)
        print("Total Process time is late on audio track of: " + str(format(abs(percent_time_value), '.2f')) + "%")
        percent_time_value = different_of_time_process_real/float(total_lenght_of_audio_file)*100
        print("Process time is ahead on audio track of: " + str(format(percent_time_value, '.2f')) + "%")

    # NOTE(review): an `else:` line appears to be missing before the last
    # print; as shown it runs inside the `elif` branch.
    if len(context_output) > 1:
        print("Contexts extracted within " + str(threshold_distance_of_meanings) + " of meaning threshold: " + str(context_output))
    elif len(context_output) == 1:
        print("Context extracted within " + str(threshold_distance_of_meanings) + " of meaning threshold: " + str(context_output))
        print("Could not extract any context of the audio file. Sorry?")