Exemple #1
0
def rate_sentence(videos, s):
    """Rate the first combos generated for sentence ``s`` via speech recognition.

    For each of the first ``NB_COMBOS`` combos returned by
    ``sm.process_sm(s, videos)``, the combo's audio segments are concatenated,
    summed along axis 1, resampled to 16000 and cast to C-ordered int16, then
    fed to the decoder returned by ``get_decoder()``. The recognized text is
    compared with ``s`` using ``sentence_distance``.

    Args:
        videos: videos forwarded unchanged to ``sm.process_sm``.
        s: the target sentence.

    Returns:
        A list holding one ``sentence_distance`` result per rated combo, in
        the order the combos were returned.
    """
    # TODO seed ?
    print(f'generating combos for "{s}"')
    combos = sm.process_sm(s, videos)
    print(f'rating "{s}"')
    rates = []

    for c in combos[:NB_COMBOS]:
        rate, wave = sentence_mixing.video_creator.audio.concat_segments(
            c.get_audio_phonems())
        # Collapse axis 1 (presumably the audio channels -- TODO confirm) into
        # a flat signal before resampling from `rate` to 16000.
        wave = (sentence_mixing.logic.audio_analysis.resample(
            np.sum(wave, axis=1).reshape((-1, )), rate,
            16000).astype("int16").copy(order="C"))

        decoder = get_decoder()

        decoder.start_utt()  # begin utterance processing
        decoder.process_raw(
            wave, False, True
        )  # process audio data with recognition enabled (no_search = False), as a full utterance (full_utt = True)
        decoder.end_utt()  # stop utterance processing

        # The decoder yields no hypothesis (None) when nothing was recognized;
        # rate that combo against an empty string instead of crashing on
        # `.hypstr` and losing the ratings already computed.
        hypothesis = decoder.hyp()
        recognized_sentence = "" if hypothesis is None else hypothesis.hypstr

        rates.append(sentence_distance(s, recognized_sentence))

    print(f"returning from {s}")
    return rates
Exemple #2
0
 def analyze(self, interrupt_callback):
     """Generate the combos for this sentence and store them with ``set_combos``.

     ``sm.process_sm`` is called with the instance's sentence, the project's
     videos and ``interrupt_callback``; each result is wrapped in a ``Combo``
     together with this instance and its position in the result sequence.
     """
     raw_combos = sm.process_sm(
         self._sentence,
         self.project.videos,
         interrupt_callback=interrupt_callback,
     )

     wrapped_combos = []
     for position, raw_combo in enumerate(raw_combos):
         wrapped_combos.append(Combo(raw_combo, self, position))

     self.set_combos(wrapped_combos)
if __name__ == "__main__":
    # Script entry point: parse the command line, load the sentence-mixing
    # configuration, fetch the videos and print the first combo produced for
    # the target sentence.
    parser = argparse.ArgumentParser(description=DESCRIPTION)

    # Optional seed, forwarded as the third argument of process_sm.
    parser.add_argument(
        "-s", "--seed", default=params.DEFAULT_SEED, help=SEED_HELP
    )

    # Positional arguments, in command-line order.
    parser.add_argument(
        "sentence",
        action="store",
        metavar="TARGET_SENTENCE",
        help=TARGET_SENTENCE_HELP,
    )
    parser.add_argument(
        "config_path",
        action="store",
        metavar="CONFIG_PATH",
        help=CONFIG_PATH_HELP,
    )
    # One or more video URLs (nargs="+").
    parser.add_argument(
        "video_urls",
        action="store",
        nargs="+",
        metavar="VIDEO_URL",
        help=VIDEO_URL_HELP,
    )

    args = parser.parse_args()

    sm.prepare_sm_config_file(args.config_path)
    videos = sm.get_videos(args.video_urls)

    combos = sm.process_sm(args.sentence, videos, args.seed)
    print(combos[0])
Exemple #4
0
def loop_interface(audio_command, video_futures):
    """Run the interactive sentence-by-sentence mixing loop.

    For every sentence obtained from ``get_sentence`` the user is shown one
    combo at a time: its audio is written with ``concat_wav`` to
    ``AUDIO_FILE_PATH`` and played by running ``audio_command`` through
    ``os.system``. The user can then validate it, edit the sentence, stash
    the audio, load a stashed audio, or move on to the next combo. The loop
    ends when ``get_sentence`` returns an empty string.

    Args:
        audio_command: format string; ``AUDIO_FILE_PATH`` is substituted with
            ``str.format`` and the result is executed with ``os.system``.
        video_futures: iterable whose first element (after ``list(...)``) is
            used as the videos passed to ``sm.process_sm``.

    Returns:
        A tuple ``(total_timestamps, total_text, videos)``: the accumulated
        audio phonems of all validated sentences, the validated sentences
        joined with newlines (each preceded by ``"\\n"``), and the videos
        (``None`` if no sentence was ever processed).
    """
    total_timestamps = []
    total_text = ""
    # Stash of audios the user stored with 's', and the sentence that was
    # current when each one was stored (parallel lists).
    timestamps_buffer = []
    timestamps_buffer_sentence = []

    sentence = get_sentence(None)
    # Resolved lazily on the first sentence that needs combos.
    videos = None

    while sentence != "":
        timestamps = []
        combo = None
        available_combos = []

        edit = False
        store = False
        valid = False
        load_audio_index = None
        # NOTE(review): `i` is never read; it is also overwritten by the
        # enumerate loop below. Looks like a leftover counter.
        i = 0
        while not valid and not edit:

            # Stores previous audio in buffer
            # (`store` was set by the answer given in the previous iteration,
            # so `timestamps` still refers to the audio that was just played).
            if store:
                timestamps_buffer.append(timestamps)
                timestamps_buffer_sentence.append(sentence)
                store = False

            if load_audio_index is not None:
                # Replay a stashed audio instead of consuming a new combo.
                # NOTE(review): `combo` is not updated here, so the display
                # below shows the most recently generated combo, not the
                # stashed one.
                timestamps = timestamps_buffer[load_audio_index]
                load_audio_index = None
            else:
                if len(available_combos) == 0:
                    # No combos left: (re)generate them, asking for a new
                    # sentence until process_sm accepts it.
                    bad_sentence = True
                    while bad_sentence:
                        try:
                            if videos is None:
                                print("downloading...")
                                videos = list(video_futures)[0]
                            available_combos = sm.process_sm(sentence, videos)
                            bad_sentence = False
                        except KeyError as e:
                            print(e, "not recognized")
                            sentence = get_sentence(total_text)
                        except PhonemError as e:
                            print(
                                e,
                                "Try to change your sentence or add more videos.",
                            )
                            sentence = get_sentence(total_text)
                        except TokenAmbiguityError as e:
                            print(
                                e,
                                "Please change this word",
                            )
                            sentence = get_sentence(total_text)
                # NOTE(review): raises IndexError if process_sm returned an
                # empty list -- confirm whether that can happen.
                combo = available_combos.pop(0)
                timestamps = combo.get_audio_phonems()

            print(combo_displayer(combo))
            concat_wav(AUDIO_FILE_PATH, timestamps)

            # Play the freshly written audio file with the external command.
            os.system(audio_command.format(AUDIO_FILE_PATH))

            if timestamps_buffer_sentence:
                print("Stashed audios:")
                for i, stashed_sentence in enumerate(
                        timestamps_buffer_sentence):
                    print(i, ".", stashed_sentence)
                print("")

            line = input(
                "Enter 'y' to validate, 'e' to edit the sentence, 's' to store this audio in the buffer, 'l' + index for loading previously stored audio, otherwise just press enter: "
            )
            valid = line == "y"
            edit = line == "e"
            store = line == "s"

            # "l <index>": schedule a stashed audio for the next iteration,
            # but only if the index is a valid position in the stash.
            if line.startswith("l "):
                index = line.split(" ")[1]
                if index.isdigit():
                    index = int(index)
                    if -1 < index < len(timestamps_buffer):
                        load_audio_index = index
                        print(load_audio_index)

            i += 1
            clear_screen()
        # The inner loop exits on either 'y' or 'e'; only a validated audio
        # is appended to the running result.
        if not edit:
            total_timestamps.extend(timestamps)
            total_text += "\n" + sentence

        # Progress is saved after every sentence, including after an edit.
        save(total_timestamps, total_text, name="video.json")
        sentence = get_sentence(total_text)
    clear_screen()
    return total_timestamps, total_text, videos
Exemple #5
0
    parser.add_argument(
        "sentence",
        metavar="TARGET_SENTENCE",
        action="store",
        help=TARGET_SENTENCE_HELP,
    )
    parser.add_argument(
        "config_path",
        metavar="CONFIG_PATH",
        action="store",
        help=CONFIG_PATH_HELP,
    )
    parser.add_argument(
        "video_urls",
        metavar="VIDEO_URL",
        nargs="+",
        action="store",
        help=VIDEO_URL_HELP,
    )

    args = parser.parse_args()
    out_dir = args.out_dir

    sm.prepare_sm_config_file(args.config_path)
    videos = sm.get_videos(args.video_urls)
    combos = sm.process_sm(args.sentence, videos, args.seed)

    print(combo_displayer(combos[0]))
    sentence_mixing.video_creator.audio.concat_wav(
        cli_interface.AUDIO_FILE_PATH, combos[0].get_audio_phonems())