예제 #1
0
def transcribe(input_file_url):
    """
    Transcribes an audio file. The audio file format is a URL.

    Params:
        input_file_url (string): The input audio file URL
    """

    # Bail out early when the required environment variables are missing.
    if not utilities.check_env_vars():
        return
    utilities.create_ancillary_folders()

    # Download the podcast file to local disk.
    filepath = get_podcast_file(input_file_url)

    # Convert the file to raw audio chunks.
    chunks = convert_to_raw_audio_chunks(filepath)

    # Transcribe all chunks using the configured Google API key.
    transcriber = Transcriber(os.environ['GOOGLE_API_KEY'])
    transcript = transcriber.transcribe_many(chunks)

    # Write the transcript under the input file's base name.
    output_file_name = os.path.split(filepath)[-1]
    utilities.write_output_file(output_file_name, transcript)

    # BUG FIX: Python 2 print statement replaced with a Python 3 call —
    # the rest of the file uses Python 3 syntax (f-strings).
    print("Cleaning up...\n")
    utilities.cleanup()
 def test_transcribe_new_more_than_one_audio_file(self):
     """Smoke test: transcribing a folder with multiple audio files completes."""
     # BUG FIX: the original used an f-string with no placeholders (F541);
     # the printed text is unchanged.
     print('running______________......................................')
     transcriber = Transcriber()
     audio_input_folder = 'input3'
     output_folder = 'output3'
     transcriber.transcribe(audio_input_folder, output_folder)
     # Reaching this point without an exception is the success criterion.
     self.assertEqual(1, 1)
예제 #3
0
 def __init__(self, summary_word_count=30, number_of_chapters=10):
     """Set up the transcription, segmentation and summarization helpers."""
     self.transcriber = Transcriber()
     # Ordered fallback chain: when a segmentizer yields no segments,
     # the next one in the list is tried.
     self.segmentizers = [SentenceSimilaritySegmentizer(), SimpleSegmentizer()]
     self.summarizer = BartSummarizer(word_count=summary_word_count)
    def test_can_transcribe_long_speech(self):
        """A long speech sample should produce at least one transcription."""
        input_path = os.path.join(os.path.dirname(__file__), "input",
                                  "test1.mp4")
        video_manager = VideoManager(input_path)

        # Extract the audio track into a temporary .wav file.
        _, wav_path = tempfile.mkstemp(".wav")
        video_manager.extract_audio(wav_path)

        audio_manager = AudioManager(wav_path)
        results = list(Transcriber(audio_manager).transcribe())

        self.assertGreaterEqual(len(results), 1)
예제 #5
0
def main():
    """CLI entry point: route the parsed mode to its processor."""
    args = parse_args()
    transcriber = Transcriber()
    speaker_chunker = ChunkSpeaker()
    converter = Converter()
    mode = args.mode
    try:
        if mode == 'transcribe':
            transcriber.transcribe(args.input_folder, args.output_folder)
        elif mode == 'chunk_speaker':
            speaker_chunker.chunk(args.audio_input_path,
                                  args.speech_segmentation_path,
                                  args.output_folder)
        elif mode == 'convert':
            converter.convert(args.type, args.online_folder,
                              args.chunks_text_path, args.output_folder)
    except InputError as e:
        print(f'{InputError.__name__}:\n\t{e}')
예제 #6
0
def main():
    """Evaluate Sphinx3 WER on a speech/transcripts dataset (CLI entry point)."""
    parser = argparse.ArgumentParser(
        description='Evaluate Sphinx3 WER on speech/transcripts dataset')
    parser.add_argument(
        '--config',
        metavar='INI',
        type=str,
        default='sphinx-config.ini',
        help=
        'A configuration file specifying which models to use (default: %(default)s)'
    )
    parser.add_argument(
        '--directory',
        metavar='DIR',
        type=str,
        default='reith-lectures',
        help='Path to the evaluation dataset (default: %(default)s)')
    parser.add_argument(
        '--lazy',
        metavar='L',
        # BUG FIX: type=bool treats ANY non-empty string — including
        # "--lazy False" — as True; parse the text explicitly instead.
        type=_parse_cli_bool,
        default=False,
        help=
        'If set to true, do not attempt to derive any new data (default: %(default)s)'
    )
    args = parser.parse_args()
    lazy = args.lazy
    directory = args.directory
    config = ConfigParser()
    config_models = args.config
    transcriber = Transcriber()
    if not lazy:
        # BUG FIX: ConfigParser.readfp() is deprecated (removed in 3.12);
        # read_file() inside a `with` also closes the previously-leaked handle.
        with open(config_models) as config_file:
            config.read_file(config_file)
        config.items('models')
        acoustic_model = config.get('models', 'acoustic_model')
        dictionary = config.get('models', 'dictionary')
        filler = config.get('models', 'filler')
        language_model = config.get('models', 'language_model')
        convert_pdf_to_text(directory)
        transcriber.initialise(acoustic_model, dictionary, filler,
                               language_model)
    evaluate(transcriber, directory, lazy)


def _parse_cli_bool(value):
    """Interpret a command-line string as a boolean ('true'/'1'/'yes', any case)."""
    return str(value).strip().lower() in ('true', '1', 'yes', 'y')
def main():
    """Evaluate Sphinx3 WER on a speech/transcripts dataset (CLI entry point)."""
    parser = argparse.ArgumentParser(description='Evaluate Sphinx3 WER on speech/transcripts dataset')
    parser.add_argument('--config', metavar='INI', type=str, default='sphinx-config.ini', help='A configuration file specifying which models to use (default: %(default)s)')
    parser.add_argument('--directory', metavar='DIR', type=str, default='reith-lectures', help='Path to the evaluation dataset (default: %(default)s)')
    # BUG FIX: type=bool makes any non-empty string truthy ("--lazy False"
    # would still enable lazy mode); parse the text explicitly instead.
    parser.add_argument('--lazy', metavar='L',
                        type=lambda s: s.strip().lower() in ('true', '1', 'yes', 'y'),
                        default=False,
                        help='If set to true, do not attempt to derive any new data (default: %(default)s)')
    args = parser.parse_args()
    lazy = args.lazy
    directory = args.directory
    config = ConfigParser()
    config_models = args.config
    transcriber = Transcriber()
    if not lazy:
        # BUG FIX: readfp() is deprecated (removed in 3.12); read_file()
        # inside a `with` also closes the previously-leaked file handle.
        with open(config_models) as config_file:
            config.read_file(config_file)
        config.items('models')
        acoustic_model = config.get('models', 'acoustic_model')
        dictionary = config.get('models', 'dictionary')
        filler = config.get('models', 'filler')
        language_model = config.get('models', 'language_model')
        convert_pdf_to_text(directory)
        transcriber.initialise(acoustic_model, dictionary, filler, language_model)
    evaluate(transcriber, directory, lazy)
 def test_transcribe_new(self):
     """Artifacts produced by a transcription run must match fixture2."""
     output_folder = 'output1'
     transcriber = Transcriber()
     transcriber.transcribe('input1', output_folder)

     # Expected artifacts recorded under fixture2/.
     fixture_root = f'fixture2/{output_folder}'
     expected_transcription = load_fixture_rel(
         f'{fixture_root}/transcription.json')
     expected_text = load_document_rel(f'{fixture_root}/text')
     expected_segments = load_document_rel(f'{fixture_root}/segments')
     expected_wav_spc = load_document_rel(f'{fixture_root}/wav.scp')

     # Artifacts actually produced by the run above.
     run_root = f'{output_folder}/20191130-2034_Test1'
     transcription = load_fixture_rel(
         f'{output_folder}/results/20191130-2034_Test1/transcription.json')
     text = load_document_rel(f'{run_root}/text')
     segments = load_document_rel(f'{run_root}/segments')
     wav_scp = load_document_rel(f'{run_root}/wav.scp')

     self.validate_file(text, expected_text)
     self.validate_file(segments, expected_segments)
     self.validate_file(wav_scp, expected_wav_spc)
     self.validate_json(transcription, expected_transcription)
def main():
    """Beep/mask mature words in a video and print a JSON summary."""
    logger = logging.getLogger()

    logger.info("start processing %s", input_file)

    video = VideoManager(input_file)
    audio_path = video.extract_audio(settings.ROOT / "output/a.wav")
    thumbnail = video.extract_thumbnail(settings.ROOT / "output/a.jpg")

    audio_manager = AudioManager(audio_path)
    transcriber = Transcriber(audio_manager)

    filter_sections = []

    for transcription in transcriber.transcribe():
        logger.debug("transcription: %s", transcription)
        word, start, end = transcription
        # Skip anything that is not flagged as mature.
        if not detect_mature_word(word):
            continue
        logger.debug("mature word: %s, %s", word, detect_mature_word(word))
        audio_manager.apply_beep(start, end)
        video.apply_mask(start, end)
        filter_sections.append({
            "start_time": start,
            "end_time": end,
            "word": word
        })

    beeped_audio = audio_manager.save(settings.ROOT / "output/a_beep.wav")
    video.apply_audio(beeped_audio)
    video.save(settings.ROOT / "output/a.mp4")

    summary = {
        "thumbnail": str(thumbnail),
        "filter_sections": filter_sections,
        "filter_video": str(settings.ROOT / "output" / "a.mp4"),
    }
    print(json.dumps(summary))
예제 #10
0
def run(config_manager, logger):
    """Poll forever for untranscribed speech files and transcribe each one."""

    logger.msg('loading configuration file...')
    config = config_manager.get_config()

    logger.msg('ensuring that file directories exist...')
    file_manager = File_Manager(config['speech'], config['text'])
    if not file_manager.directories_exist():
        # NOTE(review): config_file_name is not defined in this block —
        # presumably a module-level global; confirm before relying on it.
        logger.err(
            'Please check the directories entered into the config file: ' +
            config_file_name)
        return

    logger.msg('waiting for files to transcribe...')
    # Transcription service handle built from the configured credentials.
    transcriber = Transcriber(config['username'], config['password'],
                              file_manager)
    while True:
        # Transcribe every speech file that has no text file yet.
        for speech_file in file_manager.speech_files_without_text_files():
            logger.msg('Transcribing: ' + speech_file + '...')
            transcriber.transcribe(speech_file)
            logger.msg(speech_file + ' transcribed')
            logger.msg('waiting for files to transcribe...')
        sleep(10)  # avoid a busy loop between polls
예제 #11
0
class Chapterizer:
    """Able to chapterize a YoutubeVideo. A chapter is a segment and its summary."""

    def __init__(self, summary_word_count=30, number_of_chapters=10):
        # NOTE(review): number_of_chapters is currently unused — kept for
        # interface compatibility with existing callers.
        self.transcriber = Transcriber()
        # A list of segmentizers, if one fails to segmentize, it tries to use the next in line.
        self.segmentizers = [
            SentenceSimilaritySegmentizer(),
            SimpleSegmentizer()
        ]
        self.summarizer = BartSummarizer(word_count=summary_word_count)

    def chapterize(self, url) -> ChapterizedYoutubeVideo:
        """Transcribe the video at *url*, segment it, and summarize each segment.

        The first segmentizer that produces any segments wins; later ones
        are only tried when the earlier ones yield nothing.
        """
        print(f"Processing: {url}")
        transcribed_youtube_video = self._get_transcribed_youtube_video(url)
        chapters = []
        for segmentizer in self.segmentizers:
            print(f"Using {segmentizer.__class__.__name__}")
            # IDIOM FIX: pass-through list comprehension replaced by list().
            segments = list(
                segmentizer.generate_segments(transcribed_youtube_video))
            if not segments:
                print(
                    f"{segmentizer.__class__.__name__} did not generate results."
                )
                continue
            for segment in segments:
                # Fall back to the raw segment text when summarization
                # fails or produces an empty summary.
                try:
                    summary = self.summarizer.summarize(segment.get_text())
                    if summary == "":
                        summary = segment.get_text()
                except ValueError:
                    summary = segment.get_text()
                chapter = Chapter(segment=segment, summary=summary)
                chapters.append(chapter)
            break  # this segmentizer produced segments; do not try the rest

        chapterized_youtube_video = ChapterizedYoutubeVideo(
            transcribed_youtube_video, chapters)
        print("Successfully chapterized!\n")
        return chapterized_youtube_video

    def _get_transcribed_youtube_video(self, url):
        """Fetch the transcript for the video behind *url* and wrap it."""
        youtube_video: YoutubeVideo = YoutubeVideo(url)
        transcript = self.transcriber.get_transcript(youtube_video.id)
        return TranscribedYoutubeVideo(youtube_video.id, transcript)
예제 #12
0
def transcriber():
    """Provide a fresh Transcriber instance."""
    instance = Transcriber()
    return instance
import json
import os

from builder import ChunkBuilder
from transcriber import Transcriber

# Module-level collaborators shared by all ChunkSpeaker instances below.
transcriber = Transcriber()
builder = ChunkBuilder()

class ChunkSpeaker(object):
    """Cuts an audio file into chunks following a speech segmentation file."""

    def chunk(self, audio_input_path, speech_segmentation_path, output_folder):
        """Read the JSON segmentation and chunk the audio file accordingly.

        Params:
            audio_input_path: path to the source audio file
            speech_segmentation_path: JSON file with a 'segments' list of
                {'start', 'duration'} items
            output_folder: destination folder for the produced chunks
        """
        with open(speech_segmentation_path, "r", encoding='utf-8') as fp:
            segmentation = json.load(fp)
        # Base name of the audio file, without directory or extension.
        filename = audio_input_path.split("/")[-1].split(".")[0]
        # BUG FIX: the destination directory used a garbled hard-coded name
        # while `filename` was computed but never used; also tolerate an
        # already-existing directory on repeated runs.
        os.makedirs(f'{output_folder}/{filename}', exist_ok=True)
        for item in segmentation['segments']:
            # (start, end) pair, end rounded to 2 decimal places.
            start_end = (item['start'],
                         round(item['start'] + item['duration'], 2))
            transcriber.chunk_audio_file(audio_input_path, output_folder,
                                         start_end)
예제 #14
0
import os
from transcriber import Transcriber

# Google Cloud credentials file and target storage bucket for the run.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"
os.environ['BUCKET_NAME'] = 'kerphi-video-bucket'

# Extract Japanese ('ja-JP') text from the local video file.
transcriber = Transcriber()
transcriber.extract_text('ja-JP', './video.mp4')