def transcribe(input_file_url):
    """Transcribe a podcast audio file fetched from a URL.

    Downloads the file, splits it into raw audio chunks, transcribes the
    chunks with the Google-backed Transcriber, and writes the transcript
    to an output file named after the downloaded podcast file.

    Params:
        input_file_url (string): The input audio file URL
    """
    # Setting up environment; bail out early if required env vars are missing.
    if not utilities.check_env_vars():
        return
    utilities.create_ancillary_folders()

    # download the podcast file
    filepath = get_podcast_file(input_file_url)

    # convert file to raw audio chunks
    chunks = convert_to_raw_audio_chunks(filepath)

    # transcribe chunks
    transcriber = Transcriber(os.environ['GOOGLE_API_KEY'])
    transcript = transcriber.transcribe_many(chunks)

    # write to the output file, named after the downloaded file
    output_file_name = os.path.split(filepath)[-1]
    utilities.write_output_file(output_file_name, transcript)

    # Fixed: original used the Python 2 print statement (`print "..."`),
    # which is a syntax error under Python 3.
    print("Cleaning up...\n")
    utilities.cleanup()
def test_transcribe_new_more_than_one_audio_file(self):
    """Smoke test: transcribing a folder containing more than one audio
    file completes without raising.

    NOTE(review): the final assertion is vacuous — this test only proves
    transcribe() does not raise. Add real output validation (as in
    test_transcribe_new) once fixtures for input3 exist.
    """
    # Fixed: removed a leftover debug print
    # (`running______________......`) that polluted test output.
    transcriber = Transcriber()
    audio_input_folder = 'input3'
    output_folder = 'output3'
    transcriber.transcribe(audio_input_folder, output_folder)
    self.assertEqual(1, 1)
def __init__(self, summary_word_count=30, number_of_chapters=10): self.transcriber = Transcriber() # A list of segmentizers, if one fails to segmentize, it tries to use the next in line. self.segmentizers = [ SentenceSimilaritySegmentizer(), SimpleSegmentizer() ] self.summarizer = BartSummarizer(word_count=summary_word_count)
def test_can_transcribe_long_speech(self):
    """Extracting audio from the sample video and transcribing it yields
    at least one transcription result."""
    manager = VideoManager(
        os.path.join(os.path.dirname(__file__), "input", "test1.mp4"),
    )
    # Fixed: mkstemp returns an OPEN OS-level file descriptor; the
    # original discarded it (`_, audio = ...`), leaking the handle.
    fd, audio = tempfile.mkstemp(".wav")
    os.close(fd)
    try:
        manager.extract_audio(audio)
        audio_manager = AudioManager(audio)
        transcriber = Transcriber(audio_manager)
        transcribe_result = list(transcriber.transcribe())
        self.assertGreaterEqual(len(transcribe_result), 1)
    finally:
        # mkstemp files are not auto-deleted; remove the scratch file.
        os.remove(audio)
def main():
    """Entry point: run the pipeline stage selected by the CLI `mode`."""
    args = parse_args()
    transcribe = Transcriber()
    chunker = ChunkSpeaker()
    converter = Converter()
    try:
        # Only one mode can match, so dispatch with an elif chain.
        if args.mode == 'transcribe':
            transcribe.transcribe(args.input_folder, args.output_folder)
        elif args.mode == 'chunk_speaker':
            chunker.chunk(
                args.audio_input_path,
                args.speech_segmentation_path,
                args.output_folder,
            )
        elif args.mode == 'convert':
            converter.convert(
                args.type,
                args.online_folder,
                args.chunks_text_path,
                args.output_folder,
            )
    except InputError as e:
        print(f'{InputError.__name__}:\n\t{e}')
def main():
    """Evaluate Sphinx3 word-error-rate on a speech/transcripts dataset.

    Parses CLI options; unless --lazy is true, loads model paths from the
    config file, derives text from the dataset PDFs and initialises the
    transcriber, then runs the evaluation.
    """

    def str_to_bool(value):
        # Fixed: argparse `type=bool` is a known pitfall — bool('False')
        # is True, so ANY non-empty string enabled lazy mode. Parse the
        # text explicitly while still accepting `--lazy True/False`.
        return str(value).strip().lower() in ('1', 'true', 'yes', 'on')

    parser = argparse.ArgumentParser(
        description='Evaluate Sphinx3 WER on speech/transcripts dataset')
    parser.add_argument(
        '--config',
        metavar='INI',
        type=str,
        default='sphinx-config.ini',
        help=
        'A configuration file specifying which models to use (default: %(default)s)'
    )
    parser.add_argument(
        '--directory',
        metavar='DIR',
        type=str,
        default='reith-lectures',
        help='Path to the evaluation dataset (default: %(default)s)')
    parser.add_argument(
        '--lazy',
        metavar='L',
        type=str_to_bool,
        default=False,
        help=
        'If set to true, do not attempt to derive any new data (default: %(default)s)'
    )
    args = parser.parse_args()
    lazy = args.lazy
    directory = args.directory
    config = ConfigParser()
    config_models = args.config
    transcriber = Transcriber()
    if not lazy:
        # Fixed: read_file() replaces the deprecated readfp() (removed in
        # Python 3.12), and the context manager closes the handle the
        # original leaked via `open(config_models)`.
        with open(config_models) as config_file:
            config.read_file(config_file)
        config.items('models')
        acoustic_model = config.get('models', 'acoustic_model')
        dictionary = config.get('models', 'dictionary')
        filler = config.get('models', 'filler')
        language_model = config.get('models', 'language_model')
        convert_pdf_to_text(directory)
        transcriber.initialise(acoustic_model, dictionary, filler,
                               language_model)
    evaluate(transcriber, directory, lazy)
def main():
    """Evaluate Sphinx3 word-error-rate on a speech/transcripts dataset.

    Parses CLI options; unless --lazy is true, loads model paths from the
    config file, derives text from the dataset PDFs and initialises the
    transcriber, then runs the evaluation.
    """

    def str_to_bool(value):
        # Fixed: argparse `type=bool` is a known pitfall — bool('False')
        # is True, so ANY non-empty string enabled lazy mode. Parse the
        # text explicitly while still accepting `--lazy True/False`.
        return str(value).strip().lower() in ('1', 'true', 'yes', 'on')

    parser = argparse.ArgumentParser(
        description='Evaluate Sphinx3 WER on speech/transcripts dataset')
    parser.add_argument(
        '--config', metavar='INI', type=str, default='sphinx-config.ini',
        help='A configuration file specifying which models to use (default: %(default)s)')
    parser.add_argument(
        '--directory', metavar='DIR', type=str, default='reith-lectures',
        help='Path to the evaluation dataset (default: %(default)s)')
    parser.add_argument(
        '--lazy', metavar='L', type=str_to_bool, default=False,
        help='If set to true, do not attempt to derive any new data (default: %(default)s)')
    args = parser.parse_args()
    lazy = args.lazy
    directory = args.directory
    config = ConfigParser()
    config_models = args.config
    transcriber = Transcriber()
    if not lazy:
        # Fixed: read_file() replaces the deprecated readfp() (removed in
        # Python 3.12), and the context manager closes the handle the
        # original leaked via `open(config_models)`.
        with open(config_models) as config_file:
            config.read_file(config_file)
        config.items('models')
        acoustic_model = config.get('models', 'acoustic_model')
        dictionary = config.get('models', 'dictionary')
        filler = config.get('models', 'filler')
        language_model = config.get('models', 'language_model')
        convert_pdf_to_text(directory)
        transcriber.initialise(acoustic_model, dictionary, filler,
                               language_model)
    evaluate(transcriber, directory, lazy)
def test_transcribe_new(self):
    """End-to-end: transcribing input1 reproduces the fixture2 reference
    outputs (text, segments, wav.scp and transcription.json)."""
    transcriber = Transcriber()
    audio_input_folder = 'input1'
    output_folder = 'output1'
    transcriber.transcribe(audio_input_folder, output_folder)

    fixture_dir = f'fixture2/{output_folder}'
    run_dir = f'{output_folder}/20191130-2034_Test1'

    # Reference outputs shipped with the test fixtures.
    expected_transcription = load_fixture_rel(
        f'{fixture_dir}/transcription.json')
    expected_text = load_document_rel(f'{fixture_dir}/text')
    expected_segments = load_document_rel(f'{fixture_dir}/segments')
    expected_wav_spc = load_document_rel(f'{fixture_dir}/wav.scp')

    # Artifacts produced by the transcription run above.
    transcription = load_fixture_rel(
        f'{output_folder}/results/20191130-2034_Test1/transcription.json')
    text = load_document_rel(f'{run_dir}/text')
    segments = load_document_rel(f'{run_dir}/segments')
    wav_scp = load_document_rel(f'{run_dir}/wav.scp')

    self.validate_file(text, expected_text)
    self.validate_file(segments, expected_segments)
    self.validate_file(wav_scp, expected_wav_spc)
    self.validate_json(transcription, expected_transcription)
def main():
    """Censor mature words in a video: beep the audio, mask the frames,
    and print a JSON summary of the filtered sections to stdout.

    NOTE(review): `input_file` is not defined in this block — presumably a
    module-level constant or CLI argument; confirm before refactoring.
    """
    logger = logging.getLogger()
    logger.info("start processing %s", input_file)
    manager = VideoManager(input_file)
    audio = manager.extract_audio(settings.ROOT / "output/a.wav")
    thumbnail = manager.extract_thumbnail(settings.ROOT / "output/a.jpg")
    audio_manager = AudioManager(audio)
    transcriber = Transcriber(audio_manager)
    filter_sections = []
    for transcription in transcriber.transcribe():
        logger.debug("transcription: %s", transcription)
        word, start, end = transcription
        # Fixed: call the detector once per word — the original evaluated
        # detect_mature_word(word) twice (branch condition + log message).
        mature = detect_mature_word(word)
        if mature:
            logger.debug("mature word: %s, %s", word, mature)
            audio_manager.apply_beep(start, end)
            manager.apply_mask(start, end)
            filter_sections.append({
                "start_time": start,
                "end_time": end,
                "word": word
            })
    manager.apply_audio(audio_manager.save(settings.ROOT / "output/a_beep.wav"))
    manager.save(settings.ROOT / "output/a.mp4")
    print(
        json.dumps({
            "thumbnail": str(thumbnail),
            "filter_sections": filter_sections,
            "filter_video": str(settings.ROOT / "output" / "a.mp4"),
        }))
def run(config_manager, logger):
    """Poll the speech directory forever, transcribing every audio file
    that does not yet have a matching text file."""
    logger.msg('loading configuration file...')
    config = config_manager.get_config()

    logger.msg('ensuring that file directories exist...')
    file_manager = File_Manager(config['speech'], config['text'])
    if not file_manager.directories_exist():
        logger.err(
            'Please check the directories entered into the config file: ' +
            config_file_name)
        return

    logger.msg('waiting for files to transcribe...')
    # Create transcription object
    transcriber = Transcriber(config['username'], config['password'],
                              file_manager)

    # Loop for eternity: transcribe whatever is pending, then sleep.
    while True:
        # Loop through files that need to be transcribed
        for speech_file in file_manager.speech_files_without_text_files():
            logger.msg('Transcribing: ' + speech_file + '...')
            transcriber.transcribe(speech_file)  # Transcribe the current file
            logger.msg(speech_file + ' transcribed')
            logger.msg('waiting for files to transcribe...')
        sleep(10)  # Wait for 10 seconds
class Chapterizer:
    """Able to chapterize a YoutubeVideo. A chapter is a segment and its summary."""

    def __init__(self, summary_word_count=30, number_of_chapters=10):
        """Set up the transcription / segmentation / summarization pipeline.

        Params:
            summary_word_count (int): Target word count forwarded to the
                BartSummarizer.
            number_of_chapters (int): Not used here — TODO(review): confirm
                whether it should cap the number of generated chapters.
        """
        self.transcriber = Transcriber()
        # A list of segmentizers, if one fails to segmentize, it tries to use the next in line.
        self.segmentizers = [
            SentenceSimilaritySegmentizer(),
            SimpleSegmentizer()
        ]
        self.summarizer = BartSummarizer(word_count=summary_word_count)

    def chapterize(self, url) -> ChapterizedYoutubeVideo:
        """Transcribe the video at `url`, segmentize it with the first
        segmentizer that produces results, and summarize each segment."""
        print(f"Processing: {url}")
        transcribed_youtube_video = self._get_transcribed_youtube_video(url)
        chapters = []
        for segmentizer in self.segmentizers:
            print(f"Using {segmentizer.__class__.__name__}")
            # list(...) replaces the original pass-through comprehension.
            segments = list(
                segmentizer.generate_segments(transcribed_youtube_video))
            if not segments:
                print(
                    f"{segmentizer.__class__.__name__} did not generate results."
                )
                continue
            chapters = [
                Chapter(segment=segment,
                        summary=self._summarize_segment(segment))
                for segment in segments
            ]
            break
        chapterized_youtube_video = ChapterizedYoutubeVideo(
            transcribed_youtube_video, chapters)
        print("Successfully chapterized!\n")
        return chapterized_youtube_video

    def _summarize_segment(self, segment):
        """Summarize one segment, falling back to its raw text when the
        summarizer returns nothing or rejects the input (ValueError)."""
        text = segment.get_text()
        try:
            summary = self.summarizer.summarize(text)
        except ValueError:
            return text
        return summary if summary != "" else text

    def _get_transcribed_youtube_video(self, url):
        """Fetch the transcript for the video id parsed out of `url`."""
        youtube_video: YoutubeVideo = YoutubeVideo(url)
        transcript = self.transcriber.get_transcript(youtube_video.id)
        return TranscribedYoutubeVideo(youtube_video.id, transcript)
def transcriber():
    """Return a fresh Transcriber instance.

    NOTE(review): looks like a pytest fixture or simple factory — confirm
    against the surrounding file before relying on per-call freshness.
    """
    return Transcriber()
import json
import os

from builder import ChunkBuilder
from transcriber import Transcriber

# Module-level collaborators shared by all ChunkSpeaker instances.
transcriber = Transcriber()
builder = ChunkBuilder()


class ChunkSpeaker(object):
    """Cuts an audio file into chunks driven by a speech-segmentation JSON."""

    def chunk(self, audio_input_path, speech_segmentation_path, output_folder):
        """Chunk `audio_input_path` according to the segments listed in
        `speech_segmentation_path`, writing results under `output_folder`.

        Params:
            audio_input_path (str): Path to the source audio file.
            speech_segmentation_path (str): JSON file with a `segments`
                list of {start, duration} entries.
            output_folder (str): Destination folder for the chunks.
        """
        with open(speech_segmentation_path, "r", encoding='utf-8') as fp:
            segmentation = json.load(fp)

        # Base name of the audio file (no directory, no extension).
        filename = audio_input_path.split("/")[-1].split(".")[0]
        # Fixed: the original passed a corrupted literal path and never
        # used `filename`; create the per-file output directory instead.
        # exist_ok keeps re-runs from crashing on an existing directory.
        os.makedirs(f'{output_folder}/{filename}', exist_ok=True)

        for item in segmentation['segments']:
            # (start, end) rounded to centiseconds.
            start_end = (item['start'],
                         round(item['start'] + item['duration'], 2))
            transcriber.chunk_audio_file(audio_input_path, output_folder,
                                         start_end)
import os

from transcriber import Transcriber

# GCP credentials and target bucket must be in the environment before the
# Transcriber is constructed — presumably it reads them at init; confirm.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"
os.environ['BUCKET_NAME'] = 'kerphi-video-bucket'

# Transcribe Japanese ('ja-JP') speech from the local video file.
transcriber = Transcriber()
transcriber.extract_text('ja-JP', './video.mp4')