# Ejemplo n.º 1 (Example No. 1)
# 0
import os
import tempfile
import pysubs2
from pysubs2 import SSAFile, SSAEvent, SSAStyle, make_time, Color
from audio_pipeline import logging_config
from audio_pipeline.audio_processing.ffmpeg_processor import run_ffmpeg
"""
Subtitle generation tends to create a lot of very small entries across the video file.
To counter this we need to combine together subtitle entries so that they are retained on
the screen as multiple words.  We also want to ensure that subtitles disappear if they pass a
time period since they appeared.  We also want to be able to combine subtitle files in such
a way that the output from each file can be distinguished and covers the correct time periods
"""

# Module-level logger for this file, obtained via the project's logging config.
logger = logging_config.get_logger(__name__)


def compress(subs, max_chars=30, max_stretch_time=3, max_oldest_time=10):
    new_subs = SSAFile()
    # Phase 1 based on character count so that we dont overflow the screen
    # Phase 2 if the end of the last subtitle is close to the start of the next we want to stretch out the end
    # Phase 3 is to make sure that the oldest word on the screen has not been there for too long
    char_count = 0
    current_text = ''
    current_event = None
    oldest_start_time = 0
    for sub in subs:
        last_event = current_event
        current_event = SSAEvent()
        current_event.start = sub.start
        current_event.end = sub.end
# Ejemplo n.º 2 (Example No. 2)
# 0
from audio_pipeline.logging_config import get_logger
from audio_pipeline import pipeline_config
from audio_pipeline.audio_sounds.sound_recogniser import SoundRecogniser, process_overlap
from audio_pipeline.audio_processing.audio_extractor import extract_audio
from audio_pipeline.audio_processing.subtitle_utils import *
from audio_pipeline.audio_speech.natural_language_processor import SpaCyNaturalLanguageProcessor
from audio_pipeline.audio_speech.speech_recogniser import SpeechRecogniser, get_words
import os

# Module-level logger for this file, obtained via the project's logging config.
logger = get_logger(__name__)


def main(path):
    """
    Run the pipeline which processes a video file to produce a new subtitled video file
    :param path: Input video path
    """
    logger.info(f'Processing video file {path}')
    # Extract audio
    audio_file = extract_audio(path, pipeline_config.audio_target_dir)

    # Generate sound classification results and speech recogniser results
    sound_results = SoundRecogniser().process_file(audio_file)
    sound_results = process_overlap(sound_results)
    speech_results = SpeechRecogniser().process_file(audio_file)

    # NLP
    wrds = get_words(speech_results)
    nlp = SpaCyNaturalLanguageProcessor(pipeline_config.spacy_model)
    custom_nlp = SpaCyNaturalLanguageProcessor(
        pipeline_config.custom_spacy_model)