Example #1
0
def main():
    """Entry point for AutoSub: transcribe a video file into an SRT subtitle file.

    Parses command-line arguments (--model, --scorer, --file), loads the
    DeepSpeech model (optionally with an external scorer), extracts the audio
    from the input video, splits it on silence, runs inference on each audio
    segment, and appends the results to output/<video>.srt.

    Exits with status 1 if the model file does not have a .pbmm extension.
    """
    global line_count
    print("AutoSub v0.1\n")

    parser = argparse.ArgumentParser(description="AutoSub v0.1")
    parser.add_argument('--model', required=True,
                        help='DeepSpeech model file')
    parser.add_argument('--scorer',
                        help='DeepSpeech scorer file')
    parser.add_argument('--file', required=True,
                        help='Input video file')
    args = parser.parse_args()

    ds_model = args.model
    if not ds_model.endswith(".pbmm"):
        print("Invalid model file. Exiting\n")
        # exit() is intended for interactive use; SystemExit is the explicit form
        raise SystemExit(1)

    # Load DeepSpeech model
    ds = Model(ds_model)

    # The scorer is optional; a bad extension falls back to model-only inference
    if args.scorer:
        ds_scorer = args.scorer
        if not ds_scorer.endswith(".scorer"):
            print("Invalid scorer file. Running inference using only model file\n")
        else:
            ds.enableExternalScorer(ds_scorer)

    input_file = args.file
    print("\nInput file:", input_file)

    base_directory = os.getcwd()
    output_directory = os.path.join(base_directory, "output")
    audio_directory = os.path.join(base_directory, "audio")
    # os.path.basename is portable; the original split("/") broke on Windows paths
    video_file_name = os.path.basename(input_file).split(".")[0]
    audio_file_name = os.path.join(audio_directory, video_file_name + ".wav")
    srt_file_name = os.path.join(output_directory, video_file_name + ".srt")

    # Extract audio from input video file
    extract_audio(input_file, audio_file_name)

    print("Splitting on silent parts in audio file")
    silenceRemoval(audio_file_name)

    print("\nRunning inference:")

    # "a+" preserved from the original: a rerun appends to an existing SRT file.
    # The context manager guarantees the handle is closed even if inference fails.
    with open(srt_file_name, "a+") as file_handle:
        for file in tqdm(sort_alphanumeric(os.listdir(audio_directory))):
            audio_segment_path = os.path.join(audio_directory, file)

            # Don't run inference on the original (unsegmented) audio file
            if os.path.basename(audio_segment_path) != os.path.basename(audio_file_name):
                ds_process_audio(ds, audio_segment_path, file_handle)

    print("\nSRT file saved to", srt_file_name)
Example #2
0
def to_deepspeech(filepath):
    """Transcribe *filepath* with DeepSpeech, restore punctuation, then summarize.

    Pipeline:
      1. Split the audio on silent parts and run DeepSpeech on each segment,
         writing the raw transcript to output/<name>_temp1.txt.
      2. Re-punctuate the raw transcript with punctuator2 (run under python2).
      3. Summarize the punctuated text down to roughly a quarter of its
         sentences and save it as output/<name>_Final.txt.

    Temp text files, a stray <name>.wav, and the segment directory are
    cleaned up afterwards (the segment directory is recreated empty).
    """
    model_file_path = 'deepspeech-0.9.3-models.pbmm'
    scorer_path = 'deepspeech-0.9.3-models.scorer'
    ds = deepspeech.Model(model_file_path)
    ds.enableExternalScorer(scorer_path)

    base_directory = os.getcwd()  # current working directory
    output_directory = os.path.join(base_directory, "output")
    audio_directory = os.path.join(base_directory, "temp")  # temp audio segments

    # Output text file named after the input file (extension stripped)
    filename = ntpath.basename(filepath).split(".")[0]
    txt_file_name = os.path.join(output_directory, filename + "_temp1.txt")

    # Split on silent parts in the audio file
    segmentAudio.silenceRemoval(filepath, audio_directory)

    print("Running interface:")
    # Context manager closes the transcript file even if inference fails
    with open(txt_file_name, "w+") as file_handle:
        for file in tqdm(sort_alphanumeric(os.listdir(audio_directory))):
            audio_segment_path = os.path.join(audio_directory, file)
            ds_process_audio(ds, audio_segment_path, file_handle)

    print("\nSpeech-to-Text Conversion Complete, restoring Punctuation...")

    # Running commands to punctuate deepspeech output.
    # os.path.join keeps these paths portable; the original hard-coded
    # literal backslashes, which only resolve on Windows.
    punct2_script = os.path.join(base_directory, "punctutator2_theano",
                                 "punctuator2-master", "punctuator.py")
    punct2_model = os.path.join(base_directory, "punctutator2_theano",
                                "models", "INTERSPEECH-T-BRNN.pcl")
    punct_output = os.path.join(output_directory, filename + "_temp2.txt")
    final_output = os.path.join(output_directory, filename + ".txt")

    # NOTE(review): argument order (model, output, input) matches the original
    # call — confirm it against punctuator.py's expected CLI before changing.
    subprocess.run(["python2", punct2_script, punct2_model, punct_output, txt_file_name])
    correct_output(punct_output, final_output)

    print("\nFinished! Output is", final_output, "\n")
    print("Summarization starts...\n")

    # Delete the original unpunctuated output and the intermediate
    # punctuated file; both should exist at this point.
    for temp_file in (txt_file_name, punct_output):
        if os.path.exists(temp_file):
            os.remove(temp_file)
        else:
            print("The file does not exist")

    # Text Summarization
    LANGUAGE = "english"
    with open(final_output, 'r') as f:
        # Bug fix: count the periods as a sentence proxy. The original used
        # str.find("."), which returns the INDEX of the first period, not a count.
        sentence_total = f.read().count(".")

    # Quarter of the content, but never fewer than one sentence for short inputs
    SENTENCES_COUNT = max(1, sentence_total // 4)

    parser = PlaintextParser.from_file(final_output, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)

    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    Final = os.path.join(output_directory, filename + "_Final.txt")

    print("Your output will be shown below and saved in the output directory.\n")
    with open(Final, "w") as F:
        for sentence in summarizer(parser.document, SENTENCES_COUNT):
            print(sentence)
            print(sentence, file=F)

    print("\nEverything is done!")

    # Clean directory and any temp .wav left next to the script
    tmp_wav = os.path.join(base_directory, filename + ".wav")
    if os.path.exists(tmp_wav):
        os.remove(tmp_wav)

    shutil.rmtree(audio_directory)
    os.mkdir(audio_directory)
Example #3
0
def main():
    """Entry point for AutoSub: transcribe a video file into SRT or VTT subtitles.

    Parses command-line arguments (--model, --scorer, --file, --vtt), loads
    the DeepSpeech model (optionally with an external scorer), extracts the
    audio from the input video, splits it on silence, runs inference on each
    segment, and appends the cues to output/<video>.srt — or, with --vtt,
    output/<video>.vtt with per-word cue points and a WebVTT header.

    Exits with status 1 if the model file does not have a .pbmm extension.
    """
    global line_count
    print("AutoSub v0.1\n")

    parser = argparse.ArgumentParser(description="AutoSub v0.1")
    parser.add_argument('--model', required=True,
                        help='DeepSpeech model file')
    parser.add_argument('--scorer',
                        help='DeepSpeech scorer file')
    parser.add_argument('--file', required=True,
                        help='Input video file')
    parser.add_argument('--vtt', dest="vtt", action="store_true",
                        help='Output a vtt file with cue points for individual words instead of a srt file')
    args = parser.parse_args()

    ds_model = args.model
    if not ds_model.endswith(".pbmm"):
        print("Invalid model file. Exiting\n")
        # exit() is intended for interactive use; SystemExit is the explicit form
        raise SystemExit(1)

    # Load DeepSpeech model
    ds = Model(ds_model)

    # The scorer is optional; a bad extension falls back to model-only inference
    if args.scorer:
        ds_scorer = args.scorer
        if not ds_scorer.endswith(".scorer"):
            print("Invalid scorer file. Running inference using only model file\n")
        else:
            ds.enableExternalScorer(ds_scorer)

    input_file = args.file
    print("\nInput file:", input_file)

    base_directory = os.getcwd()
    output_directory = os.path.join(base_directory, "output")
    audio_directory = os.path.join(base_directory, "audio")
    # os.path.basename is portable; split(os.sep) misses "/" separators on Windows
    video_file_name = os.path.basename(input_file).split(".")[0]
    audio_file_name = os.path.join(audio_directory, video_file_name + ".wav")
    srt_extension = ".vtt" if args.vtt else ".srt"
    srt_file_name = os.path.join(output_directory, video_file_name + srt_extension)

    # Extract audio from input video file
    extract_audio(input_file, audio_file_name)

    print("Splitting on silent parts in audio file")
    silenceRemoval(audio_file_name)

    # Output SRT or VTT file. "a+" preserved from the original: a rerun
    # appends to an existing file. The context manager guarantees the handle
    # is closed even if inference fails partway through.
    with open(srt_file_name, "a+") as file_handle:
        if args.vtt:
            # WebVTT files must start with the "WEBVTT" signature line
            file_handle.write("WEBVTT\n")
            file_handle.write("Kind: captions\n\n")

        print("\nRunning inference:")

        for file in tqdm(sort_alphanumeric(os.listdir(audio_directory))):
            audio_segment_path = os.path.join(audio_directory, file)

            # Don't run inference on the original (unsegmented) audio file
            if os.path.basename(audio_segment_path) != os.path.basename(audio_file_name):
                ds_process_audio(ds, audio_segment_path, file_handle, args.vtt)

    file_kind = "VTT" if args.vtt else "SRT"
    print("\n" + file_kind + " file saved to", srt_file_name)

    # Clean audio/ directory so the next run starts from an empty segment dir
    shutil.rmtree(audio_directory)
    os.mkdir(audio_directory)