def compose_from_transcript(files, search, searchtype):
    """Takes transcripts created by audiogrep/pocketsphinx, a search and search type
    and returns a list of timestamps for creating a supercut"""

    final_segments = []

    if searchtype in ['re', 'word', 'franken', 'fragment']:
        if searchtype == 're':
            searchtype = 'sentence'

        segments = audiogrep.search(search, files, mode=searchtype, regex=True)
        for seg in segments:
            seg['file'] = seg['file'].replace('.transcription.txt', '')
            seg['line'] = seg['words']
            final_segments.append(seg)

    elif searchtype in ['hyper', 'pos']:
        for s in audiogrep.convert_timestamps(files):
            for w in s['words']:
                if search_line(w[0], search, searchtype):
                    seg = {
                        'file': s['file'].replace('.transcription.txt', ''),
                        'line': w[0],
                        'start': float(w[1]),
                        'end': float(w[2])
                    }
                    final_segments.append(seg)

    return final_segments
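
# A minimal usage sketch (not from the original source), assuming the input
# has already been transcribed with audiogrep/pocketsphinx so that a matching
# .transcription.txt file exists. 'interview.mp4' is a hypothetical filename.
segments = compose_from_transcript(['interview.mp4'], search='fashion', searchtype='word')
for seg in segments:
    print(seg['file'], seg['line'], seg['start'], seg['end'])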
def get_ngrams(inputfile, n=1, use_transcript=False, use_vtt=False):
    '''
    Get ngrams from a text
    Sourced from: https://gist.github.com/dannguyen/93c2c43f4e65328b85af
    '''
    words = []

    if use_transcript:
        for s in audiogrep.convert_timestamps(inputfile):
            for w in s['words']:
                words.append(w[0])
    elif use_vtt:
        vtts = get_vtt_files(inputfile)
        for vtt in vtts:
            with open(vtt['vtt'], 'r') as infile:
                sentences = parse_auto_sub(infile.read())
            for s in sentences:
                for w in s['words']:
                    words.append(w['word'])
    else:
        text = ''
        srts = get_subtitle_files(inputfile)
        for srt in srts:
            lines = clean_srt(srt)
            if lines:
                for timespan in lines.keys():
                    line = lines[timespan].strip()
                    text += line + ' '
        words = re.split(r'[.?!,:\"]+\s*|\s+', text)

    ngrams = zip(*[words[i:] for i in range(n)])
    return ngrams
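
# Usage sketch (assumed, not in the original): count the ten most common
# bigrams found in a video's subtitles. 'some_video.mp4' is hypothetical.
from collections import Counter

bigrams = get_ngrams(['some_video.mp4'], n=2)
for gram, count in Counter(bigrams).most_common(10):
    print(' '.join(gram), count)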
def extract_words(files, padding, use_uuid=False, confidence=0.0, output_directory='extracted_words'):
    '''
    Extracts individual words from files and exports them to individual files.
    '''
    segments = []
    for s in audiogrep.convert_timestamps(files):
        for w in s['words']:
            if w[3] < confidence:
                continue
            # skip words whose timestamps can't be parsed as floats
            try:
                float(w[1])
            except (TypeError, ValueError):
                continue
            seg = {
                'word': w[0],
                'file': s['file'].replace('.transcription.txt', ''),
                'line': w[0],
                'start': float(w[1]),
                'end': float(w[2])
            }
            segments.append(seg)

    composition = segments

    # apply padding and sync
    for c in composition:
        c['start'] = c['start'] - padding
        c['end'] = c['end'] + padding

    all_filenames = set([c['file'] for c in composition])
    videofileclips = dict([(f, VideoFileClip(f)) for f in all_filenames])

    cut_clips = []
    for c in composition:
        try:
            clip = videofileclips[c['file']]
            start = max(0, c['start'] - padding)
            end = min(c['end'] + padding, clip.duration)
            subclip = clip.subclip(start, end)
            cut_clips.append((c['word'], subclip))
        except Exception:
            continue

    from collections import defaultdict
    wc = defaultdict(int)
    for word, clip in cut_clips:
        print(word, clip)
        if use_uuid:
            word_id = str(uuid.uuid1())
        else:
            wc[word] += 1
            word_id = str(wc[word])
        path = output_directory + "/" + word
        if not os.path.exists(path):
            os.makedirs(path)
        clip.to_videofile(path + "/" + word_id + ".mp4", codec="libx264", temp_audiofile='temp-audio.m4a', audio_codec='aac', remove_temp=True, fps=23)
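
# Usage sketch (assumed, not in the original): export every word recognized
# with at least 0.8 confidence, padded by 50 milliseconds on each side.
# 'interview.mp4' is a hypothetical, already-transcribed input file.
extract_words(['interview.mp4'], padding=0.05, confidence=0.8,
              output_directory='extracted_words')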
def test_convert_timestamps():
    filename = os.path.join(os.path.dirname(__file__), 'data/test.mp3')
    sentences = audiogrep.convert_timestamps([filename])
    words = {}
    for sentence in sentences:
        for word in sentence['words']:
            words[word[0]] = True
    assert 'fashion' in words
    assert len(sentences) == 9
import sys

import moviepy.editor as mp
import audiogrep

videofile = sys.argv[1]

# get the sentence timestamps
sentences = audiogrep.convert_timestamps([videofile])
timestamps = []
for sentence in sentences:
    timestamps += sentence['words']

# alphabetize the list
timestamps.sort(key=lambda x: x[0])

# we could limit how many clips here
# words = words[100:200]

original_video = mp.VideoFileClip(videofile)
clips = []
for timestamp in timestamps:
    word = timestamp[0]
    start = float(timestamp[1])
    end = float(timestamp[2])

    # skip this clip if the word is shorter than 5 characters
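    # (Hedged completion: the original script is truncated here. The 5-character
    # cutoff follows the comment above; the subclip, concatenate and write calls
    # are assumptions about how it likely ends, and 'alphabetized.mp4' is a
    # hypothetical output name.)
    if len(word) < 5:
        continue
    clips.append(original_video.subclip(start, end))

supercut = mp.concatenate_videoclips(clips)
supercut.write_videofile('alphabetized.mp4', codec='libx264', audio_codec='aac')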