import numpy as np
from moviepy.editor import AudioFileClip


def video2Audio(video_file):
    '''Takes in any extension supported by ffmpeg: .ogv, .mp4, .mpeg, .avi, .mov, etc.'''
    audio = AudioFileClip(video_file, nbytes=2, fps=16000)
    sound_array = audio.to_soundarray(fps=16000, quantize=True, nbytes=2)
    if audio.nchannels == 2:
        # Downmix stereo to mono by averaging the two channels.
        sound_array = sound_array.sum(axis=1) / 2
    sound_array = sound_array.astype(np.int16)
    return sound_array, audio.duration
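# Usage sketch (the file name is hypothetical): the function returns mono
# 16 kHz int16 samples plus the clip duration in seconds.
samples, duration = video2Audio("example.mp4")
print(samples.dtype, samples.shape, duration)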
def AVsync(audio_fn, video_fn, offset=None, verbose=False):
    audio = AudioFileClip(audio_fn)
    video = VideoFileClip(video_fn)
    if offset is None:
        # Estimate the offset by cross-correlating the two soundtracks.
        x1 = audio.to_soundarray()
        x2 = video.audio.to_soundarray()
        offset, corr = calculate_time_offset(x1[:, 0], x2[:, 0], fs=44100)
        if verbose:
            print(f"Offset: {offset:2.3f}s\nCorrelation: {corr*100:2.2f}%")
    if offset > 0:
        # The external audio starts earlier: trim its head to match the video.
        video_out = video.set_audio(audio.subclip(offset))
    else:
        # The video starts earlier: trim the video's head instead.
        video_out = video.subclip(-offset).set_audio(audio)
    return video_out
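# `calculate_time_offset` is not defined in this snippet. A minimal sketch,
# assuming it returns the lag of the second signal relative to the first in
# seconds together with a crudely normalized peak correlation:
import numpy as np
from scipy.signal import fftconvolve

def calculate_time_offset(x1, x2, fs=44100):
    corr = fftconvolve(x1, x2[::-1], mode="full")  # cross-correlation via FFT
    lag = np.argmax(corr) - (len(x2) - 1)          # peak position relative to zero lag
    peak = corr.max() / (np.linalg.norm(x1) * np.linalg.norm(x2))
    return lag / float(fs), peak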
import os
from mido import MidiFile, MetaMessage
from moviepy.editor import (AudioFileClip, CompositeVideoClip,
                            TextClip, VideoFileClip)
# splitshift() is a helper defined elsewhere in the project (see the sketch
# after this function).


def poop(source, destination, midi_file, stretch, fadeout, rebuild, max_stack):
    """
    Create multiple pitchshifted versions of source video and arrange them
    to the pattern of the midi_file, also arrange the video if multiple
    notes play at the same time.
    """
    print("Reading input files")
    video = VideoFileClip(source, audio=False)
    # Non-main tracks are 30% the size of the main one and have a white
    # border and a margin around them.
    smaller = video.resize(0.3)\
                   .margin(mar=2, color=3*[255])\
                   .margin(mar=8, opacity=0)
    audio = AudioFileClip(source, fps=44100)
    mid = MidiFile(midi_file)
    ignoredtracks = ["Percussion", "Bass"]

    print("Analysing MIDI file")
    notes = []    # the number of note_on messages in each track
    lowest = 127  # will contain the lowest note
    highest = 0   # will contain the highest note
    for i, track in enumerate(mid.tracks):
        notes.append(0)
        #if track.name in ignoredtracks: continue
        for message in track:
            if message.type == "note_on":
                lowest = min(lowest, message.note)
                highest = max(highest, message.note)
                notes[-1] += 1
    # The main track is the one featured in the center. It is probably the
    # one with the most notes. Also record the lowest, highest, and average
    # note to generate the appropriate pitches.
    maintrack = max(enumerate(notes), key=lambda x: x[1])[0]
    midpitch = int((lowest + highest) / 2)
    print("Main track is probably", str(maintrack) + ":", mid.tracks[maintrack].name)
    mid.tracks.insert(0, mid.tracks.pop(maintrack))  # move main track to front
    notes.insert(0, notes.pop(maintrack))            # move main note count to front
    print(sum(notes), "notes ranging from", lowest, "to", highest,
          "centering around", midpitch)

    print("Transposing audio")
    sound = audio.to_soundarray(fps=44100)                # original audio
    tones = range(lowest - midpitch, highest - midpitch)  # the range of pitches we need
    pitches = []  # this will contain the transposed AudioFileClips
    if not os.path.exists("pitches/"):
        print("Creating folder for audio files")
        os.makedirs("pitches/")
    for n in tones:
        # Pitches only need to be generated if they do not already exist or
        # if we force the creation of new ones. Save them in order in pitches.
        name = "pitches/" + source + "_" + str(n) + ".mp3"
        if not os.path.isfile(name) or rebuild:
            print("Transposing pitch", n)
            splitshift(sound, n).write_audiofile(name)
        pitches.append(AudioFileClip(name, fps=44100))

    print("Adding video clips")
    clips = [video.set_duration(1)]  # to set the video size
    positions = [("left", "bottom"), ("right", "bottom"), ("left", "top"),
                 ("right", "top"), ("center", "bottom"), ("center", "top"),
                 ("left", "center"), ("right", "center")]  # non-main tracks
    # curpos is the current corner position on the screen and changes with
    # each track. cache is used to make a unique file name whenever a new
    # temporary file is created. endtime will be used at the end to place
    # the closing TextClip; it is the latest time any clip ends.
    curpos = -2
    cache = endtime = 0
    for i, track in enumerate(mid.tracks):
        #if track.name in ignoredtracks: continue
        print("Processing {} notes: {}".format(notes[i], track.name))
        t = 1.0  # not 0 because we added one second of original video for size
        opennotes = []  # will contain all notes that are still playing
        curpos += 1
        for message in track:
            if not isinstance(message, MetaMessage):
                message.time *= stretch
                t += message.time
                if message.type == "note_on":
                    # Add a video clip with the appropriate starting time and
                    # pitch. Also add an entry to opennotes (we don't know
                    # when the note ends yet).
                    part = video
                    mainvid = i == 0  # and len(opennotes) == 0
                    if not mainvid:
                        part = smaller
                    part = part\
                        .set_audio(pitches[min(len(pitches)-1, max(0, message.note-lowest))])\
                        .set_start(t/1000)
                    opennotes.append((message.note, len(clips), t))
                    # If this isn't the main track, the video will be smaller
                    # and placed at the edge; each track gets its own corner
                    # position.
                    if not mainvid:
                        stackheight = 6
                        part = part.set_position(positions[curpos % len(positions)])
                    clips.append(part)
                elif message.type == "note_off":
                    reference = message.note
                    index = 0
                    # Find the note that ended in opennotes by its pitch.
                    # Record its clip index and start time, then remove it.
                    for note in reversed(opennotes):
                        n, j, d = note
                        if n == reference:
                            index = j
                            opennotes.remove(note)
                            break
                    # Set the open note's clip duration to the difference
                    # between now and its start time, fade it out, and update
                    # endtime if needed.
                    clips[index] = clips[index].set_duration((t - d) / 1000 + fadeout)
                    clips[index] = clips[index].crossfadeout(fadeout)
                    endtime = max(endtime, t / 1000 + fadeout)
                if len(clips) == max_stack:
                    # To save some memory, the clip list is flushed whenever
                    # it reaches a certain size: all clips that are closed
                    # are merged into one file on disk.
                    upuntil = len(clips)  # index of the first still-open clip
                    if len(opennotes) > 0:
                        _, upuntil, _ = opennotes[0]
                    stillopen = clips[upuntil:]
                    print("Stack reached", len(clips), "clips, merging", upuntil)
                    # Save a temporary file to disk with all clips we can
                    # safely discard.
                    newcache = destination + ".temporary" + str(cache) + ".mp4"
                    CompositeVideoClip(clips[:upuntil]).write_videofile(newcache)
                    cache += 1
                    # Shift all opennotes' indices down by the number of
                    # clips merged to disk, then rebuild clips from the
                    # merged clip plus any clips still open. (The loop
                    # variable is k so it doesn't shadow the track index i.)
                    for k, note in enumerate(opennotes):
                        n, j, d = note
                        opennotes[k] = (n, j - upuntil + 1, d)
                    clips = [VideoFileClip(newcache)] + stillopen

    end = TextClip("pitch.py", font="Arial", color="white", fontsize=70)\
        .set_pos("center")\
        .set_duration(1)\
        .set_start(endtime)
    clips.append(end)  # add an ending frame
    # Combine all leftover clips, write them to the final file and remove
    # the temporary files created along the way.
    print("Combining", len(clips), "clips")
    final = CompositeVideoClip(clips).set_start(1)
    final.write_videofile(destination)
    clips = []
    if cache == 1:
        print("Removing one temporary file")
    elif cache > 1:
        print("Removing", cache, "temporary files")
    for i in range(0, cache):
        os.remove(destination + ".temporary" + str(i) + ".mp4")
def prepare_audio(file_name, rate=44100, speedup=1.3):
    audio = AudioFileClip(file_name)
    data = audio.to_soundarray(fps=rate)
    data = speedup_audio(data, speedup)
    return AudioArrayClip(data, fps=rate)
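# `speedup_audio` is defined elsewhere. A plausible minimal version, assuming
# it simply resamples the track so it plays `factor` times faster (the pitch
# rises accordingly; a time-stretch without pitch change would need e.g. a
# phase vocoder):
from scipy.signal import resample

def speedup_audio(data, factor):
    n_out = int(len(data) / factor)  # fewer samples at the same fps
    return resample(data, n_out)     # Fourier-domain resampling along axis 0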
# Assumes module-level imports: os, numpy as np, scipy.io.wavfile, and
# moviepy's AudioFileClip; write_audio() and get_shift() are helpers defined
# elsewhere in the same module (a sketch of get_shift follows below).
def sync(self, fps=11025, nbytes=2, low_memory=False, print_progress=False,
         convert=False):
    """
    Calculate the shift necessary for the other cameras to be in sync with
    the first camera. Uses scipy's fftconvolve to compute the cross
    correlation.

    :param convert: if convert is True, the audio from the video file is
        written to a wave file. (This uses scipy to read the file if it
        already exists.)
    """
    # First file (the reference).
    if convert:  # only use wav if convert is on
        if os.path.exists(self.filenames[0][0] + '.wav'):
            with open(self.filenames[0][0] + '.wav', 'rb') as f:
                fs, data = wavfile.read(f)
            # Re-extract the audio if the sample rate setting changed.
            if fs != fps:
                data = write_audio(self.filenames[0], fps, nbytes, overwrite=True)
        else:
            data = write_audio(self.filenames[0], fps, nbytes, overwrite=True)
    else:
        clip = AudioFileClip(self.filenames[0][0] + self.filenames[0][1])
        # Keep only the first channel of the decoded audio.
        data = clip.to_soundarray(fps=fps, nbytes=nbytes)[:, 0]
        clip.reader.close_proc()
    if low_memory:
        # Spill the samples to a disk-backed array and drop the in-memory copy.
        reference = np.memmap(self.filenames[0][0] + '.dat', dtype='int16',
                              mode='w+', shape=data.shape)
        reference[:] = data[:]
        del data
    else:
        reference = data

    # The rest (to be synced against the reference).
    shift = []
    for i in range(len(self.filenames) - 1):
        if print_progress:
            print("Syncing " + str(i + 2) + " of " + str(len(self.filenames)))
        if convert:  # only use wav if convert is on
            if os.path.exists(self.filenames[i + 1][0] + '.wav'):
                with open(self.filenames[i + 1][0] + '.wav', 'rb') as f:
                    fs, data = wavfile.read(f)
                # Re-extract the audio if the sample rate setting changed.
                if fs != fps:
                    data = write_audio(self.filenames[i + 1], fps, nbytes,
                                       overwrite=True)
            else:
                data = write_audio(self.filenames[i + 1], fps, nbytes,
                                   overwrite=True)
        else:
            clip = AudioFileClip(self.filenames[i + 1][0] + self.filenames[i + 1][1])
            data = clip.to_soundarray(fps=fps, nbytes=nbytes)[:, 0]
            clip.reader.close_proc()
        if low_memory:
            to_sync = np.memmap(self.filenames[i + 1][0] + '.dat', dtype='int16',
                                mode='w+', shape=data.shape)
            to_sync[:] = data[:]
            del data
        else:
            to_sync = data
        sync_time = get_shift(reference, to_sync, fps, low_memory=low_memory)
        if print_progress:
            print(sync_time)
        shift.append(sync_time)
    self.shift = shift
    return shift
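# `get_shift` is not shown in this snippet. Since the docstring says the cross
# correlation is computed with scipy's fftconvolve, a minimal sketch (ignoring
# the low_memory path, and assuming the return value is the lag of to_sync
# relative to the reference, in seconds) could look like:
import numpy as np
from scipy.signal import fftconvolve

def get_shift(reference, to_sync, fps, low_memory=False):
    ref = np.asarray(reference, dtype=np.float64)
    sig = np.asarray(to_sync, dtype=np.float64)
    corr = fftconvolve(ref, sig[::-1], mode="full")  # cross-correlation via FFT
    lag = np.argmax(corr) - (len(sig) - 1)           # peak position relative to zero lag
    return lag / float(fps)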