def run(self):
    """Run the separation pipeline, reporting progress through signals.

    Reads ``self.mix_file`` and ``self.source_file``, then aligns, subtracts
    and enhances them via the ``estimate`` helpers.  The result is stored in
    ``self.subtracted`` and the sampling rate in ``self.fs``.

    Progress is emitted on ``percentDoneSignal`` in five equal steps on a
    0-1000 scale; ``doneSignal`` is emitted with 0 at the start and 1 once
    everything has finished.
    """
    # Reset progress/state signals before doing any work
    self.percentDoneSignal.emit(0)
    step = 1000.0 / 5
    self.doneSignal.emit(0)
    self.statusSignal.emit("")

    # Mixture: sr=None keeps the file's own sampling rate
    mix_name = os.path.basename(self.mix_file)
    self.statusSignal.emit("Loading {}".format(mix_name))
    mix, self.fs = librosa.load(self.mix_file, sr=None)
    self.percentDoneSignal.emit(step)

    # Source: loaded at the sampling rate just read from the mixture
    source_name = os.path.basename(self.source_file)
    self.statusSignal.emit("Loading {}".format(source_name))
    source, self.fs = librosa.load(self.source_file, sr=self.fs)
    self.percentDoneSignal.emit(2 * step)

    # Remove any gross timing offset between the two signals
    self.statusSignal.emit("Aligning...")
    mix, source = estimate.align(mix, source, self.fs)
    self.percentDoneSignal.emit(3 * step)

    # From here on `source` is the output of reverse_channel, and both
    # signals go through estimate.pad before being subtracted
    self.statusSignal.emit("Subtracting...")
    source = estimate.reverse_channel(mix, source)
    mix, source = estimate.pad(mix, source)
    self.percentDoneSignal.emit(4 * step)

    # Enhance the residual (mix minus source) using the source as reference
    self.statusSignal.emit("Enhancing...")
    residual = mix - source
    self.subtracted = estimate.wiener_enhance(
        residual, source, self.wiener_threshold)
    self.percentDoneSignal.emit(5 * step)

    self.doneSignal.emit(1)
def run(self):
    """Run the separation pipeline, reporting progress through signals.

    Reads ``self.mix_file`` and ``self.source_file``, then aligns, subtracts
    and enhances them via the ``estimate`` helpers.  The result is stored in
    ``self.subtracted`` and the sampling rate in ``self.fs``.

    Progress is emitted on ``percentDoneSignal`` in five equal steps on a
    0-1000 scale; ``doneSignal`` is emitted with 0 at the start and 1 once
    everything has finished.
    """
    # Reset progress/state signals before doing any work
    self.percentDoneSignal.emit(0)
    step = 1000.0 / 5
    self.doneSignal.emit(0)
    self.statusSignal.emit("")

    # Mixture: sr=None keeps the file's own sampling rate
    mix_name = os.path.basename(self.mix_file)
    self.statusSignal.emit("Loading {}".format(mix_name))
    mix, self.fs = librosa.load(self.mix_file, sr=None)
    self.percentDoneSignal.emit(step)

    # Source: loaded at the sampling rate just read from the mixture
    source_name = os.path.basename(self.source_file)
    self.statusSignal.emit("Loading {}".format(source_name))
    source, self.fs = librosa.load(self.source_file, sr=self.fs)
    self.percentDoneSignal.emit(2 * step)

    # Remove any gross timing offset between the two signals
    self.statusSignal.emit("Aligning...")
    mix, source = estimate.align(mix, source, self.fs)
    self.percentDoneSignal.emit(3 * step)

    # From here on `source` is the output of reverse_channel, and both
    # signals go through estimate.pad before being subtracted
    self.statusSignal.emit("Subtracting...")
    source = estimate.reverse_channel(mix, source)
    mix, source = estimate.pad(mix, source)
    self.percentDoneSignal.emit(4 * step)

    # Enhance the residual (mix minus source) using the source as reference
    self.statusSignal.emit("Enhancing...")
    residual = mix - source
    self.subtracted = estimate.wiener_enhance(
        residual, source, self.wiener_threshold)
    self.percentDoneSignal.emit(5 * step)

    self.doneSignal.emit(1)
] for subdirectory in [d for d in subdirectories if os.path.isdir(d)]: print 'Processing file {}'.format(subdirectory) # Load in mixture and corrupted source mix, fs = librosa.load(os.path.join(subdirectory, 'M.wav'), sr=None) source, fs = librosa.load(os.path.join(subdirectory, 'C.wav'), sr=fs) # Align the source to the mixture mix, source_aligned = estimate.align(mix, source, fs, max_global_offset=0) # Estimate the filter source_filtered = estimate.reverse_channel(mix, source_aligned) # Write out aligned version librosa.output.write_wav(os.path.join(subdirectory, 'C-filtered.wav'), source_filtered, fs) mix, source_filtered = estimate.pad(mix, source_filtered) # Wiener filter the approximate separation enhanced = estimate.wiener_enhance(mix - source_filtered, source_aligned, 6) # Write out approximation of the true source librosa.output.write_wav(os.path.join(subdirectory, 'S-approx.wav'), enhanced, fs) # <markdowncell> # ## Step 2: Compute SDRs # As above, we compute the SDR of our approximated source against the true source. We don't compare it against the true source with channel/timing distortion approximately removed because the vinyl distortion is much greater than for CDs. # <codecell> # Path to .wav files ripped from vinyl... data_directory = '../Dataset/Vinyl/' # Subdirectories 1-7 have C.wav an a cappella, 8-14 have instrumental subdirectories = [
# Each example has its own folder, with M.wav (mix, CD), C.wav (vinyl corrupted source), and S.wav (true source, CD)
subdirectories = [
    os.path.join(data_directory, d)
    for d in os.listdir(data_directory)
]
# Only entries that are actual directories are processed as examples
example_dirs = [d for d in subdirectories if os.path.isdir(d)]
for subdirectory in example_dirs:
    print('Processing file {}'.format(subdirectory))

    # Mixture is read at its own sampling rate (sr=None); the corrupted
    # source is then read at that same rate
    mix_path = os.path.join(subdirectory, 'M.wav')
    mix, fs = librosa.load(mix_path, sr=None)
    source_path = os.path.join(subdirectory, 'C.wav')
    source, fs = librosa.load(source_path, sr=fs)

    # Align the source to the mixture
    mix, source_aligned = estimate.align(
        mix, source, fs, max_global_offset=0)

    # Estimate the channel filter and keep the filtered source on disk
    source_filtered = estimate.reverse_channel(mix, source_aligned)
    filtered_path = os.path.join(subdirectory, 'C-filtered.wav')
    librosa.output.write_wav(filtered_path, source_filtered, fs)

    # Pad before subtracting the filtered source from the mixture
    mix, source_filtered = estimate.pad(mix, source_filtered)

    # Wiener filter the approximate separation
    # NOTE(review): the second argument is source_aligned, which has been
    # neither filtered nor padded -- confirm this is intended.
    residual = mix - source_filtered
    enhanced = estimate.wiener_enhance(residual, source_aligned, 6)

    # Write out approximation of the true source
    approx_path = os.path.join(subdirectory, 'S-approx.wav')
    librosa.output.write_wav(approx_path, enhanced, fs)

# <markdowncell>

# ## Step 2: Compute SDRs
# As above, we compute the SDR of our approximated source against the true source. We don't compare it against the true source with channel/timing distortion approximately removed because the vinyl distortion is much greater than for CDs.

# <codecell>

# Path to .wav files ripped from vinyl...
data_directory = '../Dataset/Vinyl/'
# Subdirectories 1-7 have C.wav as an a cappella, 8-14 have instrumental
subdirectories = [
    os.path.join(data_directory, d)
    for d in os.listdir(data_directory)
]
# Dict to store the results, each entry is an array of length 7