def expand(self,):
    """Apply a per-sample gain derived from the stored volume curves to the source file.

    Does nothing when ``self.file_src`` is falsy.  Otherwise each channel of
    the file is multiplied by a factor computed from its dB curve (clipped to
    the limits selected in the GUI), the result is passed through
    ``units.normalize`` and written via ``io_ops.write_file`` with the suffix
    ``"decompressed"``.
    """
    if not self.file_src:
        return

    print("Resampling...")
    # clip limits as currently set in the GUI
    lower_limit = self.s_clip_lower.value()
    upper_limit = self.s_clip_upper.value()

    signal, sr, channels = io_ops.read_file(self.file_src)
    # one position per sample; identical for every channel
    sample_positions = np.arange(len(signal))
    last_curve = len(self.vol_curves) - 1

    for channel_i in range(channels):
        # channels without a curve of their own reuse the last curve
        if channel_i <= last_curve:
            curve_dB = self.vol_curves[channel_i]
        else:
            curve_dB = self.vol_curves[-1]

        # distance of the clipped curve from the upper limit, in dB
        gain_dB = upper_limit - np.clip(curve_dB, lower_limit, upper_limit)
        # presumably converts dB to a linear factor - see units.to_fac
        gain = units.to_fac(gain_dB)

        # stretch the curve (sampled at self.t) to one factor per sample
        per_sample_gain = np.interp(sample_positions, self.t * sr, gain)
        signal[:, channel_i] *= per_sample_gain

    signal = units.normalize(signal)
    io_ops.write_file(self.file_src, signal, sr, channels, "decompressed")
def run_resample(self):
    """Scale one channel of the first loaded file by the pan curve and write it as mono.

    Nothing happens unless a first filename is set, ``self.pan_samples`` is
    truthy and the resampling widget reports a truthy ``channels`` value.
    The curve in ``self.pan_line.data`` (column 0 scaled by the sample rate,
    column 1 used as the value) is interpolated to one factor per sample and
    multiplied onto the channel at index 1 of the signal.
    """
    source = self.filenames[0]
    if not (source and self.pan_samples):
        return

    channels = self.parent.props.resampling_widget.channels
    if not (channels and self.pan_samples):
        return

    curve = self.pan_line.data
    signal, sr, channels = io_ops.read_file(self.filenames[0])

    sample_positions = np.arange(len(signal[:, 0]))
    # NOTE(review): column 0 looks like time in seconds (it is scaled by sr) - confirm
    af = np.interp(sample_positions, curve[:, 0] * sr, curve[:, 1])

    # always reads channel index 1, so the input needs at least two channels
    io_ops.write_file(self.filenames[0], signal[:, 1] * af, sr, 1)
def spectrum_from_audio(filename, fft_size=4096, hop=256, channel_mode="L"):
    """Read an audio file and return its dB magnitude spectrogram(s).

    Args:
        filename: path handed to ``io_ops.read_file``.
        fft_size: FFT size forwarded to ``fourier.get_mag``.
        hop: hop size forwarded to ``fourier.get_mag``.
        channel_mode: one of ``"L"``, ``"R"``, ``"L,R"`` or ``"Mean"``.

    Returns:
        ``(spectra, sr)``.  For ``"Mean"`` ``spectra`` is a 1-tuple holding
        the mean over the computed channels, otherwise a list with one
        spectrogram per computed channel.

    Raises:
        KeyError: if ``channel_mode`` is not one of the supported modes.

    If the file has fewer channels than requested, the missing channel is
    skipped; when nothing has been computed yet (e.g. ``"R"`` on a mono
    file) channel 0 is used instead, so the result is never empty.
    """
    signal, sr, channels = io_ops.read_file(filename)
    channel_map = {"L": (0,), "R": (1,), "L,R": (0, 1), "Mean": (0, 1)}

    spectra = []
    for channel in channel_map[channel_mode]:
        print("channel", channel)
        if channel >= channels:
            print("not enough channels for L/R comparison - fallback to mono")
            if spectra:
                # the available channel has already been analysed
                break
            # nothing analysed yet: really fall back to the mono channel
            channel = 0
        # get the magnitude spectrum
        imdata = units.to_dB(fourier.get_mag(signal[:, channel], fft_size, hop, "hann"))
        spectra.append(imdata)

    # take mean across axis
    if channel_mode == "Mean":
        return (np.mean(spectra, axis=0), ), sr
    return spectra, sr
def resample(self, ):
    """Resample the source file by the most recently stored ratio.

    Does nothing when ``self.file_src`` or ``self.ratios`` is falsy.  When
    the optional ``resampy`` module is unavailable (``resampy is None``) a
    message is printed and the method returns without touching the file.
    Otherwise the signal is resampled along axis 0 and written via
    ``io_ops.write_file`` with a suffix containing the change in percent.
    """
    if not (self.file_src and self.ratios):
        return
    if resampy is None:
        print("Can't resample without resampy!")
        # bail out here; continuing would fail on ``None.resample``
        return

    print("Resampling...")
    # get input
    ratio = self.ratios[-1]
    percentage = (ratio - 1) * 100
    signal, sr, channels = io_ops.read_file(self.file_src)

    # resample, first axis is time!
    res = resampy.resample(signal, sr * ratio, sr, axis=0, filter='sinc_window', num_zeros=8)
    io_ops.write_file(self.file_src, res, sr, channels, "_resampled_%.3f" % percentage)
def process_max_mono(self, fft_size, hop):
    """Write a mono version of every stereo file, built from the stronger STFT bin of both channels.

    For each name in ``self.file_names`` the file is read; inputs that do not
    have exactly two channels are reported and skipped.  Both channels are
    transformed with ``fourier.stft``, for every bin the value with the
    larger magnitude is kept, and the inverse transform is written back as a
    single-channel file via ``io_ops.write_file``.
    """
    for file_name in self.file_names:
        path = self.names_to_full_paths[file_name]
        signal, sr, channels = io_ops.read_file(path)
        if channels != 2:
            print("expects stereo input")
            continue

        num_samples = len(signal)
        # pad the stereo signal by half an FFT window along the time axis
        padded = fourier.fix_length(signal, num_samples + fft_size // 2, axis=0)

        # one STFT per channel
        left = fourier.stft(padded[:, 0], n_fft=fft_size, step=hop)
        right = fourier.stft(padded[:, 1], n_fft=fft_size, step=hop)

        # per bin, keep whichever channel has the larger magnitude
        left_is_louder = np.abs(left) > np.abs(right)
        merged = np.where(left_is_louder, left, right)

        # back to the time domain, trimmed to the original length
        mono = fourier.istft(merged, length=num_samples, hop_length=hop)
        io_ops.write_file(path, mono, sr, 1)
def spectrum_from_audio(filename, fft_size=4096, hop=256, channel_mode="L", start=None, end=None):
    """Read an audio file and return the time-averaged dB spectrum of its channel(s).

    Args:
        filename: path handed to ``io_ops.read_file``.
        fft_size: FFT size forwarded to ``fourier.get_mag``.
        hop: hop size forwarded to ``fourier.get_mag``.
        channel_mode: one of ``"L"``, ``"R"`` or ``"L+R"``.
        start: currently unused.
        end: currently unused.

    Returns:
        ``(spectra, sr)`` where ``spectra`` is a list of at least two
        spectra; when only one channel was analysed its spectrum is
        duplicated so callers can always compare two entries.

    Raises:
        KeyError: if ``channel_mode`` is not one of the supported modes.

    If the requested channel does not exist (e.g. ``"R"`` on a mono file)
    channel 0 is analysed instead, so the padding step always has a spectrum
    to duplicate.
    """
    print("reading", filename)
    signal, sr, channels = io_ops.read_file(filename)
    print(sr)

    channel_map = {"L": (0,), "R": (1,), "L+R": (0, 1)}
    spectra = []
    for channel in channel_map[channel_mode]:
        print("channel", channel)
        if channel >= channels:
            print("not enough channels for L/R comparison - fallback to mono")
            if spectra:
                # the available channel has already been analysed
                break
            # nothing analysed yet: really fall back to the mono channel
            channel = 0
        # get the magnitude spectrum in dB
        imdata = units.to_dB(fourier.get_mag(signal[:, channel], fft_size, hop, "hann"))
        # average along axis 1 to get a single spectrum for this channel
        spectra.append(np.mean(imdata, axis=1))

    # pad the data so we can compare this in a stereo setting if required
    if len(spectra) < 2:
        spectra.append(spectra[0])
    return spectra, sr
def run(filenames, signal_data=None, speed_curve=None, resampling_mode="Linear", sinc_quality=50, use_channels=None, prog_sig=None, lag_curve=None):
    """Resample each file along a speed or lag curve and write ``<name>_res.wav``.

    Args:
        filenames: paths of the input files; also used to derive output names.
        signal_data: optional sequence parallel to ``filenames`` holding
            ``(signal, sr)`` pairs; a falsy entry means "read the file".
        speed_curve: 2-column array; column 0 is multiplied by the sample
            rate, column 1 holds the speeds.  Takes precedence over
            ``lag_curve``.
        resampling_mode: ``"Linear"`` or ``"Sinc"``.
        sinc_quality: forwarded to ``sinc_wrapper_mt`` in ``"Sinc"`` mode.
        use_channels: input channel indices to process; defaults to ``[0]``.
        prog_sig: optional object whose ``notifyProgress.emit`` receives
            progress values in percent.
        lag_curve: 2-column array; both columns are multiplied by the sample
            rate.

    Raises:
        ValueError: if ``resampling_mode`` is unknown or neither
            ``speed_curve`` nor ``lag_curve`` is given.  Raised before any
            file is read, so no garbage output is written.
    """
    # fail fast: an unknown mode would leave the output buffer uninitialised
    if resampling_mode not in ("Linear", "Sinc"):
        raise ValueError(f"Unknown resampling_mode {resampling_mode!r}, expected 'Linear' or 'Sinc'.")
    # without a curve there are no positions to sample at
    if speed_curve is None and lag_curve is None:
        raise ValueError("Either speed_curve or lag_curve must be given.")
    if use_channels is None:
        use_channels = [0, ]

    if prog_sig:
        prog_sig.notifyProgress.emit(0)
    if signal_data is None:
        signal_data = [None for _ in filenames]

    for filename, sig_data in zip(filenames, signal_data):
        start_time = time()
        print(f"Resampling '{os.path.basename(filename)}'...", resampling_mode, sinc_quality, use_channels)

        # read the file unless the caller already supplied the samples
        if sig_data:
            signal, sr = sig_data
        else:
            from util import io_ops
            signal, sr, channels = io_ops.read_file(filename)

        if resampling_mode == "Linear":
            samples_in = np.arange(len(signal))
        lowpass = 0

        if speed_curve is not None:
            sampletimes = speed_curve[:, 0] * sr
            speeds = speed_curve[:, 1]
            sample_at = speed_to_pos(sampletimes, speeds, len(signal))
        else:
            sampletimes = lag_curve[:, 0] * sr
            lags = lag_curve[:, 1] * sr
            sample_at = np.interp(np.arange(len(signal) + lags[-1]), sampletimes, sampletimes - lags)
            # ensure we have no sub-zero values, saves one max in sinc
            np.clip(sample_at, 0, None, out=sample_at)

        print(f"Preparation took {time() - start_time:.3f} seconds.")
        start_time = time()

        # create the multichannel output array
        length = len(sample_at)
        num_channels = len(use_channels)
        output = np.empty((length, num_channels), dtype="float32")

        # enumerate because maybe we want to resample less channels than input has
        for out_channel, in_channel in enumerate(use_channels):
            if resampling_mode == "Sinc":
                sinc_wrapper_mt(output[:, out_channel], sample_at, signal[:, in_channel], lowpass, sinc_quality)
            else:
                output[:, out_channel] = np.interp(sample_at, samples_in, signal[:, in_channel])
            if prog_sig:
                prog_sig.notifyProgress.emit((out_channel + 1) / num_channels * 100)

        # after all pieces have been resampled, write it out to the file
        print(f"Resampling took {time() - start_time:.3f} seconds.")
        start_time = time()
        outfilename = filename.rsplit('.', 1)[0] + '_res.wav'
        with sf.SoundFile(outfilename, 'w+', sr, num_channels, subtype='FLOAT') as outfile:
            outfile.write(output)
        if prog_sig:
            prog_sig.notifyProgress.emit(100)
        print(f"Writing took {time() - start_time:.3f} seconds.")
        print("Done!\n")
def process_heuristic(self, fft_size, hop):
    """Detect short level dips per frequency band and add band-limited signal to fill them.

    Parameters are read from ``self.dropout_widget`` (``max_width``,
    ``max_slope``, ``num_bands``, ``f_upper``, ``f_lower``).  Every file in
    ``self.file_names`` is read, processed channel by channel and written
    back via ``io_ops.write_file``.

    Args:
        fft_size: FFT size forwarded to ``fourier.get_mag`` and used to map
            frequencies to bin indices.
        hop: hop size forwarded to ``fourier.get_mag``.
    """
    # get params from gui
    max_width = self.dropout_widget.max_width
    max_slope = self.dropout_widget.max_slope
    num_bands = self.dropout_widget.num_bands
    f_upper = self.dropout_widget.f_upper
    f_lower = self.dropout_widget.f_lower

    # split the range up into n bands; the uint16 dtype truncates the edges
    # to whole numbers.  Convert to Python ints right away: with NumPy 2
    # promotion rules ``np.uint16 * fft_size`` would stay uint16 and overflow.
    bands = np.logspace(np.log2(f_lower), np.log2(f_upper), num=num_bands, endpoint=True, base=2, dtype=np.uint16).tolist()

    for file_name in self.file_names:
        file_path = self.names_to_full_paths[file_name]
        signal, sr, channels = io_ops.read_file(file_path)

        # distance (in FFT frames) to look around the current frame
        # NOTE(review): an older comment said "divide by two", the code divides by 1.5
        d = int(max_width / 1.5 * sr / hop)

        for channel in range(channels):
            print("Processing channel", channel)
            imdata = fourier.get_mag(signal[:, channel], fft_size, hop, "hann")
            imdata = units.to_dB(imdata)
            num_frames = imdata.shape[1]

            # the gain of a band is limited by that of the band above it;
            # only the top band acts freely, hence the huge initial ceiling
            correction_fac = np.ones(num_frames) * 1000

            # go over all bands, highest first
            for f_lower_band, f_upper_band in reversed(list(pairwise(bands))):
                # get the bin indices for this band
                bin_lower = int(f_lower_band * fft_size / sr)
                bin_upper = int(f_upper_band * fft_size / sr)

                # take the mean volume across this band
                vol = np.mean(imdata[bin_lower:bin_upper], axis=0)

                # detect valleys in the volume curve
                peaks, _ = scipy.signal.find_peaks(-vol, height=None, threshold=None, distance=None, prominence=5, wlen=None, rel_height=0.5, plateau_size=None)

                # initialize the gain curve for this band
                gain_curve = np.zeros(num_frames)

                # go over all peak candidates and use good ones
                for peak_i in peaks:
                    # avoid errors at the very ends
                    if not 2 * d < peak_i < num_frames - 2 * d - 1:
                        continue
                    # mean volume of the d frames just outside the patch on
                    # either side of the potential dropout
                    left = np.mean(vol[peak_i - 2 * d:peak_i - d])
                    right = np.mean(vol[peak_i + d:peak_i + 2 * d])
                    m = (left - right) / (2 * d)
                    # only patch when the surrounding level is flat enough;
                    # abs() so neither side of a transient gets blurred
                    if abs(m) < max_slope:
                        # interpolate a new patch and get the gain from the
                        # difference to the original volume curve
                        gain_curve[peak_i - d:peak_i + d + 1] = np.interp(range(2 * d + 1), (0, 2 * d), (left, right)) - vol[peak_i - d:peak_i + d + 1]

                # we don't want to make pops more quiet, so clip at 1;
                # the upper bound is the factor of the band above (processed
                # before) with some tolerance
                correction_fac = np.clip(np.power(10, gain_curve / 20), 1, correction_fac * 2)

                # resample the per-frame correction to match the signal
                vol_corr = signal[:, channel] * np.interp(np.linspace(0, 1, len(signal[:, channel])), np.linspace(0, 1, len(correction_fac)), correction_fac - 1)

                # add the extra bits to the signal, limited to this band
                signal[:, channel] += filters.butter_bandpass_filter(vol_corr, f_lower_band, f_upper_band, sr, order=3)

        io_ops.write_file(file_path, signal, sr, channels)
def compute_spectra(self, filenames, fft_size, fft_overlap):
    """Load changed audio files and make sure an FFT exists for every selected channel.

    Returns early (after clearing ``self.dirty``) while the fourier thread
    still has jobs.  Otherwise files whose name changed are re-read and their
    stored FFTs dropped, and for every loaded file a storage key
    ``(filename, fft_size, channel, hop)`` is collected in ``self.keys``.
    A missing FFT is either derived from a stored one with a smaller hop by
    striding, or queued as a job for ``self.fourier_thread``.

    Args:
        filenames: candidate file names, compared by index with ``self.filenames``.
        fft_size: FFT size to use.
        fft_overlap: divisor applied to ``fft_size`` to obtain the hop.
    """
    # TODO: implement adaptive / intelligent hop reusing data
    # maybe move more into the thread
    view_needs_reset = False
    self.dirty = False
    if self.fourier_thread.jobs:
        print("Fourier job is still running, wait!")
        return

    # go over all new file candidates; only reload audio whose name changed
    for i, filename in enumerate(filenames):
        if self.filenames[i] == filename:
            continue
        # remove all ffts of the old file from storage
        stale_keys = [key for key in self.fft_storage if key[0] == self.filenames[i]]
        for key in stale_keys:
            del self.fft_storage[key]
        # now load new audio
        self.signals[i], self.sr, self.channels = io_ops.read_file(filename)
        self.filenames[i] = filename
        view_needs_reset = True

    durations = [len(sig) / self.sr for sig in self.signals if sig is not None]
    self.duration = max(durations) if durations else 0
    self.keys = []
    self.fft_size = fft_size
    self.hop = fft_size // fft_overlap
    if view_needs_reset:
        self.reset_view()

    for filename, signal, channel in zip(self.filenames, self.signals, self.selected_channels):
        if not filename:
            continue
        # the key keeps the originally selected channel, even if we fall back below
        k = (filename, self.fft_size, channel, self.hop)
        self.keys.append(k)
        if channel >= self.channels:
            print("Not enough audio channels to load, reverting to first channel")
            channel = 0

        # first try to get FFT from current storage and continue directly
        if k in self.fft_storage:
            self.dirty = True
            continue

        # check for alternate hops: among stored FFTs of the same file, size
        # and channel, pick the one whose smaller hop is closest to ours
        denser_keys = [key for key in self.fft_storage if key[0:3] == k[0:3] and key[3] < k[3]]
        more_dense = max(denser_keys, key=lambda key: key[3], default=None)

        if more_dense:
            # prefer reduction via strides
            print("reducing resolution via stride", more_dense[3], k[3])
            step = k[3] // more_dense[3]
            self.fft_storage[k] = np.array(self.fft_storage[more_dense][:, ::step])
            self.continue_spectra()
        else:
            # TODO: implement gap filling from a sparser FFT, will need
            # changes to stft function
            print("storing new fft", k)
            # append to the fourier job list; all tasks are started below
            self.fourier_thread.jobs.append((signal[:, channel], self.fft_size, self.hop, "hann", self.num_cores, k))

    if self.fourier_thread.jobs:
        # perform all fourier jobs; we continue when the thread emits a
        # "finished" signal, connected to retrieve_fft()
        self.fourier_thread.start()
    elif self.dirty:
        # this happens when only loading from storage is required
        self.continue_spectra()