def __init__(self) -> None:
    """Create the helper objects and the table of note frequencies."""
    debug_prefix = "[AudioProcessing.__init__]"

    # Helper objects this class delegates to
    self.fourier = Fourier()
    self.datautils = DataUtils()
    self.functions = Functions()

    # No configuration has been supplied yet
    self.config = None

    # One frequency per key offset in range(-50, 68), rounded to two
    # decimals. With get_frequency_of_key being 440 * 2**(n / 12) this
    # spans roughly 24.5 Hz up to roughly 21.1 kHz.
    note_frequencies = []
    for key_offset in range(-50, 68):
        note_frequencies.append(round(self.get_frequency_of_key(key_offset), 2))
    self.piano_keys_frequencies = note_frequencies
def __init__(self, depth=LOG_NO_DEPTH) -> None:
    """Create the helper objects, bar-repetition settings and note table.

    Args:
        depth: Prefix placed in front of the log message emitted here.
    """
    debug_prefix = "[AudioProcessing.__init__]"
    ndepth = depth + LOG_NEXT_DEPTH

    # Helper objects this class delegates to
    self.fourier = Fourier()
    self.datautils = DataUtils()
    self.functions = Functions()

    # No configuration has been supplied yet
    self.config = None

    # MMV specific: settings for how many repeated frequencies are
    # returned per note by the processing step
    self.where_decay_less_than_one = 440
    self.value_at_zero = 5

    # One frequency per key offset in range(-50, 68), rounded to two decimals
    self.piano_keys_frequencies = [
        round(frequency, 2)
        for frequency in map(self.get_frequency_of_key, range(-50, 68))
    ]

    logging.info(f"{depth}{debug_prefix} Whole notes frequencies we'll care: [{self.piano_keys_frequencies}]")
def setup(self) -> None:
    """Create every helper object and store each one on this instance.

    The statements run in a fixed order: several constructors receive
    this instance as an argument, so nothing is reordered here. Each
    creation (except ``Utils``) is announced through ``logging.info``.
    """
    debug_prefix = "[MMVSkiaMain.setup]"
    log = logging.info  # short alias, every message below goes through it

    self.utils = Utils()

    # Context and rendering backend
    log(f"{debug_prefix} Creating MMVContext() class")
    self.context = MMVContext(mmv_skia_main=self)

    log(f"{debug_prefix} Creating SkiaNoWindowBackend() class")
    self.skia = SkiaNoWindowBackend()

    # Math helpers
    log(f"{debug_prefix} Creating Functions() class")
    self.functions = Functions()

    log(f"{debug_prefix} Creating Interpolation() class")
    self.interpolation = Interpolation()

    log(f"{debug_prefix} Creating PolarCoordinates() class")
    self.polar_coordinates = PolarCoordinates()

    # Canvas image and FFT helper
    log(f"{debug_prefix} Creating Canvas() class")
    self.canvas = MMVSkiaImage(mmvskia_main=self)

    log(f"{debug_prefix} Creating Fourier() class")
    self.fourier = Fourier()

    # Left unset on purpose: the user must explicitly set and override
    # this, mostly for compatibility and code cleanup reasons.
    self.pipe_video_to = None

    # Audio reading and processing
    log(f"{debug_prefix} Creating AudioFile() class")
    self.audio = AudioFile()

    log(f"{debug_prefix} Creating AudioProcessing() class")
    self.audio_processing = AudioProcessing()

    # Animation and core, both receive this instance
    log(f"{debug_prefix} Creating MMVSkiaAnimation() class")
    self.mmv_skia_animation = MMVSkiaAnimation(mmv_skia_main=self)

    log(f"{debug_prefix} Creating MMVSkiaCore() class")
    self.core = MMVSkiaCore(mmvskia_main=self)
def setup(self) -> None:
    """Create every helper object and store each one on this instance.

    The statements run in a fixed order: most constructors receive this
    instance as an argument, so nothing is reordered here. Each creation
    (except ``Utils``) is announced on standard output.
    """
    # The prefix names the method that is actually running; it previously
    # said "__init__", which mislabelled every progress line printed here.
    debug_prefix = "[MMVMain.setup]"

    self.utils = Utils()

    print(debug_prefix, "Creating Context()")
    self.context = Context(self)

    print(debug_prefix, "Creating SkiaNoWindowBackend()")
    self.skia = SkiaNoWindowBackend(self)

    print(debug_prefix, "Creating Functions()")
    self.functions = Functions()

    print(debug_prefix, "Creating Interpolation()")
    self.interpolation = Interpolation()

    print(debug_prefix, "Creating Canvas()")
    self.canvas = MMVImage(self)

    print(debug_prefix, "Creating Fourier()")
    self.fourier = Fourier()

    print(debug_prefix, "Creating FFmpegWrapper()")
    self.ffmpeg = FFmpegWrapper(self)

    print(debug_prefix, "Creating AudioFile()")
    self.audio = AudioFile()

    print(debug_prefix, "Creating AudioProcessing()")
    self.audio_processing = AudioProcessing()

    print(debug_prefix, "Creating MMVAnimation()")
    self.mmv_animation = MMVAnimation(self)

    print(debug_prefix, "Creating Core()")
    self.core = Core(self)
class AudioProcessing:
    """Slice raw audio and reduce its FFT to the musical-note frequencies."""

    def __init__(self, depth=LOG_NO_DEPTH) -> None:
        """Create the helper objects, bar-repetition settings and note table.

        Args:
            depth: Prefix placed in front of the log message emitted here.
        """
        debug_prefix = "[AudioProcessing.__init__]"

        self.fourier = Fourier()
        self.datautils = DataUtils()
        self.functions = Functions()
        self.config = None

        # MMV specific: settings handed to
        # Functions.how_much_bars_on_this_frequency, which decides how many
        # repeated frequencies `process` returns for each note
        self.where_decay_less_than_one = 440
        self.value_at_zero = 5

        # One frequency per key offset in range(-50, 68), rounded to two
        # decimals: roughly 24.5 Hz up to roughly 21.1 kHz
        self.piano_keys_frequencies = [
            round(self.get_frequency_of_key(x), 2) for x in range(-50, 68)
        ]

        logging.info(f"{depth}{debug_prefix} Whole notes frequencies we'll care: [{self.piano_keys_frequencies}]")

    def slice_audio(
        self,
        stereo_data: np.ndarray,
        mono_data: np.ndarray,
        sample_rate: int,
        start_cut: int,
        end_cut: int,
        batch_size: int = None,
    ) -> None:
        """Cut [start_cut, end_cut) out of the audio and store it on self.

        Sets ``self.audio_slice`` and ``self.average_value``; returns nothing.

        Args:
            stereo_data: Indexable as ``stereo_data[0]`` (left) and
                ``stereo_data[1]`` (right).
            mono_data: Mono samples, only used for the average amplitude.
            sample_rate: Not used by this method; kept for the callers.
            start_cut: First sample index of the slice.
            end_cut: One past the last sample index of the slice.
            batch_size: When given, the slice is written into a zero-filled
                ``(3, batch_size)`` array so a short slice at the end of the
                audio is zero padded. When ``None`` the two raw slices are
                stored as a list instead.
        """
        # Cut the left and right points range
        left_slice = stereo_data[0][start_cut:end_cut]
        right_slice = stereo_data[1][start_cut:end_cut]

        if batch_size is not None:
            # Zero-filled so the tail stays silent if we're at the end of the
            # audio. Row 2 is allocated but never written in this revision.
            self.audio_slice = np.zeros([3, batch_size])
            self.audio_slice[0][0:left_slice.shape[0]] = left_slice
            self.audio_slice[1][0:right_slice.shape[0]] = right_slice
        else:
            self.audio_slice = [left_slice, right_slice]

        # Average amplitude: mean of the absolute mono samples of the slice
        self.average_value = float(np.mean(np.abs(mono_data[start_cut:end_cut])))

    def resample(
        self,
        data: np.ndarray,
        original_sample_rate: int,
        new_sample_rate: int,
    ) -> np.ndarray:
        """Return ``data`` resampled to ``new_sample_rate``.

        The input is returned untouched when both rates are equal; otherwise
        ``samplerate.resample`` is used with the 'sinc_best' converter.
        """
        ratio = new_sample_rate / original_sample_rate
        if ratio == 1:
            return data
        return samplerate.resample(data, ratio, 'sinc_best')

    # Get N semitones above / below A4 key, 440 Hz
    #
    # get_frequency_of_key(-12) = 220 Hz
    # get_frequency_of_key(  0) = 440 Hz
    # get_frequency_of_key( 12) = 880 Hz
    #
    def get_frequency_of_key(self, n):
        """Return the frequency in Hz of the key ``n`` semitones away from A4."""
        return 440 * (2**(n/12))

    # https://stackoverflow.com/a/2566508
    def find_nearest(self, array, value):
        """Return ``(index, element)`` of the entry of ``array`` closest to ``value``."""
        index = (np.abs(array - value)).argmin()
        return index, array[index]

    def process(
        self,
        data: np.ndarray,
        original_sample_rate: int,
    ) -> list:
        """Calculate the FFT of ``data`` and keep only the musical-note bins.

        For every entry of ``self.config`` the data is resampled to that
        entry's ``sample_rate``, a binned FFT is taken, and for each note
        frequency between ``start_freq`` and ``end_freq`` the value of the
        nearest FFT bin is collected. A note may be repeated several times
        (see Functions.how_much_bars_on_this_frequency).

        Args:
            data: Samples to analyse.
            original_sample_rate: Sample rate of ``data``.

        Returns:
            ``[values, frequencies]``, two lists of equal length.
        """
        # Maps frequency -> FFT value, insertion ordered
        processed = {}

        for layer in self.config.values():
            sample_rate = layer.get("sample_rate")
            start_freq = layer.get("start_freq")
            end_freq = layer.get("end_freq")

            # The note frequencies we want and will return in the end
            wanted_freqs = self.datautils.list_items_in_between(
                self.piano_keys_frequencies,
                start_freq, end_freq,
            )

            # Binned FFT of the data resampled to this layer's rate;
            # indexed below as [0] = frequencies, [1] = values
            binned_fft = self.fourier.binned_fft(
                data = self.resample(
                    data = data,
                    original_sample_rate = original_sample_rate,
                    new_sample_rate = sample_rate,
                ),
                sample_rate = sample_rate,
                original_sample_rate = original_sample_rate,
            )

            for freq in wanted_freqs:
                # Nearest FFT bin to this note and its value
                nearest_index, nearest_freq = self.find_nearest(binned_fft[0], freq)
                fft_value = binned_fft[1][nearest_index]

                # How many bars we'll render duped at this freq
                repeats = math.ceil(
                    self.functions.how_much_bars_on_this_frequency(
                        x = freq,
                        where_decay_less_than_one = self.where_decay_less_than_one,
                        value_at_zero = self.value_at_zero,
                    )
                )

                # Repeated bars get distinct keys by nudging the frequency
                # 0.1 Hz per repetition
                for i in range(repeats):
                    processed[nearest_freq + (i/10)] = fft_value

        return [list(processed.values()), list(processed)]
class AudioProcessing:
    """Extract per-slice audio features: raw channels, RMS, std and a binned FFT."""

    def __init__(self) -> None:
        """Create the helper objects and the table of note frequencies."""
        # Create some util classes
        self.fourier = Fourier()
        self.datautils = DataUtils()
        self.functions = Functions()
        self.config = None

        # One frequency per key offset in range(-50, 68), rounded to two
        # decimals: roughly 24.5 Hz up to roughly 21.1 kHz
        self.piano_keys_frequencies = [
            round(self.get_frequency_of_key(x), 2) for x in range(-50, 68)
        ]

    def _get_config_stuff(self, config_dict):
        """Expand one layer configuration into the frequencies it will produce.

        Reads ``self.where_decay_less_than_one`` and ``self.value_at_zero``,
        which `configure` sets before calling this.

        Args:
            config_dict: Mapping with the keys "original_sample_rate",
                "target_sample_rate", "start_freq" and "end_freq".

        Returns:
            A dict with the four input values plus "expected_frequencies"
            (list) and "expected_N_frequencies" (its length).
        """
        start_freq = config_dict["start_freq"]
        end_freq = config_dict["end_freq"]

        # The note frequencies we want and will return in the end
        wanted_freqs = self.datautils.list_items_in_between(
            self.piano_keys_frequencies,
            start_freq,
            end_freq,
        )

        expected_N_frequencies = 0
        expected_frequencies = []

        for freq in wanted_freqs:
            # How many bars we'll render duped at this freq, see this
            # function on the Functions class for more detail
            N = math.ceil(
                self.functions.how_much_bars_on_this_frequency(
                    x=freq,
                    where_decay_less_than_one=self.where_decay_less_than_one,
                    value_at_zero=self.value_at_zero,
                ))

            expected_N_frequencies += N

            # Repetitions of one note are nudged 0.01 Hz apart
            expected_frequencies.extend([freq + (i / 100) for i in range(N)])

        return {
            "original_sample_rate": config_dict["original_sample_rate"],
            "target_sample_rate": config_dict["target_sample_rate"],
            "expected_N_frequencies": expected_N_frequencies,
            "expected_frequencies": expected_frequencies,
            "start_freq": start_freq,
            "end_freq": end_freq,
        }

    def configure(self, config, where_decay_less_than_one=440, value_at_zero=3):
        """Set up the list of layer configurations used for the FFT.

        ``config`` is a list of dicts that can look like this::

            [
                {
                    "original_sample_rate": 48000,
                    "target_sample_rate": 5000,
                    "start_freq": 20,
                    "end_freq": 2500,
                },
                {...},
            ]

        NOTE: The FFT will only get values of frequencies up to SAMPLE_RATE/2
        and jumps of the calculation sample rate divided by the window size
        (batch size). So if you want more bass information, downsample to
        5000 Hz or 1000 Hz and get frequencies up to 2500 or 500, respectively.

        Sets ``self.config``, ``self.FFT_length``,
        ``self.process_layer_configs`` and the two bar-repetition settings.

        Args:
            config: The list of layer dicts described above.
            where_decay_less_than_one: Forwarded to
                Functions.how_much_bars_on_this_frequency.
            value_at_zero: Forwarded to the same function.
        """
        debug_prefix = "[AudioProcessing.configure]"

        logging.info(
            f"{debug_prefix} Whole notes frequencies we'll care: [{self.piano_keys_frequencies}]"
        )

        self.config = config
        self.FFT_length = 0
        self.where_decay_less_than_one = where_decay_less_than_one
        self.value_at_zero = value_at_zero

        # The configurations on sample rate, frequencies to expect
        self.process_layer_configs = []

        for layers in self.config:
            info = self._get_config_stuff(layers)
            # Times two because the left and right channels are both processed
            self.FFT_length += info["expected_N_frequencies"] * 2
            self.process_layer_configs.append(info)

        print("BINNED FFT LENGTH", self.FFT_length)

    # # Feature Extraction

    def rms(self, values: np.ndarray) -> float:
        """Return the Root Mean Square of ``values``."""
        return np.sqrt(np.mean(values**2))

    # # New Methods

    def get_info_on_audio_slice(self,
                                audio_slice: np.ndarray,
                                original_sample_rate,
                                do_calculate_fft=True):
        """Yield ``[name, value]`` pairs describing one audio slice.

        This is a generator. In order it yields "mmv_raw_audio_left",
        "mmv_raw_audio_right", "mmv_rms" (left, right, mean of both, each
        rounded to 8 decimals), "mmv_std" (left, right, mono) and, when
        requested, "mmv_fft".

        Args:
            audio_slice: Two-dimensional array; row 0 is read as the left
                channel and row 1 as the right one.
            original_sample_rate: Not read; the FFT uses the
                "original_sample_rate" stored in each layer configuration.
            do_calculate_fft: When false the "mmv_fft" item is skipped.

        If ``self.fourier.binned_fft`` returns ``None`` the generator stops
        without yielding "mmv_fft".
        """
        mono = (audio_slice[0] + audio_slice[1]) / 2

        yield ["mmv_raw_audio_left", audio_slice[0]]
        yield ["mmv_raw_audio_right", audio_slice[1]]

        # # Average audio amplitude based on RMS

        # L, R, then the mean of both as the mono amplitude
        RMS = [self.rms(audio_slice[channel_number]) for channel_number in (0, 1)]
        RMS.append(sum(RMS) / 2)

        yield ["mmv_rms", tuple([round(value, 8) for value in RMS])]

        # # Standard deviations

        yield [
            "mmv_std",
            tuple(
                [np.std(audio_slice[0]),
                 np.std(audio_slice[1]),
                 np.std(mono)])
        ]

        # # FFT shenanigans

        if do_calculate_fft:

            # The final fft we give to the shader
            processed = np.zeros(self.FFT_length, dtype=np.float32)

            # Next free position of the processed array
            counter = 0

            # For each channel
            for data in audio_slice:

                # For every config dict on the config
                for info in self.process_layer_configs:

                    layer_original_sample_rate = info["original_sample_rate"]
                    target_sample_rate = info["target_sample_rate"]
                    expected_frequencies = info["expected_frequencies"]

                    # The FFT of [[frequencies], [values]]
                    binned_fft = self.fourier.binned_fft(
                        data=self.resample(
                            data=data,
                            original_sample_rate=layer_original_sample_rate,
                            target_sample_rate=target_sample_rate,
                        ),
                        original_sample_rate=layer_original_sample_rate,
                        target_sample_rate=target_sample_rate,
                    )
                    if binned_fft is None:
                        return

                    # Information on the frequencies, the index 0 is the DC bias, or frequency 0 Hz
                    # and at every index it jumps the distance between any index N and N+1
                    fft_freqs = binned_fft[0]
                    jumps = abs(fft_freqs[1])

                    for freq in expected_frequencies:

                        # TODO: make configurable
                        flatten_scalar = self.functions.value_on_line_of_two_points(
                            Xa=20,
                            Ya=0.1,
                            Xb=20000,
                            Yb=3,
                            get_x=freq)

                        # Trick: since the jump of freqs are always the same, the nearest frequency index
                        # given a target freq will be the frequency itself divided by how many frequency we
                        # jump at the indexes
                        nearest = int(freq / jumps)

                        # The abs value of the FFT, scaled
                        processed[counter] = abs(binned_fft[1][nearest]) * flatten_scalar
                        counter += 1

            yield ["mmv_fft", processed]

    # # Common Methods

    def resample(self, data: np.ndarray, original_sample_rate: int,
                 target_sample_rate: int) -> np.ndarray:
        """Resample a raw array to another sample rate.

        Useful when calculating FFTs because a lower sample rate means we
        get more info on the bass freqs. The input is returned untouched
        when both rates are equal; otherwise libsamplerate is used with the
        'sinc_fastest' converter.
        """
        if target_sample_rate == original_sample_rate:
            return data

        return samplerate.resample(data,
                                   ratio=(target_sample_rate / original_sample_rate),
                                   converter_type='sinc_fastest')

    def resample_nearest(self, data: np.ndarray, original_sample_rate: int,
                         target_sample_rate: int) -> np.ndarray:
        """Resample by picking, for every output sample, a source sample by index.

        Experimental alternative to `resample`. The output has
        ``round(len(data) * target_sample_rate / original_sample_rate)``
        samples; output sample ``k`` is the source sample at index
        ``floor(k * len(data) / output_length)``.

        Args:
            data: Array indexed along its first axis.
            original_sample_rate: Sample rate of ``data``.
            target_sample_rate: Sample rate wanted for the result.

        Returns:
            ``data`` itself when both rates are equal, otherwise a new array.
        """
        # Nothing to do, target sample rate is the same as old one
        if target_sample_rate == original_sample_rate:
            return data

        source_length = data.shape[0]

        # A lower target rate must give FEWER samples, so the length scales
        # by target / original (the inverse would lengthen a downsample)
        target_length = int(round(source_length * target_sample_rate / original_sample_rate))
        if target_length <= 0:
            return data[:0]

        # Spread the output positions evenly over the source and truncate to
        # integer indexes; position 0 maps to source index 0
        indexes = (np.arange(target_length) * (source_length / target_length)).astype(np.int64)

        # Guard against a float rounding artefact pushing past the last index
        indexes = np.minimum(indexes, source_length - 1)

        return data[indexes]

    # Get N semitones above / below A4 key, 440 Hz
    #
    # get_frequency_of_key(-12) = 220 Hz
    # get_frequency_of_key(  0) = 440 Hz
    # get_frequency_of_key( 12) = 880 Hz
    #
    def get_frequency_of_key(self, n, A4=440):
        """Return the frequency of the key ``n`` semitones away from ``A4``."""
        return A4 * (2**(n / 12))

    # https://stackoverflow.com/a/2566508
    def find_nearest(self, array, value):
        """Return ``(index, element)`` of the entry of ``array`` closest to ``value``."""
        index = (np.abs(array - value)).argmin()
        return index, array[index]
def __init__(self) -> None:
    """Create the helper objects; no configuration is set yet."""
    # Starts empty
    self.config = None

    # Helper objects this class delegates to
    self.fourier = Fourier()
    self.datautils = DataUtils()
class AudioProcessing:
    """Slice raw audio and pick the wanted frequencies out of its binned FFT."""

    def __init__(self) -> None:
        """Create the helper objects; ``config`` starts as ``None``."""
        self.fourier = Fourier()
        self.datautils = DataUtils()
        self.config = None

    def slice_audio(
        self,
        stereo_data: np.ndarray,
        mono_data: np.ndarray,
        sample_rate: int,
        start_cut: int,
        end_cut: int,
        batch_size: int = None,
    ) -> None:
        """Cut [start_cut, end_cut) out of the audio and store it on self.

        Sets ``self.audio_slice`` (left, right, mono) and
        ``self.average_value``; returns nothing.

        Args:
            stereo_data: Indexable as ``stereo_data[0]`` (left) and
                ``stereo_data[1]`` (right).
            mono_data: Mono samples.
            sample_rate: Not used by this method; kept for the callers.
            start_cut: First sample index of the slice.
            end_cut: One past the last sample index of the slice.
            batch_size: When given, the slices are written into a zero-filled
                ``(3, batch_size)`` array so a short slice at the end of the
                audio is zero padded. When ``None`` the three raw slices are
                stored as a list instead.
        """
        # Cut the left, right and mono points range
        left_slice = stereo_data[0][start_cut:end_cut]
        right_slice = stereo_data[1][start_cut:end_cut]
        mono_slice = mono_data[start_cut:end_cut]

        if batch_size is not None:
            # Zero-filled so the tail stays silent if we're at the end of the audio
            self.audio_slice = np.zeros([3, batch_size])
            self.audio_slice[0][0:left_slice.shape[0]] = left_slice
            self.audio_slice[1][0:right_slice.shape[0]] = right_slice
            self.audio_slice[2][0:mono_slice.shape[0]] = mono_slice
        else:
            self.audio_slice = [left_slice, right_slice, mono_slice]

        # Average amplitude: mean of the absolute mono samples of the slice
        self.average_value = np.mean(np.abs(mono_slice))

    def resample(
        self,
        data: np.ndarray,
        original_sample_rate: int,
        new_sample_rate: int,
    ) -> np.ndarray:
        """Return ``data`` resampled to ``new_sample_rate``.

        The input is returned untouched when both rates are equal; otherwise
        ``samplerate.resample`` is used with the 'sinc_best' converter.
        """
        ratio = new_sample_rate / original_sample_rate
        if ratio == 1:
            return data
        return samplerate.resample(data, ratio, 'sinc_best')

    def get_frequency_of_key(self, n):
        """Return the frequency in Hz of the key ``n`` semitones above / below A4 (440 Hz)."""
        return 440 * ((2**(1/12)) ** n)

    def process(
        self,
        data: np.ndarray,
        original_sample_rate: int,
    ) -> dict:
        """Take a binned FFT per config entry and keep the requested frequencies.

        Every value of ``self.config`` is a dict read for "sample_rate",
        "start_freq", "end_freq" and "get_frequencies", the latter being:

        - "all": keep every bin of the binned FFT;
        - "range": keep the bins between start_freq and end_freq;
        - "musical": for each musical-note frequency between start_freq and
          end_freq keep the value of the nearest bin. When that bin was
          already taken by an earlier note, the following bin is used
          instead if there is one.

        Any other value contributes nothing for that entry.

        Args:
            data: Samples to analyse.
            original_sample_rate: Sample rate of ``data``.

        Returns:
            ``{"fft": [...], "frequencies": [...]}``, two parallel lists.
        """
        # Maps config key -> {frequency: fft value}
        processed = {}

        for key, value in self.config.items():
            get_frequencies = value.get("get_frequencies")
            sample_rate = value.get("sample_rate")
            start_freq = value.get("start_freq")
            end_freq = value.get("end_freq")

            # Resample audio to target sample rate, get freqs vs fft value dictionary
            resampled = self.resample(data, original_sample_rate, sample_rate)
            binned_fft = self.fourier.binned_fft(resampled, sample_rate)

            wanted_binned_fft = {}

            if get_frequencies == "range":
                wanted_binned_fft = self.datautils.dictionary_items_in_between(binned_fft, start_freq, end_freq)

            elif get_frequencies == "all":
                wanted_binned_fft = binned_fft

            elif get_frequencies == "musical":
                key_freqs = [self.get_frequency_of_key(x) for x in range(-1000, 1000)]
                wanted_freqs = self.datautils.list_items_in_between(key_freqs, start_freq, end_freq)

                # Bin frequencies in the order the binned FFT provides them
                bin_freqs = list(binned_fft.keys())
                used_bins = set()

                for freq in wanted_freqs:
                    # https://stackoverflow.com/a/7934608/13477696
                    chosen = min(bin_freqs, key=lambda bin_freq: abs(bin_freq - freq))

                    if chosen in used_bins:
                        # Bounds check on the INDEX: step to the next bin
                        # only when one exists
                        next_index = bin_freqs.index(chosen) + 1
                        if next_index < len(bin_freqs):
                            chosen = bin_freqs[next_index]

                    used_bins.add(chosen)
                    wanted_binned_fft[freq] = binned_fft[chosen]

            # Send the raw frequency vs fft dict
            processed[key] = wanted_binned_fft

        # Flatten every entry, in order, into two parallel lists
        linear_processed = []
        frequencies = []
        for item in processed.values():
            for frequency in item:
                frequencies.append(frequency)
                linear_processed.append(item[frequency])

        return {"fft": linear_processed, "frequencies": frequencies}