Code Example #1
    def __init__(self) -> None:
        debug_prefix = "[AudioProcessing.__init__]"

        # Create some util classes
        self.fourier = Fourier()
        self.datautils = DataUtils()
        self.functions = Functions()
        self.config = None

        # List of full frequencies of notes
        # -50 to 68 yields freqs of ~24.5 Hz up to ~21 kHz
        self.piano_keys_frequencies = [
            round(self.get_frequency_of_key(x), 2) for x in range(-50, 68)
        ]
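
The comprehension above implements the equal-temperament formula f(n) = 440 * 2**(n/12), where n is the number of semitones away from A4 (440 Hz). A quick standalone check of the range endpoints, plain Python with nothing from MMV assumed:

def get_frequency_of_key(n):
    # n semitones above/below A4 (440 Hz), equal temperament
    return 440 * (2 ** (n / 12))

print(round(get_frequency_of_key(-50), 2))  # 24.5 Hz, lowest key of the range
print(get_frequency_of_key(67))             # ~21096 Hz, top of the range (range(-50, 68) stops at 67)
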
Code Example #2
    def __init__(self, depth = LOG_NO_DEPTH) -> None:
        debug_prefix = "[AudioProcessing.__init__]"
        ndepth = depth + LOG_NEXT_DEPTH

        self.fourier = Fourier()
        self.datautils = DataUtils()
        self.functions = Functions()
        self.config = None

        # MMV specific: these control how many repeated frequencies the
        # process function returns
        self.where_decay_less_than_one = 440
        self.value_at_zero = 5

        # List of full frequencies of notes
        # -50 to 68 yields freqs of ~24.5 Hz up to ~21 kHz
        self.piano_keys_frequencies = [round(self.get_frequency_of_key(x), 2) for x in range(-50, 68)]
        logging.info(f"{depth}{debug_prefix} Whole notes frequencies we'll care about: [{self.piano_keys_frequencies}]")
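
The where_decay_less_than_one and value_at_zero attributes feed Functions.how_much_bars_on_this_frequency, which is not shown in these examples. A minimal sketch of what such a helper could look like, assuming a simple exponential decay that starts at value_at_zero for x = 0 and reaches 1 at x = where_decay_less_than_one; the real MMV implementation may differ:

def how_much_bars_on_this_frequency(x, where_decay_less_than_one, value_at_zero):
    # Hypothetical exponential decay: f(0) = value_at_zero,
    # f(where_decay_less_than_one) = 1, decaying further past that point
    return value_at_zero * (1 / value_at_zero) ** (x / where_decay_less_than_one)

print(how_much_bars_on_this_frequency(0, 440, 5))    # 5.0 bars duplicated at 0 Hz
print(how_much_bars_on_this_frequency(440, 440, 5))  # 1.0 bar at 440 Hz
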
Code Example #3
    def __init__(self, MMVVectorial, context, skia_object, midi):
        self.vectorial = MMVVectorial
        self.context = context
        self.skia = skia_object
        self.midi = midi
        self.config = self.vectorial.config
        self.functions = Functions()
        self.datautils = DataUtils()
        self.piano_keys = {}
        self.keys_centers = {}

        self.background_color = 22 / 255
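
The 22 / 255 above maps an 8-bit channel value into the 0..1 range Skia works with; the next example applies the same normalization to hex colors decoded with PIL's ImageColor. A standalone illustration (the hex value here is made up):

from PIL import ImageColor

rgb = ImageColor.getcolor("#FF8000", "RGB")      # (255, 128, 0), 8-bit channels
normalized = [channel / 255 for channel in rgb]  # [1.0, ~0.502, 0.0] for Skia
print(normalized)
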
Code Example #4
    def __init__(self, mmv, MMVSkiaPianoRollVectorial):
        self.mmvskia_main = mmv
        self.vectorial = MMVSkiaPianoRollVectorial
        self.config = self.vectorial.config
        self.functions = Functions()
        self.datautils = DataUtils()
        self.piano_keys = {}
        self.keys_centers = {}
        """
        When converting the colors we divide by 255 so we normalize in a value in between 0 and 1
        Because skia works like that.
        """

        sep = os.path.sep

        # Load the yaml config file
        color_preset = self.config["color_preset"]
        color_config_yaml = self.mmvskia_main.utils.load_yaml(
            f"{self.mmvskia_main.mmvskia_interface.top_level_interace.data_dir}{sep}mmvskia{sep}piano_roll{sep}color_{color_preset}.yaml"
        )

        # Get the global colors into a dictionary

        self.global_colors = {}

        for key in color_config_yaml["global"]:
            self.global_colors[key] = [
                channel / 255 for channel in ImageColor.getcolor(
                    "#" + str(color_config_yaml["global"][key]), "RGB")
            ]

        # # Get the note colors based on their channel

        self.color_channels = {}

        # For every channel config
        for key in color_config_yaml["channels"]:

            # Create empty dict
            self.color_channels[key] = {}

            # Get colors of sharp and plain keys, borders, etc.
            for color_type in color_config_yaml["channels"][key].keys():

                # Hexadecimal representation of color
                color_hex = color_config_yaml["channels"][key][color_type]

                # Assign RGB value
                self.color_channels[key][color_type] = [
                    channel / 255
                    for channel in ImageColor.getcolor(f"#{color_hex}", "RGB")
                ]
Code Example #5
    def __init__(self, mmv, MMVSkiaPianoRollVectorial):
        self.mmvskia_main = mmv
        self.vectorial = MMVSkiaPianoRollVectorial
        self.config = self.vectorial.config
        self.functions = Functions()
        self.datautils = DataUtils()
        self.piano_keys = {}
        self.keys_centers = {}
        self.colors = {}

        # Generate skia.Color4F out of the colors dictionary
        self.colors = self.parse_colors_dict(colors_dict = self.config["colors"])

        self.font_fits_on_width = {}
Code Example #6
class AudioProcessing:
    def __init__(self, depth = LOG_NO_DEPTH) -> None:
        debug_prefix = "[AudioProcessing.__init__]"
        ndepth = depth + LOG_NEXT_DEPTH

        self.fourier = Fourier()
        self.datautils = DataUtils()
        self.functions = Functions()
        self.config = None

        # MMV specific: these control how many repeated frequencies the
        # process function returns
        self.where_decay_less_than_one = 440
        self.value_at_zero = 5

        # List of full frequencies of notes
        # -50 to 68 yields freqs of ~24.5 Hz up to ~21 kHz
        self.piano_keys_frequencies = [round(self.get_frequency_of_key(x), 2) for x in range(-50, 68)]
        logging.info(f"{depth}{debug_prefix} Whole notes frequencies we'll care about: [{self.piano_keys_frequencies}]")

    # Slice mono and stereo audio data
    def slice_audio(self,
            stereo_data: np.ndarray,
            mono_data: np.ndarray,
            sample_rate: int,
            start_cut: int,
            end_cut: int,
            batch_size: int = None
        ) -> None:
        
        # Cut the left and right points range
        left_slice = stereo_data[0][start_cut:end_cut]
        right_slice = stereo_data[1][start_cut:end_cut]

        # Cut the mono points range
        # mono_slice = mono_data[start_cut:end_cut]

        if batch_size is not None:
            # Empty audio slice array if we're at the end of the audio
            self.audio_slice = np.zeros([3, batch_size])

            # Get the audio slices of the left and right channel
            self.audio_slice[0][ 0:left_slice.shape[0] ] = left_slice
            self.audio_slice[1][ 0:right_slice.shape[0] ] = right_slice
            # self.audio_slice[2][ 0:mono_slice.shape[0] ] = mono_slice

        else:
            # self.audio_slice = [left_slice, right_slice, mono_slice]
            self.audio_slice = [left_slice, right_slice]

        # Calculate average amplitude
        self.average_value = float(np.mean(np.abs(
            mono_data[start_cut:end_cut]
        )))

    def resample(self,
            data: np.ndarray,
            original_sample_rate: int,
            new_sample_rate: int
        ) -> np.ndarray:

        ratio = new_sample_rate / original_sample_rate
        if ratio == 1:
            return data
        else:
            return samplerate.resample(data, ratio, 'sinc_best')

    # Get N semitones above / below A4 key, 440 Hz
    #
    # get_frequency_of_key(-12) = 220 Hz
    # get_frequency_of_key(  0) = 440 Hz
    # get_frequency_of_key( 12) = 880 Hz
    #
    def get_frequency_of_key(self, n):
        return 440 * (2**(n/12))

    # https://stackoverflow.com/a/2566508
    def find_nearest(self, array, value):
        index = (np.abs(array - value)).argmin()
        return index, array[index]
    
    # Calculate the FFT of this data, get only wanted frequencies based on the musical notes
    def process(self,
            data: np.ndarray,
            original_sample_rate: int,
        ) -> list:
        
        # The returned dictionary
        processed = {}

        # Iterate on config
        for _, value in self.config.items():

            # Get info on config
            sample_rate = value.get("sample_rate")
            start_freq = value.get("start_freq")
            end_freq = value.get("end_freq")

            # Get the frequencies we want and will return in the end
            wanted_freqs = self.datautils.list_items_in_between(
                self.piano_keys_frequencies,
                start_freq, end_freq,
            )

            # Calculate the binned FFT, we get N vectors of [freq, value]
            # of this FFT
            binned_fft = self.fourier.binned_fft(
                # Resample our data to the one specified on the config
                data = self.resample(
                    data = data,
                    original_sample_rate = original_sample_rate,
                    new_sample_rate = sample_rate,
                ),
                sample_rate = sample_rate,
                original_sample_rate = original_sample_rate,
            )

            # Get the nearest freq and add to processed            
            for freq in wanted_freqs:

                # Get the nearest and FFT value
                nearest = self.find_nearest(binned_fft[0], freq)
                value = binned_fft[1][nearest[0]]
     
                # How many bars we'll render duped at this freq, see
                # this function on the Functions class for more detail
                N = math.ceil(
                    self.functions.how_much_bars_on_this_frequency(
                        x = freq,
                        where_decay_less_than_one = self.where_decay_less_than_one,
                        value_at_zero = self.value_at_zero,
                    )
                )

                # Add repeated bars or just one
                for i in range(N):
                    processed[nearest[1] + (i/10)] = value
                
        linear_processed_fft = []
        frequencies = []

        for frequency, value in processed.items():
            frequencies.append(frequency)
            linear_processed_fft.append(value)
        
        return [linear_processed_fft, frequencies]
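
Note the processed[nearest[1] + (i/10)] = value line above: a dict would collapse N writes to the same frequency key into a single entry, so each duplicated bar gets its key offset by 0.1 Hz. Isolated from the class:

processed = {}
freq, value, N = 220.0, 0.8, 3

# Each duplicate gets a slightly offset key so none of them overwrite the others
for i in range(N):
    processed[freq + (i / 10)] = value

print(processed)  # {220.0: 0.8, 220.1: 0.8, 220.2: 0.8}
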
Code Example #7
    def load(self, path, bpm=130):
        self.midi = mido.MidiFile(path, clip=True)
        self.tempo = mido.bpm2tempo(bpm)
        self.range_notes = RangeNotes()
        self.datautils = DataUtils()
        self.midi_file_path = path
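
mido.bpm2tempo converts beats per minute into the MIDI tempo unit, microseconds per quarter note (60,000,000 / bpm, rounded). For the default bpm=130 above:

import mido

print(mido.bpm2tempo(130))  # 461538 microseconds per beat
print(mido.bpm2tempo(120))  # 500000, the MIDI default tempo
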
Code Example #8
class AudioProcessing:
    def __init__(self) -> None:
        debug_prefix = "[AudioProcessing.__init__]"

        # Create some util classes
        self.fourier = Fourier()
        self.datautils = DataUtils()
        self.functions = Functions()
        self.config = None

        # List of full frequencies of notes
        # -50 to 68 yields freqs of ~24.5 Hz up to ~21 kHz
        self.piano_keys_frequencies = [
            round(self.get_frequency_of_key(x), 2) for x in range(-50, 68)
        ]

    # Get specs on config dictionary
    def _get_config_stuff(self, config_dict):

        # Get config
        start_freq = config_dict["start_freq"]
        end_freq = config_dict["end_freq"]

        # Get the frequencies we want and will return in the end
        wanted_freqs = self.datautils.list_items_in_between(
            self.piano_keys_frequencies,
            start_freq,
            end_freq,
        )

        # Counter for expected frequencies on this config
        expected_N_frequencies = 0
        expected_frequencies = []

        # For each wanted frequency, count how many duplicated bars we expect
        for freq in wanted_freqs:

            # How many bars we'll render duped at this freq, see
            # this function on the Functions class for more detail
            N = math.ceil(
                self.functions.how_much_bars_on_this_frequency(
                    x=freq,
                    where_decay_less_than_one=self.where_decay_less_than_one,
                    value_at_zero=self.value_at_zero,
                ))

            # Add to total freqs the amount we expect
            expected_N_frequencies += N

            # Add individual frequencies
            expected_frequencies.extend([freq + (i / 100) for i in range(N)])

        # Return info
        return {
            "original_sample_rate": config_dict["original_sample_rate"],
            "target_sample_rate": config_dict["target_sample_rate"],
            "expected_N_frequencies": expected_N_frequencies,
            "expected_frequencies": expected_frequencies,
            "start_freq": start_freq,
            "end_freq": end_freq,
        }

    # Set up a configuration list of dicts. They can look like this:
    """
    [ {
        "original_sample_rate": 48000,
        "target_sample_rate": 5000,
        "start_freq": 20,
        "end_freq": 2500,
    }, {
        ...
    }]
    """

    # NOTE: The FFT only yields frequencies up to SAMPLE_RATE/2 (Nyquist), in steps
    # of the calculation sample rate divided by the window size (batch size).
    # So if you want more bass information, downsample to 5000 Hz or 1000 Hz and read
    # frequencies up to 2500 Hz or 500 Hz, respectively.
    def configure(self,
                  config,
                  where_decay_less_than_one=440,
                  value_at_zero=3):
        debug_prefix = "[AudioProcessing.configure]"

        logging.info(
            f"{debug_prefix} Whole notes frequencies we'll care: [{self.piano_keys_frequencies}]"
        )

        # Assign
        self.config = config
        self.FFT_length = 0
        self.where_decay_less_than_one = where_decay_less_than_one
        self.value_at_zero = value_at_zero

        # The configurations on sample rate, frequencies to expect
        self.process_layer_configs = []

        # For every config dict on the config
        for layers in self.config:
            info = self._get_config_stuff(layers)
            self.FFT_length += info["expected_N_frequencies"] * 2
            self.process_layer_configs.append(info)

        # Left and right channels both contribute, hence the "* 2" above
        # self.FFT_length *= 2
        print("BINNED FFT LENGTH", self.FFT_length)

    # # Feature Extraction

    # Calculate the Root Mean Square
    def rms(self, values: np.ndarray) -> float:
        return np.sqrt(np.mean(values**2))

    # # New Methods

    # Yield information on the audio slice
    def get_info_on_audio_slice(self,
                                audio_slice: np.ndarray,
                                original_sample_rate,
                                do_calculate_fft=True) -> dict:
        N = audio_slice.shape[1]

        # Calculate MONO
        mono = (audio_slice[0] + audio_slice[1]) / 2

        yield ["mmv_raw_audio_left", audio_slice[0]]
        yield ["mmv_raw_audio_right", audio_slice[1]]

        # # Average audio amplitude based on RMS

        # L, R, Mono respectively
        RMS = []

        # Iterate over both channels, calculate the RMS of each
        for channel_number in [0, 1]:
            RMS.append(np.sqrt(np.mean(audio_slice[channel_number][0:N]**2)))
            # RMS.append(np.median(np.abs(audio_slice[channel_number][0:N//120])))

        # Append the mono RMS as the average of the two channels
        RMS.append(sum(RMS) / 2)

        # Yield average amplitudes info
        yield ["mmv_rms", tuple([round(value, 8) for value in RMS])]

        # # Standard deviations

        yield [
            "mmv_std",
            tuple(
                [np.std(audio_slice[0]),
                 np.std(audio_slice[1]),
                 np.std(mono)])
        ]

        # # FFT shenanigans
        if do_calculate_fft:

            # The final fft we give to the shader
            processed = np.zeros(self.FFT_length, dtype=np.float32)

            # Counter to assign values on the processed array
            counter = 0

            # For each channel
            for channel_index, data in enumerate(audio_slice):

                # For every config dict on the config
                for info in self.process_layer_configs:

                    # Sample rate
                    original_sample_rate = info["original_sample_rate"]
                    target_sample_rate = info["target_sample_rate"]

                    # Individual frequencies
                    expected_frequencies = info["expected_frequencies"]

                    # The FFT of [[frequencies], [values]]
                    binned_fft = self.fourier.binned_fft(
                        data=self.resample(
                            data=data,
                            original_sample_rate=original_sample_rate,
                            target_sample_rate=target_sample_rate,
                        ),
                        original_sample_rate=original_sample_rate,
                        target_sample_rate=target_sample_rate,
                    )
                    if binned_fft is None: return

                    # Index 0 of the FFT frequencies is the DC bias (0 Hz), and the bins
                    # are evenly spaced, so fft_freqs[1] equals the spacing between bins
                    fft_freqs = binned_fft[0]
                    jumps = abs(fft_freqs[1])

                    # Get the nearest freq and add to processed
                    for freq in expected_frequencies:

                        # TODO: make configurable
                        flatten_scalar = self.functions.value_on_line_of_two_points(
                            Xa=20, Ya=0.1, Xb=20000, Yb=3, get_x=freq)

                        # # Get the nearest and FFT value

                        # Trick: since the frequency bins are evenly spaced, the index of the
                        # bin nearest to a target frequency is the target divided by the spacing
                        nearest = int(freq / jumps)

                        # The abs value of the FFT
                        value = abs(binned_fft[1][nearest]) * flatten_scalar

                        # Assign, iterate
                        processed[counter] = value
                        counter += 1

            # Yield FFT data
            yield ["mmv_fft", processed]

    # # Common Methods

    # Resample an audio slice (raw array) to some other frequency, this is useful when calculating
    # FFTs because a lower sample rate means we get more info on the bass freqs
    def resample(self, data: np.ndarray, original_sample_rate: int,
                 target_sample_rate: int) -> np.ndarray:

        # If the rates match the ratio is 1, nothing to do, just return the input data
        if target_sample_rate == original_sample_rate:
            return data

        # Use libsamplerate for resampling the audio otherwise
        return samplerate.resample(data,
                                   ratio=(target_sample_rate /
                                          original_sample_rate),
                                   converter_type='sinc_fastest')

    # Resample the data with nearest index approach
    # Doesn't really work, experimental, maybe I understand resampling wrong
    def resample_nearest(self, data: np.ndarray, original_sample_rate: int,
                         target_sample_rate: int) -> np.ndarray:

        # Nothing to do, target sample rate is the same as old one
        if (target_sample_rate == original_sample_rate):
            return data

        # The ratio we'll resample
        ratio = (original_sample_rate / target_sample_rate)

        # Length of the data
        N = data.shape[0]

        # Target new array length
        T = N * ratio

        # Array of original indexes
        indexes = np.arange(T)

        # (indexes / T) normalizes to the 0..1 range; multiplying by N maps
        # that back onto the original data's indexes, which we then truncate
        # to integers and offset by 1
        indexes = ((indexes / T) * N).astype(np.int32) - 1

        # Return the original data with selected indexes
        return data[indexes]

    # Get N semitones above / below A4 key, 440 Hz
    #
    # get_frequency_of_key(-12) = 220 Hz
    # get_frequency_of_key(  0) = 440 Hz
    # get_frequency_of_key( 12) = 880 Hz
    #
    def get_frequency_of_key(self, n, A4=440):
        return A4 * (2**(n / 12))

    # https://stackoverflow.com/a/2566508
    # Find nearest value inside one array from a given target value
    # I could make my own but this one is more efficient because it uses numpy
    # Returns: index of the match and its value
    def find_nearest(self, array, value):
        index = (np.abs(array - value)).argmin()
        return index, array[index]
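
The NOTE before configure and the "evenly spaced bins" trick inside get_info_on_audio_slice both rest on how np.fft lays out frequencies: bins run from 0 Hz to SAMPLE_RATE/2 in steps of sample_rate / window_size. A numpy check, using a hypothetical 5000 Hz target sample rate and a 2048-sample window:

import numpy as np

sample_rate, window_size = 5000, 2048
freqs = np.fft.rfftfreq(window_size, d=1 / sample_rate)

print(freqs[-1])            # 2500.0 Hz, i.e. SAMPLE_RATE / 2 (Nyquist)
print(freqs[1])             # ~2.44 Hz, bin spacing = sample_rate / window_size
print(int(440 / freqs[1]))  # 180, index of the bin nearest 440 Hz (truncating, as the code does)
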
Code Example #9
    def __init__(self) -> None:
        self.fourier = Fourier()
        self.datautils = DataUtils()
        self.config = None
Code Example #10
class AudioProcessing:
    def __init__(self) -> None:
        self.fourier = Fourier()
        self.datautils = DataUtils()
        self.config = None

    # Slice mono and stereo audio data
    def slice_audio(self,
            stereo_data: np.ndarray,
            mono_data: np.ndarray,
            sample_rate: int,
            start_cut: int,
            end_cut: int,
            batch_size: int = None
        ) -> None:
        
        # Cut the left and right points range
        left_slice = stereo_data[0][start_cut:end_cut]
        right_slice = stereo_data[1][start_cut:end_cut]

        # Cut the mono points range
        mono_slice = mono_data[start_cut:end_cut]

        if batch_size is not None:
            # Empty audio slice array if we're at the end of the audio
            self.audio_slice = np.zeros([3, batch_size])

            # Get the audio slices of the left and right channel
            self.audio_slice[0][ 0:left_slice.shape[0] ] = left_slice
            self.audio_slice[1][ 0:right_slice.shape[0] ] = right_slice
            self.audio_slice[2][ 0:mono_slice.shape[0] ] = mono_slice

        else:
            self.audio_slice = [left_slice, right_slice, mono_slice]

        # Calculate average amplitude
        self.average_value = np.mean(np.abs(mono_slice))

    def resample(self,
            data: np.ndarray,
            original_sample_rate: int,
            new_sample_rate: int
        ) -> np.ndarray:

        ratio = new_sample_rate / original_sample_rate
        if ratio == 1:
            return data
        else:
            return samplerate.resample(data, ratio, 'sinc_best')

    # Get N semitones above / below A4 key, 440 Hz
    def get_frequency_of_key(self, n):
        return 440 * ( (2**(1/12)) ** n )

    def process(self,
            data: np.ndarray,
            original_sample_rate: int
        ) -> dict:
        
        # The returned dictionary
        processed = {}

        # Iterate on config
        for key, value in self.config.items():

            # Get info on config
            get_frequencies = value.get("get_frequencies")
            sample_rate = value.get("sample_rate")
            start_freq = value.get("start_freq")
            end_freq = value.get("end_freq")
            nbars = value.get("nbars")

            N = len(data)

            # Resample audio to target sample rate
            resampled = self.resample(data, original_sample_rate, sample_rate)

            # Get freqs vs fft value dictionary
            binned_fft = self.fourier.binned_fft(resampled, sample_rate)

            wanted_binned_fft = {}

            # Do we want every frequency of the binned_fft or a set of it
            if get_frequencies == "range":
                wanted_binned_fft = self.datautils.dictionary_items_in_between(binned_fft, start_freq, end_freq)

            elif get_frequencies == "all":
                wanted_binned_fft = binned_fft

            elif get_frequencies == "musical":
                key_freqs = [self.get_frequency_of_key(x) for x in range(-1000, 1000)]
                wanted_freqs = self.datautils.list_items_in_between(key_freqs, start_freq, end_freq)
                
                # https://stackoverflow.com/a/7934608/13477696
                closest_freq = lambda a, l: min(l, key=lambda x: abs(x - a))
                keylist = list(binned_fft.keys())
                already = []
                
                for freq in wanted_freqs:
                    new_closest = closest_freq(freq, list(binned_fft.keys()))
                    if new_closest in already:
                        next_index = keylist.index(new_closest) + 1
                        if next_index < len(keylist):
                            new_closest = keylist[next_index]
                    already.append(new_closest)
                    wanted_binned_fft[freq] = binned_fft[new_closest]
                
            # Send the raw frequency vs fft dict 
            processed[key] = wanted_binned_fft
            
        linear_processed = []
        frequencies = []

        for key, item in processed.items():
            for frequency in item:
                frequencies.append(frequency)
                linear_processed.append(item[frequency])
        
        return {"fft": linear_processed, "frequencies": frequencies}