def process_song(i, dataset):
    """
    Compute the window features of song `i` from `dataset`.

    The piano roll is requested with the 'precise_alignment' and
    'broad_alignment' score types at resolution `RES`, and the audio is
    resampled to `SR` before both are handed to `get_song_win_features`.
    (Intended to be run in parallel, one call per song.)
    """
    alignment_types = ['precise_alignment', 'broad_alignment']
    pianoroll = dataset.get_pianoroll(
        i, score_type=alignment_types, resolution=RES)

    waveform, native_sr = dataset.get_audio(i)
    to_target_rate = esst.Resample(inputSampleRate=native_sr,
                                   outputSampleRate=SR)
    waveform = to_target_rate(waveform)

    return get_song_win_features(pianoroll, waveform)
Ejemplo n.º 2
0
    def compute(self, *args):
        """
        Find oversampled samples whose magnitude exceeds the clipping threshold.

        The signal is read from ``args[1]`` (``args[0]`` is not used here) and
        cut into frames of ``self._frameSize`` samples every
        ``self._hopSize`` samples. Each frame is resampled from
        ``self._sampleRate`` to ``self._sampleRateOver``, optionally passed
        through an emphasis filter, rectified, and optionally combined
        (element-wise maximum) with a rectified DC-blocked copy.
        ``self._idx`` is incremented once per processed frame.

        Returns:
            ``esarr`` of ``(position, value)`` pairs, where ``position`` is
            ``(i + self._idx * self._hopSize) / self._sampleRateOver`` for each
            sample ``i`` of the frame whose value is above
            ``self._clippingThreshold``. The array is empty when the input
            yields no frames.
        """
        x = args[1]

        # The emphasis coefficients depend only on the configuration, so they
        # are computed once instead of once per frame.
        if self._emphatise:
            fPole = 20e3  # Hz
            fZero = 14.1e3  # Hz

            rPole = fPole / self._sampleRateOver
            rZero = fZero / self._sampleRateOver

        # Bound up front so that an input producing no frames returns an empty
        # array instead of failing on an unbound name at the return statement.
        # NOTE(review): `y` is overwritten on every frame, so only the last
        # frame's detections are returned -- confirm whether accumulating
        # across frames was intended.
        y = []

        for frame in es.FrameGenerator(x,
                                       frameSize=self._frameSize,
                                       hopSize=self._hopSize,
                                       startFromZero=True):
            # Stage 1: Attenuation. Not required because we work in floating
            # point.

            # Stage 2: Resample
            # A fresh algorithm instance is built for every frame, exactly as
            # before, so nothing is shared between frames.
            yResample = es.Resample(inputSampleRate=self._sampleRate,
                                    outputSampleRate=self._sampleRateOver,
                                    quality=self._quality)(frame)

            # Stage 3: Emphasis
            if self._emphatise:
                yEmphasis = es.IIR(denominator=esarr([1., rPole]),
                                   numerator=esarr([1., -rZero]))(yResample)
            else:
                yEmphasis = yResample

            # Stage 4: Absolute value
            yMaxArray = np.abs(yEmphasis)

            # Stage 5: Optional DC block
            if self._BlockDC:
                # NOTE(review): the filter is configured with the input rate
                # although it processes the oversampled signal -- confirm.
                yDCBlocked = es.DCRemoval(sampleRate=self._sampleRate,
                                          cutoffFrequency=1.)(yEmphasis)

                yAbsoluteDCBlocked = np.abs(yDCBlocked)

                yMaxArray = np.maximum(yMaxArray, yAbsoluteDCBlocked)

            y = [
                ((i + self._idx * self._hopSize) / float(self._sampleRateOver),
                 yMax) for i, yMax in enumerate(yMaxArray)
                if yMax > self._clippingThreshold
            ]

            self._idx += 1

        return esarr(y)
Ejemplo n.º 3
0
def apply_replay_gain(float_signal, sample_rate):
    '''
    Normalize the perceived loudness of an audio signal.

    The replay gain (in dB) is measured on a copy of the input resampled
    from `sample_rate` to 8000 Hz, converted to a linear factor and applied
    to the input signal itself.

    Returns the scaled signal and the replay gain value in dB.
    '''
    analysis_rate = 8000

    to_analysis_rate = es.Resample(inputSampleRate=sample_rate,
                                   outputSampleRate=analysis_rate)
    analysis_signal = to_analysis_rate(float_signal)

    measure_gain = es.ReplayGain(sampleRate=analysis_rate)
    replay_gain_dB = measure_gain(analysis_signal)

    # dB -> linear amplitude factor
    linear_gain = np.power(10, replay_gain_dB / 20)
    normalized = np.array(float_signal) * linear_gain
    return normalized, replay_gain_dB
Ejemplo n.º 4
0
def analyze_misc(filename, segment_duration=20):
    """
    Compute frame-wise descriptors on the time-centered segment of a file.

    Replay gain and duration are measured on the whole file; the segment of
    `segment_duration` seconds centered in time is then loaded with that
    replay gain applied and analysed in frames of 2048 samples with a hop of
    1024 samples.

    :param filename: path of the audio file to analyse
    :param segment_duration: length in seconds of the centered segment
    :return: pool holding the per-frame values 'rms', 'rms_spectrum', 'hfc',
        'spectral_centroid' and 'zcr', plus 'ebu_momentary' computed on the
        same segment of the stereo signal
    :raises ValueError: if the segment does not fit inside the audio
    """
    # All durations below are derived with this rate, as the original code
    # did with a literal.
    # NOTE(review): assumes the loaders are left at a 44100 Hz output rate --
    # confirm against the Essentia loader defaults.
    sample_rate = 44100
    frame_size = 2048
    hop_size = 1024

    # Compute replay gain and duration on the entire file, then load the
    # segment that is centered in time with replaygain applied
    audio = es.MonoLoader(filename=filename)()
    replaygain = es.ReplayGain()(audio)

    # float() keeps the divisions exact even under integer-division semantics.
    duration = len(audio) / float(sample_rate)
    segment_start = (duration - segment_duration) / 2
    segment_end = segment_start + segment_duration

    if segment_start < 0 or segment_end > duration:
        raise ValueError(
            'Segment duration is larger than the input audio duration')

    loader = es.EasyLoader(filename=filename,
                           replayGain=replaygain,
                           startTime=segment_start,
                           endTime=segment_end)

    windowing = es.Windowing(type='blackmanharris62')
    spectrum = es.Spectrum()
    centroid = es.Centroid()
    zcr = es.ZeroCrossingRate()
    rms = es.RMS()
    hfc = es.HFC()
    pool = essentia.Pool()

    audio = loader()
    for frame in es.FrameGenerator(audio,
                                   frameSize=frame_size,
                                   hopSize=hop_size):
        frame_spectrum = spectrum(windowing(frame))
        pool.add('rms', rms(frame))
        pool.add('rms_spectrum', rms(frame_spectrum))
        pool.add('hfc', hfc(frame_spectrum))
        pool.add('spectral_centroid', centroid(frame_spectrum))
        pool.add('zcr', zcr(frame))

    audio_st, sr, _, _, _, _ = es.AudioLoader(filename=filename)()
    # Ugly hack because we don't have a StereoResample: resample each channel
    # separately and put them back together.
    left, right = es.StereoDemuxer()(audio_st)
    resampler = es.Resample(inputSampleRate=sr, outputSampleRate=sample_rate)
    left = resampler(left)
    right = resampler(right)
    audio_st = es.StereoMuxer()(left, right)
    audio_st = es.StereoTrimmer(startTime=segment_start,
                                endTime=segment_end)(audio_st)
    # The loudness hop is expressed in seconds and matches the frame hop.
    ebu_momentary, _, _, _ = es.LoudnessEBUR128(
        hopSize=hop_size / float(sample_rate),
        startAtZero=True)(audio_st)
    pool.set('ebu_momentary', ebu_momentary)

    return pool
Ejemplo n.º 5
0
def load_audio(path, sample_rate, mono=True):
    """
    Load an audio file with Essentia, optionally mixing it down to mono and
    resampling it.

    :param path: (str) location of audio file to load
    :param sample_rate: (int) sampling rate the returned audio should have
    :param mono: (bool) convert file to mono, defaults to True
    :return: tuple of the audio samples and the channel count reported by
        the loader
    :raises RuntimeError: if the file has more than two channels
    """

    loaded = es.AudioLoader(filename=path)()
    samples, orig_rate, channels = loaded[0], loaded[1], loaded[2]

    # Only mono and stereo material is supported
    if channels > 2:
        raise RuntimeError("Can't handle more than two audio channels.")

    # Essentia always loads audio as stereo; for a single-channel file the
    # right channel is all zeros. Copy the left channel over so the data is
    # usable as stereo for later processing such as loudness normalization.
    if channels == 1:
        samples[:, 1] = samples[:, 0]

    if mono:
        samples = mix_to_mono(samples)

    # Nothing more to do when the file is already at the requested rate
    if orig_rate == sample_rate:
        return samples, channels

    resample = es.Resample(inputSampleRate=orig_rate,
                           outputSampleRate=sample_rate)

    if mono:
        # Single channel: resample in one go
        samples = resample(samples)
    else:
        # Stereo: resample each channel on its own, then rebuild the
        # (frames, channels) layout
        resampled_left = resample(samples[:, 0])
        resampled_right = resample(samples[:, 1])
        samples = np.array([resampled_left, resampled_right]).T

    return samples, channels
Ejemplo n.º 6
0
    def resample_audio(self, target_sample_rate):
        """Resample the stored audio down to a target sample rate.

        Arguments:
            target_sample_rate -- sample rate of the returned audio; must not
                exceed the current sample rate `self.fs`

        Raises:
            ValueError: If `target_sample_rate` is greater than `self.fs`.

        Returns:
            The result of running the resampler on `self.audio_vector`.
        """
        if target_sample_rate > self.fs:
            message = "Target_sample_rate should be lower than %s" % self.fs
            raise ValueError(message)

        settings = dict(inputSampleRate=self.fs,
                        outputSampleRate=target_sample_rate,
                        quality=1)
        return estd.Resample(**settings).compute(self.audio_vector)
Ejemplo n.º 7
0
 def resample_audio(self, target_sample_rate):
     """Resample the stored audio down to a target sample rate.

     Raises ValueError if `target_sample_rate` is greater than `self.fs`;
     otherwise returns the resampler output for `self.audio_vector`.
     """
     if target_sample_rate > self.fs:
         message = "Target_sample_rate should be lower than %s" % self.fs
         raise ValueError(message)

     settings = dict(inputSampleRate=self.fs,
                     outputSampleRate=target_sample_rate,
                     quality=1)
     return estd.Resample(**settings).compute(self.audio_vector)