Example #1
0
    def run(self, fname):
        """Extract the predominant melody of the audio file *fname*.

        Returns a dict with a [time, pitch (Hz), salience] row matrix
        under 'pitch' and the extraction parameters under 'settings'.
        """
        # Load the recording (MonoLoader resamples to 44100 Hz by default)
        # and apply an equal-loudness filter before pitch analysis.
        samples = estd.EqualLoudness()(estd.MonoLoader(filename=fname)())

        contours_bins, contours_start_times, contour_saliences, duration = \
            self._extract_pitch_contours(samples)

        # simplified contour selection
        pitch, pitch_salience = self.select_contours(
            contours_bins, contour_saliences, contours_start_times, duration)

        # convert from cents to Hz; 0 marks an unvoiced frame and 55 Hz
        # is the reference frequency of the cent scale
        hz_track = []
        for p in pitch:
            hz_track.append(
                0. if p == 0
                else 55. * 2. ** (self.bin_resolution * p / 1200.))
        pitch = e_array(hz_track)
        pitch_salience = e_array(pitch_salience)

        # optional post-filtering of the pitch track
        if self.filter_pitch:
            pitch, pitch_salience = self._post_filter_pitch(
                pitch, pitch_salience)

        # one time stamp per analysis frame
        time_stamps = self._gen_time_stamps(0, len(pitch))

        # assemble the [time pitch salience] matrix, one row per frame
        out = np.vstack(
            (time_stamps, pitch.tolist(), pitch_salience.tolist())).T
        out = out.tolist()

        # record the parameters together with the audio source
        settings = self.get_settings()
        settings.update({'source': fname})

        return {'pitch': out, 'settings': settings}
Example #2
0
    def run(self, fname):
        """Run predominant-melody extraction on the audio file *fname*.

        Returns {'pitch': [[time, hz, salience], ...], 'settings': dict}.
        """
        # MonoLoader resamples the signal to 44100 Hz by default; the
        # equal-loudness filter is applied before any analysis.
        audio = estd.MonoLoader(filename=fname)()
        audio = estd.EqualLoudness()(audio)

        c_bins, c_starts, c_saliences, duration = \
            self._extract_pitch_contours(audio)

        # simplified contour selection
        [pitch, pitch_salience] = self.select_contours(
            c_bins, c_saliences, c_starts, duration)

        # cents -> Hz (zero stays zero: unvoiced frame; 55 Hz reference)
        pitch = e_array([
            55. * 2. ** (self.bin_resolution * p / 1200.) if p != 0 else 0.
            for p in pitch])
        pitch_salience = e_array(pitch_salience)

        # optionally smooth/filter the pitch track
        if self.filter_pitch:
            pitch, pitch_salience = self._post_filter_pitch(
                pitch, pitch_salience)

        # one time stamp per frame of the pitch track
        time_stamps = self._gen_time_stamps(0, len(pitch))

        # stack into a [time pitch salience] matrix, one row per frame
        columns = (time_stamps, pitch.tolist(), pitch_salience.tolist())
        out = np.transpose(np.vstack(columns)).tolist()

        # attach the source file name to the extraction settings
        settings = self.get_settings()
        settings.update({'source': fname})

        return {'pitch': out, 'settings': settings}
    def run(self, audio):
        """Extract the predominant melody from a raw audio signal.

        Parameters
        ----------
        audio : array-like
            Mono audio samples (assumes the sample rate the extractor was
            configured for, presumably 44100 Hz — TODO confirm with caller).

        Returns
        -------
        dict
            {'pitch': [[time, pitch_hz, salience], ...], 'settings': dict}.
            Unlike the file-based variant, no 'source' entry is added to
            the settings because only the raw signal is available here.
        """
        # NOTE: the original version built per-contour Hz values and time
        # stamps (tmp/tmp_times) that were never used, plus a large block of
        # commented-out librosa/matplotlib plotting code; both removed as
        # dead code — the returned result is unchanged.
        contours_bins, contours_start_times, contour_saliences, duration = \
            self._extract_pitch_contours(audio)

        # run the simplified contour selection
        [pitch, pitch_salience] = self.select_contours(
            contours_bins, contour_saliences, contours_start_times, duration)

        # cent to Hz conversion (0 marks an unvoiced frame; 55 Hz reference)
        pitch = [
            0. if p == 0 else 55. * 2.**(self.bin_resolution * p / 1200.)
            for p in pitch
        ]
        pitch = e_array(pitch)
        pitch_salience = e_array(pitch_salience)

        # pitch filter
        if self.filter_pitch:
            pitch, pitch_salience = self._post_filter_pitch(
                pitch, pitch_salience)

        # generate time stamps, one per frame
        time_stamps = self._gen_time_stamps(0, len(pitch))

        # [time pitch salience] matrix
        out = np.transpose(
            np.vstack((time_stamps, pitch.tolist(), pitch_salience.tolist())))
        out = out.tolist()

        # settings
        settings = self.get_settings()

        return {'pitch': out, 'settings': settings}
Example #4
0
File: pitch.py  Project: EQ4/pycompmusic
  def run(self, musicbrainzid, fname):
    """Extract the predominant pitch track of the audio file *fname*.

    Runs the full Essentia predominant-melody chain (windowing ->
    spectrum -> spectral peaks -> pitch salience -> salience peaks ->
    contour tracking -> simplified contour selection), converts the
    pitch from cents to Hz, and returns a [time, pitch, salience]
    matrix plus the extraction settings and a MATLAB-serialized copy.

    NOTE(review): *musicbrainzid* is accepted but never used in this
    body. This is Python 2 code (``xrange``, ``cStringIO``, ``u""``).
    """
    citation = u"""
            Atlı, H. S., Uyar, B., Şentürk, S., Bozkurt, B., and Serra, X.
            (2014). Audio feature extraction for exploring Turkish makam music.
            In Proceedings of 3rd International Conference on Audio Technologies
            for Music and Media, Ankara, Turkey.
            """

    # Instantiate the per-stage Essentia standard-mode algorithms once,
    # configured from self.settings; they are reused for every frame below.
    run_windowing = estd.Windowing(zeroPadding = 3 * self.settings.frameSize) # Hann window with x4 zero padding
    run_spectrum = estd.Spectrum(size=self.settings.frameSize * 4)

    run_spectral_peaks = estd.SpectralPeaks(minFrequency=self.settings.minFrequency,
            maxFrequency = self.settings.maxFrequency,
            sampleRate = self.settings.sampleRate,
            magnitudeThreshold = self.settings.magnitudeThreshold,
            orderBy = 'magnitude')

    run_pitch_salience_function = estd.PitchSalienceFunction(binResolution=self.settings.binResolution) # converts unit to cents, 55 Hz is taken as the default reference
    run_pitch_salience_function_peaks = estd.PitchSalienceFunctionPeaks(binResolution=self.settings.binResolution,
            minFrequency=self.settings.minFrequency,
            maxFrequency = self.settings.maxFrequency)
    run_pitch_contours = estd.PitchContours(hopSize=self.settings.hopSize,
            binResolution=self.settings.binResolution,
            peakDistributionThreshold = self.settings.peakDistributionThreshold)

    run_pitch_filter = estd.PitchFilter(confidenceThreshold=self.settings.confidenceThreshold,
            minChunkSize=self.settings.minChunkSize)
    # Pool accumulates the per-frame salience peaks for contour tracking.
    pool = Pool()

    # load audio and eqLoudness
    audio = estd.MonoLoader(filename = fname)() # MonoLoader resamples the audio signal to 44100 Hz by default
    audio = estd.EqualLoudness()(audio)

    # Per-frame analysis: window -> spectrum -> spectral peaks ->
    # pitch salience -> salience peaks, accumulated into the pool.
    for frame in estd.FrameGenerator(audio,frameSize=self.settings.frameSize, hopSize=self.settings.hopSize):
      frame = run_windowing(frame)
      spectrum = run_spectrum(frame)
      peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
      salience = run_pitch_salience_function(peak_frequencies, peak_magnitudes)
      salience_peaks_bins, salience_peaks_contourSaliences = run_pitch_salience_function_peaks(salience)
      # Frames with no detected peaks are padded with a single zero so the
      # pool keeps one entry per frame (PitchContours needs aligned frames).
      if not size(salience_peaks_bins):
          salience_peaks_bins = array([0])
      if not size(salience_peaks_contourSaliences):
          salience_peaks_contourSaliences = array([0])

      pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
      pool.add('allframes_salience_peaks_contourSaliences', salience_peaks_contourSaliences)

    # post-processing: contour tracking
    contours_bins, contours_contourSaliences, contours_start_times, duration = run_pitch_contours(
            pool['allframes_salience_peaks_bins'],
            pool['allframes_salience_peaks_contourSaliences'])

    # run the simplified contour selection
    [pitch, pitch_salience] = self.ContourSelection(contours_bins,contours_contourSaliences,contours_start_times,duration)

    # cent to Hz conversion (0 marks an unvoiced frame; 55 Hz reference)
    pitch = e_array([0. if p == 0 else 55.*(2.**(((self.settings.binResolution*(p)))/1200)) for p in pitch])
    pitch_salience = e_array(pitch_salience)

    # pitch filter
    # NOTE(review): PitchFilter here returns only the filtered pitch track;
    # pitch_salience keeps its pre-filter values — confirm this is intended.
    if self.settings.filterPitch:
      pitch = run_pitch_filter(pitch, pitch_salience)

    # generate time stamps: frame index * hop size / sample rate, in seconds
    time_stamps = [s*self.settings.hopSize/float(self.settings.sampleRate) for s in xrange(0,len(pitch))]

    # [time pitch salience] matrix
    out = transpose(vstack((time_stamps, pitch.tolist(), pitch_salience.tolist())))
    out = out.tolist()

    # settings
    # NOTE(review): `settings` aliases self.settings (no copy), so the
    # update below mutates the extractor's own settings object in place —
    # repeated runs will accumulate these keys; verify this is intended.
    settings = self.settings
    settings.update({'version':self._version,
            'slug':self._slug,
            'source': fname,
            'essentiaVersion': essentia_version,
            'pitchUnit': 'Hz',
            'citation': citation})

    # matlab: serialize {pitch + settings} to an in-memory .mat file
    matout = cStringIO.StringIO()
    matob = {'pitch': out}
    matob.update(settings)

    scipy.io.savemat(matout, matob)

    return {'pitch': out,
            'matlab': matout.getvalue(),
            'settings': settings}