Example 1
import json
import os

import numpy

from pitchfilter.pitchfilter import PitchFilter


def test_pitch_filter():
    # read the extracted pitch from json
    with open(os.path.join(
            "sample_data",
            "e72db0ad-2ed9-467b-88ae-1f91edcd2c59.json"), 'r') as fp:
        pitch = numpy.array(json.load(fp))

    # filter the extracted pitch
    flt = PitchFilter()
    pitch_filt = flt.run(pitch)

    # compare against the previously saved filtered output
    with open(os.path.join(
            "sample_data",
            "e72db0ad-2ed9-467b-88ae-1f91edcd2c59_filtered.json"), 'r') as fp:
        saved_filt = numpy.array(json.load(fp))

    assert numpy.allclose(saved_filt, pitch_filt)
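For context, PitchFilter.run consumes a pitch track stored as an N x 3 array whose columns are timestamp, frequency and confidence; this layout is inferred from the np.vstack((time_stamps, pitch, pitch_salience)) call in Example 3, not from documented API. A minimal sketch on synthetic data, under that assumption:

import numpy

from pitchfilter.pitchfilter import PitchFilter

# toy pitch track; the column layout (time in s, pitch in Hz, confidence)
# is an assumption inferred from Example 3
hop = 128.0 / 44100.0  # arbitrary hop size, for illustration only
times = numpy.arange(200) * hop
freqs = numpy.full(200, 440.0)
freqs[80:83] = 880.0  # a short octave jump a filter would likely remove
confidence = numpy.full(200, 0.9)

track = numpy.vstack((times, freqs, confidence)).transpose()
filtered = PitchFilter().run(track)
print(numpy.array(filtered).shape)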
Example 3
    def _post_filter_pitch(self, pitch, pitch_salience):
        try:
            # prefer the Essentia implementation (estd: essentia.standard)
            # when it provides PitchFilter
            run_pitch_filter = estd.PitchFilter(
                confidenceThreshold=self.confidence_threshold,
                minChunkSize=self.min_chunk_size)
            pitch = run_pitch_filter(pitch, pitch_salience)

        except AttributeError:  # fall back to python implementation
            from pitchfilter.pitchfilter import PitchFilter
            run_pitch_filter = PitchFilter()

            # generate time stamps
            time_stamps = self._gen_time_stamps(0, len(pitch))

            temp_pitch = np.vstack((
                time_stamps, pitch, pitch_salience)).transpose()

            temp_pitch = run_pitch_filter.run(temp_pitch)

            pitch = temp_pitch[:, 1]
            pitch_salience = temp_pitch[:, 2]

        return pitch, pitch_salience
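The fallback path depends on self._gen_time_stamps, whose body is not shown here. A plausible, hypothetical reconstruction as a free function, assuming frame indices are converted to seconds with a hop size in samples and a sample rate in Hz (the default values are illustrative, not taken from the source):

import numpy as np

# hypothetical reconstruction of the timestamp generation: map frame
# indices to seconds via hop size (samples) and sample rate (Hz)
def gen_time_stamps(start_idx, end_idx, hop_size=128, sample_rate=44100.0):
    return np.arange(start_idx, end_idx) * hop_size / sample_rate

time_stamps = gen_time_stamps(0, 10)
print(time_stamps[:3])  # [0.0, ~0.0029, ~0.0058]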
Example 4
class Pitch(object):
    # filter_pitch=False so that the Python implementation of
    # pitch_filter is called explicitly below
    extractor = PredominantMelodyMakam(filter_pitch=False)
    filter = PitchFilter()
    DECIMAL = 1  # number of decimals kept when saving the pitch values

    @classmethod
    def extract(cls, audiodir, start_idx=0):
        """
        Extract the predominant melody of all the audio recordings in the
        input folder and its subfolders
        :param audiodir: the audio directory
        :param start_idx: the index in the list of found audio recordings
        from which to start predominant melody extraction. This parameter
        is useful if the user plans to run multiple instances of the
        extractor at once
        """
        # collect the mp3 files in the audio directory and its subfolders
        audio_files = get_filenames_in_dir(audiodir, keyword="*.mp3")[0]

        # derive a .pitch output path next to each audio file
        pitch_files = [
            os.path.join(os.path.dirname(f),
                         os.path.basename(os.path.splitext(f)[0]) + '.pitch')
            for f in audio_files
        ]

        if start_idx:  # if index is given
            audio_files = audio_files[start_idx:]
            pitch_files = pitch_files[start_idx:]

        for ii, (af, pf) in enumerate(zip(audio_files, pitch_files)):
            print(' ')
            print("{0:d}: {1:s}".format(ii + 1, os.path.basename(af)))

            if os.path.isfile(pf):  # already exists
                print("   > Already exist; skipped.")
            else:
                # extract and filter
                results = cls.extractor.run(af)
                pitch_track = cls.filter.run(results['pitch'])

                # keep only the frequency column for compact storage
                pitch_track = np.array(pitch_track)[:, 1]
                decimal_str = '%.' + str(cls.DECIMAL) + 'f'

                np.savetxt(pf, pitch_track, fmt=decimal_str)

    @staticmethod
    def slice(time_track, pitch_track, chunk_size, threshold=0.5, overlap=0):
        """--------------------------------------------------------------------
        Slices a pitch track into equal chunks of desired length.
        -----------------------------------------------------------------------
        time_track  : The timestamps of the pitch track. This is used to
                      determine the samples to cut the pitch track. 1-D list
        pitch_track : The pitch track's frequency entries. 1-D list
        chunk_size  : The sizes of the chunks.
        threshold   : This is the ratio of smallest acceptable chunk to
                      chunk_size. When a pitch track is sliced the remaining
                      tail at its end is returned if its longer than
                      (threshold * chunk_size). Else, it's discarded.
                      However if the entire track is shorter than this it is
                      still returned as it is, in order to be able to
                      represent that recording.
        overlap     : If it's zero, the next chunk starts from the end of the
                      previous chunk, else it starts from the
                      (chunk_size*threshold)th sample of the previous chunk.
        -----------------------------------------------------------------------
        chunks      : List of the pitch track chunks
        --------------------------------------------------------------------"""
        chunks = []
        last = 0

        # Main slicing loop
        for k in np.arange(1, (int(max(time_track) / chunk_size) + 1)):
            # index one past the last sample before the k-th chunk boundary
            cur = 1 + max(np.where(time_track < chunk_size * k)[0])
            chunks.append(pitch_track[last:(cur - 1)])

            # keep track of where the first sample of the next chunk
            # should start
            last = 1 + max(np.where(
                time_track < chunk_size * k * (1 - overlap))[0]) \
                if (overlap > 0) else cur

        # check whether the remaining tail is long enough to keep
        if max(time_track) - time_track[last] >= chunk_size * threshold:
            chunks.append(pitch_track[last:])

        # If the runtime of the entire track is below the threshold, keep it
        elif last == 0:
            chunks.append(pitch_track)
        return chunks
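To make the slicing arithmetic concrete: with chunk_size=20 and threshold=0.5, a 52-second track yields two full chunks (roughly 0-20 s and 20-40 s) plus the ~12 s tail, which is kept because 12 >= 0.5 * 20. A small driver calling the Pitch.slice static method above (the input values are illustrative):

import numpy as np

# synthetic 52-second track sampled every 10 ms (hypothetical data)
time_track = np.arange(0.0, 52.0, 0.01)
pitch_track = np.full(len(time_track), 440.0)

chunks = Pitch.slice(time_track, pitch_track, chunk_size=20,
                     threshold=0.5, overlap=0)
print(len(chunks))  # 3: two ~20 s chunks plus the ~12 s tail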
Example 5
    def identify(self, pitch, plot=False):
        """
        Identify the tonic by detecting the last note and extracting its
        frequency
        """
        pitch_sliced = np.array(deepcopy(pitch))

        # trim the trailing silence (zeros) at the end of the track
        sil_trim_len = len(np.trim_zeros(pitch_sliced[:, 1], 'b'))
        pitch_sliced = pitch_sliced[:sil_trim_len, :]

        # slice the pitch track to only include the last 10% of the track
        # for performance reasons
        pitch_len = pitch_sliced.shape[0]
        pitch_sliced = pitch_sliced[-int(pitch_len * 0.1):, :]

        # compute the pitch distribution and distribution peaks
        dummy_freq = 440.0
        distribution = PitchDistribution.from_hz_pitch(
            np.array(pitch)[:, 1], ref_freq=dummy_freq,
            kernel_width=self.kernel_width, step_size=self.step_size)

        # get pitch chunks
        flt = PitchFilter(lower_interval_thres=self.lower_interval_thres,
                          upper_interval_thres=self.upper_interval_thres,
                          min_freq=self.min_freq, max_freq=self.max_freq)
        pitch_chunks = flt.decompose_into_chunks(pitch_sliced)

        pitch_chunks = flt.post_filter_chunks(pitch_chunks)

        tonic = {"value": None, "unit": "Hz",
                 "timeInterval": {"value": None, "unit": 'sec'},
                 "octaveWrapped": False,  # octave correction is done
                 "procedure": "Tonic identification by last note detection",
                 "citation": 'Atlı, H. S., Bozkurt, B., Şentürk, S. (2015). '
                             'A Method for Tonic Frequency Identification of '
                             'Turkish Makam Music Recordings. In Proceedings '
                             'of 5th International Workshop on Folk Music '
                             'Analysis, pages 119–122, Paris, France.'}

        # try all chunks starting from the last as the tonic candidate,
        # considering the octaves
        for chunk in reversed(pitch_chunks):
            last_note = median(chunk[:, 1])

            # check all the pitch classes of the last note as a tonic candidate
            # by checking the vicinity in the stable pitches
            tonic_candidate = self.check_tonic_with_octave_correction(
                last_note, deepcopy(distribution))

            # assign the tonic if there is an estimation
            if tonic_candidate is not None:
                tonic['value'] = tonic_candidate
                tonic['timeInterval']['value'] = [chunk[0, 0], chunk[-1, 0]]

                # convert distribution bins to frequency
                distribution.cent_to_hz()
                break

        if plot:
            self.plot(pitch_sliced, tonic, pitch_chunks, distribution)

        return tonic, pitch_sliced, pitch_chunks, distribution
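A sketch of driving this method end to end; the class name TonicIdentifier stands in for whatever class actually defines identify (it is hypothetical, as is the input path), and the pitch input follows the (time, Hz, confidence) row layout used in the earlier examples:

import json

import numpy as np

identifier = TonicIdentifier()  # hypothetical class exposing identify()

with open('sample_data/recording.json') as fp:  # hypothetical input file
    pitch = np.array(json.load(fp))

tonic, pitch_sliced, pitch_chunks, distribution = identifier.identify(pitch)
print(tonic['value'], tonic['unit'])      # estimated tonic frequency in Hz
print(tonic['timeInterval']['value'])     # [start, end] of the detected note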