import json
import os

import numpy

from pitchfilter.pitchfilter import PitchFilter


def test_pitch_filter():
    # read the extracted pitch from json
    pitch = numpy.array(json.load(open(os.path.join(
        "sample_data", "e72db0ad-2ed9-467b-88ae-1f91edcd2c59.json"), 'r')))

    # filter the extracted pitch
    flt = PitchFilter()
    pitch_filt = flt.run(pitch)

    # compare against the saved, filtered pitch
    saved_filt = numpy.array(json.load(open(os.path.join(
        "sample_data",
        "e72db0ad-2ed9-467b-88ae-1f91edcd2c59_filtered.json"), 'r')))

    assert numpy.allclose(saved_filt, pitch_filt)
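The fixtures above store the pitch track as an (N, 3) matrix of [time, frequency, salience] rows (the same layout the fallback branch below stacks together). A minimal sketch of running the filter without the sample data; the values are invented for illustration:

import numpy

from pitchfilter.pitchfilter import PitchFilter

# synthetic (N, 3) track: [time, frequency, salience] rows
synth_pitch = numpy.array([
    [0.00, 220.0, 0.9],
    [0.01, 221.0, 0.9],
    [0.02, 881.0, 0.2],  # low-salience outlier, likely an octave error
    [0.03, 222.0, 0.9],
])

pitch_filt = PitchFilter().run(synth_pitch)
print(numpy.array(pitch_filt))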
def _post_filter_pitch(self, pitch, pitch_salience):
    try:
        run_pitch_filter = estd.PitchFilter(
            confidenceThreshold=self.confidence_threshold,
            minChunkSize=self.min_chunk_size)
        pitch = run_pitch_filter(pitch, pitch_salience)
    except AttributeError:  # fall back to the python implementation
        from pitchfilter.pitchfilter import PitchFilter
        run_pitch_filter = PitchFilter()

        # generate time stamps
        time_stamps = self._gen_time_stamps(0, len(pitch))
        temp_pitch = np.vstack((
            time_stamps, pitch, pitch_salience)).transpose()

        temp_pitch = run_pitch_filter.run(temp_pitch)
        pitch = temp_pitch[:, 1]
        pitch_salience = temp_pitch[:, 2]

    return pitch, pitch_salience
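The fallback branch above calls a _gen_time_stamps helper that is not shown in this excerpt. A plausible sketch, assuming the object stores the analysis hop size in seconds as self._hop_size (an assumption of this sketch; the actual helper may instead derive it from a hop size in samples and the sampling rate):

import numpy as np


def _gen_time_stamps(self, start_idx, end_idx):
    # one time stamp per analysis frame, spaced by the hop size in seconds;
    # self._hop_size is assumed here, not taken from the actual API
    return np.array([idx * self._hop_size
                     for idx in range(start_idx, end_idx)])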
class Pitch(object):
    # disable filtering in the extractor and call the python
    # implementation of the pitch filter explicitly
    extractor = PredominantMelodyMakam(filter_pitch=False)
    filter = PitchFilter()
    DECIMAL = 1

    @classmethod
    def extract(cls, audiodir, start_idx=0):
        """
        Extract the predominant melody of all the audio recordings in the
        input folder and its subfolders
        :param audiodir: the audio directory
        :param start_idx: the index in the list of found audio recordings
            to start predominant melody extraction from. This parameter is
            useful if the user plans to run multiple instances of the
            extractor at once.
        """
        # get the audio files and the paths of the pitch (text) files to save
        audio_files = get_filenames_in_dir(audiodir, keyword="*.mp3")[0]
        pitch_files = [
            os.path.join(os.path.dirname(f),
                         os.path.basename(os.path.splitext(f)[0]) + '.pitch')
            for f in audio_files]

        if start_idx:  # if an index is given
            audio_files = audio_files[start_idx:]
            pitch_files = pitch_files[start_idx:]

        for ii, (af, pf) in enumerate(zip(audio_files, pitch_files)):
            print(' ')
            print("{0:d}: {1:s}".format(ii + 1, os.path.basename(af)))
            if os.path.isfile(pf):  # already exists
                print("   > Already exists; skipped.")
            else:  # extract and filter
                results = cls.extractor.run(af)
                pitch_track = cls.filter.run(results['pitch'])

                # save only the frequency column, compactly
                pitch_track = np.array(pitch_track)[:, 1]
                decimal_str = '%.' + str(cls.DECIMAL) + 'f'
                np.savetxt(pf, pitch_track, fmt=decimal_str)

    @staticmethod
    def slice(time_track, pitch_track, chunk_size, threshold=0.5, overlap=0):
        """--------------------------------------------------------------------
        Slices a pitch track into equal chunks of the desired length.
        -----------------------------------------------------------------------
        time_track  : The timestamps of the pitch track. This is used to
                      determine the samples at which to cut the pitch track.
                      1-D list
        pitch_track : The pitch track's frequency entries. 1-D list
        chunk_size  : The size of the chunks, in the time unit of time_track
        threshold   : The ratio of the smallest acceptable chunk to
                      chunk_size. When a pitch track is sliced, the remaining
                      tail at its end is returned if it is longer than
                      (threshold * chunk_size); otherwise it is discarded.
                      However, if the entire track is shorter than this, it
                      is still returned as is, so that the recording can
                      still be represented.
        overlap     : If zero, the next chunk starts from the end of the
                      previous chunk; otherwise it starts from the
                      ((1 - overlap) * chunk_size)th point of the previous
                      chunk.
        -----------------------------------------------------------------------
        chunks      : List of the pitch track chunks
        --------------------------------------------------------------------"""
        chunks = []
        last = 0

        # main slicing loop
        for k in np.arange(1, (int(max(time_track) / chunk_size) + 1)):
            cur = 1 + max(np.where(time_track < chunk_size * k)[0])
            chunks.append(pitch_track[last:cur])

            # this variable keeps track of where the first sample of the
            # next iteration should start from
            last = 1 + max(np.where(
                time_track < chunk_size * k * (1 - overlap))[0]) \
                if (overlap > 0) else cur

        # check whether the remaining tail should be kept or discarded
        if max(time_track) - time_track[last] >= chunk_size * threshold:
            chunks.append(pitch_track[last:])

        # if the runtime of the entire track is below the threshold, keep it
        elif last == 0:
            chunks.append(pitch_track)

        return chunks
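A usage sketch for Pitch.slice on synthetic data, assuming the Pitch class above is importable: a track sampled every 0.5 s is sliced into 1-second chunks, and the 0.5 s tail exactly meets the default threshold * chunk_size, so it is kept as a final chunk:

import numpy as np

time_track = np.arange(0, 3.0, 0.5)  # [0.0, 0.5, 1.0, 1.5, 2.0, 2.5]
pitch_track = np.array([220., 220., 440., 440., 330., 330.])

chunks = Pitch.slice(time_track, pitch_track, chunk_size=1.0)
# -> [220. 220.], [440. 440.], [330. 330.]
for chunk in chunks:
    print(chunk)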
def identify(self, pitch, plot=False):
    """
    Identify the tonic by detecting the last note and extracting its
    frequency
    """
    pitch_sliced = np.array(deepcopy(pitch))

    # trim the silence at the end by removing the trailing zeros
    sil_trim_len = len(np.trim_zeros(pitch_sliced[:, 1], 'b'))
    pitch_sliced = pitch_sliced[:sil_trim_len, :]

    # slice the pitch track to only include the last 10% of the track
    # for performance reasons
    pitch_len = pitch_sliced.shape[0]
    pitch_sliced = pitch_sliced[-int(pitch_len * 0.1):, :]

    # compute the pitch distribution
    dummy_freq = 440.0
    distribution = PitchDistribution.from_hz_pitch(
        np.array(pitch)[:, 1], ref_freq=dummy_freq,
        kernel_width=self.kernel_width, step_size=self.step_size)

    # get the pitch chunks
    flt = PitchFilter(lower_interval_thres=self.lower_interval_thres,
                      upper_interval_thres=self.upper_interval_thres,
                      min_freq=self.min_freq, max_freq=self.max_freq)
    pitch_chunks = flt.decompose_into_chunks(pitch_sliced)
    pitch_chunks = flt.post_filter_chunks(pitch_chunks)

    tonic = {"value": None, "unit": "Hz",
             "timeInterval": {"value": None, "unit": 'sec'},
             "octaveWrapped": False,  # octave correction is done
             "procedure": "Tonic identification by last note detection",
             "citation": 'Atlı, H. S., Bozkurt, B., Şentürk, S. (2015). '
                         'A Method for Tonic Frequency Identification of '
                         'Turkish Makam Music Recordings. In Proceedings '
                         'of 5th International Workshop on Folk Music '
                         'Analysis, pages 119–122, Paris, France.'}

    # try all chunks starting from the last one as the tonic candidate,
    # considering the octaves
    for chunk in reversed(pitch_chunks):
        last_note = median(chunk[:, 1])

        # check all the pitch classes of the last note as a tonic candidate
        # by checking the vicinity in the stable pitches
        tonic_candidate = self.check_tonic_with_octave_correction(
            last_note, deepcopy(distribution))

        # assign the tonic if there is an estimation
        if tonic_candidate is not None:
            tonic['value'] = tonic_candidate
            tonic['timeInterval']['value'] = [chunk[0, 0], chunk[-1, 0]]

            # convert the distribution bins to frequency
            distribution.cent_to_hz()
            break

    if plot:
        self.plot(pitch_sliced, tonic, pitch_chunks, distribution)

    return tonic, pitch_sliced, pitch_chunks, distribution
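A hedged end-to-end sketch of calling identify. The module path, the host class name (TonicLastNote), and the input file are assumptions for illustration, not the confirmed API; the method expects a pitch track whose rows start with [time, frequency], as manipulated above:

import json

import numpy as np

# module path and class name are assumptions of this sketch
from tonicidentifier.toniclastnote import TonicLastNote

tonic_identifier = TonicLastNote()
pitch = np.array(json.load(open('recording.pitch.json')))  # hypothetical file
tonic, pitch_sliced, pitch_chunks, distribution = tonic_identifier.identify(
    pitch, plot=False)

print(tonic['value'], tonic['unit'])  # the estimated tonic frequency in Hz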