def _get_stablepitch_distribution(self, note_trajectories,
                                  theoretical_interval, ref_freq=None):
    """Estimate the stable pitch of a note from its pitch trajectories and
    return it together with the pitch distribution of the note."""
    temp_pitch_vals = np.hstack(note_trajectories)

    # useful to keep the bins coinciding with a desired value,
    # e.g. the tonic frequency
    if ref_freq is None:
        ref_freq = self._get_median_pitch(temp_pitch_vals)

    distribution = PitchDistribution.from_hz_pitch(
        temp_pitch_vals, ref_freq=ref_freq,
        kernel_width=self.kernel_width, step_size=self.step_size,
        norm_type=None)

    # get the stable pitch as the highest peak among the peaks close to
    # the theoretical pitch
    peaks = distribution.detect_peaks()
    peak_bins = distribution.bins[peaks[0]]
    peak_vals = distribution.vals[peaks[0]]
    try:
        cand_bool = (abs(peak_bins - theoretical_interval) <
                     self.pitch_threshold)
        stable_pitch_cand = peak_bins[cand_bool]
        cand_occr = peak_vals[cand_bool]
        peak_cent = stable_pitch_cand[np.argmax(cand_occr)]

        # convert to the Hz scale
        peak_freq = Converter.cent_to_hz(peak_cent, ref_freq)
    except ValueError:
        # no stable pitch in the vicinity, probably a misalignment
        peak_freq = None

    # convert the distribution bins to the Hz scale
    distribution.cent_to_hz()

    return peak_freq, distribution
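
# A minimal usage sketch (hypothetical names and values), assuming `model` is an
# instance of the class defining the method above, `trajs` is a list of 1-D Hz pitch
# arrays extracted for one note symbol, and the theoretical interval of that note
# w.r.t. the tonic is given in cents (498.0 is only an illustrative value):
#
#     peak_freq, note_dist = model._get_stablepitch_distribution(
#         trajs, theoretical_interval=498.0, ref_freq=440.0)
#     if peak_freq is None:
#         pass  # no stable peak near the theoretical interval; likely a misalignment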
def get_models(self, pitch, alignednotes, tonic_symbol):
    """Compute a pitch distribution model for each aligned note and
    identify the tonic frequency from the tonic note model."""
    note_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'data', 'note_dict.json')
    with open(note_file, 'r') as f:
        note_dict = json.load(f)

    pitch = np.array(pitch)
    alignednotes_ext = deepcopy(alignednotes)

    note_names = set(an['Symbol'] for an in alignednotes_ext)
    note_models = {}
    for nn in note_names:
        try:
            note_models[nn] = {
                'notes': [], 'distribution': [], 'stable_pitch': [],
                'performed_interval': [],
                'theoretical_interval': {
                    'Value': (note_dict[nn]['Value'] -
                              note_dict[tonic_symbol]['Value']),
                    'Unit': 'cent'},
                'theoretical_pitch': []}
        except KeyError:
            logging.warning(
                u"The note {0:s} is not in the note_dict.".format(nn))

    # compute note trajectories and add them to each model
    self._distribute_pitch_trajectories(alignednotes_ext, note_models,
                                        pitch)

    # remove models without any aligned note
    self._remove_unaligned_notes(note_models)

    # estimate the tonic frequency temporarily
    # NOTE: it is extremely unlikely, but this value might shift to the
    # next bin in the note model computation. Hence we don't assign it to
    # the final tonic value.
    tonic_trajectories = [nn['PitchTrajectory'][:, 1]
                          for nn in note_models[tonic_symbol]['notes']]
    temp_tonic_freq = self._get_stablepitch_distribution(
        tonic_trajectories,
        note_models[tonic_symbol]['theoretical_interval']['Value'])[0]

    # compute the histogram for each model
    self._get_note_histogram(note_models, temp_tonic_freq)

    # update the tonic frequency
    newtonic = {'alignment': {
        'Value': note_models[tonic_symbol]['stable_pitch']['Value'],
        'Unit': 'Hz',
        'Symbol': tonic_symbol,
        'Method': 'alignedNoteModel',
        'OctaveWrapped': False,
        'Citation': 'SenturkPhDThesis',
        'Procedure': 'Tonic identified from the tonic note model obtained '
                     'from audio-score alignment'}}

    # get the distances w.r.t. the tonic
    self._get_tunings(newtonic, note_models)

    # compute the complete histogram of the recording without normalization
    recording_distribution = PitchDistribution.from_hz_pitch(
        pitch, ref_freq=temp_tonic_freq, kernel_width=self.kernel_width,
        step_size=self.step_size, norm_type=None)
    recording_distribution.cent_to_hz()

    # normalize all the distributions
    recording_distribution, note_models = self._normalize_distributions(
        recording_distribution, note_models)

    return note_models, recording_distribution, newtonic
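
# A minimal usage sketch (hypothetical names), assuming `model` is an instance of the
# class above, `pitch` is a [time, frequency(, confidence)] track in Hz, `aligned_notes`
# comes from audio-score alignment (with 'Symbol' fields) and `tonic_symbol` is a note
# symbol present in data/note_dict.json:
#
#     note_models, rec_dist, tonic = model.get_models(
#         pitch, aligned_notes, tonic_symbol)
#     print(tonic['alignment']['Value'], tonic['alignment']['Unit'])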
def identify(self, pitch, plot=False):
    """Identify the tonic by detecting the last note and extracting its
    frequency."""
    pitch_sliced = np.array(deepcopy(pitch))

    # remove trailing zeros, i.e. trim the silence at the end
    sil_trim_len = len(np.trim_zeros(pitch_sliced[:, 1], 'b'))
    pitch_sliced = pitch_sliced[:sil_trim_len, :]

    # slice the pitch track to only include the last 10% of the track
    # for performance reasons
    pitch_len = pitch_sliced.shape[0]
    pitch_sliced = pitch_sliced[-int(pitch_len * 0.1):, :]

    # compute the pitch distribution and its peaks
    dummy_freq = 440.0
    distribution = PitchDistribution.from_hz_pitch(
        np.array(pitch)[:, 1], ref_freq=dummy_freq,
        kernel_width=self.kernel_width, step_size=self.step_size)

    # get the pitch chunks
    flt = PitchFilter(lower_interval_thres=self.lower_interval_thres,
                      upper_interval_thres=self.upper_interval_thres,
                      min_freq=self.min_freq, max_freq=self.max_freq)
    pitch_chunks = flt.decompose_into_chunks(pitch_sliced)
    pitch_chunks = flt.post_filter_chunks(pitch_chunks)

    tonic = {"value": None,
             "unit": "Hz",
             "timeInterval": {"value": None, "unit": 'sec'},
             "octaveWrapped": False,  # octave correction is applied
             "procedure": "Tonic identification by last note detection",
             "citation": 'Atlı, H. S., Bozkurt, B., Şentürk, S. (2015). '
                         'A Method for Tonic Frequency Identification of '
                         'Turkish Makam Music Recordings. In Proceedings '
                         'of 5th International Workshop on Folk Music '
                         'Analysis, pages 119–122, Paris, France.'}

    # try all chunks starting from the last one as the tonic candidate,
    # considering the octaves
    for chunk in reversed(pitch_chunks):
        last_note = np.median(chunk[:, 1])

        # check all pitch classes of the last note as tonic candidates
        # by examining their vicinity among the stable pitches
        tonic_candidate = self.check_tonic_with_octave_correction(
            last_note, deepcopy(distribution))

        # assign the tonic if there is an estimation
        if tonic_candidate is not None:
            tonic['value'] = tonic_candidate
            tonic['timeInterval']['value'] = [chunk[0, 0], chunk[-1, 0]]

            # convert the distribution bins to frequency
            distribution.cent_to_hz()
            break

    if plot:
        self.plot(pitch_sliced, tonic, pitch_chunks, distribution)

    return tonic, pitch_sliced, pitch_chunks, distribution
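
# A minimal usage sketch (hypothetical names), assuming `identifier` is an instance of
# the tonic identification class above and `pitch` is a [time, frequency] track in Hz:
#
#     tonic, pitch_sliced, pitch_chunks, dist = identifier.identify(pitch, plot=False)
#     if tonic['value'] is not None:
#         print(tonic['value'], tonic['unit'])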