def check_tonic_with_octave_correction(self, tonic, distribution):
    """
    Choose the octave of a tonic candidate that accumulates the most weight.

    NOTE: ``distribution`` is modified in place; its ``bins`` are shifted
    and its ``ref_freq`` is overwritten with ``tonic``. Pass a copy if the
    original object is still needed.

    :param tonic: tonic candidate; used as the new reference of the
        distribution and as the reference for the returned value
    :param distribution: pitch distribution exposing ``bins``, ``vals``,
        ``ref_freq`` and ``detect_peaks()``
    :return: the result of ``Converter.cent_to_hz`` for the winning stable
        pitch, or None if no stable pitch lies within
        ``self.stable_pitch_dev`` of the candidate's pitch class
    """
    # re-reference the distribution to the candidate (in place)
    distribution.bins -= Converter.hz_to_cent(tonic, distribution.ref_freq)
    distribution.ref_freq = tonic

    # the peaks of the distribution are taken as the stable pitches
    detected = distribution.detect_peaks()
    stable_pitches = distribution.bins[detected[0]]

    # keep the stable pitches lying within the allowed deviation of any
    # octave (multiple of 1200 cents) of the candidate
    candidates = []
    for pitch in stable_pitches:
        remainder = pitch % 1200
        if min([remainder, 1200 - remainder]) < self.stable_pitch_dev:
            candidates.append(pitch)

    # weight of a candidate: total of the distribution values from the
    # candidate (inclusive) up to one octave above it (exclusive)
    weights = [
        np.sum(distribution.vals[(cand <= distribution.bins) &
                                 (distribution.bins < cand + 1200)])
        for cand in candidates]

    # the heaviest candidate wins; max() raises ValueError when there are
    # no candidates at all
    try:
        winner = candidates[weights.index(max(weights))]
        return Converter.cent_to_hz(winner, tonic)
    except ValueError:
        return None  # no stable pitch class found for the given frequency
Example #2
0
    def _stable_pitches_to_notes(self, stable_pitches_hz,
                                 theoretical_intervals, tonic_hz):
        """
        Match each stable pitch to the nearest theoretical interval.

        A stable pitch is kept only if it is closer than
        ``self.pitch_threshold`` to its nearest theoretical interval. If
        several stable pitches map to the same note, the last one wins.

        :param stable_pitches_hz: iterable of stable pitches (Hz)
        :param theoretical_intervals: dict mapping a note name to its
            theoretical interval; values are compared against the stable
            pitches converted by ``Converter.hz_to_cent``
        :param tonic_hz: tonic frequency used as the conversion reference
        :return: dict mapping the matched note names to their performed
            and theoretical intervals and pitches
        """
        stable_pitches_cent = Converter.hz_to_cent(stable_pitches_hz, tonic_hz)

        # Materialise the values once: under Python 3 ``dict.values()`` is a
        # non-indexable view, a list is safe to hand to find_nearest on both
        # Python 2 and 3.
        interval_values = list(theoretical_intervals.values())

        # Finding nearest theoretical values of each stable pitch, identify the
        # name of this value and write to output
        stable_notes = {}  # Defining output (return) object
        for stable_pitch_cent, stable_pitch_hz in zip(stable_pitches_cent,
                                                      stable_pitches_hz):
            note_cent = TonicLastNote.find_nearest(
                interval_values, stable_pitch_cent)

            if abs(stable_pitch_cent - note_cent) >= self.pitch_threshold:
                continue  # too far from any theoretical interval

            # ``items()`` works on both Python 2 and 3 (``iteritems()`` is
            # Python 2 only)
            for key, val in theoretical_intervals.items():
                if val == note_cent:
                    theoretical_pitch = Converter.cent_to_hz(
                        note_cent, tonic_hz)
                    stable_notes[key] = {
                        "performed_interval": {"value": stable_pitch_cent,
                                               "unit": "cent"},
                        "theoretical_interval": {"value": note_cent,
                                                 "unit": "cent"},
                        # result of cent_to_hz, hence labelled in Hz
                        "theoretical_pitch": {"value": theoretical_pitch,
                                              "unit": "Hz"},
                        "stable_pitch": {"value": stable_pitch_hz,
                                         "unit": "Hz"}}
                    break
        return stable_notes
    def _get_tunings(newtonic, note_models):
        """
        Recompute the tuning fields of every note model for a new tonic.

        The note models are updated in place; nothing is returned.

        :param newtonic: dict whose ``['alignment']['Value']`` entry is used
            as the reference for both conversions
        :param note_models: dict of note models; each model must provide
            ``['stable_pitch']['Value']`` and
            ``['theoretical_interval']['Value']``
        """
        for model in note_models.values():
            # performed interval: stable pitch relative to the new tonic
            stable_pitch = model['stable_pitch']['Value']
            performed = Converter.hz_to_cent(stable_pitch,
                                             newtonic['alignment']['Value'])
            model['performed_interval'] = {'Value': performed, 'Unit': 'cent'}

            # theoretical pitch: theoretical interval above the new tonic
            theoretical_interval = model['theoretical_interval']['Value']
            theoretical = Converter.cent_to_hz(theoretical_interval,
                                               newtonic['alignment']['Value'])
            model['theoretical_pitch'] = {'Value': theoretical, 'Unit': 'Hz'}
 def _slice_pitch(self, pp, ti, tt):
     """
     Extract the pitch samples falling inside a time interval.

     :param pp: pitch values, aligned with ``tt``
     :param ti: time interval; a sample is kept when
         ``ti[0] <= t < ti[1]``
     :param tt: time stamps, aligned with ``pp``
     :return: tuple of (the kept samples converted by
         ``Converter.hz_to_cent`` with NaN and infinite values removed,
         the kept samples as a plain list)
     """
     p_sliced = []
     for stamp, pitch in zip(tt, pp):
         if ti[0] <= stamp < ti[1]:
             p_sliced.append(pitch)

     p_cent = Converter.hz_to_cent(p_sliced, self._dummy_ref_freq,
                                   min_freq=20.0)

     # drop NaN and +/-inf in one pass (inf shouldn't exist, but anyways...)
     p_cent = p_cent[np.isfinite(p_cent)]
     return p_cent, p_sliced
    def _compute_seyir_features_per_interval(self, pp, tt, t_intervals,
                                             t_center):
        """
        Compute the seyir features of each time interval.

        :param pp: pitch values, aligned with ``tt``
        :param tt: time stamps, aligned with ``pp``
        :param t_intervals: sequence of (start, end) time intervals
        :param t_center: sequence of interval centers, aligned with
            ``t_intervals``
        :return: list with one dict per interval holding the keys
            'pitch_distribution', 'average_pitch', 'stable_pitches',
            'time_interval' and 'time_center'. Intervals without any valid
            pitch sample get an empty distribution, no stable pitches and
            NaN as the average pitch. An empty ``t_intervals`` yields an
            empty list.
        """
        durations = [ti[1] - ti[0] for ti in t_intervals]
        if not durations:
            # nothing to analyse; max() below would raise on an empty list
            return []
        maxdur = max(durations)

        seyir_features = []
        for ti, tc, duration in zip(t_intervals, t_center, durations):
            p_cent, p_sliced = self._slice_pitch(pp, ti, tt)

            if p_cent.size == 0:  # silence
                seyir_features.append(
                    {'pitch_distribution': [], 'average_pitch': np.nan,
                     'stable_pitches': [], 'time_interval': ti,
                     'time_center': tc})
            else:
                pd = PitchDistribution.from_cent_pitch(
                    p_cent, ref_freq=self._dummy_ref_freq,
                    kernel_width=self.kernel_width, step_size=self.step_size)

                # reconvert to Hz
                pd.cent_to_hz()

                # normalize to 1 (instead of the area under the curve), then
                # scale by the ratio of valid samples and by the relative
                # duration of the interval
                maxval = max(pd.vals)
                num_ratio = float(len(p_cent)) / len(p_sliced)  # ratio of
                # number of samples
                # float() avoids integer truncation for integer time stamps
                # under Python 2, as is already done for num_ratio
                time_ratio = float(duration) / maxdur
                pd.vals = pd.vals * num_ratio * time_ratio / maxval

                # get the stable pitches, i.e. peaks
                peak_idx, peak_vals = pd.detect_peaks()
                stable_pitches = [{'frequency': float(pd.bins[idx]),
                                   'value': float(val)}
                                  for idx, val in zip(peak_idx, peak_vals)]

                # get the average pitch
                avpitch = Converter.cent_to_hz(np.mean(p_cent),
                                               self._dummy_ref_freq)

                seyir_features.append(
                    {'pitch_distribution': pd, 'average_pitch': avpitch,
                     'stable_pitches': stable_pitches, 'time_interval': ti,
                     'time_center': tc})

        return seyir_features
    def _get_stablepitch_distribution(self, note_trajectories,
                                      theoretical_interval, ref_freq=None):
        """
        Compute the pitch distribution of a note and locate its stable pitch.

        :param note_trajectories: iterable of pitch arrays belonging to the
            note; they are concatenated into a single pitch track
        :param theoretical_interval: theoretical interval of the note,
            compared against the bins of the distribution
        :param ref_freq: reference frequency of the distribution; defaults
            to ``self._get_median_pitch`` of the concatenated pitch track
        :return: tuple of (stable pitch as returned by
            ``Converter.cent_to_hz`` or None if no peak lies within
            ``self.pitch_threshold`` of the theoretical interval,
            the pitch distribution after ``cent_to_hz()``)
        """
        # np.hstack requires a real sequence; passing a generator has been
        # deprecated since NumPy 1.16 and is rejected by current releases
        temp_pitch_vals = np.hstack(list(note_trajectories))

        # useful to keep the bins coinciding with a desired value,
        # e.g. tonic frequency
        if ref_freq is None:
            ref_freq = self._get_median_pitch(temp_pitch_vals)

        distribution = PitchDistribution.from_hz_pitch(
            temp_pitch_vals, ref_freq=ref_freq,
            kernel_width=self.kernel_width, step_size=self.step_size,
            norm_type=None)

        # get the stable pitch as the highest peaks among the peaks close to
        # the theoretical pitch TODO
        peaks = distribution.detect_peaks()
        peak_bins = distribution.bins[peaks[0]]
        peak_vals = distribution.vals[peaks[0]]

        try:
            cand_bool = (abs(peak_bins - theoretical_interval) <
                         self.pitch_threshold)
            stable_pitch_cand = peak_bins[cand_bool]
            cand_occr = peak_vals[cand_bool]

            # np.argmax raises ValueError when there is no candidate
            peak_cent = stable_pitch_cand[np.argmax(cand_occr)]

            # convert to hz scale
            peak_freq = Converter.cent_to_hz(peak_cent, ref_freq)
        except ValueError:  # no stable pitch in the vicinity, probably a
            # misalignment
            peak_freq = None

        # convert to hz scale
        distribution.cent_to_hz()

        return peak_freq, distribution
Example #7
0
    def _parse_pitch_input(pitch_in, tonic_freq):
        """
        Parses the pitch input from list, numpy array or file.

        If the input (or the file content) is a matrix, the method assumes the
        columns represent timestamps, pitch and "other columns",
        respectively. It only returns the second column in this case.

        :param pitch_in: pitch input, which is a list, numpy array or filename
        :param tonic_freq: the tonic frequency in Hz
        :return: parsed pitch track (numpy array)
        """
        # parse the pitch track from txt file, list or numpy array
        try:
            p = np.loadtxt(pitch_in)
        except (ValueError, TypeError):
            # Depending on the NumPy version, feeding numeric (non-string)
            # rows to loadtxt raises either ValueError or TypeError; both
            # mean that the input is already in-memory data.
            logger.debug('pitch_in is not a filename')
            p = np.array(pitch_in)

        p = p[:, 1] if p.ndim > 1 else p  # get the pitch stream

        # normalize wrt tonic
        return Converter.hz_to_cent(p, tonic_freq)
Example #8
0
    def _parse_pitch_input(pitch_in, tonic_freq):
        """
        Parses the pitch input from list, numpy array or file.

        If the input (or the file content) is a matrix, the method assumes the
        columns represent timestamps, pitch and "other columns",
        respectively. It only returns the second column in this case.

        :param pitch_in: pitch input, which is a list, numpy array or filename
        :param tonic_freq: the tonic frequency in Hz
        :return: parsed pitch track (numpy array)
        """
        # parse the pitch track from txt file, list or numpy array
        try:
            p = np.loadtxt(pitch_in)
        except (ValueError, TypeError):
            # Depending on the NumPy version, feeding numeric (non-string)
            # rows to loadtxt raises either ValueError or TypeError; both
            # mean that the input is already in-memory data.
            logger.debug('pitch_in is not a filename')
            p = np.array(pitch_in)

        p = p[:, 1] if p.ndim > 1 else p  # get the pitch stream

        # normalize wrt tonic
        return Converter.hz_to_cent(p, tonic_freq)
def search_min_peak_ratio(step_size, kernel_width, distribution_type,
                          min_peak_ratio):
    """
    Count how often the annotated tonic is among the detected peaks.

    Every '*pdf.json' feature file found in the feature folder of the given
    parameter set is loaded, the peaks of its distribution are detected
    with ``min_peak_ratio`` and each peak is evaluated against the tonic
    stored in the file.

    :param step_size: step size identifying the feature folder
    :param kernel_width: kernel width identifying the feature folder
    :param distribution_type: distribution type identifying the feature
        folder
    :param min_peak_ratio: forwarded to ``detect_peaks``
    :return: tuple of (number of files in which at least one peak is
        evaluated as the tonic, total number of peaks over all files)
    """
    base_folder = os.path.join('data', 'features')
    feature_folder = os.path.abspath(io.get_folder(
        base_folder, distribution_type, step_size, kernel_width))
    files = get_filenames_in_dir(feature_folder, keyword='*pdf.json')[0]
    evaluator = Evaluator()
    num_peaks = 0
    num_tonic_in_peaks = 0
    for f in files:
        # context manager so the handle is closed even if parsing fails
        with open(f) as json_file:
            dd = json.load(json_file)
        dd['feature'] = PitchDistribution.from_dict(dd['feature'])

        peak_idx = dd['feature'].detect_peaks(min_peak_ratio=min_peak_ratio)[0]
        peak_cents = dd['feature'].bins[peak_idx]
        peak_freqs = Converter.cent_to_hz(peak_cents, dd['tonic'])

        ev = [evaluator.evaluate_tonic(pp, dd['tonic'])['tonic_eval']
              for pp in peak_freqs]

        num_tonic_in_peaks += any(ev)  # counts the file once if any peak hits
        num_peaks += len(ev)

    return num_tonic_in_peaks, num_peaks