Example #1
0
    def _cent_pitch_to_feature(self, pitch_cent, ref_freq):
        """
        Turn a pitch track into the distribution feature of this object.

        :param pitch_cent: pitch track handed to
            PitchDistribution.from_cent_pitch (presumably in cents
            relative to ref_freq -- confirm against callers)
        :param ref_freq: forwarded as the ``ref_freq`` of the distribution
        :return: the computed distribution object; ``to_pcd()`` has been
            called on it when ``self.feature_type`` equals 'pcd'
        """
        build_distribution = PitchDistribution.from_cent_pitch
        feature = build_distribution(pitch_cent, ref_freq=ref_freq,
                                     kernel_width=self.kernel_width,
                                     step_size=self.step_size)

        # every feature type other than 'pcd' is returned as computed
        if self.feature_type != 'pcd':
            return feature

        feature.to_pcd()
        return feature
 def deserialize(seyir_features):
     """
     Replace the stored pitch distributions by PitchDistribution objects.

     Each item of ``seyir_features`` is modified in place: the value under
     the key 'pitch_distribution' is passed to PitchDistribution.from_dict
     and overwritten with the result. Nothing is returned.

     :param seyir_features: iterable of dicts with a 'pitch_distribution'
         key
     :raises AssertionError: if from_dict raises AttributeError for a
         value that is not empty
     """
     for entry in seyir_features:
         stored = entry['pitch_distribution']
         try:
             entry['pitch_distribution'] = PitchDistribution.from_dict(
                 stored)
         except AttributeError:  # empty pitch distribution
             # the failed assignment left the entry untouched, so `stored`
             # is still the value held by the entry
             assert not stored, \
                 'non-empty, non-object pitch distribution encountered'
Example #3
0
    def _cent_pitch_to_feature(self, pitch_cent, ref_freq):
        """
        Compute the pitch distribution feature from a pitch track.

        The distribution is created with the kernel width and step size
        stored on the object.

        :param pitch_cent: pitch track given to
            PitchDistribution.from_cent_pitch (presumably cent values --
            confirm against callers)
        :param ref_freq: reference frequency forwarded to the distribution
        :return: the distribution object, after a ``to_pcd()`` call if the
            configured feature type is 'pcd'
        """
        distribution = PitchDistribution.from_cent_pitch(
            pitch_cent, ref_freq=ref_freq, kernel_width=self.kernel_width,
            step_size=self.step_size)

        wants_pcd = self.feature_type == 'pcd'
        if wants_pcd:
            distribution.to_pcd()

        return distribution
    def _compute_seyir_features_per_interval(self, pp, tt, t_intervals,
                                             t_center):
        """
        Compute one feature dictionary for each time interval.

        :param pp: forwarded to self._slice_pitch (presumably the pitch
            values -- confirm)
        :param tt: forwarded to self._slice_pitch (presumably the time
            stamps -- confirm)
        :param t_intervals: sequence of (start, end) pairs
        :param t_center: sequence paired element-wise with t_intervals
        :return: list of dicts with the keys 'pitch_distribution',
            'average_pitch', 'stable_pitches', 'time_interval' and
            'time_center'. Intervals for which the slice is empty get an
            empty distribution, NaN average pitch and no stable pitches.
        """
        # the longest interval is the reference for the duration weighting
        longest = max(span[1] - span[0] for span in t_intervals)

        features = []
        for span, center in zip(t_intervals, t_center):
            cents, sliced = self._slice_pitch(pp, span, tt)

            if cents.size == 0:  # silence
                features.append(
                    {'pitch_distribution': [], 'average_pitch': np.nan,
                     'stable_pitches': [], 'time_interval': span,
                     'time_center': center})
                continue

            dist = PitchDistribution.from_cent_pitch(
                cents, ref_freq=self._dummy_ref_freq,
                kernel_width=self.kernel_width, step_size=self.step_size)

            # back to Hz
            dist.cent_to_hz()

            # scale so that the peak is 1 (instead of unit area), then
            # weight by the ratio of the number of samples and by the
            # interval duration relative to the longest interval
            peak_height = max(dist.vals)
            sample_ratio = float(len(cents)) / len(sliced)
            duration_ratio = (span[1] - span[0]) / longest
            dist.vals = (dist.vals * sample_ratio * duration_ratio /
                         peak_height)

            # the stable pitches are the peaks of the distribution
            peak_idx, peak_vals = dist.detect_peaks()
            stable = []
            for idx, val in zip(peak_idx, peak_vals):
                stable.append({'frequency': float(dist.bins[idx]),
                               'value': float(val)})

            # average pitch, converted from cents to Hz
            mean_pitch = Converter.cent_to_hz(np.mean(cents),
                                              self._dummy_ref_freq)

            features.append(
                {'pitch_distribution': dist, 'average_pitch': mean_pitch,
                 'stable_pitches': stable, 'time_interval': span,
                 'time_center': center})

        return features
    def _get_stablepitch_distribution(self, note_trajectories,
                                      theoretical_interval, ref_freq=None):
        """
        Compute the distribution of a set of note trajectories and pick the
        stable pitch close to the theoretical interval.

        :param note_trajectories: iterable of 1-D pitch arrays; they are
            concatenated and treated as Hz values
        :param theoretical_interval: expected position of the stable pitch
            on the bin axis of the distribution (compared against the peak
            bins before they are converted to Hz)
        :param ref_freq: reference frequency of the distribution; if None,
            self._get_median_pitch of the concatenated values is used
        :return: tuple (peak_freq, distribution). peak_freq is the stable
            pitch in Hz, or None if no peak lies within
            self.pitch_threshold of theoretical_interval. The distribution
            has been converted with cent_to_hz().
        """
        # np.hstack requires a real sequence: generators were deprecated
        # and are rejected by recent NumPy releases
        temp_pitch_vals = np.hstack(list(note_trajectories))

        # useful to keep the bins coinciding with a desired value,
        # e.g. tonic frequency
        if ref_freq is None:
            ref_freq = self._get_median_pitch(temp_pitch_vals)

        distribution = PitchDistribution.from_hz_pitch(
            temp_pitch_vals, ref_freq=ref_freq,
            kernel_width=self.kernel_width, step_size=self.step_size,
            norm_type=None)

        # get the stable pitch as the highest peaks among the peaks close to
        # the theoretical pitch TODO
        peak_idx = distribution.detect_peaks()[0]
        peak_bins = distribution.bins[peak_idx]
        peak_vals = distribution.vals[peak_idx]

        try:
            cand_bool = (abs(peak_bins - theoretical_interval) <
                         self.pitch_threshold)
            stable_pitch_cand = peak_bins[cand_bool]
            cand_occr = peak_vals[cand_bool]

            # np.argmax raises ValueError when there is no candidate
            peak_cent = stable_pitch_cand[np.argmax(cand_occr)]

            # convert to hz scale
            peak_freq = Converter.cent_to_hz(peak_cent, ref_freq)
        except ValueError:  # no stable pitch in the vicinity, probably a
            # misalignment
            peak_freq = None

        # convert to hz scale
        distribution.cent_to_hz()

        return peak_freq, distribution
def search_min_peak_ratio(step_size, kernel_width, distribution_type,
                          min_peak_ratio):
    """
    Count how often the annotated tonic is among the distribution peaks.

    The feature files matching '*pdf.json' are read from the folder that
    io.get_folder builds under data/features for the given parameters
    (``io`` must be a project module here, since it provides get_folder).

    :param step_size: used to locate the feature folder
    :param kernel_width: used to locate the feature folder
    :param distribution_type: used to locate the feature folder
    :param min_peak_ratio: forwarded to the peak detection of each
        distribution
    :return: tuple (num_tonic_in_peaks, num_peaks): the number of files
        with at least one peak evaluated as the tonic, and the total number
        of peaks over all files
    """
    base_folder = os.path.join('data', 'features')
    feature_folder = os.path.abspath(io.get_folder(
        base_folder, distribution_type, step_size, kernel_width))
    files = get_filenames_in_dir(feature_folder, keyword='*pdf.json')[0]
    evaluator = Evaluator()
    num_peaks = 0
    num_tonic_in_peaks = 0
    for f in files:
        # close each file deterministically instead of leaking the handle
        with open(f) as feature_file:
            dd = json.load(feature_file)
        dd['feature'] = PitchDistribution.from_dict(dd['feature'])

        peak_idx = dd['feature'].detect_peaks(min_peak_ratio=min_peak_ratio)[0]
        peak_cents = dd['feature'].bins[peak_idx]
        peak_freqs = Converter.cent_to_hz(peak_cents, dd['tonic'])

        ev = [evaluator.evaluate_tonic(pp, dd['tonic'])['tonic_eval']
              for pp in peak_freqs]

        # any() yields a bool, which counts as 0 or 1
        num_tonic_in_peaks += any(ev)
        num_peaks += len(ev)

    return num_tonic_in_peaks, num_peaks
    def get_models(self, pitch, alignednotes, tonic_symbol):
        """
        Build the note models, the recording distribution and the tonic
        from a pitch track and the notes aligned to it.

        :param pitch: pitch track; converted with np.array before use
        :param alignednotes: iterable of dicts with a 'Symbol' key. A deep
            copy is made and the copy is what the helpers receive.
        :param tonic_symbol: note symbol of the tonic; used as a key of
            the note dictionary and of the note models
        :return: tuple (note_models, recording_distribution, newtonic)
        :raises KeyError: if no model exists for tonic_symbol
        """
        note_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'data', 'note_dict.json')
        # close the file deterministically instead of leaking the handle
        with open(note_file, 'r') as note_handle:
            note_dict = json.load(note_handle)

        pitch = np.array(pitch)
        alignednotes_ext = deepcopy(alignednotes)

        note_names = set(an['Symbol'] for an in alignednotes_ext)
        note_models = {}
        for nn in note_names:
            try:
                note_models[nn] = {
                    'notes': [], 'distribution': [], 'stable_pitch': [],
                    'performed_interval': [], 'theoretical_interval': {
                        'Value': (note_dict[nn]['Value'] -
                                  note_dict[tonic_symbol]['Value']),
                        'Unit': 'cent'}, 'theoretical_pitch': []}
            except KeyError:
                # NOTE(review): a tonic_symbol missing from note_dict ends
                # up here as well and is reported under the name of nn
                logging.warning(
                    u"The note {0:s} is not in the note_dict.".format(nn))

        # compute note trajectories and add to each model
        self._distribute_pitch_trajectories(alignednotes_ext, note_models,
                                            pitch)

        # remove models without any aligned note
        self._remove_unaligned_notes(note_models)

        # update the tonic frequency temporarily
        # NOTE: extremely unlikely but this value might shift to the next bin
        # in the note model computation. Hence we don't assign it to the
        # final tonic value.
        tonic_trajectories = [nn['PitchTrajectory'][:, 1]
                              for nn in note_models[tonic_symbol]['notes']]
        temp_tonic_freq = self._get_stablepitch_distribution(
            tonic_trajectories,
            note_models[tonic_symbol]['theoretical_interval']['Value'])[0]

        # compute the histogram for each model
        self._get_note_histogram(note_models, temp_tonic_freq)

        # update the new tonic frequency
        newtonic = {'alignment': {
            'Value': note_models[tonic_symbol]['stable_pitch']['Value'],
            'Unit': 'Hz', 'Symbol': tonic_symbol, 'Method': 'alignedNoteModel',
            'OctaveWrapped': False, 'Citation': 'SenturkPhDThesis',
            'Procedure': 'Tonic identified from the tonic note model obtained '
                         'from audio-score alignment'}}

        # get the distances wrt tonic
        self._get_tunings(newtonic, note_models)

        # compute the complete histogram without normalization
        recording_distribution = PitchDistribution.from_hz_pitch(
            pitch, ref_freq=temp_tonic_freq, kernel_width=self.kernel_width,
            step_size=self.step_size, norm_type=None)
        recording_distribution.cent_to_hz()

        # normalize all the distributions
        recording_distribution, note_models = self._normalize_distributions(
            recording_distribution, note_models)

        return note_models, recording_distribution, newtonic
    def identify(self, pitch, plot=False):
        """
        Identify the tonic by detecting the last note and extracting the
        frequency.

        :param pitch: pitch track convertible to a 2-D array. Column 1
            holds the pitch values; column 0 is reported as the time of the
            selected chunk (presumably seconds -- confirm).
        :param plot: if True, self.plot is called with the results
        :return: tuple (tonic, pitch_sliced, pitch_chunks, distribution).
            tonic['value'] stays None if no chunk yields a candidate; the
            distribution is converted with cent_to_hz() only when a tonic
            was found.
        """
        track = np.array(deepcopy(pitch))

        # drop the trailing zeros (silence) of the pitch column
        num_kept = len(np.trim_zeros(track[:, 1], 'b'))
        track = track[:num_kept, :]

        # keep only the last 10% of the trimmed track, for performance
        # reasons
        tail_len = int(track.shape[0] * 0.1)
        pitch_sliced = track[-tail_len:, :]

        # the distribution is computed from the complete input pitch, with
        # a dummy reference frequency
        dummy_freq = 440.0
        distribution = PitchDistribution.from_hz_pitch(
            np.array(pitch)[:, 1], ref_freq=dummy_freq,
            kernel_width=self.kernel_width, step_size=self.step_size)

        # split the sliced track into chunks and post-filter them
        chunk_filter = PitchFilter(
            lower_interval_thres=self.lower_interval_thres,
            upper_interval_thres=self.upper_interval_thres,
            min_freq=self.min_freq, max_freq=self.max_freq)
        raw_chunks = chunk_filter.decompose_into_chunks(pitch_sliced)
        pitch_chunks = chunk_filter.post_filter_chunks(raw_chunks)

        tonic = {"value": None, "unit": "Hz",
                 "timeInterval": {"value": None, "unit": 'sec'},
                 "octaveWrapped": False,  # octave correction is done
                 "procedure": "Tonic identification by last note detection",
                 "citation": 'Atlı, H. S., Bozkurt, B., Şentürk, S. (2015). '
                             'A Method for Tonic Frequency Identification of '
                             'Turkish Makam Music Recordings. In Proceedings '
                             'of 5th International Workshop on Folk Music '
                             'Analysis, pages 119–122, Paris, France.'}

        # go through the chunks from the last one backwards and stop at the
        # first chunk that yields a tonic
        for chunk in reversed(pitch_chunks):
            last_note = median(chunk[:, 1])

            # the check gets its own copy of the distribution
            candidate = self.check_tonic_with_octave_correction(
                last_note, deepcopy(distribution))
            if candidate is None:
                continue

            tonic['value'] = candidate
            tonic['timeInterval']['value'] = [chunk[0, 0], chunk[-1, 0]]

            # convert distribution bins to frequency
            distribution.cent_to_hz()
            break

        if plot:
            self.plot(pitch_sliced, tonic, pitch_chunks, distribution)

        return tonic, pitch_sliced, pitch_chunks, distribution
def test(step_size, kernel_width, distribution_type,
         model_type, fold_idx, experiment_type, dis_measure, k_neighbor,
         min_peak_ratio, rank, save_folder, overwrite=False):
    """
    Run the test stage of one fold and store the results as JSON.

    :param step_size: classifier parameter, also part of the folder names
    :param kernel_width: classifier parameter, also part of the folder
        names
    :param distribution_type: feature type given to the classifier
    :param model_type: part of the folder names; for 'multi' an entry is
        removed from the model if the test sample is found in it
    :param fold_idx: index of the fold to test
    :param experiment_type: 'tonic', 'mode' or 'joint'
    :param dis_measure: distance method given to the estimators
    :param k_neighbor: number of neighbours given to the estimators
    :param min_peak_ratio: given to the tonic and joint estimators
    :param rank: given to the estimators
    :param save_folder: root folder holding folds.json, the training
        models and the testing results
    :param overwrite: if True, an existing test folder is deleted and
        recomputed
    :return: a message string if results already exist and overwrite is
        False; otherwise a dict with the lists 'saved', 'failed' and
        'skipped' of per-sample result files
    :raises AssertionError: if the fold or the computed files do not hold
        100 items, or a model entry is neither a path nor a dict
    :raises RuntimeError: if a test sample is part of the training data
    """
    # file to save the results
    res_dict = {'saved': [], 'failed': [], 'skipped': []}
    test_folder = os.path.abspath(os.path.join(io.get_folder(
        os.path.join(save_folder, 'testing', experiment_type), model_type,
        distribution_type, step_size, kernel_width, dis_measure,
        k_neighbor, min_peak_ratio), 'fold{0:d}'.format(fold_idx)))
    results_file = os.path.join(test_folder, 'results.json')
    if not os.path.exists(test_folder):
        os.makedirs(test_folder)
    else:
        if overwrite:
            shutil.rmtree(test_folder, ignore_errors=True)
            os.makedirs(test_folder)
        elif os.path.exists(results_file):
            return u"{0:s} already has results.".format(test_folder)

    # load fold
    fold_file = os.path.join(save_folder, 'folds.json')
    with open(fold_file) as fold_handle:
        folds = json.load(fold_handle)
    test_fold = []
    for f in folds:
        if f[0] == fold_idx:
            test_fold = f[1]['testing']
            break

    assert len(test_fold) == 100, "There should be 100 samples in the test " \
                                  "fold"

    # load training model
    training_folder = os.path.abspath(io.get_folder(
        os.path.join(save_folder, 'training'), model_type,
        distribution_type, step_size, kernel_width))

    model_file = os.path.join(training_folder,
                              u'fold{0:d}.json'.format(fold_idx))
    with open(model_file) as model_handle:
        model = json.load(model_handle)
    # instantiate the PitchDistributions
    for i, m in enumerate(model):
        try:  # filepath given
            with open(os.path.join(save_folder, m)) as entry_handle:
                model[i] = json.load(entry_handle)
        except (TypeError, AttributeError):  # dict already loaded
            assert isinstance(m['feature'], dict), "Unknown model."
        model[i]['feature'] = PitchDistribution.from_dict(
            model[i]['feature'])
        # a model entry holds either several 'sources' or a single 'source'
        try:
            if any(test_sample['source'] in model[i]['sources']
                   for test_sample in test_fold):
                raise RuntimeError('Test data uses training data!')
        except KeyError:
            if any(test_sample['source'] == model[i]['source']
                   for test_sample in test_fold):
                raise RuntimeError('Test data uses training data!')

    for test_sample in test_fold:
        # get MBID from pitch file
        mbid = test_sample['source']
        save_file = os.path.join(test_folder, u'{0:s}.json'.format(mbid))
        if not overwrite and os.path.exists(save_file):
            res_dict['skipped'].append(save_file)
            continue

        # instantiate the classifier and evaluator object
        classifier = KNNClassifier(
            step_size=step_size, kernel_width=kernel_width,
            feature_type=distribution_type, model=copy.deepcopy(model))

        # if the model_type is multi and the test data is in the model,
        # remove it
        # NOTE(review): `mbid in m` tests the keys if m is a dict -- confirm
        # that this is the intended membership check
        if model_type == 'multi':
            for i, m in enumerate(classifier.model):
                if mbid in m:
                    del classifier.model[i]
                    break

        try:
            # we use the pitch instead of the distribution already computed in
            # the feature extraction. those distributions are normalized wrt
            # tonic to one of the bins centers will exactly correspond to
            # the tonic freq. therefore it would be cheating
            pitch = np.loadtxt(test_sample['pitch'])
            if experiment_type == 'tonic':  # tonic identification
                results = classifier.estimate_tonic(
                    pitch, test_sample['mode'], min_peak_ratio=min_peak_ratio,
                    distance_method=dis_measure, k_neighbor=k_neighbor,
                    rank=rank)
            elif experiment_type == 'mode':  # mode recognition
                results = classifier.estimate_mode(
                    pitch, test_sample['tonic'], distance_method=dis_measure,
                    k_neighbor=k_neighbor, rank=rank)
            elif experiment_type == 'joint':  # joint estimation
                results = classifier.estimate_joint(
                    pitch, min_peak_ratio=min_peak_ratio,
                    distance_method=dis_measure, k_neighbor=k_neighbor,
                    rank=rank)
            else:
                raise ValueError("Unknown experiment_type")

            # save results; the file is closed (and thereby flushed) before
            # it is read back below
            with open(save_file, 'w') as save_handle:
                json.dump(results, save_handle)
            res_dict['saved'].append(save_file)
        except Exception:
            # best effort per sample: record the failure and go on, but let
            # KeyboardInterrupt and SystemExit propagate
            res_dict['failed'].append(save_file)

    # merge the per-sample files into a single results file only if every
    # sample succeeded
    if not res_dict['failed']:
        computed = get_filenames_in_dir(test_folder, keyword='*.json')[0]
        assert len(computed) == 100, 'There should have been 100 tested files.'

        results = {}
        for c in computed:
            mbid = os.path.splitext(os.path.split(c)[-1])[0]
            with open(c) as computed_handle:
                results[mbid] = json.load(computed_handle)

        with open(results_file, 'w') as results_handle:
            json.dump(results, results_handle, indent=4)
        for c in computed:
            os.remove(c)
    return res_dict
def evaluate(step_size, kernel_width, distribution_type, model_type,
             experiment_type, dis_measure, k_neighbor, min_peak_ratio,
             result_folder):
    """
    Evaluate the stored test results of an experiment.

    For every '*results.json' file found for the given parameters an
    'evaluation.json' is written next to it, and the values accumulated
    over all result files are written to 'overall_eval.json' in the test
    folder.

    :param step_size: part of the folder names and step size of the tonic
        deviation distributions
    :param kernel_width: part of the folder names
    :param distribution_type: part of the folder names
    :param model_type: part of the folder names
    :param experiment_type: 'tonic' or 'mode'; any other value is handled
        as the joint experiment
    :param dis_measure: part of the folder names
    :param k_neighbor: part of the folder names
    :param min_peak_ratio: part of the folder names
    :param result_folder: root folder of the results
    :return: message string naming the evaluated test folder
    """
    test_folder = os.path.abspath(os.path.join(io.get_folder(
        os.path.join(result_folder, 'testing', experiment_type), model_type,
        distribution_type, step_size, kernel_width, dis_measure,
        k_neighbor, min_peak_ratio)))
    result_files = get_filenames_in_dir(test_folder,
                                        keyword='*results.json')[0]

    anno_file = './data/ottoman_turkish_makam_recognition_dataset' \
                '/annotations.json'
    with open(anno_file) as anno_handle:
        annotations = json.load(anno_handle)
    makam_labels = np.unique([a['makam'] for a in annotations]).tolist()
    evaluator = Evaluator()

    # accumulators over all result files
    tmp_bins = np.arange(0, 1200, step_size)
    if experiment_type == 'tonic':
        eval_folds = {'num_correct_tonic': 0, 'tonic_accuracy': 0,
                      'tonic_deviation_distribution': PitchDistribution(
                          tmp_bins, np.zeros(np.shape(tmp_bins)),
                          kernel_width=0, ref_freq=None)}
    elif experiment_type == 'mode':
        eval_folds = {'num_correct_mode': 0, 'mode_accuracy': 0,
                      'confusion_matrix': {
                          'matrix': np.zeros((len(makam_labels),
                                              len(makam_labels))),
                          'labels': makam_labels}
                      }
    else:
        eval_folds = {'num_correct_tonic': 0, 'tonic_accuracy': 0,
                      'num_correct_mode': 0, 'mode_accuracy': 0,
                      'num_correct_joint': 0, 'joint_accuracy': 0,
                      'tonic_deviation_distribution': PitchDistribution(
                          tmp_bins, np.zeros(np.shape(tmp_bins)),
                          kernel_width=0, ref_freq=None),
                      'confusion_matrix': {
                          'matrix': np.zeros((len(makam_labels),
                                              len(makam_labels))),
                          'labels': makam_labels}
                      }

    for rf in result_files:
        with open(rf) as result_handle:
            res = json.load(result_handle)
        eval_file = os.path.join(os.path.dirname(rf), 'evaluation.json')

        rec_ev = []
        for aa in annotations:
            mbid = os.path.split(aa['mbid'])[-1]

            if mbid in res.keys():  # in testing data
                if experiment_type == 'tonic':
                    rec_ev.append(evaluator.evaluate_tonic(res[mbid][0][0],
                                                           aa['tonic'], mbid))
                    # .tolist() makes the values JSON serializable
                    rec_ev[-1]['tonic_eval'] = rec_ev[-1]['tonic_eval'].\
                        tolist()
                    rec_ev[-1]['same_octave'] = rec_ev[-1]['same_octave'].\
                        tolist()

                elif experiment_type == 'mode':
                    rec_ev.append(evaluator.evaluate_mode(res[mbid][0][0],
                                                          aa['makam'], mbid))

                else:
                    rec_ev.append(evaluator.evaluate_joint(
                        [res[mbid][0][0][0], aa['tonic']],
                        [res[mbid][0][0][1], aa['makam']], mbid))

                    rec_ev[-1]['tonic_eval'] = rec_ev[-1]['tonic_eval'].\
                        tolist()
                    rec_ev[-1]['same_octave'] = rec_ev[-1]['same_octave'].\
                        tolist()
                    try:
                        rec_ev[-1]['joint_eval'] = rec_ev[-1]['joint_eval'].\
                            tolist()
                    except AttributeError:
                        # TODO: find out why i've put an exception here
                        pass

        # each of the three sections below is skipped through the KeyError
        # raised when the per-recording evaluations lack the needed keys
        ev = {'per_recording': rec_ev, 'overall': {}}
        try:
            ev['overall']['num_correct_tonic'] = sum(rec['tonic_eval']
                                                     for rec in rec_ev)
            ev['overall']['tonic_accuracy'] = (
                ev['overall']['num_correct_tonic'] / len(rec_ev))

            ev['overall']['tonic_deviation_distribution'] = \
                PitchDistribution.from_cent_pitch(
                    [rec['cent_diff'] for rec in rec_ev], ref_freq=None,
                    step_size=step_size, kernel_width=0)

            try:  # force to pcd
                ev['overall']['tonic_deviation_distribution'].to_pcd()
            except AssertionError:
                pass

            eval_folds['num_correct_tonic'] += ev['overall'][
                'num_correct_tonic']
            eval_folds['tonic_deviation_distribution'].vals +=\
                ev['overall']['tonic_deviation_distribution'].vals

            ev['overall']['tonic_deviation_distribution'] = \
                ev['overall']['tonic_deviation_distribution'].to_dict()
        except KeyError:
            pass
        try:
            ev['overall']['num_correct_mode'] = sum(rec['mode_eval']
                                                    for rec in rec_ev)
            ev['overall']['mode_accuracy'] = (
                ev['overall']['num_correct_mode'] / len(rec_ev))

            ev['overall']['confusion_matrix'] = {
                'matrix': confusion_matrix(
                    [rec['annotated_mode'] for rec in rec_ev],
                    [rec['estimated_mode'] for rec in rec_ev],
                    labels=makam_labels),
                'labels': makam_labels}

            eval_folds['num_correct_mode'] += ev['overall'][
                'num_correct_mode']

            eval_folds['confusion_matrix']['matrix'] +=\
                ev['overall']['confusion_matrix']['matrix']

            ev['overall']['confusion_matrix']['matrix'] = \
                ev['overall']['confusion_matrix']['matrix'].astype(int).tolist()

        except KeyError:
            pass
        try:
            ev['overall']['num_correct_joint'] = sum(rec['joint_eval']
                                                     for rec in rec_ev)
            ev['overall']['joint_accuracy'] = (
                ev['overall']['num_correct_joint'] / len(rec_ev))

            eval_folds['num_correct_joint'] += ev['overall'][
                'num_correct_joint']
        except KeyError:
            pass

        with open(eval_file, 'w') as eval_handle:
            json.dump(ev, eval_handle)

    # NOTE(review): the divisor 10 is hard-coded; presumably 10 folds of 100
    # samples each, which makes the result a percentage -- confirm
    if experiment_type == 'tonic':
        eval_folds['tonic_accuracy'] = eval_folds['num_correct_tonic'] / 10
        eval_folds['tonic_deviation_distribution'] = \
            eval_folds['tonic_deviation_distribution'].to_dict()
    elif experiment_type == 'mode':
        eval_folds['mode_accuracy'] = eval_folds['num_correct_mode'] / 10
        eval_folds['confusion_matrix']['matrix'] = \
            eval_folds['confusion_matrix']['matrix'].astype(int).tolist()
    else:
        eval_folds['tonic_accuracy'] = eval_folds['num_correct_tonic'] / 10
        eval_folds['mode_accuracy'] = eval_folds['num_correct_mode'] / 10
        eval_folds['joint_accuracy'] = eval_folds['num_correct_joint'] / 10

        eval_folds['tonic_deviation_distribution'] = \
            eval_folds['tonic_deviation_distribution'].to_dict()
        eval_folds['confusion_matrix']['matrix'] = \
            eval_folds['confusion_matrix']['matrix'].tolist()

    overall_file = os.path.join(test_folder, 'overall_eval.json')
    with open(overall_file, 'w') as overall_handle:
        json.dump(eval_folds, overall_handle)

    return u'{0:s} done'.format(test_folder)