예제 #1
0
def evaluate_clustering(x, clusters):
    """ Returns the root mean square of the differences between array points and their closest clusters.

        `x` is converted to an array; every element obtained by iterating over it is matched to a value of
        `clusters` through `find_nearest_value`. The squared differences are summed, divided by the total
        number of elements of `x` and square-rooted.
    """
    points = _np.asarray(x)

    # A Python-level pass over the points; could be vectorized, but we assume there are few clusters.
    squared_error = sum(
        (value - _mh.find_nearest_value(clusters, value)) ** 2
        for value in points)

    return _np.sqrt(squared_error / points.size)
예제 #2
0
def evaluate_clustering(x, clusters):
    """ Returns the RMS error of a clustering: the root mean square of the distance between each array point
        and the cluster value nearest to it (as chosen by `find_nearest_value`). """
    samples = _np.asarray(x)

    # Accumulate the squared residual of every sample against its nearest cluster.
    # Not optimized on purpose: the number of clusters is expected to be small.
    total = 0
    for sample in samples:
        residual = sample - _mh.find_nearest_value(clusters, sample)
        total += residual**2

    mean_square = total / samples.size
    return _np.sqrt(mean_square)
예제 #3
0
파일: hps.py 프로젝트: vcanaa/PyTranscribe
def plot_tracking(audiopath,
                  title="",
                  binsize=1470,
                  tune=False,
                  plotpath=None,
                  repetitions=10):
    """ Plots the HPS tracking of an audio file.

        The samples read from `audiopath` are split into consecutive bins of `binsize` samples. Each complete
        bin is passed to the HPS detector and yields one value, which is plotted over time.

        Parameters:
            audiopath:   path of the audio file, passed to the sound file reader.
            title:       title of the plot.
            binsize:     number of samples per detection bin.
            tune:        when true, every detection is snapped to the nearest value in the known notes table.
            plotpath:    when given, the figure is saved there; otherwise it is shown interactively.
            repetitions: how many times each detection is repeated in the plotted series (gives the curve a
                         stepped look); the x tick labels are scaled back accordingly.
    """
    samplerate, samples = _sf.readfile(audiopath)

    # Only complete bins are analysed; a trailing partial bin is dropped.
    detections = samples.size // binsize

    # One detected value per bin. The repetition is applied once, below, by _np.repeat.
    p = _np.zeros(detections)
    for i in range(detections):
        f = _hps.hps(samples[i * binsize:(i + 1) * binsize])

        if tune:
            f = _mh.find_nearest_value(_mt.notes, f)

        # Store the detection; without this the plotted series would stay all zeros.
        p[i] = f

    p = _np.repeat(p, repetitions)

    _pl.plot(p)
    _pl.title(title)

    # The repeated series has `repetitions * detections` points; spread 5 ticks over that range.
    xlocs = _np.linspace(0, repetitions * detections, 5)
    _pl.xlabel("Time (s)")
    _pl.xlim([0, _np.max(xlocs)])
    # A position in the repeated series maps to (position / repetitions) bins,
    # i.e. position * binsize / (repetitions * samplerate) seconds.
    _pl.xticks(xlocs, [
        "%.2f" % seconds
        for seconds in _np.multiply(xlocs, binsize / (repetitions * samplerate))
    ])

    _pl.ylabel("Fundamental Frequency (Hz)")
    _pl.ylim((0.9 * _np.min(p), 1.05 * _np.max(p)))

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()

    # Clear the current figure so that later plots start from a clean state.
    _pl.clf()
예제 #4
0
    def finalize(self):
        """ Finalizes the transcriber. Will close resources, perform calculations and normalizations based on the whole
            transcription (e.g.: normalize note duration) then writes the transcription to the desired outputs.

            Steps, in order:
              1. close the transcriber and commit the note in progress if it lasted more than 2 ticks;
              2. snap every note duration (log2 of its tick count) to the nearest duration cluster;
              3. derive a tempo from the most common corrected duration, doubling/halving it into [80, 220];
              4. if WRITE_MIDI or WRITE_XML is set, build a music21 stream (notes, slurs, analyzed key) and emit it;
              5. if WRITE_OUT is set, write the accumulated trace in self.out to OUT_FILENAME.

            NOTE(review): the case where no note was detected at all is not handled here - confirm with callers.
        """
        self.close()

        # Extract the last note.
        if self.current_ticks > 2:
            self.notes.append({
                "name": self.current_note,
                "duration": np.log2(self.current_ticks),
                "ticks": self.current_ticks,
                "slur": "stop" if self.currently_slurring else False,
            })

            if DEBUG_NOTE:
                print("%s\t %d\t %.3fs" %
                      (self.current_note, self.current_ticks,
                       self.current_ticks / self.blocks_per_sec))

        print("\n\n###### Detected notes:")
        for note in self.notes:
            print(note)

        # Durations are stored as log2(ticks); snap them to clusters, then convert back to ticks.
        durations = np.array([n["duration"] for n in self.notes])
        clusters = clst.equidistant_clusterize(durations)
        corrected_notes = [{
            "name": n["name"],
            "duration": 2**mh.find_nearest_value(clusters, n["duration"]),
            "slur": n["slur"],
        } for n in self.notes]

        print("\n\n###### Corrected notes:")
        for note in corrected_notes:
            print(note)

        # Most common corrected duration (the smallest one on ties, as scipy.stats.mode would return).
        # Computed with np.unique so the result does not depend on the shape of the value returned by
        # scipy.stats.mode, which differs between SciPy versions.
        values, counts = np.unique([n["duration"] for n in corrected_notes],
                                   return_counts=True)
        most_common = values[np.argmax(counts)]
        tempo = int(round(60 * self.blocks_per_sec / most_common, 0))

        # Bring the tempo into a playable range, rescaling the reference duration to match.
        while tempo < 80:
            tempo *= 2
            most_common /= 2

        while tempo > 220:
            tempo /= 2
            most_common *= 2

        if WRITE_MIDI or WRITE_XML:
            s = music21.stream.Stream()
            s.append(music21.tempo.MetronomeMark(number=tempo))
            s.append(music21.meter.TimeSignature('4/4'))
            for note in corrected_notes:
                n = music21.note.Note()
                n.pitch.name = note["name"]
                # The most common duration is mapped to one quarter note.
                n.duration.quarterLength = note["duration"] / most_common
                s.append(n)
                note["music21"] = n

            # Group notes into slurs: a slur spans from a "start" note up to and including a "stop" note.
            slurring = False
            slur = music21.spanner.Slur()
            for note in corrected_notes:
                if note["slur"] == "start":
                    slurring = True
                if slurring:
                    slur.addSpannedElements([note["music21"]])
                if note["slur"] == "stop":
                    slurring = False
                    s.insert(0, slur)
                    slur = music21.spanner.Slur()

            # A slur still open at the end of the piece is inserted as is.
            if slurring:
                s.insert(0, slur)

            s.insert(0, s.analyze('key'))

            if WRITE_MIDI:
                sf.write_m21stream_to_midi(s, MIDI_FILENAME)
            if WRITE_XML:
                s.show('musicxml')

        if WRITE_OUT:
            print("### Writing processed output file")
            # The context manager guarantees the file is closed, even if the write fails.
            with open(OUT_FILENAME, 'w') as f:
                f.write(self.out)
                f.flush()
                os.fsync(f)
예제 #5
0
    def update(self):
        """ Performs a transcription iteration update, i.e. wait for microphone data to be available,
            then update the transcriber state (detect pitch, note duration, etc).

            Each call:
              1. reads new samples through self.mic.listen() and computes their RMS;
              2. feeds them to the tonguing detector; on a detected tonguing the current note is committed
                 (if it lasted more than 2 ticks) and the tick counter is reset;
              3. pushes the samples into the analysis block;
              4. returns early when the RMS is below self.noise_threshold (unless DEBUG_NOISE is set);
              5. otherwise runs the HPS pitch detection, snaps the result to the nearest entry of mt.notes and
                 updates current_note / previous_note / current_ticks, committing finished notes to self.notes.

            Raises AssertionError when the tonguing detector (self.tong) is not set.
        """
        if not self.tong:
            raise AssertionError(
                "Please initialize the tonguing detector first. (missing a call to detect_noise()?)"
            )

        self.total_ticks += 1
        new_samples = self.mic.listen()

        if DEBUG_PERF:
            rms_start_time = time.time()

        rms = np.sqrt(np.mean(np.square(new_samples)))

        if DEBUG_PERF:
            self.rms_time = time.time() - rms_start_time
            tong_start_time = time.time()

        # Feed the new samples to the Tonguing Detector.
        # Beware we shouldn't send repeated samples, so we send the new_samples and not the entire block.
        tongued = self.tong.feed(new_samples)

        if DEBUG_PERF:
            self.tong_time = time.time() - tong_start_time

        if tongued:
            if self.current_ticks > 2:
                # We detected tonguing, so split the current note.
                # TODO: if 'previous_note' is considered noisy, account for it in the duration.
                # TODO: Consider whether we should increment the current tick partially (proportionally to the audible portion?).
                self.notes.append({
                    "name": self.current_note,
                    "duration": np.log2(self.current_ticks),
                    "ticks": self.current_ticks,
                    "slur": "stop" if self.currently_slurring else False,
                })

                self.currently_slurring = False
                if DEBUG_NOTE:
                    print("%s\t %d\t %.3fs" %
                          (self.current_note, self.current_ticks,
                           self.current_ticks / self.blocks_per_sec))
                if DEBUG_TONG:
                    print("TONG")
                if WRITE_OUT:
                    self.out += "%d\t: TONG\n" % self.total_ticks

            self.current_ticks = 0

        # Add the new_samples at the beginning of the block, so they replace the oldest values.
        self.block[0:self.samples_per_read] = new_samples

        # Now roll the block back so that it is in chronological order.
        # Not strictly necessary as we're discarding phase, but it makes replacing old values easier and also ensures
        # usual windowing will smooth discontinuities at the borders.
        self.block = np.roll(self.block, -self.samples_per_read)

        # No need to proceed if we're to discard the pitch due to insufficient RMS power in the block.
        if not DEBUG_NOISE and rms < self.noise_threshold:
            # Only record the trace when it will actually be written out; otherwise self.out would keep
            # growing during silence for no purpose.
            if WRITE_OUT:
                self.out += "%d\t: 'rms < self.noise_threshold'\n" % self.total_ticks
            return

        # We want pitch, so pass the block to the PDA
        if DEBUG_PERF:
            hps_start_time = time.time()

        perceived_f = pda.hps.hps(self.block,
                                  fs=self.rate,
                                  harmonics=3,
                                  precision=2)

        if DEBUG_PERF:
            self.hps_time = time.time() - hps_start_time

        # and tune the pitch down to a known note.
        tuned_f = mh.find_nearest_value(mt.notes, perceived_f)
        note = mt.note_name[tuned_f]

        # TODO: rough error percentage estimate
        error = perceived_f - tuned_f
        if error > 0:
            reference = tuned_f * (1 + mt.semitone)
        else:
            reference = tuned_f * (1 - mt.semitone)
        percentage = np.sign(error) * 2 * error / reference

        if note == self.current_note:
            if self.current_note == self.previous_note:
                # We're receiving a new sample of the current note.
                self.current_ticks += 1
            else:
                # Our 'previous_note' measurement was probably noisy.
                # Pretend it was a measurement of 'current_note', and account ticks for both.
                self.current_ticks += 2
        elif note == self.previous_note:
            # Keep in mind that all notes are 'tentative' until their tick count is > n, so:
            #   - C5 C5 C5 D5 D5 means we successfully identified a C5 and the beginning of a D5, assuming n is 1.
            if self.current_ticks > 2:
                # The note changed without tonguing, so the committed note starts or continues a slur.
                self.notes.append({
                    "name": self.current_note,
                    "duration": np.log2(self.current_ticks),
                    "ticks": self.current_ticks,
                    "slur": "continue" if self.currently_slurring else "start",
                })

                self.currently_slurring = True
                if DEBUG_NOTE:
                    print("%s\t %d\t %.3fs" %
                          (self.current_note, self.current_ticks,
                           self.current_ticks / self.blocks_per_sec))

            # Two identical detections in a row: adopt the new note, crediting it with both ticks.
            self.current_note = note
            self.current_ticks = 2
        elif self.previous_note != self.current_note:
            # Experimentally, it's pretty rare to have 2 noisy detections in a row, so if we find 2 different measurements
            # we can assume the old note has ended.
            #   - C5 C5 C5 D5 E5 means we identified a C5 end, but we don't know the next note yet.
            if self.current_ticks > 2:
                self.notes.append({
                    "name": self.current_note,
                    "duration": np.log2(self.current_ticks),
                    "ticks": self.current_ticks,
                    "slur": "continue" if self.currently_slurring else False,
                })

                if DEBUG_NOTE:
                    print("%s\t %d\t %.3fs" %
                          (self.current_note, self.current_ticks,
                           self.current_ticks / self.blocks_per_sec))

            # We currently have no idea of the note being played, so assign an error string to it.
            # When we have k identical detections in a row (with k defined in the elifs above) we will successfully
            # assign the current note.
            self.current_note = "NOISE_ERR"
            self.current_ticks = 0

        self.previous_note = note

        if DEBUG_TICK:
            print("%s\t (%.3f)\t@ %.2f" % (note, percentage, rms))
        if WRITE_OUT:
            self.out += "%d\t: %s\t (%.3f)\t@ %.2f\r\n" % (
                self.total_ticks, note, percentage, rms)
예제 #6
0
    def finalize(self):
        """ Finalizes the transcriber. Will close resources, perform calculations and normalizations based on the whole
            transcription (e.g.: normalize note duration) then writes the transcription to the desired outputs.

            Steps, in order:
              1. close the transcriber and commit the note in progress if it lasted more than 2 ticks;
              2. snap every note duration (log2 of its tick count) to the nearest duration cluster;
              3. derive a tempo from the most common corrected duration, doubling/halving it into [80, 220];
              4. if WRITE_MIDI or WRITE_XML is set, build a music21 stream (notes, slurs, analyzed key) and emit it;
              5. if WRITE_OUT is set, write the accumulated trace in self.out to OUT_FILENAME.

            NOTE(review): the case where no note was detected at all is not handled here - confirm with callers.
        """
        self.close()

        # Extract the last note.
        if self.current_ticks > 2:
            self.notes.append({"name":      self.current_note,
                               "duration":  np.log2(self.current_ticks),
                               "ticks":     self.current_ticks,
                               "slur":      "stop" if self.currently_slurring else False})

            if DEBUG_NOTE:
                print("%s\t %d\t %.3fs"%(self.current_note, self.current_ticks, self.current_ticks/self.blocks_per_sec))

        print("\n\n###### Detected notes:")
        for note in self.notes:
            print(note)

        # Durations are stored as log2(ticks); snap them to clusters, then convert back to ticks.
        durations = np.array([n["duration"] for n in self.notes])
        clusters = clst.equidistant_clusterize(durations)
        corrected_notes = [{"name":     n["name"],
                            "duration": 2**mh.find_nearest_value(clusters, n["duration"]),
                            "slur":     n["slur"]}
                           for n in self.notes]

        print("\n\n###### Corrected notes:")
        for note in corrected_notes:
            print(note)

        # Most common corrected duration (the smallest one on ties, as scipy.stats.mode would return).
        # Computed with np.unique so the result does not depend on the shape of the value returned by
        # scipy.stats.mode, which differs between SciPy versions.
        values, counts = np.unique([n["duration"] for n in corrected_notes], return_counts=True)
        most_common = values[np.argmax(counts)]
        tempo = int(round(60*self.blocks_per_sec/most_common, 0))

        # Bring the tempo into a playable range, rescaling the reference duration to match.
        while tempo < 80:
            tempo *= 2
            most_common /= 2

        while tempo > 220:
            tempo /= 2
            most_common *= 2

        if WRITE_MIDI or WRITE_XML:
            s = music21.stream.Stream()
            s.append(music21.tempo.MetronomeMark(number=tempo))
            s.append(music21.meter.TimeSignature('4/4'))
            for note in corrected_notes:
                n = music21.note.Note()
                n.pitch.name = note["name"]
                # The most common duration is mapped to one quarter note.
                n.duration.quarterLength = note["duration"]/most_common
                s.append(n)
                note["music21"] = n

            # Group notes into slurs: a slur spans from a "start" note up to and including a "stop" note.
            slurring = False
            slur = music21.spanner.Slur()
            for note in corrected_notes:
                if note["slur"] == "start":
                    slurring = True
                if slurring:
                    slur.addSpannedElements([note["music21"]])
                if note["slur"] == "stop":
                    slurring = False
                    s.insert(0, slur)
                    slur = music21.spanner.Slur()

            # A slur still open at the end of the piece is inserted as is.
            if slurring:
                s.insert(0, slur)

            s.insert(0, s.analyze('key'))

            if WRITE_MIDI:
                sf.write_m21stream_to_midi(s, MIDI_FILENAME)
            if WRITE_XML:
                s.show('musicxml')

        if WRITE_OUT:
            print("### Writing processed output file")
            # The context manager guarantees the file is closed, even if the write fails.
            with open(OUT_FILENAME, 'w') as f:
                f.write(self.out)
                f.flush()
                os.fsync(f)
예제 #7
0
    def update(self):
        """ Performs a transcription iteration update, i.e. wait for microphone data to be available,
            then update the transcriber state (detect pitch, note duration, etc).

            Each call:
              1. reads new samples through self.mic.listen() and computes their RMS;
              2. feeds them to the tonguing detector; on a detected tonguing the current note is committed
                 (if it lasted more than 2 ticks) and the tick counter is reset;
              3. pushes the samples into the analysis block;
              4. returns early when the RMS is below self.noise_threshold (unless DEBUG_NOISE is set);
              5. otherwise runs the HPS pitch detection, snaps the result to the nearest entry of mt.notes and
                 updates current_note / previous_note / current_ticks, committing finished notes to self.notes.

            Raises AssertionError when the tonguing detector (self.tong) is not set.
        """
        if not self.tong:
            raise AssertionError("Please initialize the tonguing detector first. (missing a call to detect_noise()?)")

        self.total_ticks += 1
        new_samples = self.mic.listen()

        if DEBUG_PERF:
            rms_start_time = time.time()

        rms = np.sqrt(np.mean(np.square(new_samples)))

        if DEBUG_PERF:
            self.rms_time = time.time() - rms_start_time
            tong_start_time = time.time()

        # Feed the new samples to the Tonguing Detector.
        # Beware we shouldn't send repeated samples, so we send the new_samples and not the entire block.
        tongued = self.tong.feed(new_samples)

        if DEBUG_PERF:
            self.tong_time = time.time() - tong_start_time

        if tongued:
            if self.current_ticks > 2:
                # We detected tonguing, so split the current note.
                # TODO: if 'previous_note' is considered noisy, account for it in the duration.
                # TODO: Consider whether we should increment the current tick partially (proportionally to the audible portion?).
                self.notes.append({"name":      self.current_note,
                                   "duration":  np.log2(self.current_ticks),
                                   "ticks":     self.current_ticks,
                                   "slur":      "stop" if self.currently_slurring else False})

                self.currently_slurring = False
                if DEBUG_NOTE:
                    print("%s\t %d\t %.3fs"%(self.current_note, self.current_ticks, self.current_ticks/self.blocks_per_sec))
                if DEBUG_TONG:
                    print("TONG")
                if WRITE_OUT:
                    self.out += "%d\t: TONG\n" % self.total_ticks

            self.current_ticks = 0

        # Add the new_samples at the beginning of the block, so they replace the oldest values.
        self.block[0:self.samples_per_read] = new_samples

        # Now roll the block back so that it is in chronological order.
        # Not strictly necessary as we're discarding phase, but it makes replacing old values easier and also ensures
        # usual windowing will smooth discontinuities at the borders.
        self.block = np.roll(self.block, -self.samples_per_read)

        # No need to proceed if we're to discard the pitch due to insufficient RMS power in the block.
        if not DEBUG_NOISE and rms < self.noise_threshold:
            # Only record the trace when it will actually be written out; otherwise self.out would keep
            # growing during silence for no purpose.
            if WRITE_OUT:
                self.out += "%d\t: 'rms < self.noise_threshold'\n" % self.total_ticks
            return

        # We want pitch, so pass the block to the PDA
        if DEBUG_PERF:
            hps_start_time = time.time()

        perceived_f = pda.hps.hps(self.block, fs=self.rate, harmonics=3, precision=2)

        if DEBUG_PERF:
            self.hps_time = time.time() - hps_start_time

        # and tune the pitch down to a known note.
        tuned_f = mh.find_nearest_value(mt.notes, perceived_f)
        note = mt.note_name[tuned_f]

        # TODO: rough error percentage estimate
        error = perceived_f - tuned_f
        if error > 0:
            reference = tuned_f*(1 + mt.semitone)
        else:
            reference = tuned_f*(1 - mt.semitone)
        percentage = np.sign(error) * 2 * error/reference

        if note == self.current_note:
            if self.current_note == self.previous_note:
                # We're receiving a new sample of the current note.
                self.current_ticks += 1
            else:
                # Our 'previous_note' measurement was probably noisy.
                # Pretend it was a measurement of 'current_note', and account ticks for both.
                self.current_ticks += 2
        elif note == self.previous_note:
            # Keep in mind that all notes are 'tentative' until their tick count is > n, so:
            #   - C5 C5 C5 D5 D5 means we successfully identified a C5 and the beginning of a D5, assuming n is 1.
            if self.current_ticks > 2:
                # The note changed without tonguing, so the committed note starts or continues a slur.
                self.notes.append({"name":      self.current_note,
                                   "duration":  np.log2(self.current_ticks),
                                   "ticks":     self.current_ticks,
                                   "slur":      "continue" if self.currently_slurring else "start"})

                self.currently_slurring = True
                if DEBUG_NOTE:
                    print("%s\t %d\t %.3fs"%(self.current_note, self.current_ticks, self.current_ticks/self.blocks_per_sec))

            # Two identical detections in a row: adopt the new note, crediting it with both ticks.
            self.current_note = note
            self.current_ticks = 2
        elif self.previous_note != self.current_note:
            # Experimentally, it's pretty rare to have 2 noisy detections in a row, so if we find 2 different measurements
            # we can assume the old note has ended.
            #   - C5 C5 C5 D5 E5 means we identified a C5 end, but we don't know the next note yet.
            if self.current_ticks > 2:
                self.notes.append({"name":      self.current_note,
                                   "duration":  np.log2(self.current_ticks),
                                   "ticks":     self.current_ticks,
                                   "slur":      "continue" if self.currently_slurring else False})

                if DEBUG_NOTE:
                    print("%s\t %d\t %.3fs"%(self.current_note, self.current_ticks, self.current_ticks/self.blocks_per_sec))

            # We currently have no idea of the note being played, so assign an error string to it.
            # When we have k identical detections in a row (with k defined in the elifs above) we will successfully
            # assign the current note.
            self.current_note = "NOISE_ERR"
            self.current_ticks = 0

        self.previous_note = note

        if DEBUG_TICK:
            print("%s\t (%.3f)\t@ %.2f" % (note, percentage, rms))
        if WRITE_OUT:
            self.out += "%d\t: %s\t (%.3f)\t@ %.2f\r\n" % (self.total_ticks, note, percentage, rms)