Example #1
    def compute(self,
                s,
                note,
                instrument,
                number_frames=10,
                frame_skip=2,
                save_metadata=True):
        """Builds a basis from the mean of a spectrogram.

        Uses a part of the spectrogram to compute the mean value.

        Args:
            s: Spectrogram object.
            note: note's name.
            instrument: instrument's name.
            number_frames: number of frames to use. Default: 10.
            frame_skip: number of frames to skip after the attack. Default: 2.
            save_metadata: flag indicating whether the metadata should be
                           computed. Default: True.

        Returns:
            LinearDecomposition object with the window's mean on the left.
        """
        data = s.data

        # Detects attack
        energy = numpy.sum(data, 0)
        energy_peak = numpy.argmax(energy)

        # Maximum number of frames available
        max_frames = numpy.size(data, 1)

        # Avoids getting fewer frames because the data ended
        if energy_peak > (max_frames - number_frames - frame_skip - 1):
            energy_peak = max_frames - number_frames - frame_skip - 1

        # But if there's enough data, skip a few frames after energy peak
        else:
            energy_peak += frame_skip

        # Cuts data
        data = data[:, energy_peak:energy_peak + number_frames]

        # Computes the mean
        data = numpy.array([numpy.mean(data, 1)]).transpose()

        # Saves metadata
        input_metadata = md.ObjectMetadata(s) if save_metadata else None

        # Stores the basis as a decomposition
        d = ld.LinearDecomposition()
        d.add((instrument, note),
              left=data,
              left_metadata=md.Metadata(method='mean',
                                        number_of_frames=number_frames,
                                        frame_skip=frame_skip,
                                        spectrogram=s.metadata,
                                        spectrogram_input=input_metadata))

        return d
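The core numeric steps of the method (attack detection, clamping, and averaging) can be illustrated on a bare numpy matrix. The sketch below is a simplified stand-in that omits the Spectrogram and LinearDecomposition wrappers; the synthetic data and dimensions are assumptions for illustration only.

import numpy

# Synthetic spectrogram: 513 frequency bins x 200 frames, with a peak near frame 50
data = numpy.abs(numpy.random.randn(513, 200))
data[:, 50] += 10.0

number_frames, frame_skip = 10, 2

# Detects the attack as the frame with maximum total energy
energy_peak = numpy.argmax(numpy.sum(data, 0))

# Clamps the start so enough frames remain, otherwise skips past the attack
max_frames = data.shape[1]
if energy_peak > max_frames - number_frames - frame_skip - 1:
    energy_peak = max_frames - number_frames - frame_skip - 1
else:
    energy_peak += frame_skip

# Column vector with the mean spectrum of the selected window
basis = numpy.mean(data[:, energy_peak:energy_peak + number_frames],
                   axis=1, keepdims=True)
print(basis.shape)  # (513, 1)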
Example #2
    def trim(self,
             s,
             min_freq=0,
             max_freq=float('Inf'),
             min_time=0,
             max_time=float('Inf'),
             save_metadata=True):
        """Cuts some pieces of the spectrogram.

        Keeps only a desired rectangle in the frequency/time matrix
        associated with the spectrogram. By default, all arguments not
        provided don't cause any restriction on the trimmed region.

        Args:
            s: Spectrogram object.
            min_freq: minimum frequency to be kept. Default: 0.
            max_freq: maximum frequency to be kept. Default: inf.
            min_time: minimum time to be kept. Default: 0.
            max_time: maximum time to be kept. Default: inf.
            save_metadata: flag indicating whether the metadata should be
                           computed. Default: True.

        Returns:
            Trimmed Spectrogram object.
        """
        # Clamps the bounds to the spectrogram's actual limits
        if max_freq > s.metadata.max_freq:
            max_freq = s.metadata.max_freq

        if max_time > s.metadata.max_time:
            max_time = s.metadata.max_time

        # Finds frequency and time bounds
        maxK = s.freq_bin(max_freq)
        minK = s.freq_bin(min_freq)
        maxT = s.time_bin(max_time)
        minT = s.time_bin(min_time)

        new_s = spectrogram.Spectrogram()
        new_s.data = s.data[minK:maxK + 1, minT:maxT + 1]
        new_s.metadata.min_freq = s.freq_range(minK)[0]
        new_s.metadata.min_time = s.time_range(minT)[0]
        new_s.metadata.max_freq = s.freq_range(maxK)[0]
        new_s.metadata.max_time = s.time_range(maxT)[0]
        new_s.metadata.sampling_configuration = \
            s.metadata.sampling_configuration
        new_s.metadata.input_metadata = copy.deepcopy(s.metadata)

        new_s.metadata.method = md.Metadata(original_input=s.metadata.input,
                                            original_method=s.metadata.method,
                                            name='trim',
                                            min_freq=min_freq,
                                            max_freq=max_freq,
                                            min_time=min_time,
                                            max_time=max_time)
        if save_metadata:
            new_s.metadata.input = md.ObjectMetadata(s)

        return new_s
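The freq_bin/time_bin and freq_range/time_range helpers belong to the Spectrogram class and are not shown here. Assuming linearly spaced frequency bins (an assumption, not the library's actual mapping), the index arithmetic behind the trim can be sketched as follows.

import numpy

def freq_bin(freq, min_freq, max_freq, n_bins):
    # Hypothetical linear mapping from frequency to bin index
    return int(round((freq - min_freq) / (max_freq - min_freq) * (n_bins - 1)))

data = numpy.random.rand(513, 400)        # frequency x time
min_f, max_f = 0.0, 11025.0               # assumed spectrogram limits (Hz)

minK = freq_bin(200.0, min_f, max_f, data.shape[0])
maxK = freq_bin(4000.0, min_f, max_f, data.shape[0])

trimmed = data[minK:maxK + 1, :]          # keeps only the desired frequency band
print(trimmed.shape)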
Example #3
    def hard_sparsity(self, d, sparsity, save_metadata=True):
        """Alters the given linear decomposition, applying a median filter.

        The filtering process is done in the time domain.

        The argument provided is destroyed.

        Args:
            d: LinearDecomposition object to filter.
            sparsity: maximum polyphony allowed.
            save_metadata: flag indicating whether the metadata should be
                           computed. Default: True.

        Returns:
            Same decomposition given in arguments.
        """
        if save_metadata:
            metadata = md.ObjectMetadata(d)
        else:
            metadata = None

        meta = md.Metadata(name="sparsity_level",
                           sparsity=sparsity,
                           activation_input=metadata,
                           original_method=None)

        # Stacks the activations of all keys into a single matrix
        A = None
        p = []
        for k in d.data.right.keys():
            if A is None:
                A = d.data.right[k]
            else:
                A = numpy.vstack((A, d.data.right[k]))
            p.append(k)

        for i in xrange(A.shape[1]):
            b = numpy.partition(A[:,i], sparsity-1)[sparsity-1]
            A[:,i] = A[:,i] * (A[:,i] >= b)


        for argk in xrange(len(p)):
            d.data.right[p[argk]] = A[argk, :]
            d.data.right[p[argk]].shape = (1, len(d.data.right[p[argk]]))
            d.metadata.right[p[argk]] = md.Metadata(
                method="sparsity_level",
                sparsity=sparsity,
                activation_input=metadata,
                original_method=d.metadata.right[p[argk]])

        return d
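The per-frame thresholding relies on numpy.partition: for each column, the sparsity-th smallest value is selected and every entry strictly below it is zeroed. A self-contained sketch on a small matrix, with made-up activation values:

import numpy

A = numpy.array([[0.9, 0.1, 0.5],
                 [0.2, 0.8, 0.4],
                 [0.7, 0.3, 0.6],
                 [0.1, 0.6, 0.2]])
sparsity = 2

for i in range(A.shape[1]):
    # Value of the sparsity-th smallest entry in this frame (column)
    b = numpy.partition(A[:, i], sparsity - 1)[sparsity - 1]
    # Zeroes every entry strictly below that value
    A[:, i] = A[:, i] * (A[:, i] >= b)

print(A)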
Example #4
    def fold(self, s, folds=2, save_metadata=True):
        """Cuts some pieces of the spectrogram.

        Keeps only a desired rectangle in the frequency/time matrix
        associated with the spectrogram. By default, all arguments not
        provided don't cause any restriction on the trimmed region.

        Args:
            folds: number of folds (tensor depth)
            save_metadata: flag indicating whether the metadata should be
                           computed. Default: True.

        Returns:
            Trimmed Spectrogram object.
        """

        new_s = spectrogram.Spectrogram()
        new_s.data = s.data[:, 0:-folds]

        nFolds = 1
        while nFolds < folds:
            new_s.data = numpy.vstack(
                (new_s.data, s.data[:, nFolds:-folds + nFolds]))
            nFolds += 1

        new_s.metadata.min_freq = s.metadata.min_freq
        new_s.metadata.min_time = s.metadata.min_time
        new_s.metadata.max_freq = s.metadata.max_freq * folds
        new_s.metadata.max_time = s.metadata.max_time
        new_s.metadata.sampling_configuration = \
            s.metadata.sampling_configuration
        new_s.metadata.method = md.Metadata(original_input=s.metadata.input,
                                            original_method=s.metadata.method,
                                            name='tensor-fold',
                                            min_freq=s.metadata.min_freq,
                                            max_freq=s.metadata.max_freq *
                                            folds,
                                            min_time=s.metadata.min_time,
                                            max_time=s.metadata.max_time)
        if save_metadata:
            new_s.metadata.input = md.ObjectMetadata(s)

        return new_s
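The folding step simply stacks time-shifted views of the spectrogram matrix. A minimal sketch with folds=2 on a tiny made-up matrix shows how each column of the result ends up holding consecutive frames:

import numpy

data = numpy.arange(12).reshape(3, 4)     # 3 frequency bins x 4 frames
folds = 2

folded = data[:, 0:-folds]
n = 1
while n < folds:
    folded = numpy.vstack((folded, data[:, n:-folds + n]))
    n += 1

print(folded.shape)  # (6, 2): column t stacks frames t and t+1
print(folded)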
Example #5
    def median_filter(self, d, window, save_metadata=True):
        """Alters the given linear decomposition, applying a median filter.

        The filtering process is done in the time domain.

        The argument provided is destroyed.

        Args:
            d: LinearDecomposition object to filter.
            window: window length.
            save_metadata: flag indicating whether the metadata should be
                           computed. Default: True.

        Returns:
            Same decomposition given in arguments.
        """
        if save_metadata:
            metadata = md.ObjectMetadata(d)
        else:
            metadata = None

        meta = md.Metadata(name="median_filter_length",
                           window=window,
                           activation_input=metadata,
                           original_method=None)

        # Filters each activation along time and adjusts the metadata
        for k in d.data.right.keys():
            d.data.right[k] = mf.median_filter_centered(
                d.data.right[k].transpose(), window).transpose()
            d.metadata.right[k] = md.Metadata(
                method="median_filter_length",
                window=window,
                activation_input=metadata,
                original_method=d.metadata.right[k])

        return d
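mf.median_filter_centered is a helper of this library and is not shown here. As an approximation (an assumption, not the library's code), a centered median filter over the time axis behaves like scipy.ndimage.median_filter with an odd window:

import numpy
from scipy.ndimage import median_filter

# One activation row over 12 time frames, with an isolated spike at frame 5
activation = numpy.array([0., 0., 1., 1., 1., 5., 1., 1., 0., 0., 1., 0.])

# Centered median filter with a 3-frame window removes the spike
smoothed = median_filter(activation, size=3)
print(smoothed)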
Example #6
    def convert(self,
                d,
                instrument,
                frequency,
                minimum_length,
                save_metadata=True):
        """Converts an linear decomposition to a score.

        If the given frequency is 0, then the frequency becomes the one in the
        spectrogram used to compute the linear decomposition activation.

        Args:
            d: LinearDecomposition object with binary right side.
            instrument: name of the instrument to be extracted.
            frequency: frequency used to transfer activation time bins to
                       timestamps.
            minimum_length: minimum length of note to be considered.
            save_metadata: flag indicating whether the metadata should be
                           computed. Default: True.

        Returns:
            Score object.
        """
        # Loads valid frequency to be used
        if frequency == 0.:
            ofs = d.metadata.get('sampling_configuration.ofs')
        else:
            ofs = float(frequency)

        s = score.Score()

        s.metadata.instrument = instrument
        if save_metadata:
            s.metadata.input = md.ObjectMetadata(d)

        s.metadata.method_metadata = \
                md.Metadata(type='algorithm',
                            algorithm='binary activation',
                            frequency=ofs,
                            minimum_length=minimum_length,
                            activation_metadata=d.metadata.right)

        # TODO: check if this parameter really does what it's supposed to.
        # Currently it ignores zeros in the activation matrix once a note has
        # been detected and the minimum window hasn't been reached.
        minimum_window = minimum_length * ofs

        for k, data, metadata in d.right():
            if k[0] != instrument:
                continue

            note_start = -1
            activation = data

            # Considers only one line per note for now. TODO: consider more
            for t in range(activation.shape[1]):
                # Checks if starting a new note
                if activation[0, t] and note_start == -1:
                    note_start = t

                # Checks for note ending
                elif not activation[0, t] and note_start != -1:
                    # If minimum length is met, adds the note
                    if t - note_start > minimum_window:
                        s.append(note.Note(onset=note_start/float(ofs),\
                            offset=t/float(ofs), name=k[1]))
                    # Marks note as finished
                    note_start = -1

        return s
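The conversion boils down to scanning a binary activation row and emitting a note for every run of ones longer than the minimum window. The standalone sketch below uses plain tuples instead of the library's Note and Score classes; the frame rate and minimum length are assumed values.

ofs = 10.0              # frames per second (assumed)
minimum_length = 0.25   # seconds (assumed)
minimum_window = minimum_length * ofs

activation = [0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0]

notes = []
note_start = -1
for t, active in enumerate(activation):
    if active and note_start == -1:
        note_start = t
    elif not active and note_start != -1:
        if t - note_start > minimum_window:
            notes.append((note_start / ofs, t / ofs))  # (onset, offset) in seconds
        note_start = -1

print(notes)  # [(0.1, 0.5), (0.9, 1.2)]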
Example #7
    def compute(self, spectrograms, size=None, instrument=None, note=None,
                basis=None, beta=2., min_delta=0., max_iterations=100,
                save_metadata=True):
        """Computes the activation matrix from a basis matrix and a spectrogram.

        Uses the beta divergence to compute the activations.

        If min_delta is zero, the code may run faster because no beta divergence
        is actually computed. Otherwise, the code stops computing if two
        iterations of the algorithm don't improve the result by more than
        min_delta.

        Only one of 'basis' and 'size' arguments may be set, as they specify
        different things. With 'size', the user extracts both a basis and an
        activation from the spectrogram, while with 'basis' only an activation
        is computed.

        Each activation computed has the same key as the corresponding basis
        plus the spectrogram's index in the list provided.

        If a basis is being created, its name is a tuple of (instrument, note),
        even if they are None.

        Args:
            spectrograms: list of Spectrograms to be merged and used to compute
                          the activations.
            size: Number of basis to extract from the spectrogram. Must be None
                  if the 'basis' argument is defined.
            instrument: Name of the instrument. This is used only if size is
                        set. If None, it's ignored. Default: None.
            note: Name of the note. This is used only if size is set. If None,
                  it's ignored. Default: None.
            basis: LinearDecomposition object describing the basis to be used.
                   Must be None if the 'size' argument is defined.
            beta: value for the beta used in divergence. Default: 2.
            min_delta: threshold for early stop. Default: 0.
            max_iterations: maximum number of iterations to use. Default: 100.
            save_metadata: flag indicating whether the metadata should be
                           computed. Default: True.

        Returns:
            LinearDecomposition object with basis and activations for the
            spectrograms.

        Raises:
            ValueError: matrices have incompatible sizes.
        """
        # Check arguments compatibility
        if size is None and basis is None:
            raise ValueError("One of 'size' or 'basis' must not be None.")

        if basis is not None and size is not None:
            raise ValueError("Only one of 'size' or 'basis' must not be None.")

        # Saves metadata
        if save_metadata:
            s_meta = [md.ObjectMetadata(s) for s in spectrograms]
        else:
            s_meta = [None for s in spectrograms]

        # Marks the limits of each spectrogram
        X_start = [0]
        for s in spectrograms:
            X_start.append(X_start[-1]+s.data.shape[1])

        # Merges spectrograms
        X = numpy.hstack([s.data for s in spectrograms])

        # If we have a basis, we only need to compute the activations
        if basis is not None:
            # Merges basis but keep track where each one starts so that it can
            # be used to characterize the activations
            B = []
            B_start = [0]
            for k, data, metadata in basis.left():
                B.append(data)
                B_start.append(B_start[-1]+data.shape[1])
            B = numpy.hstack(B)

            # Saves metadata
            if save_metadata:
                b_meta = md.ObjectMetadata(B)
            else:
                b_meta = None

            # Initializes activations
            A = numpy.ones((B.shape[1], X.shape[1]))

            # Computes the activation
            self.compute_activation(X, B, A, beta, min_delta, max_iterations)

            # Starts creating the decomposition object
            d = ld.LinearDecomposition()

            # Copies the left side from the basis, since it came from there
            d.data.left = basis.data.left
            d.metadata.left = basis.metadata.left

            # Cuts the activation. For each combination of basis and
            # spectrograms, we get an activation
            i = 0
            for k, data, metadata in basis.left():
                for j in range(len(spectrograms)):
                    # Since spectrograms don't have name, we call it by its
                    # sequence number
                    s_name = (j,)

                    # Cuts the activation
                    A_cut = A[B_start[i]:B_start[i+1], X_start[j]:X_start[j+1]]

                    # Merges the basis key with the spectrogram name to create a
                    # key for the activation. Then stores a lot of metadata
                    # about what was used to compute it.
                    d.add(k+s_name,
                          right=A_cut,
                          right_metadata=md.Metadata(
                              method="beta_nmf",
                              beta=beta,
                              min_delta=min_delta,
                              max_iterations=max_iterations,
                              spectrogram_input=s_meta[j],
                              spectrogram=spectrograms[j].metadata,
                              basis_input=b_meta,
                              basis=metadata))

                # Increase basis iterator
                i += 1
        else:
            # Everyone gets the same matrices to work with every time, so we
            # avoid consistency problems. However, we can't have the same values
            # filling the matrices or the algorithm can't separate the basis and
            # activations (everyone keeps getting the same value).
            numpy.random.seed(0)
            B = numpy.random.rand(X.shape[0], size)
            A = numpy.random.rand(size, X.shape[1])

            # Computes both basis and activations
            self.compute_both(X, B, A, beta, min_delta, max_iterations)

            # Key for the basis created
            key = (instrument, note)

            # Starts creating the decomposition object
            d = ld.LinearDecomposition()

            # Adds basis
            d.add(key,
                  left=B,
                  left_metadata=md.Metadata(
                      method="beta_nmf",
                      beta=beta,
                      min_delta=min_delta,
                      max_iterations=max_iterations,
                      spectrogram_input=s_meta,
                      spectrogram=[s.metadata for s in spectrograms]))

            # Adds the activations cut to match the spectrograms
            for j in range(len(spectrograms)):
                # Since spectrograms don't have name, we call it by its sequence
                # number
                s = spectrograms[j]
                s_name = (j,)

                # Cuts the activation
                A_cut = A[:, X_start[j]:X_start[j+1]]

                # Merges the basis key with the spectrogram name to create a key
                # for the activation. Then stores a lot of metadata about what
                # was used to compute it.
                d.add(key+s_name,
                      right=A_cut,
                      right_metadata=md.Metadata(
                          method="beta_nmf",
                          beta=beta,
                          min_delta=min_delta,
                          max_iterations=max_iterations,
                          spectrogram_input=s_meta[j],
                          spectrogram=s.metadata))

        return d
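compute_activation and compute_both are not shown in this excerpt. For beta = 2 the divergence reduces to the squared Euclidean distance, and a minimal version of the standard Lee-Seung multiplicative updates can be sketched as below; the matrix sizes, iteration count, and eps are assumptions for illustration, not the library's defaults.

import numpy

numpy.random.seed(0)
X = numpy.random.rand(64, 100)              # spectrogram to factorize
size = 4

B = numpy.random.rand(X.shape[0], size)     # basis
A = numpy.random.rand(size, X.shape[1])     # activations
eps = 1e-12                                 # avoids division by zero

for _ in range(100):
    # Multiplicative updates for the Euclidean (beta = 2) cost
    A *= (B.T @ X) / (B.T @ B @ A + eps)
    B *= (X @ A.T) / (B @ A @ A.T + eps)

print(numpy.linalg.norm(X - B @ A))         # reconstruction error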
Example #8
    def trim_notes(self,
                   s,
                   min_duration=0,
                   max_duration=float('inf'),
                   min_pitch=0,
                   max_pitch=float('inf'),
                   min_onset=0,
                   max_onset=float('inf'),
                   min_offset=0,
                   max_offset=float('inf'),
                   save_metadata=True):
        """Removes from a score notes that don't satisfy a criteria.

        Trims the transcription so notes that are out of the specified bounds
        will be cut out of the transcription. The notes aren't copied for the
        new Score, so any modification on them alters both the original and
        trimmed.

        This function is useful when you are trying to exclude notes that are
        obviously wrong in a certain transcription. By default, all arguments
        not provided don't cause any note to be removed.

        Args:
            s: Score object.
            min_duration: minimum duration to keep. Default: 0.
            max_duration: maximum duration to keep. Default: inf.
            min_pitch: minimum pitch to keep. Default: 0.
            max_pitch: maximum pitch to keep. Default: inf.
            min_onset: minimum onset to keep. Default: 0.
            max_onset: maximum onset to keep. Default: inf.
            min_offset: minimum offset to keep. Default: 0.
            max_offset: maximum offset to keep. Default: inf.
            save_metadata: flag indicating whether the metadata should be
                           computed. Default: True.

        Returns:
            Trimmed Score object.
        """
        new_s = score.Score()
        new_s.append([
            n for n in s.data if n.data.duration >= min_duration
            and n.data.duration <= max_duration and n.data.onset >= min_onset
            and n.data.onset <= max_onset and n.data.offset >= min_offset
            and n.data.offset <= max_offset and n.data.pitch >= min_pitch
            and n.data.pitch <= max_pitch
        ])

        new_s.metadata.instrument = s.metadata.instrument
        new_s.metadata.method_metadata = md.Metadata(
            type="trim",
            min_duration=min_duration,
            max_duration=max_duration,
            min_onset=min_onset,
            max_onset=max_onset,
            min_offset=min_offset,
            max_offset=max_offset,
            min_pitch=min_pitch,
            max_pitch=max_pitch,
            previous_method=s.metadata.method_metadata,
            previous_input=s.metadata.input)
        if save_metadata:
            new_s.metadata.input = md.ObjectMetadata(s)

        return new_s
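The filtering itself is a single comprehension over note attributes. A standalone sketch with a simple namedtuple standing in for the library's note objects (the field names and bounds below are assumptions):

from collections import namedtuple

Note = namedtuple('Note', 'onset offset duration pitch')

notes = [Note(0.0, 0.5, 0.5, 60),
         Note(0.6, 0.65, 0.05, 62),   # too short
         Note(1.0, 2.0, 1.0, 108)]    # pitch out of range

min_duration, max_duration = 0.1, float('inf')
min_pitch, max_pitch = 21, 96

kept = [n for n in notes
        if min_duration <= n.duration <= max_duration
        and min_pitch <= n.pitch <= max_pitch]

print(kept)  # only the first note survives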
Example #9
    def evaluate(self,
                 identification,
                 estimated,
                 reference,
                 frame_length=0.01,
                 save_metadata=True):
        """Computes the evaluation based on a estimated and reference scores.

        Args:
            identification: some form of identification that will be stored in
                            metadata.
            estimated: estimated score.
            reference: reference score.
            frame_length: time step between evaluated frames. Default: 0.01.
            save_metadata: flag indicating whether the metadata should be
                           computed. Default: True.

        Returns:
            Evaluation object.
        """
        t_start_estimated, t_end_estimated = estimated.get_timespan()
        t_start_reference, t_end_reference = reference.get_timespan()

        correct = 0.
        total_estimated = 0.
        total_reference = 0.

        # Sanity check
        if t_end_estimated - t_start_estimated >= 0 and \
           t_end_reference - t_start_reference >= 0:
            # Starts at the first frame
            t = min(t_start_estimated, t_start_reference)

            # Ends with the minimum frame time
            t_end = min(t_end_estimated, t_end_reference)

            while t < t_end:
                # Gets notes active at the current time
                estimated_active_notes = estimated.get_active_notes(t)
                reference_active_notes = reference.get_active_notes(t)

                total_estimated += len(estimated_active_notes)
                total_reference += len(reference_active_notes)

                for e in estimated_active_notes:
                    e_name = e.to_name()

                    for r in reference_active_notes:
                        if e_name == r.to_name():
                            correct += 1

                            # As each reference note can match only a single
                            # estimation, we remove the matched reference
                            reference_active_notes.remove(r)

                            # Stops looking for references, as we got a match
                            break

                t += frame_length

        # Creates evaluation object with the description of the method
        e = evaluation.Evaluation(total_estimated, total_reference, correct)
        e.metadata.estimated = estimated.metadata
        e.metadata.reference = reference.metadata
        e.metadata.method = md.Metadata(name='mirex framewise',
                                        id=identification)
        if save_metadata:
            e.metadata.estimated_input = md.ObjectMetadata(estimated)
            e.metadata.reference_input = md.ObjectMetadata(reference)

        return e
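The Evaluation object is built from three framewise counts. How it reports metrics is not shown in this excerpt, but the counts support the usual precision, recall, and F-measure; the formulas below are the standard definitions, not the library's code.

total_estimated, total_reference, correct = 180., 200., 150.

precision = correct / total_estimated if total_estimated else 0.
recall = correct / total_reference if total_reference else 0.
f_measure = (2 * precision * recall / (precision + recall)
             if precision + recall else 0.)

print(precision, recall, f_measure)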
Example #10
    def evaluate(self,
                 identification,
                 estimated,
                 reference,
                 onset_tolerance=0.05,
                 duration_tolerance=-1,
                 ignore_pitch=False,
                 save_metadata=True):
        """Computes the evaluation based on a estimated and reference scores.

        Args:
            identification: some form of identification that will be stored in
                            metadata.
            estimated: estimated score.
            reference: reference score.
            onset_tolerance: additive tolerance for the onset to be valid.
                             Default: 0.05.
            duration_tolerance: multiplicative tolerance for the duration to be
                                valid. If negative, duration restrictions are
                                ignored. Default: -1.
            ignore_pitch: ignore the notes' pitch when evaluating.
                          Default: False.
            save_metadata: flag indicating whether the metadata should be
                           computed. Default: True.

        Returns:
            Evaluation object.
        """
        n_ref = len(reference.data)
        n_est = len(estimated.data)

        correct = 0

        # Don't rely on the notes' default ordering because:
        # 1) it may have been overridden somewhere, which could break this code
        # 2) we only need to order by onset, not by offset and pitch
        estimated_data = sorted(estimated.data, key=lambda n: n.data.onset)
        reference_data = sorted(reference.data, key=lambda n: n.data.onset)
        negative_duration_tolerance = (duration_tolerance < 0)

        # Iterates estimated data to match the reference
        for e in estimated_data:
            e_onset = e.data.onset
            e_duration = e.data.duration
            e_name = e.to_name()

            # As the notes are ordered by onset, we can remove from the
            # reference every note whose onset is below the current lower bound
            i = 0
            while i < len(reference_data) and \
                    reference_data[i].data.onset < e_onset - onset_tolerance:
                i += 1
            reference_data = reference_data[i:]

            for r in reference_data:
                # Checks if onset is above range. If so, we can stop the search
                # because all other notes after it will also be above
                if r.data.onset > e_onset + onset_tolerance:
                    break

                # Checks if notes match in duration and name if required
                if (negative_duration_tolerance or
                         (abs(e_duration-r.data.duration) < \
                          max(r.data.duration * duration_tolerance,
                              onset_tolerance))) \
                    and (ignore_pitch or e_name == r.to_name()):
                    correct += 1

                    # As each reference note can match only a single estimation,
                    # we remove the matched reference
                    reference_data.remove(r)

                    # Stops looking for references, as we got a match
                    break

        # Creates evaluation object with the description of the method
        e = evaluation.Evaluation(n_est, n_ref, correct)
        e.metadata.estimated = estimated.metadata
        e.metadata.reference = reference.metadata
        e.metadata.method = md.Metadata(name='mirex symbolic',
                                        id=identification,
                                        duration_tolerance=duration_tolerance,
                                        onset_tolerance=onset_tolerance)
        if save_metadata:
            e.metadata.estimated_input = md.ObjectMetadata(estimated)
            e.metadata.reference_input = md.ObjectMetadata(reference)

        return e
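The symbolic matching walks two onset-sorted note lists and pairs each estimated note with at most one reference note within the onset tolerance. A compact sketch with (onset, name) tuples standing in for the library's note objects, ignoring the duration tolerance:

onset_tolerance = 0.05

estimated = [(0.10, 'C4'), (0.52, 'E4'), (1.00, 'G4')]
reference = [(0.08, 'C4'), (0.50, 'E4'), (1.20, 'G4')]

correct = 0
remaining = sorted(reference)
for e_onset, e_name in sorted(estimated):
    for r in remaining:
        r_onset, r_name = r
        if r_onset > e_onset + onset_tolerance:
            break                      # all later references are too late
        if abs(r_onset - e_onset) <= onset_tolerance and e_name == r_name:
            correct += 1
            remaining.remove(r)        # each reference matches at most once
            break

print(correct)  # 2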