def compute(self, s, note, instrument, number_frames=10, frame_skip=2,
            save_metadata=True):
    """Builds a basis from the mean of a spectrogram.

    Finds the frame with the largest summed magnitude (the attack), skips a
    few frames after it and averages the following frames into a single
    column vector.

    Args:
        s: Spectrogram object.
        note: note's name.
        instrument: instrument's name.
        number_frames: number of frames to use. Default: 10.
        frame_skip: number of frames to skip after the attack. Default: 2.
        save_metadata: flag indicating whether the metadata should be
                       computed. Default: True.

    Returns:
        LinearDecomposition object with the window's mean on the left.
    """
    data = s.data

    # Detects attack: frame (column) with the largest sum over axis 0.
    energy = numpy.sum(data, 0)
    energy_peak = numpy.argmax(energy)

    # Max number of frames available
    max_frames = numpy.size(data, 1)

    # Last start position that still leaves room for the whole window.
    last_start = max_frames - number_frames - frame_skip - 1

    if energy_peak > last_start:
        # Avoids getting less frames because data ended. When the
        # spectrogram is shorter than the requested window, last_start is
        # negative; a negative slice start would wrap around and could
        # select nothing at all, so it is clamped to the first frame.
        start = max(last_start, 0)
    else:
        # There's enough data, so skip a few frames after the energy peak.
        start = energy_peak + frame_skip

    # Cuts data
    data = data[:, start:start + number_frames]

    # Computes the mean over time and keeps it as a column vector.
    data = numpy.array([numpy.mean(data, 1)]).transpose()

    # Saves metadata
    input_metadata = md.ObjectMetadata(s) if save_metadata else None

    # Stores the basis as a decomposition
    d = ld.LinearDecomposition()
    d.add((instrument, note),
          left=data,
          left_metadata=md.Metadata(method='mean',
                                    number_of_frames=number_frames,
                                    frame_skip=frame_skip,
                                    spectrogram=s.metadata,
                                    spectrogram_input=input_metadata))

    return d
def trim(self, s, min_freq=0, max_freq=float('Inf'), min_time=0,
         max_time=float('Inf'), save_metadata=True):
    """Extracts a frequency/time rectangle from a spectrogram.

    Upper limits larger than the ones stored in the spectrogram's metadata
    are lowered to those values, so the defaults impose no restriction.

    Args:
        s: Spectrogram object to be trimmed.
        min_freq: minimum frequency to be kept. Default: 0.
        max_freq: maximum frequency to be kept. Default: inf.
        min_time: minimum time to be kept. Default: 0.
        max_time: maximum time to be kept. Default: inf.
        save_metadata: flag indicating whether the metadata should be
                       computed. Default: True.

    Returns:
        Trimmed Spectrogram object.
    """
    source_meta = s.metadata

    # Upper limits can't go beyond what the spectrogram holds.
    max_freq = min(max_freq, source_meta.max_freq)
    max_time = min(max_time, source_meta.max_time)

    # Translates the limits into bin indexes.
    last_freq_bin = s.freq_bin(max_freq)
    first_freq_bin = s.freq_bin(min_freq)
    last_time_bin = s.time_bin(max_time)
    first_time_bin = s.time_bin(min_time)

    trimmed = spectrogram.Spectrogram()

    # Both limits are inclusive, hence the +1 on the upper ends.
    trimmed.data = s.data[first_freq_bin:last_freq_bin + 1,
                          first_time_bin:last_time_bin + 1]

    # The new limits are the first value of the range of each border bin.
    trimmed.metadata.min_freq = s.freq_range(first_freq_bin)[0]
    trimmed.metadata.min_time = s.time_range(first_time_bin)[0]
    trimmed.metadata.max_freq = s.freq_range(last_freq_bin)[0]
    trimmed.metadata.max_time = s.time_range(last_time_bin)[0]

    trimmed.metadata.sampling_configuration = \
        source_meta.sampling_configuration
    trimmed.metadata.input_metadata = copy.deepcopy(source_meta)

    # Records the requested (already limited) region and the origin.
    trimmed.metadata.method = md.Metadata(
        original_input=source_meta.input,
        original_method=source_meta.method,
        name='trim',
        min_freq=min_freq,
        max_freq=max_freq,
        min_time=min_time,
        max_time=max_time)

    if save_metadata:
        trimmed.metadata.input = md.ObjectMetadata(s)

    return trimmed
def hard_sparsity(self, d, sparsity, save_metadata=True):
    """Alters the given linear decomposition, limiting its polyphony.

    All activations (right side) are stacked in a single matrix and, for
    each time frame (column), only the 'sparsity' largest values are kept;
    the others are set to zero. Values tied with the smallest kept value are
    also kept. The argument provided is destroyed.

    Each right-side entry is handled as a single row, as the result stored
    back always has shape (1, number of frames).

    Args:
        d: LinearDecomposition object to filter.
        sparsity: maximum polyphony allowed. Must be at least 1.
        save_metadata: flag indicating whether the metadata should be
                       computed. Default: True.

    Returns:
        Same decomposition given in arguments.

    Raises:
        ValueError: sparsity is smaller than 1.
    """
    if sparsity < 1:
        raise ValueError("sparsity must be a positive integer.")

    # Metadata of the input is taken before it is modified.
    if save_metadata:
        metadata = md.ObjectMetadata(d)
    else:
        metadata = None

    # Fixes the order of the keys so rows can be mapped back to them.
    keys = list(d.data.right.keys())
    if not keys:
        # Nothing to filter
        return d

    # One row per activation, one column per time frame.
    A = numpy.vstack([d.data.right[k] for k in keys])

    # Threshold for each frame is its 'sparsity'-th largest value, which is
    # the element at position (rows - sparsity) when sorted in ascending
    # order. If there are fewer rows than 'sparsity', everything is kept.
    kth = max(A.shape[0] - sparsity, 0)
    thresholds = numpy.partition(A, kth, axis=0)[kth, :]
    A = A * (A >= thresholds)

    # Stores the filtered rows back and adjusts the metadata
    for row, key in enumerate(keys):
        d.data.right[key] = A[row, :].reshape(1, -1)
        d.metadata.right[key] = md.Metadata(
            method="sparsity_level",
            sparsity=sparsity,
            activation_input=metadata,
            original_method=d.metadata.right[key])

    return d
def fold(self, s, folds=2, save_metadata=True):
    """Stacks time-shifted copies of a spectrogram.

    The output data is built by stacking, along the first axis, 'folds'
    slices of the input data, each one shifted by one frame in relation to
    the previous. Every slice has 'folds' frames less than the input. The
    maximum frequency stored in the metadata is multiplied by 'folds'.

    Args:
        s: Spectrogram object to be folded.
        folds: number of folds (tensor depth). Default: 2.
        save_metadata: flag indicating whether the metadata should be
                       computed. Default: True.

    Returns:
        Folded Spectrogram object.
    """
    new_s = spectrogram.Spectrogram()

    # Slice number k covers frames k to (k - folds) counted from the end,
    # so all slices have the same length. All of them are stacked at once
    # instead of growing the matrix one slice at a time.
    slices = [s.data[:, 0:-folds]]
    for shift in range(1, folds):
        slices.append(s.data[:, shift:shift - folds])
    new_s.data = numpy.vstack(slices)

    new_s.metadata.min_freq = s.metadata.min_freq
    new_s.metadata.min_time = s.metadata.min_time
    new_s.metadata.max_freq = s.metadata.max_freq * folds
    new_s.metadata.max_time = s.metadata.max_time

    new_s.metadata.sampling_configuration = \
        s.metadata.sampling_configuration

    new_s.metadata.method = md.Metadata(
        original_input=s.metadata.input,
        original_method=s.metadata.method,
        name='tensor-fold',
        min_freq=s.metadata.min_freq,
        max_freq=s.metadata.max_freq * folds,
        min_time=s.metadata.min_time,
        max_time=s.metadata.max_time)

    # The input's metadata belongs to the folded spectrogram. Writing it to
    # 's' would alter the argument and leave the result without an input.
    if save_metadata:
        new_s.metadata.input = md.ObjectMetadata(s)

    return new_s
def median_filter(self, d, window, save_metadata=True):
    """Alters the given linear decomposition, applying a median filter.

    The filtering process is done in the time domain. The argument provided
    is destroyed.

    Args:
        d: LinearDecomposition object to filter.
        window: window length.
        save_metadata: flag indicating whether the metadata should be
                       computed. Default: True.

    Returns:
        Same decomposition given in arguments.
    """
    # Metadata of the input is taken before it is modified.
    if save_metadata:
        metadata = md.ObjectMetadata(d)
    else:
        metadata = None

    # Filters each activation and adjusts the metadata. Only values of
    # existing keys are replaced, so the dictionary size doesn't change
    # during the iteration.
    for k in d.data.right.keys():
        # The activation is transposed before filtering and transposed back
        # afterwards.
        d.data.right[k] = mf.median_filter_centered(
            d.data.right[k].transpose(), window).transpose()

        d.metadata.right[k] = md.Metadata(
            method="median_filter_length",
            window=window,
            activation_input=metadata,
            original_method=d.metadata.right[k])

    return d
def convert(self, d, instrument, frequency, minimum_length,
            save_metadata=True):
    """Converts a linear decomposition to a score.

    If the given frequency is 0, then the frequency becomes the one in the
    spectrogram used to compute the linear decomposition activation.

    A note that is still active in the last frame of its activation is
    closed at the end of the activation.

    Args:
        d: LinearDecomposition object with binary right side.
        instrument: name of the instrument to be extracted.
        frequency: frequency used to transfer activation time bins to
                   timestamps.
        minimum_length: minimum length of note to be considered.
        save_metadata: flag indicating whether the metadata should be
                       computed. Default: True.

    Returns:
        Score object.
    """
    # Loads valid frequency to be used
    if frequency == 0.:
        ofs = d.metadata.get('sampling_configuration.ofs')
    else:
        ofs = float(frequency)

    s = score.Score()
    s.metadata.instrument = instrument

    if save_metadata:
        s.metadata.input = md.ObjectMetadata(d)

    s.metadata.method_metadata = md.Metadata(
        type='algorithm',
        algorithm='binary activation',
        frequency=ofs,
        minimum_length=minimum_length,
        activation_metadata=d.metadata.right)

    # TODO: check if this parameter really does what it's supposed to.
    # Currently it ignores zeros in the activation matrix once a note has
    # been detected and the minimum window hasn't been found.
    minimum_window = minimum_length * ofs

    for k, activation, _ in d.right():
        # Keys are (instrument, note, ...)
        if k[0] != instrument:
            continue

        n_frames = activation.shape[1]
        note_start = -1

        # Considers only one line per note for now. TODO: consider more
        for t in range(n_frames):
            active = activation[0, t]

            # Checks if starting a new note
            if active and note_start == -1:
                note_start = t

            # Checks for note ending
            elif not active and note_start != -1:
                # If minimum length is met, adds note
                if t - note_start > minimum_window:
                    s.append(note.Note(onset=note_start / float(ofs),
                                       offset=t / float(ofs),
                                       name=k[1]))

                # Marks note as finished
                note_start = -1

        # A note that lasts until the last frame never sees an inactive
        # frame, so it is closed here instead of being lost.
        if note_start != -1 and n_frames - note_start > minimum_window:
            s.append(note.Note(onset=note_start / float(ofs),
                               offset=n_frames / float(ofs),
                               name=k[1]))

    return s
def compute(self, spectrograms, size=None, instrument=None, note=None,
            basis=None, beta=2., min_delta=0., max_iterations=100,
            save_metadata=True):
    """Computes the activation matrix from a basis matrix and a spectrogram.

    Uses the beta divergence to compute the activations.

    If min_delta is zero, the code may run faster because no beta divergence
    is actually computed. Otherwise, the code stops computing if two
    iterations of the algorithm don't improve the result by more than
    min_delta.

    Only one of 'basis' and 'size' arguments may be set, as they specify
    different things. With 'size', the user extracts both a basis and an
    activation from the spectrogram, while with 'basis' only an activation
    is computed.

    Each activation computed has the same key as the corresponding basis
    plus the spectrogram's index in the list provided.

    If a basis is being created, its name is a tuple of (instrument, note),
    even if they are None. In this case numpy's global random generator is
    seeded with 0 before the matrices are initialized.

    Args:
        spectrograms: list of Spectrograms to be merged and used to compute
                      the activations.
        size: Number of basis to extract from the spectrogram. Must be None
              if the 'basis' argument is defined.
        instrument: Name of the instrument. This is used only if size is
                    set. If None, it's ignored. Default: None.
        note: Name of the note. This is used only if size is set. If None,
              it's ignored. Default: None.
        basis: LinearDecomposition object describing the basis to be used.
               Must be None if the 'size' argument is defined.
        beta: value for the beta used in divergence. Default: 2.
        min_delta: threshold for early stop. Default: 0.
        max_iterations: maximum number of iterations to use. Default: 100.
        save_metadata: flag indicating whether the metadata should be
                       computed. Default: True.

    Returns:
        LinearDecomposition object with basis and activations for the
        spectrograms.

    Raises:
        ValueError: none or both of 'size' and 'basis' were given. numpy
                    may also raise it when the matrices being merged have
                    incompatible sizes.
    """
    # Check arguments compatibility
    if size is None and basis is None:
        raise ValueError("One of 'size' or 'basis' must not be None.")

    if basis is not None and size is not None:
        raise ValueError("Only one of 'size' or 'basis' must not be None.")

    # Saves metadata
    if save_metadata:
        s_meta = [md.ObjectMetadata(s) for s in spectrograms]
    else:
        s_meta = [None for s in spectrograms]

    # Marks the limits of each spectrogram
    X_start = [0]
    for s in spectrograms:
        X_start.append(X_start[-1] + s.data.shape[1])

    # Merges spectrograms
    X = numpy.hstack([s.data for s in spectrograms])

    # If we have a basis, we only need to compute the activations
    if basis is not None:
        # Merges basis but keep track where each one starts so that it can
        # be used to characterize the activations
        B = []
        B_start = [0]
        for k, data, metadata in basis.left():
            B.append(data)
            B_start.append(B_start[-1] + data.shape[1])
        B = numpy.hstack(B)

        # Saves metadata
        if save_metadata:
            b_meta = md.ObjectMetadata(B)
        else:
            b_meta = None

        # Initializes activations
        A = numpy.ones((B.shape[1], X.shape[1]))

        # Computes the activation. The return value isn't used, so the
        # result is presumably written in A -- TODO confirm.
        self.compute_activation(X, B, A, beta, min_delta, max_iterations)

        # Starts creating the decomposition object
        d = ld.LinearDecomposition()

        # Copy the left stuff from the basis, since they came from there
        d.data.left = basis.data.left
        d.metadata.left = basis.metadata.left

        # Cuts the activation. For each combination of basis and
        # spectrograms, we get an activation
        for i, (k, data, metadata) in enumerate(basis.left()):
            for j, spec in enumerate(spectrograms):
                # Since spectrograms don't have name, we call it by its
                # sequence number
                s_name = (j,)

                # Cuts the activation
                A_cut = A[B_start[i]:B_start[i + 1],
                          X_start[j]:X_start[j + 1]]

                # Merges the basis key with the spectrogram name to create
                # a key for the activation. Then stores a lot of metadata
                # about what was used to compute it. The metadata must come
                # from the j-th spectrogram, not from whatever a previous
                # loop left in a variable.
                d.add(k + s_name,
                      right=A_cut,
                      right_metadata=md.Metadata(
                          method="beta_nmf",
                          beta=beta,
                          min_delta=min_delta,
                          max_iterations=max_iterations,
                          spectrogram_input=s_meta[j],
                          spectrogram=spec.metadata,
                          basis_input=b_meta,
                          basis=metadata))
    else:
        # Everyone gets the same matrices to work with every time, so we
        # avoid consistency problems. However, we can't have the same values
        # filling the matrices or the algorithm can't separate the basis and
        # activations (everyone keeps getting the same value).
        numpy.random.seed(0)
        B = numpy.random.rand(X.shape[0], size)
        A = numpy.random.rand(size, X.shape[1])

        # Computes both basis and activations
        self.compute_both(X, B, A, beta, min_delta, max_iterations)

        # Key for the basis created
        key = (instrument, note)

        # Starts creating the decomposition object
        d = ld.LinearDecomposition()

        # Adds basis
        d.add(key,
              left=B,
              left_metadata=md.Metadata(
                  method="beta_nmf",
                  beta=beta,
                  min_delta=min_delta,
                  max_iterations=max_iterations,
                  spectrogram_input=s_meta,
                  spectrogram=[s.metadata for s in spectrograms]))

        # Adds the activations cut to match the spectrograms
        for j, spec in enumerate(spectrograms):
            # Since spectrograms don't have name, we call it by its sequence
            # number
            s_name = (j,)

            # Cuts the activation
            A_cut = A[:, X_start[j]:X_start[j + 1]]

            # Merges the basis key with the spectrogram name to create a key
            # for the activation. Then stores a lot of metadata about what
            # was used to compute it.
            d.add(key + s_name,
                  right=A_cut,
                  right_metadata=md.Metadata(
                      method="beta_nmf",
                      beta=beta,
                      min_delta=min_delta,
                      max_iterations=max_iterations,
                      spectrogram_input=s_meta[j],
                      spectrogram=spec.metadata))

    return d
def trim_notes(self, s, min_duration=0, max_duration=float('inf'),
               min_pitch=0, max_pitch=float('inf'), min_onset=0,
               max_onset=float('inf'), min_offset=0,
               max_offset=float('inf'), save_metadata=True):
    """Removes from a score notes that don't satisfy a criteria.

    Trims the transcription so notes that are out of the specified bounds
    will be cut out of the transcription. The notes aren't copied for the
    new Score, so any modification on them alters both the original and
    trimmed.

    This function is useful when you are trying to exclude notes that are
    obviously wrong in a certain transcription.

    By default, all arguments not provided don't cause any note to be
    removed. All limits are inclusive.

    Args:
        s: Score object.
        min_duration: minimum duration to keep. Default: 0.
        max_duration: maximum duration to keep. Default: inf.
        min_pitch: minimum pitch to keep. Default: 0.
        max_pitch: maximum pitch to keep. Default: inf.
        min_onset: minimum onset to keep. Default: 0.
        max_onset: maximum onset to keep. Default: inf.
        min_offset: minimum offset to keep. Default: 0.
        max_offset: maximum offset to keep. Default: inf.
        save_metadata: flag indicating whether the metadata should be
                       computed. Default: True.

    Returns:
        Trimmed Score object.
    """
    new_s = score.Score()

    # The whole list of kept notes is given to the score in a single call.
    new_s.append([
        n for n in s.data
        if min_duration <= n.data.duration <= max_duration
        and min_onset <= n.data.onset <= max_onset
        and min_offset <= n.data.offset <= max_offset
        and min_pitch <= n.data.pitch <= max_pitch
    ])

    new_s.metadata.instrument = s.metadata.instrument

    new_s.metadata.method_metadata = md.Metadata(
        type="trim",
        min_duration=min_duration,
        max_duration=max_duration,
        min_onset=min_onset,
        max_onset=max_onset,
        min_offset=min_offset,
        max_offset=max_offset,
        min_pitch=min_pitch,
        max_pitch=max_pitch,
        previous_method=s.metadata.method_metadata,
        previous_input=s.metadata.input)

    # The input's metadata belongs to the trimmed score. Writing it to 's'
    # would alter the argument and leave the result without an input.
    if save_metadata:
        new_s.metadata.input = md.ObjectMetadata(s)

    return new_s
def evaluate(self, identification, estimated, reference, frame_length=0.01,
             save_metadata=True):
    """Computes the evaluation based on a estimated and reference scores.

    Time is swept in steps of 'frame_length', from the earliest start of
    the two scores until the earliest end. At each step, the notes active
    in both scores are compared by name and each reference note can match
    a single estimated note.

    Args:
        identification: some form of identification that will be stored in
                        metadata.
        estimated: estimated score.
        reference: reference score.
        frame_length: step size for time. Must be positive. Default: 0.01.
        save_metadata: flag indicating whether the metadata should be
                       computed. Default: True.

    Returns:
        Evaluation object.

    Raises:
        ValueError: frame_length isn't positive.
    """
    # Time would never advance with a non-positive step.
    if frame_length <= 0:
        raise ValueError("frame_length must be positive.")

    t_start_estimated, t_end_estimated = estimated.get_timespan()
    t_start_reference, t_end_reference = reference.get_timespan()

    correct = 0.
    total_estimated = 0.
    total_reference = 0.

    # Sanity check
    if t_end_estimated - t_start_estimated >= 0 and \
       t_end_reference - t_start_reference >= 0:
        # Starts at the first frame
        t = min(t_start_estimated, t_start_reference)

        # Ends with the minimum frame time
        t_end = min(t_end_estimated, t_end_reference)

        while t < t_end:
            # Gets notes active at the current time. The reference ones are
            # copied because matched notes are removed from the list, and
            # that must never reach the score itself.
            estimated_active_notes = estimated.get_active_notes(t)
            reference_active_notes = list(reference.get_active_notes(t))

            total_estimated += len(estimated_active_notes)
            total_reference += len(reference_active_notes)

            for e in estimated_active_notes:
                e_name = e.to_name()

                for r in reference_active_notes:
                    if e_name == r.to_name():
                        correct += 1

                        # As each reference note can match only a single
                        # estimation, we remove the matched reference
                        reference_active_notes.remove(r)

                        # Stops looking for references, as we got a match
                        break

            t += frame_length

    # Creates evaluation object with the description of the method
    e = evaluation.Evaluation(total_estimated, total_reference, correct)
    e.metadata.estimated = estimated.metadata
    e.metadata.reference = reference.metadata
    e.metadata.method = md.Metadata(name='mirex framewise',
                                    id=identification)

    if save_metadata:
        e.metadata.estimated_input = md.ObjectMetadata(estimated)
        e.metadata.reference_input = md.ObjectMetadata(reference)

    return e
def evaluate(self, identification, estimated, reference,
             onset_tolerance=0.05, duration_tolerance=-1,
             ignore_pitch=False, save_metadata=True):
    """Computes the evaluation based on a estimated and reference scores.

    Each estimated note is matched with the first reference note, in onset
    order, whose onset is within 'onset_tolerance' of the estimated one and
    that also satisfies the duration and name restrictions, if they are
    enabled. Each reference note can match a single estimated note.

    Args:
        identification: some form of identification that will be stored in
                        metadata.
        estimated: estimated score.
        reference: reference score.
        onset_tolerance: additive tolerance for the onset to be valid.
        duration_tolerance: multiplicative tolerance for the duration to be
                            valid. If negative, ignore duration
                            restrictions.
        ignore_pitch: ignore notes' pitch when evaluating.
        save_metadata: flag indicating whether the metadata should be
                       computed. Default: True.

    Returns:
        Evaluation object.
    """
    n_ref = len(reference.data)
    n_est = len(estimated.data)

    correct = 0

    # Don't use default comparison because:
    # 1) some crazy person may want to change it, and that could break this
    # code
    # 2) we don't need to order offset and pitch
    estimated_data = sorted(estimated.data, key=lambda n: n.data.onset)
    reference_data = sorted(reference.data, key=lambda n: n.data.onset)

    ignore_duration = (duration_tolerance < 0)

    # Iterates estimated data to match the reference
    for e in estimated_data:
        e_onset = e.data.onset
        e_duration = e.data.duration
        e_name = e.to_name()

        lower_bound = e_onset - onset_tolerance
        upper_bound = e_onset + onset_tolerance

        # As the notes are ordered by onset, we can remove from the
        # reference every note whose onset is below the current lower
        # bound. If no note reaches the bound (or the list is empty), all
        # of them are removed.
        first_valid = len(reference_data)
        for i, r in enumerate(reference_data):
            if r.data.onset >= lower_bound:
                first_valid = i
                break
        del reference_data[:first_valid]

        for i, r in enumerate(reference_data):
            # Checks if onset is above range. If so, we can stop the search
            # because all other notes after it will also be above
            if r.data.onset > upper_bound:
                break

            # Checks if notes match in duration and name if required
            duration_ok = ignore_duration or \
                (abs(e_duration - r.data.duration) <
                 max(r.data.duration * duration_tolerance, onset_tolerance))

            if duration_ok and (ignore_pitch or e_name == r.to_name()):
                correct += 1

                # As each reference note can match only a single estimation,
                # we remove the matched reference. It's removed by position
                # so the notes' comparison isn't used.
                del reference_data[i]

                # Stops looking for references, as we got a match
                break

    # Creates evaluation object with the description of the method
    e = evaluation.Evaluation(n_est, n_ref, correct)
    e.metadata.estimated = estimated.metadata
    e.metadata.reference = reference.metadata
    e.metadata.method = md.Metadata(name='mirex symbolic',
                                    id=identification,
                                    duration_tolerance=duration_tolerance,
                                    onset_tolerance=onset_tolerance)

    if save_metadata:
        e.metadata.estimated_input = md.ObjectMetadata(estimated)
        e.metadata.reference_input = md.ObjectMetadata(reference)

    return e