def __compute_zscore(data: np.ndarray, axis: int = -1, clip: float = 3.0) -> np.ndarray:
    """
    Replace ``data`` with clipped, iteratively re-standardised z-scores.

    The array is processed one index at a time along ``axis``. ``data`` is
    modified in place (infinities are overwritten with NaN, then every slice
    is overwritten with its scores) and the same array object is returned.

    :param data: Array to standardise; it must be able to hold NaN, since
        infinite entries are replaced by NaN before scoring.
    :param axis: Axis whose indices are standardised independently. Negative
        values count from the last axis. ``None`` standardises the whole
        array as a single slice.
    :param clip: Scores are limited to the range ``[-clip, clip]``.
    :return: ``data`` itself, after in-place modification.
    """
    dims = data.ndim

    # Infinite values are treated as missing so the nan-aware statistics
    # below skip them.
    data[data == np.inf] = np.nan
    data[data == -np.inf] = np.nan

    if axis is None:
        length = 1
    else:
        if axis < 0:
            axis += dims
        length = data.shape[axis]

    # Stop re-standardising once |mean| + |std - 1| drops to this tolerance.
    epsilon = 1e-4

    for i in range(length):
        # The index must be a tuple: NumPy no longer accepts a list of
        # ints/slices as a multidimensional index. With axis=None no
        # position matches, so the index selects the whole array.
        index = tuple(i if j == axis else slice(None) for j in range(dims))
        sliced = data[index]

        # presumably a boolean mask shaped like `sliced` (it is negated and
        # used as an index below) - confirm against find_extreme_values
        extremes = find_extreme_values(sliced)

        # First pass: centre on the median and scale by the standard
        # deviation, both estimated from the non-extreme values only.
        score = (sliced - np.nanmedian(sliced[~extremes])) / np.nanstd(sliced[~extremes])
        score = np.clip(score, -clip, clip)

        # Second pass: standardise the clipped scores, again using only the
        # non-extreme values for the estimates.
        avg = np.nanmean(score[~extremes])
        sd = np.nanstd(score[~extremes])
        z = (score - avg) / sd

        # Clipping shifts the mean and shrinks the spread, so clip and
        # re-standardise (now over all values) at most three times.
        total_err = 1.0
        trial = 0
        while total_err > epsilon and trial < 3:
            if (abs(z) >= clip).any():
                z = np.clip(z, -clip, clip)
            avg = np.nanmean(z)
            std = np.nanstd(z)
            total_err = abs(avg) + abs(std - 1)
            z = (z - avg) / std
            trial += 1

        data[index] = z

    return data
def destagger_array(a: np.ndarray, axis: int) -> np.ndarray:
    """
    Destagger an array by averaging neighbouring elements along one axis.

    Each output element is the mean of two adjacent input elements along
    ``axis``, so the result is one element shorter along that axis and
    unchanged in every other dimension.

    :param a: Array to destagger.
    :param axis: Axis to average along. Negative values count from the last
        axis, as in NumPy.
    :return: A new array with ``a.shape[axis] - 1`` entries along ``axis``.
    :raises ValueError: If ``axis`` is out of bounds for ``a``.
    """
    dims = a.ndim
    if not -dims <= axis < dims:
        raise ValueError(
            f"axis {axis} is out of bounds for array of dimension {dims}")
    # Normalise negative axes; otherwise no dimension would match below and
    # the input would be returned without being destaggered.
    if axis < 0:
        axis += dims

    # Two views offset by one element along `axis`: all-but-last and
    # all-but-first.
    lower = tuple(
        slice(0, -1) if i == axis else slice(None) for i in range(dims))
    upper = tuple(
        slice(1, None) if i == axis else slice(None) for i in range(dims))
    return 0.5 * a[lower] + 0.5 * a[upper]
def align_midi(audio_cqt: np.ndarray, audio_times: np.ndarray,
               full_synthesized_midi_path: str, full_alignment_write_path: str,
               alignment_parameters: Optional[AlignmentParameters] = None):
    """
    Align an audio recording to a synthesized MIDI file and store the outcome.

    Nothing is returned. The function writes two results: the normalized
    alignment score (keyed by the MIDI file name) and the alignment itself
    (to ``full_alignment_write_path``).

    :param audio_cqt: The CQT of the audio of the alignment
    :param audio_times: Array of times of the audio, indexed by the audio
        side of the alignment path
    :param full_synthesized_midi_path: The path to the synthesized MIDI file
    :param full_alignment_write_path: The path to write the alignment to
    :param alignment_parameters: Parameters for alignment; a default
        ``AlignmentParameters()`` is used when omitted
    """
    # Fall back to default parameters when the caller supplied none
    if alignment_parameters is None:
        alignment_parameters = AlignmentParameters()

    # Load the synthesized MIDI rendition at the configured sampling rate
    synthesized_audio, _ = librosa.load(
        full_synthesized_midi_path, sr=alignment_parameters.sampling_rate)

    # CQT of the synthesized MIDI, together with its time axis
    midi_cqt, midi_times = _compute_cqt(synthesized_audio, alignment_parameters)

    # Pairwise cosine distances: rows are MIDI frames, columns are audio frames
    distance_matrix = scipy.spatial.distance.cdist(midi_cqt, audio_cqt, 'cosine')

    # The additive penalty is the median of all distances; the multiplicative
    # penalty is neutral
    flattened_distances = np.ravel(distance_matrix)
    additive_penalty = float(np.median(flattened_distances))
    multiplicative_penalty = 1.

    # Run DTW over the distance matrix; p indexes MIDI frames, q audio frames
    p, q, score = _dtw(distance_matrix, alignment_parameters.gully,
                       additive_penalty, multiplicative_penalty)

    # Pair up the MIDI and audio times along the alignment path
    midi_alignment = MIDIAlignment(midi_times[p], audio_times[q])

    # Normalize by the path length, then by the mean distance inside the
    # sub-matrix spanned by the path (upper bounds are exclusive)
    midi_span = slice(p.min(), p.max())
    audio_span = slice(q.min(), q.max())
    score = score / len(p)
    score = score / distance_matrix[midi_span, audio_span].mean()

    # Store the score under the MIDI file's name
    midi_name = fh.get_file_name_from_full_path(full_synthesized_midi_path)
    decibel.import_export.midi_alignment_score_io.write_chord_alignment_score(
        midi_name, score)

    # Store the alignment itself
    decibel.import_export.midi_alignment_io.write_alignment_file(
        midi_alignment, full_alignment_write_path)