def padded_tens_with_zeros(tens, longest_bar_len):
    """
    DEPRECATED: always raises, kept only so that legacy callers fail loudly.

    This function used to pad a single-bar tensor slice (a matrix with a 3rd
    mode of size 1) with zeros along its 2nd dimension, so that it reached the
    size of the longest bar. That behavior is outdated; the padding code was
    unreachable and has been removed.

    Parameters
    ----------
    tens : numpy array
        Unused. Formerly the spectrogram of a single bar, interpreted as a tensor.
    longest_bar_len : integer
        Unused. Formerly the desired size of the 2nd dimension.

    Raises
    ------
    err.OutdatedBehaviorException
        Always.

    """
    raise err.OutdatedBehaviorException(
        "Padding the tensor with zeroes correspond to an outdated behvaior, shouldn't be invoked."
    )
def load_RWC_dataset(music_folder_path, annotations_type="MIREX10", desired_format=None, downbeats=None):
    """
    Load the data on the RWC dataset, ie names of songs and of their annotations.
    The annotations can be either AIST or MIREX 10.

    Parameters
    ----------
    music_folder_path : String
        Path of the folder to parse. Only entries ending with ".wav" are kept,
        and the part of their name before the extension must be an integer.
    annotations_type : "AIST" [1] or "MIREX10" [2]
        The type of annotations to load (both have a specific behavior and formatting).
        It is forwarded as-is to dm.get_annotation_name_from_song.
        The default is "MIREX10".
    desired_format : DEPRECATED
        Must be left to None.
    downbeats : DEPRECATED
        Must be left to None.

    Raises
    ------
    err.OutdatedBehaviorException
        If one of the deprecated arguments (desired_format, downbeats) is set.
    ValueError
        If a ".wav" file has a name which is not an integer.
    NotImplementedError
        Presumably raised by dm.get_annotation_name_from_song when the
        annotation type is not handled (to confirm against that helper).

    Returns
    -------
    numpy array
        Array of pairs [song file name, annotation name], one per ".wav" file,
        in the order returned by os.listdir.

    References
    ----------
    [1] Goto, M. (2006, October). AIST Annotation for the RWC Music Database.
    In ISMIR (pp. 359-360).

    [2] Bimbot, F., Sargent, G., Deruty, E., Guichaoua, C., & Vincent, E. (2014, January).
    Semiotic description of music structure: An introduction to the Quaero/Metiss structural annotations.

    """
    # Identity test on purpose: "!= None" is evaluated element-wise on numpy
    # arrays, which made the condition itself crash for array arguments.
    if downbeats is not None or desired_format is not None:
        raise err.OutdatedBehaviorException(
            "This version of loading is deprecated.")

    paths = []
    for file_name in os.listdir(music_folder_path):
        if not file_name.endswith(".wav"):
            continue
        # Songs are named after their number, which is zero-padded to 3 digits
        # before looking for the annotation.
        song_number = "{:03d}".format(int(file_name[:-4]))
        annotation = dm.get_annotation_name_from_song(song_number, annotations_type)
        paths.append([file_name, annotation])
    return np.array(paths)
def compute_score_from_frontiers_in_bar(reference, frontiers_in_bar, bars, window_length=0.5):
    """
    Computes precision, recall and f measure from estimated frontiers (in bar indexes)
    and the reference (in seconds).
    The frontiers are converted in time, then in segments, and the scores are
    delegated to compute_score_of_segmentation (mir_eval toolbox, according to
    the original documentation).

    Parameters
    ----------
    reference : list of tuples
        The reference annotations, as a list of tuples (start, end), in seconds.
    frontiers_in_bar : list of integers
        The frontiers between segments, in bar indexes.
    bars : list of tuples
        The bars of the song. It has to be 2-dimensional once converted to a
        numpy array (a flat list of downbeats is rejected).
    window_length : float, optional
        The window size for the score (tolerance for the frontier to be validated).
        The default is 0.5.

    Raises
    ------
    err.OutdatedBehaviorException
        If bars is not 2-dimensional (old format: list of downbeats).

    Returns
    -------
    precision: float in [0,1]
        Precision of these frontiers,
        ie the proportion of accurately found frontiers among all found frontiers.
    recall: float in [0,1]
        Recall of these frontiers,
        ie the proportion of accurately found frontiers among all accurate frontiers.
    f_measure: float in [0,1]
        F measure of these frontiers,
        ie the harmonic mean of both precedent scores.

    """
    try:
        # Only probing for a second dimension, the value itself is not needed.
        np.array(bars).shape[1]
    except (IndexError, ValueError) as exc:
        # IndexError: bars is 0 or 1-dimensional.
        # ValueError: recent numpy versions refuse ragged sequences.
        raise err.OutdatedBehaviorException(
            "Bars is still a list of downbeats, which is an old beavior, and shouldn't happen anymore. To track and to fix."
        ) from exc

    frontiers_in_time = frontiers_from_bar_to_time(frontiers_in_bar, bars)
    estimated_segments = frontiers_to_segments(frontiers_in_time)
    return compute_score_of_segmentation(
        reference, estimated_segments, window_length=window_length)
def load_bars(persisted_path, song_name):
    """
    DEPRECATED: always raises, use load_or_save_bars(persisted_path, song_path) instead.

    This function used to load the bars persisted for this song, from the
    "bars" subfolder of persisted_path. The loading code was unreachable
    (and relied on Windows-only path separators), so it has been removed.

    Parameters
    ----------
    persisted_path : string
        Unused. Formerly the path where the bars should be found.
    song_name : string
        Unused. Formerly the name of the song (identifier of the bars to load).

    Raises
    ------
    err.OutdatedBehaviorException
        Always.

    """
    raise err.OutdatedBehaviorException(
        "You should use load_or_save_bars(persisted_path, song_path) instead, as it handle the fact that bars weren't computed yet."
    )
def load_spectrogram_and_bars(persisted_path, song_name, feature, hop_length, fmin=98):
    """
    DEPRECATED: always raises, use
    load_or_save_spectrogram_and_bars(persisted_path, song_path, feature, hop_length, fmin)
    instead.

    This function used to load the bars and the spectrogram persisted for this
    song, from the "bars" and "spectrograms" subfolders of persisted_path.
    The loading code was unreachable (and relied on Windows-only path
    separators), so it has been removed.

    Parameters
    ----------
    persisted_path : string
        Unused. Formerly the path where the bars and the spectrogram should be found.
    song_name : string
        Unused. Formerly the name of the song (identifier of the bars to load).
    feature : string
        Unused. Formerly the feature of the spectrogram, part of its identifier.
    hop_length : integer
        Unused. Formerly the hop_length of the spectrogram, part of its identifier.
    fmin : integer
        Unused. Formerly the minimal frequence of the spectrogram, part of its identifier.
        The default is 98.

    Raises
    ------
    err.OutdatedBehaviorException
        Always.

    """
    raise err.OutdatedBehaviorException(
        "You should use load_or_save_spectrogram_and_bars(persisted_path, song_path, feature, hop_length, fmin) instead, as it handle the fact that bars weren't computed yet."
    )
def _bar_sample_indexes(frame_start, frame_end, subdivision):
    """Return `subdivision` frame indexes evenly spread from frame_start (included) towards frame_end (excluded)."""
    return [
        int(round(frame_start + k * (frame_end - frame_start) / subdivision))
        for k in range(subdivision)
    ]


def tensorize_barwise(spectrogram, bars, hop_length_seconds, subdivision, midi=False):
    """
    Returns a tensor-spectrogram from a spectrogram and bars starts and ends.
    Each bar of the tensor_spectrogram will contain the same number of frames,
    defined by the "subdivision" parameter.
    These frames are selected from an over-sampled spectrogram, to adapt to the
    specific size of each bar.

    The first element of `bars` is skipped (only bars[1:] are used). The first
    remaining bar is always kept; for the following ones, the processing stops
    at the first bar whose last selected frame falls outside of the spectrogram.

    Parameters
    ----------
    spectrogram : numpy array
        The spectrogram to return as a tensor-spectrogram, indexed as
        (frequency, frame).
    bars : list of tuples
        List of the bars (start, end), in seconds, to cut the spectrogram at bar delimitation.
    hop_length_seconds : float
        The hop_length, in seconds.
    subdivision : integer
        The number of subdivision of the bar to be contained in each slice of the tensor.
    midi : boolean, optional
        Formerly a flag for midi spectrograms. This mode is bugged and can't be
        used anymore: setting it raises an exception.
        The default is False.

    Raises
    ------
    err.OutdatedBehaviorException
        If midi is True.
    IndexError
        If no bar is left after skipping the first one, or if the first kept
        bar falls outside of the spectrogram.

    Returns
    -------
    tensorly tensor
        The tensor-spectrogram as a tensorly tensor (float32), of shape
        (frequency, subdivision, number of kept bars).

    """
    if midi:
        # Fail before doing any work, this mode can't succeed.
        raise err.OutdatedBehaviorException("Should'nt be used, still bugged")

    # Sanity check on the hop length, expressed in samples at 44100Hz:
    # only 32 and 64 are expected, anything else is reported (not rejected).
    hop = int(hop_length_seconds * 44100)
    if hop != 32 and hop != 64:
        print("hop_length a 44100Hz = " + str(hop) + ", normal ?")

    bars_idx = dm.segments_from_time_to_frame_idx(bars[1:], hop_length_seconds)
    frames_count = spectrogram.shape[1]

    # The first bar is taken without bound checking, as in the historical behavior.
    first_samples = _bar_sample_indexes(bars_idx[0][0], bars_idx[0][1], subdivision)
    bar_slices = [spectrogram[:, first_samples]]

    for bar in bars_idx[1:]:
        samples = _bar_sample_indexes(bar[0], bar[1], subdivision)
        if samples[-1] >= frames_count:
            # This bar goes past the end of the spectrogram: stop here.
            break
        bar_slices.append(spectrogram[:, samples])

    # Single stacking at the end: appending to the tensor inside the loop
    # copied the whole tensor again for every bar.
    tens = np.stack(bar_slices, axis=2)
    return tl.tensor(tens, dtype=tl.float32)