def load_features(path, chunk_size=128, r_threshold=32):
    """Read feature vectors from an HDF5 file as fixed-size chunks.

    Each entry of the ``'F'`` dataset is reshaped to ``(-1, *shape)``,
    where ``shape`` is taken from the dataset's ``'shape'`` attribute,
    and then cut along the first (time) axis into pieces of
    `chunk_size` frames. With ``q, r = divmod(n_frames, chunk_size)``:

    * An empty vector contributes no chunks (its count is ``0``).
    * If ``r == 0``, the vector is split into ``q`` equal chunks.
    * If the vector is shorter than one chunk (``q == 0``), it is
      padded to `chunk_size` using its minimum value as fill value.
    * If ``0 < r < r_threshold``, ``r // 2`` frames are skipped at the
      start and the remaining surplus is dropped from the end, leaving
      ``q`` whole chunks.
    * If ``r >= r_threshold``, the first ``q`` whole chunks are kept
      and one extra chunk made of the last `chunk_size` frames is
      appended, so that it overlaps the penultimate chunk.

    Args:
        path (str): Path to the HDF5 file.
        chunk_size (int): Number of frames per chunk.
        r_threshold (int): Remainder threshold ``r`` is compared to
            (see above).

    Returns:
        np.ndarray: Array of all chunks, in dataset order.
        list: Number of chunks produced for each dataset entry.
    """
    all_chunks = []
    counts = []

    with h5py.File(path, 'r') as h5:
        dataset = h5['F']
        frame_shape = dataset.attrs['shape']

        for flat in tqdm(dataset):
            # Entries are stored flat; restore the per-frame shape
            clip = np.reshape(flat, (-1, *frame_shape))
            n_frames = len(clip)

            if n_frames == 0:
                counts.append(0)
                continue

            n_whole, remainder = divmod(n_frames, chunk_size)

            if remainder == 0:
                # Exact multiple of the chunk size
                pieces = np.split(clip, n_whole)
            elif n_whole == 0:
                # Shorter than a single chunk: pad up to chunk_size
                pieces = [
                    utils.pad_truncate(clip, chunk_size,
                                       pad_value=np.min(clip))
                ]
            else:
                # Centre the kept region only when the tail is dropped
                start = remainder // 2 if remainder < r_threshold else 0
                stop = start + n_whole * chunk_size
                pieces = np.split(clip[start:stop], n_whole)
                if remainder >= r_threshold:
                    # Keep the tail as an overlapping final chunk
                    pieces.append(clip[-chunk_size:])

            counts.append(len(pieces))
            all_chunks.extend(pieces)

    return np.array(all_chunks), counts
def _reshape_spec(feat, r_threshold=32, chunk_size=128):
    """Split one feature array into fixed-size chunks along axis 0.

    With ``q, r = divmod(feat.shape[0], chunk_size)``:

    * If ``q == 0``, the array is passed to ``utils.pad_truncate`` with
      `chunk_size` as target length and its minimum as pad value, and
      the result is the only chunk.
    * If ``r < r_threshold``, ``r // 2`` frames are skipped at the
      start and ``q`` whole chunks are taken from there.
    * If ``r >= r_threshold``, the first ``q`` whole chunks are kept
      and the last `chunk_size` frames are appended as an extra chunk
      that overlaps the previous one.

    Args:
        feat (np.ndarray): Feature array; axis 0 is the one split.
        r_threshold (int): Remainder threshold (see above). This value
            is now honoured; it was previously overwritten with 32.
        chunk_size (int): Number of frames per chunk.

    Returns:
        np.ndarray: Stacked chunks.
    """
    n_whole, remainder = divmod(feat.shape[0], chunk_size)

    if not n_whole:
        # Shorter than one chunk: pad up to chunk_size
        split = [utils.pad_truncate(feat, chunk_size,
                                    pad_value=np.min(feat))]
    else:
        # Centre the kept region only when the tail is dropped
        off = remainder // 2 if remainder < r_threshold else 0
        split = np.split(feat[off:n_whole * chunk_size + off], n_whole)
        if remainder >= r_threshold:
            # Keep the tail as an overlapping final chunk
            split.append(feat[-chunk_size:])

    return np.array(split)
def extract_dataset(dataset_path,
                    file_names,
                    extractor,
                    clip_duration,
                    output_path,
                    recompute=False,
                    n_transforms_iter=None,
                    ):
    """Compute and store feature vectors for a list of audio clips.

    Results are written to an HDF5 file opened in append mode. The
    file holds a float32 dataset ``'F'`` with one row per entry of
    `file_names`, and a ``'timestamps'`` dataset recording when each
    row was written. A row that already has a timestamp is skipped
    unless `recompute` is set.

    Args:
        dataset_path (str): Path of directory containing dataset.
        file_names (list): List of file names for the audio clips.
        extractor: Feature extractor; must provide ``output_shape``
            and ``extract``.
        clip_duration: Duration of a reference clip in seconds. It is
            passed to ``extractor.output_shape`` so that every stored
            feature vector has the same length.
        output_path: File path of output HDF5 file.
        recompute (bool): Whether to extract features that already
            exist in the HDF5 file.
        n_transforms_iter (iterator): Iterator yielding the number of
            transformations to apply for each loaded clip. Set this to
            ``None`` to disable data augmentation. Otherwise,
            `file_names` must already be expanded as if by calling
            :func:`data_augmentation.expand_metadata`.
    """
    with h5py.File(output_path, 'a') as h5:
        n_entries = len(file_names)

        # One row per entry, each shaped like the extractor's output
        full_shape = (n_entries,) + extractor.output_shape(clip_duration)
        feat_ds = h5.require_dataset('F', full_shape, dtype=np.float32)
        stamp_ds = h5.require_dataset(
            'timestamps', (n_entries,),
            dtype=h5py.special_dtype(vlen=bytes))

        # Transformed clips still waiting to be processed; it stays
        # empty for the whole run when augmentation is disabled.
        pending = iter(())

        for idx, fname in enumerate(tqdm(file_names)):
            # Already extracted: discard the matching pending
            # transform (if any) and move on.
            if stamp_ds[idx] and not recompute:
                next(pending, None)
                continue

            # Use the next pending transform; when none is left, read
            # the next clip from disk instead.
            audio = next(pending, None)
            if audio is None:
                clip_path = os.path.join(dataset_path, fname)
                audio, sample_rate = librosa.load(clip_path, sr=None)

                # Queue this clip's transforms when augmentation is on
                if n_transforms_iter:
                    pending = aug.transformations(
                        audio, sample_rate, next(n_transforms_iter))

            # Extract, then fit to the fixed length of a dataset row
            feat_ds[idx] = utils.pad_truncate(
                extractor.extract(audio, sample_rate), full_shape[1])

            # Mark the row as done with an ISO-format timestamp
            stamp_ds[idx] = dt.datetime.now().isoformat()