def test_backfill_noise(noise_shape):
    """Exercise ``utils.backfill_noise`` over a range of requested lengths.

    For every requested length the result must have at least that many
    entries along axis -2, and every axis before the last two must match the
    input. When the input has more than one entry along axis 0, each later
    entry of the result along axis 0 must differ from entry 0.
    """
    source = np.random.random(noise_shape)

    for wanted_len in (1, 2, 3, 8, 10, 43, 100):
        filled = utils.backfill_noise(source, wanted_len)

        assert filled.shape[-2] >= wanted_len
        assert source.shape[:-2] == filled.shape[:-2]

        if source.shape[0] <= 1:
            continue

        # A full pairwise comparison would be the ideal check; comparing
        # every entry against the first one is considered sufficient here.
        reference = filled[0]
        for position in range(1, source.shape[0]):
            assert not np.array_equal(reference, filled[position])
# NOTE(review): a second definition of ``base_slicer`` appears later in this
# file; at import time the later definition replaces this one.
def base_slicer(record, t_len, obs_slicer,
                shuffle=True, auto_restart=True, add_noise=True,
                random_seed=None, npz_data_key='cqt',
                slice_logger=None):
    """Base slicer function for yielding data from an .npz file
    which contains the features for streaming.

    Assumptions:
     * Input file is about 1s in length (43 frames), but might be
       shorter or longer. t_len should be 43 (~1s), however.
     * It is possible that this file could get sampled more than
       once consecutively, so for training, if shuffle is True,
       and the number of frames in the file > t_len, it will
       try to sample from frames other than the first t_len frames
       randomly.
     * For prediction (shuffle=False), it will always return
       the first t_len frames.

    To use this for training, use the following parameters:
        (...shuffle=True, auto_restart=True, add_noise=True)
    For prediction / eval, use the following parameters:
        (...shuffle=False, auto_restart=False, add_noise=False)

    Parameters
    ----------
    record : pandas.Series
        Single pandas record containing a 'cqt' record
        which points to the cqt file in question.
        Also must contain an "instrument" column
        which contains the ground truth.

    t_len : int
        Length of the sliced array [in time/frames]

    obs_slicer : function
        Function which takes (cqt, idx, counter, t_len), and
        returns a slice from the data, formatted correctly for
        the desired data.

    shuffle : bool
        If True, shuffles the frames every time through the file.
        If False, returns the first t_len frames.

    auto_restart : bool
        If True, yields infinitely.
        If False, only goes through the file once.

    add_noise : bool
        If True, adds a small amount of noise to every sample.
        If False, does nothing.

    random_seed : int or None
        If int, uses this number as the random seed. Otherwise,
        makes its own.

    npz_data_key : str
        The key in the npz file pointed to by record from which to
        load the data. Choices = ['cqt', 'harmonic_cqt']

    slice_logger : object or None
        Optional logger. When truthy, its ``start``, ``sample``, ``error``
        and ``close`` methods are called at the corresponding points below.

    Yields
    ------
    sample : dict with fields {x_in, target}
        The windowed observation.
    """
    rng = np.random.RandomState(random_seed)
    target = instrument_map.get_index(record['instrument'])
    if slice_logger:
        slice_logger.start(record, target)

    # ``isinstance(..., str)`` already rejects None, so no separate
    # None check is needed before touching the filesystem.
    has_feature_file = (
        'cqt' in record.index
        and isinstance(record['cqt'], str)
        and os.path.exists(record['cqt'])
    )
    if not has_feature_file:
        logger.error(
            'No valid feature file specified for record: {}'.format(record))
        if slice_logger:
            slice_logger.error(record)
        return

    # Load the npz file with the key specified. The archive is opened in a
    # ``with`` block so its file handle is released as soon as the array has
    # been read, instead of staying open for the generator's lifetime.
    try:
        with np.load(record['cqt']) as archive:
            cqt = archive[npz_data_key]
    except zipfile.BadZipFile:
        logger.error('This zip file is bad! Sadness :( - {}'.format(
            record['cqt']))
        if slice_logger:
            slice_logger.error(record)
        return

    # Take the logmagnitude of the cqt
    cqt = librosa.logamplitude(cqt**2, ref_power=np.max)

    # Make sure the data is long enough.
    # In practice this should no longer be necessary.
    cqt = utils.backfill_noise(cqt, t_len + 1)

    num_possible_obs = cqt.shape[-2] - t_len
    if shuffle:
        idx = np.arange(num_possible_obs)
        rng.shuffle(idx)
    else:
        # Without shuffling only the first index is ever used.
        idx = np.arange(1)

    counter = 0
    while True:
        if slice_logger:
            slice_logger.sample(record)

        obs = obs_slicer(cqt, idx, counter, t_len)
        if add_noise:
            obs = obs + utils.same_shape_noise(obs, 1, rng)
        data = dict(
            x_in=obs,
            target=np.atleast_1d((target, )))
        yield data

        counter += 1
        if counter >= len(idx):
            # One full pass over ``idx`` is done: stop, or start another pass.
            if not auto_restart:
                break
            if shuffle:
                rng.shuffle(idx)
            counter = 0

    # Only reached when auto_restart is False and the single pass finished.
    if slice_logger:
        slice_logger.close(record)
def base_slicer(
    record,
    t_len,
    obs_slicer,
    shuffle=True,
    auto_restart=True,
    add_noise=True,
    random_seed=None,
    npz_data_key="cqt",
    slice_logger=None,
):
    """Base slicer function for yielding data from an .npz file
    which contains the features for streaming.

    Assumptions:
     * Input file is about 1s in length (43 frames), but might be
       shorter or longer. t_len should be 43 (~1s), however.
     * It is possible that this file could get sampled more than
       once consecutively, so for training, if shuffle is True,
       and the number of frames in the file > t_len, it will
       try to sample from frames other than the first t_len frames
       randomly.
     * For prediction (shuffle=False), it will always return
       the first t_len frames.

    To use this for training, use the following parameters:
        (...shuffle=True, auto_restart=True, add_noise=True)
    For prediction / eval, use the following parameters:
        (...shuffle=False, auto_restart=False, add_noise=False)

    Parameters
    ----------
    record : pandas.Series
        Single pandas record containing a 'cqt' record
        which points to the cqt file in question.
        Also must contain an "instrument" column
        which contains the ground truth.

    t_len : int
        Length of the sliced array [in time/frames]

    obs_slicer : function
        Function which takes (cqt, idx, counter, t_len), and
        returns a slice from the data, formatted correctly for
        the desired data.

    shuffle : bool
        If True, shuffles the frames every time through the file.
        If False, returns the first t_len frames.

    auto_restart : bool
        If True, yields infinitely.
        If False, only goes through the file once.

    add_noise : bool
        If True, adds a small amount of noise to every sample.
        If False, does nothing.

    random_seed : int or None
        If int, uses this number as the random seed. Otherwise,
        makes its own.

    npz_data_key : str
        The key in the npz file pointed to by record from which to
        load the data. Choices = ['cqt', 'harmonic_cqt']

    slice_logger : object or None
        Optional logger. When truthy, its ``start``, ``sample``, ``error``
        and ``close`` methods are called at the corresponding points below.

    Yields
    ------
    sample : dict with fields {x_in, target}
        The windowed observation.
    """
    rng = np.random.RandomState(random_seed)
    target = instrument_map.get_index(record["instrument"])
    if slice_logger:
        slice_logger.start(record, target)

    # A usable record names an existing file via a string 'cqt' entry;
    # the isinstance check also covers the None case.
    if not (
        ("cqt" in record.index)
        and isinstance(record["cqt"], str)
        and os.path.exists(record["cqt"])
    ):
        logger.error("No valid feature file specified for record: {}".format(record))
        if slice_logger:
            slice_logger.error(record)
        return

    # Load the npz file with the key specified. Using the archive as a
    # context manager closes its file handle once the array is in memory,
    # rather than leaving it open for as long as this generator lives.
    try:
        with np.load(record["cqt"]) as npz_file:
            cqt = npz_file[npz_data_key]
    except zipfile.BadZipFile:
        logger.error("This zip file is bad! Sadness :( - {}".format(record["cqt"]))
        if slice_logger:
            slice_logger.error(record)
        return

    # Take the logmagnitude of the cqt
    cqt = librosa.logamplitude(cqt ** 2, ref_power=np.max)

    # Make sure the data is long enough.
    # In practice this should no longer be necessary.
    cqt = utils.backfill_noise(cqt, t_len + 1)

    num_possible_obs = cqt.shape[-2] - t_len
    if shuffle:
        idx = np.arange(num_possible_obs)
        rng.shuffle(idx)
    else:
        # Without shuffling only the first index is ever used.
        idx = np.arange(1)

    counter = 0
    while True:
        if slice_logger:
            slice_logger.sample(record)

        obs = obs_slicer(cqt, idx, counter, t_len)
        if add_noise:
            obs = obs + utils.same_shape_noise(obs, 1, rng)
        data = dict(x_in=obs, target=np.atleast_1d((target,)))
        yield data

        counter += 1
        if counter >= len(idx):
            # Finished one pass over ``idx``: either stop or begin another.
            if not auto_restart:
                break
            if shuffle:
                rng.shuffle(idx)
            counter = 0

    # Only reached when auto_restart is False and the single pass finished.
    if slice_logger:
        slice_logger.close(record)