Ejemplo n.º 1
0
def test_backfill_noise(noise_shape):
    """Check that utils.backfill_noise pads the time axis without collapsing rows.

    For several target lengths, the backfilled array must have at least
    ``t_len`` entries on its second-to-last axis and keep every leading
    axis unchanged. When the input has more than one entry on axis 0,
    each of those entries must differ from the first one.

    ``noise_shape`` is supplied by the caller (presumably a pytest
    fixture -- confirm against the test module's fixtures).
    """
    noise = np.random.random(noise_shape)
    target_lengths = (1, 2, 3, 8, 10, 43, 100)

    for t_len in target_lengths:
        backfilled = utils.backfill_noise(noise, t_len)

        # The time axis is the second-to-last one; leading axes are untouched.
        assert backfilled.shape[-2] >= t_len
        assert noise.shape[:-2] == backfilled.shape[:-2]

        n_rows = noise.shape[0]
        if n_rows <= 1:
            # Nothing to compare against with a single leading entry.
            continue

        # Comparing every entry against the first one is a cheap proxy
        # for "not all entries are identical".
        reference = backfilled[0]
        for row in range(1, n_rows):
            assert not np.array_equal(reference, backfilled[row])
Ejemplo n.º 2
0
def test_backfill_noise(noise_shape):
    """Verify the shape and row-diversity guarantees of utils.backfill_noise.

    A random array of shape ``noise_shape`` is backfilled to a range of
    lengths. Each result must be at least ``t_len`` long on axis -2,
    must preserve all axes before that one, and (when axis 0 holds more
    than one entry) must not repeat its first entry anywhere else along
    axis 0.
    """
    noise = np.random.random(noise_shape)

    for t_len in (1, 2, 3, 8, 10, 43, 100):
        backfilled = utils.backfill_noise(noise, t_len)
        assert backfilled.shape[-2] >= t_len
        assert noise.shape[:-2] == backfilled.shape[:-2]

        # Checking against the first entry only is sufficient here: the
        # stricter "no two entries are equal" check is not required.
        # With zero or one leading entries the range is empty and the
        # check trivially passes.
        repeats_of_first = [
            i
            for i in range(1, noise.shape[0])
            if np.array_equal(backfilled[0], backfilled[i])
        ]
        assert not repeats_of_first
Ejemplo n.º 3
0
def base_slicer(record,
                t_len,
                obs_slicer,
                shuffle=True,
                auto_restart=True,
                add_noise=True,
                random_seed=None,
                npz_data_key='cqt',
                slice_logger=None):
    """Base slicer function for yielding data from an .npz file
    which contains the features for streaming.

    Assumptions:
     * Input file is about 1s in length (43 frames), but might be
       shorter or longer. t_len should be 43 (~1s), however.
     * It is possible that this file could get sampled more than
       once consecutively, so for training, if shuffle is True,
       and the number of frames in the file > t_len, it will
       try to sample from frames other than the first t_len frames
       randomly.
     * For prediction (shuffle=False), the only index handed to
       obs_slicer is 0, i.e. the start of the file.

    To use this for training, use the following parameters:
        (...shuffle=True, auto_restart=True, add_noise=True)
    For prediction / eval, use the following parameters:
        (...shuffle=False, auto_restart=False, add_noise=False)

    Parameters
    ----------
    record : pandas.Series
        Single pandas record containing a 'cqt' entry
        which holds the path of the feature (.npz) file.
        Also must contain an "instrument" entry
        which contains the ground truth.

    t_len : int
        Length of the sliced array [in time/frames]

    obs_slicer : function
        Function which takes (cqt, idx, counter, t_len), and
        returns a slice from the data, formatted correctly for
        the desired data.

    shuffle : bool
        If True, shuffles the candidate start indices on every pass
        through the file.
        If False, the index array contains only 0.

    auto_restart : bool
        If True, yields infinitely.
        If False, only goes through the index array once.

    add_noise : bool
        If True, adds the output of utils.same_shape_noise to every
        sample.
        If False, does nothing.

    random_seed : int or None
        Seed passed to np.random.RandomState. If None, the
        RandomState seeds itself.

    npz_data_key : str
        The key in the npz file pointed to by record from which to
        load the data. Choices = ['cqt', 'harmonic_cqt']

    slice_logger : object or None
        Optional observer. When truthy, its start(record, target),
        sample(record), error(record) and close(record) methods are
        called at the corresponding points of the generator.
        NOTE(review): close() is only reached when the loop ends
        normally (auto_restart=False); confirm whether that is the
        intended contract.

    Yields
    -------
    sample : dict with fields {x_in, target}
        The windowed observation and the 1-d target index array.
    """
    rng = np.random.RandomState(random_seed)
    target = instrument_map.get_index(record['instrument'])
    if slice_logger:
        slice_logger.start(record, target)

    # Bail out early (yielding nothing) when the record does not point
    # at an existing feature file.
    if not (('cqt' in record.index) and record['cqt'] is not None and
            isinstance(record['cqt'], str) and os.path.exists(record['cqt'])):
        logger.error(
            'No valid feature file specified for record: {}'.format(record))
        if slice_logger:
            slice_logger.error(record)
        return

    # Load the npz file with the key specified.
    # np.load returns an NpzFile that holds the archive open; the
    # context manager makes sure the handle is released as soon as the
    # requested array has been read, instead of leaking one descriptor
    # per generator.
    try:
        with np.load(record['cqt']) as npz_file:
            cqt = npz_file[npz_data_key]
    except zipfile.BadZipFile:
        logger.error('This zip file is bad! Sadness :(  -  {}'.format(
            record['cqt']))
        if slice_logger:
            slice_logger.error(record)
        return

    # Take the logmagnitude of the cqt
    cqt = librosa.logamplitude(cqt**2, ref_power=np.max)

    # Make sure the data is long enough.
    # In practice this should no longer be necessary.
    cqt = utils.backfill_noise(cqt, t_len + 1)

    # Candidate start positions along the time axis (second-to-last).
    num_possible_obs = cqt.shape[-2] - t_len
    if shuffle:
        idx = np.arange(num_possible_obs)
        rng.shuffle(idx)
    else:
        idx = np.arange(1)

    counter = 0
    while True:
        if slice_logger:
            slice_logger.sample(record)

        obs = obs_slicer(cqt, idx, counter, t_len)
        if add_noise:
            obs = obs + utils.same_shape_noise(obs, 1, rng)
        data = dict(x_in=obs, target=np.atleast_1d((target, )))
        yield data

        counter += 1
        if counter >= len(idx):
            # One full pass over the index array is done.
            if not auto_restart:
                break
            if shuffle:
                rng.shuffle(idx)
            counter = 0

    if slice_logger:
        slice_logger.close(record)
Ejemplo n.º 4
0
def base_slicer(
    record,
    t_len,
    obs_slicer,
    shuffle=True,
    auto_restart=True,
    add_noise=True,
    random_seed=None,
    npz_data_key="cqt",
    slice_logger=None,
):
    """Base slicer function for yielding data from an .npz file
    which contains the features for streaming.

    Assumptions:
     * Input file is about 1s in length (43 frames), but might be
       shorter or longer. t_len should be 43 (~1s), however.
     * It is possible that this file could get sampled more than
       once consecutively, so for training, if shuffle is True,
       and the number of frames in the file > t_len, it will
       try to sample from frames other than the first t_len frames
       randomly.
     * For prediction (shuffle=False), the only index handed to
       obs_slicer is 0, i.e. the start of the file.

    To use this for training, use the following parameters:
        (...shuffle=True, auto_restart=True, add_noise=True)
    For prediction / eval, use the following parameters:
        (...shuffle=False, auto_restart=False, add_noise=False)

    Parameters
    ----------
    record : pandas.Series
        Single pandas record containing a 'cqt' entry
        which holds the path of the feature (.npz) file.
        Also must contain an "instrument" entry
        which contains the ground truth.

    t_len : int
        Length of the sliced array [in time/frames]

    obs_slicer : function
        Function which takes (cqt, idx, counter, t_len), and
        returns a slice from the data, formatted correctly for
        the desired data.

    shuffle : bool
        If True, shuffles the candidate start indices on every pass
        through the file.
        If False, the index array contains only 0.

    auto_restart : bool
        If True, yields infinitely.
        If False, only goes through the index array once.

    add_noise : bool
        If True, adds the output of utils.same_shape_noise to every
        sample.
        If False, does nothing.

    random_seed : int or None
        Seed passed to np.random.RandomState. If None, the
        RandomState seeds itself.

    npz_data_key : str
        The key in the npz file pointed to by record from which to
        load the data. Choices = ['cqt', 'harmonic_cqt']

    slice_logger : object or None
        Optional observer. When truthy, its start(record, target),
        sample(record), error(record) and close(record) methods are
        called at the corresponding points of the generator.
        NOTE(review): close() is only reached when the loop ends
        normally (auto_restart=False); confirm whether that is the
        intended contract.

    Yields
    -------
    sample : dict with fields {x_in, target}
        The windowed observation and the 1-d target index array.
    """
    rng = np.random.RandomState(random_seed)
    target = instrument_map.get_index(record["instrument"])
    if slice_logger:
        slice_logger.start(record, target)

    # Bail out early (yielding nothing) when the record does not point
    # at an existing feature file.
    if not (
        ("cqt" in record.index)
        and record["cqt"] is not None
        and isinstance(record["cqt"], str)
        and os.path.exists(record["cqt"])
    ):
        logger.error("No valid feature file specified for record: {}".format(record))
        if slice_logger:
            slice_logger.error(record)
        return

    # Load the npz file with the key specified.
    # np.load returns an NpzFile that holds the archive open; the
    # context manager makes sure the handle is released as soon as the
    # requested array has been read, instead of leaking one descriptor
    # per generator.
    try:
        with np.load(record["cqt"]) as npz_file:
            cqt = npz_file[npz_data_key]
    except zipfile.BadZipFile:
        logger.error("This zip file is bad! Sadness :(  -  {}".format(record["cqt"]))
        if slice_logger:
            slice_logger.error(record)
        return

    # Take the logmagnitude of the cqt
    cqt = librosa.logamplitude(cqt ** 2, ref_power=np.max)

    # Make sure the data is long enough.
    # In practice this should no longer be necessary.
    cqt = utils.backfill_noise(cqt, t_len + 1)

    # Candidate start positions along the time axis (second-to-last).
    num_possible_obs = cqt.shape[-2] - t_len
    if shuffle:
        idx = np.arange(num_possible_obs)
        rng.shuffle(idx)
    else:
        idx = np.arange(1)

    counter = 0
    while True:
        if slice_logger:
            slice_logger.sample(record)

        obs = obs_slicer(cqt, idx, counter, t_len)
        if add_noise:
            obs = obs + utils.same_shape_noise(obs, 1, rng)
        data = dict(x_in=obs, target=np.atleast_1d((target,)))
        yield data

        counter += 1
        if counter >= len(idx):
            # One full pass over the index array is done.
            if not auto_restart:
                break
            if shuffle:
                rng.shuffle(idx)
            counter = 0

    if slice_logger:
        slice_logger.close(record)