def _extract_latent(fn, model, sr=22050, is_mulaw=True):
    """Helper function to extract latent embedding for a single music file

    Args:
        fn (str): filename for a single music file
        sr (int): sampling rate
    """
    if basename(fn).split('.')[-1] == 'npy':
        if is_mulaw:
            y = load_mulaw(fn)
        else:
            y = np.load(fn)
    else:
        y, sr = librosa.load(fn, sr=sr)

    y = y.astype(np.float32)
    if len(y) > model.sig_len:
        # find the center and crop from there
        mid = int(len(y) / 2)
        half_len = int(model.sig_len / 2)
        start_point = mid - half_len
        y = y[start_point:start_point + model.sig_len]

    elif len(y) < model.sig_len:
        # zero-pad
        rem = model.sig_len - len(y)
        y = np.r_[y, np.zeros((rem, ), dtype=y.dtype)]

    return model.get_bottleneck(torch.from_numpy(y)[None]).data.numpy()[0]
# Example #2
def _transform(fn,
               transformer,
               out_root,
               perturbations=cfg.PERTURBATIONS,
               normalization=True):
    """Apply a perturbation sweep to a signal and save every result.

    Args:
        fn (str): path to the input signal
        transformer (BaseTransformer): transformation class
        out_root (str): path to dump outputs
        perturbations (OrderedDict): perturbations set
        normalization (bool): flag for normalization
    """
    # load the signal (.npy inputs are assumed mu-law encoded)
    ext = basename(fn).split('.')[-1]
    if ext == 'npy':
        x = load_mulaw(fn)
    else:
        x, sr = librosa.load(fn, sr=cfg.FS)

    stem = basename(fn).split('.')[0]
    for amount in get_transform_range(transformer, perturbations):
        transformed = transformer(x, amount)

        if normalization:
            # loudness-match the transformed output to the input
            transformed = lufs_norm(transformed, x, cfg.FS)

        # output name encodes the source stem and perturbation id
        out_fn = '_'.join([stem, get_pert_id(transformer, amount)])
        save_mulaw(join(out_root, out_fn), transformed)
# Example #3
def _extract_mfcc(fn,
                  out_root,
                  include_coef0=False,
                  sig_len=44100,
                  sr=22050,
                  n_mfccs=25):
    """Extract MFCCs from a single music file and save them as ``.npy``.

    Args:
        fn (str): filename for a single music file
        out_root (str): path to dump output MFCCs
        include_coef0 (bool): decides including 1st coefficient
        sig_len (int): length of desired signal
            NOTE(review): currently unused -- kept for interface compatibility
        sr (int): sampling rate
        n_mfccs (int): number of coefficients
    """
    if basename(fn).split('.')[-1] == 'npy':
        # .npy inputs are assumed mu-law encoded
        y = load_mulaw(fn)
    else:
        y, sr = librosa.load(fn, sr=sr)

    # keyword arguments: librosa >= 0.10 rejects positional y/sr
    M = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfccs).T
    if not include_coef0:
        # drop the 0th (overall energy) coefficient
        M = M[:, 1:]
    out_fn = join(out_root, splitext(basename(fn))[0] + '.npy')
    np.save(out_fn, M)
def ext_latents(fns,
                out_fn,
                model=None,
                n_jobs=1,
                is_mulaw=True,
                batch_sz=100,
                sr=22050):
    """Extract latent features for a list of files and save them.

    Signals are center-cropped / zero-padded to ``model.sig_len`` and run
    through the model in mini-batches.

    Args:
        fns (list of str): filenames of the music files (.npy or audio)
        out_fn (str): path to dump the stacked latent matrix
        model (BaseModel): model exposing ``sig_len``, ``get_bottleneck``
            and ``parameters``; despite the None default, a model is required
        n_jobs (int): number of parallel jobs
            NOTE(review): currently unused -- kept for interface compatibility
        is_mulaw (bool): whether .npy inputs are mu-law encoded
        batch_sz (int): number of signals per forward pass
        sr (int): sampling rate for decoding non-npy audio. New parameter:
            the original body referenced an unbound ``sr``, raising
            NameError for any non-npy input.
    """
    # run on GPU iff the model lives there
    is_gpu = next(model.parameters()).is_cuda

    batch = []
    Z = []
    for i, fn in tqdm(enumerate(fns), total=len(fns), ncols=80):
        # load the raw signal
        if basename(fn).split('.')[-1] == 'npy':
            y = load_mulaw(fn) if is_mulaw else np.load(fn)
        else:
            y, sr = librosa.load(fn, sr=sr)

        y = y.astype(np.float32)
        if len(y) > model.sig_len:
            # center-crop to the model's expected length
            mid = len(y) // 2
            half_len = model.sig_len // 2
            start_point = mid - half_len
            y = y[start_point:start_point + model.sig_len]
        elif len(y) < model.sig_len:
            # right-pad with zeros up to the expected length
            rem = model.sig_len - len(y)
            y = np.r_[y, np.zeros((rem,), dtype=y.dtype)]

        batch.append(y)

        # flush a full batch -- or the final (possibly partial) one
        if (len(batch) >= batch_sz) or ((i + 1) == len(fns)):
            batch = torch.from_numpy(np.array(batch).astype(np.float32))
            if is_gpu:
                batch = batch.cuda()

            z = model.get_bottleneck(batch)
            z = z.data.cpu().numpy() if is_gpu else z.data.numpy()
            Z.append(z)
            batch = []

    # stack per-batch outputs into one (n_files, dim) array and save
    Z = np.concatenate(Z, axis=0)
    np.save(out_fn, Z)
def load_audio(fn, mulaw=True, sr=22050):
    """Load an audio signal from disk as float32.

    Args:
        fn (str): path to the input; ``.npy`` files are loaded directly,
            anything else is decoded with librosa
        mulaw (bool): if True, ``.npy`` inputs are treated as mu-law
            encoded and decoded via ``load_mulaw``
        sr (int): target sampling rate for librosa decoding. New parameter:
            the original body referenced an unbound ``sr``, raising
            NameError for any non-npy input.

    Returns:
        np.ndarray: the loaded signal as float32
    """
    if basename(fn).split('.')[-1] == 'npy':
        y = load_mulaw(fn) if mulaw else np.load(fn)
    else:
        y, sr = librosa.load(fn, sr=sr)

    # make sure the output is the right dtype
    return y.astype(np.float32)
# Example #6
def _extract_mfcc(fn, sr=22050, is_mulaw=True):
    """Extract MFCC

    Args:
        fn (str): filename for a single music file
        sr (int): sampling rate

    Returns:
        np.ndarray: MFCC feature vector
    """
    # .npy inputs are decoded according to the mu-law flag;
    # everything else goes through librosa
    ext = basename(fn).split('.')[-1]
    if ext == 'npy':
        y = load_mulaw(fn) if is_mulaw else np.load(fn)
    else:
        y, sr = librosa.load(fn, sr=sr)
    return _mfcc(y, sr)
def _random_crop(in_fn, out_root, sig_len, n_samples=1, sr=22050):
    """Randomly crop clips out of an audio file and save them mu-law encoded.

    Args:
        in_fn (str): path to load the original signal
        out_root (str): path to save the cropped signals
        sig_len (int): length of the cropped signal
        n_samples (int): the number of clips to be cropped
        sr (int): sampling rate of the signal

    Returns:
        list of tuple: each entry pairs the saved path with the
            (start, end) sample indices of the crop
    """
    def _dump(signal, sample_rate, start_ix):
        # serialize one crop; the filename records its sample span
        out_fn = join(
            out_root,
            splitext(basename(in_fn))[0] +
            '_{:d}_{:d}.npy'.format(
                int(start_ix), int(start_ix + sig_len)
            )
        )
        save_mulaw(out_fn, signal, sample_rate)
        return (out_fn, (start_ix, start_ix + sig_len))

    # load the source signal (.npy inputs are assumed mu-law encoded)
    if basename(in_fn).split('.')[-1] == 'npy':
        y = load_mulaw(in_fn)
    else:
        y, sr = librosa.load(in_fn, sr=sr)

    # the requested clips must collectively fit inside the signal
    assert sig_len * n_samples < len(y)

    return [_dump(clip, sr, start)
            for clip, start in _random_crop_signal(y, n_samples, sig_len)]