Beispiel #1
0
def spectrogram_to_image(signal,
                         batch_first=False,
                         color='viridis',
                         origin='lower'):
    """
    Convert a spectrogram into a log-scaled, colorized image.

    The input is normalized by its maximum, floored such that a range of
    50 dB stays visible, mapped to the log domain and quantized to `uint8`.
    For more details of the output shape, see the tensorboardx docs.

    Args:
        signal: Shape (frames, batch [optional], features)
        batch_first: If true, the signal shape is
            (batch [optional], frames, features)
        color: A color map name. It is forwarded to `_colorize`.
        origin: Forwarded to `_apply_origin` together with the transposed
            (features, frames) image.

    Returns:
        The result of `_colorize` applied to the (features, frames) image.
    """
    visible_dB = 50

    spectrogram = to_numpy(signal, detach=True)

    # Normalize to a maximum of one; `tiny` avoids a division by zero.
    peak = np.max(spectrogram)
    spectrogram = spectrogram / (peak + np.finfo(spectrogram.dtype).tiny)

    spectrogram = _remove_batch_axis(spectrogram, batch_first=batch_first)

    # Everything below the floor would end up below zero after the log
    # mapping, hence it is clipped beforehand.
    floor = 10**(-visible_dB / 20)
    clipped = np.maximum(spectrogram, floor)

    # Map the visible dB range to the interval [0, 1].
    scaled = (20 / visible_dB) * np.log10(clipped) + 1

    pixels = (scaled * 255).astype(np.uint8)

    oriented = _apply_origin(pixels.T, origin=origin)
    return _colorize(oriented, color)
def audio(signal,
          sampling_rate: int = 16000,
          batch_first=False,
          normalize=True):
    """
    Prepare an audio signal for logging.

    Args:
        signal: Shape (samples, batch [optional]). If `batch_first = True`,
            (batch [optional], samples).
        sampling_rate: Sampling rate of the audio signal
        batch_first: If `True`, the optional batch dimension is assumed to be
            the first axis, otherwise the second one.
        normalize: If `True`, the signal is normalized to a max amplitude of
            0.95 to prevent clipping. An all-zero signal is returned
            unchanged.

    Returns:
        A tuple `(signal, sampling_rate)`.
    """
    signal = to_numpy(signal, detach=True)

    signal = _remove_batch_axis(signal, batch_first=batch_first, ndim=1)

    # Normalize so that there is no clipping
    if normalize:
        denominator = np.max(np.abs(signal))
        if denominator > 0:
            # Scale only when the signal was actually normalized. Scaling an
            # all-zero signal in place has no effect for floats, fails for
            # integer dtypes and would write into the caller's array.
            signal = signal / denominator * 0.95

    return signal, sampling_rate
Beispiel #3
0
def example_to_numpy(example, detach=False):
    """
    Moves a nested structure to numpy. Opposite of example_to_device.

    Dicts, lists, tuples (including namedtuples) and dataclass instances are
    traversed recursively and rebuilt with their original class. Tensors are
    converted with `padertorch.utils.to_numpy`. Everything else, including
    numpy arrays, is returned as is.

    Args:
        example: Arbitrarily nested structure.
        detach: Forwarded to `to_numpy` for every tensor.

    Returns:
        A structure like `example` where each tensor is converted to numpy.

    """
    if isinstance(example, dict):
        return example.__class__({
            key: example_to_numpy(value, detach=detach)
            for key, value in example.items()
        })
    elif isinstance(example, (tuple, list)):
        converted = [
            example_to_numpy(element, detach=detach) for element in example
        ]
        if hasattr(example, '_fields'):
            # A namedtuple constructor expects one positional argument per
            # field and not a single iterable.
            return example.__class__(*converted)
        return example.__class__(converted)
    elif torch.is_tensor(example) or 'ComplexTensor' in str(type(example)):
        # Imported lazily: only needed for tensors and not on every
        # recursive call.
        from padertorch.utils import to_numpy
        return to_numpy(example, detach=detach)
    elif isinstance(example, np.ndarray):
        return example
    elif hasattr(example, '__dataclass_fields__'):
        return example.__class__(
            **{
                f: example_to_numpy(getattr(example, f), detach=detach)
                for f in example.__dataclass_fields__
            })
    else:
        return example
Beispiel #4
0
def mask_to_image(mask: _T_input,
                  batch_first: bool = False,
                  color: Optional[str] = None,
                  origin: str = 'lower') -> np.ndarray:
    """
    Turns a mask (`Tensor` or `ndarray`) into an image.

    The mask values are scaled by 255, clipped to `[0, 255]` and stored as
    `uint8`, i.e., a mask value of 1 or above is mapped to 255.

    For more details of the output shape, see the tensorboardx docs

    Args:
        mask: Mask to plot
        batch_first: If `True`, `mask` is expected to have shape
            `(batch [optional], frames, features)`. If `False`, the batch axis
            is assumed to be in the second position, i.e.,
            `(frames, batch [optional], features)`.
        color: A color map name. The name is forwarded to
               `matplotlib.pyplot.cm.get_cmap` to get the color map. If `None`,
               grayscale is used.
        origin: Origin of the plot. Can be `'upper'` or `'lower'`.

    Returns:
        Colorized image with shape (color (1 or 3), features, frames)
    """
    mask_np = to_numpy(mask, detach=True)

    # Quantize before the batch axis is dropped.
    pixels = np.clip(mask_np * 255, 0, 255).astype(np.uint8)
    pixels = _remove_batch_axis(pixels, batch_first=batch_first)

    oriented = _apply_origin(pixels.T, origin)
    return _colorize(oriented, color)
Beispiel #5
0
 def convert(value):
     # Convert a tensor to numpy and remember the result under the id of
     # the input object, so the same object is converted only once.
     # `memo` and `detach` are taken from the enclosing scope.
     key = id(value)
     if key in memo:
         return memo[key]
     is_tensor = (
         isinstance(value, torch.Tensor)
         or 'ComplexTensor' in str(type(value))
     )
     result = to_numpy(value, detach=detach) if is_tensor else value
     memo[key] = result
     return result
Beispiel #6
0
def spectrogram_to_image(
        signal: _T_input,
        batch_first: bool = False,
        color: str = 'viridis',
        origin: str = 'lower',
        log: bool = True,
        visible_dB: float = 50,
) -> np.ndarray:
    """
    Turns a (power) spectrogram into an image.

    Note:
        When the input is the absolute value of the STFT instead of its
        power, the value for visible_dB is effectively two times larger
        (i.e. default 100) and the image looks more noisy.

    For more details of the output shape, see the tensorboardx docs

    Args:
        signal: Spectrogram to plot.
        batch_first: If `True`, `signal` is expected to have shape
            `(batch [optional], frames, features)`. If `False`, the batch axis
            is assumed to be in the second position, i.e.,
            `(frames, batch [optional], features)`.
        color: A color map name. The name is forwarded to
               `matplotlib.pyplot.cm.get_cmap` to get the color map.
        origin: Origin of the plot. Can be `'upper'` or `'lower'`.
        log: If `True`, the spectrogram is plotted in log domain and shows a
            50dB range. The 50dB can be changed with the argument `visible_dB`.
        visible_dB: Only used when `log` is `True`. Specifies how many dB will
            be visible in the plot. Assumes the input is the power of the STFT
            signal, i.e., the abs square of it.

    Returns:
        Colorized image with shape (channels (3), features, frames)

    """
    spectrogram = to_numpy(signal, detach=True)

    # Normalize to a peak magnitude of one; `tiny` avoids a division by zero.
    peak = np.max(np.abs(spectrogram))
    spectrogram = spectrogram / (peak + np.finfo(spectrogram.dtype).tiny)

    spectrogram = _remove_batch_axis(spectrogram, batch_first=batch_first)

    if log:
        # Values below the floor would be mapped below zero, clip them.
        floor = 10 ** (-visible_dB / 10)
        spectrogram = np.maximum(spectrogram, floor)

        # Map the visible dB range to the interval [0, 1].
        spectrogram = (10 / visible_dB) * np.log10(spectrogram) + 1

    pixels = (spectrogram * 255).astype(np.uint8)

    oriented = _apply_origin(pixels.T, origin=origin)
    return _colorize(oriented, color)
Beispiel #7
0
def stft_to_image(
        signal: _T_input,
        batch_first: bool = False,
        color: str = 'viridis',
        origin: str = 'lower',
        visible_dB: float = 50,
) -> np.ndarray:
    """
    Turns an STFT signal into an image.

    The power of the STFT signal (real ** 2 + imag ** 2) is computed and
    forwarded to `spectrogram_to_image`.
    For more details of the output shape, see the tensorboardx docs

    Args:
        signal: Shape (frames, batch [optional], features)
        batch_first: If true, the signal shape is
            (batch [optional], frames, features)
        color: A color map name. The name is forwarded to
               `matplotlib.pyplot.cm.get_cmap` to get the color map. If `None`,
               grayscale is used.
        origin: Origin of the plot. Can be `'upper'` or `'lower'`.
        visible_dB: How many decibel are visible in the image.
                    Note: `paderbox.visualization.plot.stft` uses
                          `visible_dB == 60` internally, i.e., 10 dB more
                          than the default that is used here.

    Returns:
        Colorized image with shape (color (1 or 3), features, frames)


    Small test to see the effect of `visible_dB`:

        >>> visible_dB = 60
        >>> 10 ** (-visible_dB / 20)
        0.001

        >>> data = [1, 0.004, 0.003, 0.001_05, 0.001]
        >>> np.squeeze(stft_to_image(np.array(data)[:, None], color=None))
        array([255,  10,   0,   0,   0], dtype=uint8)

        >>> np.squeeze(stft_to_image(
        ...     np.array(data)[:, None], color=None, visible_dB=60))
        array([255,  51,  40,   1,   0], dtype=uint8)

    """
    stft = to_numpy(signal, detach=True)

    # Squared magnitude without an intermediate square root.
    power = stft.real ** 2 + stft.imag ** 2

    return spectrogram_to_image(
        power,
        batch_first=batch_first,
        color=color,
        origin=origin,
        visible_dB=visible_dB,
    )
def _gray_scale_image(signal):
    """Turn a (frames, features) array into a flipped (1, features, frames) gray image."""
    return signal.transpose(1, 0)[None, ::-1, :]


def spectrogram_to_image(signal, batch_first=False, color='viridis'):
    """
    Convert a spectrogram into a log-scaled image with a 50 dB range.

    For more details of the output shape, see the tensorboardx docs

    Args:
        signal: Shape (frames, batch [optional], features)
        batch_first: If true, the signal shape is
            (batch [optional], frames, features)
        color: A color map name. The name is forwarded to
               `matplotlib.pyplot.cm.get_cmap` to get the color map.
               If `None`, or if matplotlib cannot be imported, a gray image
               is returned.

    Returns:
        Gray image with shape (1, features, frames), or the color map output
        transposed such that the color axis comes first, followed by
        features and frames. The feature axis is flipped in both cases.
    """
    signal = to_numpy(signal, detach=True)

    # Normalize to a maximum of one; `tiny` avoids a division by zero.
    signal = signal / (np.max(signal) + np.finfo(signal.dtype).tiny)

    signal = _remove_batch_axis(signal, batch_first=batch_first)

    visible_dB = 50

    # remove problematic small numbers
    floor = 10**(-visible_dB / 20)
    signal = np.maximum(signal, floor)

    # Scale such that X dB are visible (i.e. in the range 0 to 1)
    signal = (20 / visible_dB) * np.log10(signal) + 1

    signal = (signal * 255).astype(np.uint8)

    if color is None:
        return _gray_scale_image(signal)

    try:
        cmap = _spectrogram_to_image_cmap[color]
    except KeyError:
        try:
            import matplotlib.pyplot as plt
            cmap = plt.cm.get_cmap(color)
        except ImportError:
            from warnings import warn
            warn('Since matplotlib is not installed, all images are '
                 'switched to grey scale')
            cmap = _gray_scale_image
        # The fallback is cached as well, hence the warning appears only
        # once per color.
        _spectrogram_to_image_cmap[color] = cmap

    if cmap is _gray_scale_image:
        # The fallback already yields the final layout. Transposing and
        # flipping it again, as done for a real color map below, would
        # return a (frames, features, 1) image with the flip undone.
        return cmap(signal)

    return cmap(signal).transpose(2, 1, 0)[:, ::-1, :]
def stft_to_image(signal, batch_first=False, color='viridis'):
    """
    Convert an STFT signal into an image.

    The magnitude of the signal is forwarded to `spectrogram_to_image`.
    For more details of the output shape, see the tensorboardx docs

    Args:
        signal: Shape (frames, batch [optional], features)
        batch_first: If true, the signal shape is
            (batch [optional], frames, features)
        color: Forwarded to `spectrogram_to_image`.

    Returns:
        The image returned by `spectrogram_to_image`.

    """
    magnitude = np.abs(to_numpy(signal, detach=True))
    return spectrogram_to_image(
        magnitude, batch_first=batch_first, color=color)
def mask_to_image(mask, batch_first=False):
    """
    Convert a mask into a gray image.

    The mask is scaled by 255, clipped to [0, 255] and stored as `uint8`.
    For more details of the output shape, see the tensorboardx docs

    Args:
        mask: Shape (frames, batch [optional], features)
        batch_first: If true, the mask shape is
            (batch [optional], frames, features)

    Returns: Shape (color, features, frames) with a single color channel
        and a flipped feature axis.

    """
    mask_np = to_numpy(mask, detach=True)

    pixels = np.clip(mask_np * 255, 0, 255).astype(np.uint8)
    pixels = _remove_batch_axis(pixels, batch_first=batch_first)

    # Add the color axis in front and swap frames and features.
    with_color_axis = pixels[None].transpose(0, 2, 1)
    # Flip the feature axis.
    return with_color_axis[:, ::-1]
Beispiel #11
0
def mask_to_image(mask, batch_first=False, color=None, origin='lower'):
    """
    Convert a mask into an image.

    The mask is scaled by 255, clipped to [0, 255] and stored as `uint8`.
    For more details of the output shape, see the tensorboardx docs

    Args:
        mask: Shape (frames, batch [optional], features)
        batch_first: If true, the mask shape is
            (batch [optional], frames, features)
        color: Forwarded to `_colorize`.
        origin: Forwarded to `_apply_origin` together with the transposed
            (features, frames) image.

    Returns: Shape(color, features, frames)

    """
    mask_np = to_numpy(mask, detach=True)

    pixels = np.clip(mask_np * 255, 0, 255).astype(np.uint8)
    pixels = _remove_batch_axis(pixels, batch_first=batch_first)

    oriented = _apply_origin(pixels.T, origin)
    return _colorize(oriented, color)
Beispiel #12
0
def spectrogram_to_image(signal, batch_first=False, color='viridis'):
    """
    Convert a spectrogram into a log-scaled image with a 50 dB range.

    For more details of the output shape, see the tensorboardx docs

    Args:
        signal: Shape (frames, batch [optional], features)
        batch_first: If true, the signal shape is
            (batch [optional], frames, features)
        color: A color map name. The name is forwarded to
               `matplotlib.pyplot.cm.get_cmap` to get the color map.
               If `None`, a gray image is returned.

    Returns:
        Gray image with shape (1, features, frames), or the color map output
        transposed such that the color axis comes first, followed by
        features and frames. The feature axis is flipped in both cases.

    """
    signal = to_numpy(signal, detach=True)

    # An all-zero input would cause a division by zero and fill the image
    # with NaN. Adding the smallest positive normal float prevents that and
    # leaves all other inputs numerically unchanged. `np.finfo` rejects
    # integer dtypes, hence the float64 fallback.
    if np.issubdtype(signal.dtype, np.floating):
        tiny = np.finfo(signal.dtype).tiny
    else:
        tiny = np.finfo(np.float64).tiny
    signal = signal / (np.max(signal) + tiny)

    signal = _remove_batch_axis(signal, batch_first=batch_first)

    visible_dB = 50

    # remove problematic small numbers
    floor = 10**(-visible_dB / 20)
    signal = np.maximum(signal, floor)

    # Scale such that X dB are visible (i.e. in the range 0 to 1)
    signal = (20 / visible_dB) * np.log10(signal) + 1

    signal = (signal * 255).astype(np.uint8)

    if color is not None:
        try:
            cmap = _spectrogram_to_image_cmap[color]
        except KeyError:
            import matplotlib.pyplot as plt
            cmap = plt.cm.get_cmap(color)
            _spectrogram_to_image_cmap[color] = cmap

        # (frames, features, color) -> (color, features, frames), flipped
        # along the feature axis.
        return cmap(signal).transpose(2, 1, 0)[:, ::-1, :]
    else:
        # gray image
        return signal.transpose(1, 0)[None, ::-1, :]
Beispiel #13
0
def audio(
        signal: _T_input,
        sampling_rate: int = 16000,
        batch_first: bool = False,
        normalize: bool = True,
) -> Tuple[np.ndarray, int]:
    """
    Prepares an audio signal for tensorboard.

    Args:
        signal: Time-domain signal with shape (samples, batch [optional]).
            If `batch_first = True`, (batch [optional], samples).
        sampling_rate: Sampling rate of the audio signal
        batch_first: If `True`, `signal` is expected to have shape
            `(batch [optional], samples)`. If `False`, the batch axis
            is assumed to be in the second position, i.e.,
            `(samples, batch [optional])`.
        normalize: If `True`, the signal is normalized to a max amplitude of
            0.95 to prevent clipping. A signal without a positive peak
            amplitude is left untouched.

    Returns:
        A tuple consisting of the signal and the sampling rate. See tensorboardX
        docs for further information on the return type.
    """
    waveform = to_numpy(signal, detach=True)
    waveform = _remove_batch_axis(waveform, batch_first=batch_first, ndim=1)

    if not normalize:
        return waveform, sampling_rate

    # Scale the peak amplitude to 0.95 so that there is no clipping.
    peak = np.max(np.abs(waveform))
    if peak > 0:
        waveform = waveform / peak
        waveform *= 0.95

    return waveform, sampling_rate
def pit_loss_from_loss_matrix(
        pair_wise_loss_matrix,
        *,
        reduction='mean',
        algorithm: ['optimal', 'greedy'] = 'optimal',
        return_permutation=False,
):
    """
    Computes the PIT loss from a matrix of pair-wise losses.

    Args:
        pair_wise_loss_matrix: shape: (K, K)
        reduction: 'mean' or 'sum'. Determines how the K selected entries
            are combined.
        algorithm: 'optimal' solves the assignment with
            `scipy.optimize.linear_sum_assignment`, 'greedy' uses
            `_mapping_from_score_matrix` from `pb_bss` on the negated loss
            matrix.
        return_permutation: If true, the selected column indices are
            returned in addition to the loss.

    Returns:
        The reduced loss, or the tuple `(loss, col_ind)` when
        `return_permutation` is set.

    Raises:
        ValueError: For an unknown `algorithm` or `reduction`.

    >>> import numpy as np
    >>> score_matrix = np.array([[11., 10, 0],[4, 5, 10],[6, 0, 5]])
    >>> score_matrix
    array([[11., 10.,  0.],
           [ 4.,  5., 10.],
           [ 6.,  0.,  5.]])
    >>> pair_wise_loss_matrix = torch.tensor(-score_matrix)
    >>> pit_loss_from_loss_matrix(pair_wise_loss_matrix, reduction='sum', algorithm='optimal')
    tensor(-26., dtype=torch.float64)
    >>> pit_loss_from_loss_matrix(pair_wise_loss_matrix, reduction='sum', algorithm='greedy')
    tensor(-21., dtype=torch.float64)

    """
    import scipy.optimize
    from padertorch.utils import to_numpy

    shape = pair_wise_loss_matrix.shape
    assert len(shape) == 2, shape
    assert shape[-2] == shape[-1], shape
    num_sources = shape[-1]

    # The assignment is searched on a detached numpy copy, the gradient
    # flows only through the entries that are selected afterwards.
    loss_np = to_numpy(pair_wise_loss_matrix, detach=True)

    if algorithm == 'optimal':
        row_ind, col_ind = scipy.optimize.linear_sum_assignment(loss_np)
    elif algorithm == 'greedy':
        from pb_bss.permutation_alignment import _mapping_from_score_matrix
        # The helper works on scores, hence the sign flip of the loss.
        col_ind = _mapping_from_score_matrix(-loss_np, algorithm='greedy')
        row_ind = range(num_sources)
    else:
        raise ValueError(algorithm)

    if reduction not in ('mean', 'sum'):
        raise ValueError(reduction)

    selected = pair_wise_loss_matrix[row_ind, col_ind]
    min_loss = selected.mean() if reduction == 'mean' else selected.sum()

    if return_permutation:
        return min_loss, col_ind
    return min_loss