def frame(signal, frame_length, frame_step, pad_end=False, pad_value=0,
          axis=-1, name=None):
  """Expands `signal`'s `axis` dimension into frames of `frame_length`.

  Slides a window of size `frame_length` over `signal`'s `axis` dimension
  with a stride of `frame_step`, replacing the `axis` dimension with
  `[frames, frame_length]` frames.

  If `pad_end` is True, window positions that are past the end of the `axis`
  dimension are padded with `pad_value` until the window moves fully past the
  end of the dimension. Otherwise, only window positions that fully overlap
  the `axis` dimension are produced.

  For example:

  ```python
  pcm = tf.placeholder(tf.float32, [None, 9152])
  frames = tf.contrib.signal.frame(pcm, 512, 180)
  magspec = tf.abs(tf.spectral.rfft(frames, [512]))
  image = tf.expand_dims(magspec, 3)
  ```

  Args:
    signal: A `[..., samples, ...]` `Tensor`. The rank and dimensions may be
      unknown. Rank must be at least 1.
    frame_length: The frame length in samples. An integer or scalar `Tensor`.
    frame_step: The frame hop size in samples. An integer or scalar `Tensor`.
    pad_end: Whether to pad the end of `signal` with `pad_value`.
    pad_value: An optional scalar `Tensor` to use where the input signal
      does not exist when `pad_end` is True.
    axis: A scalar integer `Tensor` indicating the axis to frame. Defaults to
      the last axis. Supports negative values for indexing from the end.
    name: An optional name for the operation.

  Returns:
    A `Tensor` of frames with shape `[..., frames, frame_length, ...]`.

  Raises:
    ValueError: If `frame_length`, `frame_step`, `pad_value`, or `axis` are
      not scalar.
  """
  with ops.name_scope(name, "frame", [signal, frame_length, frame_step,
                                      pad_value]):
    signal = ops.convert_to_tensor(signal, name="signal")
    frame_length = ops.convert_to_tensor(frame_length, name="frame_length")
    frame_step = ops.convert_to_tensor(frame_step, name="frame_step")
    axis = ops.convert_to_tensor(axis, name="axis")

    signal.shape.with_rank_at_least(1)
    frame_length.shape.assert_has_rank(0)
    frame_step.shape.assert_has_rank(0)
    axis.shape.assert_has_rank(0)

    result_shape = _infer_frame_shape(signal, frame_length, frame_step,
                                      pad_end, axis)

    # Axis can be negative. Convert it to positive.
    signal_rank = array_ops.rank(signal)
    axis = math_ops.range(signal_rank)[axis]

    signal_shape = array_ops.shape(signal)
    outer_dimensions, length_samples, inner_dimensions = array_ops.split(
        signal_shape, [axis, 1, signal_rank - 1 - axis])
    length_samples = array_ops.reshape(length_samples, [])
    num_outer_dimensions = array_ops.size(outer_dimensions)
    num_inner_dimensions = array_ops.size(inner_dimensions)

    # If padding is requested, pad the input signal tensor with pad_value.
    if pad_end:
      pad_value = ops.convert_to_tensor(pad_value, signal.dtype)
      pad_value.shape.assert_has_rank(0)

      # Calculate number of frames, using double negatives to round up.
      num_frames = -(-length_samples // frame_step)

      # Pad the signal by up to frame_length samples based on how many samples
      # are remaining starting from last_frame_position.
      pad_samples = math_ops.maximum(
          0, frame_length + frame_step * (num_frames - 1) - length_samples)

      # Pad the inner dimension of signal by pad_samples.
      paddings = array_ops.concat(
          [array_ops.zeros([num_outer_dimensions, 2],
                           dtype=pad_samples.dtype),
           [[0, pad_samples]],
           array_ops.zeros([num_inner_dimensions, 2],
                           dtype=pad_samples.dtype)], 0)
      signal = array_ops.pad(signal, paddings, constant_values=pad_value)

      signal_shape = array_ops.shape(signal)
      length_samples = signal_shape[axis]
    else:
      num_frames = math_ops.maximum(
          0, 1 + (length_samples - frame_length) // frame_step)

    # To avoid gathering sample-by-sample, operate on "subframes" of length
    # gcd(frame_length, frame_step); both the frame length and the hop are
    # integer multiples of the subframe length.
    subframe_length = util_ops.gcd(frame_length, frame_step)
    subframes_per_frame = frame_length // subframe_length
    subframes_per_hop = frame_step // subframe_length
    num_subframes = length_samples // subframe_length

    slice_shape = array_ops.concat([outer_dimensions,
                                    [num_subframes * subframe_length],
                                    inner_dimensions], 0)
    subframe_shape = array_ops.concat([outer_dimensions,
                                       [num_subframes, subframe_length],
                                       inner_dimensions], 0)
    subframes = array_ops.reshape(array_ops.strided_slice(
        signal, array_ops.zeros_like(signal_shape), slice_shape),
        subframe_shape)

    # frame_selector is a [num_frames, subframes_per_frame] tensor
    # that indexes into the appropriate frame in subframes. For example:
    # [[0, 0, 0, 0], [2, 2, 2, 2], [4, 4, 4, 4]]
    frame_selector = array_ops.reshape(
        math_ops.range(num_frames) * subframes_per_hop, [num_frames, 1])

    # subframe_selector is a [num_frames, subframes_per_frame] tensor
    # that indexes into the appropriate subframe within a frame. For example:
    # [[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3]]
    subframe_selector = array_ops.reshape(
        math_ops.range(subframes_per_frame), [1, subframes_per_frame])

    # Adding the 2 selector tensors together produces a [num_frames,
    # subframes_per_frame] tensor of indices to use with tf.gather to select
    # subframes from subframes. We then reshape the inner-most
    # subframes_per_frame dimension to stitch the subframes together into
    # frames. For example: [[0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7]].
    selector = frame_selector + subframe_selector

    frames = array_ops.reshape(
        array_ops.gather(subframes, selector, axis=axis),
        array_ops.concat([outer_dimensions, [num_frames, frame_length],
                          inner_dimensions], 0))

    if result_shape:
      frames.set_shape(result_shape)
    return frames
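

# A minimal usage sketch for `frame` (illustrative only, not part of the
# library). It assumes TensorFlow 1.x, where this function is exposed as
# `tf.contrib.signal.frame`; the 16 kHz rate and window sizes are arbitrary
# choices:
#
#   import tensorflow as tf
#
#   # A batch of 16 kHz PCM signals of unknown length.
#   pcm = tf.placeholder(tf.float32, [None, None])
#
#   # 400-sample (25 ms) windows hopped by 160 samples (10 ms). pad_end=True
#   # zero-pads the final partial window instead of dropping it.
#   frames = tf.contrib.signal.frame(pcm, frame_length=400, frame_step=160,
#                                    pad_end=True)
#   # frames has shape [batch_size, num_frames, 400].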


def overlap_and_add(signal, frame_step, name=None):
  """Reconstructs a signal from a framed representation.

  Adds potentially overlapping frames of a signal with shape
  `[..., frames, frame_length]`, offsetting subsequent frames by `frame_step`.
  The resulting tensor has shape `[..., output_size]` where

      output_size = (frames - 1) * frame_step + frame_length

  Args:
    signal: A [..., frames, frame_length] `Tensor`. All dimensions may be
      unknown, and rank must be at least 2.
    frame_step: An integer or scalar `Tensor` denoting overlap offsets. Must
      be less than or equal to `frame_length`.
    name: An optional name for the operation.

  Returns:
    A `Tensor` with shape `[..., output_size]` containing the overlap-added
    frames of `signal`'s inner-most two dimensions.

  Raises:
    ValueError: If `signal`'s rank is less than 2, `frame_step` is not a
      scalar integer or `frame_step` is greater than `frame_length`.
  """
  with ops.name_scope(name, "overlap_and_add", [signal, frame_step]):
    signal = ops.convert_to_tensor(signal, name="signal")
    signal.shape.with_rank_at_least(2)
    frame_step = ops.convert_to_tensor(frame_step, name="frame_step")
    frame_step.shape.assert_has_rank(0)
    if not frame_step.dtype.is_integer:
      raise ValueError("frame_step must be an integer. Got %s" %
                       frame_step.dtype)

    signal_shape = array_ops.shape(signal)

    # All dimensions that are not part of the overlap-and-add. Can be empty
    # for rank 2 inputs.
    outer_dimensions = signal_shape[:-2]

    # If frame_length and frame_step are known at graph construction time,
    # check frame_step is less than or equal to frame_length.
    frame_step_static = tensor_util.constant_value(frame_step)
    if (frame_step_static is not None and signal.shape.ndims is not None and
        signal.shape.dims[-1].value is not None):
      if frame_step_static > signal.shape.dims[-1].value:
        raise ValueError(
            "frame_step (%d) must be less than or equal to "
            "frame_length (%d)" % (
                frame_step_static, signal.shape.dims[-1].value))
      # If frame_length is equal to frame_step, there's no overlap so just
      # reshape the tensor.
      if frame_step_static == signal.shape.dims[-1].value:
        return array_ops.reshape(signal, array_ops.concat(
            [outer_dimensions, [-1]], 0))

    signal_rank = array_ops.rank(signal)
    frames = signal_shape[-2]
    frame_length = signal_shape[-1]

    subframe_length = util_ops.gcd(frame_length, frame_step)
    subframe_step = frame_step // subframe_length
    subframes_per_frame = frame_length // subframe_length
    output_size = frame_step * (frames - 1) + frame_length
    output_subframes = output_size // subframe_length

    # To avoid overlap-adding sample-by-sample, we overlap-add at the
    # "subframe" level, where a subframe is gcd(frame_length, frame_step).
    # Reshape signal from [..., frames, frame_length] into
    # [..., subframes, subframe_length].
    subframe_shape = array_ops.concat(
        [outer_dimensions, [-1, subframe_length]], 0)
    subframe_signal = array_ops.reshape(signal, subframe_shape)

    # Now we shuffle the last [subframes, subframe_length] dimensions to the
    # front.
    # TODO(rjryan): Add an axis argument to unsorted_segment_sum so we can
    # avoid this pair of transposes.
    subframe_signal = _shuffle_to_front(subframe_signal, 2)

    # Use unsorted_segment_sum to add overlapping subframes together.
    segment_ids = array_ops.reshape(shape_ops.frame(
        math_ops.range(output_subframes), subframes_per_frame, subframe_step,
        pad_end=False), [-1])
    result = math_ops.unsorted_segment_sum(subframe_signal, segment_ids,
                                           num_segments=output_subframes)

    # result is a [subframes, subframe_length, ...outer_dimensions] tensor.
    # We return a [...outer_dimensions, output_size] tensor with a transpose
    # and reshape.
    result_shape = array_ops.concat([outer_dimensions, [output_size]], 0)
    return array_ops.reshape(_shuffle_to_front(result, signal_rank - 2),
                             result_shape)
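

# A minimal usage sketch for `overlap_and_add` (illustrative only, not part
# of the library). It assumes TensorFlow 1.x, where this function is exposed
# as `tf.contrib.signal.overlap_and_add`:
#
#   import tensorflow as tf
#
#   signal = tf.ones([3, 64])  # 3 frames of length 64.
#
#   # With frame_step equal to frame_length there is no overlap, so this is
#   # just a reshape to [3 * 64] = [192].
#   no_overlap = tf.contrib.signal.overlap_and_add(signal, frame_step=64)
#
#   # With 50% overlap, output_size = (3 - 1) * 32 + 64 = 128. Overlapping
#   # samples are summed, so the interior of the result is 2.0 here.
#   half_overlap = tf.contrib.signal.overlap_and_add(signal, frame_step=32)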