Example #1
def circular_convolution_conv(keys, values, cuda=False):
    '''
    For the circular convolution of x and y to be equivalent to their linear
    convolution, you must pad the vectors with zeros to length at least
    N + L - 1 before you take the DFT. After you invert the product of the
    DFTs, retain only the first N + L - 1 elements.
    '''
    assert values.dim() == keys.dim() == 2, "only 2 dims supported"
    batch_size = keys.size(0)
    keys_feature_size = keys.size(1)
    values_feature_size = values.size(1)
    required_size = keys_feature_size + values_feature_size - 1

    # zero-pad up to N + L - 1
    zero_for_keys = Variable(float_type(cuda)(
        batch_size, required_size - keys_feature_size).zero_())
    zero_for_values = Variable(float_type(cuda)(
        batch_size, required_size - values_feature_size).zero_())
    keys = torch.cat([keys, zero_for_keys], -1)
    values = torch.cat([values, zero_for_values], -1)

    # do the conv and reshape and return
    print('values = ', values.view(batch_size, 1, -1).size(), ' keys = ', keys.view(batch_size, 1, -1).size())
    print('conv = ', F.conv1d(values.view(batch_size, 1, -1),
                    keys.view(batch_size, 1, -1)).size())
    return F.conv1d(values.view(batch_size, 1, -1),
                    keys.view(batch_size, 1, -1)).squeeze()[:, 0:required_size]
Example #2
    def forward(self, x):
        """
        :param x: tensor with shape [batch_size, max_seq_len, max_word_len, char_embed_size]

        :return: tensor with shape [batch_size, max_seq_len, depth_sum]

        applies a multi-kernel 1d-conv layer to every word in the input, with max-over-time pooling
            to emit a fixed-size output
        """

        input_size = x.size()
        input_size_len = len(input_size)

        assert input_size_len == 4, \
            'Wrong input rank, must be equal to 4, but {} found'.format(input_size_len)

        [batch_size, seq_len, _, embed_size] = input_size

        assert embed_size == self.params.char_embed_size, \
            'Wrong embedding size, must be equal to {}, but {} found'.format(self.params.char_embed_size, embed_size)

        # reshape to [batch_size * seq_len, char_embed_size, max_word_len] for conv1d
        x = x.view(-1, self.params.max_word_len, self.params.char_embed_size).transpose(1, 2).contiguous()

        xs = [F.tanh(F.conv1d(x, kernel, bias=self.biases[i])) for i, kernel in enumerate(self.kernels)]
        xs = [x.max(2)[0] for x in xs]  # max-over-time pooling (max already removes dim 2)

        x = t.cat(xs, 1)
        x = x.view(batch_size, seq_len, -1)

        return x
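
A standalone sketch of the multi-kernel conv plus max-over-time pooling pattern used above; the channel counts and kernel widths are illustrative assumptions, not the model's real hyperparameters.

import torch
import torch.nn.functional as F

chars = torch.randn(32, 15, 16)    # [batch_size * seq_len, char_embed_size, max_word_len]
kernels = [torch.randn(out_c, 15, width) for out_c, width in [(25, 2), (50, 3), (75, 4)]]

# one conv per kernel width, tanh non-linearity, then max over the word-length axis
feats = [torch.tanh(F.conv1d(chars, k)).max(dim=2).values for k in kernels]
word_repr = torch.cat(feats, dim=1)
print(word_repr.shape)             # torch.Size([32, 150]); depth_sum = 25 + 50 + 75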
Example #3
    def forward(self, x):
        if self.deterministic:
            assert self.training == False, "Flag deterministic is True. This should not be used in training."
            return F.conv1d(x, self.post_weight_mu, self.bias_mu)
        batch_size = x.size()[0]
        # apply local reparametrisation trick see [1] Eq. (6)
        # to the parametrisation given in [3] Eq. (6)
        mu_activations = F.conv1d(x, self.weight_mu, self.bias_mu, self.stride,
                                  self.padding, self.dilation, self.groups)

        var_activations = F.conv1d(x.pow(2), self.weight_logvar.exp(), self.bias_logvar.exp(), self.stride,
                                   self.padding, self.dilation, self.groups)
        # compute z
        # note that we reparametrise according to [2] Eq. (11) (not [1])
        z = reparametrize(self.z_mu.repeat(batch_size, 1, 1), self.z_logvar.repeat(batch_size, 1, 1),
                          sampling=self.training, cuda=self.cuda)
        z = z[:, :, None]

        return reparametrize(mu_activations * z, (var_activations * z.pow(2)).log(), sampling=self.training,
                             cuda=self.cuda)
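
A self-contained sketch of the local reparametrisation trick applied above, with toy shapes and my own variable names (the reparametrize helper from the snippet is not reproduced): instead of sampling weights, the Gaussian over pre-activations implied by the weight posterior is sampled directly.

import torch
import torch.nn.functional as F

x = torch.randn(8, 4, 32)                      # [batch, in_channels, length]
weight_mu = torch.randn(6, 4, 3)               # [out_channels, in_channels, kernel]
weight_logvar = torch.full((6, 4, 3), -4.0)

mu_act = F.conv1d(x, weight_mu, padding=1)
var_act = F.conv1d(x.pow(2), weight_logvar.exp(), padding=1)

# one sample of the activations: mu + sigma * eps
eps = torch.randn_like(mu_act)
sample = mu_act + var_act.clamp(min=1e-12).sqrt() * eps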
Example #4
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        we have:
        w(float) -- quant - dequant \
        x(float) ------------- F.conv1d ---

        In the full model, we will see
        w(float) -- quant - *dequant \
        x -- quant --- *dequant --  *F.conv1d --- *quant - dequant
        and the backend should be able to fuse the ops with `*` into a quantized conv1d
        """
        weight_dequant = self.get_weight()
        result = F.conv1d(
            x, weight_dequant, self.bias, self.stride,
            self.padding, self.dilation, self.groups)
        return result
Example #5
def _upsample2(x, zeros=24):
    """
    Upsample x by a factor of two. The output will be exactly twice as long as the input.
    Args:
        x (Tensor): signal to upsample, time should be the last dimension
        zeros (int): number of zero crossings to keep in the sinc filter.

    This function is kept only for reference; you should use the more generic
    `resample_frac` instead. This function does not perform anti-aliasing filtering.
    """
    *other, time = x.shape
    kernel = _kernel_upsample2_downsample2(zeros).to(x)
    out = F.conv1d(x.view(-1, 1, time), kernel,
                   padding=zeros)[..., 1:].view(*other, time)
    y = torch.stack([x, out], dim=-1)
    return y.view(*other, -1)
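
The helper _kernel_upsample2_downsample2 is not shown above; the sketch below assumes a Hann-windowed sinc evaluated at half-integer offsets, which is one common way such a kernel is built, and interleaves the original samples with the interpolated ones.

import torch
import torch.nn.functional as F

def upsample2_sketch(x: torch.Tensor, zeros: int = 24) -> torch.Tensor:
    win = torch.hann_window(4 * zeros + 1, periodic=False)
    t = torch.linspace(-zeros + 0.5, zeros - 0.5, 2 * zeros)
    kernel = (torch.sinc(t) * win[1::2]).view(1, 1, -1)     # windowed sinc at half-integer lags
    *other, time = x.shape
    out = F.conv1d(x.reshape(-1, 1, time), kernel, padding=zeros)[..., 1:].view(*other, time)
    return torch.stack([x, out], dim=-1).view(*other, -1)   # interleave original and interpolated

print(upsample2_sketch(torch.randn(2, 1000)).shape)         # torch.Size([2, 2000])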
Example #6
def bspline_kernel_1d(sigma,
                      order=2,
                      asTensor=False,
                      dtype=th.float32,
                      device='cpu'):

    # build a 1-D B-spline kernel of the given order by repeatedly convolving a
    # box filter of width `sigma` with itself
    kernel_ones = th.ones(1, 1, sigma)
    kernel = kernel_ones

    for i in range(1, order + 1):
        kernel = F.conv1d(kernel, kernel_ones, padding=i * sigma) / sigma

    if asTensor:
        return kernel[0, 0, ...].to(dtype=dtype, device=device)
    else:
        return kernel[0, 0, ...].numpy()
Example #7
    def forward(self, inputs):
        if inputs.dim() == 2:
            inputs = torch.unsqueeze(inputs, 1)
        inputs = F.pad(
            inputs, [self.win_len - self.stride, self.win_len - self.stride])
        outputs = F.conv1d(inputs, self.weight, stride=self.stride)

        if self.feature_type == 'complex':
            return outputs
        else:
            dim = self.dim // 2 + 1
            real = outputs[:, :dim, :]
            imag = outputs[:, dim:, :]
            mags = torch.sqrt(real**2 + imag**2)
            phase = torch.atan2(imag, real)
            return mags, phase
Example #8
def get_histogram_filter_indices(img_shifted: torch.Tensor,
                                 bins: int,
                                 r: int = 3):
    """Return indices of images not in the first/last histogram buckets"""
    indices = []
    weight = torch.ones([1, 1, r], device=img_shifted.device)
    for idx, img_shifted_i in enumerate(img_shifted):
        stats = torch.histc(img_shifted_i, bins, -1, 1)
        stats = F.conv1d(stats.view(1, 1, -1), weight, padding=r // 2)
        stats = stats.view(-1).cpu().numpy()
        maxes = np.r_[True,
                      stats[1:] >= stats[:-1]] & np.r_[stats[:-1] >= stats[1:],
                                                       True]
        maxes = np.nonzero(maxes)[0]
        indices.append(len(maxes) >= 2)
    return torch.tensor(indices)
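
A quick standalone illustration of the smoothing step above: a length-r box filter is run over a histogram with F.conv1d before the local maxima are counted.

import torch
import torch.nn.functional as F

r, bins = 3, 64
hist = torch.histc(torch.randn(10000), bins=bins, min=-1, max=1)
weight = torch.ones(1, 1, r)
smoothed = F.conv1d(hist.view(1, 1, -1), weight, padding=r // 2).view(-1)
print(smoothed.shape)   # torch.Size([64])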
Example #9
    def __call__(self, u):
        """
        Args:
            u (Tensor): [B, C, H]
        Returns:
            div_u: [B, C, H]

        """
        u_shape = u.shape
        u = u.view(-1, 1, *u_shape[-1:])
        u = F.conv1d(F.pad(u, self.padding, mode='circular'),
                     self.weight,
                     stride=1,
                     padding=0,
                     bias=None) / (self.dx**4)
        return u.view(u_shape)
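
A standalone sketch of the same pattern with my own stencil (not the class's weight): a centred second-difference stencil applied through F.conv1d after circular padding, so the operator wraps around the periodic domain.

import math
import torch
import torch.nn.functional as F

dx = 0.01
grid = torch.arange(0.0, 1.0, dx)
u = torch.sin(2 * math.pi * grid).view(1, 1, -1)    # [B=1, C=1, H]

stencil = torch.tensor([[[1.0, -2.0, 1.0]]])        # second-difference weights
u_pad = F.pad(u, (1, 1), mode='circular')           # periodic boundary, as above
d2u = F.conv1d(u_pad, stencil) / dx ** 2            # approximates -(2*pi)**2 * sin(2*pi*grid)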
Example #10
    def forward(self, x: torch.Tensor):
        def T(w):
            return w.T if self.fan_in_fan_out else w

        if self.merged:
            return F.linear(x, T(self.weight), bias=self.bias)
        else:
            result = F.linear(x, T(self.weight), bias=self.bias)
            if self.r > 0:
                after_A = F.linear(self.lora_dropout(x), self.lora_A)
                after_B = F.conv1d(after_A.transpose(-2, -1),
                                   self.lora_B.unsqueeze(-1),
                                   groups=sum(self.enable_lora)).transpose(
                                       -2, -1)
                result += self.zero_pad(after_B) * self.scaling
            return result
Example #11
 def forward(self, x):
     k = 0
     min_freq = 1.0
     min_band = 10.0
     filters = torch.zeros(
         (self.N_filt * self.N_channels, self.Filt_dim)).to(self.device)
     for j in range(self.N_channels):
         filt_beg_freq = torch.abs(self.filt_low)[j] + min_freq / self.fs
         filt_end_freq = (filt_beg_freq + (torch.abs(self.filt_band)))[j]
         for i in range(self.N_filt):
             band_pass = self.get_filter_bank(filt_beg_freq[i],
                                              filt_end_freq[i])
             filters[k, :] = band_pass
             k += 1
     filters = filters.view(self.N_filt * self.N_channels, 1, self.Filt_dim)
     return F.conv1d(x, filters, groups=self.N_channels)
Example #12
    def forward(self, predicts, target, norm=1.0):
        assert self.size == predicts.size(1)
        dist = torch.zeros_like(predicts)
        dist = dist.scatter(1, target.data.unsqueeze(1), 1.0).unsqueeze(1)
        kernel = cp(self.kernel).to(device=target.device)

        dist = F.conv1d(dist, kernel, padding=(4, )).squeeze(1)
        dist = Variable(dist, requires_grad=False).to(device=target.device)
        # print(predicts.size(), dist.size())

        # loss = 0.0
        # N = int(predicts.size()[0])
        # for i in range(N):
        # loss += self.crit(predicts[i], dist[i])

        return self.crit(predicts, dist) / norm
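
A hedged sketch of what the convolution above does, with my own kernel values rather than self.kernel: the one-hot target distribution is blurred along the class axis.

import torch
import torch.nn.functional as F

num_classes = 10
target = torch.tensor([4, 5, 3, 6])                 # targets kept away from the class-axis borders

one_hot = torch.zeros(len(target), num_classes).scatter_(1, target.unsqueeze(1), 1.0)
kernel = torch.tensor([[[0.05, 0.25, 0.4, 0.25, 0.05]]])
smoothed = F.conv1d(one_hot.unsqueeze(1), kernel, padding=2).squeeze(1)
print(smoothed.sum(dim=1))                          # each row still sums to 1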
Example #13
    def forward_frozen(self, x):
        # Computes the feedforward operation with the expected value for weight and biases (frozen-like)

        if self.bias:
            bias = self.bias_mu
            assert bias is self.bias_mu, "The bias input should be this layer's parameter, not a clone."
        else:
            bias = torch.zeros(self.out_channels)

        return F.conv1d(input=x,
                        weight=self.weight_mu,
                        bias=bias,
                        stride=self.stride,
                        padding=self.padding,
                        dilation=self.dilation,
                        groups=self.groups)
Example #14
    def forward(
        self,
        x: Tensor
    ) -> Tensor:

        x = F.conv1d(
            x,
            self.weight,
            self.bias,
            self.stride,
            self.padding,
            self.dilation,
            self.groups
        )

        return x
Example #15
    def transform(self,
                  wav: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        # reflect padding
        wav = wav.unsqueeze(1).unsqueeze(1)
        wav = F.pad(wav, (self.pad_amount, self.pad_amount, 0, 0),
                    mode='reflect').squeeze(1)

        # conv
        forward_trans = F.conv1d(wav,
                                 self.forward_basis,
                                 stride=self.hop_length,
                                 padding=0)
        real_part, imag_part = forward_trans.chunk(2, 1)

        return torch.sqrt(real_part**2 + imag_part**2), torch.atan2(
            imag_part.data, real_part.data)
Example #16
 def forward(ctx, input, targets):
     input = input.clamp(-30, 30)
     output = input.gather(2, targets.unsqueeze(2)).sum()
     B = input.size(0)
     num_grad = torch.zeros_like(input)
     num_grad.scatter_(2, targets.unsqueeze(2), 1.0)
     kernel = torch.FloatTensor([[[0.1, 0.8,
                                   0.1]]]).repeat(input.size(-1), 1,
                                                  1).to(input.device)
     num_grad = F.conv1d(num_grad.transpose(1, 2),
                         kernel,
                         stride=1,
                         groups=input.size(-1),
                         padding=1).transpose(1, 2)
     ctx.save_for_backward(num_grad)
     return output
Example #17
def stft(input_data):
    num_batches = input_data.size(0)
    num_samples = input_data.size(1)

    input_data = input_data.view(num_batches, 1, num_samples)
    forward_transform = F.conv1d(input_data,
                                 Variable(forward_basis, requires_grad=False),
                                 stride=hop_length,
                                 padding=filter_length)
    cutoff = int((filter_length / 2) + 1)
    real_part = forward_transform[:, :cutoff, :]
    imag_part = forward_transform[:, cutoff:, :]

    magnitude = torch.sqrt(real_part**2 + imag_part**2 + 1e-10)
    phase = torch.atan2(imag_part.data, real_part.data)
    return magnitude, phase
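
The forward_basis used above is not shown; a common construction (an assumption here, mirroring widely used conv-based STFT implementations) stacks the real and imaginary rows of the DFT matrix as conv1d filters:

import numpy as np
import torch
import torch.nn.functional as F

filter_length, hop_length = 512, 128
cutoff = filter_length // 2 + 1

fourier = np.fft.fft(np.eye(filter_length))
basis = np.vstack([np.real(fourier[:cutoff]), np.imag(fourier[:cutoff])])
forward_basis = torch.from_numpy(basis[:, None, :]).float()   # [2 * cutoff, 1, filter_length]

signal = torch.randn(1, 1, 4096)
spec = F.conv1d(signal, forward_basis, stride=hop_length, padding=filter_length)
real_part, imag_part = spec[:, :cutoff, :], spec[:, cutoff:, :]
magnitude = torch.sqrt(real_part ** 2 + imag_part ** 2 + 1e-10)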
Example #18
def downsample2(x, zeros=56):
    """
    Downsampling the input by 2 using sinc interpolation.
    Smith, Julius, and Phil Gossett. "A flexible sampling-rate conversion method."
    ICASSP'84. IEEE International Conference on Acoustics, Speech, and Signal Processing.
    Vol. 9. IEEE, 1984.
    """
    if x.shape[-1] % 2 != 0:
        x = F.pad(x, (0, 1))
    xeven = x[..., ::2]
    xodd = x[..., 1::2]
    *other, time = xodd.shape
    kernel = kernel_downsample2(zeros).to(x)
    out = xeven + F.conv1d(xodd.view(-1, 1, time), kernel,
                           padding=zeros)[..., :-1].view(*other, time)
    return out.view(*other, -1).mul(0.5)
Example #19
    def forward(self, input_data):
        num_batches, _, num_samples = input_data.size()

        self.num_samples = num_samples

        forward_transform = F.conv1d(input_data,
                                     self.forward_basis,
                                     stride=self.hop_length,
                                     padding=self.filter_length)
        cutoff = int((self.filter_length / 2) + 1)
        real_part = forward_transform[:, :cutoff, :]
        imag_part = forward_transform[:, cutoff:, :]

        magnitude = torch.sqrt(real_part**2 + imag_part**2)
        phase = torch.autograd.Variable(torch.atan2(imag_part.data, real_part.data))
        return magnitude, phase
Example #20
    def infer(self, z):
        # shape
        batch_size, group_size, n_of_groups = z.size()

        W = self.conv.weight.squeeze()

        if not hasattr(self, 'W_inverse'):
            # Reverse computation
            W_inverse = W.float().inverse()
            W_inverse = Variable(W_inverse[..., None])
            if z.type() == 'torch.cuda.HalfTensor' or z.type(
            ) == 'torch.HalfTensor':
                W_inverse = W_inverse.half()
            self.W_inverse = W_inverse
        z = F.conv1d(z, self.W_inverse, bias=None, stride=1, padding=0)
        return z
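
A standalone sketch of why this inversion works, using a random orthogonal W and toy sizes: a 1x1 convolution is a per-timestep matrix multiply, so convolving with W and then with W.inverse() recovers the input.

import torch
import torch.nn.functional as F

channels, steps = 8, 100
W = torch.linalg.qr(torch.randn(channels, channels))[0]   # random orthogonal mixing matrix
z = torch.randn(2, channels, steps)

mixed = F.conv1d(z, W[..., None])                          # forward 1x1 conv
recovered = F.conv1d(mixed, W.inverse()[..., None])        # inverse 1x1 conv
print(torch.allclose(recovered, z, atol=1e-5))             # True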
Example #21
    def forward(self, waveforms):
        """
        Parameters
        ----------
        waveforms : `torch.Tensor` (batch_size, 1, n_samples)
            Batch of waveforms.
        Returns
        -------
        features : `torch.Tensor` (batch_size, out_channels, n_samples_out)
            Batch of sinc filters activations.
        """

        self.n_ = self.n_.to(waveforms.device)

        self.window_ = self.window_.to(waveforms.device)

        low = self.min_low_hz + torch.abs(self.low_hz_)

        high = torch.clamp(low + self.min_band_hz + torch.abs(self.band_hz_),
                           self.min_low_hz, self.sample_rate / 2)
        band = (high - low)[:, 0]

        f_times_t_low = torch.matmul(low, self.n_)
        f_times_t_high = torch.matmul(high, self.n_)

        band_pass_left = (
            (torch.sin(f_times_t_high) - torch.sin(f_times_t_low)) /
            (self.n_ / 2)
        ) * self.window_  # Equivalent of Eq.4 of the reference paper (SPEAKER RECOGNITION FROM RAW WAVEFORM WITH SINCNET).
        # I just have expanded the sinc and simplified the terms. This way I avoid several useless computations.
        band_pass_center = 2 * band.view(-1, 1)
        band_pass_right = torch.flip(band_pass_left, dims=[1])

        band_pass = torch.cat(
            [band_pass_left, band_pass_center, band_pass_right], dim=1)

        band_pass = band_pass / (2 * band[:, None])

        self.filters = (band_pass).view(self.out_channels, 1, self.kernel_size)

        return F.conv1d(waveforms,
                        self.filters,
                        stride=self.stride,
                        padding=self.padding,
                        dilation=self.dilation,
                        bias=None,
                        groups=1)
Example #22
    def forward(self, input, mask_in=None):
        assert len(input.shape) == 3
        if mask_in is not None or self.last_size != tuple(input.shape):
            self.last_size = tuple(input.shape)

            with torch.no_grad():
                if self.weight_maskUpdater.type() != input.type():
                    self.weight_maskUpdater = self.weight_maskUpdater.to(input)

                if mask_in is None:
                    # if mask is not provided, create a mask
                    if self.multi_channel:
                        mask = torch.ones(input.data.shape[0],
                                          input.data.shape[1],
                                          input.data.shape[2]).to(input)
                    else:
                        mask = torch.ones(1, 1, input.data.shape[2]).to(input)
                else:
                    mask = mask_in

                self.update_mask = F.conv1d(mask,
                                            self.weight_maskUpdater,
                                            bias=None,
                                            stride=self.stride,
                                            padding=self.padding,
                                            dilation=self.dilation,
                                            groups=1)

                # for mixed precision training, change 1e-8 to 1e-6
                self.mask_ratio = self.slide_winsize / (self.update_mask +
                                                        1e-8)
                # self.mask_ratio = torch.max(self.update_mask)/(self.update_mask + 1e-8)
                self.update_mask = torch.clamp(self.update_mask, 0, 1)
                self.mask_ratio = torch.mul(self.mask_ratio, self.update_mask)

        raw_out = super(PartialConv1d, self).forward(
            torch.mul(input, mask) if mask_in is not None else input)

        if self.bias is not None:
            bias_view = self.bias.view(1, self.out_channels, 1)
            output = torch.mul(raw_out - bias_view,
                               self.mask_ratio) + bias_view
            output = torch.mul(output, self.update_mask)
        else:
            output = torch.mul(raw_out, self.mask_ratio)

        return output, self.update_mask
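
A standalone sketch of the mask update above, with toy sizes and a single channel: convolving the binary mask with an all-ones kernel counts the valid taps under each window, which gives the slide_winsize / count re-scaling ratio and the updated mask.

import torch
import torch.nn.functional as F

kernel_size = 5
slide_winsize = float(kernel_size)          # 1 input channel * kernel_size

mask = torch.ones(1, 1, 20)
mask[..., 8:14] = 0                         # a hole of missing samples

ones_kernel = torch.ones(1, 1, kernel_size)
valid_count = F.conv1d(mask, ones_kernel, padding=kernel_size // 2)

mask_ratio = slide_winsize / (valid_count + 1e-8)
update_mask = torch.clamp(valid_count, 0, 1)
mask_ratio = mask_ratio * update_mask       # zero wherever no valid input remains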
Example #23
    def attention_conv(self,
                       values,
                       keys,
                       queries,
                       key_mask=None,
                       mask=None,
                       layer_i=0,
                       decoder_position=-1):

        queries_shape = queries.shape  # B*H x L x proj_dim
        values_shape = values.shape
        batch_size = queries_shape[0] // self.num_heads

        attn_configs, conv_filters = self.attn_configs[layer_i]
        attn_type, attn_std, attn_offset = attn_configs[
            'attn_type'], attn_configs['attn_std'], attn_configs['attn_offset']

        curr_conv_filter = []
        for i in range(self.num_heads):
            curr_conv_filter.append(conv_filters[attn_std[i]][attn_offset[i]])
        curr_conv_filter = torch.cat(curr_conv_filter, dim=0)

        values = values.view(batch_size, self.num_heads, values_shape[1],
                             values_shape[2])

        if key_mask is not None:
            values.masked_fill_(key_mask[:, None, :, None], float(0))

        values = values.transpose(3, 1).transpose(3, 2).contiguous().view(
            batch_size * self.projection_dim, self.num_heads, -1)
        attended = F.conv1d(values,
                            curr_conv_filter,
                            padding=self.half_window +
                            self.max_absolute_offset,
                            groups=self.num_heads)
        attended = attended.view(batch_size, self.projection_dim,
                                 self.num_heads,
                                 -1).transpose(1, 2).transpose(2,
                                                               3).contiguous()

        # recompute attended indices
        attn_indices = self.get_attn_indices(
            max(queries_shape[1], decoder_position + 1), attn_offset,
            values.device)

        return self.gather_reshape(attended, attn_indices, batch_size,
                                   queries_shape[1], decoder_position)
Example #24
def conv1d_same_padding(input, weight, bias, stride, dilation, groups):
    # stride and dilation are expected to be tuples.
    kernel, dilation, stride = weight.size(2), dilation[0], stride[0]
    l_out = l_in = input.size(2)
    padding = ((l_out - 1) * stride) - l_in + (dilation * (kernel - 1)) + 1
    if padding % 2 != 0:
        input = F.pad(input, [0, 1])

    return F.conv1d(
        input=input,
        weight=weight,
        bias=bias,
        stride=stride,
        padding=padding // 2,
        dilation=dilation,
        groups=groups,
    )
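
A quick usage check, assuming the conv1d_same_padding above is in scope: with stride 1 the output length matches the input length, as in TensorFlow-style "same" padding.

import torch

x = torch.randn(4, 3, 101)
w = torch.randn(8, 3, 5)
b = torch.zeros(8)

y = conv1d_same_padding(x, w, b, stride=(1,), dilation=(1,), groups=1)
print(y.shape)   # torch.Size([4, 8, 101])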
Example #25
    def forward(self, x):
        cuda = x.is_cuda
        filters = torch.zeros((self.N_filt, self.Filt_dim))
        N = self.Filt_dim
        t_right = torch.linspace(1, (N - 1) / 2, steps=int(
            (N - 1) / 2)) / self.fs
        if cuda:
            filters = filters.to('cuda')
            t_right = t_right.to('cuda')

        min_freq = 50.0
        min_band = 50.0
        filt_beg_freq = torch.abs(self.filt_b1) + min_freq / self.freq_scale
        filt_end_freq = filt_beg_freq + (torch.abs(self.filt_band) + \
                                         min_band / self.freq_scale)
        n = torch.linspace(0, N, steps=N)
        # Filter window (hamming)
        window = (0.54 - 0.46 * torch.cos(2 * math.pi * n / N)).float()
        if cuda:
            window = window.to('cuda')
        for i in range(self.N_filt):
            low_pass1 = 2 * filt_beg_freq[i].float() * \
                sinc(filt_beg_freq[i].float() * self.freq_scale, t_right, cuda)
            low_pass2 = 2 * filt_end_freq[i].float() * \
                sinc(filt_end_freq[i].float() * self.freq_scale, t_right, cuda)
            band_pass = (low_pass2 - low_pass1)
            band_pass = band_pass / torch.max(band_pass)
            if cuda:
                band_pass = band_pass.to('cuda')

            filters[i, :] = band_pass * window
        if self.padding == 'SAME':
            if self.stride > 1:
                x_p = F.pad(x, (self.Filt_dim // 2 - 1, self.Filt_dim // 2),
                            mode=self.pad_mode)
            else:
                x_p = F.pad(x, (self.Filt_dim // 2, self.Filt_dim // 2),
                            mode=self.pad_mode)
        else:
            x_p = x
        out = F.conv1d(x_p,
                       filters.view(self.N_filt, 1, self.Filt_dim),
                       stride=self.stride)
        return out
Example #26
def conv1d_same_padding(input, weight, bias, stride, dilation, groups):
    kernel = weight.size(2)
    dilation = dilation[0]
    stride = stride[0]
    size = input.size(2)  # input length along the last (time) dimension
    padding = (
        ((size - 1) * stride) - size + (dilation * (kernel - 1)) + 1)  # // 2
    if padding % 2 != 0:
        input = F.pad(input, [0, 1])

    return F.conv1d(input=input,
                    weight=weight,
                    bias=bias,
                    stride=stride,
                    padding=padding // 2,
                    dilation=dilation,
                    groups=groups)
Example #27
    def final_transformation(self, track_release):
        """
        final convolution with iGlusNFr kernel and affine transformation
        :param track_release:
        :return:
        """
        # convolve with iGluSNFR kernel
        # treat channel as batch dimension, because all get same iGlu kernel.
        # ToDo: change this when using batched inputs!
        x = track_release.T[:, None, :]
        x = F.conv1d(x, self.iglusnfr_kernel)
        x = x[:, 0, self.steady_state_steps:]  # CD

        # normalize (mean=0, norm=1) so correlation can be computed easily
        x = x - torch.mean(x, dim=1, keepdim=True)
        x = x / (torch.norm(x, 2, dim=1, keepdim=True) + 1e-10)
        return x
Example #28
 def forward_mask(self, mask):
     new_mask = mask.unsqueeze(1).float()
     cnn_weight = torch.ones((1, 1, self.conv_layers[0].kernel_size[0]),
                             device=mask.device,
                             dtype=torch.float)
     new_mask = F.conv1d(new_mask, cnn_weight, None,
                         self.conv_layers[0].stride[0],
                         self.conv_layers[0].padding[0], 1, 1)
     if self.max_pool:
         new_mask = F.max_pool1d(new_mask,
                                 self.conv_layers[2].kernel_size[0],
                                 self.conv_layers[2].stride[0],
                                 self.conv_layers[2].padding[0], 1, False,
                                 False)
     new_mask = new_mask.squeeze(1)
     new_mask = (new_mask > 0)
     return new_mask
Example #29
def _move_ptr_fw(stack_ptr):
    """
    Move the stack pointer forward (i.e. to push to stack).
    stack_ptr: (batch_size, stack_len)
    Return: (batch_size, stack_len)
    """
    filter_fw = torch.FloatTensor([1, 0, 0]).view(1, 1, 3).to(stack_ptr.device)
    batch_size, stack_len = stack_ptr.size()
    new_stack_ptr = F.conv1d(stack_ptr.view(batch_size, 1, stack_len),
                             filter_fw,
                             padding=1).view(batch_size, stack_len)
    # when the stack pointer is already at the stack top, keep
    # the pointer in the same location (otherwise the pointer will be all zero)
    stack_top_mask = torch.zeros(stack_len).to(stack_ptr.device)
    stack_top_mask[stack_len - 1] = 1  # [stack_len, ]
    new_stack_ptr += stack_top_mask * stack_ptr
    return new_stack_ptr
Example #30
def _move_ptr_bw(stack_ptr):
    """
    Move the stack pointer backward (i.e. to pop from stack).
    """
    filter_fw = torch.tensor([0, 0, 1]).float().view(1, 1,
                                                     3).to(stack_ptr.device)
    batch_size, stack_len = stack_ptr.shape

    new_stack_ptr = F.conv1d(stack_ptr.view(batch_size, 1, stack_len),
                             filter_fw,
                             padding=1).view(batch_size, stack_len)
    # when the stack pointer is already at the stack bottom, keep
    # the pointer in the same location (otherwise the pointer will be all zero)
    stack_bottom_mask = torch.zeros(stack_len).to(stack_ptr.device)
    stack_bottom_mask[0] = 1
    new_stack_ptr += stack_bottom_mask * stack_ptr
    return new_stack_ptr
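
A quick standalone check of the two helpers above: a one-hot stack pointer is shifted one slot by the [1, 0, 0] and [0, 0, 1] filters, and is held in place at the stack top or bottom.

import torch

ptr = torch.zeros(1, 5)
ptr[0, 2] = 1.0

print(_move_ptr_fw(ptr))   # pointer moves from slot 2 to slot 3
print(_move_ptr_bw(ptr))   # pointer moves from slot 2 to slot 1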
Example #31
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward propagation

        Args:
            x: [N, in_channels, L]

        Returns:
            [N, out_channels, L]

        """
        out = F.conv1d(x, self.weight, self.bias, self.stride, self.padding,
                       self.dilation, self.groups)
        if self.kernel_size[0] > 0:
            out = slice_axis(out, axis=2, begin=0, end=-self.padding[0])

        return out
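
An alternative, standalone way to get the same causal behaviour is to pad only on the left by (kernel_size - 1) * dilation instead of padding symmetrically and trimming; sizes below are illustrative.

import torch
import torch.nn.functional as F

kernel_size, dilation = 3, 2
x = torch.randn(1, 4, 50)       # [N, in_channels, L]
w = torch.randn(8, 4, kernel_size)

left_pad = (kernel_size - 1) * dilation
y = F.conv1d(F.pad(x, (left_pad, 0)), w, dilation=dilation)
print(y.shape)                  # torch.Size([1, 8, 50]); output[t] only sees inputs at positions <= t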
Example #32
    def forward(self, x_waveform, lengths_waveform):

        x_preemp = self.preemp(x_waveform.permute(0, 2, 1))
        x_comp = self.comp(x_preemp)

        x_even = x_comp[:, 0::2, :]
        x_odd = x_comp[:, 1::2, :]
        x_abs = torch.sqrt(x_even * x_even + x_odd * x_odd + self.epsilon)

        x_lowpass = F.conv1d(x_abs, self.lowpass_weight, stride=160, groups=40)

        x_log = torch.log(1.0 + torch.abs(x_lowpass))

        x_norm = self.instancenorm(x_log).permute(0, 2, 1)

        x_lengths = lengths_waveform - 1
        x_lengths = (x_lengths - (400 - 1)) // 1
        x_lengths = (x_lengths - (400 - 160)) // 160

        seqlen = x_norm.shape[1]

        if hp.frame_stacking:
            if seqlen % 3 == 0:
                x_norm = torch.cat(
                    (x_norm[:, 0::3], x_norm[:, 1::3], x_norm[:, 2::3, :]),
                    dim=2)
            elif seqlen % 3 == 1:
                x_norm = torch.cat((x_norm[:, 0:-1:3, :], x_norm[:, 1::3, :],
                                    x_norm[:, 2::3, :]),
                                   dim=2)
            elif seqlen % 3 == 2:
                x_norm = torch.cat((x_norm[:, 0:-2:3, :], x_norm[:, 1:-1:3, :],
                                    x_norm[:, 2::3, :]),
                                   dim=2)

            x_lengths = x_lengths // 3  # integer division keeps the lengths as ints

        x = nn.utils.rnn.pack_padded_sequence(x_norm,
                                              x_lengths.tolist(),
                                              batch_first=True)

        h, (_, _) = self.bi_lstm(x)

        hbatch, lengths = nn.utils.rnn.pad_packed_sequence(h, batch_first=True)

        return hbatch
Example #33
    def forward(self, x):
        device, n = x.device, x.shape[1]

        res, gate = x.chunk(2, dim=-1)
        gate = self.norm(gate)

        weight, bias = self.proj.weight, self.proj.bias
        if self.causal:
            weight, bias = weight[:n, :n], bias[:n]
            mask = torch.ones(weight.shape[:2], device=device).triu_(1).bool()
            weight = weight.masked_fill(mask[..., None], 0.)

        gate = F.conv1d(gate, weight, bias)

        if exists(self.attn):
            gate += self.attn(x)
        return gate * res
Example #34
    def forward(self, decoder_input, z, drop_prob):
        """
        :param decoder_input: tensor with shape of [batch_size, seq_len, embed_size]
        :param z: sequence latent variable with shape of [batch_size, latent_variable_size]
        :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout

        :return: unnormalized logits of the sentence word distribution probabilities
                 with shape of [batch_size, seq_len, word_vocab_size]
        """

        assert parameters_allocation_check(self), \
            'Invalid CUDA options. Parameters should be allocated in the same memory'

        [batch_size, seq_len, _] = decoder_input.size()

        '''
            decoder is conditioned on context via additional bias = W_cond * z to every input token
        '''

        z = t.cat([z] * seq_len, 1).view(batch_size, seq_len, self.params.latent_variable_size)
        decoder_input = t.cat([decoder_input, z], 2)
        decoder_input = F.dropout(decoder_input, drop_prob)

        # x is tensor with shape [batch_size, input_size=in_channels, seq_len=input_width]
        x = decoder_input.transpose(1, 2).contiguous()

        for layer, kernel in enumerate(self.kernels):
            # apply conv layer with non-linearity and drop last elements of sequence to perform input shifting
            x = F.conv1d(x, kernel,
                         bias=self.biases[layer],
                         dilation=self.params.decoder_dilations[layer],
                         padding=self.params.decoder_paddings[layer])

            x_width = x.size()[2]
            x = x[:, :, :(x_width - self.params.decoder_paddings[layer])].contiguous()

            x = F.relu(x)


        x = x.transpose(1, 2).contiguous()
        x = x.view(-1, self.out_size)
        x = self.fc(x)
        result = x.view(-1, seq_len, self.params.word_vocab_size)

        return result
Example #35
    def _transform(self, x):
        x = x.view(x.shape[0], 1, -1)

        # frequency decomposition
        features = F.conv1d(
            x, self.weights, stride=self.lap, padding=self.basis_size)

        # half-wave rectification
        features = F.relu(features)

        # log magnitude
        features = torch.log(1 + features * self.log_factor)

        # perceptual frequency weighting
        if self.frequency_weights is not None:
            features = features * self.frequency_weights

        return features
Example #36
 def forward(self, x):
     x = x.view(-1, 1, x.shape[-1])
     filters = self._filter_bank().view(len(self.scale), 1, self.taps)
     x = F.conv1d(x, filters, stride=1, padding=self.taps // 2)
     return x
Example #37
 def convolve(self, x):
     x = x.view(-1, 1, x.shape[-1])
     x = F.conv1d(
         x, self.filter_bank, padding=self.filter_bank.shape[-1] // 2)
     return x