def circular_convolution_conv(keys, values, cuda=False):
    '''
    For the circular convolution of x and y to be equivalent, you must pad the
    vectors with zeros to length at least N + L - 1 before you take the DFT.
    After you invert the product of the DFTs, retain only the first
    N + L - 1 elements.
    '''
    assert values.dim() == keys.dim() == 2, "only 2 dims supported"
    batch_size = keys.size(0)
    keys_feature_size = keys.size(1)
    values_feature_size = values.size(1)
    required_size = keys_feature_size + values_feature_size - 1

    # zero-pad up to N + L - 1
    zero_for_keys = Variable(float_type(cuda)(
        batch_size, required_size - keys_feature_size).zero_())
    zero_for_values = Variable(float_type(cuda)(
        batch_size, required_size - values_feature_size).zero_())
    keys = torch.cat([keys, zero_for_keys], -1)
    values = torch.cat([values, zero_for_values], -1)

    # do the conv, reshape and return
    return F.conv1d(values.view(batch_size, 1, -1),
                    keys.view(batch_size, 1, -1)).squeeze()[:, 0:required_size]
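# The padding rule in the docstring can be sanity-checked against a direct FFT
# implementation. A minimal sketch (this helper is illustrative, not part of
# the snippet above):
import torch

def circular_convolution_fft(keys, values):
    # pad both operands to N + L - 1, multiply the DFTs, then invert
    n = keys.size(-1) + values.size(-1) - 1
    kf = torch.fft.rfft(keys, n=n)
    vf = torch.fft.rfft(values, n=n)
    return torch.fft.irfft(kf * vf, n=n)  # already length N + L - 1

keys, values = torch.randn(4, 8), torch.randn(4, 8)
print(circular_convolution_fft(keys, values).shape)  # torch.Size([4, 15])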
def forward(self, x):
    """
    :param x: tensor with shape [batch_size, max_seq_len, max_word_len, char_embed_size]
    :return: tensor with shape [batch_size, max_seq_len, depth_sum]

    applies a multi-kernel 1d-conv layer over every word in the input,
    with max-over-time pooling, to emit a fixed-size output
    """
    input_size = x.size()
    input_size_len = len(input_size)
    assert input_size_len == 4, \
        'Wrong input rank, must be equal to 4, but {} found'.format(input_size_len)

    [batch_size, seq_len, _, embed_size] = input_size
    assert embed_size == self.params.char_embed_size, \
        'Wrong embedding size, must be equal to {}, but {} found'.format(
            self.params.char_embed_size, embed_size)

    # merge batch and sequence dims so every word is convolved independently
    x = x.view(-1, self.params.max_word_len,
               self.params.char_embed_size).transpose(1, 2).contiguous()

    xs = [t.tanh(F.conv1d(x, kernel, bias=self.biases[i]))
          for i, kernel in enumerate(self.kernels)]
    xs = [x.max(2)[0] for x in xs]  # max-over-time pooling

    x = t.cat(xs, 1)
    x = x.view(batch_size, seq_len, -1)

    return x
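# The multi-kernel conv plus max-over-time pattern from the method above, in
# isolation as a self-contained sketch (all shapes are illustrative):
import torch
import torch.nn.functional as F

x = torch.randn(32, 15, 10)                  # (words, char_embed_size, max_word_len)
kernels = [torch.randn(25, 15, w) for w in (2, 3, 4)]
feats = [torch.tanh(F.conv1d(x, k)) for k in kernels]
feats = [f.max(2)[0] for f in feats]         # max-over-time: (words, 25) each
out = torch.cat(feats, 1)                    # (words, 75) == depth_sum
print(out.shape)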
def forward(self, x):
    if self.deterministic:
        assert self.training == False, "Flag deterministic is True. This should not be used in training."
        return F.conv1d(x, self.post_weight_mu, self.bias_mu)

    batch_size = x.size()[0]
    # apply local reparametrisation trick, see [1] Eq. (6),
    # to the parametrisation given in [3] Eq. (6)
    mu_activations = F.conv1d(x, self.weight_mu, self.bias_mu, self.stride,
                              self.padding, self.dilation, self.groups)
    var_activations = F.conv1d(x.pow(2), self.weight_logvar.exp(),
                               self.bias_logvar.exp(), self.stride,
                               self.padding, self.dilation, self.groups)

    # compute z
    # note that we reparametrise according to [2] Eq. (11) (not [1])
    z = reparametrize(self.z_mu.repeat(batch_size, 1, 1),
                      self.z_logvar.repeat(batch_size, 1, 1),
                      sampling=self.training, cuda=self.cuda)
    z = z[:, :, None]

    return reparametrize(mu_activations * z,
                         (var_activations * z.pow(2)).log(),
                         sampling=self.training, cuda=self.cuda)
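# The `reparametrize` helper called above is not shown. A minimal sketch of
# what it presumably does, assuming the usual mu + eps * exp(logvar / 2) form
# (the `cuda` flag is kept only for signature compatibility):
import torch

def reparametrize(mu, logvar, sampling=True, cuda=False):
    if not sampling:
        return mu
    std = logvar.mul(0.5).exp()
    eps = torch.randn_like(std)  # eps ~ N(0, I), created on std's device
    return mu + eps * std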
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """
    we have:
    w(float) -- quant - dequant \
    x(float) ------------- F.conv1d ---

    In the full model, we will see
    w(float) -- quant - *dequant \
    x -- quant --- *dequant -- *F.conv1d --- *quant - dequant

    and the backend should be able to fuse the ops with `*` into a quantized conv1d
    """
    weight_dequant = self.get_weight()
    result = F.conv1d(x, weight_dequant, self.bias, self.stride,
                      self.padding, self.dilation, self.groups)
    return result
def _upsample2(x, zeros=24):
    """
    Upsample x by a factor of two. The output will be exactly twice as long as the input.

    Args:
        x (Tensor): signal to upsample, time should be the last dimension
        zeros (int): number of zero crossings to keep in the sinc filter.

    This function is kept only for reference; you should use the more generic
    `resample_frac` one. This function does not perform anti-aliasing filtering.
    """
    *other, time = x.shape
    kernel = _kernel_upsample2_downsample2(zeros).to(x)
    out = F.conv1d(x.view(-1, 1, time), kernel, padding=zeros)[..., 1:].view(*other, time)
    y = torch.stack([x, out], dim=-1)
    return y.view(*other, -1)
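# `_kernel_upsample2_downsample2` is not defined in this snippet. A plausible
# sketch, assuming the standard Hann-windowed sinc construction used by
# julius/demucs-style resamplers (the helper name and windowing are assumptions):
import math
import torch

def _kernel_upsample2_downsample2(zeros=24):
    # windowed sinc evaluated at the half-sample offsets t = +-0.5, +-1.5, ...
    win = torch.hann_window(4 * zeros + 1, periodic=False)
    winodd = win[1::2]
    t = torch.linspace(-zeros + 0.5, zeros - 0.5, 2 * zeros) * math.pi
    return (torch.sin(t) / t * winodd).view(1, 1, -1)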
def bspline_kernel_1d(sigma, order=2, asTensor=False, dtype=th.float32, device='cpu'):
    kernel_ones = th.ones(1, 1, sigma)
    kernel = kernel_ones

    # an order-n B-spline kernel is the n-fold convolution of a box kernel with itself
    for i in range(1, order + 1):
        kernel = F.conv1d(kernel, kernel_ones, padding=i * sigma) / sigma

    if asTensor:
        return kernel[0, 0, ...].to(dtype=dtype, device=device)
    else:
        return kernel[0, 0, ...].numpy()
def forward(self, inputs):
    if inputs.dim() == 2:
        inputs = torch.unsqueeze(inputs, 1)

    inputs = F.pad(inputs,
                   [self.win_len - self.stride, self.win_len - self.stride])
    outputs = F.conv1d(inputs, self.weight, stride=self.stride)

    if self.feature_type == 'complex':
        return outputs
    else:
        dim = self.dim // 2 + 1
        real = outputs[:, :dim, :]
        imag = outputs[:, dim:, :]
        mags = torch.sqrt(real**2 + imag**2)
        phase = torch.atan2(imag, real)
        return mags, phase
def get_histogram_filter_indices(img_shifted: torch.Tensor, bins: int, r: int = 3):
    """Return indices of images not in the first/last histogram buckets"""
    indices = []
    weight = torch.ones([1, 1, r], device=img_shifted.device)
    for img_shifted_i in img_shifted:
        stats = torch.histc(img_shifted_i, bins, -1, 1)
        # smooth the histogram with a box filter before looking for local maxima
        stats = F.conv1d(stats.view(1, 1, -1), weight, padding=r // 2)
        stats = stats.view(-1).cpu().numpy()
        maxes = np.r_[True, stats[1:] >= stats[:-1]] & np.r_[stats[:-1] >= stats[1:], True]
        maxes = np.nonzero(maxes)[0]
        indices.append(len(maxes) >= 2)
    return torch.tensor(indices)
def __call__(self, u):
    """
    Args:
        u (Tensor): [B, C, H]
    Returns:
        div_u: [B, C, H]
    """
    u_shape = u.shape
    u = u.view(-1, 1, *u_shape[-1:])
    u = F.conv1d(F.pad(u, self.padding, mode='circular'),
                 self.weight, stride=1, padding=0, bias=None) / (self.dx**4)
    return u.view(u_shape)
def forward(self, x: torch.Tensor):
    def T(w):
        return w.T if self.fan_in_fan_out else w

    if self.merged:
        return F.linear(x, T(self.weight), bias=self.bias)
    else:
        result = F.linear(x, T(self.weight), bias=self.bias)
        if self.r > 0:
            after_A = F.linear(self.lora_dropout(x), self.lora_A)
            after_B = F.conv1d(after_A.transpose(-2, -1),
                               self.lora_B.unsqueeze(-1),
                               groups=sum(self.enable_lora)).transpose(-2, -1)
            result += self.zero_pad(after_B) * self.scaling
        return result
def forward(self, x):
    k = 0
    min_freq = 1.0
    min_band = 10.0

    filters = torch.zeros(
        (self.N_filt * self.N_channels, self.Filt_dim)).to(self.device)
    for j in range(self.N_channels):
        filt_beg_freq = torch.abs(self.filt_low)[j] + min_freq / self.fs
        filt_end_freq = (filt_beg_freq + (torch.abs(self.filt_band)))[j]
        for i in range(self.N_filt):
            band_pass = self.get_filter_bank(filt_beg_freq[i], filt_end_freq[i])
            filters[k, :] = band_pass
            k += 1

    filters = filters.view(self.N_filt * self.N_channels, 1, self.Filt_dim)
    return F.conv1d(x, filters, groups=self.N_channels)
def forward(self, predicts, target, norm=1.0):
    assert self.size == predicts.size(1)
    dist = torch.zeros_like(predicts)
    # one-hot encode the targets, then blur them with the smoothing kernel
    dist = dist.scatter(1, target.data.unsqueeze(1), 1.0).unsqueeze(1)
    kernel = cp(self.kernel).to(device=target.device)
    dist = F.conv1d(dist, kernel, padding=(4,)).squeeze(1)
    dist = Variable(dist, requires_grad=False).to(device=target.device)
    return self.crit(predicts, dist) / norm
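# For intuition: the 9-tap kernel turns each one-hot target row into a soft
# distribution over neighbouring classes. A toy run (the Gaussian-shaped
# kernel below is an illustrative stand-in for `self.kernel`):
import torch
import torch.nn.functional as F

kernel = torch.softmax(-torch.arange(-4., 5.) ** 2 / 2, dim=0).view(1, 1, -1)
one_hot = torch.zeros(1, 1, 20)
one_hot[0, 0, 7] = 1.0
smoothed = F.conv1d(one_hot, kernel, padding=4)
print(smoothed.shape)        # torch.Size([1, 1, 20])
print(smoothed[0, 0, 5:10])  # probability mass spread around index 7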
def forward_frozen(self, x):
    # Computes the feedforward operation with the expected values for weights and biases (frozen-like)
    if self.bias:
        bias = self.bias_mu
        assert bias is self.bias_mu, "The bias passed in should be this layer's parameter, not a clone."
    else:
        bias = torch.zeros(self.out_channels)
    return F.conv1d(input=x,
                    weight=self.weight_mu,
                    bias=bias,
                    stride=self.stride,
                    padding=self.padding,
                    dilation=self.dilation,
                    groups=self.groups)
def forward(self, x: Tensor) -> Tensor:
    x = F.conv1d(x, self.weight, self.bias, self.stride,
                 self.padding, self.dilation, self.groups)
    return x
def transform(self, wav: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    # reflect padding
    wav = wav.unsqueeze(1).unsqueeze(1)
    wav = F.pad(wav, (self.pad_amount, self.pad_amount, 0, 0),
                mode='reflect').squeeze(1)

    # conv
    forward_trans = F.conv1d(wav, self.forward_basis,
                             stride=self.hop_length, padding=0)
    real_part, imag_part = forward_trans.chunk(2, 1)

    return torch.sqrt(real_part**2 + imag_part**2), torch.atan2(
        imag_part.data, real_part.data)
def forward(ctx, input, targets):
    input = input.clamp(-30, 30)
    output = input.gather(2, targets.unsqueeze(2)).sum()

    # build a one-hot gradient surrogate and smooth it over neighbouring frames
    num_grad = torch.zeros_like(input)
    num_grad.scatter_(2, targets.unsqueeze(2), 1.0)
    kernel = torch.FloatTensor([[[0.1, 0.8, 0.1]]]).repeat(
        input.size(-1), 1, 1).to(input.device)
    num_grad = F.conv1d(num_grad.transpose(1, 2), kernel, stride=1,
                        groups=input.size(-1), padding=1).transpose(1, 2)
    ctx.save_for_backward(num_grad)
    return output
def stft(input_data):
    num_batches = input_data.size(0)
    num_samples = input_data.size(1)
    input_data = input_data.view(num_batches, 1, num_samples)

    forward_transform = F.conv1d(input_data,
                                 Variable(forward_basis, requires_grad=False),
                                 stride=hop_length,
                                 padding=filter_length)

    cutoff = int((filter_length / 2) + 1)
    real_part = forward_transform[:, :cutoff, :]
    imag_part = forward_transform[:, cutoff:, :]

    magnitude = torch.sqrt(real_part**2 + imag_part**2 + 1e-10)
    phase = torch.atan2(imag_part.data, real_part.data)
    return magnitude, phase
def downsample2(x, zeros=56):
    """
    Downsample the input by 2 using sinc interpolation.

    Smith, Julius, and Phil Gossett. "A flexible sampling-rate conversion method."
    ICASSP'84. IEEE International Conference on Acoustics, Speech, and Signal
    Processing. Vol. 9. IEEE, 1984.
    """
    if x.shape[-1] % 2 != 0:
        x = F.pad(x, (0, 1))
    xeven = x[..., ::2]
    xodd = x[..., 1::2]
    *other, time = xodd.shape
    kernel = kernel_downsample2(zeros).to(x)
    out = xeven + F.conv1d(xodd.view(-1, 1, time), kernel,
                           padding=zeros)[..., :-1].view(*other, time)
    return out.view(*other, -1).mul(0.5)
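# A quick shape round-trip for the pair above, assuming `kernel_downsample2`
# follows the same windowed-sinc construction sketched after `_upsample2`.
# Up/down is not an exact inverse; only the shapes round-trip:
x = torch.randn(2, 1, 1000)
up = _upsample2(x)        # (2, 1, 2000): exactly twice as long
down = downsample2(up)    # (2, 1, 1000): halved back to the original length
print(up.shape, down.shape)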
def forward(self, input_data):
    num_batches, _, num_samples = input_data.size()
    self.num_samples = num_samples

    forward_transform = F.conv1d(input_data,
                                 self.forward_basis,
                                 stride=self.hop_length,
                                 padding=self.filter_length)

    cutoff = int((self.filter_length / 2) + 1)
    real_part = forward_transform[:, :cutoff, :]
    imag_part = forward_transform[:, cutoff:, :]

    magnitude = torch.sqrt(real_part**2 + imag_part**2)
    phase = torch.autograd.Variable(torch.atan2(imag_part.data, real_part.data))
    return magnitude, phase
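# Both STFT snippets above rely on a precomputed `forward_basis`. A common way
# to build it (as in the widely copied tacotron2 stft.py recipe; treat this as
# a sketch, since neither snippet shows its own construction):
import numpy as np
import torch

filter_length = 1024
fourier_basis = np.fft.fft(np.eye(filter_length))
cutoff = filter_length // 2 + 1
fourier_basis = np.vstack([np.real(fourier_basis[:cutoff, :]),
                           np.imag(fourier_basis[:cutoff, :])])
# shape (2 * cutoff, 1, filter_length): real rows first, then imaginary rows,
# matching the real/imag channel split used above
forward_basis = torch.FloatTensor(fourier_basis[:, None, :])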
def infer(self, z):
    # z: (batch_size, group_size, n_of_groups)
    batch_size, group_size, n_of_groups = z.size()
    W = self.conv.weight.squeeze()

    if not hasattr(self, 'W_inverse'):
        # reverse computation: invert the 1x1 convolution's weight matrix
        W_inverse = W.float().inverse()
        W_inverse = Variable(W_inverse[..., None])
        if z.type() == 'torch.cuda.HalfTensor' or z.type() == 'torch.HalfTensor':
            W_inverse = W_inverse.half()
        self.W_inverse = W_inverse

    z = F.conv1d(z, self.W_inverse, bias=None, stride=1, padding=0)
    return z
def forward(self, waveforms):
    """
    Parameters
    ----------
    waveforms : `torch.Tensor` (batch_size, 1, n_samples)
        Batch of waveforms.

    Returns
    -------
    features : `torch.Tensor` (batch_size, out_channels, n_samples_out)
        Batch of sinc filter activations.
    """
    self.n_ = self.n_.to(waveforms.device)
    self.window_ = self.window_.to(waveforms.device)

    low = self.min_low_hz + torch.abs(self.low_hz_)
    high = torch.clamp(low + self.min_band_hz + torch.abs(self.band_hz_),
                       self.min_low_hz, self.sample_rate / 2)
    band = (high - low)[:, 0]

    f_times_t_low = torch.matmul(low, self.n_)
    f_times_t_high = torch.matmul(high, self.n_)

    # Equivalent of Eq. 4 of the reference paper ("Speaker Recognition from
    # Raw Waveform with SincNet"). The sinc is expanded and the terms
    # simplified, which avoids several useless computations.
    band_pass_left = ((torch.sin(f_times_t_high) - torch.sin(f_times_t_low))
                      / (self.n_ / 2)) * self.window_
    band_pass_center = 2 * band.view(-1, 1)
    band_pass_right = torch.flip(band_pass_left, dims=[1])

    band_pass = torch.cat(
        [band_pass_left, band_pass_center, band_pass_right], dim=1)
    band_pass = band_pass / (2 * band[:, None])

    self.filters = band_pass.view(self.out_channels, 1, self.kernel_size)

    return F.conv1d(waveforms, self.filters, stride=self.stride,
                    padding=self.padding, dilation=self.dilation,
                    bias=None, groups=1)
def forward(self, input, mask_in=None):
    assert len(input.shape) == 3
    if mask_in is not None or self.last_size != tuple(input.shape):
        self.last_size = tuple(input.shape)

        with torch.no_grad():
            if self.weight_maskUpdater.type() != input.type():
                self.weight_maskUpdater = self.weight_maskUpdater.to(input)

            if mask_in is None:
                # if mask is not provided, create a mask
                if self.multi_channel:
                    mask = torch.ones(input.data.shape[0], input.data.shape[1],
                                      input.data.shape[2]).to(input)
                else:
                    mask = torch.ones(1, 1, input.data.shape[2]).to(input)
            else:
                mask = mask_in

            self.update_mask = F.conv1d(mask, self.weight_maskUpdater,
                                        bias=None, stride=self.stride,
                                        padding=self.padding,
                                        dilation=self.dilation, groups=1)

            # for mixed precision training, change 1e-8 to 1e-6
            self.mask_ratio = self.slide_winsize / (self.update_mask + 1e-8)
            self.update_mask = torch.clamp(self.update_mask, 0, 1)
            self.mask_ratio = torch.mul(self.mask_ratio, self.update_mask)

    raw_out = super(PartialConv1d, self).forward(
        torch.mul(input, mask) if mask_in is not None else input)

    if self.bias is not None:
        bias_view = self.bias.view(1, self.out_channels, 1)
        output = torch.mul(raw_out - bias_view, self.mask_ratio) + bias_view
        output = torch.mul(output, self.update_mask)
    else:
        output = torch.mul(raw_out, self.mask_ratio)

    return output, self.update_mask
def attention_conv(self, values, keys, queries, key_mask=None, mask=None,
                   layer_i=0, decoder_position=-1):
    queries_shape = queries.shape  # B*H x L x proj_dim
    values_shape = values.shape
    batch_size = queries_shape[0] // self.num_heads

    attn_configs, conv_filters = self.attn_configs[layer_i]
    attn_type, attn_std, attn_offset = (attn_configs['attn_type'],
                                        attn_configs['attn_std'],
                                        attn_configs['attn_offset'])

    curr_conv_filter = []
    for i in range(self.num_heads):
        curr_conv_filter.append(conv_filters[attn_std[i]][attn_offset[i]])
    curr_conv_filter = torch.cat(curr_conv_filter, dim=0)

    values = values.view(batch_size, self.num_heads,
                         values_shape[1], values_shape[2])
    if key_mask is not None:
        values.masked_fill_(key_mask[:, None, :, None], float(0))
    values = values.transpose(3, 1).transpose(3, 2).contiguous().view(
        batch_size * self.projection_dim, self.num_heads, -1)

    attended = F.conv1d(values, curr_conv_filter,
                        padding=self.half_window + self.max_absolute_offset,
                        groups=self.num_heads)
    attended = attended.view(batch_size, self.projection_dim, self.num_heads,
                             -1).transpose(1, 2).transpose(2, 3).contiguous()

    # recompute attended indices
    attn_indices = self.get_attn_indices(
        max(queries_shape[1], decoder_position + 1), attn_offset, values.device)

    return self.gather_reshape(attended, attn_indices, batch_size,
                               queries_shape[1], decoder_position)
def conv1d_same_padding(input, weight, bias, stride, dilation, groups):
    # stride and dilation are expected to be tuples.
    kernel, dilation, stride = weight.size(2), dilation[0], stride[0]
    l_out = l_in = input.size(2)
    padding = ((l_out - 1) * stride) - l_in + (dilation * (kernel - 1)) + 1
    if padding % 2 != 0:
        input = F.pad(input, [0, 1])

    return F.conv1d(
        input=input,
        weight=weight,
        bias=bias,
        stride=stride,
        padding=padding // 2,
        dilation=dilation,
        groups=groups,
    )
def forward(self, x):
    cuda = x.is_cuda
    filters = torch.zeros((self.N_filt, self.Filt_dim))
    N = self.Filt_dim
    t_right = torch.linspace(1, (N - 1) / 2, steps=int((N - 1) / 2)) / self.fs
    if cuda:
        filters = filters.to('cuda')
        t_right = t_right.to('cuda')

    min_freq = 50.0
    min_band = 50.0
    filt_beg_freq = torch.abs(self.filt_b1) + min_freq / self.freq_scale
    filt_end_freq = filt_beg_freq + (torch.abs(self.filt_band) +
                                     min_band / self.freq_scale)

    n = torch.linspace(0, N, steps=N)

    # Filter window (hamming)
    window = (0.54 - 0.46 * torch.cos(2 * math.pi * n / N)).float()
    if cuda:
        window = window.to('cuda')

    for i in range(self.N_filt):
        low_pass1 = 2 * filt_beg_freq[i].float() * \
            sinc(filt_beg_freq[i].float() * self.freq_scale, t_right, cuda)
        low_pass2 = 2 * filt_end_freq[i].float() * \
            sinc(filt_end_freq[i].float() * self.freq_scale, t_right, cuda)
        band_pass = (low_pass2 - low_pass1)
        band_pass = band_pass / torch.max(band_pass)
        if cuda:
            band_pass = band_pass.to('cuda')
        filters[i, :] = band_pass * window

    if self.padding == 'SAME':
        if self.stride > 1:
            x_p = F.pad(x, (self.Filt_dim // 2 - 1, self.Filt_dim // 2),
                        mode=self.pad_mode)
        else:
            x_p = F.pad(x, (self.Filt_dim // 2, self.Filt_dim // 2),
                        mode=self.pad_mode)
    else:
        x_p = x

    out = F.conv1d(x_p, filters.view(self.N_filt, 1, self.Filt_dim),
                   stride=self.stride)
    return out
def conv1d_same_padding(input, weight, bias, stride, dilation, groups):
    kernel = weight.size(2)
    dilation = dilation[0]
    stride = stride[0]
    size = input.size(2)  # length of the time axis
    padding = ((size - 1) * stride) - size + (dilation * (kernel - 1)) + 1
    if padding % 2 != 0:
        input = F.pad(input, [0, 1])

    return F.conv1d(input=input, weight=weight, bias=bias, stride=stride,
                    padding=padding // 2, dilation=dilation, groups=groups)
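# Both `conv1d_same_padding` variants implement TensorFlow-style 'same'
# padding. A quick check that the output length matches the input length for
# stride 1 (toy shapes chosen for illustration):
import torch

x = torch.randn(2, 3, 50)  # (batch, in_channels, length)
w = torch.randn(8, 3, 5)   # (out_channels, in_channels, kernel_size)
y = conv1d_same_padding(x, w, bias=None, stride=(1,), dilation=(1,), groups=1)
print(y.shape)             # torch.Size([2, 8, 50])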
def final_transformation(self, track_release):
    """
    final convolution with iGluSNFR kernel and affine transformation
    :param track_release:
    :return:
    """
    # convolve with the iGluSNFR kernel; treat the channel as the batch
    # dimension, because all channels get the same iGlu kernel.
    # ToDo: change this when using batched inputs!
    x = track_release.T[:, None, :]
    x = F.conv1d(x, self.iglusnfr_kernel)
    x = x[:, 0, self.steady_state_steps:]

    # normalize (mean=0, norm=1) so correlation can be computed easily
    x = x - torch.mean(x, dim=1, keepdim=True)
    x = x / (torch.norm(x, 2, dim=1, keepdim=True) + 1e-10)
    return x
def forward_mask(self, mask):
    new_mask = mask.unsqueeze(1).float()
    cnn_weight = torch.ones((1, 1, self.conv_layers[0].kernel_size[0]),
                            device=mask.device, dtype=torch.float)
    new_mask = F.conv1d(new_mask, cnn_weight, None,
                        self.conv_layers[0].stride[0],
                        self.conv_layers[0].padding[0], 1, 1)
    if self.max_pool:
        new_mask = F.max_pool1d(new_mask,
                                self.conv_layers[2].kernel_size[0],
                                self.conv_layers[2].stride[0],
                                self.conv_layers[2].padding[0],
                                1, False, False)
    new_mask = new_mask.squeeze(1)
    new_mask = (new_mask > 0)
    return new_mask
def _move_ptr_fw(stack_ptr):
    """
    Move the stack pointer forward (i.e. to push to stack).

    stack_ptr: (batch_size, stack_len)
    Return: (batch_size, stack_len)
    """
    filter_fw = torch.FloatTensor([1, 0, 0]).view(1, 1, 3).to(stack_ptr.device)
    batch_size, stack_len = stack_ptr.size()
    new_stack_ptr = F.conv1d(stack_ptr.view(batch_size, 1, stack_len),
                             filter_fw, padding=1).view(batch_size, stack_len)
    # when the stack pointer is already at the stack top, keep the pointer
    # in the same location (otherwise the pointer would become all zeros)
    stack_top_mask = torch.zeros(stack_len).to(stack_ptr.device)
    stack_top_mask[stack_len - 1] = 1  # [stack_len]
    new_stack_ptr += stack_top_mask * stack_ptr
    return new_stack_ptr
def _move_ptr_bw(stack_ptr):
    """
    Move the stack pointer backward (i.e. to pop from stack).
    """
    filter_bw = torch.tensor([0, 0, 1]).float().view(1, 1, 3).to(stack_ptr.device)
    batch_size, stack_len = stack_ptr.shape
    new_stack_ptr = F.conv1d(stack_ptr.view(batch_size, 1, stack_len),
                             filter_bw, padding=1).view(batch_size, stack_len)
    # when the stack pointer is already at the stack bottom, keep the pointer
    # in the same location (otherwise the pointer would become all zeros)
    stack_bottom_mask = torch.zeros(stack_len).to(stack_ptr.device)
    stack_bottom_mask[0] = 1
    new_stack_ptr += stack_bottom_mask * stack_ptr
    return new_stack_ptr
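# A tiny demonstration of how these convolutions shift a one-hot pointer.
# Since F.conv1d is cross-correlation, the kernel [1, 0, 0] moves the hot
# entry one slot forward and [0, 0, 1] moves it one slot back:
import torch

ptr = torch.zeros(1, 5)
ptr[0, 2] = 1.0            # pointer at slot 2
print(_move_ptr_fw(ptr))   # hot entry moves to slot 3
print(_move_ptr_bw(ptr))   # hot entry moves to slot 1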
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """
    Forward propagation

    Args:
        x: [N, in_channels, L]

    Returns:
        [N, out_channels, L]
    """
    out = F.conv1d(x, self.weight, self.bias, self.stride,
                   self.padding, self.dilation, self.groups)
    # trim the extra right-hand context introduced by the padding; guard on
    # the padding itself (a guard on kernel_size is always true and would
    # slice to an empty tensor when padding is 0)
    if self.padding[0] > 0:
        out = slice_axis(out, axis=2, begin=0, end=-self.padding[0])
    return out
def forward(self, x_waveform, lengths_waveform):
    x_preemp = self.preemp(x_waveform.permute(0, 2, 1))
    x_comp = self.comp(x_preemp)

    x_even = x_comp[:, 0::2, :]
    x_odd = x_comp[:, 1::2, :]
    x_abs = torch.sqrt(x_even * x_even + x_odd * x_odd + self.epsilon)

    x_lowpass = F.conv1d(x_abs, self.lowpass_weight, stride=160, groups=40)
    x_log = torch.log(1.0 + torch.abs(x_lowpass))
    x_norm = self.instancenorm(x_log).permute(0, 2, 1)

    x_lengths = lengths_waveform - 1
    x_lengths = (x_lengths - (400 - 1)) // 1
    x_lengths = (x_lengths - (400 - 160)) // 160

    seqlen = x_norm.shape[1]
    if hp.frame_stacking:
        if seqlen % 3 == 0:
            x_norm = torch.cat((x_norm[:, 0::3, :], x_norm[:, 1::3, :],
                                x_norm[:, 2::3, :]), dim=2)
        elif seqlen % 3 == 1:
            x_norm = torch.cat((x_norm[:, 0:-1:3, :], x_norm[:, 1::3, :],
                                x_norm[:, 2::3, :]), dim=2)
        elif seqlen % 3 == 2:
            x_norm = torch.cat((x_norm[:, 0:-2:3, :], x_norm[:, 1:-1:3, :],
                                x_norm[:, 2::3, :]), dim=2)
        x_lengths = x_lengths // 3  # keep integer lengths (in-place /= fails on int tensors)

    x = nn.utils.rnn.pack_padded_sequence(x_norm, x_lengths.tolist(),
                                          batch_first=True)
    h, (_, _) = self.bi_lstm(x)
    hbatch, lengths = nn.utils.rnn.pad_packed_sequence(h, batch_first=True)
    return hbatch
def forward(self, x):
    device, n = x.device, x.shape[1]
    res, gate = x.chunk(2, dim=-1)
    gate = self.norm(gate)

    weight, bias = self.proj.weight, self.proj.bias
    if self.causal:
        weight, bias = weight[:n, :n], bias[:n]
        mask = torch.ones(weight.shape[:2], device=device).triu_(1).bool()
        weight = weight.masked_fill(mask[..., None], 0.)

    gate = F.conv1d(gate, weight, bias)

    if exists(self.attn):
        gate += self.attn(x)

    return gate * res
def forward(self, decoder_input, z, drop_prob):
    """
    :param decoder_input: tensor with shape of [batch_size, seq_len, embed_size]
    :param z: sequence latent variable with shape of [batch_size, latent_variable_size]
    :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout

    :return: unnormalized logits of the sentence word distribution
             with shape of [batch_size, seq_len, word_vocab_size]
    """
    assert parameters_allocation_check(self), \
        'Invalid CUDA options. Parameters should be allocated on the same device'

    [batch_size, seq_len, _] = decoder_input.size()

    '''
    decoder is conditioned on context via an additional bias = W_cond * z
    added to every input token
    '''
    z = t.cat([z] * seq_len, 1).view(batch_size, seq_len,
                                     self.params.latent_variable_size)
    decoder_input = t.cat([decoder_input, z], 2)
    decoder_input = F.dropout(decoder_input, drop_prob)

    # x is a tensor with shape [batch_size, input_size=in_channels, seq_len=input_width]
    x = decoder_input.transpose(1, 2).contiguous()

    for layer, kernel in enumerate(self.kernels):
        # apply conv layer with non-linearity and drop the last elements of
        # the sequence to perform input shifting (causal convolution)
        x = F.conv1d(x, kernel,
                     bias=self.biases[layer],
                     dilation=self.params.decoder_dilations[layer],
                     padding=self.params.decoder_paddings[layer])
        x_width = x.size()[2]
        x = x[:, :, :(x_width - self.params.decoder_paddings[layer])].contiguous()
        x = F.relu(x)

    x = x.transpose(1, 2).contiguous()
    x = x.view(-1, self.out_size)
    x = self.fc(x)
    result = x.view(-1, seq_len, self.params.word_vocab_size)

    return result
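# A minimal check of the pad-then-trim trick that makes each decoder
# convolution causal (toy shapes, dilation 1):
import torch
import torch.nn.functional as F

x = torch.randn(1, 4, 10)
w = torch.randn(4, 4, 3)
pad = 2                                       # (kernel_size - 1) * dilation
y = F.conv1d(x, w, padding=pad)[:, :, :-pad]  # y[..., t] sees only x[..., :t + 1]
print(y.shape)                                # torch.Size([1, 4, 10])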
def _transform(self, x):
    x = x.view(x.shape[0], 1, -1)

    # frequency decomposition
    features = F.conv1d(x, self.weights, stride=self.lap, padding=self.basis_size)

    # half-wave rectification
    features = F.relu(features)

    # log magnitude
    features = torch.log(1 + features * self.log_factor)

    # perceptual frequency weighting
    if self.frequency_weights is not None:
        features = features * self.frequency_weights

    return features
def forward(self, x):
    x = x.view(-1, 1, x.shape[-1])
    filters = self._filter_bank().view(len(self.scale), 1, self.taps)
    x = F.conv1d(x, filters, stride=1, padding=self.taps // 2)
    return x
def convolve(self, x):
    x = x.view(-1, 1, x.shape[-1])
    x = F.conv1d(x, self.filter_bank, padding=self.filter_bank.shape[-1] // 2)
    return x