def forward(self, x):
    """Apply three conv/bn stages and add the skip connection.

    Args:
        x: input tensor; fed to ``self.conv1`` and, when
            ``self.downsample`` is set, to the downsample branch.

    Returns:
        The in-place ReLU of the main-branch output plus the
        (optionally downsampled) input.
    """
    # Two conv -> bn -> in-place ReLU stages.
    branch = F.relu_(self.bn1(self.conv1(x)))
    branch = F.relu_(self.bn2(self.conv2(branch)))
    # The third stage is not activated before the skip is added.
    branch = self.bn3(self.conv3(branch))

    skip = x if self.downsample is None else self.downsample(x)
    branch += skip
    return F.relu_(branch)
def forward(self, x):
    """Run the stem, three stages, a global average pool and the classifier.

    Args:
        x: input tensor passed to ``self.conv1``.

    Returns:
        Output of ``self.fc`` applied to the flattened pooled features.
    """
    x = F.relu_(self.bn1(self.conv1(x)))
    for stage in (self.layer1, self.layer2, self.layer3):
        x = stage(x)
    # The pooling kernel equals the size of dim 3 of the feature map.
    x = F.avg_pool2d(x, x.size(3))
    return self.fc(torch.flatten(x, 1))
def forward(self, x):
    """Compute the five pyramid outputs ``(p3, p4, p5, p6, p7)``.

    Args:
        x: input tensor passed to ``self.layer0``.

    Returns:
        Tuple ``(p3, p4, p5, p6, p7)``.
    """
    # Bottom-up pathway.
    c1 = self.maxpool(F.relu_(self.layer0(x)))
    c2 = self.layer1(c1)
    c3 = self.layer2(c2)
    c4 = self.layer3(c3)
    c5 = self.layer4(c4)
    c6 = self.layer5(c5)
    # The ReLU is in place, so c6 (returned as p6) is rectified as well.
    c7 = self.layer6(F.relu_(c6))

    # Top-down pathway with lateral connections.
    p5 = self.latlayer1(c5)
    p4 = self.toplayer1(self._upsample(p5, self.latlayer2(c4)))
    p3 = self.toplayer2(self._upsample(p4, self.latlayer3(c3)))
    return p3, p4, p5, c6, c7
def nl_relu(x: Tensor, beta: float = 1., inplace: bool = False) -> Tensor:
    """Natural-logarithm ReLU: ``log(1 + beta * relu(x))``.

    Args:
        x: input tensor
        beta: multiplier applied to the rectified input
        inplace: when True, ``x`` is overwritten and returned

    Returns:
        output tensor
    """
    if not inplace:
        return torch.log(1 + beta * F.relu(x))
    # Every step mutates x; the logarithm is written back into x as well.
    shifted = F.relu_(x).mul_(beta).add_(1)
    return torch.log(shifted, out=x)
def forward(self, x):
    """Run the residual block on every branch.

    Args:
        x: a tensor, or a list with one tensor per branch. A single
            tensor is replicated once per branch.

    Returns:
        A list of per-branch tensors, or their concatenation
        (``torch.cat`` along dim 0) when ``self.concat_output`` is truthy.
    """
    # All branches run in training mode or when test_branch_idx is -1.
    if self.training or self.test_branch_idx == -1:
        num_branch = self.num_branch
    else:
        num_branch = 1
    if not isinstance(x, list):
        x = [x] * num_branch

    out = self.conv1(x)
    out = self.conv2([F.relu_(branch) for branch in out])

    if self.shortcut is None:
        shortcut = x
    else:
        shortcut = [self.shortcut(branch) for branch in x]

    # Residual add followed by an in-place ReLU on the fresh sum.
    out = [F.relu_(main + skip) for main, skip in zip(out, shortcut)]

    if self.has_pool:
        out = [pool(branch) for pool, branch in zip(self.list_pool, out)]
    if self.concat_output:
        out = torch.cat(out)
    return out
def forward(self, x):
    """Bottleneck forward pass with an optional domain-attention step.

    Args:
        x: input tensor.

    Returns:
        In-place ReLU of the main branch plus the (optionally
        downsampled) input.
    """
    branch = F.relu_(self.bn1(self.conv1(x)))
    branch = F.relu_(self.bn2(self.conv2(branch)))
    branch = self.bn3(self.conv3(branch))

    # The attention step is toggled through the block's config dict.
    if self.config['se_block']:
        branch = self.domain_attention(branch)

    skip = x if self.downsample is None else self.downsample(x)
    branch += skip
    return F.relu_(branch)
def forward(self, input):
    """Compute word embeddings from a mini-batch of character embeddings.

    :param input (Tensor): shape (batch_size, char_embed_size, max_word_length)
    :return (Tensor): shape (batch_size, word_embed_size), word embedding of
        each word in the batch
    """
    # conv output: (batch_size, word_embed_size, max_word_length - kernel_size + 1)
    x = F.relu_(self.conv1d(input))
    # Squeeze only the pooled (last) dimension. A bare squeeze() would also
    # drop the batch dimension for a batch of one and break the documented
    # (batch_size, word_embed_size) shape.
    # The per-call debug prints of intermediate shapes were removed.
    return self.max_pool_1d(x).squeeze(-1)
def forward(self, x):
    """Concatenate ``x`` with a low-rank reconstruction-gated copy of itself.

    The input is pooled along three axis orderings, each pooled view is
    passed through one conv per rank, and the per-rank reconstructions
    are combined with softmax-normalised weights taken from ``self.lam``.

    Args:
        x: 4-D input tensor (its size is unpacked into four values).

    Returns:
        ``torch.cat((x, relu(x * reconstruction)), 1)``.
    """
    b, _, _, _ = x.size()
    C = self.pool(x)
    H = self.pool(x.permute(0, 3, 1, 2).contiguous())
    W = self.pool(x.permute(0, 2, 3, 1).contiguous())

    # Normalise into a local. Writing the softmax back to self.lam would
    # re-normalise the already-normalised weights on every call, making
    # the output depend on how many times forward() has run.
    weights = torch.chunk(F.softmax(self.lam, -1), dim=0, chunks=self.rank)

    reconstruction = sum(
        weights[i] * self.TukerReconstruction(
            b, self.h, self.ps[0],
            self.conv1_1[i](C), self.conv1_2[i](H), self.conv1_3[i](W))
        for i in range(self.rank)
    )
    return torch.cat((x, F.relu_(x * reconstruction)), 1)
def nl_relu(x, beta=1., inplace=False):
    """Natural-logarithm ReLU: ``log(1 + beta * relu(x))``.

    Args:
        x (torch.Tensor): input tensor
        beta (float): multiplier applied to the rectified input
        inplace (bool): when True, ``x`` is overwritten and returned

    Returns:
        torch.Tensor[x.size()]: output tensor
    """
    if not inplace:
        return torch.log(1 + beta * F.relu(x))
    # Every step mutates x; the logarithm is written back into x as well.
    shifted = F.relu_(x).mul_(beta).add_(1)
    return torch.log(shifted, out=x)
def forward(self, x):
    """Residual block with optional channel re-weighting and downsampling.

    Args:
        x: input tensor.

    Returns:
        In-place ReLU of the (optionally re-weighted) body output plus
        the (optionally downsampled) input.
    """
    shortcut = x
    x = self.body(x)
    if self.se:
        # Pool to a 1x1 map, let self.se turn it into weights, then scale.
        x = x * self.se(F.adaptive_avg_pool2d(x, output_size=1))
    if self.downsample:
        shortcut = self.downsample(shortcut)
    return F.relu_(x + shortcut)
def forward(self, x):
    """Bottleneck forward pass with an optional context block.

    Args:
        x: input tensor.

    Returns:
        In-place ReLU of the main branch plus the (optionally
        downsampled) input.
    """
    branch = F.relu_(self.bn1(self.conv1(x)))
    branch = F.relu_(self.bn2(self.conv2(branch)))
    branch = self.bn3(self.conv3(branch))

    # The context block runs after the last norm, before the skip is added.
    if self.with_ct:
        branch = self.context_block(branch)

    skip = x if self.downsample is None else self.downsample(x)
    branch += skip
    return F.relu_(branch)
def forward(self, input, pool_size=(2, 2), pool_type='avg'):
    """Apply conv -> bn -> ReLU and then the requested 2-D pooling.

    Args:
        input: input tensor passed to ``self.conv1``.
        pool_size: kernel size handed to the pooling function.
        pool_type: ``'max'``, ``'avg'`` or ``'avg+max'`` (sum of both).

    Returns:
        The pooled feature map.

    Raises:
        Exception: if ``pool_type`` is none of the supported values. The
            conv/bn/ReLU step has already run by then.
    """
    x = F.relu_(self.bn1(self.conv1(input)))

    if pool_type == 'max':
        return F.max_pool2d(x, kernel_size=pool_size)
    if pool_type == 'avg':
        return F.avg_pool2d(x, kernel_size=pool_size)
    if pool_type == 'avg+max':
        return (F.avg_pool2d(x, kernel_size=pool_size)
                + F.max_pool2d(x, kernel_size=pool_size))
    raise Exception('Incorrect argument!')
# Residual block forward pass that can also thread an auxiliary value `dp`
# from one block to the next.
#   - `x` is either a tensor or an `(x, prev_dp)` tuple; `prev_dp` is None
#     when a bare tensor is passed.
#   - Main branch: conv1 -> in-place ReLU -> conv2 -> in-place ReLU -> conv3,
#     optionally followed by `self.module`, whose result may be a tensor or
#     an `(out, dp)` tuple.
#   - `self.shortcut(x)` (or `x` itself when `self.shortcut` is None) is added
#     in place to the main branch before the final in-place ReLU.
#   - Returns `out` when `dp` is None, otherwise the tuple `(out, dp)`.
# NOTE(review): this definition is collapsed onto one physical line, so the
# nesting of `if isinstance(out, tuple)` and `if prev_dp is not None` relative
# to `if self.module is not None` cannot be read from the text. Confirm the
# intended nesting against the original layout before reformatting; the
# readings differ when `prev_dp` is given but no `dp` is produced.
def forward(self, x): if isinstance(x, tuple): x, prev_dp = x else: prev_dp = None out = self.conv1(x) out = F.relu_(out) out = self.conv2(out) out = F.relu_(out) out = self.conv3(out) if self.shortcut is not None: shortcut = self.shortcut(x) else: shortcut = x dp = None if self.module is not None: out = self.module(out) if isinstance(out, tuple): out, dp = out if prev_dp is not None: dp = prev_dp + dp out += shortcut out = F.relu_(out) if dp is None: return out else: # diff loss return out, dp
def forward(self, x):
    """Pre-activation residual block.

    The input is rectified (not in place) before both the main branch
    and the shortcut; no activation follows the residual add.

    Args:
        x: input tensor.

    Returns:
        ``conv2(relu(conv1(relu(x))))`` plus the shortcut.
    """
    x = F.relu(x)
    out = self.conv2(F.relu_(self.conv1(x)))
    shortcut = x if self.shortcut is None else self.shortcut(x)
    out += shortcut
    return out
def forward(self, x, y):
    """Fuse ``x`` with ``y`` and decode through residual upsampling stages.

    Stage 0 embeds ``x`` and ``y`` separately and concatenates them on
    dim 1. Every later stage adds a trilinearly upsampled copy of its
    input (cropped to the conv's ``out_channels``) to the conv output.
    The last conv/bn pair of ``self.convs`` / ``self.bns`` is not used.

    Args:
        x: first input tensor.
        y: second input tensor, embedded by ``self.lblconv``/``self.lblbn``.

    Returns:
        Softmax over dim 1 of ``self.postconv`` applied to the upsampled
        features.
    """
    stages = zip(self.convs[:-1], self.bns[:-1])
    for idx, (conv, bn) in enumerate(stages):
        if idx == 0:
            x = F.relu_(bn(conv(self.preconvbn(self.preconv(x)))))
            y = F.relu_(self.lblbn(self.lblconv(y)))
            x = torch.cat([x, y], 1)
            continue

        # Residual path: upsample the stage input to (size - 1) * 2.
        target = (x.shape[2] - 1) * 2
        skip = F.interpolate(x, size=target, mode='trilinear',
                             align_corners=False)
        x = F.relu_(bn(conv(x) + skip[:, :conv.out_channels]))

    final_size = 2 * x.size()[2] - 2
    x = F.interpolate(x, size=final_size, mode='trilinear',
                      align_corners=False)
    return torch.softmax(self.postconv(x), 1)
def forward(self, img, x_coord, x_adj, get_att_weights=False):
    """Mask CNN image features with a learned per-element attention.

    Args:
        img: image batch passed to ``self.cnn``.
        x_coord: tensor concatenated with ``x_adj`` and the image features
            along dim 2.
        x_adj: tensor whose dim 1 sets how often the image features are
            repeated.
        get_att_weights: when True, also return the attention weights of
            the first sample, reshaped through ``(1, 30, 30)``.

    Returns:
        ``masked_img``, or ``(masked_img, weights)`` when
        ``get_att_weights`` is True.
    """
    feats = self.cnn(img)
    # Repeat the per-image features once per entry along dim 1 of x_adj.
    feats = feats.unsqueeze(1).repeat(1, x_adj.shape[1], 1)

    scores = torch.cat([x_coord, x_adj, feats], dim=2)
    scores = self.linear_att2(F.relu_(self.linear_att1(scores)))
    mask = self.softmax(scores)
    masked_img = mask * feats

    if not get_att_weights:
        return masked_img
    # Only the first sample's weights are returned; 30x30 is hard-coded.
    first_mask = mask[0].reshape(1, 30, 30)
    return masked_img, first_mask[0]
def correlate(input1, input2):
    """Correlate two feature maps and return a rectified 4-D volume.

    Calls ``spatial_correlation_sample`` with a 21x21 patch (dilation 2),
    merges the two patch dimensions into one, divides by
    ``input1.size(1)`` and applies an in-place ReLU.

    Args:
        input1: first feature map; the size of its dim 1 is the divisor.
        input2: second feature map.

    Returns:
        Tensor of shape ``(b, ph * pw, h, w)``.
    """
    corr = spatial_correlation_sample(
        input1,
        input2,
        kernel_size=1,
        patch_size=21,
        stride=1,
        padding=0,
        dilation_patch=2,
    )
    # Merge dimensions 1 and 2 so the result can be used as a regular
    # 4-D tensor.
    batch, patch_h, patch_w, height, width = corr.size()
    corr = corr.view(batch, patch_h * patch_w, height, width)
    return F.relu_(corr / input1.size(1))
def forward(self, x):
    """Apply three conv/bn stages and add the skip connection.

    A leftover ``pdb.set_trace()`` breakpoint, which halted every forward
    pass, has been removed.

    Args:
        x: input tensor; fed to ``self.conv1`` and, when
            ``self.downsample`` is set, to the downsample branch.

    Returns:
        In-place ReLU of the main-branch output plus the (optionally
        downsampled) input.
    """
    identity = x

    out = F.relu_(self.bn1(self.conv1(x)))
    out = F.relu_(self.bn2(self.conv2(out)))
    # The third stage is not activated before the skip is added.
    out = self.bn3(self.conv3(out))

    if self.downsample is not None:
        identity = self.downsample(x)

    out += identity
    return F.relu_(out)
# Forward pass of a sound-event-detection style model:
#   spectrogram_extractor -> logmel_extractor -> (training-only) spec_augmenter
#   -> optional do_mixup -> batch_norm applied with dims 1 and 3 swapped
#   -> encoder.forward_features -> mean over dim 3
#   -> sum of max_pool1d and avg_pool1d (kernel 3, stride 1, padding 1)
#   -> dropout -> encoder.fc + in-place ReLU on the transposed tensor
#   -> dropout -> att_head.
# The segment-wise output of att_head is transposed, passed through
# `interpolate` with `self.interpolate_ratio`, and then through
# `pad_framewise_output` with `frames_num` (dim 2 of the log-mel output).
# Returns a dict with keys 'framewise_output' and 'clipwise_output'.
# `spec_aug` is accepted but never read in this body.
# `do_mixup`, `interpolate` and `pad_framewise_output` are not defined in
# this block.
# The concrete shapes in the inline comments (16, 144000, ...) are examples
# left by the original author, not constraints enforced by the code.
# NOTE(review): this definition is collapsed onto two physical lines, so it
# cannot be determined from the text whether `if mixup_lambda is not None`
# is nested under `if self.training`. The two readings differ in eval mode;
# confirm against the original layout before reformatting.
def forward(self, input, spec_aug=False, mixup_lambda=None): #print(input.type()) # Input : (16, 144000) x = self.spectrogram_extractor(input.float()) # Output : (batch_size, 1, time_steps, n_fft + 1) : (16, 1, 696, 513) x = self.logmel_extractor(x) # Output : (batch_size, 1, time_steps, mel_bins) : (16, 1 , 696, 128) frames_num = x.shape[2] if self.training: x = self.spec_augmenter(x) # Mixup on spectrogram if mixup_lambda is not None: x = do_mixup(x, mixup_lambda) x = x.transpose(1, 3) x = self.batch_norm(x) x = x.transpose(1, 3) # (16, 1, 2087, 128) x = self.encoder.forward_features(x) # output : (batch_size, n_features, 66, 4) # Aggregate in time axis x = torch.mean(x, dim=3) # (16, 2048, 22) : (batch_size, n_features, _) x1 = F.max_pool1d(x, kernel_size=3, stride=1, padding=1) x2 = F.avg_pool1d(x, kernel_size=3, stride=1, padding=1) x = x1 + x2 # (16, 2048, 22) x = F.dropout(x, p=0.5, training=self.training) x = x.transpose(1, 2) # (batch_size, 22, n_features) #x = self.encoder.classifier(x) # (16, 22, 2048) : (batch_size, time, n_features) x = F.relu_(self.encoder.fc(x)) # (16, 22, 2048) x = x.transpose(1, 2) # (16, 2048, 22) x = F.dropout(x, p=0.5, training=self.training) (clipwise_output, norm_att, segmentwise_output) = self.att_head(x) segmentwise_output = segmentwise_output.transpose(1, 2) #print("clipwise_output.size : {}".format(clipwise_output.size())) #(16, 24) : (batch_sizes, n_features) #print("norm_att.size : {}".format(norm_att.size())) #(16, 24, 22) : (batch_sizes, n_features, time) #print("segmentwise_output.size : {}".format(segmentwise_output.size())) #(16, 24, 22) : (batch_sizes, n_features, time) #Upscale back to original size framewise_output = interpolate(segmentwise_output, self.interpolate_ratio) # (16,696, 24) : (batch_sizes x time x num_classes) framewise_output = pad_framewise_output(framewise_output, frames_num) # (16,696, 24) : (batch_sizes x time x num_classes) output_dict = { 'framewise_output': framewise_output, 
'clipwise_output': clipwise_output } return output_dict
def forward(self, x):
    """Pad symmetrically, then apply conv, batch norm and an optional ReLU.

    Args:
        x: input tensor.

    Returns:
        The normalised conv output, rectified in place when
        ``self.activation`` is not None.
    """
    pad_outer, pad_inner = self.padding_size[0], self.padding_size[1]
    # F.pad takes pairs starting from the last dimension:
    # padding_size[1] pads the last dim, padding_size[0] the one before it.
    x = F.pad(x, [pad_inner, pad_inner, pad_outer, pad_outer])
    x = self.batch_norm(self.conv(x))
    if self.activation is None:
        return x
    return F.relu_(x)
def forward(self, x_reshaped: torch.Tensor) -> torch.Tensor:
    """Pass character embeddings through the Conv1d layer, ReLU and max-pool.

    @param x_reshaped (Tensor): tensor of character-level embeddings
    @returns x_conv (Tensor): tensor of word embeddings; the leading
        (batch) dimension is always kept
    """
    x = F.relu_(self.conv1(x_reshaped))
    # Squeeze only the pooled (last) dimension. A bare squeeze() would also
    # drop the batch dimension for a batch of one.
    # The per-call debug prints of intermediate shapes were removed.
    x_conv = self.pool(x).squeeze(-1)
    return x_conv
def forward(self, input, mixup_lambda=None):
    """Compute clip-level predictions and an embedding.

    Input: (batch_size, data_length)

    Args:
        input: batch passed to ``self.spectrogram_extractor``.
        mixup_lambda: mixup coefficients; only used in training mode.

    Returns:
        Dict with ``'clipwise_output'`` (sigmoid of ``fc_audioset``) and
        ``'embedding'`` (dropout of the ``fc1`` activations).
    """
    x = self.spectrogram_extractor(input)  # (batch_size, 1, time_steps, freq_bins)
    x = self.logmel_extractor(x)  # (batch_size, 1, time_steps, mel_bins)

    # bn0 is applied with dims 1 and 3 swapped.
    x = self.bn0(x.transpose(1, 3)).transpose(1, 3)

    if self.training:
        x = self.spec_augmenter(x)
        # Mixup on spectrogram
        if mixup_lambda is not None:
            x = do_mixup(x, mixup_lambda)

    blocks = (
        (self.conv_block1, (2, 2)),
        (self.conv_block2, (2, 2)),
        (self.conv_block3, (2, 2)),
        (self.conv_block4, (2, 2)),
        (self.conv_block5, (2, 2)),
        (self.conv_block6, (1, 1)),
    )
    for block, pool_size in blocks:
        x = block(x, pool_size=pool_size, pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)

    x = torch.mean(x, dim=3)
    # Aggregate over dim 2 with max + mean.
    peak, _ = torch.max(x, dim=2)
    x = peak + torch.mean(x, dim=2)

    x = F.dropout(x, p=0.5, training=self.training)
    x = F.relu_(self.fc1(x))
    embedding = F.dropout(x, p=0.5, training=self.training)
    # The classifier reads the pre-dropout activations, not `embedding`.
    clipwise_output = torch.sigmoid(self.fc_audioset(x))

    return {'clipwise_output': clipwise_output, 'embedding': embedding}
def forward(self, x, y=None):
    """Forward pass returning training or inference outputs.

    Input: (batch_size, data_length)

    Args:
        x: batch passed to ``self.spectrogram_extractor``.
        y: targets; mixed together with ``x`` by ``do_mixup`` in training.

    Returns:
        ``(clipwise, y)`` in training mode, otherwise the maximum of the
        frame-wise output over its last dimension.
    """
    x = self.spectrogram_extractor(x)  # (batch_size, 1, time_steps, freq_bins)
    x = self.logmel_extractor(x)  # (batch_size, 1, time_steps, mel_bins)

    # bn0 is applied with dims 1 and 3 swapped.
    x = self.bn0(x.transpose(1, 3)).transpose(1, 3)

    if self.training:
        x = self.spec_augmenter(x)
        # Mixup on spectrogram, with a fixed alpha of 1.0.
        x, y = do_mixup(x, y, 1.0)

    # Replicate along dim 1 three times before the feature extractor.
    x = self.fe(torch.cat([x, x, x], dim=1))
    x = torch.mean(x, dim=3)  # averaging across frequency dimension

    x = (F.max_pool1d(x, kernel_size=3, stride=1, padding=1)
         + F.avg_pool1d(x, kernel_size=3, stride=1, padding=1))

    x = F.dropout(x, p=CONFIG.p, training=self.training)
    x = F.relu_(self.fc1(x.transpose(1, 2))).transpose(1, 2)
    x = F.dropout(x, p=CONFIG.p, training=self.training)

    clipwise, _, framewise = self.att_block(x)
    if self.training:
        return clipwise, y
    return torch.max(framewise, dim=-1)[0]
def forward(self, input_data):
    """Run detection on every channel of a multi-channel batch.

    Input: (batch_size, channels, data_length). Channels are folded into
    the batch dimension for processing and unfolded again in the outputs.

    Args:
        input_data: 3-D tensor.

    Returns:
        Dict with ``'framewise_output'`` reshaped to
        ``(b, c, frames, classes)`` and ``'clipwise_output'`` reshaped to
        ``(b, c, classes)``.
    """
    # Mixup is disabled here: only the waveform is accepted as input.
    input_x = input_data
    mixup_lambda = None

    b, c, s = input_x.shape
    x, frames_num = self.preprocess(input_x.reshape(b * c, s),
                                    mixup_lambda=mixup_lambda)
    if mixup_lambda is not None:
        b = (b * c) // 2
        c = 1

    # Output shape (batch size, channels, time, frequency)
    x = x.expand(x.shape[0], 3, x.shape[2], x.shape[3])
    x = self.cnn_feature_extractor(x)

    # Aggregate in frequency axis
    x = torch.mean(x, dim=3)

    x = (F.max_pool1d(x, kernel_size=3, stride=1, padding=1)
         + F.avg_pool1d(x, kernel_size=3, stride=1, padding=1))

    x = F.dropout(x, p=0.5, training=self.training)
    x = F.relu_(self.fc1(x.transpose(1, 2))).transpose(1, 2)
    x = F.dropout(x, p=0.5, training=self.training)

    clipwise_output, _, segmentwise_output = self.att_block(x)

    # Get framewise output
    framewise_output = interpolate(segmentwise_output.transpose(1, 2),
                                   self.interpolate_ratio)
    framewise_output = pad_framewise_output(framewise_output, frames_num)

    frame_shape = framewise_output.shape
    clip_shape = clipwise_output.shape
    return {
        'framewise_output': framewise_output.reshape(
            b, c, frame_shape[1], frame_shape[2]),
        'clipwise_output': clipwise_output.reshape(b, c, clip_shape[1]),
    }
def forward(self, x):
    """Compute clip-wise and frame-wise outputs.

    Args:
        x: 4-D input; dims 2 and 3 are swapped before any processing.

    Returns:
        Tuple ``(clipwise_output, framewise_output)``.
    """
    x = x.transpose(2, 3)
    frames_num = x.shape[2]

    # bn0 is applied with dims 1 and 3 swapped.
    x = self.bn0(x.transpose(1, 3)).transpose(1, 3)

    blocks = (
        (self.conv_block1, (2, 2)),
        (self.conv_block2, (2, 2)),
        (self.conv_block3, (2, 2)),
        (self.conv_block4, (2, 2)),
        (self.conv_block5, (2, 2)),
        (self.conv_block6, (1, 1)),
    )
    for block, pool_size in blocks:
        x = block(x, pool_size=pool_size, pool_type="avg")
        x = F.dropout(x, p=0.2, training=self.training)

    x = torch.mean(x, dim=3)

    x = (F.max_pool1d(x, kernel_size=3, stride=1, padding=1)
         + F.avg_pool1d(x, kernel_size=3, stride=1, padding=1))

    x = F.dropout(x, p=0.5, training=self.training)
    x = F.relu_(self.fc1(x.transpose(1, 2))).transpose(1, 2)
    x = F.dropout(x, p=0.5, training=self.training)

    clipwise_output, _, segmentwise_output = self.att_block(x)

    # Get framewise output
    framewise_output = interpolate(segmentwise_output.transpose(1, 2),
                                   self.interpolate_ratio)
    framewise_output = pad_framewise_output(framewise_output, frames_num)

    return clipwise_output, framewise_output
def forward(self, x_conv_out: torch.Tensor) -> torch.Tensor:
    """Map x_conv_out to x_highway.

    Computes, element-wise:
        x_proj    = ReLU(projection(x_conv_out))
        x_gate    = sigmoid(gate(x_conv_out))
        x_highway = x_gate * x_proj + (1 - x_gate) * x_conv_out

    :param x_conv_out: Tensor output from the cnn layer.
        Input size (batch_size, embedding_size)
    :return: x_highway: Tensor output from the Highway network.
        Output size (batch_size, embedding_size)
    """
    proj = F.relu_(self.projection(x_conv_out))
    gate = torch.sigmoid(self.gate(x_conv_out))
    # The gate blends the transformed input with the untouched input.
    carried = (1 - gate) * x_conv_out
    return gate * proj + carried
def forward(self, input):
    """Predict clip-level probabilities.

    :param input: (batch_size, time_steps, mel_bins)
    :return: sigmoid of ``self.fc`` applied to the pooled, rectified
        ``self.fc1`` features
    """
    x = self.feature(input)  # (batch_size, 512, T/16, mel_bins/16)
    x = torch.mean(x, dim=3)  # (batch_size, 512, T/16)

    # Aggregate over dim 2 with max + mean.
    peak, _ = torch.max(x, dim=2)
    x = peak + torch.mean(x, dim=2)

    x = F.dropout(x, p=0.2, training=self.training)
    x = F.relu_(self.fc1(x))  # (batch_size, class_num)
    return torch.sigmoid(self.fc(x))
def forward(self, x_reshaped):
    """Compute word embeddings.

    @param x_reshaped (Tensor): shape (batch_size, char_embed_size, max_word_length)
    @return (Tensor): shape (batch_size, embed_size), word embedding of each
        word in the batch
    """
    # Convolutional network (dimensions given for a single example):
    #   x_conv     = Conv1D(x_reshaped)       in R^{e_word x (m_word - k + 1)}
    #   x_conv_out = MaxPool(ReLU(x_conv))    in R^{e_word}
    # e_word equals the number of filters.
    x_conv = self.conv1d(x_reshaped)
    # Squeeze only the pooled (last) dimension. A bare squeeze() would also
    # drop the batch dimension for a batch of one and break the documented
    # (batch_size, embed_size) shape.
    x_conv_out = self.max_pool_1d(F.relu_(x_conv)).squeeze(-1)
    return x_conv_out
def forward(self, x):
    """Run the stem and four stages, collecting each stage's output.

    Args:
        x: input tensor.

    Returns:
        List with the outputs of ``layer1`` to ``layer4``, in order.
    """
    # stem
    x = F.relu_(self.bn1(self.conv1(x)))
    x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)

    # blocks
    features = []
    for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
        x = stage(x)
        features.append(x)
    return features
def forward(self, input_data):
    """Compute frame-wise and clip-wise outputs from a spectrogram batch.

    Args:
        input_data: (batch_size, 1, time_steps, mel_bins)

    Returns:
        Dict with ``'framewise_output'`` and ``'clipwise_output'``.
    """
    x = input_data  # (batch_size, 1, time_steps, mel_bins)
    frames_num = x.shape[2]

    # bn0 is applied with dims 1 and 3 swapped.
    x = self.bn0(x.transpose(1, 3)).transpose(1, 3)

    # In training, the augmenter fires on roughly a quarter of the calls.
    if self.training and random.random() < 0.25:
        x = self.spec_augmenter(x)

    x = self.encoder(x.transpose(2, 3))

    # Aggregate in frequency axis
    x = torch.mean(x, dim=3)

    x = (F.max_pool1d(x, kernel_size=3, stride=1, padding=1)
         + F.avg_pool1d(x, kernel_size=3, stride=1, padding=1))

    x = F.dropout(x, p=0.5, training=self.training)
    x = F.relu_(self.fc1(x.transpose(1, 2))).transpose(1, 2)
    x = F.dropout(x, p=0.5, training=self.training)

    clipwise_output, _, segmentwise_output = self.att_block(x)
    segmentwise_output = segmentwise_output.transpose(1, 2)

    # The ratio is derived from the data rather than stored on the module.
    interpolate_ratio = frames_num // segmentwise_output.size(1)

    # Get framewise output
    framewise_output = interpolate(segmentwise_output, interpolate_ratio)
    framewise_output = pad_framewise_output(framewise_output, frames_num)

    return {
        'framewise_output': framewise_output,
        'clipwise_output': clipwise_output,
    }
def forward(self, X: torch.FloatTensor) -> torch.FloatTensor:
    """
    Making a forward pass of the 2D-convolution block.

    Arg types:
        * **X** (PyTorch Float Tensor) - Input tensor, with shape (batch_size, num_his, num_nodes, input_dims).

    Return types:
        * **X** (PyTorch Float Tensor) - Output tensor, with shape (batch_size, num_his, num_nodes, output_dims).
    """
    pad_outer, pad_inner = self._padding_size[0], self._padding_size[1]

    # Swap dims 1 and 3 for the conv, and swap them back on the way out.
    hidden = X.permute(0, 3, 2, 1)
    hidden = F.pad(hidden, [pad_inner, pad_inner, pad_outer, pad_outer])
    hidden = self._batch_norm(self._conv2d(hidden))
    if self._activation is not None:
        hidden = F.relu_(hidden)
    return hidden.permute(0, 3, 2, 1)
def forward(self, x):
    """Stem: conv -> bn -> in-place ReLU -> 3x3 max-pool with stride 2.

    Args:
        x: input tensor.

    Returns:
        The pooled feature map.
    """
    activated = F.relu_(self.bn1(self.conv1(x)))
    return F.max_pool2d(activated, kernel_size=3, stride=2, padding=1)