def forward(self, x, y):
    """Score a music/pose pair and run the convolutional discriminator head.

    Args:
        x: Input handed to ``self.music_encoder``.
        y: Input handed to ``self.pose_encoder``.

    Returns:
        A 4-tuple ``(L3score, Dc_out, feature_cat, h1)``:

        * ``L3score`` -- ``self.correspondFC`` applied to ``feature_cat``.
        * ``Dc_out`` -- sigmoid of the pooled third convolution output,
          with dims 1 and 2 swapped.
        * ``feature_cat`` -- first outputs of both encoders concatenated
          on dim 2.
        * ``h1`` -- output of the first conv/norm/pool stage, with dims 1
          and 2 swapped.
    """

    def _norm_relu(norm, feats):
        # The norm layer is applied with dims 1 and 2 swapped, then the
        # result is swapped back before the ReLU.
        return torch.relu(norm(feats.transpose(1, 2)).transpose(1, 2))

    def _downsample(feats):
        return torch.avg_pool1d(feats, kernel_size=3, stride=5, padding=1)

    music_feat, music_aux = self.music_encoder(x)
    pose_feat, pose_aux = self.pose_encoder(y)

    feature_cat = torch.cat([music_feat, pose_feat], dim=2)
    L3score = self.correspondFC(feature_cat)

    # Broadcasted product of the two secondary encoder outputs: every
    # music component times every pose component, flattened from dim 2 on.
    # assumes both secondary outputs are 3-D (batch, time, dim) -- TODO confirm
    pairwise = torch.unsqueeze(music_aux, dim=2).mul(
        torch.unsqueeze(pose_aux, dim=3))
    pairwise = torch.flatten(pairwise, start_dim=2)

    h1 = _downsample(_norm_relu(self.ln1, self.conv1(pairwise.transpose(1, 2))))
    h2 = _downsample(_norm_relu(self.ln2, self.conv2(h1)))

    # The third stage has no norm/ReLU: conv3's output is pooled directly.
    Dc_out = torch.sigmoid(_downsample(self.conv3(h2)))

    return L3score, Dc_out.transpose(1, 2), feature_cat, h1.transpose(1, 2)
def forward(self, x):
    """Encode padded token ids with the LSTM and pool them into one vector.

    x : Tensor(B, L) of token ids. Every position equal to
        ``self.padding_idx`` counts as padding; the sequence length of a
        row is its number of non-padding ids, so padding is expected to
        trail the real tokens.

    Returns ``self.fc`` applied to the concatenation of the max-pooled,
    average-pooled and attention-pooled LSTM outputs.
    """
    B = x.shape[0]
    # Number of real tokens per row, computed in one vectorised pass.
    x_len = (x != self.padding_idx).sum(dim=1)

    x = self.embedding(x)
    # enforce_sorted=False lets batches arrive in any length order (the
    # default raises unless lengths are already sorted decreasingly).
    # The lengths tensor has to live on the CPU.
    x = pack_padded_sequence(x, x_len.cpu(), batch_first=True,
                             enforce_sorted=False)
    x, _ = self.LSTM(x)
    # Padded back only up to the longest sequence in the batch.
    x, _ = pad_packed_sequence(x, batch_first=True)

    y = self.vec.repeat(B, 1, 1)
    e = torch.bmm(x, y)  # (B,L,1)

    # True at padded time steps; those scores are pushed to -1e30 so the
    # softmax gives them (numerically) zero weight.
    steps = torch.arange(x.shape[1], device=x.device)
    pad_mask = (steps.unsqueeze(0) >= x_len.unsqueeze(1)).unsqueeze(-1)
    e = e.masked_fill(pad_mask, -1e30)
    e = F.softmax(e, dim=1)
    att = torch.bmm(e.transpose(1, 2), x).squeeze(1)

    # Max/avg pooling run over every padded-back step, padding included.
    x_m = x.transpose(1, 2)
    x_max = torch.max_pool1d(x_m, x_m.shape[-1]).squeeze(-1)
    x_avg = torch.avg_pool1d(x_m, x_m.shape[-1]).squeeze(-1)

    out = torch.cat((x_max, x_avg, att), 1)  # (B,6H)
    out = self.fc(out)
    return out
def forward(self, x):
    """Build four log-compressed, locally averaged feature channels from x.

    The positive part (from ``x``) and the negative part (from ``-x``) are
    each turned into ``log(max(1, 1 + value))``, then averaged with a short
    window (3) and a long window (199). A channel axis is inserted at
    dim 1.

    Returns:
        Concatenation on dim 1 of, in order: short minus long average of
        the positive part, short average of the positive part, short minus
        long average of the negative part, short average of the negative
        part.
    """

    def _log_floor_one(signal):
        # log(max(1, 1 + signal)), computed by stacking a row of ones next
        # to the shifted signal and taking the max over that stack.
        shifted = torch.unsqueeze(signal, 1) + 1
        stacked = torch.cat([torch.ones_like(shifted), shifted], 1)
        return torch.log(torch.max(stacked, 1, keepdim=True).values)

    def _short_and_long(channel):
        # Stride-1 averages; the paddings keep the length unchanged.
        return (torch.avg_pool1d(channel, 3, 1, 1),
                torch.avg_pool1d(channel, 199, 1, 99))

    pos_short, pos_long = _short_and_long(_log_floor_one(x))
    neg_short, neg_long = _short_and_long(_log_floor_one(-x))

    return torch.cat(
        [pos_short - pos_long, pos_short, neg_short - neg_long, neg_short],
        dim=1)
def forward(self, x):
    """Classify sequences with parallel convolutions and a highway layer.

    Args:
        x: Tensor of shape [batch size, seq len, input dim].

    Returns:
        Log-softmax (over the last dim) of ``self.feedforward_layer``'s
        output for the pooled features.
    """
    # Right-pad short sequences with zeros along the sequence axis so the
    # input is at least ``self.max_kernel_size`` steps long.
    missing = self.max_kernel_size - x.size(1)
    if missing > 0:
        x = f.pad(x, [0, 0, 0, missing], 'constant', 0)

    # Insert a singleton dim at position 1:
    # [batch size, 1, seq len, input dim]
    x = x.unsqueeze(1)

    # One feature map per convolution; the last dim is squeezed away.
    # assumes each conv spans the whole input dim so that dim is 1 -- TODO confirm
    feature_maps = [torch.relu(conv(x).squeeze(-1)) for conv in self.convs]

    # Global average pooling over each feature map's last dim.
    pooled = [torch.avg_pool1d(fm, fm.size(-1)).squeeze(-1)
              for fm in feature_maps]

    # Pooled features of all convolutions joined on the last dim.
    x = torch.cat(pooled, dim=-1)

    # Call the module itself rather than ``.forward`` so that any hooks
    # registered on the highway layer are run.
    x = self.highway_layer(x)

    logit = torch.log_softmax(
        self.feedforward_layer(self.dropout_layer(x)), -1)
    return logit
def forward(self, x, y):
    """
    Cross-attend two encoded sequences and classify the pooled comparison.

    x : Tensor(B,L1)
    y : Tensor(B,L2)
    x_len : list[int]   (second value returned by self.step(x))
    y_len : list[int]   (second value returned by self.step(y))
    mask : Tensor(B,L1,L2), 0 where both positions are inside their
           sequence lengths, 1 elsewhere

    Returns self.fc applied to the concatenated max/avg pooled comparison
    features of both sides.
    """

    def _compare_and_pool(seq, attended):
        # [seq; attended; difference; product] on the feature axis, then
        # dropout and global max / average pooling over the sequence axis.
        merged = torch.cat(
            (seq, attended, seq - attended, seq * attended), 2)  # (B, L, 8H)
        merged = self.dropout(merged.transpose(1, 2))
        pooled_max = torch.max_pool1d(merged, merged.shape[-1]).squeeze(-1)
        pooled_avg = torch.avg_pool1d(merged, merged.shape[-1]).squeeze(-1)
        return pooled_max, pooled_avg  # each (B, 8H)

    B = x.shape[0]
    x, x_len = self.step(x)
    y, y_len = self.step(y)

    # Pairwise similarity between every x position and every y position.
    e = torch.bmm(x, y.transpose(1, 2))

    mask = torch.ones_like(e)
    for b in range(B):
        mask[b, :x_len[b], :y_len[b]] = 0
    invalid = mask.bool()

    # The fills go through ``.data`` (in place). Rows/columns that are
    # entirely -inf come out of the softmax as NaN and are overwritten
    # with 0 by the second pair of fills.
    e.data.masked_fill_(invalid, float("-inf"))
    e_x = F.softmax(e, dim=1)
    e_y = F.softmax(e, dim=2)
    e_x.data.masked_fill_(invalid, 0.0)
    e_y.data.masked_fill_(invalid, 0.0)

    x_att = torch.bmm(e_y, y)  # (B, L1, 2H)
    y_att = torch.bmm(e_x.transpose(1, 2), x)

    # x side first, then y side, so dropout is invoked in that order.
    x_max, x_avg = _compare_and_pool(x, x_att)
    y_max, y_avg = _compare_and_pool(y, y_att)

    out = torch.cat((x_max, x_avg, y_max, y_avg), 1)  # (B,32H)
    out = self.dropout(out)
    return self.fc(out)
def forward(self, y):
    """Encode ``y`` with two conv + norm stages.

    Each convolution is applied with dims 1 and 2 swapped and its output is
    swapped back before the norm layer.

    Returns:
        A pair ``(features, h2)`` where ``features`` is the first-stage
        output concatenated with ``h2`` on dim 2, and ``h2`` is the
        sigmoid-activated second-stage output.
    """
    stage1 = self.conv1(y.transpose(1, 2))
    stage1 = torch.relu(self.ln1(stage1.transpose(1, 2)))
    # NOTE(review): the pooling acts on the last axis of the tensor *after*
    # it was swapped back, i.e. not the axis the convolution slides over --
    # confirm this smoothing axis is intended.
    stage1 = torch.avg_pool1d(stage1, kernel_size=3, stride=1, padding=1)

    stage2 = self.conv2(stage1.transpose(1, 2))
    stage2 = torch.sigmoid(self.ln2(stage2.transpose(1, 2)))

    return torch.cat([stage1, stage2], dim=2), stage2
def avg_pool_on_seq(tensor):
    """
    Average-pool a tensor over its sequence dimension.

    The tensor is typically three-dimensional, and the pooling is typically
    done along the seq_len dimension (dim 1 here).
    :param tensor: input tensor whose dim 1 is averaged away
    :return: the input averaged over dim 1, with that dim removed
    """
    seq_len = tensor.size(1)
    # avg_pool1d pools the last dim, so move the sequence axis there first.
    seq_last = tensor.transpose(1, 2)
    return torch.avg_pool1d(seq_last, seq_len).squeeze(-1)
def avg_pool1d(self, x, seq_lens):
    """Average every sample over its first ``seq_lens[i]`` time steps.

    Args:
        x: batch of samples, x:[N,L,O_in].
        seq_lens: per-sample number of leading time steps to average over,
            indexed in the same order as ``x``.

    Returns:
        The per-sample averages concatenated on dim 0, with dim 2 squeezed
        away.
    """

    def _mean_of_prefix(sample, length):
        # Keep the first `length` steps and lay them out as [1, O_in, steps]
        # so a single pooling window covers all of them.
        prefix = sample[:length, :].t().unsqueeze(0)
        return torch.avg_pool1d(prefix, prefix.size(2))

    pooled = [_mean_of_prefix(sample, seq_lens[i])
              for i, sample in enumerate(x)]
    return torch.cat(pooled).squeeze(2)
def forward(self, x):
    """Run four conv/batch-norm/ReLU stages and return log-probabilities.

    The first stage is conv -> bn -> ReLU. Stages two to four pool the
    previous output first. After ``self.pool4`` the last dim is averaged
    away globally, dims 1 and 2 are swapped, and ``self.fc1`` followed by a
    log-softmax over dim 2 produces the result.
    """
    x = torch.relu(self.bn1(self.conv1(x)))

    later_stages = (
        (self.pool1, self.conv2, self.bn2),
        (self.pool2, self.conv3, self.bn3),
        (self.pool3, self.conv4, self.bn4),
    )
    for pool, conv, bn in later_stages:
        x = torch.relu(bn(conv(pool(x))))

    x = self.pool4(x)
    # Global average over whatever length remains in the last dim.
    x = torch.avg_pool1d(x, x.shape[-1])

    logits = self.fc1(x.permute(0, 2, 1))
    return torch.log_softmax(logits, dim=2)
def forward(self, x):
    """Encode an input whose last dim holds 6 leading features plus the rest.

    The first six features and the remaining ones (named "mfcc" in this
    module) are normalised separately, squashed with a sigmoid and moved to
    a layout with dims 1 and 2 swapped. Each group goes through its own
    first convolution; the results are joined on dim 1, normalised, passed
    through ReLU and smoothed with a width-3 average. A second convolution
    with norm and sigmoid follows.

    Returns:
        A pair ``(features, h2)``, both with dims 1 and 2 swapped back:
        ``features`` concatenates the smoothed first stage, the second
        stage and both squashed input groups; ``h2`` is the second stage
        alone.
    """
    head = self.normalize_6(x[:, :, :6])
    x_6 = torch.sigmoid(head.transpose(1, 2))
    tail = self.normalize_mfcc(x[:, :, 6:])
    x_mfcc = torch.sigmoid(tail.transpose(1, 2))

    branch_6 = self.conv1_x6(x_6)
    branch_mfcc = self.conv1_xmfcc(x_mfcc)
    h1 = torch.cat([branch_6, branch_mfcc], dim=1)

    # The norm layers are applied with dims 1 and 2 swapped; the result is
    # swapped back after the activation.
    h1 = torch.relu(self.ln1(h1.transpose(1, 2))).transpose(1, 2)
    h1 = torch.avg_pool1d(h1, kernel_size=3, stride=1, padding=1)

    h2 = self.conv2(h1)
    h2 = torch.sigmoid(self.ln2(h2.transpose(1, 2))).transpose(1, 2)

    features = torch.cat([h1, h2, x_6, x_mfcc], dim=1)
    return features.transpose(1, 2), h2.transpose(1, 2)
def embedd_and_forward(self, x):
    """Run the five-convolution stack and return sigmoid activations.

    The first four convolutions are each followed by a max pooling with
    kernel 4 and stride 4. The fifth convolution's output is reduced with
    both a global max and a global average over all dims from index 2 on;
    the two results are flattened per sample, concatenated on dim 1 and
    fed through ``self.dense_1`` and a sigmoid.
    """
    signal = x
    pooled_convs = (self.conv1d_1, self.conv1d_2,
                    self.conv1d_3, self.conv1d_4)
    for conv in pooled_convs:
        signal = torch.max_pool1d(conv(signal), kernel_size=(4, ),
                                  stride=(4, ), padding=0, ceil_mode=False)

    top = self.conv1d_5(signal)
    # A kernel as large as the remaining extent makes the pooling global.
    full_extent = top.size()[2:]
    global_max = torch.max_pool1d(input=top, kernel_size=full_extent)
    global_avg = torch.avg_pool1d(input=top, kernel_size=full_extent)

    flat_max = global_max.view(global_max.size(0), -1)
    flat_avg = global_avg.view(global_avg.size(0), -1)

    merged = torch.cat((flat_max, flat_avg), 1)
    return torch.sigmoid(self.dense_1(merged))