def forward(self, s):
    x = self.preprocess(s)
    x = F.leaky_relu(self.bn1(self.conv1(x)))
    x = F.leaky_relu(self.bn2(self.conv2(x)))
    x = F.leaky_relu(self.bn3(self.conv3(x)))
    x = F.leaky_relu(self.bn4(self.conv4(x)))
    x = F.leaky_relu(self.bn5(self.conv5(x)))
    x = F.leaky_relu(self.bn6(self.conv6(x)))
    # x = x.view(x.size(0), -1)

    # policy head
    policy = F.leaky_relu(self.policy_bn(self.conv_policy(x))).view(x.size(0), -1)
    policy = self.policy_dropout(policy)
    # change training method: training=True keeps dropout active even in eval mode
    policy = F.dropout(policy, p=0.3, training=True)
    policy = self.softmax(self.linear_policy(policy))

    # value head
    value = F.leaky_relu(self.value_bn(self.conv_value(x))).view(x.size(0), -1)
    value = self.value_dropout(value)
    # change training method: training=True keeps dropout active even in eval mode
    value = F.dropout(value, p=0.3, training=True)
    value = F.leaky_relu(self.fc_value(value))
    value = torch.tanh(self.linear_output(value))
    return policy, value
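# A minimal, self-contained sketch (not part of the original model) of the
# difference flagged by the "change training method" comment above:
# F.dropout(..., training=True) applies dropout regardless of model mode,
# whereas training=self.training is switched off by model.eval().
import torch
import torch.nn.functional as F

x = torch.ones(4, 8)
always_on = F.dropout(x, p=0.3, training=True)    # dropout applied regardless of mode
respects_mode = F.dropout(x, p=0.3, training=False)  # identity when not training
assert torch.equal(respects_mode, x)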
def forward(self, features, adj):
    x = self.conv1(features, adj)
    x = F.relu(x)
    x = F.dropout(x, self.dropout, self.training)
    x = self.conv2(x, adj)
    return F.log_softmax(x, dim=1)
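# Hedged sketch: the `adj` consumed by this two-layer GCN forward is typically
# the symmetrically normalized adjacency D^-1/2 (A + I) D^-1/2 (Kipf & Welling).
# The helper below is illustrative and assumes a dense square adjacency matrix;
# it is not part of the original code.
import torch

def normalize_adj(adj: torch.Tensor) -> torch.Tensor:
    adj = adj + torch.eye(adj.size(0))   # add self-loops
    deg = adj.sum(dim=1)                 # node degrees
    d_inv_sqrt = deg.pow(-0.5)
    d_inv_sqrt[torch.isinf(d_inv_sqrt)] = 0.0
    d_mat = torch.diag(d_inv_sqrt)
    return d_mat @ adj @ d_mat           # D^-1/2 (A + I) D^-1/2

adj = normalize_adj(torch.tensor([[0., 1.], [1., 0.]]))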
def forward(self, x):
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
    x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    x = x.view(-1, 320)
    x = F.relu(self.fc1(x))
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    # pass dim explicitly; calling log_softmax without dim is deprecated
    return F.log_softmax(x, dim=1)
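# Hedged sketch of the layer shapes this forward assumes (the classic PyTorch
# MNIST example): with 28x28 inputs, two 5x5 convs each followed by 2x2
# max-pooling leave 20 channels of 4x4, i.e. the 320 features flattened above.
import torch.nn as nn

conv1 = nn.Conv2d(1, 10, kernel_size=5)   # 28x28 -> 24x24, pooled to 12x12
conv2 = nn.Conv2d(10, 20, kernel_size=5)  # 12x12 -> 8x8, pooled to 4x4
conv2_drop = nn.Dropout2d()
fc1 = nn.Linear(320, 50)                  # 20 * 4 * 4 = 320
fc2 = nn.Linear(50, 10)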
def forward(self, segfeats, seglens, wordfeats, wordmasks):
    x1 = self.v2s(segfeats, wordfeats, wordmasks)
    frames1, x1 = self.cross_gate(segfeats, x1)
    mmfeats = torch.cat([frames1, x1], -1)
    # wordfeats = self.bilinear(frames1, x1, F.relu)
    mmfeats = self.rnn(mmfeats, seglens, self.video_segment_num)
    mmfeats = F.dropout(mmfeats, self.dropout, self.training)
    return mmfeats
def forward(self, seg_feats, seglen):
    """
    seg_feats (tensor[B, seg, feat_dim])
    seglen (tensor[B])
    """
    seg_feats = F.dropout(seg_feats, self.dropout, self.training)
    seg_feats = seg_feats.transpose(0, 1)
    for attention in self.attn_layers:
        # residual self-attention block
        res = seg_feats
        seg_feats, _ = attention(seg_feats, seg_feats, seg_feats, None,
                                 attn_mask=self.self_attn_mask)
        seg_feats = F.dropout(seg_feats, self.dropout, self.training)
        seg_feats = res + seg_feats
    seg_feats = self.rnn(seg_feats, seglen, self.video_segment_num)
    seg_feats = F.dropout(seg_feats, self.dropout, self.training)
    seg_feats = seg_feats.transpose(0, 1)
    return seg_feats
def forward(self, x):
    # CNNs
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    x = self.conv4(x)
    x = x.view(x.size(0), -1)  # flatten the output of conv

    # FC layers
    x = F.relu(self.fc1(x))
    # x = F.dropout(x, p=self.drop_p, training=self.training)
    x = F.relu(self.fc2(x))
    x = F.dropout(x, p=self.drop_p, training=self.training)
    out = self.fc3(x)
    # swap time and sample dim such that (sample dim, time dim, CNN latent dim)
    # cnn_embed_seq: shape=(batch, time_step, input_size)
    return out
def forward(self, frames, seglens, x, node_mask):
    """
    frames    [B, seg, vdim]  segfeats
    seglens   [B]
    x         [B, len, wdim]  wordfeats
    node_mask [B, len]        wordmasks
    """
    frames_len = frames.shape[1]

    # attentive
    x1_att, x2_att, _, _ = self.atten(frames, x, node_mask)
    x1_m, x2_m = x1_att, x2_att  # self.message_v(x1_att), self.message_s(x2_att)
    frames1 = self.update_v(x1_m, frames)
    x1 = self.update_s(x2_m, x)
    x1_m, _, a1, _ = self.intra_v(frames1, frames1, node_mask)
    x2_m, _, a2, _ = self.intra_s(x1, x1, node_mask)
    frames1 = self.update_v_intra(x1_m, frames1)
    x1 = self.update_s_intra(x2_m, x1)

    # Below is what appeared in CSMGAN's official code (layer 2), kept for reference:
    # x1_att, x2_att, a1, a2 = self.atten(frames1, x1, node_mask)
    # x1_m, x2_m = x1_att, x2_att  # self.message_v(x1_att), self.message_s(x2_att)
    # frames1 = self.update_v(x1_m, frames1)
    # x1 = self.update_s(x2_m, x1)
    # x1_m, _, a1, _ = self.intra_v(frames1, frames1, node_mask)
    # x2_m, _, a2, _ = self.intra_s(x1, x1, node_mask)
    # frames1 = self.update_v_intra(x1_m, frames1)
    # x1 = self.update_s_intra(x2_m, x1)
    # frames1, x1 = frames, x
    # a1, a2 = 1, 1

    # interactive
    x1 = self.v2s(frames1, x1, node_mask)
    x = torch.cat([frames1, x1], -1)  # x1
    x = self.rnn(x, seglens, frames_len)
    x = F.dropout(x, self.dropout, self.training)
    return x
def forward(self, batch):
    """
    First composes the input word vectors into one representation. This is
    then fed through a hidden layer with a ReLU and finally through an output
    layer that returns the raw weights for each class.
    :param batch: dictionary holding the first word representation ("w1"),
        the second word representation ("w2"), and the target device ("device")
    :return: the raw weights for each class
    """
    device = batch["device"]
    self._composed_phrase = self.compose(batch["w1"].to(device),
                                         batch["w2"].to(device),
                                         self.training)
    if self.add_single_words:
        w1_w2 = torch.cat((batch["w1"].to(device), batch["w2"].to(device)), 1)
        self._composed_phrase = torch.cat((w1_w2, self.composed_phrase), 1)
    hidden = F.relu(self.hidden(self.composed_phrase))
    # disable dropout at inference time by tying it to self.training
    hidden = F.dropout(hidden, p=self.dropout_rate, training=self.training)
    class_weights = self.output(hidden)
    return class_weights
def forward(self, X, X_padding_mask=None, coverage=None, dropout=0.1):
    """
    X serves as key, query, and value (self-attention over the encoder outputs):
    K / key:   (L, B, H) encoder_outputs, encoder feature
    V / value: (L, B, H) used to calculate the context vector
    Q / query: (L, B, H) last_hidden, decoder feature
    X_padding_mask: (B, 1, L)
    coverage: (B, L)
    """
    X_dim = X.size(-1)
    X_query = X.transpose(0, 1)  # -> (B, L, H)
    X_key = X.transpose(0, 1)    # -> (B, L, H)
    X_value = X.transpose(0, 1)  # -> (B, L, H)
    scores = torch.matmul(X_query, X_key.transpose(-2, -1)) / math.sqrt(
        X_dim)  # (B, L, H) x (B, H, L) -> (B, L, L)
    attn_dist = F.softmax(scores, dim=-1)  # (B, L, L)
    attn_dist = F.dropout(attn_dist, p=dropout)
    context = torch.matmul(attn_dist, X_value)  # (B, L, L) x (B, L, H) -> (B, L, H)
    # average over the query positions to obtain a single context vector
    context = context.sum(1) / context.size(1)
    return context, attn_dist
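# Hedged usage sketch with made-up shapes (L=5, B=2, H=8), not part of the
# original module: the routine above is scaled dot-product self-attention
# over X, followed by an average over query positions, so the returned
# context has shape (B, H).
import math
import torch
import torch.nn.functional as F

L, B, H = 5, 2, 8
X = torch.randn(L, B, H)
Xb = X.transpose(0, 1)                                         # (B, L, H)
scores = torch.matmul(Xb, Xb.transpose(-2, -1)) / math.sqrt(H)  # (B, L, L)
context = torch.matmul(F.softmax(scores, dim=-1), Xb).mean(1)   # (B, H)
print(context.shape)  # torch.Size([2, 8])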
def forward(self, query, key, value, key_padding_mask=None, need_weights=True,
            attn_mask=None):
    """Input shape: Time x Batch x Channel

    Self-attention can be implemented by passing in the same arguments for
    query, key and value. Timesteps can be masked by supplying a T x T mask
    in the `attn_mask` argument. Padding elements can be excluded from the
    key by passing a binary ByteTensor (`key_padding_mask`) with shape:
    batch x src_len, where padding elements are indicated by 1s.
    """
    qkv_same = query.data_ptr() == key.data_ptr() == value.data_ptr()
    kv_same = key.data_ptr() == value.data_ptr()

    tgt_len, bsz, embed_dim = query.size()
    assert embed_dim == self.embed_dim
    assert list(query.size()) == [tgt_len, bsz, embed_dim]
    assert key.size() == value.size()

    saved_state = None

    if qkv_same:
        # self-attention
        q, k, v = self.in_proj_qkv(query)
    elif kv_same:
        # encoder-decoder attention
        q = self.in_proj_q(query)
        if key is None:
            assert value is None
            k = v = None
        else:
            k, v = self.in_proj_kv(key)
    else:
        q = self.in_proj_q(query)
        k = self.in_proj_k(key)
        v = self.in_proj_v(value)
    q = q * self.scaling

    if self.bias_k is not None:
        assert self.bias_v is not None
        k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)])
        v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)])
        if attn_mask is not None:
            attn_mask = torch.cat(
                [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1)
        if key_padding_mask is not None:
            key_padding_mask = torch.cat([
                key_padding_mask,
                key_padding_mask.new_zeros(key_padding_mask.size(0), 1)
            ], dim=1)

    q = q.contiguous().view(tgt_len, bsz * self.num_heads,
                            self.head_dim).transpose(0, 1)
    if k is not None:
        k = k.contiguous().view(-1, bsz * self.num_heads,
                                self.head_dim).transpose(0, 1)
    if v is not None:
        v = v.contiguous().view(-1, bsz * self.num_heads,
                                self.head_dim).transpose(0, 1)

    # q: bsz * num_heads, tgt_len, head_dim
    # k, v: bsz * num_heads, src_len, head_dim
    # key_padding_mask: bsz, src_len
    src_len = k.size(1)

    # This is part of a workaround to get around fork/join parallelism
    # not supporting Optional types.
    if key_padding_mask is not None and key_padding_mask.shape == torch.Size([]):
        key_padding_mask = None

    if key_padding_mask is not None:
        assert key_padding_mask.size(0) == bsz
        assert key_padding_mask.size(1) == src_len

    if self.add_zero_attn:
        src_len += 1
        k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1)
        v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1)
        if attn_mask is not None:
            attn_mask = torch.cat(
                [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1)
        if key_padding_mask is not None:
            key_padding_mask = torch.cat([
                key_padding_mask,
                torch.zeros(key_padding_mask.size(0),
                            1).type_as(key_padding_mask)
            ], dim=1)

    attn_weights = torch.bmm(q, k.transpose(1, 2))
    assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len]

    # attn_weights: bsz * num_heads, tgt_len, src_len
    # attn_mask: tgt_len, src_len
    if attn_mask is not None:
        attn_mask = attn_mask.unsqueeze(0)
        if self.onnx_trace:
            attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1)
        attn_weights += attn_mask

    if key_padding_mask is not None:
        # don't attend to padding symbols
        attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
        if self.onnx_trace:
            attn_weights = torch.where(
                key_padding_mask.unsqueeze(1).unsqueeze(2),
                torch.Tensor([float("-Inf")]),
                attn_weights.float()).type_as(attn_weights)
        else:
            # FP16 support: cast to float and back
            attn_weights = attn_weights.float().masked_fill(
                key_padding_mask.unsqueeze(1).unsqueeze(2),
                float('-inf'),
            ).type_as(attn_weights)
        attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

    attn_weights = self.softmax(
        attn_weights,
        dim=-1,
        onnx_trace=self.onnx_trace,
    ).type_as(attn_weights)
    attn_weights = F.dropout(attn_weights, p=self.dropout,
                             training=self.training)

    attn = torch.bmm(attn_weights, v)
    assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
    if self.onnx_trace and attn.size(1) == 1:
        # when ONNX tracing a single decoder step (sequence length == 1)
        # the transpose is a no-op copy before view, thus unnecessary
        attn = attn.contiguous().view(tgt_len, bsz, embed_dim)
    else:
        attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
    attn = self.out_proj(attn)

    if need_weights:
        # average attention weights over heads
        attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
        attn_weights = attn_weights.sum(dim=1) / self.num_heads
    else:
        attn_weights = None

    return attn, attn_weights
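# Hedged usage sketch, not tied to the class above: the forward in the two
# preceding blocks follows the same (time, batch, channel) layout and masking
# semantics as torch.nn.MultiheadAttention, shown here with toy shapes for
# comparison.
import torch
import torch.nn as nn

mha = nn.MultiheadAttention(embed_dim=16, num_heads=4)
x = torch.randn(7, 2, 16)                               # (tgt_len, bsz, embed_dim)
key_padding_mask = torch.zeros(2, 7, dtype=torch.bool)  # True marks padding positions
attn_out, attn_weights = mha(x, x, x, key_padding_mask=key_padding_mask)
print(attn_out.shape, attn_weights.shape)               # (7, 2, 16), (2, 7, 7)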
def forward(self, input):
    if not self.freezed:
        # standard dropout: a fresh mask is sampled on every call
        return F.dropout(input, self.p, self.training, self.inplace)
    else:
        # reuse the frozen mask, repeated across the batch dimension
        return input * torch.stack([self.mask] * input.size(0)).type(input.type())
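# Hedged sketch (attribute names assumed, not from the original class) of how a
# frozen dropout mask could be produced so the "freezed" branch above reuses one
# fixed pattern: sample a Bernoulli mask once per feature and rescale by
# 1 / (1 - p), as inverted dropout does.
import torch

p = 0.5
feature_dim = 16
mask = torch.bernoulli(torch.full((feature_dim,), 1 - p)) / (1 - p)  # fixed mask
x = torch.randn(4, feature_dim)
out = x * torch.stack([mask] * x.size(0))  # same mask for every sample in the batch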