import torch
import torch.nn.functional as F


def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
    """
    LSTM.weight_ih_l[k] has shape (4*hidden_size, input_size): the input-to-hidden matrices of
    the three gates and of the cell candidate are stacked vertically into a single matrix,
    which is why the first dimension is 4*hidden_size; the stacking order itself does not
    matter. Likewise LSTM.weight_hh_l[k] has shape (4*hidden_size, hidden_size) and stacks the
    four hidden-to-hidden matrices:
        weight_ih = [ih_in; ih_forget; ih_cell; ih_out]
    """
    hx, cx = hidden
    gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh)
    # Split along dim 1 into 4 chunks, one per gate. The chunk order is arbitrary in itself;
    # which gate a chunk becomes is determined by the activation applied to it below.
    # Each chunk has shape (batch, hidden_size).
    ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
    ingate = F.sigmoid(ingate)
    forgetgate = F.sigmoid(forgetgate)
    cellgate = F.tanh(cellgate)
    outgate = F.sigmoid(outgate)
    cy = (forgetgate * cx) + (ingate * cellgate)
    hy = outgate * F.tanh(cy)
    return hy, cy
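
# A minimal usage sketch (not part of the original code): driving LSTMCell with randomly
# initialised parameters whose shapes follow the docstring above. The helper name
# _lstm_cell_demo is purely illustrative.
def _lstm_cell_demo():
    batch, input_size, hidden_size = 2, 8, 16
    x = torch.randn(batch, input_size)
    hx = torch.zeros(batch, hidden_size)
    cx = torch.zeros(batch, hidden_size)
    w_ih = torch.randn(4 * hidden_size, input_size)   # stacked [in; forget; cell; out] blocks
    w_hh = torch.randn(4 * hidden_size, hidden_size)
    b_ih = torch.zeros(4 * hidden_size)
    b_hh = torch.zeros(4 * hidden_size)
    hy, cy = LSTMCell(x, (hx, cx), w_ih, w_hh, b_ih, b_hh)
    assert hy.shape == (batch, hidden_size) and cy.shape == (batch, hidden_size)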

def od_temporal_attention(self, y):
    # Score each time step of the OD sequence against a learned context vector, then return
    # the attention-weighted sum over the time dimension together with the scores.
    H = F.tanh(self.proj2(y))
    score1 = self.softmax(H.matmul(self.od_context_vector))
    y = y.mul(score1)  # x_mul = torch.mul(x, score)
    y = torch.sum(y, dim=1)
    return y, score1

def temporal_attention(self, x):
    # Same attention pooling as above, but over the input sequence x with its own
    # projection and context vector.
    H = F.tanh(self.proj1(x))
    score = self.softmax(H.matmul(self.context_vector))
    x = x.mul(score)
    x = torch.sum(x, dim=1)
    return x, score
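
# A minimal sketch of the module state temporal_attention expects (this wrapper class is an
# assumption, not the original module): x is taken to be (batch, seq_len, in_dim), the context
# vector to have shape (attn_dim, 1), and softmax to normalise over the time dimension, so the
# returned score is (batch, seq_len, 1) and the pooled output is (batch, in_dim). The same
# layout applies to od_temporal_attention with proj2 and od_context_vector.
import torch.nn as nn


class TemporalAttentionDemo(nn.Module):
    def __init__(self, in_dim, attn_dim):
        super().__init__()
        self.proj1 = nn.Linear(in_dim, attn_dim)
        self.context_vector = nn.Parameter(torch.randn(attn_dim, 1))
        self.softmax = nn.Softmax(dim=1)  # normalise attention scores across time steps

    def forward(self, x):
        return temporal_attention(self, x)


# pooled: (batch, in_dim), score: (batch, seq_len, 1)
pooled, score = TemporalAttentionDemo(in_dim=32, attn_dim=32)(torch.randn(4, 10, 32))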

def residue_forward(self, input, conv_sigmoid, conv_tanh, skip_scale, residue_scale):
    # WaveNet-style gated residual unit: a sigmoid gate multiplies a tanh filter, the result
    # feeds both the skip connection and the residual branch, and the input is cropped along
    # the time axis so it can be added to the (shorter) convolution output.
    output = input
    output_sigmoid, output_tanh = conv_sigmoid(output), conv_tanh(output)
    output = F.sigmoid(output_sigmoid) * F.tanh(output_tanh)
    skip = skip_scale(output)
    output = residue_scale(output)
    output = output + input[:, :, -output.size(2):]
    return output, skip
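
# A minimal usage sketch (layer sizes and kernel settings are assumptions): the dilated 1D
# convolutions shorten the time axis, which is why residue_forward crops the input before the
# residual add. None is passed for self because the method body never uses it.
import torch.nn as nn

res_channels, skip_channels, dilation = 16, 32, 2
conv_sigmoid = nn.Conv1d(res_channels, res_channels, kernel_size=2, dilation=dilation)
conv_tanh = nn.Conv1d(res_channels, res_channels, kernel_size=2, dilation=dilation)
skip_scale = nn.Conv1d(res_channels, skip_channels, kernel_size=1)
residue_scale = nn.Conv1d(res_channels, res_channels, kernel_size=1)
x = torch.randn(4, res_channels, 64)  # (batch, channels, time)
out, skip = residue_forward(None, x, conv_sigmoid, conv_tanh, skip_scale, residue_scale)
# out: (4, 16, 62) after the residual add; skip: (4, 32, 62)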

def forward(self, input_features):
    # Two-layer classification head: linear -> configurable activation -> dropout -> linear.
    features_output1 = self.classifier1(input_features)
    if self.act_func == "gelu":
        features_output1 = F.gelu(features_output1)
    elif self.act_func == "relu":
        features_output1 = F.relu(features_output1)
    elif self.act_func == "tanh":
        features_output1 = F.tanh(features_output1)
    else:
        raise ValueError("unsupported act_func: %s" % self.act_func)
    features_output1 = self.dropout(features_output1)
    features_output2 = self.classifier2(features_output1)
    return features_output2

def forward(self, x):
    """Given an image x, returns a transformed image."""
    # Define feedforward behavior, applying activations as necessary.
    out = self.conv1(x)
    out = self.conv2(out)
    out = self.conv3(out)
    out = self.res_blocks(out)
    out = F.relu(self.deconv1(out))
    out = F.relu(self.deconv2(out))
    out = F.tanh(self.deconv3(out))  # tanh in the last layer keeps outputs in [-1, 1]
    return out

def dec_act(self, inputs):
    # Select the decoder activation by name; unknown names fall back to ELU.
    if self.args.dec_act == 'tanh':
        return F.tanh(inputs)
    elif self.args.dec_act == 'elu':
        return F.elu(inputs)
    elif self.args.dec_act == 'relu':
        return F.relu(inputs)
    elif self.args.dec_act == 'selu':
        return F.selu(inputs)
    elif self.args.dec_act == 'sigmoid':
        return F.sigmoid(inputs)
    elif self.args.dec_act == 'linear':
        return inputs
    else:
        return F.elu(inputs)
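
# A quick usage sketch (assumption: self only needs an args.dec_act attribute here), using a
# SimpleNamespace stand-in for the real module.
from types import SimpleNamespace

demo = SimpleNamespace(args=SimpleNamespace(dec_act='selu'))
activated = dec_act(demo, torch.randn(3, 5))  # applies F.selu; 'linear' would return inputs unchanged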

def RNNTanhCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
    # Vanilla RNN step: hy = tanh(W_ih @ x + b_ih + W_hh @ h + b_hh).
    hy = F.tanh(F.linear(input, w_ih, b_ih) + F.linear(hidden, w_hh, b_hh))
    return hy
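
# A minimal usage sketch (not from the original): one tanh RNN step with random weights.
def _rnn_tanh_cell_demo():
    batch, input_size, hidden_size = 2, 8, 16
    x = torch.randn(batch, input_size)
    h = torch.zeros(batch, hidden_size)
    w_ih = torch.randn(hidden_size, input_size)
    w_hh = torch.randn(hidden_size, hidden_size)
    hy = RNNTanhCell(x, h, w_ih, w_hh)
    assert hy.shape == (batch, hidden_size)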