def __init__(self, vocab_size, embedding_size=300, hidden_size=512,
             num_layers=2, dropout=0.3, shared_weight=True, **kwargs):
    super().__init__()

    self.embedding = Embeddings(num_embeddings=vocab_size,
                                embedding_dim=embedding_size,
                                dropout=dropout)

    self.rnn = nn.LSTM(input_size=embedding_size,
                       hidden_size=hidden_size,
                       num_layers=num_layers,
                       bidirectional=False,
                       dropout=dropout,
                       batch_first=True)

    # Output layer: map RNN hidden states back into embedding space.
    self.output = nn.Sequential(nn.Linear(hidden_size, hidden_size),
                                nn.Tanh(),
                                nn.Linear(hidden_size, embedding_size))

    # Projection layer: embedding space -> vocabulary logits.
    self.proj = nn.Linear(embedding_size, vocab_size, bias=False)

    if shared_weight:
        # Tie the output projection to the input embedding matrix.
        self.proj.weight = self.embedding.embeddings.weight
    else:
        my_init.default_init(self.proj.weight)
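# A minimal, self-contained sketch of the weight tying above, using plain
# nn modules (the repo's `Embeddings` wrapper is not needed here; sizes are
# made up). Assigning the embedding's Parameter to the projection makes them
# the same tensor, so one gradient update moves both:
#
#     import torch.nn as nn
#
#     _emb = nn.Embedding(num_embeddings=30000, embedding_dim=300)
#     _proj = nn.Linear(300, 30000, bias=False)   # weight shape (30000, 300)
#     _proj.weight = _emb.weight                  # same Parameter object
#     assert _proj.weight is _emb.weight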
def _build_bridge(self):
    if self.bridge_type == "mlp":
        self.linear_bridge = nn.Linear(in_features=self.context_size,
                                       out_features=self.hidden_size)
        my_init.default_init(self.linear_bridge.weight)
    elif self.bridge_type == "zero":
        pass
    else:
        raise ValueError("Unknown bridge type {0}".format(self.bridge_type))
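# Hedged sketch of how an "mlp" bridge is typically applied: the encoder
# context is pooled and projected to form the decoder's initial hidden state,
# while a "zero" bridge just starts from zeros. The mean-pooling and the tanh
# squashing are assumptions here, not read from _build_bridge itself:
#
#     import torch
#     import torch.nn as nn
#
#     ctx = torch.randn(8, 20, 1024)        # [batch, src_len, context_size]
#     linear_bridge = nn.Linear(1024, 512)  # context_size -> hidden_size
#     dec_init = torch.tanh(linear_bridge(ctx.mean(dim=1)))  # [batch, hidden]
#     zero_init = ctx.new_zeros(8, 512)     # the "zero" bridge equivalent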
def __init__(self, d_model, n_head, feature_size=1024, hidden_size=512,
             dropout=0.0, **kwargs):
    super(QE_ATTENTION, self).__init__()

    self.ctx_attn = MultiHeadedAttention(head_count=n_head,
                                         model_dim=d_model,
                                         dropout=dropout,
                                         dim_per_head=None)

    # Use PAD
    self.gru = RNN(type="gru", batch_first=True, input_size=feature_size,
                   hidden_size=hidden_size, bidirectional=True)
    self.lstm = RNN(type="lstm", batch_first=True, input_size=feature_size,
                    hidden_size=hidden_size, bidirectional=True)

    # Scalar scoring head over the bidirectional RNN output (2 * hidden_size).
    self.w = nn.Linear(2 * hidden_size, 1)
    my_init.default_init(self.w.weight)

    self.dropout = nn.Dropout(dropout)
    self.sigmoid = nn.Sigmoid()
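# Hedged sketch of the scoring path QE_ATTENTION's modules imply: run token
# features through a bidirectional RNN, pool over time, then squash the scalar
# score with the sigmoid. Plain nn.GRU stands in for the repo's RNN wrapper,
# and the mean-pooling is an assumption:
#
#     import torch
#     import torch.nn as nn
#
#     feats = torch.randn(4, 50, 1024)   # [batch, mt_len, feature_size]
#     gru = nn.GRU(1024, 512, batch_first=True, bidirectional=True)
#     out, _ = gru(feats)                # [batch, mt_len, 2 * hidden_size]
#     score = torch.sigmoid(nn.Linear(2 * 512, 1)(out.mean(dim=1)))  # [4, 1]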
def __init__(self, feature_size=768, hidden_size=512, dropout_rate=0.1, **kwargs):
    super(QE_PAIR, self).__init__()

    # Use PAD
    self.gru = RNN(type="gru", batch_first=True, input_size=feature_size,
                   hidden_size=hidden_size, bidirectional=True)
    self.lstm = RNN(type="lstm", batch_first=True, input_size=feature_size,
                    hidden_size=hidden_size, bidirectional=True)
    self.lstm_src = RNN(type="lstm", batch_first=True, input_size=feature_size,
                        hidden_size=hidden_size, bidirectional=True)
    self.lstm_mt = RNN(type="lstm", batch_first=True, input_size=feature_size,
                       hidden_size=hidden_size, bidirectional=True)

    # Scalar scoring heads over bidirectional RNN states.
    self.w = nn.Linear(2 * hidden_size, 1)
    my_init.default_init(self.w.weight)
    # Head over concatenated src/mt states (2 directions * 2 encoders).
    self.w_all = nn.Linear(2 * 2 * hidden_size, 1)
    my_init.default_init(self.w_all.weight)
    self.w_1 = nn.Linear(2 * hidden_size, 1)
    my_init.default_init(self.w_1.weight)
    self.w_2 = nn.Linear(2 * hidden_size, 1)
    my_init.default_init(self.w_2.weight)

    self.dropout = nn.Dropout(dropout_rate)
    self.sigmoid = nn.Sigmoid()
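# Hedged sketch of what `w_all`'s input size (2 * 2 * hidden_size) suggests:
# pooled bidirectional states of the lstm_src and lstm_mt encoders are
# concatenated before scoring. The pooling itself is an assumption:
#
#     import torch
#     import torch.nn as nn
#
#     src_h = torch.randn(4, 2 * 512)   # pooled lstm_src output
#     mt_h = torch.randn(4, 2 * 512)    # pooled lstm_mt output
#     w_all = nn.Linear(2 * 2 * 512, 1)
#     pair_score = torch.sigmoid(w_all(torch.cat([src_h, mt_h], dim=-1)))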
def _reset_parameters(self):
    my_init.default_init(self.linear_input.weight)
    my_init.default_init(self.linear_hidden.weight)
    my_init.default_init(self.linear_ctx.weight)
def reset_parameters(self):
    my_init.default_init(self.linear.weight)
def _reset_parameters(self):
    for weight in self.parameters():
        my_init.default_init(weight)
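# Note on the blanket loop above: `self.parameters()` yields every registered
# tensor, including 1-D biases, so default_init must handle both. A plausible
# stand-in (an assumption, not the repo's actual my_init implementation):
#
#     import torch.nn as nn
#
#     def default_init(tensor):
#         if tensor.dim() > 1:
#             nn.init.xavier_uniform_(tensor)   # weight matrices
#         else:
#             nn.init.zeros_(tensor)            # biases / vectors
#         return tensor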
def _reset_parameters(self):
    my_init.default_init(self.intrinsic_predict_FFN1.weight)
    my_init.default_init(self.intrinsic_predict_FFN2.weight)
    my_init.default_init(self.reconstruct_action_FFN1.weight)
    my_init.default_init(self.reconstruct_action_FFN2.weight)
def _reset_parameters(self):
    # GRU parameters are initialized by the RNN module itself.
    self.layer_norm.reset_parameters()
    my_init.default_init(self.ffn.weight)
def _reset_parameters(self):
    self.LN.reset_parameters()
    my_init.default_init(self.scorer_ffn.weight)
def _reset_parameters(self):
    my_init.default_init(self.ctx_linear.weight)
    my_init.default_init(self.input_linear.weight)
    self.rephrase_LN.reset_parameters()
    # RNN parameters are initialized by the RNN module itself.
    my_init.default_init(self.rephraser_linear_base_mu.weight)
    my_init.default_init(self.rephraser_linear_base_log_sig.weight)
    my_init.default_init(self.rephraser_linear_mu.weight)
    my_init.default_init(self.rephraser_linear_log_sig.weight)
def _reset_parameters(self):
    my_init.default_init(self.ctx_linear.weight)
    my_init.default_init(self.input_linear.weight)
    my_init.default_init(self.action_linear.weight)
    self.critic_LN.reset_parameters()
    my_init.default_init(self.critic1_linear_base.weight)
    my_init.default_init(self.critic2_linear_base.weight)
    my_init.default_init(self.critic1_linear.weight)
    my_init.default_init(self.critic2_linear.weight)
def _reset_parameters(self):
    my_init.default_init(self.ctx_linear.weight)
    my_init.default_init(self.input_linear.weight)
    my_init.default_init(self.attacker_linear.weight)
    my_init.default_init(self.critic_linear.weight)
def _reset_parameters(self):
    if self.cell_type == 'cgru':
        my_init.default_init(self.linear_input.weight)
        my_init.default_init(self.linear_hidden.weight)
        my_init.default_init(self.linear_ctx.weight)
def _reset_parameters(self):
    for weight in self.parameters():
        my_init.default_init(weight)
    if self.cover_size is not None:
        # The coverage GRU gets the dedicated RNN initializer, overwriting
        # the blanket default_init pass above.
        for weight in self.gru.parameters():
            my_init.rnn_init(weight)
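# A hedged stand-in for my_init.rnn_init, assuming the common choice of
# orthogonal recurrent/input matrices for GRUs (an assumption, not read from
# the repo's my_init module):
#
#     import torch.nn as nn
#
#     def rnn_init(tensor):
#         if tensor.dim() > 1:
#             nn.init.orthogonal_(tensor)   # weight_ih / weight_hh matrices
#         else:
#             nn.init.zeros_(tensor)        # bias vectors
#         return tensor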