def __init__(self, n_max_seq, *, y_scale_by, steps, min_length, n_layers=6,
             n_head=8, d_word_vec=512, d_model=512, d_inner_hid=1024,
             d_k=64, d_v=64, edrop=0.25, odrop=0.25, hdrop=0.1,
             propagate=False):
    super(TransformerModel, self).__init__(
        bidirectional=False, edrop=edrop, odrop=odrop, propagate=propagate,
        min_length=min_length, y_scale_by=y_scale_by, steps=steps)
    self.hidden_size = d_model
    self._create_layers(mlp=True)
    self.encoder = TransformerEncoder(
        n_max_seq, n_layers=n_layers, n_head=n_head, d_word_vec=d_word_vec,
        d_model=d_model, d_k=d_k, d_v=d_v, d_inner_hid=d_inner_hid,
        dropout=hdrop)
    self.decoder = TransformerDecoder(
        n_max_seq, n_layers=n_layers, n_head=n_head, d_word_vec=d_word_vec,
        d_model=d_model, d_k=d_k, d_v=d_v, d_inner_hid=d_inner_hid,
        dropout=hdrop)
    self.encoder_mapping = nn.Linear(self.input_dim, d_word_vec)
    self.decoder_mapping = nn.Linear(self.decode_dim, d_word_vec)
    self.step_one_network = nn.Sequential(
        nn.Linear(self.hidden_size, self.hidden_size),
        nn.ReLU(),
        LayerNormalization(self.hidden_size),
        nn.Linear(self.hidden_size, 1))
    self.output_network = nn.Sequential(
        nn.Linear(self.hidden_size, self.hidden_size),
        nn.ReLU(),
        LayerNormalization(self.hidden_size),
        nn.Linear(self.hidden_size, 1))
    self.init_linear_weights()
    assert d_model == d_word_vec, \
        'To facilitate the residual connections, ' \
        'the dimensions of all module outputs shall be the same.'
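# A minimal instantiation sketch; every value below is illustrative rather than
# taken from the repository's training scripts, and TransformerModel's base class
# is assumed to accept the keyword-only arguments declared above.
model = TransformerModel(
    n_max_seq=200,       # illustrative maximum sequence length
    y_scale_by=1.0,      # illustrative target-scaling constant
    steps=39,            # illustrative number of forecast steps
    min_length=7,        # illustrative minimum input sequence length
)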
def __init__(self, d_hid, d_inner_hid, dropout=0.1):
    super(PositionwiseFeedForward, self).__init__()
    self.w_1 = nn.Conv1d(d_hid, d_inner_hid, 1)  # position-wise
    self.w_2 = nn.Conv1d(d_inner_hid, d_hid, 1)  # position-wise
    self.layer_norm = LayerNormalization(d_hid)
    self.dropout = nn.Dropout(dropout)
    self.relu = nn.ReLU()
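# A forward-pass sketch for this Conv1d-based position-wise feed-forward block,
# assuming inputs of shape (batch, seq_len, d_hid) and a post-norm residual
# connection as in the reference Transformer; this is an illustrative
# reconstruction, not necessarily the repository's own forward method.
def forward(self, x):
    residual = x
    # nn.Conv1d expects (batch, channels, seq_len), hence the transposes
    output = self.relu(self.w_1(x.transpose(1, 2)))
    output = self.w_2(output).transpose(2, 1)
    output = self.dropout(output)
    return self.layer_norm(output + residual)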
def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1, enc_output=None):
    '''
    :param n_head: number of attention heads
    :param d_model: model (input/output) dimensionality
    :param d_k: per-head dimensionality of queries and keys
    :param d_v: per-head dimensionality of values
    :param dropout: dropout rate applied before the residual connection
    :param enc_output: not used in this constructor
    '''
    super().__init__()
    self.n_head = n_head
    self.d_k = d_k
    self.d_v = d_v
    self.w_qs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_ks = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_vs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_v))
    self.attention = ScaledDotProductAttention()
    self.layer_norm = LayerNormalization(d_model)
    self.proj = Linear(n_head * d_v, d_model)
    self.dropout = nn.Dropout(dropout)
    init.xavier_normal_(self.w_qs)
    init.xavier_normal_(self.w_ks)
    init.xavier_normal_(self.w_vs)
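# A forward-pass sketch for this batched-weight multi-head attention, assuming
# torch is imported as in the rest of the module and that ScaledDotProductAttention
# takes (q, k, v, attn_mask) and returns (output, attention weights); this is an
# illustrative reconstruction, not necessarily the repository's own forward method.
def forward(self, q, k, v, attn_mask=None):
    residual = q
    n_head, d_k, d_v = self.n_head, self.d_k, self.d_v
    batch, len_q, _ = q.size()
    # project with the per-head weight tensors: (n_head, batch, len, d_k/d_v),
    # then fold the heads into the batch dimension for a single attention call
    q_s = torch.einsum('bld,hdk->hblk', q, self.w_qs).reshape(n_head * batch, len_q, d_k)
    k_s = torch.einsum('bld,hdk->hblk', k, self.w_ks).reshape(n_head * batch, k.size(1), d_k)
    v_s = torch.einsum('bld,hdv->hblv', v, self.w_vs).reshape(n_head * batch, v.size(1), d_v)
    if attn_mask is not None:
        attn_mask = attn_mask.repeat(n_head, 1, 1)
    out, attns = self.attention(q_s, k_s, v_s, attn_mask=attn_mask)
    # (n_head * batch, len_q, d_v) -> (batch, len_q, n_head * d_v)
    out = torch.cat(out.split(batch, dim=0), dim=-1)
    out = self.dropout(self.proj(out))
    return self.layer_norm(out + residual), attns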
def __init__(self, d_model, d_inner_hid, dropout=0.1):
    super().__init__()
    self.w_1 = nn.Linear(d_model, d_inner_hid)
    self.w_2 = nn.Linear(d_inner_hid, d_model)
    self.layer_norm = LayerNormalization(d_model)
    self.dropout = nn.Dropout(dropout)
    self.relu = nn.ReLU()
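# A forward sketch for the nn.Linear variant above, assuming inputs of shape
# (batch, seq_len, d_model); unlike the Conv1d version, no transposes are needed
# because nn.Linear already operates on the last dimension. Illustrative only.
def forward(self, x):
    residual = x
    output = self.dropout(self.w_2(self.relu(self.w_1(x))))
    return self.layer_norm(output + residual)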
def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
    """Initialize the multi-head attention module.

    Arguments:
        n_head {int} -- number of attention heads
        d_model {int} -- total model dimensionality
        d_k {int} -- per-head dimensionality of queries and keys
        d_v {int} -- per-head dimensionality of values
    """
    super(MultiHeadAttention, self).__init__()
    self.n_head = n_head
    self.d_k = d_k
    self.d_v = d_v
    self.w_qs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_ks = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_vs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_v))
    self.attention = ScaledDotProductAttention(d_model)
    self.layer_norm = LayerNormalization(d_model)
    self.proj = Linear(n_head * d_v, d_model)
    self.dropout = nn.Dropout(dropout)
    # xavier_normal is deprecated; use the in-place xavier_normal_ instead
    init.xavier_normal_(self.w_qs)
    init.xavier_normal_(self.w_ks)
    init.xavier_normal_(self.w_vs)
def __init__(self, d_input, d_output, d_inner_hid, dropout=0.1):
    super(PositionwiseFeedForwardFunnel, self).__init__()
    self.w_1 = nn.Linear(d_input, d_inner_hid)  # position-wise
    self.w_2 = nn.Linear(d_inner_hid, d_output)  # position-wise
    self.layer_norm = LayerNormalization(d_output)
    self.dropout = nn.Dropout(dropout)
    self.d_output = d_output
    self.d_input = d_input
    self.relu = nn.ReLU()
    # combine the residual (of dimension d_input) and the projected output
    # (of dimension d_output) into a single d_output-dimensional output:
    self.add_residual = nn.Linear(d_input + d_output, d_output)
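# A forward sketch for the funnel variant, assuming inputs of shape
# (batch, seq_len, d_input). Because d_output may differ from d_input, the raw
# residual cannot be added directly; following the comment in __init__, the
# residual is concatenated with the projected output and mixed by add_residual.
# This is an assumed reconstruction, not the repository's forward method.
def forward(self, x):
    residual = x
    output = self.dropout(self.w_2(self.relu(self.w_1(x))))
    combined = self.add_residual(torch.cat([residual, output], dim=-1))
    return self.layer_norm(combined)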
def _create_layers(self, mlp=False):
    self.store_area_em = nn.Embedding(103, 10, max_norm=10, norm_type=2)
    self.store_municipal_em = nn.Embedding(55, 5, max_norm=5, norm_type=2)
    self.store_prefecture_em = nn.Embedding(9, 2, max_norm=2, norm_type=2)
    self.store_genre_em = nn.Embedding(14, 5, max_norm=5, norm_type=2)
    # self.weekday_em = nn.Embedding(7, 5, max_norm=5, norm_type=2)
    self.day_em = nn.Embedding(31, 5, max_norm=5, norm_type=2)
    # self.month_em = nn.Embedding(12, 5, max_norm=5, norm_type=2)
    if not mlp:
        self.step_one_network = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.ReLU(),
            LayerNormalization(self.hidden_size),
            nn.Dropout(self.odrop),
            nn.Linear(self.hidden_size, 1))
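# A sketch of how these categorical embeddings might be combined with numeric
# features into a single input vector before encoder_mapping / decoder_mapping;
# the method name, index arguments, and feature layout are assumptions for
# illustration, not taken from the repository.
def _embed_features(self, area_idx, municipal_idx, prefecture_idx,
                    genre_idx, day_idx, numeric_feats):
    # each *_idx: LongTensor of category ids, shape (batch,);
    # numeric_feats: FloatTensor of the remaining features, shape (batch, n_numeric)
    return torch.cat([
        self.store_area_em(area_idx),
        self.store_municipal_em(municipal_idx),
        self.store_prefecture_em(prefecture_idx),
        self.store_genre_em(genre_idx),
        self.day_em(day_idx),
        numeric_feats,
    ], dim=-1)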
def __init__(self, d_model, n_head, d_k=64, d_v=64, res_dropout=0.1):
    super(MultiHeadAttention, self).__init__()
    self.w_qs = nn.ModuleList(
        [Linear(d_model, d_k, bias=False) for _ in range(n_head)])
    self.w_ks = nn.ModuleList(
        [Linear(d_model, d_k, bias=False) for _ in range(n_head)])
    self.w_vs = nn.ModuleList(
        [Linear(d_model, d_v, bias=False) for _ in range(n_head)])
    self.attention = ScaledDotProductAttention(d_model)
    self.layer_norm = LayerNormalization(d_model)
    self.proj = Linear(n_head * d_v, d_model)
    self.dropout = nn.Dropout(res_dropout)
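# A forward sketch for this per-head-module variant, assuming
# ScaledDotProductAttention takes (q, k, v, attn_mask) and returns
# (output, attention weights); each head applies its own projections, the head
# outputs are concatenated, projected back to d_model, and added to the residual.
# Illustrative reconstruction, not necessarily the repository's forward method.
def forward(self, q, k, v, attn_mask=None):
    residual = q
    head_outputs = []
    for w_q, w_k, w_v in zip(self.w_qs, self.w_ks, self.w_vs):
        out, _ = self.attention(w_q(q), w_k(k), w_v(v), attn_mask=attn_mask)
        head_outputs.append(out)
    outputs = self.dropout(self.proj(torch.cat(head_outputs, dim=-1)))
    return self.layer_norm(outputs + residual)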
def __init__(self, d_hid, d_inner_hid, dropout=0.1):
    """Position-wise feed-forward layer.

    Arguments:
        d_hid {int} -- output dimensionality, equal to the input dimensionality
        d_inner_hid {int} -- inner hidden dimensionality, usually larger than the input
    """
    super(PositionwiseFeedForward, self).__init__()
    self.w_1 = nn.Conv1d(d_hid, d_inner_hid, 1)  # position-wise
    self.w_2 = nn.Conv1d(d_inner_hid, d_hid, 1)  # position-wise
    self.layer_norm = LayerNormalization(d_hid)
    self.dropout = nn.Dropout(dropout)
    self.relu = nn.ReLU()
def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
    super(MultiHeadAttention, self).__init__()
    self.n_head = n_head
    self.d_k = d_k
    self.d_v = d_v
    self.w_qs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_ks = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_vs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_v))
    self.attention = ScaledDotProductAttention(d_model)
    self.layer_norm = LayerNormalization(d_model)
    self.proj = Linear(n_head * d_v, d_model)
    self.dropout = nn.Dropout(dropout)
    # xavier_normal is deprecated; use the in-place xavier_normal_ instead
    init.xavier_normal_(self.w_qs)
    init.xavier_normal_(self.w_ks)
    init.xavier_normal_(self.w_vs)