def add_layers(self, n_new_layer):
    """Grow the encoder by appending n_new_layer fresh EncoderLayers."""
    self.new_modules = list()
    self.layers += n_new_layer

    for i in range(n_new_layer):
        layer = EncoderLayer(self.n_heads, self.model_size, self.dropout,
                             self.inner_size, self.attn_dropout)

        # the first new layer reuses the encoder's final postprocessing
        # (layer norm) as its attention preprocessing ...
        if i == 0:
            layer.preprocess_attn = self.postprocess_layer
            # ... and the final postprocessing is replaced with a new one
            self.postprocess_layer = PrePostProcessing(self.model_size, 0, sequence='n')

        self.layer_modules.append(layer)
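# A minimal usage sketch for add_layers (hypothetical, not from the source):
# `opt`, `dicts` and `positional_encoder` are assumed to come from the
# surrounding training setup, and the encoder is assumed to have been
# trained at its initial depth before growing.
encoder = TransformerEncoder(opt, dicts, positional_encoder)
# ... train the initial opt.layers layers, then grow the stack:
encoder.add_layers(2)  # appends two fresh EncoderLayers on top
assert len(encoder.layer_modules) == encoder.layers  # self.layers was bumped by 2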
def build_modules(self):
    # each EncoderLayer additionally receives the residual dropout rate
    self.layer_modules = nn.ModuleList(
        [EncoderLayer(self.n_heads, self.model_size, self.dropout,
                      self.inner_size, self.attn_dropout,
                      self.residual_dropout)
         for _ in range(self.layers)])
def __init__(self, opt, dicts, positional_encoder):
    super(TransformerEncoder, self).__init__()

    self.model_size = opt.model_size
    self.n_heads = opt.n_heads
    self.inner_size = opt.inner_size
    self.layers = opt.layers
    self.dropout = opt.dropout
    self.word_dropout = opt.word_dropout
    self.attn_dropout = opt.attn_dropout
    self.emb_dropout = opt.emb_dropout
    self.time = opt.time
    self.version = opt.version

    self.word_lut = nn.Embedding(dicts.size(), self.model_size,
                                 padding_idx=onmt.Constants.PAD)

    # how positional information is injected into the input sequence
    if opt.time == 'positional_encoding':
        self.time_transformer = positional_encoder
    elif opt.time == 'gru':
        self.time_transformer = nn.GRU(self.model_size, self.model_size, 1,
                                       batch_first=True)
    elif opt.time == 'lstm':
        self.time_transformer = nn.LSTM(self.model_size, self.model_size, 1,
                                        batch_first=True)

    # dropout on the embeddings, layer norm at the encoder output
    self.preprocess_layer = PrePostProcessing(self.model_size, self.emb_dropout,
                                              sequence='d', static=False)
    self.postprocess_layer = PrePostProcessing(self.model_size, 0, sequence='n')

    self.positional_encoder = positional_encoder

    self.layer_modules = nn.ModuleList(
        [EncoderLayer(self.n_heads, self.model_size, self.dropout,
                      self.inner_size, self.attn_dropout)
         for _ in range(self.layers)])

    self.pretrained_point = -1
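# A hedged sketch of the option namespace this constructor reads: the field
# names are exactly the attributes accessed above, while the values are
# illustrative placeholders, not the project's defaults.
from argparse import Namespace

opt = Namespace(
    model_size=512, n_heads=8, inner_size=2048, layers=6,
    dropout=0.1, word_dropout=0.0, attn_dropout=0.1, emb_dropout=0.1,
    time='positional_encoding',  # or 'gru' / 'lstm'
    version=1.0,
)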
def build_modules(self, limit_rhs_steps=None):
    self.layer_modules = nn.ModuleList(
        [EncoderLayer(self.n_heads, self.model_size, self.dropout,
                      self.inner_size, self.attn_dropout,
                      limit_rhs_steps=limit_rhs_steps)
         for _ in range(self.layers)])
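# Hypothetical call sketch (not from the source): limit_rhs_steps is simply
# forwarded to every EncoderLayer, presumably bounding how many future
# (right-hand-side) positions self-attention may attend to; the value 4 is
# illustrative only.
encoder.build_modules(limit_rhs_steps=4)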