def get_a_test_layer(use_cuda, activation):
    d_model = 4
    nhead = 2
    dim_feedforward = 16
    dropout = 0.0
    # layer_norm_eps = 0
    # first test of
    # 1e-7, 0.00012575089931488037
    # 0., 0.00012575089931488037
    # 1e-5, 0.0001255124807357788
    # 1e-6, 0.00012575089931488037
    # 1e-4, 0.00012433528900146484
    # 1e-3, 0.00011134892702102661
    # 1e-2, 0.000179290771484375
    device = torch.device("cuda" if use_cuda else "cpu")

    layer = TransformerDecoderLayer(
        d_model,
        nhead,
        dim_feedforward=dim_feedforward,
        dropout=dropout,
        activation=activation,
        # layer_norm_eps=layer_norm_eps,
    ).to(device)

    with torch.no_grad():
        # set constant weights of the model
        for idx, p in enumerate(layer.parameters()):
            x = p.data
            sz = x.view(-1).size(0)
            shape = x.shape
            x = torch.cos(torch.arange(0, sz).float().view(shape))
            p.data.copy_(x)

    return layer
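# --- Hedged usage sketch (not part of the original snippet) ---
# Exercises the constant-weight test layer above with deterministic inputs,
# assuming the standard torch.nn.TransformerDecoderLayer import used by
# get_a_test_layer; the tensor names `tgt` and `memory` are illustrative.
import torch
from torch.nn import TransformerDecoderLayer

layer = get_a_test_layer(use_cuda=False, activation="relu")
tgt = torch.cos(torch.arange(0, 3 * 1 * 4).float()).view(3, 1, 4)     # (T, N, d_model)
memory = torch.sin(torch.arange(0, 5 * 1 * 4).float()).view(5, 1, 4)  # (S, N, d_model)
with torch.no_grad():
    out = layer(tgt, memory)
print(out.shape)  # torch.Size([3, 1, 4])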
def __init__(self, ntoken_in, ntoken_out, embed_size, drop_it, trns_model='base'):
    super(Transformer_Model, self).__init__()
    self.src_mask = None
    self.trg_mask = None
    self.drop = drop_it
    self.PAD_token = 2
    self.src_pad_mask = None
    self.trg_pad_mask = None

    self.embed_in = nn.Embedding(ntoken_in, embed_size)
    self.embed_out = nn.Embedding(ntoken_out, embed_size)
    self.pos_encoder = PositionalEncoding(embed_size, self.drop, max_len=5000)

    if trns_model == 'base':
        # base model
        encoder_layer = TransformerEncoderLayer(512, 8, 2048, 0.1)
        encoder_norm = LayerNorm(512)
        self.encoder = TransformerEncoder(encoder_layer, 6, encoder_norm)
        decoder_layer = TransformerDecoderLayer(512, 8, 2048, 0.1)
        decoder_norm = LayerNorm(512)
        self.decoder = TransformerDecoder(decoder_layer, 6, decoder_norm)
    else:
        # big model
        encoder_layer = TransformerEncoderLayer(1024, 16, 4096, 0.3)
        encoder_norm = LayerNorm(1024)
        self.encoder = TransformerEncoder(encoder_layer, 6, encoder_norm)
        decoder_layer = TransformerDecoderLayer(1024, 16, 4096, 0.3)
        decoder_norm = LayerNorm(1024)
        self.decoder = TransformerDecoder(decoder_layer, 6, decoder_norm)

    self.ninp = embed_size
    self.linear_dec = nn.Linear(embed_size, ntoken_out)

    # initialise embedding & linear layer parameters
    self.init_weights()
    # initialise transformer parameters
    self.reset_params()
def __init__(self, vocab_size, embed_dim, n_head, hid_dim, n_layer, dropout=0.5, pad_idx=0):
    super(TransformerModel, self).__init__()
    self.model_type = 'Transformer'
    self.pad_idx = pad_idx
    self.pos_encoder = PositionalEncoding(embed_dim, dropout)
    encoder_layers = TransformerEncoderLayer(embed_dim, n_head, hid_dim, dropout)
    self.transformer_encoder = TransformerEncoder(encoder_layers, n_layer)
    decoder_layers = TransformerDecoderLayer(embed_dim, n_head, hid_dim, dropout)
    self.transformer_decoder = TransformerDecoder(decoder_layers, n_layer)
    self.embed_src = nn.Embedding(vocab_size, embed_dim)
    self.embed_tgt = nn.Embedding(vocab_size, embed_dim)
    self.fc_out = nn.Linear(embed_dim, vocab_size)
    self.embed_dim = embed_dim
def __init__(self, vocab_size, d_model: int = 512, nhead: int = 8,
             num_encoder_layers: int = 6, num_decoder_layers: int = 6,
             dim_feedforward: int = 2048, dropout: float = 0.1,
             activation: str = "relu", custom_encoder=None, custom_decoder=None):
    super(ChatBot, self).__init__()
    self.dropout = dropout
    self.position_encoder = PositionEmbedding(d_model=d_model, dropout=dropout, max_len=80)
    self.emb = nn.Embedding(vocab_size, d_model)

    if custom_encoder is not None:
        self.encoder = custom_encoder
    else:
        from torch.nn import TransformerEncoderLayer
        encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, activation)
        encoder_norm = LayerNorm(d_model)
        self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

    if custom_decoder is not None:
        self.decoder = custom_decoder
    else:
        decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout, activation)
        decoder_norm = LayerNorm(d_model)
        self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm)

    self.d_model = d_model
    self.nhead = nhead
    self.pred = nn.Linear(d_model, vocab_size)
    self._reset_parameters()
def __init__(self, num_tokens, embedding_table, max_time_steps, text_dim, quat_dim, quat_channels,
             offsets_dim, intended_emotion_dim, intended_polarity_dim, acting_task_dim,
             gender_dim, age_dim, handedness_dim, native_tongue_dim,
             num_heads, num_hidden_units, num_layers, dropout=0.5):
    super(T2GNet, self).__init__()
    self.T = max_time_steps
    self.text_dim = text_dim
    self.quat_channels = quat_channels
    self.text_mask = None
    self.quat_mask = None
    self.text_embedding = nn.Embedding.from_pretrained(embedding_table, freeze=True)
    self.text_pos_encoder = PositionalEncoding(text_dim, dropout)
    encoder_layers = TransformerEncoderLayer(text_dim, num_heads, num_hidden_units, dropout)
    self.transformer_encoder = TransformerEncoder(encoder_layers, num_layers)
    intermediate_dim = int((text_dim + quat_dim) / 2)
    self.text_embed = nn.Linear(text_dim + intended_emotion_dim + intended_polarity_dim +
                                acting_task_dim + gender_dim + age_dim + handedness_dim +
                                native_tongue_dim, intermediate_dim)
    self.text_offsets_to_gestures = nn.Linear(intermediate_dim + offsets_dim, quat_dim)
    self.quat_pos_encoder = PositionalEncoding(quat_dim, dropout)
    decoder_layers = TransformerDecoderLayer(quat_dim, num_heads, num_hidden_units, dropout)
    self.transformer_decoder = TransformerDecoder(decoder_layers, num_layers)
    self.temporal_smoothing = nn.ModuleList((
        nn.Conv1d(max_time_steps, max_time_steps, 3, padding=1),
        nn.Conv1d(max_time_steps, max_time_steps, 3, padding=1),
    ))
    self.decoder = nn.Linear(text_dim, num_tokens)
    self.init_weights()
def __init__(self, max_memory_length, max_tgt_length, vocab_size, pad_idx, embedding_dim,
             num_heads, hidden_dim, num_layers, dropout=0.1):
    """
    Decodes target sequence using memory from idEncoder.
    :param max_memory_length: the max number of vectors that is used in memory,
    :param max_tgt_length: the max length of target sequence,
    :param vocab_size: the target vocabulary size,
    :param pad_idx: the index of padding token in target vocabulary,
    :param embedding_dim: the dimension of target token embedding,
    :param num_heads: the number of heads in multiHeadAttention,
    :param hidden_dim: the dimension of the feedforward network after multiHeadAttention,
    :param num_layers: the number of sub-decoder-layers,
    :param dropout: the dropout value.
    """
    super(IdDecoder, self).__init__()
    self.max_memory_length = max_memory_length
    self.max_tgt_length = max_tgt_length
    self.pad_idx = pad_idx
    self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
    self.positional_encoder = PositionalEncoding(embedding_dim, dropout)
    decoder_layer = TransformerDecoderLayer(embedding_dim, num_heads, hidden_dim, dropout)
    self.decoder = TransformerDecoder(decoder_layer, num_layers)
    # the decoder outputs vectors of size embedding_dim (its d_model), so the
    # vocabulary projection must take embedding_dim, not hidden_dim
    self.fc = nn.Linear(embedding_dim, vocab_size)
def __init__(self, config):
    super(XfmrDecoder, self).__init__()
    self.vocab = Vocab.load(config["vocab_file"])
    with open(config["typelib_file"]) as type_f:
        self.typelib = TypeLibCodec.decode(type_f.read())
    self.typelib = self.typelib.fix()
    self.target_embedding = nn.Embedding(len(self.vocab.subtypes), config["target_embedding_size"])
    self.target_transform = nn.Linear(
        config["target_embedding_size"] + config["hidden_size"],
        config["hidden_size"],
    )
    # self.cached_decode_mask: Dict[int, torch.Tensor] = {}
    # self.size = torch.zeros(len(self.vocab.types), dtype=torch.long)
    # concat variable encoding and previous target token embedding as input
    decoder_layer = TransformerDecoderLayer(
        config["hidden_size"],
        1,
        config["hidden_size"],
        config["dropout"],
        activation="gelu",
    )
    decoder_norm = LayerNorm(config["hidden_size"])
    self.decoder = TransformerDecoder(decoder_layer, config["num_layers"], decoder_norm)
    self.output = nn.Linear(config["hidden_size"], len(self.vocab.subtypes))
    self.config: Dict = config
def __init__(self, ntoken, tgt_ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
    super(TransformerModel, self).__init__()
    from torch.nn import TransformerEncoder, TransformerEncoderLayer
    from torch.nn import TransformerDecoder, TransformerDecoderLayer
    self.model_type = 'Transformer'
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(ninp, dropout)
    self.encoder_embed = nn.Embedding(ntoken, ninp)
    encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
    self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
    self.decoder_embed = nn.Embedding(tgt_ntoken, ninp)
    decoder_layers = TransformerDecoderLayer(ninp, nhead, nhid, dropout)
    self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers)
    self.decoder_out = nn.Linear(ninp, tgt_ntoken)
    self.ninp = ninp
    self.init_weights()
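# --- Hedged usage sketch (not part of the original snippet) ---
# A minimal causal-mask helper for encoder-decoder models shaped like the one
# above, assuming batch-second (S, N, E) tensors as torch.nn.TransformerDecoder
# expects by default. The commented forward pass uses the attribute names from
# the snippet above but is only an illustration, not the source's forward().
import torch

def causal_mask(sz: int) -> torch.Tensor:
    # upper-triangular -inf mask so position i cannot attend to positions > i
    return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)

# src: (S, N) source token ids, tgt: (T, N) target token ids
# memory = model.transformer_encoder(model.pos_encoder(model.encoder_embed(src)))
# out = model.transformer_decoder(model.pos_encoder(model.decoder_embed(tgt)),
#                                 memory, tgt_mask=causal_mask(tgt.size(0)))
# logits = model.decoder_out(out)   # (T, N, tgt_ntoken)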
def __init__(self, ntoken, ninp, nhead, nhid, nlayers, reader_, params, dropout=0.5, embedding=None):
    """
    Args:
        ntoken: vocab size
        ninp: embedding dimension
        nhead: number of heads
        nhid: hidden layer size
        nlayers: number of layers
        reader_: instance of `Reader`
        dropout: dropout rate
    """
    super().__init__()
    from torch.nn import TransformerDecoder, TransformerDecoderLayer
    self.model_type = 'TransformerDecoder'
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(ninp, dropout)
    decoder_layers = TransformerDecoderLayer(ninp, nhead, nhid, dropout)
    self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers)
    self.embedding = nn.Embedding(ntoken, ninp) if embedding is None else embedding
    self.ninp = ninp
    self.linear = nn.Linear(ninp, ntoken)
    self.reader_ = reader_
    self.params = params
    self.init_weights()
def __init__(
    self,
    root_size,
    feature_size,
    latent_size,
    batch_size,
    ninp,
    nhead,
    nhid,
    nlayers,
    max_num_limbs,
    transformer_norm=False,
    dropout=0.5,
):
    super(Decoder, self).__init__()
    self.model_type = "Decoder"
    self.root_size = root_size
    self.batch_size = batch_size
    self.max_num_limbs = max_num_limbs
    self.input_projection = nn.Linear(latent_size * 2, ninp)
    decoder_layers = TransformerDecoderLayer(ninp, nhead, nhid, dropout)
    self.transformer_decoder = TransformerDecoder(
        decoder_layers,
        nlayers,
        norm=nn.LayerNorm(ninp) if transformer_norm else None,
    )
    pe = PositionalEncoding(ninp, self.max_num_limbs)
    self.add_module("pe", pe)
    self.structure_emb = StructureEncoding(ninp, self.max_num_limbs)
    self.root_projection = nn.Linear(ninp, root_size)
    self.output_projection = nn.Linear(ninp, feature_size)
    self.latent_size = latent_size
def __init__(self, decoder_params: Dict, tgt_vocab_size: int, tgt_padding_idx: int):
    super(NMTDecoder, self).__init__()
    self.decoder_params = decoder_params
    self.tgt_embedding = nn.Embedding(
        num_embeddings=tgt_vocab_size + 1,
        embedding_dim=self.decoder_params['embedding_dim'],
        padding_idx=tgt_padding_idx)
    self.pos_encoding = PositionalEncoding(
        d_model=self.decoder_params['hidden_size'],
        dropout=self.decoder_params['pos_encoding_dropout'])
    self.decoder_layer = TransformerDecoderLayer(
        d_model=self.decoder_params['hidden_size'],
        nhead=self.decoder_params['nheads'],
        dim_feedforward=self.decoder_params['ff_size'],
        dropout=self.decoder_params['dropout'])
    self.layer_norm = nn.LayerNorm(
        normalized_shape=self.decoder_params['hidden_size'], eps=1e-6)
    self.decoder = TransformerDecoder(
        decoder_layer=self.decoder_layer,
        num_layers=self.decoder_params['nlayers'],
        norm=self.layer_norm)
    self.latent2vocab = nn.Linear(self.decoder_params['hidden_size'], tgt_vocab_size)
    self.drop = nn.Dropout(p=self.decoder_params['fc_dropout'])
def __init__(self, config):
    super(XfmrDecoder, self).__init__()
    self.vocab = Vocab.load(config["vocab_file"])
    with open(config["typelib_file"]) as type_f:
        self.typelib = TypeLibCodec.decode(type_f.read())
    retype_vocab_size = len(self.vocab.types)
    rename_vocab_size = len(self.vocab.names)
    self.target_embedding = nn.Embedding(
        retype_vocab_size + rename_vocab_size, config["target_embedding_size"]
    )
    self.target_transform = nn.Linear(
        config["target_embedding_size"] + config["hidden_size"],
        config["hidden_size"],
    )
    # concat variable encoding and previous target token embedding as input
    decoder_layer = TransformerDecoderLayer(
        config["hidden_size"],
        1,
        config["hidden_size"],
        config["dropout"],
        activation="gelu",
    )
    decoder_norm = LayerNorm(config["hidden_size"])
    self.decoder = TransformerDecoder(
        decoder_layer, config["num_layers"], decoder_norm
    )
    self.output = nn.Linear(
        config["hidden_size"], retype_vocab_size + rename_vocab_size
    )
    self.mem_mask = config["mem_mask"]
    self.config: Dict = config
    self.retype_vocab_size = retype_vocab_size
def __init__(self, src_vocab_size: int, tgt_vocab_size: int, encoder_layer_num: int,
             decoder_layer_num: int, hidden_size: int, feedback_size: int, num_head: int,
             dropout: float, device: str):
    super().__init__()
    self.Encoder = Encoder(src_vocab_size, num_encoder_layer=encoder_layer_num,
                           hidden_size=hidden_size, num_head=num_head,
                           feedward=feedback_size, dropout=dropout, device=device)
    self.Decoder = Decoder(tgt_vocab_size, num_layer=decoder_layer_num,
                           hiddensize=hidden_size, num_head=num_head,
                           feed_back=feedback_size, dropout=dropout, device=device)
    Encoder_layer = TransformerEncoderLayer(nhead=8, d_model=512)
    self.Encoder_off = TransformerEncoder(encoder_layer=Encoder_layer, num_layers=6)
    Decoder_layer = TransformerDecoderLayer(nhead=8, dim_feedforward=2048, d_model=512)
    self.Decoder_off = TransformerDecoder(decoder_layer=Decoder_layer, num_layers=6)
    self.model = tf()
    self.device = device
    self.input_embedding = torch.nn.Embedding(src_vocab_size, 512)
    self.output_embedding = torch.nn.Embedding(tgt_vocab_size, 512)
    self.positional = Positional_Encoding(512, 512, device)
    self.linear = torch.nn.Linear(512, tgt_vocab_size)
def __init__(self, args, device, d_model=256, nhead=4, d_ff=1024, nlayers=2, dropout=0.5):
    super(Autoencoder, self).__init__()
    self.model_type = 'Transformer'
    self.d_model = d_model
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(d_model, dropout)  # encoder's position
    self.pos_decoder = PositionalEncoding(d_model, dropout)  # decoder's position
    decoder_layers = TransformerDecoderLayer(d_model, nhead, d_ff, dropout)
    decoder_norm = nn.LayerNorm(d_model)
    self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers, decoder_norm)
    self.bert_encoder = BertModel.from_pretrained(
        args.PRETRAINED_MODEL_NAME, output_hidden_states=args.distill_2)
    self.bert_embed = self.bert_encoder.embeddings
    self.tgt_embed = self.bert_embed
    self.distill_2 = args.distill_2
    self.gru = nn.GRU(d_model, d_model, 1)
    self.lr = nn.Linear(d_model, self.bert_encoder.config.vocab_size + 1)
    self.sigmoid = nn.Sigmoid()
    self.device = device
    self.init_weights()
def __init__(self, d_model: int = 512, num_heads: int = 8, num_encoder_layers: int = 6,
             num_decoder_layers: int = 6, units: int = 2048, dropout: float = 0.1,
             activation: str = "relu") -> None:
    """
    :param d_model: model depth, i.e. the word-embedding dimension
    :param num_heads: number of attention heads
    :param num_encoder_layers: number of encoder layers
    :param num_decoder_layers: number of decoder layers
    :param units: number of feed-forward units
    :param dropout: dropout rate
    :param activation: activation function
    """
    super(Transformer, self).__init__()
    encoder_layer = TransformerEncoderLayer(d_model, num_heads, units, dropout, activation)
    encoder_norm = torch.nn.LayerNorm(d_model)
    self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)
    decoder_layer = TransformerDecoderLayer(d_model, num_heads, units, dropout, activation)
    decoder_norm = torch.nn.LayerNorm(d_model)
    self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm)
    self._reset_parameters()
    self.d_model = d_model
    self.num_heads = num_heads
def __init__(self, src_feature_size, ntoken, nhead, nhid, nlayers, dropout=0.5, decoder_d_model=2048):
    super(TransformerModel, self).__init__()
    from torch.nn import TransformerEncoder, TransformerEncoderLayer, \
        TransformerDecoder, TransformerDecoderLayer, Transformer
    self.encoder_d_model = src_feature_size
    self.decoder_d_model = decoder_d_model
    self.ntoken = ntoken
    self.model_type = 'Transformer'
    self.trg_mask = None
    self.encoder_pre = PreProcessing()
    self.enc_linear = nn.Linear(src_feature_size, decoder_d_model)
    self.pos_encoder = PositionalEncoding(self.decoder_d_model, dropout)
    encoder_layers = TransformerEncoderLayer(d_model=src_feature_size, nhead=nhead,
                                             dim_feedforward=nhid, dropout=dropout)
    self.transformer_encoder = TransformerEncoder(
        encoder_layer=encoder_layers, num_layers=nlayers)
    self.embedder = Embedder(ntoken, decoder_d_model)
    self.pos_decoder = PositionalEncoding(decoder_d_model, dropout)
    decoder_layers = TransformerDecoderLayer(d_model=decoder_d_model, nhead=nhead,
                                             dim_feedforward=nhid, dropout=dropout)
    self.transformer_decoder = TransformerDecoder(
        decoder_layer=decoder_layers, num_layers=nlayers)
    self.out = nn.Linear(decoder_d_model, ntoken)
def __init__(self, num_layers, em_dim, num_heads, ff_dim, input_vocab, target_vocab,
             max_pos_enc, max_pos_dec):
    super().__init__()
    self.em_dim = em_dim
    # encoder layers
    self.encode_embedding = nn.Embedding(input_vocab, em_dim)
    self.encode_pos_embedding = PositionalEncoding(max_pos=max_pos_enc, em_dim=em_dim)
    encoder_layer = TransformerEncoderLayer(em_dim, nhead=num_heads, dim_feedforward=ff_dim)
    self.transformer_encoder = TransformerEncoder(
        encoder_layer=encoder_layer, num_layers=num_layers)
    self.encode_dropout = nn.Dropout(p=0.1)
    # decoder layers
    self.decode_embedding = nn.Embedding(target_vocab, em_dim)
    self.decode_pos_embedding = PositionalEncoding(max_pos=max_pos_dec, em_dim=em_dim)
    decoder_layer = TransformerDecoderLayer(em_dim, nhead=num_heads, dim_feedforward=ff_dim)
    self.transformer_decoder = TransformerDecoder(
        decoder_layer=decoder_layer, num_layers=num_layers)
    self.decode_dropout = nn.Dropout(p=0.1)
    self.final_linear = nn.Linear(em_dim, target_vocab)
def __init__(self, n_src_vocab, n_tgt_vocab, d_model, nhead, nhid, nlayers, dropout=0.1):
    super(TransformerModel, self).__init__()
    try:
        from torch.nn import TransformerEncoder, TransformerEncoderLayer, \
            TransformerDecoder, TransformerDecoderLayer
    except ImportError:
        raise ImportError(
            'TransformerEncoder module does not exist in PyTorch 1.1 or lower.'
        )
    self.model_type = 'transformer'
    self.d_model = d_model
    self.src_embedding = PositionalEmbedding(n_src_vocab, d_model)
    self.tgt_embedding = PositionalEmbedding(n_tgt_vocab, d_model)
    encoder_layers = TransformerEncoderLayer(d_model, nhead, nhid, dropout)
    self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
    decoder_layers = TransformerDecoderLayer(d_model, nhead, nhid, dropout)
    self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers)
    # produces the output distribution over the target vocabulary
    self.generator = nn.Linear(d_model, n_tgt_vocab)
    self._reset_parameters()
def __init__(self, cfg: DictConfig, cwd: Path):
    super().__init__()
    # self.example_input_array = torch.rand(2, 1, 1024, 128)
    self.cfg = cfg
    self.dataset = cfg.dataset
    self.hparams = cfg.hparams
    self.cwd = cwd
    self.model_type = 'Transformer'
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(self.hparams["n_mels"], self.hparams["dropout"])
    encoder_layers = TransformerEncoderLayer(self.hparams["n_mels"], self.hparams["n_head"],
                                             self.hparams["n_hid"], self.hparams["dropout"])
    self.encoder = TransformerEncoder(encoder_layers, self.hparams["n_layers"])
    decoder_layers = TransformerDecoderLayer(self.hparams["n_mels"], self.hparams["n_head"],
                                             self.hparams["n_hid"], self.hparams["dropout"])
    self.decoder = TransformerDecoder(decoder_layers, self.hparams["n_layers"])
def __init__(self, ntoken, ninp, num_heads, hidden_dim, num_layers, dropout=0.5):
    super(TransformerModel, self).__init__()
    self.model_type = "Transformer"
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(ninp, dropout)
    encoder_layer = TransformerEncoderLayer(ninp, num_heads, hidden_dim, dropout)
    self.transformer_encoder = TransformerEncoder(
        encoder_layer=encoder_layer,
        num_layers=num_layers,
        norm=LayerNorm(ninp),
    )
    decoder_layer = TransformerDecoderLayer(ninp, num_heads, hidden_dim, dropout)
    self.transformer_decoder = TransformerDecoder(
        decoder_layer=decoder_layer,
        num_layers=num_layers,
        norm=LayerNorm(ninp),
    )
    # Use Linear instead of Embedding for continuous valued input
    self.encoder = nn.Linear(ntoken, ninp)
    self.project = nn.Linear(ninp, ntoken)
    self.ninp = ninp
    self.init_weights()
def __init__(self, emb_size, NHEAD, num_decoder_layers, maxlen, vocab_size,
             dim_feedforward: int = 512, dropout: float = 0.1):
    super(TextDecode, self).__init__()
    decoder_layer = TransformerDecoderLayer(d_model=emb_size, nhead=NHEAD,
                                            dim_feedforward=dim_feedforward)
    self.transformer_decoder = TransformerDecoder(decoder_layer, num_layers=num_decoder_layers)
    self.embeding = TokenEmbedding(maxlen, emb_size)
    self.positional_encoding = PositionalEncoding(emb_size=emb_size, dropout=dropout)
    self.generator = nn.Linear(emb_size, vocab_size)
def __init__(self, seq_len, nheads, T, dropout, emb_size, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.T = T
    layer_norm = nn.LayerNorm((seq_len, emb_size))
    self.dec = TransformerDecoder(
        TransformerDecoderLayer(emb_size, nheads, dim_feedforward=4096, dropout=dropout),
        1,
        norm=layer_norm)
    self.pos = PositionalTimeEncoding(emb_size, seq_len)
def __init__(self, args, device, d_model=256, nhead=4, d_ff=1024, nlayers=2, dropout=0.5):
    super(Autoencoder, self).__init__()
    self.model_type = 'Transformer'
    self.d_model = d_model
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(d_model, dropout)  # encoder's position
    self.pos_decoder = PositionalEncoding(d_model, dropout)  # decoder's position
    decoder_layers = TransformerDecoderLayer(d_model, nhead, d_ff, dropout)
    decoder_norm = nn.LayerNorm(d_model)
    self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers, decoder_norm)
    # self.bert_encoder = BertModel.from_pretrained(args.PRETRAINED_MODEL_NAME, output_hidden_states=args.distill_2)
    if args.use_albert:
        self.bert_encoder = AlbertModel.from_pretrained("clue/albert_chinese_tiny")
        self.bert_embed = self.bert_encoder.embeddings
        # self.tgt_embed = self.bert_embed
        d_vocab = self.bert_encoder.config.vocab_size + 1
        self.tgt_embed = nn.Sequential(
            Embeddings(d_model, d_vocab), PositionalEncoding(d_model, dropout))
    elif args.use_tiny_bert:
        self.bert_encoder = AutoModel.from_pretrained("google/bert_uncased_L-2_H-256_A-4")
        self.bert_embed = self.bert_encoder.embeddings
        self.tgt_embed = self.bert_embed
    elif args.use_distil_bert:
        configuration = DistilBertConfig()
        self.bert_encoder = DistilBertModel(configuration)
        self.bert_embed = self.bert_encoder.embeddings
        self.tgt_embed = self.bert_embed
        # self.tgt_embed = self.bert.embeddings
    else:
        self.bert_encoder = BertModel.from_pretrained(
            args.PRETRAINED_MODEL_NAME, output_hidden_states=args.distill_2)
        self.bert_embed = self.bert_encoder.embeddings
        self.tgt_embed = self.bert_embed
    self.distill_2 = args.distill_2
    self.gru = nn.GRU(d_model, d_model, 1)
    self.lr = nn.Linear(d_model, self.bert_encoder.config.vocab_size + 1)
    self.sigmoid = nn.Sigmoid()
    self.device = device
    self.init_weights()
def __init__(self, ninp, nhead, nhid, nlayers, dropout=0.1):
    super(TransformerModel, self).__init__()
    from torch.nn import TransformerEncoder, TransformerDecoder, \
        TransformerEncoderLayer, TransformerDecoderLayer
    self.model_type = 'Transformer'
    self.pos_encoder = PositionalEncoding(ninp, dropout)
    encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
    self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
    decoder_layers = TransformerDecoderLayer(ninp, nhead, nhid, dropout)
    self.decoder = TransformerDecoder(decoder_layers, nlayers)
    self.ninp = ninp
def get_decoder(self):
    """The decoder: RNN or Transformer"""
    if self.config.decoder_type == "transformer":
        dec = TransformerDecoderLayer(d_model=self.config.dec_hidden_dim,
                                      nhead=self.config.n_att_heads)
        dec = TransformerDecoder(dec, num_layers=self.config.decoder_layers)
    else:
        raise NotImplementedError("No decoder implementation of {}".format(
            self.config.decoder_type))
    return dec
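# --- Hedged usage sketch (not part of the original snippet) ---
# Shows how a decoder built the way get_decoder() does it could be called,
# assuming hypothetical config values (dec_hidden_dim=512, n_att_heads=8,
# decoder_layers=2) and batch-second (T, N, E) tensors, which is what
# torch.nn.TransformerDecoder expects by default.
import torch
from torch.nn import TransformerDecoder, TransformerDecoderLayer

layer = TransformerDecoderLayer(d_model=512, nhead=8)
decoder = TransformerDecoder(layer, num_layers=2)
tgt = torch.randn(10, 4, 512)      # (tgt_len, batch, d_model)
memory = torch.randn(20, 4, 512)   # (src_len, batch, d_model)
tgt_mask = torch.triu(torch.full((10, 10), float('-inf')), diagonal=1)  # causal mask
out = decoder(tgt, memory, tgt_mask=tgt_mask)  # (10, 4, 512)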
def __init__(self, num_output_features, embedding_dim, n_heads, hidden_dim, n_layers,
             max_length, dropout):
    super(Decoder, self).__init__()
    from torch.nn import TransformerDecoder, TransformerDecoderLayer
    self.embedding_dim = embedding_dim
    self.embedding = nn.Embedding(num_output_features, embedding_dim)
    # learn positional encoding
    self.pos_embedding = nn.Embedding(max_length, embedding_dim)
    decoder_layers = TransformerDecoderLayer(embedding_dim, n_heads, hidden_dim, dropout)
    self.transformer_decoder = TransformerDecoder(decoder_layers, n_layers)
    # the transformer decoder outputs embedding_dim-sized vectors (its d_model),
    # so the output projection takes embedding_dim rather than hidden_dim
    self.fc = nn.Linear(embedding_dim, num_output_features)
    self.init_weights()
def __post_init__(self):
    super().__init__()
    self.emb_layer = Embedding(self.n_vocab, self.emb_dim, self.padding_idx)
    self.pos_enc = PositionalEncoding(self.emb_dim, self.dropout_p, self.max_len)
    self.decoder_layer = TransformerDecoderLayer(self.enc_dim, self.nhead, self.dim_ff,
                                                 self.dropout_p, self.activation)
    self.transformer_decoder = TransformerDecoder(self.decoder_layer, self.depth)
def __init__(self, output_dim, emb_dim, n_layers, heads, dropout=0.1):
    super().__init__()
    self.emb_dim = emb_dim
    self.output_dim = output_dim
    self.embedding = nn.Embedding(output_dim, emb_dim)
    self.pe = PositionalEncoding(emb_dim, dropout)
    decoder_layer = TransformerDecoderLayer(emb_dim, heads, dropout=dropout)
    norm = nn.LayerNorm(emb_dim)
    self.transformer_decoder = TransformerDecoder(decoder_layer, n_layers, norm)
    self.out = nn.Linear(emb_dim, self.output_dim)
def __init__(self, bert='bert-base-cased', out_size=256, num_layers=6, nhead=8,
             dim_feedforward=2048, dropout=0.1, activation='relu', audio_dim=512,
             tie_weights=False):
    super().__init__()
    self.dropout = dropout
    self.bert_config = BertConfig.from_pretrained(bert)
    self.bert_config.hidden_dropout_prob = self.dropout
    self.bert_config.attention_probs_dropout_prob = self.dropout
    self.bert = BertModel.from_pretrained(bert, config=self.bert_config)
    self.hidden_size = self.bert.config.hidden_size
    self.out_size = out_size
    self.audio_dim = audio_dim
    self.decoder_num_layers = num_layers

    # 0 decoder layers (=) feed BERT output directly to self.linear
    if self.decoder_num_layers == 0:
        self.decoder = None
    # else init decoder as usual
    else:
        self.position_ids = None
        self.position_embeddings = Embedding(self.out_size, self.hidden_size)
        # linear layer so that audio and text embeddings have the same dimension
        self.resize_audio = Linear(self.audio_dim, self.hidden_size)
        # init decoder_layer with the parameters
        decoder_layer = TransformerDecoderLayer(self.hidden_size, nhead, dim_feedforward,
                                                self.dropout, activation)
        decoder_norm = LayerNorm(self.hidden_size)
        # init decoder with decoder_layer
        self.decoder = TransformerDecoder(decoder_layer, self.decoder_num_layers, decoder_norm)

    # handle classification layer and weight-tying
    self.linear = Linear(self.hidden_size, self.out_size, bias=not tie_weights)
    if tie_weights:
        self.linear.weight = self.bert.embeddings.position_embeddings.weight
    self.activation = Sigmoid()
def __init__(self, in_token, out_token, ninp, nhead, nhid, nlayers, dropout=0.5):
    super(TransformerModel, self).__init__()
    self.model_type = 'Transformer'
    self.pos_encoder = PositionalEncoding(ninp, dropout)
    encoder_norm = nn.LayerNorm(ninp)
    encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
    self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers, encoder_norm)
    self.encoder_embedding = nn.Embedding(in_token, ninp)
    self.decoder_embedding = nn.Embedding(out_token, ninp)
    self.ninp = ninp
    decoder_norm = nn.LayerNorm(ninp)
    decoder_layers = TransformerDecoderLayer(ninp, nhead, nhid, dropout)
    self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers, norm=decoder_norm)
    self.linear = nn.Linear(ninp, out_token)
    self.init_weights()
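# --- Hedged usage sketch (not part of the original snippets) ---
# Illustrates the padding masks several of the models above keep as attributes
# (src_pad_mask / trg_pad_mask, pad_idx, PAD_token). The key_padding_mask passed
# to torch.nn.TransformerEncoder/TransformerDecoder is a (N, S) boolean tensor
# with True at padded positions. `pad_idx` and `tokens` are hypothetical names.
import torch

pad_idx = 0
tokens = torch.tensor([[5, 7, 2, pad_idx],
                       [9, pad_idx, pad_idx, pad_idx]])  # (N, S) token ids
key_padding_mask = tokens.eq(pad_idx)                    # (N, S), True where padded
# e.g. transformer_encoder(src_emb, src_key_padding_mask=key_padding_mask)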