def __init__(self, vocab_size, embedding_dim=300, d_model=420, num_layers=6, nhead=6):
    super().__init__()
    # Embedding layer
    self.embedding_layer = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
    # Using nn.TransformerEncoderLayer because we do not want encoder-decoder attention.
    # NOTE: the encoder expects d_model-sized inputs, so embedding_dim (300) must
    # either equal d_model (420) or be projected to it before the encoder is applied.
    encoder_layer = nn.TransformerEncoderLayer(d_model, nhead)
    self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
    self.output_layer = nn.Linear(d_model, vocab_size)

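# Hedged usage sketch (not part of the original): wiring the pieces above into a
# runnable toy. `proj` is a hypothetical bridge from embedding_dim to d_model,
# needed because the defaults above differ (300 vs. 420).
import torch
import torch.nn as nn

vocab_size, embedding_dim, d_model, nhead, num_layers = 1000, 300, 420, 6, 6
embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
proj = nn.Linear(embedding_dim, d_model)  # assumed projection, not in the original
encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model, nhead), num_layers)
output_layer = nn.Linear(d_model, vocab_size)

src = torch.randint(0, vocab_size, (35, 8))   # (seq_len, batch) token ids
h = encoder(proj(embedding(src)))             # (seq_len, batch, d_model)
logits = output_layer(h)                      # (seq_len, batch, vocab_size)
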
def __init__(self, d_model, n_head, dim_ff, dropout=0.0, num_layers=1):
    super(TransEncoder, self).__init__()
    self.pos_encoder = PositionalEncoding(d_model, 0.1, 64)
    encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_head,
                                               dim_feedforward=dim_ff,
                                               dropout=dropout, activation='relu')
    encoder_norm = nn.LayerNorm(d_model)
    self.trans_encoder = nn.TransformerEncoder(encoder_layer, num_layers, encoder_norm)

def __init__(self, emb_dim=4, nheads=2, dropout=0.1):
    super(Transformer, self).__init__()
    self.emb_dim = emb_dim
    self.nheads = nheads
    self.dim_feedforward = int(emb_dim * 2)
    self.dropout = dropout
    self.encoder = nn.TransformerEncoderLayer(emb_dim, nheads,
                                              dim_feedforward=self.dim_feedforward,
                                              dropout=dropout)

def __init__(self, feature_size=250, num_layers=1, dropout=0.1, heads=10,
             device=None, max_enc_len=50000):
    super(TransAm, self).__init__()
    self.model_type = 'Transformer'
    self._device = device
    self.heads = heads
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(feature_size, max_enc_len, self._device)
    self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size,
                                                    nhead=self.heads,
                                                    dropout=dropout).to(self._device)
    self.transformer_encoder = nn.TransformerEncoder(
        self.encoder_layer, num_layers=num_layers).to(self._device)
    self.decoder = nn.Linear(feature_size, 1).to(self._device)
    self.init_weights()

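# Hedged sketch (an assumption, not shown in the original class): models that keep
# `self.src_mask = None` typically build a causal mask lazily in forward(), as in
# the PyTorch word-language-model tutorial. A common helper looks like this:
def _generate_square_subsequent_mask(self, sz):
    # Float mask: 0.0 on/below the diagonal, -inf above it, so each position
    # can attend only to itself and earlier positions.
    return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)
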
def __init__(self, asset_dim=4, nheads=2, dropout=0.1):
    super(CrossSectionalTransformer, self).__init__()
    self.asset_dim = asset_dim
    self.nheads = nheads
    self.dim_feedforward = int(asset_dim * 2)
    self.dropout = dropout
    self.encoder = nn.TransformerEncoderLayer(asset_dim, nheads,
                                              dim_feedforward=self.dim_feedforward,
                                              dropout=dropout)

def __init__(self, input_dim, self_state_dim, joint_state_dim, in_mlp_dims,
             sort_mlp_dims, sort_mlp_attention, action_dims,
             with_dynamic_net=True, with_global_state=True,
             multi_process_type="average", act_steps=3, act_fixed=False):
    super().__init__()
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    self.self_state_dim = self_state_dim
    self.global_state_dim = in_mlp_dims[-1]
    self.joint_state_dim = joint_state_dim
    self.in_mlp_dims = in_mlp_dims
    self.input_dim = input_dim
    self.lstm_hidden_dim = sort_mlp_attention[0] * 2
    self.with_dynamic_net = with_dynamic_net
    self.with_global_state = with_global_state
    self.sort_mlp_attention = sort_mlp_attention
    self.sort_mlp_global_state_dim = sort_mlp_dims[-1]
    self.act_fixed = act_fixed
    self.act_steps = act_steps
    self.multi_process_type = multi_process_type
    if self.with_dynamic_net:
        self.in_mlp = ATCBasic(self.input_dim, in_mlp_dims, epsilon=0.05,
                               last_relu=True, act_steps=self.act_steps,
                               act_fixed=self.act_fixed)
    else:
        self.in_mlp = mlp(self.input_dim, in_mlp_dims, last_relu=True)
    self.encoder_layer = nn.TransformerEncoderLayer(d_model=50, nhead=2,
                                                    dim_feedforward=150)
    self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=3)
    action_input_dim = 50 + self.self_state_dim  # 50 + 6
    self.action_mlp = mlp(action_input_dim, action_dims)  # 56, 150, 100, 100, 1
    # self.transmlp = mlp(input_dim=250, mlp_dims=[250, 100])
    self.attention_weights = None
    self.step_cnt = 0

def __init__(self, dModel, nHeads, numLayers, peMaxLen, inSize, fcHiddenSize,
             dropout, numClasses):
    super(AVNet, self).__init__()
    self.audioConv = nn.Conv1d(inSize, dModel, kernel_size=4, stride=4, padding=0)
    self.positionalEncoding = PositionalEncoding(dModel=dModel, maxLen=peMaxLen)
    encoderLayer = nn.TransformerEncoderLayer(d_model=dModel, nhead=nHeads,
                                              dim_feedforward=fcHiddenSize,
                                              dropout=dropout)
    # Reusing the same encoderLayer instance is safe: nn.TransformerEncoder
    # deep-copies the layer internally, so these encoders do not share weights.
    self.audioEncoder = nn.TransformerEncoder(encoderLayer, num_layers=numLayers)
    self.videoEncoder = nn.TransformerEncoder(encoderLayer, num_layers=numLayers)
    self.jointConv = nn.Conv1d(2 * dModel, dModel, kernel_size=1, stride=1, padding=0)
    self.jointDecoder = nn.TransformerEncoder(encoderLayer, num_layers=numLayers)
    self.outputConv = nn.Conv1d(dModel, numClasses, kernel_size=1, stride=1, padding=0)

def __init__(self, input_dim, hidden_dim=128, n_classes=2, num_layers=2):
    super(LSTMTrans1_deep, self).__init__()
    self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers,
                        dropout=0.2, batch_first=True)
    encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=2)
    self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=4)
    self.hidden2out = nn.Linear(hidden_dim, n_classes)

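# Hedged forward() sketch (an assumption, not the original code): the LSTM above
# is batch_first, while the nn.TransformerEncoder was built without
# batch_first=True, so it expects (seq, batch, hidden). A permute bridges the two
# layouts; taking the last time step for classification is also an assumption.
def forward(self, x):                       # x: (batch, seq, input_dim)
    out, _ = self.lstm(x)                   # (batch, seq, hidden_dim)
    out = self.transformer_encoder(out.permute(1, 0, 2))  # (seq, batch, hidden_dim)
    return self.hidden2out(out[-1])         # (batch, n_classes)
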
def __init__(self, config, bert_config):
    super().__init__()  # required before registering submodules on an nn.Module
    self.config = config
    self.bert_config = bert_config
    self.transformer_layer = nn.TransformerEncoderLayer(
        self.config.hidden_size, self.config.attention_head)
    self.transformer_encoder = nn.TransformerEncoder(
        self.transformer_layer, num_layers=self.config.num_layers)
    self.low_dropout = nn.Dropout(self.config.low_dropout)
    self.high_dropout = nn.Dropout(self.config.high_dropout)
    self.classifier = nn.Linear(self.bert_config.hidden_size,
                                self.config.ocnli['num_classes'])

def _get_attention_module(self):
    """
    Builds the self-attention module over the RNN output (d_model is
    rnn_hidden_size * 2, consistent with a bidirectional RNN's feature size).
    Inspired by https://pytorch.org/docs/master/generated/torch.nn.TransformerEncoder.html#torch.nn.TransformerEncoder
    Uses self.num_attention_heads, self.attention_dropout and
    self.num_encoder_layer configured elsewhere on the instance.
    @return: an nn.TransformerEncoder
    """
    encoder_layer = nn.TransformerEncoderLayer(d_model=self.rnn_hidden_size * 2,
                                               nhead=self.num_attention_heads,
                                               dropout=self.attention_dropout)
    return nn.TransformerEncoder(encoder_layer, num_layers=self.num_encoder_layer)

def __init__(self, n_skill, max_seq=100, embed_dim=128, num_heads=8,
             dropout=0.2, cont_emb=None):
    super(SAKTModel, self).__init__()
    self.n_skill = n_skill
    self.embed_dim_cat = embed_dim
    embed_dim_small_cat = 32
    embed_dim_middle_cat = 32
    embed_dim_cat_all = embed_dim_small_cat * 5 + embed_dim_middle_cat * 5 + embed_dim
    # cont_emb must be an int in practice; the None default would fail here.
    embed_dim_all = embed_dim_cat_all + cont_emb

    self.embedding = nn.Embedding(4, embed_dim_small_cat)
    self.user_answer_embedding = nn.Embedding(6, self.embed_dim_cat)
    self.prior_question_had_explanation_embedding = nn.Embedding(4, embed_dim_small_cat)
    self.e_embedding = nn.Embedding(n_skill + 1, self.embed_dim_cat)
    self.part_embedding = nn.Embedding(8, self.embed_dim_cat)
    self.elapsed_time_embedding = nn.Embedding(302, embed_dim_middle_cat)
    self.duration_previous_content_embedding = nn.Embedding(302, embed_dim_middle_cat)
    self.container_embedding = nn.Embedding(302, embed_dim_middle_cat)
    self.prev_ans_idx_embedding = nn.Embedding(302, embed_dim_middle_cat)
    self.prev_ans_content_id_embedding = nn.Embedding(4, embed_dim_small_cat)
    self.timediff_elapsedtime_embedding = nn.Embedding(502, embed_dim_middle_cat)
    self.timedelta_log10_embedding = nn.Embedding(15, embed_dim_small_cat)

    encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim_all,
                                               nhead=num_heads, dropout=dropout)
    self.transformer_enc = nn.TransformerEncoder(encoder_layer=encoder_layer,
                                                 num_layers=4)
    self.gru = nn.GRU(input_size=embed_dim_all, hidden_size=embed_dim_all)
    self.continuous_embedding = ContEmbedding(input_dim=1, embed_dim=cont_emb,
                                              seq_len=max_seq)
    self.prior_content_embedding = nn.Sequential(
        nn.Linear(self.embed_dim_cat, embed_dim_small_cat),
        nn.LayerNorm(embed_dim_small_cat))
    encoder_layer_cat = nn.TransformerEncoderLayer(d_model=embed_dim_cat_all,
                                                   nhead=1, dropout=dropout)
    self.cat_embedding = nn.TransformerEncoder(encoder_layer=encoder_layer_cat,
                                               num_layers=1)
    self.layer_normal = nn.LayerNorm(embed_dim_all)
    self.ffn = FFN(embed_dim_all)
    self.dropout = nn.Dropout(dropout / 2)
    self.pred = nn.Linear(embed_dim_all, 1)

def __init__(self, num_features=64, num_classes=0, activation=F.relu,
             transform_space=None):
    super(SNResNetProjectionDiscriminator, self).__init__()
    self.num_features = num_features
    self.num_classes = num_classes
    self.activation = activation
    self.block1 = OptimizedBlock(3, num_features)
    self.block2 = Block(num_features, num_features * 2,
                        activation=activation, downsample=True)
    self.block3 = Block(num_features * 2, num_features * 4,
                        activation=activation, downsample=True)
    self.block4 = Block(num_features * 4, num_features * 8,
                        activation=activation, downsample=True)
    self.block5 = Block(num_features * 8, num_features * 16,
                        activation=activation, downsample=True)
    self.l6 = utils.spectral_norm(nn.Linear(num_features * 16, 1))
    if transform_space is not None:
        # self.transformer = nn.Transformer(d_model=1024, nhead=4, num_encoder_layers=4).encoder
        encoder_layer = nn.TransformerEncoderLayer(d_model=1024, nhead=8)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=4)
        if transform_space in ('batch', 'embeddings'):
            self.transform_space = transform_space
        else:
            raise ValueError("You specified a transform space but did not "
                             "provide an appropriate one.")
    else:  # transform_space is None
        self.transform_space = None
    if num_classes > 0:
        self.l_y = utils.spectral_norm(nn.Embedding(num_classes, num_features * 16))
    self._initialize()

def __init__(self, dim_model, num_heads, dim_feedforward, num_layers):
    super().__init__()
    encoder_layer = nn.TransformerEncoderLayer(
        d_model=dim_model,
        dim_feedforward=dim_feedforward,
        nhead=num_heads,
        dropout=0.0,
    )
    self.transformer_encoder = nn.TransformerEncoder(encoder_layer,
                                                     num_layers=num_layers)

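# Hedged usage sketch: built without batch_first, an nn.TransformerEncoder like
# the one above consumes (seq_len, batch, d_model) tensors and returns the same
# shape. All sizes below are illustrative.
import torch
import torch.nn as nn

enc = nn.TransformerEncoder(
    nn.TransformerEncoderLayer(d_model=64, nhead=4, dim_feedforward=256, dropout=0.0),
    num_layers=2)
x = torch.randn(10, 32, 64)   # (seq_len, batch, d_model)
y = enc(x)                    # same shape: (10, 32, 64)
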
def __init__(self, config: dict):
    super(Model, self).__init__()
    self.word_embedding = config["embedding"]
    self.word_embedding.requires_grad_(not config["freeze_embedding"])
    _layers = nn.TransformerEncoderLayer(config["embedding_dim"],
                                         config["word_nhead"],
                                         config["word_dimfeedward_size"])
    _transformer = nn.TransformerEncoder(_layers, config["word_encoder_layers"])
    self.word_encoder = TransformerCLSEncoder(
        config["embedding_dim"], _transformer,
        max_lens=config["max_tokens"],
        learned_pos_emb=config["learned_word_pos_embedding"])
    self.padding_layer = PaddingLayer()
    _layers = nn.TransformerEncoderLayer(config["embedding_dim"],
                                         config["sent_nhead"],
                                         config["sent_dimfeedward_size"])
    _transformer = nn.TransformerEncoder(_layers, config["sent_encoder_layers"])
    self.sent_encoder = TransformerCLSEncoder(
        config["embedding_dim"], _transformer,
        max_lens=config["max_sents"],
        learned_pos_emb=config["learned_sent_pos_embedding"])
    self.img_encoder = SimpleImageEncoder(config["img_input_size"],
                                          config["img_output_size"],
                                          config["img_num"], config["dropout"])
    self.output_layer = OutputLayer(
        config["task"],
        config["embedding_dim"] + config["img_output_size"],
        config["output_size"], config["dropout"])

def __init__(self):
    super(MMIL_Net, self).__init__()
    self.fc_prob = nn.Linear(512, 25)
    self.fc_frame_att = nn.Linear(512, 25)
    self.fc_av_att = nn.Linear(512, 25)
    self.fc_a = nn.Linear(128, 512)
    self.fc_v = nn.Linear(2048, 512)
    self.fc_st = nn.Linear(512, 512)
    self.fc_fusion = nn.Linear(1024, 512)
    self.audio_encoder = nn.TransformerEncoder(
        nn.TransformerEncoderLayer(d_model=512, nhead=1, dim_feedforward=512),
        num_layers=1)
    self.visual_encoder = nn.TransformerEncoder(
        nn.TransformerEncoderLayer(d_model=512, nhead=1, dim_feedforward=512),
        num_layers=1)
    self.cmt_encoder = Encoder(CMTLayer(d_model=512, nhead=1, dim_feedforward=512),
                               num_layers=1)
    self.hat_encoder = Encoder(HANLayer(d_model=512, nhead=1, dim_feedforward=512),
                               num_layers=1)
    self.t_att = MultiHeadAttention2(512, 512, 512)
    self.t_att2 = MultiHeadAttention2(512, 512, 512)
    self.fc1 = nn.Linear(1024, 256)
    self.fc2 = nn.Linear(256, 2)

def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout):
    super(TransFormeLate, self).__init__()
    self.model_type = 'Transformer'
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(ninp, dropout)
    encoder_layers = nn.TransformerEncoderLayer(ninp, nhead, nhid, dropout)
    self.transformer_encoder = nn.TransformerEncoder(encoder_layers, nlayers)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.ninp = ninp
    self.decoder = nn.Linear(ninp, ntoken)
    self.init_weights()

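# Hedged sketch of the init_weights() this tutorial-style language model
# typically pairs with (an assumption; the original body is not shown here):
def init_weights(self):
    initrange = 0.1
    self.encoder.weight.data.uniform_(-initrange, initrange)  # embedding table
    self.decoder.bias.data.zero_()
    self.decoder.weight.data.uniform_(-initrange, initrange)  # output projection
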
def get_transformer_encoder(d_model, nhead, num_layers, norm=None):
    """Create and return a transformer encoder.

    d_model:    the dimension of the encoder input
    nhead:      number of heads for multi-head attention
    num_layers: the number of encoder layers
    norm:       the layer-normalization component (optional)
    """
    encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead)
    return nn.TransformerEncoder(encoder_layer=encoder_layer,
                                 num_layers=num_layers, norm=norm)

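# Hedged usage example for the helper above (sizes are illustrative; d_model
# must be divisible by nhead):
import torch
import torch.nn as nn

encoder = get_transformer_encoder(d_model=128, nhead=8, num_layers=2,
                                  norm=nn.LayerNorm(128))
out = encoder(torch.randn(20, 4, 128))  # (seq_len, batch, d_model) in, same shape out
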
def __init__(self, num_layers, input_size, d_model, nhead, dim_feedforward,
             dropout, activation, num_embeddings=None, norm=None, proj=None):
    nn.Module.__init__(self)
    Saver.__init__(self)
    log.info(f" >> num_layers= {num_layers}")
    log.info(f" >> input_size= {input_size}")
    log.info(f" >> d_model= {d_model}")
    log.info(f" >> nhead= {nhead}")
    log.info(f" >> dim_feedforward= {dim_feedforward}")
    log.info(f" >> dropout= {dropout}")
    log.info(f" >> activation= {activation}")
    if num_embeddings is not None:
        log.info(f" >> num input embeddings= {num_embeddings}")
        # Earlier variant (kept for reference) embedded to input_size instead:
        #   self.emb = nn.Embedding(num_embeddings=num_embeddings + 1,
        #                           embedding_dim=input_size, padding_idx=0)
        self.emb = nn.Embedding(num_embeddings=num_embeddings + 1,
                                embedding_dim=d_model, padding_idx=0)
    else:
        self.emb = nn.Linear(in_features=input_size, out_features=d_model)
    self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead,
                                                    dim_feedforward=dim_feedforward,
                                                    dropout=dropout,
                                                    activation=activation)
    self.transformer_encoder = nn.TransformerEncoder(encoder_layer=self.encoder_layer,
                                                     num_layers=num_layers, norm=norm)
    self.pos_encoder = PositionalEncoding(d_model=d_model)
    if proj is not None:
        log.info(f" >> proj after transformer= {proj}")
        self.proj = Linear(self.output_size, proj)

def __init__(
    self,
    transformer_encoder_params_for_load,
    tokenizer_name=None,
    emb_size: int = 512,
    nhead: int = 8,
    num_encoder_layers: int = 6,
    num_decoder_layers: int = 6,
    dim_feedforward: int = 512,
    dropout: float = 0.1,
):
    super().__init__()
    self.transformer_encoder_params_for_load = transformer_encoder_params_for_load
    self.transformer_encoder_for_classes = load_model(
        TransformerEncoderClassifier, **transformer_encoder_params_for_load)[0]
    # Freeze the pretrained classifier encoder.
    for p in self.transformer_encoder_for_classes.parameters():
        p.requires_grad_(False)
    self.tokenizer_name = tokenizer_name
    self.tokenizer = get_tokenizer(self.tokenizer_name)
    self.src_vocab_size = len(self.tokenizer)
    self.tgt_vocab_size = len(self.tokenizer)
    self.emb_size = emb_size
    self.nhead = nhead
    self.num_encoder_layers = num_encoder_layers
    self.num_decoder_layers = num_decoder_layers
    self.dim_feedforward = dim_feedforward
    self.dropout = dropout
    encoder_layer = nn.TransformerEncoderLayer(
        d_model=emb_size, nhead=nhead, dim_feedforward=dim_feedforward)
    self.transformer_encoder = nn.TransformerEncoder(encoder_layer,
                                                     num_layers=num_encoder_layers)
    # The decoder width is the source embedding size plus the frozen
    # classifier's embedding size.
    decoder_emb_size = emb_size + self.transformer_encoder_for_classes.emb_size
    decoder_layer = nn.TransformerDecoderLayer(
        d_model=decoder_emb_size, nhead=nhead, dim_feedforward=dim_feedforward)
    self.transformer_decoder = nn.TransformerDecoder(decoder_layer,
                                                     num_layers=num_decoder_layers)
    self.generator = nn.Linear(decoder_emb_size, self.tgt_vocab_size)
    self.src_tok_emb = TokenEmbedding(self.src_vocab_size, emb_size)
    self.tgt_tok_emb = TokenEmbedding(self.tgt_vocab_size, decoder_emb_size)
    self.src_positional_encoding = PositionalEncoding(emb_size, dropout=dropout)
    self.tgt_positional_encoding = PositionalEncoding(decoder_emb_size, dropout=dropout)

def __init__(self, class_num):
    super(CRNN, self).__init__()
    self.class_num = class_num
    # self.cnn = densenet18()
    self.cnn = resnet18()
    # self.cnn = shufflenet_v2_x1_0()
    d_model = 512
    encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=8)
    self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=1)
    self.pos_encoder = PositionalEncoding(d_model=d_model, dropout=0.5)
    self.fc_classify = nn.Linear(d_model, self.class_num)

def __init__(self, config, vocab, REL_DIC):
    super(RELATION, self).__init__(config, vocab, REL_DIC)
    # self.rel_size = len(self.REL_DIC)
    self.rel_size = 2
    self.hidden_dim = int(config.get('CNNs', 'HIDDEN_DIM'))  # e.g. 1000
    self.rel_conv = nn.Conv1d(self.output_conv1_dim + 2, self.hidden_dim,
                              self.window_size,
                              padding=(self.window_size // 2,))
    self.rel_linear1 = nn.Linear(self.max_sent_len, 1)
    self.rel_linear2 = nn.Linear(self.hidden_dim, 2)
    self.encoder_layer1 = nn.TransformerEncoderLayer(d_model=self.hidden_dim, nhead=8)
    self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer1, num_layers=6)

def __init__(self, dim, dim_feedforward, mode, n_head=8, dropout=0.1):
    super().__init__()
    if mode == 'shrink':
        out_dim = dim // 2
    elif mode == 'expand':
        out_dim = dim * 2
    else:
        raise ValueError('mode argument must be either shrink or expand')
    self.encoder = nn.TransformerEncoderLayer(dim, n_head, dim_feedforward, dropout)
    self.shrink = nn.Linear(dim, out_dim)

def __init__(self, nhead=8, nlayers=6, dropout=0.5):
    super().__init__()
    # BERT encoder
    self.bert = g_bert
    # Extra Transformer encoder on top, used to fine-tune over BERT's last layer
    self.pos_encoder = PositionalEncoding(g_bert_emb_dim, dropout)
    encoder_layers = nn.TransformerEncoderLayer(d_model=g_bert_emb_dim, nhead=nhead)
    self.transformer_encoder = nn.TransformerEncoder(encoder_layers, nlayers)

def __init__(self, n_vocab):
    super().__init__()
    self.token_embedding = nn.Embedding(n_vocab, d_model)
    self.segment_embedding = nn.Embedding(2, d_model)
    self.posit_embedding = nn.Embedding(100, d_model)  # max sequence length -> 100
    self.matcher = nn.TransformerEncoder(
        nn.TransformerEncoderLayer(d_model=d_model, nhead=n_head),
        num_layers=n_layer)
    self.hidden2logits = nn.Linear(d_model, 1)

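# Hedged forward() sketch (an assumption, in the spirit of BERT-style matchers:
# token, segment, and position embeddings are summed before the encoder).
# `tokens` and `segments` are (seq_len, batch) index tensors with seq_len <= 100,
# matching the posit_embedding table above.
def forward(self, tokens, segments):
    positions = torch.arange(tokens.size(0), device=tokens.device)  # (seq_len,)
    x = (self.token_embedding(tokens)
         + self.segment_embedding(segments)
         + self.posit_embedding(positions).unsqueeze(1))  # broadcast over batch
    h = self.matcher(x)              # (seq_len, batch, d_model)
    return self.hidden2logits(h[0])  # logit from the first ([CLS]-like) position
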
def __init__(self, d_model=64, nhead=4, dim_feedforward=256, n_class=3):
    super(TransformerClassifier, self).__init__()
    encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead,
                                               dim_feedforward=dim_feedforward)
    self.expend_dims = nn.Sequential(
        nn.Conv1d(3, d_model, kernel_size=1, stride=1, padding=0),
        nn.ReLU())
    self.encoder = nn.Sequential(
        PositionalEncoding(d_model),
        nn.TransformerEncoder(encoder_layer, 3))  # raw input: (seq_len, bs, 3)
    self.decoder = nn.Sequential(nn.Linear(d_model, n_class))

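# Hedged forward() sketch (an assumption): nn.Conv1d consumes
# (batch, channels, seq_len), while the PositionalEncoding/TransformerEncoder
# stack above consumes (seq_len, batch, d_model), so a permute bridges them.
# Mean-pooling over time before the classifier is also an assumption.
def forward(self, x):                  # x: (batch, 3, seq_len)
    x = self.expend_dims(x)            # (batch, d_model, seq_len)
    x = x.permute(2, 0, 1)             # (seq_len, batch, d_model)
    h = self.encoder(x)                # (seq_len, batch, d_model)
    return self.decoder(h.mean(dim=0))  # (batch, n_class)
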
def __init__(self, embedding, embed_dims, trans_input_dims, num_heads, hidden_dims,
             num_layers, classifier_mlp_hidden=16, dropout=0.5):
    super(TransformerModel, self).__init__()
    torch.manual_seed(params.torch_seed)
    self.model_type = "Transformer"
    self.padding_idx = len(embedding) - 1
    self.embed_dims = embed_dims
    self.embedding_layer = nn.Embedding(len(embedding), embed_dims,
                                        padding_idx=self.padding_idx)
    self.embedding_layer.weight.data.copy_(torch.Tensor(embedding))
    self.pos_encoder = PositionalEncoding(embed_dims, dropout)
    if params.concat:
        self.final_embed_dims = embed_dims + 2
    else:
        raise Exception("Bad idea! for params.concat")
    self.input_dims = trans_input_dims
    if trans_input_dims != embed_dims + 2:
        self.embed2input_space = nn.Linear(self.final_embed_dims, self.input_dims)
    encoder_layers = nn.TransformerEncoderLayer(self.input_dims, num_heads,
                                                hidden_dims, dropout)
    self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
    self.transformer_output_dims = self.input_dims
    self.last_att_linear = nn.Linear(self.transformer_output_dims,
                                     self.transformer_output_dims)
    self.last_att_tanh = nn.Tanh()
    self.dropout_mlp = nn.Dropout(p=dropout)
    # 4 labels: support, refute, unrelated, comment
    self.classifier_mlp = nn.ModuleList([
        nn.Linear(self.transformer_output_dims, classifier_mlp_hidden),
        nn.Tanh(),
        self.dropout_mlp,
        nn.Linear(classifier_mlp_hidden, 4),
        # nn.Softmax(dim=1)
    ])
    self.__init_weights__()

def __init__(self, d_model, nhead, num_layer):
    super(MyTransformer, self).__init__()
    # Model parameters
    self.d_model = d_model
    self.nhead = nhead
    self.num_layer = num_layer
    # Model architecture
    encoderLayer = nn.TransformerEncoderLayer(self.d_model, self.nhead)
    self.encoder = nn.TransformerEncoder(encoderLayer, self.num_layer)
    self.layer1 = nn.Linear(d_model, 2048)
    self.layer2 = nn.Linear(2048, 1024)
    self.layer3 = nn.Linear(1024, 540)

def __init__(self, config: ModelConfigNERStatus):
    super(EncoderLayer, self).__init__()
    self.gru = nn.GRU(input_size=config.embedding_dim,
                      hidden_size=config.hidden_size,
                      num_layers=config.num_layers,
                      batch_first=True,
                      bidirectional=True,
                      dropout=0.2)
    # The bidirectional GRU doubles the feature size, hence d_model = hidden_size * 2.
    self.transformerEncoderLayer = nn.TransformerEncoderLayer(
        d_model=config.hidden_size * 2, nhead=config.n_head)
    self.transformerEncoder = nn.TransformerEncoder(
        self.transformerEncoderLayer, num_layers=config.n_encoder)

def __init__(self, num_embeddings, d_model=100, num_layers=6, nhead=4):
    super(LinearizedCodesTransformer, self).__init__()
    self.embeddings = nn.Embedding(num_embeddings, d_model)
    self.positional_encodings = PositionalEncoding(d_model=d_model)
    encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead,
                                               dim_feedforward=4 * d_model)
    self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers)
    self.hidden_size = d_model
    self.num_layers = num_layers
    self.num_heads = nhead

def __init__(self, d_model=1, d_embed=64, nhead=1, num_layers=6, num_joints_in=15,
             num_joints_out=15, dim_feedforward=2048, meth=2):
    super().__init__()
    if meth == 1:
        self.pe = myPositionalEncoder(d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
        self.linear = nn.Linear(num_joints_in * 2, num_joints_out * 3, bias=False)
    elif meth == 2:
        # self.lin_in = nn.Linear(d_model, d_embed, bias=False)
        self.pe = PositionalEncoder(d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_embed, nhead, dim_feedforward)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
        self.lin_out = nn.Linear(num_joints_out * 2, num_joints_out * 3, bias=False)
    self.meth = meth
    self.d_model = d_model
    self.nhead = nhead
    self.num_joints_in = num_joints_in
    self.num_joints_out = num_joints_out
