def __init__(
        self,
        word_vocab: Dict,
        char_vocab_size: int,
        tag_vocab_size: int,
        config_model: Config,
):
    super().__init__()
    self.word_embedder = WordEmbedder(
        init_value=texar.data.Embedding(
            vocab=word_vocab,
            hparams={
                "dim": config_model.word_emb.dim,
                "file": config_model.embedding_path,
                "read_fn": "load_glove",
            },
        ).word_vecs)
    self.char_embedder = WordEmbedder(
        vocab_size=char_vocab_size,
        hparams=config_model.char_emb)
    self.char_cnn = torch.nn.Conv1d(**config_model.char_cnn_conv)

    self.dropout_in = nn.Dropout2d(config_model.dropout_rate)
    # standard dropout
    self.dropout_rnn_in = nn.Dropout(config_model.dropout_rate)
    self.dropout_out = nn.Dropout(config_model.dropout_rate)

    self.rnn = nn.LSTM(
        config_model.bilstm_sentence_encoder.rnn_cell_fw.input_size,
        config_model.bilstm_sentence_encoder.rnn_cell_fw.kwargs.num_units,
        num_layers=1,
        batch_first=True,
        bidirectional=True,
    )
    self.dense = nn.Linear(
        config_model.bilstm_sentence_encoder.rnn_cell_fw.kwargs.num_units * 2,
        config_model.output_hidden_size,
    )
    self.tag_projection_layer = nn.Linear(
        config_model.output_hidden_size, tag_vocab_size)
    self.crf = ConditionalRandomField(
        tag_vocab_size,
        constraints=None,
        include_start_end_transitions=True)

    if config_model.initializer is None or callable(
            config_model.initializer):
        self.initializer = config_model.initializer
    else:
        self.initializer = texar.core.layers.get_initializer(
            config_model["initializer"])

    self.reset_parameters()
def __init__(self,
             pretrained_model_name: Optional[str] = None,
             cache_dir: Optional[str] = None,
             hparams=None):
    self.load_pretrained_config(pretrained_model_name, cache_dir, hparams)

    # Word embedding
    word_embedder = WordEmbedder(
        vocab_size=self._hparams.vocab_size,
        hparams=self._hparams.embed)

    # Position embedding
    position_embedder = PositionEmbedder(
        position_size=self._hparams.position_size,
        hparams=self._hparams.position_embed)

    # The GPT2 decoder (a TransformerDecoder)
    super().__init__(vocab_size=self._hparams.vocab_size,
                     output_layer=word_embedder.embedding,
                     hparams=None)

    # Register modules after `__init__` is called.
    self.word_embedder = word_embedder
    self.position_embedder = position_embedder

    self.init_pretrained_weights()
def __init__(self,
             pretrained_model_name: Optional[str] = None,
             cache_dir: Optional[str] = None,
             hparams=None):
    super().__init__(hparams=hparams)
    self.load_pretrained_config(pretrained_model_name, cache_dir)

    # Word embedding
    self.word_embedder = WordEmbedder(
        vocab_size=self._hparams.vocab_size,
        hparams=self._hparams.embed)

    # Position embedding
    self.position_embedder = PositionEmbedder(
        position_size=self._hparams.position_size,
        hparams=self._hparams.position_embed)

    # The GPT2 decoder (a TransformerDecoder)
    def func(tokens, positions):
        word_embeds = self.word_embedder(tokens)
        pos_embeds = self.position_embedder(positions)
        return word_embeds + pos_embeds

    class GPT2TransformerDecoder(TransformerDecoder):
        def embed_tokens(self, tokens: torch.LongTensor,
                         positions: torch.LongTensor) -> torch.Tensor:
            return func(tokens, positions)

    self.decoder = GPT2TransformerDecoder(
        vocab_size=self._hparams.vocab_size,
        output_layer=self.word_embedder.embedding,
        hparams=self._hparams.decoder)

    self.init_pretrained_weights()
def __init__(self,
             word_embedding_table: torch.Tensor,
             char_vocab_size: int,
             tag_vocab_size: int,
             config_model: HParams):
    super().__init__()

    # TODO: Fix this. init_value doesn't need to be tensor but
    #   we have to set it for type check
    self.word_embedder = WordEmbedder(init_value=word_embedding_table)
    self.char_embedder = WordEmbedder(
        vocab_size=char_vocab_size,
        hparams=config_model.char_emb)
    self.char_cnn = torch.nn.Conv1d(**config_model.char_cnn_conv)

    self.dropout_in = nn.Dropout2d(config_model.dropout_rate)
    # standard dropout
    self.dropout_rnn_in = nn.Dropout(config_model.dropout_rate)
    self.dropout_out = nn.Dropout(config_model.dropout_rate)

    self.rnn = nn.LSTM(
        config_model.bilstm_sentence_encoder.rnn_cell_fw.input_size,
        config_model.bilstm_sentence_encoder.rnn_cell_fw.kwargs.num_units,
        num_layers=1,
        batch_first=True,
        bidirectional=True)
    self.dense = nn.Linear(
        config_model.bilstm_sentence_encoder.rnn_cell_fw.kwargs.num_units * 2,
        config_model.output_hidden_size)
    self.tag_projection_layer = nn.Linear(
        config_model.output_hidden_size, tag_vocab_size)
    self.crf = ConditionalRandomField(
        tag_vocab_size,
        constraints=None,
        include_start_end_transitions=True)

    if config_model.initializer is None or callable(
            config_model.initializer):
        self.initializer = config_model.initializer
    else:
        self.initializer = texar.core.layers.get_initializer(
            config_model["initializer"])

    self.reset_parameters()
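# --- Illustrative sketch, not part of the snippets above ---
# Shows how the char-CNN pieces configured above are typically combined:
# embed characters, run Conv1d along the character axis, then max-pool to get
# one feature vector per word. All sizes below (char_vocab_size=100,
# emb_dim=30, 50 filters, kernel_size=3) are assumptions for this demo only.
import torch
from torch import nn

char_embedder = nn.Embedding(num_embeddings=100, embedding_dim=30)
char_cnn = nn.Conv1d(in_channels=30, out_channels=50, kernel_size=3, padding=1)

char_ids = torch.randint(0, 100, (8, 12))                # (num_words, max_word_len)
embedded = char_embedder(char_ids).transpose(1, 2)       # (num_words, emb_dim, max_word_len)
word_char_feats = char_cnn(embedded).max(dim=2).values   # (num_words, num_filters)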
def __init__(self,
             pretrained_model_name: Optional[str] = None,
             cache_dir: Optional[str] = None,
             hparams=None):
    super().__init__(hparams=hparams)
    # self.load_pretrained_config(pretrained_model_name, cache_dir)

    # Word embedding
    self.word_embedder = WordEmbedder(
        vocab_size=self._hparams.vocab_size,
        hparams=self._hparams.embed)

    # Position embedding
    self.position_embedder = PositionEmbedder(
        position_size=self._hparams.position_size,
        hparams=self._hparams.position_embed)

    # The GPT2 decoder (a TransformerDecoder)
    def func(tokens, positions):
        word_embeds = self.word_embedder(tokens)
        pos_embeds = self.position_embedder(positions)
        return word_embeds + pos_embeds

    class GPT2TransformerDecoder(TransformerDecoder):
        def embed_tokens(self, tokens: torch.LongTensor,
                         positions: torch.LongTensor) -> torch.Tensor:
            return func(tokens, positions)

    class FNN(nn.Module):
        def __init__(self):
            super(FNN, self).__init__()
            self.fnn = nn.Linear(768, 256)
            self.tanh = nn.Tanh()
            self.fnn2 = nn.Linear(256, 1)

        def forward(self, x):
            x = self.tanh(self.fnn(x))
            return self.fnn2(x)

    # This part can add modified inputs.
    # self.decoder = GPT2TransformerDecoder(
    #     vocab_size=self._hparams.vocab_size,
    #     output_layer=self.word_embedder.embedding,
    #     hparams=self._hparams.decoder)

    # self.decoder = GPT2TransformerDecoder(
    #     output_layer=nn.Linear(768, 2),
    #     hparams=self._hparams.decoder
    # )

    self.decoder = GPT2TransformerDecoder(output_layer=FNN(),
                                          hparams=self._hparams.decoder)
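# --- Illustrative sketch, not part of the snippet above ---
# Restates the FNN output head standalone: a two-layer MLP that maps GPT-2's
# 768-dim hidden states to a single score per position instead of vocabulary
# logits. The class name FNNHead and the dummy shapes are assumptions.
import torch
from torch import nn

class FNNHead(nn.Module):
    def __init__(self):
        super().__init__()
        self.fnn = nn.Linear(768, 256)
        self.tanh = nn.Tanh()
        self.fnn2 = nn.Linear(256, 1)

    def forward(self, x):
        return self.fnn2(self.tanh(self.fnn(x)))

hidden = torch.randn(2, 5, 768)   # (batch, seq_len, hidden_dim)
scores = FNNHead()(hidden)        # (batch, seq_len, 1)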