def __init__(self, num_classes: int, input_dim: int, output_dim: int) -> None:
    super().__init__()
    self.embedder = Embedding(num_classes, input_dim)
    self.decoder_cell = GRUCell(input_dim, output_dim)
    self.output_projection_layer = Linear(output_dim, num_classes)
    self.recall = UnigramRecall()
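# A minimal sketch (not from the source, which only shows __init__) of how the
# three modules above are typically chained in one decoding step. The names
# `prev_tokens` and `hidden` are hypothetical placeholders.
def _decoder_step_sketch(self, prev_tokens, hidden):
    # (batch,) token ids -> (batch, input_dim) embeddings.
    embedded = self.embedder(prev_tokens)
    # Advance the recurrent state by one step.
    hidden = self.decoder_cell(embedded, hidden)
    # Project to vocabulary logits: (batch, num_classes).
    logits = self.output_projection_layer(hidden)
    return logits, hidden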
def __init__(self, input_dim, latent_dim, device,
             concat_mask=False, obsrv_std=0.1,
             use_binary_classif=False, linear_classifier=False,
             classif_per_tp=False, input_space_decay=False,
             cell="gru", n_units=100, n_labels=1,
             train_classif_w_reconstr=False):
    super(Classic_RNN, self).__init__(
        input_dim, latent_dim, device,
        obsrv_std=obsrv_std,
        use_binary_classif=use_binary_classif,
        classif_per_tp=classif_per_tp,
        linear_classifier=linear_classifier,
        n_labels=n_labels,
        train_classif_w_reconstr=train_classif_w_reconstr)

    self.concat_mask = concat_mask

    encoder_dim = int(input_dim)
    if concat_mask:
        encoder_dim = encoder_dim * 2

    self.decoder = nn.Sequential(
        nn.Linear(latent_dim, n_units),
        nn.Tanh(),
        nn.Linear(n_units, input_dim),
    )
    utils.init_network_weights(self.decoder)

    if cell == "gru":
        self.rnn_cell = GRUCell(encoder_dim + 1, latent_dim)  # +1 for delta t
    elif cell == "expdecay":
        self.rnn_cell = GRUCellExpDecay(
            input_size=encoder_dim,
            input_size_for_decay=input_dim,
            hidden_size=latent_dim,
            device=device)
    else:
        raise Exception("Unknown RNN cell: {}".format(cell))

    if input_space_decay:
        # Move the tensor to the device before wrapping it in Parameter:
        # calling .to() on a Parameter returns a plain (non-leaf) tensor,
        # so the parameter would never be registered with the module.
        self.w_input_decay = Parameter(
            torch.Tensor(1, int(input_dim)).to(device))
        self.b_input_decay = Parameter(
            torch.Tensor(1, int(input_dim)).to(device))
    self.input_space_decay = input_space_decay

    self.z0_net = lambda hidden_state: hidden_state
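# A minimal sketch (an assumption, not from the source) of how the inputs to
# self.rnn_cell above are usually assembled for the "gru" case: the observation
# mask is concatenated when concat_mask is set, and the time gap delta_t
# accounts for the extra "+1" input feature.
def _assemble_rnn_input_sketch(x, mask, delta_t, concat_mask):
    # x: (batch, input_dim), mask: (batch, input_dim), delta_t: (batch, 1).
    inp = torch.cat([x, mask], dim=-1) if concat_mask else x
    return torch.cat([inp, delta_t], dim=-1)  # (batch, encoder_dim + 1)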
def __init__(self,
             vocab: Vocabulary,
             token_embedder: TextFieldEmbedder,
             document_encoder: Seq2VecEncoder,
             utterance_encoder: Seq2VecEncoder,
             context_encoder: Seq2SeqEncoder,
             beam_size: int = None,
             max_decoding_steps: int = 50,
             scheduled_sampling_ratio: float = 0.,
             use_bleu: bool = True) -> None:
    super(MultiTurnHred, self).__init__(vocab)
    self._scheduled_sampling_ratio = scheduled_sampling_ratio

    # We need the start symbol to provide as the input at the first timestep of decoding, and
    # the end symbol as a way to indicate the end of the decoded sequence.
    self._start_index = self.vocab.get_token_index(START_SYMBOL)
    self._end_index = self.vocab.get_token_index(END_SYMBOL)

    if use_bleu:
        pad_index = self.vocab.get_token_index(self.vocab._padding_token)  # pylint: disable=protected-access
        self._bleu = BLEU(exclude_indices={pad_index, self._end_index, self._start_index})
    else:
        self._bleu = None

    # At prediction time, we use a beam search to find the most likely sequence of target tokens.
    self._beam_size = beam_size or 1
    self._max_decoding_steps = max_decoding_steps
    self._beam_search = BeamSearch(self._end_index,
                                   max_steps=max_decoding_steps,
                                   beam_size=self._beam_size)

    # Dense embedding of word-level tokens.
    self._token_embedder = token_embedder

    # Document word-level encoder.
    self._document_encoder = document_encoder

    # Dialogue word-level encoder.
    self._utterance_encoder = utterance_encoder

    # Sentence-level encoder.
    self._context_encoder = context_encoder

    num_classes = self.vocab.get_vocab_size()

    document_output_dim = self._document_encoder.get_output_dim()
    utterance_output_dim = self._utterance_encoder.get_output_dim()
    context_output_dim = self._context_encoder.get_output_dim()
    decoder_output_dim = utterance_output_dim
    decoder_input_dim = (token_embedder.get_output_dim()
                         + document_output_dim + context_output_dim)

    # We'll use a GRU cell as the recurrent cell that produces a hidden state
    # for the decoder at each time step.
    # TODO (pradeep): Do not hardcode decoder cell type.
    self._decoder_cell = GRUCell(decoder_input_dim, decoder_output_dim)

    # We project the hidden state from the decoder into the output vocabulary space
    # in order to get log probabilities of each target token, at each time step.
    self._output_projection_layer = Linear(decoder_output_dim, num_classes)
def __init__(self, input_size, hidden_size, num_layers=1, dropout=0.0):
    super(StackedGRU, self).__init__()
    self.dropout = nn.Dropout(dropout)
    self.num_layers = num_layers
    self.layers = nn.ModuleList()
    for i in range(num_layers):
        self.layers.append(GRUCell(input_size, hidden_size))
        # Every layer after the first consumes the previous layer's hidden state.
        input_size = hidden_size
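# A minimal sketch (an assumption; the source only shows __init__) of the
# forward pass such a stack of GRU cells usually implements: each cell feeds
# the next, with dropout between layers but not after the last one.
def _stacked_forward_sketch(self, x, hiddens):
    # x: (batch, input_size); hiddens: list of (batch, hidden_size), one per layer.
    new_hiddens = []
    for i, cell in enumerate(self.layers):
        h = cell(x, hiddens[i])
        new_hiddens.append(h)
        x = h if i + 1 == self.num_layers else self.dropout(h)
    return x, new_hiddens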
def __init__(self, name: str, event2mind: Event2Mind,
             num_classes: int, input_dim: int, output_dim: int) -> None:
    self.embedder = Embedding(num_classes, input_dim)
    event2mind.add_module(f"{name}_embedder", self.embedder)
    self.decoder_cell = GRUCell(input_dim, output_dim)
    event2mind.add_module(f"{name}_decoder_cell", self.decoder_cell)
    self.output_projection_layer = Linear(output_dim, num_classes)
    event2mind.add_module(f"{name}_output_project_layer", self.output_projection_layer)
    self.recall = UnigramRecall()
def __init__(self, opt):
    super(BootDecoder, self).__init__()
    self.opt = opt
    self.rnn_step = opt['rnn_step']
    self.dropout = opt['dropout']
    self.seed_count = opt['seed_count']
    self.ave_method = opt['ave_method']
    self.min_match = max(1, opt['min_match'])
    self.layers = nn.ModuleList()
    self.n_class, self.n_feature = opt['num_class'], opt['num_feature']
    self.rnn_cell = GRUCell(self.n_feature, self.n_feature)
    self.layer_norm = nn.LayerNorm(self.n_feature)
    self.dev = False
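# A minimal sketch (an assumption, not shown in the source) of how rnn_cell
# and layer_norm above are commonly combined: a GRU update on the feature
# vector followed by layer normalization, iterated rnn_step times.
def _boot_step_sketch(self, feat, hidden):
    # feat, hidden: (batch, n_feature).
    for _ in range(self.rnn_step):
        hidden = self.layer_norm(self.rnn_cell(feat, hidden))
    return hidden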
def __init__(self, input_dim, latent_dim, eps_decay,
             encoder_z0, decoder, timer, z0_prior, device):
    super(VAEGRU, self).__init__(input_dim, latent_dim, eps_decay,
                                 encoder_z0, decoder, timer, z0_prior, device)
    self.gru_cell = GRUCell(input_dim, latent_dim).to(device)
def __init__(self, input_dim, latent_dim, eps_decay,
             decoder, diffeq_solver, timer, device):
    super(ODEGRU, self).__init__(input_dim, latent_dim, eps_decay,
                                 decoder, timer, device)
    self.diffeq_solver = diffeq_solver
    self.gru_cell = GRUCell(input_dim, latent_dim).to(device)
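# A minimal sketch (an assumption, following the usual ODE-RNN recipe; the
# source only shows __init__): between observations the hidden state is
# evolved by the ODE solver, and at each observation it is updated by the GRU.
# The diffeq_solver call signature here is a placeholder for whatever
# interface the solver actually exposes.
def _ode_gru_step_sketch(self, x_t, hidden, t_prev, t_curr):
    # Evolve hidden continuously from t_prev to t_curr, keep the final state.
    hidden = self.diffeq_solver(hidden, torch.tensor([t_prev, t_curr]))[-1]
    # Discrete update with the new observation x_t: (batch, input_dim).
    return self.gru_cell(x_t, hidden)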
def __init__(self, input_dim, latent_dim, eps_decay, decoder, timer, device):
    super(ExpDecayGRU, self).__init__(input_dim, latent_dim, eps_decay,
                                      decoder, timer, device)
    self.gru_cell = GRUCell(input_dim, latent_dim).to(device)
    self.decay_layer = nn.Linear(1, 1).to(device)
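# A minimal sketch (an assumption, not from the source) of how decay_layer
# above is typically used, in the style of GRU-D: the hidden state is
# exponentially decayed as a learned function of the elapsed time before the
# GRU update.
def _exp_decay_step_sketch(self, x_t, hidden, delta_t):
    # delta_t: (batch, 1), time elapsed since the previous observation.
    decay = torch.exp(-torch.relu(self.decay_layer(delta_t)))
    return self.gru_cell(x_t, hidden * decay)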
def __init__(self, input_dim, latent_dim, eps_decay, decoder, timer, device):
    super(DeltaTGRU, self).__init__(input_dim, latent_dim, eps_decay,
                                    decoder, timer, device)
    # +1 dim for time gaps
    self.input_dim = input_dim + 1
    self.gru_cell = GRUCell(input_dim + 1, latent_dim).to(device)
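# A minimal sketch (an assumption, not from the source) of the step implied by
# the "+1 dim for time gaps" comment: the time gap is appended to each
# observation before it enters the GRU cell.
def _delta_t_step_sketch(self, x_t, hidden, delta_t):
    # x_t: (batch, input_dim); delta_t: (batch, 1).
    return self.gru_cell(torch.cat([x_t, delta_t], dim=-1), hidden)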
def __init__(self, input_dim, latent_dim, eps_decay, decoder, timer, device):
    super(VanillaGRU, self).__init__(input_dim, latent_dim, eps_decay,
                                     decoder, timer, device)
    self.gru_cell = GRUCell(input_dim, latent_dim)
def __init__(self,
             vocab: Vocabulary,
             source_embedder: TextFieldEmbedder,
             target_namespace: str,
             encoder: Seq2SeqEncoder,
             decoder: Dict,
             max_decoding_steps: int,
             target_embedding_dim: int = None,
             attention: Dict = None,
             beam_size: int = None,
             scheduled_sampling_ratio: float = 0.,
             use_bleu: bool = True,
             visualize_attention: bool = True) -> None:
    super(NmtSeq2Seq, self).__init__(vocab)
    self._scheduled_sampling_ratio = scheduled_sampling_ratio
    self._target_namespace = target_namespace

    # We need the start symbol to provide as the input at the first timestep of decoding, and
    # the end symbol as a way to indicate the end of the decoded sequence.
    self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)

    if use_bleu:
        pad_index = self.vocab.get_token_index(self.vocab._padding_token,
                                               self._target_namespace)  # pylint: disable=protected-access
        self._bleu = BLEU(exclude_indices={pad_index, self._end_index, self._start_index})
    else:
        self._bleu = None

    # At prediction time, we use a beam search to find the most likely sequence of target tokens.
    beam_size = beam_size or 1
    self._max_decoding_steps = max_decoding_steps
    self._beam_search = BeamSearch(self._end_index,
                                   max_steps=max_decoding_steps,
                                   beam_size=beam_size)

    # Dense embedding of source vocab tokens.
    self._source_embedder = source_embedder

    # Encodes the sequence of source embeddings into a sequence of hidden states.
    self._encoder = encoder

    num_classes = self.vocab.get_vocab_size(self._target_namespace)

    # Attention mechanism params applied to the encoder output for each step.
    self._attention = attention
    self._visualize_attention = visualize_attention

    # Dense embedding of vocab words in the target space.
    target_embedding_dim = target_embedding_dim or source_embedder.get_output_dim()
    self._target_embedder = Embedding(num_classes, target_embedding_dim)

    # Decoder output dim needs to be the same as the encoder output dim since we initialize the
    # hidden state of the decoder with the final hidden state of the encoder.
    self._encoder_output_dim = self._encoder.get_output_dim()
    self._decoder_input_dim = decoder["input_size"]

    # If using attention, make sure the .jsonnet params reflect this architecture:
    # input_to_decoder_rnn = [prev_word + attended_context_vector]
    self._decoder_output_dim = decoder["hidden_size"]

    # We'll use an RNN cell as the recurrent cell that produces a hidden state
    # for the decoder at each time step.
    decoder_cell_type = decoder["type"]
    if decoder_cell_type == "gru":
        self._decoder_cell = GRUCell(self._decoder_input_dim, self._decoder_output_dim)
    elif decoder_cell_type == "lstm":
        self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim)
    else:
        raise ValueError("Decoder cell of type {} not supported yet!".format(decoder_cell_type))

    # We project the hidden state from the decoder into the output vocabulary space
    # in order to get log probabilities of each target token, at each time step.
    self._output_projection_layer = Linear(self._decoder_output_dim, num_classes)
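# A minimal sketch (an assumption, not from the source) of why the two cell
# types need slightly different handling in the decoder loop: GRUCell carries
# a single hidden tensor, while LSTMCell carries a (hidden, cell-state) tuple.
def _decoder_cell_step_sketch(decoder_cell, decoder_input, state):
    if isinstance(decoder_cell, LSTMCell):
        hidden, context = decoder_cell(decoder_input, state)  # state is (h, c)
        return hidden, (hidden, context)
    hidden = decoder_cell(decoder_input, state)               # state is h
    return hidden, hidden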
def __init__(self, input_embedding: InputEmbedding, config: CopyNetConfig) -> None:
    super().__init__()
    self.data_len = config.data_len

    # Encoding modules.
    self._encoder = PytorchSeq2SeqWrapper(
        torch.nn.GRU(input_size=config.hidden,
                     hidden_size=config.encoder_GRU_hidden,
                     num_layers=config.encoder_layers,
                     bidirectional=True,
                     batch_first=True))

    # Embedding modules.
    self.input_embed = input_embedding

    # Decoder output dim needs to be the same as the encoder output dim since we initialize the
    # hidden state of the decoder with the final hidden state of the encoder.
    # We arbitrarily set the decoder's input dimension to be the same as the output dimension.
    self.encoder_output_dim = config.encoder_GRU_hidden * 2  # * 2: bidirectional
    self.decoder_input_dim = config.decoder_hidden_size
    self.decoder_output_dim = config.decoder_GRU_hidden

    # Reduce dimensionality of encoder output to reduce the number of decoder parameters.
    self.encoder_output_projection = Linear(self.encoder_output_dim, self.decoder_output_dim)

    # The decoder input will be a function of the embedding of the previous predicted token,
    # an attended encoder hidden state called the "attentive read", and another
    # weighted sum of the encoder hidden state called the "selective read".
    # While the weights for the attentive read are calculated by an `Attention` module,
    # the weights for the selective read are simply the predicted probabilities
    # corresponding to each token in the source sentence that matches the target
    # token from the previous timestep.
    self._attention = LinearAttention(self.decoder_output_dim,
                                      self.decoder_output_dim,
                                      activation=Activation.by_name('tanh')())
    self._input_projection_layer = Linear(
        config.feature_dim + self.decoder_output_dim * 2,
        self.decoder_input_dim)

    # We then run the projected decoder input through a GRU cell to produce
    # the next hidden state.
    self._decoder_cell = GRUCell(self.decoder_input_dim, self.decoder_output_dim)

    self._command_token_size = config.num_cmd_tokens

    # We create a "generation" score for each token in the target vocab
    # with a linear projection of the decoder hidden state.
    self._output_generation_layer_1 = Linear(self.decoder_output_dim, self._command_token_size)
    self._output_generation_layer_2 = Linear(self.decoder_output_dim, self._command_token_size)

    # We create a "copying" score for each source token by applying a non-linearity
    # (tanh) to a linear projection of the encoded hidden state for that token,
    # and then taking the dot product of the result with the decoder hidden state.
    self._output_copying_layer_1 = Linear(self.decoder_output_dim, self.decoder_output_dim)
    self._output_copying_layer_2 = Linear(self.decoder_output_dim, self.decoder_output_dim)

    self._softmax = nn.LogSoftmax(dim=-1)
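# A minimal sketch (an assumption, not from the source) of the copying score
# described in the comments above: project the encoder states, apply tanh,
# then take the dot product with the decoder hidden state to score each
# source position.
def _copy_scores_sketch(self, encoder_outputs, decoder_hidden):
    # encoder_outputs: (batch, src_len, decoder_output_dim)
    # decoder_hidden:  (batch, decoder_output_dim)
    projected = torch.tanh(self._output_copying_layer_1(encoder_outputs))
    # (batch, src_len, d) x (batch, d, 1) -> (batch, src_len)
    return torch.bmm(projected, decoder_hidden.unsqueeze(-1)).squeeze(-1)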
def __init__(self, dim, attented=True, dropout=0.):
    super(MemoryLayer, self).__init__()
    self.combine_layer = CombineLayer(dim, dim // 2, dropout=dropout)
    self.memory_cell = GRUCell(dim, dim)
    self.attented = attented
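# A minimal sketch (an assumption; forward() is not shown in the source) of
# the usual memory-update pattern for such a layer: combine the new input with
# the memory, then let the GRU cell decide how much of the memory to overwrite.
# The CombineLayer call signature here is a guess.
def _memory_update_sketch(self, inputs, memory):
    # inputs, memory: (batch, dim).
    combined = self.combine_layer(inputs, memory)
    return self.memory_cell(combined, memory)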