Example #1
    def __init__(self, num_classes: int, input_dim: int,
                 output_dim: int) -> None:
        super().__init__()
        self.embedder = Embedding(num_classes, input_dim)
        self.decoder_cell = GRUCell(input_dim, output_dim)
        self.output_projection_layer = Linear(output_dim, num_classes)
        self.recall = UnigramRecall()
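The snippet above only wires the modules together. For context, here is a minimal, illustrative sketch of one decoding step with those three modules; the concrete sizes (100 classes, 32/64 dims) and tensor names are assumptions, not from the source:

import torch
from torch.nn import Embedding, GRUCell, Linear

embedder = Embedding(100, 32)              # num_classes=100, input_dim=32 (assumed)
decoder_cell = GRUCell(32, 64)             # input_dim=32, output_dim=64 (assumed)
output_projection_layer = Linear(64, 100)

prev_tokens = torch.randint(0, 100, (8,))  # previous predictions for a batch of 8
hidden = torch.zeros(8, 64)                # decoder hidden state
embedded = embedder(prev_tokens)           # (8, 32)
hidden = decoder_cell(embedded, hidden)    # (8, 64)
logits = output_projection_layer(hidden)   # (8, 100) scores over the vocabulary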
Example #2
    def __init__(self,
                 input_dim,
                 latent_dim,
                 device,
                 concat_mask=False,
                 obsrv_std=0.1,
                 use_binary_classif=False,
                 linear_classifier=False,
                 classif_per_tp=False,
                 input_space_decay=False,
                 cell="gru",
                 n_units=100,
                 n_labels=1,
                 train_classif_w_reconstr=False):

        super(Classic_RNN,
              self).__init__(input_dim,
                             latent_dim,
                             device,
                             obsrv_std=obsrv_std,
                             use_binary_classif=use_binary_classif,
                             classif_per_tp=classif_per_tp,
                             linear_classifier=linear_classifier,
                             n_labels=n_labels,
                             train_classif_w_reconstr=train_classif_w_reconstr)

        self.concat_mask = concat_mask

        encoder_dim = int(input_dim)
        if concat_mask:
            encoder_dim = encoder_dim * 2

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, n_units),
            nn.Tanh(),
            nn.Linear(n_units, input_dim),
        )

        #utils.init_network_weights(self.encoder)
        utils.init_network_weights(self.decoder)

        if cell == "gru":
            self.rnn_cell = GRUCell(encoder_dim + 1,
                                    latent_dim)  # +1 for delta t
        elif cell == "expdecay":
            self.rnn_cell = GRUCellExpDecay(input_size=encoder_dim,
                                            input_size_for_decay=input_dim,
                                            hidden_size=latent_dim,
                                            device=device)
        else:
            raise Exception("Unknown RNN cell: {}".format(cell))

        if input_space_decay:
            # Parameter(...).to(device) returns a plain tensor that would not be
            # registered with the module, so create the parameters on the target
            # device directly (initialized to zeros here).
            self.w_input_decay = Parameter(
                torch.zeros(1, int(input_dim), device=device))
            self.b_input_decay = Parameter(
                torch.zeros(1, int(input_dim), device=device))
        self.input_space_decay = input_space_decay

        self.z0_net = lambda hidden_state: hidden_state
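The "+1 for delta t" comment means each GRU input is an observation with its time gap appended (and, when concat_mask is set, the observation mask as well, which is why encoder_dim doubles). A minimal sketch of that input assembly, with shapes assumed for illustration:

import torch

x = torch.randn(4, 10)                       # (batch, input_dim), assumed sizes
delta_t = torch.full((4, 1), 0.5)            # (batch, 1) time gap per example
rnn_input = torch.cat([x, delta_t], dim=-1)  # (batch, input_dim + 1) -> GRUCell(encoder_dim + 1, latent_dim)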
Example #3
    def __init__(self,
                 vocab: Vocabulary,
                 token_embedder: TextFieldEmbedder,
                 document_encoder: Seq2VecEncoder,
                 utterance_encoder: Seq2VecEncoder,
                 context_encoder: Seq2SeqEncoder,
                 beam_size: int = None,
                 max_decoding_steps: int = 50,
                 scheduled_sampling_ratio: float = 0.,
                 use_bleu: bool = True) -> None:
        super(MultiTurnHred, self).__init__(vocab)
        self._scheduled_sampling_ratio = scheduled_sampling_ratio

        # We need the start symbol to provide as the input at the first timestep of decoding, and
        # end symbol as a way to indicate the end of the decoded sequence.
        self._start_index = self.vocab.get_token_index(START_SYMBOL)
        self._end_index = self.vocab.get_token_index(END_SYMBOL)

        if use_bleu:
            pad_index = self.vocab.get_token_index(self.vocab._padding_token)  # pylint: disable=protected-access
            self._bleu = BLEU(exclude_indices={pad_index, self._end_index, self._start_index})
        else:
            self._bleu = None

        # At prediction time, we use a beam search to find the most likely sequence of target tokens.
        self._beam_size = beam_size or 1
        self._max_decoding_steps = max_decoding_steps
        self._beam_search = BeamSearch(self._end_index, max_steps=max_decoding_steps, beam_size=self._beam_size)

        # Dense embedding of word level tokens.
        self._token_embedder = token_embedder

        # Document word level encoder.
        self._document_encoder = document_encoder

        # Dialogue word level encoder.
        self._utterance_encoder = utterance_encoder

        # Sentence level encoder.
        self._context_encoder = context_encoder

        num_classes = self.vocab.get_vocab_size()

        document_output_dim = self._document_encoder.get_output_dim()
        utterance_output_dim = self._utterance_encoder.get_output_dim()
        context_output_dim = self._context_encoder.get_output_dim()
        decoder_output_dim = utterance_output_dim
        decoder_input_dim = token_embedder.get_output_dim() + document_output_dim + context_output_dim

        # We'll use a GRU cell as the recurrent cell that produces a hidden state
        # for the decoder at each time step.
        # TODO (pradeep): Do not hardcode decoder cell type.
        self._decoder_cell = GRUCell(decoder_input_dim, decoder_output_dim)

        # We project the hidden state from the decoder into the output vocabulary space
        # in order to get log probabilities of each target token, at each time step.
        self._output_projection_layer = Linear(decoder_output_dim, num_classes)
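The decoder input dimension above implies that, at each step, the previous token's embedding is concatenated with the document and context encodings before entering the GRU cell. A hedged sketch of that step (tensor names and sizes assumed, not from the source):

import torch

batch, embed_dim, doc_dim, ctx_dim = 4, 32, 64, 64   # assumed sizes
embedded_prev = torch.randn(batch, embed_dim)
document_encoding = torch.randn(batch, doc_dim)
context_encoding = torch.randn(batch, ctx_dim)
decoder_input = torch.cat(
    [embedded_prev, document_encoding, context_encoding], dim=-1)
# decoder_input.shape[-1] matches decoder_input_dim as computed above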
Example #4
    def __init__(self, input_size, hidden_size, num_layers=1, dropout=0.0):
        super(StackedGRU, self).__init__()
        self.dropout = nn.Dropout(dropout)
        self.num_layers = num_layers
        self.layers = nn.ModuleList()

        # After the first cell, each layer's input is the previous hidden state.
        for _ in range(num_layers):
            self.layers.append(GRUCell(input_size, hidden_size))
            input_size = hidden_size
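The constructor only builds the cell stack; the forward pass is not shown here. A minimal sketch of how such a stack is typically stepped for a single timestep, applying dropout between layers (an assumption based on the stored nn.Dropout; the function name is illustrative):

import torch
from torch.nn import GRUCell, Dropout, ModuleList

def step(layers: ModuleList, dropout: Dropout, x, hiddens):
    # x: (batch, input_size); hiddens: one (batch, hidden_size) tensor per layer.
    new_hiddens = []
    for i, cell in enumerate(layers):
        h = cell(x, hiddens[i])
        new_hiddens.append(h)
        x = dropout(h) if i + 1 < len(layers) else h  # no dropout after the top layer
    return x, new_hiddens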
Example #5
    def __init__(self, name: str, event2mind: Event2Mind, num_classes: int,
                 input_dim: int, output_dim: int) -> None:
        self.embedder = Embedding(num_classes, input_dim)
        event2mind.add_module(f"{name}_embedder", self.embedder)
        self.decoder_cell = GRUCell(input_dim, output_dim)
        event2mind.add_module(f"{name}_decoder_cell", self.decoder_cell)
        self.output_projection_layer = Linear(output_dim, num_classes)
        event2mind.add_module(f"{name}_output_project_layer",
                              self.output_projection_layer)
        self.recall = UnigramRecall()
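Unlike Example #1, this helper is not an nn.Module itself, so it calls add_module to register each piece on the parent model; that is what makes the parameters visible to the parent's optimizer. A tiny illustrative check (the module name here is made up):

import torch.nn as nn
from torch.nn import GRUCell

parent = nn.Module()                      # stands in for the Event2Mind model
cell = GRUCell(32, 64)
parent.add_module("demo_decoder_cell", cell)
assert len(list(parent.parameters())) == len(list(cell.parameters()))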
Example #6
    def __init__(self, opt):
        super(BootDecoder, self).__init__()
        self.opt = opt
        self.rnn_step = opt['rnn_step']
        self.dropout = opt['dropout']
        self.seed_count = opt['seed_count']
        self.ave_method = opt['ave_method']
        self.min_match = max(1, opt['min_match'])
        self.layers = nn.ModuleList()
        self.n_class, self.n_feature = opt['num_class'], opt['num_feature']
        self.rnn_cell = GRUCell(self.n_feature, self.n_feature)
        self.layer_norm = nn.LayerNorm(self.n_feature)
        self.dev = False
Example #7
File: model.py Project: dtak/mbrl-smdp-ode
    def __init__(self, input_dim, latent_dim, eps_decay, encoder_z0, decoder, timer, z0_prior, device):
        super(VAEGRU, self).__init__(input_dim, latent_dim, eps_decay, encoder_z0, decoder, timer, z0_prior, device)
        self.gru_cell = GRUCell(input_dim, latent_dim).to(device)
Example #8
File: model.py Project: dtak/mbrl-smdp-ode
    def __init__(self, input_dim, latent_dim, eps_decay, decoder, diffeq_solver, timer, device):
        super(ODEGRU, self).__init__(input_dim, latent_dim, eps_decay, decoder, timer, device)
        self.diffeq_solver = diffeq_solver
        self.gru_cell = GRUCell(input_dim, latent_dim).to(device)
Example #9
File: model.py Project: dtak/mbrl-smdp-ode
    def __init__(self, input_dim, latent_dim, eps_decay, decoder, timer, device):
        super(ExpDecayGRU, self).__init__(input_dim, latent_dim, eps_decay, decoder, timer, device)
        self.gru_cell = GRUCell(input_dim, latent_dim).to(device)
        self.decay_layer = nn.Linear(1, 1).to(device)
Example #10
File: model.py Project: dtak/mbrl-smdp-ode
    def __init__(self, input_dim, latent_dim, eps_decay, decoder, timer, device):
        super(DeltaTGRU, self).__init__(input_dim, latent_dim, eps_decay, decoder, timer, device)
        # +1 dim for time gaps
        self.input_dim = input_dim + 1
        self.gru_cell = GRUCell(input_dim + 1, latent_dim).to(device)
Example #11
File: model.py Project: dtak/mbrl-smdp-ode
    def __init__(self, input_dim, latent_dim, eps_decay, decoder, timer, device):
        super(VanillaGRU, self).__init__(input_dim, latent_dim, eps_decay, decoder, timer, device)
        self.gru_cell = GRUCell(input_dim, latent_dim)
Example #12
    def __init__(self,
                 vocab: Vocabulary,
                 source_embedder: TextFieldEmbedder,
                 target_namespace: str,
                 encoder: Seq2SeqEncoder,
                 decoder: Dict,
                 max_decoding_steps: int,
                 target_embedding_dim: int = None,
                 attention: Dict = None,
                 beam_size: int = None,
                 scheduled_sampling_ratio: float = 0.,
                 use_bleu: bool = True,
                 visualize_attention: bool = True) -> None:
        super(NmtSeq2Seq, self).__init__(vocab)

        self._scheduled_sampling_ratio = scheduled_sampling_ratio
        self._target_namespace = target_namespace
        # We need the start symbol to provide as the input at the first timestep of decoding, and
        # end symbol as a way to indicate the end of the decoded sequence.
        self._start_index = self.vocab.get_token_index(START_SYMBOL,
                                                       self._target_namespace)
        self._end_index = self.vocab.get_token_index(END_SYMBOL,
                                                     self._target_namespace)

        if use_bleu:
            pad_index = self.vocab.get_token_index(self.vocab._padding_token,
                                                   self._target_namespace)  # pylint: disable=protected-access
            self._bleu = BLEU(exclude_indices={
                pad_index, self._end_index, self._start_index
            })
        else:
            self._bleu = None

        # At prediction time, we use a beam search to find the most likely sequence of target tokens.
        beam_size = beam_size or 1
        self._max_decoding_steps = max_decoding_steps
        self._beam_search = BeamSearch(self._end_index,
                                       max_steps=max_decoding_steps,
                                       beam_size=beam_size)

        # Dense embedding of source vocab tokens.
        self._source_embedder = source_embedder

        # Encodes the sequence of source embeddings into a sequence of hidden states.
        self._encoder = encoder

        num_classes = self.vocab.get_vocab_size(self._target_namespace)

        # Attention mechanism params applied to the encoder output for each step.
        self._attention = attention

        self._visualize_attention = visualize_attention

        # Dense embedding of vocab words in the target space.
        target_embedding_dim = (target_embedding_dim
                                or source_embedder.get_output_dim())
        self._target_embedder = Embedding(num_classes, target_embedding_dim)

        # Decoder output dim needs to be the same as the encoder output dim since we initialize the
        # hidden state of the decoder with the final hidden state of the encoder.
        self._encoder_output_dim = self._encoder.get_output_dim()
        # self._decoder_output_dim = self._encoder_output_dim

        self._decoder_input_dim = decoder["input_size"]
        # If using attention make sure the .jsonnet params reflect this architecture:
        # input_to_decoder_rnn = [prev_word + attended_context_vector]
        self._decoder_output_dim = decoder['hidden_size']

        # We'll use an RNN cell as the recurrent cell that produces a hidden state
        # for the decoder at each time step.
        decoder_cell_type = decoder["type"]

        if decoder_cell_type == "gru":
            self._decoder_cell = GRUCell(self._decoder_input_dim,
                                         self._decoder_output_dim)
        elif decoder_cell_type == "lstm":
            self._decoder_cell = LSTMCell(self._decoder_input_dim,
                                          self._decoder_output_dim)
        else:
            raise ValueError(
                "Dialogue encoder of type {} not supported yet!".format(
                    decoder_cell_type))

        # We project the hidden state from the decoder into the output vocabulary space
        # in order to get log probabilities of each target token, at each time step.
        self._output_projection_layer = Linear(self._decoder_output_dim,
                                               num_classes)
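A practical detail behind the gru/lstm branch: torch.nn.GRUCell returns a single hidden tensor, while torch.nn.LSTMCell returns a (hidden, cell-state) tuple, so the decoding loop has to unpack them differently. A minimal sketch with assumed sizes:

import torch
from torch.nn import GRUCell, LSTMCell

x = torch.randn(2, 8)                        # (batch, decoder_input_dim), assumed
gru = GRUCell(8, 16)
h = gru(x, torch.zeros(2, 16))               # single hidden tensor

lstm = LSTMCell(8, 16)
h, c = lstm(x, (torch.zeros(2, 16), torch.zeros(2, 16)))  # tuple of hidden and cell state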
Example #13
    def __init__(self, input_embedding: InputEmbedding,
                 config: CopyNetConfig) -> None:
        super().__init__()

        self.data_len = config.data_len
        # Encoding modules.
        self._encoder = PytorchSeq2SeqWrapper(
            torch.nn.GRU(input_size=config.hidden,
                         hidden_size=config.encoder_GRU_hidden,
                         num_layers=config.encoder_layers,
                         bidirectional=True,
                         batch_first=True))
        # Embedding modules.
        self.input_embed = input_embedding
        # Decoder output dim needs to be the same as the encoder output dim since we initialize the
        # hidden state of the decoder with the final hidden state of the encoder.
        # We arbitrarily set the decoder's input dimension to be the same as the output dimension.
        self.encoder_output_dim = config.encoder_GRU_hidden * 2
        self.decoder_input_dim = config.decoder_hidden_size
        self.decoder_output_dim = config.decoder_GRU_hidden

        # Reduce dimensionality of encoder output to reduce the number of decoder parameters.
        self.encoder_output_projection = Linear(self.encoder_output_dim,
                                                self.decoder_output_dim)

        # The decoder input will be a function of the embedding of the previous predicted token,
        # an attended encoder hidden state called the "attentive read", and another
        # weighted sum of the encoder hidden state called the "selective read".
        # While the weights for the attentive read are calculated by an `Attention` module,
        # the weights for the selective read are simply the predicted probabilities
        # corresponding to each token in the source sentence that matches the target
        # token from the previous timestep.
        self._attention = LinearAttention(
            self.decoder_output_dim,
            self.decoder_output_dim,
            activation=Activation.by_name('tanh')())
        # decoder_output_dim * 2: the attentive read plus the selective read.
        self._input_projection_layer = Linear(
            config.feature_dim + self.decoder_output_dim * 2,
            self.decoder_input_dim)

        # We then run the projected decoder input through a GRU cell to produce
        # the next hidden state.
        self._decoder_cell = GRUCell(self.decoder_input_dim,
                                     self.decoder_output_dim)
        self._command_token_size = config.num_cmd_tokens

        # We create a "generation" score for each token in the target vocab
        # with a linear projection of the decoder hidden state.
        self._output_generation_layer_1 = Linear(self.decoder_output_dim,
                                                 self._command_token_size)
        self._output_generation_layer_2 = Linear(self.decoder_output_dim,
                                                 self._command_token_size)

        # We create a "copying" score for each source token by applying a non-linearity
        # (tanh) to a linear projection of the encoded hidden state for that token,
        # and then taking the dot product of the result with the decoder hidden state.
        self._output_copying_layer_1 = Linear(self.decoder_output_dim,
                                              self.decoder_output_dim)
        self._output_copying_layer_2 = Linear(self.decoder_output_dim,
                                              self.decoder_output_dim)

        self._softmax = nn.LogSoftmax(dim=-1)
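The copying-score comment above describes a concrete computation: project each encoded source token, apply tanh, then dot the result with the decoder hidden state. A hedged sketch of that score (shapes and names assumed for illustration):

import torch
from torch.nn import Linear

batch, src_len, dec_dim = 2, 7, 16                  # assumed sizes
encoder_out = torch.randn(batch, src_len, dec_dim)  # projected encoder states
decoder_h = torch.randn(batch, dec_dim)
copy_layer = Linear(dec_dim, dec_dim)               # stands in for _output_copying_layer_1

projected = torch.tanh(copy_layer(encoder_out))                          # (batch, src_len, dec_dim)
copy_scores = torch.bmm(projected, decoder_h.unsqueeze(-1)).squeeze(-1)  # (batch, src_len)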
Example #14
    def __init__(self, dim, attented=True, dropout=0.):
        super(MemoryLayer, self).__init__()
        self.combine_layer = CombineLayer(dim, dim // 2, dropout=dropout)
        self.memory_cell = GRUCell(dim, dim)
        self.attented = attented