def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2): """ Init NMT Model. @param embed_size (int): Embedding size (dimensionality) @param hidden_size (int): Hidden Size (dimensionality) @param vocab (Vocab): Vocabulary object containing src and tgt languages See vocab.py for documentation. @param dropout_rate (float): Dropout probability, for attention """ super(NMT, self).__init__() self.model_embeddings = ModelEmbeddings(embed_size, vocab) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab # Bidirectional LSTM with bias self.encoder = nn.LSTM(input_size=embed_size, hidden_size=self.hidden_size, bidirectional=True, bias=True) # LSTM Cell with bias self.decoder = nn.LSTMCell(input_size=embed_size + self.hidden_size, hidden_size=self.hidden_size, bias=True) # Linear Layer with no bias, W_{h} self.h_projection = nn.Linear(in_features=self.hidden_size * 2, out_features=self.hidden_size, bias=False) # Linear Layer with no bias, W_{c} self.c_projection = nn.Linear(in_features=self.hidden_size * 2, out_features=self.hidden_size, bias=False) # Linear Layer with no bias, W_{attProj} self.att_projection = nn.Linear(in_features=self.hidden_size * 2, out_features=self.hidden_size, bias=False) # Linear Layer with no bias, W_{u} self.combined_output_projection = nn.Linear( in_features=self.hidden_size * 3, out_features=self.hidden_size, bias=False) # Linear Layer with no bias, W_{vocab} self.target_vocab_projection = nn.Linear(in_features=self.hidden_size, out_features=len(vocab.tgt), bias=False) # Dropout Layer self.dropout = nn.Dropout(p=self.dropout_rate)
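A quick way to check the wiring above is to push random tensors through the layers and assert the shapes; this is a minimal, self-contained sketch (arbitrary sizes, no Vocab object needed) that exercises only the encoder and h_projection:

import torch
import torch.nn as nn

embed_size, hidden_size, batch, src_len = 16, 32, 4, 10
encoder = nn.LSTM(input_size=embed_size, hidden_size=hidden_size, bidirectional=True, bias=True)
h_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False)

x = torch.randn(src_len, batch, embed_size)         # (src_len, b, e)
enc_hiddens, (last_hidden, last_cell) = encoder(x)  # enc_hiddens: (src_len, b, 2h); last_hidden: (2, b, h)
init_dec_hidden = h_projection(torch.cat((last_hidden[0], last_hidden[1]), dim=1))
assert init_dec_hidden.shape == (batch, hidden_size)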
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2): """Init NMT Model. @param embed_size (int): Embedding size (dimensionality) @param hidden_size (int): Hidden Size, the size of hidden states (dimensionality) @param vocab (Vocab): Vocabulary object containing src and tgt languages See vocab.py for documentation. @param dropout_rate (float): Dropout probability, for attention """ super(NMT, self).__init__() self.model_embeddings = ModelEmbeddings(embed_size, vocab) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab # For sanity check only, not relevant to implementation self.gen_sanity_check = False self.counter = 0 self.encoder = torch.nn.LSTM( input_size=embed_size, hidden_size=self.hidden_size, bias=True, bidirectional=True, ) self.decoder = torch.nn.LSTMCell( input_size=embed_size + hidden_size, hidden_size=self.hidden_size, bias=True, ) self.h_projection = torch.nn.Linear( in_features=2 * self.hidden_size, out_features=self.hidden_size, bias=False ) self.c_projection = torch.nn.Linear( in_features=2 * self.hidden_size, out_features=self.hidden_size, bias=False ) self.att_projection = torch.nn.Linear( in_features=2 * self.hidden_size, out_features=self.hidden_size, bias=False ) self.combined_output_projection = torch.nn.Linear( in_features=3 * self.hidden_size, out_features=self.hidden_size, bias=False ) self.target_vocab_projection = torch.nn.Linear( in_features=self.hidden_size, out_features=len(self.vocab.tgt), bias=False ) self.dropout = torch.nn.Dropout(p=self.dropout_rate)
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2, no_char_decoder=False): """ Init NMT Model. @param embed_size (int): Embedding size (dimensionality) @param hidden_size (int): Hidden Size (dimensionality) @param vocab (Vocab): Vocabulary object containing src and tgt languages See vocab.py for documentation. @param dropout_rate (float): Dropout probability, for attention @param no_char_decoder (bool): If True, do not build the character-level decoder """ super(NMT, self).__init__() self.model_embeddings_source = ModelEmbeddings(embed_size, vocab.src) self.model_embeddings_target = ModelEmbeddings(embed_size, vocab.tgt) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab self.encoder = nn.LSTM(embed_size, hidden_size, bidirectional=True) self.decoder = nn.LSTMCell(embed_size + hidden_size, hidden_size) self.h_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False) self.c_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False) self.att_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False) self.combined_output_projection = nn.Linear(hidden_size * 2 + hidden_size, hidden_size, bias=False) self.target_vocab_projection = nn.Linear(hidden_size, len(vocab.tgt), bias=False) self.dropout = nn.Dropout(self.dropout_rate) if not no_char_decoder: self.charDecoder = CharDecoder(hidden_size, target_vocab=vocab.tgt) else: self.charDecoder = None
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2): """ Init NMT Model. @param embed_size (int): Embedding size (dimensionality) @param hidden_size (int): Hidden Size (dimensionality) @param vocab (Vocab): Vocabulary object containing src and tgt languages See vocab.py for documentation. @param dropout_rate (float): Dropout probability, for attention """ super(NMT, self).__init__() self.model_embeddings = ModelEmbeddings(embed_size, vocab) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab # default values self.encoder = None self.decoder = None self.h_projection = None self.c_projection = None self.att_projection = None self.combined_output_projection = None self.target_vocab_projection = None self.dropout = None ### YOUR CODE HERE (~8 Lines) self.encoder = nn.LSTM(input_size=embed_size, hidden_size=hidden_size, num_layers=1, bias=True, bidirectional=True) self.decoder = nn.LSTMCell(input_size=embed_size + hidden_size, hidden_size=hidden_size, bias=True) self.h_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False) self.c_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False) self.att_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False) self.combined_output_projection = nn.Linear(3 * hidden_size, hidden_size, bias=False) self.target_vocab_projection = nn.Linear(hidden_size, len(self.vocab.tgt), bias=False) self.dropout = nn.Dropout(p=dropout_rate)  # nn.Dropout, not nn.Dropout2d: the combined outputs are (b, h) vectors, not 2D feature maps
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2): """ Init NMT Model. @param embed_size (int): Embedding size (dimensionality) @param hidden_size (int): Hidden Size (dimensionality) @param vocab (Vocab): Vocabulary object containing src and tgt languages See vocab.py for documentation. @param dropout_rate (float): Dropout probability, for attention """ super(NMT, self).__init__() self.model_embeddings = ModelEmbeddings(embed_size, vocab) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab # default values self.encoder = None self.decoder = None self.h_projection = None self.c_projection = None self.att_projection = None self.combined_output_projection = None self.target_vocab_projection = None self.dropout = None ### YOUR CODE HERE (~8 Lines) ### TODO - Initialize the following variables: ### self.encoder (Bidirectional LSTM with bias) ### self.decoder (LSTM Cell with bias) ### self.h_projection (Linear Layer with no bias), called W_{h} in the PDF. ### self.c_projection (Linear Layer with no bias), called W_{c} in the PDF. ### self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF. ### self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF. ### self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF. ### self.dropout (Dropout Layer) ### ### Use the following docs to properly initialize these variables: ### LSTM: ### https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM ### LSTM Cell: ### https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell ### Linear Layer: ### https://pytorch.org/docs/stable/nn.html#torch.nn.Linear ### Dropout Layer: ### https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout self.encoder = nn.LSTM(input_size=embed_size, hidden_size=self.hidden_size, num_layers=1, bias=True, batch_first=False, bidirectional=True)  # dropout is omitted: it has no effect on a single-layer LSTM self.decoder = nn.LSTMCell(input_size=embed_size + self.hidden_size, hidden_size=self.hidden_size, bias=True)  # input is the target-language embedding concatenated with the previous combined output (e + h) self.h_projection = nn.Linear(in_features=2*self.hidden_size, out_features=self.hidden_size, bias=False) self.c_projection = nn.Linear(in_features=2*self.hidden_size, out_features=self.hidden_size, bias=False) self.att_projection = nn.Linear(in_features=2*self.hidden_size, out_features=self.hidden_size, bias=False) self.combined_output_projection = nn.Linear(in_features=3*self.hidden_size, out_features=self.hidden_size, bias=False) self.target_vocab_projection = nn.Linear(in_features=self.hidden_size, out_features=len(self.vocab.tgt), bias=False) self.dropout = nn.Dropout(p=self.dropout_rate)
def __init__(self, embed_size, hidden_size, src_vocab: Vocabulary, dst_vocab: Vocabulary, device, dropout_rate=0.2): super(NMT, self).__init__() self.device = device self.model_embeddings = ModelEmbeddings(embed_size, src_vocab, dst_vocab) self.hidden_size = hidden_size self.src_vocab = src_vocab self.dst_vocab = dst_vocab self.dropout_rate = dropout_rate # the encoder is a bidirectional LSTM with bias (the dropout argument is omitted: it has no effect on a single-layer LSTM) self.encoder = nn.LSTM(input_size=embed_size, hidden_size=hidden_size, bidirectional=True, bias=True) # the decoder is a unidirectional LSTM cell with bias self.decoder = nn.LSTMCell( input_size=embed_size + hidden_size, # input feeding: the attention vector is concatenated with the next time step's input, so alignment decisions also take past alignment information into account hidden_size=hidden_size, bias=True) # h_projection and c_projection initialize the decoder's hidden and cell states from the source encodings self.h_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False) self.c_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False) # att_projection maps the source encodings into the decoder's hidden space (for the context vector) self.att_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False) # the attention vector is concatenated with the decoder hidden state and projected back to h self.combined_output_projection = nn.Linear(hidden_size * 2 + hidden_size, hidden_size, bias=False) # maps the decoder output to the target vocabulary self.target_vocab_projection = nn.Linear(hidden_size, len(dst_vocab), bias=False) self.dropout = nn.Dropout(dropout_rate)
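The input-feeding step described in the comments above can be sketched in isolation; a minimal example with arbitrary sizes and zero initial states (not the model's actual forward pass, just the concatenation pattern):

import torch
import torch.nn as nn

embed_size, hidden_size, batch = 16, 32, 4
decoder = nn.LSTMCell(embed_size + hidden_size, hidden_size, bias=True)

y_embed = torch.randn(batch, embed_size)       # embedding of the current target word
o_prev = torch.zeros(batch, hidden_size)       # previous combined output (zeros at t = 0)
ybar_t = torch.cat([y_embed, o_prev], dim=1)   # (b, e + h), the decoder input
h_t, c_t = decoder(ybar_t, (torch.zeros(batch, hidden_size), torch.zeros(batch, hidden_size)))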
def __init__(self, word_embed_size, hidden_size, vocab, dropout_rate=0.3, no_char_decoder=False): """ Init NMT Model. @param word_embed_size (int): Embedding size (dimensionality) of word @param hidden_size (int): Hidden Size (dimensionality) @param vocab (Vocab): Vocabulary object containing src and tgt languages See vocab.py for documentation. @param dropout_rate (float): Dropout probability, for attention @param no_char_decoder (bool): If True, do not build the character-level decoder """ super(NMT, self).__init__() self.model_embeddings_source = ModelEmbeddings(word_embed_size, vocab.src) self.model_embeddings_target = ModelEmbeddings(word_embed_size, vocab.tgt) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab # COPY OVER YOUR CODE FROM ASSIGNMENT 4 self.encoder = nn.LSTM(word_embed_size, self.hidden_size, bidirectional=True, bias=True) self.decoder = nn.LSTMCell(word_embed_size + self.hidden_size, self.hidden_size, bias=True) self.h_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False) self.c_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False) self.att_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False) self.combined_output_projection = nn.Linear(3 * self.hidden_size, self.hidden_size, bias=False) self.target_vocab_projection = nn.Linear(self.hidden_size, len(vocab.tgt), bias=False) self.dropout = nn.Dropout(self.dropout_rate) # END YOUR CODE FROM ASSIGNMENT 4 if not no_char_decoder: self.charDecoder = CharDecoder(hidden_size, target_vocab=vocab.tgt) else: self.charDecoder = None
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2): """ Init NMT Model. @param embed_size (int): Embedding size (dimensionality) @param hidden_size (int): Hidden Size (dimensionality) @param vocab (Vocab): Vocabulary object containing src and tgt languages See vocab.py for documentation. @param dropout_rate (float): Dropout probability, for attention """ super(NMT, self).__init__() self.model_embeddings = ModelEmbeddings(embed_size, vocab) # src & tgt embedding init self.hidden_size = hidden_size # hidden size self.dropout_rate = dropout_rate # Dropout self.vocab = vocab # default values self.encoder = None self.decoder = None self.h_projection = None self.c_projection = None self.att_projection = None self.combined_output_projection = None self.target_vocab_projection = None self.dropout = None ### YOUR CODE HERE (~8 Lines) ### TODO - Initialize the following variables: ### self.encoder (Bidirectional LSTM with bias) ### self.decoder (LSTM Cell with bias) ### self.h_projection (Linear Layer with no bias), called W_{h} in the PDF. ### self.c_projection (Linear Layer with no bias), called W_{c} in the PDF. ### self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF. ### self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF. ### self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF. ### self.dropout (Dropout Layer) ### ### Use the following docs to properly initialize these variables: ### LSTM: ### https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM ### LSTM Cell: ### https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell ### Linear Layer: ### https://pytorch.org/docs/stable/nn.html#torch.nn.Linear ### Dropout Layer: ### https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout self.encoder = nn.LSTM(embed_size, hidden_size, bias=True, bidirectional=True) self.decoder = nn.LSTMCell(embed_size+hidden_size, hidden_size, bias=True) self.h_projection = nn.Linear(2*hidden_size, hidden_size, bias=False) self.c_projection = nn.Linear(2*hidden_size, hidden_size, bias=False) self.att_projection = nn.Linear(2*hidden_size, hidden_size, bias=False) self.combined_output_projection = nn.Linear(3*hidden_size, hidden_size, bias=False) self.target_vocab_projection = nn.Linear(hidden_size, len(vocab.tgt), bias=False) self.dropout = nn.Dropout(p=dropout_rate)
def load_dev_data(embed_size=50, dev_perct=1., binary=False): M = ModelEmbeddings(embed_size=embed_size) X = [ labeledTree.to_labeled_lines()[0][1].split(" ") for labeledTree in data['dev'] ] Y = [labeledTree.to_labeled_lines()[0][0] for labeledTree in data['dev']] if binary: X = [x for (x, y) in list(zip(X, Y)) if y != 3] Y = [1 if y > 3 else 0 for y in Y if y != 3] dev_size = int(len(X) * dev_perct) X = X[:dev_size] Y = Y[:dev_size] X = M.embed_sentence(X) # dev data needs no augmentation, so it is zipped here, ready to be passed into model.forward() return list(zip(X, Y))
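A hypothetical call site for the loader above (the global `data` dict and `ModelEmbeddings.embed_sentence` are assumed to exist exactly as the snippet uses them): keep 10% of the dev set and collapse the 5-way sentiment labels to binary.

dev_data = load_dev_data(embed_size=50, dev_perct=0.1, binary=True)  # hypothetical usage
embedded_sentence, label = dev_data[0]  # each item is (embedded sentence, 0/1 label)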
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2): """ Init NMT Model. @param embed_size (int): Embedding size (dimensionality) @param hidden_size (int): Hidden Size (dimensionality) @param vocab (Vocab): Vocabulary object containing src and tgt languages See vocab.py for documentation. @param dropout_rate (float): Dropout probability, for attention """ super(NMT, self).__init__() self.model_embeddings = ModelEmbeddings(embed_size, vocab) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab # default values self.encoder = None self.decoder = None self.h_projection = None self.c_projection = None self.att_projection = None self.combined_output_projection = None self.target_vocab_projection = None self.dropout = None ### ### YOUR CODE HERE (~8 Lines) ### self.embed_size = embed_size self.encoder = nn.LSTM(self.embed_size, self.hidden_size, bias=True, bidirectional=True) # bidirectional=True handles both directions internally, so hidden_size stays h self.decoder = nn.LSTMCell(self.hidden_size + embed_size, self.hidden_size, bias=True) # the input concatenates the embedding of the current word with the previous combined output, hence h + e self.h_projection = nn.Linear(2*self.hidden_size, self.hidden_size, bias=False) # W_h self.c_projection = nn.Linear(2*self.hidden_size, self.hidden_size, bias=False) # W_c self.att_projection = nn.Linear(2*self.hidden_size, self.hidden_size, bias=False) # W_attProj, applied to h^enc_i so it can be scored against h^dec_t self.combined_output_projection = nn.Linear(3*self.hidden_size, self.hidden_size, bias=False) # W_u self.target_vocab_projection = nn.Linear(self.hidden_size, len(self.vocab.tgt), bias=False) # W_vocab, projecting onto the target vocabulary of size len(self.vocab.tgt) self.dropout = nn.Dropout(self.dropout_rate) # Dropout layer ### ### END YOUR CODE ###
def __init__(self, vocab, embed_size, hidden_size, output_size, batch_size, dropout_rate=0.2): super(LSTMClassifier, self).__init__() self.embed_size = embed_size self.hidden_size = hidden_size self.batch_size = batch_size self.embedding = ModelEmbeddings(vocab, embed_size) self.lstm = nn.LSTM(embed_size, hidden_size, bidirectional=False) self.proj = nn.Linear(hidden_size, output_size, bias=True) self.dropout = nn.Dropout(dropout_rate) self.softmax = nn.LogSoftmax(dim=1) self.hidden = self.init_hidden()
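The constructor above calls `self.init_hidden()`, which is not shown; a minimal sketch consistent with a single-layer, unidirectional nn.LSTM (an assumption, since the real method may differ) would be:

def init_hidden(self):
    # Hypothetical: (h_0, c_0), each of shape
    # (num_layers * num_directions, batch, hidden) = (1, b, h)
    return (torch.zeros(1, self.batch_size, self.hidden_size),
            torch.zeros(1, self.batch_size, self.hidden_size))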
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2, no_char_decoder=False): """ Initialize the NMT Model. :param int embed_size: Embedding size (dimensionality) :param int hidden_size: Hidden Size (dimensionality) :param Vocab vocab: Vocabulary object containing src and tgt languages See vocab.py for documentation. :param float dropout_rate: Dropout probability, for the attention combination layer :param bool no_char_decoder: If True, do not build the character-level decoder """ super(NMT, self).__init__() self.model_embeddings_source = ModelEmbeddings(embed_size, vocab.src) self.model_embeddings_target = ModelEmbeddings(embed_size, vocab.tgt) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab self.encoder = nn.LSTM(embed_size, hidden_size, bidirectional=True) self.decoder = nn.LSTMCell(embed_size + hidden_size, hidden_size) # Need to feed in transpose of [h_enc(1)(<-) ; h_enc(m)(->)], and output is 1xh self.h_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False) # Need to feed in transpose of [c_enc(1)(<-); c_enc(m)(->)], and output is 1xh self.c_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False) self.att_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False) # Need to feed in transpose of u(t), and output is 1xh (v(t)) self.combined_output_projection = nn.Linear(3 * hidden_size, hidden_size, bias=False) # Need to feed in transpose of o(t), and output is 1x|Vtg| self.target_vocab_projection = nn.Linear(hidden_size, len(vocab.tgt), bias=False) self.dropout = nn.Dropout(dropout_rate) if not no_char_decoder: self.charDecoder = CharDecoder(hidden_size, target_vocab=vocab.tgt) else: self.charDecoder = None
def __init__(self, vocab, embed_size, hidden_size, enc_bidir, attn_size, dropout=0.2): super(QGModel, self).__init__() self.vocab = vocab self.args = { 'embed_size': embed_size, 'hidden_size': hidden_size, 'dropout': dropout, 'enc_bidir': enc_bidir, 'attn_size': attn_size } self.embeddings = ModelEmbeddings(embed_size, vocab) self.encoder = Encoder(embed_size, hidden_size, dropout, enc_bidir) self.decoder_init_hidden_proj = nn.Linear(self.encoder.hidden_size, hidden_size) self.decoder = Decoder(embed_size, hidden_size, attn_size, len(vocab.tgt), dropout)
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2): """ Init NMT Model. @param embed_size (int): Embedding size (dimensionality) @param hidden_size (int): Hidden Size (dimensionality) @param vocab (Vocab): Vocabulary object containing src and tgt languages @param dropout_rate (float): Dropout probability, for attention """ super(NMT, self).__init__() self.model_embeddings = ModelEmbeddings(embed_size, vocab) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab # Initialize the layers # LSTM layer: takes word embeddings as input, outputs hidden states self.encoder = nn.LSTM(embed_size, self.hidden_size, bidirectional=True) # bidirectional; the dropout argument is omitted because it has no effect on a single-layer LSTM # LSTMCell: takes the word embedding concatenated with the hidden state, outputs the next hidden state self.decoder = nn.LSTMCell(embed_size + self.hidden_size, self.hidden_size) # gives control over each individual time step self.h_projection = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=False) # project 2h -> h self.c_projection = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=False) # project 2h -> h self.att_projection = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=False) # project 2h -> h self.combined_output_projection = nn.Linear(self.hidden_size * 3, self.hidden_size, bias=False) # project 3h -> h self.target_vocab_projection = nn.Linear(self.hidden_size, len(self.vocab.tgt), bias=False) # project the output onto the vocabulary self.dropout = nn.Dropout(p=self.dropout_rate)
def __init__(self, embed_size, hidden_size, vocab, dropout_rate): super(Node2, self).__init__() self.model_embeddings = ModelEmbeddings(embed_size, vocab) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab self.num_labels = vocab.num_labels self.encoder0 = nn.LSTM( input_size=embed_size, hidden_size=hidden_size, bias=True, bidirectional=True) self.encoder1 = nn.LSTM( input_size=embed_size, hidden_size=hidden_size, bias=True, bidirectional=True) self.dropout1 = nn.Dropout(self.dropout_rate) self.attention_projection = nn.Linear(in_features=2 * hidden_size, out_features=self.num_labels, bias=False) self.attention_softmax = nn.Softmax(dim=0) self.labels_projection = nn.Linear(in_features=2 * hidden_size, out_features=100, bias=False) self.labels_projection2 = nn.Linear(in_features=100, out_features=1, bias=False)
def __init__(self, vocab, embed_size, embeddings, sim_scale=5): """ @param vocab (Vocab): vocab object @param embed_size (int): embedding size @param embeddings (torch.tensor (len(vocab), embed_dim)): pretrained word embeddings @param sim_scale (float): scale the sim score by this scalar """ super(AvgSim, self).__init__() self.pretrained_embeddings = embeddings self.embeddings = ModelEmbeddings(vocab, embed_size, self.pretrained_embeddings) self.vocab = vocab self.sim_scale = sim_scale self.scoring_fn = nn.CosineSimilarity(dim=-1)
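To see what `scoring_fn` computes, here is a toy, self-contained example of nn.CosineSimilarity(dim=-1) on batches of vectors, scaled the way AvgSim scales by sim_scale (all values below are arbitrary):

import torch
import torch.nn as nn

scoring_fn = nn.CosineSimilarity(dim=-1)
a = torch.randn(8, 50)         # e.g. averaged embeddings of 8 sentence pairs
b = torch.randn(8, 50)
scores = scoring_fn(a, b) * 5  # scaled by sim_scale, as in AvgSim
print(scores.shape)            # torch.Size([8])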
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2): """ Init NMT Model. @param embed_size (int): Embedding size (dimensionality) @param hidden_size (int): Hidden Size (dimensionality) @param vocab (Vocab): Vocabulary object containing src and tgt languages See vocab.py for documentation. @param dropout_rate (float): Dropout probability, for attention """ super(NMT, self).__init__() self.model_embeddings = ModelEmbeddings(embed_size, vocab) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab # default values self.encoder = None self.decoder = None self.h_projection = None self.c_projection = None self.att_projection = None self.combined_output_projection = None self.target_vocab_projection = None self.dropout = None
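This variant leaves every layer as None; one completion consistent with the other snippets in this section (bidirectional LSTM encoder, LSTMCell decoder with input feeding, bias-free projections, dropout on the combined output) would be:

self.encoder = nn.LSTM(embed_size, hidden_size, bias=True, bidirectional=True)
self.decoder = nn.LSTMCell(embed_size + hidden_size, hidden_size, bias=True)
self.h_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False)
self.c_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False)
self.att_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False)
self.combined_output_projection = nn.Linear(3 * hidden_size, hidden_size, bias=False)
self.target_vocab_projection = nn.Linear(hidden_size, len(vocab.tgt), bias=False)
self.dropout = nn.Dropout(p=dropout_rate)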
def __init__(self, input_size, hidden_size, vocab, fasttext_model, device='cpu'): super(LSTMModel, self).__init__() self.hidden_size = hidden_size self.input_size = input_size self.vocab = vocab self.embedding = ModelEmbeddings(input_size, vocab, fasttext_model, device) self.lstm = nn.LSTM(input_size, hidden_size, bidirectional=True) self.linear = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=True) self.linear2 = nn.Linear(self.hidden_size, self.hidden_size, bias=True) self.attention = Attention(self.hidden_size)
def __init__(self, vocab, embed_size, embeddings, hidden_size, dropout_rate): """ @param vocab (Vocab): vocab object @param embed_size (int): embedding size @param embeddings (torch.tensor (len(vocab), embed_dim)): pretrained word embeddings @param hidden_size (int): hidden size @param dropout_rate (float): dropout prob """ super(NeuralModel, self).__init__() self.pretrained_embeddings = embeddings self.embeddings = ModelEmbeddings(vocab, embed_size, self.pretrained_embeddings) self.vocab = vocab self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.h_projection = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=False) self.c_projection = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=False) self.att_projection = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=False) self.combined_out_projection = nn.Linear(self.hidden_size * 3, self.hidden_size, bias=False) self.vocab_projection = nn.Linear(self.hidden_size, len(self.vocab), bias=False) self.dropout = nn.Dropout(self.dropout_rate) self.encoder = nn.LSTM(input_size=embed_size, hidden_size=self.hidden_size, bias=True, bidirectional=True) self.decoder = nn.LSTMCell(input_size=embed_size + self.hidden_size, hidden_size=self.hidden_size, bias=True)
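The multiplicative attention these projections support can be verified shape-by-shape with torch.bmm, mirroring the step() computation that appears later in this section (sizes are illustrative):

import torch
import torch.nn as nn

batch, src_len, h = 4, 10, 32
att_projection = nn.Linear(2 * h, h, bias=False)

enc_hiddens = torch.randn(batch, src_len, 2 * h)   # (b, src_len, 2h)
enc_hiddens_proj = att_projection(enc_hiddens)     # (b, src_len, h)
dec_hidden = torch.randn(batch, h)                 # (b, h)
e_t = torch.bmm(enc_hiddens_proj, dec_hidden.unsqueeze(2)).squeeze(2)
assert e_t.shape == (batch, src_len)               # one attention score per source position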
def __init__(self, embed_size, hidden_size, vocab, dropout_rate): super(Node, self).__init__() self.model_embeddings = ModelEmbeddings(embed_size, vocab) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab self.num_labels = vocab.num_labels self.first_bilstm = BiLSTM(embed_size=embed_size, hidden_size=hidden_size, dropout_rate=dropout_rate, vocab=vocab) self.second_bilstm = BiLSTM(embed_size=embed_size, hidden_size=hidden_size, dropout_rate=dropout_rate, vocab=vocab)
class NMT(nn.Module): """ Simple Neural Machine Translation Model: - Bidirectional LSTM Encoder - Unidirectional LSTM Decoder - Global Attention Model (Luong, et al. 2015) """ def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2): """ Init NMT Model. @param embed_size (int): Embedding size (dimensionality) @param hidden_size (int): Hidden Size, the size of hidden states (dimensionality) @param vocab (Vocab): Vocabulary object containing src and tgt languages See vocab.py for documentation. @param dropout_rate (float): Dropout probability, for attention """ super(NMT, self).__init__() self.model_embeddings = ModelEmbeddings(embed_size, vocab) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab # default values self.encoder = None self.decoder = None self.h_projection = None self.c_projection = None self.att_projection = None self.combined_output_projection = None self.target_vocab_projection = None self.dropout = None # For sanity check only, not relevant to implementation self.gen_sanity_check = False self.counter = 0 ### YOUR CODE HERE (~8 Lines) ### TODO - Initialize the following variables: ### self.encoder (Bidirectional LSTM with bias) ### self.decoder (LSTM Cell with bias) ### self.h_projection (Linear Layer with no bias), called W_{h} in the PDF. ### self.c_projection (Linear Layer with no bias), called W_{c} in the PDF. ### self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF. ### self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF. ### self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF. ### self.dropout (Dropout Layer) ### ### Use the following docs to properly initialize these variables: ### LSTM: ### https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM ### LSTM Cell: ### https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell ### Linear Layer: ### https://pytorch.org/docs/stable/nn.html#torch.nn.Linear ### Dropout Layer: ### https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout self.encoder = nn.LSTM(embed_size, self.hidden_size, bidirectional=True) self.decoder = nn.LSTMCell(self.hidden_size + embed_size, self.hidden_size) self.h_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False) self.c_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False) self.att_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False) self.combined_output_projection = nn.Linear(3 * self.hidden_size, self.hidden_size, bias=False) self.target_vocab_projection = nn.Linear(self.hidden_size, len(self.vocab.tgt), bias=False) self.dropout = nn.Dropout(self.dropout_rate) ### END YOUR CODE def forward(self, source: List[List[str]], target: List[List[str]]) -> torch.Tensor: """ Take a mini-batch of source and target sentences, compute the log-likelihood of target sentences under the language models learned by the NMT system. @param source (List[List[str]]): list of source sentence tokens @param target (List[List[str]]): list of target sentence tokens, wrapped by `<s>` and `</s>` @returns scores (Tensor): a variable/tensor of shape (b, ) representing the log-likelihood of generating the gold-standard target sentence for each example in the input batch. Here b = batch size. 
""" # Compute sentence lengths source_lengths = [len(s) for s in source] # Convert list of lists into tensors source_padded = self.vocab.src.to_input_tensor( source, device=self.device) # Tensor: (src_len, b) target_padded = self.vocab.tgt.to_input_tensor( target, device=self.device) # Tensor: (tgt_len, b) ### Run the network forward: ### 1. Apply the encoder to `source_padded` by calling `self.encode()` ### 2. Generate sentence masks for `source_padded` by calling `self.generate_sent_masks()` ### 3. Apply the decoder to compute combined-output by calling `self.decode()` ### 4. Compute log probability distribution over the target vocabulary using the ### combined_outputs returned by the `self.decode()` function. enc_hiddens, dec_init_state = self.encode(source_padded, source_lengths) enc_masks = self.generate_sent_masks(enc_hiddens, source_lengths) combined_outputs = self.decode(enc_hiddens, enc_masks, dec_init_state, target_padded) P = F.log_softmax(self.target_vocab_projection(combined_outputs), dim=-1) # Zero out, probabilities for which we have nothing in the target text target_masks = (target_padded != self.vocab.tgt['<pad>']).float() # Compute log probability of generating true target words target_gold_words_log_prob = torch.gather( P, index=target_padded[1:].unsqueeze(-1), dim=-1).squeeze(-1) * target_masks[1:] scores = target_gold_words_log_prob.sum(dim=0) return scores def encode( self, source_padded: torch.Tensor, source_lengths: List[int] ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: """ Apply the encoder to source sentences to obtain encoder hidden states. Additionally, take the final states of the encoder and project them to obtain initial states for decoder. @param source_padded (Tensor): Tensor of padded source sentences with shape (src_len, b), where b = batch_size, src_len = maximum source sentence length. Note that these have already been sorted in order of longest to shortest sentence. @param source_lengths (List[int]): List of actual lengths for each of the source sentences in the batch @returns enc_hiddens (Tensor): Tensor of hidden units with shape (b, src_len, h*2), where b = batch size, src_len = maximum source sentence length, h = hidden size. @returns dec_init_state (tuple(Tensor, Tensor)): Tuple of tensors representing the decoder's initial hidden state and cell. """ enc_hiddens, dec_init_state = None, None ### YOUR CODE HERE (~ 8 Lines) ### TODO: ### 1. Construct Tensor `X` of source sentences with shape (src_len, b, e) using the source model embeddings. ### src_len = maximum source sentence length, b = batch size, e = embedding size. Note ### that there is no initial hidden state or cell for the decoder. ### 2. Compute `enc_hiddens`, `last_hidden`, `last_cell` by applying the encoder to `X`. ### - Before you can apply the encoder, you need to apply the `pack_padded_sequence` function to X. ### - After you apply the encoder, you need to apply the `pad_packed_sequence` function to enc_hiddens. ### - Note that the shape of the tensor returned by the encoder is (src_len, b, h*2) and we want to ### return a tensor of shape (b, src_len, h*2) as `enc_hiddens`. ### 3. Compute `dec_init_state` = (init_decoder_hidden, init_decoder_cell): ### - `init_decoder_hidden`: ### `last_hidden` is a tensor shape (2, b, h). The first dimension corresponds to forwards and backwards. ### Concatenate the forwards and backwards tensors to obtain a tensor shape (b, 2*h). ### Apply the h_projection layer to this in order to compute init_decoder_hidden. 
### This is h_0^{dec} in the PDF. Here b = batch size, h = hidden size ### - `init_decoder_cell`: ### `last_cell` is a tensor shape (2, b, h). The first dimension corresponds to forwards and backwards. ### Concatenate the forwards and backwards tensors to obtain a tensor shape (b, 2*h). ### Apply the c_projection layer to this in order to compute init_decoder_cell. ### This is c_0^{dec} in the PDF. Here b = batch size, h = hidden size ### ### See the following docs, as you may need to use some of the following functions in your implementation: ### Pack the padded sequence X before passing to the encoder: ### https://pytorch.org/docs/stable/nn.html#torch.nn.utils.rnn.pack_padded_sequence ### Pad the packed sequence, enc_hiddens, returned by the encoder: ### https://pytorch.org/docs/stable/nn.html#torch.nn.utils.rnn.pad_packed_sequence ### Tensor Concatenation: ### https://pytorch.org/docs/stable/torch.html#torch.cat ### Tensor Permute: ### https://pytorch.org/docs/stable/tensors.html#torch.Tensor.permute X = self.model_embeddings.source(source_padded) X = pack_padded_sequence(X, source_lengths) enc_hiddens, (last_hidden, last_cell) = self.encoder(X) enc_hiddens, _ = pad_packed_sequence(enc_hiddens) enc_hiddens = enc_hiddens.permute(1, 0, 2) init_decoder_hidden = self.h_projection( torch.cat((last_hidden[0], last_hidden[1]), 1)) init_decoder_cell = self.c_projection( torch.cat((last_cell[0], last_cell[1]), 1)) dec_init_state = (init_decoder_hidden, init_decoder_cell) ### END YOUR CODE return enc_hiddens, dec_init_state def decode(self, enc_hiddens: torch.Tensor, enc_masks: torch.Tensor, dec_init_state: Tuple[torch.Tensor, torch.Tensor], target_padded: torch.Tensor) -> torch.Tensor: """Compute combined output vectors for a batch. @param enc_hiddens (Tensor): Hidden states (b, src_len, h*2), where b = batch size, src_len = maximum source sentence length, h = hidden size. @param enc_masks (Tensor): Tensor of sentence masks (b, src_len), where b = batch size, src_len = maximum source sentence length. @param dec_init_state (tuple(Tensor, Tensor)): Initial state and cell for decoder @param target_padded (Tensor): Gold-standard padded target sentences (tgt_len, b), where tgt_len = maximum target sentence length, b = batch size. @returns combined_outputs (Tensor): combined output tensor (tgt_len, b, h), where tgt_len = maximum target sentence length, b = batch_size, h = hidden size """ # Chop off the <END> token for max length sentences. target_padded = target_padded[:-1] # Initialize the decoder state (hidden and cell) dec_state = dec_init_state # Initialize previous combined output vector o_{t-1} as zero batch_size = enc_hiddens.size(0) o_prev = torch.zeros(batch_size, self.hidden_size, device=self.device) # Initialize a list we will use to collect the combined output o_t on each step combined_outputs = [] ### YOUR CODE HERE (~9 Lines) ### TODO: ### 1. Apply the attention projection layer to `enc_hiddens` to obtain `enc_hiddens_proj`, ### which should be shape (b, src_len, h), ### where b = batch size, src_len = maximum source length, h = hidden size. ### This is applying W_{attProj} to h^enc, as described in the PDF. ### 2. Construct tensor `Y` of target sentences with shape (tgt_len, b, e) using the target model embeddings. ### where tgt_len = maximum target sentence length, b = batch size, e = embedding size. ### 3. Use the torch.split function to iterate over the time dimension of Y. 
### Within the loop, this will give you Y_t of shape (1, b, e) where b = batch size, e = embedding size. ### - Squeeze Y_t into a tensor of dimension (b, e). ### - Construct Ybar_t by concatenating Y_t with o_prev on their last dimension ### - Use the step function to compute the Decoder's next (cell, state) values ### as well as the new combined output o_t. ### - Append o_t to combined_outputs ### - Update o_prev to the new o_t. ### 4. Use torch.stack to convert combined_outputs from a list length tgt_len of ### tensors shape (b, h), to a single tensor shape (tgt_len, b, h) ### where tgt_len = maximum target sentence length, b = batch size, h = hidden size. ### ### Note: ### - When using the squeeze() function make sure to specify the dimension you want to squeeze ### over. Otherwise, you will remove the batch dimension accidentally, if batch_size = 1. ### ### You may find some of these functions useful: ### Zeros Tensor: ### https://pytorch.org/docs/stable/torch.html#torch.zeros ### Tensor Splitting (iteration): ### https://pytorch.org/docs/stable/torch.html#torch.split ### Tensor Dimension Squeezing: ### https://pytorch.org/docs/stable/torch.html#torch.squeeze ### Tensor Concatenation: ### https://pytorch.org/docs/stable/torch.html#torch.cat ### Tensor Stacking: ### https://pytorch.org/docs/stable/torch.html#torch.stack enc_hiddens_proj = self.att_projection(enc_hiddens) Y = self.model_embeddings.target(target_padded) for i in torch.split(Y, 1, dim=0): Y_t = i.squeeze(dim=0) Ybar_t = torch.cat((Y_t, o_prev), 1) dec_state, o_t, e_t = self.step(Ybar_t, dec_state, enc_hiddens, enc_hiddens_proj, enc_masks) combined_outputs.append(o_t) o_prev = o_t combined_outputs = torch.stack(combined_outputs, dim=0) ### END YOUR CODE return combined_outputs def step( self, Ybar_t: torch.Tensor, dec_state: Tuple[torch.Tensor, torch.Tensor], enc_hiddens: torch.Tensor, enc_hiddens_proj: torch.Tensor, enc_masks: torch.Tensor ) -> Tuple[Tuple, torch.Tensor, torch.Tensor]: """ Compute one forward step of the LSTM decoder, including the attention computation. @param Ybar_t (Tensor): Concatenated Tensor of [Y_t o_prev], with shape (b, e + h). The input for the decoder, where b = batch size, e = embedding size, h = hidden size. @param dec_state (tuple(Tensor, Tensor)): Tuple of tensors both with shape (b, h), where b = batch size, h = hidden size. First tensor is decoder's prev hidden state, second tensor is decoder's prev cell. @param enc_hiddens (Tensor): Encoder hidden states Tensor, with shape (b, src_len, h * 2), where b = batch size, src_len = maximum source length, h = hidden size. @param enc_hiddens_proj (Tensor): Encoder hidden states Tensor, projected from (h * 2) to h. Tensor is with shape (b, src_len, h), where b = batch size, src_len = maximum source length, h = hidden size. @param enc_masks (Tensor): Tensor of sentence masks shape (b, src_len), where b = batch size, src_len is maximum source length. @returns dec_state (tuple (Tensor, Tensor)): Tuple of tensors both shape (b, h), where b = batch size, h = hidden size. First tensor is decoder's new hidden state, second tensor is decoder's new cell. @returns combined_output (Tensor): Combined output Tensor at timestep t, shape (b, h), where b = batch size, h = hidden size. @returns e_t (Tensor): Tensor of shape (b, src_len). It is attention scores distribution. Note: You will not use this outside of this function. We are simply returning this value so that we can sanity check your implementation. 
""" combined_output = None ### YOUR CODE HERE (~3 Lines) ### TODO: ### 1. Apply the decoder to `Ybar_t` and `dec_state`to obtain the new dec_state. ### 2. Split dec_state into its two parts (dec_hidden, dec_cell) ### 3. Compute the attention scores e_t, a Tensor shape (b, src_len). ### Note: b = batch_size, src_len = maximum source length, h = hidden size. ### ### Hints: ### - dec_hidden is shape (b, h) and corresponds to h^dec_t in the PDF (batched) ### - enc_hiddens_proj is shape (b, src_len, h) and corresponds to W_{attProj} h^enc (batched). ### - Use batched matrix multiplication (torch.bmm) to compute e_t (be careful about the input/ output shapes!) ### - To get the tensors into the right shapes for bmm, you will need to do some squeezing and unsqueezing. ### - When using the squeeze() function make sure to specify the dimension you want to squeeze ### over. Otherwise, you will remove the batch dimension accidentally, if batch_size = 1. ### ### Use the following docs to implement this functionality: ### Batch Multiplication: ### https://pytorch.org/docs/stable/torch.html#torch.bmm ### Tensor Unsqueeze: ### https://pytorch.org/docs/stable/torch.html#torch.unsqueeze ### Tensor Squeeze: ### https://pytorch.org/docs/stable/torch.html#torch.squeeze dec_state = self.decoder(Ybar_t, dec_state) (dec_hidden, dec_cell) = dec_state e_t = torch.squeeze(torch.bmm(enc_hiddens_proj, torch.unsqueeze(dec_hidden, dim=2)), dim=2) ### END YOUR CODE # Set e_t to -inf where enc_masks has 1 if enc_masks is not None: e_t.data.masked_fill_(enc_masks.bool(), -float('inf')) ### YOUR CODE HERE (~6 Lines) ### TODO: ### 1. Apply softmax to e_t to yield alpha_t ### 2. Use batched matrix multiplication between alpha_t and enc_hiddens to obtain the ### attention output vector, a_t. ### - alpha_t is shape (b, src_len) ### - enc_hiddens is shape (b, src_len, 2h) ### - a_t should be shape (b, 2h) ### - You will need to do some squeezing and unsqueezing. ### Note: b = batch size, src_len = maximum source length, h = hidden size. ### ### 3. Concatenate dec_hidden with a_t to compute tensor U_t ### 4. Apply the combined output projection layer to U_t to compute tensor V_t ### 5. Compute tensor O_t by first applying the Tanh function and then the dropout layer. ### ### Use the following docs to implement this functionality: ### Softmax: ### https://pytorch.org/docs/stable/nn.html#torch.nn.functional.softmax ### Batch Multiplication: ### https://pytorch.org/docs/stable/torch.html#torch.bmm ### Tensor View: ### https://pytorch.org/docs/stable/tensors.html#torch.Tensor.view ### Tensor Concatenation: ### https://pytorch.org/docs/stable/torch.html#torch.cat ### Tanh: ### https://pytorch.org/docs/stable/torch.html#torch.tanh #print(e_t.shape) alpha_t = F.softmax(e_t, dim=1) a_t = torch.bmm(alpha_t.unsqueeze(dim=1), enc_hiddens).squeeze(1) U_t = torch.cat((dec_hidden, a_t), dim=1) V_t = self.combined_output_projection(U_t) O_t = self.dropout(torch.tanh(V_t)) ### END YOUR CODE combined_output = O_t return dec_state, combined_output, e_t def generate_sent_masks(self, enc_hiddens: torch.Tensor, source_lengths: List[int]) -> torch.Tensor: """ Generate sentence masks for encoder hidden states. @param enc_hiddens (Tensor): encodings of shape (b, src_len, 2*h), where b = batch size, src_len = max source length, h = hidden size. @param source_lengths (List[int]): List of actual lengths for each of the sentences in the batch. 
@returns enc_masks (Tensor): Tensor of sentence masks of shape (b, src_len), where src_len = max source length, h = hidden size. """ enc_masks = torch.zeros(enc_hiddens.size(0), enc_hiddens.size(1), dtype=torch.float) for e_id, src_len in enumerate(source_lengths): enc_masks[e_id, src_len:] = 1 return enc_masks.to(self.device) def beam_search(self, src_sent: List[str], beam_size: int = 5, max_decoding_time_step: int = 70) -> List[Hypothesis]: """ Given a single source sentence, perform beam search, yielding translations in the target language. @param src_sent (List[str]): a single source sentence (words) @param beam_size (int): beam size @param max_decoding_time_step (int): maximum number of time steps to unroll the decoding RNN @returns hypotheses (List[Hypothesis]): a list of hypothesis, each hypothesis has two fields: value: List[str]: the decoded target sentence, represented as a list of words score: float: the log-likelihood of the target sentence """ src_sents_var = self.vocab.src.to_input_tensor([src_sent], self.device) src_encodings, dec_init_vec = self.encode(src_sents_var, [len(src_sent)]) src_encodings_att_linear = self.att_projection(src_encodings) h_tm1 = dec_init_vec att_tm1 = torch.zeros(1, self.hidden_size, device=self.device) eos_id = self.vocab.tgt['</s>'] hypotheses = [['<s>']] hyp_scores = torch.zeros(len(hypotheses), dtype=torch.float, device=self.device) completed_hypotheses = [] t = 0 while len(completed_hypotheses) < beam_size and t < max_decoding_time_step: t += 1 hyp_num = len(hypotheses) exp_src_encodings = src_encodings.expand(hyp_num, src_encodings.size(1), src_encodings.size(2)) exp_src_encodings_att_linear = src_encodings_att_linear.expand( hyp_num, src_encodings_att_linear.size(1), src_encodings_att_linear.size(2)) y_tm1 = torch.tensor( [self.vocab.tgt[hyp[-1]] for hyp in hypotheses], dtype=torch.long, device=self.device) y_t_embed = self.model_embeddings.target(y_tm1) x = torch.cat([y_t_embed, att_tm1], dim=-1) (h_t, cell_t), att_t, _ = self.step(x, h_tm1, exp_src_encodings, exp_src_encodings_att_linear, enc_masks=None) # log probabilities over target words log_p_t = F.log_softmax(self.target_vocab_projection(att_t), dim=-1) live_hyp_num = beam_size - len(completed_hypotheses) continuing_hyp_scores = ( hyp_scores.unsqueeze(1).expand_as(log_p_t) + log_p_t).view(-1) top_cand_hyp_scores, top_cand_hyp_pos = torch.topk( continuing_hyp_scores, k=live_hyp_num) prev_hyp_ids = top_cand_hyp_pos // len(self.vocab.tgt) # integer division recovers the index of the originating hypothesis hyp_word_ids = top_cand_hyp_pos % len(self.vocab.tgt) new_hypotheses = [] live_hyp_ids = [] new_hyp_scores = [] for prev_hyp_id, hyp_word_id, cand_new_hyp_score in zip( prev_hyp_ids, hyp_word_ids, top_cand_hyp_scores): prev_hyp_id = prev_hyp_id.item() hyp_word_id = hyp_word_id.item() cand_new_hyp_score = cand_new_hyp_score.item() hyp_word = self.vocab.tgt.id2word[hyp_word_id] new_hyp_sent = hypotheses[prev_hyp_id] + [hyp_word] if hyp_word == '</s>': completed_hypotheses.append( Hypothesis(value=new_hyp_sent[1:-1], score=cand_new_hyp_score)) else: new_hypotheses.append(new_hyp_sent) live_hyp_ids.append(prev_hyp_id) new_hyp_scores.append(cand_new_hyp_score) if len(completed_hypotheses) == beam_size: break live_hyp_ids = torch.tensor(live_hyp_ids, dtype=torch.long, device=self.device) h_tm1 = (h_t[live_hyp_ids], cell_t[live_hyp_ids]) att_tm1 = att_t[live_hyp_ids] hypotheses = new_hypotheses hyp_scores = torch.tensor(new_hyp_scores, dtype=torch.float, device=self.device) if len(completed_hypotheses) == 0: completed_hypotheses.append( 
Hypothesis(value=hypotheses[0][1:], score=hyp_scores[0].item())) completed_hypotheses.sort(key=lambda hyp: hyp.score, reverse=True) return completed_hypotheses @property def device(self) -> torch.device: """ Determine which device to place the Tensors upon, CPU or GPU. """ return self.model_embeddings.source.weight.device @staticmethod def load(model_path: str): """ Load the model from a file. @param model_path (str): path to model """ params = torch.load(model_path, map_location=lambda storage, loc: storage) args = params['args'] model = NMT(vocab=params['vocab'], **args) model.load_state_dict(params['state_dict']) return model def save(self, path: str): """ Save the model to a file. @param path (str): path to the model """ print('save model parameters to [%s]' % path, file=sys.stderr) params = { 'args': dict(embed_size=self.model_embeddings.embed_size, hidden_size=self.hidden_size, dropout_rate=self.dropout_rate), 'vocab': self.vocab, 'state_dict': self.state_dict() } torch.save(params, path)
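The encode() method above depends on the pack/pad round trip; a self-contained sketch of that pattern with plain tensors (lengths sorted longest-first, as the docstring requires):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

embed_size, hidden_size = 16, 32
encoder = nn.LSTM(embed_size, hidden_size, bidirectional=True)

X = torch.randn(10, 3, embed_size)                 # (src_len, b, e), already padded
source_lengths = [10, 7, 4]                        # true lengths, longest first
packed = pack_padded_sequence(X, source_lengths)
enc_hiddens, (last_hidden, last_cell) = encoder(packed)
enc_hiddens, _ = pad_packed_sequence(enc_hiddens)  # back to (src_len, b, 2h)
enc_hiddens = enc_hiddens.permute(1, 0, 2)         # (b, src_len, 2h), as encode() returns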
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2): """ Init NMT Model. @param embed_size (int): Embedding size (dimensionality) @param hidden_size (int): Hidden Size, the size of hidden states (dimensionality) @param vocab (Vocab): Vocabulary object containing src and tgt languages See vocab.py for documentation. @param dropout_rate (float): Dropout probability, for attention """ super(NMT, self).__init__() self.model_embeddings = ModelEmbeddings(embed_size, vocab) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab # default values self.encoder = None self.decoder = None self.h_projection = None self.c_projection = None self.att_projection = None self.combined_output_projection = None self.target_vocab_projection = None self.dropout = None # For sanity check only, not relevant to implementation self.gen_sanity_check = False self.counter = 0 ### YOUR CODE HERE (~8 Lines) ### TODO - Initialize the following variables: ### self.encoder (Bidirectional LSTM with bias) ### self.decoder (LSTM Cell with bias) ### self.h_projection (Linear Layer with no bias), called W_{h} in the PDF. ### self.c_projection (Linear Layer with no bias), called W_{c} in the PDF. ### self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF. ### self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF. ### self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF. ### self.dropout (Dropout Layer) ### ### Use the following docs to properly initialize these variables: ### LSTM: ### https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM ### LSTM Cell: ### https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell ### Linear Layer: ### https://pytorch.org/docs/stable/nn.html#torch.nn.Linear ### Dropout Layer: ### https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout self.encoder = nn.LSTM(embed_size, self.hidden_size, bidirectional=True, bias=True) self.decoder = nn.LSTMCell(embed_size + self.hidden_size, self.hidden_size, bias=True) self.h_projection = nn.Linear( self.hidden_size * 2, self.hidden_size, bias=False) # projects the concatenated final hidden states (2h -> h) self.c_projection = nn.Linear( self.hidden_size * 2, self.hidden_size, bias=False) # projects the concatenated final cell states (2h -> h) self.att_projection = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=False) self.combined_output_projection = nn.Linear(self.hidden_size * 3, self.hidden_size, bias=False) self.target_vocab_projection = nn.Linear( self.hidden_size, len(self.vocab.tgt), bias=False ) # projects the combined output to vocabulary-size logits; no output-side embedding is needed self.dropout = nn.Dropout(self.dropout_rate)
class NMT(nn.Module): """ Simple Neural Machine Translation Model: - Bidirectional LSTM Encoder - Unidirectional LSTM Decoder - Global Attention Model (Luong, et al. 2015) """ def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2): """ Init NMT Model. @param embed_size (int): Embedding size (dimensionality) @param hidden_size (int): Hidden Size (dimensionality) @param vocab (Vocab): Vocabulary object containing src and tgt languages See vocab.py for documentation. @param dropout_rate (float): Dropout probability, for attention """ super(NMT, self).__init__() self.model_embeddings = ModelEmbeddings(embed_size, vocab) self.hidden_size = hidden_size self.dropout_rate = dropout_rate self.vocab = vocab self.encoder = nn.LSTM(embed_size, hidden_size, bias=True, bidirectional=True) self.decoder = nn.LSTMCell(embed_size + hidden_size, hidden_size, bias=True) self.h_projection = nn.Linear(2*hidden_size, hidden_size, bias=False) self.c_projection = nn.Linear(2*hidden_size, hidden_size, bias=False) self.att_projection = nn.Linear(2*hidden_size, hidden_size, bias=False) self.combined_output_projection = nn.Linear(3*hidden_size, hidden_size, bias=False) self.target_vocab_projection = nn.Linear(hidden_size, len(vocab.tgt), bias=False) self.dropout = nn.Dropout(dropout_rate) def forward(self, source: List[List[str]], target: List[List[str]]) -> torch.Tensor: """ Take a mini-batch of source and target sentences, compute the log-likelihood of target sentences under the language models learned by the NMT system. @param source (List[List[str]]): list of source sentence tokens @param target (List[List[str]]): list of target sentence tokens, wrapped by `<s>` and `</s>` @returns scores (Tensor): a variable/tensor of shape (b, ) representing the log-likelihood of generating the gold-standard target sentence for each example in the input batch. Here b = batch size. """ # Compute sentence lengths source_lengths = [len(s) for s in source] # Convert list of lists into tensors source_padded = self.vocab.src.to_input_tensor(source, device=self.device) # Tensor: (src_len, b) target_padded = self.vocab.tgt.to_input_tensor(target, device=self.device) # Tensor: (tgt_len, b) ### Run the network forward: ### 1. Apply the encoder to `source_padded` by calling `self.encode()` ### 2. Generate sentence masks for `source_padded` by calling `self.generate_sent_masks()` ### 3. Apply the decoder to compute combined-output by calling `self.decode()` ### 4. Compute log probability distribution over the target vocabulary using the ### combined_outputs returned by the `self.decode()` function. enc_hiddens, dec_init_state = self.encode(source_padded, source_lengths) enc_masks = self.generate_sent_masks(enc_hiddens, source_lengths) combined_outputs = self.decode(enc_hiddens, enc_masks, dec_init_state, target_padded) P = F.log_softmax(self.target_vocab_projection(combined_outputs), dim=-1) # Zero out probabilities for which we have nothing in the target text target_masks = (target_padded != self.vocab.tgt['<pad>']).float() # Compute log probability of generating true target words target_gold_words_log_prob = torch.gather(P, index=target_padded[1:].unsqueeze(-1), dim=-1).squeeze(-1) * target_masks[1:] scores = target_gold_words_log_prob.sum(dim=0) return scores def encode(self, source_padded: torch.Tensor, source_lengths: List[int]) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: """ Apply the encoder to source sentences to obtain encoder hidden states. 
Additionally, take the final states of the encoder and project them to obtain initial states for decoder. @param source_padded (Tensor): Tensor of padded source sentences with shape (src_len, b), where b = batch_size, src_len = maximum source sentence length. Note that these have already been sorted in order of longest to shortest sentence. @param source_lengths (List[int]): List of actual lengths for each of the source sentences in the batch @returns enc_hiddens (Tensor): Tensor of hidden units with shape (b, src_len, h*2), where b = batch size, src_len = maximum source sentence length, h = hidden size. @returns dec_init_state (tuple(Tensor, Tensor)): Tuple of tensors representing the decoder's initial hidden state and cell. """ enc_hiddens, dec_init_state = None, None X = self.model_embeddings.source(source_padded) packed_sequence = torch.nn.utils.rnn.pack_padded_sequence(X, source_lengths) enc_hiddens, (last_hidden, last_cell) = self.encoder(packed_sequence) enc_hiddens = torch.nn.utils.rnn.pad_packed_sequence(enc_hiddens)[0] enc_hiddens = enc_hiddens.permute(1,0,2) concat_last_hidden = torch.cat((last_hidden[0], last_hidden[1]), 1) concat_last_cell = torch.cat((last_cell[0], last_cell[1]), 1) dec_init_state = (self.h_projection(concat_last_hidden), self.c_projection(concat_last_cell)) return enc_hiddens, dec_init_state def decode(self, enc_hiddens: torch.Tensor, enc_masks: torch.Tensor, dec_init_state: Tuple[torch.Tensor, torch.Tensor], target_padded: torch.Tensor) -> torch.Tensor: """Compute combined output vectors for a batch. @param enc_hiddens (Tensor): Hidden states (b, src_len, h*2), where b = batch size, src_len = maximum source sentence length, h = hidden size. @param enc_masks (Tensor): Tensor of sentence masks (b, src_len), where b = batch size, src_len = maximum source sentence length. @param dec_init_state (tuple(Tensor, Tensor)): Initial state and cell for decoder @param target_padded (Tensor): Gold-standard padded target sentences (tgt_len, b), where tgt_len = maximum target sentence length, b = batch size. @returns combined_outputs (Tensor): combined output tensor (tgt_len, b, h), where tgt_len = maximum target sentence length, b = batch_size, h = hidden size """ # Chop off the <END> token for max length sentences. target_padded = target_padded[:-1] # Initialize the decoder state (hidden and cell) dec_state = dec_init_state # Initialize previous combined output vector o_{t-1} as zero batch_size = enc_hiddens.size(0) o_prev = torch.zeros(batch_size, self.hidden_size, device=self.device) # Initialize a list we will use to collect the combined output o_t on each step combined_outputs = [] enc_hiddens_proj = self.att_projection(enc_hiddens) Y = self.model_embeddings.target(target_padded) for Y_t in torch.split(Y, 1): Y_t = torch.squeeze(Y_t, dim=0) # specify dim so the batch dimension survives when batch_size = 1 Ybar_t = torch.cat((Y_t, o_prev), 1) dec_state, o_t, et = self.step(Ybar_t, dec_state, enc_hiddens, enc_hiddens_proj, enc_masks) combined_outputs.append(o_t) o_prev = o_t combined_outputs = torch.stack(combined_outputs) return combined_outputs def step(self, Ybar_t: torch.Tensor, dec_state: Tuple[torch.Tensor, torch.Tensor], enc_hiddens: torch.Tensor, enc_hiddens_proj: torch.Tensor, enc_masks: torch.Tensor) -> Tuple[Tuple, torch.Tensor, torch.Tensor]: """ Compute one forward step of the LSTM decoder, including the attention computation. @param Ybar_t (Tensor): Concatenated Tensor of [Y_t o_prev], with shape (b, e + h). The input for the decoder, where b = batch size, e = embedding size, h = hidden size. 
        @param dec_state (tuple(Tensor, Tensor)): Tuple of tensors both with shape (b, h), where b = batch size, h = hidden size.
                First tensor is decoder's prev hidden state, second tensor is decoder's prev cell.
        @param enc_hiddens (Tensor): Encoder hidden states Tensor, with shape (b, src_len, h * 2), where b = batch size,
                                     src_len = maximum source length, h = hidden size.
        @param enc_hiddens_proj (Tensor): Encoder hidden states Tensor, projected from (h * 2) to h. Tensor is with shape (b, src_len, h),
                                          where b = batch size, src_len = maximum source length, h = hidden size.
        @param enc_masks (Tensor): Tensor of sentence masks shape (b, src_len),
                                   where b = batch size, src_len is maximum source length.

        @returns dec_state (tuple(Tensor, Tensor)): Tuple of tensors both shape (b, h), where b = batch size, h = hidden size.
                First tensor is decoder's new hidden state, second tensor is decoder's new cell.
        @returns combined_output (Tensor): Combined output Tensor at timestep t, shape (b, h), where b = batch size, h = hidden size.
        @returns e_t (Tensor): Tensor of shape (b, src_len). It is attention scores distribution.
                               Note: You will not use this outside of this function.
                                     We are simply returning this value so that we can sanity check
                                     your implementation.
        """
        combined_output = None

        dec_state = self.decoder(Ybar_t, dec_state)
        dec_hidden, dec_cell = dec_state
        e_t = torch.bmm(enc_hiddens_proj, torch.unsqueeze(dec_hidden, 2))
        e_t = torch.squeeze(e_t, 2)

        # Set e_t to -inf where enc_masks has 1 (padding positions).
        # masked_fill_ expects a boolean mask; .byte() is deprecated for this use.
        if enc_masks is not None:
            e_t.data.masked_fill_(enc_masks.bool(), -float('inf'))

        alpha_t = torch.nn.functional.softmax(e_t, 1)
        a_t = torch.squeeze(torch.bmm(torch.unsqueeze(alpha_t, 1), enc_hiddens), 1)
        U_t = torch.cat((a_t, dec_hidden), 1)
        V_t = self.combined_output_projection(U_t)
        # Apply dropout to the combined output. The dropout layer is defined in
        # __init__ for exactly this purpose; the original left this call commented out.
        O_t = self.dropout(torch.tanh(V_t))

        combined_output = O_t
        return dec_state, combined_output, e_t

    def generate_sent_masks(self, enc_hiddens: torch.Tensor, source_lengths: List[int]) -> torch.Tensor:
        """ Generate sentence masks for encoder hidden states.

        @param enc_hiddens (Tensor): encodings of shape (b, src_len, 2*h), where b = batch size,
                                     src_len = max source length, h = hidden size.
        @param source_lengths (List[int]): List of actual lengths for each of the sentences in the batch.

        @returns enc_masks (Tensor): Tensor of sentence masks of shape (b, src_len),
                                     where b = batch size, src_len = max source length.
        """
        enc_masks = torch.zeros(enc_hiddens.size(0), enc_hiddens.size(1), dtype=torch.float)
        for e_id, src_len in enumerate(source_lengths):
            enc_masks[e_id, src_len:] = 1
        return enc_masks.to(self.device)

    def beam_search(self, src_sent: List[str], beam_size: int = 5, max_decoding_time_step: int = 70) -> List[Hypothesis]:
        """ Given a single source sentence, perform beam search, yielding translations in the target language.
        @param src_sent (List[str]): a single source sentence (words)
        @param beam_size (int): beam size
        @param max_decoding_time_step (int): maximum number of time steps to unroll the decoding RNN

        @returns hypotheses (List[Hypothesis]): a list of hypotheses, each hypothesis has two fields:
                value: List[str]: the decoded target sentence, represented as a list of words
                score: float: the log-likelihood of the target sentence
        """
        src_sents_var = self.vocab.src.to_input_tensor([src_sent], self.device)

        src_encodings, dec_init_vec = self.encode(src_sents_var, [len(src_sent)])
        src_encodings_att_linear = self.att_projection(src_encodings)

        h_tm1 = dec_init_vec
        att_tm1 = torch.zeros(1, self.hidden_size, device=self.device)

        eos_id = self.vocab.tgt['</s>']

        hypotheses = [['<s>']]
        hyp_scores = torch.zeros(len(hypotheses), dtype=torch.float, device=self.device)
        completed_hypotheses = []

        t = 0
        while len(completed_hypotheses) < beam_size and t < max_decoding_time_step:
            t += 1
            hyp_num = len(hypotheses)

            exp_src_encodings = src_encodings.expand(hyp_num,
                                                     src_encodings.size(1),
                                                     src_encodings.size(2))
            exp_src_encodings_att_linear = src_encodings_att_linear.expand(hyp_num,
                                                                           src_encodings_att_linear.size(1),
                                                                           src_encodings_att_linear.size(2))

            y_tm1 = torch.tensor([self.vocab.tgt[hyp[-1]] for hyp in hypotheses], dtype=torch.long, device=self.device)
            y_t_embed = self.model_embeddings.target(y_tm1)

            x = torch.cat([y_t_embed, att_tm1], dim=-1)

            (h_t, cell_t), att_t, _ = self.step(x, h_tm1,
                                                exp_src_encodings, exp_src_encodings_att_linear, enc_masks=None)

            # log probabilities over target words
            log_p_t = F.log_softmax(self.target_vocab_projection(att_t), dim=-1)

            live_hyp_num = beam_size - len(completed_hypotheses)
            continuing_hyp_scores = (hyp_scores.unsqueeze(1).expand_as(log_p_t) + log_p_t).view(-1)
            top_cand_hyp_scores, top_cand_hyp_pos = torch.topk(continuing_hyp_scores, k=live_hyp_num)

            # Integer division recovers which hypothesis each candidate extends;
            # plain `/` yields floats on recent PyTorch versions.
            prev_hyp_ids = torch.div(top_cand_hyp_pos, len(self.vocab.tgt), rounding_mode='floor')
            hyp_word_ids = top_cand_hyp_pos % len(self.vocab.tgt)

            new_hypotheses = []
            live_hyp_ids = []
            new_hyp_scores = []

            for prev_hyp_id, hyp_word_id, cand_new_hyp_score in zip(prev_hyp_ids, hyp_word_ids, top_cand_hyp_scores):
                prev_hyp_id = prev_hyp_id.item()
                hyp_word_id = hyp_word_id.item()
                cand_new_hyp_score = cand_new_hyp_score.item()

                hyp_word = self.vocab.tgt.id2word[hyp_word_id]
                new_hyp_sent = hypotheses[prev_hyp_id] + [hyp_word]
                if hyp_word == '</s>':
                    completed_hypotheses.append(Hypothesis(value=new_hyp_sent[1:-1],
                                                           score=cand_new_hyp_score))
                else:
                    new_hypotheses.append(new_hyp_sent)
                    live_hyp_ids.append(prev_hyp_id)
                    new_hyp_scores.append(cand_new_hyp_score)

            if len(completed_hypotheses) == beam_size:
                break

            live_hyp_ids = torch.tensor(live_hyp_ids, dtype=torch.long, device=self.device)
            h_tm1 = (h_t[live_hyp_ids], cell_t[live_hyp_ids])
            att_tm1 = att_t[live_hyp_ids]

            hypotheses = new_hypotheses
            hyp_scores = torch.tensor(new_hyp_scores, dtype=torch.float, device=self.device)

        if len(completed_hypotheses) == 0:
            completed_hypotheses.append(Hypothesis(value=hypotheses[0][1:],
                                                   score=hyp_scores[0].item()))

        completed_hypotheses.sort(key=lambda hyp: hyp.score, reverse=True)

        return completed_hypotheses

    @property
    def device(self) -> torch.device:
        """ Determine which device to place the Tensors upon, CPU or GPU.
        """
        return self.model_embeddings.source.weight.device

    @staticmethod
    def load(model_path: str):
        """ Load the model from a file.
        @param model_path (str): path to model
        """
        params = torch.load(model_path, map_location=lambda storage, loc: storage)
        args = params['args']
        model = NMT(vocab=params['vocab'], **args)
        model.load_state_dict(params['state_dict'])

        return model

    def save(self, path: str):
        """ Save the model to a file.

        @param path (str): path to the model
        """
        print('save model parameters to [%s]' % path, file=sys.stderr)

        params = {
            'args': dict(embed_size=self.model_embeddings.embed_size, hidden_size=self.hidden_size,
                         dropout_rate=self.dropout_rate),
            'vocab': self.vocab,
            'state_dict': self.state_dict()
        }

        torch.save(params, path)
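# Usage sketch (illustrative only): how the pieces above fit together during
# training and decoding. `Vocab.load`, the vocab file path, and the toy batch
# below are assumptions about the surrounding project (vocab.py / run.py),
# not code confirmed by this file. Note the source batch must be sorted from
# longest to shortest sentence, as encode() requires.
#
#     from vocab import Vocab
#
#     vocab = Vocab.load('vocab.json')                   # hypothetical path
#     model = NMT(embed_size=256, hidden_size=256, vocab=vocab)
#
#     src_sents = [['hello', 'world'], ['hi']]           # toy batch, sorted by length
#     tgt_sents = [['<s>', 'bonjour', '</s>'], ['<s>', 'salut', '</s>']]
#
#     scores = model(src_sents, tgt_sents)               # (b,) per-example log-likelihoods
#     loss = -scores.sum()                               # standard NLL training loss
#     loss.backward()
#
#     hyps = model.beam_search(['hello', 'world'], beam_size=5)
#     model.save('model.bin')                            # round-trip via save()/load()
#     model = NMT.load('model.bin')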
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
    """ Init NMT Model.

    @param embed_size (int): Embedding size (dimensionality)
    @param hidden_size (int): Hidden Size (dimensionality)
    @param vocab (Vocab): Vocabulary object containing src and tgt languages
                          See vocab.py for documentation.
    @param dropout_rate (float): Dropout probability, for attention
    """
    super(NMT, self).__init__()
    self.model_embeddings = ModelEmbeddings(embed_size, vocab)
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab

    # default values
    self.encoder = None
    self.decoder = None
    self.h_projection = None
    self.c_projection = None
    self.att_projection = None
    self.combined_output_projection = None
    self.target_vocab_projection = None
    self.dropout = None

    # YOUR CODE HERE (~8 Lines)
    # TODO - Initialize the following variables:
    #   self.encoder (Bidirectional LSTM with bias)
    #   self.decoder (LSTM Cell with bias)
    #   self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
    #   self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
    #   self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
    #   self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
    #   self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
    #   self.dropout (Dropout Layer)
    #
    # Use the following docs to properly initialize these variables:
    #   LSTM: https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
    #   LSTM Cell: https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
    #   Linear Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
    #   Dropout Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

    self.encoder = nn.LSTM(embed_size, self.hidden_size, bias=True, bidirectional=True)
    # embed_size + hidden_size: the decoder input concatenates the target embedding
    # with the combined output of the previous step.
    self.decoder = nn.LSTMCell(embed_size + self.hidden_size, self.hidden_size)
    # Used to initialize the hidden state of the decoder.
    self.h_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False)
    # Used to initialize the cell state of the decoder.
    self.c_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False)
    # Projects each encoder hidden state from (2h, 1) down to (h, 1).
    self.att_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False)
    # Attention is a sum of the encoder hidden states weighted by softmax scores.
    self.combined_output_projection = nn.Linear(3 * self.hidden_size, self.hidden_size, bias=False)
    # Applies dropout to the final combined output state.
    self.dropout = nn.Dropout(dropout_rate)
    # Projects the final hidden state to vocabulary size; a softmax over this
    # gives a probability for every word.
    self.target_vocab_projection = nn.Linear(self.hidden_size, len(vocab.tgt), bias=False)
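# Shape walk-through for the attention dimensions wired up above: a standalone
# sketch with made-up sizes, independent of the class. att_projection maps the
# (2h)-dim bidirectional encoder states down to h so they can be dotted with the
# h-dim decoder state, and combined_output_projection maps [a_t; h_dec] (3h) to h.
import torch
import torch.nn as nn
import torch.nn.functional as F

b, src_len, h = 4, 7, 16                        # batch, source length, hidden size
enc_hiddens = torch.randn(b, src_len, 2 * h)    # bidirectional encoder output
dec_hidden = torch.randn(b, h)                  # decoder hidden state

att_projection = nn.Linear(2 * h, h, bias=False)
combined_output_projection = nn.Linear(3 * h, h, bias=False)

enc_proj = att_projection(enc_hiddens)                         # (b, src_len, h)
e_t = torch.bmm(enc_proj, dec_hidden.unsqueeze(2)).squeeze(2)  # (b, src_len) scores
alpha_t = F.softmax(e_t, dim=1)                                # attention weights
a_t = torch.bmm(alpha_t.unsqueeze(1), enc_hiddens).squeeze(1)  # (b, 2h) context
u_t = torch.cat([a_t, dec_hidden], dim=1)                      # (b, 3h)
o_t = torch.tanh(combined_output_projection(u_t))              # (b, h) combined output
assert o_t.shape == (b, h)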
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
    """ Init NMT Model.

    @param embed_size (int): Embedding size (dimensionality)
    @param hidden_size (int): Hidden Size (dimensionality)
    @param vocab (Vocab): Vocabulary object containing src and tgt languages
                          See vocab.py for documentation.
    @param dropout_rate (float): Dropout probability, for attention
    """
    super(NMT, self).__init__()
    self.model_embeddings = ModelEmbeddings(embed_size, vocab)
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab

    # default values
    self.encoder = None
    self.decoder = None
    self.h_projection = None
    self.c_projection = None
    self.att_projection = None
    self.combined_output_projection = None
    self.target_vocab_projection = None
    self.dropout = None

    ### YOUR CODE HERE (~8 Lines)
    ### TODO - Initialize the following variables:
    ###     self.encoder (Bidirectional LSTM with bias)
    ###     self.decoder (LSTM Cell with bias)
    ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
    ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
    ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
    ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
    ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
    ###     self.dropout (Dropout Layer)
    ###
    ### Use the following docs to properly initialize these variables:
    ###     LSTM: https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
    ###     LSTM Cell: https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
    ###     Linear Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
    ###     Dropout Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

    # LSTM parameters (from the PyTorch docs):
    #   input_size - The number of expected features in the input x
    #   hidden_size - The number of features in the hidden state h
    #   num_layers - Number of recurrent layers. E.g., setting num_layers=2 would mean
    #                stacking two LSTMs together to form a stacked LSTM, with the second
    #                LSTM taking in outputs of the first LSTM and computing the final
    #                results. Default: 1
    #   bias - If False, then the layer does not use bias weights b_ih and b_hh. Default: True
    #   batch_first - If True, then the input and output tensors are provided as
    #                 (batch, seq, feature). Default: False
    #   dropout - If non-zero, introduces a Dropout layer on the outputs of each LSTM layer
    #             except the last layer, with dropout probability equal to dropout. Default: 0
    #   bidirectional - If True, becomes a bidirectional LSTM. Default: False
    self.encoder = nn.LSTM(input_size=embed_size,
                           hidden_size=hidden_size,
                           num_layers=1,
                           bias=True,
                           bidirectional=True)

    # LSTMCell parameters:
    #   input_size - The number of expected features in the input x
    #   hidden_size - The number of features in the hidden state h
    #   bias - If False, then the layer does not use bias weights b_ih and b_hh. Default: True
    self.decoder = nn.LSTMCell(input_size=embed_size + hidden_size,
                               hidden_size=hidden_size,
                               bias=True)

    # Linear parameters:
    #   in_features - size of each input sample
    #   out_features - size of each output sample
    #   bias - If set to False, the layer will not learn an additive bias. Default: True
    self.h_projection = nn.Linear(in_features=2 * hidden_size, out_features=hidden_size, bias=False)
    self.c_projection = nn.Linear(in_features=2 * hidden_size, out_features=hidden_size, bias=False)
    self.att_projection = nn.Linear(in_features=2 * hidden_size, out_features=hidden_size, bias=False)
    self.combined_output_projection = nn.Linear(in_features=3 * hidden_size, out_features=hidden_size, bias=False)
    self.target_vocab_projection = nn.Linear(in_features=hidden_size, out_features=len(self.vocab.tgt), bias=False)
    self.dropout = nn.Dropout(p=dropout_rate)
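# Quick standalone check of the "Linear Layer with no bias" requirement the
# comments above quote from the docs: with bias=False, nn.Linear registers no
# bias parameter at all, so only the weight matrix is learned.
import torch.nn as nn

proj = nn.Linear(in_features=512, out_features=256, bias=False)
assert proj.bias is None                  # no additive bias is created
assert proj.weight.shape == (256, 512)    # weight is (out_features, in_features)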
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
    """ Init NMT Model.

    @param embed_size (int): Embedding size (dimensionality)
    @param hidden_size (int): Hidden Size (dimensionality)
    @param vocab (Vocab): Vocabulary object containing src and tgt languages
                          See vocab.py for documentation.
    @param dropout_rate (float): Dropout probability, for attention
    """
    super(NMT, self).__init__()
    src_embed_size = 768  # fixed by the BERT model's hidden width
    tgt_embed_size = embed_size
    self.model_embeddings = ModelEmbeddings(src_embed_size, tgt_embed_size, vocab)

    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab

    # default values
    self.encoder = None
    self.decoder = None
    self.h_projection = None
    self.c_projection = None
    self.att_projection = None
    self.combined_output_projection = None
    self.target_vocab_projection = None
    self.dropout = None

    ### YOUR CODE HERE (~8 Lines)
    ### TODO - Initialize the following variables:
    ###     self.encoder (Bidirectional LSTM with bias)
    ###     self.decoder (LSTM Cell with bias)
    ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
    ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
    ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
    ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
    ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
    ###     self.dropout (Dropout Layer)
    ###
    ### Use the following docs to properly initialize these variables:
    ###     LSTM: https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
    ###     LSTM Cell: https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
    ###     Linear Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
    ###     Dropout Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

    # An 8-layer bidirectional LSTM over the 768-dim BERT source features,
    # with inter-layer dropout.
    self.encoder = nn.LSTM(src_embed_size, self.hidden_size, num_layers=8,
                           dropout=self.dropout_rate, bidirectional=True)
    # Decoder input is the target embedding concatenated with the previous
    # combined output o_{t-1}; this variant sizes it as 2 * hidden_size,
    # which assumes the target embedding size equals hidden_size.
    self.decoder = nn.LSTMCell(self.hidden_size * 2, self.hidden_size)
    # Input is twice the hidden size (concatenated forward and backward final
    # states); output is the hidden size.
    self.h_projection = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=False)
    self.c_projection = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=False)
    self.att_projection = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=False)
    self.combined_output_projection = nn.Linear(self.hidden_size * 3, self.hidden_size, bias=False)
    # Projects the combined output h to vocabulary logits, W_{vocab} h.
    self.target_vocab_projection = nn.Linear(self.hidden_size, len(self.vocab.tgt), bias=False)
    self.dropout = nn.Dropout(self.dropout_rate)
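# Sketch of where the 768-dim source features in this variant could come from.
# This is an assumption about what ModelEmbeddings does for the source side,
# shown here with the HuggingFace `transformers` package (recent versions);
# nothing in this file confirms these exact calls.
import torch
from transformers import BertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert = BertModel.from_pretrained('bert-base-uncased')

inputs = tokenizer('a short source sentence', return_tensors='pt')
with torch.no_grad():
    features = bert(**inputs).last_hidden_state  # (1, seq_len, 768), matching src_embed_size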
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2, no_char_decoder=False):
    """ Init NMT Model.

    @param embed_size (int): Embedding size (dimensionality)
    @param hidden_size (int): Hidden Size (dimensionality)
    @param vocab (Vocab): Vocabulary object containing src and tgt languages
                          See vocab.py for documentation.
    @param dropout_rate (float): Dropout probability, for attention
    """
    super(NMT, self).__init__()
    self.model_embeddings_source = ModelEmbeddings(embed_size, vocab.src)
    self.model_embeddings_target = ModelEmbeddings(embed_size, vocab.tgt)
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab
    self.embed_size = embed_size

    ### COPY OVER YOUR CODE FROM ASSIGNMENT 4
    # nn.LSTM runs a full RNN over an entire input sequence.
    self.encoder = nn.LSTM(input_size=embed_size, hidden_size=hidden_size, bidirectional=True, bias=True)
    # nn.LSTMCell computes a single timestep, so the decoder is unrolled manually.
    self.decoder = nn.LSTMCell(input_size=embed_size + hidden_size, hidden_size=hidden_size, bias=True)
    self.h_projection = nn.Linear(in_features=2 * hidden_size, out_features=hidden_size, bias=False)
    self.c_projection = nn.Linear(in_features=2 * hidden_size, out_features=hidden_size, bias=False)
    self.att_projection = nn.Linear(in_features=2 * hidden_size, out_features=hidden_size, bias=False)
    self.combined_output_projection = nn.Linear(in_features=3 * hidden_size, out_features=hidden_size, bias=False)
    self.target_vocab_projection = nn.Linear(in_features=hidden_size, out_features=len(vocab.tgt), bias=False)
    self.dropout = nn.Dropout(p=dropout_rate)
    ### END YOUR CODE FROM ASSIGNMENT 4

    if not no_char_decoder:
        self.charDecoder = CharDecoder(hidden_size, target_vocab=vocab.tgt)
    else:
        self.charDecoder = None
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2,
             spectrum_cnn_kernel_size=3, location_attention_window=64, no_char_decoder=False):
    """ Init NMT Model.

    @param embed_size (int): Embedding size (dimensionality)
    @param hidden_size (int): Hidden Size (dimensionality)
    @param vocab (Vocab): Vocabulary object containing src and tgt languages
                          See vocab.py for documentation.
    @param dropout_rate (float): Dropout probability, for attention
    """
    super(NMT, self).__init__()
    self.location_attention_window = location_attention_window
    self.spectrum_cnn_kernel_size = spectrum_cnn_kernel_size
    # 1-D convolution over the input spectrum frames.
    self.spectrumCNN = nn.Conv1d(embed_size, embed_size, self.spectrum_cnn_kernel_size)
    self.model_embeddings_target = ModelEmbeddings(embed_size, vocab.tgt)
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab

    # COPY OVER YOUR CODE FROM ASSIGNMENT 4
    self.encoder = nn.LSTM(embed_size, hidden_size, bidirectional=True)
    self.decoder = nn.LSTMCell(embed_size + hidden_size, hidden_size)
    self.h_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False)
    self.c_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False)
    # Location-aware attention components over a small window of previous alignments.
    self.loc_window = 5
    self.loc_att_projection = nn.Linear(embed_size, 1, bias=False)
    self.loc_att_conv1D = nn.Conv1d(self.loc_window, embed_size, 1)
    self.att_projection = nn.Linear(2 * hidden_size, hidden_size, bias=False)
    self.combined_output_projection = nn.Linear(3 * hidden_size, hidden_size, bias=False)
    self.target_vocab_projection = nn.Linear(hidden_size, len(vocab.tgt), bias=False)
    self.dropout = nn.Dropout(p=dropout_rate)
    # END YOUR CODE FROM ASSIGNMENT 4

    if not no_char_decoder:
        self.charDecoder = CharDecoder(hidden_size, target_vocab=vocab.tgt)
    else:
        self.charDecoder = None
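# Shape sketch for the spectrum CNN above (standalone, with made-up sizes):
# nn.Conv1d expects (batch, channels, time), so a spectrogram with embed_size
# feature bins per frame is convolved along the time axis, and with no padding
# the output is shorter by kernel_size - 1 frames.
import torch
import torch.nn as nn

embed_size, kernel_size = 80, 3
spectrum_cnn = nn.Conv1d(embed_size, embed_size, kernel_size)

frames = torch.randn(2, embed_size, 100)  # (batch, feature bins, time frames)
out = spectrum_cnn(frames)                # (2, embed_size, 98)
assert out.shape == (2, embed_size, 100 - (kernel_size - 1))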
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
    """ Init NMT Model.

    @param embed_size (int): Embedding size (dimensionality)
    @param hidden_size (int): Hidden Size (dimensionality)
    @param vocab (Vocab): Vocabulary object containing src and tgt languages
                          See vocab.py for documentation.
    @param dropout_rate (float): Dropout probability, for attention
    """
    super(NMT, self).__init__()
    self.model_embeddings = ModelEmbeddings(embed_size, vocab)
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab

    # default values
    self.encoder = None
    self.decoder = None
    self.h_projection = None
    self.c_projection = None
    self.att_projection = None
    self.combined_output_projection = None
    self.target_vocab_projection = None
    self.dropout = None

    ### YOUR CODE HERE (~8 Lines)
    ### TODO - Initialize the following variables:
    ###     self.encoder (Bidirectional LSTM with bias)
    ###     self.decoder (LSTM Cell with bias)
    ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
    ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
    ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
    ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
    ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
    ###     self.dropout (Dropout Layer)
    ###
    ### Use the following docs to properly initialize these variables:
    ###     LSTM: https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
    ###     LSTM Cell: https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
    ###     Linear Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
    ###     Dropout Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

    ### (c) Initialize the layers as required by the TODO above.
    ## LSTM layer. Its constructor arguments include:
    ##   input_size: input dimension (i.e. the word-vector dimension)
    ##   hidden_size: hidden-state dimension (the dimension of h)
    ##   num_layers: number of LSTM layers (vertical stacking depth)
    ##   bias: whether to use a bias, default True
    ##   batch_first: whether to move batch to the first dimension (for inputs not
    ##     already shaped (batch_size, seq_length, embedding_dim)), default False
    ##   dropout: dropout probability, default 0
    ##   bidirectional: whether the LSTM is bidirectional, default False
    self.encoder = nn.LSTM(embed_size, self.hidden_size, bias=True, bidirectional=True)

    ## LSTMCell layer: a single LSTM unit (structurally also a one-layer LSTM).
    ## (It differs from nn.LSTM in that an LSTMCell takes a single input x_t, while
    ## nn.LSTM takes the whole sequence x_0 ... x_T; to process a full sequence with
    ## an LSTMCell you must loop over timesteps.)
    ## Its constructor arguments include:
    ##   input_size: input dimension (i.e. the word-vector dimension)
    ##   hidden_size: hidden-state dimension (the dimension of h)
    ##   bias: whether to use a bias, default True
    self.decoder = nn.LSTMCell(embed_size + self.hidden_size, self.hidden_size, bias=True)

    ## Linear layers, as the name suggests. Arguments in order: input dimension,
    ## output dimension, and whether a bias is needed.
    self.h_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False)
    self.c_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False)
    self.att_projection = nn.Linear(2 * self.hidden_size, self.hidden_size, bias=False)
    self.combined_output_projection = nn.Linear(3 * self.hidden_size, self.hidden_size, bias=False)
    self.target_vocab_projection = nn.Linear(self.hidden_size, len(self.vocab.tgt), bias=False)

    ## Dropout layer, configured with dropout_rate.
    self.dropout = nn.Dropout(dropout_rate)
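# Illustration of the LSTM vs. LSTMCell distinction described in the comments
# above (standalone sketch with made-up sizes): nn.LSTM consumes a whole
# sequence at once, while nn.LSTMCell handles one timestep, so the decoder
# must be unrolled with an explicit loop.
import torch
import torch.nn as nn

seq_len, batch, input_size, hidden = 5, 3, 8, 16
x = torch.randn(seq_len, batch, input_size)

cell = nn.LSTMCell(input_size, hidden)
h = torch.zeros(batch, hidden)
c = torch.zeros(batch, hidden)
outputs = []
for t in range(seq_len):          # one LSTMCell call per timestep
    h, c = cell(x[t], (h, c))
    outputs.append(h)
outputs = torch.stack(outputs)    # (seq_len, batch, hidden), like nn.LSTM's output
assert outputs.shape == (seq_len, batch, hidden)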
class Node2(nn.Module):
    """ Node Class that inherits the BiLSTM models created in bilstm_model.py
        Beginning Node models have 2 BiLSTMs; future versions can have more
    """

    def __init__(self, embed_size, hidden_size, vocab, dropout_rate):
        super(Node2, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab
        print("vocab.num_labels: ", vocab.num_labels)
        self.num_labels = vocab.num_labels

        # One BiLSTM per half of each note.
        self.encoder0 = nn.LSTM(input_size=embed_size,
                                hidden_size=hidden_size,
                                bias=True,
                                bidirectional=True)
        self.encoder1 = nn.LSTM(input_size=embed_size,
                                hidden_size=hidden_size,
                                bias=True,
                                bidirectional=True)
        # Use the configured dropout probability (the original nn.Dropout() ignored it).
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.attention_projection = nn.Linear(in_features=2 * hidden_size,
                                              out_features=self.num_labels,
                                              bias=False)
        self.attention_softmax = nn.Softmax(dim=0)
        # Two-layer label head: 2h -> 100 -> 1.
        self.labels_projection = nn.Linear(in_features=2 * hidden_size,
                                           out_features=100,
                                           bias=False)
        self.labels_projection2 = nn.Linear(in_features=100,
                                            out_features=1,
                                            bias=False)

    def forward(self, in_sents: List[List[str]], target_labels: List[List[int]]):
        # in_sents is (num_notes, note_length); each note is split in half, and
        # each half is run through its own BiLSTM.
        num_notes = len(in_sents)
        length_of_each_note = len(in_sents[0]) // 2

        # Convert list of lists into a tensor: (src_len, b)
        source_padded = self.vocab.notes_.to_input_tensor(in_sents, device=self.device)
        X = self.model_embeddings.note_embeds(source_padded)

        X0 = X[:length_of_each_note, :, :]
        X1 = X[length_of_each_note:, :, :]
        enc_hiddens0, _ = self.encoder0(X0)
        enc_hiddens1, _ = self.encoder1(X1)
        enc_hiddens = torch.cat([enc_hiddens0, enc_hiddens1], 0)

        # Per-label attention over all timesteps, then a weighted sum of the
        # encoder states: M has shape (b, num_labels, 2h).
        alpha = self.attention_projection(enc_hiddens)
        alpha_soft = self.attention_softmax(alpha)
        M = torch.bmm(alpha_soft.permute([1, 2, 0]), enc_hiddens.permute([1, 0, 2]))

        M = self.dropout1(M)
        M = self.labels_projection(M)
        M = F.relu(M)
        scores = self.labels_projection2(M)
        scores = torch.sigmoid(torch.squeeze(scores, -1))
        return scores

    def encode(self, source_padded: torch.Tensor,
               source_lengths: List[int]) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
        pass

    @property
    def device(self) -> torch.device:
        """ Determine which device to place the Tensors upon, CPU or GPU.
""" return self.model_embeddings.note_embeds.weight.device @staticmethod def load(model_path: str): """ Load the model from a file. @param model_path (str): path to model """ # params = torch.load(model_path, map_location=lambda storage, loc: storage) # args = params['args'] # model = NMT(vocab=params['vocab'], **args) # model.load_state_dict(params['state_dict']) params = torch.load(model_path, map_location=lambda storage, loc: storage) args = params['args'] model = Node(vocab=params['vocab'], **args) model.load_state_dict(params['state_dict']) return model def save(self, path: str): """ Save the odel to a file. @param path (str): path to the model """ print('save model parameters to [%s]' % path, file=sys.stderr) # params = { # 'args': dict(embed_size=self.model_embeddings.embed_size, hidden_size=self.hidden_size, dropout_rate=self.dropout_rate), # 'vocab': self.vocab, # 'state_dict': self.state_dict() # } params = { 'args': dict(embed_size=self.model_embeddings.embed_size, hidden_size=self.hidden_size, dropout_rate=self.dropout_rate), 'vocab': self.vocab, 'state_dict': self.state_dict() } torch.save(params, path)