def build_encoder(config: Dict) -> Tuple[RecurrentEncoder, str]:
    vocabulary = from_nematus_json(
        config["src_vocabulary"],
        max_size=config["n_words_src"],
        pad_to_max_size=True)

    vocabulary_ini = VOCABULARY_TEMPLATE.format(
        "src", config["src_vocabulary"], config["n_words_src"])

    inp_seq_name = "{}_input".format(ENCODER_NAME)
    inp_seq = EmbeddedSequence(
        name=inp_seq_name,
        vocabulary=vocabulary,
        data_id="source",
        embedding_size=config["embedding_size"])

    encoder = RecurrentEncoder(
        name=ENCODER_NAME,
        input_sequence=inp_seq,
        rnn_size=config["rnn_size"],
        rnn_cell="NematusGRU")

    encoder_ini = ENCODER_TEMPLATE.format(
        ENCODER_NAME, config["rnn_size"], inp_seq_name,
        config["embedding_size"], config["max_length"])

    return encoder, "\n".join([vocabulary_ini, encoder_ini])
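# Illustrative usage sketch (not part of the original module). The config
# values below are assumptions chosen for the example; "src_vocabulary"
# would point at an existing Nematus JSON vocabulary file.

nematus_config = {
    "src_vocabulary": "model/vocab.src.json",  # hypothetical path
    "n_words_src": 30000,
    "embedding_size": 500,
    "rnn_size": 1024,
    "max_length": 50,
}

encoder, ini_section = build_encoder(nematus_config)
# `encoder` is the assembled RecurrentEncoder; `ini_section` is the
# corresponding Neural Monkey INI snippet built from the templates.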
def build_encoder(
        hparams: Dict,
        vocab_path: str) -> Tuple[TransformerEncoder, Vocabulary, str]:
    # NOTE: The return annotation previously said Tuple[TransformerEncoder,
    # str], but the function returns three values including the vocabulary.
    vocabulary = from_t2t_vocabulary(vocab_path)
    vocabulary_ini = VOCABULARY_TEMPLATE.format(vocab_path)

    inp_seq_name = "{}_input".format(ENCODER_NAME)
    inp_seq = EmbeddedSequence(
        name=inp_seq_name,
        vocabulary=vocabulary,
        data_id="source_wp",
        embedding_size=hparams["embedding_size"],
        scale_embeddings_by_depth=(
            hparams["multiply_embedding_mode"] == "sqrt_depth"),
        add_end_symbol=True)

    encoder = TransformerEncoder(
        name=ENCODER_NAME,
        input_sequence=inp_seq,
        ff_hidden_size=hparams["ff_hidden_size"],
        depth=hparams["depth"],
        n_heads=hparams["n_heads"],
        target_space_id=21,
        use_att_transform_bias=True)

    encoder_ini = ENCODER_TEMPLATE.format(
        inp_seq_name, hparams["embedding_size"],
        hparams["multiply_embedding_mode"] == "sqrt_depth",
        hparams["max_length"], ENCODER_NAME, hparams["ff_hidden_size"],
        hparams["depth"], hparams["n_heads"])

    return encoder, vocabulary, "\n".join([vocabulary_ini, encoder_ini])
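# Illustrative usage sketch (assumed values). The hparams below mirror a
# typical tensor2tensor "base" transformer setup; none of these numbers
# come from this repository, and the vocabulary path is hypothetical.

t2t_hparams = {
    "embedding_size": 512,
    "multiply_embedding_mode": "sqrt_depth",
    "ff_hidden_size": 2048,
    "depth": 6,
    "n_heads": 8,
    "max_length": 100,
}

encoder, vocabulary, ini_section = build_encoder(
    t2t_hparams, "model/vocab.wp")  # hypothetical wordpiece vocabulary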
def __init__(self,
             name: str,
             vocabulary: Vocabulary,
             data_id: str,
             embedding_size: int,
             rnn_size: int,
             rnn_cell: str = "GRU",
             rnn_direction: str = "bidirectional",
             max_input_len: int = None,
             dropout_keep_prob: float = 1.0,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Create a new instance of the sentence encoder.

    Arguments:
        name: ModelPart name.
        vocabulary: The input vocabulary.
        data_id: The input sequence data ID.
        embedding_size: The dimension of the embedding vectors in the
            input sequence.
        max_input_len: Maximum length of the input sequence (disregard
            tokens after this position).
        rnn_size: The dimension of the RNN hidden state vector.
        rnn_cell: One of "GRU", "NematusGRU", "LSTM". Which kind of
            memory cell to use.
        rnn_direction: One of "forward", "backward", "bidirectional".
            In what order to process the input sequence. Note that
            choosing "bidirectional" will double the resulting vector
            dimension as well as the number of encoder parameters.
        dropout_keep_prob: 1 - dropout probability.
        save_checkpoint: ModelPart save checkpoint file.
        load_checkpoint: ModelPart load checkpoint file.
    """
    check_argument_types()

    s_ckp = "input_{}".format(save_checkpoint) if save_checkpoint else None
    l_ckp = "input_{}".format(load_checkpoint) if load_checkpoint else None

    # TODO! Representation runner needs this. It is not simple to do it in
    # recurrent encoder since there may be more source data series. The
    # best way could be to enter the data_id parameter manually to the
    # representation runner.
    self.data_id = data_id

    input_sequence = EmbeddedSequence(
        name="{}_input".format(name),
        vocabulary=vocabulary,
        data_id=data_id,
        embedding_size=embedding_size,
        max_length=max_input_len,
        save_checkpoint=s_ckp,
        load_checkpoint=l_ckp)

    RecurrentEncoder.__init__(
        self,
        name=name,
        input_sequence=input_sequence,
        rnn_size=rnn_size,
        rnn_cell=rnn_cell,
        rnn_direction=rnn_direction,
        dropout_keep_prob=dropout_keep_prob,
        save_checkpoint=save_checkpoint,
        load_checkpoint=load_checkpoint)
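# Illustrative usage sketch (hypothetical values): a bidirectional GRU
# sentence encoder built with the arguments documented in the docstring
# above. Note that with rnn_direction="bidirectional" the encoder output
# has dimension 2 * rnn_size.

encoder = SentenceEncoder(
    name="sentence_encoder",
    vocabulary=vocabulary,  # an existing Vocabulary instance is assumed
    data_id="source",
    embedding_size=300,
    rnn_size=256,
    rnn_cell="GRU",
    rnn_direction="bidirectional",
    max_input_len=50,
    dropout_keep_prob=0.8)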
def test_reuse(self):
    vocabulary = Vocabulary()
    vocabulary.add_word("a")
    vocabulary.add_word("b")

    seq1 = EmbeddedSequence(
        name="seq1",
        vocabulary=vocabulary,
        data_id="id",
        embedding_size=10)

    seq2 = EmbeddedSequence(
        name="seq2",
        vocabulary=vocabulary,
        embedding_size=10,
        data_id="id")

    seq3 = EmbeddedSequence(
        name="seq3",
        vocabulary=vocabulary,
        data_id="id",
        embedding_size=10,
        reuse=seq1)

    # "Blessing": accessing the embedding_matrix property forces the
    # underlying variables to be created.
    self.assertIsNotNone(seq1.embedding_matrix)
    self.assertIsNotNone(seq2.embedding_matrix)
    self.assertIsNotNone(seq3.embedding_matrix)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    params = sess.run((seq1.embedding_matrix, seq2.embedding_matrix,
                       seq3.embedding_matrix))

    # seq1 and seq2 own independent embeddings, so their randomly
    # initialized values should differ; seq3 reuses seq1's variables,
    # so those two matrices must be identical.
    with self.assertRaises(AssertionError):
        assert_array_equal(params[0], params[1])

    assert_array_equal(params[0], params[2])
def __init__(self,
             name: str,
             vocabulary: Vocabulary,
             data_id: str,
             embedding_size: int,
             rnn_size: int,
             max_input_len: int = None,
             dropout_keep_prob: float = 1.0,
             rnn_cell: str = "GRU",
             attention_type: type = None,
             attention_fertility: int = 3,
             attention_state_size: int = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Create a new instance of the sentence encoder."""
    # TODO Think this through.
    s_ckp = "input_{}".format(save_checkpoint) if save_checkpoint else None
    l_ckp = "input_{}".format(load_checkpoint) if load_checkpoint else None

    # TODO! Representation runner needs this. It is not simple to do it in
    # recurrent encoder since there may be more source data series. The
    # best way could be to enter the data_id parameter manually to the
    # representation runner.
    self.data_id = data_id

    input_sequence = EmbeddedSequence(
        name="{}_input".format(name),
        vocabulary=vocabulary,
        data_id=data_id,
        embedding_size=embedding_size,
        max_length=max_input_len,
        save_checkpoint=s_ckp,
        load_checkpoint=l_ckp)

    RecurrentEncoder.__init__(
        self,
        name=name,
        input_sequence=input_sequence,
        rnn_size=rnn_size,
        dropout_keep_prob=dropout_keep_prob,
        rnn_cell=rnn_cell,
        attention_type=attention_type,
        attention_fertility=attention_fertility,
        attention_state_size=attention_state_size,
        save_checkpoint=save_checkpoint,
        load_checkpoint=load_checkpoint)
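# Illustrative usage sketch (hypothetical): the same constructor with an
# attention mechanism attached. `MyAttention` is a stand-in name for an
# attention class from this codebase; it is an assumption for the example,
# not a real import.

encoder = SentenceEncoder(
    name="attentive_encoder",
    vocabulary=vocabulary,  # an existing Vocabulary instance is assumed
    data_id="source",
    embedding_size=300,
    rnn_size=256,
    attention_type=MyAttention,   # hypothetical attention class
    attention_fertility=3,        # used by coverage-style attention
    attention_state_size=256)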
#!/usr/bin/env python3.5
"""Test init methods of encoders."""

import unittest
import copy

from typing import Dict, List, Any, Iterable

from neuralmonkey.encoders.numpy_encoder import (VectorEncoder,
                                                 PostCNNImageEncoder)
from neuralmonkey.encoders.recurrent import SentenceEncoder
from neuralmonkey.encoders.sentence_cnn_encoder import SentenceCNNEncoder
from neuralmonkey.model.sequence import EmbeddedSequence
from neuralmonkey.tests.test_vocabulary import VOCABULARY

INPUT_SEQUENCE = EmbeddedSequence("seq", VOCABULARY, "marmelade", 300)

SENTENCE_ENCODER_GOOD = {
    "name": ["encoder"],
    "vocabulary": [VOCABULARY],
    "data_id": ["marmelade"],
    "embedding_size": [20],
    "rnn_size": [30],
    "max_input_len": [None, 15],
    "dropout_keep_prob": [0.5, 1.],
}

SENTENCE_ENCODER_BAD = {
    "nonexistent": ["ahoj"],
    "name": [None, 1],
    "vocabulary": [0, None, "ahoj", dict()],