コード例 #1
0
ファイル: experiment.py プロジェクト: masonreznov/nn
def autoregressive_model(c: Configs):
    """
    Build the feedback-transformer based autoregressive model.

    One `FeedbackTransformerLayer` (feedback attention plus a feed-forward
    network) is passed to `FeedbackTransformer` together with `c.n_layers`.
    The transformer is wrapped in `AutoregressiveModel` along with
    `c.n_tokens` and `c.d_model`, and the result of `.to(c.device)` is
    returned.
    """
    from labml_nn.transformers.feedback import (
        FeedbackAttention,
        FeedbackTransformer,
        FeedbackTransformerLayer,
        FeedForward,
    )

    # Sub-modules are created innermost-first, attention before feed-forward,
    # which is the order a single nested call expression would build them in.
    attention = FeedbackAttention(c.heads, c.d_model, c.dropout)
    ffn = FeedForward(c.d_model, c.d_ff, c.dropout)
    layer = FeedbackTransformerLayer(
        d_model=c.d_model,
        attn=attention,
        feed_forward=ffn,
        dropout_prob=c.dropout,
    )
    transformer = FeedbackTransformer(layer, c.n_layers)

    model = AutoregressiveModel(c.n_tokens, c.d_model, transformer)
    return model.to(c.device)
コード例 #2
0
ファイル: experiment.py プロジェクト: Sandy4321/nn-1
def feedback_transformer(c: Configs):
    """
    Create the [original feedback transformer](index.html) model.

    The layer prototype (feedback attention plus a feed-forward network) and
    `c.n_layers` are given to `FeedbackTransformer`; that transformer is then
    wrapped in `AutoregressiveModel` with `c.n_tokens` and `c.d_model`, and
    the result of `.to(c.device)` is returned.
    """
    from labml_nn.transformers.feedback import (
        FeedbackAttention,
        FeedbackTransformer,
        FeedbackTransformerLayer,
        FeedForward,
    )

    # Build the pieces innermost-first (attention, then feed-forward) so the
    # construction order matches a single nested call expression.
    attention = FeedbackAttention(c.heads, c.d_model, c.dropout)
    ffn = FeedForward(c.d_model, c.d_ff, c.dropout)
    layer = FeedbackTransformerLayer(
        d_model=c.d_model,
        attn=attention,
        feed_forward=ffn,
        dropout_prob=c.dropout,
    )
    transformer = FeedbackTransformer(layer, c.n_layers)

    model = AutoregressiveModel(c.n_tokens, c.d_model, transformer)
    return model.to(c.device)
コード例 #3
0
ファイル: experiment.py プロジェクト: Sandy4321/nn-1
def feedback_transformer_kv(c: Configs):
    """
    Create the [updated feedback transformer](index.html#kv_shared), with
    precalculated keys and values.

    The attention module is created with `is_kv_precomputed=True`, and
    `FeedbackTransformerKV` receives `c.d_model` and `c.heads` in addition to
    the layer prototype and `c.n_layers`. The transformer is wrapped in
    `AutoregressiveModel` and the result of `.to(c.device)` is returned.
    """
    from labml_nn.transformers.feedback import (
        FeedbackAttention,
        FeedbackTransformerKV,
        FeedbackTransformerLayer,
        FeedForward,
    )

    # Build the pieces innermost-first (attention, then feed-forward) so the
    # construction order matches a single nested call expression.
    attention = FeedbackAttention(
        c.heads,
        c.d_model,
        c.dropout,
        is_kv_precomputed=True,
    )
    ffn = FeedForward(c.d_model, c.d_ff, c.dropout)
    layer = FeedbackTransformerLayer(
        d_model=c.d_model,
        attn=attention,
        feed_forward=ffn,
        dropout_prob=c.dropout,
    )
    transformer = FeedbackTransformerKV(layer, c.n_layers, c.d_model, c.heads)

    model = AutoregressiveModel(c.n_tokens, c.d_model, transformer)
    return model.to(c.device)