Code Example #1
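
This example shows the constructor of a Transformer-style decoder block extended with a contextual capsule layer: self-attention, layer normalization, a dynamic-routing capsule layer, and a multi-input feed-forward layer that folds the capsule output back into the model dimension.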

import torch.nn as nn

# MultiHeadedAttention, PositionwiseFeedForward, ContextualCapsuleLayer,
# MultiInputPositionwiseFeedForward, and INFO are assumed to be provided by
# the surrounding project's modules.


class DecoderBlock(nn.Module):
    def __init__(self,
                 d_model,
                 d_inner_hid,
                 n_head,
                 dim_per_head,
                 dropout=0.1,
                 dim_capsule=100,
                 num_capsules=0,
                 null_capsule=False):
        super(DecoderBlock, self).__init__()

        self.slf_attn = MultiHeadedAttention(head_count=n_head,
                                             model_dim=d_model,
                                             dropout=dropout,
                                             dim_per_head=dim_per_head)
        # self.ctx_attn = MultiHeadedAttention(head_count=n_head, model_dim=d_model, dropout=dropout,
        #                                      dim_per_head=dim_per_head)
        self.pos_ffn = PositionwiseFeedForward(size=d_model,
                                               hidden_size=d_inner_hid)

        self.layer_norm_1 = nn.LayerNorm(d_model)
        self.layer_norm_2 = nn.LayerNorm(d_model)

        self.dropout = nn.Dropout(dropout)

        # contextual capsule layer
        self.apply_capsule = True
        # self.pre_capsule_layer_norm = nn.LayerNorm(d_model)

        # num_capsules must be positive and divide dim_capsule evenly so that
        # each capsule receives an equal slice of the capsule dimension
        assert num_capsules > 0 and dim_capsule % num_capsules == 0
        self.dim_per_cap = dim_capsule // num_capsules
        # the capsule output is later consumed as three equal parts
        dim_per_part = dim_capsule // 3
        total_num_capsules = num_capsules

        self.null_caps = null_capsule
        if null_capsule:
            INFO("Using Null Capsules to attract irrelevant routing.")
            # one extra null capsule per three regular capsules absorbs
            # routing weight for inputs that fit no real capsule
            total_num_capsules += num_capsules // 3

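        # route the d_model-dim input capsules into total_num_capsules output
        # capsules of dim_per_cap dimensions each, refined over 3 iterations
        # of dynamic routing conditioned on a d_model-dim context vector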
        self.capsule_layer = ContextualCapsuleLayer(
            num_out_caps=total_num_capsules,
            num_in_caps=None,
            dim_in_caps=d_model,
            dim_out_caps=self.dim_per_cap,
            dim_context=d_model,
            num_iterations=3,
            share_route_weights_for_in_caps=True)

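        # fuse the block's attention output with the three equal-sized
        # capsule parts through a multi-input position-wise feed-forward layer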
        self.out_and_cap_ffn = MultiInputPositionwiseFeedForward(
            size=d_model,
            hidden_size=d_inner_hid,
            dropout=dropout,
            inp_sizes=[dim_per_part, dim_per_part, dim_per_part])