Example #1
 def __init__(self, in_dim):
     super().__init__()
     self.channel_in = in_dim
     self.query_conv = nn.Conv1D(
         in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1)
     self.key_conv = nn.Conv1D(
         in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1)
     self.value_conv_vis = nn.Conv1D(
         in_channels=in_dim, out_channels=in_dim, kernel_size=1)
     self.value_conv_word = nn.Conv1D(
         in_channels=in_dim, out_channels=in_dim, kernel_size=1)
     self.softmax_vis = nn.Softmax(axis=-1)
     self.softmax_word = nn.Softmax(axis=-2)
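The two softmax layers normalize an affinity map along different axes. A minimal sketch of what that means, assuming a [batch, N_vis, N_word] affinity map (the forward pass is not part of the original snippet):

import paddle
import paddle.nn as nn

# axis=-1 normalizes over the word dimension, axis=-2 over the visual dimension.
affinity = paddle.randn([1, 4, 6])
attn_vis = nn.Softmax(axis=-1)(affinity)    # each row sums to 1
attn_word = nn.Softmax(axis=-2)(affinity)   # each column sums to 1
print(attn_vis.sum(axis=-1))   # all ones
print(attn_word.sum(axis=-2))  # all ones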
Example #2
    def __init__(self,
                 num_attention_heads,
                 attention_probs_dropout_prob,
                 cin,
                 q_groups=1,
                 k_groups=1,
                 v_groups=1):
        super().__init__()
        if cin % num_attention_heads != 0:
            raise ValueError(
                f"cin ({cin}) is not a multiple of the number of attention heads ({num_attention_heads})"
            )
        self.num_attention_heads = num_attention_heads
        self.attention_head_size = int(cin / num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Conv1D(in_channels=cin,
                               out_channels=cin,
                               kernel_size=1,
                               groups=q_groups)
        self.key = nn.Conv1D(in_channels=cin,
                             out_channels=cin,
                             kernel_size=1,
                             groups=k_groups)
        self.value = nn.Conv1D(in_channels=cin,
                               out_channels=cin,
                               kernel_size=1,
                               groups=v_groups)

        self.dropout = nn.Dropout(attention_probs_dropout_prob)
        self.softmax = nn.Softmax(axis=-1)

        self.matmul_qk = MatMulWrapper()
        self.matmul_qkv = MatMulWrapper()
Example #3
    def __init__(self,
                 attention_layer,
                 vocab_size,
                 num_classes,
                 emb_dim=128,
                 lstm_hidden_size=196,
                 fc_hidden_size=96,
                 lstm_layers=1,
                 dropout_rate=0.0,
                 padding_idx=0):
        super().__init__()
        self.padding_idx = padding_idx

        self.embedder = nn.Embedding(num_embeddings=vocab_size,
                                     embedding_dim=emb_dim,
                                     padding_idx=padding_idx)
        self.bilstm = nn.LSTM(input_size=emb_dim,
                              hidden_size=lstm_hidden_size,
                              num_layers=lstm_layers,
                              dropout=dropout_rate,
                              direction='bidirect')
        self.attention = attention_layer
        if isinstance(attention_layer, SelfAttention):
            self.fc = nn.Linear(lstm_hidden_size, fc_hidden_size)
        elif isinstance(attention_layer, SelfInteractiveAttention):
            self.fc = nn.Linear(lstm_hidden_size * 2, fc_hidden_size)
        else:
            raise RuntimeError("Unknown attention type %s." %
                               attention_layer.__class__.__name__)
        self.output_layer = nn.Linear(fc_hidden_size, num_classes)
        self.softmax = nn.Softmax(axis=1)
Example #4
 def __init__(self, dim=32):
     super(Attention, self).__init__()
     self.dim = dim
     self.q_layer = nn.Linear(dim, dim, bias_attr=False)
     self.k_layer = nn.Linear(dim, dim, bias_attr=False)
     self.v_layer = nn.Linear(dim, dim, bias_attr=False)
     self.softmax = nn.Softmax(1)
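A minimal sketch of how a forward pass could use these layers, assuming input of shape [batch, seq_len, dim]; the forward method is an assumption added for illustration and is not part of the original example (note that the example's Softmax normalizes over axis 1):

import math
import paddle
import paddle.nn as nn

class Attention(nn.Layer):
    def __init__(self, dim=32):
        super(Attention, self).__init__()
        self.dim = dim
        self.q_layer = nn.Linear(dim, dim, bias_attr=False)
        self.k_layer = nn.Linear(dim, dim, bias_attr=False)
        self.v_layer = nn.Linear(dim, dim, bias_attr=False)
        self.softmax = nn.Softmax(1)

    def forward(self, x):  # hypothetical forward, x: [batch, seq_len, dim]
        q, k, v = self.q_layer(x), self.k_layer(x), self.v_layer(x)
        scores = paddle.matmul(q, k, transpose_y=True) / math.sqrt(self.dim)
        weights = self.softmax(scores)  # normalized over axis 1, as configured above
        return paddle.matmul(weights, v)

out = Attention(dim=32)(paddle.randn([2, 5, 32]))  # -> [2, 5, 32]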
Example #5
    def __init__(self, num_classes=10):
        super(ImperativeLenet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2D(
                in_channels=1,
                out_channels=6,
                kernel_size=3,
                stride=1,
                padding=1,
                bias_attr=False),
            nn.BatchNorm2D(6),
            nn.ReLU(),
            nn.MaxPool2D(
                kernel_size=2, stride=2),
            nn.Conv2D(
                in_channels=6,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=0),
            nn.BatchNorm2D(16),
            nn.PReLU(),
            nn.MaxPool2D(
                kernel_size=2, stride=2))

        self.fc = nn.Sequential(
            nn.Linear(
                in_features=400, out_features=120),
            nn.LeakyReLU(),
            nn.Linear(
                in_features=120, out_features=84),
            nn.Sigmoid(),
            nn.Linear(
                in_features=84, out_features=num_classes),
            nn.Softmax())
Example #6
    def forward(self, x, H, W):
        B, N, C = x.shape
        q = self.q(x).reshape([B, N, self.num_heads,
                               C // self.num_heads]).transpose([0, 2, 1, 3])

        if self.sr_ratio > 1:
            x_ = x.transpose([0, 2, 1]).reshape([B, C, H, W])
            tmp_n = H * W // self.sr_ratio**2
            x_ = self.sr(x_).reshape([B, C, tmp_n]).transpose([0, 2, 1])
            x_ = self.norm(x_)
            kv = self.kv(x_).reshape(
                [B, tmp_n, 2, self.num_heads,
                 C // self.num_heads]).transpose([2, 0, 3, 1, 4])
        else:
            kv = self.kv(x).reshape(
                [B, N, 2, self.num_heads,
                 C // self.num_heads]).transpose([2, 0, 3, 1, 4])
        k, v = kv[0], kv[1]

        attn = paddle.matmul(q, k.transpose([0, 1, 3, 2])) * self.scale
        attn = nn.Softmax(axis=-1)(attn)
        attn = self.attn_drop(attn)

        x = paddle.matmul(attn, v).transpose([0, 2, 1, 3]).reshape([B, N, C])
        x = self.proj(x)
        x = self.proj_drop(x)
        return x
Example #7
 def __init__(self,
              dnn_units=[8, 64, 16],
              dnn_activation='sigmoid',
              weight_normalization=False,
              name=None):
     super().__init__()
     self.dnn_units = dnn_units
     self.dnn_activation = dnn_activation
     self.weight_normalization = weight_normalization
     self.name = name
     layer_list = []
     #bn_list = []
     for i in range(len(dnn_units) - 1):
         dnn_layer = nn.Linear(in_features=self.dnn_units[i]
                               if i != 0 else self.dnn_units[i] * 4,
                               out_features=self.dnn_units[i + 1],
                               weight_attr=self._weight_init())
         self.add_sublayer(self.name + f'linear_{i}', dnn_layer)
         layer_list.append(dnn_layer)
         #layer_list.append(copy.deepcopy(dnn_layer))
         #bn_layer = nn.BatchNorm(50)
         #self.add_sublayer(self.name + f'bn_{i}', bn_layer)
         #bn_list.append(bn_layer)
         #bn_list.append(copy.deepcopy(bn_layer))
     #self.bn_layer = nn.LayerList(bn_list)
     self.layers = nn.LayerList(layer_list)
     self.dnn = nn.Linear(self.dnn_units[-1],
                          1,
                          weight_attr=self._weight_init())
     self.activation = nn.Sigmoid()
     self.soft = nn.Softmax()
Example #8
def main(args):
    # define model
    model = paddlevision.models.__dict__[args.model](
        pretrained=args.pretrained, num_classes=args.num_classes)

    model = nn.Sequential(model, nn.Softmax())
    model.eval()

    # define transforms
    eval_transforms = ClassificationPresetEval(args.resize_size,
                                               args.crop_size)

    with open(args.img_path, 'rb') as f:
        img = Image.open(f).convert('RGB')

    img = eval_transforms(img)
    img = paddle.to_tensor(img)
    img = img.expand([1] + img.shape)

    output = model(img).numpy()[0]

    class_id = output.argmax()
    prob = output[class_id]
    print(f"class_id: {class_id}, prob: {prob}")
    return output
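A hypothetical way to invoke main() with the arguments it reads; the model key, sizes, and image path below are placeholders, not values from the original script:

import argparse

if __name__ == "__main__":
    args = argparse.Namespace(
        model="resnet50",       # placeholder key into paddlevision.models
        pretrained=True,
        num_classes=1000,
        resize_size=256,
        crop_size=224,
        img_path="./demo.jpg",  # placeholder image path
    )
    main(args)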
Example #9
    def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.):

        super().__init__()
        self.dim = dim
        self.window_size = window_size  # Wh, Ww
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim ** -0.5

        # define a parameter table of relative position bias
        relative_position_bias_table = self.create_parameter(
            shape=((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads), default_initializer=nn.initializer.Constant(value=0))  # 2*Wh-1 * 2*Ww-1, nH
        self.add_parameter("relative_position_bias_table", relative_position_bias_table)

        # get pair-wise relative position index for each token inside the window
        coords_h = paddle.arange(self.window_size[0])
        coords_w = paddle.arange(self.window_size[1])
        coords = paddle.stack(paddle.meshgrid([coords_h, coords_w]))                   # 2, Wh, Ww
        coords_flatten = paddle.flatten(coords, 1)                                     # 2, Wh*Ww
        relative_coords = coords_flatten.unsqueeze(-1) - coords_flatten.unsqueeze(1)   # 2, Wh*Ww, Wh*Ww
        relative_coords = relative_coords.transpose([1, 2, 0])                         # Wh*Ww, Wh*Ww, 2
        relative_coords[:, :, 0] += self.window_size[0] - 1                            # shift to start from 0
        relative_coords[:, :, 1] += self.window_size[1] - 1
        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
        self.relative_position_index = relative_coords.sum(-1)                         # Wh*Ww, Wh*Ww
        self.register_buffer("relative_position_index", self.relative_position_index)

        self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        self.softmax = nn.Softmax(axis=-1)
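A standalone check of the relative-position-index construction above, shrunk to a 2x2 window so the intermediate shapes are easy to inspect; this is an illustrative sketch mirroring the example, not part of the original:

import paddle

Wh, Ww = 2, 2
coords = paddle.stack(paddle.meshgrid([paddle.arange(Wh), paddle.arange(Ww)]))  # [2, Wh, Ww]
coords_flatten = paddle.flatten(coords, 1)                                      # [2, Wh*Ww]
relative_coords = coords_flatten.unsqueeze(-1) - coords_flatten.unsqueeze(1)    # [2, Wh*Ww, Wh*Ww]
relative_coords = relative_coords.transpose([1, 2, 0])                          # [Wh*Ww, Wh*Ww, 2]
relative_coords[:, :, 0] += Wh - 1      # shift row offsets to start from 0
relative_coords[:, :, 1] += Ww - 1      # shift column offsets to start from 0
relative_coords[:, :, 0] *= 2 * Ww - 1  # row-major flattening of the 2D offset
relative_position_index = relative_coords.sum(-1)  # [Wh*Ww, Wh*Ww]
print(relative_position_index.shape)    # [4, 4]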
Example #10
    def forward(self, hidden_states, attention_mask):
        """Self-Attention block."""
        temp_q = self.q(hidden_states)
        temp_k = self.k(hidden_states)
        temp_v = self.v(hidden_states)

        q_layer = self.score_transpose(temp_q)
        k_layer = self.score_transpose(temp_k)
        v_layer = self.score_transpose(temp_v)

        attention_score = paddle.matmul(q_layer, k_layer.transpose([0, 1, 3, 2]))
        attention_score = attention_score / math.sqrt(self.head_size)
        attention_score = attention_score + attention_mask

        attention_prob = nn.Softmax(axis=-1)(attention_score)
        attention_prob = self.dropout(attention_prob)

        attention_layer = paddle.matmul(attention_prob, v_layer)
        attention_layer = attention_layer.transpose([0, 2, 1, 3])

        temp_attention_layer = attention_layer.shape[:-2] + [
            self.all_head_size
        ]
        attention_map = attention_layer.reshape(temp_attention_layer)
        return attention_map
Example #11
    def __init__(self,
                 block,
                 layers,
                 num_filters,
                 feature_dim,
                 encoder_type='SAP',
                 n_mels=40,
                 log_input=True,
                 **kwargs):
        super(ResNetSE, self).__init__()

        print('Embedding size is %d, encoder %s.' %
              (feature_dim, encoder_type))

        self.inplanes = num_filters[0]
        self.encoder_type = encoder_type
        self.n_mels = n_mels
        self.log_input = log_input

        self.conv1 = nn.Conv2D(1,
                               num_filters[0],
                               kernel_size=3,
                               stride=1,
                               padding=1)
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm2D(num_filters[0])

        self.layer1 = self._make_layer(block, num_filters[0], layers[0])
        self.layer2 = self._make_layer(block,
                                       num_filters[1],
                                       layers[1],
                                       stride=(2, 2))
        self.layer3 = self._make_layer(block,
                                       num_filters[2],
                                       layers[2],
                                       stride=(2, 2))
        self.layer4 = self._make_layer(block,
                                       num_filters[3],
                                       layers[3],
                                       stride=(2, 2))

        outmap_size = int(self.n_mels / 8)

        self.attention = nn.Sequential(
            nn.Conv1D(num_filters[3] * outmap_size, 128, kernel_size=1),
            nn.ReLU(),
            nn.BatchNorm1D(128),
            nn.Conv1D(128, num_filters[3] * outmap_size, kernel_size=1),
            nn.Softmax(axis=2),
        )

        if self.encoder_type == "SAP":
            out_dim = num_filters[3] * outmap_size
        elif self.encoder_type == "ASP":
            out_dim = num_filters[3] * outmap_size * 2
        else:
            raise ValueError('Undefined encoder')

        self.fc = nn.Linear(out_dim, feature_dim)
Example #12
 def __init__(self, roberta, num_classes=2, dropout=None):
     super(RobertaForSequenceClassification, self).__init__()
     self.num_classes = num_classes
     self.roberta = roberta  # allow roberta to be config
     self.dropout = nn.Dropout(dropout if dropout is not None else
                               self.roberta.config["hidden_dropout_prob"])
     self.classifier = nn.Linear(self.roberta.config["hidden_size"],
                                 num_classes)
     self.softmax = nn.Softmax()
     self.apply(self.init_weights)
Example #13
 def __init__(self, act=None, axis=-1):
     super().__init__()
     if act is not None:
         assert act in ["softmax", "sigmoid"]
     if act == "softmax":
         self.act = nn.Softmax(axis=axis)
     elif act == "sigmoid":
         self.act = nn.Sigmoid()
     else:
         self.act = None
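A minimal sketch of this activation-selection pattern wrapped in a complete Layer; the class name and the forward method are assumptions added for illustration:

import paddle
import paddle.nn as nn

class ActLayer(nn.Layer):
    def __init__(self, act=None, axis=-1):
        super().__init__()
        if act is not None:
            assert act in ["softmax", "sigmoid"]
        if act == "softmax":
            self.act = nn.Softmax(axis=axis)
        elif act == "sigmoid":
            self.act = nn.Sigmoid()
        else:
            self.act = None

    def forward(self, x):
        return x if self.act is None else self.act(x)

logits = paddle.randn([2, 3])
print(ActLayer(act="softmax")(logits).sum(axis=-1))  # rows sum to 1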
Example #14
 def __init__(self, in_channels, ds=8, activation=nn.ReLU):
     super(BAM, self).__init__()
     self.key_channel = in_channels // 8
     self.activation = activation
     self.ds = ds
     self.pool = nn.AvgPool2D(self.ds)
     self.query_conv = nn.Conv2D(in_channels=in_channels, out_channels=in_channels // 8, kernel_size=1)
     self.key_conv = nn.Conv2D(in_channels=in_channels, out_channels=in_channels // 8, kernel_size=1)
     self.value_conv = nn.Conv2D(in_channels=in_channels, out_channels=in_channels, kernel_size=1)
     self.gamma = nn.ParameterList([paddle.create_parameter(shape=[1], dtype='float32', default_initializer=nn.initializer.Constant(value=0))])
     self.softmax = nn.Softmax(axis=-1)
Example #15
 def __init__(self, bond_dim, hidden_dim, num_angle):
     super(PiPoolLayer, self).__init__()
     self.bond_dim = bond_dim
     self.num_angle = num_angle
     self.num_type = 4 * 9
     fc_in_dim = num_angle * bond_dim
     self.fc_1 = DenseLayer(fc_in_dim,
                            hidden_dim,
                            activation=F.relu,
                            bias=True)
     self.fc_2 = nn.Linear(hidden_dim, 1, bias_attr=False)
     self.softmax = nn.Softmax(axis=1)
Example #16
    def __init__(self, act=None):
        super().__init__()
        if act is not None:
            assert act in ["softmax", "sigmoid"]
        if act == "softmax":
            self.act = nn.Softmax(axis=-1)
        elif act == "sigmoid":
            self.act = nn.Sigmoid()
        else:
            self.act = None

        self.jskl_loss = KLJSLoss(mode="js")
Example #17
    def __init__(self, num_classes=10, classifier_activation='softmax'):
        super(LeNetDygraph, self).__init__()
        self.num_classes = num_classes
        self.features = nn.Sequential(nn.Conv2D(1, 6, 3, stride=1, padding=1),
                                      nn.ReLU(), nn.MaxPool2D(2, stride=2),
                                      nn.Conv2D(6, 16, 5, stride=1, padding=0),
                                      nn.ReLU(), nn.MaxPool2D(2, stride=2))

        if num_classes > 0:
            self.fc = nn.Sequential(nn.Linear(400, 120), nn.Linear(120, 84),
                                    nn.Linear(84, num_classes),
                                    nn.Softmax())  # TODO: accept any activation
Example #18
    def __init__(self, act='softmax', axis=-1, reduction='mean'):
        super().__init__()

        assert act in ['softmax', 'sigmoid', None]
        self.reduction = reduction

        if act == 'softmax':
            self.act = nn.Softmax(axis=axis)
        elif act == 'sigmoid':
            self.act = nn.Sigmoid()
        else:
            self.act = None
Example #19
def build_mlp(hidden_size: int, num_hidden_layers: int, output_size: int):
    FC_layers = []
    FC_layers.extend([
        nn.Linear(in_features=hidden_size, out_features=output_size),
        nn.ReLU()
    ])
    for _ in range(num_hidden_layers - 1):
        FC_layers.extend([
            nn.Linear(in_features=output_size, out_features=output_size),
            nn.ReLU()
        ])
    FC_layers.append(nn.Softmax())  # the final layer applies softmax
    return nn.Sequential(*FC_layers)
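A hypothetical usage of build_mlp; the sizes below are made up for illustration:

import paddle

mlp = build_mlp(hidden_size=16, num_hidden_layers=3, output_size=4)
x = paddle.randn([8, 16])  # batch of 8 feature vectors
probs = mlp(x)             # [8, 4]; the trailing Softmax makes each row sum to 1
print(probs.shape, probs.sum(axis=-1))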
Example #20
 def __init__(self, stride=1, padding=0, dilation=1, groups=1, padding_mode="zeros"):
     super(Conv2D1, self).__init__()
     self.conv = nn.Conv2D(
         3,
         6,
         3,
         stride=stride,
         padding=padding,
         dilation=dilation,
         groups=groups,
         padding_mode=padding_mode,
     )
     self.softmax = nn.Softmax()
Example #21
def export(args):
    model = paddlevision.models.__dict__[args.model](
        pretrained=args.pretrained, num_classes=args.num_classes)
    model = nn.Sequential(model, nn.Softmax())
    model.eval()

    model = paddle.jit.to_static(
        model,
        input_spec=[
            InputSpec(shape=[None, 3, args.img_size, args.img_size],
                      dtype='float32')
        ])
    paddle.jit.save(model, os.path.join(args.save_inference_dir, "inference"))
    print(f"inference model has been saved into {args.save_inference_dir}")
Example #22
    def __init__(self,
                 nsp_reader,
                 num_layers,
                 n_head,
                 hidden_size,
                 vocab_size=8001,
                 type_size=2,
                 latent_type_size=20,
                 max_position_seq_len=256,
                 act_dropout=0.1,
                 attn_dropout=0.1,
                 max_dec_len=64,
                 min_dec_len=1,
                 topk=10):
        super(Plato2InferModel, self).__init__()

        self.nsp_reader = nsp_reader
        self.num_layers = num_layers
        self.latent_type_size = latent_type_size
        self.max_dec_len = max_dec_len
        self.min_dec_len = min_dec_len
        self.topk = topk
        self.unk_id = 0
        self.bos_id = 1
        self.eos_id = 2
        self.mask_id = 8000
        self.after_eos = paddle.ones([vocab_size]) * -1e9
        self.after_eos[self.eos_id] = 0
        self.is_cn = False
        self.batch_size = 1

        self.latent_weight = paddle.create_parameter(
            [hidden_size, latent_type_size], 'float32')

        self.plato2_encoder = Plato2Encoder(
            vocab_size, type_size, max_position_seq_len, num_layers, n_head,
            hidden_size, attn_dropout, act_dropout)

        self.logits_fc_layer = nn.Linear(hidden_size, hidden_size)
        self.logits_layer_norm = nn.LayerNorm(hidden_size)
        self.logits_bias = paddle.create_parameter(
            [vocab_size], 'float32', is_bias=True)

        self.nsp_predictor = NSP(vocab_size, type_size, max_position_seq_len,
                                 num_layers, n_head, hidden_size, attn_dropout,
                                 act_dropout)

        self.gelu_layer = nn.GELU()
        self.softmax = nn.Softmax()
Example #23
    def __init__(self, inplanes, use_scale=True, **kwargs):
        planes = inplanes // 2
        self.use_scale = use_scale

        super(NonLocal, self).__init__(inplanes)
        self.t = nn.Conv2D(inplanes, planes, kernel_size=1,
                           stride=1, bias_attr=True)
        self.p = nn.Conv2D(inplanes, planes, kernel_size=1,
                           stride=1, bias_attr=True)
        self.g = nn.Conv2D(inplanes, planes, kernel_size=1,
                           stride=1, bias_attr=True)
        self.softmax = nn.Softmax(axis=2)
        self.z = nn.Conv2D(planes, inplanes, kernel_size=1,
                           stride=1, bias_attr=True)
        self.bn = nn.BatchNorm2D(inplanes)
Example #24
    def __init__(self, ernie, pinyin_vocab_size, pad_pinyin_id=0):
        super(ErnieForCSC, self).__init__()
        self.ernie = ernie
        emb_size = self.ernie.config["hidden_size"]
        hidden_size = self.ernie.config["hidden_size"]
        vocab_size = self.ernie.config["vocab_size"]

        self.pad_token_id = self.ernie.config["pad_token_id"]
        self.pinyin_vocab_size = pinyin_vocab_size
        self.pad_pinyin_id = pad_pinyin_id
        self.pinyin_embeddings = nn.Embedding(self.pinyin_vocab_size,
                                              emb_size,
                                              padding_idx=pad_pinyin_id)
        self.detection_layer = nn.Linear(hidden_size, 2)
        self.correction_layer = nn.Linear(hidden_size, vocab_size)
        self.softmax = nn.Softmax()
Example #25
    def __init__(self, in_channels, ratio):
        super().__init__()

        self.conv_mask = nn.Conv2D(in_channels=in_channels,
                                   out_channels=1,
                                   kernel_size=1)

        self.softmax = nn.Softmax(axis=2)

        inter_channels = int(in_channels * ratio)
        self.channel_add_conv = nn.Sequential(
            nn.Conv2D(in_channels=in_channels,
                      out_channels=inter_channels,
                      kernel_size=1),
            nn.LayerNorm(normalized_shape=[inter_channels, 1, 1]), nn.ReLU(),
            nn.Conv2D(in_channels=inter_channels,
                      out_channels=in_channels,
                      kernel_size=1))
Example #26
    def __init__(self, config, model, use_multilabel):
        super().__init__()
        self.base_model = model
        # we should choose a final model to export
        if isinstance(self.base_model, DistillationModel):
            self.infer_model_name = config["infer_model_name"]
        else:
            self.infer_model_name = None

        self.infer_output_key = config.get("infer_output_key", None)
        if self.infer_output_key == "features" and isinstance(
                self.base_model, RecModel):
            self.base_model.head = IdentityHead()
        if use_multilabel:
            self.out_act = nn.Sigmoid()
        else:
            if config.get("infer_add_softmax", True):
                self.out_act = nn.Softmax(axis=-1)
            else:
                self.out_act = None
Example #27
    def __init__(self, in_channels=3, out_classes=5, hid=64, num=64):
        super(FallNet, self).__init__()
        self.cnn0 = Block(in_channels, hid, 7, 0)

        self.cnn1 = Block(hid, hid, 5, 0)
        self.cnn2 = Block(hid, hid, 3, 0)
        self.cnn3 = Block(hid, hid, 1, 0)
        self.avg = nn.AdaptiveAvgPool1D(output_size=num)

        # self.rnn0 = nn.LSTM(input_size=145, hidden_size=num, dropout=.2, num_layers=3)
        self.rnn0 = nn.GRU(input_size=145,
                           hidden_size=num,
                           num_layers=1,
                           dropout=0.2)
        self.rnn1 = Block(hid, hid, 1, 0)
        self.rnn2 = Block(hid, 4, 3, 0)

        self.cls = nn.Sequential(
            nn.Linear(in_features=1016, out_features=128), nn.Dropout(p=.2),
            nn.Linear(in_features=128, out_features=out_classes),
            nn.Softmax(axis=1))
Example #28
    def forward(self, x, H, W):
        B, N, C = x.shape
        h_group, w_group = H // self.ws, W // self.ws
        total_groups = h_group * w_group
        x = x.reshape([B, h_group, self.ws, w_group, self.ws,
                       C]).transpose([0, 1, 3, 2, 4, 5])
        qkv = self.qkv(x).reshape([
            B, total_groups, self.ws**2, 3, self.num_heads, C // self.num_heads
        ]).transpose([3, 0, 1, 4, 2, 5])
        q, k, v = qkv[0], qkv[1], qkv[2]
        attn = paddle.matmul(q, k.transpose([0, 1, 2, 4, 3])) * self.scale

        attn = nn.Softmax(axis=-1)(attn)
        attn = self.attn_drop(attn)
        attn = paddle.matmul(attn, v).transpose([0, 1, 3, 2, 4]).reshape(
            [B, h_group, w_group, self.ws, self.ws, C])

        x = attn.transpose([0, 1, 3, 2, 4, 5]).reshape([B, N, C])
        x = self.proj(x)
        x = self.proj_drop(x)
        return x
Example #29
 def forward(
     self,
     query: paddle.Tensor,
     key: paddle.Tensor,
     value: paddle.Tensor,
     attn_mask: Optional[paddle.Tensor] = None,
 ) -> Tuple[paddle.Tensor, paddle.Tensor]:
     r"""
     Args:
         query: [batch, num_attention_heads, len_query, dim_query]
         key: [batch, num_attention_heads, len_key, dim_key]
         value: [batch, num_attention_heads, len_value, dim_value]
         attn_mask: [batch, num_attention_heads, len_query, len_key]
     """
     attention = paddle.matmul(query, key.transpose((0, 1, 3, 2)))
     attention = attention / math.sqrt(query.shape[-1])
     if attn_mask is not None:
         attention = attention + attn_mask
     attention = nn.Softmax(axis=-1)(attention)
     attention = self.dropout(attention)
     context = paddle.matmul(attention, value)
     return context, attention
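A standalone sketch of the same scaled dot-product attention with the shapes documented in the docstring and no dropout; the batch, head, and length sizes are made up for illustration:

import math
import paddle
import paddle.nn as nn

batch, heads, len_q, len_k, dim = 2, 4, 5, 7, 16
query = paddle.randn([batch, heads, len_q, dim])
key = paddle.randn([batch, heads, len_k, dim])
value = paddle.randn([batch, heads, len_k, dim])

attention = paddle.matmul(query, key.transpose((0, 1, 3, 2))) / math.sqrt(query.shape[-1])
attention = nn.Softmax(axis=-1)(attention)  # weights over the key positions
context = paddle.matmul(attention, value)
print(context.shape)  # [2, 4, 5, 16]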
Example #30
    def __init__(self,
                 vocab_size,
                 num_classes,
                 emb_dim=128,
                 padding_idx=0,
                 lstm_hidden_size=198,
                 direction='forward',
                 lstm_layers=1,
                 dropout_rate=0.0,
                 pooling_type=None,
                 fc_hidden_size=96):
        super().__init__()

        self.direction = direction

        self.embedder = nn.Embedding(num_embeddings=vocab_size,
                                     embedding_dim=emb_dim,
                                     padding_idx=padding_idx)

        # self.lstm_encoder = nlp.seq2vec.LSTMEncoder(emb_dim,
        #                                             lstm_hidden_size,
        #                                             num_layers=lstm_layers,
        #                                             direction=direction,
        #                                             dropout=dropout_rate,
        #                                             pooling_type=pooling_type)

        self.lstm_layer = nn.LSTM(input_size=emb_dim,
                                  hidden_size=lstm_hidden_size,
                                  num_layers=lstm_layers,
                                  direction=direction,
                                  dropout=dropout_rate)

        self.fc = nn.Linear(
            lstm_hidden_size * (2 if direction == 'bidirect' else 1),
            fc_hidden_size)
        self.output_layer = nn.Linear(fc_hidden_size, num_classes)
        self.softmax = nn.Softmax(axis=1)