Example 1
    def forward(self, x, init_states=None):
        """Assumes x is of shape (batch, sequence, feature)"""
        seq_sz, bs, _ = x.size()
        hidden_seq = []
        if init_states is None:
            h_t, c_t = (
                flow.zeros((bs, self.hidden_size)).to("cuda"),
                flow.zeros((bs, self.hidden_size)).to("cuda"),
            )
        else:
            h_t, c_t = init_states

        HS = self.hidden_size
        for t in range(seq_sz):
            x_t = x[t, :, :].reshape(x.shape[1], x.shape[2])
            # batch the computations into a single matrix multiplication
            # NOTE(Xu Zhiqiu): flow does not support view yet; use reshape instead
            gates = flow.matmul(x_t, self.W) + flow.matmul(h_t,
                                                           self.U) + self.bias
            i_t, f_t, g_t, o_t = (
                flow.sigmoid(gates[:, :HS]),
                flow.sigmoid(gates[:, HS:HS * 2]),
                flow.tanh(gates[:, HS * 2:HS * 3]),
                flow.sigmoid(gates[:, HS * 3:]),
            )
            c_t = f_t * c_t + i_t * g_t
            h_t = o_t * flow.tanh(c_t)
            hidden_seq.append(h_t.unsqueeze(0))
        hidden_seq = flow.cat(hidden_seq, dim=0)
        return hidden_seq, (h_t, c_t)
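This example folds the four LSTM gates into one matrix product, which pins down the parameter shapes: W is (input_size, 4 * hidden_size), U is (hidden_size, 4 * hidden_size), and bias is (4 * hidden_size,). A minimal shape sketch (the sizes and free-standing tensors are illustrative assumptions, not part of the original project):

import oneflow as flow

seq_sz, bs, input_size, hidden_size = 5, 2, 3, 4
x = flow.randn(seq_sz, bs, input_size)       # (sequence, batch, feature)
W = flow.randn(input_size, 4 * hidden_size)
U = flow.randn(hidden_size, 4 * hidden_size)
bias = flow.zeros(4 * hidden_size)
h_t = flow.zeros(bs, hidden_size)
gates = flow.matmul(x[0], W) + flow.matmul(h_t, U) + bias
print(gates.shape)  # (2, 16): four hidden_size-wide slices, one per gate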
Example 2
    def forward(self, x, init_states=None):
        seq_sz, bs, _ = x.size()
        hidden_seq = []
        if init_states is None:
            h_t, c_t = (
                flow.zeros((bs, self.hidden_size)).to("cuda"),
                flow.zeros((bs, self.hidden_size)).to("cuda"),
            )
        else:
            h_t, c_t = init_states

        HS = self.hidden_size
        for t in range(seq_sz):
            x_t = x[t, :, :]
            x_t = x_t.reshape(x.shape[1], x.shape[2])
            gates = flow.matmul(x_t, self.W) + flow.matmul(h_t,
                                                           self.U) + self.bias
            i_t, f_t, g_t, o_t = (
                flow.sigmoid(gates[:, :HS]),
                flow.sigmoid(gates[:, HS:HS * 2]),
                flow.tanh(gates[:, HS * 2:HS * 3]),
                flow.sigmoid(gates[:, HS * 3:]),
            )
            c_t = f_t * c_t + i_t * g_t
            h_t = o_t * flow.tanh(c_t)
            hidden_seq.append(h_t.unsqueeze(0))
        hidden_seq = flow.cat(hidden_seq, dim=0)
        return hidden_seq, (h_t, c_t)
Example 3
 def forward(self, x, init_states=None):
     """Assumes x is of shape (batch, sequence, feature)"""
     bs, seq_sz, _ = x.size()
     hidden_seq = []
     if init_states is None:
         h_t, c_t = (
             flow.zeros((bs, self.hidden_size)).to(x.device),
             flow.zeros((bs, self.hidden_size)).to(x.device),
         )
     else:
         h_t, c_t = init_states
     HS = self.hidden_size
     for t in range(seq_sz):
         x_t = x[:, t, :].reshape(x.shape[0], x.shape[2])
         gates = flow.matmul(x_t, self.W) + flow.matmul(h_t, self.U) + self.bias
         i_t, f_t, g_t, o_t = (
             flow.sigmoid(gates[:, :HS]),
             flow.sigmoid(gates[:, HS : HS * 2]),
             flow.tanh(gates[:, HS * 2 : HS * 3]),
             flow.sigmoid(gates[:, HS * 3 :]),
         )
         c_t = f_t * c_t + i_t * g_t
         h_t = o_t * flow.tanh(c_t)
         hidden_seq.append(h_t.unsqueeze(1))
     hidden_seq = flow.cat(hidden_seq, dim=1)
     return hidden_seq, (h_t, c_t)
Example 4
    def forward(self, x, c):
        c = c.view(c.size(0), c.size(1), 1, 1)

        c1 = c.repeat(1, 1, x.size(2), x.size(3))
        x = flow.cat([x, c1], dim=1)
        x = self.d1(x)

        c2 = c.repeat(1, 1, x.size(2), x.size(3))
        x = flow.cat([x, c2], dim=1)
        x = self.d2(x)

        c3 = c.repeat(1, 1, x.size(2), x.size(3))
        x = flow.cat([x, c3], dim=1)
        x = self.d3(x)

        c4 = c.repeat(1, 1, x.size(2), x.size(3))
        x = flow.cat([x, c4], dim=1)
        x = self.d4(x)

        c5 = c.repeat(1, 1, x.size(2), x.size(3))
        x = flow.cat([x, c5], dim=1)
        x = self.conv(x)

        x = self.pool(x)
        x = flow.squeeze(x)
        x = flow.tanh(x)
        return x
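The forward above conditions every stage by broadcasting the label vector c to the spatial size of the current feature map and concatenating it as extra channels. A minimal sketch of that broadcast-and-concat step (the shapes are illustrative assumptions):

import oneflow as flow

x = flow.randn(2, 3, 8, 8)                # (N, C, H, W) feature map
c = flow.randn(2, 5)                      # (N, K) condition vector
c = c.view(c.size(0), c.size(1), 1, 1)    # (N, K, 1, 1)
c = c.repeat(1, 1, x.size(2), x.size(3))  # (N, K, H, W)
print(flow.cat([x, c], dim=1).shape)      # (2, 8, 8, 8): C + K channels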
Example 5
    def _test_body_tanh_v2(test_case, input_arr):
        x = flow.Tensor(input_arr)

        y = flow.tanh(x)
        z = np.tanh(input_arr)

        test_case.assertTrue(np.allclose(y.numpy(), z, rtol=1e-4, atol=1e-4))
Example 6
def gelu(x):
    """
    Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
    the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
    """
    return (0.5 * x * (1.0 + flow.tanh(
        math.sqrt(2.0 / math.pi) * (x + 0.044715 * flow.pow(x, 3.0)))))
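This is the tanh approximation of GELU rather than the exact erf form. A quick numeric sanity check against a NumPy transcription of the same formula (a sketch; it assumes numpy is installed and gelu is the function defined above):

import math
import numpy as np
import oneflow as flow

a = np.linspace(-3.0, 3.0, 7).astype(np.float32)
ref = 0.5 * a * (1.0 + np.tanh(math.sqrt(2.0 / math.pi) * (a + 0.044715 * a ** 3)))
out = gelu(flow.Tensor(a)).numpy()
print(np.allclose(out, ref, rtol=1e-4, atol=1e-4))  # True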
Example 7
    def forward(self, x, hidden=None):
        batch_size, seq_len, _ = x.size()
        H_S = self.hidden_size
        hidden_seq = []

        if hidden is None:
            h_t = flow.zeros((batch_size, self.hidden_size)).to(x.device)
        else:
            h_t = hidden

        for t in range(seq_len):
            x_t = x[:, t, :]
            gates_1 = flow.matmul(x_t, self.inp_W) + self.inp_b
            gates_2 = flow.matmul(h_t, self.hid_W) + self.hid_b

            r_gate = flow.sigmoid(gates_1[:, :H_S] + gates_2[:, :H_S])
            z_gate = flow.sigmoid(gates_1[:, H_S:H_S * 2] +
                                  gates_2[:, H_S:H_S * 2])
            h_t_ = flow.tanh(gates_1[:, H_S * 2:H_S * 3] +
                             r_gate * gates_2[:, H_S * 2:H_S * 3])
            h_t = (1 - z_gate) * h_t_ + z_gate * h_t

            hidden_seq.append(h_t.unsqueeze(1))

        hidden_seq = flow.cat(hidden_seq, dim=1)
        return hidden_seq, h_t
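As in the LSTM examples, the three GRU gates (reset, update, candidate) are packed along the feature dimension, so inp_W must be (input_size, 3 * hidden_size) and hid_W (hidden_size, 3 * hidden_size). A shape sketch with illustrative sizes and free-standing tensors (assumptions, not the module's real parameters):

import oneflow as flow

bs, input_size, H_S = 2, 3, 4
x_t = flow.randn(bs, input_size)
h_t = flow.zeros(bs, H_S)
inp_W = flow.randn(input_size, 3 * H_S)
hid_W = flow.randn(H_S, 3 * H_S)
gates_1 = flow.matmul(x_t, inp_W)  # (2, 12): reset | update | candidate
gates_2 = flow.matmul(h_t, hid_W)  # (2, 12)
print(gates_1[:, :H_S].shape)      # (2, 4): one gate-wide slice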
Example 8
def _tanh(self):
    return flow.tanh(self)
Example 9
File: rnn.py Project: zzk0/oneflow
    def forward(self, input, h_0=None):
        if not self.batch_first:
            input = self.permute_tensor(input)
        D = 2 if self.bidirectional else 1
        num_layers = self.num_layers
        batch_size, seq_len, _ = input.size()

        if h_0 is None:
            real_hidden_size = (
                self.proj_size if self.proj_size > 0 else self.hidden_size
            )
            h_t = flow.zeros(
                (D * num_layers, batch_size, real_hidden_size),
                dtype=input.dtype,
                device=input.device,
            )
            c_t = flow.zeros(
                (D * num_layers, batch_size, self.hidden_size),
                dtype=input.dtype,
                device=input.device,
            )
            h_0 = (h_t, c_t)
        else:
            h_t, c_t = h_0

        if self.bidirectional:
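            # NOTE: when h_0 was None it was reassigned above, so this branch
            # never runs; the even/odd split below yields the same zero states.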
            if h_0 is None:
                h_t_f = h_t[:num_layers, :, :]
                h_t_b = h_t[num_layers:, :, :]
                c_t_f = c_t[:num_layers, :, :]
                c_t_b = c_t[num_layers:, :, :]
            else:
                h_t_f = flow.cat(
                    [
                        h_t[l, :, :].unsqueeze(0)
                        for l in range(h_t.size(0))
                        if l % 2 == 0
                    ],
                    dim=0,
                )
                h_t_b = flow.cat(
                    [
                        h_t[l, :, :].unsqueeze(0)
                        for l in range(h_t.size(0))
                        if l % 2 != 0
                    ],
                    dim=0,
                )
                c_t_f = flow.cat(
                    [
                        c_t[l, :, :].unsqueeze(0)
                        for l in range(c_t.size(0))
                        if l % 2 == 0
                    ],
                    dim=0,
                )
                c_t_b = flow.cat(
                    [
                        c_t[l, :, :].unsqueeze(0)
                        for l in range(c_t.size(0))
                        if l % 2 != 0
                    ],
                    dim=0,
                )
        else:
            h_t_f = h_t
            c_t_f = c_t

        layer_hidden = []
        layer_cell = []

        for layer in range(self.num_layers):

            hidden_seq_f = []
            if self.bidirectional:
                hidden_seq_b = []

            hid_t_f = h_t_f[layer, :, :]
            h_c_t_f = c_t_f[layer, :, :]
            if self.bidirectional:
                hid_t_b = h_t_b[layer, :, :]
                h_c_t_b = c_t_b[layer, :, :]

            for t in range(seq_len):
                if layer == 0:
                    x_t_f = input[:, t, :]
                    if self.bidirectional:
                        x_t_b = input[:, seq_len - 1 - t, :]
                else:
                    x_t_f = hidden_seq[:, t, :]
                    if self.bidirectional:
                        x_t_b = hidden_seq[:, seq_len - 1 - t, :]

                # TODO: Modify after adding the stride attribute
                # gi_f = flow.matmul(
                #     x_t_f,
                #     getattr(self, "weight_ih_l{}{}".format(layer, "")).permute(1, 0),
                # )
                # gh_f = flow.matmul(
                #     hid_t_f,
                #     getattr(self, "weight_hh_l{}{}".format(layer, "")).permute(1, 0),
                # )

                gi_f = flow.matmul(
                    x_t_f, getattr(self, "weight_ih_l{}{}".format(layer, "")),
                )
                gh_f = flow.matmul(
                    hid_t_f, getattr(self, "weight_hh_l{}{}".format(layer, "")),
                )
                if self.bias:
                    gi_f += getattr(self, "bias_ih_l{}{}".format(layer, ""))
                    gh_f += getattr(self, "bias_hh_l{}{}".format(layer, ""))
                gates_f = gi_f + gh_f
                ingate_f, forgetgate_f, cellgate_f, outgate_f = gates_f.chunk(4, dim=1)
                ingate_f = flow.sigmoid(ingate_f)
                forgetgate_f = flow.sigmoid(forgetgate_f)
                cellgate_f = flow.tanh(cellgate_f)
                outgate_f = flow.sigmoid(outgate_f)
                h_c_t_f = (forgetgate_f * h_c_t_f) + (ingate_f * cellgate_f)
                hid_t_f = outgate_f * flow.tanh(h_c_t_f)
                if self.proj_size > 0:

                    # TODO: Modify after adding the stride attribute
                    # hid_t_f = flow.matmul(
                    #     hid_t_f,
                    #     getattr(self, "weight_hr_l{}{}".format(layer, "")).permute(
                    #         1, 0
                    #     ),
                    # )

                    hid_t_f = flow.matmul(
                        hid_t_f, getattr(self, "weight_hr_l{}{}".format(layer, ""))
                    )
                hidden_seq_f.append(hid_t_f.unsqueeze(1))

                if self.bidirectional:

                    # TODO: Modify after adding the stride attribute
                    # gi_b = flow.matmul(
                    #     x_t_b,
                    #     getattr(
                    #         self, "weight_ih_l{}{}".format(layer, "_reverse")
                    #     ).permute(1, 0),
                    # )
                    # gh_b = flow.matmul(
                    #     hid_t_b,
                    #     getattr(
                    #         self, "weight_hh_l{}{}".format(layer, "_reverse")
                    #     ).permute(1, 0),
                    # )

                    gi_b = flow.matmul(
                        x_t_b,
                        getattr(self, "weight_ih_l{}{}".format(layer, "_reverse")),
                    )
                    gh_b = flow.matmul(
                        hid_t_b,
                        getattr(self, "weight_hh_l{}{}".format(layer, "_reverse")),
                    )

                    if self.bias:
                        gi_b += getattr(self, "bias_ih_l{}{}".format(layer, "_reverse"))
                        gh_b += getattr(self, "bias_hh_l{}{}".format(layer, "_reverse"))
                    gates_b = gi_b + gh_b
                    ingate_b, forgetgate_b, cellgate_b, outgate_b = gates_b.chunk(
                        4, dim=1
                    )
                    ingate_b = flow.sigmoid(ingate_b)
                    forgetgate_b = flow.sigmoid(forgetgate_b)
                    cellgate_b = flow.tanh(cellgate_b)
                    outgate_b = flow.sigmoid(outgate_b)
                    h_c_t_b = (forgetgate_b * h_c_t_b) + (ingate_b * cellgate_b)
                    hid_t_b = outgate_b * flow.tanh(h_c_t_b)
                    if self.proj_size > 0:

                        # TODO: Modify after adding the stride attribute
                        # hid_t_b = flow.matmul(
                        #     hid_t_b,
                        #     getattr(
                        #         self, "weight_hr_l{}{}".format(layer, "_reverse")
                        #     ).permute(1, 0),
                        # )

                        hid_t_b = flow.matmul(
                            hid_t_b,
                            getattr(self, "weight_hr_l{}{}".format(layer, "_reverse")),
                        )
                    hidden_seq_b.insert(0, hid_t_b.unsqueeze(1))

            hidden_seq_f = flow.cat(hidden_seq_f, dim=1)
            if self.bidirectional:
                hidden_seq_b = flow.cat(hidden_seq_b, dim=1)

            if self.dropout != 0 and layer != self.num_layers - 1:
                hidden_seq_f = self.drop(hidden_seq_f)
                if self.bidirectional:
                    hidden_seq_b = self.drop(hidden_seq_b)

            if self.bidirectional:
                hidden_seq = flow.cat([hidden_seq_f, hidden_seq_b], dim=2)
            else:
                hidden_seq = hidden_seq_f

            if self.bidirectional:
                h_t = flow.cat([hid_t_f.unsqueeze(0), hid_t_b.unsqueeze(0)], dim=0)
                c_t = flow.cat([h_c_t_f.unsqueeze(0), h_c_t_b.unsqueeze(0)], dim=0)
            else:
                h_t = hid_t_f.unsqueeze(0)
                c_t = h_c_t_f.unsqueeze(0)

            layer_hidden.append(h_t)
            layer_cell.append(c_t)

        h_t = flow.cat(layer_hidden, dim=0)
        c_t = flow.cat(layer_cell, dim=0)

        if not self.batch_first:
            hidden_seq = self.permute_tensor(hidden_seq)

        return hidden_seq, (h_t, c_t)
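End to end this forward keeps the usual contract: with batch_first=True the input is (batch, seq, feature), hidden_seq comes back as (batch, seq, D * hidden_size) (or D * proj_size when projections are enabled), and h_t / c_t are stacked with D * num_layers first. A usage sketch via flow.nn.LSTM, assuming a OneFlow build that ships it:

import oneflow as flow
import oneflow.nn as nn

lstm = nn.LSTM(input_size=8, hidden_size=16, num_layers=2,
               batch_first=True, bidirectional=True)
x = flow.randn(4, 10, 8)     # (batch, seq, feature)
out, (h_n, c_n) = lstm(x)
print(out.shape)             # (4, 10, 32): D * hidden_size last
print(h_n.shape, c_n.shape)  # (4, 4, 16) each: D * num_layers first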
Example 10
File: rnn.py Project: zzk0/oneflow
    def forward(self, input, h_0=None):
        if not self.batch_first:
            input = self.permute_tensor(input)
        D = 2 if self.bidirectional else 1
        num_layers = self.num_layers
        batch_size, seq_len, _ = input.size()

        if h_0 is None:
            h_t = flow.zeros(
                (D * num_layers, batch_size, self.hidden_size),
                dtype=input.dtype,
                device=input.device,
            )
        else:
            h_t = h_0

        if self.bidirectional:
            if h_0 is None:
                h_t_f = h_t[:num_layers, :, :]
                h_t_b = h_t[num_layers:, :, :]
            else:
                h_t_f = flow.cat(
                    [
                        h_t[l, :, :].unsqueeze(0)
                        for l in range(h_t.size(0))
                        if l % 2 == 0
                    ],
                    dim=0,
                )
                h_t_b = flow.cat(
                    [
                        h_t[l, :, :].unsqueeze(0)
                        for l in range(h_t.size(0))
                        if l % 2 != 0
                    ],
                    dim=0,
                )
        else:
            h_t_f = h_t

        layer_hidden = []

        for layer in range(self.num_layers):
            hidden_seq_f = []
            if self.bidirectional:
                hidden_seq_b = []

            hid_t_f = h_t_f[layer, :, :]
            if self.bidirectional:
                hid_t_b = h_t_b[layer, :, :]

            for t in range(seq_len):
                if layer == 0:
                    x_t_f = input[:, t, :]
                    if self.bidirectional:
                        x_t_b = input[:, seq_len - 1 - t, :]
                else:
                    x_t_f = hidden_seq[:, t, :]
                    if self.bidirectional:
                        x_t_b = hidden_seq[:, seq_len - 1 - t, :]

                # TODO: Modify after adding the stride attribute
                # gi_f = flow.matmul(
                #     x_t_f,
                #     getattr(self, "weight_ih_l{}{}".format(layer, "")).permute(1, 0),
                # )
                # gh_f = flow.matmul(
                #     hid_t_f,
                #     getattr(self, "weight_hh_l{}{}".format(layer, "")).permute(1, 0),
                # )

                gi_f = flow.matmul(
                    x_t_f, getattr(self, "weight_ih_l{}{}".format(layer, "")),
                )
                gh_f = flow.matmul(
                    hid_t_f, getattr(self, "weight_hh_l{}{}".format(layer, "")),
                )
                if self.bias:
                    gi_f += getattr(self, "bias_ih_l{}{}".format(layer, ""))
                    gh_f += getattr(self, "bias_hh_l{}{}".format(layer, ""))

                i_r_f, i_i_f, i_n_f = gi_f.chunk(3, dim=1)
                h_r_f, h_i_f, h_n_f = gh_f.chunk(3, dim=1)

                resetgate_f = flow.sigmoid(i_r_f + h_r_f)
                inputgate_f = flow.sigmoid(i_i_f + h_i_f)
                newgate_f = flow.tanh(i_n_f + resetgate_f * h_n_f)

                hid_t_f = newgate_f + inputgate_f * (hid_t_f - newgate_f)

                hidden_seq_f.append(hid_t_f.unsqueeze(1))

                if self.bidirectional:

                    # TODO: Modify after adding the stride attribute
                    # gi_b = flow.matmul(
                    #     x_t_b,
                    #     getattr(
                    #         self, "weight_ih_l{}{}".format(layer, "_reverse")
                    #     ).permute(1, 0),
                    # )
                    # gh_b = flow.matmul(
                    #     hid_t_b,
                    #     getattr(
                    #         self, "weight_hh_l{}{}".format(layer, "_reverse")
                    #     ).permute(1, 0),
                    # )

                    gi_b = flow.matmul(
                        x_t_b,
                        getattr(self, "weight_ih_l{}{}".format(layer, "_reverse")),
                    )
                    gh_b = flow.matmul(
                        hid_t_b,
                        getattr(self, "weight_hh_l{}{}".format(layer, "_reverse")),
                    )
                    if self.bias:
                        gi_b += getattr(self, "bias_ih_l{}{}".format(layer, "_reverse"))
                        gh_b += getattr(self, "bias_hh_l{}{}".format(layer, "_reverse"))

                    i_r_b, i_i_b, i_n_b = gi_b.chunk(3, dim=1)
                    h_r_b, h_i_b, h_n_b = gh_b.chunk(3, dim=1)

                    resetgate_b = flow.sigmoid(i_r_b + h_r_b)
                    inputgate_b = flow.sigmoid(i_i_b + h_i_b)
                    newgate_b = flow.tanh(i_n_b + resetgate_b * h_n_b)

                    hid_t_b = newgate_b + inputgate_b * (hid_t_b - newgate_b)

                    hidden_seq_b.insert(0, hid_t_b.unsqueeze(1))

            hidden_seq_f = flow.cat(hidden_seq_f, dim=1)
            if self.bidirectional:
                hidden_seq_b = flow.cat(hidden_seq_b, dim=1)

            if self.dropout != 0 and layer != self.num_layers - 1:
                hidden_seq_f = self.drop(hidden_seq_f)
                if self.bidirectional:
                    hidden_seq_b = self.drop(hidden_seq_b)

            if self.bidirectional:
                hidden_seq = flow.cat([hidden_seq_f, hidden_seq_b], dim=2)
            else:
                hidden_seq = hidden_seq_f

            if self.bidirectional:
                h_t = flow.cat([hid_t_f.unsqueeze(0), hid_t_b.unsqueeze(0)], dim=0)
            else:
                h_t = hid_t_f.unsqueeze(0)

            layer_hidden.append(h_t)

        h_t = flow.cat(layer_hidden, dim=0)

        if not self.batch_first:
            hidden_seq = self.permute_tensor(hidden_seq)

        return hidden_seq, h_t
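The even/odd gather at the top reflects the (D * num_layers, batch, hidden_size) layout in which forward and backward states are interleaved per layer. Usage mirrors the LSTM sketch above, here via flow.nn.GRU (again assuming the installed build ships it):

import oneflow as flow
import oneflow.nn as nn

gru = nn.GRU(input_size=8, hidden_size=16, num_layers=2,
             batch_first=True, bidirectional=True)
x = flow.randn(4, 10, 8)  # (batch, seq, feature)
out, h_n = gru(x)
print(out.shape)          # (4, 10, 32)
print(h_n.shape)          # (4, 4, 16): fwd/bwd interleaved per layer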
Example 11
import logging
import oneflow as flow
import oneflow.nn as nn
import oneflow.nn.functional as F

logger = logging.getLogger(__name__)

_ACTIVATION = {
    "relu": F.relu,
    "gelu": F.gelu,
    "glu": F.glu,
    "tanh": lambda x: flow.tanh(x),
    "swish": lambda x: x * flow.sigmoid(x),
}


class PositionwiseFeedForward(nn.Module):
    """Positionwise feed forward
    """
    def __init__(self, d_model, d_ff, dropout, activation="relu"):
        super(PositionwiseFeedForward, self).__init__()
        self.activation = activation

        assert activation in ["relu", "gelu", "glu", "tanh", "swish"]

        self.w_1 = nn.Linear(d_model,
                             d_ff * 2 if activation == "glu" else d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # The listing is truncated here; a plausible completion given the
        # layers defined above (an assumption, not the project's verbatim code):
        return self.w_2(self.dropout(_ACTIVATION[self.activation](self.w_1(x))))
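A quick usage sketch (sizes are illustrative; nn.Linear operates on the last dimension, so any leading shape works):

import oneflow as flow

ff = PositionwiseFeedForward(d_model=256, d_ff=1024, dropout=0.1,
                             activation="swish")
x = flow.randn(4, 50, 256)  # (batch, time, d_model)
print(ff(x).shape)          # (4, 50, 256)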