Example #1
import torch
from torch.nn import Parameter, init


class MatGRUGate(torch.nn.Module):
    """
    GRU gate for matrix, similar to the official code.
    Please refer to section 3.4 of the paper for the formula.
    """

    def __init__(self, rows, cols, activation):
        super().__init__()
        self.activation = activation
        self.W = Parameter(torch.Tensor(rows, rows))
        self.U = Parameter(torch.Tensor(rows, rows))
        self.bias = Parameter(torch.Tensor(rows, cols))
        self.reset_parameters()

    def reset_parameters(self):
        init.xavier_uniform_(self.W)
        init.xavier_uniform_(self.U)
        init.zeros_(self.bias)

    def forward(self, x, hidden):
        out = self.activation(self.W.matmul(x) + \
                              self.U.matmul(hidden) + \
                              self.bias)

        return out
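A minimal usage sketch (not part of the original snippet; the sigmoid activation and sizes are assumptions) showing how the gate maps a rows x cols input and hidden matrix to an output of the same shape:

import torch

rows, cols = 16, 8
gate = MatGRUGate(rows, cols, torch.nn.Sigmoid())

x = torch.randn(rows, cols)        # current input matrix
hidden = torch.randn(rows, cols)   # previous hidden-state matrix
out = gate(x, hidden)              # (rows, cols); values in (0, 1) with a sigmoid activation
print(out.shape)                   # torch.Size([16, 8])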
Example #2
import math

import torch
from torch import nn
from torch.nn import Parameter


class mat_GRU_gate(torch.nn.Module):
    def __init__(self, rows, cols, activation):
        super().__init__()
        self.activation = activation
        # k here corresponds to in_feats, which equals the number of rows
        self.W = Parameter(torch.Tensor(rows, rows))
        nn.init.orthogonal_(self.W)
        # self.reset_param(self.W)

        self.U = Parameter(torch.Tensor(rows, rows))
        nn.init.orthogonal_(self.U)
        # self.reset_param(self.U)

        self.bias = Parameter(torch.zeros(rows, cols))

    def reset_param(self, t):
        #Initialize based on the number of columns
        stdv = 1. / math.sqrt(t.size(1))
        t.data.uniform_(-stdv, stdv)

    def forward(self, x, hidden):
        out = self.activation(self.W.matmul(x) + \
                              self.U.matmul(hidden) + \
                              self.bias)

        return out
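For context, a hedged sketch (not from the original repository; the class name and wiring are assumptions) of how three such gates are commonly combined into a full matrix GRU cell using the standard GRU update, reset, and candidate equations:

import torch
from torch import nn


class MatGRUCellSketch(nn.Module):
    def __init__(self, rows, cols):
        super().__init__()
        self.update = mat_GRU_gate(rows, cols, nn.Sigmoid())
        self.reset = mat_GRU_gate(rows, cols, nn.Sigmoid())
        self.htilda = mat_GRU_gate(rows, cols, nn.Tanh())

    def forward(self, x, prev_hidden):
        z = self.update(x, prev_hidden)           # update gate
        r = self.reset(x, prev_hidden)            # reset gate
        h_cap = self.htilda(x, r * prev_hidden)   # candidate hidden state
        return (1 - z) * prev_hidden + z * h_cap  # blended new hidden state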
Example #3
import math

import torch
import torch.nn.functional as F
from torch import nn
from torch.nn import Parameter


class LeftSVDLayer(nn.Module):
    def __init__(self, ih, oh, dropout=None, bias=True):
        super().__init__()

        self.weight = Parameter(torch.Tensor(oh, ih))
        self.dropout = dropout

        if bias:
            self.bias = Parameter(torch.Tensor(oh, 1))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))

        if self.bias is not None:
            fin, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1. / math.sqrt(fin / 2.)
            nn.init.uniform_(self.bias, -bound, bound)

    def forward(self, x):
        y = self.weight.matmul(x)
        if self.bias is not None:
            y = y + self.bias

        if self.dropout is not None:
            # pass training so dropout is disabled in eval mode
            y = F.dropout(y, p=self.dropout, training=self.training)

        return y
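A short usage sketch (hypothetical sizes): because the weight multiplies from the left, the layer maps the row dimension ih of its input to oh and leaves the column dimension unchanged:

import torch

layer = LeftSVDLayer(ih=32, oh=16, dropout=0.1)
x = torch.randn(32, 100)   # 100 column vectors of size 32
y = layer(x)
print(y.shape)             # torch.Size([16, 100])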
Example #4
import math

import torch
from torch import nn
from torch.nn import Parameter
from torch.nn.modules.utils import _pair


class SVDLayer(nn.Module):
    def __init__(self, in_size, out_size, bias=True):
        super().__init__()

        ih, iw = _pair(in_size)
        oh, ow = _pair(out_size)

        self.w1 = Parameter(torch.Tensor(oh, ih))
        # w2 is stored pre-transposed so forward() does not have to transpose it on every call
        self.w2 = Parameter(torch.Tensor(iw, ow))

        if bias:
            self.bias = Parameter(torch.Tensor(oh, ow))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        nn.init.kaiming_uniform_(self.w1, a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.w2, a=math.sqrt(5))

        if self.bias is not None:
            fin1, _ = nn.init._calculate_fan_in_and_fan_out(self.w1)
            fin2, _ = nn.init._calculate_fan_in_and_fan_out(self.w2)

            bound = 1. / math.sqrt((fin1 + fin2) / 2.)
            nn.init.uniform_(self.bias, -bound, bound)

    def forward(self, x):
        a = self.w1.matmul(x)
        y = a.matmul(self.w2)

        if self.bias is not None:
            return y + self.bias
        else:
            return y

    def __repr__(self):
        oh, ih = self.w1.shape
        iw, ow = self.w2.shape
        return f'SVDLayer ({ih}, {iw}) -> ({oh}, {ow})'
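A usage sketch (hypothetical sizes): the layer applies the two-sided map y = w1 @ x @ w2 (+ bias), so a matrix input of size in_size is mapped to a matrix of size out_size:

import torch

layer = SVDLayer(in_size=(32, 20), out_size=(16, 10))
x = torch.randn(32, 20)
y = layer(x)
print(y.shape)   # torch.Size([16, 10])
print(layer)     # SVDLayer (32, 20) -> (16, 10)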
Example #5
import torch
from torch import nn
from torch.nn import Parameter


class coattention(nn.Module):
    """Co-attention layer.

    Computes high-level attended representations from V (d x N) and Q (d x T).
    """
    def __init__(self, dim_d):
        super(coattention, self).__init__()
        dim_k = dim_d
        self.W_b = Parameter(torch.Tensor(dim_d, dim_d))
        self.W_v = Parameter(torch.Tensor(dim_k, dim_d))
        self.W_q = Parameter(torch.Tensor(dim_k, dim_d))
        self.w_hv = Parameter(torch.Tensor(1, dim_k))
        self.w_hq = Parameter(torch.Tensor(1, dim_k))
        self.tanh = nn.Tanh()
        # softmax over the attention positions, i.e. the last dimension
        self.softmax = nn.Softmax(dim=-1)
        # torch.Tensor allocates uninitialized memory, so the parameters need
        # an explicit initialization; Xavier is used here as a reasonable default
        for p in self.parameters():
            nn.init.xavier_uniform_(p)

    def forward(self, Q, V):
        """
        :param Q: [batch, dim_d, dim_T]
        :param V: [batch, dim_d, dim_N]
        :return: q_hat [batch, dim_d], v_hat [batch, dim_d]
        """
        QT = torch.transpose(Q, 1, 2)
        C = QT.matmul(self.W_b.matmul(V))  # affinity matrix, [batch, dim_T, dim_N]
        C = self.tanh(C)
        Hv = self.tanh(self.W_v.matmul(V) + self.W_q.matmul(Q).matmul(C))  # [batch, dim_k, dim_N]
        av = self.softmax(self.w_hv.matmul(Hv))                            # attention over N: [batch, 1, dim_N]
        v_hat = torch.bmm(av, V.transpose(1, 2)).squeeze()                 # [batch, dim_d]
        Hq = self.tanh(
            self.W_q.matmul(Q) +
            self.W_v.matmul(V).matmul(torch.transpose(C, 1, 2)))           # [batch, dim_k, dim_T]
        aq = self.softmax(self.w_hq.matmul(Hq))                            # attention over T: [batch, 1, dim_T]
        q_hat = torch.bmm(aq, Q.transpose(1, 2)).squeeze()                 # [batch, dim_d]
        return q_hat, v_hat
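A usage sketch with random tensors (the batch size and the d, N, and T values are arbitrary assumptions) to illustrate the expected input and output shapes:

import torch

coatt = coattention(dim_d=64)
Q = torch.randn(4, 64, 20)   # [batch, dim_d, dim_T]
V = torch.randn(4, 64, 49)   # [batch, dim_d, dim_N]
q_hat, v_hat = coatt(Q, V)
print(q_hat.shape, v_hat.shape)   # torch.Size([4, 64]) torch.Size([4, 64])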