Example 1
    def test_decoding2(self):
        X = 'HECDRKTCDESFSTKGNLRVHKLGH'
        Y = 'LKCSGCGKNFKSQYAYKRHEQTH'

        needle = NeedlemanWunschDecoder(self.operator)
        # Load the decoding matrix used for traceback from the test data.
        dm = torch.Tensor(np.loadtxt(get_data_path('dm.txt')))
        decoded = needle.traceback(dm)
        # Each traceback entry is an (x_index, y_index, state) triple.
        pred_x, pred_y, pred_states = list(zip(*decoded))
        # The call should succeed, i.e. the decoded states form a valid
        # alignment of X and Y.
        states2alignment(np.array(pred_states), X, Y)
Example 2
    def test_decoding(self):
        theta = torch.tensor(make_data().astype(np.float32),
                             device=self.theta.device).unsqueeze(0)
        theta.requires_grad_()
        A = 0.1 * torch.ones_like(
            theta, dtype=torch.float32, device=self.theta.device)
        needle = NeedlemanWunschDecoder(self.operator)
        v = needle(theta, A)
        # Backpropagating the smoothed alignment score leaves the expected
        # alignment matrix in theta.grad, which traceback then decodes.
        v.backward()
        decoded = needle.traceback(theta.grad.squeeze())
        decoded = [(x[0], x[1]) for x in decoded]
        states = [(0, 0), (1, 0), (2, 0), (3, 1), (4, 2), (4, 3)]
        self.assertListEqual(states, decoded)
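
The same decode-then-traceback pattern can be run outside a test harness. A minimal sketch, assuming the 'softmax' operator; shapes, values, and the device are illustrative and not taken from the original:

theta = torch.rand(1, 4, 5, requires_grad=True)    # illustrative B x N x M match scores
A = 0.1 * torch.ones_like(theta)                   # illustrative gap scores
needle = NeedlemanWunschDecoder('softmax')
v = needle(theta, A)                 # smoothed alignment score
v.backward()                         # theta.grad holds the expected alignment matrix
decoded = needle.traceback(theta.grad.squeeze())   # (x_index, y_index, state) triples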
Example 3
theta = torch.rand(B,
                   N,
                   M,
                   requires_grad=True,
                   dtype=torch.float32,
                   device=cuda_device)
A = torch.ones(B, dtype=torch.float32, device=cuda_device) * -1.0


def mytime(func, args):
    """ Return func(*args) together with its wall-clock run time in seconds. """
    starttime = time.time()
    res = func(*args)
    return res, time.time() - starttime


needle = NeedlemanWunschDecoder('softmax')

# Warm-up forward and backward pass before the timed loop.
v = needle(theta, A)
v.sum().backward()

ft, bt = [], []
for i in range(nruns):
    v, forwardtime = mytime(needle, (theta, A))
    _, backtime = mytime(v.sum().backward, ())
    del v

    ft.append(forwardtime)
    bt.append(backtime)

# Mean forward and backward times over the nruns repetitions.
print(np.array(ft).mean())
print(np.array(bt).mean())
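
The benchmark above relies on names defined elsewhere in the script (B, N, M, nruns, cuda_device) as well as the usual imports; NeedlemanWunschDecoder itself comes from the library and its import path is not shown in the original. A minimal setup sketch with illustrative values:

import time

import numpy as np
import torch

cuda_device = torch.device('cuda')   # assumes a CUDA-capable GPU is available
B, N, M = 32, 128, 128               # illustrative batch size and sequence lengths
nruns = 10                           # illustrative number of timed repetitions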
Example 4
    def test_hessian_needlemanwunsch_function(self):
        needle = NeedlemanWunschDecoder(self.operator)
        inputs = (self.theta, self.A)
        gradgradcheck(needle, inputs, eps=1e-1, atol=1e-1, rtol=1e-1)
Example 5
    def test_grad_needlemanwunsch_function(self):
        needle = NeedlemanWunschDecoder(self.operator)
        theta, A = self.theta, self.A
        theta.requires_grad_()
        gradcheck(needle, (theta, A), eps=1e-1, atol=1e-1, rtol=1e-1)
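
Examples 1, 2, 4, and 5 are methods of a unittest.TestCase and assume a fixture defining self.operator, self.theta, and self.A (the helpers make_data, get_data_path, states2alignment and the decoder itself come from the library under test and are not shown here). A minimal sketch of such a fixture; names, shapes, and values are illustrative:

import unittest

import numpy as np
import torch
from torch.autograd import gradcheck, gradgradcheck


class TestNeedlemanWunschDecoder(unittest.TestCase):
    def setUp(self):
        # Hypothetical fixture: small random inputs and the softmax operator.
        torch.manual_seed(0)
        B, N, M = 1, 4, 5
        self.operator = 'softmax'
        self.theta = torch.rand(B, N, M, dtype=torch.float32,
                                requires_grad=True)
        self.A = 0.1 * torch.ones_like(self.theta)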
Example 6
class NeedlemanWunschAligner(nn.Module):
    def __init__(self,
                 n_alpha,
                 n_input,
                 n_units,
                 n_embed,
                 n_layers=2,
                 lm=None,
                 device='gpu'):
        """ NeedlemanWunsch Alignment model

        Parameters
        ----------
        n_alpha : int
           Size of the alphabet (default 22)
        n_input : int
           Input dimensions.
        n_units : int
           Number of hidden units in RNN.
        n_embed : int
           Embedding dimension
        n_layers : int
           Number of RNN layers.
        lm : BiLM
           Pretrained language model (optional)
        padding_idx : int
           Location of padding index in embedding (default -1)
        transform : function
           Activation function (default relu)
        sparse : False?
        """
        super(NeedlemanWunschAligner, self).__init__()
        if lm is None:
            path = pretrained_language_models['bilstm']
            self.lm = BiLM()
            self.lm.load_state_dict(torch.load(path))
            self.lm.eval()
        if n_layers > 1:
            self.match_embedding = StackedRNN(n_alpha,
                                              n_input,
                                              n_units,
                                              n_embed,
                                              n_layers,
                                              lm=lm)
            self.gap_embedding = StackedRNN(n_alpha,
                                            n_input,
                                            n_units,
                                            n_embed,
                                            n_layers,
                                            lm=lm)
        else:
            self.match_embedding = EmbedLinear(n_alpha,
                                               n_input,
                                               n_embed,
                                               lm=lm)
            self.gap_embedding = EmbedLinear(n_alpha, n_input, n_embed, lm=lm)

        # TODO: make cpu compatible version
        # if device == 'cpu':
        #     self.nw = NWDecoderCPU(operator='softmax')
        # else:
        self.nw = NWDecoderCUDA(operator='softmax')

    def forward(self, x, order):
        """ Generate alignment matrix.

        Parameters
        ----------
        x : PackedSequence
            Packed sequence object of proteins to align.
        order : np.array
            The original order of the sequences.

        Returns
        -------
        aln : torch.Tensor
            Alignment matrix (dim B x N x M).
        theta : torch.Tensor
            Match scores (dim B x N x M).
        A : torch.Tensor
            Gap scores (dim B x N x M).
        """
        with torch.enable_grad():
            zx, _, zy, _ = unpack_sequences(self.match_embedding(x), order)
            gx, _, gy, _ = unpack_sequences(self.gap_embedding(x), order)

            # Obtain match scores (theta) and gap scores (A) through inner
            # products across the latent dimensions
            theta = F.softplus(torch.einsum('bid,bjd->bij', zx, zy))
            A = F.logsigmoid(torch.einsum('bid,bjd->bij', gx, gy))
            aln = self.nw.decode(theta, A)
            return aln, theta, A

    def traceback(self, x, order):
        """ Yield the traceback decoding and alignment matrix for each pair. """
        # dim B x N x D
        with torch.enable_grad():
            zx, _, zy, _ = unpack_sequences(self.match_embedding(x), order)
            gx, xlen, gy, ylen = unpack_sequences(self.gap_embedding(x), order)
            match = F.softplus(torch.einsum('bid,bjd->bij', zx, zy))
            gap = F.logsigmoid(torch.einsum('bid,bjd->bij', gx, gy))
            B, _, _ = match.shape
            for b in range(B):
                aln = self.nw.decode(match[b, :xlen[b], :ylen[b]].unsqueeze(0),
                                     gap[b, :xlen[b], :ylen[b]].unsqueeze(0))
                decoded = self.nw.traceback(aln.squeeze())
                yield decoded, aln
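
The pairwise scores in forward() come from the batched inner-product contraction 'bid,bjd->bij'. A small standalone sketch of what that einsum produces, with illustrative shapes:

import torch
import torch.nn.functional as F

zx = torch.randn(2, 5, 8)    # B x N x D embeddings of the first sequences
zy = torch.randn(2, 7, 8)    # B x M x D embeddings of the second sequences

# Entry [b, i, j] is the inner product of zx[b, i] and zy[b, j].
theta = F.softplus(torch.einsum('bid,bjd->bij', zx, zy))
assert theta.shape == (2, 5, 7)   # B x N x M match scores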