Example #1
    def test(self):
        # output from the custom implementation
        X = torch.Tensor([[1, 1, 1, 0, 0], [0, 1, 1, 1, 0], [0, 0, 1, 1, 1],
                          [0, 0, 1, 1, 0], [0, 1, 1, 0, 0]])
        X = X.reshape(
            (1, 1, 5, 5))  # (batch_size x channels x height x width)
        conv = Conv()
        K = torch.Tensor([[[[1, 0, 1], [0, 1, 0], [1, 0, 1]]]])
        conv.init_params(K, 3, stride=1, padding=1)
        output_custom = conv.forward(X)
        print(output_custom)

        # output from PyTorch's implementation
        output_pytorch = F.conv2d(X, K, padding=1, stride=1)
        print(output_pytorch)
        # assertEqual cannot compare multi-element tensors; use allclose instead
        self.assertTrue(torch.allclose(output_custom, output_pytorch))
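The Conv class exercised by this test is not shown on this page. As a rough sketch only, a minimal implementation consistent with the calls above (an init_params that takes an explicit kernel, and a forward that does a plain cross-correlation) could look like the following; the class body and the loop-based inner product are assumptions for illustration, not the original code:

import torch
import torch.nn.functional as F

class Conv:
    """Hypothetical minimal conv layer matching the test's calls above."""

    def init_params(self, K, kernel_size, stride=1, padding=0):
        self.K = K              # explicit kernel, shape (out_ch, in_ch, kh, kw)
        self.stride = stride
        self.padding = padding

    def forward(self, X):
        # zero-pad the input, then slide the kernel (cross-correlation)
        X = F.pad(X, (self.padding,) * 4)
        n, _, h, w = X.shape
        out_ch, _, kh, kw = self.K.shape
        oh = (h - kh) // self.stride + 1
        ow = (w - kw) // self.stride + 1
        out = torch.zeros(n, out_ch, oh, ow)
        for i in range(oh):
            for j in range(ow):
                patch = X[:, :, i * self.stride:i * self.stride + kh,
                          j * self.stride:j * self.stride + kw]
                # broadcast (n, 1, in_ch, kh, kw) * (out_ch, in_ch, kh, kw)
                out[:, :, i, j] = (patch.unsqueeze(1) * self.K).sum(dim=(2, 3, 4))
        return out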
Example #2
import numpy as np
import torch
import torch.nn as nn


class CRF(nn.Module):
    def __init__(self,
                 input_dim,
                 embed_dim,
                 conv_layers,
                 num_labels,
                 batch_size,
                 m=14):
        """
        Linear chain CRF as in Assignment 2
        """
        super(CRF, self).__init__()

        # crf param
        self.input_dim = input_dim
        self.embed_dim = embed_dim
        self.num_labels = num_labels
        self.batch_size = batch_size
        self.use_cuda = torch.cuda.is_available()
        self.m = m

        # conv layer params
        self.out_channels = 1  # output channel of conv layer
        self.conv_layers = conv_layers
        self.stride = (1, 1)
        self.padding = True
        self.cout_shape = self.get_cout_dim()  # output shape of conv layer
        self.cout_numel = self.cout_shape[0] * self.cout_shape[1]

        self.init_params()

        ### Use GPU if available
        if self.use_cuda:
            self.cuda()

    def init_params(self):
        """
        Initialize trainable parameters of CRF here
        """
        self.conv = Conv(self.conv_layers[0][-1],
                         self.out_channels,
                         padding=self.padding,
                         stride=self.stride)
        # register W and T as Parameters so they are returned by
        # self.parameters() and moved by .cuda() with the rest of the module
        self.W = nn.Parameter(torch.randn(self.num_labels, self.cout_numel))
        self.T = nn.Parameter(torch.randn(self.num_labels, self.num_labels))

    def get_cout_dim(self):
        # with "same" padding, the conv output spatial size is ceil(input / stride)
        if self.padding:
            return (int(np.ceil(self.input_dim[0] / self.stride[0])),
                    int(np.ceil(self.input_dim[1] / self.stride[1])))
        return None
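    # e.g. input_dim = (16, 8) with stride (1, 1) gives cout_shape = (16, 8),
    # so cout_numel = 16 * 8 = 128 conv features per letter image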

    # X: (batch_size, 14, 16, 8) dimensional tensor
    # iterates over all words in a batch, and decodes them one by one
    def forward(self, X):
        """
        Implement the objective of CRF here.
        The input (features) to the CRF module should be convolution features.
        """
        decoded = torch.zeros(self.batch_size, self.m, 1, dtype=torch.long)
        for i in range(self.batch_size):
            # reshape the word to (m, 1, height, width), e.g. (14, 1, 16, 8)
            word = X[i].reshape(self.m, 1, self.input_dim[0],
                                self.input_dim[1])
            # the conv operation is applied independently to every letter of the word
            features = self.get_conv_features(word)
            # now decode the sequence using the conv features
            decoded[i] = self.dp_infer(features)

        return decoded

    # input: x: (m, d), m is # of letters a word has, d is the feature dimension of letter image
    # input: w: (26, d), letter weight vector
    # input: T: (26, 26), letter-letter transition matrix
    # output: letter_indices: (m, 1), letter labels of a word

    # decode a sequence of letters for one word
    def dp_infer(self, x):
        w = self.W
        T = self.T
        m = self.m

        pos_letter_value_table = torch.zeros((m, self.num_labels),
                                             dtype=torch.float64)
        pos_best_prevletter_table = torch.zeros((m, self.num_labels),
                                                dtype=torch.long)
        # special handling for the first letter: only the w and x dot product
        # applies, since there is no incoming transition
        for i in range(self.num_labels):
            pos_letter_value_table[0, i] = torch.dot(w[i, :], x[0, :])

        # pos_best_prevletter_table first row is all zero as there is no previous letter for the first letter

        # start from 2nd position
        for pos in range(1, m):
            # go over all possible letters
            for letter_ind in range(self.num_labels):
                # get the previous letter scores
                prev_letter_scores = pos_letter_value_table[pos - 1, :].clone()
                # we need to calculate scores of combining the current letter and all previous letters
                # no need to calculate the dot product because dot product only covers current letter and position
                # which means it is independent of all previous letters
                for prev_letter_ind in range(self.num_labels):
                    prev_letter_scores[prev_letter_ind] += T[prev_letter_ind,
                                                             letter_ind]

                # find out which previous letter achieved the largest score by now
                best_letter_ind = torch.argmax(prev_letter_scores)
                # update the score of the current position with the current letter
                pos_letter_value_table[pos, letter_ind] = prev_letter_scores[
                    best_letter_ind] + torch.dot(w[letter_ind, :], x[pos, :])
                # save the best previous letter for following tracking to generate most possible word
                pos_best_prevletter_table[pos, letter_ind] = best_letter_ind
        letter_indices = torch.zeros((m, 1), dtype=torch.long)
        # start from the best final letter and backtrack through the table
        letter_indices[m - 1,
                       0] = torch.argmax(pos_letter_value_table[m - 1, :])
        for pos in range(m - 2, -1, -1):
            letter_indices[pos, 0] = pos_best_prevletter_table[
                pos + 1, letter_indices[pos + 1, 0]]
        return letter_indices
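
    # Aside (a sketch, not the original code): the nested loops above are the
    # classic Viterbi recursion, and each position's update can be written as
    # one broadcast over the (num_labels, num_labels) transition matrix:
    #   scores = pos_letter_value_table[pos - 1, :].unsqueeze(1) + T
    #   best_scores, best_prev = torch.max(scores, dim=0)
    #   pos_letter_value_table[pos] = best_scores + x[pos] @ w.t()
    #   pos_best_prevletter_table[pos] = best_prev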

    def loss(self, X, labels):
        """
        Compute the negative conditional log-likelihood of a labelling given a
        sequence: log partition (forward algorithm) minus the gold path score.
        """
        features = self.get_conv_features(X)
        emissions = torch.mm(features, self.W.t())  # (m, num_labels)
        gold_score = emissions[0, labels[0]]
        alpha = emissions[0]  # forward (log-sum-exp) scores over labels
        for pos in range(1, self.m):
            gold_score = (gold_score + emissions[pos, labels[pos]] +
                          self.T[labels[pos - 1], labels[pos]])
            alpha = emissions[pos] + torch.logsumexp(alpha.unsqueeze(1) + self.T, dim=0)
        return torch.logsumexp(alpha, dim=0) - gold_score

    def backward(self):
        """
        Return the gradient of the CRF layer
        :return:
        """
        # autograd populates .grad on the trainable parameters once
        # loss(...).backward() has been called; collect those gradients here
        return [p.grad for p in (self.W, self.T)]

    # performs the conv operation on every (16, 8) letter image in the word;
    # m = 14 (default) is the word length
    # returns a flattened vector of the new conv features per letter
    def get_conv_features(self, word):
        """
        Generate convolution features for a given word
        """
        cout = self.conv.forward(word)
        cout = cout.reshape(cout.shape[0], self.cout_numel)
        return cout
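
A quick smoke test of the class above, with shapes taken from its comments (14-letter words of 16x8 letter images, 26 labels). Here conv_layers=[[1]] is an assumption shaped only to satisfy the self.conv_layers[0][-1] access in init_params, and Conv is the project's own layer:

# hypothetical smoke test; shapes follow the comments in the class above
batch_size = 4
crf = CRF(input_dim=(16, 8), embed_dim=128, conv_layers=[[1]],
          num_labels=26, batch_size=batch_size)
X = torch.randn(batch_size, 14, 16, 8)  # (batch, m, height, width)
decoded = crf.forward(X)                # (batch, m, 1) decoded letter indices
print(decoded.shape)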