Example #1
    def forward(self, inputs):

        conv1 = self.conv1(inputs)
        conv2 = self.conv2(conv1)
        res = self.out(conv2)

        return res
Example #2
    def forward(self, indices: Union[Tensor, PackedSequence], dim: int = -2) -> Union[Tensor, PackedSequence]:
        if torch.is_tensor(indices):
            return self.weight[:indices.size()[dim]]

        batch_sizes = indices.batch_sizes.to(device=indices.data.device)
        indices, _ = major_sizes_to_ptr(batch_sizes=batch_sizes)
        return torch.embedding(weight=self.weight, indices=indices)
Example #3
    def embedding(self,
                  input,
                  weight,
                  padding_idx=None,
                  max_norm=None,
                  norm_type=2,
                  scale_grad_by_freq=False,
                  sparse=False):

        input = input.contiguous()
        if padding_idx is not None:
            if padding_idx > 0:
                assert padding_idx < weight.size(
                    0), 'Padding_idx must be within num_embeddings'
            elif padding_idx < 0:
                assert padding_idx >= -weight.size(
                    0), 'Padding_idx must be within num_embeddings'
                padding_idx = weight.size(0) + padding_idx
        else:
            padding_idx = -1
        if max_norm is not None:
            with torch.no_grad():
                torch.embedding_renorm_(weight, input, max_norm, norm_type)
        return torch.embedding(weight, input, padding_idx, scale_grad_by_freq,
                               sparse)
Example #4
    def forward(self, position_ids):
        max_pos_id = position_ids.max()
        # update positional encoding if needed
        if max_pos_id >= self._max_sequence_length:
            self._max_sequence_length = max_pos_id + 1
            self._build_pos_enc(
                hidden_size=self._hidden_size,
                max_sequence_length=self._max_sequence_length,
                device=position_ids.device,
            )

        return torch.embedding(self.pos_enc, position_ids)
Example #5
    def forward(self, sentences: torch.Tensor) -> torch.Tensor:
        batch, maxlen = sentences.size()

        swapped_sentence = sentences.transpose(0, 1)
        norm_input = torch.log_softmax(self.input, dim=0)
        # torch.embedding expects (weight, indices)
        forward_emission = torch.embedding(norm_input, swapped_sentence.reshape(-1)).reshape(maxlen, batch, self.num_state)
        forward_transition = self.logsoftmax(self.transition.unsqueeze(0))
        # forward
        forwards = [forward_emission[0]]
        # forwards = [self.tanh(forward_emission[0])]
        for i in range(1, maxlen):
            pre_forward = forwards[i - 1]
            current_forward = self.bmv_log_product(forward_transition, pre_forward)
            forwards.append(current_forward + forward_emission[i])
            # current_forward = torch.matmul(forward_transition, pre_forward.unsqueeze(-1)).squeeze(-1)
            # forwards.append(current_forward * forward_emission[i])

        # backward
        backward_emission = forward_emission.flip(dims=[0])
        backward_transition = forward_transition.transpose(1, 2)
        backwards = [backward_emission[0]]

        for i in range(1, maxlen):
            pre_backward = backwards[i - 1]
            current_backward = self.bmv_log_product(backward_transition, pre_backward)
            backwards.append(current_backward + backward_emission[i])
            # current_backward = torch.matmul(backward_transition, pre_backward.unsqueeze(-1)).squeeze(-1)
            # backwards.append(current_backward * backward_emission[i])

        forwards = torch.stack(forwards, dim=0)
        backwards = torch.stack(backwards[::-1], dim=0)
        # nan_detection(forwards)
        # nan_detection(backwards)

        expected_count = forwards + backwards - forward_emission
        expected_count = expected_count - torch.logsumexp(expected_count, dim=-1, keepdim=True)
        # expected_count_debug = forwards * backwards / forward_emission
        # expected_count1 = backwards[0].unsqueeze(0)
        # expected_count2 = torch.matmul(forward_transition.unsqueeze(0), forwards[:-1].unsqueeze(-1)).squeeze(-1) * backwards[1:]
        # expected_count = torch.cat([expected_count1, expected_count2], dim=0)
        # expected_count = smoothing(expected_count)

        # nan_detection(expected_count)
        score = self.bmm_log_product(expected_count, self.logsoftmax(self.output.unsqueeze(0)))
        # score = torch.matmul(expected_count, self.output.unsqueeze(0))
        # nan_detection(score)
        return score.transpose(0, 1)
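Note: the bmv_log_product helper used above is not shown in this example. A minimal log-space batched matrix-vector product consistent with how it is called might look like the sketch below (the name, shapes, and broadcasting convention are assumptions inferred from the call sites):

import torch

def bmv_log_product(log_matrix, log_vec):
    # log_matrix: (batch, num_state, num_state) log-transition scores
    # log_vec:    (batch, num_state) log-scores from the previous step
    # returns     (batch, num_state), where out[b, i] = logsumexp_j(log_matrix[b, i, j] + log_vec[b, j])
    return torch.logsumexp(log_matrix + log_vec.unsqueeze(-2), dim=-1)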
Example #6
def test_vq_st_gradient2():
    inputs = torch.rand((2, 3, 5, 7), dtype=torch.float32, requires_grad=True)
    codebook = torch.rand((11, 7), dtype=torch.float32, requires_grad=True)
    codes, _ = vq_st(inputs, codebook)

    indices = vq(inputs, codebook)
    codes_torch = torch.embedding(codebook, indices, padding_idx=-1,
        scale_grad_by_freq=False, sparse=False)

    grad_output = torch.rand((2, 3, 5, 7), dtype=torch.float32)
    grad_codebook, = torch.autograd.grad(codes, codebook,
        grad_outputs=[grad_output])
    grad_codebook_torch, = torch.autograd.grad(codes_torch, codebook,
        grad_outputs=[grad_output])

    # Gradient is the same as torch.embedding function
    assert grad_codebook.size() == (11, 7)
    assert np.allclose(grad_codebook.numpy(), grad_codebook_torch.numpy())
Example #7
def test_emb_grad():
    # Embedding vector gradient is the same as torch.embedding function
    input = torch.rand((2, 3, 5, 7), dtype=torch.float32, requires_grad=True)
    emb = torch.rand((11, 7), dtype=torch.float32, requires_grad=True)
    output, latents = QuantizeVector.apply(input, emb)

    output_torch = torch.embedding(emb, latents, padding_idx=-1,
        scale_grad_by_freq=False, sparse=False)

    grad_output = torch.rand((2, 3, 5, 7), dtype=torch.float32)
    grad_emb, = torch.autograd.grad(output, emb,
        grad_outputs=[grad_output])
    grad_emb_torch, = torch.autograd.grad(output_torch, emb,
        grad_outputs=[grad_output])

    assert grad_emb.size() == (11, 7), \
        'Embedding gradient shape does not match the embedding space.'
    assert np.allclose(grad_emb.numpy(), grad_emb_torch.numpy()), \
        'Embedding gradients are not equal to the torch.embedding function.'
Example #8
    def forward(self, idx):
        """pull emb from server"""
        with torch.no_grad():
            bsz, slen = idx.size()
            cpu_idx = idx.cpu()
            unique_idx = torch.unique(cpu_idx)
            unique_emb = self.client.pull(self.name, unique_idx)
            gpu_emb = unique_emb.to(idx.device).detach_().requires_grad_(True)
        idx_mapping = {i.item(): j for j, i in enumerate(unique_idx)}
        mapped_idx = torch.zeros((bsz, slen), dtype=torch.long)
        for i in range(bsz):
            for j in range(slen):
                mapped_idx[i][j] = idx_mapping[cpu_idx[i][j].item()]

        # emb = torch.index_select(gpu_emb, 0, mapped_idx.cuda().view(-1)).view(bsz, slen, -1)
        emb = torch.embedding(gpu_emb, mapped_idx.cuda())
        # print('emb norm: {:.3f}, dtype: {}'.format(torch.norm(emb.data), emb.dtype))
        if self.training:
            self.trace.append((unique_idx, gpu_emb))
        return emb
Example #9
    def forward(self, sentences: torch.Tensor, length: torch.Tensor) -> torch.Tensor:
        batch, maxlen = sentences.size()

        swapped_sentence = sentences.transpose(0, 1)
        norm_embeding = torch.log_softmax(self.input, dim=0)
        # torch.embedding expects (weight, indices)
        forward_emission = self.logsoftmax(torch.embedding(norm_embeding, swapped_sentence.view(-1)).view(maxlen, batch, self.num_state))
        forward_transition = self.logsoftmax(self.transition).unsqueeze(0)
        # forward
        forwards = [forward_emission[0]]
        # forwards = [self.tanh(forward_emission[0])]
        for i in range(1, maxlen):
            pre_forward = forwards[i - 1]
            current_forward = self.bmv_log_product(forward_transition, pre_forward)
            forwards.append(current_forward + forward_emission[i])
            # current_forward = torch.matmul(forward_transition, pre_forward.unsqueeze(-1)).squeeze(-1)
            # forwards.append(current_forward * forward_emission[i])
        # shape [batch, dim]
        hidden_states = torch.stack(forwards)[length-1, torch.arange(batch), :]

        # shape [batch, label]
        score = self.bmv_log_product(self.logsoftmax(self.output.unsqueeze(0)), hidden_states)
        # score = torch.matmul(expected_count, self.output.unsqueeze(0))
        # nan_detection(score)
        return score
Example #10
def Fembedding(input, weight, padding_idx=None, max_norm=None, norm_type=2,
            scale_grad_by_freq=False, sparse=False):
    r"""A simple lookup table that looks up embeddings in a fixed dictionary and size.

    This module is often used to retrieve word embeddings using indices.
    The input to the module is a list of indices, and the embedding matrix,
    and the output is the corresponding word embeddings.

    See :class:`torch.nn.Embedding` for more details.

    Args:
        input (LongTensor): Tensor containing indices into the embedding matrix
        weight (Tensor): The embedding matrix
            Number of rows should correspond to the maximum possible index + 1,
            number of columns is the embedding size
        padding_idx (int, optional): If given, pads the output with the embedding vector at :attr:`padding_idx`
                                        (initialized to zeros) whenever it encounters the index.
        max_norm (float, optional): If given, will renormalize the embedding vectors to have a norm less than
                                    this before extracting. Note: this will modify :attr:`weight` in-place.
        norm_type (float, optional): The p of the p-norm to compute for the max_norm option. Default ``2``.
        scale_grad_by_freq (boolean, optional): if given, this will scale gradients by the inverse of frequency of
                                                the words in the mini-batch. Default ``False``.
        sparse (bool, optional): if ``True``, gradient w.r.t. :attr:`weight` will be a sparse tensor. See Notes under
                                :class:`torch.nn.Embedding` for more details regarding sparse gradients.

    Shape:
        - Input: LongTensor of arbitrary shape containing the indices to extract
        - Weight: Embedding matrix of floating point type with shape `(V, embedding_dim)`,
                            where V = maximum index + 1 and embedding_dim = the embedding size
        - Output: `(*, embedding_dim)`, where `*` is the input shape

    Examples::

        >>> # a batch of 2 samples of 4 indices each
        >>> input = torch.tensor([[1,2,4,5],[4,3,2,9]])
        >>> # an embedding matrix containing 10 tensors of size 3
        >>> embedding_matrix = torch.rand(10, 3)
        >>> F.embedding(input, embedding_matrix)
        tensor([[[ 0.8490,  0.9625,  0.6753],
                [ 0.9666,  0.7761,  0.6108],
                [ 0.6246,  0.9751,  0.3618],
                [ 0.4161,  0.2419,  0.7383]],

                [[ 0.6246,  0.9751,  0.3618],
                [ 0.0237,  0.7794,  0.0528],
                [ 0.9666,  0.7761,  0.6108],
                [ 0.3385,  0.8612,  0.1867]]])

        >>> # example with padding_idx
        >>> weights = torch.rand(10, 3)
        >>> weights[0, :].zero_()
        >>> embedding_matrix = weights
        >>> input = torch.tensor([[0,2,0,5]])
        >>> F.embedding(input, embedding_matrix, padding_idx=0)
        tensor([[[ 0.0000,  0.0000,  0.0000],
                [ 0.5609,  0.5384,  0.8720],
                [ 0.0000,  0.0000,  0.0000],
                [ 0.6262,  0.2438,  0.7471]]])
    """
    if padding_idx is not None:
        if padding_idx > 0:
            assert padding_idx < weight.size(0), 'Padding_idx must be within num_embeddings'
        elif padding_idx < 0:
            assert padding_idx >= -weight.size(0), 'Padding_idx must be within num_embeddings'
            padding_idx = weight.size(0) + padding_idx
    else:
        padding_idx = -1
    if max_norm is not None:
        # `embedding_renorm_` will call .contiguous() on input anyways, so we
        # call it here and take advantage of the improved locality in the
        # `embedding` call below too.
        input = input.contiguous()
        with torch.no_grad():
            torch.embedding_renorm_(weight, input, max_norm, norm_type)
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
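As a quick sanity check, Fembedding can be exercised like torch.nn.functional.embedding. The tensors below are made up purely for illustration:

import torch

weight = torch.rand(10, 3)                            # 10 embeddings of size 3
indices = torch.tensor([[1, 2, 4, 5], [4, 3, 2, 9]])  # a batch of 2 samples of 4 indices each
out = Fembedding(indices, weight)
print(out.shape)                                      # torch.Size([2, 4, 3])
# a negative padding_idx is normalized to weight.size(0) + padding_idx (here 9),
# and max_norm renormalizes the referenced rows of weight in-place
out_pad = Fembedding(indices, weight, padding_idx=-1, max_norm=1.0)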
Example #11
def function_hook(input, weight, *args, **kwargs):
    return torch.embedding(weight, input)
Example #12
    def forward(self, position_ids):
        return torch.embedding(self.pos_enc, position_ids)
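Both positional-encoding examples above (#4 and #12) reduce to a plain row lookup in a precomputed table. A self-contained version with invented sizes:

import torch

pos_enc = torch.randn(512, 64)                   # (max_sequence_length, hidden_size), sizes assumed
position_ids = torch.arange(10).unsqueeze(0)     # (batch=1, seq_len=10)
pos_embeddings = torch.embedding(pos_enc, position_ids)
print(pos_embeddings.shape)                      # torch.Size([1, 10, 64])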
Example #13
def RNN(testloader, lines_test):
    act_bit = 8
    model = torch.load('lstm_model.pt')
    embed_weight = model.embed.weight.data.numpy()
    lstm_ih_weight = model.rnn.weight_ih_l0.data.numpy().T
    lstm_hh_weight = model.rnn.weight_hh_l0.data.numpy().T
    lstm_ih_bias = model.rnn.bias_ih_l0.data.numpy()
    lstm_hh_bias = model.rnn.bias_hh_l0.data.numpy()

    w_ii = lstm_ih_weight[:, :50]
    w_if = lstm_ih_weight[:, 50:100]
    w_ig = lstm_ih_weight[:, 100:150]
    w_io = lstm_ih_weight[:, 150:]

    w_hi = lstm_hh_weight[:, :50]
    w_hf = lstm_hh_weight[:, 50:100]
    w_hg = lstm_hh_weight[:, 100:150]
    w_ho = lstm_hh_weight[:, 150:]

    b_ii = lstm_ih_bias[:50]
    b_if = lstm_ih_bias[50:100]
    b_ig = lstm_ih_bias[100:150]
    b_io = lstm_ih_bias[150:]

    b_hi = lstm_hh_bias[:50]
    b_hf = lstm_hh_bias[50:100]
    b_hg = lstm_hh_bias[100:150]
    b_ho = lstm_hh_bias[150:]

    fc_weight = model.fc.weight.data.numpy().T
    fc_bias = model.fc.bias.data.numpy()

    # scale_w_ii = (max(abs(np.max(w_ii)), abs(np.min(w_ii))) / 127)
    # scale_w_if = (max(abs(np.max(w_if)), abs(np.min(w_if))) / 127)
    # scale_w_ig = (max(abs(np.max(w_ig)), abs(np.min(w_ig))) / 127)
    # scale_w_io = (max(abs(np.max(w_io)), abs(np.min(w_io))) / 127)

    # scale_w_hi = (max(abs(np.max(w_hi)), abs(np.min(w_hi))) / 127)
    # scale_w_hf = (max(abs(np.max(w_hf)), abs(np.min(w_hf))) / 127)
    # scale_w_hg = (max(abs(np.max(w_hg)), abs(np.min(w_hg))) / 127)
    # scale_w_ho = (max(abs(np.max(w_ho)), abs(np.min(w_ho))) / 127)

    scale_ih_weight = (
        max(abs(np.max(lstm_ih_weight)), abs(np.min(lstm_ih_weight))) /
        (2**(act_bit - 1) - 1))
    scale_hh_weight = (
        max(abs(np.max(lstm_hh_weight)), abs(np.min(lstm_hh_weight))) /
        (2**(act_bit - 1) - 1))
    scale_fc_weight = (max(abs(np.max(fc_weight)), abs(np.min(fc_weight))) /
                       (2**(act_bit - 1) - 1))

    # w_ii = np.round(w_ii / scale_w_ii)
    # w_if = np.round(w_if / scale_w_if)
    # w_ig = np.round(w_ig / scale_w_ig)
    # w_io = np.round(w_io / scale_w_io)

    # w_hi = np.round(w_hi / scale_w_hi)
    # w_hf = np.round(w_hf / scale_w_hf)
    # w_hg = np.round(w_hg / scale_w_hg)
    # w_ho = np.round(w_ho / scale_w_ho)

    # w_ii = np.round(w_ii / scale_ih_weight)
    # w_if = np.round(w_if / scale_ih_weight)
    # w_ig = np.round(w_ig / scale_ih_weight)
    # w_io = np.round(w_io / scale_ih_weight)

    # w_hi = np.round(w_hi / scale_hh_weight)
    # w_hf = np.round(w_hf / scale_hh_weight)
    # w_hg = np.round(w_hg / scale_hh_weight)
    # w_ho = np.round(w_ho / scale_hh_weight)

    lstm_ih_weight = np.round(lstm_ih_weight / scale_ih_weight)
    lstm_hh_weight = np.round(lstm_hh_weight / scale_hh_weight)
    fc_weight = np.round(fc_weight / scale_fc_weight)

    # w_ii_mapped = fc_mapping(w_ii)
    # w_if_mapped = fc_mapping(w_if)
    # w_ig_mapped = fc_mapping(w_ig)
    # w_io_mapped = fc_mapping(w_io)

    # w_hi_mapped = fc_mapping(w_hi)
    # w_hf_mapped = fc_mapping(w_hf)
    # w_hg_mapped = fc_mapping(w_hg)
    # w_ho_mapped = fc_mapping(w_ho)

    weight_ih_mapped = fc_mapping(lstm_ih_weight)
    weight_hh_mapped = fc_mapping(lstm_hh_weight)
    fc_weight_mapped = fc_mapping(fc_weight)

    input_dim, hidden_dim = w_ii.shape
    total = 0
    correct = 0
    for data in testloader:
        line = lines_test[total][2:-1]
        total += 1
        inputs, labels = data
        embed = np.squeeze(
            torch.embedding(torch.tensor(embed_weight),
                            inputs).numpy())[:, np.newaxis, :]
        inputs = inputs.numpy()

        labels = labels.numpy()
        _, time_step = inputs.shape
        hidden_prev = np.zeros((1, hidden_dim))
        c_prev = np.zeros((1, hidden_dim))
        output_rnn = np.zeros((time_step, hidden_dim))

        scale_input = max(abs(np.max(embed)), abs(
            np.min(embed))) / (2**(act_bit - 1) - 1)

        # b_ii_q = b_ii / scale_w_ii / scale2bit(scale_input)
        # b_if_q = b_if / scale_w_if / scale2bit(scale_input)
        # b_ig_q = b_ig / scale_w_ig / scale2bit(scale_input)
        # b_io_q = b_io / scale_w_io / scale2bit(scale_input)

        # b_hi_q = b_hi / scale_w_hi / scale2bit(scale_input)
        # b_hf_q = b_hf / scale_w_hf / scale2bit(scale_input)
        # b_hg_q = b_hg / scale_w_hg / scale2bit(scale_input)
        # b_ho_q = b_ho / scale_w_ho / scale2bit(scale_input)

        # b_ii_q = b_ii / scale_ih_weight / scale2bit(scale_input)
        # b_if_q = b_if / scale_ih_weight / scale2bit(scale_input)
        # b_ig_q = b_ig / scale_ih_weight / scale2bit(scale_input)
        # b_io_q = b_io / scale_ih_weight / scale2bit(scale_input)

        # b_hi_q = b_hi / scale_hh_weight / scale2bit(scale_input)
        # b_hf_q = b_hf / scale_hh_weight / scale2bit(scale_input)
        # b_hg_q = b_hg / scale_hh_weight / scale2bit(scale_input)
        # b_ho_q = b_ho / scale_hh_weight / scale2bit(scale_input)
        lstm_ih_bias_q = lstm_ih_bias / scale_ih_weight / (scale_input)
        lstm_hh_bias_q = lstm_hh_bias / scale_hh_weight / (scale_input)

        embed = np.round(embed / (scale_input))
        for t in range(time_step):
            scale_hidden = max(abs(
                np.max(hidden_prev)), abs(
                    np.min(hidden_prev))) / (2**(act_bit - 1) - 1)
            scale_c = max(abs(np.max(c_prev)), abs(
                np.min(c_prev))) / (2**(act_bit - 1) - 1)
            if scale_hidden == 0:
                scale_hidden = 0.001
            if scale_c == 0:
                scale_c = 0.001
            hidden_prev = np.round(hidden_prev / (scale_hidden))
            # c_prev = np.round(c_prev/ scale2bit(scale_c / (2**(act_bit-1)-1)))
            # i = sigmoid(fc_int(embed[t], scale_input, w_ii, w_ii_mapped, scale_w_ii, b_ii_q, 8, True) \
            #     + fc_int(hidden_prev, scale_hidden, w_hi, w_hi_mapped, scale_w_hi, b_hi_q, 8, True))
            # f = sigmoid(fc_int(embed[t], scale_input, w_if, w_if_mapped, scale_w_if, b_if_q, 8, True) \
            #     + fc_int(hidden_prev, scale_hidden, w_hf, w_hf_mapped, scale_w_hf, b_hf_q, 8, True))
            # g = tanh(fc_int(embed[t], scale_input, w_ig, w_ig_mapped, scale_w_ig, b_ig_q, 8, True) \
            #     + fc_int(hidden_prev, scale_hidden, w_hg, w_hg_mapped, scale_w_hg, b_hg_q, 8, True))
            # o = sigmoid(fc_int(embed[t], scale_input, w_io, w_io_mapped, scale_w_io, b_io_q, 8, True) \
            #     + fc_int(hidden_prev, scale_hidden, w_ho, w_ho_mapped, scale_w_ho, b_ho_q, 8, True))
            tmp = fc_int(embed[t], scale_input, lstm_ih_weight, weight_ih_mapped, scale_ih_weight, lstm_ih_bias_q, 8, True) \
                  + fc_int(hidden_prev, scale_hidden, lstm_hh_weight, weight_hh_mapped, scale_hh_weight, lstm_hh_bias_q, 8, True)
            i = sigmoid(tmp[:, :50])
            f = sigmoid(tmp[:, 50:100])
            g = tanh(tmp[:, 100:150])
            o = sigmoid(tmp[:, 150:])
            # i = sigmoid(fc_int(embed[t], scale_input, w_ii, w_ii_mapped, scale_ih_weight, b_ii_q, 8, True) \
            #     + fc_int(hidden_prev, scale_hidden, w_hi, w_hi_mapped, scale_hh_weight, b_hi_q, 8, True))
            # f = sigmoid(fc_int(embed[t], scale_input, w_if, w_if_mapped, scale_ih_weight, b_if_q, 8, True) \
            #     + fc_int(hidden_prev, scale_hidden, w_hf, w_hf_mapped, scale_hh_weight, b_hf_q, 8, True))
            # g = tanh(fc_int(embed[t], scale_input, w_ig, w_ig_mapped, scale_ih_weight, b_ig_q, 8, True) \
            #     + fc_int(hidden_prev, scale_hidden, w_hg, w_hg_mapped, scale_hh_weight, b_hg_q, 8, True))
            # o = sigmoid(fc_int(embed[t], scale_input, w_io, w_io_mapped, scale_ih_weight, b_io_q, 8, True) \
            #     + fc_int(hidden_prev, scale_hidden, w_ho, w_ho_mapped, scale_hh_weight, b_ho_q, 8, True))

            c = f * c_prev + i * g
            output_rnn[t] = o * tanh(c)
            hidden_prev = output_rnn[t][np.newaxis, :]
            c_prev = c
        scale_fc = max(abs(np.max(output_rnn)), abs(
            np.min(output_rnn))) / (2**(act_bit - 1) - 1)
        if scale_fc == 0:
            scale_fc = 0.001
        fc_bias_q = fc_bias / scale_fc_weight / (scale_fc)
        output_rnn = np.round(output_rnn / (scale_fc))
        output_fc = fc_int(output_rnn, scale_fc, fc_weight, fc_weight_mapped,
                           scale_fc_weight, fc_bias_q, 2, True)
        # output_fc = fc(output_rnn, fc_weight, fc_weight_mapped) + fc_bias
        outputs = np.mean(output_fc, axis=0)
        prediction = np.argmax(outputs)
        print(line)
        print('label=' + str(int(data[-1][0])) + ', prediction=' +
              str(prediction) + '\n')
        if prediction == labels:
            correct += 1
    accuracy = correct / total
    print(accuracy)
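The quantization in this example (and in Example #16 below) uses a symmetric per-tensor scale, max(|w|) / (2**(act_bit - 1) - 1). A compact numpy illustration with a made-up weight matrix:

import numpy as np

act_bit = 8
w = np.array([[0.5, -1.2], [0.03, 0.9]])
scale = max(abs(np.max(w)), abs(np.min(w))) / (2**(act_bit - 1) - 1)  # 1.2 / 127
w_q = np.round(w / scale)                                             # integers in [-127, 127]
w_deq = w_q * scale                                                   # approximate reconstruction
print(scale, w_q)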
Example #14
    def __init__(self, num_user, num_movies):
        super().__init__()
        self.user_embed = nn.Embedding(num_user, 32)
        self.movie_embed = nn.Embedding(num_movies, 32)
        self.out = nn.Linear(64, 1)
        self.step_scheduler_after = 'epoch'
Example #15
    def forward(ctx, inputs, weights):
        inputs = inputs.contiguous()
        ctx.save_for_backward(inputs)
        ctx.num_weights = weights.size(0)

        return torch.embedding(weights, inputs)
Example #16
def RNN(testloader, lines_test):
    act_bit = 8
    model = torch.load('rnn_model.pt')
    embed_weight = model.embed.weight.data.numpy()
    rnn_ih_weight = model.rnn.weight_ih_l0.data.numpy().T
    rnn_hh_weight = model.rnn.weight_hh_l0.data.numpy().T
    rnn_ih_bias = model.rnn.bias_ih_l0.data.numpy()
    rnn_hh_bias = model.rnn.bias_hh_l0.data.numpy()
    fc_weight = model.fc.weight.data.numpy().T
    fc_bias = model.fc.bias.data.numpy()

    scale_ih_weight = (max(abs(np.max(rnn_ih_weight)), abs(np.min(rnn_ih_weight))) / (2**(act_bit-1)-1))
    scale_hh_weight = (max(abs(np.max(rnn_hh_weight)), abs(np.min(rnn_hh_weight))) / (2**(act_bit-1)-1))
    scale_fc_weight = (max(abs(np.max(fc_weight)), abs(np.min(fc_weight))) / (2**(act_bit-1)-1))
    
    rnn_ih_weight = np.round(rnn_ih_weight / scale_ih_weight)
    rnn_hh_weight = np.round(rnn_hh_weight / scale_hh_weight)
    fc_weight = np.round(fc_weight / scale_fc_weight)

    

    rnn_ih_weight_mapped = fc_mapping(rnn_ih_weight)
    rnn_hh_weight_mapped = fc_mapping(rnn_hh_weight)
    fc_weight_mapped = fc_mapping(fc_weight)
    
    # fc_bias = (fc_bias / scale_fc_weight / scale2bit(scale_input))

    input_dim, hidden_dim = rnn_ih_weight.shape
    total = 0
    correct = 0

    for data in testloader:
        line = lines_test[total][2:-1]
        total += 1
        inputs, labels = data
        embed = np.squeeze(torch.embedding(torch.tensor(embed_weight), inputs).numpy())[:,np.newaxis,:]
        inputs = inputs.numpy()
        
        labels = labels.numpy()
        _, time_step = inputs.shape
        hidden_prev = np.zeros((1, hidden_dim))
        output_rnn = np.zeros((time_step, hidden_dim))
        
        scale_input = max(abs(np.max(embed)), abs(np.min(embed))) / (2**(act_bit-1)-1)
        embed = np.round(embed/ (scale_input))

        rnn_ih_bias_q = (rnn_ih_bias / scale_ih_weight / (scale_input))
        rnn_hh_bias_q = (rnn_hh_bias / scale_hh_weight / (scale_input))

        for i in range(time_step):
            scale_hidden = max(abs(np.max(hidden_prev)), abs(np.min(hidden_prev))) / (2**(act_bit-1)-1)
            if scale_hidden == 0:
                scale_hidden = 0.001
            hidden_prev = np.round(hidden_prev/ (scale_hidden))
            hidden = fc_int(embed[i], scale_input, rnn_ih_weight, rnn_ih_weight_mapped, scale_ih_weight, rnn_ih_bias_q, 8, True) \
                    + fc_int(hidden_prev, scale_hidden, rnn_hh_weight, rnn_hh_weight_mapped, scale_hh_weight, rnn_hh_bias_q, 8, True)
            # hidden = fc(embed[i], rnn_ih_weight, rnn_ih_weight_mapped) + fc(hidden, rnn_hh_weight, rnn_hh_weight_mapped) + rnn_ih_bias + rnn_hh_bias     
            output_rnn[i] = tanh(hidden)
            hidden_prev = output_rnn[i][np.newaxis,:]
        scale_fc = max(abs(np.max(output_rnn)), abs(np.min(output_rnn))) / (2**(act_bit-1)-1)
        if scale_fc == 0:
            scale_fc = 0.001
        output_rnn = np.round(output_rnn/ (scale_fc))
        fc_bias_q = (fc_bias / scale_fc_weight / (scale_fc))
        output_fc = fc_int(output_rnn, scale_fc, fc_weight, fc_weight_mapped, scale_fc_weight, fc_bias_q, 2, True)
        # output_fc = fc(output_rnn, fc_weight, fc_weight_mapped) + fc_bias
        outputs = np.mean(output_fc, axis=0)
        prediction = np.argmax(outputs)
        print(line)
        print('label='+str(int(data[-1][0]))+', prediction='+str(prediction)+'\n')
        if prediction == labels:
            correct += 1
    accuracy = correct / total
    print(accuracy)
Example #17
    def forward(self,
                _input,
                use_pos_encods=False,
                skip_slots=False,
                slot_mask=None,
                slot_vals=None,
                slot_lengths=None):
        if isinstance(_input, dict):
            _input = _input[DATA_GLOVE]
        embed_input = _input.clamp(min=0, max=self.num_words - 1)
        embeds = self.embeddings(embed_input)

        if not skip_slots and self.use_slot_embeddings:
            slot_input = (self.slot_start_index - _input).clamp(
                min=0, max=self.num_slots - 1)
            slot_embeds = self.slot_embeddings(slot_input)

            is_slot_embed = (_input <= self.slot_start_index).float()

            if self.use_slot_value_embeddings:
                if slot_mask is not None and slot_vals is not None and slot_lengths is not None:
                    slot_value_embeddings = self.slot_value_encoder(
                        slot_vals=slot_vals,
                        slot_lengths=slot_lengths,
                        embedding_module=self)

                    step_size = torch.arange(start=0,
                                             end=slot_mask.size(0),
                                             dtype=slot_mask.dtype,
                                             device=slot_mask.device
                                             ) * slot_value_embeddings.shape[1]
                    extended_slot_mask = (slot_mask -
                                          1) * (slot_mask < 0).long()
                    extended_slot_mask = (extended_slot_mask +
                                          step_size.unsqueeze(dim=-1)).view(-1)

                    slot_value_embeddings = torch.embedding(
                        slot_value_embeddings.view(
                            -1, slot_value_embeddings.size(-1)),
                        extended_slot_mask).view(
                            slot_mask.size(0), slot_mask.size(1),
                            slot_value_embeddings.size(-1))
                    slot_value_gate = self.gated_slot_embedding_layer(
                        slot_embeds)

                    slot_embeds = slot_embeds + slot_value_gate * slot_value_embeddings
                elif slot_vals is not None or slot_lengths is not None:
                    print(
                        "[#] WARNING: At least one of the slot embedding values were None, although another was given."
                    )
                    print("\tSlot mask: %s, slot vals: %s, slot lengths: %s" %
                          ("None" if
                           (slot_mask is None) else "Not none", "None" if
                           (slot_vals is None) else "Not none", "None" if
                           (slot_lengths is None) else "Not none"))

            if use_pos_encods and len(
                    _input.shape
            ) == 2 and self.positional_embedding_factor != 0:
                pos_indices = self.generate_pos_indices(
                    slot_input, is_slot_embed)
                slot_embeds = slot_embeds + self.positional_embedding_factor * self.positional_embeddings(
                    pos_indices)

            is_slot_embed = is_slot_embed.view(*(list(_input.shape) + [1]))
            embeds = (1 - is_slot_embed) * embeds + is_slot_embed * slot_embeds

        embeds = self.embedding_dropout(embeds)
        return embeds
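The flattened lookup in this last example (adding a per-batch step_size offset, then indexing a reshaped table with torch.embedding) can be shown in isolation. Shapes and names here are invented for illustration; the original additionally derives its indices from slot_mask:

import torch

batch, num_slots, seq_len, dim = 2, 4, 5, 3
table = torch.randn(batch, num_slots, dim)               # per-batch value embeddings
idx = torch.randint(0, num_slots, (batch, seq_len))      # per-batch indices into table
offsets = torch.arange(batch).unsqueeze(-1) * num_slots  # shift each row into its batch's block
flat = torch.embedding(table.view(-1, dim), (idx + offsets).view(-1))
gathered = flat.view(batch, seq_len, dim)                # gathered[b, t] == table[b, idx[b, t]]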