Ejemplo n.º 1
0
def test_k_in_order(setup):
    """Walk the expected keys in order and require strictly rising topk values.

    The running floor starts at -1e4, so the first value must exceed that.
    """
    data, expected_keys, count = setup
    found = topk(count, data)
    previous = -1e4
    for key in expected_keys:
        current = found[key]
        assert current > previous
        previous = current
Ejemplo n.º 2
0
    def conditional(self,
                    context,
                    target: Optional[List[str]] = None,
                    limit: Optional[int] = None,
                    raw: bool = False,
                    **kwargs):
        """Score possible next tokens for the given context.

        :param context: The tokens; handed to `self.batch_input` to build the batch
        :param target: Optional list of values; when given, only the scores found at their
            vectorized indices (looked up under `self.model.tgt_key`) are returned
        :param limit: Number of (next word, score) pairs kept per batch row; when None the
            size of the last axis of the prediction is used
        :param raw: Return the last-position prediction array itself; takes precedence over
            `limit` but not over `target`
        :param kwargs: Only `preproc` is inspected; a non-None value logs a deprecation warning

        :returns: A numpy array of scores when `target` is set, the numpy prediction array when
            `raw` is set, otherwise one `{token: score}` dict per batch row built from `topk`
        """
        if kwargs.get('preproc') is not None:
            logger.warning(
                "Warning: Passing `preproc` to `LanguageModelService.predict` is deprecated."
            )

        batch = self.batch_input(context)
        self.prepare_vectorizers(batch)
        features = self.vectorize(batch)
        # Keep only the last position along axis 1 of the model output.
        last_step = to_numpy(self.model.predict(features)[:, -1, :])

        if target is not None:
            # Each target is wrapped in its own single-item list before vectorizing.
            wrapped_targets = [[tok] for tok in target]
            self.prepare_vectorizers(wrapped_targets)
            target_ids = self.vectorize(wrapped_targets)[self.model.tgt_key]
            row_ids = np.arange(last_step.shape[0])
            picked = last_step[row_ids, target_ids]
            return np.array([value.item() for value in picked])

        if raw:
            return last_step

        if limit is None:
            limit = last_step.shape[-1]
        ranked_rows = [topk(limit, row) for row in last_step]
        # Translate vocabulary indices back into tokens for the caller.
        return [
            {self.idx_to_token[index]: score for index, score in ranked.items()}
            for ranked in ranked_rows
        ]
Ejemplo n.º 3
0
    def predict_one(self, src, encoder_outputs, **kwargs):
        """Beam-search decode a single input and return the final beams.

        The loop always runs for `mxlen` steps; there is no early exit when
        every beam is marked done.

        :param src: Not referenced anywhere in this method body.
        :param encoder_outputs: Passed to `self.arc_policy`; its `src_mask`
            attribute is forwarded to `self.attn` on every step.
        :param kwargs: `beam` (cast to int, default 5) is the beam width K;
            `mxlen` (cast to int, default 100) is the number of decode steps.

        :returns: `(paths, scores)` where `paths` is `np.stack` of the K token
            lists with the leading GO token removed and `scores` is the length-K
            array holding the `topk` value chosen for each beam on the last step.
        """
        K = int(kwargs.get('beam', 5))
        mxlen = int(kwargs.get('mxlen', 100))

        # Every beam starts out containing only the GO token.
        paths = [[Offsets.GO] for _ in range(K)]
        # Which beams are done?
        done = np.array([False] * K)
        scores = np.array([0.0] * K)
        hidden, output_i, context = self.arc_policy(encoder_outputs,
                                                    self.hsz,
                                                    beam_width=K)
        # Remembered so the per-state lists below can be rebuilt each step.
        num_states = len(hidden)
        rnn_state = self.decoder_rnn.initial_state(hidden)
        # presumably primes the attention module with the encoder context -- TODO confirm
        self.attn_cache(context)
        src_mask = encoder_outputs.src_mask

        for i in range(mxlen):
            # Last token of every beam, laid out as a (1, K) array.
            dst_last = np.array([path[-1] for path in paths]).reshape(1, K)
            embed_i = self.tgt_embeddings.encode(dst_last)[-1]
            embed_i = self.input_i(embed_i, output_i)
            rnn_state = rnn_state.add_input(embed_i)
            rnn_output_i = rnn_state.output()
            output_i = self.attn(rnn_output_i, src_mask)
            wll = self.prediction([output_i])[-1].npvalue()  # (V,) K
            V = wll.shape[0]
            if i > 0:
                # Commented-out alternative formulation of the masking below:
                # expanded_history = np.expand_dims(scores, -1)
                # done_mask = np.expand_dims((done == False).astype(np.uint8), -1)
                # sll = np.multiply(wll.T, done_mask) + expanded_history

                wll = wll.T
                # One running score per beam, broadcast across that beam's row.
                expanded_history = np.expand_dims(scores, -1)
                # 1 for beams still running, 0 for beams flagged done.
                done_mask = np.expand_dims((done == False).astype(np.uint8),
                                           -1)
                done_mask_inv = (done_mask != 1).astype(np.uint8)
                # NOTE(review): eos_mask is all zeros, so (done_mask & eos_mask)
                # is always 0 and `mask` is all ones; multiplying by `mask`
                # below therefore changes nothing as written.
                eos_mask = np.zeros((1, V)).astype(np.uint8)
                mask = ((done_mask & eos_mask) != 1).astype(np.uint8)
                # Rows of done beams are zeroed, then shifted down by 1e4.
                masked_wll = np.multiply(done_mask, wll)
                negged_wll = masked_wll + (done_mask_inv * -1e4)
                removed_eos = np.multiply(mask, negged_wll)
                sll = removed_eos + expanded_history
            else:
                # First step: all history scores are still 0.0, so use the
                # model scores directly.
                sll = wll.T

            # Flatten so one topk call ranks every (beam, word) pair; assuming
            # sll is (K, V), flat index = beam * V + word.
            flat_sll = sll.reshape(-1)

            # `bests` is used as a mapping from flat index to score.
            bests = topk(K, flat_sll)
            best_idx_flat = np.array(list(bests.keys()))
            best_beams = best_idx_flat // V
            best_idx = best_idx_flat % V

            new_paths = []
            new_done = []

            hidden = rnn_state.s()
            new_hidden = [[] for _ in range(num_states)]
            for j, best_flat in enumerate(best_idx_flat):
                beam_id = best_beams[j]
                best_word = best_idx[j]
                # NOTE(review): `done` is indexed by the new slot j here but by
                # the source beam `beam_id` a few lines below, and it is also
                # mutated in place inside this loop -- confirm the intended index.
                if done[j]:
                    new_paths.append(paths[beam_id] + [Offsets.EOS])
                else:
                    new_paths.append(paths[beam_id] + [best_word])
                if best_word == Offsets.EOS:
                    done[j] = True
                new_done.append(done[beam_id])
                scores[j] = bests[best_flat]
                # For each path, we need to pick that out and add it to the hiddens
                # This will be (c1, c2, ..., h1, h2, ...)
                for h_i, h in enumerate(hidden):
                    new_hidden[h_i].append(dy.pick_batch_elem(h, beam_id))

            done = np.array(new_done)
            # Re-batch the selected per-beam states, one batch per state slot.
            new_hidden = [
                dy.concatenate_to_batch(new_h) for new_h in new_hidden
            ]
            paths = new_paths
            # Restart the decoder RNN from the reordered states.
            rnn_state = self.decoder_rnn.initial_state(new_hidden)

        # Drop the leading GO token from every path before stacking.
        paths = np.stack([p[1:] for p in paths])
        return paths, scores
Ejemplo n.º 4
0
    def predict_one(self, src, encoder_outputs, **kwargs):
        """Run a fixed-length beam search for one input.

        All `mxlen` iterations are executed; nothing in the loop breaks out
        early once beams are marked done.

        :param src: Not used inside this method.
        :param encoder_outputs: Given to `self.arc_policy`; `encoder_outputs.src_mask`
            is passed to `self.attn` at each step.
        :param kwargs: `beam` (int-cast, default 5) sets the beam width K and
            `mxlen` (int-cast, default 100) sets the number of steps.

        :returns: A pair `(paths, scores)`: the K paths stacked with `np.stack`
            after dropping their first (GO) token, and the length-K array of
            scores written during the final step.
        """
        K = int(kwargs.get('beam', 5))
        mxlen = int(kwargs.get('mxlen', 100))

        # Seed each of the K beams with the GO token.
        paths = [[Offsets.GO] for _ in range(K)]
        # Which beams are done?
        done = np.array([False] * K)
        scores = np.array([0.0]*K)
        hidden, output_i, context = self.arc_policy(encoder_outputs, self.hsz, beam_width=K)
        # Number of state entries; reused to size `new_hidden` every step.
        num_states = len(hidden)
        rnn_state = self.decoder_rnn.initial_state(hidden)
        # presumably stores the encoder context for later attention calls -- TODO confirm
        self.attn_cache(context)
        src_mask = encoder_outputs.src_mask

        for i in range(mxlen):
            # Most recent token of each beam, reshaped to (1, K).
            dst_last = np.array([path[-1] for path in paths]).reshape(1, K)
            embed_i = self.tgt_embeddings.encode(dst_last)[-1]
            embed_i = self.input_i(embed_i, output_i)
            rnn_state = rnn_state.add_input(embed_i)
            rnn_output_i = rnn_state.output()
            output_i = self.attn(rnn_output_i, src_mask)
            wll = self.prediction([output_i])[-1].npvalue()  # (V,) K
            V = wll.shape[0]
            if i > 0:
                # Commented-out alternative formulation of the masking below:
                # expanded_history = np.expand_dims(scores, -1)
                # done_mask = np.expand_dims((done == False).astype(np.uint8), -1)
                # sll = np.multiply(wll.T, done_mask) + expanded_history

                wll = wll.T
                # Per-beam accumulated score with a trailing axis for broadcasting.
                expanded_history = np.expand_dims(scores, -1)
                # done_mask: 1 where the beam is not done; done_mask_inv is its complement.
                done_mask = np.expand_dims((done == False).astype(np.uint8), -1)
                done_mask_inv = (done_mask != 1).astype(np.uint8)
                # NOTE(review): eos_mask contains only zeros, so `mask` ends up
                # all ones and the later multiply by `mask` has no effect as written.
                eos_mask = np.zeros((1, V)).astype(np.uint8)
                mask = ((done_mask & eos_mask) != 1).astype(np.uint8)
                # Zero the rows of done beams, then push those rows down by 1e4.
                masked_wll = np.multiply(done_mask, wll)
                negged_wll = masked_wll + (done_mask_inv * -1e4)
                removed_eos = np.multiply(mask, negged_wll)
                sll = removed_eos + expanded_history
            else:
                # Step 0: `scores` is still all zeros, so no history is added.
                sll = wll.T

            # Rank all (beam, word) pairs together; assuming sll is (K, V),
            # a flat index decodes as beam = idx // V and word = idx % V.
            flat_sll = sll.reshape(-1)

            # `bests` is consumed as a flat-index -> score mapping.
            bests = topk(K, flat_sll)
            best_idx_flat = np.array(list(bests.keys()))
            best_beams = best_idx_flat // V
            best_idx = best_idx_flat % V

            new_paths = []
            new_done = []

            hidden = rnn_state.s()
            new_hidden = [[] for _ in range(num_states)]
            for j, best_flat in enumerate(best_idx_flat):
                beam_id = best_beams[j]
                best_word = best_idx[j]
                # NOTE(review): this reads done[j] (new slot) whereas new_done
                # below reads done[beam_id] (source beam); `done` is also
                # modified in place during this loop -- verify intent.
                if done[j]:
                    new_paths.append(paths[beam_id] + [Offsets.EOS])
                else:
                    new_paths.append(paths[beam_id] + [best_word])
                if best_word == Offsets.EOS:
                    done[j] = True
                new_done.append(done[beam_id])
                scores[j] = bests[best_flat]
                # For each path, we need to pick that out and add it to the hiddens
                # This will be (c1, c2, ..., h1, h2, ...)
                for h_i, h in enumerate(hidden):
                    new_hidden[h_i].append(dy.pick_batch_elem(h, beam_id))

            done = np.array(new_done)
            # Concatenate the picked states back into one batch per state slot.
            new_hidden = [dy.concatenate_to_batch(new_h) for new_h in new_hidden]
            paths = new_paths
            # Continue decoding from the reordered states.
            rnn_state = self.decoder_rnn.initial_state(new_hidden)

        # Strip the GO token from the front of each path.
        paths = np.stack([p[1:] for p in paths])
        return paths, scores
Ejemplo n.º 5
0
    def beam_decode(self, src, src_len, K=2):
        """Beam-search decode `src` with beam width K for a fixed 100 steps.

        :param src: Input forwarded to `self.encode`.
        :param src_len: Length information forwarded to `self.encode`.
        :param K: Beam width (default 2).

        :returns: `(paths, scores)` where `paths` is a list of K token lists
            with the leading `<GO>` id removed and `scores` is the length-K
            array of `topk` values selected on the last step.
        """

        GO = self.vocab2['<GO>']
        EOS = self.vocab2['<EOS>']
        dy.renew_cg()

        # Every beam starts from the GO token.
        paths = [[GO] for _ in range(K)]
        # Which beams are done?
        done = np.array([False] * K)
        scores = np.array([0.0] * K)
        # NOTE(review): second renew_cg() call; only Python/NumPy objects are
        # created since the first one, so this appears redundant.
        dy.renew_cg()
        rnn_enc_seq, hidden = self.encode(src, src_len)
        context_mx = dy.concatenate_cols(rnn_enc_seq)
        # To vectorize, we need to expand along the batch dimension, K times
        # This is a generator expression, so it can be consumed only once.
        final_encoder_state_k = (dy.concatenate_to_batch([h] * K)
                                 for h in hidden)
        num_states = len(hidden)
        rnn_state = self.decoder_rnn.initial_state(final_encoder_state_k)
        attn_fn = self._attn(context_mx)

        # Initial attention-side input: the last encoder output repeated K times.
        output_i = dy.concatenate_to_batch([rnn_enc_seq[-1]] * K)
        # Hard-coded 100 decode steps; the loop never exits early.
        for i in range(100):
            # Last token of each beam as a (1, K) array.
            dst_last = np.array([path[-1] for path in paths]).reshape(1, K)
            embed_i = self.embed_out(dst_last)[-1]
            embed_i = self.input_i(embed_i, output_i)
            rnn_state = rnn_state.add_input(embed_i)
            rnn_output_i = rnn_state.output()
            output_i = attn_fn(rnn_output_i)
            wll = self.prediction(output_i).npvalue()  # (V,) K
            V = wll.shape[0]
            if i > 0:
                # Add each beam's running score to every word score in its row.
                expanded_history = scores.reshape(scores.shape +
                                                  (1, ))  # scores = K
                # TODO: dont add anything when the beam is done
                sll = wll.T + expanded_history
            else:
                # First step: scores are all 0.0, so no history is added.
                sll = wll.T

            # Rank all (beam, word) pairs at once; assuming sll is (K, V),
            # a flat index splits into beam = idx // V and word = idx % V.
            flat_sll = sll.reshape(-1)

            # `bests` is used as a mapping from flat index to score.
            bests = topk(K, flat_sll)
            best_idx_flat = np.array(list(bests.keys()))
            best_beams = best_idx_flat // V
            best_idx = best_idx_flat % V

            new_paths = []
            new_done = []

            hidden = rnn_state.s()
            # For each hidden state
            new_hidden = [[] for _ in range(num_states)]
            for j, best_flat in enumerate(best_idx_flat):
                beam_id = best_beams[j]
                best_word = best_idx[j]
                # NOTE(review): `done` is written at slot j but read at
                # `beam_id` on the next line, and it is never consulted when
                # scoring or extending paths in this method -- confirm intent.
                if best_word == EOS:
                    done[j] = True
                new_done.append(done[beam_id])
                new_paths.append(paths[beam_id] + [best_word])
                scores[j] = bests[best_flat]
                # For each path, we need to pick that out and add it to the hiddens
                # This will be (c1, c2, ..., h1, h2, ...)
                for h_i, h in enumerate(hidden):
                    new_hidden[h_i] += [dy.pick_batch_elem(h, beam_id)]

            done = np.array(new_done)
            new_hidden = [
                dy.concatenate_to_batch(new_h) for new_h in new_hidden
            ]
            paths = new_paths
            # Now comes the hard part, fix the hidden units
            # Copy the beam states of the winners
            rnn_state = self.decoder_rnn.initial_state(new_hidden)

        # Drop the leading GO token from each returned path.
        return [p[1:] for p in paths], scores
Ejemplo n.º 6
0
def test_k_values_are_correct(setup):
    """Every value reported by topk must equal the input element at that key."""
    data, _expected_keys, count = setup
    found = topk(count, data)
    for index, value in found.items():
        assert value == data[index]
Ejemplo n.º 7
0
def test_k_are_correct(setup):
    """Every expected top key must be present in the topk result."""
    data, expected_keys, count = setup
    found = topk(count, data)
    assert all(key in found for key in expected_keys)
Ejemplo n.º 8
0
def test_k_drawn(setup):
    """topk must return exactly as many entries as were requested."""
    data, _expected_keys, count = setup
    found = topk(count, data)
    assert len(found) == count