Example #1
    def get_answerable_logits(self, contextual_embedding, p_mask):
        """Get the answerable logits.

        Parameters
        ----------
        contextual_embedding
            Shape (batch_size, sequence_length, C)
        p_mask
            Shape (batch_size, sequence_length)
            Mask of the sequence.
            0 --> the element is masked,
            1 --> the element is not masked

        Returns
        -------
        answerable_logits
            Shape (batch_size, 2)
        """
        # Shape (batch_size, sequence_length)
        start_scores = np.squeeze(self.start_scores(contextual_embedding), -1)
        start_score_weights = masked_softmax(start_scores, p_mask, axis=-1)
        start_agg_feature = npx.batch_dot(np.expand_dims(start_score_weights, axis=1),
                                          contextual_embedding)
        start_agg_feature = np.squeeze(start_agg_feature, 1)
        cls_feature = contextual_embedding[:, 0, :]
        answerable_scores = self.answerable_scores(np.concatenate([start_agg_feature,
                                                                  cls_feature], axis=-1))
        answerable_logits = npx.log_softmax(answerable_scores, axis=-1)
        return answerable_logits
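A minimal, runnable sketch of the two squeeze steps above, assuming the MXNet numpy interface; shapes are illustrative, not taken from the original model:

from mxnet import np, npx
npx.set_np()

# A Dense(1) scoring head leaves a trailing singleton axis: (batch, seq_len, 1)
scores = np.random.uniform(size=(2, 4, 1))
start_scores = np.squeeze(scores, -1)                    # (2, 4)

# Weighted aggregation as in the method:
# (batch, 1, seq_len) batch_dot (batch, seq_len, C) -> (batch, 1, C)
weights = np.ones((2, 1, 4)) / 4                         # uniform weights
embeddings = np.random.uniform(size=(2, 4, 3))
agg = np.squeeze(npx.batch_dot(weights, embeddings), 1)  # (2, 3)
print(start_scores.shape, agg.shape)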
Example #2
    def forward(self, user_id, seq, item_id):
        item_embs = np.expand_dims(self.Q(seq), 1)
        user_emb = self.P(user_id)  # (4096, 10)
        out, out_h, out_v, out_hs = None, None, None, []
        # Vertical convolution
        if self.d_prime:
            out_v = self.conv_v(item_embs)
            out_v = out_v.reshape(
                out_v.shape[0], self.fc1_dim_v)  # (4096, 4*10)
        # Horizontal convolution - over the time dimension
        if self.d:
            for conv, maxp in zip(self.conv_h, self.max_pool):  # slide filters over time
                conv_out = np.squeeze(npx.relu(conv(item_embs)), axis=3)
                t = maxp(conv_out)
                pool_out = np.squeeze(t, axis=2)
                out_hs.append(pool_out)
            out_h = np.concatenate(out_hs, axis=1)  # (4096, 16*3)
        out = np.concatenate([out_v, out_h], axis=1)  # (4096, 4*10+16*3)
        z = self.fc(self.dropout(out))  # (4096, 10)

        # Concatenate with user_emb
        x = np.concatenate([z, user_emb], axis=1)  # (4096, 20)

        # Compute the score against the item embedding
        q_prime_i = np.squeeze(self.Q_prime(item_id))  # (4096, 20)
        b = np.squeeze(self.b(item_id))
        res = (x * q_prime_i).sum(1) + b  # (4096,)
        return res
Example #3
 def forward(self, user_id, item_id):
     P_u = self.P(user_id)
     Q_i = self.Q(item_id)
     b_u = self.user_bias(user_id)
     b_i = self.item_bias(item_id)
     outputs = (P_u * Q_i).sum(axis=1) + np.squeeze(b_u) + np.squeeze(b_i)
     return outputs.flatten()
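A small sketch of why the biases need squeezing, again assuming the MXNet numpy interface; all arrays are random stand-ins for Embedding outputs:

from mxnet import np, npx
npx.set_np()

P_u = np.random.uniform(size=(4, 8))   # user factors
Q_i = np.random.uniform(size=(4, 8))   # item factors
b_u = np.random.uniform(size=(4, 1))   # user bias: Embedding with output_dim=1
b_i = np.random.uniform(size=(4, 1))   # item bias
scores = (P_u * Q_i).sum(axis=1) + np.squeeze(b_u) + np.squeeze(b_i)
print(scores.shape)  # (4,); without the squeeze, (4,) + (4, 1) broadcasts to (4, 4)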
Example #4
    def _training_cell_state_transform(
            previous_cell_state, weighted_inputs,
            forget_rates) -> Tuple[np.ndarray, np.ndarray]:
        """Update SSRU cell at training time"""
        def _time_step_update(
                step_input_and_forget_rate,
                previous_step_state) -> Tuple[np.ndarray, np.ndarray]:
            """
            Recurrently update the SSRU cell state for one time step.

            :param step_input_and_forget_rate: List = [step_input, forget_rate]
            :param previous_step_state: cell state at (t-1)
            :return: the current time step state, twice. NOTE: The first instance will be stacked into the final
            foreach output and the second will be the input to the next _time_step_update iteration.
            """
            step_input, forget_rate = step_input_and_forget_rate  # each of shape (batch_size, model_size)
            current_step_state = forget_rate * previous_step_state + step_input
            return current_step_state, current_step_state

        # (max_length, batch, input_depth), (batch, input_depth)
        cell_state, last_step_state = npx.foreach(
            _time_step_update, [weighted_inputs, forget_rates],
            np.squeeze(previous_cell_state, axis=0))

        return cell_state, np.expand_dims(last_step_state, axis=0)
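A toy recurrence showing the npx.foreach contract relied on above (MXNet 2.x numpy interface assumed): the body's first return value is stacked across time steps, the second is carried into the next step:

from mxnet import np, npx
npx.set_np()

def step(step_input, prev_state):
    state = prev_state + step_input    # toy update: running sum over time
    return state, state

inputs = np.ones((5, 2, 3))            # (max_length, batch, input_depth)
stacked, last = npx.foreach(step, inputs, np.zeros((2, 3)))
print(stacked.shape, last.shape)       # (5, 2, 3) (2, 3)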
Example #5
 def _func(*states):
     i = states[0]
     s = states[1:]
     data = np.squeeze(np.take(inputs, i), axis=0)
     out, new_s = self.cell(data, s)
     new_s = [i + 1] + new_s
     return out, new_s
Example #6
    def forward(self, tokens, token_types, valid_length, p_mask, start_position):
        """

        Parameters
        ----------
        tokens
            Shape (batch_size, sequence_length)
        token_types
            Shape (batch_size, sequence_length)
        valid_length
            Shape (batch_size,)
        p_mask
            Shape (batch_size, sequence_length)
        start_position
            Shape (batch_size,)

        Returns
        -------
        start_logits
            Shape (batch_size, sequence_length)
        end_logits
            Shape (batch_size, sequence_length)
        answerable_logits
            Shape (batch_size, 2)
        """
        if self.use_segmentation:
            contextual_embeddings = self.backbone(tokens, token_types, valid_length)
        else:
            contextual_embeddings = self.backbone(tokens, valid_length)
        start_logits = self.get_start_logits(contextual_embeddings, p_mask)
        end_logits = self.get_end_logits(contextual_embeddings,
                                         np.expand_dims(start_position, axis=1),
                                         p_mask)
        end_logits = np.squeeze(end_logits, axis=1)
        answerable_logits = self.get_answerable_logits(contextual_embeddings, p_mask)
        return start_logits, end_logits, answerable_logits
Example #7
    def get_end_logits(self, contextual_embedding, start_positions, p_mask):
        """

        Parameters
        ----------
        contextual_embedding
            Shape (batch_size, sequence_length, C)
        start_positions
            Shape (batch_size, N)
            We process multiple candidates simultaneously
        p_mask
            Shape (batch_size, sequence_length)

        Returns
        -------
        end_logits
            Shape (batch_size, N, sequence_length)
        """
        # Select the features at the start_positions
        # start_feature will have shape (batch_size, N, C)
        start_features = select_vectors_by_position(contextual_embedding, start_positions)
        # Concatenate the start_feature and the contextual_embedding
        contextual_embedding = np.expand_dims(contextual_embedding, axis=1)  # (B, 1, T, C)
        start_features = np.expand_dims(start_features, axis=2)  # (B, N, 1, C)
        concat_features = np.concatenate(
            [npx.broadcast_like(start_features, contextual_embedding, 2, 2),
             npx.broadcast_like(contextual_embedding, start_features, 1, 1)],
            axis=-1)  # (B, N, T, 2C)
        end_scores = self.end_scores(concat_features)
        end_scores = np.squeeze(end_scores, -1)
        end_logits = masked_logsoftmax(end_scores, mask=np.expand_dims(p_mask, axis=1),
                                       axis=-1)
        return end_logits
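A shape-only sketch of the two broadcast_like calls, mirroring the positional lhs_axes/rhs_axes arguments used above (MXNet numpy interface assumed, sizes made up):

from mxnet import np, npx
npx.set_np()

B, N, T, C = 2, 3, 5, 4
start_features = np.random.uniform(size=(B, N, 1, C))
ctx_embedding = np.random.uniform(size=(B, 1, T, C))
# Tile axis 2 of start_features to the length of axis 2 of ctx_embedding
tiled_starts = npx.broadcast_like(start_features, ctx_embedding, 2, 2)
# Tile axis 1 of ctx_embedding to the length of axis 1 of start_features
tiled_ctx = npx.broadcast_like(ctx_embedding, start_features, 1, 1)
print(tiled_starts.shape, tiled_ctx.shape)  # (2, 3, 5, 4) (2, 3, 5, 4)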
Example #8
    def forward(self, data, valid_length):  # pylint: disable=arguments-differ
        # Optional factor ids are carried along the trailing factor dimension of data
        average_factors_embeds = []  # type: List[np.ndarray]
        concat_factors_embeds = []  # type: List[np.ndarray]
        sum_factors_embeds = []  # type: List[np.ndarray]
        if self.config.num_factors > 1 and self.config.factor_configs is not None:
            data, *data_factors = (np.squeeze(x, axis=2) for x in np.split(data, self.config.num_factors, axis=2))
            for i, (factor_data, factor_config) in enumerate(zip(data_factors,
                                                                 self.config.factor_configs)):
                factor_weight = self.factor_weights[i]
                factor_embedding = npx.embedding(factor_data,
                                                 input_dim=factor_config.vocab_size,
                                                 weight=factor_weight.data(),
                                                 output_dim=factor_config.num_embed)
                if factor_config.combine == C.FACTORS_COMBINE_CONCAT:
                    concat_factors_embeds.append(factor_embedding)
                elif factor_config.combine == C.FACTORS_COMBINE_SUM:
                    sum_factors_embeds.append(factor_embedding)
                elif factor_config.combine == C.FACTORS_COMBINE_AVERAGE:
                    average_factors_embeds.append(factor_embedding)
                else:
                    raise ValueError("Unknown combine value for factors: %s" % factor_config.combine)
        else:
            data = np.squeeze(data, axis=2)

        embed = npx.embedding(data,
                              weight=self.weight.data(),
                              input_dim=self.config.vocab_size,
                              output_dim=self.config.num_embed,
                              dtype=self._dtype,
                              sparse_grad=False)

        if self.config.num_factors > 1 and self.config.factor_configs is not None:
            if average_factors_embeds:
                embed = npx.add_n(embed, *average_factors_embeds) / (len(average_factors_embeds) + 1)
            if sum_factors_embeds:
                embed = npx.add_n(embed, *sum_factors_embeds)
            if concat_factors_embeds:
                embed = np.concatenate((embed, *concat_factors_embeds), axis=2)

        if self.config.dropout > 0:
            embed = npx.dropout(data=embed, p=self.config.dropout)

        return embed, np.copy(valid_length)  # See https://github.com/apache/incubator-mxnet/issues/14228
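The factor split at the top of this forward yields one (batch, seq_len, 1) slice per factor, which np.squeeze flattens back to (batch, seq_len); a minimal sketch with made-up sizes (MXNet numpy interface assumed):

from mxnet import np, npx
npx.set_np()

num_factors = 3
data = np.zeros((2, 6, num_factors))   # token ids plus two factor streams
tokens, *factors = (np.squeeze(x, axis=2)
                    for x in np.split(data, num_factors, axis=2))
print(tokens.shape, [f.shape for f in factors])  # (2, 6) [(2, 6), (2, 6)]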
Example #9
 def forward(self, user_id, seq, item_id):
     item_embs = np.expand_dims(self.Q(seq), 1)
     user_emb = self.P(user_id)
     out, out_h, out_v, out_hs = None, None, None, []
     if self.d_prime:
         out_v = self.conv_v(item_embs)
         out_v = out_v.reshape(out_v.shape[0], self.fc1_dim_v)
     if self.d:
         for conv, maxp in zip(self.conv_h, self.max_pool):
             conv_out = np.squeeze(npx.relu(conv(item_embs)), axis=3)
             t = maxp(conv_out)
             pool_out = np.squeeze(t, axis=2)
             out_hs.append(pool_out)
         out_h = np.concatenate(out_hs, axis=1)
     out = np.concatenate([out_v, out_h], axis=1)
     z = self.fc(self.dropout(out))
     x = np.concatenate([z, user_emb], axis=1)
     q_prime_i = np.squeeze(self.Q_prime(item_id))
     b = np.squeeze(self.b(item_id))
     res = (x * q_prime_i).sum(1) + b
     return res
Example #10
    def get_start_logits(self, contextual_embedding, p_mask):
        """

        Parameters
        ----------
        contextual_embedding
            Shape (batch_size, sequence_length, C)

        Returns
        -------
        start_logits
            Shape (batch_size, sequence_length)
        """
        start_scores = np.squeeze(self.start_scores(contextual_embedding), -1)
        start_logits = masked_logsoftmax(start_scores, mask=p_mask, axis=-1)
        return start_logits
Example #11
 def forward(self, queries, keys, values, valid_lens):
     queries, keys = self.W_q(queries), self.W_k(keys)
     # After dimension expansion, shape of `queries`: (`batch_size`, no. of
     # queries, 1, `num_hiddens`) and shape of `keys`: (`batch_size`, 1,
     # no. of key-value pairs, `num_hiddens`). Sum them up with
     # broadcasting
     features = np.expand_dims(queries, axis=2) + np.expand_dims(
         keys, axis=1)
     features = np.tanh(features)
     # There is only one output of `self.w_v`, so we remove the last
     # one-dimensional entry from the shape. Shape of `scores`:
     # (`batch_size`, no. of queries, no. of key-value pairs)
     scores = np.squeeze(self.w_v(features), axis=-1)
     self.attention_weights = masked_softmax(scores, valid_lens)
     # Shape of `values`: (`batch_size`, no. of key-value pairs, value
     # dimension)
     return npx.batch_dot(self.dropout(self.attention_weights), values)
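A shape-only sketch of the additive-attention scoring squeeze, with a plain matrix standing in for the single-unit Dense layer w_v (hypothetical sizes, MXNet numpy interface assumed):

from mxnet import np, npx
npx.set_np()

B, Q, K, H = 2, 3, 4, 8
features = np.tanh(np.random.uniform(size=(B, Q, K, H)))
w_v = np.random.uniform(size=(H, 1))   # stand-in for the Dense(1) layer
scores = np.squeeze(np.dot(features, w_v), axis=-1)
print(scores.shape)  # (2, 3, 4): one score per (query, key) pair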
Example #12
 def forward(self, inputs):
     # Concatenate the output of two embedding layers with shape of
     # (batch size, no. of words, word vector dimension) by word vector
     embeddings = np.concatenate((
         self.embedding(inputs), self.constant_embedding(inputs)), axis=2)
     # Per the input format required by Conv1D, move the word vector
     # dimension (the channel dimension of the one-dimensional
     # convolutional layer) ahead of the time dimension
     embeddings = embeddings.transpose(0, 2, 1)
     # For each one-dimensional convolutional layer, after max-over-time
     # pooling, an ndarray with the shape of (batch size, channel size, 1)
     # can be obtained. Use the flatten function to remove the last
     # dimension and then concatenate on the channel dimension
     encoding = np.concatenate([
         np.squeeze(self.pool(conv(embeddings)), axis=-1)
         for conv in self.convs], axis=1)
     # After applying the dropout method, use a fully connected layer to
     # obtain the output
     outputs = self.decoder(self.dropout(encoding))
     return outputs
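Max-over-time pooling keeps a trailing length-1 time axis, which the squeeze inside the comprehension above removes; a minimal sketch (MXNet numpy interface assumed):

from mxnet import np, npx
npx.set_np()

conv_out = np.random.uniform(size=(2, 6, 9))   # (batch, channels, steps)
pooled = conv_out.max(axis=-1, keepdims=True)  # (2, 6, 1): max-over-time
encoding = np.squeeze(pooled, axis=-1)         # (2, 6)
print(encoding.shape)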
Example #13
def test_np_squeeze():
    config = [((), None), ((), -1), ((), 0), ((4, 1, 2), None),
              ((1, 1, 1), None), ((1, 0, 1, 5), 2), ((1, 0, 1, 1), (-1, -4))]

    class TestSqueeze(HybridBlock):
        def __init__(self, axis):
            super(TestSqueeze, self).__init__()
            self._axis = axis

        def hybrid_forward(self, F, x):
            return F.np.squeeze(x, axis=self._axis)

    for shape, axis in config:
        data_np = _np.random.uniform(size=shape)
        data_mx = np.array(data_np, dtype=data_np.dtype)
        ret_np = _np.squeeze(data_np, axis=axis)
        ret_mx = np.squeeze(data_mx, axis=axis)
        assert_almost_equal(ret_mx.asnumpy(),
                            ret_np,
                            rtol=1e-5,
                            atol=1e-6,
                            use_broadcast=False)

        net = TestSqueeze(axis)
        for hybrid in [False, True]:
            if hybrid:
                net.hybridize()
            data_mx.attach_grad()
            with mx.autograd.record():
                ret_mx = net(data_mx)
            assert_almost_equal(ret_mx.asnumpy(),
                                ret_np,
                                rtol=1e-5,
                                atol=1e-6,
                                use_broadcast=False)
            ret_mx.backward()
            assert_almost_equal(data_mx.grad.asnumpy(),
                                _np.ones_like(data_np),
                                rtol=1e-5,
                                atol=1e-6,
                                use_broadcast=False)
Example #14
def multibox_detection(cls_probs,
                       offset_preds,
                       anchors,
                       nms_threshold=0.5,
                       pos_threshold=0.01):
    device, batch_size = cls_probs.ctx, cls_probs.shape[0]
    anchors = np.squeeze(anchors, axis=0)
    num_classes, num_anchors = cls_probs.shape[1], cls_probs.shape[2]
    out = []
    for i in range(batch_size):
        cls_prob, offset_pred = cls_probs[i], offset_preds[i].reshape(-1, 4)
        conf, class_id = np.max(cls_prob[1:], 0), np.argmax(cls_prob[1:], 0)
        predicted_bb = offset_inverse(anchors, offset_pred)
        keep = nms(predicted_bb, conf, nms_threshold)
        # Find all non_keep indices and set the class_id to background
        all_idx = np.arange(num_anchors, dtype=np.int32, ctx=device)
        combined = np.concatenate((keep, all_idx))
        unique, counts = np.unique(combined, return_counts=True)
        non_keep = unique[counts == 1]
        all_id_sorted = np.concatenate((keep, non_keep))
        class_id[non_keep] = -1
        class_id = class_id[all_id_sorted].astype('float32')
        conf, predicted_bb = conf[all_id_sorted], predicted_bb[all_id_sorted]
        # pos_threshold is a threshold for positive (non-background) predictions
        below_min_idx = (conf < pos_threshold)
        class_id[below_min_idx] = -1
        conf[below_min_idx] = 1 - conf[below_min_idx]
        pred_info = np.concatenate((np.expand_dims(
            class_id, axis=1), np.expand_dims(conf, axis=1), predicted_bb),
                                   axis=1)
        out.append(pred_info)
    return np.stack(out)
Example #15
    def forward(self, source_encoded: np.ndarray,
                source_encoded_length: np.ndarray) -> np.ndarray:
        """
        Transformation to the length ratio. Returns a vector.

        :param source_encoded: Encoder representation for n elements. Shape: (n, source_encoded_length, hidden_size).
        :param source_encoded_length: A vector of encoded sequence lengths. Shape: (n,).
        :return: Predictions of the ratio length(hypothesis)/length(reference). Shape: (n,).
        """
        # source_masked: (n, source_encoded_length, hidden_size)
        source_masked = npx.sequence_mask(
            source_encoded,
            axis=1,
            sequence_length=source_encoded_length,
            use_sequence_length=True,
            value=0.)
        # calculate the proper means of encoded sources
        # data: (n, hidden_size)
        data = np.sum(source_masked, axis=1, keepdims=False) / np.reshape(
            source_encoded_length, (-1, 1))
        # MLP. Shape: (n, 1)
        data = self.layers(data)
        # Shape: (n,)
        return np.squeeze(data)
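The masked mean and the final (n, 1) to (n,) squeeze in isolation, with an all-ones array standing in for real encoder output (MXNet numpy interface assumed):

from mxnet import np, npx
npx.set_np()

source_encoded = np.ones((2, 5, 4))          # (n, seq_len, hidden)
lengths = np.array([3, 5], dtype='float32')  # (n,)
masked = npx.sequence_mask(source_encoded, axis=1,
                           sequence_length=lengths,
                           use_sequence_length=True, value=0.)
mean = np.sum(masked, axis=1) / np.reshape(lengths, (-1, 1))  # (2, 4)
ratio = np.ones((2, 1))   # stand-in for the MLP output self.layers(mean)
print(np.squeeze(ratio).shape)  # (2,)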
Example #16
    def forward(self,
                source: np.ndarray,
                source_length: np.ndarray,
                restrict_lexicon: Optional[lexicon.TopKLexicon],
                raw_constraint_list: List[Optional[constrained.RawConstraintList]],
                raw_avoid_list: List[Optional[constrained.RawConstraintList]],
                max_output_lengths: np.ndarray) -> Tuple[np.ndarray,
                                                         np.ndarray,
                                                         np.ndarray,
                                                         np.ndarray,
                                                         List[Optional[np.ndarray]],
                                                         List[Optional[constrained.ConstrainedHypothesis]]]:
        """
        Translates multiple sentences using beam search.

        :param source: Source ids. Shape: (batch_size, bucket_key, num_factors).
        :param source_length: Valid source lengths. Shape: (batch_size,).
        :param restrict_lexicon: Lexicon to use for vocabulary restriction.
        :param raw_constraint_list: A list of optional lists containing phrases (as lists of target word IDs)
               that must appear in each output.
        :param raw_avoid_list: A list of optional lists containing phrases (as lists of target word IDs)
               that must NOT appear in each output.
        :param max_output_lengths: ndarray of maximum output lengths per input in source.
                Shape: (batch_size,). Dtype: int32.
        :return: List of best hypotheses indices, list of best word indices,
                array of accumulated length-normalized negative log-probs, hypotheses lengths,
                predicted lengths of references (if any), constraints (if any).
        """
        batch_size = source.shape[0]
        logger.debug("beam_search batch size: %d", batch_size)

        # Maximum beam search iterations (determined by longest input with eos)
        max_iterations = max_output_lengths.max().item()
        logger.debug("max beam search iterations: %d", max_iterations)

        sample_best_hyp_indices = None
        if self._sample is not None:
            utils.check_condition(restrict_lexicon is None,
                                  "Sampling is not available when working with a restricted lexicon.")
            sample_best_hyp_indices = np.arange(0, batch_size * self.beam_size, dtype='int32', ctx=self.context)

        # General data structure: batch_size * beam_size blocks in total;
        # a full beam for each sentence, followed by the next beam-block for the next sentence and so on

        # best word indices (also act as input). Shape: (batch * beam, num_target_factors)
        best_word_indices = np.full((batch_size * self.beam_size, self.num_target_factors),
                                    fill_value=self.bos_id, ctx=self.context, dtype='int32')

        # offset for hypothesis indices in batch decoding
        offset = np.repeat(np.arange(0, batch_size * self.beam_size, self.beam_size,
                                     dtype='int32', ctx=self.context), self.beam_size)

        # locations of each batch item when first dimension is (batch * beam)
        batch_indices = np.arange(0, batch_size * self.beam_size, self.beam_size, dtype='int32', ctx=self.context)
        first_step_mask = np.full((batch_size * self.beam_size, 1),
                                  fill_value=np.inf, ctx=self.context, dtype=self.dtype)
        first_step_mask[batch_indices] = 0.0

        # Best word and hypotheses indices across beam search steps from topk operation.
        best_hyp_indices_list = []  # type: List[np.ndarray]
        best_word_indices_list = []  # type: List[np.ndarray]

        lengths = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype='int32')
        finished = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype='int32')

        # Extending max_output_lengths to shape (batch_size * beam_size, 1)
        max_output_lengths = np.repeat(np.expand_dims(max_output_lengths, axis=1), self.beam_size, axis=0)

        # scores_accumulated: chosen smallest scores in scores (ascending).
        scores_accumulated = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype=self.dtype)

        output_vocab_size = self.output_vocab_size

        # If using a top-k lexicon, select param rows for logit computation that correspond to the
        # target vocab for this sentence.
        vocab_slice_ids = None  # type: Optional[np.ndarray]
        if restrict_lexicon:
            source_words = np.squeeze(np.split(source, self.num_source_factors, axis=2)[0], axis=2)
            vocab_slice_ids, output_vocab_size, raw_constraint_list = _get_vocab_slice_ids(restrict_lexicon,
                                                                                           source_words,
                                                                                           raw_constraint_list,
                                                                                           self.eos_id, beam_size=1)

        pad_dist = np.full((batch_size * self.beam_size, output_vocab_size - 1),
                           fill_value=np.inf, ctx=self.context, dtype=self.dtype)
        eos_dist = np.full((batch_size * self.beam_size, output_vocab_size),
                           fill_value=np.inf, ctx=self.context, dtype=self.dtype)
        eos_dist[:, C.EOS_ID] = 0
        unk_dist = None
        if self.prevent_unk:
            unk_dist = np.zeros_like(eos_dist)
            unk_dist[:, C.UNK_ID] = np.inf  # pylint: disable=E1137

        # Initialize the beam to track constraint sets, where target-side lexical constraints are present
        constraints = constrained.init_batch(raw_constraint_list, self.beam_size, self.bos_id, self.eos_id)

        if self.global_avoid_trie or any(raw_avoid_list):
            avoid_states = constrained.AvoidBatch(batch_size, self.beam_size,
                                                  avoid_list=raw_avoid_list,
                                                  global_avoid_trie=self.global_avoid_trie)
            avoid_states.consume(best_word_indices[:, 0])  # constraints operate only on primary target factor

        # (0) encode source sentence, returns a list
        model_states, estimated_reference_lengths = self._inference.encode_and_initialize(source, source_length)
        # repeat states to beam_size
        model_states = _repeat_states(model_states, self.beam_size, self._inference.state_structure())
        # repeat estimated_reference_lengths to shape (batch_size * beam_size, 1)
        estimated_reference_lengths = np.repeat(estimated_reference_lengths, self.beam_size, axis=0)

        # Records items in the beam that are inactive. At the beginning (t==1), there is only one valid or active
        # item on the beam for each sentence
        inactive = np.zeros((batch_size * self.beam_size, 1), dtype='int32', ctx=self.context)
        t = 1
        for t in range(1, max_iterations + 1):  # max_iterations + 1 required to get correct results
            # (1) obtain next predictions and advance models' state
            # target_dists: (batch_size * beam_size, target_vocab_size)
            target_dists, model_states, target_factors = self._inference.decode_step(best_word_indices,
                                                                                     model_states,
                                                                                     vocab_slice_ids)

            # (2) Produces the accumulated cost of target words in each row.
            # There is special treatment for finished and inactive rows: inactive rows are inf everywhere;
            # finished rows are inf everywhere except column zero, which holds the accumulated model score
            scores, lengths = self._update_scores(target_dists,
                                                  finished,
                                                  inactive,
                                                  scores_accumulated,
                                                  lengths,
                                                  max_output_lengths,
                                                  unk_dist,
                                                  pad_dist,
                                                  eos_dist)

            # Mark entries that should be blocked as having a score of np.inf
            if self.global_avoid_trie or any(raw_avoid_list):
                block_indices = avoid_states.avoid()
                if len(block_indices) > 0:
                    scores[block_indices] = np.inf
                    if self._sample is not None:
                        target_dists[block_indices] = np.inf

            # (3) Get beam_size winning hypotheses for each sentence block separately. Only look as
            # far as the active beam size for each sentence.
            if self._sample is not None:
                best_hyp_indices, best_word_indices, scores_accumulated = self._sample(scores,
                                                                                       target_dists,
                                                                                       finished,
                                                                                       sample_best_hyp_indices)
            else:
                # On the first timestep, all hypotheses have identical histories, so force topk() to choose extensions
                # of the first row only by setting all other rows to inf
                if t == 1:
                    scores += first_step_mask

                best_hyp_indices, best_word_indices, scores_accumulated = self._top(scores, offset)

            # Constraints for constrained decoding are processed sentence by sentence
            if any(raw_constraint_list):
                best_hyp_indices, best_word_indices, scores_accumulated, constraints, inactive = constrained.topk(
                    t,
                    batch_size,
                    self.beam_size,
                    inactive,
                    scores,
                    constraints,
                    best_hyp_indices,
                    best_word_indices,
                    scores_accumulated)

            # Map from restricted to full vocab ids if needed
            if restrict_lexicon:
                best_word_indices = np.take(vocab_slice_ids, best_word_indices, axis=0)

            # (4) Normalize the scores of newly finished hypotheses. Note that after this until the
            # next call to topk(), hypotheses may not be in sorted order.
            _sort_inputs = [best_hyp_indices, best_word_indices, finished, scores_accumulated, lengths,
                            estimated_reference_lengths]
            if target_factors is not None:
                _sort_inputs.append(target_factors)
            best_word_indices, finished, scores_accumulated, lengths, estimated_reference_lengths = \
                self._sort_norm_and_update_finished(*_sort_inputs)

            # Collect best hypotheses, best word indices
            best_word_indices_list.append(best_word_indices)
            best_hyp_indices_list.append(best_hyp_indices)

            if self._should_stop(finished, batch_size):
                break

            # (5) update models' state with winning hypotheses (ascending)
            model_states = self._sort_states(best_hyp_indices, *model_states)

        logger.debug("Finished after %d out of %d steps.", t, max_iterations)

        # (6) Sort the hypotheses within each sentence (normalization for finished hyps may have unsorted them).
        scores_accumulated_shape = scores_accumulated.shape
        folded_accumulated_scores = scores_accumulated.reshape((batch_size, -1))
        indices = np.argsort(folded_accumulated_scores.astype('float32', copy=False), axis=1).reshape((-1,))
        best_hyp_indices = np.unravel_index(indices, scores_accumulated_shape)[0].astype('int32') + offset
        scores_accumulated = scores_accumulated.take(best_hyp_indices, axis=0)
        best_hyp_indices_list.append(best_hyp_indices)
        lengths = lengths.take(best_hyp_indices, axis=0)
        all_best_hyp_indices = np.stack(best_hyp_indices_list, axis=1)
        all_best_word_indices = np.stack(best_word_indices_list, axis=2)
        constraints = [constraints[x] for x in best_hyp_indices.tolist()]

        return all_best_hyp_indices, \
               all_best_word_indices, \
               scores_accumulated, \
               lengths.astype('int32', copy=False), \
               estimated_reference_lengths, \
               constraints
Example #17
    def forward(self,
                source: np.ndarray,
                source_length: np.ndarray,
                restrict_lexicon: Optional[lexicon.TopKLexicon],
                raw_constraint_list: List[Optional[constrained.RawConstraintList]],
                raw_avoid_list: List[Optional[constrained.RawConstraintList]],
                max_output_lengths: np.ndarray) -> Tuple[np.ndarray,
                                                         np.ndarray,
                                                         np.ndarray,
                                                         np.ndarray,
                                                         List[Optional[np.ndarray]],
                                                         List[Optional[constrained.ConstrainedHypothesis]]]:
        """
        Translates a single sentence (batch_size=1) using greedy search.

        :param source: Source ids. Shape: (batch_size=1, bucket_key, num_factors).
        :param source_length: Valid source lengths. Shape: (batch_size=1,).
        :param restrict_lexicon: Lexicon to use for vocabulary restriction.
        :param raw_constraint_list: A list of optional lists containing phrases (as lists of target word IDs)
                that must appear in each output.
        :param raw_avoid_list: A list of optional lists containing phrases (as lists of target word IDs)
                that must NOT appear in each output.
        :param max_output_lengths: ndarray of maximum output lengths per input in source.
                Shape: (batch_size=1,). Dtype: int32.
        :return: List of best hypotheses indices, list of best word indices,
                array of accumulated length-normalized negative log-probs, hypotheses lengths,
                predicted lengths of references (if any), constraints (if any).
        """
        batch_size = source.shape[0]
        assert batch_size == 1, "Greedy Search does not support batch_size != 1"

        # Maximum search iterations (determined by longest input with eos)
        max_iterations = max_output_lengths.max().item()
        logger.debug("max greedy search iterations: %d", max_iterations)

        # best word index (also acts as input). Shape: (batch_size=1, num_target_factors)
        best_word_index = np.full((batch_size, self.num_target_factors),
                                  fill_value=self.bos_id, ctx=self.context, dtype='int32')
        outputs = []  # type: List[np.ndarray]

        vocab_slice_ids = None  # type: Optional[np.ndarray]
        # If using a top-k lexicon, select param rows for logit computation that correspond to the
        # target vocab for this sentence.
        if restrict_lexicon:
            source_words = np.squeeze(np.split(source, self.num_source_factors, axis=2)[0], axis=2)
            vocab_slice_ids, _, raw_constraint_list = _get_vocab_slice_ids(restrict_lexicon, source_words,
                                                                           raw_constraint_list,
                                                                           self.eos_id, beam_size=1)

        # (0) encode source sentence, returns a list
        model_states, _ = self._inference.encode_and_initialize(source, source_length)
        # TODO: check for disabled predicted output length

        t = 1
        for t in range(1, max_iterations + 1):
            scores, model_states, target_factors = self._inference.decode_step(best_word_index,
                                                                               model_states,
                                                                               vocab_slice_ids=vocab_slice_ids)
            # shape: (batch*beam=1, 1)
            best_word_index = self.work_block(scores, vocab_slice_ids, target_factors)
            outputs.append(best_word_index)

            if best_word_index == self.eos_id or best_word_index == C.PAD_ID:
                break

        logger.debug("Finished after %d out of %d steps.", t, max_iterations)

        # shape: (1, num_factors, length)
        stacked_outputs = np.stack(outputs, axis=2)
        length = np.array([t], dtype='int32')  # shape (1,)
        hyp_indices = np.zeros((1, t + 1), dtype='int32')
        score = np.array([-1.])  # TODO: return unnormalized proper score

        return hyp_indices, stacked_outputs, score, length, None, []  # type: ignore