Example #1
def multibox_target(anchors, labels):
    batch_size, anchors = labels.shape[0], anchors.squeeze(0)
    batch_offset, batch_mask, batch_class_labels = [], [], []
    device, num_anchors = anchors.ctx, anchors.shape[0]
    for i in range(batch_size):
        label = labels[i, :, :]
        anchors_bbox_map = match_anchor_to_bbox(label[:, 1:], anchors,
                                                device)  # e.g. [-1, 0, 1, -1, 1]

        bbox_mask = np.tile((np.expand_dims((anchors_bbox_map >= 0), axis=-1)),
                            (1, 4)).astype('int32')
        # Initialize class_labels and assigned bbox coordinates with zeros
        class_labels = np.zeros(num_anchors, dtype=np.int32, ctx=device)
        assigned_bb = np.zeros((num_anchors, 4), dtype=np.float32, ctx=device)
        # Assign class labels to the anchor boxes using matched gt bbox labels
        # If no gt bbox is assigned to an anchor box, then let the
        # class_labels and assigned_bb remain zero, i.e the background class
        indices_true = np.nonzero(anchors_bbox_map >= 0)[0]  # e.g. [1, 2, 4]
        bb_idx = anchors_bbox_map[indices_true]  # e.g. [0, 1, 1]
        class_labels[indices_true] = label[bb_idx, 0].astype(
            'int32') + 1  # Get category
        assigned_bb[indices_true] = label[bb_idx, 1:]  # Get ground-truth
        # offset transformations
        offset = offset_boxes(anchors, assigned_bb) * bbox_mask
        batch_offset.append(offset.reshape(-1))
        batch_mask.append(bbox_mask.reshape(-1))
        batch_class_labels.append(class_labels)
    bbox_offset = np.stack(batch_offset)
    bbox_mask = np.stack(batch_mask)
    class_labels = np.stack(batch_class_labels)
    return (bbox_offset, bbox_mask, class_labels)
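A quick usage sketch with toy data, assuming the `match_anchor_to_bbox` and
`offset_boxes` helpers (from the d2l anchor-box walkthrough) are in scope:

ground_truth = np.array([[0, 0.1, 0.08, 0.52, 0.92],
                         [1, 0.55, 0.2, 0.9, 0.88]])
anchors = np.array([[0, 0.1, 0.2, 0.3], [0.15, 0.2, 0.4, 0.4],
                    [0.63, 0.05, 0.88, 0.98], [0.66, 0.45, 0.8, 0.8],
                    [0.57, 0.3, 0.92, 0.9]])
offsets, masks, class_labels = multibox_target(np.expand_dims(anchors, axis=0),
                                               np.expand_dims(ground_truth, axis=0))
print(class_labels)  # anchors not matched to any ground-truth box get class 0 (background)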
Example #2
    def forward(self, x, states):
        """

        Parameters
        ----------
        x
            - layout = 'NT'
                Shape (batch_size, seq_length)
            - layout = 'TN'
                Shape (seq_length, batch_size)

        states
            The previous states

            - layout = 'NT'
                Shape (num_layers, 2, batch_size, prev_len, C_in)
            - layout = 'TN'
                Shape (num_layers, 2, prev_len, batch_size, C_in)

        Returns
        -------
        new_x
            Output

            - layout = 'NT'
                Shape (batch_size, seq_length, C_out)
            - layout = 'TN'
                Shape (seq_length, batch_size, C_out)

        new_states
            The new states

            - layout = 'NT'
                Shape (num_layers, 2, batch_size, prev_len + seq_length, C_in)
            - layout = 'TN'
                Shape (num_layers, 2, prev_len + seq_length, batch_size, C_in)

        """
        prev_len = npx.shape_array(states)[3] if self._layout == 'NT' else \
                   npx.shape_array(states)[2]
        x = self.get_initial_embedding(x, prev_len)

        if self._layout != self._compute_layout:
            x = np.swapaxes(x, 0, 1)
            states = np.swapaxes(states, 2, 3)

        new_states = []
        for layer_idx in range(self._num_layers):
            layer_states = None if states is None else states[layer_idx]
            x, new_layer_states = self._layers[layer_idx](x, layer_states)
            new_states.append(new_layer_states)
        new_states = np.stack(new_states, axis=0)

        x = self._final_ln(x)
        if self._layout != self._compute_layout:
            x = np.swapaxes(x, 0, 1)
            new_states = np.swapaxes(new_states, 2, 3)
        return x, new_states
Example #3
def multibox_prior(data, sizes, ratios):
    # data: batch, channels, height, width
    in_height, in_width = data.shape[-2:]

    device, num_sizes, num_ratios = data.ctx, len(sizes), len(ratios)
    boxes_per_pixel = num_sizes + num_ratios - 1
    size_tensor = np.array(sizes, ctx=device)
    ratio_tensor = np.array(ratios, ctx=device)

    # Offsets are required to move the anchor to center of a pixel
    # Since pixel (height=1, width=1), we choose to offset our centers by 0.5
    offset_w, offset_h = 0.5, 0.5
    steps_h = 1.0 / in_height  # Scaled steps in y axis
    steps_w = 1.0 / in_width  # Scaled steps in x axis

    # Generate all center points for the anchor boxes
    center_h = (np.arange(in_height, ctx=device) + offset_h) * steps_h
    center_w = (np.arange(in_width, ctx=device) + offset_w) * steps_w
    shift_x, shift_y = np.meshgrid(center_w, center_h)
    shift_x, shift_y = shift_x.reshape(-1), shift_y.reshape(-1)

    # Generate boxes_per_pixel number of heights and widths which are later
    # used to create anchor box corner coordinates (xmin, ymin, xmax, ymax):
    # concat (various sizes, first ratio) and (first size, various ratios)

    w = np.concatenate((size_tensor * np.sqrt(ratio_tensor[0]),
                        size_tensor[0] * np.sqrt(ratio_tensor[1:]))) \
        * in_height / in_width

    h = np.concatenate((size_tensor / np.sqrt(ratio_tensor[0]),
                        size_tensor[0] / np.sqrt(ratio_tensor[1:])))

    # Divide by 2 to get half height and half width
    anchor_manipulations = np.tile(
        np.stack((-w, -h, w, h)).T, (in_height * in_width, 1)) / 2

    # Each center point will have boxes_per_pixel number of anchor boxes, so
    # generate grid of all anchor box centers with boxes_per_pixel repeats
    out_grid = np.stack([shift_x, shift_y, shift_x, shift_y],
                        axis=1).repeat(boxes_per_pixel, axis=0)

    output = out_grid + anchor_manipulations
    return np.expand_dims(output, axis=0)
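A shape check (sketch): with 3 sizes and 3 ratios there are 3 + 3 - 1 = 5
anchors per pixel, so a 4x6 feature map yields 4 * 6 * 5 = 120 boxes.

X = np.random.uniform(size=(1, 3, 4, 6))  # batch, channels, height, width
Y = multibox_prior(X, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5])
print(Y.shape)  # (1, 120, 4)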
Example #4
def box_center_to_corner(boxes):
    """Convert from (center, width, height) to (upper_left, bottom_right)"""
    cx, cy, w, h = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    x1 = cx - w / 2
    y1 = cy - h / 2
    x2 = cx + w / 2
    y2 = cy + h / 2
    boxes = np.stack((x1, y1, x2, y2), axis=-1)
    return boxes
Example #5
    def forward(self, x, layer_states):
        """

        Parameters
        ----------
        x
            - layout = 'NT'
                Shape (batch_size, seq_length, C_in)
            - layout = 'TN'
                Shape (seq_length, batch_size, C_in)

        layer_states
            - layout = 'NT'
                Shape (2, batch_size, prev_len, C_in)
            - layout = 'TN'
                Shape (2, prev_len, batch_size, C_in)
        """
        x = self.ln(x)
        if self._layout == 'NT':
            batch_axis, time_axis = 0, 1
            prev_len = npx.shape_array(layer_states)[2]
        else:
            batch_axis, time_axis = 1, 0
            prev_len = npx.shape_array(layer_states)[1]

        query, key, value = np.split(self.qkv(x), 3, axis=-1)
        if layer_states is not None:
            prev_key, prev_value = layer_states[0], layer_states[1]
            key = np.concatenate([prev_key, key], axis=time_axis)
            value = np.concatenate([prev_value, value], axis=time_axis)
        new_states = np.stack([key, value], axis=0)

        # gen mask
        query_pos = npx.arange_like(query, axis=time_axis)
        if prev_len is not None:
            query_pos = query_pos + prev_len
        key_pos = npx.arange_like(key, axis=time_axis)
        # (query_len, key_len)
        mask = (npx.reshape(key_pos,
                            (1, -1)) <= npx.reshape(query_pos,
                                                    (-1, 1))).astype(
                                                        self._dtype)
        # broadcast to (batch_size, query_len, key_len)
        mask = npx.broadcast_like(np.expand_dims(mask, axis=0),
                                  query,
                                  lhs_axes=0,
                                  rhs_axes=batch_axis)

        query = npx.reshape(query, (-2, -2, self._num_heads, -1))
        key = npx.reshape(key, (-2, -2, self._num_heads, -1))
        value = npx.reshape(value, (-2, -2, self._num_heads, -1))

        out, [_, attn_weight] = self.attention_cell(query, key, value, mask)
        out = self.out_proj(out)
        out = self.hidden_dropout(out)

        return out, new_states
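A minimal sketch of the causal-mask construction above (assuming
`from mxnet import np, npx; npx.set_np()`): with prev_len=2 cached steps and 3
new query positions, each query may attend to every cached key plus itself and
earlier new positions.

prev_len, seq_length = 2, 3
query_pos = np.arange(seq_length) + prev_len       # [2, 3, 4]
key_pos = np.arange(prev_len + seq_length)         # [0, 1, 2, 3, 4]
mask = (key_pos.reshape(1, -1) <= query_pos.reshape(-1, 1)).astype('float32')
print(mask)  # shape (3, 5), ones where attention is allowed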
Example #6
def box_corner_to_tensor(boxes):
    """Convert from (upper_left, bottom_right) to (center, width, height)"""
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2
    w = x2 - x1
    h = y2 - y1
    boxes = np.stack((cx, cy, w, h), axis=-1)
    return boxes
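Round-trip sanity check for the two converters (sketch):

boxes = np.array([[0.5, 0.5, 0.4, 0.2]])  # (cx, cy, w, h)
corners = box_center_to_corner(boxes)     # [[0.3, 0.4, 0.7, 0.6]]
print(box_corner_to_tensor(corners))      # recovers the original boxes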
Example #7
    def encode_and_initialize(self, inputs: np.ndarray, valid_length: Optional[np.ndarray] = None):
        model_states = []  # type: List[np.ndarray]
        predicted_output_lengths = []  # type: List[np.ndarray]
        for model in self._models:
            states, predicted_output_length = model.encode_and_initialize(inputs, valid_length, self._const_lr)
            predicted_output_lengths.append(predicted_output_length)
            model_states += states
        # average predicted output lengths, (batch, 1)
        predicted_output_lengths = np.mean(np.stack(predicted_output_lengths, axis=1), axis=1)
        return model_states, predicted_output_lengths
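Sketch of the averaging step: stacking K per-model (batch, 1) length
predictions along axis=1 gives (batch, K, 1), and the mean over axis=1
reduces back to (batch, 1).

per_model = [np.ones((4, 1)) * k for k in (8, 10, 12)]  # e.g. three models
avg = np.mean(np.stack(per_model, axis=1), axis=1)
print(avg.shape, avg[0])  # (4, 1) [10.]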
Example #8
def add_vectors_by_position(data, increment, positions):
    """Scatter each batch with the given positions.

    data[i, positions[i, j], ...] += increment[i, j, ...]

    Parameters
    ----------
    data
        Input tensor of the array to be updated.
        Shape (batch_size, seq_length, ...)
    increment
        Input tensor of the values to add.
        Shape (batch_size, num_disp_position, ...)
    positions
        Input tensor of the positions.
        Shape (batch_size, num_disp_position).
        For each sample in the batch, the values in this tensor must not exceed
        the length of the sequence.

    Returns
    -------
    out
        The updated result.
        Shape (batch_size, seq_length, ...)
    """
    # Here, we use index_add to disperse the output from data:
    # Need to compute
    #   out[i, masked_position[i, j], :] = in[i, j, :]
    # Thus, construct an indices with shape [2, batch_size * num_masked_position], where
    #     indices[0, i * num_masked_position + j] = i
    #     indices[1, i * num_masked_position + j] = masked_position[i, j]
    # And convert data to the shape of the (batch_size * num_masked_position, )
    # Then, out = npx.index_add(data, indices, increment)
    positions = positions.astype(np.int32)
    # batch_idx.shape = (batch_size, 1) as [[0], [1], [2], ...]
    batch_idx = np.expand_dims(npx.arange_like(positions, axis=0),
                               axis=1).astype(np.int32)
    batch_idx = batch_idx + np.zeros_like(positions)
    indices = np.stack([batch_idx.reshape((-1, )), positions.reshape((-1, ))])
    out = npx.index_add(data, indices, npx.reshape(increment, (-5, -4)))
    return out
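Usage sketch (requires MXNet 2.x numpy mode for `npx.index_add`):

data = np.zeros((2, 5))
increment = np.array([[1., 2.], [3., 4.]])
positions = np.array([[0, 3], [1, 4]])
print(add_vectors_by_position(data, increment, positions))
# [[1. 0. 0. 2. 0.]
#  [0. 3. 0. 0. 4.]]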
Example #9
def select_vectors_by_position(data, positions):
    """Select each batch with the given positions.

    Once advanced indexing can be hybridized, we can revise the implementation.

    out[i, j, ...] = data[i, positions[i, j], ...]

    Parameters
    ----------
    data
        Input tensor of contextualized token embeddings
        Shape (batch_size, seq_length, ...)
    positions
        Input tensor of the positions.
        Shape (batch_size, num_sel_positions).
        For each sample in the batch, the values in this tensor must not exceed
        the length of the sequence.

    Returns
    -------
    out
        The selection result.
        Shape (batch_size, num_sel_positions, ...)
    """
    # Here, we use gather_nd to select the output from data:
    # Need to compute
    #   out[i, j, :] = in[i, masked_position[i, j], :]
    # Thus, construct indices with shape [2, batch_size, num_masked_position], where
    #     indices[0, i, j] = i
    #     indices[1, i, j] = masked_position[i, j]
    # Then, out = gather_nd(in, indices)
    positions = positions.astype(np.int32)
    # batch_idx.shape = (batch_size, 1) as [[0], [1], [2], ...]
    batch_idx = np.expand_dims(npx.arange_like(positions, axis=0),
                               axis=1).astype(np.int32)
    batch_idx = batch_idx + np.zeros_like(positions)
    indices = np.stack([batch_idx, positions])
    # TODO(sxjscience) We can revise the implementation to advanced indexing
    #  once the bug in MXNet is solved:
    #  https://github.com/apache/incubator-mxnet/issues/18919
    out = npx.gather_nd(data, indices)
    return out
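Usage sketch: gather positions 2 and 0 from the first sequence, and position 1
twice from the second.

data = np.arange(12).reshape(2, 3, 2)   # (batch=2, seq_length=3, C=2)
positions = np.array([[2, 0], [1, 1]])
print(select_vectors_by_position(data, positions))
# [[[ 4.  5.]  [ 0.  1.]]
#  [[ 8.  9.]  [ 8.  9.]]]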
Example #10
def multibox_detection(cls_probs,
                       offset_preds,
                       anchors,
                       nms_threshold=0.5,
                       pos_threshold=0.00999999978):
    device, batch_size = cls_probs.ctx, cls_probs.shape[0]
    anchors = np.squeeze(anchors, axis=0)
    num_classes, num_anchors = cls_probs.shape[1], cls_probs.shape[2]
    out = []
    for i in range(batch_size):
        cls_prob, offset_pred = cls_probs[i], offset_preds[i].reshape(-1, 4)
        conf, class_id = np.max(cls_prob[1:], 0), np.argmax(cls_prob[1:], 0)
        predicted_bb = offset_inverse(anchors, offset_pred)
        keep = nms(predicted_bb, conf, nms_threshold)
        # Find all non_keep indices and set the class_id to background
        all_idx = np.arange(num_anchors, dtype=np.int32, ctx=device)
        combined = np.concatenate((keep, all_idx))
        unique, counts = np.unique(combined, return_counts=True)
        non_keep = unique[counts == 1]
        all_id_sorted = np.concatenate((keep, non_keep))
        class_id[non_keep] = -1
        class_id = class_id[all_id_sorted].astype('float32')
        conf, predicted_bb = conf[all_id_sorted], predicted_bb[all_id_sorted]
        # `pos_threshold` is a threshold for positive (non-background) predictions
        below_min_idx = (conf < pos_threshold)
        class_id[below_min_idx] = -1
        conf[below_min_idx] = 1 - conf[below_min_idx]
        pred_info = np.concatenate((np.expand_dims(
            class_id, axis=1), np.expand_dims(conf, axis=1), predicted_bb),
                                   axis=1)
        out.append(pred_info)
    return np.stack(out)
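Usage sketch with toy data, assuming the `offset_inverse` and `nms` helpers
(from the same d2l walkthrough) are in scope:

anchors = np.array([[0.1, 0.08, 0.52, 0.92], [0.08, 0.2, 0.56, 0.95],
                    [0.15, 0.3, 0.62, 0.91], [0.55, 0.2, 0.9, 0.88]])
offset_preds = np.array([0] * anchors.size)
cls_probs = np.array([[0] * 4,               # background
                      [0.9, 0.8, 0.7, 0.1],  # class 0
                      [0.1, 0.2, 0.3, 0.9]])  # class 1
output = multibox_detection(np.expand_dims(cls_probs, axis=0),
                            np.expand_dims(offset_preds, axis=0),
                            np.expand_dims(anchors, axis=0))
print(output)  # each row: (class_id, confidence, xmin, ymin, xmax, ymax)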
Example #11
def update_vectors_by_position(data, val, positions):
    """
    Update each batch with the given positions. Considered as a reversed process of
    "select_vectors_by_position", this is an operator similar to "add_vectors_by_position"
    that updates the results instead of adding.

    data[i, positions[i, j], :] = val[i, j, :]

    Parameters
    ----------
    data
        Input tensor of the array to be updated.
        Shape (batch_size, seq_length)
    val
        Input tensor of the new values (e.g. token ids).
        Shape (batch_size, num_disp_position)
    positions
        Input tensor of the positions.
        Shape (batch_size, num_disp_position).
        For each sample in the batch, the values in this tensor must not exceed
        the length of the sequence.

    Returns
    -------
    out
        The updated result.
        Shape (batch_size, seq_length)
    """
    positions = positions.astype(np.int32)
    # batch_idx.shape = (batch_size, 1) as [[0], [1], [2], ...]
    batch_idx = np.expand_dims(npx.arange_like(positions, axis=0),
                               axis=1).astype(np.int32)
    batch_idx = batch_idx + np.zeros_like(positions)
    indices = np.stack([batch_idx.reshape((-1, )), positions.reshape((-1, ))])

    out = npx.index_update(data, indices, npx.reshape(val, (-5, -4)))
    return out
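Usage sketch: overwrite (rather than accumulate into) the chosen positions.

data = np.zeros((2, 5))
val = np.array([[9., 8.], [7., 6.]])
positions = np.array([[0, 3], [1, 4]])
print(update_vectors_by_position(data, val, positions))
# [[9. 0. 0. 8. 0.]
#  [0. 7. 0. 0. 6.]]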
Example #12
    def forward(self,
                source: np.ndarray,
                source_length: np.ndarray,
                restrict_lexicon: Optional[lexicon.TopKLexicon],
                raw_constraint_list: List[Optional[constrained.RawConstraintList]],
                raw_avoid_list: List[Optional[constrained.RawConstraintList]],
                max_output_lengths: np.ndarray) -> Tuple[np.ndarray,
                                                         np.ndarray,
                                                         np.ndarray,
                                                         np.ndarray,
                                                         List[Optional[np.ndarray]],
                                                         List[Optional[constrained.ConstrainedHypothesis]]]:
        """
        Translates multiple sentences using beam search.

        :param source: Source ids. Shape: (batch_size, bucket_key, num_factors).
        :param source_length: Valid source lengths. Shape: (batch_size,).
        :param restrict_lexicon: Lexicon to use for vocabulary restriction.
        :param raw_constraint_list: A list of optional lists containing phrases (as lists of target word IDs)
               that must appear in each output.
        :param raw_avoid_list: A list of optional lists containing phrases (as lists of target word IDs)
               that must NOT appear in each output.
        :param max_output_lengths: ndarray of maximum output lengths per input in source.
                Shape: (batch_size,). Dtype: int32.
        :return: List of best hypotheses indices, list of best word indices,
                array of accumulated length-normalized negative log-probs, hypotheses lengths,
                predicted lengths of references (if any), constraints (if any).
        """
        batch_size = source.shape[0]
        logger.debug("beam_search batch size: %d", batch_size)

        # Maximum beam search iterations (determined by longest input with eos)
        max_iterations = max_output_lengths.max().item()
        logger.debug("max beam search iterations: %d", max_iterations)

        sample_best_hyp_indices = None
        if self._sample is not None:
            utils.check_condition(restrict_lexicon is None,
                                  "Sampling is not available when working with a restricted lexicon.")
            sample_best_hyp_indices = np.arange(0, batch_size * self.beam_size, dtype='int32', ctx=self.context)

        # General data structure: batch_size * beam_size blocks in total;
        # a full beam for each sentence, followed by the next beam-block for the next sentence and so on

        # best word indices (also act as input), shape: (batch * beam, num_target_factors)
        best_word_indices = np.full((batch_size * self.beam_size, self.num_target_factors),
                                    fill_value=self.bos_id, ctx=self.context, dtype='int32')

        # offset for hypothesis indices in batch decoding
        offset = np.repeat(np.arange(0, batch_size * self.beam_size, self.beam_size,
                                     dtype='int32', ctx=self.context), self.beam_size)

        # locations of each batch item when first dimension is (batch * beam)
        batch_indices = np.arange(0, batch_size * self.beam_size, self.beam_size, dtype='int32', ctx=self.context)
        first_step_mask = np.full((batch_size * self.beam_size, 1),
                                  fill_value=np.inf, ctx=self.context, dtype=self.dtype)
        first_step_mask[batch_indices] = 0.0

        # Best word and hypotheses indices across beam search steps from topk operation.
        best_hyp_indices_list = []  # type: List[np.ndarray]
        best_word_indices_list = []  # type: List[np.ndarray]

        lengths = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype='int32')
        finished = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype='int32')

        # Extending max_output_lengths to shape (batch_size * beam_size, 1)
        max_output_lengths = np.repeat(np.expand_dims(max_output_lengths, axis=1), self.beam_size, axis=0)

        # scores_accumulated: chosen smallest scores in scores (ascending).
        scores_accumulated = np.zeros((batch_size * self.beam_size, 1), ctx=self.context, dtype=self.dtype)

        output_vocab_size = self.output_vocab_size

        # If using a top-k lexicon, select param rows for logit computation that correspond to the
        # target vocab for this sentence.
        vocab_slice_ids = None  # type: Optional[np.ndarray]
        if restrict_lexicon:
            source_words = np.squeeze(np.split(source, self.num_source_factors, axis=2)[0], axis=2)
            vocab_slice_ids, output_vocab_size, raw_constraint_list = _get_vocab_slice_ids(restrict_lexicon,
                                                                                           source_words,
                                                                                           raw_constraint_list,
                                                                                           self.eos_id, beam_size=1)

        pad_dist = np.full((batch_size * self.beam_size, output_vocab_size - 1),
                           fill_value=np.inf, ctx=self.context, dtype=self.dtype)
        eos_dist = np.full((batch_size * self.beam_size, output_vocab_size),
                           fill_value=np.inf, ctx=self.context, dtype=self.dtype)
        eos_dist[:, C.EOS_ID] = 0
        unk_dist = None
        if self.prevent_unk:
            unk_dist = np.zeros_like(eos_dist)
            unk_dist[:, C.UNK_ID] = np.inf  # pylint: disable=E1137

        # Initialize the beam to track constraint sets, where target-side lexical constraints are present
        constraints = constrained.init_batch(raw_constraint_list, self.beam_size, self.bos_id, self.eos_id)

        if self.global_avoid_trie or any(raw_avoid_list):
            avoid_states = constrained.AvoidBatch(batch_size, self.beam_size,
                                                  avoid_list=raw_avoid_list,
                                                  global_avoid_trie=self.global_avoid_trie)
            avoid_states.consume(best_word_indices[:, 0])  # constraints operate only on primary target factor

        # (0) encode source sentence, returns a list
        model_states, estimated_reference_lengths = self._inference.encode_and_initialize(source, source_length)
        # repeat states to beam_size
        model_states = _repeat_states(model_states, self.beam_size, self._inference.state_structure())
        # repeat estimated_reference_lengths to shape (batch_size * beam_size, 1)
        estimated_reference_lengths = np.repeat(estimated_reference_lengths, self.beam_size, axis=0)

        # Records items in the beam that are inactive. At the beginning (t==1), there is only one valid or active
        # item on the beam for each sentence
        inactive = np.zeros((batch_size * self.beam_size, 1), dtype='int32', ctx=self.context)
        t = 1
        for t in range(1, max_iterations + 1):  # max_iterations + 1 required to get correct results
            # (1) obtain next predictions and advance models' state
            # target_dists: (batch_size * beam_size, target_vocab_size)
            target_dists, model_states, target_factors = self._inference.decode_step(best_word_indices,
                                                                                     model_states,
                                                                                     vocab_slice_ids)

            # (2) Produces the accumulated cost of target words in each row.
            # There is special treatment for finished and inactive rows: inactive rows are inf everywhere;
            # finished rows are inf everywhere except column zero, which holds the accumulated model score
            scores, lengths = self._update_scores(target_dists,
                                                  finished,
                                                  inactive,
                                                  scores_accumulated,
                                                  lengths,
                                                  max_output_lengths,
                                                  unk_dist,
                                                  pad_dist,
                                                  eos_dist)

            # Mark entries that should be blocked as having a score of np.inf
            if self.global_avoid_trie or any(raw_avoid_list):
                block_indices = avoid_states.avoid()
                if len(block_indices) > 0:
                    scores[block_indices] = np.inf
                    if self._sample is not None:
                        target_dists[block_indices] = np.inf

            # (3) Get beam_size winning hypotheses for each sentence block separately. Only look as
            # far as the active beam size for each sentence.
            if self._sample is not None:
                best_hyp_indices, best_word_indices, scores_accumulated = self._sample(scores,
                                                                                       target_dists,
                                                                                       finished,
                                                                                       sample_best_hyp_indices)
            else:
                # On the first timestep, all hypotheses have identical histories, so force topk() to choose extensions
                # of the first row only by setting all other rows to inf
                if t == 1:
                    scores += first_step_mask

                best_hyp_indices, best_word_indices, scores_accumulated = self._top(scores, offset)

            # Constraints for constrained decoding are processed sentence by sentence
            if any(raw_constraint_list):
                best_hyp_indices, best_word_indices, scores_accumulated, constraints, inactive = constrained.topk(
                    t,
                    batch_size,
                    self.beam_size,
                    inactive,
                    scores,
                    constraints,
                    best_hyp_indices,
                    best_word_indices,
                    scores_accumulated)

            # Map from restricted to full vocab ids if needed
            if restrict_lexicon:
                best_word_indices = np.take(vocab_slice_ids, best_word_indices, axis=0)

            # (4) Normalize the scores of newly finished hypotheses. Note that after this until the
            # next call to topk(), hypotheses may not be in sorted order.
            _sort_inputs = [best_hyp_indices, best_word_indices, finished, scores_accumulated, lengths,
                            estimated_reference_lengths]
            if target_factors is not None:
                _sort_inputs.append(target_factors)
            best_word_indices, finished, scores_accumulated, lengths, estimated_reference_lengths = \
                self._sort_norm_and_update_finished(*_sort_inputs)

            # Collect best hypotheses, best word indices
            best_word_indices_list.append(best_word_indices)
            best_hyp_indices_list.append(best_hyp_indices)

            if self._should_stop(finished, batch_size):
                break

            # (5) update models' state with winning hypotheses (ascending)
            model_states = self._sort_states(best_hyp_indices, *model_states)

        logger.debug("Finished after %d out of %d steps.", t, max_iterations)

        # (9) Sort the hypotheses within each sentence (normalization for finished hyps may have unsorted them).
        scores_accumulated_shape = scores_accumulated.shape
        folded_accumulated_scores = scores_accumulated.reshape((batch_size, -1))
        indices = np.argsort(folded_accumulated_scores.astype('float32', copy=False), axis=1).reshape((-1,))
        best_hyp_indices = np.unravel_index(indices, scores_accumulated_shape)[0].astype('int32') + offset
        scores_accumulated = scores_accumulated.take(best_hyp_indices, axis=0)
        best_hyp_indices_list.append(best_hyp_indices)
        lengths = lengths.take(best_hyp_indices, axis=0)
        all_best_hyp_indices = np.stack(best_hyp_indices_list, axis=1)
        all_best_word_indices = np.stack(best_word_indices_list, axis=2)
        constraints = [constraints[x] for x in best_hyp_indices.tolist()]

        return all_best_hyp_indices, \
               all_best_word_indices, \
               scores_accumulated, \
               lengths.astype('int32', copy=False), \
               estimated_reference_lengths, \
               constraints
Example #13
def corr2d_multi_in_out(X, K):
    # Iterate through the 0th dimension of `K`, and each time, perform
    # cross-correlation operations with input `X`. All of the results are
    # stacked together
    return np.stack([corr2d_multi_in(X, k) for k in K], 0)
Example #14

# Assumes `corr2d_multi_in` (defined as in Example #19 below) is in scope.
from mxnet import np, npx
npx.set_np()

X = np.array([[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],
              [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]])
K = np.array([[[0.0, 1.0], [2.0, 3.0]], [[1.0, 2.0], [3.0, 4.0]]])

print(corr2d_multi_in(X, K))

def corr2d_multi_in_out(X, K):
    # Iterate through the 0th dimension of `K`, and each time, perform
    # cross-correlation operations with input `X`. All of the results are
    # stacked together
    return np.stack([corr2d_multi_in(X, k) for k in K], 0)


K = np.stack((K, K + 1, K + 2), 0)
print(f'K-shape: {K.shape}')
corr2d_multi_in_out(X, K)

def corr2d_multi_in_out_1x1(X, K):
    c_i, h, w = X.shape
    c_o = K.shape[0]
    X = X.reshape((c_i, h * w))
    K = K.reshape((c_o, c_i))
    Y = np.dot(K, X)  # Matrix multiplication in the fully-connected layer
    return Y.reshape((c_o, h, w))

X = np.random.normal(0, 1, (3, 3, 3))
K = np.random.normal(0, 1, (2, 3, 1, 1))
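# Sanity check (sketch, assuming `corr2d_multi_in` is in scope as above): the
# 1x1 "convolution" should match the general routine up to floating-point error.
Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)
assert float(np.abs(Y1 - Y2).sum()) < 1e-6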

Example #15
    def forward(self,
                source: np.ndarray,
                source_length: np.ndarray,
                restrict_lexicon: Optional[lexicon.TopKLexicon],
                raw_constraint_list: List[Optional[constrained.RawConstraintList]],
                raw_avoid_list: List[Optional[constrained.RawConstraintList]],
                max_output_lengths: np.ndarray) -> Tuple[np.ndarray,
                                                         np.ndarray,
                                                         np.ndarray,
                                                         np.ndarray,
                                                         List[Optional[np.ndarray]],
                                                         List[Optional[constrained.ConstrainedHypothesis]]]:
        """
        Translates a single sentence (batch_size=1) using greedy search.

        :param source: Source ids. Shape: (batch_size=1, bucket_key, num_factors).
        :param source_length: Valid source lengths. Shape: (batch_size=1,).
        :param restrict_lexicon: Lexicon to use for vocabulary restriction.
        :param raw_constraint_list: A list of optional lists containing phrases (as lists of target word IDs)
                that must appear in each output.
        :param raw_avoid_list: A list of optional lists containing phrases (as lists of target word IDs)
                that must NOT appear in each output.
        :param max_output_lengths: ndarray of maximum output lengths per input in source.
                Shape: (batch_size=1,). Dtype: int32.
        :return: List of best hypotheses indices, list of best word indices,
                array of accumulated length-normalized negative log-probs, hypotheses lengths,
                predicted lengths of references (if any), constraints (if any).
        """
        batch_size = source.shape[0]
        assert batch_size == 1, "Greedy Search does not support batch_size != 1"

        # Maximum search iterations (determined by longest input with eos)
        max_iterations = max_output_lengths.max().item()
        logger.debug("max greedy search iterations: %d", max_iterations)

        # best word index (also acts as input), shape: (batch_size=1, num_target_factors)
        best_word_index = np.full((batch_size, self.num_target_factors),
                                  fill_value=self.bos_id, ctx=self.context, dtype='int32')
        outputs = []  # type: List[np.ndarray]

        vocab_slice_ids = None  # type: Optional[np.ndarray]
        # If using a top-k lexicon, select param rows for logit computation that correspond to the
        # target vocab for this sentence.
        if restrict_lexicon:
            source_words = np.squeeze(np.split(source, self.num_source_factors, axis=2)[0], axis=2)
            vocab_slice_ids, _, raw_constraint_list = _get_vocab_slice_ids(restrict_lexicon, source_words,
                                                                           raw_constraint_list,
                                                                           self.eos_id, beam_size=1)

        # (0) encode source sentence, returns a list
        model_states, _ = self._inference.encode_and_initialize(source, source_length)
        # TODO: check for disabled predicted output length

        t = 1
        for t in range(1, max_iterations + 1):
            scores, model_states, target_factors = self._inference.decode_step(best_word_index,
                                                                               model_states,
                                                                               vocab_slice_ids=vocab_slice_ids)
            # shape: (batch*beam=1, 1)
            best_word_index = self.work_block(scores, vocab_slice_ids, target_factors)
            outputs.append(best_word_index)

            if best_word_index == self.eos_id or best_word_index == C.PAD_ID:
                break

        logger.debug("Finished after %d out of %d steps.", t, max_iterations)

        # shape: (1, num_factors, length)
        stacked_outputs = np.stack(outputs, axis=2)
        length = np.array([t], dtype='int32')  # shape (1,)
        hyp_indices = np.zeros((1, t + 1), dtype='int32')
        score = np.array([-1.])  # TODO: return unnormalized proper score

        return hyp_indices, stacked_outputs, score, length, None, []  # type: ignore
Example #16
def test_np_stack():
    class TestStack(HybridBlock):
        def __init__(self, axis=None):
            super(TestStack, self).__init__()
            self._axis = axis

        def hybrid_forward(self, F, a, *args):
            return F.np.stack([a] + list(args), axis=self._axis)

    a, b, c, d = mx.sym.Variable("a"), mx.sym.Variable("b"), mx.sym.Variable(
        "c"), mx.sym.Variable("d")
    ret = mx.sym.np.stack([
        a.as_np_ndarray(),
        b.as_np_ndarray(),
        c.as_np_ndarray(),
        d.as_np_ndarray()
    ])
    assert type(ret) == mx.sym.np._Symbol

    for shape in [(0, 0), (2, 3)]:
        for hybridize in [True, False]:
            for axis in range(2):
                test_stack = TestStack(axis=axis)
                if hybridize:
                    test_stack.hybridize()
                np_a = _np.random.uniform(-1.0, 1.0, shape).astype(_np.float32)
                np_b = _np.random.uniform(-1.0, 1.0, shape).astype(_np.float32)
                np_c = _np.random.uniform(-1.0, 1.0, shape).astype(_np.float32)
                np_d = _np.random.uniform(-1.0, 1.0, shape).astype(_np.float32)

                mx_a = np.array(np_a)
                mx_a.attach_grad()
                mx_b = np.array(np_b)
                mx_b.attach_grad()
                mx_c = np.array(np_c)
                mx_c.attach_grad()
                mx_d = np.array(np_d)
                mx_d.attach_grad()
                expected_ret = _np.stack([np_a, np_b, np_c, np_d], axis=axis)
                with mx.autograd.record():
                    y = test_stack(mx_a, mx_b, mx_c, mx_d)

                y.backward()

                assert_almost_equal(mx_a.grad.asnumpy(),
                                    _np.ones(shape),
                                    rtol=1e-3,
                                    atol=1e-5)
                assert_almost_equal(mx_b.grad.asnumpy(),
                                    _np.ones(shape),
                                    rtol=1e-3,
                                    atol=1e-5)
                assert_almost_equal(mx_c.grad.asnumpy(),
                                    _np.ones(shape),
                                    rtol=1e-3,
                                    atol=1e-5)
                assert_almost_equal(mx_d.grad.asnumpy(),
                                    _np.ones(shape),
                                    rtol=1e-3,
                                    atol=1e-5)

                np_out = _np.stack([np_a, np_b, np_c, np_d], axis=axis)
                mx_out = np.stack([mx_a, mx_b, mx_c, mx_d], axis=axis)
                assert same(mx_out.asnumpy(), np_out)
Example #17
    def forward(self, rel_positions, query=None):
        """Forward function

        Parameters
        ----------
        rel_positions
            The relative shifts. Shape (query_length, mem_length).
            Each element represents the shift between the :math:`i`-th element of
            the query and the :math:`j`-th element of the memory.
        query
            The query for computing the relative scores. The shape depends on the layout.
            If we use T5 attention, the query will not be used.

        Returns
        -------
        rel_scores
            The relative attention scores
            Can have shape (batch_size, num_heads, query_length, mem_length)
            or (num_heads, query_length, mem_length)
        """
        if self._method == 'transformer_xl' or self._method == 'shaw':
            assert query is not None, 'Must specify query if method={}'.format(self._method)
            if self._bidirectional:
                if self._max_distance is not None:
                    rel_positions = np.clip(rel_positions,
                                            a_min=-self._max_distance,
                                            a_max=self._max_distance)
            else:
                if self._max_distance is not None:
                    rel_positions = np.clip(rel_positions,
                                            a_min=0, a_max=self._max_distance)
            # uniq_rel.shape = (#uniq,), rev_index.shape = (L_q, L_m)
            uniq_rel, rev_index = np.unique(rel_positions, return_inverse=True)

            uniq_rel_pos_embed = self._rel_pos_embed(uniq_rel)
            if self._method == 'transformer_xl':
                uniq_rel_pos_embed = self._rel_proj(self._dropout_layer(uniq_rel_pos_embed))
            # Shape (#uniq, K, C_q)
            uniq_rel_pos_embed = npx.reshape(uniq_rel_pos_embed,
                                             (-2, self._num_heads, self._head_query_units))
            # Calculate the dot-product between query and the relative positional embeddings.
            # After the calculation, rel_score.shape = (L_q, #uniq, N, K)
            if self._layout == 'NKT':
                # query_for_rel: (N, K, L_q, C_q)
                if self._use_einsum:
                    rel_score = np.einsum('bnid,jnd->ijbn', query, uniq_rel_pos_embed)
                else:
                    rel_score = np.transpose(
                        np.matmul(query,
                                  np.transpose(uniq_rel_pos_embed, (1, 2, 0))),
                        (2, 3, 0, 1)
                    )
            elif self._layout == 'NTK':
                # query_for_rel: (N, L_q, K, C_q)
                if self._use_einsum:
                    rel_score = np.einsum('bind,jnd->ijbn', query, uniq_rel_pos_embed)
                else:
                    rel_score = np.transpose(
                        np.matmul(np.swapaxes(query, 1, 2),
                                  np.transpose(uniq_rel_pos_embed, (1, 2, 0))),
                        (2, 3, 0, 1)
                    )
            elif self._layout == 'TNK':
                # query_for_rel: (L_q, N, K, C_q)
                if self._use_einsum:
                    rel_score = np.einsum('ibnd,jnd->ijbn', query, uniq_rel_pos_embed)
                else:
                    rel_score = np.transpose(
                        np.matmul(np.transpose(query, (1, 2, 0, 3)),
                                  np.transpose(uniq_rel_pos_embed, (1, 2, 0))),
                        (2, 3, 0, 1)
                    )
            else:
                raise NotImplementedError
            # We use gather_nd to select the elements
            # TODO(sxjscience) Use advanced indexing once available
            rev_index = npx.reshape_like(rev_index, rel_positions).astype(np.int32)
            query_idx = np.expand_dims(npx.arange_like(rel_positions, axis=0).astype(np.int32),
                                       axis=-1) + np.zeros_like(rev_index)
            rel_score = npx.gather_nd(rel_score, np.stack([query_idx, rev_index]))
            rel_score = np.transpose(rel_score, (2, 3, 0, 1))
        elif self._method == 't5':
            # shape is (K, L_q, L_m)
            rel_score = self._rel_pos_embed(rel_positions).transpose((2, 0, 1))
        else:
            raise NotImplementedError
        return rel_score
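A minimal sketch of the unique-then-gather trick used above: embed each
distinct relative position once, then map the results back through the
inverse index returned by `np.unique`.

rel_positions = np.array([[0, -1, -2], [1, 0, -1]])
uniq_rel, rev_index = np.unique(rel_positions, return_inverse=True)
print(uniq_rel)                                # [-2. -1.  0.  1.]
print(rev_index.reshape(rel_positions.shape))  # [[2 1 0]
                                               #  [3 2 1]]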
Example #18
def corr2d_multi_in_out(X, K):
    return np.stack([corr2d_multi_in(X, k) for k in K])
Example #19
import d2l_dx
from mxnet import np, npx
npx.set_np()

def corr2d_multi_in(X, K):
    return sum(d2l_dx.corr2d(x, k) for x, k in zip(X, K))

def corr2d_multi_in_out(X, K):
    return np.stack([corr2d_multi_in(X, k) for k in K])

X = np.array([[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
              [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
K = np.array([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])

print('X.shape: ', X.shape, ' K.shape: ', K.shape)

print(corr2d_multi_in(X, K).shape)

K_stacked = np.stack((K, K + 1, K + 2))
print('K_stacked.shape: ', K_stacked.shape)

print(corr2d_multi_in_out(X, K_stacked).shape)